drbd: prepare for genetlink parallel_ops
drivers/block/drbd/drbd_nl.c
/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>

/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);

#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
#include <linux/genl_magic_func.h>

/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
        genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
        if (genlmsg_reply(skb, info))
                printk(KERN_ERR "drbd: error sending genl reply\n");
}

/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
 * reason it could fail would be no space in the skb, and there are 4k available. */
int drbd_msg_put_info(struct sk_buff *skb, const char *info)
{
        struct nlattr *nla;
        int err = -EMSGSIZE;

        if (!info || !info[0])
                return 0;

        nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
        if (!nla)
                return err;

        err = nla_put_string(skb, T_info_text, info);
        if (err) {
                nla_nest_cancel(skb, nla);
                return err;
        } else
                nla_nest_end(skb, nla);
        return 0;
}

/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 */
#define DRBD_ADM_NEED_MINOR     1
#define DRBD_ADM_NEED_RESOURCE  2
#define DRBD_ADM_NEED_CONNECTION 4
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
        struct sk_buff *skb, struct genl_info *info, unsigned flags)
{
        struct drbd_genlmsghdr *d_in = info->userhdr;
        const u8 cmd = info->genlhdr->cmd;
        int err;

        memset(adm_ctx, 0, sizeof(*adm_ctx));

        /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
        if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
                return -EPERM;

        adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!adm_ctx->reply_skb) {
                err = -ENOMEM;
                goto fail;
        }

        adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
                                        info, &drbd_genl_family, 0, cmd);
        /* a put of a few bytes into a fresh skb of >= 4k will always succeed,
         * but check anyway */
        if (!adm_ctx->reply_dh) {
                err = -ENOMEM;
                goto fail;
        }

        adm_ctx->reply_dh->minor = d_in->minor;
        adm_ctx->reply_dh->ret_code = NO_ERROR;

        adm_ctx->volume = VOLUME_UNSPECIFIED;
        if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
                struct nlattr *nla;
                /* parse and validate only */
                err = drbd_cfg_context_from_attrs(NULL, info);
                if (err)
                        goto fail;

                /* It was present, and valid,
                 * copy it over to the reply skb. */
                err = nla_put_nohdr(adm_ctx->reply_skb,
                                info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
                                info->attrs[DRBD_NLA_CFG_CONTEXT]);
                if (err)
                        goto fail;

                /* and assign stuff to the global adm_ctx */
                nla = nested_attr_tb[__nla_type(T_ctx_volume)];
                if (nla)
                        adm_ctx->volume = nla_get_u32(nla);
                nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
                if (nla)
                        adm_ctx->resource_name = nla_data(nla);
                adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
                adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
                if ((adm_ctx->my_addr &&
                     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
                    (adm_ctx->peer_addr &&
                     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
                        err = -EINVAL;
                        goto fail;
                }
        }

        adm_ctx->minor = d_in->minor;
        adm_ctx->device = minor_to_device(d_in->minor);
        if (adm_ctx->resource_name) {
                adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
        }

        if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
                drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
                return ERR_MINOR_INVALID;
        }
        if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
                drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
                if (adm_ctx->resource_name)
                        return ERR_RES_NOT_KNOWN;
                return ERR_INVALID_REQUEST;
        }

        if (flags & DRBD_ADM_NEED_CONNECTION) {
                if (adm_ctx->resource) {
                        drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
                        return ERR_INVALID_REQUEST;
                }
                if (adm_ctx->device) {
                        drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
                        return ERR_INVALID_REQUEST;
                }
                if (adm_ctx->my_addr && adm_ctx->peer_addr)
                        adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
                                                          nla_len(adm_ctx->my_addr),
                                                          nla_data(adm_ctx->peer_addr),
                                                          nla_len(adm_ctx->peer_addr));
                if (!adm_ctx->connection) {
                        drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
                        return ERR_INVALID_REQUEST;
                }
        }

        /* some more paranoia, if the request was over-determined */
        if (adm_ctx->device && adm_ctx->resource &&
            adm_ctx->device->resource != adm_ctx->resource) {
                pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
                                adm_ctx->minor, adm_ctx->resource->name,
                                adm_ctx->device->resource->name);
                drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
                return ERR_INVALID_REQUEST;
        }
        if (adm_ctx->device &&
            adm_ctx->volume != VOLUME_UNSPECIFIED &&
            adm_ctx->volume != adm_ctx->device->vnr) {
                pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
                                adm_ctx->minor, adm_ctx->volume,
                                adm_ctx->device->vnr,
                                adm_ctx->device->resource->name);
                drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
                return ERR_INVALID_REQUEST;
        }

        return NO_ERROR;

fail:
        nlmsg_free(adm_ctx->reply_skb);
        adm_ctx->reply_skb = NULL;
        return err;
}

static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
        struct genl_info *info, int retcode)
{
        if (adm_ctx->connection) {
                kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
                adm_ctx->connection = NULL;
        }
        if (adm_ctx->resource) {
                kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
                adm_ctx->resource = NULL;
        }

        if (!adm_ctx->reply_skb)
                return -ENOMEM;

        adm_ctx->reply_dh->ret_code = retcode;
        drbd_adm_send_reply(adm_ctx->reply_skb, info);
        return 0;
}
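
/*
 * Every .doit handler in this file brackets its work with the two helpers
 * above; see drbd_adm_set_role() below for the pattern in full:
 *
 *   retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *   if (!adm_ctx.reply_skb)
 *           return retcode;
 *   if (retcode != NO_ERROR)
 *           goto out;
 *   ... actual work on adm_ctx.device ...
 * out:
 *   drbd_adm_finish(&adm_ctx, info, retcode);
 *   return 0;
 */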

static void setup_khelper_env(struct drbd_connection *connection, char **envp)
{
        char *afs;

        /* FIXME: A future version will not allow this case. */
        if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
                return;

        switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
        case AF_INET6:
                afs = "ipv6";
                snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
                         &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
                break;
        case AF_INET:
                afs = "ipv4";
                snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
                         &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
                break;
        default:
                afs = "ssocks";
                snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
                         &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
        }
        snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
}
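
/*
 * For example, for an IPv4 peer at 192.0.2.1 (an illustrative address),
 * the environment set up above would contain:
 *
 *   DRBD_PEER_AF=ipv4
 *   DRBD_PEER_ADDRESS=192.0.2.1
 */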

int drbd_khelper(struct drbd_device *device, char *cmd)
{
        char *envp[] = { "HOME=/",
                        "TERM=linux",
                        "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                         (char[20]) { }, /* address family */
                         (char[60]) { }, /* address */
                        NULL };
        char mb[12];
        char *argv[] = {usermode_helper, cmd, mb, NULL };
        struct drbd_connection *connection = first_peer_device(device)->connection;
        struct sib_info sib;
        int ret;

        if (current == connection->worker.task)
                set_bit(CALLBACK_PENDING, &connection->flags);

        snprintf(mb, 12, "minor-%d", device_to_minor(device));
        setup_khelper_env(connection, envp);

        /* The helper may take some time.
         * write out any unsynced meta data changes now */
        drbd_md_sync(device);

        drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
        sib.sib_reason = SIB_HELPER_PRE;
        sib.helper_name = cmd;
        drbd_bcast_event(device, &sib);
        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        if (ret)
                drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
                                usermode_helper, cmd, mb,
                                (ret >> 8) & 0xff, ret);
        else
                drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
                                usermode_helper, cmd, mb,
                                (ret >> 8) & 0xff, ret);
        sib.sib_reason = SIB_HELPER_POST;
        sib.helper_exit_code = ret;
        drbd_bcast_event(device, &sib);

        if (current == connection->worker.task)
                clear_bit(CALLBACK_PENDING, &connection->flags);

        if (ret < 0) /* Ignore any ERRNOs we got. */
                ret = 0;

        return ret;
}
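
/*
 * Illustration: with usermode_helper left at its usual default (typically
 * "/sbin/drbdadm"; it is a module parameter and may differ) and
 * cmd == "fence-peer", minor 0 would be invoked roughly as
 *
 *   /sbin/drbdadm fence-peer minor-0
 *
 * with DRBD_PEER_AF/DRBD_PEER_ADDRESS in its environment as shown above.
 */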

static int conn_khelper(struct drbd_connection *connection, char *cmd)
{
        char *envp[] = { "HOME=/",
                        "TERM=linux",
                        "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                         (char[20]) { }, /* address family */
                         (char[60]) { }, /* address */
                        NULL };
        char *resource_name = connection->resource->name;
        char *argv[] = {usermode_helper, cmd, resource_name, NULL };
        int ret;

        setup_khelper_env(connection, envp);
        conn_md_sync(connection);

        drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
        /* TODO: conn_bcast_event() ?? */

        ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
        if (ret)
                drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
                          usermode_helper, cmd, resource_name,
                          (ret >> 8) & 0xff, ret);
        else
                drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
                          usermode_helper, cmd, resource_name,
                          (ret >> 8) & 0xff, ret);
        /* TODO: conn_bcast_event() ?? */

        if (ret < 0) /* Ignore any ERRNOs we got. */
                ret = 0;

        return ret;
}

static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
{
        enum drbd_fencing_p fp = FP_NOT_AVAIL;
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                if (get_ldev_if_state(device, D_CONSISTENT)) {
                        struct disk_conf *disk_conf =
                                rcu_dereference(peer_device->device->ldev->disk_conf);
                        fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
                        put_ldev(device);
                }
        }
        rcu_read_unlock();

        if (fp == FP_NOT_AVAIL) {
                /* IO Suspending works on the whole resource.
                   Do it only for one device. */
                vnr = 0;
                peer_device = idr_get_next(&connection->peer_devices, &vnr);
                drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
        }

        return fp;
}

bool conn_try_outdate_peer(struct drbd_connection *connection)
{
        unsigned int connect_cnt;
        union drbd_state mask = { };
        union drbd_state val = { };
        enum drbd_fencing_p fp;
        char *ex_to_string;
        int r;

        if (connection->cstate >= C_WF_REPORT_PARAMS) {
                drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
                return false;
        }

        spin_lock_irq(&connection->resource->req_lock);
        connect_cnt = connection->connect_cnt;
        spin_unlock_irq(&connection->resource->req_lock);

        fp = highest_fencing_policy(connection);
        switch (fp) {
        case FP_NOT_AVAIL:
                drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
                goto out;
        case FP_DONT_CARE:
                return true;
        default: ;
        }

        r = conn_khelper(connection, "fence-peer");

        switch ((r>>8) & 0xff) {
        case 3: /* peer is inconsistent */
                ex_to_string = "peer is inconsistent or worse";
                mask.pdsk = D_MASK;
                val.pdsk = D_INCONSISTENT;
                break;
        case 4: /* peer got outdated, or was already outdated */
                ex_to_string = "peer was fenced";
                mask.pdsk = D_MASK;
                val.pdsk = D_OUTDATED;
                break;
        case 5: /* peer was down */
                if (conn_highest_disk(connection) == D_UP_TO_DATE) {
                        /* we will (or already did) create a new UUID anyway... */
                        ex_to_string = "peer is unreachable, assumed to be dead";
                        mask.pdsk = D_MASK;
                        val.pdsk = D_OUTDATED;
                } else {
                        ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
                }
                break;
        case 6: /* Peer is primary, voluntarily outdate myself.
                 * This is useful when an unconnected R_SECONDARY is asked to
                 * become R_PRIMARY, but finds the other peer being active. */
                ex_to_string = "peer is active";
                drbd_warn(connection, "Peer is primary, outdating myself.\n");
                mask.disk = D_MASK;
                val.disk = D_OUTDATED;
                break;
        case 7:
                if (fp != FP_STONITH)
                        drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
                ex_to_string = "peer was stonithed";
                mask.pdsk = D_MASK;
                val.pdsk = D_OUTDATED;
                break;
        default:
                /* The script is broken ... */
                drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
                return false; /* Eventually leave IO frozen */
        }

        drbd_info(connection, "fence-peer helper returned %d (%s)\n",
                  (r>>8) & 0xff, ex_to_string);

 out:

        /* Not using
           conn_request_state(connection, mask, val, CS_VERBOSE);
           here, because we might have been able to re-establish the
           connection in the meantime. */
        spin_lock_irq(&connection->resource->req_lock);
        if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
                if (connection->connect_cnt != connect_cnt)
                        /* In case the connection was established and dropped
                           while the fence-peer handler was running, ignore it */
                        drbd_info(connection, "Ignoring fence-peer exit code\n");
                else
                        _conn_request_state(connection, mask, val, CS_VERBOSE);
        }
        spin_unlock_irq(&connection->resource->req_lock);

        return conn_highest_pdsk(connection) <= D_OUTDATED;
}
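
/*
 * Summary of the fence-peer helper exit codes handled above:
 *   3 - peer is inconsistent (or worse)
 *   4 - peer was (or already is) outdated
 *   5 - peer was down; outdate it only if our own disk is UpToDate
 *   6 - peer is primary; outdate our own disk instead
 *   7 - peer was stonithed (only expected with fencing == Stonith)
 *   anything else - helper considered broken, leave IO frozen
 */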

static int _try_outdate_peer_async(void *data)
{
        struct drbd_connection *connection = (struct drbd_connection *)data;

        conn_try_outdate_peer(connection);

        kref_put(&connection->kref, drbd_destroy_connection);
        return 0;
}

void conn_try_outdate_peer_async(struct drbd_connection *connection)
{
        struct task_struct *opa;

        kref_get(&connection->kref);
        opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
        if (IS_ERR(opa)) {
                drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
                kref_put(&connection->kref, drbd_destroy_connection);
        }
}

enum drbd_state_rv
drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
{
        const int max_tries = 4;
        enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
        struct net_conf *nc;
        int try = 0;
        int forced = 0;
        union drbd_state mask, val;

        if (new_role == R_PRIMARY) {
                struct drbd_connection *connection;

                /* Detect dead peers as soon as possible.  */

                rcu_read_lock();
                for_each_connection(connection, device->resource)
                        request_ping(connection);
                rcu_read_unlock();
        }

        mutex_lock(device->state_mutex);

        mask.i = 0; mask.role = R_MASK;
        val.i  = 0; val.role  = new_role;

        while (try++ < max_tries) {
                rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);

                /* in case we first succeeded to outdate,
                 * but now suddenly could establish a connection */
                if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
                        val.pdsk = 0;
                        mask.pdsk = 0;
                        continue;
                }

                if (rv == SS_NO_UP_TO_DATE_DISK && force &&
                    (device->state.disk < D_UP_TO_DATE &&
                     device->state.disk >= D_INCONSISTENT)) {
                        mask.disk = D_MASK;
                        val.disk  = D_UP_TO_DATE;
                        forced = 1;
                        continue;
                }

                if (rv == SS_NO_UP_TO_DATE_DISK &&
                    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
                        D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

                        if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
                                val.disk = D_UP_TO_DATE;
                                mask.disk = D_MASK;
                        }
                        continue;
                }

                if (rv == SS_NOTHING_TO_DO)
                        goto out;
                if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
                        if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
                                drbd_warn(device, "Forced into split brain situation!\n");
                                mask.pdsk = D_MASK;
                                val.pdsk  = D_OUTDATED;

                        }
                        continue;
                }
                if (rv == SS_TWO_PRIMARIES) {
                        /* Maybe the peer is detected as dead very soon...
                           retry at most once more in this case. */
                        int timeo;
                        rcu_read_lock();
                        nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
                        timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
                        rcu_read_unlock();
                        schedule_timeout_interruptible(timeo);
                        if (try < max_tries)
                                try = max_tries - 1;
                        continue;
                }
                if (rv < SS_SUCCESS) {
                        rv = _drbd_request_state(device, mask, val,
                                                CS_VERBOSE + CS_WAIT_COMPLETE);
                        if (rv < SS_SUCCESS)
                                goto out;
                }
                break;
        }

        if (rv < SS_SUCCESS)
                goto out;

        if (forced)
                drbd_warn(device, "Forced to consider local data as UpToDate!\n");

        /* Wait until nothing is on the fly :) */
        wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

        /* FIXME also wait for all pending P_BARRIER_ACK? */

        if (new_role == R_SECONDARY) {
                set_disk_ro(device->vdisk, true);
                if (get_ldev(device)) {
                        device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
                        put_ldev(device);
                }
        } else {
                mutex_lock(&device->resource->conf_update);
                nc = first_peer_device(device)->connection->net_conf;
                if (nc)
                        nc->discard_my_data = 0; /* without copy; single bit op is atomic */
                mutex_unlock(&device->resource->conf_update);

                set_disk_ro(device->vdisk, false);
                if (get_ldev(device)) {
                        if (((device->state.conn < C_CONNECTED ||
                               device->state.pdsk <= D_FAILED)
                              && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
                                drbd_uuid_new_current(device);

                        device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
                        put_ldev(device);
                }
        }

        /* write-out of activity-log covered areas of the bitmap
         * to stable storage is already done in the after-state-change work */

        if (device->state.conn >= C_WF_REPORT_PARAMS) {
                /* if this was forced, we should consider sync */
                if (forced)
                        drbd_send_uuids(first_peer_device(device));
                drbd_send_current_state(first_peer_device(device));
        }

        drbd_md_sync(device);

        kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
        mutex_unlock(device->state_mutex);
        return rv;
}

static const char *from_attrs_err_to_txt(int err)
{
        return  err == -ENOMSG ? "required attribute missing" :
                err == -EOPNOTSUPP ? "unknown mandatory attribute" :
                err == -EEXIST ? "can not change invariant setting" :
                "invalid attribute value";
}

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
        struct drbd_config_context adm_ctx;
        struct set_role_parms parms;
        int err;
        enum drbd_ret_code retcode;

        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
                return retcode;
        if (retcode != NO_ERROR)
                goto out;

        memset(&parms, 0, sizeof(parms));
        if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
                err = set_role_parms_from_attrs(&parms, info);
                if (err) {
                        retcode = ERR_MANDATORY_TAG;
                        drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
                        goto out;
                }
        }

        if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
                retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
        else
                retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
out:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
}

/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
 *
 * We currently have two possible layouts:
 * external:
 *   |----------- md_size_sect ------------------|
 *   [ 4k superblock ][ activity log ][  Bitmap  ]
 *   | al_offset == 8 |
 *   | bm_offset = al_offset + X      |
 *  ==> bitmap sectors = md_size_sect - bm_offset
 *
 * internal:
 *            |----------- md_size_sect ------------------|
 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 *                        | al_offset < 0 |
 *            | bm_offset = al_offset - Y |
 *  ==> bitmap sectors = Y = al_offset - bm_offset
 *
 *  Activity log size used to be fixed 32kB,
 *  but is about to become configurable.
 */
static void drbd_md_set_sector_offsets(struct drbd_device *device,
                                       struct drbd_backing_dev *bdev)
{
        sector_t md_size_sect = 0;
        unsigned int al_size_sect = bdev->md.al_size_4k * 8;

        bdev->md.md_offset = drbd_md_ss(bdev);

        switch (bdev->md.meta_dev_idx) {
        default:
                /* v07 style fixed size indexed meta data */
                bdev->md.md_size_sect = MD_128MB_SECT;
                bdev->md.al_offset = MD_4kB_SECT;
                bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
                break;
        case DRBD_MD_INDEX_FLEX_EXT:
                /* just occupy the full device; unit: sectors */
                bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
                bdev->md.al_offset = MD_4kB_SECT;
                bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
                break;
        case DRBD_MD_INDEX_INTERNAL:
        case DRBD_MD_INDEX_FLEX_INT:
                /* al size is still fixed */
                bdev->md.al_offset = -al_size_sect;
                /* we need (slightly less than) ~ this many bitmap sectors: */
                md_size_sect = drbd_get_capacity(bdev->backing_bdev);
                md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
                md_size_sect = BM_SECT_TO_EXT(md_size_sect);
                md_size_sect = ALIGN(md_size_sect, 8);

                /* plus the "drbd meta data super block",
                 * and the activity log; */
                md_size_sect += MD_4kB_SECT + al_size_sect;

                bdev->md.md_size_sect = md_size_sect;
                /* bitmap offset is adjusted by 'super' block size */
                bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
                break;
        }
}
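
/*
 * Rough worked example (assumed numbers, for illustration only): with DRBD's
 * 4 KiB-per-bit bitmap granularity, a 1 TiB backing device needs about
 * 2^28 bitmap bits = 32 MiB of bitmap, i.e. 65536 sectors.  Internal meta
 * data for such a device is therefore roughly those 65536 bitmap sectors
 * plus the 4 kB super block and the activity log, placed at the end of the
 * device.
 */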

/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
        /* Needs 9 bytes at max including trailing NUL:
         * -1ULL ==> "16384 EB" */
        static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
        int base = 0;
        while (size >= 10000 && base < sizeof(units)-1) {
                /* shift + round */
                size = (size >> 10) + !!(size & (1<<9));
                base++;
        }
        sprintf(buf, "%u %cB", (unsigned)size, units[base]);

        return buf;
}
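
/*
 * For example: ppsize(buf, 1048576) yields "1024 MB" (1048576 KB, shifted
 * down once), and ppsize(buf, 10000) yields "10 MB" (9.77 MB, rounded up
 * by the (1<<9) rounding bit).
 */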

/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 *  remote READ does inc_ap_bio, receiver would need to receive answer
 *  packet from remote to dec_ap_bio again.
 *  receiver receive_sizes(), comes here,
 *  waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 *  (not connected, or bad/no disk on peer):
 *  see drbd_fail_request_early, ap_bio_cnt is zero.
 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 *  peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a device flag, is triggered by drbd internals,
 * and should be short-lived. */
void drbd_suspend_io(struct drbd_device *device)
{
        set_bit(SUSPEND_IO, &device->flags);
        if (drbd_suspended(device))
                return;
        wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_device *device)
{
        clear_bit(SUSPEND_IO, &device->flags);
        wake_up(&device->misc_wait);
}
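
/*
 * The two functions above are used as a bracket around code that must not
 * race with application IO; drbd_determine_dev_size() below shows the
 * canonical pattern: drbd_suspend_io(device); ... drbd_resume_io(device);
 */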

/**
 * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
 * @device:     DRBD device.
 *
 * Returns the outcome as enum determine_dev_size; negative values indicate
 * errors. You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
        sector_t prev_first_sect, prev_size; /* previous meta location */
        sector_t la_size_sect, u_size;
        struct drbd_md *md = &device->ldev->md;
        u32 prev_al_stripe_size_4k;
        u32 prev_al_stripes;
        sector_t size;
        char ppb[10];
        void *buffer;

        int md_moved, la_size_changed;
        enum determine_dev_size rv = DS_UNCHANGED;

        /* race:
         * application request passes inc_ap_bio,
         * but then cannot get an AL-reference.
         * this function later may wait on ap_bio_cnt == 0. -> deadlock.
         *
         * to avoid that:
         * Suspend IO right here.
         * still lock the act_log to not trigger ASSERTs there.
         */
        drbd_suspend_io(device);
        buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
        if (!buffer) {
                drbd_resume_io(device);
                return DS_ERROR;
        }

        /* no wait necessary anymore, actually we could assert that */
        wait_event(device->al_wait, lc_try_lock(device->act_log));

        prev_first_sect = drbd_md_first_sector(device->ldev);
        prev_size = device->ldev->md.md_size_sect;
        la_size_sect = device->ldev->md.la_size_sect;

        if (rs) {
                /* rs is non NULL if we should change the AL layout only */

                prev_al_stripes = md->al_stripes;
                prev_al_stripe_size_4k = md->al_stripe_size_4k;

                md->al_stripes = rs->al_stripes;
                md->al_stripe_size_4k = rs->al_stripe_size / 4;
                md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
        }

        drbd_md_set_sector_offsets(device, device->ldev);

        rcu_read_lock();
        u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
        rcu_read_unlock();
        size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);

        if (size < la_size_sect) {
                if (rs && u_size == 0) {
                        /* Remove "rs &&" later. This check should always be active, but
                           right now the receiver expects the permissive behavior */
                        drbd_warn(device, "Implicit shrink not allowed. "
                                 "Use --size=%llus for explicit shrink.\n",
                                 (unsigned long long)size);
                        rv = DS_ERROR_SHRINK;
                }
                if (u_size > size)
                        rv = DS_ERROR_SPACE_MD;
                if (rv != DS_UNCHANGED)
                        goto err_out;
        }

        if (drbd_get_capacity(device->this_bdev) != size ||
            drbd_bm_capacity(device) != size) {
                int err;
                err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
                if (unlikely(err)) {
                        /* currently there is only one error: ENOMEM! */
                        size = drbd_bm_capacity(device)>>1;
                        if (size == 0) {
                                drbd_err(device, "OUT OF MEMORY! "
                                    "Could not allocate bitmap!\n");
                        } else {
                                drbd_err(device, "BM resizing failed. "
                                    "Leaving size unchanged at size = %lu KB\n",
                                    (unsigned long)size);
                        }
                        rv = DS_ERROR;
                }
                /* racy, see comments above. */
                drbd_set_my_capacity(device, size);
                device->ldev->md.la_size_sect = size;
                drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
                     (unsigned long long)size>>1);
        }
        if (rv <= DS_ERROR)
                goto err_out;

        la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);

        md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
                || prev_size       != device->ldev->md.md_size_sect;

        if (la_size_changed || md_moved || rs) {
                u32 prev_flags;

                drbd_al_shrink(device); /* All extents inactive. */

                prev_flags = md->flags;
                md->flags &= ~MDF_PRIMARY_IND;
                drbd_md_write(device, buffer);

                drbd_info(device, "Writing the whole bitmap, %s\n",
                         la_size_changed && md_moved ? "size changed and md moved" :
                         la_size_changed ? "size changed" : "md moved");
                /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
                drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
                               "size changed", BM_LOCKED_MASK);
                drbd_initialize_al(device, buffer);

                md->flags = prev_flags;
                drbd_md_write(device, buffer);

                if (rs)
                        drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
                                  md->al_stripes, md->al_stripe_size_4k * 4);
        }

        if (size > la_size_sect)
                rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
        if (size < la_size_sect)
                rv = DS_SHRUNK;

        if (0) {
        err_out:
                if (rs) {
                        md->al_stripes = prev_al_stripes;
                        md->al_stripe_size_4k = prev_al_stripe_size_4k;
                        md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;

                        drbd_md_set_sector_offsets(device, device->ldev);
                }
        }
        lc_unlock(device->act_log);
        wake_up(&device->al_wait);
        drbd_md_put_buffer(device);
        drbd_resume_io(device);

        return rv;
}

sector_t
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
                  sector_t u_size, int assume_peer_has_space)
{
        sector_t p_size = device->p_size;   /* partner's disk size. */
        sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
        sector_t m_size; /* my size */
        sector_t size = 0;

        m_size = drbd_get_max_capacity(bdev);

        if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
                drbd_warn(device, "Resize while not connected was forced by the user!\n");
                p_size = m_size;
        }

        if (p_size && m_size) {
                size = min_t(sector_t, p_size, m_size);
        } else {
                if (la_size_sect) {
                        size = la_size_sect;
                        if (m_size && m_size < size)
                                size = m_size;
                        if (p_size && p_size < size)
                                size = p_size;
                } else {
                        if (m_size)
                                size = m_size;
                        if (p_size)
                                size = p_size;
                }
        }

        if (size == 0)
                drbd_err(device, "Both nodes diskless!\n");

        if (u_size) {
                if (u_size > size)
                        drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
                            (unsigned long)u_size>>1, (unsigned long)size>>1);
                else
                        size = u_size;
        }

        return size;
}
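
/*
 * For example (illustrative sizes): if the local backing device can hold
 * 100 GiB and the peer reports 80 GiB, the agreed size is min(100, 80) =
 * 80 GiB.  A user-requested size (u_size) can only shrink that result
 * further; asking for more than the agreed size is rejected above.
 */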

/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @device:     DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after calling
 * this function.
 */
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
{
        struct lru_cache *n, *t;
        struct lc_element *e;
        unsigned int in_use;
        int i;

        if (device->act_log &&
            device->act_log->nr_elements == dc->al_extents)
                return 0;

        in_use = 0;
        t = device->act_log;
        n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
                dc->al_extents, sizeof(struct lc_element), 0);

        if (n == NULL) {
                drbd_err(device, "Cannot allocate act_log lru!\n");
                return -ENOMEM;
        }
        spin_lock_irq(&device->al_lock);
        if (t) {
                for (i = 0; i < t->nr_elements; i++) {
                        e = lc_element_by_index(t, i);
                        if (e->refcnt)
                                drbd_err(device, "refcnt(%d)==%d\n",
                                    e->lc_number, e->refcnt);
                        in_use += e->refcnt;
                }
        }
        if (!in_use)
                device->act_log = n;
        spin_unlock_irq(&device->al_lock);
        if (in_use) {
                drbd_err(device, "Activity log still in use!\n");
                lc_destroy(n);
                return -EBUSY;
        } else {
                if (t)
                        lc_destroy(t);
        }
        drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
        return 0;
}

static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
{
        struct request_queue * const q = device->rq_queue;
        unsigned int max_hw_sectors = max_bio_size >> 9;
        unsigned int max_segments = 0;

        if (get_ldev_if_state(device, D_ATTACHING)) {
                struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;

                max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
                rcu_read_lock();
                max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
                rcu_read_unlock();
                put_ldev(device);
        }

        blk_queue_logical_block_size(q, 512);
        blk_queue_max_hw_sectors(q, max_hw_sectors);
        /* This is the workaround for "bio would need to, but cannot, be split" */
        blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
        blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);

        if (get_ldev_if_state(device, D_ATTACHING)) {
                struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;

                blk_queue_stack_limits(q, b);

                if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
                        drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
                                 q->backing_dev_info.ra_pages,
                                 b->backing_dev_info.ra_pages);
                        q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
                }
                put_ldev(device);
        }
}

void drbd_reconsider_max_bio_size(struct drbd_device *device)
{
        unsigned int now, new, local, peer;

        now = queue_max_hw_sectors(device->rq_queue) << 9;
        local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
        peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */

        if (get_ldev_if_state(device, D_ATTACHING)) {
                local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
                device->local_max_bio_size = local;
                put_ldev(device);
        }
        local = min(local, DRBD_MAX_BIO_SIZE);

        /* We may ignore peer limits if the peer is modern enough, because
           from 8.3.8 onwards the peer can use multiple BIOs for a single
           peer_request */
        if (device->state.conn >= C_WF_REPORT_PARAMS) {
                if (first_peer_device(device)->connection->agreed_pro_version < 94)
                        peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
                        /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
                else if (first_peer_device(device)->connection->agreed_pro_version == 94)
                        peer = DRBD_MAX_SIZE_H80_PACKET;
                else if (first_peer_device(device)->connection->agreed_pro_version < 100)
                        peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
                else
                        peer = DRBD_MAX_BIO_SIZE;
        }

        new = min(local, peer);

        if (device->state.role == R_PRIMARY && new < now)
                drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);

        if (new != now)
                drbd_info(device, "max BIO size = %u\n", new);

        drbd_setup_queue_param(device, new);
}
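
/*
 * Summary of the peer limit chosen above, by agreed protocol version:
 *   < 94   - min(peer's announced limit, DRBD_MAX_SIZE_H80_PACKET)
 *   == 94  - DRBD_MAX_SIZE_H80_PACKET
 *   95..99 - DRBD_MAX_BIO_SIZE_P95 (drbd 8.3.8 onwards, before 8.4.0)
 *   >= 100 - DRBD_MAX_BIO_SIZE
 */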

/* Starts the worker thread */
static void conn_reconfig_start(struct drbd_connection *connection)
{
        drbd_thread_start(&connection->worker);
        drbd_flush_workqueue(&connection->sender_work);
}

/* if still unconfigured, stops worker again. */
static void conn_reconfig_done(struct drbd_connection *connection)
{
        bool stop_threads;
        spin_lock_irq(&connection->resource->req_lock);
        stop_threads = conn_all_vols_unconf(connection) &&
                connection->cstate == C_STANDALONE;
        spin_unlock_irq(&connection->resource->req_lock);
        if (stop_threads) {
                /* asender is implicitly stopped by receiver
                 * in conn_disconnect() */
                drbd_thread_stop(&connection->receiver);
                drbd_thread_stop(&connection->worker);
        }
}

/* Make sure IO is suspended before calling this function. */
static void drbd_suspend_al(struct drbd_device *device)
{
        int s = 0;

        if (!lc_try_lock(device->act_log)) {
                drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
                return;
        }

        drbd_al_shrink(device);
        spin_lock_irq(&device->resource->req_lock);
        if (device->state.conn < C_CONNECTED)
                s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
        spin_unlock_irq(&device->resource->req_lock);
        lc_unlock(device->act_log);

        if (s)
                drbd_info(device, "Suspended AL updates\n");
}


static bool should_set_defaults(struct genl_info *info)
{
        unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
        return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
}

static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
{
        /* This is limited by 16 bit "slot" numbers,
         * and by available on-disk context storage.
         *
         * Also (u16)~0 is special (denotes a "free" extent).
         *
         * One transaction occupies one 4kB on-disk block,
         * we have n such blocks in the on disk ring buffer,
         * the "current" transaction may fail (n-1),
         * and there are 919 slots of context information per transaction.
         *
         * 72 transaction blocks amount to more than 2**16 context slots,
         * so cap there first.
         */
        const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
        const unsigned int sufficient_on_disk =
                (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
                /AL_CONTEXT_PER_TRANSACTION;

        unsigned int al_size_4k = bdev->md.al_size_4k;

        if (al_size_4k > sufficient_on_disk)
                return max_al_nr;

        return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
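
/*
 * Worked check of the numbers in the comment above: 72 * 919 = 66168, which
 * exceeds 2^16 = 65536, hence the cap.  With the default 32 kB activity log
 * (al_size_4k == 8), one block is reserved for the "current" transaction,
 * leaving (8 - 1) * 919 = 6433 usable extents (matching DRBD_AL_EXTENTS_MAX,
 * assuming its usual value of 6433 from drbd_limits.h).
 */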
1240
1241 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1242 {
1243         struct drbd_config_context adm_ctx;
1244         enum drbd_ret_code retcode;
1245         struct drbd_device *device;
1246         struct disk_conf *new_disk_conf, *old_disk_conf;
1247         struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1248         int err, fifo_size;
1249
1250         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1251         if (!adm_ctx.reply_skb)
1252                 return retcode;
1253         if (retcode != NO_ERROR)
1254                 goto out;
1255
1256         device = adm_ctx.device;
1257
1258         /* we also need a disk
1259          * to change the options on */
1260         if (!get_ldev(device)) {
1261                 retcode = ERR_NO_DISK;
1262                 goto out;
1263         }
1264
1265         new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1266         if (!new_disk_conf) {
1267                 retcode = ERR_NOMEM;
1268                 goto fail;
1269         }
1270
1271         mutex_lock(&device->resource->conf_update);
1272         old_disk_conf = device->ldev->disk_conf;
1273         *new_disk_conf = *old_disk_conf;
1274         if (should_set_defaults(info))
1275                 set_disk_conf_defaults(new_disk_conf);
1276
1277         err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1278         if (err && err != -ENOMSG) {
1279                 retcode = ERR_MANDATORY_TAG;
1280                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1281                 goto fail_unlock;
1282         }
1283
1284         if (!expect(new_disk_conf->resync_rate >= 1))
1285                 new_disk_conf->resync_rate = 1;
1286
1287         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1288                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1289         if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1290                 new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1291
1292         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1293                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1294
1295         fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1296         if (fifo_size != device->rs_plan_s->size) {
1297                 new_plan = fifo_alloc(fifo_size);
1298                 if (!new_plan) {
1299                         drbd_err(device, "kmalloc of fifo_buffer failed");
1300                         retcode = ERR_NOMEM;
1301                         goto fail_unlock;
1302                 }
1303         }
1304
1305         drbd_suspend_io(device);
1306         wait_event(device->al_wait, lc_try_lock(device->act_log));
1307         drbd_al_shrink(device);
1308         err = drbd_check_al_size(device, new_disk_conf);
1309         lc_unlock(device->act_log);
1310         wake_up(&device->al_wait);
1311         drbd_resume_io(device);
1312
1313         if (err) {
1314                 retcode = ERR_NOMEM;
1315                 goto fail_unlock;
1316         }
1317
1318         write_lock_irq(&global_state_lock);
1319         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1320         if (retcode == NO_ERROR) {
1321                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1322                 drbd_resync_after_changed(device);
1323         }
1324         write_unlock_irq(&global_state_lock);
1325
1326         if (retcode != NO_ERROR)
1327                 goto fail_unlock;
1328
1329         if (new_plan) {
1330                 old_plan = device->rs_plan_s;
1331                 rcu_assign_pointer(device->rs_plan_s, new_plan);
1332         }
1333
1334         mutex_unlock(&device->resource->conf_update);
1335
1336         if (new_disk_conf->al_updates)
1337                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1338         else
1339                 device->ldev->md.flags |= MDF_AL_DISABLED;
1340
1341         if (new_disk_conf->md_flushes)
1342                 clear_bit(MD_NO_FUA, &device->flags);
1343         else
1344                 set_bit(MD_NO_FUA, &device->flags);
1345
1346         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1347
1348         drbd_md_sync(device);
1349
1350         if (device->state.conn >= C_CONNECTED) {
1351                 struct drbd_peer_device *peer_device;
1352
1353                 for_each_peer_device(peer_device, device)
1354                         drbd_send_sync_param(peer_device);
1355         }
1356
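             /* disk_conf and rs_plan_s are only ever dereferenced under
              * rcu_read_lock(); once synchronize_rcu() returns, no reader can
              * still see the old objects, so freeing them is safe. */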
1357         synchronize_rcu();
1358         kfree(old_disk_conf);
1359         kfree(old_plan);
1360         mod_timer(&device->request_timer, jiffies + HZ);
1361         goto success;
1362
1363 fail_unlock:
1364         mutex_unlock(&device->resource->conf_update);
1365  fail:
1366         kfree(new_disk_conf);
1367         kfree(new_plan);
1368 success:
1369         put_ldev(device);
1370  out:
1371         drbd_adm_finish(&adm_ctx, info, retcode);
1372         return 0;
1373 }
1374
1375 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1376 {
1377         struct drbd_config_context adm_ctx;
1378         struct drbd_device *device;
1379         int err;
1380         enum drbd_ret_code retcode;
1381         enum determine_dev_size dd;
1382         sector_t max_possible_sectors;
1383         sector_t min_md_device_sectors;
1384         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1385         struct disk_conf *new_disk_conf = NULL;
1386         struct block_device *bdev;
1387         struct lru_cache *resync_lru = NULL;
1388         struct fifo_buffer *new_plan = NULL;
1389         union drbd_state ns, os;
1390         enum drbd_state_rv rv;
1391         struct net_conf *nc;
1392
1393         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1394         if (!adm_ctx.reply_skb)
1395                 return retcode;
1396         if (retcode != NO_ERROR)
1397                 goto finish;
1398
1399         device = adm_ctx.device;
1400         conn_reconfig_start(first_peer_device(device)->connection);
1401
1402         /* if you want to reconfigure, please tear down first */
1403         if (device->state.disk > D_DISKLESS) {
1404                 retcode = ERR_DISK_CONFIGURED;
1405                 goto fail;
1406         }
1407         /* It may just now have detached because of IO error.  Make sure
1408          * drbd_ldev_destroy is done already; we may end up here very fast,
1409          * e.g. if someone calls attach from the on-io-error handler,
1410          * to realize a "hot spare" feature (not that I'd recommend that). */
1411         wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1412
1413         /* make sure there is no leftover from previous force-detach attempts */
1414         clear_bit(FORCE_DETACH, &device->flags);
1415         clear_bit(WAS_IO_ERROR, &device->flags);
1416         clear_bit(WAS_READ_ERROR, &device->flags);
1417
1418         /* and no leftover from previously aborted resync or verify, either */
1419         device->rs_total = 0;
1420         device->rs_failed = 0;
1421         atomic_set(&device->rs_pending_cnt, 0);
1422
1423         /* allocation not in the IO path, drbdsetup context */
1424         nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1425         if (!nbc) {
1426                 retcode = ERR_NOMEM;
1427                 goto fail;
1428         }
1429         spin_lock_init(&nbc->md.uuid_lock);
1430
1431         new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1432         if (!new_disk_conf) {
1433                 retcode = ERR_NOMEM;
1434                 goto fail;
1435         }
1436         nbc->disk_conf = new_disk_conf;
1437
1438         set_disk_conf_defaults(new_disk_conf);
1439         err = disk_conf_from_attrs(new_disk_conf, info);
1440         if (err) {
1441                 retcode = ERR_MANDATORY_TAG;
1442                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1443                 goto fail;
1444         }
1445
1446         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1447                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1448
1449         new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1450         if (!new_plan) {
1451                 retcode = ERR_NOMEM;
1452                 goto fail;
1453         }
1454
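             /* meta_dev_idx >= 0 selects a fixed 128MB slot on an external
              * device; the special values DRBD_MD_INDEX_INTERNAL,
              * DRBD_MD_INDEX_FLEX_EXT and DRBD_MD_INDEX_FLEX_INT (-1, -2, -3)
              * select internal, flexible-external and flexible-internal meta
              * data.  Anything below that range is not a valid layout. */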
1455         if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1456                 retcode = ERR_MD_IDX_INVALID;
1457                 goto fail;
1458         }
1459
1460         write_lock_irq(&global_state_lock);
1461         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1462         write_unlock_irq(&global_state_lock);
1463         if (retcode != NO_ERROR)
1464                 goto fail;
1465
1466         rcu_read_lock();
1467         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1468         if (nc) {
1469                 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1470                         rcu_read_unlock();
1471                         retcode = ERR_STONITH_AND_PROT_A;
1472                         goto fail;
1473                 }
1474         }
1475         rcu_read_unlock();
1476
1477         bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1478                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1479         if (IS_ERR(bdev)) {
1480                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1481                         PTR_ERR(bdev));
1482                 retcode = ERR_OPEN_DISK;
1483                 goto fail;
1484         }
1485         nbc->backing_bdev = bdev;
1486
1487         /*
1488          * meta_dev_idx >= 0: external fixed size, possibly multiple
1489          * drbd sharing one meta device.  TODO in that case, paranoia
1490          * check that [md_bdev, meta_dev_idx] is not yet used by some
1491          * other drbd minor!  (if you use drbd.conf + drbdadm, that
1492          * should check it for you already; but if you don't, or
1493          * someone fooled it, we need to double check here)
1494          */
1495         bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1496                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1497                                   (new_disk_conf->meta_dev_idx < 0) ?
1498                                   (void *)device : (void *)drbd_m_holder);
1499         if (IS_ERR(bdev)) {
1500                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1501                         PTR_ERR(bdev));
1502                 retcode = ERR_OPEN_MD_DISK;
1503                 goto fail;
1504         }
1505         nbc->md_bdev = bdev;
1506
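             /* Internal meta data (INTERNAL or FLEX_INT) must live on the
              * backing device itself, external meta data must not; the
              * comparison below checks that equivalence in both directions. */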
1507         if ((nbc->backing_bdev == nbc->md_bdev) !=
1508             (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1509              new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1510                 retcode = ERR_MD_IDX_INVALID;
1511                 goto fail;
1512         }
1513
1514         resync_lru = lc_create("resync", drbd_bm_ext_cache,
1515                         1, 61, sizeof(struct bm_extent),
1516                         offsetof(struct bm_extent, lce));
1517         if (!resync_lru) {
1518                 retcode = ERR_NOMEM;
1519                 goto fail;
1520         }
1521
1522         /* Read our meta data super block early.
1523          * This also sets other on-disk offsets. */
1524         retcode = drbd_md_read(device, nbc);
1525         if (retcode != NO_ERROR)
1526                 goto fail;
1527
1528         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1529                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1530         if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1531                 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1532
1533         if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1534                 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1535                         (unsigned long long) drbd_get_max_capacity(nbc),
1536                         (unsigned long long) new_disk_conf->disk_size);
1537                 retcode = ERR_DISK_TOO_SMALL;
1538                 goto fail;
1539         }
1540
1541         if (new_disk_conf->meta_dev_idx < 0) {
1542                 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1543                 /* at least one MB, otherwise it does not make sense */
1544                 min_md_device_sectors = (2<<10);
1545         } else {
1546                 max_possible_sectors = DRBD_MAX_SECTORS;
1547                 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1548         }
1549
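             /* MD_128MB_SECT is 128MB in 512-byte sectors; external index N
              * needs room for slots 0..N, i.e. (N + 1) * 128MB.  For example,
              * meta_dev_idx = 3 requires a meta device of at least 512MB. */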
1550         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1551                 retcode = ERR_MD_DISK_TOO_SMALL;
1552                 drbd_warn(device, "refusing attach: md-device too small, "
1553                      "at least %llu sectors needed for this meta-disk type\n",
1554                      (unsigned long long) min_md_device_sectors);
1555                 goto fail;
1556         }
1557
1558         /* Make sure the new disk is big enough
1559          * (we may currently be R_PRIMARY with no local disk...) */
1560         if (drbd_get_max_capacity(nbc) <
1561             drbd_get_capacity(device->this_bdev)) {
1562                 retcode = ERR_DISK_TOO_SMALL;
1563                 goto fail;
1564         }
1565
1566         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1567
1568         if (nbc->known_size > max_possible_sectors) {
1569                 drbd_warn(device, "==> truncating very big lower level device "
1570                         "to the currently maximum possible %llu sectors <==\n",
1571                         (unsigned long long) max_possible_sectors);
1572                 if (new_disk_conf->meta_dev_idx >= 0)
1573                         drbd_warn(device, "==>> using internal or flexible "
1574                                       "meta data may help <<==\n");
1575         }
1576
1577         drbd_suspend_io(device);
1578         /* also wait for the last barrier ack. */
1579         /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1580          * We need a way to either ignore barrier acks for barriers sent before a device
1581          * was attached, or a way to wait for all pending barrier acks to come in.
1582          * As barriers are counted per resource,
1583          * we'd need to suspend io on all devices of a resource.
1584          */
1585         wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1586         /* and for any other previously queued work */
1587         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
1588
1589         rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1590         retcode = rv;  /* FIXME: Type mismatch. */
1591         drbd_resume_io(device);
1592         if (rv < SS_SUCCESS)
1593                 goto fail;
1594
1595         if (!get_ldev_if_state(device, D_ATTACHING))
1596                 goto force_diskless;
1597
1598         if (!device->bitmap) {
1599                 if (drbd_bm_init(device)) {
1600                         retcode = ERR_NOMEM;
1601                         goto force_diskless_dec;
1602                 }
1603         }
1604
1605         if (device->state.conn < C_CONNECTED &&
1606             device->state.role == R_PRIMARY &&
1607             (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1608                 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1609                     (unsigned long long)device->ed_uuid);
1610                 retcode = ERR_DATA_NOT_CURRENT;
1611                 goto force_diskless_dec;
1612         }
1613
1614         /* Since we are diskless, fix the activity log first... */
1615         if (drbd_check_al_size(device, new_disk_conf)) {
1616                 retcode = ERR_NOMEM;
1617                 goto force_diskless_dec;
1618         }
1619
1620         /* Prevent shrinking of consistent devices ! */
1621         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1622             drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1623                 drbd_warn(device, "refusing to truncate a consistent device\n");
1624                 retcode = ERR_DISK_TOO_SMALL;
1625                 goto force_diskless_dec;
1626         }
1627
1628         /* Reset the "barriers don't work" bits here, then force meta data to
1629          * be written, to ensure we determine if barriers are supported. */
1630         if (new_disk_conf->md_flushes)
1631                 clear_bit(MD_NO_FUA, &device->flags);
1632         else
1633                 set_bit(MD_NO_FUA, &device->flags);
1634
1635         /* Point of no return reached.
1636          * Devices and memory are no longer released by error cleanup below.
1637          * Now the device takes over responsibility, and the state engine
1638          * should clean it up somewhere.  */
1639         D_ASSERT(device, device->ldev == NULL);
1640         device->ldev = nbc;
1641         device->resync = resync_lru;
1642         device->rs_plan_s = new_plan;
1643         nbc = NULL;
1644         resync_lru = NULL;
1645         new_disk_conf = NULL;
1646         new_plan = NULL;
1647
1648         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1649
1650         if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1651                 set_bit(CRASHED_PRIMARY, &device->flags);
1652         else
1653                 clear_bit(CRASHED_PRIMARY, &device->flags);
1654
1655         if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1656             !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1657                 set_bit(CRASHED_PRIMARY, &device->flags);
1658
1659         device->send_cnt = 0;
1660         device->recv_cnt = 0;
1661         device->read_cnt = 0;
1662         device->writ_cnt = 0;
1663
1664         drbd_reconsider_max_bio_size(device);
1665
1666         /* If I am currently not R_PRIMARY,
1667          * but meta data primary indicator is set,
1668          * I just now recover from a hard crash,
1669          * and have been R_PRIMARY before that crash.
1670          *
1671          * Now, if I had no connection before that crash
1672          * (have been degraded R_PRIMARY), chances are that
1673          * I won't find my peer now either.
1674          *
1675          * In that case, and _only_ in that case,
1676          * we use the degr-wfc-timeout instead of the default,
1677          * so we can automatically recover from a crash of a
1678          * degraded but active "cluster" after a certain timeout.
1679          */
1680         clear_bit(USE_DEGR_WFC_T, &device->flags);
1681         if (device->state.role != R_PRIMARY &&
1682              drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1683             !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1684                 set_bit(USE_DEGR_WFC_T, &device->flags);
1685
1686         dd = drbd_determine_dev_size(device, 0, NULL);
1687         if (dd <= DS_ERROR) {
1688                 retcode = ERR_NOMEM_BITMAP;
1689                 goto force_diskless_dec;
1690         } else if (dd == DS_GREW)
1691                 set_bit(RESYNC_AFTER_NEG, &device->flags);
1692
1693         if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1694             (test_bit(CRASHED_PRIMARY, &device->flags) &&
1695              drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1696                 drbd_info(device, "Assuming that all blocks are out of sync "
1697                      "(aka FullSync)\n");
1698                 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1699                         "set_n_write from attaching", BM_LOCKED_MASK)) {
1700                         retcode = ERR_IO_MD_DISK;
1701                         goto force_diskless_dec;
1702                 }
1703         } else {
1704                 if (drbd_bitmap_io(device, &drbd_bm_read,
1705                         "read from attaching", BM_LOCKED_MASK)) {
1706                         retcode = ERR_IO_MD_DISK;
1707                         goto force_diskless_dec;
1708                 }
1709         }
1710
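             /* If every bit is set, a full resync is pending anyway;
              * per-extent activity log updates would add no information until
              * it completes, so suspend the activity log. */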
1711         if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1712                 drbd_suspend_al(device); /* IO is still suspended here... */
1713
1714         spin_lock_irq(&device->resource->req_lock);
1715         os = drbd_read_state(device);
1716         ns = os;
1717         /* If MDF_CONSISTENT is not set, go into an inconsistent disk state;
1718            otherwise investigate MDF_WAS_UP_TO_DATE...
1719            If MDF_WAS_UP_TO_DATE is not set, go into D_OUTDATED disk state,
1720            otherwise into D_CONSISTENT state.
1721         */
1722         if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1723                 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1724                         ns.disk = D_CONSISTENT;
1725                 else
1726                         ns.disk = D_OUTDATED;
1727         } else {
1728                 ns.disk = D_INCONSISTENT;
1729         }
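             /* The decision above, in table form:
              *
              *    MDF_CONSISTENT   MDF_WAS_UP_TO_DATE   resulting ns.disk
              *          0              (ignored)        D_INCONSISTENT
              *          1                  0            D_OUTDATED
              *          1                  1            D_CONSISTENT
              */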
1730
1731         if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1732                 ns.pdsk = D_OUTDATED;
1733
1734         rcu_read_lock();
1735         if (ns.disk == D_CONSISTENT &&
1736             (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1737                 ns.disk = D_UP_TO_DATE;
1738
1739         /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1740            MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1741            this point, because drbd_request_state() modifies these
1742            flags. */
1743
1744         if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1745                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1746         else
1747                 device->ldev->md.flags |= MDF_AL_DISABLED;
1748
1749         rcu_read_unlock();
1750
1751         /* In case we are C_CONNECTED postpone any decision on the new disk
1752            state after the negotiation phase. */
1753         if (device->state.conn == C_CONNECTED) {
1754                 device->new_state_tmp.i = ns.i;
1755                 ns.i = os.i;
1756                 ns.disk = D_NEGOTIATING;
1757
1758                 /* We expect to receive up-to-date UUIDs soon.
1759                    To avoid a race in receive_state, free p_uuid while
1760                    holding req_lock. I.e. atomic with the state change */
1761                 kfree(device->p_uuid);
1762                 device->p_uuid = NULL;
1763         }
1764
1765         rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1766         spin_unlock_irq(&device->resource->req_lock);
1767
1768         if (rv < SS_SUCCESS)
1769                 goto force_diskless_dec;
1770
1771         mod_timer(&device->request_timer, jiffies + HZ);
1772
1773         if (device->state.role == R_PRIMARY)
1774                 device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1775         else
1776                 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1777
1778         drbd_md_mark_dirty(device);
1779         drbd_md_sync(device);
1780
1781         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1782         put_ldev(device);
1783         conn_reconfig_done(first_peer_device(device)->connection);
1784         drbd_adm_finish(&adm_ctx, info, retcode);
1785         return 0;
1786
1787  force_diskless_dec:
1788         put_ldev(device);
1789  force_diskless:
1790         drbd_force_state(device, NS(disk, D_DISKLESS));
1791         drbd_md_sync(device);
1792  fail:
1793         conn_reconfig_done(first_peer_device(device)->connection);
1794         if (nbc) {
1795                 if (nbc->backing_bdev)
1796                         blkdev_put(nbc->backing_bdev,
1797                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1798                 if (nbc->md_bdev)
1799                         blkdev_put(nbc->md_bdev,
1800                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1801                 kfree(nbc);
1802         }
1803         kfree(new_disk_conf);
1804         lc_destroy(resync_lru);
1805         kfree(new_plan);
1806
1807  finish:
1808         drbd_adm_finish(&adm_ctx, info, retcode);
1809         return 0;
1810 }
1811
1812 static int adm_detach(struct drbd_device *device, int force)
1813 {
1814         enum drbd_state_rv retcode;
1815         int ret;
1816
1817         if (force) {
1818                 set_bit(FORCE_DETACH, &device->flags);
1819                 drbd_force_state(device, NS(disk, D_FAILED));
1820                 retcode = SS_SUCCESS;
1821                 goto out;
1822         }
1823
1824         drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1825         drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1826         retcode = drbd_request_state(device, NS(disk, D_FAILED));
1827         drbd_md_put_buffer(device);
1828         /* D_FAILED will transition to DISKLESS. */
1829         ret = wait_event_interruptible(device->misc_wait,
1830                         device->state.disk != D_FAILED);
1831         drbd_resume_io(device);
1832         if ((int)retcode == (int)SS_IS_DISKLESS)
1833                 retcode = SS_NOTHING_TO_DO;
1834         if (ret)
1835                 retcode = ERR_INTR;
1836 out:
1837         return retcode;
1838 }
1839
1840 /* Detaching the disk is a process in multiple stages.  First we need to lock
1841  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1842  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1843  * internal references as well.
1844  * Only then we have finally detached. */
1845 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1846 {
1847         struct drbd_config_context adm_ctx;
1848         enum drbd_ret_code retcode;
1849         struct detach_parms parms = { };
1850         int err;
1851
1852         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1853         if (!adm_ctx.reply_skb)
1854                 return retcode;
1855         if (retcode != NO_ERROR)
1856                 goto out;
1857
1858         if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1859                 err = detach_parms_from_attrs(&parms, info);
1860                 if (err) {
1861                         retcode = ERR_MANDATORY_TAG;
1862                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1863                         goto out;
1864                 }
1865         }
1866
1867         retcode = adm_detach(adm_ctx.device, parms.force_detach);
1868 out:
1869         drbd_adm_finish(&adm_ctx, info, retcode);
1870         return 0;
1871 }
1872
1873 static bool conn_resync_running(struct drbd_connection *connection)
1874 {
1875         struct drbd_peer_device *peer_device;
1876         bool rv = false;
1877         int vnr;
1878
1879         rcu_read_lock();
1880         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1881                 struct drbd_device *device = peer_device->device;
1882                 if (device->state.conn == C_SYNC_SOURCE ||
1883                     device->state.conn == C_SYNC_TARGET ||
1884                     device->state.conn == C_PAUSED_SYNC_S ||
1885                     device->state.conn == C_PAUSED_SYNC_T) {
1886                         rv = true;
1887                         break;
1888                 }
1889         }
1890         rcu_read_unlock();
1891
1892         return rv;
1893 }
1894
1895 static bool conn_ov_running(struct drbd_connection *connection)
1896 {
1897         struct drbd_peer_device *peer_device;
1898         bool rv = false;
1899         int vnr;
1900
1901         rcu_read_lock();
1902         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1903                 struct drbd_device *device = peer_device->device;
1904                 if (device->state.conn == C_VERIFY_S ||
1905                     device->state.conn == C_VERIFY_T) {
1906                         rv = true;
1907                         break;
1908                 }
1909         }
1910         rcu_read_unlock();
1911
1912         return rv;
1913 }
1914
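     /* Must be called with rcu_read_lock() held: both old_net_conf and the
      * rcu_dereference()d disk_conf below are only stable under RCU. */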
1915 static enum drbd_ret_code
1916 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1917 {
1918         struct drbd_peer_device *peer_device;
1919         int i;
1920
1921         if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1922                 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1923                         return ERR_NEED_APV_100;
1924
1925                 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1926                         return ERR_NEED_APV_100;
1927
1928                 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1929                         return ERR_NEED_APV_100;
1930         }
1931
1932         if (!new_net_conf->two_primaries &&
1933             conn_highest_role(connection) == R_PRIMARY &&
1934             conn_highest_peer(connection) == R_PRIMARY)
1935                 return ERR_NEED_ALLOW_TWO_PRI;
1936
1937         if (new_net_conf->two_primaries &&
1938             (new_net_conf->wire_protocol != DRBD_PROT_C))
1939                 return ERR_NOT_PROTO_C;
1940
1941         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1942                 struct drbd_device *device = peer_device->device;
1943                 if (get_ldev(device)) {
1944                         enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
1945                         put_ldev(device);
1946                         if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
1947                                 return ERR_STONITH_AND_PROT_A;
1948                 }
1949                 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
1950                         return ERR_DISCARD_IMPOSSIBLE;
1951         }
1952
1953         if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
1954                 return ERR_CONG_NOT_PROTO_A;
1955
1956         return NO_ERROR;
1957 }
1958
1959 static enum drbd_ret_code
1960 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
1961 {
1962         enum drbd_ret_code rv;  /* must not be static once genl handlers run in parallel */
1963         struct drbd_peer_device *peer_device;
1964         int i;
1965
1966         rcu_read_lock();
1967         rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
1968         rcu_read_unlock();
1969
1970         /* connection->volumes protected by genl_lock() here */
1971         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1972                 struct drbd_device *device = peer_device->device;
1973                 if (!device->bitmap) {
1974                         if (drbd_bm_init(device))
1975                                 return ERR_NOMEM;
1976                 }
1977         }
1978
1979         return rv;
1980 }
1981
1982 struct crypto {
1983         struct crypto_hash *verify_tfm;
1984         struct crypto_hash *csums_tfm;
1985         struct crypto_hash *cram_hmac_tfm;
1986         struct crypto_hash *integrity_tfm;
1987 };
1988
1989 static int
1990 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
1991 {
1992         if (!tfm_name[0])
1993                 return NO_ERROR;
1994
1995         *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
1996         if (IS_ERR(*tfm)) {
1997                 *tfm = NULL;
1998                 return err_alg;
1999         }
2000
2001         return NO_ERROR;
2002 }
2003
2004 static enum drbd_ret_code
2005 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2006 {
2007         char hmac_name[CRYPTO_MAX_ALG_NAME];
2008         enum drbd_ret_code rv;
2009
2010         rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2011                        ERR_CSUMS_ALG);
2012         if (rv != NO_ERROR)
2013                 return rv;
2014         rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2015                        ERR_VERIFY_ALG);
2016         if (rv != NO_ERROR)
2017                 return rv;
2018         rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2019                        ERR_INTEGRITY_ALG);
2020         if (rv != NO_ERROR)
2021                 return rv;
2022         if (new_net_conf->cram_hmac_alg[0] != 0) {
2023                 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2024                          new_net_conf->cram_hmac_alg);
2025
2026                 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2027                                ERR_AUTH_ALG);
2028         }
2029
2030         return rv;
2031 }
2032
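     /* Safe to call on a partially initialized struct crypto: alloc_hash()
      * resets *tfm to NULL on failure and crypto_free_hash() ignores NULL
      * pointers, so error paths may free all four members unconditionally. */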
2033 static void free_crypto(struct crypto *crypto)
2034 {
2035         crypto_free_hash(crypto->cram_hmac_tfm);
2036         crypto_free_hash(crypto->integrity_tfm);
2037         crypto_free_hash(crypto->csums_tfm);
2038         crypto_free_hash(crypto->verify_tfm);
2039 }
2040
2041 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2042 {
2043         struct drbd_config_context adm_ctx;
2044         enum drbd_ret_code retcode;
2045         struct drbd_connection *connection;
2046         struct net_conf *old_net_conf, *new_net_conf = NULL;
2047         int err;
2048         int ovr; /* online verify running */
2049         int rsr; /* re-sync running */
2050         struct crypto crypto = { };
2051
2052         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2053         if (!adm_ctx.reply_skb)
2054                 return retcode;
2055         if (retcode != NO_ERROR)
2056                 goto out;
2057
2058         connection = adm_ctx.connection;
2059
2060         new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2061         if (!new_net_conf) {
2062                 retcode = ERR_NOMEM;
2063                 goto out;
2064         }
2065
2066         conn_reconfig_start(connection);
2067
2068         mutex_lock(&connection->data.mutex);
2069         mutex_lock(&connection->resource->conf_update);
2070         old_net_conf = connection->net_conf;
2071
2072         if (!old_net_conf) {
2073                 drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2074                 retcode = ERR_INVALID_REQUEST;
2075                 goto fail;
2076         }
2077
2078         *new_net_conf = *old_net_conf;
2079         if (should_set_defaults(info))
2080                 set_net_conf_defaults(new_net_conf);
2081
2082         err = net_conf_from_attrs_for_change(new_net_conf, info);
2083         if (err && err != -ENOMSG) {
2084                 retcode = ERR_MANDATORY_TAG;
2085                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2086                 goto fail;
2087         }
2088
2089         retcode = check_net_options(connection, new_net_conf);
2090         if (retcode != NO_ERROR)
2091                 goto fail;
2092
2093         /* re-sync running */
2094         rsr = conn_resync_running(connection);
2095         if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2096                 retcode = ERR_CSUMS_RESYNC_RUNNING;
2097                 goto fail;
2098         }
2099
2100         /* online verify running */
2101         ovr = conn_ov_running(connection);
2102         if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2103                 retcode = ERR_VERIFY_RUNNING;
2104                 goto fail;
2105         }
2106
2107         retcode = alloc_crypto(&crypto, new_net_conf);
2108         if (retcode != NO_ERROR)
2109                 goto fail;
2110
2111         rcu_assign_pointer(connection->net_conf, new_net_conf);
2112
2113         if (!rsr) {
2114                 crypto_free_hash(connection->csums_tfm);
2115                 connection->csums_tfm = crypto.csums_tfm;
2116                 crypto.csums_tfm = NULL;
2117         }
2118         if (!ovr) {
2119                 crypto_free_hash(connection->verify_tfm);
2120                 connection->verify_tfm = crypto.verify_tfm;
2121                 crypto.verify_tfm = NULL;
2122         }
2123
2124         crypto_free_hash(connection->integrity_tfm);
2125         connection->integrity_tfm = crypto.integrity_tfm;
2126         if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2127                 /* Do this without trying to take connection->data.mutex again.  */
2128                 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2129
2130         crypto_free_hash(connection->cram_hmac_tfm);
2131         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2132
2133         mutex_unlock(&connection->resource->conf_update);
2134         mutex_unlock(&connection->data.mutex);
2135         synchronize_rcu();
2136         kfree(old_net_conf);
2137
2138         if (connection->cstate >= C_WF_REPORT_PARAMS) {
2139                 struct drbd_peer_device *peer_device;
2140                 int vnr;
2141
2142                 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2143                         drbd_send_sync_param(peer_device);
2144         }
2145
2146         goto done;
2147
2148  fail:
2149         mutex_unlock(&connection->resource->conf_update);
2150         mutex_unlock(&connection->data.mutex);
2151         free_crypto(&crypto);
2152         kfree(new_net_conf);
2153  done:
2154         conn_reconfig_done(connection);
2155  out:
2156         drbd_adm_finish(&adm_ctx, info, retcode);
2157         return 0;
2158 }
2159
2160 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2161 {
2162         struct drbd_config_context adm_ctx;
2163         struct drbd_peer_device *peer_device;
2164         struct net_conf *old_net_conf, *new_net_conf = NULL;
2165         struct crypto crypto = { };
2166         struct drbd_resource *resource;
2167         struct drbd_connection *connection;
2168         enum drbd_ret_code retcode;
2169         int i;
2170         int err;
2171
2172         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2173
2174         if (!adm_ctx.reply_skb)
2175                 return retcode;
2176         if (retcode != NO_ERROR)
2177                 goto out;
2178         if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2179                 drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2180                 retcode = ERR_INVALID_REQUEST;
2181                 goto out;
2182         }
2183
2184         /* No need for _rcu here. All reconfiguration is
2185          * strictly serialized on genl_lock(). We are protected against
2186          * concurrent reconfiguration/addition/deletion */
2187         for_each_resource(resource, &drbd_resources) {
2188                 for_each_connection(connection, resource) {
2189                         if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2190                             !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2191                                     connection->my_addr_len)) {
2192                                 retcode = ERR_LOCAL_ADDR;
2193                                 goto out;
2194                         }
2195
2196                         if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2197                             !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2198                                     connection->peer_addr_len)) {
2199                                 retcode = ERR_PEER_ADDR;
2200                                 goto out;
2201                         }
2202                 }
2203         }
2204
2205         connection = first_connection(adm_ctx.resource);
2206         conn_reconfig_start(connection);
2207
2208         if (connection->cstate > C_STANDALONE) {
2209                 retcode = ERR_NET_CONFIGURED;
2210                 goto fail;
2211         }
2212
2213         /* allocation not in the IO path, drbdsetup / netlink process context */
2214         new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2215         if (!new_net_conf) {
2216                 retcode = ERR_NOMEM;
2217                 goto fail;
2218         }
2219
2220         set_net_conf_defaults(new_net_conf);
2221
2222         err = net_conf_from_attrs(new_net_conf, info);
2223         if (err && err != -ENOMSG) {
2224                 retcode = ERR_MANDATORY_TAG;
2225                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2226                 goto fail;
2227         }
2228
2229         retcode = check_net_options(connection, new_net_conf);
2230         if (retcode != NO_ERROR)
2231                 goto fail;
2232
2233         retcode = alloc_crypto(&crypto, new_net_conf);
2234         if (retcode != NO_ERROR)
2235                 goto fail;
2236
2237         ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2238
2239         drbd_flush_workqueue(&connection->sender_work);
2240
2241         mutex_lock(&adm_ctx.resource->conf_update);
2242         old_net_conf = connection->net_conf;
2243         if (old_net_conf) {
2244                 retcode = ERR_NET_CONFIGURED;
2245                 mutex_unlock(&adm_ctx.resource->conf_update);
2246                 goto fail;
2247         }
2248         rcu_assign_pointer(connection->net_conf, new_net_conf);
2249
2250         conn_free_crypto(connection);
2251         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2252         connection->integrity_tfm = crypto.integrity_tfm;
2253         connection->csums_tfm = crypto.csums_tfm;
2254         connection->verify_tfm = crypto.verify_tfm;
2255
2256         connection->my_addr_len = nla_len(adm_ctx.my_addr);
2257         memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2258         connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2259         memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2260
2261         mutex_unlock(&adm_ctx.resource->conf_update);
2262
2263         rcu_read_lock();
2264         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2265                 struct drbd_device *device = peer_device->device;
2266                 device->send_cnt = 0;
2267                 device->recv_cnt = 0;
2268         }
2269         rcu_read_unlock();
2270
2271         retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2272
2273         conn_reconfig_done(connection);
2274         drbd_adm_finish(&adm_ctx, info, retcode);
2275         return 0;
2276
2277 fail:
2278         free_crypto(&crypto);
2279         kfree(new_net_conf);
2280
2281         conn_reconfig_done(connection);
2282 out:
2283         drbd_adm_finish(&adm_ctx, info, retcode);
2284         return 0;
2285 }
2286
2287 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2288 {
2289         enum drbd_state_rv rv;
2290
2291         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2292                         force ? CS_HARD : 0);
2293
2294         switch (rv) {
2295         case SS_NOTHING_TO_DO:
2296                 break;
2297         case SS_ALREADY_STANDALONE:
2298                 return SS_SUCCESS;
2299         case SS_PRIMARY_NOP:
2300                 /* Our state checking code wants to see the peer outdated. */
2301                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2302
2303                 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2304                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2305
2306                 break;
2307         case SS_CW_FAILED_BY_PEER:
2308                 /* The peer probably wants to see us outdated. */
2309                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2310                                                         disk, D_OUTDATED), 0);
2311                 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2312                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2313                                         CS_HARD);
2314                 }
2315                 break;
2316         default:
2317                 break;  /* no special handling necessary */
2318         }
2319
2320         if (rv >= SS_SUCCESS) {
2321                 enum drbd_state_rv rv2;
2322                 /* No one else can reconfigure the network while I am here.
2323                  * The state handling only uses drbd_thread_stop_nowait();
2324                  * here we really want to wait until the receiver is gone.
2325                  */
2326                 drbd_thread_stop(&connection->receiver);
2327
2328                 /* Race breaker.  This additional state change request may be
2329                  * necessary, if this was a forced disconnect during a receiver
2330                  * restart.  We may have "killed" the receiver thread just
2331                  * after drbd_receiver() returned.  Typically, we should be
2332                  * C_STANDALONE already, now, and this becomes a no-op.
2333                  */
2334                 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2335                                 CS_VERBOSE | CS_HARD);
2336                 if (rv2 < SS_SUCCESS)
2337                         drbd_err(connection,
2338                                 "unexpected rv2=%d in conn_try_disconnect()\n",
2339                                 rv2);
2340         }
2341         return rv;
2342 }
2343
2344 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2345 {
2346         struct drbd_config_context adm_ctx;
2347         struct disconnect_parms parms;
2348         struct drbd_connection *connection;
2349         enum drbd_state_rv rv;
2350         enum drbd_ret_code retcode;
2351         int err;
2352
2353         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2354         if (!adm_ctx.reply_skb)
2355                 return retcode;
2356         if (retcode != NO_ERROR)
2357                 goto fail;
2358
2359         connection = adm_ctx.connection;
2360         memset(&parms, 0, sizeof(parms));
2361         if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2362                 err = disconnect_parms_from_attrs(&parms, info);
2363                 if (err) {
2364                         retcode = ERR_MANDATORY_TAG;
2365                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2366                         goto fail;
2367                 }
2368         }
2369
2370         rv = conn_try_disconnect(connection, parms.force_disconnect);
2371         if (rv < SS_SUCCESS)
2372                 retcode = rv;  /* FIXME: Type mismatch. */
2373         else
2374                 retcode = NO_ERROR;
2375  fail:
2376         drbd_adm_finish(&adm_ctx, info, retcode);
2377         return 0;
2378 }
2379
2380 void resync_after_online_grow(struct drbd_device *device)
2381 {
2382         int iass; /* I am sync source */
2383
2384         drbd_info(device, "Resync of new storage after online grow\n");
2385         if (device->state.role != device->state.peer)
2386                 iass = (device->state.role == R_PRIMARY);
2387         else
2388                 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2389
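             /* When roles are equal (e.g. both Secondary), the
              * RESOLVE_CONFLICTS flag decided above serves as the tie-breaker;
              * the handshake sets it on exactly one side of the connection, so
              * both nodes reach the same conclusion. */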
2390         if (iass)
2391                 drbd_start_resync(device, C_SYNC_SOURCE);
2392         else
2393                 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2394 }
2395
2396 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2397 {
2398         struct drbd_config_context adm_ctx;
2399         struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2400         struct resize_parms rs;
2401         struct drbd_device *device;
2402         enum drbd_ret_code retcode;
2403         enum determine_dev_size dd;
2404         bool change_al_layout = false;
2405         enum dds_flags ddsf;
2406         sector_t u_size;
2407         int err;
2408
2409         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2410         if (!adm_ctx.reply_skb)
2411                 return retcode;
2412         if (retcode != NO_ERROR)
2413                 goto fail;
2414
2415         device = adm_ctx.device;
2416         if (!get_ldev(device)) {
2417                 retcode = ERR_NO_DISK;
2418                 goto fail;
2419         }
2420
2421         memset(&rs, 0, sizeof(struct resize_parms));
2422         rs.al_stripes = device->ldev->md.al_stripes;
2423         rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2424         if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2425                 err = resize_parms_from_attrs(&rs, info);
2426                 if (err) {
2427                         retcode = ERR_MANDATORY_TAG;
2428                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2429                         goto fail_ldev;
2430                 }
2431         }
2432
2433         if (device->state.conn > C_CONNECTED) {
2434                 retcode = ERR_RESIZE_RESYNC;
2435                 goto fail_ldev;
2436         }
2437
2438         if (device->state.role == R_SECONDARY &&
2439             device->state.peer == R_SECONDARY) {
2440                 retcode = ERR_NO_PRIMARY;
2441                 goto fail_ldev;
2442         }
2443
2444         if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2445                 retcode = ERR_NEED_APV_93;
2446                 goto fail_ldev;
2447         }
2448
2449         rcu_read_lock();
2450         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2451         rcu_read_unlock();
2452         if (u_size != (sector_t)rs.resize_size) {
2453                 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2454                 if (!new_disk_conf) {
2455                         retcode = ERR_NOMEM;
2456                         goto fail_ldev;
2457                 }
2458         }
2459
2460         if (device->ldev->md.al_stripes != rs.al_stripes ||
2461             device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2462                 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2463
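                     /* al_size_k is in kB (stripes * stripe size in kB).
                      * Upper bound: 16 * 1024 * 1024 kB = 16GB of activity
                      * log; lower bound: MD_32kB_SECT/2 = 32 kB, the smallest
                      * supported on-disk AL. */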
2464                 if (al_size_k > (16 * 1024 * 1024)) {
2465                         retcode = ERR_MD_LAYOUT_TOO_BIG;
2466                         goto fail_ldev;
2467                 }
2468
2469                 if (al_size_k < MD_32kB_SECT/2) {
2470                         retcode = ERR_MD_LAYOUT_TOO_SMALL;
2471                         goto fail_ldev;
2472                 }
2473
2474                 if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2475                         retcode = ERR_MD_LAYOUT_CONNECTED;
2476                         goto fail_ldev;
2477                 }
2478
2479                 change_al_layout = true;
2480         }
2481
2482         if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2483                 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2484
2485         if (new_disk_conf) {
2486                 mutex_lock(&device->resource->conf_update);
2487                 old_disk_conf = device->ldev->disk_conf;
2488                 *new_disk_conf = *old_disk_conf;
2489                 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2490                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2491                 mutex_unlock(&device->resource->conf_update);
2492                 synchronize_rcu();
2493                 kfree(old_disk_conf);
2494         }
2495
2496         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2497         dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2498         drbd_md_sync(device);
2499         put_ldev(device);
2500         if (dd == DS_ERROR) {
2501                 retcode = ERR_NOMEM_BITMAP;
2502                 goto fail;
2503         } else if (dd == DS_ERROR_SPACE_MD) {
2504                 retcode = ERR_MD_LAYOUT_NO_FIT;
2505                 goto fail;
2506         } else if (dd == DS_ERROR_SHRINK) {
2507                 retcode = ERR_IMPLICIT_SHRINK;
2508                 goto fail;
2509         }
2510
2511         if (device->state.conn == C_CONNECTED) {
2512                 if (dd == DS_GREW)
2513                         set_bit(RESIZE_PENDING, &device->flags);
2514
2515                 drbd_send_uuids(first_peer_device(device));
2516                 drbd_send_sizes(first_peer_device(device), 1, ddsf);
2517         }
2518
2519  fail:
2520         drbd_adm_finish(&adm_ctx, info, retcode);
2521         return 0;
2522
2523  fail_ldev:
2524         put_ldev(device);
2525         goto fail;
2526 }
2527
2528 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2529 {
2530         struct drbd_config_context adm_ctx;
2531         enum drbd_ret_code retcode;
2532         struct res_opts res_opts;
2533         int err;
2534
2535         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2536         if (!adm_ctx.reply_skb)
2537                 return retcode;
2538         if (retcode != NO_ERROR)
2539                 goto fail;
2540
2541         res_opts = adm_ctx.resource->res_opts;
2542         if (should_set_defaults(info))
2543                 set_res_opts_defaults(&res_opts);
2544
2545         err = res_opts_from_attrs(&res_opts, info);
2546         if (err && err != -ENOMSG) {
2547                 retcode = ERR_MANDATORY_TAG;
2548                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2549                 goto fail;
2550         }
2551
2552         err = set_resource_options(adm_ctx.resource, &res_opts);
2553         if (err) {
2554                 retcode = ERR_INVALID_REQUEST;
2555                 if (err == -ENOMEM)
2556                         retcode = ERR_NOMEM;
2557         }
2558
2559 fail:
2560         drbd_adm_finish(&adm_ctx, info, retcode);
2561         return 0;
2562 }
2563
2564 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2565 {
2566         struct drbd_config_context adm_ctx;
2567         struct drbd_device *device;
2568         int retcode; /* enum drbd_ret_code or enum drbd_state_rv */
2569
2570         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2571         if (!adm_ctx.reply_skb)
2572                 return retcode;
2573         if (retcode != NO_ERROR)
2574                 goto out;
2575
2576         device = adm_ctx.device;
2577
2578         /* If there is still bitmap IO pending, probably because a previous
2579          * resync just finished, wait for it before requesting a new resync.
2580          * Also wait for its after_state_ch(). */
2581         drbd_suspend_io(device);
2582         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2583         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2584
2585         /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2586          * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2587          * try to start a resync handshake as sync target for full sync.
2588          */
2589         if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2590                 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2591                 if (retcode >= SS_SUCCESS) {
2592                         if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2593                                 "set_n_write from invalidate", BM_LOCKED_MASK))
2594                                 retcode = ERR_IO_MD_DISK;
2595                 }
2596         } else
2597                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2598         drbd_resume_io(device);
2599
2600 out:
2601         drbd_adm_finish(&adm_ctx, info, retcode);
2602         return 0;
2603 }
2604
2605 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2606                 union drbd_state mask, union drbd_state val)
2607 {
2608         struct drbd_config_context adm_ctx;
2609         enum drbd_ret_code retcode;
2610
2611         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2612         if (!adm_ctx.reply_skb)
2613                 return retcode;
2614         if (retcode != NO_ERROR)
2615                 goto out;
2616
2617         retcode = drbd_request_state(adm_ctx.device, mask, val);
2618 out:
2619         drbd_adm_finish(&adm_ctx, info, retcode);
2620         return 0;
2621 }
2622
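     /* Set all bits in the bitmap and suspend activity log updates: with
      * every block already marked out of sync, per-extent AL tracking adds
      * nothing until the full sync completes. */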
2623 static int drbd_bmio_set_susp_al(struct drbd_device *device)
2624 {
2625         int rv;
2626
2627         rv = drbd_bmio_set_n_write(device);
2628         drbd_suspend_al(device);
2629         return rv;
2630 }
2631
2632 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2633 {
2634         struct drbd_config_context adm_ctx;
2635         int retcode; /* drbd_ret_code, drbd_state_rv */
2636         struct drbd_device *device;
2637
2638         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2639         if (!adm_ctx.reply_skb)
2640                 return retcode;
2641         if (retcode != NO_ERROR)
2642                 goto out;
2643
2644         device = adm_ctx.device;
2645
2646         /* If there is still bitmap IO pending, probably because a previous
2647          * resync just finished, wait for it before requesting a new resync.
2648          * Also wait for its after_state_ch(). */
2649         drbd_suspend_io(device);
2650         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2651         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2652
2653         /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2654          * in the bitmap.  Otherwise, try to start a resync handshake
2655          * as sync source for full sync.
2656          */
2657         if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2658                 /* The peer will get a resync upon connect anyway. Just make that
2659                    into a full resync. */
2660                 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2661                 if (retcode >= SS_SUCCESS) {
2662                         if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2663                                 "set_n_write from invalidate_peer",
2664                                 BM_LOCKED_SET_ALLOWED))
2665                                 retcode = ERR_IO_MD_DISK;
2666                 }
2667         } else
2668                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2669         drbd_resume_io(device);
2670
2671 out:
2672         drbd_adm_finish(&adm_ctx, info, retcode);
2673         return 0;
2674 }
2675
2676 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2677 {
2678         struct drbd_config_context adm_ctx;
2679         enum drbd_ret_code retcode;
2680
2681         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2682         if (!adm_ctx.reply_skb)
2683                 return retcode;
2684         if (retcode != NO_ERROR)
2685                 goto out;
2686
2687         if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2688                 retcode = ERR_PAUSE_IS_SET;
2689 out:
2690         drbd_adm_finish(&adm_ctx, info, retcode);
2691         return 0;
2692 }
2693
2694 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2695 {
2696         struct drbd_config_context adm_ctx;
2697         union drbd_dev_state s;
2698         enum drbd_ret_code retcode;
2699
2700         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2701         if (!adm_ctx.reply_skb)
2702                 return retcode;
2703         if (retcode != NO_ERROR)
2704                 goto out;
2705
2706         if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2707                 s = adm_ctx.device->state;
2708                 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2709                         retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2710                                   s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2711                 } else {
2712                         retcode = ERR_PAUSE_IS_CLEAR;
2713                 }
2714         }
2715
2716 out:
2717         drbd_adm_finish(&adm_ctx, info, retcode);
2718         return 0;
2719 }
2720
2721 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2722 {
2723         return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2724 }
2725
2726 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2727 {
2728         struct drbd_config_context adm_ctx;
2729         struct drbd_device *device;
2730         int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
2731
2732         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2733         if (!adm_ctx.reply_skb)
2734                 return retcode;
2735         if (retcode != NO_ERROR)
2736                 goto out;
2737
2738         device = adm_ctx.device;
2739         if (test_bit(NEW_CUR_UUID, &device->flags)) {
2740                 drbd_uuid_new_current(device);
2741                 clear_bit(NEW_CUR_UUID, &device->flags);
2742         }
2743         drbd_suspend_io(device);
2744         retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2745         if (retcode == SS_SUCCESS) {
2746                 if (device->state.conn < C_CONNECTED)
2747                         tl_clear(first_peer_device(device)->connection);
2748                 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2749                         tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2750         }
2751         drbd_resume_io(device);
2752
2753 out:
2754         drbd_adm_finish(&adm_ctx, info, retcode);
2755         return 0;
2756 }
2757
2758 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2759 {
2760         return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2761 }
2762
2763 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2764                                     struct drbd_resource *resource,
2765                                     struct drbd_connection *connection,
2766                                     struct drbd_device *device)
2767 {
2768         struct nlattr *nla;
2769         nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2770         if (!nla)
2771                 goto nla_put_failure;
2772         if (device &&
2773             nla_put_u32(skb, T_ctx_volume, device->vnr))
2774                 goto nla_put_failure;
2775         if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2776                 goto nla_put_failure;
2777         if (connection) {
2778                 if (connection->my_addr_len &&
2779                     nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2780                         goto nla_put_failure;
2781                 if (connection->peer_addr_len &&
2782                     nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2783                         goto nla_put_failure;
2784         }
2785         nla_nest_end(skb, nla);
2786         return 0;
2787
2788 nla_put_failure:
2789         if (nla)
2790                 nla_nest_cancel(skb, nla);
2791         return -EMSGSIZE;
2792 }
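/* Resulting attribute layout, sketched (conditions as in the code above):
 *
 *        DRBD_NLA_CFG_CONTEXT
 *          T_ctx_volume         (u32; only if a device was given)
 *          T_ctx_resource_name  (string; always)
 *          T_ctx_my_addr        (binary; only if set on the connection)
 *          T_ctx_peer_addr      (binary; only if set on the connection)
 */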
2793
2794 /*
2795  * Return the connection of @resource if @resource has exactly one connection.
2796  */
2797 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2798 {
2799         struct list_head *connections = &resource->connections;
2800
2801         if (list_empty(connections) || connections->next->next != connections)
2802                 return NULL;
2803         return list_first_entry(&resource->connections, struct drbd_connection, connections);
2804 }
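/* The open-coded test above is the "list has exactly one entry" idiom;
 * an equivalent sketch using the generic helper from <linux/list.h>:
 *
 *        if (!list_is_singular(&resource->connections))
 *                return NULL;
 *        return list_first_entry(&resource->connections,
 *                                struct drbd_connection, connections);
 */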
2805
2806 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2807                 const struct sib_info *sib)
2808 {
2809         struct drbd_resource *resource = device->resource;
2810         struct state_info *si = NULL; /* for sizeof(si->member); */
2811         struct nlattr *nla;
2812         int got_ldev;
2813         int err = 0;
2814         int exclude_sensitive;
2815
2816         /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2817          * to.  So we had better exclude_sensitive information.
2818          *
2819          * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2820          * in the context of the requesting user process. Exclude sensitive
2821          * information, unless current is capable of CAP_SYS_ADMIN.
2822          *
2823          * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2824          * relies on the current implementation of netlink_dump(), which
2825          * executes the dump callback successively from netlink_recvmsg(),
2826          * always in the context of the receiving process */
2827         exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2828
2829         got_ldev = get_ldev(device);
2830
2831         /* We still need to add connection name and volume number information.
2832          * Minor number is in drbd_genlmsghdr. */
2833         if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2834                 goto nla_put_failure;
2835
2836         if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2837                 goto nla_put_failure;
2838
2839         rcu_read_lock();
2840         if (got_ldev) {
2841                 struct disk_conf *disk_conf;
2842
2843                 disk_conf = rcu_dereference(device->ldev->disk_conf);
2844                 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2845         }
2846         if (!err) {
2847                 struct net_conf *nc;
2848
2849                 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2850                 if (nc)
2851                         err = net_conf_to_skb(skb, nc, exclude_sensitive);
2852         }
2853         rcu_read_unlock();
2854         if (err)
2855                 goto nla_put_failure;
2856
2857         nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2858         if (!nla)
2859                 goto nla_put_failure;
2860         if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2861             nla_put_u32(skb, T_current_state, device->state.i) ||
2862             nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2863             nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2864             nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2865             nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2866             nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2867             nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2868             nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2869             nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2870             nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2871             nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2872             nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2873                 goto nla_put_failure;
2874
2875         if (got_ldev) {
2876                 int err;
2877
2878                 spin_lock_irq(&device->ldev->md.uuid_lock);
2879                 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2880                 spin_unlock_irq(&device->ldev->md.uuid_lock);
2881
2882                 if (err)
2883                         goto nla_put_failure;
2884
2885                 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2886                     nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2887                     nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2888                         goto nla_put_failure;
2889                 if (C_SYNC_SOURCE <= device->state.conn &&
2890                     C_PAUSED_SYNC_T >= device->state.conn) {
2891                         if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
2892                             nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
2893                                 goto nla_put_failure;
2894                 }
2895         }
2896
2897         if (sib) {
2898                 switch(sib->sib_reason) {
2899                 case SIB_SYNC_PROGRESS:
2900                 case SIB_GET_STATUS_REPLY:
2901                         break;
2902                 case SIB_STATE_CHANGE:
2903                         if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2904                             nla_put_u32(skb, T_new_state, sib->ns.i))
2905                                 goto nla_put_failure;
2906                         break;
2907                 case SIB_HELPER_POST:
2908                         if (nla_put_u32(skb, T_helper_exit_code,
2909                                         sib->helper_exit_code))
2910                                 goto nla_put_failure;
2911                         /* fall through */
2912                 case SIB_HELPER_PRE:
2913                         if (nla_put_string(skb, T_helper, sib->helper_name))
2914                                 goto nla_put_failure;
2915                         break;
2916                 }
2917         }
2918         nla_nest_end(skb, nla);
2919
2920         if (0)
2921 nla_put_failure:
2922                 err = -EMSGSIZE;
2923         if (got_ldev)
2924                 put_ldev(device);
2925         return err;
2926 }
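/* The "if (0) nla_put_failure:" construct above is a compact goto
 * target: in the normal flow the error assignment is skipped, while
 * every goto nla_put_failure lands on it and falls through into the
 * shared cleanup.  Spelled out, it is equivalent to:
 *
 *        goto cleanup;
 * nla_put_failure:
 *        err = -EMSGSIZE;
 * cleanup:
 *        if (got_ldev)
 *                put_ldev(device);
 *        return err;
 */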
2927
2928 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
2929 {
2930         struct drbd_config_context adm_ctx;
2931         enum drbd_ret_code retcode;
2932         int err;
2933
2934         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2935         if (!adm_ctx.reply_skb)
2936                 return retcode;
2937         if (retcode != NO_ERROR)
2938                 goto out;
2939
2940         err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
2941         if (err) {
2942                 nlmsg_free(adm_ctx.reply_skb);
2943                 return err;
2944         }
2945 out:
2946         drbd_adm_finish(&adm_ctx, info, retcode);
2947         return 0;
2948 }
2949
2950 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
2951 {
2952         struct drbd_device *device;
2953         struct drbd_genlmsghdr *dh;
2954         struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
2955         struct drbd_resource *resource = NULL;
2956         struct drbd_resource *tmp;
2957         unsigned volume = cb->args[1];
2958
2959         /* Open-coded, deferred iteration:
2960          * for_each_resource_safe(resource, tmp, &drbd_resources) {
2961          *      connection = "first connection of resource or undefined";
2962          *      idr_for_each_entry(&resource->devices, device, i) {
2963          *        ...
2964          *      }
2965          * }
2966          * where resource is cb->args[0];
2967          * and i is cb->args[1];
2968          *
2969          * cb->args[2] indicates whether we shall loop over all resources,
2970          * or just dump all volumes of a single resource.
2971          *
2972          * This may miss entries inserted after this dump started,
2973          * or entries deleted before they are reached.
2974          *
2975          * We need to make sure the device won't disappear while
2976          * we are looking at it, and revalidate our iterators
2977          * on each iteration.
2978          */
2979
2980         /* synchronize with conn_create()/drbd_destroy_connection() */
2981         rcu_read_lock();
2982         /* revalidate iterator position */
2983         for_each_resource_rcu(tmp, &drbd_resources) {
2984                 if (pos == NULL) {
2985                         /* first iteration */
2986                         pos = tmp;
2987                         resource = pos;
2988                         break;
2989                 }
2990                 if (tmp == pos) {
2991                         resource = pos;
2992                         break;
2993                 }
2994         }
2995         if (resource) {
2996 next_resource:
2997                 device = idr_get_next(&resource->devices, &volume);
2998                 if (!device) {
2999                         /* No more volumes to dump on this resource.
3000                          * Advance resource iterator. */
3001                         pos = list_entry_rcu(resource->resources.next,
3002                                              struct drbd_resource, resources);
3003                         /* Did we dump any volume of this resource yet? */
3004                         if (volume != 0) {
3005                                 /* If we reached the end of the list,
3006                                  * or only a single resource dump was requested,
3007                                  * we are done. */
3008                                 if (&pos->resources == &drbd_resources || cb->args[2])
3009                                         goto out;
3010                                 volume = 0;
3011                                 resource = pos;
3012                                 goto next_resource;
3013                         }
3014                 }
3015
3016                 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3017                                 cb->nlh->nlmsg_seq, &drbd_genl_family,
3018                                 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3019                 if (!dh)
3020                         goto out;
3021
3022                 if (!device) {
3023                         /* This is a connection without a single volume.
3024                          * Surprisingly enough, it may have a network
3025                          * configuration. */
3026                         struct drbd_connection *connection;
3027
3028                         dh->minor = -1U;
3029                         dh->ret_code = NO_ERROR;
3030                         connection = the_only_connection(resource);
3031                         if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3032                                 goto cancel;
3033                         if (connection) {
3034                                 struct net_conf *nc;
3035
3036                                 nc = rcu_dereference(connection->net_conf);
3037                                 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3038                                         goto cancel;
3039                         }
3040                         goto done;
3041                 }
3042
3043                 D_ASSERT(device, device->vnr == volume);
3044                 D_ASSERT(device, device->resource == resource);
3045
3046                 dh->minor = device_to_minor(device);
3047                 dh->ret_code = NO_ERROR;
3048
3049                 if (nla_put_status_info(skb, device, NULL)) {
3050 cancel:
3051                         genlmsg_cancel(skb, dh);
3052                         goto out;
3053                 }
3054 done:
3055                 genlmsg_end(skb, dh);
3056         }
3057
3058 out:
3059         rcu_read_unlock();
3060         /* where to start the next iteration */
3061         cb->args[0] = (long)pos;
3062         cb->args[1] = (pos == resource) ? volume + 1 : 0;
3063
3064         /* If no more resources/volumes/minors are found, the skb stays empty,
3065          * which terminates the dump. */
3066         return skb->len;
3067 }
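/* For reference: netlink keeps invoking this callback, one reply skb
 * per call, for as long as it returns a positive length.  Returning the
 * length of an skb we did not add a message to (i.e. zero) is what
 * terminates the dump, as noted above. */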
3068
3069 /*
3070  * Request status of all resources, or of all volumes within a single resource.
3071  *
3072  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3073  * That means we cannot use family->attrbuf or other such members, because
3074  * the dump is NOT protected by the genl_lock().  During the dump, we only
3075  * have access to the incoming skb, and must open-code "parsing" of the nlattr payload.
3076  *
3077  * Once things are set up properly, we call into get_one_status().
3078  */
3079 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3080 {
3081         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3082         struct nlattr *nla;
3083         const char *resource_name;
3084         struct drbd_resource *resource;
3085         int maxtype;
3086
3087         /* Is this a followup call? */
3088         if (cb->args[0]) {
3089                 /* ... of a single resource dump,
3090                  * and the resource iterator has been advanced already? */
3091                 if (cb->args[2] && cb->args[2] != cb->args[0])
3092                         return 0; /* DONE. */
3093                 goto dump;
3094         }
3095
3096         /* First call (from netlink_dump_start).  We need to figure out
3097          * which resource(s) the user wants us to dump. */
3098         nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3099                         nlmsg_attrlen(cb->nlh, hdrlen),
3100                         DRBD_NLA_CFG_CONTEXT);
3101
3102         /* No explicit context given.  Dump all. */
3103         if (!nla)
3104                 goto dump;
3105         maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3106         nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3107         if (IS_ERR(nla))
3108                 return PTR_ERR(nla);
3109         /* context given, but no name present? */
3110         if (!nla)
3111                 return -EINVAL;
3112         resource_name = nla_data(nla);
3113         if (!*resource_name)
3114                 return -ENODEV;
3115         resource = drbd_find_resource(resource_name);
3116         if (!resource)
3117                 return -ENODEV;
3118
3119         kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3120
3121         /* prime iterators, and set "filter" mode mark:
3122          * only dump this resource. */
3123         cb->args[0] = (long)resource;
3124         /* cb->args[1] = 0; passed in this way. */
3125         cb->args[2] = (long)resource;
3126
3127 dump:
3128         return get_one_status(skb, cb);
3129 }
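/* Dump state kept across calls in cb->args[], summarized:
 *   args[0]  resource to continue from (pointer cast to long)
 *   args[1]  next volume number within that resource
 *   args[2]  if set, dump only that single resource ("filter" mode)
 */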
3130
3131 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3132 {
3133         struct drbd_config_context adm_ctx;
3134         enum drbd_ret_code retcode;
3135         struct timeout_parms tp;
3136         int err;
3137
3138         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3139         if (!adm_ctx.reply_skb)
3140                 return retcode;
3141         if (retcode != NO_ERROR)
3142                 goto out;
3143
3144         tp.timeout_type =
3145                 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3146                 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3147                 UT_DEFAULT;
3148
3149         err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3150         if (err) {
3151                 nlmsg_free(adm_ctx.reply_skb);
3152                 return err;
3153         }
3154 out:
3155         drbd_adm_finish(&adm_ctx, info, retcode);
3156         return 0;
3157 }
3158
3159 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3160 {
3161         struct drbd_config_context adm_ctx;
3162         struct drbd_device *device;
3163         enum drbd_ret_code retcode;
3164         struct start_ov_parms parms;
3165
3166         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3167         if (!adm_ctx.reply_skb)
3168                 return retcode;
3169         if (retcode != NO_ERROR)
3170                 goto out;
3171
3172         device = adm_ctx.device;
3173
3174         /* resume from last known position, if possible */
3175         parms.ov_start_sector = device->ov_start_sector;
3176         parms.ov_stop_sector = ULLONG_MAX;
3177         if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3178                 int err = start_ov_parms_from_attrs(&parms, info);
3179                 if (err) {
3180                         retcode = ERR_MANDATORY_TAG;
3181                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3182                         goto out;
3183                 }
3184         }
3185         /* w_make_ov_request expects position to be aligned */
3186         device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3187         device->ov_stop_sector = parms.ov_stop_sector;
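        /* The mask above rounds down to a bitmap-bit boundary.  Worked
         * example, assuming the usual 4 KiB per bitmap bit, i.e.
         * BM_SECT_PER_BIT == 8: a start sector of 1005 becomes
         * 1005 & ~7 == 1000. */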
3188
3189         /* If there is still bitmap IO pending, e.g. a previous resync or verify
3190          * just finished, wait for it before requesting a new resync. */
3191         drbd_suspend_io(device);
3192         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3193         retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3194         drbd_resume_io(device);
3195 out:
3196         drbd_adm_finish(&adm_ctx, info, retcode);
3197         return 0;
3198 }
3199
3200
3201 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3202 {
3203         struct drbd_config_context adm_ctx;
3204         struct drbd_device *device;
3205         enum drbd_ret_code retcode;
3206         int skip_initial_sync = 0;
3207         int err;
3208         struct new_c_uuid_parms args;
3209
3210         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3211         if (!adm_ctx.reply_skb)
3212                 return retcode;
3213         if (retcode != NO_ERROR)
3214                 goto out_nolock;
3215
3216         device = adm_ctx.device;
3217         memset(&args, 0, sizeof(args));
3218         if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3219                 err = new_c_uuid_parms_from_attrs(&args, info);
3220                 if (err) {
3221                         retcode = ERR_MANDATORY_TAG;
3222                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3223                         goto out_nolock;
3224                 }
3225         }
3226
3227         mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3228
3229         if (!get_ldev(device)) {
3230                 retcode = ERR_NO_DISK;
3231                 goto out;
3232         }
3233
3234         /* this is "skip initial sync", assumed to be clean */
3235         if (device->state.conn == C_CONNECTED &&
3236             first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3237             device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3238                 drbd_info(device, "Preparing to skip initial sync\n");
3239                 skip_initial_sync = 1;
3240         } else if (device->state.conn != C_STANDALONE) {
3241                 retcode = ERR_CONNECTED;
3242                 goto out_dec;
3243         }
3244
3245         drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3246         drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3247
3248         if (args.clear_bm) {
3249                 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3250                         "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3251                 if (err) {
3252                         drbd_err(device, "Writing bitmap failed with %d\n", err);
3253                         retcode = ERR_IO_MD_DISK;
3254                 }
3255                 if (skip_initial_sync) {
3256                         drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3257                         _drbd_uuid_set(device, UI_BITMAP, 0);
3258                         drbd_print_uuids(device, "cleared bitmap UUID");
3259                         spin_lock_irq(&device->resource->req_lock);
3260                         _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3261                                         CS_VERBOSE, NULL);
3262                         spin_unlock_irq(&device->resource->req_lock);
3263                 }
3264         }
3265
3266         drbd_md_sync(device);
3267 out_dec:
3268         put_ldev(device);
3269 out:
3270         mutex_unlock(device->state_mutex);
3271 out_nolock:
3272         drbd_adm_finish(&adm_ctx, info, retcode);
3273         return 0;
3274 }
3275
3276 static enum drbd_ret_code
3277 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3278 {
3279         const char *name = adm_ctx->resource_name;
3280         if (!name || !name[0]) {
3281                 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3282                 return ERR_MANDATORY_TAG;
3283         }
3284         /* if we want to use these in sysfs/configfs/debugfs some day,
3285          * we must not allow slashes */
3286         if (strchr(name, '/')) {
3287                 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3288                 return ERR_INVALID_REQUEST;
3289         }
3290         return NO_ERROR;
3291 }
3292
3293 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3294 {
3295         struct drbd_config_context adm_ctx;
3296         enum drbd_ret_code retcode;
3297         struct res_opts res_opts;
3298         int err;
3299
3300         retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3301         if (!adm_ctx.reply_skb)
3302                 return retcode;
3303         if (retcode != NO_ERROR)
3304                 goto out;
3305
3306         set_res_opts_defaults(&res_opts);
3307         err = res_opts_from_attrs(&res_opts, info);
3308         if (err && err != -ENOMSG) {
3309                 retcode = ERR_MANDATORY_TAG;
3310                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3311                 goto out;
3312         }
3313
3314         retcode = drbd_check_resource_name(&adm_ctx);
3315         if (retcode != NO_ERROR)
3316                 goto out;
3317
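        /* Creation semantics mirror open(2) with O_CREAT: a resource that
         * already exists is reported as success, unless the caller insisted
         * on exclusive creation by setting NLM_F_EXCL. */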
3318         if (adm_ctx.resource) {
3319                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3320                         retcode = ERR_INVALID_REQUEST;
3321                         drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3322                 }
3323                 /* else: still NO_ERROR */
3324                 goto out;
3325         }
3326
3327         if (!conn_create(adm_ctx.resource_name, &res_opts))
3328                 retcode = ERR_NOMEM;
3329 out:
3330         drbd_adm_finish(&adm_ctx, info, retcode);
3331         return 0;
3332 }
3333
3334 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3335 {
3336         struct drbd_config_context adm_ctx;
3337         struct drbd_genlmsghdr *dh = info->userhdr;
3338         enum drbd_ret_code retcode;
3339
3340         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3341         if (!adm_ctx.reply_skb)
3342                 return retcode;
3343         if (retcode != NO_ERROR)
3344                 goto out;
3345
3346         if (dh->minor > MINORMASK) {
3347                 drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3348                 retcode = ERR_INVALID_REQUEST;
3349                 goto out;
3350         }
3351         if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3352                 drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3353                 retcode = ERR_INVALID_REQUEST;
3354                 goto out;
3355         }
3356
3357         /* drbd_adm_prepare has already made sure
3358          * that first_peer_device(device)->connection and device->vnr match the request. */
3359         if (adm_ctx.device) {
3360                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3361                         retcode = ERR_MINOR_EXISTS;
3362                 /* else: still NO_ERROR */
3363                 goto out;
3364         }
3365
3366         retcode = drbd_create_device(&adm_ctx, dh->minor);
3367 out:
3368         drbd_adm_finish(&adm_ctx, info, retcode);
3369         return 0;
3370 }
3371
3372 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3373 {
3374         if (device->state.disk == D_DISKLESS &&
3375             /* no need to be device->state.conn == C_STANDALONE &&
3376              * we may want to delete a minor from a live replication group.
3377              */
3378             device->state.role == R_SECONDARY) {
3379                 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3380                                     CS_VERBOSE + CS_WAIT_COMPLETE);
3381                 drbd_delete_device(device);
3382                 return NO_ERROR;
3383         } else
3384                 return ERR_MINOR_CONFIGURED;
3385 }
3386
3387 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3388 {
3389         struct drbd_config_context adm_ctx;
3390         enum drbd_ret_code retcode;
3391
3392         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3393         if (!adm_ctx.reply_skb)
3394                 return retcode;
3395         if (retcode != NO_ERROR)
3396                 goto out;
3397
3398         retcode = adm_del_minor(adm_ctx.device);
3399 out:
3400         drbd_adm_finish(&adm_ctx, info, retcode);
3401         return 0;
3402 }
3403
3404 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3405 {
3406         struct drbd_config_context adm_ctx;
3407         struct drbd_resource *resource;
3408         struct drbd_connection *connection;
3409         struct drbd_device *device;
3410         int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
3411         unsigned i;
3412
3413         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3414         if (!adm_ctx.reply_skb)
3415                 return retcode;
3416         if (retcode != NO_ERROR)
3417                 goto out;
3418
3419         resource = adm_ctx.resource;
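        /* Teardown proceeds in dependency order: demote every volume,
         * disconnect every connection, detach all disks, stop the worker
         * threads, then delete the minors and finally the resource itself. */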
3420         /* demote */
3421         for_each_connection(connection, resource) {
3422                 struct drbd_peer_device *peer_device;
3423
3424                 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3425                         retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3426                         if (retcode < SS_SUCCESS) {
3427                                 drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3428                                 goto out;
3429                         }
3430                 }
3431
3432                 retcode = conn_try_disconnect(connection, 0);
3433                 if (retcode < SS_SUCCESS) {
3434                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3435                         goto out;
3436                 }
3437         }
3438
3439         /* detach */
3440         idr_for_each_entry(&resource->devices, device, i) {
3441                 retcode = adm_detach(device, 0);
3442                 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3443                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3444                         goto out;
3445                 }
3446         }
3447
3448         /* If we reach this, all volumes (of this resource) are Secondary,
3449          * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3450          * actually stopped; state handling only does drbd_thread_stop_nowait(). */
3451         for_each_connection(connection, resource)
3452                 drbd_thread_stop(&connection->worker);
3453
3454         /* Now, nothing can fail anymore */
3455
3456         /* delete volumes */
3457         idr_for_each_entry(&resource->devices, device, i) {
3458                 retcode = adm_del_minor(device);
3459                 if (retcode != NO_ERROR) {
3460                         /* "can not happen" */
3461                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3462                         goto out;
3463                 }
3464         }
3465
3466         list_del_rcu(&resource->resources);
3467         synchronize_rcu();
3468         drbd_free_resource(resource);
3469         retcode = NO_ERROR;
3470
3471 out:
3472         drbd_adm_finish(&adm_ctx, info, retcode);
3473         return 0;
3474 }
3475
3476 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3477 {
3478         struct drbd_config_context adm_ctx;
3479         struct drbd_resource *resource;
3480         struct drbd_connection *connection;
3481         enum drbd_ret_code retcode;
3482
3483         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3484         if (!adm_ctx.reply_skb)
3485                 return retcode;
3486         if (retcode != NO_ERROR)
3487                 goto out;
3488
3489         resource = adm_ctx.resource;
3490         for_each_connection(connection, resource) {
3491                 if (connection->cstate > C_STANDALONE) {
3492                         retcode = ERR_NET_CONFIGURED;
3493                         goto out;
3494                 }
3495         }
3496         if (!idr_is_empty(&resource->devices)) {
3497                 retcode = ERR_RES_IN_USE;
3498                 goto out;
3499         }
3500
3501         list_del_rcu(&resource->resources);
3502         for_each_connection(connection, resource)
3503                 drbd_thread_stop(&connection->worker);
3504         synchronize_rcu();
3505         drbd_free_resource(resource);
3506         retcode = NO_ERROR;
3507 out:
3508         drbd_adm_finish(&adm_ctx, info, retcode);
3509         return 0;
3510 }
3511
3512 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3513 {
3514         static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3515         struct sk_buff *msg;
3516         struct drbd_genlmsghdr *d_out;
3517         unsigned seq;
3518         int err = -ENOMEM;
3519
3520         if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3521                 if (time_after(jiffies, device->rs_last_bcast + HZ))
3522                         device->rs_last_bcast = jiffies;
3523                 else
3524                         return;
3525         }
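        /* I.e. sync progress is broadcast at most about once per second
         * (HZ jiffies); more frequent updates are silently dropped. */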
3526
3527         seq = atomic_inc_return(&drbd_genl_seq);
3528         msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3529         if (!msg)
3530                 goto failed;
3531
3532         err = -EMSGSIZE;
3533         d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3534         if (!d_out) /* cannot happen, but anyway. */
3535                 goto nla_put_failure;
3536         d_out->minor = device_to_minor(device);
3537         d_out->ret_code = NO_ERROR;
3538
3539         if (nla_put_status_info(msg, device, sib))
3540                 goto nla_put_failure;
3541         genlmsg_end(msg, d_out);
3542         err = drbd_genl_multicast_events(msg, 0);
3543         /* msg has been consumed or freed in netlink_broadcast() */
3544         if (err && err != -ESRCH)
3545                 goto failed;
3546
3547         return;
3548
3549 nla_put_failure:
3550         nlmsg_free(msg);
3551 failed:
3552         drbd_err(device, "Error %d while broadcasting event. "
3553                         "Event seq:%u sib_reason:%u\n",
3554                         err, seq, sib->sib_reason);
3555 }