1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7 ** This copyrighted material is made available to anyone wishing to use,
8 ** modify, copy, or redistribute it subject to the terms and conditions
9 ** of the GNU General Public License v.2.
11 *******************************************************************************
12 ******************************************************************************/
14 #include "dlm_internal.h"
15 #include "lockspace.h"
24 #include "requestqueue.h"
/* Held by dlm_new_lockspace() and dlm_release_lockspace() around lockspace
   creation and release. */
29 static struct mutex ls_lock;
/* List of existing lockspaces, linked through dlm_ls.ls_list. */
30 static struct list_head lslist;
/* Protects lslist; also held around the ls_create_count updates. */
31 static spinlock_t lslist_lock;
/* Task handed to kthread_stop() by dlm_scand_stop(). */
32 static struct task_struct * scand_task;
/* Store handler for the "control" attribute.  Parses buf as an integer
   (base 0, so the base is taken from the prefix; the end pointer is NULL,
   so trailing characters are ignored), looks the lockspace up again through
   its local handle with dlm_find_lockspace_local() and balances that lookup
   with dlm_put_lockspace().
   NOTE(review): what is done with the parsed value n should be confirmed
   against the statements between the lookup and the put. */
35 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
38 int n = simple_strtol(buf, NULL, 0);
40 ls = dlm_find_lockspace_local(ls->ls_local_handle);
54 dlm_put_lockspace(ls);
/* Store handler for the "event_done" attribute.  Records the integer written
   to the attribute in ls_uevent_result, sets LSFL_UEVENT_WAIT and wakes
   ls_uevent_wait.  do_uevent() sleeps on that wait queue until the flag is
   set, so this is what lets a pending join/leave continue. */
58 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
60 ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
61 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
62 wake_up(&ls->ls_uevent_wait);
/* Show handler for the lockspace id: formats ls_global_id as an unsigned
   decimal followed by a newline, writing at most PAGE_SIZE bytes into buf. */
66 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
68 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/* Store handler for the "id" attribute: sets ls_global_id from buf parsed as
   an unsigned integer (base 0; the end pointer is NULL, so trailing
   characters are not checked). */
71 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
73 ls->ls_global_id = simple_strtoul(buf, NULL, 0);
/* Show handler for the "nodir" attribute: prints the value returned by
   dlm_no_directory(ls) as an unsigned decimal followed by a newline. */
77 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
79 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/* Store handler for the "nodir" attribute: parses buf (base 0) into val and
   sets LSFL_NODIR in ls_flags.
   NOTE(review): the flag is presumably only set for a particular val and is
   never cleared here -- confirm against the condition guarding set_bit(). */
82 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
84 int val = simple_strtoul(buf, NULL, 0);
86 set_bit(LSFL_NODIR, &ls->ls_flags);
/* Show handler for the "recover_status" attribute: prints the 32-bit value
   returned by dlm_recover_status(ls) in lower-case hex followed by a
   newline. */
90 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
92 uint32_t status = dlm_recover_status(ls);
93 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/* Show handler for the "recover_nodeid" attribute: prints ls_recover_nodeid
   as a signed decimal followed by a newline. */
96 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
98 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/* Embedded sysfs attribute; dlm_attr_show() and dlm_attr_store() use
   container_of() on it to get back to the enclosing dlm_attr. */
102 struct attribute attr;
/* Read handler; may be NULL, in which case dlm_attr_show() returns 0. */
103 ssize_t (*show)(struct dlm_ls *, char *);
/* Write handler; may be NULL, in which case dlm_attr_store() returns len. */
104 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/* "control": writable by the owner only, handled by dlm_control_store(). */
107 static struct dlm_attr dlm_attr_control = {
108 .attr = {.name = "control", .mode = S_IWUSR},
109 .store = dlm_control_store
/* "event_done": writable by the owner only; a write wakes do_uevent()
   through dlm_event_store(). */
112 static struct dlm_attr dlm_attr_event = {
113 .attr = {.name = "event_done", .mode = S_IWUSR},
114 .store = dlm_event_store
/* "id": readable by everyone, writable by the owner. */
117 static struct dlm_attr dlm_attr_id = {
118 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
120 .store = dlm_id_store
/* "nodir": readable by everyone, writable by the owner. */
123 static struct dlm_attr dlm_attr_nodir = {
124 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
125 .show = dlm_nodir_show,
126 .store = dlm_nodir_store
/* "recover_status": read-only. */
129 static struct dlm_attr dlm_attr_recover_status = {
130 .attr = {.name = "recover_status", .mode = S_IRUGO},
131 .show = dlm_recover_status_show
/* "recover_nodeid": read-only. */
134 static struct dlm_attr dlm_attr_recover_nodeid = {
135 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
136 .show = dlm_recover_nodeid_show
/* Attribute table installed as dlm_ktype.default_attrs. */
139 static struct attribute *dlm_attrs[] = {
140 &dlm_attr_control.attr,
141 &dlm_attr_event.attr,
143 &dlm_attr_nodir.attr,
144 &dlm_attr_recover_status.attr,
145 &dlm_attr_recover_nodeid.attr,
/* sysfs_ops.show entry point.  Recovers the dlm_ls from its embedded ls_kobj
   and the dlm_attr from its embedded attribute, then calls the attribute's
   show handler.  Returns 0 when the attribute has no show handler. */
149 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
152 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
153 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
154 return a->show ? a->show(ls, buf) : 0;
/* sysfs_ops.store entry point.  Recovers the dlm_ls and dlm_attr the same way
   as dlm_attr_show() and calls the attribute's store handler.  When the
   attribute has no store handler the write is reported as fully consumed
   (len is returned). */
157 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
158 const char *buf, size_t len)
160 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
161 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
162 return a->store ? a->store(ls, buf, len) : len;
/* Release callback of dlm_ktype.  Recovers the dlm_ls that embeds the
   kobject.
   NOTE(review): presumably this is where the dlm_ls is freed, since
   release_lockspace() relies on its final kobject_put() for that -- confirm. */
165 static void lockspace_kobj_release(struct kobject *k)
167 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs operations shared by all lockspace attributes; they dispatch to the
   per-attribute handlers stored in struct dlm_attr. */
171 static const struct sysfs_ops dlm_attr_ops = {
172 .show = dlm_attr_show,
173 .store = dlm_attr_store,
/* kobject type used for every lockspace kobject (see the
   kobject_init_and_add() call in new_lockspace()). */
176 static struct kobj_type dlm_ktype = {
177 .default_attrs = dlm_attrs,
178 .sysfs_ops = &dlm_attr_ops,
179 .release = lockspace_kobj_release,
/* The "dlm" kset created in dlm_lockspace_init(); each lockspace kobject is
   made a member of it in new_lockspace(). */
182 static struct kset *dlm_kset;
/* Tell userspace that this node is joining or leaving the lockspace group and
   sleep until it answers.  `in` is used as a boolean: non-zero is logged as
   "joining", zero as "leaving".  A KOBJ_ONLINE or KOBJ_OFFLINE uevent is
   emitted on the lockspace kobject (NOTE(review): presumably ONLINE for a
   join and OFFLINE for a leave -- confirm the selecting condition).  The wait
   is interruptible and ends when LSFL_UEVENT_WAIT is set, which
   dlm_event_store() does; the flag is cleared as part of the wake-up test.
   The outcome reported by userspace is read from ls_uevent_result. */
184 static int do_uevent(struct dlm_ls *ls, int in)
189 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
191 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
193 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
195 /* dlm_controld will see the uevent, do the necessary group management
196 and then write to sysfs to wake us */
198 error = wait_event_interruptible(ls->ls_uevent_wait,
199 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
201 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
206 error = ls->ls_uevent_result;
209 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
210 error, ls->ls_uevent_result);
/* uevent callback of the dlm kset: adds LOCKSPACE=<ls_name> to the
   environment of every uevent emitted for a lockspace kobject. */
214 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
215 struct kobj_uevent_env *env)
217 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
219 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
/* uevent operations passed to kset_create_and_add() in
   dlm_lockspace_init(). */
223 static struct kset_uevent_ops dlm_uevent_ops = {
224 .uevent = dlm_uevent,
/* Module-init time setup for lockspace handling: initialises ls_lock, the
   lockspace list and its spinlock, then creates the "dlm" kset under
   kernel_kobj with dlm_uevent_ops.  A warning is printed when the kset cannot
   be created. */
227 int __init dlm_lockspace_init(void)
230 mutex_init(&ls_lock);
231 INIT_LIST_HEAD(&lslist);
232 spin_lock_init(&lslist_lock);
234 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
236 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Counterpart of dlm_lockspace_init(): unregisters the "dlm" kset. */
242 void dlm_lockspace_exit(void)
244 kset_unregister(dlm_kset);
/* Walks lslist under lslist_lock looking for a lockspace whose scan is due,
   i.e. at least ci_scan_secs seconds (converted to jiffies with HZ) have
   passed since ls_scan_time.  time_after_eq() keeps the comparison correct
   across jiffies wrap-around.  The spinlock is dropped on both the "found"
   and the "not found" path.
   NOTE(review): presumably returns the due lockspace, or NULL when none is
   due -- dlm_scand() consumes the result; confirm. */
247 static struct dlm_ls *find_ls_to_scan(void)
251 spin_lock(&lslist_lock);
252 list_for_each_entry(ls, &lslist, ls_list) {
253 if (time_after_eq(jiffies, ls->ls_scan_time +
254 dlm_config.ci_scan_secs * HZ)) {
255 spin_unlock(&lslist_lock);
259 spin_unlock(&lslist_lock);
/* Thread function of the "dlm_scand" kernel thread (see dlm_scand_start()).
   Loops until kthread_should_stop().  Each pass asks find_ls_to_scan() for a
   lockspace; when dlm_lock_recovery_try() succeeds on it, ls_scan_time is
   stamped with the current jiffies and the timeout and waiter scans run
   before dlm_unlock_recovery().
   NOTE(review): the `ls_scan_time += HZ` line looks like the path taken when
   the recovery lock could not be obtained (retry roughly a second later), and
   the interruptible sleep of ci_scan_secs seconds looks like the idle path
   when nothing is due -- confirm both against the branch structure. */
263 static int dlm_scand(void *data)
267 while (!kthread_should_stop()) {
268 ls = find_ls_to_scan();
270 if (dlm_lock_recovery_try(ls)) {
271 ls->ls_scan_time = jiffies;
273 dlm_scan_timeout(ls);
274 dlm_scan_waiters(ls);
275 dlm_unlock_recovery(ls);
277 ls->ls_scan_time += HZ;
281 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/* Creates and starts a kernel thread named "dlm_scand" running dlm_scand()
   with no argument.
   NOTE(review): presumably stores the task in scand_task on success, since
   dlm_scand_stop() stops that task -- confirm. */
286 static int dlm_scand_start(void)
288 struct task_struct *p;
291 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stops the scanning thread by calling kthread_stop() on scand_task. */
299 static void dlm_scand_stop(void)
301 kthread_stop(scand_task);
/* Searches lslist, under lslist_lock, for the lockspace whose ls_global_id
   equals id.
   NOTE(review): presumably takes a reference that the caller must drop with
   dlm_put_lockspace(), and returns NULL when nothing matches -- confirm. */
304 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
308 spin_lock(&lslist_lock);
310 list_for_each_entry(ls, &lslist, ls_list) {
311 if (ls->ls_global_id == id) {
318 spin_unlock(&lslist_lock);
/* Searches lslist, under lslist_lock, for the lockspace whose ls_local_handle
   equals the given handle.  new_lockspace() sets ls_local_handle to the
   dlm_ls pointer itself, so this effectively validates that the handle still
   names a listed lockspace.  dlm_control_store() and dlm_release_lockspace()
   pair this call with dlm_put_lockspace().
   NOTE(review): presumably returns NULL when nothing matches -- confirm. */
322 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
326 spin_lock(&lslist_lock);
327 list_for_each_entry(ls, &lslist, ls_list) {
328 if (ls->ls_local_handle == lockspace) {
335 spin_unlock(&lslist_lock);
/* Searches lslist, under lslist_lock, for the lockspace whose ls_device.minor
   equals minor.
   NOTE(review): reference and not-found behaviour presumably match the other
   dlm_find_lockspace_*() helpers -- confirm. */
339 struct dlm_ls *dlm_find_lockspace_device(int minor)
343 spin_lock(&lslist_lock);
344 list_for_each_entry(ls, &lslist, ls_list) {
345 if (ls->ls_device.minor == minor) {
352 spin_unlock(&lslist_lock);
/* Counterpart of the dlm_find_lockspace_*() lookups; does its work under
   lslist_lock.
   NOTE(review): presumably drops the ls_count reference that
   remove_lockspace() waits to reach zero -- confirm. */
356 void dlm_put_lockspace(struct dlm_ls *ls)
358 spin_lock(&lslist_lock);
360 spin_unlock(&lslist_lock);
/* Takes ls off lslist once nobody holds a reference to it.  Under
   lslist_lock: when ls_count is 0 the lockspace is unlinked (warning if
   ls_create_count is still non-zero) and the lock is released.  When
   ls_count is non-zero the lock is released without unlinking.
   NOTE(review): the non-zero case presumably waits and retries until the
   count drops -- confirm. */
363 static void remove_lockspace(struct dlm_ls *ls)
366 spin_lock(&lslist_lock);
367 if (ls->ls_count == 0) {
368 WARN_ON(ls->ls_create_count != 0);
369 list_del(&ls->ls_list);
370 spin_unlock(&lslist_lock);
373 spin_unlock(&lslist_lock);
/* Starts the services shared by all lockspaces: first the dlm_scand thread,
   then the lowcomms messaging layer.  A failure of either step is logged
   together with its error code. */
378 static int threads_start(void)
382 error = dlm_scand_start();
384 log_print("cannot start dlm_scand thread %d", error);
388 /* Thread for sending/receiving messages for all lockspaces */
389 error = dlm_lowcomms_start();
391 log_print("cannot start dlm lowcomms %d", error);
/* NOTE(review): presumably the counterpart of threads_start(), stopping the
   scan thread and lowcomms -- confirm against the body. */
403 static void threads_stop(void)
/* Creates the lockspace called `name`, or takes another creation reference
   on an existing lockspace with the same name.

   Checks made up front:
   - name must not be longer than DLM_LOCKSPACE_LEN;
   - lvblen must be a non-zero multiple of 8;
   - a module reference must be obtainable (dropped again with module_put()
     on the exit path at the end of the function);
   - the dlm user daemon must be available;
   - when ci_recover_callbacks is set and a cluster name is given, it must
     match ci_cluster_name over the first DLM_LOCKSPACE_LEN bytes.
   When ops and ops_result are both supplied but ci_recover_callbacks is off,
   *ops_result is set to -EOPNOTSUPP.

   lslist is then searched under lslist_lock for a lockspace with the same
   name length and bytes.  A match gets ls_create_count incremented, after a
   DLM_LSFL_NEWEXCL check (NOTE(review): presumably an exclusive create of an
   existing lockspace is refused -- confirm).

   Otherwise a dlm_ls is allocated and initialised, put on lslist with
   ls_create_count = 1, the callback service (DLM_LSFL_FS only) and the
   recovery thread are started, the kobject is registered in dlm_kset, and
   do_uevent(ls, 1) asks userspace to join the group.  The function then
   waits for ls_members_done and takes ls_members_result as the outcome.

   The calls at the end of the function undo these steps in reverse order. */
409 static int new_lockspace(const char *name, const char *cluster,
410 uint32_t flags, int lvblen,
411 const struct dlm_lockspace_ops *ops, void *ops_arg,
412 int *ops_result, dlm_lockspace_t **lockspace)
417 int namelen = strlen(name);
419 if (namelen > DLM_LOCKSPACE_LEN)
422 if (!lvblen || (lvblen % 8))
425 if (!try_module_get(THIS_MODULE))
428 if (!dlm_user_daemon_available()) {
429 log_print("dlm user daemon not available");
434 if (ops && ops_result) {
435 if (!dlm_config.ci_recover_callbacks)
436 *ops_result = -EOPNOTSUPP;
441 if (dlm_config.ci_recover_callbacks && cluster &&
442 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
443 log_print("dlm cluster name %s mismatch %s",
444 dlm_config.ci_cluster_name, cluster);
451 spin_lock(&lslist_lock);
452 list_for_each_entry(ls, &lslist, ls_list) {
453 WARN_ON(ls->ls_create_count <= 0);
454 if (ls->ls_namelen != namelen)
456 if (memcmp(ls->ls_name, name, namelen))
458 if (flags & DLM_LSFL_NEWEXCL) {
462 ls->ls_create_count++;
467 spin_unlock(&lslist_lock);
/* The extra namelen bytes make room for the name copied into ls_name
   below; kzalloc() also zero-fills the whole structure. */
474 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
477 memcpy(ls->ls_name, name, namelen);
478 ls->ls_namelen = namelen;
479 ls->ls_lvblen = lvblen;
482 ls->ls_scan_time = jiffies;
484 if (ops && dlm_config.ci_recover_callbacks) {
486 ls->ls_ops_arg = ops_arg;
489 if (flags & DLM_LSFL_TIMEWARN)
490 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
492 /* ls_exflags are forced to match among nodes, and we don't
493 need to require all nodes to have some flags set */
494 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
497 size = dlm_config.ci_rsbtbl_size;
498 ls->ls_rsbtbl_size = size;
500 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
503 for (i = 0; i < size; i++) {
504 ls->ls_rsbtbl[i].keep.rb_node = NULL;
505 ls->ls_rsbtbl[i].toss.rb_node = NULL;
506 spin_lock_init(&ls->ls_rsbtbl[i].lock);
509 idr_init(&ls->ls_lkbidr);
510 spin_lock_init(&ls->ls_lkbidr_spin);
512 INIT_LIST_HEAD(&ls->ls_waiters);
513 mutex_init(&ls->ls_waiters_mutex);
514 INIT_LIST_HEAD(&ls->ls_orphans);
515 mutex_init(&ls->ls_orphans_mutex);
516 INIT_LIST_HEAD(&ls->ls_timeout);
517 mutex_init(&ls->ls_timeout_mutex);
519 INIT_LIST_HEAD(&ls->ls_new_rsb);
520 spin_lock_init(&ls->ls_new_rsb_spin);
522 INIT_LIST_HEAD(&ls->ls_nodes);
523 INIT_LIST_HEAD(&ls->ls_nodes_gone);
524 ls->ls_num_nodes = 0;
525 ls->ls_low_nodeid = 0;
526 ls->ls_total_weight = 0;
527 ls->ls_node_array = NULL;
529 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
530 ls->ls_stub_rsb.res_ls = ls;
532 ls->ls_debug_rsb_dentry = NULL;
533 ls->ls_debug_waiters_dentry = NULL;
535 init_waitqueue_head(&ls->ls_uevent_wait);
536 ls->ls_uevent_result = 0;
537 init_completion(&ls->ls_members_done);
538 ls->ls_members_result = -1;
540 mutex_init(&ls->ls_cb_mutex);
541 INIT_LIST_HEAD(&ls->ls_cb_delay);
543 ls->ls_recoverd_task = NULL;
544 mutex_init(&ls->ls_recoverd_active);
545 spin_lock_init(&ls->ls_recover_lock);
546 spin_lock_init(&ls->ls_rcom_spin);
/* ls_rcom_seq starts from a random 64-bit value rather than zero. */
547 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
548 ls->ls_recover_status = 0;
549 ls->ls_recover_seq = 0;
550 ls->ls_recover_args = NULL;
551 init_rwsem(&ls->ls_in_recovery);
552 init_rwsem(&ls->ls_recv_active);
553 INIT_LIST_HEAD(&ls->ls_requestqueue);
554 mutex_init(&ls->ls_requestqueue_mutex);
555 mutex_init(&ls->ls_clear_proc_locks);
557 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
558 if (!ls->ls_recover_buf)
562 ls->ls_num_slots = 0;
563 ls->ls_slots_size = 0;
566 INIT_LIST_HEAD(&ls->ls_recover_list);
567 spin_lock_init(&ls->ls_recover_list_lock);
568 idr_init(&ls->ls_recover_idr);
569 spin_lock_init(&ls->ls_recover_idr_lock);
570 ls->ls_recover_list_count = 0;
/* The local handle is the dlm_ls pointer itself;
   dlm_find_lockspace_local() matches on this field. */
571 ls->ls_local_handle = ls;
572 init_waitqueue_head(&ls->ls_wait_general);
573 INIT_LIST_HEAD(&ls->ls_root_list);
574 init_rwsem(&ls->ls_root_sem);
576 down_write(&ls->ls_in_recovery);
578 spin_lock(&lslist_lock);
579 ls->ls_create_count = 1;
580 list_add(&ls->ls_list, &lslist);
581 spin_unlock(&lslist_lock);
583 if (flags & DLM_LSFL_FS) {
584 error = dlm_callback_start(ls);
586 log_error(ls, "can't start dlm_callback %d", error);
591 /* needs to find ls in lslist */
592 error = dlm_recoverd_start(ls);
594 log_error(ls, "can't start dlm_recoverd %d", error);
598 ls->ls_kobj.kset = dlm_kset;
599 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
603 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
605 /* let kobject handle freeing of ls if there's an error */
608 /* This uevent triggers dlm_controld in userspace to add us to the
609 group of nodes that are members of this lockspace (managed by the
610 cluster infrastructure.) Once it's done that, it tells us who the
611 current lockspace members are (via configfs) and then tells the
612 lockspace to start running (via sysfs) in dlm_ls_start(). */
614 error = do_uevent(ls, 1);
618 wait_for_completion(&ls->ls_members_done);
619 error = ls->ls_members_result;
623 dlm_create_debug_file(ls);
625 log_debug(ls, "join complete");
631 dlm_clear_members(ls);
632 kfree(ls->ls_node_array);
634 dlm_recoverd_stop(ls);
636 dlm_callback_stop(ls);
638 spin_lock(&lslist_lock);
639 list_del(&ls->ls_list);
640 spin_unlock(&lslist_lock);
641 idr_destroy(&ls->ls_recover_idr);
642 kfree(ls->ls_recover_buf);
644 idr_destroy(&ls->ls_lkbidr);
645 vfree(ls->ls_rsbtbl);
648 kobject_put(&ls->ls_kobj);
652 module_put(THIS_MODULE);
/* Public entry point for creating or joining a lockspace.  With ls_lock held
   it calls threads_start() and then new_lockspace(), passing every argument
   through unchanged; the mutex is released before returning.
   NOTE(review): how the threads_start() result gates the new_lockspace()
   call, and whether the threads are stopped again on failure, should be
   confirmed against the full body. */
656 int dlm_new_lockspace(const char *name, const char *cluster,
657 uint32_t flags, int lvblen,
658 const struct dlm_lockspace_ops *ops, void *ops_arg,
659 int *ops_result, dlm_lockspace_t **lockspace)
663 mutex_lock(&ls_lock);
665 error = threads_start();
669 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
670 ops_result, lockspace);
678 mutex_unlock(&ls_lock);
/* idr_for_each() callback used by lockspace_busy() for force == 1.  p is the
   dlm_lkb stored in the idr; the test is on lkb_nodeid being zero.
   NOTE(review): a zero lkb_nodeid presumably marks a local LKB, and a
   non-zero return presumably stops the walk -- confirm. */
682 static int lkb_idr_is_local(int id, void *p, void *data)
684 struct dlm_lkb *lkb = p;
686 if (!lkb->lkb_nodeid)
/* idr_for_each() callback passed by lockspace_busy() to detect any LKB.
   NOTE(review): presumably reports every entry as a hit -- confirm. */
691 static int lkb_idr_is_any(int id, void *p, void *data)
/* idr_for_each() callback used by release_lockspace() to dispose of each LKB
   left in ls_lkbidr.  The LVB buffer is freed with dlm_free_lvb() only when
   one is attached and the LKB carries DLM_IFL_MSTCPY.
   NOTE(review): presumably the LKB itself is freed afterwards -- confirm. */
696 static int lkb_idr_free(int id, void *p, void *data)
698 struct dlm_lkb *lkb = p;
700 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
701 dlm_free_lvb(lkb->lkb_lvbptr);
707 /* NOTE: We check the lkbidr here rather than the resource table.
708 This is because there may be LKBs queued as ASTs that have been unlinked
709 from their RSBs and are pending deletion once the AST has been delivered */
/* Reports whether LKBs in ls_lkbidr should prevent the lockspace from being
   released at the given force level.  The idr is walked under
   ls_lkbidr_spin: with lkb_idr_is_any in the first branch and with
   lkb_idr_is_local when force == 1.
   NOTE(review): the first branch is presumably force == 0, and higher force
   values presumably never report busy -- confirm. */
711 static int lockspace_busy(struct dlm_ls *ls, int force)
715 spin_lock(&ls->ls_lkbidr_spin);
717 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
718 } else if (force == 1) {
719 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
723 spin_unlock(&ls->ls_lkbidr_spin);
/* Drops one creation reference on ls and tears the lockspace down when that
   was the last one.

   lockspace_busy() is evaluated first.  Then, under lslist_lock:
   - ls_create_count == 1: the count is set to 0 (NOTE(review): presumably
     only when the lockspace is not busy -- confirm);
   - ls_create_count > 1: the count is decremented and the new value kept in
     rv.
   When the lockspace is not to be removed, "release_lockspace no remove" is
   logged with rv.

   Teardown, in the order of the calls below: deregister the device, stop
   the recovery thread and callback service, unlink from lslist through
   remove_lockspace(), delete the debug file, free the recovery buffer, free
   every LKB and destroy the idr, erase every rsb from the keep and toss
   trees of each table slot and free the table, drain ls_new_rsb, purge the
   request queue, free recovery arguments and member data, and finally drop
   the kobject and module references.  `force < 3` together with
   dlm_user_daemon_available() guards a step taken before the recovery
   thread is stopped (NOTE(review): presumably the leave uevent -- confirm). */
727 static int release_lockspace(struct dlm_ls *ls, int force)
733 busy = lockspace_busy(ls, force);
735 spin_lock(&lslist_lock);
736 if (ls->ls_create_count == 1) {
740 /* remove_lockspace takes ls off lslist */
741 ls->ls_create_count = 0;
744 } else if (ls->ls_create_count > 1) {
745 rv = --ls->ls_create_count;
749 spin_unlock(&lslist_lock);
752 log_debug(ls, "release_lockspace no remove %d", rv);
756 dlm_device_deregister(ls);
758 if (force < 3 && dlm_user_daemon_available())
761 dlm_recoverd_stop(ls);
763 dlm_callback_stop(ls);
765 remove_lockspace(ls);
767 dlm_delete_debug_file(ls);
769 kfree(ls->ls_recover_buf);
772 * Free all lkb's in idr
775 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
776 idr_remove_all(&ls->ls_lkbidr);
777 idr_destroy(&ls->ls_lkbidr);
780 * Free all rsb's on rsbtbl[] lists
783 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
784 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
785 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
786 rb_erase(n, &ls->ls_rsbtbl[i].keep);
790 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
791 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
792 rb_erase(n, &ls->ls_rsbtbl[i].toss);
797 vfree(ls->ls_rsbtbl);
799 while (!list_empty(&ls->ls_new_rsb)) {
800 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
802 list_del(&rsb->res_hashchain);
807 * Free structures on any other lists
810 dlm_purge_requestqueue(ls);
811 kfree(ls->ls_recover_args);
812 dlm_clear_members(ls);
813 dlm_clear_members_gone(ls);
814 kfree(ls->ls_node_array);
815 log_debug(ls, "release_lockspace final free");
816 kobject_put(&ls->ls_kobj);
817 /* The ls structure will be freed when the kobject is done with */
819 module_put(THIS_MODULE);
824 * Called when a system has released all its locks and is not going to use the
825 * lockspace any longer. We free everything we're managing for this lockspace.
826 * Remaining nodes will go through the recovery process as if we'd died. The
827 * lockspace must continue to function as usual, participating in recoveries,
828 * until this returns.
830 * Force has 4 possible values:
831 * 0 - don't destroy lockspace if it has any LKBs
832 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
833 * 2 - destroy lockspace regardless of LKBs
834 * 3 - destroy lockspace as part of a forced shutdown
/* Public entry point for releasing a lockspace.  The handle is first
   resolved with dlm_find_lockspace_local() and that lookup is immediately
   balanced with dlm_put_lockspace(); release_lockspace() then runs with
   ls_lock held.
   NOTE(review): presumably returns an error when the handle does not name a
   known lockspace -- confirm. */
837 int dlm_release_lockspace(void *lockspace, int force)
842 ls = dlm_find_lockspace_local(lockspace);
845 dlm_put_lockspace(ls);
847 mutex_lock(&ls_lock);
848 error = release_lockspace(ls, force);
853 mutex_unlock(&ls_lock);
/* Walks lslist under lslist_lock, testing LSFL_RUNNING on each lockspace.
   For a lockspace that is acted on, the spinlock is dropped before logging
   "no userland control daemon, stopping lockspace".
   NOTE(review): presumably lockspaces without LSFL_RUNNING are skipped and
   the walk restarts after each stop, because the lock was dropped
   mid-iteration -- confirm. */
858 void dlm_stop_lockspaces(void)
863 spin_lock(&lslist_lock);
864 list_for_each_entry(ls, &lslist, ls_list) {
865 if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
867 spin_unlock(&lslist_lock);
868 log_error(ls, "no userland control daemon, stopping lockspace");
872 spin_unlock(&lslist_lock);