From: Amir Shehata Date: Thu, 21 Nov 2013 14:24:50 +0000 (+0800) Subject: staging/lustre/lnet: Fix assert on empty group in selftest module X-Git-Tag: firefly_0821_release~176^2~4489^2~783^2 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=4f317748f2660cb62bcf43aac368aa42dbeb2b78;p=firefly-linux-kernel-4.4.55.git staging/lustre/lnet: Fix assert on empty group in selftest module The core of the issue is that the selftest module doesn't sanitize its own API, but it depends on lst utility to do such checks. As a result this issue manifests itself in this particular LU through an assert on an empty group. If the NID is misspelled then an empty group is added. An error output is provided, but if that's never checked in a batch script, as is the case with this issue, then the script will try to add an empty group to a test to run in a batch, and that will cause an assert The fix is two fold. Ensure that lst utility checks that a group is added with at least one node. If not the group is subsequently deleted. And the add_test command would fail, since the group no longer exists. The second fix is to ensure that the kernel module itself sanitizes its own API in this particular case, so that if a different utility is used other than lst to communicate with the selftest kernel module then this error would be caught. This fix looks up the batch and the groups, src and dst, in the ioctl handle and sanitizes that input at this point. If the group looked up either doesn't exist or doesn't have at least one ACTIVE node, then the command fails. NOTE:there are many other cases in the code where the selftest kernel module doesn't check for sanity of the input, but depends totally on the lst module to do such checks. Particularly around length of strings passed in. Thus it is possible to crash the selftest module if someone tries to create another userspace app to communicate with the selftest kernel module without ensuring sanity of the params sent to the kernel module. In effect, it's always assumed that lst is the front end for selftest and no other front end is to be used. Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3093 Lustre-change: http://review.whamcloud.com/6092 Signed-off-by: Amir Shehata Reviewed-by: Isaac Huang Reviewed-by: Liang Zhen Reviewed-by: Oleg Drokin [coding style fix of the original patch is splitted into a new one -- PengTao] Signed-off-by: Peng Tao Signed-off-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c index 067a8b535f7c..9556bc0412d6 100644 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ b/drivers/staging/lustre/lnet/selftest/console.c @@ -1237,41 +1237,77 @@ again: goto again; } -int -lstcon_test_add(char *name, int type, int loop, int concur, - int dist, int span, char *src_name, char * dst_name, - void *param, int paramlen, int *retp, - struct list_head *result_up) +static int +lstcon_verify_batch(const char *name, lstcon_batch_t **batch) { - lstcon_group_t *src_grp = NULL; - lstcon_group_t *dst_grp = NULL; - lstcon_test_t *test = NULL; - lstcon_batch_t *batch; - int rc; + int rc; - rc = lstcon_batch_find(name, &batch); + rc = lstcon_batch_find(name, batch); if (rc != 0) { CDEBUG(D_NET, "Can't find batch %s\n", name); return rc; } - if (batch->bat_state != LST_BATCH_IDLE) { + if ((*batch)->bat_state != LST_BATCH_IDLE) { CDEBUG(D_NET, "Can't change running batch %s\n", name); - return rc; + return -EINVAL; } - rc = lstcon_group_find(src_name, &src_grp); + return 0; +} + +static int +lstcon_verify_group(const char *name, lstcon_group_t **grp) +{ + int rc; + lstcon_ndlink_t *ndl; + + rc = lstcon_group_find(name, grp); if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", src_name); - goto out; + CDEBUG(D_NET, "can't find group %s\n", name); + return rc; } - rc = lstcon_group_find(dst_name, &dst_grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", dst_name); - goto out; + list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) { + if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE) + return 0; } + CDEBUG(D_NET, "Group %s has no ACTIVE nodes\n", name); + + return -EINVAL; +} + +int +lstcon_test_add(char *batch_name, int type, int loop, + int concur, int dist, int span, + char *src_name, char *dst_name, + void *param, int paramlen, int *retp, + struct list_head *result_up) +{ + lstcon_test_t *test = NULL; + int rc; + lstcon_group_t *src_grp = NULL; + lstcon_group_t *dst_grp = NULL; + lstcon_batch_t *batch = NULL; + + /* + * verify that a batch of the given name exists, and the groups + * that will be part of the batch exist and have at least one + * active node + */ + rc = lstcon_verify_batch(batch_name, &batch); + if (rc != 0) + goto out; + + rc = lstcon_verify_group(src_name, &src_grp); + if (rc != 0) + goto out; + + rc = lstcon_verify_group(dst_name, &dst_grp); + if (rc != 0) + goto out; + if (dst_grp->grp_userland) *retp = 1; @@ -1310,7 +1346,8 @@ lstcon_test_add(char *name, int type, int loop, int concur, if (lstcon_trans_stat()->trs_rpc_errno != 0 || lstcon_trans_stat()->trs_fwk_errno != 0) - CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, name); + CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, + batch_name); /* add to test list anyway, so user can check what's going on */ list_add_tail(&test->tes_link, &batch->bat_test_list); diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h index e61b26687dbb..b57dbd80478a 100644 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ b/drivers/staging/lustre/lnet/selftest/console.h @@ -224,9 +224,9 @@ extern int lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up); extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, int timeout, struct list_head *result_up); -extern int lstcon_test_add(char *name, int type, int loop, int concur, - int dist, int span, char *src_name, char * dst_name, +extern int lstcon_test_add(char *batch_name, int type, int loop, + int concur, int dist, int span, + char *src_name, char *dst_name, void *param, int paramlen, int *retp, struct list_head *result_up); - #endif