2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
52 #include <scsi/scsi_transport_srp.h>
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static int topspin_workarounds = 1;
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions");
102 static const struct kernel_param_ops srp_tmo_ops;
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 "Number of seconds between the observation of a transport"
114 " layer error and failing all I/O. \"off\" means that this"
115 " functionality is disabled.");
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
120 MODULE_PARM_DESC(dev_loss_tmo,
121 "Maximum number of seconds that the SRP transport should"
122 " insulate transport layer errors. After this time has been"
123 " exceeded the SCSI host is removed. Should be"
124 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 " this functionality is disabled.");
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
142 static struct ib_client srp_client = {
145 .remove = srp_remove_one
148 static struct ib_sa_client srp_sa_client;
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
152 int tmo = *(int *)kp->arg;
155 return sprintf(buffer, "%d", tmo);
157 return sprintf(buffer, "off");
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
164 res = srp_parse_tmo(&tmo, val);
168 if (kp->arg == &srp_reconnect_delay)
169 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
171 else if (kp->arg == &srp_fast_io_fail_tmo)
172 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
174 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
178 *(int *)kp->arg = tmo;
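/*
 * Note: srp_tmo_ops is expected to wire its .get and .set callbacks to
 * srp_tmo_get() and srp_tmo_set() above, so that updating reconnect_delay,
 * fast_io_fail_tmo or dev_loss_tmo re-validates the three values against
 * each other via srp_tmo_valid() before any of them is changed.
 */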
184 static const struct kernel_param_ops srp_tmo_ops = {
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
191 return (struct srp_target_port *) host->hostdata;
194 static const char *srp_target_info(struct Scsi_Host *host)
196 return host_to_target(host)->target_name;
199 static int srp_target_is_topspin(struct srp_target_port *target)
201 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
204 return topspin_workarounds &&
205 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
211 enum dma_data_direction direction)
215 iu = kmalloc(sizeof *iu, gfp_mask);
219 iu->buf = kzalloc(size, gfp_mask);
223 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
225 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
229 iu->direction = direction;
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
246 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
252 static void srp_qp_event(struct ib_event *event, void *context)
254 pr_debug("QP event %s (%d)\n",
255 ib_event_msg(event->event), event->event);
258 static int srp_init_qp(struct srp_target_port *target,
261 struct ib_qp_attr *attr;
264 attr = kmalloc(sizeof *attr, GFP_KERNEL);
268 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port,
270 be16_to_cpu(target->pkey),
275 attr->qp_state = IB_QPS_INIT;
276 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 IB_ACCESS_REMOTE_WRITE);
278 attr->port_num = target->srp_host->port;
280 ret = ib_modify_qp(qp, attr,
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
293 struct srp_target_port *target = ch->target;
294 struct ib_cm_id *new_cm_id;
296 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
298 if (IS_ERR(new_cm_id))
299 return PTR_ERR(new_cm_id);
302 ib_destroy_cm_id(ch->cm_id);
303 ch->cm_id = new_cm_id;
304 ch->path.sgid = target->sgid;
305 ch->path.dgid = target->orig_dgid;
306 ch->path.pkey = target->pkey;
307 ch->path.service_id = target->service_id;
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
314 struct srp_device *dev = target->srp_host->srp_dev;
315 struct ib_fmr_pool_param fmr_param;
317 memset(&fmr_param, 0, sizeof(fmr_param));
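/*
 * Pool sizing: pool_size tracks scsi_host->can_queue so that every
 * outstanding SCSI command can own one FMR; dirty_watermark is set so that
 * a pool flush is triggered once roughly a quarter of the FMRs are waiting
 * to be unmapped.
 */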
318 fmr_param.pool_size = target->scsi_host->can_queue;
319 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
321 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 fmr_param.page_shift = ilog2(dev->mr_page_size);
323 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
324 IB_ACCESS_REMOTE_WRITE |
325 IB_ACCESS_REMOTE_READ);
327 return ib_create_fmr_pool(dev->pd, &fmr_param);
331 * srp_destroy_fr_pool() - free the resources owned by a pool
332 * @pool: Fast registration pool to be destroyed.
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
337 struct srp_fr_desc *d;
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
350 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
351 * @device: IB device to allocate fast registration descriptors for.
352 * @pd: Protection domain associated with the FR descriptors.
353 * @pool_size: Number of descriptors to allocate.
354 * @max_page_list_len: Maximum fast registration work request page list length.
356 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
357 struct ib_pd *pd, int pool_size,
358 int max_page_list_len)
360 struct srp_fr_pool *pool;
361 struct srp_fr_desc *d;
363 int i, ret = -EINVAL;
368 pool = kzalloc(sizeof(struct srp_fr_pool) +
369 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
372 pool->size = pool_size;
373 pool->max_page_list_len = max_page_list_len;
374 spin_lock_init(&pool->lock);
375 INIT_LIST_HEAD(&pool->free_list);
377 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
378 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
385 list_add_tail(&d->entry, &pool->free_list);
392 srp_destroy_fr_pool(pool);
400 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
401 * @pool: Pool to obtain descriptor from.
403 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
405 struct srp_fr_desc *d = NULL;
408 spin_lock_irqsave(&pool->lock, flags);
409 if (!list_empty(&pool->free_list)) {
410 d = list_first_entry(&pool->free_list, typeof(*d), entry);
413 spin_unlock_irqrestore(&pool->lock, flags);
419 * srp_fr_pool_put() - put an FR descriptor back in the free list
420 * @pool: Pool the descriptor was allocated from.
421 * @desc: Pointer to an array of fast registration descriptor pointers.
422 * @n: Number of descriptors to put back.
424 * Note: The caller must already have queued an invalidation request for
425 * desc->mr->rkey before calling this function.
427 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
433 spin_lock_irqsave(&pool->lock, flags);
434 for (i = 0; i < n; i++)
435 list_add(&desc[i]->entry, &pool->free_list);
436 spin_unlock_irqrestore(&pool->lock, flags);
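/*
 * Typical usage (sketch, mirroring srp_unmap_data() further down): queue a
 * local invalidation for each descriptor's rkey first, then return the
 * descriptors to the free list:
 *
 *	for (i = 0; i < n; i++)
 *		srp_inv_rkey(ch, desc[i]->mr->rkey);
 *	srp_fr_pool_put(pool, desc, n);
 */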
439 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
441 struct srp_device *dev = target->srp_host->srp_dev;
443 return srp_create_fr_pool(dev->dev, dev->pd,
444 target->scsi_host->can_queue,
445 dev->max_pages_per_mr);
449 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel.
452 * Change a queue pair into the error state and wait until all receive
453 * completions have been processed before destroying it. This prevents the
454 * receive completion handler from accessing the queue pair while it is
457 static void srp_destroy_qp(struct srp_rdma_ch *ch)
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
461 struct ib_recv_wr *bad_wr;
464 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected);
467 ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
468 WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
472 init_completion(&ch->done);
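/*
 * Post one last receive carrying the distinguished SRP_LAST_WR_ID. Since
 * the QP is now in the error state this work request completes with a
 * flush error, and srp_handle_qp_err() then signals ch->done, which
 * guarantees the receive CQ has been drained before the QP is destroyed.
 */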
473 ret = ib_post_recv(ch->qp, &wr, &bad_wr);
474 WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
476 wait_for_completion(&ch->done);
479 ib_destroy_qp(ch->qp);
482 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
484 struct srp_target_port *target = ch->target;
485 struct srp_device *dev = target->srp_host->srp_dev;
486 struct ib_qp_init_attr *init_attr;
487 struct ib_cq *recv_cq, *send_cq;
489 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL;
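/*
 * Send-queue sizing: with fast registration each SRP command may need up to
 * three send-queue work requests (memory registration, the SRP_CMD send and
 * a local invalidation), which is presumably why m is 3 in that case and 1
 * otherwise.
 */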
491 const int m = dev->use_fast_reg ? 3 : 1;
492 struct ib_cq_init_attr cq_attr = {};
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
499 /* + 1 for SRP_LAST_WR_ID */
500 cq_attr.cqe = target->queue_size + 1;
501 cq_attr.comp_vector = ch->comp_vector;
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
504 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq);
509 cq_attr.cqe = m * target->queue_size;
510 cq_attr.comp_vector = ch->comp_vector;
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
513 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq);
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
520 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1;
523 init_attr->cap.max_recv_sge = 1;
524 init_attr->cap.max_send_sge = 1;
525 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
526 init_attr->qp_type = IB_QPT_RC;
527 init_attr->send_cq = send_cq;
528 init_attr->recv_cq = recv_cq;
530 qp = ib_create_qp(dev->pd, init_attr);
536 ret = srp_init_qp(target, qp);
540 if (dev->use_fast_reg) {
541 fr_pool = srp_alloc_fr_pool(target);
542 if (IS_ERR(fr_pool)) {
543 ret = PTR_ERR(fr_pool);
544 shost_printk(KERN_WARNING, target->scsi_host, PFX
545 "FR pool allocation failed (%d)\n", ret);
548 } else if (dev->use_fmr) {
549 fmr_pool = srp_alloc_fmr_pool(target);
550 if (IS_ERR(fmr_pool)) {
551 ret = PTR_ERR(fmr_pool);
552 shost_printk(KERN_WARNING, target->scsi_host, PFX
553 "FMR pool allocation failed (%d)\n", ret);
561 ib_destroy_cq(ch->recv_cq);
563 ib_destroy_cq(ch->send_cq);
566 ch->recv_cq = recv_cq;
567 ch->send_cq = send_cq;
569 if (dev->use_fast_reg) {
571 srp_destroy_fr_pool(ch->fr_pool);
572 ch->fr_pool = fr_pool;
573 } else if (dev->use_fmr) {
575 ib_destroy_fmr_pool(ch->fmr_pool);
576 ch->fmr_pool = fmr_pool;
586 ib_destroy_cq(send_cq);
589 ib_destroy_cq(recv_cq);
597 * Note: this function may be called without srp_alloc_iu_bufs() having been
598 * invoked. Hence the ch->[rt]x_ring checks.
600 static void srp_free_ch_ib(struct srp_target_port *target,
601 struct srp_rdma_ch *ch)
603 struct srp_device *dev = target->srp_host->srp_dev;
610 ib_destroy_cm_id(ch->cm_id);
614 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
618 if (dev->use_fast_reg) {
620 srp_destroy_fr_pool(ch->fr_pool);
621 } else if (dev->use_fmr) {
623 ib_destroy_fmr_pool(ch->fmr_pool);
626 ib_destroy_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq);
630 * Prevent the SCSI error handler from using this channel after it has
631 * been freed, because the error handler may continue trying to perform
632 * recovery actions after scsi_remove_host()
638 ch->send_cq = ch->recv_cq = NULL;
641 for (i = 0; i < target->queue_size; ++i)
642 srp_free_iu(target->srp_host, ch->rx_ring[i]);
647 for (i = 0; i < target->queue_size; ++i)
648 srp_free_iu(target->srp_host, ch->tx_ring[i]);
654 static void srp_path_rec_completion(int status,
655 struct ib_sa_path_rec *pathrec,
658 struct srp_rdma_ch *ch = ch_ptr;
659 struct srp_target_port *target = ch->target;
663 shost_printk(KERN_ERR, target->scsi_host,
664 PFX "Got failed path rec status %d\n", status);
670 static int srp_lookup_path(struct srp_rdma_ch *ch)
672 struct srp_target_port *target = ch->target;
675 ch->path.numb_path = 1;
677 init_completion(&ch->done);
679 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
680 target->srp_host->srp_dev->dev,
681 target->srp_host->port,
683 IB_SA_PATH_REC_SERVICE_ID |
684 IB_SA_PATH_REC_DGID |
685 IB_SA_PATH_REC_SGID |
686 IB_SA_PATH_REC_NUMB_PATH |
688 SRP_PATH_REC_TIMEOUT_MS,
690 srp_path_rec_completion,
691 ch, &ch->path_query);
692 if (ch->path_query_id < 0)
693 return ch->path_query_id;
695 ret = wait_for_completion_interruptible(&ch->done);
700 shost_printk(KERN_WARNING, target->scsi_host,
701 PFX "Path record query failed\n");
706 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
708 struct srp_target_port *target = ch->target;
710 struct ib_cm_req_param param;
711 struct srp_login_req priv;
715 req = kzalloc(sizeof *req, GFP_KERNEL);
719 req->param.primary_path = &ch->path;
720 req->param.alternate_path = NULL;
721 req->param.service_id = target->service_id;
722 req->param.qp_num = ch->qp->qp_num;
723 req->param.qp_type = ch->qp->qp_type;
724 req->param.private_data = &req->priv;
725 req->param.private_data_len = sizeof req->priv;
726 req->param.flow_control = 1;
728 get_random_bytes(&req->param.starting_psn, 4);
729 req->param.starting_psn &= 0xffffff;
732 * Pick some arbitrary defaults here; we could make these
733 * module parameters if anyone cared about setting them.
735 req->param.responder_resources = 4;
736 req->param.remote_cm_response_timeout = 20;
737 req->param.local_cm_response_timeout = 20;
738 req->param.retry_count = target->tl_retry_count;
739 req->param.rnr_retry_count = 7;
740 req->param.max_cm_retries = 15;
742 req->priv.opcode = SRP_LOGIN_REQ;
744 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
745 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
746 SRP_BUF_FORMAT_INDIRECT);
747 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
748 SRP_MULTICHAN_SINGLE);
750 * In the published SRP specification (draft rev. 16a), the
751 * port identifier format is 8 bytes of ID extension followed
752 * by 8 bytes of GUID. Older drafts put the two halves in the
753 * opposite order, so that the GUID comes first.
755 * Targets conforming to these obsolete drafts can be
756 * recognized by the I/O Class they report.
758 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
759 memcpy(req->priv.initiator_port_id,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.initiator_port_id + 8,
762 &target->initiator_ext, 8);
763 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
764 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
766 memcpy(req->priv.initiator_port_id,
767 &target->initiator_ext, 8);
768 memcpy(req->priv.initiator_port_id + 8,
769 &target->sgid.global.interface_id, 8);
770 memcpy(req->priv.target_port_id, &target->id_ext, 8);
771 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
775 * Topspin/Cisco SRP targets will reject our login unless we
776 * zero out the first 8 bytes of our initiator port ID and set
777 * the second 8 bytes to the local node GUID.
779 if (srp_target_is_topspin(target)) {
780 shost_printk(KERN_DEBUG, target->scsi_host,
781 PFX "Topspin/Cisco initiator port ID workaround "
782 "activated for target GUID %016llx\n",
783 be64_to_cpu(target->ioc_guid));
784 memset(req->priv.initiator_port_id, 0, 8);
785 memcpy(req->priv.initiator_port_id + 8,
786 &target->srp_host->srp_dev->dev->node_guid, 8);
789 status = ib_send_cm_req(ch->cm_id, &req->param);
796 static bool srp_queue_remove_work(struct srp_target_port *target)
798 bool changed = false;
800 spin_lock_irq(&target->lock);
801 if (target->state != SRP_TARGET_REMOVED) {
802 target->state = SRP_TARGET_REMOVED;
805 spin_unlock_irq(&target->lock);
808 queue_work(srp_remove_wq, &target->remove_work);
813 static void srp_disconnect_target(struct srp_target_port *target)
815 struct srp_rdma_ch *ch;
818 /* XXX should send SRP_I_LOGOUT request */
820 for (i = 0; i < target->ch_count; i++) {
822 ch->connected = false;
823 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
824 shost_printk(KERN_DEBUG, target->scsi_host,
825 PFX "Sending CM DREQ failed\n");
830 static void srp_free_req_data(struct srp_target_port *target,
831 struct srp_rdma_ch *ch)
833 struct srp_device *dev = target->srp_host->srp_dev;
834 struct ib_device *ibdev = dev->dev;
835 struct srp_request *req;
841 for (i = 0; i < target->req_ring_size; ++i) {
842 req = &ch->req_ring[i];
843 if (dev->use_fast_reg) {
846 kfree(req->fmr_list);
847 kfree(req->map_page);
849 if (req->indirect_dma_addr) {
850 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
851 target->indirect_size,
854 kfree(req->indirect_desc);
861 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
863 struct srp_target_port *target = ch->target;
864 struct srp_device *srp_dev = target->srp_host->srp_dev;
865 struct ib_device *ibdev = srp_dev->dev;
866 struct srp_request *req;
869 int i, ret = -ENOMEM;
871 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
876 for (i = 0; i < target->req_ring_size; ++i) {
877 req = &ch->req_ring[i];
878 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
882 if (srp_dev->use_fast_reg) {
883 req->fr_list = mr_list;
885 req->fmr_list = mr_list;
886 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
887 sizeof(void *), GFP_KERNEL);
891 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
892 if (!req->indirect_desc)
895 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
896 target->indirect_size,
898 if (ib_dma_mapping_error(ibdev, dma_addr))
901 req->indirect_dma_addr = dma_addr;
910 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
911 * @shost: SCSI host whose attributes to remove from sysfs.
913 * Note: Any attributes defined in the host template that did not exist
914 * before this function was invoked will be ignored.
916 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
918 struct device_attribute **attr;
920 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
921 device_remove_file(&shost->shost_dev, *attr);
924 static void srp_remove_target(struct srp_target_port *target)
926 struct srp_rdma_ch *ch;
929 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
931 srp_del_scsi_host_attr(target->scsi_host);
932 srp_rport_get(target->rport);
933 srp_remove_host(target->scsi_host);
934 scsi_remove_host(target->scsi_host);
935 srp_stop_rport_timers(target->rport);
936 srp_disconnect_target(target);
937 for (i = 0; i < target->ch_count; i++) {
939 srp_free_ch_ib(target, ch);
941 cancel_work_sync(&target->tl_err_work);
942 srp_rport_put(target->rport);
943 for (i = 0; i < target->ch_count; i++) {
945 srp_free_req_data(target, ch);
950 spin_lock(&target->srp_host->target_lock);
951 list_del(&target->list);
952 spin_unlock(&target->srp_host->target_lock);
954 scsi_host_put(target->scsi_host);
957 static void srp_remove_work(struct work_struct *work)
959 struct srp_target_port *target =
960 container_of(work, struct srp_target_port, remove_work);
962 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
964 srp_remove_target(target);
967 static void srp_rport_delete(struct srp_rport *rport)
969 struct srp_target_port *target = rport->lld_data;
971 srp_queue_remove_work(target);
975 * srp_connected_ch() - number of connected channels
976 * @target: SRP target port.
978 static int srp_connected_ch(struct srp_target_port *target)
982 for (i = 0; i < target->ch_count; i++)
983 c += target->ch[i].connected;
988 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
990 struct srp_target_port *target = ch->target;
993 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
995 ret = srp_lookup_path(ch);
1000 init_completion(&ch->done);
1001 ret = srp_send_req(ch, multich);
1004 ret = wait_for_completion_interruptible(&ch->done);
1009 * The CM event handling code will set status to
1010 * SRP_PORT_REDIRECT if we get a port redirect REJ
1011 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1012 * redirect REJ back.
1017 ch->connected = true;
1020 case SRP_PORT_REDIRECT:
1021 ret = srp_lookup_path(ch);
1026 case SRP_DLID_REDIRECT:
1029 case SRP_STALE_CONN:
1030 shost_printk(KERN_ERR, target->scsi_host, PFX
1031 "giving up on stale connection\n");
1041 return ret <= 0 ? ret : -ENODEV;
1044 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1046 struct ib_send_wr *bad_wr;
1047 struct ib_send_wr wr = {
1048 .opcode = IB_WR_LOCAL_INV,
1049 .wr_id = LOCAL_INV_WR_ID_MASK,
1053 .ex.invalidate_rkey = rkey,
1056 return ib_post_send(ch->qp, &wr, &bad_wr);
1059 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1060 struct srp_rdma_ch *ch,
1061 struct srp_request *req)
1063 struct srp_target_port *target = ch->target;
1064 struct srp_device *dev = target->srp_host->srp_dev;
1065 struct ib_device *ibdev = dev->dev;
1068 if (!scsi_sglist(scmnd) ||
1069 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1070 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1073 if (dev->use_fast_reg) {
1074 struct srp_fr_desc **pfr;
1076 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1077 res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1079 shost_printk(KERN_ERR, target->scsi_host, PFX
1080 "Queueing INV WR for rkey %#x failed (%d)\n",
1081 (*pfr)->mr->rkey, res);
1082 queue_work(system_long_wq,
1083 &target->tl_err_work);
1087 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1089 } else if (dev->use_fmr) {
1090 struct ib_pool_fmr **pfmr;
1092 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1093 ib_fmr_pool_unmap(*pfmr);
1096 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1097 scmnd->sc_data_direction);
1101 * srp_claim_req - Take ownership of the scmnd associated with a request.
1102 * @ch: SRP RDMA channel.
1103 * @req: SRP request.
1104 * @sdev: If not NULL, only take ownership for this SCSI device.
1105 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1106 * ownership of @req->scmnd if it equals @scmnd.
1109 * Either NULL or a pointer to the SCSI command the caller became owner of.
1111 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1112 struct srp_request *req,
1113 struct scsi_device *sdev,
1114 struct scsi_cmnd *scmnd)
1116 unsigned long flags;
1118 spin_lock_irqsave(&ch->lock, flags);
1120 (!sdev || req->scmnd->device == sdev) &&
1121 (!scmnd || req->scmnd == scmnd)) {
1127 spin_unlock_irqrestore(&ch->lock, flags);
1133 * srp_free_req() - Unmap data and add request to the free request list.
1134 * @ch: SRP RDMA channel.
1135 * @req: Request to be freed.
1136 * @scmnd: SCSI command associated with @req.
1137 * @req_lim_delta: Amount to be added to @target->req_lim.
1139 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1140 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1142 unsigned long flags;
1144 srp_unmap_data(scmnd, ch, req);
1146 spin_lock_irqsave(&ch->lock, flags);
1147 ch->req_lim += req_lim_delta;
1148 spin_unlock_irqrestore(&ch->lock, flags);
1151 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1152 struct scsi_device *sdev, int result)
1154 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1157 srp_free_req(ch, req, scmnd, 0);
1158 scmnd->result = result;
1159 scmnd->scsi_done(scmnd);
1163 static void srp_terminate_io(struct srp_rport *rport)
1165 struct srp_target_port *target = rport->lld_data;
1166 struct srp_rdma_ch *ch;
1167 struct Scsi_Host *shost = target->scsi_host;
1168 struct scsi_device *sdev;
1172 * Invoking srp_terminate_io() while srp_queuecommand() is running
1173 * is not safe. Hence the warning statement below.
1175 shost_for_each_device(sdev, shost)
1176 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1178 for (i = 0; i < target->ch_count; i++) {
1179 ch = &target->ch[i];
1181 for (j = 0; j < target->req_ring_size; ++j) {
1182 struct srp_request *req = &ch->req_ring[j];
1184 srp_finish_req(ch, req, NULL,
1185 DID_TRANSPORT_FAILFAST << 16);
1191 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1192 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1193 * srp_reset_device() or srp_reset_host() calls will occur while this function
1194 * is in progress. One way to achieve that is to call srp_reconnect_rport()
1195 * instead of calling this function directly, since srp_reconnect_rport()
1196 * serializes calls of this function via rport->mutex and also blocks
1197 * srp_queuecommand() calls before invoking this function.
1199 static int srp_rport_reconnect(struct srp_rport *rport)
1201 struct srp_target_port *target = rport->lld_data;
1202 struct srp_rdma_ch *ch;
1204 bool multich = false;
1206 srp_disconnect_target(target);
1208 if (target->state == SRP_TARGET_SCANNING)
1212 * Now get a new local CM ID so that we avoid confusing the target in
1213 * case things are really fouled up. Doing so also ensures that all CM
1214 * callbacks will have finished before a new QP is allocated.
1216 for (i = 0; i < target->ch_count; i++) {
1217 ch = &target->ch[i];
1218 ret += srp_new_cm_id(ch);
1220 for (i = 0; i < target->ch_count; i++) {
1221 ch = &target->ch[i];
1222 for (j = 0; j < target->req_ring_size; ++j) {
1223 struct srp_request *req = &ch->req_ring[j];
1225 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1228 for (i = 0; i < target->ch_count; i++) {
1229 ch = &target->ch[i];
1231 * Whether or not creating a new CM ID succeeded, create a new
1232 * QP. This guarantees that all completion callback function
1233 * invocations have finished before request resetting starts.
1235 ret += srp_create_ch_ib(ch);
1237 INIT_LIST_HEAD(&ch->free_tx);
1238 for (j = 0; j < target->queue_size; ++j)
1239 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1242 target->qp_in_error = false;
1244 for (i = 0; i < target->ch_count; i++) {
1245 ch = &target->ch[i];
1248 ret = srp_connect_ch(ch, multich);
1253 shost_printk(KERN_INFO, target->scsi_host,
1254 PFX "reconnect succeeded\n");
1259 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1260 unsigned int dma_len, u32 rkey)
1262 struct srp_direct_buf *desc = state->desc;
1264 WARN_ON_ONCE(!dma_len);
1266 desc->va = cpu_to_be64(dma_addr);
1267 desc->key = cpu_to_be32(rkey);
1268 desc->len = cpu_to_be32(dma_len);
1270 state->total_len += dma_len;
1275 static int srp_map_finish_fmr(struct srp_map_state *state,
1276 struct srp_rdma_ch *ch)
1278 struct srp_target_port *target = ch->target;
1279 struct srp_device *dev = target->srp_host->srp_dev;
1280 struct ib_pool_fmr *fmr;
1283 if (state->fmr.next >= state->fmr.end)
1286 WARN_ON_ONCE(!dev->use_fmr);
1288 if (state->npages == 0)
1291 if (state->npages == 1 && target->global_mr) {
1292 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1293 target->global_mr->rkey);
1297 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1298 state->npages, io_addr);
1300 return PTR_ERR(fmr);
1302 *state->fmr.next++ = fmr;
1305 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1306 state->dma_len, fmr->fmr->rkey);
1315 static int srp_map_finish_fr(struct srp_map_state *state,
1316 struct srp_rdma_ch *ch, int sg_nents)
1318 struct srp_target_port *target = ch->target;
1319 struct srp_device *dev = target->srp_host->srp_dev;
1320 struct ib_send_wr *bad_wr;
1321 struct ib_reg_wr wr;
1322 struct srp_fr_desc *desc;
1326 if (state->fr.next >= state->fr.end)
1329 WARN_ON_ONCE(!dev->use_fast_reg);
1334 if (sg_nents == 1 && target->global_mr) {
1335 srp_map_desc(state, sg_dma_address(state->sg),
1336 sg_dma_len(state->sg),
1337 target->global_mr->rkey);
1341 desc = srp_fr_pool_get(ch->fr_pool);
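/*
 * Bump the key portion of the rkey before re-registering the MR, so that a
 * stale remote access using the previous rkey fails rather than hitting the
 * new registration.
 */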
1345 rkey = ib_inc_rkey(desc->mr->rkey);
1346 ib_update_fast_reg_key(desc->mr, rkey);
1348 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
1349 if (unlikely(n < 0))
1353 wr.wr.opcode = IB_WR_REG_MR;
1354 wr.wr.wr_id = FAST_REG_WR_ID_MASK;
1356 wr.wr.send_flags = 0;
1358 wr.key = desc->mr->rkey;
1359 wr.access = (IB_ACCESS_LOCAL_WRITE |
1360 IB_ACCESS_REMOTE_READ |
1361 IB_ACCESS_REMOTE_WRITE);
1363 *state->fr.next++ = desc;
1366 srp_map_desc(state, desc->mr->iova,
1367 desc->mr->length, desc->mr->rkey);
1369 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1376 static int srp_map_sg_entry(struct srp_map_state *state,
1377 struct srp_rdma_ch *ch,
1378 struct scatterlist *sg, int sg_index)
1380 struct srp_target_port *target = ch->target;
1381 struct srp_device *dev = target->srp_host->srp_dev;
1382 struct ib_device *ibdev = dev->dev;
1383 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1384 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1385 unsigned int len = 0;
1388 WARN_ON_ONCE(!dma_len);
1391 unsigned offset = dma_addr & ~dev->mr_page_mask;
1392 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1393 ret = srp_map_finish_fmr(state, ch);
1398 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1401 state->base_dma_addr = dma_addr;
1402 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1403 state->dma_len += len;
1409 * If the last entry of the MR wasn't a full page, then we need to
1410 * close it out and start a new one -- we can only merge at page
1414 if (len != dev->mr_page_size)
1415 ret = srp_map_finish_fmr(state, ch);
1419 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1420 struct srp_request *req, struct scatterlist *scat,
1423 struct scatterlist *sg;
1426 state->desc = req->indirect_desc;
1427 state->pages = req->map_page;
1428 state->fmr.next = req->fmr_list;
1429 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1431 for_each_sg(scat, sg, count, i) {
1432 ret = srp_map_sg_entry(state, ch, sg, i);
1437 ret = srp_map_finish_fmr(state, ch);
1441 req->nmdesc = state->nmdesc;
1446 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1447 struct srp_request *req, struct scatterlist *scat,
1450 state->desc = req->indirect_desc;
1451 state->fr.next = req->fr_list;
1452 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1458 n = srp_map_finish_fr(state, ch, count);
1459 if (unlikely(n < 0))
1463 for (i = 0; i < n; i++)
1464 state->sg = sg_next(state->sg);
1467 req->nmdesc = state->nmdesc;
1472 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1473 struct srp_request *req, struct scatterlist *scat,
1476 struct srp_target_port *target = ch->target;
1477 struct srp_device *dev = target->srp_host->srp_dev;
1478 struct scatterlist *sg;
1481 state->desc = req->indirect_desc;
1482 for_each_sg(scat, sg, count, i) {
1483 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1484 ib_sg_dma_len(dev->dev, sg),
1485 target->global_mr->rkey);
1488 req->nmdesc = state->nmdesc;
1494 * Register the indirect data buffer descriptor with the HCA.
1496 * Note: since the indirect data buffer descriptor has been allocated with
1497 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1500 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1501 void **next_mr, void **end_mr, u32 idb_len,
1504 struct srp_target_port *target = ch->target;
1505 struct srp_device *dev = target->srp_host->srp_dev;
1506 struct srp_map_state state;
1507 struct srp_direct_buf idb_desc;
1509 struct scatterlist idb_sg[1];
1512 memset(&state, 0, sizeof(state));
1513 memset(&idb_desc, 0, sizeof(idb_desc));
1514 state.gen.next = next_mr;
1515 state.gen.end = end_mr;
1516 state.desc = &idb_desc;
1517 state.base_dma_addr = req->indirect_dma_addr;
1518 state.dma_len = idb_len;
1520 if (dev->use_fast_reg) {
1522 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1523 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1524 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1525 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1527 ret = srp_map_finish_fr(&state, ch, 1);
1530 } else if (dev->use_fmr) {
1531 state.pages = idb_pages;
1532 state.pages[0] = (req->indirect_dma_addr &
1535 ret = srp_map_finish_fmr(&state, ch);
1542 *idb_rkey = idb_desc.key;
1547 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1548 struct srp_request *req)
1550 struct srp_target_port *target = ch->target;
1551 struct scatterlist *scat;
1552 struct srp_cmd *cmd = req->cmd->buf;
1553 int len, nents, count, ret;
1554 struct srp_device *dev;
1555 struct ib_device *ibdev;
1556 struct srp_map_state state;
1557 struct srp_indirect_buf *indirect_hdr;
1558 u32 idb_len, table_len;
1562 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1563 return sizeof (struct srp_cmd);
1565 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1566 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1567 shost_printk(KERN_WARNING, target->scsi_host,
1568 PFX "Unhandled data direction %d\n",
1569 scmnd->sc_data_direction);
1573 nents = scsi_sg_count(scmnd);
1574 scat = scsi_sglist(scmnd);
1576 dev = target->srp_host->srp_dev;
1579 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1580 if (unlikely(count == 0))
1583 fmt = SRP_DATA_DESC_DIRECT;
1584 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1586 if (count == 1 && target->global_mr) {
1588 * The midlayer only generated a single gather/scatter
1589 * entry, or DMA mapping coalesced everything to a
1590 * single entry. So a direct descriptor along with
1591 * the DMA MR suffices.
1593 struct srp_direct_buf *buf = (void *) cmd->add_data;
1595 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1596 buf->key = cpu_to_be32(target->global_mr->rkey);
1597 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1604 * We have more than one scatter/gather entry, so build our indirect
1605 * descriptor table, trying to merge as many entries as we can.
1607 indirect_hdr = (void *) cmd->add_data;
1609 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1610 target->indirect_size, DMA_TO_DEVICE);
1612 memset(&state, 0, sizeof(state));
1613 if (dev->use_fast_reg)
1614 srp_map_sg_fr(&state, ch, req, scat, count);
1615 else if (dev->use_fmr)
1616 srp_map_sg_fmr(&state, ch, req, scat, count);
1618 srp_map_sg_dma(&state, ch, req, scat, count);
1620 /* We've mapped the request, now pull as much of the indirect
1621 * descriptor table as we can into the command buffer. If this
1622 * target is not using an external indirect table, we are
1623 * guaranteed to fit into the command, as the SCSI layer won't
1624 * give us more S/G entries than we allow.
1626 if (state.ndesc == 1) {
1628 * Memory registration collapsed the sg-list into one entry,
1629 * so use a direct descriptor.
1631 struct srp_direct_buf *buf = (void *) cmd->add_data;
1633 *buf = req->indirect_desc[0];
1637 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1638 !target->allow_ext_sg)) {
1639 shost_printk(KERN_ERR, target->scsi_host,
1640 "Could not fit S/G list into SRP_CMD\n");
1644 count = min(state.ndesc, target->cmd_sg_cnt);
1645 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1646 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1648 fmt = SRP_DATA_DESC_INDIRECT;
1649 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1650 len += count * sizeof (struct srp_direct_buf);
1652 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1653 count * sizeof (struct srp_direct_buf));
1655 if (!target->global_mr) {
1656 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1657 idb_len, &idb_rkey);
1662 idb_rkey = cpu_to_be32(target->global_mr->rkey);
1665 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1666 indirect_hdr->table_desc.key = idb_rkey;
1667 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1668 indirect_hdr->len = cpu_to_be32(state.total_len);
1670 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1671 cmd->data_out_desc_cnt = count;
1673 cmd->data_in_desc_cnt = count;
1675 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1679 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1680 cmd->buf_fmt = fmt << 4;
1688 * Return an IU and possible credit to the free pool
1690 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1691 enum srp_iu_type iu_type)
1693 unsigned long flags;
1695 spin_lock_irqsave(&ch->lock, flags);
1696 list_add(&iu->list, &ch->free_tx);
1697 if (iu_type != SRP_IU_RSP)
1699 spin_unlock_irqrestore(&ch->lock, flags);
1703 * Must be called with ch->lock held to protect req_lim and free_tx.
1704 * If IU is not sent, it must be returned using srp_put_tx_iu().
1707 * An upper limit for the number of allocated information units for each
1709 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1710 * more than Scsi_Host.can_queue requests.
1711 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1712 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1713 * one unanswered SRP request to an initiator.
1715 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1716 enum srp_iu_type iu_type)
1718 struct srp_target_port *target = ch->target;
1719 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1722 srp_send_completion(ch->send_cq, ch);
1724 if (list_empty(&ch->free_tx))
1727 /* Initiator responses to target requests do not consume credits */
1728 if (iu_type != SRP_IU_RSP) {
1729 if (ch->req_lim <= rsv) {
1730 ++target->zero_req_lim;
1737 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1738 list_del(&iu->list);
1742 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1744 struct srp_target_port *target = ch->target;
1746 struct ib_send_wr wr, *bad_wr;
1748 list.addr = iu->dma;
1750 list.lkey = target->lkey;
1753 wr.wr_id = (uintptr_t) iu;
1756 wr.opcode = IB_WR_SEND;
1757 wr.send_flags = IB_SEND_SIGNALED;
1759 return ib_post_send(ch->qp, &wr, &bad_wr);
1762 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1764 struct srp_target_port *target = ch->target;
1765 struct ib_recv_wr wr, *bad_wr;
1768 list.addr = iu->dma;
1769 list.length = iu->size;
1770 list.lkey = target->lkey;
1773 wr.wr_id = (uintptr_t) iu;
1777 return ib_post_recv(ch->qp, &wr, &bad_wr);
1780 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1782 struct srp_target_port *target = ch->target;
1783 struct srp_request *req;
1784 struct scsi_cmnd *scmnd;
1785 unsigned long flags;
1787 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1788 spin_lock_irqsave(&ch->lock, flags);
1789 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1790 if (rsp->tag == ch->tsk_mgmt_tag) {
1791 ch->tsk_mgmt_status = -1;
1792 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1793 ch->tsk_mgmt_status = rsp->data[3];
1794 complete(&ch->tsk_mgmt_done);
1796 shost_printk(KERN_ERR, target->scsi_host,
1797 "Received tsk mgmt response too late for tag %#llx\n",
1800 spin_unlock_irqrestore(&ch->lock, flags);
1802 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1803 if (scmnd && scmnd->host_scribble) {
1804 req = (void *)scmnd->host_scribble;
1805 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1810 shost_printk(KERN_ERR, target->scsi_host,
1811 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1812 rsp->tag, ch - target->ch, ch->qp->qp_num);
1814 spin_lock_irqsave(&ch->lock, flags);
1815 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1816 spin_unlock_irqrestore(&ch->lock, flags);
1820 scmnd->result = rsp->status;
1822 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1823 memcpy(scmnd->sense_buffer, rsp->data +
1824 be32_to_cpu(rsp->resp_data_len),
1825 min_t(int, be32_to_cpu(rsp->sense_data_len),
1826 SCSI_SENSE_BUFFERSIZE));
1829 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1830 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1831 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1832 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1833 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1834 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1835 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1836 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1838 srp_free_req(ch, req, scmnd,
1839 be32_to_cpu(rsp->req_lim_delta));
1841 scmnd->host_scribble = NULL;
1842 scmnd->scsi_done(scmnd);
1846 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1849 struct srp_target_port *target = ch->target;
1850 struct ib_device *dev = target->srp_host->srp_dev->dev;
1851 unsigned long flags;
1855 spin_lock_irqsave(&ch->lock, flags);
1856 ch->req_lim += req_delta;
1857 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1858 spin_unlock_irqrestore(&ch->lock, flags);
1861 shost_printk(KERN_ERR, target->scsi_host, PFX
1862 "no IU available to send response\n");
1866 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1867 memcpy(iu->buf, rsp, len);
1868 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1870 err = srp_post_send(ch, iu, len);
1872 shost_printk(KERN_ERR, target->scsi_host, PFX
1873 "unable to post response: %d\n", err);
1874 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1880 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1881 struct srp_cred_req *req)
1883 struct srp_cred_rsp rsp = {
1884 .opcode = SRP_CRED_RSP,
1887 s32 delta = be32_to_cpu(req->req_lim_delta);
1889 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1890 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1891 "problems processing SRP_CRED_REQ\n");
1894 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1895 struct srp_aer_req *req)
1897 struct srp_target_port *target = ch->target;
1898 struct srp_aer_rsp rsp = {
1899 .opcode = SRP_AER_RSP,
1902 s32 delta = be32_to_cpu(req->req_lim_delta);
1904 shost_printk(KERN_ERR, target->scsi_host, PFX
1905 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1907 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1908 shost_printk(KERN_ERR, target->scsi_host, PFX
1909 "problems processing SRP_AER_REQ\n");
1912 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1914 struct srp_target_port *target = ch->target;
1915 struct ib_device *dev = target->srp_host->srp_dev->dev;
1916 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1920 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1923 opcode = *(u8 *) iu->buf;
1926 shost_printk(KERN_ERR, target->scsi_host,
1927 PFX "recv completion, opcode 0x%02x\n", opcode);
1928 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1929 iu->buf, wc->byte_len, true);
1934 srp_process_rsp(ch, iu->buf);
1938 srp_process_cred_req(ch, iu->buf);
1942 srp_process_aer_req(ch, iu->buf);
1946 /* XXX Handle target logout */
1947 shost_printk(KERN_WARNING, target->scsi_host,
1948 PFX "Got target logout request\n");
1952 shost_printk(KERN_WARNING, target->scsi_host,
1953 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1957 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1960 res = srp_post_recv(ch, iu);
1962 shost_printk(KERN_ERR, target->scsi_host,
1963 PFX "Recv failed with error code %d\n", res);
1967 * srp_tl_err_work() - handle a transport layer error
1968 * @work: Work structure embedded in an SRP target port.
1970 * Note: This function may get invoked before the rport has been created,
1971 * hence the target->rport test.
1973 static void srp_tl_err_work(struct work_struct *work)
1975 struct srp_target_port *target;
1977 target = container_of(work, struct srp_target_port, tl_err_work);
1979 srp_start_tl_fail_timers(target->rport);
1982 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1983 bool send_err, struct srp_rdma_ch *ch)
1985 struct srp_target_port *target = ch->target;
1987 if (wr_id == SRP_LAST_WR_ID) {
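/* Flush completion of the drain WR posted by srp_destroy_qp(). */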
1988 complete(&ch->done);
1992 if (ch->connected && !target->qp_in_error) {
1993 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1994 shost_printk(KERN_ERR, target->scsi_host, PFX
1995 "LOCAL_INV failed with status %s (%d)\n",
1996 ib_wc_status_msg(wc_status), wc_status);
1997 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1998 shost_printk(KERN_ERR, target->scsi_host, PFX
1999 "FAST_REG_MR failed status %s (%d)\n",
2000 ib_wc_status_msg(wc_status), wc_status);
2002 shost_printk(KERN_ERR, target->scsi_host,
2003 PFX "failed %s status %s (%d) for iu %p\n",
2004 send_err ? "send" : "receive",
2005 ib_wc_status_msg(wc_status), wc_status,
2006 (void *)(uintptr_t)wr_id);
2008 queue_work(system_long_wq, &target->tl_err_work);
2010 target->qp_in_error = true;
2013 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2015 struct srp_rdma_ch *ch = ch_ptr;
2018 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2019 while (ib_poll_cq(cq, 1, &wc) > 0) {
2020 if (likely(wc.status == IB_WC_SUCCESS)) {
2021 srp_handle_recv(ch, &wc);
2023 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
2028 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2030 struct srp_rdma_ch *ch = ch_ptr;
2034 while (ib_poll_cq(cq, 1, &wc) > 0) {
2035 if (likely(wc.status == IB_WC_SUCCESS)) {
2036 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2037 list_add(&iu->list, &ch->free_tx);
2039 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
2044 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2046 struct srp_target_port *target = host_to_target(shost);
2047 struct srp_rport *rport = target->rport;
2048 struct srp_rdma_ch *ch;
2049 struct srp_request *req;
2051 struct srp_cmd *cmd;
2052 struct ib_device *dev;
2053 unsigned long flags;
2057 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2060 * The SCSI EH thread is the only context from which srp_queuecommand()
2061 * can get invoked for blocked devices (SDEV_BLOCK /
2062 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2063 * locking the rport mutex if invoked from inside the SCSI EH.
2066 mutex_lock(&rport->mutex);
2068 scmnd->result = srp_chkready(target->rport);
2069 if (unlikely(scmnd->result))
2072 WARN_ON_ONCE(scmnd->request->tag < 0);
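/*
 * The blk-mq unique tag encodes both the hardware queue number, which
 * selects the RDMA channel, and the per-queue tag, which indexes that
 * channel's request ring.
 */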
2073 tag = blk_mq_unique_tag(scmnd->request);
2074 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2075 idx = blk_mq_unique_tag_to_tag(tag);
2076 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2077 dev_name(&shost->shost_gendev), tag, idx,
2078 target->req_ring_size);
2080 spin_lock_irqsave(&ch->lock, flags);
2081 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2082 spin_unlock_irqrestore(&ch->lock, flags);
2087 req = &ch->req_ring[idx];
2088 dev = target->srp_host->srp_dev->dev;
2089 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2092 scmnd->host_scribble = (void *) req;
2095 memset(cmd, 0, sizeof *cmd);
2097 cmd->opcode = SRP_CMD;
2098 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2100 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2105 len = srp_map_data(scmnd, ch, req);
2107 shost_printk(KERN_ERR, target->scsi_host,
2108 PFX "Failed to map data (%d)\n", len);
2110 * If we ran out of memory descriptors (-ENOMEM) because an
2111 * application is queuing many requests with more than
2112 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2113 * to reduce queue depth temporarily.
2115 scmnd->result = len == -ENOMEM ?
2116 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2120 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2123 if (srp_post_send(ch, iu, len)) {
2124 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2132 mutex_unlock(&rport->mutex);
2137 srp_unmap_data(scmnd, ch, req);
2140 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2143 * Prevent the loops that iterate over the request ring from
2144 * encountering a dangling SCSI command pointer.
2149 if (scmnd->result) {
2150 scmnd->scsi_done(scmnd);
2153 ret = SCSI_MLQUEUE_HOST_BUSY;
2160 * Note: the resources allocated in this function are freed in
2163 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2165 struct srp_target_port *target = ch->target;
2168 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2172 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2177 for (i = 0; i < target->queue_size; ++i) {
2178 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2180 GFP_KERNEL, DMA_FROM_DEVICE);
2181 if (!ch->rx_ring[i])
2185 for (i = 0; i < target->queue_size; ++i) {
2186 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2188 GFP_KERNEL, DMA_TO_DEVICE);
2189 if (!ch->tx_ring[i])
2192 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2198 for (i = 0; i < target->queue_size; ++i) {
2199 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2200 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2213 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2215 uint64_t T_tr_ns, max_compl_time_ms;
2216 uint32_t rq_tmo_jiffies;
2219 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2220 * table 91), both the QP timeout and the retry count have to be set
2221 * for RC QP's during the RTR to RTS transition.
2223 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2224 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2227 * Set target->rq_tmo_jiffies to one second more than the largest time
2228 * it can take before an error completion is generated. See also
2229 * C9-140..142 in the IBTA spec for more information about how to
2230 * convert the QP Local ACK Timeout value to nanoseconds.
2232 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2233 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2234 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2235 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
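/*
 * Example (illustrative numbers): with qp_attr->timeout == 14 and
 * qp_attr->retry_cnt == 7, T_tr = 4096 * 2^14 ns ~= 67 ms, the worst-case
 * completion time is 7 * 4 * 67 ms ~= 1.9 s, so rq_tmo_jiffies corresponds
 * to roughly 2.9 seconds.
 */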
2237 return rq_tmo_jiffies;
2240 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2241 const struct srp_login_rsp *lrsp,
2242 struct srp_rdma_ch *ch)
2244 struct srp_target_port *target = ch->target;
2245 struct ib_qp_attr *qp_attr = NULL;
2250 if (lrsp->opcode == SRP_LOGIN_RSP) {
2251 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2252 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2255 * Reserve credits for task management so we don't
2256 * bounce requests back to the SCSI mid-layer.
2258 target->scsi_host->can_queue
2259 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2260 target->scsi_host->can_queue);
2261 target->scsi_host->cmd_per_lun
2262 = min_t(int, target->scsi_host->can_queue,
2263 target->scsi_host->cmd_per_lun);
2265 shost_printk(KERN_WARNING, target->scsi_host,
2266 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2272 ret = srp_alloc_iu_bufs(ch);
2278 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2282 qp_attr->qp_state = IB_QPS_RTR;
2283 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2287 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2291 for (i = 0; i < target->queue_size; i++) {
2292 struct srp_iu *iu = ch->rx_ring[i];
2294 ret = srp_post_recv(ch, iu);
2299 qp_attr->qp_state = IB_QPS_RTS;
2300 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2304 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2306 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2310 ret = ib_send_cm_rtu(cm_id, NULL, 0);
static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event,
			       struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct Scsi_Host *shost = target->scsi_host;
	struct ib_class_port_info *cpi;
	int opcode;

	switch (event->param.rej_rcvd.reason) {
	case IB_CM_REJ_PORT_CM_REDIRECT:
		cpi = event->param.rej_rcvd.ari;
		ch->path.dlid = cpi->redirect_lid;
		ch->path.pkey = cpi->redirect_pkey;
		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);

		ch->status = ch->path.dlid ?
			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
		break;

	case IB_CM_REJ_PORT_REDIRECT:
		if (srp_target_is_topspin(target)) {
			/*
			 * Topspin/Cisco SRP gateways incorrectly send
			 * reject reason code 25 when they mean 24
			 * (port redirect).
			 */
			memcpy(ch->path.dgid.raw,
			       event->param.rej_rcvd.ari, 16);

			shost_printk(KERN_DEBUG, shost,
				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
				     be64_to_cpu(ch->path.dgid.global.interface_id));

			ch->status = SRP_PORT_REDIRECT;
		} else {
			shost_printk(KERN_WARNING, shost,
				     " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
			ch->status = -ECONNRESET;
		}
		break;

	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
		shost_printk(KERN_WARNING, shost,
			     " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_CONSUMER_DEFINED:
		opcode = *(u8 *) event->private_data;
		if (opcode == SRP_LOGIN_REJ) {
			struct srp_login_rej *rej = event->private_data;
			u32 reason = be32_to_cpu(rej->reason);

			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
				shost_printk(KERN_WARNING, shost,
					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
			else
				shost_printk(KERN_WARNING, shost, PFX
					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
					     target->sgid.raw,
					     target->orig_dgid.raw, reason);
		} else
			shost_printk(KERN_WARNING, shost,
				     " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
				     " opcode 0x%02x\n", opcode);
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_STALE_CONN:
		shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
		ch->status = SRP_STALE_CONN;
		break;

	default:
		shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
			     event->param.rej_rcvd.reason);
		ch->status = -ECONNRESET;
	}
}

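/*
 * Top-level IB CM event handler: dispatches REP/REJ processing, marks the
 * channel disconnected on DREQ, and completes ch->done so the login code
 * waiting on it can observe ch->status once the connection attempt is over.
 */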
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct srp_rdma_ch *ch = cm_id->context;
	struct srp_target_port *target = ch->target;
	int comp = 0;

	switch (event->event) {
	case IB_CM_REQ_ERROR:
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Sending CM REQ failed\n");
		comp = 1;
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REP_RECEIVED:
		comp = 1;
		srp_cm_rep_handler(cm_id, event->private_data, ch);
		break;

	case IB_CM_REJ_RECEIVED:
		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
		comp = 1;

		srp_cm_rej_handler(cm_id, event, ch);
		break;

	case IB_CM_DREQ_RECEIVED:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "DREQ received - connection closed\n");
		ch->connected = false;
		if (ib_send_cm_drep(cm_id, NULL, 0))
			shost_printk(KERN_ERR, target->scsi_host,
				     PFX "Sending CM DREP failed\n");
		queue_work(system_long_wq, &target->tl_err_work);
		break;

	case IB_CM_TIMEWAIT_EXIT:
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "connection closed\n");
		comp = 1;

		ch->status = 0;
		break;

	case IB_CM_MRA_RECEIVED:
	case IB_CM_DREQ_ERROR:
	case IB_CM_DREP_RECEIVED:
		break;

	default:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled CM event %d\n", event->event);
		break;
	}

	if (comp)
		complete(&ch->done);

	return 0;
}

/**
 * srp_change_queue_depth - set device queue depth
 * @sdev: scsi device struct
 * @qdepth: requested queue depth
 *
 * Returns queue depth.
 */
static int
srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
	if (!sdev->tagged_supported)
		qdepth = 1;
	return scsi_change_queue_depth(sdev, qdepth);
}

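/*
 * Build and send an SRP task management IU (e.g. ABORT TASK or LUN RESET)
 * on the given channel and wait up to SRP_ABORT_TIMEOUT_MS for the response.
 * Returns 0 on success and -1 on failure; the SRP status byte, if any, is
 * passed back through *status.
 */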
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
			     u8 func, u8 *status)
{
	struct srp_target_port *target = ch->target;
	struct srp_rport *rport = target->rport;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	struct srp_iu *iu;
	struct srp_tsk_mgmt *tsk_mgmt;
	int res;

	if (!ch->connected || target->qp_in_error)
		return -1;

	/*
	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
	 * invoked while a task management function is being sent.
	 */
	mutex_lock(&rport->mutex);
	spin_lock_irq(&ch->lock);
	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
	spin_unlock_irq(&ch->lock);

	if (!iu) {
		mutex_unlock(&rport->mutex);

		return -1;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
				   DMA_TO_DEVICE);
	tsk_mgmt = iu->buf;
	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);

	tsk_mgmt->opcode	= SRP_TSK_MGMT;
	int_to_scsilun(lun, &tsk_mgmt->lun);
	tsk_mgmt->tsk_mgmt_func = func;
	tsk_mgmt->task_tag	= req_tag;

	spin_lock_irq(&ch->lock);
	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
	tsk_mgmt->tag = ch->tsk_mgmt_tag;
	spin_unlock_irq(&ch->lock);

	init_completion(&ch->tsk_mgmt_done);

	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
				      DMA_TO_DEVICE);
	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
		mutex_unlock(&rport->mutex);

		return -1;
	}
	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
					  msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
	if (res > 0 && status)
		*status = ch->tsk_mgmt_status;
	mutex_unlock(&rport->mutex);

	WARN_ON_ONCE(res < 0);

	return res > 0 ? 0 : -1;
}

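/*
 * SCSI error handler callbacks. srp_abort() aborts a single outstanding
 * command, srp_reset_device() issues a LUN reset and finishes all requests
 * queued for that device, and srp_reset_host() reconnects the rport.
 */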
static int srp_abort(struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(scmnd->device->host);
	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
	u32 tag;
	u16 ch_idx;
	struct srp_rdma_ch *ch;
	int ret;

	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");

	if (!req)
		return SUCCESS;
	tag = blk_mq_unique_tag(scmnd->request);
	ch_idx = blk_mq_unique_tag_to_hwq(tag);
	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
		return SUCCESS;
	ch = &target->ch[ch_idx];
	if (!srp_claim_req(ch, req, NULL, scmnd))
		return SUCCESS;
	shost_printk(KERN_ERR, target->scsi_host,
		     "Sending SRP abort for tag %#x\n", tag);
	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
			      SRP_TSK_ABORT_TASK, NULL) == 0)
		ret = SUCCESS;
	else if (target->rport->state == SRP_RPORT_LOST)
		ret = FAST_IO_FAIL;
	else
		ret = FAILED;
	srp_free_req(ch, req, scmnd, 0);
	scmnd->result = DID_ABORT << 16;
	scmnd->scsi_done(scmnd);

	return ret;
}

static int srp_reset_device(struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(scmnd->device->host);
	struct srp_rdma_ch *ch;
	int i, j;
	u8 status;

	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");

	ch = &target->ch[0];
	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
			      SRP_TSK_LUN_RESET, &status))
		return FAILED;
	if (status)
		return FAILED;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
		}
	}

	return SUCCESS;
}

static int srp_reset_host(struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(scmnd->device->host);

	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");

	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}

static int srp_slave_configure(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	struct srp_target_port *target = host_to_target(shost);
	struct request_queue *q = sdev->request_queue;
	unsigned long timeout;

	if (sdev->type == TYPE_DISK) {
		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
		blk_queue_rq_timeout(q, timeout);
	}

	return 0;
}

static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
}

static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
}

static ssize_t show_service_id(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
}

static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
}

static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%pI6\n", target->sgid.raw);
}

static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));
	struct srp_rdma_ch *ch = &target->ch[0];

	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
}

static ssize_t show_orig_dgid(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
}

static ssize_t show_req_lim(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));
	struct srp_rdma_ch *ch;
	int i, req_lim = INT_MAX;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		req_lim = min(req_lim, ch->req_lim);
	}
	return sprintf(buf, "%d\n", req_lim);
}

static ssize_t show_zero_req_lim(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->zero_req_lim);
}

static ssize_t show_local_ib_port(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->srp_host->port);
}

static ssize_t show_local_ib_device(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
}

static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->ch_count);
}

static ssize_t show_comp_vector(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->comp_vector);
}

static ssize_t show_tl_retry_count(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%d\n", target->tl_retry_count);
}

static ssize_t show_cmd_sg_entries(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
}

static ssize_t show_allow_ext_sg(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct srp_target_port *target = host_to_target(class_to_shost(dev));

	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
}

static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);

static struct device_attribute *srp_host_attrs[] = {
	&dev_attr_id_ext,
	&dev_attr_ioc_guid,
	&dev_attr_service_id,
	&dev_attr_pkey,
	&dev_attr_sgid,
	&dev_attr_dgid,
	&dev_attr_orig_dgid,
	&dev_attr_req_lim,
	&dev_attr_zero_req_lim,
	&dev_attr_local_ib_port,
	&dev_attr_local_ib_device,
	&dev_attr_ch_count,
	&dev_attr_comp_vector,
	&dev_attr_tl_retry_count,
	&dev_attr_cmd_sg_entries,
	&dev_attr_allow_ext_sg,
	NULL
};

static struct scsi_host_template srp_template = {
	.module = THIS_MODULE,
	.name = "InfiniBand SRP initiator",
	.proc_name = DRV_NAME,
	.slave_configure = srp_slave_configure,
	.info = srp_target_info,
	.queuecommand = srp_queuecommand,
	.change_queue_depth = srp_change_queue_depth,
	.eh_abort_handler = srp_abort,
	.eh_device_reset_handler = srp_reset_device,
	.eh_host_reset_handler = srp_reset_host,
	.skip_settle_delay = true,
	.sg_tablesize = SRP_DEF_SG_TABLESIZE,
	.can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
	.this_id = -1,
	.cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
	.use_clustering = ENABLE_CLUSTERING,
	.shost_attrs = srp_host_attrs,
	.track_queue_depth = 1,
};

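/*
 * Note that can_queue and cmd_per_lun above are only defaults: they can be
 * overridden per target through the queue_size and max_cmd_per_lun login
 * options and are trimmed to the request limit advertised in the SRP login
 * response (see srp_parse_options() and srp_cm_rep_handler()).
 */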
static int srp_sdev_count(struct Scsi_Host *host)
{
	struct scsi_device *sdev;
	int c = 0;

	shost_for_each_device(sdev, host)
		c++;

	return c;
}

/*
 * Return values:
 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
 *   removal has been scheduled.
 * 0 and target->state != SRP_TARGET_REMOVED upon success.
 */
static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
{
	struct srp_rport_identifiers ids;
	struct srp_rport *rport;

	target->state = SRP_TARGET_SCANNING;
	sprintf(target->target_name, "SRP.T10:%016llX",
		be64_to_cpu(target->id_ext));

	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
		return -ENODEV;

	memcpy(ids.port_id, &target->id_ext, 8);
	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
	ids.roles = SRP_RPORT_ROLE_TARGET;
	rport = srp_rport_add(target->scsi_host, &ids);
	if (IS_ERR(rport)) {
		scsi_remove_host(target->scsi_host);
		return PTR_ERR(rport);
	}

	rport->lld_data = target;
	target->rport = rport;

	spin_lock(&host->target_lock);
	list_add_tail(&target->list, &host->target_list);
	spin_unlock(&host->target_lock);

	scsi_scan_target(&target->scsi_host->shost_gendev,
			 0, target->scsi_id, SCAN_WILD_CARD, 0);

	if (srp_connected_ch(target) < target->ch_count ||
	    target->qp_in_error) {
		shost_printk(KERN_INFO, target->scsi_host,
			     PFX "SCSI scan failed - removing SCSI host\n");
		srp_queue_remove_work(target);
		goto out;
	}

	pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
		 dev_name(&target->scsi_host->shost_gendev),
		 srp_sdev_count(target->scsi_host));

	spin_lock_irq(&target->lock);
	if (target->state == SRP_TARGET_SCANNING)
		target->state = SRP_TARGET_LIVE;
	spin_unlock_irq(&target->lock);

out:
	return 0;
}

static void srp_release_dev(struct device *dev)
{
	struct srp_host *host =
		container_of(dev, struct srp_host, dev);

	complete(&host->released);
}

static struct class srp_class = {
	.name = "infiniband_srp",
	.dev_release = srp_release_dev
};

/**
 * srp_conn_unique() - check whether the connection to a target is unique
 * @host:   SRP host.
 * @target: SRP target port.
 */
static bool srp_conn_unique(struct srp_host *host,
			    struct srp_target_port *target)
{
	struct srp_target_port *t;
	bool ret = false;

	if (target->state == SRP_TARGET_REMOVED)
		goto out;

	ret = true;

	spin_lock(&host->target_lock);
	list_for_each_entry(t, &host->target_list, list) {
		if (t != target &&
		    target->id_ext == t->id_ext &&
		    target->ioc_guid == t->ioc_guid &&
		    target->initiator_ext == t->initiator_ext) {
			ret = false;
			break;
		}
	}
	spin_unlock(&host->target_lock);

out:
	return ret;
}

/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 *
 * to the add_target sysfs attribute.
 */
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 1 << 0,
	SRP_OPT_IOC_GUID	= 1 << 1,
	SRP_OPT_DGID		= 1 << 2,
	SRP_OPT_PKEY		= 1 << 3,
	SRP_OPT_SERVICE_ID	= 1 << 4,
	SRP_OPT_MAX_SECT	= 1 << 5,
	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
	SRP_OPT_IO_CLASS	= 1 << 7,
	SRP_OPT_INITIATOR_EXT	= 1 << 8,
	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
	SRP_OPT_SG_TABLESIZE	= 1 << 11,
	SRP_OPT_COMP_VECTOR	= 1 << 12,
	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
	SRP_OPT_QUEUE_SIZE	= 1 << 14,
	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
				   SRP_OPT_IOC_GUID	|
				   SRP_OPT_DGID		|
				   SRP_OPT_PKEY		|
				   SRP_OPT_SERVICE_ID),
};

static const match_table_t srp_opt_tokens = {
	{ SRP_OPT_ID_EXT,		"id_ext=%s"		},
	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s"		},
	{ SRP_OPT_DGID,			"dgid=%s"		},
	{ SRP_OPT_PKEY,			"pkey=%x"		},
	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
	{ SRP_OPT_MAX_SECT,		"max_sect=%d"		},
	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d"	},
	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
	{ SRP_OPT_ERR,			NULL			}
};

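/*
 * Example (all parameter values below are placeholders): a target port is
 * created by writing a single comma-separated option string to the
 * add_target attribute of an SRP host, for instance:
 *
 *   echo "id_ext=200100e08b000000,ioc_guid=00066a0123456789,dgid=fe800000000000000002c90300a0b0c1,pkey=ffff,service_id=1" \
 *       > /sys/class/infiniband_srp/srp-<device>-<port>/add_target
 */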
static int srp_parse_options(const char *buf, struct srp_target_port *target)
{
	char *options, *sep_opt;
	char *p;
	char dgid[3];
	substring_t args[MAX_OPT_ARGS];
	int opt_mask = 0;
	int token;
	int ret = -EINVAL;
	int i;

	options = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	sep_opt = options;
	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, srp_opt_tokens, args);
		opt_mask |= token;

		switch (token) {
		case SRP_OPT_ID_EXT:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
			kfree(p);
			break;

		case SRP_OPT_IOC_GUID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
			kfree(p);
			break;

		case SRP_OPT_DGID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			if (strlen(p) != 32) {
				pr_warn("bad dest GID parameter '%s'\n", p);
				kfree(p);
				goto out;
			}

			for (i = 0; i < 16; ++i) {
				strlcpy(dgid, p + i * 2, sizeof(dgid));
				if (sscanf(dgid, "%hhx",
					   &target->orig_dgid.raw[i]) < 1) {
					ret = -EINVAL;
					kfree(p);
					goto out;
				}
			}
			kfree(p);
			break;

		case SRP_OPT_PKEY:
			if (match_hex(args, &token)) {
				pr_warn("bad P_Key parameter '%s'\n", p);
				goto out;
			}
			target->pkey = cpu_to_be16(token);
			break;

		case SRP_OPT_SERVICE_ID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
			kfree(p);
			break;

		case SRP_OPT_MAX_SECT:
			if (match_int(args, &token)) {
				pr_warn("bad max sect parameter '%s'\n", p);
				goto out;
			}
			target->scsi_host->max_sectors = token;
			break;

		case SRP_OPT_QUEUE_SIZE:
			if (match_int(args, &token) || token < 1) {
				pr_warn("bad queue_size parameter '%s'\n", p);
				goto out;
			}
			target->scsi_host->can_queue = token;
			target->queue_size = token + SRP_RSP_SQ_SIZE +
					     SRP_TSK_MGMT_SQ_SIZE;
			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
				target->scsi_host->cmd_per_lun = token;
			break;

		case SRP_OPT_MAX_CMD_PER_LUN:
			if (match_int(args, &token) || token < 1) {
				pr_warn("bad max cmd_per_lun parameter '%s'\n",
					p);
				goto out;
			}
			target->scsi_host->cmd_per_lun = token;
			break;

		case SRP_OPT_IO_CLASS:
			if (match_hex(args, &token)) {
				pr_warn("bad IO class parameter '%s'\n", p);
				goto out;
			}
			if (token != SRP_REV10_IB_IO_CLASS &&
			    token != SRP_REV16A_IB_IO_CLASS) {
				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
					token, SRP_REV10_IB_IO_CLASS,
					SRP_REV16A_IB_IO_CLASS);
				goto out;
			}
			target->io_class = token;
			break;

		case SRP_OPT_INITIATOR_EXT:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
			kfree(p);
			break;

		case SRP_OPT_CMD_SG_ENTRIES:
			if (match_int(args, &token) || token < 1 || token > 255) {
				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
					p);
				goto out;
			}
			target->cmd_sg_cnt = token;
			break;

		case SRP_OPT_ALLOW_EXT_SG:
			if (match_int(args, &token)) {
				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
				goto out;
			}
			target->allow_ext_sg = !!token;
			break;

		case SRP_OPT_SG_TABLESIZE:
			if (match_int(args, &token) || token < 1 ||
			    token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
				pr_warn("bad max sg_tablesize parameter '%s'\n",
					p);
				goto out;
			}
			target->sg_tablesize = token;
			break;

		case SRP_OPT_COMP_VECTOR:
			if (match_int(args, &token) || token < 0) {
				pr_warn("bad comp_vector parameter '%s'\n", p);
				goto out;
			}
			target->comp_vector = token;
			break;

		case SRP_OPT_TL_RETRY_COUNT:
			if (match_int(args, &token) || token < 2 || token > 7) {
				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
					p);
				goto out;
			}
			target->tl_retry_count = token;
			break;

		default:
			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
				p);
			goto out;
		}
	}

	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
		ret = 0;
	else
		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
			    !(srp_opt_tokens[i].token & opt_mask))
				pr_warn("target creation request is missing parameter '%s'\n",
					srp_opt_tokens[i].pattern);

	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
			target->scsi_host->cmd_per_lun,
			target->scsi_host->can_queue);

out:
	kfree(options);
	return ret;
}

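/*
 * add_target store method: parse the option string, allocate the SCSI host
 * and per-channel state, create one RDMA channel per allotted completion
 * vector/CPU, log in to the target port and finally register the SCSI host.
 */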
static ssize_t srp_create_target(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct srp_host *host =
		container_of(dev, struct srp_host, dev);
	struct Scsi_Host *target_host;
	struct srp_target_port *target;
	struct srp_rdma_ch *ch;
	struct srp_device *srp_dev = host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	int ret, node_idx, node, cpu, i;
	bool multich = false;

	target_host = scsi_host_alloc(&srp_template,
				      sizeof (struct srp_target_port));
	if (!target_host)
		return -ENOMEM;

	target_host->transportt  = ib_srp_transport_template;
	target_host->max_channel = 0;
	target_host->max_id      = 1;
	target_host->max_lun     = -1LL;
	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;

	target = host_to_target(target_host);

	target->io_class	= SRP_REV16A_IB_IO_CLASS;
	target->scsi_host	= target_host;
	target->srp_host	= host;
	target->lkey		= host->srp_dev->pd->local_dma_lkey;
	target->global_mr	= host->srp_dev->global_mr;
	target->cmd_sg_cnt	= cmd_sg_entries;
	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
	target->allow_ext_sg	= allow_ext_sg;
	target->tl_retry_count	= 7;
	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;

	/*
	 * Prevent the SCSI host from being removed by srp_remove_target()
	 * before this function returns.
	 */
	scsi_host_get(target->scsi_host);

	mutex_lock(&host->add_target_mutex);

	ret = srp_parse_options(buf, target);
	if (ret)
		goto out;

	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;

	if (!srp_conn_unique(target->srp_host, target)) {
		shost_printk(KERN_INFO, target->scsi_host,
			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
			     be64_to_cpu(target->id_ext),
			     be64_to_cpu(target->ioc_guid),
			     be64_to_cpu(target->initiator_ext));
		ret = -EEXIST;
		goto out;
	}

	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
	    target->cmd_sg_cnt < target->sg_tablesize) {
		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
		target->sg_tablesize = target->cmd_sg_cnt;
	}

	target_host->sg_tablesize = target->sg_tablesize;
	target->indirect_size = target->sg_tablesize *
				sizeof (struct srp_direct_buf);
	target->max_iu_len = sizeof (struct srp_cmd) +
			     sizeof (struct srp_indirect_buf) +
			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);

	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
	INIT_WORK(&target->remove_work, srp_remove_work);
	spin_lock_init(&target->lock);
	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
	if (ret)
		goto out;

	ret = -ENOMEM;
	target->ch_count = max_t(unsigned, num_online_nodes(),
				 min(ch_count ? :
				     min(4 * num_online_nodes(),
					 ibdev->num_comp_vectors),
				     num_online_cpus()));
	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
			     GFP_KERNEL);
	if (!target->ch)
		goto out;

	node_idx = 0;
	for_each_online_node(node) {
		const int ch_start = (node_idx * target->ch_count /
				      num_online_nodes());
		const int ch_end = ((node_idx + 1) * target->ch_count /
				    num_online_nodes());
		const int cv_start = (node_idx * ibdev->num_comp_vectors /
				      num_online_nodes() + target->comp_vector)
				     % ibdev->num_comp_vectors;
		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
				    num_online_nodes() + target->comp_vector)
				   % ibdev->num_comp_vectors;
		int cpu_idx = 0;

		for_each_online_cpu(cpu) {
			if (cpu_to_node(cpu) != node)
				continue;
			if (ch_start + cpu_idx >= ch_end)
				continue;
			ch = &target->ch[ch_start + cpu_idx];
			ch->target = target;
			ch->comp_vector = cv_start == cv_end ? cv_start :
				cv_start + cpu_idx % (cv_end - cv_start);
			spin_lock_init(&ch->lock);
			INIT_LIST_HEAD(&ch->free_tx);
			ret = srp_new_cm_id(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_create_ch_ib(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_alloc_req_data(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_connect_ch(ch, multich);
			if (ret) {
				shost_printk(KERN_ERR, target->scsi_host,
					     PFX "Connection %d/%d failed\n",
					     ch_start + cpu_idx,
					     target->ch_count);
				if (node_idx == 0 && cpu_idx == 0) {
					goto err_disconnect;
				} else {
					srp_free_ch_ib(target, ch);
					srp_free_req_data(target, ch);
					target->ch_count = ch - target->ch;
					goto connected;
				}
			}

			multich = true;
			cpu_idx++;
		}
		node_idx++;
	}

connected:
	target->scsi_host->nr_hw_queues = target->ch_count;

	ret = srp_add_target(host, target);
	if (ret)
		goto err_disconnect;

	if (target->state != SRP_TARGET_REMOVED) {
		shost_printk(KERN_DEBUG, target->scsi_host, PFX
			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
			     be64_to_cpu(target->id_ext),
			     be64_to_cpu(target->ioc_guid),
			     be16_to_cpu(target->pkey),
			     be64_to_cpu(target->service_id),
			     target->sgid.raw, target->orig_dgid.raw);
	}

	ret = count;

out:
	mutex_unlock(&host->add_target_mutex);

	scsi_host_put(target->scsi_host);
	if (ret < 0)
		scsi_host_put(target->scsi_host);

	return ret;

err_disconnect:
	srp_disconnect_target(target);

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
		srp_free_req_data(target, ch);
	}

	kfree(target->ch);
	goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

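/*
 * Allocate an srp_host for one port of an RDMA device and register the
 * per-port class device together with its add_target, ibdev and port
 * attributes.
 */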
static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof *host, GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dma_device;
	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}

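/*
 * Per-device initialization: query the HCA capabilities, choose between FMR
 * and fast registration, compute the MR page size limits, allocate the PD
 * (and a global DMA MR when memory registration is not always used), and
 * add one srp_host per port.
 */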
static void srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *dev_attr;
	struct srp_host *host;
	int mr_page_shift, p;
	u64 max_pages_per_mr;

	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
	if (!dev_attr)
		return;

	if (ib_query_device(device, dev_attr)) {
		pr_warn("Query device failed for %s\n", device->name);
		goto free_attr;
	}

	srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
	if (!srp_dev)
		goto free_attr;

	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
			    device->map_phys_fmr && device->unmap_fmr);
	srp_dev->has_fr = (dev_attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!srp_dev->has_fmr && !srp_dev->has_fr)
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");

	srp_dev->use_fast_reg = (srp_dev->has_fr &&
				 (!srp_dev->has_fmr || prefer_fr));
	srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
	srp_dev->mr_page_size	= 1 << mr_page_shift;
	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr	= dev_attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);
	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      dev_attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size	= srp_dev->mr_page_size *
				  srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 device->name, mr_page_shift, dev_attr->max_mr_size,
		 dev_attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd  = ib_alloc_pd(device);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
		srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
						   IB_ACCESS_LOCAL_WRITE |
						   IB_ACCESS_REMOTE_READ |
						   IB_ACCESS_REMOTE_WRITE);
		if (IS_ERR(srp_dev->global_mr))
			goto err_pd;
	} else {
		srp_dev->global_mr = NULL;
	}

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);

	goto free_attr;

err_pd:
	ib_dealloc_pd(srp_dev->pd);

free_dev:
	kfree(srp_dev);

free_attr:
	kfree(dev_attr);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	if (srp_dev->global_mr)
		ib_dereg_mr(srp_dev->global_mr);
	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}

static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};

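/*
 * Module initialization: validate the scatter/gather module parameters,
 * create the removal workqueue, attach the SRP transport class and register
 * the SA and IB clients. srp_cleanup_module() below undoes these steps in
 * reverse order.
 */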
static int __init srp_init_module(void)
{
	int ret;

	BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);