IB/ehca: Add "port connection autodetect mode"
authorHoang-Nam Nguyen <hnguyen@de.ibm.com>
Thu, 17 Jan 2008 14:05:45 +0000 (15:05 +0100)
committerRoland Dreier <rolandd@cisco.com>
Fri, 25 Jan 2008 22:15:44 +0000 (14:15 -0800)
This patch enhances ehca with a capability to "autodetect" the ports
being connected physically. In order to utilize that function the
module option nr_ports must be set to -1 (default is 2 - two
ports). This feature is experimental and will made the default later.

More detail:

If the user connects only one port to the switch, current code requires
  1) port one to be connected and
  2) module option nr_ports=1 to be given.

If autodetect is enabled, ehca will not wait at creation of the GSI QP
for the respective port to become active. Since firmware does not
accept modify_qp() while the port is down at initialization, we need
to cache all calls to modify_qp() for the SMI/GSI QP and just return a
good return code.

When a port is activated and we get a PORT_ACTIVE event, we replay the
cached modify-qp() parms and re-trigger any posted recv WRs. Only then
do we forward the PORT_ACTIVE event to registered clients.

The result of this autodetect patch is that all ports will be
accessible by the users. Depending on their respective cabling only
those ports that are connected properly will become operable. If a
user tries to modify a regular QP of a non-connected port, modify_qp()
will fail. Furthermore, ibv_devinfo should show the port state
accordingly.

Note that this patch primarily improves the loading behaviour of
ehca. If the cable is removed while the driver is operating and
plugged in again, firmware will handle that properly by sending an
appropriate async event.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_iverbs.h
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ehca_sqp.c

index 936580d86ad3f6d603aeb83ecc8aab52cca8c4d8..2502366e845f8939ce2a27c4d44dacda09a04836 100644 (file)
@@ -95,6 +95,10 @@ struct ehca_sma_attr {
 struct ehca_sport {
        struct ib_cq *ibcq_aqp1;
        struct ib_qp *ibqp_sqp[2];
+       /* lock to serialze modify_qp() calls for sqp in normal
+        * and irq path (when event PORT_ACTIVE is received first time)
+        */
+       spinlock_t mod_sqp_lock;
        enum ib_port_state port_state;
        struct ehca_sma_attr saved_attr;
 };
@@ -141,6 +145,14 @@ enum ehca_ext_qp_type {
        EQPT_SRQ       = 3,
 };
 
+/* struct to cache modify_qp()'s parms for GSI/SMI qp */
+struct ehca_mod_qp_parm {
+       int mask;
+       struct ib_qp_attr attr;
+};
+
+#define EHCA_MOD_QP_PARM_MAX 4
+
 struct ehca_qp {
        union {
                struct ib_qp ib_qp;
@@ -164,6 +176,9 @@ struct ehca_qp {
        struct ehca_cq *recv_cq;
        unsigned int sqerr_purgeflag;
        struct hlist_node list_entries;
+       /* array to cache modify_qp()'s parms for GSI/SMI qp */
+       struct ehca_mod_qp_parm *mod_qp_parm;
+       int mod_qp_parm_idx;
        /* mmap counter for resources mapped into user space */
        u32 mm_count_squeue;
        u32 mm_count_rqueue;
@@ -323,6 +338,7 @@ extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 extern int ehca_scaling_code;
 extern int ehca_lock_hcalls;
+extern int ehca_nr_ports;
 
 struct ipzu_queue_resp {
        u32 qe_size;      /* queue entry size */
index 4c734ecef11dd3beafe047fe26a9b70cd978e341..863b34fa9ff9391c85fa1411d13ee4ce759436db 100644 (file)
@@ -356,17 +356,33 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
        u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
        u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
        u8 spec_event;
+       struct ehca_sport *sport = &shca->sport[port - 1];
+       unsigned long flags;
 
        switch (ec) {
        case 0x30: /* port availability change */
                if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-                       shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+                       int suppress_event;
+                       /* replay modify_qp for sqps */
+                       spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+                       suppress_event = !sport->ibqp_sqp[IB_QPT_GSI];
+                       if (sport->ibqp_sqp[IB_QPT_SMI])
+                               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
+                       if (!suppress_event)
+                               ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
+                       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+
+                       /* AQP1 was destroyed, ignore this event */
+                       if (suppress_event)
+                               break;
+
+                       sport->port_state = IB_PORT_ACTIVE;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
                                            "is active");
                        ehca_query_sma_attr(shca, port,
-                                           &shca->sport[port - 1].saved_attr);
+                                           &sport->saved_attr);
                } else {
-                       shca->sport[port - 1].port_state = IB_PORT_DOWN;
+                       sport->port_state = IB_PORT_DOWN;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
                                            "is inactive");
                }
@@ -380,11 +396,11 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
                        ehca_warn(&shca->ib_device, "disruptive port "
                                  "%d configuration change", port);
 
-                       shca->sport[port - 1].port_state = IB_PORT_DOWN;
+                       sport->port_state = IB_PORT_DOWN;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
                                            "is inactive");
 
-                       shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+                       sport->port_state = IB_PORT_ACTIVE;
                        dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
                                            "is active");
                } else
index 5485799cdc8d633360b0174c350a42fa16956f0b..c469bfde270875521f91411df5ea51b4f4bed13e 100644 (file)
@@ -200,4 +200,6 @@ void ehca_free_fw_ctrlblock(void *ptr);
 #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
 #endif
 
+void ehca_recover_sqp(struct ib_qp *sqp);
+
 #endif
index 0a34083dac8ab13620b3526d0ebd05ca5c65fe58..84c9b7b8669b0edc1833ff3c18c388d6feeb5999 100644 (file)
@@ -90,7 +90,8 @@ MODULE_PARM_DESC(hw_level,
                 "hardware level"
                 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
 MODULE_PARM_DESC(nr_ports,
-                "number of connected ports (default: 2)");
+                "number of connected ports (-1: autodetect, 1: port one only, "
+                "2: two ports (default)");
 MODULE_PARM_DESC(use_hp_mr,
                 "high performance MRs (0: no (default), 1: yes)");
 MODULE_PARM_DESC(port_act_time,
@@ -693,7 +694,7 @@ static int __devinit ehca_probe(struct of_device *dev,
        struct ehca_shca *shca;
        const u64 *handle;
        struct ib_pd *ibpd;
-       int ret;
+       int ret, i;
 
        handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
        if (!handle) {
@@ -714,6 +715,8 @@ static int __devinit ehca_probe(struct of_device *dev,
                return -ENOMEM;
        }
        mutex_init(&shca->modify_mutex);
+       for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
+               spin_lock_init(&shca->sport[i].mod_sqp_lock);
 
        shca->ofdev = dev;
        shca->ipz_hca_handle.handle = *handle;
index 26c6a945459fba584b6760695200b6c6ccb3b384..8d3c35fa051b8b04c46df2bc2d320758bbc4476f 100644 (file)
@@ -729,12 +729,31 @@ static struct ehca_qp *internal_create_qp(
        init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
        my_qp->init_attr = *init_attr;
 
+       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+               shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+                       &my_qp->ib_qp;
+               if (ehca_nr_ports < 0) {
+                       /* alloc array to cache subsequent modify qp parms
+                        * for autodetect mode
+                        */
+                       my_qp->mod_qp_parm =
+                               kzalloc(EHCA_MOD_QP_PARM_MAX *
+                                       sizeof(*my_qp->mod_qp_parm),
+                                       GFP_KERNEL);
+                       if (!my_qp->mod_qp_parm) {
+                               ehca_err(pd->device,
+                                        "Could not alloc mod_qp_parm");
+                               goto create_qp_exit4;
+                       }
+               }
+       }
+
        /* NOTE: define_apq0() not supported yet */
        if (qp_type == IB_QPT_GSI) {
                h_ret = ehca_define_sqp(shca, my_qp, init_attr);
                if (h_ret != H_SUCCESS) {
                        ret = ehca2ib_return_code(h_ret);
-                       goto create_qp_exit4;
+                       goto create_qp_exit5;
                }
        }
 
@@ -743,7 +762,7 @@ static struct ehca_qp *internal_create_qp(
                if (ret) {
                        ehca_err(pd->device,
                                 "Couldn't assign qp to send_cq ret=%i", ret);
-                       goto create_qp_exit4;
+                       goto create_qp_exit5;
                }
        }
 
@@ -769,15 +788,18 @@ static struct ehca_qp *internal_create_qp(
                if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
                        ehca_err(pd->device, "Copy to udata failed");
                        ret = -EINVAL;
-                       goto create_qp_exit5;
+                       goto create_qp_exit6;
                }
        }
 
        return my_qp;
 
-create_qp_exit5:
+create_qp_exit6:
        ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
 
+create_qp_exit5:
+       kfree(my_qp->mod_qp_parm);
+
 create_qp_exit4:
        if (HAS_RQ(my_qp))
                ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
@@ -995,7 +1017,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
        unsigned long flags = 0;
 
        /* do query_qp to obtain current attr values */
-       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+       mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
        if (!mqpcb) {
                ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
                         "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
@@ -1183,6 +1205,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
        }
        if (attr_mask & IB_QP_PORT) {
+               struct ehca_sport *sport;
+               struct ehca_qp *aqp1;
                if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
                        ret = -EINVAL;
                        ehca_err(ibqp->device, "Invalid port=%x. "
@@ -1191,6 +1215,29 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                                 shca->num_ports);
                        goto modify_qp_exit2;
                }
+               sport = &shca->sport[attr->port_num - 1];
+               if (!sport->ibqp_sqp[IB_QPT_GSI]) {
+                       /* should not occur */
+                       ret = -EFAULT;
+                       ehca_err(ibqp->device, "AQP1 was not created for "
+                                "port=%x", attr->port_num);
+                       goto modify_qp_exit2;
+               }
+               aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
+                                   struct ehca_qp, ib_qp);
+               if (ibqp->qp_type != IB_QPT_GSI &&
+                   ibqp->qp_type != IB_QPT_SMI &&
+                   aqp1->mod_qp_parm) {
+                       /*
+                        * firmware will reject this modify_qp() because
+                        * port is not activated/initialized fully
+                        */
+                       ret = -EFAULT;
+                       ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
+                                 "either port is being activated (try again) "
+                                 "or cabling issue", attr->port_num);
+                       goto modify_qp_exit2;
+               }
                mqpcb->prim_phys_port = attr->port_num;
                update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
        }
@@ -1470,6 +1517,8 @@ modify_qp_exit1:
 int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
                   struct ib_udata *udata)
 {
+       struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+                                             ib_device);
        struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
        struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
                                             ib_pd);
@@ -1482,9 +1531,100 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
                return -EINVAL;
        }
 
+       /* The if-block below caches qp_attr to be modified for GSI and SMI
+        * qps during the initialization by ib_mad. When the respective port
+        * is activated, ie we got an event PORT_ACTIVE, we'll replay the
+        * cached modify calls sequence, see ehca_recover_sqs() below.
+        * Why that is required:
+        * 1) If one port is connected, older code requires that port one
+        *    to be connected and module option nr_ports=1 to be given by
+        *    user, which is very inconvenient for end user.
+        * 2) Firmware accepts modify_qp() only if respective port has become
+        *    active. Older code had a wait loop of 30sec create_qp()/
+        *    define_aqp1(), which is not appropriate in practice. This
+        *    code now removes that wait loop, see define_aqp1(), and always
+        *    reports all ports to ib_mad resp. users. Only activated ports
+        *    will then usable for the users.
+        */
+       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+               int port = my_qp->init_attr.port_num;
+               struct ehca_sport *sport = &shca->sport[port - 1];
+               unsigned long flags;
+               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+               /* cache qp_attr only during init */
+               if (my_qp->mod_qp_parm) {
+                       struct ehca_mod_qp_parm *p;
+                       if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
+                               ehca_err(&shca->ib_device,
+                                        "mod_qp_parm overflow state=%x port=%x"
+                                        " type=%x", attr->qp_state,
+                                        my_qp->init_attr.port_num,
+                                        ibqp->qp_type);
+                               spin_unlock_irqrestore(&sport->mod_sqp_lock,
+                                                      flags);
+                               return -EINVAL;
+                       }
+                       p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
+                       p->mask = attr_mask;
+                       p->attr = *attr;
+                       my_qp->mod_qp_parm_idx++;
+                       ehca_dbg(&shca->ib_device,
+                                "Saved qp_attr for state=%x port=%x type=%x",
+                                attr->qp_state, my_qp->init_attr.port_num,
+                                ibqp->qp_type);
+                       spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+                       return 0;
+               }
+               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+       }
+
        return internal_modify_qp(ibqp, attr, attr_mask, 0);
 }
 
+void ehca_recover_sqp(struct ib_qp *sqp)
+{
+       struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
+       int port = my_sqp->init_attr.port_num;
+       struct ib_qp_attr attr;
+       struct ehca_mod_qp_parm *qp_parm;
+       int i, qp_parm_idx, ret;
+       unsigned long flags, wr_cnt;
+
+       if (!my_sqp->mod_qp_parm)
+               return;
+       ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
+
+       qp_parm = my_sqp->mod_qp_parm;
+       qp_parm_idx = my_sqp->mod_qp_parm_idx;
+       for (i = 0; i < qp_parm_idx; i++) {
+               attr = qp_parm[i].attr;
+               ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
+               if (ret) {
+                       ehca_err(sqp->device, "Could not modify SQP port=%x "
+                                "qp_num=%x ret=%x", port, sqp->qp_num, ret);
+                       goto free_qp_parm;
+               }
+               ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
+                        port, sqp->qp_num, attr.qp_state);
+       }
+
+       /* re-trigger posted recv wrs */
+       wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
+               my_sqp->ipz_rqueue.qe_size;
+       if (wr_cnt) {
+               spin_lock_irqsave(&my_sqp->spinlock_r, flags);
+               hipz_update_rqa(my_sqp, wr_cnt);
+               spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
+               ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
+                        port, sqp->qp_num, wr_cnt);
+       }
+
+free_qp_parm:
+       kfree(qp_parm);
+       /* this prevents subsequent calls to modify_qp() to cache qp_attr */
+       my_sqp->mod_qp_parm = NULL;
+}
+
 int ehca_query_qp(struct ib_qp *qp,
                  struct ib_qp_attr *qp_attr,
                  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
@@ -1772,6 +1912,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
        struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
        struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
                                             ib_pd);
+       struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
        u32 cur_pid = current->tgid;
        u32 qp_num = my_qp->real_qp_num;
        int ret;
@@ -1818,6 +1959,14 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
        port_num = my_qp->init_attr.port_num;
        qp_type  = my_qp->init_attr.qp_type;
 
+       if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+               spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+               kfree(my_qp->mod_qp_parm);
+               my_qp->mod_qp_parm = NULL;
+               shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
+               spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+       }
+
        /* no support for IB_QPT_SMI yet */
        if (qp_type == IB_QPT_GSI) {
                struct ib_event event;
index f0792e5fbd02eccf7e6a02040df994a86505cc66..79e72b25b252f33fd6d1d6d5c1ac18b1e12969bd 100644 (file)
  */
 
 
-#include <linux/module.h>
-#include <linux/err.h>
 #include "ehca_classes.h"
 #include "ehca_tools.h"
-#include "ehca_qes.h"
 #include "ehca_iverbs.h"
 #include "hcp_if.h"
 
@@ -93,6 +90,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
                return H_PARAMETER;
        }
 
+       if (ehca_nr_ports < 0) /* autodetect mode */
+               return H_SUCCESS;
+
        for (counter = 0;
             shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
                     counter < ehca_port_act_time;