IB/ipath: Fix driver crash (in interrupt or during unload) after chip reset
author: Michael Albaugh <Michael.Albaugh@QLogic.com>
Thu, 15 Mar 2007 21:45:08 +0000 (14:45 -0700)
committer: Roland Dreier <rolandd@cisco.com>
Thu, 19 Apr 2007 03:20:58 +0000 (20:20 -0700)
Re-init of the kernel structures after a chip reset was leaving the
portdata structure for port zero in an inconsistent state, and a
pointer to it either stale (in re-init code) or NULL (in devdata).
Fixing the order of operations on this struct, and the condition for
interrupt access, prevents the crashes.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_stats.c

index 72caa9f091f6c0623d158ca93f1a34d8626722d4..7045ba6894944dd3f1a593661ef083b74069bb6d 100644 (file)
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
        return ret;
 }
 
+static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
+{
+       struct ipath_portdata *pd = NULL;
+
+       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+       if (pd) {
+               pd->port_dd = dd;
+               pd->port_cnt = 1;
+               /* The port 0 pkey table is used by the layer interface. */
+               pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+       }
+       return pd;
+}
+
 static int init_chip_first(struct ipath_devdata *dd,
                           struct ipath_portdata **pdp)
 {
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
                goto done;
        }
 
-       dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);
+       pd = create_portdata0(dd);
 
-       if (!dd->ipath_pd[0]) {
+       if (!pd) {
                ipath_dev_err(dd, "Unable to allocate portdata for port "
                              "0, failing\n");
                ret = -ENOMEM;
                goto done;
        }
-       pd = dd->ipath_pd[0];
-       pd->port_dd = dd;
-       pd->port_port = 0;
-       pd->port_cnt = 1;
-       /* The port 0 pkey table is used by the layer interface. */
-       pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+       dd->ipath_pd[0] = pd;
+
        dd->ipath_rcvtidcnt =
                ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
        dd->ipath_rcvtidbase =
@@ -838,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
         * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
         * re-init, the simplest way to handle this is to free
         * existing, and re-allocate.
+        * Need to re-create rest of port 0 portdata as well.
         */
        if (reinit) {
-               struct ipath_portdata *pd = dd->ipath_pd[0];
-               dd->ipath_pd[0] = NULL;
-               ipath_free_pddata(dd, pd);
+               /* Alloc and init new ipath_portdata for port0,
+                * Then free old pd. Could lead to fragmentation, but also
+                * makes later support for hot-swap easier.
+                */
+               struct ipath_portdata *npd;
+               npd = create_portdata0(dd);
+               if (npd) {
+                       ipath_free_pddata(dd, pd);
+                       dd->ipath_pd[0] = pd = npd;
+               } else {
+                       ipath_dev_err(dd, "Unable to allocate portdata for"
+                                     "  port 0, failing\n");
+                       ret = -ENOMEM;
+                       goto done;
+               }
        }
        dd->ipath_f_tidtemplate(dd);
        ret = ipath_create_rcvhdrq(dd, pd);
index a627342a969c5e0e0c41de7be4493071191d524c..9307f7187ca5e315fbc22d2b92000519b7e592c7 100644 (file)
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
         * don't access the chip while running diags, or memory diags can
         * fail
         */
-       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
+       if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
            ipath_diag_inuse)
                /* but re-arm the timer, for diags case; won't hurt other */
                goto done;