IB/ipath: Change packet problems vs chip errors handling and reporting
author: Bryan O'Sullivan <bos@pathscale.com>
Thu, 15 Mar 2007 21:44:55 +0000 (14:44 -0700)
committer: Roland Dreier <rolandd@cisco.com>
Thu, 19 Apr 2007 03:20:55 +0000 (20:20 -0700)
Some types of packet errors are moderately common with longer IB
cables and large clusters, and are not reported with prints by other
IB HCA drivers.  This suppresses those messages unless the new
__IPATH_ERRPKTDBG bit is set in ipath_debug.  Reporting of temporarily
disabled frequent error interrupts was also made clearer.

The wording of the messages now also distinguishes between chip
errors and bad packets sent or received.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_debug.h
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_registers.h
drivers/infiniband/hw/ipath/ipath_stats.c

index df69f0d80b8bf84d22da9b688b960b09809338da..42bfbdb0d3e6af48abb11e411248afde6f850570 100644 (file)
@@ -57,6 +57,7 @@
 #define __IPATH_PROCDBG     0x100
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG       0x200
+#define __IPATH_ERRPKTDBG   0x400
 #define __IPATH_USER_SEND   0x1000     /* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000     /* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000     /* print ethernet packet data */
index 53eb4550bcd39e1b754fc7afaa84c5df47f4b8f0..cf40cf2d1fbb0fd9ffaf8a5c56587ac260a74b7f 100644 (file)
@@ -754,9 +754,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
        return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
 }
 
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not depending on "normal packet errors" vs everything
+ * else.  Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 {
+       int iserr = 1;
        *buf = '\0';
+       if (err & INFINIPATH_E_PKTERRS) {
+               if (!(err & ~INFINIPATH_E_PKTERRS))
+                       iserr = 0; /* only packet errors */
+               if (ipath_debug & __IPATH_ERRPKTDBG) {
+                       if (err & INFINIPATH_E_REBP)
+                               strlcat(buf, "EBP ", blen);
+                       if (err & INFINIPATH_E_RVCRC)
+                               strlcat(buf, "VCRC ", blen);
+                       if (err & INFINIPATH_E_RICRC) {
+                               strlcat(buf, "CRC ", blen);
+                               /* clear so CRC is only added once */
+                               err &= ~INFINIPATH_E_RICRC;
+                       }
+                       if (err & INFINIPATH_E_RSHORTPKTLEN)
+                               strlcat(buf, "rshortpktlen ", blen);
+                       if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+                               strlcat(buf, "sdroppeddatapkt ", blen);
+                       if (err & INFINIPATH_E_SPKTLEN)
+                               strlcat(buf, "spktlen ", blen);
+               }
+               if ((err & INFINIPATH_E_RICRC) &&
+                       !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+                       strlcat(buf, "CRC ", blen);
+               if (!iserr)
+                       goto done;
+       }
        if (err & INFINIPATH_E_RHDRLEN)
                strlcat(buf, "rhdrlen ", blen);
        if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +800,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
                strlcat(buf, "rhdr ", blen);
        if (err & INFINIPATH_E_RLONGPKTLEN)
                strlcat(buf, "rlongpktlen ", blen);
-       if (err & INFINIPATH_E_RSHORTPKTLEN)
-               strlcat(buf, "rshortpktlen ", blen);
        if (err & INFINIPATH_E_RMAXPKTLEN)
                strlcat(buf, "rmaxpktlen ", blen);
        if (err & INFINIPATH_E_RMINPKTLEN)
                strlcat(buf, "rminpktlen ", blen);
+       if (err & INFINIPATH_E_SMINPKTLEN)
+               strlcat(buf, "sminpktlen ", blen);
        if (err & INFINIPATH_E_RFORMATERR)
                strlcat(buf, "rformaterr ", blen);
        if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +814,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
                strlcat(buf, "runexpchar ", blen);
        if (err & INFINIPATH_E_RIBFLOW)
                strlcat(buf, "ribflow ", blen);
-       if (err & INFINIPATH_E_REBP)
-               strlcat(buf, "EBP ", blen);
        if (err & INFINIPATH_E_SUNDERRUN)
                strlcat(buf, "sunderrun ", blen);
        if (err & INFINIPATH_E_SPIOARMLAUNCH)
                strlcat(buf, "spioarmlaunch ", blen);
        if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
                strlcat(buf, "sunexperrpktnum ", blen);
-       if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-               strlcat(buf, "sdroppeddatapkt ", blen);
        if (err & INFINIPATH_E_SDROPPEDSMPPKT)
                strlcat(buf, "sdroppedsmppkt ", blen);
        if (err & INFINIPATH_E_SMAXPKTLEN)
                strlcat(buf, "smaxpktlen ", blen);
-       if (err & INFINIPATH_E_SMINPKTLEN)
-               strlcat(buf, "sminpktlen ", blen);
        if (err & INFINIPATH_E_SUNSUPVL)
                strlcat(buf, "sunsupVL ", blen);
-       if (err & INFINIPATH_E_SPKTLEN)
-               strlcat(buf, "spktlen ", blen);
        if (err & INFINIPATH_E_INVALIDADDR)
                strlcat(buf, "invalidaddr ", blen);
-       if (err & INFINIPATH_E_RICRC)
-               strlcat(buf, "CRC ", blen);
-       if (err & INFINIPATH_E_RVCRC)
-               strlcat(buf, "VCRC ", blen);
        if (err & INFINIPATH_E_RRCVEGRFULL)
                strlcat(buf, "rcvegrfull ", blen);
        if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +840,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
                strlcat(buf, "hardware ", blen);
        if (err & INFINIPATH_E_RESET)
                strlcat(buf, "reset ", blen);
+done:
+       return iserr;
 }
 
 /**
index 72b9e279d19daa10a3ece87c2b830cc7ae8a599d..037b8e276429dadb507e7216f09e303e1349abe0 100644 (file)
@@ -403,10 +403,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
         * happens so often we never want to count it.
         */
        if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-               ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
-                                ~INFINIPATH_E_IBSTATUSCHANGED);
+               int iserr;
+               iserr = ipath_decode_err(msg, sizeof msg,
+                               dd->ipath_lasterror &
+                               ~INFINIPATH_E_IBSTATUSCHANGED);
                if (dd->ipath_lasterror &
-                   ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+                       ~(INFINIPATH_E_RRCVEGRFULL |
+                       INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
                        ipath_dev_err(dd, "Suppressed %u messages for "
                                      "fast-repeating errors (%s) (%llx)\n",
                                      supp_msgs, msg,
@@ -420,8 +423,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
                         * them. So only complain about these at debug
                         * level.
                         */
-                       ipath_dbg("Suppressed %u messages for %s\n",
-                                 supp_msgs, msg);
+                       if (iserr)
+                               ipath_dbg("Suppressed %u messages for %s\n",
+                                         supp_msgs, msg);
+                       else
+                               ipath_cdbg(ERRPKT,
+                                       "Suppressed %u messages for %s\n",
+                                         supp_msgs, msg);
                }
        }
 }
@@ -462,7 +470,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 {
        char msg[512];
        u64 ignore_this_time = 0;
-       int i;
+       int i, iserr = 0;
        int chkerrpkts = 0, noprint = 0;
        unsigned supp_msgs;
 
@@ -502,6 +510,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
        }
 
        if (supp_msgs == 250000) {
+               int s_iserr;
                /*
                 * It's not entirely reasonable assuming that the errors set
                 * in the last clear period are all responsible for the
@@ -511,17 +520,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
                ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
                                 ~dd->ipath_maskederrs);
-               ipath_decode_err(msg, sizeof msg,
+               s_iserr = ipath_decode_err(msg, sizeof msg,
                                 (dd->ipath_maskederrs & ~dd->
                                  ipath_ignorederrs));
 
                if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-                   ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
-                       ipath_dev_err(dd, "Disabling error(s) %llx because "
-                                     "occurring too frequently (%s)\n",
-                                     (unsigned long long)
-                                     (dd->ipath_maskederrs &
-                                      ~dd->ipath_ignorederrs), msg);
+                       ~(INFINIPATH_E_RRCVEGRFULL |
+                       INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+                       ipath_dev_err(dd, "Temporarily disabling "
+                           "error(s) %llx reporting; too frequent (%s)\n",
+                               (unsigned long long) (dd->ipath_maskederrs &
+                               ~dd->ipath_ignorederrs), msg);
                else {
                        /*
                         * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +539,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                         * processing them.  So only complain about
                         * these at debug level.
                         */
-                       ipath_dbg("Disabling frequent queue full errors "
-                                 "(%s)\n", msg);
+                       if (s_iserr)
+                               ipath_dbg("Temporarily disabling reporting "
+                                   "too frequent queue full errors (%s)\n",
+                                   msg);
+                       else
+                               ipath_cdbg(ERRPKT,
+                                   "Temporarily disabling reporting too"
+                                   " frequent packet errors (%s)\n",
+                                   msg);
                }
 
                /*
@@ -589,6 +605,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                ipath_stats.sps_crcerrs++;
                chkerrpkts = 1;
        }
+       /* reduce to 0/1: storing the mask in an int drops bits above 31 */
+       iserr = (errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS)) != 0;
 
        /*
         * We don't want to print these two as they happen, or we can make
@@ -677,8 +695,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
        }
 
-       if (!noprint && *msg)
-               ipath_dev_err(dd, "%s error\n", msg);
+       if (!noprint && *msg) {
+               if (iserr)
+                       ipath_dev_err(dd, "%s error\n", msg);
+               else
+                       dev_info(&dd->pcidev->dev, "%s packet problems\n",
+                               msg);
+       }
        if (dd->ipath_state_wanted & dd->ipath_flags) {
                ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
                           "waking\n", dd->ipath_state_wanted,
index c8df65a4d19d9b9b29ab5fa982676dc03dd351fb..a2162853f5bef9b20a832a3b706502096ac93ef8 100644 (file)
@@ -611,7 +611,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
 extern int ipath_diag_inuse;
 
 irqreturn_t ipath_intr(int irq, void *devid);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
index e0b20529da8bdbc7a49ba6c6f9429e6a07c9aef1..6e99eafdfd73efb6b15c352424d4e88f84705063 100644 (file)
 #define INFINIPATH_E_RESET           0x0004000000000000ULL
 #define INFINIPATH_E_HARDWARE        0x0008000000000000ULL
 
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+               | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+               | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+               | INFINIPATH_E_REBP )
+
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
  * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
index 30a825928fcf3e9791cb1fe17d923571331d0559..a627342a969c5e0e0c41de7be4493071191d524c 100644 (file)
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
        if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
            && time_after(jiffies, dd->ipath_unmasktime)) {
                char ebuf[256];
-               ipath_decode_err(ebuf, sizeof ebuf,
+               int iserr;
+               iserr = ipath_decode_err(ebuf, sizeof ebuf,
                                 (dd->ipath_maskederrs & ~dd->
                                  ipath_ignorederrs));
                if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-                   ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+                               ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+                               INFINIPATH_E_PKTERRS ))
                        ipath_dev_err(dd, "Re-enabling masked errors "
                                      "(%s)\n", ebuf);
                else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
                         * them.  So only complain about these at debug
                         * level.
                         */
-                       ipath_dbg("Disabling frequent queue full errors "
-                                 "(%s)\n", ebuf);
+                       if (iserr)
+                                       ipath_dbg("Re-enabling queue full errors (%s)\n",
+                                                       ebuf);
+                       else
+                               ipath_cdbg(ERRPKT, "Re-enabling packet"
+                                               " problem interrupt (%s)\n", ebuf);
                }
                dd->ipath_maskederrs = dd->ipath_ignorederrs;
                ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,