[S390] qdio: 2nd stage retry on SIGA-W busy conditions
author Jan Glauber <jang@linux.vnet.ibm.com>
Wed, 3 Aug 2011 14:44:17 +0000 (16:44 +0200)
committer Heiko Carstens <heiko.carstens@de.ibm.com>
Wed, 3 Aug 2011 14:44:19 +0000 (16:44 +0200)
The SIGA-W may return with the busy bit set which means the device was
blocked. The busy loop which retries the SIGA-W for 100us may not be
long enough when running under a heavily loaded hypervisor.

Extend the retry mechanism by adding a longer second stage which retries
the SIGA-W for up to 10s. In contrast to the first retry loop, the second
stage uses mdelay to stop the cpu between the retries and thereby
avoids additional pressure on the hypervisor.
If the second stage retry is successful, a device reset is avoided.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
drivers/s390/cio/qdio.h
drivers/s390/cio/qdio_main.c

index 7bc643f3f5ab1956b1480b8eece886e565fbf940..e5c966462c5ad2ae9001e6a4df863c8133feb46c 100644 (file)
@@ -14,6 +14,8 @@
 #include "chsc.h"
 
 #define QDIO_BUSY_BIT_PATIENCE         (100 << 12)     /* 100 microseconds */
+#define QDIO_BUSY_BIT_RETRY_DELAY      10              /* 10 milliseconds */
+#define QDIO_BUSY_BIT_RETRIES          1000            /* = 10s retry time */
 #define QDIO_INPUT_THRESHOLD           (500 << 12)     /* 500 microseconds */
 
 /*
index e58169c32474f41b436d3ad8259efed71af3121e..288c9140290e9a08b899b234c7e514c9bcc6838a 100644 (file)
@@ -313,7 +313,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit)
        unsigned long schid = *((u32 *) &q->irq_ptr->schid);
        unsigned int fc = QDIO_SIGA_WRITE;
        u64 start_time = 0;
-       int cc;
+       int retries = 0, cc;
 
        if (is_qebsm(q)) {
                schid = q->irq_ptr->sch_token;
@@ -325,6 +325,7 @@ again:
        /* hipersocket busy condition */
        if (unlikely(*busy_bit)) {
                WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2);
+               retries++;
 
                if (!start_time) {
                        start_time = get_clock();
@@ -333,6 +334,11 @@ again:
                if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE)
                        goto again;
        }
+       if (retries) {
+               DBF_DEV_EVENT(DBF_WARN, q->irq_ptr,
+                             "%4x cc2 BB1:%1d", SCH_NO(q), q->nr);
+               DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "count:%u", retries);
+       }
        return cc;
 }
 
@@ -728,13 +734,14 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q)
 
 static int qdio_kick_outbound_q(struct qdio_q *q)
 {
+       int retries = 0, cc;
        unsigned int busy_bit;
-       int cc;
 
        if (!need_siga_out(q))
                return 0;
 
        DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr);
+retry:
        qperf_inc(q, siga_write);
 
        cc = qdio_siga_output(q, &busy_bit);
@@ -743,7 +750,11 @@ static int qdio_kick_outbound_q(struct qdio_q *q)
                break;
        case 2:
                if (busy_bit) {
-                       DBF_ERROR("%4x cc2 REP:%1d", SCH_NO(q), q->nr);
+                       while (++retries < QDIO_BUSY_BIT_RETRIES) {
+                               mdelay(QDIO_BUSY_BIT_RETRY_DELAY);
+                               goto retry;
+                       }
+                       DBF_ERROR("%4x cc2 BBC:%1d", SCH_NO(q), q->nr);
                        cc |= QDIO_ERROR_SIGA_BUSY;
                } else
                        DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w cc2:%1d", q->nr);
@@ -753,6 +764,10 @@ static int qdio_kick_outbound_q(struct qdio_q *q)
                DBF_ERROR("%4x SIGA-W:%1d", SCH_NO(q), cc);
                break;
        }
+       if (retries) {
+               DBF_ERROR("%4x cc2 BB2:%1d", SCH_NO(q), q->nr);
+               DBF_ERROR("count:%u", retries);
+       }
        return cc;
 }