[IA64] Remove needless delay in MCA rendezvous
author Russ Anderson <rja@sgi.com>
Thu, 20 Sep 2007 18:59:12 +0000 (13:59 -0500)
committer Tony Luck <tony.luck@intel.com>
Fri, 12 Oct 2007 22:17:44 +0000 (15:17 -0700)
While testing the MCA recovery code, I noticed that some machines would have
a five-second delay when rendezvousing CPUs.  What was happening is that
ia64_wait_for_slaves() would check to see if all the slave CPUs had
rendezvoused.  If any had not, it would wait 1 millisecond then check again.
If any CPUs had still not rendezvoused, it would wait 5 seconds before
checking again.

On some configs the rendezvous takes more than 1 millisecond, causing the code
to wait the full 5 seconds, even though the last CPU rendezvoused after only
a few milliseconds.

The fix is to check every 1 millisecond to see if all the cpus have
rendezvoused.  After 5 seconds the code concludes the CPUs will never
rendezvous (same as before).

The MCA code is, by definition, not performance critical, but a needless
delay of 5 seconds is senseless.  The 5 seconds also adds up quickly
when running the error injection code in a loop.

This patch both simplifies the code and removes the needless delay.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
arch/ia64/kernel/mca.c

index 63b73f3d4c9f9a7a2d0cbb6866c5cc2dfa9d3c26..92367faecbbff274c3396bdb9354a92f18bd393c 100644 (file)
@@ -1135,30 +1135,27 @@ no_mod:
 static void
 ia64_wait_for_slaves(int monarch, const char *type)
 {
-       int c, wait = 0, missing = 0;
-       for_each_online_cpu(c) {
-               if (c == monarch)
-                       continue;
-               if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
-                       udelay(1000);           /* short wait first */
-                       wait = 1;
-                       break;
-               }
-       }
-       if (!wait)
-               goto all_in;
-       for_each_online_cpu(c) {
-               if (c == monarch)
-                       continue;
-               if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
-                       udelay(5*1000000);      /* wait 5 seconds for slaves (arbitrary) */
-                       if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
-                               missing = 1;
-                       break;
+       int c, i , wait;
+
+       /*
+        * wait 5 seconds total for slaves (arbitrary)
+        */
+       for (i = 0; i < 5000; i++) {
+               wait = 0;
+               for_each_online_cpu(c) {
+                       if (c == monarch)
+                               continue;
+                       if (ia64_mc_info.imi_rendez_checkin[c]
+                                       == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+                               udelay(1000);           /* short wait */
+                               wait = 1;
+                               break;
+                       }
                }
+               if (!wait)
+                       goto all_in;
        }
-       if (!missing)
-               goto all_in;
+
        /*
         * Maybe slave(s) dead. Print buffered messages immediately.
         */