kgdb: timeout if secondary CPUs ignore the roundup
authorDaniel Thompson <daniel.thompson@linaro.org>
Tue, 11 Nov 2014 15:31:53 +0000 (09:31 -0600)
committerJason Wessel <jason.wessel@windriver.com>
Tue, 11 Nov 2014 15:31:53 +0000 (09:31 -0600)
Currently if an active CPU fails to respond to a roundup request the CPU
that requested the roundup will become stuck.  This needlessly reduces the
robustness of the debugger.

This patch introduces a timeout allowing the system state to be examined
even when the system contains unresponsive processors.  It also modifies
kdb's cpu command to make it censor attempts to switch to unresponsive
processors and to report their state as (D)ead.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
kernel/debug/debug_core.c
kernel/debug/kdb/kdb_debugger.c
kernel/debug/kdb/kdb_main.c

index 1adf62b39b96b496e56ca8484c6939429981277c..acd7497368223534fb2cbf8aabbccb5f30920093 100644 (file)
@@ -471,6 +471,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
        int cpu;
        int trace_on = 0;
        int online_cpus = num_online_cpus();
+       u64 time_left;
 
        kgdb_info[ks->cpu].enter_kgdb++;
        kgdb_info[ks->cpu].exception_state |= exception_state;
@@ -595,9 +596,13 @@ return_normal:
        /*
         * Wait for the other CPUs to be notified and be waiting for us:
         */
-       while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) +
-                               atomic_read(&slaves_in_kgdb)) != online_cpus)
+       time_left = loops_per_jiffy * HZ;
+       while (kgdb_do_roundup && --time_left &&
+              (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
+                  online_cpus)
                cpu_relax();
+       if (!time_left)
+               pr_crit("KGDB: Timed out waiting for secondary CPUs.\n");
 
        /*
         * At this point the primary processor is completely
index 8859ca34dcfe0a58dbd530b8668d8d88760a3eb4..15e1a7af5dd033f130ef2b4ed96cf1e2809442f5 100644 (file)
@@ -129,6 +129,10 @@ int kdb_stub(struct kgdb_state *ks)
                ks->pass_exception = 1;
                KDB_FLAG_SET(CATASTROPHIC);
        }
+       /* set CATASTROPHIC if the system contains unresponsive processors */
+       for_each_online_cpu(i)
+               if (!kgdb_info[i].enter_kgdb)
+                       KDB_FLAG_SET(CATASTROPHIC);
        if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) {
                KDB_STATE_CLEAR(SSBPT);
                KDB_STATE_CLEAR(DOING_SS);
index 8d84979cbe057c146fdfe70fb2cb52b15bcd1a28..f191bddf64b8ebdd0227556e119b1a804a30db88 100644 (file)
@@ -2201,6 +2201,8 @@ static void kdb_cpu_status(void)
        for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
                if (!cpu_online(i)) {
                        state = 'F';    /* cpu is offline */
+               } else if (!kgdb_info[i].enter_kgdb) {
+                       state = 'D';    /* cpu is online but unresponsive */
                } else {
                        state = ' ';    /* cpu is responding to kdb */
                        if (kdb_task_state_char(KDB_TSK(i)) == 'I')
@@ -2254,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
        /*
         * Validate cpunum
         */
-       if ((cpunum > NR_CPUS) || !cpu_online(cpunum))
+       if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
                return KDB_BADCPUNUM;
 
        dbg_switch_cpu = cpunum;