kernel/sched/debug.c

   1 /*
   2  * kernel/sched/debug.c
   3  *
   4  * Print the CFS rbtree
   5  *
   6  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License version 2 as
  10  * published by the Free Software Foundation.
  11  */
  12
  13 #include <linux/proc_fs.h>
  14 #include <linux/sched.h>
  15 #include <linux/seq_file.h>
  16 #include <linux/kallsyms.h>
  17 #include <linux/utsname.h>
  18
  19 #include "sched.h"
  20
  21 static DEFINE_SPINLOCK(sched_debug_lock);
  22
  23 /*
  24  * This allows printing both to /proc/sched_debug and
  25  * to the console
  26  */
  27 #define SEQ_printf(m, x...)                     \
  28  do {                                           \
  29         if (m)                                  \
  30                 seq_printf(m, x);               \
  31         else                                    \
  32                 printk(x);                      \
  33  } while (0)
  34
  35 /*
  36  * Ease the printing of nsec fields:
  37  */
  38 static long long nsec_high(unsigned long long nsec)
  39 {
  40         if ((long long)nsec < 0) {
  41                 nsec = -nsec;
  42                 do_div(nsec, 1000000);
  43                 return -nsec;
  44         }
  45         do_div(nsec, 1000000);
  46
  47         return nsec;
  48 }
  49
  50 static unsigned long nsec_low(unsigned long long nsec)
  51 {
  52         if ((long long)nsec < 0)
  53                 nsec = -nsec;
  54
  55         return do_div(nsec, 1000000);
  56 }
  57
  58 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
  59
  60 #ifdef CONFIG_FAIR_GROUP_SCHED
  61 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
  62 {
  63         struct sched_entity *se = tg->se[cpu];
  64
  65 #define P(F) \
  66         SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
  67 #define PN(F) \
  68         SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
  69
  70         if (!se) {
  71                 struct sched_avg *avg = &cpu_rq(cpu)->avg;
  72                 P(avg->runnable_avg_sum);
  73                 P(avg->runnable_avg_period);
  74                 return;
  75         }
  76
  77
  78         PN(se->exec_start);
  79         PN(se->vruntime);
  80         PN(se->sum_exec_runtime);
  81 #ifdef CONFIG_SCHEDSTATS
  82         PN(se->statistics.wait_start);
  83         PN(se->statistics.sleep_start);
  84         PN(se->statistics.block_start);
  85         PN(se->statistics.sleep_max);
  86         PN(se->statistics.block_max);
  87         PN(se->statistics.exec_max);
  88         PN(se->statistics.slice_max);
  89         PN(se->statistics.wait_max);
  90         PN(se->statistics.wait_sum);
  91         P(se->statistics.wait_count);
  92 #endif
  93         P(se->load.weight);
  94 #ifdef CONFIG_SMP
  95         P(se->avg.runnable_avg_sum);
  96         P(se->avg.runnable_avg_period);
  97         P(se->avg.usage_avg_sum);
  98         P(se->avg.load_avg_contrib);
  99         P(se->avg.decay_count);
 100 #endif
 101 #undef PN
 102 #undef P
 103 }
 104 #endif
 105
 106 #ifdef CONFIG_CGROUP_SCHED
 107 static char group_path[PATH_MAX];
 108
 109 static char *task_group_path(struct task_group *tg)
 110 {
 111         if (autogroup_path(tg, group_path, PATH_MAX))
 112                 return group_path;
 113
 114         cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
 115         return group_path;
 116 }
 117 #endif
 118
 119 static void
 120 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 121 {
 122         if (rq->curr == p)
 123                 SEQ_printf(m, "R");
 124         else
 125                 SEQ_printf(m, " ");
 126
 127         SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
 128                 p->comm, p->pid,
 129                 SPLIT_NS(p->se.vruntime),
 130                 (long long)(p->nvcsw + p->nivcsw),
 131                 p->prio);
 132 #ifdef CONFIG_SCHEDSTATS
 133         SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 134                 SPLIT_NS(p->se.vruntime),
 135                 SPLIT_NS(p->se.sum_exec_runtime),
 136                 SPLIT_NS(p->se.statistics.sum_sleep_runtime));
 137 #else
 138         SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
 139                 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 140 #endif
 141 #ifdef CONFIG_CGROUP_SCHED
 142         SEQ_printf(m, " %s", task_group_path(task_group(p)));
 143 #endif
 144
 145         SEQ_printf(m, "\n");
 146 }
 147
 148 static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 149 {
 150         struct task_struct *g, *p;
 151         unsigned long flags;
 152
 153         SEQ_printf(m,
 154         "\nrunnable tasks:\n"
 155         "            task   PID         tree-key  switches  prio"
 156         "     exec-runtime         sum-exec        sum-sleep\n"
 157         "------------------------------------------------------"
 158         "----------------------------------------------------\n");
 159
 160         read_lock_irqsave(&tasklist_lock, flags);
 161
 162         do_each_thread(g, p) {
 163                 if (!p->on_rq || task_cpu(p) != rq_cpu)
 164                         continue;
 165
 166                 print_task(m, rq, p);
 167         } while_each_thread(g, p);
 168
 169         read_unlock_irqrestore(&tasklist_lock, flags);
 170 }
 171
 172 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 173 {
 174         s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
 175                 spread, rq0_min_vruntime, spread0;
 176         struct rq *rq = cpu_rq(cpu);
 177         struct sched_entity *last;
 178         unsigned long flags;
 179
 180 #ifdef CONFIG_FAIR_GROUP_SCHED
 181         SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
 182 #else
 183         SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
 184 #endif
 185         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 186                         SPLIT_NS(cfs_rq->exec_clock));
 187
 188         raw_spin_lock_irqsave(&rq->lock, flags);
 189         if (cfs_rq->rb_leftmost)
 190                 MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
 191         last = __pick_last_entity(cfs_rq);
 192         if (last)
 193                 max_vruntime = last->vruntime;
 194         min_vruntime = cfs_rq->min_vruntime;
 195         rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
 196         raw_spin_unlock_irqrestore(&rq->lock, flags);
 197         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
 198                         SPLIT_NS(MIN_vruntime));
 199         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
 200                         SPLIT_NS(min_vruntime));
 201         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
 202                         SPLIT_NS(max_vruntime));
 203         spread = max_vruntime - MIN_vruntime;
 204         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
 205                         SPLIT_NS(spread));
 206         spread0 = min_vruntime - rq0_min_vruntime;
 207         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
 208                         SPLIT_NS(spread0));
 209         SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 210                         cfs_rq->nr_spread_over);
 211         SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
 212         SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 213 #ifdef CONFIG_FAIR_GROUP_SCHED
 214 #ifdef CONFIG_SMP
 215         SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
 216                         cfs_rq->runnable_load_avg);
 217         SEQ_printf(m, "  .%-30s: %lld\n", "blocked_load_avg",
 218                         cfs_rq->blocked_load_avg);
 219         SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_avg",
 220                         (unsigned long long)atomic64_read(&cfs_rq->tg->load_avg));
 221         SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_contrib",
 222                         cfs_rq->tg_load_contrib);
 223         SEQ_printf(m, "  .%-30s: %d\n", "tg_runnable_contrib",
 224                         cfs_rq->tg_runnable_contrib);
 225         SEQ_printf(m, "  .%-30s: %d\n", "tg->runnable_avg",
 226                         atomic_read(&cfs_rq->tg->runnable_avg));
 227         SEQ_printf(m, "  .%-30s: %d\n", "tg->usage_avg",
 228                         atomic_read(&cfs_rq->tg->usage_avg));
 229 #endif
 230
 231         print_cfs_group_stats(m, cpu, cfs_rq->tg);
 232 #endif
 233 }
 234
 235 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 236 {
 237 #ifdef CONFIG_RT_GROUP_SCHED
 238         SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
 239 #else
 240         SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
 241 #endif
 242
 243 #define P(x) \
 244         SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
 245 #define PN(x) \
 246         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 247
 248         P(rt_nr_running);
 249         P(rt_throttled);
 250         PN(rt_time);
 251         PN(rt_runtime);
 252
 253 #undef PN
 254 #undef P
 255 }
 256
 257 extern __read_mostly int sched_clock_running;
 258
 259 static void print_cpu(struct seq_file *m, int cpu)
 260 {
 261         struct rq *rq = cpu_rq(cpu);
 262         unsigned long flags;
 263
 264 #ifdef CONFIG_X86
 265         {
 266                 unsigned int freq = cpu_khz ? : 1;
 267
 268                 SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
 269                            cpu, freq / 1000, (freq % 1000));
 270         }
 271 #else
 272         SEQ_printf(m, "cpu#%d\n", cpu);
 273 #endif
 274
 275 #define P(x)                                                            \
 276 do {                                                                    \
 277         if (sizeof(rq->x) == 4)                                         \
 278                 SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \
 279         else                                                            \
 280                 SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
 281 } while (0)
 282
 283 #define PN(x) \
 284         SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
 285
 286         P(nr_running);
 287         SEQ_printf(m, "  .%-30s: %lu\n", "load",
 288                    rq->load.weight);
 289         P(nr_switches);
 290         P(nr_load_updates);
 291         P(nr_uninterruptible);
 292         PN(next_balance);
 293         P(curr->pid);
 294         PN(clock);
 295         P(cpu_load[0]);
 296         P(cpu_load[1]);
 297         P(cpu_load[2]);
 298         P(cpu_load[3]);
 299         P(cpu_load[4]);
 300 #undef P
 301 #undef PN
 302
 303 #ifdef CONFIG_SCHEDSTATS
 304 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
 305 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 306
 307         P(yld_count);
 308
 309         P(sched_count);
 310         P(sched_goidle);
 311 #ifdef CONFIG_SMP
 312         P64(avg_idle);
 313 #endif
 314
 315         P(ttwu_count);
 316         P(ttwu_local);
 317
 318 #undef P
 319 #undef P64
 320 #endif
 321         spin_lock_irqsave(&sched_debug_lock, flags);
 322         print_cfs_stats(m, cpu);
 323         print_rt_stats(m, cpu);
 324
 325         rcu_read_lock();
 326         print_rq(m, rq, cpu);
 327         rcu_read_unlock();
 328         spin_unlock_irqrestore(&sched_debug_lock, flags);
 329         SEQ_printf(m, "\n");
 330 }
 331
 332 static const char *sched_tunable_scaling_names[] = {
 333         "none",
 334         "logaritmic",
 335         "linear"
 336 };
 337
 338 static void sched_debug_header(struct seq_file *m)
 339 {
 340         u64 ktime, sched_clk, cpu_clk;
 341         unsigned long flags;
 342
 343         local_irq_save(flags);
 344         ktime = ktime_to_ns(ktime_get());
 345         sched_clk = sched_clock();
 346         cpu_clk = local_clock();
 347         local_irq_restore(flags);
 348
 349         SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
 350                 init_utsname()->release,
 351                 (int)strcspn(init_utsname()->version, " "),
 352                 init_utsname()->version);
 353
 354 #define P(x) \
 355         SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
 356 #define PN(x) \
 357         SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
 358         PN(ktime);
 359         PN(sched_clk);
 360         PN(cpu_clk);
 361         P(jiffies);
 362 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 363         P(sched_clock_stable);
 364 #endif
 365 #undef PN
 366 #undef P
 367
 368         SEQ_printf(m, "\n");
 369         SEQ_printf(m, "sysctl_sched\n");
 370
 371 #define P(x) \
 372         SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 373 #define PN(x) \
 374         SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
 375         PN(sysctl_sched_latency);
 376         PN(sysctl_sched_min_granularity);
 377         PN(sysctl_sched_wakeup_granularity);
 378         P(sysctl_sched_child_runs_first);
 379         P(sysctl_sched_features);
 380 #undef PN
 381 #undef P
 382
 383         SEQ_printf(m, "  .%-40s: %d (%s)\n",
 384                 "sysctl_sched_tunable_scaling",
 385                 sysctl_sched_tunable_scaling,
 386                 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
 387         SEQ_printf(m, "\n");
 388 }
 389
 390 static int sched_debug_show(struct seq_file *m, void *v)
 391 {
 392         int cpu = (unsigned long)(v - 2);
 393
 394         if (cpu != -1)
 395                 print_cpu(m, cpu);
 396         else
 397                 sched_debug_header(m);
 398
 399         return 0;
 400 }
 401
 402 void sysrq_sched_debug_show(void)
 403 {
 404         int cpu;
 405
 406         sched_debug_header(NULL);
 407         for_each_online_cpu(cpu)
 408                 print_cpu(NULL, cpu);
 409
 410 }
 411
 412 /*
 413  * This itererator needs some explanation.
 414  * It returns 1 for the header position.
 415  * This means 2 is cpu 0.
 416  * In a hotplugged system some cpus, including cpu 0, may be missing so we have
 417  * to use cpumask_* to iterate over the cpus.
 418  */
 419 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
 420 {
 421         unsigned long n = *offset;
 422
 423         if (n == 0)
 424                 return (void *) 1;
 425
 426         n--;
 427
 428         if (n > 0)
 429                 n = cpumask_next(n - 1, cpu_online_mask);
 430         else
 431                 n = cpumask_first(cpu_online_mask);
 432
 433         *offset = n + 1;
 434
 435         if (n < nr_cpu_ids)
 436                 return (void *)(unsigned long)(n + 2);
 437         return NULL;
 438 }
 439
 440 static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
 441 {
 442         (*offset)++;
 443         return sched_debug_start(file, offset);
 444 }
 445
 446 static void sched_debug_stop(struct seq_file *file, void *data)
 447 {
 448 }
 449
 450 static const struct seq_operations sched_debug_sops = {
 451         .start = sched_debug_start,
 452         .next = sched_debug_next,
 453         .stop = sched_debug_stop,
 454         .show = sched_debug_show,
 455 };
 456
 457 static int sched_debug_release(struct inode *inode, struct file *file)
 458 {
 459         seq_release(inode, file);
 460
 461         return 0;
 462 }
 463
 464 static int sched_debug_open(struct inode *inode, struct file *filp)
 465 {
 466         int ret = 0;
 467
 468         ret = seq_open(filp, &sched_debug_sops);
 469
 470         return ret;
 471 }
 472
 473 static const struct file_operations sched_debug_fops = {
 474         .open           = sched_debug_open,
 475         .read           = seq_read,
 476         .llseek         = seq_lseek,
 477         .release        = sched_debug_release,
 478 };
 479
 480 static int __init init_sched_debug_procfs(void)
 481 {
 482         struct proc_dir_entry *pe;
 483
 484         pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
 485         if (!pe)
 486                 return -ENOMEM;
 487         return 0;
 488 }
 489
 490 __initcall(init_sched_debug_procfs);
 491
 492 void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 493 {
 494         unsigned long nr_switches;
 495
 496         SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
 497                                                 get_nr_threads(p));
 498         SEQ_printf(m,
 499                 "---------------------------------------------------------\n");
 500 #define __P(F) \
 501         SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F)
 502 #define P(F) \
 503         SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F)
 504 #define __PN(F) \
 505         SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 506 #define PN(F) \
 507         SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 508
 509         PN(se.exec_start);
 510         PN(se.vruntime);
 511         PN(se.sum_exec_runtime);
 512
 513         nr_switches = p->nvcsw + p->nivcsw;
 514
 515 #ifdef CONFIG_SCHEDSTATS
 516         PN(se.statistics.wait_start);
 517         PN(se.statistics.sleep_start);
 518         PN(se.statistics.block_start);
 519         PN(se.statistics.sleep_max);
 520         PN(se.statistics.block_max);
 521         PN(se.statistics.exec_max);
 522         PN(se.statistics.slice_max);
 523         PN(se.statistics.wait_max);
 524         PN(se.statistics.wait_sum);
 525         P(se.statistics.wait_count);
 526         PN(se.statistics.iowait_sum);
 527         P(se.statistics.iowait_count);
 528         P(se.nr_migrations);
 529         P(se.statistics.nr_migrations_cold);
 530         P(se.statistics.nr_failed_migrations_affine);
 531         P(se.statistics.nr_failed_migrations_running);
 532         P(se.statistics.nr_failed_migrations_hot);
 533         P(se.statistics.nr_forced_migrations);
 534         P(se.statistics.nr_wakeups);
 535         P(se.statistics.nr_wakeups_sync);
 536         P(se.statistics.nr_wakeups_migrate);
 537         P(se.statistics.nr_wakeups_local);
 538         P(se.statistics.nr_wakeups_remote);
 539         P(se.statistics.nr_wakeups_affine);
 540         P(se.statistics.nr_wakeups_affine_attempts);
 541         P(se.statistics.nr_wakeups_passive);
 542         P(se.statistics.nr_wakeups_idle);
 543
 544         {
 545                 u64 avg_atom, avg_per_cpu;
 546
 547                 avg_atom = p->se.sum_exec_runtime;
 548                 if (nr_switches)
 549                         do_div(avg_atom, nr_switches);
 550                 else
 551                         avg_atom = -1LL;
 552
 553                 avg_per_cpu = p->se.sum_exec_runtime;
 554                 if (p->se.nr_migrations) {
 555                         avg_per_cpu = div64_u64(avg_per_cpu,
 556                                                 p->se.nr_migrations);
 557                 } else {
 558                         avg_per_cpu = -1LL;
 559                 }
 560
 561                 __PN(avg_atom);
 562                 __PN(avg_per_cpu);
 563         }
 564 #endif
 565         __P(nr_switches);
 566         SEQ_printf(m, "%-35s:%21Ld\n",
 567                    "nr_voluntary_switches", (long long)p->nvcsw);
 568         SEQ_printf(m, "%-35s:%21Ld\n",
 569                    "nr_involuntary_switches", (long long)p->nivcsw);
 570
 571         P(se.load.weight);
 572         P(policy);
 573         P(prio);
 574 #undef PN
 575 #undef __PN
 576 #undef P
 577 #undef __P
 578
 579         {
 580                 unsigned int this_cpu = raw_smp_processor_id();
 581                 u64 t0, t1;
 582
 583                 t0 = cpu_clock(this_cpu);
 584                 t1 = cpu_clock(this_cpu);
 585                 SEQ_printf(m, "%-35s:%21Ld\n",
 586                            "clock-delta", (long long)(t1-t0));
 587         }
 588 }
 589
 590 void proc_sched_set_task(struct task_struct *p)
 591 {
 592 #ifdef CONFIG_SCHEDSTATS
 593         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 594 #endif
 595 }