ftrace: replace simple_strtoul with strict_strtoul
[firefly-linux-kernel-4.4.55.git] kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/debugfs.h>
18 #include <linux/pagemap.h>
19 #include <linux/hardirq.h>
20 #include <linux/linkage.h>
21 #include <linux/uaccess.h>
22 #include <linux/ftrace.h>
23 #include <linux/module.h>
24 #include <linux/percpu.h>
25 #include <linux/ctype.h>
26 #include <linux/init.h>
27 #include <linux/poll.h>
28 #include <linux/gfp.h>
29 #include <linux/fs.h>
30
31 #include <linux/stacktrace.h>
32
33 #include "trace.h"
34
35 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
36 unsigned long __read_mostly     tracing_thresh;
37
38 /* dummy trace to disable tracing */
39 static struct tracer no_tracer __read_mostly = {
40         .name           = "none",
41 };
42
43 static int trace_alloc_page(void);
44 static int trace_free_page(void);
45
46 static int tracing_disabled = 1;
47
48 long
49 ns2usecs(cycle_t nsec)
50 {
51         nsec += 500;
52         do_div(nsec, 1000);
53         return nsec;
54 }
55
56 cycle_t ftrace_now(int cpu)
57 {
58         return cpu_clock(cpu);
59 }
60
61 static struct trace_array       global_trace;
62
63 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
64
65 static struct trace_array       max_tr;
66
67 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
68
69 static int                      tracer_enabled = 1;
70 static unsigned long            trace_nr_entries = 65536UL;
71
72 static struct tracer            *trace_types __read_mostly;
73 static struct tracer            *current_trace __read_mostly;
74 static int                      max_tracer_type_len;
75
76 static DEFINE_MUTEX(trace_types_lock);
77 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
78
79 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
80
81 void trace_wake_up(void)
82 {
83         /*
84          * The runqueue_is_locked() check can race and return a stale result,
85          * but it is the best we have for now:
86          */
87         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
88                 wake_up(&trace_wait);
89 }
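/*
 * Readers sleeping on trace_wait (the trace_pipe read path, outside this
 * excerpt) are woken so they can consume the new entries.  The wakeup is
 * skipped when the "block" option is set, and when the runqueue lock is
 * held, since calling wake_up() from under rq->lock could deadlock.
 */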
90
91 #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
92
93 static int __init set_nr_entries(char *str)
94 {
95         unsigned long nr_entries;
96         int ret;
97
98         if (!str)
99                 return 0;
100         ret = strict_strtoul(str, 0, &nr_entries);
101         /* nr_entries cannot be zero */
102         if (ret < 0 || nr_entries == 0)
103                 return 0;
104         trace_nr_entries = nr_entries;
105         return 1;
106 }
107 __setup("trace_entries=", set_nr_entries);
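/*
 * Example: booting with "trace_entries=131072" requests 131072 entries for
 * each per-cpu trace buffer (the default is 65536).  Unlike the previously
 * used simple_strtoul(), strict_strtoul() reports parse errors through its
 * return value and rejects trailing garbage, so a malformed option leaves
 * the default in place instead of being silently misparsed.
 */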
108
109 unsigned long nsecs_to_usecs(unsigned long nsecs)
110 {
111         return nsecs / 1000;
112 }
113
114 enum trace_flag_type {
115         TRACE_FLAG_IRQS_OFF             = 0x01,
116         TRACE_FLAG_NEED_RESCHED         = 0x02,
117         TRACE_FLAG_HARDIRQ              = 0x04,
118         TRACE_FLAG_SOFTIRQ              = 0x08,
119 };
120
121 #define TRACE_ITER_SYM_MASK \
122         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
123
124 /* These must match the bit positions above */
125 static const char *trace_options[] = {
126         "print-parent",
127         "sym-offset",
128         "sym-addr",
129         "verbose",
130         "raw",
131         "hex",
132         "bin",
133         "block",
134         "stacktrace",
135         "sched-tree",
136         NULL
137 };
138
139 static raw_spinlock_t ftrace_max_lock =
140         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
141
142 /*
143  * Copy the new maximum trace into the separate maximum-trace
144  * structure. (this way the maximum trace is permanently saved,
145  * for later retrieval via /debugfs/tracing/latency_trace)
146  */
147 static void
148 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
149 {
150         struct trace_array_cpu *data = tr->data[cpu];
151
152         max_tr.cpu = cpu;
153         max_tr.time_start = data->preempt_timestamp;
154
155         data = max_tr.data[cpu];
156         data->saved_latency = tracing_max_latency;
157
158         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
159         data->pid = tsk->pid;
160         data->uid = tsk->uid;
161         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
162         data->policy = tsk->policy;
163         data->rt_priority = tsk->rt_priority;
164
165         /* record this task's comm */
166         tracing_record_cmdline(current);
167 }
168
169 void check_pages(struct trace_array_cpu *data)
170 {
171         struct page *page, *tmp;
172
173         BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
174         BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
175
176         list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
177                 BUG_ON(page->lru.next->prev != &page->lru);
178                 BUG_ON(page->lru.prev->next != &page->lru);
179         }
180 }
181
182 void *head_page(struct trace_array_cpu *data)
183 {
184         struct page *page;
185
186         check_pages(data);
187         if (list_empty(&data->trace_pages))
188                 return NULL;
189
190         page = list_entry(data->trace_pages.next, struct page, lru);
191         BUG_ON(&page->lru == &data->trace_pages);
192
193         return page_address(page);
194 }
195
196 int
197 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
198 {
199         int len = (PAGE_SIZE - 1) - s->len;
200         va_list ap;
201         int ret;
202
203         if (!len)
204                 return 0;
205
206         va_start(ap, fmt);
207         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
208         va_end(ap);
209
210         /* If we can't write it all, don't bother writing anything */
211         if (ret >= len)
212                 return 0;
213
214         s->len += ret;
215
216         return len;
217 }
218
219 static int
220 trace_seq_puts(struct trace_seq *s, const char *str)
221 {
222         int len = strlen(str);
223
224         if (len > ((PAGE_SIZE - 1) - s->len))
225                 return 0;
226
227         memcpy(s->buffer + s->len, str, len);
228         s->len += len;
229
230         return len;
231 }
232
233 static int
234 trace_seq_putc(struct trace_seq *s, unsigned char c)
235 {
236         if (s->len >= (PAGE_SIZE - 1))
237                 return 0;
238
239         s->buffer[s->len++] = c;
240
241         return 1;
242 }
243
244 static int
245 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
246 {
247         if (len > ((PAGE_SIZE - 1) - s->len))
248                 return 0;
249
250         memcpy(s->buffer + s->len, mem, len);
251         s->len += len;
252
253         return len;
254 }
255
256 #define HEX_CHARS 17
257 static const char hex2asc[] = "0123456789abcdef";
258
259 static int
260 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
261 {
262         unsigned char hex[HEX_CHARS];
263         unsigned char *data = mem;
264         unsigned char byte;
265         int i, j;
266
267         BUG_ON(len >= HEX_CHARS);
268
269 #ifdef __BIG_ENDIAN
270         for (i = 0, j = 0; i < len; i++) {
271 #else
272         for (i = len-1, j = 0; i >= 0; i--) {
273 #endif
274                 byte = data[i];
275
276                 hex[j++] = hex2asc[byte & 0x0f];
277                 hex[j++] = hex2asc[byte >> 4];
278         }
279         hex[j++] = ' ';
280
281         return trace_seq_putmem(s, hex, j);
282 }
283
284 static void
285 trace_seq_reset(struct trace_seq *s)
286 {
287         s->len = 0;
288 }
289
290 static void
291 trace_print_seq(struct seq_file *m, struct trace_seq *s)
292 {
293         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
294
295         s->buffer[len] = 0;
296         seq_puts(m, s->buffer);
297
298         trace_seq_reset(s);
299 }
300
301 static void
302 flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
303 {
304         struct list_head flip_pages;
305
306         INIT_LIST_HEAD(&flip_pages);
307
308         memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
309                 sizeof(struct trace_array_cpu) -
310                 offsetof(struct trace_array_cpu, trace_head_idx));
311
312         check_pages(tr1);
313         check_pages(tr2);
314         list_splice_init(&tr1->trace_pages, &flip_pages);
315         list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
316         list_splice_init(&flip_pages, &tr2->trace_pages);
317         BUG_ON(!list_empty(&flip_pages));
318         check_pages(tr1);
319         check_pages(tr2);
320 }
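/*
 * flip_trace() exchanges the trace page lists of two per-cpu buffers and
 * copies the live buffer's head/tail bookkeeping across, so saving a max
 * snapshot below only swaps pointers; the buffer that becomes live again
 * is reset by the callers.
 */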
321
322 void
323 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
324 {
325         struct trace_array_cpu *data;
326         int i;
327
328         WARN_ON_ONCE(!irqs_disabled());
329         __raw_spin_lock(&ftrace_max_lock);
330         /* clear out all the previous traces */
331         for_each_possible_cpu(i) {
332                 data = tr->data[i];
333                 flip_trace(max_tr.data[i], data);
334                 tracing_reset(data);
335         }
336
337         __update_max_tr(tr, tsk, cpu);
338         __raw_spin_unlock(&ftrace_max_lock);
339 }
340
341 /**
342  * update_max_tr_single - only copy one trace over, and reset the rest
343  * @tr: tracer
344  * @tsk: task with the latency
345  * @cpu: the cpu of the buffer to copy.
346  */
347 void
348 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
349 {
350         struct trace_array_cpu *data = tr->data[cpu];
351         int i;
352
353         WARN_ON_ONCE(!irqs_disabled());
354         __raw_spin_lock(&ftrace_max_lock);
355         for_each_possible_cpu(i)
356                 tracing_reset(max_tr.data[i]);
357
358         flip_trace(max_tr.data[cpu], data);
359         tracing_reset(data);
360
361         __update_max_tr(tr, tsk, cpu);
362         __raw_spin_unlock(&ftrace_max_lock);
363 }
364
365 int register_tracer(struct tracer *type)
366 {
367         struct tracer *t;
368         int len;
369         int ret = 0;
370
371         if (!type->name) {
372                 pr_info("Tracer must have a name\n");
373                 return -1;
374         }
375
376         mutex_lock(&trace_types_lock);
377         for (t = trace_types; t; t = t->next) {
378                 if (strcmp(type->name, t->name) == 0) {
379                         /* already found */
380                         pr_info("Tracer %s already registered\n",
381                                 type->name);
382                         ret = -1;
383                         goto out;
384                 }
385         }
386
387 #ifdef CONFIG_FTRACE_STARTUP_TEST
388         if (type->selftest) {
389                 struct tracer *saved_tracer = current_trace;
390                 struct trace_array_cpu *data;
391                 struct trace_array *tr = &global_trace;
392                 int saved_ctrl = tr->ctrl;
393                 int i;
394                 /*
395                  * Run a selftest on this tracer.
396                  * Here we reset the trace buffer, and set the current
397                  * tracer to be this tracer. The tracer can then run some
398                  * internal tracing to verify that everything is in order.
399                  * If we fail, we do not register this tracer.
400                  */
401                 for_each_possible_cpu(i) {
402                         data = tr->data[i];
403                         if (!head_page(data))
404                                 continue;
405                         tracing_reset(data);
406                 }
407                 current_trace = type;
408                 tr->ctrl = 0;
409                 /* the test is responsible for initializing and enabling */
410                 pr_info("Testing tracer %s: ", type->name);
411                 ret = type->selftest(type, tr);
412                 /* the test is responsible for resetting too */
413                 current_trace = saved_tracer;
414                 tr->ctrl = saved_ctrl;
415                 if (ret) {
416                         printk(KERN_CONT "FAILED!\n");
417                         goto out;
418                 }
419                 /* Only reset on passing, to avoid touching corrupted buffers */
420                 for_each_possible_cpu(i) {
421                         data = tr->data[i];
422                         if (!head_page(data))
423                                 continue;
424                         tracing_reset(data);
425                 }
426                 printk(KERN_CONT "PASSED\n");
427         }
428 #endif
429
430         type->next = trace_types;
431         trace_types = type;
432         len = strlen(type->name);
433         if (len > max_tracer_type_len)
434                 max_tracer_type_len = len;
435
436  out:
437         mutex_unlock(&trace_types_lock);
438
439         return ret;
440 }
441
442 void unregister_tracer(struct tracer *type)
443 {
444         struct tracer **t;
445         int len;
446
447         mutex_lock(&trace_types_lock);
448         for (t = &trace_types; *t; t = &(*t)->next) {
449                 if (*t == type)
450                         goto found;
451         }
452         pr_info("Tracer %s not registered\n", type->name);
453         goto out;
454
455  found:
456         *t = (*t)->next;
457         if (strlen(type->name) != max_tracer_type_len)
458                 goto out;
459
460         max_tracer_type_len = 0;
461         for (t = &trace_types; *t; t = &(*t)->next) {
462                 len = strlen((*t)->name);
463                 if (len > max_tracer_type_len)
464                         max_tracer_type_len = len;
465         }
466  out:
467         mutex_unlock(&trace_types_lock);
468 }
469
470 void tracing_reset(struct trace_array_cpu *data)
471 {
472         data->trace_idx = 0;
473         data->trace_head = data->trace_tail = head_page(data);
474         data->trace_head_idx = 0;
475         data->trace_tail_idx = 0;
476 }
477
478 #define SAVED_CMDLINES 128
479 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
480 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
481 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
482 static int cmdline_idx;
483 static DEFINE_SPINLOCK(trace_cmdline_lock);
484 atomic_t trace_record_cmdline_disabled;
485
486 static void trace_init_cmdlines(void)
487 {
488         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
489         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
490         cmdline_idx = 0;
491 }
492
493 void trace_stop_cmdline_recording(void);
494
495 static void trace_save_cmdline(struct task_struct *tsk)
496 {
497         unsigned map;
498         unsigned idx;
499
500         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
501                 return;
502
503         /*
504          * It's not the end of the world if we don't get
505          * the lock, but we also don't want to spin
506          * nor do we want to disable interrupts,
507          * so if we miss here, then better luck next time.
508          */
509         if (!spin_trylock(&trace_cmdline_lock))
510                 return;
511
512         idx = map_pid_to_cmdline[tsk->pid];
513         if (idx >= SAVED_CMDLINES) {
514                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
515
516                 map = map_cmdline_to_pid[idx];
517                 if (map <= PID_MAX_DEFAULT)
518                         map_pid_to_cmdline[map] = (unsigned)-1;
519
520                 map_pid_to_cmdline[tsk->pid] = idx;
521
522                 cmdline_idx = idx;
523         }
524
525         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
526
527         spin_unlock(&trace_cmdline_lock);
528 }
529
530 static char *trace_find_cmdline(int pid)
531 {
532         char *cmdline = "<...>";
533         unsigned map;
534
535         if (!pid)
536                 return "<idle>";
537
538         if (pid > PID_MAX_DEFAULT)
539                 goto out;
540
541         map = map_pid_to_cmdline[pid];
542         if (map >= SAVED_CMDLINES)
543                 goto out;
544
545         cmdline = saved_cmdlines[map];
546
547  out:
548         return cmdline;
549 }
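/*
 * Only SAVED_CMDLINES (128) task comms are cached at a time.  Pids that
 * are not (or no longer) in the cache are printed as "<...>" in the trace
 * output, and pid 0 is always reported as "<idle>".
 */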
550
551 void tracing_record_cmdline(struct task_struct *tsk)
552 {
553         if (atomic_read(&trace_record_cmdline_disabled))
554                 return;
555
556         trace_save_cmdline(tsk);
557 }
558
559 static inline struct list_head *
560 trace_next_list(struct trace_array_cpu *data, struct list_head *next)
561 {
562         /*
563          * Round-robin - but skip the head (which is not a real page):
564          */
565         next = next->next;
566         if (unlikely(next == &data->trace_pages))
567                 next = next->next;
568         BUG_ON(next == &data->trace_pages);
569
570         return next;
571 }
572
573 static inline void *
574 trace_next_page(struct trace_array_cpu *data, void *addr)
575 {
576         struct list_head *next;
577         struct page *page;
578
579         page = virt_to_page(addr);
580
581         next = trace_next_list(data, &page->lru);
582         page = list_entry(next, struct page, lru);
583
584         return page_address(page);
585 }
586
587 static inline struct trace_entry *
588 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
589 {
590         unsigned long idx, idx_next;
591         struct trace_entry *entry;
592
593         data->trace_idx++;
594         idx = data->trace_head_idx;
595         idx_next = idx + 1;
596
597         BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
598
599         entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
600
601         if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
602                 data->trace_head = trace_next_page(data, data->trace_head);
603                 idx_next = 0;
604         }
605
606         if (data->trace_head == data->trace_tail &&
607             idx_next == data->trace_tail_idx) {
608                 /* overrun */
609                 data->trace_tail_idx++;
610                 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
611                         data->trace_tail =
612                                 trace_next_page(data, data->trace_tail);
613                         data->trace_tail_idx = 0;
614                 }
615         }
616
617         data->trace_head_idx = idx_next;
618
619         return entry;
620 }
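/*
 * When the head index catches up with the tail the tail is pushed forward,
 * i.e. each per-cpu buffer behaves as a ring that silently overwrites its
 * oldest entries on overflow.
 */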
621
622 static inline void
623 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
624 {
625         struct task_struct *tsk = current;
626         unsigned long pc;
627
628         pc = preempt_count();
629
630         entry->preempt_count    = pc & 0xff;
631         entry->pid              = (tsk) ? tsk->pid : 0;
632         entry->t                = ftrace_now(raw_smp_processor_id());
633         entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
634                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
635                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
636                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
637 }
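/*
 * The flags and preempt_count captured here become the "dNh." style latency
 * columns: 'd' for irqs off, 'N' for need-resched, 'h'/'s'/'H' for hard/soft
 * irq context, plus the preemption depth, as decoded by lat_print_generic()
 * below.
 */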
638
639 void
640 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
641                unsigned long ip, unsigned long parent_ip, unsigned long flags)
642 {
643         struct trace_entry *entry;
644         unsigned long irq_flags;
645
646         raw_local_irq_save(irq_flags);
647         __raw_spin_lock(&data->lock);
648         entry                   = tracing_get_trace_entry(tr, data);
649         tracing_generic_entry_update(entry, flags);
650         entry->type             = TRACE_FN;
651         entry->fn.ip            = ip;
652         entry->fn.parent_ip     = parent_ip;
653         __raw_spin_unlock(&data->lock);
654         raw_local_irq_restore(irq_flags);
655 }
656
657 void
658 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
659        unsigned long ip, unsigned long parent_ip, unsigned long flags)
660 {
661         if (likely(!atomic_read(&data->disabled)))
662                 trace_function(tr, data, ip, parent_ip, flags);
663 }
664
665 void
666 __trace_special(void *__tr, void *__data,
667                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
668 {
669         struct trace_array_cpu *data = __data;
670         struct trace_array *tr = __tr;
671         struct trace_entry *entry;
672         unsigned long irq_flags;
673
674         raw_local_irq_save(irq_flags);
675         __raw_spin_lock(&data->lock);
676         entry                   = tracing_get_trace_entry(tr, data);
677         tracing_generic_entry_update(entry, 0);
678         entry->type             = TRACE_SPECIAL;
679         entry->special.arg1     = arg1;
680         entry->special.arg2     = arg2;
681         entry->special.arg3     = arg3;
682         __raw_spin_unlock(&data->lock);
683         raw_local_irq_restore(irq_flags);
684
685         trace_wake_up();
686 }
687
688 void __trace_stack(struct trace_array *tr,
689                    struct trace_array_cpu *data,
690                    unsigned long flags,
691                    int skip)
692 {
693         struct trace_entry *entry;
694         struct stack_trace trace;
695
696         if (!(trace_flags & TRACE_ITER_STACKTRACE))
697                 return;
698
699         entry                   = tracing_get_trace_entry(tr, data);
700         tracing_generic_entry_update(entry, flags);
701         entry->type             = TRACE_STACK;
702
703         memset(&entry->stack, 0, sizeof(entry->stack));
704
705         trace.nr_entries        = 0;
706         trace.max_entries       = FTRACE_STACK_ENTRIES;
707         trace.skip              = skip;
708         trace.entries           = entry->stack.caller;
709
710         save_stack_trace(&trace);
711 }
712
713 void
714 tracing_sched_switch_trace(struct trace_array *tr,
715                            struct trace_array_cpu *data,
716                            struct task_struct *prev,
717                            struct task_struct *next,
718                            unsigned long flags)
719 {
720         struct trace_entry *entry;
721         unsigned long irq_flags;
722
723         raw_local_irq_save(irq_flags);
724         __raw_spin_lock(&data->lock);
725         entry                   = tracing_get_trace_entry(tr, data);
726         tracing_generic_entry_update(entry, flags);
727         entry->type             = TRACE_CTX;
728         entry->ctx.prev_pid     = prev->pid;
729         entry->ctx.prev_prio    = prev->prio;
730         entry->ctx.prev_state   = prev->state;
731         entry->ctx.next_pid     = next->pid;
732         entry->ctx.next_prio    = next->prio;
733         entry->ctx.next_state   = next->state;
734         __trace_stack(tr, data, flags, 4);
735         __raw_spin_unlock(&data->lock);
736         raw_local_irq_restore(irq_flags);
737 }
738
739 void
740 tracing_sched_wakeup_trace(struct trace_array *tr,
741                            struct trace_array_cpu *data,
742                            struct task_struct *wakee,
743                            struct task_struct *curr,
744                            unsigned long flags)
745 {
746         struct trace_entry *entry;
747         unsigned long irq_flags;
748
749         raw_local_irq_save(irq_flags);
750         __raw_spin_lock(&data->lock);
751         entry                   = tracing_get_trace_entry(tr, data);
752         tracing_generic_entry_update(entry, flags);
753         entry->type             = TRACE_WAKE;
754         entry->ctx.prev_pid     = curr->pid;
755         entry->ctx.prev_prio    = curr->prio;
756         entry->ctx.prev_state   = curr->state;
757         entry->ctx.next_pid     = wakee->pid;
758         entry->ctx.next_prio    = wakee->prio;
759         entry->ctx.next_state   = wakee->state;
760         __trace_stack(tr, data, flags, 5);
761         __raw_spin_unlock(&data->lock);
762         raw_local_irq_restore(irq_flags);
763
764         trace_wake_up();
765 }
766
767 #ifdef CONFIG_FTRACE
768 static void
769 function_trace_call(unsigned long ip, unsigned long parent_ip)
770 {
771         struct trace_array *tr = &global_trace;
772         struct trace_array_cpu *data;
773         unsigned long flags;
774         long disabled;
775         int cpu;
776
777         if (unlikely(!tracer_enabled))
778                 return;
779
780         local_irq_save(flags);
781         cpu = raw_smp_processor_id();
782         data = tr->data[cpu];
783         disabled = atomic_inc_return(&data->disabled);
784
785         if (likely(disabled == 1))
786                 trace_function(tr, data, ip, parent_ip, flags);
787
788         atomic_dec(&data->disabled);
789         local_irq_restore(flags);
790 }
791
792 static struct ftrace_ops trace_ops __read_mostly =
793 {
794         .func = function_trace_call,
795 };
796
797 void tracing_start_function_trace(void)
798 {
799         register_ftrace_function(&trace_ops);
800 }
801
802 void tracing_stop_function_trace(void)
803 {
804         unregister_ftrace_function(&trace_ops);
805 }
806 #endif
807
808 enum trace_file_type {
809         TRACE_FILE_LAT_FMT      = 1,
810 };
811
812 static struct trace_entry *
813 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
814                 struct trace_iterator *iter, int cpu)
815 {
816         struct page *page;
817         struct trace_entry *array;
818
819         if (iter->next_idx[cpu] >= tr->entries ||
820             iter->next_idx[cpu] >= data->trace_idx ||
821             (data->trace_head == data->trace_tail &&
822              data->trace_head_idx == data->trace_tail_idx))
823                 return NULL;
824
825         if (!iter->next_page[cpu]) {
826                 /* Initialize the iterator for this cpu trace buffer */
827                 WARN_ON(!data->trace_tail);
828                 page = virt_to_page(data->trace_tail);
829                 iter->next_page[cpu] = &page->lru;
830                 iter->next_page_idx[cpu] = data->trace_tail_idx;
831         }
832
833         page = list_entry(iter->next_page[cpu], struct page, lru);
834         BUG_ON(&data->trace_pages == &page->lru);
835
836         array = page_address(page);
837
838         WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
839         return &array[iter->next_page_idx[cpu]];
840 }
841
842 static struct trace_entry *
843 find_next_entry(struct trace_iterator *iter, int *ent_cpu)
844 {
845         struct trace_array *tr = iter->tr;
846         struct trace_entry *ent, *next = NULL;
847         int next_cpu = -1;
848         int cpu;
849
850         for_each_possible_cpu(cpu) {
851                 if (!head_page(tr->data[cpu]))
852                         continue;
853                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
854                 /*
855                  * Pick the entry with the smallest timestamp:
856                  */
857                 if (ent && (!next || ent->t < next->t)) {
858                         next = ent;
859                         next_cpu = cpu;
860                 }
861         }
862
863         if (ent_cpu)
864                 *ent_cpu = next_cpu;
865
866         return next;
867 }
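/*
 * Iteration merges the per-cpu buffers on the fly: every step picks the
 * pending entry with the smallest timestamp across all cpus, so the
 * seq_file output comes out globally ordered by time.
 */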
868
869 static void trace_iterator_increment(struct trace_iterator *iter)
870 {
871         iter->idx++;
872         iter->next_idx[iter->cpu]++;
873         iter->next_page_idx[iter->cpu]++;
874
875         if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
876                 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
877
878                 iter->next_page_idx[iter->cpu] = 0;
879                 iter->next_page[iter->cpu] =
880                         trace_next_list(data, iter->next_page[iter->cpu]);
881         }
882 }
883
884 static void trace_consume(struct trace_iterator *iter)
885 {
886         struct trace_array_cpu *data = iter->tr->data[iter->cpu];
887
888         data->trace_tail_idx++;
889         if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
890                 data->trace_tail = trace_next_page(data, data->trace_tail);
891                 data->trace_tail_idx = 0;
892         }
893
894         /* If we emptied the buffer, reset the index */
895         if (data->trace_head == data->trace_tail &&
896             data->trace_head_idx == data->trace_tail_idx)
897                 data->trace_idx = 0;
898 }
899
900 static void *find_next_entry_inc(struct trace_iterator *iter)
901 {
902         struct trace_entry *next;
903         int next_cpu = -1;
904
905         next = find_next_entry(iter, &next_cpu);
906
907         iter->prev_ent = iter->ent;
908         iter->prev_cpu = iter->cpu;
909
910         iter->ent = next;
911         iter->cpu = next_cpu;
912
913         if (next)
914                 trace_iterator_increment(iter);
915
916         return next ? iter : NULL;
917 }
918
919 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
920 {
921         struct trace_iterator *iter = m->private;
922         void *last_ent = iter->ent;
923         int i = (int)*pos;
924         void *ent;
925
926         (*pos)++;
927
928         /* can't go backwards */
929         if (iter->idx > i)
930                 return NULL;
931
932         if (iter->idx < 0)
933                 ent = find_next_entry_inc(iter);
934         else
935                 ent = iter;
936
937         while (ent && iter->idx < i)
938                 ent = find_next_entry_inc(iter);
939
940         iter->pos = *pos;
941
942         if (last_ent && !ent)
943                 seq_puts(m, "\n\nvim:ft=help\n");
944
945         return ent;
946 }
947
948 static void *s_start(struct seq_file *m, loff_t *pos)
949 {
950         struct trace_iterator *iter = m->private;
951         void *p = NULL;
952         loff_t l = 0;
953         int i;
954
955         mutex_lock(&trace_types_lock);
956
957         if (!current_trace || current_trace != iter->trace) {
958                 mutex_unlock(&trace_types_lock);
959                 return NULL;
960         }
961
962         atomic_inc(&trace_record_cmdline_disabled);
963
964         /* let the tracer grab locks here if needed */
965         if (current_trace->start)
966                 current_trace->start(iter);
967
968         if (*pos != iter->pos) {
969                 iter->ent = NULL;
970                 iter->cpu = 0;
971                 iter->idx = -1;
972                 iter->prev_ent = NULL;
973                 iter->prev_cpu = -1;
974
975                 for_each_possible_cpu(i) {
976                         iter->next_idx[i] = 0;
977                         iter->next_page[i] = NULL;
978                 }
979
980                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
981                         ;
982
983         } else {
984                 l = *pos - 1;
985                 p = s_next(m, p, &l);
986         }
987
988         return p;
989 }
990
991 static void s_stop(struct seq_file *m, void *p)
992 {
993         struct trace_iterator *iter = m->private;
994
995         atomic_dec(&trace_record_cmdline_disabled);
996
997         /* let the tracer release locks here if needed */
998         if (current_trace && current_trace == iter->trace && iter->trace->stop)
999                 iter->trace->stop(iter);
1000
1001         mutex_unlock(&trace_types_lock);
1002 }
1003
1004 static int
1005 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1006 {
1007 #ifdef CONFIG_KALLSYMS
1008         char str[KSYM_SYMBOL_LEN];
1009
1010         kallsyms_lookup(address, NULL, NULL, NULL, str);
1011
1012         return trace_seq_printf(s, fmt, str);
1013 #endif
1014         return 1;
1015 }
1016
1017 static int
1018 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1019                      unsigned long address)
1020 {
1021 #ifdef CONFIG_KALLSYMS
1022         char str[KSYM_SYMBOL_LEN];
1023
1024         sprint_symbol(str, address);
1025         return trace_seq_printf(s, fmt, str);
1026 #endif
1027         return 1;
1028 }
1029
1030 #ifndef CONFIG_64BIT
1031 # define IP_FMT "%08lx"
1032 #else
1033 # define IP_FMT "%016lx"
1034 #endif
1035
1036 static int
1037 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1038 {
1039         int ret;
1040
1041         if (!ip)
1042                 return trace_seq_printf(s, "0");
1043
1044         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1045                 ret = seq_print_sym_offset(s, "%s", ip);
1046         else
1047                 ret = seq_print_sym_short(s, "%s", ip);
1048
1049         if (!ret)
1050                 return 0;
1051
1052         if (sym_flags & TRACE_ITER_SYM_ADDR)
1053                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1054         return ret;
1055 }
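/*
 * Depending on the sym-offset and sym-addr options this prints forms such
 * as "schedule", "schedule+0x5a/0x7a0" or "schedule <ffffffff80230fca>"
 * (symbol and address here are purely illustrative).
 */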
1056
1057 static void print_lat_help_header(struct seq_file *m)
1058 {
1059         seq_puts(m, "#                _------=> CPU#            \n");
1060         seq_puts(m, "#               / _-----=> irqs-off        \n");
1061         seq_puts(m, "#              | / _----=> need-resched    \n");
1062         seq_puts(m, "#              || / _---=> hardirq/softirq \n");
1063         seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
1064         seq_puts(m, "#              |||| /                      \n");
1065         seq_puts(m, "#              |||||     delay             \n");
1066         seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
1067         seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
1068 }
1069
1070 static void print_func_help_header(struct seq_file *m)
1071 {
1072         seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
1073         seq_puts(m, "#              | |      |          |         |\n");
1074 }
1075
1076
1077 static void
1078 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1079 {
1080         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1081         struct trace_array *tr = iter->tr;
1082         struct trace_array_cpu *data = tr->data[tr->cpu];
1083         struct tracer *type = current_trace;
1084         unsigned long total   = 0;
1085         unsigned long entries = 0;
1086         int cpu;
1087         const char *name = "preemption";
1088
1089         if (type)
1090                 name = type->name;
1091
1092         for_each_possible_cpu(cpu) {
1093                 if (head_page(tr->data[cpu])) {
1094                         total += tr->data[cpu]->trace_idx;
1095                         if (tr->data[cpu]->trace_idx > tr->entries)
1096                                 entries += tr->entries;
1097                         else
1098                                 entries += tr->data[cpu]->trace_idx;
1099                 }
1100         }
1101
1102         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1103                    name, UTS_RELEASE);
1104         seq_puts(m, "-----------------------------------"
1105                  "---------------------------------\n");
1106         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1107                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1108                    nsecs_to_usecs(data->saved_latency),
1109                    entries,
1110                    total,
1111                    tr->cpu,
1112 #if defined(CONFIG_PREEMPT_NONE)
1113                    "server",
1114 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1115                    "desktop",
1116 #elif defined(CONFIG_PREEMPT_DESKTOP)
1117                    "preempt",
1118 #else
1119                    "unknown",
1120 #endif
1121                    /* These are reserved for later use */
1122                    0, 0, 0, 0);
1123 #ifdef CONFIG_SMP
1124         seq_printf(m, " #P:%d)\n", num_online_cpus());
1125 #else
1126         seq_puts(m, ")\n");
1127 #endif
1128         seq_puts(m, "    -----------------\n");
1129         seq_printf(m, "    | task: %.16s-%d "
1130                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1131                    data->comm, data->pid, data->uid, data->nice,
1132                    data->policy, data->rt_priority);
1133         seq_puts(m, "    -----------------\n");
1134
1135         if (data->critical_start) {
1136                 seq_puts(m, " => started at: ");
1137                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1138                 trace_print_seq(m, &iter->seq);
1139                 seq_puts(m, "\n => ended at:   ");
1140                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1141                 trace_print_seq(m, &iter->seq);
1142                 seq_puts(m, "\n");
1143         }
1144
1145         seq_puts(m, "\n");
1146 }
1147
1148 static void
1149 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1150 {
1151         int hardirq, softirq;
1152         char *comm;
1153
1154         comm = trace_find_cmdline(entry->pid);
1155
1156         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1157         trace_seq_printf(s, "%d", cpu);
1158         trace_seq_printf(s, "%c%c",
1159                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1160                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1161
1162         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1163         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1164         if (hardirq && softirq) {
1165                 trace_seq_putc(s, 'H');
1166         } else {
1167                 if (hardirq) {
1168                         trace_seq_putc(s, 'h');
1169                 } else {
1170                         if (softirq)
1171                                 trace_seq_putc(s, 's');
1172                         else
1173                                 trace_seq_putc(s, '.');
1174                 }
1175         }
1176
1177         if (entry->preempt_count)
1178                 trace_seq_printf(s, "%x", entry->preempt_count);
1179         else
1180                 trace_seq_puts(s, ".");
1181 }
1182
1183 unsigned long preempt_mark_thresh = 100;
1184
1185 static void
1186 lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1187                     unsigned long rel_usecs)
1188 {
1189         trace_seq_printf(s, " %4lldus", abs_usecs);
1190         if (rel_usecs > preempt_mark_thresh)
1191                 trace_seq_puts(s, "!: ");
1192         else if (rel_usecs > 1)
1193                 trace_seq_puts(s, "+: ");
1194         else
1195                 trace_seq_puts(s, " : ");
1196 }
1197
1198 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1199
1200 static int
1201 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1202 {
1203         struct trace_seq *s = &iter->seq;
1204         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1205         struct trace_entry *next_entry = find_next_entry(iter, NULL);
1206         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1207         struct trace_entry *entry = iter->ent;
1208         unsigned long abs_usecs;
1209         unsigned long rel_usecs;
1210         char *comm;
1211         int S, T;
1212         int i;
1213         unsigned state;
1214
1215         if (!next_entry)
1216                 next_entry = entry;
1217         rel_usecs = ns2usecs(next_entry->t - entry->t);
1218         abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1219
1220         if (verbose) {
1221                 comm = trace_find_cmdline(entry->pid);
1222                 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1223                                  " %ld.%03ldms (+%ld.%03ldms): ",
1224                                  comm,
1225                                  entry->pid, cpu, entry->flags,
1226                                  entry->preempt_count, trace_idx,
1227                                  ns2usecs(entry->t),
1228                                  abs_usecs/1000,
1229                                  abs_usecs % 1000, rel_usecs/1000,
1230                                  rel_usecs % 1000);
1231         } else {
1232                 lat_print_generic(s, entry, cpu);
1233                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1234         }
1235         switch (entry->type) {
1236         case TRACE_FN:
1237                 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1238                 trace_seq_puts(s, " (");
1239                 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1240                 trace_seq_puts(s, ")\n");
1241                 break;
1242         case TRACE_CTX:
1243         case TRACE_WAKE:
1244                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1245                         state_to_char[entry->ctx.next_state] : 'X';
1246
1247                 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1248                 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1249                 comm = trace_find_cmdline(entry->ctx.next_pid);
1250                 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1251                                  entry->ctx.prev_pid,
1252                                  entry->ctx.prev_prio,
1253                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1254                                  entry->ctx.next_pid,
1255                                  entry->ctx.next_prio,
1256                                  T, comm);
1257                 break;
1258         case TRACE_SPECIAL:
1259                 trace_seq_printf(s, "# %ld %ld %ld\n",
1260                                  entry->special.arg1,
1261                                  entry->special.arg2,
1262                                  entry->special.arg3);
1263                 break;
1264         case TRACE_STACK:
1265                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1266                         if (i)
1267                                 trace_seq_puts(s, " <= ");
1268                         seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1269                 }
1270                 trace_seq_puts(s, "\n");
1271                 break;
1272         default:
1273                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1274         }
1275         return 1;
1276 }
1277
1278 static int print_trace_fmt(struct trace_iterator *iter)
1279 {
1280         struct trace_seq *s = &iter->seq;
1281         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1282         struct trace_entry *entry;
1283         unsigned long usec_rem;
1284         unsigned long long t;
1285         unsigned long secs;
1286         char *comm;
1287         int ret;
1288         int S, T;
1289         int i;
1290
1291         entry = iter->ent;
1292
1293         comm = trace_find_cmdline(iter->ent->pid);
1294
1295         t = ns2usecs(entry->t);
1296         usec_rem = do_div(t, 1000000ULL);
1297         secs = (unsigned long)t;
1298
1299         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1300         if (!ret)
1301                 return 0;
1302         ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1303         if (!ret)
1304                 return 0;
1305         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1306         if (!ret)
1307                 return 0;
1308
1309         switch (entry->type) {
1310         case TRACE_FN:
1311                 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1312                 if (!ret)
1313                         return 0;
1314                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1315                                                 entry->fn.parent_ip) {
1316                         ret = trace_seq_printf(s, " <-");
1317                         if (!ret)
1318                                 return 0;
1319                         ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1320                                                sym_flags);
1321                         if (!ret)
1322                                 return 0;
1323                 }
1324                 ret = trace_seq_printf(s, "\n");
1325                 if (!ret)
1326                         return 0;
1327                 break;
1328         case TRACE_CTX:
1329         case TRACE_WAKE:
1330                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1331                         state_to_char[entry->ctx.prev_state] : 'X';
1332                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1333                         state_to_char[entry->ctx.next_state] : 'X';
1334                 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1335                                        entry->ctx.prev_pid,
1336                                        entry->ctx.prev_prio,
1337                                        S,
1338                                        entry->type == TRACE_CTX ? "==>" : "  +",
1339                                        entry->ctx.next_pid,
1340                                        entry->ctx.next_prio,
1341                                        T);
1342                 if (!ret)
1343                         return 0;
1344                 break;
1345         case TRACE_SPECIAL:
1346                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1347                                  entry->special.arg1,
1348                                  entry->special.arg2,
1349                                  entry->special.arg3);
1350                 if (!ret)
1351                         return 0;
1352                 break;
1353         case TRACE_STACK:
1354                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1355                         if (i) {
1356                                 ret = trace_seq_puts(s, " <= ");
1357                                 if (!ret)
1358                                         return 0;
1359                         }
1360                         ret = seq_print_ip_sym(s, entry->stack.caller[i],
1361                                                sym_flags);
1362                         if (!ret)
1363                                 return 0;
1364                 }
1365                 ret = trace_seq_puts(s, "\n");
1366                 if (!ret)
1367                         return 0;
1368                 break;
1369         }
1370         return 1;
1371 }
1372
1373 static int print_raw_fmt(struct trace_iterator *iter)
1374 {
1375         struct trace_seq *s = &iter->seq;
1376         struct trace_entry *entry;
1377         int ret;
1378         int S, T;
1379
1380         entry = iter->ent;
1381
1382         ret = trace_seq_printf(s, "%d %d %llu ",
1383                 entry->pid, iter->cpu, entry->t);
1384         if (!ret)
1385                 return 0;
1386
1387         switch (entry->type) {
1388         case TRACE_FN:
1389                 ret = trace_seq_printf(s, "%x %x\n",
1390                                         entry->fn.ip, entry->fn.parent_ip);
1391                 if (!ret)
1392                         return 0;
1393                 break;
1394         case TRACE_CTX:
1395         case TRACE_WAKE:
1396                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1397                         state_to_char[entry->ctx.prev_state] : 'X';
1398                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1399                         state_to_char[entry->ctx.next_state] : 'X';
1400                 if (entry->type == TRACE_WAKE)
1401                         S = '+';
1402                 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1403                                        entry->ctx.prev_pid,
1404                                        entry->ctx.prev_prio,
1405                                        S,
1406                                        entry->ctx.next_pid,
1407                                        entry->ctx.next_prio,
1408                                        T);
1409                 if (!ret)
1410                         return 0;
1411                 break;
1412         case TRACE_SPECIAL:
1413         case TRACE_STACK:
1414                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1415                                  entry->special.arg1,
1416                                  entry->special.arg2,
1417                                  entry->special.arg3);
1418                 if (!ret)
1419                         return 0;
1420                 break;
1421         }
1422         return 1;
1423 }
1424
1425 #define SEQ_PUT_FIELD_RET(s, x)                         \
1426 do {                                                    \
1427         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1428                 return 0;                               \
1429 } while (0)
1430
1431 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1432 do {                                                    \
1433         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1434                 return 0;                               \
1435 } while (0)
1436
1437 static int print_hex_fmt(struct trace_iterator *iter)
1438 {
1439         struct trace_seq *s = &iter->seq;
1440         unsigned char newline = '\n';
1441         struct trace_entry *entry;
1442         int S, T;
1443
1444         entry = iter->ent;
1445
1446         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1447         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1448         SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1449
1450         switch (entry->type) {
1451         case TRACE_FN:
1452                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1453                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1454                 break;
1455         case TRACE_CTX:
1456         case TRACE_WAKE:
1457                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1458                         state_to_char[entry->ctx.prev_state] : 'X';
1459                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1460                         state_to_char[entry->ctx.next_state] : 'X';
1461                 if (entry->type == TRACE_WAKE)
1462                         S = '+';
1463                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1464                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1465                 SEQ_PUT_HEX_FIELD_RET(s, S);
1466                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1467                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1468                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1469                 SEQ_PUT_HEX_FIELD_RET(s, T);
1470                 break;
1471         case TRACE_SPECIAL:
1472         case TRACE_STACK:
1473                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1474                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1475                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1476                 break;
1477         }
1478         SEQ_PUT_FIELD_RET(s, newline);
1479
1480         return 1;
1481 }
1482
1483 static int print_bin_fmt(struct trace_iterator *iter)
1484 {
1485         struct trace_seq *s = &iter->seq;
1486         struct trace_entry *entry;
1487
1488         entry = iter->ent;
1489
1490         SEQ_PUT_FIELD_RET(s, entry->pid);
1491         SEQ_PUT_FIELD_RET(s, entry->cpu);
1492         SEQ_PUT_FIELD_RET(s, entry->t);
1493
1494         switch (entry->type) {
1495         case TRACE_FN:
1496                 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1497                 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1498                 break;
1499         case TRACE_CTX:
1500                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1501                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1502                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1503                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1504                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1505                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1506                 break;
1507         case TRACE_SPECIAL:
1508         case TRACE_STACK:
1509                 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1510                 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1511                 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1512                 break;
1513         }
1514         return 1;
1515 }
1516
1517 static int trace_empty(struct trace_iterator *iter)
1518 {
1519         struct trace_array_cpu *data;
1520         int cpu;
1521
1522         for_each_possible_cpu(cpu) {
1523                 data = iter->tr->data[cpu];
1524
1525                 if (head_page(data) && data->trace_idx &&
1526                     (data->trace_tail != data->trace_head ||
1527                      data->trace_tail_idx != data->trace_head_idx))
1528                         return 0;
1529         }
1530         return 1;
1531 }
1532
1533 static int print_trace_line(struct trace_iterator *iter)
1534 {
1535         if (iter->trace && iter->trace->print_line)
1536                 return iter->trace->print_line(iter);
1537
1538         if (trace_flags & TRACE_ITER_BIN)
1539                 return print_bin_fmt(iter);
1540
1541         if (trace_flags & TRACE_ITER_HEX)
1542                 return print_hex_fmt(iter);
1543
1544         if (trace_flags & TRACE_ITER_RAW)
1545                 return print_raw_fmt(iter);
1546
1547         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1548                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1549
1550         return print_trace_fmt(iter);
1551 }
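/*
 * Output format precedence: a tracer's own print_line() hook wins, then the
 * bin, hex and raw options in that order, then the latency format for
 * iterators opened with TRACE_FILE_LAT_FMT, and finally the default format
 * above.
 */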
1552
1553 static int s_show(struct seq_file *m, void *v)
1554 {
1555         struct trace_iterator *iter = v;
1556
1557         if (iter->ent == NULL) {
1558                 if (iter->tr) {
1559                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1560                         seq_puts(m, "#\n");
1561                 }
1562                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1563                         /* print nothing if the buffers are empty */
1564                         if (trace_empty(iter))
1565                                 return 0;
1566                         print_trace_header(m, iter);
1567                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1568                                 print_lat_help_header(m);
1569                 } else {
1570                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1571                                 print_func_help_header(m);
1572                 }
1573         } else {
1574                 print_trace_line(iter);
1575                 trace_print_seq(m, &iter->seq);
1576         }
1577
1578         return 0;
1579 }
1580
1581 static struct seq_operations tracer_seq_ops = {
1582         .start          = s_start,
1583         .next           = s_next,
1584         .stop           = s_stop,
1585         .show           = s_show,
1586 };
1587
1588 static struct trace_iterator *
1589 __tracing_open(struct inode *inode, struct file *file, int *ret)
1590 {
1591         struct trace_iterator *iter;
1592
1593         if (tracing_disabled) {
1594                 *ret = -ENODEV;
1595                 return NULL;
1596         }
1597
1598         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1599         if (!iter) {
1600                 *ret = -ENOMEM;
1601                 goto out;
1602         }
1603
1604         mutex_lock(&trace_types_lock);
1605         if (current_trace && current_trace->print_max)
1606                 iter->tr = &max_tr;
1607         else
1608                 iter->tr = inode->i_private;
1609         iter->trace = current_trace;
1610         iter->pos = -1;
1611
1612         /* TODO stop tracer */
1613         *ret = seq_open(file, &tracer_seq_ops);
1614         if (!*ret) {
1615                 struct seq_file *m = file->private_data;
1616                 m->private = iter;
1617
1618                 /* stop the trace while dumping */
1619                 if (iter->tr->ctrl)
1620                         tracer_enabled = 0;
1621
1622                 if (iter->trace && iter->trace->open)
1623                         iter->trace->open(iter);
1624         } else {
1625                 kfree(iter);
1626                 iter = NULL;
1627         }
1628         mutex_unlock(&trace_types_lock);
1629
1630  out:
1631         return iter;
1632 }
1633
1634 int tracing_open_generic(struct inode *inode, struct file *filp)
1635 {
1636         if (tracing_disabled)
1637                 return -ENODEV;
1638
1639         filp->private_data = inode->i_private;
1640         return 0;
1641 }
1642
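     /*
      * Release path matching __tracing_open(): let the tracer clean up,
      * re-enable tracing if it was running, and free the iterator.
      */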
1643 int tracing_release(struct inode *inode, struct file *file)
1644 {
1645         struct seq_file *m = (struct seq_file *)file->private_data;
1646         struct trace_iterator *iter = m->private;
1647
1648         mutex_lock(&trace_types_lock);
1649         if (iter->trace && iter->trace->close)
1650                 iter->trace->close(iter);
1651
1652         /* reenable tracing if it was previously enabled */
1653         if (iter->tr->ctrl)
1654                 tracer_enabled = 1;
1655         mutex_unlock(&trace_types_lock);
1656
1657         seq_release(inode, file);
1658         kfree(iter);
1659         return 0;
1660 }
1661
1662 static int tracing_open(struct inode *inode, struct file *file)
1663 {
1664         int ret;
1665
1666         __tracing_open(inode, file, &ret);
1667
1668         return ret;
1669 }
1670
1671 static int tracing_lt_open(struct inode *inode, struct file *file)
1672 {
1673         struct trace_iterator *iter;
1674         int ret;
1675
1676         iter = __tracing_open(inode, file, &ret);
1677
1678         if (!ret)
1679                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1680
1681         return ret;
1682 }
1683
1684
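     /*
      * seq_file iterator over the registered tracers, used by the
      * "available_tracers" file: t_start()/t_next() walk the trace_types
      * list under trace_types_lock and t_show() prints each name.
      */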
1685 static void *
1686 t_next(struct seq_file *m, void *v, loff_t *pos)
1687 {
1688         struct tracer *t = m->private;
1689
1690         (*pos)++;
1691
1692         if (t)
1693                 t = t->next;
1694
1695         m->private = t;
1696
1697         return t;
1698 }
1699
1700 static void *t_start(struct seq_file *m, loff_t *pos)
1701 {
1702         struct tracer *t = m->private;
1703         loff_t l = 0;
1704
1705         mutex_lock(&trace_types_lock);
1706         for (; t && l < *pos; t = t_next(m, t, &l))
1707                 ;
1708
1709         return t;
1710 }
1711
1712 static void t_stop(struct seq_file *m, void *p)
1713 {
1714         mutex_unlock(&trace_types_lock);
1715 }
1716
1717 static int t_show(struct seq_file *m, void *v)
1718 {
1719         struct tracer *t = v;
1720
1721         if (!t)
1722                 return 0;
1723
1724         seq_printf(m, "%s", t->name);
1725         if (t->next)
1726                 seq_putc(m, ' ');
1727         else
1728                 seq_putc(m, '\n');
1729
1730         return 0;
1731 }
1732
1733 static struct seq_operations show_traces_seq_ops = {
1734         .start          = t_start,
1735         .next           = t_next,
1736         .stop           = t_stop,
1737         .show           = t_show,
1738 };
1739
1740 static int show_traces_open(struct inode *inode, struct file *file)
1741 {
1742         int ret;
1743
1744         if (tracing_disabled)
1745                 return -ENODEV;
1746
1747         ret = seq_open(file, &show_traces_seq_ops);
1748         if (!ret) {
1749                 struct seq_file *m = file->private_data;
1750                 m->private = trace_types;
1751         }
1752
1753         return ret;
1754 }
1755
1756 static struct file_operations tracing_fops = {
1757         .open           = tracing_open,
1758         .read           = seq_read,
1759         .llseek         = seq_lseek,
1760         .release        = tracing_release,
1761 };
1762
1763 static struct file_operations tracing_lt_fops = {
1764         .open           = tracing_lt_open,
1765         .read           = seq_read,
1766         .llseek         = seq_lseek,
1767         .release        = tracing_release,
1768 };
1769
1770 static struct file_operations show_traces_fops = {
1771         .open           = show_traces_open,
1772         .read           = seq_read,
1773         .release        = seq_release,
1774 };
1775
1776 /*
1777  * Only trace on a CPU if the bitmask is set:
1778  */
1779 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
1780
1781 /*
1782  * When tracing/tracing_cpumask is modified, this holds
1783  * the new bitmask we are about to install:
1784  */
1785 static cpumask_t tracing_cpumask_new;
1786
1787 /*
1788  * The tracer itself will not take this lock, but still we want
1789  * to provide a consistent cpumask to user-space:
1790  */
1791 static DEFINE_MUTEX(tracing_cpumask_update_lock);
1792
1793 /*
1794  * Temporary storage for the character representation of the
1795  * CPU bitmask (and one more byte for the newline):
1796  */
1797 static char mask_str[NR_CPUS + 1];
1798
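     /*
      * Handlers for the "tracing_cpumask" file. Reading returns the
      * current mask; writing installs a new mask and adjusts the per-cpu
      * disabled counters for CPUs whose bit is being flipped.
      */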
1799 static ssize_t
1800 tracing_cpumask_read(struct file *filp, char __user *ubuf,
1801                      size_t count, loff_t *ppos)
1802 {
1803         int len;
1804
1805         mutex_lock(&tracing_cpumask_update_lock);
1806
1807         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
1808         if (count - len < 2) {
1809                 count = -EINVAL;
1810                 goto out_err;
1811         }
1812         len += sprintf(mask_str + len, "\n");
1813         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
1814
1815 out_err:
1816         mutex_unlock(&tracing_cpumask_update_lock);
1817
1818         return count;
1819 }
1820
1821 static ssize_t
1822 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1823                       size_t count, loff_t *ppos)
1824 {
1825         int err, cpu;
1826
1827         mutex_lock(&tracing_cpumask_update_lock);
1828         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
1829         if (err)
1830                 goto err_unlock;
1831
1832         raw_local_irq_disable();
1833         __raw_spin_lock(&ftrace_max_lock);
1834         for_each_possible_cpu(cpu) {
1835                 /*
1836                  * Increase/decrease the disabled counter if we are
1837                  * about to flip a bit in the cpumask:
1838                  */
1839                 if (cpu_isset(cpu, tracing_cpumask) &&
1840                                 !cpu_isset(cpu, tracing_cpumask_new)) {
1841                         atomic_inc(&global_trace.data[cpu]->disabled);
1842                 }
1843                 if (!cpu_isset(cpu, tracing_cpumask) &&
1844                                 cpu_isset(cpu, tracing_cpumask_new)) {
1845                         atomic_dec(&global_trace.data[cpu]->disabled);
1846                 }
1847         }
1848         __raw_spin_unlock(&ftrace_max_lock);
1849         raw_local_irq_enable();
1850
1851         tracing_cpumask = tracing_cpumask_new;
1852
1853         mutex_unlock(&tracing_cpumask_update_lock);
1854
1855         return count;
1856
1857 err_unlock:
1858         mutex_unlock(&tracing_cpumask_update_lock);
1859
1860         return err;
1861 }
1862
1863 static struct file_operations tracing_cpumask_fops = {
1864         .open           = tracing_open_generic,
1865         .read           = tracing_cpumask_read,
1866         .write          = tracing_cpumask_write,
1867 };
1868
1869 static ssize_t
1870 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
1871                        size_t cnt, loff_t *ppos)
1872 {
1873         char *buf;
1874         int r = 0;
1875         int len = 0;
1876         int i;
1877
1878         /* calculate the max size */
1879         for (i = 0; trace_options[i]; i++) {
1880                 len += strlen(trace_options[i]);
1881                 len += 3; /* "no" and space */
1882         }
1883
1884         /* +2 for \n and \0 */
1885         buf = kmalloc(len + 2, GFP_KERNEL);
1886         if (!buf)
1887                 return -ENOMEM;
1888
1889         for (i = 0; trace_options[i]; i++) {
1890                 if (trace_flags & (1 << i))
1891                         r += sprintf(buf + r, "%s ", trace_options[i]);
1892                 else
1893                         r += sprintf(buf + r, "no%s ", trace_options[i]);
1894         }
1895
1896         r += sprintf(buf + r, "\n");
1897         WARN_ON(r >= len + 2);
1898
1899         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1900
1901         kfree(buf);
1902
1903         return r;
1904 }
1905
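     /*
      * Write handler for "iter_ctrl": accepts a single option name from
      * trace_options[], optionally prefixed with "no" to clear it, e.g.
      * "echo noprint-parent > iter_ctrl" (see readme_msg below).
      */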
1906 static ssize_t
1907 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
1908                         size_t cnt, loff_t *ppos)
1909 {
1910         char buf[64];
1911         char *cmp = buf;
1912         int neg = 0;
1913         int i;
1914
1915         if (cnt >= sizeof(buf))
1916                 return -EINVAL;
1917
1918         if (copy_from_user(&buf, ubuf, cnt))
1919                 return -EFAULT;
1920
1921         buf[cnt] = 0;
1922
1923         if (strncmp(buf, "no", 2) == 0) {
1924                 neg = 1;
1925                 cmp += 2;
1926         }
1927
1928         for (i = 0; trace_options[i]; i++) {
1929                 int len = strlen(trace_options[i]);
1930
1931                 if (strncmp(cmp, trace_options[i], len) == 0) {
1932                         if (neg)
1933                                 trace_flags &= ~(1 << i);
1934                         else
1935                                 trace_flags |= (1 << i);
1936                         break;
1937                 }
1938         }
1939         /*
1940          * If no option could be set, return an error:
1941          */
1942         if (!trace_options[i])
1943                 return -EINVAL;
1944
1945         filp->f_pos += cnt;
1946
1947         return cnt;
1948 }
1949
1950 static struct file_operations tracing_iter_fops = {
1951         .open           = tracing_open_generic,
1952         .read           = tracing_iter_ctrl_read,
1953         .write          = tracing_iter_ctrl_write,
1954 };
1955
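     /* Contents of the debugfs "README" file: a short usage walkthrough. */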
1956 static const char readme_msg[] =
1957         "tracing mini-HOWTO:\n\n"
1958         "# mkdir /debug\n"
1959         "# mount -t debugfs nodev /debug\n\n"
1960         "# cat /debug/tracing/available_tracers\n"
1961         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
1962         "# cat /debug/tracing/current_tracer\n"
1963         "none\n"
1964         "# echo sched_switch > /debug/tracing/current_tracer\n"
1965         "# cat /debug/tracing/current_tracer\n"
1966         "sched_switch\n"
1967         "# cat /debug/tracing/iter_ctrl\n"
1968         "noprint-parent nosym-offset nosym-addr noverbose\n"
1969         "# echo print-parent > /debug/tracing/iter_ctrl\n"
1970         "# echo 1 > /debug/tracing/tracing_enabled\n"
1971         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
1972         "# echo 0 > /debug/tracing/tracing_enabled\n"
1973 ;
1974
1975 static ssize_t
1976 tracing_readme_read(struct file *filp, char __user *ubuf,
1977                        size_t cnt, loff_t *ppos)
1978 {
1979         return simple_read_from_buffer(ubuf, cnt, ppos,
1980                                         readme_msg, strlen(readme_msg));
1981 }
1982
1983 static struct file_operations tracing_readme_fops = {
1984         .open           = tracing_open_generic,
1985         .read           = tracing_readme_read,
1986 };
1987
1988 static ssize_t
1989 tracing_ctrl_read(struct file *filp, char __user *ubuf,
1990                   size_t cnt, loff_t *ppos)
1991 {
1992         struct trace_array *tr = filp->private_data;
1993         char buf[64];
1994         int r;
1995
1996         r = sprintf(buf, "%ld\n", tr->ctrl);
1997         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1998 }
1999
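     /*
      * Write handler for "tracing_enabled": parses a 0/1 value, updates
      * tracer_enabled and tr->ctrl, and lets the current tracer react
      * via its ctrl_update() callback.
      */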
2000 static ssize_t
2001 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2002                    size_t cnt, loff_t *ppos)
2003 {
2004         struct trace_array *tr = filp->private_data;
2005         char buf[64];
2006         long val;
2007         int ret;
2008
2009         if (cnt >= sizeof(buf))
2010                 return -EINVAL;
2011
2012         if (copy_from_user(&buf, ubuf, cnt))
2013                 return -EFAULT;
2014
2015         buf[cnt] = 0;
2016
2017         ret = strict_strtoul(buf, 10, &val);
2018         if (ret < 0)
2019                 return ret;
2020
2021         val = !!val;
2022
2023         mutex_lock(&trace_types_lock);
2024         if (tr->ctrl ^ val) {
2025                 if (val)
2026                         tracer_enabled = 1;
2027                 else
2028                         tracer_enabled = 0;
2029
2030                 tr->ctrl = val;
2031
2032                 if (current_trace && current_trace->ctrl_update)
2033                         current_trace->ctrl_update(tr);
2034         }
2035         mutex_unlock(&trace_types_lock);
2036
2037         filp->f_pos += cnt;
2038
2039         return cnt;
2040 }
2041
2042 static ssize_t
2043 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2044                        size_t cnt, loff_t *ppos)
2045 {
2046         char buf[max_tracer_type_len+2];
2047         int r;
2048
2049         mutex_lock(&trace_types_lock);
2050         if (current_trace)
2051                 r = sprintf(buf, "%s\n", current_trace->name);
2052         else
2053                 r = sprintf(buf, "\n");
2054         mutex_unlock(&trace_types_lock);
2055
2056         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2057 }
2058
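     /*
      * Write handler for "current_tracer": looks the name up in the list
      * of registered tracers, resets the old tracer and calls the new
      * tracer's init() on the global trace array.
      */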
2059 static ssize_t
2060 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2061                         size_t cnt, loff_t *ppos)
2062 {
2063         struct trace_array *tr = &global_trace;
2064         struct tracer *t;
2065         char buf[max_tracer_type_len+1];
2066         int i;
2067
2068         if (cnt > max_tracer_type_len)
2069                 cnt = max_tracer_type_len;
2070
2071         if (copy_from_user(&buf, ubuf, cnt))
2072                 return -EFAULT;
2073
2074         buf[cnt] = 0;
2075
2076         /* strip ending whitespace. */
2077         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2078                 buf[i] = 0;
2079
2080         mutex_lock(&trace_types_lock);
2081         for (t = trace_types; t; t = t->next) {
2082                 if (strcmp(t->name, buf) == 0)
2083                         break;
2084         }
2085         if (!t || t == current_trace)
2086                 goto out;
2087
2088         if (current_trace && current_trace->reset)
2089                 current_trace->reset(tr);
2090
2091         current_trace = t;
2092         if (t->init)
2093                 t->init(tr);
2094
2095  out:
2096         mutex_unlock(&trace_types_lock);
2097
2098         filp->f_pos += cnt;
2099
2100         return cnt;
2101 }
2102
2103 static ssize_t
2104 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2105                      size_t cnt, loff_t *ppos)
2106 {
2107         unsigned long *ptr = filp->private_data;
2108         char buf[64];
2109         int r;
2110
2111         r = snprintf(buf, sizeof(buf), "%ld\n",
2112                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2113         if (r > sizeof(buf))
2114                 r = sizeof(buf);
2115         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2116 }
2117
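     /*
      * Write handler shared by "tracing_max_latency" and "tracing_thresh":
      * the value is given in microseconds and stored as nanoseconds
      * (val * 1000), matching nsecs_to_usecs() on the read side.
      */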
2118 static ssize_t
2119 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2120                       size_t cnt, loff_t *ppos)
2121 {
2122         long *ptr = filp->private_data;
2123         char buf[64];
2124         long val;
2125         int ret;
2126
2127         if (cnt >= sizeof(buf))
2128                 return -EINVAL;
2129
2130         if (copy_from_user(&buf, ubuf, cnt))
2131                 return -EFAULT;
2132
2133         buf[cnt] = 0;
2134
2135         ret = strict_strtoul(buf, 10, &val);
2136         if (ret < 0)
2137                 return ret;
2138
2139         *ptr = val * 1000;
2140
2141         return cnt;
2142 }
2143
2144 static atomic_t tracing_reader;
2145
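     /*
      * Open handler for "trace_pipe": only one reader is allowed at a
      * time (tracked by tracing_reader). Unlike "trace", the pipe reader
      * consumes entries from the buffer as they are read.
      */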
2146 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2147 {
2148         struct trace_iterator *iter;
2149
2150         if (tracing_disabled)
2151                 return -ENODEV;
2152
2153         /* We allow only one reader of the pipe at a time */
2154         if (atomic_inc_return(&tracing_reader) != 1) {
2155                 atomic_dec(&tracing_reader);
2156                 return -EBUSY;
2157         }
2158
2159         /* create a buffer to store the information to pass to userspace */
2160         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2161         if (!iter)
2162                 return -ENOMEM;
2163
2164         iter->tr = &global_trace;
2165         iter->trace = current_trace;
2166
2167         filp->private_data = iter;
2168
2169         return 0;
2170 }
2171
2172 static int tracing_release_pipe(struct inode *inode, struct file *file)
2173 {
2174         struct trace_iterator *iter = file->private_data;
2175
2176         kfree(iter);
2177         atomic_dec(&tracing_reader);
2178
2179         return 0;
2180 }
2181
2182 static unsigned int
2183 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2184 {
2185         struct trace_iterator *iter = filp->private_data;
2186
2187         if (trace_flags & TRACE_ITER_BLOCK) {
2188                 /*
2189                  * Always select as readable when in blocking mode
2190                  */
2191                 return POLLIN | POLLRDNORM;
2192         } else {
2193                 if (!trace_empty(iter))
2194                         return POLLIN | POLLRDNORM;
2195                 poll_wait(filp, &trace_wait, poll_table);
2196                 if (!trace_empty(iter))
2197                         return POLLIN | POLLRDNORM;
2198
2199                 return 0;
2200         }
2201 }
2202
2203 /*
2204  * Consumer reader.
2205  */
2206 static ssize_t
2207 tracing_read_pipe(struct file *filp, char __user *ubuf,
2208                   size_t cnt, loff_t *ppos)
2209 {
2210         struct trace_iterator *iter = filp->private_data;
2211         struct trace_array_cpu *data;
2212         struct trace_array *tr = iter->tr;
2213         struct tracer *tracer = iter->trace;
2214         static cpumask_t mask;
2215         static int start;
2216         unsigned long flags;
2217 #ifdef CONFIG_FTRACE
2218         int ftrace_save;
2219 #endif
2220         int read = 0;
2221         int cpu;
2222         int len;
2223         int ret;
2224
2225         /* return any leftover data */
2226         if (iter->seq.len > start) {
2227                 len = iter->seq.len - start;
2228                 if (cnt > len)
2229                         cnt = len;
2230                 ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt);
2231                 if (ret)
2232                         cnt = -EFAULT;
2233
2234                 start += len;
2235
2236                 return cnt;
2237         }
2238
2239         trace_seq_reset(&iter->seq);
2240         start = 0;
2241
2242         while (trace_empty(iter)) {
2243
2244                 if ((filp->f_flags & O_NONBLOCK))
2245                         return -EAGAIN;
2246
2247                 /*
2248                  * This is a make-shift waitqueue. The reason we don't use
2249                  * an actual wait queue is because:
2250                  *  1) we only ever have one waiter
2251                  *  2) the tracer traces all functions; we don't want
2252                  *     the overhead of calling wake_up and friends
2253                  *     (and tracing them too).
2254                  *  Anyway, this is a very primitive wakeup.
2255                  */
2256                 set_current_state(TASK_INTERRUPTIBLE);
2257                 iter->tr->waiter = current;
2258
2259                 /* sleep for one second, and try again. */
2260                 schedule_timeout(HZ);
2261
2262                 iter->tr->waiter = NULL;
2263
2264                 if (signal_pending(current))
2265                         return -EINTR;
2266
2267                 if (iter->trace != current_trace)
2268                         return 0;
2269
2270                 /*
2271                  * We block while the buffer is empty. We still block if
2272                  * tracing is disabled but we have never read anything;
2273                  * this allows a user to cat this file, and then enable
2274                  * tracing. But after we have read something, we give an
2275                  * EOF when tracing is disabled again.
2276                  *
2277                  * iter->pos will be 0 if we haven't read anything.
2278                  */
2279                 if (!tracer_enabled && iter->pos)
2280                         break;
2281
2282                 continue;
2283         }
2284
2285         /* stop when tracing is finished */
2286         if (trace_empty(iter))
2287                 return 0;
2288
2289         if (cnt >= PAGE_SIZE)
2290                 cnt = PAGE_SIZE - 1;
2291
2292         memset(iter, 0, sizeof(*iter));
2293         iter->tr = tr;
2294         iter->trace = tracer;
2295         iter->pos = -1;
2296
2297         /*
2298          * We need to stop all tracing on all CPUs to read
2299          * the next buffer. This is a bit expensive, but is
2300          * not done often. We fill in all that we can read,
2301          * and then release the locks again.
2302          */
2303
2304         cpus_clear(mask);
2305         local_irq_save(flags);
2306 #ifdef CONFIG_FTRACE
2307         ftrace_save = ftrace_enabled;
2308         ftrace_enabled = 0;
2309 #endif
2310         smp_wmb();
2311         for_each_possible_cpu(cpu) {
2312                 data = iter->tr->data[cpu];
2313
2314                 if (!head_page(data) || !data->trace_idx)
2315                         continue;
2316
2317                 atomic_inc(&data->disabled);
2318                 cpu_set(cpu, mask);
2319         }
2320
2321         for_each_cpu_mask(cpu, mask) {
2322                 data = iter->tr->data[cpu];
2323                 __raw_spin_lock(&data->lock);
2324         }
2325
2326         while (find_next_entry_inc(iter) != NULL) {
2327                 int len = iter->seq.len;
2328
2329                 ret = print_trace_line(iter);
2330                 if (!ret) {
2331                         /* don't print partial lines */
2332                         iter->seq.len = len;
2333                         break;
2334                 }
2335
2336                 trace_consume(iter);
2337
2338                 if (iter->seq.len >= cnt)
2339                         break;
2340         }
2341
2342         for_each_cpu_mask(cpu, mask) {
2343                 data = iter->tr->data[cpu];
2344                 __raw_spin_unlock(&data->lock);
2345         }
2346
2347         for_each_cpu_mask(cpu, mask) {
2348                 data = iter->tr->data[cpu];
2349                 atomic_dec(&data->disabled);
2350         }
2351 #ifdef CONFIG_FTRACE
2352         ftrace_enabled = ftrace_save;
2353 #endif
2354         local_irq_restore(flags);
2355
2356         /* Now copy what we have to the user */
2357         read = iter->seq.len;
2358         if (read > cnt)
2359                 read = cnt;
2360
2361         ret = copy_to_user(ubuf, iter->seq.buffer, read);
2362
2363         if (read < iter->seq.len)
2364                 start = read;
2365         else
2366                 trace_seq_reset(&iter->seq);
2367
2368         if (ret)
2369                 read = -EFAULT;
2370
2371         return read;
2372 }
2373
2374 static ssize_t
2375 tracing_entries_read(struct file *filp, char __user *ubuf,
2376                      size_t cnt, loff_t *ppos)
2377 {
2378         struct trace_array *tr = filp->private_data;
2379         char buf[64];
2380         int r;
2381
2382         r = sprintf(buf, "%lu\n", tr->entries);
2383         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2384 }
2385
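     /*
      * Write handler for "trace_entries": resizes the per-cpu buffers in
      * ENTRIES_PER_PAGE steps by allocating or freeing pages, e.g.
      * "echo 131072 > trace_entries". The current tracer must be set to
      * "none" while the size is being changed.
      */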
2386 static ssize_t
2387 tracing_entries_write(struct file *filp, const char __user *ubuf,
2388                       size_t cnt, loff_t *ppos)
2389 {
2390         unsigned long val;
2391         char buf[64];
2392         int ret;
2393
2394         if (cnt >= sizeof(buf))
2395                 return -EINVAL;
2396
2397         if (copy_from_user(&buf, ubuf, cnt))
2398                 return -EFAULT;
2399
2400         buf[cnt] = 0;
2401
2402         ret = strict_strtoul(buf, 10, &val);
2403         if (ret < 0)
2404                 return ret;
2405
2406         /* must have at least 1 entry */
2407         if (!val)
2408                 return -EINVAL;
2409
2410         mutex_lock(&trace_types_lock);
2411
2412         if (current_trace != &no_tracer) {
2413                 cnt = -EBUSY;
2414                 pr_info("ftrace: set current_tracer to none"
2415                         " before modifying buffer size\n");
2416                 goto out;
2417         }
2418
2419         if (val > global_trace.entries) {
2420                 while (global_trace.entries < val) {
2421                         if (trace_alloc_page()) {
2422                                 cnt = -ENOMEM;
2423                                 goto out;
2424                         }
2425                 }
2426         } else {
2427                 /* free pages until we are within one page of the requested size */
2428                 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2429                         trace_free_page();
2430         }
2431
2432         filp->f_pos += cnt;
2433
2434  out:
2435         max_tr.entries = global_trace.entries;
2436         mutex_unlock(&trace_types_lock);
2437
2438         return cnt;
2439 }
2440
2441 static struct file_operations tracing_max_lat_fops = {
2442         .open           = tracing_open_generic,
2443         .read           = tracing_max_lat_read,
2444         .write          = tracing_max_lat_write,
2445 };
2446
2447 static struct file_operations tracing_ctrl_fops = {
2448         .open           = tracing_open_generic,
2449         .read           = tracing_ctrl_read,
2450         .write          = tracing_ctrl_write,
2451 };
2452
2453 static struct file_operations set_tracer_fops = {
2454         .open           = tracing_open_generic,
2455         .read           = tracing_set_trace_read,
2456         .write          = tracing_set_trace_write,
2457 };
2458
2459 static struct file_operations tracing_pipe_fops = {
2460         .open           = tracing_open_pipe,
2461         .poll           = tracing_poll_pipe,
2462         .read           = tracing_read_pipe,
2463         .release        = tracing_release_pipe,
2464 };
2465
2466 static struct file_operations tracing_entries_fops = {
2467         .open           = tracing_open_generic,
2468         .read           = tracing_entries_read,
2469         .write          = tracing_entries_write,
2470 };
2471
2472 #ifdef CONFIG_DYNAMIC_FTRACE
2473
2474 static ssize_t
2475 tracing_read_long(struct file *filp, char __user *ubuf,
2476                   size_t cnt, loff_t *ppos)
2477 {
2478         unsigned long *p = filp->private_data;
2479         char buf[64];
2480         int r;
2481
2482         r = sprintf(buf, "%ld\n", *p);
2483
2484         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2485 }
2486
2487 static struct file_operations tracing_read_long_fops = {
2488         .open           = tracing_open_generic,
2489         .read           = tracing_read_long,
2490 };
2491 #endif
2492
2493 static struct dentry *d_tracer;
2494
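     /*
      * Return the debugfs "tracing" directory, creating it on first use;
      * warn only once if it cannot be created.
      */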
2495 struct dentry *tracing_init_dentry(void)
2496 {
2497         static int once;
2498
2499         if (d_tracer)
2500                 return d_tracer;
2501
2502         d_tracer = debugfs_create_dir("tracing", NULL);
2503
2504         if (!d_tracer && !once) {
2505                 once = 1;
2506                 pr_warning("Could not create debugfs directory 'tracing'\n");
2507                 return NULL;
2508         }
2509
2510         return d_tracer;
2511 }
2512
2513 #ifdef CONFIG_FTRACE_SELFTEST
2514 /* Let selftest have access to static functions in this file */
2515 #include "trace_selftest.c"
2516 #endif
2517
2518 static __init void tracer_init_debugfs(void)
2519 {
2520         struct dentry *d_tracer;
2521         struct dentry *entry;
2522
2523         d_tracer = tracing_init_dentry();
2524
2525         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2526                                     &global_trace, &tracing_ctrl_fops);
2527         if (!entry)
2528                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2529
2530         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2531                                     NULL, &tracing_iter_fops);
2532         if (!entry)
2533                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2534
2535         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2536                                     NULL, &tracing_cpumask_fops);
2537         if (!entry)
2538                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2539
2540         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2541                                     &global_trace, &tracing_lt_fops);
2542         if (!entry)
2543                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2544
2545         entry = debugfs_create_file("trace", 0444, d_tracer,
2546                                     &global_trace, &tracing_fops);
2547         if (!entry)
2548                 pr_warning("Could not create debugfs 'trace' entry\n");
2549
2550         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2551                                     &global_trace, &show_traces_fops);
2552         if (!entry)
2553                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2554
2555         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2556                                     &global_trace, &set_tracer_fops);
2557         if (!entry)
2558                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2559
2560         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2561                                     &tracing_max_latency,
2562                                     &tracing_max_lat_fops);
2563         if (!entry)
2564                 pr_warning("Could not create debugfs "
2565                            "'tracing_max_latency' entry\n");
2566
2567         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2568                                     &tracing_thresh, &tracing_max_lat_fops);
2569         if (!entry)
2570                 pr_warning("Could not create debugfs "
2571                            "'tracing_thresh' entry\n");
2572         entry = debugfs_create_file("README", 0644, d_tracer,
2573                                     NULL, &tracing_readme_fops);
2574         if (!entry)
2575                 pr_warning("Could not create debugfs 'README' entry\n");
2576
2577         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2578                                     NULL, &tracing_pipe_fops);
2579         if (!entry)
2580                 pr_warning("Could not create debugfs "
2581                            "'trace_pipe' entry\n");
2582
2583         entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2584                                     &global_trace, &tracing_entries_fops);
2585         if (!entry)
2586                 pr_warning("Could not create debugfs "
2587                            "'trace_entries' entry\n");
2588
2589 #ifdef CONFIG_DYNAMIC_FTRACE
2590         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2591                                     &ftrace_update_tot_cnt,
2592                                     &tracing_read_long_fops);
2593         if (!entry)
2594                 pr_warning("Could not create debugfs "
2595                            "'dyn_ftrace_total_info' entry\n");
2596 #endif
2597 }
2598
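     /*
      * Grow each per-cpu buffer (and the max_tr snapshot buffer when
      * CONFIG_TRACER_MAX_TRACE is set) by one page, adding
      * ENTRIES_PER_PAGE to global_trace.entries on success.
      */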
2599 static int trace_alloc_page(void)
2600 {
2601         struct trace_array_cpu *data;
2602         struct page *page, *tmp;
2603         LIST_HEAD(pages);
2604         void *array;
2605         int i;
2606
2607         /* first allocate a page for each CPU */
2608         for_each_possible_cpu(i) {
2609                 array = (void *)__get_free_page(GFP_KERNEL);
2610                 if (array == NULL) {
2611                         printk(KERN_ERR "tracer: failed to allocate page"
2612                                " for trace buffer!\n");
2613                         goto free_pages;
2614                 }
2615
2616                 page = virt_to_page(array);
2617                 list_add(&page->lru, &pages);
2618
2619 /* Only allocate if we are actually using the max trace */
2620 #ifdef CONFIG_TRACER_MAX_TRACE
2621                 array = (void *)__get_free_page(GFP_KERNEL);
2622                 if (array == NULL) {
2623                         printk(KERN_ERR "tracer: failed to allocate page"
2624                                " for trace buffer!\n");
2625                         goto free_pages;
2626                 }
2627                 page = virt_to_page(array);
2628                 list_add(&page->lru, &pages);
2629 #endif
2630         }
2631
2632         /* Now that we successfully allocated a page per CPU, add them */
2633         for_each_possible_cpu(i) {
2634                 data = global_trace.data[i];
2635                 page = list_entry(pages.next, struct page, lru);
2636                 list_del_init(&page->lru);
2637                 list_add_tail(&page->lru, &data->trace_pages);
2638                 ClearPageLRU(page);
2639
2640 #ifdef CONFIG_TRACER_MAX_TRACE
2641                 data = max_tr.data[i];
2642                 page = list_entry(pages.next, struct page, lru);
2643                 list_del_init(&page->lru);
2644                 list_add_tail(&page->lru, &data->trace_pages);
2645                 SetPageLRU(page);
2646 #endif
2647         }
2648         global_trace.entries += ENTRIES_PER_PAGE;
2649
2650         return 0;
2651
2652  free_pages:
2653         list_for_each_entry_safe(page, tmp, &pages, lru) {
2654                 list_del_init(&page->lru);
2655                 __free_page(page);
2656         }
2657         return -ENOMEM;
2658 }
2659
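     /*
      * Shrink each per-cpu buffer (and the max_tr buffer) by one page,
      * resetting the buffers and subtracting ENTRIES_PER_PAGE from
      * global_trace.entries.
      */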
2660 static int trace_free_page(void)
2661 {
2662         struct trace_array_cpu *data;
2663         struct page *page;
2664         struct list_head *p;
2665         int i;
2666         int ret = 0;
2667
2668         /* free one page from each buffer */
2669         for_each_possible_cpu(i) {
2670                 data = global_trace.data[i];
2671                 p = data->trace_pages.next;
2672                 if (p == &data->trace_pages) {
2673                         /* should never happen */
2674                         WARN_ON(1);
2675                         tracing_disabled = 1;
2676                         ret = -1;
2677                         break;
2678                 }
2679                 page = list_entry(p, struct page, lru);
2680                 ClearPageLRU(page);
2681                 list_del(&page->lru);
2682                 __free_page(page);
2683
2684                 tracing_reset(data);
2685
2686 #ifdef CONFIG_TRACER_MAX_TRACE
2687                 data = max_tr.data[i];
2688                 p = data->trace_pages.next;
2689                 if (p == &data->trace_pages) {
2690                         /* should never happen */
2691                         WARN_ON(1);
2692                         tracing_disabled = 1;
2693                         ret = -1;
2694                         break;
2695                 }
2696                 page = list_entry(p, struct page, lru);
2697                 ClearPageLRU(page);
2698                 list_del(&page->lru);
2699                 __free_page(page);
2700
2701                 tracing_reset(data);
2702 #endif
2703         }
2704         global_trace.entries -= ENTRIES_PER_PAGE;
2705
2706         return ret;
2707 }
2708
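     /*
      * Boot-time setup: allocate the initial per-cpu trace pages, grow
      * the buffers up to trace_nr_entries, create the debugfs files and
      * register the default "none" tracer.
      */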
2709 __init static int tracer_alloc_buffers(void)
2710 {
2711         struct trace_array_cpu *data;
2712         void *array;
2713         struct page *page;
2714         int pages = 0;
2715         int ret = -ENOMEM;
2716         int i;
2717
2718         global_trace.ctrl = tracer_enabled;
2719
2720         /* Allocate the first page for all buffers */
2721         for_each_possible_cpu(i) {
2722                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
2723                 max_tr.data[i] = &per_cpu(max_data, i);
2724
2725                 array = (void *)__get_free_page(GFP_KERNEL);
2726                 if (array == NULL) {
2727                         printk(KERN_ERR "tracer: failed to allocate page"
2728                                " for trace buffer!\n");
2729                         goto free_buffers;
2730                 }
2731
2732                 /* link the page backing this array into this cpu's trace page list */
2733                 INIT_LIST_HEAD(&data->trace_pages);
2734                 page = virt_to_page(array);
2735                 list_add(&page->lru, &data->trace_pages);
2736                 /* use the LRU flag to differentiate the two buffers */
2737                 ClearPageLRU(page);
2738
2739                 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
2740                 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
2741
2742 /* Only allocate if we are actually using the max trace */
2743 #ifdef CONFIG_TRACER_MAX_TRACE
2744                 array = (void *)__get_free_page(GFP_KERNEL);
2745                 if (array == NULL) {
2746                         printk(KERN_ERR "tracer: failed to allocate page"
2747                                " for trace buffer!\n");
2748                         goto free_buffers;
2749                 }
2750
2751                 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
2752                 page = virt_to_page(array);
2753                 list_add(&page->lru, &max_tr.data[i]->trace_pages);
2754                 SetPageLRU(page);
2755 #endif
2756         }
2757
2758         /*
2759          * Since we allocate by orders of pages, we may be able to
2760          * round up a bit.
2761          */
2762         global_trace.entries = ENTRIES_PER_PAGE;
2763         pages++;
2764
2765         while (global_trace.entries < trace_nr_entries) {
2766                 if (trace_alloc_page())
2767                         break;
2768                 pages++;
2769         }
2770         max_tr.entries = global_trace.entries;
2771
2772         pr_info("tracer: %d pages allocated for %ld",
2773                 pages, trace_nr_entries);
2774         pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
2775         pr_info("   actual entries %ld\n", global_trace.entries);
2776
2777         tracer_init_debugfs();
2778
2779         trace_init_cmdlines();
2780
2781         register_tracer(&no_tracer);
2782         current_trace = &no_tracer;
2783
2784         /* All seems OK, enable tracing */
2785         tracing_disabled = 0;
2786
2787         return 0;
2788
2789  free_buffers:
2790         for (i-- ; i >= 0; i--) {
2791                 struct page *page, *tmp;
2792                 struct trace_array_cpu *data = global_trace.data[i];
2793
2794                 if (data) {
2795                         list_for_each_entry_safe(page, tmp,
2796                                                  &data->trace_pages, lru) {
2797                                 list_del_init(&page->lru);
2798                                 __free_page(page);
2799                         }
2800                 }
2801
2802 #ifdef CONFIG_TRACER_MAX_TRACE
2803                 data = max_tr.data[i];
2804                 if (data) {
2805                         list_for_each_entry_safe(page, tmp,
2806                                                  &data->trace_pages, lru) {
2807                                 list_del_init(&page->lru);
2808                                 __free_page(page);
2809                         }
2810                 }
2811 #endif
2812         }
2813         return ret;
2814 }
2815 fs_initcall(tracer_alloc_buffers);