tracing: Convert tracer->enabled to counter
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest although some concurrent
56  * insertions into the ring-buffer such as trace_printk could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" in the kernel command line, or setting
112  * /proc/sys/kernel/ftrace_dump_on_oops
113  * Set 1 if you want to dump buffers of all CPUs
114  * Set 2 if you want to dump the buffer of the CPU that triggered the oops
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184
185 unsigned long long ns2usecs(cycle_t nsec)
186 {
187         nsec += 500;
188         do_div(nsec, 1000);
189         return nsec;
190 }
191
192 /*
193  * The global_trace is the descriptor that holds the tracing
194  * buffers for the live tracing. For each CPU, it contains
195  * a linked list of pages that will store trace entries. The
196  * page descriptor of the pages in the memory is used to hold
197  * the linked list by linking the lru item in the page descriptor
198  * to each of the pages in the buffer per CPU.
199  *
200  * For each active CPU there is a data field that holds the
201  * pages for the buffer for that CPU. Each CPU has the same number
202  * of pages allocated for its buffer.
203  */
204 static struct trace_array       global_trace;
205
206 LIST_HEAD(ftrace_trace_arrays);
207
208 int trace_array_get(struct trace_array *this_tr)
209 {
210         struct trace_array *tr;
211         int ret = -ENODEV;
212
213         mutex_lock(&trace_types_lock);
214         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
215                 if (tr == this_tr) {
216                         tr->ref++;
217                         ret = 0;
218                         break;
219                 }
220         }
221         mutex_unlock(&trace_types_lock);
222
223         return ret;
224 }
225
226 static void __trace_array_put(struct trace_array *this_tr)
227 {
228         WARN_ON(!this_tr->ref);
229         this_tr->ref--;
230 }
231
232 void trace_array_put(struct trace_array *this_tr)
233 {
234         mutex_lock(&trace_types_lock);
235         __trace_array_put(this_tr);
236         mutex_unlock(&trace_types_lock);
237 }
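trace_array_get() and trace_array_put() pin a trace instance while one of its files is open, so the instance cannot disappear underneath a reader. A minimal sketch of the intended pairing, with hypothetical open/release handlers (not handlers from this file):

/* Illustrative only: the usual get-on-open / put-on-release pairing */
static int example_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	if (trace_array_get(tr) < 0)
		return -ENODEV;		/* the instance has been removed */

	filp->private_data = tr;
	return 0;
}

static int example_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);		/* drop the reference taken at open */
	return 0;
}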
238
239 int filter_check_discard(struct ftrace_event_file *file, void *rec,
240                          struct ring_buffer *buffer,
241                          struct ring_buffer_event *event)
242 {
243         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
244             !filter_match_preds(file->filter, rec)) {
245                 ring_buffer_discard_commit(buffer, event);
246                 return 1;
247         }
248
249         return 0;
250 }
251 EXPORT_SYMBOL_GPL(filter_check_discard);
252
253 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
254                               struct ring_buffer *buffer,
255                               struct ring_buffer_event *event)
256 {
257         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
258             !filter_match_preds(call->filter, rec)) {
259                 ring_buffer_discard_commit(buffer, event);
260                 return 1;
261         }
262
263         return 0;
264 }
265 EXPORT_SYMBOL_GPL(call_filter_check_discard);
266
267 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
268 {
269         u64 ts;
270
271         /* Early boot up does not have a buffer yet */
272         if (!buf->buffer)
273                 return trace_clock_local();
274
275         ts = ring_buffer_time_stamp(buf->buffer, cpu);
276         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
277
278         return ts;
279 }
280
281 cycle_t ftrace_now(int cpu)
282 {
283         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
284 }
285
286 /**
287  * tracing_is_enabled - Show if global_trace has been disabled
288  *
289  * Shows if the global trace has been enabled or not. It uses the
290  * mirror flag "buffer_disabled" so that it can be used in fast paths
291  * such as the irqsoff tracer. But it may be inaccurate due to races. If you
292  * need to know the accurate state, use tracing_is_on() which is a little
293  * slower, but accurate.
294  */
295 int tracing_is_enabled(void)
296 {
297         /*
298          * For quick access (irqsoff uses this in fast path), just
299          * return the mirror variable of the state of the ring buffer.
300          * It's a little racy, but we don't really care.
301          */
302         smp_rmb();
303         return !global_trace.buffer_disabled;
304 }
305
306 /*
307  * trace_buf_size is the size in bytes that is allocated
308  * for a buffer. Note, the number of bytes is always rounded
309  * to page size.
310  *
311  * This number is purposely set to a low value of 16384.
312  * If a dump on oops happens, it is much appreciated not to
313  * have to wait for all that output. Anyway, this is configurable
314  * at both boot time and run time.
315  */
316 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
317
318 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
319
320 /* trace_types holds a linked list of available tracers. */
321 static struct tracer            *trace_types __read_mostly;
322
323 /*
324  * trace_types_lock is used to protect the trace_types list.
325  */
326 DEFINE_MUTEX(trace_types_lock);
327
328 /*
329  * serialize the access of the ring buffer
330  *
331  * The ring buffer serializes readers, but that is only low-level protection.
332  * The validity of the events (returned by ring_buffer_peek(), etc.)
333  * is not protected by the ring buffer.
334  *
335  * The content of events may become garbage if we allow other processes to
336  * consume these events concurrently:
337  *   A) the page of the consumed events may become a normal page
338  *      (not a reader page) in the ring buffer, and this page will be
339  *      rewritten by the events producer.
340  *   B) the page of the consumed events may become a page for splice_read,
341  *      and this page will be returned to the system.
342  *
343  * These primitives allow multiple processes to access different cpu ring
344  * buffers concurrently.
345  *
346  * These primitives don't distinguish read-only and read-consume access.
347  * Multiple read-only accesses are also serialized.
348  */
349
350 #ifdef CONFIG_SMP
351 static DECLARE_RWSEM(all_cpu_access_lock);
352 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
353
354 static inline void trace_access_lock(int cpu)
355 {
356         if (cpu == RING_BUFFER_ALL_CPUS) {
357                 /* gain it for accessing the whole ring buffer. */
358                 down_write(&all_cpu_access_lock);
359         } else {
360                 /* gain it for accessing a cpu ring buffer. */
361
362                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
363                 down_read(&all_cpu_access_lock);
364
365                 /* Secondly block other access to this @cpu ring buffer. */
366                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
367         }
368 }
369
370 static inline void trace_access_unlock(int cpu)
371 {
372         if (cpu == RING_BUFFER_ALL_CPUS) {
373                 up_write(&all_cpu_access_lock);
374         } else {
375                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
376                 up_read(&all_cpu_access_lock);
377         }
378 }
379
380 static inline void trace_access_lock_init(void)
381 {
382         int cpu;
383
384         for_each_possible_cpu(cpu)
385                 mutex_init(&per_cpu(cpu_access_lock, cpu));
386 }
387
388 #else
389
390 static DEFINE_MUTEX(access_lock);
391
392 static inline void trace_access_lock(int cpu)
393 {
394         (void)cpu;
395         mutex_lock(&access_lock);
396 }
397
398 static inline void trace_access_unlock(int cpu)
399 {
400         (void)cpu;
401         mutex_unlock(&access_lock);
402 }
403
404 static inline void trace_access_lock_init(void)
405 {
406 }
407
408 #endif
409
410 /* trace_flags holds trace_options default values */
411 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
412         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
413         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
414         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
415
416 static void tracer_tracing_on(struct trace_array *tr)
417 {
418         if (tr->trace_buffer.buffer)
419                 ring_buffer_record_on(tr->trace_buffer.buffer);
420         /*
421          * This flag is looked at when buffers haven't been allocated
422          * yet, or by some tracers (like irqsoff), that just want to
423          * know if the ring buffer has been disabled, but it can handle
424          * races where it gets disabled but we still do a record.
425          * As the check is in the fast path of the tracers, it is more
426          * important to be fast than accurate.
427          */
428         tr->buffer_disabled = 0;
429         /* Make the flag seen by readers */
430         smp_wmb();
431 }
432
433 /**
434  * tracing_on - enable tracing buffers
435  *
436  * This function enables tracing buffers that may have been
437  * disabled with tracing_off.
438  */
439 void tracing_on(void)
440 {
441         tracer_tracing_on(&global_trace);
442 }
443 EXPORT_SYMBOL_GPL(tracing_on);
444
445 /**
446  * __trace_puts - write a constant string into the trace buffer.
447  * @ip:    The address of the caller
448  * @str:   The constant string to write
449  * @size:  The size of the string.
450  */
451 int __trace_puts(unsigned long ip, const char *str, int size)
452 {
453         struct ring_buffer_event *event;
454         struct ring_buffer *buffer;
455         struct print_entry *entry;
456         unsigned long irq_flags;
457         int alloc;
458
459         if (unlikely(tracing_selftest_running || tracing_disabled))
460                 return 0;
461
462         alloc = sizeof(*entry) + size + 2; /* possible \n added */
463
464         local_save_flags(irq_flags);
465         buffer = global_trace.trace_buffer.buffer;
466         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
467                                           irq_flags, preempt_count());
468         if (!event)
469                 return 0;
470
471         entry = ring_buffer_event_data(event);
472         entry->ip = ip;
473
474         memcpy(&entry->buf, str, size);
475
476         /* Add a newline if necessary */
477         if (entry->buf[size - 1] != '\n') {
478                 entry->buf[size] = '\n';
479                 entry->buf[size + 1] = '\0';
480         } else
481                 entry->buf[size] = '\0';
482
483         __buffer_unlock_commit(buffer, event);
484
485         return size;
486 }
487 EXPORT_SYMBOL_GPL(__trace_puts);
488
489 /**
490  * __trace_bputs - write the pointer to a constant string into trace buffer
491  * @ip:    The address of the caller
492  * @str:   The constant string to write to the buffer
493  */
494 int __trace_bputs(unsigned long ip, const char *str)
495 {
496         struct ring_buffer_event *event;
497         struct ring_buffer *buffer;
498         struct bputs_entry *entry;
499         unsigned long irq_flags;
500         int size = sizeof(struct bputs_entry);
501
502         if (unlikely(tracing_selftest_running || tracing_disabled))
503                 return 0;
504
505         local_save_flags(irq_flags);
506         buffer = global_trace.trace_buffer.buffer;
507         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
508                                           irq_flags, preempt_count());
509         if (!event)
510                 return 0;
511
512         entry = ring_buffer_event_data(event);
513         entry->ip                       = ip;
514         entry->str                      = str;
515
516         __buffer_unlock_commit(buffer, event);
517
518         return 1;
519 }
520 EXPORT_SYMBOL_GPL(__trace_bputs);
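__trace_puts() and __trace_bputs() are normally reached through the trace_puts() macro (defined in include/linux/kernel.h in kernels of this vintage), which picks the bputs variant when the argument is a build-time constant so that only the string's address has to be recorded. A hedged usage sketch, with a hypothetical caller:

/* Illustrative only: dropping quick markers into the trace buffer */
static void example_slow_path(int err)
{
	trace_puts("example: entered slow path\n");	/* constant string: pointer-sized event */

	if (err)
		trace_printk("example: slow path failed, err=%d\n", err);
}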
521
522 #ifdef CONFIG_TRACER_SNAPSHOT
523 /**
524  * trace_snapshot - take a snapshot of the current buffer.
525  *
526  * This causes a swap between the snapshot buffer and the current live
527  * tracing buffer. You can use this to take snapshots of the live
528  * trace when some condition is triggered, but continue to trace.
529  *
530  * Note, make sure to allocate the snapshot with either
531  * a tracing_snapshot_alloc(), or by doing it manually
532  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
533  *
534  * If the snapshot buffer is not allocated, it will stop tracing.
535  * Basically making a permanent snapshot.
536  */
537 void tracing_snapshot(void)
538 {
539         struct trace_array *tr = &global_trace;
540         struct tracer *tracer = tr->current_trace;
541         unsigned long flags;
542
543         if (in_nmi()) {
544                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
545                 internal_trace_puts("*** snapshot is being ignored        ***\n");
546                 return;
547         }
548
549         if (!tr->allocated_snapshot) {
550                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
551                 internal_trace_puts("*** stopping trace here!   ***\n");
552                 tracing_off();
553                 return;
554         }
555
556         /* Note, snapshot can not be used when the tracer uses it */
557         if (tracer->use_max_tr) {
558                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
559                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
560                 return;
561         }
562
563         local_irq_save(flags);
564         update_max_tr(tr, current, smp_processor_id());
565         local_irq_restore(flags);
566 }
567 EXPORT_SYMBOL_GPL(tracing_snapshot);
568
569 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
570                                         struct trace_buffer *size_buf, int cpu_id);
571 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
572
573 static int alloc_snapshot(struct trace_array *tr)
574 {
575         int ret;
576
577         if (!tr->allocated_snapshot) {
578
579                 /* allocate spare buffer */
580                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
581                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
582                 if (ret < 0)
583                         return ret;
584
585                 tr->allocated_snapshot = true;
586         }
587
588         return 0;
589 }
590
591 void free_snapshot(struct trace_array *tr)
592 {
593         /*
594          * We don't free the ring buffer; instead, we resize it because
595          * the max_tr ring buffer has some state (e.g. ring->clock) and
596          * we want to preserve it.
597          */
598         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
599         set_buffer_entries(&tr->max_buffer, 1);
600         tracing_reset_online_cpus(&tr->max_buffer);
601         tr->allocated_snapshot = false;
602 }
603
604 /**
605  * tracing_alloc_snapshot - allocate snapshot buffer.
606  *
607  * This only allocates the snapshot buffer if it isn't already
608  * allocated - it doesn't also take a snapshot.
609  *
610  * This is meant to be used in cases where the snapshot buffer needs
611  * to be set up for events that can't sleep but need to be able to
612  * trigger a snapshot.
613  */
614 int tracing_alloc_snapshot(void)
615 {
616         struct trace_array *tr = &global_trace;
617         int ret;
618
619         ret = alloc_snapshot(tr);
620         WARN_ON(ret < 0);
621
622         return ret;
623 }
624 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
625
626 /**
627  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
628  *
629  * This is similar to trace_snapshot(), but it will allocate the
630  * snapshot buffer if it isn't already allocated. Use this only
631  * where it is safe to sleep, as the allocation may sleep.
632  *
633  * This causes a swap between the snapshot buffer and the current live
634  * tracing buffer. You can use this to take snapshots of the live
635  * trace when some condition is triggered, but continue to trace.
636  */
637 void tracing_snapshot_alloc(void)
638 {
639         int ret;
640
641         ret = tracing_alloc_snapshot();
642         if (ret < 0)
643                 return;
644
645         tracing_snapshot();
646 }
647 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
648 #else
649 void tracing_snapshot(void)
650 {
651         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
652 }
653 EXPORT_SYMBOL_GPL(tracing_snapshot);
654 int tracing_alloc_snapshot(void)
655 {
656         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
657         return -ENODEV;
658 }
659 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
660 void tracing_snapshot_alloc(void)
661 {
662         /* Give warning */
663         tracing_snapshot();
664 }
665 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
666 #endif /* CONFIG_TRACER_SNAPSHOT */
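For reference, a minimal sketch of how the snapshot API above is meant to be used from kernel code: allocate the spare buffer once from a context that may sleep, then trigger snapshots wherever the interesting condition is detected. Everything below except the tracing_*snapshot*() calls is hypothetical:

/* Illustrative only */
static int __init example_watch_init(void)
{
	return tracing_alloc_snapshot();	/* may sleep; sets up the spare buffer */
}

static void example_condition_hit(void)
{
	/* Atomic-safe (but not NMI-safe); swaps the live and snapshot buffers */
	tracing_snapshot();
}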
667
668 static void tracer_tracing_off(struct trace_array *tr)
669 {
670         if (tr->trace_buffer.buffer)
671                 ring_buffer_record_off(tr->trace_buffer.buffer);
672         /*
673          * This flag is looked at when buffers haven't been allocated
674          * yet, or by some tracers (like irqsoff), that just want to
675          * know if the ring buffer has been disabled, but it can handle
676          * races where it gets disabled but we still do a record.
677          * As the check is in the fast path of the tracers, it is more
678          * important to be fast than accurate.
679          */
680         tr->buffer_disabled = 1;
681         /* Make the flag seen by readers */
682         smp_wmb();
683 }
684
685 /**
686  * tracing_off - turn off tracing buffers
687  *
688  * This function stops the tracing buffers from recording data.
689  * It does not disable any overhead the tracers themselves may
690  * be causing. This function simply causes all recording to
691  * the ring buffers to fail.
692  */
693 void tracing_off(void)
694 {
695         tracer_tracing_off(&global_trace);
696 }
697 EXPORT_SYMBOL_GPL(tracing_off);
698
699 void disable_trace_on_warning(void)
700 {
701         if (__disable_trace_on_warning)
702                 tracing_off();
703 }
704
705 /**
706  * tracer_tracing_is_on - show the real state of the ring buffer
707  * @tr: the trace array whose ring buffer state is queried
708  *
709  * Shows the real state of the ring buffer: whether it is enabled or not.
710  */
711 static int tracer_tracing_is_on(struct trace_array *tr)
712 {
713         if (tr->trace_buffer.buffer)
714                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
715         return !tr->buffer_disabled;
716 }
717
718 /**
719  * tracing_is_on - show state of ring buffers enabled
720  */
721 int tracing_is_on(void)
722 {
723         return tracer_tracing_is_on(&global_trace);
724 }
725 EXPORT_SYMBOL_GPL(tracing_is_on);
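tracing_on()/tracing_off() only gate recording into the ring buffer; the tracers themselves keep running. A common debugging pattern, sketched here with a hypothetical caller, is to freeze the buffer the moment a problem is detected so it still holds the events leading up to it:

/* Illustrative only */
static void example_check_status(int status)
{
	if (status < 0 && tracing_is_on()) {
		trace_printk("example: bad status %d, freezing trace\n", status);
		tracing_off();		/* keep the events that led up to this point */
	}
}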
726
727 static int __init set_buf_size(char *str)
728 {
729         unsigned long buf_size;
730
731         if (!str)
732                 return 0;
733         buf_size = memparse(str, &str);
734         /* nr_entries can not be zero */
735         if (buf_size == 0)
736                 return 0;
737         trace_buf_size = buf_size;
738         return 1;
739 }
740 __setup("trace_buf_size=", set_buf_size);
741
742 static int __init set_tracing_thresh(char *str)
743 {
744         unsigned long threshold;
745         int ret;
746
747         if (!str)
748                 return 0;
749         ret = kstrtoul(str, 0, &threshold);
750         if (ret < 0)
751                 return 0;
752         tracing_thresh = threshold * 1000;
753         return 1;
754 }
755 __setup("tracing_thresh=", set_tracing_thresh);
756
757 unsigned long nsecs_to_usecs(unsigned long nsecs)
758 {
759         return nsecs / 1000;
760 }
761
762 /* These must match the bit positions in trace_iterator_flags */
763 static const char *trace_options[] = {
764         "print-parent",
765         "sym-offset",
766         "sym-addr",
767         "verbose",
768         "raw",
769         "hex",
770         "bin",
771         "block",
772         "stacktrace",
773         "trace_printk",
774         "ftrace_preempt",
775         "branch",
776         "annotate",
777         "userstacktrace",
778         "sym-userobj",
779         "printk-msg-only",
780         "context-info",
781         "latency-format",
782         "sleep-time",
783         "graph-time",
784         "record-cmd",
785         "overwrite",
786         "disable_on_free",
787         "irq-info",
788         "markers",
789         "function-trace",
790         NULL
791 };
792
793 static struct {
794         u64 (*func)(void);
795         const char *name;
796         int in_ns;              /* is this clock in nanoseconds? */
797 } trace_clocks[] = {
798         { trace_clock_local,    "local",        1 },
799         { trace_clock_global,   "global",       1 },
800         { trace_clock_counter,  "counter",      0 },
801         { trace_clock_jiffies,  "uptime",       1 },
802         { trace_clock,          "perf",         1 },
803         ARCH_TRACE_CLOCKS
804 };
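Each entry in trace_clocks[] becomes a selectable timestamp source, and the in_ns flag tells the output code whether the value can be shown as seconds.microseconds. The active clock is picked at run time through the trace_clock file; for illustration, writing "global" there selects the globally ordered clock:

    echo global > /sys/kernel/debug/tracing/trace_clock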
805
806 /*
807  * trace_parser_get_init - gets the buffer for trace parser
808  */
809 int trace_parser_get_init(struct trace_parser *parser, int size)
810 {
811         memset(parser, 0, sizeof(*parser));
812
813         parser->buffer = kmalloc(size, GFP_KERNEL);
814         if (!parser->buffer)
815                 return 1;
816
817         parser->size = size;
818         return 0;
819 }
820
821 /*
822  * trace_parser_put - frees the buffer for trace parser
823  */
824 void trace_parser_put(struct trace_parser *parser)
825 {
826         kfree(parser->buffer);
827 }
828
829 /*
830  * trace_get_user - reads the user input string separated by space
831  * (matched by isspace(ch))
832  *
833  * For each string found the 'struct trace_parser' is updated,
834  * and the function returns.
835  *
836  * Returns number of bytes read.
837  *
838  * See kernel/trace/trace.h for 'struct trace_parser' details.
839  */
840 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
841         size_t cnt, loff_t *ppos)
842 {
843         char ch;
844         size_t read = 0;
845         ssize_t ret;
846
847         if (!*ppos)
848                 trace_parser_clear(parser);
849
850         ret = get_user(ch, ubuf++);
851         if (ret)
852                 goto out;
853
854         read++;
855         cnt--;
856
857         /*
858          * If the parser did not finish with the last write,
859          * continue reading the user input without skipping spaces.
860          */
861         if (!parser->cont) {
862                 /* skip white space */
863                 while (cnt && isspace(ch)) {
864                         ret = get_user(ch, ubuf++);
865                         if (ret)
866                                 goto out;
867                         read++;
868                         cnt--;
869                 }
870
871                 /* only spaces were written */
872                 if (isspace(ch)) {
873                         *ppos += read;
874                         ret = read;
875                         goto out;
876                 }
877
878                 parser->idx = 0;
879         }
880
881         /* read the non-space input */
882         while (cnt && !isspace(ch)) {
883                 if (parser->idx < parser->size - 1)
884                         parser->buffer[parser->idx++] = ch;
885                 else {
886                         ret = -EINVAL;
887                         goto out;
888                 }
889                 ret = get_user(ch, ubuf++);
890                 if (ret)
891                         goto out;
892                 read++;
893                 cnt--;
894         }
895
896         /* We either got finished input or we have to wait for another call. */
897         if (isspace(ch)) {
898                 parser->buffer[parser->idx] = 0;
899                 parser->cont = false;
900         } else if (parser->idx < parser->size - 1) {
901                 parser->cont = true;
902                 parser->buffer[parser->idx++] = ch;
903         } else {
904                 ret = -EINVAL;
905                 goto out;
906         }
907
908         *ppos += read;
909         ret = read;
910
911 out:
912         return ret;
913 }
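trace_get_user() is the building block for debugfs files that accept lists of names (set_ftrace_filter, set_event and friends): the caller allocates a parser, pulls one whitespace-separated token per write, and acts on each completed token. A minimal sketch of that calling pattern; example_write() and process_token() are hypothetical stand-ins, not functions from this file:

/* Illustrative only: the typical calling pattern for the parser above */
static void process_token(const char *tok)
{
	/* stand-in for whatever acts on one parsed name */
}

static ssize_t
example_write(struct file *filp, const char __user *ubuf,
	      size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 128))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read >= 0 && trace_parser_loaded(&parser) &&
	    !trace_parser_cont(&parser)) {
		parser.buffer[parser.idx] = 0;	/* terminate the token */
		process_token(parser.buffer);
	}

	trace_parser_put(&parser);
	return read;
}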
914
915 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
916 {
917         int len;
918         int ret;
919
920         if (!cnt)
921                 return 0;
922
923         if (s->len <= s->readpos)
924                 return -EBUSY;
925
926         len = s->len - s->readpos;
927         if (cnt > len)
928                 cnt = len;
929         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
930         if (ret == cnt)
931                 return -EFAULT;
932
933         cnt -= ret;
934
935         s->readpos += cnt;
936         return cnt;
937 }
938
939 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
940 {
941         int len;
942
943         if (s->len <= s->readpos)
944                 return -EBUSY;
945
946         len = s->len - s->readpos;
947         if (cnt > len)
948                 cnt = len;
949         memcpy(buf, s->buffer + s->readpos, cnt);
950
951         s->readpos += cnt;
952         return cnt;
953 }
954
955 /*
956  * ftrace_max_lock is used to protect the swapping of buffers
957  * when taking a max snapshot. The buffers themselves are
958  * protected by per_cpu spinlocks. But the action of the swap
959  * needs its own lock.
960  *
961  * This is defined as an arch_spinlock_t in order to help
962  * with performance when lockdep debugging is enabled.
963  *
964  * It is also used in other places outside of update_max_tr(),
965  * so it needs to be defined outside of the
966  * CONFIG_TRACER_MAX_TRACE #ifdef.
967  */
968 static arch_spinlock_t ftrace_max_lock =
969         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
970
971 unsigned long __read_mostly     tracing_thresh;
972
973 #ifdef CONFIG_TRACER_MAX_TRACE
974 unsigned long __read_mostly     tracing_max_latency;
975
976 /*
977  * Copy the new maximum trace into the separate maximum-trace
978  * structure. (this way the maximum trace is permanently saved,
979  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
980  */
981 static void
982 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
983 {
984         struct trace_buffer *trace_buf = &tr->trace_buffer;
985         struct trace_buffer *max_buf = &tr->max_buffer;
986         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
987         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
988
989         max_buf->cpu = cpu;
990         max_buf->time_start = data->preempt_timestamp;
991
992         max_data->saved_latency = tracing_max_latency;
993         max_data->critical_start = data->critical_start;
994         max_data->critical_end = data->critical_end;
995
996         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
997         max_data->pid = tsk->pid;
998         /*
999          * If tsk == current, then use current_uid(), as that does not use
1000          * RCU. The irq tracer can be called out of RCU scope.
1001          */
1002         if (tsk == current)
1003                 max_data->uid = current_uid();
1004         else
1005                 max_data->uid = task_uid(tsk);
1006
1007         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1008         max_data->policy = tsk->policy;
1009         max_data->rt_priority = tsk->rt_priority;
1010
1011         /* record this task's comm */
1012         tracing_record_cmdline(tsk);
1013 }
1014
1015 /**
1016  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1017  * @tr: tracer
1018  * @tsk: the task with the latency
1019  * @cpu: The cpu that initiated the trace.
1020  *
1021  * Flip the buffers between the @tr and the max_tr and record information
1022  * about which task was the cause of this latency.
1023  */
1024 void
1025 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1026 {
1027         struct ring_buffer *buf;
1028
1029         if (tr->stop_count)
1030                 return;
1031
1032         WARN_ON_ONCE(!irqs_disabled());
1033
1034         if (!tr->allocated_snapshot) {
1035                 /* Only the nop tracer should hit this when disabling */
1036                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1037                 return;
1038         }
1039
1040         arch_spin_lock(&ftrace_max_lock);
1041
1042         buf = tr->trace_buffer.buffer;
1043         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1044         tr->max_buffer.buffer = buf;
1045
1046         __update_max_tr(tr, tsk, cpu);
1047         arch_spin_unlock(&ftrace_max_lock);
1048 }
1049
1050 /**
1051  * update_max_tr_single - only copy one trace over, and reset the rest
1052  * @tr: tracer
1053  * @tsk: task with the latency
1054  * @cpu: the cpu of the buffer to copy.
1055  *
1056  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1057  */
1058 void
1059 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1060 {
1061         int ret;
1062
1063         if (tr->stop_count)
1064                 return;
1065
1066         WARN_ON_ONCE(!irqs_disabled());
1067         if (!tr->allocated_snapshot) {
1068                 /* Only the nop tracer should hit this when disabling */
1069                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1070                 return;
1071         }
1072
1073         arch_spin_lock(&ftrace_max_lock);
1074
1075         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1076
1077         if (ret == -EBUSY) {
1078                 /*
1079                  * We failed to swap the buffer due to a commit taking
1080                  * place on this CPU. We fail to record, but we reset
1081                  * the max trace buffer (no one writes directly to it)
1082                  * and flag that it failed.
1083                  */
1084                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1085                         "Failed to swap buffers due to commit in progress\n");
1086         }
1087
1088         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1089
1090         __update_max_tr(tr, tsk, cpu);
1091         arch_spin_unlock(&ftrace_max_lock);
1092 }
1093 #endif /* CONFIG_TRACER_MAX_TRACE */
1094
1095 static void default_wait_pipe(struct trace_iterator *iter)
1096 {
1097         /* Iterators are static, they should be filled or empty */
1098         if (trace_buffer_iter(iter, iter->cpu_file))
1099                 return;
1100
1101         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1102 }
1103
1104 #ifdef CONFIG_FTRACE_STARTUP_TEST
1105 static int run_tracer_selftest(struct tracer *type)
1106 {
1107         struct trace_array *tr = &global_trace;
1108         struct tracer *saved_tracer = tr->current_trace;
1109         int ret;
1110
1111         if (!type->selftest || tracing_selftest_disabled)
1112                 return 0;
1113
1114         /*
1115          * Run a selftest on this tracer.
1116          * Here we reset the trace buffer, and set the current
1117          * tracer to be this tracer. The tracer can then run some
1118          * internal tracing to verify that everything is in order.
1119          * If we fail, we do not register this tracer.
1120          */
1121         tracing_reset_online_cpus(&tr->trace_buffer);
1122
1123         tr->current_trace = type;
1124
1125 #ifdef CONFIG_TRACER_MAX_TRACE
1126         if (type->use_max_tr) {
1127                 /* If we expanded the buffers, make sure the max is expanded too */
1128                 if (ring_buffer_expanded)
1129                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1130                                            RING_BUFFER_ALL_CPUS);
1131                 tr->allocated_snapshot = true;
1132         }
1133 #endif
1134
1135         /* the test is responsible for initializing and enabling */
1136         pr_info("Testing tracer %s: ", type->name);
1137         ret = type->selftest(type, tr);
1138         /* the test is responsible for resetting too */
1139         tr->current_trace = saved_tracer;
1140         if (ret) {
1141                 printk(KERN_CONT "FAILED!\n");
1142                 /* Add the warning after printing 'FAILED' */
1143                 WARN_ON(1);
1144                 return -1;
1145         }
1146         /* Only reset on passing, to avoid touching corrupted buffers */
1147         tracing_reset_online_cpus(&tr->trace_buffer);
1148
1149 #ifdef CONFIG_TRACER_MAX_TRACE
1150         if (type->use_max_tr) {
1151                 tr->allocated_snapshot = false;
1152
1153                 /* Shrink the max buffer again */
1154                 if (ring_buffer_expanded)
1155                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1156                                            RING_BUFFER_ALL_CPUS);
1157         }
1158 #endif
1159
1160         printk(KERN_CONT "PASSED\n");
1161         return 0;
1162 }
1163 #else
1164 static inline int run_tracer_selftest(struct tracer *type)
1165 {
1166         return 0;
1167 }
1168 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1169
1170 /**
1171  * register_tracer - register a tracer with the ftrace system.
1172  * @type: the plugin for the tracer
1173  *
1174  * Register a new plugin tracer.
1175  */
1176 int register_tracer(struct tracer *type)
1177 {
1178         struct tracer *t;
1179         int ret = 0;
1180
1181         if (!type->name) {
1182                 pr_info("Tracer must have a name\n");
1183                 return -1;
1184         }
1185
1186         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1187                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1188                 return -1;
1189         }
1190
1191         mutex_lock(&trace_types_lock);
1192
1193         tracing_selftest_running = true;
1194
1195         for (t = trace_types; t; t = t->next) {
1196                 if (strcmp(type->name, t->name) == 0) {
1197                         /* already found */
1198                         pr_info("Tracer %s already registered\n",
1199                                 type->name);
1200                         ret = -1;
1201                         goto out;
1202                 }
1203         }
1204
1205         if (!type->set_flag)
1206                 type->set_flag = &dummy_set_flag;
1207         if (!type->flags)
1208                 type->flags = &dummy_tracer_flags;
1209         else
1210                 if (!type->flags->opts)
1211                         type->flags->opts = dummy_tracer_opt;
1212         if (!type->wait_pipe)
1213                 type->wait_pipe = default_wait_pipe;
1214
1215         ret = run_tracer_selftest(type);
1216         if (ret < 0)
1217                 goto out;
1218
1219         type->next = trace_types;
1220         trace_types = type;
1221
1222  out:
1223         tracing_selftest_running = false;
1224         mutex_unlock(&trace_types_lock);
1225
1226         if (ret || !default_bootup_tracer)
1227                 goto out_unlock;
1228
1229         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1230                 goto out_unlock;
1231
1232         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1233         /* Do we want this tracer to start on bootup? */
1234         tracing_set_tracer(&global_trace, type->name);
1235         default_bootup_tracer = NULL;
1236         /* Disable other selftests; they would disturb the running tracer. */
1237         tracing_selftest_disabled = true;
1238 #ifdef CONFIG_FTRACE_STARTUP_TEST
1239         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1240                type->name);
1241 #endif
1242
1243  out_unlock:
1244         return ret;
1245 }
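A tracer plugin only needs to fill in a struct tracer and call register_tracer(); the fields it leaves NULL are backfilled above with the dummy flags and the default wait_pipe. A hedged minimal sketch; the "example" tracer below is hypothetical, not one of the in-tree tracers:

/* Illustrative only */
static int example_tracer_init(struct trace_array *tr)
{
	/* arm whatever hooks this tracer needs */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* undo example_tracer_init() */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);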
1246
1247 void tracing_reset(struct trace_buffer *buf, int cpu)
1248 {
1249         struct ring_buffer *buffer = buf->buffer;
1250
1251         if (!buffer)
1252                 return;
1253
1254         ring_buffer_record_disable(buffer);
1255
1256         /* Make sure all commits have finished */
1257         synchronize_sched();
1258         ring_buffer_reset_cpu(buffer, cpu);
1259
1260         ring_buffer_record_enable(buffer);
1261 }
1262
1263 void tracing_reset_online_cpus(struct trace_buffer *buf)
1264 {
1265         struct ring_buffer *buffer = buf->buffer;
1266         int cpu;
1267
1268         if (!buffer)
1269                 return;
1270
1271         ring_buffer_record_disable(buffer);
1272
1273         /* Make sure all commits have finished */
1274         synchronize_sched();
1275
1276         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1277
1278         for_each_online_cpu(cpu)
1279                 ring_buffer_reset_cpu(buffer, cpu);
1280
1281         ring_buffer_record_enable(buffer);
1282 }
1283
1284 /* Must have trace_types_lock held */
1285 void tracing_reset_all_online_cpus(void)
1286 {
1287         struct trace_array *tr;
1288
1289         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1290                 tracing_reset_online_cpus(&tr->trace_buffer);
1291 #ifdef CONFIG_TRACER_MAX_TRACE
1292                 tracing_reset_online_cpus(&tr->max_buffer);
1293 #endif
1294         }
1295 }
1296
1297 #define SAVED_CMDLINES 128
1298 #define NO_CMDLINE_MAP UINT_MAX
1299 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1300 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1301 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1302 static int cmdline_idx;
1303 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1304
1305 /* temporarily disable recording */
1306 static atomic_t trace_record_cmdline_disabled __read_mostly;
1307
1308 static void trace_init_cmdlines(void)
1309 {
1310         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1311         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1312         cmdline_idx = 0;
1313 }
1314
1315 int is_tracing_stopped(void)
1316 {
1317         return global_trace.stop_count;
1318 }
1319
1320 /**
1321  * tracing_start - quick start of the tracer
1322  *
1323  * If tracing is enabled but was stopped by tracing_stop,
1324  * this will start the tracer back up.
1325  */
1326 void tracing_start(void)
1327 {
1328         struct ring_buffer *buffer;
1329         unsigned long flags;
1330
1331         if (tracing_disabled)
1332                 return;
1333
1334         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1335         if (--global_trace.stop_count) {
1336                 if (global_trace.stop_count < 0) {
1337                         /* Someone screwed up their debugging */
1338                         WARN_ON_ONCE(1);
1339                         global_trace.stop_count = 0;
1340                 }
1341                 goto out;
1342         }
1343
1344         /* Prevent the buffers from switching */
1345         arch_spin_lock(&ftrace_max_lock);
1346
1347         buffer = global_trace.trace_buffer.buffer;
1348         if (buffer)
1349                 ring_buffer_record_enable(buffer);
1350
1351 #ifdef CONFIG_TRACER_MAX_TRACE
1352         buffer = global_trace.max_buffer.buffer;
1353         if (buffer)
1354                 ring_buffer_record_enable(buffer);
1355 #endif
1356
1357         arch_spin_unlock(&ftrace_max_lock);
1358
1359         ftrace_start();
1360  out:
1361         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1362 }
1363
1364 static void tracing_start_tr(struct trace_array *tr)
1365 {
1366         struct ring_buffer *buffer;
1367         unsigned long flags;
1368
1369         if (tracing_disabled)
1370                 return;
1371
1372         /* If global, we need to also start the max tracer */
1373         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1374                 return tracing_start();
1375
1376         raw_spin_lock_irqsave(&tr->start_lock, flags);
1377
1378         if (--tr->stop_count) {
1379                 if (tr->stop_count < 0) {
1380                         /* Someone screwed up their debugging */
1381                         WARN_ON_ONCE(1);
1382                         tr->stop_count = 0;
1383                 }
1384                 goto out;
1385         }
1386
1387         buffer = tr->trace_buffer.buffer;
1388         if (buffer)
1389                 ring_buffer_record_enable(buffer);
1390
1391  out:
1392         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1393 }
1394
1395 /**
1396  * tracing_stop - quick stop of the tracer
1397  *
1398  * Light weight way to stop tracing. Use in conjunction with
1399  * tracing_start.
1400  */
1401 void tracing_stop(void)
1402 {
1403         struct ring_buffer *buffer;
1404         unsigned long flags;
1405
1406         ftrace_stop();
1407         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1408         if (global_trace.stop_count++)
1409                 goto out;
1410
1411         /* Prevent the buffers from switching */
1412         arch_spin_lock(&ftrace_max_lock);
1413
1414         buffer = global_trace.trace_buffer.buffer;
1415         if (buffer)
1416                 ring_buffer_record_disable(buffer);
1417
1418 #ifdef CONFIG_TRACER_MAX_TRACE
1419         buffer = global_trace.max_buffer.buffer;
1420         if (buffer)
1421                 ring_buffer_record_disable(buffer);
1422 #endif
1423
1424         arch_spin_unlock(&ftrace_max_lock);
1425
1426  out:
1427         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1428 }
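tracing_stop() and tracing_start() nest through stop_count, so several independent callers can each bracket a region and recording only resumes when the last one calls tracing_start(). A sketch of the intended pairing, with a hypothetical caller:

/* Illustrative only */
static void example_quiesce_buffers(void)
{
	tracing_stop();		/* bumps stop_count and disables recording */

	/* ... inspect or copy out the buffers while they are quiet ... */

	tracing_start();	/* recording resumes once the count drops back to zero */
}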
1429
1430 static void tracing_stop_tr(struct trace_array *tr)
1431 {
1432         struct ring_buffer *buffer;
1433         unsigned long flags;
1434
1435         /* If global, we need to also stop the max tracer */
1436         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1437                 return tracing_stop();
1438
1439         raw_spin_lock_irqsave(&tr->start_lock, flags);
1440         if (tr->stop_count++)
1441                 goto out;
1442
1443         buffer = tr->trace_buffer.buffer;
1444         if (buffer)
1445                 ring_buffer_record_disable(buffer);
1446
1447  out:
1448         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1449 }
1450
1451 void trace_stop_cmdline_recording(void);
1452
1453 static void trace_save_cmdline(struct task_struct *tsk)
1454 {
1455         unsigned pid, idx;
1456
1457         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1458                 return;
1459
1460         /*
1461          * It's not the end of the world if we don't get
1462          * the lock, but we also don't want to spin
1463          * nor do we want to disable interrupts,
1464          * so if we miss here, then better luck next time.
1465          */
1466         if (!arch_spin_trylock(&trace_cmdline_lock))
1467                 return;
1468
1469         idx = map_pid_to_cmdline[tsk->pid];
1470         if (idx == NO_CMDLINE_MAP) {
1471                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1472
1473                 /*
1474                  * Check whether the cmdline buffer at idx has a pid
1475                  * mapped. We are going to overwrite that entry so we
1476                  * need to clear the map_pid_to_cmdline. Otherwise we
1477                  * would read the new comm for the old pid.
1478                  */
1479                 pid = map_cmdline_to_pid[idx];
1480                 if (pid != NO_CMDLINE_MAP)
1481                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1482
1483                 map_cmdline_to_pid[idx] = tsk->pid;
1484                 map_pid_to_cmdline[tsk->pid] = idx;
1485
1486                 cmdline_idx = idx;
1487         }
1488
1489         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1490
1491         arch_spin_unlock(&trace_cmdline_lock);
1492 }
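A worked example of the double mapping above (values are illustrative): if trace_save_cmdline() stores task 1234 ("bash") into slot 5, then map_cmdline_to_pid[5] == 1234, map_pid_to_cmdline[1234] == 5 and saved_cmdlines[5] holds "bash". When slot 5 is later recycled for another PID, map_pid_to_cmdline[1234] is reset to NO_CMDLINE_MAP first, so trace_find_cmdline(1234) falls back to "<...>" instead of reporting the new task's comm.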
1493
1494 void trace_find_cmdline(int pid, char comm[])
1495 {
1496         unsigned map;
1497
1498         if (!pid) {
1499                 strcpy(comm, "<idle>");
1500                 return;
1501         }
1502
1503         if (WARN_ON_ONCE(pid < 0)) {
1504                 strcpy(comm, "<XXX>");
1505                 return;
1506         }
1507
1508         if (pid > PID_MAX_DEFAULT) {
1509                 strcpy(comm, "<...>");
1510                 return;
1511         }
1512
1513         preempt_disable();
1514         arch_spin_lock(&trace_cmdline_lock);
1515         map = map_pid_to_cmdline[pid];
1516         if (map != NO_CMDLINE_MAP)
1517                 strcpy(comm, saved_cmdlines[map]);
1518         else
1519                 strcpy(comm, "<...>");
1520
1521         arch_spin_unlock(&trace_cmdline_lock);
1522         preempt_enable();
1523 }
1524
1525 void tracing_record_cmdline(struct task_struct *tsk)
1526 {
1527         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1528                 return;
1529
1530         if (!__this_cpu_read(trace_cmdline_save))
1531                 return;
1532
1533         __this_cpu_write(trace_cmdline_save, false);
1534
1535         trace_save_cmdline(tsk);
1536 }
1537
1538 void
1539 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1540                              int pc)
1541 {
1542         struct task_struct *tsk = current;
1543
1544         entry->preempt_count            = pc & 0xff;
1545         entry->pid                      = (tsk) ? tsk->pid : 0;
1546         entry->flags =
1547 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1548                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1549 #else
1550                 TRACE_FLAG_IRQS_NOSUPPORT |
1551 #endif
1552                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1553                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1554                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1555                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1556 }
1557 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1558
1559 struct ring_buffer_event *
1560 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1561                           int type,
1562                           unsigned long len,
1563                           unsigned long flags, int pc)
1564 {
1565         struct ring_buffer_event *event;
1566
1567         event = ring_buffer_lock_reserve(buffer, len);
1568         if (event != NULL) {
1569                 struct trace_entry *ent = ring_buffer_event_data(event);
1570
1571                 tracing_generic_entry_update(ent, flags, pc);
1572                 ent->type = type;
1573         }
1574
1575         return event;
1576 }
1577
1578 void
1579 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1580 {
1581         __this_cpu_write(trace_cmdline_save, true);
1582         ring_buffer_unlock_commit(buffer, event);
1583 }
1584
1585 static inline void
1586 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1587                              struct ring_buffer_event *event,
1588                              unsigned long flags, int pc)
1589 {
1590         __buffer_unlock_commit(buffer, event);
1591
1592         ftrace_trace_stack(buffer, flags, 6, pc);
1593         ftrace_trace_userstack(buffer, flags, pc);
1594 }
1595
1596 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1597                                 struct ring_buffer_event *event,
1598                                 unsigned long flags, int pc)
1599 {
1600         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1601 }
1602 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1603
1604 struct ring_buffer_event *
1605 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1606                           struct ftrace_event_file *ftrace_file,
1607                           int type, unsigned long len,
1608                           unsigned long flags, int pc)
1609 {
1610         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1611         return trace_buffer_lock_reserve(*current_rb,
1612                                          type, len, flags, pc);
1613 }
1614 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1615
1616 struct ring_buffer_event *
1617 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1618                                   int type, unsigned long len,
1619                                   unsigned long flags, int pc)
1620 {
1621         *current_rb = global_trace.trace_buffer.buffer;
1622         return trace_buffer_lock_reserve(*current_rb,
1623                                          type, len, flags, pc);
1624 }
1625 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1626
1627 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1628                                         struct ring_buffer_event *event,
1629                                         unsigned long flags, int pc)
1630 {
1631         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1632 }
1633 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1634
1635 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1636                                      struct ring_buffer_event *event,
1637                                      unsigned long flags, int pc,
1638                                      struct pt_regs *regs)
1639 {
1640         __buffer_unlock_commit(buffer, event);
1641
1642         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1643         ftrace_trace_userstack(buffer, flags, pc);
1644 }
1645 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1646
1647 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1648                                          struct ring_buffer_event *event)
1649 {
1650         ring_buffer_discard_commit(buffer, event);
1651 }
1652 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1653
1654 void
1655 trace_function(struct trace_array *tr,
1656                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1657                int pc)
1658 {
1659         struct ftrace_event_call *call = &event_function;
1660         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1661         struct ring_buffer_event *event;
1662         struct ftrace_entry *entry;
1663
1664         /* If we are reading the ring buffer, don't trace */
1665         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1666                 return;
1667
1668         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1669                                           flags, pc);
1670         if (!event)
1671                 return;
1672         entry   = ring_buffer_event_data(event);
1673         entry->ip                       = ip;
1674         entry->parent_ip                = parent_ip;
1675
1676         if (!call_filter_check_discard(call, entry, buffer, event))
1677                 __buffer_unlock_commit(buffer, event);
1678 }
1679
1680 #ifdef CONFIG_STACKTRACE
1681
1682 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1683 struct ftrace_stack {
1684         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1685 };
1686
1687 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1688 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1689
1690 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1691                                  unsigned long flags,
1692                                  int skip, int pc, struct pt_regs *regs)
1693 {
1694         struct ftrace_event_call *call = &event_kernel_stack;
1695         struct ring_buffer_event *event;
1696         struct stack_entry *entry;
1697         struct stack_trace trace;
1698         int use_stack;
1699         int size = FTRACE_STACK_ENTRIES;
1700
1701         trace.nr_entries        = 0;
1702         trace.skip              = skip;
1703
1704         /*
1705          * Since events can happen in NMIs, there's no safe way to
1706          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
1707          * or NMI comes in, it will just have to use the default
1708          * FTRACE_STACK_ENTRIES-sized stack in the entry itself.
1709          */
1710         preempt_disable_notrace();
1711
1712         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1713         /*
1714          * We don't need any atomic variables, just a barrier.
1715          * If an interrupt comes in, we don't care, because it would
1716          * have exited and put the counter back to what we want.
1717          * We just need a barrier to keep gcc from moving things
1718          * around.
1719          */
1720         barrier();
1721         if (use_stack == 1) {
1722                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1723                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1724
1725                 if (regs)
1726                         save_stack_trace_regs(regs, &trace);
1727                 else
1728                         save_stack_trace(&trace);
1729
1730                 if (trace.nr_entries > size)
1731                         size = trace.nr_entries;
1732         } else
1733                 /* From now on, use_stack is a boolean */
1734                 use_stack = 0;
1735
1736         size *= sizeof(unsigned long);
1737
1738         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1739                                           sizeof(*entry) + size, flags, pc);
1740         if (!event)
1741                 goto out;
1742         entry = ring_buffer_event_data(event);
1743
1744         memset(&entry->caller, 0, size);
1745
1746         if (use_stack)
1747                 memcpy(&entry->caller, trace.entries,
1748                        trace.nr_entries * sizeof(unsigned long));
1749         else {
1750                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1751                 trace.entries           = entry->caller;
1752                 if (regs)
1753                         save_stack_trace_regs(regs, &trace);
1754                 else
1755                         save_stack_trace(&trace);
1756         }
1757
1758         entry->size = trace.nr_entries;
1759
1760         if (!call_filter_check_discard(call, entry, buffer, event))
1761                 __buffer_unlock_commit(buffer, event);
1762
1763  out:
1764         /* Again, don't let gcc optimize things here */
1765         barrier();
1766         __this_cpu_dec(ftrace_stack_reserve);
1767         preempt_enable_notrace();
1768
1769 }
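/*
 * A rough sketch of how the reservation above behaves under nesting
 * (illustrative only, not an additional code path):
 *
 *	__ftrace_trace_stack()		use_stack == 1, use the big
 *					per-cpu ftrace_stack
 *	  -> NMI arrives
 *	     __ftrace_trace_stack()	use_stack == 2, fall back to the
 *					FTRACE_STACK_ENTRIES-sized array
 *					inside the stack_entry itself
 *	     __this_cpu_dec()		reservation back to 1
 *	  <- NMI returns
 *	__this_cpu_dec()		reservation back to 0
 */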
1770
1771 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1772                              int skip, int pc, struct pt_regs *regs)
1773 {
1774         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1775                 return;
1776
1777         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1778 }
1779
1780 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1781                         int skip, int pc)
1782 {
1783         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1784                 return;
1785
1786         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1787 }
1788
1789 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1790                    int pc)
1791 {
1792         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1793 }
1794
1795 /**
1796  * trace_dump_stack - record a stack back trace in the trace buffer
1797  * @skip: Number of functions to skip (helper handlers)
1798  */
1799 void trace_dump_stack(int skip)
1800 {
1801         unsigned long flags;
1802
1803         if (tracing_disabled || tracing_selftest_running)
1804                 return;
1805
1806         local_save_flags(flags);
1807
1808         /*
1809          * Skip 3 more; that seems to get us to the caller of
1810          * this function.
1811          */
1812         skip += 3;
1813         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1814                              flags, skip, preempt_count(), NULL);
1815 }
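/*
 * Example use of trace_dump_stack() from other kernel code (a minimal
 * sketch; the call site and condition are made up for illustration):
 *
 *	if (WARN_ON_ONCE(unexpected_state))
 *		trace_dump_stack(0);
 *
 * With skip == 0 the recorded stack starts at the caller, and the
 * resulting stack entry shows up in the "trace" file alongside the
 * surrounding events.
 */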
1816
1817 static DEFINE_PER_CPU(int, user_stack_count);
1818
1819 void
1820 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1821 {
1822         struct ftrace_event_call *call = &event_user_stack;
1823         struct ring_buffer_event *event;
1824         struct userstack_entry *entry;
1825         struct stack_trace trace;
1826
1827         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1828                 return;
1829
1830         /*
1831          * NMIs cannot handle page faults, even with fixups.
1832          * Saving the user stack can (and often does) fault.
1833          */
1834         if (unlikely(in_nmi()))
1835                 return;
1836
1837         /*
1838          * prevent recursion, since the user stack tracing may
1839          * trigger other kernel events.
1840          */
1841         preempt_disable();
1842         if (__this_cpu_read(user_stack_count))
1843                 goto out;
1844
1845         __this_cpu_inc(user_stack_count);
1846
1847         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1848                                           sizeof(*entry), flags, pc);
1849         if (!event)
1850                 goto out_drop_count;
1851         entry   = ring_buffer_event_data(event);
1852
1853         entry->tgid             = current->tgid;
1854         memset(&entry->caller, 0, sizeof(entry->caller));
1855
1856         trace.nr_entries        = 0;
1857         trace.max_entries       = FTRACE_STACK_ENTRIES;
1858         trace.skip              = 0;
1859         trace.entries           = entry->caller;
1860
1861         save_stack_trace_user(&trace);
1862         if (!call_filter_check_discard(call, entry, buffer, event))
1863                 __buffer_unlock_commit(buffer, event);
1864
1865  out_drop_count:
1866         __this_cpu_dec(user_stack_count);
1867  out:
1868         preempt_enable();
1869 }
1870
1871 #ifdef UNUSED
1872 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1873 {
1874         ftrace_trace_userstack(tr, flags, preempt_count());
1875 }
1876 #endif /* UNUSED */
1877
1878 #endif /* CONFIG_STACKTRACE */
1879
1880 /* created for use with alloc_percpu */
1881 struct trace_buffer_struct {
1882         char buffer[TRACE_BUF_SIZE];
1883 };
1884
1885 static struct trace_buffer_struct *trace_percpu_buffer;
1886 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1887 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1888 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1889
1890 /*
1891  * The buffer used depends on the context. There is a per-cpu
1892  * buffer for normal context, softirq context, hard irq context and
1893  * NMI context. This allows for lockless recording.
1894  *
1895  * Note, if the buffers failed to be allocated, then this returns NULL.
1896  */
1897 static char *get_trace_buf(void)
1898 {
1899         struct trace_buffer_struct *percpu_buffer;
1900
1901         /*
1902          * If we have allocated per cpu buffers, then we do not
1903          * need to do any locking.
1904          */
1905         if (in_nmi())
1906                 percpu_buffer = trace_percpu_nmi_buffer;
1907         else if (in_irq())
1908                 percpu_buffer = trace_percpu_irq_buffer;
1909         else if (in_softirq())
1910                 percpu_buffer = trace_percpu_sirq_buffer;
1911         else
1912                 percpu_buffer = trace_percpu_buffer;
1913
1914         if (!percpu_buffer)
1915                 return NULL;
1916
1917         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1918 }
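/*
 * A minimal sketch of the context-to-buffer mapping implemented above
 * (illustrative only):
 *
 *	context at the time of the write	buffer used
 *	--------------------------------	------------------------
 *	NMI					trace_percpu_nmi_buffer
 *	hard interrupt				trace_percpu_irq_buffer
 *	softirq					trace_percpu_sirq_buffer
 *	anything else (process context)		trace_percpu_buffer
 *
 * Since each context on a given CPU gets its own buffer, a trace_printk()
 * that interrupts another trace_printk() cannot corrupt its output,
 * which is what makes the recording lockless.
 */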
1919
1920 static int alloc_percpu_trace_buffer(void)
1921 {
1922         struct trace_buffer_struct *buffers;
1923         struct trace_buffer_struct *sirq_buffers;
1924         struct trace_buffer_struct *irq_buffers;
1925         struct trace_buffer_struct *nmi_buffers;
1926
1927         buffers = alloc_percpu(struct trace_buffer_struct);
1928         if (!buffers)
1929                 goto err_warn;
1930
1931         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1932         if (!sirq_buffers)
1933                 goto err_sirq;
1934
1935         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1936         if (!irq_buffers)
1937                 goto err_irq;
1938
1939         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1940         if (!nmi_buffers)
1941                 goto err_nmi;
1942
1943         trace_percpu_buffer = buffers;
1944         trace_percpu_sirq_buffer = sirq_buffers;
1945         trace_percpu_irq_buffer = irq_buffers;
1946         trace_percpu_nmi_buffer = nmi_buffers;
1947
1948         return 0;
1949
1950  err_nmi:
1951         free_percpu(irq_buffers);
1952  err_irq:
1953         free_percpu(sirq_buffers);
1954  err_sirq:
1955         free_percpu(buffers);
1956  err_warn:
1957         WARN(1, "Could not allocate percpu trace_printk buffer");
1958         return -ENOMEM;
1959 }
1960
1961 static int buffers_allocated;
1962
1963 void trace_printk_init_buffers(void)
1964 {
1965         if (buffers_allocated)
1966                 return;
1967
1968         if (alloc_percpu_trace_buffer())
1969                 return;
1970
1971         pr_info("ftrace: Allocated trace_printk buffers\n");
1972
1973         /* Expand the buffers to set size */
1974         tracing_update_buffers();
1975
1976         buffers_allocated = 1;
1977
1978         /*
1979          * trace_printk_init_buffers() can be called by modules.
1980          * If that happens, then we need to start cmdline recording
1981          * directly here. If the global_trace.buffer is already
1982          * allocated here, then this was called by module code.
1983          */
1984         if (global_trace.trace_buffer.buffer)
1985                 tracing_start_cmdline_record();
1986 }
1987
1988 void trace_printk_start_comm(void)
1989 {
1990         /* Start tracing comms if trace printk is set */
1991         if (!buffers_allocated)
1992                 return;
1993         tracing_start_cmdline_record();
1994 }
1995
1996 static void trace_printk_start_stop_comm(int enabled)
1997 {
1998         if (!buffers_allocated)
1999                 return;
2000
2001         if (enabled)
2002                 tracing_start_cmdline_record();
2003         else
2004                 tracing_stop_cmdline_record();
2005 }
2006
2007 /**
2008  * trace_vbprintk - write a binary message to the tracing buffer
2009  *
2010  */
2011 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2012 {
2013         struct ftrace_event_call *call = &event_bprint;
2014         struct ring_buffer_event *event;
2015         struct ring_buffer *buffer;
2016         struct trace_array *tr = &global_trace;
2017         struct bprint_entry *entry;
2018         unsigned long flags;
2019         char *tbuffer;
2020         int len = 0, size, pc;
2021
2022         if (unlikely(tracing_selftest_running || tracing_disabled))
2023                 return 0;
2024
2025         /* Don't pollute graph traces with trace_vprintk internals */
2026         pause_graph_tracing();
2027
2028         pc = preempt_count();
2029         preempt_disable_notrace();
2030
2031         tbuffer = get_trace_buf();
2032         if (!tbuffer) {
2033                 len = 0;
2034                 goto out;
2035         }
2036
2037         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2038
2039         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2040                 goto out;
2041
2042         local_save_flags(flags);
2043         size = sizeof(*entry) + sizeof(u32) * len;
2044         buffer = tr->trace_buffer.buffer;
2045         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2046                                           flags, pc);
2047         if (!event)
2048                 goto out;
2049         entry = ring_buffer_event_data(event);
2050         entry->ip                       = ip;
2051         entry->fmt                      = fmt;
2052
2053         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2054         if (!call_filter_check_discard(call, entry, buffer, event)) {
2055                 __buffer_unlock_commit(buffer, event);
2056                 ftrace_trace_stack(buffer, flags, 6, pc);
2057         }
2058
2059 out:
2060         preempt_enable_notrace();
2061         unpause_graph_tracing();
2062
2063         return len;
2064 }
2065 EXPORT_SYMBOL_GPL(trace_vbprintk);
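/*
 * A minimal usage sketch (assuming the usual trace_printk() wrapper in
 * include/linux/kernel.h): a compile-time constant format string ends
 * up here via __trace_bprintk(), so only the format pointer and the
 * vbin_printf()-packed arguments are stored, e.g.:
 *
 *	trace_printk("wakeup: pid=%d cpu=%d\n", p->pid, cpu);
 *
 * The human-readable text is rebuilt from entry->fmt when the trace is
 * read, which keeps the write path cheap.
 */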
2066
2067 static int
2068 __trace_array_vprintk(struct ring_buffer *buffer,
2069                       unsigned long ip, const char *fmt, va_list args)
2070 {
2071         struct ftrace_event_call *call = &event_print;
2072         struct ring_buffer_event *event;
2073         int len = 0, size, pc;
2074         struct print_entry *entry;
2075         unsigned long flags;
2076         char *tbuffer;
2077
2078         if (tracing_disabled || tracing_selftest_running)
2079                 return 0;
2080
2081         /* Don't pollute graph traces with trace_vprintk internals */
2082         pause_graph_tracing();
2083
2084         pc = preempt_count();
2085         preempt_disable_notrace();
2086
2087
2088         tbuffer = get_trace_buf();
2089         if (!tbuffer) {
2090                 len = 0;
2091                 goto out;
2092         }
2093
2094         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2095         if (len > TRACE_BUF_SIZE)
2096                 goto out;
2097
2098         local_save_flags(flags);
2099         size = sizeof(*entry) + len + 1;
2100         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2101                                           flags, pc);
2102         if (!event)
2103                 goto out;
2104         entry = ring_buffer_event_data(event);
2105         entry->ip = ip;
2106
2107         memcpy(&entry->buf, tbuffer, len);
2108         entry->buf[len] = '\0';
2109         if (!call_filter_check_discard(call, entry, buffer, event)) {
2110                 __buffer_unlock_commit(buffer, event);
2111                 ftrace_trace_stack(buffer, flags, 6, pc);
2112         }
2113  out:
2114         preempt_enable_notrace();
2115         unpause_graph_tracing();
2116
2117         return len;
2118 }
2119
2120 int trace_array_vprintk(struct trace_array *tr,
2121                         unsigned long ip, const char *fmt, va_list args)
2122 {
2123         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2124 }
2125
2126 int trace_array_printk(struct trace_array *tr,
2127                        unsigned long ip, const char *fmt, ...)
2128 {
2129         int ret;
2130         va_list ap;
2131
2132         if (!(trace_flags & TRACE_ITER_PRINTK))
2133                 return 0;
2134
2135         va_start(ap, fmt);
2136         ret = trace_array_vprintk(tr, ip, fmt, ap);
2137         va_end(ap);
2138         return ret;
2139 }
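/*
 * A minimal sketch of how an instance owner might use the helper above
 * (the trace_array pointer "tr" and the counter are assumed to come
 * from the caller):
 *
 *	trace_array_printk(tr, _THIS_IP_, "request %lu completed\n", seq);
 *
 * Unlike trace_printk(), the message goes into the given instance's
 * buffer rather than the global one, and only when the "printk" trace
 * option is set.
 */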
2140
2141 int trace_array_printk_buf(struct ring_buffer *buffer,
2142                            unsigned long ip, const char *fmt, ...)
2143 {
2144         int ret;
2145         va_list ap;
2146
2147         if (!(trace_flags & TRACE_ITER_PRINTK))
2148                 return 0;
2149
2150         va_start(ap, fmt);
2151         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2152         va_end(ap);
2153         return ret;
2154 }
2155
2156 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2157 {
2158         return trace_array_vprintk(&global_trace, ip, fmt, args);
2159 }
2160 EXPORT_SYMBOL_GPL(trace_vprintk);
2161
2162 static void trace_iterator_increment(struct trace_iterator *iter)
2163 {
2164         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2165
2166         iter->idx++;
2167         if (buf_iter)
2168                 ring_buffer_read(buf_iter, NULL);
2169 }
2170
2171 static struct trace_entry *
2172 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2173                 unsigned long *lost_events)
2174 {
2175         struct ring_buffer_event *event;
2176         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2177
2178         if (buf_iter)
2179                 event = ring_buffer_iter_peek(buf_iter, ts);
2180         else
2181                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2182                                          lost_events);
2183
2184         if (event) {
2185                 iter->ent_size = ring_buffer_event_length(event);
2186                 return ring_buffer_event_data(event);
2187         }
2188         iter->ent_size = 0;
2189         return NULL;
2190 }
2191
2192 static struct trace_entry *
2193 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2194                   unsigned long *missing_events, u64 *ent_ts)
2195 {
2196         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2197         struct trace_entry *ent, *next = NULL;
2198         unsigned long lost_events = 0, next_lost = 0;
2199         int cpu_file = iter->cpu_file;
2200         u64 next_ts = 0, ts;
2201         int next_cpu = -1;
2202         int next_size = 0;
2203         int cpu;
2204
2205         /*
2206          * If we are in a per_cpu trace file, don't bother iterating over
2207          * all CPUs; just peek at that one directly.
2208          */
2209         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2210                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2211                         return NULL;
2212                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2213                 if (ent_cpu)
2214                         *ent_cpu = cpu_file;
2215
2216                 return ent;
2217         }
2218
2219         for_each_tracing_cpu(cpu) {
2220
2221                 if (ring_buffer_empty_cpu(buffer, cpu))
2222                         continue;
2223
2224                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2225
2226                 /*
2227                  * Pick the entry with the smallest timestamp:
2228                  */
2229                 if (ent && (!next || ts < next_ts)) {
2230                         next = ent;
2231                         next_cpu = cpu;
2232                         next_ts = ts;
2233                         next_lost = lost_events;
2234                         next_size = iter->ent_size;
2235                 }
2236         }
2237
2238         iter->ent_size = next_size;
2239
2240         if (ent_cpu)
2241                 *ent_cpu = next_cpu;
2242
2243         if (ent_ts)
2244                 *ent_ts = next_ts;
2245
2246         if (missing_events)
2247                 *missing_events = next_lost;
2248
2249         return next;
2250 }
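/*
 * A small worked example of the merge above (numbers are made up):
 * with three per-cpu buffers whose next entries carry the timestamps
 *
 *	cpu0: 1050	cpu1: 1020	cpu2: 1200
 *
 * the loop returns the cpu1 entry first (next_cpu == 1, next_ts == 1020).
 * On the following call cpu1's iterator has advanced, and whichever of
 * the remaining heads is oldest wins.  Repeating this yields a single
 * stream ordered by timestamp across all CPUs.
 */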
2251
2252 /* Find the next real entry, without updating the iterator itself */
2253 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2254                                           int *ent_cpu, u64 *ent_ts)
2255 {
2256         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2257 }
2258
2259 /* Find the next real entry, and increment the iterator to the next entry */
2260 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2261 {
2262         iter->ent = __find_next_entry(iter, &iter->cpu,
2263                                       &iter->lost_events, &iter->ts);
2264
2265         if (iter->ent)
2266                 trace_iterator_increment(iter);
2267
2268         return iter->ent ? iter : NULL;
2269 }
2270
2271 static void trace_consume(struct trace_iterator *iter)
2272 {
2273         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2274                             &iter->lost_events);
2275 }
2276
2277 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2278 {
2279         struct trace_iterator *iter = m->private;
2280         int i = (int)*pos;
2281         void *ent;
2282
2283         WARN_ON_ONCE(iter->leftover);
2284
2285         (*pos)++;
2286
2287         /* can't go backwards */
2288         if (iter->idx > i)
2289                 return NULL;
2290
2291         if (iter->idx < 0)
2292                 ent = trace_find_next_entry_inc(iter);
2293         else
2294                 ent = iter;
2295
2296         while (ent && iter->idx < i)
2297                 ent = trace_find_next_entry_inc(iter);
2298
2299         iter->pos = *pos;
2300
2301         return ent;
2302 }
2303
2304 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2305 {
2306         struct ring_buffer_event *event;
2307         struct ring_buffer_iter *buf_iter;
2308         unsigned long entries = 0;
2309         u64 ts;
2310
2311         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2312
2313         buf_iter = trace_buffer_iter(iter, cpu);
2314         if (!buf_iter)
2315                 return;
2316
2317         ring_buffer_iter_reset(buf_iter);
2318
2319         /*
2320          * With the max latency tracers, a reset may never have taken
2321          * place on a cpu. This shows up as timestamps that are before
2322          * the start of the buffer.
2323          */
2324         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2325                 if (ts >= iter->trace_buffer->time_start)
2326                         break;
2327                 entries++;
2328                 ring_buffer_read(buf_iter, NULL);
2329         }
2330
2331         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2332 }
2333
2334 /*
2335  * The current tracer is copied to avoid taking a global lock
2336  * all around.
2337  */
2338 static void *s_start(struct seq_file *m, loff_t *pos)
2339 {
2340         struct trace_iterator *iter = m->private;
2341         struct trace_array *tr = iter->tr;
2342         int cpu_file = iter->cpu_file;
2343         void *p = NULL;
2344         loff_t l = 0;
2345         int cpu;
2346
2347         /*
2348          * Copy the tracer to avoid using a global lock all around.
2349          * iter->trace is a copy of current_trace; the name pointer can
2350          * be compared instead of using strcmp(), as iter->trace->name
2351          * will point to the same string as current_trace->name.
2352          */
2353         mutex_lock(&trace_types_lock);
2354         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2355                 *iter->trace = *tr->current_trace;
2356         mutex_unlock(&trace_types_lock);
2357
2358 #ifdef CONFIG_TRACER_MAX_TRACE
2359         if (iter->snapshot && iter->trace->use_max_tr)
2360                 return ERR_PTR(-EBUSY);
2361 #endif
2362
2363         if (!iter->snapshot)
2364                 atomic_inc(&trace_record_cmdline_disabled);
2365
2366         if (*pos != iter->pos) {
2367                 iter->ent = NULL;
2368                 iter->cpu = 0;
2369                 iter->idx = -1;
2370
2371                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2372                         for_each_tracing_cpu(cpu)
2373                                 tracing_iter_reset(iter, cpu);
2374                 } else
2375                         tracing_iter_reset(iter, cpu_file);
2376
2377                 iter->leftover = 0;
2378                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2379                         ;
2380
2381         } else {
2382                 /*
2383                  * If we overflowed the seq_file before, then we just
2384                  * want to reuse the trace_seq buffer.
2385                  */
2386                 if (iter->leftover)
2387                         p = iter;
2388                 else {
2389                         l = *pos - 1;
2390                         p = s_next(m, p, &l);
2391                 }
2392         }
2393
2394         trace_event_read_lock();
2395         trace_access_lock(cpu_file);
2396         return p;
2397 }
2398
2399 static void s_stop(struct seq_file *m, void *p)
2400 {
2401         struct trace_iterator *iter = m->private;
2402
2403 #ifdef CONFIG_TRACER_MAX_TRACE
2404         if (iter->snapshot && iter->trace->use_max_tr)
2405                 return;
2406 #endif
2407
2408         if (!iter->snapshot)
2409                 atomic_dec(&trace_record_cmdline_disabled);
2410
2411         trace_access_unlock(iter->cpu_file);
2412         trace_event_read_unlock();
2413 }
2414
2415 static void
2416 get_total_entries(struct trace_buffer *buf,
2417                   unsigned long *total, unsigned long *entries)
2418 {
2419         unsigned long count;
2420         int cpu;
2421
2422         *total = 0;
2423         *entries = 0;
2424
2425         for_each_tracing_cpu(cpu) {
2426                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2427                 /*
2428                  * If this buffer has skipped entries, then we hold all
2429                  * entries for the trace and we need to ignore the
2430                  * ones before the time stamp.
2431                  */
2432                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2433                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2434                         /* total is the same as the entries */
2435                         *total += count;
2436                 } else
2437                         *total += count +
2438                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2439                 *entries += count;
2440         }
2441 }
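/*
 * A small worked example for the accounting above (numbers are made up):
 * if a cpu buffer currently holds 1000 readable entries and 250 older
 * entries were overwritten (the ring buffer overran), then for that cpu
 *
 *	*entries += 1000;		entries still readable
 *	*total   += 1000 + 250;		everything ever written
 *
 * which is what the "entries-in-buffer/entries-written" header line
 * reports.
 */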
2442
2443 static void print_lat_help_header(struct seq_file *m)
2444 {
2445         seq_puts(m, "#                  _------=> CPU#            \n");
2446         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2447         seq_puts(m, "#                | / _----=> need-resched    \n");
2448         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2449         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2450         seq_puts(m, "#                |||| /     delay             \n");
2451         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2452         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2453 }
2454
2455 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2456 {
2457         unsigned long total;
2458         unsigned long entries;
2459
2460         get_total_entries(buf, &total, &entries);
2461         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2462                    entries, total, num_online_cpus());
2463         seq_puts(m, "#\n");
2464 }
2465
2466 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2467 {
2468         print_event_info(buf, m);
2469         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2470         seq_puts(m, "#              | |       |          |         |\n");
2471 }
2472
2473 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2474 {
2475         print_event_info(buf, m);
2476         seq_puts(m, "#                              _-----=> irqs-off\n");
2477         seq_puts(m, "#                             / _----=> need-resched\n");
2478         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2479         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2480         seq_puts(m, "#                            ||| /     delay\n");
2481         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2482         seq_puts(m, "#              | |       |   ||||       |         |\n");
2483 }
2484
2485 void
2486 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2487 {
2488         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2489         struct trace_buffer *buf = iter->trace_buffer;
2490         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2491         struct tracer *type = iter->trace;
2492         unsigned long entries;
2493         unsigned long total;
2494         const char *name = "preemption";
2495
2496         name = type->name;
2497
2498         get_total_entries(buf, &total, &entries);
2499
2500         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2501                    name, UTS_RELEASE);
2502         seq_puts(m, "# -----------------------------------"
2503                  "---------------------------------\n");
2504         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2505                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2506                    nsecs_to_usecs(data->saved_latency),
2507                    entries,
2508                    total,
2509                    buf->cpu,
2510 #if defined(CONFIG_PREEMPT_NONE)
2511                    "server",
2512 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2513                    "desktop",
2514 #elif defined(CONFIG_PREEMPT)
2515                    "preempt",
2516 #else
2517                    "unknown",
2518 #endif
2519                    /* These are reserved for later use */
2520                    0, 0, 0, 0);
2521 #ifdef CONFIG_SMP
2522         seq_printf(m, " #P:%d)\n", num_online_cpus());
2523 #else
2524         seq_puts(m, ")\n");
2525 #endif
2526         seq_puts(m, "#    -----------------\n");
2527         seq_printf(m, "#    | task: %.16s-%d "
2528                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2529                    data->comm, data->pid,
2530                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2531                    data->policy, data->rt_priority);
2532         seq_puts(m, "#    -----------------\n");
2533
2534         if (data->critical_start) {
2535                 seq_puts(m, "#  => started at: ");
2536                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2537                 trace_print_seq(m, &iter->seq);
2538                 seq_puts(m, "\n#  => ended at:   ");
2539                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2540                 trace_print_seq(m, &iter->seq);
2541                 seq_puts(m, "\n#\n");
2542         }
2543
2544         seq_puts(m, "#\n");
2545 }
2546
2547 static void test_cpu_buff_start(struct trace_iterator *iter)
2548 {
2549         struct trace_seq *s = &iter->seq;
2550
2551         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2552                 return;
2553
2554         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2555                 return;
2556
2557         if (cpumask_test_cpu(iter->cpu, iter->started))
2558                 return;
2559
2560         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2561                 return;
2562
2563         cpumask_set_cpu(iter->cpu, iter->started);
2564
2565         /* Don't print started cpu buffer for the first entry of the trace */
2566         if (iter->idx > 1)
2567                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2568                                 iter->cpu);
2569 }
2570
2571 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2572 {
2573         struct trace_seq *s = &iter->seq;
2574         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2575         struct trace_entry *entry;
2576         struct trace_event *event;
2577
2578         entry = iter->ent;
2579
2580         test_cpu_buff_start(iter);
2581
2582         event = ftrace_find_event(entry->type);
2583
2584         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2585                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2586                         if (!trace_print_lat_context(iter))
2587                                 goto partial;
2588                 } else {
2589                         if (!trace_print_context(iter))
2590                                 goto partial;
2591                 }
2592         }
2593
2594         if (event)
2595                 return event->funcs->trace(iter, sym_flags, event);
2596
2597         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2598                 goto partial;
2599
2600         return TRACE_TYPE_HANDLED;
2601 partial:
2602         return TRACE_TYPE_PARTIAL_LINE;
2603 }
2604
2605 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2606 {
2607         struct trace_seq *s = &iter->seq;
2608         struct trace_entry *entry;
2609         struct trace_event *event;
2610
2611         entry = iter->ent;
2612
2613         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2614                 if (!trace_seq_printf(s, "%d %d %llu ",
2615                                       entry->pid, iter->cpu, iter->ts))
2616                         goto partial;
2617         }
2618
2619         event = ftrace_find_event(entry->type);
2620         if (event)
2621                 return event->funcs->raw(iter, 0, event);
2622
2623         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2624                 goto partial;
2625
2626         return TRACE_TYPE_HANDLED;
2627 partial:
2628         return TRACE_TYPE_PARTIAL_LINE;
2629 }
2630
2631 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2632 {
2633         struct trace_seq *s = &iter->seq;
2634         unsigned char newline = '\n';
2635         struct trace_entry *entry;
2636         struct trace_event *event;
2637
2638         entry = iter->ent;
2639
2640         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2641                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2642                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2643                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2644         }
2645
2646         event = ftrace_find_event(entry->type);
2647         if (event) {
2648                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2649                 if (ret != TRACE_TYPE_HANDLED)
2650                         return ret;
2651         }
2652
2653         SEQ_PUT_FIELD_RET(s, newline);
2654
2655         return TRACE_TYPE_HANDLED;
2656 }
2657
2658 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2659 {
2660         struct trace_seq *s = &iter->seq;
2661         struct trace_entry *entry;
2662         struct trace_event *event;
2663
2664         entry = iter->ent;
2665
2666         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2667                 SEQ_PUT_FIELD_RET(s, entry->pid);
2668                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2669                 SEQ_PUT_FIELD_RET(s, iter->ts);
2670         }
2671
2672         event = ftrace_find_event(entry->type);
2673         return event ? event->funcs->binary(iter, 0, event) :
2674                 TRACE_TYPE_HANDLED;
2675 }
2676
2677 int trace_empty(struct trace_iterator *iter)
2678 {
2679         struct ring_buffer_iter *buf_iter;
2680         int cpu;
2681
2682         /* If we are looking at one CPU buffer, only check that one */
2683         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2684                 cpu = iter->cpu_file;
2685                 buf_iter = trace_buffer_iter(iter, cpu);
2686                 if (buf_iter) {
2687                         if (!ring_buffer_iter_empty(buf_iter))
2688                                 return 0;
2689                 } else {
2690                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2691                                 return 0;
2692                 }
2693                 return 1;
2694         }
2695
2696         for_each_tracing_cpu(cpu) {
2697                 buf_iter = trace_buffer_iter(iter, cpu);
2698                 if (buf_iter) {
2699                         if (!ring_buffer_iter_empty(buf_iter))
2700                                 return 0;
2701                 } else {
2702                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2703                                 return 0;
2704                 }
2705         }
2706
2707         return 1;
2708 }
2709
2710 /*  Called with trace_event_read_lock() held. */
2711 enum print_line_t print_trace_line(struct trace_iterator *iter)
2712 {
2713         enum print_line_t ret;
2714
2715         if (iter->lost_events &&
2716             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2717                                  iter->cpu, iter->lost_events))
2718                 return TRACE_TYPE_PARTIAL_LINE;
2719
2720         if (iter->trace && iter->trace->print_line) {
2721                 ret = iter->trace->print_line(iter);
2722                 if (ret != TRACE_TYPE_UNHANDLED)
2723                         return ret;
2724         }
2725
2726         if (iter->ent->type == TRACE_BPUTS &&
2727                         trace_flags & TRACE_ITER_PRINTK &&
2728                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2729                 return trace_print_bputs_msg_only(iter);
2730
2731         if (iter->ent->type == TRACE_BPRINT &&
2732                         trace_flags & TRACE_ITER_PRINTK &&
2733                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2734                 return trace_print_bprintk_msg_only(iter);
2735
2736         if (iter->ent->type == TRACE_PRINT &&
2737                         trace_flags & TRACE_ITER_PRINTK &&
2738                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2739                 return trace_print_printk_msg_only(iter);
2740
2741         if (trace_flags & TRACE_ITER_BIN)
2742                 return print_bin_fmt(iter);
2743
2744         if (trace_flags & TRACE_ITER_HEX)
2745                 return print_hex_fmt(iter);
2746
2747         if (trace_flags & TRACE_ITER_RAW)
2748                 return print_raw_fmt(iter);
2749
2750         return print_trace_fmt(iter);
2751 }
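/*
 * A minimal sketch of how the dispatch above is selected from user
 * space (assuming the standard trace option names):
 *
 *	echo bin > trace_options	# TRACE_ITER_BIN -> print_bin_fmt()
 *	echo hex > trace_options	# TRACE_ITER_HEX -> print_hex_fmt()
 *	echo raw > trace_options	# TRACE_ITER_RAW -> print_raw_fmt()
 *
 * If several are set, the checks above give "bin" priority over "hex",
 * and "hex" over "raw"; with none of them set, the human-readable
 * print_trace_fmt() output is used.
 */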
2752
2753 void trace_latency_header(struct seq_file *m)
2754 {
2755         struct trace_iterator *iter = m->private;
2756
2757         /* print nothing if the buffers are empty */
2758         if (trace_empty(iter))
2759                 return;
2760
2761         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2762                 print_trace_header(m, iter);
2763
2764         if (!(trace_flags & TRACE_ITER_VERBOSE))
2765                 print_lat_help_header(m);
2766 }
2767
2768 void trace_default_header(struct seq_file *m)
2769 {
2770         struct trace_iterator *iter = m->private;
2771
2772         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2773                 return;
2774
2775         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2776                 /* print nothing if the buffers are empty */
2777                 if (trace_empty(iter))
2778                         return;
2779                 print_trace_header(m, iter);
2780                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2781                         print_lat_help_header(m);
2782         } else {
2783                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2784                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2785                                 print_func_help_header_irq(iter->trace_buffer, m);
2786                         else
2787                                 print_func_help_header(iter->trace_buffer, m);
2788                 }
2789         }
2790 }
2791
2792 static void test_ftrace_alive(struct seq_file *m)
2793 {
2794         if (!ftrace_is_dead())
2795                 return;
2796         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2797         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2798 }
2799
2800 #ifdef CONFIG_TRACER_MAX_TRACE
2801 static void show_snapshot_main_help(struct seq_file *m)
2802 {
2803         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2804         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2805         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2806         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2807         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2808         seq_printf(m, "#                       is not a '0' or '1')\n");
2809 }
2810
2811 static void show_snapshot_percpu_help(struct seq_file *m)
2812 {
2813         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2814 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2815         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2816         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2817 #else
2818         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2819         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2820 #endif
2821         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2822         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2823         seq_printf(m, "#                       is not a '0' or '1')\n");
2824 }
2825
2826 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2827 {
2828         if (iter->tr->allocated_snapshot)
2829                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2830         else
2831                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2832
2833         seq_printf(m, "# Snapshot commands:\n");
2834         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2835                 show_snapshot_main_help(m);
2836         else
2837                 show_snapshot_percpu_help(m);
2838 }
2839 #else
2840 /* Should never be called */
2841 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2842 #endif
2843
2844 static int s_show(struct seq_file *m, void *v)
2845 {
2846         struct trace_iterator *iter = v;
2847         int ret;
2848
2849         if (iter->ent == NULL) {
2850                 if (iter->tr) {
2851                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2852                         seq_puts(m, "#\n");
2853                         test_ftrace_alive(m);
2854                 }
2855                 if (iter->snapshot && trace_empty(iter))
2856                         print_snapshot_help(m, iter);
2857                 else if (iter->trace && iter->trace->print_header)
2858                         iter->trace->print_header(m);
2859                 else
2860                         trace_default_header(m);
2861
2862         } else if (iter->leftover) {
2863                 /*
2864                  * If we filled the seq_file buffer earlier, we
2865                  * want to just show it now.
2866                  */
2867                 ret = trace_print_seq(m, &iter->seq);
2868
2869                 /* ret should this time be zero, but you never know */
2870                 iter->leftover = ret;
2871
2872         } else {
2873                 print_trace_line(iter);
2874                 ret = trace_print_seq(m, &iter->seq);
2875                 /*
2876                  * If we overflow the seq_file buffer, then it will
2877                  * ask us for this data again at start up.
2878                  * Use that instead.
2879                  *  ret is 0 if seq_file write succeeded.
2880                  *        -1 otherwise.
2881                  */
2882                 iter->leftover = ret;
2883         }
2884
2885         return 0;
2886 }
2887
2888 /*
2889  * Should be used after trace_array_get(); trace_types_lock
2890  * ensures that i_cdev was already initialized.
2891  */
2892 static inline int tracing_get_cpu(struct inode *inode)
2893 {
2894         if (inode->i_cdev) /* See trace_create_cpu_file() */
2895                 return (long)inode->i_cdev - 1;
2896         return RING_BUFFER_ALL_CPUS;
2897 }
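/*
 * A small example of the i_cdev encoding handled above (values are
 * illustrative): trace_create_cpu_file() stores "cpu + 1" in i_cdev, so
 *
 *	i_cdev == NULL		->  RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)1	->  cpu 0
 *	i_cdev == (void *)3	->  cpu 2
 *
 * The "+ 1" keeps NULL free to mean "no specific cpu".
 */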
2898
2899 static const struct seq_operations tracer_seq_ops = {
2900         .start          = s_start,
2901         .next           = s_next,
2902         .stop           = s_stop,
2903         .show           = s_show,
2904 };
2905
2906 static struct trace_iterator *
2907 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2908 {
2909         struct trace_array *tr = inode->i_private;
2910         struct trace_iterator *iter;
2911         int cpu;
2912
2913         if (tracing_disabled)
2914                 return ERR_PTR(-ENODEV);
2915
2916         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2917         if (!iter)
2918                 return ERR_PTR(-ENOMEM);
2919
2920         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2921                                     GFP_KERNEL);
2922         if (!iter->buffer_iter)
2923                 goto release;
2924
2925         /*
2926          * We make a copy of the current tracer to avoid concurrent
2927          * changes to it while we are reading.
2928          */
2929         mutex_lock(&trace_types_lock);
2930         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2931         if (!iter->trace)
2932                 goto fail;
2933
2934         *iter->trace = *tr->current_trace;
2935
2936         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2937                 goto fail;
2938
2939         iter->tr = tr;
2940
2941 #ifdef CONFIG_TRACER_MAX_TRACE
2942         /* Currently only the top directory has a snapshot */
2943         if (tr->current_trace->print_max || snapshot)
2944                 iter->trace_buffer = &tr->max_buffer;
2945         else
2946 #endif
2947                 iter->trace_buffer = &tr->trace_buffer;
2948         iter->snapshot = snapshot;
2949         iter->pos = -1;
2950         iter->cpu_file = tracing_get_cpu(inode);
2951         mutex_init(&iter->mutex);
2952
2953         /* Notify the tracer early; before we stop tracing. */
2954         if (iter->trace && iter->trace->open)
2955                 iter->trace->open(iter);
2956
2957         /* Annotate start of buffers if we had overruns */
2958         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2959                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2960
2961         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2962         if (trace_clocks[tr->clock_id].in_ns)
2963                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2964
2965         /* stop the trace while dumping if we are not opening "snapshot" */
2966         if (!iter->snapshot)
2967                 tracing_stop_tr(tr);
2968
2969         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2970                 for_each_tracing_cpu(cpu) {
2971                         iter->buffer_iter[cpu] =
2972                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2973                 }
2974                 ring_buffer_read_prepare_sync();
2975                 for_each_tracing_cpu(cpu) {
2976                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2977                         tracing_iter_reset(iter, cpu);
2978                 }
2979         } else {
2980                 cpu = iter->cpu_file;
2981                 iter->buffer_iter[cpu] =
2982                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2983                 ring_buffer_read_prepare_sync();
2984                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2985                 tracing_iter_reset(iter, cpu);
2986         }
2987
2988         mutex_unlock(&trace_types_lock);
2989
2990         return iter;
2991
2992  fail:
2993         mutex_unlock(&trace_types_lock);
2994         kfree(iter->trace);
2995         kfree(iter->buffer_iter);
2996 release:
2997         seq_release_private(inode, file);
2998         return ERR_PTR(-ENOMEM);
2999 }
3000
3001 int tracing_open_generic(struct inode *inode, struct file *filp)
3002 {
3003         if (tracing_disabled)
3004                 return -ENODEV;
3005
3006         filp->private_data = inode->i_private;
3007         return 0;
3008 }
3009
3010 bool tracing_is_disabled(void)
3011 {
3012         return (tracing_disabled) ? true : false;
3013 }
3014
3015 /*
3016  * Open and update trace_array ref count.
3017  * Must have the current trace_array passed to it.
3018  */
3019 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3020 {
3021         struct trace_array *tr = inode->i_private;
3022
3023         if (tracing_disabled)
3024                 return -ENODEV;
3025
3026         if (trace_array_get(tr) < 0)
3027                 return -ENODEV;
3028
3029         filp->private_data = inode->i_private;
3030
3031         return 0;
3032 }
3033
3034 static int tracing_release(struct inode *inode, struct file *file)
3035 {
3036         struct trace_array *tr = inode->i_private;
3037         struct seq_file *m = file->private_data;
3038         struct trace_iterator *iter;
3039         int cpu;
3040
3041         if (!(file->f_mode & FMODE_READ)) {
3042                 trace_array_put(tr);
3043                 return 0;
3044         }
3045
3046         /* Writes do not use seq_file */
3047         iter = m->private;
3048         mutex_lock(&trace_types_lock);
3049
3050         for_each_tracing_cpu(cpu) {
3051                 if (iter->buffer_iter[cpu])
3052                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3053         }
3054
3055         if (iter->trace && iter->trace->close)
3056                 iter->trace->close(iter);
3057
3058         if (!iter->snapshot)
3059                 /* reenable tracing if it was previously enabled */
3060                 tracing_start_tr(tr);
3061
3062         __trace_array_put(tr);
3063
3064         mutex_unlock(&trace_types_lock);
3065
3066         mutex_destroy(&iter->mutex);
3067         free_cpumask_var(iter->started);
3068         kfree(iter->trace);
3069         kfree(iter->buffer_iter);
3070         seq_release_private(inode, file);
3071
3072         return 0;
3073 }
3074
3075 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3076 {
3077         struct trace_array *tr = inode->i_private;
3078
3079         trace_array_put(tr);
3080         return 0;
3081 }
3082
3083 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3084 {
3085         struct trace_array *tr = inode->i_private;
3086
3087         trace_array_put(tr);
3088
3089         return single_release(inode, file);
3090 }
3091
3092 static int tracing_open(struct inode *inode, struct file *file)
3093 {
3094         struct trace_array *tr = inode->i_private;
3095         struct trace_iterator *iter;
3096         int ret = 0;
3097
3098         if (trace_array_get(tr) < 0)
3099                 return -ENODEV;
3100
3101         /* If this file was open for write, then erase contents */
3102         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3103                 int cpu = tracing_get_cpu(inode);
3104
3105                 if (cpu == RING_BUFFER_ALL_CPUS)
3106                         tracing_reset_online_cpus(&tr->trace_buffer);
3107                 else
3108                         tracing_reset(&tr->trace_buffer, cpu);
3109         }
3110
3111         if (file->f_mode & FMODE_READ) {
3112                 iter = __tracing_open(inode, file, false);
3113                 if (IS_ERR(iter))
3114                         ret = PTR_ERR(iter);
3115                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3116                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3117         }
3118
3119         if (ret < 0)
3120                 trace_array_put(tr);
3121
3122         return ret;
3123 }
3124
3125 /*
3126  * Some tracers are not suitable for instance buffers.
3127  * A tracer is always available for the global (toplevel) array,
3128  * and for an instance only if it explicitly states that it is.
3129  */
3130 static bool
3131 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3132 {
3133         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3134 }
3135
3136 /* Find the next tracer that this trace array may use */
3137 static struct tracer *
3138 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3139 {
3140         while (t && !trace_ok_for_array(t, tr))
3141                 t = t->next;
3142
3143         return t;
3144 }
3145
3146 static void *
3147 t_next(struct seq_file *m, void *v, loff_t *pos)
3148 {
3149         struct trace_array *tr = m->private;
3150         struct tracer *t = v;
3151
3152         (*pos)++;
3153
3154         if (t)
3155                 t = get_tracer_for_array(tr, t->next);
3156
3157         return t;
3158 }
3159
3160 static void *t_start(struct seq_file *m, loff_t *pos)
3161 {
3162         struct trace_array *tr = m->private;
3163         struct tracer *t;
3164         loff_t l = 0;
3165
3166         mutex_lock(&trace_types_lock);
3167
3168         t = get_tracer_for_array(tr, trace_types);
3169         for (; t && l < *pos; t = t_next(m, t, &l))
3170                 ;
3171
3172         return t;
3173 }
3174
3175 static void t_stop(struct seq_file *m, void *p)
3176 {
3177         mutex_unlock(&trace_types_lock);
3178 }
3179
3180 static int t_show(struct seq_file *m, void *v)
3181 {
3182         struct tracer *t = v;
3183
3184         if (!t)
3185                 return 0;
3186
3187         seq_printf(m, "%s", t->name);
3188         if (t->next)
3189                 seq_putc(m, ' ');
3190         else
3191                 seq_putc(m, '\n');
3192
3193         return 0;
3194 }
3195
3196 static const struct seq_operations show_traces_seq_ops = {
3197         .start          = t_start,
3198         .next           = t_next,
3199         .stop           = t_stop,
3200         .show           = t_show,
3201 };
3202
3203 static int show_traces_open(struct inode *inode, struct file *file)
3204 {
3205         struct trace_array *tr = inode->i_private;
3206         struct seq_file *m;
3207         int ret;
3208
3209         if (tracing_disabled)
3210                 return -ENODEV;
3211
3212         ret = seq_open(file, &show_traces_seq_ops);
3213         if (ret)
3214                 return ret;
3215
3216         m = file->private_data;
3217         m->private = tr;
3218
3219         return 0;
3220 }
3221
3222 static ssize_t
3223 tracing_write_stub(struct file *filp, const char __user *ubuf,
3224                    size_t count, loff_t *ppos)
3225 {
3226         return count;
3227 }
3228
3229 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3230 {
3231         int ret;
3232
3233         if (file->f_mode & FMODE_READ)
3234                 ret = seq_lseek(file, offset, whence);
3235         else
3236                 file->f_pos = ret = 0;
3237
3238         return ret;
3239 }
3240
3241 static const struct file_operations tracing_fops = {
3242         .open           = tracing_open,
3243         .read           = seq_read,
3244         .write          = tracing_write_stub,
3245         .llseek         = tracing_lseek,
3246         .release        = tracing_release,
3247 };
3248
3249 static const struct file_operations show_traces_fops = {
3250         .open           = show_traces_open,
3251         .read           = seq_read,
3252         .release        = seq_release,
3253         .llseek         = seq_lseek,
3254 };
3255
3256 /*
3257  * The tracer itself will not take this lock, but we still want
3258  * to provide a consistent cpumask to user-space:
3259  */
3260 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3261
3262 /*
3263  * Temporary storage for the character representation of the
3264  * CPU bitmask (and one more byte for the newline):
3265  */
3266 static char mask_str[NR_CPUS + 1];
3267
3268 static ssize_t
3269 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3270                      size_t count, loff_t *ppos)
3271 {
3272         struct trace_array *tr = file_inode(filp)->i_private;
3273         int len;
3274
3275         mutex_lock(&tracing_cpumask_update_lock);
3276
3277         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3278         if (count - len < 2) {
3279                 count = -EINVAL;
3280                 goto out_err;
3281         }
3282         len += sprintf(mask_str + len, "\n");
3283         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3284
3285 out_err:
3286         mutex_unlock(&tracing_cpumask_update_lock);
3287
3288         return count;
3289 }
3290
3291 static ssize_t
3292 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3293                       size_t count, loff_t *ppos)
3294 {
3295         struct trace_array *tr = file_inode(filp)->i_private;
3296         cpumask_var_t tracing_cpumask_new;
3297         int err, cpu;
3298
3299         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3300                 return -ENOMEM;
3301
3302         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3303         if (err)
3304                 goto err_unlock;
3305
3306         mutex_lock(&tracing_cpumask_update_lock);
3307
3308         local_irq_disable();
3309         arch_spin_lock(&ftrace_max_lock);
3310         for_each_tracing_cpu(cpu) {
3311                 /*
3312                  * Increase/decrease the disabled counter if we are
3313                  * about to flip a bit in the cpumask:
3314                  */
3315                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3316                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3317                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3318                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3319                 }
3320                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3321                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3322                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3323                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3324                 }
3325         }
3326         arch_spin_unlock(&ftrace_max_lock);
3327         local_irq_enable();
3328
3329         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3330
3331         mutex_unlock(&tracing_cpumask_update_lock);
3332         free_cpumask_var(tracing_cpumask_new);
3333
3334         return count;
3335
3336 err_unlock:
3337         free_cpumask_var(tracing_cpumask_new);
3338
3339         return err;
3340 }
3341
3342 static const struct file_operations tracing_cpumask_fops = {
3343         .open           = tracing_open_generic_tr,
3344         .read           = tracing_cpumask_read,
3345         .write          = tracing_cpumask_write,
3346         .release        = tracing_release_generic_tr,
3347         .llseek         = generic_file_llseek,
3348 };
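/*
 * Editor's note: the sketch below is illustrative and not part of trace.c.
 * tracing_cpumask accepts the hex cpumask format parsed by
 * cpumask_parse_user() above.  Assuming debugfs is mounted at
 * /sys/kernel/debug, a user-space helper could restrict tracing to
 * CPUs 0 and 1 like this:
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static int restrict_tracing_to_cpus_0_and_1(void)
{
	int fd = open("/sys/kernel/debug/tracing/tracing_cpumask", O_WRONLY);

	if (fd < 0)
		return -1;
	/* hex mask "3" has bits 0 and 1 set -> trace only CPUs 0 and 1 */
	if (write(fd, "3\n", 2) != 2) {
		close(fd);
		return -1;
	}
	return close(fd);
}
#endif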
3349
3350 static int tracing_trace_options_show(struct seq_file *m, void *v)
3351 {
3352         struct tracer_opt *trace_opts;
3353         struct trace_array *tr = m->private;
3354         u32 tracer_flags;
3355         int i;
3356
3357         mutex_lock(&trace_types_lock);
3358         tracer_flags = tr->current_trace->flags->val;
3359         trace_opts = tr->current_trace->flags->opts;
3360
3361         for (i = 0; trace_options[i]; i++) {
3362                 if (trace_flags & (1 << i))
3363                         seq_printf(m, "%s\n", trace_options[i]);
3364                 else
3365                         seq_printf(m, "no%s\n", trace_options[i]);
3366         }
3367
3368         for (i = 0; trace_opts[i].name; i++) {
3369                 if (tracer_flags & trace_opts[i].bit)
3370                         seq_printf(m, "%s\n", trace_opts[i].name);
3371                 else
3372                         seq_printf(m, "no%s\n", trace_opts[i].name);
3373         }
3374         mutex_unlock(&trace_types_lock);
3375
3376         return 0;
3377 }
3378
3379 static int __set_tracer_option(struct trace_array *tr,
3380                                struct tracer_flags *tracer_flags,
3381                                struct tracer_opt *opts, int neg)
3382 {
3383         struct tracer *trace = tr->current_trace;
3384         int ret;
3385
3386         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3387         if (ret)
3388                 return ret;
3389
3390         if (neg)
3391                 tracer_flags->val &= ~opts->bit;
3392         else
3393                 tracer_flags->val |= opts->bit;
3394         return 0;
3395 }
3396
3397 /* Try to assign a tracer specific option */
3398 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3399 {
3400         struct tracer *trace = tr->current_trace;
3401         struct tracer_flags *tracer_flags = trace->flags;
3402         struct tracer_opt *opts = NULL;
3403         int i;
3404
3405         for (i = 0; tracer_flags->opts[i].name; i++) {
3406                 opts = &tracer_flags->opts[i];
3407
3408                 if (strcmp(cmp, opts->name) == 0)
3409                         return __set_tracer_option(tr, trace->flags, opts, neg);
3410         }
3411
3412         return -EINVAL;
3413 }
3414
3415 /* Some tracers require overwrite to stay enabled */
3416 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3417 {
3418         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3419                 return -1;
3420
3421         return 0;
3422 }
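/*
 * Editor's note: the sketch below is illustrative and not part of trace.c.
 * A tracer that must keep the overwrite option set can veto clearing it by
 * routing its ->flag_changed callback (invoked from set_tracer_flag() below)
 * through trace_keep_overwrite().  The tracer name and callback here are
 * hypothetical:
 */
#if 0
static int my_latency_flag_changed(struct trace_array *tr, u32 mask, int set)
{
	struct tracer *tracer = tr->current_trace;

	return trace_keep_overwrite(tracer, mask, set);
}

static struct tracer my_latency_tracer = {
	.name		= "my_latency",
	.flag_changed	= my_latency_flag_changed,
	/* a real tracer would also fill in .init, .reset, etc. */
};
#endif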
3423
3424 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3425 {
3426         /* do nothing if flag is already set */
3427         if (!!(trace_flags & mask) == !!enabled)
3428                 return 0;
3429
3430         /* Give the tracer a chance to approve the change */
3431         if (tr->current_trace->flag_changed)
3432                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3433                         return -EINVAL;
3434
3435         if (enabled)
3436                 trace_flags |= mask;
3437         else
3438                 trace_flags &= ~mask;
3439
3440         if (mask == TRACE_ITER_RECORD_CMD)
3441                 trace_event_enable_cmd_record(enabled);
3442
3443         if (mask == TRACE_ITER_OVERWRITE) {
3444                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3445 #ifdef CONFIG_TRACER_MAX_TRACE
3446                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3447 #endif
3448         }
3449
3450         if (mask == TRACE_ITER_PRINTK)
3451                 trace_printk_start_stop_comm(enabled);
3452
3453         return 0;
3454 }
3455
3456 static int trace_set_options(struct trace_array *tr, char *option)
3457 {
3458         char *cmp;
3459         int neg = 0;
3460         int ret = -ENODEV;
3461         int i;
3462
3463         cmp = strstrip(option);
3464
3465         if (strncmp(cmp, "no", 2) == 0) {
3466                 neg = 1;
3467                 cmp += 2;
3468         }
3469
3470         mutex_lock(&trace_types_lock);
3471
3472         for (i = 0; trace_options[i]; i++) {
3473                 if (strcmp(cmp, trace_options[i]) == 0) {
3474                         ret = set_tracer_flag(tr, 1 << i, !neg);
3475                         break;
3476                 }
3477         }
3478
3479         /* If no option could be set, test the specific tracer options */
3480         if (!trace_options[i])
3481                 ret = set_tracer_option(tr, cmp, neg);
3482
3483         mutex_unlock(&trace_types_lock);
3484
3485         return ret;
3486 }
3487
3488 static ssize_t
3489 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3490                         size_t cnt, loff_t *ppos)
3491 {
3492         struct seq_file *m = filp->private_data;
3493         struct trace_array *tr = m->private;
3494         char buf[64];
3495         int ret;
3496
3497         if (cnt >= sizeof(buf))
3498                 return -EINVAL;
3499
3500         if (copy_from_user(&buf, ubuf, cnt))
3501                 return -EFAULT;
3502
3503         buf[cnt] = 0;
3504
3505         ret = trace_set_options(tr, buf);
3506         if (ret < 0)
3507                 return ret;
3508
3509         *ppos += cnt;
3510
3511         return cnt;
3512 }
3513
3514 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3515 {
3516         struct trace_array *tr = inode->i_private;
3517         int ret;
3518
3519         if (tracing_disabled)
3520                 return -ENODEV;
3521
3522         if (trace_array_get(tr) < 0)
3523                 return -ENODEV;
3524
3525         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3526         if (ret < 0)
3527                 trace_array_put(tr);
3528
3529         return ret;
3530 }
3531
3532 static const struct file_operations tracing_iter_fops = {
3533         .open           = tracing_trace_options_open,
3534         .read           = seq_read,
3535         .llseek         = seq_lseek,
3536         .release        = tracing_single_release_tr,
3537         .write          = tracing_trace_options_write,
3538 };
3539
3540 static const char readme_msg[] =
3541         "tracing mini-HOWTO:\n\n"
3542         "# echo 0 > tracing_on : quick way to disable tracing\n"
3543         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3544         " Important files:\n"
3545         "  trace\t\t\t- The static contents of the buffer\n"
3546         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3547         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3548         "  current_tracer\t- function and latency tracers\n"
3549         "  available_tracers\t- list of configured tracers for current_tracer\n"
3550         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3551         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3552         "  trace_clock\t\t- change the clock used to order events\n"
3553         "       local:   Per cpu clock but may not be synced across CPUs\n"
3554         "      global:   Synced across CPUs but slows tracing down.\n"
3555         "     counter:   Not a clock, but just an increment\n"
3556         "      uptime:   Jiffy counter from time of boot\n"
3557         "        perf:   Same clock that perf events use\n"
3558 #ifdef CONFIG_X86_64
3559         "     x86-tsc:   TSC cycle counter\n"
3560 #endif
3561         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3562         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3563         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3564         "\t\t\t  Remove sub-buffer with rmdir\n"
3565         "  trace_options\t\t- Set format or modify how tracing happens\n"
3566         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
3567         "\t\t\t  option name\n"
3568 #ifdef CONFIG_DYNAMIC_FTRACE
3569         "\n  available_filter_functions - list of functions that can be filtered on\n"
3570         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3571         "\t\t\t  functions\n"
3572         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3573         "\t     modules: Can select a group via module\n"
3574         "\t      Format: :mod:<module-name>\n"
3575         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3576         "\t    triggers: a command to perform when function is hit\n"
3577         "\t      Format: <function>:<trigger>[:count]\n"
3578         "\t     trigger: traceon, traceoff\n"
3579         "\t\t      enable_event:<system>:<event>\n"
3580         "\t\t      disable_event:<system>:<event>\n"
3581 #ifdef CONFIG_STACKTRACE
3582         "\t\t      stacktrace\n"
3583 #endif
3584 #ifdef CONFIG_TRACER_SNAPSHOT
3585         "\t\t      snapshot\n"
3586 #endif
3587         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3588         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3589         "\t     The first one will disable tracing every time do_fault is hit\n"
3590         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3591         "\t       The first time do_trap is hit and it disables tracing, the\n"
3592         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3593         "\t       the counter will not decrement. It only decrements when the\n"
3594         "\t       trigger did work\n"
3595         "\t     To remove a trigger without a count:\n"
3596         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3597         "\t     To remove a trigger with a count:\n"
3598         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3599         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3600         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3601         "\t    modules: Can select a group via module command :mod:\n"
3602         "\t    Does not accept triggers\n"
3603 #endif /* CONFIG_DYNAMIC_FTRACE */
3604 #ifdef CONFIG_FUNCTION_TRACER
3605         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3606         "\t\t    (function)\n"
3607 #endif
3608 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3609         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3610         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3611 #endif
3612 #ifdef CONFIG_TRACER_SNAPSHOT
3613         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3614         "\t\t\t  snapshot buffer. Read the contents for more\n"
3615         "\t\t\t  information\n"
3616 #endif
3617 #ifdef CONFIG_STACK_TRACER
3618         "  stack_trace\t\t- Shows the max stack trace when active\n"
3619         "  stack_max_size\t- Shows current max stack size that was traced\n"
3620         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3621         "\t\t\t  new trace)\n"
3622 #ifdef CONFIG_DYNAMIC_FTRACE
3623         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3624         "\t\t\t  traces\n"
3625 #endif
3626 #endif /* CONFIG_STACK_TRACER */
3627         "  events/\t\t- Directory containing all trace event subsystems:\n"
3628         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3629         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3630         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3631         "\t\t\t  events\n"
3632         "      filter\t\t- If set, only events passing filter are traced\n"
3633         "  events/<system>/<event>/\t- Directory containing control files for\n"
3634         "\t\t\t  <event>:\n"
3635         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3636         "      filter\t\t- If set, only events passing filter are traced\n"
3637         "      trigger\t\t- If set, a command to perform when event is hit\n"
3638         "\t    Format: <trigger>[:count][if <filter>]\n"
3639         "\t   trigger: traceon, traceoff\n"
3640         "\t            enable_event:<system>:<event>\n"
3641         "\t            disable_event:<system>:<event>\n"
3642 #ifdef CONFIG_STACKTRACE
3643         "\t\t    stacktrace\n"
3644 #endif
3645 #ifdef CONFIG_TRACER_SNAPSHOT
3646         "\t\t    snapshot\n"
3647 #endif
3648         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3649         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3650         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3651         "\t                  events/block/block_unplug/trigger\n"
3652         "\t   The first disables tracing every time block_unplug is hit.\n"
3653         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3654         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3655         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3656         "\t   Like function triggers, the counter is only decremented if it\n"
3657         "\t    enabled or disabled tracing.\n"
3658         "\t   To remove a trigger without a count:\n"
3659         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3660         "\t   To remove a trigger with a count:\n"
3661         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3662         "\t   Filters can be ignored when removing a trigger.\n"
3663 ;
3664
3665 static ssize_t
3666 tracing_readme_read(struct file *filp, char __user *ubuf,
3667                        size_t cnt, loff_t *ppos)
3668 {
3669         return simple_read_from_buffer(ubuf, cnt, ppos,
3670                                         readme_msg, strlen(readme_msg));
3671 }
3672
3673 static const struct file_operations tracing_readme_fops = {
3674         .open           = tracing_open_generic,
3675         .read           = tracing_readme_read,
3676         .llseek         = generic_file_llseek,
3677 };
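/*
 * Editor's note: the sketch below is illustrative and not part of trace.c.
 * As the mini-HOWTO above says, trace_pipe is a consuming read: data handed
 * to the reader is removed from the ring buffer, and reads block while the
 * buffer is empty (see tracing_wait_pipe() further down).  Assuming debugfs
 * is mounted at /sys/kernel/debug, a minimal user-space consumer could be:
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);

	if (fd < 0) {
		perror("trace_pipe");
		return 1;
	}
	/* Blocks until trace data is available, then streams it to stdout. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif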
3678
3679 static ssize_t
3680 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3681                                 size_t cnt, loff_t *ppos)
3682 {
3683         char *buf_comm;
3684         char *file_buf;
3685         char *buf;
3686         int len = 0;
3687         int pid;
3688         int i;
3689
3690         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3691         if (!file_buf)
3692                 return -ENOMEM;
3693
3694         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3695         if (!buf_comm) {
3696                 kfree(file_buf);
3697                 return -ENOMEM;
3698         }
3699
3700         buf = file_buf;
3701
3702         for (i = 0; i < SAVED_CMDLINES; i++) {
3703                 int r;
3704
3705                 pid = map_cmdline_to_pid[i];
3706                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3707                         continue;
3708
3709                 trace_find_cmdline(pid, buf_comm);
3710                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3711                 buf += r;
3712                 len += r;
3713         }
3714
3715         len = simple_read_from_buffer(ubuf, cnt, ppos,
3716                                       file_buf, len);
3717
3718         kfree(file_buf);
3719         kfree(buf_comm);
3720
3721         return len;
3722 }
3723
3724 static const struct file_operations tracing_saved_cmdlines_fops = {
3725     .open       = tracing_open_generic,
3726     .read       = tracing_saved_cmdlines_read,
3727     .llseek     = generic_file_llseek,
3728 };
3729
3730 static ssize_t
3731 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3732                        size_t cnt, loff_t *ppos)
3733 {
3734         struct trace_array *tr = filp->private_data;
3735         char buf[MAX_TRACER_SIZE+2];
3736         int r;
3737
3738         mutex_lock(&trace_types_lock);
3739         r = sprintf(buf, "%s\n", tr->current_trace->name);
3740         mutex_unlock(&trace_types_lock);
3741
3742         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3743 }
3744
3745 int tracer_init(struct tracer *t, struct trace_array *tr)
3746 {
3747         tracing_reset_online_cpus(&tr->trace_buffer);
3748         return t->init(tr);
3749 }
3750
3751 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3752 {
3753         int cpu;
3754
3755         for_each_tracing_cpu(cpu)
3756                 per_cpu_ptr(buf->data, cpu)->entries = val;
3757 }
3758
3759 #ifdef CONFIG_TRACER_MAX_TRACE
3760 /* resize @tr's buffer to the size of @size_tr's entries */
3761 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3762                                         struct trace_buffer *size_buf, int cpu_id)
3763 {
3764         int cpu, ret = 0;
3765
3766         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3767                 for_each_tracing_cpu(cpu) {
3768                         ret = ring_buffer_resize(trace_buf->buffer,
3769                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3770                         if (ret < 0)
3771                                 break;
3772                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3773                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3774                 }
3775         } else {
3776                 ret = ring_buffer_resize(trace_buf->buffer,
3777                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3778                 if (ret == 0)
3779                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3780                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3781         }
3782
3783         return ret;
3784 }
3785 #endif /* CONFIG_TRACER_MAX_TRACE */
3786
3787 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3788                                         unsigned long size, int cpu)
3789 {
3790         int ret;
3791
3792         /*
3793          * If kernel or user changes the size of the ring buffer
3794          * we use the size that was given, and we can forget about
3795          * expanding it later.
3796          */
3797         ring_buffer_expanded = true;
3798
3799         /* May be called before buffers are initialized */
3800         if (!tr->trace_buffer.buffer)
3801                 return 0;
3802
3803         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3804         if (ret < 0)
3805                 return ret;
3806
3807 #ifdef CONFIG_TRACER_MAX_TRACE
3808         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3809             !tr->current_trace->use_max_tr)
3810                 goto out;
3811
3812         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3813         if (ret < 0) {
3814                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3815                                                      &tr->trace_buffer, cpu);
3816                 if (r < 0) {
3817                         /*
3818                          * AARGH! We are left with different
3819                          * size max buffer!!!!
3820                          * The max buffer is our "snapshot" buffer.
3821                          * When a tracer needs a snapshot (one of the
3822                          * latency tracers), it swaps the max buffer
3823                          * with the saved snapshot. We succeeded in updating
3824                          * the size of the main buffer, but failed to
3825                          * update the size of the max buffer. But when we tried
3826                          * to reset the main buffer to the original size, we
3827                          * failed there too. This is very unlikely to
3828                          * happen, but if it does, warn and kill all
3829                          * tracing.
3830                          */
3831                         WARN_ON(1);
3832                         tracing_disabled = 1;
3833                 }
3834                 return ret;
3835         }
3836
3837         if (cpu == RING_BUFFER_ALL_CPUS)
3838                 set_buffer_entries(&tr->max_buffer, size);
3839         else
3840                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3841
3842  out:
3843 #endif /* CONFIG_TRACER_MAX_TRACE */
3844
3845         if (cpu == RING_BUFFER_ALL_CPUS)
3846                 set_buffer_entries(&tr->trace_buffer, size);
3847         else
3848                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3849
3850         return ret;
3851 }
3852
3853 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3854                                           unsigned long size, int cpu_id)
3855 {
3856         int ret = size;
3857
3858         mutex_lock(&trace_types_lock);
3859
3860         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3861                 /* make sure this cpu is enabled in the mask */
3862                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3863                         ret = -EINVAL;
3864                         goto out;
3865                 }
3866         }
3867
3868         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3869         if (ret < 0)
3870                 ret = -ENOMEM;
3871
3872 out:
3873         mutex_unlock(&trace_types_lock);
3874
3875         return ret;
3876 }
3877
3878
3879 /**
3880  * tracing_update_buffers - used by tracing facility to expand ring buffers
3881  *
3882  * To save memory when tracing is never used on a system that has it
3883  * configured in, the ring buffers are set to a minimum size. Once
3884  * a user starts to use the tracing facility, they need to grow
3885  * to their default size.
3886  *
3887  * This function is to be called when a tracer is about to be used.
3888  */
3889 int tracing_update_buffers(void)
3890 {
3891         int ret = 0;
3892
3893         mutex_lock(&trace_types_lock);
3894         if (!ring_buffer_expanded)
3895                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3896                                                 RING_BUFFER_ALL_CPUS);
3897         mutex_unlock(&trace_types_lock);
3898
3899         return ret;
3900 }
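/*
 * Editor's note: the sketch below is illustrative and not part of trace.c.
 * Per the kernel-doc above, code that is about to start producing trace data
 * should expand the ring buffers first.  A hypothetical enable path might do:
 */
#if 0
static int my_feature_enable(void)
{
	int ret;

	/* Grow the ring buffers from their boot-time minimum, if needed. */
	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	/* ... actually enable the feature here ... */
	return 0;
}
#endif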
3901
3902 struct trace_option_dentry;
3903
3904 static struct trace_option_dentry *
3905 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3906
3907 static void
3908 destroy_trace_option_files(struct trace_option_dentry *topts);
3909
3910 /*
3911  * Used to clear out the tracer before deletion of an instance.
3912  * Must have trace_types_lock held.
3913  */
3914 static void tracing_set_nop(struct trace_array *tr)
3915 {
3916         if (tr->current_trace == &nop_trace)
3917                 return;
3918
3919         tr->current_trace->enabled--;
3920
3921         if (tr->current_trace->reset)
3922                 tr->current_trace->reset(tr);
3923
3924         tr->current_trace = &nop_trace;
3925 }
3926
3927 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
3928 {
3929         static struct trace_option_dentry *topts;
3930         struct tracer *t;
3931 #ifdef CONFIG_TRACER_MAX_TRACE
3932         bool had_max_tr;
3933 #endif
3934         int ret = 0;
3935
3936         mutex_lock(&trace_types_lock);
3937
3938         if (!ring_buffer_expanded) {
3939                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3940                                                 RING_BUFFER_ALL_CPUS);
3941                 if (ret < 0)
3942                         goto out;
3943                 ret = 0;
3944         }
3945
3946         for (t = trace_types; t; t = t->next) {
3947                 if (strcmp(t->name, buf) == 0)
3948                         break;
3949         }
3950         if (!t) {
3951                 ret = -EINVAL;
3952                 goto out;
3953         }
3954         if (t == tr->current_trace)
3955                 goto out;
3956
3957         /* Some tracers are only allowed for the top level buffer */
3958         if (!trace_ok_for_array(t, tr)) {
3959                 ret = -EINVAL;
3960                 goto out;
3961         }
3962
3963         trace_branch_disable();
3964
3965         tr->current_trace->enabled--;
3966
3967         if (tr->current_trace->reset)
3968                 tr->current_trace->reset(tr);
3969
3970         /* Current trace needs to be nop_trace before synchronize_sched */
3971         tr->current_trace = &nop_trace;
3972
3973 #ifdef CONFIG_TRACER_MAX_TRACE
3974         had_max_tr = tr->allocated_snapshot;
3975
3976         if (had_max_tr && !t->use_max_tr) {
3977                 /*
3978                  * We need to make sure that the update_max_tr sees that
3979                  * current_trace changed to nop_trace to keep it from
3980                  * swapping the buffers after we resize it.
3981                  * The update_max_tr is called with interrupts disabled,
3982                  * so a synchronize_sched() is sufficient.
3983                  */
3984                 synchronize_sched();
3985                 free_snapshot(tr);
3986         }
3987 #endif
3988         /* Currently, only the top instance has options */
3989         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
3990                 destroy_trace_option_files(topts);
3991                 topts = create_trace_option_files(tr, t);
3992         }
3993
3994 #ifdef CONFIG_TRACER_MAX_TRACE
3995         if (t->use_max_tr && !had_max_tr) {
3996                 ret = alloc_snapshot(tr);
3997                 if (ret < 0)
3998                         goto out;
3999         }
4000 #endif
4001
4002         if (t->init) {
4003                 ret = tracer_init(t, tr);
4004                 if (ret)
4005                         goto out;
4006         }
4007
4008         tr->current_trace = t;
4009         tr->current_trace->enabled++;
4010         trace_branch_enable(tr);
4011  out:
4012         mutex_unlock(&trace_types_lock);
4013
4014         return ret;
4015 }
4016
4017 static ssize_t
4018 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4019                         size_t cnt, loff_t *ppos)
4020 {
4021         struct trace_array *tr = filp->private_data;
4022         char buf[MAX_TRACER_SIZE+1];
4023         int i;
4024         size_t ret;
4025         int err;
4026
4027         ret = cnt;
4028
4029         if (cnt > MAX_TRACER_SIZE)
4030                 cnt = MAX_TRACER_SIZE;
4031
4032         if (copy_from_user(&buf, ubuf, cnt))
4033                 return -EFAULT;
4034
4035         buf[cnt] = 0;
4036
4037         /* strip ending whitespace. */
4038         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4039                 buf[i] = 0;
4040
4041         err = tracing_set_tracer(tr, buf);
4042         if (err)
4043                 return err;
4044
4045         *ppos += ret;
4046
4047         return ret;
4048 }
4049
4050 static ssize_t
4051 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4052                      size_t cnt, loff_t *ppos)
4053 {
4054         unsigned long *ptr = filp->private_data;
4055         char buf[64];
4056         int r;
4057
4058         r = snprintf(buf, sizeof(buf), "%ld\n",
4059                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4060         if (r > sizeof(buf))
4061                 r = sizeof(buf);
4062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4063 }
4064
4065 static ssize_t
4066 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4067                       size_t cnt, loff_t *ppos)
4068 {
4069         unsigned long *ptr = filp->private_data;
4070         unsigned long val;
4071         int ret;
4072
4073         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4074         if (ret)
4075                 return ret;
4076
4077         *ptr = val * 1000;
4078
4079         return cnt;
4080 }
4081
4082 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4083 {
4084         struct trace_array *tr = inode->i_private;
4085         struct trace_iterator *iter;
4086         int ret = 0;
4087
4088         if (tracing_disabled)
4089                 return -ENODEV;
4090
4091         if (trace_array_get(tr) < 0)
4092                 return -ENODEV;
4093
4094         mutex_lock(&trace_types_lock);
4095
4096         /* create a buffer to store the information to pass to userspace */
4097         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4098         if (!iter) {
4099                 ret = -ENOMEM;
4100                 __trace_array_put(tr);
4101                 goto out;
4102         }
4103
4104         /*
4105          * We make a copy of the current tracer to avoid concurrent
4106          * changes on it while we are reading.
4107          */
4108         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4109         if (!iter->trace) {
4110                 ret = -ENOMEM;
4111                 goto fail;
4112         }
4113         *iter->trace = *tr->current_trace;
4114
4115         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4116                 ret = -ENOMEM;
4117                 goto fail;
4118         }
4119
4120         /* trace pipe does not show start of buffer */
4121         cpumask_setall(iter->started);
4122
4123         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4124                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4125
4126         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4127         if (trace_clocks[tr->clock_id].in_ns)
4128                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4129
4130         iter->tr = tr;
4131         iter->trace_buffer = &tr->trace_buffer;
4132         iter->cpu_file = tracing_get_cpu(inode);
4133         mutex_init(&iter->mutex);
4134         filp->private_data = iter;
4135
4136         if (iter->trace->pipe_open)
4137                 iter->trace->pipe_open(iter);
4138
4139         nonseekable_open(inode, filp);
4140 out:
4141         mutex_unlock(&trace_types_lock);
4142         return ret;
4143
4144 fail:
4145         kfree(iter->trace);
4146         kfree(iter);
4147         __trace_array_put(tr);
4148         mutex_unlock(&trace_types_lock);
4149         return ret;
4150 }
4151
4152 static int tracing_release_pipe(struct inode *inode, struct file *file)
4153 {
4154         struct trace_iterator *iter = file->private_data;
4155         struct trace_array *tr = inode->i_private;
4156
4157         mutex_lock(&trace_types_lock);
4158
4159         if (iter->trace->pipe_close)
4160                 iter->trace->pipe_close(iter);
4161
4162         mutex_unlock(&trace_types_lock);
4163
4164         free_cpumask_var(iter->started);
4165         mutex_destroy(&iter->mutex);
4166         kfree(iter->trace);
4167         kfree(iter);
4168
4169         trace_array_put(tr);
4170
4171         return 0;
4172 }
4173
4174 static unsigned int
4175 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4176 {
4177         /* Iterators are static, they should be filled or empty */
4178         if (trace_buffer_iter(iter, iter->cpu_file))
4179                 return POLLIN | POLLRDNORM;
4180
4181         if (trace_flags & TRACE_ITER_BLOCK)
4182                 /*
4183                  * Always select as readable when in blocking mode
4184                  */
4185                 return POLLIN | POLLRDNORM;
4186         else
4187                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4188                                              filp, poll_table);
4189 }
4190
4191 static unsigned int
4192 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4193 {
4194         struct trace_iterator *iter = filp->private_data;
4195
4196         return trace_poll(iter, filp, poll_table);
4197 }
4198
4199 /*
4200  * This is a make-shift waitqueue.
4201  * A tracer might use this callback in some rare cases:
4202  *
4203  *  1) the current tracer might hold the runqueue lock when it wakes up
4204  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4205  *  2) the function tracers trace all functions, and we don't want
4206  *     the overhead of calling wake_up and friends
4207  *     (and tracing them too)
4208  *
4209  *     Anyway, this is a really primitive wakeup.
4210  */
4211 void poll_wait_pipe(struct trace_iterator *iter)
4212 {
4213         set_current_state(TASK_INTERRUPTIBLE);
4214         /* sleep for 100 msecs, and try again. */
4215         schedule_timeout(HZ / 10);
4216 }
4217
4218 /* Must be called with trace_types_lock mutex held. */
4219 static int tracing_wait_pipe(struct file *filp)
4220 {
4221         struct trace_iterator *iter = filp->private_data;
4222
4223         while (trace_empty(iter)) {
4224
4225                 if ((filp->f_flags & O_NONBLOCK)) {
4226                         return -EAGAIN;
4227                 }
4228
4229                 mutex_unlock(&iter->mutex);
4230
4231                 iter->trace->wait_pipe(iter);
4232
4233                 mutex_lock(&iter->mutex);
4234
4235                 if (signal_pending(current))
4236                         return -EINTR;
4237
4238                 /*
4239                  * We block until we read something and tracing is disabled.
4240                  * We still block if tracing is disabled, but we have never
4241                  * read anything. This allows a user to cat this file, and
4242                  * then enable tracing. But after we have read something,
4243                  * we give an EOF when tracing is again disabled.
4244                  *
4245                  * iter->pos will be 0 if we haven't read anything.
4246                  */
4247                 if (!tracing_is_on() && iter->pos)
4248                         break;
4249         }
4250
4251         return 1;
4252 }
4253
4254 /*
4255  * Consumer reader.
4256  */
4257 static ssize_t
4258 tracing_read_pipe(struct file *filp, char __user *ubuf,
4259                   size_t cnt, loff_t *ppos)
4260 {
4261         struct trace_iterator *iter = filp->private_data;
4262         struct trace_array *tr = iter->tr;
4263         ssize_t sret;
4264
4265         /* return any leftover data */
4266         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4267         if (sret != -EBUSY)
4268                 return sret;
4269
4270         trace_seq_init(&iter->seq);
4271
4272         /* copy the tracer to avoid using a global lock all around */
4273         mutex_lock(&trace_types_lock);
4274         if (unlikely(iter->trace->name != tr->current_trace->name))
4275                 *iter->trace = *tr->current_trace;
4276         mutex_unlock(&trace_types_lock);
4277
4278         /*
4279          * Avoid more than one consumer on a single file descriptor.
4280          * This is just a matter of trace coherency; the ring buffer itself
4281          * is protected.
4282          */
4283         mutex_lock(&iter->mutex);
4284         if (iter->trace->read) {
4285                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4286                 if (sret)
4287                         goto out;
4288         }
4289
4290 waitagain:
4291         sret = tracing_wait_pipe(filp);
4292         if (sret <= 0)
4293                 goto out;
4294
4295         /* stop when tracing is finished */
4296         if (trace_empty(iter)) {
4297                 sret = 0;
4298                 goto out;
4299         }
4300
4301         if (cnt >= PAGE_SIZE)
4302                 cnt = PAGE_SIZE - 1;
4303
4304         /* reset all but tr, trace, and overruns */
4305         memset(&iter->seq, 0,
4306                sizeof(struct trace_iterator) -
4307                offsetof(struct trace_iterator, seq));
4308         cpumask_clear(iter->started);
4309         iter->pos = -1;
4310
4311         trace_event_read_lock();
4312         trace_access_lock(iter->cpu_file);
4313         while (trace_find_next_entry_inc(iter) != NULL) {
4314                 enum print_line_t ret;
4315                 int len = iter->seq.len;
4316
4317                 ret = print_trace_line(iter);
4318                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4319                         /* don't print partial lines */
4320                         iter->seq.len = len;
4321                         break;
4322                 }
4323                 if (ret != TRACE_TYPE_NO_CONSUME)
4324                         trace_consume(iter);
4325
4326                 if (iter->seq.len >= cnt)
4327                         break;
4328
4329                 /*
4330                  * Setting the full flag means we reached the trace_seq buffer
4331                  * size and should have left via the partial output condition above.
4332                  * If that happens, one of the trace_seq_* functions was not used properly.
4333                  */
4334                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4335                           iter->ent->type);
4336         }
4337         trace_access_unlock(iter->cpu_file);
4338         trace_event_read_unlock();
4339
4340         /* Now copy what we have to the user */
4341         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4342         if (iter->seq.readpos >= iter->seq.len)
4343                 trace_seq_init(&iter->seq);
4344
4345         /*
4346          * If there was nothing to send to user, in spite of consuming trace
4347          * entries, go back to wait for more entries.
4348          */
4349         if (sret == -EBUSY)
4350                 goto waitagain;
4351
4352 out:
4353         mutex_unlock(&iter->mutex);
4354
4355         return sret;
4356 }
4357
4358 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4359                                      unsigned int idx)
4360 {
4361         __free_page(spd->pages[idx]);
4362 }
4363
4364 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4365         .can_merge              = 0,
4366         .map                    = generic_pipe_buf_map,
4367         .unmap                  = generic_pipe_buf_unmap,
4368         .confirm                = generic_pipe_buf_confirm,
4369         .release                = generic_pipe_buf_release,
4370         .steal                  = generic_pipe_buf_steal,
4371         .get                    = generic_pipe_buf_get,
4372 };
4373
4374 static size_t
4375 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4376 {
4377         size_t count;
4378         int ret;
4379
4380         /* Seq buffer is page-sized, exactly what we need. */
4381         for (;;) {
4382                 count = iter->seq.len;
4383                 ret = print_trace_line(iter);
4384                 count = iter->seq.len - count;
4385                 if (rem < count) {
4386                         rem = 0;
4387                         iter->seq.len -= count;
4388                         break;
4389                 }
4390                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4391                         iter->seq.len -= count;
4392                         break;
4393                 }
4394
4395                 if (ret != TRACE_TYPE_NO_CONSUME)
4396                         trace_consume(iter);
4397                 rem -= count;
4398                 if (!trace_find_next_entry_inc(iter)) {
4399                         rem = 0;
4400                         iter->ent = NULL;
4401                         break;
4402                 }
4403         }
4404
4405         return rem;
4406 }
4407
4408 static ssize_t tracing_splice_read_pipe(struct file *filp,
4409                                         loff_t *ppos,
4410                                         struct pipe_inode_info *pipe,
4411                                         size_t len,
4412                                         unsigned int flags)
4413 {
4414         struct page *pages_def[PIPE_DEF_BUFFERS];
4415         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4416         struct trace_iterator *iter = filp->private_data;
4417         struct splice_pipe_desc spd = {
4418                 .pages          = pages_def,
4419                 .partial        = partial_def,
4420                 .nr_pages       = 0, /* This gets updated below. */
4421                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4422                 .flags          = flags,
4423                 .ops            = &tracing_pipe_buf_ops,
4424                 .spd_release    = tracing_spd_release_pipe,
4425         };
4426         struct trace_array *tr = iter->tr;
4427         ssize_t ret;
4428         size_t rem;
4429         unsigned int i;
4430
4431         if (splice_grow_spd(pipe, &spd))
4432                 return -ENOMEM;
4433
4434         /* copy the tracer to avoid using a global lock all around */
4435         mutex_lock(&trace_types_lock);
4436         if (unlikely(iter->trace->name != tr->current_trace->name))
4437                 *iter->trace = *tr->current_trace;
4438         mutex_unlock(&trace_types_lock);
4439
4440         mutex_lock(&iter->mutex);
4441
4442         if (iter->trace->splice_read) {
4443                 ret = iter->trace->splice_read(iter, filp,
4444                                                ppos, pipe, len, flags);
4445                 if (ret)
4446                         goto out_err;
4447         }
4448
4449         ret = tracing_wait_pipe(filp);
4450         if (ret <= 0)
4451                 goto out_err;
4452
4453         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4454                 ret = -EFAULT;
4455                 goto out_err;
4456         }
4457
4458         trace_event_read_lock();
4459         trace_access_lock(iter->cpu_file);
4460
4461         /* Fill as many pages as possible. */
4462         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4463                 spd.pages[i] = alloc_page(GFP_KERNEL);
4464                 if (!spd.pages[i])
4465                         break;
4466
4467                 rem = tracing_fill_pipe_page(rem, iter);
4468
4469                 /* Copy the data into the page, so we can start over. */
4470                 ret = trace_seq_to_buffer(&iter->seq,
4471                                           page_address(spd.pages[i]),
4472                                           iter->seq.len);
4473                 if (ret < 0) {
4474                         __free_page(spd.pages[i]);
4475                         break;
4476                 }
4477                 spd.partial[i].offset = 0;
4478                 spd.partial[i].len = iter->seq.len;
4479
4480                 trace_seq_init(&iter->seq);
4481         }
4482
4483         trace_access_unlock(iter->cpu_file);
4484         trace_event_read_unlock();
4485         mutex_unlock(&iter->mutex);
4486
4487         spd.nr_pages = i;
4488
4489         ret = splice_to_pipe(pipe, &spd);
4490 out:
4491         splice_shrink_spd(&spd);
4492         return ret;
4493
4494 out_err:
4495         mutex_unlock(&iter->mutex);
4496         goto out;
4497 }
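/*
 * Editor's note: the sketch below is illustrative and not part of trace.c.
 * The splice path above lets user space move trace data into a pipe without
 * first copying it through a user buffer.  Assuming debugfs is mounted at
 * /sys/kernel/debug, a consumer could splice trace_pipe into a file through
 * an ordinary pipe:
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int p[2];
	int in = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
	int out = open("trace.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (in < 0 || out < 0 || pipe(p) < 0) {
		perror("setup");
		return 1;
	}
	for (;;) {
		/* splice(2) requires a pipe on one side of each transfer */
		ssize_t n = splice(in, NULL, p[1], NULL, 4096, 0);

		if (n <= 0)
			break;
		if (splice(p[0], NULL, out, NULL, n, 0) < 0)
			break;
	}
	return 0;
}
#endif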
4498
4499 static ssize_t
4500 tracing_entries_read(struct file *filp, char __user *ubuf,
4501                      size_t cnt, loff_t *ppos)
4502 {
4503         struct inode *inode = file_inode(filp);
4504         struct trace_array *tr = inode->i_private;
4505         int cpu = tracing_get_cpu(inode);
4506         char buf[64];
4507         int r = 0;
4508         ssize_t ret;
4509
4510         mutex_lock(&trace_types_lock);
4511
4512         if (cpu == RING_BUFFER_ALL_CPUS) {
4513                 int cpu, buf_size_same;
4514                 unsigned long size;
4515
4516                 size = 0;
4517                 buf_size_same = 1;
4518                 /* check if all cpu sizes are same */
4519                 for_each_tracing_cpu(cpu) {
4520                         /* fill in the size from first enabled cpu */
4521                         if (size == 0)
4522                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4523                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4524                                 buf_size_same = 0;
4525                                 break;
4526                         }
4527                 }
4528
4529                 if (buf_size_same) {
4530                         if (!ring_buffer_expanded)
4531                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4532                                             size >> 10,
4533                                             trace_buf_size >> 10);
4534                         else
4535                                 r = sprintf(buf, "%lu\n", size >> 10);
4536                 } else
4537                         r = sprintf(buf, "X\n");
4538         } else
4539                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4540
4541         mutex_unlock(&trace_types_lock);
4542
4543         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4544         return ret;
4545 }
4546
4547 static ssize_t
4548 tracing_entries_write(struct file *filp, const char __user *ubuf,
4549                       size_t cnt, loff_t *ppos)
4550 {
4551         struct inode *inode = file_inode(filp);
4552         struct trace_array *tr = inode->i_private;
4553         unsigned long val;
4554         int ret;
4555
4556         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4557         if (ret)
4558                 return ret;
4559
4560         /* must have at least 1 entry */
4561         if (!val)
4562                 return -EINVAL;
4563
4564         /* value is in KB */
4565         val <<= 10;
4566         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4567         if (ret < 0)
4568                 return ret;
4569
4570         *ppos += cnt;
4571
4572         return cnt;
4573 }
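/*
 * Editor's note (not part of trace.c): as the KB shift above shows,
 * buffer_size_kb is written in kibibytes and sizes each CPU's buffer,
 * e.g. (assuming debugfs at /sys/kernel/debug):
 *
 *	# echo 1408 > /sys/kernel/debug/tracing/buffer_size_kb
 *
 * resizes every CPU's ring buffer to 1408 KiB, while writing 0 is rejected
 * with -EINVAL because at least one entry is required.
 */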
4574
4575 static ssize_t
4576 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4577                                 size_t cnt, loff_t *ppos)
4578 {
4579         struct trace_array *tr = filp->private_data;
4580         char buf[64];
4581         int r, cpu;
4582         unsigned long size = 0, expanded_size = 0;
4583
4584         mutex_lock(&trace_types_lock);
4585         for_each_tracing_cpu(cpu) {
4586                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4587                 if (!ring_buffer_expanded)
4588                         expanded_size += trace_buf_size >> 10;
4589         }
4590         if (ring_buffer_expanded)
4591                 r = sprintf(buf, "%lu\n", size);
4592         else
4593                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4594         mutex_unlock(&trace_types_lock);
4595
4596         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4597 }
4598
4599 static ssize_t
4600 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4601                           size_t cnt, loff_t *ppos)
4602 {
4603         /*
4604          * There is no need to read what the user has written; this function
4605          * exists just so that there is no error when "echo" is used.
4606          */
4607
4608         *ppos += cnt;
4609
4610         return cnt;
4611 }
4612
4613 static int
4614 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4615 {
4616         struct trace_array *tr = inode->i_private;
4617
4618         /* disable tracing? */
4619         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4620                 tracer_tracing_off(tr);
4621         /* resize the ring buffer to 0 */
4622         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4623
4624         trace_array_put(tr);
4625
4626         return 0;
4627 }
4628
4629 static ssize_t
4630 tracing_mark_write(struct file *filp, const char __user *ubuf,
4631                                         size_t cnt, loff_t *fpos)
4632 {
4633         unsigned long addr = (unsigned long)ubuf;
4634         struct trace_array *tr = filp->private_data;
4635         struct ring_buffer_event *event;
4636         struct ring_buffer *buffer;
4637         struct print_entry *entry;
4638         unsigned long irq_flags;
4639         struct page *pages[2];
4640         void *map_page[2];
4641         int nr_pages = 1;
4642         ssize_t written;
4643         int offset;
4644         int size;
4645         int len;
4646         int ret;
4647         int i;
4648
4649         if (tracing_disabled)
4650                 return -EINVAL;
4651
4652         if (!(trace_flags & TRACE_ITER_MARKERS))
4653                 return -EINVAL;
4654
4655         if (cnt > TRACE_BUF_SIZE)
4656                 cnt = TRACE_BUF_SIZE;
4657
4658         /*
4659          * Userspace is injecting traces into the kernel trace buffer.
4660          * We want to be as non-intrusive as possible.
4661          * To do so, we do not want to allocate any special buffers
4662          * or take any locks, but instead write the userspace data
4663          * straight into the ring buffer.
4664          *
4665          * First we need to pin the userspace buffer into memory,
4666          * which it most likely already is, because userspace just referenced it.
4667          * But there's no guarantee that it is. By using get_user_pages_fast()
4668          * and kmap_atomic/kunmap_atomic() we can get access to the
4669          * pages directly. We then write the data directly into the
4670          * ring buffer.
4671          */
4672         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4673
4674         /* check if we cross pages */
4675         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4676                 nr_pages = 2;
4677
4678         offset = addr & (PAGE_SIZE - 1);
4679         addr &= PAGE_MASK;
4680
4681         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4682         if (ret < nr_pages) {
4683                 while (--ret >= 0)
4684                         put_page(pages[ret]);
4685                 written = -EFAULT;
4686                 goto out;
4687         }
4688
4689         for (i = 0; i < nr_pages; i++)
4690                 map_page[i] = kmap_atomic(pages[i]);
4691
4692         local_save_flags(irq_flags);
4693         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4694         buffer = tr->trace_buffer.buffer;
4695         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4696                                           irq_flags, preempt_count());
4697         if (!event) {
4698                 /* Ring buffer disabled, return as if not open for write */
4699                 written = -EBADF;
4700                 goto out_unlock;
4701         }
4702
4703         entry = ring_buffer_event_data(event);
4704         entry->ip = _THIS_IP_;
4705
4706         if (nr_pages == 2) {
4707                 len = PAGE_SIZE - offset;
4708                 memcpy(&entry->buf, map_page[0] + offset, len);
4709                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4710         } else
4711                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4712
4713         if (entry->buf[cnt - 1] != '\n') {
4714                 entry->buf[cnt] = '\n';
4715                 entry->buf[cnt + 1] = '\0';
4716         } else
4717                 entry->buf[cnt] = '\0';
4718
4719         __buffer_unlock_commit(buffer, event);
4720
4721         written = cnt;
4722
4723         *fpos += written;
4724
4725  out_unlock:
4726         for (i = 0; i < nr_pages; i++) {
4727                 kunmap_atomic(map_page[i]);
4728                 put_page(pages[i]);
4729         }
4730  out:
4731         return written;
4732 }
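/*
 * Editor's note: the sketch below is illustrative and not part of trace.c.
 * tracing_mark_write() above backs the trace_marker file, so user space can
 * annotate the trace by writing plain text to it (assuming debugfs is
 * mounted at /sys/kernel/debug):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void trace_marker_write(const char *msg)
{
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);

	if (fd < 0)
		return;
	/* The text shows up in the trace as a TRACE_PRINT entry. */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("trace_marker");
	close(fd);
}
#endif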
4733
4734 static int tracing_clock_show(struct seq_file *m, void *v)
4735 {
4736         struct trace_array *tr = m->private;
4737         int i;
4738
4739         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4740                 seq_printf(m,
4741                         "%s%s%s%s", i ? " " : "",
4742                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4743                         i == tr->clock_id ? "]" : "");
4744         seq_putc(m, '\n');
4745
4746         return 0;
4747 }
4748
4749 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4750                                    size_t cnt, loff_t *fpos)
4751 {
4752         struct seq_file *m = filp->private_data;
4753         struct trace_array *tr = m->private;
4754         char buf[64];
4755         const char *clockstr;
4756         int i;
4757
4758         if (cnt >= sizeof(buf))
4759                 return -EINVAL;
4760
4761         if (copy_from_user(&buf, ubuf, cnt))
4762                 return -EFAULT;
4763
4764         buf[cnt] = 0;
4765
4766         clockstr = strstrip(buf);
4767
4768         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4769                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4770                         break;
4771         }
4772         if (i == ARRAY_SIZE(trace_clocks))
4773                 return -EINVAL;
4774
4775         mutex_lock(&trace_types_lock);
4776
4777         tr->clock_id = i;
4778
4779         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4780
4781         /*
4782          * New clock may not be consistent with the previous clock.
4783          * Reset the buffer so that it doesn't have incomparable timestamps.
4784          */
4785         tracing_reset_online_cpus(&tr->trace_buffer);
4786
4787 #ifdef CONFIG_TRACER_MAX_TRACE
4788         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4789                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4790         tracing_reset_online_cpus(&tr->max_buffer);
4791 #endif
4792
4793         mutex_unlock(&trace_types_lock);
4794
4795         *fpos += cnt;
4796
4797         return cnt;
4798 }
4799
4800 static int tracing_clock_open(struct inode *inode, struct file *file)
4801 {
4802         struct trace_array *tr = inode->i_private;
4803         int ret;
4804
4805         if (tracing_disabled)
4806                 return -ENODEV;
4807
4808         if (trace_array_get(tr))
4809                 return -ENODEV;
4810
4811         ret = single_open(file, tracing_clock_show, inode->i_private);
4812         if (ret < 0)
4813                 trace_array_put(tr);
4814
4815         return ret;
4816 }
4817
4818 struct ftrace_buffer_info {
4819         struct trace_iterator   iter;
4820         void                    *spare;
4821         unsigned int            read;
4822 };
4823
4824 #ifdef CONFIG_TRACER_SNAPSHOT
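/*
 * Open the "snapshot" file. Readers get a full iterator over the
 * max (snapshot) buffer; writers only get a seq_file stub that
 * carries the iterator as private data.
 */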
4825 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4826 {
4827         struct trace_array *tr = inode->i_private;
4828         struct trace_iterator *iter;
4829         struct seq_file *m;
4830         int ret = 0;
4831
4832         if (trace_array_get(tr) < 0)
4833                 return -ENODEV;
4834
4835         if (file->f_mode & FMODE_READ) {
4836                 iter = __tracing_open(inode, file, true);
4837                 if (IS_ERR(iter))
4838                         ret = PTR_ERR(iter);
4839         } else {
4840                 /* Writes still need the seq_file to hold the private data */
4841                 ret = -ENOMEM;
4842                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4843                 if (!m)
4844                         goto out;
4845                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4846                 if (!iter) {
4847                         kfree(m);
4848                         goto out;
4849                 }
4850                 ret = 0;
4851
4852                 iter->tr = tr;
4853                 iter->trace_buffer = &tr->max_buffer;
4854                 iter->cpu_file = tracing_get_cpu(inode);
4855                 m->private = iter;
4856                 file->private_data = m;
4857         }
4858 out:
4859         if (ret < 0)
4860                 trace_array_put(tr);
4861
4862         return ret;
4863 }
4864
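/*
 * Writing to the "snapshot" file controls the snapshot buffer:
 *   0     - free the snapshot buffer (all CPUs only)
 *   1     - allocate it if needed and swap it with the live buffer
 *   other - clear the snapshot buffer contents
 */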
4865 static ssize_t
4866 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4867                        loff_t *ppos)
4868 {
4869         struct seq_file *m = filp->private_data;
4870         struct trace_iterator *iter = m->private;
4871         struct trace_array *tr = iter->tr;
4872         unsigned long val;
4873         int ret;
4874
4875         ret = tracing_update_buffers();
4876         if (ret < 0)
4877                 return ret;
4878
4879         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4880         if (ret)
4881                 return ret;
4882
4883         mutex_lock(&trace_types_lock);
4884
4885         if (tr->current_trace->use_max_tr) {
4886                 ret = -EBUSY;
4887                 goto out;
4888         }
4889
4890         switch (val) {
4891         case 0:
4892                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4893                         ret = -EINVAL;
4894                         break;
4895                 }
4896                 if (tr->allocated_snapshot)
4897                         free_snapshot(tr);
4898                 break;
4899         case 1:
4900 /* Only allow per-cpu swap if the ring buffer supports it */
4901 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4902                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4903                         ret = -EINVAL;
4904                         break;
4905                 }
4906 #endif
4907                 if (!tr->allocated_snapshot) {
4908                         ret = alloc_snapshot(tr);
4909                         if (ret < 0)
4910                                 break;
4911                 }
4912                 local_irq_disable();
4913                 /* Now, we're going to swap */
4914                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4915                         update_max_tr(tr, current, smp_processor_id());
4916                 else
4917                         update_max_tr_single(tr, current, iter->cpu_file);
4918                 local_irq_enable();
4919                 break;
4920         default:
4921                 if (tr->allocated_snapshot) {
4922                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4923                                 tracing_reset_online_cpus(&tr->max_buffer);
4924                         else
4925                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4926                 }
4927                 break;
4928         }
4929
4930         if (ret >= 0) {
4931                 *ppos += cnt;
4932                 ret = cnt;
4933         }
4934 out:
4935         mutex_unlock(&trace_types_lock);
4936         return ret;
4937 }
4938
4939 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4940 {
4941         struct seq_file *m = file->private_data;
4942         int ret;
4943
4944         ret = tracing_release(inode, file);
4945
4946         if (file->f_mode & FMODE_READ)
4947                 return ret;
4948
4949         /* If write only, the seq_file is just a stub */
4950         if (m)
4951                 kfree(m->private);
4952         kfree(m);
4953
4954         return 0;
4955 }
4956
4957 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4958 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4959                                     size_t count, loff_t *ppos);
4960 static int tracing_buffers_release(struct inode *inode, struct file *file);
4961 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4962                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4963
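/*
 * Open "snapshot_raw": reuse the trace_pipe_raw open path, then
 * point the iterator at the max (snapshot) buffer.
 */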
4964 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4965 {
4966         struct ftrace_buffer_info *info;
4967         int ret;
4968
4969         ret = tracing_buffers_open(inode, filp);
4970         if (ret < 0)
4971                 return ret;
4972
4973         info = filp->private_data;
4974
4975         if (info->iter.trace->use_max_tr) {
4976                 tracing_buffers_release(inode, filp);
4977                 return -EBUSY;
4978         }
4979
4980         info->iter.snapshot = true;
4981         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4982
4983         return ret;
4984 }
4985
4986 #endif /* CONFIG_TRACER_SNAPSHOT */
4987
4988
4989 static const struct file_operations tracing_max_lat_fops = {
4990         .open           = tracing_open_generic,
4991         .read           = tracing_max_lat_read,
4992         .write          = tracing_max_lat_write,
4993         .llseek         = generic_file_llseek,
4994 };
4995
4996 static const struct file_operations set_tracer_fops = {
4997         .open           = tracing_open_generic,
4998         .read           = tracing_set_trace_read,
4999         .write          = tracing_set_trace_write,
5000         .llseek         = generic_file_llseek,
5001 };
5002
5003 static const struct file_operations tracing_pipe_fops = {
5004         .open           = tracing_open_pipe,
5005         .poll           = tracing_poll_pipe,
5006         .read           = tracing_read_pipe,
5007         .splice_read    = tracing_splice_read_pipe,
5008         .release        = tracing_release_pipe,
5009         .llseek         = no_llseek,
5010 };
5011
5012 static const struct file_operations tracing_entries_fops = {
5013         .open           = tracing_open_generic_tr,
5014         .read           = tracing_entries_read,
5015         .write          = tracing_entries_write,
5016         .llseek         = generic_file_llseek,
5017         .release        = tracing_release_generic_tr,
5018 };
5019
5020 static const struct file_operations tracing_total_entries_fops = {
5021         .open           = tracing_open_generic_tr,
5022         .read           = tracing_total_entries_read,
5023         .llseek         = generic_file_llseek,
5024         .release        = tracing_release_generic_tr,
5025 };
5026
5027 static const struct file_operations tracing_free_buffer_fops = {
5028         .open           = tracing_open_generic_tr,
5029         .write          = tracing_free_buffer_write,
5030         .release        = tracing_free_buffer_release,
5031 };
5032
5033 static const struct file_operations tracing_mark_fops = {
5034         .open           = tracing_open_generic_tr,
5035         .write          = tracing_mark_write,
5036         .llseek         = generic_file_llseek,
5037         .release        = tracing_release_generic_tr,
5038 };
5039
5040 static const struct file_operations trace_clock_fops = {
5041         .open           = tracing_clock_open,
5042         .read           = seq_read,
5043         .llseek         = seq_lseek,
5044         .release        = tracing_single_release_tr,
5045         .write          = tracing_clock_write,
5046 };
5047
5048 #ifdef CONFIG_TRACER_SNAPSHOT
5049 static const struct file_operations snapshot_fops = {
5050         .open           = tracing_snapshot_open,
5051         .read           = seq_read,
5052         .write          = tracing_snapshot_write,
5053         .llseek         = tracing_lseek,
5054         .release        = tracing_snapshot_release,
5055 };
5056
5057 static const struct file_operations snapshot_raw_fops = {
5058         .open           = snapshot_raw_open,
5059         .read           = tracing_buffers_read,
5060         .release        = tracing_buffers_release,
5061         .splice_read    = tracing_buffers_splice_read,
5062         .llseek         = no_llseek,
5063 };
5064
5065 #endif /* CONFIG_TRACER_SNAPSHOT */
5066
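/*
 * Open a per-cpu trace_pipe_raw file. Allocates the buffer info and
 * takes a reference on the trace array for the lifetime of the file.
 */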
5067 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5068 {
5069         struct trace_array *tr = inode->i_private;
5070         struct ftrace_buffer_info *info;
5071         int ret;
5072
5073         if (tracing_disabled)
5074                 return -ENODEV;
5075
5076         if (trace_array_get(tr) < 0)
5077                 return -ENODEV;
5078
5079         info = kzalloc(sizeof(*info), GFP_KERNEL);
5080         if (!info) {
5081                 trace_array_put(tr);
5082                 return -ENOMEM;
5083         }
5084
5085         mutex_lock(&trace_types_lock);
5086
5087         info->iter.tr           = tr;
5088         info->iter.cpu_file     = tracing_get_cpu(inode);
5089         info->iter.trace        = tr->current_trace;
5090         info->iter.trace_buffer = &tr->trace_buffer;
5091         info->spare             = NULL;
5092         /* Force reading ring buffer for first read */
5093         info->read              = (unsigned int)-1;
5094
5095         filp->private_data = info;
5096
5097         mutex_unlock(&trace_types_lock);
5098
5099         ret = nonseekable_open(inode, filp);
5100         if (ret < 0)
5101                 trace_array_put(tr);
5102
5103         return ret;
5104 }
5105
5106 static unsigned int
5107 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5108 {
5109         struct ftrace_buffer_info *info = filp->private_data;
5110         struct trace_iterator *iter = &info->iter;
5111
5112         return trace_poll(iter, filp, poll_table);
5113 }
5114
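/*
 * Read raw ring buffer pages. A spare page is filled from the ring
 * buffer and copied out to user space, blocking if the buffer is
 * empty unless O_NONBLOCK is set.
 */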
5115 static ssize_t
5116 tracing_buffers_read(struct file *filp, char __user *ubuf,
5117                      size_t count, loff_t *ppos)
5118 {
5119         struct ftrace_buffer_info *info = filp->private_data;
5120         struct trace_iterator *iter = &info->iter;
5121         ssize_t ret;
5122         ssize_t size;
5123
5124         if (!count)
5125                 return 0;
5126
5127         mutex_lock(&trace_types_lock);
5128
5129 #ifdef CONFIG_TRACER_MAX_TRACE
5130         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5131                 size = -EBUSY;
5132                 goto out_unlock;
5133         }
5134 #endif
5135
5136         if (!info->spare)
5137                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5138                                                           iter->cpu_file);
5139         size = -ENOMEM;
5140         if (!info->spare)
5141                 goto out_unlock;
5142
5143         /* Do we have previous read data to read? */
5144         if (info->read < PAGE_SIZE)
5145                 goto read;
5146
5147  again:
5148         trace_access_lock(iter->cpu_file);
5149         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5150                                     &info->spare,
5151                                     count,
5152                                     iter->cpu_file, 0);
5153         trace_access_unlock(iter->cpu_file);
5154
5155         if (ret < 0) {
5156                 if (trace_empty(iter)) {
5157                         if (filp->f_flags & O_NONBLOCK) {
5158                                 size = -EAGAIN;
5159                                 goto out_unlock;
5160                         }
5161                         mutex_unlock(&trace_types_lock);
5162                         iter->trace->wait_pipe(iter);
5163                         mutex_lock(&trace_types_lock);
5164                         if (signal_pending(current)) {
5165                                 size = -EINTR;
5166                                 goto out_unlock;
5167                         }
5168                         goto again;
5169                 }
5170                 size = 0;
5171                 goto out_unlock;
5172         }
5173
5174         info->read = 0;
5175  read:
5176         size = PAGE_SIZE - info->read;
5177         if (size > count)
5178                 size = count;
5179
5180         ret = copy_to_user(ubuf, info->spare + info->read, size);
5181         if (ret == size) {
5182                 size = -EFAULT;
5183                 goto out_unlock;
5184         }
5185         size -= ret;
5186
5187         *ppos += size;
5188         info->read += size;
5189
5190  out_unlock:
5191         mutex_unlock(&trace_types_lock);
5192
5193         return size;
5194 }
5195
5196 static int tracing_buffers_release(struct inode *inode, struct file *file)
5197 {
5198         struct ftrace_buffer_info *info = file->private_data;
5199         struct trace_iterator *iter = &info->iter;
5200
5201         mutex_lock(&trace_types_lock);
5202
5203         __trace_array_put(iter->tr);
5204
5205         if (info->spare)
5206                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5207         kfree(info);
5208
5209         mutex_unlock(&trace_types_lock);
5210
5211         return 0;
5212 }
5213
5214 struct buffer_ref {
5215         struct ring_buffer      *buffer;
5216         void                    *page;
5217         int                     ref;
5218 };
5219
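/*
 * Drop a reference on a spliced ring buffer page; the page is handed
 * back to the ring buffer when the last reference goes away.
 */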
5220 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5221                                     struct pipe_buffer *buf)
5222 {
5223         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5224
5225         if (--ref->ref)
5226                 return;
5227
5228         ring_buffer_free_read_page(ref->buffer, ref->page);
5229         kfree(ref);
5230         buf->private = 0;
5231 }
5232
5233 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5234                                 struct pipe_buffer *buf)
5235 {
5236         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5237
5238         ref->ref++;
5239 }
5240
5241 /* Pipe buffer operations for a buffer. */
5242 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5243         .can_merge              = 0,
5244         .map                    = generic_pipe_buf_map,
5245         .unmap                  = generic_pipe_buf_unmap,
5246         .confirm                = generic_pipe_buf_confirm,
5247         .release                = buffer_pipe_buf_release,
5248         .steal                  = generic_pipe_buf_steal,
5249         .get                    = buffer_pipe_buf_get,
5250 };
5251
5252 /*
5253  * Callback from splice_to_pipe(): release any pages left in the
5254  * spd in case we errored out while filling the pipe.
5255  */
5256 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5257 {
5258         struct buffer_ref *ref =
5259                 (struct buffer_ref *)spd->partial[i].private;
5260
5261         if (--ref->ref)
5262                 return;
5263
5264         ring_buffer_free_read_page(ref->buffer, ref->page);
5265         kfree(ref);
5266         spd->partial[i].private = 0;
5267 }
5268
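/*
 * Splice whole ring buffer pages into a pipe without copying. Each
 * page is wrapped in a buffer_ref so it can be returned to the ring
 * buffer once the pipe consumer is done with it.
 */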
5269 static ssize_t
5270 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5271                             struct pipe_inode_info *pipe, size_t len,
5272                             unsigned int flags)
5273 {
5274         struct ftrace_buffer_info *info = file->private_data;
5275         struct trace_iterator *iter = &info->iter;
5276         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5277         struct page *pages_def[PIPE_DEF_BUFFERS];
5278         struct splice_pipe_desc spd = {
5279                 .pages          = pages_def,
5280                 .partial        = partial_def,
5281                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5282                 .flags          = flags,
5283                 .ops            = &buffer_pipe_buf_ops,
5284                 .spd_release    = buffer_spd_release,
5285         };
5286         struct buffer_ref *ref;
5287         int entries, size, i;
5288         ssize_t ret;
5289
5290         mutex_lock(&trace_types_lock);
5291
5292 #ifdef CONFIG_TRACER_MAX_TRACE
5293         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5294                 ret = -EBUSY;
5295                 goto out;
5296         }
5297 #endif
5298
5299         if (splice_grow_spd(pipe, &spd)) {
5300                 ret = -ENOMEM;
5301                 goto out;
5302         }
5303
5304         if (*ppos & (PAGE_SIZE - 1)) {
5305                 ret = -EINVAL;
5306                 goto out;
5307         }
5308
5309         if (len & (PAGE_SIZE - 1)) {
5310                 if (len < PAGE_SIZE) {
5311                         ret = -EINVAL;
5312                         goto out;
5313                 }
5314                 len &= PAGE_MASK;
5315         }
5316
5317  again:
5318         trace_access_lock(iter->cpu_file);
5319         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5320
5321         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5322                 struct page *page;
5323                 int r;
5324
5325                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5326                 if (!ref)
5327                         break;
5328
5329                 ref->ref = 1;
5330                 ref->buffer = iter->trace_buffer->buffer;
5331                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5332                 if (!ref->page) {
5333                         kfree(ref);
5334                         break;
5335                 }
5336
5337                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5338                                           len, iter->cpu_file, 1);
5339                 if (r < 0) {
5340                         ring_buffer_free_read_page(ref->buffer, ref->page);
5341                         kfree(ref);
5342                         break;
5343                 }
5344
5345                 /*
5346                  * Zero out any leftover data; this page is going to
5347                  * user land.
5348                  */
5349                 size = ring_buffer_page_len(ref->page);
5350                 if (size < PAGE_SIZE)
5351                         memset(ref->page + size, 0, PAGE_SIZE - size);
5352
5353                 page = virt_to_page(ref->page);
5354
5355                 spd.pages[i] = page;
5356                 spd.partial[i].len = PAGE_SIZE;
5357                 spd.partial[i].offset = 0;
5358                 spd.partial[i].private = (unsigned long)ref;
5359                 spd.nr_pages++;
5360                 *ppos += PAGE_SIZE;
5361
5362                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5363         }
5364
5365         trace_access_unlock(iter->cpu_file);
5366         spd.nr_pages = i;
5367
5368         /* did we read anything? */
5369         if (!spd.nr_pages) {
5370                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5371                         ret = -EAGAIN;
5372                         goto out;
5373                 }
5374                 mutex_unlock(&trace_types_lock);
5375                 iter->trace->wait_pipe(iter);
5376                 mutex_lock(&trace_types_lock);
5377                 if (signal_pending(current)) {
5378                         ret = -EINTR;
5379                         goto out;
5380                 }
5381                 goto again;
5382         }
5383
5384         ret = splice_to_pipe(pipe, &spd);
5385         splice_shrink_spd(&spd);
5386 out:
5387         mutex_unlock(&trace_types_lock);
5388
5389         return ret;
5390 }
5391
5392 static const struct file_operations tracing_buffers_fops = {
5393         .open           = tracing_buffers_open,
5394         .read           = tracing_buffers_read,
5395         .poll           = tracing_buffers_poll,
5396         .release        = tracing_buffers_release,
5397         .splice_read    = tracing_buffers_splice_read,
5398         .llseek         = no_llseek,
5399 };
5400
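/*
 * Report per-cpu ring buffer statistics (entries, overruns, bytes,
 * timestamps, dropped and read events) through the "stats" file.
 */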
5401 static ssize_t
5402 tracing_stats_read(struct file *filp, char __user *ubuf,
5403                    size_t count, loff_t *ppos)
5404 {
5405         struct inode *inode = file_inode(filp);
5406         struct trace_array *tr = inode->i_private;
5407         struct trace_buffer *trace_buf = &tr->trace_buffer;
5408         int cpu = tracing_get_cpu(inode);
5409         struct trace_seq *s;
5410         unsigned long cnt;
5411         unsigned long long t;
5412         unsigned long usec_rem;
5413
5414         s = kmalloc(sizeof(*s), GFP_KERNEL);
5415         if (!s)
5416                 return -ENOMEM;
5417
5418         trace_seq_init(s);
5419
5420         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5421         trace_seq_printf(s, "entries: %ld\n", cnt);
5422
5423         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5424         trace_seq_printf(s, "overrun: %ld\n", cnt);
5425
5426         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5427         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5428
5429         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5430         trace_seq_printf(s, "bytes: %ld\n", cnt);
5431
5432         if (trace_clocks[tr->clock_id].in_ns) {
5433                 /* local or global for trace_clock */
5434                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5435                 usec_rem = do_div(t, USEC_PER_SEC);
5436                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5437                                                                 t, usec_rem);
5438
5439                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5440                 usec_rem = do_div(t, USEC_PER_SEC);
5441                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5442         } else {
5443                 /* counter or tsc mode for trace_clock */
5444                 trace_seq_printf(s, "oldest event ts: %llu\n",
5445                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5446
5447                 trace_seq_printf(s, "now ts: %llu\n",
5448                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5449         }
5450
5451         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5452         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5453
5454         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5455         trace_seq_printf(s, "read events: %ld\n", cnt);
5456
5457         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5458
5459         kfree(s);
5460
5461         return count;
5462 }
5463
5464 static const struct file_operations tracing_stats_fops = {
5465         .open           = tracing_open_generic_tr,
5466         .read           = tracing_stats_read,
5467         .llseek         = generic_file_llseek,
5468         .release        = tracing_release_generic_tr,
5469 };
5470
5471 #ifdef CONFIG_DYNAMIC_FTRACE
5472
5473 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5474 {
5475         return 0;
5476 }
5477
5478 static ssize_t
5479 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5480                   size_t cnt, loff_t *ppos)
5481 {
5482         static char ftrace_dyn_info_buffer[1024];
5483         static DEFINE_MUTEX(dyn_info_mutex);
5484         unsigned long *p = filp->private_data;
5485         char *buf = ftrace_dyn_info_buffer;
5486         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5487         int r;
5488
5489         mutex_lock(&dyn_info_mutex);
5490         r = sprintf(buf, "%ld ", *p);
5491
5492         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5493         buf[r++] = '\n';
5494
5495         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5496
5497         mutex_unlock(&dyn_info_mutex);
5498
5499         return r;
5500 }
5501
5502 static const struct file_operations tracing_dyn_info_fops = {
5503         .open           = tracing_open_generic,
5504         .read           = tracing_read_dyn_info,
5505         .llseek         = generic_file_llseek,
5506 };
5507 #endif /* CONFIG_DYNAMIC_FTRACE */
5508
5509 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
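/*
 * Probe callbacks for the "snapshot" function trigger: take a
 * snapshot each time the traced function is hit, optionally limited
 * by a count stored in the probe's data pointer.
 */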
5510 static void
5511 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5512 {
5513         tracing_snapshot();
5514 }
5515
5516 static void
5517 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5518 {
5519         unsigned long *count = (unsigned long *)data;
5520
5521         if (!*count)
5522                 return;
5523
5524         if (*count != -1)
5525                 (*count)--;
5526
5527         tracing_snapshot();
5528 }
5529
5530 static int
5531 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5532                       struct ftrace_probe_ops *ops, void *data)
5533 {
5534         long count = (long)data;
5535
5536         seq_printf(m, "%ps:", (void *)ip);
5537
5538         seq_puts(m, "snapshot");
5539
5540         if (count == -1)
5541                 seq_puts(m, ":unlimited\n");
5542         else
5543                 seq_printf(m, ":count=%ld\n", count);
5544
5545         return 0;
5546 }
5547
5548 static struct ftrace_probe_ops snapshot_probe_ops = {
5549         .func                   = ftrace_snapshot,
5550         .print                  = ftrace_snapshot_print,
5551 };
5552
5553 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5554         .func                   = ftrace_count_snapshot,
5555         .print                  = ftrace_snapshot_print,
5556 };
5557
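/*
 * Parse "func:snapshot[:count]" from set_ftrace_filter and register
 * (or, with a leading '!', unregister) the snapshot probe.
 */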
5558 static int
5559 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5560                                char *glob, char *cmd, char *param, int enable)
5561 {
5562         struct ftrace_probe_ops *ops;
5563         void *count = (void *)-1;
5564         char *number;
5565         int ret;
5566
5567         /* hash funcs only work with set_ftrace_filter */
5568         if (!enable)
5569                 return -EINVAL;
5570
5571         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5572
5573         if (glob[0] == '!') {
5574                 unregister_ftrace_function_probe_func(glob+1, ops);
5575                 return 0;
5576         }
5577
5578         if (!param)
5579                 goto out_reg;
5580
5581         number = strsep(&param, ":");
5582
5583         if (!strlen(number))
5584                 goto out_reg;
5585
5586         /*
5587          * We use the callback data field (which is a pointer)
5588          * as our counter.
5589          */
5590         ret = kstrtoul(number, 0, (unsigned long *)&count);
5591         if (ret)
5592                 return ret;
5593
5594  out_reg:
5595         ret = register_ftrace_function_probe(glob, ops, count);
5596
5597         if (ret >= 0)
5598                 alloc_snapshot(&global_trace);
5599
5600         return ret < 0 ? ret : 0;
5601 }
5602
5603 static struct ftrace_func_command ftrace_snapshot_cmd = {
5604         .name                   = "snapshot",
5605         .func                   = ftrace_trace_snapshot_callback,
5606 };
5607
5608 static __init int register_snapshot_cmd(void)
5609 {
5610         return register_ftrace_command(&ftrace_snapshot_cmd);
5611 }
5612 #else
5613 static inline __init int register_snapshot_cmd(void) { return 0; }
5614 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5615
5616 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5617 {
5618         if (tr->dir)
5619                 return tr->dir;
5620
5621         if (!debugfs_initialized())
5622                 return NULL;
5623
5624         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5625                 tr->dir = debugfs_create_dir("tracing", NULL);
5626
5627         if (!tr->dir)
5628                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5629
5630         return tr->dir;
5631 }
5632
5633 struct dentry *tracing_init_dentry(void)
5634 {
5635         return tracing_init_dentry_tr(&global_trace);
5636 }
5637
5638 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5639 {
5640         struct dentry *d_tracer;
5641
5642         if (tr->percpu_dir)
5643                 return tr->percpu_dir;
5644
5645         d_tracer = tracing_init_dentry_tr(tr);
5646         if (!d_tracer)
5647                 return NULL;
5648
5649         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5650
5651         WARN_ONCE(!tr->percpu_dir,
5652                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5653
5654         return tr->percpu_dir;
5655 }
5656
5657 static struct dentry *
5658 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5659                       void *data, long cpu, const struct file_operations *fops)
5660 {
5661         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5662
5663         if (ret) /* See tracing_get_cpu() */
5664                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5665         return ret;
5666 }
5667
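/*
 * Create the per_cpu/cpuN/ debugfs directory and its files for one
 * CPU of a trace array.
 */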
5668 static void
5669 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5670 {
5671         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5672         struct dentry *d_cpu;
5673         char cpu_dir[30]; /* 30 characters should be more than enough */
5674
5675         if (!d_percpu)
5676                 return;
5677
5678         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5679         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5680         if (!d_cpu) {
5681                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5682                 return;
5683         }
5684
5685         /* per cpu trace_pipe */
5686         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5687                                 tr, cpu, &tracing_pipe_fops);
5688
5689         /* per cpu trace */
5690         trace_create_cpu_file("trace", 0644, d_cpu,
5691                                 tr, cpu, &tracing_fops);
5692
5693         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5694                                 tr, cpu, &tracing_buffers_fops);
5695
5696         trace_create_cpu_file("stats", 0444, d_cpu,
5697                                 tr, cpu, &tracing_stats_fops);
5698
5699         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5700                                 tr, cpu, &tracing_entries_fops);
5701
5702 #ifdef CONFIG_TRACER_SNAPSHOT
5703         trace_create_cpu_file("snapshot", 0644, d_cpu,
5704                                 tr, cpu, &snapshot_fops);
5705
5706         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5707                                 tr, cpu, &snapshot_raw_fops);
5708 #endif
5709 }
5710
5711 #ifdef CONFIG_FTRACE_SELFTEST
5712 /* Let selftest have access to static functions in this file */
5713 #include "trace_selftest.c"
5714 #endif
5715
5716 struct trace_option_dentry {
5717         struct tracer_opt               *opt;
5718         struct tracer_flags             *flags;
5719         struct trace_array              *tr;
5720         struct dentry                   *entry;
5721 };
5722
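/*
 * Read/write handlers for the per-tracer files under options/:
 * reading reports the flag as "0" or "1", writing toggles it through
 * the tracer's set_flag() callback.
 */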
5723 static ssize_t
5724 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5725                         loff_t *ppos)
5726 {
5727         struct trace_option_dentry *topt = filp->private_data;
5728         char *buf;
5729
5730         if (topt->flags->val & topt->opt->bit)
5731                 buf = "1\n";
5732         else
5733                 buf = "0\n";
5734
5735         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5736 }
5737
5738 static ssize_t
5739 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5740                          loff_t *ppos)
5741 {
5742         struct trace_option_dentry *topt = filp->private_data;
5743         unsigned long val;
5744         int ret;
5745
5746         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5747         if (ret)
5748                 return ret;
5749
5750         if (val != 0 && val != 1)
5751                 return -EINVAL;
5752
5753         if (!!(topt->flags->val & topt->opt->bit) != val) {
5754                 mutex_lock(&trace_types_lock);
5755                 ret = __set_tracer_option(topt->tr, topt->flags,
5756                                           topt->opt, !val);
5757                 mutex_unlock(&trace_types_lock);
5758                 if (ret)
5759                         return ret;
5760         }
5761
5762         *ppos += cnt;
5763
5764         return cnt;
5765 }
5766
5767
5768 static const struct file_operations trace_options_fops = {
5769         .open = tracing_open_generic,
5770         .read = trace_options_read,
5771         .write = trace_options_write,
5772         .llseek = generic_file_llseek,
5773 };
5774
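/*
 * Read/write handlers for the options/ files that map to the core
 * trace_flags bits shared by all tracers.
 */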
5775 static ssize_t
5776 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5777                         loff_t *ppos)
5778 {
5779         long index = (long)filp->private_data;
5780         char *buf;
5781
5782         if (trace_flags & (1 << index))
5783                 buf = "1\n";
5784         else
5785                 buf = "0\n";
5786
5787         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5788 }
5789
5790 static ssize_t
5791 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5792                          loff_t *ppos)
5793 {
5794         struct trace_array *tr = &global_trace;
5795         long index = (long)filp->private_data;
5796         unsigned long val;
5797         int ret;
5798
5799         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5800         if (ret)
5801                 return ret;
5802
5803         if (val != 0 && val != 1)
5804                 return -EINVAL;
5805
5806         mutex_lock(&trace_types_lock);
5807         ret = set_tracer_flag(tr, 1 << index, val);
5808         mutex_unlock(&trace_types_lock);
5809
5810         if (ret < 0)
5811                 return ret;
5812
5813         *ppos += cnt;
5814
5815         return cnt;
5816 }
5817
5818 static const struct file_operations trace_options_core_fops = {
5819         .open = tracing_open_generic,
5820         .read = trace_options_core_read,
5821         .write = trace_options_core_write,
5822         .llseek = generic_file_llseek,
5823 };
5824
5825 struct dentry *trace_create_file(const char *name,
5826                                  umode_t mode,
5827                                  struct dentry *parent,
5828                                  void *data,
5829                                  const struct file_operations *fops)
5830 {
5831         struct dentry *ret;
5832
5833         ret = debugfs_create_file(name, mode, parent, data, fops);
5834         if (!ret)
5835                 pr_warning("Could not create debugfs '%s' entry\n", name);
5836
5837         return ret;
5838 }
5839
5840
5841 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5842 {
5843         struct dentry *d_tracer;
5844
5845         if (tr->options)
5846                 return tr->options;
5847
5848         d_tracer = tracing_init_dentry_tr(tr);
5849         if (!d_tracer)
5850                 return NULL;
5851
5852         tr->options = debugfs_create_dir("options", d_tracer);
5853         if (!tr->options) {
5854                 pr_warning("Could not create debugfs directory 'options'\n");
5855                 return NULL;
5856         }
5857
5858         return tr->options;
5859 }
5860
5861 static void
5862 create_trace_option_file(struct trace_array *tr,
5863                          struct trace_option_dentry *topt,
5864                          struct tracer_flags *flags,
5865                          struct tracer_opt *opt)
5866 {
5867         struct dentry *t_options;
5868
5869         t_options = trace_options_init_dentry(tr);
5870         if (!t_options)
5871                 return;
5872
5873         topt->flags = flags;
5874         topt->opt = opt;
5875         topt->tr = tr;
5876
5877         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5878                                     &trace_options_fops);
5879
5880 }
5881
5882 static struct trace_option_dentry *
5883 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5884 {
5885         struct trace_option_dentry *topts;
5886         struct tracer_flags *flags;
5887         struct tracer_opt *opts;
5888         int cnt;
5889
5890         if (!tracer)
5891                 return NULL;
5892
5893         flags = tracer->flags;
5894
5895         if (!flags || !flags->opts)
5896                 return NULL;
5897
5898         opts = flags->opts;
5899
5900         for (cnt = 0; opts[cnt].name; cnt++)
5901                 ;
5902
5903         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5904         if (!topts)
5905                 return NULL;
5906
5907         for (cnt = 0; opts[cnt].name; cnt++)
5908                 create_trace_option_file(tr, &topts[cnt], flags,
5909                                          &opts[cnt]);
5910
5911         return topts;
5912 }
5913
5914 static void
5915 destroy_trace_option_files(struct trace_option_dentry *topts)
5916 {
5917         int cnt;
5918
5919         if (!topts)
5920                 return;
5921
5922         for (cnt = 0; topts[cnt].opt; cnt++) {
5923                 if (topts[cnt].entry)
5924                         debugfs_remove(topts[cnt].entry);
5925         }
5926
5927         kfree(topts);
5928 }
5929
5930 static struct dentry *
5931 create_trace_option_core_file(struct trace_array *tr,
5932                               const char *option, long index)
5933 {
5934         struct dentry *t_options;
5935
5936         t_options = trace_options_init_dentry(tr);
5937         if (!t_options)
5938                 return NULL;
5939
5940         return trace_create_file(option, 0644, t_options, (void *)index,
5941                                     &trace_options_core_fops);
5942 }
5943
5944 static __init void create_trace_options_dir(struct trace_array *tr)
5945 {
5946         struct dentry *t_options;
5947         int i;
5948
5949         t_options = trace_options_init_dentry(tr);
5950         if (!t_options)
5951                 return;
5952
5953         for (i = 0; trace_options[i]; i++)
5954                 create_trace_option_core_file(tr, trace_options[i], i);
5955 }
5956
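/*
 * "tracing_on" file: report whether the ring buffer is recording, or
 * switch recording on/off (also calling the tracer's start/stop
 * callbacks).
 */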
5957 static ssize_t
5958 rb_simple_read(struct file *filp, char __user *ubuf,
5959                size_t cnt, loff_t *ppos)
5960 {
5961         struct trace_array *tr = filp->private_data;
5962         char buf[64];
5963         int r;
5964
5965         r = tracer_tracing_is_on(tr);
5966         r = sprintf(buf, "%d\n", r);
5967
5968         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5969 }
5970
5971 static ssize_t
5972 rb_simple_write(struct file *filp, const char __user *ubuf,
5973                 size_t cnt, loff_t *ppos)
5974 {
5975         struct trace_array *tr = filp->private_data;
5976         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5977         unsigned long val;
5978         int ret;
5979
5980         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5981         if (ret)
5982                 return ret;
5983
5984         if (buffer) {
5985                 mutex_lock(&trace_types_lock);
5986                 if (val) {
5987                         tracer_tracing_on(tr);
5988                         if (tr->current_trace->start)
5989                                 tr->current_trace->start(tr);
5990                 } else {
5991                         tracer_tracing_off(tr);
5992                         if (tr->current_trace->stop)
5993                                 tr->current_trace->stop(tr);
5994                 }
5995                 mutex_unlock(&trace_types_lock);
5996         }
5997
5998         (*ppos)++;
5999
6000         return cnt;
6001 }
6002
6003 static const struct file_operations rb_simple_fops = {
6004         .open           = tracing_open_generic_tr,
6005         .read           = rb_simple_read,
6006         .write          = rb_simple_write,
6007         .release        = tracing_release_generic_tr,
6008         .llseek         = default_llseek,
6009 };
6010
6011 struct dentry *trace_instance_dir;
6012
6013 static void
6014 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6015
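/*
 * Allocate one trace buffer: the ring buffer itself plus the per-cpu
 * trace_array_cpu data.
 */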
6016 static int
6017 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6018 {
6019         enum ring_buffer_flags rb_flags;
6020
6021         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6022
6023         buf->tr = tr;
6024
6025         buf->buffer = ring_buffer_alloc(size, rb_flags);
6026         if (!buf->buffer)
6027                 return -ENOMEM;
6028
6029         buf->data = alloc_percpu(struct trace_array_cpu);
6030         if (!buf->data) {
6031                 ring_buffer_free(buf->buffer);
6032                 return -ENOMEM;
6033         }
6034
6035         /* Allocate the first page for all buffers */
6036         set_buffer_entries(&tr->trace_buffer,
6037                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6038
6039         return 0;
6040 }
6041
6042 static int allocate_trace_buffers(struct trace_array *tr, int size)
6043 {
6044         int ret;
6045
6046         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6047         if (ret)
6048                 return ret;
6049
6050 #ifdef CONFIG_TRACER_MAX_TRACE
6051         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6052                                     allocate_snapshot ? size : 1);
6053         if (WARN_ON(ret)) {
6054                 ring_buffer_free(tr->trace_buffer.buffer);
6055                 free_percpu(tr->trace_buffer.data);
6056                 return -ENOMEM;
6057         }
6058         tr->allocated_snapshot = allocate_snapshot;
6059
6060         /*
6061          * Only the top level trace array gets its snapshot allocated
6062          * from the kernel command line.
6063          */
6064         allocate_snapshot = false;
6065 #endif
6066         return 0;
6067 }
6068
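/*
 * Create a new trace_array instance: allocate its buffers, create
 * its debugfs directory and hook it into the list of instances.
 */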
6069 static int new_instance_create(const char *name)
6070 {
6071         struct trace_array *tr;
6072         int ret;
6073
6074         mutex_lock(&trace_types_lock);
6075
6076         ret = -EEXIST;
6077         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6078                 if (tr->name && strcmp(tr->name, name) == 0)
6079                         goto out_unlock;
6080         }
6081
6082         ret = -ENOMEM;
6083         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6084         if (!tr)
6085                 goto out_unlock;
6086
6087         tr->name = kstrdup(name, GFP_KERNEL);
6088         if (!tr->name)
6089                 goto out_free_tr;
6090
6091         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6092                 goto out_free_tr;
6093
6094         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6095
6096         raw_spin_lock_init(&tr->start_lock);
6097
6098         tr->current_trace = &nop_trace;
6099
6100         INIT_LIST_HEAD(&tr->systems);
6101         INIT_LIST_HEAD(&tr->events);
6102
6103         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6104                 goto out_free_tr;
6105
6106         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6107         if (!tr->dir)
6108                 goto out_free_tr;
6109
6110         ret = event_trace_add_tracer(tr->dir, tr);
6111         if (ret) {
6112                 debugfs_remove_recursive(tr->dir);
6113                 goto out_free_tr;
6114         }
6115
6116         init_tracer_debugfs(tr, tr->dir);
6117
6118         list_add(&tr->list, &ftrace_trace_arrays);
6119
6120         mutex_unlock(&trace_types_lock);
6121
6122         return 0;
6123
6124  out_free_tr:
6125         if (tr->trace_buffer.buffer)
6126                 ring_buffer_free(tr->trace_buffer.buffer);
6127         free_cpumask_var(tr->tracing_cpumask);
6128         kfree(tr->name);
6129         kfree(tr);
6130
6131  out_unlock:
6132         mutex_unlock(&trace_types_lock);
6133
6134         return ret;
6135
6136 }
6137
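/*
 * Tear down a trace_array instance, provided nothing still holds a
 * reference to it.
 */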
6138 static int instance_delete(const char *name)
6139 {
6140         struct trace_array *tr;
6141         int found = 0;
6142         int ret;
6143
6144         mutex_lock(&trace_types_lock);
6145
6146         ret = -ENODEV;
6147         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6148                 if (tr->name && strcmp(tr->name, name) == 0) {
6149                         found = 1;
6150                         break;
6151                 }
6152         }
6153         if (!found)
6154                 goto out_unlock;
6155
6156         ret = -EBUSY;
6157         if (tr->ref)
6158                 goto out_unlock;
6159
6160         list_del(&tr->list);
6161
6162         tracing_set_nop(tr);
6163         event_trace_del_tracer(tr);
6164         debugfs_remove_recursive(tr->dir);
6165         free_percpu(tr->trace_buffer.data);
6166         ring_buffer_free(tr->trace_buffer.buffer);
6167
6168         kfree(tr->name);
6169         kfree(tr);
6170
6171         ret = 0;
6172
6173  out_unlock:
6174         mutex_unlock(&trace_types_lock);
6175
6176         return ret;
6177 }
6178
6179 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6180 {
6181         struct dentry *parent;
6182         int ret;
6183
6184         /* Paranoid: Make sure the parent is the "instances" directory */
6185         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6186         if (WARN_ON_ONCE(parent != trace_instance_dir))
6187                 return -ENOENT;
6188
6189         /*
6190          * The inode mutex is locked, but debugfs_create_dir() will also
6191          * take the mutex. As the instances directory can not be destroyed
6192          * or changed in any other way, it is safe to unlock it, and
6193          * let the dentry try. If two users try to make the same dir at
6194          * the same time, then new_instance_create() will determine the
6195          * winner.
6196          */
6197         mutex_unlock(&inode->i_mutex);
6198
6199         ret = new_instance_create(dentry->d_iname);
6200
6201         mutex_lock(&inode->i_mutex);
6202
6203         return ret;
6204 }
6205
6206 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6207 {
6208         struct dentry *parent;
6209         int ret;
6210
6211         /* Paranoid: Make sure the parent is the "instances" directory */
6212         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6213         if (WARN_ON_ONCE(parent != trace_instance_dir))
6214                 return -ENOENT;
6215
6216         /* The caller did a dget() on dentry */
6217         mutex_unlock(&dentry->d_inode->i_mutex);
6218
6219         /*
6220          * The inode mutex is locked, but debugfs_remove_recursive() will
6221          * also take the mutex. As the instances directory can not be
6222          * destroyed or changed in any other way, it is safe to unlock it,
6223          * and let the dentry try. If two users try to remove the same dir
6224          * at the same time, then instance_delete() will determine the
6225          * winner.
6226          */
6227         mutex_unlock(&inode->i_mutex);
6228
6229         ret = instance_delete(dentry->d_iname);
6230
6231         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6232         mutex_lock(&dentry->d_inode->i_mutex);
6233
6234         return ret;
6235 }
6236
6237 static const struct inode_operations instance_dir_inode_operations = {
6238         .lookup         = simple_lookup,
6239         .mkdir          = instance_mkdir,
6240         .rmdir          = instance_rmdir,
6241 };
6242
6243 static __init void create_trace_instances(struct dentry *d_tracer)
6244 {
6245         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6246         if (WARN_ON(!trace_instance_dir))
6247                 return;
6248
6249         /* Hijack the dir inode operations, to allow mkdir */
6250         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6251 }
6252
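/*
 * Create the standard set of debugfs files for a trace array; used
 * for both the top-level tracing directory and for instances.
 */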
6253 static void
6254 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6255 {
6256         int cpu;
6257
6258         trace_create_file("available_tracers", 0444, d_tracer,
6259                         tr, &show_traces_fops);
6260
6261         trace_create_file("current_tracer", 0644, d_tracer,
6262                         tr, &set_tracer_fops);
6263
6264         trace_create_file("tracing_cpumask", 0644, d_tracer,
6265                           tr, &tracing_cpumask_fops);
6266
6267         trace_create_file("trace_options", 0644, d_tracer,
6268                           tr, &tracing_iter_fops);
6269
6270         trace_create_file("trace", 0644, d_tracer,
6271                           tr, &tracing_fops);
6272
6273         trace_create_file("trace_pipe", 0444, d_tracer,
6274                           tr, &tracing_pipe_fops);
6275
6276         trace_create_file("buffer_size_kb", 0644, d_tracer,
6277                           tr, &tracing_entries_fops);
6278
6279         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6280                           tr, &tracing_total_entries_fops);
6281
6282         trace_create_file("free_buffer", 0200, d_tracer,
6283                           tr, &tracing_free_buffer_fops);
6284
6285         trace_create_file("trace_marker", 0220, d_tracer,
6286                           tr, &tracing_mark_fops);
6287
6288         trace_create_file("trace_clock", 0644, d_tracer, tr,
6289                           &trace_clock_fops);
6290
6291         trace_create_file("tracing_on", 0644, d_tracer,
6292                           tr, &rb_simple_fops);
6293
6294 #ifdef CONFIG_TRACER_SNAPSHOT
6295         trace_create_file("snapshot", 0644, d_tracer,
6296                           tr, &snapshot_fops);
6297 #endif
6298
6299         for_each_tracing_cpu(cpu)
6300                 tracing_init_debugfs_percpu(tr, cpu);
6301
6302 }
6303
6304 static __init int tracer_init_debugfs(void)
6305 {
6306         struct dentry *d_tracer;
6307
6308         trace_access_lock_init();
6309
6310         d_tracer = tracing_init_dentry();
6311         if (!d_tracer)
6312                 return 0;
6313
6314         init_tracer_debugfs(&global_trace, d_tracer);
6315
6316 #ifdef CONFIG_TRACER_MAX_TRACE
6317         trace_create_file("tracing_max_latency", 0644, d_tracer,
6318                         &tracing_max_latency, &tracing_max_lat_fops);
6319 #endif
6320
6321         trace_create_file("tracing_thresh", 0644, d_tracer,
6322                         &tracing_thresh, &tracing_max_lat_fops);
6323
6324         trace_create_file("README", 0444, d_tracer,
6325                         NULL, &tracing_readme_fops);
6326
6327         trace_create_file("saved_cmdlines", 0444, d_tracer,
6328                         NULL, &tracing_saved_cmdlines_fops);
6329
6330 #ifdef CONFIG_DYNAMIC_FTRACE
6331         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6332                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6333 #endif
6334
6335         create_trace_instances(d_tracer);
6336
6337         create_trace_options_dir(&global_trace);
6338
6339         return 0;
6340 }
6341
6342 static int trace_panic_handler(struct notifier_block *this,
6343                                unsigned long event, void *unused)
6344 {
6345         if (ftrace_dump_on_oops)
6346                 ftrace_dump(ftrace_dump_on_oops);
6347         return NOTIFY_OK;
6348 }
6349
6350 static struct notifier_block trace_panic_notifier = {
6351         .notifier_call  = trace_panic_handler,
6352         .next           = NULL,
6353         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6354 };
6355
6356 static int trace_die_handler(struct notifier_block *self,
6357                              unsigned long val,
6358                              void *data)
6359 {
6360         switch (val) {
6361         case DIE_OOPS:
6362                 if (ftrace_dump_on_oops)
6363                         ftrace_dump(ftrace_dump_on_oops);
6364                 break;
6365         default:
6366                 break;
6367         }
6368         return NOTIFY_OK;
6369 }
6370
6371 static struct notifier_block trace_die_notifier = {
6372         .notifier_call = trace_die_handler,
6373         .priority = 200
6374 };
6375
6376 /*
6377  * printk is limited to 1024 characters; we really don't need it that big.
6378  * Nothing should be printing 1000 characters anyway.
6379  */
6380 #define TRACE_MAX_PRINT         1000
6381
6382 /*
6383  * Define here KERN_TRACE so that we have one place to modify
6384  * it if we decide to change what log level the ftrace dump
6385  * should be at.
6386  */
6387 #define KERN_TRACE              KERN_EMERG
6388
6389 void
6390 trace_printk_seq(struct trace_seq *s)
6391 {
6392         /* Probably should print a warning here. */
6393         if (s->len >= TRACE_MAX_PRINT)
6394                 s->len = TRACE_MAX_PRINT;
6395
6396         /* Should already be NUL-terminated, but we are paranoid. */
6397         s->buffer[s->len] = 0;
6398
6399         printk(KERN_TRACE "%s", s->buffer);
6400
6401         trace_seq_init(s);
6402 }
6403
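/*
 * Initialize an iterator over the global trace buffers; used by
 * ftrace_dump() below.
 */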
6404 void trace_init_global_iter(struct trace_iterator *iter)
6405 {
6406         iter->tr = &global_trace;
6407         iter->trace = iter->tr->current_trace;
6408         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6409         iter->trace_buffer = &global_trace.trace_buffer;
6410
6411         if (iter->trace && iter->trace->open)
6412                 iter->trace->open(iter);
6413
6414         /* Annotate start of buffers if we had overruns */
6415         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6416                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6417
6418         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6419         if (trace_clocks[iter->tr->clock_id].in_ns)
6420                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6421 }
6422
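/*
 * Dump the contents of the ftrace ring buffers to the console,
 * typically from a panic or oops path. Tracing is turned off and
 * per-cpu recording disabled while the buffers are drained.
 */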
6423 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6424 {
6425         /* use static because iter can be a bit big for the stack */
6426         static struct trace_iterator iter;
6427         static atomic_t dump_running;
6428         unsigned int old_userobj;
6429         unsigned long flags;
6430         int cnt = 0, cpu;
6431
6432         /* Only allow one dump user at a time. */
6433         if (atomic_inc_return(&dump_running) != 1) {
6434                 atomic_dec(&dump_running);
6435                 return;
6436         }
6437
6438         /*
6439          * Always turn off tracing when we dump.
6440          * We don't need to show trace output of what happens
6441          * between multiple crashes.
6442          *
6443          * If the user does a sysrq-z, then they can re-enable
6444          * tracing with echo 1 > tracing_on.
6445          */
6446         tracing_off();
6447
6448         local_irq_save(flags);
6449
6450         /* Simulate the iterator */
6451         trace_init_global_iter(&iter);
6452
6453         for_each_tracing_cpu(cpu) {
6454                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6455         }
6456
6457         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6458
6459         /* don't look at user memory in panic mode */
6460         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6461
6462         switch (oops_dump_mode) {
6463         case DUMP_ALL:
6464                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6465                 break;
6466         case DUMP_ORIG:
6467                 iter.cpu_file = raw_smp_processor_id();
6468                 break;
6469         case DUMP_NONE:
6470                 goto out_enable;
6471         default:
6472                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6473                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6474         }
6475
6476         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6477
6478         /* Did function tracer already get disabled? */
6479         if (ftrace_is_dead()) {
6480                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6481                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6482         }
6483
6484         /*
6485          * We need to stop all tracing on all CPUs to read
6486          * the next buffer. This is a bit expensive, but is
6487          * not done often. We fill in all that we can read,
6488          * and then release the locks again.
6489          */
6490
6491         while (!trace_empty(&iter)) {
6492
6493                 if (!cnt)
6494                         printk(KERN_TRACE "---------------------------------\n");
6495
6496                 cnt++;
6497
6498                 /* reset all but tr, trace, and overruns */
6499                 memset(&iter.seq, 0,
6500                        sizeof(struct trace_iterator) -
6501                        offsetof(struct trace_iterator, seq));
6502                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6503                 iter.pos = -1;
6504
6505                 if (trace_find_next_entry_inc(&iter) != NULL) {
6506                         int ret;
6507
6508                         ret = print_trace_line(&iter);
6509                         if (ret != TRACE_TYPE_NO_CONSUME)
6510                                 trace_consume(&iter);
6511                 }
6512                 touch_nmi_watchdog();
6513
6514                 trace_printk_seq(&iter.seq);
6515         }
6516
6517         if (!cnt)
6518                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6519         else
6520                 printk(KERN_TRACE "---------------------------------\n");
6521
6522  out_enable:
6523         trace_flags |= old_userobj;
6524
6525         for_each_tracing_cpu(cpu) {
6526                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6527         }
6528         atomic_dec(&dump_running);
6529         local_irq_restore(flags);
6530 }
6531 EXPORT_SYMBOL_GPL(ftrace_dump);
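/*
 * Usage sketch (illustrative only; nothing in this file calls it): since
 * ftrace_dump() is exported GPL, a module could dump the ftrace buffers
 * from its own failure path. example_report_failure() is a hypothetical
 * name, not an existing kernel function.
 *
 *	static void example_report_failure(void)
 *	{
 *		// Dump every CPU's buffer to the console at KERN_TRACE level.
 *		ftrace_dump(DUMP_ALL);
 *	}
 */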
6532
6533 __init static int tracer_alloc_buffers(void)
6534 {
6535         int ring_buf_size;
6536         int ret = -ENOMEM;
6537
6538
6539         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6540                 goto out;
6541
6542         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6543                 goto out_free_buffer_mask;
6544
6545         /* Only allocate trace_printk buffers if a trace_printk exists */
6546         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6547                 /* Must be called before global_trace.buffer is allocated */
6548                 trace_printk_init_buffers();
6549
6550         /* To save memory, keep the ring buffer size to its minimum */
6551         if (ring_buffer_expanded)
6552                 ring_buf_size = trace_buf_size;
6553         else
6554                 ring_buf_size = 1;
6555
6556         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6557         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6558
6559         raw_spin_lock_init(&global_trace.start_lock);
6560
6561         /* TODO: make the number of buffers hot pluggable with CPUs */
6562         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6563                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6564                 WARN_ON(1);
6565                 goto out_free_cpumask;
6566         }
6567
6568         if (global_trace.buffer_disabled)
6569                 tracing_off();
6570
6571         trace_init_cmdlines();
6572
6573         /*
6574          * register_tracer() might reference current_trace, so it
6575          * needs to be set before we register anything. This is
6576          * just a bootstrap of current_trace anyway.
6577          */
6578         global_trace.current_trace = &nop_trace;
6579
6580         register_tracer(&nop_trace);
6581
6582         /* All seems OK, enable tracing */
6583         tracing_disabled = 0;
6584
6585         atomic_notifier_chain_register(&panic_notifier_list,
6586                                        &trace_panic_notifier);
6587
6588         register_die_notifier(&trace_die_notifier);
6589
6590         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6591
6592         INIT_LIST_HEAD(&global_trace.systems);
6593         INIT_LIST_HEAD(&global_trace.events);
6594         list_add(&global_trace.list, &ftrace_trace_arrays);
6595
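        /*
         * Apply any comma-separated flags passed on the kernel command
         * line via trace_options= (the option names below are examples
         * and depend on the kernel configuration):
         *
         *      trace_options=sym-offset,stacktrace
         */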
6596         while (trace_boot_options) {
6597                 char *option;
6598
6599                 option = strsep(&trace_boot_options, ",");
6600                 trace_set_options(&global_trace, option);
6601         }
6602
6603         register_snapshot_cmd();
6604
6605         return 0;
6606
6607 out_free_cpumask:
6608         free_percpu(global_trace.trace_buffer.data);
6609 #ifdef CONFIG_TRACER_MAX_TRACE
6610         free_percpu(global_trace.max_buffer.data);
6611 #endif
6612         free_cpumask_var(global_trace.tracing_cpumask);
6613 out_free_buffer_mask:
6614         free_cpumask_var(tracing_buffer_mask);
6615 out:
6616         return ret;
6617 }
6618
6619 __init static int clear_boot_tracer(void)
6620 {
6621         /*
6622          * The buffer holding the default bootup tracer name is in
6623          * an init section. This function is called as a late_initcall.
6624          * If the boot tracer was never registered, clear it out to
6625          * prevent a later registration from accessing the buffer,
6626          * which is about to be freed.
6627          */
6628         if (!default_bootup_tracer)
6629                 return 0;
6630
6631         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6632                default_bootup_tracer);
6633         default_bootup_tracer = NULL;
6634
6635         return 0;
6636 }
6637
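/*
 * Initcall ordering: early_initcall() runs first, so the buffers exist
 * before the fs_initcall() creates the debugfs files, and the boot
 * tracer reference is only cleared by the late_initcall() at the end.
 */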
6638 early_initcall(tracer_alloc_buffers);
6639 fs_initcall(tracer_init_debugfs);
6640 late_initcall(clear_boot_tracer);