tracing: Remove mock up poll wait function
[firefly-linux-kernel-4.4.55.git] / kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring-buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
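/* Stub ->set_flag() callback for tracers that do not provide their own */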
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurs.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops
113  * Set to 1 if you want to dump the buffers of all CPUs
114  * Set to 2 if you want to dump the buffer of the CPU that triggered the oops
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
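/* Convert nanoseconds to microseconds, rounding to the nearest microsecond */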
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptor of the pages in memory is used to hold
208  * the linked list by linking the lru item in the page descriptor
209  * to each of the pages in the buffer per CPU.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
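/*
 * trace_array_get - take a reference on a trace array
 *
 * Looks up @this_tr on the ftrace_trace_arrays list under
 * trace_types_lock and bumps its reference count if it is still
 * registered. Returns 0 on success or -ENODEV if the trace array
 * is no longer on the list.
 */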
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
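/*
 * Drop a reference taken by trace_array_get(). The __ variant expects
 * trace_types_lock to already be held; trace_array_put() takes the
 * lock itself.
 */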
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
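/*
 * If the event file has filtering enabled and @rec does not match the
 * filter predicates, discard the already-reserved ring buffer event
 * and return 1 so the caller skips the commit. Returns 0 otherwise.
 */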
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
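/*
 * Return the current, normalized timestamp of the given trace buffer.
 * Falls back to trace_clock_local() early in boot, before the ring
 * buffer has been allocated.
 */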
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled", which can be checked in fast paths such
302  * as the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low number of 16384.
323  * If a dump on oops happens, it is much appreciated not to have
324  * to wait for all that output. Anyway, this is configurable at
325  * both boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a linked list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * Serialize access to the ring buffer.
341  *
342  * The ring buffer serializes readers, but that is only low-level
343  * protection. The validity of the events (returned by
344  * ring_buffer_peek() etc.) is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow other processes
347  * to consume these events concurrently:
348  *   A) the page holding the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and will be rewritten
350  *      by the event producer.
351  *   B) the page holding the consumed events may become a page for
352  *      splice_read, and will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different
355  * per-cpu ring buffers concurrently.
356  *
357  * These primitives don't distinguish read-only and read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435          * races where it gets disabled while we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The address of the constant string to write into the buffer
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * tracing_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot either with
542  * tracing_snapshot_alloc(), or manually with:
543  *   echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, tracing will be stopped,
546  * basically making a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, snapshot can not be used when the tracer uses it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 static void free_snapshot(struct trace_array *tr)
603 {
604         /*
605          * We don't free the ring buffer; instead, we resize it because
606          * the max_tr ring buffer has some state (e.g. ring->clock) and
607          * we want to preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
636
637 /**
638  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
639  *
640  * This is similar to tracing_snapshot(), but it will allocate the
641  * snapshot buffer if it isn't already allocated. Use this only
642  * where it is safe to sleep, as the allocation may sleep.
643  *
644  * This causes a swap between the snapshot buffer and the current live
645  * tracing buffer. You can use this to take snapshots of the live
646  * trace when some condition is triggered, but continue to trace.
647  */
648 void tracing_snapshot_alloc(void)
649 {
650         int ret;
651
652         ret = tracing_alloc_snapshot();
653         if (ret < 0)
654                 return;
655
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #else
660 void tracing_snapshot(void)
661 {
662         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
671 void tracing_snapshot_alloc(void)
672 {
673         /* Give warning */
674         tracing_snapshot();
675 }
676 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff), that just want to
686          * know if the ring buffer has been disabled, but it can handle
687          * races where it gets disabled while we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show the real state of the ring buffer
718  * @tr: the trace array to check
719  *
720  * Shows the real state of the ring buffer: whether it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
737
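/*
 * "trace_buf_size=" boot parameter: sets the per-cpu ring buffer size
 * in bytes. memparse() accepts the usual K/M/G suffixes.
 */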
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries can not be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
752
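/*
 * "tracing_thresh=" boot parameter: the value is given in microseconds
 * and stored internally in nanoseconds.
 */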
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
767
768 unsigned long nsecs_to_usecs(unsigned long nsecs)
769 {
770         return nsecs / 1000;
771 }
772
773 /* These must match the bit positions in trace_iterator_flags */
774 static const char *trace_options[] = {
775         "print-parent",
776         "sym-offset",
777         "sym-addr",
778         "verbose",
779         "raw",
780         "hex",
781         "bin",
782         "block",
783         "stacktrace",
784         "trace_printk",
785         "ftrace_preempt",
786         "branch",
787         "annotate",
788         "userstacktrace",
789         "sym-userobj",
790         "printk-msg-only",
791         "context-info",
792         "latency-format",
793         "sleep-time",
794         "graph-time",
795         "record-cmd",
796         "overwrite",
797         "disable_on_free",
798         "irq-info",
799         "markers",
800         "function-trace",
801         NULL
802 };
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,    "local",        1 },
810         { trace_clock_global,   "global",       1 },
811         { trace_clock_counter,  "counter",      0 },
812         { trace_clock_jiffies,  "uptime",       1 },
813         { trace_clock,          "perf",         1 },
814         ARCH_TRACE_CLOCKS
815 };
816
817 /*
818  * trace_parser_get_init - gets the buffer for trace parser
819  */
820 int trace_parser_get_init(struct trace_parser *parser, int size)
821 {
822         memset(parser, 0, sizeof(*parser));
823
824         parser->buffer = kmalloc(size, GFP_KERNEL);
825         if (!parser->buffer)
826                 return 1;
827
828         parser->size = size;
829         return 0;
830 }
831
832 /*
833  * trace_parser_put - frees the buffer for trace parser
834  */
835 void trace_parser_put(struct trace_parser *parser)
836 {
837         kfree(parser->buffer);
838 }
839
840 /*
841  * trace_get_user - reads the user input string separated by space
842  * (matched by isspace(ch))
843  *
844  * For each string found the 'struct trace_parser' is updated,
845  * and the function returns.
846  *
847  * Returns number of bytes read.
848  *
849  * See kernel/trace/trace.h for 'struct trace_parser' details.
850  */
851 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
852         size_t cnt, loff_t *ppos)
853 {
854         char ch;
855         size_t read = 0;
856         ssize_t ret;
857
858         if (!*ppos)
859                 trace_parser_clear(parser);
860
861         ret = get_user(ch, ubuf++);
862         if (ret)
863                 goto out;
864
865         read++;
866         cnt--;
867
868         /*
869          * If the parser has not finished with the last write (cont is set),
870          * continue reading the user input without skipping spaces.
871          */
872         if (!parser->cont) {
873                 /* skip white space */
874                 while (cnt && isspace(ch)) {
875                         ret = get_user(ch, ubuf++);
876                         if (ret)
877                                 goto out;
878                         read++;
879                         cnt--;
880                 }
881
882                 /* only spaces were written */
883                 if (isspace(ch)) {
884                         *ppos += read;
885                         ret = read;
886                         goto out;
887                 }
888
889                 parser->idx = 0;
890         }
891
892         /* read the non-space input */
893         while (cnt && !isspace(ch)) {
894                 if (parser->idx < parser->size - 1)
895                         parser->buffer[parser->idx++] = ch;
896                 else {
897                         ret = -EINVAL;
898                         goto out;
899                 }
900                 ret = get_user(ch, ubuf++);
901                 if (ret)
902                         goto out;
903                 read++;
904                 cnt--;
905         }
906
907         /* We either got finished input or we have to wait for another call. */
908         if (isspace(ch)) {
909                 parser->buffer[parser->idx] = 0;
910                 parser->cont = false;
911         } else if (parser->idx < parser->size - 1) {
912                 parser->cont = true;
913                 parser->buffer[parser->idx++] = ch;
914         } else {
915                 ret = -EINVAL;
916                 goto out;
917         }
918
919         *ppos += read;
920         ret = read;
921
922 out:
923         return ret;
924 }
925
926 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
927 {
928         int len;
929         int ret;
930
931         if (!cnt)
932                 return 0;
933
934         if (s->len <= s->readpos)
935                 return -EBUSY;
936
937         len = s->len - s->readpos;
938         if (cnt > len)
939                 cnt = len;
940         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
941         if (ret == cnt)
942                 return -EFAULT;
943
944         cnt -= ret;
945
946         s->readpos += cnt;
947         return cnt;
948 }
949
950 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
951 {
952         int len;
953
954         if (s->len <= s->readpos)
955                 return -EBUSY;
956
957         len = s->len - s->readpos;
958         if (cnt > len)
959                 cnt = len;
960         memcpy(buf, s->buffer + s->readpos, cnt);
961
962         s->readpos += cnt;
963         return cnt;
964 }
965
966 unsigned long __read_mostly     tracing_thresh;
967
968 #ifdef CONFIG_TRACER_MAX_TRACE
969 /*
970  * Copy the new maximum trace into the separate maximum-trace
971  * structure. (this way the maximum trace is permanently saved,
972  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
973  */
974 static void
975 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
976 {
977         struct trace_buffer *trace_buf = &tr->trace_buffer;
978         struct trace_buffer *max_buf = &tr->max_buffer;
979         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
980         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
981
982         max_buf->cpu = cpu;
983         max_buf->time_start = data->preempt_timestamp;
984
985         max_data->saved_latency = tr->max_latency;
986         max_data->critical_start = data->critical_start;
987         max_data->critical_end = data->critical_end;
988
989         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
990         max_data->pid = tsk->pid;
991         /*
992          * If tsk == current, then use current_uid(), as that does not use
993          * RCU. The irq tracer can be called out of RCU scope.
994          */
995         if (tsk == current)
996                 max_data->uid = current_uid();
997         else
998                 max_data->uid = task_uid(tsk);
999
1000         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1001         max_data->policy = tsk->policy;
1002         max_data->rt_priority = tsk->rt_priority;
1003
1004         /* record this task's comm */
1005         tracing_record_cmdline(tsk);
1006 }
1007
1008 /**
1009  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1010  * @tr: tracer
1011  * @tsk: the task with the latency
1012  * @cpu: The cpu that initiated the trace.
1013  *
1014  * Flip the buffers between the @tr and the max_tr and record information
1015  * about which task was the cause of this latency.
1016  */
1017 void
1018 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1019 {
1020         struct ring_buffer *buf;
1021
1022         if (tr->stop_count)
1023                 return;
1024
1025         WARN_ON_ONCE(!irqs_disabled());
1026
1027         if (!tr->allocated_snapshot) {
1028                 /* Only the nop tracer should hit this when disabling */
1029                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1030                 return;
1031         }
1032
1033         arch_spin_lock(&tr->max_lock);
1034
1035         buf = tr->trace_buffer.buffer;
1036         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1037         tr->max_buffer.buffer = buf;
1038
1039         __update_max_tr(tr, tsk, cpu);
1040         arch_spin_unlock(&tr->max_lock);
1041 }
1042
1043 /**
1044  * update_max_tr_single - only copy one trace over, and reset the rest
1045  * @tr: tracer
1046  * @tsk: task with the latency
1047  * @cpu: the cpu of the buffer to copy.
1048  *
1049  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1050  */
1051 void
1052 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1053 {
1054         int ret;
1055
1056         if (tr->stop_count)
1057                 return;
1058
1059         WARN_ON_ONCE(!irqs_disabled());
1060         if (!tr->allocated_snapshot) {
1061                 /* Only the nop tracer should hit this when disabling */
1062                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1063                 return;
1064         }
1065
1066         arch_spin_lock(&tr->max_lock);
1067
1068         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1069
1070         if (ret == -EBUSY) {
1071                 /*
1072                  * We failed to swap the buffer due to a commit taking
1073                  * place on this CPU. We fail to record, but we reset
1074                  * the max trace buffer (no one writes directly to it)
1075                  * and flag that it failed.
1076                  */
1077                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1078                         "Failed to swap buffers due to commit in progress\n");
1079         }
1080
1081         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1082
1083         __update_max_tr(tr, tsk, cpu);
1084         arch_spin_unlock(&tr->max_lock);
1085 }
1086 #endif /* CONFIG_TRACER_MAX_TRACE */
1087
1088 static void wait_on_pipe(struct trace_iterator *iter)
1089 {
1090         /* Iterators are static, they should be filled or empty */
1091         if (trace_buffer_iter(iter, iter->cpu_file))
1092                 return;
1093
1094         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1095 }
1096
1097 #ifdef CONFIG_FTRACE_STARTUP_TEST
1098 static int run_tracer_selftest(struct tracer *type)
1099 {
1100         struct trace_array *tr = &global_trace;
1101         struct tracer *saved_tracer = tr->current_trace;
1102         int ret;
1103
1104         if (!type->selftest || tracing_selftest_disabled)
1105                 return 0;
1106
1107         /*
1108          * Run a selftest on this tracer.
1109          * Here we reset the trace buffer, and set the current
1110          * tracer to be this tracer. The tracer can then run some
1111          * internal tracing to verify that everything is in order.
1112          * If we fail, we do not register this tracer.
1113          */
1114         tracing_reset_online_cpus(&tr->trace_buffer);
1115
1116         tr->current_trace = type;
1117
1118 #ifdef CONFIG_TRACER_MAX_TRACE
1119         if (type->use_max_tr) {
1120                 /* If we expanded the buffers, make sure the max is expanded too */
1121                 if (ring_buffer_expanded)
1122                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1123                                            RING_BUFFER_ALL_CPUS);
1124                 tr->allocated_snapshot = true;
1125         }
1126 #endif
1127
1128         /* the test is responsible for initializing and enabling */
1129         pr_info("Testing tracer %s: ", type->name);
1130         ret = type->selftest(type, tr);
1131         /* the test is responsible for resetting too */
1132         tr->current_trace = saved_tracer;
1133         if (ret) {
1134                 printk(KERN_CONT "FAILED!\n");
1135                 /* Add the warning after printing 'FAILED' */
1136                 WARN_ON(1);
1137                 return -1;
1138         }
1139         /* Only reset on passing, to avoid touching corrupted buffers */
1140         tracing_reset_online_cpus(&tr->trace_buffer);
1141
1142 #ifdef CONFIG_TRACER_MAX_TRACE
1143         if (type->use_max_tr) {
1144                 tr->allocated_snapshot = false;
1145
1146                 /* Shrink the max buffer again */
1147                 if (ring_buffer_expanded)
1148                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1149                                            RING_BUFFER_ALL_CPUS);
1150         }
1151 #endif
1152
1153         printk(KERN_CONT "PASSED\n");
1154         return 0;
1155 }
1156 #else
1157 static inline int run_tracer_selftest(struct tracer *type)
1158 {
1159         return 0;
1160 }
1161 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1162
1163 /**
1164  * register_tracer - register a tracer with the ftrace system.
1165  * @type: the plugin for the tracer
1166  *
1167  * Register a new plugin tracer.
1168  */
1169 int register_tracer(struct tracer *type)
1170 {
1171         struct tracer *t;
1172         int ret = 0;
1173
1174         if (!type->name) {
1175                 pr_info("Tracer must have a name\n");
1176                 return -1;
1177         }
1178
1179         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1180                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1181                 return -1;
1182         }
1183
1184         mutex_lock(&trace_types_lock);
1185
1186         tracing_selftest_running = true;
1187
1188         for (t = trace_types; t; t = t->next) {
1189                 if (strcmp(type->name, t->name) == 0) {
1190                         /* already found */
1191                         pr_info("Tracer %s already registered\n",
1192                                 type->name);
1193                         ret = -1;
1194                         goto out;
1195                 }
1196         }
1197
1198         if (!type->set_flag)
1199                 type->set_flag = &dummy_set_flag;
1200         if (!type->flags)
1201                 type->flags = &dummy_tracer_flags;
1202         else
1203                 if (!type->flags->opts)
1204                         type->flags->opts = dummy_tracer_opt;
1205
1206         ret = run_tracer_selftest(type);
1207         if (ret < 0)
1208                 goto out;
1209
1210         type->next = trace_types;
1211         trace_types = type;
1212
1213  out:
1214         tracing_selftest_running = false;
1215         mutex_unlock(&trace_types_lock);
1216
1217         if (ret || !default_bootup_tracer)
1218                 goto out_unlock;
1219
1220         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1221                 goto out_unlock;
1222
1223         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1224         /* Do we want this tracer to start on bootup? */
1225         tracing_set_tracer(&global_trace, type->name);
1226         default_bootup_tracer = NULL;
1227         /* disable other selftests, since this will break them. */
1228         tracing_selftest_disabled = true;
1229 #ifdef CONFIG_FTRACE_STARTUP_TEST
1230         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1231                type->name);
1232 #endif
1233
1234  out_unlock:
1235         return ret;
1236 }
1237
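/*
 * Reset (empty) the ring buffer of a single CPU. Recording is disabled
 * and synchronize_sched() waits for in-flight commits to finish before
 * the buffer is cleared.
 */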
1238 void tracing_reset(struct trace_buffer *buf, int cpu)
1239 {
1240         struct ring_buffer *buffer = buf->buffer;
1241
1242         if (!buffer)
1243                 return;
1244
1245         ring_buffer_record_disable(buffer);
1246
1247         /* Make sure all commits have finished */
1248         synchronize_sched();
1249         ring_buffer_reset_cpu(buffer, cpu);
1250
1251         ring_buffer_record_enable(buffer);
1252 }
1253
1254 void tracing_reset_online_cpus(struct trace_buffer *buf)
1255 {
1256         struct ring_buffer *buffer = buf->buffer;
1257         int cpu;
1258
1259         if (!buffer)
1260                 return;
1261
1262         ring_buffer_record_disable(buffer);
1263
1264         /* Make sure all commits have finished */
1265         synchronize_sched();
1266
1267         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1268
1269         for_each_online_cpu(cpu)
1270                 ring_buffer_reset_cpu(buffer, cpu);
1271
1272         ring_buffer_record_enable(buffer);
1273 }
1274
1275 /* Must have trace_types_lock held */
1276 void tracing_reset_all_online_cpus(void)
1277 {
1278         struct trace_array *tr;
1279
1280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1281                 tracing_reset_online_cpus(&tr->trace_buffer);
1282 #ifdef CONFIG_TRACER_MAX_TRACE
1283                 tracing_reset_online_cpus(&tr->max_buffer);
1284 #endif
1285         }
1286 }
1287
1288 #define SAVED_CMDLINES 128
1289 #define NO_CMDLINE_MAP UINT_MAX
1290 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1291 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1292 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1293 static int cmdline_idx;
1294 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1295
1296 /* temporarily disable recording */
1297 static atomic_t trace_record_cmdline_disabled __read_mostly;
1298
1299 static void trace_init_cmdlines(void)
1300 {
1301         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1302         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1303         cmdline_idx = 0;
1304 }
1305
1306 int is_tracing_stopped(void)
1307 {
1308         return global_trace.stop_count;
1309 }
1310
1311 /**
1312  * tracing_start - quick start of the tracer
1313  *
1314  * If tracing is enabled but was stopped by tracing_stop,
1315  * this will start the tracer back up.
1316  */
1317 void tracing_start(void)
1318 {
1319         struct ring_buffer *buffer;
1320         unsigned long flags;
1321
1322         if (tracing_disabled)
1323                 return;
1324
1325         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1326         if (--global_trace.stop_count) {
1327                 if (global_trace.stop_count < 0) {
1328                         /* Someone screwed up their debugging */
1329                         WARN_ON_ONCE(1);
1330                         global_trace.stop_count = 0;
1331                 }
1332                 goto out;
1333         }
1334
1335         /* Prevent the buffers from switching */
1336         arch_spin_lock(&global_trace.max_lock);
1337
1338         buffer = global_trace.trace_buffer.buffer;
1339         if (buffer)
1340                 ring_buffer_record_enable(buffer);
1341
1342 #ifdef CONFIG_TRACER_MAX_TRACE
1343         buffer = global_trace.max_buffer.buffer;
1344         if (buffer)
1345                 ring_buffer_record_enable(buffer);
1346 #endif
1347
1348         arch_spin_unlock(&global_trace.max_lock);
1349
1350         ftrace_start();
1351  out:
1352         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1353 }
1354
1355 static void tracing_start_tr(struct trace_array *tr)
1356 {
1357         struct ring_buffer *buffer;
1358         unsigned long flags;
1359
1360         if (tracing_disabled)
1361                 return;
1362
1363         /* If global, we need to also start the max tracer */
1364         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1365                 return tracing_start();
1366
1367         raw_spin_lock_irqsave(&tr->start_lock, flags);
1368
1369         if (--tr->stop_count) {
1370                 if (tr->stop_count < 0) {
1371                         /* Someone screwed up their debugging */
1372                         WARN_ON_ONCE(1);
1373                         tr->stop_count = 0;
1374                 }
1375                 goto out;
1376         }
1377
1378         buffer = tr->trace_buffer.buffer;
1379         if (buffer)
1380                 ring_buffer_record_enable(buffer);
1381
1382  out:
1383         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1384 }
1385
1386 /**
1387  * tracing_stop - quick stop of the tracer
1388  *
1389  * Light weight way to stop tracing. Use in conjunction with
1390  * tracing_start.
1391  */
1392 void tracing_stop(void)
1393 {
1394         struct ring_buffer *buffer;
1395         unsigned long flags;
1396
1397         ftrace_stop();
1398         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1399         if (global_trace.stop_count++)
1400                 goto out;
1401
1402         /* Prevent the buffers from switching */
1403         arch_spin_lock(&global_trace.max_lock);
1404
1405         buffer = global_trace.trace_buffer.buffer;
1406         if (buffer)
1407                 ring_buffer_record_disable(buffer);
1408
1409 #ifdef CONFIG_TRACER_MAX_TRACE
1410         buffer = global_trace.max_buffer.buffer;
1411         if (buffer)
1412                 ring_buffer_record_disable(buffer);
1413 #endif
1414
1415         arch_spin_unlock(&global_trace.max_lock);
1416
1417  out:
1418         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1419 }
1420
1421 static void tracing_stop_tr(struct trace_array *tr)
1422 {
1423         struct ring_buffer *buffer;
1424         unsigned long flags;
1425
1426         /* If global, we need to also stop the max tracer */
1427         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1428                 return tracing_stop();
1429
1430         raw_spin_lock_irqsave(&tr->start_lock, flags);
1431         if (tr->stop_count++)
1432                 goto out;
1433
1434         buffer = tr->trace_buffer.buffer;
1435         if (buffer)
1436                 ring_buffer_record_disable(buffer);
1437
1438  out:
1439         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1440 }
1441
1442 void trace_stop_cmdline_recording(void);
1443
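/*
 * Save tsk->comm in the saved_cmdlines cache, indexed through
 * map_pid_to_cmdline, so that trace output can resolve a pid to a comm
 * later. Uses a trylock so hot paths never spin on the cmdline lock.
 */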
1444 static void trace_save_cmdline(struct task_struct *tsk)
1445 {
1446         unsigned pid, idx;
1447
1448         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1449                 return;
1450
1451         /*
1452          * It's not the end of the world if we don't get
1453          * the lock, but we also don't want to spin
1454          * nor do we want to disable interrupts,
1455          * so if we miss here, then better luck next time.
1456          */
1457         if (!arch_spin_trylock(&trace_cmdline_lock))
1458                 return;
1459
1460         idx = map_pid_to_cmdline[tsk->pid];
1461         if (idx == NO_CMDLINE_MAP) {
1462                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1463
1464                 /*
1465                  * Check whether the cmdline buffer at idx has a pid
1466                  * mapped. We are going to overwrite that entry so we
1467                  * need to clear the map_pid_to_cmdline. Otherwise we
1468                  * would read the new comm for the old pid.
1469                  */
1470                 pid = map_cmdline_to_pid[idx];
1471                 if (pid != NO_CMDLINE_MAP)
1472                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1473
1474                 map_cmdline_to_pid[idx] = tsk->pid;
1475                 map_pid_to_cmdline[tsk->pid] = idx;
1476
1477                 cmdline_idx = idx;
1478         }
1479
1480         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1481
1482         arch_spin_unlock(&trace_cmdline_lock);
1483 }
1484
1485 void trace_find_cmdline(int pid, char comm[])
1486 {
1487         unsigned map;
1488
1489         if (!pid) {
1490                 strcpy(comm, "<idle>");
1491                 return;
1492         }
1493
1494         if (WARN_ON_ONCE(pid < 0)) {
1495                 strcpy(comm, "<XXX>");
1496                 return;
1497         }
1498
1499         if (pid > PID_MAX_DEFAULT) {
1500                 strcpy(comm, "<...>");
1501                 return;
1502         }
1503
1504         preempt_disable();
1505         arch_spin_lock(&trace_cmdline_lock);
1506         map = map_pid_to_cmdline[pid];
1507         if (map != NO_CMDLINE_MAP)
1508                 strcpy(comm, saved_cmdlines[map]);
1509         else
1510                 strcpy(comm, "<...>");
1511
1512         arch_spin_unlock(&trace_cmdline_lock);
1513         preempt_enable();
1514 }
1515
1516 void tracing_record_cmdline(struct task_struct *tsk)
1517 {
1518         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1519                 return;
1520
1521         if (!__this_cpu_read(trace_cmdline_save))
1522                 return;
1523
1524         __this_cpu_write(trace_cmdline_save, false);
1525
1526         trace_save_cmdline(tsk);
1527 }
1528
1529 void
1530 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1531                              int pc)
1532 {
1533         struct task_struct *tsk = current;
1534
1535         entry->preempt_count            = pc & 0xff;
1536         entry->pid                      = (tsk) ? tsk->pid : 0;
1537         entry->flags =
1538 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1539                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1540 #else
1541                 TRACE_FLAG_IRQS_NOSUPPORT |
1542 #endif
1543                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1544                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1545                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1546                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1547 }
1548 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1549
1550 struct ring_buffer_event *
1551 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1552                           int type,
1553                           unsigned long len,
1554                           unsigned long flags, int pc)
1555 {
1556         struct ring_buffer_event *event;
1557
1558         event = ring_buffer_lock_reserve(buffer, len);
1559         if (event != NULL) {
1560                 struct trace_entry *ent = ring_buffer_event_data(event);
1561
1562                 tracing_generic_entry_update(ent, flags, pc);
1563                 ent->type = type;
1564         }
1565
1566         return event;
1567 }
1568
1569 void
1570 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1571 {
1572         __this_cpu_write(trace_cmdline_save, true);
1573         ring_buffer_unlock_commit(buffer, event);
1574 }
1575
1576 static inline void
1577 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1578                              struct ring_buffer_event *event,
1579                              unsigned long flags, int pc)
1580 {
1581         __buffer_unlock_commit(buffer, event);
1582
1583         ftrace_trace_stack(buffer, flags, 6, pc);
1584         ftrace_trace_userstack(buffer, flags, pc);
1585 }
1586
1587 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1588                                 struct ring_buffer_event *event,
1589                                 unsigned long flags, int pc)
1590 {
1591         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1592 }
1593 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1594
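/*
 * Scratch ring buffer used by trace_event_buffer_lock_reserve() when
 * tracing is off but event triggers still need to see the event data.
 */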
1595 static struct ring_buffer *temp_buffer;
1596
1597 struct ring_buffer_event *
1598 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1599                           struct ftrace_event_file *ftrace_file,
1600                           int type, unsigned long len,
1601                           unsigned long flags, int pc)
1602 {
1603         struct ring_buffer_event *entry;
1604
1605         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1606         entry = trace_buffer_lock_reserve(*current_rb,
1607                                          type, len, flags, pc);
1608         /*
1609          * If tracing is off, but we have triggers enabled
1610          * we still need to look at the event data. Use the temp_buffer
1611          * to store the trace event for the trigger to use. It's recursion
1612          * safe and will not be recorded anywhere.
1613          */
1614         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1615                 *current_rb = temp_buffer;
1616                 entry = trace_buffer_lock_reserve(*current_rb,
1617                                                   type, len, flags, pc);
1618         }
1619         return entry;
1620 }
1621 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1622
1623 struct ring_buffer_event *
1624 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1625                                   int type, unsigned long len,
1626                                   unsigned long flags, int pc)
1627 {
1628         *current_rb = global_trace.trace_buffer.buffer;
1629         return trace_buffer_lock_reserve(*current_rb,
1630                                          type, len, flags, pc);
1631 }
1632 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1633
1634 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1635                                         struct ring_buffer_event *event,
1636                                         unsigned long flags, int pc)
1637 {
1638         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1639 }
1640 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1641
1642 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1643                                      struct ring_buffer_event *event,
1644                                      unsigned long flags, int pc,
1645                                      struct pt_regs *regs)
1646 {
1647         __buffer_unlock_commit(buffer, event);
1648
1649         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1650         ftrace_trace_userstack(buffer, flags, pc);
1651 }
1652 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1653
1654 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1655                                          struct ring_buffer_event *event)
1656 {
1657         ring_buffer_discard_commit(buffer, event);
1658 }
1659 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1660
1661 void
1662 trace_function(struct trace_array *tr,
1663                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1664                int pc)
1665 {
1666         struct ftrace_event_call *call = &event_function;
1667         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1668         struct ring_buffer_event *event;
1669         struct ftrace_entry *entry;
1670
1671         /* If we are reading the ring buffer, don't trace */
1672         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1673                 return;
1674
1675         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1676                                           flags, pc);
1677         if (!event)
1678                 return;
1679         entry   = ring_buffer_event_data(event);
1680         entry->ip                       = ip;
1681         entry->parent_ip                = parent_ip;
1682
1683         if (!call_filter_check_discard(call, entry, buffer, event))
1684                 __buffer_unlock_commit(buffer, event);
1685 }
1686
1687 #ifdef CONFIG_STACKTRACE
1688
1689 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1690 struct ftrace_stack {
1691         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1692 };
1693
1694 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1695 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1696
1697 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1698                                  unsigned long flags,
1699                                  int skip, int pc, struct pt_regs *regs)
1700 {
1701         struct ftrace_event_call *call = &event_kernel_stack;
1702         struct ring_buffer_event *event;
1703         struct stack_entry *entry;
1704         struct stack_trace trace;
1705         int use_stack;
1706         int size = FTRACE_STACK_ENTRIES;
1707
1708         trace.nr_entries        = 0;
1709         trace.skip              = skip;
1710
1711         /*
1712          * Since events can happen in NMIs, there's no safe way to
1713          * use the per-cpu ftrace_stacks. We reserve it, and if an interrupt
1714          * or NMI comes in, it will just have to use the default
1715          * FTRACE_STACK_ENTRIES (stored directly in the event).
1716          */
1717         preempt_disable_notrace();
1718
1719         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1720         /*
1721          * We don't need any atomic variables, just a barrier.
1722          * If an interrupt comes in, we don't care, because it would
1723          * have exited and put the counter back to what we want.
1724          * We just need a barrier to keep gcc from moving things
1725          * around.
1726          */
1727         barrier();
1728         if (use_stack == 1) {
1729                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1730                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1731
1732                 if (regs)
1733                         save_stack_trace_regs(regs, &trace);
1734                 else
1735                         save_stack_trace(&trace);
1736
1737                 if (trace.nr_entries > size)
1738                         size = trace.nr_entries;
1739         } else
1740                 /* From now on, use_stack is a boolean */
1741                 use_stack = 0;
1742
1743         size *= sizeof(unsigned long);
1744
1745         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1746                                           sizeof(*entry) + size, flags, pc);
1747         if (!event)
1748                 goto out;
1749         entry = ring_buffer_event_data(event);
1750
1751         memset(&entry->caller, 0, size);
1752
1753         if (use_stack)
1754                 memcpy(&entry->caller, trace.entries,
1755                        trace.nr_entries * sizeof(unsigned long));
1756         else {
1757                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1758                 trace.entries           = entry->caller;
1759                 if (regs)
1760                         save_stack_trace_regs(regs, &trace);
1761                 else
1762                         save_stack_trace(&trace);
1763         }
1764
1765         entry->size = trace.nr_entries;
1766
1767         if (!call_filter_check_discard(call, entry, buffer, event))
1768                 __buffer_unlock_commit(buffer, event);
1769
1770  out:
1771         /* Again, don't let gcc optimize things here */
1772         barrier();
1773         __this_cpu_dec(ftrace_stack_reserve);
1774         preempt_enable_notrace();
1775
1776 }
1777
1778 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1779                              int skip, int pc, struct pt_regs *regs)
1780 {
1781         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1782                 return;
1783
1784         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1785 }
1786
1787 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1788                         int skip, int pc)
1789 {
1790         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1791                 return;
1792
1793         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1794 }
1795
1796 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1797                    int pc)
1798 {
1799         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1800 }
1801
1802 /**
1803  * trace_dump_stack - record a stack back trace in the trace buffer
1804  * @skip: Number of functions to skip (helper handlers)
1805  */
1806 void trace_dump_stack(int skip)
1807 {
1808         unsigned long flags;
1809
1810         if (tracing_disabled || tracing_selftest_running)
1811                 return;
1812
1813         local_save_flags(flags);
1814
1815         /*
1816          * Skip 3 more frames, which seems to get us to the caller
1817          * of this function.
1818          */
1819         skip += 3;
1820         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1821                              flags, skip, preempt_count(), NULL);
1822 }
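/*
 * Example: a minimal sketch of recording the current call chain from
 * driver or subsystem code while debugging. The hook function below is
 * hypothetical; trace_dump_stack() is the interface defined above.
 */
#if 0	/* illustrative sketch only, not built */
static void my_debug_hook(void)
{
        /* skip == 0: the recorded trace starts at the caller of trace_dump_stack() */
        trace_dump_stack(0);
}
#endif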
1823
1824 static DEFINE_PER_CPU(int, user_stack_count);
1825
1826 void
1827 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1828 {
1829         struct ftrace_event_call *call = &event_user_stack;
1830         struct ring_buffer_event *event;
1831         struct userstack_entry *entry;
1832         struct stack_trace trace;
1833
1834         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1835                 return;
1836
1837         /*
1838          * NMIs can not handle page faults, even with fixups.
1839          * Saving the user stack can (and often does) fault.
1840          */
1841         if (unlikely(in_nmi()))
1842                 return;
1843
1844         /*
1845          * prevent recursion, since the user stack tracing may
1846          * trigger other kernel events.
1847          */
1848         preempt_disable();
1849         if (__this_cpu_read(user_stack_count))
1850                 goto out;
1851
1852         __this_cpu_inc(user_stack_count);
1853
1854         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1855                                           sizeof(*entry), flags, pc);
1856         if (!event)
1857                 goto out_drop_count;
1858         entry   = ring_buffer_event_data(event);
1859
1860         entry->tgid             = current->tgid;
1861         memset(&entry->caller, 0, sizeof(entry->caller));
1862
1863         trace.nr_entries        = 0;
1864         trace.max_entries       = FTRACE_STACK_ENTRIES;
1865         trace.skip              = 0;
1866         trace.entries           = entry->caller;
1867
1868         save_stack_trace_user(&trace);
1869         if (!call_filter_check_discard(call, entry, buffer, event))
1870                 __buffer_unlock_commit(buffer, event);
1871
1872  out_drop_count:
1873         __this_cpu_dec(user_stack_count);
1874  out:
1875         preempt_enable();
1876 }
1877
1878 #ifdef UNUSED
1879 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1880 {
1881         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1882 }
1883 #endif /* UNUSED */
1884
1885 #endif /* CONFIG_STACKTRACE */
1886
1887 /* created for use with alloc_percpu */
1888 struct trace_buffer_struct {
1889         char buffer[TRACE_BUF_SIZE];
1890 };
1891
1892 static struct trace_buffer_struct *trace_percpu_buffer;
1893 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1894 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1895 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1896
1897 /*
1898  * The buffer used depends on the context. There is a per-cpu
1899  * buffer for normal context, softirq context, hard irq context and
1900  * NMI context. This allows for lockless recording.
1901  *
1902  * Note, if the buffers failed to be allocated, then this returns NULL.
1903  */
1904 static char *get_trace_buf(void)
1905 {
1906         struct trace_buffer_struct *percpu_buffer;
1907
1908         /*
1909          * If we have allocated per cpu buffers, then we do not
1910          * need to do any locking.
1911          */
1912         if (in_nmi())
1913                 percpu_buffer = trace_percpu_nmi_buffer;
1914         else if (in_irq())
1915                 percpu_buffer = trace_percpu_irq_buffer;
1916         else if (in_softirq())
1917                 percpu_buffer = trace_percpu_sirq_buffer;
1918         else
1919                 percpu_buffer = trace_percpu_buffer;
1920
1921         if (!percpu_buffer)
1922                 return NULL;
1923
1924         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1925 }
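/*
 * Example: the usual calling pattern (compare trace_vbprintk() and
 * __trace_array_vprintk() below). A sketch only; the function name is
 * hypothetical and fmt/args stand in for a caller's format and va_list.
 */
#if 0	/* illustrative sketch only, not built */
static void example_use_of_trace_buf(const char *fmt, va_list args)
{
        char *tbuf;

        preempt_disable_notrace();      /* stay on this cpu while using the buffer */
        tbuf = get_trace_buf();
        if (tbuf)
                vsnprintf(tbuf, TRACE_BUF_SIZE, fmt, args);
        preempt_enable_notrace();
}
#endif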
1926
1927 static int alloc_percpu_trace_buffer(void)
1928 {
1929         struct trace_buffer_struct *buffers;
1930         struct trace_buffer_struct *sirq_buffers;
1931         struct trace_buffer_struct *irq_buffers;
1932         struct trace_buffer_struct *nmi_buffers;
1933
1934         buffers = alloc_percpu(struct trace_buffer_struct);
1935         if (!buffers)
1936                 goto err_warn;
1937
1938         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1939         if (!sirq_buffers)
1940                 goto err_sirq;
1941
1942         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1943         if (!irq_buffers)
1944                 goto err_irq;
1945
1946         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1947         if (!nmi_buffers)
1948                 goto err_nmi;
1949
1950         trace_percpu_buffer = buffers;
1951         trace_percpu_sirq_buffer = sirq_buffers;
1952         trace_percpu_irq_buffer = irq_buffers;
1953         trace_percpu_nmi_buffer = nmi_buffers;
1954
1955         return 0;
1956
1957  err_nmi:
1958         free_percpu(irq_buffers);
1959  err_irq:
1960         free_percpu(sirq_buffers);
1961  err_sirq:
1962         free_percpu(buffers);
1963  err_warn:
1964         WARN(1, "Could not allocate percpu trace_printk buffer");
1965         return -ENOMEM;
1966 }
1967
1968 static int buffers_allocated;
1969
1970 void trace_printk_init_buffers(void)
1971 {
1972         if (buffers_allocated)
1973                 return;
1974
1975         if (alloc_percpu_trace_buffer())
1976                 return;
1977
1978         pr_info("ftrace: Allocated trace_printk buffers\n");
1979
1980         /* Expand the buffers to set size */
1981         tracing_update_buffers();
1982
1983         buffers_allocated = 1;
1984
1985         /*
1986          * trace_printk_init_buffers() can be called by modules.
1987          * If that happens, then we need to start cmdline recording
1988          * directly here. If global_trace.trace_buffer.buffer is already
1989          * allocated here, then this was called by module code.
1990          */
1991         if (global_trace.trace_buffer.buffer)
1992                 tracing_start_cmdline_record();
1993 }
1994
1995 void trace_printk_start_comm(void)
1996 {
1997         /* Start tracing comms if trace printk is set */
1998         if (!buffers_allocated)
1999                 return;
2000         tracing_start_cmdline_record();
2001 }
2002
2003 static void trace_printk_start_stop_comm(int enabled)
2004 {
2005         if (!buffers_allocated)
2006                 return;
2007
2008         if (enabled)
2009                 tracing_start_cmdline_record();
2010         else
2011                 tracing_stop_cmdline_record();
2012 }
2013
2014 /**
2015  * trace_vbprintk - write binary msg to tracing buffer
2016  * @ip: caller address, @fmt: binary format string, @args: arguments for @fmt
2017  */
2018 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2019 {
2020         struct ftrace_event_call *call = &event_bprint;
2021         struct ring_buffer_event *event;
2022         struct ring_buffer *buffer;
2023         struct trace_array *tr = &global_trace;
2024         struct bprint_entry *entry;
2025         unsigned long flags;
2026         char *tbuffer;
2027         int len = 0, size, pc;
2028
2029         if (unlikely(tracing_selftest_running || tracing_disabled))
2030                 return 0;
2031
2032         /* Don't pollute graph traces with trace_vprintk internals */
2033         pause_graph_tracing();
2034
2035         pc = preempt_count();
2036         preempt_disable_notrace();
2037
2038         tbuffer = get_trace_buf();
2039         if (!tbuffer) {
2040                 len = 0;
2041                 goto out;
2042         }
2043
2044         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2045
2046         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2047                 goto out;
2048
2049         local_save_flags(flags);
2050         size = sizeof(*entry) + sizeof(u32) * len;
2051         buffer = tr->trace_buffer.buffer;
2052         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2053                                           flags, pc);
2054         if (!event)
2055                 goto out;
2056         entry = ring_buffer_event_data(event);
2057         entry->ip                       = ip;
2058         entry->fmt                      = fmt;
2059
2060         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2061         if (!call_filter_check_discard(call, entry, buffer, event)) {
2062                 __buffer_unlock_commit(buffer, event);
2063                 ftrace_trace_stack(buffer, flags, 6, pc);
2064         }
2065
2066 out:
2067         preempt_enable_notrace();
2068         unpause_graph_tracing();
2069
2070         return len;
2071 }
2072 EXPORT_SYMBOL_GPL(trace_vbprintk);
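/*
 * Example: trace_vbprintk() is normally reached from the trace_printk()
 * machinery; a varargs wrapper around it would look roughly like the
 * sketch below (my_trace_bprintk is hypothetical, not a kernel API).
 */
#if 0	/* illustrative sketch only, not built */
static int my_trace_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}
#endif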
2073
2074 static int
2075 __trace_array_vprintk(struct ring_buffer *buffer,
2076                       unsigned long ip, const char *fmt, va_list args)
2077 {
2078         struct ftrace_event_call *call = &event_print;
2079         struct ring_buffer_event *event;
2080         int len = 0, size, pc;
2081         struct print_entry *entry;
2082         unsigned long flags;
2083         char *tbuffer;
2084
2085         if (tracing_disabled || tracing_selftest_running)
2086                 return 0;
2087
2088         /* Don't pollute graph traces with trace_vprintk internals */
2089         pause_graph_tracing();
2090
2091         pc = preempt_count();
2092         preempt_disable_notrace();
2093
2094
2095         tbuffer = get_trace_buf();
2096         if (!tbuffer) {
2097                 len = 0;
2098                 goto out;
2099         }
2100
2101         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2102         if (len > TRACE_BUF_SIZE)
2103                 goto out;
2104
2105         local_save_flags(flags);
2106         size = sizeof(*entry) + len + 1;
2107         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2108                                           flags, pc);
2109         if (!event)
2110                 goto out;
2111         entry = ring_buffer_event_data(event);
2112         entry->ip = ip;
2113
2114         memcpy(&entry->buf, tbuffer, len);
2115         entry->buf[len] = '\0';
2116         if (!call_filter_check_discard(call, entry, buffer, event)) {
2117                 __buffer_unlock_commit(buffer, event);
2118                 ftrace_trace_stack(buffer, flags, 6, pc);
2119         }
2120  out:
2121         preempt_enable_notrace();
2122         unpause_graph_tracing();
2123
2124         return len;
2125 }
2126
2127 int trace_array_vprintk(struct trace_array *tr,
2128                         unsigned long ip, const char *fmt, va_list args)
2129 {
2130         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2131 }
2132
2133 int trace_array_printk(struct trace_array *tr,
2134                        unsigned long ip, const char *fmt, ...)
2135 {
2136         int ret;
2137         va_list ap;
2138
2139         if (!(trace_flags & TRACE_ITER_PRINTK))
2140                 return 0;
2141
2142         va_start(ap, fmt);
2143         ret = trace_array_vprintk(tr, ip, fmt, ap);
2144         va_end(ap);
2145         return ret;
2146 }
2147
2148 int trace_array_printk_buf(struct ring_buffer *buffer,
2149                            unsigned long ip, const char *fmt, ...)
2150 {
2151         int ret;
2152         va_list ap;
2153
2154         if (!(trace_flags & TRACE_ITER_PRINTK))
2155                 return 0;
2156
2157         va_start(ap, fmt);
2158         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2159         va_end(ap);
2160         return ret;
2161 }
2162
2163 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2164 {
2165         return trace_array_vprintk(&global_trace, ip, fmt, args);
2166 }
2167 EXPORT_SYMBOL_GPL(trace_vprintk);
2168
2169 static void trace_iterator_increment(struct trace_iterator *iter)
2170 {
2171         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2172
2173         iter->idx++;
2174         if (buf_iter)
2175                 ring_buffer_read(buf_iter, NULL);
2176 }
2177
2178 static struct trace_entry *
2179 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2180                 unsigned long *lost_events)
2181 {
2182         struct ring_buffer_event *event;
2183         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2184
2185         if (buf_iter)
2186                 event = ring_buffer_iter_peek(buf_iter, ts);
2187         else
2188                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2189                                          lost_events);
2190
2191         if (event) {
2192                 iter->ent_size = ring_buffer_event_length(event);
2193                 return ring_buffer_event_data(event);
2194         }
2195         iter->ent_size = 0;
2196         return NULL;
2197 }
2198
2199 static struct trace_entry *
2200 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2201                   unsigned long *missing_events, u64 *ent_ts)
2202 {
2203         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2204         struct trace_entry *ent, *next = NULL;
2205         unsigned long lost_events = 0, next_lost = 0;
2206         int cpu_file = iter->cpu_file;
2207         u64 next_ts = 0, ts;
2208         int next_cpu = -1;
2209         int next_size = 0;
2210         int cpu;
2211
2212         /*
2213          * If we are in a per_cpu trace file, don't bother iterating over
2214          * all cpus; just peek at that cpu directly.
2215          */
2216         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2217                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2218                         return NULL;
2219                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2220                 if (ent_cpu)
2221                         *ent_cpu = cpu_file;
2222
2223                 return ent;
2224         }
2225
2226         for_each_tracing_cpu(cpu) {
2227
2228                 if (ring_buffer_empty_cpu(buffer, cpu))
2229                         continue;
2230
2231                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2232
2233                 /*
2234                  * Pick the entry with the smallest timestamp:
2235                  */
2236                 if (ent && (!next || ts < next_ts)) {
2237                         next = ent;
2238                         next_cpu = cpu;
2239                         next_ts = ts;
2240                         next_lost = lost_events;
2241                         next_size = iter->ent_size;
2242                 }
2243         }
2244
2245         iter->ent_size = next_size;
2246
2247         if (ent_cpu)
2248                 *ent_cpu = next_cpu;
2249
2250         if (ent_ts)
2251                 *ent_ts = next_ts;
2252
2253         if (missing_events)
2254                 *missing_events = next_lost;
2255
2256         return next;
2257 }
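/*
 * Example: if the next entries of the per-cpu buffers carry timestamps
 *
 *   cpu0: 105   cpu1: 98   cpu2: 120
 *
 * the loop above picks the cpu1 entry (ts == 98) and sets next_cpu to 1,
 * so successive calls merge the per-cpu buffers into a single stream
 * ordered by timestamp.
 */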
2258
2259 /* Find the next real entry, without updating the iterator itself */
2260 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2261                                           int *ent_cpu, u64 *ent_ts)
2262 {
2263         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2264 }
2265
2266 /* Find the next real entry, and increment the iterator to the next entry */
2267 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2268 {
2269         iter->ent = __find_next_entry(iter, &iter->cpu,
2270                                       &iter->lost_events, &iter->ts);
2271
2272         if (iter->ent)
2273                 trace_iterator_increment(iter);
2274
2275         return iter->ent ? iter : NULL;
2276 }
2277
2278 static void trace_consume(struct trace_iterator *iter)
2279 {
2280         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2281                             &iter->lost_events);
2282 }
2283
2284 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2285 {
2286         struct trace_iterator *iter = m->private;
2287         int i = (int)*pos;
2288         void *ent;
2289
2290         WARN_ON_ONCE(iter->leftover);
2291
2292         (*pos)++;
2293
2294         /* can't go backwards */
2295         if (iter->idx > i)
2296                 return NULL;
2297
2298         if (iter->idx < 0)
2299                 ent = trace_find_next_entry_inc(iter);
2300         else
2301                 ent = iter;
2302
2303         while (ent && iter->idx < i)
2304                 ent = trace_find_next_entry_inc(iter);
2305
2306         iter->pos = *pos;
2307
2308         return ent;
2309 }
2310
2311 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2312 {
2313         struct ring_buffer_event *event;
2314         struct ring_buffer_iter *buf_iter;
2315         unsigned long entries = 0;
2316         u64 ts;
2317
2318         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2319
2320         buf_iter = trace_buffer_iter(iter, cpu);
2321         if (!buf_iter)
2322                 return;
2323
2324         ring_buffer_iter_reset(buf_iter);
2325
2326         /*
2327          * With the max latency tracers, it is possible that a reset
2328          * never took place on a cpu. This is evident from the
2329          * timestamp being before the start of the buffer.
2330          */
2331         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2332                 if (ts >= iter->trace_buffer->time_start)
2333                         break;
2334                 entries++;
2335                 ring_buffer_read(buf_iter, NULL);
2336         }
2337
2338         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2339 }
2340
2341 /*
2342  * The current tracer is copied to avoid taking a global lock
2343  * all around.
2344  */
2345 static void *s_start(struct seq_file *m, loff_t *pos)
2346 {
2347         struct trace_iterator *iter = m->private;
2348         struct trace_array *tr = iter->tr;
2349         int cpu_file = iter->cpu_file;
2350         void *p = NULL;
2351         loff_t l = 0;
2352         int cpu;
2353
2354         /*
2355          * Copy the tracer to avoid using a global lock all around.
2356          * iter->trace is a copy of current_trace; the name pointer
2357          * may be compared instead of using strcmp(), as iter->trace->name
2358          * will point to the same string as current_trace->name.
2359          */
2360         mutex_lock(&trace_types_lock);
2361         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2362                 *iter->trace = *tr->current_trace;
2363         mutex_unlock(&trace_types_lock);
2364
2365 #ifdef CONFIG_TRACER_MAX_TRACE
2366         if (iter->snapshot && iter->trace->use_max_tr)
2367                 return ERR_PTR(-EBUSY);
2368 #endif
2369
2370         if (!iter->snapshot)
2371                 atomic_inc(&trace_record_cmdline_disabled);
2372
2373         if (*pos != iter->pos) {
2374                 iter->ent = NULL;
2375                 iter->cpu = 0;
2376                 iter->idx = -1;
2377
2378                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2379                         for_each_tracing_cpu(cpu)
2380                                 tracing_iter_reset(iter, cpu);
2381                 } else
2382                         tracing_iter_reset(iter, cpu_file);
2383
2384                 iter->leftover = 0;
2385                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2386                         ;
2387
2388         } else {
2389                 /*
2390                  * If we overflowed the seq_file before, then we want
2391                  * to just reuse the trace_seq buffer again.
2392                  */
2393                 if (iter->leftover)
2394                         p = iter;
2395                 else {
2396                         l = *pos - 1;
2397                         p = s_next(m, p, &l);
2398                 }
2399         }
2400
2401         trace_event_read_lock();
2402         trace_access_lock(cpu_file);
2403         return p;
2404 }
2405
2406 static void s_stop(struct seq_file *m, void *p)
2407 {
2408         struct trace_iterator *iter = m->private;
2409
2410 #ifdef CONFIG_TRACER_MAX_TRACE
2411         if (iter->snapshot && iter->trace->use_max_tr)
2412                 return;
2413 #endif
2414
2415         if (!iter->snapshot)
2416                 atomic_dec(&trace_record_cmdline_disabled);
2417
2418         trace_access_unlock(iter->cpu_file);
2419         trace_event_read_unlock();
2420 }
2421
2422 static void
2423 get_total_entries(struct trace_buffer *buf,
2424                   unsigned long *total, unsigned long *entries)
2425 {
2426         unsigned long count;
2427         int cpu;
2428
2429         *total = 0;
2430         *entries = 0;
2431
2432         for_each_tracing_cpu(cpu) {
2433                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2434                 /*
2435                  * If this buffer has skipped entries, then we hold all
2436                  * entries for the trace and we need to ignore the
2437                  * ones before the time stamp.
2438                  */
2439                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2440                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2441                         /* total is the same as the entries */
2442                         *total += count;
2443                 } else
2444                         *total += count +
2445                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2446                 *entries += count;
2447         }
2448 }
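/*
 * Example: if a cpu buffer currently holds 1000 entries and the ring
 * buffer reports 200 overruns on that cpu, *entries grows by 1000 while
 * *total grows by 1200 (entries still present plus those lost to
 * overwrite), unless that cpu had skipped_entries set by a reset.
 */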
2449
2450 static void print_lat_help_header(struct seq_file *m)
2451 {
2452         seq_puts(m, "#                  _------=> CPU#            \n");
2453         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2454         seq_puts(m, "#                | / _----=> need-resched    \n");
2455         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2456         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2457         seq_puts(m, "#                |||| /     delay             \n");
2458         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2459         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2460 }
2461
2462 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2463 {
2464         unsigned long total;
2465         unsigned long entries;
2466
2467         get_total_entries(buf, &total, &entries);
2468         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2469                    entries, total, num_online_cpus());
2470         seq_puts(m, "#\n");
2471 }
2472
2473 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2474 {
2475         print_event_info(buf, m);
2476         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2477         seq_puts(m, "#              | |       |          |         |\n");
2478 }
2479
2480 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2481 {
2482         print_event_info(buf, m);
2483         seq_puts(m, "#                              _-----=> irqs-off\n");
2484         seq_puts(m, "#                             / _----=> need-resched\n");
2485         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2486         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2487         seq_puts(m, "#                            ||| /     delay\n");
2488         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2489         seq_puts(m, "#              | |       |   ||||       |         |\n");
2490 }
2491
2492 void
2493 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2494 {
2495         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2496         struct trace_buffer *buf = iter->trace_buffer;
2497         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2498         struct tracer *type = iter->trace;
2499         unsigned long entries;
2500         unsigned long total;
2501         const char *name = "preemption";
2502
2503         name = type->name;
2504
2505         get_total_entries(buf, &total, &entries);
2506
2507         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2508                    name, UTS_RELEASE);
2509         seq_puts(m, "# -----------------------------------"
2510                  "---------------------------------\n");
2511         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2512                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2513                    nsecs_to_usecs(data->saved_latency),
2514                    entries,
2515                    total,
2516                    buf->cpu,
2517 #if defined(CONFIG_PREEMPT_NONE)
2518                    "server",
2519 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2520                    "desktop",
2521 #elif defined(CONFIG_PREEMPT)
2522                    "preempt",
2523 #else
2524                    "unknown",
2525 #endif
2526                    /* These are reserved for later use */
2527                    0, 0, 0, 0);
2528 #ifdef CONFIG_SMP
2529         seq_printf(m, " #P:%d)\n", num_online_cpus());
2530 #else
2531         seq_puts(m, ")\n");
2532 #endif
2533         seq_puts(m, "#    -----------------\n");
2534         seq_printf(m, "#    | task: %.16s-%d "
2535                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2536                    data->comm, data->pid,
2537                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2538                    data->policy, data->rt_priority);
2539         seq_puts(m, "#    -----------------\n");
2540
2541         if (data->critical_start) {
2542                 seq_puts(m, "#  => started at: ");
2543                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2544                 trace_print_seq(m, &iter->seq);
2545                 seq_puts(m, "\n#  => ended at:   ");
2546                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2547                 trace_print_seq(m, &iter->seq);
2548                 seq_puts(m, "\n#\n");
2549         }
2550
2551         seq_puts(m, "#\n");
2552 }
2553
2554 static void test_cpu_buff_start(struct trace_iterator *iter)
2555 {
2556         struct trace_seq *s = &iter->seq;
2557
2558         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2559                 return;
2560
2561         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2562                 return;
2563
2564         if (cpumask_test_cpu(iter->cpu, iter->started))
2565                 return;
2566
2567         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2568                 return;
2569
2570         cpumask_set_cpu(iter->cpu, iter->started);
2571
2572         /* Don't print started cpu buffer for the first entry of the trace */
2573         if (iter->idx > 1)
2574                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2575                                 iter->cpu);
2576 }
2577
2578 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2579 {
2580         struct trace_seq *s = &iter->seq;
2581         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2582         struct trace_entry *entry;
2583         struct trace_event *event;
2584
2585         entry = iter->ent;
2586
2587         test_cpu_buff_start(iter);
2588
2589         event = ftrace_find_event(entry->type);
2590
2591         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2592                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2593                         if (!trace_print_lat_context(iter))
2594                                 goto partial;
2595                 } else {
2596                         if (!trace_print_context(iter))
2597                                 goto partial;
2598                 }
2599         }
2600
2601         if (event)
2602                 return event->funcs->trace(iter, sym_flags, event);
2603
2604         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2605                 goto partial;
2606
2607         return TRACE_TYPE_HANDLED;
2608 partial:
2609         return TRACE_TYPE_PARTIAL_LINE;
2610 }
2611
2612 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2613 {
2614         struct trace_seq *s = &iter->seq;
2615         struct trace_entry *entry;
2616         struct trace_event *event;
2617
2618         entry = iter->ent;
2619
2620         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2621                 if (!trace_seq_printf(s, "%d %d %llu ",
2622                                       entry->pid, iter->cpu, iter->ts))
2623                         goto partial;
2624         }
2625
2626         event = ftrace_find_event(entry->type);
2627         if (event)
2628                 return event->funcs->raw(iter, 0, event);
2629
2630         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2631                 goto partial;
2632
2633         return TRACE_TYPE_HANDLED;
2634 partial:
2635         return TRACE_TYPE_PARTIAL_LINE;
2636 }
2637
2638 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2639 {
2640         struct trace_seq *s = &iter->seq;
2641         unsigned char newline = '\n';
2642         struct trace_entry *entry;
2643         struct trace_event *event;
2644
2645         entry = iter->ent;
2646
2647         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2648                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2649                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2650                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2651         }
2652
2653         event = ftrace_find_event(entry->type);
2654         if (event) {
2655                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2656                 if (ret != TRACE_TYPE_HANDLED)
2657                         return ret;
2658         }
2659
2660         SEQ_PUT_FIELD_RET(s, newline);
2661
2662         return TRACE_TYPE_HANDLED;
2663 }
2664
2665 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2666 {
2667         struct trace_seq *s = &iter->seq;
2668         struct trace_entry *entry;
2669         struct trace_event *event;
2670
2671         entry = iter->ent;
2672
2673         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2674                 SEQ_PUT_FIELD_RET(s, entry->pid);
2675                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2676                 SEQ_PUT_FIELD_RET(s, iter->ts);
2677         }
2678
2679         event = ftrace_find_event(entry->type);
2680         return event ? event->funcs->binary(iter, 0, event) :
2681                 TRACE_TYPE_HANDLED;
2682 }
2683
2684 int trace_empty(struct trace_iterator *iter)
2685 {
2686         struct ring_buffer_iter *buf_iter;
2687         int cpu;
2688
2689         /* If we are looking at one CPU buffer, only check that one */
2690         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2691                 cpu = iter->cpu_file;
2692                 buf_iter = trace_buffer_iter(iter, cpu);
2693                 if (buf_iter) {
2694                         if (!ring_buffer_iter_empty(buf_iter))
2695                                 return 0;
2696                 } else {
2697                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2698                                 return 0;
2699                 }
2700                 return 1;
2701         }
2702
2703         for_each_tracing_cpu(cpu) {
2704                 buf_iter = trace_buffer_iter(iter, cpu);
2705                 if (buf_iter) {
2706                         if (!ring_buffer_iter_empty(buf_iter))
2707                                 return 0;
2708                 } else {
2709                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2710                                 return 0;
2711                 }
2712         }
2713
2714         return 1;
2715 }
2716
2717 /*  Called with trace_event_read_lock() held. */
2718 enum print_line_t print_trace_line(struct trace_iterator *iter)
2719 {
2720         enum print_line_t ret;
2721
2722         if (iter->lost_events &&
2723             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2724                                  iter->cpu, iter->lost_events))
2725                 return TRACE_TYPE_PARTIAL_LINE;
2726
2727         if (iter->trace && iter->trace->print_line) {
2728                 ret = iter->trace->print_line(iter);
2729                 if (ret != TRACE_TYPE_UNHANDLED)
2730                         return ret;
2731         }
2732
2733         if (iter->ent->type == TRACE_BPUTS &&
2734                         trace_flags & TRACE_ITER_PRINTK &&
2735                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2736                 return trace_print_bputs_msg_only(iter);
2737
2738         if (iter->ent->type == TRACE_BPRINT &&
2739                         trace_flags & TRACE_ITER_PRINTK &&
2740                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2741                 return trace_print_bprintk_msg_only(iter);
2742
2743         if (iter->ent->type == TRACE_PRINT &&
2744                         trace_flags & TRACE_ITER_PRINTK &&
2745                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2746                 return trace_print_printk_msg_only(iter);
2747
2748         if (trace_flags & TRACE_ITER_BIN)
2749                 return print_bin_fmt(iter);
2750
2751         if (trace_flags & TRACE_ITER_HEX)
2752                 return print_hex_fmt(iter);
2753
2754         if (trace_flags & TRACE_ITER_RAW)
2755                 return print_raw_fmt(iter);
2756
2757         return print_trace_fmt(iter);
2758 }
2759
2760 void trace_latency_header(struct seq_file *m)
2761 {
2762         struct trace_iterator *iter = m->private;
2763
2764         /* print nothing if the buffers are empty */
2765         if (trace_empty(iter))
2766                 return;
2767
2768         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2769                 print_trace_header(m, iter);
2770
2771         if (!(trace_flags & TRACE_ITER_VERBOSE))
2772                 print_lat_help_header(m);
2773 }
2774
2775 void trace_default_header(struct seq_file *m)
2776 {
2777         struct trace_iterator *iter = m->private;
2778
2779         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2780                 return;
2781
2782         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2783                 /* print nothing if the buffers are empty */
2784                 if (trace_empty(iter))
2785                         return;
2786                 print_trace_header(m, iter);
2787                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2788                         print_lat_help_header(m);
2789         } else {
2790                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2791                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2792                                 print_func_help_header_irq(iter->trace_buffer, m);
2793                         else
2794                                 print_func_help_header(iter->trace_buffer, m);
2795                 }
2796         }
2797 }
2798
2799 static void test_ftrace_alive(struct seq_file *m)
2800 {
2801         if (!ftrace_is_dead())
2802                 return;
2803         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2804         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2805 }
2806
2807 #ifdef CONFIG_TRACER_MAX_TRACE
2808 static void show_snapshot_main_help(struct seq_file *m)
2809 {
2810         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2811         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2812         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2813         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2814         seq_printf(m, "#                      (Doesn't have to be '2', works with any number that\n");
2815         seq_printf(m, "#                       is not a '0' or '1')\n");
2816 }
2817
2818 static void show_snapshot_percpu_help(struct seq_file *m)
2819 {
2820         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2821 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2822         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2823         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2824 #else
2825         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2826         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2827 #endif
2828         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2829         seq_printf(m, "#                      (Doesn't have to be '2', works with any number that\n");
2830         seq_printf(m, "#                       is not a '0' or '1')\n");
2831 }
2832
2833 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2834 {
2835         if (iter->tr->allocated_snapshot)
2836                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2837         else
2838                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2839
2840         seq_printf(m, "# Snapshot commands:\n");
2841         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2842                 show_snapshot_main_help(m);
2843         else
2844                 show_snapshot_percpu_help(m);
2845 }
2846 #else
2847 /* Should never be called */
2848 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2849 #endif
2850
2851 static int s_show(struct seq_file *m, void *v)
2852 {
2853         struct trace_iterator *iter = v;
2854         int ret;
2855
2856         if (iter->ent == NULL) {
2857                 if (iter->tr) {
2858                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2859                         seq_puts(m, "#\n");
2860                         test_ftrace_alive(m);
2861                 }
2862                 if (iter->snapshot && trace_empty(iter))
2863                         print_snapshot_help(m, iter);
2864                 else if (iter->trace && iter->trace->print_header)
2865                         iter->trace->print_header(m);
2866                 else
2867                         trace_default_header(m);
2868
2869         } else if (iter->leftover) {
2870                 /*
2871                  * If we filled the seq_file buffer earlier, we
2872                  * want to just show it now.
2873                  */
2874                 ret = trace_print_seq(m, &iter->seq);
2875
2876                 /* ret should this time be zero, but you never know */
2877                 iter->leftover = ret;
2878
2879         } else {
2880                 print_trace_line(iter);
2881                 ret = trace_print_seq(m, &iter->seq);
2882                 /*
2883                  * If we overflow the seq_file buffer, then it will
2884                  * ask us for this data again at start up.
2885                  * Use that instead.
2886                  *  ret is 0 if seq_file write succeeded.
2887                  *        -1 otherwise.
2888                  */
2889                 iter->leftover = ret;
2890         }
2891
2892         return 0;
2893 }
2894
2895 /*
2896  * Should be used after trace_array_get(); trace_types_lock
2897  * ensures that i_cdev was already initialized.
2898  */
2899 static inline int tracing_get_cpu(struct inode *inode)
2900 {
2901         if (inode->i_cdev) /* See trace_create_cpu_file() */
2902                 return (long)inode->i_cdev - 1;
2903         return RING_BUFFER_ALL_CPUS;
2904 }
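/*
 * Example: the decode above implies that trace_create_cpu_file() stores
 * "cpu + 1" in i_cdev, so that NULL keeps meaning "no per-cpu file":
 *
 *   per_cpu/cpu2 file  ->  i_cdev == (void *)3  ->  (long)3 - 1 == 2
 *   top level file     ->  i_cdev == NULL       ->  RING_BUFFER_ALL_CPUS
 */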
2905
2906 static const struct seq_operations tracer_seq_ops = {
2907         .start          = s_start,
2908         .next           = s_next,
2909         .stop           = s_stop,
2910         .show           = s_show,
2911 };
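/*
 * Example: the seq_file core drives these callbacks roughly as
 *
 *   p = s_start(m, &pos);
 *   while (p) {
 *           s_show(m, p);
 *           p = s_next(m, p, &pos);
 *   }
 *   s_stop(m, p);
 *
 * for each read(), which is why s_start()/s_stop() take and release
 * trace_event_read_lock() and trace_access_lock().
 */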
2912
2913 static struct trace_iterator *
2914 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2915 {
2916         struct trace_array *tr = inode->i_private;
2917         struct trace_iterator *iter;
2918         int cpu;
2919
2920         if (tracing_disabled)
2921                 return ERR_PTR(-ENODEV);
2922
2923         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2924         if (!iter)
2925                 return ERR_PTR(-ENOMEM);
2926
2927         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2928                                     GFP_KERNEL);
2929         if (!iter->buffer_iter)
2930                 goto release;
2931
2932         /*
2933          * We make a copy of the current tracer to avoid concurrent
2934          * changes to it while we are reading.
2935          */
2936         mutex_lock(&trace_types_lock);
2937         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2938         if (!iter->trace)
2939                 goto fail;
2940
2941         *iter->trace = *tr->current_trace;
2942
2943         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2944                 goto fail;
2945
2946         iter->tr = tr;
2947
2948 #ifdef CONFIG_TRACER_MAX_TRACE
2949         /* Currently only the top directory has a snapshot */
2950         if (tr->current_trace->print_max || snapshot)
2951                 iter->trace_buffer = &tr->max_buffer;
2952         else
2953 #endif
2954                 iter->trace_buffer = &tr->trace_buffer;
2955         iter->snapshot = snapshot;
2956         iter->pos = -1;
2957         iter->cpu_file = tracing_get_cpu(inode);
2958         mutex_init(&iter->mutex);
2959
2960         /* Notify the tracer early; before we stop tracing. */
2961         if (iter->trace && iter->trace->open)
2962                 iter->trace->open(iter);
2963
2964         /* Annotate start of buffers if we had overruns */
2965         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2966                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2967
2968         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2969         if (trace_clocks[tr->clock_id].in_ns)
2970                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2971
2972         /* stop the trace while dumping if we are not opening "snapshot" */
2973         if (!iter->snapshot)
2974                 tracing_stop_tr(tr);
2975
2976         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2977                 for_each_tracing_cpu(cpu) {
2978                         iter->buffer_iter[cpu] =
2979                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2980                 }
2981                 ring_buffer_read_prepare_sync();
2982                 for_each_tracing_cpu(cpu) {
2983                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2984                         tracing_iter_reset(iter, cpu);
2985                 }
2986         } else {
2987                 cpu = iter->cpu_file;
2988                 iter->buffer_iter[cpu] =
2989                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2990                 ring_buffer_read_prepare_sync();
2991                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2992                 tracing_iter_reset(iter, cpu);
2993         }
2994
2995         mutex_unlock(&trace_types_lock);
2996
2997         return iter;
2998
2999  fail:
3000         mutex_unlock(&trace_types_lock);
3001         kfree(iter->trace);
3002         kfree(iter->buffer_iter);
3003 release:
3004         seq_release_private(inode, file);
3005         return ERR_PTR(-ENOMEM);
3006 }
3007
3008 int tracing_open_generic(struct inode *inode, struct file *filp)
3009 {
3010         if (tracing_disabled)
3011                 return -ENODEV;
3012
3013         filp->private_data = inode->i_private;
3014         return 0;
3015 }
3016
3017 bool tracing_is_disabled(void)
3018 {
3019         return tracing_disabled ? true : false;
3020 }
3021
3022 /*
3023  * Open and update trace_array ref count.
3024  * Must have the current trace_array passed to it.
3025  */
3026 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3027 {
3028         struct trace_array *tr = inode->i_private;
3029
3030         if (tracing_disabled)
3031                 return -ENODEV;
3032
3033         if (trace_array_get(tr) < 0)
3034                 return -ENODEV;
3035
3036         filp->private_data = inode->i_private;
3037
3038         return 0;
3039 }
3040
3041 static int tracing_release(struct inode *inode, struct file *file)
3042 {
3043         struct trace_array *tr = inode->i_private;
3044         struct seq_file *m = file->private_data;
3045         struct trace_iterator *iter;
3046         int cpu;
3047
3048         if (!(file->f_mode & FMODE_READ)) {
3049                 trace_array_put(tr);
3050                 return 0;
3051         }
3052
3053         /* Writes do not use seq_file */
3054         iter = m->private;
3055         mutex_lock(&trace_types_lock);
3056
3057         for_each_tracing_cpu(cpu) {
3058                 if (iter->buffer_iter[cpu])
3059                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3060         }
3061
3062         if (iter->trace && iter->trace->close)
3063                 iter->trace->close(iter);
3064
3065         if (!iter->snapshot)
3066                 /* reenable tracing if it was previously enabled */
3067                 tracing_start_tr(tr);
3068
3069         __trace_array_put(tr);
3070
3071         mutex_unlock(&trace_types_lock);
3072
3073         mutex_destroy(&iter->mutex);
3074         free_cpumask_var(iter->started);
3075         kfree(iter->trace);
3076         kfree(iter->buffer_iter);
3077         seq_release_private(inode, file);
3078
3079         return 0;
3080 }
3081
3082 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3083 {
3084         struct trace_array *tr = inode->i_private;
3085
3086         trace_array_put(tr);
3087         return 0;
3088 }
3089
3090 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3091 {
3092         struct trace_array *tr = inode->i_private;
3093
3094         trace_array_put(tr);
3095
3096         return single_release(inode, file);
3097 }
3098
3099 static int tracing_open(struct inode *inode, struct file *file)
3100 {
3101         struct trace_array *tr = inode->i_private;
3102         struct trace_iterator *iter;
3103         int ret = 0;
3104
3105         if (trace_array_get(tr) < 0)
3106                 return -ENODEV;
3107
3108         /* If this file was opened for write, then erase its contents */
3109         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3110                 int cpu = tracing_get_cpu(inode);
3111
3112                 if (cpu == RING_BUFFER_ALL_CPUS)
3113                         tracing_reset_online_cpus(&tr->trace_buffer);
3114                 else
3115                         tracing_reset(&tr->trace_buffer, cpu);
3116         }
3117
3118         if (file->f_mode & FMODE_READ) {
3119                 iter = __tracing_open(inode, file, false);
3120                 if (IS_ERR(iter))
3121                         ret = PTR_ERR(iter);
3122                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3123                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3124         }
3125
3126         if (ret < 0)
3127                 trace_array_put(tr);
3128
3129         return ret;
3130 }
3131
3132 /*
3133  * Some tracers are not suitable for instance buffers.
3134  * A tracer is always available for the global array (toplevel),
3135  * or for an instance if it explicitly states that it allows instances.
3136  */
3137 static bool
3138 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3139 {
3140         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3141 }
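/*
 * Example: a tracer opts in to instance buffers by setting
 * allow_instances in its struct tracer. The tracer below is a
 * hypothetical sketch, not one registered by this file.
 */
#if 0	/* illustrative sketch only, not built */
static struct tracer example_tracer __read_mostly = {
        .name                   = "example",
        .allow_instances        = true,
};
#endif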
3142
3143 /* Find the next tracer that this trace array may use */
3144 static struct tracer *
3145 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3146 {
3147         while (t && !trace_ok_for_array(t, tr))
3148                 t = t->next;
3149
3150         return t;
3151 }
3152
3153 static void *
3154 t_next(struct seq_file *m, void *v, loff_t *pos)
3155 {
3156         struct trace_array *tr = m->private;
3157         struct tracer *t = v;
3158
3159         (*pos)++;
3160
3161         if (t)
3162                 t = get_tracer_for_array(tr, t->next);
3163
3164         return t;
3165 }
3166
3167 static void *t_start(struct seq_file *m, loff_t *pos)
3168 {
3169         struct trace_array *tr = m->private;
3170         struct tracer *t;
3171         loff_t l = 0;
3172
3173         mutex_lock(&trace_types_lock);
3174
3175         t = get_tracer_for_array(tr, trace_types);
3176         for (; t && l < *pos; t = t_next(m, t, &l))
3177                         ;
3178
3179         return t;
3180 }
3181
3182 static void t_stop(struct seq_file *m, void *p)
3183 {
3184         mutex_unlock(&trace_types_lock);
3185 }
3186
3187 static int t_show(struct seq_file *m, void *v)
3188 {
3189         struct tracer *t = v;
3190
3191         if (!t)
3192                 return 0;
3193
3194         seq_printf(m, "%s", t->name);
3195         if (t->next)
3196                 seq_putc(m, ' ');
3197         else
3198                 seq_putc(m, '\n');
3199
3200         return 0;
3201 }
3202
3203 static const struct seq_operations show_traces_seq_ops = {
3204         .start          = t_start,
3205         .next           = t_next,
3206         .stop           = t_stop,
3207         .show           = t_show,
3208 };
3209
3210 static int show_traces_open(struct inode *inode, struct file *file)
3211 {
3212         struct trace_array *tr = inode->i_private;
3213         struct seq_file *m;
3214         int ret;
3215
3216         if (tracing_disabled)
3217                 return -ENODEV;
3218
3219         ret = seq_open(file, &show_traces_seq_ops);
3220         if (ret)
3221                 return ret;
3222
3223         m = file->private_data;
3224         m->private = tr;
3225
3226         return 0;
3227 }
3228
3229 static ssize_t
3230 tracing_write_stub(struct file *filp, const char __user *ubuf,
3231                    size_t count, loff_t *ppos)
3232 {
3233         return count;
3234 }
3235
3236 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3237 {
3238         int ret;
3239
3240         if (file->f_mode & FMODE_READ)
3241                 ret = seq_lseek(file, offset, whence);
3242         else
3243                 file->f_pos = ret = 0;
3244
3245         return ret;
3246 }
3247
3248 static const struct file_operations tracing_fops = {
3249         .open           = tracing_open,
3250         .read           = seq_read,
3251         .write          = tracing_write_stub,
3252         .llseek         = tracing_lseek,
3253         .release        = tracing_release,
3254 };
3255
3256 static const struct file_operations show_traces_fops = {
3257         .open           = show_traces_open,
3258         .read           = seq_read,
3259         .release        = seq_release,
3260         .llseek         = seq_lseek,
3261 };
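/*
 * Example (user-space view): these seq_operations back the
 * "available_tracers" file, so listing the registered tracers is:
 *
 *   # cat /sys/kernel/debug/tracing/available_tracers
 *   blk function_graph function nop
 *
 * (the exact list depends on the kernel configuration; "nop" is always
 * registered).
 */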
3262
3263 /*
3264  * The tracer itself will not take this lock, but still we want
3265  * to provide a consistent cpumask to user-space:
3266  */
3267 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3268
3269 /*
3270  * Temporary storage for the character representation of the
3271  * CPU bitmask (and one more byte for the newline):
3272  */
3273 static char mask_str[NR_CPUS + 1];
3274
3275 static ssize_t
3276 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3277                      size_t count, loff_t *ppos)
3278 {
3279         struct trace_array *tr = file_inode(filp)->i_private;
3280         int len;
3281
3282         mutex_lock(&tracing_cpumask_update_lock);
3283
3284         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3285         if (count - len < 2) {
3286                 count = -EINVAL;
3287                 goto out_err;
3288         }
3289         len += sprintf(mask_str + len, "\n");
3290         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3291
3292 out_err:
3293         mutex_unlock(&tracing_cpumask_update_lock);
3294
3295         return count;
3296 }
3297
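/*
 * Writing a cpumask to the tracing_cpumask file (see the readme below)
 * limits tracing to the selected CPUs. The mask is parsed by
 * cpumask_parse_user() as a comma-separated hex string, so for example
 * "echo 3 > tracing_cpumask" would restrict tracing to CPUs 0 and 1.
 * CPUs being removed from the mask get their per-cpu disabled count
 * raised and ring buffer recording stopped; CPUs being added get the
 * reverse.
 */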
3298 static ssize_t
3299 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3300                       size_t count, loff_t *ppos)
3301 {
3302         struct trace_array *tr = file_inode(filp)->i_private;
3303         cpumask_var_t tracing_cpumask_new;
3304         int err, cpu;
3305
3306         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3307                 return -ENOMEM;
3308
3309         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3310         if (err)
3311                 goto err_unlock;
3312
3313         mutex_lock(&tracing_cpumask_update_lock);
3314
3315         local_irq_disable();
3316         arch_spin_lock(&tr->max_lock);
3317         for_each_tracing_cpu(cpu) {
3318                 /*
3319                  * Increase/decrease the disabled counter if we are
3320                  * about to flip a bit in the cpumask:
3321                  */
3322                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3323                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3324                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3325                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3326                 }
3327                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3328                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3329                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3330                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3331                 }
3332         }
3333         arch_spin_unlock(&tr->max_lock);
3334         local_irq_enable();
3335
3336         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3337
3338         mutex_unlock(&tracing_cpumask_update_lock);
3339         free_cpumask_var(tracing_cpumask_new);
3340
3341         return count;
3342
3343 err_unlock:
3344         free_cpumask_var(tracing_cpumask_new);
3345
3346         return err;
3347 }
3348
3349 static const struct file_operations tracing_cpumask_fops = {
3350         .open           = tracing_open_generic_tr,
3351         .read           = tracing_cpumask_read,
3352         .write          = tracing_cpumask_write,
3353         .release        = tracing_release_generic_tr,
3354         .llseek         = generic_file_llseek,
3355 };
3356
3357 static int tracing_trace_options_show(struct seq_file *m, void *v)
3358 {
3359         struct tracer_opt *trace_opts;
3360         struct trace_array *tr = m->private;
3361         u32 tracer_flags;
3362         int i;
3363
3364         mutex_lock(&trace_types_lock);
3365         tracer_flags = tr->current_trace->flags->val;
3366         trace_opts = tr->current_trace->flags->opts;
3367
3368         for (i = 0; trace_options[i]; i++) {
3369                 if (trace_flags & (1 << i))
3370                         seq_printf(m, "%s\n", trace_options[i]);
3371                 else
3372                         seq_printf(m, "no%s\n", trace_options[i]);
3373         }
3374
3375         for (i = 0; trace_opts[i].name; i++) {
3376                 if (tracer_flags & trace_opts[i].bit)
3377                         seq_printf(m, "%s\n", trace_opts[i].name);
3378                 else
3379                         seq_printf(m, "no%s\n", trace_opts[i].name);
3380         }
3381         mutex_unlock(&trace_types_lock);
3382
3383         return 0;
3384 }
3385
3386 static int __set_tracer_option(struct trace_array *tr,
3387                                struct tracer_flags *tracer_flags,
3388                                struct tracer_opt *opts, int neg)
3389 {
3390         struct tracer *trace = tr->current_trace;
3391         int ret;
3392
3393         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3394         if (ret)
3395                 return ret;
3396
3397         if (neg)
3398                 tracer_flags->val &= ~opts->bit;
3399         else
3400                 tracer_flags->val |= opts->bit;
3401         return 0;
3402 }
3403
3404 /* Try to assign a tracer specific option */
3405 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3406 {
3407         struct tracer *trace = tr->current_trace;
3408         struct tracer_flags *tracer_flags = trace->flags;
3409         struct tracer_opt *opts = NULL;
3410         int i;
3411
3412         for (i = 0; tracer_flags->opts[i].name; i++) {
3413                 opts = &tracer_flags->opts[i];
3414
3415                 if (strcmp(cmp, opts->name) == 0)
3416                         return __set_tracer_option(tr, trace->flags, opts, neg);
3417         }
3418
3419         return -EINVAL;
3420 }
3421
3422 /* Some tracers require overwrite to stay enabled */
3423 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3424 {
3425         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3426                 return -1;
3427
3428         return 0;
3429 }
3430
3431 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3432 {
3433         /* do nothing if flag is already set */
3434         if (!!(trace_flags & mask) == !!enabled)
3435                 return 0;
3436
3437         /* Give the tracer a chance to approve the change */
3438         if (tr->current_trace->flag_changed)
3439                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3440                         return -EINVAL;
3441
3442         if (enabled)
3443                 trace_flags |= mask;
3444         else
3445                 trace_flags &= ~mask;
3446
3447         if (mask == TRACE_ITER_RECORD_CMD)
3448                 trace_event_enable_cmd_record(enabled);
3449
3450         if (mask == TRACE_ITER_OVERWRITE) {
3451                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3452 #ifdef CONFIG_TRACER_MAX_TRACE
3453                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3454 #endif
3455         }
3456
3457         if (mask == TRACE_ITER_PRINTK)
3458                 trace_printk_start_stop_comm(enabled);
3459
3460         return 0;
3461 }
3462
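/*
 * Parse one option string written to the trace_options file described
 * in the readme below: "<option>" sets the corresponding flag and
 * "no<option>" clears it. The generic trace_options[] names are tried
 * first; if none match, the current tracer's private options are tried.
 */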
3463 static int trace_set_options(struct trace_array *tr, char *option)
3464 {
3465         char *cmp;
3466         int neg = 0;
3467         int ret = -ENODEV;
3468         int i;
3469
3470         cmp = strstrip(option);
3471
3472         if (strncmp(cmp, "no", 2) == 0) {
3473                 neg = 1;
3474                 cmp += 2;
3475         }
3476
3477         mutex_lock(&trace_types_lock);
3478
3479         for (i = 0; trace_options[i]; i++) {
3480                 if (strcmp(cmp, trace_options[i]) == 0) {
3481                         ret = set_tracer_flag(tr, 1 << i, !neg);
3482                         break;
3483                 }
3484         }
3485
3486         /* If no option could be set, test the specific tracer options */
3487         if (!trace_options[i])
3488                 ret = set_tracer_option(tr, cmp, neg);
3489
3490         mutex_unlock(&trace_types_lock);
3491
3492         return ret;
3493 }
3494
3495 static ssize_t
3496 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3497                         size_t cnt, loff_t *ppos)
3498 {
3499         struct seq_file *m = filp->private_data;
3500         struct trace_array *tr = m->private;
3501         char buf[64];
3502         int ret;
3503
3504         if (cnt >= sizeof(buf))
3505                 return -EINVAL;
3506
3507         if (copy_from_user(&buf, ubuf, cnt))
3508                 return -EFAULT;
3509
3510         buf[cnt] = 0;
3511
3512         ret = trace_set_options(tr, buf);
3513         if (ret < 0)
3514                 return ret;
3515
3516         *ppos += cnt;
3517
3518         return cnt;
3519 }
3520
3521 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3522 {
3523         struct trace_array *tr = inode->i_private;
3524         int ret;
3525
3526         if (tracing_disabled)
3527                 return -ENODEV;
3528
3529         if (trace_array_get(tr) < 0)
3530                 return -ENODEV;
3531
3532         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3533         if (ret < 0)
3534                 trace_array_put(tr);
3535
3536         return ret;
3537 }
3538
3539 static const struct file_operations tracing_iter_fops = {
3540         .open           = tracing_trace_options_open,
3541         .read           = seq_read,
3542         .llseek         = seq_lseek,
3543         .release        = tracing_single_release_tr,
3544         .write          = tracing_trace_options_write,
3545 };
3546
3547 static const char readme_msg[] =
3548         "tracing mini-HOWTO:\n\n"
3549         "# echo 0 > tracing_on : quick way to disable tracing\n"
3550         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3551         " Important files:\n"
3552         "  trace\t\t\t- The static contents of the buffer\n"
3553         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3554         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3555         "  current_tracer\t- function and latency tracers\n"
3556         "  available_tracers\t- list of configured tracers for current_tracer\n"
3557         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3558         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3559         "  trace_clock\t\t- change the clock used to order events\n"
3560         "       local:   Per cpu clock but may not be synced across CPUs\n"
3561         "      global:   Synced across CPUs but slows tracing down.\n"
3562         "     counter:   Not a clock, but just an increment\n"
3563         "      uptime:   Jiffy counter from time of boot\n"
3564         "        perf:   Same clock that perf events use\n"
3565 #ifdef CONFIG_X86_64
3566         "     x86-tsc:   TSC cycle counter\n"
3567 #endif
3568         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3569         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3570         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3571         "\t\t\t  Remove sub-buffer with rmdir\n"
3572         "  trace_options\t\t- Set format or modify how tracing happens\n"
3573         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3574         "\t\t\t  option name\n"
3575 #ifdef CONFIG_DYNAMIC_FTRACE
3576         "\n  available_filter_functions - list of functions that can be filtered on\n"
3577         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3578         "\t\t\t  functions\n"
3579         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3580         "\t     modules: Can select a group via module\n"
3581         "\t      Format: :mod:<module-name>\n"
3582         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3583         "\t    triggers: a command to perform when function is hit\n"
3584         "\t      Format: <function>:<trigger>[:count]\n"
3585         "\t     trigger: traceon, traceoff\n"
3586         "\t\t      enable_event:<system>:<event>\n"
3587         "\t\t      disable_event:<system>:<event>\n"
3588 #ifdef CONFIG_STACKTRACE
3589         "\t\t      stacktrace\n"
3590 #endif
3591 #ifdef CONFIG_TRACER_SNAPSHOT
3592         "\t\t      snapshot\n"
3593 #endif
3594         "\t\t      dump\n"
3595         "\t\t      cpudump\n"
3596         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3597         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3598         "\t     The first one will disable tracing every time do_fault is hit\n"
3599         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3600         "\t       The first time do_trap is hit and it disables tracing, the\n"
3601         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3602         "\t       the counter will not decrement. It only decrements when the\n"
3603         "\t       trigger did work\n"
3604         "\t     To remove trigger without count:\n"
3605         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3606         "\t     To remove trigger with a count:\n"
3607         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3608         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3609         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3610         "\t    modules: Can select a group via module command :mod:\n"
3611         "\t    Does not accept triggers\n"
3612 #endif /* CONFIG_DYNAMIC_FTRACE */
3613 #ifdef CONFIG_FUNCTION_TRACER
3614         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3615         "\t\t    (function)\n"
3616 #endif
3617 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3618         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3619         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3620 #endif
3621 #ifdef CONFIG_TRACER_SNAPSHOT
3622         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3623         "\t\t\t  snapshot buffer. Read the contents for more\n"
3624         "\t\t\t  information\n"
3625 #endif
3626 #ifdef CONFIG_STACK_TRACER
3627         "  stack_trace\t\t- Shows the max stack trace when active\n"
3628         "  stack_max_size\t- Shows current max stack size that was traced\n"
3629         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3630         "\t\t\t  new trace)\n"
3631 #ifdef CONFIG_DYNAMIC_FTRACE
3632         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3633         "\t\t\t  traces\n"
3634 #endif
3635 #endif /* CONFIG_STACK_TRACER */
3636         "  events/\t\t- Directory containing all trace event subsystems:\n"
3637         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3638         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3639         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3640         "\t\t\t  events\n"
3641         "      filter\t\t- If set, only events passing filter are traced\n"
3642         "  events/<system>/<event>/\t- Directory containing control files for\n"
3643         "\t\t\t  <event>:\n"
3644         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3645         "      filter\t\t- If set, only events passing filter are traced\n"
3646         "      trigger\t\t- If set, a command to perform when event is hit\n"
3647         "\t    Format: <trigger>[:count][if <filter>]\n"
3648         "\t   trigger: traceon, traceoff\n"
3649         "\t            enable_event:<system>:<event>\n"
3650         "\t            disable_event:<system>:<event>\n"
3651 #ifdef CONFIG_STACKTRACE
3652         "\t\t    stacktrace\n"
3653 #endif
3654 #ifdef CONFIG_TRACER_SNAPSHOT
3655         "\t\t    snapshot\n"
3656 #endif
3657         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3658         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3659         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3660         "\t                  events/block/block_unplug/trigger\n"
3661         "\t   The first disables tracing every time block_unplug is hit.\n"
3662         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3663         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3664         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3665         "\t   Like function triggers, the counter is only decremented if it\n"
3666         "\t    enabled or disabled tracing.\n"
3667         "\t   To remove a trigger without a count:\n"
3668         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3669         "\t   To remove a trigger with a count:\n"
3670         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3671         "\t   Filters can be ignored when removing a trigger.\n"
3672 ;
3673
3674 static ssize_t
3675 tracing_readme_read(struct file *filp, char __user *ubuf,
3676                        size_t cnt, loff_t *ppos)
3677 {
3678         return simple_read_from_buffer(ubuf, cnt, ppos,
3679                                         readme_msg, strlen(readme_msg));
3680 }
3681
3682 static const struct file_operations tracing_readme_fops = {
3683         .open           = tracing_open_generic,
3684         .read           = tracing_readme_read,
3685         .llseek         = generic_file_llseek,
3686 };
3687
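/*
 * Dump the saved pid -> comm mappings recorded by the cmdline cache,
 * one "<pid> <comm>" pair per line.
 */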
3688 static ssize_t
3689 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3690                                 size_t cnt, loff_t *ppos)
3691 {
3692         char *buf_comm;
3693         char *file_buf;
3694         char *buf;
3695         int len = 0;
3696         int pid;
3697         int i;
3698
3699         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3700         if (!file_buf)
3701                 return -ENOMEM;
3702
3703         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3704         if (!buf_comm) {
3705                 kfree(file_buf);
3706                 return -ENOMEM;
3707         }
3708
3709         buf = file_buf;
3710
3711         for (i = 0; i < SAVED_CMDLINES; i++) {
3712                 int r;
3713
3714                 pid = map_cmdline_to_pid[i];
3715                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3716                         continue;
3717
3718                 trace_find_cmdline(pid, buf_comm);
3719                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3720                 buf += r;
3721                 len += r;
3722         }
3723
3724         len = simple_read_from_buffer(ubuf, cnt, ppos,
3725                                       file_buf, len);
3726
3727         kfree(file_buf);
3728         kfree(buf_comm);
3729
3730         return len;
3731 }
3732
3733 static const struct file_operations tracing_saved_cmdlines_fops = {
3734         .open           = tracing_open_generic,
3735         .read           = tracing_saved_cmdlines_read,
3736         .llseek         = generic_file_llseek,
3737 };
3738
3739 static ssize_t
3740 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3741                        size_t cnt, loff_t *ppos)
3742 {
3743         struct trace_array *tr = filp->private_data;
3744         char buf[MAX_TRACER_SIZE+2];
3745         int r;
3746
3747         mutex_lock(&trace_types_lock);
3748         r = sprintf(buf, "%s\n", tr->current_trace->name);
3749         mutex_unlock(&trace_types_lock);
3750
3751         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3752 }
3753
3754 int tracer_init(struct tracer *t, struct trace_array *tr)
3755 {
3756         tracing_reset_online_cpus(&tr->trace_buffer);
3757         return t->init(tr);
3758 }
3759
3760 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3761 {
3762         int cpu;
3763
3764         for_each_tracing_cpu(cpu)
3765                 per_cpu_ptr(buf->data, cpu)->entries = val;
3766 }
3767
3768 #ifdef CONFIG_TRACER_MAX_TRACE
3769 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3770 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3771                                         struct trace_buffer *size_buf, int cpu_id)
3772 {
3773         int cpu, ret = 0;
3774
3775         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3776                 for_each_tracing_cpu(cpu) {
3777                         ret = ring_buffer_resize(trace_buf->buffer,
3778                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3779                         if (ret < 0)
3780                                 break;
3781                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3782                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3783                 }
3784         } else {
3785                 ret = ring_buffer_resize(trace_buf->buffer,
3786                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3787                 if (ret == 0)
3788                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3789                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3790         }
3791
3792         return ret;
3793 }
3794 #endif /* CONFIG_TRACER_MAX_TRACE */
3795
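/*
 * Resize @tr's trace buffer to @size bytes for @cpu (or for all CPUs
 * when @cpu is RING_BUFFER_ALL_CPUS). When the current tracer also
 * uses the max/snapshot buffer, that buffer is resized to match; if
 * that fails, the main buffer is restored to its previous size.
 */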
3796 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3797                                         unsigned long size, int cpu)
3798 {
3799         int ret;
3800
3801         /*
3802          * If the kernel or the user changes the size of the ring buffer,
3803          * we use the size that was given, and we can forget about
3804          * expanding it later.
3805          */
3806         ring_buffer_expanded = true;
3807
3808         /* May be called before buffers are initialized */
3809         if (!tr->trace_buffer.buffer)
3810                 return 0;
3811
3812         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3813         if (ret < 0)
3814                 return ret;
3815
3816 #ifdef CONFIG_TRACER_MAX_TRACE
3817         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3818             !tr->current_trace->use_max_tr)
3819                 goto out;
3820
3821         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3822         if (ret < 0) {
3823                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3824                                                      &tr->trace_buffer, cpu);
3825                 if (r < 0) {
3826                         /*
3827                          * AARGH! We are left with different
3828                          * size max buffer!!!!
3829                          * The max buffer is our "snapshot" buffer.
3830                          * When a tracer needs a snapshot (one of the
3831                          * latency tracers), it swaps the max buffer
3832                          * with the saved snapshot. We succeeded in
3833                          * updating the size of the main buffer, but failed
3834                          * to update the size of the max buffer. And when we
3835                          * tried to reset the main buffer to its original
3836                          * size, we failed there too. This is very unlikely
3837                          * to happen, but if it does, warn and kill all
3838                          * tracing.
3839                          */
3840                         WARN_ON(1);
3841                         tracing_disabled = 1;
3842                 }
3843                 return ret;
3844         }
3845
3846         if (cpu == RING_BUFFER_ALL_CPUS)
3847                 set_buffer_entries(&tr->max_buffer, size);
3848         else
3849                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3850
3851  out:
3852 #endif /* CONFIG_TRACER_MAX_TRACE */
3853
3854         if (cpu == RING_BUFFER_ALL_CPUS)
3855                 set_buffer_entries(&tr->trace_buffer, size);
3856         else
3857                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3858
3859         return ret;
3860 }
3861
3862 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3863                                           unsigned long size, int cpu_id)
3864 {
3865         int ret = size;
3866
3867         mutex_lock(&trace_types_lock);
3868
3869         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3870                 /* make sure this cpu is enabled in the mask */
3871                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3872                         ret = -EINVAL;
3873                         goto out;
3874                 }
3875         }
3876
3877         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3878         if (ret < 0)
3879                 ret = -ENOMEM;
3880
3881 out:
3882         mutex_unlock(&trace_types_lock);
3883
3884         return ret;
3885 }
3886
3887
3888 /**
3889  * tracing_update_buffers - used by tracing facility to expand ring buffers
3890  *
3891  * To save memory when tracing is never used on a system that has it
3892  * configured in, the ring buffers are set to a minimum size. Once a
3893  * user starts to use the tracing facility, they need to grow to
3894  * their default size.
3895  *
3896  * This function is to be called when a tracer is about to be used.
3897  */
3898 int tracing_update_buffers(void)
3899 {
3900         int ret = 0;
3901
3902         mutex_lock(&trace_types_lock);
3903         if (!ring_buffer_expanded)
3904                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3905                                                 RING_BUFFER_ALL_CPUS);
3906         mutex_unlock(&trace_types_lock);
3907
3908         return ret;
3909 }
3910
3911 struct trace_option_dentry;
3912
3913 static struct trace_option_dentry *
3914 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3915
3916 static void
3917 destroy_trace_option_files(struct trace_option_dentry *topts);
3918
3919 /*
3920  * Used to clear out the tracer before deletion of an instance.
3921  * Must have trace_types_lock held.
3922  */
3923 static void tracing_set_nop(struct trace_array *tr)
3924 {
3925         if (tr->current_trace == &nop_trace)
3926                 return;
3927
3928         tr->current_trace->enabled--;
3929
3930         if (tr->current_trace->reset)
3931                 tr->current_trace->reset(tr);
3932
3933         tr->current_trace = &nop_trace;
3934 }
3935
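/*
 * Switch @tr to the tracer named @buf (typically written through the
 * current_tracer file). The ring buffer is expanded to its default
 * size if that has not happened yet, the old tracer is torn down, the
 * snapshot buffer is allocated or freed according to the new tracer's
 * use_max_tr flag, and finally the new tracer's init() callback runs.
 */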
3936 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
3937 {
3938         static struct trace_option_dentry *topts;
3939         struct tracer *t;
3940 #ifdef CONFIG_TRACER_MAX_TRACE
3941         bool had_max_tr;
3942 #endif
3943         int ret = 0;
3944
3945         mutex_lock(&trace_types_lock);
3946
3947         if (!ring_buffer_expanded) {
3948                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3949                                                 RING_BUFFER_ALL_CPUS);
3950                 if (ret < 0)
3951                         goto out;
3952                 ret = 0;
3953         }
3954
3955         for (t = trace_types; t; t = t->next) {
3956                 if (strcmp(t->name, buf) == 0)
3957                         break;
3958         }
3959         if (!t) {
3960                 ret = -EINVAL;
3961                 goto out;
3962         }
3963         if (t == tr->current_trace)
3964                 goto out;
3965
3966         /* Some tracers are only allowed for the top level buffer */
3967         if (!trace_ok_for_array(t, tr)) {
3968                 ret = -EINVAL;
3969                 goto out;
3970         }
3971
3972         trace_branch_disable();
3973
3974         tr->current_trace->enabled--;
3975
3976         if (tr->current_trace->reset)
3977                 tr->current_trace->reset(tr);
3978
3979         /* Current trace needs to be nop_trace before synchronize_sched */
3980         tr->current_trace = &nop_trace;
3981
3982 #ifdef CONFIG_TRACER_MAX_TRACE
3983         had_max_tr = tr->allocated_snapshot;
3984
3985         if (had_max_tr && !t->use_max_tr) {
3986                 /*
3987                  * We need to make sure that the update_max_tr sees that
3988                  * current_trace changed to nop_trace to keep it from
3989                  * swapping the buffers after we resize it.
3990                  * The update_max_tr is called with interrupts disabled,
3991                  * so a synchronize_sched() is sufficient.
3992                  */
3993                 synchronize_sched();
3994                 free_snapshot(tr);
3995         }
3996 #endif
3997         /* Currently, only the top instance has options */
3998         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
3999                 destroy_trace_option_files(topts);
4000                 topts = create_trace_option_files(tr, t);
4001         }
4002
4003 #ifdef CONFIG_TRACER_MAX_TRACE
4004         if (t->use_max_tr && !had_max_tr) {
4005                 ret = alloc_snapshot(tr);
4006                 if (ret < 0)
4007                         goto out;
4008         }
4009 #endif
4010
4011         if (t->init) {
4012                 ret = tracer_init(t, tr);
4013                 if (ret)
4014                         goto out;
4015         }
4016
4017         tr->current_trace = t;
4018         tr->current_trace->enabled++;
4019         trace_branch_enable(tr);
4020  out:
4021         mutex_unlock(&trace_types_lock);
4022
4023         return ret;
4024 }
4025
4026 static ssize_t
4027 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4028                         size_t cnt, loff_t *ppos)
4029 {
4030         struct trace_array *tr = filp->private_data;
4031         char buf[MAX_TRACER_SIZE+1];
4032         int i;
4033         size_t ret;
4034         int err;
4035
4036         ret = cnt;
4037
4038         if (cnt > MAX_TRACER_SIZE)
4039                 cnt = MAX_TRACER_SIZE;
4040
4041         if (copy_from_user(&buf, ubuf, cnt))
4042                 return -EFAULT;
4043
4044         buf[cnt] = 0;
4045
4046         /* strip trailing whitespace. */
4047         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4048                 buf[i] = 0;
4049
4050         err = tracing_set_tracer(tr, buf);
4051         if (err)
4052                 return err;
4053
4054         *ppos += ret;
4055
4056         return ret;
4057 }
4058
4059 static ssize_t
4060 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4061                      size_t cnt, loff_t *ppos)
4062 {
4063         unsigned long *ptr = filp->private_data;
4064         char buf[64];
4065         int r;
4066
4067         r = snprintf(buf, sizeof(buf), "%ld\n",
4068                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4069         if (r > sizeof(buf))
4070                 r = sizeof(buf);
4071         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4072 }
4073
4074 static ssize_t
4075 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4076                       size_t cnt, loff_t *ppos)
4077 {
4078         unsigned long *ptr = filp->private_data;
4079         unsigned long val;
4080         int ret;
4081
4082         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4083         if (ret)
4084                 return ret;
4085
4086         *ptr = val * 1000;
4087
4088         return cnt;
4089 }
4090
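/*
 * Open handler for trace_pipe, the consuming reader described in the
 * readme above. A private trace_iterator is allocated, the current
 * tracer is copied so concurrent tracer switches do not disturb this
 * reader, and the stream is marked non-seekable.
 */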
4091 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4092 {
4093         struct trace_array *tr = inode->i_private;
4094         struct trace_iterator *iter;
4095         int ret = 0;
4096
4097         if (tracing_disabled)
4098                 return -ENODEV;
4099
4100         if (trace_array_get(tr) < 0)
4101                 return -ENODEV;
4102
4103         mutex_lock(&trace_types_lock);
4104
4105         /* create a buffer to store the information to pass to userspace */
4106         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4107         if (!iter) {
4108                 ret = -ENOMEM;
4109                 __trace_array_put(tr);
4110                 goto out;
4111         }
4112
4113         /*
4114          * We make a copy of the current tracer to avoid concurrent
4115          * changes to it while we are reading.
4116          */
4117         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4118         if (!iter->trace) {
4119                 ret = -ENOMEM;
4120                 goto fail;
4121         }
4122         *iter->trace = *tr->current_trace;
4123
4124         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4125                 ret = -ENOMEM;
4126                 goto fail;
4127         }
4128
4129         /* trace pipe does not show start of buffer */
4130         cpumask_setall(iter->started);
4131
4132         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4133                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4134
4135         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4136         if (trace_clocks[tr->clock_id].in_ns)
4137                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4138
4139         iter->tr = tr;
4140         iter->trace_buffer = &tr->trace_buffer;
4141         iter->cpu_file = tracing_get_cpu(inode);
4142         mutex_init(&iter->mutex);
4143         filp->private_data = iter;
4144
4145         if (iter->trace->pipe_open)
4146                 iter->trace->pipe_open(iter);
4147
4148         nonseekable_open(inode, filp);
4149 out:
4150         mutex_unlock(&trace_types_lock);
4151         return ret;
4152
4153 fail:
4154         kfree(iter->trace);
4155         kfree(iter);
4156         __trace_array_put(tr);
4157         mutex_unlock(&trace_types_lock);
4158         return ret;
4159 }
4160
4161 static int tracing_release_pipe(struct inode *inode, struct file *file)
4162 {
4163         struct trace_iterator *iter = file->private_data;
4164         struct trace_array *tr = inode->i_private;
4165
4166         mutex_lock(&trace_types_lock);
4167
4168         if (iter->trace->pipe_close)
4169                 iter->trace->pipe_close(iter);
4170
4171         mutex_unlock(&trace_types_lock);
4172
4173         free_cpumask_var(iter->started);
4174         mutex_destroy(&iter->mutex);
4175         kfree(iter->trace);
4176         kfree(iter);
4177
4178         trace_array_put(tr);
4179
4180         return 0;
4181 }
4182
4183 static unsigned int
4184 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4185 {
4186         /* Iterators are static, they should be filled or empty */
4187         if (trace_buffer_iter(iter, iter->cpu_file))
4188                 return POLLIN | POLLRDNORM;
4189
4190         if (trace_flags & TRACE_ITER_BLOCK)
4191                 /*
4192                  * Always select as readable when in blocking mode
4193                  */
4194                 return POLLIN | POLLRDNORM;
4195         else
4196                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4197                                              filp, poll_table);
4198 }
4199
4200 static unsigned int
4201 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4202 {
4203         struct trace_iterator *iter = filp->private_data;
4204
4205         return trace_poll(iter, filp, poll_table);
4206 }
4207
4208 /* Must be called with iter->mutex held. */
4209 static int tracing_wait_pipe(struct file *filp)
4210 {
4211         struct trace_iterator *iter = filp->private_data;
4212
4213         while (trace_empty(iter)) {
4214
4215                 if ((filp->f_flags & O_NONBLOCK)) {
4216                         return -EAGAIN;
4217                 }
4218
4219                 /*
4220                  * We block until we read something and tracing is disabled.
4221                  * If tracing is disabled but we have not read anything yet,
4222                  * we keep blocking. This allows a user to cat this file, and
4223                  * then enable tracing. But after we have read something,
4224                  * we give an EOF when tracing is disabled again.
4225                  *
4226                  * iter->pos will be 0 if we haven't read anything.
4227                  */
4228                 if (!tracing_is_on() && iter->pos)
4229                         break;
4230
4231                 mutex_unlock(&iter->mutex);
4232
4233                 wait_on_pipe(iter);
4234
4235                 mutex_lock(&iter->mutex);
4236
4237                 if (signal_pending(current))
4238                         return -EINTR;
4239         }
4240
4241         return 1;
4242 }
4243
4244 /*
4245  * Consumer reader.
4246  */
4247 static ssize_t
4248 tracing_read_pipe(struct file *filp, char __user *ubuf,
4249                   size_t cnt, loff_t *ppos)
4250 {
4251         struct trace_iterator *iter = filp->private_data;
4252         struct trace_array *tr = iter->tr;
4253         ssize_t sret;
4254
4255         /* return any leftover data */
4256         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4257         if (sret != -EBUSY)
4258                 return sret;
4259
4260         trace_seq_init(&iter->seq);
4261
4262         /* copy the tracer to avoid using a global lock all around */
4263         mutex_lock(&trace_types_lock);
4264         if (unlikely(iter->trace->name != tr->current_trace->name))
4265                 *iter->trace = *tr->current_trace;
4266         mutex_unlock(&trace_types_lock);
4267
4268         /*
4269          * Avoid more than one consumer on a single file descriptor.
4270          * This is just a matter of trace coherency; the ring buffer itself
4271          * is protected.
4272          */
4273         mutex_lock(&iter->mutex);
4274         if (iter->trace->read) {
4275                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4276                 if (sret)
4277                         goto out;
4278         }
4279
4280 waitagain:
4281         sret = tracing_wait_pipe(filp);
4282         if (sret <= 0)
4283                 goto out;
4284
4285         /* stop when tracing is finished */
4286         if (trace_empty(iter)) {
4287                 sret = 0;
4288                 goto out;
4289         }
4290
4291         if (cnt >= PAGE_SIZE)
4292                 cnt = PAGE_SIZE - 1;
4293
4294         /* reset all but tr, trace, and overruns */
4295         memset(&iter->seq, 0,
4296                sizeof(struct trace_iterator) -
4297                offsetof(struct trace_iterator, seq));
4298         cpumask_clear(iter->started);
4299         iter->pos = -1;
4300
4301         trace_event_read_lock();
4302         trace_access_lock(iter->cpu_file);
4303         while (trace_find_next_entry_inc(iter) != NULL) {
4304                 enum print_line_t ret;
4305                 int len = iter->seq.len;
4306
4307                 ret = print_trace_line(iter);
4308                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4309                         /* don't print partial lines */
4310                         iter->seq.len = len;
4311                         break;
4312                 }
4313                 if (ret != TRACE_TYPE_NO_CONSUME)
4314                         trace_consume(iter);
4315
4316                 if (iter->seq.len >= cnt)
4317                         break;
4318
4319                 /*
4320                  * Setting the full flag means we reached the trace_seq buffer
4321                  * size and we should leave by partial output condition above.
4322                  * One of the trace_seq_* functions is not used properly.
4323                  */
4324                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4325                           iter->ent->type);
4326         }
4327         trace_access_unlock(iter->cpu_file);
4328         trace_event_read_unlock();
4329
4330         /* Now copy what we have to the user */
4331         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4332         if (iter->seq.readpos >= iter->seq.len)
4333                 trace_seq_init(&iter->seq);
4334
4335         /*
4336          * If there was nothing to send to user, in spite of consuming trace
4337          * entries, go back to wait for more entries.
4338          */
4339         if (sret == -EBUSY)
4340                 goto waitagain;
4341
4342 out:
4343         mutex_unlock(&iter->mutex);
4344
4345         return sret;
4346 }
4347
4348 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4349                                      unsigned int idx)
4350 {
4351         __free_page(spd->pages[idx]);
4352 }
4353
4354 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4355         .can_merge              = 0,
4356         .confirm                = generic_pipe_buf_confirm,
4357         .release                = generic_pipe_buf_release,
4358         .steal                  = generic_pipe_buf_steal,
4359         .get                    = generic_pipe_buf_get,
4360 };
4361
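/*
 * Fill iter->seq with as many complete trace lines as fit in @rem bytes
 * (the seq buffer itself is one page). Partially printed lines are
 * backed out so the caller never copies a truncated event; the return
 * value is the unused portion of @rem.
 */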
4362 static size_t
4363 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4364 {
4365         size_t count;
4366         int ret;
4367
4368         /* Seq buffer is page-sized, exactly what we need. */
4369         for (;;) {
4370                 count = iter->seq.len;
4371                 ret = print_trace_line(iter);
4372                 count = iter->seq.len - count;
4373                 if (rem < count) {
4374                         rem = 0;
4375                         iter->seq.len -= count;
4376                         break;
4377                 }
4378                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4379                         iter->seq.len -= count;
4380                         break;
4381                 }
4382
4383                 if (ret != TRACE_TYPE_NO_CONSUME)
4384                         trace_consume(iter);
4385                 rem -= count;
4386                 if (!trace_find_next_entry_inc(iter))   {
4387                         rem = 0;
4388                         iter->ent = NULL;
4389                         break;
4390                 }
4391         }
4392
4393         return rem;
4394 }
4395
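/*
 * splice() backend for trace_pipe: render trace lines into freshly
 * allocated pages via tracing_fill_pipe_page() and hand them to the
 * pipe, consuming the events in the process.
 */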
4396 static ssize_t tracing_splice_read_pipe(struct file *filp,
4397                                         loff_t *ppos,
4398                                         struct pipe_inode_info *pipe,
4399                                         size_t len,
4400                                         unsigned int flags)
4401 {
4402         struct page *pages_def[PIPE_DEF_BUFFERS];
4403         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4404         struct trace_iterator *iter = filp->private_data;
4405         struct splice_pipe_desc spd = {
4406                 .pages          = pages_def,
4407                 .partial        = partial_def,
4408                 .nr_pages       = 0, /* This gets updated below. */
4409                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4410                 .flags          = flags,
4411                 .ops            = &tracing_pipe_buf_ops,
4412                 .spd_release    = tracing_spd_release_pipe,
4413         };
4414         struct trace_array *tr = iter->tr;
4415         ssize_t ret;
4416         size_t rem;
4417         unsigned int i;
4418
4419         if (splice_grow_spd(pipe, &spd))
4420                 return -ENOMEM;
4421
4422         /* copy the tracer to avoid using a global lock all around */
4423         mutex_lock(&trace_types_lock);
4424         if (unlikely(iter->trace->name != tr->current_trace->name))
4425                 *iter->trace = *tr->current_trace;
4426         mutex_unlock(&trace_types_lock);
4427
4428         mutex_lock(&iter->mutex);
4429
4430         if (iter->trace->splice_read) {
4431                 ret = iter->trace->splice_read(iter, filp,
4432                                                ppos, pipe, len, flags);
4433                 if (ret)
4434                         goto out_err;
4435         }
4436
4437         ret = tracing_wait_pipe(filp);
4438         if (ret <= 0)
4439                 goto out_err;
4440
4441         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4442                 ret = -EFAULT;
4443                 goto out_err;
4444         }
4445
4446         trace_event_read_lock();
4447         trace_access_lock(iter->cpu_file);
4448
4449         /* Fill as many pages as possible. */
4450         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4451                 spd.pages[i] = alloc_page(GFP_KERNEL);
4452                 if (!spd.pages[i])
4453                         break;
4454
4455                 rem = tracing_fill_pipe_page(rem, iter);
4456
4457                 /* Copy the data into the page, so we can start over. */
4458                 ret = trace_seq_to_buffer(&iter->seq,
4459                                           page_address(spd.pages[i]),
4460                                           iter->seq.len);
4461                 if (ret < 0) {
4462                         __free_page(spd.pages[i]);
4463                         break;
4464                 }
4465                 spd.partial[i].offset = 0;
4466                 spd.partial[i].len = iter->seq.len;
4467
4468                 trace_seq_init(&iter->seq);
4469         }
4470
4471         trace_access_unlock(iter->cpu_file);
4472         trace_event_read_unlock();
4473         mutex_unlock(&iter->mutex);
4474
4475         spd.nr_pages = i;
4476
4477         ret = splice_to_pipe(pipe, &spd);
4478 out:
4479         splice_shrink_spd(&spd);
4480         return ret;
4481
4482 out_err:
4483         mutex_unlock(&iter->mutex);
4484         goto out;
4485 }
4486
4487 static ssize_t
4488 tracing_entries_read(struct file *filp, char __user *ubuf,
4489                      size_t cnt, loff_t *ppos)
4490 {
4491         struct inode *inode = file_inode(filp);
4492         struct trace_array *tr = inode->i_private;
4493         int cpu = tracing_get_cpu(inode);
4494         char buf[64];
4495         int r = 0;
4496         ssize_t ret;
4497
4498         mutex_lock(&trace_types_lock);
4499
4500         if (cpu == RING_BUFFER_ALL_CPUS) {
4501                 int cpu, buf_size_same;
4502                 unsigned long size;
4503
4504                 size = 0;
4505                 buf_size_same = 1;
4506                 /* check if all cpu sizes are same */
4507                 for_each_tracing_cpu(cpu) {
4508                         /* fill in the size from first enabled cpu */
4509                         if (size == 0)
4510                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4511                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4512                                 buf_size_same = 0;
4513                                 break;
4514                         }
4515                 }
4516
4517                 if (buf_size_same) {
4518                         if (!ring_buffer_expanded)
4519                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4520                                             size >> 10,
4521                                             trace_buf_size >> 10);
4522                         else
4523                                 r = sprintf(buf, "%lu\n", size >> 10);
4524                 } else
4525                         r = sprintf(buf, "X\n");
4526         } else
4527                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4528
4529         mutex_unlock(&trace_types_lock);
4530
4531         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4532         return ret;
4533 }
4534
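/*
 * Write handler for the buffer_size_kb files (see the readme above).
 * The value is taken in kilobytes, e.g. "echo 1024 > buffer_size_kb",
 * and the ring buffer of the CPU encoded in the inode (or of all CPUs)
 * is resized accordingly.
 */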
4535 static ssize_t
4536 tracing_entries_write(struct file *filp, const char __user *ubuf,
4537                       size_t cnt, loff_t *ppos)
4538 {
4539         struct inode *inode = file_inode(filp);
4540         struct trace_array *tr = inode->i_private;
4541         unsigned long val;
4542         int ret;
4543
4544         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4545         if (ret)
4546                 return ret;
4547
4548         /* must have at least 1 entry */
4549         if (!val)
4550                 return -EINVAL;
4551
4552         /* value is in KB */
4553         val <<= 10;
4554         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4555         if (ret < 0)
4556                 return ret;
4557
4558         *ppos += cnt;
4559
4560         return cnt;
4561 }
4562
4563 static ssize_t
4564 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4565                                 size_t cnt, loff_t *ppos)
4566 {
4567         struct trace_array *tr = filp->private_data;
4568         char buf[64];
4569         int r, cpu;
4570         unsigned long size = 0, expanded_size = 0;
4571
4572         mutex_lock(&trace_types_lock);
4573         for_each_tracing_cpu(cpu) {
4574                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4575                 if (!ring_buffer_expanded)
4576                         expanded_size += trace_buf_size >> 10;
4577         }
4578         if (ring_buffer_expanded)
4579                 r = sprintf(buf, "%lu\n", size);
4580         else
4581                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4582         mutex_unlock(&trace_types_lock);
4583
4584         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4585 }
4586
4587 static ssize_t
4588 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4589                           size_t cnt, loff_t *ppos)
4590 {
4591         /*
4592          * There is no need to read what the user has written; this function
4593          * only exists so that "echo" does not return an error.
4594          */
4595
4596         *ppos += cnt;
4597
4598         return cnt;
4599 }
4600
4601 static int
4602 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4603 {
4604         struct trace_array *tr = inode->i_private;
4605
4606         /* disable tracing ? */
4607         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4608                 tracer_tracing_off(tr);
4609         /* resize the ring buffer to 0 */
4610         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4611
4612         trace_array_put(tr);
4613
4614         return 0;
4615 }
4616
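/*
 * Write handler for trace_marker (see the readme above): the user's
 * buffer is copied straight into the ring buffer as a TRACE_PRINT
 * entry, with a newline appended when missing, e.g.
 * "echo hello_world > trace_marker".
 */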
4617 static ssize_t
4618 tracing_mark_write(struct file *filp, const char __user *ubuf,
4619                                         size_t cnt, loff_t *fpos)
4620 {
4621         unsigned long addr = (unsigned long)ubuf;
4622         struct trace_array *tr = filp->private_data;
4623         struct ring_buffer_event *event;
4624         struct ring_buffer *buffer;
4625         struct print_entry *entry;
4626         unsigned long irq_flags;
4627         struct page *pages[2];
4628         void *map_page[2];
4629         int nr_pages = 1;
4630         ssize_t written;
4631         int offset;
4632         int size;
4633         int len;
4634         int ret;
4635         int i;
4636
4637         if (tracing_disabled)
4638                 return -EINVAL;
4639
4640         if (!(trace_flags & TRACE_ITER_MARKERS))
4641                 return -EINVAL;
4642
4643         if (cnt > TRACE_BUF_SIZE)
4644                 cnt = TRACE_BUF_SIZE;
4645
4646         /*
4647          * Userspace is injecting traces into the kernel trace buffer.
4648          * We want to be as non-intrusive as possible.
4649          * To do so, we do not want to allocate any special buffers
4650          * or take any locks, but instead write the userspace data
4651          * straight into the ring buffer.
4652          *
4653          * First we need to pin the userspace buffer into memory.
4654          * Most likely it already is, because userspace just referenced it,
4655          * but there's no guarantee. By using get_user_pages_fast()
4656          * and kmap_atomic/kunmap_atomic() we can get access to the
4657          * pages directly. We then write the data directly into the
4658          * ring buffer.
4659          */
4660         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4661
4662         /* check if we cross pages */
4663         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4664                 nr_pages = 2;
4665
4666         offset = addr & (PAGE_SIZE - 1);
4667         addr &= PAGE_MASK;
4668
4669         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4670         if (ret < nr_pages) {
4671                 while (--ret >= 0)
4672                         put_page(pages[ret]);
4673                 written = -EFAULT;
4674                 goto out;
4675         }
4676
4677         for (i = 0; i < nr_pages; i++)
4678                 map_page[i] = kmap_atomic(pages[i]);
4679
4680         local_save_flags(irq_flags);
4681         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4682         buffer = tr->trace_buffer.buffer;
4683         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4684                                           irq_flags, preempt_count());
4685         if (!event) {
4686                 /* Ring buffer disabled, return as if not open for write */
4687                 written = -EBADF;
4688                 goto out_unlock;
4689         }
4690
4691         entry = ring_buffer_event_data(event);
4692         entry->ip = _THIS_IP_;
4693
4694         if (nr_pages == 2) {
4695                 len = PAGE_SIZE - offset;
4696                 memcpy(&entry->buf, map_page[0] + offset, len);
4697                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4698         } else
4699                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4700
4701         if (entry->buf[cnt - 1] != '\n') {
4702                 entry->buf[cnt] = '\n';
4703                 entry->buf[cnt + 1] = '\0';
4704         } else
4705                 entry->buf[cnt] = '\0';
4706
4707         __buffer_unlock_commit(buffer, event);
4708
4709         written = cnt;
4710
4711         *fpos += written;
4712
4713  out_unlock:
4714         for (i = 0; i < nr_pages; i++){
4715                 kunmap_atomic(map_page[i]);
4716                 put_page(pages[i]);
4717         }
4718  out:
4719         return written;
4720 }
4721
4722 static int tracing_clock_show(struct seq_file *m, void *v)
4723 {
4724         struct trace_array *tr = m->private;
4725         int i;
4726
4727         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4728                 seq_printf(m,
4729                         "%s%s%s%s", i ? " " : "",
4730                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4731                         i == tr->clock_id ? "]" : "");
4732         seq_putc(m, '\n');
4733
4734         return 0;
4735 }
4736
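/*
 * Select the trace clock by name, one of the entries listed by the
 * trace_clock file (e.g. "echo global > trace_clock"). The buffers are
 * reset afterwards because timestamps taken with different clocks are
 * not comparable.
 */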
4737 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4738 {
4739         int i;
4740
4741         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4742                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4743                         break;
4744         }
4745         if (i == ARRAY_SIZE(trace_clocks))
4746                 return -EINVAL;
4747
4748         mutex_lock(&trace_types_lock);
4749
4750         tr->clock_id = i;
4751
4752         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4753
4754         /*
4755          * New clock may not be consistent with the previous clock.
4756          * Reset the buffer so that it doesn't have incomparable timestamps.
4757          */
4758         tracing_reset_online_cpus(&tr->trace_buffer);
4759
4760 #ifdef CONFIG_TRACER_MAX_TRACE
4761         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4762                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4763         tracing_reset_online_cpus(&tr->max_buffer);
4764 #endif
4765
4766         mutex_unlock(&trace_types_lock);
4767
4768         return 0;
4769 }
4770
4771 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4772                                    size_t cnt, loff_t *fpos)
4773 {
4774         struct seq_file *m = filp->private_data;
4775         struct trace_array *tr = m->private;
4776         char buf[64];
4777         const char *clockstr;
4778         int ret;
4779
4780         if (cnt >= sizeof(buf))
4781                 return -EINVAL;
4782
4783         if (copy_from_user(&buf, ubuf, cnt))
4784                 return -EFAULT;
4785
4786         buf[cnt] = 0;
4787
4788         clockstr = strstrip(buf);
4789
4790         ret = tracing_set_clock(tr, clockstr);
4791         if (ret)
4792                 return ret;
4793
4794         *fpos += cnt;
4795
4796         return cnt;
4797 }
4798
4799 static int tracing_clock_open(struct inode *inode, struct file *file)
4800 {
4801         struct trace_array *tr = inode->i_private;
4802         int ret;
4803
4804         if (tracing_disabled)
4805                 return -ENODEV;
4806
4807         if (trace_array_get(tr))
4808                 return -ENODEV;
4809
4810         ret = single_open(file, tracing_clock_show, inode->i_private);
4811         if (ret < 0)
4812                 trace_array_put(tr);
4813
4814         return ret;
4815 }
4816
4817 struct ftrace_buffer_info {
4818         struct trace_iterator   iter;
4819         void                    *spare;
4820         unsigned int            read;
4821 };
4822
4823 #ifdef CONFIG_TRACER_SNAPSHOT
4824 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4825 {
4826         struct trace_array *tr = inode->i_private;
4827         struct trace_iterator *iter;
4828         struct seq_file *m;
4829         int ret = 0;
4830
4831         if (trace_array_get(tr) < 0)
4832                 return -ENODEV;
4833
4834         if (file->f_mode & FMODE_READ) {
4835                 iter = __tracing_open(inode, file, true);
4836                 if (IS_ERR(iter))
4837                         ret = PTR_ERR(iter);
4838         } else {
4839                 /* Writes still need the seq_file to hold the private data */
4840                 ret = -ENOMEM;
4841                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4842                 if (!m)
4843                         goto out;
4844                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4845                 if (!iter) {
4846                         kfree(m);
4847                         goto out;
4848                 }
4849                 ret = 0;
4850
4851                 iter->tr = tr;
4852                 iter->trace_buffer = &tr->max_buffer;
4853                 iter->cpu_file = tracing_get_cpu(inode);
4854                 m->private = iter;
4855                 file->private_data = m;
4856         }
4857 out:
4858         if (ret < 0)
4859                 trace_array_put(tr);
4860
4861         return ret;
4862 }
4863
4864 static ssize_t
4865 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4866                        loff_t *ppos)
4867 {
4868         struct seq_file *m = filp->private_data;
4869         struct trace_iterator *iter = m->private;
4870         struct trace_array *tr = iter->tr;
4871         unsigned long val;
4872         int ret;
4873
4874         ret = tracing_update_buffers();
4875         if (ret < 0)
4876                 return ret;
4877
4878         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4879         if (ret)
4880                 return ret;
4881
4882         mutex_lock(&trace_types_lock);
4883
4884         if (tr->current_trace->use_max_tr) {
4885                 ret = -EBUSY;
4886                 goto out;
4887         }
4888
4889         switch (val) {
4890         case 0:
4891                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4892                         ret = -EINVAL;
4893                         break;
4894                 }
4895                 if (tr->allocated_snapshot)
4896                         free_snapshot(tr);
4897                 break;
4898         case 1:
4899 /* Only allow per-cpu swap if the ring buffer supports it */
4900 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4901                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4902                         ret = -EINVAL;
4903                         break;
4904                 }
4905 #endif
4906                 if (!tr->allocated_snapshot) {
4907                         ret = alloc_snapshot(tr);
4908                         if (ret < 0)
4909                                 break;
4910                 }
4911                 local_irq_disable();
4912                 /* Now, we're going to swap */
4913                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4914                         update_max_tr(tr, current, smp_processor_id());
4915                 else
4916                         update_max_tr_single(tr, current, iter->cpu_file);
4917                 local_irq_enable();
4918                 break;
4919         default:
4920                 if (tr->allocated_snapshot) {
4921                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4922                                 tracing_reset_online_cpus(&tr->max_buffer);
4923                         else
4924                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4925                 }
4926                 break;
4927         }
4928
4929         if (ret >= 0) {
4930                 *ppos += cnt;
4931                 ret = cnt;
4932         }
4933 out:
4934         mutex_unlock(&trace_types_lock);
4935         return ret;
4936 }
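
/*
 * Illustrative sketch of driving the top-level snapshot file from user space
 * (not compiled here; the path assumes debugfs at /sys/kernel/debug).  The
 * values mirror the switch statement in tracing_snapshot_write() above:
 * "0" frees the snapshot buffer, "1" allocates it if needed and swaps it
 * with the live buffer, anything else just clears the snapshot contents.
 *
 *	int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */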
4937
4938 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4939 {
4940         struct seq_file *m = file->private_data;
4941         int ret;
4942
4943         ret = tracing_release(inode, file);
4944
4945         if (file->f_mode & FMODE_READ)
4946                 return ret;
4947
4948         /* If write only, the seq_file is just a stub */
4949         if (m)
4950                 kfree(m->private);
4951         kfree(m);
4952
4953         return 0;
4954 }
4955
4956 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4957 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4958                                     size_t count, loff_t *ppos);
4959 static int tracing_buffers_release(struct inode *inode, struct file *file);
4960 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4961                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4962
4963 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4964 {
4965         struct ftrace_buffer_info *info;
4966         int ret;
4967
4968         ret = tracing_buffers_open(inode, filp);
4969         if (ret < 0)
4970                 return ret;
4971
4972         info = filp->private_data;
4973
4974         if (info->iter.trace->use_max_tr) {
4975                 tracing_buffers_release(inode, filp);
4976                 return -EBUSY;
4977         }
4978
4979         info->iter.snapshot = true;
4980         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4981
4982         return ret;
4983 }
4984
4985 #endif /* CONFIG_TRACER_SNAPSHOT */
4986
4987
4988 static const struct file_operations tracing_max_lat_fops = {
4989         .open           = tracing_open_generic,
4990         .read           = tracing_max_lat_read,
4991         .write          = tracing_max_lat_write,
4992         .llseek         = generic_file_llseek,
4993 };
4994
4995 static const struct file_operations set_tracer_fops = {
4996         .open           = tracing_open_generic,
4997         .read           = tracing_set_trace_read,
4998         .write          = tracing_set_trace_write,
4999         .llseek         = generic_file_llseek,
5000 };
5001
5002 static const struct file_operations tracing_pipe_fops = {
5003         .open           = tracing_open_pipe,
5004         .poll           = tracing_poll_pipe,
5005         .read           = tracing_read_pipe,
5006         .splice_read    = tracing_splice_read_pipe,
5007         .release        = tracing_release_pipe,
5008         .llseek         = no_llseek,
5009 };
5010
5011 static const struct file_operations tracing_entries_fops = {
5012         .open           = tracing_open_generic_tr,
5013         .read           = tracing_entries_read,
5014         .write          = tracing_entries_write,
5015         .llseek         = generic_file_llseek,
5016         .release        = tracing_release_generic_tr,
5017 };
5018
5019 static const struct file_operations tracing_total_entries_fops = {
5020         .open           = tracing_open_generic_tr,
5021         .read           = tracing_total_entries_read,
5022         .llseek         = generic_file_llseek,
5023         .release        = tracing_release_generic_tr,
5024 };
5025
5026 static const struct file_operations tracing_free_buffer_fops = {
5027         .open           = tracing_open_generic_tr,
5028         .write          = tracing_free_buffer_write,
5029         .release        = tracing_free_buffer_release,
5030 };
5031
5032 static const struct file_operations tracing_mark_fops = {
5033         .open           = tracing_open_generic_tr,
5034         .write          = tracing_mark_write,
5035         .llseek         = generic_file_llseek,
5036         .release        = tracing_release_generic_tr,
5037 };
5038
5039 static const struct file_operations trace_clock_fops = {
5040         .open           = tracing_clock_open,
5041         .read           = seq_read,
5042         .llseek         = seq_lseek,
5043         .release        = tracing_single_release_tr,
5044         .write          = tracing_clock_write,
5045 };
5046
5047 #ifdef CONFIG_TRACER_SNAPSHOT
5048 static const struct file_operations snapshot_fops = {
5049         .open           = tracing_snapshot_open,
5050         .read           = seq_read,
5051         .write          = tracing_snapshot_write,
5052         .llseek         = tracing_lseek,
5053         .release        = tracing_snapshot_release,
5054 };
5055
5056 static const struct file_operations snapshot_raw_fops = {
5057         .open           = snapshot_raw_open,
5058         .read           = tracing_buffers_read,
5059         .release        = tracing_buffers_release,
5060         .splice_read    = tracing_buffers_splice_read,
5061         .llseek         = no_llseek,
5062 };
5063
5064 #endif /* CONFIG_TRACER_SNAPSHOT */
5065
5066 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5067 {
5068         struct trace_array *tr = inode->i_private;
5069         struct ftrace_buffer_info *info;
5070         int ret;
5071
5072         if (tracing_disabled)
5073                 return -ENODEV;
5074
5075         if (trace_array_get(tr) < 0)
5076                 return -ENODEV;
5077
5078         info = kzalloc(sizeof(*info), GFP_KERNEL);
5079         if (!info) {
5080                 trace_array_put(tr);
5081                 return -ENOMEM;
5082         }
5083
5084         mutex_lock(&trace_types_lock);
5085
5086         info->iter.tr           = tr;
5087         info->iter.cpu_file     = tracing_get_cpu(inode);
5088         info->iter.trace        = tr->current_trace;
5089         info->iter.trace_buffer = &tr->trace_buffer;
5090         info->spare             = NULL;
5091         /* Force reading ring buffer for first read */
5092         info->read              = (unsigned int)-1;
5093
5094         filp->private_data = info;
5095
5096         mutex_unlock(&trace_types_lock);
5097
5098         ret = nonseekable_open(inode, filp);
5099         if (ret < 0)
5100                 trace_array_put(tr);
5101
5102         return ret;
5103 }
5104
5105 static unsigned int
5106 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5107 {
5108         struct ftrace_buffer_info *info = filp->private_data;
5109         struct trace_iterator *iter = &info->iter;
5110
5111         return trace_poll(iter, filp, poll_table);
5112 }
5113
5114 static ssize_t
5115 tracing_buffers_read(struct file *filp, char __user *ubuf,
5116                      size_t count, loff_t *ppos)
5117 {
5118         struct ftrace_buffer_info *info = filp->private_data;
5119         struct trace_iterator *iter = &info->iter;
5120         ssize_t ret;
5121         ssize_t size;
5122
5123         if (!count)
5124                 return 0;
5125
5126         mutex_lock(&trace_types_lock);
5127
5128 #ifdef CONFIG_TRACER_MAX_TRACE
5129         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5130                 size = -EBUSY;
5131                 goto out_unlock;
5132         }
5133 #endif
5134
5135         if (!info->spare)
5136                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5137                                                           iter->cpu_file);
5138         size = -ENOMEM;
5139         if (!info->spare)
5140                 goto out_unlock;
5141
5142         /* Do we have previous read data to read? */
5143         if (info->read < PAGE_SIZE)
5144                 goto read;
5145
5146  again:
5147         trace_access_lock(iter->cpu_file);
5148         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5149                                     &info->spare,
5150                                     count,
5151                                     iter->cpu_file, 0);
5152         trace_access_unlock(iter->cpu_file);
5153
5154         if (ret < 0) {
5155                 if (trace_empty(iter)) {
5156                         if ((filp->f_flags & O_NONBLOCK)) {
5157                                 size = -EAGAIN;
5158                                 goto out_unlock;
5159                         }
5160                         mutex_unlock(&trace_types_lock);
5161                         wait_on_pipe(iter);
5162                         mutex_lock(&trace_types_lock);
5163                         if (signal_pending(current)) {
5164                                 size = -EINTR;
5165                                 goto out_unlock;
5166                         }
5167                         goto again;
5168                 }
5169                 size = 0;
5170                 goto out_unlock;
5171         }
5172
5173         info->read = 0;
5174  read:
5175         size = PAGE_SIZE - info->read;
5176         if (size > count)
5177                 size = count;
5178
5179         ret = copy_to_user(ubuf, info->spare + info->read, size);
5180         if (ret == size) {
5181                 size = -EFAULT;
5182                 goto out_unlock;
5183         }
5184         size -= ret;
5185
5186         *ppos += size;
5187         info->read += size;
5188
5189  out_unlock:
5190         mutex_unlock(&trace_types_lock);
5191
5192         return size;
5193 }
5194
5195 static int tracing_buffers_release(struct inode *inode, struct file *file)
5196 {
5197         struct ftrace_buffer_info *info = file->private_data;
5198         struct trace_iterator *iter = &info->iter;
5199
5200         mutex_lock(&trace_types_lock);
5201
5202         __trace_array_put(iter->tr);
5203
5204         if (info->spare)
5205                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5206         kfree(info);
5207
5208         mutex_unlock(&trace_types_lock);
5209
5210         return 0;
5211 }
5212
5213 struct buffer_ref {
5214         struct ring_buffer      *buffer;
5215         void                    *page;
5216         int                     ref;
5217 };
5218
5219 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5220                                     struct pipe_buffer *buf)
5221 {
5222         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5223
5224         if (--ref->ref)
5225                 return;
5226
5227         ring_buffer_free_read_page(ref->buffer, ref->page);
5228         kfree(ref);
5229         buf->private = 0;
5230 }
5231
5232 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5233                                 struct pipe_buffer *buf)
5234 {
5235         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5236
5237         ref->ref++;
5238 }
5239
5240 /* Pipe buffer operations for a buffer. */
5241 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5242         .can_merge              = 0,
5243         .confirm                = generic_pipe_buf_confirm,
5244         .release                = buffer_pipe_buf_release,
5245         .steal                  = generic_pipe_buf_steal,
5246         .get                    = buffer_pipe_buf_get,
5247 };
5248
5249 /*
5250  * Callback from splice_to_pipe(), if we need to release some pages
5251  * at the end of the spd in case we errored out while filling the pipe.
5252  */
5253 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5254 {
5255         struct buffer_ref *ref =
5256                 (struct buffer_ref *)spd->partial[i].private;
5257
5258         if (--ref->ref)
5259                 return;
5260
5261         ring_buffer_free_read_page(ref->buffer, ref->page);
5262         kfree(ref);
5263         spd->partial[i].private = 0;
5264 }
5265
5266 static ssize_t
5267 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5268                             struct pipe_inode_info *pipe, size_t len,
5269                             unsigned int flags)
5270 {
5271         struct ftrace_buffer_info *info = file->private_data;
5272         struct trace_iterator *iter = &info->iter;
5273         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5274         struct page *pages_def[PIPE_DEF_BUFFERS];
5275         struct splice_pipe_desc spd = {
5276                 .pages          = pages_def,
5277                 .partial        = partial_def,
5278                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5279                 .flags          = flags,
5280                 .ops            = &buffer_pipe_buf_ops,
5281                 .spd_release    = buffer_spd_release,
5282         };
5283         struct buffer_ref *ref;
5284         int entries, size, i;
5285         ssize_t ret;
5286
5287         mutex_lock(&trace_types_lock);
5288
5289 #ifdef CONFIG_TRACER_MAX_TRACE
5290         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5291                 ret = -EBUSY;
5292                 goto out;
5293         }
5294 #endif
5295
5296         if (splice_grow_spd(pipe, &spd)) {
5297                 ret = -ENOMEM;
5298                 goto out;
5299         }
5300
5301         if (*ppos & (PAGE_SIZE - 1)) {
5302                 ret = -EINVAL;
5303                 goto out;
5304         }
5305
5306         if (len & (PAGE_SIZE - 1)) {
5307                 if (len < PAGE_SIZE) {
5308                         ret = -EINVAL;
5309                         goto out;
5310                 }
5311                 len &= PAGE_MASK;
5312         }
5313
5314  again:
5315         trace_access_lock(iter->cpu_file);
5316         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5317
5318         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5319                 struct page *page;
5320                 int r;
5321
5322                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5323                 if (!ref)
5324                         break;
5325
5326                 ref->ref = 1;
5327                 ref->buffer = iter->trace_buffer->buffer;
5328                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5329                 if (!ref->page) {
5330                         kfree(ref);
5331                         break;
5332                 }
5333
5334                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5335                                           len, iter->cpu_file, 1);
5336                 if (r < 0) {
5337                         ring_buffer_free_read_page(ref->buffer, ref->page);
5338                         kfree(ref);
5339                         break;
5340                 }
5341
5342                 /*
5343                  * Zero out any left-over data, since this page is
5344                  * going to user land.
5345                  */
5346                 size = ring_buffer_page_len(ref->page);
5347                 if (size < PAGE_SIZE)
5348                         memset(ref->page + size, 0, PAGE_SIZE - size);
5349
5350                 page = virt_to_page(ref->page);
5351
5352                 spd.pages[i] = page;
5353                 spd.partial[i].len = PAGE_SIZE;
5354                 spd.partial[i].offset = 0;
5355                 spd.partial[i].private = (unsigned long)ref;
5356                 spd.nr_pages++;
5357                 *ppos += PAGE_SIZE;
5358
5359                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5360         }
5361
5362         trace_access_unlock(iter->cpu_file);
5363         spd.nr_pages = i;
5364
5365         /* did we read anything? */
5366         if (!spd.nr_pages) {
5367                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5368                         ret = -EAGAIN;
5369                         goto out;
5370                 }
5371                 mutex_unlock(&trace_types_lock);
5372                 wait_on_pipe(iter);
5373                 mutex_lock(&trace_types_lock);
5374                 if (signal_pending(current)) {
5375                         ret = -EINTR;
5376                         goto out;
5377                 }
5378                 goto again;
5379         }
5380
5381         ret = splice_to_pipe(pipe, &spd);
5382         splice_shrink_spd(&spd);
5383 out:
5384         mutex_unlock(&trace_types_lock);
5385
5386         return ret;
5387 }
5388
5389 static const struct file_operations tracing_buffers_fops = {
5390         .open           = tracing_buffers_open,
5391         .read           = tracing_buffers_read,
5392         .poll           = tracing_buffers_poll,
5393         .release        = tracing_buffers_release,
5394         .splice_read    = tracing_buffers_splice_read,
5395         .llseek         = no_llseek,
5396 };
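
/*
 * Illustrative sketch (not compiled here) of pulling raw, page-sized binary
 * data out of the per_cpu/cpuN/trace_pipe_raw file that is wired up to the
 * tracing_buffers_fops operations above further down in this file.  The
 * path, the cpu number and the 4096-byte page size are assumptions, and
 * consume() is a hypothetical callback.  splice() is also supported, but it
 * needs a pipe as the destination.
 *
 *	char page[4096];
 *	ssize_t r;
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *
 *	while ((r = read(fd, page, sizeof(page))) > 0)
 *		consume(page, r);
 */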
5397
5398 static ssize_t
5399 tracing_stats_read(struct file *filp, char __user *ubuf,
5400                    size_t count, loff_t *ppos)
5401 {
5402         struct inode *inode = file_inode(filp);
5403         struct trace_array *tr = inode->i_private;
5404         struct trace_buffer *trace_buf = &tr->trace_buffer;
5405         int cpu = tracing_get_cpu(inode);
5406         struct trace_seq *s;
5407         unsigned long cnt;
5408         unsigned long long t;
5409         unsigned long usec_rem;
5410
5411         s = kmalloc(sizeof(*s), GFP_KERNEL);
5412         if (!s)
5413                 return -ENOMEM;
5414
5415         trace_seq_init(s);
5416
5417         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5418         trace_seq_printf(s, "entries: %ld\n", cnt);
5419
5420         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5421         trace_seq_printf(s, "overrun: %ld\n", cnt);
5422
5423         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5424         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5425
5426         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5427         trace_seq_printf(s, "bytes: %ld\n", cnt);
5428
5429         if (trace_clocks[tr->clock_id].in_ns) {
5430                 /* local or global for trace_clock */
5431                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5432                 usec_rem = do_div(t, USEC_PER_SEC);
5433                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5434                                                                 t, usec_rem);
5435
5436                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5437                 usec_rem = do_div(t, USEC_PER_SEC);
5438                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5439         } else {
5440                 /* counter or tsc mode for trace_clock */
5441                 trace_seq_printf(s, "oldest event ts: %llu\n",
5442                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5443
5444                 trace_seq_printf(s, "now ts: %llu\n",
5445                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5446         }
5447
5448         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5449         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5450
5451         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5452         trace_seq_printf(s, "read events: %ld\n", cnt);
5453
5454         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5455
5456         kfree(s);
5457
5458         return count;
5459 }
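
/*
 * A minimal sketch (not compiled here) of reading the per-cpu statistics
 * that tracing_stats_read() above formats ("entries:", "overrun:",
 * "commit overrun:", and so on).  The path assumes debugfs at
 * /sys/kernel/debug and uses cpu0 only as an example.
 *
 *	char buf[512];
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/stats", O_RDONLY);
 *	ssize_t n = read(fd, buf, sizeof(buf) - 1);
 *
 *	if (n > 0) {
 *		buf[n] = '\0';
 *		puts(buf);
 *	}
 */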
5460
5461 static const struct file_operations tracing_stats_fops = {
5462         .open           = tracing_open_generic_tr,
5463         .read           = tracing_stats_read,
5464         .llseek         = generic_file_llseek,
5465         .release        = tracing_release_generic_tr,
5466 };
5467
5468 #ifdef CONFIG_DYNAMIC_FTRACE
5469
5470 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5471 {
5472         return 0;
5473 }
5474
5475 static ssize_t
5476 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5477                   size_t cnt, loff_t *ppos)
5478 {
5479         static char ftrace_dyn_info_buffer[1024];
5480         static DEFINE_MUTEX(dyn_info_mutex);
5481         unsigned long *p = filp->private_data;
5482         char *buf = ftrace_dyn_info_buffer;
5483         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5484         int r;
5485
5486         mutex_lock(&dyn_info_mutex);
5487         r = sprintf(buf, "%ld ", *p);
5488
5489         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5490         buf[r++] = '\n';
5491
5492         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5493
5494         mutex_unlock(&dyn_info_mutex);
5495
5496         return r;
5497 }
5498
5499 static const struct file_operations tracing_dyn_info_fops = {
5500         .open           = tracing_open_generic,
5501         .read           = tracing_read_dyn_info,
5502         .llseek         = generic_file_llseek,
5503 };
5504 #endif /* CONFIG_DYNAMIC_FTRACE */
5505
5506 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5507 static void
5508 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5509 {
5510         tracing_snapshot();
5511 }
5512
5513 static void
5514 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5515 {
5516         unsigned long *count = (unsigned long *)data;
5517
5518         if (!*count)
5519                 return;
5520
5521         if (*count != -1)
5522                 (*count)--;
5523
5524         tracing_snapshot();
5525 }
5526
5527 static int
5528 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5529                       struct ftrace_probe_ops *ops, void *data)
5530 {
5531         long count = (long)data;
5532
5533         seq_printf(m, "%ps:", (void *)ip);
5534
5535         seq_printf(m, "snapshot");
5536
5537         if (count == -1)
5538                 seq_printf(m, ":unlimited\n");
5539         else
5540                 seq_printf(m, ":count=%ld\n", count);
5541
5542         return 0;
5543 }
5544
5545 static struct ftrace_probe_ops snapshot_probe_ops = {
5546         .func                   = ftrace_snapshot,
5547         .print                  = ftrace_snapshot_print,
5548 };
5549
5550 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5551         .func                   = ftrace_count_snapshot,
5552         .print                  = ftrace_snapshot_print,
5553 };
5554
5555 static int
5556 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5557                                char *glob, char *cmd, char *param, int enable)
5558 {
5559         struct ftrace_probe_ops *ops;
5560         void *count = (void *)-1;
5561         char *number;
5562         int ret;
5563
5564         /* hash funcs only work with set_ftrace_filter */
5565         if (!enable)
5566                 return -EINVAL;
5567
5568         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5569
5570         if (glob[0] == '!') {
5571                 unregister_ftrace_function_probe_func(glob+1, ops);
5572                 return 0;
5573         }
5574
5575         if (!param)
5576                 goto out_reg;
5577
5578         number = strsep(&param, ":");
5579
5580         if (!strlen(number))
5581                 goto out_reg;
5582
5583         /*
5584          * We use the callback data field (which is a pointer)
5585          * as our counter.
5586          */
5587         ret = kstrtoul(number, 0, (unsigned long *)&count);
5588         if (ret)
5589                 return ret;
5590
5591  out_reg:
5592         ret = register_ftrace_function_probe(glob, ops, count);
5593
5594         if (ret >= 0)
5595                 alloc_snapshot(&global_trace);
5596
5597         return ret < 0 ? ret : 0;
5598 }
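
/*
 * Illustrative sketch of arming the "snapshot" function command that
 * ftrace_trace_snapshot_callback() above parses (not compiled here).  The
 * format is "<function>:snapshot[:<count>]": with a count the snapshot is
 * taken at most <count> times, without one it is unlimited, and a leading
 * '!' removes the probe again.  The function name below is only an example
 * and the path assumes debugfs at /sys/kernel/debug.
 *
 *	static const char cmd[] = "schedule:snapshot:1";
 *	int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, cmd, sizeof(cmd) - 1);
 *		close(fd);
 *	}
 */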
5599
5600 static struct ftrace_func_command ftrace_snapshot_cmd = {
5601         .name                   = "snapshot",
5602         .func                   = ftrace_trace_snapshot_callback,
5603 };
5604
5605 static __init int register_snapshot_cmd(void)
5606 {
5607         return register_ftrace_command(&ftrace_snapshot_cmd);
5608 }
5609 #else
5610 static inline __init int register_snapshot_cmd(void) { return 0; }
5611 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5612
5613 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5614 {
5615         if (tr->dir)
5616                 return tr->dir;
5617
5618         if (!debugfs_initialized())
5619                 return NULL;
5620
5621         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5622                 tr->dir = debugfs_create_dir("tracing", NULL);
5623
5624         if (!tr->dir)
5625                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5626
5627         return tr->dir;
5628 }
5629
5630 struct dentry *tracing_init_dentry(void)
5631 {
5632         return tracing_init_dentry_tr(&global_trace);
5633 }
5634
5635 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5636 {
5637         struct dentry *d_tracer;
5638
5639         if (tr->percpu_dir)
5640                 return tr->percpu_dir;
5641
5642         d_tracer = tracing_init_dentry_tr(tr);
5643         if (!d_tracer)
5644                 return NULL;
5645
5646         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5647
5648         WARN_ONCE(!tr->percpu_dir,
5649                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5650
5651         return tr->percpu_dir;
5652 }
5653
5654 static struct dentry *
5655 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5656                       void *data, long cpu, const struct file_operations *fops)
5657 {
5658         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5659
5660         if (ret) /* See tracing_get_cpu() */
5661                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5662         return ret;
5663 }
5664
5665 static void
5666 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5667 {
5668         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5669         struct dentry *d_cpu;
5670         char cpu_dir[30]; /* 30 characters should be more than enough */
5671
5672         if (!d_percpu)
5673                 return;
5674
5675         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5676         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5677         if (!d_cpu) {
5678                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5679                 return;
5680         }
5681
5682         /* per cpu trace_pipe */
5683         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5684                                 tr, cpu, &tracing_pipe_fops);
5685
5686         /* per cpu trace */
5687         trace_create_cpu_file("trace", 0644, d_cpu,
5688                                 tr, cpu, &tracing_fops);
5689
5690         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5691                                 tr, cpu, &tracing_buffers_fops);
5692
5693         trace_create_cpu_file("stats", 0444, d_cpu,
5694                                 tr, cpu, &tracing_stats_fops);
5695
5696         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5697                                 tr, cpu, &tracing_entries_fops);
5698
5699 #ifdef CONFIG_TRACER_SNAPSHOT
5700         trace_create_cpu_file("snapshot", 0644, d_cpu,
5701                                 tr, cpu, &snapshot_fops);
5702
5703         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5704                                 tr, cpu, &snapshot_raw_fops);
5705 #endif
5706 }
5707
5708 #ifdef CONFIG_FTRACE_SELFTEST
5709 /* Let selftest have access to static functions in this file */
5710 #include "trace_selftest.c"
5711 #endif
5712
5713 struct trace_option_dentry {
5714         struct tracer_opt               *opt;
5715         struct tracer_flags             *flags;
5716         struct trace_array              *tr;
5717         struct dentry                   *entry;
5718 };
5719
5720 static ssize_t
5721 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5722                         loff_t *ppos)
5723 {
5724         struct trace_option_dentry *topt = filp->private_data;
5725         char *buf;
5726
5727         if (topt->flags->val & topt->opt->bit)
5728                 buf = "1\n";
5729         else
5730                 buf = "0\n";
5731
5732         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5733 }
5734
5735 static ssize_t
5736 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5737                          loff_t *ppos)
5738 {
5739         struct trace_option_dentry *topt = filp->private_data;
5740         unsigned long val;
5741         int ret;
5742
5743         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5744         if (ret)
5745                 return ret;
5746
5747         if (val != 0 && val != 1)
5748                 return -EINVAL;
5749
5750         if (!!(topt->flags->val & topt->opt->bit) != val) {
5751                 mutex_lock(&trace_types_lock);
5752                 ret = __set_tracer_option(topt->tr, topt->flags,
5753                                           topt->opt, !val);
5754                 mutex_unlock(&trace_types_lock);
5755                 if (ret)
5756                         return ret;
5757         }
5758
5759         *ppos += cnt;
5760
5761         return cnt;
5762 }
5763
5764
5765 static const struct file_operations trace_options_fops = {
5766         .open = tracing_open_generic,
5767         .read = trace_options_read,
5768         .write = trace_options_write,
5769         .llseek = generic_file_llseek,
5770 };
5771
5772 static ssize_t
5773 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5774                         loff_t *ppos)
5775 {
5776         long index = (long)filp->private_data;
5777         char *buf;
5778
5779         if (trace_flags & (1 << index))
5780                 buf = "1\n";
5781         else
5782                 buf = "0\n";
5783
5784         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5785 }
5786
5787 static ssize_t
5788 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5789                          loff_t *ppos)
5790 {
5791         struct trace_array *tr = &global_trace;
5792         long index = (long)filp->private_data;
5793         unsigned long val;
5794         int ret;
5795
5796         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5797         if (ret)
5798                 return ret;
5799
5800         if (val != 0 && val != 1)
5801                 return -EINVAL;
5802
5803         mutex_lock(&trace_types_lock);
5804         ret = set_tracer_flag(tr, 1 << index, val);
5805         mutex_unlock(&trace_types_lock);
5806
5807         if (ret < 0)
5808                 return ret;
5809
5810         *ppos += cnt;
5811
5812         return cnt;
5813 }
5814
5815 static const struct file_operations trace_options_core_fops = {
5816         .open = tracing_open_generic,
5817         .read = trace_options_core_read,
5818         .write = trace_options_core_write,
5819         .llseek = generic_file_llseek,
5820 };
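
/*
 * Illustrative sketch (not compiled here) of flipping one of the core trace
 * options through the options/ directory created below; only "0" and "1"
 * are accepted by trace_options_core_write().  The path and the option name
 * are assumptions (debugfs at /sys/kernel/debug, "overwrite" as an example).
 *
 *	int fd = open("/sys/kernel/debug/tracing/options/overwrite", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "0", 1);
 *		close(fd);
 *	}
 */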
5821
5822 struct dentry *trace_create_file(const char *name,
5823                                  umode_t mode,
5824                                  struct dentry *parent,
5825                                  void *data,
5826                                  const struct file_operations *fops)
5827 {
5828         struct dentry *ret;
5829
5830         ret = debugfs_create_file(name, mode, parent, data, fops);
5831         if (!ret)
5832                 pr_warning("Could not create debugfs '%s' entry\n", name);
5833
5834         return ret;
5835 }
5836
5837
5838 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5839 {
5840         struct dentry *d_tracer;
5841
5842         if (tr->options)
5843                 return tr->options;
5844
5845         d_tracer = tracing_init_dentry_tr(tr);
5846         if (!d_tracer)
5847                 return NULL;
5848
5849         tr->options = debugfs_create_dir("options", d_tracer);
5850         if (!tr->options) {
5851                 pr_warning("Could not create debugfs directory 'options'\n");
5852                 return NULL;
5853         }
5854
5855         return tr->options;
5856 }
5857
5858 static void
5859 create_trace_option_file(struct trace_array *tr,
5860                          struct trace_option_dentry *topt,
5861                          struct tracer_flags *flags,
5862                          struct tracer_opt *opt)
5863 {
5864         struct dentry *t_options;
5865
5866         t_options = trace_options_init_dentry(tr);
5867         if (!t_options)
5868                 return;
5869
5870         topt->flags = flags;
5871         topt->opt = opt;
5872         topt->tr = tr;
5873
5874         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5875                                     &trace_options_fops);
5876
5877 }
5878
5879 static struct trace_option_dentry *
5880 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5881 {
5882         struct trace_option_dentry *topts;
5883         struct tracer_flags *flags;
5884         struct tracer_opt *opts;
5885         int cnt;
5886
5887         if (!tracer)
5888                 return NULL;
5889
5890         flags = tracer->flags;
5891
5892         if (!flags || !flags->opts)
5893                 return NULL;
5894
5895         opts = flags->opts;
5896
5897         for (cnt = 0; opts[cnt].name; cnt++)
5898                 ;
5899
5900         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5901         if (!topts)
5902                 return NULL;
5903
5904         for (cnt = 0; opts[cnt].name; cnt++)
5905                 create_trace_option_file(tr, &topts[cnt], flags,
5906                                          &opts[cnt]);
5907
5908         return topts;
5909 }
5910
5911 static void
5912 destroy_trace_option_files(struct trace_option_dentry *topts)
5913 {
5914         int cnt;
5915
5916         if (!topts)
5917                 return;
5918
5919         for (cnt = 0; topts[cnt].opt; cnt++) {
5920                 if (topts[cnt].entry)
5921                         debugfs_remove(topts[cnt].entry);
5922         }
5923
5924         kfree(topts);
5925 }
5926
5927 static struct dentry *
5928 create_trace_option_core_file(struct trace_array *tr,
5929                               const char *option, long index)
5930 {
5931         struct dentry *t_options;
5932
5933         t_options = trace_options_init_dentry(tr);
5934         if (!t_options)
5935                 return NULL;
5936
5937         return trace_create_file(option, 0644, t_options, (void *)index,
5938                                     &trace_options_core_fops);
5939 }
5940
5941 static __init void create_trace_options_dir(struct trace_array *tr)
5942 {
5943         struct dentry *t_options;
5944         int i;
5945
5946         t_options = trace_options_init_dentry(tr);
5947         if (!t_options)
5948                 return;
5949
5950         for (i = 0; trace_options[i]; i++)
5951                 create_trace_option_core_file(tr, trace_options[i], i);
5952 }
5953
5954 static ssize_t
5955 rb_simple_read(struct file *filp, char __user *ubuf,
5956                size_t cnt, loff_t *ppos)
5957 {
5958         struct trace_array *tr = filp->private_data;
5959         char buf[64];
5960         int r;
5961
5962         r = tracer_tracing_is_on(tr);
5963         r = sprintf(buf, "%d\n", r);
5964
5965         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5966 }
5967
5968 static ssize_t
5969 rb_simple_write(struct file *filp, const char __user *ubuf,
5970                 size_t cnt, loff_t *ppos)
5971 {
5972         struct trace_array *tr = filp->private_data;
5973         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5974         unsigned long val;
5975         int ret;
5976
5977         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5978         if (ret)
5979                 return ret;
5980
5981         if (buffer) {
5982                 mutex_lock(&trace_types_lock);
5983                 if (val) {
5984                         tracer_tracing_on(tr);
5985                         if (tr->current_trace->start)
5986                                 tr->current_trace->start(tr);
5987                 } else {
5988                         tracer_tracing_off(tr);
5989                         if (tr->current_trace->stop)
5990                                 tr->current_trace->stop(tr);
5991                 }
5992                 mutex_unlock(&trace_types_lock);
5993         }
5994
5995         (*ppos)++;
5996
5997         return cnt;
5998 }
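
/*
 * A minimal sketch of toggling the ring buffer through the tracing_on file
 * that rb_simple_read()/rb_simple_write() implement (not compiled here;
 * path assumes debugfs at /sys/kernel/debug).  Writing "0" stops recording
 * and calls the tracer's stop() hook, "1" turns recording back on.
 *
 *	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "0", 1);
 *		run_code_that_should_not_be_recorded();
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 *
 * run_code_that_should_not_be_recorded() is, of course, just a placeholder.
 */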
5999
6000 static const struct file_operations rb_simple_fops = {
6001         .open           = tracing_open_generic_tr,
6002         .read           = rb_simple_read,
6003         .write          = rb_simple_write,
6004         .release        = tracing_release_generic_tr,
6005         .llseek         = default_llseek,
6006 };
6007
6008 struct dentry *trace_instance_dir;
6009
6010 static void
6011 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6012
6013 static int
6014 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6015 {
6016         enum ring_buffer_flags rb_flags;
6017
6018         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6019
6020         buf->tr = tr;
6021
6022         buf->buffer = ring_buffer_alloc(size, rb_flags);
6023         if (!buf->buffer)
6024                 return -ENOMEM;
6025
6026         buf->data = alloc_percpu(struct trace_array_cpu);
6027         if (!buf->data) {
6028                 ring_buffer_free(buf->buffer);
6029                 return -ENOMEM;
6030         }
6031
6032         /* Allocate the first page for all buffers */
6033         set_buffer_entries(&tr->trace_buffer,
6034                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6035
6036         return 0;
6037 }
6038
6039 static int allocate_trace_buffers(struct trace_array *tr, int size)
6040 {
6041         int ret;
6042
6043         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6044         if (ret)
6045                 return ret;
6046
6047 #ifdef CONFIG_TRACER_MAX_TRACE
6048         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6049                                     allocate_snapshot ? size : 1);
6050         if (WARN_ON(ret)) {
6051                 ring_buffer_free(tr->trace_buffer.buffer);
6052                 free_percpu(tr->trace_buffer.data);
6053                 return -ENOMEM;
6054         }
6055         tr->allocated_snapshot = allocate_snapshot;
6056
6057         /*
6058          * Only the top level trace array gets its snapshot allocated
6059          * from the kernel command line.
6060          */
6061         allocate_snapshot = false;
6062 #endif
6063         return 0;
6064 }
6065
6066 static int new_instance_create(const char *name)
6067 {
6068         struct trace_array *tr;
6069         int ret;
6070
6071         mutex_lock(&trace_types_lock);
6072
6073         ret = -EEXIST;
6074         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6075                 if (tr->name && strcmp(tr->name, name) == 0)
6076                         goto out_unlock;
6077         }
6078
6079         ret = -ENOMEM;
6080         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6081         if (!tr)
6082                 goto out_unlock;
6083
6084         tr->name = kstrdup(name, GFP_KERNEL);
6085         if (!tr->name)
6086                 goto out_free_tr;
6087
6088         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6089                 goto out_free_tr;
6090
6091         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6092
6093         raw_spin_lock_init(&tr->start_lock);
6094
6095         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6096
6097         tr->current_trace = &nop_trace;
6098
6099         INIT_LIST_HEAD(&tr->systems);
6100         INIT_LIST_HEAD(&tr->events);
6101
6102         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6103                 goto out_free_tr;
6104
6105         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6106         if (!tr->dir)
6107                 goto out_free_tr;
6108
6109         ret = event_trace_add_tracer(tr->dir, tr);
6110         if (ret) {
6111                 debugfs_remove_recursive(tr->dir);
6112                 goto out_free_tr;
6113         }
6114
6115         init_tracer_debugfs(tr, tr->dir);
6116
6117         list_add(&tr->list, &ftrace_trace_arrays);
6118
6119         mutex_unlock(&trace_types_lock);
6120
6121         return 0;
6122
6123  out_free_tr:
6124         if (tr->trace_buffer.buffer)
6125                 ring_buffer_free(tr->trace_buffer.buffer);
6126         free_cpumask_var(tr->tracing_cpumask);
6127         kfree(tr->name);
6128         kfree(tr);
6129
6130  out_unlock:
6131         mutex_unlock(&trace_types_lock);
6132
6133         return ret;
6134
6135 }
6136
6137 static int instance_delete(const char *name)
6138 {
6139         struct trace_array *tr;
6140         int found = 0;
6141         int ret;
6142
6143         mutex_lock(&trace_types_lock);
6144
6145         ret = -ENODEV;
6146         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6147                 if (tr->name && strcmp(tr->name, name) == 0) {
6148                         found = 1;
6149                         break;
6150                 }
6151         }
6152         if (!found)
6153                 goto out_unlock;
6154
6155         ret = -EBUSY;
6156         if (tr->ref)
6157                 goto out_unlock;
6158
6159         list_del(&tr->list);
6160
6161         tracing_set_nop(tr);
6162         event_trace_del_tracer(tr);
6163         ftrace_destroy_function_files(tr);
6164         debugfs_remove_recursive(tr->dir);
6165         free_percpu(tr->trace_buffer.data);
6166         ring_buffer_free(tr->trace_buffer.buffer);
6167
6168         kfree(tr->name);
6169         kfree(tr);
6170
6171         ret = 0;
6172
6173  out_unlock:
6174         mutex_unlock(&trace_types_lock);
6175
6176         return ret;
6177 }
6178
6179 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6180 {
6181         struct dentry *parent;
6182         int ret;
6183
6184         /* Paranoid: Make sure the parent is the "instances" directory */
6185         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6186         if (WARN_ON_ONCE(parent != trace_instance_dir))
6187                 return -ENOENT;
6188
6189         /*
6190          * The inode mutex is locked, but debugfs_create_dir() will also
6191          * take the mutex. As the instances directory can not be destroyed
6192          * or changed in any other way, it is safe to unlock it, and
6193          * let the dentry try. If two users try to make the same dir at
6194          * the same time, then the new_instance_create() will determine the
6195          * winner.
6196          */
6197         mutex_unlock(&inode->i_mutex);
6198
6199         ret = new_instance_create(dentry->d_iname);
6200
6201         mutex_lock(&inode->i_mutex);
6202
6203         return ret;
6204 }
6205
6206 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6207 {
6208         struct dentry *parent;
6209         int ret;
6210
6211         /* Paranoid: Make sure the parent is the "instances" directory */
6212         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6213         if (WARN_ON_ONCE(parent != trace_instance_dir))
6214                 return -ENOENT;
6215
6216         /* The caller did a dget() on dentry */
6217         mutex_unlock(&dentry->d_inode->i_mutex);
6218
6219         /*
6220          * The inode mutex is locked, but debugfs_remove_recursive() will
6221          * also take the mutex. As the instances directory can not be
6222          * destroyed or changed in any other way, it is safe to unlock it
6223          * and let the dentry try. If two users try to remove the same
6224          * instance at the same time, then instance_delete() will determine
6225          * the winner.
6226          */
6227         mutex_unlock(&inode->i_mutex);
6228
6229         ret = instance_delete(dentry->d_iname);
6230
6231         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6232         mutex_lock(&dentry->d_inode->i_mutex);
6233
6234         return ret;
6235 }
6236
6237 static const struct inode_operations instance_dir_inode_operations = {
6238         .lookup         = simple_lookup,
6239         .mkdir          = instance_mkdir,
6240         .rmdir          = instance_rmdir,
6241 };
6242
6243 static __init void create_trace_instances(struct dentry *d_tracer)
6244 {
6245         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6246         if (WARN_ON(!trace_instance_dir))
6247                 return;
6248
6249         /* Hijack the dir inode operations, to allow mkdir */
6250         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6251 }
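
/*
 * Illustrative sketch (not compiled here): because the instances directory
 * installs the mkdir/rmdir inode operations above, a separate trace array
 * can be created and removed from user space with plain mkdir(2)/rmdir(2).
 * The path assumes debugfs at /sys/kernel/debug and "foo" is only an
 * example name.
 *
 *	mkdir("/sys/kernel/debug/tracing/instances/foo", 0755);
 *	rmdir("/sys/kernel/debug/tracing/instances/foo");
 *
 * Between the two calls the new instance exposes its own trace, trace_pipe
 * and related files under that directory.
 */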
6252
6253 static void
6254 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6255 {
6256         int cpu;
6257
6258         trace_create_file("available_tracers", 0444, d_tracer,
6259                         tr, &show_traces_fops);
6260
6261         trace_create_file("current_tracer", 0644, d_tracer,
6262                         tr, &set_tracer_fops);
6263
6264         trace_create_file("tracing_cpumask", 0644, d_tracer,
6265                           tr, &tracing_cpumask_fops);
6266
6267         trace_create_file("trace_options", 0644, d_tracer,
6268                           tr, &tracing_iter_fops);
6269
6270         trace_create_file("trace", 0644, d_tracer,
6271                           tr, &tracing_fops);
6272
6273         trace_create_file("trace_pipe", 0444, d_tracer,
6274                           tr, &tracing_pipe_fops);
6275
6276         trace_create_file("buffer_size_kb", 0644, d_tracer,
6277                           tr, &tracing_entries_fops);
6278
6279         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6280                           tr, &tracing_total_entries_fops);
6281
6282         trace_create_file("free_buffer", 0200, d_tracer,
6283                           tr, &tracing_free_buffer_fops);
6284
6285         trace_create_file("trace_marker", 0220, d_tracer,
6286                           tr, &tracing_mark_fops);
6287
6288         trace_create_file("trace_clock", 0644, d_tracer, tr,
6289                           &trace_clock_fops);
6290
6291         trace_create_file("tracing_on", 0644, d_tracer,
6292                           tr, &rb_simple_fops);
6293
6294 #ifdef CONFIG_TRACER_MAX_TRACE
6295         trace_create_file("tracing_max_latency", 0644, d_tracer,
6296                         &tr->max_latency, &tracing_max_lat_fops);
6297 #endif
6298
6299         if (ftrace_create_function_files(tr, d_tracer))
6300                 WARN(1, "Could not allocate function filter files");
6301
6302 #ifdef CONFIG_TRACER_SNAPSHOT
6303         trace_create_file("snapshot", 0644, d_tracer,
6304                           tr, &snapshot_fops);
6305 #endif
6306
6307         for_each_tracing_cpu(cpu)
6308                 tracing_init_debugfs_percpu(tr, cpu);
6309
6310 }
6311
6312 static __init int tracer_init_debugfs(void)
6313 {
6314         struct dentry *d_tracer;
6315
6316         trace_access_lock_init();
6317
6318         d_tracer = tracing_init_dentry();
6319         if (!d_tracer)
6320                 return 0;
6321
6322         init_tracer_debugfs(&global_trace, d_tracer);
6323
6324         trace_create_file("tracing_thresh", 0644, d_tracer,
6325                         &tracing_thresh, &tracing_max_lat_fops);
6326
6327         trace_create_file("README", 0444, d_tracer,
6328                         NULL, &tracing_readme_fops);
6329
6330         trace_create_file("saved_cmdlines", 0444, d_tracer,
6331                         NULL, &tracing_saved_cmdlines_fops);
6332
6333 #ifdef CONFIG_DYNAMIC_FTRACE
6334         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6335                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6336 #endif
6337
6338         create_trace_instances(d_tracer);
6339
6340         create_trace_options_dir(&global_trace);
6341
6342         return 0;
6343 }
6344
6345 static int trace_panic_handler(struct notifier_block *this,
6346                                unsigned long event, void *unused)
6347 {
6348         if (ftrace_dump_on_oops)
6349                 ftrace_dump(ftrace_dump_on_oops);
6350         return NOTIFY_OK;
6351 }
6352
6353 static struct notifier_block trace_panic_notifier = {
6354         .notifier_call  = trace_panic_handler,
6355         .next           = NULL,
6356         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6357 };
6358
6359 static int trace_die_handler(struct notifier_block *self,
6360                              unsigned long val,
6361                              void *data)
6362 {
6363         switch (val) {
6364         case DIE_OOPS:
6365                 if (ftrace_dump_on_oops)
6366                         ftrace_dump(ftrace_dump_on_oops);
6367                 break;
6368         default:
6369                 break;
6370         }
6371         return NOTIFY_OK;
6372 }
6373
6374 static struct notifier_block trace_die_notifier = {
6375         .notifier_call = trace_die_handler,
6376         .priority = 200
6377 };
6378
6379 /*
6380  * printk is limited to a maximum of 1024 bytes; we really don't need it that big.
6381  * Nothing should be printing 1000 characters anyway.
6382  */
6383 #define TRACE_MAX_PRINT         1000
6384
6385 /*
6386  * Define here KERN_TRACE so that we have one place to modify
6387  * it if we decide to change what log level the ftrace dump
6388  * should be at.
6389  */
6390 #define KERN_TRACE              KERN_EMERG
6391
6392 void
6393 trace_printk_seq(struct trace_seq *s)
6394 {
6395         /* Probably should print a warning here. */
6396         if (s->len >= TRACE_MAX_PRINT)
6397                 s->len = TRACE_MAX_PRINT;
6398
6399         /* should be zero terminated, but we are paranoid. */
6400         s->buffer[s->len] = 0;
6401
6402         printk(KERN_TRACE "%s", s->buffer);
6403
6404         trace_seq_init(s);
6405 }
6406
6407 void trace_init_global_iter(struct trace_iterator *iter)
6408 {
6409         iter->tr = &global_trace;
6410         iter->trace = iter->tr->current_trace;
6411         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6412         iter->trace_buffer = &global_trace.trace_buffer;
6413
6414         if (iter->trace && iter->trace->open)
6415                 iter->trace->open(iter);
6416
6417         /* Annotate start of buffers if we had overruns */
6418         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6419                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6420
6421         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6422         if (trace_clocks[iter->tr->clock_id].in_ns)
6423                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6424 }
6425
6426 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6427 {
6428         /* use static because iter can be a bit big for the stack */
6429         static struct trace_iterator iter;
6430         static atomic_t dump_running;
6431         unsigned int old_userobj;
6432         unsigned long flags;
6433         int cnt = 0, cpu;
6434
6435         /* Only allow one dump user at a time. */
6436         if (atomic_inc_return(&dump_running) != 1) {
6437                 atomic_dec(&dump_running);
6438                 return;
6439         }
6440
6441         /*
6442          * Always turn off tracing when we dump.
6443          * We don't need to show trace output of what happens
6444          * between multiple crashes.
6445          *
6446          * If the user does a sysrq-z, then they can re-enable
6447          * tracing with echo 1 > tracing_on.
6448          */
6449         tracing_off();
6450
6451         local_irq_save(flags);
6452
6453         /* Simulate the iterator */
6454         trace_init_global_iter(&iter);
6455
6456         for_each_tracing_cpu(cpu) {
6457                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6458         }
6459
6460         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6461
6462         /* don't look at user memory in panic mode */
6463         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6464
6465         switch (oops_dump_mode) {
6466         case DUMP_ALL:
6467                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6468                 break;
6469         case DUMP_ORIG:
6470                 iter.cpu_file = raw_smp_processor_id();
6471                 break;
6472         case DUMP_NONE:
6473                 goto out_enable;
6474         default:
6475                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6476                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6477         }
6478
6479         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6480
6481         /* Did function tracer already get disabled? */
6482         if (ftrace_is_dead()) {
6483                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6484                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6485         }
6486
6487         /*
6488          * We need to stop all tracing on all CPUs to read
6489          * the next buffer. This is a bit expensive, but is
6490          * not done often. We print all that we can read,
6491          * and then release the locks again.
6492          */
6493
6494         while (!trace_empty(&iter)) {
6495
6496                 if (!cnt)
6497                         printk(KERN_TRACE "---------------------------------\n");
6498
6499                 cnt++;
6500
6501                 /* reset all but tr, trace, and overruns */
6502                 memset(&iter.seq, 0,
6503                        sizeof(struct trace_iterator) -
6504                        offsetof(struct trace_iterator, seq));
6505                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6506                 iter.pos = -1;
6507
6508                 if (trace_find_next_entry_inc(&iter) != NULL) {
6509                         int ret;
6510
6511                         ret = print_trace_line(&iter);
6512                         if (ret != TRACE_TYPE_NO_CONSUME)
6513                                 trace_consume(&iter);
6514                 }
6515                 touch_nmi_watchdog();
6516
6517                 trace_printk_seq(&iter.seq);
6518         }
6519
6520         if (!cnt)
6521                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6522         else
6523                 printk(KERN_TRACE "---------------------------------\n");
6524
6525  out_enable:
6526         trace_flags |= old_userobj;
6527
6528         for_each_tracing_cpu(cpu) {
6529                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6530         }
6531         atomic_dec(&dump_running);
6532         local_irq_restore(flags);
6533 }
6534 EXPORT_SYMBOL_GPL(ftrace_dump);
6535
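/*
 * Early-init setup of the global trace array: allocate the cpumasks and
 * ring buffers, register the nop tracer as the bootstrap current_trace,
 * hook up the panic and die notifiers, and apply any boot-time clock
 * and option settings.
 */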
6536 __init static int tracer_alloc_buffers(void)
6537 {
6538         int ring_buf_size;
6539         int ret = -ENOMEM;
6540
6541
6542         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6543                 goto out;
6544
6545         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6546                 goto out_free_buffer_mask;
6547
6548         /* Only allocate trace_printk buffers if a trace_printk() call site exists */
6549         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6550                 /* Must be called before global_trace.buffer is allocated */
6551                 trace_printk_init_buffers();
6552
6553         /* To save memory, keep the ring buffer size to its minimum */
6554         if (ring_buffer_expanded)
6555                 ring_buf_size = trace_buf_size;
6556         else
6557                 ring_buf_size = 1;
6558
6559         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6560         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6561
6562         raw_spin_lock_init(&global_trace.start_lock);
6563
6564         /* Used for event triggers */
6565         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6566         if (!temp_buffer)
6567                 goto out_free_cpumask;
6568
6569         /* TODO: make the number of buffers hot pluggable with CPUs */
6570         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6571                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6572                 WARN_ON(1);
6573                 goto out_free_temp_buffer;
6574         }
6575
6576         if (global_trace.buffer_disabled)
6577                 tracing_off();
6578
6579         trace_init_cmdlines();
6580
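        /*
         * trace_boot_clock, if set, holds the clock name requested on
         * the kernel command line (typically via the trace_clock=
         * parameter parsed earlier in this file); apply it now that the
         * buffers exist.
         */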
6581         if (trace_boot_clock) {
6582                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6583                 if (ret < 0)
6584                         pr_warning("Trace clock %s not defined, going back to default\n",
6585                                    trace_boot_clock);
6586         }
6587
6588         /*
6589          * register_tracer() might reference current_trace, so it
6590          * needs to be set before we register anything. This is
6591          * just a bootstrap of current_trace anyway.
6592          */
6593         global_trace.current_trace = &nop_trace;
6594
6595         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6596
6597         ftrace_init_global_array_ops(&global_trace);
6598
6599         register_tracer(&nop_trace);
6600
6601         /* All seems OK, enable tracing */
6602         tracing_disabled = 0;
6603
6604         atomic_notifier_chain_register(&panic_notifier_list,
6605                                        &trace_panic_notifier);
6606
6607         register_die_notifier(&trace_die_notifier);
6608
6609         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6610
6611         INIT_LIST_HEAD(&global_trace.systems);
6612         INIT_LIST_HEAD(&global_trace.events);
6613         list_add(&global_trace.list, &ftrace_trace_arrays);
6614
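        /*
         * trace_boot_options holds the comma-separated option list from
         * the kernel command line (typically the trace_options=
         * parameter); apply each token as if it had been written to the
         * trace_options file.
         */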
6615         while (trace_boot_options) {
6616                 char *option;
6617
6618                 option = strsep(&trace_boot_options, ",");
6619                 trace_set_options(&global_trace, option);
6620         }
6621
6622         register_snapshot_cmd();
6623
6624         return 0;
6625
6626 out_free_temp_buffer:
6627         ring_buffer_free(temp_buffer);
6628 out_free_cpumask:
6629         free_percpu(global_trace.trace_buffer.data);
6630 #ifdef CONFIG_TRACER_MAX_TRACE
6631         free_percpu(global_trace.max_buffer.data);
6632 #endif
6633         free_cpumask_var(global_trace.tracing_cpumask);
6634 out_free_buffer_mask:
6635         free_cpumask_var(tracing_buffer_mask);
6636 out:
6637         return ret;
6638 }
6639
6640 __init static int clear_boot_tracer(void)
6641 {
6642         /*
6643          * The buffer holding the default bootup tracer name lives in
6644          * an init section. This function is called as a late initcall;
6645          * if the boot tracer was never registered by then, clear the
6646          * pointer here, to prevent a later registration from accessing
6647          * the buffer that is about to be freed.
6648          */
6649         if (!default_bootup_tracer)
6650                 return 0;
6651
6652         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6653                default_bootup_tracer);
6654         default_bootup_tracer = NULL;
6655
6656         return 0;
6657 }
6658
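/*
 * Initcall ordering: the ring buffers are allocated early, the debugfs
 * interface is created once the filesystem infrastructure is up, and the
 * stale boot-tracer name is cleared at late init, before the init section
 * it points into is freed.
 */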
6659 early_initcall(tracer_alloc_buffers);
6660 fs_initcall(tracer_init_debugfs);
6661 late_initcall(clear_boot_tracer);