kernel/trace/trace.c  (firefly-linux-kernel-4.4.55.git, commit cb41e98cc64b0096897eafda5b5bd09f509c8fd3)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest, although some concurrent
56  * insertions into the ring-buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 and is set back to zero only if the
92  * initialization of the tracer is successful; that is the only
93  * place that clears it.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs.
114  * Set it to 2 to dump the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
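/*
 * Illustrative sketch (not part of the original file): given the parser
 * above, the boot command line can request a dump of all CPU buffers or
 * only of the CPU that triggered the oops:
 *
 *	ftrace_dump_on_oops		-> DUMP_ALL
 *	ftrace_dump_on_oops=orig_cpu	-> DUMP_ORIG
 *
 * The same knob is also reachable at run time through
 * /proc/sys/kernel/ftrace_dump_on_oops, as described further up.
 */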
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
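/*
 * Illustrative sketch (not part of the original file): the saved string is
 * applied to the trace option flags later during boot, using the option
 * names from the trace_options[] table further down, for example:
 *
 *	trace_options=stacktrace
 *
 * and an option can be cleared instead of set with a "no" prefix
 * (e.g. "nooverwrite").
 */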
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
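/*
 * Illustrative sketch (not part of the original file): the clock name saved
 * here is matched later against the trace_clocks[] table defined further
 * down, so for example:
 *
 *	trace_clock=global
 *
 * selects the "global" clock instead of the default per-cpu "local" clock.
 */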
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptors of those pages are used to hold the linked
208  * list, by linking the lru item in each page descriptor to the
209  * other pages in that CPU's buffer.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled", which is meant for fast paths such as
302  * the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on(), which is a
304  * little slower but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low value of 16384.
323  * If a dump on oops happens, it is much appreciated not to have
324  * to wait for all that output. Anyway, this is configurable at
325  * both boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a linked list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * serialize the access of the ring buffer
341  *
342  * The ring buffer serializes readers, but that is only low level
343  * protection. The validity of the events (returned by ring_buffer_peek()
344  * etc.) is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow other processes
347  * to consume these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and this page will be
350  *      rewritten by the events producer.
351  *   B) the page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different per-cpu
355  * ring buffers concurrently.
356  *
357  * These primitives don't distinguish read-only and read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435          * races where it gets disabled but we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
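/*
 * Illustrative sketch (not part of the original file): tracing_on() pairs
 * with tracing_off() below, so kernel code that only wants to capture a
 * narrow window can bracket it (do_something_interesting() is a
 * hypothetical placeholder):
 *
 *	tracing_on();
 *	do_something_interesting();
 *	tracing_off();
 *
 * Only recording into the ring buffer is gated this way; any overhead the
 * tracers themselves add is unaffected (see the tracing_off() comment).
 */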
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
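/*
 * Illustrative sketch (not part of the original file): callers normally
 * reach this through the trace_puts() macro (in include/linux/kernel.h in
 * kernels of this vintage), which selects __trace_bputs() for true string
 * constants and falls back to __trace_puts() otherwise:
 *
 *	trace_puts("entering slow path\n");
 *
 * The return value is the number of bytes written to the ring buffer, or
 * 0 if tracing is currently disabled.
 */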
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The constant string to write to the buffer
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * tracing_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot either with
542  * tracing_snapshot_alloc(), or manually with:
543  *   echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, tracing will be stopped,
546  * basically making this a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, snapshot can not be used when the tracer uses it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
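/*
 * Illustrative sketch (not part of the original file): after a successful
 * call, the previously live trace can be read back from the snapshot file
 * while tracing keeps running in the swapped-in buffer:
 *
 *	tracing_snapshot();				in the kernel
 *	cat /sys/kernel/debug/tracing/snapshot		from userspace
 */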
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 static void free_snapshot(struct trace_array *tr)
603 {
604         /*
605          * We don't free the ring buffer; instead, we resize it because
606          * the max_tr ring buffer has some state (e.g. ring->clock) and
607          * we want to preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
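/*
 * Illustrative sketch (not part of the original file): as the comment above
 * suggests, a typical split is to allocate from a context that may sleep
 * and take the actual snapshots from atomic context later:
 *
 *	ret = tracing_alloc_snapshot();		may sleep, e.g. module init
 *	if (ret < 0)
 *		return ret;
 *	...
 *	tracing_snapshot();			fine in atomic context, not NMI
 */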
636
637 /**
638  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
639  *
640  * This is similar to tracing_snapshot(), but it will allocate the
641  * snapshot buffer if it isn't already allocated. Use this only
642  * where it is safe to sleep, as the allocation may sleep.
643  *
644  * This causes a swap between the snapshot buffer and the current live
645  * tracing buffer. You can use this to take snapshots of the live
646  * trace when some condition is triggered, but continue to trace.
647  */
648 void tracing_snapshot_alloc(void)
649 {
650         int ret;
651
652         ret = tracing_alloc_snapshot();
653         if (ret < 0)
654                 return;
655
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #else
660 void tracing_snapshot(void)
661 {
662         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
671 void tracing_snapshot_alloc(void)
672 {
673         /* Give warning */
674         tracing_snapshot();
675 }
676 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff), that just want to
686          * know if the ring buffer has been disabled, but it can handle
687          * races where it gets disabled but we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show real state of ring buffer enabled
718  * @tr: the trace array whose ring buffer state is being queried
719  *
720  * Shows the real state of the ring buffer: whether it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
737
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries can not be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
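/*
 * Illustrative sketch (not part of the original file): memparse() accepts
 * the usual K/M/G suffixes, so the buffer size could be raised at boot
 * with, for example:
 *
 *	trace_buf_size=10M
 *
 * The value is in bytes and, as noted above, is rounded to page size when
 * the buffer is actually allocated; it is not a number of entries.
 */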
752
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
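/*
 * Illustrative sketch (not part of the original file): the command line
 * value is given in microseconds and stored in nanoseconds (note the
 * "* 1000" above), so for example:
 *
 *	tracing_thresh=100
 *
 * sets a 100 usec threshold for the latency tracers that honour
 * tracing_thresh.
 */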
767
768 unsigned long nsecs_to_usecs(unsigned long nsecs)
769 {
770         return nsecs / 1000;
771 }
772
773 /* These must match the bit positions in trace_iterator_flags */
774 static const char *trace_options[] = {
775         "print-parent",
776         "sym-offset",
777         "sym-addr",
778         "verbose",
779         "raw",
780         "hex",
781         "bin",
782         "block",
783         "stacktrace",
784         "trace_printk",
785         "ftrace_preempt",
786         "branch",
787         "annotate",
788         "userstacktrace",
789         "sym-userobj",
790         "printk-msg-only",
791         "context-info",
792         "latency-format",
793         "sleep-time",
794         "graph-time",
795         "record-cmd",
796         "overwrite",
797         "disable_on_free",
798         "irq-info",
799         "markers",
800         "function-trace",
801         NULL
802 };
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,    "local",        1 },
810         { trace_clock_global,   "global",       1 },
811         { trace_clock_counter,  "counter",      0 },
812         { trace_clock_jiffies,  "uptime",       1 },
813         { trace_clock,          "perf",         1 },
814         ARCH_TRACE_CLOCKS
815 };
816
817 /*
818  * trace_parser_get_init - gets the buffer for trace parser
819  */
820 int trace_parser_get_init(struct trace_parser *parser, int size)
821 {
822         memset(parser, 0, sizeof(*parser));
823
824         parser->buffer = kmalloc(size, GFP_KERNEL);
825         if (!parser->buffer)
826                 return 1;
827
828         parser->size = size;
829         return 0;
830 }
831
832 /*
833  * trace_parser_put - frees the buffer for trace parser
834  */
835 void trace_parser_put(struct trace_parser *parser)
836 {
837         kfree(parser->buffer);
838 }
839
840 /*
841  * trace_get_user - reads the user input string separated by space
842  * (matched by isspace(ch))
843  *
844  * For each string found the 'struct trace_parser' is updated,
845  * and the function returns.
846  *
847  * Returns number of bytes read.
848  *
849  * See kernel/trace/trace.h for 'struct trace_parser' details.
850  */
851 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
852         size_t cnt, loff_t *ppos)
853 {
854         char ch;
855         size_t read = 0;
856         ssize_t ret;
857
858         if (!*ppos)
859                 trace_parser_clear(parser);
860
861         ret = get_user(ch, ubuf++);
862         if (ret)
863                 goto out;
864
865         read++;
866         cnt--;
867
868         /*
869          * If the parser has not finished with the last write,
870          * continue reading the user input without skipping spaces.
871          */
872         if (!parser->cont) {
873                 /* skip white space */
874                 while (cnt && isspace(ch)) {
875                         ret = get_user(ch, ubuf++);
876                         if (ret)
877                                 goto out;
878                         read++;
879                         cnt--;
880                 }
881
882                 /* only spaces were written */
883                 if (isspace(ch)) {
884                         *ppos += read;
885                         ret = read;
886                         goto out;
887                 }
888
889                 parser->idx = 0;
890         }
891
892         /* read the non-space input */
893         while (cnt && !isspace(ch)) {
894                 if (parser->idx < parser->size - 1)
895                         parser->buffer[parser->idx++] = ch;
896                 else {
897                         ret = -EINVAL;
898                         goto out;
899                 }
900                 ret = get_user(ch, ubuf++);
901                 if (ret)
902                         goto out;
903                 read++;
904                 cnt--;
905         }
906
907         /* We either got finished input or we have to wait for another call. */
908         if (isspace(ch)) {
909                 parser->buffer[parser->idx] = 0;
910                 parser->cont = false;
911         } else if (parser->idx < parser->size - 1) {
912                 parser->cont = true;
913                 parser->buffer[parser->idx++] = ch;
914         } else {
915                 ret = -EINVAL;
916                 goto out;
917         }
918
919         *ppos += read;
920         ret = read;
921
922 out:
923         return ret;
924 }
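/*
 * Illustrative sketch (not part of the original file): each call consumes
 * at most one whitespace-separated token from the user buffer. Roughly:
 *
 *	write of "func1 "   -> parser->buffer = "func1", parser->cont = false
 *	write of "fun"      -> parser->buffer = "fun",   parser->cont = true
 *	next write of "c2 " -> token completed as "func2" on the follow-up call
 *
 * The return value is the number of bytes consumed, which lets callers
 * loop over multi-token writes.
 */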
925
926 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
927 {
928         int len;
929         int ret;
930
931         if (!cnt)
932                 return 0;
933
934         if (s->len <= s->readpos)
935                 return -EBUSY;
936
937         len = s->len - s->readpos;
938         if (cnt > len)
939                 cnt = len;
940         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
941         if (ret == cnt)
942                 return -EFAULT;
943
944         cnt -= ret;
945
946         s->readpos += cnt;
947         return cnt;
948 }
949
950 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
951 {
952         int len;
953
954         if (s->len <= s->readpos)
955                 return -EBUSY;
956
957         len = s->len - s->readpos;
958         if (cnt > len)
959                 cnt = len;
960         memcpy(buf, s->buffer + s->readpos, cnt);
961
962         s->readpos += cnt;
963         return cnt;
964 }
965
966 unsigned long __read_mostly     tracing_thresh;
967
968 #ifdef CONFIG_TRACER_MAX_TRACE
969 /*
970  * Copy the new maximum trace into the separate maximum-trace
971  * structure. (this way the maximum trace is permanently saved,
972  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
973  */
974 static void
975 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
976 {
977         struct trace_buffer *trace_buf = &tr->trace_buffer;
978         struct trace_buffer *max_buf = &tr->max_buffer;
979         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
980         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
981
982         max_buf->cpu = cpu;
983         max_buf->time_start = data->preempt_timestamp;
984
985         max_data->saved_latency = tr->max_latency;
986         max_data->critical_start = data->critical_start;
987         max_data->critical_end = data->critical_end;
988
989         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
990         max_data->pid = tsk->pid;
991         /*
992          * If tsk == current, then use current_uid(), as that does not use
993          * RCU. The irq tracer can be called out of RCU scope.
994          */
995         if (tsk == current)
996                 max_data->uid = current_uid();
997         else
998                 max_data->uid = task_uid(tsk);
999
1000         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1001         max_data->policy = tsk->policy;
1002         max_data->rt_priority = tsk->rt_priority;
1003
1004         /* record this task's comm */
1005         tracing_record_cmdline(tsk);
1006 }
1007
1008 /**
1009  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1010  * @tr: tracer
1011  * @tsk: the task with the latency
1012  * @cpu: The cpu that initiated the trace.
1013  *
1014  * Flip the buffers between the @tr and the max_tr and record information
1015  * about which task was the cause of this latency.
1016  */
1017 void
1018 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1019 {
1020         struct ring_buffer *buf;
1021
1022         if (tr->stop_count)
1023                 return;
1024
1025         WARN_ON_ONCE(!irqs_disabled());
1026
1027         if (!tr->allocated_snapshot) {
1028                 /* Only the nop tracer should hit this when disabling */
1029                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1030                 return;
1031         }
1032
1033         arch_spin_lock(&tr->max_lock);
1034
1035         buf = tr->trace_buffer.buffer;
1036         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1037         tr->max_buffer.buffer = buf;
1038
1039         __update_max_tr(tr, tsk, cpu);
1040         arch_spin_unlock(&tr->max_lock);
1041 }
1042
1043 /**
1044  * update_max_tr_single - only copy one trace over, and reset the rest
1045  * @tr: tracer
1046  * @tsk: task with the latency
1047  * @cpu: the cpu of the buffer to copy.
1048  *
1049  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1050  */
1051 void
1052 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1053 {
1054         int ret;
1055
1056         if (tr->stop_count)
1057                 return;
1058
1059         WARN_ON_ONCE(!irqs_disabled());
1060         if (!tr->allocated_snapshot) {
1061                 /* Only the nop tracer should hit this when disabling */
1062                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1063                 return;
1064         }
1065
1066         arch_spin_lock(&tr->max_lock);
1067
1068         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1069
1070         if (ret == -EBUSY) {
1071                 /*
1072                  * We failed to swap the buffer due to a commit taking
1073                  * place on this CPU. We fail to record, but we reset
1074                  * the max trace buffer (no one writes directly to it)
1075                  * and flag that it failed.
1076                  */
1077                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1078                         "Failed to swap buffers due to commit in progress\n");
1079         }
1080
1081         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1082
1083         __update_max_tr(tr, tsk, cpu);
1084         arch_spin_unlock(&tr->max_lock);
1085 }
1086 #endif /* CONFIG_TRACER_MAX_TRACE */
1087
1088 static void default_wait_pipe(struct trace_iterator *iter)
1089 {
1090         /* Iterators are static, they should be filled or empty */
1091         if (trace_buffer_iter(iter, iter->cpu_file))
1092                 return;
1093
1094         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1095 }
1096
1097 #ifdef CONFIG_FTRACE_STARTUP_TEST
1098 static int run_tracer_selftest(struct tracer *type)
1099 {
1100         struct trace_array *tr = &global_trace;
1101         struct tracer *saved_tracer = tr->current_trace;
1102         int ret;
1103
1104         if (!type->selftest || tracing_selftest_disabled)
1105                 return 0;
1106
1107         /*
1108          * Run a selftest on this tracer.
1109          * Here we reset the trace buffer, and set the current
1110          * tracer to be this tracer. The tracer can then run some
1111          * internal tracing to verify that everything is in order.
1112          * If we fail, we do not register this tracer.
1113          */
1114         tracing_reset_online_cpus(&tr->trace_buffer);
1115
1116         tr->current_trace = type;
1117
1118 #ifdef CONFIG_TRACER_MAX_TRACE
1119         if (type->use_max_tr) {
1120                 /* If we expanded the buffers, make sure the max is expanded too */
1121                 if (ring_buffer_expanded)
1122                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1123                                            RING_BUFFER_ALL_CPUS);
1124                 tr->allocated_snapshot = true;
1125         }
1126 #endif
1127
1128         /* the test is responsible for initializing and enabling */
1129         pr_info("Testing tracer %s: ", type->name);
1130         ret = type->selftest(type, tr);
1131         /* the test is responsible for resetting too */
1132         tr->current_trace = saved_tracer;
1133         if (ret) {
1134                 printk(KERN_CONT "FAILED!\n");
1135                 /* Add the warning after printing 'FAILED' */
1136                 WARN_ON(1);
1137                 return -1;
1138         }
1139         /* Only reset on passing, to avoid touching corrupted buffers */
1140         tracing_reset_online_cpus(&tr->trace_buffer);
1141
1142 #ifdef CONFIG_TRACER_MAX_TRACE
1143         if (type->use_max_tr) {
1144                 tr->allocated_snapshot = false;
1145
1146                 /* Shrink the max buffer again */
1147                 if (ring_buffer_expanded)
1148                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1149                                            RING_BUFFER_ALL_CPUS);
1150         }
1151 #endif
1152
1153         printk(KERN_CONT "PASSED\n");
1154         return 0;
1155 }
1156 #else
1157 static inline int run_tracer_selftest(struct tracer *type)
1158 {
1159         return 0;
1160 }
1161 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1162
1163 /**
1164  * register_tracer - register a tracer with the ftrace system.
1165  * @type: the plugin for the tracer
1166  *
1167  * Register a new plugin tracer.
1168  */
1169 int register_tracer(struct tracer *type)
1170 {
1171         struct tracer *t;
1172         int ret = 0;
1173
1174         if (!type->name) {
1175                 pr_info("Tracer must have a name\n");
1176                 return -1;
1177         }
1178
1179         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1180                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1181                 return -1;
1182         }
1183
1184         mutex_lock(&trace_types_lock);
1185
1186         tracing_selftest_running = true;
1187
1188         for (t = trace_types; t; t = t->next) {
1189                 if (strcmp(type->name, t->name) == 0) {
1190                         /* already found */
1191                         pr_info("Tracer %s already registered\n",
1192                                 type->name);
1193                         ret = -1;
1194                         goto out;
1195                 }
1196         }
1197
1198         if (!type->set_flag)
1199                 type->set_flag = &dummy_set_flag;
1200         if (!type->flags)
1201                 type->flags = &dummy_tracer_flags;
1202         else
1203                 if (!type->flags->opts)
1204                         type->flags->opts = dummy_tracer_opt;
1205         if (!type->wait_pipe)
1206                 type->wait_pipe = default_wait_pipe;
1207
1208         ret = run_tracer_selftest(type);
1209         if (ret < 0)
1210                 goto out;
1211
1212         type->next = trace_types;
1213         trace_types = type;
1214
1215  out:
1216         tracing_selftest_running = false;
1217         mutex_unlock(&trace_types_lock);
1218
1219         if (ret || !default_bootup_tracer)
1220                 goto out_unlock;
1221
1222         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1223                 goto out_unlock;
1224
1225         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1226         /* Do we want this tracer to start on bootup? */
1227         tracing_set_tracer(&global_trace, type->name);
1228         default_bootup_tracer = NULL;
1229         /* disable other selftests, since this tracer will break them. */
1230         tracing_selftest_disabled = true;
1231 #ifdef CONFIG_FTRACE_STARTUP_TEST
1232         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1233                type->name);
1234 #endif
1235
1236  out_unlock:
1237         return ret;
1238 }
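/*
 * Illustrative sketch (not part of the original file): a minimal tracer
 * registration, with hypothetical callbacks, would look roughly like this
 * (see struct tracer in trace.h for the full set of hooks):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *
 * Tracers that leave ->set_flag, ->flags or ->wait_pipe unset get the
 * dummy/default handlers filled in by register_tracer() above.
 */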
1239
1240 void tracing_reset(struct trace_buffer *buf, int cpu)
1241 {
1242         struct ring_buffer *buffer = buf->buffer;
1243
1244         if (!buffer)
1245                 return;
1246
1247         ring_buffer_record_disable(buffer);
1248
1249         /* Make sure all commits have finished */
1250         synchronize_sched();
1251         ring_buffer_reset_cpu(buffer, cpu);
1252
1253         ring_buffer_record_enable(buffer);
1254 }
1255
1256 void tracing_reset_online_cpus(struct trace_buffer *buf)
1257 {
1258         struct ring_buffer *buffer = buf->buffer;
1259         int cpu;
1260
1261         if (!buffer)
1262                 return;
1263
1264         ring_buffer_record_disable(buffer);
1265
1266         /* Make sure all commits have finished */
1267         synchronize_sched();
1268
1269         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1270
1271         for_each_online_cpu(cpu)
1272                 ring_buffer_reset_cpu(buffer, cpu);
1273
1274         ring_buffer_record_enable(buffer);
1275 }
1276
1277 /* Must have trace_types_lock held */
1278 void tracing_reset_all_online_cpus(void)
1279 {
1280         struct trace_array *tr;
1281
1282         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1283                 tracing_reset_online_cpus(&tr->trace_buffer);
1284 #ifdef CONFIG_TRACER_MAX_TRACE
1285                 tracing_reset_online_cpus(&tr->max_buffer);
1286 #endif
1287         }
1288 }
1289
1290 #define SAVED_CMDLINES 128
1291 #define NO_CMDLINE_MAP UINT_MAX
1292 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1293 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1294 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1295 static int cmdline_idx;
1296 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1297
1298 /* temporarily disable recording */
1299 static atomic_t trace_record_cmdline_disabled __read_mostly;
1300
1301 static void trace_init_cmdlines(void)
1302 {
1303         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1304         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1305         cmdline_idx = 0;
1306 }
1307
1308 int is_tracing_stopped(void)
1309 {
1310         return global_trace.stop_count;
1311 }
1312
1313 /**
1314  * tracing_start - quick start of the tracer
1315  *
1316  * If tracing is enabled but was stopped by tracing_stop,
1317  * this will start the tracer back up.
1318  */
1319 void tracing_start(void)
1320 {
1321         struct ring_buffer *buffer;
1322         unsigned long flags;
1323
1324         if (tracing_disabled)
1325                 return;
1326
1327         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1328         if (--global_trace.stop_count) {
1329                 if (global_trace.stop_count < 0) {
1330                         /* Someone screwed up their debugging */
1331                         WARN_ON_ONCE(1);
1332                         global_trace.stop_count = 0;
1333                 }
1334                 goto out;
1335         }
1336
1337         /* Prevent the buffers from switching */
1338         arch_spin_lock(&global_trace.max_lock);
1339
1340         buffer = global_trace.trace_buffer.buffer;
1341         if (buffer)
1342                 ring_buffer_record_enable(buffer);
1343
1344 #ifdef CONFIG_TRACER_MAX_TRACE
1345         buffer = global_trace.max_buffer.buffer;
1346         if (buffer)
1347                 ring_buffer_record_enable(buffer);
1348 #endif
1349
1350         arch_spin_unlock(&global_trace.max_lock);
1351
1352         ftrace_start();
1353  out:
1354         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1355 }
1356
1357 static void tracing_start_tr(struct trace_array *tr)
1358 {
1359         struct ring_buffer *buffer;
1360         unsigned long flags;
1361
1362         if (tracing_disabled)
1363                 return;
1364
1365         /* If global, we need to also start the max tracer */
1366         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1367                 return tracing_start();
1368
1369         raw_spin_lock_irqsave(&tr->start_lock, flags);
1370
1371         if (--tr->stop_count) {
1372                 if (tr->stop_count < 0) {
1373                         /* Someone screwed up their debugging */
1374                         WARN_ON_ONCE(1);
1375                         tr->stop_count = 0;
1376                 }
1377                 goto out;
1378         }
1379
1380         buffer = tr->trace_buffer.buffer;
1381         if (buffer)
1382                 ring_buffer_record_enable(buffer);
1383
1384  out:
1385         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1386 }
1387
1388 /**
1389  * tracing_stop - quick stop of the tracer
1390  *
1391  * Lightweight way to stop tracing. Use in conjunction with
1392  * tracing_start.
1393  */
1394 void tracing_stop(void)
1395 {
1396         struct ring_buffer *buffer;
1397         unsigned long flags;
1398
1399         ftrace_stop();
1400         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1401         if (global_trace.stop_count++)
1402                 goto out;
1403
1404         /* Prevent the buffers from switching */
1405         arch_spin_lock(&global_trace.max_lock);
1406
1407         buffer = global_trace.trace_buffer.buffer;
1408         if (buffer)
1409                 ring_buffer_record_disable(buffer);
1410
1411 #ifdef CONFIG_TRACER_MAX_TRACE
1412         buffer = global_trace.max_buffer.buffer;
1413         if (buffer)
1414                 ring_buffer_record_disable(buffer);
1415 #endif
1416
1417         arch_spin_unlock(&global_trace.max_lock);
1418
1419  out:
1420         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1421 }
1422
1423 static void tracing_stop_tr(struct trace_array *tr)
1424 {
1425         struct ring_buffer *buffer;
1426         unsigned long flags;
1427
1428         /* If global, we need to also stop the max tracer */
1429         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1430                 return tracing_stop();
1431
1432         raw_spin_lock_irqsave(&tr->start_lock, flags);
1433         if (tr->stop_count++)
1434                 goto out;
1435
1436         buffer = tr->trace_buffer.buffer;
1437         if (buffer)
1438                 ring_buffer_record_disable(buffer);
1439
1440  out:
1441         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1442 }
1443
1444 void trace_stop_cmdline_recording(void);
1445
1446 static void trace_save_cmdline(struct task_struct *tsk)
1447 {
1448         unsigned pid, idx;
1449
1450         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1451                 return;
1452
1453         /*
1454          * It's not the end of the world if we don't get
1455          * the lock, but we also don't want to spin
1456          * nor do we want to disable interrupts,
1457          * so if we miss here, then better luck next time.
1458          */
1459         if (!arch_spin_trylock(&trace_cmdline_lock))
1460                 return;
1461
1462         idx = map_pid_to_cmdline[tsk->pid];
1463         if (idx == NO_CMDLINE_MAP) {
1464                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1465
1466                 /*
1467                  * Check whether the cmdline buffer at idx has a pid
1468                  * mapped. We are going to overwrite that entry so we
1469                  * need to clear the map_pid_to_cmdline. Otherwise we
1470                  * would read the new comm for the old pid.
1471                  */
1472                 pid = map_cmdline_to_pid[idx];
1473                 if (pid != NO_CMDLINE_MAP)
1474                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1475
1476                 map_cmdline_to_pid[idx] = tsk->pid;
1477                 map_pid_to_cmdline[tsk->pid] = idx;
1478
1479                 cmdline_idx = idx;
1480         }
1481
1482         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1483
1484         arch_spin_unlock(&trace_cmdline_lock);
1485 }
1486
1487 void trace_find_cmdline(int pid, char comm[])
1488 {
1489         unsigned map;
1490
1491         if (!pid) {
1492                 strcpy(comm, "<idle>");
1493                 return;
1494         }
1495
1496         if (WARN_ON_ONCE(pid < 0)) {
1497                 strcpy(comm, "<XXX>");
1498                 return;
1499         }
1500
1501         if (pid > PID_MAX_DEFAULT) {
1502                 strcpy(comm, "<...>");
1503                 return;
1504         }
1505
1506         preempt_disable();
1507         arch_spin_lock(&trace_cmdline_lock);
1508         map = map_pid_to_cmdline[pid];
1509         if (map != NO_CMDLINE_MAP)
1510                 strcpy(comm, saved_cmdlines[map]);
1511         else
1512                 strcpy(comm, "<...>");
1513
1514         arch_spin_unlock(&trace_cmdline_lock);
1515         preempt_enable();
1516 }
1517
1518 void tracing_record_cmdline(struct task_struct *tsk)
1519 {
1520         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1521                 return;
1522
1523         if (!__this_cpu_read(trace_cmdline_save))
1524                 return;
1525
1526         __this_cpu_write(trace_cmdline_save, false);
1527
1528         trace_save_cmdline(tsk);
1529 }
1530
1531 void
1532 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1533                              int pc)
1534 {
1535         struct task_struct *tsk = current;
1536
1537         entry->preempt_count            = pc & 0xff;
1538         entry->pid                      = (tsk) ? tsk->pid : 0;
1539         entry->flags =
1540 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1541                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1542 #else
1543                 TRACE_FLAG_IRQS_NOSUPPORT |
1544 #endif
1545                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1546                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1547                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1548                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1549 }
1550 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1551
1552 struct ring_buffer_event *
1553 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1554                           int type,
1555                           unsigned long len,
1556                           unsigned long flags, int pc)
1557 {
1558         struct ring_buffer_event *event;
1559
1560         event = ring_buffer_lock_reserve(buffer, len);
1561         if (event != NULL) {
1562                 struct trace_entry *ent = ring_buffer_event_data(event);
1563
1564                 tracing_generic_entry_update(ent, flags, pc);
1565                 ent->type = type;
1566         }
1567
1568         return event;
1569 }
1570
1571 void
1572 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1573 {
1574         __this_cpu_write(trace_cmdline_save, true);
1575         ring_buffer_unlock_commit(buffer, event);
1576 }
1577
1578 static inline void
1579 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1580                              struct ring_buffer_event *event,
1581                              unsigned long flags, int pc)
1582 {
1583         __buffer_unlock_commit(buffer, event);
1584
1585         ftrace_trace_stack(buffer, flags, 6, pc);
1586         ftrace_trace_userstack(buffer, flags, pc);
1587 }
1588
1589 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1590                                 struct ring_buffer_event *event,
1591                                 unsigned long flags, int pc)
1592 {
1593         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1594 }
1595 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1596
1597 static struct ring_buffer *temp_buffer;
1598
1599 struct ring_buffer_event *
1600 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1601                           struct ftrace_event_file *ftrace_file,
1602                           int type, unsigned long len,
1603                           unsigned long flags, int pc)
1604 {
1605         struct ring_buffer_event *entry;
1606
1607         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1608         entry = trace_buffer_lock_reserve(*current_rb,
1609                                          type, len, flags, pc);
1610         /*
1611          * If tracing is off, but we have triggers enabled
1612          * we still need to look at the event data. Use the temp_buffer
1613          * to store the trace event for the trigger to use. It's recursion
1614          * safe and will not be recorded anywhere.
1615          */
1616         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1617                 *current_rb = temp_buffer;
1618                 entry = trace_buffer_lock_reserve(*current_rb,
1619                                                   type, len, flags, pc);
1620         }
1621         return entry;
1622 }
1623 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1624
1625 struct ring_buffer_event *
1626 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1627                                   int type, unsigned long len,
1628                                   unsigned long flags, int pc)
1629 {
1630         *current_rb = global_trace.trace_buffer.buffer;
1631         return trace_buffer_lock_reserve(*current_rb,
1632                                          type, len, flags, pc);
1633 }
1634 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1635
1636 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1637                                         struct ring_buffer_event *event,
1638                                         unsigned long flags, int pc)
1639 {
1640         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1641 }
1642 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1643
1644 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1645                                      struct ring_buffer_event *event,
1646                                      unsigned long flags, int pc,
1647                                      struct pt_regs *regs)
1648 {
1649         __buffer_unlock_commit(buffer, event);
1650
1651         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1652         ftrace_trace_userstack(buffer, flags, pc);
1653 }
1654 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1655
1656 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1657                                          struct ring_buffer_event *event)
1658 {
1659         ring_buffer_discard_commit(buffer, event);
1660 }
1661 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1662
1663 void
1664 trace_function(struct trace_array *tr,
1665                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1666                int pc)
1667 {
1668         struct ftrace_event_call *call = &event_function;
1669         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1670         struct ring_buffer_event *event;
1671         struct ftrace_entry *entry;
1672
1673         /* If we are reading the ring buffer, don't trace */
1674         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1675                 return;
1676
1677         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1678                                           flags, pc);
1679         if (!event)
1680                 return;
1681         entry   = ring_buffer_event_data(event);
1682         entry->ip                       = ip;
1683         entry->parent_ip                = parent_ip;
1684
1685         if (!call_filter_check_discard(call, entry, buffer, event))
1686                 __buffer_unlock_commit(buffer, event);
1687 }
1688
1689 #ifdef CONFIG_STACKTRACE
1690
1691 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1692 struct ftrace_stack {
1693         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1694 };
1695
1696 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1697 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1698
1699 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1700                                  unsigned long flags,
1701                                  int skip, int pc, struct pt_regs *regs)
1702 {
1703         struct ftrace_event_call *call = &event_kernel_stack;
1704         struct ring_buffer_event *event;
1705         struct stack_entry *entry;
1706         struct stack_trace trace;
1707         int use_stack;
1708         int size = FTRACE_STACK_ENTRIES;
1709
1710         trace.nr_entries        = 0;
1711         trace.skip              = skip;
1712
1713         /*
1714          * Since events can happen in NMIs there's no safe way to
1715          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1716          * or NMI comes in, it will just have to use the default
1717          * FTRACE_STACK_ENTRIES sized stack entry.
1718          */
1719         preempt_disable_notrace();
1720
1721         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1722         /*
1723          * We don't need any atomic variables, just a barrier.
1724          * If an interrupt comes in, we don't care, because it would
1725          * have exited and put the counter back to what we want.
1726          * We just need a barrier to keep gcc from moving things
1727          * around.
1728          */
1729         barrier();
1730         if (use_stack == 1) {
1731                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1732                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1733
1734                 if (regs)
1735                         save_stack_trace_regs(regs, &trace);
1736                 else
1737                         save_stack_trace(&trace);
1738
1739                 if (trace.nr_entries > size)
1740                         size = trace.nr_entries;
1741         } else
1742                 /* From now on, use_stack is a boolean */
1743                 use_stack = 0;
1744
1745         size *= sizeof(unsigned long);
1746
1747         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1748                                           sizeof(*entry) + size, flags, pc);
1749         if (!event)
1750                 goto out;
1751         entry = ring_buffer_event_data(event);
1752
1753         memset(&entry->caller, 0, size);
1754
1755         if (use_stack)
1756                 memcpy(&entry->caller, trace.entries,
1757                        trace.nr_entries * sizeof(unsigned long));
1758         else {
1759                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1760                 trace.entries           = entry->caller;
1761                 if (regs)
1762                         save_stack_trace_regs(regs, &trace);
1763                 else
1764                         save_stack_trace(&trace);
1765         }
1766
1767         entry->size = trace.nr_entries;
1768
1769         if (!call_filter_check_discard(call, entry, buffer, event))
1770                 __buffer_unlock_commit(buffer, event);
1771
1772  out:
1773         /* Again, don't let gcc optimize things here */
1774         barrier();
1775         __this_cpu_dec(ftrace_stack_reserve);
1776         preempt_enable_notrace();
1777
1778 }
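
/*
 * Concrete nesting example (restating the logic above): if an NMI fires
 * after the reservation, its nested __ftrace_trace_stack() sees
 * ftrace_stack_reserve > 1, skips the per-cpu ftrace_stack, and saves at
 * most FTRACE_STACK_ENTRIES entries directly into its ring buffer entry.
 */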
1779
1780 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1781                              int skip, int pc, struct pt_regs *regs)
1782 {
1783         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1784                 return;
1785
1786         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1787 }
1788
1789 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1790                         int skip, int pc)
1791 {
1792         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1793                 return;
1794
1795         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1796 }
1797
1798 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1799                    int pc)
1800 {
1801         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1802 }
1803
1804 /**
1805  * trace_dump_stack - record a stack back trace in the trace buffer
1806  * @skip: Number of functions to skip (helper handlers)
1807  */
1808 void trace_dump_stack(int skip)
1809 {
1810         unsigned long flags;
1811
1812         if (tracing_disabled || tracing_selftest_running)
1813                 return;
1814
1815         local_save_flags(flags);
1816
1817         /*
1818          * Skip 3 more; that seems to get us to the caller of
1819          * this function.
1820          */
1821         skip += 3;
1822         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1823                              flags, skip, preempt_count(), NULL);
1824 }
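
/*
 * Example use (illustrative): sprinkle trace_dump_stack(0) into a code
 * path being debugged to record the current kernel stack in the trace
 * buffer instead of dumping it to the console with dump_stack().
 */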
1825
1826 static DEFINE_PER_CPU(int, user_stack_count);
1827
1828 void
1829 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1830 {
1831         struct ftrace_event_call *call = &event_user_stack;
1832         struct ring_buffer_event *event;
1833         struct userstack_entry *entry;
1834         struct stack_trace trace;
1835
1836         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1837                 return;
1838
1839         /*
1840          * NMIs can not handle page faults, even with fixups.
1841          * Saving the user stack can (and often does) fault.
1842          */
1843         if (unlikely(in_nmi()))
1844                 return;
1845
1846         /*
1847          * prevent recursion, since the user stack tracing may
1848          * trigger other kernel events.
1849          */
1850         preempt_disable();
1851         if (__this_cpu_read(user_stack_count))
1852                 goto out;
1853
1854         __this_cpu_inc(user_stack_count);
1855
1856         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1857                                           sizeof(*entry), flags, pc);
1858         if (!event)
1859                 goto out_drop_count;
1860         entry   = ring_buffer_event_data(event);
1861
1862         entry->tgid             = current->tgid;
1863         memset(&entry->caller, 0, sizeof(entry->caller));
1864
1865         trace.nr_entries        = 0;
1866         trace.max_entries       = FTRACE_STACK_ENTRIES;
1867         trace.skip              = 0;
1868         trace.entries           = entry->caller;
1869
1870         save_stack_trace_user(&trace);
1871         if (!call_filter_check_discard(call, entry, buffer, event))
1872                 __buffer_unlock_commit(buffer, event);
1873
1874  out_drop_count:
1875         __this_cpu_dec(user_stack_count);
1876  out:
1877         preempt_enable();
1878 }
1879
1880 #ifdef UNUSED
1881 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1882 {
1883         ftrace_trace_userstack(tr, flags, preempt_count());
1884 }
1885 #endif /* UNUSED */
1886
1887 #endif /* CONFIG_STACKTRACE */
1888
1889 /* created for use with alloc_percpu */
1890 struct trace_buffer_struct {
1891         char buffer[TRACE_BUF_SIZE];
1892 };
1893
1894 static struct trace_buffer_struct *trace_percpu_buffer;
1895 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1896 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1897 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1898
1899 /*
1900  * The buffer used is dependent on the context. There is a per cpu
1901  * buffer for normal context, softirq context, hard irq context and
1902  * for NMI context. This allows for lockless recording.
1903  *
1904  * Note, if the buffers failed to be allocated, then this returns NULL.
1905  */
1906 static char *get_trace_buf(void)
1907 {
1908         struct trace_buffer_struct *percpu_buffer;
1909
1910         /*
1911          * If we have allocated per cpu buffers, then we do not
1912          * need to do any locking.
1913          */
1914         if (in_nmi())
1915                 percpu_buffer = trace_percpu_nmi_buffer;
1916         else if (in_irq())
1917                 percpu_buffer = trace_percpu_irq_buffer;
1918         else if (in_softirq())
1919                 percpu_buffer = trace_percpu_sirq_buffer;
1920         else
1921                 percpu_buffer = trace_percpu_buffer;
1922
1923         if (!percpu_buffer)
1924                 return NULL;
1925
1926         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1927 }
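
/*
 * Note: callers such as trace_vbprintk() below disable preemption around
 * the use of the returned buffer, so the task cannot migrate while the
 * per-cpu buffer is in use.
 */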
1928
1929 static int alloc_percpu_trace_buffer(void)
1930 {
1931         struct trace_buffer_struct *buffers;
1932         struct trace_buffer_struct *sirq_buffers;
1933         struct trace_buffer_struct *irq_buffers;
1934         struct trace_buffer_struct *nmi_buffers;
1935
1936         buffers = alloc_percpu(struct trace_buffer_struct);
1937         if (!buffers)
1938                 goto err_warn;
1939
1940         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1941         if (!sirq_buffers)
1942                 goto err_sirq;
1943
1944         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1945         if (!irq_buffers)
1946                 goto err_irq;
1947
1948         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1949         if (!nmi_buffers)
1950                 goto err_nmi;
1951
1952         trace_percpu_buffer = buffers;
1953         trace_percpu_sirq_buffer = sirq_buffers;
1954         trace_percpu_irq_buffer = irq_buffers;
1955         trace_percpu_nmi_buffer = nmi_buffers;
1956
1957         return 0;
1958
1959  err_nmi:
1960         free_percpu(irq_buffers);
1961  err_irq:
1962         free_percpu(sirq_buffers);
1963  err_sirq:
1964         free_percpu(buffers);
1965  err_warn:
1966         WARN(1, "Could not allocate percpu trace_printk buffer");
1967         return -ENOMEM;
1968 }
1969
1970 static int buffers_allocated;
1971
1972 void trace_printk_init_buffers(void)
1973 {
1974         if (buffers_allocated)
1975                 return;
1976
1977         if (alloc_percpu_trace_buffer())
1978                 return;
1979
1980         pr_info("ftrace: Allocated trace_printk buffers\n");
1981
1982         /* Expand the buffers to the set size */
1983         tracing_update_buffers();
1984
1985         buffers_allocated = 1;
1986
1987         /*
1988          * trace_printk_init_buffers() can be called by modules.
1989          * If that happens, then we need to start cmdline recording
1990          * directly here. If the global_trace.trace_buffer.buffer is already
1991          * allocated here, then this was called by module code.
1992          */
1993         if (global_trace.trace_buffer.buffer)
1994                 tracing_start_cmdline_record();
1995 }
1996
1997 void trace_printk_start_comm(void)
1998 {
1999         /* Start tracing comms if trace printk is set */
2000         if (!buffers_allocated)
2001                 return;
2002         tracing_start_cmdline_record();
2003 }
2004
2005 static void trace_printk_start_stop_comm(int enabled)
2006 {
2007         if (!buffers_allocated)
2008                 return;
2009
2010         if (enabled)
2011                 tracing_start_cmdline_record();
2012         else
2013                 tracing_stop_cmdline_record();
2014 }
2015
2016 /**
2017  * trace_vbprintk - write binary msg to tracing buffer
2018  *
2019  */
2020 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2021 {
2022         struct ftrace_event_call *call = &event_bprint;
2023         struct ring_buffer_event *event;
2024         struct ring_buffer *buffer;
2025         struct trace_array *tr = &global_trace;
2026         struct bprint_entry *entry;
2027         unsigned long flags;
2028         char *tbuffer;
2029         int len = 0, size, pc;
2030
2031         if (unlikely(tracing_selftest_running || tracing_disabled))
2032                 return 0;
2033
2034         /* Don't pollute graph traces with trace_vprintk internals */
2035         pause_graph_tracing();
2036
2037         pc = preempt_count();
2038         preempt_disable_notrace();
2039
2040         tbuffer = get_trace_buf();
2041         if (!tbuffer) {
2042                 len = 0;
2043                 goto out;
2044         }
2045
2046         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2047
2048         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2049                 goto out;
2050
2051         local_save_flags(flags);
2052         size = sizeof(*entry) + sizeof(u32) * len;
2053         buffer = tr->trace_buffer.buffer;
2054         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2055                                           flags, pc);
2056         if (!event)
2057                 goto out;
2058         entry = ring_buffer_event_data(event);
2059         entry->ip                       = ip;
2060         entry->fmt                      = fmt;
2061
2062         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2063         if (!call_filter_check_discard(call, entry, buffer, event)) {
2064                 __buffer_unlock_commit(buffer, event);
2065                 ftrace_trace_stack(buffer, flags, 6, pc);
2066         }
2067
2068 out:
2069         preempt_enable_notrace();
2070         unpause_graph_tracing();
2071
2072         return len;
2073 }
2074 EXPORT_SYMBOL_GPL(trace_vbprintk);
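
/*
 * Sketch of the usual path into this function: a trace_printk() call with
 * a constant format string is routed here via __trace_bprintk(); only the
 * binary arguments and a pointer to the format are recorded, and the text
 * is formatted later at read time.
 */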
2075
2076 static int
2077 __trace_array_vprintk(struct ring_buffer *buffer,
2078                       unsigned long ip, const char *fmt, va_list args)
2079 {
2080         struct ftrace_event_call *call = &event_print;
2081         struct ring_buffer_event *event;
2082         int len = 0, size, pc;
2083         struct print_entry *entry;
2084         unsigned long flags;
2085         char *tbuffer;
2086
2087         if (tracing_disabled || tracing_selftest_running)
2088                 return 0;
2089
2090         /* Don't pollute graph traces with trace_vprintk internals */
2091         pause_graph_tracing();
2092
2093         pc = preempt_count();
2094         preempt_disable_notrace();
2095
2096
2097         tbuffer = get_trace_buf();
2098         if (!tbuffer) {
2099                 len = 0;
2100                 goto out;
2101         }
2102
2103         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2104         if (len > TRACE_BUF_SIZE)
2105                 goto out;
2106
2107         local_save_flags(flags);
2108         size = sizeof(*entry) + len + 1;
2109         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2110                                           flags, pc);
2111         if (!event)
2112                 goto out;
2113         entry = ring_buffer_event_data(event);
2114         entry->ip = ip;
2115
2116         memcpy(&entry->buf, tbuffer, len);
2117         entry->buf[len] = '\0';
2118         if (!call_filter_check_discard(call, entry, buffer, event)) {
2119                 __buffer_unlock_commit(buffer, event);
2120                 ftrace_trace_stack(buffer, flags, 6, pc);
2121         }
2122  out:
2123         preempt_enable_notrace();
2124         unpause_graph_tracing();
2125
2126         return len;
2127 }
2128
2129 int trace_array_vprintk(struct trace_array *tr,
2130                         unsigned long ip, const char *fmt, va_list args)
2131 {
2132         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2133 }
2134
2135 int trace_array_printk(struct trace_array *tr,
2136                        unsigned long ip, const char *fmt, ...)
2137 {
2138         int ret;
2139         va_list ap;
2140
2141         if (!(trace_flags & TRACE_ITER_PRINTK))
2142                 return 0;
2143
2144         va_start(ap, fmt);
2145         ret = trace_array_vprintk(tr, ip, fmt, ap);
2146         va_end(ap);
2147         return ret;
2148 }
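
/*
 * Example use (illustrative): a tracer writing into a specific instance
 * buffer rather than the global one:
 *
 *	trace_array_printk(tr, _THIS_IP_, "hit count %d\n", count);
 */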
2149
2150 int trace_array_printk_buf(struct ring_buffer *buffer,
2151                            unsigned long ip, const char *fmt, ...)
2152 {
2153         int ret;
2154         va_list ap;
2155
2156         if (!(trace_flags & TRACE_ITER_PRINTK))
2157                 return 0;
2158
2159         va_start(ap, fmt);
2160         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2161         va_end(ap);
2162         return ret;
2163 }
2164
2165 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2166 {
2167         return trace_array_vprintk(&global_trace, ip, fmt, args);
2168 }
2169 EXPORT_SYMBOL_GPL(trace_vprintk);
2170
2171 static void trace_iterator_increment(struct trace_iterator *iter)
2172 {
2173         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2174
2175         iter->idx++;
2176         if (buf_iter)
2177                 ring_buffer_read(buf_iter, NULL);
2178 }
2179
2180 static struct trace_entry *
2181 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2182                 unsigned long *lost_events)
2183 {
2184         struct ring_buffer_event *event;
2185         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2186
2187         if (buf_iter)
2188                 event = ring_buffer_iter_peek(buf_iter, ts);
2189         else
2190                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2191                                          lost_events);
2192
2193         if (event) {
2194                 iter->ent_size = ring_buffer_event_length(event);
2195                 return ring_buffer_event_data(event);
2196         }
2197         iter->ent_size = 0;
2198         return NULL;
2199 }
2200
2201 static struct trace_entry *
2202 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2203                   unsigned long *missing_events, u64 *ent_ts)
2204 {
2205         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2206         struct trace_entry *ent, *next = NULL;
2207         unsigned long lost_events = 0, next_lost = 0;
2208         int cpu_file = iter->cpu_file;
2209         u64 next_ts = 0, ts;
2210         int next_cpu = -1;
2211         int next_size = 0;
2212         int cpu;
2213
2214         /*
2215          * If we are in a per_cpu trace file, don't bother iterating over
2216          * all CPUs; peek at that cpu directly.
2217          */
2218         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2219                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2220                         return NULL;
2221                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2222                 if (ent_cpu)
2223                         *ent_cpu = cpu_file;
2224
2225                 return ent;
2226         }
2227
2228         for_each_tracing_cpu(cpu) {
2229
2230                 if (ring_buffer_empty_cpu(buffer, cpu))
2231                         continue;
2232
2233                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2234
2235                 /*
2236                  * Pick the entry with the smallest timestamp:
2237                  */
2238                 if (ent && (!next || ts < next_ts)) {
2239                         next = ent;
2240                         next_cpu = cpu;
2241                         next_ts = ts;
2242                         next_lost = lost_events;
2243                         next_size = iter->ent_size;
2244                 }
2245         }
2246
2247         iter->ent_size = next_size;
2248
2249         if (ent_cpu)
2250                 *ent_cpu = next_cpu;
2251
2252         if (ent_ts)
2253                 *ent_ts = next_ts;
2254
2255         if (missing_events)
2256                 *missing_events = next_lost;
2257
2258         return next;
2259 }
2260
2261 /* Find the next real entry, without updating the iterator itself */
2262 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2263                                           int *ent_cpu, u64 *ent_ts)
2264 {
2265         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2266 }
2267
2268 /* Find the next real entry, and increment the iterator to the next entry */
2269 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2270 {
2271         iter->ent = __find_next_entry(iter, &iter->cpu,
2272                                       &iter->lost_events, &iter->ts);
2273
2274         if (iter->ent)
2275                 trace_iterator_increment(iter);
2276
2277         return iter->ent ? iter : NULL;
2278 }
2279
2280 static void trace_consume(struct trace_iterator *iter)
2281 {
2282         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2283                             &iter->lost_events);
2284 }
2285
2286 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2287 {
2288         struct trace_iterator *iter = m->private;
2289         int i = (int)*pos;
2290         void *ent;
2291
2292         WARN_ON_ONCE(iter->leftover);
2293
2294         (*pos)++;
2295
2296         /* can't go backwards */
2297         if (iter->idx > i)
2298                 return NULL;
2299
2300         if (iter->idx < 0)
2301                 ent = trace_find_next_entry_inc(iter);
2302         else
2303                 ent = iter;
2304
2305         while (ent && iter->idx < i)
2306                 ent = trace_find_next_entry_inc(iter);
2307
2308         iter->pos = *pos;
2309
2310         return ent;
2311 }
2312
2313 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2314 {
2315         struct ring_buffer_event *event;
2316         struct ring_buffer_iter *buf_iter;
2317         unsigned long entries = 0;
2318         u64 ts;
2319
2320         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2321
2322         buf_iter = trace_buffer_iter(iter, cpu);
2323         if (!buf_iter)
2324                 return;
2325
2326         ring_buffer_iter_reset(buf_iter);
2327
2328         /*
2329          * With the max latency tracers, we could have the case that
2330          * a reset never took place on a cpu. This is evident when
2331          * the timestamp is before the start of the buffer.
2332          */
2333         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2334                 if (ts >= iter->trace_buffer->time_start)
2335                         break;
2336                 entries++;
2337                 ring_buffer_read(buf_iter, NULL);
2338         }
2339
2340         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2341 }
2342
2343 /*
2344  * The current tracer is copied to avoid taking a global lock
2345  * all around.
2346  */
2347 static void *s_start(struct seq_file *m, loff_t *pos)
2348 {
2349         struct trace_iterator *iter = m->private;
2350         struct trace_array *tr = iter->tr;
2351         int cpu_file = iter->cpu_file;
2352         void *p = NULL;
2353         loff_t l = 0;
2354         int cpu;
2355
2356         /*
2357          * Copy the tracer to avoid using a global lock all around.
2358          * iter->trace is a copy of current_trace; the pointer to the
2359          * name may be used instead of a strcmp(), as iter->trace->name
2360          * will point to the same string as current_trace->name.
2361          */
2362         mutex_lock(&trace_types_lock);
2363         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2364                 *iter->trace = *tr->current_trace;
2365         mutex_unlock(&trace_types_lock);
2366
2367 #ifdef CONFIG_TRACER_MAX_TRACE
2368         if (iter->snapshot && iter->trace->use_max_tr)
2369                 return ERR_PTR(-EBUSY);
2370 #endif
2371
2372         if (!iter->snapshot)
2373                 atomic_inc(&trace_record_cmdline_disabled);
2374
2375         if (*pos != iter->pos) {
2376                 iter->ent = NULL;
2377                 iter->cpu = 0;
2378                 iter->idx = -1;
2379
2380                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2381                         for_each_tracing_cpu(cpu)
2382                                 tracing_iter_reset(iter, cpu);
2383                 } else
2384                         tracing_iter_reset(iter, cpu_file);
2385
2386                 iter->leftover = 0;
2387                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2388                         ;
2389
2390         } else {
2391                 /*
2392                  * If we overflowed the seq_file before, then we want
2393                  * to just reuse the trace_seq buffer again.
2394                  */
2395                 if (iter->leftover)
2396                         p = iter;
2397                 else {
2398                         l = *pos - 1;
2399                         p = s_next(m, p, &l);
2400                 }
2401         }
2402
2403         trace_event_read_lock();
2404         trace_access_lock(cpu_file);
2405         return p;
2406 }
2407
2408 static void s_stop(struct seq_file *m, void *p)
2409 {
2410         struct trace_iterator *iter = m->private;
2411
2412 #ifdef CONFIG_TRACER_MAX_TRACE
2413         if (iter->snapshot && iter->trace->use_max_tr)
2414                 return;
2415 #endif
2416
2417         if (!iter->snapshot)
2418                 atomic_dec(&trace_record_cmdline_disabled);
2419
2420         trace_access_unlock(iter->cpu_file);
2421         trace_event_read_unlock();
2422 }
2423
2424 static void
2425 get_total_entries(struct trace_buffer *buf,
2426                   unsigned long *total, unsigned long *entries)
2427 {
2428         unsigned long count;
2429         int cpu;
2430
2431         *total = 0;
2432         *entries = 0;
2433
2434         for_each_tracing_cpu(cpu) {
2435                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2436                 /*
2437                  * If this buffer has skipped entries, then we hold all
2438                  * entries for the trace and we need to ignore the
2439                  * ones before the time stamp.
2440                  */
2441                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2442                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2443                         /* total is the same as the entries */
2444                         *total += count;
2445                 } else
2446                         *total += count +
2447                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2448                 *entries += count;
2449         }
2450 }
2451
2452 static void print_lat_help_header(struct seq_file *m)
2453 {
2454         seq_puts(m, "#                  _------=> CPU#            \n");
2455         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2456         seq_puts(m, "#                | / _----=> need-resched    \n");
2457         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2458         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2459         seq_puts(m, "#                |||| /     delay             \n");
2460         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2461         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2462 }
2463
2464 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2465 {
2466         unsigned long total;
2467         unsigned long entries;
2468
2469         get_total_entries(buf, &total, &entries);
2470         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2471                    entries, total, num_online_cpus());
2472         seq_puts(m, "#\n");
2473 }
2474
2475 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2476 {
2477         print_event_info(buf, m);
2478         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2479         seq_puts(m, "#              | |       |          |         |\n");
2480 }
2481
2482 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2483 {
2484         print_event_info(buf, m);
2485         seq_puts(m, "#                              _-----=> irqs-off\n");
2486         seq_puts(m, "#                             / _----=> need-resched\n");
2487         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2488         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2489         seq_puts(m, "#                            ||| /     delay\n");
2490         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2491         seq_puts(m, "#              | |       |   ||||       |         |\n");
2492 }
2493
2494 void
2495 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2496 {
2497         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2498         struct trace_buffer *buf = iter->trace_buffer;
2499         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2500         struct tracer *type = iter->trace;
2501         unsigned long entries;
2502         unsigned long total;
2503         const char *name;
2504
2505         name = type->name;
2506
2507         get_total_entries(buf, &total, &entries);
2508
2509         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2510                    name, UTS_RELEASE);
2511         seq_puts(m, "# -----------------------------------"
2512                  "---------------------------------\n");
2513         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2514                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2515                    nsecs_to_usecs(data->saved_latency),
2516                    entries,
2517                    total,
2518                    buf->cpu,
2519 #if defined(CONFIG_PREEMPT_NONE)
2520                    "server",
2521 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2522                    "desktop",
2523 #elif defined(CONFIG_PREEMPT)
2524                    "preempt",
2525 #else
2526                    "unknown",
2527 #endif
2528                    /* These are reserved for later use */
2529                    0, 0, 0, 0);
2530 #ifdef CONFIG_SMP
2531         seq_printf(m, " #P:%d)\n", num_online_cpus());
2532 #else
2533         seq_puts(m, ")\n");
2534 #endif
2535         seq_puts(m, "#    -----------------\n");
2536         seq_printf(m, "#    | task: %.16s-%d "
2537                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2538                    data->comm, data->pid,
2539                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2540                    data->policy, data->rt_priority);
2541         seq_puts(m, "#    -----------------\n");
2542
2543         if (data->critical_start) {
2544                 seq_puts(m, "#  => started at: ");
2545                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2546                 trace_print_seq(m, &iter->seq);
2547                 seq_puts(m, "\n#  => ended at:   ");
2548                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2549                 trace_print_seq(m, &iter->seq);
2550                 seq_puts(m, "\n#\n");
2551         }
2552
2553         seq_puts(m, "#\n");
2554 }
2555
2556 static void test_cpu_buff_start(struct trace_iterator *iter)
2557 {
2558         struct trace_seq *s = &iter->seq;
2559
2560         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2561                 return;
2562
2563         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2564                 return;
2565
2566         if (cpumask_test_cpu(iter->cpu, iter->started))
2567                 return;
2568
2569         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2570                 return;
2571
2572         cpumask_set_cpu(iter->cpu, iter->started);
2573
2574         /* Don't print the started-cpu-buffer message for the first entry of the trace */
2575         if (iter->idx > 1)
2576                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2577                                 iter->cpu);
2578 }
2579
2580 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2581 {
2582         struct trace_seq *s = &iter->seq;
2583         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2584         struct trace_entry *entry;
2585         struct trace_event *event;
2586
2587         entry = iter->ent;
2588
2589         test_cpu_buff_start(iter);
2590
2591         event = ftrace_find_event(entry->type);
2592
2593         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2594                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2595                         if (!trace_print_lat_context(iter))
2596                                 goto partial;
2597                 } else {
2598                         if (!trace_print_context(iter))
2599                                 goto partial;
2600                 }
2601         }
2602
2603         if (event)
2604                 return event->funcs->trace(iter, sym_flags, event);
2605
2606         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2607                 goto partial;
2608
2609         return TRACE_TYPE_HANDLED;
2610 partial:
2611         return TRACE_TYPE_PARTIAL_LINE;
2612 }
2613
2614 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2615 {
2616         struct trace_seq *s = &iter->seq;
2617         struct trace_entry *entry;
2618         struct trace_event *event;
2619
2620         entry = iter->ent;
2621
2622         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2623                 if (!trace_seq_printf(s, "%d %d %llu ",
2624                                       entry->pid, iter->cpu, iter->ts))
2625                         goto partial;
2626         }
2627
2628         event = ftrace_find_event(entry->type);
2629         if (event)
2630                 return event->funcs->raw(iter, 0, event);
2631
2632         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2633                 goto partial;
2634
2635         return TRACE_TYPE_HANDLED;
2636 partial:
2637         return TRACE_TYPE_PARTIAL_LINE;
2638 }
2639
2640 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2641 {
2642         struct trace_seq *s = &iter->seq;
2643         unsigned char newline = '\n';
2644         struct trace_entry *entry;
2645         struct trace_event *event;
2646
2647         entry = iter->ent;
2648
2649         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2650                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2651                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2652                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2653         }
2654
2655         event = ftrace_find_event(entry->type);
2656         if (event) {
2657                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2658                 if (ret != TRACE_TYPE_HANDLED)
2659                         return ret;
2660         }
2661
2662         SEQ_PUT_FIELD_RET(s, newline);
2663
2664         return TRACE_TYPE_HANDLED;
2665 }
2666
2667 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2668 {
2669         struct trace_seq *s = &iter->seq;
2670         struct trace_entry *entry;
2671         struct trace_event *event;
2672
2673         entry = iter->ent;
2674
2675         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2676                 SEQ_PUT_FIELD_RET(s, entry->pid);
2677                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2678                 SEQ_PUT_FIELD_RET(s, iter->ts);
2679         }
2680
2681         event = ftrace_find_event(entry->type);
2682         return event ? event->funcs->binary(iter, 0, event) :
2683                 TRACE_TYPE_HANDLED;
2684 }
2685
2686 int trace_empty(struct trace_iterator *iter)
2687 {
2688         struct ring_buffer_iter *buf_iter;
2689         int cpu;
2690
2691         /* If we are looking at one CPU buffer, only check that one */
2692         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2693                 cpu = iter->cpu_file;
2694                 buf_iter = trace_buffer_iter(iter, cpu);
2695                 if (buf_iter) {
2696                         if (!ring_buffer_iter_empty(buf_iter))
2697                                 return 0;
2698                 } else {
2699                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2700                                 return 0;
2701                 }
2702                 return 1;
2703         }
2704
2705         for_each_tracing_cpu(cpu) {
2706                 buf_iter = trace_buffer_iter(iter, cpu);
2707                 if (buf_iter) {
2708                         if (!ring_buffer_iter_empty(buf_iter))
2709                                 return 0;
2710                 } else {
2711                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2712                                 return 0;
2713                 }
2714         }
2715
2716         return 1;
2717 }
2718
2719 /*  Called with trace_event_read_lock() held. */
2720 enum print_line_t print_trace_line(struct trace_iterator *iter)
2721 {
2722         enum print_line_t ret;
2723
2724         if (iter->lost_events &&
2725             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2726                                  iter->cpu, iter->lost_events))
2727                 return TRACE_TYPE_PARTIAL_LINE;
2728
2729         if (iter->trace && iter->trace->print_line) {
2730                 ret = iter->trace->print_line(iter);
2731                 if (ret != TRACE_TYPE_UNHANDLED)
2732                         return ret;
2733         }
2734
2735         if (iter->ent->type == TRACE_BPUTS &&
2736                         trace_flags & TRACE_ITER_PRINTK &&
2737                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2738                 return trace_print_bputs_msg_only(iter);
2739
2740         if (iter->ent->type == TRACE_BPRINT &&
2741                         trace_flags & TRACE_ITER_PRINTK &&
2742                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2743                 return trace_print_bprintk_msg_only(iter);
2744
2745         if (iter->ent->type == TRACE_PRINT &&
2746                         trace_flags & TRACE_ITER_PRINTK &&
2747                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2748                 return trace_print_printk_msg_only(iter);
2749
2750         if (trace_flags & TRACE_ITER_BIN)
2751                 return print_bin_fmt(iter);
2752
2753         if (trace_flags & TRACE_ITER_HEX)
2754                 return print_hex_fmt(iter);
2755
2756         if (trace_flags & TRACE_ITER_RAW)
2757                 return print_raw_fmt(iter);
2758
2759         return print_trace_fmt(iter);
2760 }
2761
2762 void trace_latency_header(struct seq_file *m)
2763 {
2764         struct trace_iterator *iter = m->private;
2765
2766         /* print nothing if the buffers are empty */
2767         if (trace_empty(iter))
2768                 return;
2769
2770         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2771                 print_trace_header(m, iter);
2772
2773         if (!(trace_flags & TRACE_ITER_VERBOSE))
2774                 print_lat_help_header(m);
2775 }
2776
2777 void trace_default_header(struct seq_file *m)
2778 {
2779         struct trace_iterator *iter = m->private;
2780
2781         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2782                 return;
2783
2784         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2785                 /* print nothing if the buffers are empty */
2786                 if (trace_empty(iter))
2787                         return;
2788                 print_trace_header(m, iter);
2789                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2790                         print_lat_help_header(m);
2791         } else {
2792                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2793                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2794                                 print_func_help_header_irq(iter->trace_buffer, m);
2795                         else
2796                                 print_func_help_header(iter->trace_buffer, m);
2797                 }
2798         }
2799 }
2800
2801 static void test_ftrace_alive(struct seq_file *m)
2802 {
2803         if (!ftrace_is_dead())
2804                 return;
2805         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2806         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2807 }
2808
2809 #ifdef CONFIG_TRACER_MAX_TRACE
2810 static void show_snapshot_main_help(struct seq_file *m)
2811 {
2812         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2813         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2814         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2815         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2816         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2817         seq_printf(m, "#                       is not a '0' or '1')\n");
2818 }
2819
2820 static void show_snapshot_percpu_help(struct seq_file *m)
2821 {
2822         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2823 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2824         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2825         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2826 #else
2827         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2828         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2829 #endif
2830         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2831         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2832         seq_printf(m, "#                       is not a '0' or '1')\n");
2833 }
2834
2835 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2836 {
2837         if (iter->tr->allocated_snapshot)
2838                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2839         else
2840                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2841
2842         seq_printf(m, "# Snapshot commands:\n");
2843         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2844                 show_snapshot_main_help(m);
2845         else
2846                 show_snapshot_percpu_help(m);
2847 }
2848 #else
2849 /* Should never be called */
2850 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2851 #endif
2852
2853 static int s_show(struct seq_file *m, void *v)
2854 {
2855         struct trace_iterator *iter = v;
2856         int ret;
2857
2858         if (iter->ent == NULL) {
2859                 if (iter->tr) {
2860                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2861                         seq_puts(m, "#\n");
2862                         test_ftrace_alive(m);
2863                 }
2864                 if (iter->snapshot && trace_empty(iter))
2865                         print_snapshot_help(m, iter);
2866                 else if (iter->trace && iter->trace->print_header)
2867                         iter->trace->print_header(m);
2868                 else
2869                         trace_default_header(m);
2870
2871         } else if (iter->leftover) {
2872                 /*
2873                  * If we filled the seq_file buffer earlier, we
2874                  * want to just show it now.
2875                  */
2876                 ret = trace_print_seq(m, &iter->seq);
2877
2878                 /* ret should this time be zero, but you never know */
2879                 iter->leftover = ret;
2880
2881         } else {
2882                 print_trace_line(iter);
2883                 ret = trace_print_seq(m, &iter->seq);
2884                 /*
2885                  * If we overflow the seq_file buffer, then it will
2886                  * ask us for this data again at start up.
2887                  * Use that instead.
2888                  *  ret is 0 if seq_file write succeeded.
2889                  *        -1 otherwise.
2890                  */
2891                 iter->leftover = ret;
2892         }
2893
2894         return 0;
2895 }
2896
2897 /*
2898  * Should be used after trace_array_get(); trace_types_lock
2899  * ensures that i_cdev was already initialized.
2900  */
2901 static inline int tracing_get_cpu(struct inode *inode)
2902 {
2903         if (inode->i_cdev) /* See trace_create_cpu_file() */
2904                 return (long)inode->i_cdev - 1;
2905         return RING_BUFFER_ALL_CPUS;
2906 }
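
/*
 * Per-cpu files store (cpu + 1) in i_cdev when they are created (see
 * trace_create_cpu_file()), so an i_cdev of zero means "no specific CPU"
 * and maps to RING_BUFFER_ALL_CPUS above.
 */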
2907
2908 static const struct seq_operations tracer_seq_ops = {
2909         .start          = s_start,
2910         .next           = s_next,
2911         .stop           = s_stop,
2912         .show           = s_show,
2913 };
2914
2915 static struct trace_iterator *
2916 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2917 {
2918         struct trace_array *tr = inode->i_private;
2919         struct trace_iterator *iter;
2920         int cpu;
2921
2922         if (tracing_disabled)
2923                 return ERR_PTR(-ENODEV);
2924
2925         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2926         if (!iter)
2927                 return ERR_PTR(-ENOMEM);
2928
2929         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2930                                     GFP_KERNEL);
2931         if (!iter->buffer_iter)
2932                 goto release;
2933
2934         /*
2935          * We make a copy of the current tracer to avoid concurrent
2936          * changes to it while we are reading.
2937          */
2938         mutex_lock(&trace_types_lock);
2939         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2940         if (!iter->trace)
2941                 goto fail;
2942
2943         *iter->trace = *tr->current_trace;
2944
2945         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2946                 goto fail;
2947
2948         iter->tr = tr;
2949
2950 #ifdef CONFIG_TRACER_MAX_TRACE
2951         /* Currently only the top directory has a snapshot */
2952         if (tr->current_trace->print_max || snapshot)
2953                 iter->trace_buffer = &tr->max_buffer;
2954         else
2955 #endif
2956                 iter->trace_buffer = &tr->trace_buffer;
2957         iter->snapshot = snapshot;
2958         iter->pos = -1;
2959         iter->cpu_file = tracing_get_cpu(inode);
2960         mutex_init(&iter->mutex);
2961
2962         /* Notify the tracer early; before we stop tracing. */
2963         if (iter->trace && iter->trace->open)
2964                 iter->trace->open(iter);
2965
2966         /* Annotate start of buffers if we had overruns */
2967         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2968                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2969
2970         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2971         if (trace_clocks[tr->clock_id].in_ns)
2972                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2973
2974         /* stop the trace while dumping if we are not opening "snapshot" */
2975         if (!iter->snapshot)
2976                 tracing_stop_tr(tr);
2977
2978         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2979                 for_each_tracing_cpu(cpu) {
2980                         iter->buffer_iter[cpu] =
2981                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2982                 }
2983                 ring_buffer_read_prepare_sync();
2984                 for_each_tracing_cpu(cpu) {
2985                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2986                         tracing_iter_reset(iter, cpu);
2987                 }
2988         } else {
2989                 cpu = iter->cpu_file;
2990                 iter->buffer_iter[cpu] =
2991                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2992                 ring_buffer_read_prepare_sync();
2993                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2994                 tracing_iter_reset(iter, cpu);
2995         }
2996
2997         mutex_unlock(&trace_types_lock);
2998
2999         return iter;
3000
3001  fail:
3002         mutex_unlock(&trace_types_lock);
3003         kfree(iter->trace);
3004         kfree(iter->buffer_iter);
3005 release:
3006         seq_release_private(inode, file);
3007         return ERR_PTR(-ENOMEM);
3008 }
3009
3010 int tracing_open_generic(struct inode *inode, struct file *filp)
3011 {
3012         if (tracing_disabled)
3013                 return -ENODEV;
3014
3015         filp->private_data = inode->i_private;
3016         return 0;
3017 }
3018
3019 bool tracing_is_disabled(void)
3020 {
3021         return tracing_disabled ? true : false;
3022 }
3023
3024 /*
3025  * Open and update trace_array ref count.
3026  * Must have the current trace_array passed to it.
3027  */
3028 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3029 {
3030         struct trace_array *tr = inode->i_private;
3031
3032         if (tracing_disabled)
3033                 return -ENODEV;
3034
3035         if (trace_array_get(tr) < 0)
3036                 return -ENODEV;
3037
3038         filp->private_data = inode->i_private;
3039
3040         return 0;
3041 }
3042
3043 static int tracing_release(struct inode *inode, struct file *file)
3044 {
3045         struct trace_array *tr = inode->i_private;
3046         struct seq_file *m = file->private_data;
3047         struct trace_iterator *iter;
3048         int cpu;
3049
3050         if (!(file->f_mode & FMODE_READ)) {
3051                 trace_array_put(tr);
3052                 return 0;
3053         }
3054
3055         /* Writes do not use seq_file */
3056         iter = m->private;
3057         mutex_lock(&trace_types_lock);
3058
3059         for_each_tracing_cpu(cpu) {
3060                 if (iter->buffer_iter[cpu])
3061                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3062         }
3063
3064         if (iter->trace && iter->trace->close)
3065                 iter->trace->close(iter);
3066
3067         if (!iter->snapshot)
3068                 /* reenable tracing if it was previously enabled */
3069                 tracing_start_tr(tr);
3070
3071         __trace_array_put(tr);
3072
3073         mutex_unlock(&trace_types_lock);
3074
3075         mutex_destroy(&iter->mutex);
3076         free_cpumask_var(iter->started);
3077         kfree(iter->trace);
3078         kfree(iter->buffer_iter);
3079         seq_release_private(inode, file);
3080
3081         return 0;
3082 }
3083
3084 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3085 {
3086         struct trace_array *tr = inode->i_private;
3087
3088         trace_array_put(tr);
3089         return 0;
3090 }
3091
3092 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3093 {
3094         struct trace_array *tr = inode->i_private;
3095
3096         trace_array_put(tr);
3097
3098         return single_release(inode, file);
3099 }
3100
3101 static int tracing_open(struct inode *inode, struct file *file)
3102 {
3103         struct trace_array *tr = inode->i_private;
3104         struct trace_iterator *iter;
3105         int ret = 0;
3106
3107         if (trace_array_get(tr) < 0)
3108                 return -ENODEV;
3109
3110         /* If this file was open for write, then erase contents */
3111         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3112                 int cpu = tracing_get_cpu(inode);
3113
3114                 if (cpu == RING_BUFFER_ALL_CPUS)
3115                         tracing_reset_online_cpus(&tr->trace_buffer);
3116                 else
3117                         tracing_reset(&tr->trace_buffer, cpu);
3118         }
3119
3120         if (file->f_mode & FMODE_READ) {
3121                 iter = __tracing_open(inode, file, false);
3122                 if (IS_ERR(iter))
3123                         ret = PTR_ERR(iter);
3124                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3125                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3126         }
3127
3128         if (ret < 0)
3129                 trace_array_put(tr);
3130
3131         return ret;
3132 }
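
/*
 * Usage note (illustrative): opening the "trace" file for writing with
 * O_TRUNC, e.g. "echo > trace" from a shell, takes the erase path above
 * and clears the buffer(s); the write itself is ignored by
 * tracing_write_stub() below.
 */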
3133
3134 /*
3135  * Some tracers are not suitable for instance buffers.
3136  * A tracer is always available for the global array (toplevel)
3137  * or if it explicitly states that it is.
3138  */
3139 static bool
3140 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3141 {
3142         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3143 }
3144
3145 /* Find the next tracer that this trace array may use */
3146 static struct tracer *
3147 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3148 {
3149         while (t && !trace_ok_for_array(t, tr))
3150                 t = t->next;
3151
3152         return t;
3153 }
3154
3155 static void *
3156 t_next(struct seq_file *m, void *v, loff_t *pos)
3157 {
3158         struct trace_array *tr = m->private;
3159         struct tracer *t = v;
3160
3161         (*pos)++;
3162
3163         if (t)
3164                 t = get_tracer_for_array(tr, t->next);
3165
3166         return t;
3167 }
3168
3169 static void *t_start(struct seq_file *m, loff_t *pos)
3170 {
3171         struct trace_array *tr = m->private;
3172         struct tracer *t;
3173         loff_t l = 0;
3174
3175         mutex_lock(&trace_types_lock);
3176
3177         t = get_tracer_for_array(tr, trace_types);
3178         for (; t && l < *pos; t = t_next(m, t, &l))
3179                         ;
3180
3181         return t;
3182 }
3183
3184 static void t_stop(struct seq_file *m, void *p)
3185 {
3186         mutex_unlock(&trace_types_lock);
3187 }
3188
3189 static int t_show(struct seq_file *m, void *v)
3190 {
3191         struct tracer *t = v;
3192
3193         if (!t)
3194                 return 0;
3195
3196         seq_printf(m, "%s", t->name);
3197         if (t->next)
3198                 seq_putc(m, ' ');
3199         else
3200                 seq_putc(m, '\n');
3201
3202         return 0;
3203 }
3204
3205 static const struct seq_operations show_traces_seq_ops = {
3206         .start          = t_start,
3207         .next           = t_next,
3208         .stop           = t_stop,
3209         .show           = t_show,
3210 };
3211
3212 static int show_traces_open(struct inode *inode, struct file *file)
3213 {
3214         struct trace_array *tr = inode->i_private;
3215         struct seq_file *m;
3216         int ret;
3217
3218         if (tracing_disabled)
3219                 return -ENODEV;
3220
3221         ret = seq_open(file, &show_traces_seq_ops);
3222         if (ret)
3223                 return ret;
3224
3225         m = file->private_data;
3226         m->private = tr;
3227
3228         return 0;
3229 }
3230
3231 static ssize_t
3232 tracing_write_stub(struct file *filp, const char __user *ubuf,
3233                    size_t count, loff_t *ppos)
3234 {
3235         return count;
3236 }
3237
3238 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3239 {
3240         int ret;
3241
3242         if (file->f_mode & FMODE_READ)
3243                 ret = seq_lseek(file, offset, whence);
3244         else
3245                 file->f_pos = ret = 0;
3246
3247         return ret;
3248 }
3249
3250 static const struct file_operations tracing_fops = {
3251         .open           = tracing_open,
3252         .read           = seq_read,
3253         .write          = tracing_write_stub,
3254         .llseek         = tracing_lseek,
3255         .release        = tracing_release,
3256 };
3257
3258 static const struct file_operations show_traces_fops = {
3259         .open           = show_traces_open,
3260         .read           = seq_read,
3261         .release        = seq_release,
3262         .llseek         = seq_lseek,
3263 };
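
/*
 * These seq operations back the "available_tracers" file: t_show() emits
 * each registered tracer name separated by spaces and ends the list with
 * a newline, for example "blk function_graph function nop".
 */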
3264
3265 /*
3266  * The tracer itself will not take this lock, but still we want
3267  * to provide a consistent cpumask to user-space:
3268  */
3269 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3270
3271 /*
3272  * Temporary storage for the character representation of the
3273  * CPU bitmask (and one more byte for the newline):
3274  */
3275 static char mask_str[NR_CPUS + 1];
3276
3277 static ssize_t
3278 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3279                      size_t count, loff_t *ppos)
3280 {
3281         struct trace_array *tr = file_inode(filp)->i_private;
3282         int len;
3283
3284         mutex_lock(&tracing_cpumask_update_lock);
3285
3286         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3287         if (count - len < 2) {
3288                 count = -EINVAL;
3289                 goto out_err;
3290         }
3291         len += sprintf(mask_str + len, "\n");
3292         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3293
3294 out_err:
3295         mutex_unlock(&tracing_cpumask_update_lock);
3296
3297         return count;
3298 }
3299
3300 static ssize_t
3301 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3302                       size_t count, loff_t *ppos)
3303 {
3304         struct trace_array *tr = file_inode(filp)->i_private;
3305         cpumask_var_t tracing_cpumask_new;
3306         int err, cpu;
3307
3308         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3309                 return -ENOMEM;
3310
3311         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3312         if (err)
3313                 goto err_unlock;
3314
3315         mutex_lock(&tracing_cpumask_update_lock);
3316
3317         local_irq_disable();
3318         arch_spin_lock(&tr->max_lock);
3319         for_each_tracing_cpu(cpu) {
3320                 /*
3321                  * Increase/decrease the disabled counter if we are
3322                  * about to flip a bit in the cpumask:
3323                  */
3324                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3325                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3326                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3327                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3328                 }
3329                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3330                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3331                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3332                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3333                 }
3334         }
3335         arch_spin_unlock(&tr->max_lock);
3336         local_irq_enable();
3337
3338         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3339
3340         mutex_unlock(&tracing_cpumask_update_lock);
3341         free_cpumask_var(tracing_cpumask_new);
3342
3343         return count;
3344
3345 err_unlock:
3346         free_cpumask_var(tracing_cpumask_new);
3347
3348         return err;
3349 }
3350
3351 static const struct file_operations tracing_cpumask_fops = {
3352         .open           = tracing_open_generic_tr,
3353         .read           = tracing_cpumask_read,
3354         .write          = tracing_cpumask_write,
3355         .release        = tracing_release_generic_tr,
3356         .llseek         = generic_file_llseek,
3357 };
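/*
 * A minimal user-space sketch of driving the tracing_cpumask file served by
 * the handlers above. The hex mask format is whatever cpumask_parse_user()
 * accepts (writing "3" restricts tracing to CPUs 0 and 1); the tracefs path
 * and the helper name are assumptions made for illustration only.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int restrict_tracing_cpus(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/tracing_cpumask",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "3", 1) != 1) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */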
3358
3359 static int tracing_trace_options_show(struct seq_file *m, void *v)
3360 {
3361         struct tracer_opt *trace_opts;
3362         struct trace_array *tr = m->private;
3363         u32 tracer_flags;
3364         int i;
3365
3366         mutex_lock(&trace_types_lock);
3367         tracer_flags = tr->current_trace->flags->val;
3368         trace_opts = tr->current_trace->flags->opts;
3369
3370         for (i = 0; trace_options[i]; i++) {
3371                 if (trace_flags & (1 << i))
3372                         seq_printf(m, "%s\n", trace_options[i]);
3373                 else
3374                         seq_printf(m, "no%s\n", trace_options[i]);
3375         }
3376
3377         for (i = 0; trace_opts[i].name; i++) {
3378                 if (tracer_flags & trace_opts[i].bit)
3379                         seq_printf(m, "%s\n", trace_opts[i].name);
3380                 else
3381                         seq_printf(m, "no%s\n", trace_opts[i].name);
3382         }
3383         mutex_unlock(&trace_types_lock);
3384
3385         return 0;
3386 }
3387
3388 static int __set_tracer_option(struct trace_array *tr,
3389                                struct tracer_flags *tracer_flags,
3390                                struct tracer_opt *opts, int neg)
3391 {
3392         struct tracer *trace = tr->current_trace;
3393         int ret;
3394
3395         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3396         if (ret)
3397                 return ret;
3398
3399         if (neg)
3400                 tracer_flags->val &= ~opts->bit;
3401         else
3402                 tracer_flags->val |= opts->bit;
3403         return 0;
3404 }
3405
3406 /* Try to assign a tracer specific option */
3407 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3408 {
3409         struct tracer *trace = tr->current_trace;
3410         struct tracer_flags *tracer_flags = trace->flags;
3411         struct tracer_opt *opts = NULL;
3412         int i;
3413
3414         for (i = 0; tracer_flags->opts[i].name; i++) {
3415                 opts = &tracer_flags->opts[i];
3416
3417                 if (strcmp(cmp, opts->name) == 0)
3418                         return __set_tracer_option(tr, trace->flags, opts, neg);
3419         }
3420
3421         return -EINVAL;
3422 }
3423
3424 /* Some tracers require overwrite to stay enabled */
3425 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3426 {
3427         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3428                 return -1;
3429
3430         return 0;
3431 }
3432
3433 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3434 {
3435         /* do nothing if flag is already set */
3436         if (!!(trace_flags & mask) == !!enabled)
3437                 return 0;
3438
3439         /* Give the tracer a chance to approve the change */
3440         if (tr->current_trace->flag_changed)
3441                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3442                         return -EINVAL;
3443
3444         if (enabled)
3445                 trace_flags |= mask;
3446         else
3447                 trace_flags &= ~mask;
3448
3449         if (mask == TRACE_ITER_RECORD_CMD)
3450                 trace_event_enable_cmd_record(enabled);
3451
3452         if (mask == TRACE_ITER_OVERWRITE) {
3453                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3454 #ifdef CONFIG_TRACER_MAX_TRACE
3455                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3456 #endif
3457         }
3458
3459         if (mask == TRACE_ITER_PRINTK)
3460                 trace_printk_start_stop_comm(enabled);
3461
3462         return 0;
3463 }
3464
3465 static int trace_set_options(struct trace_array *tr, char *option)
3466 {
3467         char *cmp;
3468         int neg = 0;
3469         int ret = -ENODEV;
3470         int i;
3471
3472         cmp = strstrip(option);
3473
3474         if (strncmp(cmp, "no", 2) == 0) {
3475                 neg = 1;
3476                 cmp += 2;
3477         }
3478
3479         mutex_lock(&trace_types_lock);
3480
3481         for (i = 0; trace_options[i]; i++) {
3482                 if (strcmp(cmp, trace_options[i]) == 0) {
3483                         ret = set_tracer_flag(tr, 1 << i, !neg);
3484                         break;
3485                 }
3486         }
3487
3488         /* If no option could be set, test the specific tracer options */
3489         if (!trace_options[i])
3490                 ret = set_tracer_option(tr, cmp, neg);
3491
3492         mutex_unlock(&trace_types_lock);
3493
3494         return ret;
3495 }
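/*
 * A small user-space sketch of what the parsing above accepts: writing an
 * option name sets the corresponding flag, and the same name prefixed with
 * "no" clears it. "overwrite" is used as the example name (assuming it is
 * one of the entries in trace_options[], defined earlier in this file); the
 * helper and the tracefs path are illustrative assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_trace_option(const char *opt)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_options", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, opt, strlen(opt));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 *
 * set_trace_option("overwrite") sets the flag; set_trace_option("nooverwrite")
 * clears it, mirroring the "no" handling in trace_set_options().
 */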
3496
3497 static ssize_t
3498 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3499                         size_t cnt, loff_t *ppos)
3500 {
3501         struct seq_file *m = filp->private_data;
3502         struct trace_array *tr = m->private;
3503         char buf[64];
3504         int ret;
3505
3506         if (cnt >= sizeof(buf))
3507                 return -EINVAL;
3508
3509         if (copy_from_user(&buf, ubuf, cnt))
3510                 return -EFAULT;
3511
3512         buf[cnt] = 0;
3513
3514         ret = trace_set_options(tr, buf);
3515         if (ret < 0)
3516                 return ret;
3517
3518         *ppos += cnt;
3519
3520         return cnt;
3521 }
3522
3523 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3524 {
3525         struct trace_array *tr = inode->i_private;
3526         int ret;
3527
3528         if (tracing_disabled)
3529                 return -ENODEV;
3530
3531         if (trace_array_get(tr) < 0)
3532                 return -ENODEV;
3533
3534         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3535         if (ret < 0)
3536                 trace_array_put(tr);
3537
3538         return ret;
3539 }
3540
3541 static const struct file_operations tracing_iter_fops = {
3542         .open           = tracing_trace_options_open,
3543         .read           = seq_read,
3544         .llseek         = seq_lseek,
3545         .release        = tracing_single_release_tr,
3546         .write          = tracing_trace_options_write,
3547 };
3548
3549 static const char readme_msg[] =
3550         "tracing mini-HOWTO:\n\n"
3551         "# echo 0 > tracing_on : quick way to disable tracing\n"
3552         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3553         " Important files:\n"
3554         "  trace\t\t\t- The static contents of the buffer\n"
3555         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3556         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3557         "  current_tracer\t- function and latency tracers\n"
3558         "  available_tracers\t- list of configured tracers for current_tracer\n"
3559         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3560         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3561         "  trace_clock\t\t-change the clock used to order events\n"
3562         "       local:   Per cpu clock but may not be synced across CPUs\n"
3563         "      global:   Synced across CPUs but slows tracing down.\n"
3564         "     counter:   Not a clock, but just an increment\n"
3565         "      uptime:   Jiffy counter from time of boot\n"
3566         "        perf:   Same clock that perf events use\n"
3567 #ifdef CONFIG_X86_64
3568         "     x86-tsc:   TSC cycle counter\n"
3569 #endif
3570         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3571         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3572         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3573         "\t\t\t  Remove sub-buffer with rmdir\n"
3574         "  trace_options\t\t- Set format or modify how tracing happens\n"
3575         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3576         "\t\t\t  option name\n"
3577 #ifdef CONFIG_DYNAMIC_FTRACE
3578         "\n  available_filter_functions - list of functions that can be filtered on\n"
3579         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3580         "\t\t\t  functions\n"
3581         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3582         "\t     modules: Can select a group via module\n"
3583         "\t      Format: :mod:<module-name>\n"
3584         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3585         "\t    triggers: a command to perform when function is hit\n"
3586         "\t      Format: <function>:<trigger>[:count]\n"
3587         "\t     trigger: traceon, traceoff\n"
3588         "\t\t      enable_event:<system>:<event>\n"
3589         "\t\t      disable_event:<system>:<event>\n"
3590 #ifdef CONFIG_STACKTRACE
3591         "\t\t      stacktrace\n"
3592 #endif
3593 #ifdef CONFIG_TRACER_SNAPSHOT
3594         "\t\t      snapshot\n"
3595 #endif
3596         "\t\t      dump\n"
3597         "\t\t      cpudump\n"
3598         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3599         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3600         "\t     The first one will disable tracing every time do_fault is hit\n"
3601         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3602         "\t       The first time do trap is hit and it disables tracing, the\n"
3603         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3604         "\t       the counter will not decrement. It only decrements when the\n"
3605         "\t       trigger did work\n"
3606         "\t     To remove trigger without count:\n"
3607         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3608         "\t     To remove trigger with a count:\n"
3609         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3610         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3611         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3612         "\t    modules: Can select a group via module command :mod:\n"
3613         "\t    Does not accept triggers\n"
3614 #endif /* CONFIG_DYNAMIC_FTRACE */
3615 #ifdef CONFIG_FUNCTION_TRACER
3616         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3617         "\t\t    (function)\n"
3618 #endif
3619 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3620         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3621         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3622 #endif
3623 #ifdef CONFIG_TRACER_SNAPSHOT
3624         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3625         "\t\t\t  snapshot buffer. Read the contents for more\n"
3626         "\t\t\t  information\n"
3627 #endif
3628 #ifdef CONFIG_STACK_TRACER
3629         "  stack_trace\t\t- Shows the max stack trace when active\n"
3630         "  stack_max_size\t- Shows current max stack size that was traced\n"
3631         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3632         "\t\t\t  new trace)\n"
3633 #ifdef CONFIG_DYNAMIC_FTRACE
3634         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3635         "\t\t\t  traces\n"
3636 #endif
3637 #endif /* CONFIG_STACK_TRACER */
3638         "  events/\t\t- Directory containing all trace event subsystems:\n"
3639         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3640         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3641         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3642         "\t\t\t  events\n"
3643         "      filter\t\t- If set, only events passing filter are traced\n"
3644         "  events/<system>/<event>/\t- Directory containing control files for\n"
3645         "\t\t\t  <event>:\n"
3646         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3647         "      filter\t\t- If set, only events passing filter are traced\n"
3648         "      trigger\t\t- If set, a command to perform when event is hit\n"
3649         "\t    Format: <trigger>[:count][if <filter>]\n"
3650         "\t   trigger: traceon, traceoff\n"
3651         "\t            enable_event:<system>:<event>\n"
3652         "\t            disable_event:<system>:<event>\n"
3653 #ifdef CONFIG_STACKTRACE
3654         "\t\t    stacktrace\n"
3655 #endif
3656 #ifdef CONFIG_TRACER_SNAPSHOT
3657         "\t\t    snapshot\n"
3658 #endif
3659         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3660         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3661         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3662         "\t                  events/block/block_unplug/trigger\n"
3663         "\t   The first disables tracing every time block_unplug is hit.\n"
3664         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3665         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3666         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3667         "\t   Like function triggers, the counter is only decremented if it\n"
3668         "\t    enabled or disabled tracing.\n"
3669         "\t   To remove a trigger without a count:\n"
3670         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3671         "\t   To remove a trigger with a count:\n"
3672         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3673         "\t   Filters can be ignored when removing a trigger.\n"
3674 ;
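/*
 * A compact user-space illustration of the flow the mini-HOWTO above
 * describes: pick a tracer, turn tracing on, then consume the output from
 * trace_pipe. The tracefs mount point, the helper names, and the "function"
 * tracer are assumptions for the sake of the example; any name listed in
 * available_tracers works the same way.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	#define TRACEFS "/sys/kernel/debug/tracing/"
 *
 *	static int write_file(const char *name, const char *val)
 *	{
 *		char path[256];
 *		int fd;
 *
 *		snprintf(path, sizeof(path), TRACEFS "%s", name);
 *		fd = open(path, O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		write(fd, val, strlen(val));
 *		return close(fd);
 *	}
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd;
 *
 *		write_file("current_tracer", "function");
 *		write_file("tracing_on", "1");
 *
 *		fd = open(TRACEFS "trace_pipe", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			write(STDOUT_FILENO, buf, n);
 *		close(fd);
 *		return 0;
 *	}
 */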
3675
3676 static ssize_t
3677 tracing_readme_read(struct file *filp, char __user *ubuf,
3678                        size_t cnt, loff_t *ppos)
3679 {
3680         return simple_read_from_buffer(ubuf, cnt, ppos,
3681                                         readme_msg, strlen(readme_msg));
3682 }
3683
3684 static const struct file_operations tracing_readme_fops = {
3685         .open           = tracing_open_generic,
3686         .read           = tracing_readme_read,
3687         .llseek         = generic_file_llseek,
3688 };
3689
3690 static ssize_t
3691 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3692                                 size_t cnt, loff_t *ppos)
3693 {
3694         char *buf_comm;
3695         char *file_buf;
3696         char *buf;
3697         int len = 0;
3698         int pid;
3699         int i;
3700
3701         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3702         if (!file_buf)
3703                 return -ENOMEM;
3704
3705         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3706         if (!buf_comm) {
3707                 kfree(file_buf);
3708                 return -ENOMEM;
3709         }
3710
3711         buf = file_buf;
3712
3713         for (i = 0; i < SAVED_CMDLINES; i++) {
3714                 int r;
3715
3716                 pid = map_cmdline_to_pid[i];
3717                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3718                         continue;
3719
3720                 trace_find_cmdline(pid, buf_comm);
3721                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3722                 buf += r;
3723                 len += r;
3724         }
3725
3726         len = simple_read_from_buffer(ubuf, cnt, ppos,
3727                                       file_buf, len);
3728
3729         kfree(file_buf);
3730         kfree(buf_comm);
3731
3732         return len;
3733 }
3734
3735 static const struct file_operations tracing_saved_cmdlines_fops = {
3736     .open       = tracing_open_generic,
3737     .read       = tracing_saved_cmdlines_read,
3738     .llseek     = generic_file_llseek,
3739 };
3740
3741 static ssize_t
3742 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3743                        size_t cnt, loff_t *ppos)
3744 {
3745         struct trace_array *tr = filp->private_data;
3746         char buf[MAX_TRACER_SIZE+2];
3747         int r;
3748
3749         mutex_lock(&trace_types_lock);
3750         r = sprintf(buf, "%s\n", tr->current_trace->name);
3751         mutex_unlock(&trace_types_lock);
3752
3753         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3754 }
3755
3756 int tracer_init(struct tracer *t, struct trace_array *tr)
3757 {
3758         tracing_reset_online_cpus(&tr->trace_buffer);
3759         return t->init(tr);
3760 }
3761
3762 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3763 {
3764         int cpu;
3765
3766         for_each_tracing_cpu(cpu)
3767                 per_cpu_ptr(buf->data, cpu)->entries = val;
3768 }
3769
3770 #ifdef CONFIG_TRACER_MAX_TRACE
3771 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3772 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3773                                         struct trace_buffer *size_buf, int cpu_id)
3774 {
3775         int cpu, ret = 0;
3776
3777         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3778                 for_each_tracing_cpu(cpu) {
3779                         ret = ring_buffer_resize(trace_buf->buffer,
3780                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3781                         if (ret < 0)
3782                                 break;
3783                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3784                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3785                 }
3786         } else {
3787                 ret = ring_buffer_resize(trace_buf->buffer,
3788                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3789                 if (ret == 0)
3790                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3791                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3792         }
3793
3794         return ret;
3795 }
3796 #endif /* CONFIG_TRACER_MAX_TRACE */
3797
3798 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3799                                         unsigned long size, int cpu)
3800 {
3801         int ret;
3802
3803         /*
3804          * If the kernel or the user changes the size of the ring buffer,
3805          * we use the size that was given, and we can forget about
3806          * expanding it later.
3807          */
3808         ring_buffer_expanded = true;
3809
3810         /* May be called before buffers are initialized */
3811         if (!tr->trace_buffer.buffer)
3812                 return 0;
3813
3814         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3815         if (ret < 0)
3816                 return ret;
3817
3818 #ifdef CONFIG_TRACER_MAX_TRACE
3819         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3820             !tr->current_trace->use_max_tr)
3821                 goto out;
3822
3823         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3824         if (ret < 0) {
3825                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3826                                                      &tr->trace_buffer, cpu);
3827                 if (r < 0) {
3828                         /*
3829                          * AARGH! We are left with a different
3830                          * size max buffer!!!!
3831                          * The max buffer is our "snapshot" buffer.
3832                          * When a tracer needs a snapshot (one of the
3833                          * latency tracers), it swaps the max buffer
3834                          * with the saved snapshot. We succeeded in
3835                          * updating the size of the main buffer, but failed to
3836                          * update the size of the max buffer. Then, when we
3837                          * tried to reset the main buffer to the original size,
3838                          * we failed there too. This is very unlikely to
3839                          * happen, but if it does, warn and kill all
3840                          * tracing.
3841                          */
3842                         WARN_ON(1);
3843                         tracing_disabled = 1;
3844                 }
3845                 return ret;
3846         }
3847
3848         if (cpu == RING_BUFFER_ALL_CPUS)
3849                 set_buffer_entries(&tr->max_buffer, size);
3850         else
3851                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3852
3853  out:
3854 #endif /* CONFIG_TRACER_MAX_TRACE */
3855
3856         if (cpu == RING_BUFFER_ALL_CPUS)
3857                 set_buffer_entries(&tr->trace_buffer, size);
3858         else
3859                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3860
3861         return ret;
3862 }
3863
3864 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3865                                           unsigned long size, int cpu_id)
3866 {
3867         int ret = size;
3868
3869         mutex_lock(&trace_types_lock);
3870
3871         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3872                 /* make sure this cpu is enabled in the mask */
3873                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3874                         ret = -EINVAL;
3875                         goto out;
3876                 }
3877         }
3878
3879         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3880         if (ret < 0)
3881                 ret = -ENOMEM;
3882
3883 out:
3884         mutex_unlock(&trace_types_lock);
3885
3886         return ret;
3887 }
3888
3889
3890 /**
3891  * tracing_update_buffers - used by tracing facility to expand ring buffers
3892  *
3893  * To save memory when tracing is never used on a system that has it
3894  * configured in, the ring buffers are set to a minimum size. But once
3895  * a user starts to use the tracing facility, they need to grow
3896  * to their default size.
3897  *
3898  * This function is to be called when a tracer is about to be used.
3899  */
3900 int tracing_update_buffers(void)
3901 {
3902         int ret = 0;
3903
3904         mutex_lock(&trace_types_lock);
3905         if (!ring_buffer_expanded)
3906                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3907                                                 RING_BUFFER_ALL_CPUS);
3908         mutex_unlock(&trace_types_lock);
3909
3910         return ret;
3911 }
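/*
 * A minimal sketch of the intended calling pattern. The caller below is
 * hypothetical (it does not exist in this file); the point is simply to
 * expand the buffers first and only proceed if that succeeded.
 *
 *	static int my_tracing_feature_enable(void)
 *	{
 *		int ret;
 *
 *		ret = tracing_update_buffers();
 *		if (ret < 0)
 *			return ret;
 *
 *		... enable the tracer or events here ...
 *
 *		return 0;
 *	}
 */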
3912
3913 struct trace_option_dentry;
3914
3915 static struct trace_option_dentry *
3916 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3917
3918 static void
3919 destroy_trace_option_files(struct trace_option_dentry *topts);
3920
3921 /*
3922  * Used to clear out the tracer before deletion of an instance.
3923  * Must have trace_types_lock held.
3924  */
3925 static void tracing_set_nop(struct trace_array *tr)
3926 {
3927         if (tr->current_trace == &nop_trace)
3928                 return;
3929
3930         tr->current_trace->enabled--;
3931
3932         if (tr->current_trace->reset)
3933                 tr->current_trace->reset(tr);
3934
3935         tr->current_trace = &nop_trace;
3936 }
3937
3938 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
3939 {
3940         static struct trace_option_dentry *topts;
3941         struct tracer *t;
3942 #ifdef CONFIG_TRACER_MAX_TRACE
3943         bool had_max_tr;
3944 #endif
3945         int ret = 0;
3946
3947         mutex_lock(&trace_types_lock);
3948
3949         if (!ring_buffer_expanded) {
3950                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3951                                                 RING_BUFFER_ALL_CPUS);
3952                 if (ret < 0)
3953                         goto out;
3954                 ret = 0;
3955         }
3956
3957         for (t = trace_types; t; t = t->next) {
3958                 if (strcmp(t->name, buf) == 0)
3959                         break;
3960         }
3961         if (!t) {
3962                 ret = -EINVAL;
3963                 goto out;
3964         }
3965         if (t == tr->current_trace)
3966                 goto out;
3967
3968         /* Some tracers are only allowed for the top level buffer */
3969         if (!trace_ok_for_array(t, tr)) {
3970                 ret = -EINVAL;
3971                 goto out;
3972         }
3973
3974         trace_branch_disable();
3975
3976         tr->current_trace->enabled--;
3977
3978         if (tr->current_trace->reset)
3979                 tr->current_trace->reset(tr);
3980
3981         /* Current trace needs to be nop_trace before synchronize_sched */
3982         tr->current_trace = &nop_trace;
3983
3984 #ifdef CONFIG_TRACER_MAX_TRACE
3985         had_max_tr = tr->allocated_snapshot;
3986
3987         if (had_max_tr && !t->use_max_tr) {
3988                 /*
3989                  * We need to make sure that the update_max_tr sees that
3990                  * current_trace changed to nop_trace to keep it from
3991                  * swapping the buffers after we resize it.
3992                  * update_max_tr() is called with interrupts disabled,
3993                  * so a synchronize_sched() is sufficient.
3994                  */
3995                 synchronize_sched();
3996                 free_snapshot(tr);
3997         }
3998 #endif
3999         /* Currently, only the top instance has options */
4000         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4001                 destroy_trace_option_files(topts);
4002                 topts = create_trace_option_files(tr, t);
4003         }
4004
4005 #ifdef CONFIG_TRACER_MAX_TRACE
4006         if (t->use_max_tr && !had_max_tr) {
4007                 ret = alloc_snapshot(tr);
4008                 if (ret < 0)
4009                         goto out;
4010         }
4011 #endif
4012
4013         if (t->init) {
4014                 ret = tracer_init(t, tr);
4015                 if (ret)
4016                         goto out;
4017         }
4018
4019         tr->current_trace = t;
4020         tr->current_trace->enabled++;
4021         trace_branch_enable(tr);
4022  out:
4023         mutex_unlock(&trace_types_lock);
4024
4025         return ret;
4026 }
4027
4028 static ssize_t
4029 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4030                         size_t cnt, loff_t *ppos)
4031 {
4032         struct trace_array *tr = filp->private_data;
4033         char buf[MAX_TRACER_SIZE+1];
4034         int i;
4035         size_t ret;
4036         int err;
4037
4038         ret = cnt;
4039
4040         if (cnt > MAX_TRACER_SIZE)
4041                 cnt = MAX_TRACER_SIZE;
4042
4043         if (copy_from_user(&buf, ubuf, cnt))
4044                 return -EFAULT;
4045
4046         buf[cnt] = 0;
4047
4048         /* strip trailing whitespace. */
4049         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4050                 buf[i] = 0;
4051
4052         err = tracing_set_tracer(tr, buf);
4053         if (err)
4054                 return err;
4055
4056         *ppos += ret;
4057
4058         return ret;
4059 }
4060
4061 static ssize_t
4062 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4063                      size_t cnt, loff_t *ppos)
4064 {
4065         unsigned long *ptr = filp->private_data;
4066         char buf[64];
4067         int r;
4068
4069         r = snprintf(buf, sizeof(buf), "%ld\n",
4070                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4071         if (r > sizeof(buf))
4072                 r = sizeof(buf);
4073         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4074 }
4075
4076 static ssize_t
4077 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4078                       size_t cnt, loff_t *ppos)
4079 {
4080         unsigned long *ptr = filp->private_data;
4081         unsigned long val;
4082         int ret;
4083
4084         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4085         if (ret)
4086                 return ret;
4087
4088         *ptr = val * 1000;
4089
4090         return cnt;
4091 }
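/*
 * These handlers expose the value in microseconds while the backing
 * variable is kept in nanoseconds. A user-space sketch of clearing the
 * recorded maximum before a new measurement; the tracing_max_latency path
 * and the helper name are assumptions used for illustration.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int reset_max_latency(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/tracing_max_latency",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "0", 1);
 *		return close(fd);
 *	}
 */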
4092
4093 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4094 {
4095         struct trace_array *tr = inode->i_private;
4096         struct trace_iterator *iter;
4097         int ret = 0;
4098
4099         if (tracing_disabled)
4100                 return -ENODEV;
4101
4102         if (trace_array_get(tr) < 0)
4103                 return -ENODEV;
4104
4105         mutex_lock(&trace_types_lock);
4106
4107         /* create a buffer to store the information to pass to userspace */
4108         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4109         if (!iter) {
4110                 ret = -ENOMEM;
4111                 __trace_array_put(tr);
4112                 goto out;
4113         }
4114
4115         /*
4116          * We make a copy of the current tracer to avoid concurrent
4117          * changes on it while we are reading.
4118          */
4119         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4120         if (!iter->trace) {
4121                 ret = -ENOMEM;
4122                 goto fail;
4123         }
4124         *iter->trace = *tr->current_trace;
4125
4126         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4127                 ret = -ENOMEM;
4128                 goto fail;
4129         }
4130
4131         /* trace pipe does not show start of buffer */
4132         cpumask_setall(iter->started);
4133
4134         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4135                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4136
4137         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4138         if (trace_clocks[tr->clock_id].in_ns)
4139                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4140
4141         iter->tr = tr;
4142         iter->trace_buffer = &tr->trace_buffer;
4143         iter->cpu_file = tracing_get_cpu(inode);
4144         mutex_init(&iter->mutex);
4145         filp->private_data = iter;
4146
4147         if (iter->trace->pipe_open)
4148                 iter->trace->pipe_open(iter);
4149
4150         nonseekable_open(inode, filp);
4151 out:
4152         mutex_unlock(&trace_types_lock);
4153         return ret;
4154
4155 fail:
4156         kfree(iter->trace);
4157         kfree(iter);
4158         __trace_array_put(tr);
4159         mutex_unlock(&trace_types_lock);
4160         return ret;
4161 }
4162
4163 static int tracing_release_pipe(struct inode *inode, struct file *file)
4164 {
4165         struct trace_iterator *iter = file->private_data;
4166         struct trace_array *tr = inode->i_private;
4167
4168         mutex_lock(&trace_types_lock);
4169
4170         if (iter->trace->pipe_close)
4171                 iter->trace->pipe_close(iter);
4172
4173         mutex_unlock(&trace_types_lock);
4174
4175         free_cpumask_var(iter->started);
4176         mutex_destroy(&iter->mutex);
4177         kfree(iter->trace);
4178         kfree(iter);
4179
4180         trace_array_put(tr);
4181
4182         return 0;
4183 }
4184
4185 static unsigned int
4186 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4187 {
4188         /* Iterators are static; they should be either filled or empty */
4189         if (trace_buffer_iter(iter, iter->cpu_file))
4190                 return POLLIN | POLLRDNORM;
4191
4192         if (trace_flags & TRACE_ITER_BLOCK)
4193                 /*
4194                  * Always select as readable when in blocking mode
4195                  */
4196                 return POLLIN | POLLRDNORM;
4197         else
4198                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4199                                              filp, poll_table);
4200 }
4201
4202 static unsigned int
4203 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4204 {
4205         struct trace_iterator *iter = filp->private_data;
4206
4207         return trace_poll(iter, filp, poll_table);
4208 }
4209
4210 /*
4211  * This is a makeshift waitqueue.
4212  * A tracer might use this callback in some rare cases:
4213  *
4214  *  1) the current tracer might hold the runqueue lock when it wakes up
4215  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4216  *  2) the function tracers trace all functions, and we don't want
4217  *     the overhead of calling wake_up and friends
4218  *     (and of tracing them too)
4219  *
4220  *     Either way, this is a very primitive wakeup.
4221  */
4222 void poll_wait_pipe(struct trace_iterator *iter)
4223 {
4224         set_current_state(TASK_INTERRUPTIBLE);
4225         /* sleep for 100 msecs, and try again. */
4226         schedule_timeout(HZ / 10);
4227 }
4228
4229 /* Must be called with trace_types_lock mutex held. */
4230 static int tracing_wait_pipe(struct file *filp)
4231 {
4232         struct trace_iterator *iter = filp->private_data;
4233
4234         while (trace_empty(iter)) {
4235
4236                 if ((filp->f_flags & O_NONBLOCK)) {
4237                         return -EAGAIN;
4238                 }
4239
4240                 mutex_unlock(&iter->mutex);
4241
4242                 iter->trace->wait_pipe(iter);
4243
4244                 mutex_lock(&iter->mutex);
4245
4246                 if (signal_pending(current))
4247                         return -EINTR;
4248
4249                 /*
4250                  * We block until we read something and tracing is disabled.
4251                  * We still block if tracing is disabled but we have not yet
4252                  * read anything. This allows a user to cat this file, and
4253                  * then enable tracing. But after we have read something,
4254                  * we give an EOF when tracing is again disabled.
4255                  *
4256                  * iter->pos will be 0 if we haven't read anything.
4257                  */
4258                 if (!tracing_is_on() && iter->pos)
4259                         break;
4260         }
4261
4262         return 1;
4263 }
4264
4265 /*
4266  * Consumer reader.
4267  */
4268 static ssize_t
4269 tracing_read_pipe(struct file *filp, char __user *ubuf,
4270                   size_t cnt, loff_t *ppos)
4271 {
4272         struct trace_iterator *iter = filp->private_data;
4273         struct trace_array *tr = iter->tr;
4274         ssize_t sret;
4275
4276         /* return any leftover data */
4277         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4278         if (sret != -EBUSY)
4279                 return sret;
4280
4281         trace_seq_init(&iter->seq);
4282
4283         /* copy the tracer to avoid using a global lock all around */
4284         mutex_lock(&trace_types_lock);
4285         if (unlikely(iter->trace->name != tr->current_trace->name))
4286                 *iter->trace = *tr->current_trace;
4287         mutex_unlock(&trace_types_lock);
4288
4289         /*
4290          * Avoid more than one consumer on a single file descriptor.
4291          * This is just a matter of trace coherency; the ring buffer itself
4292          * is protected.
4293          */
4294         mutex_lock(&iter->mutex);
4295         if (iter->trace->read) {
4296                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4297                 if (sret)
4298                         goto out;
4299         }
4300
4301 waitagain:
4302         sret = tracing_wait_pipe(filp);
4303         if (sret <= 0)
4304                 goto out;
4305
4306         /* stop when tracing is finished */
4307         if (trace_empty(iter)) {
4308                 sret = 0;
4309                 goto out;
4310         }
4311
4312         if (cnt >= PAGE_SIZE)
4313                 cnt = PAGE_SIZE - 1;
4314
4315         /* reset all but tr, trace, and overruns */
4316         memset(&iter->seq, 0,
4317                sizeof(struct trace_iterator) -
4318                offsetof(struct trace_iterator, seq));
4319         cpumask_clear(iter->started);
4320         iter->pos = -1;
4321
4322         trace_event_read_lock();
4323         trace_access_lock(iter->cpu_file);
4324         while (trace_find_next_entry_inc(iter) != NULL) {
4325                 enum print_line_t ret;
4326                 int len = iter->seq.len;
4327
4328                 ret = print_trace_line(iter);
4329                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4330                         /* don't print partial lines */
4331                         iter->seq.len = len;
4332                         break;
4333                 }
4334                 if (ret != TRACE_TYPE_NO_CONSUME)
4335                         trace_consume(iter);
4336
4337                 if (iter->seq.len >= cnt)
4338                         break;
4339
4340                 /*
4341                  * Setting the full flag means we reached the trace_seq buffer
4342                  * size and we should have left via the partial output condition above.
4343                  * One of the trace_seq_* functions is not used properly.
4344                  */
4345                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4346                           iter->ent->type);
4347         }
4348         trace_access_unlock(iter->cpu_file);
4349         trace_event_read_unlock();
4350
4351         /* Now copy what we have to the user */
4352         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4353         if (iter->seq.readpos >= iter->seq.len)
4354                 trace_seq_init(&iter->seq);
4355
4356         /*
4357          * If there was nothing to send to user, in spite of consuming trace
4358          * entries, go back to wait for more entries.
4359          */
4360         if (sret == -EBUSY)
4361                 goto waitagain;
4362
4363 out:
4364         mutex_unlock(&iter->mutex);
4365
4366         return sret;
4367 }
4368
4369 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4370                                      unsigned int idx)
4371 {
4372         __free_page(spd->pages[idx]);
4373 }
4374
4375 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4376         .can_merge              = 0,
4377         .confirm                = generic_pipe_buf_confirm,
4378         .release                = generic_pipe_buf_release,
4379         .steal                  = generic_pipe_buf_steal,
4380         .get                    = generic_pipe_buf_get,
4381 };
4382
4383 static size_t
4384 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4385 {
4386         size_t count;
4387         int ret;
4388
4389         /* Seq buffer is page-sized, exactly what we need. */
4390         for (;;) {
4391                 count = iter->seq.len;
4392                 ret = print_trace_line(iter);
4393                 count = iter->seq.len - count;
4394                 if (rem < count) {
4395                         rem = 0;
4396                         iter->seq.len -= count;
4397                         break;
4398                 }
4399                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4400                         iter->seq.len -= count;
4401                         break;
4402                 }
4403
4404                 if (ret != TRACE_TYPE_NO_CONSUME)
4405                         trace_consume(iter);
4406                 rem -= count;
4407                 if (!trace_find_next_entry_inc(iter))   {
4408                         rem = 0;
4409                         iter->ent = NULL;
4410                         break;
4411                 }
4412         }
4413
4414         return rem;
4415 }
4416
4417 static ssize_t tracing_splice_read_pipe(struct file *filp,
4418                                         loff_t *ppos,
4419                                         struct pipe_inode_info *pipe,
4420                                         size_t len,
4421                                         unsigned int flags)
4422 {
4423         struct page *pages_def[PIPE_DEF_BUFFERS];
4424         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4425         struct trace_iterator *iter = filp->private_data;
4426         struct splice_pipe_desc spd = {
4427                 .pages          = pages_def,
4428                 .partial        = partial_def,
4429                 .nr_pages       = 0, /* This gets updated below. */
4430                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4431                 .flags          = flags,
4432                 .ops            = &tracing_pipe_buf_ops,
4433                 .spd_release    = tracing_spd_release_pipe,
4434         };
4435         struct trace_array *tr = iter->tr;
4436         ssize_t ret;
4437         size_t rem;
4438         unsigned int i;
4439
4440         if (splice_grow_spd(pipe, &spd))
4441                 return -ENOMEM;
4442
4443         /* copy the tracer to avoid using a global lock all around */
4444         mutex_lock(&trace_types_lock);
4445         if (unlikely(iter->trace->name != tr->current_trace->name))
4446                 *iter->trace = *tr->current_trace;
4447         mutex_unlock(&trace_types_lock);
4448
4449         mutex_lock(&iter->mutex);
4450
4451         if (iter->trace->splice_read) {
4452                 ret = iter->trace->splice_read(iter, filp,
4453                                                ppos, pipe, len, flags);
4454                 if (ret)
4455                         goto out_err;
4456         }
4457
4458         ret = tracing_wait_pipe(filp);
4459         if (ret <= 0)
4460                 goto out_err;
4461
4462         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4463                 ret = -EFAULT;
4464                 goto out_err;
4465         }
4466
4467         trace_event_read_lock();
4468         trace_access_lock(iter->cpu_file);
4469
4470         /* Fill as many pages as possible. */
4471         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4472                 spd.pages[i] = alloc_page(GFP_KERNEL);
4473                 if (!spd.pages[i])
4474                         break;
4475
4476                 rem = tracing_fill_pipe_page(rem, iter);
4477
4478                 /* Copy the data into the page, so we can start over. */
4479                 ret = trace_seq_to_buffer(&iter->seq,
4480                                           page_address(spd.pages[i]),
4481                                           iter->seq.len);
4482                 if (ret < 0) {
4483                         __free_page(spd.pages[i]);
4484                         break;
4485                 }
4486                 spd.partial[i].offset = 0;
4487                 spd.partial[i].len = iter->seq.len;
4488
4489                 trace_seq_init(&iter->seq);
4490         }
4491
4492         trace_access_unlock(iter->cpu_file);
4493         trace_event_read_unlock();
4494         mutex_unlock(&iter->mutex);
4495
4496         spd.nr_pages = i;
4497
4498         ret = splice_to_pipe(pipe, &spd);
4499 out:
4500         splice_shrink_spd(&spd);
4501         return ret;
4502
4503 out_err:
4504         mutex_unlock(&iter->mutex);
4505         goto out;
4506 }
4507
4508 static ssize_t
4509 tracing_entries_read(struct file *filp, char __user *ubuf,
4510                      size_t cnt, loff_t *ppos)
4511 {
4512         struct inode *inode = file_inode(filp);
4513         struct trace_array *tr = inode->i_private;
4514         int cpu = tracing_get_cpu(inode);
4515         char buf[64];
4516         int r = 0;
4517         ssize_t ret;
4518
4519         mutex_lock(&trace_types_lock);
4520
4521         if (cpu == RING_BUFFER_ALL_CPUS) {
4522                 int cpu, buf_size_same;
4523                 unsigned long size;
4524
4525                 size = 0;
4526                 buf_size_same = 1;
4527                 /* check if all cpu sizes are the same */
4528                 for_each_tracing_cpu(cpu) {
4529                         /* fill in the size from the first enabled cpu */
4530                         if (size == 0)
4531                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4532                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4533                                 buf_size_same = 0;
4534                                 break;
4535                         }
4536                 }
4537
4538                 if (buf_size_same) {
4539                         if (!ring_buffer_expanded)
4540                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4541                                             size >> 10,
4542                                             trace_buf_size >> 10);
4543                         else
4544                                 r = sprintf(buf, "%lu\n", size >> 10);
4545                 } else
4546                         r = sprintf(buf, "X\n");
4547         } else
4548                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4549
4550         mutex_unlock(&trace_types_lock);
4551
4552         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4553         return ret;
4554 }
4555
4556 static ssize_t
4557 tracing_entries_write(struct file *filp, const char __user *ubuf,
4558                       size_t cnt, loff_t *ppos)
4559 {
4560         struct inode *inode = file_inode(filp);
4561         struct trace_array *tr = inode->i_private;
4562         unsigned long val;
4563         int ret;
4564
4565         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4566         if (ret)
4567                 return ret;
4568
4569         /* must have at least 1 entry */
4570         if (!val)
4571                 return -EINVAL;
4572
4573         /* value is in KB */
4574         val <<= 10;
4575         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4576         if (ret < 0)
4577                 return ret;
4578
4579         *ppos += cnt;
4580
4581         return cnt;
4582 }
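/*
 * The value written here is interpreted in kilobytes and, through
 * tracing_resize_ring_buffer(), applies either to one CPU's buffer or to
 * all of them depending on which file was opened. A user-space sketch,
 * assuming the usual per-instance buffer_size_kb file under tracefs (the
 * path and helper name are illustrative):
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(const char *kb)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/buffer_size_kb",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, kb, strlen(kb));
 *		return close(fd);
 *	}
 *
 * set_buffer_size_kb("4096") asks for roughly 4 MB of buffer per CPU.
 */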
4583
4584 static ssize_t
4585 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4586                                 size_t cnt, loff_t *ppos)
4587 {
4588         struct trace_array *tr = filp->private_data;
4589         char buf[64];
4590         int r, cpu;
4591         unsigned long size = 0, expanded_size = 0;
4592
4593         mutex_lock(&trace_types_lock);
4594         for_each_tracing_cpu(cpu) {
4595                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4596                 if (!ring_buffer_expanded)
4597                         expanded_size += trace_buf_size >> 10;
4598         }
4599         if (ring_buffer_expanded)
4600                 r = sprintf(buf, "%lu\n", size);
4601         else
4602                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4603         mutex_unlock(&trace_types_lock);
4604
4605         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4606 }
4607
4608 static ssize_t
4609 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4610                           size_t cnt, loff_t *ppos)
4611 {
4612         /*
4613          * There is no need to read what the user has written; this function
4614          * exists just to make sure that there is no error when "echo" is used.
4615          */
4616
4617         *ppos += cnt;
4618
4619         return cnt;
4620 }
4621
4622 static int
4623 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4624 {
4625         struct trace_array *tr = inode->i_private;
4626
4627         /* disable tracing ? */
4628         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4629                 tracer_tracing_off(tr);
4630         /* resize the ring buffer to 0 */
4631         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4632
4633         trace_array_put(tr);
4634
4635         return 0;
4636 }
4637
4638 static ssize_t
4639 tracing_mark_write(struct file *filp, const char __user *ubuf,
4640                                         size_t cnt, loff_t *fpos)
4641 {
4642         unsigned long addr = (unsigned long)ubuf;
4643         struct trace_array *tr = filp->private_data;
4644         struct ring_buffer_event *event;
4645         struct ring_buffer *buffer;
4646         struct print_entry *entry;
4647         unsigned long irq_flags;
4648         struct page *pages[2];
4649         void *map_page[2];
4650         int nr_pages = 1;
4651         ssize_t written;
4652         int offset;
4653         int size;
4654         int len;
4655         int ret;
4656         int i;
4657
4658         if (tracing_disabled)
4659                 return -EINVAL;
4660
4661         if (!(trace_flags & TRACE_ITER_MARKERS))
4662                 return -EINVAL;
4663
4664         if (cnt > TRACE_BUF_SIZE)
4665                 cnt = TRACE_BUF_SIZE;
4666
4667         /*
4668          * Userspace is injecting traces into the kernel trace buffer.
4669          * We want to be as non-intrusive as possible.
4670          * To do so, we do not want to allocate any special buffers
4671          * or take any locks, but instead write the userspace data
4672          * straight into the ring buffer.
4673          *
4674          * First we need to pin the userspace buffer into memory,
4675          * which it most likely already is, because userspace just referenced it.
4676          * But there's no guarantee that it is. By using get_user_pages_fast()
4677          * and kmap_atomic/kunmap_atomic() we can get access to the
4678          * pages directly. We then write the data directly into the
4679          * ring buffer.
4680          */
4681         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4682
4683         /* check if we cross pages */
4684         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4685                 nr_pages = 2;
4686
4687         offset = addr & (PAGE_SIZE - 1);
4688         addr &= PAGE_MASK;
4689
4690         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4691         if (ret < nr_pages) {
4692                 while (--ret >= 0)
4693                         put_page(pages[ret]);
4694                 written = -EFAULT;
4695                 goto out;
4696         }
4697
4698         for (i = 0; i < nr_pages; i++)
4699                 map_page[i] = kmap_atomic(pages[i]);
4700
4701         local_save_flags(irq_flags);
4702         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4703         buffer = tr->trace_buffer.buffer;
4704         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4705                                           irq_flags, preempt_count());
4706         if (!event) {
4707                 /* Ring buffer disabled, return as if not open for write */
4708                 written = -EBADF;
4709                 goto out_unlock;
4710         }
4711
4712         entry = ring_buffer_event_data(event);
4713         entry->ip = _THIS_IP_;
4714
4715         if (nr_pages == 2) {
4716                 len = PAGE_SIZE - offset;
4717                 memcpy(&entry->buf, map_page[0] + offset, len);
4718                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4719         } else
4720                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4721
4722         if (entry->buf[cnt - 1] != '\n') {
4723                 entry->buf[cnt] = '\n';
4724                 entry->buf[cnt + 1] = '\0';
4725         } else
4726                 entry->buf[cnt] = '\0';
4727
4728         __buffer_unlock_commit(buffer, event);
4729
4730         written = cnt;
4731
4732         *fpos += written;
4733
4734  out_unlock:
4735         for (i = 0; i < nr_pages; i++) {
4736                 kunmap_atomic(map_page[i]);
4737                 put_page(pages[i]);
4738         }
4739  out:
4740         return written;
4741 }
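/*
 * The matching user-space side of this handler is a plain write(2) to
 * trace_marker; the code above appends a newline if one is missing. A
 * small sketch, with the tracefs mount point assumed:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char msg[] = "app: starting phase 2";
 *		int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *		return 0;
 *	}
 */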
4742
4743 static int tracing_clock_show(struct seq_file *m, void *v)
4744 {
4745         struct trace_array *tr = m->private;
4746         int i;
4747
4748         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4749                 seq_printf(m,
4750                         "%s%s%s%s", i ? " " : "",
4751                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4752                         i == tr->clock_id ? "]" : "");
4753         seq_putc(m, '\n');
4754
4755         return 0;
4756 }
4757
4758 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4759 {
4760         int i;
4761
4762         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4763                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4764                         break;
4765         }
4766         if (i == ARRAY_SIZE(trace_clocks))
4767                 return -EINVAL;
4768
4769         mutex_lock(&trace_types_lock);
4770
4771         tr->clock_id = i;
4772
4773         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4774
4775         /*
4776          * New clock may not be consistent with the previous clock.
4777          * Reset the buffer so that it doesn't have incomparable timestamps.
4778          */
4779         tracing_reset_online_cpus(&tr->trace_buffer);
4780
4781 #ifdef CONFIG_TRACER_MAX_TRACE
4782         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4783                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4784         tracing_reset_online_cpus(&tr->max_buffer);
4785 #endif
4786
4787         mutex_unlock(&trace_types_lock);
4788
4789         return 0;
4790 }
4791
4792 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4793                                    size_t cnt, loff_t *fpos)
4794 {
4795         struct seq_file *m = filp->private_data;
4796         struct trace_array *tr = m->private;
4797         char buf[64];
4798         const char *clockstr;
4799         int ret;
4800
4801         if (cnt >= sizeof(buf))
4802                 return -EINVAL;
4803
4804         if (copy_from_user(&buf, ubuf, cnt))
4805                 return -EFAULT;
4806
4807         buf[cnt] = 0;
4808
4809         clockstr = strstrip(buf);
4810
4811         ret = tracing_set_clock(tr, clockstr);
4812         if (ret)
4813                 return ret;
4814
4815         *fpos += cnt;
4816
4817         return cnt;
4818 }
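
/*
 * Illustrative usage of the trace_clock handlers above (annotation, not in the
 * original source). Reading the file lists the available clocks with the
 * current one in brackets; writing a name switches clocks and resets the
 * buffers, as the comment in tracing_set_clock() explains:
 *
 *   cat  /sys/kernel/debug/tracing/trace_clock
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 */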
4819
4820 static int tracing_clock_open(struct inode *inode, struct file *file)
4821 {
4822         struct trace_array *tr = inode->i_private;
4823         int ret;
4824
4825         if (tracing_disabled)
4826                 return -ENODEV;
4827
4828         if (trace_array_get(tr))
4829                 return -ENODEV;
4830
4831         ret = single_open(file, tracing_clock_show, inode->i_private);
4832         if (ret < 0)
4833                 trace_array_put(tr);
4834
4835         return ret;
4836 }
4837
4838 struct ftrace_buffer_info {
4839         struct trace_iterator   iter;
4840         void                    *spare;
4841         unsigned int            read;
4842 };
4843
4844 #ifdef CONFIG_TRACER_SNAPSHOT
4845 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4846 {
4847         struct trace_array *tr = inode->i_private;
4848         struct trace_iterator *iter;
4849         struct seq_file *m;
4850         int ret = 0;
4851
4852         if (trace_array_get(tr) < 0)
4853                 return -ENODEV;
4854
4855         if (file->f_mode & FMODE_READ) {
4856                 iter = __tracing_open(inode, file, true);
4857                 if (IS_ERR(iter))
4858                         ret = PTR_ERR(iter);
4859         } else {
4860                 /* Writes still need the seq_file to hold the private data */
4861                 ret = -ENOMEM;
4862                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4863                 if (!m)
4864                         goto out;
4865                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4866                 if (!iter) {
4867                         kfree(m);
4868                         goto out;
4869                 }
4870                 ret = 0;
4871
4872                 iter->tr = tr;
4873                 iter->trace_buffer = &tr->max_buffer;
4874                 iter->cpu_file = tracing_get_cpu(inode);
4875                 m->private = iter;
4876                 file->private_data = m;
4877         }
4878 out:
4879         if (ret < 0)
4880                 trace_array_put(tr);
4881
4882         return ret;
4883 }
4884
4885 static ssize_t
4886 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4887                        loff_t *ppos)
4888 {
4889         struct seq_file *m = filp->private_data;
4890         struct trace_iterator *iter = m->private;
4891         struct trace_array *tr = iter->tr;
4892         unsigned long val;
4893         int ret;
4894
4895         ret = tracing_update_buffers();
4896         if (ret < 0)
4897                 return ret;
4898
4899         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4900         if (ret)
4901                 return ret;
4902
4903         mutex_lock(&trace_types_lock);
4904
4905         if (tr->current_trace->use_max_tr) {
4906                 ret = -EBUSY;
4907                 goto out;
4908         }
4909
4910         switch (val) {
4911         case 0:
4912                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4913                         ret = -EINVAL;
4914                         break;
4915                 }
4916                 if (tr->allocated_snapshot)
4917                         free_snapshot(tr);
4918                 break;
4919         case 1:
4920 /* Only allow per-cpu swap if the ring buffer supports it */
4921 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4922                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4923                         ret = -EINVAL;
4924                         break;
4925                 }
4926 #endif
4927                 if (!tr->allocated_snapshot) {
4928                         ret = alloc_snapshot(tr);
4929                         if (ret < 0)
4930                                 break;
4931                 }
4932                 local_irq_disable();
4933                 /* Now, we're going to swap */
4934                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4935                         update_max_tr(tr, current, smp_processor_id());
4936                 else
4937                         update_max_tr_single(tr, current, iter->cpu_file);
4938                 local_irq_enable();
4939                 break;
4940         default:
4941                 if (tr->allocated_snapshot) {
4942                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4943                                 tracing_reset_online_cpus(&tr->max_buffer);
4944                         else
4945                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4946                 }
4947                 break;
4948         }
4949
4950         if (ret >= 0) {
4951                 *ppos += cnt;
4952                 ret = cnt;
4953         }
4954 out:
4955         mutex_unlock(&trace_types_lock);
4956         return ret;
4957 }
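
/*
 * Illustrative semantics of the snapshot write handler above (annotation, not
 * in the original source), matching the switch statement: "0" frees the
 * snapshot buffer, "1" allocates it if needed and swaps it with the live
 * buffer, and any other value clears the snapshot's contents. For example:
 *
 *   echo 1 > /sys/kernel/debug/tracing/snapshot
 *   cat /sys/kernel/debug/tracing/snapshot
 *   echo 0 > /sys/kernel/debug/tracing/snapshot
 */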
4958
4959 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4960 {
4961         struct seq_file *m = file->private_data;
4962         int ret;
4963
4964         ret = tracing_release(inode, file);
4965
4966         if (file->f_mode & FMODE_READ)
4967                 return ret;
4968
4969         /* If write only, the seq_file is just a stub */
4970         if (m)
4971                 kfree(m->private);
4972         kfree(m);
4973
4974         return 0;
4975 }
4976
4977 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4978 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4979                                     size_t count, loff_t *ppos);
4980 static int tracing_buffers_release(struct inode *inode, struct file *file);
4981 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4982                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4983
4984 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4985 {
4986         struct ftrace_buffer_info *info;
4987         int ret;
4988
4989         ret = tracing_buffers_open(inode, filp);
4990         if (ret < 0)
4991                 return ret;
4992
4993         info = filp->private_data;
4994
4995         if (info->iter.trace->use_max_tr) {
4996                 tracing_buffers_release(inode, filp);
4997                 return -EBUSY;
4998         }
4999
5000         info->iter.snapshot = true;
5001         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5002
5003         return ret;
5004 }
5005
5006 #endif /* CONFIG_TRACER_SNAPSHOT */
5007
5008
5009 static const struct file_operations tracing_max_lat_fops = {
5010         .open           = tracing_open_generic,
5011         .read           = tracing_max_lat_read,
5012         .write          = tracing_max_lat_write,
5013         .llseek         = generic_file_llseek,
5014 };
5015
5016 static const struct file_operations set_tracer_fops = {
5017         .open           = tracing_open_generic,
5018         .read           = tracing_set_trace_read,
5019         .write          = tracing_set_trace_write,
5020         .llseek         = generic_file_llseek,
5021 };
5022
5023 static const struct file_operations tracing_pipe_fops = {
5024         .open           = tracing_open_pipe,
5025         .poll           = tracing_poll_pipe,
5026         .read           = tracing_read_pipe,
5027         .splice_read    = tracing_splice_read_pipe,
5028         .release        = tracing_release_pipe,
5029         .llseek         = no_llseek,
5030 };
5031
5032 static const struct file_operations tracing_entries_fops = {
5033         .open           = tracing_open_generic_tr,
5034         .read           = tracing_entries_read,
5035         .write          = tracing_entries_write,
5036         .llseek         = generic_file_llseek,
5037         .release        = tracing_release_generic_tr,
5038 };
5039
5040 static const struct file_operations tracing_total_entries_fops = {
5041         .open           = tracing_open_generic_tr,
5042         .read           = tracing_total_entries_read,
5043         .llseek         = generic_file_llseek,
5044         .release        = tracing_release_generic_tr,
5045 };
5046
5047 static const struct file_operations tracing_free_buffer_fops = {
5048         .open           = tracing_open_generic_tr,
5049         .write          = tracing_free_buffer_write,
5050         .release        = tracing_free_buffer_release,
5051 };
5052
5053 static const struct file_operations tracing_mark_fops = {
5054         .open           = tracing_open_generic_tr,
5055         .write          = tracing_mark_write,
5056         .llseek         = generic_file_llseek,
5057         .release        = tracing_release_generic_tr,
5058 };
5059
5060 static const struct file_operations trace_clock_fops = {
5061         .open           = tracing_clock_open,
5062         .read           = seq_read,
5063         .llseek         = seq_lseek,
5064         .release        = tracing_single_release_tr,
5065         .write          = tracing_clock_write,
5066 };
5067
5068 #ifdef CONFIG_TRACER_SNAPSHOT
5069 static const struct file_operations snapshot_fops = {
5070         .open           = tracing_snapshot_open,
5071         .read           = seq_read,
5072         .write          = tracing_snapshot_write,
5073         .llseek         = tracing_lseek,
5074         .release        = tracing_snapshot_release,
5075 };
5076
5077 static const struct file_operations snapshot_raw_fops = {
5078         .open           = snapshot_raw_open,
5079         .read           = tracing_buffers_read,
5080         .release        = tracing_buffers_release,
5081         .splice_read    = tracing_buffers_splice_read,
5082         .llseek         = no_llseek,
5083 };
5084
5085 #endif /* CONFIG_TRACER_SNAPSHOT */
5086
5087 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5088 {
5089         struct trace_array *tr = inode->i_private;
5090         struct ftrace_buffer_info *info;
5091         int ret;
5092
5093         if (tracing_disabled)
5094                 return -ENODEV;
5095
5096         if (trace_array_get(tr) < 0)
5097                 return -ENODEV;
5098
5099         info = kzalloc(sizeof(*info), GFP_KERNEL);
5100         if (!info) {
5101                 trace_array_put(tr);
5102                 return -ENOMEM;
5103         }
5104
5105         mutex_lock(&trace_types_lock);
5106
5107         info->iter.tr           = tr;
5108         info->iter.cpu_file     = tracing_get_cpu(inode);
5109         info->iter.trace        = tr->current_trace;
5110         info->iter.trace_buffer = &tr->trace_buffer;
5111         info->spare             = NULL;
5112         /* Force reading ring buffer for first read */
5113         info->read              = (unsigned int)-1;
5114
5115         filp->private_data = info;
5116
5117         mutex_unlock(&trace_types_lock);
5118
5119         ret = nonseekable_open(inode, filp);
5120         if (ret < 0)
5121                 trace_array_put(tr);
5122
5123         return ret;
5124 }
5125
5126 static unsigned int
5127 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5128 {
5129         struct ftrace_buffer_info *info = filp->private_data;
5130         struct trace_iterator *iter = &info->iter;
5131
5132         return trace_poll(iter, filp, poll_table);
5133 }
5134
5135 static ssize_t
5136 tracing_buffers_read(struct file *filp, char __user *ubuf,
5137                      size_t count, loff_t *ppos)
5138 {
5139         struct ftrace_buffer_info *info = filp->private_data;
5140         struct trace_iterator *iter = &info->iter;
5141         ssize_t ret;
5142         ssize_t size;
5143
5144         if (!count)
5145                 return 0;
5146
5147         mutex_lock(&trace_types_lock);
5148
5149 #ifdef CONFIG_TRACER_MAX_TRACE
5150         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5151                 size = -EBUSY;
5152                 goto out_unlock;
5153         }
5154 #endif
5155
5156         if (!info->spare)
5157                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5158                                                           iter->cpu_file);
5159         size = -ENOMEM;
5160         if (!info->spare)
5161                 goto out_unlock;
5162
5163         /* Do we have previous read data to read? */
5164         if (info->read < PAGE_SIZE)
5165                 goto read;
5166
5167  again:
5168         trace_access_lock(iter->cpu_file);
5169         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5170                                     &info->spare,
5171                                     count,
5172                                     iter->cpu_file, 0);
5173         trace_access_unlock(iter->cpu_file);
5174
5175         if (ret < 0) {
5176                 if (trace_empty(iter)) {
5177                         if ((filp->f_flags & O_NONBLOCK)) {
5178                                 size = -EAGAIN;
5179                                 goto out_unlock;
5180                         }
5181                         mutex_unlock(&trace_types_lock);
5182                         iter->trace->wait_pipe(iter);
5183                         mutex_lock(&trace_types_lock);
5184                         if (signal_pending(current)) {
5185                                 size = -EINTR;
5186                                 goto out_unlock;
5187                         }
5188                         goto again;
5189                 }
5190                 size = 0;
5191                 goto out_unlock;
5192         }
5193
5194         info->read = 0;
5195  read:
5196         size = PAGE_SIZE - info->read;
5197         if (size > count)
5198                 size = count;
5199
5200         ret = copy_to_user(ubuf, info->spare + info->read, size);
5201         if (ret == size) {
5202                 size = -EFAULT;
5203                 goto out_unlock;
5204         }
5205         size -= ret;
5206
5207         *ppos += size;
5208         info->read += size;
5209
5210  out_unlock:
5211         mutex_unlock(&trace_types_lock);
5212
5213         return size;
5214 }
5215
5216 static int tracing_buffers_release(struct inode *inode, struct file *file)
5217 {
5218         struct ftrace_buffer_info *info = file->private_data;
5219         struct trace_iterator *iter = &info->iter;
5220
5221         mutex_lock(&trace_types_lock);
5222
5223         __trace_array_put(iter->tr);
5224
5225         if (info->spare)
5226                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5227         kfree(info);
5228
5229         mutex_unlock(&trace_types_lock);
5230
5231         return 0;
5232 }
5233
5234 struct buffer_ref {
5235         struct ring_buffer      *buffer;
5236         void                    *page;
5237         int                     ref;
5238 };
5239
5240 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5241                                     struct pipe_buffer *buf)
5242 {
5243         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5244
5245         if (--ref->ref)
5246                 return;
5247
5248         ring_buffer_free_read_page(ref->buffer, ref->page);
5249         kfree(ref);
5250         buf->private = 0;
5251 }
5252
5253 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5254                                 struct pipe_buffer *buf)
5255 {
5256         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5257
5258         ref->ref++;
5259 }
5260
5261 /* Pipe buffer operations for a buffer. */
5262 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5263         .can_merge              = 0,
5264         .confirm                = generic_pipe_buf_confirm,
5265         .release                = buffer_pipe_buf_release,
5266         .steal                  = generic_pipe_buf_steal,
5267         .get                    = buffer_pipe_buf_get,
5268 };
5269
5270 /*
5271  * Callback from splice_to_pipe(): release the pages still referenced by
5272  * the spd if we errored out while filling the pipe.
5273  */
5274 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5275 {
5276         struct buffer_ref *ref =
5277                 (struct buffer_ref *)spd->partial[i].private;
5278
5279         if (--ref->ref)
5280                 return;
5281
5282         ring_buffer_free_read_page(ref->buffer, ref->page);
5283         kfree(ref);
5284         spd->partial[i].private = 0;
5285 }
5286
5287 static ssize_t
5288 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5289                             struct pipe_inode_info *pipe, size_t len,
5290                             unsigned int flags)
5291 {
5292         struct ftrace_buffer_info *info = file->private_data;
5293         struct trace_iterator *iter = &info->iter;
5294         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5295         struct page *pages_def[PIPE_DEF_BUFFERS];
5296         struct splice_pipe_desc spd = {
5297                 .pages          = pages_def,
5298                 .partial        = partial_def,
5299                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5300                 .flags          = flags,
5301                 .ops            = &buffer_pipe_buf_ops,
5302                 .spd_release    = buffer_spd_release,
5303         };
5304         struct buffer_ref *ref;
5305         int entries, size, i;
5306         ssize_t ret;
5307
5308         mutex_lock(&trace_types_lock);
5309
5310 #ifdef CONFIG_TRACER_MAX_TRACE
5311         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5312                 ret = -EBUSY;
5313                 goto out;
5314         }
5315 #endif
5316
5317         if (splice_grow_spd(pipe, &spd)) {
5318                 ret = -ENOMEM;
5319                 goto out;
5320         }
5321
5322         if (*ppos & (PAGE_SIZE - 1)) {
5323                 ret = -EINVAL;
5324                 goto out;
5325         }
5326
5327         if (len & (PAGE_SIZE - 1)) {
5328                 if (len < PAGE_SIZE) {
5329                         ret = -EINVAL;
5330                         goto out;
5331                 }
5332                 len &= PAGE_MASK;
5333         }
5334
5335  again:
5336         trace_access_lock(iter->cpu_file);
5337         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5338
5339         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5340                 struct page *page;
5341                 int r;
5342
5343                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5344                 if (!ref)
5345                         break;
5346
5347                 ref->ref = 1;
5348                 ref->buffer = iter->trace_buffer->buffer;
5349                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5350                 if (!ref->page) {
5351                         kfree(ref);
5352                         break;
5353                 }
5354
5355                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5356                                           len, iter->cpu_file, 1);
5357                 if (r < 0) {
5358                         ring_buffer_free_read_page(ref->buffer, ref->page);
5359                         kfree(ref);
5360                         break;
5361                 }
5362
5363                 /*
5364                  * Zero out any leftover data; this page is going
5365                  * out to user land.
5366                  */
5367                 size = ring_buffer_page_len(ref->page);
5368                 if (size < PAGE_SIZE)
5369                         memset(ref->page + size, 0, PAGE_SIZE - size);
5370
5371                 page = virt_to_page(ref->page);
5372
5373                 spd.pages[i] = page;
5374                 spd.partial[i].len = PAGE_SIZE;
5375                 spd.partial[i].offset = 0;
5376                 spd.partial[i].private = (unsigned long)ref;
5377                 spd.nr_pages++;
5378                 *ppos += PAGE_SIZE;
5379
5380                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5381         }
5382
5383         trace_access_unlock(iter->cpu_file);
5384         spd.nr_pages = i;
5385
5386         /* did we read anything? */
5387         if (!spd.nr_pages) {
5388                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5389                         ret = -EAGAIN;
5390                         goto out;
5391                 }
5392                 mutex_unlock(&trace_types_lock);
5393                 iter->trace->wait_pipe(iter);
5394                 mutex_lock(&trace_types_lock);
5395                 if (signal_pending(current)) {
5396                         ret = -EINTR;
5397                         goto out;
5398                 }
5399                 goto again;
5400         }
5401
5402         ret = splice_to_pipe(pipe, &spd);
5403         splice_shrink_spd(&spd);
5404 out:
5405         mutex_unlock(&trace_types_lock);
5406
5407         return ret;
5408 }
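
/*
 * Illustrative note (annotation, not in the original source): the read and
 * splice handlers above back the per-CPU "trace_pipe_raw" files, which hand
 * out raw ring-buffer pages. The splice path rejects unaligned offsets and
 * sub-page lengths, so consumers typically pull whole pages, e.g.:
 *
 *   dd if=/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw \
 *      of=/tmp/cpu0.raw bs=4096
 */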
5409
5410 static const struct file_operations tracing_buffers_fops = {
5411         .open           = tracing_buffers_open,
5412         .read           = tracing_buffers_read,
5413         .poll           = tracing_buffers_poll,
5414         .release        = tracing_buffers_release,
5415         .splice_read    = tracing_buffers_splice_read,
5416         .llseek         = no_llseek,
5417 };
5418
5419 static ssize_t
5420 tracing_stats_read(struct file *filp, char __user *ubuf,
5421                    size_t count, loff_t *ppos)
5422 {
5423         struct inode *inode = file_inode(filp);
5424         struct trace_array *tr = inode->i_private;
5425         struct trace_buffer *trace_buf = &tr->trace_buffer;
5426         int cpu = tracing_get_cpu(inode);
5427         struct trace_seq *s;
5428         unsigned long cnt;
5429         unsigned long long t;
5430         unsigned long usec_rem;
5431
5432         s = kmalloc(sizeof(*s), GFP_KERNEL);
5433         if (!s)
5434                 return -ENOMEM;
5435
5436         trace_seq_init(s);
5437
5438         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5439         trace_seq_printf(s, "entries: %ld\n", cnt);
5440
5441         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5442         trace_seq_printf(s, "overrun: %ld\n", cnt);
5443
5444         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5445         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5446
5447         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5448         trace_seq_printf(s, "bytes: %ld\n", cnt);
5449
5450         if (trace_clocks[tr->clock_id].in_ns) {
5451                 /* local or global for trace_clock */
5452                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5453                 usec_rem = do_div(t, USEC_PER_SEC);
5454                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5455                                                                 t, usec_rem);
5456
5457                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5458                 usec_rem = do_div(t, USEC_PER_SEC);
5459                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5460         } else {
5461                 /* counter or tsc mode for trace_clock */
5462                 trace_seq_printf(s, "oldest event ts: %llu\n",
5463                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5464
5465                 trace_seq_printf(s, "now ts: %llu\n",
5466                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5467         }
5468
5469         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5470         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5471
5472         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5473         trace_seq_printf(s, "read events: %ld\n", cnt);
5474
5475         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5476
5477         kfree(s);
5478
5479         return count;
5480 }
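
/*
 * Example of the per-CPU "stats" output produced above (illustrative values,
 * not taken from the original source):
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 6724
 *   oldest event ts:  2289.530052
 *   now ts:  2290.124852
 *   dropped events: 0
 *   read events: 0
 */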
5481
5482 static const struct file_operations tracing_stats_fops = {
5483         .open           = tracing_open_generic_tr,
5484         .read           = tracing_stats_read,
5485         .llseek         = generic_file_llseek,
5486         .release        = tracing_release_generic_tr,
5487 };
5488
5489 #ifdef CONFIG_DYNAMIC_FTRACE
5490
5491 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5492 {
5493         return 0;
5494 }
5495
5496 static ssize_t
5497 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5498                   size_t cnt, loff_t *ppos)
5499 {
5500         static char ftrace_dyn_info_buffer[1024];
5501         static DEFINE_MUTEX(dyn_info_mutex);
5502         unsigned long *p = filp->private_data;
5503         char *buf = ftrace_dyn_info_buffer;
5504         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5505         int r;
5506
5507         mutex_lock(&dyn_info_mutex);
5508         r = sprintf(buf, "%ld ", *p);
5509
5510         r += ftrace_arch_read_dyn_info(buf + r, (size - 1) - r);
5511         buf[r++] = '\n';
5512
5513         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5514
5515         mutex_unlock(&dyn_info_mutex);
5516
5517         return r;
5518 }
5519
5520 static const struct file_operations tracing_dyn_info_fops = {
5521         .open           = tracing_open_generic,
5522         .read           = tracing_read_dyn_info,
5523         .llseek         = generic_file_llseek,
5524 };
5525 #endif /* CONFIG_DYNAMIC_FTRACE */
5526
5527 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5528 static void
5529 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5530 {
5531         tracing_snapshot();
5532 }
5533
5534 static void
5535 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5536 {
5537         unsigned long *count = (unsigned long *)data;
5538
5539         if (!*count)
5540                 return;
5541
5542         if (*count != -1)
5543                 (*count)--;
5544
5545         tracing_snapshot();
5546 }
5547
5548 static int
5549 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5550                       struct ftrace_probe_ops *ops, void *data)
5551 {
5552         long count = (long)data;
5553
5554         seq_printf(m, "%ps:", (void *)ip);
5555
5556         seq_puts(m, "snapshot");
5557 
5558         if (count == -1)
5559                 seq_puts(m, ":unlimited\n");
5560         else
5561                 seq_printf(m, ":count=%ld\n", count);
5562
5563         return 0;
5564 }
5565
5566 static struct ftrace_probe_ops snapshot_probe_ops = {
5567         .func                   = ftrace_snapshot,
5568         .print                  = ftrace_snapshot_print,
5569 };
5570
5571 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5572         .func                   = ftrace_count_snapshot,
5573         .print                  = ftrace_snapshot_print,
5574 };
5575
5576 static int
5577 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5578                                char *glob, char *cmd, char *param, int enable)
5579 {
5580         struct ftrace_probe_ops *ops;
5581         void *count = (void *)-1;
5582         char *number;
5583         int ret;
5584
5585         /* hash funcs only work with set_ftrace_filter */
5586         if (!enable)
5587                 return -EINVAL;
5588
5589         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5590
5591         if (glob[0] == '!') {
5592                 unregister_ftrace_function_probe_func(glob+1, ops);
5593                 return 0;
5594         }
5595
5596         if (!param)
5597                 goto out_reg;
5598
5599         number = strsep(&param, ":");
5600
5601         if (!strlen(number))
5602                 goto out_reg;
5603
5604         /*
5605          * We use the callback data field (which is a pointer)
5606          * as our counter.
5607          */
5608         ret = kstrtoul(number, 0, (unsigned long *)&count);
5609         if (ret)
5610                 return ret;
5611
5612  out_reg:
5613         ret = register_ftrace_function_probe(glob, ops, count);
5614
5615         if (ret >= 0)
5616                 alloc_snapshot(&global_trace);
5617
5618         return ret < 0 ? ret : 0;
5619 }
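
/*
 * Illustrative usage of the "snapshot" ftrace command registered below
 * (annotation, not in the original source). The glob, leading '!' and the
 * optional count are parsed by the callback above:
 *
 *   echo 'schedule:snapshot:1' >> /sys/kernel/debug/tracing/set_ftrace_filter
 *   echo '!schedule:snapshot'  >> /sys/kernel/debug/tracing/set_ftrace_filter
 */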
5620
5621 static struct ftrace_func_command ftrace_snapshot_cmd = {
5622         .name                   = "snapshot",
5623         .func                   = ftrace_trace_snapshot_callback,
5624 };
5625
5626 static __init int register_snapshot_cmd(void)
5627 {
5628         return register_ftrace_command(&ftrace_snapshot_cmd);
5629 }
5630 #else
5631 static inline __init int register_snapshot_cmd(void) { return 0; }
5632 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5633
5634 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5635 {
5636         if (tr->dir)
5637                 return tr->dir;
5638
5639         if (!debugfs_initialized())
5640                 return NULL;
5641
5642         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5643                 tr->dir = debugfs_create_dir("tracing", NULL);
5644
5645         if (!tr->dir)
5646                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5647
5648         return tr->dir;
5649 }
5650
5651 struct dentry *tracing_init_dentry(void)
5652 {
5653         return tracing_init_dentry_tr(&global_trace);
5654 }
5655
5656 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5657 {
5658         struct dentry *d_tracer;
5659
5660         if (tr->percpu_dir)
5661                 return tr->percpu_dir;
5662
5663         d_tracer = tracing_init_dentry_tr(tr);
5664         if (!d_tracer)
5665                 return NULL;
5666
5667         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5668
5669         WARN_ONCE(!tr->percpu_dir,
5670                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5671
5672         return tr->percpu_dir;
5673 }
5674
5675 static struct dentry *
5676 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5677                       void *data, long cpu, const struct file_operations *fops)
5678 {
5679         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5680
5681         if (ret) /* See tracing_get_cpu() */
5682                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5683         return ret;
5684 }
5685
5686 static void
5687 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5688 {
5689         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5690         struct dentry *d_cpu;
5691         char cpu_dir[30]; /* 30 characters should be more than enough */
5692
5693         if (!d_percpu)
5694                 return;
5695
5696         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5697         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5698         if (!d_cpu) {
5699                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5700                 return;
5701         }
5702
5703         /* per cpu trace_pipe */
5704         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5705                                 tr, cpu, &tracing_pipe_fops);
5706
5707         /* per cpu trace */
5708         trace_create_cpu_file("trace", 0644, d_cpu,
5709                                 tr, cpu, &tracing_fops);
5710
5711         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5712                                 tr, cpu, &tracing_buffers_fops);
5713
5714         trace_create_cpu_file("stats", 0444, d_cpu,
5715                                 tr, cpu, &tracing_stats_fops);
5716
5717         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5718                                 tr, cpu, &tracing_entries_fops);
5719
5720 #ifdef CONFIG_TRACER_SNAPSHOT
5721         trace_create_cpu_file("snapshot", 0644, d_cpu,
5722                                 tr, cpu, &snapshot_fops);
5723
5724         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5725                                 tr, cpu, &snapshot_raw_fops);
5726 #endif
5727 }
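
/*
 * Resulting per-CPU layout (summary of the calls above, added as an
 * annotation, not in the original source): each per_cpu/cpuN directory gets
 * trace, trace_pipe, trace_pipe_raw, stats and buffer_size_kb, plus snapshot
 * and snapshot_raw when CONFIG_TRACER_SNAPSHOT is enabled.
 */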
5728
5729 #ifdef CONFIG_FTRACE_SELFTEST
5730 /* Let selftest have access to static functions in this file */
5731 #include "trace_selftest.c"
5732 #endif
5733
5734 struct trace_option_dentry {
5735         struct tracer_opt               *opt;
5736         struct tracer_flags             *flags;
5737         struct trace_array              *tr;
5738         struct dentry                   *entry;
5739 };
5740
5741 static ssize_t
5742 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5743                         loff_t *ppos)
5744 {
5745         struct trace_option_dentry *topt = filp->private_data;
5746         char *buf;
5747
5748         if (topt->flags->val & topt->opt->bit)
5749                 buf = "1\n";
5750         else
5751                 buf = "0\n";
5752
5753         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5754 }
5755
5756 static ssize_t
5757 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5758                          loff_t *ppos)
5759 {
5760         struct trace_option_dentry *topt = filp->private_data;
5761         unsigned long val;
5762         int ret;
5763
5764         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5765         if (ret)
5766                 return ret;
5767
5768         if (val != 0 && val != 1)
5769                 return -EINVAL;
5770
5771         if (!!(topt->flags->val & topt->opt->bit) != val) {
5772                 mutex_lock(&trace_types_lock);
5773                 ret = __set_tracer_option(topt->tr, topt->flags,
5774                                           topt->opt, !val);
5775                 mutex_unlock(&trace_types_lock);
5776                 if (ret)
5777                         return ret;
5778         }
5779
5780         *ppos += cnt;
5781
5782         return cnt;
5783 }
5784
5785
5786 static const struct file_operations trace_options_fops = {
5787         .open = tracing_open_generic,
5788         .read = trace_options_read,
5789         .write = trace_options_write,
5790         .llseek = generic_file_llseek,
5791 };
5792
5793 static ssize_t
5794 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5795                         loff_t *ppos)
5796 {
5797         long index = (long)filp->private_data;
5798         char *buf;
5799
5800         if (trace_flags & (1 << index))
5801                 buf = "1\n";
5802         else
5803                 buf = "0\n";
5804
5805         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5806 }
5807
5808 static ssize_t
5809 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5810                          loff_t *ppos)
5811 {
5812         struct trace_array *tr = &global_trace;
5813         long index = (long)filp->private_data;
5814         unsigned long val;
5815         int ret;
5816
5817         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5818         if (ret)
5819                 return ret;
5820
5821         if (val != 0 && val != 1)
5822                 return -EINVAL;
5823
5824         mutex_lock(&trace_types_lock);
5825         ret = set_tracer_flag(tr, 1 << index, val);
5826         mutex_unlock(&trace_types_lock);
5827
5828         if (ret < 0)
5829                 return ret;
5830
5831         *ppos += cnt;
5832
5833         return cnt;
5834 }
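
/*
 * Illustrative usage of the core option files created from trace_options[]
 * in create_trace_options_dir() below (annotation, not in the original
 * source). Each file under "options/" accepts only 0 or 1, mirroring the
 * checks above; the available option names depend on the kernel
 * configuration:
 *
 *   echo 1 > /sys/kernel/debug/tracing/options/sym-userobj
 *   echo 0 > /sys/kernel/debug/tracing/options/overwrite
 */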
5835
5836 static const struct file_operations trace_options_core_fops = {
5837         .open = tracing_open_generic,
5838         .read = trace_options_core_read,
5839         .write = trace_options_core_write,
5840         .llseek = generic_file_llseek,
5841 };
5842
5843 struct dentry *trace_create_file(const char *name,
5844                                  umode_t mode,
5845                                  struct dentry *parent,
5846                                  void *data,
5847                                  const struct file_operations *fops)
5848 {
5849         struct dentry *ret;
5850
5851         ret = debugfs_create_file(name, mode, parent, data, fops);
5852         if (!ret)
5853                 pr_warning("Could not create debugfs '%s' entry\n", name);
5854
5855         return ret;
5856 }
5857
5858
5859 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5860 {
5861         struct dentry *d_tracer;
5862
5863         if (tr->options)
5864                 return tr->options;
5865
5866         d_tracer = tracing_init_dentry_tr(tr);
5867         if (!d_tracer)
5868                 return NULL;
5869
5870         tr->options = debugfs_create_dir("options", d_tracer);
5871         if (!tr->options) {
5872                 pr_warning("Could not create debugfs directory 'options'\n");
5873                 return NULL;
5874         }
5875
5876         return tr->options;
5877 }
5878
5879 static void
5880 create_trace_option_file(struct trace_array *tr,
5881                          struct trace_option_dentry *topt,
5882                          struct tracer_flags *flags,
5883                          struct tracer_opt *opt)
5884 {
5885         struct dentry *t_options;
5886
5887         t_options = trace_options_init_dentry(tr);
5888         if (!t_options)
5889                 return;
5890
5891         topt->flags = flags;
5892         topt->opt = opt;
5893         topt->tr = tr;
5894
5895         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5896                                     &trace_options_fops);
5897
5898 }
5899
5900 static struct trace_option_dentry *
5901 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5902 {
5903         struct trace_option_dentry *topts;
5904         struct tracer_flags *flags;
5905         struct tracer_opt *opts;
5906         int cnt;
5907
5908         if (!tracer)
5909                 return NULL;
5910
5911         flags = tracer->flags;
5912
5913         if (!flags || !flags->opts)
5914                 return NULL;
5915
5916         opts = flags->opts;
5917
5918         for (cnt = 0; opts[cnt].name; cnt++)
5919                 ;
5920
5921         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5922         if (!topts)
5923                 return NULL;
5924
5925         for (cnt = 0; opts[cnt].name; cnt++)
5926                 create_trace_option_file(tr, &topts[cnt], flags,
5927                                          &opts[cnt]);
5928
5929         return topts;
5930 }
5931
5932 static void
5933 destroy_trace_option_files(struct trace_option_dentry *topts)
5934 {
5935         int cnt;
5936
5937         if (!topts)
5938                 return;
5939
5940         for (cnt = 0; topts[cnt].opt; cnt++) {
5941                 if (topts[cnt].entry)
5942                         debugfs_remove(topts[cnt].entry);
5943         }
5944
5945         kfree(topts);
5946 }
5947
5948 static struct dentry *
5949 create_trace_option_core_file(struct trace_array *tr,
5950                               const char *option, long index)
5951 {
5952         struct dentry *t_options;
5953
5954         t_options = trace_options_init_dentry(tr);
5955         if (!t_options)
5956                 return NULL;
5957
5958         return trace_create_file(option, 0644, t_options, (void *)index,
5959                                     &trace_options_core_fops);
5960 }
5961
5962 static __init void create_trace_options_dir(struct trace_array *tr)
5963 {
5964         struct dentry *t_options;
5965         int i;
5966
5967         t_options = trace_options_init_dentry(tr);
5968         if (!t_options)
5969                 return;
5970
5971         for (i = 0; trace_options[i]; i++)
5972                 create_trace_option_core_file(tr, trace_options[i], i);
5973 }
5974
5975 static ssize_t
5976 rb_simple_read(struct file *filp, char __user *ubuf,
5977                size_t cnt, loff_t *ppos)
5978 {
5979         struct trace_array *tr = filp->private_data;
5980         char buf[64];
5981         int r;
5982
5983         r = tracer_tracing_is_on(tr);
5984         r = sprintf(buf, "%d\n", r);
5985
5986         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5987 }
5988
5989 static ssize_t
5990 rb_simple_write(struct file *filp, const char __user *ubuf,
5991                 size_t cnt, loff_t *ppos)
5992 {
5993         struct trace_array *tr = filp->private_data;
5994         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5995         unsigned long val;
5996         int ret;
5997
5998         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5999         if (ret)
6000                 return ret;
6001
6002         if (buffer) {
6003                 mutex_lock(&trace_types_lock);
6004                 if (val) {
6005                         tracer_tracing_on(tr);
6006                         if (tr->current_trace->start)
6007                                 tr->current_trace->start(tr);
6008                 } else {
6009                         tracer_tracing_off(tr);
6010                         if (tr->current_trace->stop)
6011                                 tr->current_trace->stop(tr);
6012                 }
6013                 mutex_unlock(&trace_types_lock);
6014         }
6015
6016         (*ppos)++;
6017
6018         return cnt;
6019 }
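
/*
 * Illustrative usage of the handlers above, which back the "tracing_on" file
 * (annotation, not in the original source): writing 0 stops recording into
 * the ring buffer without tearing anything down, and writing 1 resumes it:
 *
 *   echo 0 > /sys/kernel/debug/tracing/tracing_on
 *   echo 1 > /sys/kernel/debug/tracing/tracing_on
 */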
6020
6021 static const struct file_operations rb_simple_fops = {
6022         .open           = tracing_open_generic_tr,
6023         .read           = rb_simple_read,
6024         .write          = rb_simple_write,
6025         .release        = tracing_release_generic_tr,
6026         .llseek         = default_llseek,
6027 };
6028
6029 struct dentry *trace_instance_dir;
6030
6031 static void
6032 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6033
6034 static int
6035 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6036 {
6037         enum ring_buffer_flags rb_flags;
6038
6039         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6040
6041         buf->tr = tr;
6042
6043         buf->buffer = ring_buffer_alloc(size, rb_flags);
6044         if (!buf->buffer)
6045                 return -ENOMEM;
6046
6047         buf->data = alloc_percpu(struct trace_array_cpu);
6048         if (!buf->data) {
6049                 ring_buffer_free(buf->buffer);
6050                 return -ENOMEM;
6051         }
6052
6053         /* Allocate the first page for all buffers */
6054         set_buffer_entries(&tr->trace_buffer,
6055                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6056
6057         return 0;
6058 }
6059
6060 static int allocate_trace_buffers(struct trace_array *tr, int size)
6061 {
6062         int ret;
6063
6064         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6065         if (ret)
6066                 return ret;
6067
6068 #ifdef CONFIG_TRACER_MAX_TRACE
6069         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6070                                     allocate_snapshot ? size : 1);
6071         if (WARN_ON(ret)) {
6072                 ring_buffer_free(tr->trace_buffer.buffer);
6073                 free_percpu(tr->trace_buffer.data);
6074                 return -ENOMEM;
6075         }
6076         tr->allocated_snapshot = allocate_snapshot;
6077
6078         /*
6079          * Only the top level trace array gets its snapshot allocated
6080          * from the kernel command line.
6081          */
6082         allocate_snapshot = false;
6083 #endif
6084         return 0;
6085 }
6086
6087 static int new_instance_create(const char *name)
6088 {
6089         struct trace_array *tr;
6090         int ret;
6091
6092         mutex_lock(&trace_types_lock);
6093
6094         ret = -EEXIST;
6095         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6096                 if (tr->name && strcmp(tr->name, name) == 0)
6097                         goto out_unlock;
6098         }
6099
6100         ret = -ENOMEM;
6101         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6102         if (!tr)
6103                 goto out_unlock;
6104
6105         tr->name = kstrdup(name, GFP_KERNEL);
6106         if (!tr->name)
6107                 goto out_free_tr;
6108
6109         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6110                 goto out_free_tr;
6111
6112         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6113
6114         raw_spin_lock_init(&tr->start_lock);
6115
6116         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6117
6118         tr->current_trace = &nop_trace;
6119
6120         INIT_LIST_HEAD(&tr->systems);
6121         INIT_LIST_HEAD(&tr->events);
6122
6123         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6124                 goto out_free_tr;
6125
6126         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6127         if (!tr->dir)
6128                 goto out_free_tr;
6129
6130         ret = event_trace_add_tracer(tr->dir, tr);
6131         if (ret) {
6132                 debugfs_remove_recursive(tr->dir);
6133                 goto out_free_tr;
6134         }
6135
6136         init_tracer_debugfs(tr, tr->dir);
6137
6138         list_add(&tr->list, &ftrace_trace_arrays);
6139
6140         mutex_unlock(&trace_types_lock);
6141
6142         return 0;
6143
6144  out_free_tr:
6145         if (tr->trace_buffer.buffer)
6146                 ring_buffer_free(tr->trace_buffer.buffer);
6147         free_cpumask_var(tr->tracing_cpumask);
6148         kfree(tr->name);
6149         kfree(tr);
6150
6151  out_unlock:
6152         mutex_unlock(&trace_types_lock);
6153
6154         return ret;
6155
6156 }
6157
6158 static int instance_delete(const char *name)
6159 {
6160         struct trace_array *tr;
6161         int found = 0;
6162         int ret;
6163
6164         mutex_lock(&trace_types_lock);
6165
6166         ret = -ENODEV;
6167         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6168                 if (tr->name && strcmp(tr->name, name) == 0) {
6169                         found = 1;
6170                         break;
6171                 }
6172         }
6173         if (!found)
6174                 goto out_unlock;
6175
6176         ret = -EBUSY;
6177         if (tr->ref)
6178                 goto out_unlock;
6179
6180         list_del(&tr->list);
6181
6182         tracing_set_nop(tr);
6183         event_trace_del_tracer(tr);
6184         ftrace_destroy_function_files(tr);
6185         debugfs_remove_recursive(tr->dir);
6186         free_percpu(tr->trace_buffer.data);
6187         ring_buffer_free(tr->trace_buffer.buffer);
6188
6189         kfree(tr->name);
6190         kfree(tr);
6191
6192         ret = 0;
6193
6194  out_unlock:
6195         mutex_unlock(&trace_types_lock);
6196
6197         return ret;
6198 }
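
/*
 * Illustrative usage of the instance create/delete paths above (annotation,
 * not in the original source). They are driven by mkdir/rmdir on the
 * "instances" directory, wired up in create_trace_instances() below:
 *
 *   mkdir /sys/kernel/debug/tracing/instances/foo
 *   rmdir /sys/kernel/debug/tracing/instances/foo
 */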
6199
6200 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6201 {
6202         struct dentry *parent;
6203         int ret;
6204
6205         /* Paranoid: Make sure the parent is the "instances" directory */
6206         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6207         if (WARN_ON_ONCE(parent != trace_instance_dir))
6208                 return -ENOENT;
6209
6210         /*
6211          * The inode mutex is locked, but debugfs_create_dir() will also
6212          * take the mutex. As the instances directory can not be destroyed
6213          * or changed in any other way, it is safe to unlock it, and
6214          * let the dentry try. If two users try to make the same dir at
6215          * the same time, then the new_instance_create() will determine the
6216          * winner.
6217          */
6218         mutex_unlock(&inode->i_mutex);
6219
6220         ret = new_instance_create(dentry->d_iname);
6221
6222         mutex_lock(&inode->i_mutex);
6223
6224         return ret;
6225 }
6226
6227 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6228 {
6229         struct dentry *parent;
6230         int ret;
6231
6232         /* Paranoid: Make sure the parent is the "instances" directory */
6233         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6234         if (WARN_ON_ONCE(parent != trace_instance_dir))
6235                 return -ENOENT;
6236
6237         /* The caller did a dget() on dentry */
6238         mutex_unlock(&dentry->d_inode->i_mutex);
6239
6240         /*
6241          * The inode mutex is locked, but debugfs_remove_recursive() will
6242          * also take the mutex. As the instances directory can not be
6243          * destroyed or changed in any other way, it is safe to unlock it,
6244          * and let the dentry try. If two users try to remove the same dir
6245          * at the same time, then instance_delete() will determine the
6246          * winner.
6247          */
6248         mutex_unlock(&inode->i_mutex);
6249
6250         ret = instance_delete(dentry->d_iname);
6251
6252         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6253         mutex_lock(&dentry->d_inode->i_mutex);
6254
6255         return ret;
6256 }
6257
6258 static const struct inode_operations instance_dir_inode_operations = {
6259         .lookup         = simple_lookup,
6260         .mkdir          = instance_mkdir,
6261         .rmdir          = instance_rmdir,
6262 };
6263
6264 static __init void create_trace_instances(struct dentry *d_tracer)
6265 {
6266         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6267         if (WARN_ON(!trace_instance_dir))
6268                 return;
6269
6270         /* Hijack the dir inode operations, to allow mkdir */
6271         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6272 }
6273
6274 static void
6275 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6276 {
6277         int cpu;
6278
6279         trace_create_file("available_tracers", 0444, d_tracer,
6280                         tr, &show_traces_fops);
6281
6282         trace_create_file("current_tracer", 0644, d_tracer,
6283                         tr, &set_tracer_fops);
6284
6285         trace_create_file("tracing_cpumask", 0644, d_tracer,
6286                           tr, &tracing_cpumask_fops);
6287
6288         trace_create_file("trace_options", 0644, d_tracer,
6289                           tr, &tracing_iter_fops);
6290
6291         trace_create_file("trace", 0644, d_tracer,
6292                           tr, &tracing_fops);
6293
6294         trace_create_file("trace_pipe", 0444, d_tracer,
6295                           tr, &tracing_pipe_fops);
6296
6297         trace_create_file("buffer_size_kb", 0644, d_tracer,
6298                           tr, &tracing_entries_fops);
6299
6300         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6301                           tr, &tracing_total_entries_fops);
6302
6303         trace_create_file("free_buffer", 0200, d_tracer,
6304                           tr, &tracing_free_buffer_fops);
6305
6306         trace_create_file("trace_marker", 0220, d_tracer,
6307                           tr, &tracing_mark_fops);
6308
6309         trace_create_file("trace_clock", 0644, d_tracer, tr,
6310                           &trace_clock_fops);
6311
6312         trace_create_file("tracing_on", 0644, d_tracer,
6313                           tr, &rb_simple_fops);
6314
6315 #ifdef CONFIG_TRACER_MAX_TRACE
6316         trace_create_file("tracing_max_latency", 0644, d_tracer,
6317                         &tr->max_latency, &tracing_max_lat_fops);
6318 #endif
6319
6320         if (ftrace_create_function_files(tr, d_tracer))
6321                 WARN(1, "Could not allocate function filter files");
6322
6323 #ifdef CONFIG_TRACER_SNAPSHOT
6324         trace_create_file("snapshot", 0644, d_tracer,
6325                           tr, &snapshot_fops);
6326 #endif
6327
6328         for_each_tracing_cpu(cpu)
6329                 tracing_init_debugfs_percpu(tr, cpu);
6330
6331 }
6332
6333 static __init int tracer_init_debugfs(void)
6334 {
6335         struct dentry *d_tracer;
6336
6337         trace_access_lock_init();
6338
6339         d_tracer = tracing_init_dentry();
6340         if (!d_tracer)
6341                 return 0;
6342
6343         init_tracer_debugfs(&global_trace, d_tracer);
6344
6345         trace_create_file("tracing_thresh", 0644, d_tracer,
6346                         &tracing_thresh, &tracing_max_lat_fops);
6347
6348         trace_create_file("README", 0444, d_tracer,
6349                         NULL, &tracing_readme_fops);
6350
6351         trace_create_file("saved_cmdlines", 0444, d_tracer,
6352                         NULL, &tracing_saved_cmdlines_fops);
6353
6354 #ifdef CONFIG_DYNAMIC_FTRACE
6355         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6356                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6357 #endif
6358
6359         create_trace_instances(d_tracer);
6360
6361         create_trace_options_dir(&global_trace);
6362
6363         return 0;
6364 }
6365
6366 static int trace_panic_handler(struct notifier_block *this,
6367                                unsigned long event, void *unused)
6368 {
6369         if (ftrace_dump_on_oops)
6370                 ftrace_dump(ftrace_dump_on_oops);
6371         return NOTIFY_OK;
6372 }
6373
6374 static struct notifier_block trace_panic_notifier = {
6375         .notifier_call  = trace_panic_handler,
6376         .next           = NULL,
6377         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6378 };
6379
6380 static int trace_die_handler(struct notifier_block *self,
6381                              unsigned long val,
6382                              void *data)
6383 {
6384         switch (val) {
6385         case DIE_OOPS:
6386                 if (ftrace_dump_on_oops)
6387                         ftrace_dump(ftrace_dump_on_oops);
6388                 break;
6389         default:
6390                 break;
6391         }
6392         return NOTIFY_OK;
6393 }
6394
6395 static struct notifier_block trace_die_notifier = {
6396         .notifier_call = trace_die_handler,
6397         .priority = 200
6398 };
6399
6400 /*
6401  * printk is limited to a maximum of 1024 bytes; we really don't need it that big.
6402  * Nothing should be printing 1000 characters anyway.
6403  */
6404 #define TRACE_MAX_PRINT         1000
6405
6406 /*
6407  * Define here KERN_TRACE so that we have one place to modify
6408  * it if we decide to change what log level the ftrace dump
6409  * should be at.
6410  */
6411 #define KERN_TRACE              KERN_EMERG
6412
6413 void
6414 trace_printk_seq(struct trace_seq *s)
6415 {
6416         /* Probably should print a warning here. */
6417         if (s->len >= TRACE_MAX_PRINT)
6418                 s->len = TRACE_MAX_PRINT;
6419
6420         /* Should already be NUL-terminated, but we are paranoid. */
6421         s->buffer[s->len] = 0;
6422
6423         printk(KERN_TRACE "%s", s->buffer);
6424
6425         trace_seq_init(s);
6426 }
6427
6428 void trace_init_global_iter(struct trace_iterator *iter)
6429 {
6430         iter->tr = &global_trace;
6431         iter->trace = iter->tr->current_trace;
6432         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6433         iter->trace_buffer = &global_trace.trace_buffer;
6434
6435         if (iter->trace && iter->trace->open)
6436                 iter->trace->open(iter);
6437
6438         /* Annotate start of buffers if we had overruns */
6439         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6440                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6441
6442         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6443         if (trace_clocks[iter->tr->clock_id].in_ns)
6444                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6445 }
6446
6447 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6448 {
6449         /* use static because iter can be a bit big for the stack */
6450         static struct trace_iterator iter;
6451         static atomic_t dump_running;
6452         unsigned int old_userobj;
6453         unsigned long flags;
6454         int cnt = 0, cpu;
6455
6456         /* Only allow one dump user at a time. */
6457         if (atomic_inc_return(&dump_running) != 1) {
6458                 atomic_dec(&dump_running);
6459                 return;
6460         }
6461
6462         /*
6463          * Always turn off tracing when we dump.
6464          * We don't need to show trace output of what happens
6465          * between multiple crashes.
6466          *
6467          * If the user does a sysrq-z, then they can re-enable
6468          * tracing with echo 1 > tracing_on.
6469          */
6470         tracing_off();
6471
6472         local_irq_save(flags);
6473
6474         /* Simulate the iterator */
6475         trace_init_global_iter(&iter);
6476
6477         for_each_tracing_cpu(cpu) {
6478                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6479         }
6480
6481         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6482
6483         /* don't look at user memory in panic mode */
6484         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6485
6486         switch (oops_dump_mode) {
6487         case DUMP_ALL:
6488                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6489                 break;
6490         case DUMP_ORIG:
6491                 iter.cpu_file = raw_smp_processor_id();
6492                 break;
6493         case DUMP_NONE:
6494                 goto out_enable;
6495         default:
6496                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6497                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6498         }
6499
6500         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6501
6502         /* Did function tracer already get disabled? */
6503         if (ftrace_is_dead()) {
6504                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6505                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6506         }
6507
6508         /*
6509          * We need to stop all tracing on all CPUs to read
6510          * the next buffer. This is a bit expensive, but it is
6511          * not done often. We print all that we can read,
6512          * and then release the locks again.
6513          */
6514
6515         while (!trace_empty(&iter)) {
6516
6517                 if (!cnt)
6518                         printk(KERN_TRACE "---------------------------------\n");
6519
6520                 cnt++;
6521
6522                 /* reset all but tr, trace, and overruns */
6523                 memset(&iter.seq, 0,
6524                        sizeof(struct trace_iterator) -
6525                        offsetof(struct trace_iterator, seq));
6526                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6527                 iter.pos = -1;
6528
6529                 if (trace_find_next_entry_inc(&iter) != NULL) {
6530                         int ret;
6531
6532                         ret = print_trace_line(&iter);
6533                         if (ret != TRACE_TYPE_NO_CONSUME)
6534                                 trace_consume(&iter);
6535                 }
6536                 touch_nmi_watchdog();
6537
6538                 trace_printk_seq(&iter.seq);
6539         }
6540
6541         if (!cnt)
6542                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6543         else
6544                 printk(KERN_TRACE "---------------------------------\n");
6545
6546  out_enable:
6547         trace_flags |= old_userobj;
6548
6549         for_each_tracing_cpu(cpu) {
6550                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6551         }
6552         atomic_dec(&dump_running);
6553         local_irq_restore(flags);
6554 }
6555 EXPORT_SYMBOL_GPL(ftrace_dump);
6556
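/*
 * Early boot setup: allocate the cpumasks and ring buffer(s) for the
 * global trace array, register the nop tracer as the bootstrap
 * current_trace, and hook up the panic/die notifiers above. Runs as
 * an early_initcall (see the bottom of this file).
 */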
6557 __init static int tracer_alloc_buffers(void)
6558 {
6559         int ring_buf_size;
6560         int ret = -ENOMEM;
6561
6562
6563         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6564                 goto out;
6565
6566         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6567                 goto out_free_buffer_mask;
6568
6569         /* Only allocate the trace_printk buffers if trace_printk() is actually used */
6570         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6571                 /* Must be called before global_trace.buffer is allocated */
6572                 trace_printk_init_buffers();
6573
6574         /* To save memory, keep the ring buffer size to its minimum */
6575         if (ring_buffer_expanded)
6576                 ring_buf_size = trace_buf_size;
6577         else
6578                 ring_buf_size = 1;
6579
6580         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6581         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6582
6583         raw_spin_lock_init(&global_trace.start_lock);
6584
6585         /* Used for event triggers */
6586         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6587         if (!temp_buffer)
6588                 goto out_free_cpumask;
6589
6590         /* TODO: make the number of buffers hot pluggable with CPUs */
6591         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6592                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6593                 WARN_ON(1);
6594                 goto out_free_temp_buffer;
6595         }
6596
6597         if (global_trace.buffer_disabled)
6598                 tracing_off();
6599
6600         trace_init_cmdlines();
6601
6602         if (trace_boot_clock) {
6603                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6604                 if (ret < 0)
6605                         pr_warning("Trace clock %s not defined, going back to default\n",
6606                                    trace_boot_clock);
6607         }
6608
6609         /*
6610          * register_tracer() might reference current_trace, so it
6611          * needs to be set before we register anything. This is
6612          * just a bootstrap of current_trace anyway.
6613          */
6614         global_trace.current_trace = &nop_trace;
6615
6616         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6617
6618         ftrace_init_global_array_ops(&global_trace);
6619
6620         register_tracer(&nop_trace);
6621
6622         /* All seems OK, enable tracing */
6623         tracing_disabled = 0;
6624
6625         atomic_notifier_chain_register(&panic_notifier_list,
6626                                        &trace_panic_notifier);
6627
6628         register_die_notifier(&trace_die_notifier);
6629
6630         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6631
6632         INIT_LIST_HEAD(&global_trace.systems);
6633         INIT_LIST_HEAD(&global_trace.events);
6634         list_add(&global_trace.list, &ftrace_trace_arrays);
6635
6636         while (trace_boot_options) {
6637                 char *option;
6638
6639                 option = strsep(&trace_boot_options, ",");
6640                 trace_set_options(&global_trace, option);
6641         }
6642
6643         register_snapshot_cmd();
6644
6645         return 0;
6646
6647 out_free_temp_buffer:
6648         ring_buffer_free(temp_buffer);
6649 out_free_cpumask:
6650         free_percpu(global_trace.trace_buffer.data);
6651 #ifdef CONFIG_TRACER_MAX_TRACE
6652         free_percpu(global_trace.max_buffer.data);
6653 #endif
6654         free_cpumask_var(global_trace.tracing_cpumask);
6655 out_free_buffer_mask:
6656         free_cpumask_var(tracing_buffer_mask);
6657 out:
6658         return ret;
6659 }
6660
6661 __init static int clear_boot_tracer(void)
6662 {
6663         /*
6664          * The default bootup tracer name points into an init section
6665          * that is about to be freed. This function runs at late_initcall
6666          * time; if the boot tracer was never registered, clear the
6667          * pointer out, to prevent a later registration from accessing
6668          * the buffer after it has been freed.
6669          */
6670         if (!default_bootup_tracer)
6671                 return 0;
6672
6673         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6674                default_bootup_tracer);
6675         default_bootup_tracer = NULL;
6676
6677         return 0;
6678 }
6679
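/*
 * Initcall ordering: the ring buffers are allocated early, the debugfs
 * files are created once filesystem infrastructure is available, and
 * the boot tracer pointer is cleared last, after built-in tracers have
 * had a chance to register.
 */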
6680 early_initcall(tracer_alloc_buffers);
6681 fs_initcall(tracer_init_debugfs);
6682 late_initcall(clear_boot_tracer);