tracing: Update instance_rmdir() to use tracefs_remove_recursive
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static struct tracer_flags dummy_tracer_flags = {
78         .val = 0,
79         .opts = dummy_tracer_opt
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but will turn to zero if the initialization
98  * of the tracer is successful. But that is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102
103 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
104
105 cpumask_var_t __read_mostly     tracing_buffer_mask;
106
107 /*
108  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
109  *
110  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
111  * is set, then ftrace_dump is called. This will output the contents
112  * of the ftrace buffers to the console.  This is very useful for
113  * capturing traces that lead to crashes and outputting them to a
114  * serial console.
115  *
116  * It is off by default, but you can enable it either by specifying
117  * "ftrace_dump_on_oops" on the kernel command line, or by setting
118  * /proc/sys/kernel/ftrace_dump_on_oops.
119  * Set it to 1 to dump the buffers of all CPUs.
120  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
121  */
122
123 enum ftrace_dump_mode ftrace_dump_on_oops;
124
125 /* When set, tracing will stop when a WARN*() is hit */
126 int __disable_trace_on_warning;
127
128 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
129 /* Map of enums to their values, for "enum_map" file */
130 struct trace_enum_map_head {
131         struct module                   *mod;
132         unsigned long                   length;
133 };
134
135 union trace_enum_map_item;
136
137 struct trace_enum_map_tail {
138         /*
139          * "end" is first and points to NULL as it must be different
140          * than "mod" or "enum_string"
141          */
142         union trace_enum_map_item       *next;
143         const char                      *end;   /* points to NULL */
144 };
145
146 static DEFINE_MUTEX(trace_enum_mutex);
147
148 /*
149  * The trace_enum_maps are saved in an array with two extra elements,
150  * one at the beginning, and one at the end. The beginning item contains
151  * the count of the saved maps (head.length), and the module they
152  * belong to if not built in (head.mod). The ending item contains a
153  * pointer to the next array of saved enum_map items.
154  */
155 union trace_enum_map_item {
156         struct trace_enum_map           map;
157         struct trace_enum_map_head      head;
158         struct trace_enum_map_tail      tail;
159 };
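/*
 * Illustrative layout of one saved array (a sketch based on the
 * description above):
 *
 *   [0]      head:  .mod = owning module (or NULL), .length = N
 *   [1..N]   map:   N struct trace_enum_map entries
 *   [N+1]    tail:  .next = next saved array (or NULL), .end = NULL
 *
 * trace_enum_maps points at the first such array; the tail.next
 * pointers chain further arrays as modules add their own enum maps.
 */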
160
161 static union trace_enum_map_item *trace_enum_maps;
162 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
163
164 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
165
166 #define MAX_TRACER_SIZE         100
167 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
168 static char *default_bootup_tracer;
169
170 static bool allocate_snapshot;
171
172 static int __init set_cmdline_ftrace(char *str)
173 {
174         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
175         default_bootup_tracer = bootup_tracer_buf;
176         /* We are using ftrace early, expand it */
177         ring_buffer_expanded = true;
178         return 1;
179 }
180 __setup("ftrace=", set_cmdline_ftrace);
181
182 static int __init set_ftrace_dump_on_oops(char *str)
183 {
184         if (*str++ != '=' || !*str) {
185                 ftrace_dump_on_oops = DUMP_ALL;
186                 return 1;
187         }
188
189         if (!strcmp("orig_cpu", str)) {
190                 ftrace_dump_on_oops = DUMP_ORIG;
191                 return 1;
192         }
193
194         return 0;
195 }
196 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
197
198 static int __init stop_trace_on_warning(char *str)
199 {
200         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
201                 __disable_trace_on_warning = 1;
202         return 1;
203 }
204 __setup("traceoff_on_warning", stop_trace_on_warning);
205
206 static int __init boot_alloc_snapshot(char *str)
207 {
208         allocate_snapshot = true;
209         /* We also need the main ring buffer expanded */
210         ring_buffer_expanded = true;
211         return 1;
212 }
213 __setup("alloc_snapshot", boot_alloc_snapshot);
214
215
216 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
217 static char *trace_boot_options __initdata;
218
219 static int __init set_trace_boot_options(char *str)
220 {
221         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
222         trace_boot_options = trace_boot_options_buf;
223         return 0;
224 }
225 __setup("trace_options=", set_trace_boot_options);
226
227 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
228 static char *trace_boot_clock __initdata;
229
230 static int __init set_trace_boot_clock(char *str)
231 {
232         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
233         trace_boot_clock = trace_boot_clock_buf;
234         return 0;
235 }
236 __setup("trace_clock=", set_trace_boot_clock);
237
238 static int __init set_tracepoint_printk(char *str)
239 {
240         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
241                 tracepoint_printk = 1;
242         return 1;
243 }
244 __setup("tp_printk", set_tracepoint_printk);
245
246 unsigned long long ns2usecs(cycle_t nsec)
247 {
248         nsec += 500;
249         do_div(nsec, 1000);
250         return nsec;
251 }
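/*
 * ns2usecs() rounds to the nearest microsecond, e.g. 1499 ns ->
 * (1499 + 500) / 1000 = 1 us, while 1500 ns -> 2 us.
 */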
252
253 /* trace_flags holds trace_options default values */
254 #define TRACE_DEFAULT_FLAGS                                             \
255         (FUNCTION_DEFAULT_FLAGS |                                       \
256          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
257          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
258          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
259          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
260
261 /* trace_options that are only supported by global_trace */
262 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
263                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
264
265
266 /*
267  * The global_trace is the descriptor that holds the tracing
268  * buffers for the live tracing. For each CPU, it contains
269  * a linked list of pages that will store trace entries. The
270  * page descriptor of the pages in memory is used to hold
271  * the linked list by linking the lru item in the page descriptor
272  * to each of the pages in the buffer per CPU.
273  *
274  * For each active CPU there is a data field that holds the
275  * pages for the buffer for that CPU. Each CPU has the same number
276  * of pages allocated for its buffer.
277  */
278 static struct trace_array global_trace = {
279         .trace_flags = TRACE_DEFAULT_FLAGS,
280 };
281
282 LIST_HEAD(ftrace_trace_arrays);
283
284 int trace_array_get(struct trace_array *this_tr)
285 {
286         struct trace_array *tr;
287         int ret = -ENODEV;
288
289         mutex_lock(&trace_types_lock);
290         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
291                 if (tr == this_tr) {
292                         tr->ref++;
293                         ret = 0;
294                         break;
295                 }
296         }
297         mutex_unlock(&trace_types_lock);
298
299         return ret;
300 }
301
302 static void __trace_array_put(struct trace_array *this_tr)
303 {
304         WARN_ON(!this_tr->ref);
305         this_tr->ref--;
306 }
307
308 void trace_array_put(struct trace_array *this_tr)
309 {
310         mutex_lock(&trace_types_lock);
311         __trace_array_put(this_tr);
312         mutex_unlock(&trace_types_lock);
313 }
314
315 int filter_check_discard(struct trace_event_file *file, void *rec,
316                          struct ring_buffer *buffer,
317                          struct ring_buffer_event *event)
318 {
319         if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
320             !filter_match_preds(file->filter, rec)) {
321                 ring_buffer_discard_commit(buffer, event);
322                 return 1;
323         }
324
325         return 0;
326 }
327 EXPORT_SYMBOL_GPL(filter_check_discard);
328
329 int call_filter_check_discard(struct trace_event_call *call, void *rec,
330                               struct ring_buffer *buffer,
331                               struct ring_buffer_event *event)
332 {
333         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
334             !filter_match_preds(call->filter, rec)) {
335                 ring_buffer_discard_commit(buffer, event);
336                 return 1;
337         }
338
339         return 0;
340 }
341 EXPORT_SYMBOL_GPL(call_filter_check_discard);
342
343 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
344 {
345         u64 ts;
346
347         /* Early boot up does not have a buffer yet */
348         if (!buf->buffer)
349                 return trace_clock_local();
350
351         ts = ring_buffer_time_stamp(buf->buffer, cpu);
352         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
353
354         return ts;
355 }
356
357 cycle_t ftrace_now(int cpu)
358 {
359         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
360 }
361
362 /**
363  * tracing_is_enabled - Show if global_trace has been disabled
364  *
365  * Shows if the global trace has been enabled or not. It uses the
366  * mirror flag "buffer_disabled" to be used in fast paths such as for
367  * the irqsoff tracer. But it may be inaccurate due to races. If you
368  * need to know the accurate state, use tracing_is_on() which is a little
369  * slower, but accurate.
370  */
371 int tracing_is_enabled(void)
372 {
373         /*
374          * For quick access (irqsoff uses this in fast path), just
375          * return the mirror variable of the state of the ring buffer.
376          * It's a little racy, but we don't really care.
377          */
378         smp_rmb();
379         return !global_trace.buffer_disabled;
380 }
381
382 /*
383  * trace_buf_size is the size in bytes that is allocated
384  * for a buffer. Note that the number of bytes is always rounded
385  * up to the page size.
386  *
387  * This number is purposely set to the low value of 16384 so that,
388  * if a dump on oops happens, we do not have to wait for an
389  * excessive amount of output. In any case, it is configurable at
390  * both boot time and run time.
391  */
392 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
393
394 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
395
396 /* trace_types holds a linked list of available tracers. */
397 static struct tracer            *trace_types __read_mostly;
398
399 /*
400  * trace_types_lock is used to protect the trace_types list.
401  */
402 DEFINE_MUTEX(trace_types_lock);
403
404 /*
405  * serialize the access of the ring buffer
406  *
407  * The ring buffer serializes readers, but that is only low-level
408  * protection. The validity of the events (returned by
409  * ring_buffer_peek() etc.) is not protected by the ring buffer.
410  *
411  * The content of events may become garbage if we allow other processes
412  * to consume these events concurrently:
413  *   A) the page of the consumed events may become a normal page
414  *      (not a reader page) in the ring buffer, and this page will be
415  *      rewritten by the events producer.
416  *   B) the page of the consumed events may become a page for
417  *      splice_read, and this page will be returned to the system.
418  *
419  * These primitives allow multiple processes to access different CPU
420  * ring buffers concurrently.
421  *
422  * These primitives don't distinguish read-only and read-consume access.
423  * Multiple read-only accesses are also serialized.
424  */
425
426 #ifdef CONFIG_SMP
427 static DECLARE_RWSEM(all_cpu_access_lock);
428 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
429
430 static inline void trace_access_lock(int cpu)
431 {
432         if (cpu == RING_BUFFER_ALL_CPUS) {
433                 /* gain it for accessing the whole ring buffer. */
434                 down_write(&all_cpu_access_lock);
435         } else {
436                 /* gain it for accessing a cpu ring buffer. */
437
438                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
439                 down_read(&all_cpu_access_lock);
440
441                 /* Secondly block other access to this @cpu ring buffer. */
442                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
443         }
444 }
445
446 static inline void trace_access_unlock(int cpu)
447 {
448         if (cpu == RING_BUFFER_ALL_CPUS) {
449                 up_write(&all_cpu_access_lock);
450         } else {
451                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
452                 up_read(&all_cpu_access_lock);
453         }
454 }
455
456 static inline void trace_access_lock_init(void)
457 {
458         int cpu;
459
460         for_each_possible_cpu(cpu)
461                 mutex_init(&per_cpu(cpu_access_lock, cpu));
462 }
463
464 #else
465
466 static DEFINE_MUTEX(access_lock);
467
468 static inline void trace_access_lock(int cpu)
469 {
470         (void)cpu;
471         mutex_lock(&access_lock);
472 }
473
474 static inline void trace_access_unlock(int cpu)
475 {
476         (void)cpu;
477         mutex_unlock(&access_lock);
478 }
479
480 static inline void trace_access_lock_init(void)
481 {
482 }
483
484 #endif
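/*
 * Typical reader-side usage (a sketch of the protocol described above,
 * assuming the caller already holds a reference to the trace array):
 *
 *   trace_access_lock(cpu);         // cpu or RING_BUFFER_ALL_CPUS
 *   ... consume events via ring_buffer_peek()/splice ...
 *   trace_access_unlock(cpu);
 *
 * On SMP, a per-cpu reader takes all_cpu_access_lock for read plus that
 * cpu's mutex; an ALL_CPUS reader takes the rwsem for write and so
 * excludes every per-cpu reader.
 */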
485
486 #ifdef CONFIG_STACKTRACE
487 static void __ftrace_trace_stack(struct ring_buffer *buffer,
488                                  unsigned long flags,
489                                  int skip, int pc, struct pt_regs *regs);
490 static inline void ftrace_trace_stack(struct trace_array *tr,
491                                       struct ring_buffer *buffer,
492                                       unsigned long flags,
493                                       int skip, int pc, struct pt_regs *regs);
494
495 #else
496 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
497                                         unsigned long flags,
498                                         int skip, int pc, struct pt_regs *regs)
499 {
500 }
501 static inline void ftrace_trace_stack(struct trace_array *tr,
502                                       struct ring_buffer *buffer,
503                                       unsigned long flags,
504                                       int skip, int pc, struct pt_regs *regs)
505 {
506 }
507
508 #endif
509
510 static void tracer_tracing_on(struct trace_array *tr)
511 {
512         if (tr->trace_buffer.buffer)
513                 ring_buffer_record_on(tr->trace_buffer.buffer);
514         /*
515          * This flag is looked at when buffers haven't been allocated
516          * yet, or by some tracers (like irqsoff), that just want to
517          * know if the ring buffer has been disabled, but it can handle
518          * races where it gets disabled while we still do a record.
519          * As the check is in the fast path of the tracers, it is more
520          * important to be fast than accurate.
521          */
522         tr->buffer_disabled = 0;
523         /* Make the flag seen by readers */
524         smp_wmb();
525 }
526
527 /**
528  * tracing_on - enable tracing buffers
529  *
530  * This function enables tracing buffers that may have been
531  * disabled with tracing_off.
532  */
533 void tracing_on(void)
534 {
535         tracer_tracing_on(&global_trace);
536 }
537 EXPORT_SYMBOL_GPL(tracing_on);
538
539 /**
540  * __trace_puts - write a constant string into the trace buffer.
541  * @ip:    The address of the caller
542  * @str:   The constant string to write
543  * @size:  The size of the string.
544  */
545 int __trace_puts(unsigned long ip, const char *str, int size)
546 {
547         struct ring_buffer_event *event;
548         struct ring_buffer *buffer;
549         struct print_entry *entry;
550         unsigned long irq_flags;
551         int alloc;
552         int pc;
553
554         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
555                 return 0;
556
557         pc = preempt_count();
558
559         if (unlikely(tracing_selftest_running || tracing_disabled))
560                 return 0;
561
562         alloc = sizeof(*entry) + size + 2; /* possible \n added */
563
564         local_save_flags(irq_flags);
565         buffer = global_trace.trace_buffer.buffer;
566         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
567                                           irq_flags, pc);
568         if (!event)
569                 return 0;
570
571         entry = ring_buffer_event_data(event);
572         entry->ip = ip;
573
574         memcpy(&entry->buf, str, size);
575
576         /* Add a newline if necessary */
577         if (entry->buf[size - 1] != '\n') {
578                 entry->buf[size] = '\n';
579                 entry->buf[size + 1] = '\0';
580         } else
581                 entry->buf[size] = '\0';
582
583         __buffer_unlock_commit(buffer, event);
584         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
585
586         return size;
587 }
588 EXPORT_SYMBOL_GPL(__trace_puts);
589
590 /**
591  * __trace_bputs - write the pointer to a constant string into trace buffer
592  * @ip:    The address of the caller
593  * @str:   The constant string whose address is written into the buffer
594  */
595 int __trace_bputs(unsigned long ip, const char *str)
596 {
597         struct ring_buffer_event *event;
598         struct ring_buffer *buffer;
599         struct bputs_entry *entry;
600         unsigned long irq_flags;
601         int size = sizeof(struct bputs_entry);
602         int pc;
603
604         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
605                 return 0;
606
607         pc = preempt_count();
608
609         if (unlikely(tracing_selftest_running || tracing_disabled))
610                 return 0;
611
612         local_save_flags(irq_flags);
613         buffer = global_trace.trace_buffer.buffer;
614         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
615                                           irq_flags, pc);
616         if (!event)
617                 return 0;
618
619         entry = ring_buffer_event_data(event);
620         entry->ip                       = ip;
621         entry->str                      = str;
622
623         __buffer_unlock_commit(buffer, event);
624         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
625
626         return 1;
627 }
628 EXPORT_SYMBOL_GPL(__trace_bputs);
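/*
 * Both helpers above are normally reached through the trace_puts()
 * macro rather than called directly; for string literals that macro is
 * expected to use __trace_bputs() (recording only the pointer), and
 * __trace_puts() otherwise (copying the string into the buffer). This
 * is a sketch of the intended split; see the trace_puts() definition
 * for the authoritative behavior.
 */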
629
630 #ifdef CONFIG_TRACER_SNAPSHOT
631 /**
632  * tracing_snapshot - take a snapshot of the current buffer.
633  *
634  * This causes a swap between the snapshot buffer and the current live
635  * tracing buffer. You can use this to take snapshots of the live
636  * trace when some condition is triggered, but continue to trace.
637  *
638  * Note, make sure to allocate the snapshot either with
639  * tracing_snapshot_alloc(), or by doing it manually
640  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
641  *
642  * If the snapshot buffer is not allocated, it will stop tracing.
643  * Basically making a permanent snapshot.
644  */
645 void tracing_snapshot(void)
646 {
647         struct trace_array *tr = &global_trace;
648         struct tracer *tracer = tr->current_trace;
649         unsigned long flags;
650
651         if (in_nmi()) {
652                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
653                 internal_trace_puts("*** snapshot is being ignored        ***\n");
654                 return;
655         }
656
657         if (!tr->allocated_snapshot) {
658                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
659                 internal_trace_puts("*** stopping trace here!   ***\n");
660                 tracing_off();
661                 return;
662         }
663
664         /* Note, snapshot can not be used when the tracer uses it */
665         if (tracer->use_max_tr) {
666                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
667                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
668                 return;
669         }
670
671         local_irq_save(flags);
672         update_max_tr(tr, current, smp_processor_id());
673         local_irq_restore(flags);
674 }
675 EXPORT_SYMBOL_GPL(tracing_snapshot);
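/*
 * Userspace equivalent (a sketch, assuming the usual tracefs mount at
 * /sys/kernel/debug/tracing and CONFIG_TRACER_SNAPSHOT=y):
 *
 *   echo 1 > /sys/kernel/debug/tracing/snapshot   # allocate + snapshot
 *   cat /sys/kernel/debug/tracing/snapshot        # read the snapshot
 *   echo 0 > /sys/kernel/debug/tracing/snapshot   # free the spare buffer
 */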
676
677 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
678                                         struct trace_buffer *size_buf, int cpu_id);
679 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
680
681 static int alloc_snapshot(struct trace_array *tr)
682 {
683         int ret;
684
685         if (!tr->allocated_snapshot) {
686
687                 /* allocate spare buffer */
688                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
689                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
690                 if (ret < 0)
691                         return ret;
692
693                 tr->allocated_snapshot = true;
694         }
695
696         return 0;
697 }
698
699 static void free_snapshot(struct trace_array *tr)
700 {
701         /*
702          * We don't free the ring buffer; instead, we resize it because
703          * the max_tr ring buffer has some state (e.g. ring->clock) and
704          * we want to preserve it.
705          */
706         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
707         set_buffer_entries(&tr->max_buffer, 1);
708         tracing_reset_online_cpus(&tr->max_buffer);
709         tr->allocated_snapshot = false;
710 }
711
712 /**
713  * tracing_alloc_snapshot - allocate snapshot buffer.
714  *
715  * This only allocates the snapshot buffer if it isn't already
716  * allocated - it doesn't also take a snapshot.
717  *
718  * This is meant to be used in cases where the snapshot buffer needs
719  * to be set up for events that can't sleep but need to be able to
720  * trigger a snapshot.
721  */
722 int tracing_alloc_snapshot(void)
723 {
724         struct trace_array *tr = &global_trace;
725         int ret;
726
727         ret = alloc_snapshot(tr);
728         WARN_ON(ret < 0);
729
730         return ret;
731 }
732 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
733
734 /**
735  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
736  *
737  * This is similar to tracing_snapshot(), but it will allocate the
738  * snapshot buffer if it isn't already allocated. Use this only
739  * where it is safe to sleep, as the allocation may sleep.
740  *
741  * This causes a swap between the snapshot buffer and the current live
742  * tracing buffer. You can use this to take snapshots of the live
743  * trace when some condition is triggered, but continue to trace.
744  */
745 void tracing_snapshot_alloc(void)
746 {
747         int ret;
748
749         ret = tracing_alloc_snapshot();
750         if (ret < 0)
751                 return;
752
753         tracing_snapshot();
754 }
755 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
756 #else
757 void tracing_snapshot(void)
758 {
759         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
760 }
761 EXPORT_SYMBOL_GPL(tracing_snapshot);
762 int tracing_alloc_snapshot(void)
763 {
764         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
765         return -ENODEV;
766 }
767 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
768 void tracing_snapshot_alloc(void)
769 {
770         /* Give warning */
771         tracing_snapshot();
772 }
773 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
774 #endif /* CONFIG_TRACER_SNAPSHOT */
775
776 static void tracer_tracing_off(struct trace_array *tr)
777 {
778         if (tr->trace_buffer.buffer)
779                 ring_buffer_record_off(tr->trace_buffer.buffer);
780         /*
781          * This flag is looked at when buffers haven't been allocated
782          * yet, or by some tracers (like irqsoff), that just want to
783          * know if the ring buffer has been disabled, but it can handle
784          * races where it gets disabled while we still do a record.
785          * As the check is in the fast path of the tracers, it is more
786          * important to be fast than accurate.
787          */
788         tr->buffer_disabled = 1;
789         /* Make the flag seen by readers */
790         smp_wmb();
791 }
792
793 /**
794  * tracing_off - turn off tracing buffers
795  *
796  * This function stops the tracing buffers from recording data.
797  * It does not disable any overhead the tracers themselves may
798  * be causing. This function simply causes all recording to
799  * the ring buffers to fail.
800  */
801 void tracing_off(void)
802 {
803         tracer_tracing_off(&global_trace);
804 }
805 EXPORT_SYMBOL_GPL(tracing_off);
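/*
 * tracing_on()/tracing_off() are handy for bracketing a region of
 * interest from kernel code; a minimal sketch (the called helpers are
 * hypothetical):
 *
 *   tracing_on();
 *   do_something_interesting();
 *   if (my_error_condition())
 *           tracing_off();   // freeze the buffer for later inspection
 *
 * The same switch is exposed to userspace through the tracing_on file
 * in tracefs.
 */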
806
807 void disable_trace_on_warning(void)
808 {
809         if (__disable_trace_on_warning)
810                 tracing_off();
811 }
812
813 /**
814  * tracer_tracing_is_on - show real state of ring buffer enabled
815  * @tr : the trace array to know if ring buffer is enabled
816  *
817  * Shows real state of the ring buffer if it is enabled or not.
818  */
819 static int tracer_tracing_is_on(struct trace_array *tr)
820 {
821         if (tr->trace_buffer.buffer)
822                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
823         return !tr->buffer_disabled;
824 }
825
826 /**
827  * tracing_is_on - show state of ring buffers enabled
828  */
829 int tracing_is_on(void)
830 {
831         return tracer_tracing_is_on(&global_trace);
832 }
833 EXPORT_SYMBOL_GPL(tracing_is_on);
834
835 static int __init set_buf_size(char *str)
836 {
837         unsigned long buf_size;
838
839         if (!str)
840                 return 0;
841         buf_size = memparse(str, &str);
842         /* nr_entries can not be zero */
843         if (buf_size == 0)
844                 return 0;
845         trace_buf_size = buf_size;
846         return 1;
847 }
848 __setup("trace_buf_size=", set_buf_size);
849
850 static int __init set_tracing_thresh(char *str)
851 {
852         unsigned long threshold;
853         int ret;
854
855         if (!str)
856                 return 0;
857         ret = kstrtoul(str, 0, &threshold);
858         if (ret < 0)
859                 return 0;
860         tracing_thresh = threshold * 1000;
861         return 1;
862 }
863 __setup("tracing_thresh=", set_tracing_thresh);
864
865 unsigned long nsecs_to_usecs(unsigned long nsecs)
866 {
867         return nsecs / 1000;
868 }
869
870 /*
871  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
872  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
873  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
874  * of strings in the order that the enums were defined.
875  */
876 #undef C
877 #define C(a, b) b
878
879 /* These must match the bit positions in trace_iterator_flags */
880 static const char *trace_options[] = {
881         TRACE_FLAGS
882         NULL
883 };
884
885 static struct {
886         u64 (*func)(void);
887         const char *name;
888         int in_ns;              /* is this clock in nanoseconds? */
889 } trace_clocks[] = {
890         { trace_clock_local,            "local",        1 },
891         { trace_clock_global,           "global",       1 },
892         { trace_clock_counter,          "counter",      0 },
893         { trace_clock_jiffies,          "uptime",       0 },
894         { trace_clock,                  "perf",         1 },
895         { ktime_get_mono_fast_ns,       "mono",         1 },
896         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
897         ARCH_TRACE_CLOCKS
898 };
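/*
 * The active clock is selected through the trace_clock file in tracefs
 * (or the trace_clock= boot parameter handled earlier); a sketch:
 *
 *   cat /sys/kernel/debug/tracing/trace_clock    # current one in [ ]
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 */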
899
900 /*
901  * trace_parser_get_init - gets the buffer for trace parser
902  */
903 int trace_parser_get_init(struct trace_parser *parser, int size)
904 {
905         memset(parser, 0, sizeof(*parser));
906
907         parser->buffer = kmalloc(size, GFP_KERNEL);
908         if (!parser->buffer)
909                 return 1;
910
911         parser->size = size;
912         return 0;
913 }
914
915 /*
916  * trace_parser_put - frees the buffer for trace parser
917  */
918 void trace_parser_put(struct trace_parser *parser)
919 {
920         kfree(parser->buffer);
921 }
922
923 /*
924  * trace_get_user - reads the user input string separated by space
925  * (matched by isspace(ch))
926  *
927  * For each string found the 'struct trace_parser' is updated,
928  * and the function returns.
929  *
930  * Returns number of bytes read.
931  *
932  * See kernel/trace/trace.h for 'struct trace_parser' details.
933  */
934 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
935         size_t cnt, loff_t *ppos)
936 {
937         char ch;
938         size_t read = 0;
939         ssize_t ret;
940
941         if (!*ppos)
942                 trace_parser_clear(parser);
943
944         ret = get_user(ch, ubuf++);
945         if (ret)
946                 goto out;
947
948         read++;
949         cnt--;
950
951         /*
952          * The parser is not finished with the last write,
953          * continue reading the user input without skipping spaces.
954          */
955         if (!parser->cont) {
956                 /* skip white space */
957                 while (cnt && isspace(ch)) {
958                         ret = get_user(ch, ubuf++);
959                         if (ret)
960                                 goto out;
961                         read++;
962                         cnt--;
963                 }
964
965                 /* only spaces were written */
966                 if (isspace(ch)) {
967                         *ppos += read;
968                         ret = read;
969                         goto out;
970                 }
971
972                 parser->idx = 0;
973         }
974
975         /* read the non-space input */
976         while (cnt && !isspace(ch)) {
977                 if (parser->idx < parser->size - 1)
978                         parser->buffer[parser->idx++] = ch;
979                 else {
980                         ret = -EINVAL;
981                         goto out;
982                 }
983                 ret = get_user(ch, ubuf++);
984                 if (ret)
985                         goto out;
986                 read++;
987                 cnt--;
988         }
989
990         /* We either got finished input or we have to wait for another call. */
991         if (isspace(ch)) {
992                 parser->buffer[parser->idx] = 0;
993                 parser->cont = false;
994         } else if (parser->idx < parser->size - 1) {
995                 parser->cont = true;
996                 parser->buffer[parser->idx++] = ch;
997         } else {
998                 ret = -EINVAL;
999                 goto out;
1000         }
1001
1002         *ppos += read;
1003         ret = read;
1004
1005 out:
1006         return ret;
1007 }
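/*
 * Worked example (a sketch): if userspace writes "foo bar\n" to a file
 * that uses trace_get_user(), the first call fills parser->buffer with
 * "foo" and clears parser->cont; the caller's next call continues with
 * "bar". Writing a token without a trailing space or newline (e.g. via
 * "echo -n foo") leaves parser->cont set, so the next write is treated
 * as a continuation of the same token.
 */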
1008
1009 /* TODO add a seq_buf_to_buffer() */
1010 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1011 {
1012         int len;
1013
1014         if (trace_seq_used(s) <= s->seq.readpos)
1015                 return -EBUSY;
1016
1017         len = trace_seq_used(s) - s->seq.readpos;
1018         if (cnt > len)
1019                 cnt = len;
1020         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1021
1022         s->seq.readpos += cnt;
1023         return cnt;
1024 }
1025
1026 unsigned long __read_mostly     tracing_thresh;
1027
1028 #ifdef CONFIG_TRACER_MAX_TRACE
1029 /*
1030  * Copy the new maximum trace into the separate maximum-trace
1031  * structure. (this way the maximum trace is permanently saved,
1032  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1033  */
1034 static void
1035 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1036 {
1037         struct trace_buffer *trace_buf = &tr->trace_buffer;
1038         struct trace_buffer *max_buf = &tr->max_buffer;
1039         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1040         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1041
1042         max_buf->cpu = cpu;
1043         max_buf->time_start = data->preempt_timestamp;
1044
1045         max_data->saved_latency = tr->max_latency;
1046         max_data->critical_start = data->critical_start;
1047         max_data->critical_end = data->critical_end;
1048
1049         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1050         max_data->pid = tsk->pid;
1051         /*
1052          * If tsk == current, then use current_uid(), as that does not use
1053          * RCU. The irq tracer can be called out of RCU scope.
1054          */
1055         if (tsk == current)
1056                 max_data->uid = current_uid();
1057         else
1058                 max_data->uid = task_uid(tsk);
1059
1060         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1061         max_data->policy = tsk->policy;
1062         max_data->rt_priority = tsk->rt_priority;
1063
1064         /* record this task's comm */
1065         tracing_record_cmdline(tsk);
1066 }
1067
1068 /**
1069  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1070  * @tr: tracer
1071  * @tsk: the task with the latency
1072  * @cpu: The cpu that initiated the trace.
1073  *
1074  * Flip the buffers between the @tr and the max_tr and record information
1075  * about which task was the cause of this latency.
1076  */
1077 void
1078 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1079 {
1080         struct ring_buffer *buf;
1081
1082         if (tr->stop_count)
1083                 return;
1084
1085         WARN_ON_ONCE(!irqs_disabled());
1086
1087         if (!tr->allocated_snapshot) {
1088                 /* Only the nop tracer should hit this when disabling */
1089                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1090                 return;
1091         }
1092
1093         arch_spin_lock(&tr->max_lock);
1094
1095         buf = tr->trace_buffer.buffer;
1096         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1097         tr->max_buffer.buffer = buf;
1098
1099         __update_max_tr(tr, tsk, cpu);
1100         arch_spin_unlock(&tr->max_lock);
1101 }
1102
1103 /**
1104  * update_max_tr_single - only copy one trace over, and reset the rest
1105  * @tr: tracer
1106  * @tsk: task with the latency
1107  * @cpu: the cpu of the buffer to copy.
1108  *
1109  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1110  */
1111 void
1112 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1113 {
1114         int ret;
1115
1116         if (tr->stop_count)
1117                 return;
1118
1119         WARN_ON_ONCE(!irqs_disabled());
1120         if (!tr->allocated_snapshot) {
1121                 /* Only the nop tracer should hit this when disabling */
1122                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1123                 return;
1124         }
1125
1126         arch_spin_lock(&tr->max_lock);
1127
1128         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1129
1130         if (ret == -EBUSY) {
1131                 /*
1132                  * We failed to swap the buffer due to a commit taking
1133                  * place on this CPU. We fail to record, but we reset
1134                  * the max trace buffer (no one writes directly to it)
1135                  * and flag that it failed.
1136                  */
1137                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1138                         "Failed to swap buffers due to commit in progress\n");
1139         }
1140
1141         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1142
1143         __update_max_tr(tr, tsk, cpu);
1144         arch_spin_unlock(&tr->max_lock);
1145 }
1146 #endif /* CONFIG_TRACER_MAX_TRACE */
1147
1148 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1149 {
1150         /* Iterators are static, they should be filled or empty */
1151         if (trace_buffer_iter(iter, iter->cpu_file))
1152                 return 0;
1153
1154         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1155                                 full);
1156 }
1157
1158 #ifdef CONFIG_FTRACE_STARTUP_TEST
1159 static int run_tracer_selftest(struct tracer *type)
1160 {
1161         struct trace_array *tr = &global_trace;
1162         struct tracer *saved_tracer = tr->current_trace;
1163         int ret;
1164
1165         if (!type->selftest || tracing_selftest_disabled)
1166                 return 0;
1167
1168         /*
1169          * Run a selftest on this tracer.
1170          * Here we reset the trace buffer, and set the current
1171          * tracer to be this tracer. The tracer can then run some
1172          * internal tracing to verify that everything is in order.
1173          * If we fail, we do not register this tracer.
1174          */
1175         tracing_reset_online_cpus(&tr->trace_buffer);
1176
1177         tr->current_trace = type;
1178
1179 #ifdef CONFIG_TRACER_MAX_TRACE
1180         if (type->use_max_tr) {
1181                 /* If we expanded the buffers, make sure the max is expanded too */
1182                 if (ring_buffer_expanded)
1183                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1184                                            RING_BUFFER_ALL_CPUS);
1185                 tr->allocated_snapshot = true;
1186         }
1187 #endif
1188
1189         /* the test is responsible for initializing and enabling */
1190         pr_info("Testing tracer %s: ", type->name);
1191         ret = type->selftest(type, tr);
1192         /* the test is responsible for resetting too */
1193         tr->current_trace = saved_tracer;
1194         if (ret) {
1195                 printk(KERN_CONT "FAILED!\n");
1196                 /* Add the warning after printing 'FAILED' */
1197                 WARN_ON(1);
1198                 return -1;
1199         }
1200         /* Only reset on passing, to avoid touching corrupted buffers */
1201         tracing_reset_online_cpus(&tr->trace_buffer);
1202
1203 #ifdef CONFIG_TRACER_MAX_TRACE
1204         if (type->use_max_tr) {
1205                 tr->allocated_snapshot = false;
1206
1207                 /* Shrink the max buffer again */
1208                 if (ring_buffer_expanded)
1209                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1210                                            RING_BUFFER_ALL_CPUS);
1211         }
1212 #endif
1213
1214         printk(KERN_CONT "PASSED\n");
1215         return 0;
1216 }
1217 #else
1218 static inline int run_tracer_selftest(struct tracer *type)
1219 {
1220         return 0;
1221 }
1222 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1223
1224 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1225
1226 /**
1227  * register_tracer - register a tracer with the ftrace system.
1228  * @type: the plugin for the tracer
1229  *
1230  * Register a new plugin tracer.
1231  */
1232 int register_tracer(struct tracer *type)
1233 {
1234         struct tracer *t;
1235         int ret = 0;
1236
1237         if (!type->name) {
1238                 pr_info("Tracer must have a name\n");
1239                 return -1;
1240         }
1241
1242         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1243                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1244                 return -1;
1245         }
1246
1247         mutex_lock(&trace_types_lock);
1248
1249         tracing_selftest_running = true;
1250
1251         for (t = trace_types; t; t = t->next) {
1252                 if (strcmp(type->name, t->name) == 0) {
1253                         /* already found */
1254                         pr_info("Tracer %s already registered\n",
1255                                 type->name);
1256                         ret = -1;
1257                         goto out;
1258                 }
1259         }
1260
1261         if (!type->set_flag)
1262                 type->set_flag = &dummy_set_flag;
1263         if (!type->flags)
1264                 type->flags = &dummy_tracer_flags;
1265         else
1266                 if (!type->flags->opts)
1267                         type->flags->opts = dummy_tracer_opt;
1268
1269         ret = run_tracer_selftest(type);
1270         if (ret < 0)
1271                 goto out;
1272
1273         type->next = trace_types;
1274         trace_types = type;
1275         add_tracer_options(&global_trace, type);
1276
1277  out:
1278         tracing_selftest_running = false;
1279         mutex_unlock(&trace_types_lock);
1280
1281         if (ret || !default_bootup_tracer)
1282                 goto out_unlock;
1283
1284         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1285                 goto out_unlock;
1286
1287         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1288         /* Do we want this tracer to start on bootup? */
1289         tracing_set_tracer(&global_trace, type->name);
1290         default_bootup_tracer = NULL;
1291         /* disable other selftests, since this will break them. */
1292         tracing_selftest_disabled = true;
1293 #ifdef CONFIG_FTRACE_STARTUP_TEST
1294         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1295                type->name);
1296 #endif
1297
1298  out_unlock:
1299         return ret;
1300 }
1301
1302 void tracing_reset(struct trace_buffer *buf, int cpu)
1303 {
1304         struct ring_buffer *buffer = buf->buffer;
1305
1306         if (!buffer)
1307                 return;
1308
1309         ring_buffer_record_disable(buffer);
1310
1311         /* Make sure all commits have finished */
1312         synchronize_sched();
1313         ring_buffer_reset_cpu(buffer, cpu);
1314
1315         ring_buffer_record_enable(buffer);
1316 }
1317
1318 void tracing_reset_online_cpus(struct trace_buffer *buf)
1319 {
1320         struct ring_buffer *buffer = buf->buffer;
1321         int cpu;
1322
1323         if (!buffer)
1324                 return;
1325
1326         ring_buffer_record_disable(buffer);
1327
1328         /* Make sure all commits have finished */
1329         synchronize_sched();
1330
1331         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1332
1333         for_each_online_cpu(cpu)
1334                 ring_buffer_reset_cpu(buffer, cpu);
1335
1336         ring_buffer_record_enable(buffer);
1337 }
1338
1339 /* Must have trace_types_lock held */
1340 void tracing_reset_all_online_cpus(void)
1341 {
1342         struct trace_array *tr;
1343
1344         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1345                 tracing_reset_online_cpus(&tr->trace_buffer);
1346 #ifdef CONFIG_TRACER_MAX_TRACE
1347                 tracing_reset_online_cpus(&tr->max_buffer);
1348 #endif
1349         }
1350 }
1351
1352 #define SAVED_CMDLINES_DEFAULT 128
1353 #define NO_CMDLINE_MAP UINT_MAX
1354 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1355 struct saved_cmdlines_buffer {
1356         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1357         unsigned *map_cmdline_to_pid;
1358         unsigned cmdline_num;
1359         int cmdline_idx;
1360         char *saved_cmdlines;
1361 };
1362 static struct saved_cmdlines_buffer *savedcmd;
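/*
 * savedcmd keeps a two-way mapping so that a pid recorded in the ring
 * buffer can later be resolved to a comm: map_pid_to_cmdline[pid] gives
 * a slot index, saved_cmdlines[idx * TASK_COMM_LEN] holds the comm
 * saved in that slot, and map_cmdline_to_pid[idx] remembers which pid
 * currently owns the slot so a stale entry can be invalidated before
 * reuse. The number of slots defaults to SAVED_CMDLINES_DEFAULT and,
 * in kernels that provide it, can be changed through the
 * saved_cmdlines_size file in tracefs.
 */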
1363
1364 /* temporarily disable recording */
1365 static atomic_t trace_record_cmdline_disabled __read_mostly;
1366
1367 static inline char *get_saved_cmdlines(int idx)
1368 {
1369         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1370 }
1371
1372 static inline void set_cmdline(int idx, const char *cmdline)
1373 {
1374         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1375 }
1376
1377 static int allocate_cmdlines_buffer(unsigned int val,
1378                                     struct saved_cmdlines_buffer *s)
1379 {
1380         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1381                                         GFP_KERNEL);
1382         if (!s->map_cmdline_to_pid)
1383                 return -ENOMEM;
1384
1385         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1386         if (!s->saved_cmdlines) {
1387                 kfree(s->map_cmdline_to_pid);
1388                 return -ENOMEM;
1389         }
1390
1391         s->cmdline_idx = 0;
1392         s->cmdline_num = val;
1393         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1394                sizeof(s->map_pid_to_cmdline));
1395         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1396                val * sizeof(*s->map_cmdline_to_pid));
1397
1398         return 0;
1399 }
1400
1401 static int trace_create_savedcmd(void)
1402 {
1403         int ret;
1404
1405         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1406         if (!savedcmd)
1407                 return -ENOMEM;
1408
1409         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1410         if (ret < 0) {
1411                 kfree(savedcmd);
1412                 savedcmd = NULL;
1413                 return -ENOMEM;
1414         }
1415
1416         return 0;
1417 }
1418
1419 int is_tracing_stopped(void)
1420 {
1421         return global_trace.stop_count;
1422 }
1423
1424 /**
1425  * tracing_start - quick start of the tracer
1426  *
1427  * If tracing is enabled but was stopped by tracing_stop,
1428  * this will start the tracer back up.
1429  */
1430 void tracing_start(void)
1431 {
1432         struct ring_buffer *buffer;
1433         unsigned long flags;
1434
1435         if (tracing_disabled)
1436                 return;
1437
1438         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1439         if (--global_trace.stop_count) {
1440                 if (global_trace.stop_count < 0) {
1441                         /* Someone screwed up their debugging */
1442                         WARN_ON_ONCE(1);
1443                         global_trace.stop_count = 0;
1444                 }
1445                 goto out;
1446         }
1447
1448         /* Prevent the buffers from switching */
1449         arch_spin_lock(&global_trace.max_lock);
1450
1451         buffer = global_trace.trace_buffer.buffer;
1452         if (buffer)
1453                 ring_buffer_record_enable(buffer);
1454
1455 #ifdef CONFIG_TRACER_MAX_TRACE
1456         buffer = global_trace.max_buffer.buffer;
1457         if (buffer)
1458                 ring_buffer_record_enable(buffer);
1459 #endif
1460
1461         arch_spin_unlock(&global_trace.max_lock);
1462
1463  out:
1464         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1465 }
1466
1467 static void tracing_start_tr(struct trace_array *tr)
1468 {
1469         struct ring_buffer *buffer;
1470         unsigned long flags;
1471
1472         if (tracing_disabled)
1473                 return;
1474
1475         /* If global, we need to also start the max tracer */
1476         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1477                 return tracing_start();
1478
1479         raw_spin_lock_irqsave(&tr->start_lock, flags);
1480
1481         if (--tr->stop_count) {
1482                 if (tr->stop_count < 0) {
1483                         /* Someone screwed up their debugging */
1484                         WARN_ON_ONCE(1);
1485                         tr->stop_count = 0;
1486                 }
1487                 goto out;
1488         }
1489
1490         buffer = tr->trace_buffer.buffer;
1491         if (buffer)
1492                 ring_buffer_record_enable(buffer);
1493
1494  out:
1495         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1496 }
1497
1498 /**
1499  * tracing_stop - quick stop of the tracer
1500  *
1501  * Light weight way to stop tracing. Use in conjunction with
1502  * tracing_start.
1503  */
1504 void tracing_stop(void)
1505 {
1506         struct ring_buffer *buffer;
1507         unsigned long flags;
1508
1509         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1510         if (global_trace.stop_count++)
1511                 goto out;
1512
1513         /* Prevent the buffers from switching */
1514         arch_spin_lock(&global_trace.max_lock);
1515
1516         buffer = global_trace.trace_buffer.buffer;
1517         if (buffer)
1518                 ring_buffer_record_disable(buffer);
1519
1520 #ifdef CONFIG_TRACER_MAX_TRACE
1521         buffer = global_trace.max_buffer.buffer;
1522         if (buffer)
1523                 ring_buffer_record_disable(buffer);
1524 #endif
1525
1526         arch_spin_unlock(&global_trace.max_lock);
1527
1528  out:
1529         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1530 }
1531
1532 static void tracing_stop_tr(struct trace_array *tr)
1533 {
1534         struct ring_buffer *buffer;
1535         unsigned long flags;
1536
1537         /* If global, we need to also stop the max tracer */
1538         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1539                 return tracing_stop();
1540
1541         raw_spin_lock_irqsave(&tr->start_lock, flags);
1542         if (tr->stop_count++)
1543                 goto out;
1544
1545         buffer = tr->trace_buffer.buffer;
1546         if (buffer)
1547                 ring_buffer_record_disable(buffer);
1548
1549  out:
1550         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1551 }
1552
1553 void trace_stop_cmdline_recording(void);
1554
1555 static int trace_save_cmdline(struct task_struct *tsk)
1556 {
1557         unsigned pid, idx;
1558
1559         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1560                 return 0;
1561
1562         /*
1563          * It's not the end of the world if we don't get
1564          * the lock, but we also don't want to spin
1565          * nor do we want to disable interrupts,
1566          * so if we miss here, then better luck next time.
1567          */
1568         if (!arch_spin_trylock(&trace_cmdline_lock))
1569                 return 0;
1570
1571         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1572         if (idx == NO_CMDLINE_MAP) {
1573                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1574
1575                 /*
1576                  * Check whether the cmdline buffer at idx has a pid
1577                  * mapped. We are going to overwrite that entry so we
1578                  * need to clear the map_pid_to_cmdline. Otherwise we
1579                  * would read the new comm for the old pid.
1580                  */
1581                 pid = savedcmd->map_cmdline_to_pid[idx];
1582                 if (pid != NO_CMDLINE_MAP)
1583                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1584
1585                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1586                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1587
1588                 savedcmd->cmdline_idx = idx;
1589         }
1590
1591         set_cmdline(idx, tsk->comm);
1592
1593         arch_spin_unlock(&trace_cmdline_lock);
1594
1595         return 1;
1596 }
1597
1598 static void __trace_find_cmdline(int pid, char comm[])
1599 {
1600         unsigned map;
1601
1602         if (!pid) {
1603                 strcpy(comm, "<idle>");
1604                 return;
1605         }
1606
1607         if (WARN_ON_ONCE(pid < 0)) {
1608                 strcpy(comm, "<XXX>");
1609                 return;
1610         }
1611
1612         if (pid > PID_MAX_DEFAULT) {
1613                 strcpy(comm, "<...>");
1614                 return;
1615         }
1616
1617         map = savedcmd->map_pid_to_cmdline[pid];
1618         if (map != NO_CMDLINE_MAP)
1619                 strcpy(comm, get_saved_cmdlines(map));
1620         else
1621                 strcpy(comm, "<...>");
1622 }
1623
1624 void trace_find_cmdline(int pid, char comm[])
1625 {
1626         preempt_disable();
1627         arch_spin_lock(&trace_cmdline_lock);
1628
1629         __trace_find_cmdline(pid, comm);
1630
1631         arch_spin_unlock(&trace_cmdline_lock);
1632         preempt_enable();
1633 }
1634
1635 void tracing_record_cmdline(struct task_struct *tsk)
1636 {
1637         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1638                 return;
1639
1640         if (!__this_cpu_read(trace_cmdline_save))
1641                 return;
1642
1643         if (trace_save_cmdline(tsk))
1644                 __this_cpu_write(trace_cmdline_save, false);
1645 }
1646
1647 void
1648 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1649                              int pc)
1650 {
1651         struct task_struct *tsk = current;
1652
1653         entry->preempt_count            = pc & 0xff;
1654         entry->pid                      = (tsk) ? tsk->pid : 0;
1655         entry->flags =
1656 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1657                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1658 #else
1659                 TRACE_FLAG_IRQS_NOSUPPORT |
1660 #endif
1661                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1662                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1663                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1664                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1665 }
1666 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1667
1668 struct ring_buffer_event *
1669 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1670                           int type,
1671                           unsigned long len,
1672                           unsigned long flags, int pc)
1673 {
1674         struct ring_buffer_event *event;
1675
1676         event = ring_buffer_lock_reserve(buffer, len);
1677         if (event != NULL) {
1678                 struct trace_entry *ent = ring_buffer_event_data(event);
1679
1680                 tracing_generic_entry_update(ent, flags, pc);
1681                 ent->type = type;
1682         }
1683
1684         return event;
1685 }
1686
1687 void
1688 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1689 {
1690         __this_cpu_write(trace_cmdline_save, true);
1691         ring_buffer_unlock_commit(buffer, event);
1692 }
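
/*
 * Together with trace_buffer_lock_reserve() above, this gives the usual
 * reserve -> fill -> commit pattern that the event writers in this file
 * follow (see trace_function() below for the canonical example):
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	entry->parent_ip = parent_ip;
 *	__buffer_unlock_commit(buffer, event);
 *
 * This is only a sketch; real callers also run the entry through
 * call_filter_check_discard() before committing.
 */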
1693
1694 void trace_buffer_unlock_commit(struct trace_array *tr,
1695                                 struct ring_buffer *buffer,
1696                                 struct ring_buffer_event *event,
1697                                 unsigned long flags, int pc)
1698 {
1699         __buffer_unlock_commit(buffer, event);
1700
1701         ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1702         ftrace_trace_userstack(buffer, flags, pc);
1703 }
1704 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1705
1706 static struct ring_buffer *temp_buffer;
1707
1708 struct ring_buffer_event *
1709 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1710                           struct trace_event_file *trace_file,
1711                           int type, unsigned long len,
1712                           unsigned long flags, int pc)
1713 {
1714         struct ring_buffer_event *entry;
1715
1716         *current_rb = trace_file->tr->trace_buffer.buffer;
1717         entry = trace_buffer_lock_reserve(*current_rb,
1718                                          type, len, flags, pc);
1719         /*
1720          * If tracing is off, but we have triggers enabled,
1721          * we still need to look at the event data. Use the temp_buffer
1722          * to store the trace event for the trigger to use. It's recursion
1723          * safe and will not be recorded anywhere.
1724          */
1725         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1726                 *current_rb = temp_buffer;
1727                 entry = trace_buffer_lock_reserve(*current_rb,
1728                                                   type, len, flags, pc);
1729         }
1730         return entry;
1731 }
1732 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1733
1734 struct ring_buffer_event *
1735 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1736                                   int type, unsigned long len,
1737                                   unsigned long flags, int pc)
1738 {
1739         *current_rb = global_trace.trace_buffer.buffer;
1740         return trace_buffer_lock_reserve(*current_rb,
1741                                          type, len, flags, pc);
1742 }
1743 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1744
1745 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1746                                      struct ring_buffer *buffer,
1747                                      struct ring_buffer_event *event,
1748                                      unsigned long flags, int pc,
1749                                      struct pt_regs *regs)
1750 {
1751         __buffer_unlock_commit(buffer, event);
1752
1753         ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
1754         ftrace_trace_userstack(buffer, flags, pc);
1755 }
1756 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1757
1758 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1759                                          struct ring_buffer_event *event)
1760 {
1761         ring_buffer_discard_commit(buffer, event);
1762 }
1763 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1764
1765 void
1766 trace_function(struct trace_array *tr,
1767                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1768                int pc)
1769 {
1770         struct trace_event_call *call = &event_function;
1771         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1772         struct ring_buffer_event *event;
1773         struct ftrace_entry *entry;
1774
1775         /* If we are reading the ring buffer, don't trace */
1776         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1777                 return;
1778
1779         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1780                                           flags, pc);
1781         if (!event)
1782                 return;
1783         entry   = ring_buffer_event_data(event);
1784         entry->ip                       = ip;
1785         entry->parent_ip                = parent_ip;
1786
1787         if (!call_filter_check_discard(call, entry, buffer, event))
1788                 __buffer_unlock_commit(buffer, event);
1789 }
1790
1791 #ifdef CONFIG_STACKTRACE
1792
1793 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1794 struct ftrace_stack {
1795         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1796 };
1797
1798 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1799 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1800
1801 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1802                                  unsigned long flags,
1803                                  int skip, int pc, struct pt_regs *regs)
1804 {
1805         struct trace_event_call *call = &event_kernel_stack;
1806         struct ring_buffer_event *event;
1807         struct stack_entry *entry;
1808         struct stack_trace trace;
1809         int use_stack;
1810         int size = FTRACE_STACK_ENTRIES;
1811
1812         trace.nr_entries        = 0;
1813         trace.skip              = skip;
1814
1815         /*
1816          * Since events can happen in NMIs, there's no safe way to
1817          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1818          * or NMI comes in, it will just have to use the default
1819          * FTRACE_STACK_SIZE.
1820          */
1821         preempt_disable_notrace();
1822
1823         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1824         /*
1825          * We don't need any atomic variables, just a barrier.
1826          * If an interrupt comes in, we don't care, because it would
1827          * have exited and put the counter back to what we want.
1828          * We just need a barrier to keep gcc from moving things
1829          * around.
1830          */
1831         barrier();
1832         if (use_stack == 1) {
1833                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1834                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1835
1836                 if (regs)
1837                         save_stack_trace_regs(regs, &trace);
1838                 else
1839                         save_stack_trace(&trace);
1840
1841                 if (trace.nr_entries > size)
1842                         size = trace.nr_entries;
1843         } else
1844                 /* From now on, use_stack is a boolean */
1845                 use_stack = 0;
1846
1847         size *= sizeof(unsigned long);
1848
1849         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1850                                           sizeof(*entry) + size, flags, pc);
1851         if (!event)
1852                 goto out;
1853         entry = ring_buffer_event_data(event);
1854
1855         memset(&entry->caller, 0, size);
1856
1857         if (use_stack)
1858                 memcpy(&entry->caller, trace.entries,
1859                        trace.nr_entries * sizeof(unsigned long));
1860         else {
1861                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1862                 trace.entries           = entry->caller;
1863                 if (regs)
1864                         save_stack_trace_regs(regs, &trace);
1865                 else
1866                         save_stack_trace(&trace);
1867         }
1868
1869         entry->size = trace.nr_entries;
1870
1871         if (!call_filter_check_discard(call, entry, buffer, event))
1872                 __buffer_unlock_commit(buffer, event);
1873
1874  out:
1875         /* Again, don't let gcc optimize things here */
1876         barrier();
1877         __this_cpu_dec(ftrace_stack_reserve);
1878         preempt_enable_notrace();
1879
1880 }
1881
1882 static inline void ftrace_trace_stack(struct trace_array *tr,
1883                                       struct ring_buffer *buffer,
1884                                       unsigned long flags,
1885                                       int skip, int pc, struct pt_regs *regs)
1886 {
1887         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1888                 return;
1889
1890         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1891 }
1892
1893 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1894                    int pc)
1895 {
1896         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1897 }
1898
1899 /**
1900  * trace_dump_stack - record a stack back trace in the trace buffer
1901  * @skip: Number of functions to skip (helper handlers)
1902  */
1903 void trace_dump_stack(int skip)
1904 {
1905         unsigned long flags;
1906
1907         if (tracing_disabled || tracing_selftest_running)
1908                 return;
1909
1910         local_save_flags(flags);
1911
1912         /*
1913          * Skip 3 more; that seems to get us to the caller of
1914          * this function.
1915          */
1916         skip += 3;
1917         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1918                              flags, skip, preempt_count(), NULL);
1919 }
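
/*
 * Example: a quick way for kernel code to record where it was called
 * from is simply
 *
 *	trace_dump_stack(0);
 *
 * which stores the caller's backtrace in the global trace buffer (the
 * internal frames of this helper are already skipped above).
 */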
1920
1921 static DEFINE_PER_CPU(int, user_stack_count);
1922
1923 void
1924 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1925 {
1926         struct trace_event_call *call = &event_user_stack;
1927         struct ring_buffer_event *event;
1928         struct userstack_entry *entry;
1929         struct stack_trace trace;
1930
1931         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1932                 return;
1933
1934         /*
1935          * NMIs cannot handle page faults, even with fixups.
1936          * Saving the user stack can (and often does) fault.
1937          */
1938         if (unlikely(in_nmi()))
1939                 return;
1940
1941         /*
1942          * prevent recursion, since the user stack tracing may
1943          * trigger other kernel events.
1944          */
1945         preempt_disable();
1946         if (__this_cpu_read(user_stack_count))
1947                 goto out;
1948
1949         __this_cpu_inc(user_stack_count);
1950
1951         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1952                                           sizeof(*entry), flags, pc);
1953         if (!event)
1954                 goto out_drop_count;
1955         entry   = ring_buffer_event_data(event);
1956
1957         entry->tgid             = current->tgid;
1958         memset(&entry->caller, 0, sizeof(entry->caller));
1959
1960         trace.nr_entries        = 0;
1961         trace.max_entries       = FTRACE_STACK_ENTRIES;
1962         trace.skip              = 0;
1963         trace.entries           = entry->caller;
1964
1965         save_stack_trace_user(&trace);
1966         if (!call_filter_check_discard(call, entry, buffer, event))
1967                 __buffer_unlock_commit(buffer, event);
1968
1969  out_drop_count:
1970         __this_cpu_dec(user_stack_count);
1971  out:
1972         preempt_enable();
1973 }
1974
1975 #ifdef UNUSED
1976 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1977 {
1978         ftrace_trace_userstack(tr, flags, preempt_count());
1979 }
1980 #endif /* UNUSED */
1981
1982 #endif /* CONFIG_STACKTRACE */
1983
1984 /* created for use with alloc_percpu */
1985 struct trace_buffer_struct {
1986         char buffer[TRACE_BUF_SIZE];
1987 };
1988
1989 static struct trace_buffer_struct *trace_percpu_buffer;
1990 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1991 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1992 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1993
1994 /*
1995  * The buffer used depends on the context. There is a per cpu
1996  * buffer for normal context, softirq context, hard irq context and
1997  * for NMI context. This allows for lockless recording.
1998  *
1999  * Note, if the buffers failed to be allocated, then this returns NULL.
2000  */
2001 static char *get_trace_buf(void)
2002 {
2003         struct trace_buffer_struct *percpu_buffer;
2004
2005         /*
2006          * If we have allocated per cpu buffers, then we do not
2007          * need to do any locking.
2008          */
2009         if (in_nmi())
2010                 percpu_buffer = trace_percpu_nmi_buffer;
2011         else if (in_irq())
2012                 percpu_buffer = trace_percpu_irq_buffer;
2013         else if (in_softirq())
2014                 percpu_buffer = trace_percpu_sirq_buffer;
2015         else
2016                 percpu_buffer = trace_percpu_buffer;
2017
2018         if (!percpu_buffer)
2019                 return NULL;
2020
2021         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2022 }
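
/*
 * A usage sketch, mirroring what trace_vbprintk() and
 * __trace_array_vprintk() do below: the buffer is per cpu and per
 * context, so it must only be used with preemption disabled and is
 * never freed by the caller:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer)
 *		len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *	preempt_enable_notrace();
 */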
2023
2024 static int alloc_percpu_trace_buffer(void)
2025 {
2026         struct trace_buffer_struct *buffers;
2027         struct trace_buffer_struct *sirq_buffers;
2028         struct trace_buffer_struct *irq_buffers;
2029         struct trace_buffer_struct *nmi_buffers;
2030
2031         buffers = alloc_percpu(struct trace_buffer_struct);
2032         if (!buffers)
2033                 goto err_warn;
2034
2035         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2036         if (!sirq_buffers)
2037                 goto err_sirq;
2038
2039         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2040         if (!irq_buffers)
2041                 goto err_irq;
2042
2043         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2044         if (!nmi_buffers)
2045                 goto err_nmi;
2046
2047         trace_percpu_buffer = buffers;
2048         trace_percpu_sirq_buffer = sirq_buffers;
2049         trace_percpu_irq_buffer = irq_buffers;
2050         trace_percpu_nmi_buffer = nmi_buffers;
2051
2052         return 0;
2053
2054  err_nmi:
2055         free_percpu(irq_buffers);
2056  err_irq:
2057         free_percpu(sirq_buffers);
2058  err_sirq:
2059         free_percpu(buffers);
2060  err_warn:
2061         WARN(1, "Could not allocate percpu trace_printk buffer");
2062         return -ENOMEM;
2063 }
2064
2065 static int buffers_allocated;
2066
2067 void trace_printk_init_buffers(void)
2068 {
2069         if (buffers_allocated)
2070                 return;
2071
2072         if (alloc_percpu_trace_buffer())
2073                 return;
2074
2075         /* trace_printk() is for debug use only. Don't use it in production. */
2076
2077         pr_warning("\n");
2078         pr_warning("**********************************************************\n");
2079         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2080         pr_warning("**                                                      **\n");
2081         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2082         pr_warning("**                                                      **\n");
2083         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2084         pr_warning("** unsafe for production use.                           **\n");
2085         pr_warning("**                                                      **\n");
2086         pr_warning("** If you see this message and you are not debugging    **\n");
2087         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2088         pr_warning("**                                                      **\n");
2089         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2090         pr_warning("**********************************************************\n");
2091
2092         /* Expand the buffers to set size */
2093         tracing_update_buffers();
2094
2095         buffers_allocated = 1;
2096
2097         /*
2098          * trace_printk_init_buffers() can be called by modules.
2099          * If that happens, then we need to start cmdline recording
2100          * directly here. If the global_trace.buffer is already
2101          * allocated here, then this was called by module code.
2102          */
2103         if (global_trace.trace_buffer.buffer)
2104                 tracing_start_cmdline_record();
2105 }
2106
2107 void trace_printk_start_comm(void)
2108 {
2109         /* Start tracing comms if trace printk is set */
2110         if (!buffers_allocated)
2111                 return;
2112         tracing_start_cmdline_record();
2113 }
2114
2115 static void trace_printk_start_stop_comm(int enabled)
2116 {
2117         if (!buffers_allocated)
2118                 return;
2119
2120         if (enabled)
2121                 tracing_start_cmdline_record();
2122         else
2123                 tracing_stop_cmdline_record();
2124 }
2125
2126 /**
2127  * trace_vbprintk - write binary msg to tracing buffer
2128  *
2129  */
2130 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2131 {
2132         struct trace_event_call *call = &event_bprint;
2133         struct ring_buffer_event *event;
2134         struct ring_buffer *buffer;
2135         struct trace_array *tr = &global_trace;
2136         struct bprint_entry *entry;
2137         unsigned long flags;
2138         char *tbuffer;
2139         int len = 0, size, pc;
2140
2141         if (unlikely(tracing_selftest_running || tracing_disabled))
2142                 return 0;
2143
2144         /* Don't pollute graph traces with trace_vprintk internals */
2145         pause_graph_tracing();
2146
2147         pc = preempt_count();
2148         preempt_disable_notrace();
2149
2150         tbuffer = get_trace_buf();
2151         if (!tbuffer) {
2152                 len = 0;
2153                 goto out;
2154         }
2155
2156         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2157
2158         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2159                 goto out;
2160
2161         local_save_flags(flags);
2162         size = sizeof(*entry) + sizeof(u32) * len;
2163         buffer = tr->trace_buffer.buffer;
2164         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2165                                           flags, pc);
2166         if (!event)
2167                 goto out;
2168         entry = ring_buffer_event_data(event);
2169         entry->ip                       = ip;
2170         entry->fmt                      = fmt;
2171
2172         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2173         if (!call_filter_check_discard(call, entry, buffer, event)) {
2174                 __buffer_unlock_commit(buffer, event);
2175                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2176         }
2177
2178 out:
2179         preempt_enable_notrace();
2180         unpause_graph_tracing();
2181
2182         return len;
2183 }
2184 EXPORT_SYMBOL_GPL(trace_vbprintk);
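
/*
 * This is expected to be the path behind the trace_printk() macro when
 * it is used with arguments and a compile-time constant format; only the
 * binary arguments and a pointer to the format string are stored in the
 * ring buffer.  A hypothetical call site would simply be:
 *
 *	trace_printk("entered %s, x=%d\n", __func__, x);
 *
 * with "x" standing in for whatever the caller wants to record.
 */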
2185
2186 static int
2187 __trace_array_vprintk(struct ring_buffer *buffer,
2188                       unsigned long ip, const char *fmt, va_list args)
2189 {
2190         struct trace_event_call *call = &event_print;
2191         struct ring_buffer_event *event;
2192         int len = 0, size, pc;
2193         struct print_entry *entry;
2194         unsigned long flags;
2195         char *tbuffer;
2196
2197         if (tracing_disabled || tracing_selftest_running)
2198                 return 0;
2199
2200         /* Don't pollute graph traces with trace_vprintk internals */
2201         pause_graph_tracing();
2202
2203         pc = preempt_count();
2204         preempt_disable_notrace();
2205
2206
2207         tbuffer = get_trace_buf();
2208         if (!tbuffer) {
2209                 len = 0;
2210                 goto out;
2211         }
2212
2213         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2214
2215         local_save_flags(flags);
2216         size = sizeof(*entry) + len + 1;
2217         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2218                                           flags, pc);
2219         if (!event)
2220                 goto out;
2221         entry = ring_buffer_event_data(event);
2222         entry->ip = ip;
2223
2224         memcpy(&entry->buf, tbuffer, len + 1);
2225         if (!call_filter_check_discard(call, entry, buffer, event)) {
2226                 __buffer_unlock_commit(buffer, event);
2227                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2228         }
2229  out:
2230         preempt_enable_notrace();
2231         unpause_graph_tracing();
2232
2233         return len;
2234 }
2235
2236 int trace_array_vprintk(struct trace_array *tr,
2237                         unsigned long ip, const char *fmt, va_list args)
2238 {
2239         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2240 }
2241
2242 int trace_array_printk(struct trace_array *tr,
2243                        unsigned long ip, const char *fmt, ...)
2244 {
2245         int ret;
2246         va_list ap;
2247
2248         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2249                 return 0;
2250
2251         va_start(ap, fmt);
2252         ret = trace_array_vprintk(tr, ip, fmt, ap);
2253         va_end(ap);
2254         return ret;
2255 }
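
/*
 * Illustrative only: code that already holds a trace_array (here called
 * "tr", obtained elsewhere) can write a plain text message into its
 * buffer, provided the printk trace option is set, with
 *
 *	trace_array_printk(tr, _THIS_IP_, "resetting device %d\n", id);
 *
 * where _THIS_IP_ records the caller's instruction pointer and "id" is
 * just a placeholder argument.
 */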
2256
2257 int trace_array_printk_buf(struct ring_buffer *buffer,
2258                            unsigned long ip, const char *fmt, ...)
2259 {
2260         int ret;
2261         va_list ap;
2262
2263         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2264                 return 0;
2265
2266         va_start(ap, fmt);
2267         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2268         va_end(ap);
2269         return ret;
2270 }
2271
2272 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2273 {
2274         return trace_array_vprintk(&global_trace, ip, fmt, args);
2275 }
2276 EXPORT_SYMBOL_GPL(trace_vprintk);
2277
2278 static void trace_iterator_increment(struct trace_iterator *iter)
2279 {
2280         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2281
2282         iter->idx++;
2283         if (buf_iter)
2284                 ring_buffer_read(buf_iter, NULL);
2285 }
2286
2287 static struct trace_entry *
2288 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2289                 unsigned long *lost_events)
2290 {
2291         struct ring_buffer_event *event;
2292         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2293
2294         if (buf_iter)
2295                 event = ring_buffer_iter_peek(buf_iter, ts);
2296         else
2297                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2298                                          lost_events);
2299
2300         if (event) {
2301                 iter->ent_size = ring_buffer_event_length(event);
2302                 return ring_buffer_event_data(event);
2303         }
2304         iter->ent_size = 0;
2305         return NULL;
2306 }
2307
2308 static struct trace_entry *
2309 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2310                   unsigned long *missing_events, u64 *ent_ts)
2311 {
2312         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2313         struct trace_entry *ent, *next = NULL;
2314         unsigned long lost_events = 0, next_lost = 0;
2315         int cpu_file = iter->cpu_file;
2316         u64 next_ts = 0, ts;
2317         int next_cpu = -1;
2318         int next_size = 0;
2319         int cpu;
2320
2321         /*
2322          * If we are in a per_cpu trace file, don't bother iterating over
2323          * all CPUs; just peek at that one directly.
2324          */
2325         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2326                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2327                         return NULL;
2328                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2329                 if (ent_cpu)
2330                         *ent_cpu = cpu_file;
2331
2332                 return ent;
2333         }
2334
2335         for_each_tracing_cpu(cpu) {
2336
2337                 if (ring_buffer_empty_cpu(buffer, cpu))
2338                         continue;
2339
2340                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2341
2342                 /*
2343                  * Pick the entry with the smallest timestamp:
2344                  */
2345                 if (ent && (!next || ts < next_ts)) {
2346                         next = ent;
2347                         next_cpu = cpu;
2348                         next_ts = ts;
2349                         next_lost = lost_events;
2350                         next_size = iter->ent_size;
2351                 }
2352         }
2353
2354         iter->ent_size = next_size;
2355
2356         if (ent_cpu)
2357                 *ent_cpu = next_cpu;
2358
2359         if (ent_ts)
2360                 *ent_ts = next_ts;
2361
2362         if (missing_events)
2363                 *missing_events = next_lost;
2364
2365         return next;
2366 }
2367
2368 /* Find the next real entry, without updating the iterator itself */
2369 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2370                                           int *ent_cpu, u64 *ent_ts)
2371 {
2372         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2373 }
2374
2375 /* Find the next real entry, and increment the iterator to the next entry */
2376 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2377 {
2378         iter->ent = __find_next_entry(iter, &iter->cpu,
2379                                       &iter->lost_events, &iter->ts);
2380
2381         if (iter->ent)
2382                 trace_iterator_increment(iter);
2383
2384         return iter->ent ? iter : NULL;
2385 }
2386
2387 static void trace_consume(struct trace_iterator *iter)
2388 {
2389         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2390                             &iter->lost_events);
2391 }
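
/*
 * These helpers are driven by the read paths roughly as follows (a
 * simplified sketch of what the pipe reader does; error handling and
 * locking omitted):
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);
 *	}
 */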
2392
2393 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2394 {
2395         struct trace_iterator *iter = m->private;
2396         int i = (int)*pos;
2397         void *ent;
2398
2399         WARN_ON_ONCE(iter->leftover);
2400
2401         (*pos)++;
2402
2403         /* can't go backwards */
2404         if (iter->idx > i)
2405                 return NULL;
2406
2407         if (iter->idx < 0)
2408                 ent = trace_find_next_entry_inc(iter);
2409         else
2410                 ent = iter;
2411
2412         while (ent && iter->idx < i)
2413                 ent = trace_find_next_entry_inc(iter);
2414
2415         iter->pos = *pos;
2416
2417         return ent;
2418 }
2419
2420 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2421 {
2422         struct ring_buffer_event *event;
2423         struct ring_buffer_iter *buf_iter;
2424         unsigned long entries = 0;
2425         u64 ts;
2426
2427         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2428
2429         buf_iter = trace_buffer_iter(iter, cpu);
2430         if (!buf_iter)
2431                 return;
2432
2433         ring_buffer_iter_reset(buf_iter);
2434
2435         /*
2436          * With the max latency tracers, a reset may never have
2437          * taken place on a cpu. This is evident when the timestamp
2438          * is before the start of the buffer.
2439          */
2440         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2441                 if (ts >= iter->trace_buffer->time_start)
2442                         break;
2443                 entries++;
2444                 ring_buffer_read(buf_iter, NULL);
2445         }
2446
2447         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2448 }
2449
2450 /*
2451  * The current tracer is copied to avoid taking a global lock
2452  * all around.
2453  */
2454 static void *s_start(struct seq_file *m, loff_t *pos)
2455 {
2456         struct trace_iterator *iter = m->private;
2457         struct trace_array *tr = iter->tr;
2458         int cpu_file = iter->cpu_file;
2459         void *p = NULL;
2460         loff_t l = 0;
2461         int cpu;
2462
2463         /*
2464          * copy the tracer to avoid using a global lock all around.
2465          * iter->trace is a copy of current_trace, the pointer to the
2466          * iter->trace is a copy of current_trace; the pointer to the
2467          * will point to the same string as current_trace->name.
2468          */
2469         mutex_lock(&trace_types_lock);
2470         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2471                 *iter->trace = *tr->current_trace;
2472         mutex_unlock(&trace_types_lock);
2473
2474 #ifdef CONFIG_TRACER_MAX_TRACE
2475         if (iter->snapshot && iter->trace->use_max_tr)
2476                 return ERR_PTR(-EBUSY);
2477 #endif
2478
2479         if (!iter->snapshot)
2480                 atomic_inc(&trace_record_cmdline_disabled);
2481
2482         if (*pos != iter->pos) {
2483                 iter->ent = NULL;
2484                 iter->cpu = 0;
2485                 iter->idx = -1;
2486
2487                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2488                         for_each_tracing_cpu(cpu)
2489                                 tracing_iter_reset(iter, cpu);
2490                 } else
2491                         tracing_iter_reset(iter, cpu_file);
2492
2493                 iter->leftover = 0;
2494                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2495                         ;
2496
2497         } else {
2498                 /*
2499                  * If we overflowed the seq_file before, then we want
2500                  * to just reuse the trace_seq buffer again.
2501                  */
2502                 if (iter->leftover)
2503                         p = iter;
2504                 else {
2505                         l = *pos - 1;
2506                         p = s_next(m, p, &l);
2507                 }
2508         }
2509
2510         trace_event_read_lock();
2511         trace_access_lock(cpu_file);
2512         return p;
2513 }
2514
2515 static void s_stop(struct seq_file *m, void *p)
2516 {
2517         struct trace_iterator *iter = m->private;
2518
2519 #ifdef CONFIG_TRACER_MAX_TRACE
2520         if (iter->snapshot && iter->trace->use_max_tr)
2521                 return;
2522 #endif
2523
2524         if (!iter->snapshot)
2525                 atomic_dec(&trace_record_cmdline_disabled);
2526
2527         trace_access_unlock(iter->cpu_file);
2528         trace_event_read_unlock();
2529 }
2530
2531 static void
2532 get_total_entries(struct trace_buffer *buf,
2533                   unsigned long *total, unsigned long *entries)
2534 {
2535         unsigned long count;
2536         int cpu;
2537
2538         *total = 0;
2539         *entries = 0;
2540
2541         for_each_tracing_cpu(cpu) {
2542                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2543                 /*
2544                  * If this buffer has skipped entries, then we hold all
2545                  * entries for the trace and we need to ignore the
2546                  * ones before the time stamp.
2547                  */
2548                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2549                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2550                         /* total is the same as the entries */
2551                         *total += count;
2552                 } else
2553                         *total += count +
2554                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2555                 *entries += count;
2556         }
2557 }
2558
2559 static void print_lat_help_header(struct seq_file *m)
2560 {
2561         seq_puts(m, "#                  _------=> CPU#            \n"
2562                     "#                 / _-----=> irqs-off        \n"
2563                     "#                | / _----=> need-resched    \n"
2564                     "#                || / _---=> hardirq/softirq \n"
2565                     "#                ||| / _--=> preempt-depth   \n"
2566                     "#                |||| /     delay            \n"
2567                     "#  cmd     pid   ||||| time  |   caller      \n"
2568                     "#     \\   /      |||||  \\    |   /         \n");
2569 }
2570
2571 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2572 {
2573         unsigned long total;
2574         unsigned long entries;
2575
2576         get_total_entries(buf, &total, &entries);
2577         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2578                    entries, total, num_online_cpus());
2579         seq_puts(m, "#\n");
2580 }
2581
2582 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2583 {
2584         print_event_info(buf, m);
2585         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2586                     "#              | |       |          |         |\n");
2587 }
2588
2589 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2590 {
2591         print_event_info(buf, m);
2592         seq_puts(m, "#                              _-----=> irqs-off\n"
2593                     "#                             / _----=> need-resched\n"
2594                     "#                            | / _---=> hardirq/softirq\n"
2595                     "#                            || / _--=> preempt-depth\n"
2596                     "#                            ||| /     delay\n"
2597                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2598                     "#              | |       |   ||||       |         |\n");
2599 }
2600
2601 void
2602 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2603 {
2604         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2605         struct trace_buffer *buf = iter->trace_buffer;
2606         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2607         struct tracer *type = iter->trace;
2608         unsigned long entries;
2609         unsigned long total;
2610         const char *name;
2611
2612         name = type->name;
2613
2614         get_total_entries(buf, &total, &entries);
2615
2616         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2617                    name, UTS_RELEASE);
2618         seq_puts(m, "# -----------------------------------"
2619                  "---------------------------------\n");
2620         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2621                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2622                    nsecs_to_usecs(data->saved_latency),
2623                    entries,
2624                    total,
2625                    buf->cpu,
2626 #if defined(CONFIG_PREEMPT_NONE)
2627                    "server",
2628 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2629                    "desktop",
2630 #elif defined(CONFIG_PREEMPT)
2631                    "preempt",
2632 #else
2633                    "unknown",
2634 #endif
2635                    /* These are reserved for later use */
2636                    0, 0, 0, 0);
2637 #ifdef CONFIG_SMP
2638         seq_printf(m, " #P:%d)\n", num_online_cpus());
2639 #else
2640         seq_puts(m, ")\n");
2641 #endif
2642         seq_puts(m, "#    -----------------\n");
2643         seq_printf(m, "#    | task: %.16s-%d "
2644                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2645                    data->comm, data->pid,
2646                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2647                    data->policy, data->rt_priority);
2648         seq_puts(m, "#    -----------------\n");
2649
2650         if (data->critical_start) {
2651                 seq_puts(m, "#  => started at: ");
2652                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2653                 trace_print_seq(m, &iter->seq);
2654                 seq_puts(m, "\n#  => ended at:   ");
2655                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2656                 trace_print_seq(m, &iter->seq);
2657                 seq_puts(m, "\n#\n");
2658         }
2659
2660         seq_puts(m, "#\n");
2661 }
2662
2663 static void test_cpu_buff_start(struct trace_iterator *iter)
2664 {
2665         struct trace_seq *s = &iter->seq;
2666         struct trace_array *tr = iter->tr;
2667
2668         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2669                 return;
2670
2671         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2672                 return;
2673
2674         if (cpumask_test_cpu(iter->cpu, iter->started))
2675                 return;
2676
2677         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2678                 return;
2679
2680         cpumask_set_cpu(iter->cpu, iter->started);
2681
2682         /* Don't print started cpu buffer for the first entry of the trace */
2683         if (iter->idx > 1)
2684                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2685                                 iter->cpu);
2686 }
2687
2688 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2689 {
2690         struct trace_array *tr = iter->tr;
2691         struct trace_seq *s = &iter->seq;
2692         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2693         struct trace_entry *entry;
2694         struct trace_event *event;
2695
2696         entry = iter->ent;
2697
2698         test_cpu_buff_start(iter);
2699
2700         event = ftrace_find_event(entry->type);
2701
2702         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2703                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2704                         trace_print_lat_context(iter);
2705                 else
2706                         trace_print_context(iter);
2707         }
2708
2709         if (trace_seq_has_overflowed(s))
2710                 return TRACE_TYPE_PARTIAL_LINE;
2711
2712         if (event)
2713                 return event->funcs->trace(iter, sym_flags, event);
2714
2715         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2716
2717         return trace_handle_return(s);
2718 }
2719
2720 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2721 {
2722         struct trace_array *tr = iter->tr;
2723         struct trace_seq *s = &iter->seq;
2724         struct trace_entry *entry;
2725         struct trace_event *event;
2726
2727         entry = iter->ent;
2728
2729         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2730                 trace_seq_printf(s, "%d %d %llu ",
2731                                  entry->pid, iter->cpu, iter->ts);
2732
2733         if (trace_seq_has_overflowed(s))
2734                 return TRACE_TYPE_PARTIAL_LINE;
2735
2736         event = ftrace_find_event(entry->type);
2737         if (event)
2738                 return event->funcs->raw(iter, 0, event);
2739
2740         trace_seq_printf(s, "%d ?\n", entry->type);
2741
2742         return trace_handle_return(s);
2743 }
2744
2745 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2746 {
2747         struct trace_array *tr = iter->tr;
2748         struct trace_seq *s = &iter->seq;
2749         unsigned char newline = '\n';
2750         struct trace_entry *entry;
2751         struct trace_event *event;
2752
2753         entry = iter->ent;
2754
2755         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2756                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2757                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2758                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2759                 if (trace_seq_has_overflowed(s))
2760                         return TRACE_TYPE_PARTIAL_LINE;
2761         }
2762
2763         event = ftrace_find_event(entry->type);
2764         if (event) {
2765                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2766                 if (ret != TRACE_TYPE_HANDLED)
2767                         return ret;
2768         }
2769
2770         SEQ_PUT_FIELD(s, newline);
2771
2772         return trace_handle_return(s);
2773 }
2774
2775 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2776 {
2777         struct trace_array *tr = iter->tr;
2778         struct trace_seq *s = &iter->seq;
2779         struct trace_entry *entry;
2780         struct trace_event *event;
2781
2782         entry = iter->ent;
2783
2784         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2785                 SEQ_PUT_FIELD(s, entry->pid);
2786                 SEQ_PUT_FIELD(s, iter->cpu);
2787                 SEQ_PUT_FIELD(s, iter->ts);
2788                 if (trace_seq_has_overflowed(s))
2789                         return TRACE_TYPE_PARTIAL_LINE;
2790         }
2791
2792         event = ftrace_find_event(entry->type);
2793         return event ? event->funcs->binary(iter, 0, event) :
2794                 TRACE_TYPE_HANDLED;
2795 }
2796
2797 int trace_empty(struct trace_iterator *iter)
2798 {
2799         struct ring_buffer_iter *buf_iter;
2800         int cpu;
2801
2802         /* If we are looking at one CPU buffer, only check that one */
2803         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2804                 cpu = iter->cpu_file;
2805                 buf_iter = trace_buffer_iter(iter, cpu);
2806                 if (buf_iter) {
2807                         if (!ring_buffer_iter_empty(buf_iter))
2808                                 return 0;
2809                 } else {
2810                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2811                                 return 0;
2812                 }
2813                 return 1;
2814         }
2815
2816         for_each_tracing_cpu(cpu) {
2817                 buf_iter = trace_buffer_iter(iter, cpu);
2818                 if (buf_iter) {
2819                         if (!ring_buffer_iter_empty(buf_iter))
2820                                 return 0;
2821                 } else {
2822                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2823                                 return 0;
2824                 }
2825         }
2826
2827         return 1;
2828 }
2829
2830 /*  Called with trace_event_read_lock() held. */
2831 enum print_line_t print_trace_line(struct trace_iterator *iter)
2832 {
2833         struct trace_array *tr = iter->tr;
2834         unsigned long trace_flags = tr->trace_flags;
2835         enum print_line_t ret;
2836
2837         if (iter->lost_events) {
2838                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2839                                  iter->cpu, iter->lost_events);
2840                 if (trace_seq_has_overflowed(&iter->seq))
2841                         return TRACE_TYPE_PARTIAL_LINE;
2842         }
2843
2844         if (iter->trace && iter->trace->print_line) {
2845                 ret = iter->trace->print_line(iter);
2846                 if (ret != TRACE_TYPE_UNHANDLED)
2847                         return ret;
2848         }
2849
2850         if (iter->ent->type == TRACE_BPUTS &&
2851                         trace_flags & TRACE_ITER_PRINTK &&
2852                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2853                 return trace_print_bputs_msg_only(iter);
2854
2855         if (iter->ent->type == TRACE_BPRINT &&
2856                         trace_flags & TRACE_ITER_PRINTK &&
2857                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2858                 return trace_print_bprintk_msg_only(iter);
2859
2860         if (iter->ent->type == TRACE_PRINT &&
2861                         trace_flags & TRACE_ITER_PRINTK &&
2862                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2863                 return trace_print_printk_msg_only(iter);
2864
2865         if (trace_flags & TRACE_ITER_BIN)
2866                 return print_bin_fmt(iter);
2867
2868         if (trace_flags & TRACE_ITER_HEX)
2869                 return print_hex_fmt(iter);
2870
2871         if (trace_flags & TRACE_ITER_RAW)
2872                 return print_raw_fmt(iter);
2873
2874         return print_trace_fmt(iter);
2875 }
2876
2877 void trace_latency_header(struct seq_file *m)
2878 {
2879         struct trace_iterator *iter = m->private;
2880         struct trace_array *tr = iter->tr;
2881
2882         /* print nothing if the buffers are empty */
2883         if (trace_empty(iter))
2884                 return;
2885
2886         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2887                 print_trace_header(m, iter);
2888
2889         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2890                 print_lat_help_header(m);
2891 }
2892
2893 void trace_default_header(struct seq_file *m)
2894 {
2895         struct trace_iterator *iter = m->private;
2896         struct trace_array *tr = iter->tr;
2897         unsigned long trace_flags = tr->trace_flags;
2898
2899         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2900                 return;
2901
2902         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2903                 /* print nothing if the buffers are empty */
2904                 if (trace_empty(iter))
2905                         return;
2906                 print_trace_header(m, iter);
2907                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2908                         print_lat_help_header(m);
2909         } else {
2910                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2911                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2912                                 print_func_help_header_irq(iter->trace_buffer, m);
2913                         else
2914                                 print_func_help_header(iter->trace_buffer, m);
2915                 }
2916         }
2917 }
2918
2919 static void test_ftrace_alive(struct seq_file *m)
2920 {
2921         if (!ftrace_is_dead())
2922                 return;
2923         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2924                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2925 }
2926
2927 #ifdef CONFIG_TRACER_MAX_TRACE
2928 static void show_snapshot_main_help(struct seq_file *m)
2929 {
2930         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2931                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2932                     "#                      Takes a snapshot of the main buffer.\n"
2933                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2934                     "#                      (Doesn't have to be '2'; works with any number that\n"
2935                     "#                       is not a '0' or '1')\n");
2936 }
2937
2938 static void show_snapshot_percpu_help(struct seq_file *m)
2939 {
2940         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2941 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2942         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2943                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2944 #else
2945         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2946                     "#                     Must use main snapshot file to allocate.\n");
2947 #endif
2948         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2949                     "#                      (Doesn't have to be '2'; works with any number that\n"
2950                     "#                       is not a '0' or '1')\n");
2951 }
2952
2953 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2954 {
2955         if (iter->tr->allocated_snapshot)
2956                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2957         else
2958                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2959
2960         seq_puts(m, "# Snapshot commands:\n");
2961         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2962                 show_snapshot_main_help(m);
2963         else
2964                 show_snapshot_percpu_help(m);
2965 }
2966 #else
2967 /* Should never be called */
2968 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2969 #endif
2970
2971 static int s_show(struct seq_file *m, void *v)
2972 {
2973         struct trace_iterator *iter = v;
2974         int ret;
2975
2976         if (iter->ent == NULL) {
2977                 if (iter->tr) {
2978                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2979                         seq_puts(m, "#\n");
2980                         test_ftrace_alive(m);
2981                 }
2982                 if (iter->snapshot && trace_empty(iter))
2983                         print_snapshot_help(m, iter);
2984                 else if (iter->trace && iter->trace->print_header)
2985                         iter->trace->print_header(m);
2986                 else
2987                         trace_default_header(m);
2988
2989         } else if (iter->leftover) {
2990                 /*
2991                  * If we filled the seq_file buffer earlier, we
2992                  * want to just show it now.
2993                  */
2994                 ret = trace_print_seq(m, &iter->seq);
2995
2996                 /* ret should this time be zero, but you never know */
2997                 iter->leftover = ret;
2998
2999         } else {
3000                 print_trace_line(iter);
3001                 ret = trace_print_seq(m, &iter->seq);
3002                 /*
3003                  * If we overflow the seq_file buffer, then it will
3004                  * ask us for this data again at start up.
3005                  * Use that instead.
3006                  *  ret is 0 if seq_file write succeeded.
3007                  *        -1 otherwise.
3008                  */
3009                 iter->leftover = ret;
3010         }
3011
3012         return 0;
3013 }
3014
3015 /*
3016  * Should be used after trace_array_get(), trace_types_lock
3017  * ensures that i_cdev was already initialized.
3018  */
3019 static inline int tracing_get_cpu(struct inode *inode)
3020 {
3021         if (inode->i_cdev) /* See trace_create_cpu_file() */
3022                 return (long)inode->i_cdev - 1;
3023         return RING_BUFFER_ALL_CPUS;
3024 }
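
/*
 * In other words, a per-cpu file is assumed to have been created with
 * (cpu + 1) stashed in i_cdev, so that NULL can keep meaning "all CPUs":
 *
 *	i_cdev == NULL			-> RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)(2 + 1)	-> cpu 2
 */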
3025
3026 static const struct seq_operations tracer_seq_ops = {
3027         .start          = s_start,
3028         .next           = s_next,
3029         .stop           = s_stop,
3030         .show           = s_show,
3031 };
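
/*
 * The seq_file core drives these callbacks roughly in this order (a
 * simplified sketch; the real seq_read() also handles buffer resizing
 * and partial reads):
 *
 *	p = start(m, &pos);
 *	while (p) {
 *		show(m, p);
 *		p = next(m, p, &pos);
 *	}
 *	stop(m, p);
 */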
3032
3033 static struct trace_iterator *
3034 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3035 {
3036         struct trace_array *tr = inode->i_private;
3037         struct trace_iterator *iter;
3038         int cpu;
3039
3040         if (tracing_disabled)
3041                 return ERR_PTR(-ENODEV);
3042
3043         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3044         if (!iter)
3045                 return ERR_PTR(-ENOMEM);
3046
3047         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3048                                     GFP_KERNEL);
3049         if (!iter->buffer_iter)
3050                 goto release;
3051
3052         /*
3053          * We make a copy of the current tracer to avoid concurrent
3054          * changes to it while we are reading.
3055          */
3056         mutex_lock(&trace_types_lock);
3057         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3058         if (!iter->trace)
3059                 goto fail;
3060
3061         *iter->trace = *tr->current_trace;
3062
3063         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3064                 goto fail;
3065
3066         iter->tr = tr;
3067
3068 #ifdef CONFIG_TRACER_MAX_TRACE
3069         /* Currently only the top directory has a snapshot */
3070         if (tr->current_trace->print_max || snapshot)
3071                 iter->trace_buffer = &tr->max_buffer;
3072         else
3073 #endif
3074                 iter->trace_buffer = &tr->trace_buffer;
3075         iter->snapshot = snapshot;
3076         iter->pos = -1;
3077         iter->cpu_file = tracing_get_cpu(inode);
3078         mutex_init(&iter->mutex);
3079
3080         /* Notify the tracer early; before we stop tracing. */
3081         if (iter->trace && iter->trace->open)
3082                 iter->trace->open(iter);
3083
3084         /* Annotate start of buffers if we had overruns */
3085         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3086                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3087
3088         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3089         if (trace_clocks[tr->clock_id].in_ns)
3090                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3091
3092         /* stop the trace while dumping if we are not opening "snapshot" */
3093         if (!iter->snapshot)
3094                 tracing_stop_tr(tr);
3095
3096         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3097                 for_each_tracing_cpu(cpu) {
3098                         iter->buffer_iter[cpu] =
3099                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3100                 }
3101                 ring_buffer_read_prepare_sync();
3102                 for_each_tracing_cpu(cpu) {
3103                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3104                         tracing_iter_reset(iter, cpu);
3105                 }
3106         } else {
3107                 cpu = iter->cpu_file;
3108                 iter->buffer_iter[cpu] =
3109                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3110                 ring_buffer_read_prepare_sync();
3111                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3112                 tracing_iter_reset(iter, cpu);
3113         }
3114
3115         mutex_unlock(&trace_types_lock);
3116
3117         return iter;
3118
3119  fail:
3120         mutex_unlock(&trace_types_lock);
3121         kfree(iter->trace);
3122         kfree(iter->buffer_iter);
3123 release:
3124         seq_release_private(inode, file);
3125         return ERR_PTR(-ENOMEM);
3126 }
3127
3128 int tracing_open_generic(struct inode *inode, struct file *filp)
3129 {
3130         if (tracing_disabled)
3131                 return -ENODEV;
3132
3133         filp->private_data = inode->i_private;
3134         return 0;
3135 }
3136
3137 bool tracing_is_disabled(void)
3138 {
3139         return (tracing_disabled) ? true : false;
3140 }
3141
3142 /*
3143  * Open and update trace_array ref count.
3144  * Must have the current trace_array passed to it.
3145  */
3146 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3147 {
3148         struct trace_array *tr = inode->i_private;
3149
3150         if (tracing_disabled)
3151                 return -ENODEV;
3152
3153         if (trace_array_get(tr) < 0)
3154                 return -ENODEV;
3155
3156         filp->private_data = inode->i_private;
3157
3158         return 0;
3159 }
3160
3161 static int tracing_release(struct inode *inode, struct file *file)
3162 {
3163         struct trace_array *tr = inode->i_private;
3164         struct seq_file *m = file->private_data;
3165         struct trace_iterator *iter;
3166         int cpu;
3167
3168         if (!(file->f_mode & FMODE_READ)) {
3169                 trace_array_put(tr);
3170                 return 0;
3171         }
3172
3173         /* Writes do not use seq_file */
3174         iter = m->private;
3175         mutex_lock(&trace_types_lock);
3176
3177         for_each_tracing_cpu(cpu) {
3178                 if (iter->buffer_iter[cpu])
3179                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3180         }
3181
3182         if (iter->trace && iter->trace->close)
3183                 iter->trace->close(iter);
3184
3185         if (!iter->snapshot)
3186                 /* reenable tracing if it was previously enabled */
3187                 tracing_start_tr(tr);
3188
3189         __trace_array_put(tr);
3190
3191         mutex_unlock(&trace_types_lock);
3192
3193         mutex_destroy(&iter->mutex);
3194         free_cpumask_var(iter->started);
3195         kfree(iter->trace);
3196         kfree(iter->buffer_iter);
3197         seq_release_private(inode, file);
3198
3199         return 0;
3200 }
3201
3202 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3203 {
3204         struct trace_array *tr = inode->i_private;
3205
3206         trace_array_put(tr);
3207         return 0;
3208 }
3209
3210 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3211 {
3212         struct trace_array *tr = inode->i_private;
3213
3214         trace_array_put(tr);
3215
3216         return single_release(inode, file);
3217 }
3218
3219 static int tracing_open(struct inode *inode, struct file *file)
3220 {
3221         struct trace_array *tr = inode->i_private;
3222         struct trace_iterator *iter;
3223         int ret = 0;
3224
3225         if (trace_array_get(tr) < 0)
3226                 return -ENODEV;
3227
3228         /* If this file was open for write, then erase contents */
3229         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3230                 int cpu = tracing_get_cpu(inode);
3231
3232                 if (cpu == RING_BUFFER_ALL_CPUS)
3233                         tracing_reset_online_cpus(&tr->trace_buffer);
3234                 else
3235                         tracing_reset(&tr->trace_buffer, cpu);
3236         }
3237
3238         if (file->f_mode & FMODE_READ) {
3239                 iter = __tracing_open(inode, file, false);
3240                 if (IS_ERR(iter))
3241                         ret = PTR_ERR(iter);
3242                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3243                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3244         }
3245
3246         if (ret < 0)
3247                 trace_array_put(tr);
3248
3249         return ret;
3250 }
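
/*
 * For example, the documented "echo > trace" (see readme_msg below) ends
 * up here: the shell opens the file write-only with O_TRUNC, which takes
 * the branch above and resets either one CPU's buffer or all of them,
 * depending on which per_cpu file was opened.
 */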
3251
3252 /*
3253  * Some tracers are not suitable for instance buffers.
3254  * A tracer is always available for the global array (toplevel)
3255  * or if it explicitly states that it is.
3256  */
3257 static bool
3258 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3259 {
3260         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3261 }
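
/*
 * Illustrative sketch only (the tracer name and init callback below are
 * hypothetical, not part of this file): a tracer opts in to instance
 * buffers by setting .allow_instances in its struct tracer:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name		 = "example",
 *		.init		 = example_init,
 *		.allow_instances = true,
 *	};
 *
 * Without the flag, the tracer is only selectable on the top-level
 * (global) trace array.
 */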
3262
3263 /* Find the next tracer that this trace array may use */
3264 static struct tracer *
3265 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3266 {
3267         while (t && !trace_ok_for_array(t, tr))
3268                 t = t->next;
3269
3270         return t;
3271 }
3272
3273 static void *
3274 t_next(struct seq_file *m, void *v, loff_t *pos)
3275 {
3276         struct trace_array *tr = m->private;
3277         struct tracer *t = v;
3278
3279         (*pos)++;
3280
3281         if (t)
3282                 t = get_tracer_for_array(tr, t->next);
3283
3284         return t;
3285 }
3286
3287 static void *t_start(struct seq_file *m, loff_t *pos)
3288 {
3289         struct trace_array *tr = m->private;
3290         struct tracer *t;
3291         loff_t l = 0;
3292
3293         mutex_lock(&trace_types_lock);
3294
3295         t = get_tracer_for_array(tr, trace_types);
3296         for (; t && l < *pos; t = t_next(m, t, &l))
3297                 ;
3298
3299         return t;
3300 }
3301
3302 static void t_stop(struct seq_file *m, void *p)
3303 {
3304         mutex_unlock(&trace_types_lock);
3305 }
3306
3307 static int t_show(struct seq_file *m, void *v)
3308 {
3309         struct tracer *t = v;
3310
3311         if (!t)
3312                 return 0;
3313
3314         seq_puts(m, t->name);
3315         if (t->next)
3316                 seq_putc(m, ' ');
3317         else
3318                 seq_putc(m, '\n');
3319
3320         return 0;
3321 }
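
/*
 * The seq operations above back the "available_tracers" file: reading it
 * yields the tracer names that are valid for this trace array, separated
 * by spaces on a single line.
 */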
3322
3323 static const struct seq_operations show_traces_seq_ops = {
3324         .start          = t_start,
3325         .next           = t_next,
3326         .stop           = t_stop,
3327         .show           = t_show,
3328 };
3329
3330 static int show_traces_open(struct inode *inode, struct file *file)
3331 {
3332         struct trace_array *tr = inode->i_private;
3333         struct seq_file *m;
3334         int ret;
3335
3336         if (tracing_disabled)
3337                 return -ENODEV;
3338
3339         ret = seq_open(file, &show_traces_seq_ops);
3340         if (ret)
3341                 return ret;
3342
3343         m = file->private_data;
3344         m->private = tr;
3345
3346         return 0;
3347 }
3348
3349 static ssize_t
3350 tracing_write_stub(struct file *filp, const char __user *ubuf,
3351                    size_t count, loff_t *ppos)
3352 {
3353         return count;
3354 }
3355
3356 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3357 {
3358         int ret;
3359
3360         if (file->f_mode & FMODE_READ)
3361                 ret = seq_lseek(file, offset, whence);
3362         else
3363                 file->f_pos = ret = 0;
3364
3365         return ret;
3366 }
3367
3368 static const struct file_operations tracing_fops = {
3369         .open           = tracing_open,
3370         .read           = seq_read,
3371         .write          = tracing_write_stub,
3372         .llseek         = tracing_lseek,
3373         .release        = tracing_release,
3374 };
3375
3376 static const struct file_operations show_traces_fops = {
3377         .open           = show_traces_open,
3378         .read           = seq_read,
3379         .release        = seq_release,
3380         .llseek         = seq_lseek,
3381 };
3382
3383 /*
3384  * The tracer itself will not take this lock, but still we want
3385  * to provide a consistent cpumask to user-space:
3386  */
3387 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3388
3389 /*
3390  * Temporary storage for the character representation of the
3391  * CPU bitmask (and one more byte for the newline):
3392  */
3393 static char mask_str[NR_CPUS + 1];
3394
3395 static ssize_t
3396 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3397                      size_t count, loff_t *ppos)
3398 {
3399         struct trace_array *tr = file_inode(filp)->i_private;
3400         int len;
3401
3402         mutex_lock(&tracing_cpumask_update_lock);
3403
3404         len = snprintf(mask_str, count, "%*pb\n",
3405                        cpumask_pr_args(tr->tracing_cpumask));
3406         if (len >= count) {
3407                 count = -EINVAL;
3408                 goto out_err;
3409         }
3410         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3411
3412 out_err:
3413         mutex_unlock(&tracing_cpumask_update_lock);
3414
3415         return count;
3416 }
3417
3418 static ssize_t
3419 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3420                       size_t count, loff_t *ppos)
3421 {
3422         struct trace_array *tr = file_inode(filp)->i_private;
3423         cpumask_var_t tracing_cpumask_new;
3424         int err, cpu;
3425
3426         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3427                 return -ENOMEM;
3428
3429         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3430         if (err)
3431                 goto err_unlock;
3432
3433         mutex_lock(&tracing_cpumask_update_lock);
3434
3435         local_irq_disable();
3436         arch_spin_lock(&tr->max_lock);
3437         for_each_tracing_cpu(cpu) {
3438                 /*
3439                  * Increase/decrease the disabled counter if we are
3440                  * about to flip a bit in the cpumask:
3441                  */
3442                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3443                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3444                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3445                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3446                 }
3447                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3448                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3449                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3450                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3451                 }
3452         }
3453         arch_spin_unlock(&tr->max_lock);
3454         local_irq_enable();
3455
3456         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3457
3458         mutex_unlock(&tracing_cpumask_update_lock);
3459         free_cpumask_var(tracing_cpumask_new);
3460
3461         return count;
3462
3463 err_unlock:
3464         free_cpumask_var(tracing_cpumask_new);
3465
3466         return err;
3467 }
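
/*
 * Usage example: the mask is written in hex (cpumask_parse_user() format),
 * so limiting tracing to CPUs 0 and 1 looks like
 *
 *	# echo 3 > tracing_cpumask
 *
 * CPUs whose bit is cleared get their per-cpu "disabled" count bumped and
 * ring buffer recording stopped; CPUs whose bit is newly set are
 * re-enabled, as done in the loop above.
 */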
3468
3469 static const struct file_operations tracing_cpumask_fops = {
3470         .open           = tracing_open_generic_tr,
3471         .read           = tracing_cpumask_read,
3472         .write          = tracing_cpumask_write,
3473         .release        = tracing_release_generic_tr,
3474         .llseek         = generic_file_llseek,
3475 };
3476
3477 static int tracing_trace_options_show(struct seq_file *m, void *v)
3478 {
3479         struct tracer_opt *trace_opts;
3480         struct trace_array *tr = m->private;
3481         u32 tracer_flags;
3482         int i;
3483
3484         mutex_lock(&trace_types_lock);
3485         tracer_flags = tr->current_trace->flags->val;
3486         trace_opts = tr->current_trace->flags->opts;
3487
3488         for (i = 0; trace_options[i]; i++) {
3489                 if (tr->trace_flags & (1 << i))
3490                         seq_printf(m, "%s\n", trace_options[i]);
3491                 else
3492                         seq_printf(m, "no%s\n", trace_options[i]);
3493         }
3494
3495         for (i = 0; trace_opts[i].name; i++) {
3496                 if (tracer_flags & trace_opts[i].bit)
3497                         seq_printf(m, "%s\n", trace_opts[i].name);
3498                 else
3499                         seq_printf(m, "no%s\n", trace_opts[i].name);
3500         }
3501         mutex_unlock(&trace_types_lock);
3502
3503         return 0;
3504 }
3505
3506 static int __set_tracer_option(struct trace_array *tr,
3507                                struct tracer_flags *tracer_flags,
3508                                struct tracer_opt *opts, int neg)
3509 {
3510         struct tracer *trace = tr->current_trace;
3511         int ret;
3512
3513         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3514         if (ret)
3515                 return ret;
3516
3517         if (neg)
3518                 tracer_flags->val &= ~opts->bit;
3519         else
3520                 tracer_flags->val |= opts->bit;
3521         return 0;
3522 }
3523
3524 /* Try to assign a tracer specific option */
3525 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3526 {
3527         struct tracer *trace = tr->current_trace;
3528         struct tracer_flags *tracer_flags = trace->flags;
3529         struct tracer_opt *opts = NULL;
3530         int i;
3531
3532         for (i = 0; tracer_flags->opts[i].name; i++) {
3533                 opts = &tracer_flags->opts[i];
3534
3535                 if (strcmp(cmp, opts->name) == 0)
3536                         return __set_tracer_option(tr, trace->flags, opts, neg);
3537         }
3538
3539         return -EINVAL;
3540 }
3541
3542 /* Some tracers require overwrite to stay enabled */
3543 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3544 {
3545         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3546                 return -1;
3547
3548         return 0;
3549 }
3550
3551 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3552 {
3553         /* do nothing if the flag is already in the requested state */
3554         if (!!(tr->trace_flags & mask) == !!enabled)
3555                 return 0;
3556
3557         /* Give the tracer a chance to approve the change */
3558         if (tr->current_trace->flag_changed)
3559                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3560                         return -EINVAL;
3561
3562         if (enabled)
3563                 tr->trace_flags |= mask;
3564         else
3565                 tr->trace_flags &= ~mask;
3566
3567         if (mask == TRACE_ITER_RECORD_CMD)
3568                 trace_event_enable_cmd_record(enabled);
3569
3570         if (mask == TRACE_ITER_OVERWRITE) {
3571                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3572 #ifdef CONFIG_TRACER_MAX_TRACE
3573                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3574 #endif
3575         }
3576
3577         if (mask == TRACE_ITER_PRINTK) {
3578                 trace_printk_start_stop_comm(enabled);
3579                 trace_printk_control(enabled);
3580         }
3581
3582         return 0;
3583 }
3584
3585 static int trace_set_options(struct trace_array *tr, char *option)
3586 {
3587         char *cmp;
3588         int neg = 0;
3589         int ret = -ENODEV;
3590         int i;
3591
3592         cmp = strstrip(option);
3593
3594         if (strncmp(cmp, "no", 2) == 0) {
3595                 neg = 1;
3596                 cmp += 2;
3597         }
3598
3599         mutex_lock(&trace_types_lock);
3600
3601         for (i = 0; trace_options[i]; i++) {
3602                 if (strcmp(cmp, trace_options[i]) == 0) {
3603                         ret = set_tracer_flag(tr, 1 << i, !neg);
3604                         break;
3605                 }
3606         }
3607
3608         /* If no option could be set, test the specific tracer options */
3609         if (!trace_options[i])
3610                 ret = set_tracer_option(tr, cmp, neg);
3611
3612         mutex_unlock(&trace_types_lock);
3613
3614         return ret;
3615 }
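
/*
 * Usage example (option names depend on the build): core and
 * tracer-specific options are toggled by writing their name to
 * "trace_options", prefixed with "no" to clear them:
 *
 *	# echo noprint-parent > trace_options
 *	# echo print-parent > trace_options
 *
 * Reading the file lists every option, with a "no" prefix on those that
 * are currently off (see tracing_trace_options_show() above).
 */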
3616
3617 static ssize_t
3618 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3619                         size_t cnt, loff_t *ppos)
3620 {
3621         struct seq_file *m = filp->private_data;
3622         struct trace_array *tr = m->private;
3623         char buf[64];
3624         int ret;
3625
3626         if (cnt >= sizeof(buf))
3627                 return -EINVAL;
3628
3629         if (copy_from_user(&buf, ubuf, cnt))
3630                 return -EFAULT;
3631
3632         buf[cnt] = 0;
3633
3634         ret = trace_set_options(tr, buf);
3635         if (ret < 0)
3636                 return ret;
3637
3638         *ppos += cnt;
3639
3640         return cnt;
3641 }
3642
3643 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3644 {
3645         struct trace_array *tr = inode->i_private;
3646         int ret;
3647
3648         if (tracing_disabled)
3649                 return -ENODEV;
3650
3651         if (trace_array_get(tr) < 0)
3652                 return -ENODEV;
3653
3654         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3655         if (ret < 0)
3656                 trace_array_put(tr);
3657
3658         return ret;
3659 }
3660
3661 static const struct file_operations tracing_iter_fops = {
3662         .open           = tracing_trace_options_open,
3663         .read           = seq_read,
3664         .llseek         = seq_lseek,
3665         .release        = tracing_single_release_tr,
3666         .write          = tracing_trace_options_write,
3667 };
3668
3669 static const char readme_msg[] =
3670         "tracing mini-HOWTO:\n\n"
3671         "# echo 0 > tracing_on : quick way to disable tracing\n"
3672         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3673         " Important files:\n"
3674         "  trace\t\t\t- The static contents of the buffer\n"
3675         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3676         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3677         "  current_tracer\t- function and latency tracers\n"
3678         "  available_tracers\t- list of configured tracers for current_tracer\n"
3679         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3680         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3681         "  trace_clock\t\t- change the clock used to order events\n"
3682         "       local:   Per cpu clock but may not be synced across CPUs\n"
3683         "      global:   Synced across CPUs but slows tracing down.\n"
3684         "     counter:   Not a clock, but just an increment\n"
3685         "      uptime:   Jiffy counter from time of boot\n"
3686         "        perf:   Same clock that perf events use\n"
3687 #ifdef CONFIG_X86_64
3688         "     x86-tsc:   TSC cycle counter\n"
3689 #endif
3690         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
3691         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3692         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3693         "\t\t\t  Remove sub-buffer with rmdir\n"
3694         "  trace_options\t\t- Set format or modify how tracing happens\n"
3695         "\t\t\t  Disable an option by prefixing the option name\n"
3696         "\t\t\t  with 'no'\n"
3697         "  saved_cmdlines_size\t- echo the number of entries to keep in the comm-pid list\n"
3698 #ifdef CONFIG_DYNAMIC_FTRACE
3699         "\n  available_filter_functions - list of functions that can be filtered on\n"
3700         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3701         "\t\t\t  functions\n"
3702         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3703         "\t     modules: Can select a group via module\n"
3704         "\t      Format: :mod:<module-name>\n"
3705         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3706         "\t    triggers: a command to perform when function is hit\n"
3707         "\t      Format: <function>:<trigger>[:count]\n"
3708         "\t     trigger: traceon, traceoff\n"
3709         "\t\t      enable_event:<system>:<event>\n"
3710         "\t\t      disable_event:<system>:<event>\n"
3711 #ifdef CONFIG_STACKTRACE
3712         "\t\t      stacktrace\n"
3713 #endif
3714 #ifdef CONFIG_TRACER_SNAPSHOT
3715         "\t\t      snapshot\n"
3716 #endif
3717         "\t\t      dump\n"
3718         "\t\t      cpudump\n"
3719         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3720         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3721         "\t     The first one will disable tracing every time do_fault is hit\n"
3722         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3723         "\t       The first time do_trap is hit and it disables tracing, the\n"
3724         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3725         "\t       the counter will not decrement. It only decrements when the\n"
3726         "\t       trigger did work\n"
3727         "\t     To remove trigger without count:\n"
3728         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3729         "\t     To remove trigger with a count:\n"
3730         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3731         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3732         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3733         "\t    modules: Can select a group via module command :mod:\n"
3734         "\t    Does not accept triggers\n"
3735 #endif /* CONFIG_DYNAMIC_FTRACE */
3736 #ifdef CONFIG_FUNCTION_TRACER
3737         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3738         "\t\t    (function)\n"
3739 #endif
3740 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3741         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3742         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3743         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3744 #endif
3745 #ifdef CONFIG_TRACER_SNAPSHOT
3746         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3747         "\t\t\t  snapshot buffer. Read the contents for more\n"
3748         "\t\t\t  information\n"
3749 #endif
3750 #ifdef CONFIG_STACK_TRACER
3751         "  stack_trace\t\t- Shows the max stack trace when active\n"
3752         "  stack_max_size\t- Shows current max stack size that was traced\n"
3753         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3754         "\t\t\t  new trace)\n"
3755 #ifdef CONFIG_DYNAMIC_FTRACE
3756         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3757         "\t\t\t  traces\n"
3758 #endif
3759 #endif /* CONFIG_STACK_TRACER */
3760         "  events/\t\t- Directory containing all trace event subsystems:\n"
3761         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3762         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3763         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3764         "\t\t\t  events\n"
3765         "      filter\t\t- If set, only events passing filter are traced\n"
3766         "  events/<system>/<event>/\t- Directory containing control files for\n"
3767         "\t\t\t  <event>:\n"
3768         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3769         "      filter\t\t- If set, only events passing filter are traced\n"
3770         "      trigger\t\t- If set, a command to perform when event is hit\n"
3771         "\t    Format: <trigger>[:count][if <filter>]\n"
3772         "\t   trigger: traceon, traceoff\n"
3773         "\t            enable_event:<system>:<event>\n"
3774         "\t            disable_event:<system>:<event>\n"
3775 #ifdef CONFIG_STACKTRACE
3776         "\t\t    stacktrace\n"
3777 #endif
3778 #ifdef CONFIG_TRACER_SNAPSHOT
3779         "\t\t    snapshot\n"
3780 #endif
3781         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3782         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3783         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3784         "\t                  events/block/block_unplug/trigger\n"
3785         "\t   The first disables tracing every time block_unplug is hit.\n"
3786         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3787         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3788         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3789         "\t   Like function triggers, the counter is only decremented if it\n"
3790         "\t    enabled or disabled tracing.\n"
3791         "\t   To remove a trigger without a count:\n"
3792         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3793         "\t   To remove a trigger with a count:\n"
3794         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3795         "\t   The filter, if any, can be omitted when removing a trigger.\n"
3796 ;
3797
3798 static ssize_t
3799 tracing_readme_read(struct file *filp, char __user *ubuf,
3800                        size_t cnt, loff_t *ppos)
3801 {
3802         return simple_read_from_buffer(ubuf, cnt, ppos,
3803                                         readme_msg, strlen(readme_msg));
3804 }
3805
3806 static const struct file_operations tracing_readme_fops = {
3807         .open           = tracing_open_generic,
3808         .read           = tracing_readme_read,
3809         .llseek         = generic_file_llseek,
3810 };
3811
3812 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3813 {
3814         unsigned int *ptr = v;
3815
3816         if (*pos || m->count)
3817                 ptr++;
3818
3819         (*pos)++;
3820
3821         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3822              ptr++) {
3823                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3824                         continue;
3825
3826                 return ptr;
3827         }
3828
3829         return NULL;
3830 }
3831
3832 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3833 {
3834         void *v;
3835         loff_t l = 0;
3836
3837         preempt_disable();
3838         arch_spin_lock(&trace_cmdline_lock);
3839
3840         v = &savedcmd->map_cmdline_to_pid[0];
3841         while (l <= *pos) {
3842                 v = saved_cmdlines_next(m, v, &l);
3843                 if (!v)
3844                         return NULL;
3845         }
3846
3847         return v;
3848 }
3849
3850 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3851 {
3852         arch_spin_unlock(&trace_cmdline_lock);
3853         preempt_enable();
3854 }
3855
3856 static int saved_cmdlines_show(struct seq_file *m, void *v)
3857 {
3858         char buf[TASK_COMM_LEN];
3859         unsigned int *pid = v;
3860
3861         __trace_find_cmdline(*pid, buf);
3862         seq_printf(m, "%d %s\n", *pid, buf);
3863         return 0;
3864 }
3865
3866 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3867         .start          = saved_cmdlines_start,
3868         .next           = saved_cmdlines_next,
3869         .stop           = saved_cmdlines_stop,
3870         .show           = saved_cmdlines_show,
3871 };
3872
3873 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3874 {
3875         if (tracing_disabled)
3876                 return -ENODEV;
3877
3878         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3879 }
3880
3881 static const struct file_operations tracing_saved_cmdlines_fops = {
3882         .open           = tracing_saved_cmdlines_open,
3883         .read           = seq_read,
3884         .llseek         = seq_lseek,
3885         .release        = seq_release,
3886 };
3887
3888 static ssize_t
3889 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3890                                  size_t cnt, loff_t *ppos)
3891 {
3892         char buf[64];
3893         int r;
3894
3895         arch_spin_lock(&trace_cmdline_lock);
3896         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3897         arch_spin_unlock(&trace_cmdline_lock);
3898
3899         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3900 }
3901
3902 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3903 {
3904         kfree(s->saved_cmdlines);
3905         kfree(s->map_cmdline_to_pid);
3906         kfree(s);
3907 }
3908
3909 static int tracing_resize_saved_cmdlines(unsigned int val)
3910 {
3911         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3912
3913         s = kmalloc(sizeof(*s), GFP_KERNEL);
3914         if (!s)
3915                 return -ENOMEM;
3916
3917         if (allocate_cmdlines_buffer(val, s) < 0) {
3918                 kfree(s);
3919                 return -ENOMEM;
3920         }
3921
3922         arch_spin_lock(&trace_cmdline_lock);
3923         savedcmd_temp = savedcmd;
3924         savedcmd = s;
3925         arch_spin_unlock(&trace_cmdline_lock);
3926         free_saved_cmdlines_buffer(savedcmd_temp);
3927
3928         return 0;
3929 }
3930
3931 static ssize_t
3932 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3933                                   size_t cnt, loff_t *ppos)
3934 {
3935         unsigned long val;
3936         int ret;
3937
3938         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3939         if (ret)
3940                 return ret;
3941
3942         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
3943         if (!val || val > PID_MAX_DEFAULT)
3944                 return -EINVAL;
3945
3946         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3947         if (ret < 0)
3948                 return ret;
3949
3950         *ppos += cnt;
3951
3952         return cnt;
3953 }
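
/*
 * Usage example: enlarge the saved comm/pid cache so that more task names
 * can be resolved when formatting the trace:
 *
 *	# echo 1024 > saved_cmdlines_size
 *
 * The new size must be at least 1 and no more than PID_MAX_DEFAULT, as
 * checked above; the old buffer is swapped out under trace_cmdline_lock
 * and then freed.
 */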
3954
3955 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3956         .open           = tracing_open_generic,
3957         .read           = tracing_saved_cmdlines_size_read,
3958         .write          = tracing_saved_cmdlines_size_write,
3959 };
3960
3961 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
3962 static union trace_enum_map_item *
3963 update_enum_map(union trace_enum_map_item *ptr)
3964 {
3965         if (!ptr->map.enum_string) {
3966                 if (ptr->tail.next) {
3967                         ptr = ptr->tail.next;
3968                         /* Set ptr to the next real item (skip head) */
3969                         ptr++;
3970                 } else
3971                         return NULL;
3972         }
3973         return ptr;
3974 }
3975
3976 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
3977 {
3978         union trace_enum_map_item *ptr = v;
3979
3980         /*
3981          * Paranoid! If ptr points to end, we don't want to increment past it.
3982          * This really should never happen.
3983          */
3984         ptr = update_enum_map(ptr);
3985         if (WARN_ON_ONCE(!ptr))
3986                 return NULL;
3987
3988         ptr++;
3989
3990         (*pos)++;
3991
3992         ptr = update_enum_map(ptr);
3993
3994         return ptr;
3995 }
3996
3997 static void *enum_map_start(struct seq_file *m, loff_t *pos)
3998 {
3999         union trace_enum_map_item *v;
4000         loff_t l = 0;
4001
4002         mutex_lock(&trace_enum_mutex);
4003
4004         v = trace_enum_maps;
4005         if (v)
4006                 v++;
4007
4008         while (v && l < *pos) {
4009                 v = enum_map_next(m, v, &l);
4010         }
4011
4012         return v;
4013 }
4014
4015 static void enum_map_stop(struct seq_file *m, void *v)
4016 {
4017         mutex_unlock(&trace_enum_mutex);
4018 }
4019
4020 static int enum_map_show(struct seq_file *m, void *v)
4021 {
4022         union trace_enum_map_item *ptr = v;
4023
4024         seq_printf(m, "%s %ld (%s)\n",
4025                    ptr->map.enum_string, ptr->map.enum_value,
4026                    ptr->map.system);
4027
4028         return 0;
4029 }
4030
4031 static const struct seq_operations tracing_enum_map_seq_ops = {
4032         .start          = enum_map_start,
4033         .next           = enum_map_next,
4034         .stop           = enum_map_stop,
4035         .show           = enum_map_show,
4036 };
4037
4038 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4039 {
4040         if (tracing_disabled)
4041                 return -ENODEV;
4042
4043         return seq_open(filp, &tracing_enum_map_seq_ops);
4044 }
4045
4046 static const struct file_operations tracing_enum_map_fops = {
4047         .open           = tracing_enum_map_open,
4048         .read           = seq_read,
4049         .llseek         = seq_lseek,
4050         .release        = seq_release,
4051 };
4052
4053 static inline union trace_enum_map_item *
4054 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4055 {
4056         /* Return tail of array given the head */
4057         return ptr + ptr->head.length + 1;
4058 }
4059
4060 static void
4061 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4062                            int len)
4063 {
4064         struct trace_enum_map **stop;
4065         struct trace_enum_map **map;
4066         union trace_enum_map_item *map_array;
4067         union trace_enum_map_item *ptr;
4068
4069         stop = start + len;
4070
4071         /*
4072          * The trace_enum_maps contains the map plus a head and tail item,
4073          * where the head holds the module and length of array, and the
4074          * tail holds a pointer to the next list.
4075          */
4076         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4077         if (!map_array) {
4078                 pr_warning("Unable to allocate trace enum mapping\n");
4079                 return;
4080         }
4081
4082         mutex_lock(&trace_enum_mutex);
4083
4084         if (!trace_enum_maps)
4085                 trace_enum_maps = map_array;
4086         else {
4087                 ptr = trace_enum_maps;
4088                 for (;;) {
4089                         ptr = trace_enum_jmp_to_tail(ptr);
4090                         if (!ptr->tail.next)
4091                                 break;
4092                         ptr = ptr->tail.next;
4093
4094                 }
4095                 ptr->tail.next = map_array;
4096         }
4097         map_array->head.mod = mod;
4098         map_array->head.length = len;
4099         map_array++;
4100
4101         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4102                 map_array->map = **map;
4103                 map_array++;
4104         }
4105         memset(map_array, 0, sizeof(*map_array));
4106
4107         mutex_unlock(&trace_enum_mutex);
4108 }
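
/*
 * Resulting layout of one module's map_array (illustrative):
 *
 *	[ head: mod, length ][ map 0 ] ... [ map length-1 ][ tail ]
 *
 * trace_enum_jmp_to_tail() steps from the head over the "length" map
 * entries to reach the tail, whose ->next links the next module's array
 * or stays zeroed (from the final memset() above) if this is the last one.
 */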
4109
4110 static void trace_create_enum_file(struct dentry *d_tracer)
4111 {
4112         trace_create_file("enum_map", 0444, d_tracer,
4113                           NULL, &tracing_enum_map_fops);
4114 }
4115
4116 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4117 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4118 static inline void trace_insert_enum_map_file(struct module *mod,
4119                               struct trace_enum_map **start, int len) { }
4120 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4121
4122 static void trace_insert_enum_map(struct module *mod,
4123                                   struct trace_enum_map **start, int len)
4124 {
4125         struct trace_enum_map **map;
4126
4127         if (len <= 0)
4128                 return;
4129
4130         map = start;
4131
4132         trace_event_enum_update(map, len);
4133
4134         trace_insert_enum_map_file(mod, start, len);
4135 }
4136
4137 static ssize_t
4138 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4139                        size_t cnt, loff_t *ppos)
4140 {
4141         struct trace_array *tr = filp->private_data;
4142         char buf[MAX_TRACER_SIZE+2];
4143         int r;
4144
4145         mutex_lock(&trace_types_lock);
4146         r = sprintf(buf, "%s\n", tr->current_trace->name);
4147         mutex_unlock(&trace_types_lock);
4148
4149         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4150 }
4151
4152 int tracer_init(struct tracer *t, struct trace_array *tr)
4153 {
4154         tracing_reset_online_cpus(&tr->trace_buffer);
4155         return t->init(tr);
4156 }
4157
4158 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4159 {
4160         int cpu;
4161
4162         for_each_tracing_cpu(cpu)
4163                 per_cpu_ptr(buf->data, cpu)->entries = val;
4164 }
4165
4166 #ifdef CONFIG_TRACER_MAX_TRACE
4167 /* resize @trace_buf's per-cpu entries to match those of @size_buf */
4168 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4169                                         struct trace_buffer *size_buf, int cpu_id)
4170 {
4171         int cpu, ret = 0;
4172
4173         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4174                 for_each_tracing_cpu(cpu) {
4175                         ret = ring_buffer_resize(trace_buf->buffer,
4176                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4177                         if (ret < 0)
4178                                 break;
4179                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4180                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4181                 }
4182         } else {
4183                 ret = ring_buffer_resize(trace_buf->buffer,
4184                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4185                 if (ret == 0)
4186                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4187                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4188         }
4189
4190         return ret;
4191 }
4192 #endif /* CONFIG_TRACER_MAX_TRACE */
4193
4194 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4195                                         unsigned long size, int cpu)
4196 {
4197         int ret;
4198
4199         /*
4200          * If kernel or user changes the size of the ring buffer
4201          * we use the size that was given, and we can forget about
4202          * expanding it later.
4203          */
4204         ring_buffer_expanded = true;
4205
4206         /* May be called before buffers are initialized */
4207         if (!tr->trace_buffer.buffer)
4208                 return 0;
4209
4210         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4211         if (ret < 0)
4212                 return ret;
4213
4214 #ifdef CONFIG_TRACER_MAX_TRACE
4215         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4216             !tr->current_trace->use_max_tr)
4217                 goto out;
4218
4219         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4220         if (ret < 0) {
4221                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4222                                                      &tr->trace_buffer, cpu);
4223                 if (r < 0) {
4224                         /*
4225                          * AARGH! We are left with a max buffer of a
4226                          * different size!
4227                          * The max buffer is our "snapshot" buffer.
4228                          * When a tracer needs a snapshot (one of the
4229                          * latency tracers), it swaps the max buffer
4230                          * with the saved snapshot. We succeeded in
4231                          * updating the size of the main buffer, but failed
4232                          * to update the size of the max buffer. Then, when
4233                          * we tried to reset the main buffer to the original
4234                          * size, we failed there too. This is very unlikely to
4235                          * happen, but if it does, warn and kill all
4236                          * tracing.
4237                          */
4238                         WARN_ON(1);
4239                         tracing_disabled = 1;
4240                 }
4241                 return ret;
4242         }
4243
4244         if (cpu == RING_BUFFER_ALL_CPUS)
4245                 set_buffer_entries(&tr->max_buffer, size);
4246         else
4247                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4248
4249  out:
4250 #endif /* CONFIG_TRACER_MAX_TRACE */
4251
4252         if (cpu == RING_BUFFER_ALL_CPUS)
4253                 set_buffer_entries(&tr->trace_buffer, size);
4254         else
4255                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4256
4257         return ret;
4258 }
4259
4260 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4261                                           unsigned long size, int cpu_id)
4262 {
4263         int ret = size;
4264
4265         mutex_lock(&trace_types_lock);
4266
4267         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4268                 /* make sure this cpu is enabled in the mask */
4269                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4270                         ret = -EINVAL;
4271                         goto out;
4272                 }
4273         }
4274
4275         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4276         if (ret < 0)
4277                 ret = -ENOMEM;
4278
4279 out:
4280         mutex_unlock(&trace_types_lock);
4281
4282         return ret;
4283 }
4284
4285
4286 /**
4287  * tracing_update_buffers - used by tracing facility to expand ring buffers
4288  *
4289  * To save memory when tracing is configured in but never used, the
4290  * ring buffers start out at a minimum size. Once a user starts to use
4291  * the tracing facility, though, they need to grow to their default
4292  * size.
4293  *
4294  * This function is to be called when a tracer is about to be used.
4295  */
4296 int tracing_update_buffers(void)
4297 {
4298         int ret = 0;
4299
4300         mutex_lock(&trace_types_lock);
4301         if (!ring_buffer_expanded)
4302                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4303                                                 RING_BUFFER_ALL_CPUS);
4304         mutex_unlock(&trace_types_lock);
4305
4306         return ret;
4307 }
4308
4309 struct trace_option_dentry;
4310
4311 static void
4312 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4313
4314 /*
4315  * Used to clear out the tracer before deletion of an instance.
4316  * Must have trace_types_lock held.
4317  */
4318 static void tracing_set_nop(struct trace_array *tr)
4319 {
4320         if (tr->current_trace == &nop_trace)
4321                 return;
4322
4323         tr->current_trace->enabled--;
4324
4325         if (tr->current_trace->reset)
4326                 tr->current_trace->reset(tr);
4327
4328         tr->current_trace = &nop_trace;
4329 }
4330
4331 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4332 {
4333         /* Only enable if the directory has been created already. */
4334         if (!tr->dir)
4335                 return;
4336
4337         create_trace_option_files(tr, t);
4338 }
4339
4340 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4341 {
4342         struct tracer *t;
4343 #ifdef CONFIG_TRACER_MAX_TRACE
4344         bool had_max_tr;
4345 #endif
4346         int ret = 0;
4347
4348         mutex_lock(&trace_types_lock);
4349
4350         if (!ring_buffer_expanded) {
4351                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4352                                                 RING_BUFFER_ALL_CPUS);
4353                 if (ret < 0)
4354                         goto out;
4355                 ret = 0;
4356         }
4357
4358         for (t = trace_types; t; t = t->next) {
4359                 if (strcmp(t->name, buf) == 0)
4360                         break;
4361         }
4362         if (!t) {
4363                 ret = -EINVAL;
4364                 goto out;
4365         }
4366         if (t == tr->current_trace)
4367                 goto out;
4368
4369         /* Some tracers are only allowed for the top level buffer */
4370         if (!trace_ok_for_array(t, tr)) {
4371                 ret = -EINVAL;
4372                 goto out;
4373         }
4374
4375         /* If trace pipe files are being read, we can't change the tracer */
4376         if (tr->current_trace->ref) {
4377                 ret = -EBUSY;
4378                 goto out;
4379         }
4380
4381         trace_branch_disable();
4382
4383         tr->current_trace->enabled--;
4384
4385         if (tr->current_trace->reset)
4386                 tr->current_trace->reset(tr);
4387
4388         /* Current trace needs to be nop_trace before synchronize_sched */
4389         tr->current_trace = &nop_trace;
4390
4391 #ifdef CONFIG_TRACER_MAX_TRACE
4392         had_max_tr = tr->allocated_snapshot;
4393
4394         if (had_max_tr && !t->use_max_tr) {
4395                 /*
4396                  * We need to make sure that the update_max_tr sees that
4397                  * current_trace changed to nop_trace to keep it from
4398                  * swapping the buffers after we resize it.
4399                  * update_max_tr() is called with interrupts disabled,
4400                  * so a synchronize_sched() is sufficient.
4401                  */
4402                 synchronize_sched();
4403                 free_snapshot(tr);
4404         }
4405 #endif
4406
4407 #ifdef CONFIG_TRACER_MAX_TRACE
4408         if (t->use_max_tr && !had_max_tr) {
4409                 ret = alloc_snapshot(tr);
4410                 if (ret < 0)
4411                         goto out;
4412         }
4413 #endif
4414
4415         if (t->init) {
4416                 ret = tracer_init(t, tr);
4417                 if (ret)
4418                         goto out;
4419         }
4420
4421         tr->current_trace = t;
4422         tr->current_trace->enabled++;
4423         trace_branch_enable(tr);
4424  out:
4425         mutex_unlock(&trace_types_lock);
4426
4427         return ret;
4428 }
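
/*
 * Usage example: tracers are switched by writing a name listed in
 * "available_tracers" to "current_tracer", e.g. (assuming the function
 * tracer is configured in):
 *
 *	# echo function > current_tracer
 *	# echo nop > current_tracer
 *
 * The write fails with -EBUSY while trace_pipe readers hold a reference,
 * and with -EINVAL for a name that is unknown or not allowed for this
 * trace array.
 */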
4429
4430 static ssize_t
4431 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4432                         size_t cnt, loff_t *ppos)
4433 {
4434         struct trace_array *tr = filp->private_data;
4435         char buf[MAX_TRACER_SIZE+1];
4436         int i;
4437         size_t ret;
4438         int err;
4439
4440         ret = cnt;
4441
4442         if (cnt > MAX_TRACER_SIZE)
4443                 cnt = MAX_TRACER_SIZE;
4444
4445         if (copy_from_user(&buf, ubuf, cnt))
4446                 return -EFAULT;
4447
4448         buf[cnt] = 0;
4449
4450         /* strip trailing whitespace */
4451         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4452                 buf[i] = 0;
4453
4454         err = tracing_set_tracer(tr, buf);
4455         if (err)
4456                 return err;
4457
4458         *ppos += ret;
4459
4460         return ret;
4461 }
4462
4463 static ssize_t
4464 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4465                    size_t cnt, loff_t *ppos)
4466 {
4467         char buf[64];
4468         int r;
4469
4470         r = snprintf(buf, sizeof(buf), "%ld\n",
4471                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4472         if (r > sizeof(buf))
4473                 r = sizeof(buf);
4474         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4475 }
4476
4477 static ssize_t
4478 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4479                     size_t cnt, loff_t *ppos)
4480 {
4481         unsigned long val;
4482         int ret;
4483
4484         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4485         if (ret)
4486                 return ret;
4487
4488         *ptr = val * 1000;
4489
4490         return cnt;
4491 }
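
/*
 * The two helpers above present nanosecond variables in microseconds:
 * reads convert with nsecs_to_usecs(), writes multiply the user value by
 * 1000. For example, "echo 100 > tracing_thresh" stores 100000 (ns),
 * i.e. a 100 usec threshold.
 */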
4492
4493 static ssize_t
4494 tracing_thresh_read(struct file *filp, char __user *ubuf,
4495                     size_t cnt, loff_t *ppos)
4496 {
4497         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4498 }
4499
4500 static ssize_t
4501 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4502                      size_t cnt, loff_t *ppos)
4503 {
4504         struct trace_array *tr = filp->private_data;
4505         int ret;
4506
4507         mutex_lock(&trace_types_lock);
4508         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4509         if (ret < 0)
4510                 goto out;
4511
4512         if (tr->current_trace->update_thresh) {
4513                 ret = tr->current_trace->update_thresh(tr);
4514                 if (ret < 0)
4515                         goto out;
4516         }
4517
4518         ret = cnt;
4519 out:
4520         mutex_unlock(&trace_types_lock);
4521
4522         return ret;
4523 }
4524
4525 static ssize_t
4526 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4527                      size_t cnt, loff_t *ppos)
4528 {
4529         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4530 }
4531
4532 static ssize_t
4533 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4534                       size_t cnt, loff_t *ppos)
4535 {
4536         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4537 }
4538
4539 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4540 {
4541         struct trace_array *tr = inode->i_private;
4542         struct trace_iterator *iter;
4543         int ret = 0;
4544
4545         if (tracing_disabled)
4546                 return -ENODEV;
4547
4548         if (trace_array_get(tr) < 0)
4549                 return -ENODEV;
4550
4551         mutex_lock(&trace_types_lock);
4552
4553         /* create a buffer to store the information to pass to userspace */
4554         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4555         if (!iter) {
4556                 ret = -ENOMEM;
4557                 __trace_array_put(tr);
4558                 goto out;
4559         }
4560
4561         trace_seq_init(&iter->seq);
4562         iter->trace = tr->current_trace;
4563
4564         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4565                 ret = -ENOMEM;
4566                 goto fail;
4567         }
4568
4569         /* trace pipe does not show start of buffer */
4570         cpumask_setall(iter->started);
4571
4572         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4573                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4574
4575         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4576         if (trace_clocks[tr->clock_id].in_ns)
4577                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4578
4579         iter->tr = tr;
4580         iter->trace_buffer = &tr->trace_buffer;
4581         iter->cpu_file = tracing_get_cpu(inode);
4582         mutex_init(&iter->mutex);
4583         filp->private_data = iter;
4584
4585         if (iter->trace->pipe_open)
4586                 iter->trace->pipe_open(iter);
4587
4588         nonseekable_open(inode, filp);
4589
4590         tr->current_trace->ref++;
4591 out:
4592         mutex_unlock(&trace_types_lock);
4593         return ret;
4594
4595 fail:
4596         kfree(iter->trace);
4597         kfree(iter);
4598         __trace_array_put(tr);
4599         mutex_unlock(&trace_types_lock);
4600         return ret;
4601 }
4602
4603 static int tracing_release_pipe(struct inode *inode, struct file *file)
4604 {
4605         struct trace_iterator *iter = file->private_data;
4606         struct trace_array *tr = inode->i_private;
4607
4608         mutex_lock(&trace_types_lock);
4609
4610         tr->current_trace->ref--;
4611
4612         if (iter->trace->pipe_close)
4613                 iter->trace->pipe_close(iter);
4614
4615         mutex_unlock(&trace_types_lock);
4616
4617         free_cpumask_var(iter->started);
4618         mutex_destroy(&iter->mutex);
4619         kfree(iter);
4620
4621         trace_array_put(tr);
4622
4623         return 0;
4624 }
4625
4626 static unsigned int
4627 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4628 {
4629         struct trace_array *tr = iter->tr;
4630
4631         /* Iterators are static, they should be filled or empty */
4632         if (trace_buffer_iter(iter, iter->cpu_file))
4633                 return POLLIN | POLLRDNORM;
4634
4635         if (tr->trace_flags & TRACE_ITER_BLOCK)
4636                 /*
4637                  * Always select as readable when in blocking mode
4638                  */
4639                 return POLLIN | POLLRDNORM;
4640         else
4641                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4642                                              filp, poll_table);
4643 }
4644
4645 static unsigned int
4646 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4647 {
4648         struct trace_iterator *iter = filp->private_data;
4649
4650         return trace_poll(iter, filp, poll_table);
4651 }
4652
4653 /* Must be called with iter->mutex held. */
4654 static int tracing_wait_pipe(struct file *filp)
4655 {
4656         struct trace_iterator *iter = filp->private_data;
4657         int ret;
4658
4659         while (trace_empty(iter)) {
4660
4661                 if ((filp->f_flags & O_NONBLOCK)) {
4662                         return -EAGAIN;
4663                 }
4664
4665                 /*
4666                  * We block until we read something. We still block if
4667                  * tracing is disabled, as long as we have never read
4668                  * anything. This allows a user to cat this file, and
4669                  * then enable tracing. But after we have read something,
4670                  * we give an EOF when tracing is again disabled.
4671                  *
4672                  * iter->pos will be 0 if we haven't read anything.
4673                  */
4674                 if (!tracing_is_on() && iter->pos)
4675                         break;
4676
4677                 mutex_unlock(&iter->mutex);
4678
4679                 ret = wait_on_pipe(iter, false);
4680
4681                 mutex_lock(&iter->mutex);
4682
4683                 if (ret)
4684                         return ret;
4685         }
4686
4687         return 1;
4688 }
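
/*
 * In practice a consuming reader such as "cat trace_pipe" blocks here
 * while the buffer is empty (or gets -EAGAIN if opened O_NONBLOCK), keeps
 * blocking even when tracing is off until it has returned at least one
 * entry, and only after that does a disabled trace result in EOF.
 */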
4689
4690 /*
4691  * Consumer reader.
4692  */
4693 static ssize_t
4694 tracing_read_pipe(struct file *filp, char __user *ubuf,
4695                   size_t cnt, loff_t *ppos)
4696 {
4697         struct trace_iterator *iter = filp->private_data;
4698         ssize_t sret;
4699
4700         /* return any leftover data */
4701         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4702         if (sret != -EBUSY)
4703                 return sret;
4704
4705         trace_seq_init(&iter->seq);
4706
4707         /*
4708          * Avoid more than one consumer on a single file descriptor
4709          * This is just a matter of traces coherency, the ring buffer itself
4710          * is protected.
4711          */
4712         mutex_lock(&iter->mutex);
4713         if (iter->trace->read) {
4714                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4715                 if (sret)
4716                         goto out;
4717         }
4718
4719 waitagain:
4720         sret = tracing_wait_pipe(filp);
4721         if (sret <= 0)
4722                 goto out;
4723
4724         /* stop when tracing is finished */
4725         if (trace_empty(iter)) {
4726                 sret = 0;
4727                 goto out;
4728         }
4729
4730         if (cnt >= PAGE_SIZE)
4731                 cnt = PAGE_SIZE - 1;
4732
4733         /* reset all but tr, trace, and overruns */
4734         memset(&iter->seq, 0,
4735                sizeof(struct trace_iterator) -
4736                offsetof(struct trace_iterator, seq));
4737         cpumask_clear(iter->started);
4738         iter->pos = -1;
4739
4740         trace_event_read_lock();
4741         trace_access_lock(iter->cpu_file);
4742         while (trace_find_next_entry_inc(iter) != NULL) {
4743                 enum print_line_t ret;
4744                 int save_len = iter->seq.seq.len;
4745
4746                 ret = print_trace_line(iter);
4747                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4748                         /* don't print partial lines */
4749                         iter->seq.seq.len = save_len;
4750                         break;
4751                 }
4752                 if (ret != TRACE_TYPE_NO_CONSUME)
4753                         trace_consume(iter);
4754
4755                 if (trace_seq_used(&iter->seq) >= cnt)
4756                         break;
4757
4758                 /*
4759                  * Setting the full flag means we reached the trace_seq buffer
4760                  * size and we should leave by partial output condition above.
4761                  * One of the trace_seq_* functions is not used properly.
4762                  */
4763                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4764                           iter->ent->type);
4765         }
4766         trace_access_unlock(iter->cpu_file);
4767         trace_event_read_unlock();
4768
4769         /* Now copy what we have to the user */
4770         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4771         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4772                 trace_seq_init(&iter->seq);
4773
4774         /*
4775          * If there was nothing to send to user, in spite of consuming trace
4776          * entries, go back to wait for more entries.
4777          */
4778         if (sret == -EBUSY)
4779                 goto waitagain;
4780
4781 out:
4782         mutex_unlock(&iter->mutex);
4783
4784         return sret;
4785 }
4786
4787 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4788                                      unsigned int idx)
4789 {
4790         __free_page(spd->pages[idx]);
4791 }
4792
4793 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4794         .can_merge              = 0,
4795         .confirm                = generic_pipe_buf_confirm,
4796         .release                = generic_pipe_buf_release,
4797         .steal                  = generic_pipe_buf_steal,
4798         .get                    = generic_pipe_buf_get,
4799 };
4800
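/*
 * Helper for the splice path below: fill iter->seq with formatted trace
 * lines until the seq page is full, @rem bytes have been produced, or the
 * ring buffer runs out of entries. Returns how many of the requested
 * bytes are still left to fill.
 */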
4801 static size_t
4802 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4803 {
4804         size_t count;
4805         int save_len;
4806         int ret;
4807
4808         /* Seq buffer is page-sized, exactly what we need. */
4809         for (;;) {
4810                 save_len = iter->seq.seq.len;
4811                 ret = print_trace_line(iter);
4812
4813                 if (trace_seq_has_overflowed(&iter->seq)) {
4814                         iter->seq.seq.len = save_len;
4815                         break;
4816                 }
4817
4818                 /*
4819                  * This should not be hit, because it should only
4820                  * be set if the iter->seq overflowed. But check it
4821                  * anyway to be safe.
4822                  */
4823                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4824                         iter->seq.seq.len = save_len;
4825                         break;
4826                 }
4827
4828                 count = trace_seq_used(&iter->seq) - save_len;
4829                 if (rem < count) {
4830                         rem = 0;
4831                         iter->seq.seq.len = save_len;
4832                         break;
4833                 }
4834
4835                 if (ret != TRACE_TYPE_NO_CONSUME)
4836                         trace_consume(iter);
4837                 rem -= count;
4838                 if (!trace_find_next_entry_inc(iter))   {
4839                         rem = 0;
4840                         iter->ent = NULL;
4841                         break;
4842                 }
4843         }
4844
4845         return rem;
4846 }
4847
4848 static ssize_t tracing_splice_read_pipe(struct file *filp,
4849                                         loff_t *ppos,
4850                                         struct pipe_inode_info *pipe,
4851                                         size_t len,
4852                                         unsigned int flags)
4853 {
4854         struct page *pages_def[PIPE_DEF_BUFFERS];
4855         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4856         struct trace_iterator *iter = filp->private_data;
4857         struct splice_pipe_desc spd = {
4858                 .pages          = pages_def,
4859                 .partial        = partial_def,
4860                 .nr_pages       = 0, /* This gets updated below. */
4861                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4862                 .flags          = flags,
4863                 .ops            = &tracing_pipe_buf_ops,
4864                 .spd_release    = tracing_spd_release_pipe,
4865         };
4866         ssize_t ret;
4867         size_t rem;
4868         unsigned int i;
4869
4870         if (splice_grow_spd(pipe, &spd))
4871                 return -ENOMEM;
4872
4873         mutex_lock(&iter->mutex);
4874
4875         if (iter->trace->splice_read) {
4876                 ret = iter->trace->splice_read(iter, filp,
4877                                                ppos, pipe, len, flags);
4878                 if (ret)
4879                         goto out_err;
4880         }
4881
4882         ret = tracing_wait_pipe(filp);
4883         if (ret <= 0)
4884                 goto out_err;
4885
4886         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4887                 ret = -EFAULT;
4888                 goto out_err;
4889         }
4890
4891         trace_event_read_lock();
4892         trace_access_lock(iter->cpu_file);
4893
4894         /* Fill as many pages as possible. */
4895         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4896                 spd.pages[i] = alloc_page(GFP_KERNEL);
4897                 if (!spd.pages[i])
4898                         break;
4899
4900                 rem = tracing_fill_pipe_page(rem, iter);
4901
4902                 /* Copy the data into the page, so we can start over. */
4903                 ret = trace_seq_to_buffer(&iter->seq,
4904                                           page_address(spd.pages[i]),
4905                                           trace_seq_used(&iter->seq));
4906                 if (ret < 0) {
4907                         __free_page(spd.pages[i]);
4908                         break;
4909                 }
4910                 spd.partial[i].offset = 0;
4911                 spd.partial[i].len = trace_seq_used(&iter->seq);
4912
4913                 trace_seq_init(&iter->seq);
4914         }
4915
4916         trace_access_unlock(iter->cpu_file);
4917         trace_event_read_unlock();
4918         mutex_unlock(&iter->mutex);
4919
4920         spd.nr_pages = i;
4921
4922         ret = splice_to_pipe(pipe, &spd);
4923 out:
4924         splice_shrink_spd(&spd);
4925         return ret;
4926
4927 out_err:
4928         mutex_unlock(&iter->mutex);
4929         goto out;
4930 }
4931
4932 static ssize_t
4933 tracing_entries_read(struct file *filp, char __user *ubuf,
4934                      size_t cnt, loff_t *ppos)
4935 {
4936         struct inode *inode = file_inode(filp);
4937         struct trace_array *tr = inode->i_private;
4938         int cpu = tracing_get_cpu(inode);
4939         char buf[64];
4940         int r = 0;
4941         ssize_t ret;
4942
4943         mutex_lock(&trace_types_lock);
4944
4945         if (cpu == RING_BUFFER_ALL_CPUS) {
4946                 int cpu, buf_size_same;
4947                 unsigned long size;
4948
4949                 size = 0;
4950                 buf_size_same = 1;
4951                 /* check if all cpu sizes are the same */
4952                 for_each_tracing_cpu(cpu) {
4953                         /* fill in the size from first enabled cpu */
4954                         if (size == 0)
4955                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4956                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4957                                 buf_size_same = 0;
4958                                 break;
4959                         }
4960                 }
4961
4962                 if (buf_size_same) {
4963                         if (!ring_buffer_expanded)
4964                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4965                                             size >> 10,
4966                                             trace_buf_size >> 10);
4967                         else
4968                                 r = sprintf(buf, "%lu\n", size >> 10);
4969                 } else
4970                         r = sprintf(buf, "X\n");
4971         } else
4972                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4973
4974         mutex_unlock(&trace_types_lock);
4975
4976         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4977         return ret;
4978 }
4979
4980 static ssize_t
4981 tracing_entries_write(struct file *filp, const char __user *ubuf,
4982                       size_t cnt, loff_t *ppos)
4983 {
4984         struct inode *inode = file_inode(filp);
4985         struct trace_array *tr = inode->i_private;
4986         unsigned long val;
4987         int ret;
4988
4989         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4990         if (ret)
4991                 return ret;
4992
4993         /* must have at least 1 entry */
4994         if (!val)
4995                 return -EINVAL;
4996
4997         /* value is in KB */
4998         val <<= 10;
4999         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5000         if (ret < 0)
5001                 return ret;
5002
5003         *ppos += cnt;
5004
5005         return cnt;
5006 }
5007
5008 static ssize_t
5009 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5010                                 size_t cnt, loff_t *ppos)
5011 {
5012         struct trace_array *tr = filp->private_data;
5013         char buf[64];
5014         int r, cpu;
5015         unsigned long size = 0, expanded_size = 0;
5016
5017         mutex_lock(&trace_types_lock);
5018         for_each_tracing_cpu(cpu) {
5019                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5020                 if (!ring_buffer_expanded)
5021                         expanded_size += trace_buf_size >> 10;
5022         }
5023         if (ring_buffer_expanded)
5024                 r = sprintf(buf, "%lu\n", size);
5025         else
5026                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5027         mutex_unlock(&trace_types_lock);
5028
5029         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5030 }
5031
5032 static ssize_t
5033 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5034                           size_t cnt, loff_t *ppos)
5035 {
5036         /*
5037          * There is no need to read what the user has written; this function
5038          * exists only so that "echo" to this file does not return an error.
5039          */
5040
5041         *ppos += cnt;
5042
5043         return cnt;
5044 }
5045
5046 static int
5047 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5048 {
5049         struct trace_array *tr = inode->i_private;
5050
5051         /* Disable tracing if the stop-on-free option is set */
5052         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5053                 tracer_tracing_off(tr);
5054         /* resize the ring buffer to 0 */
5055         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5056
5057         trace_array_put(tr);
5058
5059         return 0;
5060 }
5061
5062 static ssize_t
5063 tracing_mark_write(struct file *filp, const char __user *ubuf,
5064                                         size_t cnt, loff_t *fpos)
5065 {
5066         unsigned long addr = (unsigned long)ubuf;
5067         struct trace_array *tr = filp->private_data;
5068         struct ring_buffer_event *event;
5069         struct ring_buffer *buffer;
5070         struct print_entry *entry;
5071         unsigned long irq_flags;
5072         struct page *pages[2];
5073         void *map_page[2];
5074         int nr_pages = 1;
5075         ssize_t written;
5076         int offset;
5077         int size;
5078         int len;
5079         int ret;
5080         int i;
5081
5082         if (tracing_disabled)
5083                 return -EINVAL;
5084
5085         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5086                 return -EINVAL;
5087
5088         if (cnt > TRACE_BUF_SIZE)
5089                 cnt = TRACE_BUF_SIZE;
5090
5091         /*
5092          * Userspace is injecting traces into the kernel trace buffer.
5093          * We want to be as non-intrusive as possible.
5094          * To do so, we do not want to allocate any special buffers
5095          * or take any locks, but instead write the userspace data
5096          * straight into the ring buffer.
5097          *
5098          * First we need to pin the userspace buffer into memory. It is
5099          * most likely already resident, because userspace just referenced
5100          * it, but there is no guarantee of that. By using
5101          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we can
5102          * access the pages directly and write the data straight into the
5103          * ring buffer.
5104          */
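        /*
         * Typical usage from userspace (a sketch; the mount point of
         * tracefs/debugfs may differ):
         *
         *   echo "hello world" > /sys/kernel/debug/tracing/trace_marker
         *
         * The written string ends up as a TRACE_PRINT entry in the buffer.
         */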
5105         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5106
5107         /* check if the write crosses a page boundary */
5108         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5109                 nr_pages = 2;
5110
5111         offset = addr & (PAGE_SIZE - 1);
5112         addr &= PAGE_MASK;
5113
5114         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5115         if (ret < nr_pages) {
5116                 while (--ret >= 0)
5117                         put_page(pages[ret]);
5118                 written = -EFAULT;
5119                 goto out;
5120         }
5121
5122         for (i = 0; i < nr_pages; i++)
5123                 map_page[i] = kmap_atomic(pages[i]);
5124
5125         local_save_flags(irq_flags);
5126         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5127         buffer = tr->trace_buffer.buffer;
5128         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5129                                           irq_flags, preempt_count());
5130         if (!event) {
5131                 /* Ring buffer disabled, return as if not open for write */
5132                 written = -EBADF;
5133                 goto out_unlock;
5134         }
5135
5136         entry = ring_buffer_event_data(event);
5137         entry->ip = _THIS_IP_;
5138
5139         if (nr_pages == 2) {
5140                 len = PAGE_SIZE - offset;
5141                 memcpy(&entry->buf, map_page[0] + offset, len);
5142                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5143         } else
5144                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5145
5146         if (entry->buf[cnt - 1] != '\n') {
5147                 entry->buf[cnt] = '\n';
5148                 entry->buf[cnt + 1] = '\0';
5149         } else
5150                 entry->buf[cnt] = '\0';
5151
5152         __buffer_unlock_commit(buffer, event);
5153
5154         written = cnt;
5155
5156         *fpos += written;
5157
5158  out_unlock:
5159         for (i = nr_pages - 1; i >= 0; i--) {
5160                 kunmap_atomic(map_page[i]);
5161                 put_page(pages[i]);
5162         }
5163  out:
5164         return written;
5165 }
5166
5167 static int tracing_clock_show(struct seq_file *m, void *v)
5168 {
5169         struct trace_array *tr = m->private;
5170         int i;
5171
5172         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5173                 seq_printf(m,
5174                         "%s%s%s%s", i ? " " : "",
5175                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5176                         i == tr->clock_id ? "]" : "");
5177         seq_putc(m, '\n');
5178
5179         return 0;
5180 }
5181
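/*
 * Backing for the "trace_clock" file: the written string is matched against
 * trace_clocks[].name and, on a match, the ring buffer clock is switched and
 * the buffers are reset, since old timestamps would not be comparable with
 * the new clock. Typical usage (a sketch):
 *
 *   echo global > trace_clock
 */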
5182 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5183 {
5184         int i;
5185
5186         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5187                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5188                         break;
5189         }
5190         if (i == ARRAY_SIZE(trace_clocks))
5191                 return -EINVAL;
5192
5193         mutex_lock(&trace_types_lock);
5194
5195         tr->clock_id = i;
5196
5197         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5198
5199         /*
5200          * New clock may not be consistent with the previous clock.
5201          * Reset the buffer so that it doesn't have incomparable timestamps.
5202          */
5203         tracing_reset_online_cpus(&tr->trace_buffer);
5204
5205 #ifdef CONFIG_TRACER_MAX_TRACE
5206         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5207                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5208         tracing_reset_online_cpus(&tr->max_buffer);
5209 #endif
5210
5211         mutex_unlock(&trace_types_lock);
5212
5213         return 0;
5214 }
5215
5216 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5217                                    size_t cnt, loff_t *fpos)
5218 {
5219         struct seq_file *m = filp->private_data;
5220         struct trace_array *tr = m->private;
5221         char buf[64];
5222         const char *clockstr;
5223         int ret;
5224
5225         if (cnt >= sizeof(buf))
5226                 return -EINVAL;
5227
5228         if (copy_from_user(&buf, ubuf, cnt))
5229                 return -EFAULT;
5230
5231         buf[cnt] = 0;
5232
5233         clockstr = strstrip(buf);
5234
5235         ret = tracing_set_clock(tr, clockstr);
5236         if (ret)
5237                 return ret;
5238
5239         *fpos += cnt;
5240
5241         return cnt;
5242 }
5243
5244 static int tracing_clock_open(struct inode *inode, struct file *file)
5245 {
5246         struct trace_array *tr = inode->i_private;
5247         int ret;
5248
5249         if (tracing_disabled)
5250                 return -ENODEV;
5251
5252         if (trace_array_get(tr))
5253                 return -ENODEV;
5254
5255         ret = single_open(file, tracing_clock_show, inode->i_private);
5256         if (ret < 0)
5257                 trace_array_put(tr);
5258
5259         return ret;
5260 }
5261
5262 struct ftrace_buffer_info {
5263         struct trace_iterator   iter;
5264         void                    *spare;
5265         unsigned int            read;
5266 };
5267
5268 #ifdef CONFIG_TRACER_SNAPSHOT
5269 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5270 {
5271         struct trace_array *tr = inode->i_private;
5272         struct trace_iterator *iter;
5273         struct seq_file *m;
5274         int ret = 0;
5275
5276         if (trace_array_get(tr) < 0)
5277                 return -ENODEV;
5278
5279         if (file->f_mode & FMODE_READ) {
5280                 iter = __tracing_open(inode, file, true);
5281                 if (IS_ERR(iter))
5282                         ret = PTR_ERR(iter);
5283         } else {
5284                 /* Writes still need the seq_file to hold the private data */
5285                 ret = -ENOMEM;
5286                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5287                 if (!m)
5288                         goto out;
5289                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5290                 if (!iter) {
5291                         kfree(m);
5292                         goto out;
5293                 }
5294                 ret = 0;
5295
5296                 iter->tr = tr;
5297                 iter->trace_buffer = &tr->max_buffer;
5298                 iter->cpu_file = tracing_get_cpu(inode);
5299                 m->private = iter;
5300                 file->private_data = m;
5301         }
5302 out:
5303         if (ret < 0)
5304                 trace_array_put(tr);
5305
5306         return ret;
5307 }
5308
5309 static ssize_t
5310 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5311                        loff_t *ppos)
5312 {
5313         struct seq_file *m = filp->private_data;
5314         struct trace_iterator *iter = m->private;
5315         struct trace_array *tr = iter->tr;
5316         unsigned long val;
5317         int ret;
5318
5319         ret = tracing_update_buffers();
5320         if (ret < 0)
5321                 return ret;
5322
5323         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5324         if (ret)
5325                 return ret;
5326
5327         mutex_lock(&trace_types_lock);
5328
5329         if (tr->current_trace->use_max_tr) {
5330                 ret = -EBUSY;
5331                 goto out;
5332         }
5333
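        /*
         * The written value selects the action:
         *   0     - free the snapshot buffer (only valid on the all-CPU file)
         *   1     - allocate the snapshot buffer if needed and swap it with
         *           the live buffer (per-cpu swap only if the ring buffer
         *           allows it)
         *   other - clear the snapshot buffer contents
         */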
5334         switch (val) {
5335         case 0:
5336                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5337                         ret = -EINVAL;
5338                         break;
5339                 }
5340                 if (tr->allocated_snapshot)
5341                         free_snapshot(tr);
5342                 break;
5343         case 1:
5344 /* Only allow per-cpu swap if the ring buffer supports it */
5345 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5346                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5347                         ret = -EINVAL;
5348                         break;
5349                 }
5350 #endif
5351                 if (!tr->allocated_snapshot) {
5352                         ret = alloc_snapshot(tr);
5353                         if (ret < 0)
5354                                 break;
5355                 }
5356                 local_irq_disable();
5357                 /* Now, we're going to swap */
5358                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5359                         update_max_tr(tr, current, smp_processor_id());
5360                 else
5361                         update_max_tr_single(tr, current, iter->cpu_file);
5362                 local_irq_enable();
5363                 break;
5364         default:
5365                 if (tr->allocated_snapshot) {
5366                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5367                                 tracing_reset_online_cpus(&tr->max_buffer);
5368                         else
5369                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5370                 }
5371                 break;
5372         }
5373
5374         if (ret >= 0) {
5375                 *ppos += cnt;
5376                 ret = cnt;
5377         }
5378 out:
5379         mutex_unlock(&trace_types_lock);
5380         return ret;
5381 }
5382
5383 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5384 {
5385         struct seq_file *m = file->private_data;
5386         int ret;
5387
5388         ret = tracing_release(inode, file);
5389
5390         if (file->f_mode & FMODE_READ)
5391                 return ret;
5392
5393         /* If write only, the seq_file is just a stub */
5394         if (m)
5395                 kfree(m->private);
5396         kfree(m);
5397
5398         return 0;
5399 }
5400
5401 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5402 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5403                                     size_t count, loff_t *ppos);
5404 static int tracing_buffers_release(struct inode *inode, struct file *file);
5405 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5406                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5407
5408 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5409 {
5410         struct ftrace_buffer_info *info;
5411         int ret;
5412
5413         ret = tracing_buffers_open(inode, filp);
5414         if (ret < 0)
5415                 return ret;
5416
5417         info = filp->private_data;
5418
5419         if (info->iter.trace->use_max_tr) {
5420                 tracing_buffers_release(inode, filp);
5421                 return -EBUSY;
5422         }
5423
5424         info->iter.snapshot = true;
5425         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5426
5427         return ret;
5428 }
5429
5430 #endif /* CONFIG_TRACER_SNAPSHOT */
5431
5432
5433 static const struct file_operations tracing_thresh_fops = {
5434         .open           = tracing_open_generic,
5435         .read           = tracing_thresh_read,
5436         .write          = tracing_thresh_write,
5437         .llseek         = generic_file_llseek,
5438 };
5439
5440 static const struct file_operations tracing_max_lat_fops = {
5441         .open           = tracing_open_generic,
5442         .read           = tracing_max_lat_read,
5443         .write          = tracing_max_lat_write,
5444         .llseek         = generic_file_llseek,
5445 };
5446
5447 static const struct file_operations set_tracer_fops = {
5448         .open           = tracing_open_generic,
5449         .read           = tracing_set_trace_read,
5450         .write          = tracing_set_trace_write,
5451         .llseek         = generic_file_llseek,
5452 };
5453
5454 static const struct file_operations tracing_pipe_fops = {
5455         .open           = tracing_open_pipe,
5456         .poll           = tracing_poll_pipe,
5457         .read           = tracing_read_pipe,
5458         .splice_read    = tracing_splice_read_pipe,
5459         .release        = tracing_release_pipe,
5460         .llseek         = no_llseek,
5461 };
5462
5463 static const struct file_operations tracing_entries_fops = {
5464         .open           = tracing_open_generic_tr,
5465         .read           = tracing_entries_read,
5466         .write          = tracing_entries_write,
5467         .llseek         = generic_file_llseek,
5468         .release        = tracing_release_generic_tr,
5469 };
5470
5471 static const struct file_operations tracing_total_entries_fops = {
5472         .open           = tracing_open_generic_tr,
5473         .read           = tracing_total_entries_read,
5474         .llseek         = generic_file_llseek,
5475         .release        = tracing_release_generic_tr,
5476 };
5477
5478 static const struct file_operations tracing_free_buffer_fops = {
5479         .open           = tracing_open_generic_tr,
5480         .write          = tracing_free_buffer_write,
5481         .release        = tracing_free_buffer_release,
5482 };
5483
5484 static const struct file_operations tracing_mark_fops = {
5485         .open           = tracing_open_generic_tr,
5486         .write          = tracing_mark_write,
5487         .llseek         = generic_file_llseek,
5488         .release        = tracing_release_generic_tr,
5489 };
5490
5491 static const struct file_operations trace_clock_fops = {
5492         .open           = tracing_clock_open,
5493         .read           = seq_read,
5494         .llseek         = seq_lseek,
5495         .release        = tracing_single_release_tr,
5496         .write          = tracing_clock_write,
5497 };
5498
5499 #ifdef CONFIG_TRACER_SNAPSHOT
5500 static const struct file_operations snapshot_fops = {
5501         .open           = tracing_snapshot_open,
5502         .read           = seq_read,
5503         .write          = tracing_snapshot_write,
5504         .llseek         = tracing_lseek,
5505         .release        = tracing_snapshot_release,
5506 };
5507
5508 static const struct file_operations snapshot_raw_fops = {
5509         .open           = snapshot_raw_open,
5510         .read           = tracing_buffers_read,
5511         .release        = tracing_buffers_release,
5512         .splice_read    = tracing_buffers_splice_read,
5513         .llseek         = no_llseek,
5514 };
5515
5516 #endif /* CONFIG_TRACER_SNAPSHOT */
5517
5518 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5519 {
5520         struct trace_array *tr = inode->i_private;
5521         struct ftrace_buffer_info *info;
5522         int ret;
5523
5524         if (tracing_disabled)
5525                 return -ENODEV;
5526
5527         if (trace_array_get(tr) < 0)
5528                 return -ENODEV;
5529
5530         info = kzalloc(sizeof(*info), GFP_KERNEL);
5531         if (!info) {
5532                 trace_array_put(tr);
5533                 return -ENOMEM;
5534         }
5535
5536         mutex_lock(&trace_types_lock);
5537
5538         info->iter.tr           = tr;
5539         info->iter.cpu_file     = tracing_get_cpu(inode);
5540         info->iter.trace        = tr->current_trace;
5541         info->iter.trace_buffer = &tr->trace_buffer;
5542         info->spare             = NULL;
5543         /* Force reading ring buffer for first read */
5544         info->read              = (unsigned int)-1;
5545
5546         filp->private_data = info;
5547
5548         tr->current_trace->ref++;
5549
5550         mutex_unlock(&trace_types_lock);
5551
5552         ret = nonseekable_open(inode, filp);
5553         if (ret < 0)
5554                 trace_array_put(tr);
5555
5556         return ret;
5557 }
5558
5559 static unsigned int
5560 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5561 {
5562         struct ftrace_buffer_info *info = filp->private_data;
5563         struct trace_iterator *iter = &info->iter;
5564
5565         return trace_poll(iter, filp, poll_table);
5566 }
5567
5568 static ssize_t
5569 tracing_buffers_read(struct file *filp, char __user *ubuf,
5570                      size_t count, loff_t *ppos)
5571 {
5572         struct ftrace_buffer_info *info = filp->private_data;
5573         struct trace_iterator *iter = &info->iter;
5574         ssize_t ret;
5575         ssize_t size;
5576
5577         if (!count)
5578                 return 0;
5579
5580 #ifdef CONFIG_TRACER_MAX_TRACE
5581         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5582                 return -EBUSY;
5583 #endif
5584
5585         if (!info->spare)
5586                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5587                                                           iter->cpu_file);
5588         if (!info->spare)
5589                 return -ENOMEM;
5590
5591         /* Do we have previous read data to read? */
5592         if (info->read < PAGE_SIZE)
5593                 goto read;
5594
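        /*
         * Pull a full page of events from the ring buffer into the spare
         * page. If the buffer is empty, return -EAGAIN for non-blocking
         * readers, otherwise wait for new data and retry.
         */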
5595  again:
5596         trace_access_lock(iter->cpu_file);
5597         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5598                                     &info->spare,
5599                                     count,
5600                                     iter->cpu_file, 0);
5601         trace_access_unlock(iter->cpu_file);
5602
5603         if (ret < 0) {
5604                 if (trace_empty(iter)) {
5605                         if ((filp->f_flags & O_NONBLOCK))
5606                                 return -EAGAIN;
5607
5608                         ret = wait_on_pipe(iter, false);
5609                         if (ret)
5610                                 return ret;
5611
5612                         goto again;
5613                 }
5614                 return 0;
5615         }
5616
5617         info->read = 0;
5618  read:
5619         size = PAGE_SIZE - info->read;
5620         if (size > count)
5621                 size = count;
5622
5623         ret = copy_to_user(ubuf, info->spare + info->read, size);
5624         if (ret == size)
5625                 return -EFAULT;
5626
5627         size -= ret;
5628
5629         *ppos += size;
5630         info->read += size;
5631
5632         return size;
5633 }
5634
5635 static int tracing_buffers_release(struct inode *inode, struct file *file)
5636 {
5637         struct ftrace_buffer_info *info = file->private_data;
5638         struct trace_iterator *iter = &info->iter;
5639
5640         mutex_lock(&trace_types_lock);
5641
5642         iter->tr->current_trace->ref--;
5643
5644         __trace_array_put(iter->tr);
5645
5646         if (info->spare)
5647                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5648         kfree(info);
5649
5650         mutex_unlock(&trace_types_lock);
5651
5652         return 0;
5653 }
5654
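/*
 * A reference-counted ring-buffer page handed off to a pipe by splice.
 * The page is returned to the ring buffer once the last reference
 * (pipe buffer or spd entry) is dropped.
 */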
5655 struct buffer_ref {
5656         struct ring_buffer      *buffer;
5657         void                    *page;
5658         int                     ref;
5659 };
5660
5661 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5662                                     struct pipe_buffer *buf)
5663 {
5664         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5665
5666         if (--ref->ref)
5667                 return;
5668
5669         ring_buffer_free_read_page(ref->buffer, ref->page);
5670         kfree(ref);
5671         buf->private = 0;
5672 }
5673
5674 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5675                                 struct pipe_buffer *buf)
5676 {
5677         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5678
5679         ref->ref++;
5680 }
5681
5682 /* Pipe buffer operations for a buffer. */
5683 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5684         .can_merge              = 0,
5685         .confirm                = generic_pipe_buf_confirm,
5686         .release                = buffer_pipe_buf_release,
5687         .steal                  = generic_pipe_buf_steal,
5688         .get                    = buffer_pipe_buf_get,
5689 };
5690
5691 /*
5692  * Callback from splice_to_pipe(): release the pages left at the end of
5693  * the spd if we errored out while filling the pipe.
5694  */
5695 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5696 {
5697         struct buffer_ref *ref =
5698                 (struct buffer_ref *)spd->partial[i].private;
5699
5700         if (--ref->ref)
5701                 return;
5702
5703         ring_buffer_free_read_page(ref->buffer, ref->page);
5704         kfree(ref);
5705         spd->partial[i].private = 0;
5706 }
5707
5708 static ssize_t
5709 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5710                             struct pipe_inode_info *pipe, size_t len,
5711                             unsigned int flags)
5712 {
5713         struct ftrace_buffer_info *info = file->private_data;
5714         struct trace_iterator *iter = &info->iter;
5715         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5716         struct page *pages_def[PIPE_DEF_BUFFERS];
5717         struct splice_pipe_desc spd = {
5718                 .pages          = pages_def,
5719                 .partial        = partial_def,
5720                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5721                 .flags          = flags,
5722                 .ops            = &buffer_pipe_buf_ops,
5723                 .spd_release    = buffer_spd_release,
5724         };
5725         struct buffer_ref *ref;
5726         int entries, size, i;
5727         ssize_t ret = 0;
5728
5729 #ifdef CONFIG_TRACER_MAX_TRACE
5730         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5731                 return -EBUSY;
5732 #endif
5733
5734         if (splice_grow_spd(pipe, &spd))
5735                 return -ENOMEM;
5736
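        /*
         * Reads of trace_pipe_raw hand out whole ring-buffer pages: the file
         * offset must be page aligned, lengths below one page are rejected,
         * and larger lengths are rounded down to a page multiple.
         */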
5737         if (*ppos & (PAGE_SIZE - 1))
5738                 return -EINVAL;
5739
5740         if (len & (PAGE_SIZE - 1)) {
5741                 if (len < PAGE_SIZE)
5742                         return -EINVAL;
5743                 len &= PAGE_MASK;
5744         }
5745
5746  again:
5747         trace_access_lock(iter->cpu_file);
5748         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5749
5750         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5751                 struct page *page;
5752                 int r;
5753
5754                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5755                 if (!ref) {
5756                         ret = -ENOMEM;
5757                         break;
5758                 }
5759
5760                 ref->ref = 1;
5761                 ref->buffer = iter->trace_buffer->buffer;
5762                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5763                 if (!ref->page) {
5764                         ret = -ENOMEM;
5765                         kfree(ref);
5766                         break;
5767                 }
5768
5769                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5770                                           len, iter->cpu_file, 1);
5771                 if (r < 0) {
5772                         ring_buffer_free_read_page(ref->buffer, ref->page);
5773                         kfree(ref);
5774                         break;
5775                 }
5776
5777                 /*
5778                  * Zero out any left-over data; this page is going to
5779                  * user land.
5780                  */
5781                 size = ring_buffer_page_len(ref->page);
5782                 if (size < PAGE_SIZE)
5783                         memset(ref->page + size, 0, PAGE_SIZE - size);
5784
5785                 page = virt_to_page(ref->page);
5786
5787                 spd.pages[i] = page;
5788                 spd.partial[i].len = PAGE_SIZE;
5789                 spd.partial[i].offset = 0;
5790                 spd.partial[i].private = (unsigned long)ref;
5791                 spd.nr_pages++;
5792                 *ppos += PAGE_SIZE;
5793
5794                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5795         }
5796
5797         trace_access_unlock(iter->cpu_file);
5798         spd.nr_pages = i;
5799
5800         /* did we read anything? */
5801         if (!spd.nr_pages) {
5802                 if (ret)
5803                         return ret;
5804
5805                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5806                         return -EAGAIN;
5807
5808                 ret = wait_on_pipe(iter, true);
5809                 if (ret)
5810                         return ret;
5811
5812                 goto again;
5813         }
5814
5815         ret = splice_to_pipe(pipe, &spd);
5816         splice_shrink_spd(&spd);
5817
5818         return ret;
5819 }
5820
5821 static const struct file_operations tracing_buffers_fops = {
5822         .open           = tracing_buffers_open,
5823         .read           = tracing_buffers_read,
5824         .poll           = tracing_buffers_poll,
5825         .release        = tracing_buffers_release,
5826         .splice_read    = tracing_buffers_splice_read,
5827         .llseek         = no_llseek,
5828 };
5829
5830 static ssize_t
5831 tracing_stats_read(struct file *filp, char __user *ubuf,
5832                    size_t count, loff_t *ppos)
5833 {
5834         struct inode *inode = file_inode(filp);
5835         struct trace_array *tr = inode->i_private;
5836         struct trace_buffer *trace_buf = &tr->trace_buffer;
5837         int cpu = tracing_get_cpu(inode);
5838         struct trace_seq *s;
5839         unsigned long cnt;
5840         unsigned long long t;
5841         unsigned long usec_rem;
5842
5843         s = kmalloc(sizeof(*s), GFP_KERNEL);
5844         if (!s)
5845                 return -ENOMEM;
5846
5847         trace_seq_init(s);
5848
5849         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5850         trace_seq_printf(s, "entries: %ld\n", cnt);
5851
5852         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5853         trace_seq_printf(s, "overrun: %ld\n", cnt);
5854
5855         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5856         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5857
5858         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5859         trace_seq_printf(s, "bytes: %ld\n", cnt);
5860
5861         if (trace_clocks[tr->clock_id].in_ns) {
5862                 /* local or global for trace_clock */
5863                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5864                 usec_rem = do_div(t, USEC_PER_SEC);
5865                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5866                                                                 t, usec_rem);
5867
5868                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5869                 usec_rem = do_div(t, USEC_PER_SEC);
5870                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5871         } else {
5872                 /* counter or tsc mode for trace_clock */
5873                 trace_seq_printf(s, "oldest event ts: %llu\n",
5874                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5875
5876                 trace_seq_printf(s, "now ts: %llu\n",
5877                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5878         }
5879
5880         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5881         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5882
5883         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5884         trace_seq_printf(s, "read events: %ld\n", cnt);
5885
5886         count = simple_read_from_buffer(ubuf, count, ppos,
5887                                         s->buffer, trace_seq_used(s));
5888
5889         kfree(s);
5890
5891         return count;
5892 }
5893
5894 static const struct file_operations tracing_stats_fops = {
5895         .open           = tracing_open_generic_tr,
5896         .read           = tracing_stats_read,
5897         .llseek         = generic_file_llseek,
5898         .release        = tracing_release_generic_tr,
5899 };
5900
5901 #ifdef CONFIG_DYNAMIC_FTRACE
5902
5903 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5904 {
5905         return 0;
5906 }
5907
5908 static ssize_t
5909 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5910                   size_t cnt, loff_t *ppos)
5911 {
5912         static char ftrace_dyn_info_buffer[1024];
5913         static DEFINE_MUTEX(dyn_info_mutex);
5914         unsigned long *p = filp->private_data;
5915         char *buf = ftrace_dyn_info_buffer;
5916         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5917         int r;
5918
5919         mutex_lock(&dyn_info_mutex);
5920         r = sprintf(buf, "%ld ", *p);
5921
5922         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5923         buf[r++] = '\n';
5924
5925         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5926
5927         mutex_unlock(&dyn_info_mutex);
5928
5929         return r;
5930 }
5931
5932 static const struct file_operations tracing_dyn_info_fops = {
5933         .open           = tracing_open_generic,
5934         .read           = tracing_read_dyn_info,
5935         .llseek         = generic_file_llseek,
5936 };
5937 #endif /* CONFIG_DYNAMIC_FTRACE */
5938
5939 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5940 static void
5941 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5942 {
5943         tracing_snapshot();
5944 }
5945
5946 static void
5947 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5948 {
5949         unsigned long *count = (long *)data;
5950
5951         if (!*count)
5952                 return;
5953
5954         if (*count != -1)
5955                 (*count)--;
5956
5957         tracing_snapshot();
5958 }
5959
5960 static int
5961 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5962                       struct ftrace_probe_ops *ops, void *data)
5963 {
5964         long count = (long)data;
5965
5966         seq_printf(m, "%ps:", (void *)ip);
5967
5968         seq_puts(m, "snapshot");
5969
5970         if (count == -1)
5971                 seq_puts(m, ":unlimited\n");
5972         else
5973                 seq_printf(m, ":count=%ld\n", count);
5974
5975         return 0;
5976 }
5977
5978 static struct ftrace_probe_ops snapshot_probe_ops = {
5979         .func                   = ftrace_snapshot,
5980         .print                  = ftrace_snapshot_print,
5981 };
5982
5983 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5984         .func                   = ftrace_count_snapshot,
5985         .print                  = ftrace_snapshot_print,
5986 };
5987
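/*
 * Handler for the "snapshot" command of set_ftrace_filter: the glob selects
 * which functions trigger a snapshot and the optional parameter limits how
 * many snapshots are taken. A typical use (sketch):
 *
 *   echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * A leading '!' in the glob unregisters the probe again.
 */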
5988 static int
5989 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5990                                char *glob, char *cmd, char *param, int enable)
5991 {
5992         struct ftrace_probe_ops *ops;
5993         void *count = (void *)-1;
5994         char *number;
5995         int ret;
5996
5997         /* hash funcs only work with set_ftrace_filter */
5998         if (!enable)
5999                 return -EINVAL;
6000
6001         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6002
6003         if (glob[0] == '!') {
6004                 unregister_ftrace_function_probe_func(glob+1, ops);
6005                 return 0;
6006         }
6007
6008         if (!param)
6009                 goto out_reg;
6010
6011         number = strsep(&param, ":");
6012
6013         if (!strlen(number))
6014                 goto out_reg;
6015
6016         /*
6017          * We use the callback data field (which is a pointer)
6018          * as our counter.
6019          */
6020         ret = kstrtoul(number, 0, (unsigned long *)&count);
6021         if (ret)
6022                 return ret;
6023
6024  out_reg:
6025         ret = register_ftrace_function_probe(glob, ops, count);
6026
6027         if (ret >= 0)
6028                 alloc_snapshot(&global_trace);
6029
6030         return ret < 0 ? ret : 0;
6031 }
6032
6033 static struct ftrace_func_command ftrace_snapshot_cmd = {
6034         .name                   = "snapshot",
6035         .func                   = ftrace_trace_snapshot_callback,
6036 };
6037
6038 static __init int register_snapshot_cmd(void)
6039 {
6040         return register_ftrace_command(&ftrace_snapshot_cmd);
6041 }
6042 #else
6043 static inline __init int register_snapshot_cmd(void) { return 0; }
6044 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6045
6046 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6047 {
6048         if (WARN_ON(!tr->dir))
6049                 return ERR_PTR(-ENODEV);
6050
6051         /* Top directory uses NULL as the parent */
6052         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6053                 return NULL;
6054
6055         /* All sub buffers have a descriptor */
6056         return tr->dir;
6057 }
6058
6059 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6060 {
6061         struct dentry *d_tracer;
6062
6063         if (tr->percpu_dir)
6064                 return tr->percpu_dir;
6065
6066         d_tracer = tracing_get_dentry(tr);
6067         if (IS_ERR(d_tracer))
6068                 return NULL;
6069
6070         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6071
6072         WARN_ONCE(!tr->percpu_dir,
6073                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6074
6075         return tr->percpu_dir;
6076 }
6077
6078 static struct dentry *
6079 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6080                       void *data, long cpu, const struct file_operations *fops)
6081 {
6082         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6083
6084         if (ret) /* See tracing_get_cpu() */
6085                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6086         return ret;
6087 }
6088
6089 static void
6090 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6091 {
6092         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6093         struct dentry *d_cpu;
6094         char cpu_dir[30]; /* 30 characters should be more than enough */
6095
6096         if (!d_percpu)
6097                 return;
6098
6099         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6100         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6101         if (!d_cpu) {
6102                 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6103                 return;
6104         }
6105
6106         /* per cpu trace_pipe */
6107         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6108                                 tr, cpu, &tracing_pipe_fops);
6109
6110         /* per cpu trace */
6111         trace_create_cpu_file("trace", 0644, d_cpu,
6112                                 tr, cpu, &tracing_fops);
6113
6114         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6115                                 tr, cpu, &tracing_buffers_fops);
6116
6117         trace_create_cpu_file("stats", 0444, d_cpu,
6118                                 tr, cpu, &tracing_stats_fops);
6119
6120         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6121                                 tr, cpu, &tracing_entries_fops);
6122
6123 #ifdef CONFIG_TRACER_SNAPSHOT
6124         trace_create_cpu_file("snapshot", 0644, d_cpu,
6125                                 tr, cpu, &snapshot_fops);
6126
6127         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6128                                 tr, cpu, &snapshot_raw_fops);
6129 #endif
6130 }
6131
6132 #ifdef CONFIG_FTRACE_SELFTEST
6133 /* Let selftest have access to static functions in this file */
6134 #include "trace_selftest.c"
6135 #endif
6136
6137 static ssize_t
6138 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6139                         loff_t *ppos)
6140 {
6141         struct trace_option_dentry *topt = filp->private_data;
6142         char *buf;
6143
6144         if (topt->flags->val & topt->opt->bit)
6145                 buf = "1\n";
6146         else
6147                 buf = "0\n";
6148
6149         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6150 }
6151
6152 static ssize_t
6153 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6154                          loff_t *ppos)
6155 {
6156         struct trace_option_dentry *topt = filp->private_data;
6157         unsigned long val;
6158         int ret;
6159
6160         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6161         if (ret)
6162                 return ret;
6163
6164         if (val != 0 && val != 1)
6165                 return -EINVAL;
6166
6167         if (!!(topt->flags->val & topt->opt->bit) != val) {
6168                 mutex_lock(&trace_types_lock);
6169                 ret = __set_tracer_option(topt->tr, topt->flags,
6170                                           topt->opt, !val);
6171                 mutex_unlock(&trace_types_lock);
6172                 if (ret)
6173                         return ret;
6174         }
6175
6176         *ppos += cnt;
6177
6178         return cnt;
6179 }
6180
6181
6182 static const struct file_operations trace_options_fops = {
6183         .open = tracing_open_generic,
6184         .read = trace_options_read,
6185         .write = trace_options_write,
6186         .llseek = generic_file_llseek,
6187 };
6188
6189 /*
6190  * In order to pass in both the trace_array descriptor as well as the index
6191  * to the flag that the trace option file represents, the trace_array
6192  * has a character array of trace_flags_index[], which holds the index
6193  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6194  * The address of this character array is passed to the flag option file
6195  * read/write callbacks.
6196  *
6197  * In order to extract both the index and the trace_array descriptor,
6198  * get_tr_index() uses the following algorithm.
6199  *
6200  *   idx = *ptr;
6201  *
6202  * Dereferencing the pointer yields the index, because each array element
6203  * holds its own index (remember, index[1] == 1).
6204  *
6205  * Then, to get the trace_array descriptor, subtracting that index from
6206  * the pointer gives the start of the array:
6207  *
6208  *   ptr - idx == &index[0]
6209  *
6210  * Then a simple container_of() from that pointer gets us to the
6211  * trace_array descriptor.
6212  */
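/*
 * Example (a sketch): for the flag at bit 3, data == &tr->trace_flags_index[3],
 * so *data == 3 and data - 3 == &tr->trace_flags_index[0]; container_of() on
 * that address then yields the enclosing trace_array.
 */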
6213 static void get_tr_index(void *data, struct trace_array **ptr,
6214                          unsigned int *pindex)
6215 {
6216         *pindex = *(unsigned char *)data;
6217
6218         *ptr = container_of(data - *pindex, struct trace_array,
6219                             trace_flags_index);
6220 }
6221
6222 static ssize_t
6223 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6224                         loff_t *ppos)
6225 {
6226         void *tr_index = filp->private_data;
6227         struct trace_array *tr;
6228         unsigned int index;
6229         char *buf;
6230
6231         get_tr_index(tr_index, &tr, &index);
6232
6233         if (tr->trace_flags & (1 << index))
6234                 buf = "1\n";
6235         else
6236                 buf = "0\n";
6237
6238         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6239 }
6240
6241 static ssize_t
6242 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6243                          loff_t *ppos)
6244 {
6245         void *tr_index = filp->private_data;
6246         struct trace_array *tr;
6247         unsigned int index;
6248         unsigned long val;
6249         int ret;
6250
6251         get_tr_index(tr_index, &tr, &index);
6252
6253         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6254         if (ret)
6255                 return ret;
6256
6257         if (val != 0 && val != 1)
6258                 return -EINVAL;
6259
6260         mutex_lock(&trace_types_lock);
6261         ret = set_tracer_flag(tr, 1 << index, val);
6262         mutex_unlock(&trace_types_lock);
6263
6264         if (ret < 0)
6265                 return ret;
6266
6267         *ppos += cnt;
6268
6269         return cnt;
6270 }
6271
6272 static const struct file_operations trace_options_core_fops = {
6273         .open = tracing_open_generic,
6274         .read = trace_options_core_read,
6275         .write = trace_options_core_write,
6276         .llseek = generic_file_llseek,
6277 };
6278
6279 struct dentry *trace_create_file(const char *name,
6280                                  umode_t mode,
6281                                  struct dentry *parent,
6282                                  void *data,
6283                                  const struct file_operations *fops)
6284 {
6285         struct dentry *ret;
6286
6287         ret = tracefs_create_file(name, mode, parent, data, fops);
6288         if (!ret)
6289                 pr_warning("Could not create tracefs '%s' entry\n", name);
6290
6291         return ret;
6292 }
6293
6294
6295 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6296 {
6297         struct dentry *d_tracer;
6298
6299         if (tr->options)
6300                 return tr->options;
6301
6302         d_tracer = tracing_get_dentry(tr);
6303         if (IS_ERR(d_tracer))
6304                 return NULL;
6305
6306         tr->options = tracefs_create_dir("options", d_tracer);
6307         if (!tr->options) {
6308                 pr_warning("Could not create tracefs directory 'options'\n");
6309                 return NULL;
6310         }
6311
6312         return tr->options;
6313 }
6314
6315 static void
6316 create_trace_option_file(struct trace_array *tr,
6317                          struct trace_option_dentry *topt,
6318                          struct tracer_flags *flags,
6319                          struct tracer_opt *opt)
6320 {
6321         struct dentry *t_options;
6322
6323         t_options = trace_options_init_dentry(tr);
6324         if (!t_options)
6325                 return;
6326
6327         topt->flags = flags;
6328         topt->opt = opt;
6329         topt->tr = tr;
6330
6331         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6332                                     &trace_options_fops);
6333
6334 }
6335
6336 static void
6337 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6338 {
6339         struct trace_option_dentry *topts;
6340         struct trace_options *tr_topts;
6341         struct tracer_flags *flags;
6342         struct tracer_opt *opts;
6343         int cnt;
6344         int i;
6345
6346         if (!tracer)
6347                 return;
6348
6349         flags = tracer->flags;
6350
6351         if (!flags || !flags->opts)
6352                 return;
6353
6354         /*
6355          * If this is an instance, only create flags for tracers
6356          * the instance may have.
6357          */
6358         if (!trace_ok_for_array(tracer, tr))
6359                 return;
6360
6361         for (i = 0; i < tr->nr_topts; i++) {
6362                 /*
6363                  * Check if these flags have already been added.
6364                  * Some tracers share flags.
6365                  */
6366                 if (tr->topts[i].tracer->flags == tracer->flags)
6367                         return;
6368         }
6369
6370         opts = flags->opts;
6371
6372         for (cnt = 0; opts[cnt].name; cnt++)
6373                 ;
6374
6375         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6376         if (!topts)
6377                 return;
6378
6379         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6380                             GFP_KERNEL);
6381         if (!tr_topts) {
6382                 kfree(topts);
6383                 return;
6384         }
6385
6386         tr->topts = tr_topts;
6387         tr->topts[tr->nr_topts].tracer = tracer;
6388         tr->topts[tr->nr_topts].topts = topts;
6389         tr->nr_topts++;
6390
6391         for (cnt = 0; opts[cnt].name; cnt++) {
6392                 create_trace_option_file(tr, &topts[cnt], flags,
6393                                          &opts[cnt]);
6394                 WARN_ONCE(topts[cnt].entry == NULL,
6395                           "Failed to create trace option: %s",
6396                           opts[cnt].name);
6397         }
6398 }
6399
6400 static struct dentry *
6401 create_trace_option_core_file(struct trace_array *tr,
6402                               const char *option, long index)
6403 {
6404         struct dentry *t_options;
6405
6406         t_options = trace_options_init_dentry(tr);
6407         if (!t_options)
6408                 return NULL;
6409
6410         return trace_create_file(option, 0644, t_options,
6411                                  (void *)&tr->trace_flags_index[index],
6412                                  &trace_options_core_fops);
6413 }
6414
6415 static void create_trace_options_dir(struct trace_array *tr)
6416 {
6417         struct dentry *t_options;
6418         bool top_level = tr == &global_trace;
6419         int i;
6420
6421         t_options = trace_options_init_dentry(tr);
6422         if (!t_options)
6423                 return;
6424
6425         for (i = 0; trace_options[i]; i++) {
6426                 if (top_level ||
6427                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6428                         create_trace_option_core_file(tr, trace_options[i], i);
6429         }
6430 }
6431
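/*
 * The handlers below back the per-instance "tracing_on" file.
 * Reading it reports whether the ring buffer is recording, and
 * writing 0 or 1 turns recording off or on, typically via:
 *
 *	echo 0 > /sys/kernel/debug/tracing/tracing_on
 */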
6432 static ssize_t
6433 rb_simple_read(struct file *filp, char __user *ubuf,
6434                size_t cnt, loff_t *ppos)
6435 {
6436         struct trace_array *tr = filp->private_data;
6437         char buf[64];
6438         int r;
6439
6440         r = tracer_tracing_is_on(tr);
6441         r = sprintf(buf, "%d\n", r);
6442
6443         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6444 }
6445
6446 static ssize_t
6447 rb_simple_write(struct file *filp, const char __user *ubuf,
6448                 size_t cnt, loff_t *ppos)
6449 {
6450         struct trace_array *tr = filp->private_data;
6451         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6452         unsigned long val;
6453         int ret;
6454
6455         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6456         if (ret)
6457                 return ret;
6458
6459         if (buffer) {
6460                 mutex_lock(&trace_types_lock);
6461                 if (val) {
6462                         tracer_tracing_on(tr);
6463                         if (tr->current_trace->start)
6464                                 tr->current_trace->start(tr);
6465                 } else {
6466                         tracer_tracing_off(tr);
6467                         if (tr->current_trace->stop)
6468                                 tr->current_trace->stop(tr);
6469                 }
6470                 mutex_unlock(&trace_types_lock);
6471         }
6472
6473         (*ppos)++;
6474
6475         return cnt;
6476 }
6477
6478 static const struct file_operations rb_simple_fops = {
6479         .open           = tracing_open_generic_tr,
6480         .read           = rb_simple_read,
6481         .write          = rb_simple_write,
6482         .release        = tracing_release_generic_tr,
6483         .llseek         = default_llseek,
6484 };
6485
6486 struct dentry *trace_instance_dir;
6487
6488 static void
6489 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6490
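/*
 * Allocate one trace buffer: the ring buffer itself plus the per-CPU
 * trace_array_cpu bookkeeping.  This is used for the main buffer and,
 * with CONFIG_TRACER_MAX_TRACE, for the snapshot (max) buffer too.
 */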
6491 static int
6492 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6493 {
6494         enum ring_buffer_flags rb_flags;
6495
6496         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6497
6498         buf->tr = tr;
6499
6500         buf->buffer = ring_buffer_alloc(size, rb_flags);
6501         if (!buf->buffer)
6502                 return -ENOMEM;
6503
6504         buf->data = alloc_percpu(struct trace_array_cpu);
6505         if (!buf->data) {
6506                 ring_buffer_free(buf->buffer);
6507                 return -ENOMEM;
6508         }
6509
6510         /* Allocate the first page for all buffers */
6511         set_buffer_entries(&tr->trace_buffer,
6512                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6513
6514         return 0;
6515 }
6516
6517 static int allocate_trace_buffers(struct trace_array *tr, int size)
6518 {
6519         int ret;
6520
6521         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6522         if (ret)
6523                 return ret;
6524
6525 #ifdef CONFIG_TRACER_MAX_TRACE
6526         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6527                                     allocate_snapshot ? size : 1);
6528         if (WARN_ON(ret)) {
6529                 ring_buffer_free(tr->trace_buffer.buffer);
6530                 free_percpu(tr->trace_buffer.data);
6531                 return -ENOMEM;
6532         }
6533         tr->allocated_snapshot = allocate_snapshot;
6534
6535         /*
6536          * Only the top level trace array gets its snapshot allocated
6537          * from the kernel command line.
6538          */
6539         allocate_snapshot = false;
6540 #endif
6541         return 0;
6542 }
6543
6544 static void free_trace_buffer(struct trace_buffer *buf)
6545 {
6546         if (buf->buffer) {
6547                 ring_buffer_free(buf->buffer);
6548                 buf->buffer = NULL;
6549                 free_percpu(buf->data);
6550                 buf->data = NULL;
6551         }
6552 }
6553
6554 static void free_trace_buffers(struct trace_array *tr)
6555 {
6556         if (!tr)
6557                 return;
6558
6559         free_trace_buffer(&tr->trace_buffer);
6560
6561 #ifdef CONFIG_TRACER_MAX_TRACE
6562         free_trace_buffer(&tr->max_buffer);
6563 #endif
6564 }
6565
6566 static void init_trace_flags_index(struct trace_array *tr)
6567 {
6568         int i;
6569
6570         /* Used by the trace options files */
6571         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6572                 tr->trace_flags_index[i] = i;
6573 }
6574
6575 static void __update_tracer_options(struct trace_array *tr)
6576 {
6577         struct tracer *t;
6578
6579         for (t = trace_types; t; t = t->next)
6580                 add_tracer_options(tr, t);
6581 }
6582
6583 static void update_tracer_options(struct trace_array *tr)
6584 {
6585         mutex_lock(&trace_types_lock);
6586         __update_tracer_options(tr);
6587         mutex_unlock(&trace_types_lock);
6588 }
6589
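/*
 * Called back by tracefs when a directory is created below
 * "instances".  Sets up a new trace_array with its own buffers,
 * flags and control files, typically via:
 *
 *	mkdir /sys/kernel/debug/tracing/instances/foo
 */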
6590 static int instance_mkdir(const char *name)
6591 {
6592         struct trace_array *tr;
6593         int ret;
6594
6595         mutex_lock(&trace_types_lock);
6596
6597         ret = -EEXIST;
6598         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6599                 if (tr->name && strcmp(tr->name, name) == 0)
6600                         goto out_unlock;
6601         }
6602
6603         ret = -ENOMEM;
6604         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6605         if (!tr)
6606                 goto out_unlock;
6607
6608         tr->name = kstrdup(name, GFP_KERNEL);
6609         if (!tr->name)
6610                 goto out_free_tr;
6611
6612         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6613                 goto out_free_tr;
6614
6615         tr->trace_flags = global_trace.trace_flags;
6616
6617         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6618
6619         raw_spin_lock_init(&tr->start_lock);
6620
6621         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6622
6623         tr->current_trace = &nop_trace;
6624
6625         INIT_LIST_HEAD(&tr->systems);
6626         INIT_LIST_HEAD(&tr->events);
6627
6628         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6629                 goto out_free_tr;
6630
6631         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6632         if (!tr->dir)
6633                 goto out_free_tr;
6634
6635         ret = event_trace_add_tracer(tr->dir, tr);
6636         if (ret) {
6637                 tracefs_remove_recursive(tr->dir);
6638                 goto out_free_tr;
6639         }
6640
6641         init_tracer_tracefs(tr, tr->dir);
6642         init_trace_flags_index(tr);
6643         __update_tracer_options(tr);
6644
6645         list_add(&tr->list, &ftrace_trace_arrays);
6646
6647         mutex_unlock(&trace_types_lock);
6648
6649         return 0;
6650
6651  out_free_tr:
6652         free_trace_buffers(tr);
6653         free_cpumask_var(tr->tracing_cpumask);
6654         kfree(tr->name);
6655         kfree(tr);
6656
6657  out_unlock:
6658         mutex_unlock(&trace_types_lock);
6659
6660         return ret;
6661
6662 }
6663
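/*
 * Called back by tracefs when an instance directory is removed.
 * Refuses with -EBUSY while the instance (or its current tracer)
 * still has users; otherwise it switches the instance to the nop
 * tracer, tears down its event and function files, removes the
 * directory tree with tracefs_remove_recursive() and frees all the
 * memory the instance allocated.
 */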
6664 static int instance_rmdir(const char *name)
6665 {
6666         struct trace_array *tr;
6667         int found = 0;
6668         int ret;
6669         int i;
6670
6671         mutex_lock(&trace_types_lock);
6672
6673         ret = -ENODEV;
6674         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6675                 if (tr->name && strcmp(tr->name, name) == 0) {
6676                         found = 1;
6677                         break;
6678                 }
6679         }
6680         if (!found)
6681                 goto out_unlock;
6682
6683         ret = -EBUSY;
6684         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6685                 goto out_unlock;
6686
6687         list_del(&tr->list);
6688
6689         tracing_set_nop(tr);
6690         event_trace_del_tracer(tr);
6691         ftrace_destroy_function_files(tr);
6692         tracefs_remove_recursive(tr->dir);
6693         free_trace_buffers(tr);
6694
6695         for (i = 0; i < tr->nr_topts; i++) {
6696                 kfree(tr->topts[i].topts);
6697         }
6698         kfree(tr->topts);
6699
6700         kfree(tr->name);
6701         kfree(tr);
6702
6703         ret = 0;
6704
6705  out_unlock:
6706         mutex_unlock(&trace_types_lock);
6707
6708         return ret;
6709 }
6710
6711 static __init void create_trace_instances(struct dentry *d_tracer)
6712 {
6713         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6714                                                          instance_mkdir,
6715                                                          instance_rmdir);
6716         if (WARN_ON(!trace_instance_dir))
6717                 return;
6718 }
6719
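/*
 * Create the standard set of control files (current_tracer, trace,
 * trace_pipe, buffer_size_kb, tracing_on, ...) for a trace array,
 * either at the top level or inside an instance directory.
 */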
6720 static void
6721 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6722 {
6723         int cpu;
6724
6725         trace_create_file("available_tracers", 0444, d_tracer,
6726                         tr, &show_traces_fops);
6727
6728         trace_create_file("current_tracer", 0644, d_tracer,
6729                         tr, &set_tracer_fops);
6730
6731         trace_create_file("tracing_cpumask", 0644, d_tracer,
6732                           tr, &tracing_cpumask_fops);
6733
6734         trace_create_file("trace_options", 0644, d_tracer,
6735                           tr, &tracing_iter_fops);
6736
6737         trace_create_file("trace", 0644, d_tracer,
6738                           tr, &tracing_fops);
6739
6740         trace_create_file("trace_pipe", 0444, d_tracer,
6741                           tr, &tracing_pipe_fops);
6742
6743         trace_create_file("buffer_size_kb", 0644, d_tracer,
6744                           tr, &tracing_entries_fops);
6745
6746         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6747                           tr, &tracing_total_entries_fops);
6748
6749         trace_create_file("free_buffer", 0200, d_tracer,
6750                           tr, &tracing_free_buffer_fops);
6751
6752         trace_create_file("trace_marker", 0220, d_tracer,
6753                           tr, &tracing_mark_fops);
6754
6755         trace_create_file("trace_clock", 0644, d_tracer, tr,
6756                           &trace_clock_fops);
6757
6758         trace_create_file("tracing_on", 0644, d_tracer,
6759                           tr, &rb_simple_fops);
6760
6761         create_trace_options_dir(tr);
6762
6763 #ifdef CONFIG_TRACER_MAX_TRACE
6764         trace_create_file("tracing_max_latency", 0644, d_tracer,
6765                         &tr->max_latency, &tracing_max_lat_fops);
6766 #endif
6767
6768         if (ftrace_create_function_files(tr, d_tracer))
6769                 WARN(1, "Could not allocate function filter files");
6770
6771 #ifdef CONFIG_TRACER_SNAPSHOT
6772         trace_create_file("snapshot", 0644, d_tracer,
6773                           tr, &snapshot_fops);
6774 #endif
6775
6776         for_each_tracing_cpu(cpu)
6777                 tracing_init_tracefs_percpu(tr, cpu);
6778
6779 }
6780
6781 static struct vfsmount *trace_automount(void *ignore)
6782 {
6783         struct vfsmount *mnt;
6784         struct file_system_type *type;
6785
6786         /*
6787          * To maintain backward compatibility for tools that mount
6788          * debugfs to get to the tracing facility, tracefs is automatically
6789          * mounted to the debugfs/tracing directory.
6790          */
6791         type = get_fs_type("tracefs");
6792         if (!type)
6793                 return NULL;
6794         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6795         put_filesystem(type);
6796         if (IS_ERR(mnt))
6797                 return NULL;
6798         mntget(mnt);
6799
6800         return mnt;
6801 }
6802
6803 /**
6804  * tracing_init_dentry - initialize top level trace array
6805  *
6806  * This is called when creating files or directories in the tracing
6807  * directory. It is called via fs_initcall() by the boot-up code
6808  * and is expected to return the dentry of the top level tracing directory.
6809  */
6810 struct dentry *tracing_init_dentry(void)
6811 {
6812         struct trace_array *tr = &global_trace;
6813
6814         /* The top level trace array uses NULL as parent */
6815         if (tr->dir)
6816                 return NULL;
6817
6818         if (WARN_ON(!debugfs_initialized()))
6819                 return ERR_PTR(-ENODEV);
6820
6821         /*
6822          * As there may still be users that expect the tracing
6823          * files to exist in debugfs/tracing, we must automount
6824          * the tracefs file system there, so older tools still
6825          * work with the newer kernel.
6826          */
6827         tr->dir = debugfs_create_automount("tracing", NULL,
6828                                            trace_automount, NULL);
6829         if (!tr->dir) {
6830                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6831                 return ERR_PTR(-ENOMEM);
6832         }
6833
6834         return NULL;
6835 }
6836
6837 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6838 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6839
6840 static void __init trace_enum_init(void)
6841 {
6842         int len;
6843
6844         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6845         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6846 }
6847
6848 #ifdef CONFIG_MODULES
6849 static void trace_module_add_enums(struct module *mod)
6850 {
6851         if (!mod->num_trace_enums)
6852                 return;
6853
6854         /*
6855          * Modules with bad taint do not have events created; do
6856          * not bother with enums either.
6857          */
6858         if (trace_module_has_bad_taint(mod))
6859                 return;
6860
6861         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6862 }
6863
6864 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6865 static void trace_module_remove_enums(struct module *mod)
6866 {
6867         union trace_enum_map_item *map;
6868         union trace_enum_map_item **last = &trace_enum_maps;
6869
6870         if (!mod->num_trace_enums)
6871                 return;
6872
6873         mutex_lock(&trace_enum_mutex);
6874
6875         map = trace_enum_maps;
6876
6877         while (map) {
6878                 if (map->head.mod == mod)
6879                         break;
6880                 map = trace_enum_jmp_to_tail(map);
6881                 last = &map->tail.next;
6882                 map = map->tail.next;
6883         }
6884         if (!map)
6885                 goto out;
6886
6887         *last = trace_enum_jmp_to_tail(map)->tail.next;
6888         kfree(map);
6889  out:
6890         mutex_unlock(&trace_enum_mutex);
6891 }
6892 #else
6893 static inline void trace_module_remove_enums(struct module *mod) { }
6894 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6895
6896 static int trace_module_notify(struct notifier_block *self,
6897                                unsigned long val, void *data)
6898 {
6899         struct module *mod = data;
6900
6901         switch (val) {
6902         case MODULE_STATE_COMING:
6903                 trace_module_add_enums(mod);
6904                 break;
6905         case MODULE_STATE_GOING:
6906                 trace_module_remove_enums(mod);
6907                 break;
6908         }
6909
6910         return 0;
6911 }
6912
6913 static struct notifier_block trace_module_nb = {
6914         .notifier_call = trace_module_notify,
6915         .priority = 0,
6916 };
6917 #endif /* CONFIG_MODULES */
6918
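/*
 * Runs at fs_initcall time: create the top level tracefs files for
 * the global trace array, the enum map file, the "instances"
 * directory and the per-tracer option files.
 */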
6919 static __init int tracer_init_tracefs(void)
6920 {
6921         struct dentry *d_tracer;
6922
6923         trace_access_lock_init();
6924
6925         d_tracer = tracing_init_dentry();
6926         if (IS_ERR(d_tracer))
6927                 return 0;
6928
6929         init_tracer_tracefs(&global_trace, d_tracer);
6930
6931         trace_create_file("tracing_thresh", 0644, d_tracer,
6932                         &global_trace, &tracing_thresh_fops);
6933
6934         trace_create_file("README", 0444, d_tracer,
6935                         NULL, &tracing_readme_fops);
6936
6937         trace_create_file("saved_cmdlines", 0444, d_tracer,
6938                         NULL, &tracing_saved_cmdlines_fops);
6939
6940         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6941                           NULL, &tracing_saved_cmdlines_size_fops);
6942
6943         trace_enum_init();
6944
6945         trace_create_enum_file(d_tracer);
6946
6947 #ifdef CONFIG_MODULES
6948         register_module_notifier(&trace_module_nb);
6949 #endif
6950
6951 #ifdef CONFIG_DYNAMIC_FTRACE
6952         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6953                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6954 #endif
6955
6956         create_trace_instances(d_tracer);
6957
6958         update_tracer_options(&global_trace);
6959
6960         return 0;
6961 }
6962
6963 static int trace_panic_handler(struct notifier_block *this,
6964                                unsigned long event, void *unused)
6965 {
6966         if (ftrace_dump_on_oops)
6967                 ftrace_dump(ftrace_dump_on_oops);
6968         return NOTIFY_OK;
6969 }
6970
6971 static struct notifier_block trace_panic_notifier = {
6972         .notifier_call  = trace_panic_handler,
6973         .next           = NULL,
6974         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6975 };
6976
6977 static int trace_die_handler(struct notifier_block *self,
6978                              unsigned long val,
6979                              void *data)
6980 {
6981         switch (val) {
6982         case DIE_OOPS:
6983                 if (ftrace_dump_on_oops)
6984                         ftrace_dump(ftrace_dump_on_oops);
6985                 break;
6986         default:
6987                 break;
6988         }
6989         return NOTIFY_OK;
6990 }
6991
6992 static struct notifier_block trace_die_notifier = {
6993         .notifier_call = trace_die_handler,
6994         .priority = 200
6995 };
6996
6997 /*
6998  * printk is set to a max of 1024; we really don't need it that big.
6999  * Nothing should be printing 1000 characters anyway.
7000  */
7001 #define TRACE_MAX_PRINT         1000
7002
7003 /*
7004  * Define here KERN_TRACE so that we have one place to modify
7005  * it if we decide to change what log level the ftrace dump
7006  * should be at.
7007  */
7008 #define KERN_TRACE              KERN_EMERG
7009
7010 void
7011 trace_printk_seq(struct trace_seq *s)
7012 {
7013         /* Probably should print a warning here. */
7014         if (s->seq.len >= TRACE_MAX_PRINT)
7015                 s->seq.len = TRACE_MAX_PRINT;
7016
7017         /*
7018          * More paranoid code. Although the buffer size is set to
7019          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7020          * an extra layer of protection.
7021          */
7022         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7023                 s->seq.len = s->seq.size - 1;
7024
7025         /* Should already be zero terminated, but we are paranoid. */
7026         s->buffer[s->seq.len] = 0;
7027
7028         printk(KERN_TRACE "%s", s->buffer);
7029
7030         trace_seq_init(s);
7031 }
7032
7033 void trace_init_global_iter(struct trace_iterator *iter)
7034 {
7035         iter->tr = &global_trace;
7036         iter->trace = iter->tr->current_trace;
7037         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7038         iter->trace_buffer = &global_trace.trace_buffer;
7039
7040         if (iter->trace && iter->trace->open)
7041                 iter->trace->open(iter);
7042
7043         /* Annotate start of buffers if we had overruns */
7044         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7045                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7046
7047         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7048         if (trace_clocks[iter->tr->clock_id].in_ns)
7049                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7050 }
7051
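/*
 * Dump the contents of the ring buffer to the console at KERN_EMERG
 * level.  This is what the panic and die notifiers call when
 * ftrace_dump_on_oops is set; tracing is turned off and the per-CPU
 * buffers are disabled while the dump runs.
 */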
7052 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7053 {
7054         /* use static because iter can be a bit big for the stack */
7055         static struct trace_iterator iter;
7056         static atomic_t dump_running;
7057         struct trace_array *tr = &global_trace;
7058         unsigned int old_userobj;
7059         unsigned long flags;
7060         int cnt = 0, cpu;
7061
7062         /* Only allow one dump user at a time. */
7063         if (atomic_inc_return(&dump_running) != 1) {
7064                 atomic_dec(&dump_running);
7065                 return;
7066         }
7067
7068         /*
7069          * Always turn off tracing when we dump.
7070          * We don't need to show trace output of what happens
7071          * between multiple crashes.
7072          *
7073          * If the user does a sysrq-z, then they can re-enable
7074          * tracing with echo 1 > tracing_on.
7075          */
7076         tracing_off();
7077
7078         local_irq_save(flags);
7079
7080         /* Simulate the iterator */
7081         trace_init_global_iter(&iter);
7082
7083         for_each_tracing_cpu(cpu) {
7084                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7085         }
7086
7087         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7088
7089         /* don't look at user memory in panic mode */
7090         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7091
7092         switch (oops_dump_mode) {
7093         case DUMP_ALL:
7094                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7095                 break;
7096         case DUMP_ORIG:
7097                 iter.cpu_file = raw_smp_processor_id();
7098                 break;
7099         case DUMP_NONE:
7100                 goto out_enable;
7101         default:
7102                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7103                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7104         }
7105
7106         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7107
7108         /* Did function tracer already get disabled? */
7109         if (ftrace_is_dead()) {
7110                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7111                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7112         }
7113
7114         /*
7115          * We need to stop all tracing on all CPUs to read the
7116          * next buffer. This is a bit expensive, but is
7117          * not done often. We read everything we can,
7118          * and then release the locks again.
7119          */
7120
7121         while (!trace_empty(&iter)) {
7122
7123                 if (!cnt)
7124                         printk(KERN_TRACE "---------------------------------\n");
7125
7126                 cnt++;
7127
7128                 /* reset all but tr, trace, and overruns */
7129                 memset(&iter.seq, 0,
7130                        sizeof(struct trace_iterator) -
7131                        offsetof(struct trace_iterator, seq));
7132                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7133                 iter.pos = -1;
7134
7135                 if (trace_find_next_entry_inc(&iter) != NULL) {
7136                         int ret;
7137
7138                         ret = print_trace_line(&iter);
7139                         if (ret != TRACE_TYPE_NO_CONSUME)
7140                                 trace_consume(&iter);
7141                 }
7142                 touch_nmi_watchdog();
7143
7144                 trace_printk_seq(&iter.seq);
7145         }
7146
7147         if (!cnt)
7148                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7149         else
7150                 printk(KERN_TRACE "---------------------------------\n");
7151
7152  out_enable:
7153         tr->trace_flags |= old_userobj;
7154
7155         for_each_tracing_cpu(cpu) {
7156                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7157         }
7158         atomic_dec(&dump_running);
7159         local_irq_restore(flags);
7160 }
7161 EXPORT_SYMBOL_GPL(ftrace_dump);
7162
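/*
 * Early boot setup of the global trace array: allocate the CPU masks,
 * the temporary event buffer, the saved-cmdlines buffer and the ring
 * buffers, install the nop tracer and register the panic and die
 * notifiers.
 */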
7163 __init static int tracer_alloc_buffers(void)
7164 {
7165         int ring_buf_size;
7166         int ret = -ENOMEM;
7167
7168         /*
7169          * Make sure we don't accidentally add more trace options
7170          * than we have bits for.
7171          */
7172         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7173
7174         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7175                 goto out;
7176
7177         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7178                 goto out_free_buffer_mask;
7179
7180         /* Only allocate trace_printk buffers if a trace_printk exists */
7181         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7182                 /* Must be called before global_trace.buffer is allocated */
7183                 trace_printk_init_buffers();
7184
7185         /* To save memory, keep the ring buffer size to its minimum */
7186         if (ring_buffer_expanded)
7187                 ring_buf_size = trace_buf_size;
7188         else
7189                 ring_buf_size = 1;
7190
7191         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7192         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7193
7194         raw_spin_lock_init(&global_trace.start_lock);
7195
7196         /* Used for event triggers */
7197         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7198         if (!temp_buffer)
7199                 goto out_free_cpumask;
7200
7201         if (trace_create_savedcmd() < 0)
7202                 goto out_free_temp_buffer;
7203
7204         /* TODO: make the number of buffers hot pluggable with CPUs */
7205         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7206                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7207                 WARN_ON(1);
7208                 goto out_free_savedcmd;
7209         }
7210
7211         if (global_trace.buffer_disabled)
7212                 tracing_off();
7213
7214         if (trace_boot_clock) {
7215                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7216                 if (ret < 0)
7217                         pr_warning("Trace clock %s not defined, going back to default\n",
7218                                    trace_boot_clock);
7219         }
7220
7221         /*
7222          * register_tracer() might reference current_trace, so it
7223          * needs to be set before we register anything. This is
7224          * just a bootstrap of current_trace anyway.
7225          */
7226         global_trace.current_trace = &nop_trace;
7227
7228         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7229
7230         ftrace_init_global_array_ops(&global_trace);
7231
7232         init_trace_flags_index(&global_trace);
7233
7234         register_tracer(&nop_trace);
7235
7236         /* All seems OK, enable tracing */
7237         tracing_disabled = 0;
7238
7239         atomic_notifier_chain_register(&panic_notifier_list,
7240                                        &trace_panic_notifier);
7241
7242         register_die_notifier(&trace_die_notifier);
7243
7244         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7245
7246         INIT_LIST_HEAD(&global_trace.systems);
7247         INIT_LIST_HEAD(&global_trace.events);
7248         list_add(&global_trace.list, &ftrace_trace_arrays);
7249
7250         while (trace_boot_options) {
7251                 char *option;
7252
7253                 option = strsep(&trace_boot_options, ",");
7254                 trace_set_options(&global_trace, option);
7255         }
7256
7257         register_snapshot_cmd();
7258
7259         return 0;
7260
7261 out_free_savedcmd:
7262         free_saved_cmdlines_buffer(savedcmd);
7263 out_free_temp_buffer:
7264         ring_buffer_free(temp_buffer);
7265 out_free_cpumask:
7266         free_cpumask_var(global_trace.tracing_cpumask);
7267 out_free_buffer_mask:
7268         free_cpumask_var(tracing_buffer_mask);
7269 out:
7270         return ret;
7271 }
7272
7273 void __init trace_init(void)
7274 {
7275         if (tracepoint_printk) {
7276                 tracepoint_print_iter =
7277                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7278                 if (WARN_ON(!tracepoint_print_iter))
7279                         tracepoint_printk = 0;
7280         }
7281         tracer_alloc_buffers();
7282         trace_event_init();
7283 }
7284
7285 __init static int clear_boot_tracer(void)
7286 {
7287         /*
7288          * The default bootup tracer name is kept in an init section.
7289          * This function is called at late_initcall time. If the boot
7290          * tracer was never registered, clear the pointer out, to prevent
7291          * a later registration from accessing the init memory that is
7292          * about to be freed.
7293          */
7294         if (!default_bootup_tracer)
7295                 return 0;
7296
7297         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7298                default_bootup_tracer);
7299         default_bootup_tracer = NULL;
7300
7301         return 0;
7302 }
7303
7304 fs_initcall(tracer_init_tracefs);
7305 late_initcall(clear_boot_tracer);