kernel/trace/trace.c (firefly-linux-kernel-4.4.55.git, merge tag 'v3.10.3' into linux-linaro-lsk)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer (such as trace_printk) could
57  * occur at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but will be set back to zero if the
91  * initialization of the tracer is successful. That is the only
92  * place that sets it back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs, or
113  * to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 static int tracing_set_tracer(const char *buf);
119
120 #define MAX_TRACER_SIZE         100
121 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
122 static char *default_bootup_tracer;
123
124 static bool allocate_snapshot;
125
126 static int __init set_cmdline_ftrace(char *str)
127 {
128         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
129         default_bootup_tracer = bootup_tracer_buf;
130         /* We are using ftrace early, expand it */
131         ring_buffer_expanded = true;
132         return 1;
133 }
134 __setup("ftrace=", set_cmdline_ftrace);
135
136 static int __init set_ftrace_dump_on_oops(char *str)
137 {
138         if (*str++ != '=' || !*str) {
139                 ftrace_dump_on_oops = DUMP_ALL;
140                 return 1;
141         }
142
143         if (!strcmp("orig_cpu", str)) {
144                 ftrace_dump_on_oops = DUMP_ORIG;
145                 return 1;
146         }
147
148         return 0;
149 }
150 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
151
152 static int __init boot_alloc_snapshot(char *str)
153 {
154         allocate_snapshot = true;
155         /* We also need the main ring buffer expanded */
156         ring_buffer_expanded = true;
157         return 1;
158 }
159 __setup("alloc_snapshot", boot_alloc_snapshot);
160
161
162 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
163 static char *trace_boot_options __initdata;
164
165 static int __init set_trace_boot_options(char *str)
166 {
167         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
168         trace_boot_options = trace_boot_options_buf;
169         return 0;
170 }
171 __setup("trace_options=", set_trace_boot_options);
172
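/*
 * Illustrative example (not part of the original file): the boot
 * parameters handled above can be combined on the kernel command
 * line, e.g.:
 *
 *	ftrace=function_graph trace_options=sym-addr,nooverwrite
 *	ftrace_dump_on_oops=orig_cpu alloc_snapshot
 *
 * "trace_buf_size=" and "tracing_thresh=" are parsed further down
 * in this file.
 */
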
173 unsigned long long ns2usecs(cycle_t nsec)
174 {
175         nsec += 500;
176         do_div(nsec, 1000);
177         return nsec;
178 }
179
180 /*
181  * The global_trace is the descriptor that holds the tracing
182  * buffers for the live tracing. For each CPU, it contains
183  * a linked list of pages that will store trace entries. The
184  * page descriptors of the pages in memory are used to hold
185  * the linked list, by linking the lru item in each page
186  * descriptor to the other pages in that CPU's buffer.
187  *
188  * For each active CPU there is a data field that holds the
189  * pages for the buffer for that CPU. Each CPU has the same number
190  * of pages allocated for its buffer.
191  */
192 static struct trace_array       global_trace;
193
194 LIST_HEAD(ftrace_trace_arrays);
195
196 int trace_array_get(struct trace_array *this_tr)
197 {
198         struct trace_array *tr;
199         int ret = -ENODEV;
200
201         mutex_lock(&trace_types_lock);
202         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
203                 if (tr == this_tr) {
204                         tr->ref++;
205                         ret = 0;
206                         break;
207                 }
208         }
209         mutex_unlock(&trace_types_lock);
210
211         return ret;
212 }
213
214 static void __trace_array_put(struct trace_array *this_tr)
215 {
216         WARN_ON(!this_tr->ref);
217         this_tr->ref--;
218 }
219
220 void trace_array_put(struct trace_array *this_tr)
221 {
222         mutex_lock(&trace_types_lock);
223         __trace_array_put(this_tr);
224         mutex_unlock(&trace_types_lock);
225 }
226
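/*
 * Example sketch (illustrative, not from this file): callers pin a
 * trace_array with the get/put pair above while they use it, so the
 * instance cannot be freed underneath them. The helper name below is
 * hypothetical.
 */
static int example_use_instance(struct trace_array *tr)
{
	if (trace_array_get(tr) < 0)
		return -ENODEV;		/* instance went away */

	/* ... safe to dereference tr and its buffers here ... */

	trace_array_put(tr);
	return 0;
}
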
227 int filter_current_check_discard(struct ring_buffer *buffer,
228                                  struct ftrace_event_call *call, void *rec,
229                                  struct ring_buffer_event *event)
230 {
231         return filter_check_discard(call, rec, buffer, event);
232 }
233 EXPORT_SYMBOL_GPL(filter_current_check_discard);
234
235 cycle_t ftrace_now(int cpu)
236 {
237         u64 ts;
238
239         /* Early boot up does not have a buffer yet */
240         if (!global_trace.trace_buffer.buffer)
241                 return trace_clock_local();
242
243         ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
244         ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
245
246         return ts;
247 }
248
249 int tracing_is_enabled(void)
250 {
251         return tracing_is_on();
252 }
253
254 /*
255  * trace_buf_size is the size in bytes that is allocated
256  * for a buffer. Note, the number of bytes is always rounded
257  * to page size.
258  *
259  * This number is purposely set to a low value of 16384 entries,
260  * so that if a dump on oops happens we do not have to wait for
261  * an enormous amount of output. It is configurable at both boot
262  * time and run time anyway.
263  */
264 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
265
266 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
267
268 /* trace_types holds a linked list of available tracers. */
269 static struct tracer            *trace_types __read_mostly;
270
271 /*
272  * trace_types_lock is used to protect the trace_types list.
273  */
274 DEFINE_MUTEX(trace_types_lock);
275
276 /*
277  * Serialize access to the ring buffer.
278  *
279  * The ring buffer serializes readers, but that is only low level
280  * protection. The validity of the events (returned by
281  * ring_buffer_peek() etc.) is not protected by the ring buffer.
282  *
283  * The content of events may become garbage if we allow another
284  * process to consume these events concurrently:
285  *   A) the page of the consumed events may become a normal page
286  *      (not a reader page) in the ring buffer, and this page will
287  *      be rewritten by the event producer.
288  *   B) the page of the consumed events may become a page for
289  *      splice_read, and this page will be returned to the system.
290  *
291  * These primitives allow concurrent access to different per-cpu
292  * ring buffers from multiple processes.
293  *
294  * These primitives don't distinguish read-only and read-consume
295  * access. Multiple read-only accesses are also serialized.
296  */
297
298 #ifdef CONFIG_SMP
299 static DECLARE_RWSEM(all_cpu_access_lock);
300 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
301
302 static inline void trace_access_lock(int cpu)
303 {
304         if (cpu == RING_BUFFER_ALL_CPUS) {
305                 /* gain it for accessing the whole ring buffer. */
306                 down_write(&all_cpu_access_lock);
307         } else {
308                 /* gain it for accessing a cpu ring buffer. */
309
310                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
311                 down_read(&all_cpu_access_lock);
312
313                 /* Secondly block other access to this @cpu ring buffer. */
314                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
315         }
316 }
317
318 static inline void trace_access_unlock(int cpu)
319 {
320         if (cpu == RING_BUFFER_ALL_CPUS) {
321                 up_write(&all_cpu_access_lock);
322         } else {
323                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
324                 up_read(&all_cpu_access_lock);
325         }
326 }
327
328 static inline void trace_access_lock_init(void)
329 {
330         int cpu;
331
332         for_each_possible_cpu(cpu)
333                 mutex_init(&per_cpu(cpu_access_lock, cpu));
334 }
335
336 #else
337
338 static DEFINE_MUTEX(access_lock);
339
340 static inline void trace_access_lock(int cpu)
341 {
342         (void)cpu;
343         mutex_lock(&access_lock);
344 }
345
346 static inline void trace_access_unlock(int cpu)
347 {
348         (void)cpu;
349         mutex_unlock(&access_lock);
350 }
351
352 static inline void trace_access_lock_init(void)
353 {
354 }
355
356 #endif
357
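/*
 * Example sketch (illustrative, not from this file): a consumer of a
 * single per-cpu buffer versus a consumer of all buffers. The helper
 * names are hypothetical.
 */
static void example_consume_cpu(int cpu)
{
	trace_access_lock(cpu);
	/* events peeked/consumed from @cpu stay valid in here */
	trace_access_unlock(cpu);
}

static void example_consume_all(void)
{
	trace_access_lock(RING_BUFFER_ALL_CPUS);
	/* excludes every per-cpu consumer above */
	trace_access_unlock(RING_BUFFER_ALL_CPUS);
}
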
358 /* trace_flags holds trace_options default values */
359 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
360         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
361         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
362         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
363
364 /**
365  * tracing_on - enable tracing buffers
366  *
367  * This function enables tracing buffers that may have been
368  * disabled with tracing_off.
369  */
370 void tracing_on(void)
371 {
372         if (global_trace.trace_buffer.buffer)
373                 ring_buffer_record_on(global_trace.trace_buffer.buffer);
374         /*
375          * This flag is only looked at when buffers haven't been
376          * allocated yet. We don't really care about the race
377          * between setting this flag and actually turning
378          * on the buffer.
379          */
380         global_trace.buffer_disabled = 0;
381 }
382 EXPORT_SYMBOL_GPL(tracing_on);
383
384 /**
385  * __trace_puts - write a constant string into the trace buffer.
386  * @ip:    The address of the caller
387  * @str:   The constant string to write
388  * @size:  The size of the string.
389  */
390 int __trace_puts(unsigned long ip, const char *str, int size)
391 {
392         struct ring_buffer_event *event;
393         struct ring_buffer *buffer;
394         struct print_entry *entry;
395         unsigned long irq_flags;
396         int alloc;
397
398         alloc = sizeof(*entry) + size + 2; /* possible \n added */
399
400         local_save_flags(irq_flags);
401         buffer = global_trace.trace_buffer.buffer;
402         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
403                                           irq_flags, preempt_count());
404         if (!event)
405                 return 0;
406
407         entry = ring_buffer_event_data(event);
408         entry->ip = ip;
409
410         memcpy(&entry->buf, str, size);
411
412         /* Add a newline if necessary */
413         if (entry->buf[size - 1] != '\n') {
414                 entry->buf[size] = '\n';
415                 entry->buf[size + 1] = '\0';
416         } else
417                 entry->buf[size] = '\0';
418
419         __buffer_unlock_commit(buffer, event);
420
421         return size;
422 }
423 EXPORT_SYMBOL_GPL(__trace_puts);
424
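/*
 * Usage note (illustrative): callers normally go through the
 * trace_puts() macro from <linux/kernel.h> rather than calling
 * __trace_puts() directly; the macro supplies _THIS_IP_ itself and
 * picks __trace_bputs() below when the string is a built-in
 * constant, falling back to __trace_puts() with strlen(str)
 * otherwise:
 *
 *	trace_puts("hit the slow path\n");
 */
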
425 /**
426  * __trace_bputs - write the pointer to a constant string into trace buffer
427  * @ip:    The address of the caller
428  * @str:   The constant string whose address is written into the buffer
429  */
430 int __trace_bputs(unsigned long ip, const char *str)
431 {
432         struct ring_buffer_event *event;
433         struct ring_buffer *buffer;
434         struct bputs_entry *entry;
435         unsigned long irq_flags;
436         int size = sizeof(struct bputs_entry);
437
438         local_save_flags(irq_flags);
439         buffer = global_trace.trace_buffer.buffer;
440         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
441                                           irq_flags, preempt_count());
442         if (!event)
443                 return 0;
444
445         entry = ring_buffer_event_data(event);
446         entry->ip                       = ip;
447         entry->str                      = str;
448
449         __buffer_unlock_commit(buffer, event);
450
451         return 1;
452 }
453 EXPORT_SYMBOL_GPL(__trace_bputs);
454
455 #ifdef CONFIG_TRACER_SNAPSHOT
456 /**
457  * tracing_snapshot - take a snapshot of the current buffer.
458  *
459  * This causes a swap between the snapshot buffer and the current live
460  * tracing buffer. You can use this to take snapshots of the live
461  * trace when some condition is triggered, but continue to trace.
462  *
463  * Note, make sure to allocate the snapshot with either
464  * a tracing_snapshot_alloc(), or by doing it manually
465  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
466  *
467  * If the snapshot buffer is not allocated, it will stop tracing.
468  * Basically making a permanent snapshot.
469  */
470 void tracing_snapshot(void)
471 {
472         struct trace_array *tr = &global_trace;
473         struct tracer *tracer = tr->current_trace;
474         unsigned long flags;
475
476         if (in_nmi()) {
477                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
478                 internal_trace_puts("*** snapshot is being ignored        ***\n");
479                 return;
480         }
481
482         if (!tr->allocated_snapshot) {
483                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
484                 internal_trace_puts("*** stopping trace here!   ***\n");
485                 tracing_off();
486                 return;
487         }
488
489         /* Note, snapshot can not be used when the tracer uses it */
490         if (tracer->use_max_tr) {
491                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
492                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
493                 return;
494         }
495
496         local_irq_save(flags);
497         update_max_tr(tr, current, smp_processor_id());
498         local_irq_restore(flags);
499 }
500 EXPORT_SYMBOL_GPL(tracing_snapshot);
501
502 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
503                                         struct trace_buffer *size_buf, int cpu_id);
504 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
505
506 static int alloc_snapshot(struct trace_array *tr)
507 {
508         int ret;
509
510         if (!tr->allocated_snapshot) {
511
512                 /* allocate spare buffer */
513                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
514                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
515                 if (ret < 0)
516                         return ret;
517
518                 tr->allocated_snapshot = true;
519         }
520
521         return 0;
522 }
523
524 void free_snapshot(struct trace_array *tr)
525 {
526         /*
527          * We don't free the ring buffer; instead, we resize it because
528          * the max_tr ring buffer has some state (e.g. ring->clock) and
529          * we want to preserve it.
530          */
531         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
532         set_buffer_entries(&tr->max_buffer, 1);
533         tracing_reset_online_cpus(&tr->max_buffer);
534         tr->allocated_snapshot = false;
535 }
536
537 /**
538  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
539  *
540  * This is similar to tracing_snapshot(), but it will allocate the
541  * snapshot buffer if it isn't already allocated. Use this only
542  * where it is safe to sleep, as the allocation may sleep.
543  *
544  * This causes a swap between the snapshot buffer and the current live
545  * tracing buffer. You can use this to take snapshots of the live
546  * trace when some condition is triggered, but continue to trace.
547  */
548 void tracing_snapshot_alloc(void)
549 {
550         struct trace_array *tr = &global_trace;
551         int ret;
552
553         ret = alloc_snapshot(tr);
554         if (WARN_ON(ret < 0))
555                 return;
556
557         tracing_snapshot();
558 }
559 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
560 #else
561 void tracing_snapshot(void)
562 {
563         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
564 }
565 EXPORT_SYMBOL_GPL(tracing_snapshot);
566 void tracing_snapshot_alloc(void)
567 {
568         /* Give warning */
569         tracing_snapshot();
570 }
571 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
572 #endif /* CONFIG_TRACER_SNAPSHOT */
573
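/*
 * Example sketch (illustrative, not from this file): debugging code
 * typically allocates the snapshot once from a context that may
 * sleep, then triggers snapshots when a condition of interest fires.
 * The helper names are hypothetical.
 */
static void example_snapshot_setup(void)
{
	tracing_snapshot_alloc();	/* may sleep; allocates max_buffer */
}

static void example_condition_hit(void)
{
	tracing_snapshot();		/* swap live buffer and snapshot */
}
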
574 /**
575  * tracing_off - turn off tracing buffers
576  *
577  * This function stops the tracing buffers from recording data.
578  * It does not disable any overhead the tracers themselves may
579  * be causing. This function simply causes all recording to
580  * the ring buffers to fail.
581  */
582 void tracing_off(void)
583 {
584         if (global_trace.trace_buffer.buffer)
585                 ring_buffer_record_off(global_trace.trace_buffer.buffer);
586         /*
587          * This flag is only looked at when buffers haven't been
588          * allocated yet. We don't really care about the race
589          * between setting this flag and actually turning
590          * on the buffer.
591          */
592         global_trace.buffer_disabled = 1;
593 }
594 EXPORT_SYMBOL_GPL(tracing_off);
595
596 /**
597  * tracing_is_on - show state of ring buffers enabled
598  */
599 int tracing_is_on(void)
600 {
601         if (global_trace.trace_buffer.buffer)
602                 return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
603         return !global_trace.buffer_disabled;
604 }
605 EXPORT_SYMBOL_GPL(tracing_is_on);
606
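/*
 * Example sketch (illustrative): a common debugging pattern is to
 * freeze the ring buffers as soon as a bad state is spotted, so the
 * trace leading up to it survives for inspection. Re-enable with
 * tracing_on() or "echo 1 > /sys/kernel/debug/tracing/tracing_on".
 */
static void example_stop_on_error(bool bad_state_detected)
{
	if (bad_state_detected && tracing_is_on())
		tracing_off();
}
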
607 static int __init set_buf_size(char *str)
608 {
609         unsigned long buf_size;
610
611         if (!str)
612                 return 0;
613         buf_size = memparse(str, &str);
614         /* nr_entries can not be zero */
615         if (buf_size == 0)
616                 return 0;
617         trace_buf_size = buf_size;
618         return 1;
619 }
620 __setup("trace_buf_size=", set_buf_size);
621
622 static int __init set_tracing_thresh(char *str)
623 {
624         unsigned long threshold;
625         int ret;
626
627         if (!str)
628                 return 0;
629         ret = kstrtoul(str, 0, &threshold);
630         if (ret < 0)
631                 return 0;
632         tracing_thresh = threshold * 1000;
633         return 1;
634 }
635 __setup("tracing_thresh=", set_tracing_thresh);
636
637 unsigned long nsecs_to_usecs(unsigned long nsecs)
638 {
639         return nsecs / 1000;
640 }
641
642 /* These must match the bit positions in trace_iterator_flags */
643 static const char *trace_options[] = {
644         "print-parent",
645         "sym-offset",
646         "sym-addr",
647         "verbose",
648         "raw",
649         "hex",
650         "bin",
651         "block",
652         "stacktrace",
653         "trace_printk",
654         "ftrace_preempt",
655         "branch",
656         "annotate",
657         "userstacktrace",
658         "sym-userobj",
659         "printk-msg-only",
660         "context-info",
661         "latency-format",
662         "sleep-time",
663         "graph-time",
664         "record-cmd",
665         "overwrite",
666         "disable_on_free",
667         "irq-info",
668         "markers",
669         "function-trace",
670         NULL
671 };
672
673 static struct {
674         u64 (*func)(void);
675         const char *name;
676         int in_ns;              /* is this clock in nanoseconds? */
677 } trace_clocks[] = {
678         { trace_clock_local,    "local",        1 },
679         { trace_clock_global,   "global",       1 },
680         { trace_clock_counter,  "counter",      0 },
681         { trace_clock_jiffies,  "uptime",       1 },
682         { trace_clock,          "perf",         1 },
683         ARCH_TRACE_CLOCKS
684 };
685
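/*
 * Usage note (illustrative): the active clock is chosen at run time
 * through the debugfs "trace_clock" file, e.g.
 * "echo global > /sys/kernel/debug/tracing/trace_clock"; reading the
 * file lists the clocks above with the current one in brackets.
 */
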
686 /*
687  * trace_parser_get_init - gets the buffer for trace parser
688  */
689 int trace_parser_get_init(struct trace_parser *parser, int size)
690 {
691         memset(parser, 0, sizeof(*parser));
692
693         parser->buffer = kmalloc(size, GFP_KERNEL);
694         if (!parser->buffer)
695                 return 1;
696
697         parser->size = size;
698         return 0;
699 }
700
701 /*
702  * trace_parser_put - frees the buffer for trace parser
703  */
704 void trace_parser_put(struct trace_parser *parser)
705 {
706         kfree(parser->buffer);
707 }
708
709 /*
710  * trace_get_user - reads the user input string separated by space
711  * (matched by isspace(ch))
712  *
713  * For each string found the 'struct trace_parser' is updated,
714  * and the function returns.
715  *
716  * Returns number of bytes read.
717  *
718  * See kernel/trace/trace.h for 'struct trace_parser' details.
719  */
720 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
721         size_t cnt, loff_t *ppos)
722 {
723         char ch;
724         size_t read = 0;
725         ssize_t ret;
726
727         if (!*ppos)
728                 trace_parser_clear(parser);
729
730         ret = get_user(ch, ubuf++);
731         if (ret)
732                 goto out;
733
734         read++;
735         cnt--;
736
737         /*
738          * If the parser did not finish with the last write,
739          * continue reading the user input without skipping spaces.
740          */
741         if (!parser->cont) {
742                 /* skip white space */
743                 while (cnt && isspace(ch)) {
744                         ret = get_user(ch, ubuf++);
745                         if (ret)
746                                 goto out;
747                         read++;
748                         cnt--;
749                 }
750
751                 /* only spaces were written */
752                 if (isspace(ch)) {
753                         *ppos += read;
754                         ret = read;
755                         goto out;
756                 }
757
758                 parser->idx = 0;
759         }
760
761         /* read the non-space input */
762         while (cnt && !isspace(ch)) {
763                 if (parser->idx < parser->size - 1)
764                         parser->buffer[parser->idx++] = ch;
765                 else {
766                         ret = -EINVAL;
767                         goto out;
768                 }
769                 ret = get_user(ch, ubuf++);
770                 if (ret)
771                         goto out;
772                 read++;
773                 cnt--;
774         }
775
776         /* We either got finished input or we have to wait for another call. */
777         if (isspace(ch)) {
778                 parser->buffer[parser->idx] = 0;
779                 parser->cont = false;
780         } else {
781                 parser->cont = true;
782                 parser->buffer[parser->idx++] = ch;
783         }
784
785         *ppos += read;
786         ret = read;
787
788 out:
789         return ret;
790 }
791
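/*
 * Example sketch (illustrative, not from this file): a debugfs write
 * handler using the parser above to pull one whitespace-separated
 * token per call. The consumer of parser.buffer is left as a comment
 * since it is hypothetical.
 */
static ssize_t example_token_write(struct file *filp,
				   const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 256))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read >= 0 && trace_parser_loaded(&parser)) {
		/* act on the NUL-terminated token in parser.buffer */
	}

	trace_parser_put(&parser);
	return read;
}
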
792 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
793 {
794         int len;
795         int ret;
796
797         if (!cnt)
798                 return 0;
799
800         if (s->len <= s->readpos)
801                 return -EBUSY;
802
803         len = s->len - s->readpos;
804         if (cnt > len)
805                 cnt = len;
806         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
807         if (ret == cnt)
808                 return -EFAULT;
809
810         cnt -= ret;
811
812         s->readpos += cnt;
813         return cnt;
814 }
815
816 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
817 {
818         int len;
819
820         if (s->len <= s->readpos)
821                 return -EBUSY;
822
823         len = s->len - s->readpos;
824         if (cnt > len)
825                 cnt = len;
826         memcpy(buf, s->buffer + s->readpos, cnt);
827
828         s->readpos += cnt;
829         return cnt;
830 }
831
832 /*
833  * ftrace_max_lock is used to protect the swapping of buffers
834  * when taking a max snapshot. The buffers themselves are
835  * protected by per_cpu spinlocks. But the action of the swap
836  * needs its own lock.
837  *
838  * This is defined as an arch_spinlock_t in order to help
839  * with performance when lockdep debugging is enabled.
840  *
841  * It is also used in places other than update_max_tr(),
842  * so it needs to be defined outside of
843  * CONFIG_TRACER_MAX_TRACE.
844  */
845 static arch_spinlock_t ftrace_max_lock =
846         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
847
848 unsigned long __read_mostly     tracing_thresh;
849
850 #ifdef CONFIG_TRACER_MAX_TRACE
851 unsigned long __read_mostly     tracing_max_latency;
852
853 /*
854  * Copy the new maximum trace into the separate maximum-trace
855  * structure. (This way the maximum trace is permanently saved
856  * for later retrieval via /sys/kernel/debug/tracing/latency_trace.)
857  */
858 static void
859 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
860 {
861         struct trace_buffer *trace_buf = &tr->trace_buffer;
862         struct trace_buffer *max_buf = &tr->max_buffer;
863         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
864         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
865
866         max_buf->cpu = cpu;
867         max_buf->time_start = data->preempt_timestamp;
868
869         max_data->saved_latency = tracing_max_latency;
870         max_data->critical_start = data->critical_start;
871         max_data->critical_end = data->critical_end;
872
873         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
874         max_data->pid = tsk->pid;
875         /*
876          * If tsk == current, then use current_uid(), as that does not use
877          * RCU. The irq tracer can be called out of RCU scope.
878          */
879         if (tsk == current)
880                 max_data->uid = current_uid();
881         else
882                 max_data->uid = task_uid(tsk);
883
884         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
885         max_data->policy = tsk->policy;
886         max_data->rt_priority = tsk->rt_priority;
887
888         /* record this task's comm */
889         tracing_record_cmdline(tsk);
890 }
891
892 /**
893  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
894  * @tr: tracer
895  * @tsk: the task with the latency
896  * @cpu: The cpu that initiated the trace.
897  *
898  * Flip the buffers between the @tr and the max_tr and record information
899  * about which task was the cause of this latency.
900  */
901 void
902 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
903 {
904         struct ring_buffer *buf;
905
906         if (tr->stop_count)
907                 return;
908
909         WARN_ON_ONCE(!irqs_disabled());
910
911         if (!tr->allocated_snapshot) {
912                 /* Only the nop tracer should hit this when disabling */
913                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
914                 return;
915         }
916
917         arch_spin_lock(&ftrace_max_lock);
918
919         buf = tr->trace_buffer.buffer;
920         tr->trace_buffer.buffer = tr->max_buffer.buffer;
921         tr->max_buffer.buffer = buf;
922
923         __update_max_tr(tr, tsk, cpu);
924         arch_spin_unlock(&ftrace_max_lock);
925 }
926
927 /**
928  * update_max_tr_single - only copy one trace over, and reset the rest
929  * @tr: tracer
930  * @tsk: task with the latency
931  * @cpu: the cpu of the buffer to copy.
932  *
933  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
934  */
935 void
936 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
937 {
938         int ret;
939
940         if (tr->stop_count)
941                 return;
942
943         WARN_ON_ONCE(!irqs_disabled());
944         if (!tr->allocated_snapshot) {
945                 /* Only the nop tracer should hit this when disabling */
946                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
947                 return;
948         }
949
950         arch_spin_lock(&ftrace_max_lock);
951
952         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
953
954         if (ret == -EBUSY) {
955                 /*
956                  * We failed to swap the buffer due to a commit taking
957                  * place on this CPU. We fail to record, but we reset
958                  * the max trace buffer (no one writes directly to it)
959                  * and flag that it failed.
960                  */
961                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
962                         "Failed to swap buffers due to commit in progress\n");
963         }
964
965         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
966
967         __update_max_tr(tr, tsk, cpu);
968         arch_spin_unlock(&ftrace_max_lock);
969 }
970 #endif /* CONFIG_TRACER_MAX_TRACE */
971
972 static void default_wait_pipe(struct trace_iterator *iter)
973 {
974         /* Iterators are static, they should be filled or empty */
975         if (trace_buffer_iter(iter, iter->cpu_file))
976                 return;
977
978         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
979 }
980
981 #ifdef CONFIG_FTRACE_STARTUP_TEST
982 static int run_tracer_selftest(struct tracer *type)
983 {
984         struct trace_array *tr = &global_trace;
985         struct tracer *saved_tracer = tr->current_trace;
986         int ret;
987
988         if (!type->selftest || tracing_selftest_disabled)
989                 return 0;
990
991         /*
992          * Run a selftest on this tracer.
993          * Here we reset the trace buffer, and set the current
994          * tracer to be this tracer. The tracer can then run some
995          * internal tracing to verify that everything is in order.
996          * If we fail, we do not register this tracer.
997          */
998         tracing_reset_online_cpus(&tr->trace_buffer);
999
1000         tr->current_trace = type;
1001
1002 #ifdef CONFIG_TRACER_MAX_TRACE
1003         if (type->use_max_tr) {
1004                 /* If we expanded the buffers, make sure the max is expanded too */
1005                 if (ring_buffer_expanded)
1006                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1007                                            RING_BUFFER_ALL_CPUS);
1008                 tr->allocated_snapshot = true;
1009         }
1010 #endif
1011
1012         /* the test is responsible for initializing and enabling */
1013         pr_info("Testing tracer %s: ", type->name);
1014         ret = type->selftest(type, tr);
1015         /* the test is responsible for resetting too */
1016         tr->current_trace = saved_tracer;
1017         if (ret) {
1018                 printk(KERN_CONT "FAILED!\n");
1019                 /* Add the warning after printing 'FAILED' */
1020                 WARN_ON(1);
1021                 return -1;
1022         }
1023         /* Only reset on passing, to avoid touching corrupted buffers */
1024         tracing_reset_online_cpus(&tr->trace_buffer);
1025
1026 #ifdef CONFIG_TRACER_MAX_TRACE
1027         if (type->use_max_tr) {
1028                 tr->allocated_snapshot = false;
1029
1030                 /* Shrink the max buffer again */
1031                 if (ring_buffer_expanded)
1032                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1033                                            RING_BUFFER_ALL_CPUS);
1034         }
1035 #endif
1036
1037         printk(KERN_CONT "PASSED\n");
1038         return 0;
1039 }
1040 #else
1041 static inline int run_tracer_selftest(struct tracer *type)
1042 {
1043         return 0;
1044 }
1045 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1046
1047 /**
1048  * register_tracer - register a tracer with the ftrace system.
1049  * @type: the plugin for the tracer
1050  *
1051  * Register a new plugin tracer.
1052  */
1053 int register_tracer(struct tracer *type)
1054 {
1055         struct tracer *t;
1056         int ret = 0;
1057
1058         if (!type->name) {
1059                 pr_info("Tracer must have a name\n");
1060                 return -1;
1061         }
1062
1063         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1064                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1065                 return -1;
1066         }
1067
1068         mutex_lock(&trace_types_lock);
1069
1070         tracing_selftest_running = true;
1071
1072         for (t = trace_types; t; t = t->next) {
1073                 if (strcmp(type->name, t->name) == 0) {
1074                         /* already found */
1075                         pr_info("Tracer %s already registered\n",
1076                                 type->name);
1077                         ret = -1;
1078                         goto out;
1079                 }
1080         }
1081
1082         if (!type->set_flag)
1083                 type->set_flag = &dummy_set_flag;
1084         if (!type->flags)
1085                 type->flags = &dummy_tracer_flags;
1086         else
1087                 if (!type->flags->opts)
1088                         type->flags->opts = dummy_tracer_opt;
1089         if (!type->wait_pipe)
1090                 type->wait_pipe = default_wait_pipe;
1091
1092         ret = run_tracer_selftest(type);
1093         if (ret < 0)
1094                 goto out;
1095
1096         type->next = trace_types;
1097         trace_types = type;
1098
1099  out:
1100         tracing_selftest_running = false;
1101         mutex_unlock(&trace_types_lock);
1102
1103         if (ret || !default_bootup_tracer)
1104                 goto out_unlock;
1105
1106         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1107                 goto out_unlock;
1108
1109         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1110         /* Do we want this tracer to start on bootup? */
1111         tracing_set_tracer(type->name);
1112         default_bootup_tracer = NULL;
1113         /* disable other selftests, since this would break them. */
1114         tracing_selftest_disabled = true;
1115 #ifdef CONFIG_FTRACE_STARTUP_TEST
1116         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1117                type->name);
1118 #endif
1119
1120  out_unlock:
1121         return ret;
1122 }
1123
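/*
 * Example sketch (illustrative, not from this file): a minimal tracer
 * plugin registering itself at boot. register_tracer() above fills in
 * default set_flag/flags/wait_pipe hooks for anything left unset.
 * All names below are hypothetical.
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);
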
1124 void tracing_reset(struct trace_buffer *buf, int cpu)
1125 {
1126         struct ring_buffer *buffer = buf->buffer;
1127
1128         if (!buffer)
1129                 return;
1130
1131         ring_buffer_record_disable(buffer);
1132
1133         /* Make sure all commits have finished */
1134         synchronize_sched();
1135         ring_buffer_reset_cpu(buffer, cpu);
1136
1137         ring_buffer_record_enable(buffer);
1138 }
1139
1140 void tracing_reset_online_cpus(struct trace_buffer *buf)
1141 {
1142         struct ring_buffer *buffer = buf->buffer;
1143         int cpu;
1144
1145         if (!buffer)
1146                 return;
1147
1148         ring_buffer_record_disable(buffer);
1149
1150         /* Make sure all commits have finished */
1151         synchronize_sched();
1152
1153         buf->time_start = ftrace_now(buf->cpu);
1154
1155         for_each_online_cpu(cpu)
1156                 ring_buffer_reset_cpu(buffer, cpu);
1157
1158         ring_buffer_record_enable(buffer);
1159 }
1160
1161 void tracing_reset_current(int cpu)
1162 {
1163         tracing_reset(&global_trace.trace_buffer, cpu);
1164 }
1165
1166 void tracing_reset_all_online_cpus(void)
1167 {
1168         struct trace_array *tr;
1169
1170         mutex_lock(&trace_types_lock);
1171         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1172                 tracing_reset_online_cpus(&tr->trace_buffer);
1173 #ifdef CONFIG_TRACER_MAX_TRACE
1174                 tracing_reset_online_cpus(&tr->max_buffer);
1175 #endif
1176         }
1177         mutex_unlock(&trace_types_lock);
1178 }
1179
1180 #define SAVED_CMDLINES 128
1181 #define NO_CMDLINE_MAP UINT_MAX
1182 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1183 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1184 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1185 static int cmdline_idx;
1186 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1187
1188 /* temporarily disable recording */
1189 static atomic_t trace_record_cmdline_disabled __read_mostly;
1190
1191 static void trace_init_cmdlines(void)
1192 {
1193         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1194         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1195         cmdline_idx = 0;
1196 }
1197
1198 int is_tracing_stopped(void)
1199 {
1200         return global_trace.stop_count;
1201 }
1202
1203 /**
1204  * ftrace_off_permanent - disable all ftrace code permanently
1205  *
1206  * This should only be called when a serious anomaly has
1207  * been detected.  This will turn off function tracing,
1208  * ring buffers, and other tracing utilities. It takes no
1209  * locks and can be called from any context.
1210  */
1211 void ftrace_off_permanent(void)
1212 {
1213         tracing_disabled = 1;
1214         ftrace_stop();
1215         tracing_off_permanent();
1216 }
1217
1218 /**
1219  * tracing_start - quick start of the tracer
1220  *
1221  * If tracing is enabled but was stopped by tracing_stop,
1222  * this will start the tracer back up.
1223  */
1224 void tracing_start(void)
1225 {
1226         struct ring_buffer *buffer;
1227         unsigned long flags;
1228
1229         if (tracing_disabled)
1230                 return;
1231
1232         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1233         if (--global_trace.stop_count) {
1234                 if (global_trace.stop_count < 0) {
1235                         /* Someone screwed up their debugging */
1236                         WARN_ON_ONCE(1);
1237                         global_trace.stop_count = 0;
1238                 }
1239                 goto out;
1240         }
1241
1242         /* Prevent the buffers from switching */
1243         arch_spin_lock(&ftrace_max_lock);
1244
1245         buffer = global_trace.trace_buffer.buffer;
1246         if (buffer)
1247                 ring_buffer_record_enable(buffer);
1248
1249 #ifdef CONFIG_TRACER_MAX_TRACE
1250         buffer = global_trace.max_buffer.buffer;
1251         if (buffer)
1252                 ring_buffer_record_enable(buffer);
1253 #endif
1254
1255         arch_spin_unlock(&ftrace_max_lock);
1256
1257         ftrace_start();
1258  out:
1259         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1260 }
1261
1262 static void tracing_start_tr(struct trace_array *tr)
1263 {
1264         struct ring_buffer *buffer;
1265         unsigned long flags;
1266
1267         if (tracing_disabled)
1268                 return;
1269
1270         /* If global, we need to also start the max tracer */
1271         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1272                 return tracing_start();
1273
1274         raw_spin_lock_irqsave(&tr->start_lock, flags);
1275
1276         if (--tr->stop_count) {
1277                 if (tr->stop_count < 0) {
1278                         /* Someone screwed up their debugging */
1279                         WARN_ON_ONCE(1);
1280                         tr->stop_count = 0;
1281                 }
1282                 goto out;
1283         }
1284
1285         buffer = tr->trace_buffer.buffer;
1286         if (buffer)
1287                 ring_buffer_record_enable(buffer);
1288
1289  out:
1290         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1291 }
1292
1293 /**
1294  * tracing_stop - quick stop of the tracer
1295  *
1296  * Lightweight way to stop tracing. Use in conjunction with
1297  * tracing_start.
1298  */
1299 void tracing_stop(void)
1300 {
1301         struct ring_buffer *buffer;
1302         unsigned long flags;
1303
1304         ftrace_stop();
1305         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1306         if (global_trace.stop_count++)
1307                 goto out;
1308
1309         /* Prevent the buffers from switching */
1310         arch_spin_lock(&ftrace_max_lock);
1311
1312         buffer = global_trace.trace_buffer.buffer;
1313         if (buffer)
1314                 ring_buffer_record_disable(buffer);
1315
1316 #ifdef CONFIG_TRACER_MAX_TRACE
1317         buffer = global_trace.max_buffer.buffer;
1318         if (buffer)
1319                 ring_buffer_record_disable(buffer);
1320 #endif
1321
1322         arch_spin_unlock(&ftrace_max_lock);
1323
1324  out:
1325         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1326 }
1327
1328 static void tracing_stop_tr(struct trace_array *tr)
1329 {
1330         struct ring_buffer *buffer;
1331         unsigned long flags;
1332
1333         /* If global, we need to also stop the max tracer */
1334         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1335                 return tracing_stop();
1336
1337         raw_spin_lock_irqsave(&tr->start_lock, flags);
1338         if (tr->stop_count++)
1339                 goto out;
1340
1341         buffer = tr->trace_buffer.buffer;
1342         if (buffer)
1343                 ring_buffer_record_disable(buffer);
1344
1345  out:
1346         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1347 }
1348
1349 void trace_stop_cmdline_recording(void);
1350
1351 static void trace_save_cmdline(struct task_struct *tsk)
1352 {
1353         unsigned pid, idx;
1354
1355         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1356                 return;
1357
1358         /*
1359          * It's not the end of the world if we don't get
1360          * the lock, but we also don't want to spin
1361          * nor do we want to disable interrupts,
1362          * so if we miss here, then better luck next time.
1363          */
1364         if (!arch_spin_trylock(&trace_cmdline_lock))
1365                 return;
1366
1367         idx = map_pid_to_cmdline[tsk->pid];
1368         if (idx == NO_CMDLINE_MAP) {
1369                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1370
1371                 /*
1372                  * Check whether the cmdline buffer at idx has a pid
1373                  * mapped. We are going to overwrite that entry so we
1374                  * need to clear the map_pid_to_cmdline. Otherwise we
1375                  * would read the new comm for the old pid.
1376                  */
1377                 pid = map_cmdline_to_pid[idx];
1378                 if (pid != NO_CMDLINE_MAP)
1379                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1380
1381                 map_cmdline_to_pid[idx] = tsk->pid;
1382                 map_pid_to_cmdline[tsk->pid] = idx;
1383
1384                 cmdline_idx = idx;
1385         }
1386
1387         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1388
1389         arch_spin_unlock(&trace_cmdline_lock);
1390 }
1391
1392 void trace_find_cmdline(int pid, char comm[])
1393 {
1394         unsigned map;
1395
1396         if (!pid) {
1397                 strcpy(comm, "<idle>");
1398                 return;
1399         }
1400
1401         if (WARN_ON_ONCE(pid < 0)) {
1402                 strcpy(comm, "<XXX>");
1403                 return;
1404         }
1405
1406         if (pid > PID_MAX_DEFAULT) {
1407                 strcpy(comm, "<...>");
1408                 return;
1409         }
1410
1411         preempt_disable();
1412         arch_spin_lock(&trace_cmdline_lock);
1413         map = map_pid_to_cmdline[pid];
1414         if (map != NO_CMDLINE_MAP)
1415                 strcpy(comm, saved_cmdlines[map]);
1416         else
1417                 strcpy(comm, "<...>");
1418
1419         arch_spin_unlock(&trace_cmdline_lock);
1420         preempt_enable();
1421 }
1422
1423 void tracing_record_cmdline(struct task_struct *tsk)
1424 {
1425         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1426                 return;
1427
1428         if (!__this_cpu_read(trace_cmdline_save))
1429                 return;
1430
1431         __this_cpu_write(trace_cmdline_save, false);
1432
1433         trace_save_cmdline(tsk);
1434 }
1435
1436 void
1437 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1438                              int pc)
1439 {
1440         struct task_struct *tsk = current;
1441
1442         entry->preempt_count            = pc & 0xff;
1443         entry->pid                      = (tsk) ? tsk->pid : 0;
1444         entry->flags =
1445 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1446                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1447 #else
1448                 TRACE_FLAG_IRQS_NOSUPPORT |
1449 #endif
1450                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1451                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1452                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1453 }
1454 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1455
1456 struct ring_buffer_event *
1457 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1458                           int type,
1459                           unsigned long len,
1460                           unsigned long flags, int pc)
1461 {
1462         struct ring_buffer_event *event;
1463
1464         event = ring_buffer_lock_reserve(buffer, len);
1465         if (event != NULL) {
1466                 struct trace_entry *ent = ring_buffer_event_data(event);
1467
1468                 tracing_generic_entry_update(ent, flags, pc);
1469                 ent->type = type;
1470         }
1471
1472         return event;
1473 }
1474
1475 void
1476 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1477 {
1478         __this_cpu_write(trace_cmdline_save, true);
1479         ring_buffer_unlock_commit(buffer, event);
1480 }
1481
1482 static inline void
1483 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1484                              struct ring_buffer_event *event,
1485                              unsigned long flags, int pc)
1486 {
1487         __buffer_unlock_commit(buffer, event);
1488
1489         ftrace_trace_stack(buffer, flags, 6, pc);
1490         ftrace_trace_userstack(buffer, flags, pc);
1491 }
1492
1493 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1494                                 struct ring_buffer_event *event,
1495                                 unsigned long flags, int pc)
1496 {
1497         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1498 }
1499 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1500
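/*
 * Example sketch (illustrative, not from this file): the usual
 * reserve / fill / commit sequence built on the helpers above, here
 * emitting a small print entry. A failed reserve is simply dropped.
 */
static void example_emit_entry(struct trace_array *tr, unsigned long ip)
{
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	struct ring_buffer_event *event;
	struct print_entry *entry;
	unsigned long flags;
	int pc = preempt_count();

	local_save_flags(flags);
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT,
					  sizeof(*entry) + 9, flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(entry->buf, "example\n", 9);	/* includes the NUL */

	trace_buffer_unlock_commit(buffer, event, flags, pc);
}
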
1501 struct ring_buffer_event *
1502 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1503                           struct ftrace_event_file *ftrace_file,
1504                           int type, unsigned long len,
1505                           unsigned long flags, int pc)
1506 {
1507         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1508         return trace_buffer_lock_reserve(*current_rb,
1509                                          type, len, flags, pc);
1510 }
1511 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1512
1513 struct ring_buffer_event *
1514 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1515                                   int type, unsigned long len,
1516                                   unsigned long flags, int pc)
1517 {
1518         *current_rb = global_trace.trace_buffer.buffer;
1519         return trace_buffer_lock_reserve(*current_rb,
1520                                          type, len, flags, pc);
1521 }
1522 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1523
1524 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1525                                         struct ring_buffer_event *event,
1526                                         unsigned long flags, int pc)
1527 {
1528         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1529 }
1530 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1531
1532 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1533                                      struct ring_buffer_event *event,
1534                                      unsigned long flags, int pc,
1535                                      struct pt_regs *regs)
1536 {
1537         __buffer_unlock_commit(buffer, event);
1538
1539         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1540         ftrace_trace_userstack(buffer, flags, pc);
1541 }
1542 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1543
1544 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1545                                          struct ring_buffer_event *event)
1546 {
1547         ring_buffer_discard_commit(buffer, event);
1548 }
1549 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1550
1551 void
1552 trace_function(struct trace_array *tr,
1553                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1554                int pc)
1555 {
1556         struct ftrace_event_call *call = &event_function;
1557         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1558         struct ring_buffer_event *event;
1559         struct ftrace_entry *entry;
1560
1561         /* If we are reading the ring buffer, don't trace */
1562         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1563                 return;
1564
1565         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1566                                           flags, pc);
1567         if (!event)
1568                 return;
1569         entry   = ring_buffer_event_data(event);
1570         entry->ip                       = ip;
1571         entry->parent_ip                = parent_ip;
1572
1573         if (!filter_check_discard(call, entry, buffer, event))
1574                 __buffer_unlock_commit(buffer, event);
1575 }
1576
1577 void
1578 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1579        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1580        int pc)
1581 {
1582         if (likely(!atomic_read(&data->disabled)))
1583                 trace_function(tr, ip, parent_ip, flags, pc);
1584 }
1585
1586 #ifdef CONFIG_STACKTRACE
1587
1588 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1589 struct ftrace_stack {
1590         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1591 };
1592
1593 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1594 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1595
1596 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1597                                  unsigned long flags,
1598                                  int skip, int pc, struct pt_regs *regs)
1599 {
1600         struct ftrace_event_call *call = &event_kernel_stack;
1601         struct ring_buffer_event *event;
1602         struct stack_entry *entry;
1603         struct stack_trace trace;
1604         int use_stack;
1605         int size = FTRACE_STACK_ENTRIES;
1606
1607         trace.nr_entries        = 0;
1608         trace.skip              = skip;
1609
1610         /*
1611          * Since events can happen in NMIs there's no safe way to
1612          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1613          * or NMI comes in, it will just have to use the default
1614          * FTRACE_STACK_ENTRIES.
1615          */
1616         preempt_disable_notrace();
1617
1618         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1619         /*
1620          * We don't need any atomic variables, just a barrier.
1621          * If an interrupt comes in, we don't care, because it would
1622          * have exited and put the counter back to what we want.
1623          * We just need a barrier to keep gcc from moving things
1624          * around.
1625          */
1626         barrier();
1627         if (use_stack == 1) {
1628                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1629                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1630
1631                 if (regs)
1632                         save_stack_trace_regs(regs, &trace);
1633                 else
1634                         save_stack_trace(&trace);
1635
1636                 if (trace.nr_entries > size)
1637                         size = trace.nr_entries;
1638         } else
1639                 /* From now on, use_stack is a boolean */
1640                 use_stack = 0;
1641
1642         size *= sizeof(unsigned long);
1643
1644         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1645                                           sizeof(*entry) + size, flags, pc);
1646         if (!event)
1647                 goto out;
1648         entry = ring_buffer_event_data(event);
1649
1650         memset(&entry->caller, 0, size);
1651
1652         if (use_stack)
1653                 memcpy(&entry->caller, trace.entries,
1654                        trace.nr_entries * sizeof(unsigned long));
1655         else {
1656                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1657                 trace.entries           = entry->caller;
1658                 if (regs)
1659                         save_stack_trace_regs(regs, &trace);
1660                 else
1661                         save_stack_trace(&trace);
1662         }
1663
1664         entry->size = trace.nr_entries;
1665
1666         if (!filter_check_discard(call, entry, buffer, event))
1667                 __buffer_unlock_commit(buffer, event);
1668
1669  out:
1670         /* Again, don't let gcc optimize things here */
1671         barrier();
1672         __this_cpu_dec(ftrace_stack_reserve);
1673         preempt_enable_notrace();
1674
1675 }
1676
1677 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1678                              int skip, int pc, struct pt_regs *regs)
1679 {
1680         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1681                 return;
1682
1683         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1684 }
1685
1686 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1687                         int skip, int pc)
1688 {
1689         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1690                 return;
1691
1692         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1693 }
1694
1695 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1696                    int pc)
1697 {
1698         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1699 }
1700
1701 /**
1702  * trace_dump_stack - record a stack back trace in the trace buffer
1703  * @skip: Number of functions to skip (helper handlers)
1704  */
1705 void trace_dump_stack(int skip)
1706 {
1707         unsigned long flags;
1708
1709         if (tracing_disabled || tracing_selftest_running)
1710                 return;
1711
1712         local_save_flags(flags);
1713
1714         /*
1715          * Skip 3 more; that seems to get us to the caller of
1716          * this function.
1717          */
1718         skip += 3;
1719         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1720                              flags, skip, preempt_count(), NULL);
1721 }
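/*
 * Usage sketch (illustrative): any kernel code can drop the current call
 * chain into the ring buffer while debugging, e.g.
 *
 *	if (unexpected_state)			// hypothetical condition
 *		trace_dump_stack(0);
 *
 * The "skip += 3" above hides the tracing-internal frames so the recorded
 * stack starts at the caller of trace_dump_stack().
 */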
1722
1723 static DEFINE_PER_CPU(int, user_stack_count);
1724
1725 void
1726 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1727 {
1728         struct ftrace_event_call *call = &event_user_stack;
1729         struct ring_buffer_event *event;
1730         struct userstack_entry *entry;
1731         struct stack_trace trace;
1732
1733         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1734                 return;
1735
1736         /*
1737          * NMIs cannot handle page faults, even with fixups.
1738          * Saving the user stack can (and often does) fault.
1739          */
1740         if (unlikely(in_nmi()))
1741                 return;
1742
1743         /*
1744          * Prevent recursion, since user stack tracing may
1745          * trigger other kernel events.
1746          */
1747         preempt_disable();
1748         if (__this_cpu_read(user_stack_count))
1749                 goto out;
1750
1751         __this_cpu_inc(user_stack_count);
1752
1753         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1754                                           sizeof(*entry), flags, pc);
1755         if (!event)
1756                 goto out_drop_count;
1757         entry   = ring_buffer_event_data(event);
1758
1759         entry->tgid             = current->tgid;
1760         memset(&entry->caller, 0, sizeof(entry->caller));
1761
1762         trace.nr_entries        = 0;
1763         trace.max_entries       = FTRACE_STACK_ENTRIES;
1764         trace.skip              = 0;
1765         trace.entries           = entry->caller;
1766
1767         save_stack_trace_user(&trace);
1768         if (!filter_check_discard(call, entry, buffer, event))
1769                 __buffer_unlock_commit(buffer, event);
1770
1771  out_drop_count:
1772         __this_cpu_dec(user_stack_count);
1773  out:
1774         preempt_enable();
1775 }
1776
1777 #ifdef UNUSED
1778 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1779 {
1780         ftrace_trace_userstack(tr, flags, preempt_count());
1781 }
1782 #endif /* UNUSED */
1783
1784 #endif /* CONFIG_STACKTRACE */
1785
1786 /* created for use with alloc_percpu */
1787 struct trace_buffer_struct {
1788         char buffer[TRACE_BUF_SIZE];
1789 };
1790
1791 static struct trace_buffer_struct *trace_percpu_buffer;
1792 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1793 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1794 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1795
1796 /*
1797  * The buffer used depends on the context. There is a per-cpu
1798  * buffer for normal context, softirq context, hard irq context and
1799  * NMI context. This allows for lockless recording.
1800  *
1801  * Note: if the buffers failed to be allocated, then this returns NULL.
1802  */
1803 static char *get_trace_buf(void)
1804 {
1805         struct trace_buffer_struct *percpu_buffer;
1806
1807         /*
1808          * If we have allocated per cpu buffers, then we do not
1809          * need to do any locking.
1810          */
1811         if (in_nmi())
1812                 percpu_buffer = trace_percpu_nmi_buffer;
1813         else if (in_irq())
1814                 percpu_buffer = trace_percpu_irq_buffer;
1815         else if (in_softirq())
1816                 percpu_buffer = trace_percpu_sirq_buffer;
1817         else
1818                 percpu_buffer = trace_percpu_buffer;
1819
1820         if (!percpu_buffer)
1821                 return NULL;
1822
1823         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1824 }
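/*
 * Sketch of the intended calling pattern for get_trace_buf() (see
 * trace_vbprintk() below): the returned buffer is only valid while
 * preemption stays disabled, since it is chosen per cpu and per context.
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer)
 *		len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *	...
 *	preempt_enable_notrace();
 */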
1825
1826 static int alloc_percpu_trace_buffer(void)
1827 {
1828         struct trace_buffer_struct *buffers;
1829         struct trace_buffer_struct *sirq_buffers;
1830         struct trace_buffer_struct *irq_buffers;
1831         struct trace_buffer_struct *nmi_buffers;
1832
1833         buffers = alloc_percpu(struct trace_buffer_struct);
1834         if (!buffers)
1835                 goto err_warn;
1836
1837         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1838         if (!sirq_buffers)
1839                 goto err_sirq;
1840
1841         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1842         if (!irq_buffers)
1843                 goto err_irq;
1844
1845         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1846         if (!nmi_buffers)
1847                 goto err_nmi;
1848
1849         trace_percpu_buffer = buffers;
1850         trace_percpu_sirq_buffer = sirq_buffers;
1851         trace_percpu_irq_buffer = irq_buffers;
1852         trace_percpu_nmi_buffer = nmi_buffers;
1853
1854         return 0;
1855
1856  err_nmi:
1857         free_percpu(irq_buffers);
1858  err_irq:
1859         free_percpu(sirq_buffers);
1860  err_sirq:
1861         free_percpu(buffers);
1862  err_warn:
1863         WARN(1, "Could not allocate percpu trace_printk buffer");
1864         return -ENOMEM;
1865 }
1866
1867 static int buffers_allocated;
1868
1869 void trace_printk_init_buffers(void)
1870 {
1871         if (buffers_allocated)
1872                 return;
1873
1874         if (alloc_percpu_trace_buffer())
1875                 return;
1876
1877         pr_info("ftrace: Allocated trace_printk buffers\n");
1878
1879         /* Expand the buffers to set size */
1880         tracing_update_buffers();
1881
1882         buffers_allocated = 1;
1883
1884         /*
1885          * trace_printk_init_buffers() can be called by modules.
1886          * If that happens, then we need to start cmdline recording
1887          * directly here. If the global_trace.trace_buffer.buffer is
1888          * already allocated, then this was called by module code.
1889          */
1890         if (global_trace.trace_buffer.buffer)
1891                 tracing_start_cmdline_record();
1892 }
1893
1894 void trace_printk_start_comm(void)
1895 {
1896         /* Start tracing comms if trace printk is set */
1897         if (!buffers_allocated)
1898                 return;
1899         tracing_start_cmdline_record();
1900 }
1901
1902 static void trace_printk_start_stop_comm(int enabled)
1903 {
1904         if (!buffers_allocated)
1905                 return;
1906
1907         if (enabled)
1908                 tracing_start_cmdline_record();
1909         else
1910                 tracing_stop_cmdline_record();
1911 }
1912
1913 /**
1914  * trace_vbprintk - write a binary trace_printk message to the tracing buffer
1915  * @ip: address of the caller; @fmt: format string; @args: arguments for @fmt
1916  */
1917 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1918 {
1919         struct ftrace_event_call *call = &event_bprint;
1920         struct ring_buffer_event *event;
1921         struct ring_buffer *buffer;
1922         struct trace_array *tr = &global_trace;
1923         struct bprint_entry *entry;
1924         unsigned long flags;
1925         char *tbuffer;
1926         int len = 0, size, pc;
1927
1928         if (unlikely(tracing_selftest_running || tracing_disabled))
1929                 return 0;
1930
1931         /* Don't pollute graph traces with trace_vprintk internals */
1932         pause_graph_tracing();
1933
1934         pc = preempt_count();
1935         preempt_disable_notrace();
1936
1937         tbuffer = get_trace_buf();
1938         if (!tbuffer) {
1939                 len = 0;
1940                 goto out;
1941         }
1942
1943         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
1944
1945         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
1946                 goto out;
1947
1948         local_save_flags(flags);
1949         size = sizeof(*entry) + sizeof(u32) * len;
1950         buffer = tr->trace_buffer.buffer;
1951         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1952                                           flags, pc);
1953         if (!event)
1954                 goto out;
1955         entry = ring_buffer_event_data(event);
1956         entry->ip                       = ip;
1957         entry->fmt                      = fmt;
1958
1959         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
1960         if (!filter_check_discard(call, entry, buffer, event)) {
1961                 __buffer_unlock_commit(buffer, event);
1962                 ftrace_trace_stack(buffer, flags, 6, pc);
1963         }
1964
1965 out:
1966         preempt_enable_notrace();
1967         unpause_graph_tracing();
1968
1969         return len;
1970 }
1971 EXPORT_SYMBOL_GPL(trace_vbprintk);
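/*
 * trace_vbprintk() is normally reached via the trace_printk() macro rather
 * than called directly. A rough sketch of such a wrapper (the real one
 * lives in kernel/trace/trace_printk.c and performs additional checks):
 *
 *	int __trace_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */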
1972
1973 static int
1974 __trace_array_vprintk(struct ring_buffer *buffer,
1975                       unsigned long ip, const char *fmt, va_list args)
1976 {
1977         struct ftrace_event_call *call = &event_print;
1978         struct ring_buffer_event *event;
1979         int len = 0, size, pc;
1980         struct print_entry *entry;
1981         unsigned long flags;
1982         char *tbuffer;
1983
1984         if (tracing_disabled || tracing_selftest_running)
1985                 return 0;
1986
1987         /* Don't pollute graph traces with trace_vprintk internals */
1988         pause_graph_tracing();
1989
1990         pc = preempt_count();
1991         preempt_disable_notrace();
1992
1993
1994         tbuffer = get_trace_buf();
1995         if (!tbuffer) {
1996                 len = 0;
1997                 goto out;
1998         }
1999
2000         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2001         if (len > TRACE_BUF_SIZE)
2002                 goto out;
2003
2004         local_save_flags(flags);
2005         size = sizeof(*entry) + len + 1;
2006         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2007                                           flags, pc);
2008         if (!event)
2009                 goto out;
2010         entry = ring_buffer_event_data(event);
2011         entry->ip = ip;
2012
2013         memcpy(&entry->buf, tbuffer, len);
2014         entry->buf[len] = '\0';
2015         if (!filter_check_discard(call, entry, buffer, event)) {
2016                 __buffer_unlock_commit(buffer, event);
2017                 ftrace_trace_stack(buffer, flags, 6, pc);
2018         }
2019  out:
2020         preempt_enable_notrace();
2021         unpause_graph_tracing();
2022
2023         return len;
2024 }
2025
2026 int trace_array_vprintk(struct trace_array *tr,
2027                         unsigned long ip, const char *fmt, va_list args)
2028 {
2029         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2030 }
2031
2032 int trace_array_printk(struct trace_array *tr,
2033                        unsigned long ip, const char *fmt, ...)
2034 {
2035         int ret;
2036         va_list ap;
2037
2038         if (!(trace_flags & TRACE_ITER_PRINTK))
2039                 return 0;
2040
2041         va_start(ap, fmt);
2042         ret = trace_array_vprintk(tr, ip, fmt, ap);
2043         va_end(ap);
2044         return ret;
2045 }
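/*
 * Hedged usage sketch for trace_array_printk(): code that owns its own
 * trace_array instance (my_tr below is hypothetical) can write into that
 * instance's buffer instead of the global one:
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "reset took %d us\n", usecs);
 *
 * _THIS_IP_ supplies the instruction pointer recorded with the event, just
 * as trace_printk() does for the global buffer.
 */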
2046
2047 int trace_array_printk_buf(struct ring_buffer *buffer,
2048                            unsigned long ip, const char *fmt, ...)
2049 {
2050         int ret;
2051         va_list ap;
2052
2053         if (!(trace_flags & TRACE_ITER_PRINTK))
2054                 return 0;
2055
2056         va_start(ap, fmt);
2057         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2058         va_end(ap);
2059         return ret;
2060 }
2061
2062 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2063 {
2064         return trace_array_vprintk(&global_trace, ip, fmt, args);
2065 }
2066 EXPORT_SYMBOL_GPL(trace_vprintk);
2067
2068 static void trace_iterator_increment(struct trace_iterator *iter)
2069 {
2070         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2071
2072         iter->idx++;
2073         if (buf_iter)
2074                 ring_buffer_read(buf_iter, NULL);
2075 }
2076
2077 static struct trace_entry *
2078 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2079                 unsigned long *lost_events)
2080 {
2081         struct ring_buffer_event *event;
2082         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2083
2084         if (buf_iter)
2085                 event = ring_buffer_iter_peek(buf_iter, ts);
2086         else
2087                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2088                                          lost_events);
2089
2090         if (event) {
2091                 iter->ent_size = ring_buffer_event_length(event);
2092                 return ring_buffer_event_data(event);
2093         }
2094         iter->ent_size = 0;
2095         return NULL;
2096 }
2097
2098 static struct trace_entry *
2099 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2100                   unsigned long *missing_events, u64 *ent_ts)
2101 {
2102         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2103         struct trace_entry *ent, *next = NULL;
2104         unsigned long lost_events = 0, next_lost = 0;
2105         int cpu_file = iter->cpu_file;
2106         u64 next_ts = 0, ts;
2107         int next_cpu = -1;
2108         int next_size = 0;
2109         int cpu;
2110
2111         /*
2112          * If we are in a per_cpu trace file, don't bother iterating over
2113          * all cpus; just peek directly at that cpu.
2114          */
2115         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2116                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2117                         return NULL;
2118                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2119                 if (ent_cpu)
2120                         *ent_cpu = cpu_file;
2121
2122                 return ent;
2123         }
2124
2125         for_each_tracing_cpu(cpu) {
2126
2127                 if (ring_buffer_empty_cpu(buffer, cpu))
2128                         continue;
2129
2130                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2131
2132                 /*
2133                  * Pick the entry with the smallest timestamp:
2134                  */
2135                 if (ent && (!next || ts < next_ts)) {
2136                         next = ent;
2137                         next_cpu = cpu;
2138                         next_ts = ts;
2139                         next_lost = lost_events;
2140                         next_size = iter->ent_size;
2141                 }
2142         }
2143
2144         iter->ent_size = next_size;
2145
2146         if (ent_cpu)
2147                 *ent_cpu = next_cpu;
2148
2149         if (ent_ts)
2150                 *ent_ts = next_ts;
2151
2152         if (missing_events)
2153                 *missing_events = next_lost;
2154
2155         return next;
2156 }
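/*
 * Worked example of the merge in __find_next_entry() (illustrative
 * numbers): if the next pending timestamps per cpu are
 *
 *	cpu0: 100us	cpu1: 42us	cpu2: 73us
 *
 * the cpu1 entry is returned and next_cpu is set to 1, so repeated calls
 * interleave all cpus in global timestamp order.
 */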
2157
2158 /* Find the next real entry, without updating the iterator itself */
2159 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2160                                           int *ent_cpu, u64 *ent_ts)
2161 {
2162         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2163 }
2164
2165 /* Find the next real entry, and increment the iterator to the next entry */
2166 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2167 {
2168         iter->ent = __find_next_entry(iter, &iter->cpu,
2169                                       &iter->lost_events, &iter->ts);
2170
2171         if (iter->ent)
2172                 trace_iterator_increment(iter);
2173
2174         return iter->ent ? iter : NULL;
2175 }
2176
2177 static void trace_consume(struct trace_iterator *iter)
2178 {
2179         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2180                             &iter->lost_events);
2181 }
2182
2183 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2184 {
2185         struct trace_iterator *iter = m->private;
2186         int i = (int)*pos;
2187         void *ent;
2188
2189         WARN_ON_ONCE(iter->leftover);
2190
2191         (*pos)++;
2192
2193         /* can't go backwards */
2194         if (iter->idx > i)
2195                 return NULL;
2196
2197         if (iter->idx < 0)
2198                 ent = trace_find_next_entry_inc(iter);
2199         else
2200                 ent = iter;
2201
2202         while (ent && iter->idx < i)
2203                 ent = trace_find_next_entry_inc(iter);
2204
2205         iter->pos = *pos;
2206
2207         return ent;
2208 }
2209
2210 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2211 {
2212         struct ring_buffer_event *event;
2213         struct ring_buffer_iter *buf_iter;
2214         unsigned long entries = 0;
2215         u64 ts;
2216
2217         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2218
2219         buf_iter = trace_buffer_iter(iter, cpu);
2220         if (!buf_iter)
2221                 return;
2222
2223         ring_buffer_iter_reset(buf_iter);
2224
2225         /*
2226          * With the max latency tracers, it can happen that a reset
2227          * never took place on a cpu. This is evident from the
2228          * timestamp being before the start of the buffer.
2229          */
2230         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2231                 if (ts >= iter->trace_buffer->time_start)
2232                         break;
2233                 entries++;
2234                 ring_buffer_read(buf_iter, NULL);
2235         }
2236
2237         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2238 }
2239
2240 /*
2241  * The current tracer is copied to avoid taking a global lock
2242  * all around.
2243  */
2244 static void *s_start(struct seq_file *m, loff_t *pos)
2245 {
2246         struct trace_iterator *iter = m->private;
2247         struct trace_array *tr = iter->tr;
2248         int cpu_file = iter->cpu_file;
2249         void *p = NULL;
2250         loff_t l = 0;
2251         int cpu;
2252
2253         /*
2254          * Copy the tracer to avoid using a global lock all around.
2255          * iter->trace is a copy of current_trace; the name pointer
2256          * may be compared instead of using strcmp(), as iter->trace->name
2257          * will point to the same string as current_trace->name.
2258          */
2259         mutex_lock(&trace_types_lock);
2260         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2261                 *iter->trace = *tr->current_trace;
2262         mutex_unlock(&trace_types_lock);
2263
2264 #ifdef CONFIG_TRACER_MAX_TRACE
2265         if (iter->snapshot && iter->trace->use_max_tr)
2266                 return ERR_PTR(-EBUSY);
2267 #endif
2268
2269         if (!iter->snapshot)
2270                 atomic_inc(&trace_record_cmdline_disabled);
2271
2272         if (*pos != iter->pos) {
2273                 iter->ent = NULL;
2274                 iter->cpu = 0;
2275                 iter->idx = -1;
2276
2277                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2278                         for_each_tracing_cpu(cpu)
2279                                 tracing_iter_reset(iter, cpu);
2280                 } else
2281                         tracing_iter_reset(iter, cpu_file);
2282
2283                 iter->leftover = 0;
2284                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2285                         ;
2286
2287         } else {
2288                 /*
2289                  * If we overflowed the seq_file before, then we want
2290                  * to just reuse the trace_seq buffer again.
2291                  */
2292                 if (iter->leftover)
2293                         p = iter;
2294                 else {
2295                         l = *pos - 1;
2296                         p = s_next(m, p, &l);
2297                 }
2298         }
2299
2300         trace_event_read_lock();
2301         trace_access_lock(cpu_file);
2302         return p;
2303 }
2304
2305 static void s_stop(struct seq_file *m, void *p)
2306 {
2307         struct trace_iterator *iter = m->private;
2308
2309 #ifdef CONFIG_TRACER_MAX_TRACE
2310         if (iter->snapshot && iter->trace->use_max_tr)
2311                 return;
2312 #endif
2313
2314         if (!iter->snapshot)
2315                 atomic_dec(&trace_record_cmdline_disabled);
2316
2317         trace_access_unlock(iter->cpu_file);
2318         trace_event_read_unlock();
2319 }
2320
2321 static void
2322 get_total_entries(struct trace_buffer *buf,
2323                   unsigned long *total, unsigned long *entries)
2324 {
2325         unsigned long count;
2326         int cpu;
2327
2328         *total = 0;
2329         *entries = 0;
2330
2331         for_each_tracing_cpu(cpu) {
2332                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2333                 /*
2334                  * If this buffer has skipped entries, then we hold all
2335                  * entries for the trace, and we need to ignore the
2336                  * ones before the buffer's start timestamp.
2337                  */
2338                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2339                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2340                         /* total is the same as the entries */
2341                         *total += count;
2342                 } else
2343                         *total += count +
2344                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2345                 *entries += count;
2346         }
2347 }
2348
2349 static void print_lat_help_header(struct seq_file *m)
2350 {
2351         seq_puts(m, "#                  _------=> CPU#            \n");
2352         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2353         seq_puts(m, "#                | / _----=> need-resched    \n");
2354         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2355         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2356         seq_puts(m, "#                |||| /     delay             \n");
2357         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2358         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2359 }
2360
2361 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2362 {
2363         unsigned long total;
2364         unsigned long entries;
2365
2366         get_total_entries(buf, &total, &entries);
2367         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2368                    entries, total, num_online_cpus());
2369         seq_puts(m, "#\n");
2370 }
2371
2372 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2373 {
2374         print_event_info(buf, m);
2375         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2376         seq_puts(m, "#              | |       |          |         |\n");
2377 }
2378
2379 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2380 {
2381         print_event_info(buf, m);
2382         seq_puts(m, "#                              _-----=> irqs-off\n");
2383         seq_puts(m, "#                             / _----=> need-resched\n");
2384         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2385         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2386         seq_puts(m, "#                            ||| /     delay\n");
2387         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2388         seq_puts(m, "#              | |       |   ||||       |         |\n");
2389 }
2390
2391 void
2392 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2393 {
2394         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2395         struct trace_buffer *buf = iter->trace_buffer;
2396         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2397         struct tracer *type = iter->trace;
2398         unsigned long entries;
2399         unsigned long total;
2400         const char *name = "preemption";
2401
2402         name = type->name;
2403
2404         get_total_entries(buf, &total, &entries);
2405
2406         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2407                    name, UTS_RELEASE);
2408         seq_puts(m, "# -----------------------------------"
2409                  "---------------------------------\n");
2410         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2411                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2412                    nsecs_to_usecs(data->saved_latency),
2413                    entries,
2414                    total,
2415                    buf->cpu,
2416 #if defined(CONFIG_PREEMPT_NONE)
2417                    "server",
2418 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2419                    "desktop",
2420 #elif defined(CONFIG_PREEMPT)
2421                    "preempt",
2422 #else
2423                    "unknown",
2424 #endif
2425                    /* These are reserved for later use */
2426                    0, 0, 0, 0);
2427 #ifdef CONFIG_SMP
2428         seq_printf(m, " #P:%d)\n", num_online_cpus());
2429 #else
2430         seq_puts(m, ")\n");
2431 #endif
2432         seq_puts(m, "#    -----------------\n");
2433         seq_printf(m, "#    | task: %.16s-%d "
2434                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2435                    data->comm, data->pid,
2436                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2437                    data->policy, data->rt_priority);
2438         seq_puts(m, "#    -----------------\n");
2439
2440         if (data->critical_start) {
2441                 seq_puts(m, "#  => started at: ");
2442                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2443                 trace_print_seq(m, &iter->seq);
2444                 seq_puts(m, "\n#  => ended at:   ");
2445                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2446                 trace_print_seq(m, &iter->seq);
2447                 seq_puts(m, "\n#\n");
2448         }
2449
2450         seq_puts(m, "#\n");
2451 }
2452
2453 static void test_cpu_buff_start(struct trace_iterator *iter)
2454 {
2455         struct trace_seq *s = &iter->seq;
2456
2457         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2458                 return;
2459
2460         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2461                 return;
2462
2463         if (cpumask_test_cpu(iter->cpu, iter->started))
2464                 return;
2465
2466         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2467                 return;
2468
2469         cpumask_set_cpu(iter->cpu, iter->started);
2470
2471         /* Don't print started cpu buffer for the first entry of the trace */
2472         if (iter->idx > 1)
2473                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2474                                 iter->cpu);
2475 }
2476
2477 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2478 {
2479         struct trace_seq *s = &iter->seq;
2480         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2481         struct trace_entry *entry;
2482         struct trace_event *event;
2483
2484         entry = iter->ent;
2485
2486         test_cpu_buff_start(iter);
2487
2488         event = ftrace_find_event(entry->type);
2489
2490         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2491                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2492                         if (!trace_print_lat_context(iter))
2493                                 goto partial;
2494                 } else {
2495                         if (!trace_print_context(iter))
2496                                 goto partial;
2497                 }
2498         }
2499
2500         if (event)
2501                 return event->funcs->trace(iter, sym_flags, event);
2502
2503         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2504                 goto partial;
2505
2506         return TRACE_TYPE_HANDLED;
2507 partial:
2508         return TRACE_TYPE_PARTIAL_LINE;
2509 }
2510
2511 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2512 {
2513         struct trace_seq *s = &iter->seq;
2514         struct trace_entry *entry;
2515         struct trace_event *event;
2516
2517         entry = iter->ent;
2518
2519         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2520                 if (!trace_seq_printf(s, "%d %d %llu ",
2521                                       entry->pid, iter->cpu, iter->ts))
2522                         goto partial;
2523         }
2524
2525         event = ftrace_find_event(entry->type);
2526         if (event)
2527                 return event->funcs->raw(iter, 0, event);
2528
2529         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2530                 goto partial;
2531
2532         return TRACE_TYPE_HANDLED;
2533 partial:
2534         return TRACE_TYPE_PARTIAL_LINE;
2535 }
2536
2537 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2538 {
2539         struct trace_seq *s = &iter->seq;
2540         unsigned char newline = '\n';
2541         struct trace_entry *entry;
2542         struct trace_event *event;
2543
2544         entry = iter->ent;
2545
2546         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2547                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2548                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2549                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2550         }
2551
2552         event = ftrace_find_event(entry->type);
2553         if (event) {
2554                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2555                 if (ret != TRACE_TYPE_HANDLED)
2556                         return ret;
2557         }
2558
2559         SEQ_PUT_FIELD_RET(s, newline);
2560
2561         return TRACE_TYPE_HANDLED;
2562 }
2563
2564 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2565 {
2566         struct trace_seq *s = &iter->seq;
2567         struct trace_entry *entry;
2568         struct trace_event *event;
2569
2570         entry = iter->ent;
2571
2572         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2573                 SEQ_PUT_FIELD_RET(s, entry->pid);
2574                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2575                 SEQ_PUT_FIELD_RET(s, iter->ts);
2576         }
2577
2578         event = ftrace_find_event(entry->type);
2579         return event ? event->funcs->binary(iter, 0, event) :
2580                 TRACE_TYPE_HANDLED;
2581 }
2582
2583 int trace_empty(struct trace_iterator *iter)
2584 {
2585         struct ring_buffer_iter *buf_iter;
2586         int cpu;
2587
2588         /* If we are looking at one CPU buffer, only check that one */
2589         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2590                 cpu = iter->cpu_file;
2591                 buf_iter = trace_buffer_iter(iter, cpu);
2592                 if (buf_iter) {
2593                         if (!ring_buffer_iter_empty(buf_iter))
2594                                 return 0;
2595                 } else {
2596                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2597                                 return 0;
2598                 }
2599                 return 1;
2600         }
2601
2602         for_each_tracing_cpu(cpu) {
2603                 buf_iter = trace_buffer_iter(iter, cpu);
2604                 if (buf_iter) {
2605                         if (!ring_buffer_iter_empty(buf_iter))
2606                                 return 0;
2607                 } else {
2608                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2609                                 return 0;
2610                 }
2611         }
2612
2613         return 1;
2614 }
2615
2616 /*  Called with trace_event_read_lock() held. */
2617 enum print_line_t print_trace_line(struct trace_iterator *iter)
2618 {
2619         enum print_line_t ret;
2620
2621         if (iter->lost_events &&
2622             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2623                                  iter->cpu, iter->lost_events))
2624                 return TRACE_TYPE_PARTIAL_LINE;
2625
2626         if (iter->trace && iter->trace->print_line) {
2627                 ret = iter->trace->print_line(iter);
2628                 if (ret != TRACE_TYPE_UNHANDLED)
2629                         return ret;
2630         }
2631
2632         if (iter->ent->type == TRACE_BPUTS &&
2633                         trace_flags & TRACE_ITER_PRINTK &&
2634                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2635                 return trace_print_bputs_msg_only(iter);
2636
2637         if (iter->ent->type == TRACE_BPRINT &&
2638                         trace_flags & TRACE_ITER_PRINTK &&
2639                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2640                 return trace_print_bprintk_msg_only(iter);
2641
2642         if (iter->ent->type == TRACE_PRINT &&
2643                         trace_flags & TRACE_ITER_PRINTK &&
2644                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2645                 return trace_print_printk_msg_only(iter);
2646
2647         if (trace_flags & TRACE_ITER_BIN)
2648                 return print_bin_fmt(iter);
2649
2650         if (trace_flags & TRACE_ITER_HEX)
2651                 return print_hex_fmt(iter);
2652
2653         if (trace_flags & TRACE_ITER_RAW)
2654                 return print_raw_fmt(iter);
2655
2656         return print_trace_fmt(iter);
2657 }
2658
2659 void trace_latency_header(struct seq_file *m)
2660 {
2661         struct trace_iterator *iter = m->private;
2662
2663         /* print nothing if the buffers are empty */
2664         if (trace_empty(iter))
2665                 return;
2666
2667         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2668                 print_trace_header(m, iter);
2669
2670         if (!(trace_flags & TRACE_ITER_VERBOSE))
2671                 print_lat_help_header(m);
2672 }
2673
2674 void trace_default_header(struct seq_file *m)
2675 {
2676         struct trace_iterator *iter = m->private;
2677
2678         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2679                 return;
2680
2681         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2682                 /* print nothing if the buffers are empty */
2683                 if (trace_empty(iter))
2684                         return;
2685                 print_trace_header(m, iter);
2686                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2687                         print_lat_help_header(m);
2688         } else {
2689                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2690                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2691                                 print_func_help_header_irq(iter->trace_buffer, m);
2692                         else
2693                                 print_func_help_header(iter->trace_buffer, m);
2694                 }
2695         }
2696 }
2697
2698 static void test_ftrace_alive(struct seq_file *m)
2699 {
2700         if (!ftrace_is_dead())
2701                 return;
2702         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2703         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2704 }
2705
2706 #ifdef CONFIG_TRACER_MAX_TRACE
2707 static void show_snapshot_main_help(struct seq_file *m)
2708 {
2709         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2710         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2711         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2712         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
2713         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2714         seq_printf(m, "#                       is not a '0' or '1')\n");
2715 }
2716
2717 static void show_snapshot_percpu_help(struct seq_file *m)
2718 {
2719         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2720 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2721         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2722         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2723 #else
2724         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2725         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2726 #endif
2727         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2728         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2729         seq_printf(m, "#                       is not a '0' or '1')\n");
2730 }
2731
2732 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2733 {
2734         if (iter->tr->allocated_snapshot)
2735                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2736         else
2737                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2738
2739         seq_printf(m, "# Snapshot commands:\n");
2740         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2741                 show_snapshot_main_help(m);
2742         else
2743                 show_snapshot_percpu_help(m);
2744 }
2745 #else
2746 /* Should never be called */
2747 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2748 #endif
2749
2750 static int s_show(struct seq_file *m, void *v)
2751 {
2752         struct trace_iterator *iter = v;
2753         int ret;
2754
2755         if (iter->ent == NULL) {
2756                 if (iter->tr) {
2757                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2758                         seq_puts(m, "#\n");
2759                         test_ftrace_alive(m);
2760                 }
2761                 if (iter->snapshot && trace_empty(iter))
2762                         print_snapshot_help(m, iter);
2763                 else if (iter->trace && iter->trace->print_header)
2764                         iter->trace->print_header(m);
2765                 else
2766                         trace_default_header(m);
2767
2768         } else if (iter->leftover) {
2769                 /*
2770                  * If we filled the seq_file buffer earlier, we
2771                  * want to just show it now.
2772                  */
2773                 ret = trace_print_seq(m, &iter->seq);
2774
2775                 /* ret should this time be zero, but you never know */
2776                 iter->leftover = ret;
2777
2778         } else {
2779                 print_trace_line(iter);
2780                 ret = trace_print_seq(m, &iter->seq);
2781                 /*
2782                  * If we overflow the seq_file buffer, then it will
2783                  * ask us for this data again at start up.
2784                  * Use that instead.
2785                  *  ret is 0 if seq_file write succeeded.
2786                  *        -1 otherwise.
2787                  */
2788                 iter->leftover = ret;
2789         }
2790
2791         return 0;
2792 }
2793
2794 static const struct seq_operations tracer_seq_ops = {
2795         .start          = s_start,
2796         .next           = s_next,
2797         .stop           = s_stop,
2798         .show           = s_show,
2799 };
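/*
 * Condensed sketch (assuming standard seq_file semantics) of how the
 * seq_file core drives the callbacks above when the "trace" file is read:
 *
 *	p = s_start(m, &pos);
 *	while (p && !IS_ERR(p)) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * s_start()/s_stop() bracket the walk with trace_access_lock() and the
 * cmdline-recording pause, which is why they must always run as a pair.
 */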
2800
2801 static struct trace_iterator *
2802 __tracing_open(struct trace_array *tr, struct trace_cpu *tc,
2803                struct inode *inode, struct file *file, bool snapshot)
2804 {
2805         struct trace_iterator *iter;
2806         int cpu;
2807
2808         if (tracing_disabled)
2809                 return ERR_PTR(-ENODEV);
2810
2811         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2812         if (!iter)
2813                 return ERR_PTR(-ENOMEM);
2814
2815         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2816                                     GFP_KERNEL);
2817         if (!iter->buffer_iter)
2818                 goto release;
2819
2820         /*
2821          * We make a copy of the current tracer to avoid concurrent
2822          * changes on it while we are reading.
2823          */
2824         mutex_lock(&trace_types_lock);
2825         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2826         if (!iter->trace)
2827                 goto fail;
2828
2829         *iter->trace = *tr->current_trace;
2830
2831         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2832                 goto fail;
2833
2834         iter->tr = tr;
2835
2836 #ifdef CONFIG_TRACER_MAX_TRACE
2837         /* Currently only the top directory has a snapshot */
2838         if (tr->current_trace->print_max || snapshot)
2839                 iter->trace_buffer = &tr->max_buffer;
2840         else
2841 #endif
2842                 iter->trace_buffer = &tr->trace_buffer;
2843         iter->snapshot = snapshot;
2844         iter->pos = -1;
2845         mutex_init(&iter->mutex);
2846         iter->cpu_file = tc->cpu;
2847
2848         /* Notify the tracer early; before we stop tracing. */
2849         if (iter->trace && iter->trace->open)
2850                 iter->trace->open(iter);
2851
2852         /* Annotate start of buffers if we had overruns */
2853         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2854                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2855
2856         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2857         if (trace_clocks[tr->clock_id].in_ns)
2858                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2859
2860         /* stop the trace while dumping if we are not opening "snapshot" */
2861         if (!iter->snapshot)
2862                 tracing_stop_tr(tr);
2863
2864         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2865                 for_each_tracing_cpu(cpu) {
2866                         iter->buffer_iter[cpu] =
2867                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2868                 }
2869                 ring_buffer_read_prepare_sync();
2870                 for_each_tracing_cpu(cpu) {
2871                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2872                         tracing_iter_reset(iter, cpu);
2873                 }
2874         } else {
2875                 cpu = iter->cpu_file;
2876                 iter->buffer_iter[cpu] =
2877                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2878                 ring_buffer_read_prepare_sync();
2879                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2880                 tracing_iter_reset(iter, cpu);
2881         }
2882
2883         mutex_unlock(&trace_types_lock);
2884
2885         return iter;
2886
2887  fail:
2888         mutex_unlock(&trace_types_lock);
2889         kfree(iter->trace);
2890         kfree(iter->buffer_iter);
2891 release:
2892         seq_release_private(inode, file);
2893         return ERR_PTR(-ENOMEM);
2894 }
2895
2896 int tracing_open_generic(struct inode *inode, struct file *filp)
2897 {
2898         if (tracing_disabled)
2899                 return -ENODEV;
2900
2901         filp->private_data = inode->i_private;
2902         return 0;
2903 }
2904
2905 /*
2906  * Open and update trace_array ref count.
2907  * Must have the current trace_array passed to it.
2908  */
2909 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
2910 {
2911         struct trace_array *tr = inode->i_private;
2912
2913         if (tracing_disabled)
2914                 return -ENODEV;
2915
2916         if (trace_array_get(tr) < 0)
2917                 return -ENODEV;
2918
2919         filp->private_data = inode->i_private;
2920
2921         return 0;
2922
2923 }
2924
2925 int tracing_open_generic_tc(struct inode *inode, struct file *filp)
2926 {
2927         struct trace_cpu *tc = inode->i_private;
2928         struct trace_array *tr = tc->tr;
2929
2930         if (tracing_disabled)
2931                 return -ENODEV;
2932
2933         if (trace_array_get(tr) < 0)
2934                 return -ENODEV;
2935
2936         filp->private_data = inode->i_private;
2937
2938         return 0;
2939
2940 }
2941
2942 static int tracing_release(struct inode *inode, struct file *file)
2943 {
2944         struct seq_file *m = file->private_data;
2945         struct trace_iterator *iter;
2946         struct trace_array *tr;
2947         int cpu;
2948
2949         /* Writes do not use seq_file, need to grab tr from inode */
2950         if (!(file->f_mode & FMODE_READ)) {
2951                 struct trace_cpu *tc = inode->i_private;
2952
2953                 trace_array_put(tc->tr);
2954                 return 0;
2955         }
2956
2957         iter = m->private;
2958         tr = iter->tr;
2959         trace_array_put(tr);
2960
2961         mutex_lock(&trace_types_lock);
2962
2963         for_each_tracing_cpu(cpu) {
2964                 if (iter->buffer_iter[cpu])
2965                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2966         }
2967
2968         if (iter->trace && iter->trace->close)
2969                 iter->trace->close(iter);
2970
2971         if (!iter->snapshot)
2972                 /* reenable tracing if it was previously enabled */
2973                 tracing_start_tr(tr);
2974         mutex_unlock(&trace_types_lock);
2975
2976         mutex_destroy(&iter->mutex);
2977         free_cpumask_var(iter->started);
2978         kfree(iter->trace);
2979         kfree(iter->buffer_iter);
2980         seq_release_private(inode, file);
2981
2982         return 0;
2983 }
2984
2985 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
2986 {
2987         struct trace_array *tr = inode->i_private;
2988
2989         trace_array_put(tr);
2990         return 0;
2991 }
2992
2993 static int tracing_release_generic_tc(struct inode *inode, struct file *file)
2994 {
2995         struct trace_cpu *tc = inode->i_private;
2996         struct trace_array *tr = tc->tr;
2997
2998         trace_array_put(tr);
2999         return 0;
3000 }
3001
3002 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3003 {
3004         struct trace_array *tr = inode->i_private;
3005
3006         trace_array_put(tr);
3007
3008         return single_release(inode, file);
3009 }
3010
3011 static int tracing_open(struct inode *inode, struct file *file)
3012 {
3013         struct trace_cpu *tc = inode->i_private;
3014         struct trace_array *tr = tc->tr;
3015         struct trace_iterator *iter;
3016         int ret = 0;
3017
3018         if (trace_array_get(tr) < 0)
3019                 return -ENODEV;
3020
3021         /* If this file was open for write, then erase contents */
3022         if ((file->f_mode & FMODE_WRITE) &&
3023             (file->f_flags & O_TRUNC)) {
3024                 if (tc->cpu == RING_BUFFER_ALL_CPUS)
3025                         tracing_reset_online_cpus(&tr->trace_buffer);
3026                 else
3027                         tracing_reset(&tr->trace_buffer, tc->cpu);
3028         }
3029
3030         if (file->f_mode & FMODE_READ) {
3031                 iter = __tracing_open(tr, tc, inode, file, false);
3032                 if (IS_ERR(iter))
3033                         ret = PTR_ERR(iter);
3034                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3035                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3036         }
3037
3038         if (ret < 0)
3039                 trace_array_put(tr);
3040
3041         return ret;
3042 }
3043
3044 static void *
3045 t_next(struct seq_file *m, void *v, loff_t *pos)
3046 {
3047         struct tracer *t = v;
3048
3049         (*pos)++;
3050
3051         if (t)
3052                 t = t->next;
3053
3054         return t;
3055 }
3056
3057 static void *t_start(struct seq_file *m, loff_t *pos)
3058 {
3059         struct tracer *t;
3060         loff_t l = 0;
3061
3062         mutex_lock(&trace_types_lock);
3063         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3064                 ;
3065
3066         return t;
3067 }
3068
3069 static void t_stop(struct seq_file *m, void *p)
3070 {
3071         mutex_unlock(&trace_types_lock);
3072 }
3073
3074 static int t_show(struct seq_file *m, void *v)
3075 {
3076         struct tracer *t = v;
3077
3078         if (!t)
3079                 return 0;
3080
3081         seq_printf(m, "%s", t->name);
3082         if (t->next)
3083                 seq_putc(m, ' ');
3084         else
3085                 seq_putc(m, '\n');
3086
3087         return 0;
3088 }
3089
3090 static const struct seq_operations show_traces_seq_ops = {
3091         .start          = t_start,
3092         .next           = t_next,
3093         .stop           = t_stop,
3094         .show           = t_show,
3095 };
3096
3097 static int show_traces_open(struct inode *inode, struct file *file)
3098 {
3099         if (tracing_disabled)
3100                 return -ENODEV;
3101
3102         return seq_open(file, &show_traces_seq_ops);
3103 }
3104
3105 static ssize_t
3106 tracing_write_stub(struct file *filp, const char __user *ubuf,
3107                    size_t count, loff_t *ppos)
3108 {
3109         return count;
3110 }
3111
3112 static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
3113 {
3114         if (file->f_mode & FMODE_READ)
3115                 return seq_lseek(file, offset, origin);
3116         else
3117                 return 0;
3118 }
3119
3120 static const struct file_operations tracing_fops = {
3121         .open           = tracing_open,
3122         .read           = seq_read,
3123         .write          = tracing_write_stub,
3124         .llseek         = tracing_seek,
3125         .release        = tracing_release,
3126 };
3127
3128 static const struct file_operations show_traces_fops = {
3129         .open           = show_traces_open,
3130         .read           = seq_read,
3131         .release        = seq_release,
3132         .llseek         = seq_lseek,
3133 };
3134
3135 /*
3136  * Only trace on a CPU if the bitmask is set:
3137  */
3138 static cpumask_var_t tracing_cpumask;
3139
3140 /*
3141  * The tracer itself will not take this lock, but still we want
3142  * to provide a consistent cpumask to user-space:
3143  */
3144 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3145
3146 /*
3147  * Temporary storage for the character representation of the
3148  * CPU bitmask (and one more byte for the newline):
3149  */
3150 static char mask_str[NR_CPUS + 1];
3151
3152 static ssize_t
3153 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3154                      size_t count, loff_t *ppos)
3155 {
3156         int len;
3157
3158         mutex_lock(&tracing_cpumask_update_lock);
3159
3160         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
3161         if (count - len < 2) {
3162                 count = -EINVAL;
3163                 goto out_err;
3164         }
3165         len += sprintf(mask_str + len, "\n");
3166         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3167
3168 out_err:
3169         mutex_unlock(&tracing_cpumask_update_lock);
3170
3171         return count;
3172 }
3173
3174 static ssize_t
3175 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3176                       size_t count, loff_t *ppos)
3177 {
3178         struct trace_array *tr = filp->private_data;
3179         cpumask_var_t tracing_cpumask_new;
3180         int err, cpu;
3181
3182         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3183                 return -ENOMEM;
3184
3185         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3186         if (err)
3187                 goto err_unlock;
3188
3189         mutex_lock(&tracing_cpumask_update_lock);
3190
3191         local_irq_disable();
3192         arch_spin_lock(&ftrace_max_lock);
3193         for_each_tracing_cpu(cpu) {
3194                 /*
3195                  * Increase/decrease the disabled counter if we are
3196                  * about to flip a bit in the cpumask:
3197                  */
3198                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
3199                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3200                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3201                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3202                 }
3203                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
3204                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3205                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3206                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3207                 }
3208         }
3209         arch_spin_unlock(&ftrace_max_lock);
3210         local_irq_enable();
3211
3212         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
3213
3214         mutex_unlock(&tracing_cpumask_update_lock);
3215         free_cpumask_var(tracing_cpumask_new);
3216
3217         return count;
3218
3219 err_unlock:
3220         free_cpumask_var(tracing_cpumask_new);
3221
3222         return err;
3223 }
3224
3225 static const struct file_operations tracing_cpumask_fops = {
3226         .open           = tracing_open_generic,
3227         .read           = tracing_cpumask_read,
3228         .write          = tracing_cpumask_write,
3229         .llseek         = generic_file_llseek,
3230 };
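/*
 * Userspace view (illustrative shell session) of the handlers above. The
 * file takes a hex cpu mask, so with four online cpus:
 *
 *	# cat /sys/kernel/debug/tracing/tracing_cpumask
 *	f
 *	# echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
 *
 * restricts tracing to cpu0 and cpu1; the write path disables/enables
 * per-cpu recording to match the new mask.
 */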
3231
3232 static int tracing_trace_options_show(struct seq_file *m, void *v)
3233 {
3234         struct tracer_opt *trace_opts;
3235         struct trace_array *tr = m->private;
3236         u32 tracer_flags;
3237         int i;
3238
3239         mutex_lock(&trace_types_lock);
3240         tracer_flags = tr->current_trace->flags->val;
3241         trace_opts = tr->current_trace->flags->opts;
3242
3243         for (i = 0; trace_options[i]; i++) {
3244                 if (trace_flags & (1 << i))
3245                         seq_printf(m, "%s\n", trace_options[i]);
3246                 else
3247                         seq_printf(m, "no%s\n", trace_options[i]);
3248         }
3249
3250         for (i = 0; trace_opts[i].name; i++) {
3251                 if (tracer_flags & trace_opts[i].bit)
3252                         seq_printf(m, "%s\n", trace_opts[i].name);
3253                 else
3254                         seq_printf(m, "no%s\n", trace_opts[i].name);
3255         }
3256         mutex_unlock(&trace_types_lock);
3257
3258         return 0;
3259 }
3260
3261 static int __set_tracer_option(struct tracer *trace,
3262                                struct tracer_flags *tracer_flags,
3263                                struct tracer_opt *opts, int neg)
3264 {
3265         int ret;
3266
3267         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3268         if (ret)
3269                 return ret;
3270
3271         if (neg)
3272                 tracer_flags->val &= ~opts->bit;
3273         else
3274                 tracer_flags->val |= opts->bit;
3275         return 0;
3276 }
3277
3278 /* Try to assign a tracer specific option */
3279 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3280 {
3281         struct tracer_flags *tracer_flags = trace->flags;
3282         struct tracer_opt *opts = NULL;
3283         int i;
3284
3285         for (i = 0; tracer_flags->opts[i].name; i++) {
3286                 opts = &tracer_flags->opts[i];
3287
3288                 if (strcmp(cmp, opts->name) == 0)
3289                         return __set_tracer_option(trace, trace->flags,
3290                                                    opts, neg);
3291         }
3292
3293         return -EINVAL;
3294 }
3295
3296 /* Some tracers require overwrite to stay enabled */
3297 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3298 {
3299         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3300                 return -1;
3301
3302         return 0;
3303 }
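
/*
 * A minimal sketch of how a tracer is expected to hook the callback above;
 * the tracer name below is illustrative, but the latency tracers (e.g.
 * irqsoff) wire up ->flag_changed in roughly this way so that the
 * "overwrite" option cannot be cleared while they are active:
 *
 *	static struct tracer example_latency_tracer __read_mostly = {
 *		.name		= "example_latency",
 *		.flag_changed	= trace_keep_overwrite,
 *		.use_max_tr	= true,
 *	};
 *
 * With this in place, set_tracer_flag() below returns -EINVAL for
 * "echo nooverwrite > trace_options" while such a tracer is current.
 */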
3304
3305 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3306 {
3307         /* do nothing if flag is already set */
3308         if (!!(trace_flags & mask) == !!enabled)
3309                 return 0;
3310
3311         /* Give the tracer a chance to approve the change */
3312         if (tr->current_trace->flag_changed)
3313                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3314                         return -EINVAL;
3315
3316         if (enabled)
3317                 trace_flags |= mask;
3318         else
3319                 trace_flags &= ~mask;
3320
3321         if (mask == TRACE_ITER_RECORD_CMD)
3322                 trace_event_enable_cmd_record(enabled);
3323
3324         if (mask == TRACE_ITER_OVERWRITE) {
3325                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3326 #ifdef CONFIG_TRACER_MAX_TRACE
3327                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3328 #endif
3329         }
3330
3331         if (mask == TRACE_ITER_PRINTK)
3332                 trace_printk_start_stop_comm(enabled);
3333
3334         return 0;
3335 }
3336
3337 static int trace_set_options(struct trace_array *tr, char *option)
3338 {
3339         char *cmp;
3340         int neg = 0;
3341         int ret = -ENODEV;
3342         int i;
3343
3344         cmp = strstrip(option);
3345
3346         if (strncmp(cmp, "no", 2) == 0) {
3347                 neg = 1;
3348                 cmp += 2;
3349         }
3350
3351         mutex_lock(&trace_types_lock);
3352
3353         for (i = 0; trace_options[i]; i++) {
3354                 if (strcmp(cmp, trace_options[i]) == 0) {
3355                         ret = set_tracer_flag(tr, 1 << i, !neg);
3356                         break;
3357                 }
3358         }
3359
3360         /* If no option could be set, test the specific tracer options */
3361         if (!trace_options[i])
3362                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3363
3364         mutex_unlock(&trace_types_lock);
3365
3366         return ret;
3367 }
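
/*
 * The parsing above gives the trace_options file its usual interface: write
 * an option name to set it, or the same name prefixed with "no" to clear it.
 * A hedged example, assuming the generic "overwrite" and "sym-offset"
 * options are present in this build:
 *
 *    # echo nooverwrite > trace_options      (stop overwriting old events)
 *    # echo sym-offset > trace_options       (set a generic option)
 *    # echo nosym-offset > trace_options     (clear it again)
 *
 * Names not found in trace_options[] fall through to the current tracer's
 * private options via set_tracer_option().
 */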
3368
3369 static ssize_t
3370 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3371                         size_t cnt, loff_t *ppos)
3372 {
3373         struct seq_file *m = filp->private_data;
3374         struct trace_array *tr = m->private;
3375         char buf[64];
3376         int ret;
3377
3378         if (cnt >= sizeof(buf))
3379                 return -EINVAL;
3380
3381         if (copy_from_user(&buf, ubuf, cnt))
3382                 return -EFAULT;
3383
3384         buf[cnt] = 0;
3385
3386         ret = trace_set_options(tr, buf);
3387         if (ret < 0)
3388                 return ret;
3389
3390         *ppos += cnt;
3391
3392         return cnt;
3393 }
3394
3395 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3396 {
3397         struct trace_array *tr = inode->i_private;
3398
3399         if (tracing_disabled)
3400                 return -ENODEV;
3401
3402         if (trace_array_get(tr) < 0)
3403                 return -ENODEV;
3404
3405         return single_open(file, tracing_trace_options_show, inode->i_private);
3406 }
3407
3408 static const struct file_operations tracing_iter_fops = {
3409         .open           = tracing_trace_options_open,
3410         .read           = seq_read,
3411         .llseek         = seq_lseek,
3412         .release        = tracing_single_release_tr,
3413         .write          = tracing_trace_options_write,
3414 };
3415
3416 static const char readme_msg[] =
3417         "tracing mini-HOWTO:\n\n"
3418         "# echo 0 > tracing_on : quick way to disable tracing\n"
3419         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3420         " Important files:\n"
3421         "  trace\t\t\t- The static contents of the buffer\n"
3422         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3423         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3424         "  current_tracer\t- function and latency tracers\n"
3425         "  available_tracers\t- list of configured tracers for current_tracer\n"
3426         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3427         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3428         "  trace_clock\t\t- change the clock used to order events\n"
3429         "       local:   Per cpu clock but may not be synced across CPUs\n"
3430         "      global:   Synced across CPUs but slows tracing down.\n"
3431         "     counter:   Not a clock, but just an increment\n"
3432         "      uptime:   Jiffy counter from time of boot\n"
3433         "        perf:   Same clock that perf events use\n"
3434 #ifdef CONFIG_X86_64
3435         "     x86-tsc:   TSC cycle counter\n"
3436 #endif
3437         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3438         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3439         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3440         "\t\t\t  Remove sub-buffer with rmdir\n"
3441         "  trace_options\t\t- Set format or modify how tracing happens\n"
3442         "\t\t\t  Disable an option by prefixing 'no' to the option name\n"
3443 #ifdef CONFIG_DYNAMIC_FTRACE
3444         "\n  available_filter_functions - list of functions that can be filtered on\n"
3445         "  set_ftrace_filter\t- echo function name in here to only trace these functions\n"
3446         "            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3447         "            modules: Can select a group via module\n"
3448         "             Format: :mod:<module-name>\n"
3449         "             example: echo :mod:ext3 > set_ftrace_filter\n"
3450         "            triggers: a command to perform when function is hit\n"
3451         "              Format: <function>:<trigger>[:count]\n"
3452         "             trigger: traceon, traceoff\n"
3453         "                      enable_event:<system>:<event>\n"
3454         "                      disable_event:<system>:<event>\n"
3455 #ifdef CONFIG_STACKTRACE
3456         "                      stacktrace\n"
3457 #endif
3458 #ifdef CONFIG_TRACER_SNAPSHOT
3459         "                      snapshot\n"
3460 #endif
3461         "             example: echo do_fault:traceoff > set_ftrace_filter\n"
3462         "                      echo do_trap:traceoff:3 > set_ftrace_filter\n"
3463         "             The first one will disable tracing every time do_fault is hit\n"
3464         "             The second will disable tracing at most 3 times when do_trap is hit\n"
3465         "               The first time do_trap is hit and it disables tracing, the counter\n"
3466         "               will decrement to 2. If tracing is already disabled, the counter\n"
3467         "               will not decrement. It only decrements when the trigger did work\n"
3468         "             To remove a trigger without a count:\n"
3469         "               echo '!<function>:<trigger>' > set_ftrace_filter\n"
3470         "             To remove a trigger with a count:\n"
3471         "               echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3472         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3473         "            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3474         "            modules: Can select a group via module command :mod:\n"
3475         "            Does not accept triggers\n"
3476 #endif /* CONFIG_DYNAMIC_FTRACE */
3477 #ifdef CONFIG_FUNCTION_TRACER
3478         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n"
3479 #endif
3480 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3481         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3482         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3483 #endif
3484 #ifdef CONFIG_TRACER_SNAPSHOT
3485         "\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
3486         "\t\t\t  Read the contents for more information\n"
3487 #endif
3488 #ifdef CONFIG_STACKTRACE
3489         "  stack_trace\t\t- Shows the max stack trace when active\n"
3490         "  stack_max_size\t- Shows current max stack size that was traced\n"
3491         "\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
3492 #ifdef CONFIG_DYNAMIC_FTRACE
3493         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
3494 #endif
3495 #endif /* CONFIG_STACKTRACE */
3496 ;
3497
3498 static ssize_t
3499 tracing_readme_read(struct file *filp, char __user *ubuf,
3500                        size_t cnt, loff_t *ppos)
3501 {
3502         return simple_read_from_buffer(ubuf, cnt, ppos,
3503                                         readme_msg, strlen(readme_msg));
3504 }
3505
3506 static const struct file_operations tracing_readme_fops = {
3507         .open           = tracing_open_generic,
3508         .read           = tracing_readme_read,
3509         .llseek         = generic_file_llseek,
3510 };
3511
3512 static ssize_t
3513 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3514                                 size_t cnt, loff_t *ppos)
3515 {
3516         char *buf_comm;
3517         char *file_buf;
3518         char *buf;
3519         int len = 0;
3520         int pid;
3521         int i;
3522
3523         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3524         if (!file_buf)
3525                 return -ENOMEM;
3526
3527         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3528         if (!buf_comm) {
3529                 kfree(file_buf);
3530                 return -ENOMEM;
3531         }
3532
3533         buf = file_buf;
3534
3535         for (i = 0; i < SAVED_CMDLINES; i++) {
3536                 int r;
3537
3538                 pid = map_cmdline_to_pid[i];
3539                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3540                         continue;
3541
3542                 trace_find_cmdline(pid, buf_comm);
3543                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3544                 buf += r;
3545                 len += r;
3546         }
3547
3548         len = simple_read_from_buffer(ubuf, cnt, ppos,
3549                                       file_buf, len);
3550
3551         kfree(file_buf);
3552         kfree(buf_comm);
3553
3554         return len;
3555 }
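
/*
 * Reading this file produces one "<pid> <comm>" line per cached entry, as
 * built by the sprintf() above. Illustrative output (the values are made up):
 *
 *    # cat saved_cmdlines
 *    1 systemd
 *    812 kworker/0:2
 *    1304 bash
 */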
3556
3557 static const struct file_operations tracing_saved_cmdlines_fops = {
3558         .open           = tracing_open_generic,
3559         .read           = tracing_saved_cmdlines_read,
3560         .llseek         = generic_file_llseek,
3561 };
3562
3563 static ssize_t
3564 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3565                        size_t cnt, loff_t *ppos)
3566 {
3567         struct trace_array *tr = filp->private_data;
3568         char buf[MAX_TRACER_SIZE+2];
3569         int r;
3570
3571         mutex_lock(&trace_types_lock);
3572         r = sprintf(buf, "%s\n", tr->current_trace->name);
3573         mutex_unlock(&trace_types_lock);
3574
3575         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3576 }
3577
3578 int tracer_init(struct tracer *t, struct trace_array *tr)
3579 {
3580         tracing_reset_online_cpus(&tr->trace_buffer);
3581         return t->init(tr);
3582 }
3583
3584 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3585 {
3586         int cpu;
3587
3588         for_each_tracing_cpu(cpu)
3589                 per_cpu_ptr(buf->data, cpu)->entries = val;
3590 }
3591
3592 #ifdef CONFIG_TRACER_MAX_TRACE
3593 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3594 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3595                                         struct trace_buffer *size_buf, int cpu_id)
3596 {
3597         int cpu, ret = 0;
3598
3599         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3600                 for_each_tracing_cpu(cpu) {
3601                         ret = ring_buffer_resize(trace_buf->buffer,
3602                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3603                         if (ret < 0)
3604                                 break;
3605                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3606                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3607                 }
3608         } else {
3609                 ret = ring_buffer_resize(trace_buf->buffer,
3610                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3611                 if (ret == 0)
3612                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3613                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3614         }
3615
3616         return ret;
3617 }
3618 #endif /* CONFIG_TRACER_MAX_TRACE */
3619
3620 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3621                                         unsigned long size, int cpu)
3622 {
3623         int ret;
3624
3625         /*
3626          * If kernel or user changes the size of the ring buffer
3627          * we use the size that was given, and we can forget about
3628          * expanding it later.
3629          */
3630         ring_buffer_expanded = true;
3631
3632         /* May be called before buffers are initialized */
3633         if (!tr->trace_buffer.buffer)
3634                 return 0;
3635
3636         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3637         if (ret < 0)
3638                 return ret;
3639
3640 #ifdef CONFIG_TRACER_MAX_TRACE
3641         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3642             !tr->current_trace->use_max_tr)
3643                 goto out;
3644
3645         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3646         if (ret < 0) {
3647                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3648                                                      &tr->trace_buffer, cpu);
3649                 if (r < 0) {
3650                         /*
3651                          * AARGH! We are left with a differently
3652                          * sized max buffer!!!!
3653                          * The max buffer is our "snapshot" buffer.
3654                          * When a tracer needs a snapshot (one of the
3655                          * latency tracers), it swaps the max buffer
3656                          * with the saved snapshot. We succeeded in updating
3657                          * the size of the main buffer, but failed to
3658                          * update the size of the max buffer. But when we tried
3659                          * to reset the main buffer to the original size, we
3660                          * failed there too. This is very unlikely to
3661                          * happen, but if it does, warn and kill all
3662                          * tracing.
3663                          */
3664                         WARN_ON(1);
3665                         tracing_disabled = 1;
3666                 }
3667                 return ret;
3668         }
3669
3670         if (cpu == RING_BUFFER_ALL_CPUS)
3671                 set_buffer_entries(&tr->max_buffer, size);
3672         else
3673                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3674
3675  out:
3676 #endif /* CONFIG_TRACER_MAX_TRACE */
3677
3678         if (cpu == RING_BUFFER_ALL_CPUS)
3679                 set_buffer_entries(&tr->trace_buffer, size);
3680         else
3681                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3682
3683         return ret;
3684 }
3685
3686 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3687                                           unsigned long size, int cpu_id)
3688 {
3689         int ret = size;
3690
3691         mutex_lock(&trace_types_lock);
3692
3693         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3694                 /* make sure this cpu is enabled in the mask */
3695                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3696                         ret = -EINVAL;
3697                         goto out;
3698                 }
3699         }
3700
3701         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3702         if (ret < 0)
3703                 ret = -ENOMEM;
3704
3705 out:
3706         mutex_unlock(&trace_types_lock);
3707
3708         return ret;
3709 }
3710
3711
3712 /**
3713  * tracing_update_buffers - used by tracing facility to expand ring buffers
3714  *
3715  * To save memory when tracing is never used on a system that has it
3716  * configured in, the ring buffers are set to a minimum size. Once a
3717  * user starts to use the tracing facility, they need to grow to their
3718  * default size.
3719  *
3720  * This function is to be called when a tracer is about to be used.
3721  */
3722 int tracing_update_buffers(void)
3723 {
3724         int ret = 0;
3725
3726         mutex_lock(&trace_types_lock);
3727         if (!ring_buffer_expanded)
3728                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3729                                                 RING_BUFFER_ALL_CPUS);
3730         mutex_unlock(&trace_types_lock);
3731
3732         return ret;
3733 }
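
/*
 * A sketch of the expected calling pattern: code that is about to start
 * producing trace data (for example the event-enable paths) calls this
 * before enabling itself, so the buffers are grown to their full size
 * exactly once:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... now safe to enable events/tracers, the buffers are full-sized ...
 */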
3734
3735 struct trace_option_dentry;
3736
3737 static struct trace_option_dentry *
3738 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3739
3740 static void
3741 destroy_trace_option_files(struct trace_option_dentry *topts);
3742
3743 static int tracing_set_tracer(const char *buf)
3744 {
3745         static struct trace_option_dentry *topts;
3746         struct trace_array *tr = &global_trace;
3747         struct tracer *t;
3748 #ifdef CONFIG_TRACER_MAX_TRACE
3749         bool had_max_tr;
3750 #endif
3751         int ret = 0;
3752
3753         mutex_lock(&trace_types_lock);
3754
3755         if (!ring_buffer_expanded) {
3756                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3757                                                 RING_BUFFER_ALL_CPUS);
3758                 if (ret < 0)
3759                         goto out;
3760                 ret = 0;
3761         }
3762
3763         for (t = trace_types; t; t = t->next) {
3764                 if (strcmp(t->name, buf) == 0)
3765                         break;
3766         }
3767         if (!t) {
3768                 ret = -EINVAL;
3769                 goto out;
3770         }
3771         if (t == tr->current_trace)
3772                 goto out;
3773
3774         trace_branch_disable();
3775
3776         tr->current_trace->enabled = false;
3777
3778         if (tr->current_trace->reset)
3779                 tr->current_trace->reset(tr);
3780
3781         /* Current trace needs to be nop_trace before synchronize_sched */
3782         tr->current_trace = &nop_trace;
3783
3784 #ifdef CONFIG_TRACER_MAX_TRACE
3785         had_max_tr = tr->allocated_snapshot;
3786
3787         if (had_max_tr && !t->use_max_tr) {
3788                 /*
3789                  * We need to make sure that the update_max_tr sees that
3790                  * current_trace changed to nop_trace to keep it from
3791                  * swapping the buffers after we resize it.
3792                  * update_max_tr() is called with interrupts disabled,
3793                  * so a synchronize_sched() is sufficient.
3794                  */
3795                 synchronize_sched();
3796                 free_snapshot(tr);
3797         }
3798 #endif
3799         destroy_trace_option_files(topts);
3800
3801         topts = create_trace_option_files(tr, t);
3802
3803 #ifdef CONFIG_TRACER_MAX_TRACE
3804         if (t->use_max_tr && !had_max_tr) {
3805                 ret = alloc_snapshot(tr);
3806                 if (ret < 0)
3807                         goto out;
3808         }
3809 #endif
3810
3811         if (t->init) {
3812                 ret = tracer_init(t, tr);
3813                 if (ret)
3814                         goto out;
3815         }
3816
3817         tr->current_trace = t;
3818         tr->current_trace->enabled = true;
3819         trace_branch_enable(tr);
3820  out:
3821         mutex_unlock(&trace_types_lock);
3822
3823         return ret;
3824 }
3825
3826 static ssize_t
3827 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3828                         size_t cnt, loff_t *ppos)
3829 {
3830         char buf[MAX_TRACER_SIZE+1];
3831         int i;
3832         size_t ret;
3833         int err;
3834
3835         ret = cnt;
3836
3837         if (cnt > MAX_TRACER_SIZE)
3838                 cnt = MAX_TRACER_SIZE;
3839
3840         if (copy_from_user(&buf, ubuf, cnt))
3841                 return -EFAULT;
3842
3843         buf[cnt] = 0;
3844
3845         /* strip trailing whitespace. */
3846         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3847                 buf[i] = 0;
3848
3849         err = tracing_set_tracer(buf);
3850         if (err)
3851                 return err;
3852
3853         *ppos += ret;
3854
3855         return ret;
3856 }
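
/*
 * Typical usage of the current_tracer file handled above (assuming the
 * function tracer is configured into this kernel):
 *
 *    # cat available_tracers
 *    function nop
 *    # echo function > current_tracer
 *    # cat current_tracer
 *    function
 *
 * The trailing newline from echo is stripped by the whitespace loop in
 * tracing_set_trace_write() before tracing_set_tracer() is called.
 */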
3857
3858 static ssize_t
3859 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3860                      size_t cnt, loff_t *ppos)
3861 {
3862         unsigned long *ptr = filp->private_data;
3863         char buf[64];
3864         int r;
3865
3866         r = snprintf(buf, sizeof(buf), "%ld\n",
3867                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3868         if (r > sizeof(buf))
3869                 r = sizeof(buf);
3870         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3871 }
3872
3873 static ssize_t
3874 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3875                       size_t cnt, loff_t *ppos)
3876 {
3877         unsigned long *ptr = filp->private_data;
3878         unsigned long val;
3879         int ret;
3880
3881         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3882         if (ret)
3883                 return ret;
3884
3885         *ptr = val * 1000;
3886
3887         return cnt;
3888 }
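
/*
 * Note on units: reads and writes of the max-latency file (typically exposed
 * as tracing_max_latency in debugfs) are in microseconds, while the value is
 * stored internally in nanoseconds, hence the "* 1000" above and the
 * nsecs_to_usecs() in the read path. For example:
 *
 *    # cat tracing_max_latency        (current maximum, in usecs)
 *    # echo 0 > tracing_max_latency   (reset the recorded maximum)
 */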
3889
3890 static int tracing_open_pipe(struct inode *inode, struct file *filp)
3891 {
3892         struct trace_cpu *tc = inode->i_private;
3893         struct trace_array *tr = tc->tr;
3894         struct trace_iterator *iter;
3895         int ret = 0;
3896
3897         if (tracing_disabled)
3898                 return -ENODEV;
3899
3900         if (trace_array_get(tr) < 0)
3901                 return -ENODEV;
3902
3903         mutex_lock(&trace_types_lock);
3904
3905         /* create a buffer to store the information to pass to userspace */
3906         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3907         if (!iter) {
3908                 ret = -ENOMEM;
3909                 goto out;
3910         }
3911
3912         /*
3913          * We make a copy of the current tracer to avoid concurrent
3914          * changes on it while we are reading.
3915          */
3916         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
3917         if (!iter->trace) {
3918                 ret = -ENOMEM;
3919                 goto fail;
3920         }
3921         *iter->trace = *tr->current_trace;
3922
3923         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
3924                 ret = -ENOMEM;
3925                 goto fail;
3926         }
3927
3928         /* trace pipe does not show start of buffer */
3929         cpumask_setall(iter->started);
3930
3931         if (trace_flags & TRACE_ITER_LATENCY_FMT)
3932                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3933
3934         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3935         if (trace_clocks[tr->clock_id].in_ns)
3936                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3937
3938         iter->cpu_file = tc->cpu;
3939         iter->tr = tc->tr;
3940         iter->trace_buffer = &tc->tr->trace_buffer;
3941         mutex_init(&iter->mutex);
3942         filp->private_data = iter;
3943
3944         if (iter->trace->pipe_open)
3945                 iter->trace->pipe_open(iter);
3946
3947         nonseekable_open(inode, filp);
3948 out:
3949         mutex_unlock(&trace_types_lock);
3950         return ret;
3951
3952 fail:
3953         kfree(iter->trace);
3954         kfree(iter);
3955         __trace_array_put(tr);
3956         mutex_unlock(&trace_types_lock);
3957         return ret;
3958 }
3959
3960 static int tracing_release_pipe(struct inode *inode, struct file *file)
3961 {
3962         struct trace_iterator *iter = file->private_data;
3963         struct trace_cpu *tc = inode->i_private;
3964         struct trace_array *tr = tc->tr;
3965
3966         mutex_lock(&trace_types_lock);
3967
3968         if (iter->trace->pipe_close)
3969                 iter->trace->pipe_close(iter);
3970
3971         mutex_unlock(&trace_types_lock);
3972
3973         free_cpumask_var(iter->started);
3974         mutex_destroy(&iter->mutex);
3975         kfree(iter->trace);
3976         kfree(iter);
3977
3978         trace_array_put(tr);
3979
3980         return 0;
3981 }
3982
3983 static unsigned int
3984 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
3985 {
3986         /* Iterators are static; they should be either filled or empty */
3987         if (trace_buffer_iter(iter, iter->cpu_file))
3988                 return POLLIN | POLLRDNORM;
3989
3990         if (trace_flags & TRACE_ITER_BLOCK)
3991                 /*
3992                  * Always select as readable when in blocking mode
3993                  */
3994                 return POLLIN | POLLRDNORM;
3995         else
3996                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
3997                                              filp, poll_table);
3998 }
3999
4000 static unsigned int
4001 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4002 {
4003         struct trace_iterator *iter = filp->private_data;
4004
4005         return trace_poll(iter, filp, poll_table);
4006 }
4007
4008 /*
4009  * This is a make-shift waitqueue.
4010  * A tracer might use this callback in some rare cases:
4011  *
4012  *  1) the current tracer might hold the runqueue lock when it wakes up
4013  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4014  *  2) the function tracers trace all functions, and we don't want
4015  *     the overhead of calling wake_up and friends
4016  *     (and tracing them too)
4017  *
4018  *     Anyway, this is a really primitive wakeup.
4019  */
4020 void poll_wait_pipe(struct trace_iterator *iter)
4021 {
4022         set_current_state(TASK_INTERRUPTIBLE);
4023         /* sleep for 100 msecs, and try again. */
4024         schedule_timeout(HZ / 10);
4025 }
4026
4027 /* Must be called with trace_types_lock mutex held. */
4028 static int tracing_wait_pipe(struct file *filp)
4029 {
4030         struct trace_iterator *iter = filp->private_data;
4031
4032         while (trace_empty(iter)) {
4033
4034                 if ((filp->f_flags & O_NONBLOCK)) {
4035                         return -EAGAIN;
4036                 }
4037
4038                 mutex_unlock(&iter->mutex);
4039
4040                 iter->trace->wait_pipe(iter);
4041
4042                 mutex_lock(&iter->mutex);
4043
4044                 if (signal_pending(current))
4045                         return -EINTR;
4046
4047                 /*
4048                  * We block until we read something and tracing is disabled.
4049                  * We still block if tracing is disabled, but we have never
4050                  * read anything. This allows a user to cat this file, and
4051                  * then enable tracing. But after we have read something,
4052                  * we give an EOF when tracing is again disabled.
4053                  *
4054                  * iter->pos will be 0 if we haven't read anything.
4055                  */
4056                 if (!tracing_is_enabled() && iter->pos)
4057                         break;
4058         }
4059
4060         return 1;
4061 }
4062
4063 /*
4064  * Consumer reader.
4065  */
4066 static ssize_t
4067 tracing_read_pipe(struct file *filp, char __user *ubuf,
4068                   size_t cnt, loff_t *ppos)
4069 {
4070         struct trace_iterator *iter = filp->private_data;
4071         struct trace_array *tr = iter->tr;
4072         ssize_t sret;
4073
4074         /* return any leftover data */
4075         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4076         if (sret != -EBUSY)
4077                 return sret;
4078
4079         trace_seq_init(&iter->seq);
4080
4081         /* copy the tracer to avoid using a global lock all around */
4082         mutex_lock(&trace_types_lock);
4083         if (unlikely(iter->trace->name != tr->current_trace->name))
4084                 *iter->trace = *tr->current_trace;
4085         mutex_unlock(&trace_types_lock);
4086
4087         /*
4088          * Avoid more than one consumer on a single file descriptor
4089          * This is just a matter of trace coherency; the ring buffer itself
4090          * is protected.
4091          */
4092         mutex_lock(&iter->mutex);
4093         if (iter->trace->read) {
4094                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4095                 if (sret)
4096                         goto out;
4097         }
4098
4099 waitagain:
4100         sret = tracing_wait_pipe(filp);
4101         if (sret <= 0)
4102                 goto out;
4103
4104         /* stop when tracing is finished */
4105         if (trace_empty(iter)) {
4106                 sret = 0;
4107                 goto out;
4108         }
4109
4110         if (cnt >= PAGE_SIZE)
4111                 cnt = PAGE_SIZE - 1;
4112
4113         /* reset all but tr, trace, and overruns */
4114         memset(&iter->seq, 0,
4115                sizeof(struct trace_iterator) -
4116                offsetof(struct trace_iterator, seq));
4117         iter->pos = -1;
4118
4119         trace_event_read_lock();
4120         trace_access_lock(iter->cpu_file);
4121         while (trace_find_next_entry_inc(iter) != NULL) {
4122                 enum print_line_t ret;
4123                 int len = iter->seq.len;
4124
4125                 ret = print_trace_line(iter);
4126                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4127                         /* don't print partial lines */
4128                         iter->seq.len = len;
4129                         break;
4130                 }
4131                 if (ret != TRACE_TYPE_NO_CONSUME)
4132                         trace_consume(iter);
4133
4134                 if (iter->seq.len >= cnt)
4135                         break;
4136
4137                 /*
4138                  * Setting the full flag means we reached the trace_seq buffer
4139                  * size and should have left via the partial-line condition above.
4140                  * If we get here, one of the trace_seq_* functions was not used properly.
4141                  */
4142                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4143                           iter->ent->type);
4144         }
4145         trace_access_unlock(iter->cpu_file);
4146         trace_event_read_unlock();
4147
4148         /* Now copy what we have to the user */
4149         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4150         if (iter->seq.readpos >= iter->seq.len)
4151                 trace_seq_init(&iter->seq);
4152
4153         /*
4154          * If there was nothing to send to user, in spite of consuming trace
4155          * entries, go back to wait for more entries.
4156          */
4157         if (sret == -EBUSY)
4158                 goto waitagain;
4159
4160 out:
4161         mutex_unlock(&iter->mutex);
4162
4163         return sret;
4164 }
4165
4166 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
4167                                      struct pipe_buffer *buf)
4168 {
4169         __free_page(buf->page);
4170 }
4171
4172 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4173                                      unsigned int idx)
4174 {
4175         __free_page(spd->pages[idx]);
4176 }
4177
4178 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4179         .can_merge              = 0,
4180         .map                    = generic_pipe_buf_map,
4181         .unmap                  = generic_pipe_buf_unmap,
4182         .confirm                = generic_pipe_buf_confirm,
4183         .release                = tracing_pipe_buf_release,
4184         .steal                  = generic_pipe_buf_steal,
4185         .get                    = generic_pipe_buf_get,
4186 };
4187
4188 static size_t
4189 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4190 {
4191         size_t count;
4192         int ret;
4193
4194         /* Seq buffer is page-sized, exactly what we need. */
4195         for (;;) {
4196                 count = iter->seq.len;
4197                 ret = print_trace_line(iter);
4198                 count = iter->seq.len - count;
4199                 if (rem < count) {
4200                         rem = 0;
4201                         iter->seq.len -= count;
4202                         break;
4203                 }
4204                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4205                         iter->seq.len -= count;
4206                         break;
4207                 }
4208
4209                 if (ret != TRACE_TYPE_NO_CONSUME)
4210                         trace_consume(iter);
4211                 rem -= count;
4212                 if (!trace_find_next_entry_inc(iter))   {
4213                         rem = 0;
4214                         iter->ent = NULL;
4215                         break;
4216                 }
4217         }
4218
4219         return rem;
4220 }
4221
4222 static ssize_t tracing_splice_read_pipe(struct file *filp,
4223                                         loff_t *ppos,
4224                                         struct pipe_inode_info *pipe,
4225                                         size_t len,
4226                                         unsigned int flags)
4227 {
4228         struct page *pages_def[PIPE_DEF_BUFFERS];
4229         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4230         struct trace_iterator *iter = filp->private_data;
4231         struct splice_pipe_desc spd = {
4232                 .pages          = pages_def,
4233                 .partial        = partial_def,
4234                 .nr_pages       = 0, /* This gets updated below. */
4235                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4236                 .flags          = flags,
4237                 .ops            = &tracing_pipe_buf_ops,
4238                 .spd_release    = tracing_spd_release_pipe,
4239         };
4240         struct trace_array *tr = iter->tr;
4241         ssize_t ret;
4242         size_t rem;
4243         unsigned int i;
4244
4245         if (splice_grow_spd(pipe, &spd))
4246                 return -ENOMEM;
4247
4248         /* copy the tracer to avoid using a global lock all around */
4249         mutex_lock(&trace_types_lock);
4250         if (unlikely(iter->trace->name != tr->current_trace->name))
4251                 *iter->trace = *tr->current_trace;
4252         mutex_unlock(&trace_types_lock);
4253
4254         mutex_lock(&iter->mutex);
4255
4256         if (iter->trace->splice_read) {
4257                 ret = iter->trace->splice_read(iter, filp,
4258                                                ppos, pipe, len, flags);
4259                 if (ret)
4260                         goto out_err;
4261         }
4262
4263         ret = tracing_wait_pipe(filp);
4264         if (ret <= 0)
4265                 goto out_err;
4266
4267         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4268                 ret = -EFAULT;
4269                 goto out_err;
4270         }
4271
4272         trace_event_read_lock();
4273         trace_access_lock(iter->cpu_file);
4274
4275         /* Fill as many pages as possible. */
4276         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4277                 spd.pages[i] = alloc_page(GFP_KERNEL);
4278                 if (!spd.pages[i])
4279                         break;
4280
4281                 rem = tracing_fill_pipe_page(rem, iter);
4282
4283                 /* Copy the data into the page, so we can start over. */
4284                 ret = trace_seq_to_buffer(&iter->seq,
4285                                           page_address(spd.pages[i]),
4286                                           iter->seq.len);
4287                 if (ret < 0) {
4288                         __free_page(spd.pages[i]);
4289                         break;
4290                 }
4291                 spd.partial[i].offset = 0;
4292                 spd.partial[i].len = iter->seq.len;
4293
4294                 trace_seq_init(&iter->seq);
4295         }
4296
4297         trace_access_unlock(iter->cpu_file);
4298         trace_event_read_unlock();
4299         mutex_unlock(&iter->mutex);
4300
4301         spd.nr_pages = i;
4302
4303         ret = splice_to_pipe(pipe, &spd);
4304 out:
4305         splice_shrink_spd(&spd);
4306         return ret;
4307
4308 out_err:
4309         mutex_unlock(&iter->mutex);
4310         goto out;
4311 }
4312
4313 static ssize_t
4314 tracing_entries_read(struct file *filp, char __user *ubuf,
4315                      size_t cnt, loff_t *ppos)
4316 {
4317         struct trace_cpu *tc = filp->private_data;
4318         struct trace_array *tr = tc->tr;
4319         char buf[64];
4320         int r = 0;
4321         ssize_t ret;
4322
4323         mutex_lock(&trace_types_lock);
4324
4325         if (tc->cpu == RING_BUFFER_ALL_CPUS) {
4326                 int cpu, buf_size_same;
4327                 unsigned long size;
4328
4329                 size = 0;
4330                 buf_size_same = 1;
4331                 /* check if all cpu sizes are the same */
4332                 for_each_tracing_cpu(cpu) {
4333                         /* fill in the size from first enabled cpu */
4334                         if (size == 0)
4335                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4336                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4337                                 buf_size_same = 0;
4338                                 break;
4339                         }
4340                 }
4341
4342                 if (buf_size_same) {
4343                         if (!ring_buffer_expanded)
4344                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4345                                             size >> 10,
4346                                             trace_buf_size >> 10);
4347                         else
4348                                 r = sprintf(buf, "%lu\n", size >> 10);
4349                 } else
4350                         r = sprintf(buf, "X\n");
4351         } else
4352                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
4353
4354         mutex_unlock(&trace_types_lock);
4355
4356         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4357         return ret;
4358 }
4359
4360 static ssize_t
4361 tracing_entries_write(struct file *filp, const char __user *ubuf,
4362                       size_t cnt, loff_t *ppos)
4363 {
4364         struct trace_cpu *tc = filp->private_data;
4365         unsigned long val;
4366         int ret;
4367
4368         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4369         if (ret)
4370                 return ret;
4371
4372         /* must have at least 1 entry */
4373         if (!val)
4374                 return -EINVAL;
4375
4376         /* value is in KB */
4377         val <<= 10;
4378
4379         ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
4380         if (ret < 0)
4381                 return ret;
4382
4383         *ppos += cnt;
4384
4385         return cnt;
4386 }
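
/*
 * The value written is interpreted in KB (see the "val <<= 10" above) and
 * applies either to every CPU's buffer or, through the per_cpu directories
 * where they are exposed, to a single CPU. A hedged example:
 *
 *    # echo 4096 > buffer_size_kb                 (~4 MB per CPU)
 *    # echo 1024 > per_cpu/cpu0/buffer_size_kb    (resize only CPU 0)
 */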
4387
4388 static ssize_t
4389 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4390                                 size_t cnt, loff_t *ppos)
4391 {
4392         struct trace_array *tr = filp->private_data;
4393         char buf[64];
4394         int r, cpu;
4395         unsigned long size = 0, expanded_size = 0;
4396
4397         mutex_lock(&trace_types_lock);
4398         for_each_tracing_cpu(cpu) {
4399                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4400                 if (!ring_buffer_expanded)
4401                         expanded_size += trace_buf_size >> 10;
4402         }
4403         if (ring_buffer_expanded)
4404                 r = sprintf(buf, "%lu\n", size);
4405         else
4406                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4407         mutex_unlock(&trace_types_lock);
4408
4409         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4410 }
4411
4412 static ssize_t
4413 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4414                           size_t cnt, loff_t *ppos)
4415 {
4416         /*
4417          * There is no need to read what the user has written; this function
4418          * just makes sure that "echo" into this file does not return an error.
4419          */
4420
4421         *ppos += cnt;
4422
4423         return cnt;
4424 }
4425
4426 static int
4427 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4428 {
4429         struct trace_array *tr = inode->i_private;
4430
4431         /* disable tracing ? */
4432         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4433                 tracing_off();
4434         /* resize the ring buffer to 0 */
4435         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4436
4437         trace_array_put(tr);
4438
4439         return 0;
4440 }
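
/*
 * Rough usage of the free_buffer file implemented above: any write is
 * accepted, and the actual work happens on the final close, which shrinks
 * the ring buffer to zero entries (and, if the TRACE_ITER_STOP_ON_FREE
 * option is set, turns tracing off first). For example:
 *
 *    # echo > free_buffer
 *
 * returns the ring buffer memory to the system once the file is closed.
 */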
4441
4442 static ssize_t
4443 tracing_mark_write(struct file *filp, const char __user *ubuf,
4444                                         size_t cnt, loff_t *fpos)
4445 {
4446         unsigned long addr = (unsigned long)ubuf;
4447         struct trace_array *tr = filp->private_data;
4448         struct ring_buffer_event *event;
4449         struct ring_buffer *buffer;
4450         struct print_entry *entry;
4451         unsigned long irq_flags;
4452         struct page *pages[2];
4453         void *map_page[2];
4454         int nr_pages = 1;
4455         ssize_t written;
4456         int offset;
4457         int size;
4458         int len;
4459         int ret;
4460         int i;
4461
4462         if (tracing_disabled)
4463                 return -EINVAL;
4464
4465         if (!(trace_flags & TRACE_ITER_MARKERS))
4466                 return -EINVAL;
4467
4468         if (cnt > TRACE_BUF_SIZE)
4469                 cnt = TRACE_BUF_SIZE;
4470
4471         /*
4472          * Userspace is injecting traces into the kernel trace buffer.
4473          * We want to be as non-intrusive as possible.
4474          * To do so, we do not want to allocate any special buffers
4475          * or take any locks, but instead write the userspace data
4476          * straight into the ring buffer.
4477          *
4478          * First we need to pin the userspace buffer into memory,
4479          * which it most likely already is, because the caller just referenced it.
4480          * But there's no guarantee that it is. By using get_user_pages_fast()
4481          * and kmap_atomic/kunmap_atomic() we can get access to the
4482          * pages directly. We then write the data directly into the
4483          * ring buffer.
4484          */
4485         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4486
4487         /* check if we cross pages */
4488         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4489                 nr_pages = 2;
4490
4491         offset = addr & (PAGE_SIZE - 1);
4492         addr &= PAGE_MASK;
4493
4494         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4495         if (ret < nr_pages) {
4496                 while (--ret >= 0)
4497                         put_page(pages[ret]);
4498                 written = -EFAULT;
4499                 goto out;
4500         }
4501
4502         for (i = 0; i < nr_pages; i++)
4503                 map_page[i] = kmap_atomic(pages[i]);
4504
4505         local_save_flags(irq_flags);
4506         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4507         buffer = tr->trace_buffer.buffer;
4508         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4509                                           irq_flags, preempt_count());
4510         if (!event) {
4511                 /* Ring buffer disabled, return as if not open for write */
4512                 written = -EBADF;
4513                 goto out_unlock;
4514         }
4515
4516         entry = ring_buffer_event_data(event);
4517         entry->ip = _THIS_IP_;
4518
4519         if (nr_pages == 2) {
4520                 len = PAGE_SIZE - offset;
4521                 memcpy(&entry->buf, map_page[0] + offset, len);
4522                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4523         } else
4524                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4525
4526         if (entry->buf[cnt - 1] != '\n') {
4527                 entry->buf[cnt] = '\n';
4528                 entry->buf[cnt + 1] = '\0';
4529         } else
4530                 entry->buf[cnt] = '\0';
4531
4532         __buffer_unlock_commit(buffer, event);
4533
4534         written = cnt;
4535
4536         *fpos += written;
4537
4538  out_unlock:
4539         for (i = 0; i < nr_pages; i++) {
4540                 kunmap_atomic(map_page[i]);
4541                 put_page(pages[i]);
4542         }
4543  out:
4544         return written;
4545 }
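
/*
 * A hedged example of the trace_marker interface implemented above: text
 * written from user space shows up in the trace as a print event (the
 * task name and timestamp below are illustrative):
 *
 *    # echo "hello from user space" > trace_marker
 *    # cat trace_pipe
 *    bash-1234  [000] ....  1234.567890: tracing_mark_write: hello from user space
 */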
4546
4547 static int tracing_clock_show(struct seq_file *m, void *v)
4548 {
4549         struct trace_array *tr = m->private;
4550         int i;
4551
4552         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4553                 seq_printf(m,
4554                         "%s%s%s%s", i ? " " : "",
4555                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4556                         i == tr->clock_id ? "]" : "");
4557         seq_putc(m, '\n');
4558
4559         return 0;
4560 }
4561
4562 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4563                                    size_t cnt, loff_t *fpos)
4564 {
4565         struct seq_file *m = filp->private_data;
4566         struct trace_array *tr = m->private;
4567         char buf[64];
4568         const char *clockstr;
4569         int i;
4570
4571         if (cnt >= sizeof(buf))
4572                 return -EINVAL;
4573
4574         if (copy_from_user(&buf, ubuf, cnt))
4575                 return -EFAULT;
4576
4577         buf[cnt] = 0;
4578
4579         clockstr = strstrip(buf);
4580
4581         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4582                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4583                         break;
4584         }
4585         if (i == ARRAY_SIZE(trace_clocks))
4586                 return -EINVAL;
4587
4588         mutex_lock(&trace_types_lock);
4589
4590         tr->clock_id = i;
4591
4592         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4593
4594         /*
4595          * New clock may not be consistent with the previous clock.
4596          * Reset the buffer so that it doesn't have incomparable timestamps.
4597          */
4598         tracing_reset_online_cpus(&global_trace.trace_buffer);
4599
4600 #ifdef CONFIG_TRACER_MAX_TRACE
4601         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4602                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4603         tracing_reset_online_cpus(&global_trace.max_buffer);
4604 #endif
4605
4606         mutex_unlock(&trace_types_lock);
4607
4608         *fpos += cnt;
4609
4610         return cnt;
4611 }
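
/*
 * Example interaction with the trace_clock file (the exact clock list
 * depends on the architecture and configuration; the current selection is
 * shown in brackets):
 *
 *    # cat trace_clock
 *    [local] global counter uptime perf
 *    # echo global > trace_clock
 *
 * Note that switching clocks resets the ring buffer, as done above, so
 * previously recorded events are discarded.
 */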
4612
4613 static int tracing_clock_open(struct inode *inode, struct file *file)
4614 {
4615         struct trace_array *tr = inode->i_private;
4616         int ret;
4617
4618         if (tracing_disabled)
4619                 return -ENODEV;
4620
4621         if (trace_array_get(tr))
4622                 return -ENODEV;
4623
4624         ret = single_open(file, tracing_clock_show, inode->i_private);
4625         if (ret < 0)
4626                 trace_array_put(tr);
4627
4628         return ret;
4629 }
4630
4631 struct ftrace_buffer_info {
4632         struct trace_iterator   iter;
4633         void                    *spare;
4634         unsigned int            read;
4635 };
4636
4637 #ifdef CONFIG_TRACER_SNAPSHOT
4638 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4639 {
4640         struct trace_cpu *tc = inode->i_private;
4641         struct trace_array *tr = tc->tr;
4642         struct trace_iterator *iter;
4643         struct seq_file *m;
4644         int ret = 0;
4645
4646         if (trace_array_get(tr) < 0)
4647                 return -ENODEV;
4648
4649         if (file->f_mode & FMODE_READ) {
4650                 iter = __tracing_open(tr, tc, inode, file, true);
4651                 if (IS_ERR(iter))
4652                         ret = PTR_ERR(iter);
4653         } else {
4654                 /* Writes still need the seq_file to hold the private data */
4655                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4656                 if (!m)
4657                         return -ENOMEM;
4658                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4659                 if (!iter) {
4660                         kfree(m);
4661                         return -ENOMEM;
4662                 }
4663                 iter->tr = tr;
4664                 iter->trace_buffer = &tc->tr->max_buffer;
4665                 iter->cpu_file = tc->cpu;
4666                 m->private = iter;
4667                 file->private_data = m;
4668         }
4669
4670         if (ret < 0)
4671                 trace_array_put(tr);
4672
4673         return ret;
4674 }
4675
4676 static ssize_t
4677 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4678                        loff_t *ppos)
4679 {
4680         struct seq_file *m = filp->private_data;
4681         struct trace_iterator *iter = m->private;
4682         struct trace_array *tr = iter->tr;
4683         unsigned long val;
4684         int ret;
4685
4686         ret = tracing_update_buffers();
4687         if (ret < 0)
4688                 return ret;
4689
4690         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4691         if (ret)
4692                 return ret;
4693
4694         mutex_lock(&trace_types_lock);
4695
4696         if (tr->current_trace->use_max_tr) {
4697                 ret = -EBUSY;
4698                 goto out;
4699         }
4700
4701         switch (val) {
4702         case 0:
4703                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4704                         ret = -EINVAL;
4705                         break;
4706                 }
4707                 if (tr->allocated_snapshot)
4708                         free_snapshot(tr);
4709                 break;
4710         case 1:
4711 /* Only allow per-cpu swap if the ring buffer supports it */
4712 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4713                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4714                         ret = -EINVAL;
4715                         break;
4716                 }
4717 #endif
4718                 if (!tr->allocated_snapshot) {
4719                         ret = alloc_snapshot(tr);
4720                         if (ret < 0)
4721                                 break;
4722                 }
4723                 local_irq_disable();
4724                 /* Now, we're going to swap */
4725                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4726                         update_max_tr(tr, current, smp_processor_id());
4727                 else
4728                         update_max_tr_single(tr, current, iter->cpu_file);
4729                 local_irq_enable();
4730                 break;
4731         default:
4732                 if (tr->allocated_snapshot) {
4733                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4734                                 tracing_reset_online_cpus(&tr->max_buffer);
4735                         else
4736                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4737                 }
4738                 break;
4739         }
4740
4741         if (ret >= 0) {
4742                 *ppos += cnt;
4743                 ret = cnt;
4744         }
4745 out:
4746         mutex_unlock(&trace_types_lock);
4747         return ret;
4748 }
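
/*
 * Summary of the control values handled by the switch above, as seen from
 * the snapshot file:
 *
 *    # echo 1 > snapshot     (allocate if needed, then swap in a snapshot)
 *    # cat snapshot          (read the snapshotted trace)
 *    # echo 2 > snapshot     (any value > 1: clear the snapshot contents)
 *    # echo 0 > snapshot     (free the snapshot buffer; all-CPUs file only)
 */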
4749
4750 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4751 {
4752         struct seq_file *m = file->private_data;
4753         int ret;
4754
4755         ret = tracing_release(inode, file);
4756
4757         if (file->f_mode & FMODE_READ)
4758                 return ret;
4759
4760         /* If write only, the seq_file is just a stub */
4761         if (m)
4762                 kfree(m->private);
4763         kfree(m);
4764
4765         return 0;
4766 }
4767
4768 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4769 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4770                                     size_t count, loff_t *ppos);
4771 static int tracing_buffers_release(struct inode *inode, struct file *file);
4772 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4773                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4774
4775 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4776 {
4777         struct ftrace_buffer_info *info;
4778         int ret;
4779
4780         ret = tracing_buffers_open(inode, filp);
4781         if (ret < 0)
4782                 return ret;
4783
4784         info = filp->private_data;
4785
4786         if (info->iter.trace->use_max_tr) {
4787                 tracing_buffers_release(inode, filp);
4788                 return -EBUSY;
4789         }
4790
4791         info->iter.snapshot = true;
4792         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4793
4794         return ret;
4795 }
4796
4797 #endif /* CONFIG_TRACER_SNAPSHOT */
4798
4799
4800 static const struct file_operations tracing_max_lat_fops = {
4801         .open           = tracing_open_generic,
4802         .read           = tracing_max_lat_read,
4803         .write          = tracing_max_lat_write,
4804         .llseek         = generic_file_llseek,
4805 };
4806
4807 static const struct file_operations set_tracer_fops = {
4808         .open           = tracing_open_generic,
4809         .read           = tracing_set_trace_read,
4810         .write          = tracing_set_trace_write,
4811         .llseek         = generic_file_llseek,
4812 };
4813
4814 static const struct file_operations tracing_pipe_fops = {
4815         .open           = tracing_open_pipe,
4816         .poll           = tracing_poll_pipe,
4817         .read           = tracing_read_pipe,
4818         .splice_read    = tracing_splice_read_pipe,
4819         .release        = tracing_release_pipe,
4820         .llseek         = no_llseek,
4821 };
4822
4823 static const struct file_operations tracing_entries_fops = {
4824         .open           = tracing_open_generic_tc,
4825         .read           = tracing_entries_read,
4826         .write          = tracing_entries_write,
4827         .llseek         = generic_file_llseek,
4828         .release        = tracing_release_generic_tc,
4829 };
4830
4831 static const struct file_operations tracing_total_entries_fops = {
4832         .open           = tracing_open_generic_tr,
4833         .read           = tracing_total_entries_read,
4834         .llseek         = generic_file_llseek,
4835         .release        = tracing_release_generic_tr,
4836 };
4837
4838 static const struct file_operations tracing_free_buffer_fops = {
4839         .open           = tracing_open_generic_tr,
4840         .write          = tracing_free_buffer_write,
4841         .release        = tracing_free_buffer_release,
4842 };
4843
4844 static const struct file_operations tracing_mark_fops = {
4845         .open           = tracing_open_generic_tr,
4846         .write          = tracing_mark_write,
4847         .llseek         = generic_file_llseek,
4848         .release        = tracing_release_generic_tr,
4849 };
4850
4851 static const struct file_operations trace_clock_fops = {
4852         .open           = tracing_clock_open,
4853         .read           = seq_read,
4854         .llseek         = seq_lseek,
4855         .release        = tracing_single_release_tr,
4856         .write          = tracing_clock_write,
4857 };
4858
4859 #ifdef CONFIG_TRACER_SNAPSHOT
4860 static const struct file_operations snapshot_fops = {
4861         .open           = tracing_snapshot_open,
4862         .read           = seq_read,
4863         .write          = tracing_snapshot_write,
4864         .llseek         = tracing_seek,
4865         .release        = tracing_snapshot_release,
4866 };
4867
4868 static const struct file_operations snapshot_raw_fops = {
4869         .open           = snapshot_raw_open,
4870         .read           = tracing_buffers_read,
4871         .release        = tracing_buffers_release,
4872         .splice_read    = tracing_buffers_splice_read,
4873         .llseek         = no_llseek,
4874 };
4875
4876 #endif /* CONFIG_TRACER_SNAPSHOT */
4877
4878 static int tracing_buffers_open(struct inode *inode, struct file *filp)
4879 {
4880         struct trace_cpu *tc = inode->i_private;
4881         struct trace_array *tr = tc->tr;
4882         struct ftrace_buffer_info *info;
4883         int ret;
4884
4885         if (tracing_disabled)
4886                 return -ENODEV;
4887
4888         if (trace_array_get(tr) < 0)
4889                 return -ENODEV;
4890
4891         info = kzalloc(sizeof(*info), GFP_KERNEL);
4892         if (!info) {
4893                 trace_array_put(tr);
4894                 return -ENOMEM;
4895         }
4896
4897         mutex_lock(&trace_types_lock);
4898
4899         tr->ref++;
4900
4901         info->iter.tr           = tr;
4902         info->iter.cpu_file     = tc->cpu;
4903         info->iter.trace        = tr->current_trace;
4904         info->iter.trace_buffer = &tr->trace_buffer;
4905         info->spare             = NULL;
4906         /* Force reading ring buffer for first read */
4907         info->read              = (unsigned int)-1;
4908
4909         filp->private_data = info;
4910
4911         mutex_unlock(&trace_types_lock);
4912
4913         ret = nonseekable_open(inode, filp);
4914         if (ret < 0)
4915                 trace_array_put(tr);
4916
4917         return ret;
4918 }
4919
4920 static unsigned int
4921 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
4922 {
4923         struct ftrace_buffer_info *info = filp->private_data;
4924         struct trace_iterator *iter = &info->iter;
4925
4926         return trace_poll(iter, filp, poll_table);
4927 }
4928
4929 static ssize_t
4930 tracing_buffers_read(struct file *filp, char __user *ubuf,
4931                      size_t count, loff_t *ppos)
4932 {
4933         struct ftrace_buffer_info *info = filp->private_data;
4934         struct trace_iterator *iter = &info->iter;
4935         ssize_t ret;
4936         ssize_t size;
4937
4938         if (!count)
4939                 return 0;
4940
4941         mutex_lock(&trace_types_lock);
4942
4943 #ifdef CONFIG_TRACER_MAX_TRACE
4944         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
4945                 size = -EBUSY;
4946                 goto out_unlock;
4947         }
4948 #endif
4949
4950         if (!info->spare)
4951                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
4952                                                           iter->cpu_file);
4953         size = -ENOMEM;
4954         if (!info->spare)
4955                 goto out_unlock;
4956
4957         /* Do we have previous read data to read? */
4958         if (info->read < PAGE_SIZE)
4959                 goto read;
4960
4961  again:
4962         trace_access_lock(iter->cpu_file);
4963         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
4964                                     &info->spare,
4965                                     count,
4966                                     iter->cpu_file, 0);
4967         trace_access_unlock(iter->cpu_file);
4968
4969         if (ret < 0) {
4970                 if (trace_empty(iter)) {
4971                         if ((filp->f_flags & O_NONBLOCK)) {
4972                                 size = -EAGAIN;
4973                                 goto out_unlock;
4974                         }
4975                         mutex_unlock(&trace_types_lock);
4976                         iter->trace->wait_pipe(iter);
4977                         mutex_lock(&trace_types_lock);
4978                         if (signal_pending(current)) {
4979                                 size = -EINTR;
4980                                 goto out_unlock;
4981                         }
4982                         goto again;
4983                 }
4984                 size = 0;
4985                 goto out_unlock;
4986         }
4987
4988         info->read = 0;
4989  read:
4990         size = PAGE_SIZE - info->read;
4991         if (size > count)
4992                 size = count;
4993
4994         ret = copy_to_user(ubuf, info->spare + info->read, size);
4995         if (ret == size) {
4996                 size = -EFAULT;
4997                 goto out_unlock;
4998         }
4999         size -= ret;
5000
5001         *ppos += size;
5002         info->read += size;
5003
5004  out_unlock:
5005         mutex_unlock(&trace_types_lock);
5006
5007         return size;
5008 }
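
/*
 * Illustrative only: a user-space sketch of consuming the binary per-cpu
 * "trace_pipe_raw" file that tracing_buffers_read() above serves (not part
 * of the kernel build).  The debugfs mount point, the choice of cpu0 and
 * the 4096-byte page size are assumptions for the example.  Data is handed
 * out one ring buffer page at a time; with O_NONBLOCK an empty buffer
 * returns EAGAIN instead of sleeping in wait_pipe().
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int dump_raw_cpu0(int out_fd)
 *	{
 *		char page[4096];
 *		ssize_t n;
 *		int fd;
 *
 *		fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			  O_RDONLY | O_NONBLOCK);
 *		if (fd < 0)
 *			return -1;
 *
 *		while ((n = read(fd, page, sizeof(page))) > 0) {
 *			if (write(out_fd, page, n) != n)
 *				break;
 *		}
 *		close(fd);
 *		return (n < 0 && errno != EAGAIN) ? -1 : 0;
 *	}
 */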
5009
5010 static int tracing_buffers_release(struct inode *inode, struct file *file)
5011 {
5012         struct ftrace_buffer_info *info = file->private_data;
5013         struct trace_iterator *iter = &info->iter;
5014
5015         mutex_lock(&trace_types_lock);
5016
5017         __trace_array_put(iter->tr);
5018
5019         if (info->spare)
5020                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5021         kfree(info);
5022
5023         mutex_unlock(&trace_types_lock);
5024
5025         return 0;
5026 }
5027
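/*
 * One buffer_ref is allocated for each ring buffer page handed to splice
 * by tracing_buffers_splice_read() below.  Because the page is shared with
 * the pipe it is reference counted: buffer_pipe_buf_get() takes a reference
 * for every pipe buffer pointing at it, while buffer_pipe_buf_release()
 * (and buffer_spd_release() for pages left in a partially consumed spd)
 * drops one and, on the last put, returns the page to the ring buffer and
 * frees the ref.
 */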
5028 struct buffer_ref {
5029         struct ring_buffer      *buffer;
5030         void                    *page;
5031         int                     ref;
5032 };
5033
5034 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5035                                     struct pipe_buffer *buf)
5036 {
5037         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5038
5039         if (--ref->ref)
5040                 return;
5041
5042         ring_buffer_free_read_page(ref->buffer, ref->page);
5043         kfree(ref);
5044         buf->private = 0;
5045 }
5046
5047 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5048                                 struct pipe_buffer *buf)
5049 {
5050         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5051
5052         ref->ref++;
5053 }
5054
5055 /* Pipe buffer operations for a buffer. */
5056 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5057         .can_merge              = 0,
5058         .map                    = generic_pipe_buf_map,
5059         .unmap                  = generic_pipe_buf_unmap,
5060         .confirm                = generic_pipe_buf_confirm,
5061         .release                = buffer_pipe_buf_release,
5062         .steal                  = generic_pipe_buf_steal,
5063         .get                    = buffer_pipe_buf_get,
5064 };
5065
5066 /*
5067  * Callback from splice_to_pipe(); releases any pages left in the spd
5068  * if we errored out while filling the pipe.
5069  */
5070 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5071 {
5072         struct buffer_ref *ref =
5073                 (struct buffer_ref *)spd->partial[i].private;
5074
5075         if (--ref->ref)
5076                 return;
5077
5078         ring_buffer_free_read_page(ref->buffer, ref->page);
5079         kfree(ref);
5080         spd->partial[i].private = 0;
5081 }
5082
5083 static ssize_t
5084 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5085                             struct pipe_inode_info *pipe, size_t len,
5086                             unsigned int flags)
5087 {
5088         struct ftrace_buffer_info *info = file->private_data;
5089         struct trace_iterator *iter = &info->iter;
5090         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5091         struct page *pages_def[PIPE_DEF_BUFFERS];
5092         struct splice_pipe_desc spd = {
5093                 .pages          = pages_def,
5094                 .partial        = partial_def,
5095                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5096                 .flags          = flags,
5097                 .ops            = &buffer_pipe_buf_ops,
5098                 .spd_release    = buffer_spd_release,
5099         };
5100         struct buffer_ref *ref;
5101         int entries, size, i;
5102         ssize_t ret;
5103
5104         mutex_lock(&trace_types_lock);
5105
5106 #ifdef CONFIG_TRACER_MAX_TRACE
5107         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5108                 ret = -EBUSY;
5109                 goto out;
5110         }
5111 #endif
5112
5113         if (splice_grow_spd(pipe, &spd)) {
5114                 ret = -ENOMEM;
5115                 goto out;
5116         }
5117
5118         if (*ppos & (PAGE_SIZE - 1)) {
5119                 ret = -EINVAL;
5120                 goto out;
5121         }
5122
5123         if (len & (PAGE_SIZE - 1)) {
5124                 if (len < PAGE_SIZE) {
5125                         ret = -EINVAL;
5126                         goto out;
5127                 }
5128                 len &= PAGE_MASK;
5129         }
5130
5131  again:
5132         trace_access_lock(iter->cpu_file);
5133         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5134
5135         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5136                 struct page *page;
5137                 int r;
5138
5139                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5140                 if (!ref)
5141                         break;
5142
5143                 ref->ref = 1;
5144                 ref->buffer = iter->trace_buffer->buffer;
5145                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5146                 if (!ref->page) {
5147                         kfree(ref);
5148                         break;
5149                 }
5150
5151                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5152                                           len, iter->cpu_file, 1);
5153                 if (r < 0) {
5154                         ring_buffer_free_read_page(ref->buffer, ref->page);
5155                         kfree(ref);
5156                         break;
5157                 }
5158
5159                 /*
5160                  * Zero out any leftover data; this page is going
5161                  * to user land.
5162                  */
5163                 size = ring_buffer_page_len(ref->page);
5164                 if (size < PAGE_SIZE)
5165                         memset(ref->page + size, 0, PAGE_SIZE - size);
5166
5167                 page = virt_to_page(ref->page);
5168
5169                 spd.pages[i] = page;
5170                 spd.partial[i].len = PAGE_SIZE;
5171                 spd.partial[i].offset = 0;
5172                 spd.partial[i].private = (unsigned long)ref;
5173                 spd.nr_pages++;
5174                 *ppos += PAGE_SIZE;
5175
5176                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5177         }
5178
5179         trace_access_unlock(iter->cpu_file);
5180         spd.nr_pages = i;
5181
5182         /* did we read anything? */
5183         if (!spd.nr_pages) {
5184                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5185                         ret = -EAGAIN;
5186                         goto out;
5187                 }
5188                 mutex_unlock(&trace_types_lock);
5189                 iter->trace->wait_pipe(iter);
5190                 mutex_lock(&trace_types_lock);
5191                 if (signal_pending(current)) {
5192                         ret = -EINTR;
5193                         goto out;
5194                 }
5195                 goto again;
5196         }
5197
5198         ret = splice_to_pipe(pipe, &spd);
5199         splice_shrink_spd(&spd);
5200 out:
5201         mutex_unlock(&trace_types_lock);
5202
5203         return ret;
5204 }
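
/*
 * Illustrative only: a user-space sketch of the zero-copy path implemented
 * by tracing_buffers_splice_read() above, moving ring buffer pages into a
 * pipe and from there into a file without copying them through user memory
 * (not part of the kernel build).  The paths, the cpu number and the 64k
 * chunk size are assumptions for the example; requests smaller than a page
 * are rejected and lengths are rounded down to whole pages, as enforced
 * above.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int splice_raw_cpu0(const char *outpath)
 *	{
 *		int raw, out, p[2];
 *		ssize_t n;
 *
 *		raw = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			   O_RDONLY);
 *		out = open(outpath, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *		if (raw < 0 || out < 0 || pipe(p) < 0)
 *			return -1;
 *
 *		while ((n = splice(raw, NULL, p[1], NULL, 65536,
 *				   SPLICE_F_NONBLOCK)) > 0)
 *			splice(p[0], NULL, out, NULL, n, 0);
 *
 *		close(p[0]); close(p[1]); close(out); close(raw);
 *		return 0;
 *	}
 */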
5205
5206 static const struct file_operations tracing_buffers_fops = {
5207         .open           = tracing_buffers_open,
5208         .read           = tracing_buffers_read,
5209         .poll           = tracing_buffers_poll,
5210         .release        = tracing_buffers_release,
5211         .splice_read    = tracing_buffers_splice_read,
5212         .llseek         = no_llseek,
5213 };
5214
5215 static ssize_t
5216 tracing_stats_read(struct file *filp, char __user *ubuf,
5217                    size_t count, loff_t *ppos)
5218 {
5219         struct trace_cpu *tc = filp->private_data;
5220         struct trace_array *tr = tc->tr;
5221         struct trace_buffer *trace_buf = &tr->trace_buffer;
5222         struct trace_seq *s;
5223         unsigned long cnt;
5224         unsigned long long t;
5225         unsigned long usec_rem;
5226         int cpu = tc->cpu;
5227
5228         s = kmalloc(sizeof(*s), GFP_KERNEL);
5229         if (!s)
5230                 return -ENOMEM;
5231
5232         trace_seq_init(s);
5233
5234         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5235         trace_seq_printf(s, "entries: %ld\n", cnt);
5236
5237         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5238         trace_seq_printf(s, "overrun: %ld\n", cnt);
5239
5240         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5241         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5242
5243         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5244         trace_seq_printf(s, "bytes: %ld\n", cnt);
5245
5246         if (trace_clocks[tr->clock_id].in_ns) {
5247                 /* local or global for trace_clock */
5248                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5249                 usec_rem = do_div(t, USEC_PER_SEC);
5250                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5251                                                                 t, usec_rem);
5252
5253                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5254                 usec_rem = do_div(t, USEC_PER_SEC);
5255                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5256         } else {
5257                 /* counter or tsc mode for trace_clock */
5258                 trace_seq_printf(s, "oldest event ts: %llu\n",
5259                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5260
5261                 trace_seq_printf(s, "now ts: %llu\n",
5262                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5263         }
5264
5265         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5266         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5267
5268         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5269         trace_seq_printf(s, "read events: %ld\n", cnt);
5270
5271         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5272
5273         kfree(s);
5274
5275         return count;
5276 }
5277
5278 static const struct file_operations tracing_stats_fops = {
5279         .open           = tracing_open_generic,
5280         .read           = tracing_stats_read,
5281         .llseek         = generic_file_llseek,
5282 };
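
/*
 * The per-cpu "stats" file backed by tracing_stats_read() above reports,
 * one value per line: entries, overrun, commit overrun, bytes, oldest
 * event ts, now ts, dropped events and read events for that cpu's ring
 * buffer.  The two timestamps are printed as seconds.microseconds when the
 * current trace clock counts in nanoseconds, and as raw counter values
 * otherwise.
 */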
5283
5284 #ifdef CONFIG_DYNAMIC_FTRACE
5285
5286 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5287 {
5288         return 0;
5289 }
5290
5291 static ssize_t
5292 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5293                   size_t cnt, loff_t *ppos)
5294 {
5295         static char ftrace_dyn_info_buffer[1024];
5296         static DEFINE_MUTEX(dyn_info_mutex);
5297         unsigned long *p = filp->private_data;
5298         char *buf = ftrace_dyn_info_buffer;
5299         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5300         int r;
5301
5302         mutex_lock(&dyn_info_mutex);
5303         r = sprintf(buf, "%ld ", *p);
5304
5305         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5306         buf[r++] = '\n';
5307
5308         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5309
5310         mutex_unlock(&dyn_info_mutex);
5311
5312         return r;
5313 }
5314
5315 static const struct file_operations tracing_dyn_info_fops = {
5316         .open           = tracing_open_generic,
5317         .read           = tracing_read_dyn_info,
5318         .llseek         = generic_file_llseek,
5319 };
5320 #endif /* CONFIG_DYNAMIC_FTRACE */
5321
5322 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5323 static void
5324 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5325 {
5326         tracing_snapshot();
5327 }
5328
5329 static void
5330 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5331 {
5332         unsigned long *count = (unsigned long *)data;
5333
5334         if (!*count)
5335                 return;
5336
5337         if (*count != -1)
5338                 (*count)--;
5339
5340         tracing_snapshot();
5341 }
5342
5343 static int
5344 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5345                       struct ftrace_probe_ops *ops, void *data)
5346 {
5347         long count = (long)data;
5348
5349         seq_printf(m, "%ps:", (void *)ip);
5350
5351         seq_printf(m, "snapshot");
5352
5353         if (count == -1)
5354                 seq_printf(m, ":unlimited\n");
5355         else
5356                 seq_printf(m, ":count=%ld\n", count);
5357
5358         return 0;
5359 }
5360
5361 static struct ftrace_probe_ops snapshot_probe_ops = {
5362         .func                   = ftrace_snapshot,
5363         .print                  = ftrace_snapshot_print,
5364 };
5365
5366 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5367         .func                   = ftrace_count_snapshot,
5368         .print                  = ftrace_snapshot_print,
5369 };
5370
5371 static int
5372 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5373                                char *glob, char *cmd, char *param, int enable)
5374 {
5375         struct ftrace_probe_ops *ops;
5376         void *count = (void *)-1;
5377         char *number;
5378         int ret;
5379
5380         /* hash funcs only work with set_ftrace_filter */
5381         if (!enable)
5382                 return -EINVAL;
5383
5384         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5385
5386         if (glob[0] == '!') {
5387                 unregister_ftrace_function_probe_func(glob+1, ops);
5388                 return 0;
5389         }
5390
5391         if (!param)
5392                 goto out_reg;
5393
5394         number = strsep(&param, ":");
5395
5396         if (!strlen(number))
5397                 goto out_reg;
5398
5399         /*
5400          * We use the callback data field (which is a pointer)
5401          * as our counter.
5402          */
5403         ret = kstrtoul(number, 0, (unsigned long *)&count);
5404         if (ret)
5405                 return ret;
5406
5407  out_reg:
5408         ret = register_ftrace_function_probe(glob, ops, count);
5409
5410         if (ret >= 0)
5411                 alloc_snapshot(&global_trace);
5412
5413         return ret < 0 ? ret : 0;
5414 }
5415
5416 static struct ftrace_func_command ftrace_snapshot_cmd = {
5417         .name                   = "snapshot",
5418         .func                   = ftrace_trace_snapshot_callback,
5419 };
5420
5421 static int register_snapshot_cmd(void)
5422 {
5423         return register_ftrace_command(&ftrace_snapshot_cmd);
5424 }
5425 #else
5426 static inline int register_snapshot_cmd(void) { return 0; }
5427 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
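
/*
 * Illustrative only: a user-space sketch of arming the "snapshot" function
 * command registered above (not part of the kernel build).  The command is
 * written to set_ftrace_filter as <function>:snapshot[:<count>]; a count
 * limits it to the first <count> hits and a leading '!' removes the probe.
 * The debugfs path and the traced function name are assumptions for the
 * example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int write_filter(const char *cmd)
 *	{
 *		ssize_t n = -1;
 *		int fd;
 *
 *		fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);
 *		if (fd >= 0) {
 *			n = write(fd, cmd, strlen(cmd));
 *			close(fd);
 *		}
 *		return n < 0 ? -1 : 0;
 *	}
 *
 * Taking a snapshot every time the chosen function is hit, then removing
 * the probe again, would then look like:
 *
 *	write_filter("schedule:snapshot");
 *	write_filter("!schedule:snapshot");
 */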
5428
5429 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5430 {
5431         if (tr->dir)
5432                 return tr->dir;
5433
5434         if (!debugfs_initialized())
5435                 return NULL;
5436
5437         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5438                 tr->dir = debugfs_create_dir("tracing", NULL);
5439
5440         if (!tr->dir)
5441                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5442
5443         return tr->dir;
5444 }
5445
5446 struct dentry *tracing_init_dentry(void)
5447 {
5448         return tracing_init_dentry_tr(&global_trace);
5449 }
5450
5451 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5452 {
5453         struct dentry *d_tracer;
5454
5455         if (tr->percpu_dir)
5456                 return tr->percpu_dir;
5457
5458         d_tracer = tracing_init_dentry_tr(tr);
5459         if (!d_tracer)
5460                 return NULL;
5461
5462         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5463
5464         WARN_ONCE(!tr->percpu_dir,
5465                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5466
5467         return tr->percpu_dir;
5468 }
5469
5470 static void
5471 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5472 {
5473         struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
5474         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5475         struct dentry *d_cpu;
5476         char cpu_dir[30]; /* 30 characters should be more than enough */
5477
5478         if (!d_percpu)
5479                 return;
5480
5481         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5482         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5483         if (!d_cpu) {
5484                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5485                 return;
5486         }
5487
5488         /* per cpu trace_pipe */
5489         trace_create_file("trace_pipe", 0444, d_cpu,
5490                         (void *)&data->trace_cpu, &tracing_pipe_fops);
5491
5492         /* per cpu trace */
5493         trace_create_file("trace", 0644, d_cpu,
5494                         (void *)&data->trace_cpu, &tracing_fops);
5495
5496         trace_create_file("trace_pipe_raw", 0444, d_cpu,
5497                         (void *)&data->trace_cpu, &tracing_buffers_fops);
5498
5499         trace_create_file("stats", 0444, d_cpu,
5500                         (void *)&data->trace_cpu, &tracing_stats_fops);
5501
5502         trace_create_file("buffer_size_kb", 0444, d_cpu,
5503                         (void *)&data->trace_cpu, &tracing_entries_fops);
5504
5505 #ifdef CONFIG_TRACER_SNAPSHOT
5506         trace_create_file("snapshot", 0644, d_cpu,
5507                           (void *)&data->trace_cpu, &snapshot_fops);
5508
5509         trace_create_file("snapshot_raw", 0444, d_cpu,
5510                         (void *)&data->trace_cpu, &snapshot_raw_fops);
5511 #endif
5512 }
5513
5514 #ifdef CONFIG_FTRACE_SELFTEST
5515 /* Let selftest have access to static functions in this file */
5516 #include "trace_selftest.c"
5517 #endif
5518
5519 struct trace_option_dentry {
5520         struct tracer_opt               *opt;
5521         struct tracer_flags             *flags;
5522         struct trace_array              *tr;
5523         struct dentry                   *entry;
5524 };
5525
5526 static ssize_t
5527 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5528                         loff_t *ppos)
5529 {
5530         struct trace_option_dentry *topt = filp->private_data;
5531         char *buf;
5532
5533         if (topt->flags->val & topt->opt->bit)
5534                 buf = "1\n";
5535         else
5536                 buf = "0\n";
5537
5538         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5539 }
5540
5541 static ssize_t
5542 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5543                          loff_t *ppos)
5544 {
5545         struct trace_option_dentry *topt = filp->private_data;
5546         unsigned long val;
5547         int ret;
5548
5549         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5550         if (ret)
5551                 return ret;
5552
5553         if (val != 0 && val != 1)
5554                 return -EINVAL;
5555
5556         if (!!(topt->flags->val & topt->opt->bit) != val) {
5557                 mutex_lock(&trace_types_lock);
5558                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5559                                           topt->opt, !val);
5560                 mutex_unlock(&trace_types_lock);
5561                 if (ret)
5562                         return ret;
5563         }
5564
5565         *ppos += cnt;
5566
5567         return cnt;
5568 }
5569
5570
5571 static const struct file_operations trace_options_fops = {
5572         .open = tracing_open_generic,
5573         .read = trace_options_read,
5574         .write = trace_options_write,
5575         .llseek = generic_file_llseek,
5576 };
5577
5578 static ssize_t
5579 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5580                         loff_t *ppos)
5581 {
5582         long index = (long)filp->private_data;
5583         char *buf;
5584
5585         if (trace_flags & (1 << index))
5586                 buf = "1\n";
5587         else
5588                 buf = "0\n";
5589
5590         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5591 }
5592
5593 static ssize_t
5594 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5595                          loff_t *ppos)
5596 {
5597         struct trace_array *tr = &global_trace;
5598         long index = (long)filp->private_data;
5599         unsigned long val;
5600         int ret;
5601
5602         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5603         if (ret)
5604                 return ret;
5605
5606         if (val != 0 && val != 1)
5607                 return -EINVAL;
5608
5609         mutex_lock(&trace_types_lock);
5610         ret = set_tracer_flag(tr, 1 << index, val);
5611         mutex_unlock(&trace_types_lock);
5612
5613         if (ret < 0)
5614                 return ret;
5615
5616         *ppos += cnt;
5617
5618         return cnt;
5619 }
5620
5621 static const struct file_operations trace_options_core_fops = {
5622         .open = tracing_open_generic,
5623         .read = trace_options_core_read,
5624         .write = trace_options_core_write,
5625         .llseek = generic_file_llseek,
5626 };
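
/*
 * Each core trace option gets its own file under the "options" directory
 * (see create_trace_options_dir() below).  Reading the file reports the
 * current state as "0" or "1"; writing "1" or "0" flips the flag through
 * set_tracer_flag(), and any other value is rejected with -EINVAL.
 * Tracer-specific options are wired up the same way through
 * trace_options_fops above and create_trace_option_file() below.
 */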
5627
5628 struct dentry *trace_create_file(const char *name,
5629                                  umode_t mode,
5630                                  struct dentry *parent,
5631                                  void *data,
5632                                  const struct file_operations *fops)
5633 {
5634         struct dentry *ret;
5635
5636         ret = debugfs_create_file(name, mode, parent, data, fops);
5637         if (!ret)
5638                 pr_warning("Could not create debugfs '%s' entry\n", name);
5639
5640         return ret;
5641 }
5642
5643
5644 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5645 {
5646         struct dentry *d_tracer;
5647
5648         if (tr->options)
5649                 return tr->options;
5650
5651         d_tracer = tracing_init_dentry_tr(tr);
5652         if (!d_tracer)
5653                 return NULL;
5654
5655         tr->options = debugfs_create_dir("options", d_tracer);
5656         if (!tr->options) {
5657                 pr_warning("Could not create debugfs directory 'options'\n");
5658                 return NULL;
5659         }
5660
5661         return tr->options;
5662 }
5663
5664 static void
5665 create_trace_option_file(struct trace_array *tr,
5666                          struct trace_option_dentry *topt,
5667                          struct tracer_flags *flags,
5668                          struct tracer_opt *opt)
5669 {
5670         struct dentry *t_options;
5671
5672         t_options = trace_options_init_dentry(tr);
5673         if (!t_options)
5674                 return;
5675
5676         topt->flags = flags;
5677         topt->opt = opt;
5678         topt->tr = tr;
5679
5680         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5681                                     &trace_options_fops);
5682
5683 }
5684
5685 static struct trace_option_dentry *
5686 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5687 {
5688         struct trace_option_dentry *topts;
5689         struct tracer_flags *flags;
5690         struct tracer_opt *opts;
5691         int cnt;
5692
5693         if (!tracer)
5694                 return NULL;
5695
5696         flags = tracer->flags;
5697
5698         if (!flags || !flags->opts)
5699                 return NULL;
5700
5701         opts = flags->opts;
5702
5703         for (cnt = 0; opts[cnt].name; cnt++)
5704                 ;
5705
5706         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5707         if (!topts)
5708                 return NULL;
5709
5710         for (cnt = 0; opts[cnt].name; cnt++)
5711                 create_trace_option_file(tr, &topts[cnt], flags,
5712                                          &opts[cnt]);
5713
5714         return topts;
5715 }
5716
5717 static void
5718 destroy_trace_option_files(struct trace_option_dentry *topts)
5719 {
5720         int cnt;
5721
5722         if (!topts)
5723                 return;
5724
5725         for (cnt = 0; topts[cnt].opt; cnt++) {
5726                 if (topts[cnt].entry)
5727                         debugfs_remove(topts[cnt].entry);
5728         }
5729
5730         kfree(topts);
5731 }
5732
5733 static struct dentry *
5734 create_trace_option_core_file(struct trace_array *tr,
5735                               const char *option, long index)
5736 {
5737         struct dentry *t_options;
5738
5739         t_options = trace_options_init_dentry(tr);
5740         if (!t_options)
5741                 return NULL;
5742
5743         return trace_create_file(option, 0644, t_options, (void *)index,
5744                                     &trace_options_core_fops);
5745 }
5746
5747 static __init void create_trace_options_dir(struct trace_array *tr)
5748 {
5749         struct dentry *t_options;
5750         int i;
5751
5752         t_options = trace_options_init_dentry(tr);
5753         if (!t_options)
5754                 return;
5755
5756         for (i = 0; trace_options[i]; i++)
5757                 create_trace_option_core_file(tr, trace_options[i], i);
5758 }
5759
5760 static ssize_t
5761 rb_simple_read(struct file *filp, char __user *ubuf,
5762                size_t cnt, loff_t *ppos)
5763 {
5764         struct trace_array *tr = filp->private_data;
5765         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5766         char buf[64];
5767         int r;
5768
5769         if (buffer)
5770                 r = ring_buffer_record_is_on(buffer);
5771         else
5772                 r = 0;
5773
5774         r = sprintf(buf, "%d\n", r);
5775
5776         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5777 }
5778
5779 static ssize_t
5780 rb_simple_write(struct file *filp, const char __user *ubuf,
5781                 size_t cnt, loff_t *ppos)
5782 {
5783         struct trace_array *tr = filp->private_data;
5784         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5785         unsigned long val;
5786         int ret;
5787
5788         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5789         if (ret)
5790                 return ret;
5791
5792         if (buffer) {
5793                 mutex_lock(&trace_types_lock);
5794                 if (val) {
5795                         ring_buffer_record_on(buffer);
5796                         if (tr->current_trace->start)
5797                                 tr->current_trace->start(tr);
5798                 } else {
5799                         ring_buffer_record_off(buffer);
5800                         if (tr->current_trace->stop)
5801                                 tr->current_trace->stop(tr);
5802                 }
5803                 mutex_unlock(&trace_types_lock);
5804         }
5805
5806         (*ppos)++;
5807
5808         return cnt;
5809 }
5810
5811 static const struct file_operations rb_simple_fops = {
5812         .open           = tracing_open_generic_tr,
5813         .read           = rb_simple_read,
5814         .write          = rb_simple_write,
5815         .release        = tracing_release_generic_tr,
5816         .llseek         = default_llseek,
5817 };
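
/*
 * Illustrative only: a user-space sketch of pausing and resuming ring
 * buffer recording through the "tracing_on" file that init_tracer_debugfs()
 * below hooks up to rb_simple_fops (not part of the kernel build).  The
 * debugfs path is an assumption for the example; any non-zero value turns
 * recording on, zero turns it off.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int set_tracing_on(int on)
 *	{
 *		ssize_t n = -1;
 *		int fd;
 *
 *		fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
 *		if (fd >= 0) {
 *			n = write(fd, on ? "1" : "0", 1);
 *			close(fd);
 *		}
 *		return n == 1 ? 0 : -1;
 *	}
 */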
5818
5819 struct dentry *trace_instance_dir;
5820
5821 static void
5822 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5823
5824 static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
5825 {
5826         int cpu;
5827
5828         for_each_tracing_cpu(cpu) {
5829                 memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
5830                 per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
5831                 per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
5832         }
5833 }
5834
5835 static int
5836 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5837 {
5838         enum ring_buffer_flags rb_flags;
5839
5840         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5841
5842         buf->buffer = ring_buffer_alloc(size, rb_flags);
5843         if (!buf->buffer)
5844                 return -ENOMEM;
5845
5846         buf->data = alloc_percpu(struct trace_array_cpu);
5847         if (!buf->data) {
5848                 ring_buffer_free(buf->buffer);
5849                 return -ENOMEM;
5850         }
5851
5852         init_trace_buffers(tr, buf);
5853
5854         /* Allocate the first page for all buffers */
5855         set_buffer_entries(&tr->trace_buffer,
5856                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5857
5858         return 0;
5859 }
5860
5861 static int allocate_trace_buffers(struct trace_array *tr, int size)
5862 {
5863         int ret;
5864
5865         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
5866         if (ret)
5867                 return ret;
5868
5869 #ifdef CONFIG_TRACER_MAX_TRACE
5870         ret = allocate_trace_buffer(tr, &tr->max_buffer,
5871                                     allocate_snapshot ? size : 1);
5872         if (WARN_ON(ret)) {
5873                 ring_buffer_free(tr->trace_buffer.buffer);
5874                 free_percpu(tr->trace_buffer.data);
5875                 return -ENOMEM;
5876         }
5877         tr->allocated_snapshot = allocate_snapshot;
5878
5879         /*
5880          * Only the top level trace array gets its snapshot allocated
5881          * from the kernel command line.
5882          */
5883         allocate_snapshot = false;
5884 #endif
5885         return 0;
5886 }
5887
5888 static int new_instance_create(const char *name)
5889 {
5890         struct trace_array *tr;
5891         int ret;
5892
5893         mutex_lock(&trace_types_lock);
5894
5895         ret = -EEXIST;
5896         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
5897                 if (tr->name && strcmp(tr->name, name) == 0)
5898                         goto out_unlock;
5899         }
5900
5901         ret = -ENOMEM;
5902         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
5903         if (!tr)
5904                 goto out_unlock;
5905
5906         tr->name = kstrdup(name, GFP_KERNEL);
5907         if (!tr->name)
5908                 goto out_free_tr;
5909
5910         raw_spin_lock_init(&tr->start_lock);
5911
5912         tr->current_trace = &nop_trace;
5913
5914         INIT_LIST_HEAD(&tr->systems);
5915         INIT_LIST_HEAD(&tr->events);
5916
5917         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
5918                 goto out_free_tr;
5919
5920         /* Holder for file callbacks */
5921         tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
5922         tr->trace_cpu.tr = tr;
5923
5924         tr->dir = debugfs_create_dir(name, trace_instance_dir);
5925         if (!tr->dir)
5926                 goto out_free_tr;
5927
5928         ret = event_trace_add_tracer(tr->dir, tr);
5929         if (ret)
5930                 goto out_free_tr;
5931
5932         init_tracer_debugfs(tr, tr->dir);
5933
5934         list_add(&tr->list, &ftrace_trace_arrays);
5935
5936         mutex_unlock(&trace_types_lock);
5937
5938         return 0;
5939
5940  out_free_tr:
5941         if (tr->trace_buffer.buffer)
5942                 ring_buffer_free(tr->trace_buffer.buffer);
5943         kfree(tr->name);
5944         kfree(tr);
5945
5946  out_unlock:
5947         mutex_unlock(&trace_types_lock);
5948
5949         return ret;
5950
5951 }
5952
5953 static int instance_delete(const char *name)
5954 {
5955         struct trace_array *tr;
5956         int found = 0;
5957         int ret;
5958
5959         mutex_lock(&trace_types_lock);
5960
5961         ret = -ENODEV;
5962         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
5963                 if (tr->name && strcmp(tr->name, name) == 0) {
5964                         found = 1;
5965                         break;
5966                 }
5967         }
5968         if (!found)
5969                 goto out_unlock;
5970
5971         ret = -EBUSY;
5972         if (tr->ref)
5973                 goto out_unlock;
5974
5975         list_del(&tr->list);
5976
5977         event_trace_del_tracer(tr);
5978         debugfs_remove_recursive(tr->dir);
5979         free_percpu(tr->trace_buffer.data);
5980         ring_buffer_free(tr->trace_buffer.buffer);
5981
5982         kfree(tr->name);
5983         kfree(tr);
5984
5985         ret = 0;
5986
5987  out_unlock:
5988         mutex_unlock(&trace_types_lock);
5989
5990         return ret;
5991 }
5992
5993 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
5994 {
5995         struct dentry *parent;
5996         int ret;
5997
5998         /* Paranoid: Make sure the parent is the "instances" directory */
5999         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6000         if (WARN_ON_ONCE(parent != trace_instance_dir))
6001                 return -ENOENT;
6002
6003         /*
6004          * The inode mutex is locked, but debugfs_create_dir() will also
6005          * take the mutex. As the instances directory can not be destroyed
6006          * or changed in any other way, it is safe to unlock it, and
6007          * let the dentry try. If two users try to make the same dir at
6008          * the same time, then the new_instance_create() will determine the
6009          * winner.
6010          */
6011         mutex_unlock(&inode->i_mutex);
6012
6013         ret = new_instance_create(dentry->d_iname);
6014
6015         mutex_lock(&inode->i_mutex);
6016
6017         return ret;
6018 }
6019
6020 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6021 {
6022         struct dentry *parent;
6023         int ret;
6024
6025         /* Paranoid: Make sure the parent is the "instances" directory */
6026         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6027         if (WARN_ON_ONCE(parent != trace_instance_dir))
6028                 return -ENOENT;
6029
6030         /* The caller did a dget() on dentry */
6031         mutex_unlock(&dentry->d_inode->i_mutex);
6032
6033         /*
6034          * The inode mutex is locked, but debugfs_remove_recursive()
6035          * will also take the mutex. As the instances directory cannot
6036          * be destroyed or changed in any other way, it is safe to
6037          * unlock it, and let the dentry try. If two users try to
6038          * remove the same instance at the same time, then
6039          * instance_delete() will determine the winner.
6040          */
6041         mutex_unlock(&inode->i_mutex);
6042
6043         ret = instance_delete(dentry->d_iname);
6044
6045         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6046         mutex_lock(&dentry->d_inode->i_mutex);
6047
6048         return ret;
6049 }
6050
6051 static const struct inode_operations instance_dir_inode_operations = {
6052         .lookup         = simple_lookup,
6053         .mkdir          = instance_mkdir,
6054         .rmdir          = instance_rmdir,
6055 };
6056
6057 static __init void create_trace_instances(struct dentry *d_tracer)
6058 {
6059         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6060         if (WARN_ON(!trace_instance_dir))
6061                 return;
6062
6063         /* Hijack the dir inode operations, to allow mkdir */
6064         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6065 }
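
/*
 * Illustrative only: a user-space sketch of creating and removing a trace
 * instance through the mkdir/rmdir hooks installed above (not part of the
 * kernel build).  The debugfs path and the instance name are assumptions
 * for the example.  Each new directory gets its own buffers plus its own
 * control files from init_tracer_debugfs(), and rmdir fails with -EBUSY
 * while the instance is still referenced.
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	static int demo_instance(void)
 *	{
 *		const char *inst = "/sys/kernel/debug/tracing/instances/demo";
 *
 *		if (mkdir(inst, 0755) < 0)
 *			return -1;
 *		return rmdir(inst);
 *	}
 *
 * Between the two calls the instance's own trace, tracing_on and related
 * files can be used just like the top-level ones.
 */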
6066
6067 static void
6068 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6069 {
6070         int cpu;
6071
6072         trace_create_file("trace_options", 0644, d_tracer,
6073                           tr, &tracing_iter_fops);
6074
6075         trace_create_file("trace", 0644, d_tracer,
6076                         (void *)&tr->trace_cpu, &tracing_fops);
6077
6078         trace_create_file("trace_pipe", 0444, d_tracer,
6079                         (void *)&tr->trace_cpu, &tracing_pipe_fops);
6080
6081         trace_create_file("buffer_size_kb", 0644, d_tracer,
6082                         (void *)&tr->trace_cpu, &tracing_entries_fops);
6083
6084         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6085                           tr, &tracing_total_entries_fops);
6086
6087         trace_create_file("free_buffer", 0644, d_tracer,
6088                           tr, &tracing_free_buffer_fops);
6089
6090         trace_create_file("trace_marker", 0220, d_tracer,
6091                           tr, &tracing_mark_fops);
6092
6093         trace_create_file("trace_clock", 0644, d_tracer, tr,
6094                           &trace_clock_fops);
6095
6096         trace_create_file("tracing_on", 0644, d_tracer,
6097                             tr, &rb_simple_fops);
6098
6099 #ifdef CONFIG_TRACER_SNAPSHOT
6100         trace_create_file("snapshot", 0644, d_tracer,
6101                           (void *)&tr->trace_cpu, &snapshot_fops);
6102 #endif
6103
6104         for_each_tracing_cpu(cpu)
6105                 tracing_init_debugfs_percpu(tr, cpu);
6106
6107 }
6108
6109 static __init int tracer_init_debugfs(void)
6110 {
6111         struct dentry *d_tracer;
6112
6113         trace_access_lock_init();
6114
6115         d_tracer = tracing_init_dentry();
6116         if (!d_tracer)
6117                 return 0;
6118
6119         init_tracer_debugfs(&global_trace, d_tracer);
6120
6121         trace_create_file("tracing_cpumask", 0644, d_tracer,
6122                         &global_trace, &tracing_cpumask_fops);
6123
6124         trace_create_file("available_tracers", 0444, d_tracer,
6125                         &global_trace, &show_traces_fops);
6126
6127         trace_create_file("current_tracer", 0644, d_tracer,
6128                         &global_trace, &set_tracer_fops);
6129
6130 #ifdef CONFIG_TRACER_MAX_TRACE
6131         trace_create_file("tracing_max_latency", 0644, d_tracer,
6132                         &tracing_max_latency, &tracing_max_lat_fops);
6133 #endif
6134
6135         trace_create_file("tracing_thresh", 0644, d_tracer,
6136                         &tracing_thresh, &tracing_max_lat_fops);
6137
6138         trace_create_file("README", 0444, d_tracer,
6139                         NULL, &tracing_readme_fops);
6140
6141         trace_create_file("saved_cmdlines", 0444, d_tracer,
6142                         NULL, &tracing_saved_cmdlines_fops);
6143
6144 #ifdef CONFIG_DYNAMIC_FTRACE
6145         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6146                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6147 #endif
6148
6149         create_trace_instances(d_tracer);
6150
6151         create_trace_options_dir(&global_trace);
6152
6153         return 0;
6154 }
6155
6156 static int trace_panic_handler(struct notifier_block *this,
6157                                unsigned long event, void *unused)
6158 {
6159         if (ftrace_dump_on_oops)
6160                 ftrace_dump(ftrace_dump_on_oops);
6161         return NOTIFY_OK;
6162 }
6163
6164 static struct notifier_block trace_panic_notifier = {
6165         .notifier_call  = trace_panic_handler,
6166         .next           = NULL,
6167         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6168 };
6169
6170 static int trace_die_handler(struct notifier_block *self,
6171                              unsigned long val,
6172                              void *data)
6173 {
6174         switch (val) {
6175         case DIE_OOPS:
6176                 if (ftrace_dump_on_oops)
6177                         ftrace_dump(ftrace_dump_on_oops);
6178                 break;
6179         default:
6180                 break;
6181         }
6182         return NOTIFY_OK;
6183 }
6184
6185 static struct notifier_block trace_die_notifier = {
6186         .notifier_call = trace_die_handler,
6187         .priority = 200
6188 };
6189
6190 /*
6191  * printk allows at most 1024 bytes; we really don't need it that big.
6192  * Nothing should be printing 1000 characters anyway.
6193  */
6194 #define TRACE_MAX_PRINT         1000
6195
6196 /*
6197  * Define here KERN_TRACE so that we have one place to modify
6198  * it if we decide to change what log level the ftrace dump
6199  * should be at.
6200  */
6201 #define KERN_TRACE              KERN_EMERG
6202
6203 void
6204 trace_printk_seq(struct trace_seq *s)
6205 {
6206         /* Probably should print a warning here. */
6207         if (s->len >= TRACE_MAX_PRINT)
6208                 s->len = TRACE_MAX_PRINT;
6209
6210         /* should already be NUL terminated, but we are paranoid. */
6211         s->buffer[s->len] = 0;
6212
6213         printk(KERN_TRACE "%s", s->buffer);
6214
6215         trace_seq_init(s);
6216 }
6217
6218 void trace_init_global_iter(struct trace_iterator *iter)
6219 {
6220         iter->tr = &global_trace;
6221         iter->trace = iter->tr->current_trace;
6222         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6223         iter->trace_buffer = &global_trace.trace_buffer;
6224 }
6225
6226 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6227 {
6228         /* use static because iter can be a bit big for the stack */
6229         static struct trace_iterator iter;
6230         static atomic_t dump_running;
6231         unsigned int old_userobj;
6232         unsigned long flags;
6233         int cnt = 0, cpu;
6234
6235         /* Only allow one dump user at a time. */
6236         if (atomic_inc_return(&dump_running) != 1) {
6237                 atomic_dec(&dump_running);
6238                 return;
6239         }
6240
6241         /*
6242          * Always turn off tracing when we dump.
6243          * We don't need to show trace output of what happens
6244          * between multiple crashes.
6245          *
6246          * If the user does a sysrq-z, then they can re-enable
6247          * tracing with echo 1 > tracing_on.
6248          */
6249         tracing_off();
6250
6251         local_irq_save(flags);
6252
6253         /* Simulate the iterator */
6254         trace_init_global_iter(&iter);
6255
6256         for_each_tracing_cpu(cpu) {
6257                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6258         }
6259
6260         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6261
6262         /* don't look at user memory in panic mode */
6263         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6264
6265         switch (oops_dump_mode) {
6266         case DUMP_ALL:
6267                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6268                 break;
6269         case DUMP_ORIG:
6270                 iter.cpu_file = raw_smp_processor_id();
6271                 break;
6272         case DUMP_NONE:
6273                 goto out_enable;
6274         default:
6275                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6276                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6277         }
6278
6279         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6280
6281         /* Did function tracer already get disabled? */
6282         if (ftrace_is_dead()) {
6283                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6284                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6285         }
6286
6287         /*
6288          * We need to stop all tracing on all CPUs to read
6289          * the next buffer. This is a bit expensive, but it is
6290          * not done often. We read everything that is available,
6291          * and then release the locks again.
6292          */
6293
6294         while (!trace_empty(&iter)) {
6295
6296                 if (!cnt)
6297                         printk(KERN_TRACE "---------------------------------\n");
6298
6299                 cnt++;
6300
6301                 /* reset all but tr, trace, and overruns */
6302                 memset(&iter.seq, 0,
6303                        sizeof(struct trace_iterator) -
6304                        offsetof(struct trace_iterator, seq));
6305                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6306                 iter.pos = -1;
6307
6308                 if (trace_find_next_entry_inc(&iter) != NULL) {
6309                         int ret;
6310
6311                         ret = print_trace_line(&iter);
6312                         if (ret != TRACE_TYPE_NO_CONSUME)
6313                                 trace_consume(&iter);
6314                 }
6315                 touch_nmi_watchdog();
6316
6317                 trace_printk_seq(&iter.seq);
6318         }
6319
6320         if (!cnt)
6321                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6322         else
6323                 printk(KERN_TRACE "---------------------------------\n");
6324
6325  out_enable:
6326         trace_flags |= old_userobj;
6327
6328         for_each_tracing_cpu(cpu) {
6329                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6330         }
6331         atomic_dec(&dump_running);
6332         local_irq_restore(flags);
6333 }
6334 EXPORT_SYMBOL_GPL(ftrace_dump);
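
/*
 * Illustrative only: since ftrace_dump() is exported, a module can dump the
 * ftrace buffers from its own error path; a minimal sketch, where the
 * condition is an assumption for the example:
 *
 *	if (WARN_ON(unexpected_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG restricts the dump to the CPU that triggered it, only one dump
 * may run at a time (dump_running above), and the dump turns tracing off,
 * so recording has to be re-enabled via tracing_on afterwards.
 */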
6335
6336 __init static int tracer_alloc_buffers(void)
6337 {
6338         int ring_buf_size;
6339         int ret = -ENOMEM;
6340
6341
6342         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6343                 goto out;
6344
6345         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
6346                 goto out_free_buffer_mask;
6347
6348         /* Only allocate trace_printk buffers if a trace_printk exists */
6349         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6350                 /* Must be called before global_trace.buffer is allocated */
6351                 trace_printk_init_buffers();
6352
6353         /* To save memory, keep the ring buffer size to its minimum */
6354         if (ring_buffer_expanded)
6355                 ring_buf_size = trace_buf_size;
6356         else
6357                 ring_buf_size = 1;
6358
6359         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6360         cpumask_copy(tracing_cpumask, cpu_all_mask);
6361
6362         raw_spin_lock_init(&global_trace.start_lock);
6363
6364         /* TODO: make the number of buffers hot pluggable with CPUS */
6365         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6366                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6367                 WARN_ON(1);
6368                 goto out_free_cpumask;
6369         }
6370
6371         if (global_trace.buffer_disabled)
6372                 tracing_off();
6373
6374         trace_init_cmdlines();
6375
6376         /*
6377          * register_tracer() might reference current_trace, so it
6378          * needs to be set before we register anything. This is
6379          * just a bootstrap of current_trace anyway.
6380          */
6381         global_trace.current_trace = &nop_trace;
6382
6383         register_tracer(&nop_trace);
6384
6385         /* All seems OK, enable tracing */
6386         tracing_disabled = 0;
6387
6388         atomic_notifier_chain_register(&panic_notifier_list,
6389                                        &trace_panic_notifier);
6390
6391         register_die_notifier(&trace_die_notifier);
6392
6393         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6394
6395         /* Holder for file callbacks */
6396         global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
6397         global_trace.trace_cpu.tr = &global_trace;
6398
6399         INIT_LIST_HEAD(&global_trace.systems);
6400         INIT_LIST_HEAD(&global_trace.events);
6401         list_add(&global_trace.list, &ftrace_trace_arrays);
6402
6403         while (trace_boot_options) {
6404                 char *option;
6405
6406                 option = strsep(&trace_boot_options, ",");
6407                 trace_set_options(&global_trace, option);
6408         }
6409
6410         register_snapshot_cmd();
6411
6412         return 0;
6413
6414 out_free_cpumask:
6415         free_percpu(global_trace.trace_buffer.data);
6416 #ifdef CONFIG_TRACER_MAX_TRACE
6417         free_percpu(global_trace.max_buffer.data);
6418 #endif
6419         free_cpumask_var(tracing_cpumask);
6420 out_free_buffer_mask:
6421         free_cpumask_var(tracing_buffer_mask);
6422 out:
6423         return ret;
6424 }
6425
6426 __init static int clear_boot_tracer(void)
6427 {
6428         /*
6429          * The buffer holding the default bootup tracer name lives in an
6430          * init section. This function runs at late_initcall time; if the
6431          * boot tracer was never found and registered, clear the pointer
6432          * so that later registration cannot access the buffer that is
6433          * about to be freed.
6434          */
6435         if (!default_bootup_tracer)
6436                 return 0;
6437
6438         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6439                default_bootup_tracer);
6440         default_bootup_tracer = NULL;
6441
6442         return 0;
6443 }
6444
6445 early_initcall(tracer_alloc_buffers);
6446 fs_initcall(tracer_init_debugfs);
6447 late_initcall(clear_boot_tracer);