tracing: Add internal tracing_snapshot() functions
[firefly-linux-kernel-4.4.55.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 int ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although some concurrent
56  * insertions into the ring buffer, such as from trace_printk(), could
57  * occur at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurs.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but will turn to zero if the initialization
91  * of the tracer is successful. But that is the only place that sets
92  * this back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 static int tracing_set_tracer(const char *buf);
119
120 #define MAX_TRACER_SIZE         100
121 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
122 static char *default_bootup_tracer;
123
124 static int __init set_cmdline_ftrace(char *str)
125 {
126         strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
127         default_bootup_tracer = bootup_tracer_buf;
128         /* We are using ftrace early, expand it */
129         ring_buffer_expanded = 1;
130         return 1;
131 }
132 __setup("ftrace=", set_cmdline_ftrace);
133
134 static int __init set_ftrace_dump_on_oops(char *str)
135 {
136         if (*str++ != '=' || !*str) {
137                 ftrace_dump_on_oops = DUMP_ALL;
138                 return 1;
139         }
140
141         if (!strcmp("orig_cpu", str)) {
142                 ftrace_dump_on_oops = DUMP_ORIG;
143                 return 1;
144         }
145
146         return 0;
147 }
148 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
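/*
 * Illustrative ways to enable the dump (a sketch based on the comment
 * above; the sysctl path is the one named there):
 *
 *   ftrace_dump_on_oops              on the kernel command line (all CPUs)
 *   ftrace_dump_on_oops=orig_cpu     only the CPU that triggered the oops
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    at run time
 */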
149
150
151 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
152 static char *trace_boot_options __initdata;
153
154 static int __init set_trace_boot_options(char *str)
155 {
156         strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
157         trace_boot_options = trace_boot_options_buf;
158         return 0;
159 }
160 __setup("trace_options=", set_trace_boot_options);
161
162 unsigned long long ns2usecs(cycle_t nsec)
163 {
164         nsec += 500;
165         do_div(nsec, 1000);
166         return nsec;
167 }
168
169 /*
170  * The global_trace is the descriptor that holds the tracing
171  * buffers for the live tracing. For each CPU, it contains
172  * a linked list of pages that will store trace entries. The
173  * page descriptors of those pages hold the linked list: the lru
174  * item of each page descriptor chains together the pages of that
175  * CPU's buffer.
176  *
177  * For each active CPU there is a data field that holds the
178  * pages for the buffer for that CPU. Each CPU has the same number
179  * of pages allocated for its buffer.
180  */
181 static struct trace_array       global_trace;
182
183 LIST_HEAD(ftrace_trace_arrays);
184
185 int filter_current_check_discard(struct ring_buffer *buffer,
186                                  struct ftrace_event_call *call, void *rec,
187                                  struct ring_buffer_event *event)
188 {
189         return filter_check_discard(call, rec, buffer, event);
190 }
191 EXPORT_SYMBOL_GPL(filter_current_check_discard);
192
193 cycle_t ftrace_now(int cpu)
194 {
195         u64 ts;
196
197         /* Early boot up does not have a buffer yet */
198         if (!global_trace.trace_buffer.buffer)
199                 return trace_clock_local();
200
201         ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
202         ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
203
204         return ts;
205 }
206
207 int tracing_is_enabled(void)
208 {
209         return tracing_is_on();
210 }
211
212 /*
213  * trace_buf_size is the size in bytes that is allocated
214  * for a buffer. Note, the number of bytes is always rounded
215  * to page size.
216  *
217  * This number is purposely set to a low number of 16384.
218  * If a dump on oops happens, it is much appreciated not to
219  * have to wait for all that output. Anyway, this is both
220  * boot-time and run-time configurable.
221  */
222 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
223
224 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
225
226 /* trace_types holds a link list of available tracers. */
227 static struct tracer            *trace_types __read_mostly;
228
229 /*
230  * trace_types_lock is used to protect the trace_types list.
231  */
232 static DEFINE_MUTEX(trace_types_lock);
233
234 /*
235  * serialize the access of the ring buffer
236  *
237  * The ring buffer serializes readers, but that is only low level protection.
238  * The validity of the events (returned by ring_buffer_peek() etc.)
239  * is not protected by the ring buffer.
240  *
241  * The content of events may become garbage if we allow other processes to
242  * consume these events concurrently:
243  *   A) the page holding the consumed events may become a normal page
244  *      (not a reader page) in the ring buffer, and this page will be
245  *      rewritten by the event producer.
246  *   B) the page holding the consumed events may become a page for
247  *      splice_read, and this page will be returned to the system.
248  *
249  * These primitives allow multiple processes to access different per-cpu
250  * ring buffers concurrently.
251  *
252  * These primitives don't distinguish read-only and read-consume access.
253  * Multiple read-only accesses are also serialized.
254  */
255
256 #ifdef CONFIG_SMP
257 static DECLARE_RWSEM(all_cpu_access_lock);
258 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
259
260 static inline void trace_access_lock(int cpu)
261 {
262         if (cpu == RING_BUFFER_ALL_CPUS) {
263                 /* gain it for accessing the whole ring buffer. */
264                 down_write(&all_cpu_access_lock);
265         } else {
266                 /* gain it for accessing a cpu ring buffer. */
267
268                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
269                 down_read(&all_cpu_access_lock);
270
271                 /* Secondly block other access to this @cpu ring buffer. */
272                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
273         }
274 }
275
276 static inline void trace_access_unlock(int cpu)
277 {
278         if (cpu == RING_BUFFER_ALL_CPUS) {
279                 up_write(&all_cpu_access_lock);
280         } else {
281                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
282                 up_read(&all_cpu_access_lock);
283         }
284 }
285
286 static inline void trace_access_lock_init(void)
287 {
288         int cpu;
289
290         for_each_possible_cpu(cpu)
291                 mutex_init(&per_cpu(cpu_access_lock, cpu));
292 }
293
294 #else
295
296 static DEFINE_MUTEX(access_lock);
297
298 static inline void trace_access_lock(int cpu)
299 {
300         (void)cpu;
301         mutex_lock(&access_lock);
302 }
303
304 static inline void trace_access_unlock(int cpu)
305 {
306         (void)cpu;
307         mutex_unlock(&access_lock);
308 }
309
310 static inline void trace_access_lock_init(void)
311 {
312 }
313
314 #endif
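/*
 * A hypothetical reader of a single cpu buffer would bracket its accesses
 * with the primitives above, for example:
 *
 *   trace_access_lock(cpu);
 *   ... consume or peek at events on that cpu's ring buffer ...
 *   trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the exclusive side (the rwsem
 * write lock in the SMP case), blocking all per-cpu readers.
 */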
315
316 /* trace_flags holds trace_options default values */
317 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
318         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
319         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
320         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
321
322 /**
323  * tracing_on - enable tracing buffers
324  *
325  * This function enables tracing buffers that may have been
326  * disabled with tracing_off.
327  */
328 void tracing_on(void)
329 {
330         if (global_trace.trace_buffer.buffer)
331                 ring_buffer_record_on(global_trace.trace_buffer.buffer);
332         /*
333          * This flag is only looked at when buffers haven't been
334          * allocated yet. We don't really care about the race
335          * between setting this flag and actually turning
336          * on the buffer.
337          */
338         global_trace.buffer_disabled = 0;
339 }
340 EXPORT_SYMBOL_GPL(tracing_on);
341
342 #ifdef CONFIG_TRACER_SNAPSHOT
343 /**
344  * tracing_snapshot - take a snapshot of the current buffer.
345  *
346  * This causes a swap between the snapshot buffer and the current live
347  * tracing buffer. You can use this to take snapshots of the live
348  * trace when some condition is triggered, but continue to trace.
349  *
350  * Note, make sure to allocate the snapshot with either
351  * a tracing_snapshot_alloc(), or by doing it manually
352  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
353  *
354  * If the snapshot buffer is not allocated, it will stop tracing.
355  * Basically making a permanent snapshot.
356  */
357 void tracing_snapshot(void)
358 {
359         struct trace_array *tr = &global_trace;
360         struct tracer *tracer = tr->current_trace;
361         unsigned long flags;
362
363         if (!tr->allocated_snapshot) {
364                 trace_printk("*** SNAPSHOT NOT ALLOCATED ***\n");
365                 trace_printk("*** stopping trace here!   ***\n");
366                 tracing_off();
367                 return;
368         }
369
370         /* Note, snapshot can not be used when the tracer uses it */
371         if (tracer->use_max_tr) {
372                 trace_printk("*** LATENCY TRACER ACTIVE ***\n");
373                 trace_printk("*** Can not use snapshot (sorry) ***\n");
374                 return;
375         }
376
377         local_irq_save(flags);
378         update_max_tr(tr, current, smp_processor_id());
379         local_irq_restore(flags);
380 }
381
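/*
 * Illustrative caller (the condition and its name are made up): capture
 * the moment a rare error is seen without stopping the live trace.
 *
 *   if (unlikely(saw_rare_error()))
 *           tracing_snapshot();
 *
 * As the kernel-doc above notes, the snapshot buffer must already be
 * allocated, e.g. by an earlier tracing_snapshot_alloc() call.
 */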
382 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
383                                         struct trace_buffer *size_buf, int cpu_id);
384
385 /**
386  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
387  *
388  * This is similar to tracing_snapshot(), but it will allocate the
389  * snapshot buffer if it isn't already allocated. Use this only
390  * where it is safe to sleep, as the allocation may sleep.
391  *
392  * This causes a swap between the snapshot buffer and the current live
393  * tracing buffer. You can use this to take snapshots of the live
394  * trace when some condition is triggered, but continue to trace.
395  */
396 void tracing_snapshot_alloc(void)
397 {
398         struct trace_array *tr = &global_trace;
399         int ret;
400
401         if (!tr->allocated_snapshot) {
402
403                 /* allocate spare buffer */
404                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
405                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
406                 if (WARN_ON(ret < 0))
407                         return;
408
409                 tr->allocated_snapshot = true;
410         }
411
412         tracing_snapshot();
413 }
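/*
 * Because the spare buffer allocation above may sleep, a sketch of the
 * intended flow is: allocate once from a context that may sleep, then
 * snapshot cheaply whenever the interesting event occurs.
 *
 *   tracing_snapshot_alloc();    allocates max_buffer and takes a snapshot
 *   ...
 *   tracing_snapshot();          later snapshots reuse the spare buffer
 */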
414 #else
415 void tracing_snapshot(void)
416 {
417         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
418 }
419 void tracing_snapshot_alloc(void)
420 {
421         /* Give warning */
422         tracing_snapshot();
423 }
424 #endif /* CONFIG_TRACER_SNAPSHOT */
425
426 /**
427  * tracing_off - turn off tracing buffers
428  *
429  * This function stops the tracing buffers from recording data.
430  * It does not disable any overhead the tracers themselves may
431  * be causing. This function simply causes all recording to
432  * the ring buffers to fail.
433  */
434 void tracing_off(void)
435 {
436         if (global_trace.trace_buffer.buffer)
437                 ring_buffer_record_off(global_trace.trace_buffer.buffer);
438         /*
439          * This flag is only looked at when buffers haven't been
440          * allocated yet. We don't really care about the race
441          * between setting this flag and actually turning
442          * on the buffer.
443          */
444         global_trace.buffer_disabled = 1;
445 }
446 EXPORT_SYMBOL_GPL(tracing_off);
447
448 /**
449  * tracing_is_on - show state of ring buffers enabled
450  */
451 int tracing_is_on(void)
452 {
453         if (global_trace.trace_buffer.buffer)
454                 return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
455         return !global_trace.buffer_disabled;
456 }
457 EXPORT_SYMBOL_GPL(tracing_is_on);
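/*
 * Illustrative pairing of the helpers above (the failure check is
 * hypothetical): turn recording off as soon as a problem is noticed so
 * the buffers still hold the events leading up to it.
 *
 *   if (something_went_wrong())
 *           tracing_off();
 *   ...
 *   tracing_on();                resume recording later if desired
 */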
458
459 static int __init set_buf_size(char *str)
460 {
461         unsigned long buf_size;
462
463         if (!str)
464                 return 0;
465         buf_size = memparse(str, &str);
466         /* nr_entries can not be zero */
467         if (buf_size == 0)
468                 return 0;
469         trace_buf_size = buf_size;
470         return 1;
471 }
472 __setup("trace_buf_size=", set_buf_size);
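/*
 * Example boot parameter (sketch): memparse() above understands the usual
 * size suffixes, so
 *
 *   trace_buf_size=1M
 *
 * asks for roughly one megabyte, rounded to page size as described in the
 * comment for trace_buf_size above.
 */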
473
474 static int __init set_tracing_thresh(char *str)
475 {
476         unsigned long threshold;
477         int ret;
478
479         if (!str)
480                 return 0;
481         ret = kstrtoul(str, 0, &threshold);
482         if (ret < 0)
483                 return 0;
484         tracing_thresh = threshold * 1000;
485         return 1;
486 }
487 __setup("tracing_thresh=", set_tracing_thresh);
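/*
 * Example boot parameter (sketch): the value is taken in microseconds and
 * stored in nanoseconds (note the "* 1000" above), so
 *
 *   tracing_thresh=100
 *
 * sets a 100 usec threshold for the latency tracers that honor it.
 */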
488
489 unsigned long nsecs_to_usecs(unsigned long nsecs)
490 {
491         return nsecs / 1000;
492 }
493
494 /* These must match the bit positions in trace_iterator_flags */
495 static const char *trace_options[] = {
496         "print-parent",
497         "sym-offset",
498         "sym-addr",
499         "verbose",
500         "raw",
501         "hex",
502         "bin",
503         "block",
504         "stacktrace",
505         "trace_printk",
506         "ftrace_preempt",
507         "branch",
508         "annotate",
509         "userstacktrace",
510         "sym-userobj",
511         "printk-msg-only",
512         "context-info",
513         "latency-format",
514         "sleep-time",
515         "graph-time",
516         "record-cmd",
517         "overwrite",
518         "disable_on_free",
519         "irq-info",
520         "markers",
521         NULL
522 };
523
524 static struct {
525         u64 (*func)(void);
526         const char *name;
527         int in_ns;              /* is this clock in nanoseconds? */
528 } trace_clocks[] = {
529         { trace_clock_local,    "local",        1 },
530         { trace_clock_global,   "global",       1 },
531         { trace_clock_counter,  "counter",      0 },
532         ARCH_TRACE_CLOCKS
533 };
534
535 int trace_clock_id;
536
537 /*
538  * trace_parser_get_init - gets the buffer for trace parser
539  */
540 int trace_parser_get_init(struct trace_parser *parser, int size)
541 {
542         memset(parser, 0, sizeof(*parser));
543
544         parser->buffer = kmalloc(size, GFP_KERNEL);
545         if (!parser->buffer)
546                 return 1;
547
548         parser->size = size;
549         return 0;
550 }
551
552 /*
553  * trace_parser_put - frees the buffer for trace parser
554  */
555 void trace_parser_put(struct trace_parser *parser)
556 {
557         kfree(parser->buffer);
558 }
559
560 /*
561  * trace_get_user - reads the user input string separated by space
562  * (matched by isspace(ch))
563  *
564  * For each string found the 'struct trace_parser' is updated,
565  * and the function returns.
566  *
567  * Returns number of bytes read.
568  *
569  * See kernel/trace/trace.h for 'struct trace_parser' details.
570  */
571 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
572         size_t cnt, loff_t *ppos)
573 {
574         char ch;
575         size_t read = 0;
576         ssize_t ret;
577
578         if (!*ppos)
579                 trace_parser_clear(parser);
580
581         ret = get_user(ch, ubuf++);
582         if (ret)
583                 goto out;
584
585         read++;
586         cnt--;
587
588         /*
589          * The parser is not finished with the last write,
590          * continue reading the user input without skipping spaces.
591          */
592         if (!parser->cont) {
593                 /* skip white space */
594                 while (cnt && isspace(ch)) {
595                         ret = get_user(ch, ubuf++);
596                         if (ret)
597                                 goto out;
598                         read++;
599                         cnt--;
600                 }
601
602                 /* only spaces were written */
603                 if (isspace(ch)) {
604                         *ppos += read;
605                         ret = read;
606                         goto out;
607                 }
608
609                 parser->idx = 0;
610         }
611
612         /* read the non-space input */
613         while (cnt && !isspace(ch)) {
614                 if (parser->idx < parser->size - 1)
615                         parser->buffer[parser->idx++] = ch;
616                 else {
617                         ret = -EINVAL;
618                         goto out;
619                 }
620                 ret = get_user(ch, ubuf++);
621                 if (ret)
622                         goto out;
623                 read++;
624                 cnt--;
625         }
626
627         /* We either got finished input or we have to wait for another call. */
628         if (isspace(ch)) {
629                 parser->buffer[parser->idx] = 0;
630                 parser->cont = false;
631         } else {
632                 parser->cont = true;
633                 parser->buffer[parser->idx++] = ch;
634         }
635
636         *ppos += read;
637         ret = read;
638
639 out:
640         return ret;
641 }
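/*
 * Sketch of a write handler driving the parser above (the surrounding
 * handler and the token consumer are hypothetical):
 *
 *   struct trace_parser parser;
 *   ssize_t read;
 *
 *   if (trace_parser_get_init(&parser, PAGE_SIZE))
 *           return -ENOMEM;
 *   read = trace_get_user(&parser, ubuf, cnt, ppos);
 *   if (read > 0 && !parser.cont)
 *           consume_token(parser.buffer);   one whitespace-delimited token
 *   trace_parser_put(&parser);
 *   return read;
 */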
642
643 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
644 {
645         int len;
646         int ret;
647
648         if (!cnt)
649                 return 0;
650
651         if (s->len <= s->readpos)
652                 return -EBUSY;
653
654         len = s->len - s->readpos;
655         if (cnt > len)
656                 cnt = len;
657         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
658         if (ret == cnt)
659                 return -EFAULT;
660
661         cnt -= ret;
662
663         s->readpos += cnt;
664         return cnt;
665 }
666
667 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
668 {
669         int len;
670
671         if (s->len <= s->readpos)
672                 return -EBUSY;
673
674         len = s->len - s->readpos;
675         if (cnt > len)
676                 cnt = len;
677         memcpy(buf, s->buffer + s->readpos, cnt);
678
679         s->readpos += cnt;
680         return cnt;
681 }
682
683 /*
684  * ftrace_max_lock is used to protect the swapping of buffers
685  * when taking a max snapshot. The buffers themselves are
686  * protected by per_cpu spinlocks. But the action of the swap
687  * needs its own lock.
688  *
689  * This is defined as an arch_spinlock_t in order to help
690  * with performance when lockdep debugging is enabled.
691  *
692  * It is also used in other places outside the update_max_tr
693  * so it needs to be defined outside of the
694  * CONFIG_TRACER_MAX_TRACE.
695  */
696 static arch_spinlock_t ftrace_max_lock =
697         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
698
699 unsigned long __read_mostly     tracing_thresh;
700
701 #ifdef CONFIG_TRACER_MAX_TRACE
702 unsigned long __read_mostly     tracing_max_latency;
703
704 /*
705  * Copy the new maximum trace into the separate maximum-trace
706  * structure. (this way the maximum trace is permanently saved,
707  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
708  */
709 static void
710 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
711 {
712         struct trace_buffer *trace_buf = &tr->trace_buffer;
713         struct trace_buffer *max_buf = &tr->max_buffer;
714         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
715         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
716
717         max_buf->cpu = cpu;
718         max_buf->time_start = data->preempt_timestamp;
719
720         max_data->saved_latency = tracing_max_latency;
721         max_data->critical_start = data->critical_start;
722         max_data->critical_end = data->critical_end;
723
724         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
725         max_data->pid = tsk->pid;
726         max_data->uid = task_uid(tsk);
727         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
728         max_data->policy = tsk->policy;
729         max_data->rt_priority = tsk->rt_priority;
730
731         /* record this task's comm */
732         tracing_record_cmdline(tsk);
733 }
734
735 /**
736  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
737  * @tr: tracer
738  * @tsk: the task with the latency
739  * @cpu: The cpu that initiated the trace.
740  *
741  * Flip the buffers between the @tr and the max_tr and record information
742  * about which task was the cause of this latency.
743  */
744 void
745 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
746 {
747         struct ring_buffer *buf;
748
749         if (tr->stop_count)
750                 return;
751
752         WARN_ON_ONCE(!irqs_disabled());
753
754         if (!tr->allocated_snapshot) {
755                 /* Only the nop tracer should hit this when disabling */
756                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
757                 return;
758         }
759
760         arch_spin_lock(&ftrace_max_lock);
761
762         buf = tr->trace_buffer.buffer;
763         tr->trace_buffer.buffer = tr->max_buffer.buffer;
764         tr->max_buffer.buffer = buf;
765
766         __update_max_tr(tr, tsk, cpu);
767         arch_spin_unlock(&ftrace_max_lock);
768 }
769
770 /**
771  * update_max_tr_single - only copy one trace over, and reset the rest
772  * @tr: tracer
773  * @tsk: task with the latency
774  * @cpu: the cpu of the buffer to copy.
775  *
776  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
777  */
778 void
779 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
780 {
781         int ret;
782
783         if (tr->stop_count)
784                 return;
785
786         WARN_ON_ONCE(!irqs_disabled());
787         if (WARN_ON_ONCE(!tr->allocated_snapshot))
788                 return;
789
790         arch_spin_lock(&ftrace_max_lock);
791
792         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
793
794         if (ret == -EBUSY) {
795                 /*
796                  * We failed to swap the buffer due to a commit taking
797                  * place on this CPU. We fail to record, but we reset
798                  * the max trace buffer (no one writes directly to it)
799                  * and flag that it failed.
800                  */
801                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
802                         "Failed to swap buffers due to commit in progress\n");
803         }
804
805         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
806
807         __update_max_tr(tr, tsk, cpu);
808         arch_spin_unlock(&ftrace_max_lock);
809 }
810 #endif /* CONFIG_TRACER_MAX_TRACE */
811
812 static void default_wait_pipe(struct trace_iterator *iter)
813 {
814         /* Iterators are static, they should be filled or empty */
815         if (trace_buffer_iter(iter, iter->cpu_file))
816                 return;
817
818         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
819 }
820
821 /**
822  * register_tracer - register a tracer with the ftrace system.
823  * @type: the plugin for the tracer
824  *
825  * Register a new plugin tracer.
826  */
827 int register_tracer(struct tracer *type)
828 {
829         struct tracer *t;
830         int ret = 0;
831
832         if (!type->name) {
833                 pr_info("Tracer must have a name\n");
834                 return -1;
835         }
836
837         if (strlen(type->name) >= MAX_TRACER_SIZE) {
838                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
839                 return -1;
840         }
841
842         mutex_lock(&trace_types_lock);
843
844         tracing_selftest_running = true;
845
846         for (t = trace_types; t; t = t->next) {
847                 if (strcmp(type->name, t->name) == 0) {
848                         /* already found */
849                         pr_info("Tracer %s already registered\n",
850                                 type->name);
851                         ret = -1;
852                         goto out;
853                 }
854         }
855
856         if (!type->set_flag)
857                 type->set_flag = &dummy_set_flag;
858         if (!type->flags)
859                 type->flags = &dummy_tracer_flags;
860         else
861                 if (!type->flags->opts)
862                         type->flags->opts = dummy_tracer_opt;
863         if (!type->wait_pipe)
864                 type->wait_pipe = default_wait_pipe;
865
866
867 #ifdef CONFIG_FTRACE_STARTUP_TEST
868         if (type->selftest && !tracing_selftest_disabled) {
869                 struct trace_array *tr = &global_trace;
870                 struct tracer *saved_tracer = tr->current_trace;
871
872                 /*
873                  * Run a selftest on this tracer.
874                  * Here we reset the trace buffer, and set the current
875                  * tracer to be this tracer. The tracer can then run some
876                  * internal tracing to verify that everything is in order.
877                  * If we fail, we do not register this tracer.
878                  */
879                 tracing_reset_online_cpus(&tr->trace_buffer);
880
881                 tr->current_trace = type;
882
883 #ifdef CONFIG_TRACER_MAX_TRACE
884                 if (type->use_max_tr) {
885                         /* If we expanded the buffers, make sure the max is expanded too */
886                         if (ring_buffer_expanded)
887                                 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
888                                                    RING_BUFFER_ALL_CPUS);
889                         tr->allocated_snapshot = true;
890                 }
891 #endif
892
893                 /* the test is responsible for initializing and enabling */
894                 pr_info("Testing tracer %s: ", type->name);
895                 ret = type->selftest(type, tr);
896                 /* the test is responsible for resetting too */
897                 tr->current_trace = saved_tracer;
898                 if (ret) {
899                         printk(KERN_CONT "FAILED!\n");
900                         /* Add the warning after printing 'FAILED' */
901                         WARN_ON(1);
902                         goto out;
903                 }
904                 /* Only reset on passing, to avoid touching corrupted buffers */
905                 tracing_reset_online_cpus(&tr->trace_buffer);
906
907 #ifdef CONFIG_TRACER_MAX_TRACE
908                 if (type->use_max_tr) {
909                         tr->allocated_snapshot = false;
910
911                         /* Shrink the max buffer again */
912                         if (ring_buffer_expanded)
913                                 ring_buffer_resize(tr->max_buffer.buffer, 1,
914                                                    RING_BUFFER_ALL_CPUS);
915                 }
916 #endif
917
918                 printk(KERN_CONT "PASSED\n");
919         }
920 #endif
921
922         type->next = trace_types;
923         trace_types = type;
924
925  out:
926         tracing_selftest_running = false;
927         mutex_unlock(&trace_types_lock);
928
929         if (ret || !default_bootup_tracer)
930                 goto out_unlock;
931
932         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
933                 goto out_unlock;
934
935         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
936         /* Do we want this tracer to start on bootup? */
937         tracing_set_tracer(type->name);
938         default_bootup_tracer = NULL;
939         /* disable other selftests, since this will break them. */
940         tracing_selftest_disabled = 1;
941 #ifdef CONFIG_FTRACE_STARTUP_TEST
942         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
943                type->name);
944 #endif
945
946  out_unlock:
947         return ret;
948 }
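/*
 * Minimal registration sketch (this tracer is hypothetical; real tracers
 * also supply init/reset callbacks and live in their own source files):
 *
 *   static struct tracer example_tracer __read_mostly = {
 *           .name = "example",
 *   };
 *
 *   static __init int init_example_tracer(void)
 *   {
 *           return register_tracer(&example_tracer);
 *   }
 *   core_initcall(init_example_tracer);
 */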
949
950 void tracing_reset(struct trace_buffer *buf, int cpu)
951 {
952         struct ring_buffer *buffer = buf->buffer;
953
954         if (!buffer)
955                 return;
956
957         ring_buffer_record_disable(buffer);
958
959         /* Make sure all commits have finished */
960         synchronize_sched();
961         ring_buffer_reset_cpu(buffer, cpu);
962
963         ring_buffer_record_enable(buffer);
964 }
965
966 void tracing_reset_online_cpus(struct trace_buffer *buf)
967 {
968         struct ring_buffer *buffer = buf->buffer;
969         int cpu;
970
971         if (!buffer)
972                 return;
973
974         ring_buffer_record_disable(buffer);
975
976         /* Make sure all commits have finished */
977         synchronize_sched();
978
979         buf->time_start = ftrace_now(buf->cpu);
980
981         for_each_online_cpu(cpu)
982                 ring_buffer_reset_cpu(buffer, cpu);
983
984         ring_buffer_record_enable(buffer);
985 }
986
987 void tracing_reset_current(int cpu)
988 {
989         tracing_reset(&global_trace.trace_buffer, cpu);
990 }
991
992 void tracing_reset_all_online_cpus(void)
993 {
994         struct trace_array *tr;
995
996         mutex_lock(&trace_types_lock);
997         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
998                 tracing_reset_online_cpus(&tr->trace_buffer);
999 #ifdef CONFIG_TRACER_MAX_TRACE
1000                 tracing_reset_online_cpus(&tr->max_buffer);
1001 #endif
1002         }
1003         mutex_unlock(&trace_types_lock);
1004 }
1005
1006 #define SAVED_CMDLINES 128
1007 #define NO_CMDLINE_MAP UINT_MAX
1008 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1009 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1010 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1011 static int cmdline_idx;
1012 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1013
1014 /* temporarily disable recording */
1015 static atomic_t trace_record_cmdline_disabled __read_mostly;
1016
1017 static void trace_init_cmdlines(void)
1018 {
1019         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1020         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1021         cmdline_idx = 0;
1022 }
1023
1024 int is_tracing_stopped(void)
1025 {
1026         return global_trace.stop_count;
1027 }
1028
1029 /**
1030  * ftrace_off_permanent - disable all ftrace code permanently
1031  *
1032  * This should only be called when a serious anomaly has
1033  * been detected.  This will turn off the function tracing,
1034  * ring buffers, and other tracing utilities. It takes no
1035  * locks and can be called from any context.
1036  */
1037 void ftrace_off_permanent(void)
1038 {
1039         tracing_disabled = 1;
1040         ftrace_stop();
1041         tracing_off_permanent();
1042 }
1043
1044 /**
1045  * tracing_start - quick start of the tracer
1046  *
1047  * If tracing is enabled but was stopped by tracing_stop,
1048  * this will start the tracer back up.
1049  */
1050 void tracing_start(void)
1051 {
1052         struct ring_buffer *buffer;
1053         unsigned long flags;
1054
1055         if (tracing_disabled)
1056                 return;
1057
1058         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1059         if (--global_trace.stop_count) {
1060                 if (global_trace.stop_count < 0) {
1061                         /* Someone screwed up their debugging */
1062                         WARN_ON_ONCE(1);
1063                         global_trace.stop_count = 0;
1064                 }
1065                 goto out;
1066         }
1067
1068         /* Prevent the buffers from switching */
1069         arch_spin_lock(&ftrace_max_lock);
1070
1071         buffer = global_trace.trace_buffer.buffer;
1072         if (buffer)
1073                 ring_buffer_record_enable(buffer);
1074
1075 #ifdef CONFIG_TRACER_MAX_TRACE
1076         buffer = global_trace.max_buffer.buffer;
1077         if (buffer)
1078                 ring_buffer_record_enable(buffer);
1079 #endif
1080
1081         arch_spin_unlock(&ftrace_max_lock);
1082
1083         ftrace_start();
1084  out:
1085         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1086 }
1087
1088 static void tracing_start_tr(struct trace_array *tr)
1089 {
1090         struct ring_buffer *buffer;
1091         unsigned long flags;
1092
1093         if (tracing_disabled)
1094                 return;
1095
1096         /* If global, we need to also start the max tracer */
1097         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1098                 return tracing_start();
1099
1100         raw_spin_lock_irqsave(&tr->start_lock, flags);
1101
1102         if (--tr->stop_count) {
1103                 if (tr->stop_count < 0) {
1104                         /* Someone screwed up their debugging */
1105                         WARN_ON_ONCE(1);
1106                         tr->stop_count = 0;
1107                 }
1108                 goto out;
1109         }
1110
1111         buffer = tr->trace_buffer.buffer;
1112         if (buffer)
1113                 ring_buffer_record_enable(buffer);
1114
1115  out:
1116         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1117 }
1118
1119 /**
1120  * tracing_stop - quick stop of the tracer
1121  *
1122  * Light weight way to stop tracing. Use in conjunction with
1123  * tracing_start.
1124  */
1125 void tracing_stop(void)
1126 {
1127         struct ring_buffer *buffer;
1128         unsigned long flags;
1129
1130         ftrace_stop();
1131         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1132         if (global_trace.stop_count++)
1133                 goto out;
1134
1135         /* Prevent the buffers from switching */
1136         arch_spin_lock(&ftrace_max_lock);
1137
1138         buffer = global_trace.trace_buffer.buffer;
1139         if (buffer)
1140                 ring_buffer_record_disable(buffer);
1141
1142 #ifdef CONFIG_TRACER_MAX_TRACE
1143         buffer = global_trace.max_buffer.buffer;
1144         if (buffer)
1145                 ring_buffer_record_disable(buffer);
1146 #endif
1147
1148         arch_spin_unlock(&ftrace_max_lock);
1149
1150  out:
1151         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1152 }
1153
1154 static void tracing_stop_tr(struct trace_array *tr)
1155 {
1156         struct ring_buffer *buffer;
1157         unsigned long flags;
1158
1159         /* If global, we need to also stop the max tracer */
1160         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1161                 return tracing_stop();
1162
1163         raw_spin_lock_irqsave(&tr->start_lock, flags);
1164         if (tr->stop_count++)
1165                 goto out;
1166
1167         buffer = tr->trace_buffer.buffer;
1168         if (buffer)
1169                 ring_buffer_record_disable(buffer);
1170
1171  out:
1172         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1173 }
1174
1175 void trace_stop_cmdline_recording(void);
1176
1177 static void trace_save_cmdline(struct task_struct *tsk)
1178 {
1179         unsigned pid, idx;
1180
1181         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1182                 return;
1183
1184         /*
1185          * It's not the end of the world if we don't get
1186          * the lock, but we also don't want to spin
1187          * nor do we want to disable interrupts,
1188          * so if we miss here, then better luck next time.
1189          */
1190         if (!arch_spin_trylock(&trace_cmdline_lock))
1191                 return;
1192
1193         idx = map_pid_to_cmdline[tsk->pid];
1194         if (idx == NO_CMDLINE_MAP) {
1195                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1196
1197                 /*
1198                  * Check whether the cmdline buffer at idx has a pid
1199                  * mapped. We are going to overwrite that entry so we
1200                  * need to clear the map_pid_to_cmdline. Otherwise we
1201                  * would read the new comm for the old pid.
1202                  */
1203                 pid = map_cmdline_to_pid[idx];
1204                 if (pid != NO_CMDLINE_MAP)
1205                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1206
1207                 map_cmdline_to_pid[idx] = tsk->pid;
1208                 map_pid_to_cmdline[tsk->pid] = idx;
1209
1210                 cmdline_idx = idx;
1211         }
1212
1213         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1214
1215         arch_spin_unlock(&trace_cmdline_lock);
1216 }
1217
1218 void trace_find_cmdline(int pid, char comm[])
1219 {
1220         unsigned map;
1221
1222         if (!pid) {
1223                 strcpy(comm, "<idle>");
1224                 return;
1225         }
1226
1227         if (WARN_ON_ONCE(pid < 0)) {
1228                 strcpy(comm, "<XXX>");
1229                 return;
1230         }
1231
1232         if (pid > PID_MAX_DEFAULT) {
1233                 strcpy(comm, "<...>");
1234                 return;
1235         }
1236
1237         preempt_disable();
1238         arch_spin_lock(&trace_cmdline_lock);
1239         map = map_pid_to_cmdline[pid];
1240         if (map != NO_CMDLINE_MAP)
1241                 strcpy(comm, saved_cmdlines[map]);
1242         else
1243                 strcpy(comm, "<...>");
1244
1245         arch_spin_unlock(&trace_cmdline_lock);
1246         preempt_enable();
1247 }
1248
1249 void tracing_record_cmdline(struct task_struct *tsk)
1250 {
1251         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1252                 return;
1253
1254         if (!__this_cpu_read(trace_cmdline_save))
1255                 return;
1256
1257         __this_cpu_write(trace_cmdline_save, false);
1258
1259         trace_save_cmdline(tsk);
1260 }
1261
1262 void
1263 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1264                              int pc)
1265 {
1266         struct task_struct *tsk = current;
1267
1268         entry->preempt_count            = pc & 0xff;
1269         entry->pid                      = (tsk) ? tsk->pid : 0;
1270         entry->flags =
1271 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1272                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1273 #else
1274                 TRACE_FLAG_IRQS_NOSUPPORT |
1275 #endif
1276                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1277                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1278                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1281
1282 struct ring_buffer_event *
1283 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1284                           int type,
1285                           unsigned long len,
1286                           unsigned long flags, int pc)
1287 {
1288         struct ring_buffer_event *event;
1289
1290         event = ring_buffer_lock_reserve(buffer, len);
1291         if (event != NULL) {
1292                 struct trace_entry *ent = ring_buffer_event_data(event);
1293
1294                 tracing_generic_entry_update(ent, flags, pc);
1295                 ent->type = type;
1296         }
1297
1298         return event;
1299 }
1300
1301 void
1302 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1303 {
1304         __this_cpu_write(trace_cmdline_save, true);
1305         ring_buffer_unlock_commit(buffer, event);
1306 }
1307
1308 static inline void
1309 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1310                              struct ring_buffer_event *event,
1311                              unsigned long flags, int pc)
1312 {
1313         __buffer_unlock_commit(buffer, event);
1314
1315         ftrace_trace_stack(buffer, flags, 6, pc);
1316         ftrace_trace_userstack(buffer, flags, pc);
1317 }
1318
1319 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1320                                 struct ring_buffer_event *event,
1321                                 unsigned long flags, int pc)
1322 {
1323         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1324 }
1325 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1326
1327 struct ring_buffer_event *
1328 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1329                           struct ftrace_event_file *ftrace_file,
1330                           int type, unsigned long len,
1331                           unsigned long flags, int pc)
1332 {
1333         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1334         return trace_buffer_lock_reserve(*current_rb,
1335                                          type, len, flags, pc);
1336 }
1337 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1338
1339 struct ring_buffer_event *
1340 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1341                                   int type, unsigned long len,
1342                                   unsigned long flags, int pc)
1343 {
1344         *current_rb = global_trace.trace_buffer.buffer;
1345         return trace_buffer_lock_reserve(*current_rb,
1346                                          type, len, flags, pc);
1347 }
1348 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1349
1350 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1351                                         struct ring_buffer_event *event,
1352                                         unsigned long flags, int pc)
1353 {
1354         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1355 }
1356 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1357
1358 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1359                                      struct ring_buffer_event *event,
1360                                      unsigned long flags, int pc,
1361                                      struct pt_regs *regs)
1362 {
1363         __buffer_unlock_commit(buffer, event);
1364
1365         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1366         ftrace_trace_userstack(buffer, flags, pc);
1367 }
1368 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1369
1370 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1371                                          struct ring_buffer_event *event)
1372 {
1373         ring_buffer_discard_commit(buffer, event);
1374 }
1375 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1376
1377 void
1378 trace_function(struct trace_array *tr,
1379                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1380                int pc)
1381 {
1382         struct ftrace_event_call *call = &event_function;
1383         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1384         struct ring_buffer_event *event;
1385         struct ftrace_entry *entry;
1386
1387         /* If we are reading the ring buffer, don't trace */
1388         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1389                 return;
1390
1391         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1392                                           flags, pc);
1393         if (!event)
1394                 return;
1395         entry   = ring_buffer_event_data(event);
1396         entry->ip                       = ip;
1397         entry->parent_ip                = parent_ip;
1398
1399         if (!filter_check_discard(call, entry, buffer, event))
1400                 __buffer_unlock_commit(buffer, event);
1401 }
1402
1403 void
1404 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1405        unsigned long ip, unsigned long parent_ip, unsigned long flags,
1406        int pc)
1407 {
1408         if (likely(!atomic_read(&data->disabled)))
1409                 trace_function(tr, ip, parent_ip, flags, pc);
1410 }
1411
1412 #ifdef CONFIG_STACKTRACE
1413
1414 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1415 struct ftrace_stack {
1416         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1417 };
1418
1419 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1420 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1421
1422 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1423                                  unsigned long flags,
1424                                  int skip, int pc, struct pt_regs *regs)
1425 {
1426         struct ftrace_event_call *call = &event_kernel_stack;
1427         struct ring_buffer_event *event;
1428         struct stack_entry *entry;
1429         struct stack_trace trace;
1430         int use_stack;
1431         int size = FTRACE_STACK_ENTRIES;
1432
1433         trace.nr_entries        = 0;
1434         trace.skip              = skip;
1435
1436         /*
1437          * Since events can happen in NMIs there's no safe way to
1438          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1439          * or NMI comes in, it will just have to use the default
1440          * FTRACE_STACK_SIZE.
1441          */
1442         preempt_disable_notrace();
1443
1444         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1445         /*
1446          * We don't need any atomic variables, just a barrier.
1447          * If an interrupt comes in, we don't care, because it would
1448          * have exited and put the counter back to what we want.
1449          * We just need a barrier to keep gcc from moving things
1450          * around.
1451          */
1452         barrier();
1453         if (use_stack == 1) {
1454                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1455                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1456
1457                 if (regs)
1458                         save_stack_trace_regs(regs, &trace);
1459                 else
1460                         save_stack_trace(&trace);
1461
1462                 if (trace.nr_entries > size)
1463                         size = trace.nr_entries;
1464         } else
1465                 /* From now on, use_stack is a boolean */
1466                 use_stack = 0;
1467
1468         size *= sizeof(unsigned long);
1469
1470         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1471                                           sizeof(*entry) + size, flags, pc);
1472         if (!event)
1473                 goto out;
1474         entry = ring_buffer_event_data(event);
1475
1476         memset(&entry->caller, 0, size);
1477
1478         if (use_stack)
1479                 memcpy(&entry->caller, trace.entries,
1480                        trace.nr_entries * sizeof(unsigned long));
1481         else {
1482                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1483                 trace.entries           = entry->caller;
1484                 if (regs)
1485                         save_stack_trace_regs(regs, &trace);
1486                 else
1487                         save_stack_trace(&trace);
1488         }
1489
1490         entry->size = trace.nr_entries;
1491
1492         if (!filter_check_discard(call, entry, buffer, event))
1493                 __buffer_unlock_commit(buffer, event);
1494
1495  out:
1496         /* Again, don't let gcc optimize things here */
1497         barrier();
1498         __this_cpu_dec(ftrace_stack_reserve);
1499         preempt_enable_notrace();
1500
1501 }
1502
1503 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1504                              int skip, int pc, struct pt_regs *regs)
1505 {
1506         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1507                 return;
1508
1509         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1510 }
1511
1512 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1513                         int skip, int pc)
1514 {
1515         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1516                 return;
1517
1518         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1519 }
1520
1521 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1522                    int pc)
1523 {
1524         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1525 }
1526
1527 /**
1528  * trace_dump_stack - record a stack back trace in the trace buffer
1529  */
1530 void trace_dump_stack(void)
1531 {
1532         unsigned long flags;
1533
1534         if (tracing_disabled || tracing_selftest_running)
1535                 return;
1536
1537         local_save_flags(flags);
1538
1539         /* Skipping 3 frames seems to get us to the caller of this function */
1540         __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, 3,
1541                              preempt_count(), NULL);
1542 }
1543
1544 static DEFINE_PER_CPU(int, user_stack_count);
1545
1546 void
1547 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1548 {
1549         struct ftrace_event_call *call = &event_user_stack;
1550         struct ring_buffer_event *event;
1551         struct userstack_entry *entry;
1552         struct stack_trace trace;
1553
1554         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1555                 return;
1556
1557         /*
1558          * NMIs can not handle page faults, even with fixups.
1559          * Saving the user stack can (and often does) fault.
1560          */
1561         if (unlikely(in_nmi()))
1562                 return;
1563
1564         /*
1565          * prevent recursion, since the user stack tracing may
1566          * trigger other kernel events.
1567          */
1568         preempt_disable();
1569         if (__this_cpu_read(user_stack_count))
1570                 goto out;
1571
1572         __this_cpu_inc(user_stack_count);
1573
1574         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1575                                           sizeof(*entry), flags, pc);
1576         if (!event)
1577                 goto out_drop_count;
1578         entry   = ring_buffer_event_data(event);
1579
1580         entry->tgid             = current->tgid;
1581         memset(&entry->caller, 0, sizeof(entry->caller));
1582
1583         trace.nr_entries        = 0;
1584         trace.max_entries       = FTRACE_STACK_ENTRIES;
1585         trace.skip              = 0;
1586         trace.entries           = entry->caller;
1587
1588         save_stack_trace_user(&trace);
1589         if (!filter_check_discard(call, entry, buffer, event))
1590                 __buffer_unlock_commit(buffer, event);
1591
1592  out_drop_count:
1593         __this_cpu_dec(user_stack_count);
1594  out:
1595         preempt_enable();
1596 }
1597
1598 #ifdef UNUSED
1599 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1600 {
1601         ftrace_trace_userstack(tr, flags, preempt_count());
1602 }
1603 #endif /* UNUSED */
1604
1605 #endif /* CONFIG_STACKTRACE */
1606
1607 /* created for use with alloc_percpu */
1608 struct trace_buffer_struct {
1609         char buffer[TRACE_BUF_SIZE];
1610 };
1611
1612 static struct trace_buffer_struct *trace_percpu_buffer;
1613 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1614 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1615 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1616
1617 /*
1618  * The buffer used is dependent on the context. There is a per cpu
1619  * buffer for normal context, softirq context, hard irq context and
1620  * for NMI context. This allows for lockless recording.
1621  *
1622  * Note, if the buffers failed to be allocated, then this returns NULL
1623  */
1624 static char *get_trace_buf(void)
1625 {
1626         struct trace_buffer_struct *percpu_buffer;
1627
1628         /*
1629          * If we have allocated per cpu buffers, then we do not
1630          * need to do any locking.
1631          */
1632         if (in_nmi())
1633                 percpu_buffer = trace_percpu_nmi_buffer;
1634         else if (in_irq())
1635                 percpu_buffer = trace_percpu_irq_buffer;
1636         else if (in_softirq())
1637                 percpu_buffer = trace_percpu_sirq_buffer;
1638         else
1639                 percpu_buffer = trace_percpu_buffer;
1640
1641         if (!percpu_buffer)
1642                 return NULL;
1643
1644         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1645 }
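/*
 * For example, a trace_vbprintk() running in a softirq handler on CPU N
 * formats into CPU N's trace_percpu_sirq_buffer, so it cannot clobber a
 * concurrent trace_vbprintk() running in process context on that same CPU.
 */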
1646
1647 static int alloc_percpu_trace_buffer(void)
1648 {
1649         struct trace_buffer_struct *buffers;
1650         struct trace_buffer_struct *sirq_buffers;
1651         struct trace_buffer_struct *irq_buffers;
1652         struct trace_buffer_struct *nmi_buffers;
1653
1654         buffers = alloc_percpu(struct trace_buffer_struct);
1655         if (!buffers)
1656                 goto err_warn;
1657
1658         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1659         if (!sirq_buffers)
1660                 goto err_sirq;
1661
1662         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1663         if (!irq_buffers)
1664                 goto err_irq;
1665
1666         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1667         if (!nmi_buffers)
1668                 goto err_nmi;
1669
1670         trace_percpu_buffer = buffers;
1671         trace_percpu_sirq_buffer = sirq_buffers;
1672         trace_percpu_irq_buffer = irq_buffers;
1673         trace_percpu_nmi_buffer = nmi_buffers;
1674
1675         return 0;
1676
1677  err_nmi:
1678         free_percpu(irq_buffers);
1679  err_irq:
1680         free_percpu(sirq_buffers);
1681  err_sirq:
1682         free_percpu(buffers);
1683  err_warn:
1684         WARN(1, "Could not allocate percpu trace_printk buffer");
1685         return -ENOMEM;
1686 }
1687
1688 static int buffers_allocated;
1689
1690 void trace_printk_init_buffers(void)
1691 {
1692         if (buffers_allocated)
1693                 return;
1694
1695         if (alloc_percpu_trace_buffer())
1696                 return;
1697
1698         pr_info("ftrace: Allocated trace_printk buffers\n");
1699
1700         /* Expand the buffers to their set size */
1701         tracing_update_buffers();
1702
1703         buffers_allocated = 1;
1704
1705         /*
1706          * trace_printk_init_buffers() can be called by modules.
1707          * If that happens, then we need to start cmdline recording
1708          * directly here. If the global_trace.buffer is already
1709          * allocated here, then this was called by module code.
1710          */
1711         if (global_trace.trace_buffer.buffer)
1712                 tracing_start_cmdline_record();
1713 }
1714
1715 void trace_printk_start_comm(void)
1716 {
1717         /* Start tracing comms if trace printk is set */
1718         if (!buffers_allocated)
1719                 return;
1720         tracing_start_cmdline_record();
1721 }
1722
1723 static void trace_printk_start_stop_comm(int enabled)
1724 {
1725         if (!buffers_allocated)
1726                 return;
1727
1728         if (enabled)
1729                 tracing_start_cmdline_record();
1730         else
1731                 tracing_stop_cmdline_record();
1732 }
1733
1734 /**
1735  * trace_vbprintk - write a binary printk-style message to the tracing buffer
1736  * @ip: caller address; @fmt: format string; @args: arguments for @fmt
1737  */
1738 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1739 {
1740         struct ftrace_event_call *call = &event_bprint;
1741         struct ring_buffer_event *event;
1742         struct ring_buffer *buffer;
1743         struct trace_array *tr = &global_trace;
1744         struct bprint_entry *entry;
1745         unsigned long flags;
1746         char *tbuffer;
1747         int len = 0, size, pc;
1748
1749         if (unlikely(tracing_selftest_running || tracing_disabled))
1750                 return 0;
1751
1752         /* Don't pollute graph traces with trace_vprintk internals */
1753         pause_graph_tracing();
1754
1755         pc = preempt_count();
1756         preempt_disable_notrace();
1757
1758         tbuffer = get_trace_buf();
1759         if (!tbuffer) {
1760                 len = 0;
1761                 goto out;
1762         }
1763
1764         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
1765
1766         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
1767                 goto out;
1768
1769         local_save_flags(flags);
1770         size = sizeof(*entry) + sizeof(u32) * len;
1771         buffer = tr->trace_buffer.buffer;
1772         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1773                                           flags, pc);
1774         if (!event)
1775                 goto out;
1776         entry = ring_buffer_event_data(event);
1777         entry->ip                       = ip;
1778         entry->fmt                      = fmt;
1779
1780         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
1781         if (!filter_check_discard(call, entry, buffer, event)) {
1782                 __buffer_unlock_commit(buffer, event);
1783                 ftrace_trace_stack(buffer, flags, 6, pc);
1784         }
1785
1786 out:
1787         preempt_enable_notrace();
1788         unpause_graph_tracing();
1789
1790         return len;
1791 }
1792 EXPORT_SYMBOL_GPL(trace_vbprintk);
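/*
 * Illustrative sketch, kept under #if 0: a varargs front end like the
 * one trace_printk() uses for constant format strings would feed
 * trace_vbprintk() this way. The function name is made up.
 */
#if 0
static int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);
        return ret;
}
#endif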
1793
1794 static int
1795 __trace_array_vprintk(struct ring_buffer *buffer,
1796                       unsigned long ip, const char *fmt, va_list args)
1797 {
1798         struct ftrace_event_call *call = &event_print;
1799         struct ring_buffer_event *event;
1800         int len = 0, size, pc;
1801         struct print_entry *entry;
1802         unsigned long flags;
1803         char *tbuffer;
1804
1805         if (tracing_disabled || tracing_selftest_running)
1806                 return 0;
1807
1808         /* Don't pollute graph traces with trace_vprintk internals */
1809         pause_graph_tracing();
1810
1811         pc = preempt_count();
1812         preempt_disable_notrace();
1813
1814
1815         tbuffer = get_trace_buf();
1816         if (!tbuffer) {
1817                 len = 0;
1818                 goto out;
1819         }
1820
1821         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
1822         if (len > TRACE_BUF_SIZE)
1823                 goto out;
1824
1825         local_save_flags(flags);
1826         size = sizeof(*entry) + len + 1;
1827         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1828                                           flags, pc);
1829         if (!event)
1830                 goto out;
1831         entry = ring_buffer_event_data(event);
1832         entry->ip = ip;
1833
1834         memcpy(&entry->buf, tbuffer, len);
1835         entry->buf[len] = '\0';
1836         if (!filter_check_discard(call, entry, buffer, event)) {
1837                 __buffer_unlock_commit(buffer, event);
1838                 ftrace_trace_stack(buffer, flags, 6, pc);
1839         }
1840  out:
1841         preempt_enable_notrace();
1842         unpause_graph_tracing();
1843
1844         return len;
1845 }
1846
1847 int trace_array_vprintk(struct trace_array *tr,
1848                         unsigned long ip, const char *fmt, va_list args)
1849 {
1850         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
1851 }
1852
1853 int trace_array_printk(struct trace_array *tr,
1854                        unsigned long ip, const char *fmt, ...)
1855 {
1856         int ret;
1857         va_list ap;
1858
1859         if (!(trace_flags & TRACE_ITER_PRINTK))
1860                 return 0;
1861
1862         va_start(ap, fmt);
1863         ret = trace_array_vprintk(tr, ip, fmt, ap);
1864         va_end(ap);
1865         return ret;
1866 }
1867
1868 int trace_array_printk_buf(struct ring_buffer *buffer,
1869                            unsigned long ip, const char *fmt, ...)
1870 {
1871         int ret;
1872         va_list ap;
1873
1874         if (!(trace_flags & TRACE_ITER_PRINTK))
1875                 return 0;
1876
1877         va_start(ap, fmt);
1878         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
1879         va_end(ap);
1880         return ret;
1881 }
1882
1883 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1884 {
1885         return trace_array_vprintk(&global_trace, ip, fmt, args);
1886 }
1887 EXPORT_SYMBOL_GPL(trace_vprintk);
1888
1889 static void trace_iterator_increment(struct trace_iterator *iter)
1890 {
1891         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
1892
1893         iter->idx++;
1894         if (buf_iter)
1895                 ring_buffer_read(buf_iter, NULL);
1896 }
1897
1898 static struct trace_entry *
1899 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1900                 unsigned long *lost_events)
1901 {
1902         struct ring_buffer_event *event;
1903         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
1904
1905         if (buf_iter)
1906                 event = ring_buffer_iter_peek(buf_iter, ts);
1907         else
1908                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
1909                                          lost_events);
1910
1911         if (event) {
1912                 iter->ent_size = ring_buffer_event_length(event);
1913                 return ring_buffer_event_data(event);
1914         }
1915         iter->ent_size = 0;
1916         return NULL;
1917 }
1918
1919 static struct trace_entry *
1920 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1921                   unsigned long *missing_events, u64 *ent_ts)
1922 {
1923         struct ring_buffer *buffer = iter->trace_buffer->buffer;
1924         struct trace_entry *ent, *next = NULL;
1925         unsigned long lost_events = 0, next_lost = 0;
1926         int cpu_file = iter->cpu_file;
1927         u64 next_ts = 0, ts;
1928         int next_cpu = -1;
1929         int next_size = 0;
1930         int cpu;
1931
1932         /*
1933          * If we are in a per_cpu trace file, don't bother iterating over
1934          * all CPUs; peek at that CPU directly.
1935          */
1936         if (cpu_file > RING_BUFFER_ALL_CPUS) {
1937                 if (ring_buffer_empty_cpu(buffer, cpu_file))
1938                         return NULL;
1939                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1940                 if (ent_cpu)
1941                         *ent_cpu = cpu_file;
1942
1943                 return ent;
1944         }
1945
1946         for_each_tracing_cpu(cpu) {
1947
1948                 if (ring_buffer_empty_cpu(buffer, cpu))
1949                         continue;
1950
1951                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1952
1953                 /*
1954                  * Pick the entry with the smallest timestamp:
1955                  */
1956                 if (ent && (!next || ts < next_ts)) {
1957                         next = ent;
1958                         next_cpu = cpu;
1959                         next_ts = ts;
1960                         next_lost = lost_events;
1961                         next_size = iter->ent_size;
1962                 }
1963         }
1964
1965         iter->ent_size = next_size;
1966
1967         if (ent_cpu)
1968                 *ent_cpu = next_cpu;
1969
1970         if (ent_ts)
1971                 *ent_ts = next_ts;
1972
1973         if (missing_events)
1974                 *missing_events = next_lost;
1975
1976         return next;
1977 }
1978
1979 /* Find the next real entry, without updating the iterator itself */
1980 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1981                                           int *ent_cpu, u64 *ent_ts)
1982 {
1983         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1984 }
1985
1986 /* Find the next real entry, and increment the iterator to the next entry */
1987 void *trace_find_next_entry_inc(struct trace_iterator *iter)
1988 {
1989         iter->ent = __find_next_entry(iter, &iter->cpu,
1990                                       &iter->lost_events, &iter->ts);
1991
1992         if (iter->ent)
1993                 trace_iterator_increment(iter);
1994
1995         return iter->ent ? iter : NULL;
1996 }
1997
1998 static void trace_consume(struct trace_iterator *iter)
1999 {
2000         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2001                             &iter->lost_events);
2002 }
2003
2004 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2005 {
2006         struct trace_iterator *iter = m->private;
2007         int i = (int)*pos;
2008         void *ent;
2009
2010         WARN_ON_ONCE(iter->leftover);
2011
2012         (*pos)++;
2013
2014         /* can't go backwards */
2015         if (iter->idx > i)
2016                 return NULL;
2017
2018         if (iter->idx < 0)
2019                 ent = trace_find_next_entry_inc(iter);
2020         else
2021                 ent = iter;
2022
2023         while (ent && iter->idx < i)
2024                 ent = trace_find_next_entry_inc(iter);
2025
2026         iter->pos = *pos;
2027
2028         return ent;
2029 }
2030
2031 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2032 {
2033         struct ring_buffer_event *event;
2034         struct ring_buffer_iter *buf_iter;
2035         unsigned long entries = 0;
2036         u64 ts;
2037
2038         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2039
2040         buf_iter = trace_buffer_iter(iter, cpu);
2041         if (!buf_iter)
2042                 return;
2043
2044         ring_buffer_iter_reset(buf_iter);
2045
2046         /*
2047          * With the max latency tracers, a reset may never have taken
2048          * place on a cpu. This shows up as timestamps that are before
2049          * the start of the buffer.
2050          */
2051         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2052                 if (ts >= iter->trace_buffer->time_start)
2053                         break;
2054                 entries++;
2055                 ring_buffer_read(buf_iter, NULL);
2056         }
2057
2058         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2059 }
2060
2061 /*
2062  * The current tracer is copied to avoid taking a global lock
2063  * all around.
2064  */
2065 static void *s_start(struct seq_file *m, loff_t *pos)
2066 {
2067         struct trace_iterator *iter = m->private;
2068         struct trace_array *tr = iter->tr;
2069         int cpu_file = iter->cpu_file;
2070         void *p = NULL;
2071         loff_t l = 0;
2072         int cpu;
2073
2074         /*
2075          * Copy the tracer to avoid using a global lock all around.
2076          * iter->trace is a copy of current_trace; the name pointer can
2077          * be compared instead of using strcmp(), as iter->trace->name
2078          * will point to the same string as current_trace->name.
2079          */
2080         mutex_lock(&trace_types_lock);
2081         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2082                 *iter->trace = *tr->current_trace;
2083         mutex_unlock(&trace_types_lock);
2084
2085 #ifdef CONFIG_TRACER_MAX_TRACE
2086         if (iter->snapshot && iter->trace->use_max_tr)
2087                 return ERR_PTR(-EBUSY);
2088 #endif
2089
2090         if (!iter->snapshot)
2091                 atomic_inc(&trace_record_cmdline_disabled);
2092
2093         if (*pos != iter->pos) {
2094                 iter->ent = NULL;
2095                 iter->cpu = 0;
2096                 iter->idx = -1;
2097
2098                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2099                         for_each_tracing_cpu(cpu)
2100                                 tracing_iter_reset(iter, cpu);
2101                 } else
2102                         tracing_iter_reset(iter, cpu_file);
2103
2104                 iter->leftover = 0;
2105                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2106                         ;
2107
2108         } else {
2109                 /*
2110                  * If we overflowed the seq_file before, then we want
2111                  * to just reuse the trace_seq buffer again.
2112                  */
2113                 if (iter->leftover)
2114                         p = iter;
2115                 else {
2116                         l = *pos - 1;
2117                         p = s_next(m, p, &l);
2118                 }
2119         }
2120
2121         trace_event_read_lock();
2122         trace_access_lock(cpu_file);
2123         return p;
2124 }
2125
2126 static void s_stop(struct seq_file *m, void *p)
2127 {
2128         struct trace_iterator *iter = m->private;
2129
2130 #ifdef CONFIG_TRACER_MAX_TRACE
2131         if (iter->snapshot && iter->trace->use_max_tr)
2132                 return;
2133 #endif
2134
2135         if (!iter->snapshot)
2136                 atomic_dec(&trace_record_cmdline_disabled);
2137
2138         trace_access_unlock(iter->cpu_file);
2139         trace_event_read_unlock();
2140 }
2141
2142 static void
2143 get_total_entries(struct trace_buffer *buf,
2144                   unsigned long *total, unsigned long *entries)
2145 {
2146         unsigned long count;
2147         int cpu;
2148
2149         *total = 0;
2150         *entries = 0;
2151
2152         for_each_tracing_cpu(cpu) {
2153                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2154                 /*
2155                  * If this buffer has skipped entries, then we hold all
2156                  * entries for the trace and we need to ignore the
2157                  * ones before the time stamp.
2158                  */
2159                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2160                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2161                         /* total is the same as the entries */
2162                         *total += count;
2163                 } else
2164                         *total += count +
2165                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2166                 *entries += count;
2167         }
2168 }
2169
2170 static void print_lat_help_header(struct seq_file *m)
2171 {
2172         seq_puts(m, "#                  _------=> CPU#            \n");
2173         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2174         seq_puts(m, "#                | / _----=> need-resched    \n");
2175         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2176         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2177         seq_puts(m, "#                |||| /     delay             \n");
2178         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2179         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2180 }
2181
2182 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2183 {
2184         unsigned long total;
2185         unsigned long entries;
2186
2187         get_total_entries(buf, &total, &entries);
2188         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2189                    entries, total, num_online_cpus());
2190         seq_puts(m, "#\n");
2191 }
2192
2193 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2194 {
2195         print_event_info(buf, m);
2196         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2197         seq_puts(m, "#              | |       |          |         |\n");
2198 }
2199
2200 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2201 {
2202         print_event_info(buf, m);
2203         seq_puts(m, "#                              _-----=> irqs-off\n");
2204         seq_puts(m, "#                             / _----=> need-resched\n");
2205         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2206         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2207         seq_puts(m, "#                            ||| /     delay\n");
2208         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2209         seq_puts(m, "#              | |       |   ||||       |         |\n");
2210 }
2211
2212 void
2213 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2214 {
2215         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2216         struct trace_buffer *buf = iter->trace_buffer;
2217         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2218         struct tracer *type = iter->trace;
2219         unsigned long entries;
2220         unsigned long total;
2221         const char *name = "preemption";
2222
2223         name = type->name;
2224
2225         get_total_entries(buf, &total, &entries);
2226
2227         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2228                    name, UTS_RELEASE);
2229         seq_puts(m, "# -----------------------------------"
2230                  "---------------------------------\n");
2231         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2232                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2233                    nsecs_to_usecs(data->saved_latency),
2234                    entries,
2235                    total,
2236                    buf->cpu,
2237 #if defined(CONFIG_PREEMPT_NONE)
2238                    "server",
2239 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2240                    "desktop",
2241 #elif defined(CONFIG_PREEMPT)
2242                    "preempt",
2243 #else
2244                    "unknown",
2245 #endif
2246                    /* These are reserved for later use */
2247                    0, 0, 0, 0);
2248 #ifdef CONFIG_SMP
2249         seq_printf(m, " #P:%d)\n", num_online_cpus());
2250 #else
2251         seq_puts(m, ")\n");
2252 #endif
2253         seq_puts(m, "#    -----------------\n");
2254         seq_printf(m, "#    | task: %.16s-%d "
2255                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2256                    data->comm, data->pid,
2257                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2258                    data->policy, data->rt_priority);
2259         seq_puts(m, "#    -----------------\n");
2260
2261         if (data->critical_start) {
2262                 seq_puts(m, "#  => started at: ");
2263                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2264                 trace_print_seq(m, &iter->seq);
2265                 seq_puts(m, "\n#  => ended at:   ");
2266                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2267                 trace_print_seq(m, &iter->seq);
2268                 seq_puts(m, "\n#\n");
2269         }
2270
2271         seq_puts(m, "#\n");
2272 }
2273
2274 static void test_cpu_buff_start(struct trace_iterator *iter)
2275 {
2276         struct trace_seq *s = &iter->seq;
2277
2278         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2279                 return;
2280
2281         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2282                 return;
2283
2284         if (cpumask_test_cpu(iter->cpu, iter->started))
2285                 return;
2286
2287         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2288                 return;
2289
2290         cpumask_set_cpu(iter->cpu, iter->started);
2291
2292         /* Don't print the "CPU buffer started" line for the first entry of the trace */
2293         if (iter->idx > 1)
2294                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2295                                 iter->cpu);
2296 }
2297
2298 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2299 {
2300         struct trace_seq *s = &iter->seq;
2301         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2302         struct trace_entry *entry;
2303         struct trace_event *event;
2304
2305         entry = iter->ent;
2306
2307         test_cpu_buff_start(iter);
2308
2309         event = ftrace_find_event(entry->type);
2310
2311         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2312                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2313                         if (!trace_print_lat_context(iter))
2314                                 goto partial;
2315                 } else {
2316                         if (!trace_print_context(iter))
2317                                 goto partial;
2318                 }
2319         }
2320
2321         if (event)
2322                 return event->funcs->trace(iter, sym_flags, event);
2323
2324         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2325                 goto partial;
2326
2327         return TRACE_TYPE_HANDLED;
2328 partial:
2329         return TRACE_TYPE_PARTIAL_LINE;
2330 }
2331
2332 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2333 {
2334         struct trace_seq *s = &iter->seq;
2335         struct trace_entry *entry;
2336         struct trace_event *event;
2337
2338         entry = iter->ent;
2339
2340         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2341                 if (!trace_seq_printf(s, "%d %d %llu ",
2342                                       entry->pid, iter->cpu, iter->ts))
2343                         goto partial;
2344         }
2345
2346         event = ftrace_find_event(entry->type);
2347         if (event)
2348                 return event->funcs->raw(iter, 0, event);
2349
2350         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2351                 goto partial;
2352
2353         return TRACE_TYPE_HANDLED;
2354 partial:
2355         return TRACE_TYPE_PARTIAL_LINE;
2356 }
2357
2358 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2359 {
2360         struct trace_seq *s = &iter->seq;
2361         unsigned char newline = '\n';
2362         struct trace_entry *entry;
2363         struct trace_event *event;
2364
2365         entry = iter->ent;
2366
2367         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2368                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2369                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2370                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2371         }
2372
2373         event = ftrace_find_event(entry->type);
2374         if (event) {
2375                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2376                 if (ret != TRACE_TYPE_HANDLED)
2377                         return ret;
2378         }
2379
2380         SEQ_PUT_FIELD_RET(s, newline);
2381
2382         return TRACE_TYPE_HANDLED;
2383 }
2384
2385 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2386 {
2387         struct trace_seq *s = &iter->seq;
2388         struct trace_entry *entry;
2389         struct trace_event *event;
2390
2391         entry = iter->ent;
2392
2393         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2394                 SEQ_PUT_FIELD_RET(s, entry->pid);
2395                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2396                 SEQ_PUT_FIELD_RET(s, iter->ts);
2397         }
2398
2399         event = ftrace_find_event(entry->type);
2400         return event ? event->funcs->binary(iter, 0, event) :
2401                 TRACE_TYPE_HANDLED;
2402 }
2403
2404 int trace_empty(struct trace_iterator *iter)
2405 {
2406         struct ring_buffer_iter *buf_iter;
2407         int cpu;
2408
2409         /* If we are looking at one CPU buffer, only check that one */
2410         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2411                 cpu = iter->cpu_file;
2412                 buf_iter = trace_buffer_iter(iter, cpu);
2413                 if (buf_iter) {
2414                         if (!ring_buffer_iter_empty(buf_iter))
2415                                 return 0;
2416                 } else {
2417                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2418                                 return 0;
2419                 }
2420                 return 1;
2421         }
2422
2423         for_each_tracing_cpu(cpu) {
2424                 buf_iter = trace_buffer_iter(iter, cpu);
2425                 if (buf_iter) {
2426                         if (!ring_buffer_iter_empty(buf_iter))
2427                                 return 0;
2428                 } else {
2429                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2430                                 return 0;
2431                 }
2432         }
2433
2434         return 1;
2435 }
2436
2437 /*  Called with trace_event_read_lock() held. */
2438 enum print_line_t print_trace_line(struct trace_iterator *iter)
2439 {
2440         enum print_line_t ret;
2441
2442         if (iter->lost_events &&
2443             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2444                                  iter->cpu, iter->lost_events))
2445                 return TRACE_TYPE_PARTIAL_LINE;
2446
2447         if (iter->trace && iter->trace->print_line) {
2448                 ret = iter->trace->print_line(iter);
2449                 if (ret != TRACE_TYPE_UNHANDLED)
2450                         return ret;
2451         }
2452
2453         if (iter->ent->type == TRACE_BPRINT &&
2454                         trace_flags & TRACE_ITER_PRINTK &&
2455                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2456                 return trace_print_bprintk_msg_only(iter);
2457
2458         if (iter->ent->type == TRACE_PRINT &&
2459                         trace_flags & TRACE_ITER_PRINTK &&
2460                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2461                 return trace_print_printk_msg_only(iter);
2462
2463         if (trace_flags & TRACE_ITER_BIN)
2464                 return print_bin_fmt(iter);
2465
2466         if (trace_flags & TRACE_ITER_HEX)
2467                 return print_hex_fmt(iter);
2468
2469         if (trace_flags & TRACE_ITER_RAW)
2470                 return print_raw_fmt(iter);
2471
2472         return print_trace_fmt(iter);
2473 }
2474
2475 void trace_latency_header(struct seq_file *m)
2476 {
2477         struct trace_iterator *iter = m->private;
2478
2479         /* print nothing if the buffers are empty */
2480         if (trace_empty(iter))
2481                 return;
2482
2483         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2484                 print_trace_header(m, iter);
2485
2486         if (!(trace_flags & TRACE_ITER_VERBOSE))
2487                 print_lat_help_header(m);
2488 }
2489
2490 void trace_default_header(struct seq_file *m)
2491 {
2492         struct trace_iterator *iter = m->private;
2493
2494         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2495                 return;
2496
2497         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2498                 /* print nothing if the buffers are empty */
2499                 if (trace_empty(iter))
2500                         return;
2501                 print_trace_header(m, iter);
2502                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2503                         print_lat_help_header(m);
2504         } else {
2505                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2506                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2507                                 print_func_help_header_irq(iter->trace_buffer, m);
2508                         else
2509                                 print_func_help_header(iter->trace_buffer, m);
2510                 }
2511         }
2512 }
2513
2514 static void test_ftrace_alive(struct seq_file *m)
2515 {
2516         if (!ftrace_is_dead())
2517                 return;
2518         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2519         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2520 }
2521
2522 #ifdef CONFIG_TRACER_MAX_TRACE
2523 static void show_snapshot_main_help(struct seq_file *m)
2524 {
2525         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2526         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2527         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2528         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
2529         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2530         seq_printf(m, "#                       is not a '0' or '1')\n");
2531 }
2532
2533 static void show_snapshot_percpu_help(struct seq_file *m)
2534 {
2535         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2536 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2537         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2538         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2539 #else
2540         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2541         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2542 #endif
2543         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2544         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2545         seq_printf(m, "#                       is not a '0' or '1')\n");
2546 }
2547
2548 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2549 {
2550         if (iter->tr->allocated_snapshot)
2551                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2552         else
2553                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2554
2555         seq_printf(m, "# Snapshot commands:\n");
2556         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2557                 show_snapshot_main_help(m);
2558         else
2559                 show_snapshot_percpu_help(m);
2560 }
2561 #else
2562 /* Should never be called */
2563 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2564 #endif
2565
2566 static int s_show(struct seq_file *m, void *v)
2567 {
2568         struct trace_iterator *iter = v;
2569         int ret;
2570
2571         if (iter->ent == NULL) {
2572                 if (iter->tr) {
2573                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2574                         seq_puts(m, "#\n");
2575                         test_ftrace_alive(m);
2576                 }
2577                 if (iter->snapshot && trace_empty(iter))
2578                         print_snapshot_help(m, iter);
2579                 else if (iter->trace && iter->trace->print_header)
2580                         iter->trace->print_header(m);
2581                 else
2582                         trace_default_header(m);
2583
2584         } else if (iter->leftover) {
2585                 /*
2586                  * If we filled the seq_file buffer earlier, we
2587                  * want to just show it now.
2588                  */
2589                 ret = trace_print_seq(m, &iter->seq);
2590
2591                 /* ret should this time be zero, but you never know */
2592                 iter->leftover = ret;
2593
2594         } else {
2595                 print_trace_line(iter);
2596                 ret = trace_print_seq(m, &iter->seq);
2597                 /*
2598                  * If we overflow the seq_file buffer, then it will
2599                  * ask us for this data again at start up.
2600                  * Use that instead.
2601                  *  ret is 0 if seq_file write succeeded.
2602                  *        -1 otherwise.
2603                  */
2604                 iter->leftover = ret;
2605         }
2606
2607         return 0;
2608 }
2609
2610 static const struct seq_operations tracer_seq_ops = {
2611         .start          = s_start,
2612         .next           = s_next,
2613         .stop           = s_stop,
2614         .show           = s_show,
2615 };
2616
2617 static struct trace_iterator *
2618 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2619 {
2620         struct trace_cpu *tc = inode->i_private;
2621         struct trace_array *tr = tc->tr;
2622         struct trace_iterator *iter;
2623         int cpu;
2624
2625         if (tracing_disabled)
2626                 return ERR_PTR(-ENODEV);
2627
2628         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2629         if (!iter)
2630                 return ERR_PTR(-ENOMEM);
2631
2632         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2633                                     GFP_KERNEL);
2634         if (!iter->buffer_iter)
2635                 goto release;
2636
2637         /*
2638          * We make a copy of the current tracer to avoid concurrent
2639          * changes to it while we are reading.
2640          */
2641         mutex_lock(&trace_types_lock);
2642         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2643         if (!iter->trace)
2644                 goto fail;
2645
2646         *iter->trace = *tr->current_trace;
2647
2648         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2649                 goto fail;
2650
2651         iter->tr = tr;
2652
2653 #ifdef CONFIG_TRACER_MAX_TRACE
2654         /* Currently only the top directory has a snapshot */
2655         if (tr->current_trace->print_max || snapshot)
2656                 iter->trace_buffer = &tr->max_buffer;
2657         else
2658 #endif
2659                 iter->trace_buffer = &tr->trace_buffer;
2660         iter->snapshot = snapshot;
2661         iter->pos = -1;
2662         mutex_init(&iter->mutex);
2663         iter->cpu_file = tc->cpu;
2664
2665         /* Notify the tracer early, before we stop tracing. */
2666         if (iter->trace && iter->trace->open)
2667                 iter->trace->open(iter);
2668
2669         /* Annotate start of buffers if we had overruns */
2670         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2671                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2672
2673         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2674         if (trace_clocks[trace_clock_id].in_ns)
2675                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2676
2677         /* stop the trace while dumping if we are not opening "snapshot" */
2678         if (!iter->snapshot)
2679                 tracing_stop_tr(tr);
2680
2681         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2682                 for_each_tracing_cpu(cpu) {
2683                         iter->buffer_iter[cpu] =
2684                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2685                 }
2686                 ring_buffer_read_prepare_sync();
2687                 for_each_tracing_cpu(cpu) {
2688                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2689                         tracing_iter_reset(iter, cpu);
2690                 }
2691         } else {
2692                 cpu = iter->cpu_file;
2693                 iter->buffer_iter[cpu] =
2694                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2695                 ring_buffer_read_prepare_sync();
2696                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2697                 tracing_iter_reset(iter, cpu);
2698         }
2699
2700         tr->ref++;
2701
2702         mutex_unlock(&trace_types_lock);
2703
2704         return iter;
2705
2706  fail:
2707         mutex_unlock(&trace_types_lock);
2708         kfree(iter->trace);
2709         kfree(iter->buffer_iter);
2710 release:
2711         seq_release_private(inode, file);
2712         return ERR_PTR(-ENOMEM);
2713 }
2714
2715 int tracing_open_generic(struct inode *inode, struct file *filp)
2716 {
2717         if (tracing_disabled)
2718                 return -ENODEV;
2719
2720         filp->private_data = inode->i_private;
2721         return 0;
2722 }
2723
2724 static int tracing_release(struct inode *inode, struct file *file)
2725 {
2726         struct seq_file *m = file->private_data;
2727         struct trace_iterator *iter;
2728         struct trace_array *tr;
2729         int cpu;
2730
2731         if (!(file->f_mode & FMODE_READ))
2732                 return 0;
2733
2734         iter = m->private;
2735         tr = iter->tr;
2736
2737         mutex_lock(&trace_types_lock);
2738
2739         WARN_ON(!tr->ref);
2740         tr->ref--;
2741
2742         for_each_tracing_cpu(cpu) {
2743                 if (iter->buffer_iter[cpu])
2744                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
2745         }
2746
2747         if (iter->trace && iter->trace->close)
2748                 iter->trace->close(iter);
2749
2750         if (!iter->snapshot)
2751                 /* reenable tracing if it was previously enabled */
2752                 tracing_start_tr(tr);
2753         mutex_unlock(&trace_types_lock);
2754
2755         mutex_destroy(&iter->mutex);
2756         free_cpumask_var(iter->started);
2757         kfree(iter->trace);
2758         kfree(iter->buffer_iter);
2759         seq_release_private(inode, file);
2760         return 0;
2761 }
2762
2763 static int tracing_open(struct inode *inode, struct file *file)
2764 {
2765         struct trace_iterator *iter;
2766         int ret = 0;
2767
2768         /* If this file was open for write, then erase contents */
2769         if ((file->f_mode & FMODE_WRITE) &&
2770             (file->f_flags & O_TRUNC)) {
2771                 struct trace_cpu *tc = inode->i_private;
2772                 struct trace_array *tr = tc->tr;
2773
2774                 if (tc->cpu == RING_BUFFER_ALL_CPUS)
2775                         tracing_reset_online_cpus(&tr->trace_buffer);
2776                 else
2777                         tracing_reset(&tr->trace_buffer, tc->cpu);
2778         }
2779
2780         if (file->f_mode & FMODE_READ) {
2781                 iter = __tracing_open(inode, file, false);
2782                 if (IS_ERR(iter))
2783                         ret = PTR_ERR(iter);
2784                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
2785                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
2786         }
2787         return ret;
2788 }
2789
2790 static void *
2791 t_next(struct seq_file *m, void *v, loff_t *pos)
2792 {
2793         struct tracer *t = v;
2794
2795         (*pos)++;
2796
2797         if (t)
2798                 t = t->next;
2799
2800         return t;
2801 }
2802
2803 static void *t_start(struct seq_file *m, loff_t *pos)
2804 {
2805         struct tracer *t;
2806         loff_t l = 0;
2807
2808         mutex_lock(&trace_types_lock);
2809         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2810                 ;
2811
2812         return t;
2813 }
2814
2815 static void t_stop(struct seq_file *m, void *p)
2816 {
2817         mutex_unlock(&trace_types_lock);
2818 }
2819
2820 static int t_show(struct seq_file *m, void *v)
2821 {
2822         struct tracer *t = v;
2823
2824         if (!t)
2825                 return 0;
2826
2827         seq_printf(m, "%s", t->name);
2828         if (t->next)
2829                 seq_putc(m, ' ');
2830         else
2831                 seq_putc(m, '\n');
2832
2833         return 0;
2834 }
2835
2836 static const struct seq_operations show_traces_seq_ops = {
2837         .start          = t_start,
2838         .next           = t_next,
2839         .stop           = t_stop,
2840         .show           = t_show,
2841 };
2842
2843 static int show_traces_open(struct inode *inode, struct file *file)
2844 {
2845         if (tracing_disabled)
2846                 return -ENODEV;
2847
2848         return seq_open(file, &show_traces_seq_ops);
2849 }
2850
2851 static ssize_t
2852 tracing_write_stub(struct file *filp, const char __user *ubuf,
2853                    size_t count, loff_t *ppos)
2854 {
2855         return count;
2856 }
2857
2858 static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
2859 {
2860         if (file->f_mode & FMODE_READ)
2861                 return seq_lseek(file, offset, origin);
2862         else
2863                 return 0;
2864 }
2865
2866 static const struct file_operations tracing_fops = {
2867         .open           = tracing_open,
2868         .read           = seq_read,
2869         .write          = tracing_write_stub,
2870         .llseek         = tracing_seek,
2871         .release        = tracing_release,
2872 };
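/*
 * Reader-side sketch, kept under #if 0 (user space, illustrative
 * only): a plain read() of the "trace" file is serviced by
 * tracing_open()/seq_read() through the seq_operations above. The
 * path assumes debugfs is mounted at /sys/kernel/debug.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/kernel/debug/tracing/trace", O_RDONLY);

        if (fd < 0)
                return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
}
#endif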
2873
2874 static const struct file_operations show_traces_fops = {
2875         .open           = show_traces_open,
2876         .read           = seq_read,
2877         .release        = seq_release,
2878         .llseek         = seq_lseek,
2879 };
2880
2881 /*
2882  * Only trace on a CPU if the bitmask is set:
2883  */
2884 static cpumask_var_t tracing_cpumask;
2885
2886 /*
2887  * The tracer itself will not take this lock, but still we want
2888  * to provide a consistent cpumask to user-space:
2889  */
2890 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2891
2892 /*
2893  * Temporary storage for the character representation of the
2894  * CPU bitmask (and one more byte for the newline):
2895  */
2896 static char mask_str[NR_CPUS + 1];
2897
2898 static ssize_t
2899 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2900                      size_t count, loff_t *ppos)
2901 {
2902         int len;
2903
2904         mutex_lock(&tracing_cpumask_update_lock);
2905
2906         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2907         if (count - len < 2) {
2908                 count = -EINVAL;
2909                 goto out_err;
2910         }
2911         len += sprintf(mask_str + len, "\n");
2912         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2913
2914 out_err:
2915         mutex_unlock(&tracing_cpumask_update_lock);
2916
2917         return count;
2918 }
2919
2920 static ssize_t
2921 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2922                       size_t count, loff_t *ppos)
2923 {
2924         struct trace_array *tr = filp->private_data;
2925         cpumask_var_t tracing_cpumask_new;
2926         int err, cpu;
2927
2928         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2929                 return -ENOMEM;
2930
2931         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2932         if (err)
2933                 goto err_unlock;
2934
2935         mutex_lock(&tracing_cpumask_update_lock);
2936
2937         local_irq_disable();
2938         arch_spin_lock(&ftrace_max_lock);
2939         for_each_tracing_cpu(cpu) {
2940                 /*
2941                  * Increase/decrease the disabled counter if we are
2942                  * about to flip a bit in the cpumask:
2943                  */
2944                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2945                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2946                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
2947                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
2948                 }
2949                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2950                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2951                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
2952                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
2953                 }
2954         }
2955         arch_spin_unlock(&ftrace_max_lock);
2956         local_irq_enable();
2957
2958         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2959
2960         mutex_unlock(&tracing_cpumask_update_lock);
2961         free_cpumask_var(tracing_cpumask_new);
2962
2963         return count;
2964
2965 err_unlock:
2966         free_cpumask_var(tracing_cpumask_new);
2967
2968         return err;
2969 }
2970
2971 static const struct file_operations tracing_cpumask_fops = {
2972         .open           = tracing_open_generic,
2973         .read           = tracing_cpumask_read,
2974         .write          = tracing_cpumask_write,
2975         .llseek         = generic_file_llseek,
2976 };
2977
2978 static int tracing_trace_options_show(struct seq_file *m, void *v)
2979 {
2980         struct tracer_opt *trace_opts;
2981         struct trace_array *tr = m->private;
2982         u32 tracer_flags;
2983         int i;
2984
2985         mutex_lock(&trace_types_lock);
2986         tracer_flags = tr->current_trace->flags->val;
2987         trace_opts = tr->current_trace->flags->opts;
2988
2989         for (i = 0; trace_options[i]; i++) {
2990                 if (trace_flags & (1 << i))
2991                         seq_printf(m, "%s\n", trace_options[i]);
2992                 else
2993                         seq_printf(m, "no%s\n", trace_options[i]);
2994         }
2995
2996         for (i = 0; trace_opts[i].name; i++) {
2997                 if (tracer_flags & trace_opts[i].bit)
2998                         seq_printf(m, "%s\n", trace_opts[i].name);
2999                 else
3000                         seq_printf(m, "no%s\n", trace_opts[i].name);
3001         }
3002         mutex_unlock(&trace_types_lock);
3003
3004         return 0;
3005 }
3006
3007 static int __set_tracer_option(struct tracer *trace,
3008                                struct tracer_flags *tracer_flags,
3009                                struct tracer_opt *opts, int neg)
3010 {
3011         int ret;
3012
3013         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3014         if (ret)
3015                 return ret;
3016
3017         if (neg)
3018                 tracer_flags->val &= ~opts->bit;
3019         else
3020                 tracer_flags->val |= opts->bit;
3021         return 0;
3022 }
3023
3024 /* Try to assign a tracer specific option */
3025 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3026 {
3027         struct tracer_flags *tracer_flags = trace->flags;
3028         struct tracer_opt *opts = NULL;
3029         int i;
3030
3031         for (i = 0; tracer_flags->opts[i].name; i++) {
3032                 opts = &tracer_flags->opts[i];
3033
3034                 if (strcmp(cmp, opts->name) == 0)
3035                         return __set_tracer_option(trace, trace->flags,
3036                                                    opts, neg);
3037         }
3038
3039         return -EINVAL;
3040 }
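/*
 * Illustrative sketch, kept under #if 0: the shape of the
 * tracer_opt/tracer_flags tables that set_tracer_option() walks.
 * TRACER_OPT() comes from trace.h; the option name and bit value here
 * are made up.
 */
#if 0
#define EXAMPLE_OPT_VERBOSE     0x1

static struct tracer_opt example_tracer_opts[] = {
        { TRACER_OPT(example-verbose, EXAMPLE_OPT_VERBOSE) },
        { } /* terminator: the loop above stops at a NULL .name */
};

static struct tracer_flags example_tracer_flags = {
        .val = 0,
        .opts = example_tracer_opts,
};
#endif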
3041
3042 /* Some tracers require overwrite to stay enabled */
3043 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3044 {
3045         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3046                 return -1;
3047
3048         return 0;
3049 }
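/*
 * Illustrative sketch, kept under #if 0: a latency-style tracer that
 * needs the overwrite flag to stay set would hook the helper above as
 * its flag_changed callback. The tracer shown here is made up and
 * deliberately minimal.
 */
#if 0
static struct tracer example_tracer __read_mostly = {
        .name           = "example",
        .flag_changed   = trace_keep_overwrite,
        .use_max_tr     = true,
};
#endif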
3050
3051 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3052 {
3053         /* do nothing if flag is already set */
3054         if (!!(trace_flags & mask) == !!enabled)
3055                 return 0;
3056
3057         /* Give the tracer a chance to approve the change */
3058         if (tr->current_trace->flag_changed)
3059                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3060                         return -EINVAL;
3061
3062         if (enabled)
3063                 trace_flags |= mask;
3064         else
3065                 trace_flags &= ~mask;
3066
3067         if (mask == TRACE_ITER_RECORD_CMD)
3068                 trace_event_enable_cmd_record(enabled);
3069
3070         if (mask == TRACE_ITER_OVERWRITE) {
3071                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3072 #ifdef CONFIG_TRACER_MAX_TRACE
3073                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3074 #endif
3075         }
3076
3077         if (mask == TRACE_ITER_PRINTK)
3078                 trace_printk_start_stop_comm(enabled);
3079
3080         return 0;
3081 }
3082
3083 static int trace_set_options(struct trace_array *tr, char *option)
3084 {
3085         char *cmp;
3086         int neg = 0;
3087         int ret = -ENODEV;
3088         int i;
3089
3090         cmp = strstrip(option);
3091
3092         if (strncmp(cmp, "no", 2) == 0) {
3093                 neg = 1;
3094                 cmp += 2;
3095         }
3096
3097         mutex_lock(&trace_types_lock);
3098
3099         for (i = 0; trace_options[i]; i++) {
3100                 if (strcmp(cmp, trace_options[i]) == 0) {
3101                         ret = set_tracer_flag(tr, 1 << i, !neg);
3102                         break;
3103                 }
3104         }
3105
3106         /* If no option could be set, test the specific tracer options */
3107         if (!trace_options[i])
3108                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3109
3110         mutex_unlock(&trace_types_lock);
3111
3112         return ret;
3113 }
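/*
 * Illustrative sketch, kept under #if 0: an in-kernel caller could
 * flip an option by name the same way a write to the trace_options
 * file does. The option string is only an example; trace_set_options()
 * modifies it in place, so it must be writable.
 */
#if 0
static void example_set_option(struct trace_array *tr)
{
        char opt[] = "noprint-parent";

        trace_set_options(tr, opt);
}
#endif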
3114
3115 static ssize_t
3116 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3117                         size_t cnt, loff_t *ppos)
3118 {
3119         struct seq_file *m = filp->private_data;
3120         struct trace_array *tr = m->private;
3121         char buf[64];
3122         int ret;
3123
3124         if (cnt >= sizeof(buf))
3125                 return -EINVAL;
3126
3127         if (copy_from_user(&buf, ubuf, cnt))
3128                 return -EFAULT;
3129
3130         buf[cnt] = 0;
3131
3132         ret = trace_set_options(tr, buf);
3133         if (ret < 0)
3134                 return ret;
3135
3136         *ppos += cnt;
3137
3138         return cnt;
3139 }
3140
3141 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3142 {
3143         if (tracing_disabled)
3144                 return -ENODEV;
3145
3146         return single_open(file, tracing_trace_options_show, inode->i_private);
3147 }
3148
3149 static const struct file_operations tracing_iter_fops = {
3150         .open           = tracing_trace_options_open,
3151         .read           = seq_read,
3152         .llseek         = seq_lseek,
3153         .release        = single_release,
3154         .write          = tracing_trace_options_write,
3155 };
3156
3157 static const char readme_msg[] =
3158         "tracing mini-HOWTO:\n\n"
3159         "# mount -t debugfs nodev /sys/kernel/debug\n\n"
3160         "# cat /sys/kernel/debug/tracing/available_tracers\n"
3161         "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n"
3162         "# cat /sys/kernel/debug/tracing/current_tracer\n"
3163         "nop\n"
3164         "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n"
3165         "# cat /sys/kernel/debug/tracing/current_tracer\n"
3166         "wakeup\n"
3167         "# cat /sys/kernel/debug/tracing/trace_options\n"
3168         "noprint-parent nosym-offset nosym-addr noverbose\n"
3169         "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
3170         "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n"
3171         "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
3172         "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n"
3173 ;
3174
3175 static ssize_t
3176 tracing_readme_read(struct file *filp, char __user *ubuf,
3177                        size_t cnt, loff_t *ppos)
3178 {
3179         return simple_read_from_buffer(ubuf, cnt, ppos,
3180                                         readme_msg, strlen(readme_msg));
3181 }
3182
3183 static const struct file_operations tracing_readme_fops = {
3184         .open           = tracing_open_generic,
3185         .read           = tracing_readme_read,
3186         .llseek         = generic_file_llseek,
3187 };
3188
3189 static ssize_t
3190 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3191                                 size_t cnt, loff_t *ppos)
3192 {
3193         char *buf_comm;
3194         char *file_buf;
3195         char *buf;
3196         int len = 0;
3197         int pid;
3198         int i;
3199
3200         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3201         if (!file_buf)
3202                 return -ENOMEM;
3203
3204         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3205         if (!buf_comm) {
3206                 kfree(file_buf);
3207                 return -ENOMEM;
3208         }
3209
3210         buf = file_buf;
3211
3212         for (i = 0; i < SAVED_CMDLINES; i++) {
3213                 int r;
3214
3215                 pid = map_cmdline_to_pid[i];
3216                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3217                         continue;
3218
3219                 trace_find_cmdline(pid, buf_comm);
3220                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3221                 buf += r;
3222                 len += r;
3223         }
3224
3225         len = simple_read_from_buffer(ubuf, cnt, ppos,
3226                                       file_buf, len);
3227
3228         kfree(file_buf);
3229         kfree(buf_comm);
3230
3231         return len;
3232 }
3233
3234 static const struct file_operations tracing_saved_cmdlines_fops = {
3235     .open       = tracing_open_generic,
3236     .read       = tracing_saved_cmdlines_read,
3237     .llseek     = generic_file_llseek,
3238 };
3239
3240 static ssize_t
3241 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3242                        size_t cnt, loff_t *ppos)
3243 {
3244         struct trace_array *tr = filp->private_data;
3245         char buf[MAX_TRACER_SIZE+2];
3246         int r;
3247
3248         mutex_lock(&trace_types_lock);
3249         r = sprintf(buf, "%s\n", tr->current_trace->name);
3250         mutex_unlock(&trace_types_lock);
3251
3252         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3253 }
3254
3255 int tracer_init(struct tracer *t, struct trace_array *tr)
3256 {
3257         tracing_reset_online_cpus(&tr->trace_buffer);
3258         return t->init(tr);
3259 }
3260
3261 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3262 {
3263         int cpu;
3264
3265         for_each_tracing_cpu(cpu)
3266                 per_cpu_ptr(buf->data, cpu)->entries = val;
3267 }
3268
3269 #ifdef CONFIG_TRACER_MAX_TRACE
3270 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3271 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3272                                         struct trace_buffer *size_buf, int cpu_id)
3273 {
3274         int cpu, ret = 0;
3275
3276         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3277                 for_each_tracing_cpu(cpu) {
3278                         ret = ring_buffer_resize(trace_buf->buffer,
3279                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3280                         if (ret < 0)
3281                                 break;
3282                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3283                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3284                 }
3285         } else {
3286                 ret = ring_buffer_resize(trace_buf->buffer,
3287                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3288                 if (ret == 0)
3289                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3290                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3291         }
3292
3293         return ret;
3294 }
3295 #endif /* CONFIG_TRACER_MAX_TRACE */
3296
3297 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3298                                         unsigned long size, int cpu)
3299 {
3300         int ret;
3301
3302         /*
3303          * If the kernel or the user changes the size of the ring buffer,
3304          * we use the size that was given and can forget about
3305          * expanding it later.
3306          */
3307         ring_buffer_expanded = 1;
3308
3309         /* May be called before buffers are initialized */
3310         if (!tr->trace_buffer.buffer)
3311                 return 0;
3312
3313         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3314         if (ret < 0)
3315                 return ret;
3316
3317 #ifdef CONFIG_TRACER_MAX_TRACE
3318         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3319             !tr->current_trace->use_max_tr)
3320                 goto out;
3321
3322         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3323         if (ret < 0) {
3324                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3325                                                      &tr->trace_buffer, cpu);
3326                 if (r < 0) {
3327                         /*
3328                          * AARGH! We are left with different
3329                          * size max buffer!!!!
3330                          * The max buffer is our "snapshot" buffer.
3331                          * When a tracer needs a snapshot (one of the
3332                          * latency tracers), it swaps the max buffer
3333                          * with the saved snapshot. We succeeded in updating
3334                          * the size of the main buffer, but failed to
3335                          * update the size of the max buffer. But when we tried
3336                          * to reset the main buffer to the original size, we
3337                          * failed there too. This is very unlikely to
3338                          * happen, but if it does, warn and kill all
3339                          * tracing.
3340                          */
3341                         WARN_ON(1);
3342                         tracing_disabled = 1;
3343                 }
3344                 return ret;
3345         }
3346
3347         if (cpu == RING_BUFFER_ALL_CPUS)
3348                 set_buffer_entries(&tr->max_buffer, size);
3349         else
3350                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3351
3352  out:
3353 #endif /* CONFIG_TRACER_MAX_TRACE */
3354
3355         if (cpu == RING_BUFFER_ALL_CPUS)
3356                 set_buffer_entries(&tr->trace_buffer, size);
3357         else
3358                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3359
3360         return ret;
3361 }
3362
3363 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3364                                           unsigned long size, int cpu_id)
3365 {
3366         int ret = size;
3367
3368         mutex_lock(&trace_types_lock);
3369
3370         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3371                 /* make sure this cpu is enabled in the mask */
3372                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3373                         ret = -EINVAL;
3374                         goto out;
3375                 }
3376         }
3377
3378         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3379         if (ret < 0)
3380                 ret = -ENOMEM;
3381
3382 out:
3383         mutex_unlock(&trace_types_lock);
3384
3385         return ret;
3386 }
3387
3388
3389 /**
3390  * tracing_update_buffers - used by tracing facility to expand ring buffers
3391  *
3392  * To save memory when tracing is configured in but never used, the
3393  * ring buffers are set to a minimum size. Once a user starts to use
3394  * the tracing facility, the buffers need to grow to their default
3395  * size.
3396  *
3397  * This function is to be called when a tracer is about to be used.
3398  */
3399 int tracing_update_buffers(void)
3400 {
3401         int ret = 0;
3402
3403         mutex_lock(&trace_types_lock);
3404         if (!ring_buffer_expanded)
3405                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3406                                                 RING_BUFFER_ALL_CPUS);
3407         mutex_unlock(&trace_types_lock);
3408
3409         return ret;
3410 }
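/*
 * A minimal sketch of how a caller is expected to use the helper above:
 * code that is about to turn tracing on (for example, code enabling a
 * tracer or a trace event) first makes sure the ring buffers have been
 * expanded from their boot-time minimum:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */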
3411
3412 struct trace_option_dentry;
3413
3414 static struct trace_option_dentry *
3415 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3416
3417 static void
3418 destroy_trace_option_files(struct trace_option_dentry *topts);
3419
3420 static int tracing_set_tracer(const char *buf)
3421 {
3422         static struct trace_option_dentry *topts;
3423         struct trace_array *tr = &global_trace;
3424         struct tracer *t;
3425 #ifdef CONFIG_TRACER_MAX_TRACE
3426         bool had_max_tr;
3427 #endif
3428         int ret = 0;
3429
3430         mutex_lock(&trace_types_lock);
3431
3432         if (!ring_buffer_expanded) {
3433                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3434                                                 RING_BUFFER_ALL_CPUS);
3435                 if (ret < 0)
3436                         goto out;
3437                 ret = 0;
3438         }
3439
3440         for (t = trace_types; t; t = t->next) {
3441                 if (strcmp(t->name, buf) == 0)
3442                         break;
3443         }
3444         if (!t) {
3445                 ret = -EINVAL;
3446                 goto out;
3447         }
3448         if (t == tr->current_trace)
3449                 goto out;
3450
3451         trace_branch_disable();
3452
3453         tr->current_trace->enabled = false;
3454
3455         if (tr->current_trace->reset)
3456                 tr->current_trace->reset(tr);
3457
3458         /* Current trace needs to be nop_trace before synchronize_sched */
3459         tr->current_trace = &nop_trace;
3460
3461 #ifdef CONFIG_TRACER_MAX_TRACE
3462         had_max_tr = tr->allocated_snapshot;
3463
3464         if (had_max_tr && !t->use_max_tr) {
3465                 /*
3466                  * We need to make sure that the update_max_tr sees that
3467                  * current_trace changed to nop_trace to keep it from
3468                  * swapping the buffers after we resize it.
3469                  * update_max_tr() is called with interrupts disabled,
3470                  * so a synchronize_sched() is sufficient.
3471                  */
3472                 synchronize_sched();
3473                 /*
3474                  * We don't free the ring buffer; instead, we resize it because
3475                  * the max_tr ring buffer has some state (e.g. ring->clock) that
3476                  * we want to preserve.
3477                  */
3478                 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
3479                 set_buffer_entries(&tr->max_buffer, 1);
3480                 tracing_reset_online_cpus(&tr->max_buffer);
3481                 tr->allocated_snapshot = false;
3482         }
3483 #endif
3484         destroy_trace_option_files(topts);
3485
3486         topts = create_trace_option_files(tr, t);
3487
3488 #ifdef CONFIG_TRACER_MAX_TRACE
3489         if (t->use_max_tr && !had_max_tr) {
3490                 /* We need to make the per-cpu buffer sizes equivalent */
3491                 ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->trace_buffer,
3492                                                    RING_BUFFER_ALL_CPUS);
3493                 if (ret < 0)
3494                         goto out;
3495                 tr->allocated_snapshot = true;
3496         }
3497 #endif
3498
3499         if (t->init) {
3500                 ret = tracer_init(t, tr);
3501                 if (ret)
3502                         goto out;
3503         }
3504
3505         tr->current_trace = t;
3506         tr->current_trace->enabled = true;
3507         trace_branch_enable(tr);
3508  out:
3509         mutex_unlock(&trace_types_lock);
3510
3511         return ret;
3512 }
3513
3514 static ssize_t
3515 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3516                         size_t cnt, loff_t *ppos)
3517 {
3518         char buf[MAX_TRACER_SIZE+1];
3519         int i;
3520         size_t ret;
3521         int err;
3522
3523         ret = cnt;
3524
3525         if (cnt > MAX_TRACER_SIZE)
3526                 cnt = MAX_TRACER_SIZE;
3527
3528         if (copy_from_user(&buf, ubuf, cnt))
3529                 return -EFAULT;
3530
3531         buf[cnt] = 0;
3532
3533         /* strip trailing whitespace. */
3534         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3535                 buf[i] = 0;
3536
3537         err = tracing_set_tracer(buf);
3538         if (err)
3539                 return err;
3540
3541         *ppos += ret;
3542
3543         return ret;
3544 }
3545
3546 static ssize_t
3547 tracing_max_lat_read(struct file *filp, char __user *ubuf,
3548                      size_t cnt, loff_t *ppos)
3549 {
3550         unsigned long *ptr = filp->private_data;
3551         char buf[64];
3552         int r;
3553
3554         r = snprintf(buf, sizeof(buf), "%ld\n",
3555                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
3556         if (r > sizeof(buf))
3557                 r = sizeof(buf);
3558         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3559 }
3560
3561 static ssize_t
3562 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3563                       size_t cnt, loff_t *ppos)
3564 {
3565         unsigned long *ptr = filp->private_data;
3566         unsigned long val;
3567         int ret;
3568
3569         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3570         if (ret)
3571                 return ret;
3572
3573         *ptr = val * 1000;
3574
3575         return cnt;
3576 }
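/*
 * A usage sketch for the two handlers above, assuming they back the usual
 * "tracing_max_latency" file (present when a latency tracer is configured
 * in). Values are read and written in microseconds, while the stored value
 * is kept in nanoseconds, and writing 0 resets the recorded maximum (the
 * number shown is only illustrative):
 *
 *   # cat /sys/kernel/debug/tracing/tracing_max_latency
 *   428
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 */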
3577
3578 static int tracing_open_pipe(struct inode *inode, struct file *filp)
3579 {
3580         struct trace_cpu *tc = inode->i_private;
3581         struct trace_array *tr = tc->tr;
3582         struct trace_iterator *iter;
3583         int ret = 0;
3584
3585         if (tracing_disabled)
3586                 return -ENODEV;
3587
3588         mutex_lock(&trace_types_lock);
3589
3590         /* create a buffer to store the information to pass to userspace */
3591         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3592         if (!iter) {
3593                 ret = -ENOMEM;
3594                 goto out;
3595         }
3596
3597         /*
3598          * We make a copy of the current tracer to avoid concurrent
3599          * changes to it while we are reading.
3600          */
3601         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
3602         if (!iter->trace) {
3603                 ret = -ENOMEM;
3604                 goto fail;
3605         }
3606         *iter->trace = *tr->current_trace;
3607
3608         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
3609                 ret = -ENOMEM;
3610                 goto fail;
3611         }
3612
3613         /* trace pipe does not show start of buffer */
3614         cpumask_setall(iter->started);
3615
3616         if (trace_flags & TRACE_ITER_LATENCY_FMT)
3617                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3618
3619         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3620         if (trace_clocks[trace_clock_id].in_ns)
3621                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3622
3623         iter->cpu_file = tc->cpu;
3624         iter->tr = tc->tr;
3625         iter->trace_buffer = &tc->tr->trace_buffer;
3626         mutex_init(&iter->mutex);
3627         filp->private_data = iter;
3628
3629         if (iter->trace->pipe_open)
3630                 iter->trace->pipe_open(iter);
3631
3632         nonseekable_open(inode, filp);
3633 out:
3634         mutex_unlock(&trace_types_lock);
3635         return ret;
3636
3637 fail:
3638         kfree(iter->trace);
3639         kfree(iter);
3640         mutex_unlock(&trace_types_lock);
3641         return ret;
3642 }
3643
3644 static int tracing_release_pipe(struct inode *inode, struct file *file)
3645 {
3646         struct trace_iterator *iter = file->private_data;
3647
3648         mutex_lock(&trace_types_lock);
3649
3650         if (iter->trace->pipe_close)
3651                 iter->trace->pipe_close(iter);
3652
3653         mutex_unlock(&trace_types_lock);
3654
3655         free_cpumask_var(iter->started);
3656         mutex_destroy(&iter->mutex);
3657         kfree(iter->trace);
3658         kfree(iter);
3659
3660         return 0;
3661 }
3662
3663 static unsigned int
3664 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
3665 {
3666         /* Iterators are static; they should be filled or empty */
3667         if (trace_buffer_iter(iter, iter->cpu_file))
3668                 return POLLIN | POLLRDNORM;
3669
3670         if (trace_flags & TRACE_ITER_BLOCK)
3671                 /*
3672                  * Always select as readable when in blocking mode
3673                  */
3674                 return POLLIN | POLLRDNORM;
3675         else
3676                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
3677                                              filp, poll_table);
3678 }
3679
3680 static unsigned int
3681 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
3682 {
3683         struct trace_iterator *iter = filp->private_data;
3684
3685         return trace_poll(iter, filp, poll_table);
3686 }
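/*
 * A minimal userspace sketch of the poll interface implemented above,
 * assuming the standard "trace_pipe" debugfs file (error handling
 * omitted):
 *
 *	#include <poll.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	char buf[4096];
 *
 *	poll(&pfd, 1, -1);		(blocks until trace data is available)
 *	read(fd, buf, sizeof(buf));
 */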
3687
3688 /*
3689  * This is a makeshift waitqueue.
3690  * A tracer might use this callback in some rare cases:
3691  *
3692  *  1) the current tracer might hold the runqueue lock when it wakes up
3693  *     a reader, hence a deadlock (sched, function, and function graph tracers)
3694  *  2) the function tracers trace all functions, and we don't want
3695  *     the overhead of calling wake_up and friends
3696  *     (and tracing them too)
3697  *
3698  *     Anyway, this is a really primitive wakeup.
3699  */
3700 void poll_wait_pipe(struct trace_iterator *iter)
3701 {
3702         set_current_state(TASK_INTERRUPTIBLE);
3703         /* sleep for 100 msecs, and try again. */
3704         schedule_timeout(HZ / 10);
3705 }
3706
3707 /* Must be called with trace_types_lock mutex held. */
3708 static int tracing_wait_pipe(struct file *filp)
3709 {
3710         struct trace_iterator *iter = filp->private_data;
3711
3712         while (trace_empty(iter)) {
3713
3714                 if ((filp->f_flags & O_NONBLOCK)) {
3715                         return -EAGAIN;
3716                 }
3717
3718                 mutex_unlock(&iter->mutex);
3719
3720                 iter->trace->wait_pipe(iter);
3721
3722                 mutex_lock(&iter->mutex);
3723
3724                 if (signal_pending(current))
3725                         return -EINTR;
3726
3727                 /*
3728                  * We only stop blocking once we have read something and
3729                  * tracing has been disabled. We keep blocking if tracing is
3730                  * disabled but nothing has been read yet. This allows a user
3731                  * to cat this file and then enable tracing. Once something
3732                  * has been read, we return EOF when tracing is disabled again.
3733                  *
3734                  * iter->pos will be 0 if we haven't read anything.
3735                  */
3736                 if (!tracing_is_enabled() && iter->pos)
3737                         break;
3738         }
3739
3740         return 1;
3741 }
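/*
 * The blocking behaviour above is what makes shell usage such as the
 * following work (a sketch, assuming the usual "trace_pipe" file): the
 * reader sits in tracing_wait_pipe() until events show up instead of
 * returning EOF:
 *
 *   # cat /sys/kernel/debug/tracing/trace_pipe &
 *   # echo function > /sys/kernel/debug/tracing/current_tracer
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on
 *   (the backgrounded cat now starts streaming events)
 */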
3742
3743 /*
3744  * Consumer reader.
3745  */
3746 static ssize_t
3747 tracing_read_pipe(struct file *filp, char __user *ubuf,
3748                   size_t cnt, loff_t *ppos)
3749 {
3750         struct trace_iterator *iter = filp->private_data;
3751         struct trace_array *tr = iter->tr;
3752         ssize_t sret;
3753
3754         /* return any leftover data */
3755         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3756         if (sret != -EBUSY)
3757                 return sret;
3758
3759         trace_seq_init(&iter->seq);
3760
3761         /* copy the tracer to avoid using a global lock all around */
3762         mutex_lock(&trace_types_lock);
3763         if (unlikely(iter->trace->name != tr->current_trace->name))
3764                 *iter->trace = *tr->current_trace;
3765         mutex_unlock(&trace_types_lock);
3766
3767         /*
3768          * Avoid more than one consumer on a single file descriptor.
3769          * This is just a matter of trace coherency; the ring buffer itself
3770          * is protected.
3771          */
3772         mutex_lock(&iter->mutex);
3773         if (iter->trace->read) {
3774                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
3775                 if (sret)
3776                         goto out;
3777         }
3778
3779 waitagain:
3780         sret = tracing_wait_pipe(filp);
3781         if (sret <= 0)
3782                 goto out;
3783
3784         /* stop when tracing is finished */
3785         if (trace_empty(iter)) {
3786                 sret = 0;
3787                 goto out;
3788         }
3789
3790         if (cnt >= PAGE_SIZE)
3791                 cnt = PAGE_SIZE - 1;
3792
3793         /* reset all but tr, trace, and overruns */
3794         memset(&iter->seq, 0,
3795                sizeof(struct trace_iterator) -
3796                offsetof(struct trace_iterator, seq));
3797         iter->pos = -1;
3798
3799         trace_event_read_lock();
3800         trace_access_lock(iter->cpu_file);
3801         while (trace_find_next_entry_inc(iter) != NULL) {
3802                 enum print_line_t ret;
3803                 int len = iter->seq.len;
3804
3805                 ret = print_trace_line(iter);
3806                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3807                         /* don't print partial lines */
3808                         iter->seq.len = len;
3809                         break;
3810                 }
3811                 if (ret != TRACE_TYPE_NO_CONSUME)
3812                         trace_consume(iter);
3813
3814                 if (iter->seq.len >= cnt)
3815                         break;
3816
3817                 /*
3818                  * Setting the full flag means we reached the trace_seq buffer
3819                  * size and we should have left via the partial-output condition
3820                  * above; one of the trace_seq_* functions is not being used properly.
3821                  */
3822                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
3823                           iter->ent->type);
3824         }
3825         trace_access_unlock(iter->cpu_file);
3826         trace_event_read_unlock();
3827
3828         /* Now copy what we have to the user */
3829         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
3830         if (iter->seq.readpos >= iter->seq.len)
3831                 trace_seq_init(&iter->seq);
3832
3833         /*
3834          * If there was nothing to send to the user, in spite of consuming
3835          * trace entries, go back and wait for more entries.
3836          */
3837         if (sret == -EBUSY)
3838                 goto waitagain;
3839
3840 out:
3841         mutex_unlock(&iter->mutex);
3842
3843         return sret;
3844 }
3845
3846 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
3847                                      struct pipe_buffer *buf)
3848 {
3849         __free_page(buf->page);
3850 }
3851
3852 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3853                                      unsigned int idx)
3854 {
3855         __free_page(spd->pages[idx]);
3856 }
3857
3858 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3859         .can_merge              = 0,
3860         .map                    = generic_pipe_buf_map,
3861         .unmap                  = generic_pipe_buf_unmap,
3862         .confirm                = generic_pipe_buf_confirm,
3863         .release                = tracing_pipe_buf_release,
3864         .steal                  = generic_pipe_buf_steal,
3865         .get                    = generic_pipe_buf_get,
3866 };
3867
3868 static size_t
3869 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3870 {
3871         size_t count;
3872         int ret;
3873
3874         /* Seq buffer is page-sized, exactly what we need. */
3875         for (;;) {
3876                 count = iter->seq.len;
3877                 ret = print_trace_line(iter);
3878                 count = iter->seq.len - count;
3879                 if (rem < count) {
3880                         rem = 0;
3881                         iter->seq.len -= count;
3882                         break;
3883                 }
3884                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
3885                         iter->seq.len -= count;
3886                         break;
3887                 }
3888
3889                 if (ret != TRACE_TYPE_NO_CONSUME)
3890                         trace_consume(iter);
3891                 rem -= count;
3892                 if (!trace_find_next_entry_inc(iter))   {
3893                         rem = 0;
3894                         iter->ent = NULL;
3895                         break;
3896                 }
3897         }
3898
3899         return rem;
3900 }
3901
3902 static ssize_t tracing_splice_read_pipe(struct file *filp,
3903                                         loff_t *ppos,
3904                                         struct pipe_inode_info *pipe,
3905                                         size_t len,
3906                                         unsigned int flags)
3907 {
3908         struct page *pages_def[PIPE_DEF_BUFFERS];
3909         struct partial_page partial_def[PIPE_DEF_BUFFERS];
3910         struct trace_iterator *iter = filp->private_data;
3911         struct splice_pipe_desc spd = {
3912                 .pages          = pages_def,
3913                 .partial        = partial_def,
3914                 .nr_pages       = 0, /* This gets updated below. */
3915                 .nr_pages_max   = PIPE_DEF_BUFFERS,
3916                 .flags          = flags,
3917                 .ops            = &tracing_pipe_buf_ops,
3918                 .spd_release    = tracing_spd_release_pipe,
3919         };
3920         struct trace_array *tr = iter->tr;
3921         ssize_t ret;
3922         size_t rem;
3923         unsigned int i;
3924
3925         if (splice_grow_spd(pipe, &spd))
3926                 return -ENOMEM;
3927
3928         /* copy the tracer to avoid using a global lock all around */
3929         mutex_lock(&trace_types_lock);
3930         if (unlikely(iter->trace->name != tr->current_trace->name))
3931                 *iter->trace = *tr->current_trace;
3932         mutex_unlock(&trace_types_lock);
3933
3934         mutex_lock(&iter->mutex);
3935
3936         if (iter->trace->splice_read) {
3937                 ret = iter->trace->splice_read(iter, filp,
3938                                                ppos, pipe, len, flags);
3939                 if (ret)
3940                         goto out_err;
3941         }
3942
3943         ret = tracing_wait_pipe(filp);
3944         if (ret <= 0)
3945                 goto out_err;
3946
3947         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
3948                 ret = -EFAULT;
3949                 goto out_err;
3950         }
3951
3952         trace_event_read_lock();
3953         trace_access_lock(iter->cpu_file);
3954
3955         /* Fill as many pages as possible. */
3956         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3957                 spd.pages[i] = alloc_page(GFP_KERNEL);
3958                 if (!spd.pages[i])
3959                         break;
3960
3961                 rem = tracing_fill_pipe_page(rem, iter);
3962
3963                 /* Copy the data into the page, so we can start over. */
3964                 ret = trace_seq_to_buffer(&iter->seq,
3965                                           page_address(spd.pages[i]),
3966                                           iter->seq.len);
3967                 if (ret < 0) {
3968                         __free_page(spd.pages[i]);
3969                         break;
3970                 }
3971                 spd.partial[i].offset = 0;
3972                 spd.partial[i].len = iter->seq.len;
3973
3974                 trace_seq_init(&iter->seq);
3975         }
3976
3977         trace_access_unlock(iter->cpu_file);
3978         trace_event_read_unlock();
3979         mutex_unlock(&iter->mutex);
3980
3981         spd.nr_pages = i;
3982
3983         ret = splice_to_pipe(pipe, &spd);
3984 out:
3985         splice_shrink_spd(&spd);
3986         return ret;
3987
3988 out_err:
3989         mutex_unlock(&iter->mutex);
3990         goto out;
3991 }
3992
3993 static ssize_t
3994 tracing_entries_read(struct file *filp, char __user *ubuf,
3995                      size_t cnt, loff_t *ppos)
3996 {
3997         struct trace_cpu *tc = filp->private_data;
3998         struct trace_array *tr = tc->tr;
3999         char buf[64];
4000         int r = 0;
4001         ssize_t ret;
4002
4003         mutex_lock(&trace_types_lock);
4004
4005         if (tc->cpu == RING_BUFFER_ALL_CPUS) {
4006                 int cpu, buf_size_same;
4007                 unsigned long size;
4008
4009                 size = 0;
4010                 buf_size_same = 1;
4011                 /* check if all cpu sizes are the same */
4012                 for_each_tracing_cpu(cpu) {
4013                         /* fill in the size from first enabled cpu */
4014                         if (size == 0)
4015                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4016                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4017                                 buf_size_same = 0;
4018                                 break;
4019                         }
4020                 }
4021
4022                 if (buf_size_same) {
4023                         if (!ring_buffer_expanded)
4024                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4025                                             size >> 10,
4026                                             trace_buf_size >> 10);
4027                         else
4028                                 r = sprintf(buf, "%lu\n", size >> 10);
4029                 } else
4030                         r = sprintf(buf, "X\n");
4031         } else
4032                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
4033
4034         mutex_unlock(&trace_types_lock);
4035
4036         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4037         return ret;
4038 }
4039
4040 static ssize_t
4041 tracing_entries_write(struct file *filp, const char __user *ubuf,
4042                       size_t cnt, loff_t *ppos)
4043 {
4044         struct trace_cpu *tc = filp->private_data;
4045         unsigned long val;
4046         int ret;
4047
4048         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4049         if (ret)
4050                 return ret;
4051
4052         /* must have at least 1 entry */
4053         if (!val)
4054                 return -EINVAL;
4055
4056         /* value is in KB */
4057         val <<= 10;
4058
4059         ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
4060         if (ret < 0)
4061                 return ret;
4062
4063         *ppos += cnt;
4064
4065         return cnt;
4066 }
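/*
 * A usage sketch for the two handlers above, assuming they back the usual
 * "buffer_size_kb" file (and its per_cpu/cpuN counterparts). Writes are in
 * KB per cpu; reads show "X" when the per-cpu sizes differ and
 * "(expanded: N)" while the buffers are still at their boot-time size:
 *
 *   # echo 4096 > /sys/kernel/debug/tracing/buffer_size_kb
 *   # cat /sys/kernel/debug/tracing/buffer_size_kb
 *   4096
 */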
4067
4068 static ssize_t
4069 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4070                                 size_t cnt, loff_t *ppos)
4071 {
4072         struct trace_array *tr = filp->private_data;
4073         char buf[64];
4074         int r, cpu;
4075         unsigned long size = 0, expanded_size = 0;
4076
4077         mutex_lock(&trace_types_lock);
4078         for_each_tracing_cpu(cpu) {
4079                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4080                 if (!ring_buffer_expanded)
4081                         expanded_size += trace_buf_size >> 10;
4082         }
4083         if (ring_buffer_expanded)
4084                 r = sprintf(buf, "%lu\n", size);
4085         else
4086                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4087         mutex_unlock(&trace_types_lock);
4088
4089         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4090 }
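/*
 * The handler above typically backs the "buffer_total_size_kb" file and
 * reports the sum of the per-cpu sizes, with the expanded total shown
 * while the buffers are still at their boot-time minimum (illustrative
 * numbers only):
 *
 *   # cat /sys/kernel/debug/tracing/buffer_total_size_kb
 *   28 (expanded: 5632)
 */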
4091
4092 static ssize_t
4093 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4094                           size_t cnt, loff_t *ppos)
4095 {
4096         /*
4097          * There is no need to read what the user has written; this function
4098          * exists just to make sure that there is no error when "echo" is used.
4099          */
4100
4101         *ppos += cnt;
4102
4103         return cnt;
4104 }
4105
4106 static int
4107 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4108 {
4109         struct trace_array *tr = inode->i_private;
4110
4111         /* disable tracing ? */
4112         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4113                 tracing_off();
4114         /* resize the ring buffer to 0 */
4115         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4116
4117         return 0;
4118 }
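/*
 * A sketch of how the file backed by the two handlers above (usually
 * "free_buffer") is used: any write is accepted, and closing the file
 * shrinks the ring buffer as far as it will go, also turning tracing off
 * when the TRACE_ITER_STOP_ON_FREE trace option is set:
 *
 *   # echo 1 > /sys/kernel/debug/tracing/free_buffer
 */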
4119
4120 static ssize_t
4121 tracing_mark_write(struct file *filp, const char __user *ubuf,
4122                                         size_t cnt, loff_t *fpos)
4123 {
4124         unsigned long addr = (unsigned long)ubuf;
4125         struct ring_buffer_event *event;
4126         struct ring_buffer *buffer;
4127         struct print_entry *entry;
4128         unsigned long irq_flags;
4129         struct page *pages[2];
4130         void *map_page[2];
4131         int nr_pages = 1;
4132         ssize_t written;
4133         int offset;
4134         int size;
4135         int len;
4136         int ret;
4137         int i;
4138
4139         if (tracing_disabled)
4140                 return -EINVAL;
4141
4142         if (!(trace_flags & TRACE_ITER_MARKERS))
4143                 return -EINVAL;
4144
4145         if (cnt > TRACE_BUF_SIZE)
4146                 cnt = TRACE_BUF_SIZE;
4147
4148         /*
4149          * Userspace is injecting traces into the kernel trace buffer.
4150          * We want to be as non-intrusive as possible.
4151          * To do so, we do not want to allocate any special buffers
4152          * or take any locks, but instead write the userspace data
4153          * straight into the ring buffer.
4154          *
4155          * First we need to pin the userspace buffer into memory,
4156          * which it most likely already is because userspace just
4157          * referenced it, but there is no guarantee. By using get_user_pages_fast()
4158          * and kmap_atomic/kunmap_atomic() we can get access to the
4159          * pages directly. We then write the data directly into the
4160          * ring buffer.
4161          */
4162         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4163
4164         /* check if we cross a page boundary */
4165         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4166                 nr_pages = 2;
4167
4168         offset = addr & (PAGE_SIZE - 1);
4169         addr &= PAGE_MASK;
4170
4171         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4172         if (ret < nr_pages) {
4173                 while (--ret >= 0)
4174                         put_page(pages[ret]);
4175                 written = -EFAULT;
4176                 goto out;
4177         }
4178
4179         for (i = 0; i < nr_pages; i++)
4180                 map_page[i] = kmap_atomic(pages[i]);
4181
4182         local_save_flags(irq_flags);
4183         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4184         buffer = global_trace.trace_buffer.buffer;
4185         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4186                                           irq_flags, preempt_count());
4187         if (!event) {
4188                 /* Ring buffer disabled, return as if not open for write */
4189                 written = -EBADF;
4190                 goto out_unlock;
4191         }
4192
4193         entry = ring_buffer_event_data(event);
4194         entry->ip = _THIS_IP_;
4195
4196         if (nr_pages == 2) {
4197                 len = PAGE_SIZE - offset;
4198                 memcpy(&entry->buf, map_page[0] + offset, len);
4199                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4200         } else
4201                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4202
4203         if (entry->buf[cnt - 1] != '\n') {
4204                 entry->buf[cnt] = '\n';
4205                 entry->buf[cnt + 1] = '\0';
4206         } else
4207                 entry->buf[cnt] = '\0';
4208
4209         __buffer_unlock_commit(buffer, event);
4210
4211         written = cnt;
4212
4213         *fpos += written;
4214
4215  out_unlock:
4216         for (i = 0; i < nr_pages; i++) {
4217                 kunmap_atomic(map_page[i]);
4218                 put_page(pages[i]);
4219         }
4220  out:
4221         return written;
4222 }
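/*
 * A minimal userspace sketch of writing a marker through the handler
 * above, assuming the usual "trace_marker" debugfs file (error handling
 * omitted). The string shows up in the trace as a print event:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <string.h>
 *
 *	const char *msg = "hello from userspace";
 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *
 *	write(fd, msg, strlen(msg));
 *	close(fd);
 */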
4223
4224 static int tracing_clock_show(struct seq_file *m, void *v)
4225 {
4226         struct trace_array *tr = m->private;
4227         int i;
4228
4229         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4230                 seq_printf(m,
4231                         "%s%s%s%s", i ? " " : "",
4232                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4233                         i == tr->clock_id ? "]" : "");
4234         seq_putc(m, '\n');
4235
4236         return 0;
4237 }
4238
4239 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4240                                    size_t cnt, loff_t *fpos)
4241 {
4242         struct seq_file *m = filp->private_data;
4243         struct trace_array *tr = m->private;
4244         char buf[64];
4245         const char *clockstr;
4246         int i;
4247
4248         if (cnt >= sizeof(buf))
4249                 return -EINVAL;
4250
4251         if (copy_from_user(&buf, ubuf, cnt))
4252                 return -EFAULT;
4253
4254         buf[cnt] = 0;
4255
4256         clockstr = strstrip(buf);
4257
4258         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4259                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4260                         break;
4261         }
4262         if (i == ARRAY_SIZE(trace_clocks))
4263                 return -EINVAL;
4264
4265         mutex_lock(&trace_types_lock);
4266
4267         tr->clock_id = i;
4268
4269         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4270
4271         /*
4272          * New clock may not be consistent with the previous clock.
4273          * Reset the buffer so that it doesn't have incomparable timestamps.
4274          */
4275         tracing_reset_online_cpus(&global_trace.trace_buffer);
4276
4277 #ifdef CONFIG_TRACER_MAX_TRACE
4278         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4279                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4280         tracing_reset_online_cpus(&global_trace.max_buffer);
4281 #endif
4282
4283         mutex_unlock(&trace_types_lock);
4284
4285         *fpos += cnt;
4286
4287         return cnt;
4288 }
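/*
 * A usage sketch for the clock handlers above, assuming they back the
 * usual "trace_clock" file. Reading lists the available clocks with the
 * current one in brackets; writing one of the listed names switches the
 * clock and resets the buffers (the exact clock list depends on the
 * build):
 *
 *   # cat /sys/kernel/debug/tracing/trace_clock
 *   [local] global counter ...
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 */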
4289
4290 static int tracing_clock_open(struct inode *inode, struct file *file)
4291 {
4292         if (tracing_disabled)
4293                 return -ENODEV;
4294
4295         return single_open(file, tracing_clock_show, inode->i_private);
4296 }
4297
4298 struct ftrace_buffer_info {
4299         struct trace_iterator   iter;
4300         void                    *spare;
4301         unsigned int            read;
4302 };
4303
4304 #ifdef CONFIG_TRACER_SNAPSHOT
4305 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4306 {
4307         struct trace_cpu *tc = inode->i_private;
4308         struct trace_iterator *iter;
4309         struct seq_file *m;
4310         int ret = 0;
4311
4312         if (file->f_mode & FMODE_READ) {
4313                 iter = __tracing_open(inode, file, true);
4314                 if (IS_ERR(iter))
4315                         ret = PTR_ERR(iter);
4316         } else {
4317                 /* Writes still need the seq_file to hold the private data */
4318                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4319                 if (!m)
4320                         return -ENOMEM;
4321                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4322                 if (!iter) {
4323                         kfree(m);
4324                         return -ENOMEM;
4325                 }
4326                 iter->tr = tc->tr;
4327                 iter->trace_buffer = &tc->tr->max_buffer;
4328                 iter->cpu_file = tc->cpu;
4329                 m->private = iter;
4330                 file->private_data = m;
4331         }
4332
4333         return ret;
4334 }
4335
4336 static ssize_t
4337 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4338                        loff_t *ppos)
4339 {
4340         struct seq_file *m = filp->private_data;
4341         struct trace_iterator *iter = m->private;
4342         struct trace_array *tr = iter->tr;
4343         unsigned long val;
4344         int ret;
4345
4346         ret = tracing_update_buffers();
4347         if (ret < 0)
4348                 return ret;
4349
4350         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4351         if (ret)
4352                 return ret;
4353
4354         mutex_lock(&trace_types_lock);
4355
4356         if (tr->current_trace->use_max_tr) {
4357                 ret = -EBUSY;
4358                 goto out;
4359         }
4360
4361         switch (val) {
4362         case 0:
4363                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4364                         ret = -EINVAL;
4365                         break;
4366                 }
4367                 if (tr->allocated_snapshot) {
4368                         /* free spare buffer */
4369                         ring_buffer_resize(tr->max_buffer.buffer, 1,
4370                                            RING_BUFFER_ALL_CPUS);
4371                         set_buffer_entries(&tr->max_buffer, 1);
4372                         tracing_reset_online_cpus(&tr->max_buffer);
4373                         tr->allocated_snapshot = false;
4374                 }
4375                 break;
4376         case 1:
4377 /* Only allow per-cpu swap if the ring buffer supports it */
4378 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4379                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4380                         ret = -EINVAL;
4381                         break;
4382                 }
4383 #endif
4384                 if (!tr->allocated_snapshot) {
4385                         /* allocate spare buffer */
4386                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
4387                                         &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
4388                         if (ret < 0)
4389                                 break;
4390                         tr->allocated_snapshot = true;
4391                 }
4392                 local_irq_disable();
4393                 /* Now, we're going to swap */
4394                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4395                         update_max_tr(tr, current, smp_processor_id());
4396                 else
4397                         update_max_tr_single(tr, current, iter->cpu_file);
4398                 local_irq_enable();
4399                 break;
4400         default:
4401                 if (tr->allocated_snapshot) {
4402                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4403                                 tracing_reset_online_cpus(&tr->max_buffer);
4404                         else
4405                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4406                 }
4407                 break;
4408         }
4409
4410         if (ret >= 0) {
4411                 *ppos += cnt;
4412                 ret = cnt;
4413         }
4414 out:
4415         mutex_unlock(&trace_types_lock);
4416         return ret;
4417 }
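/*
 * The switch above gives the "snapshot" file (CONFIG_TRACER_SNAPSHOT) its
 * user-visible semantics; a usage sketch, assuming the usual file name:
 *
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot
 *       (take a snapshot, allocating the spare buffer first if needed)
 *   # cat /sys/kernel/debug/tracing/snapshot
 *       (read the snapshot)
 *   # echo 2 > /sys/kernel/debug/tracing/snapshot
 *       (clear the snapshot contents without freeing the spare buffer)
 *   # echo 0 > /sys/kernel/debug/tracing/snapshot
 *       (free the spare buffer)
 */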
4418
4419 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4420 {
4421         struct seq_file *m = file->private_data;
4422
4423         if (file->f_mode & FMODE_READ)
4424                 return tracing_release(inode, file);
4425
4426         /* If write only, the seq_file is just a stub */
4427         if (m)
4428                 kfree(m->private);
4429         kfree(m);
4430
4431         return 0;
4432 }
4433
4434 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4435 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4436                                     size_t count, loff_t *ppos);
4437 static int tracing_buffers_release(struct inode *inode, struct file *file);
4438 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4439                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4440
4441 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4442 {
4443         struct ftrace_buffer_info *info;
4444         int ret;
4445
4446         ret = tracing_buffers_open(inode, filp);
4447         if (ret < 0)
4448                 return ret;
4449
4450         info = filp->private_data;
4451
4452         if (info->iter.trace->use_max_tr) {
4453                 tracing_buffers_release(inode, filp);
4454                 return -EBUSY;
4455         }
4456
4457         info->iter.snapshot = true;
4458         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4459
4460         return ret;
4461 }
4462
4463 #endif /* CONFIG_TRACER_SNAPSHOT */
4464
4465
4466 static const struct file_operations tracing_max_lat_fops = {
4467         .open           = tracing_open_generic,
4468         .read           = tracing_max_lat_read,
4469         .write          = tracing_max_lat_write,
4470         .llseek         = generic_file_llseek,
4471 };
4472
4473 static const struct file_operations set_tracer_fops = {
4474         .open           = tracing_open_generic,
4475         .read           = tracing_set_trace_read,
4476         .write          = tracing_set_trace_write,
4477         .llseek         = generic_file_llseek,
4478 };
4479
4480 static const struct file_operations tracing_pipe_fops = {
4481         .open           = tracing_open_pipe,
4482         .poll           = tracing_poll_pipe,
4483         .read           = tracing_read_pipe,
4484         .splice_read    = tracing_splice_read_pipe,
4485         .release        = tracing_release_pipe,
4486         .llseek         = no_llseek,
4487 };
4488
4489 static const struct file_operations tracing_entries_fops = {
4490         .open           = tracing_open_generic,
4491         .read           = tracing_entries_read,
4492         .write          = tracing_entries_write,
4493         .llseek         = generic_file_llseek,
4494 };
4495
4496 static const struct file_operations tracing_total_entries_fops = {
4497         .open           = tracing_open_generic,
4498         .read           = tracing_total_entries_read,
4499         .llseek         = generic_file_llseek,
4500 };
4501
4502 static const struct file_operations tracing_free_buffer_fops = {
4503         .write          = tracing_free_buffer_write,
4504         .release        = tracing_free_buffer_release,
4505 };
4506
4507 static const struct file_operations tracing_mark_fops = {
4508         .open           = tracing_open_generic,
4509         .write          = tracing_mark_write,
4510         .llseek         = generic_file_llseek,
4511 };
4512
4513 static const struct file_operations trace_clock_fops = {
4514         .open           = tracing_clock_open,
4515         .read           = seq_read,
4516         .llseek         = seq_lseek,
4517         .release        = single_release,
4518         .write          = tracing_clock_write,
4519 };
4520
4521 #ifdef CONFIG_TRACER_SNAPSHOT
4522 static const struct file_operations snapshot_fops = {
4523         .open           = tracing_snapshot_open,
4524         .read           = seq_read,
4525         .write          = tracing_snapshot_write,
4526         .llseek         = tracing_seek,
4527         .release        = tracing_snapshot_release,
4528 };
4529
4530 static const struct file_operations snapshot_raw_fops = {
4531         .open           = snapshot_raw_open,
4532         .read           = tracing_buffers_read,
4533         .release        = tracing_buffers_release,
4534         .splice_read    = tracing_buffers_splice_read,
4535         .llseek         = no_llseek,
4536 };
4537
4538 #endif /* CONFIG_TRACER_SNAPSHOT */
4539
4540 static int tracing_buffers_open(struct inode *inode, struct file *filp)
4541 {
4542         struct trace_cpu *tc = inode->i_private;
4543         struct trace_array *tr = tc->tr;
4544         struct ftrace_buffer_info *info;
4545
4546         if (tracing_disabled)
4547                 return -ENODEV;
4548
4549         info = kzalloc(sizeof(*info), GFP_KERNEL);
4550         if (!info)
4551                 return -ENOMEM;
4552
4553         mutex_lock(&trace_types_lock);
4554
4555         tr->ref++;
4556
4557         info->iter.tr           = tr;
4558         info->iter.cpu_file     = tc->cpu;
4559         info->iter.trace        = tr->current_trace;
4560         info->iter.trace_buffer = &tr->trace_buffer;
4561         info->spare             = NULL;
4562         /* Force reading ring buffer for first read */
4563         info->read              = (unsigned int)-1;
4564
4565         filp->private_data = info;
4566
4567         mutex_unlock(&trace_types_lock);
4568
4569         return nonseekable_open(inode, filp);
4570 }
4571
4572 static unsigned int
4573 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
4574 {
4575         struct ftrace_buffer_info *info = filp->private_data;
4576         struct trace_iterator *iter = &info->iter;
4577
4578         return trace_poll(iter, filp, poll_table);
4579 }
4580
4581 static ssize_t
4582 tracing_buffers_read(struct file *filp, char __user *ubuf,
4583                      size_t count, loff_t *ppos)
4584 {
4585         struct ftrace_buffer_info *info = filp->private_data;
4586         struct trace_iterator *iter = &info->iter;
4587         ssize_t ret;
4588         ssize_t size;
4589
4590         if (!count)
4591                 return 0;
4592
4593         mutex_lock(&trace_types_lock);
4594
4595 #ifdef CONFIG_TRACER_MAX_TRACE
4596         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
4597                 size = -EBUSY;
4598                 goto out_unlock;
4599         }
4600 #endif
4601
4602         if (!info->spare)
4603                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
4604                                                           iter->cpu_file);
4605         size = -ENOMEM;
4606         if (!info->spare)
4607                 goto out_unlock;
4608
4609         /* Do we have previous read data to read? */
4610         if (info->read < PAGE_SIZE)
4611                 goto read;
4612
4613  again:
4614         trace_access_lock(iter->cpu_file);
4615         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
4616                                     &info->spare,
4617                                     count,
4618                                     iter->cpu_file, 0);
4619         trace_access_unlock(iter->cpu_file);
4620
4621         if (ret < 0) {
4622                 if (trace_empty(iter)) {
4623                         if ((filp->f_flags & O_NONBLOCK)) {
4624                                 size = -EAGAIN;
4625                                 goto out_unlock;
4626                         }
4627                         mutex_unlock(&trace_types_lock);
4628                         iter->trace->wait_pipe(iter);
4629                         mutex_lock(&trace_types_lock);
4630                         if (signal_pending(current)) {
4631                                 size = -EINTR;
4632                                 goto out_unlock;
4633                         }
4634                         goto again;
4635                 }
4636                 size = 0;
4637                 goto out_unlock;
4638         }
4639
4640         info->read = 0;
4641  read:
4642         size = PAGE_SIZE - info->read;
4643         if (size > count)
4644                 size = count;
4645
4646         ret = copy_to_user(ubuf, info->spare + info->read, size);
4647         if (ret == size) {
4648                 size = -EFAULT;
4649                 goto out_unlock;
4650         }
4651         size -= ret;
4652
4653         *ppos += size;
4654         info->read += size;
4655
4656  out_unlock:
4657         mutex_unlock(&trace_types_lock);
4658
4659         return size;
4660 }
4661
4662 static int tracing_buffers_release(struct inode *inode, struct file *file)
4663 {
4664         struct ftrace_buffer_info *info = file->private_data;
4665         struct trace_iterator *iter = &info->iter;
4666
4667         mutex_lock(&trace_types_lock);
4668
4669         WARN_ON(!iter->tr->ref);
4670         iter->tr->ref--;
4671
4672         if (info->spare)
4673                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
4674         kfree(info);
4675
4676         mutex_unlock(&trace_types_lock);
4677
4678         return 0;
4679 }
4680
4681 struct buffer_ref {
4682         struct ring_buffer      *buffer;
4683         void                    *page;
4684         int                     ref;
4685 };
4686
4687 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
4688                                     struct pipe_buffer *buf)
4689 {
4690         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
4691
4692         if (--ref->ref)
4693                 return;
4694
4695         ring_buffer_free_read_page(ref->buffer, ref->page);
4696         kfree(ref);
4697         buf->private = 0;
4698 }
4699
4700 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
4701                                 struct pipe_buffer *buf)
4702 {
4703         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
4704
4705         ref->ref++;
4706 }
4707
4708 /* Pipe buffer operations for a buffer. */
4709 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
4710         .can_merge              = 0,
4711         .map                    = generic_pipe_buf_map,
4712         .unmap                  = generic_pipe_buf_unmap,
4713         .confirm                = generic_pipe_buf_confirm,
4714         .release                = buffer_pipe_buf_release,
4715         .steal                  = generic_pipe_buf_steal,
4716         .get                    = buffer_pipe_buf_get,
4717 };
4718
4719 /*
4720  * Callback from splice_to_pipe() to release some pages at the end
4721  * of the spd in case we errored out while filling the pipe.
4722  */
4723 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
4724 {
4725         struct buffer_ref *ref =
4726                 (struct buffer_ref *)spd->partial[i].private;
4727
4728         if (--ref->ref)
4729                 return;
4730
4731         ring_buffer_free_read_page(ref->buffer, ref->page);
4732         kfree(ref);
4733         spd->partial[i].private = 0;
4734 }
4735
4736 static ssize_t
4737 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4738                             struct pipe_inode_info *pipe, size_t len,
4739                             unsigned int flags)
4740 {
4741         struct ftrace_buffer_info *info = file->private_data;
4742         struct trace_iterator *iter = &info->iter;
4743         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4744         struct page *pages_def[PIPE_DEF_BUFFERS];
4745         struct splice_pipe_desc spd = {
4746                 .pages          = pages_def,
4747                 .partial        = partial_def,
4748                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4749                 .flags          = flags,
4750                 .ops            = &buffer_pipe_buf_ops,
4751                 .spd_release    = buffer_spd_release,
4752         };
4753         struct buffer_ref *ref;
4754         int entries, size, i;
4755         ssize_t ret;
4756
4757         mutex_lock(&trace_types_lock);
4758
4759 #ifdef CONFIG_TRACER_MAX_TRACE
4760         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
4761                 ret = -EBUSY;
4762                 goto out;
4763         }
4764 #endif
4765
4766         if (splice_grow_spd(pipe, &spd)) {
4767                 ret = -ENOMEM;
4768                 goto out;
4769         }
4770
4771         if (*ppos & (PAGE_SIZE - 1)) {
4772                 ret = -EINVAL;
4773                 goto out;
4774         }
4775
4776         if (len & (PAGE_SIZE - 1)) {
4777                 if (len < PAGE_SIZE) {
4778                         ret = -EINVAL;
4779                         goto out;
4780                 }
4781                 len &= PAGE_MASK;
4782         }
4783
4784  again:
4785         trace_access_lock(iter->cpu_file);
4786         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
4787
4788         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
4789                 struct page *page;
4790                 int r;
4791
4792                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
4793                 if (!ref)
4794                         break;
4795
4796                 ref->ref = 1;
4797                 ref->buffer = iter->trace_buffer->buffer;
4798                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
4799                 if (!ref->page) {
4800                         kfree(ref);
4801                         break;
4802                 }
4803
4804                 r = ring_buffer_read_page(ref->buffer, &ref->page,
4805                                           len, iter->cpu_file, 1);
4806                 if (r < 0) {
4807                         ring_buffer_free_read_page(ref->buffer, ref->page);
4808                         kfree(ref);
4809                         break;
4810                 }
4811
4812                 /*
4813                  * Zero out any leftover data; this is going to
4814                  * userland.
4815                  */
4816                 size = ring_buffer_page_len(ref->page);
4817                 if (size < PAGE_SIZE)
4818                         memset(ref->page + size, 0, PAGE_SIZE - size);
4819
4820                 page = virt_to_page(ref->page);
4821
4822                 spd.pages[i] = page;
4823                 spd.partial[i].len = PAGE_SIZE;
4824                 spd.partial[i].offset = 0;
4825                 spd.partial[i].private = (unsigned long)ref;
4826                 spd.nr_pages++;
4827                 *ppos += PAGE_SIZE;
4828
4829                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
4830         }
4831
4832         trace_access_unlock(iter->cpu_file);
4833         spd.nr_pages = i;
4834
4835         /* did we read anything? */
4836         if (!spd.nr_pages) {
4837                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
4838                         ret = -EAGAIN;
4839                         goto out;
4840                 }
4841                 mutex_unlock(&trace_types_lock);
4842                 iter->trace->wait_pipe(iter);
4843                 mutex_lock(&trace_types_lock);
4844                 if (signal_pending(current)) {
4845                         ret = -EINTR;
4846                         goto out;
4847                 }
4848                 goto again;
4849         }
4850
4851         ret = splice_to_pipe(pipe, &spd);
4852         splice_shrink_spd(&spd);
4853 out:
4854         mutex_unlock(&trace_types_lock);
4855
4856         return ret;
4857 }
4858
4859 static const struct file_operations tracing_buffers_fops = {
4860         .open           = tracing_buffers_open,
4861         .read           = tracing_buffers_read,
4862         .poll           = tracing_buffers_poll,
4863         .release        = tracing_buffers_release,
4864         .splice_read    = tracing_buffers_splice_read,
4865         .llseek         = no_llseek,
4866 };
4867
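/*
 * Read handler for the per cpu "stats" files: print the entry, overrun,
 * byte and event counters plus timestamps for one CPU's buffer.
 */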
4868 static ssize_t
4869 tracing_stats_read(struct file *filp, char __user *ubuf,
4870                    size_t count, loff_t *ppos)
4871 {
4872         struct trace_cpu *tc = filp->private_data;
4873         struct trace_array *tr = tc->tr;
4874         struct trace_buffer *trace_buf = &tr->trace_buffer;
4875         struct trace_seq *s;
4876         unsigned long cnt;
4877         unsigned long long t;
4878         unsigned long usec_rem;
4879         int cpu = tc->cpu;
4880
4881         s = kmalloc(sizeof(*s), GFP_KERNEL);
4882         if (!s)
4883                 return -ENOMEM;
4884
4885         trace_seq_init(s);
4886
4887         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
4888         trace_seq_printf(s, "entries: %ld\n", cnt);
4889
4890         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
4891         trace_seq_printf(s, "overrun: %ld\n", cnt);
4892
4893         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
4894         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
4895
4896         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
4897         trace_seq_printf(s, "bytes: %ld\n", cnt);
4898
4899         if (trace_clocks[trace_clock_id].in_ns) {
4900                 /* local or global for trace_clock */
4901                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
4902                 usec_rem = do_div(t, USEC_PER_SEC);
4903                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
4904                                                                 t, usec_rem);
4905
4906                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
4907                 usec_rem = do_div(t, USEC_PER_SEC);
4908                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
4909         } else {
4910                 /* counter or tsc mode for trace_clock */
4911                 trace_seq_printf(s, "oldest event ts: %llu\n",
4912                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
4913
4914                 trace_seq_printf(s, "now ts: %llu\n",
4915                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
4916         }
4917
4918         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
4919         trace_seq_printf(s, "dropped events: %ld\n", cnt);
4920
4921         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
4922         trace_seq_printf(s, "read events: %ld\n", cnt);
4923
4924         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
4925
4926         kfree(s);
4927
4928         return count;
4929 }
4930
4931 static const struct file_operations tracing_stats_fops = {
4932         .open           = tracing_open_generic,
4933         .read           = tracing_stats_read,
4934         .llseek         = generic_file_llseek,
4935 };
4936
4937 #ifdef CONFIG_DYNAMIC_FTRACE
4938
4939 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
4940 {
4941         return 0;
4942 }
4943
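/*
 * "dyn_ftrace_total_info": print the counter passed in through
 * filp->private_data (ftrace_update_tot_cnt) followed by optional
 * arch specific info from ftrace_arch_read_dyn_info().
 */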
4944 static ssize_t
4945 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
4946                   size_t cnt, loff_t *ppos)
4947 {
4948         static char ftrace_dyn_info_buffer[1024];
4949         static DEFINE_MUTEX(dyn_info_mutex);
4950         unsigned long *p = filp->private_data;
4951         char *buf = ftrace_dyn_info_buffer;
4952         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
4953         int r;
4954
4955         mutex_lock(&dyn_info_mutex);
4956         r = sprintf(buf, "%ld ", *p);
4957
4958         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
4959         buf[r++] = '\n';
4960
4961         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4962
4963         mutex_unlock(&dyn_info_mutex);
4964
4965         return r;
4966 }
4967
4968 static const struct file_operations tracing_dyn_info_fops = {
4969         .open           = tracing_open_generic,
4970         .read           = tracing_read_dyn_info,
4971         .llseek         = generic_file_llseek,
4972 };
4973 #endif
4974
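/*
 * Return the debugfs directory of a trace array, creating the top
 * level "tracing" directory for the global array on first use.
 */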
4975 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
4976 {
4977         static int once;
4978
4979         if (tr->dir)
4980                 return tr->dir;
4981
4982         if (!debugfs_initialized())
4983                 return NULL;
4984
4985         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
4986                 tr->dir = debugfs_create_dir("tracing", NULL);
4987
4988         if (!tr->dir && !once) {
4989                 once = 1;
4990                 pr_warning("Could not create debugfs directory 'tracing'\n");
4991                 return NULL;
4992         }
4993
4994         return tr->dir;
4995 }
4996
4997 struct dentry *tracing_init_dentry(void)
4998 {
4999         return tracing_init_dentry_tr(&global_trace);
5000 }
5001
5002 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5003 {
5004         struct dentry *d_tracer;
5005
5006         if (tr->percpu_dir)
5007                 return tr->percpu_dir;
5008
5009         d_tracer = tracing_init_dentry_tr(tr);
5010         if (!d_tracer)
5011                 return NULL;
5012
5013         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5014
5015         WARN_ONCE(!tr->percpu_dir,
5016                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5017
5018         return tr->percpu_dir;
5019 }
5020
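/*
 * Create the per_cpu/cpuN directory for one CPU and populate it with
 * the per cpu trace, trace_pipe, trace_pipe_raw, stats and
 * buffer_size_kb files (plus snapshot files when configured).
 */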
5021 static void
5022 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5023 {
5024         struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
5025         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5026         struct dentry *d_cpu;
5027         char cpu_dir[30]; /* 30 characters should be more than enough */
5028
5029         if (!d_percpu)
5030                 return;
5031
5032         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5033         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5034         if (!d_cpu) {
5035                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5036                 return;
5037         }
5038
5039         /* per cpu trace_pipe */
5040         trace_create_file("trace_pipe", 0444, d_cpu,
5041                         (void *)&data->trace_cpu, &tracing_pipe_fops);
5042
5043         /* per cpu trace */
5044         trace_create_file("trace", 0644, d_cpu,
5045                         (void *)&data->trace_cpu, &tracing_fops);
5046
5047         trace_create_file("trace_pipe_raw", 0444, d_cpu,
5048                         (void *)&data->trace_cpu, &tracing_buffers_fops);
5049
5050         trace_create_file("stats", 0444, d_cpu,
5051                         (void *)&data->trace_cpu, &tracing_stats_fops);
5052
5053         trace_create_file("buffer_size_kb", 0444, d_cpu,
5054                         (void *)&data->trace_cpu, &tracing_entries_fops);
5055
5056 #ifdef CONFIG_TRACER_SNAPSHOT
5057         trace_create_file("snapshot", 0644, d_cpu,
5058                           (void *)&data->trace_cpu, &snapshot_fops);
5059
5060         trace_create_file("snapshot_raw", 0444, d_cpu,
5061                         (void *)&data->trace_cpu, &snapshot_raw_fops);
5062 #endif
5063 }
5064
5065 #ifdef CONFIG_FTRACE_SELFTEST
5066 /* Let selftest have access to static functions in this file */
5067 #include "trace_selftest.c"
5068 #endif
5069
5070 struct trace_option_dentry {
5071         struct tracer_opt               *opt;
5072         struct tracer_flags             *flags;
5073         struct trace_array              *tr;
5074         struct dentry                   *entry;
5075 };
5076
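/*
 * Read/write handlers for the options/ files created for tracer
 * specific flags: read returns "0" or "1", write flips the flag via
 * __set_tracer_option().
 */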
5077 static ssize_t
5078 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5079                         loff_t *ppos)
5080 {
5081         struct trace_option_dentry *topt = filp->private_data;
5082         char *buf;
5083
5084         if (topt->flags->val & topt->opt->bit)
5085                 buf = "1\n";
5086         else
5087                 buf = "0\n";
5088
5089         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5090 }
5091
5092 static ssize_t
5093 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5094                          loff_t *ppos)
5095 {
5096         struct trace_option_dentry *topt = filp->private_data;
5097         unsigned long val;
5098         int ret;
5099
5100         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5101         if (ret)
5102                 return ret;
5103
5104         if (val != 0 && val != 1)
5105                 return -EINVAL;
5106
5107         if (!!(topt->flags->val & topt->opt->bit) != val) {
5108                 mutex_lock(&trace_types_lock);
5109                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5110                                           topt->opt, !val);
5111                 mutex_unlock(&trace_types_lock);
5112                 if (ret)
5113                         return ret;
5114         }
5115
5116         *ppos += cnt;
5117
5118         return cnt;
5119 }
5120
5121
5122 static const struct file_operations trace_options_fops = {
5123         .open = tracing_open_generic,
5124         .read = trace_options_read,
5125         .write = trace_options_write,
5126         .llseek = generic_file_llseek,
5127 };
5128
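/*
 * Read/write handlers for the options/ files of the core trace flags:
 * the file's private_data holds the bit index into trace_flags.
 */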
5129 static ssize_t
5130 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5131                         loff_t *ppos)
5132 {
5133         long index = (long)filp->private_data;
5134         char *buf;
5135
5136         if (trace_flags & (1 << index))
5137                 buf = "1\n";
5138         else
5139                 buf = "0\n";
5140
5141         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5142 }
5143
5144 static ssize_t
5145 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5146                          loff_t *ppos)
5147 {
5148         struct trace_array *tr = &global_trace;
5149         long index = (long)filp->private_data;
5150         unsigned long val;
5151         int ret;
5152
5153         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5154         if (ret)
5155                 return ret;
5156
5157         if (val != 0 && val != 1)
5158                 return -EINVAL;
5159
5160         mutex_lock(&trace_types_lock);
5161         ret = set_tracer_flag(tr, 1 << index, val);
5162         mutex_unlock(&trace_types_lock);
5163
5164         if (ret < 0)
5165                 return ret;
5166
5167         *ppos += cnt;
5168
5169         return cnt;
5170 }
5171
5172 static const struct file_operations trace_options_core_fops = {
5173         .open = tracing_open_generic,
5174         .read = trace_options_core_read,
5175         .write = trace_options_core_write,
5176         .llseek = generic_file_llseek,
5177 };
5178
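/*
 * Wrapper around debugfs_create_file() that warns, but does not fail
 * hard, when the entry could not be created.
 */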
5179 struct dentry *trace_create_file(const char *name,
5180                                  umode_t mode,
5181                                  struct dentry *parent,
5182                                  void *data,
5183                                  const struct file_operations *fops)
5184 {
5185         struct dentry *ret;
5186
5187         ret = debugfs_create_file(name, mode, parent, data, fops);
5188         if (!ret)
5189                 pr_warning("Could not create debugfs '%s' entry\n", name);
5190
5191         return ret;
5192 }
5193
5194
5195 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5196 {
5197         struct dentry *d_tracer;
5198
5199         if (tr->options)
5200                 return tr->options;
5201
5202         d_tracer = tracing_init_dentry_tr(tr);
5203         if (!d_tracer)
5204                 return NULL;
5205
5206         tr->options = debugfs_create_dir("options", d_tracer);
5207         if (!tr->options) {
5208                 pr_warning("Could not create debugfs directory 'options'\n");
5209                 return NULL;
5210         }
5211
5212         return tr->options;
5213 }
5214
5215 static void
5216 create_trace_option_file(struct trace_array *tr,
5217                          struct trace_option_dentry *topt,
5218                          struct tracer_flags *flags,
5219                          struct tracer_opt *opt)
5220 {
5221         struct dentry *t_options;
5222
5223         t_options = trace_options_init_dentry(tr);
5224         if (!t_options)
5225                 return;
5226
5227         topt->flags = flags;
5228         topt->opt = opt;
5229         topt->tr = tr;
5230
5231         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5232                                     &trace_options_fops);
5233
5234 }
5235
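/*
 * Create one options/ file for each flag the tracer declares. The
 * returned array is terminated by an entry with a NULL ->opt and is
 * torn down again by destroy_trace_option_files().
 */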
5236 static struct trace_option_dentry *
5237 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5238 {
5239         struct trace_option_dentry *topts;
5240         struct tracer_flags *flags;
5241         struct tracer_opt *opts;
5242         int cnt;
5243
5244         if (!tracer)
5245                 return NULL;
5246
5247         flags = tracer->flags;
5248
5249         if (!flags || !flags->opts)
5250                 return NULL;
5251
5252         opts = flags->opts;
5253
5254         for (cnt = 0; opts[cnt].name; cnt++)
5255                 ;
5256
5257         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5258         if (!topts)
5259                 return NULL;
5260
5261         for (cnt = 0; opts[cnt].name; cnt++)
5262                 create_trace_option_file(tr, &topts[cnt], flags,
5263                                          &opts[cnt]);
5264
5265         return topts;
5266 }
5267
5268 static void
5269 destroy_trace_option_files(struct trace_option_dentry *topts)
5270 {
5271         int cnt;
5272
5273         if (!topts)
5274                 return;
5275
5276         for (cnt = 0; topts[cnt].opt; cnt++) {
5277                 if (topts[cnt].entry)
5278                         debugfs_remove(topts[cnt].entry);
5279         }
5280
5281         kfree(topts);
5282 }
5283
5284 static struct dentry *
5285 create_trace_option_core_file(struct trace_array *tr,
5286                               const char *option, long index)
5287 {
5288         struct dentry *t_options;
5289
5290         t_options = trace_options_init_dentry(tr);
5291         if (!t_options)
5292                 return NULL;
5293
5294         return trace_create_file(option, 0644, t_options, (void *)index,
5295                                     &trace_options_core_fops);
5296 }
5297
5298 static __init void create_trace_options_dir(struct trace_array *tr)
5299 {
5300         struct dentry *t_options;
5301         int i;
5302
5303         t_options = trace_options_init_dentry(tr);
5304         if (!t_options)
5305                 return;
5306
5307         for (i = 0; trace_options[i]; i++)
5308                 create_trace_option_core_file(tr, trace_options[i], i);
5309 }
5310
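/*
 * "tracing_on" file: reading reports whether the ring buffer is
 * currently recording; writing 0 or 1 (e.g. "echo 0 > tracing_on")
 * switches recording off/on and calls the tracer's stop/start hooks.
 */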
5311 static ssize_t
5312 rb_simple_read(struct file *filp, char __user *ubuf,
5313                size_t cnt, loff_t *ppos)
5314 {
5315         struct trace_array *tr = filp->private_data;
5316         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5317         char buf[64];
5318         int r;
5319
5320         if (buffer)
5321                 r = ring_buffer_record_is_on(buffer);
5322         else
5323                 r = 0;
5324
5325         r = sprintf(buf, "%d\n", r);
5326
5327         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5328 }
5329
5330 static ssize_t
5331 rb_simple_write(struct file *filp, const char __user *ubuf,
5332                 size_t cnt, loff_t *ppos)
5333 {
5334         struct trace_array *tr = filp->private_data;
5335         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5336         unsigned long val;
5337         int ret;
5338
5339         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5340         if (ret)
5341                 return ret;
5342
5343         if (buffer) {
5344                 mutex_lock(&trace_types_lock);
5345                 if (val) {
5346                         ring_buffer_record_on(buffer);
5347                         if (tr->current_trace->start)
5348                                 tr->current_trace->start(tr);
5349                 } else {
5350                         ring_buffer_record_off(buffer);
5351                         if (tr->current_trace->stop)
5352                                 tr->current_trace->stop(tr);
5353                 }
5354                 mutex_unlock(&trace_types_lock);
5355         }
5356
5357         (*ppos)++;
5358
5359         return cnt;
5360 }
5361
5362 static const struct file_operations rb_simple_fops = {
5363         .open           = tracing_open_generic,
5364         .read           = rb_simple_read,
5365         .write          = rb_simple_write,
5366         .llseek         = default_llseek,
5367 };
5368
5369 struct dentry *trace_instance_dir;
5370
5371 static void
5372 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5373
5374 static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
5375 {
5376         int cpu;
5377
5378         for_each_tracing_cpu(cpu) {
5379                 memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
5380                 per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
5381                 per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
5382         }
5383 }
5384
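/*
 * Allocate the ring buffer and per cpu data of a trace array. With
 * CONFIG_TRACER_MAX_TRACE, a minimally sized max_buffer and its per
 * cpu data are allocated as well.
 */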
5385 static int allocate_trace_buffers(struct trace_array *tr, int size)
5386 {
5387         enum ring_buffer_flags rb_flags;
5388
5389         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5390
5391         tr->trace_buffer.buffer = ring_buffer_alloc(size, rb_flags);
5392         if (!tr->trace_buffer.buffer)
5393                 goto out_free;
5394
5395         tr->trace_buffer.data = alloc_percpu(struct trace_array_cpu);
5396         if (!tr->trace_buffer.data)
5397                 goto out_free;
5398
5399         init_trace_buffers(tr, &tr->trace_buffer);
5400
5401         /* Allocate the first page for all buffers */
5402         set_buffer_entries(&tr->trace_buffer,
5403                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5404
5405 #ifdef CONFIG_TRACER_MAX_TRACE
5406
5407         tr->max_buffer.buffer = ring_buffer_alloc(1, rb_flags);
5408         if (!tr->max_buffer.buffer)
5409                 goto out_free;
5410
5411         tr->max_buffer.data = alloc_percpu(struct trace_array_cpu);
5412         if (!tr->max_buffer.data)
5413                 goto out_free;
5414
5415         init_trace_buffers(tr, &tr->max_buffer);
5416
5417         set_buffer_entries(&tr->max_buffer, 1);
5418 #endif
5419         return 0;
5420
5421  out_free:
5422         if (tr->trace_buffer.buffer)
5423                 ring_buffer_free(tr->trace_buffer.buffer);
5424         free_percpu(tr->trace_buffer.data);
5425
5426 #ifdef CONFIG_TRACER_MAX_TRACE
5427         if (tr->max_buffer.buffer)
5428                 ring_buffer_free(tr->max_buffer.buffer);
5429         free_percpu(tr->max_buffer.data);
5430 #endif
5431         return -ENOMEM;
5432 }
5433
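/*
 * Create a new trace_array instance: allocate the array and its
 * buffers, create its directory under instances/ and populate it
 * with the usual tracing files. Called from instance_mkdir().
 */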
5434 static int new_instance_create(const char *name)
5435 {
5436         struct trace_array *tr;
5437         int ret;
5438
5439         mutex_lock(&trace_types_lock);
5440
5441         ret = -EEXIST;
5442         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
5443                 if (tr->name && strcmp(tr->name, name) == 0)
5444                         goto out_unlock;
5445         }
5446
5447         ret = -ENOMEM;
5448         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
5449         if (!tr)
5450                 goto out_unlock;
5451
5452         tr->name = kstrdup(name, GFP_KERNEL);
5453         if (!tr->name)
5454                 goto out_free_tr;
5455
5456         raw_spin_lock_init(&tr->start_lock);
5457
5458         tr->current_trace = &nop_trace;
5459
5460         INIT_LIST_HEAD(&tr->systems);
5461         INIT_LIST_HEAD(&tr->events);
5462
5463         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
5464                 goto out_free_tr;
5465
5466         /* Holder for file callbacks */
5467         tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
5468         tr->trace_cpu.tr = tr;
5469
5470         tr->dir = debugfs_create_dir(name, trace_instance_dir);
5471         if (!tr->dir)
5472                 goto out_free_tr;
5473
5474         ret = event_trace_add_tracer(tr->dir, tr);
5475         if (ret)
5476                 goto out_free_tr;
5477
5478         init_tracer_debugfs(tr, tr->dir);
5479
5480         list_add(&tr->list, &ftrace_trace_arrays);
5481
5482         mutex_unlock(&trace_types_lock);
5483
5484         return 0;
5485
5486  out_free_tr:
5487         if (tr->trace_buffer.buffer)
5488                 ring_buffer_free(tr->trace_buffer.buffer);
5489         kfree(tr->name);
5490         kfree(tr);
5491
5492  out_unlock:
5493         mutex_unlock(&trace_types_lock);
5494
5495         return ret;
5496
5497 }
5498
5499 static int instance_delete(const char *name)
5500 {
5501         struct trace_array *tr;
5502         int found = 0;
5503         int ret;
5504
5505         mutex_lock(&trace_types_lock);
5506
5507         ret = -ENODEV;
5508         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
5509                 if (tr->name && strcmp(tr->name, name) == 0) {
5510                         found = 1;
5511                         break;
5512                 }
5513         }
5514         if (!found)
5515                 goto out_unlock;
5516
5517         ret = -EBUSY;
5518         if (tr->ref)
5519                 goto out_unlock;
5520
5521         list_del(&tr->list);
5522
5523         event_trace_del_tracer(tr);
5524         debugfs_remove_recursive(tr->dir);
5525         free_percpu(tr->trace_buffer.data);
5526         ring_buffer_free(tr->trace_buffer.buffer);
5527
5528         kfree(tr->name);
5529         kfree(tr);
5530
5531         ret = 0;
5532
5533  out_unlock:
5534         mutex_unlock(&trace_types_lock);
5535
5536         return ret;
5537 }
5538
5539 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
5540 {
5541         struct dentry *parent;
5542         int ret;
5543
5544         /* Paranoid: Make sure the parent is the "instances" directory */
5545         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
5546         if (WARN_ON_ONCE(parent != trace_instance_dir))
5547                 return -ENOENT;
5548
5549         /*
5550          * The inode mutex is locked, but debugfs_create_dir() will also
5551          * take the mutex. As the instances directory cannot be destroyed
5552          * or changed in any other way, it is safe to unlock it and
5553          * let the dentry try. If two users try to make the same dir at
5554          * the same time, new_instance_create() will determine the
5555          * winner.
5556          */
5557         mutex_unlock(&inode->i_mutex);
5558
5559         ret = new_instance_create(dentry->d_iname);
5560
5561         mutex_lock(&inode->i_mutex);
5562
5563         return ret;
5564 }
5565
5566 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
5567 {
5568         struct dentry *parent;
5569         int ret;
5570
5571         /* Paranoid: Make sure the parent is the "instances" directory */
5572         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
5573         if (WARN_ON_ONCE(parent != trace_instance_dir))
5574                 return -ENOENT;
5575
5576         /* The caller did a dget() on dentry */
5577         mutex_unlock(&dentry->d_inode->i_mutex);
5578
5579         /*
5580          * The inode mutex is locked, but the debugfs code that removes
5581          * the directory will also take the mutex. As the instances
5582          * directory cannot be destroyed or changed in any other way, it
5583          * is safe to unlock it and let the dentry try. If two users try
5584          * to remove the same dir at the same time, instance_delete() will determine the
5585          * winner.
5586          */
5587         mutex_unlock(&inode->i_mutex);
5588
5589         ret = instance_delete(dentry->d_iname);
5590
5591         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
5592         mutex_lock(&dentry->d_inode->i_mutex);
5593
5594         return ret;
5595 }
5596
5597 static const struct inode_operations instance_dir_inode_operations = {
5598         .lookup         = simple_lookup,
5599         .mkdir          = instance_mkdir,
5600         .rmdir          = instance_rmdir,
5601 };
5602
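/*
 * The "instances" directory lets user space create and destroy trace
 * arrays with plain mkdir/rmdir, e.g.:
 *
 *   mkdir /sys/kernel/debug/tracing/instances/foo
 *   rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * This works by hijacking the directory's inode operations below.
 */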
5603 static __init void create_trace_instances(struct dentry *d_tracer)
5604 {
5605         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
5606         if (WARN_ON(!trace_instance_dir))
5607                 return;
5608
5609         /* Hijack the dir inode operations, to allow mkdir */
5610         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
5611 }
5612
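/*
 * Create the top level tracing files of a trace array (trace,
 * trace_pipe, buffer_size_kb, trace_marker, trace_clock, tracing_on,
 * ...) along with its per_cpu directories.
 */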
5613 static void
5614 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
5615 {
5616         int cpu;
5617
5618         trace_create_file("trace_options", 0644, d_tracer,
5619                           tr, &tracing_iter_fops);
5620
5621         trace_create_file("trace", 0644, d_tracer,
5622                         (void *)&tr->trace_cpu, &tracing_fops);
5623
5624         trace_create_file("trace_pipe", 0444, d_tracer,
5625                         (void *)&tr->trace_cpu, &tracing_pipe_fops);
5626
5627         trace_create_file("buffer_size_kb", 0644, d_tracer,
5628                         (void *)&tr->trace_cpu, &tracing_entries_fops);
5629
5630         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
5631                           tr, &tracing_total_entries_fops);
5632
5633         trace_create_file("free_buffer", 0644, d_tracer,
5634                           tr, &tracing_free_buffer_fops);
5635
5636         trace_create_file("trace_marker", 0220, d_tracer,
5637                           tr, &tracing_mark_fops);
5638
5639         trace_create_file("trace_clock", 0644, d_tracer, tr,
5640                           &trace_clock_fops);
5641
5642         trace_create_file("tracing_on", 0644, d_tracer,
5643                             tr, &rb_simple_fops);
5644
5645 #ifdef CONFIG_TRACER_SNAPSHOT
5646         trace_create_file("snapshot", 0644, d_tracer,
5647                           (void *)&tr->trace_cpu, &snapshot_fops);
5648 #endif
5649
5650         for_each_tracing_cpu(cpu)
5651                 tracing_init_debugfs_percpu(tr, cpu);
5652
5653 }
5654
5655 static __init int tracer_init_debugfs(void)
5656 {
5657         struct dentry *d_tracer;
5658
5659         trace_access_lock_init();
5660
5661         d_tracer = tracing_init_dentry();
5662
5663         init_tracer_debugfs(&global_trace, d_tracer);
5664
5665         trace_create_file("tracing_cpumask", 0644, d_tracer,
5666                         &global_trace, &tracing_cpumask_fops);
5667
5668         trace_create_file("available_tracers", 0444, d_tracer,
5669                         &global_trace, &show_traces_fops);
5670
5671         trace_create_file("current_tracer", 0644, d_tracer,
5672                         &global_trace, &set_tracer_fops);
5673
5674 #ifdef CONFIG_TRACER_MAX_TRACE
5675         trace_create_file("tracing_max_latency", 0644, d_tracer,
5676                         &tracing_max_latency, &tracing_max_lat_fops);
5677 #endif
5678
5679         trace_create_file("tracing_thresh", 0644, d_tracer,
5680                         &tracing_thresh, &tracing_max_lat_fops);
5681
5682         trace_create_file("README", 0444, d_tracer,
5683                         NULL, &tracing_readme_fops);
5684
5685         trace_create_file("saved_cmdlines", 0444, d_tracer,
5686                         NULL, &tracing_saved_cmdlines_fops);
5687
5688 #ifdef CONFIG_DYNAMIC_FTRACE
5689         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
5690                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
5691 #endif
5692
5693         create_trace_instances(d_tracer);
5694
5695         create_trace_options_dir(&global_trace);
5696
5697         return 0;
5698 }
5699
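/*
 * Panic and die notifiers: dump the ftrace buffers to the console
 * when ftrace_dump_on_oops is set.
 */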
5700 static int trace_panic_handler(struct notifier_block *this,
5701                                unsigned long event, void *unused)
5702 {
5703         if (ftrace_dump_on_oops)
5704                 ftrace_dump(ftrace_dump_on_oops);
5705         return NOTIFY_OK;
5706 }
5707
5708 static struct notifier_block trace_panic_notifier = {
5709         .notifier_call  = trace_panic_handler,
5710         .next           = NULL,
5711         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
5712 };
5713
5714 static int trace_die_handler(struct notifier_block *self,
5715                              unsigned long val,
5716                              void *data)
5717 {
5718         switch (val) {
5719         case DIE_OOPS:
5720                 if (ftrace_dump_on_oops)
5721                         ftrace_dump(ftrace_dump_on_oops);
5722                 break;
5723         default:
5724                 break;
5725         }
5726         return NOTIFY_OK;
5727 }
5728
5729 static struct notifier_block trace_die_notifier = {
5730         .notifier_call = trace_die_handler,
5731         .priority = 200
5732 };
5733
5734 /*
5735  * printk is limited to a max of 1024 characters; we really don't need
5736  * it that big. Nothing should be printing 1000 characters anyway.
5737  */
5738 #define TRACE_MAX_PRINT         1000
5739
5740 /*
5741  * Define here KERN_TRACE so that we have one place to modify
5742  * it if we decide to change what log level the ftrace dump
5743  * should be at.
5744  */
5745 #define KERN_TRACE              KERN_EMERG
5746
5747 void
5748 trace_printk_seq(struct trace_seq *s)
5749 {
5750         /* Probably should print a warning here. */
5751         if (s->len >= TRACE_MAX_PRINT)
5752                 s->len = TRACE_MAX_PRINT;
5753
5754         /* Should be zero terminated, but we are paranoid. */
5755         s->buffer[s->len] = 0;
5756
5757         printk(KERN_TRACE "%s", s->buffer);
5758
5759         trace_seq_init(s);
5760 }
5761
5762 void trace_init_global_iter(struct trace_iterator *iter)
5763 {
5764         iter->tr = &global_trace;
5765         iter->trace = iter->tr->current_trace;
5766         iter->cpu_file = RING_BUFFER_ALL_CPUS;
5767         iter->trace_buffer = &global_trace.trace_buffer;
5768 }
5769
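/*
 * Dump the ftrace ring buffer(s) to the console. Per cpu recording is
 * disabled while dumping and, when disable_tracing is set (the
 * ftrace_dump() default), ftrace is killed for good first.
 */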
5770 static void
5771 __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
5772 {
5773         static arch_spinlock_t ftrace_dump_lock =
5774                 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
5775         /* use static because iter can be a bit big for the stack */
5776         static struct trace_iterator iter;
5777         unsigned int old_userobj;
5778         static int dump_ran;
5779         unsigned long flags;
5780         int cnt = 0, cpu;
5781
5782         /* only one dump */
5783         local_irq_save(flags);
5784         arch_spin_lock(&ftrace_dump_lock);
5785         if (dump_ran)
5786                 goto out;
5787
5788         dump_ran = 1;
5789
5790         tracing_off();
5791
5792         /* Did function tracer already get disabled? */
5793         if (ftrace_is_dead()) {
5794                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
5795                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
5796         }
5797
5798         if (disable_tracing)
5799                 ftrace_kill();
5800
5801         /* Simulate the iterator */
5802         trace_init_global_iter(&iter);
5803
5804         for_each_tracing_cpu(cpu) {
5805                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
5806         }
5807
5808         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
5809
5810         /* don't look at user memory in panic mode */
5811         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
5812
5813         switch (oops_dump_mode) {
5814         case DUMP_ALL:
5815                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
5816                 break;
5817         case DUMP_ORIG:
5818                 iter.cpu_file = raw_smp_processor_id();
5819                 break;
5820         case DUMP_NONE:
5821                 goto out_enable;
5822         default:
5823                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
5824                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
5825         }
5826
5827         printk(KERN_TRACE "Dumping ftrace buffer:\n");
5828
5829         /*
5830          * We need to stop all tracing on all CPUs to read
5831          * the next buffer. This is a bit expensive, but is
5832          * not done often. We fill in all that we can read,
5833          * and then release the locks again.
5834          */
5835
5836         while (!trace_empty(&iter)) {
5837
5838                 if (!cnt)
5839                         printk(KERN_TRACE "---------------------------------\n");
5840
5841                 cnt++;
5842
5843                 /* reset all but tr, trace, and overruns */
5844                 memset(&iter.seq, 0,
5845                        sizeof(struct trace_iterator) -
5846                        offsetof(struct trace_iterator, seq));
5847                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
5848                 iter.pos = -1;
5849
5850                 if (trace_find_next_entry_inc(&iter) != NULL) {
5851                         int ret;
5852
5853                         ret = print_trace_line(&iter);
5854                         if (ret != TRACE_TYPE_NO_CONSUME)
5855                                 trace_consume(&iter);
5856                 }
5857                 touch_nmi_watchdog();
5858
5859                 trace_printk_seq(&iter.seq);
5860         }
5861
5862         if (!cnt)
5863                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
5864         else
5865                 printk(KERN_TRACE "---------------------------------\n");
5866
5867  out_enable:
5868         /* Re-enable tracing if requested */
5869         if (!disable_tracing) {
5870                 trace_flags |= old_userobj;
5871
5872                 for_each_tracing_cpu(cpu) {
5873                         atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
5874                 }
5875                 tracing_on();
5876         }
5877
5878  out:
5879         arch_spin_unlock(&ftrace_dump_lock);
5880         local_irq_restore(flags);
5881 }
5882
5883 /* By default: disable tracing after the dump */
5884 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
5885 {
5886         __ftrace_dump(true, oops_dump_mode);
5887 }
5888 EXPORT_SYMBOL_GPL(ftrace_dump);
5889
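/*
 * Early boot initialization: allocate the cpumasks and the global
 * trace buffers, register the nop tracer and hook up the panic and
 * die notifiers.
 */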
5890 __init static int tracer_alloc_buffers(void)
5891 {
5892         int ring_buf_size;
5893         int ret = -ENOMEM;
5894
5895
5896         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
5897                 goto out;
5898
5899         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
5900                 goto out_free_buffer_mask;
5901
5902         /* Only allocate trace_printk buffers if a trace_printk exists */
5903         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
5904                 /* Must be called before global_trace.buffer is allocated */
5905                 trace_printk_init_buffers();
5906
5907         /* To save memory, keep the ring buffer size to its minimum */
5908         if (ring_buffer_expanded)
5909                 ring_buf_size = trace_buf_size;
5910         else
5911                 ring_buf_size = 1;
5912
5913         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
5914         cpumask_copy(tracing_cpumask, cpu_all_mask);
5915
5916         raw_spin_lock_init(&global_trace.start_lock);
5917
5918         /* TODO: make the number of buffers hot pluggable with CPUs */
5919         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
5920                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
5921                 WARN_ON(1);
5922                 goto out_free_cpumask;
5923         }
5924
5925         if (global_trace.buffer_disabled)
5926                 tracing_off();
5927
5928         trace_init_cmdlines();
5929
5930         register_tracer(&nop_trace);
5931
5932         global_trace.current_trace = &nop_trace;
5933
5934         /* All seems OK, enable tracing */
5935         tracing_disabled = 0;
5936
5937         atomic_notifier_chain_register(&panic_notifier_list,
5938                                        &trace_panic_notifier);
5939
5940         register_die_notifier(&trace_die_notifier);
5941
5942         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
5943
5944         /* Holder for file callbacks */
5945         global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
5946         global_trace.trace_cpu.tr = &global_trace;
5947
5948         INIT_LIST_HEAD(&global_trace.systems);
5949         INIT_LIST_HEAD(&global_trace.events);
5950         list_add(&global_trace.list, &ftrace_trace_arrays);
5951
5952         while (trace_boot_options) {
5953                 char *option;
5954
5955                 option = strsep(&trace_boot_options, ",");
5956                 trace_set_options(&global_trace, option);
5957         }
5958
5959         return 0;
5960
5961 out_free_cpumask:
5962         free_percpu(global_trace.trace_buffer.data);
5963 #ifdef CONFIG_TRACER_MAX_TRACE
5964         free_percpu(global_trace.max_buffer.data);
5965 #endif
5966         free_cpumask_var(tracing_cpumask);
5967 out_free_buffer_mask:
5968         free_cpumask_var(tracing_buffer_mask);
5969 out:
5970         return ret;
5971 }
5972
5973 __init static int clear_boot_tracer(void)
5974 {
5975         /*
5976          * The default bootup tracer name is kept in an __init section
5977          * and will be freed after boot. This function is called at
5978          * late_initcall time; if the boot tracer was never registered,
5979          * clear the pointer so that a later registration does not
5980          * access memory that is about to be freed.
5981          */
5982         if (!default_bootup_tracer)
5983                 return 0;
5984
5985         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
5986                default_bootup_tracer);
5987         default_bootup_tracer = NULL;
5988
5989         return 0;
5990 }
5991
5992 early_initcall(tracer_alloc_buffers);
5993 fs_initcall(tracer_init_debugfs);
5994 late_initcall(clear_boot_tracer);