tracing: Add internal tracing_snapshot() functions
[firefly-linux-kernel-4.4.55.git] / kernel / trace / trace.c
index 406adbc277a06225fa6a9e58a84a777a7d3be0c0..307524d784ecbeda1a7a0ca05178dbb44f8faed1 100644
@@ -19,7 +19,6 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
-#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -86,14 +85,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
-/*
- * When a reader is waiting for data, then this variable is
- * set to true.
- */
-static bool trace_wakeup_needed;
-
-static struct irq_work trace_work_wakeup;
-
 /*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
@@ -204,27 +195,15 @@ cycle_t ftrace_now(int cpu)
        u64 ts;
 
        /* Early boot up does not have a buffer yet */
-       if (!global_trace.buffer)
+       if (!global_trace.trace_buffer.buffer)
                return trace_clock_local();
 
-       ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
-       ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
+       ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
+       ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
 
        return ts;
 }
 
-/*
- * The max_tr is used to snapshot the global_trace when a maximum
- * latency is reached. Some tracers will use this to store a maximum
- * trace while it continues examining live traces.
- *
- * The buffers for the max_tr are set up the same as the global_trace.
- * When a snapshot is taken, the link list of the max_tr is swapped
- * with the link list of the global_trace and the buffers are reset for
- * the global_trace so the tracing can continue.
- */
-static struct trace_array      max_tr;
-
 int tracing_is_enabled(void)
 {
        return tracing_is_on();
@@ -334,28 +313,12 @@ static inline void trace_access_lock_init(void)
 
 #endif
 
-/* trace_wait is a waitqueue for tasks blocked on trace_poll */
-static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
        TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
        TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
 
-/**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-static void trace_wake_up(struct irq_work *work)
-{
-       wake_up_all(&trace_wait);
-
-}
-
 /**
  * tracing_on - enable tracing buffers
  *
@@ -364,8 +327,8 @@ static void trace_wake_up(struct irq_work *work)
  */
 void tracing_on(void)
 {
-       if (global_trace.buffer)
-               ring_buffer_record_on(global_trace.buffer);
+       if (global_trace.trace_buffer.buffer)
+               ring_buffer_record_on(global_trace.trace_buffer.buffer);
        /*
         * This flag is only looked at when buffers haven't been
         * allocated yet. We don't really care about the race
@@ -376,6 +339,90 @@ void tracing_on(void)
 }
 EXPORT_SYMBOL_GPL(tracing_on);
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+/**
+ * tracing_snapshot - take a snapshot of the current buffer.
+ *
+ * This causes a swap between the snapshot buffer and the current live
+ * tracing buffer. You can use this to take snapshots of the live
+ * trace when some condition is triggered, but continue to trace.
+ *
+ * Note, make sure to allocate the snapshot with either
+ * tracing_snapshot_alloc(), or by doing it manually with:
+ *   echo 1 > /sys/kernel/debug/tracing/snapshot
+ *
+ * If the snapshot buffer is not allocated, this call will stop
+ * tracing, basically making a permanent snapshot.
+ */
+void tracing_snapshot(void)
+{
+       struct trace_array *tr = &global_trace;
+       struct tracer *tracer = tr->current_trace;
+       unsigned long flags;
+
+       if (!tr->allocated_snapshot) {
+               trace_printk("*** SNAPSHOT NOT ALLOCATED ***\n");
+               trace_printk("*** stopping trace here!   ***\n");
+               tracing_off();
+               return;
+       }
+
+       /* Note, snapshot cannot be used when the tracer uses it */
+       if (tracer->use_max_tr) {
+               trace_printk("*** LATENCY TRACER ACTIVE ***\n");
+               trace_printk("*** Can not use snapshot (sorry) ***\n");
+               return;
+       }
+
+       local_irq_save(flags);
+       update_max_tr(tr, current, smp_processor_id());
+       local_irq_restore(flags);
+}
+
+static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
+                                       struct trace_buffer *size_buf, int cpu_id);
+
+/**
+ * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
+ *
+ * This is similar to tracing_snapshot(), but it will allocate the
+ * snapshot buffer if it isn't already allocated. Use this only
+ * where it is safe to sleep, as the allocation may sleep.
+ *
+ * This causes a swap between the snapshot buffer and the current live
+ * tracing buffer. You can use this to take snapshots of the live
+ * trace when some condition is triggered, but continue to trace.
+ */
+void tracing_snapshot_alloc(void)
+{
+       struct trace_array *tr = &global_trace;
+       int ret;
+
+       if (!tr->allocated_snapshot) {
+
+               /* allocate spare buffer */
+               ret = resize_buffer_duplicate_size(&tr->max_buffer,
+                                  &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
+               if (WARN_ON(ret < 0))
+                       return;
+
+               tr->allocated_snapshot = true;
+       }
+
+       tracing_snapshot();
+}
+#else
+void tracing_snapshot(void)
+{
+       WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
+}
+void tracing_snapshot_alloc(void)
+{
+       /* Give warning */
+       tracing_snapshot();
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 /**
  * tracing_off - turn off tracing buffers
  *
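The kernel-doc above spells out the intended calling pattern for the two new internal entry points: allocate the spare buffer from a context that may sleep, then trigger swaps from (almost) anywhere. Below is a minimal, hypothetical in-kernel sketch, not part of this patch; the caller names and trigger condition are invented, and it assumes the prototypes are visible to the caller (the header change is not in this hunk).

    /* Hypothetical caller, sketch only -- not part of this patch. */
    static bool my_snapshot_taken;

    static int __init my_driver_init(void)
    {
            /* Sleepable context: allocates the spare buffer (and takes an
             * initial snapshot), so later calls to tracing_snapshot() only
             * swap buffers. */
            tracing_snapshot_alloc();
            return 0;
    }

    static void my_driver_hit_condition(void)
    {
            /* Safe from the trigger path: freeze the live trace into the
             * snapshot buffer and keep tracing. */
            if (!my_snapshot_taken) {
                    my_snapshot_taken = true;
                    tracing_snapshot();
            }
    }

If CONFIG_TRACER_SNAPSHOT is off, the stubs above turn such calls into a one-time WARN_ONCE rather than a build error, so a caller like this does not need its own ifdefs.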
@@ -386,8 +433,8 @@ EXPORT_SYMBOL_GPL(tracing_on);
  */
 void tracing_off(void)
 {
-       if (global_trace.buffer)
-               ring_buffer_record_off(global_trace.buffer);
+       if (global_trace.trace_buffer.buffer)
+               ring_buffer_record_off(global_trace.trace_buffer.buffer);
        /*
         * This flag is only looked at when buffers haven't been
         * allocated yet. We don't really care about the race
@@ -403,8 +450,8 @@ EXPORT_SYMBOL_GPL(tracing_off);
  */
 int tracing_is_on(void)
 {
-       if (global_trace.buffer)
-               return ring_buffer_record_is_on(global_trace.buffer);
+       if (global_trace.trace_buffer.buffer)
+               return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
        return !global_trace.buffer_disabled;
 }
 EXPORT_SYMBOL_GPL(tracing_is_on);
@@ -662,13 +709,14 @@ unsigned long __read_mostly       tracing_max_latency;
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-       struct trace_array_cpu *data = per_cpu_ptr(tr->data, cpu);
-       struct trace_array_cpu *max_data;
+       struct trace_buffer *trace_buf = &tr->trace_buffer;
+       struct trace_buffer *max_buf = &tr->max_buffer;
+       struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
+       struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
 
-       max_tr.cpu = cpu;
-       max_tr.time_start = data->preempt_timestamp;
+       max_buf->cpu = cpu;
+       max_buf->time_start = data->preempt_timestamp;
 
-       max_data = per_cpu_ptr(max_tr.data, cpu);
        max_data->saved_latency = tracing_max_latency;
        max_data->critical_start = data->critical_start;
        max_data->critical_end = data->critical_end;
@@ -703,7 +751,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
        WARN_ON_ONCE(!irqs_disabled());
 
-       if (!tr->current_trace->allocated_snapshot) {
+       if (!tr->allocated_snapshot) {
                /* Only the nop tracer should hit this when disabling */
                WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
@@ -711,9 +759,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
        arch_spin_lock(&ftrace_max_lock);
 
-       buf = tr->buffer;
-       tr->buffer = max_tr.buffer;
-       max_tr.buffer = buf;
+       buf = tr->trace_buffer.buffer;
+       tr->trace_buffer.buffer = tr->max_buffer.buffer;
+       tr->max_buffer.buffer = buf;
 
        __update_max_tr(tr, tsk, cpu);
        arch_spin_unlock(&ftrace_max_lock);
@@ -736,12 +784,12 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
-       if (WARN_ON_ONCE(!tr->current_trace->allocated_snapshot))
+       if (WARN_ON_ONCE(!tr->allocated_snapshot))
                return;
 
        arch_spin_lock(&ftrace_max_lock);
 
-       ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+       ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
 
        if (ret == -EBUSY) {
                /*
@@ -750,7 +798,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
                 * the max trace buffer (no one writes directly to it)
                 * and flag that it failed.
                 */
-               trace_array_printk(&max_tr, _THIS_IP_,
+               trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
                        "Failed to swap buffers due to commit in progress\n");
        }
 
@@ -763,36 +811,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 static void default_wait_pipe(struct trace_iterator *iter)
 {
-       DEFINE_WAIT(wait);
-
-       prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-       /*
-        * The events can happen in critical sections where
-        * checking a work queue can cause deadlocks.
-        * After adding a task to the queue, this flag is set
-        * only to notify events to try to wake up the queue
-        * using irq_work.
-        *
-        * We don't clear it even if the buffer is no longer
-        * empty. The flag only causes the next event to run
-        * irq_work to do the work queue wake up. The worse
-        * that can happen if we race with !trace_empty() is that
-        * an event will cause an irq_work to try to wake up
-        * an empty queue.
-        *
-        * There's no reason to protect this flag either, as
-        * the work queue and irq_work logic will do the necessary
-        * synchronization for the wake ups. The only thing
-        * that is necessary is that the wake up happens after
-        * a task has been queued. It's OK for spurious wake ups.
-        */
-       trace_wakeup_needed = true;
-
-       if (trace_empty(iter))
-               schedule();
+       /* Iterators are static, they should be filled or empty */
+       if (trace_buffer_iter(iter, iter->cpu_file))
+               return;
 
-       finish_wait(&trace_wait, &wait);
+       ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
 }
 
 /**
@@ -853,17 +876,19 @@ int register_tracer(struct tracer *type)
                 * internal tracing to verify that everything is in order.
                 * If we fail, we do not register this tracer.
                 */
-               tracing_reset_online_cpus(tr);
+               tracing_reset_online_cpus(&tr->trace_buffer);
 
                tr->current_trace = type;
 
+#ifdef CONFIG_TRACER_MAX_TRACE
                if (type->use_max_tr) {
                        /* If we expanded the buffers, make sure the max is expanded too */
                        if (ring_buffer_expanded)
-                               ring_buffer_resize(max_tr.buffer, trace_buf_size,
+                               ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
                                                   RING_BUFFER_ALL_CPUS);
-                       type->allocated_snapshot = true;
+                       tr->allocated_snapshot = true;
                }
+#endif
 
                /* the test is responsible for initializing and enabling */
                pr_info("Testing tracer %s: ", type->name);
@@ -877,16 +902,18 @@ int register_tracer(struct tracer *type)
                        goto out;
                }
                /* Only reset on passing, to avoid touching corrupted buffers */
-               tracing_reset_online_cpus(tr);
+               tracing_reset_online_cpus(&tr->trace_buffer);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
                if (type->use_max_tr) {
-                       type->allocated_snapshot = false;
+                       tr->allocated_snapshot = false;
 
                        /* Shrink the max buffer again */
                        if (ring_buffer_expanded)
-                               ring_buffer_resize(max_tr.buffer, 1,
+                               ring_buffer_resize(tr->max_buffer.buffer, 1,
                                                   RING_BUFFER_ALL_CPUS);
                }
+#endif
 
                printk(KERN_CONT "PASSED\n");
        }
@@ -920,9 +947,9 @@ int register_tracer(struct tracer *type)
        return ret;
 }
 
-void tracing_reset(struct trace_array *tr, int cpu)
+void tracing_reset(struct trace_buffer *buf, int cpu)
 {
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = buf->buffer;
 
        if (!buffer)
                return;
@@ -936,9 +963,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
        ring_buffer_record_enable(buffer);
 }
 
-void tracing_reset_online_cpus(struct trace_array *tr)
+void tracing_reset_online_cpus(struct trace_buffer *buf)
 {
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = buf->buffer;
        int cpu;
 
        if (!buffer)
@@ -949,7 +976,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
        /* Make sure all commits have finished */
        synchronize_sched();
 
-       tr->time_start = ftrace_now(tr->cpu);
+       buf->time_start = ftrace_now(buf->cpu);
 
        for_each_online_cpu(cpu)
                ring_buffer_reset_cpu(buffer, cpu);
@@ -959,12 +986,21 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 
 void tracing_reset_current(int cpu)
 {
-       tracing_reset(&global_trace, cpu);
+       tracing_reset(&global_trace.trace_buffer, cpu);
 }
 
-void tracing_reset_current_online_cpus(void)
+void tracing_reset_all_online_cpus(void)
 {
-       tracing_reset_online_cpus(&global_trace);
+       struct trace_array *tr;
+
+       mutex_lock(&trace_types_lock);
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               tracing_reset_online_cpus(&tr->trace_buffer);
+#ifdef CONFIG_TRACER_MAX_TRACE
+               tracing_reset_online_cpus(&tr->max_buffer);
+#endif
+       }
+       mutex_unlock(&trace_types_lock);
 }
 
 #define SAVED_CMDLINES 128
@@ -1032,13 +1068,15 @@ void tracing_start(void)
        /* Prevent the buffers from switching */
        arch_spin_lock(&ftrace_max_lock);
 
-       buffer = global_trace.buffer;
+       buffer = global_trace.trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
 
-       buffer = max_tr.buffer;
+#ifdef CONFIG_TRACER_MAX_TRACE
+       buffer = global_trace.max_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
+#endif
 
        arch_spin_unlock(&ftrace_max_lock);
 
@@ -1070,7 +1108,7 @@ static void tracing_start_tr(struct trace_array *tr)
                goto out;
        }
 
-       buffer = tr->buffer;
+       buffer = tr->trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
 
@@ -1097,13 +1135,15 @@ void tracing_stop(void)
        /* Prevent the buffers from switching */
        arch_spin_lock(&ftrace_max_lock);
 
-       buffer = global_trace.buffer;
+       buffer = global_trace.trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
 
-       buffer = max_tr.buffer;
+#ifdef CONFIG_TRACER_MAX_TRACE
+       buffer = global_trace.max_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
+#endif
 
        arch_spin_unlock(&ftrace_max_lock);
 
@@ -1124,7 +1164,7 @@ static void tracing_stop_tr(struct trace_array *tr)
        if (tr->stop_count++)
                goto out;
 
-       buffer = tr->buffer;
+       buffer = tr->trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
 
@@ -1262,11 +1302,6 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
        __this_cpu_write(trace_cmdline_save, true);
-       if (trace_wakeup_needed) {
-               trace_wakeup_needed = false;
-               /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&trace_work_wakeup);
-       }
        ring_buffer_unlock_commit(buffer, event);
 }
 
@@ -1295,7 +1330,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
                          int type, unsigned long len,
                          unsigned long flags, int pc)
 {
-       *current_rb = ftrace_file->tr->buffer;
+       *current_rb = ftrace_file->tr->trace_buffer.buffer;
        return trace_buffer_lock_reserve(*current_rb,
                                         type, len, flags, pc);
 }
@@ -1306,7 +1341,7 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
                                  int type, unsigned long len,
                                  unsigned long flags, int pc)
 {
-       *current_rb = global_trace.buffer;
+       *current_rb = global_trace.trace_buffer.buffer;
        return trace_buffer_lock_reserve(*current_rb,
                                         type, len, flags, pc);
 }
@@ -1345,7 +1380,7 @@ trace_function(struct trace_array *tr,
               int pc)
 {
        struct ftrace_event_call *call = &event_function;
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
 
@@ -1486,7 +1521,7 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                   int pc)
 {
-       __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
+       __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
 }
 
 /**
@@ -1502,7 +1537,8 @@ void trace_dump_stack(void)
        local_save_flags(flags);
 
        /* skipping 3 traces, seems to get us at the caller of this function */
-       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
+       __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, 3,
+                            preempt_count(), NULL);
 }
 
 static DEFINE_PER_CPU(int, user_stack_count);
@@ -1672,7 +1708,7 @@ void trace_printk_init_buffers(void)
         * directly here. If the global_trace.buffer is already
         * allocated here, then this was called by module code.
         */
-       if (global_trace.buffer)
+       if (global_trace.trace_buffer.buffer)
                tracing_start_cmdline_record();
 }
 
@@ -1732,7 +1768,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 
        local_save_flags(flags);
        size = sizeof(*entry) + sizeof(u32) * len;
-       buffer = tr->buffer;
+       buffer = tr->trace_buffer.buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
                                          flags, pc);
        if (!event)
@@ -1755,27 +1791,12 @@ out:
 }
 EXPORT_SYMBOL_GPL(trace_vbprintk);
 
-int trace_array_printk(struct trace_array *tr,
-                      unsigned long ip, const char *fmt, ...)
-{
-       int ret;
-       va_list ap;
-
-       if (!(trace_flags & TRACE_ITER_PRINTK))
-               return 0;
-
-       va_start(ap, fmt);
-       ret = trace_array_vprintk(tr, ip, fmt, ap);
-       va_end(ap);
-       return ret;
-}
-
-int trace_array_vprintk(struct trace_array *tr,
-                       unsigned long ip, const char *fmt, va_list args)
+static int
+__trace_array_vprintk(struct ring_buffer *buffer,
+                     unsigned long ip, const char *fmt, va_list args)
 {
        struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
-       struct ring_buffer *buffer;
        int len = 0, size, pc;
        struct print_entry *entry;
        unsigned long flags;
@@ -1803,7 +1824,6 @@ int trace_array_vprintk(struct trace_array *tr,
 
        local_save_flags(flags);
        size = sizeof(*entry) + len + 1;
-       buffer = tr->buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
                                          flags, pc);
        if (!event)
@@ -1824,6 +1844,42 @@ int trace_array_vprintk(struct trace_array *tr,
        return len;
 }
 
+int trace_array_vprintk(struct trace_array *tr,
+                       unsigned long ip, const char *fmt, va_list args)
+{
+       return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
+}
+
+int trace_array_printk(struct trace_array *tr,
+                      unsigned long ip, const char *fmt, ...)
+{
+       int ret;
+       va_list ap;
+
+       if (!(trace_flags & TRACE_ITER_PRINTK))
+               return 0;
+
+       va_start(ap, fmt);
+       ret = trace_array_vprintk(tr, ip, fmt, ap);
+       va_end(ap);
+       return ret;
+}
+
+int trace_array_printk_buf(struct ring_buffer *buffer,
+                          unsigned long ip, const char *fmt, ...)
+{
+       int ret;
+       va_list ap;
+
+       if (!(trace_flags & TRACE_ITER_PRINTK))
+               return 0;
+
+       va_start(ap, fmt);
+       ret = __trace_array_vprintk(buffer, ip, fmt, ap);
+       va_end(ap);
+       return ret;
+}
+
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
        return trace_array_vprintk(&global_trace, ip, fmt, args);
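The new trace_array_printk_buf() prints into an explicit ring buffer rather than a trace_array's live buffer; update_max_tr_single() above uses it to record a failed swap directly in the max buffer. A hypothetical wrapper showing the same call pattern (sketch only, the wrapper itself is not part of this patch):

    /* Sketch: write a note into the snapshot (max) buffer of a trace array. */
    static void note_swap_failure(struct trace_array *tr)
    {
            trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
                                   "Failed to swap buffers due to commit in progress\n");
    }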
@@ -1849,7 +1905,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
        if (buf_iter)
                event = ring_buffer_iter_peek(buf_iter, ts);
        else
-               event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
+               event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
                                         lost_events);
 
        if (event) {
@@ -1864,7 +1920,7 @@ static struct trace_entry *
 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
                  unsigned long *missing_events, u64 *ent_ts)
 {
-       struct ring_buffer *buffer = iter->tr->buffer;
+       struct ring_buffer *buffer = iter->trace_buffer->buffer;
        struct trace_entry *ent, *next = NULL;
        unsigned long lost_events = 0, next_lost = 0;
        int cpu_file = iter->cpu_file;
@@ -1941,7 +1997,7 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter)
 
 static void trace_consume(struct trace_iterator *iter)
 {
-       ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
+       ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
                            &iter->lost_events);
 }
 
@@ -1974,13 +2030,12 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
 
 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 {
-       struct trace_array *tr = iter->tr;
        struct ring_buffer_event *event;
        struct ring_buffer_iter *buf_iter;
        unsigned long entries = 0;
        u64 ts;
 
-       per_cpu_ptr(tr->data, cpu)->skipped_entries = 0;
+       per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
 
        buf_iter = trace_buffer_iter(iter, cpu);
        if (!buf_iter)
@@ -1994,13 +2049,13 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
         * by the timestamp being before the start of the buffer.
         */
        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
-               if (ts >= iter->tr->time_start)
+               if (ts >= iter->trace_buffer->time_start)
                        break;
                entries++;
                ring_buffer_read(buf_iter, NULL);
        }
 
-       per_cpu_ptr(tr->data, cpu)->skipped_entries = entries;
+       per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
 }
 
 /*
@@ -2027,8 +2082,10 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                *iter->trace = *tr->current_trace;
        mutex_unlock(&trace_types_lock);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
        if (iter->snapshot && iter->trace->use_max_tr)
                return ERR_PTR(-EBUSY);
+#endif
 
        if (!iter->snapshot)
                atomic_inc(&trace_record_cmdline_disabled);
@@ -2070,17 +2127,21 @@ static void s_stop(struct seq_file *m, void *p)
 {
        struct trace_iterator *iter = m->private;
 
+#ifdef CONFIG_TRACER_MAX_TRACE
        if (iter->snapshot && iter->trace->use_max_tr)
                return;
+#endif
 
        if (!iter->snapshot)
                atomic_dec(&trace_record_cmdline_disabled);
+
        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 }
 
 static void
-get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
+get_total_entries(struct trace_buffer *buf,
+                 unsigned long *total, unsigned long *entries)
 {
        unsigned long count;
        int cpu;
@@ -2089,19 +2150,19 @@ get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *e
        *entries = 0;
 
        for_each_tracing_cpu(cpu) {
-               count = ring_buffer_entries_cpu(tr->buffer, cpu);
+               count = ring_buffer_entries_cpu(buf->buffer, cpu);
                /*
                 * If this buffer has skipped entries, then we hold all
                 * entries for the trace and we need to ignore the
                 * ones before the time stamp.
                 */
-               if (per_cpu_ptr(tr->data, cpu)->skipped_entries) {
-                       count -= per_cpu_ptr(tr->data, cpu)->skipped_entries;
+               if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
+                       count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
                        /* total is the same as the entries */
                        *total += count;
                } else
                        *total += count +
-                               ring_buffer_overrun_cpu(tr->buffer, cpu);
+                               ring_buffer_overrun_cpu(buf->buffer, cpu);
                *entries += count;
        }
 }
@@ -2118,27 +2179,27 @@ static void print_lat_help_header(struct seq_file *m)
        seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
 }
 
-static void print_event_info(struct trace_array *tr, struct seq_file *m)
+static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
 {
        unsigned long total;
        unsigned long entries;
 
-       get_total_entries(tr, &total, &entries);
+       get_total_entries(buf, &total, &entries);
        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
                   entries, total, num_online_cpus());
        seq_puts(m, "#\n");
 }
 
-static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
+static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
 {
-       print_event_info(tr, m);
+       print_event_info(buf, m);
        seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
        seq_puts(m, "#              | |       |          |         |\n");
 }
 
-static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
+static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
 {
-       print_event_info(tr, m);
+       print_event_info(buf, m);
        seq_puts(m, "#                              _-----=> irqs-off\n");
        seq_puts(m, "#                             / _----=> need-resched\n");
        seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
@@ -2152,8 +2213,8 @@ void
 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 {
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-       struct trace_array *tr = iter->tr;
-       struct trace_array_cpu *data = per_cpu_ptr(tr->data, tr->cpu);
+       struct trace_buffer *buf = iter->trace_buffer;
+       struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
        struct tracer *type = iter->trace;
        unsigned long entries;
        unsigned long total;
@@ -2161,7 +2222,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 
        name = type->name;
 
-       get_total_entries(tr, &total, &entries);
+       get_total_entries(buf, &total, &entries);
 
        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
                   name, UTS_RELEASE);
@@ -2172,7 +2233,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
                   nsecs_to_usecs(data->saved_latency),
                   entries,
                   total,
-                  tr->cpu,
+                  buf->cpu,
 #if defined(CONFIG_PREEMPT_NONE)
                   "server",
 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
@@ -2223,7 +2284,7 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
        if (cpumask_test_cpu(iter->cpu, iter->started))
                return;
 
-       if (per_cpu_ptr(iter->tr->data, iter->cpu)->skipped_entries)
+       if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
                return;
 
        cpumask_set_cpu(iter->cpu, iter->started);
@@ -2353,7 +2414,7 @@ int trace_empty(struct trace_iterator *iter)
                        if (!ring_buffer_iter_empty(buf_iter))
                                return 0;
                } else {
-                       if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                       if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
                                return 0;
                }
                return 1;
@@ -2365,7 +2426,7 @@ int trace_empty(struct trace_iterator *iter)
                        if (!ring_buffer_iter_empty(buf_iter))
                                return 0;
                } else {
-                       if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                       if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
                                return 0;
                }
        }
@@ -2443,9 +2504,9 @@ void trace_default_header(struct seq_file *m)
        } else {
                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
                        if (trace_flags & TRACE_ITER_IRQ_INFO)
-                               print_func_help_header_irq(iter->tr, m);
+                               print_func_help_header_irq(iter->trace_buffer, m);
                        else
-                               print_func_help_header(iter->tr, m);
+                               print_func_help_header(iter->trace_buffer, m);
                }
        }
 }
@@ -2459,14 +2520,8 @@ static void test_ftrace_alive(struct seq_file *m)
 }
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+static void show_snapshot_main_help(struct seq_file *m)
 {
-       if (iter->trace->allocated_snapshot)
-               seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
-       else
-               seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
-
-       seq_printf(m, "# Snapshot commands:\n");
        seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
        seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
        seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
@@ -2474,6 +2529,35 @@ static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
        seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
        seq_printf(m, "#                       is not a '0' or '1')\n");
 }
+
+static void show_snapshot_percpu_help(struct seq_file *m)
+{
+       seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+       seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
+       seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
+#else
+       seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
+       seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
+#endif
+       seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
+       seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
+       seq_printf(m, "#                       is not a '0' or '1')\n");
+}
+
+static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+{
+       if (iter->tr->allocated_snapshot)
+               seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
+       else
+               seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
+
+       seq_printf(m, "# Snapshot commands:\n");
+       if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+               show_snapshot_main_help(m);
+       else
+               show_snapshot_percpu_help(m);
+}
 #else
 /* Should never be called */
 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
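The help text above documents the snapshot file protocol: writing '1' allocates the spare buffer (if needed) and takes a snapshot, '0' frees it, and any other value clears it. A small user-space sketch of the '1' case, assuming the usual debugfs mount point shown in the help text:

    /* Sketch: take a snapshot from user space via the debugfs file. */
    #include <fcntl.h>
    #include <unistd.h>

    static int take_snapshot(void)
    {
            int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);

            if (fd < 0)
                    return -1;
            /* "1" allocates the spare buffer if necessary, then swaps it with
             * the live buffer; the snapshot can then be read back from the
             * same file. */
            if (write(fd, "1", 1) != 1) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }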
@@ -2564,11 +2648,15 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
                goto fail;
 
+       iter->tr = tr;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
        /* Currently only the top directory has a snapshot */
        if (tr->current_trace->print_max || snapshot)
-               iter->tr = &max_tr;
+               iter->trace_buffer = &tr->max_buffer;
        else
-               iter->tr = tr;
+#endif
+               iter->trace_buffer = &tr->trace_buffer;
        iter->snapshot = snapshot;
        iter->pos = -1;
        mutex_init(&iter->mutex);
@@ -2579,7 +2667,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
                iter->trace->open(iter);
 
        /* Annotate start of buffers if we had overruns */
-       if (ring_buffer_overruns(iter->tr->buffer))
+       if (ring_buffer_overruns(iter->trace_buffer->buffer))
                iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
@@ -2593,7 +2681,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
                        iter->buffer_iter[cpu] =
-                               ring_buffer_read_prepare(iter->tr->buffer, cpu);
+                               ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
                }
                ring_buffer_read_prepare_sync();
                for_each_tracing_cpu(cpu) {
@@ -2603,12 +2691,14 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
        } else {
                cpu = iter->cpu_file;
                iter->buffer_iter[cpu] =
-                       ring_buffer_read_prepare(iter->tr->buffer, cpu);
+                       ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
                ring_buffer_read_prepare_sync();
                ring_buffer_read_start(iter->buffer_iter[cpu]);
                tracing_iter_reset(iter, cpu);
        }
 
+       tr->ref++;
+
        mutex_unlock(&trace_types_lock);
 
        return iter;
@@ -2642,14 +2732,13 @@ static int tracing_release(struct inode *inode, struct file *file)
                return 0;
 
        iter = m->private;
-
-       /* Only the global tracer has a matching max_tr */
-       if (iter->tr == &max_tr)
-               tr = &global_trace;
-       else
-               tr = iter->tr;
+       tr = iter->tr;
 
        mutex_lock(&trace_types_lock);
+
+       WARN_ON(!tr->ref);
+       tr->ref--;
+
        for_each_tracing_cpu(cpu) {
                if (iter->buffer_iter[cpu])
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
@@ -2683,9 +2772,9 @@ static int tracing_open(struct inode *inode, struct file *file)
                struct trace_array *tr = tc->tr;
 
                if (tc->cpu == RING_BUFFER_ALL_CPUS)
-                       tracing_reset_online_cpus(tr);
+                       tracing_reset_online_cpus(&tr->trace_buffer);
                else
-                       tracing_reset(tr, tc->cpu);
+                       tracing_reset(&tr->trace_buffer, tc->cpu);
        }
 
        if (file->f_mode & FMODE_READ) {
@@ -2854,13 +2943,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
                 */
                if (cpumask_test_cpu(cpu, tracing_cpumask) &&
                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
-                       atomic_inc(&per_cpu_ptr(tr->data, cpu)->disabled);
-                       ring_buffer_record_disable_cpu(tr->buffer, cpu);
+                       atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
+                       ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
                }
                if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
-                       atomic_dec(&per_cpu_ptr(tr->data, cpu)->disabled);
-                       ring_buffer_record_enable_cpu(tr->buffer, cpu);
+                       atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
+                       ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
                }
        }
        arch_spin_unlock(&ftrace_max_lock);
@@ -2979,9 +3068,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
                trace_event_enable_cmd_record(enabled);
 
        if (mask == TRACE_ITER_OVERWRITE) {
-               ring_buffer_change_overwrite(global_trace.buffer, enabled);
+               ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
 #ifdef CONFIG_TRACER_MAX_TRACE
-               ring_buffer_change_overwrite(max_tr.buffer, enabled);
+               ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
 #endif
        }
 
@@ -3165,42 +3254,45 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 
 int tracer_init(struct tracer *t, struct trace_array *tr)
 {
-       tracing_reset_online_cpus(tr);
+       tracing_reset_online_cpus(&tr->trace_buffer);
        return t->init(tr);
 }
 
-static void set_buffer_entries(struct trace_array *tr, unsigned long val)
+static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
 {
        int cpu;
+
        for_each_tracing_cpu(cpu)
-               per_cpu_ptr(tr->data, cpu)->entries = val;
+               per_cpu_ptr(buf->data, cpu)->entries = val;
 }
 
+#ifdef CONFIG_TRACER_MAX_TRACE
 /* resize @tr's buffer to the size of @size_tr's entries */
-static int resize_buffer_duplicate_size(struct trace_array *tr,
-                                       struct trace_array *size_tr, int cpu_id)
+static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
+                                       struct trace_buffer *size_buf, int cpu_id)
 {
        int cpu, ret = 0;
 
        if (cpu_id == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
-                       ret = ring_buffer_resize(tr->buffer,
-                                per_cpu_ptr(size_tr->data, cpu)->entries, cpu);
+                       ret = ring_buffer_resize(trace_buf->buffer,
+                                per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
                        if (ret < 0)
                                break;
-                       per_cpu_ptr(tr->data, cpu)->entries =
-                               per_cpu_ptr(size_tr->data, cpu)->entries;
+                       per_cpu_ptr(trace_buf->data, cpu)->entries =
+                               per_cpu_ptr(size_buf->data, cpu)->entries;
                }
        } else {
-               ret = ring_buffer_resize(tr->buffer,
-                                per_cpu_ptr(size_tr->data, cpu_id)->entries, cpu_id);
+               ret = ring_buffer_resize(trace_buf->buffer,
+                                per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
                if (ret == 0)
-                       per_cpu_ptr(tr->data, cpu_id)->entries =
-                               per_cpu_ptr(size_tr->data, cpu_id)->entries;
+                       per_cpu_ptr(trace_buf->data, cpu_id)->entries =
+                               per_cpu_ptr(size_buf->data, cpu_id)->entries;
        }
 
        return ret;
 }
+#endif /* CONFIG_TRACER_MAX_TRACE */
 
 static int __tracing_resize_ring_buffer(struct trace_array *tr,
                                        unsigned long size, int cpu)
@@ -3215,20 +3307,22 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
        ring_buffer_expanded = 1;
 
        /* May be called before buffers are initialized */
-       if (!tr->buffer)
+       if (!tr->trace_buffer.buffer)
                return 0;
 
-       ret = ring_buffer_resize(tr->buffer, size, cpu);
+       ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
        if (ret < 0)
                return ret;
 
+#ifdef CONFIG_TRACER_MAX_TRACE
        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
            !tr->current_trace->use_max_tr)
                goto out;
 
-       ret = ring_buffer_resize(max_tr.buffer, size, cpu);
+       ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
        if (ret < 0) {
-               int r = resize_buffer_duplicate_size(tr, tr, cpu);
+               int r = resize_buffer_duplicate_size(&tr->trace_buffer,
+                                                    &tr->trace_buffer, cpu);
                if (r < 0) {
                        /*
                         * AARGH! We are left with different
@@ -3251,15 +3345,17 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
        }
 
        if (cpu == RING_BUFFER_ALL_CPUS)
-               set_buffer_entries(&max_tr, size);
+               set_buffer_entries(&tr->max_buffer, size);
        else
-               per_cpu_ptr(max_tr.data, cpu)->entries = size;
+               per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
 
  out:
+#endif /* CONFIG_TRACER_MAX_TRACE */
+
        if (cpu == RING_BUFFER_ALL_CPUS)
-               set_buffer_entries(tr, size);
+               set_buffer_entries(&tr->trace_buffer, size);
        else
-               per_cpu_ptr(tr->data, cpu)->entries = size;
+               per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
 
        return ret;
 }
@@ -3326,7 +3422,9 @@ static int tracing_set_tracer(const char *buf)
        static struct trace_option_dentry *topts;
        struct trace_array *tr = &global_trace;
        struct tracer *t;
+#ifdef CONFIG_TRACER_MAX_TRACE
        bool had_max_tr;
+#endif
        int ret = 0;
 
        mutex_lock(&trace_types_lock);
@@ -3357,9 +3455,12 @@ static int tracing_set_tracer(const char *buf)
        if (tr->current_trace->reset)
                tr->current_trace->reset(tr);
 
-       had_max_tr = tr->current_trace->allocated_snapshot;
+       /* Current trace needs to be nop_trace before synchronize_sched */
        tr->current_trace = &nop_trace;
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+       had_max_tr = tr->allocated_snapshot;
+
        if (had_max_tr && !t->use_max_tr) {
                /*
                 * We need to make sure that the update_max_tr sees that
@@ -3374,22 +3475,26 @@ static int tracing_set_tracer(const char *buf)
                 * The max_tr ring buffer has some state (e.g. ring->clock) and
                 * we want preserve it.
                 */
-               ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
-               set_buffer_entries(&max_tr, 1);
-               tracing_reset_online_cpus(&max_tr);
-               tr->current_trace->allocated_snapshot = false;
+               ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
+               set_buffer_entries(&tr->max_buffer, 1);
+               tracing_reset_online_cpus(&tr->max_buffer);
+               tr->allocated_snapshot = false;
        }
+#endif
        destroy_trace_option_files(topts);
 
        topts = create_trace_option_files(tr, t);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
        if (t->use_max_tr && !had_max_tr) {
                /* we need to make per cpu buffer sizes equivalent */
-               ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
+               ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->trace_buffer,
                                                   RING_BUFFER_ALL_CPUS);
                if (ret < 0)
                        goto out;
-               t->allocated_snapshot = true;
+               tr->allocated_snapshot = true;
        }
+#endif
 
        if (t->init) {
                ret = tracer_init(t, tr);
@@ -3517,6 +3622,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
        iter->cpu_file = tc->cpu;
        iter->tr = tc->tr;
+       iter->trace_buffer = &tc->tr->trace_buffer;
        mutex_init(&iter->mutex);
        filp->private_data = iter;
 
@@ -3555,24 +3661,28 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 }
 
 static unsigned int
-tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
 {
-       struct trace_iterator *iter = filp->private_data;
+       /* Iterators are static, they should be filled or empty */
+       if (trace_buffer_iter(iter, iter->cpu_file))
+               return POLLIN | POLLRDNORM;
 
-       if (trace_flags & TRACE_ITER_BLOCK) {
+       if (trace_flags & TRACE_ITER_BLOCK)
                /*
                 * Always select as readable when in blocking mode
                 */
                return POLLIN | POLLRDNORM;
-       } else {
-               if (!trace_empty(iter))
-                       return POLLIN | POLLRDNORM;
-               poll_wait(filp, &trace_wait, poll_table);
-               if (!trace_empty(iter))
-                       return POLLIN | POLLRDNORM;
+       else
+               return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
+                                            filp, poll_table);
+}
 
-               return 0;
-       }
+static unsigned int
+tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+{
+       struct trace_iterator *iter = filp->private_data;
+
+       return trace_poll(iter, filp, poll_table);
 }
 
 /*
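With the trace_wait queue removed, blocking reads and poll() on the pipe files now wait inside the ring buffer (ring_buffer_wait()/ring_buffer_poll_wait()); the user-visible behaviour is meant to stay the same. A user-space sketch of the poll-then-read pattern, using the standard trace_pipe path:

    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[4096];
            struct pollfd pfd = { .events = POLLIN };

            pfd.fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
            if (pfd.fd < 0)
                    return 1;

            /* Blocks until the ring buffer has data for this reader. */
            if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
                    ssize_t n = read(pfd.fd, buf, sizeof(buf));
                    if (n > 0)
                            fwrite(buf, 1, (size_t)n, stdout);
            }
            close(pfd.fd);
            return 0;
    }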
@@ -3902,8 +4012,8 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
                for_each_tracing_cpu(cpu) {
                        /* fill in the size from first enabled cpu */
                        if (size == 0)
-                               size = per_cpu_ptr(tr->data, cpu)->entries;
-                       if (size != per_cpu_ptr(tr->data, cpu)->entries) {
+                               size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
+                       if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
                                buf_size_same = 0;
                                break;
                        }
@@ -3919,7 +4029,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
                } else
                        r = sprintf(buf, "X\n");
        } else
-               r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->data, tc->cpu)->entries >> 10);
+               r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
 
        mutex_unlock(&trace_types_lock);
 
@@ -3966,7 +4076,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
 
        mutex_lock(&trace_types_lock);
        for_each_tracing_cpu(cpu) {
-               size += per_cpu_ptr(tr->data, cpu)->entries >> 10;
+               size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
                if (!ring_buffer_expanded)
                        expanded_size += trace_buf_size >> 10;
        }
@@ -4071,7 +4181,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 
        local_save_flags(irq_flags);
        size = sizeof(*entry) + cnt + 2; /* possible \n added */
-       buffer = global_trace.buffer;
+       buffer = global_trace.trace_buffer.buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
                                          irq_flags, preempt_count());
        if (!event) {
@@ -4156,16 +4266,19 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 
        tr->clock_id = i;
 
-       ring_buffer_set_clock(tr->buffer, trace_clocks[i].func);
-       if (tr->flags & TRACE_ARRAY_FL_GLOBAL && max_tr.buffer)
-               ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
+       ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
 
        /*
         * New clock may not be consistent with the previous clock.
         * Reset the buffer so that it doesn't have incomparable timestamps.
         */
-       tracing_reset_online_cpus(&global_trace);
-       tracing_reset_online_cpus(&max_tr);
+       tracing_reset_online_cpus(&global_trace.trace_buffer);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
+               ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
+       tracing_reset_online_cpus(&global_trace.max_buffer);
+#endif
 
        mutex_unlock(&trace_types_lock);
 
@@ -4182,6 +4295,12 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
        return single_open(file, tracing_clock_show, inode->i_private);
 }
 
+struct ftrace_buffer_info {
+       struct trace_iterator   iter;
+       void                    *spare;
+       unsigned int            read;
+};
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 static int tracing_snapshot_open(struct inode *inode, struct file *file)
 {
@@ -4205,6 +4324,8 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
                        return -ENOMEM;
                }
                iter->tr = tc->tr;
+               iter->trace_buffer = &tc->tr->max_buffer;
+               iter->cpu_file = tc->cpu;
                m->private = iter;
                file->private_data = m;
        }
@@ -4239,33 +4360,50 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
        switch (val) {
        case 0:
-               if (tr->current_trace->allocated_snapshot) {
+               if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
+                       ret = -EINVAL;
+                       break;
+               }
+               if (tr->allocated_snapshot) {
                        /* free spare buffer */
-                       ring_buffer_resize(max_tr.buffer, 1,
+                       ring_buffer_resize(tr->max_buffer.buffer, 1,
                                           RING_BUFFER_ALL_CPUS);
-                       set_buffer_entries(&max_tr, 1);
-                       tracing_reset_online_cpus(&max_tr);
-                       tr->current_trace->allocated_snapshot = false;
+                       set_buffer_entries(&tr->max_buffer, 1);
+                       tracing_reset_online_cpus(&tr->max_buffer);
+                       tr->allocated_snapshot = false;
                }
                break;
        case 1:
-               if (!tr->current_trace->allocated_snapshot) {
+/* Only allow per-cpu swap if the ring buffer supports it */
+#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
+               if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
+                       ret = -EINVAL;
+                       break;
+               }
+#endif
+               if (!tr->allocated_snapshot) {
                        /* allocate spare buffer */
-                       ret = resize_buffer_duplicate_size(&max_tr,
-                                       &global_trace, RING_BUFFER_ALL_CPUS);
+                       ret = resize_buffer_duplicate_size(&tr->max_buffer,
+                                       &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
                        if (ret < 0)
                                break;
-                       tr->current_trace->allocated_snapshot = true;
+                       tr->allocated_snapshot = true;
                }
-
                local_irq_disable();
                /* Now, we're going to swap */
-               update_max_tr(&global_trace, current, smp_processor_id());
+               if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+                       update_max_tr(tr, current, smp_processor_id());
+               else
+                       update_max_tr_single(tr, current, iter->cpu_file);
                local_irq_enable();
                break;
        default:
-               if (tr->current_trace->allocated_snapshot)
-                       tracing_reset_online_cpus(&max_tr);
+               if (tr->allocated_snapshot) {
+                       if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+                               tracing_reset_online_cpus(&tr->max_buffer);
+                       else
+                               tracing_reset(&tr->max_buffer, iter->cpu_file);
+               }
                break;
        }
 
@@ -4293,6 +4431,35 @@ static int tracing_snapshot_release(struct inode *inode, struct file *file)
        return 0;
 }
 
+static int tracing_buffers_open(struct inode *inode, struct file *filp);
+static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
+                                   size_t count, loff_t *ppos);
+static int tracing_buffers_release(struct inode *inode, struct file *file);
+static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+                  struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+
+static int snapshot_raw_open(struct inode *inode, struct file *filp)
+{
+       struct ftrace_buffer_info *info;
+       int ret;
+
+       ret = tracing_buffers_open(inode, filp);
+       if (ret < 0)
+               return ret;
+
+       info = filp->private_data;
+
+       if (info->iter.trace->use_max_tr) {
+               tracing_buffers_release(inode, filp);
+               return -EBUSY;
+       }
+
+       info->iter.snapshot = true;
+       info->iter.trace_buffer = &info->iter.tr->max_buffer;
+
+       return ret;
+}
+
 #endif /* CONFIG_TRACER_SNAPSHOT */
 
 
@@ -4359,15 +4526,17 @@ static const struct file_operations snapshot_fops = {
        .llseek         = tracing_seek,
        .release        = tracing_snapshot_release,
 };
-#endif /* CONFIG_TRACER_SNAPSHOT */
 
-struct ftrace_buffer_info {
-       struct trace_array      *tr;
-       void                    *spare;
-       int                     cpu;
-       unsigned int            read;
+static const struct file_operations snapshot_raw_fops = {
+       .open           = snapshot_raw_open,
+       .read           = tracing_buffers_read,
+       .release        = tracing_buffers_release,
+       .splice_read    = tracing_buffers_splice_read,
+       .llseek         = no_llseek,
 };
 
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 static int tracing_buffers_open(struct inode *inode, struct file *filp)
 {
        struct trace_cpu *tc = inode->i_private;
@@ -4381,72 +4550,131 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
        if (!info)
                return -ENOMEM;
 
-       info->tr        = tr;
-       info->cpu       = tc->cpu;
-       info->spare     = NULL;
+       mutex_lock(&trace_types_lock);
+
+       tr->ref++;
+
+       info->iter.tr           = tr;
+       info->iter.cpu_file     = tc->cpu;
+       info->iter.trace        = tr->current_trace;
+       info->iter.trace_buffer = &tr->trace_buffer;
+       info->spare             = NULL;
        /* Force reading ring buffer for first read */
-       info->read      = (unsigned int)-1;
+       info->read              = (unsigned int)-1;
 
        filp->private_data = info;
 
+       mutex_unlock(&trace_types_lock);
+
        return nonseekable_open(inode, filp);
 }
 
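+/*
+ * tracing_buffers_poll - poll callback for the per-cpu raw buffer files
+ *
+ * Delegates to trace_poll() with the iterator embedded in the
+ * ftrace_buffer_info, so readers of these files can block or poll
+ * until data is available.
+ */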
+static unsigned int
+tracing_buffers_poll(struct file *filp, poll_table *poll_table)
+{
+       struct ftrace_buffer_info *info = filp->private_data;
+       struct trace_iterator *iter = &info->iter;
+
+       return trace_poll(iter, filp, poll_table);
+}
+
 static ssize_t
 tracing_buffers_read(struct file *filp, char __user *ubuf,
                     size_t count, loff_t *ppos)
 {
        struct ftrace_buffer_info *info = filp->private_data;
+       struct trace_iterator *iter = &info->iter;
        ssize_t ret;
-       size_t size;
+       ssize_t size;
 
        if (!count)
                return 0;
 
+       mutex_lock(&trace_types_lock);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
+               size = -EBUSY;
+               goto out_unlock;
+       }
+#endif
+
        if (!info->spare)
-               info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
+               info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
+                                                         iter->cpu_file);
+       size = -ENOMEM;
        if (!info->spare)
-               return -ENOMEM;
+               goto out_unlock;
 
        /* Do we have previous read data to read? */
        if (info->read < PAGE_SIZE)
                goto read;
 
-       trace_access_lock(info->cpu);
-       ret = ring_buffer_read_page(info->tr->buffer,
+ again:
+       trace_access_lock(iter->cpu_file);
+       ret = ring_buffer_read_page(iter->trace_buffer->buffer,
                                    &info->spare,
                                    count,
-                                   info->cpu, 0);
-       trace_access_unlock(info->cpu);
-       if (ret < 0)
-               return 0;
+                                   iter->cpu_file, 0);
+       trace_access_unlock(iter->cpu_file);
 
-       info->read = 0;
+       if (ret < 0) {
+               if (trace_empty(iter)) {
+                       if ((filp->f_flags & O_NONBLOCK)) {
+                               size = -EAGAIN;
+                               goto out_unlock;
+                       }
+                       mutex_unlock(&trace_types_lock);
+                       iter->trace->wait_pipe(iter);
+                       mutex_lock(&trace_types_lock);
+                       if (signal_pending(current)) {
+                               size = -EINTR;
+                               goto out_unlock;
+                       }
+                       goto again;
+               }
+               size = 0;
+               goto out_unlock;
+       }
 
-read:
+       info->read = 0;
+ read:
        size = PAGE_SIZE - info->read;
        if (size > count)
                size = count;
 
        ret = copy_to_user(ubuf, info->spare + info->read, size);
-       if (ret == size)
-               return -EFAULT;
+       if (ret == size) {
+               size = -EFAULT;
+               goto out_unlock;
+       }
        size -= ret;
 
        *ppos += size;
        info->read += size;
 
+ out_unlock:
+       mutex_unlock(&trace_types_lock);
+
        return size;
 }
 
 static int tracing_buffers_release(struct inode *inode, struct file *file)
 {
        struct ftrace_buffer_info *info = file->private_data;
+       struct trace_iterator *iter = &info->iter;
+
+       mutex_lock(&trace_types_lock);
+
+       WARN_ON(!iter->tr->ref);
+       iter->tr->ref--;
 
        if (info->spare)
-               ring_buffer_free_read_page(info->tr->buffer, info->spare);
+               ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
        kfree(info);
 
+       mutex_unlock(&trace_types_lock);
+
        return 0;
 }
 
@@ -4511,6 +4739,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                            unsigned int flags)
 {
        struct ftrace_buffer_info *info = file->private_data;
+       struct trace_iterator *iter = &info->iter;
        struct partial_page partial_def[PIPE_DEF_BUFFERS];
        struct page *pages_def[PIPE_DEF_BUFFERS];
        struct splice_pipe_desc spd = {
@@ -4523,10 +4752,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
        };
        struct buffer_ref *ref;
        int entries, size, i;
-       size_t ret;
+       ssize_t ret;
 
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
+       mutex_lock(&trace_types_lock);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
+               ret = -EBUSY;
+               goto out;
+       }
+#endif
+
+       if (splice_grow_spd(pipe, &spd)) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        if (*ppos & (PAGE_SIZE - 1)) {
                ret = -EINVAL;
@@ -4541,8 +4781,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                len &= PAGE_MASK;
        }
 
-       trace_access_lock(info->cpu);
-       entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+ again:
+       trace_access_lock(iter->cpu_file);
+       entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
 
        for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
@@ -4553,15 +4794,15 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                        break;
 
                ref->ref = 1;
-               ref->buffer = info->tr->buffer;
-               ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
+               ref->buffer = iter->trace_buffer->buffer;
+               ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
                if (!ref->page) {
                        kfree(ref);
                        break;
                }
 
                r = ring_buffer_read_page(ref->buffer, &ref->page,
-                                         len, info->cpu, 1);
+                                         len, iter->cpu_file, 1);
                if (r < 0) {
                        ring_buffer_free_read_page(ref->buffer, ref->page);
                        kfree(ref);
@@ -4585,31 +4826,40 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                spd.nr_pages++;
                *ppos += PAGE_SIZE;
 
-               entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+               entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
        }
 
-       trace_access_unlock(info->cpu);
+       trace_access_unlock(iter->cpu_file);
        spd.nr_pages = i;
 
        /* did we read anything? */
        if (!spd.nr_pages) {
-               if (flags & SPLICE_F_NONBLOCK)
+               if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
                        ret = -EAGAIN;
-               else
-                       ret = 0;
-               /* TODO: block */
-               goto out;
+                       goto out;
+               }
+               mutex_unlock(&trace_types_lock);
+               iter->trace->wait_pipe(iter);
+               mutex_lock(&trace_types_lock);
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       goto out;
+               }
+               goto again;
        }
 
        ret = splice_to_pipe(pipe, &spd);
        splice_shrink_spd(&spd);
 out:
+       mutex_unlock(&trace_types_lock);
+
        return ret;
 }
 
 static const struct file_operations tracing_buffers_fops = {
        .open           = tracing_buffers_open,
        .read           = tracing_buffers_read,
+       .poll           = tracing_buffers_poll,
        .release        = tracing_buffers_release,
        .splice_read    = tracing_buffers_splice_read,
        .llseek         = no_llseek,
@@ -4621,6 +4871,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 {
        struct trace_cpu *tc = filp->private_data;
        struct trace_array *tr = tc->tr;
+       struct trace_buffer *trace_buf = &tr->trace_buffer;
        struct trace_seq *s;
        unsigned long cnt;
        unsigned long long t;
@@ -4633,41 +4884,41 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 
        trace_seq_init(s);
 
-       cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "entries: %ld\n", cnt);
 
-       cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "overrun: %ld\n", cnt);
 
-       cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
 
-       cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "bytes: %ld\n", cnt);
 
        if (trace_clocks[trace_clock_id].in_ns) {
                /* local or global for trace_clock */
-               t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+               t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
                                                                t, usec_rem);
 
-               t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+               t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
        } else {
                /* counter or tsc mode for trace_clock */
                trace_seq_printf(s, "oldest event ts: %llu\n",
-                               ring_buffer_oldest_event_ts(tr->buffer, cpu));
+                               ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
 
                trace_seq_printf(s, "now ts: %llu\n",
-                               ring_buffer_time_stamp(tr->buffer, cpu));
+                               ring_buffer_time_stamp(trace_buf->buffer, cpu));
        }
 
-       cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "dropped events: %ld\n", cnt);
 
-       cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "read events: %ld\n", cnt);
 
        count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
@@ -4770,7 +5021,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
 static void
 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 {
-       struct trace_array_cpu *data = per_cpu_ptr(tr->data, cpu);
+       struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
        struct dentry *d_cpu;
        char cpu_dir[30]; /* 30 characters should be more than enough */
@@ -4801,6 +5052,14 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 
        trace_create_file("buffer_size_kb", 0444, d_cpu,
                        (void *)&data->trace_cpu, &tracing_entries_fops);
+
+#ifdef CONFIG_TRACER_SNAPSHOT
+       trace_create_file("snapshot", 0644, d_cpu,
+                         (void *)&data->trace_cpu, &snapshot_fops);
+
+       trace_create_file("snapshot_raw", 0444, d_cpu,
+                         (void *)&data->trace_cpu, &snapshot_raw_fops);
+#endif
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -5054,7 +5313,7 @@ rb_simple_read(struct file *filp, char __user *ubuf,
               size_t cnt, loff_t *ppos)
 {
        struct trace_array *tr = filp->private_data;
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
        char buf[64];
        int r;
 
@@ -5073,7 +5332,7 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
                size_t cnt, loff_t *ppos)
 {
        struct trace_array *tr = filp->private_data;
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
        unsigned long val;
        int ret;
 
@@ -5107,9 +5366,254 @@ static const struct file_operations rb_simple_fops = {
        .llseek         = default_llseek,
 };
 
+struct dentry *trace_instance_dir;
+
+static void
+init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
+
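+/*
+ * init_trace_buffers - initialize the per-cpu data of a trace buffer
+ *
+ * Zeroes each per-cpu trace_array_cpu and wires up its trace_cpu
+ * back pointers (cpu number and owning trace_array) that the per-cpu
+ * debugfs files use as their private data.
+ */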
+static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
+{
+       int cpu;
+
+       for_each_tracing_cpu(cpu) {
+               memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
+               per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
+               per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
+       }
+}
+
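+/*
+ * allocate_trace_buffers - allocate the ring buffers of a trace array
+ *
+ * Allocates the main ring buffer of @size along with its per-cpu data.
+ * With CONFIG_TRACER_MAX_TRACE, a minimal (one page) max buffer used
+ * for snapshots is allocated as well. Returns 0 on success or -ENOMEM
+ * after freeing any partial allocation.
+ */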
+static int allocate_trace_buffers(struct trace_array *tr, int size)
+{
+       enum ring_buffer_flags rb_flags;
+
+       rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+
+       tr->trace_buffer.buffer = ring_buffer_alloc(size, rb_flags);
+       if (!tr->trace_buffer.buffer)
+               goto out_free;
+
+       tr->trace_buffer.data = alloc_percpu(struct trace_array_cpu);
+       if (!tr->trace_buffer.data)
+               goto out_free;
+
+       init_trace_buffers(tr, &tr->trace_buffer);
+
+       /* Allocate the first page for all buffers */
+       set_buffer_entries(&tr->trace_buffer,
+                          ring_buffer_size(tr->trace_buffer.buffer, 0));
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+
+       tr->max_buffer.buffer = ring_buffer_alloc(1, rb_flags);
+       if (!tr->max_buffer.buffer)
+               goto out_free;
+
+       tr->max_buffer.data = alloc_percpu(struct trace_array_cpu);
+       if (!tr->max_buffer.data)
+               goto out_free;
+
+       init_trace_buffers(tr, &tr->max_buffer);
+
+       set_buffer_entries(&tr->max_buffer, 1);
+#endif
+       return 0;
+
+ out_free:
+       if (tr->trace_buffer.buffer)
+               ring_buffer_free(tr->trace_buffer.buffer);
+       free_percpu(tr->trace_buffer.data);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       if (tr->max_buffer.buffer)
+               ring_buffer_free(tr->max_buffer.buffer);
+       free_percpu(tr->max_buffer.data);
+#endif
+       return -ENOMEM;
+}
+
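+/*
+ * new_instance_create - create a named multi-buffer tracing instance
+ *
+ * Allocates a new trace_array with its own ring buffers, starts it on
+ * the nop tracer, creates its directory under instances/ with the
+ * usual tracing files and event directories, and adds it to
+ * ftrace_trace_arrays. Fails with -EEXIST if the name is already used.
+ */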
+static int new_instance_create(const char *name)
+{
+       struct trace_array *tr;
+       int ret;
+
+       mutex_lock(&trace_types_lock);
+
+       ret = -EEXIST;
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               if (tr->name && strcmp(tr->name, name) == 0)
+                       goto out_unlock;
+       }
+
+       ret = -ENOMEM;
+       tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+       if (!tr)
+               goto out_unlock;
+
+       tr->name = kstrdup(name, GFP_KERNEL);
+       if (!tr->name)
+               goto out_free_tr;
+
+       raw_spin_lock_init(&tr->start_lock);
+
+       tr->current_trace = &nop_trace;
+
+       INIT_LIST_HEAD(&tr->systems);
+       INIT_LIST_HEAD(&tr->events);
+
+       if (allocate_trace_buffers(tr, trace_buf_size) < 0)
+               goto out_free_tr;
+
+       /* Holder for file callbacks */
+       tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
+       tr->trace_cpu.tr = tr;
+
+       tr->dir = debugfs_create_dir(name, trace_instance_dir);
+       if (!tr->dir)
+               goto out_free_tr;
+
+       ret = event_trace_add_tracer(tr->dir, tr);
+       if (ret)
+               goto out_free_tr;
+
+       init_tracer_debugfs(tr, tr->dir);
+
+       list_add(&tr->list, &ftrace_trace_arrays);
+
+       mutex_unlock(&trace_types_lock);
+
+       return 0;
+
+ out_free_tr:
+       if (tr->trace_buffer.buffer)
+               ring_buffer_free(tr->trace_buffer.buffer);
+       kfree(tr->name);
+       kfree(tr);
+
+ out_unlock:
+       mutex_unlock(&trace_types_lock);
+
+       return ret;
+}
+
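+/*
+ * instance_delete - tear down a named tracing instance
+ *
+ * Looks the instance up by name, refuses with -EBUSY while it still
+ * has open references (tr->ref), then unregisters its events, removes
+ * its debugfs directory and frees its buffers and the trace_array.
+ */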
+static int instance_delete(const char *name)
+{
+       struct trace_array *tr;
+       int found = 0;
+       int ret;
+
+       mutex_lock(&trace_types_lock);
+
+       ret = -ENODEV;
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               if (tr->name && strcmp(tr->name, name) == 0) {
+                       found = 1;
+                       break;
+               }
+       }
+       if (!found)
+               goto out_unlock;
+
+       ret = -EBUSY;
+       if (tr->ref)
+               goto out_unlock;
+
+       list_del(&tr->list);
+
+       event_trace_del_tracer(tr);
+       debugfs_remove_recursive(tr->dir);
+       free_percpu(tr->trace_buffer.data);
+       ring_buffer_free(tr->trace_buffer.buffer);
+
+       kfree(tr->name);
+       kfree(tr);
+
+       ret = 0;
+
+ out_unlock:
+       mutex_unlock(&trace_types_lock);
+
+       return ret;
+}
+
+static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+{
+       struct dentry *parent;
+       int ret;
+
+       /* Paranoid: Make sure the parent is the "instances" directory */
+       parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+       if (WARN_ON_ONCE(parent != trace_instance_dir))
+               return -ENOENT;
+
+       /*
+        * The inode mutex is locked, but debugfs_create_dir() will also
+        * take the mutex. As the instances directory cannot be destroyed
+        * or changed in any other way, it is safe to unlock it here and
+        * let debugfs_create_dir() take it. If two users try to make the
+        * same directory at the same time, new_instance_create() will
+        * determine the winner.
+        */
+       mutex_unlock(&inode->i_mutex);
+
+       ret = new_instance_create(dentry->d_iname);
+
+       mutex_lock(&inode->i_mutex);
+
+       return ret;
+}
+
+static int instance_rmdir(struct inode *inode, struct dentry *dentry)
+{
+       struct dentry *parent;
+       int ret;
+
+       /* Paranoid: Make sure the parent is the "instances" directory */
+       parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+       if (WARN_ON_ONCE(parent != trace_instance_dir))
+               return -ENOENT;
+
+       /* The caller did a dget() on dentry */
+       mutex_unlock(&dentry->d_inode->i_mutex);
+
+       /*
+        * The inode mutex is locked, but debugfs_remove_recursive() will
+        * also take it. As the instances directory cannot be destroyed
+        * or changed in any other way, it is safe to unlock it here and
+        * let instance_delete() look the instance up under
+        * trace_types_lock. If two users try to remove the same instance
+        * at the same time, the loser simply gets -ENODEV.
+        */
+       mutex_unlock(&inode->i_mutex);
+
+       ret = instance_delete(dentry->d_iname);
+
+       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock(&dentry->d_inode->i_mutex);
+
+       return ret;
+}
+
+static const struct inode_operations instance_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .mkdir          = instance_mkdir,
+       .rmdir          = instance_rmdir,
+};
+
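+/*
+ * create_trace_instances - set up the "instances" debugfs directory
+ *
+ * Creates instances/ under the tracing directory and replaces its
+ * inode operations so that a plain mkdir/rmdir from user space creates
+ * or removes a tracing instance. For example (assuming debugfs is
+ * mounted at /sys/kernel/debug):
+ *
+ *   mkdir /sys/kernel/debug/tracing/instances/foo
+ *   rmdir /sys/kernel/debug/tracing/instances/foo
+ */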
+static __init void create_trace_instances(struct dentry *d_tracer)
+{
+       trace_instance_dir = debugfs_create_dir("instances", d_tracer);
+       if (WARN_ON(!trace_instance_dir))
+               return;
+
+       /* Hijack the dir inode operations, to allow mkdir */
+       trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
+}
+
 static void
 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 {
+       int cpu;
 
        trace_create_file("trace_options", 0644, d_tracer,
                          tr, &tracing_iter_fops);
@@ -5137,12 +5641,20 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 
        trace_create_file("tracing_on", 0644, d_tracer,
                            tr, &rb_simple_fops);
+
+#ifdef CONFIG_TRACER_SNAPSHOT
+       trace_create_file("snapshot", 0644, d_tracer,
+                         (void *)&tr->trace_cpu, &snapshot_fops);
+#endif
+
+       for_each_tracing_cpu(cpu)
+               tracing_init_debugfs_percpu(tr, cpu);
 }
 
 static __init int tracer_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       int cpu;
 
        trace_access_lock_init();
 
@@ -5178,16 +5690,10 @@ static __init int tracer_init_debugfs(void)
                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-       trace_create_file("snapshot", 0644, d_tracer,
-                         (void *)&global_trace.trace_cpu, &snapshot_fops);
-#endif
+       create_trace_instances(d_tracer);
 
        create_trace_options_dir(&global_trace);
 
-       for_each_tracing_cpu(cpu)
-               tracing_init_debugfs_percpu(&global_trace, cpu);
-
        return 0;
 }
 
@@ -5258,6 +5764,7 @@ void trace_init_global_iter(struct trace_iterator *iter)
        iter->tr = &global_trace;
        iter->trace = iter->tr->current_trace;
        iter->cpu_file = RING_BUFFER_ALL_CPUS;
+       iter->trace_buffer = &global_trace.trace_buffer;
 }
 
 static void
@@ -5295,7 +5802,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
        trace_init_global_iter(&iter);
 
        for_each_tracing_cpu(cpu) {
-               atomic_inc(&per_cpu_ptr(iter.tr->data, cpu)->disabled);
+               atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
        }
 
        old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -5363,7 +5870,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
                trace_flags |= old_userobj;
 
                for_each_tracing_cpu(cpu) {
-                       atomic_dec(&per_cpu_ptr(iter.tr->data, cpu)->disabled);
+                       atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
                }
                tracing_on();
        }
@@ -5383,8 +5890,6 @@ EXPORT_SYMBOL_GPL(ftrace_dump);
 __init static int tracer_alloc_buffers(void)
 {
        int ring_buf_size;
-       enum ring_buffer_flags rb_flags;
-       int i;
        int ret = -ENOMEM;
 
 
@@ -5405,70 +5910,22 @@ __init static int tracer_alloc_buffers(void)
        else
                ring_buf_size = 1;
 
-       rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
-
        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
        cpumask_copy(tracing_cpumask, cpu_all_mask);
 
        raw_spin_lock_init(&global_trace.start_lock);
 
        /* TODO: make the number of buffers hot pluggable with CPUS */
-       global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
-       if (!global_trace.buffer) {
+       if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
                WARN_ON(1);
                goto out_free_cpumask;
        }
 
-       global_trace.data = alloc_percpu(struct trace_array_cpu);
-
-       if (!global_trace.data) {
-               printk(KERN_ERR "tracer: failed to allocate percpu memory!\n");
-               WARN_ON(1);
-               goto out_free_cpumask;
-       }
-
-       for_each_tracing_cpu(i) {
-               memset(per_cpu_ptr(global_trace.data, i), 0, sizeof(struct trace_array_cpu));
-               per_cpu_ptr(global_trace.data, i)->trace_cpu.cpu = i;
-               per_cpu_ptr(global_trace.data, i)->trace_cpu.tr = &global_trace;
-       }
-
        if (global_trace.buffer_disabled)
                tracing_off();
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-       max_tr.data = alloc_percpu(struct trace_array_cpu);
-       if (!max_tr.data) {
-               printk(KERN_ERR "tracer: failed to allocate percpu memory!\n");
-               WARN_ON(1);
-               goto out_free_cpumask;
-       }
-       max_tr.buffer = ring_buffer_alloc(1, rb_flags);
-       raw_spin_lock_init(&max_tr.start_lock);
-       if (!max_tr.buffer) {
-               printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
-               WARN_ON(1);
-               ring_buffer_free(global_trace.buffer);
-               goto out_free_cpumask;
-       }
-
-       for_each_tracing_cpu(i) {
-               memset(per_cpu_ptr(max_tr.data, i), 0, sizeof(struct trace_array_cpu));
-               per_cpu_ptr(max_tr.data, i)->trace_cpu.cpu = i;
-               per_cpu_ptr(max_tr.data, i)->trace_cpu.tr = &max_tr;
-       }
-#endif
-
-       /* Allocate the first page for all buffers */
-       set_buffer_entries(&global_trace,
-                          ring_buffer_size(global_trace.buffer, 0));
-#ifdef CONFIG_TRACER_MAX_TRACE
-       set_buffer_entries(&max_tr, 1);
-#endif
-
        trace_init_cmdlines();
-       init_irq_work(&trace_work_wakeup, trace_wake_up);
 
        register_tracer(&nop_trace);
 
@@ -5502,8 +5959,10 @@ __init static int tracer_alloc_buffers(void)
        return 0;
 
 out_free_cpumask:
-       free_percpu(global_trace.data);
-       free_percpu(max_tr.data);
+       free_percpu(global_trace.trace_buffer.data);
+#ifdef CONFIG_TRACER_MAX_TRACE
+       free_percpu(global_trace.max_buffer.data);
+#endif
        free_cpumask_var(tracing_cpumask);
 out_free_buffer_mask:
        free_cpumask_var(tracing_buffer_mask);