diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3dae3419b99566c127f1682b29f39bb184bbdb1..f548f69c4299dd1ee44bfdc1f84d79d655d0d6d7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -163,6 +163,7 @@ static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
+static atomic_t nr_switch_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -1868,8 +1869,6 @@ event_sched_in(struct perf_event *event,
 
        perf_pmu_disable(event->pmu);
 
-       event->tstamp_running += tstamp - event->tstamp_stopped;
-
        perf_set_shadow_time(event, ctx, tstamp);
 
        perf_log_itrace_start(event);
@@ -1881,6 +1880,8 @@ event_sched_in(struct perf_event *event,
                goto out;
        }
 
+       event->tstamp_running += tstamp - event->tstamp_stopped;
+
        if (!is_software_event(event))
                cpuctx->active_oncpu++;
        if (!ctx->nr_active++)
@@ -2619,6 +2620,9 @@ static void perf_pmu_sched_task(struct task_struct *prev,
        local_irq_restore(flags);
 }
 
+static void perf_event_switch(struct task_struct *task,
+                             struct task_struct *next_prev, bool sched_in);
+
 #define for_each_task_context_nr(ctxn)                                 \
        for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2641,6 +2645,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
        if (__this_cpu_read(perf_sched_cb_usages))
                perf_pmu_sched_task(task, next, false);
 
+       if (atomic_read(&nr_switch_events))
+               perf_event_switch(task, next, false);
+
        for_each_task_context_nr(ctxn)
                perf_event_context_sched_out(task, ctxn, next);
 
@@ -2831,6 +2838,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
                perf_cgroup_sched_in(prev, task);
 
+       if (atomic_read(&nr_switch_events))
+               perf_event_switch(task, prev, true);
+
        if (__this_cpu_read(perf_sched_cb_usages))
                perf_pmu_sched_task(prev, task, true);
 }
@@ -3212,6 +3222,59 @@ static inline u64 perf_event_count(struct perf_event *event)
        return __perf_event_count(event);
 }
 
+/*
+ * NMI-safe method to read a local event, that is, an event that:
+ *   - is either for the current task, or for this CPU
+ *   - does not have inherit set, because inherited task events
+ *     will not be local and we cannot read them atomically
+ *   - does not have a pmu::count method
+ */
+u64 perf_event_read_local(struct perf_event *event)
+{
+       unsigned long flags;
+       u64 val;
+
+       /*
+        * Disabling interrupts avoids all counter scheduling (context
+        * switches, timer based rotation and IPIs).
+        */
+       local_irq_save(flags);
+
+       /* If this is a per-task event, it must be for current */
+       WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
+                    event->hw.target != current);
+
+       /* If this is a per-CPU event, it must be for this CPU */
+       WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
+                    event->cpu != smp_processor_id());
+
+       /*
+        * It must not be an event with inherit set; we cannot read
+        * all child counters from atomic context.
+        */
+       WARN_ON_ONCE(event->attr.inherit);
+
+       /*
+        * It must not have a pmu::count method; those are not
+        * NMI-safe.
+        */
+       WARN_ON_ONCE(event->pmu->count);
+
+       /*
+        * If the event is currently on this CPU, it's either a per-task event,
+        * or local to this CPU. Furthermore, it means it's ACTIVE (otherwise
+        * oncpu == -1).
+        */
+       if (event->oncpu == smp_processor_id())
+               event->pmu->read(event);
+
+       val = local64_read(&event->count);
+       local_irq_restore(flags);
+
+       return val;
+}
+
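
The constraints spelled out in the comment above are what make perf_event_read_local() usable from contexts that cannot block or send IPIs. A minimal, hypothetical caller sketch follows; it is not part of this patch, and the helper name and region callback are made up for illustration:

#include <linux/perf_event.h>

/*
 * Hypothetical sketch: measure how far a local counter advances across a
 * region, from a context where blocking or cross-CPU work is not allowed.
 * The event must meet the constraints above: local to the current task or
 * this CPU, no attr.inherit, no pmu::count method.
 */
static u64 measure_region(struct perf_event *event, void (*region)(void))
{
        u64 before, after;

        before = perf_event_read_local(event); /* no locks taken, NMI-safe */
        region();
        after = perf_event_read_local(event);

        return after - before;
}
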
 static u64 perf_event_read(struct perf_event *event)
 {
        /*
@@ -3454,6 +3517,10 @@ static void unaccount_event(struct perf_event *event)
                atomic_dec(&nr_task_events);
        if (event->attr.freq)
                atomic_dec(&nr_freq_events);
+       if (event->attr.context_switch) {
+               static_key_slow_dec_deferred(&perf_sched_events);
+               atomic_dec(&nr_switch_events);
+       }
        if (is_cgroup_event(event))
                static_key_slow_dec_deferred(&perf_sched_events);
        if (has_branch_stack(event))
@@ -3958,28 +4025,21 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
-{
-       struct perf_event_context *ctx = event->ctx;
-       int ret = 0, active;
+struct period_event {
+       struct perf_event *event;
        u64 value;
+};
 
-       if (!is_sampling_event(event))
-               return -EINVAL;
-
-       if (copy_from_user(&value, arg, sizeof(value)))
-               return -EFAULT;
-
-       if (!value)
-               return -EINVAL;
+static int __perf_event_period(void *info)
+{
+       struct period_event *pe = info;
+       struct perf_event *event = pe->event;
+       struct perf_event_context *ctx = event->ctx;
+       u64 value = pe->value;
+       bool active;
 
-       raw_spin_lock_irq(&ctx->lock);
+       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
-               if (value > sysctl_perf_event_sample_rate) {
-                       ret = -EINVAL;
-                       goto unlock;
-               }
-
                event->attr.sample_freq = value;
        } else {
                event->attr.sample_period = value;
@@ -3998,11 +4058,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
+       raw_spin_unlock(&ctx->lock);
 
-unlock:
+       return 0;
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+       struct period_event pe = { .event = event, };
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task;
+       u64 value;
+
+       if (!is_sampling_event(event))
+               return -EINVAL;
+
+       if (copy_from_user(&value, arg, sizeof(value)))
+               return -EFAULT;
+
+       if (!value)
+               return -EINVAL;
+
+       if (event->attr.freq && value > sysctl_perf_event_sample_rate)
+               return -EINVAL;
+
+       task = ctx->task;
+       pe.value = value;
+
+       if (!task) {
+               cpu_function_call(event->cpu, __perf_event_period, &pe);
+               return 0;
+       }
+
+retry:
+       if (!task_function_call(task, __perf_event_period, &pe))
+               return 0;
+
+       raw_spin_lock_irq(&ctx->lock);
+       if (ctx->is_active) {
+               raw_spin_unlock_irq(&ctx->lock);
+               task = ctx->task;
+               goto retry;
+       }
+
+       __perf_event_period(&pe);
        raw_spin_unlock_irq(&ctx->lock);
 
-       return ret;
+       return 0;
 }
 
 static const struct file_operations perf_fops;
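
For reference, the user-space path into perf_event_period() is the PERF_EVENT_IOC_PERIOD ioctl; with this change the kernel side runs __perf_event_period() on the CPU where the event is active instead of poking it remotely. A minimal sketch, not part of this patch (the helper name is illustrative):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* Update the sample period of an already-open perf event fd. */
static int set_sample_period(int perf_fd, uint64_t period)
{
        /* The kernel copies the u64 and lands in perf_event_period() above. */
        return ioctl(perf_fd, PERF_EVENT_IOC_PERIOD, &period);
}
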
@@ -4740,12 +4842,20 @@ static const struct file_operations perf_fops = {
  * to user-space before waking everybody up.
  */
 
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+       /* only the parent has fasync state */
+       if (event->parent)
+               event = event->parent;
+       return &event->fasync;
+}
+
 void perf_event_wakeup(struct perf_event *event)
 {
        ring_buffer_wakeup(event);
 
        if (event->pending_kill) {
-               kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+               kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
                event->pending_kill = 0;
        }
 }
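
The fasync state that perf_event_fasync() now resolves through the parent event is set up from user space via O_ASYNC. A hypothetical sketch, not part of this patch:

#include <fcntl.h>
#include <unistd.h>

/*
 * Request SIGIO on overflow for a perf event fd.  For inherited events the
 * signal is delivered via the parent's fasync state, which is the case the
 * perf_event_fasync() helper above handles.
 */
static int enable_sigio(int perf_fd)
{
        if (fcntl(perf_fd, F_SETOWN, getpid()) < 0)
                return -1;
        return fcntl(perf_fd, F_SETFL, fcntl(perf_fd, F_GETFL) | O_ASYNC);
}
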
@@ -5981,6 +6091,91 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
        perf_output_end(&handle);
 }
 
+/*
+ * context_switch tracking
+ */
+
+struct perf_switch_event {
+       struct task_struct      *task;
+       struct task_struct      *next_prev;
+
+       struct {
+               struct perf_event_header        header;
+               u32                             next_prev_pid;
+               u32                             next_prev_tid;
+       } event_id;
+};
+
+static int perf_event_switch_match(struct perf_event *event)
+{
+       return event->attr.context_switch;
+}
+
+static void perf_event_switch_output(struct perf_event *event, void *data)
+{
+       struct perf_switch_event *se = data;
+       struct perf_output_handle handle;
+       struct perf_sample_data sample;
+       int ret;
+
+       if (!perf_event_switch_match(event))
+               return;
+
+       /* Only CPU-wide events are allowed to see next/prev pid/tid */
+       if (event->ctx->task) {
+               se->event_id.header.type = PERF_RECORD_SWITCH;
+               se->event_id.header.size = sizeof(se->event_id.header);
+       } else {
+               se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
+               se->event_id.header.size = sizeof(se->event_id);
+               se->event_id.next_prev_pid =
+                                       perf_event_pid(event, se->next_prev);
+               se->event_id.next_prev_tid =
+                                       perf_event_tid(event, se->next_prev);
+       }
+
+       perf_event_header__init_id(&se->event_id.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event, se->event_id.header.size);
+       if (ret)
+               return;
+
+       if (event->ctx->task)
+               perf_output_put(&handle, se->event_id.header);
+       else
+               perf_output_put(&handle, se->event_id);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
+       perf_output_end(&handle);
+}
+
+static void perf_event_switch(struct task_struct *task,
+                             struct task_struct *next_prev, bool sched_in)
+{
+       struct perf_switch_event switch_event;
+
+       /* N.B. caller checks nr_switch_events != 0 */
+
+       switch_event = (struct perf_switch_event){
+               .task           = task,
+               .next_prev      = next_prev,
+               .event_id       = {
+                       .header = {
+                               /* .type */
+                               .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
+                               /* .size */
+                       },
+                       /* .next_prev_pid */
+                       /* .next_prev_tid */
+               },
+       };
+
+       perf_event_aux(perf_event_switch_output,
+                      &switch_event,
+                      NULL);
+}
+
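
For ring-buffer consumers, the two layouts emitted above look roughly as follows. This is a hypothetical reader-side sketch, not part of this patch; the sample_id fields appended after each record are omitted:

#include <linux/perf_event.h>

/* PERF_RECORD_SWITCH: header only; a sched-out is flagged by
 * PERF_RECORD_MISC_SWITCH_OUT in header.misc. */
struct switch_record {
        struct perf_event_header        header;
};

/* PERF_RECORD_SWITCH_CPU_WIDE: CPU-wide events additionally see the
 * other task's pid/tid. */
struct switch_cpu_wide_record {
        struct perf_event_header        header;
        __u32                           next_prev_pid;
        __u32                           next_prev_tid;
};
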
 /*
  * IRQ throttle logging
  */
@@ -6040,8 +6235,6 @@ static void perf_log_itrace_start(struct perf_event *event)
            event->hw.itrace_started)
                return;
 
-       event->hw.itrace_started = 1;
-
        rec.header.type = PERF_RECORD_ITRACE_START;
        rec.header.misc = 0;
        rec.header.size = sizeof(rec);
@@ -6124,7 +6317,7 @@ static int __perf_event_overflow(struct perf_event *event,
        else
                perf_event_output(event, data, regs);
 
-       if (event->fasync && event->pending_kill) {
+       if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
                irq_work_queue(&event->pending);
        }
@@ -6749,8 +6942,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
        if (event->tp_event->prog)
                return -EEXIST;
 
-       if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
-               /* bpf programs can only be attached to kprobes */
+       if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
+               /* bpf programs can only be attached to u/kprobes */
                return -EINVAL;
 
        prog = bpf_prog_get(prog_fd);
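
The user-space path into perf_event_set_bpf_prog() is the PERF_EVENT_IOC_SET_BPF ioctl, which after this change is accepted for uprobe events as well as kprobes. A minimal sketch, not part of this patch (the fd names are illustrative):

#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* Attach an already-loaded BPF program to a kprobe or uprobe perf event. */
static int attach_bpf(int probe_event_fd, int bpf_prog_fd)
{
        return ioctl(probe_event_fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd);
}
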
@@ -7479,6 +7672,10 @@ static void account_event(struct perf_event *event)
                if (atomic_inc_return(&nr_freq_events) == 1)
                        tick_nohz_full_kick_all();
        }
+       if (event->attr.context_switch) {
+               atomic_inc(&nr_switch_events);
+               static_key_slow_inc(&perf_sched_events.key);
+       }
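
The attr.context_switch bit accounted for here is requested from user space when the event is opened. A hypothetical setup sketch, not part of this patch (the helper name is illustrative); the resulting fd still needs its ring buffer mmap()ed for the records to land anywhere:

#include <string.h>
#include <linux/perf_event.h>

/* Ask for PERF_RECORD_SWITCH records on an otherwise idle dummy event. */
static void request_switch_records(struct perf_event_attr *attr)
{
        memset(attr, 0, sizeof(*attr));
        attr->size           = sizeof(*attr);
        attr->type           = PERF_TYPE_SOFTWARE;
        attr->config         = PERF_COUNT_SW_DUMMY;
        attr->context_switch = 1;       /* bumps nr_switch_events above */
}
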
        if (has_branch_stack(event))
                static_key_slow_inc(&perf_sched_events.key);
        if (is_cgroup_event(event))
@@ -8574,6 +8771,31 @@ void perf_event_delayed_put(struct task_struct *task)
                WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
+struct perf_event *perf_event_get(unsigned int fd)
+{
+       int err;
+       struct fd f;
+       struct perf_event *event;
+
+       err = perf_fget_light(fd, &f);
+       if (err)
+               return ERR_PTR(err);
+
+       event = f.file->private_data;
+       atomic_long_inc(&event->refcount);
+       fdput(f);
+
+       return event;
+}
+
+const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
+{
+       if (!event)
+               return ERR_PTR(-EINVAL);
+
+       return &event->attr;
+}
+
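
perf_event_get() and perf_event_attrs() give other kernel code a way to resolve and inspect an event from a user-supplied fd. A hypothetical in-kernel caller sketch, not part of this patch; dropping the reference taken by perf_event_get() is left out of the sketch:

#include <linux/err.h>
#include <linux/perf_event.h>

/* Check that a user-supplied fd refers to a hardware cycles counter. */
static int check_cycles_fd(unsigned int fd)
{
        const struct perf_event_attr *attr;
        struct perf_event *event;

        event = perf_event_get(fd);
        if (IS_ERR(event))
                return PTR_ERR(event);

        /* event is non-NULL here, so perf_event_attrs() cannot fail. */
        attr = perf_event_attrs(event);
        if (attr->type != PERF_TYPE_HARDWARE ||
            attr->config != PERF_COUNT_HW_CPU_CYCLES)
                return -EINVAL;

        return 0;
}
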
 /*
  * inherit an event from parent task to child task:
  */
@@ -8872,7 +9094,7 @@ static void perf_event_init_cpu(int cpu)
        mutex_unlock(&swhash->hlist_mutex);
 }
 
-#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
        struct remove_event re = { .detach_group = true };