X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=kernel%2Fevents%2Fcore.c;h=f548f69c4299dd1ee44bfdc1f84d79d655d0d6d7;hb=16651fc2f30dfe14dcdd59cf0ccb4e1ccdba4dc7;hp=e6feb51141340a99a248fea0ad1dc17402b0dbdb;hpb=072502a67c9164625288cca17704808e6c06273f;p=firefly-linux-kernel-4.4.55.git

diff --git a/kernel/events/core.c b/kernel/events/core.c
index e6feb5114134..f548f69c4299 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -163,6 +163,7 @@ static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
+static atomic_t nr_switch_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -2619,6 +2620,9 @@ static void perf_pmu_sched_task(struct task_struct *prev,
 	local_irq_restore(flags);
 }
 
+static void perf_event_switch(struct task_struct *task,
+			      struct task_struct *next_prev, bool sched_in);
+
 #define for_each_task_context_nr(ctxn)				\
 	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2641,6 +2645,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 	if (__this_cpu_read(perf_sched_cb_usages))
 		perf_pmu_sched_task(task, next, false);
 
+	if (atomic_read(&nr_switch_events))
+		perf_event_switch(task, next, false);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 
@@ -2831,6 +2838,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
 		perf_cgroup_sched_in(prev, task);
 
+	if (atomic_read(&nr_switch_events))
+		perf_event_switch(task, prev, true);
+
 	if (__this_cpu_read(perf_sched_cb_usages))
 		perf_pmu_sched_task(prev, task, true);
 }
@@ -3212,6 +3222,59 @@ static inline u64 perf_event_count(struct perf_event *event)
 	return __perf_event_count(event);
 }
 
+/*
+ * NMI-safe method to read a local event, that is, an event that
+ * is:
+ *   - either for the current task, or for this CPU
+ *   - does not have inherit set, for inherited task events
+ *     will not be local and we cannot read them atomically
+ *   - must not have a pmu::count method
+ */
+u64 perf_event_read_local(struct perf_event *event)
+{
+	unsigned long flags;
+	u64 val;
+
+	/*
+	 * Disabling interrupts avoids all counter scheduling (context
+	 * switches, timer based rotation and IPIs).
+	 */
+	local_irq_save(flags);
+
+	/* If this is a per-task event, it must be for current */
+	WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
+		     event->hw.target != current);
+
+	/* If this is a per-CPU event, it must be for this CPU */
+	WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
+		     event->cpu != smp_processor_id());
+
+	/*
+	 * It must not be an event with inherit set, we cannot read
+	 * all child counters from atomic context.
+	 */
+	WARN_ON_ONCE(event->attr.inherit);
+
+	/*
+	 * It must not have a pmu::count method, those are not
+	 * NMI safe.
+	 */
+	WARN_ON_ONCE(event->pmu->count);
+
+	/*
+	 * If the event is currently on this CPU, it's either a per-task event,
+	 * or local to this CPU. Furthermore it means it's ACTIVE (otherwise
+	 * oncpu == -1).
+	 */
+	if (event->oncpu == smp_processor_id())
+		event->pmu->read(event);
+
+	val = local64_read(&event->count);
+	local_irq_restore(flags);
+
+	return val;
+}
+
 static u64 perf_event_read(struct perf_event *event)
 {
 	/*
@@ -3454,6 +3517,10 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
 		atomic_dec(&nr_freq_events);
+	if (event->attr.context_switch) {
+		static_key_slow_dec_deferred(&perf_sched_events);
+		atomic_dec(&nr_switch_events);
+	}
 	if (is_cgroup_event(event))
 		static_key_slow_dec_deferred(&perf_sched_events);
 	if (has_branch_stack(event))
@@ -6024,6 +6091,91 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost)
 	perf_output_end(&handle);
 }
 
+/*
+ * context_switch tracking
+ */
+
+struct perf_switch_event {
+	struct task_struct	*task;
+	struct task_struct	*next_prev;
+
+	struct {
+		struct perf_event_header	header;
+		u32				next_prev_pid;
+		u32				next_prev_tid;
+	} event_id;
+};
+
+static int perf_event_switch_match(struct perf_event *event)
+{
+	return event->attr.context_switch;
+}
+
+static void perf_event_switch_output(struct perf_event *event, void *data)
+{
+	struct perf_switch_event *se = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_switch_match(event))
+		return;
+
+	/* Only CPU-wide events are allowed to see next/prev pid/tid */
+	if (event->ctx->task) {
+		se->event_id.header.type = PERF_RECORD_SWITCH;
+		se->event_id.header.size = sizeof(se->event_id.header);
+	} else {
+		se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
+		se->event_id.header.size = sizeof(se->event_id);
+		se->event_id.next_prev_pid =
+					perf_event_pid(event, se->next_prev);
+		se->event_id.next_prev_tid =
+					perf_event_tid(event, se->next_prev);
+	}
+
+	perf_event_header__init_id(&se->event_id.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event, se->event_id.header.size);
+	if (ret)
+		return;
+
+	if (event->ctx->task)
+		perf_output_put(&handle, se->event_id.header);
+	else
+		perf_output_put(&handle, se->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+static void perf_event_switch(struct task_struct *task,
+			      struct task_struct *next_prev, bool sched_in)
+{
+	struct perf_switch_event switch_event;
+
+	/* N.B. caller checks nr_switch_events != 0 */
+
+	switch_event = (struct perf_switch_event){
+		.task		= task,
+		.next_prev	= next_prev,
+		.event_id	= {
+			.header = {
+				/* .type */
+				.misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
+				/* .size */
+			},
+			/* .next_prev_pid */
+			/* .next_prev_tid */
+		},
+	};
+
+	perf_event_aux(perf_event_switch_output,
+		       &switch_event,
+		       NULL);
+}
+
 /*
  * IRQ throttle logging
  */
@@ -6083,8 +6235,6 @@ static void perf_log_itrace_start(struct perf_event *event)
 	    event->hw.itrace_started)
 		return;
 
-	event->hw.itrace_started = 1;
-
 	rec.header.type = PERF_RECORD_ITRACE_START;
 	rec.header.misc = 0;
 	rec.header.size = sizeof(rec);
@@ -6792,8 +6942,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	if (event->tp_event->prog)
 		return -EEXIST;
 
-	if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
-		/* bpf programs can only be attached to kprobes */
+	if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
+		/* bpf programs can only be attached to u/kprobes */
 		return -EINVAL;
 
 	prog = bpf_prog_get(prog_fd);
@@ -7522,6 +7672,10 @@ static void account_event(struct perf_event *event)
 		if (atomic_inc_return(&nr_freq_events) == 1)
 			tick_nohz_full_kick_all();
 	}
+	if (event->attr.context_switch) {
+		atomic_inc(&nr_switch_events);
+		static_key_slow_inc(&perf_sched_events.key);
+	}
 	if (has_branch_stack(event))
 		static_key_slow_inc(&perf_sched_events.key);
 	if (is_cgroup_event(event))
@@ -8617,6 +8771,31 @@ void perf_event_delayed_put(struct task_struct *task)
 		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
+struct perf_event *perf_event_get(unsigned int fd)
+{
+	int err;
+	struct fd f;
+	struct perf_event *event;
+
+	err = perf_fget_light(fd, &f);
+	if (err)
+		return ERR_PTR(err);
+
+	event = f.file->private_data;
+	atomic_long_inc(&event->refcount);
+	fdput(f);
+
+	return event;
+}
+
+const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
+{
+	if (!event)
+		return ERR_PTR(-EINVAL);
+
+	return &event->attr;
+}
+
 /*
  * inherit a event from parent task to child task:
  */
@@ -8915,7 +9094,7 @@ static void perf_event_init_cpu(int cpu)
 	mutex_unlock(&swhash->hlist_mutex);
 }
 
-#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
 	struct remove_event re = { .detach_group = true };
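
Usage sketch (not part of the patch above): the new attr.context_switch bit asks the kernel to emit PERF_RECORD_SWITCH side-band records into an event's ring buffer at every sched-in and sched-out; the header's misc field carries PERF_RECORD_MISC_SWITCH_OUT on the way out, and only CPU-wide events receive the larger PERF_RECORD_SWITCH_CPU_WIDE record that includes the next/prev pid and tid. The C program below is a hypothetical, minimal consumer of that interface. It assumes uapi headers that already provide the PERF_RECORD_SWITCH* and PERF_RECORD_MISC_SWITCH_OUT definitions from this series plus PERF_COUNT_SW_DUMMY; the sys_perf_event_open() wrapper is local to the example, and the ring-buffer walk deliberately ignores wrap-around to stay short.

/*
 * Hypothetical consumer sketch: open a CPU-wide dummy software event with
 * attr.context_switch set, let context switches be logged for a second,
 * then decode PERF_RECORD_SWITCH_CPU_WIDE records from the mmap'ed buffer.
 * Assumes uapi headers carrying the definitions added by this series.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/perf_event.h>

static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	struct perf_event_mmap_page *meta;
	size_t page = sysconf(_SC_PAGESIZE);
	size_t data_size = 8 * page;	/* data area: power-of-two pages */
	uint64_t head, tail = 0;
	void *base;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;	/* no samples, side-band only */
	attr.context_switch = 1;		/* request PERF_RECORD_SWITCH* */
	attr.disabled = 1;

	/* CPU-wide on CPU 0 (pid == -1), so next/prev pid/tid are reported */
	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	base = mmap(NULL, page + data_size, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	meta = base;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	sleep(1);				/* let some switches happen */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	/* Simplified decode: event is disabled, wrapped data is dropped. */
	head = meta->data_head;
	if (head > data_size)
		head = data_size;

	while (tail + sizeof(struct perf_event_header) <= head) {
		struct perf_event_header *hdr =
			(struct perf_event_header *)((char *)base + page + tail);

		if (!hdr->size || tail + hdr->size > head)
			break;
		if (hdr->type == PERF_RECORD_SWITCH_CPU_WIDE) {
			uint32_t *ids = (uint32_t *)(hdr + 1);

			printf("switch %s next/prev pid=%u tid=%u\n",
			       (hdr->misc & PERF_RECORD_MISC_SWITCH_OUT) ?
			       "out" : "in", ids[0], ids[1]);
		}
		tail += hdr->size;
	}

	munmap(base, page + data_size);
	close(fd);
	return 0;
}

Opening the event CPU-wide (pid == -1, cpu == 0) generally needs CAP_SYS_ADMIN or a permissive kernel.perf_event_paranoid setting. A per-task open (pid == 0, cpu == -1) would instead receive the shorter PERF_RECORD_SWITCH records with no next/prev pid/tid payload, matching the "Only CPU-wide events are allowed to see next/prev pid/tid" check in perf_event_switch_output() above.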