perf report: Fix -T/--threads option to work again
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
index cb33e4c8821a8d141de07441f3dfc9a364595a4f..e122970361f21af6d07c321480aefa2cb90bf31d 100644 (file)
@@ -52,7 +52,9 @@ struct tp_field {
 #define TP_UINT_FIELD(bits) \
 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 { \
-       return *(u##bits *)(sample->raw_data + field->offset); \
+       u##bits value; \
+       memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
+       return value;  \
 }
 
 TP_UINT_FIELD(8);
@@ -63,7 +65,8 @@ TP_UINT_FIELD(64);
 #define TP_UINT_FIELD__SWAPPED(bits) \
 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
 { \
-       u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
+       u##bits value; \
+       memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
        return bswap_##bits(value);\
 }
 
@@ -1132,6 +1135,8 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
 
 struct syscall {
        struct event_format *tp_format;
+       int                 nr_args;
+       struct format_field *args;
        const char          *name;
        bool                filtered;
        bool                is_exit;
@@ -1229,6 +1234,10 @@ struct trace {
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
+       struct {
+               size_t          nr;
+               pid_t           *entries;
+       }                       filter_pids;
        double                  duration_filter;
        double                  runtime_ms;
        struct {
@@ -1245,6 +1254,7 @@ struct trace {
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
+       bool                    force;
        int                     trace_pgfaults;
 };
 
@@ -1435,14 +1445,14 @@ static int syscall__set_arg_fmts(struct syscall *sc)
        struct format_field *field;
        int idx = 0;
 
-       sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
+       sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
        if (sc->arg_scnprintf == NULL)
                return -1;
 
        if (sc->fmt)
                sc->arg_parm = sc->fmt->arg_parm;
 
-       for (field = sc->tp_format->format.fields->next; field; field = field->next) {
+       for (field = sc->args; field; field = field->next) {
                if (sc->fmt && sc->fmt->arg_scnprintf[idx])
                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
                else if (field->flags & FIELD_IS_POINTER)
@@ -1508,18 +1518,37 @@ static int trace__read_syscall_info(struct trace *trace, int id)
        if (sc->tp_format == NULL)
                return -1;
 
+       sc->args = sc->tp_format->format.fields;
+       sc->nr_args = sc->tp_format->format.nr_fields;
+       /* drop nr field - not relevant here; does not exist on older kernels */
+       if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+               sc->args = sc->args->next;
+               --sc->nr_args;
+       }
+
        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
 
        return syscall__set_arg_fmts(sc);
 }
 
+/*
+ * args is to be interpreted as a series of longs but we need to handle
+ * 8-byte unaligned accesses. args points to raw_data within the event
+ * and raw_data is guaranteed to be 8-byte unaligned because it is
+ * preceded by raw_size which is a u32. So we need to copy args to a temp
+ * variable to read it. Most notably this avoids extended load instructions
+ * on unaligned addresses
+ */
+
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
-                                     unsigned long *args, struct trace *trace,
+                                     unsigned char *args, struct trace *trace,
                                      struct thread *thread)
 {
        size_t printed = 0;
+       unsigned char *p;
+       unsigned long val;
 
-       if (sc->tp_format != NULL) {
+       if (sc->args != NULL) {
                struct format_field *field;
                u8 bit = 1;
                struct syscall_arg arg = {
@@ -1529,16 +1558,21 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                        .thread = thread,
                };
 
-               for (field = sc->tp_format->format.fields->next; field;
+               for (field = sc->args; field;
                     field = field->next, ++arg.idx, bit <<= 1) {
                        if (arg.mask & bit)
                                continue;
+
+                       /* special care for unaligned accesses */
+                       p = args + sizeof(unsigned long) * arg.idx;
+                       memcpy(&val, p, sizeof(val));
+
                        /*
                         * Suppress this argument if its value is zero and
                         * and we don't have a string associated in an
                         * strarray for it.
                         */
-                       if (args[arg.idx] == 0 &&
+                       if (val == 0 &&
                            !(sc->arg_scnprintf &&
                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
                              sc->arg_parm[arg.idx]))
@@ -1547,23 +1581,26 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                        printed += scnprintf(bf + printed, size - printed,
                                             "%s%s: ", printed ? ", " : "", field->name);
                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
-                               arg.val = args[arg.idx];
+                               arg.val = val;
                                if (sc->arg_parm)
                                        arg.parm = sc->arg_parm[arg.idx];
                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
                                                                      size - printed, &arg);
                        } else {
                                printed += scnprintf(bf + printed, size - printed,
-                                                    "%ld", args[arg.idx]);
+                                                    "%ld", val);
                        }
                }
        } else {
                int i = 0;
 
                while (i < 6) {
+                       /* special care for unaligned accesses */
+                       p = args + sizeof(unsigned long) * i;
+                       memcpy(&val, p, sizeof(val));
                        printed += scnprintf(bf + printed, size - printed,
                                             "%sarg%d: %ld",
-                                            printed ? ", " : "", i, args[i]);
+                                            printed ? ", " : "", i, val);
                        ++i;
                }
        }
@@ -1698,7 +1735,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                        return -1;
        }
 
-       printed += trace__printf_interrupted_entry(trace, sample);
+       if (!trace->summary_only)
+               trace__printf_interrupted_entry(trace, sample);
 
        ttrace->entry_time = sample->time;
        msg = ttrace->entry_str;
@@ -1715,7 +1753,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
        } else
                ttrace->entry_pending = true;
 
-       trace->current = thread;
+       if (trace->current != thread) {
+               thread__put(trace->current);
+               trace->current = thread__get(thread);
+       }
 
        return 0;
 }
@@ -2088,10 +2129,39 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
        return 0;
 }
 
+static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
+{
+       const u32 type = event->header.type;
+       struct perf_evsel *evsel;
+
+       if (!trace->full_time && trace->base_time == 0)
+               trace->base_time = sample->time;
+
+       if (type != PERF_RECORD_SAMPLE) {
+               trace__process_event(trace, trace->host, event, sample);
+               return;
+       }
+
+       evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
+       if (evsel == NULL) {
+               fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
+               return;
+       }
+
+       if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+           sample->raw_data == NULL) {
+               fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
+                      perf_evsel__name(evsel), sample->tid,
+                      sample->cpu, sample->raw_size);
+       } else {
+               tracepoint_handler handler = evsel->handler;
+               handler(trace, evsel, event, sample);
+       }
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
        struct perf_evlist *evlist = trace->evlist;
-       struct perf_evsel *evsel;
        int err = -1, i;
        unsigned long before;
        const bool forks = argc > 0;
@@ -2157,17 +2227,25 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
         * workload was, and in that case we will fill in the thread_map when
         * we fork the workload in perf_evlist__prepare_workload.
         */
-       if (evlist->threads->map[0] == -1)
-               perf_evlist__set_filter_pid(evlist, getpid());
+       if (trace->filter_pids.nr > 0)
+               err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
+       else if (evlist->threads->map[0] == -1)
+               err = perf_evlist__set_filter_pid(evlist, getpid());
+
+       if (err < 0) {
+               printf("err=%d,%s\n", -err, strerror(-err));
+               exit(1);
+       }
 
        err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
        if (err < 0)
                goto out_error_mmap;
 
+       if (!target__none(&trace->opts.target))
+               perf_evlist__enable(evlist);
+
        if (forks)
                perf_evlist__start_workload(evlist);
-       else
-               perf_evlist__enable(evlist);
 
        trace->multiple_threads = evlist->threads->map[0] == -1 ||
                                  evlist->threads->nr > 1 ||
@@ -2179,8 +2257,6 @@ again:
                union perf_event *event;
 
                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
-                       const u32 type = event->header.type;
-                       tracepoint_handler handler;
                        struct perf_sample sample;
 
                        ++trace->nr_events;
@@ -2191,35 +2267,17 @@ again:
                                goto next_event;
                        }
 
-                       if (!trace->full_time && trace->base_time == 0)
-                               trace->base_time = sample.time;
-
-                       if (type != PERF_RECORD_SAMPLE) {
-                               trace__process_event(trace, trace->host, event, &sample);
-                               continue;
-                       }
-
-                       evsel = perf_evlist__id2evsel(evlist, sample.id);
-                       if (evsel == NULL) {
-                               fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
-                               goto next_event;
-                       }
-
-                       if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
-                           sample.raw_data == NULL) {
-                               fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
-                                      perf_evsel__name(evsel), sample.tid,
-                                      sample.cpu, sample.raw_size);
-                               goto next_event;
-                       }
-
-                       handler = evsel->handler;
-                       handler(trace, evsel, event, &sample);
+                       trace__handle_event(trace, event, &sample);
 next_event:
                        perf_evlist__mmap_consume(evlist, i);
 
                        if (interrupted)
                                goto out_disable;
+
+                       if (done && !draining) {
+                               perf_evlist__disable(evlist);
+                               draining = true;
+                       }
                }
        }
 
@@ -2237,6 +2295,8 @@ next_event:
        }
 
 out_disable:
+       thread__zput(trace->current);
+
        perf_evlist__disable(evlist);
 
        if (!err) {
@@ -2292,6 +2352,7 @@ static int trace__replay(struct trace *trace)
        struct perf_data_file file = {
                .path  = input_name,
                .mode  = PERF_DATA_MODE_READ,
+               .force = trace->force,
        };
        struct perf_session *session;
        struct perf_evsel *evsel;
@@ -2366,7 +2427,7 @@ static int trace__replay(struct trace *trace)
 
        setup_pager();
 
-       err = perf_session__process_events(session, &trace->tool);
+       err = perf_session__process_events(session);
        if (err)
                pr_err("Failed to process events, error %d", err);
 
@@ -2491,6 +2552,38 @@ static int trace__set_duration(const struct option *opt, const char *str,
        return 0;
 }
 
+static int trace__set_filter_pids(const struct option *opt, const char *str,
+                                 int unset __maybe_unused)
+{
+       int ret = -1;
+       size_t i;
+       struct trace *trace = opt->value;
+       /*
+        * FIXME: introduce an intarray class, plain parse csv and create a
+        * { int nr, int entries[] } struct...
+        */
+       struct intlist *list = intlist__new(str);
+
+       if (list == NULL)
+               return -1;
+
+       i = intlist__nr_entries(list) + 1;
+       trace->filter_pids.entries = calloc(i, sizeof(pid_t));
+       if (trace->filter_pids.entries == NULL)
+               goto out;
+       trace->filter_pids.nr = i;      /* set the count only once entries exists */
+
+       trace->filter_pids.entries[0] = getpid();       /* slot 0: our own pid */
+
+       for (i = 1; i < trace->filter_pids.nr; ++i)
+               trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
+
+       ret = 0;
+out:
+       intlist__delete(list);  /* scratch list: release it on every path */
+       return ret;
+}
+
 static int trace__open_output(struct trace *trace, const char *filename)
 {
        struct stat st;
@@ -2535,7 +2628,7 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
 
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-       const char * const trace_usage[] = {
+       const char *trace_usage[] = {
                "perf trace [<options>] [<command>]",
                "perf trace [<options>] -- <command> [<options>]",
                "perf trace record [<options>] [<command>]",
@@ -2581,6 +2674,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                    "trace events on existing process id"),
        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
                    "trace events on existing thread id"),
+       OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
+                    "pids to filter (by the kernel)", trace__set_filter_pids),
        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
@@ -2606,11 +2701,16 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
                     "Trace pagefaults", parse_pagefaults, "maj"),
        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
+       OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
        OPT_END()
        };
+       const char * const trace_subcommands[] = { "record", NULL };
        int err;
        char bf[BUFSIZ];
 
+       signal(SIGSEGV, sighandler_dump_stack);
+       signal(SIGFPE, sighandler_dump_stack);
+
        trace.evlist = perf_evlist__new();
        if (trace.evlist == NULL)
                return -ENOMEM;
@@ -2620,8 +2720,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out;
        }
 
-       argc = parse_options(argc, argv, trace_options, trace_usage,
-                            PARSE_OPT_STOP_AT_NON_OPTION);
+       argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
+                                trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
        if (trace.trace_pgfaults) {
                trace.opts.sample_address = true;