Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorIngo Molnar <mingo@kernel.org>
Fri, 7 Aug 2015 07:11:30 +0000 (09:11 +0200)
committerIngo Molnar <mingo@kernel.org>
Fri, 7 Aug 2015 07:11:30 +0000 (09:11 +0200)
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - IPC and cycle accounting in 'perf annotate'. (Andi Kleen)

  - Display cycles in branch sort mode in 'perf report'. (Andi Kleen)

  - Add total time column to 'perf trace' syscall stats summary. (Milian Wolff)

Infrastructure changes:

  - PMU helpers to use in Intel PT. (Adrian Hunter)

  - Fix perf-with-kcore script not to split args with spaces. (Adrian Hunter)

  - Add empty Build files for some more architectures. (Ben Hutchings)

  - Move 'perf stat' config variables to a struct to allow using some
    of its functions in more places. (Jiri Olsa)

  - Add DWARF register names for 'xtensa' arch. (Max Filippov)

  - Implement BPF programs attached to uprobes. (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
39 files changed:
include/linux/trace_events.h
kernel/events/core.c
kernel/trace/Kconfig
kernel/trace/trace_uprobe.c
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-top.txt
tools/perf/arch/alpha/Build [new file with mode: 0644]
tools/perf/arch/mips/Build [new file with mode: 0644]
tools/perf/arch/parisc/Build [new file with mode: 0644]
tools/perf/arch/xtensa/Build [new file with mode: 0644]
tools/perf/arch/xtensa/Makefile [new file with mode: 0644]
tools/perf/arch/xtensa/util/Build [new file with mode: 0644]
tools/perf/arch/xtensa/util/dwarf-regs.c [new file with mode: 0644]
tools/perf/builtin-annotate.c
tools/perf/builtin-report.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/perf-with-kcore.sh
tools/perf/ui/browsers/annotate.c
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/auxtrace.c
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/probe-event.h
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat.c
tools/perf/util/stat.h

index 1063c850dbab695046d973c5e91a9af499d15fee..ed27917cabc9d95ff3b30b115f1f661626edba17 100644 (file)
@@ -243,6 +243,7 @@ enum {
        TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
        TRACE_EVENT_FL_TRACEPOINT_BIT,
        TRACE_EVENT_FL_KPROBE_BIT,
+       TRACE_EVENT_FL_UPROBE_BIT,
 };
 
 /*
@@ -257,6 +258,7 @@ enum {
  *  USE_CALL_FILTER - For trace internal events, don't use file filter
  *  TRACEPOINT    - Event is a tracepoint
  *  KPROBE        - Event is a kprobe
+ *  UPROBE        - Event is a uprobe
  */
 enum {
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -267,8 +269,11 @@ enum {
        TRACE_EVENT_FL_USE_CALL_FILTER  = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
        TRACE_EVENT_FL_TRACEPOINT       = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
        TRACE_EVENT_FL_KPROBE           = (1 << TRACE_EVENT_FL_KPROBE_BIT),
+       TRACE_EVENT_FL_UPROBE           = (1 << TRACE_EVENT_FL_UPROBE_BIT),
 };
 
+#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
+
 struct trace_event_call {
        struct list_head        list;
        struct trace_event_class *class;
@@ -542,7 +547,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
                event_triggers_post_call(file, tt);
 }
 
-#ifdef CONFIG_BPF_SYSCALL
+#ifdef CONFIG_BPF_EVENTS
 unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
 #else
 static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
index bdea12924b111faa2647038e93b051e365887c16..77f9e5d0e2d17d4b11ce0209311f4fe783a07db3 100644 (file)
@@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
        if (event->tp_event->prog)
                return -EEXIST;
 
-       if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
-               /* bpf programs can only be attached to kprobes */
+       if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
+               /* bpf programs can only be attached to u/kprobes */
                return -EINVAL;
 
        prog = bpf_prog_get(prog_fd);
index 3b9a48ae153ac85c5f601b03ef64c7e9d49f153c..1153c43428f3b51d43aba2e8df21950d9029ab04 100644 (file)
@@ -434,7 +434,7 @@ config UPROBE_EVENT
 
 config BPF_EVENTS
        depends on BPF_SYSCALL
-       depends on KPROBE_EVENT
+       depends on KPROBE_EVENT || UPROBE_EVENT
        bool
        default y
        help
index aa1ea7b36fa889877bb790831a01a8d14b472221..f97479f1ce3523783df853d4fd0f2c76ba4ae129 100644 (file)
@@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 {
        struct trace_event_call *call = &tu->tp.call;
        struct uprobe_trace_entry_head *entry;
+       struct bpf_prog *prog = call->prog;
        struct hlist_head *head;
        void *data;
        int size, esize;
        int rctx;
 
+       if (prog && !trace_call_bpf(prog, regs))
+               return;
+
        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
        size = esize + tu->tp.size + dsize;
@@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
                return -ENODEV;
        }
 
+       call->flags = TRACE_EVENT_FL_UPROBE;
        call->class->reg = trace_uprobe_register;
        call->data = tu;
        ret = trace_add_event_call(call);
index c33b69f3374fda01b6a9ee7dcca6b7bbb2f74e8c..960da203ec11143ea529bfbf80b0fab0503322d5 100644 (file)
@@ -109,6 +109,7 @@ OPTIONS
        - mispredict: "N" for predicted branch, "Y" for mispredicted branch
        - in_tx: branch in TSX transaction
        - abort: TSX transaction abort.
+       - cycles: Cycles in basic block
 
        And default sort keys are changed to comm, dso_from, symbol_from, dso_to
        and symbol_to, see '--branch-stack'.
index 776aec4d092771ed8ea7c68c7ce205d0b7578aaa..f6a23eb294e77ad2f364f7275d4916336ca60809 100644 (file)
@@ -208,6 +208,27 @@ Default is to monitor all CPUS.
        This option sets the time out limit. The default value is 500 ms.
 
 
+-b::
+--branch-any::
+       Enable taken branch stack sampling. Any type of taken branch may be sampled.
+       This is a shortcut for --branch-filter any. See --branch-filter for more information.
+
+-j::
+--branch-filter::
+       Enable taken branch stack sampling. Each sample captures a series of consecutive
+       taken branches. The number of branches captured with each sample depends on the
+       underlying hardware, the type of branches of interest, and the executed code.
+       It is possible to select the types of branches captured by enabling filters.
+       For a full list of modifiers please see the perf record manpage.
+
+       The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+       The privilege levels may be omitted, in which case, the privilege levels of the associated
+       event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+       levels are subject to permissions.  When sampling on multiple events, branch stack sampling
+       is enabled for all the sampling events. The sampled branch type is the same for all events.
+       The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+       Note that this feature may not be available on all processors.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644 (file)
index 0000000..1bb8bf6
--- /dev/null
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644 (file)
index 0000000..1bb8bf6
--- /dev/null
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644 (file)
index 0000000..1bb8bf6
--- /dev/null
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
new file mode 100644 (file)
index 0000000..54afe4a
--- /dev/null
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile
new file mode 100644 (file)
index 0000000..7fbca17
--- /dev/null
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
new file mode 100644 (file)
index 0000000..954e287
--- /dev/null
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644 (file)
index 0000000..4dba76b
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+       "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+       "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+       return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
index 2c1bec39c30ea191fecb46654df14dc6229d27d5..467a23b14e2f9a2cb745bdb1d44cf22350fd66e6 100644 (file)
@@ -187,6 +187,7 @@ find_next:
                         * symbol, free he->ms.sym->src to signal we already
                         * processed this symbol.
                         */
+                       zfree(&notes->src->cycles_hist);
                        zfree(&notes->src);
                }
        }
index 95a47719aec302318defcb8de17fd8789012b819..3a9d1b659fcd353a1b4df9222c13b83cda045efb 100644 (file)
@@ -53,6 +53,7 @@ struct report {
        bool                    mem_mode;
        bool                    header;
        bool                    header_only;
+       bool                    nonany_branch_mode;
        int                     max_stack;
        struct perf_read_values show_threads_values;
        const char              *pretty_printing_style;
@@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
        if (!ui__has_annotation())
                return 0;
 
+       hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+                            rep->nonany_branch_mode);
+
        if (sort__mode == SORT_MODE__BRANCH) {
                bi = he->branch_info;
                err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
@@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
                else
                        callchain_param.record_mode = CALLCHAIN_FP;
        }
+
+       /* ??? handle more cases than just ANY? */
+       if (!(perf_evlist__combined_branch_type(session->evlist) &
+                               PERF_SAMPLE_BRANCH_ANY))
+               rep->nonany_branch_mode = true;
+
        return 0;
 }
 
index d99d850e1444c9751cdbf7e0552b314e26597fef..a054ddc0b2a013aca7023ee9cfb2b3840f4827d5 100644 (file)
@@ -101,8 +101,6 @@ static struct target target = {
 
 static int                     run_count                       =  1;
 static bool                    no_inherit                      = false;
-static bool                    scale                           =  true;
-static enum aggr_mode          aggr_mode                       = AGGR_GLOBAL;
 static volatile pid_t          child_pid                       = -1;
 static bool                    null_run                        =  false;
 static int                     detailed_run                    =  0;
@@ -112,11 +110,9 @@ static int                 big_num_opt                     =  -1;
 static const char              *csv_sep                        = NULL;
 static bool                    csv_output                      = false;
 static bool                    group                           = false;
-static FILE                    *output                         = NULL;
 static const char              *pre_cmd                        = NULL;
 static const char              *post_cmd                       = NULL;
 static bool                    sync_run                        = false;
-static unsigned int            interval                        = 0;
 static unsigned int            initial_delay                   = 0;
 static unsigned int            unit_width                      = 4; /* strlen("unit") */
 static bool                    forever                         = false;
@@ -126,6 +122,11 @@ static int                 (*aggr_get_id)(struct cpu_map *m, int cpu);
 
 static volatile int done = 0;
 
+static struct perf_stat_config stat_config = {
+       .aggr_mode      = AGGR_GLOBAL,
+       .scale          = true,
+};
+
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
                                 struct timespec *b)
 {
@@ -148,7 +149,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
        struct perf_event_attr *attr = &evsel->attr;
 
-       if (scale)
+       if (stat_config.scale)
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
@@ -178,142 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
        return 0;
 }
 
-static void zero_per_pkg(struct perf_evsel *counter)
-{
-       if (counter->per_pkg_mask)
-               memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
-}
-
-static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
-{
-       unsigned long *mask = counter->per_pkg_mask;
-       struct cpu_map *cpus = perf_evsel__cpus(counter);
-       int s;
-
-       *skip = false;
-
-       if (!counter->per_pkg)
-               return 0;
-
-       if (cpu_map__empty(cpus))
-               return 0;
-
-       if (!mask) {
-               mask = zalloc(MAX_NR_CPUS);
-               if (!mask)
-                       return -ENOMEM;
-
-               counter->per_pkg_mask = mask;
-       }
-
-       s = cpu_map__get_socket(cpus, cpu);
-       if (s < 0)
-               return -1;
-
-       *skip = test_and_set_bit(s, mask) == 1;
-       return 0;
-}
-
-static int
-process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
-                      struct perf_counts_values *count)
-{
-       struct perf_counts_values *aggr = &evsel->counts->aggr;
-       static struct perf_counts_values zero;
-       bool skip = false;
-
-       if (check_per_pkg(evsel, cpu, &skip)) {
-               pr_err("failed to read per-pkg counter\n");
-               return -1;
-       }
-
-       if (skip)
-               count = &zero;
-
-       switch (aggr_mode) {
-       case AGGR_THREAD:
-       case AGGR_CORE:
-       case AGGR_SOCKET:
-       case AGGR_NONE:
-               if (!evsel->snapshot)
-                       perf_evsel__compute_deltas(evsel, cpu, thread, count);
-               perf_counts_values__scale(count, scale, NULL);
-               if (aggr_mode == AGGR_NONE)
-                       perf_stat__update_shadow_stats(evsel, count->values, cpu);
-               break;
-       case AGGR_GLOBAL:
-               aggr->val += count->val;
-               if (scale) {
-                       aggr->ena += count->ena;
-                       aggr->run += count->run;
-               }
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-static int process_counter_maps(struct perf_evsel *counter)
-{
-       int nthreads = thread_map__nr(counter->threads);
-       int ncpus = perf_evsel__nr_cpus(counter);
-       int cpu, thread;
-
-       if (counter->system_wide)
-               nthreads = 1;
-
-       for (thread = 0; thread < nthreads; thread++) {
-               for (cpu = 0; cpu < ncpus; cpu++) {
-                       if (process_counter_values(counter, cpu, thread,
-                                                  perf_counts(counter->counts, cpu, thread)))
-                               return -1;
-               }
-       }
-
-       return 0;
-}
-
-static int process_counter(struct perf_evsel *counter)
-{
-       struct perf_counts_values *aggr = &counter->counts->aggr;
-       struct perf_stat *ps = counter->priv;
-       u64 *count = counter->counts->aggr.values;
-       int i, ret;
-
-       aggr->val = aggr->ena = aggr->run = 0;
-       init_stats(ps->res_stats);
-
-       if (counter->per_pkg)
-               zero_per_pkg(counter);
-
-       ret = process_counter_maps(counter);
-       if (ret)
-               return ret;
-
-       if (aggr_mode != AGGR_GLOBAL)
-               return 0;
-
-       if (!counter->snapshot)
-               perf_evsel__compute_deltas(counter, -1, -1, aggr);
-       perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
-
-       for (i = 0; i < 3; i++)
-               update_stats(&ps->res_stats[i], count[i]);
-
-       if (verbose) {
-               fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-                       perf_evsel__name(counter), count[0], count[1], count[2]);
-       }
-
-       /*
-        * Save the full runtime - to allow normalization during printout:
-        */
-       perf_stat__update_shadow_stats(counter, count, 0);
-
-       return 0;
-}
-
 /*
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
@@ -351,7 +216,7 @@ static void read_counters(bool close_counters)
                if (read_counter(counter))
                        pr_warning("failed to read counter %s\n", counter->name);
 
-               if (process_counter(counter))
+               if (perf_stat_process_counter(&stat_config, counter))
                        pr_warning("failed to process counter %s\n", counter->name);
 
                if (close_counters) {
@@ -402,6 +267,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
 
 static int __run_perf_stat(int argc, const char **argv)
 {
+       int interval = stat_config.interval;
        char msg[512];
        unsigned long long t0, t1;
        struct perf_evsel *counter;
@@ -545,13 +411,13 @@ static int run_perf_stat(int argc, const char **argv)
 static void print_running(u64 run, u64 ena)
 {
        if (csv_output) {
-               fprintf(output, "%s%" PRIu64 "%s%.2f",
+               fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
                                        csv_sep,
                                        run,
                                        csv_sep,
                                        ena ? 100.0 * run / ena : 100.0);
        } else if (run != ena) {
-               fprintf(output, "  (%.2f%%)", 100.0 * run / ena);
+               fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
        }
 }
 
@@ -560,9 +426,9 @@ static void print_noise_pct(double total, double avg)
        double pct = rel_stddev_stats(total, avg);
 
        if (csv_output)
-               fprintf(output, "%s%.2f%%", csv_sep, pct);
+               fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
        else if (pct)
-               fprintf(output, "  ( +-%6.2f%% )", pct);
+               fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
 }
 
 static void print_noise(struct perf_evsel *evsel, double avg)
@@ -578,9 +444,9 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 
 static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 {
-       switch (aggr_mode) {
+       switch (stat_config.aggr_mode) {
        case AGGR_CORE:
-               fprintf(output, "S%d-C%*d%s%*d%s",
+               fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
                        cpu_map__id_to_socket(id),
                        csv_output ? 0 : -8,
                        cpu_map__id_to_cpu(id),
@@ -590,7 +456,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
                        csv_sep);
                break;
        case AGGR_SOCKET:
-               fprintf(output, "S%*d%s%*d%s",
+               fprintf(stat_config.output, "S%*d%s%*d%s",
                        csv_output ? 0 : -5,
                        id,
                        csv_sep,
@@ -599,12 +465,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
                        csv_sep);
                        break;
        case AGGR_NONE:
-               fprintf(output, "CPU%*d%s",
+               fprintf(stat_config.output, "CPU%*d%s",
                        csv_output ? 0 : -4,
                        perf_evsel__cpus(evsel)->map[id], csv_sep);
                break;
        case AGGR_THREAD:
-               fprintf(output, "%*s-%*d%s",
+               fprintf(stat_config.output, "%*s-%*d%s",
                        csv_output ? 0 : 16,
                        thread_map__comm(evsel->threads, id),
                        csv_output ? 0 : -8,
@@ -619,6 +485,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
+       FILE *output = stat_config.output;
        double msecs = avg / 1e6;
        const char *fmt_v, *fmt_n;
        char name[25];
@@ -643,7 +510,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
        if (evsel->cgrp)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 
-       if (csv_output || interval)
+       if (csv_output || stat_config.interval)
                return;
 
        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -655,6 +522,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
+       FILE *output = stat_config.output;
        double sc =  evsel->scale;
        const char *fmt;
        int cpu = cpu_map__id_to_cpu(id);
@@ -670,7 +538,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 
        aggr_printout(evsel, id, nr);
 
-       if (aggr_mode == AGGR_GLOBAL)
+       if (stat_config.aggr_mode == AGGR_GLOBAL)
                cpu = 0;
 
        fprintf(output, fmt, avg, csv_sep);
@@ -685,14 +553,16 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
        if (evsel->cgrp)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 
-       if (csv_output || interval)
+       if (csv_output || stat_config.interval)
                return;
 
-       perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
+       perf_stat__print_shadow_stats(output, evsel, avg, cpu,
+                                     stat_config.aggr_mode);
 }
 
 static void print_aggr(char *prefix)
 {
+       FILE *output = stat_config.output;
        struct perf_evsel *counter;
        int cpu, cpu2, s, s2, id, nr;
        double uval;
@@ -761,6 +631,7 @@ static void print_aggr(char *prefix)
 
 static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
 {
+       FILE *output = stat_config.output;
        int nthreads = thread_map__nr(counter->threads);
        int ncpus = cpu_map__nr(counter->cpus);
        int cpu, thread;
@@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
  */
 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 {
+       FILE *output = stat_config.output;
        struct perf_stat *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
        int scaled = counter->counts->scaled;
@@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
  */
 static void print_counter(struct perf_evsel *counter, char *prefix)
 {
+       FILE *output = stat_config.output;
        u64 ena, run, val;
        double uval;
        int cpu;
@@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 
 static void print_interval(char *prefix, struct timespec *ts)
 {
+       FILE *output = stat_config.output;
        static int num_print_interval;
 
        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
        if (num_print_interval == 0 && !csv_output) {
-               switch (aggr_mode) {
+               switch (stat_config.aggr_mode) {
                case AGGR_SOCKET:
                        fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
                        break;
@@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 
 static void print_header(int argc, const char **argv)
 {
+       FILE *output = stat_config.output;
        int i;
 
        fflush(stdout);
@@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv)
 
 static void print_footer(void)
 {
+       FILE *output = stat_config.output;
+
        if (!null_run)
                fprintf(output, "\n");
        fprintf(output, " %17.9f seconds time elapsed",
@@ -977,6 +854,7 @@ static void print_footer(void)
 
 static void print_counters(struct timespec *ts, int argc, const char **argv)
 {
+       int interval = stat_config.interval;
        struct perf_evsel *counter;
        char buf[64], *prefix = NULL;
 
@@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        else
                print_header(argc, argv);
 
-       switch (aggr_mode) {
+       switch (stat_config.aggr_mode) {
        case AGGR_CORE:
        case AGGR_SOCKET:
                print_aggr(prefix);
@@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        if (!interval && !csv_output)
                print_footer();
 
-       fflush(output);
+       fflush(stat_config.output);
 }
 
 static volatile int signr = -1;
 
 static void skip_signal(int signo)
 {
-       if ((child_pid == -1) || interval)
+       if ((child_pid == -1) || stat_config.interval)
                done = 1;
 
        signr = signo;
@@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
 
 static int perf_stat_init_aggr_mode(void)
 {
-       switch (aggr_mode) {
+       switch (stat_config.aggr_mode) {
        case AGGR_SOCKET:
                if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
                        perror("cannot build socket map");
@@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('g', "group", &group,
                    "put the counters into a counter group"),
-       OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
+       OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_INTEGER('r', "repeat", &run_count,
@@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                           stat__set_big_num),
        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
-       OPT_SET_UINT('A', "no-aggr", &aggr_mode,
+       OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
                    "disable CPU count aggregation", AGGR_NONE),
        OPT_STRING('x', "field-separator", &csv_sep, "separator",
                   "print counts with custom separator"),
@@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                        "command to run prior to the measured command"),
        OPT_STRING(0, "post", &post_cmd, "command",
                        "command to run after to the measured command"),
-       OPT_UINTEGER('I', "interval-print", &interval,
+       OPT_UINTEGER('I', "interval-print", &stat_config.interval,
                    "print counts at regular interval in ms (>= 100)"),
-       OPT_SET_UINT(0, "per-socket", &aggr_mode,
+       OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
                     "aggregate counts per processor socket", AGGR_SOCKET),
-       OPT_SET_UINT(0, "per-core", &aggr_mode,
+       OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
                     "aggregate counts per physical processor core", AGGR_CORE),
-       OPT_SET_UINT(0, "per-thread", &aggr_mode,
+       OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_UINTEGER('D', "delay", &initial_delay,
                     "ms to wait before starting measurement after program start"),
@@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        };
        int status = -EINVAL, run_idx;
        const char *mode;
+       FILE *output = stderr;
+       unsigned int interval;
 
        setlocale(LC_ALL, "");
 
@@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        argc = parse_options(argc, argv, options, stat_usage,
                PARSE_OPT_STOP_AT_NON_OPTION);
 
-       output = stderr;
+       interval = stat_config.interval;
+
        if (output_name && strcmp(output_name, "-"))
                output = NULL;
 
@@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                }
        }
 
+       stat_config.output = output;
+
        if (csv_sep) {
                csv_output = true;
                if (!strcmp(csv_sep, "\\t"))
@@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                run_count = 1;
        }
 
-       if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
+       if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
                fprintf(stderr, "The --per-thread option is only available "
                        "when monitoring via -p -t options.\n");
                parse_options_usage(NULL, options, "p", 1);
@@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
         * no_aggr, cgroup are for system-wide only
         * --per-thread is aggregated per thread, we dont mix it with cpu mode
         */
-       if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
+       if (((stat_config.aggr_mode != AGGR_GLOBAL &&
+             stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
            !target__has_cpu(&target)) {
                fprintf(stderr, "both cgroup and no-aggregation "
                        "modes only available in system-wide mode\n");
@@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
         * Initialize thread_map with comm names,
         * so we could print it out on output.
         */
-       if (aggr_mode == AGGR_THREAD)
+       if (stat_config.aggr_mode == AGGR_THREAD)
                thread_map__read_comms(evsel_list->threads);
 
        if (interval && interval < 100) {
index ecf319728f25d649768e33b3e1f274d04432f3fc..bfe24f1e362f4bd40d4c583e2f025b85a05c2005 100644 (file)
@@ -40,6 +40,7 @@
 #include "util/xyarray.h"
 #include "util/sort.h"
 #include "util/intlist.h"
+#include "util/parse-branch-options.h"
 #include "arch/common.h"
 
 #include "util/debug.h"
@@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
                perf_top__record_precise_ip(top, he, evsel->idx, ip);
        }
 
+       hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+                    !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
        return 0;
 }
 
@@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                   "don't try to adjust column width, use these fixed values"),
        OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
+       OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
+                    "branch any", "sample any taken branches",
+                    parse_branch_stack),
+       OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
+                    "branch filter mask", "branch stack filter modes",
+                    parse_branch_stack),
        OPT_END()
        };
        const char * const top_usage[] = {
index a47497011c93e7ad305d6c0891562e2c6e4aa386..a25048c85b76f2e5e5c7f9a1513a405796957af2 100644 (file)
@@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 
        printed += fprintf(fp, "\n");
 
-       printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
-       printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
-       printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
+       printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
+       printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
+       printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
 
        /* each int_node is a syscall */
        while (inode) {
@@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 
                        sc = &trace->syscalls.table[inode->i];
                        printed += fprintf(fp, "   %-15s", sc->name);
-                       printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
-                                          n, min, avg);
+                       printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
+                                          n, avg * n, min, avg);
                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
                }
 
index c7ff90a90e4eb898b18177390220662d8c4a34e0..7e47a7cbc1950eff7b513159db41cf63a23d4fb1 100644 (file)
@@ -50,7 +50,7 @@ copy_kcore()
        fi
 
        rm -f perf.data.junk
-       ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null &
+       ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
        PERF_PID=$!
 
        # Need to make sure that perf has started
@@ -160,18 +160,18 @@ record()
                        echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
                fi
 
-               if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
+               if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
                        echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
                fi
 
-               if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
+               if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
                        if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
                                echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
                        fi
 
-                       if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
+                       if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
                                true
-                       elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
+                       elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
                                true
                        elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
                                echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
@@ -193,8 +193,8 @@ record()
 
        mkdir "$PERF_DATA_DIR"
 
-       echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*"
-       "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true
+       echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
+       "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
 
        if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
                exit 1
@@ -209,8 +209,8 @@ subcommand()
 {
        find_perf
        check_buildid_cache_permissions
-       echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*"
-       "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $*
+       echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
+       "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
 }
 
 if [ "$1" = "fix_buildid_cache_permissions" ] ; then
@@ -234,7 +234,7 @@ fi
 case "$PERF_SUB_COMMAND" in
 "record")
        while [ "$1" != "--" ] ; do
-               PERF_OPTIONS+="$1 "
+               PERF_OPTIONS+=("$1")
                shift || break
        done
        if [ "$1" != "--" ] ; then
@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
                usage
        fi
        shift
-       record $*
+       record "$@"
 ;;
 "script")
-       subcommand $*
+       subcommand "$@"
 ;;
 "report")
-       subcommand $*
+       subcommand "$@"
 ;;
 "inject")
-       subcommand $*
+       subcommand "$@"
 ;;
 *)
        usage
index 5995a8bd7c6971dc4300f9ecc508135645420e7e..b5fc847f9660898fcf305567ebd1c8750b1f161d 100644 (file)
@@ -16,6 +16,9 @@ struct disasm_line_samples {
        u64             nr;
 };
 
+#define IPC_WIDTH 6
+#define CYCLES_WIDTH 6
+
 struct browser_disasm_line {
        struct rb_node                  rb_node;
        u32                             idx;
@@ -53,6 +56,7 @@ struct annotate_browser {
        int                 max_jump_sources;
        int                 nr_jumps;
        bool                searching_backwards;
+       bool                have_cycles;
        u8                  addr_width;
        u8                  jumps_width;
        u8                  target_width;
@@ -96,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
         return ui_browser__set_color(&browser->b, color);
 }
 
+static int annotate_browser__pcnt_width(struct annotate_browser *ab)
+{
+       int w = 7 * ab->nr_events;
+
+       if (ab->have_cycles)
+               w += IPC_WIDTH + CYCLES_WIDTH;
+       return w;
+}
+
 static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
 {
        struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -106,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
                             (!current_entry || (browser->use_navkeypressed &&
                                                 !browser->navkeypressed)));
        int width = browser->width, printed;
-       int i, pcnt_width = 7 * ab->nr_events;
+       int i, pcnt_width = annotate_browser__pcnt_width(ab);
        double percent_max = 0.0;
        char bf[256];
 
@@ -116,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
        }
 
        if (dl->offset != -1 && percent_max != 0.0) {
-               for (i = 0; i < ab->nr_events; i++) {
-                       ui_browser__set_percent_color(browser,
-                                                     bdl->samples[i].percent,
-                                                     current_entry);
-                       if (annotate_browser__opts.show_total_period)
-                               slsmg_printf("%6" PRIu64 " ",
-                                            bdl->samples[i].nr);
-                       else
-                               slsmg_printf("%6.2f ", bdl->samples[i].percent);
+               if (percent_max != 0.0) {
+                       for (i = 0; i < ab->nr_events; i++) {
+                               ui_browser__set_percent_color(browser,
+                                                       bdl->samples[i].percent,
+                                                       current_entry);
+                               if (annotate_browser__opts.show_total_period)
+                                       slsmg_printf("%6" PRIu64 " ",
+                                                    bdl->samples[i].nr);
+                               else
+                                       slsmg_printf("%6.2f ", bdl->samples[i].percent);
+                       }
+               } else {
+                       slsmg_write_nstring(" ", 7 * ab->nr_events);
                }
        } else {
                ui_browser__set_percent_color(browser, 0, current_entry);
-               slsmg_write_nstring(" ", pcnt_width);
+               slsmg_write_nstring(" ", 7 * ab->nr_events);
+       }
+       if (ab->have_cycles) {
+               if (dl->ipc)
+                       slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc);
+               else
+                       slsmg_write_nstring(" ", IPC_WIDTH);
+               if (dl->cycles)
+                       slsmg_printf("%*" PRIu64 " ",
+                                    CYCLES_WIDTH - 1, dl->cycles);
+               else
+                       slsmg_write_nstring(" ", CYCLES_WIDTH);
        }
 
        SLsmg_write_char(' ');
@@ -231,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
        unsigned int from, to;
        struct map_symbol *ms = ab->b.priv;
        struct symbol *sym = ms->sym;
-       u8 pcnt_width = 7;
+       u8 pcnt_width = annotate_browser__pcnt_width(ab);
 
        /* PLT symbols contain external offsets */
        if (strstr(sym->name, "@plt"))
@@ -255,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
                to = (u64)btarget->idx;
        }
 
-       pcnt_width *= ab->nr_events;
-
        ui_browser__set_color(browser, HE_COLORSET_CODE);
        __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
                                 from, to);
@@ -266,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
 {
        struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
        int ret = ui_browser__list_head_refresh(browser);
-       int pcnt_width;
-
-       pcnt_width = 7 * ab->nr_events;
+       int pcnt_width = annotate_browser__pcnt_width(ab);
 
        if (annotate_browser__opts.jump_arrows)
                annotate_browser__draw_current_jump(browser);
@@ -390,7 +414,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
                                max_percent = bpos->samples[i].percent;
                }
 
-               if (max_percent < 0.01) {
+               if (max_percent < 0.01 && pos->ipc == 0) {
                        RB_CLEAR_NODE(&bpos->rb_node);
                        continue;
                }
@@ -869,6 +893,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
        return map_symbol__tui_annotate(&he->ms, evsel, hbt);
 }
 
+
+static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
+{
+       unsigned n_insn = 0;
+       u64 offset;
+
+       for (offset = start; offset <= end; offset++) {
+               if (browser->offsets[offset])
+                       n_insn++;
+       }
+       return n_insn;
+}
+
+static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
+                          struct cyc_hist *ch)
+{
+       unsigned n_insn;
+       u64 offset;
+
+       n_insn = count_insn(browser, start, end);
+       if (n_insn && ch->num && ch->cycles) {
+               float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
+
+               /* Hide data when there are too many overlaps. */
+               if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
+                       return;
+
+               for (offset = start; offset <= end; offset++) {
+                       struct disasm_line *dl = browser->offsets[offset];
+
+                       if (dl)
+                               dl->ipc = ipc;
+               }
+       }
+}
+
+/*
+ * This should probably be in util/annotate.c to share with the tty
+ * annotate, but right now we need the per byte offsets arrays,
+ * which are only here.
+ */
+static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
+                          struct symbol *sym)
+{
+       u64 offset;
+       struct annotation *notes = symbol__annotation(sym);
+
+       if (!notes->src || !notes->src->cycles_hist)
+               return;
+
+       pthread_mutex_lock(&notes->lock);
+       for (offset = 0; offset < size; ++offset) {
+               struct cyc_hist *ch;
+
+               ch = &notes->src->cycles_hist[offset];
+               if (ch && ch->cycles) {
+                       struct disasm_line *dl;
+
+                       if (ch->have_start)
+                               count_and_fill(browser, ch->start, offset, ch);
+                       dl = browser->offsets[offset];
+                       if (dl && ch->num_aggr)
+                               dl->cycles = ch->cycles_aggr / ch->num_aggr;
+                       browser->have_cycles = true;
+               }
+       }
+       pthread_mutex_unlock(&notes->lock);
+}
+
 static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
                                                size_t size)
 {
@@ -991,6 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
        }
 
        annotate_browser__mark_jump_targets(&browser, size);
+       annotate__compute_ipc(&browser, size, sym);
 
        browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
        browser.max_addr_width = hex_width(sym->end);
index 03b7bc70eb66032d4502ec8bfda2e15a9d44cd57..e0b6146480442ca6046cc498d53af0e8f1987d7c 100644 (file)
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
        return 0;
 }
 
+/* The cycles histogram is lazily allocated. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+       struct annotation *notes = symbol__annotation(sym);
+       const size_t size = symbol__size(sym);
+
+       notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+       if (notes->src->cycles_hist == NULL)
+               return -1;
+       return 0;
+}
+
 void symbol__annotate_zero_histograms(struct symbol *sym)
 {
        struct annotation *notes = symbol__annotation(sym);
 
        pthread_mutex_lock(&notes->lock);
-       if (notes->src != NULL)
+       if (notes->src != NULL) {
                memset(notes->src->histograms, 0,
                       notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+               if (notes->src->cycles_hist)
+                       memset(notes->src->cycles_hist, 0,
+                               symbol__size(sym) * sizeof(struct cyc_hist));
+       }
        pthread_mutex_unlock(&notes->lock);
 }
 
+static int __symbol__account_cycles(struct annotation *notes,
+                                   u64 start,
+                                   unsigned offset, unsigned cycles,
+                                   unsigned have_start)
+{
+       struct cyc_hist *ch;
+
+       ch = notes->src->cycles_hist;
+       /*
+        * For now we can only account one basic block per
+        * final jump. But multiple could be overlapping.
+        * Always account the longest one. So when
+        * a shorter one has already been seen, throw it away.
+        *
+        * We separately always account the full cycles.
+        */
+       ch[offset].num_aggr++;
+       ch[offset].cycles_aggr += cycles;
+
+       if (!have_start && ch[offset].have_start)
+               return 0;
+       if (ch[offset].num) {
+               if (have_start && (!ch[offset].have_start ||
+                                  ch[offset].start > start)) {
+                       ch[offset].have_start = 0;
+                       ch[offset].cycles = 0;
+                       ch[offset].num = 0;
+                       if (ch[offset].reset < 0xffff)
+                               ch[offset].reset++;
+               } else if (have_start &&
+                          ch[offset].start < start)
+                       return 0;
+       }
+       ch[offset].have_start = have_start;
+       ch[offset].start = start;
+       ch[offset].cycles += cycles;
+       ch[offset].num++;
+       return 0;
+}
+
 static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
                                      struct annotation *notes, int evidx, u64 addr)
 {
@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
        return 0;
 }
 
-static struct annotation *symbol__get_annotation(struct symbol *sym)
+static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
 {
        struct annotation *notes = symbol__annotation(sym);
 
@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
                if (symbol__alloc_hist(sym) < 0)
                        return NULL;
        }
+       if (!notes->src->cycles_hist && cycles) {
+               if (symbol__alloc_hist_cycles(sym) < 0)
+                       return NULL;
+       }
        return notes;
 }
 
@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 
        if (sym == NULL)
                return 0;
-       notes = symbol__get_annotation(sym);
+       notes = symbol__get_annotation(sym, false);
        if (notes == NULL)
                return -ENOMEM;
        return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
 }
 
+static int symbol__account_cycles(u64 addr, u64 start,
+                                 struct symbol *sym, unsigned cycles)
+{
+       struct annotation *notes;
+       unsigned offset;
+
+       if (sym == NULL)
+               return 0;
+       notes = symbol__get_annotation(sym, true);
+       if (notes == NULL)
+               return -ENOMEM;
+       if (addr < sym->start || addr >= sym->end)
+               return -ERANGE;
+
+       if (start) {
+               if (start < sym->start || start >= sym->end)
+                       return -ERANGE;
+               if (start >= addr)
+                       start = 0;
+       }
+       offset = addr - sym->start;
+       return __symbol__account_cycles(notes,
+                                       start ? start - sym->start : 0,
+                                       offset, cycles,
+                                       !!start);
+}
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+                                   struct addr_map_symbol *start,
+                                   unsigned cycles)
+{
+       unsigned long saddr = 0;
+       int err;
+
+       if (!cycles)
+               return 0;
+
+       /*
+        * Only set start when IPC can be computed. We can only
+        * compute it when the basic block is completely in a single
+        * function.
+        * Special case the case when the jump is elsewhere, but
+        * it starts on the function start.
+        */
+       if (start &&
+               (start->sym == ams->sym ||
+                (ams->sym &&
+                  start->addr == ams->sym->start + ams->map->start)))
+               saddr = start->al_addr;
+       if (saddr == 0)
+               pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n",
+                       ams->addr,
+                       start ? start->addr : 0,
+                       ams->sym ? ams->sym->start + ams->map->start : 0,
+                       saddr);
+       err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+       if (err)
+               pr_debug2("account_cycles failed %d\n", err);
+       return err;
+}
+
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
 {
        return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
index 7e78e6c270783475acb6dc897109254d6d266b35..e9996092a093d7ef6af5aa72ac78fdd5cab61d81 100644 (file)
@@ -59,6 +59,8 @@ struct disasm_line {
        char                *name;
        struct ins          *ins;
        int                 line_nr;
+       float               ipc;
+       u64                 cycles;
        struct ins_operands ops;
 };
 
@@ -79,6 +81,17 @@ struct sym_hist {
        u64             addr[0];
 };
 
+struct cyc_hist {
+       u64     start;
+       u64     cycles;
+       u64     cycles_aggr;
+       u32     num;
+       u32     num_aggr;
+       u8      have_start;
+       /* 1 byte padding */
+       u16     reset;
+};
+
 struct source_line_samples {
        double          percent;
        double          percent_sum;
@@ -97,6 +110,7 @@ struct source_line {
  * @histogram: Array of addr hit histograms per event being monitored
  * @lines: If 'print_lines' is specified, per source code line percentages
  * @source: source parsed from a disassembler like objdump -dS
+ * @cycles_hist: Average cycles per basic block
  *
  * lines is allocated, percentages calculated and all sorted by percentage
  * when the annotation is about to be presented, so the percentages are for
@@ -109,6 +123,7 @@ struct annotated_source {
        struct source_line *lines;
        int                nr_histograms;
        int                sizeof_sym_hist;
+       struct cyc_hist    *cycles_hist;
        struct sym_hist    histograms[0];
 };
 
@@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
 
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
 
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+                                   struct addr_map_symbol *start,
+                                   unsigned cycles);
+
 int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
 
 int symbol__alloc_hist(struct symbol *sym);
index 83d9dd96fe08ea8c613d1bda4db9b00bf4d04cd9..a25b3609cef8154ec6c6c5b4c7fc44ae3b0d56b8 100644 (file)
@@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
        struct itrace_synth_opts *synth_opts = opt->value;
        const char *p;
        char *endptr;
+       bool period_type_set = false;
 
        synth_opts->set = true;
 
@@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
                                case 'i':
                                        synth_opts->period_type =
                                                PERF_ITRACE_PERIOD_INSTRUCTIONS;
+                                       period_type_set = true;
                                        break;
                                case 't':
                                        synth_opts->period_type =
                                                PERF_ITRACE_PERIOD_TICKS;
+                                       period_type_set = true;
                                        break;
                                case 'm':
                                        synth_opts->period *= 1000;
@@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
                                                goto out_err;
                                        synth_opts->period_type =
                                                PERF_ITRACE_PERIOD_NANOSECS;
+                                       period_type_set = true;
                                        break;
                                case '\0':
                                        goto out;
@@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
        }
 out:
        if (synth_opts->instructions) {
-               if (!synth_opts->period_type)
+               if (!period_type_set)
                        synth_opts->period_type =
                                        PERF_ITRACE_DEFAULT_PERIOD_TYPE;
                if (!synth_opts->period)
index 2da5581ec74d35945dcdf3cb45369284a876976c..86d9c73025983d0132a4dddc8607161d713bfbf2 100644 (file)
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
        return ret;
 }
 
+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+       return _eprintf(level, var, fmt, args);
+}
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
        va_list args;
index caac2fdc6105f7591a225f12e65b6e1ece48d342..8b9a088c32ab4e330ece47ae91397bdd87bdd0ee 100644 (file)
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);
 
 int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
 
index 4bb2ae894c78c04cf99946f7add1e98b53fb1d11..f729df5e25e634607f6b4c2f2cb4cc4e1ec933a3 100644 (file)
@@ -134,7 +134,8 @@ struct branch_flags {
        u64 predicted:1;
        u64 in_tx:1;
        u64 abort:1;
-       u64 reserved:60;
+       u64 cycles:16;
+       u64 reserved:44;
 };
 
 struct branch_entry {
index 3b9f411a6b461272903e9b8559ada718d2e57867..373f65b0254509f7b5536b3b87e93009656769b8 100644 (file)
@@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
        return __perf_evlist__combined_sample_type(evlist);
 }
 
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+       u64 branch_type = 0;
+
+       evlist__for_each(evlist, evsel)
+               branch_type |= evsel->attr.branch_sample_type;
+       return branch_type;
+}
+
 bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
 {
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
index a8930b68456b07e1ee05317deb375e1788b29e53..397757063da1924fb09778a7cbfcb567fcd58620 100644 (file)
@@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
 u64 perf_evlist__read_format(struct perf_evlist *evlist);
 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
 bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
 
index 6f28d53d4e46093293e71363d9aa5e7c1e0b23f5..a6e9ddd37913088c7a427d8c97a55c59e5de3813 100644 (file)
@@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
         * and not events sampled. Thus we use a pseudo period of 1.
         */
        he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
-                               1, 1, 0, true);
+                               1, bi->flags.cycles ? bi->flags.cycles : 1,
+                               0, true);
        if (he == NULL)
                return -ENOMEM;
 
@@ -1414,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other)
        return 0;
 }
 
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+                         struct perf_sample *sample, bool nonany_branch_mode)
+{
+       struct branch_info *bi;
+
+       /* If we have branch cycles always annotate them. */
+       if (bs && bs->nr && bs->entries[0].flags.cycles) {
+               int i;
+
+               bi = sample__resolve_bstack(sample, al);
+               if (bi) {
+                       struct addr_map_symbol *prev = NULL;
+
+                       /*
+                        * Ignore errors, still want to process the
+                        * other entries.
+                        *
+                        * For non standard branch modes always
+                        * force no IPC (prev == NULL)
+                        *
+                        * Note that perf stores branches reversed from
+                        * program order!
+                        */
+                       for (i = bs->nr - 1; i >= 0; i--) {
+                               addr_map_symbol__account_cycles(&bi[i].from,
+                                       nonany_branch_mode ? NULL : prev,
+                                       bi[i].flags.cycles);
+                               prev = &bi[i].to;
+                       }
+                       free(bi);
+               }
+       }
+}
 
 size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
 {
index 5ed8d9c229814d9c6942ce3528898bbd9de1cb79..e2f712f85d2e401b5f906b04b564cb86d9917e9b 100644 (file)
@@ -47,6 +47,7 @@ enum hist_column {
        HISTC_MEM_SNOOP,
        HISTC_MEM_DCACHELINE,
        HISTC_TRANSACTION,
+       HISTC_CYCLES,
        HISTC_NR_COLS, /* Last entry */
 };
 
@@ -349,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
 
 unsigned int hists__sort_list_width(struct hists *hists);
 
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+                         struct perf_sample *sample, bool nonany_branch_mode);
+
 struct option;
 int parse_filter_percentage(const struct option *opt __maybe_unused,
                            const char *arg, int unset __maybe_unused);
index a6cb9afc20e28539f5d323bd34fc641fbe1bf681..828936dc3f1ee9bd77d9142634c7fc1676ea66b2 100644 (file)
@@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
                 * Maximum error index indent, we will cut
                 * the event string if it's bigger.
                 */
-               int max_err_idx = 10;
+               int max_err_idx = 13;
 
                /*
                 * Let's be specific with the message when
index b615cdf211d6e241109ad66b988f93ab3e65c5af..d4b0e6454bc627e8491d5bcf04e732c36226a3dd 100644 (file)
@@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
 }
 
 static struct perf_pmu_format *
-pmu_find_format(struct list_head *formats, char *name)
+pmu_find_format(struct list_head *formats, const char *name)
 {
        struct perf_pmu_format *format;
 
@@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name)
        return NULL;
 }
 
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
+{
+       struct perf_pmu_format *format = pmu_find_format(formats, name);
+       __u64 bits = 0;
+       int fbit;
+
+       if (!format)
+               return 0;
+
+       for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
+               bits |= 1ULL << fbit;
+
+       return bits;
+}
+
 /*
  * Sets value based on the format definition (format parameter)
  * and unformated value (value parameter).
@@ -574,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
        }
 }
 
+static __u64 pmu_format_max_value(const unsigned long *format)
+{
+       int w;
+
+       w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+       if (!w)
+               return 0;
+       if (w < 64)
+               return (1ULL << w) - 1;
+       return -1;
+}
+
 /*
  * Term is a string term, and might be a param-term. Try to look up its value
  * in the remaining terms.
@@ -647,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
 {
        struct perf_pmu_format *format;
        __u64 *vp;
-       __u64 val;
+       __u64 val, max_val;
 
        /*
         * If this is a parameter we've already used for parameterized-eval,
@@ -713,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
        } else
                return -EINVAL;
 
+       max_val = pmu_format_max_value(format->bits);
+       if (val > max_val) {
+               if (err) {
+                       err->idx = term->err_val;
+                       if (asprintf(&err->str,
+                                    "value too big for format, maximum is %llu",
+                                    (unsigned long long)max_val) < 0)
+                               err->str = strdup("value too big for format");
+                       return -EINVAL;
+               }
+               /*
+                * Assume we don't care if !err, in which case the value will be
+                * silently truncated.
+                */
+       }
+
        pmu_format_value(format->bits, val, vp, zero);
        return 0;
 }
index 7b9c8cf8ae3e590578abb0f71a41739cb8fc968a..5d7e84466bee5a0124fe8c81db64e83a9f483e99 100644 (file)
@@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats,
                           struct perf_event_attr *attr,
                           struct list_head *head_terms,
                           bool zero, struct parse_events_error *error);
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
                          struct perf_pmu_info *info);
 struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
index 20f555d1ae1c5142dbe7b3382d319397ac1d4284..83ee95e9743b55969001dd792bcf07b63dc885d0 100644 (file)
@@ -106,6 +106,8 @@ struct variable_list {
        struct strlist                  *vars;  /* Available variables */
 };
 
+struct map;
+
 /* Command string to events */
 extern int parse_perf_probe_command(const char *cmd,
                                    struct perf_probe_event *pev);
index f51eb54aeeb3a7fa8cd172a1d670ba82ac3e2394..18722e774a69c5c8fdbbc1c55454b0b85a0e1b16 100644 (file)
@@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
 
        printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
 
-       for (i = 0; i < sample->branch_stack->nr; i++)
-               printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
-                       i, sample->branch_stack->entries[i].from,
-                       sample->branch_stack->entries[i].to);
+       for (i = 0; i < sample->branch_stack->nr; i++) {
+               struct branch_entry *e = &sample->branch_stack->entries[i];
+
+               printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+                       i, e->from, e->to,
+                       e->flags.cycles,
+                       e->flags.mispred ? "M" : " ",
+                       e->flags.predicted ? "P" : " ",
+                       e->flags.abort ? "A" : " ",
+                       e->flags.in_tx ? "T" : " ",
+                       (unsigned)e->flags.reserved);
+       }
 }
 
 static void regs_dump__printf(u64 mask, u64 *regs)
index 4c65a143a34c96747ab7c6d39284f264f0f8d41e..5177088a71d3848262844b31bdf65247e2a13660 100644 (file)
@@ -9,7 +9,7 @@ regex_t         parent_regex;
 const char     default_parent_pattern[] = "^sys_|^do_page_fault";
 const char     *parent_pattern = default_parent_pattern;
 const char     default_sort_order[] = "comm,dso,symbol";
-const char     default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to";
+const char     default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
 const char     default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
 const char     default_top_sort_order[] = "dso,symbol";
 const char     default_diff_sort_order[] = "dso,symbol";
@@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
        return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
 }
 
+static int64_t
+sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return left->branch_info->flags.cycles -
+               right->branch_info->flags.cycles;
+}
+
+static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
+                                   size_t size, unsigned int width)
+{
+       if (he->branch_info->flags.cycles == 0)
+               return repsep_snprintf(bf, size, "%-*s", width, "-");
+       return repsep_snprintf(bf, size, "%-*hd", width,
+                              he->branch_info->flags.cycles);
+}
+
+struct sort_entry sort_cycles = {
+       .se_header      = "Basic Block Cycles",
+       .se_cmp         = sort__cycles_cmp,
+       .se_snprintf    = hist_entry__cycles_snprintf,
+       .se_width_idx   = HISTC_CYCLES,
+};
+
 /* --sort daddr_sym */
 static int64_t
 sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
@@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
        DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
        DIM(SORT_IN_TX, "in_tx", sort_in_tx),
        DIM(SORT_ABORT, "abort", sort_abort),
+       DIM(SORT_CYCLES, "cycles", sort_cycles),
 };
 
 #undef DIM
index e97cd476d336f2a9cad2a1eeb9daba34d08ed3a4..bc6c87a76d16544e856b621649e79a4f9e3e3b19 100644 (file)
@@ -185,6 +185,7 @@ enum sort_type {
        SORT_MISPREDICT,
        SORT_ABORT,
        SORT_IN_TX,
+       SORT_CYCLES,
 
        /* memory mode specific sort keys */
        __SORT_MEMORY_MODE,
index f2a0d1521e266a32a0df10b60f22d88107fae276..c5c709cdc3ce7da55f450922bb5109da754d5db0 100644 (file)
@@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
                perf_evsel__reset_counts(evsel);
        }
 }
+
+static void zero_per_pkg(struct perf_evsel *counter)
+{
+       if (counter->per_pkg_mask)
+               memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+}
+
+static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
+{
+       unsigned long *mask = counter->per_pkg_mask;
+       struct cpu_map *cpus = perf_evsel__cpus(counter);
+       int s;
+
+       *skip = false;
+
+       if (!counter->per_pkg)
+               return 0;
+
+       if (cpu_map__empty(cpus))
+               return 0;
+
+       if (!mask) {
+               mask = zalloc(MAX_NR_CPUS);
+               if (!mask)
+                       return -ENOMEM;
+
+               counter->per_pkg_mask = mask;
+       }
+
+       s = cpu_map__get_socket(cpus, cpu);
+       if (s < 0)
+               return -1;
+
+       *skip = test_and_set_bit(s, mask) == 1;
+       return 0;
+}
+
+static int
+process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
+                      int cpu, int thread,
+                      struct perf_counts_values *count)
+{
+       struct perf_counts_values *aggr = &evsel->counts->aggr;
+       static struct perf_counts_values zero;
+       bool skip = false;
+
+       if (check_per_pkg(evsel, cpu, &skip)) {
+               pr_err("failed to read per-pkg counter\n");
+               return -1;
+       }
+
+       if (skip)
+               count = &zero;
+
+       switch (config->aggr_mode) {
+       case AGGR_THREAD:
+       case AGGR_CORE:
+       case AGGR_SOCKET:
+       case AGGR_NONE:
+               if (!evsel->snapshot)
+                       perf_evsel__compute_deltas(evsel, cpu, thread, count);
+               perf_counts_values__scale(count, config->scale, NULL);
+               if (config->aggr_mode == AGGR_NONE)
+                       perf_stat__update_shadow_stats(evsel, count->values, cpu);
+               break;
+       case AGGR_GLOBAL:
+               aggr->val += count->val;
+               if (config->scale) {
+                       aggr->ena += count->ena;
+                       aggr->run += count->run;
+               }
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+static int process_counter_maps(struct perf_stat_config *config,
+                               struct perf_evsel *counter)
+{
+       int nthreads = thread_map__nr(counter->threads);
+       int ncpus = perf_evsel__nr_cpus(counter);
+       int cpu, thread;
+
+       if (counter->system_wide)
+               nthreads = 1;
+
+       for (thread = 0; thread < nthreads; thread++) {
+               for (cpu = 0; cpu < ncpus; cpu++) {
+                       if (process_counter_values(config, counter, cpu, thread,
+                                                  perf_counts(counter->counts, cpu, thread)))
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+                             struct perf_evsel *counter)
+{
+       struct perf_counts_values *aggr = &counter->counts->aggr;
+       struct perf_stat *ps = counter->priv;
+       u64 *count = counter->counts->aggr.values;
+       int i, ret;
+
+       aggr->val = aggr->ena = aggr->run = 0;
+       init_stats(ps->res_stats);
+
+       if (counter->per_pkg)
+               zero_per_pkg(counter);
+
+       ret = process_counter_maps(config, counter);
+       if (ret)
+               return ret;
+
+       if (config->aggr_mode != AGGR_GLOBAL)
+               return 0;
+
+       if (!counter->snapshot)
+               perf_evsel__compute_deltas(counter, -1, -1, aggr);
+       perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+
+       for (i = 0; i < 3; i++)
+               update_stats(&ps->res_stats[i], count[i]);
+
+       if (verbose) {
+               fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+                       perf_evsel__name(counter), count[0], count[1], count[2]);
+       }
+
+       /*
+        * Save the full runtime - to allow normalization during printout:
+        */
+       perf_stat__update_shadow_stats(counter, count, 0);
+
+       return 0;
+}
index 1cfbe0a980ac77a1a1af31db8a01141a449a6a4f..0b897b083682bb0eee9e400a1a3329ac3ee2374f 100644 (file)
@@ -50,6 +50,13 @@ struct perf_counts {
        struct xyarray            *values;
 };
 
+struct perf_stat_config {
+       enum aggr_mode  aggr_mode;
+       bool            scale;
+       FILE            *output;
+       unsigned int    interval;
+};
+
 static inline struct perf_counts_values*
 perf_counts(struct perf_counts *counts, int cpu, int thread)
 {
@@ -109,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
 int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct perf_evlist *evlist);
 void perf_evlist__reset_stats(struct perf_evlist *evlist);
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+                             struct perf_evsel *counter);
 #endif