perf symbols: Handle /proc/sys/kernel/kptr_restrict
author Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 26 May 2011 12:53:51 +0000 (09:53 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 26 May 2011 14:15:25 +0000 (11:15 -0300)
Perf uses /proc/modules to figure out where kernel modules are loaded.

With the advent of kptr_restrict, non root users get zeroes for all module
start addresses.

So check if kptr_restrict is non zero and don't generate the synthetic
PERF_RECORD_MMAP events for them.

Warn the user about it in perf record and in perf report.

In perf report the reference relocation symbol being zero means that
kptr_restrict was set, thus /proc/kallsyms has only zeroed addresses, so don't
use it to fixup symbol addresses when using a valid kallsyms (in the buildid
cache) or vmlinux (in the vmlinux path) build-id located automatically or
specified by the user.

Provide an explanation about it in 'perf report' if kernel samples were taken,
checking if a suitable vmlinux or kallsyms was found/specified.

A restricted /proc/kallsyms doesn't go to the buildid cache anymore.

Example:

 [acme@emilia ~]$ perf record -F 100000 sleep 1

 WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check
 /proc/sys/kernel/kptr_restrict.

 Samples in kernel functions may not be resolved if a suitable vmlinux file is
 not found in the buildid cache or in the vmlinux path.

 Samples in kernel modules won't be resolved at all.

 If some relocation was applied (e.g. kexec) symbols may be misresolved even
 with a suitable vmlinux or kallsyms file.

 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.005 MB perf.data (~231 samples) ]
 [acme@emilia ~]$

 [acme@emilia ~]$ perf report --stdio
 Kernel address maps (/proc/{kallsyms,modules}) were restricted,
 check /proc/sys/kernel/kptr_restrict before running 'perf record'.

 If some relocation was applied (e.g. kexec) symbols may be misresolved.

 Samples in kernel modules can't be resolved as well.

 # Events: 13  cycles
 #
 # Overhead  Command      Shared Object                 Symbol
 # ........  .......  .................  .....................
 #
    20.24%    sleep  [kernel.kallsyms]  [k] page_fault
    20.04%    sleep  [kernel.kallsyms]  [k] filemap_fault
    19.78%    sleep  [kernel.kallsyms]  [k] __lru_cache_add
    19.69%    sleep  ld-2.12.so         [.] memcpy
    14.71%    sleep  [kernel.kallsyms]  [k] dput
     4.70%    sleep  [kernel.kallsyms]  [k] flush_signal_handlers
     0.73%    sleep  [kernel.kallsyms]  [k] perf_event_comm
     0.11%    sleep  [kernel.kallsyms]  [k] native_write_msr_safe

 #
 # (For a higher level overview, try: perf report --sort comm,dso)
 #
 [acme@emilia ~]$

This is because it found a suitable vmlinux (build-id checked) in
/lib/modules/2.6.39-rc7+/build/vmlinux (use -v in perf report to see the long
file name).

If we remove that file from the vmlinux path:

 [root@emilia ~]# mv /lib/modules/2.6.39-rc7+/build/vmlinux \
     /lib/modules/2.6.39-rc7+/build/vmlinux.OFF
 [acme@emilia ~]$ perf report --stdio
 [kernel.kallsyms] with build id 57298cdbe0131f6871667ec0eaab4804dcf6f562
 not found, continuing without symbols

 Kernel address maps (/proc/{kallsyms,modules}) were restricted, check
 /proc/sys/kernel/kptr_restrict before running 'perf record'.

 As no suitable kallsyms nor vmlinux was found, kernel samples can't be
 resolved.

 Samples in kernel modules can't be resolved as well.

 # Events: 13  cycles
 #
 # Overhead  Command      Shared Object  Symbol
 # ........  .......  .................  ......
 #
    80.31%    sleep  [kernel.kallsyms]  [k] 0xffffffff8103425a
    19.69%    sleep  ld-2.12.so         [.] memcpy

 #
 # (For a higher level overview, try: perf report --sort comm,dso)
 #
 [acme@emilia ~]$

Reported-by: Stephane Eranian <eranian@google.com>
Suggested-by: David Miller <davem@davemloft.net>
Cc: Dave Jones <davej@redhat.com>
Cc: David Miller <davem@davemloft.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Kees Cook <kees.cook@canonical.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Link: http://lkml.kernel.org/n/tip-mt512joaxxbhhp1odop04yit@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/util/event.c
tools/perf/util/header.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h

index 0974f957b8fa2634409ff46e8de915b1380e87a8..2ca107f3efdf3c8434fff20349908f7cc3531e74 100644 (file)
@@ -823,6 +823,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 
        symbol__init();
 
+       if (symbol_conf.kptr_restrict)
+               pr_warning("WARNING: Kernel address maps "
+                          "(/proc/{kallsyms,modules}) are restricted, "
+                          "check /proc/sys/kernel/kptr_restrict.\n\n"
+                          "Samples in kernel functions may not be resolved "
+                          "if a suitable vmlinux file is not found in the "
+                          "buildid cache or in the vmlinux path.\n\n"
+                          "Samples in kernel modules won't be resolved "
+                          "at all.\n\n"
+                          "If some relocation was applied (e.g. kexec) "
+                          "symbols may be misresolved even with a suitable "
+                          "vmlinux or kallsyms file.\n\n");
+
        if (no_buildid_cache || no_buildid)
                disable_buildid_cache();
 
index 498c6f70a74784ecd249ccd0c9cc495651f1047d..99156c35bc6284b5fc42cc2731979bd5a6b3ba9b 100644 (file)
@@ -116,6 +116,9 @@ static int process_sample_event(union perf_event *event,
        if (al.filtered || (hide_unresolved && al.sym == NULL))
                return 0;
 
+       if (al.map != NULL)
+               al.map->dso->hit = 1;
+
        if (perf_session__add_hist_entry(session, &al, sample, evsel)) {
                pr_debug("problem incrementing symbol period, skipping event\n");
                return -1;
@@ -249,6 +252,8 @@ static int __cmd_report(void)
        u64 nr_samples;
        struct perf_session *session;
        struct perf_evsel *pos;
+       struct map *kernel_map;
+       struct kmap *kernel_kmap;
        const char *help = "For a higher level overview, try: perf report --sort comm,dso";
 
        signal(SIGINT, sig_handler);
@@ -268,6 +273,27 @@ static int __cmd_report(void)
        if (ret)
                goto out_delete;
 
+       kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION];
+       kernel_kmap = map__kmap(kernel_map);
+       if (kernel_map == NULL ||
+           (kernel_map->dso->hit &&
+            (kernel_kmap->ref_reloc_sym == NULL ||
+             kernel_kmap->ref_reloc_sym->addr == 0))) {
+               const struct dso *kdso = kernel_map->dso;
+
+               ui__warning("Kernel address maps "
+                           "(/proc/{kallsyms,modules}) were restricted, "
+                           "check /proc/sys/kernel/kptr_restrict before "
+                           "running 'perf record'.\n\n%s\n\n"
+                           "Samples in kernel modules can't be resolved "
+                           "as well.\n\n",
+                           RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION]) ?
+                           "As no suitable kallsyms nor vmlinux was found, "
+                           "kernel samples can't be resolved." :
+                           "If some relocation was applied (e.g. kexec) "
+                           "symbols may be misresolved.");
+       }
+
        if (dump_trace) {
                perf_session__fprintf_nr_events(session, stdout);
                goto out_delete;
index 6635fcd11ca558e1831d331a500474c8908a58d1..0fe9adf76379ecc02895cfbb46f35dcc8666dcbd 100644 (file)
@@ -553,9 +553,18 @@ static int perf_event__process_kernel_mmap(union perf_event *event,
                        goto out_problem;
 
                perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
-               perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
-                                                        symbol_name,
-                                                        event->mmap.pgoff);
+
+               /*
+                * Avoid using a zero address (kptr_restrict) for the ref reloc
+                * symbol. Effectively having zero here means that at record
+                * time /proc/sys/kernel/kptr_restrict was non zero.
+                */
+               if (event->mmap.pgoff != 0) {
+                       perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
+                                                                symbol_name,
+                                                                event->mmap.pgoff);
+               }
+
                if (machine__is_default_guest(machine)) {
                        /*
                         * preload dso of guest kernel and modules
index 0717bebc76494d24cfc68bec984aba404058438c..afb0849fe530e5f4a06937457bb98644acf681df 100644 (file)
@@ -193,9 +193,13 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
             *linkname = malloc(size), *targetname;
        int len, err = -1;
 
-       if (is_kallsyms)
+       if (is_kallsyms) {
+               if (symbol_conf.kptr_restrict) {
+                       pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
+                       return 0;
+               }
                realname = (char *)name;
-       else
+       } else
                realname = realpath(name, NULL);
 
        if (realname == NULL || filename == NULL || linkname == NULL)
index 516876dfbe5280af9bd52179c07d99b16ed43233..eec196329fd92c2e0567c8af5535ae1a99fcc4b1 100644 (file)
@@ -676,9 +676,30 @@ discard_symbol:            rb_erase(&pos->rb_node, root);
        return count + moved;
 }
 
+static bool symbol__restricted_filename(const char *filename,
+                                       const char *restricted_filename)
+{
+       bool restricted = false;
+
+       if (symbol_conf.kptr_restrict) {
+               char *r = realpath(filename, NULL);
+
+               if (r != NULL) {
+                       restricted = strcmp(r, restricted_filename) == 0;
+                       free(r);
+                       return restricted;
+               }
+       }
+
+       return restricted;
+}
+
 int dso__load_kallsyms(struct dso *dso, const char *filename,
                       struct map *map, symbol_filter_t filter)
 {
+       if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+               return -1;
+
        if (dso__load_all_kallsyms(dso, filename, map) < 0)
                return -1;
 
@@ -1790,6 +1811,9 @@ static int machine__create_modules(struct machine *machine)
                modules = path;
        }
 
+       if (symbol__restricted_filename(path, "/proc/modules"))
+               return -1;
+
        file = fopen(modules, "r");
        if (file == NULL)
                return -1;
@@ -2239,6 +2263,9 @@ static u64 machine__get_kernel_start_addr(struct machine *machine)
                }
        }
 
+       if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+               return 0;
+
        if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
                return 0;
 
@@ -2410,6 +2437,25 @@ static int setup_list(struct strlist **list, const char *list_str,
        return 0;
 }
 
+static bool symbol__read_kptr_restrict(void)
+{
+       bool value = false;
+
+       if (geteuid() != 0) {
+               FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+               if (fp != NULL) {
+                       char line[8];
+
+                       if (fgets(line, sizeof(line), fp) != NULL)
+                               value = atoi(line) != 0;
+
+                       fclose(fp);
+               }
+       }
+
+       return value;
+}
+
 int symbol__init(void)
 {
        const char *symfs;
@@ -2456,6 +2502,8 @@ int symbol__init(void)
        if (symfs != symbol_conf.symfs)
                free((void *)symfs);
 
+       symbol_conf.kptr_restrict = symbol__read_kptr_restrict();
+
        symbol_conf.initialized = true;
        return 0;
 
index 242de0101a868f99d38c0fbf7348590556f8187f..325ee36a9d29d63d82973d262816f91a064813f2 100644 (file)
@@ -75,7 +75,8 @@ struct symbol_conf {
                        use_callchain,
                        exclude_other,
                        show_cpu_utilization,
-                       initialized;
+                       initialized,
+                       kptr_restrict;
        const char      *vmlinux_name,
                        *kallsyms_name,
                        *source_prefix,