From: Don Zickus Date: Fri, 14 Mar 2014 14:43:44 +0000 (-0400) Subject: perf tools: Speed up thread map generation X-Git-Tag: firefly_0821_release~176^2~4235^2~1^2~1 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=363b785f3805a2632eb09a8b430842461c21a640;p=firefly-linux-kernel-4.4.55.git perf tools: Speed up thread map generation When trying to capture perf data on a system running spejbb2013, perf hung for about 15 minutes. This is because it took that long to gather about 10,000 thread maps and process them. I don't think a user wants to wait that long. Instead, recognize that thread maps are roughly equivalent to pid maps and just quickly copy those instead. To do this, I synthesize 'fork' events, this eventually calls thread__fork() and copies the maps over. The overhead goes from 15 minutes down to about a few seconds. -- V2: based on Jiri's comments, moved malloc up a level and made sure the memory was freed Signed-off-by: Don Zickus Acked-by: Jiri Olsa Cc: Jiri Olsa Cc: Joe Mario Link: http://lkml.kernel.org/r/1394808224-113774-1-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 55eebe936513..3e580be0f6fb 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -129,6 +129,28 @@ out: return tgid; } +static int perf_event__synthesize_fork(struct perf_tool *tool, + union perf_event *event, pid_t pid, + pid_t tgid, perf_event__handler_t process, + struct machine *machine) +{ + memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); + + /* this is really a clone event but we use fork to synthesize it */ + event->fork.ppid = tgid; + event->fork.ptid = tgid; + event->fork.pid = tgid; + event->fork.tid = pid; + event->fork.header.type = PERF_RECORD_FORK; + + event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); + + if (process(tool, event, &synth_sample, machine) != 0) + return -1; + + return 0; +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -278,6 +300,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, + union perf_event *fork_event, pid_t pid, int full, perf_event__handler_t process, struct perf_tool *tool, @@ -326,9 +349,15 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (tgid == -1) return -1; - /* process the thread's maps too */ - rc = perf_event__synthesize_mmap_events(tool, mmap_event, _pid, tgid, - process, machine, mmap_data); + if (_pid == pid) { + /* process the parent's maps too */ + rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data); + } else { + /* only fork the tid's map, to save time */ + rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + process, machine); + } if (rc) return rc; @@ -344,7 +373,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, struct machine *machine, bool mmap_data) { - union perf_event *comm_event, *mmap_event; + union perf_event *comm_event, *mmap_event, *fork_event; int err = -1, thread, j; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -355,9 +384,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, if (mmap_event == NULL) goto out_free_comm; + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + err = 0; for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, + fork_event, threads->map[thread], 0, process, tool, machine, mmap_data)) { @@ -383,6 +417,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, /* if not, generate events for it */ if (need_leader && __event__synthesize_thread(comm_event, mmap_event, + fork_event, comm_event->comm.pid, 0, process, tool, machine, mmap_data)) { @@ -391,6 +426,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, } } } + free(fork_event); +out_free_mmap: free(mmap_event); out_free_comm: free(comm_event); @@ -405,7 +442,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, DIR *proc; char proc_path[PATH_MAX]; struct dirent dirent, *next; - union perf_event *comm_event, *mmap_event; + union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -416,6 +453,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (mmap_event == NULL) goto out_free_comm; + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + if (machine__is_default_guest(machine)) return 0; @@ -423,7 +464,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, proc = opendir(proc_path); if (proc == NULL) - goto out_free_mmap; + goto out_free_fork; while (!readdir_r(proc, &dirent, &next) && next) { char *end; @@ -435,12 +476,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool, * We may race with exiting thread, so don't stop just because * one thread couldn't be synthesized. */ - __event__synthesize_thread(comm_event, mmap_event, pid, 1, - process, tool, machine, mmap_data); + __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, + 1, process, tool, machine, mmap_data); } err = 0; closedir(proc); +out_free_fork: + free(fork_event); out_free_mmap: free(mmap_event); out_free_comm: