perf tools: Add Intel PT support
tools/perf/util/intel-pt.c
1 /*
2  * intel_pt.c: Intel Processor Trace support
3  * Copyright (c) 2013-2015, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15
16 #include <stdio.h>
17 #include <stdbool.h>
18 #include <errno.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21
22 #include "../perf.h"
23 #include "session.h"
24 #include "machine.h"
25 #include "tool.h"
26 #include "event.h"
27 #include "evlist.h"
28 #include "evsel.h"
29 #include "map.h"
30 #include "color.h"
31 #include "util.h"
32 #include "thread.h"
33 #include "thread-stack.h"
34 #include "symbol.h"
35 #include "callchain.h"
36 #include "dso.h"
37 #include "debug.h"
38 #include "auxtrace.h"
39 #include "tsc.h"
40 #include "intel-pt.h"
41
42 #include "intel-pt-decoder/intel-pt-log.h"
43 #include "intel-pt-decoder/intel-pt-decoder.h"
44 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
45 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
46
47 #define MAX_TIMESTAMP (~0ULL)
48
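/*
 * Per-session Intel PT processing state.  The embedded struct auxtrace links
 * it into the perf session; the queues/heap are used to decode trace data in
 * timestamp order, and the remaining fields cache decoding options derived
 * from the recorded evsel configs plus the ids of the synthesized sample
 * types.
 */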
49 struct intel_pt {
50         struct auxtrace auxtrace;
51         struct auxtrace_queues queues;
52         struct auxtrace_heap heap;
53         u32 auxtrace_type;
54         struct perf_session *session;
55         struct machine *machine;
56         struct perf_evsel *switch_evsel;
57         struct thread *unknown_thread;
58         bool timeless_decoding;
59         bool sampling_mode;
60         bool snapshot_mode;
61         bool per_cpu_mmaps;
62         bool have_tsc;
63         bool data_queued;
64         bool est_tsc;
65         bool sync_switch;
66         int have_sched_switch;
67         u32 pmu_type;
68         u64 kernel_start;
69         u64 switch_ip;
70         u64 ptss_ip;
71
72         struct perf_tsc_conversion tc;
73         bool cap_user_time_zero;
74
75         struct itrace_synth_opts synth_opts;
76
77         bool sample_instructions;
78         u64 instructions_sample_type;
79         u64 instructions_sample_period;
80         u64 instructions_id;
81
82         bool sample_branches;
83         u32 branches_filter;
84         u64 branches_sample_type;
85         u64 branches_id;
86
87         bool sample_transactions;
88         u64 transactions_sample_type;
89         u64 transactions_id;
90
91         bool synth_needs_swap;
92
93         u64 tsc_bit;
94         u64 noretcomp_bit;
95         unsigned max_non_turbo_ratio;
96 };
97
98 enum switch_state {
99         INTEL_PT_SS_NOT_TRACING,
100         INTEL_PT_SS_UNKNOWN,
101         INTEL_PT_SS_TRACING,
102         INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
103         INTEL_PT_SS_EXPECTING_SWITCH_IP,
104 };
105
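/*
 * Per-queue decode state: one per auxtrace queue (normally per CPU, or per
 * thread when per-cpu mmaps are not used), each with its own decoder instance,
 * current pid/tid/cpu, timestamp and sched_switch state machine state.
 */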
106 struct intel_pt_queue {
107         struct intel_pt *pt;
108         unsigned int queue_nr;
109         struct auxtrace_buffer *buffer;
110         void *decoder;
111         const struct intel_pt_state *state;
112         struct ip_callchain *chain;
113         union perf_event *event_buf;
114         bool on_heap;
115         bool stop;
116         bool step_through_buffers;
117         bool use_buffer_pid_tid;
118         pid_t pid, tid;
119         int cpu;
120         int switch_state;
121         pid_t next_tid;
122         struct thread *thread;
123         bool exclude_kernel;
124         bool have_sample;
125         u64 time;
126         u64 timestamp;
127         u32 flags;
128         u16 insn_len;
129 };
130
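/*
 * Hex-dump raw Intel PT packets together with a short decoded description of
 * each one.  Only used on the dump_trace path (e.g. the -D option).
 */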
131 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
132                           unsigned char *buf, size_t len)
133 {
134         struct intel_pt_pkt packet;
135         size_t pos = 0;
136         int ret, pkt_len, i;
137         char desc[INTEL_PT_PKT_DESC_MAX];
138         const char *color = PERF_COLOR_BLUE;
139
140         color_fprintf(stdout, color,
141                       ". ... Intel Processor Trace data: size %zu bytes\n",
142                       len);
143
144         while (len) {
145                 ret = intel_pt_get_packet(buf, len, &packet);
146                 if (ret > 0)
147                         pkt_len = ret;
148                 else
149                         pkt_len = 1;
150                 printf(".");
151                 color_fprintf(stdout, color, "  %08x: ", pos);
152                 for (i = 0; i < pkt_len; i++)
153                         color_fprintf(stdout, color, " %02x", buf[i]);
154                 for (; i < 16; i++)
155                         color_fprintf(stdout, color, "   ");
156                 if (ret > 0) {
157                         ret = intel_pt_pkt_desc(&packet, desc,
158                                                 INTEL_PT_PKT_DESC_MAX);
159                         if (ret > 0)
160                                 color_fprintf(stdout, color, " %s\n", desc);
161                 } else {
162                         color_fprintf(stdout, color, " Bad packet!\n");
163                 }
164                 pos += pkt_len;
165                 buf += pkt_len;
166                 len -= pkt_len;
167         }
168 }
169
170 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
171                                 size_t len)
172 {
173         printf(".\n");
174         intel_pt_dump(pt, buf, len);
175 }
176
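/*
 * In snapshot mode consecutive buffers can contain overlapping data.  Find the
 * start of the non-overlapping data in 'b' and record it in b->use_data /
 * b->use_size so that only new trace is decoded.
 */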
177 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
178                                    struct auxtrace_buffer *b)
179 {
180         void *start;
181
182         start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
183                                       pt->have_tsc);
184         if (!start)
185                 return -EINVAL;
186         b->use_size = b->data + b->size - start;
187         b->use_data = start;
188         return 0;
189 }
190
191 static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
192                                         struct auxtrace_queue *queue,
193                                         struct auxtrace_buffer *buffer)
194 {
195         if (queue->cpu == -1 && buffer->cpu != -1)
196                 ptq->cpu = buffer->cpu;
197
198         ptq->pid = buffer->pid;
199         ptq->tid = buffer->tid;
200
201         intel_pt_log("queue %u cpu %d pid %d tid %d\n",
202                      ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
203
204         thread__zput(ptq->thread);
205
206         if (ptq->tid != -1) {
207                 if (ptq->pid != -1)
208                         ptq->thread = machine__findnew_thread(ptq->pt->machine,
209                                                               ptq->pid,
210                                                               ptq->tid);
211                 else
212                         ptq->thread = machine__find_thread(ptq->pt->machine, -1,
213                                                            ptq->tid);
214         }
215 }
216
217 /* This function assumes data is processed sequentially only */
218 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
219 {
220         struct intel_pt_queue *ptq = data;
221         struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
222         struct auxtrace_queue *queue;
223
224         if (ptq->stop) {
225                 b->len = 0;
226                 return 0;
227         }
228
229         queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
230
231         buffer = auxtrace_buffer__next(queue, buffer);
232         if (!buffer) {
233                 if (old_buffer)
234                         auxtrace_buffer__drop_data(old_buffer);
235                 b->len = 0;
236                 return 0;
237         }
238
239         ptq->buffer = buffer;
240
241         if (!buffer->data) {
242                 int fd = perf_data_file__fd(ptq->pt->session->file);
243
244                 buffer->data = auxtrace_buffer__get_data(buffer, fd);
245                 if (!buffer->data)
246                         return -ENOMEM;
247         }
248
249         if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
250             intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
251                 return -ENOMEM;
252
253         if (old_buffer)
254                 auxtrace_buffer__drop_data(old_buffer);
255
256         if (buffer->use_data) {
257                 b->len = buffer->use_size;
258                 b->buf = buffer->use_data;
259         } else {
260                 b->len = buffer->size;
261                 b->buf = buffer->data;
262         }
263         b->ref_timestamp = buffer->reference;
264
265         if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
266                                                       !buffer->consecutive)) {
267                 b->consecutive = false;
268                 b->trace_nr = buffer->buffer_nr + 1;
269         } else {
270                 b->consecutive = true;
271         }
272
273         if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
274                                         ptq->tid != buffer->tid))
275                 intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
276
277         if (ptq->step_through_buffers)
278                 ptq->stop = true;
279
280         if (!b->len)
281                 return intel_pt_get_trace(b, data);
282
283         return 0;
284 }
285
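/*
 * Cache of decoded-instruction walks.  Walking object code is expensive, so
 * the result of walking from a given offset (instruction count, byte count and
 * the terminating branch) is cached per DSO, keyed by offset.  The cache is
 * sized from dso__data_size() divided by "intel-pt.cache-divisor" (default
 * 64), which can be set in perfconfig; an illustrative example:
 *
 *   [intel-pt]
 *           cache-divisor = 32
 */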
286 struct intel_pt_cache_entry {
287         struct auxtrace_cache_entry     entry;
288         u64                             insn_cnt;
289         u64                             byte_cnt;
290         enum intel_pt_insn_op           op;
291         enum intel_pt_insn_branch       branch;
292         int                             length;
293         int32_t                         rel;
294 };
295
296 static int intel_pt_config_div(const char *var, const char *value, void *data)
297 {
298         int *d = data;
299         long val;
300
301         if (!strcmp(var, "intel-pt.cache-divisor")) {
302                 val = strtol(value, NULL, 0);
303                 if (val > 0 && val <= INT_MAX)
304                         *d = val;
305         }
306
307         return 0;
308 }
309
310 static int intel_pt_cache_divisor(void)
311 {
312         static int d;
313
314         if (d)
315                 return d;
316
317         perf_config(intel_pt_config_div, &d);
318
319         if (!d)
320                 d = 64;
321
322         return d;
323 }
324
325 static unsigned int intel_pt_cache_size(struct dso *dso,
326                                         struct machine *machine)
327 {
328         off_t size;
329
330         size = dso__data_size(dso, machine);
331         size /= intel_pt_cache_divisor();
332         if (size < 1000)
333                 return 10;
334         if (size > (1 << 21))
335                 return 21;
336         return 32 - __builtin_clz(size);
337 }
338
339 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
340                                              struct machine *machine)
341 {
342         struct auxtrace_cache *c;
343         unsigned int bits;
344
345         if (dso->auxtrace_cache)
346                 return dso->auxtrace_cache;
347
348         bits = intel_pt_cache_size(dso, machine);
349
350         /* Ignoring cache creation failure */
351         c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
352
353         dso->auxtrace_cache = c;
354
355         return c;
356 }
357
358 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
359                               u64 offset, u64 insn_cnt, u64 byte_cnt,
360                               struct intel_pt_insn *intel_pt_insn)
361 {
362         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
363         struct intel_pt_cache_entry *e;
364         int err;
365
366         if (!c)
367                 return -ENOMEM;
368
369         e = auxtrace_cache__alloc_entry(c);
370         if (!e)
371                 return -ENOMEM;
372
373         e->insn_cnt = insn_cnt;
374         e->byte_cnt = byte_cnt;
375         e->op = intel_pt_insn->op;
376         e->branch = intel_pt_insn->branch;
377         e->length = intel_pt_insn->length;
378         e->rel = intel_pt_insn->rel;
379
380         err = auxtrace_cache__add(c, offset, &e->entry);
381         if (err)
382                 auxtrace_cache__free_entry(c, e);
383
384         return err;
385 }
386
387 static struct intel_pt_cache_entry *
388 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
389 {
390         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
391
392         if (!c)
393                 return NULL;
394
395         return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
396 }
397
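/*
 * Decoder callback: walk object code from *ip, decoding one instruction at a
 * time, until a branch is found, 'to_ip' is reached, or 'max_insn_cnt'
 * instructions have been counted.  Object code is read through the DSO data
 * APIs, and the result is added to the per-DSO cache when the walk stayed
 * within a single map.
 */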
398 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
399                                    uint64_t *insn_cnt_ptr, uint64_t *ip,
400                                    uint64_t to_ip, uint64_t max_insn_cnt,
401                                    void *data)
402 {
403         struct intel_pt_queue *ptq = data;
404         struct machine *machine = ptq->pt->machine;
405         struct thread *thread;
406         struct addr_location al;
407         unsigned char buf[1024];
408         size_t bufsz;
409         ssize_t len;
410         int x86_64;
411         u8 cpumode;
412         u64 offset, start_offset, start_ip;
413         u64 insn_cnt = 0;
414         bool one_map = true;
415
416         if (to_ip && *ip == to_ip)
417                 goto out_no_cache;
418
419         bufsz = intel_pt_insn_max_size();
420
421         if (*ip >= ptq->pt->kernel_start)
422                 cpumode = PERF_RECORD_MISC_KERNEL;
423         else
424                 cpumode = PERF_RECORD_MISC_USER;
425
426         thread = ptq->thread;
427         if (!thread) {
428                 if (cpumode != PERF_RECORD_MISC_KERNEL)
429                         return -EINVAL;
430                 thread = ptq->pt->unknown_thread;
431         }
432
433         while (1) {
434                 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
435                 if (!al.map || !al.map->dso)
436                         return -EINVAL;
437
438                 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
439                     dso__data_status_seen(al.map->dso,
440                                           DSO_DATA_STATUS_SEEN_ITRACE))
441                         return -ENOENT;
442
443                 offset = al.map->map_ip(al.map, *ip);
444
445                 if (!to_ip && one_map) {
446                         struct intel_pt_cache_entry *e;
447
448                         e = intel_pt_cache_lookup(al.map->dso, machine, offset);
449                         if (e &&
450                             (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
451                                 *insn_cnt_ptr = e->insn_cnt;
452                                 *ip += e->byte_cnt;
453                                 intel_pt_insn->op = e->op;
454                                 intel_pt_insn->branch = e->branch;
455                                 intel_pt_insn->length = e->length;
456                                 intel_pt_insn->rel = e->rel;
457                                 intel_pt_log_insn_no_data(intel_pt_insn, *ip);
458                                 return 0;
459                         }
460                 }
461
462                 start_offset = offset;
463                 start_ip = *ip;
464
465                 /* Load maps to ensure dso->is_64_bit has been updated */
466                 map__load(al.map, machine->symbol_filter);
467
468                 x86_64 = al.map->dso->is_64_bit;
469
470                 while (1) {
471                         len = dso__data_read_offset(al.map->dso, machine,
472                                                     offset, buf, bufsz);
473                         if (len <= 0)
474                                 return -EINVAL;
475
476                         if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
477                                 return -EINVAL;
478
479                         intel_pt_log_insn(intel_pt_insn, *ip);
480
481                         insn_cnt += 1;
482
483                         if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
484                                 goto out;
485
486                         if (max_insn_cnt && insn_cnt >= max_insn_cnt)
487                                 goto out_no_cache;
488
489                         *ip += intel_pt_insn->length;
490
491                         if (to_ip && *ip == to_ip)
492                                 goto out_no_cache;
493
494                         if (*ip >= al.map->end)
495                                 break;
496
497                         offset += intel_pt_insn->length;
498                 }
499                 one_map = false;
500         }
501 out:
502         *insn_cnt_ptr = insn_cnt;
503
504         if (!one_map)
505                 goto out_no_cache;
506
507         /*
508          * Didn't look up in the 'to_ip' case, so do it now to prevent duplicate
509          * entries.
510          */
511         if (to_ip) {
512                 struct intel_pt_cache_entry *e;
513
514                 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
515                 if (e)
516                         return 0;
517         }
518
519         /* Ignore cache errors */
520         intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
521                            *ip - start_ip, intel_pt_insn);
522
523         return 0;
524
525 out_no_cache:
526         *insn_cnt_ptr = insn_cnt;
527         return 0;
528 }
529
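/*
 * The helpers below inspect the recorded evsel attributes to work out how the
 * trace was configured: whether the kernel was traced, whether return
 * compression and TSC packets were enabled, and consequently whether
 * "timeless" (no timestamp) decoding must be used.
 */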
530 static bool intel_pt_get_config(struct intel_pt *pt,
531                                 struct perf_event_attr *attr, u64 *config)
532 {
533         if (attr->type == pt->pmu_type) {
534                 if (config)
535                         *config = attr->config;
536                 return true;
537         }
538
539         return false;
540 }
541
542 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
543 {
544         struct perf_evsel *evsel;
545
546         evlist__for_each(pt->session->evlist, evsel) {
547                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
548                     !evsel->attr.exclude_kernel)
549                         return false;
550         }
551         return true;
552 }
553
554 static bool intel_pt_return_compression(struct intel_pt *pt)
555 {
556         struct perf_evsel *evsel;
557         u64 config;
558
559         if (!pt->noretcomp_bit)
560                 return true;
561
562         evlist__for_each(pt->session->evlist, evsel) {
563                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
564                     (config & pt->noretcomp_bit))
565                         return false;
566         }
567         return true;
568 }
569
570 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
571 {
572         struct perf_evsel *evsel;
573         bool timeless_decoding = true;
574         u64 config;
575
576         if (!pt->tsc_bit || !pt->cap_user_time_zero)
577                 return true;
578
579         evlist__for_each(pt->session->evlist, evsel) {
580                 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
581                         return true;
582                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
583                         if (config & pt->tsc_bit)
584                                 timeless_decoding = false;
585                         else
586                                 return true;
587                 }
588         }
589         return timeless_decoding;
590 }
591
592 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
593 {
594         struct perf_evsel *evsel;
595
596         evlist__for_each(pt->session->evlist, evsel) {
597                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
598                     !evsel->attr.exclude_kernel)
599                         return true;
600         }
601         return false;
602 }
603
604 static bool intel_pt_have_tsc(struct intel_pt *pt)
605 {
606         struct perf_evsel *evsel;
607         bool have_tsc = false;
608         u64 config;
609
610         if (!pt->tsc_bit)
611                 return false;
612
613         evlist__for_each(pt->session->evlist, evsel) {
614                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
615                         if (config & pt->tsc_bit)
616                                 have_tsc = true;
617                         else
618                                 return false;
619                 }
620         }
621         return have_tsc;
622 }
623
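/*
 * Convert nanoseconds to TSC ticks by inverting the mult/shift conversion in
 * pt->tc (perf time = (ticks * time_mult) >> time_shift, ignoring time_zero),
 * i.e. roughly ticks = (ns << time_shift) / time_mult.  The quotient/remainder
 * split avoids overflowing 64 bits when shifting.
 */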
624 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
625 {
626         u64 quot, rem;
627
628         quot = ns / pt->tc.time_mult;
629         rem  = ns % pt->tc.time_mult;
630         return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
631                 pt->tc.time_mult;
632 }
633
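/*
 * Allocate per-queue state and create a decoder instance for it.  The decoder
 * is given callbacks for fetching trace data and walking object code, plus the
 * sampling period: if instruction samples were requested without an explicit
 * period, a default of one sample per 1000 instructions is used.
 */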
634 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
635                                                    unsigned int queue_nr)
636 {
637         struct intel_pt_params params = { .get_trace = 0, };
638         struct intel_pt_queue *ptq;
639
640         ptq = zalloc(sizeof(struct intel_pt_queue));
641         if (!ptq)
642                 return NULL;
643
644         if (pt->synth_opts.callchain) {
645                 size_t sz = sizeof(struct ip_callchain);
646
647                 sz += pt->synth_opts.callchain_sz * sizeof(u64);
648                 ptq->chain = zalloc(sz);
649                 if (!ptq->chain)
650                         goto out_free;
651         }
652
653         ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
654         if (!ptq->event_buf)
655                 goto out_free;
656
657         ptq->pt = pt;
658         ptq->queue_nr = queue_nr;
659         ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
660         ptq->pid = -1;
661         ptq->tid = -1;
662         ptq->cpu = -1;
663         ptq->next_tid = -1;
664
665         params.get_trace = intel_pt_get_trace;
666         params.walk_insn = intel_pt_walk_next_insn;
667         params.data = ptq;
668         params.return_compression = intel_pt_return_compression(pt);
669         params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
670
671         if (pt->synth_opts.instructions) {
672                 if (pt->synth_opts.period) {
673                         switch (pt->synth_opts.period_type) {
674                         case PERF_ITRACE_PERIOD_INSTRUCTIONS:
675                                 params.period_type =
676                                                 INTEL_PT_PERIOD_INSTRUCTIONS;
677                                 params.period = pt->synth_opts.period;
678                                 break;
679                         case PERF_ITRACE_PERIOD_TICKS:
680                                 params.period_type = INTEL_PT_PERIOD_TICKS;
681                                 params.period = pt->synth_opts.period;
682                                 break;
683                         case PERF_ITRACE_PERIOD_NANOSECS:
684                                 params.period_type = INTEL_PT_PERIOD_TICKS;
685                                 params.period = intel_pt_ns_to_ticks(pt,
686                                                         pt->synth_opts.period);
687                                 break;
688                         default:
689                                 break;
690                         }
691                 }
692
693                 if (!params.period) {
694                         params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
695                         params.period = 1000;
696                 }
697         }
698
699         ptq->decoder = intel_pt_decoder_new(&params);
700         if (!ptq->decoder)
701                 goto out_free;
702
703         return ptq;
704
705 out_free:
706         zfree(&ptq->event_buf);
707         zfree(&ptq->chain);
708         free(ptq);
709         return NULL;
710 }
711
712 static void intel_pt_free_queue(void *priv)
713 {
714         struct intel_pt_queue *ptq = priv;
715
716         if (!ptq)
717                 return;
718         thread__zput(ptq->thread);
719         intel_pt_decoder_free(ptq->decoder);
720         zfree(&ptq->event_buf);
721         zfree(&ptq->chain);
722         free(ptq);
723 }
724
725 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
726                                      struct auxtrace_queue *queue)
727 {
728         struct intel_pt_queue *ptq = queue->priv;
729
730         if (queue->tid == -1 || pt->have_sched_switch) {
731                 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
732                 thread__zput(ptq->thread);
733         }
734
735         if (!ptq->thread && ptq->tid != -1)
736                 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
737
738         if (ptq->thread) {
739                 ptq->pid = ptq->thread->pid_;
740                 if (queue->cpu == -1)
741                         ptq->cpu = ptq->thread->cpu;
742         }
743 }
744
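/*
 * Translate the decoder state into perf branch sample flags: transaction
 * aborts, asynchronous branches (interrupts), trace begin/end (zero from_ip or
 * to_ip) and in-transaction status, plus the branch instruction length.
 */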
745 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
746 {
747         if (ptq->state->flags & INTEL_PT_ABORT_TX) {
748                 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
749         } else if (ptq->state->flags & INTEL_PT_ASYNC) {
750                 if (ptq->state->to_ip)
751                         ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
752                                      PERF_IP_FLAG_ASYNC |
753                                      PERF_IP_FLAG_INTERRUPT;
754                 else
755                         ptq->flags = PERF_IP_FLAG_BRANCH |
756                                      PERF_IP_FLAG_TRACE_END;
757                 ptq->insn_len = 0;
758         } else {
759                 if (ptq->state->from_ip)
760                         ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
761                 else
762                         ptq->flags = PERF_IP_FLAG_BRANCH |
763                                      PERF_IP_FLAG_TRACE_BEGIN;
764                 if (ptq->state->flags & INTEL_PT_IN_TX)
765                         ptq->flags |= PERF_IP_FLAG_IN_TX;
766                 ptq->insn_len = ptq->state->insn_len;
767         }
768 }
769
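/*
 * Lazily allocate the queue's decode state and, for timed decoding, run the
 * decoder until the first timestamp is found so that the queue can be placed
 * on the heap that orders decoding across queues.
 */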
770 static int intel_pt_setup_queue(struct intel_pt *pt,
771                                 struct auxtrace_queue *queue,
772                                 unsigned int queue_nr)
773 {
774         struct intel_pt_queue *ptq = queue->priv;
775
776         if (list_empty(&queue->head))
777                 return 0;
778
779         if (!ptq) {
780                 ptq = intel_pt_alloc_queue(pt, queue_nr);
781                 if (!ptq)
782                         return -ENOMEM;
783                 queue->priv = ptq;
784
785                 if (queue->cpu != -1)
786                         ptq->cpu = queue->cpu;
787                 ptq->tid = queue->tid;
788
789                 if (pt->sampling_mode) {
790                         if (pt->timeless_decoding)
791                                 ptq->step_through_buffers = true;
792                         if (pt->timeless_decoding || !pt->have_sched_switch)
793                                 ptq->use_buffer_pid_tid = true;
794                 }
795         }
796
797         if (!ptq->on_heap &&
798             (!pt->sync_switch ||
799              ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
800                 const struct intel_pt_state *state;
801                 int ret;
802
803                 if (pt->timeless_decoding)
804                         return 0;
805
806                 intel_pt_log("queue %u getting timestamp\n", queue_nr);
807                 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
808                              queue_nr, ptq->cpu, ptq->pid, ptq->tid);
809                 while (1) {
810                         state = intel_pt_decode(ptq->decoder);
811                         if (state->err) {
812                                 if (state->err == INTEL_PT_ERR_NODATA) {
813                                         intel_pt_log("queue %u has no timestamp\n",
814                                                      queue_nr);
815                                         return 0;
816                                 }
817                                 continue;
818                         }
819                         if (state->timestamp)
820                                 break;
821                 }
822
823                 ptq->timestamp = state->timestamp;
824                 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
825                              queue_nr, ptq->timestamp);
826                 ptq->state = state;
827                 ptq->have_sample = true;
828                 intel_pt_sample_flags(ptq);
829                 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
830                 if (ret)
831                         return ret;
832                 ptq->on_heap = true;
833         }
834
835         return 0;
836 }
837
838 static int intel_pt_setup_queues(struct intel_pt *pt)
839 {
840         unsigned int i;
841         int ret;
842
843         for (i = 0; i < pt->queues.nr_queues; i++) {
844                 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
845                 if (ret)
846                         return ret;
847         }
848         return 0;
849 }
850
851 static int intel_pt_inject_event(union perf_event *event,
852                                  struct perf_sample *sample, u64 type,
853                                  bool swapped)
854 {
855         event->header.size = perf_event__sample_event_size(sample, type, 0);
856         return perf_event__synthesize_sample(event, type, 0, sample, swapped);
857 }
858
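/*
 * Synthesize a branch sample from the current decoder state and deliver it to
 * the session.  If the itrace 'inject' option is set, the sample is first
 * packed back into the event itself (e.g. for 'perf inject').
 */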
859 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
860 {
861         int ret;
862         struct intel_pt *pt = ptq->pt;
863         union perf_event *event = ptq->event_buf;
864         struct perf_sample sample = { .ip = 0, };
865
866         event->sample.header.type = PERF_RECORD_SAMPLE;
867         event->sample.header.misc = PERF_RECORD_MISC_USER;
868         event->sample.header.size = sizeof(struct perf_event_header);
869
870         if (!pt->timeless_decoding)
871                 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
872
873         sample.ip = ptq->state->from_ip;
874         sample.pid = ptq->pid;
875         sample.tid = ptq->tid;
876         sample.addr = ptq->state->to_ip;
877         sample.id = ptq->pt->branches_id;
878         sample.stream_id = ptq->pt->branches_id;
879         sample.period = 1;
880         sample.cpu = ptq->cpu;
881         sample.flags = ptq->flags;
882         sample.insn_len = ptq->insn_len;
883
884         if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
885                 return 0;
886
887         if (pt->synth_opts.inject) {
888                 ret = intel_pt_inject_event(event, &sample,
889                                             pt->branches_sample_type,
890                                             pt->synth_needs_swap);
891                 if (ret)
892                         return ret;
893         }
894
895         ret = perf_session__deliver_synth_event(pt->session, event, &sample);
896         if (ret)
897                 pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
898                        ret);
899
900         return ret;
901 }
902
903 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
904 {
905         int ret;
906         struct intel_pt *pt = ptq->pt;
907         union perf_event *event = ptq->event_buf;
908         struct perf_sample sample = { .ip = 0, };
909
910         event->sample.header.type = PERF_RECORD_SAMPLE;
911         event->sample.header.misc = PERF_RECORD_MISC_USER;
912         event->sample.header.size = sizeof(struct perf_event_header);
913
914         if (!pt->timeless_decoding)
915                 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
916
917         sample.ip = ptq->state->from_ip;
918         sample.pid = ptq->pid;
919         sample.tid = ptq->tid;
920         sample.addr = ptq->state->to_ip;
921         sample.id = ptq->pt->instructions_id;
922         sample.stream_id = ptq->pt->instructions_id;
923         sample.period = ptq->pt->instructions_sample_period;
924         sample.cpu = ptq->cpu;
925         sample.flags = ptq->flags;
926         sample.insn_len = ptq->insn_len;
927
928         if (pt->synth_opts.callchain) {
929                 thread_stack__sample(ptq->thread, ptq->chain,
930                                      pt->synth_opts.callchain_sz, sample.ip);
931                 sample.callchain = ptq->chain;
932         }
933
934         if (pt->synth_opts.inject) {
935                 ret = intel_pt_inject_event(event, &sample,
936                                             pt->instructions_sample_type,
937                                             pt->synth_needs_swap);
938                 if (ret)
939                         return ret;
940         }
941
942         ret = perf_session__deliver_synth_event(pt->session, event, &sample);
943         if (ret)
944                 pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
945                        ret);
946
947         return ret;
948 }
949
950 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
951 {
952         int ret;
953         struct intel_pt *pt = ptq->pt;
954         union perf_event *event = ptq->event_buf;
955         struct perf_sample sample = { .ip = 0, };
956
957         event->sample.header.type = PERF_RECORD_SAMPLE;
958         event->sample.header.misc = PERF_RECORD_MISC_USER;
959         event->sample.header.size = sizeof(struct perf_event_header);
960
961         if (!pt->timeless_decoding)
962                 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
963
964         sample.ip = ptq->state->from_ip;
965         sample.pid = ptq->pid;
966         sample.tid = ptq->tid;
967         sample.addr = ptq->state->to_ip;
968         sample.id = ptq->pt->transactions_id;
969         sample.stream_id = ptq->pt->transactions_id;
970         sample.period = 1;
971         sample.cpu = ptq->cpu;
972         sample.flags = ptq->flags;
973         sample.insn_len = ptq->insn_len;
974
975         if (pt->synth_opts.callchain) {
976                 thread_stack__sample(ptq->thread, ptq->chain,
977                                      pt->synth_opts.callchain_sz, sample.ip);
978                 sample.callchain = ptq->chain;
979         }
980
981         if (pt->synth_opts.inject) {
982                 ret = intel_pt_inject_event(event, &sample,
983                                             pt->transactions_sample_type,
984                                             pt->synth_needs_swap);
985                 if (ret)
986                         return ret;
987         }
988
989         ret = perf_session__deliver_synth_event(pt->session, event, &sample);
990         if (ret)
991                 pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
992                        ret);
993
994         return ret;
995 }
996
997 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
998                                 pid_t pid, pid_t tid, u64 ip)
999 {
1000         union perf_event event;
1001         char msg[MAX_AUXTRACE_ERROR_MSG];
1002         int err;
1003
1004         intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1005
1006         auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1007                              code, cpu, pid, tid, ip, msg);
1008
1009         err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1010         if (err)
1011                 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1012                        err);
1013
1014         return err;
1015 }
1016
1017 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1018 {
1019         struct auxtrace_queue *queue;
1020         pid_t tid = ptq->next_tid;
1021         int err;
1022
1023         if (tid == -1)
1024                 return 0;
1025
1026         intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1027
1028         err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1029
1030         queue = &pt->queues.queue_array[ptq->queue_nr];
1031         intel_pt_set_pid_tid_cpu(pt, queue);
1032
1033         ptq->next_tid = -1;
1034
1035         return err;
1036 }
1037
1038 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1039 {
1040         struct intel_pt *pt = ptq->pt;
1041
1042         return ip == pt->switch_ip &&
1043                (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1044                !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1045                                PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1046 }
1047
1048 static int intel_pt_sample(struct intel_pt_queue *ptq)
1049 {
1050         const struct intel_pt_state *state = ptq->state;
1051         struct intel_pt *pt = ptq->pt;
1052         int err;
1053
1054         if (!ptq->have_sample)
1055                 return 0;
1056
1057         ptq->have_sample = false;
1058
1059         if (pt->sample_instructions &&
1060             (state->type & INTEL_PT_INSTRUCTION)) {
1061                 err = intel_pt_synth_instruction_sample(ptq);
1062                 if (err)
1063                         return err;
1064         }
1065
1066         if (pt->sample_transactions &&
1067             (state->type & INTEL_PT_TRANSACTION)) {
1068                 err = intel_pt_synth_transaction_sample(ptq);
1069                 if (err)
1070                         return err;
1071         }
1072
1073         if (!(state->type & INTEL_PT_BRANCH))
1074                 return 0;
1075
1076         if (pt->synth_opts.callchain)
1077                 thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1078                                     state->to_ip, ptq->insn_len,
1079                                     state->trace_nr);
1080         else
1081                 thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1082
1083         if (pt->sample_branches) {
1084                 err = intel_pt_synth_branch_sample(ptq);
1085                 if (err)
1086                         return err;
1087         }
1088
1089         if (!pt->sync_switch)
1090                 return 0;
1091
1092         if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1093                 switch (ptq->switch_state) {
1094                 case INTEL_PT_SS_UNKNOWN:
1095                 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1096                         err = intel_pt_next_tid(pt, ptq);
1097                         if (err)
1098                                 return err;
1099                         ptq->switch_state = INTEL_PT_SS_TRACING;
1100                         break;
1101                 default:
1102                         ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1103                         return 1;
1104                 }
1105         } else if (!state->to_ip) {
1106                 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1107         } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1108                 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1109         } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1110                    state->to_ip == pt->ptss_ip &&
1111                    (ptq->flags & PERF_IP_FLAG_CALL)) {
1112                 ptq->switch_state = INTEL_PT_SS_TRACING;
1113         }
1114
1115         return 0;
1116 }
1117
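/*
 * Find the address of the kernel's __switch_to symbol and, via ptss_ip, of
 * perf_trace_sched_switch.  Branches to switch_ip are used to recognise
 * context switches directly from the trace, which is what sync_switch mode
 * relies on.
 */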
1118 static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
1119 {
1120         struct map *map;
1121         struct symbol *sym, *start;
1122         u64 ip, switch_ip = 0;
1123
1124         if (ptss_ip)
1125                 *ptss_ip = 0;
1126
1127         map = machine__kernel_map(machine, MAP__FUNCTION);
1128         if (!map)
1129                 return 0;
1130
1131         if (map__load(map, machine->symbol_filter))
1132                 return 0;
1133
1134         start = dso__first_symbol(map->dso, MAP__FUNCTION);
1135
1136         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1137                 if (sym->binding == STB_GLOBAL &&
1138                     !strcmp(sym->name, "__switch_to")) {
1139                         ip = map->unmap_ip(map, sym->start);
1140                         if (ip >= map->start && ip < map->end) {
1141                                 switch_ip = ip;
1142                                 break;
1143                         }
1144                 }
1145         }
1146
1147         if (!switch_ip || !ptss_ip)
1148                 return 0;
1149
1150         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1151                 if (!strcmp(sym->name, "perf_trace_sched_switch")) {
1152                         ip = map->unmap_ip(map, sym->start);
1153                         if (ip >= map->start && ip < map->end) {
1154                                 *ptss_ip = ip;
1155                                 break;
1156                         }
1157                 }
1158         }
1159
1160         return switch_ip;
1161 }
1162
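/*
 * Main per-queue decode loop.  A sample is synthesized for each decoder state,
 * decode errors are optionally reported as synthesized error events, and the
 * queue's timestamp is advanced, using the estimated TSC on returns to user
 * space or in an unknown switch state.  Returns 0 once *timestamp is reached,
 * a positive value when decoding should stop for now (no more trace data, or
 * waiting for a sched_switch event), or a negative error code.
 */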
1163 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1164 {
1165         const struct intel_pt_state *state = ptq->state;
1166         struct intel_pt *pt = ptq->pt;
1167         int err;
1168
1169         if (!pt->kernel_start) {
1170                 pt->kernel_start = machine__kernel_start(pt->machine);
1171                 if (pt->per_cpu_mmaps && pt->have_sched_switch &&
1172                     !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1173                     !pt->sampling_mode) {
1174                         pt->switch_ip = intel_pt_switch_ip(pt->machine,
1175                                                            &pt->ptss_ip);
1176                         if (pt->switch_ip) {
1177                                 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1178                                              pt->switch_ip, pt->ptss_ip);
1179                                 pt->sync_switch = true;
1180                         }
1181                 }
1182         }
1183
1184         intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1185                      ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1186         while (1) {
1187                 err = intel_pt_sample(ptq);
1188                 if (err)
1189                         return err;
1190
1191                 state = intel_pt_decode(ptq->decoder);
1192                 if (state->err) {
1193                         if (state->err == INTEL_PT_ERR_NODATA)
1194                                 return 1;
1195                         if (pt->sync_switch &&
1196                             state->from_ip >= pt->kernel_start) {
1197                                 pt->sync_switch = false;
1198                                 intel_pt_next_tid(pt, ptq);
1199                         }
1200                         if (pt->synth_opts.errors) {
1201                                 err = intel_pt_synth_error(pt, state->err,
1202                                                            ptq->cpu, ptq->pid,
1203                                                            ptq->tid,
1204                                                            state->from_ip);
1205                                 if (err)
1206                                         return err;
1207                         }
1208                         continue;
1209                 }
1210
1211                 ptq->state = state;
1212                 ptq->have_sample = true;
1213                 intel_pt_sample_flags(ptq);
1214
1215                 /* Use estimated TSC upon return to user space */
1216                 if (pt->est_tsc &&
1217                     (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1218                     state->to_ip && state->to_ip < pt->kernel_start) {
1219                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1220                                      state->timestamp, state->est_timestamp);
1221                         ptq->timestamp = state->est_timestamp;
1222                 /* Use estimated TSC in unknown switch state */
1223                 } else if (pt->sync_switch &&
1224                            ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1225                            intel_pt_is_switch_ip(ptq, state->to_ip) &&
1226                            ptq->next_tid == -1) {
1227                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1228                                      state->timestamp, state->est_timestamp);
1229                         ptq->timestamp = state->est_timestamp;
1230                 } else if (state->timestamp > ptq->timestamp) {
1231                         ptq->timestamp = state->timestamp;
1232                 }
1233
1234                 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1235                         *timestamp = ptq->timestamp;
1236                         return 0;
1237                 }
1238         }
1239         return 0;
1240 }
1241
1242 static inline int intel_pt_update_queues(struct intel_pt *pt)
1243 {
1244         if (pt->queues.new_data) {
1245                 pt->queues.new_data = false;
1246                 return intel_pt_setup_queues(pt);
1247         }
1248         return 0;
1249 }
1250
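/*
 * Decode queues in timestamp order: pop the oldest queue off the auxtrace
 * heap, decode it up to 'timestamp' and push it back at its new position.
 * Stops once every remaining queue is at or beyond 'timestamp'.
 */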
1251 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1252 {
1253         unsigned int queue_nr;
1254         u64 ts;
1255         int ret;
1256
1257         while (1) {
1258                 struct auxtrace_queue *queue;
1259                 struct intel_pt_queue *ptq;
1260
1261                 if (!pt->heap.heap_cnt)
1262                         return 0;
1263
1264                 if (pt->heap.heap_array[0].ordinal >= timestamp)
1265                         return 0;
1266
1267                 queue_nr = pt->heap.heap_array[0].queue_nr;
1268                 queue = &pt->queues.queue_array[queue_nr];
1269                 ptq = queue->priv;
1270
1271                 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1272                              queue_nr, pt->heap.heap_array[0].ordinal,
1273                              timestamp);
1274
1275                 auxtrace_heap__pop(&pt->heap);
1276
1277                 if (pt->heap.heap_cnt) {
1278                         ts = pt->heap.heap_array[0].ordinal + 1;
1279                         if (ts > timestamp)
1280                                 ts = timestamp;
1281                 } else {
1282                         ts = timestamp;
1283                 }
1284
1285                 intel_pt_set_pid_tid_cpu(pt, queue);
1286
1287                 ret = intel_pt_run_decoder(ptq, &ts);
1288
1289                 if (ret < 0) {
1290                         auxtrace_heap__add(&pt->heap, queue_nr, ts);
1291                         return ret;
1292                 }
1293
1294                 if (!ret) {
1295                         ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1296                         if (ret < 0)
1297                                 return ret;
1298                 } else {
1299                         ptq->on_heap = false;
1300                 }
1301         }
1302
1303         return 0;
1304 }
1305
1306 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1307                                             u64 time_)
1308 {
1309         struct auxtrace_queues *queues = &pt->queues;
1310         unsigned int i;
1311         u64 ts = 0;
1312
1313         for (i = 0; i < queues->nr_queues; i++) {
1314                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1315                 struct intel_pt_queue *ptq = queue->priv;
1316
1317                 if (ptq && (tid == -1 || ptq->tid == tid)) {
1318                         ptq->time = time_;
1319                         intel_pt_set_pid_tid_cpu(pt, queue);
1320                         intel_pt_run_decoder(ptq, &ts);
1321                 }
1322         }
1323         return 0;
1324 }
1325
1326 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1327 {
1328         return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1329                                     sample->pid, sample->tid, 0);
1330 }
1331
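/*
 * Map a CPU number to its queue.  Queue numbering normally follows CPU
 * numbering, so start at the matching index and search outwards from there.
 */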
1332 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1333 {
1334         unsigned i, j;
1335
1336         if (cpu < 0 || !pt->queues.nr_queues)
1337                 return NULL;
1338
1339         if ((unsigned)cpu >= pt->queues.nr_queues)
1340                 i = pt->queues.nr_queues - 1;
1341         else
1342                 i = cpu;
1343
1344         if (pt->queues.queue_array[i].cpu == cpu)
1345                 return pt->queues.queue_array[i].priv;
1346
1347         for (j = 0; i > 0; j++) {
1348                 if (pt->queues.queue_array[--i].cpu == cpu)
1349                         return pt->queues.queue_array[i].priv;
1350         }
1351
1352         for (; j < pt->queues.nr_queues; j++) {
1353                 if (pt->queues.queue_array[j].cpu == cpu)
1354                         return pt->queues.queue_array[j].priv;
1355         }
1356
1357         return NULL;
1358 }
1359
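/*
 * Handle a sched_switch sample: record the next tid for the CPU and drive the
 * per-queue switch_state machine so that, in sync_switch mode, the pid/tid
 * change is applied at the right point in the trace (when the branch to
 * __switch_to is seen).
 */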
1360 static int intel_pt_process_switch(struct intel_pt *pt,
1361                                    struct perf_sample *sample)
1362 {
1363         struct intel_pt_queue *ptq;
1364         struct perf_evsel *evsel;
1365         pid_t tid;
1366         int cpu, err;
1367
1368         evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1369         if (evsel != pt->switch_evsel)
1370                 return 0;
1371
1372         tid = perf_evsel__intval(evsel, sample, "next_pid");
1373         cpu = sample->cpu;
1374
1375         intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1376                      cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1377                      &pt->tc));
1378
1379         if (!pt->sync_switch)
1380                 goto out;
1381
1382         ptq = intel_pt_cpu_to_ptq(pt, cpu);
1383         if (!ptq)
1384                 goto out;
1385
1386         switch (ptq->switch_state) {
1387         case INTEL_PT_SS_NOT_TRACING:
1388                 ptq->next_tid = -1;
1389                 break;
1390         case INTEL_PT_SS_UNKNOWN:
1391         case INTEL_PT_SS_TRACING:
1392                 ptq->next_tid = tid;
1393                 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1394                 return 0;
1395         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1396                 if (!ptq->on_heap) {
1397                         ptq->timestamp = perf_time_to_tsc(sample->time,
1398                                                           &pt->tc);
1399                         err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1400                                                  ptq->timestamp);
1401                         if (err)
1402                                 return err;
1403                         ptq->on_heap = true;
1404                 }
1405                 ptq->switch_state = INTEL_PT_SS_TRACING;
1406                 break;
1407         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1408                 ptq->next_tid = tid;
1409                 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1410                 break;
1411         default:
1412                 break;
1413         }
1414 out:
1415         return machine__set_current_tid(pt->machine, cpu, -1, tid);
1416 }
1417
1418 static int intel_pt_process_itrace_start(struct intel_pt *pt,
1419                                          union perf_event *event,
1420                                          struct perf_sample *sample)
1421 {
1422         if (!pt->per_cpu_mmaps)
1423                 return 0;
1424
1425         intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1426                      sample->cpu, event->itrace_start.pid,
1427                      event->itrace_start.tid, sample->time,
1428                      perf_time_to_tsc(sample->time, &pt->tc));
1429
1430         return machine__set_current_tid(pt->machine, sample->cpu,
1431                                         event->itrace_start.pid,
1432                                         event->itrace_start.tid);
1433 }
1434
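/*
 * Main event callback.  Ordered events are required so that samples arrive in
 * time order; the sample time is converted to a TSC value and all queues are
 * decoded up to that point before the event itself is handled (sched_switch,
 * itrace_start, truncated AUX data, or process exit for timeless decoding).
 */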
1435 static int intel_pt_process_event(struct perf_session *session,
1436                                   union perf_event *event,
1437                                   struct perf_sample *sample,
1438                                   struct perf_tool *tool)
1439 {
1440         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1441                                            auxtrace);
1442         u64 timestamp;
1443         int err = 0;
1444
1445         if (dump_trace)
1446                 return 0;
1447
1448         if (!tool->ordered_events) {
1449                 pr_err("Intel Processor Trace requires ordered events\n");
1450                 return -EINVAL;
1451         }
1452
1453         if (sample->time)
1454                 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1455         else
1456                 timestamp = 0;
1457
1458         if (timestamp || pt->timeless_decoding) {
1459                 err = intel_pt_update_queues(pt);
1460                 if (err)
1461                         return err;
1462         }
1463
1464         if (pt->timeless_decoding) {
1465                 if (event->header.type == PERF_RECORD_EXIT) {
1466                         err = intel_pt_process_timeless_queues(pt,
1467                                                                event->comm.tid,
1468                                                                sample->time);
1469                 }
1470         } else if (timestamp) {
1471                 err = intel_pt_process_queues(pt, timestamp);
1472         }
1473         if (err)
1474                 return err;
1475
1476         if (event->header.type == PERF_RECORD_AUX &&
1477             (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1478             pt->synth_opts.errors) {
1479                 err = intel_pt_lost(pt, sample);
1480                 if (err)
1481                         return err;
1482         }
1483
1484         if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1485                 err = intel_pt_process_switch(pt, sample);
1486         else if (event->header.type == PERF_RECORD_ITRACE_START)
1487                 err = intel_pt_process_itrace_start(pt, event, sample);
1488
1489         intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1490                      perf_event__name(event->header.type), event->header.type,
1491                      sample->cpu, sample->time, timestamp);
1492
1493         return err;
1494 }
1495
1496 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1497 {
1498         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1499                                            auxtrace);
1500         int ret;
1501
1502         if (dump_trace)
1503                 return 0;
1504
1505         if (!tool->ordered_events)
1506                 return -EINVAL;
1507
1508         ret = intel_pt_update_queues(pt);
1509         if (ret < 0)
1510                 return ret;
1511
1512         if (pt->timeless_decoding)
1513                 return intel_pt_process_timeless_queues(pt, -1,
1514                                                         MAX_TIMESTAMP - 1);
1515
1516         return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1517 }
1518
1519 static void intel_pt_free_events(struct perf_session *session)
1520 {
1521         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1522                                            auxtrace);
1523         struct auxtrace_queues *queues = &pt->queues;
1524         unsigned int i;
1525
1526         for (i = 0; i < queues->nr_queues; i++) {
1527                 intel_pt_free_queue(queues->queue_array[i].priv);
1528                 queues->queue_array[i].priv = NULL;
1529         }
1530         intel_pt_log_disable();
1531         auxtrace_queues__free(queues);
1532 }
1533
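/* Tear down all Intel PT state attached to the session */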
1534 static void intel_pt_free(struct perf_session *session)
1535 {
1536         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1537                                            auxtrace);
1538
1539         auxtrace_heap__free(&pt->heap);
1540         intel_pt_free_events(session);
1541         session->auxtrace = NULL;
1542         thread__delete(pt->unknown_thread);
1543         free(pt);
1544 }
1545
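/*
 * Queue the data of a PERF_RECORD_AUXTRACE event for later decoding.  If the
 * trace data was not already queued from the on-disk index, the buffer is
 * added at the current file offset (offset zero for a pipe), and dumped
 * immediately when dumping the trace.
 */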
1546 static int intel_pt_process_auxtrace_event(struct perf_session *session,
1547                                            union perf_event *event,
1548                                            struct perf_tool *tool __maybe_unused)
1549 {
1550         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1551                                            auxtrace);
1552
1553         if (pt->sampling_mode)
1554                 return 0;
1555
1556         if (!pt->data_queued) {
1557                 struct auxtrace_buffer *buffer;
1558                 off_t data_offset;
1559                 int fd = perf_data_file__fd(session->file);
1560                 int err;
1561
1562                 if (perf_data_file__is_pipe(session->file)) {
1563                         data_offset = 0;
1564                 } else {
1565                         data_offset = lseek(fd, 0, SEEK_CUR);
1566                         if (data_offset == -1)
1567                                 return -errno;
1568                 }
1569
1570                 err = auxtrace_queues__add_event(&pt->queues, session, event,
1571                                                  data_offset, &buffer);
1572                 if (err)
1573                         return err;
1574
1575                 /* Dump now that we have copied a piped trace out of the pipe */
1576                 if (dump_trace) {
1577                         if (auxtrace_buffer__get_data(buffer, fd)) {
1578                                 intel_pt_dump_event(pt, buffer->data,
1579                                                     buffer->size);
1580                                 auxtrace_buffer__put_data(buffer);
1581                         }
1582                 }
1583         }
1584
1585         return 0;
1586 }
1587
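/*
 * Helper for synthesizing the attributes of the events that decoded samples
 * will be attached to: the dummy tool simply delivers the synthesized
 * attribute event back into the session.
 */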
1588 struct intel_pt_synth {
1589         struct perf_tool dummy_tool;
1590         struct perf_session *session;
1591 };
1592
1593 static int intel_pt_event_synth(struct perf_tool *tool,
1594                                 union perf_event *event,
1595                                 struct perf_sample *sample __maybe_unused,
1596                                 struct machine *machine __maybe_unused)
1597 {
1598         struct intel_pt_synth *intel_pt_synth =
1599                         container_of(tool, struct intel_pt_synth, dummy_tool);
1600
1601         return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1602                                                  NULL);
1603 }
1604
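/* Synthesize an attribute event for @attr, associating sample id @id */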
1605 static int intel_pt_synth_event(struct perf_session *session,
1606                                 struct perf_event_attr *attr, u64 id)
1607 {
1608         struct intel_pt_synth intel_pt_synth;
1609
1610         memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1611         intel_pt_synth.session = session;
1612
1613         return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1614                                            &id, intel_pt_event_synth);
1615 }
1616
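/*
 * Set up the 'instructions', 'transactions' and 'branches' events that
 * decoded samples will be attributed to, deriving their attributes from the
 * selected Intel PT event.
 */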
1617 static int intel_pt_synth_events(struct intel_pt *pt,
1618                                  struct perf_session *session)
1619 {
1620         struct perf_evlist *evlist = session->evlist;
1621         struct perf_evsel *evsel;
1622         struct perf_event_attr attr;
1623         bool found = false;
1624         u64 id;
1625         int err;
1626
1627         evlist__for_each(evlist, evsel) {
1628                 if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1629                         found = true;
1630                         break;
1631                 }
1632         }
1633
1634         if (!found) {
1635                 pr_debug("There are no selected events with Intel Processor Trace data\n");
1636                 return 0;
1637         }
1638
1639         memset(&attr, 0, sizeof(struct perf_event_attr));
1640         attr.size = sizeof(struct perf_event_attr);
1641         attr.type = PERF_TYPE_HARDWARE;
1642         attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1643         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1644                             PERF_SAMPLE_PERIOD;
1645         if (pt->timeless_decoding)
1646                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1647         else
1648                 attr.sample_type |= PERF_SAMPLE_TIME;
1649         if (!pt->per_cpu_mmaps)
1650                 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1651         attr.exclude_user = evsel->attr.exclude_user;
1652         attr.exclude_kernel = evsel->attr.exclude_kernel;
1653         attr.exclude_hv = evsel->attr.exclude_hv;
1654         attr.exclude_host = evsel->attr.exclude_host;
1655         attr.exclude_guest = evsel->attr.exclude_guest;
1656         attr.sample_id_all = evsel->attr.sample_id_all;
1657         attr.read_format = evsel->attr.read_format;
1658
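     /*
      * Base synthesized sample ids on the selected event's first id, offset
      * far enough that they should not collide with existing ids.
      */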
1659         id = evsel->id[0] + 1000000000;
1660         if (!id)
1661                 id = 1;
1662
1663         if (pt->synth_opts.instructions) {
1664                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1665                 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1666                         attr.sample_period =
1667                                 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1668                 else
1669                         attr.sample_period = pt->synth_opts.period;
1670                 pt->instructions_sample_period = attr.sample_period;
1671                 if (pt->synth_opts.callchain)
1672                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1673                 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1674                          id, (u64)attr.sample_type);
1675                 err = intel_pt_synth_event(session, &attr, id);
1676                 if (err) {
1677                         pr_err("%s: failed to synthesize 'instructions' event type\n",
1678                                __func__);
1679                         return err;
1680                 }
1681                 pt->sample_instructions = true;
1682                 pt->instructions_sample_type = attr.sample_type;
1683                 pt->instructions_id = id;
1684                 id += 1;
1685         }
1686
1687         if (pt->synth_opts.transactions) {
1688                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1689                 attr.sample_period = 1;
1690                 if (pt->synth_opts.callchain)
1691                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1692                 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1693                          id, (u64)attr.sample_type);
1694                 err = intel_pt_synth_event(session, &attr, id);
1695                 if (err) {
1696                         pr_err("%s: failed to synthesize 'transactions' event type\n",
1697                                __func__);
1698                         return err;
1699                 }
1700                 pt->sample_transactions = true;
1701                 pt->transactions_id = id;
1702                 id += 1;
1703                 evlist__for_each(evlist, evsel) {
1704                         if (evsel->id && evsel->id[0] == pt->transactions_id) {
1705                                 if (evsel->name)
1706                                         zfree(&evsel->name);
1707                                 evsel->name = strdup("transactions");
1708                                 break;
1709                         }
1710                 }
1711         }
1712
1713         if (pt->synth_opts.branches) {
1714                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1715                 attr.sample_period = 1;
1716                 attr.sample_type |= PERF_SAMPLE_ADDR;
1717                 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
1718                 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1719                          id, (u64)attr.sample_type);
1720                 err = intel_pt_synth_event(session, &attr, id);
1721                 if (err) {
1722                         pr_err("%s: failed to synthesize 'branches' event type\n",
1723                                __func__);
1724                         return err;
1725                 }
1726                 pt->sample_branches = true;
1727                 pt->branches_sample_type = attr.sample_type;
1728                 pt->branches_id = id;
1729         }
1730
1731         pt->synth_needs_swap = evsel->needs_swap;
1732
1733         return 0;
1734 }
1735
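/* Find the last selected "sched:sched_switch" event, if any */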
1736 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
1737 {
1738         struct perf_evsel *evsel;
1739
1740         evlist__for_each_reverse(evlist, evsel) {
1741                 const char *name = perf_evsel__name(evsel);
1742
1743                 if (!strcmp(name, "sched:sched_switch"))
1744                         return evsel;
1745         }
1746
1747         return NULL;
1748 }
1749
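/* Format strings for dumping the auxtrace_info private data (dump_trace) */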
1750 static const char * const intel_pt_info_fmts[] = {
1751         [INTEL_PT_PMU_TYPE]             = "  PMU Type           %"PRId64"\n",
1752         [INTEL_PT_TIME_SHIFT]           = "  Time Shift         %"PRIu64"\n",
1753         [INTEL_PT_TIME_MULT]            = "  Time Multiplier    %"PRIu64"\n",
1754         [INTEL_PT_TIME_ZERO]            = "  Time Zero          %"PRIu64"\n",
1755         [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero      %"PRId64"\n",
1756         [INTEL_PT_TSC_BIT]              = "  TSC bit            %#"PRIx64"\n",
1757         [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit      %#"PRIx64"\n",
1758         [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch  %"PRId64"\n",
1759         [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode      %"PRId64"\n",
1760         [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps       %"PRId64"\n",
1761 };
1762
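/* Print auxtrace_info values arr[start..finish] when dumping the trace */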
1763 static void intel_pt_print_info(u64 *arr, int start, int finish)
1764 {
1765         int i;
1766
1767         if (!dump_trace)
1768                 return;
1769
1770         for (i = start; i <= finish; i++)
1771                 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
1772 }
1773
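/*
 * Set up Intel PT processing for a session from the recorded auxtrace_info
 * event: read back the recorded parameters, create the decode queues, hook
 * the auxtrace callbacks and synthesize the output events.  Typically
 * reached when reporting data recorded with an intel_pt// event, e.g.
 * "perf record -e intel_pt// ...".
 */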
1774 int intel_pt_process_auxtrace_info(union perf_event *event,
1775                                    struct perf_session *session)
1776 {
1777         struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
1778         size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
1779         struct intel_pt *pt;
1780         int err;
1781
1782         if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
1783                                         min_sz)
1784                 return -EINVAL;
1785
1786         pt = zalloc(sizeof(struct intel_pt));
1787         if (!pt)
1788                 return -ENOMEM;
1789
1790         err = auxtrace_queues__init(&pt->queues);
1791         if (err)
1792                 goto err_free;
1793
1794         intel_pt_log_set_name(INTEL_PT_PMU_NAME);
1795
1796         pt->session = session;
1797         pt->machine = &session->machines.host; /* No kvm support */
1798         pt->auxtrace_type = auxtrace_info->type;
1799         pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
1800         pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
1801         pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
1802         pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
1803         pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
1804         pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
1805         pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
1806         pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
1807         pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
1808         pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
1809         intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
1810                             INTEL_PT_PER_CPU_MMAPS);
1811
1812         pt->timeless_decoding = intel_pt_timeless_decoding(pt);
1813         pt->have_tsc = intel_pt_have_tsc(pt);
1814         pt->sampling_mode = false;
1815         pt->est_tsc = !pt->timeless_decoding;
1816
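     /*
      * An "unknown" thread with an arbitrary, presumably unused, pid/tid
      * stands in when samples cannot be attributed to a real thread.
      */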
1817         pt->unknown_thread = thread__new(999999999, 999999999);
1818         if (!pt->unknown_thread) {
1819                 err = -ENOMEM;
1820                 goto err_free_queues;
1821         }
1822         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
1823         if (err)
1824                 goto err_delete_thread;
1825         if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
1826                 err = -ENOMEM;
1827                 goto err_delete_thread;
1828         }
1829
1830         pt->auxtrace.process_event = intel_pt_process_event;
1831         pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
1832         pt->auxtrace.flush_events = intel_pt_flush;
1833         pt->auxtrace.free_events = intel_pt_free_events;
1834         pt->auxtrace.free = intel_pt_free;
1835         session->auxtrace = &pt->auxtrace;
1836
1837         if (dump_trace)
1838                 return 0;
1839
1840         if (pt->have_sched_switch == 1) {
1841                 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
1842                 if (!pt->switch_evsel) {
1843                         pr_err("%s: missing sched_switch event\n", __func__);
                             err = -EINVAL;
1844                         goto err_delete_thread;
1845                 }
1846         }
1847
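     /*
      * Use the itrace synthesis options given on the command line if any,
      * otherwise fall back to the defaults.
      */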
1848         if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
1849                 pt->synth_opts = *session->itrace_synth_opts;
1850         } else {
1851                 itrace_synth_opts__set_default(&pt->synth_opts);
1852                 if (use_browser != -1) {
1853                         pt->synth_opts.branches = false;
1854                         pt->synth_opts.callchain = true;
1855                 }
1856         }
1857
1858         if (pt->synth_opts.log)
1859                 intel_pt_log_enable();
1860
1861         /* Maximum non-turbo ratio is TSC freq / 100 MHz */
1862         if (pt->tc.time_mult) {
1863                 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
1864
1865                 pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
1866                 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
1867                 intel_pt_log("Maximum non-turbo ratio %u\n",
1868                              pt->max_non_turbo_ratio);
1869         }
1870
1871         if (pt->synth_opts.calls)
1872                 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
1873                                        PERF_IP_FLAG_TRACE_END;
1874         if (pt->synth_opts.returns)
1875                 pt->branches_filter |= PERF_IP_FLAG_RETURN |
1876                                        PERF_IP_FLAG_TRACE_BEGIN;
1877
1878         if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
1879                 symbol_conf.use_callchain = true;
1880                 if (callchain_register_param(&callchain_param) < 0) {
1881                         symbol_conf.use_callchain = false;
1882                         pt->synth_opts.callchain = false;
1883                 }
1884         }
1885
1886         err = intel_pt_synth_events(pt, session);
1887         if (err)
1888                 goto err_delete_thread;
1889
1890         err = auxtrace_queues__process_index(&pt->queues, session);
1891         if (err)
1892                 goto err_delete_thread;
1893
1894         if (pt->queues.populated)
1895                 pt->data_queued = true;
1896
1897         if (pt->timeless_decoding)
1898                 pr_debug2("Intel PT decoding without timestamps\n");
1899
1900         return 0;
1901
1902 err_delete_thread:
1903         thread__delete(pt->unknown_thread);
1904 err_free_queues:
1905         intel_pt_log_disable();
1906         auxtrace_queues__free(&pt->queues);
1907         session->auxtrace = NULL;
1908 err_free:
1909         free(pt);
1910         return err;
1911 }