ba6d3bd48889f76509bcfc680a6163475c7f1376
[firefly-linux-kernel-4.4.55.git] / kernel / trace / trace_kprobe.c
1 /*
2  * kprobe based kernel tracer
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 #include <linux/kprobes.h>
23 #include <linux/seq_file.h>
24 #include <linux/slab.h>
25 #include <linux/smp.h>
26 #include <linux/debugfs.h>
27 #include <linux/types.h>
28 #include <linux/string.h>
29 #include <linux/ctype.h>
30 #include <linux/ptrace.h>
31 #include <linux/perf_event.h>
32
33 #include "trace.h"
34 #include "trace_output.h"
35
36 #define MAX_TRACE_ARGS 128
37 #define MAX_ARGSTR_LEN 63
38 #define MAX_EVENT_NAME_LEN 64
39 #define KPROBE_EVENT_SYSTEM "kprobes"
40
41 /* currently, trace_kprobe only supports X86. */
42
/*
 * A fetch method plus its private argument: func(regs, data) extracts one
 * value (register, stack slot, memory word, ...) from the probed context.
 */
struct fetch_func {
        unsigned long (*func)(struct pt_regs *, void *);
        void *data;     /* meaning depends on func: offset, index, or cache */
};

/* Invoke a fetch method. __kprobes keeps this out of kprobe-able text. */
static __kprobes unsigned long call_fetch(struct fetch_func *f,
                                          struct pt_regs *regs)
{
        return f->func(regs, f->data);
}
53
/* fetch handlers */

/* Fetch a saved register; @offset encodes its pt_regs byte offset. */
static __kprobes unsigned long fetch_register(struct pt_regs *regs,
                                              void *offset)
{
        return regs_get_register(regs, (unsigned int)((unsigned long)offset));
}

/* Fetch the @num-th word of the kernel stack at the probe point. */
static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
                                           void *num)
{
        return regs_get_kernel_stack_nth(regs,
                                         (unsigned int)((unsigned long)num));
}
67
68 static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
69 {
70         unsigned long retval;
71
72         if (probe_kernel_address(addr, retval))
73                 return 0;
74         return retval;
75 }
76
/* Fetch the @num-th function argument (arch-specific ABI lookup). */
static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
{
        return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
}

/* Fetch the return value; only meaningful from a kretprobe handler. */
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
                                              void *dummy)
{
        return regs_return_value(regs);
}

/* Fetch the kernel stack pointer at the probe point. */
static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
                                                   void *dummy)
{
        return kernel_stack_pointer(regs);
}
93
/* Memory fetching by symbol */
struct symbol_cache {
        char *symbol;           /* symbol name (owned, kstrdup'd) */
        long offset;            /* byte offset added to the resolved address */
        unsigned long addr;     /* cached symbol address + offset; 0 if unresolved */
};

/* Re-resolve symbol -> address via kallsyms; returns 0 when not found. */
static unsigned long update_symbol_cache(struct symbol_cache *sc)
{
        sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
        if (sc->addr)
                sc->addr += sc->offset;
        return sc->addr;
}

/* Release a symbol_cache together with its owned name string. */
static void free_symbol_cache(struct symbol_cache *sc)
{
        kfree(sc->symbol);
        kfree(sc);
}
114
115 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
116 {
117         struct symbol_cache *sc;
118
119         if (!sym || strlen(sym) == 0)
120                 return NULL;
121         sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
122         if (!sc)
123                 return NULL;
124
125         sc->symbol = kstrdup(sym, GFP_KERNEL);
126         if (!sc->symbol) {
127                 kfree(sc);
128                 return NULL;
129         }
130         sc->offset = offset;
131
132         update_symbol_cache(sc);
133         return sc;
134 }
135
136 static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
137 {
138         struct symbol_cache *sc = data;
139
140         if (sc->addr)
141                 return fetch_memory(regs, (void *)sc->addr);
142         else
143                 return 0;
144 }
145
/* Special indirect memory access interface */
struct indirect_fetch_data {
        struct fetch_func orig; /* fetch that produces the base address */
        long offset;            /* displacement added before dereferencing */
};
151
152 static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
153 {
154         struct indirect_fetch_data *ind = data;
155         unsigned long addr;
156
157         addr = call_fetch(&ind->orig, regs);
158         if (addr) {
159                 addr += ind->offset;
160                 return fetch_memory(regs, (void *)addr);
161         } else
162                 return 0;
163 }
164
/*
 * Free an indirect_fetch_data, recursing through nested indirections and
 * releasing any symbol cache owned by the innermost fetch.
 */
static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
{
        if (data->orig.func == fetch_indirect)
                free_indirect_fetch_data(data->orig.data);
        else if (data->orig.func == fetch_symbol)
                free_symbol_cache(data->orig.data);
        kfree(data);
}
173
/**
 * Kprobe tracer core functions
 */

/* One named probe argument and the method used to fetch its value. */
struct probe_arg {
        struct fetch_func       fetch;
        const char              *name;  /* owned, kstrdup'd */
};

/* Flags for trace_probe */
#define TP_FLAG_TRACE   1       /* enabled for ftrace output */
#define TP_FLAG_PROFILE 2       /* enabled for perf profiling */

/*
 * One dynamically created probe event.  The embedded kretprobe doubles
 * as plain kprobe storage (rp.kp) when this is an entry probe.
 */
struct trace_probe {
        struct list_head        list;   /* node on probe_list (probe_lock) */
        struct kretprobe        rp;     /* Use rp.kp for kprobe use */
        unsigned long           nhit;   /* hit count shown in the profile file */
        unsigned int            flags;  /* For TP_FLAG_* */
        const char              *symbol;        /* symbol name */
        struct ftrace_event_call        call;
        struct trace_event              event;
        unsigned int            nr_args;
        struct probe_arg        args[]; /* flexible array of nr_args entries */
};

/* Allocation size of a trace_probe carrying @n arguments. */
#define SIZEOF_TRACE_PROBE(n)                   \
        (offsetof(struct trace_probe, args) +   \
        (sizeof(struct probe_arg) * (n)))
202
/* An entry probe has no kretprobe handler; a return probe does. */
static __kprobes int probe_is_return(struct trace_probe *tp)
{
        return tp->rp.handler != NULL;
}

/* Probed symbol name, or "unknown" for address-based probes. */
static __kprobes const char *probe_symbol(struct trace_probe *tp)
{
        return tp->symbol ? tp->symbol : "unknown";
}
212
213 static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
214 {
215         int ret = -EINVAL;
216
217         if (ff->func == fetch_argument)
218                 ret = snprintf(buf, n, "$a%lu", (unsigned long)ff->data);
219         else if (ff->func == fetch_register) {
220                 const char *name;
221                 name = regs_query_register_name((unsigned int)((long)ff->data));
222                 ret = snprintf(buf, n, "%%%s", name);
223         } else if (ff->func == fetch_stack)
224                 ret = snprintf(buf, n, "$s%lu", (unsigned long)ff->data);
225         else if (ff->func == fetch_memory)
226                 ret = snprintf(buf, n, "@0x%p", ff->data);
227         else if (ff->func == fetch_symbol) {
228                 struct symbol_cache *sc = ff->data;
229                 ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
230         } else if (ff->func == fetch_retvalue)
231                 ret = snprintf(buf, n, "$rv");
232         else if (ff->func == fetch_stack_address)
233                 ret = snprintf(buf, n, "$sa");
234         else if (ff->func == fetch_indirect) {
235                 struct indirect_fetch_data *id = ff->data;
236                 size_t l = 0;
237                 ret = snprintf(buf, n, "%+ld(", id->offset);
238                 if (ret >= n)
239                         goto end;
240                 l += ret;
241                 ret = probe_arg_string(buf + l, n - l, &id->orig);
242                 if (ret < 0)
243                         goto end;
244                 l += ret;
245                 ret = snprintf(buf + l, n - l, ")");
246                 ret += l;
247         }
248 end:
249         if (ret >= n)
250                 return -ENOSPC;
251         return ret;
252 }
253
254 static int register_probe_event(struct trace_probe *tp);
255 static void unregister_probe_event(struct trace_probe *tp);
256
257 static DEFINE_MUTEX(probe_lock);
258 static LIST_HEAD(probe_list);
259
260 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
261 static int kretprobe_dispatcher(struct kretprobe_instance *ri,
262                                 struct pt_regs *regs);
263
/*
 * Allocate new trace_probe and initialize it (including kprobes).
 *
 * Either @addr or @symbol(+@offs) identifies the probe location; @group
 * and @event name the resulting ftrace event.  The probe is wired to the
 * proper dispatcher but not yet registered.  Returns ERR_PTR(-ENOMEM) on
 * any failure (also when @event or @group is missing).
 */
static struct trace_probe *alloc_trace_probe(const char *group,
                                             const char *event,
                                             void *addr,
                                             const char *symbol,
                                             unsigned long offs,
                                             int nargs, int is_return)
{
        struct trace_probe *tp;

        /* kzalloc: args[] and all kprobe fields start zeroed. */
        tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
        if (!tp)
                return ERR_PTR(-ENOMEM);

        if (symbol) {
                tp->symbol = kstrdup(symbol, GFP_KERNEL);
                if (!tp->symbol)
                        goto error;
                tp->rp.kp.symbol_name = tp->symbol;
                tp->rp.kp.offset = offs;
        } else
                tp->rp.kp.addr = addr;

        /* Return probes get the kretprobe handler, entry probes the kprobe one. */
        if (is_return)
                tp->rp.handler = kretprobe_dispatcher;
        else
                tp->rp.kp.pre_handler = kprobe_dispatcher;

        if (!event)
                goto error;
        tp->call.name = kstrdup(event, GFP_KERNEL);
        if (!tp->call.name)
                goto error;

        if (!group)
                goto error;
        tp->call.system = kstrdup(group, GFP_KERNEL);
        if (!tp->call.system)
                goto error;

        INIT_LIST_HEAD(&tp->list);
        return tp;
error:
        /* kfree(NULL) is a no-op, so a partially built probe is safe here. */
        kfree(tp->call.name);
        kfree(tp->symbol);
        kfree(tp);
        return ERR_PTR(-ENOMEM);
}
314
/* Free a probe_arg's fetch payload (if it owns one) and its name. */
static void free_probe_arg(struct probe_arg *arg)
{
        if (arg->fetch.func == fetch_symbol)
                free_symbol_cache(arg->fetch.data);
        else if (arg->fetch.func == fetch_indirect)
                free_indirect_fetch_data(arg->fetch.data);
        kfree(arg->name);
}
323
324 static void free_trace_probe(struct trace_probe *tp)
325 {
326         int i;
327
328         for (i = 0; i < tp->nr_args; i++)
329                 free_probe_arg(&tp->args[i]);
330
331         kfree(tp->call.system);
332         kfree(tp->call.name);
333         kfree(tp->symbol);
334         kfree(tp);
335 }
336
/* Look up a probe by event name; caller must hold probe_lock. */
static struct trace_probe *find_probe_event(const char *event)
{
        struct trace_probe *tp;

        list_for_each_entry(tp, &probe_list, list)
                if (!strcmp(tp->call.name, event))
                        return tp;
        return NULL;
}
346
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static void unregister_trace_probe(struct trace_probe *tp)
{
        /* Disarm the probe first so no handler can fire during teardown. */
        if (probe_is_return(tp))
                unregister_kretprobe(&tp->rp);
        else
                unregister_kprobe(&tp->rp.kp);
        list_del(&tp->list);
        unregister_probe_event(tp);
}
357
358 /* Register a trace_probe and probe_event */
359 static int register_trace_probe(struct trace_probe *tp)
360 {
361         struct trace_probe *old_tp;
362         int ret;
363
364         mutex_lock(&probe_lock);
365
366         /* register as an event */
367         old_tp = find_probe_event(tp->call.name);
368         if (old_tp) {
369                 /* delete old event */
370                 unregister_trace_probe(old_tp);
371                 free_trace_probe(old_tp);
372         }
373         ret = register_probe_event(tp);
374         if (ret) {
375                 pr_warning("Faild to register probe event(%d)\n", ret);
376                 goto end;
377         }
378
379         tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
380         if (probe_is_return(tp))
381                 ret = register_kretprobe(&tp->rp);
382         else
383                 ret = register_kprobe(&tp->rp.kp);
384
385         if (ret) {
386                 pr_warning("Could not insert probe(%d)\n", ret);
387                 if (ret == -EILSEQ) {
388                         pr_warning("Probing address(0x%p) is not an "
389                                    "instruction boundary.\n",
390                                    tp->rp.kp.addr);
391                         ret = -EINVAL;
392                 }
393                 unregister_probe_event(tp);
394         } else
395                 list_add_tail(&tp->list, &probe_list);
396 end:
397         mutex_unlock(&probe_lock);
398         return ret;
399 }
400
/*
 * Split "SYM+OFFS" in place: the '+' becomes a NUL terminator and the
 * numeric offset is stored in *offset (0 when no '+' is present).
 * Returns 0 on success or the strict_strtoul() error.
 */
static int split_symbol_offset(char *symbol, unsigned long *offset)
{
        char *tmp;
        int ret;

        if (!offset)
                return -EINVAL;

        tmp = strchr(symbol, '+');
        if (tmp) {
                /* skip sign because strict_strtol doesn't accept '+' */
                ret = strict_strtoul(tmp + 1, 0, offset);
                if (ret)
                        return ret;
                *tmp = '\0';
        } else
                *offset = 0;
        return 0;
}
421
422 #define PARAM_MAX_ARGS 16
423 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
424
/*
 * Parse a "$..." fetch variable (the '$' already stripped): $aN for the
 * Nth function argument, $rv for the return value (return probes only),
 * $sa for the stack address, $sN for the Nth stack slot.  Fills in @ff
 * and returns 0, or -EINVAL on bad syntax / out-of-range index.
 */
static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
{
        int ret = 0;
        unsigned long param;

        switch (arg[0]) {
        case 'a':       /* argument */
                ret = strict_strtoul(arg + 1, 10, &param);
                if (ret || param > PARAM_MAX_ARGS)
                        ret = -EINVAL;
                else {
                        ff->func = fetch_argument;
                        ff->data = (void *)param;
                }
                break;
        case 'r':       /* retval or retaddr */
                /* $rv is only valid on a return probe. */
                if (is_return && arg[1] == 'v') {
                        ff->func = fetch_retvalue;
                        ff->data = NULL;
                } else
                        ret = -EINVAL;
                break;
        case 's':       /* stack */
                if (arg[1] == 'a') {
                        /* $sa: the stack address itself */
                        ff->func = fetch_stack_address;
                        ff->data = NULL;
                } else {
                        /* $sN: the Nth stack entry */
                        ret = strict_strtoul(arg + 1, 10, &param);
                        if (ret || param > PARAM_MAX_STACK)
                                ret = -EINVAL;
                        else {
                                ff->func = fetch_stack;
                                ff->data = (void *)param;
                        }
                }
                break;
        default:
                ret = -EINVAL;
        }
        return ret;
}
466
/*
 * Parse one fetch-argument expression into @ff:
 *   $VAR            -> parse_probe_vars()
 *   %REG            -> fetch_register
 *   @ADDR / @SYM+-o -> fetch_memory / fetch_symbol
 *   +|-offs(ARG)    -> fetch_indirect (recurses on ARG)
 * The string is modified in place.  Returns 0 or a negative error; on
 * success @ff (and any allocated payload in ff->data) is owned by the
 * caller's probe_arg and released via free_probe_arg().
 */
static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
{
        int ret = 0;
        unsigned long param;
        long offset;
        char *tmp;

        switch (arg[0]) {
        case '$':
                ret = parse_probe_vars(arg + 1, ff, is_return);
                break;
        case '%':       /* named register */
                ret = regs_query_register_offset(arg + 1);
                if (ret >= 0) {
                        ff->func = fetch_register;
                        ff->data = (void *)(unsigned long)ret;
                        ret = 0;
                }
                break;
        case '@':       /* memory or symbol */
                if (isdigit(arg[1])) {
                        ret = strict_strtoul(arg + 1, 0, &param);
                        if (ret)
                                break;
                        ff->func = fetch_memory;
                        ff->data = (void *)param;
                } else {
                        ret = split_symbol_offset(arg + 1, &offset);
                        if (ret)
                                break;
                        ff->data = alloc_symbol_cache(arg + 1, offset);
                        if (ff->data)
                                ff->func = fetch_symbol;
                        else
                                ret = -EINVAL;
                }
                break;
        case '+':       /* indirect memory */
        case '-':
                tmp = strchr(arg, '(');
                if (!tmp) {
                        ret = -EINVAL;
                        break;
                }
                *tmp = '\0';
                /* parse past the sign; strict_strtol rejects a leading '+' */
                ret = strict_strtol(arg + 1, 0, &offset);
                if (ret)
                        break;
                if (arg[0] == '-')
                        offset = -offset;
                arg = tmp + 1;
                /* strrchr: match the outermost ')' for nested expressions */
                tmp = strrchr(arg, ')');
                if (tmp) {
                        struct indirect_fetch_data *id;
                        *tmp = '\0';
                        id = kzalloc(sizeof(struct indirect_fetch_data),
                                     GFP_KERNEL);
                        if (!id)
                                return -ENOMEM;
                        id->offset = offset;
                        /* recurse for the base-address expression */
                        ret = parse_probe_arg(arg, &id->orig, is_return);
                        if (ret)
                                kfree(id);
                        else {
                                ff->func = fetch_indirect;
                                ff->data = (void *)id;
                        }
                } else
                        ret = -EINVAL;
                break;
        default:
                /* TODO: support custom handler */
                ret = -EINVAL;
        }
        return ret;
}
543
544 static int create_trace_probe(int argc, char **argv)
545 {
546         /*
547          * Argument syntax:
548          *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
549          *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
550          * Fetch args:
551          *  $aN : fetch Nth of function argument. (N:0-)
552          *  $rv : fetch return value
553          *  $sa : fetch stack address
554          *  $sN : fetch Nth of stack (N:0-)
555          *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
556          *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
557          *  %REG        : fetch register REG
558          * Indirect memory fetch:
559          *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
560          * Alias name of args:
561          *  NAME=FETCHARG : set NAME as alias of FETCHARG.
562          */
563         struct trace_probe *tp;
564         int i, ret = 0;
565         int is_return = 0;
566         char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
567         unsigned long offset = 0;
568         void *addr = NULL;
569         char buf[MAX_EVENT_NAME_LEN];
570
571         if (argc < 2)
572                 return -EINVAL;
573
574         if (argv[0][0] == 'p')
575                 is_return = 0;
576         else if (argv[0][0] == 'r')
577                 is_return = 1;
578         else
579                 return -EINVAL;
580
581         if (argv[0][1] == ':') {
582                 event = &argv[0][2];
583                 if (strchr(event, '/')) {
584                         group = event;
585                         event = strchr(group, '/') + 1;
586                         event[-1] = '\0';
587                         if (strlen(group) == 0) {
588                                 pr_info("Group name is not specifiled\n");
589                                 return -EINVAL;
590                         }
591                 }
592                 if (strlen(event) == 0) {
593                         pr_info("Event name is not specifiled\n");
594                         return -EINVAL;
595                 }
596         }
597
598         if (isdigit(argv[1][0])) {
599                 if (is_return)
600                         return -EINVAL;
601                 /* an address specified */
602                 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
603                 if (ret)
604                         return ret;
605         } else {
606                 /* a symbol specified */
607                 symbol = argv[1];
608                 /* TODO: support .init module functions */
609                 ret = split_symbol_offset(symbol, &offset);
610                 if (ret)
611                         return ret;
612                 if (offset && is_return)
613                         return -EINVAL;
614         }
615         argc -= 2; argv += 2;
616
617         /* setup a probe */
618         if (!group)
619                 group = KPROBE_EVENT_SYSTEM;
620         if (!event) {
621                 /* Make a new event name */
622                 if (symbol)
623                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
624                                  is_return ? 'r' : 'p', symbol, offset);
625                 else
626                         snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
627                                  is_return ? 'r' : 'p', addr);
628                 event = buf;
629         }
630         tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
631                                is_return);
632         if (IS_ERR(tp))
633                 return PTR_ERR(tp);
634
635         /* parse arguments */
636         ret = 0;
637         for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
638                 /* Parse argument name */
639                 arg = strchr(argv[i], '=');
640                 if (arg)
641                         *arg++ = '\0';
642                 else
643                         arg = argv[i];
644                 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
645
646                 /* Parse fetch argument */
647                 if (strlen(arg) > MAX_ARGSTR_LEN) {
648                         pr_info("Argument%d(%s) is too long.\n", i, arg);
649                         ret = -ENOSPC;
650                         goto error;
651                 }
652                 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
653                 if (ret)
654                         goto error;
655         }
656         tp->nr_args = i;
657
658         ret = register_trace_probe(tp);
659         if (ret)
660                 goto error;
661         return 0;
662
663 error:
664         free_trace_probe(tp);
665         return ret;
666 }
667
/* Unregister and free every probe (used by "echo > kprobe_events"). */
static void cleanup_all_probes(void)
{
        struct trace_probe *tp;

        mutex_lock(&probe_lock);
        /* TODO: Use batch unregistration */
        while (!list_empty(&probe_list)) {
                tp = list_entry(probe_list.next, struct trace_probe, list);
                unregister_trace_probe(tp);
                free_trace_probe(tp);
        }
        mutex_unlock(&probe_lock);
}
681
682
/* Probes listing interfaces */

/* seq_file start: hold probe_lock across the whole traversal. */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
        mutex_lock(&probe_lock);
        return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
        return seq_list_next(v, &probe_list, pos);
}

/* seq_file stop: drop the lock taken in probes_seq_start(). */
static void probes_seq_stop(struct seq_file *m, void *v)
{
        mutex_unlock(&probe_lock);
}
699
/*
 * Print one probe definition in the same syntax create_trace_probe()
 * accepts, so the listing can be fed back to the write interface.
 */
static int probes_seq_show(struct seq_file *m, void *v)
{
        struct trace_probe *tp = v;
        int i, ret;
        char buf[MAX_ARGSTR_LEN + 1];

        seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
        seq_printf(m, ":%s", tp->call.name);

        if (tp->symbol)
                seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
        else
                seq_printf(m, " 0x%p", tp->rp.kp.addr);

        for (i = 0; i < tp->nr_args; i++) {
                ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
                if (ret < 0) {
                        pr_warning("Argument%d decoding error(%d).\n", i, ret);
                        return ret;
                }
                seq_printf(m, " %s=%s", tp->args[i].name, buf);
        }
        seq_printf(m, "\n");
        return 0;
}
725
static const struct seq_operations probes_seq_op = {
        .start  = probes_seq_start,
        .next   = probes_seq_next,
        .stop   = probes_seq_stop,
        .show   = probes_seq_show
};

/*
 * Open kprobe_events.  Opening for write with O_TRUNC wipes every
 * existing probe first (mirrors "echo > kprobe_events").
 */
static int probes_open(struct inode *inode, struct file *file)
{
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
                cleanup_all_probes();

        return seq_open(file, &probes_seq_op);
}
741
742 static int command_trace_probe(const char *buf)
743 {
744         char **argv;
745         int argc = 0, ret = 0;
746
747         argv = argv_split(GFP_KERNEL, buf, &argc);
748         if (!argv)
749                 return -ENOMEM;
750
751         if (argc)
752                 ret = create_trace_probe(argc, argv);
753
754         argv_free(argv);
755         return ret;
756 }
757
758 #define WRITE_BUFSIZE 128
759
/*
 * Write handler for kprobe_events: consume the user buffer line by line
 * (each line must fit in WRITE_BUFSIZE), strip '#' comments, and run each
 * line as one probe command.  Returns the bytes consumed or an error.
 */
static ssize_t probes_write(struct file *file, const char __user *buffer,
                            size_t count, loff_t *ppos)
{
        char *kbuf, *tmp;
        int ret;
        size_t done;
        size_t size;

        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;

        ret = done = 0;
        while (done < count) {
                size = count - done;
                if (size >= WRITE_BUFSIZE)
                        size = WRITE_BUFSIZE - 1;
                if (copy_from_user(kbuf, buffer + done, size)) {
                        ret = -EFAULT;
                        goto out;
                }
                kbuf[size] = '\0';
                tmp = strchr(kbuf, '\n');
                if (tmp) {
                        /* consume up to and including the newline */
                        *tmp = '\0';
                        size = tmp - kbuf + 1;
                } else if (done + size < count) {
                        /* no newline and more data pending: line overflows the buffer */
                        pr_warning("Line length is too long: "
                                   "Should be less than %d.", WRITE_BUFSIZE);
                        ret = -EINVAL;
                        goto out;
                }
                done += size;
                /* Remove comments */
                tmp = strchr(kbuf, '#');
                if (tmp)
                        *tmp = '\0';

                ret = command_trace_probe(kbuf);
                if (ret)
                        goto out;
        }
        ret = done;
out:
        kfree(kbuf);
        return ret;
}
807
/* debugfs tracing/kprobe_events: list, add, and clear probe definitions. */
static const struct file_operations kprobe_events_ops = {
        .owner          = THIS_MODULE,
        .open           = probes_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
        .write          = probes_write,
};

/* Probes profiling interfaces */

/* One line per probe: event name, hit count, kprobe missed count. */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
        struct trace_probe *tp = v;

        seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
                   tp->rp.kp.nmissed);

        return 0;
}

static const struct seq_operations profile_seq_op = {
        .start  = probes_seq_start,
        .next   = probes_seq_next,
        .stop   = probes_seq_stop,
        .show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &profile_seq_op);
}

/* debugfs tracing/kprobe_profile: read-only hit/miss statistics. */
static const struct file_operations kprobe_profile_ops = {
        .owner          = THIS_MODULE,
        .open           = profile_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
847
/* Kprobe handler */
/*
 * Entry-probe hit: fetch every configured argument and record a
 * kprobe_trace_entry event in the trace ring buffer.  Runs in kprobe
 * context (preemption disabled), hence the __kprobes marking.
 */
static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
        struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
        struct kprobe_trace_entry *entry;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size, i, pc;
        unsigned long irq_flags;
        struct ftrace_event_call *call = &tp->call;

        tp->nhit++;

        local_save_flags(irq_flags);
        pc = preempt_count();

        /* variable-size record: header plus nr_args fetched values */
        size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);

        event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
                                                  irq_flags, pc);
        if (!event)
                return 0;

        entry = ring_buffer_event_data(event);
        entry->nargs = tp->nr_args;
        entry->ip = (unsigned long)kp->addr;
        for (i = 0; i < tp->nr_args; i++)
                entry->args[i] = call_fetch(&tp->args[i].fetch, regs);

        /* commit unless the event filter discards it */
        if (!filter_current_check_discard(buffer, call, entry, event))
                trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
        return 0;
}
881
/* Kretprobe handler */
/*
 * Return-probe hit: like kprobe_trace_func() but records both the probed
 * function address and the actual return address in a
 * kretprobe_trace_entry.
 */
static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
                                          struct pt_regs *regs)
{
        struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
        struct kretprobe_trace_entry *entry;
        struct ring_buffer_event *event;
        struct ring_buffer *buffer;
        int size, i, pc;
        unsigned long irq_flags;
        struct ftrace_event_call *call = &tp->call;

        local_save_flags(irq_flags);
        pc = preempt_count();

        /* variable-size record: header plus nr_args fetched values */
        size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);

        event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
                                                  irq_flags, pc);
        if (!event)
                return 0;

        entry = ring_buffer_event_data(event);
        entry->nargs = tp->nr_args;
        entry->func = (unsigned long)tp->rp.kp.addr;
        entry->ret_ip = (unsigned long)ri->ret_addr;
        for (i = 0; i < tp->nr_args; i++)
                entry->args[i] = call_fetch(&tp->args[i].fetch, regs);

        /* commit unless the event filter discards it */
        if (!filter_current_check_discard(buffer, call, entry, event))
                trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);

        return 0;
}
916
917 /* Event entry printers */
918 enum print_line_t
919 print_kprobe_event(struct trace_iterator *iter, int flags)
920 {
921         struct kprobe_trace_entry *field;
922         struct trace_seq *s = &iter->seq;
923         struct trace_event *event;
924         struct trace_probe *tp;
925         int i;
926
927         field = (struct kprobe_trace_entry *)iter->ent;
928         event = ftrace_find_event(field->ent.type);
929         tp = container_of(event, struct trace_probe, event);
930
931         if (!trace_seq_printf(s, "%s: (", tp->call.name))
932                 goto partial;
933
934         if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
935                 goto partial;
936
937         if (!trace_seq_puts(s, ")"))
938                 goto partial;
939
940         for (i = 0; i < field->nargs; i++)
941                 if (!trace_seq_printf(s, " %s=%lx",
942                                       tp->args[i].name, field->args[i]))
943                         goto partial;
944
945         if (!trace_seq_puts(s, "\n"))
946                 goto partial;
947
948         return TRACE_TYPE_HANDLED;
949 partial:
950         return TRACE_TYPE_PARTIAL_LINE;
951 }
952
953 enum print_line_t
954 print_kretprobe_event(struct trace_iterator *iter, int flags)
955 {
956         struct kretprobe_trace_entry *field;
957         struct trace_seq *s = &iter->seq;
958         struct trace_event *event;
959         struct trace_probe *tp;
960         int i;
961
962         field = (struct kretprobe_trace_entry *)iter->ent;
963         event = ftrace_find_event(field->ent.type);
964         tp = container_of(event, struct trace_probe, event);
965
966         if (!trace_seq_printf(s, "%s: (", tp->call.name))
967                 goto partial;
968
969         if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
970                 goto partial;
971
972         if (!trace_seq_puts(s, " <- "))
973                 goto partial;
974
975         if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
976                 goto partial;
977
978         if (!trace_seq_puts(s, ")"))
979                 goto partial;
980
981         for (i = 0; i < field->nargs; i++)
982                 if (!trace_seq_printf(s, " %s=%lx",
983                                       tp->args[i].name, field->args[i]))
984                         goto partial;
985
986         if (!trace_seq_puts(s, "\n"))
987                 goto partial;
988
989         return TRACE_TYPE_HANDLED;
990 partial:
991         return TRACE_TYPE_PARTIAL_LINE;
992 }
993
994 static int probe_event_enable(struct ftrace_event_call *call)
995 {
996         struct trace_probe *tp = (struct trace_probe *)call->data;
997
998         tp->flags |= TP_FLAG_TRACE;
999         if (probe_is_return(tp))
1000                 return enable_kretprobe(&tp->rp);
1001         else
1002                 return enable_kprobe(&tp->rp.kp);
1003 }
1004
1005 static void probe_event_disable(struct ftrace_event_call *call)
1006 {
1007         struct trace_probe *tp = (struct trace_probe *)call->data;
1008
1009         tp->flags &= ~TP_FLAG_TRACE;
1010         if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1011                 if (probe_is_return(tp))
1012                         disable_kretprobe(&tp->rp);
1013                 else
1014                         disable_kprobe(&tp->rp.kp);
1015         }
1016 }
1017
/* raw_init callback: just initialize the (empty) field list. */
static int probe_event_raw_init(struct ftrace_event_call *event_call)
{
	INIT_LIST_HEAD(&event_call->fields);

	return 0;
}
1024
/*
 * Register one entry field with the event filter machinery.  Expects
 * 'ret', 'event_call' and a local 'field' variable (used only for
 * offsetof/sizeof) in the calling scope; returns from the CALLER on
 * failure.
 */
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)                       \
	do {                                                            \
		ret = trace_define_field(event_call, #type, name,       \
					 offsetof(typeof(field), item), \
					 sizeof(field.item), is_signed, \
					 FILTER_OTHER);                 \
		if (ret)                                                \
			return ret;                                     \
	} while (0)
1035
1036 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1037 {
1038         int ret, i;
1039         struct kprobe_trace_entry field;
1040         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1041
1042         ret = trace_define_common_fields(event_call);
1043         if (!ret)
1044                 return ret;
1045
1046         DEFINE_FIELD(unsigned long, ip, "ip", 0);
1047         DEFINE_FIELD(int, nargs, "nargs", 1);
1048         /* Set argument names as fields */
1049         for (i = 0; i < tp->nr_args; i++)
1050                 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1051         return 0;
1052 }
1053
1054 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1055 {
1056         int ret, i;
1057         struct kretprobe_trace_entry field;
1058         struct trace_probe *tp = (struct trace_probe *)event_call->data;
1059
1060         ret = trace_define_common_fields(event_call);
1061         if (!ret)
1062                 return ret;
1063
1064         DEFINE_FIELD(unsigned long, func, "func", 0);
1065         DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
1066         DEFINE_FIELD(int, nargs, "nargs", 1);
1067         /* Set argument names as fields */
1068         for (i = 0; i < tp->nr_args; i++)
1069                 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1070         return 0;
1071 }
1072
1073 static int __probe_event_show_format(struct trace_seq *s,
1074                                      struct trace_probe *tp, const char *fmt,
1075                                      const char *arg)
1076 {
1077         int i;
1078
1079         /* Show format */
1080         if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1081                 return 0;
1082
1083         for (i = 0; i < tp->nr_args; i++)
1084                 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1085                         return 0;
1086
1087         if (!trace_seq_printf(s, "\", %s", arg))
1088                 return 0;
1089
1090         for (i = 0; i < tp->nr_args; i++)
1091                 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1092                         return 0;
1093
1094         return trace_seq_puts(s, "\n");
1095 }
1096
/*
 * Emit one "field: <type> <name>; offset:...; size:...;" line into the
 * trace_seq.  Expects 'ret', 's' and a local 'field' variable in the
 * calling scope; returns 0 from the CALLER when the seq overflows.
 */
#undef SHOW_FIELD
#define SHOW_FIELD(type, item, name)                                    \
	do {                                                            \
		ret = trace_seq_printf(s, "\tfield: " #type " %s;\t"    \
				"offset:%u;\tsize:%u;\n", name,         \
				(unsigned int)offsetof(typeof(field), item),\
				(unsigned int)sizeof(type));            \
		if (!ret)                                               \
			return 0;                                       \
	} while (0)
1107
1108 static int kprobe_event_show_format(struct ftrace_event_call *call,
1109                                     struct trace_seq *s)
1110 {
1111         struct kprobe_trace_entry field __attribute__((unused));
1112         int ret, i;
1113         struct trace_probe *tp = (struct trace_probe *)call->data;
1114
1115         SHOW_FIELD(unsigned long, ip, "ip");
1116         SHOW_FIELD(int, nargs, "nargs");
1117
1118         /* Show fields */
1119         for (i = 0; i < tp->nr_args; i++)
1120                 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1121         trace_seq_puts(s, "\n");
1122
1123         return __probe_event_show_format(s, tp, "(%lx)", "REC->ip");
1124 }
1125
1126 static int kretprobe_event_show_format(struct ftrace_event_call *call,
1127                                        struct trace_seq *s)
1128 {
1129         struct kretprobe_trace_entry field __attribute__((unused));
1130         int ret, i;
1131         struct trace_probe *tp = (struct trace_probe *)call->data;
1132
1133         SHOW_FIELD(unsigned long, func, "func");
1134         SHOW_FIELD(unsigned long, ret_ip, "ret_ip");
1135         SHOW_FIELD(int, nargs, "nargs");
1136
1137         /* Show fields */
1138         for (i = 0; i < tp->nr_args; i++)
1139                 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1140         trace_seq_puts(s, "\n");
1141
1142         return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1143                                           "REC->func, REC->ret_ip");
1144 }
1145
1146 #ifdef CONFIG_EVENT_PROFILE
1147
/* Kprobe profile handler */
/*
 * Perf (profiling) handler for kprobe hits: builds a trace entry in a
 * per-cpu profile buffer and hands it to perf via perf_tp_event().
 * IRQs are disabled while the shared buffer is in use.
 */
static __kprobes int kprobe_profile_func(struct kprobe *kp,
					 struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry *entry;
	struct trace_entry *ent;
	int size, __size, i, pc, __cpu;
	unsigned long irq_flags;
	char *raw_data;

	pc = preempt_count();
	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
	/* Pad so that entry plus perf's u32 size header is u64-aligned */
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		     "profile buffer not large enough"))
		return 0;

	/*
	 * Protect the non nmi buffer
	 * This also protects the rcu read side
	 */
	local_irq_save(irq_flags);
	__cpu = smp_processor_id();

	/* NMI context uses a separate buffer so it cannot clobber a
	 * record being built in non-NMI context on the same cpu */
	if (in_nmi())
		raw_data = rcu_dereference(trace_profile_buf_nmi);
	else
		raw_data = rcu_dereference(trace_profile_buf);

	if (!raw_data)
		goto end;

	raw_data = per_cpu_ptr(raw_data, __cpu);
	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
	entry = (struct kprobe_trace_entry *)raw_data;
	ent = &entry->ent;

	tracing_generic_entry_update(ent, irq_flags, pc);
	ent->type = call->id;
	entry->nargs = tp->nr_args;
	entry->ip = (unsigned long)kp->addr;
	for (i = 0; i < tp->nr_args; i++)
		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
	perf_tp_event(call->id, entry->ip, 1, entry, size);
end:
	local_irq_restore(irq_flags);
	return 0;
}
1200
/* Kretprobe profile handler */
/*
 * Perf (profiling) handler for kretprobe hits: like kprobe_profile_func
 * but records the probed function address and the return address.
 */
static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
					    struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry *entry;
	struct trace_entry *ent;
	int size, __size, i, pc, __cpu;
	unsigned long irq_flags;
	char *raw_data;

	pc = preempt_count();
	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
	/* Pad so that entry plus perf's u32 size header is u64-aligned */
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		     "profile buffer not large enough"))
		return 0;

	/*
	 * Protect the non nmi buffer
	 * This also protects the rcu read side
	 */
	local_irq_save(irq_flags);
	__cpu = smp_processor_id();

	/* NMI context uses a separate buffer so it cannot clobber a
	 * record being built in non-NMI context on the same cpu */
	if (in_nmi())
		raw_data = rcu_dereference(trace_profile_buf_nmi);
	else
		raw_data = rcu_dereference(trace_profile_buf);

	if (!raw_data)
		goto end;

	raw_data = per_cpu_ptr(raw_data, __cpu);
	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
	entry = (struct kretprobe_trace_entry *)raw_data;
	ent = &entry->ent;

	tracing_generic_entry_update(ent, irq_flags, pc);
	ent->type = call->id;
	entry->nargs = tp->nr_args;
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	for (i = 0; i < tp->nr_args; i++)
		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
end:
	local_irq_restore(irq_flags);
	return 0;
}
1254
1255 static int probe_profile_enable(struct ftrace_event_call *call)
1256 {
1257         struct trace_probe *tp = (struct trace_probe *)call->data;
1258
1259         tp->flags |= TP_FLAG_PROFILE;
1260
1261         if (probe_is_return(tp))
1262                 return enable_kretprobe(&tp->rp);
1263         else
1264                 return enable_kprobe(&tp->rp.kp);
1265 }
1266
1267 static void probe_profile_disable(struct ftrace_event_call *call)
1268 {
1269         struct trace_probe *tp = (struct trace_probe *)call->data;
1270
1271         tp->flags &= ~TP_FLAG_PROFILE;
1272
1273         if (!(tp->flags & TP_FLAG_TRACE)) {
1274                 if (probe_is_return(tp))
1275                         disable_kretprobe(&tp->rp);
1276                 else
1277                         disable_kprobe(&tp->rp.kp);
1278         }
1279 }
1280 #endif  /* CONFIG_EVENT_PROFILE */
1281
1282
/*
 * Common kprobe callback: fan out to the tracing and/or profiling
 * handler according to which TP_FLAG_* bits are currently set.
 */
static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(kp, regs);
#ifdef CONFIG_EVENT_PROFILE
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_profile_func(kp, regs);
#endif	/* CONFIG_EVENT_PROFILE */
	return 0;	/* We don't tweek kernel, so just return 0 */
}
1296
/*
 * Common kretprobe callback: fan out to the tracing and/or profiling
 * handler according to which TP_FLAG_* bits are currently set.
 */
static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(ri, regs);
#ifdef CONFIG_EVENT_PROFILE
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_profile_func(ri, regs);
#endif	/* CONFIG_EVENT_PROFILE */
	return 0;	/* We don't tweek kernel, so just return 0 */
}
1310
/*
 * Register tp->call with the ftrace event subsystem: install the
 * print/format/field callbacks matching the probe type, obtain an
 * event id, then add the event call (which creates its debugfs
 * files).  Returns 0 on success or a negative error.
 */
static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	if (probe_is_return(tp)) {
		tp->event.trace = print_kretprobe_event;
		call->raw_init = probe_event_raw_init;
		call->show_format = kretprobe_event_show_format;
		call->define_fields = kretprobe_event_define_fields;
	} else {
		tp->event.trace = print_kprobe_event;
		call->raw_init = probe_event_raw_init;
		call->show_format = kprobe_event_show_format;
		call->define_fields = kprobe_event_define_fields;
	}
	call->event = &tp->event;
	/* register_ftrace_event() returns 0 on failure, else the new id */
	call->id = register_ftrace_event(&tp->event);
	if (!call->id)
		return -ENODEV;
	call->enabled = 0;
	call->regfunc = probe_event_enable;
	call->unregfunc = probe_event_disable;

#ifdef CONFIG_EVENT_PROFILE
	atomic_set(&call->profile_count, -1);
	call->profile_enable = probe_profile_enable;
	call->profile_disable = probe_profile_disable;
#endif
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		/* roll back the ftrace event registration on failure */
		unregister_ftrace_event(&tp->event);
	}
	return ret;
}
1349
/* Remove the event call; its debugfs files go away with it. */
static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
}
1355
1356 /* Make a debugfs interface for controling probe points */
1357 static __init int init_kprobe_trace(void)
1358 {
1359         struct dentry *d_tracer;
1360         struct dentry *entry;
1361
1362         d_tracer = tracing_init_dentry();
1363         if (!d_tracer)
1364                 return 0;
1365
1366         entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1367                                     NULL, &kprobe_events_ops);
1368
1369         /* Event list interface */
1370         if (!entry)
1371                 pr_warning("Could not create debugfs "
1372                            "'kprobe_events' entry\n");
1373
1374         /* Profile interface */
1375         entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1376                                     NULL, &kprobe_profile_ops);
1377
1378         if (!entry)
1379                 pr_warning("Could not create debugfs "
1380                            "'kprobe_profile' entry\n");
1381         return 0;
1382 }
1383 fs_initcall(init_kprobe_trace);
1384
1385
1386 #ifdef CONFIG_FTRACE_STARTUP_TEST
1387
/* Six-argument target function used only by the startup self-test. */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1 + a2;

	sum += a3 + a4;
	sum += a5 + a6;
	return sum;
}
1393
1394 static __init int kprobe_trace_self_tests_init(void)
1395 {
1396         int ret;
1397         int (*target)(int, int, int, int, int, int);
1398
1399         target = kprobe_trace_selftest_target;
1400
1401         pr_info("Testing kprobe tracing: ");
1402
1403         ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1404                                   "a1 a2 a3 a4 a5 a6");
1405         if (WARN_ON_ONCE(ret))
1406                 pr_warning("error enabling function entry\n");
1407
1408         ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1409                                   "ra rv");
1410         if (WARN_ON_ONCE(ret))
1411                 pr_warning("error enabling function return\n");
1412
1413         ret = target(1, 2, 3, 4, 5, 6);
1414
1415         cleanup_all_probes();
1416
1417         pr_cont("OK\n");
1418         return 0;
1419 }
1420
1421 late_initcall(kprobe_trace_self_tests_init);
1422
1423 #endif