1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58
59 #include "trace.h"
60 #include "trace_output.h"
61
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64 * We need to change this state when a selftest is running.
65 * A selftest will lurk into the ring-buffer to count the
66 * entries inserted during the selftest although some concurrent
67 * insertions into the ring-buffer such as trace_printk could occurred
68 * at the same time, giving false positive or negative results.
69 */
70 bool __read_mostly tracing_selftest_running;
71
72 /*
73 * If boot-time tracing including tracers/events via kernel cmdline
74 * is running, we do not want to run SELFTEST.
75 */
76 bool __read_mostly tracing_selftest_disabled;
77
/*
 * Disable the ftrace startup selftests, logging @reason the first time.
 * Idempotent: only the first caller's reason is reported.
 */
void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
85 #else
86 #define tracing_selftest_disabled 0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* Store tracers and their flags per instance */
/* Store tracers and their flags per instance */
struct tracers {
	struct list_head list;		/* link in the instance's tracer list */
	struct tracer *tracer;		/* the tracer this entry refers to */
	struct tracer_flags *flags;	/* flags associated with @tracer for this instance */
};
102
103 /*
104 * To prevent the comm cache from being overwritten when no
105 * tracing is active, only save the comm when a trace event
106 * occurred.
107 */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111 * Kill all tracing for good (never come back).
112 * It is initialized to 1 but will turn to zero if the initialization
113 * of the tracer is successful. But that is the only place that sets
114 * this back to zero.
115 */
116 int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly tracing_buffer_mask;
119
120 #define MAX_TRACER_SIZE 100
121 /*
122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123 *
124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125 * is set, then ftrace_dump is called. This will output the contents
126 * of the ftrace buffers to the console. This is very useful for
127 * capturing traces that lead to crashes and outputting it to a
128 * serial console.
129 *
130 * It is default off, but you can enable it with either specifying
131 * "ftrace_dump_on_oops" in the kernel command line, or setting
132 * /proc/sys/kernel/ftrace_dump_on_oops
133 * Set 1 if you want to dump buffers of all CPUs
134 * Set 2 if you want to dump the buffer of the CPU that triggered oops
135 * Set instance name if you want to dump the specific trace instance
136 * Multiple instance dump is also supported, and instances are separated
137 * by commas.
138 */
139 /* Set to string format zero to disable by default */
140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141
142 /* When set, tracing will stop when a WARN*() is hit */
143 static int __disable_trace_on_warning;
144
/* Handler defined later in this file; toggles the tracepoint_printk static key. */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos);

/* Tracing knobs exposed under /proc/sys/kernel/ */
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};
170
/* Register the tracing sysctls at boot (subsys initcall time). */
static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);
177
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module *mod;	/* owning module, or NULL when built in */
	unsigned long length;	/* number of map entries that follow */
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

/* Serializes updates to the trace_eval_maps chain below. */
static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
213
214 int tracing_set_tracer(struct trace_array *tr, const char *buf);
215 static void ftrace_trace_userstack(struct trace_array *tr,
216 struct trace_buffer *buffer,
217 unsigned int trace_ctx);
218
219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
220 static char *default_bootup_tracer;
221
222 static bool allocate_snapshot;
223 static bool snapshot_at_boot;
224
225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
226 static int boot_instance_index;
227
228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_snapshot_index;
230
/*
 * "ftrace=" boot parameter: remember the tracer name so it can be
 * enabled once tracing initializes.
 */
static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);
240
ftrace_dump_on_oops_enabled(void)241 int ftrace_dump_on_oops_enabled(void)
242 {
243 if (!strcmp("0", ftrace_dump_on_oops))
244 return 0;
245 else
246 return 1;
247 }
248
/*
 * "ftrace_dump_on_oops" boot parameter parser.
 * Accepted forms:
 *   "ftrace_dump_on_oops"        -> dump all CPUs ("1")
 *   "ftrace_dump_on_oops,list"   -> dump all CPUs plus named instances
 *   "ftrace_dump_on_oops=value"  -> use @value verbatim
 */
static int __init set_ftrace_dump_on_oops(char *str)
{
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	if (*str == ',') {
		/* Prepend "1" then append the ",instance..." tail after it. */
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
270
stop_trace_on_warning(char * str)271 static int __init stop_trace_on_warning(char *str)
272 {
273 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
274 __disable_trace_on_warning = 1;
275 return 1;
276 }
277 __setup("traceoff_on_warning", stop_trace_on_warning);
278
/*
 * "alloc_snapshot" boot parameter.  With "=name[,name...]" it queues the
 * named instances (tab-separated) in boot_snapshot_info for later
 * allocation; with no argument it requests a snapshot buffer for the
 * top-level trace instance.
 */
static int __init boot_alloc_snapshot(char *str)
{
	char *slot = boot_snapshot_info + boot_snapshot_index;
	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
	int ret;

	if (str[0] == '=') {
		str++;
		if (strlen(str) >= left)
			return -1;

		/* Append "<names>\t" and advance the fill index. */
		ret = snprintf(slot, left, "%s\t", str);
		boot_snapshot_index += ret;
	} else {
		allocate_snapshot = true;
		/* We also need the main ring buffer expanded */
		trace_set_ring_buffer_expanded(NULL);
	}
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);
300
301
/*
 * "ftrace_boot_snapshot" boot parameter: like "alloc_snapshot" but also
 * takes a snapshot at the end of boot.
 */
static int __init boot_snapshot(char *str)
{
	snapshot_at_boot = true;
	boot_alloc_snapshot(str);
	return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);
309
310
boot_instance(char * str)311 static int __init boot_instance(char *str)
312 {
313 char *slot = boot_instance_info + boot_instance_index;
314 int left = sizeof(boot_instance_info) - boot_instance_index;
315 int ret;
316
317 if (strlen(str) >= left)
318 return -1;
319
320 ret = snprintf(slot, left, "%s\t", str);
321 boot_instance_index += ret;
322
323 return 1;
324 }
325 __setup("trace_instance=", boot_instance);
326
327
328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
329
/* "trace_options=" boot parameter: save the option string for later apply. */
static int __init set_trace_boot_options(char *str)
{
	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);
336
337 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
338 static char *trace_boot_clock __initdata;
339
/* "trace_clock=" boot parameter: save the clock name for later apply. */
static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);
347
set_tracepoint_printk(char * str)348 static int __init set_tracepoint_printk(char *str)
349 {
350 /* Ignore the "tp_printk_stop_on_boot" param */
351 if (*str == '_')
352 return 0;
353
354 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
355 tracepoint_printk = 1;
356 return 1;
357 }
358 __setup("tp_printk", set_tracepoint_printk);
359
/* "tp_printk_stop_on_boot": turn tp_printk back off once boot completes. */
static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
366
/* "traceoff_after_boot": disable tracing once boot completes. */
static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);
373
/*
 * Convert nanoseconds to microseconds, rounding to nearest.
 * do_div() divides @nsec in place (required for 64-bit math on 32-bit
 * architectures), so work on the local copy and return it.
 */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;	/* round half up */
	do_div(nsec, 1000);
	return nsec;
}
380
381 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)382 trace_process_export(struct trace_export *export,
383 struct ring_buffer_event *event, int flag)
384 {
385 struct trace_entry *entry;
386 unsigned int size = 0;
387
388 if (export->flags & flag) {
389 entry = ring_buffer_event_data(event);
390 size = ring_buffer_event_length(event);
391 export->write(export, entry, size);
392 }
393 }
394
395 static DEFINE_MUTEX(ftrace_export_lock);
396
397 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
398
399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
402
/*
 * Bump the static-key counters for each event class @export subscribes
 * to, so the fast paths only walk the export list when needed.
 */
static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}
414
/* Counterpart of ftrace_exports_enable(): drop the static-key counters. */
static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}
426
/*
 * Feed @event to every registered export interested in @flag.
 * Walks the RCU-published export list with preemption disabled
 * (the _notrace guard avoids recursing into tracing itself).
 */
static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	guard(preempt_notrace)();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}
}
439
/*
 * Push @export onto the head of @list. Lockless readers may be walking
 * the list concurrently, so the ordering of the two publications matters.
 */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}
452
/*
 * Unlink @export from @list.
 * Returns 0 on success, -1 if @export was not found on the list.
 */
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	/* Find the link slot pointing at @export. */
	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	/* Splice it out; readers see either the old or new list. */
	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}
469
/* Enable the export's static keys, then publish it on @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}
477
/*
 * Remove @export from @list and drop its static keys.
 * Returns the removal result (-1 if @export was not on the list).
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}
488
/*
 * Register a trace export callback.  @export->write must be set.
 * Returns 0 on success, -1 on a missing write callback.
 */
int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	guard(mutex)(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);
501
/* Unregister a trace export; returns -1 if it was never registered. */
int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
508
509 /* trace_flags holds trace_options default values */
510 #define TRACE_DEFAULT_FLAGS \
511 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \
512 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \
513 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \
514 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \
515 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \
516 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \
517 TRACE_ITER(COPY_MARKER))
518
519 /* trace_options that are only supported by global_trace */
520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \
521 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \
522 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
523
524 /* trace_flags that are default zero for instances */
525 #define ZEROED_TRACE_FLAGS \
526 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
527 TRACE_ITER(COPY_MARKER))
528
529 /*
530 * The global_trace is the descriptor that holds the top-level tracing
531 * buffers for the live tracing.
532 */
533 static struct trace_array global_trace = {
534 .trace_flags = TRACE_DEFAULT_FLAGS,
535 };
536
537 struct trace_array *printk_trace = &global_trace;
538
539 /* List of trace_arrays interested in the top level trace_marker */
540 static LIST_HEAD(marker_copies);
541
/*
 * Redirect trace_printk() output to @tr: clear the TRACE_PRINTK flag on
 * the previous target and set it on the new one.  No-op if @tr already
 * is the target.
 */
static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
}
551
552 /* Returns true if the status of tr changed */
/*
 * Add or remove @tr from the list of instances that receive copies of
 * top-level trace_marker writes, keeping the COPY_MARKER flag in sync.
 * Returns true if the status of tr changed.  Caller holds event_mutex.
 */
static bool update_marker_trace(struct trace_array *tr, int enabled)
{
	lockdep_assert_held(&event_mutex);

	if (enabled) {
		if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
			return false;

		list_add_rcu(&tr->marker_list, &marker_copies);
		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
		return true;
	}

	if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
		return false;

	list_del_rcu(&tr->marker_list);
	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
	return true;
}
573
/*
 * Mark @tr's ring buffer as needing full-size allocation.
 * A NULL @tr means the top-level (global) trace instance.
 */
void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
	if (!tr)
		tr = &global_trace;
	tr->ring_buffer_expanded = true;
}
580
/* Workqueue callback: destroy the trace array embedding @work. */
static void trace_array_autoremove(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);

	trace_array_destroy(tr);
}
587
588 static struct workqueue_struct *autoremove_wq;
589
/* Schedule asynchronous destruction of @tr (no-op if the wq failed to allocate). */
static void trace_array_kick_autoremove(struct trace_array *tr)
{
	if (autoremove_wq)
		queue_work(autoremove_wq, &tr->autoremove_work);
}
595
/* Cancel a pending autoremoval of @tr, waiting for it if already queued. */
static void trace_array_cancel_autoremove(struct trace_array *tr)
{
	/*
	 * Since this can be called inside trace_array_autoremove(),
	 * it has to avoid deadlock of the workqueue.
	 */
	if (work_pending(&tr->autoremove_work))
		cancel_work_sync(&tr->autoremove_work);
}
605
/* Prepare @tr's autoremove work item (done once at array creation). */
static void trace_array_init_autoremove(struct trace_array *tr)
{
	INIT_WORK(&tr->autoremove_work, trace_array_autoremove);
}
610
/*
 * Lazily allocate the autoremove workqueue.  On allocation failure the
 * feature is simply disabled (kick becomes a no-op) with a warning.
 */
static void trace_array_start_autoremove(void)
{
	if (autoremove_wq)
		return;

	autoremove_wq = alloc_workqueue("tr_autoremove_wq",
					WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!autoremove_wq)
		pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n");
}
621
622 LIST_HEAD(ftrace_trace_arrays);
623
/*
 * Take a reference on @this_tr.  Caller holds trace_types_lock.
 * Returns -ENODEV if the array is marked free_on_close (it is on its
 * way out once the last reference drops).
 */
static int __trace_array_get(struct trace_array *this_tr)
{
	/* When free_on_close is set, this is not available anymore. */
	if (autoremove_wq && this_tr->free_on_close)
		return -ENODEV;

	this_tr->ref++;
	return 0;
}
633
trace_array_get(struct trace_array * this_tr)634 int trace_array_get(struct trace_array *this_tr)
635 {
636 struct trace_array *tr;
637
638 guard(mutex)(&trace_types_lock);
639 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
640 if (tr == this_tr) {
641 return __trace_array_get(tr);
642 }
643 }
644
645 return -ENODEV;
646 }
647
/*
 * Drop a reference on @this_tr.  Caller holds trace_types_lock.
 * If free_on_close is set and only the creator's reference remains,
 * queue the array for asynchronous destruction.
 */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
	/*
	 * When free_on_close is set, prepare removing the array
	 * when the last reference is released.
	 */
	if (this_tr->ref == 1 && this_tr->free_on_close)
		trace_array_kick_autoremove(this_tr);
}
659
/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 * A NULL @this_tr is silently ignored.
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	guard(mutex)(&trace_types_lock);
	__trace_array_put(this_tr);
}
EXPORT_SYMBOL_GPL(trace_array_put);
678
/*
 * Common checks for opening a tracefs file: lockdown policy, global
 * tracing state, and (when @tr is non-NULL) a reference on the array.
 * Returns 0 on success; on success with @tr the caller owns a reference
 * and must drop it with trace_array_put().
 */
int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}
695
/*
 * Return the current trace timestamp for @buf on @cpu, normalized to
 * that CPU.  Falls back to the local trace clock before the ring
 * buffer exists.
 */
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}
709
/* Current trace timestamp of the top-level trace buffer for @cpu. */
u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}
714
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}
733
734 /*
735 * trace_buf_size is the size in bytes that is allocated
736 * for a buffer. Note, the number of bytes is always rounded
737 * to page size.
738 *
739 * This number is purposely set to a low number of 16384.
740 * If the dump on oops happens, it will be much appreciated
741 * to not have to wait for all that output. Anyway this can be
742 * boot time and run time configurable.
743 */
744 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
745
746 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
747
748 /* trace_types holds a link list of available tracers. */
749 static struct tracer *trace_types __read_mostly;
750
751 /*
752 * trace_types_lock is used to protect the trace_types list.
753 */
754 DEFINE_MUTEX(trace_types_lock);
755
756 /*
757 * serialize the access of the ring buffer
758 *
759 * ring buffer serializes readers, but it is low level protection.
760 * The validity of the events (which returns by ring_buffer_peek() ..etc)
761 * are not protected by ring buffer.
762 *
763 * The content of events may become garbage if we allow other process consumes
764 * these events concurrently:
765 * A) the page of the consumed events may become a normal page
766 * (not reader page) in ring buffer, and this page will be rewritten
767 * by events producer.
768 * B) The page of the consumed events may become a page for splice_read,
769 * and this page will be returned to system.
770 *
771 * These primitives allow multi process access to different cpu ring buffer
772 * concurrently.
773 *
774 * These primitives don't distinguish read-only and read-consume access.
775 * Multi read-only access are also serialized.
776 */
777
778 #ifdef CONFIG_SMP
779 static DECLARE_RWSEM(all_cpu_access_lock);
780 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
781
/*
 * Serialize ring-buffer reader access (SMP variant).
 * RING_BUFFER_ALL_CPUS takes the rwsem for writing (exclusive over
 * everyone); a single cpu takes it for reading plus that cpu's mutex,
 * so per-cpu readers run concurrently but never with an all-cpu reader.
 */
static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}
797
/* Release the locks taken by trace_access_lock() in reverse order. */
static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}
807
/* Initialize every possible CPU's ring-buffer access mutex. */
static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}
815
816 #else
817
818 static DEFINE_MUTEX(access_lock);
819
/* UP variant: a single mutex serializes all reader access. */
static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}
825
/* UP variant: release the single reader-access mutex. */
static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}
831
/* UP variant: the static mutex needs no runtime initialization. */
static inline void trace_access_lock_init(void)
{
}
835
836 #endif
837
/*
 * Enable recording on @tr's ring buffer and clear the fast-path mirror
 * flag used by tracers that cannot afford an accurate check.
 */
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}
852
/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.  It acts on the top-level (global)
 * trace instance.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
864
865 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Take a snapshot of @tr, optionally passing @cond_data through to the
 * max-buffer update.  Refuses (with an in-buffer message) from NMI
 * context, when no snapshot buffer is allocated, when the buffer is
 * memory mapped to user space, or when the current tracer already uses
 * the snapshot buffer itself.
 */
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	unsigned long flags;

	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here!   ***\n");
		tracer_tracing_off(tr);
		return;
	}

	if (tr->mapped) {
		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer_uses_snapshot(tr->current_trace)) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* Swap live and snapshot buffers with interrupts off. */
	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}
901
/* Take an unconditional snapshot of @tr (no cond_data). */
void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}
906
/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
928
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
947
/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr:		The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot,
 * or NULL when no conditional snapshot is installed.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	/* max_lock is an arch spinlock; IRQs must be off while holding it. */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
978
979 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
980 struct array_buffer *size_buf, int cpu_id);
981 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
982
/*
 * Allocate @tr's snapshot (max) buffer if not already allocated, sized
 * and sub-buffer-ordered to match the main buffer.
 * Returns 0 on success or a negative errno.
 */
int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}
1007
static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer. Instead, resize it down to the
	 * minimum, because the snapshot buffer has some state (e.g.
	 * ring->clock) that we want to preserve.
	 */
	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->snapshot_buffer, 1);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
	tr->allocated_snapshot = false;
}
1021
/*
 * tracing_arm_snapshot_locked - take a reference on the snapshot feature
 *
 * Bumps tr->snapshot and allocates the snapshot buffer if needed.
 * Fails with -EBUSY when the count would overflow or when the buffer
 * is memory-mapped (tr->mapped). Caller must hold trace_types_lock.
 */
static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	lockdep_assert_held(&trace_types_lock);

	spin_lock(&tr->snapshot_trigger_lock);
	if (tr->snapshot == UINT_MAX || tr->mapped) {
		spin_unlock(&tr->snapshot_trigger_lock);
		return -EBUSY;
	}

	tr->snapshot++;
	spin_unlock(&tr->snapshot_trigger_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		/* Allocation failed: drop the reference we just took. */
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}
1046
/* Like tracing_arm_snapshot_locked(), but takes trace_types_lock itself. */
int tracing_arm_snapshot(struct trace_array *tr)
{
	guard(mutex)(&trace_types_lock);
	return tracing_arm_snapshot_locked(tr);
}
1052
/* Drop a snapshot reference taken by tracing_arm_snapshot{,_locked}(). */
void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	/* Warn (and don't underflow) if called without a matching arm. */
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}
1060
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 *
 * Returns 0 on success, negative errno on allocation failure.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	/* Failure here is unexpected enough to deserve a warning. */
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1082
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but allocates the snapshot buffer first if
 * needed. Only use this where sleeping is allowed, as the allocation
 * may sleep.
 *
 * On success the snapshot buffer and the live tracing buffer are
 * swapped, preserving the live trace while tracing continues.
 */
void tracing_snapshot_alloc(void)
{
	/* If the spare buffer cannot be allocated, there is nothing to swap. */
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1105
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr: The tracing instance
 * @cond_data: User data to associate with the snapshot
 * @update: Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	/* __free(kfree): freed automatically on any return before no_free_ptr() */
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc_obj(*cond_snapshot);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	/* Ownership passes to @tr; no_free_ptr() disables the auto-free. */
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1161
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	/* max_lock serializes against readers of tr->cond_snapshot */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	/*
	 * Drop the snapshot reference (presumably taken by
	 * tracing_snapshot_cond_enable()); note this runs even on the
	 * -EINVAL path above.
	 */
	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1194 #else
/* Stub: CONFIG_TRACER_SNAPSHOT is disabled; warn if anything still calls it. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub: conditional snapshots are unavailable without CONFIG_TRACER_SNAPSHOT. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/* Stub: snapshot buffers cannot be allocated without CONFIG_TRACER_SNAPSHOT. */
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub: delegates to tracing_snapshot() so the caller still gets the warning. */
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
/* Stub: no conditional snapshot data exists without CONFIG_TRACER_SNAPSHOT. */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
/* Stub: conditional snapshots cannot be enabled without CONFIG_TRACER_SNAPSHOT. */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1227 int tracing_snapshot_cond_disable(struct trace_array *tr)
1228 {
1229 return false;
1230 }
1231 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1232 #define free_snapshot(tr) do { } while (0)
1233 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1234 #endif /* CONFIG_TRACER_SNAPSHOT */
1235
/**
 * tracer_tracing_off - stop recording into a trace array's ring buffer
 * @tr: the trace array to turn recording off for
 */
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}
1250
/**
 * tracer_tracing_disable() - temporary disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas, tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}
1268
/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}
1283
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	/* Only affects the top-level (global) trace instance. */
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
1297
/*
 * disable_trace_on_warning - stop tracing when a warning fires
 *
 * Honors the traceoff_on_warning option: leaves one last message in
 * the affected buffer(s) and turns recording off, preserving the trace
 * leading up to the warning.
 */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}
1315
1316 /**
1317 * tracer_tracing_is_on - show real state of ring buffer enabled
1318 * @tr : the trace array to know if ring buffer is enabled
1319 *
1320 * Shows real state of the ring buffer if it is enabled or not.
1321 */
tracer_tracing_is_on(struct trace_array * tr)1322 bool tracer_tracing_is_on(struct trace_array *tr)
1323 {
1324 if (tr->array_buffer.buffer)
1325 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1326 return !tr->buffer_disabled;
1327 }
1328
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns nonzero if the top-level (global) ring buffer is recording.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1337
set_buf_size(char * str)1338 static int __init set_buf_size(char *str)
1339 {
1340 unsigned long buf_size;
1341
1342 if (!str)
1343 return 0;
1344 buf_size = memparse(str, &str);
1345 /*
1346 * nr_entries can not be zero and the startup
1347 * tests require some buffer space. Therefore
1348 * ensure we have at least 4096 bytes of buffer.
1349 */
1350 trace_buf_size = max(4096UL, buf_size);
1351 return 1;
1352 }
1353 __setup("trace_buf_size=", set_buf_size);
1354
set_tracing_thresh(char * str)1355 static int __init set_tracing_thresh(char *str)
1356 {
1357 unsigned long threshold;
1358 int ret;
1359
1360 if (!str)
1361 return 0;
1362 ret = kstrtoul(str, 0, &threshold);
1363 if (ret < 0)
1364 return 0;
1365 tracing_thresh = threshold * 1000;
1366 return 1;
1367 }
1368 __setup("tracing_thresh=", set_tracing_thresh);
1369
/*
 * nsecs_to_usecs - convert nanoseconds to whole microseconds
 *
 * Plain integer division; any sub-microsecond remainder is truncated.
 */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000UL;
}
1374
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

/* Table of selectable trace clocks; indexed by tr->clock_id. */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns; /* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1406
trace_clock_in_ns(struct trace_array * tr)1407 bool trace_clock_in_ns(struct trace_array *tr)
1408 {
1409 if (trace_clocks[tr->clock_id].in_ns)
1410 return true;
1411
1412 return false;
1413 }
1414
/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Returns 0 on success, 1 if the buffer allocation failed (note:
 * positive 1, not a negative errno — callers only test for nonzero).
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}
1429
/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	/* Clear the pointer so a repeated put is harmless. */
	parser->buffer = NULL;
}
1438
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A read starting at offset 0 begins a fresh token. */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx] = ch;
		else {
			/* token longer than the parser buffer: error out */
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}
1526
1527 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1528 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1529 {
1530 int len;
1531
1532 if (trace_seq_used(s) <= s->readpos)
1533 return -EBUSY;
1534
1535 len = trace_seq_used(s) - s->readpos;
1536 if (cnt > len)
1537 cnt = len;
1538 memcpy(buf, s->buffer + s->readpos, cnt);
1539
1540 s->readpos += cnt;
1541 return cnt;
1542 }
1543
1544 unsigned long __read_mostly tracing_thresh;
1545
1546 #ifdef CONFIG_TRACER_MAX_TRACE
1547 #ifdef LATENCY_FS_NOTIFY
1548 static struct workqueue_struct *fsnotify_wq;
1549
/* Workqueue callback: emit an fsnotify MODIFY event on tracing_max_latency. */
static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}
1556
/* irq_work callback: bounce the notification into process context. */
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}
1563
/* Allocate the workqueue used to deliver max-latency fsnotify events. */
__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);
1576
/* Notify listeners that tracing_max_latency changed; safe from any context. */
void latency_fsnotify(struct trace_array *tr)
{
	/* Workqueue not set up yet (too early in boot): nothing to notify. */
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
#endif /* LATENCY_FS_NOTIFY */
1589
1590 static const struct file_operations tracing_max_lat_fops;
1591
/* Create the tracing_max_latency file (and its fsnotify plumbing) for @tr. */
static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
#ifdef LATENCY_FS_NOTIFY
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
#endif
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}
1604
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 *
 * Called with tr->max_lock held; records which task/CPU caused the
 * new max latency into the snapshot buffer's per-cpu data.
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct array_buffer *max_buf = &tr->snapshot_buffer;
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	strscpy(max_data->comm, tsk->comm);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
	/* Let userspace know tracing_max_latency changed. */
	latency_fsnotify(tr);
}
1644 #else
/* Stubs used when CONFIG_TRACER_MAX_TRACE is not configured. */
static inline void trace_create_maxlat_file(struct trace_array *tr,
					    struct dentry *d_tracer) { }
static inline void __update_max_tr(struct trace_array *tr,
				   struct task_struct *tsk, int cpu) { }
1649 #endif /* CONFIG_TRACER_MAX_TRACE */
1650
1651 #ifdef CONFIG_TRACER_SNAPSHOT
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->snapshot_buffer.buffer);
	else
		ring_buffer_record_off(tr->snapshot_buffer.buffer);

	/* A conditional snapshot may veto the swap via its update() hook. */
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}

	/* Exchange the live buffer with the spare snapshot buffer. */
	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);

	/* Any waiters on the old snapshot buffer need to wake up */
	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}
1699
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 * Another reason is resize is in progress.
		 */
		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit or resize in progress\n");
	}

	/* -EAGAIN and -EBUSY are the expected failure modes of the swap. */
	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
1744 #endif /* CONFIG_TRACER_SNAPSHOT */
1745
/* Context handed to wait_pipe_cond() by wait_on_pipe(). */
struct pipe_wait {
	struct trace_iterator	*iter;
	int			wait_index; /* iter->wait_index sampled at wait start */
};
1750
/*
 * wait_pipe_cond - extra wake condition passed to ring_buffer_wait()
 * @data: the struct pipe_wait set up by wait_on_pipe()
 *
 * Returns true (stop waiting) when wait_index has moved on since the
 * wait started, or when the iterator has been closed.
 */
static bool wait_pipe_cond(void *data)
{
	struct pipe_wait *pwait = data;
	struct trace_iterator *iter = pwait->iter;

	/* acquire read; presumably paired with a release on the waker side */
	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
		return true;

	return iter->closed;
}
1761
/* Block until the iterator's ring buffer has data (or the wait is aborted). */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1787
1788 #ifdef CONFIG_FTRACE_STARTUP_TEST
1789 static bool selftests_can_run;
1790
/* A tracer whose selftest was postponed until selftests_can_run is set. */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};
1795
1796 static LIST_HEAD(postponed_selftests);
1797
save_selftest(struct tracer * type)1798 static int save_selftest(struct tracer *type)
1799 {
1800 struct trace_selftests *selftest;
1801
1802 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1803 if (!selftest)
1804 return -ENOMEM;
1805
1806 selftest->type = type;
1807 list_add(&selftest->list, &postponed_selftests);
1808 return 0;
1809 }
1810
/*
 * run_tracer_selftest - run the startup selftest of one tracer
 *
 * Temporarily installs @type as the current tracer of the top-level
 * trace array and runs its selftest. Returns 0 on pass or skip, -1 on
 * failure, or the save_selftest() result when postponed.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1886
/* Wrapper that flags selftest-in-progress around run_tracer_selftest(). */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1904
/*
 * Run all selftests that were postponed during early boot, removing
 * any tracer that fails from the available_tracers list.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink p->type from the singly-linked trace_types list. */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
1948 #else
/* Stub: selftests are compiled out without CONFIG_FTRACE_STARTUP_TEST. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1953 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1954
1955 static int add_tracer(struct trace_array *tr, struct tracer *t);
1956
1957 static void __init apply_trace_boot_options(void);
1958
free_tracers(struct trace_array * tr)1959 static void free_tracers(struct trace_array *tr)
1960 {
1961 struct tracers *t, *n;
1962
1963 lockdep_assert_held(&trace_types_lock);
1964
1965 list_for_each_entry_safe(t, n, &tr->tracers, list) {
1966 list_del(&t->list);
1967 kfree(t->flags);
1968 kfree(t);
1969 }
1970 }
1971
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 *
 * Returns 0 on success, -1 on a name/duplicate problem, -EPERM under
 * lockdown, or a selftest failure code.
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	/* Reject duplicate names. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	/* Hook the tracer up to every existing trace instance. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

	return 0;
}
2053
/*
 * Clear the ring buffer of @buf for a single @cpu.
 *
 * Recording is disabled and synchronize_rcu() waits for in-flight
 * commits to finish before the per-cpu buffer is reset; recording is
 * then re-enabled.
 */
static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
        struct trace_buffer *buffer = buf->buffer;

        if (!buffer)
                return;

        ring_buffer_record_disable(buffer);

        /* Make sure all commits have finished */
        synchronize_rcu();
        ring_buffer_reset_cpu(buffer, cpu);

        ring_buffer_record_enable(buffer);
}
2069
/*
 * Clear the ring buffers of @buf for all online cpus.
 *
 * Same disable/synchronize/reset/enable sequence as
 * tracing_reset_cpu(), but covering every online cpu and restarting
 * the buffer's time_start reference.
 */
void tracing_reset_online_cpus(struct array_buffer *buf)
{
        struct trace_buffer *buffer = buf->buffer;

        if (!buffer)
                return;

        ring_buffer_record_disable(buffer);

        /* Make sure all commits have finished */
        synchronize_rcu();

        buf->time_start = buffer_ftrace_now(buf, buf->cpu);

        ring_buffer_reset_online_cpus(buffer);

        ring_buffer_record_enable(buffer);
}
2088
/*
 * Clear the ring buffers of @buf for every cpu (online or not) and
 * restart the buffer's time_start reference. Same synchronization
 * pattern as tracing_reset_online_cpus().
 */
static void tracing_reset_all_cpus(struct array_buffer *buf)
{
        struct trace_buffer *buffer = buf->buffer;

        if (!buffer)
                return;

        ring_buffer_record_disable(buffer);

        /* Make sure all commits have finished */
        synchronize_rcu();

        buf->time_start = buffer_ftrace_now(buf, buf->cpu);

        ring_buffer_reset(buffer);

        ring_buffer_record_enable(buffer);
}
2107
/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus_unlocked(void)
{
        struct trace_array *tr;

        lockdep_assert_held(&trace_types_lock);

        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
                /* Only clear arrays that were flagged for clearing. */
                if (!tr->clear_trace)
                        continue;
                tr->clear_trace = false;
                tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_SNAPSHOT
                tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
        }
}
2125
tracing_reset_all_online_cpus(void)2126 void tracing_reset_all_online_cpus(void)
2127 {
2128 guard(mutex)(&trace_types_lock);
2129 tracing_reset_all_online_cpus_unlocked();
2130 }
2131
is_tracing_stopped(void)2132 int is_tracing_stopped(void)
2133 {
2134 return global_trace.stop_count;
2135 }
2136
/*
 * Decrement @tr's stop depth and, when it reaches zero, re-enable
 * recording on the trace (and snapshot) ring buffers.
 *
 * tr->start_lock serializes stop_count updates; tr->max_lock is taken
 * to prevent the buffers from switching while recording is re-enabled.
 */
static void tracing_start_tr(struct trace_array *tr)
{
        struct trace_buffer *buffer;

        if (tracing_disabled)
                return;

        guard(raw_spinlock_irqsave)(&tr->start_lock);
        if (--tr->stop_count) {
                if (WARN_ON_ONCE(tr->stop_count < 0)) {
                        /* Someone screwed up their debugging */
                        tr->stop_count = 0;
                }
                return;
        }

        /* Prevent the buffers from switching */
        arch_spin_lock(&tr->max_lock);

        buffer = tr->array_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
        buffer = tr->snapshot_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
#endif

        arch_spin_unlock(&tr->max_lock);
}
2168
2169 /**
2170 * tracing_start - quick start of the tracer
2171 *
2172 * If tracing is enabled but was stopped by tracing_stop,
2173 * this will start the tracer back up.
2174 */
tracing_start(void)2175 void tracing_start(void)
2176
2177 {
2178 return tracing_start_tr(&global_trace);
2179 }
2180
/*
 * Increment @tr's stop depth and, on the first stop, disable recording
 * on the trace (and snapshot) ring buffers. Mirror of
 * tracing_start_tr(); uses the same start_lock/max_lock ordering.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
        struct trace_buffer *buffer;

        guard(raw_spinlock_irqsave)(&tr->start_lock);
        if (tr->stop_count++)
                return;

        /* Prevent the buffers from switching */
        arch_spin_lock(&tr->max_lock);

        buffer = tr->array_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
        buffer = tr->snapshot_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
#endif

        arch_spin_unlock(&tr->max_lock);
}
2204
2205 /**
2206 * tracing_stop - quick stop of the tracer
2207 *
2208 * Light weight way to stop tracing. Use in conjunction with
2209 * tracing_start.
2210 */
tracing_stop(void)2211 void tracing_stop(void)
2212 {
2213 return tracing_stop_tr(&global_trace);
2214 }
2215
2216 /*
2217 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2218 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2219 * simplifies those functions and keeps them in sync.
2220 */
trace_handle_return(struct trace_seq * s)2221 enum print_line_t trace_handle_return(struct trace_seq *s)
2222 {
2223 return trace_seq_has_overflowed(s) ?
2224 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2225 }
2226 EXPORT_SYMBOL_GPL(trace_handle_return);
2227
/*
 * Current task's migrate-disable depth, for the packed trace context.
 * Always zero on !SMP kernels where tasks cannot migrate anyway.
 */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
        return current->migration_disabled;
#else
        return 0;
#endif
}
2236
/*
 * Build the packed trace-context word from @irqs_status and the
 * current preempt count.
 *
 * Layout of the returned value:
 *   bits 16.. : TRACE_FLAG_* bits (irq/softirq/NMI/resched state)
 *   bits 4-7  : migration-disable depth, clamped to 15
 *   bits 0-3  : preempt count (low byte), clamped to 15
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
        unsigned int trace_flags = irqs_status;
        unsigned int pc;

        pc = preempt_count();

        if (pc & NMI_MASK)
                trace_flags |= TRACE_FLAG_NMI;
        if (pc & HARDIRQ_MASK)
                trace_flags |= TRACE_FLAG_HARDIRQ;
        if (in_serving_softirq())
                trace_flags |= TRACE_FLAG_SOFTIRQ;
        /* BH disabled (count beyond the "serving" bit) */
        if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
                trace_flags |= TRACE_FLAG_BH_OFF;

        if (tif_need_resched())
                trace_flags |= TRACE_FLAG_NEED_RESCHED;
        if (test_preempt_need_resched())
                trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
        if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
                trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
        return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
                (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
2262
/*
 * Reserve space on @buffer for an event of @type and @len bytes.
 * Thin wrapper around __trace_buffer_lock_reserve().
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
                          int type,
                          unsigned long len,
                          unsigned int trace_ctx)
{
        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2271
/* Per-cpu scratch event page used while filtering (allocated below). */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-cpu nesting count: non-zero means the scratch event is in use. */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of users of the buffered-event optimization. */
static int trace_buffered_event_ref;
2275
2276 /**
2277 * trace_buffered_event_enable - enable buffering events
2278 *
2279 * When events are being filtered, it is quicker to use a temporary
2280 * buffer to write the event data into if there's a likely chance
2281 * that it will not be committed. The discard of the ring buffer
2282 * is not as fast as committing, and is much slower than copying
2283 * a commit.
2284 *
2285 * When an event is to be filtered, allocate per cpu buffers to
2286 * write the event data into, and if the event is filtered and discarded
2287 * it is simply dropped, otherwise, the entire data is to be committed
2288 * in one shot.
2289 */
trace_buffered_event_enable(void)2290 void trace_buffered_event_enable(void)
2291 {
2292 struct ring_buffer_event *event;
2293 struct page *page;
2294 int cpu;
2295
2296 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2297
2298 if (trace_buffered_event_ref++)
2299 return;
2300
2301 for_each_tracing_cpu(cpu) {
2302 page = alloc_pages_node(cpu_to_node(cpu),
2303 GFP_KERNEL | __GFP_NORETRY, 0);
2304 /* This is just an optimization and can handle failures */
2305 if (!page) {
2306 pr_err("Failed to allocate event buffer\n");
2307 break;
2308 }
2309
2310 event = page_address(page);
2311 memset(event, 0, sizeof(*event));
2312
2313 per_cpu(trace_buffered_event, cpu) = event;
2314
2315 scoped_guard(preempt,) {
2316 if (cpu == smp_processor_id() &&
2317 __this_cpu_read(trace_buffered_event) !=
2318 per_cpu(trace_buffered_event, cpu))
2319 WARN_ON_ONCE(1);
2320 }
2321 }
2322 }
2323
/* Cross-cpu callback: drop the "in use" count on this cpu. */
static void enable_trace_buffered_event(void *data)
{
        this_cpu_dec(trace_buffered_event_cnt);
}
2328
/* Cross-cpu callback: raise the "in use" count on this cpu. */
static void disable_trace_buffered_event(void *data)
{
        this_cpu_inc(trace_buffered_event_cnt);
}
2333
2334 /**
2335 * trace_buffered_event_disable - disable buffering events
2336 *
2337 * When a filter is removed, it is faster to not use the buffered
2338 * events, and to commit directly into the ring buffer. Free up
2339 * the temp buffers when there are no more users. This requires
2340 * special synchronization with current events.
2341 */
trace_buffered_event_disable(void)2342 void trace_buffered_event_disable(void)
2343 {
2344 int cpu;
2345
2346 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2347
2348 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2349 return;
2350
2351 if (--trace_buffered_event_ref)
2352 return;
2353
2354 /* For each CPU, set the buffer as used. */
2355 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2356 NULL, true);
2357
2358 /* Wait for all current users to finish */
2359 synchronize_rcu();
2360
2361 for_each_tracing_cpu(cpu) {
2362 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2363 per_cpu(trace_buffered_event, cpu) = NULL;
2364 }
2365
2366 /*
2367 * Wait for all CPUs that potentially started checking if they can use
2368 * their event buffer only after the previous synchronize_rcu() call and
2369 * they still read a valid pointer from trace_buffered_event. It must be
2370 * ensured they don't see cleared trace_buffered_event_cnt else they
2371 * could wrongly decide to use the pointed-to buffer which is now freed.
2372 */
2373 synchronize_rcu();
2374
2375 /* For each CPU, relinquish the buffer */
2376 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2377 true);
2378 }
2379
/* Scratch ring buffer for triggers when the main buffer rejects writes. */
static struct trace_buffer *temp_buffer;
2381
/*
 * Reserve event space for @trace_file, preferring the per-cpu buffered
 * event page when filtering (or soft-disable) is active, so a
 * discarded event never touches the real ring buffer. On success the
 * returned reservation holds preemption disabled (released at commit);
 * see the inline comments for the exact protocol.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
                                struct trace_event_file *trace_file,
                                int type, unsigned long len,
                                unsigned int trace_ctx)
{
        struct ring_buffer_event *entry;
        struct trace_array *tr = trace_file->tr;
        int val;

        *current_rb = tr->array_buffer.buffer;

        if (!tr->no_filter_buffering_ref &&
            (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
                preempt_disable_notrace();
                /*
                 * Filtering is on, so try to use the per cpu buffer first.
                 * This buffer will simulate a ring_buffer_event,
                 * where the type_len is zero and the array[0] will
                 * hold the full length.
                 * (see include/linux/ring-buffer.h for details on
                 *  how the ring_buffer_event is structured).
                 *
                 * Using a temp buffer during filtering and copying it
                 * on a matched filter is quicker than writing directly
                 * into the ring buffer and then discarding it when
                 * it doesn't match. That is because the discard
                 * requires several atomic operations to get right.
                 * Copying on match and doing nothing on a failed match
                 * is still quicker than no copy on match, but having
                 * to discard out of the ring buffer on a failed match.
                 */
                if ((entry = __this_cpu_read(trace_buffered_event))) {
                        int max_len = PAGE_SIZE - struct_size(entry, array, 1);

                        val = this_cpu_inc_return(trace_buffered_event_cnt);

                        /*
                         * Preemption is disabled, but interrupts and NMIs
                         * can still come in now. If that happens after
                         * the above increment, then it will have to go
                         * back to the old method of allocating the event
                         * on the ring buffer, and if the filter fails, it
                         * will have to call ring_buffer_discard_commit()
                         * to remove it.
                         *
                         * Need to also check the unlikely case that the
                         * length is bigger than the temp buffer size.
                         * If that happens, then the reserve is pretty much
                         * guaranteed to fail, as the ring buffer currently
                         * only allows events less than a page. But that may
                         * change in the future, so let the ring buffer reserve
                         * handle the failure in that case.
                         */
                        if (val == 1 && likely(len <= max_len)) {
                                trace_event_setup(entry, type, trace_ctx);
                                entry->array[0] = len;
                                /* Return with preemption disabled */
                                return entry;
                        }
                        this_cpu_dec(trace_buffered_event_cnt);
                }
                /* __trace_buffer_lock_reserve() disables preemption */
                preempt_enable_notrace();
        }

        entry = __trace_buffer_lock_reserve(*current_rb, type, len,
                                            trace_ctx);
        /*
         * If tracing is off, but we have triggers enabled
         * we still need to look at the event data. Use the temp_buffer
         * to store the trace event for the trigger to use. It's recursive
         * safe and will not be recorded anywhere.
         */
        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
                *current_rb = temp_buffer;
                entry = __trace_buffer_lock_reserve(*current_rb, type, len,
                                                    trace_ctx);
        }
        return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2464
/* Serializes use of the shared tracepoint_print_iter in output_printk(). */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Serializes tracepoint_printk sysctl updates. */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2467
/*
 * Print a trace event directly via printk (tp_printk mode) using the
 * global tracepoint_print_iter, honoring the file's soft-disable and
 * filter state. tracepoint_iter_lock guards the shared iterator.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
        struct trace_event_call *event_call;
        struct trace_event_file *file;
        struct trace_event *event;
        unsigned long flags;
        struct trace_iterator *iter = tracepoint_print_iter;

        /* We should never get here if iter is NULL */
        if (WARN_ON_ONCE(!iter))
                return;

        event_call = fbuffer->trace_file->event_call;
        if (!event_call || !event_call->event.funcs ||
            !event_call->event.funcs->trace)
                return;

        /* Respect soft-disable and (when filtered) a failed filter match. */
        file = fbuffer->trace_file;
        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
             !filter_match_preds(file->filter, fbuffer->entry)))
                return;

        event = &fbuffer->trace_file->event_call->event;

        raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
        trace_seq_init(&iter->seq);
        iter->ent = fbuffer->entry;
        event_call->event.funcs->trace(iter, 0, event);
        trace_seq_putc(&iter->seq, 0);
        printk("%s", iter->seq.buffer);

        raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
2502
/*
 * sysctl handler for tracepoint_printk: flips the
 * tracepoint_printk_key static key only when the value actually
 * changed, all under tracepoint_printk_mutex. Returns proc_dointvec()'s
 * result.
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
                             void *buffer, size_t *lenp,
                             loff_t *ppos)
{
        int save_tracepoint_printk;
        int ret;

        guard(mutex)(&tracepoint_printk_mutex);
        save_tracepoint_printk = tracepoint_printk;

        ret = proc_dointvec(table, write, buffer, lenp, ppos);

        /*
         * This will force exiting early, as tracepoint_printk
         * is always zero when tracepoint_printk_iter is not allocated
         */
        if (!tracepoint_print_iter)
                tracepoint_printk = 0;

        /* No change: don't touch the static key. */
        if (save_tracepoint_printk == tracepoint_printk)
                return ret;

        if (tracepoint_printk)
                static_key_enable(&tracepoint_printk_key.key);
        else
                static_key_disable(&tracepoint_printk_key.key);

        return ret;
}
2532
/*
 * Commit a reserved event: run trigger tests (which may discard the
 * event), optionally mirror it to printk (tp_printk) and to event
 * exporters, then commit it to the ring buffer. Post-call triggers
 * fire whether or not the event was discarded.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
        enum event_trigger_type tt = ETT_NONE;
        struct trace_event_file *file = fbuffer->trace_file;

        if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
                                         fbuffer->entry, &tt))
                goto discard;

        if (static_key_false(&tracepoint_printk_key.key))
                output_printk(fbuffer);

        if (static_branch_unlikely(&trace_event_exports_enabled))
                ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

        trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
                        fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
        if (tt)
                event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2557
2558 /*
2559 * Skip 3:
2560 *
2561 * trace_buffer_unlock_commit_regs()
2562 * trace_event_buffer_commit()
2563 * trace_event_raw_event_xxx()
2564 */
2565 # define STACK_SKIP 3
2566
/*
 * Commit a reserved event, then record kernel and user stack traces
 * for context (each helper checks its own enabling conditions).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
                                     struct trace_buffer *buffer,
                                     struct ring_buffer_event *event,
                                     unsigned int trace_ctx,
                                     struct pt_regs *regs)
{
        __buffer_unlock_commit(buffer, event);

        /*
         * If regs is not set, then skip the necessary functions.
         * Note, we can still get here via blktrace, wakeup tracer
         * and mmiotrace, but that's ok if they lose a function or
         * two. They are not that meaningful.
         */
        ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
        ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2584
2585 /*
2586 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2587 */
2588 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2589 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2590 struct ring_buffer_event *event)
2591 {
2592 __buffer_unlock_commit(buffer, event);
2593 }
2594
/*
 * Record a TRACE_FN entry for @ip called from @parent_ip. When @fregs
 * is supplied and the arch supports argument access, the function's
 * register-passed arguments are stored after the entry as well.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
               parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
        struct trace_buffer *buffer = tr->array_buffer.buffer;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
        int size = sizeof(*entry);

        /* Extra room for the argument words only when fregs is given. */
        size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
                                            trace_ctx);
        if (!event)
                return;
        entry = ring_buffer_event_data(event);
        entry->ip = ip;
        entry->parent_ip = parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
        if (fregs) {
                for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
                        entry->args[i] = ftrace_regs_get_argument(fregs, i);
        }
#endif

        if (static_branch_unlikely(&trace_function_exports_enabled))
                ftrace_exports(event, TRACE_EXPORT_FUNCTION);
        __buffer_unlock_commit(buffer, event);
}
2625
2626 #ifdef CONFIG_STACKTRACE
2627
2628 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2629 #define FTRACE_KSTACK_NESTING 4
2630
2631 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
2632
/* One scratch area for a saved kernel stack trace. */
struct ftrace_stack {
        unsigned long calls[FTRACE_KSTACK_ENTRIES];
};


/* Per-cpu set of scratch areas, one per allowed nesting level. */
struct ftrace_stacks {
        struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
};

static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Index of the next free slot in ftrace_stacks on this cpu. */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2644
/*
 * Save a kernel stack trace into the ring buffer.
 *
 * A per-cpu array of FTRACE_KSTACK_NESTING scratch areas lets nested
 * contexts on one cpu (normal/softirq/irq/NMI) each use their own
 * slot; the trace recursion bit guards the whole operation.
 */
void __ftrace_trace_stack(struct trace_array *tr,
                          struct trace_buffer *buffer,
                          unsigned int trace_ctx,
                          int skip, struct pt_regs *regs)
{
        struct ring_buffer_event *event;
        unsigned int size, nr_entries;
        struct ftrace_stack *fstack;
        struct stack_entry *entry;
        int stackidx;
        int bit;

        bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
        if (bit < 0)
                return;

        /*
         * Add one, for this function and the call to save_stack_trace()
         * If regs is set, then these functions will not be in the way.
         */
#ifndef CONFIG_UNWINDER_ORC
        if (!regs)
                skip++;
#endif

        guard(preempt_notrace)();

        stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

        /* This should never happen. If it does, yell once and skip */
        if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
                goto out;

        /*
         * The above __this_cpu_inc_return() is 'atomic' cpu local. An
         * interrupt will either see the value pre increment or post
         * increment. If the interrupt happens pre increment it will have
         * restored the counter when it returns.  We just need a barrier to
         * keep gcc from moving things around.
         */
        barrier();

        fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
        size = ARRAY_SIZE(fstack->calls);

        if (regs) {
                nr_entries = stack_trace_save_regs(regs, fstack->calls,
                                                   size, skip);
        } else {
                nr_entries = stack_trace_save(fstack->calls, size, skip);
        }

#ifdef CONFIG_DYNAMIC_FTRACE
        /* Mark entry of stack trace as trampoline code */
        if (tr->ops && tr->ops->trampoline) {
                unsigned long tramp_start = tr->ops->trampoline;
                unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
                unsigned long *calls = fstack->calls;

                for (int i = 0; i < nr_entries; i++) {
                        if (calls[i] >= tramp_start && calls[i] < tramp_end)
                                calls[i] = FTRACE_TRAMPOLINE_MARKER;
                }
        }
#endif

        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
                                            struct_size(entry, caller, nr_entries),
                                            trace_ctx);
        if (!event)
                goto out;
        entry = ring_buffer_event_data(event);

        entry->size = nr_entries;
        memcpy(&entry->caller, fstack->calls,
               flex_array_size(entry, caller, nr_entries));

        __buffer_unlock_commit(buffer, event);

 out:
        /* Again, don't let gcc optimize things here */
        barrier();
        __this_cpu_dec(ftrace_stack_reserve);
        trace_clear_recursion(bit);
}
2730
/*
 * Record a kernel stack trace, handling the case where RCU is not
 * watching: enter an RCU-watched section first, except from NMI where
 * that is unsafe (the trace is then silently dropped).
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
                   int skip)
{
        struct trace_buffer *buffer = tr->array_buffer.buffer;

        if (rcu_is_watching()) {
                __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
                return;
        }

        if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
                return;

        /*
         * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
         * but if the above rcu_is_watching() failed, then the NMI
         * triggered someplace critical, and ct_irq_enter() should
         * not be called from NMI.
         */
        if (unlikely(in_nmi()))
                return;

        ct_irq_enter_irqson();
        __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
        ct_irq_exit_irqson();
}
2757
2758 /**
2759 * trace_dump_stack - record a stack back trace in the trace buffer
2760 * @skip: Number of functions to skip (helper handlers)
2761 */
trace_dump_stack(int skip)2762 void trace_dump_stack(int skip)
2763 {
2764 if (tracing_disabled || tracing_selftest_running)
2765 return;
2766
2767 #ifndef CONFIG_UNWINDER_ORC
2768 /* Skip 1 to skip this function. */
2769 skip++;
2770 #endif
2771 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
2772 tracing_gen_ctx(), skip, NULL);
2773 }
2774 EXPORT_SYMBOL_GPL(trace_dump_stack);
2775
2776 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-cpu recursion guard for user stack tracing. */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the current task's user-space stack into the ring buffer when
 * the USERSTACKTRACE trace option is set. Not usable from NMI (the
 * user stack walk can fault), and self-recursion on a cpu is blocked
 * via user_stack_count.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
                       struct trace_buffer *buffer, unsigned int trace_ctx)
{
        struct ring_buffer_event *event;
        struct userstack_entry *entry;

        if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
                return;

        /*
         * NMIs can not handle page faults, even with fix ups.
         * The save user stack can (and often does) fault.
         */
        if (unlikely(in_nmi()))
                return;

        /*
         * prevent recursion, since the user stack tracing may
         * trigger other kernel events.
         */
        guard(preempt)();
        if (__this_cpu_read(user_stack_count))
                return;

        __this_cpu_inc(user_stack_count);

        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
                                            sizeof(*entry), trace_ctx);
        if (!event)
                goto out_drop_count;
        entry = ring_buffer_event_data(event);

        entry->tgid = current->tgid;
        memset(&entry->caller, 0, sizeof(entry->caller));

        stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
        __buffer_unlock_commit(buffer, event);

 out_drop_count:
        __this_cpu_dec(user_stack_count);
}
2821 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* User stack tracing unsupported: silently do nothing. */
static void ftrace_trace_userstack(struct trace_array *tr,
                                   struct trace_buffer *buffer,
                                   unsigned int trace_ctx)
{
}
2827 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2828
2829 #endif /* CONFIG_STACKTRACE */
2830
2831 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)2832 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2833 unsigned long long delta)
2834 {
2835 entry->bottom_delta_ts = delta & U32_MAX;
2836 entry->top_delta_ts = (delta >> 32);
2837 }
2838
/*
 * Emit a TRACE_FUNC_REPEATS entry summarizing @last_info: the repeated
 * ip/parent_ip pair, the repeat count, and the time elapsed since the
 * last recorded call (stored as two 32-bit halves).
 */
void trace_last_func_repeats(struct trace_array *tr,
                             struct trace_func_repeats *last_info,
                             unsigned int trace_ctx)
{
        struct trace_buffer *buffer = tr->array_buffer.buffer;
        struct func_repeats_entry *entry;
        struct ring_buffer_event *event;
        u64 delta;

        event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
                                            sizeof(*entry), trace_ctx);
        if (!event)
                return;

        /* Delta is measured against this event's own timestamp. */
        delta = ring_buffer_event_time_stamp(buffer, event) -
                last_info->ts_last_call;

        entry = ring_buffer_event_data(event);
        entry->ip = last_info->ip;
        entry->parent_ip = last_info->parent_ip;
        entry->count = last_info->count;
        func_repeats_set_delta_ts(entry, delta);

        __buffer_unlock_commit(buffer, event);
}
2864
trace_iterator_increment(struct trace_iterator * iter)2865 static void trace_iterator_increment(struct trace_iterator *iter)
2866 {
2867 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2868
2869 iter->idx++;
2870 if (buf_iter)
2871 ring_buffer_iter_advance(buf_iter);
2872 }
2873
/*
 * Peek at the next entry on @cpu without consuming it. Uses the
 * per-cpu buffer iterator when one exists, otherwise peeks the live
 * ring buffer. Fills *ts, optionally *lost_events, sets
 * iter->ent_size, and returns the entry data or NULL when empty.
 */
static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
                unsigned long *lost_events)
{
        struct ring_buffer_event *event;
        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

        if (buf_iter) {
                event = ring_buffer_iter_peek(buf_iter, ts);
                if (lost_events)
                        /* Iterators only report "dropped or not", hence -1. */
                        *lost_events = ring_buffer_iter_dropped(buf_iter) ?
                                (unsigned long)-1 : 0;
        } else {
                event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
                                         lost_events);
        }

        if (event) {
                iter->ent_size = ring_buffer_event_length(event);
                return ring_buffer_event_data(event);
        }
        iter->ent_size = 0;
        return NULL;
}
2898
/*
 * Find the entry with the smallest timestamp across all cpus, or peek
 * a single cpu directly when the iterator is bound to one. Reports
 * the winning entry's cpu, timestamp, and lost-event count through the
 * optional out parameters; returns NULL when every buffer is empty.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
                  unsigned long *missing_events, u64 *ent_ts)
{
        struct trace_buffer *buffer = iter->array_buffer->buffer;
        struct trace_entry *ent, *next = NULL;
        unsigned long lost_events = 0, next_lost = 0;
        int cpu_file = iter->cpu_file;
        u64 next_ts = 0, ts;
        int next_cpu = -1;
        int next_size = 0;
        int cpu;

        /*
         * If we are in a per_cpu trace file, don't bother by iterating over
         * all cpu and peek directly.
         */
        if (cpu_file > RING_BUFFER_ALL_CPUS) {
                if (ring_buffer_empty_cpu(buffer, cpu_file))
                        return NULL;
                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
                if (ent_cpu)
                        *ent_cpu = cpu_file;

                return ent;
        }

        for_each_tracing_cpu(cpu) {

                if (ring_buffer_empty_cpu(buffer, cpu))
                        continue;

                ent = peek_next_entry(iter, cpu, &ts, &lost_events);

                /*
                 * Pick the entry with the smallest timestamp:
                 */
                if (ent && (!next || ts < next_ts)) {
                        next = ent;
                        next_cpu = cpu;
                        next_ts = ts;
                        next_lost = lost_events;
                        next_size = iter->ent_size;
                }
        }

        /* ent_size was clobbered by later peeks; restore the winner's. */
        iter->ent_size = next_size;

        if (ent_cpu)
                *ent_cpu = next_cpu;

        if (ent_ts)
                *ent_ts = next_ts;

        if (missing_events)
                *missing_events = next_lost;

        return next;
}
2958
/* Growth increment for iter->fmt; also the size of the static fallback. */
#define STATIC_FMT_BUF_SIZE	128
static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2961
trace_iter_expand_format(struct trace_iterator * iter)2962 char *trace_iter_expand_format(struct trace_iterator *iter)
2963 {
2964 char *tmp;
2965
2966 /*
2967 * iter->tr is NULL when used with tp_printk, which makes
2968 * this get called where it is not safe to call krealloc().
2969 */
2970 if (!iter->tr || iter->fmt == static_fmt_buf)
2971 return NULL;
2972
2973 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2974 GFP_KERNEL);
2975 if (tmp) {
2976 iter->fmt_size += STATIC_FMT_BUF_SIZE;
2977 iter->fmt = tmp;
2978 }
2979
2980 return tmp;
2981 }
2982
/* Returns true if the string is safe to dereference from an event */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
        unsigned long addr = (unsigned long)str;
        struct trace_event *trace_event;
        struct trace_event_call *event;

        /* OK if part of the event data */
        if ((addr >= (unsigned long)iter->ent) &&
            (addr < (unsigned long)iter->ent + iter->ent_size))
                return true;

        /* OK if part of the temp seq buffer */
        if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
            (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
                return true;

        /* Core rodata can not be freed */
        if (is_kernel_rodata(addr))
                return true;

        /* Strings registered as tracepoint strings are known-persistent. */
        if (trace_is_tracepoint_string(str))
                return true;

        /*
         * Now this could be a module event, referencing core module
         * data, which is OK.
         */
        if (!iter->ent)
                return false;

        trace_event = ftrace_find_event(iter->ent->type);
        if (!trace_event)
                return false;

        /* Dynamic events or events without a module can't vouch for @str. */
        event = container_of(trace_event, struct trace_event_call, event);
        if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
                return false;

        /* Would rather have rodata, but this will suffice */
        if (within_module_core(addr, event->module))
                return true;

        return false;
}
3028
/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe.
 *
 * @return: true if the event is unsafe and should be ignored,
 *          false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Fast path: nothing was flagged at boot, no runtime checks needed */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		/* The field holds a pointer; read it out of the raw record */
		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
3111
/*
 * Rewrite an event's format string, replacing "%p" with "%px" so that
 * real addresses are printed instead of hashed ones. Only done when the
 * trace instance has hash-ptr disabled; otherwise (or for tp_printk,
 * where iter->tr is NULL) the original format is returned unmodified.
 *
 * The rewritten string lives in iter->fmt, which is grown on demand.
 * If the buffer cannot be grown, falls back to the original format.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* Worst case appends 3 bytes ("%px") plus the terminator */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* krealloc() may have moved the buffer; rebase q */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				/* "%%" is a literal percent; copy and skip */
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				/* Bare %p (not %pX extensions): emit %px */
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
3150
#define STATIC_TEMP_BUF_SIZE	128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);

/*
 * Find the next real entry, without updating the iterator itself.
 *
 * @iter:    the trace iterator
 * @ent_cpu: if non-NULL, set to the CPU of the returned entry
 * @ent_ts:  if non-NULL, set to the timestamp of the returned entry
 *
 * Because peeking ahead may invalidate iter->ent, the current entry is
 * first copied into iter->temp so callers can keep using it afterwards.
 */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* Grow iter->temp if it is too small (never the static buf) */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
3199
3200 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3201 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3202 {
3203 iter->ent = __find_next_entry(iter, &iter->cpu,
3204 &iter->lost_events, &iter->ts);
3205
3206 if (iter->ent)
3207 trace_iterator_increment(iter);
3208
3209 return iter->ent ? iter : NULL;
3210 }
3211
/*
 * Consume (remove) the next entry for iter->cpu from the ring buffer,
 * recording its timestamp and any lost-event count in the iterator.
 */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
3217
/*
 * seq_file ->next() callback: advance the trace iterator to position *pos.
 *
 * The iterator can only move forward; if *pos is behind the current
 * index, NULL is returned. When the iterator has not been primed yet
 * (idx < 0) the first entry is fetched before catching up to *pos.
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;

	/* Leftover output is handled in s_start()/s_show(), never here */
	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);
	else
		ent = iter;

	/* Skip forward until the iterator index reaches the requested pos */
	while (ent && iter->idx < i)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
3244
/*
 * Reset the per-cpu ring buffer iterator for @cpu and skip any entries
 * recorded before the buffer's time_start, counting them as skipped so
 * the entry statistics stay accurate.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
3275
/*
 * seq_file ->start() callback.
 *
 * The current tracer is copied to avoid a global locking
 * all around. If the current tracer changed since the file was opened,
 * the iterator is re-bound to the new tracer (close old / open new).
 *
 * When *pos does not match the iterator's remembered position (a fresh
 * read or a seek), the per-cpu buffer iterators are reset and the
 * iterator is walked forward to *pos. Otherwise a sequential read is
 * resumed, possibly re-emitting leftover seq buffer content.
 *
 * Takes trace_event_read_lock() and trace_access_lock(); released in
 * s_stop().
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	/* Snapshot-based tracers cannot be read while tracing to snapshot */
	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/* Non-sequential access: rewind everything and walk to *pos */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
3336
/*
 * seq_file ->stop() callback: drop the locks taken in s_start().
 * Nothing to release when s_start() bailed out with -EBUSY for a
 * snapshot-using tracer.
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return;

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
3347
/*
 * Compute per-cpu entry statistics for @cpu.
 *
 * @entries: entries currently in the buffer (minus any skipped ones)
 * @total:   entries plus overruns, i.e. everything ever written
 */
static void
get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
		      unsigned long *entries, int cpu)
{
	unsigned long count;

	count = ring_buffer_entries_cpu(buf->buffer, cpu);
	/*
	 * If this buffer has skipped entries, then we hold all
	 * entries for the trace and we need to ignore the
	 * ones before the time stamp.
	 */
	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
		/* total is the same as the entries */
		*total = count;
	} else
		*total = count +
			ring_buffer_overrun_cpu(buf->buffer, cpu);
	*entries = count;
}
3369
3370 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)3371 get_total_entries(struct array_buffer *buf,
3372 unsigned long *total, unsigned long *entries)
3373 {
3374 unsigned long t, e;
3375 int cpu;
3376
3377 *total = 0;
3378 *entries = 0;
3379
3380 for_each_tracing_cpu(cpu) {
3381 get_total_entries_cpu(buf, &t, &e, cpu);
3382 *total += t;
3383 *entries += e;
3384 }
3385 }
3386
trace_total_entries_cpu(struct trace_array * tr,int cpu)3387 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3388 {
3389 unsigned long total, entries;
3390
3391 if (!tr)
3392 tr = &global_trace;
3393
3394 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3395
3396 return entries;
3397 }
3398
trace_total_entries(struct trace_array * tr)3399 unsigned long trace_total_entries(struct trace_array *tr)
3400 {
3401 unsigned long total, entries;
3402
3403 if (!tr)
3404 tr = &global_trace;
3405
3406 get_total_entries(&tr->array_buffer, &total, &entries);
3407
3408 return entries;
3409 }
3410
/* Column legend for the latency-format trace output. */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "# _------=> CPU# \n"
		    "# / _-----=> irqs-off/BH-disabled\n"
		    "# | / _----=> need-resched \n"
		    "# || / _---=> hardirq/softirq \n"
		    "# ||| / _--=> preempt-depth \n"
		    "# |||| / _-=> migrate-disable \n"
		    "# ||||| / delay \n"
		    "# cmd pid |||||| time | caller \n"
		    "# \\ / |||||| \\ | / \n");
}
3423
/* Print the entries-in-buffer / entries-written summary line. */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long total;
	unsigned long entries;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
3434
/*
 * Column legend for the normal (non-irq-info) trace output.
 * The TGID column is only shown when the record-tgid option is set.
 */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);

	print_event_info(buf, m);

	seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
	seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
}
3445
/*
 * Column legend for trace output with irq-info enabled. The extra
 * columns describe irq/preempt state; with record-tgid the legend is
 * widened (prec = 12) to leave room for the TGID column.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = " ";
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "# %.*s|||| / delay\n", prec, space);
	seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
	seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
}
3464
/*
 * Print the latency-trace header block: tracer name/version, summary
 * statistics for the traced cpu, the task that hit the max latency,
 * and (if recorded) the critical section start/end addresses.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "# -----------------\n");
	seq_printf(m, "# | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "# -----------------\n");

	if (data->critical_start) {
		seq_puts(m, "# => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n# => ended at: ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
3516
/*
 * Emit a "CPU N buffer started" annotation the first time output from a
 * given cpu appears, so a reader knows earlier events from that cpu were
 * lost to overruns. Only active when the annotate option is set and the
 * file was flagged (TRACE_FILE_ANNOTATE) because overruns occurred.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* Already announced this cpu */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	/* Entries were skipped, so the buffer did not really "start" here */
	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				 iter->cpu);
}
3543
#ifdef CONFIG_FTRACE_SYSCALLS
/*
 * True if @event is a raw syscall enter/exit event, identified by its
 * print function table.
 */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
		(event->funcs == &exit_syscall_print_funcs);

}
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
/* Without syscall tracing, no event can be a syscall event. */
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
#endif /* CONFIG_FTRACE_SYSCALLS */
3559
/*
 * Default (human-readable) formatting of the current entry into
 * iter->seq: context columns first, then the event's own trace()
 * callback — or a raw field dump when that is the safer choice.
 */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3605
/*
 * Raw formatting: minimal "pid cpu ts" context followed by the event's
 * raw() callback. Unknown event types print "<type> ?".
 */
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	event = ftrace_find_event(entry->type);
	if (event)
		return event->funcs->raw(iter, 0, event);

	trace_seq_printf(s, "%d ?\n", entry->type);

	return trace_handle_return(s);
}
3630
/*
 * Hex formatting: context fields and the event payload emitted as hex
 * via the event's hex() callback, terminated with a newline.
 */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		/* Propagate partial/unhandled results without the newline */
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}
3660
print_bin_fmt(struct trace_iterator * iter)3661 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3662 {
3663 struct trace_array *tr = iter->tr;
3664 struct trace_seq *s = &iter->seq;
3665 struct trace_entry *entry;
3666 struct trace_event *event;
3667
3668 entry = iter->ent;
3669
3670 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3671 SEQ_PUT_FIELD(s, entry->pid);
3672 SEQ_PUT_FIELD(s, iter->cpu);
3673 SEQ_PUT_FIELD(s, iter->ts);
3674 if (trace_seq_has_overflowed(s))
3675 return TRACE_TYPE_PARTIAL_LINE;
3676 }
3677
3678 event = ftrace_find_event(entry->type);
3679 return event ? event->funcs->binary(iter, 0, event) :
3680 TRACE_TYPE_HANDLED;
3681 }
3682
trace_empty(struct trace_iterator * iter)3683 int trace_empty(struct trace_iterator *iter)
3684 {
3685 struct ring_buffer_iter *buf_iter;
3686 int cpu;
3687
3688 /* If we are looking at one CPU buffer, only check that one */
3689 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3690 cpu = iter->cpu_file;
3691 buf_iter = trace_buffer_iter(iter, cpu);
3692 if (buf_iter) {
3693 if (!ring_buffer_iter_empty(buf_iter))
3694 return 0;
3695 } else {
3696 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3697 return 0;
3698 }
3699 return 1;
3700 }
3701
3702 for_each_tracing_cpu(cpu) {
3703 buf_iter = trace_buffer_iter(iter, cpu);
3704 if (buf_iter) {
3705 if (!ring_buffer_iter_empty(buf_iter))
3706 return 0;
3707 } else {
3708 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3709 return 0;
3710 }
3711 }
3712
3713 return 1;
3714 }
3715
/*
 * Format the current entry into iter->seq, dispatching on tracer hooks,
 * printk-msgonly shortcuts and the bin/hex/raw output options, falling
 * back to the default formatter. Called with trace_event_read_lock() held.
 */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	if (iter->lost_events) {
		/* -1 means events were lost but the count is unknown */
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* Give the tracer a chance to format the line itself */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	if (iter->ent->type == TRACE_BPUTS &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_printk_msg_only(iter);

	if (trace_flags & TRACE_ITER(BIN))
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER(HEX))
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER(RAW))
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
3766
trace_latency_header(struct seq_file * m)3767 void trace_latency_header(struct seq_file *m)
3768 {
3769 struct trace_iterator *iter = m->private;
3770 struct trace_array *tr = iter->tr;
3771
3772 /* print nothing if the buffers are empty */
3773 if (trace_empty(iter))
3774 return;
3775
3776 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3777 print_trace_header(m, iter);
3778
3779 if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3780 print_lat_help_header(m);
3781 }
3782
/*
 * Print the default header for the seq_file, choosing between the
 * latency header and the function-trace legends depending on the file
 * flags and trace options. Suppressed entirely when context-info is off.
 */
void trace_default_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;

	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;
		print_trace_header(m, iter);
		if (!(trace_flags & TRACE_ITER(VERBOSE)))
			print_lat_help_header(m);
	} else {
		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
			if (trace_flags & TRACE_ITER(IRQ_INFO))
				print_func_help_header_irq(iter->array_buffer,
							   m, trace_flags);
			else
				print_func_help_header(iter->array_buffer, m,
						       trace_flags);
		}
	}
}
3810
/* Warn in the output when function tracing has been disabled by a bug. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "# MAY BE MISSING FUNCTION EVENTS\n");
}
3818
#ifdef CONFIG_TRACER_SNAPSHOT
/* Usage text for the top-level snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "# (Doesn't have to be '2' works with any number that\n"
		    "# is not a '0' or '1')\n");
}

/* Usage text for a per-cpu snapshot file (echo 1 depends on ALLOW_SWAP). */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer for this cpu.\n");
#else
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "# Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "# (Doesn't have to be '2' works with any number that\n"
		    "# is not a '0' or '1')\n");
}

/*
 * Print the snapshot status banner and the command help appropriate for
 * the file (top-level vs per-cpu).
 */
static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
	if (iter->tr->allocated_snapshot)
		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
	else
		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");

	seq_puts(m, "# Snapshot commands:\n");
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
		show_snapshot_main_help(m);
	else
		show_snapshot_percpu_help(m);
}
#else
/* Should never be called */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
#endif
3862
/*
 * seq_file ->show() callback: emit the header (when no entry is
 * current), replay leftover output from a previous overflow, or format
 * the current entry and hand it to the seq_file.
 */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		/* Start of output: print headers instead of an entry */
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* The line did not fit even in a full seq buffer */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 * ret is 0 if seq_file write succeeded.
		 * -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
3910
/*
 * Return the cpu a tracing file is bound to, or RING_BUFFER_ALL_CPUS
 * for non-per-cpu files. The cpu number is stored biased by one in
 * i_cdev (see trace_create_cpu_file()) so that 0 means "not per-cpu".
 *
 * Should be used after trace_array_get(), trace_types_lock
 * ensures that i_cdev was already initialized.
 */
int tracing_get_cpu(struct inode *inode)
{
	if (inode->i_cdev) /* See trace_create_cpu_file() */
		return (long)inode->i_cdev - 1;
	return RING_BUFFER_ALL_CPUS;
}
3921
/* seq_file operations for reading the trace file. */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
3928
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement to all the allocations
 * is that it must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
static void free_trace_iter_content(struct trace_iterator *iter)
{
	/* The fmt is either NULL, allocated or points to static_fmt_buf */
	if (iter->fmt != static_fmt_buf)
		kfree(iter->fmt);

	kfree(iter->temp);
	kfree(iter->buffer_iter);
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
}
3947
/*
 * Common open path for the trace and snapshot files.
 *
 * Allocates and initializes a trace_iterator backed by the seq_file
 * private data: per-cpu ring buffer iterators, the temp entry buffer,
 * the started-cpus mask, and the choice between the main buffer and
 * the snapshot buffer. May stop tracing (pause-on-trace) until release.
 *
 * Returns the iterator on success or an ERR_PTR on failure, in which
 * case everything allocated here has been freed again.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4055
/*
 * tracing_open_generic - generic open for tracefs files
 *
 * Performs the security/tracing-disabled checks without taking a
 * trace_array reference (NULL argument) and stashes the inode's
 * private data for the read/write handlers.
 */
int tracing_open_generic(struct inode *inode, struct file *filp)
{
	int ret;

	/* NULL: only run the access checks, no trace_array ref is taken */
	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}
4067
4068 /*
4069 * Open and update trace_array ref count.
4070 * Must have the current trace_array passed to it.
4071 */
int tracing_open_generic_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	/* Checks tracing is allowed and takes a reference on tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* Read-only instances must not be opened for writing */
	if ((filp->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
		trace_array_put(tr);
		return -EACCES;
	}

	filp->private_data = inode->i_private;

	return 0;
}
4090
4091 /*
4092 * The private pointer of the inode is the trace_event_file.
4093 * Update the tr ref count associated to it.
4094 */
tracing_open_file_tr(struct inode * inode,struct file * filp)4095 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4096 {
4097 struct trace_event_file *file = inode->i_private;
4098 int ret;
4099
4100 ret = tracing_check_open_get_tr(file->tr);
4101 if (ret)
4102 return ret;
4103
4104 guard(mutex)(&event_mutex);
4105
4106 /* Fail if the file is marked for removal */
4107 if (file->flags & EVENT_FILE_FL_FREED) {
4108 trace_array_put(file->tr);
4109 return -ENODEV;
4110 } else {
4111 event_file_get(file);
4112 }
4113
4114 filp->private_data = inode->i_private;
4115
4116 return 0;
4117 }
4118
/* Release paired with tracing_open_file_tr(): drop both references it took */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}
4128
/* Release for single_open() files that also hold trace_event_file references */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}
4134
/*
 * Release for the "trace" file: tear down the iterator state built by
 * __tracing_open() and drop the trace_array reference taken at open.
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	/* Write-only opens built no iterator; just drop the open reference */
	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Reads go through seq_file; the iterator is its private data */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	/* Finish every per-CPU ring buffer iterator started at open */
	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	/* Let the tracer clean up its per-iterator state */
	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	/* Drop the reference while still holding trace_types_lock */
	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
4172
/* Generic release: drop the trace_array reference taken at open */
int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}
4180
/* Release for single_open() files that hold a trace_array reference */
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}
4189
4190 static bool update_last_data_if_empty(struct trace_array *tr);
4191
/*
 * Open the "trace" file. Opening with O_TRUNC erases the buffer
 * contents; opening for read builds a full iterator via __tracing_open().
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	/* Checks tracing is allowed and takes a reference on tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		/* Max-latency tracers present their data via the snapshot buffer */
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		/* Per-cpu "trace" files reset only their own CPU */
		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* On failure, drop the reference taken above */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
4233
4234 /*
4235 * Some tracers are not suitable for instance buffers.
4236 * A tracer is always available for the global array (toplevel)
4237 * or if it explicitly states that it is.
4238 */
4239 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4240 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4241 {
4242 /* arrays with mapped buffer range do not have snapshots */
4243 if (tr->range_addr_start && tracer_uses_snapshot(t))
4244 return false;
4245 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4246 }
4247
4248 /* Find the next tracer that this trace array may use */
4249 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4250 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4251 {
4252 while (t && !trace_ok_for_array(t, tr))
4253 t = t->next;
4254
4255 return t;
4256 }
4257
4258 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4259 t_next(struct seq_file *m, void *v, loff_t *pos)
4260 {
4261 struct trace_array *tr = m->private;
4262 struct tracer *t = v;
4263
4264 (*pos)++;
4265
4266 if (t)
4267 t = get_tracer_for_array(tr, t->next);
4268
4269 return t;
4270 }
4271
/*
 * seq_file start: take trace_types_lock (released in t_stop()) and
 * seek to the tracer at position *pos among those valid for this array.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l = 0;

	/* Held for the whole sequence; dropped by t_stop() */
	mutex_lock(&trace_types_lock);

	/* Start at the first usable tracer, then walk forward to *pos */
	t = get_tracer_for_array(tr, trace_types);
	for (; t && l < *pos; t = t_next(m, t, &l))
		;

	return t;
}
4286
/* seq_file stop: release the lock taken in t_start() */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
4291
t_show(struct seq_file * m,void * v)4292 static int t_show(struct seq_file *m, void *v)
4293 {
4294 struct tracer *t = v;
4295
4296 if (!t)
4297 return 0;
4298
4299 seq_puts(m, t->name);
4300 if (t->next)
4301 seq_putc(m, ' ');
4302 else
4303 seq_putc(m, '\n');
4304
4305 return 0;
4306 }
4307
/* seq_file operations for "available_tracers": walks the trace_types list */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
4314
/* Open "available_tracers": set up the seq_file walk over registered tracers */
static int show_traces_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	/* Checks tracing is allowed and takes a reference on tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = seq_open(file, &show_traces_seq_ops);
	if (ret) {
		trace_array_put(tr);
		return ret;
	}

	/* Stash the trace array for t_start()/t_next() */
	m = file->private_data;
	m->private = tr;

	return 0;
}
4336
/* Release for seq_open() files that hold a trace_array reference */
static int tracing_seq_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return seq_release(inode, file);
}
4344
/*
 * Writes to the "trace" file are accepted but discarded; the O_TRUNC
 * handling in tracing_open() is what actually clears the buffer.
 * Claim the whole count so writers do not loop retrying.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
4351
/*
 * Seek for tracing files: readers go through seq_file's lseek, while
 * writers (which do not use seq_file) simply have their position reset.
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	if (!(file->f_mode & FMODE_READ)) {
		file->f_pos = 0;
		return 0;
	}

	return seq_lseek(file, offset, whence);
}
4363
/* File operations for the "trace" file */
static const struct file_operations tracing_fops = {
	.open = tracing_open,
	.read = seq_read,
	.read_iter = seq_read_iter,
	.splice_read = copy_splice_read,
	.write = tracing_write_stub,
	.llseek = tracing_lseek,
	.release = tracing_release,
};
4373
/* File operations for the "available_tracers" file */
static const struct file_operations show_traces_fops = {
	.open = show_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_seq_release,
};
4380
/* Read "tracing_cpumask": format the mask of CPUs currently being traced */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	char *mask_str __free(kfree) = NULL;	/* kfree'd automatically on return */
	int len;

	/* First pass with a NULL buffer computes the required length */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	/* The user buffer must be able to hold the entire mask string */
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}
4402
/*
 * tracing_set_cpumask - restrict tracing to the CPUs in @tracing_cpumask_new
 * @tr: the trace array to update
 * @tracing_cpumask_new: the new set of CPUs to trace
 *
 * Stops ring-buffer recording on CPUs leaving the mask, resumes it on
 * CPUs entering the mask, then installs the new mask.
 *
 * Returns 0 on success, -EINVAL if @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	/* max_lock is an arch spinlock; IRQs must stay off while it is held */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
		    !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			/* CPU leaving the mask: stop recording on it */
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
		    cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			/* CPU entering the mask: resume recording on it */
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
4440
4441 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4442 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4443 size_t count, loff_t *ppos)
4444 {
4445 struct trace_array *tr = file_inode(filp)->i_private;
4446 cpumask_var_t tracing_cpumask_new;
4447 int err;
4448
4449 if (count == 0 || count > KMALLOC_MAX_SIZE)
4450 return -EINVAL;
4451
4452 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4453 return -ENOMEM;
4454
4455 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4456 if (err)
4457 goto err_free;
4458
4459 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4460 if (err)
4461 goto err_free;
4462
4463 free_cpumask_var(tracing_cpumask_new);
4464
4465 return count;
4466
4467 err_free:
4468 free_cpumask_var(tracing_cpumask_new);
4469
4470 return err;
4471 }
4472
/* File operations for the "tracing_cpumask" file */
static const struct file_operations tracing_cpumask_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_cpumask_read,
	.write = tracing_cpumask_write,
	.release = tracing_release_generic_tr,
	.llseek = generic_file_llseek,
};
4480
/* Show "trace_options": core flags first, then the current tracer's options */
static int tracing_trace_options_show(struct seq_file *m, void *v)
{
	struct tracer_opt *trace_opts;
	struct trace_array *tr = m->private;
	struct tracer_flags *flags;
	u32 tracer_flags;
	int i;

	guard(mutex)(&trace_types_lock);

	/* Core options: "name" when set, "noname" when cleared */
	for (i = 0; trace_options[i]; i++) {
		if (tr->trace_flags & (1ULL << i))
			seq_printf(m, "%s\n", trace_options[i]);
		else
			seq_printf(m, "no%s\n", trace_options[i]);
	}

	/* Tracer-specific options, if the current tracer declares any */
	flags = tr->current_trace_flags;
	if (!flags || !flags->opts)
		return 0;

	tracer_flags = flags->val;
	trace_opts = flags->opts;

	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
	}

	return 0;
}
4514
/*
 * Apply one tracer-specific option bit. The tracer's set_flag() callback
 * is consulted first and may veto the change; on success the bit is
 * updated in @tracer_flags->val.
 */
static int __set_tracer_option(struct trace_array *tr,
			       struct tracer_flags *tracer_flags,
			       struct tracer_opt *opts, int neg)
{
	struct tracer *trace = tracer_flags->trace;
	int ret = 0;

	if (trace->set_flag)
		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
	if (ret)
		return ret;

	if (neg)
		tracer_flags->val &= ~opts->bit;
	else
		tracer_flags->val |= opts->bit;
	return 0;
}
4533
4534 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)4535 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4536 {
4537 struct tracer_flags *tracer_flags = tr->current_trace_flags;
4538 struct tracer_opt *opts = NULL;
4539 int i;
4540
4541 if (!tracer_flags || !tracer_flags->opts)
4542 return 0;
4543
4544 for (i = 0; tracer_flags->opts[i].name; i++) {
4545 opts = &tracer_flags->opts[i];
4546
4547 if (strcmp(cmp, opts->name) == 0)
4548 return __set_tracer_option(tr, tracer_flags, opts, neg);
4549 }
4550
4551 return -EINVAL;
4552 }
4553
4554 /* Some tracers require overwrite to stay enabled */
/* Some tracers require overwrite to stay enabled */
int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
{
	/* Refuse clearing OVERWRITE while this tracer is active */
	bool clearing = (mask & TRACE_ITER(OVERWRITE)) && !set;

	return (tracer->enabled && clearing) ? -1 : 0;
}
4562
/*
 * set_tracer_flag - set or clear one core trace option flag on @tr
 * @tr: the trace array to modify
 * @mask: a single TRACE_ITER() flag bit
 * @enabled: non-zero to set the flag, zero to clear it
 *
 * Called with trace_types_lock held (see trace_set_options()); the flags
 * in the first switch additionally require event_mutex.
 *
 * Returns 0 on success or a negative errno.
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	/* These flags touch event state protected by event_mutex */
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* Flags needing work before tr->trace_flags is updated */
	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * It's flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set.
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Side effects that depend on the new flag value */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		/* The tgid map is allocated lazily; back out the flag on failure */
		if (trace_alloc_tgid_map() < 0) {
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}
4657
/*
 * trace_set_options - parse and apply one option string (e.g. "nostacktrace")
 * @tr: the trace array to modify
 * @option: option name, optionally prefixed with "no" to clear it
 *
 * Tries the core trace_options table first, then the current tracer's own
 * options. Returns >= 0 on success or a negative errno.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix means the option is being cleared */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4696
/*
 * Apply the comma-separated options from the "trace_options=" boot
 * parameter to the global trace array.
 */
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		/* strsep() NUL-terminates each option in place */
		option = strsep(&buf, ",");

		if (!option)
			break;

		if (*option)
			trace_set_options(&global_trace, option);

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}
4716
4717 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)4718 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4719 size_t cnt, loff_t *ppos)
4720 {
4721 struct seq_file *m = filp->private_data;
4722 struct trace_array *tr = m->private;
4723 char buf[64];
4724 int ret;
4725
4726 if (cnt >= sizeof(buf))
4727 return -EINVAL;
4728
4729 if (copy_from_user(buf, ubuf, cnt))
4730 return -EFAULT;
4731
4732 buf[cnt] = 0;
4733
4734 ret = trace_set_options(tr, buf);
4735 if (ret < 0)
4736 return ret;
4737
4738 *ppos += cnt;
4739
4740 return cnt;
4741 }
4742
/* Open "trace_options": single_open() with tracing_trace_options_show() */
static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	/* Checks tracing is allowed and takes a reference on tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = single_open(file, tracing_trace_options_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
4758
/* File operations for the "trace_options" file */
static const struct file_operations tracing_iter_fops = {
	.open = tracing_trace_options_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_single_release_tr,
	.write = tracing_trace_options_write,
};
4766
4767 static const char readme_msg[] =
4768 "tracing mini-HOWTO:\n\n"
4769 "By default tracefs removes all OTH file permission bits.\n"
4770 "When mounting tracefs an optional group id can be specified\n"
4771 "which adds the group to every directory and file in tracefs:\n\n"
4772 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4773 "# echo 0 > tracing_on : quick way to disable tracing\n"
4774 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4775 " Important files:\n"
4776 " trace\t\t\t- The static contents of the buffer\n"
4777 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4778 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4779 " current_tracer\t- function and latency tracers\n"
4780 " available_tracers\t- list of configured tracers for current_tracer\n"
4781 " error_log\t- error log for failed commands (that support it)\n"
4782 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4783 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4784 " trace_clock\t\t- change the clock used to order events\n"
4785 " local: Per cpu clock but may not be synced across CPUs\n"
4786 " global: Synced across CPUs but slows tracing down.\n"
4787 " counter: Not a clock, but just an increment\n"
4788 " uptime: Jiffy counter from time of boot\n"
4789 " perf: Same clock that perf events use\n"
4790 #ifdef CONFIG_X86_64
4791 " x86-tsc: TSC cycle counter\n"
4792 #endif
4793 "\n timestamp_mode\t- view the mode used to timestamp events\n"
4794 " delta: Delta difference against a buffer-wide timestamp\n"
4795 " absolute: Absolute (standalone) timestamp\n"
4796 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4797 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4798 " tracing_cpumask\t- Limit which CPUs to trace\n"
4799 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4800 "\t\t\t Remove sub-buffer with rmdir\n"
4801 " trace_options\t\t- Set format or modify how tracing happens\n"
4802 "\t\t\t Disable an option by prefixing 'no' to the\n"
4803 "\t\t\t option name\n"
4804 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4805 #ifdef CONFIG_DYNAMIC_FTRACE
4806 "\n available_filter_functions - list of functions that can be filtered on\n"
4807 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4808 "\t\t\t functions\n"
4809 "\t accepts: func_full_name or glob-matching-pattern\n"
4810 "\t modules: Can select a group via module\n"
4811 "\t Format: :mod:<module-name>\n"
4812 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4813 "\t triggers: a command to perform when function is hit\n"
4814 "\t Format: <function>:<trigger>[:count]\n"
4815 "\t trigger: traceon, traceoff\n"
4816 "\t\t enable_event:<system>:<event>\n"
4817 "\t\t disable_event:<system>:<event>\n"
4818 #ifdef CONFIG_STACKTRACE
4819 "\t\t stacktrace\n"
4820 #endif
4821 #ifdef CONFIG_TRACER_SNAPSHOT
4822 "\t\t snapshot\n"
4823 #endif
4824 "\t\t dump\n"
4825 "\t\t cpudump\n"
4826 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4827 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4828 "\t The first one will disable tracing every time do_fault is hit\n"
4829 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4830 "\t The first time do trap is hit and it disables tracing, the\n"
4831 "\t counter will decrement to 2. If tracing is already disabled,\n"
4832 "\t the counter will not decrement. It only decrements when the\n"
4833 "\t trigger did work\n"
4834 "\t To remove trigger without count:\n"
4835 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4836 "\t To remove trigger with a count:\n"
4837 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4838 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4839 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4840 "\t modules: Can select a group via module command :mod:\n"
4841 "\t Does not accept triggers\n"
4842 #endif /* CONFIG_DYNAMIC_FTRACE */
4843 #ifdef CONFIG_FUNCTION_TRACER
4844 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4845 "\t\t (function)\n"
4846 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4847 "\t\t (function)\n"
4848 #endif
4849 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4850 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4851 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4852 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4853 #endif
4854 #ifdef CONFIG_TRACER_SNAPSHOT
4855 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4856 "\t\t\t snapshot buffer. Read the contents for more\n"
4857 "\t\t\t information\n"
4858 #endif
4859 #ifdef CONFIG_STACK_TRACER
4860 " stack_trace\t\t- Shows the max stack trace when active\n"
4861 " stack_max_size\t- Shows current max stack size that was traced\n"
4862 "\t\t\t Write into this file to reset the max size (trigger a\n"
4863 "\t\t\t new trace)\n"
4864 #ifdef CONFIG_DYNAMIC_FTRACE
4865 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4866 "\t\t\t traces\n"
4867 #endif
4868 #endif /* CONFIG_STACK_TRACER */
4869 #ifdef CONFIG_DYNAMIC_EVENTS
4870 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4871 "\t\t\t Write into this file to define/undefine new trace events.\n"
4872 #endif
4873 #ifdef CONFIG_KPROBE_EVENTS
4874 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4875 "\t\t\t Write into this file to define/undefine new trace events.\n"
4876 #endif
4877 #ifdef CONFIG_UPROBE_EVENTS
4878 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4879 "\t\t\t Write into this file to define/undefine new trace events.\n"
4880 #endif
4881 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4882 defined(CONFIG_FPROBE_EVENTS)
4883 "\t accepts: event-definitions (one definition per line)\n"
4884 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4885 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4886 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4887 #endif
4888 #ifdef CONFIG_FPROBE_EVENTS
4889 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4890 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4891 #endif
4892 #ifdef CONFIG_HIST_TRIGGERS
4893 "\t s:[synthetic/]<event> <field> [<field>]\n"
4894 #endif
4895 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4896 "\t -:[<group>/][<event>]\n"
4897 #ifdef CONFIG_KPROBE_EVENTS
4898 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4899 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4900 #endif
4901 #ifdef CONFIG_UPROBE_EVENTS
4902 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4903 #endif
4904 "\t args: <name>=fetcharg[:type]\n"
4905 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4906 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4907 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4908 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4909 "\t <argname>[->field[->field|.field...]],\n"
4910 #endif
4911 #else
4912 "\t $stack<index>, $stack, $retval, $comm,\n"
4913 #endif
4914 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4915 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
4916 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4917 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4918 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4919 #ifdef CONFIG_HIST_TRIGGERS
4920 "\t field: <stype> <name>;\n"
4921 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4922 "\t [unsigned] char/int/long\n"
4923 #endif
4924 "\t efield: For event probes ('e' types), the field is on of the fields\n"
4925 "\t of the <attached-group>/<attached-event>.\n"
4926 #endif
4927 " set_event\t\t- Enables events by name written into it\n"
4928 "\t\t\t Can enable module events via: :mod:<module>\n"
4929 " events/\t\t- Directory containing all trace event subsystems:\n"
4930 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4931 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4932 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4933 "\t\t\t events\n"
4934 " filter\t\t- If set, only events passing filter are traced\n"
4935 " events/<system>/<event>/\t- Directory containing control files for\n"
4936 "\t\t\t <event>:\n"
4937 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4938 " filter\t\t- If set, only events passing filter are traced\n"
4939 " trigger\t\t- If set, a command to perform when event is hit\n"
4940 "\t Format: <trigger>[:count][if <filter>]\n"
4941 "\t trigger: traceon, traceoff\n"
4942 "\t enable_event:<system>:<event>\n"
4943 "\t disable_event:<system>:<event>\n"
4944 #ifdef CONFIG_HIST_TRIGGERS
4945 "\t enable_hist:<system>:<event>\n"
4946 "\t disable_hist:<system>:<event>\n"
4947 #endif
4948 #ifdef CONFIG_STACKTRACE
4949 "\t\t stacktrace\n"
4950 #endif
4951 #ifdef CONFIG_TRACER_SNAPSHOT
4952 "\t\t snapshot\n"
4953 #endif
4954 #ifdef CONFIG_HIST_TRIGGERS
4955 "\t\t hist (see below)\n"
4956 #endif
4957 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4958 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4959 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4960 "\t events/block/block_unplug/trigger\n"
4961 "\t The first disables tracing every time block_unplug is hit.\n"
4962 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4963 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4964 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4965 "\t Like function triggers, the counter is only decremented if it\n"
4966 "\t enabled or disabled tracing.\n"
4967 "\t To remove a trigger without a count:\n"
4968 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4969 "\t To remove a trigger with a count:\n"
4970 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4971 "\t Filters can be ignored when removing a trigger.\n"
4972 #ifdef CONFIG_HIST_TRIGGERS
4973 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4974 "\t Format: hist:keys=<field1[,field2,...]>\n"
4975 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4976 "\t [:values=<field1[,field2,...]>]\n"
4977 "\t [:sort=<field1[,field2,...]>]\n"
4978 "\t [:size=#entries]\n"
4979 "\t [:pause][:continue][:clear]\n"
4980 "\t [:name=histname1]\n"
4981 "\t [:nohitcount]\n"
4982 "\t [:<handler>.<action>]\n"
4983 "\t [if <filter>]\n\n"
4984 "\t Note, special fields can be used as well:\n"
4985 "\t common_timestamp - to record current timestamp\n"
4986 "\t common_cpu - to record the CPU the event happened on\n"
4987 "\n"
4988 "\t A hist trigger variable can be:\n"
4989 "\t - a reference to a field e.g. x=current_timestamp,\n"
4990 "\t - a reference to another variable e.g. y=$x,\n"
4991 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
4992 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4993 "\n"
4994 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4995 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
4996 "\t variable reference, field or numeric literal.\n"
4997 "\n"
4998 "\t When a matching event is hit, an entry is added to a hash\n"
4999 "\t table using the key(s) and value(s) named, and the value of a\n"
5000 "\t sum called 'hitcount' is incremented. Keys and values\n"
5001 "\t correspond to fields in the event's format description. Keys\n"
5002 "\t can be any field, or the special string 'common_stacktrace'.\n"
5003 "\t Compound keys consisting of up to two fields can be specified\n"
5004 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5005 "\t fields. Sort keys consisting of up to two fields can be\n"
5006 "\t specified using the 'sort' keyword. The sort direction can\n"
5007 "\t be modified by appending '.descending' or '.ascending' to a\n"
5008 "\t sort field. The 'size' parameter can be used to specify more\n"
5009 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5010 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5011 "\t its histogram data will be shared with other triggers of the\n"
5012 "\t same name, and trigger hits will update this common data.\n\n"
5013 "\t Reading the 'hist' file for the event will dump the hash\n"
5014 "\t table in its entirety to stdout. If there are multiple hist\n"
5015 "\t triggers attached to an event, there will be a table for each\n"
5016 "\t trigger in the output. The table displayed for a named\n"
5017 "\t trigger will be the same as any other instance having the\n"
5018 "\t same name. The default format used to display a given field\n"
5019 "\t can be modified by appending any of the following modifiers\n"
5020 "\t to the field name, as applicable:\n\n"
5021 "\t .hex display a number as a hex value\n"
5022 "\t .sym display an address as a symbol\n"
5023 "\t .sym-offset display an address as a symbol and offset\n"
5024 "\t .execname display a common_pid as a program name\n"
5025 "\t .syscall display a syscall id as a syscall name\n"
5026 "\t .log2 display log2 value rather than raw number\n"
5027 "\t .buckets=size display values in groups of size rather than raw number\n"
5028 "\t .usecs display a common_timestamp in microseconds\n"
5029 "\t .percent display a number of percentage value\n"
5030 "\t .graph display a bar-graph of a value\n\n"
5031 "\t The 'pause' parameter can be used to pause an existing hist\n"
5032 "\t trigger or to start a hist trigger but not log any events\n"
5033 "\t until told to do so. 'continue' can be used to start or\n"
5034 "\t restart a paused hist trigger.\n\n"
5035 "\t The 'clear' parameter will clear the contents of a running\n"
5036 "\t hist trigger and leave its current paused/active state\n"
5037 "\t unchanged.\n\n"
5038 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5039 "\t raw hitcount in the histogram.\n\n"
5040 "\t The enable_hist and disable_hist triggers can be used to\n"
5041 "\t have one event conditionally start and stop another event's\n"
5042 "\t already-attached hist trigger. The syntax is analogous to\n"
5043 "\t the enable_event and disable_event triggers.\n\n"
5044 "\t Hist trigger handlers and actions are executed whenever a\n"
5045 "\t a histogram entry is added or updated. They take the form:\n\n"
5046 "\t <handler>.<action>\n\n"
5047 "\t The available handlers are:\n\n"
5048 "\t onmatch(matching.event) - invoke on addition or update\n"
5049 "\t onmax(var) - invoke if var exceeds current max\n"
5050 "\t onchange(var) - invoke action if var changes\n\n"
5051 "\t The available actions are:\n\n"
5052 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5053 "\t save(field,...) - save current event fields\n"
5054 #ifdef CONFIG_TRACER_SNAPSHOT
5055 "\t snapshot() - snapshot the trace buffer\n\n"
5056 #endif
5057 #ifdef CONFIG_SYNTH_EVENTS
5058 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5059 "\t Write into this file to define/undefine new synthetic events.\n"
5060 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5061 #endif
5062 #endif
5063 ;
5064
5065 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5066 tracing_readme_read(struct file *filp, char __user *ubuf,
5067 size_t cnt, loff_t *ppos)
5068 {
5069 return simple_read_from_buffer(ubuf, cnt, ppos,
5070 readme_msg, strlen(readme_msg));
5071 }
5072
/* File operations for the tracefs "README" help file. */
static const struct file_operations tracing_readme_fops = {
	.open = tracing_open_generic,
	.read = tracing_readme_read,
	.llseek = generic_file_llseek,
};
5078
5079 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5080 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5081 update_eval_map(union trace_eval_map_item *ptr)
5082 {
5083 if (!ptr->map.eval_string) {
5084 if (ptr->tail.next) {
5085 ptr = ptr->tail.next;
5086 /* Set ptr to the next real item (skip head) */
5087 ptr++;
5088 } else
5089 return NULL;
5090 }
5091 return ptr;
5092 }
5093
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5094 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5095 {
5096 union trace_eval_map_item *ptr = v;
5097
5098 /*
5099 * Paranoid! If ptr points to end, we don't want to increment past it.
5100 * This really should never happen.
5101 */
5102 (*pos)++;
5103 ptr = update_eval_map(ptr);
5104 if (WARN_ON_ONCE(!ptr))
5105 return NULL;
5106
5107 ptr++;
5108 ptr = update_eval_map(ptr);
5109
5110 return ptr;
5111 }
5112
eval_map_start(struct seq_file * m,loff_t * pos)5113 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5114 {
5115 union trace_eval_map_item *v;
5116 loff_t l = 0;
5117
5118 mutex_lock(&trace_eval_mutex);
5119
5120 v = trace_eval_maps;
5121 if (v)
5122 v++;
5123
5124 while (v && l < *pos) {
5125 v = eval_map_next(m, v, &l);
5126 }
5127
5128 return v;
5129 }
5130
eval_map_stop(struct seq_file * m,void * v)5131 static void eval_map_stop(struct seq_file *m, void *v)
5132 {
5133 mutex_unlock(&trace_eval_mutex);
5134 }
5135
eval_map_show(struct seq_file * m,void * v)5136 static int eval_map_show(struct seq_file *m, void *v)
5137 {
5138 union trace_eval_map_item *ptr = v;
5139
5140 seq_printf(m, "%s %ld (%s)\n",
5141 ptr->map.eval_string, ptr->map.eval_value,
5142 ptr->map.system);
5143
5144 return 0;
5145 }
5146
/* seq_file iterator operations backing the "eval_map" file. */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start = eval_map_start,
	.next = eval_map_next,
	.stop = eval_map_stop,
	.show = eval_map_show,
};
5153
tracing_eval_map_open(struct inode * inode,struct file * filp)5154 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5155 {
5156 int ret;
5157
5158 ret = tracing_check_open_get_tr(NULL);
5159 if (ret)
5160 return ret;
5161
5162 return seq_open(filp, &tracing_eval_map_seq_ops);
5163 }
5164
/* File operations for the tracefs "eval_map" file (seq_file based). */
static const struct file_operations tracing_eval_map_fops = {
	.open = tracing_eval_map_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
5171
5172 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5173 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5174 {
5175 /* Return tail of array given the head */
5176 return ptr + ptr->head.length + 1;
5177 }
5178
/*
 * Copy a module's eval maps into a freshly allocated array of
 * @len + 2 items (head, @len map items, zeroed tail) and link it onto
 * the global trace_eval_maps list for the "eval_map" file.
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Walk the tail links to the end of the list and chain on */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* The zeroed tail terminates this array (tail.next == NULL) */
	memset(map_array, 0, sizeof(*map_array));
}
5226
trace_create_eval_file(struct dentry * d_tracer)5227 static void trace_create_eval_file(struct dentry *d_tracer)
5228 {
5229 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5230 NULL, &tracing_eval_map_fops);
5231 }
5232
5233 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5234 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5235 static inline void trace_insert_eval_map_file(struct module *mod,
5236 struct trace_eval_map **start, int len) { }
5237 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5238
/*
 * Apply a module's eval maps to all trace events and, when the map
 * list is non-empty, publish it in the "eval_map" file.
 *
 * NOTE(review): with @len <= 0 there are no maps, yet the update is
 * still run when BTF type tags are compiled in — presumably so event
 * fields still get sanitized; confirm against trace_event_update_all().
 */
static void
trace_event_update_with_eval_map(struct module *mod,
				 struct trace_eval_map **start,
				 int len)
{
	struct trace_eval_map **map;

	/* Always run sanitizer only if btf_type_tag attr exists. */
	if (len <= 0) {
		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
		      __has_attribute(btf_type_tag)))
			return;
	}

	map = start;

	trace_event_update_all(map, len);

	/* Nothing to insert into the eval_map file without entries */
	if (len <= 0)
		return;

	trace_insert_eval_map_file(mod, start, len);
}
5263
5264 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5265 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5266 size_t cnt, loff_t *ppos)
5267 {
5268 struct trace_array *tr = filp->private_data;
5269 char buf[MAX_TRACER_SIZE+2];
5270 int r;
5271
5272 scoped_guard(mutex, &trace_types_lock) {
5273 r = sprintf(buf, "%s\n", tr->current_trace->name);
5274 }
5275
5276 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5277 }
5278
tracer_init(struct tracer * t,struct trace_array * tr)5279 int tracer_init(struct tracer *t, struct trace_array *tr)
5280 {
5281 tracing_reset_online_cpus(&tr->array_buffer);
5282 update_last_data_if_empty(tr);
5283 return t->init(tr);
5284 }
5285
set_buffer_entries(struct array_buffer * buf,unsigned long val)5286 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5287 {
5288 int cpu;
5289
5290 for_each_tracing_cpu(cpu)
5291 per_cpu_ptr(buf->data, cpu)->entries = val;
5292 }
5293
update_buffer_entries(struct array_buffer * buf,int cpu)5294 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5295 {
5296 if (cpu == RING_BUFFER_ALL_CPUS) {
5297 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5298 } else {
5299 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5300 }
5301 }
5302
5303 #ifdef CONFIG_TRACER_SNAPSHOT
/* resize @trace_buf to the per-CPU entry counts recorded in @size_buf */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			/* Mirror the entry count only after a successful resize */
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				 per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
5329 #endif /* CONFIG_TRACER_SNAPSHOT */
5330
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5331 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5332 unsigned long size, int cpu)
5333 {
5334 int ret;
5335
5336 /*
5337 * If kernel or user changes the size of the ring buffer
5338 * we use the size that was given, and we can forget about
5339 * expanding it later.
5340 */
5341 trace_set_ring_buffer_expanded(tr);
5342
5343 /* May be called before buffers are initialized */
5344 if (!tr->array_buffer.buffer)
5345 return 0;
5346
5347 /* Do not allow tracing while resizing ring buffer */
5348 tracing_stop_tr(tr);
5349
5350 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5351 if (ret < 0)
5352 goto out_start;
5353
5354 #ifdef CONFIG_TRACER_SNAPSHOT
5355 if (!tr->allocated_snapshot)
5356 goto out;
5357
5358 ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
5359 if (ret < 0) {
5360 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5361 &tr->array_buffer, cpu);
5362 if (r < 0) {
5363 /*
5364 * AARGH! We are left with different
5365 * size max buffer!!!!
5366 * The max buffer is our "snapshot" buffer.
5367 * When a tracer needs a snapshot (one of the
5368 * latency tracers), it swaps the max buffer
5369 * with the saved snap shot. We succeeded to
5370 * update the size of the main buffer, but failed to
5371 * update the size of the max buffer. But when we tried
5372 * to reset the main buffer to the original size, we
5373 * failed there too. This is very unlikely to
5374 * happen, but if it does, warn and kill all
5375 * tracing.
5376 */
5377 WARN_ON(1);
5378 tracing_disabled = 1;
5379 }
5380 goto out_start;
5381 }
5382
5383 update_buffer_entries(&tr->snapshot_buffer, cpu);
5384
5385 out:
5386 #endif /* CONFIG_TRACER_SNAPSHOT */
5387
5388 update_buffer_entries(&tr->array_buffer, cpu);
5389 out_start:
5390 tracing_start_tr(tr);
5391 return ret;
5392 }
5393
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5394 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5395 unsigned long size, int cpu_id)
5396 {
5397 guard(mutex)(&trace_types_lock);
5398
5399 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5400 /* make sure, this cpu is enabled in the mask */
5401 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5402 return -EINVAL;
5403 }
5404
5405 return __tracing_resize_ring_buffer(tr, size, cpu_id);
5406 }
5407
/* One module recorded in the persistent ring buffer's scratch area. */
struct trace_mod_entry {
	unsigned long mod_addr;	/* base address of the module's text */
	char mod_name[MODULE_NAME_LEN];
};

/*
 * Header of the persistent ring buffer's scratch area, used by
 * trace_adjust_address() to map previous-boot addresses to this boot.
 */
struct trace_scratch {
	unsigned int clock_id;	/* trace clock id saved for the next boot */
	unsigned long text_addr;	/* _text address of the boot that wrote it */
	unsigned long nr_entries;	/* number of used @entries */
	struct trace_mod_entry entries[];	/* must be sorted by mod_addr */
};

/* Protects the module entries stored in the scratch area. */
static DEFINE_MUTEX(scratch_mutex);
5421
cmp_mod_entry(const void * key,const void * pivot)5422 static int cmp_mod_entry(const void *key, const void *pivot)
5423 {
5424 unsigned long addr = (unsigned long)key;
5425 const struct trace_mod_entry *ent = pivot;
5426
5427 if (addr < ent[0].mod_addr)
5428 return -1;
5429
5430 return addr >= ent[1].mod_addr;
5431 }
5432
5433 /**
5434 * trace_adjust_address() - Adjust prev boot address to current address.
5435 * @tr: Persistent ring buffer's trace_array.
5436 * @addr: Address in @tr which is adjusted.
5437 */
trace_adjust_address(struct trace_array * tr,unsigned long addr)5438 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
5439 {
5440 struct trace_module_delta *module_delta;
5441 struct trace_scratch *tscratch;
5442 struct trace_mod_entry *entry;
5443 unsigned long raddr;
5444 int idx = 0, nr_entries;
5445
5446 /* If we don't have last boot delta, return the address */
5447 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5448 return addr;
5449
5450 /* tr->module_delta must be protected by rcu. */
5451 guard(rcu)();
5452 tscratch = tr->scratch;
5453 /* if there is no tscrach, module_delta must be NULL. */
5454 module_delta = READ_ONCE(tr->module_delta);
5455 if (!module_delta || !tscratch->nr_entries ||
5456 tscratch->entries[0].mod_addr > addr) {
5457 raddr = addr + tr->text_delta;
5458 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
5459 is_kernel_rodata(raddr) ? raddr : addr;
5460 }
5461
5462 /* Note that entries must be sorted. */
5463 nr_entries = tscratch->nr_entries;
5464 if (nr_entries == 1 ||
5465 tscratch->entries[nr_entries - 1].mod_addr < addr)
5466 idx = nr_entries - 1;
5467 else {
5468 entry = __inline_bsearch((void *)addr,
5469 tscratch->entries,
5470 nr_entries - 1,
5471 sizeof(tscratch->entries[0]),
5472 cmp_mod_entry);
5473 if (entry)
5474 idx = entry - tscratch->entries;
5475 }
5476
5477 return addr + module_delta->delta[idx];
5478 }
5479
5480 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)5481 static int save_mod(struct module *mod, void *data)
5482 {
5483 struct trace_array *tr = data;
5484 struct trace_scratch *tscratch;
5485 struct trace_mod_entry *entry;
5486 unsigned int size;
5487
5488 tscratch = tr->scratch;
5489 if (!tscratch)
5490 return -1;
5491 size = tr->scratch_size;
5492
5493 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
5494 return -1;
5495
5496 entry = &tscratch->entries[tscratch->nr_entries];
5497
5498 tscratch->nr_entries++;
5499
5500 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
5501 strscpy(entry->mod_name, mod->name);
5502
5503 return 0;
5504 }
5505 #else
save_mod(struct module * mod,void * data)5506 static int save_mod(struct module *mod, void *data)
5507 {
5508 return 0;
5509 }
5510 #endif
5511
update_last_data(struct trace_array * tr)5512 static void update_last_data(struct trace_array *tr)
5513 {
5514 struct trace_module_delta *module_delta;
5515 struct trace_scratch *tscratch;
5516
5517 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
5518 return;
5519
5520 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5521 return;
5522
5523 /* Only if the buffer has previous boot data clear and update it. */
5524 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
5525
5526 /* If this is a backup instance, mark it for autoremove. */
5527 if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
5528 tr->free_on_close = true;
5529
5530 /* Reset the module list and reload them */
5531 if (tr->scratch) {
5532 struct trace_scratch *tscratch = tr->scratch;
5533
5534 tscratch->clock_id = tr->clock_id;
5535 memset(tscratch->entries, 0,
5536 flex_array_size(tscratch, entries, tscratch->nr_entries));
5537 tscratch->nr_entries = 0;
5538
5539 guard(mutex)(&scratch_mutex);
5540 module_for_each_mod(save_mod, tr);
5541 }
5542
5543 /*
5544 * Need to clear all CPU buffers as there cannot be events
5545 * from the previous boot mixed with events with this boot
5546 * as that will cause a confusing trace. Need to clear all
5547 * CPU buffers, even for those that may currently be offline.
5548 */
5549 tracing_reset_all_cpus(&tr->array_buffer);
5550
5551 /* Using current data now */
5552 tr->text_delta = 0;
5553
5554 if (!tr->scratch)
5555 return;
5556
5557 tscratch = tr->scratch;
5558 module_delta = READ_ONCE(tr->module_delta);
5559 WRITE_ONCE(tr->module_delta, NULL);
5560 kfree_rcu(module_delta, rcu);
5561
5562 /* Set the persistent ring buffer meta data to this address */
5563 tscratch->text_addr = (unsigned long)_text;
5564 }
5565
5566 /**
5567 * tracing_update_buffers - used by tracing facility to expand ring buffers
5568 * @tr: The tracing instance
5569 *
5570 * To save on memory when the tracing is never used on a system with it
5571 * configured in. The ring buffers are set to a minimum size. But once
5572 * a user starts to use the tracing facility, then they need to grow
5573 * to their default size.
5574 *
5575 * This function is to be called when a tracer is about to be used.
5576 */
tracing_update_buffers(struct trace_array * tr)5577 int tracing_update_buffers(struct trace_array *tr)
5578 {
5579 int ret = 0;
5580
5581 if (!tr)
5582 tr = &global_trace;
5583
5584 guard(mutex)(&trace_types_lock);
5585
5586 update_last_data(tr);
5587
5588 if (!tr->ring_buffer_expanded)
5589 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5590 RING_BUFFER_ALL_CPUS);
5591 return ret;
5592 }
5593
5594 /*
5595 * Used to clear out the tracer before deletion of an instance.
5596 * Must have trace_types_lock held.
5597 */
tracing_set_nop(struct trace_array * tr)5598 static void tracing_set_nop(struct trace_array *tr)
5599 {
5600 if (tr->current_trace == &nop_trace)
5601 return;
5602
5603 tr->current_trace->enabled--;
5604
5605 if (tr->current_trace->reset)
5606 tr->current_trace->reset(tr);
5607
5608 tr->current_trace = &nop_trace;
5609 tr->current_trace_flags = nop_trace.flags;
5610 }
5611
5612 static bool tracer_options_updated;
5613
tracing_set_tracer(struct trace_array * tr,const char * buf)5614 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5615 {
5616 struct tracer *trace = NULL;
5617 struct tracers *t;
5618 bool had_max_tr;
5619 int ret;
5620
5621 guard(mutex)(&trace_types_lock);
5622
5623 update_last_data(tr);
5624
5625 if (!tr->ring_buffer_expanded) {
5626 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5627 RING_BUFFER_ALL_CPUS);
5628 if (ret < 0)
5629 return ret;
5630 ret = 0;
5631 }
5632
5633 list_for_each_entry(t, &tr->tracers, list) {
5634 if (strcmp(t->tracer->name, buf) == 0) {
5635 trace = t->tracer;
5636 break;
5637 }
5638 }
5639 if (!trace)
5640 return -EINVAL;
5641
5642 if (trace == tr->current_trace)
5643 return 0;
5644
5645 #ifdef CONFIG_TRACER_SNAPSHOT
5646 if (tracer_uses_snapshot(trace)) {
5647 local_irq_disable();
5648 arch_spin_lock(&tr->max_lock);
5649 ret = tr->cond_snapshot ? -EBUSY : 0;
5650 arch_spin_unlock(&tr->max_lock);
5651 local_irq_enable();
5652 if (ret)
5653 return ret;
5654 }
5655 #endif
5656 /* Some tracers won't work on kernel command line */
5657 if (system_state < SYSTEM_RUNNING && trace->noboot) {
5658 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5659 trace->name);
5660 return -EINVAL;
5661 }
5662
5663 /* Some tracers are only allowed for the top level buffer */
5664 if (!trace_ok_for_array(trace, tr))
5665 return -EINVAL;
5666
5667 /* If trace pipe files are being read, we can't change the tracer */
5668 if (tr->trace_ref)
5669 return -EBUSY;
5670
5671 trace_branch_disable();
5672
5673 tr->current_trace->enabled--;
5674
5675 if (tr->current_trace->reset)
5676 tr->current_trace->reset(tr);
5677
5678 had_max_tr = tracer_uses_snapshot(tr->current_trace);
5679
5680 /* Current trace needs to be nop_trace before synchronize_rcu */
5681 tr->current_trace = &nop_trace;
5682 tr->current_trace_flags = nop_trace.flags;
5683
5684 if (had_max_tr && !tracer_uses_snapshot(trace)) {
5685 /*
5686 * We need to make sure that the update_max_tr sees that
5687 * current_trace changed to nop_trace to keep it from
5688 * swapping the buffers after we resize it.
5689 * The update_max_tr is called from interrupts disabled
5690 * so a synchronized_sched() is sufficient.
5691 */
5692 synchronize_rcu();
5693 free_snapshot(tr);
5694 tracing_disarm_snapshot(tr);
5695 }
5696
5697 if (!had_max_tr && tracer_uses_snapshot(trace)) {
5698 ret = tracing_arm_snapshot_locked(tr);
5699 if (ret)
5700 return ret;
5701 }
5702
5703 tr->current_trace_flags = t->flags ? : t->tracer->flags;
5704
5705 if (trace->init) {
5706 ret = tracer_init(trace, tr);
5707 if (ret) {
5708 if (tracer_uses_snapshot(trace))
5709 tracing_disarm_snapshot(tr);
5710 tr->current_trace_flags = nop_trace.flags;
5711 return ret;
5712 }
5713 }
5714
5715 tr->current_trace = trace;
5716 tr->current_trace->enabled++;
5717 trace_branch_enable(tr);
5718
5719 return 0;
5720 }
5721
5722 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5723 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5724 size_t cnt, loff_t *ppos)
5725 {
5726 struct trace_array *tr = filp->private_data;
5727 char buf[MAX_TRACER_SIZE+1];
5728 char *name;
5729 size_t ret;
5730 int err;
5731
5732 ret = cnt;
5733
5734 if (cnt > MAX_TRACER_SIZE)
5735 cnt = MAX_TRACER_SIZE;
5736
5737 if (copy_from_user(buf, ubuf, cnt))
5738 return -EFAULT;
5739
5740 buf[cnt] = 0;
5741
5742 name = strim(buf);
5743
5744 err = tracing_set_tracer(tr, name);
5745 if (err)
5746 return err;
5747
5748 *ppos += ret;
5749
5750 return ret;
5751 }
5752
/*
 * Print *ptr (nanoseconds) as microseconds; the sentinel -1 prints
 * literally. Shared by the tracing_thresh and max_latency files.
 */
static ssize_t
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n",
		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
	/* Defensive clamp: snprintf returns the untruncated length */
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
5766
5767 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)5768 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5769 size_t cnt, loff_t *ppos)
5770 {
5771 unsigned long val;
5772 int ret;
5773
5774 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5775 if (ret)
5776 return ret;
5777
5778 *ptr = val * 1000;
5779
5780 return cnt;
5781 }
5782
/* Read handler for "tracing_thresh" (global threshold, in usecs). */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
5789
/*
 * Write handler for "tracing_thresh": update the global threshold and
 * let the current tracer react via ->update_thresh(). The lock is held
 * across both so the tracer sees a consistent value.
 */
static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	guard(mutex)(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		return ret;

	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			return ret;
	}

	return cnt;
}
5810
5811 #ifdef CONFIG_TRACER_MAX_TRACE
5812
/* Read handler for "tracing_max_latency" (per-instance, in usecs). */
static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}
5821
/* Write handler for "tracing_max_latency" (per-instance, in usecs). */
static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}
5830
5831 #endif
5832
open_pipe_on_cpu(struct trace_array * tr,int cpu)5833 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5834 {
5835 if (cpu == RING_BUFFER_ALL_CPUS) {
5836 if (cpumask_empty(tr->pipe_cpumask)) {
5837 cpumask_setall(tr->pipe_cpumask);
5838 return 0;
5839 }
5840 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5841 cpumask_set_cpu(cpu, tr->pipe_cpumask);
5842 return 0;
5843 }
5844 return -EBUSY;
5845 }
5846
close_pipe_on_cpu(struct trace_array * tr,int cpu)5847 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5848 {
5849 if (cpu == RING_BUFFER_ALL_CPUS) {
5850 WARN_ON(!cpumask_full(tr->pipe_cpumask));
5851 cpumask_clear(tr->pipe_cpumask);
5852 } else {
5853 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5854 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5855 }
5856 }
5857
tracing_open_pipe(struct inode * inode,struct file * filp)5858 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5859 {
5860 struct trace_array *tr = inode->i_private;
5861 struct trace_iterator *iter;
5862 int cpu;
5863 int ret;
5864
5865 ret = tracing_check_open_get_tr(tr);
5866 if (ret)
5867 return ret;
5868
5869 guard(mutex)(&trace_types_lock);
5870 cpu = tracing_get_cpu(inode);
5871 ret = open_pipe_on_cpu(tr, cpu);
5872 if (ret)
5873 goto fail_pipe_on_cpu;
5874
5875 /* create a buffer to store the information to pass to userspace */
5876 iter = kzalloc_obj(*iter);
5877 if (!iter) {
5878 ret = -ENOMEM;
5879 goto fail_alloc_iter;
5880 }
5881
5882 trace_seq_init(&iter->seq);
5883 iter->trace = tr->current_trace;
5884
5885 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5886 ret = -ENOMEM;
5887 goto fail;
5888 }
5889
5890 /* trace pipe does not show start of buffer */
5891 cpumask_setall(iter->started);
5892
5893 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
5894 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5895
5896 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5897 if (trace_clocks[tr->clock_id].in_ns)
5898 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5899
5900 iter->tr = tr;
5901 iter->array_buffer = &tr->array_buffer;
5902 iter->cpu_file = cpu;
5903 mutex_init(&iter->mutex);
5904 filp->private_data = iter;
5905
5906 if (iter->trace->pipe_open)
5907 iter->trace->pipe_open(iter);
5908
5909 nonseekable_open(inode, filp);
5910
5911 tr->trace_ref++;
5912
5913 return ret;
5914
5915 fail:
5916 kfree(iter);
5917 fail_alloc_iter:
5918 close_pipe_on_cpu(tr, cpu);
5919 fail_pipe_on_cpu:
5920 __trace_array_put(tr);
5921 return ret;
5922 }
5923
tracing_release_pipe(struct inode * inode,struct file * file)5924 static int tracing_release_pipe(struct inode *inode, struct file *file)
5925 {
5926 struct trace_iterator *iter = file->private_data;
5927 struct trace_array *tr = inode->i_private;
5928
5929 scoped_guard(mutex, &trace_types_lock) {
5930 tr->trace_ref--;
5931
5932 if (iter->trace->pipe_close)
5933 iter->trace->pipe_close(iter);
5934 close_pipe_on_cpu(tr, iter->cpu_file);
5935 }
5936
5937 free_trace_iter_content(iter);
5938 kfree(iter);
5939
5940 trace_array_put(tr);
5941
5942 return 0;
5943 }
5944
5945 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)5946 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5947 {
5948 struct trace_array *tr = iter->tr;
5949
5950 /* Iterators are static, they should be filled or empty */
5951 if (trace_buffer_iter(iter, iter->cpu_file))
5952 return EPOLLIN | EPOLLRDNORM;
5953
5954 if (tr->trace_flags & TRACE_ITER(BLOCK))
5955 /*
5956 * Always select as readable when in blocking mode
5957 */
5958 return EPOLLIN | EPOLLRDNORM;
5959 else
5960 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5961 filp, poll_table, iter->tr->buffer_percent);
5962 }
5963
/* Poll handler for "trace_pipe": delegate to the shared trace_poll(). */
static __poll_t
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	return trace_poll(iter, filp, poll_table);
}
5971
/*
 * Must be called with iter->mutex held. Blocks until the buffer has
 * data (returning 1), or returns -EAGAIN for non-blocking readers, 0/
 * negative from wait_on_pipe(). The mutex is dropped around the sleep
 * so writers and other readers are not blocked.
 */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
6008
update_last_data_if_empty(struct trace_array * tr)6009 static bool update_last_data_if_empty(struct trace_array *tr)
6010 {
6011 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6012 return false;
6013
6014 if (!ring_buffer_empty(tr->array_buffer.buffer))
6015 return false;
6016
6017 /*
6018 * If the buffer contains the last boot data and all per-cpu
6019 * buffers are empty, reset it from the kernel side.
6020 */
6021 update_last_data(tr);
6022 return true;
6023 }
6024
/*
 * Consumer reader.
 *
 * Pops events off the ring buffer, formats them as text into iter->seq,
 * and copies that text out to user space. Events read here are consumed.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	guard(mutex)(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		return sret;

	trace_seq_init(&iter->seq);

	/* A tracer may provide its own read implementation. */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			return sret;
	}

waitagain:
	/* A drained last-boot buffer reads as EOF. */
	if (update_last_data_if_empty(iter->tr))
		return 0;

	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		return sret;

	/* stop when tracing is finished */
	if (trace_empty(iter))
		return 0;

	/* Cap one pass at a single trace_seq buffer (room for '\0'). */
	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
		cnt = TRACE_SEQ_BUFFER_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills entire trace_seq in one shot,
			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
			 * In this case, we need to consume it, otherwise, loop will peek
			 * this event next time, resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		/* Enough formatted text to satisfy the read. */
		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

	return sret;
}
6131
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6132 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6133 unsigned int idx)
6134 {
6135 __free_page(spd->pages[idx]);
6136 }
6137
/*
 * Format as many trace lines as fit into one seq buffer page, consuming
 * at most @rem bytes worth of output. Returns the remaining byte budget
 * (0 signals the caller to stop filling pages).
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		/* A line that overflowed is rolled back and left for the next page. */
		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Roll back when this line would exceed the caller's budget. */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter)) {
			/* Buffer drained: signal the caller to stop. */
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
6184
/*
 * splice(2) support for trace_pipe: fill freshly allocated pages with
 * formatted trace text and hand them to the pipe without a user copy.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages = pages_def,
		.partial = partial_def,
		.nr_pages = 0, /* This gets updated below. */
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.ops = &default_pipe_buf_ops,
		.spd_release = tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	/* Serialize against other consumers of this iterator. */
	mutex_lock(&iter->mutex);

	/* A tracer may provide its own splice_read implementation. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  min((size_t)trace_seq_used(&iter->seq),
					      (size_t)PAGE_SIZE));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = ret;

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	/* Only splice when at least one page was filled. */
	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
6271
6272 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6273 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6274 size_t cnt, loff_t *ppos)
6275 {
6276 struct inode *inode = file_inode(filp);
6277 struct trace_array *tr = inode->i_private;
6278 char buf[64];
6279 int r;
6280
6281 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6282
6283 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6284 }
6285
6286 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6287 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6288 size_t cnt, loff_t *ppos)
6289 {
6290 struct inode *inode = file_inode(filp);
6291 struct trace_array *tr = inode->i_private;
6292 unsigned long val;
6293 int ret;
6294
6295 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6296 if (ret)
6297 return ret;
6298
6299 if (val > SYSCALL_FAULT_USER_MAX)
6300 val = SYSCALL_FAULT_USER_MAX;
6301
6302 tr->syscall_buf_sz = val;
6303
6304 *ppos += cnt;
6305
6306 return cnt;
6307 }
6308
6309 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6310 tracing_entries_read(struct file *filp, char __user *ubuf,
6311 size_t cnt, loff_t *ppos)
6312 {
6313 struct inode *inode = file_inode(filp);
6314 struct trace_array *tr = inode->i_private;
6315 int cpu = tracing_get_cpu(inode);
6316 char buf[64];
6317 int r = 0;
6318 ssize_t ret;
6319
6320 mutex_lock(&trace_types_lock);
6321
6322 if (cpu == RING_BUFFER_ALL_CPUS) {
6323 int cpu, buf_size_same;
6324 unsigned long size;
6325
6326 size = 0;
6327 buf_size_same = 1;
6328 /* check if all cpu sizes are same */
6329 for_each_tracing_cpu(cpu) {
6330 /* fill in the size from first enabled cpu */
6331 if (size == 0)
6332 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6333 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6334 buf_size_same = 0;
6335 break;
6336 }
6337 }
6338
6339 if (buf_size_same) {
6340 if (!tr->ring_buffer_expanded)
6341 r = sprintf(buf, "%lu (expanded: %lu)\n",
6342 size >> 10,
6343 trace_buf_size >> 10);
6344 else
6345 r = sprintf(buf, "%lu\n", size >> 10);
6346 } else
6347 r = sprintf(buf, "X\n");
6348 } else
6349 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6350
6351 mutex_unlock(&trace_types_lock);
6352
6353 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6354 return ret;
6355 }
6356
6357 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6358 tracing_entries_write(struct file *filp, const char __user *ubuf,
6359 size_t cnt, loff_t *ppos)
6360 {
6361 struct inode *inode = file_inode(filp);
6362 struct trace_array *tr = inode->i_private;
6363 unsigned long val;
6364 int ret;
6365
6366 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6367 if (ret)
6368 return ret;
6369
6370 /* must have at least 1 entry */
6371 if (!val)
6372 return -EINVAL;
6373
6374 /* value is in KB */
6375 val <<= 10;
6376 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6377 if (ret < 0)
6378 return ret;
6379
6380 *ppos += cnt;
6381
6382 return cnt;
6383 }
6384
6385 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6386 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6387 size_t cnt, loff_t *ppos)
6388 {
6389 struct trace_array *tr = filp->private_data;
6390 char buf[64];
6391 int r, cpu;
6392 unsigned long size = 0, expanded_size = 0;
6393
6394 mutex_lock(&trace_types_lock);
6395 for_each_tracing_cpu(cpu) {
6396 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6397 if (!tr->ring_buffer_expanded)
6398 expanded_size += trace_buf_size >> 10;
6399 }
6400 if (tr->ring_buffer_expanded)
6401 r = sprintf(buf, "%lu\n", size);
6402 else
6403 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6404 mutex_unlock(&trace_types_lock);
6405
6406 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6407 }
6408
6409 #define LAST_BOOT_HEADER ((void *)1)
6410
/* seq_file iterator: position 0 is the header, positions >= 1 are modules. */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int idx = (*pos)++;

	/* The very first position yields the header sentinel. */
	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* Skip over the header slot to index into the module entries. */
	idx--;
	if (idx >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[idx];
}
6434
static void *l_start(struct seq_file *m, loff_t *pos)
{
	/* Held for the whole sequence; released in l_stop(). */
	mutex_lock(&scratch_mutex);
	return l_next(m, NULL, pos);
}
6441
l_stop(struct seq_file * m,void * p)6442 static void l_stop(struct seq_file *m, void *p)
6443 {
6444 mutex_unlock(&scratch_mutex);
6445 }
6446
show_last_boot_header(struct seq_file * m,struct trace_array * tr)6447 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6448 {
6449 struct trace_scratch *tscratch = tr->scratch;
6450
6451 /*
6452 * Do not leak KASLR address. This only shows the KASLR address of
6453 * the last boot. When the ring buffer is started, the LAST_BOOT
6454 * flag gets cleared, and this should only report "current".
6455 * Otherwise it shows the KASLR address from the previous boot which
6456 * should not be the same as the current boot.
6457 */
6458 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6459 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
6460 else
6461 seq_puts(m, "# Current\n");
6462 }
6463
l_show(struct seq_file * m,void * v)6464 static int l_show(struct seq_file *m, void *v)
6465 {
6466 struct trace_array *tr = m->private;
6467 struct trace_mod_entry *entry = v;
6468
6469 if (v == LAST_BOOT_HEADER) {
6470 show_last_boot_header(m, tr);
6471 return 0;
6472 }
6473
6474 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
6475 return 0;
6476 }
6477
/* seq_file callbacks for the last-boot info file (see l_* above). */
static const struct seq_operations last_boot_seq_ops = {
	.start = l_start,
	.next = l_next,
	.stop = l_stop,
	.show = l_show,
};
6484
tracing_last_boot_open(struct inode * inode,struct file * file)6485 static int tracing_last_boot_open(struct inode *inode, struct file *file)
6486 {
6487 struct trace_array *tr = inode->i_private;
6488 struct seq_file *m;
6489 int ret;
6490
6491 ret = tracing_check_open_get_tr(tr);
6492 if (ret)
6493 return ret;
6494
6495 ret = seq_open(file, &last_boot_seq_ops);
6496 if (ret) {
6497 trace_array_put(tr);
6498 return ret;
6499 }
6500
6501 m = file->private_data;
6502 m->private = tr;
6503
6504 return 0;
6505 }
6506
tracing_buffer_meta_open(struct inode * inode,struct file * filp)6507 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6508 {
6509 struct trace_array *tr = inode->i_private;
6510 int cpu = tracing_get_cpu(inode);
6511 int ret;
6512
6513 ret = tracing_check_open_get_tr(tr);
6514 if (ret)
6515 return ret;
6516
6517 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6518 if (ret < 0)
6519 __trace_array_put(tr);
6520 return ret;
6521 }
6522
6523 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6524 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6525 size_t cnt, loff_t *ppos)
6526 {
6527 /*
6528 * There is no need to read what the user has written, this function
6529 * is just to make sure that there is no error when "echo" is used
6530 */
6531
6532 *ppos += cnt;
6533
6534 return cnt;
6535 }
6536
6537 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)6538 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6539 {
6540 struct trace_array *tr = inode->i_private;
6541
6542 /* disable tracing ? */
6543 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
6544 tracer_tracing_off(tr);
6545 /* resize the ring buffer to 0 */
6546 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6547
6548 trace_array_put(tr);
6549
6550 return 0;
6551 }
6552
6553 #define TRACE_MARKER_MAX_SIZE 4096
6554
/*
 * Write a user-supplied marker string of @cnt bytes into @tr's ring
 * buffer as a TRACE_PRINT event. Returns the number of bytes recorded
 * or a negative error.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Terminate with "\n\0" unless the text already ends in a newline. */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	/* Run post-commit triggers after the event is visible. */
	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
6616
/* One per-CPU scratch slot for copying data in from user space. */
struct trace_user_buf {
	char *buf;	/* kmalloc'ed buffer; NULL until allocated */
};

/* Protects allocation, ref counting and freeing of trace_user_buffer. */
static DEFINE_MUTEX(trace_user_buffer_mutex);
static struct trace_user_buf_info *trace_user_buffer;
6623
/**
 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
 * @tinfo: The descriptor to free up
 *
 * Frees any data allocated in the trace info descriptor.
 */
void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
	char *buf;
	int cpu;

	/* Nothing to do if the per-CPU array was never allocated. */
	if (!tinfo || !tinfo->tbuf)
		return;

	/* Individual slots may still be NULL after a partial init; kfree(NULL) is fine. */
	for_each_possible_cpu(cpu) {
		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		kfree(buf);
	}
	free_percpu(tinfo->tbuf);
}
6644
/*
 * Allocate the per-CPU scratch buffers for @tinfo, each @size bytes.
 * On failure the caller is expected to clean up via
 * trace_user_fault_destroy() (directly or through user_buffer_free()).
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/*
	 * Start every slot as NULL so a partial failure below can be
	 * cleaned up safely.
	 */
	for_each_possible_cpu(cpu)
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;

	for_each_possible_cpu(cpu) {
		char *buf = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));

		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}
6674
6675 /* For internal use. Free and reinitialize */
user_buffer_free(struct trace_user_buf_info ** tinfo)6676 static void user_buffer_free(struct trace_user_buf_info **tinfo)
6677 {
6678 lockdep_assert_held(&trace_user_buffer_mutex);
6679
6680 trace_user_fault_destroy(*tinfo);
6681 kfree(*tinfo);
6682 *tinfo = NULL;
6683 }
6684
6685 /* For internal use. Initialize and allocate */
user_buffer_init(struct trace_user_buf_info ** tinfo,size_t size)6686 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
6687 {
6688 bool alloc = false;
6689 int ret;
6690
6691 lockdep_assert_held(&trace_user_buffer_mutex);
6692
6693 if (!*tinfo) {
6694 alloc = true;
6695 *tinfo = kzalloc_obj(**tinfo);
6696 if (!*tinfo)
6697 return -ENOMEM;
6698 }
6699
6700 ret = user_fault_buffer_enable(*tinfo, size);
6701 if (ret < 0 && alloc)
6702 user_buffer_free(tinfo);
6703
6704 return ret;
6705 }
6706
6707 /* For internal use, derefrence and free if necessary */
user_buffer_put(struct trace_user_buf_info ** tinfo)6708 static void user_buffer_put(struct trace_user_buf_info **tinfo)
6709 {
6710 guard(mutex)(&trace_user_buffer_mutex);
6711
6712 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
6713 return;
6714
6715 if (--(*tinfo)->ref)
6716 return;
6717
6718 user_buffer_free(tinfo);
6719 }
6720
/**
 * trace_user_fault_init - Allocate or reference a per CPU buffer
 * @tinfo: A pointer to the trace buffer descriptor
 * @size: The size to allocate each per CPU buffer
 *
 * Create a per CPU buffer that can be used to copy from user space
 * in a task context. When calling trace_user_fault_read(), preemption
 * must be disabled, and it will enable preemption and copy user
 * space data to the buffer. If any schedule switches occur, it will
 * retry until it succeeds without a schedule switch knowing the buffer
 * is still valid.
 *
 * Returns 0 on success, negative on failure.
 */
int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
{
	int ret;

	if (!tinfo)
		return -EINVAL;

	guard(mutex)(&trace_user_buffer_mutex);

	ret = user_buffer_init(&tinfo, size);
	/* On failure, release any per-CPU buffers that were allocated. */
	if (ret < 0)
		trace_user_fault_destroy(tinfo);

	return ret;
}
6750
/**
 * trace_user_fault_get - up the ref count for the user buffer
 * @tinfo: A pointer to the trace buffer descriptor
 *
 * Ups the ref count of the trace buffer.
 *
 * Returns the new ref count, or -1 if @tinfo is NULL.
 */
int trace_user_fault_get(struct trace_user_buf_info *tinfo)
{
	if (!tinfo)
		return -1;

	guard(mutex)(&trace_user_buffer_mutex);

	tinfo->ref++;
	return tinfo->ref;
}
6769
6770 /**
6771 * trace_user_fault_put - dereference a per cpu trace buffer
6772 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6773 *
6774 * Decrement the ref count of @tinfo.
6775 *
6776 * Returns the new refcount (negative on error).
6777 */
trace_user_fault_put(struct trace_user_buf_info * tinfo)6778 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6779 {
6780 guard(mutex)(&trace_user_buffer_mutex);
6781
6782 if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6783 return -1;
6784
6785 --tinfo->ref;
6786 return tinfo->ref;
6787 }
6788
/**
 * trace_user_fault_read - Read user space into a per CPU buffer
 * @tinfo: The @tinfo allocated by trace_user_fault_get()
 * @ptr: The user space pointer to read
 * @size: The size of user space to read.
 * @copy_func: Optional function to use to copy from user space
 * @data: Data to pass to copy_func if it was supplied
 *
 * Preemption must be disabled when this is called, and must not
 * be enabled while using the returned buffer.
 * This does the copying from user space into a per CPU buffer.
 *
 * The @size must not be greater than the size passed in to
 * trace_user_fault_init().
 *
 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
 * otherwise it will call @copy_func. It will call @copy_func with:
 *
 * buffer: the per CPU buffer of the @tinfo.
 * ptr: The pointer @ptr to user space to read
 * size: The @size of the ptr to read
 * data: The @data parameter
 *
 * It is expected that @copy_func will return 0 on success and non zero
 * if there was a fault.
 *
 * Returns a pointer to the buffer with the content read from @ptr.
 * Preemption must remain disabled while the caller accesses the
 * buffer returned by this function.
 * Returns NULL if there was a fault, or the size passed in is
 * greater than the size passed to trace_user_fault_init().
 */
char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
			    const char __user *ptr, size_t size,
			    trace_user_buf_copy copy_func, void *data)
{
	int cpu = smp_processor_id();
	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
	unsigned int cnt;
	int trys = 0;	/* retry counter; bounded to avoid an infinite loop */
	int ret;

	lockdep_assert_preemption_disabled();

	/*
	 * It's up to the caller to not try to copy more than it said
	 * it would.
	 */
	if (size > tinfo->size)
		return NULL;

	/*
	 * This acts similar to a seqcount. The per CPU context switches are
	 * recorded, migration is disabled and preemption is enabled. The
	 * read of the user space memory is copied into the per CPU buffer.
	 * Preemption is disabled again, and if the per CPU context switches count
	 * is still the same, it means the buffer has not been corrupted.
	 * If the count is different, it is assumed the buffer is corrupted
	 * and reading must be tried again.
	 */

	do {
		/*
		 * It is possible that something is trying to migrate this
		 * task. What happens then, is when preemption is enabled,
		 * the migration thread will preempt this task, try to
		 * migrate it, fail, then let it run again. That will
		 * cause this to loop again and never succeed.
		 * On failures, enabled and disable preemption with
		 * migration enabled, to allow the migration thread to
		 * migrate this task.
		 */
		if (trys) {
			preempt_enable_notrace();
			preempt_disable_notrace();
			/* We may now be on a different CPU: refresh the buffer. */
			cpu = smp_processor_id();
			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		}

		/*
		 * If for some reason, copy_from_user() always causes a context
		 * switch, this would then cause an infinite loop.
		 * If this task is preempted by another user space task, it
		 * will cause this task to try again. But just in case something
		 * changes where the copying from user space causes another task
		 * to run, prevent this from going into an infinite loop.
		 * 100 tries should be plenty.
		 */
		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
			return NULL;

		/* Read the current CPU context switch counter */
		cnt = nr_context_switches_cpu(cpu);

		/*
		 * Preemption is going to be enabled, but this task must
		 * remain on this CPU.
		 */
		migrate_disable();

		/*
		 * Now preemption is being enabled and another task can come in
		 * and use the same buffer and corrupt our data.
		 */
		preempt_enable_notrace();

		/* Make sure preemption is enabled here */
		lockdep_assert_preemption_enabled();

		/* With preemption enabled, the copy may fault and sleep. */
		if (copy_func) {
			ret = copy_func(buffer, ptr, size, data);
		} else {
			ret = __copy_from_user(buffer, ptr, size);
		}

		preempt_disable_notrace();
		migrate_enable();

		/* if it faulted, no need to test if the buffer was corrupted */
		if (ret)
			return NULL;

		/*
		 * Preemption is disabled again, now check the per CPU context
		 * switch counter. If it doesn't match, then another user space
		 * process may have schedule in and corrupted our buffer. In that
		 * case the copying must be retried.
		 */
	} while (nr_context_switches_cpu(cpu) != cnt);

	return buffer;
}
6921
/*
 * Handle writes to the trace_marker file: copy the user's string into
 * the ring buffer (all instances sharing the global marker, if any).
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
		   size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* Reject counts that would be negative as a ssize_t. */
	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Oversized writes are silently truncated. */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}
6967
/*
 * Write a raw (binary) marker blob of @cnt bytes into @tr's ring buffer
 * as a TRACE_RAW_DATA event. The blob starts with the user's tag id.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/* cnt includes both the entry->id and the data behind it. */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	/* Raw writes are all-or-nothing: no truncation like trace_marker. */
	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
7002
/*
 * Handle writes to the trace_marker_raw file: copy the user's binary
 * payload into the ring buffer (all instances for the global marker).
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
		       size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}
7046
/*
 * Open handler shared by trace_marker and trace_marker_raw.
 * Lazily allocates the per-open user copy buffer (refcounted so the
 * last release frees it) and takes a reference on the trace array.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		if (!trace_user_buffer) {
			/* First opener allocates the shared copy buffer */
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			/* Subsequent openers just take a reference */
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	/* On failure, drop the buffer reference taken above */
	if (ret < 0)
		user_buffer_put(&trace_user_buffer);
	return ret;
}
7067
/*
 * Release handler for trace_marker files: drop the copy-buffer reference
 * (freeing it on last put), then release the trace array reference.
 */
static int tracing_mark_release(struct inode *inode, struct file *file)
{
	user_buffer_put(&trace_user_buffer);
	return tracing_release_generic_tr(inode, file);
}
7073
tracing_clock_show(struct seq_file * m,void * v)7074 static int tracing_clock_show(struct seq_file *m, void *v)
7075 {
7076 struct trace_array *tr = m->private;
7077 int i;
7078
7079 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7080 seq_printf(m,
7081 "%s%s%s%s", i ? " " : "",
7082 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7083 i == tr->clock_id ? "]" : "");
7084 seq_putc(m, '\n');
7085
7086 return 0;
7087 }
7088
/*
 * Switch the trace array to the clock named by @clockstr.
 * Returns 0 on success or -EINVAL if the name is unknown.
 * Both the main and (if present) snapshot buffers are reset because
 * timestamps from different clocks are not comparable.
 */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	/* Look the name up in the clock table */
	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Keep the snapshot buffer on the same clock as the main buffer */
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	update_last_data_if_empty(tr);

	/*
	 * Persist the clock choice into the boot scratch area so it can be
	 * restored, unless this array holds last-boot data.
	 */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}
7127
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7128 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7129 size_t cnt, loff_t *fpos)
7130 {
7131 struct seq_file *m = filp->private_data;
7132 struct trace_array *tr = m->private;
7133 char buf[64];
7134 const char *clockstr;
7135 int ret;
7136
7137 if (cnt >= sizeof(buf))
7138 return -EINVAL;
7139
7140 if (copy_from_user(buf, ubuf, cnt))
7141 return -EFAULT;
7142
7143 buf[cnt] = 0;
7144
7145 clockstr = strstrip(buf);
7146
7147 ret = tracing_set_clock(tr, clockstr);
7148 if (ret)
7149 return ret;
7150
7151 *fpos += cnt;
7152
7153 return cnt;
7154 }
7155
tracing_clock_open(struct inode * inode,struct file * file)7156 static int tracing_clock_open(struct inode *inode, struct file *file)
7157 {
7158 struct trace_array *tr = inode->i_private;
7159 int ret;
7160
7161 ret = tracing_check_open_get_tr(tr);
7162 if (ret)
7163 return ret;
7164
7165 if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
7166 trace_array_put(tr);
7167 return -EACCES;
7168 }
7169
7170 ret = single_open(file, tracing_clock_show, inode->i_private);
7171 if (ret < 0)
7172 trace_array_put(tr);
7173
7174 return ret;
7175 }
7176
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7177 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7178 {
7179 struct trace_array *tr = m->private;
7180
7181 guard(mutex)(&trace_types_lock);
7182
7183 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7184 seq_puts(m, "delta [absolute]\n");
7185 else
7186 seq_puts(m, "[delta] absolute\n");
7187
7188 return 0;
7189 }
7190
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7191 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7192 {
7193 struct trace_array *tr = inode->i_private;
7194 int ret;
7195
7196 ret = tracing_check_open_get_tr(tr);
7197 if (ret)
7198 return ret;
7199
7200 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7201 if (ret < 0)
7202 trace_array_put(tr);
7203
7204 return ret;
7205 }
7206
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7207 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7208 {
7209 if (rbe == this_cpu_read(trace_buffered_event))
7210 return ring_buffer_time_stamp(buffer);
7211
7212 return ring_buffer_event_time_stamp(buffer, rbe);
7213 }
7214
/* Per-open state for the per-cpu raw buffer files (trace_pipe_raw). */
struct ftrace_buffer_info {
	struct trace_iterator	iter;		/* iterator over the ring buffer */
	void			*spare;		/* cached sub-buffer page for reads */
	unsigned int		spare_cpu;	/* CPU the spare page was allocated for */
	unsigned int		spare_size;	/* sub-buffer size the spare matches */
	unsigned int		read;		/* bytes already consumed from spare */
};
7222
7223 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Open handler for the snapshot file.  Readers get a full tracing
 * iterator over the snapshot buffer; writers only need a stub seq_file
 * carrying a minimal iterator so the write handler can find the array.
 */
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if (file->f_mode & FMODE_READ) {
		/* true => iterate the snapshot buffer, not the live one */
		iter = __tracing_open(inode, file, true);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
	} else {
		/* Writes still need the seq_file to hold the private data */
		ret = -ENOMEM;
		m = kzalloc_obj(*m);
		if (!m)
			goto out;
		iter = kzalloc_obj(*iter);
		if (!iter) {
			kfree(m);
			goto out;
		}
		ret = 0;

		iter->tr = tr;
		iter->array_buffer = &tr->snapshot_buffer;
		iter->cpu_file = tracing_get_cpu(inode);
		m->private = iter;
		file->private_data = m;
	}
out:
	/* Any failure must drop the array reference taken above */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7264
tracing_swap_cpu_buffer(void * tr)7265 static void tracing_swap_cpu_buffer(void *tr)
7266 {
7267 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7268 }
7269
/*
 * Write handler for the snapshot file.  The written value selects the
 * action: 0 frees the snapshot buffer, 1 takes a snapshot (swapping the
 * buffers), anything else clears the snapshot buffer's contents.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers(tr);
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	guard(mutex)(&trace_types_lock);

	/* A tracer that swaps buffers itself owns the snapshot buffer */
	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/* A pending conditional snapshot also owns the snapshot buffer */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		return ret;

	switch (val) {
	case 0:
		/* Freeing affects all CPUs; per-cpu free makes no sense */
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
			return -EINVAL;
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
			return -EINVAL;
#endif
		/* Snapshot buffer must match the live buffer's size to swap */
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
					&tr->array_buffer, iter->cpu_file);

		/* Allocates the snapshot buffer if not already done */
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;

		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
			local_irq_disable();
			update_max_tr(tr, current, smp_processor_id(), NULL);
			local_irq_enable();
		} else {
			/* Per-cpu swap must run on the target CPU */
			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
						 (void *)tr, 1);
		}
		tracing_disarm_snapshot(tr);
		break;
	default:
		/* Any other value: clear the snapshot contents */
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->snapshot_buffer);
			else
				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
		}
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}

	return ret;
}
7351
tracing_snapshot_release(struct inode * inode,struct file * file)7352 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7353 {
7354 struct seq_file *m = file->private_data;
7355 int ret;
7356
7357 ret = tracing_release(inode, file);
7358
7359 if (file->f_mode & FMODE_READ)
7360 return ret;
7361
7362 /* If write only, the seq_file is just a stub */
7363 if (m)
7364 kfree(m->private);
7365 kfree(m);
7366
7367 return 0;
7368 }
7369
7370 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7371 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7372 size_t count, loff_t *ppos);
7373 static int tracing_buffers_release(struct inode *inode, struct file *file);
7374 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7375 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7376
/*
 * Open handler for snapshot_raw: same per-cpu binary interface as
 * trace_pipe_raw, but redirected at the snapshot buffer.
 */
static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
	struct ftrace_buffer_info *info;
	int ret;

	/* The following checks for tracefs lockdown */
	ret = tracing_buffers_open(inode, filp);
	if (ret < 0)
		return ret;

	info = filp->private_data;

	/* A tracer that swaps buffers itself owns the snapshot buffer */
	if (tracer_uses_snapshot(info->iter.trace)) {
		tracing_buffers_release(inode, filp);
		return -EBUSY;
	}

	/* Re-point the iterator from the live buffer to the snapshot */
	info->iter.snapshot = true;
	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;

	return ret;
}
7399
7400 #endif /* CONFIG_TRACER_SNAPSHOT */
7401
7402
/* tracing_thresh: latency threshold for the latency tracers */
static const struct file_operations tracing_thresh_fops = {
	.open = tracing_open_generic,
	.read = tracing_thresh_read,
	.write = tracing_thresh_write,
	.llseek = generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
/* tracing_max_latency: max latency recorded by the latency tracers */
static const struct file_operations tracing_max_lat_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_max_lat_read,
	.write = tracing_max_lat_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};
#endif

/* current_tracer: select/show the active tracer */
static const struct file_operations set_tracer_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_set_trace_read,
	.write = tracing_set_trace_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* trace_pipe: consuming, blocking text reader */
static const struct file_operations tracing_pipe_fops = {
	.open = tracing_open_pipe,
	.poll = tracing_poll_pipe,
	.read = tracing_read_pipe,
	.splice_read = tracing_splice_read_pipe,
	.release = tracing_release_pipe,
};

/* buffer_size_kb: per-cpu ring buffer size */
static const struct file_operations tracing_entries_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_entries_read,
	.write = tracing_entries_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* syscall user buffer size control */
static const struct file_operations tracing_syscall_buf_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_syscall_buf_read,
	.write = tracing_syscall_buf_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* buffer meta data (read-only seq_file) */
static const struct file_operations tracing_buffer_meta_fops = {
	.open = tracing_buffer_meta_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_seq_release,
};

/* buffer_total_size_kb: sum of all per-cpu buffer sizes (read-only) */
static const struct file_operations tracing_total_entries_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_total_entries_read,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* free_buffer: any write frees the ring buffer */
static const struct file_operations tracing_free_buffer_fops = {
	.open = tracing_open_generic_tr,
	.write = tracing_free_buffer_write,
	.release = tracing_free_buffer_release,
};

/* trace_marker: inject text messages into the trace */
static const struct file_operations tracing_mark_fops = {
	.open = tracing_mark_open,
	.write = tracing_mark_write,
	.release = tracing_mark_release,
};

/* trace_marker_raw: inject binary blobs into the trace */
static const struct file_operations tracing_mark_raw_fops = {
	.open = tracing_mark_open,
	.write = tracing_mark_raw_write,
	.release = tracing_mark_release,
};

/* trace_clock: select the clock used for timestamps */
static const struct file_operations trace_clock_fops = {
	.open = tracing_clock_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_single_release_tr,
	.write = tracing_clock_write,
};

/* timestamp_mode: delta vs absolute timestamps (read-only) */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open = tracing_time_stamp_mode_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_single_release_tr,
};

/* last_boot_info: data carried over from the previous boot */
static const struct file_operations last_boot_fops = {
	.open = tracing_last_boot_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_seq_release,
};

#ifdef CONFIG_TRACER_SNAPSHOT
/* snapshot: text view plus take/free/clear control via writes */
static const struct file_operations snapshot_fops = {
	.open = tracing_snapshot_open,
	.read = seq_read,
	.write = tracing_snapshot_write,
	.llseek = tracing_lseek,
	.release = tracing_snapshot_release,
};

/* snapshot_raw: per-cpu binary view of the snapshot buffer */
static const struct file_operations snapshot_raw_fops = {
	.open = snapshot_raw_open,
	.read = tracing_buffers_read,
	.release = tracing_buffers_release,
	.splice_read = tracing_buffers_splice_read,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
7523
7524 /*
7525 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7526 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read the value from
7528 * @cnt: The maximum number of bytes to read
7529 * @ppos: The current "file" position
7530 *
7531 * This function implements the write interface for a struct trace_min_max_param.
7532 * The filp->private_data must point to a trace_min_max_param structure that
7533 * defines where to write the value, the min and the max acceptable values,
7534 * and a lock to protect the write.
7535 */
7536 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7537 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7538 {
7539 struct trace_min_max_param *param = filp->private_data;
7540 u64 val;
7541 int err;
7542
7543 if (!param)
7544 return -EFAULT;
7545
7546 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7547 if (err)
7548 return err;
7549
7550 if (param->lock)
7551 mutex_lock(param->lock);
7552
7553 if (param->min && val < *param->min)
7554 err = -EINVAL;
7555
7556 if (param->max && val > *param->max)
7557 err = -EINVAL;
7558
7559 if (!err)
7560 *param->val = val;
7561
7562 if (param->lock)
7563 mutex_unlock(param->lock);
7564
7565 if (err)
7566 return err;
7567
7568 return cnt;
7569 }
7570
7571 /*
7572 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7573 * @filp: The active open file structure
7574 * @ubuf: The userspace provided buffer to read value into
7575 * @cnt: The maximum number of bytes to read
7576 * @ppos: The current "file" position
7577 *
7578 * This function implements the read interface for a struct trace_min_max_param.
7579 * The filp->private_data must point to a trace_min_max_param struct with valid
7580 * data.
7581 */
7582 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7583 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7584 {
7585 struct trace_min_max_param *param = filp->private_data;
7586 char buf[U64_STR_SIZE];
7587 int len;
7588 u64 val;
7589
7590 if (!param)
7591 return -EFAULT;
7592
7593 val = *param->val;
7594
7595 if (cnt > sizeof(buf))
7596 cnt = sizeof(buf);
7597
7598 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7599
7600 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7601 }
7602
/* Generic fops for any bounded-u64 tracefs control file */
const struct file_operations trace_min_max_fops = {
	.open = tracing_open_generic,
	.read = trace_min_max_read,
	.write = trace_min_max_write,
};
7608
/* Maximum number of entries kept in tracing/error_log */
#define TRACING_LOG_ERRS_MAX	8
/* Maximum length of the formatted error-location string */
#define TRACING_LOG_LOC_MAX	128

#define CMD_PREFIX "  Command: "

struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* timestamp (local_clock) of the error */
};

struct tracing_log_err {
	struct list_head	list;	/* link in tr->err_log */
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;	/* what caused err */
};

/* Serializes all access to the per-array error logs */
static DEFINE_MUTEX(tracing_err_log_lock);
7629
alloc_tracing_log_err(int len)7630 static struct tracing_log_err *alloc_tracing_log_err(int len)
7631 {
7632 struct tracing_log_err *err;
7633
7634 err = kzalloc_obj(*err);
7635 if (!err)
7636 return ERR_PTR(-ENOMEM);
7637
7638 err->cmd = kzalloc(len, GFP_KERNEL);
7639 if (!err->cmd) {
7640 kfree(err);
7641 return ERR_PTR(-ENOMEM);
7642 }
7643
7644 return err;
7645 }
7646
/* Free an error-log entry: the command buffer first, then the entry. */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
7652
get_tracing_log_err(struct trace_array * tr,int len)7653 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7654 int len)
7655 {
7656 struct tracing_log_err *err;
7657 char *cmd;
7658
7659 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7660 err = alloc_tracing_log_err(len);
7661 if (PTR_ERR(err) != -ENOMEM)
7662 tr->n_err_log_entries++;
7663
7664 return err;
7665 }
7666 cmd = kzalloc(len, GFP_KERNEL);
7667 if (!cmd)
7668 return ERR_PTR(-ENOMEM);
7669 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7670 kfree(err->cmd);
7671 err->cmd = cmd;
7672 list_del(&err->list);
7673
7674 return err;
7675 }
7676
7677 /**
7678 * err_pos - find the position of a string within a command for error careting
7679 * @cmd: The tracing command that caused the error
7680 * @str: The string to position the caret at within @cmd
7681 *
7682 * Finds the position of the first occurrence of @str within @cmd. The
7683 * return value can be passed to tracing_log_err() for caret placement
7684 * within @cmd.
7685 *
7686 * Returns the index within @cmd of the first occurrence of @str or 0
7687 * if @str was not found.
7688 */
err_pos(char * cmd,const char * str)7689 unsigned int err_pos(char *cmd, const char *str)
7690 {
7691 char *found;
7692
7693 if (WARN_ON(!strlen(cmd)))
7694 return 0;
7695
7696 found = strstr(cmd, str);
7697 if (found)
7698 return found - cmd;
7699
7700 return 0;
7701 }
7702
7703 /**
7704 * tracing_log_err - write an error to the tracing error log
7705 * @tr: The associated trace array for the error (NULL for top level array)
7706 * @loc: A string describing where the error occurred
7707 * @cmd: The tracing command that caused the error
7708 * @errs: The array of loc-specific static error strings
7709 * @type: The index into errs[], which produces the specific static err string
7710 * @pos: The position the caret should be placed in the cmd
7711 *
7712 * Writes an error into tracing/error_log of the form:
7713 *
7714 * <loc>: error: <text>
7715 * Command: <cmd>
7716 * ^
7717 *
7718 * tracing/error_log is a small log file containing the last
7719 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7720 * unless there has been a tracing error, and the error log can be
7721 * cleared and have its memory freed by writing the empty string in
7722 * truncation mode to it i.e. echo > tracing/error_log.
7723 *
7724 * NOTE: the @errs array along with the @type param are used to
7725 * produce a static error string - this string is not copied and saved
7726 * when the error is logged - only a pointer to it is saved. See
7727 * existing callers for examples of how static strings are typically
7728 * defined for use with tracing_log_err().
7729 */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)7730 void tracing_log_err(struct trace_array *tr,
7731 const char *loc, const char *cmd,
7732 const char **errs, u8 type, u16 pos)
7733 {
7734 struct tracing_log_err *err;
7735 int len = 0;
7736
7737 if (!tr)
7738 tr = &global_trace;
7739
7740 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7741
7742 guard(mutex)(&tracing_err_log_lock);
7743
7744 err = get_tracing_log_err(tr, len);
7745 if (PTR_ERR(err) == -ENOMEM)
7746 return;
7747
7748 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7749 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7750
7751 err->info.errs = errs;
7752 err->info.type = type;
7753 err->info.pos = pos;
7754 err->info.ts = local_clock();
7755
7756 list_add_tail(&err->list, &tr->err_log);
7757 }
7758
/* Empty the array's error log, freeing every entry. */
static void clear_tracing_err_log(struct trace_array *tr)
{
	struct tracing_log_err *err, *next;

	guard(mutex)(&tracing_err_log_lock);

	list_for_each_entry_safe(err, next, &tr->err_log, list) {
		list_del(&err->list);
		free_tracing_log_err(err);
	}

	tr->n_err_log_entries = 0;
}
7772
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)7773 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7774 {
7775 struct trace_array *tr = m->private;
7776
7777 mutex_lock(&tracing_err_log_lock);
7778
7779 return seq_list_start(&tr->err_log, *pos);
7780 }
7781
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)7782 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7783 {
7784 struct trace_array *tr = m->private;
7785
7786 return seq_list_next(v, &tr->err_log, pos);
7787 }
7788
tracing_err_log_seq_stop(struct seq_file * m,void * v)7789 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7790 {
7791 mutex_unlock(&tracing_err_log_lock);
7792 }
7793
tracing_err_log_show_pos(struct seq_file * m,u16 pos)7794 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7795 {
7796 u16 i;
7797
7798 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7799 seq_putc(m, ' ');
7800 for (i = 0; i < pos; i++)
7801 seq_putc(m, ' ');
7802 seq_puts(m, "^\n");
7803 }
7804
tracing_err_log_seq_show(struct seq_file * m,void * v)7805 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7806 {
7807 struct tracing_log_err *err = v;
7808
7809 if (err) {
7810 const char *err_text = err->info.errs[err->info.type];
7811 u64 sec = err->info.ts;
7812 u32 nsec;
7813
7814 nsec = do_div(sec, NSEC_PER_SEC);
7815 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7816 err->loc, err_text);
7817 seq_printf(m, "%s", err->cmd);
7818 tracing_err_log_show_pos(m, err->info.pos);
7819 }
7820
7821 return 0;
7822 }
7823
/* seq_file iteration over tracing/error_log */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start = tracing_err_log_seq_start,
	.next = tracing_err_log_seq_next,
	.stop = tracing_err_log_seq_stop,
	.show = tracing_err_log_seq_show
};
7830
tracing_err_log_open(struct inode * inode,struct file * file)7831 static int tracing_err_log_open(struct inode *inode, struct file *file)
7832 {
7833 struct trace_array *tr = inode->i_private;
7834 int ret = 0;
7835
7836 ret = tracing_check_open_get_tr(tr);
7837 if (ret)
7838 return ret;
7839
7840 /* If this file was opened for write, then erase contents */
7841 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7842 clear_tracing_err_log(tr);
7843
7844 if (file->f_mode & FMODE_READ) {
7845 ret = seq_open(file, &tracing_err_log_seq_ops);
7846 if (!ret) {
7847 struct seq_file *m = file->private_data;
7848 m->private = tr;
7849 } else {
7850 trace_array_put(tr);
7851 }
7852 }
7853 return ret;
7854 }
7855
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)7856 static ssize_t tracing_err_log_write(struct file *file,
7857 const char __user *buffer,
7858 size_t count, loff_t *ppos)
7859 {
7860 return count;
7861 }
7862
tracing_err_log_release(struct inode * inode,struct file * file)7863 static int tracing_err_log_release(struct inode *inode, struct file *file)
7864 {
7865 struct trace_array *tr = inode->i_private;
7866
7867 trace_array_put(tr);
7868
7869 if (file->f_mode & FMODE_READ)
7870 seq_release(inode, file);
7871
7872 return 0;
7873 }
7874
/* fops for tracing/error_log */
static const struct file_operations tracing_err_log_fops = {
	.open = tracing_err_log_open,
	.write = tracing_err_log_write,
	.read = seq_read,
	.llseek = tracing_lseek,
	.release = tracing_err_log_release,
};
7882
/*
 * Open handler for trace_pipe_raw: allocate the per-open buffer-info,
 * point its iterator at the live ring buffer, and bump the array's
 * trace_ref so the buffer cannot be freed while the file is open.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr = tr;
	info->iter.cpu_file = tracing_get_cpu(inode);
	info->iter.trace = tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare = NULL;
	/* Force reading ring buffer for first read */
	info->read = (unsigned int)-1;

	filp->private_data = info;

	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7921
7922 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7923 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7924 {
7925 struct ftrace_buffer_info *info = filp->private_data;
7926 struct trace_iterator *iter = &info->iter;
7927
7928 return trace_poll(iter, filp, poll_table);
7929 }
7930
/*
 * Read handler for trace_pipe_raw: copy whole sub-buffer pages of raw
 * ring-buffer data to user space.  A "spare" page is cached across reads
 * so partially consumed pages can be handed out in pieces; blocks (unless
 * O_NONBLOCK) when the buffer is empty.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	/* The snapshot buffer is owned by a tracer that swaps into it */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	/* (Re)allocate the spare page if we do not have one */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing to read: wait for data unless closing/non-blocking */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	/* Copy out whatever remains of the spare page, capped at count */
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* Fail only if nothing at all could be copied */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8019
/*
 * flush handler for trace_pipe_raw: mark the iterator closed and wake
 * any blocked readers so close() is not held up by a waiting read.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8033
/*
 * Release handler for trace_pipe_raw: drop the trace_ref taken at open,
 * return the cached spare page to the ring buffer, and free the info.
 */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	guard(mutex)(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	return 0;
}
8052
/* Refcounted handle to a ring-buffer page handed to a pipe via splice */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* buffer the page came from */
	void			*page;		/* the read page itself */
	int			cpu;		/* CPU the page belongs to */
	refcount_t		refcount;	/* pipe buffers sharing the page */
};
8059
buffer_ref_release(struct buffer_ref * ref)8060 static void buffer_ref_release(struct buffer_ref *ref)
8061 {
8062 if (!refcount_dec_and_test(&ref->refcount))
8063 return;
8064 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8065 kfree(ref);
8066 }
8067
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8068 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8069 struct pipe_buffer *buf)
8070 {
8071 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8072
8073 buffer_ref_release(ref);
8074 buf->private = 0;
8075 }
8076
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8077 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8078 struct pipe_buffer *buf)
8079 {
8080 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8081
8082 if (refcount_read(&ref->refcount) > INT_MAX/2)
8083 return false;
8084
8085 refcount_inc(&ref->refcount);
8086 return true;
8087 }
8088
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release = buffer_pipe_buf_release,
	.get = buffer_pipe_buf_get,
};
8094
8095 /*
8096 * Callback from splice_to_pipe(), if we need to release some pages
8097 * at the end of the spd in case we error'ed out in filling the pipe.
8098 */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8099 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8100 {
8101 struct buffer_ref *ref =
8102 (struct buffer_ref *)spd->partial[i].private;
8103
8104 buffer_ref_release(ref);
8105 spd->partial[i].private = 0;
8106 }
8107
/*
 * splice(2) handler for the per-cpu raw ring buffer file.
 *
 * Moves whole sub-buffer pages into the pipe without copying: each pipe
 * buffer carries a refcounted buffer_ref so the page stays valid until
 * every consumer of the pipe is done with it.  Returns the number of
 * bytes spliced or a negative errno.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages_def,
		.partial = partial_def,
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.ops = &buffer_pipe_buf_ops,
		.spd_release = buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	/* Raw reads would race with a tracer that swaps in snapshot buffers. */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	/* Transfers are done in whole sub-buffers: offset must be aligned. */
	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	if (*ppos & (page_size - 1))
		return -EINVAL;

	/* Round the length down to a sub-buffer multiple (need at least one). */
	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		/*
		 * Ownership of @ref passes to the pipe buffer; it is dropped
		 * by buffer_pipe_buf_release() or buffer_spd_release().
		 */
		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		if (ret)
			goto out;

		/* Already waited once and still found nothing: give up. */
		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
 out:
	splice_shrink_spd(&spd);

	return ret;
}
8228
/*
 * ioctl(2) handler for the raw ring buffer file.
 *
 * TRACE_MMAP_IOCTL_GET_READER advances the memory-mapped reader page,
 * first blocking (unless O_NONBLOCK) until buffer_percent worth of data
 * is available.  cmd == 0 wakes all waiters on the buffer; any other cmd
 * is rejected with -ENOTTY.
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		if (!(file->f_flags & O_NONBLOCK)) {
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8264
#ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Take a "user mapped" reference on @tr so that the buffer cannot be
 * swapped with a snapshot while user space has it mmap()ed.  Returns
 * -EBUSY if a snapshot is already in use or the counter would overflow.
 */
static int get_snapshot_map(struct trace_array *tr)
{
	int err = 0;

	/*
	 * Called with mmap_lock held. lockdep would be unhappy if we would now
	 * take trace_types_lock. Instead use the specific
	 * snapshot_trigger_lock.
	 */
	spin_lock(&tr->snapshot_trigger_lock);

	if (tr->snapshot || tr->mapped == UINT_MAX)
		err = -EBUSY;
	else
		tr->mapped++;

	spin_unlock(&tr->snapshot_trigger_lock);

	/* Wait for update_max_tr() to observe iter->tr->mapped */
	if (tr->mapped == 1)
		synchronize_rcu();

	return err;

}
put_snapshot_map(struct trace_array * tr)8291 static void put_snapshot_map(struct trace_array *tr)
8292 {
8293 spin_lock(&tr->snapshot_trigger_lock);
8294 if (!WARN_ON(!tr->mapped))
8295 tr->mapped--;
8296 spin_unlock(&tr->snapshot_trigger_lock);
8297 }
#else
/* !CONFIG_TRACER_SNAPSHOT: mapping the buffer never conflicts with snapshots. */
static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
#endif
8302
8303 /*
8304 * This is called when a VMA is duplicated (e.g., on fork()) to increment
8305 * the user_mapped counter without remapping pages.
8306 */
tracing_buffers_mmap_open(struct vm_area_struct * vma)8307 static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
8308 {
8309 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8310 struct trace_iterator *iter = &info->iter;
8311
8312 ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
8313 }
8314
tracing_buffers_mmap_close(struct vm_area_struct * vma)8315 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8316 {
8317 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8318 struct trace_iterator *iter = &info->iter;
8319
8320 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8321 put_snapshot_map(iter->tr);
8322 }
8323
/* ->may_split handler: reject any attempt to split the buffer mapping. */
static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}
8332
/* VMA callbacks for user-space mappings of the ring buffer. */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.open		= tracing_buffers_mmap_open,
	.close		= tracing_buffers_mmap_close,
	.may_split	= tracing_buffers_may_split,
};
8338
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8339 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8340 {
8341 struct ftrace_buffer_info *info = filp->private_data;
8342 struct trace_iterator *iter = &info->iter;
8343 int ret = 0;
8344
8345 /* A memmap'ed and backup buffers are not supported for user space mmap */
8346 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
8347 return -ENODEV;
8348
8349 ret = get_snapshot_map(iter->tr);
8350 if (ret)
8351 return ret;
8352
8353 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8354 if (ret)
8355 put_snapshot_map(iter->tr);
8356
8357 vma->vm_ops = &tracing_buffers_vmops;
8358
8359 return ret;
8360 }
8361
/* File operations for the per-cpu raw ring buffer ("trace_pipe_raw"). */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.flush		= tracing_buffers_flush,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl = tracing_buffers_ioctl,
	.mmap		= tracing_buffers_mmap,
};
8372
/*
 * Read handler for the per-cpu "stats" file: formats the ring buffer
 * statistics (entries, overruns, bytes, timestamps, dropped/read event
 * counts) for one CPU into a trace_seq and copies it to user space.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	/* struct trace_seq is large: heap-allocate instead of using the stack */
	s = kmalloc_obj(*s);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
8436
/* File operations for the per-cpu "stats" file. */
static const struct file_operations tracing_stats_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_stats_read,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};
8443
8444 #ifdef CONFIG_DYNAMIC_FTRACE
8445
8446 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8447 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8448 size_t cnt, loff_t *ppos)
8449 {
8450 ssize_t ret;
8451 char *buf;
8452 int r;
8453
8454 /* 512 should be plenty to hold the amount needed */
8455 #define DYN_INFO_BUF_SIZE 512
8456
8457 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8458 if (!buf)
8459 return -ENOMEM;
8460
8461 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8462 "%ld pages:%ld groups: %ld\n"
8463 "ftrace boot update time = %llu (ns)\n"
8464 "ftrace module total update time = %llu (ns)\n",
8465 ftrace_update_tot_cnt,
8466 ftrace_number_of_pages,
8467 ftrace_number_of_groups,
8468 ftrace_update_time,
8469 ftrace_total_mod_time);
8470
8471 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8472 kfree(buf);
8473 return ret;
8474 }
8475
/* File operations exposing the dynamic ftrace statistics (read-only). */
static const struct file_operations tracing_dyn_info_fops = {
	.open = tracing_open_generic,
	.read = tracing_read_dyn_info,
	.llseek = generic_file_llseek,
};
8481 #endif /* CONFIG_DYNAMIC_FTRACE */
8482
8483 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Probe handler for "func:snapshot": take a snapshot on every hit. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8491
8492 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8493 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8494 struct trace_array *tr, struct ftrace_probe_ops *ops,
8495 void *data)
8496 {
8497 struct ftrace_func_mapper *mapper = data;
8498 long *count = NULL;
8499
8500 if (mapper)
8501 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8502
8503 if (count) {
8504
8505 if (*count <= 0)
8506 return;
8507
8508 (*count)--;
8509 }
8510
8511 tracing_snapshot_instance(tr);
8512 }
8513
8514 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8515 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8516 struct ftrace_probe_ops *ops, void *data)
8517 {
8518 struct ftrace_func_mapper *mapper = data;
8519 long *count = NULL;
8520
8521 seq_printf(m, "%ps:", (void *)ip);
8522
8523 seq_puts(m, "snapshot");
8524
8525 if (mapper)
8526 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8527
8528 if (count)
8529 seq_printf(m, ":count=%ld\n", *count);
8530 else
8531 seq_puts(m, ":unlimited\n");
8532
8533 return 0;
8534 }
8535
8536 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8537 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8538 unsigned long ip, void *init_data, void **data)
8539 {
8540 struct ftrace_func_mapper *mapper = *data;
8541
8542 if (!mapper) {
8543 mapper = allocate_ftrace_func_mapper();
8544 if (!mapper)
8545 return -ENOMEM;
8546 *data = mapper;
8547 }
8548
8549 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8550 }
8551
8552 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8553 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8554 unsigned long ip, void *data)
8555 {
8556 struct ftrace_func_mapper *mapper = data;
8557
8558 if (!ip) {
8559 if (!mapper)
8560 return;
8561 free_ftrace_func_mapper(mapper, NULL);
8562 return;
8563 }
8564
8565 ftrace_func_mapper_remove_ip(mapper, ip);
8566 }
8567
/* Probe ops for "func:snapshot" with no count: snapshot on every hit. */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func = ftrace_snapshot,
	.print = ftrace_snapshot_print,
};

/* Probe ops for "func:snapshot:N": counted, with per-ip mapper state. */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func = ftrace_count_snapshot,
	.print = ftrace_snapshot_print,
	.init = ftrace_snapshot_init,
	.free = ftrace_snapshot_free,
};
8579
/*
 * Parse and register the "func:snapshot[:count]" command written to
 * set_ftrace_filter.  "!func:snapshot" unregisters the probe again and
 * disarms the snapshot buffer.
 */
static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;
	char *number;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	/* A count parameter selects the counting probe variant. */
	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;

	if (glob[0] == '!') {
		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
		if (!ret)
			tracing_disarm_snapshot(tr);

		return ret;
	}

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	/* An empty count (trailing ':') falls back to unlimited. */
	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	ret = tracing_arm_snapshot(tr);
	if (ret < 0)
		return ret;

	ret = register_ftrace_function_probe(glob, tr, ops, count);
	/* Undo the arming if the probe could not be registered. */
	if (ret < 0)
		tracing_disarm_snapshot(tr);

	return ret < 0 ? ret : 0;
}
8633
/* Hooks "snapshot" into the set_ftrace_filter command parser. */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};

/* Register the "snapshot" filter command at boot. */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
/* Without snapshot + dynamic ftrace support there is no command to add. */
static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8646
tracing_get_dentry(struct trace_array * tr)8647 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8648 {
8649 /* Top directory uses NULL as the parent */
8650 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8651 return NULL;
8652
8653 if (WARN_ON(!tr->dir))
8654 return ERR_PTR(-ENODEV);
8655
8656 /* All sub buffers have a descriptor */
8657 return tr->dir;
8658 }
8659
tracing_dentry_percpu(struct trace_array * tr,int cpu)8660 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8661 {
8662 struct dentry *d_tracer;
8663
8664 if (tr->percpu_dir)
8665 return tr->percpu_dir;
8666
8667 d_tracer = tracing_get_dentry(tr);
8668 if (IS_ERR(d_tracer))
8669 return NULL;
8670
8671 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8672
8673 MEM_FAIL(!tr->percpu_dir,
8674 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8675
8676 return tr->percpu_dir;
8677 }
8678
/*
 * Like trace_create_file() but additionally stashes (cpu + 1) in the
 * inode's i_cdev so tracing_get_cpu() can recover which CPU the file
 * belongs to (+1 keeps cpu 0 distinguishable from "no cpu").
 */
struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}
8689
/* Create the tracefs files under per_cpu/cpu<N> for one CPU of @tr. */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &tracing_entries_fops);

	/* Fixed-range buffers get "buffer_meta"; others get the snapshot files. */
	if (tr->range_addr_start)
		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &tracing_buffer_meta_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->range_addr_start) {
		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &snapshot_fops);

		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
				tr, cpu, &snapshot_raw_fops);
	}
#endif
}
8737
8738 #ifdef CONFIG_FTRACE_SELFTEST
8739 /* Let selftest have access to static functions in this file */
8740 #include "trace_selftest.c"
8741 #endif
8742
8743 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8744 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8745 loff_t *ppos)
8746 {
8747 struct trace_option_dentry *topt = filp->private_data;
8748 char *buf;
8749
8750 if (topt->flags->val & topt->opt->bit)
8751 buf = "1\n";
8752 else
8753 buf = "0\n";
8754
8755 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8756 }
8757
/*
 * Write a tracer-specific option file: accepts "0" or "1" and flips the
 * corresponding tracer_opt bit via __set_tracer_option().
 */
static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Only bother the tracer when the bit actually changes. */
	if (!!(topt->flags->val & topt->opt->bit) != val) {
		guard(mutex)(&trace_types_lock);
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}
8785
tracing_open_options(struct inode * inode,struct file * filp)8786 static int tracing_open_options(struct inode *inode, struct file *filp)
8787 {
8788 struct trace_option_dentry *topt = inode->i_private;
8789 int ret;
8790
8791 ret = tracing_check_open_get_tr(topt->tr);
8792 if (ret)
8793 return ret;
8794
8795 filp->private_data = inode->i_private;
8796 return 0;
8797 }
8798
tracing_release_options(struct inode * inode,struct file * file)8799 static int tracing_release_options(struct inode *inode, struct file *file)
8800 {
8801 struct trace_option_dentry *topt = file->private_data;
8802
8803 trace_array_put(topt->tr);
8804 return 0;
8805 }
8806
/* File operations for tracer-specific option files under "options". */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};
8814
8815 /*
8816 * In order to pass in both the trace_array descriptor as well as the index
8817 * to the flag that the trace option file represents, the trace_array
8818 * has a character array of trace_flags_index[], which holds the index
8819 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8820 * The address of this character array is passed to the flag option file
8821 * read/write callbacks.
8822 *
8823 * In order to extract both the index and the trace_array descriptor,
8824 * get_tr_index() uses the following algorithm.
8825 *
8826 * idx = *ptr;
8827 *
8828 * As the pointer itself contains the address of the index (remember
8829 * index[1] == 1).
8830 *
8831 * Then to get the trace_array descriptor, by subtracting that index
8832 * from the ptr, we get to the start of the index itself.
8833 *
8834 * ptr - idx == &index[0]
8835 *
8836 * Then a simple container_of() from that pointer gets us to the
8837 * trace_array descriptor.
8838 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	/* @data points at trace_flags_index[i], whose stored value is i itself */
	*pindex = *(unsigned char *)data;

	/* Step back i bytes to &trace_flags_index[0], then up to the trace_array */
	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
8847
8848 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8849 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8850 loff_t *ppos)
8851 {
8852 void *tr_index = filp->private_data;
8853 struct trace_array *tr;
8854 unsigned int index;
8855 char *buf;
8856
8857 get_tr_index(tr_index, &tr, &index);
8858
8859 if (tr->trace_flags & (1ULL << index))
8860 buf = "1\n";
8861 else
8862 buf = "0\n";
8863
8864 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8865 }
8866
/*
 * Write a core trace option file: accepts "0" or "1" and updates bit
 * @index of tr->trace_flags via set_tracer_flag().
 */
static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	/* Recover both the trace_array and the flag bit from the cookie. */
	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Nested locking: event_mutex is taken outside trace_types_lock. */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1ULL << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
8899
/* File operations for core trace option files under "options". */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
8906
/*
 * Wrapper around tracefs_create_file() that warns when the file could
 * not be created.  Returns the new dentry or NULL on failure.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry;

	dentry = tracefs_create_file(name, mode, parent, data, fops);
	if (!dentry)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return dentry;
}
8921
8922
trace_options_init_dentry(struct trace_array * tr)8923 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8924 {
8925 struct dentry *d_tracer;
8926
8927 if (tr->options)
8928 return tr->options;
8929
8930 d_tracer = tracing_get_dentry(tr);
8931 if (IS_ERR(d_tracer))
8932 return NULL;
8933
8934 tr->options = tracefs_create_dir("options", d_tracer);
8935 if (!tr->options) {
8936 pr_warn("Could not create tracefs directory 'options'\n");
8937 return NULL;
8938 }
8939
8940 return tr->options;
8941 }
8942
8943 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8944 create_trace_option_file(struct trace_array *tr,
8945 struct trace_option_dentry *topt,
8946 struct tracer_flags *flags,
8947 struct tracer_opt *opt)
8948 {
8949 struct dentry *t_options;
8950
8951 t_options = trace_options_init_dentry(tr);
8952 if (!t_options)
8953 return;
8954
8955 topt->flags = flags;
8956 topt->opt = opt;
8957 topt->tr = tr;
8958
8959 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8960 t_options, topt, &trace_options_fops);
8961 }
8962
/*
 * Create an option file for every tracer_opt in @flags and record the
 * resulting trace_option_dentry array on @tr for later lookup/teardown.
 * Returns -ENOMEM only when the tr->topts bookkeeping array cannot be
 * grown; other allocation failures are treated as nothing-to-do.
 */
static int
create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
			  struct tracer_flags *flags)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_opt *opts;
	int cnt;

	if (!flags || !flags->opts)
		return 0;

	opts = flags->opts;

	/* Count the options; the array is terminated by a NULL name. */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	topts = kzalloc_objs(*topts, cnt + 1);
	if (!topts)
		return 0;

	/* Grow the per-array list of per-tracer option blocks by one. */
	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return -ENOMEM;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		MEM_FAIL(topts[cnt].entry == NULL,
			 "Failed to create trace option: %s",
			 opts[cnt].name);
	}
	return 0;
}
9005
get_global_flags_val(struct tracer * tracer)9006 static int get_global_flags_val(struct tracer *tracer)
9007 {
9008 struct tracers *t;
9009
9010 list_for_each_entry(t, &global_trace.tracers, list) {
9011 if (t->tracer != tracer)
9012 continue;
9013 if (!t->flags)
9014 return -1;
9015 return t->flags->val;
9016 }
9017 return -1;
9018 }
9019
add_tracer_options(struct trace_array * tr,struct tracers * t)9020 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9021 {
9022 struct tracer *tracer = t->tracer;
9023 struct tracer_flags *flags = t->flags ?: tracer->flags;
9024
9025 if (!flags)
9026 return 0;
9027
9028 /* Only add tracer options after update_tracer_options finish */
9029 if (!tracer_options_updated)
9030 return 0;
9031
9032 return create_trace_option_files(tr, tracer, flags);
9033 }
9034
add_tracer(struct trace_array * tr,struct tracer * tracer)9035 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9036 {
9037 struct tracer_flags *flags;
9038 struct tracers *t;
9039 int ret;
9040
9041 /* Only enable if the directory has been created already. */
9042 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9043 return 0;
9044
9045 /*
9046 * If this is an instance, only create flags for tracers
9047 * the instance may have.
9048 */
9049 if (!trace_ok_for_array(tracer, tr))
9050 return 0;
9051
9052 t = kmalloc_obj(*t);
9053 if (!t)
9054 return -ENOMEM;
9055
9056 t->tracer = tracer;
9057 t->flags = NULL;
9058 list_add(&t->list, &tr->tracers);
9059
9060 flags = tracer->flags;
9061 if (!flags) {
9062 if (!tracer->default_flags)
9063 return 0;
9064
9065 /*
9066 * If the tracer defines default flags, it means the flags are
9067 * per trace instance.
9068 */
9069 flags = kmalloc_obj(*flags);
9070 if (!flags)
9071 return -ENOMEM;
9072
9073 *flags = *tracer->default_flags;
9074 flags->trace = tracer;
9075
9076 t->flags = flags;
9077
9078 /* If this is an instance, inherit the global_trace flags */
9079 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9080 int val = get_global_flags_val(tracer);
9081 if (!WARN_ON_ONCE(val < 0))
9082 flags->val = val;
9083 }
9084 }
9085
9086 ret = add_tracer_options(tr, t);
9087 if (ret < 0) {
9088 list_del(&t->list);
9089 kfree(t->flags);
9090 kfree(t);
9091 }
9092
9093 return ret;
9094 }
9095
9096 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9097 create_trace_option_core_file(struct trace_array *tr,
9098 const char *option, long index)
9099 {
9100 struct dentry *t_options;
9101
9102 t_options = trace_options_init_dentry(tr);
9103 if (!t_options)
9104 return NULL;
9105
9106 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9107 (void *)&tr->trace_flags_index[index],
9108 &trace_options_core_fops);
9109 }
9110
create_trace_options_dir(struct trace_array * tr)9111 static void create_trace_options_dir(struct trace_array *tr)
9112 {
9113 struct dentry *t_options;
9114 bool top_level = tr == &global_trace;
9115 int i;
9116
9117 t_options = trace_options_init_dentry(tr);
9118 if (!t_options)
9119 return;
9120
9121 for (i = 0; trace_options[i]; i++) {
9122 if (top_level ||
9123 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9124 create_trace_option_core_file(tr, trace_options[i], i);
9125 }
9126 }
9127 }
9128
9129 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9130 rb_simple_read(struct file *filp, char __user *ubuf,
9131 size_t cnt, loff_t *ppos)
9132 {
9133 struct trace_array *tr = filp->private_data;
9134 char buf[64];
9135 int r;
9136
9137 r = tracer_tracing_is_on(tr);
9138 r = sprintf(buf, "%d\n", r);
9139
9140 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9141 }
9142
/*
 * Write "tracing_on": "1" turns the ring buffer on and calls the
 * tracer's ->start(); "0" turns it off, calls ->stop(), and wakes up
 * blocked readers so they notice the state change.
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		guard(mutex)(&trace_types_lock);
		/* Writing the current state is a no-op. */
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
	}

	(*ppos)++;

	return cnt;
}
9177
/* File operations for the "tracing_on" file. */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9185
9186 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9187 buffer_percent_read(struct file *filp, char __user *ubuf,
9188 size_t cnt, loff_t *ppos)
9189 {
9190 struct trace_array *tr = filp->private_data;
9191 char buf[64];
9192 int r;
9193
9194 r = tr->buffer_percent;
9195 r = sprintf(buf, "%d\n", r);
9196
9197 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9198 }
9199
9200 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9201 buffer_percent_write(struct file *filp, const char __user *ubuf,
9202 size_t cnt, loff_t *ppos)
9203 {
9204 struct trace_array *tr = filp->private_data;
9205 unsigned long val;
9206 int ret;
9207
9208 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9209 if (ret)
9210 return ret;
9211
9212 if (val > 100)
9213 return -EINVAL;
9214
9215 tr->buffer_percent = val;
9216
9217 (*ppos)++;
9218
9219 return cnt;
9220 }
9221
/* File operations for the "buffer_percent" control file */
static const struct file_operations buffer_percent_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_percent_read,
	.write = buffer_percent_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};
9229
9230 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9231 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9232 {
9233 struct trace_array *tr = filp->private_data;
9234 size_t size;
9235 char buf[64];
9236 int order;
9237 int r;
9238
9239 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9240 size = (PAGE_SIZE << order) / 1024;
9241
9242 r = sprintf(buf, "%zd\n", size);
9243
9244 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9245 }
9246
/*
 * Write handler for "buffer_subbuf_size_kb": resize the ring buffer's
 * sub-buffers to the given size in KB (rounded up to a power-of-two
 * number of pages, 1..128 pages).  Tracing is stopped for the duration.
 * If the snapshot buffer exists it must be kept at the same order; on
 * failure the main buffer is rolled back to the old order.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	/* Round up to a power-of-two number of pages */
	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	/* Keep the snapshot buffer at the same order as the main buffer */
	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
 out_max:
#endif
	(*ppos)++;
 out:
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}
9318
/* File operations for the "buffer_subbuf_size_kb" control file */
static const struct file_operations buffer_subbuf_size_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_subbuf_size_read,
	.write = buffer_subbuf_size_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};
9326
/* Parent tracefs directory for all "instances" subdirectories */
static struct dentry *trace_instance_dir;

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9331
#ifdef CONFIG_MODULES
/*
 * Record the load-address delta of @mod relative to where it was loaded
 * in the previous boot (taken from the persistent scratch area of the
 * trace array passed in @data).  A module that is going away gets a
 * delta of zero.  Used as a module_for_each_mod() callback.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	struct trace_array *tr = data;
	int i;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	for (i = 0; i < tscratch->nr_entries; i++) {
		/* Look for the entry saved for this module name */
		entry = &tscratch->entries[i];
		if (strcmp(mod->name, entry->mod_name))
			continue;
		if (mod->state == MODULE_STATE_GOING)
			module_delta->delta[i] = 0;
		else
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		break;
	}
	return 0;
}
#else
/* Without module support there are no deltas to compute */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
9362
mod_addr_comp(const void * a,const void * b,const void * data)9363 static int mod_addr_comp(const void *a, const void *b, const void *data)
9364 {
9365 const struct trace_mod_entry *e1 = a;
9366 const struct trace_mod_entry *e2 = b;
9367
9368 return e1->mod_addr > e2->mod_addr ? 1 : -1;
9369 }
9370
/*
 * Validate and adopt the persistent scratch area @tscratch (of @size
 * bytes) left over from a previous boot, and compute address deltas so
 * that old trace data can be decoded against the current kernel layout.
 * On any inconsistency the scratch area is zeroed and ignored.
 */
static void setup_trace_scratch(struct trace_array *tr,
				struct trace_scratch *tscratch, unsigned int size)
{
	struct trace_module_delta *module_delta;
	struct trace_mod_entry *entry;
	int i, nr_entries;

	if (!tscratch)
		return;

	tr->scratch = tscratch;
	tr->scratch_size = size;

	/* Delta of the kernel text between the previous boot and this one */
	if (tscratch->text_addr)
		tr->text_delta = (unsigned long)_text - tscratch->text_addr;

	/* The recorded entry count must fit inside the scratch area */
	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
		goto reset;

	/* Check if each module name is a valid string */
	for (i = 0; i < tscratch->nr_entries; i++) {
		int n;

		entry = &tscratch->entries[i];

		for (n = 0; n < MODULE_NAME_LEN; n++) {
			if (entry->mod_name[n] == '\0')
				break;
			if (!isprint(entry->mod_name[n]))
				goto reset;
		}
		/* Reject names that lack a NUL terminator */
		if (n == MODULE_NAME_LEN)
			goto reset;
	}

	/* Sort the entries so that we can find appropriate module from address. */
	nr_entries = tscratch->nr_entries;
	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
	       mod_addr_comp, NULL, NULL);

	if (IS_ENABLED(CONFIG_MODULES)) {
		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
		if (!module_delta) {
			pr_info("module_delta allocation failed. Not able to decode module address.");
			goto reset;
		}
		init_rcu_head(&module_delta->rcu);
	} else
		module_delta = NULL;
	WRITE_ONCE(tr->module_delta, module_delta);

	/* Scan modules to make text delta for modules. */
	module_for_each_mod(make_mod_delta, tr);

	/* Set trace_clock as the same of the previous boot. */
	if (tscratch->clock_id != tr->clock_id) {
		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
			pr_info("the previous trace_clock info is not valid.");
			goto reset;
		}
	}
	return;
 reset:
	/* Invalid trace modules */
	memset(tscratch, 0, size);
}
9438
/*
 * Allocate the ring buffer and per-CPU data for @buf, owned by trace
 * array @tr.  If @tr describes a boot-mapped address range, the buffer
 * is placed in that range together with a scratch area recording module
 * load addresses across reboots.  Returns 0 or -ENOMEM.
 */
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size)
{
	enum ring_buffer_flags rb_flags;
	struct trace_scratch *tscratch;
	unsigned int scratch_size = 0;

	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	if (tr->range_addr_start && tr->range_addr_size) {
		/* Add scratch buffer to handle 128 modules */
		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
						      tr->range_addr_start,
						      tr->range_addr_size,
						      struct_size(tscratch, entries, 128));

		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
		setup_trace_scratch(tr, tscratch, scratch_size);

		/*
		 * This is basically the same as a mapped buffer,
		 * with the same restrictions.
		 */
		tr->mapped++;
	} else {
		buf->buffer = ring_buffer_alloc(size, rb_flags);
	}
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	/*
	 * NOTE(review): this reads tr->array_buffer even when @buf is the
	 * snapshot buffer — confirm that is intentional.
	 */
	set_buffer_entries(&tr->array_buffer,
			   ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}
9484
free_trace_buffer(struct array_buffer * buf)9485 static void free_trace_buffer(struct array_buffer *buf)
9486 {
9487 if (buf->buffer) {
9488 ring_buffer_free(buf->buffer);
9489 buf->buffer = NULL;
9490 free_percpu(buf->data);
9491 buf->data = NULL;
9492 }
9493 }
9494
/*
 * Allocate the main (and, when configured, snapshot) ring buffers for
 * trace array @tr.  Returns 0 on success or a negative errno.
 */
static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
	if (ret)
		return ret;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Fix mapped buffer trace arrays do not have snapshot buffers */
	if (tr->range_addr_start)
		return 0;

	/* The snapshot buffer starts minimal unless requested at boot */
	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
				    allocate_snapshot ? size : 1);
	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
		free_trace_buffer(&tr->array_buffer);
		return -ENOMEM;
	}
	tr->allocated_snapshot = allocate_snapshot;

	/* The boot-time snapshot request only applies to the first array */
	allocate_snapshot = false;
#endif

	return 0;
}
9521
/* Free all ring buffers of @tr along with its module delta table. */
static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->array_buffer);
	kfree(tr->module_delta);

#ifdef CONFIG_TRACER_SNAPSHOT
	free_trace_buffer(&tr->snapshot_buffer);
#endif
}
9534
init_trace_flags_index(struct trace_array * tr)9535 static void init_trace_flags_index(struct trace_array *tr)
9536 {
9537 int i;
9538
9539 /* Used by the trace options files */
9540 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9541 tr->trace_flags_index[i] = i;
9542 }
9543
__update_tracer(struct trace_array * tr)9544 static int __update_tracer(struct trace_array *tr)
9545 {
9546 struct tracer *t;
9547 int ret = 0;
9548
9549 for (t = trace_types; t && !ret; t = t->next)
9550 ret = add_tracer(tr, t);
9551
9552 return ret;
9553 }
9554
__update_tracer_options(struct trace_array * tr)9555 static __init int __update_tracer_options(struct trace_array *tr)
9556 {
9557 struct tracers *t;
9558 int ret = 0;
9559
9560 list_for_each_entry(t, &tr->tracers, list) {
9561 ret = add_tracer_options(tr, t);
9562 if (ret < 0)
9563 break;
9564 }
9565
9566 return ret;
9567 }
9568
/* Create tracer option files for all registered trace arrays. */
static __init void update_tracer_options(void)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	/* Mark that the one-time option sync has happened */
	tracer_options_updated = true;
	list_for_each_entry(tr, &ftrace_trace_arrays, list)
		__update_tracer_options(tr);
}
9578
9579 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9580 struct trace_array *trace_array_find(const char *instance)
9581 {
9582 struct trace_array *tr, *found = NULL;
9583
9584 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9585 if (tr->name && strcmp(tr->name, instance) == 0) {
9586 found = tr;
9587 break;
9588 }
9589 }
9590
9591 return found;
9592 }
9593
trace_array_find_get(const char * instance)9594 struct trace_array *trace_array_find_get(const char *instance)
9595 {
9596 struct trace_array *tr;
9597
9598 guard(mutex)(&trace_types_lock);
9599 tr = trace_array_find(instance);
9600 if (tr && __trace_array_get(tr) < 0)
9601 tr = NULL;
9602
9603 return tr;
9604 }
9605
/*
 * Create the tracefs directory for instance @tr and register its events
 * and tracers.  On failure, everything added so far is torn down again.
 */
static int trace_array_create_dir(struct trace_array *tr)
{
	int ret;

	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
	if (!tr->dir)
		return -EINVAL;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove(tr->dir);
		return ret;
	}

	init_tracer_tracefs(tr, tr->dir);
	ret = __update_tracer(tr);
	if (ret) {
		/* Unwind in reverse order of the setup above */
		event_trace_del_tracer(tr);
		tracefs_remove(tr->dir);
		return ret;
	}
	return 0;
}
9629
/*
 * Allocate and register a new trace array named @name.
 * @systems: list of event systems to expose (NULL means all).
 * @range_addr_start/@range_addr_size: address range for boot-mapped
 * (persistent) ring buffers; 0 for normally allocated buffers.
 * Returns the new array with one reference held, or an ERR_PTR().
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* Inherit the global flags, minus those that must start cleared */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* New instances start with the no-op tracer */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is defaultly expanded */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	trace_array_init_autoremove(tr);

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	/* Unwind all partial allocations; free helpers tolerate NULLs */
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
9729
/* Create an instance with all event systems and a normal ring buffer. */
static struct trace_array *trace_array_create(const char *name)
{
	return trace_array_create_systems(name, NULL, 0, 0);
}
9734
instance_mkdir(const char * name)9735 static int instance_mkdir(const char *name)
9736 {
9737 struct trace_array *tr;
9738 int ret;
9739
9740 guard(mutex)(&event_mutex);
9741 guard(mutex)(&trace_types_lock);
9742
9743 ret = -EEXIST;
9744 if (trace_array_find(name))
9745 return -EEXIST;
9746
9747 tr = trace_array_create(name);
9748
9749 ret = PTR_ERR_OR_ZERO(tr);
9750
9751 return ret;
9752 }
9753
#ifdef CONFIG_MMU
/*
 * Map @size bytes starting at address @start into a fresh kernel
 * virtual range (non-executable).  Returns the virtual start address,
 * or 0 on failure.
 */
static u64 map_pages(unsigned long start, unsigned long size)
{
	unsigned long vmap_start, vmap_end;
	struct vm_struct *area;
	int ret;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return 0;

	vmap_start = (unsigned long) area->addr;
	vmap_end = vmap_start + size;

	ret = vmap_page_range(vmap_start, vmap_end,
			      start, pgprot_nx(PAGE_KERNEL));
	if (ret < 0) {
		free_vm_area(area);
		return 0;
	}

	return (u64)vmap_start;
}
#else
/* Without an MMU the range cannot be remapped */
static inline u64 map_pages(unsigned long start, unsigned long size)
{
	return 0;
}
#endif
9783
/**
 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
 * @name: The name of the trace array to be looked up/created.
 * @systems: A list of systems to create event directories for (NULL for all)
 *
 * Returns pointer to trace array with given name.
 * NULL, if it cannot be created.
 *
 * NOTE: This function increments the reference counter associated with the
 * trace array returned. This makes sure it cannot be freed while in use.
 * Use trace_array_put() once the trace array is no longer needed.
 * If the trace_array is to be freed, trace_array_destroy() needs to
 * be called after the trace_array_put(), or simply let user space delete
 * it from the tracefs instances directory. But until the
 * trace_array_put() is called, user space can not delete it.
 *
 */
struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
{
	struct trace_array *tr;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	/* Reuse an existing instance of the same name if there is one */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0) {
			/* if this fails, @tr is going to be removed. */
			if (__trace_array_get(tr) < 0)
				tr = NULL;
			return tr;
		}
	}

	tr = trace_array_create_systems(name, systems, 0, 0);

	if (IS_ERR(tr))
		tr = NULL;
	else
		tr->ref++;	/* reference for the caller */

	return tr;
}
EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9827
/*
 * Tear down and free trace array @tr.  Caller must hold event_mutex
 * and trace_types_lock.  Returns -EBUSY if the instance still has
 * references or active trace users.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* Redirect printk tracing back to the top-level array */
	if (printk_trace == tr)
		update_printk_trace(&global_trace);

	/* Must be done before disabling all the flags */
	if (update_marker_trace(tr, 0))
		synchronize_rcu();

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1ULL << i, 0);
	}

	/* Detach tracers, events, and function probes before freeing */
	trace_array_cancel_autoremove(tr);
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);
	free_tracers(tr);

	/* Release boot-reserved or vmalloc'ed buffer memory */
	if (tr->range_name) {
		reserve_mem_release_by_name(tr->range_name);
		kfree(tr->range_name);
	}
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		vfree((void *)tr->range_addr_start);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
9883
trace_array_destroy(struct trace_array * this_tr)9884 int trace_array_destroy(struct trace_array *this_tr)
9885 {
9886 struct trace_array *tr;
9887
9888 if (!this_tr)
9889 return -EINVAL;
9890
9891 guard(mutex)(&event_mutex);
9892 guard(mutex)(&trace_types_lock);
9893
9894
9895 /* Making sure trace array exists before destroying it. */
9896 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9897 if (tr == this_tr)
9898 return __remove_instance(tr);
9899 }
9900
9901 return -ENODEV;
9902 }
9903 EXPORT_SYMBOL_GPL(trace_array_destroy);
9904
instance_rmdir(const char * name)9905 static int instance_rmdir(const char *name)
9906 {
9907 struct trace_array *tr;
9908
9909 guard(mutex)(&event_mutex);
9910 guard(mutex)(&trace_types_lock);
9911
9912 tr = trace_array_find(name);
9913 if (!tr)
9914 return -ENODEV;
9915
9916 return __remove_instance(tr);
9917 }
9918
/*
 * Create the tracefs "instances" directory and populate directories for
 * any named instances created before tracefs was ready.
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	struct trace_array *tr;

	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
		return;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		/* The unnamed top-level array is handled separately */
		if (!tr->name)
			continue;
		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
			     "Failed to create instance directory\n"))
			return;
	}
}
9940
/*
 * Create the per-instance tracefs control files for @tr under
 * @d_tracer.  A read-only instance (e.g. last-boot data) only gets the
 * files up to the per-CPU directories; writable control files are
 * skipped for it.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	umode_t writable_mode = TRACE_MODE_WRITE;
	int cpu;

	if (trace_array_is_readonly(tr))
		writable_mode = TRACE_MODE_READ;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", writable_mode, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
			  tr, &tracing_cpumask_fops);

	/* Options are used for changing print-format even for readonly instance. */
	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", writable_mode, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("trace_clock", writable_mode, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default wakeup watermark: half-full buffer */
	tr->buffer_percent = 50;

	trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	create_trace_options_dir(tr);

	if (tr->range_addr_start)
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	/* Read-only instance has above files only. */
	if (trace_array_is_readonly(tr))
		return;

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_percent_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_syscall_buf_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Boot-mapped buffers do not support snapshots */
	if (!tr->range_addr_start)
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	ftrace_init_tracefs(tr, d_tracer);
}
10035
#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
/*
 * Automount callback for the debugfs "tracing" directory: mount a
 * tracefs instance on @mntpt so that legacy tools looking for tracing
 * under debugfs keep working.
 *
 * Returns the new mount, an ERR_PTR() on failure, or NULL if the
 * tracefs filesystem type is not registered.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;
	struct fs_context *fc;
	int ret;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;

	fc = fs_context_for_submount(type, mntpt);
	put_filesystem(type);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");

	ret = vfs_parse_fs_string(fc, "source", "tracefs");
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
#endif
10070
/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code
 * and expects to return the dentry of the top level tracing directory.
 *
 * Returns 0 on success, -EPERM when tracing is locked down, or -ENODEV
 * when tracefs has not been initialized yet.
 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
#endif

	return 0;
}
10107
/* Section bounds of the built-in trace eval maps (from the linker script) */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Boot-time workqueue used to run tracefs/eval-map init concurrently */
struct workqueue_struct *trace_init_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
10114
eval_map_work_func(struct work_struct * work)10115 static void __init eval_map_work_func(struct work_struct *work)
10116 {
10117 int len;
10118
10119 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10120 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10121 }
10122
/*
 * Kick off registration of the built-in eval maps on a workqueue so it
 * does not delay boot.  If the workqueue cannot be allocated, the work
 * is done synchronously instead.
 */
static int __init trace_eval_init(void)
{
	INIT_WORK(&eval_map_work, eval_map_work_func);

	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
	if (!trace_init_wq) {
		pr_err("Unable to allocate trace_init_wq\n");
		/* Do work here */
		eval_map_work_func(&eval_map_work);
		return -ENOMEM;
	}

	queue_work(trace_init_wq, &eval_map_work);
	return 0;
}

subsys_initcall(trace_eval_init);
10140
/* Late initcall: wait for all queued boot-time trace init work to finish. */
static int __init trace_eval_sync(void)
{
	/* Make sure the eval map updates are finished */
	if (trace_init_wq)
		destroy_workqueue(trace_init_wq);
	return 0;
}

late_initcall_sync(trace_eval_sync);
10150
10151
10152 #ifdef CONFIG_MODULES
10153
module_exists(const char * module)10154 bool module_exists(const char *module)
10155 {
10156 /* All modules have the symbol __this_module */
10157 static const char this_mod[] = "__this_module";
10158 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10159 unsigned long val;
10160 int n;
10161
10162 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10163
10164 if (n > sizeof(modname) - 1)
10165 return false;
10166
10167 val = module_kallsyms_lookup_name(modname);
10168 return val != 0;
10169 }
10170
/* Register the eval maps of a loading module with the event subsystem. */
static void trace_module_add_evals(struct module *mod)
{
	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	/* Even if no trace_evals, this need to sanitize field types. */
	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}
10183
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the saved eval map segment belonging to @mod.  The
 * map list is a chain of head..tail segments; walk it by jumping from
 * each segment head to its tail pointer.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		/* Hop to this segment's tail, remembering the link to patch */
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		return;

	/* Splice the found segment out of the chain and free it */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10213
/*
 * Keep persistent (boot-mapped) trace arrays in sync with module
 * load/unload: @add is true when @mod is coming, false when going.
 */
static void trace_module_record(struct module *mod, bool add)
{
	struct trace_array *tr;
	unsigned long flags;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
		/* Update any persistent trace array that has already been started */
		if (flags == TRACE_ARRAY_FL_BOOT && add) {
			guard(mutex)(&scratch_mutex);
			save_mod(mod, tr);
		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
			/* Update delta if the module loaded in previous boot */
			make_mod_delta(mod, tr);
		}
	}
}
10231
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10232 static int trace_module_notify(struct notifier_block *self,
10233 unsigned long val, void *data)
10234 {
10235 struct module *mod = data;
10236
10237 switch (val) {
10238 case MODULE_STATE_COMING:
10239 trace_module_add_evals(mod);
10240 trace_module_record(mod, true);
10241 break;
10242 case MODULE_STATE_GOING:
10243 trace_module_remove_evals(mod);
10244 trace_module_record(mod, false);
10245 break;
10246 }
10247
10248 return NOTIFY_OK;
10249 }
10250
/* Watch module load/unload to keep eval maps and address deltas current */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */
10256
/*
 * Populate the top-level tracefs files.  May run from the boot-time
 * workqueue (see tracer_init_tracefs()) to overlap with other init.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}
10295
tracer_init_tracefs(void)10296 static __init int tracer_init_tracefs(void)
10297 {
10298 int ret;
10299
10300 trace_access_lock_init();
10301
10302 ret = tracing_init_dentry();
10303 if (ret)
10304 return 0;
10305
10306 if (trace_init_wq) {
10307 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10308 queue_work(trace_init_wq, &tracerfs_init_work);
10309 } else {
10310 tracer_init_tracefs_work_func(NULL);
10311 }
10312
10313 if (rv_init_interface())
10314 pr_err("RV: Error while creating the RV interface\n");
10315
10316 return 0;
10317 }
10318
10319 fs_initcall(tracer_init_tracefs);
10320
static int trace_die_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *unused);

/* Dumps the trace buffers on panic (priority INT_MAX - 1: runs early) */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Same callback on the die chain; only DIE_OOPS triggers a dump there */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10333
10334 /*
10335 * The idea is to execute the following die/panic callback early, in order
10336 * to avoid showing irrelevant information in the trace (like other panic
10337 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10338 * warnings get disabled (to prevent potential log flooding).
10339 */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10340 static int trace_die_panic_handler(struct notifier_block *self,
10341 unsigned long ev, void *unused)
10342 {
10343 if (!ftrace_dump_on_oops_enabled())
10344 return NOTIFY_DONE;
10345
10346 /* The die notifier requires DIE_OOPS to trigger */
10347 if (self == &trace_die_notifier && ev != DIE_OOPS)
10348 return NOTIFY_DONE;
10349
10350 ftrace_dump(DUMP_PARAM);
10351
10352 return NOTIFY_DONE;
10353 }
10354
/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 * trace_printk_seq() clamps its output to this length.
 */
#define TRACE_MAX_PRINT 1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE KERN_EMERG
10367
/*
 * Emit the contents of @s via printk at KERN_TRACE level and reset it.
 * Output is capped at TRACE_MAX_PRINT and defensively NUL-terminated
 * before being handed to printk.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
10390
/*
 * Initialize @iter to iterate over @tr's main buffer on all CPUs.
 * Used from dump context, so the scratch buffers are static rather
 * than kmalloc()ed.
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	/* Let the current tracer set up any private iterator state */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
10415
/* Initialize @iter to walk the global (top level) trace buffer */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
10420
/*
 * Dump the contents of @tr's ring buffer to the console via printk.
 * Called from oops/panic context: tracing is turned off, the buffer is
 * disabled while dumping, and interrupts are blocked throughout.
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enabled */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	/* DUMP_ORIG only dumps the CPU that requested the dump */
	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* Console output can take a while: keep the NMI watchdog quiet */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the user-object flag and allow the buffer to be enabled */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
10507
/*
 * Dump buffers according to the "ftrace_dump_on_oops" parameter: a comma
 * separated list where the first entry may be 0 (no dump), 1 (dump the
 * global buffer, all CPUs), or 2/orig_cpu (global buffer, current CPU
 * only).  Remaining entries name instances, optionally suffixed with
 * "=2" or "=orig_cpu" to restrict the dump to the current CPU.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Work on a copy, as strsep() modifies the string it parses */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* "<instance>[=<mode>]" */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
10548
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10549 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10550 {
10551 static atomic_t dump_running;
10552
10553 /* Only allow one dump user at a time. */
10554 if (atomic_inc_return(&dump_running) != 1) {
10555 atomic_dec(&dump_running);
10556 return;
10557 }
10558
10559 switch (oops_dump_mode) {
10560 case DUMP_ALL:
10561 ftrace_dump_one(&global_trace, DUMP_ALL);
10562 break;
10563 case DUMP_ORIG:
10564 ftrace_dump_one(&global_trace, DUMP_ORIG);
10565 break;
10566 case DUMP_PARAM:
10567 ftrace_dump_by_param();
10568 break;
10569 case DUMP_NONE:
10570 break;
10571 default:
10572 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10573 ftrace_dump_one(&global_trace, DUMP_ALL);
10574 }
10575
10576 atomic_dec(&dump_running);
10577 }
10578 EXPORT_SYMBOL_GPL(ftrace_dump);
10579
/* Chunk size for copying user writes in trace_parse_run_command() */
#define WRITE_BUFSIZE 4096

/*
 * Helper for dynamic-event style files: copy the user's write in chunks
 * of up to WRITE_BUFSIZE, split it into '\n'-terminated lines, strip
 * '#' comments, and feed each line to @createfn.
 *
 * Returns the number of bytes consumed or a negative errno (including
 * any error returned by @createfn).  A single line longer than
 * WRITE_BUFSIZE - 2 is rejected with -EINVAL.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Partial line at the end of the
					 * chunk: re-read it at the start of
					 * the next chunk, unless it already
					 * filled the whole buffer.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
10640
10641 #ifdef CONFIG_TRACER_SNAPSHOT
tr_needs_alloc_snapshot(const char * name)10642 __init static bool tr_needs_alloc_snapshot(const char *name)
10643 {
10644 char *test;
10645 int len = strlen(name);
10646 bool ret;
10647
10648 if (!boot_snapshot_index)
10649 return false;
10650
10651 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10652 boot_snapshot_info[len] == '\t')
10653 return true;
10654
10655 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10656 if (!test)
10657 return false;
10658
10659 sprintf(test, "\t%s\t", name);
10660 ret = strstr(boot_snapshot_info, test) == NULL;
10661 kfree(test);
10662 return ret;
10663 }
10664
do_allocate_snapshot(const char * name)10665 __init static void do_allocate_snapshot(const char *name)
10666 {
10667 if (!tr_needs_alloc_snapshot(name))
10668 return;
10669
10670 /*
10671 * When allocate_snapshot is set, the next call to
10672 * allocate_trace_buffers() (called by trace_array_get_by_name())
10673 * will allocate the snapshot buffer. That will also clear
10674 * this flag.
10675 */
10676 allocate_snapshot = true;
10677 }
#else
/* Snapshot buffers are not configured: nothing to allocate */
static inline void do_allocate_snapshot(const char *name) { }
#endif
10681
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)10682 __init static int backup_instance_area(const char *backup,
10683 unsigned long *addr, phys_addr_t *size)
10684 {
10685 struct trace_array *backup_tr;
10686 void *allocated_vaddr = NULL;
10687
10688 backup_tr = trace_array_get_by_name(backup, NULL);
10689 if (!backup_tr) {
10690 pr_warn("Tracing: Instance %s is not found.\n", backup);
10691 return -ENOENT;
10692 }
10693
10694 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
10695 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
10696 trace_array_put(backup_tr);
10697 return -EINVAL;
10698 }
10699
10700 *size = backup_tr->range_addr_size;
10701
10702 allocated_vaddr = vzalloc(*size);
10703 if (!allocated_vaddr) {
10704 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
10705 backup, (unsigned long)*size);
10706 trace_array_put(backup_tr);
10707 return -ENOMEM;
10708 }
10709
10710 memcpy(allocated_vaddr,
10711 (void *)backup_tr->range_addr_start, (size_t)*size);
10712 *addr = (unsigned long)allocated_vaddr;
10713
10714 trace_array_put(backup_tr);
10715 return 0;
10716 }
10717
/*
 * Create the trace instances requested via the "trace_instance=" boot
 * parameter, accumulated tab-separated in boot_instance_info.  Each
 * entry looks like:
 *
 *   name[^flag...][@<start>:<size> | @<reserve_mem name>][=<backup>][,events]
 *
 * where flags are "traceoff" and "printk"/"traceprintk"/"trace_printk".
 * Events are enabled only after ALL instances have been created.
 */
__init static void enable_instances(void)
{
	struct trace_array *tr;
	bool memmap_area = false;
	char *curr_str;
	char *name;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		phys_addr_t start = 0;
		phys_addr_t size = 0;
		unsigned long addr = 0;
		bool traceprintk = false;
		bool traceoff = false;
		char *flag_delim;
		char *addr_delim;
		char *rname __free(kfree) = NULL;
		char *backup;

		/* The part before the first ',' is the instance spec */
		tok = strsep(&curr_str, ",");

		/* "name=<backup>" copies another boot instance's buffer */
		name = strsep(&tok, "=");
		backup = tok;

		flag_delim = strchr(name, '^');
		addr_delim = strchr(name, '@');

		if (addr_delim)
			*addr_delim++ = '\0';

		if (flag_delim)
			*flag_delim++ = '\0';

		if (backup) {
			if (backup_instance_area(backup, &addr, &size) < 0)
				continue;
		}

		if (flag_delim) {
			char *flag;

			while ((flag = strsep(&flag_delim, "^"))) {
				if (strcmp(flag, "traceoff") == 0) {
					traceoff = true;
				} else if ((strcmp(flag, "printk") == 0) ||
					   (strcmp(flag, "traceprintk") == 0) ||
					   (strcmp(flag, "trace_printk") == 0)) {
					traceprintk = true;
				} else {
					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
				}
			}
		}

		tok = addr_delim;
		if (tok && isdigit(*tok)) {
			/* "@<start>:<size>" maps a raw physical range */
			start = memparse(tok, &tok);
			if (!start) {
				pr_warn("Tracing: Invalid boot instance address for %s\n",
					name);
				continue;
			}
			if (*tok != ':') {
				pr_warn("Tracing: No size specified for instance %s\n", name);
				continue;
			}
			tok++;
			size = memparse(tok, &tok);
			if (!size) {
				pr_warn("Tracing: Invalid boot instance size for %s\n",
					name);
				continue;
			}
			memmap_area = true;
		} else if (tok) {
			/* Otherwise "@<name>" refers to a reserve_mem region */
			if (!reserve_mem_find_by_name(tok, &start, &size)) {
				start = 0;
				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
				continue;
			}
			rname = kstrdup(tok, GFP_KERNEL);
		}

		if (start) {
			/* Start and size must be page aligned */
			if (start & ~PAGE_MASK) {
				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
				continue;
			}
			if (size & ~PAGE_MASK) {
				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
				continue;
			}

			/* memmap regions need an explicit mapping; reserve_mem is already linear */
			if (memmap_area)
				addr = map_pages(start, size);
			else
				addr = (unsigned long)phys_to_virt(start);
			if (addr) {
				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
					name, &start, (unsigned long)size);
			} else {
				pr_warn("Tracing: Failed to map boot instance %s\n", name);
				continue;
			}
		} else {
			/* Only non mapped buffers have snapshot buffers */
			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
				do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/*
		 * memmap'd buffers can not be freed.
		 */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		/*
		 * Backup buffers can be freed but need vfree().
		 */
		if (backup) {
			tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY;
			trace_array_start_autoremove();
		}

		if (start || backup) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			tr->range_name = no_free_ptr(rname);
		}

		/*
		 * Save the events to start and enabled them after all boot instances
		 * have been created.
		 */
		tr->boot_events = curr_str;
	}

	/* Enable the events after all boot instances have been created */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {

		if (!tr->boot_events || !(*tr->boot_events)) {
			tr->boot_events = NULL;
			continue;
		}

		curr_str = tr->boot_events;

		/* Clear the instance if this is a persistent buffer */
		if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)
			update_last_data(tr);

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
		tr->boot_events = NULL;
	}
}
10895
/*
 * Allocate and initialize the global trace array and its supporting
 * state.  Called once from early_trace_init().  On failure all
 * partially allocated resources are unwound via the goto chain.
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	/* Honor a "trace_clock=" boot parameter if one was given */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
11039
#ifdef CONFIG_FUNCTION_TRACER
/*
 * Used to set module cached ftrace filtering at boot up.
 * Returns the top level (global) trace array.
 */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif
11047
ftrace_boot_snapshot(void)11048 void __init ftrace_boot_snapshot(void)
11049 {
11050 #ifdef CONFIG_TRACER_SNAPSHOT
11051 struct trace_array *tr;
11052
11053 if (!snapshot_at_boot)
11054 return;
11055
11056 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11057 if (!tr->allocated_snapshot)
11058 continue;
11059
11060 tracing_snapshot_instance(tr);
11061 trace_array_puts(tr, "** Boot snapshot taken **\n");
11062 }
11063 #endif
11064 }
11065
/*
 * First-stage tracing init, called very early in boot: set up the
 * "tp_printk" iterator if requested and allocate the trace buffers.
 */
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}
11080
/* Second-stage tracing init: events and any boot-requested instances */
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}
11088
clear_boot_tracer(void)11089 __init static void clear_boot_tracer(void)
11090 {
11091 /*
11092 * The default tracer at boot buffer is an init section.
11093 * This function is called in lateinit. If we did not
11094 * find the boot tracer, then clear it out, to prevent
11095 * later registration from accessing the buffer that is
11096 * about to be freed.
11097 */
11098 if (!default_bootup_tracer)
11099 return;
11100
11101 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11102 default_bootup_tracer);
11103 default_bootup_tracer = NULL;
11104 }
11105
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch the default trace clock to "global" when the local sched
 * clock turns out to be unstable and no clock was chosen on the
 * command line.  sched_clock_stable() is determined in late_initcall.
 */
__init static void tracing_set_default_clock(void)
{
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       " \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
/* Without unstable sched clock support there is nothing to adjust */
static inline void tracing_set_default_clock(void) { }
#endif
11127
/*
 * Final tracing fixups at late_initcall_sync time: honor the
 * "tp_printk_stop_on_boot" and "traceoff_after_boot" parameters, pick
 * the default clock, and drop any unregistered boot tracer name.
 */
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);
11144