xref: /linux/kernel/trace/trace.c (revision e4bf304f000e6fcceaf60b1455a5124b783b3a66) !
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
disable_tracing_selftest(const char * reason)78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
/* Store tracers and their flags per instance */
struct tracers {
	struct list_head	list;	/* node in an instance's list of tracers */
	struct tracer		*tracer;
	struct tracer_flags	*flags;	/* flags kept per instance, not globally */
};
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 #define MAX_TRACER_SIZE		100
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are separated
137  * by commas.
138  */
139 /* Set to string format zero to disable by default */
140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
142 /* When set, tracing will stop when a WARN*() is hit */
143 static int __disable_trace_on_warning;
144 
/* Forward declaration; handler shared with the tp_printk boot logic */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos);

/* Tracing knobs exposed under /proc/sys/kernel/ */
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		/* custom handler; needs side effects beyond storing the int */
		.proc_handler	= tracepoint_printk_sysctl,
	},
};
170 
/* Register the tracing sysctl table at boot (subsys initcall time) */
static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);
177 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;	/* owning module, or NULL if built in */
	unsigned long			length;	/* number of saved maps in this array */
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;	/* next array of saved eval_map items */
	const char			*end;	/* points to NULL */
};

/* Serializes updates to the trace_eval_maps list below */
static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
213 
214 int tracing_set_tracer(struct trace_array *tr, const char *buf);
215 static void ftrace_trace_userstack(struct trace_array *tr,
216 				   struct trace_buffer *buffer,
217 				   unsigned int trace_ctx);
218 
219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
220 static char *default_bootup_tracer;
221 
222 static bool allocate_snapshot;
223 static bool snapshot_at_boot;
224 
225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
226 static int boot_instance_index;
227 
228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_snapshot_index;
230 
set_cmdline_ftrace(char * str)231 static int __init set_cmdline_ftrace(char *str)
232 {
233 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
234 	default_bootup_tracer = bootup_tracer_buf;
235 	/* We are using ftrace early, expand it */
236 	trace_set_ring_buffer_expanded(NULL);
237 	return 1;
238 }
239 __setup("ftrace=", set_cmdline_ftrace);
240 
ftrace_dump_on_oops_enabled(void)241 int ftrace_dump_on_oops_enabled(void)
242 {
243 	if (!strcmp("0", ftrace_dump_on_oops))
244 		return 0;
245 	else
246 		return 1;
247 }
248 
/*
 * Parse the "ftrace_dump_on_oops" boot parameter. Accepted forms
 * (see the comment above ftrace_dump_on_oops[] for value meanings):
 *   ftrace_dump_on_oops            -> mode "1" (all CPUs)
 *   ftrace_dump_on_oops,inst,...   -> mode "1" plus instance list
 *   ftrace_dump_on_oops=<value>    -> value copied verbatim
 * Returns 1 when the parameter was consumed, 0 to reject it.
 */
static int __init set_ftrace_dump_on_oops(char *str)
{
	/* Bare parameter: dump the buffers of all CPUs */
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	/* Leading comma: keep mode "1" and append the instance list after it */
	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	/* "=<value>": take the user-supplied value as-is */
	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
270 
stop_trace_on_warning(char * str)271 static int __init stop_trace_on_warning(char *str)
272 {
273 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
274 		__disable_trace_on_warning = 1;
275 	return 1;
276 }
277 __setup("traceoff_on_warning", stop_trace_on_warning);
278 
boot_alloc_snapshot(char * str)279 static int __init boot_alloc_snapshot(char *str)
280 {
281 	char *slot = boot_snapshot_info + boot_snapshot_index;
282 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
283 	int ret;
284 
285 	if (str[0] == '=') {
286 		str++;
287 		if (strlen(str) >= left)
288 			return -1;
289 
290 		ret = snprintf(slot, left, "%s\t", str);
291 		boot_snapshot_index += ret;
292 	} else {
293 		allocate_snapshot = true;
294 		/* We also need the main ring buffer expanded */
295 		trace_set_ring_buffer_expanded(NULL);
296 	}
297 	return 1;
298 }
299 __setup("alloc_snapshot", boot_alloc_snapshot);
300 
301 
boot_snapshot(char * str)302 static int __init boot_snapshot(char *str)
303 {
304 	snapshot_at_boot = true;
305 	boot_alloc_snapshot(str);
306 	return 1;
307 }
308 __setup("ftrace_boot_snapshot", boot_snapshot);
309 
310 
boot_instance(char * str)311 static int __init boot_instance(char *str)
312 {
313 	char *slot = boot_instance_info + boot_instance_index;
314 	int left = sizeof(boot_instance_info) - boot_instance_index;
315 	int ret;
316 
317 	if (strlen(str) >= left)
318 		return -1;
319 
320 	ret = snprintf(slot, left, "%s\t", str);
321 	boot_instance_index += ret;
322 
323 	return 1;
324 }
325 __setup("trace_instance=", boot_instance);
326 
327 
328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
329 
set_trace_boot_options(char * str)330 static int __init set_trace_boot_options(char *str)
331 {
332 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
333 	return 1;
334 }
335 __setup("trace_options=", set_trace_boot_options);
336 
337 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
338 static char *trace_boot_clock __initdata;
339 
set_trace_boot_clock(char * str)340 static int __init set_trace_boot_clock(char *str)
341 {
342 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
343 	trace_boot_clock = trace_boot_clock_buf;
344 	return 1;
345 }
346 __setup("trace_clock=", set_trace_boot_clock);
347 
set_tracepoint_printk(char * str)348 static int __init set_tracepoint_printk(char *str)
349 {
350 	/* Ignore the "tp_printk_stop_on_boot" param */
351 	if (*str == '_')
352 		return 0;
353 
354 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
355 		tracepoint_printk = 1;
356 	return 1;
357 }
358 __setup("tp_printk", set_tracepoint_printk);
359 
/* "tp_printk_stop_on_boot": stop tp_printk output once boot completes */
static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
366 
/* "traceoff_after_boot": turn tracing off once boot has finished */
static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);
373 
/*
 * ns2usecs - convert nanoseconds to microseconds, rounding to nearest.
 * Uses do_div() (which divides @nsec in place) so the 64-bit division
 * also works on 32-bit architectures.
 */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;	/* round half a microsecond up */
	do_div(nsec, 1000);
	return nsec;
}
380 
381 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)382 trace_process_export(struct trace_export *export,
383 	       struct ring_buffer_event *event, int flag)
384 {
385 	struct trace_entry *entry;
386 	unsigned int size = 0;
387 
388 	if (export->flags & flag) {
389 		entry = ring_buffer_event_data(event);
390 		size = ring_buffer_event_length(event);
391 		export->write(export, entry, size);
392 	}
393 }
394 
395 static DEFINE_MUTEX(ftrace_export_lock);
396 
397 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
398 
399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
402 
/*
 * Bump the static key of each export class @export subscribes to, so
 * the corresponding fast-path hooks start walking the exports list.
 */
static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}
414 
/*
 * Mirror of ftrace_exports_enable(): drop the static-key count for
 * each class @export subscribed to.
 */
static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}
426 
ftrace_exports(struct ring_buffer_event * event,int flag)427 static void ftrace_exports(struct ring_buffer_event *event, int flag)
428 {
429 	struct trace_export *export;
430 
431 	guard(preempt_notrace)();
432 
433 	export = rcu_dereference_raw_check(ftrace_exports_list);
434 	while (export) {
435 		trace_process_export(export, event, flag);
436 		export = rcu_dereference_raw_check(export->next);
437 	}
438 }
439 
/*
 * Link @export at the head of @list. Publication order matters for
 * lockless readers, hence the two rcu_assign_pointer() barriers.
 */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}
452 
/*
 * Unlink @export from @list. Returns 0 on success, -1 if @export was
 * not on the list. Uses an indirect-pointer walk so no back pointer
 * is needed to splice the node out.
 */
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	/* Find the link (head or some ->next) that points at @export */
	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	/* Publish the unlink; concurrent readers may still hold the node */
	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}
469 
/*
 * Enable the static keys for @export's classes, then publish it on
 * @list so the now-armed fast paths can find it.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}
477 
/*
 * Remove @export from @list and drop its static-key counts.
 * Returns the result of the list removal (0 or -1); the keys are
 * decremented either way, mirroring add_ftrace_export().
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int removed = rm_trace_export(list, export);

	ftrace_exports_disable(export);

	return removed;
}
488 
register_ftrace_export(struct trace_export * export)489 int register_ftrace_export(struct trace_export *export)
490 {
491 	if (WARN_ON_ONCE(!export->write))
492 		return -1;
493 
494 	guard(mutex)(&ftrace_export_lock);
495 
496 	add_ftrace_export(&ftrace_exports_list, export);
497 
498 	return 0;
499 }
500 EXPORT_SYMBOL_GPL(register_ftrace_export);
501 
/*
 * Remove @export from the exports list. Returns 0 on success, -1 if
 * it was not registered.
 */
int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
508 
509 /* trace_flags holds trace_options default values */
510 #define TRACE_DEFAULT_FLAGS						\
511 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
512 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
513 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
514 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
515 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
516 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
517 	 TRACE_ITER(COPY_MARKER))
518 
519 /* trace_options that are only supported by global_trace */
520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
521 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
522 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
523 
524 /* trace_flags that are default zero for instances */
525 #define ZEROED_TRACE_FLAGS \
526 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
527 	 TRACE_ITER(COPY_MARKER))
528 
529 /*
530  * The global_trace is the descriptor that holds the top-level tracing
531  * buffers for the live tracing.
532  */
533 static struct trace_array global_trace = {
534 	.trace_flags = TRACE_DEFAULT_FLAGS,
535 };
536 
537 struct trace_array *printk_trace = &global_trace;
538 
539 /* List of trace_arrays interested in the top level trace_marker */
540 static LIST_HEAD(marker_copies);
541 
/*
 * Redirect trace_printk() output to @tr: clear the TRACE_PRINTK flag
 * on the current target before switching, then set it on @tr.
 */
static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
}
551 
/*
 * Add or remove @tr from the list of instances that receive copies of
 * top-level trace_marker writes, keeping the COPY_MARKER flag in sync
 * with list membership (RCU list op before the flag update).
 *
 * Returns true if the status of tr changed.
 */
static bool update_marker_trace(struct trace_array *tr, int enabled)
{
	lockdep_assert_held(&event_mutex);

	if (enabled) {
		/* Already on the list? Nothing to change. */
		if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
			return false;

		list_add_rcu(&tr->marker_list, &marker_copies);
		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
		return true;
	}

	/* Already off the list? Nothing to change. */
	if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
		return false;

	list_del_rcu(&tr->marker_list);
	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
	return true;
}
573 
trace_set_ring_buffer_expanded(struct trace_array * tr)574 void trace_set_ring_buffer_expanded(struct trace_array *tr)
575 {
576 	if (!tr)
577 		tr = &global_trace;
578 	tr->ring_buffer_expanded = true;
579 }
580 
/* Workqueue callback: destroy the trace array that owns @work */
static void trace_array_autoremove(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);

	trace_array_destroy(tr);
}
587 
588 static struct workqueue_struct *autoremove_wq;
589 
/*
 * Schedule @tr for asynchronous destruction. Silently does nothing if
 * the autoremove workqueue failed to allocate at startup.
 */
static void trace_array_kick_autoremove(struct trace_array *tr)
{
	if (autoremove_wq)
		queue_work(autoremove_wq, &tr->autoremove_work);
}
595 
/* Cancel a pending autoremoval of @tr, waiting for it if already queued */
static void trace_array_cancel_autoremove(struct trace_array *tr)
{
	/*
	 * Since this can be called inside trace_array_autoremove(),
	 * it has to avoid deadlock of the workqueue.
	 */
	if (work_pending(&tr->autoremove_work))
		cancel_work_sync(&tr->autoremove_work);
}
605 
/* Prepare @tr's work item so it can later be queued for autoremoval */
static void trace_array_init_autoremove(struct trace_array *tr)
{
	INIT_WORK(&tr->autoremove_work, trace_array_autoremove);
}
610 
/*
 * Lazily create the workqueue used for asynchronous trace array
 * destruction. On allocation failure autoremove is simply disabled
 * (trace_array_kick_autoremove() checks for a NULL workqueue).
 */
static void trace_array_start_autoremove(void)
{
	if (autoremove_wq)
		return;

	autoremove_wq = alloc_workqueue("tr_autoremove_wq",
					WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!autoremove_wq)
		pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n");
}
621 
622 LIST_HEAD(ftrace_trace_arrays);
623 
/*
 * Take a reference on @this_tr. Caller holds trace_types_lock.
 * Returns 0 on success, -ENODEV if the array is marked free_on_close
 * (and autoremove is operational), i.e. it is going away.
 */
static int __trace_array_get(struct trace_array *this_tr)
{
	/* When free_on_close is set, this is not available anymore. */
	if (autoremove_wq && this_tr->free_on_close)
		return -ENODEV;

	this_tr->ref++;
	return 0;
}
633 
trace_array_get(struct trace_array * this_tr)634 int trace_array_get(struct trace_array *this_tr)
635 {
636 	struct trace_array *tr;
637 
638 	guard(mutex)(&trace_types_lock);
639 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
640 		if (tr == this_tr) {
641 			return __trace_array_get(tr);
642 		}
643 	}
644 
645 	return -ENODEV;
646 }
647 
/* Drop a reference on @this_tr. Caller holds trace_types_lock. */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
	/*
	 * When free_on_close is set, prepare removing the array
	 * when the last reference is released.
	 */
	if (this_tr->ref == 1 && this_tr->free_on_close)
		trace_array_kick_autoremove(this_tr);
}
659 
/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 * A NULL @this_tr is tolerated and ignored.
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	guard(mutex)(&trace_types_lock);
	__trace_array_put(this_tr);
}
EXPORT_SYMBOL_GPL(trace_array_put);
678 
tracing_check_open_get_tr(struct trace_array * tr)679 int tracing_check_open_get_tr(struct trace_array *tr)
680 {
681 	int ret;
682 
683 	ret = security_locked_down(LOCKDOWN_TRACEFS);
684 	if (ret)
685 		return ret;
686 
687 	if (tracing_disabled)
688 		return -ENODEV;
689 
690 	if (tr && trace_array_get(tr) < 0)
691 		return -ENODEV;
692 
693 	return 0;
694 }
695 
buffer_ftrace_now(struct array_buffer * buf,int cpu)696 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
697 {
698 	u64 ts;
699 
700 	/* Early boot up does not have a buffer yet */
701 	if (!buf->buffer)
702 		return trace_clock_local();
703 
704 	ts = ring_buffer_time_stamp(buf->buffer);
705 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
706 
707 	return ts;
708 }
709 
/* Current trace timestamp of the global trace buffer for @cpu */
u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}
714 
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}
733 
734 /*
735  * trace_buf_size is the size in bytes that is allocated
736  * for a buffer. Note, the number of bytes is always rounded
737  * to page size.
738  *
739  * This number is purposely set to a low number of 16384.
740  * If the dump on oops happens, it will be much appreciated
741  * to not have to wait for all that output. Anyway this can be
742  * boot time and run time configurable.
743  */
744 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
745 
746 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
747 
748 /* trace_types holds a link list of available tracers. */
749 static struct tracer		*trace_types __read_mostly;
750 
751 /*
752  * trace_types_lock is used to protect the trace_types list.
753  */
754 DEFINE_MUTEX(trace_types_lock);
755 
756 /*
757  * serialize the access of the ring buffer
758  *
759  * ring buffer serializes readers, but it is low level protection.
760  * The validity of the events (which returns by ring_buffer_peek() ..etc)
761  * are not protected by ring buffer.
762  *
763  * The content of events may become garbage if we allow other process consumes
764  * these events concurrently:
765  *   A) the page of the consumed events may become a normal page
766  *      (not reader page) in ring buffer, and this page will be rewritten
767  *      by events producer.
768  *   B) The page of the consumed events may become a page for splice_read,
769  *      and this page will be returned to system.
770  *
771  * These primitives allow multi process access to different cpu ring buffer
772  * concurrently.
773  *
774  * These primitives don't distinguish read-only and read-consume access.
775  * Multi read-only access are also serialized.
776  */
777 
778 #ifdef CONFIG_SMP
779 static DECLARE_RWSEM(all_cpu_access_lock);
780 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
781 
/*
 * Serialize ring buffer access for @cpu. RING_BUFFER_ALL_CPUS takes
 * the rwsem for writing (exclusive, whole buffer); a single CPU takes
 * it for reading plus that CPU's mutex. The rwsem must be taken
 * before the per-cpu mutex — trace_access_unlock() releases in the
 * reverse order.
 */
static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}
797 
/* Release what trace_access_lock() took, in the reverse order */
static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}
807 
/* Initialize the per-cpu mutexes used by trace_access_lock() */
static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}
815 
816 #else
817 
818 static DEFINE_MUTEX(access_lock);
819 
/* UP build: one global mutex serializes all ring buffer access */
static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}
825 
/* UP build: release the single access mutex */
static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}
831 
/* UP build: nothing to initialize (static mutex) */
static inline void trace_access_lock_init(void)
{
}
835 
836 #endif
837 
/*
 * Enable recording on @tr's ring buffer and clear the mirror
 * buffer_disabled flag used by fast-path checks.
 */
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}
852 
/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
864 
865 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Take a snapshot of @tr's live buffer, carrying @cond_data through to
 * update_max_tr(). Refuses (with a note written into the trace) when
 * called from NMI context, when no snapshot buffer is allocated, when
 * the buffer is memory mapped, or when the current tracer itself uses
 * the snapshot buffer.
 */
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	unsigned long flags;

	/* The buffer swap is not NMI safe */
	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
		return;
	}

	/* No spare buffer to swap with: freeze the trace instead */
	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here!   ***\n");
		tracer_tracing_off(tr);
		return;
	}

	/* A user-space mapping pins the buffer; it can not be swapped */
	if (tr->mapped) {
		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer_uses_snapshot(tr->current_trace)) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* Swap the buffers with interrupts disabled on this CPU */
	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}
901 
tracing_snapshot_instance(struct trace_array * tr)902 void tracing_snapshot_instance(struct trace_array *tr)
903 {
904 	tracing_snapshot_instance_cond(tr, NULL);
905 }
906 
907 /**
908  * tracing_snapshot - take a snapshot of the current buffer.
909  *
910  * This causes a swap between the snapshot buffer and the current live
911  * tracing buffer. You can use this to take snapshots of the live
912  * trace when some condition is triggered, but continue to trace.
913  *
914  * Note, make sure to allocate the snapshot with either
915  * a tracing_snapshot_alloc(), or by doing it manually
916  * with: echo 1 > /sys/kernel/tracing/snapshot
917  *
918  * If the snapshot buffer is not allocated, it will stop tracing.
919  * Basically making a permanent snapshot.
920  */
tracing_snapshot(void)921 void tracing_snapshot(void)
922 {
923 	struct trace_array *tr = &global_trace;
924 
925 	tracing_snapshot_instance(tr);
926 }
927 EXPORT_SYMBOL_GPL(tracing_snapshot);
928 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
947 
/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr:		The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot.  This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot,
 * or NULL when no conditional snapshot is active.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	/* max_lock is a raw arch spinlock: IRQs must be off around it */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
978 
979 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
980 					struct array_buffer *size_buf, int cpu_id);
981 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
982 
tracing_alloc_snapshot_instance(struct trace_array * tr)983 int tracing_alloc_snapshot_instance(struct trace_array *tr)
984 {
985 	int order;
986 	int ret;
987 
988 	if (!tr->allocated_snapshot) {
989 
990 		/* Make the snapshot buffer have the same order as main buffer */
991 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
992 		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
993 		if (ret < 0)
994 			return ret;
995 
996 		/* allocate spare buffer */
997 		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
998 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
999 		if (ret < 0)
1000 			return ret;
1001 
1002 		tr->allocated_snapshot = true;
1003 	}
1004 
1005 	return 0;
1006 }
1007 
static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we shrink it back to a
	 * minimal size, because the snapshot ring buffer has some state
	 * (e.g. ring->clock) that we want to preserve.
	 */
	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->snapshot_buffer, 1);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
	tr->allocated_snapshot = false;
}
1021 
/*
 * Take a reference on the snapshot feature for @tr and make sure the
 * snapshot buffer is allocated.  Fails with -EBUSY when the refcount
 * would overflow or the buffer is currently memory-mapped (tr->mapped).
 * Caller must hold trace_types_lock.
 */
static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	lockdep_assert_held(&trace_types_lock);

	spin_lock(&tr->snapshot_trigger_lock);
	if (tr->snapshot == UINT_MAX || tr->mapped) {
		spin_unlock(&tr->snapshot_trigger_lock);
		return -EBUSY;
	}

	tr->snapshot++;
	spin_unlock(&tr->snapshot_trigger_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		/* Allocation failed: roll the refcount back */
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}
1046 
/* Like tracing_arm_snapshot_locked(), but takes trace_types_lock itself */
int tracing_arm_snapshot(struct trace_array *tr)
{
	guard(mutex)(&trace_types_lock);
	return tracing_arm_snapshot_locked(tr);
}
1052 
/*
 * Drop one reference taken by tracing_arm_snapshot{,_locked}().
 * WARNs (and does not underflow) if the count is already zero.
 */
void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}
1060 
1061 /**
1062  * tracing_alloc_snapshot - allocate snapshot buffer.
1063  *
1064  * This only allocates the snapshot buffer if it isn't already
1065  * allocated - it doesn't also take a snapshot.
1066  *
1067  * This is meant to be used in cases where the snapshot buffer needs
1068  * to be set up for events that can't sleep but need to be able to
1069  * trigger a snapshot.
1070  */
tracing_alloc_snapshot(void)1071 int tracing_alloc_snapshot(void)
1072 {
1073 	struct trace_array *tr = &global_trace;
1074 	int ret;
1075 
1076 	ret = tracing_alloc_snapshot_instance(tr);
1077 	WARN_ON(ret < 0);
1078 
1079 	return ret;
1080 }
1081 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1082 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	/* No snapshot without a spare buffer to swap into */
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1105 
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr:		The tracing instance
 * @cond_data:	User data to associate with the snapshot
 * @update:	Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	/* __free(kfree): freed automatically on every error return below */
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc_obj(*cond_snapshot);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	/* Publish under max_lock; no_free_ptr() transfers ownership to tr */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1161 
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr:		The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	/* max_lock serializes against snapshot swaps reading cond_snapshot */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	/*
	 * Drop the reference taken by tracing_snapshot_cond_enable().
	 * Note: this runs even on the -EINVAL path; tracing_disarm_snapshot()
	 * WARNs rather than underflows if the count is already zero.
	 */
	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1194 #else
/* Stub: CONFIG_TRACER_SNAPSHOT is not set; warn once if reached */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub: snapshots compiled out; warn once if reached */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/* Stub: snapshots compiled out; allocation can never succeed */
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub: delegates to tracing_snapshot() purely for its WARN_ONCE */
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
/* Stub: no snapshot support, so there is never any cond_data */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
/* Stub: conditional snapshots cannot be enabled without snapshot support */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	/*
	 * NOTE(review): returns false (0, i.e. "success") while the matching
	 * enable stub returns -ENODEV -- looks inconsistent; confirm no
	 * caller depends on getting an error here before changing it.
	 */
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1234 #endif /* CONFIG_TRACER_SNAPSHOT */
1235 
/**
 * tracer_tracing_off - turn off recording for a trace array's ring buffer
 * @tr: The trace array to stop writing to
 */
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}
1250 
/**
 * tracer_tracing_disable() - temporarily disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas, tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}
1268 
/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}
1283 
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
1297 
/*
 * Called on kernel warnings when the "traceoff_on_warning" option is set:
 * records a message in the affected buffers and stops them recording.
 */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}
1315 
1316 /**
1317  * tracer_tracing_is_on - show real state of ring buffer enabled
1318  * @tr : the trace array to know if ring buffer is enabled
1319  *
1320  * Shows real state of the ring buffer if it is enabled or not.
1321  */
tracer_tracing_is_on(struct trace_array * tr)1322 bool tracer_tracing_is_on(struct trace_array *tr)
1323 {
1324 	if (tr->array_buffer.buffer)
1325 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1326 	return !tr->buffer_disabled;
1327 }
1328 
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns non-zero when the global trace buffer is recording.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1337 
set_buf_size(char * str)1338 static int __init set_buf_size(char *str)
1339 {
1340 	unsigned long buf_size;
1341 
1342 	if (!str)
1343 		return 0;
1344 	buf_size = memparse(str, &str);
1345 	/*
1346 	 * nr_entries can not be zero and the startup
1347 	 * tests require some buffer space. Therefore
1348 	 * ensure we have at least 4096 bytes of buffer.
1349 	 */
1350 	trace_buf_size = max(4096UL, buf_size);
1351 	return 1;
1352 }
1353 __setup("trace_buf_size=", set_buf_size);
1354 
set_tracing_thresh(char * str)1355 static int __init set_tracing_thresh(char *str)
1356 {
1357 	unsigned long threshold;
1358 	int ret;
1359 
1360 	if (!str)
1361 		return 0;
1362 	ret = kstrtoul(str, 0, &threshold);
1363 	if (ret < 0)
1364 		return 0;
1365 	tracing_thresh = threshold * 1000;
1366 	return 1;
1367 }
1368 __setup("tracing_thresh=", set_tracing_thresh);
1369 
/* Convert nanoseconds to microseconds, truncating any sub-usec remainder. */
unsigned long nsecs_to_usecs(unsigned long ns)
{
	return ns / 1000UL;
}
1374 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL		/* NULL-terminated for iteration */
};
1389 
/* Table of available trace clocks; indexed by tr->clock_id */
static struct {
	u64 (*func)(void);	/* returns the current timestamp */
	const char *name;	/* user-visible clock name */
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1406 
trace_clock_in_ns(struct trace_array * tr)1407 bool trace_clock_in_ns(struct trace_array *tr)
1408 {
1409 	if (trace_clocks[tr->clock_id].in_ns)
1410 		return true;
1411 
1412 	return false;
1413 }
1414 
1415 /*
1416  * trace_parser_get_init - gets the buffer for trace parser
1417  */
trace_parser_get_init(struct trace_parser * parser,int size)1418 int trace_parser_get_init(struct trace_parser *parser, int size)
1419 {
1420 	memset(parser, 0, sizeof(*parser));
1421 
1422 	parser->buffer = kmalloc(size, GFP_KERNEL);
1423 	if (!parser->buffer)
1424 		return 1;
1425 
1426 	parser->size = size;
1427 	return 0;
1428 }
1429 
/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	/* Clear the pointer so a double put is harmless */
	parser->buffer = NULL;
}
1438 
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A fresh read (offset 0) starts the parser over */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			/* word longer than the parser buffer */
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Ran out of user data mid-word: resume on the next write */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}
1526 
1527 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1528 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1529 {
1530 	int len;
1531 
1532 	if (trace_seq_used(s) <= s->readpos)
1533 		return -EBUSY;
1534 
1535 	len = trace_seq_used(s) - s->readpos;
1536 	if (cnt > len)
1537 		cnt = len;
1538 	memcpy(buf, s->buffer + s->readpos, cnt);
1539 
1540 	s->readpos += cnt;
1541 	return cnt;
1542 }
1543 
/* Latency threshold in ns; see set_tracing_thresh() for the boot parameter */
unsigned long __read_mostly	tracing_thresh;
1545 
1546 #ifdef CONFIG_TRACER_MAX_TRACE
1547 #ifdef LATENCY_FS_NOTIFY
1548 static struct workqueue_struct *fsnotify_wq;
1549 
/* Workqueue handler: emit the FS_MODIFY event for tracing_max_latency */
static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}
1556 
/* irq_work handler: bounce the notification onto the workqueue */
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}
1563 
/* Allocate the workqueue used to deliver max-latency fsnotify events */
__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);
1576 
/* Notify watchers that tracing_max_latency changed (deferred via irq_work) */
void latency_fsnotify(struct trace_array *tr)
{
	/* Workqueue not set up yet (early boot): silently skip */
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
1588 #endif /* !LATENCY_FS_NOTIFY */
1589 
1590 static const struct file_operations tracing_max_lat_fops;
1591 
/* Create the "tracing_max_latency" tracefs file (and fsnotify hooks) for @tr */
static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
#ifdef LATENCY_FS_NOTIFY
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
#endif
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}
1604 
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct array_buffer *max_buf = &tr->snapshot_buffer;
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	/* Record the latency and the window that produced it */
	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	/* Save details of the task that caused the latency */
	strscpy(max_data->comm, tsk->comm);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
	latency_fsnotify(tr);
}
1644 #else
/* Stubs when CONFIG_TRACER_MAX_TRACE is not set */
static inline void trace_create_maxlat_file(struct trace_array *tr,
					    struct dentry *d_tracer) { }
static inline void __update_max_tr(struct trace_array *tr,
				   struct task_struct *tsk, int cpu) { }
1649 #endif /* CONFIG_TRACER_MAX_TRACE */
1650 
1651 #ifdef CONFIG_TRACER_SNAPSHOT
1652 /**
1653  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1654  * @tr: tracer
1655  * @tsk: the task with the latency
1656  * @cpu: The cpu that initiated the trace.
1657  * @cond_data: User data associated with a conditional snapshot
1658  *
1659  * Flip the buffers between the @tr and the max_tr and record information
1660  * about which task was the cause of this latency.
1661  */
1662 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1663 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1664 	      void *cond_data)
1665 {
1666 	if (tr->stop_count)
1667 		return;
1668 
1669 	WARN_ON_ONCE(!irqs_disabled());
1670 
1671 	if (!tr->allocated_snapshot) {
1672 		/* Only the nop tracer should hit this when disabling */
1673 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1674 		return;
1675 	}
1676 
1677 	arch_spin_lock(&tr->max_lock);
1678 
1679 	/* Inherit the recordable setting from array_buffer */
1680 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1681 		ring_buffer_record_on(tr->snapshot_buffer.buffer);
1682 	else
1683 		ring_buffer_record_off(tr->snapshot_buffer.buffer);
1684 
1685 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1686 		arch_spin_unlock(&tr->max_lock);
1687 		return;
1688 	}
1689 
1690 	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);
1691 
1692 	__update_max_tr(tr, tsk, cpu);
1693 
1694 	arch_spin_unlock(&tr->max_lock);
1695 
1696 	/* Any waiters on the old snapshot buffer need to wake up */
1697 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1698 }
1699 
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Swap just @cpu's buffer between the live and snapshot buffers */
	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 * Another reason is resize is in progress.
		 */
		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit or resize in progress\n");
	}

	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
1744 #endif /* CONFIG_TRACER_SNAPSHOT */
1745 
/* Context handed to wait_pipe_cond() while blocking on a trace pipe */
struct pipe_wait {
	struct trace_iterator		*iter;
	int				wait_index;	/* iter->wait_index sampled when the wait began */
};
1750 
wait_pipe_cond(void * data)1751 static bool wait_pipe_cond(void *data)
1752 {
1753 	struct pipe_wait *pwait = data;
1754 	struct trace_iterator *iter = pwait->iter;
1755 
1756 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1757 		return true;
1758 
1759 	return iter->closed;
1760 }
1761 
/*
 * Block until data is available on @iter's per-cpu buffer or the wait
 * is cut short by wait_pipe_cond().  Returns the ring_buffer_wait()
 * result, or 0 immediately for static (non-pipe) iterators.
 */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1787 
1788 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set once boot has progressed far enough to actually run tracer selftests */
static bool selftests_can_run;

/* A tracer whose selftest was postponed until selftests_can_run is set */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

static LIST_HEAD(postponed_selftests);
1797 
save_selftest(struct tracer * type)1798 static int save_selftest(struct tracer *type)
1799 {
1800 	struct trace_selftests *selftest;
1801 
1802 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1803 	if (!selftest)
1804 		return -ENOMEM;
1805 
1806 	selftest->type = type;
1807 	list_add(&selftest->list, &postponed_selftests);
1808 	return 0;
1809 }
1810 
/*
 * Run @type's selftest against the global trace instance, temporarily
 * installing it as the current tracer and restoring the previous tracer
 * (and its flags) afterwards.  Returns 0 on pass/skip, -1 on failure,
 * or the result of save_selftest() when postponed.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1886 
/* Wrapper that flags tracing_selftest_running around a selftest run */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1904 
/*
 * Run all selftests that were postponed during early boot.  A tracer
 * whose postponed selftest fails is unlinked from the trace_types list
 * so it is no longer available.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink the failed tracer from the trace_types list */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
1948 #else
/* Selftests compiled out: registering a tracer always "passes". */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1953 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1954 
1955 static int add_tracer(struct trace_array *tr, struct tracer *t);
1956 
1957 static void __init apply_trace_boot_options(void);
1958 
/*
 * Free a trace array's per-instance list of tracers along with each
 * entry's option flags.  Caller must hold trace_types_lock.
 */
static void free_tracers(struct trace_array *tr)
{
	struct tracers *t, *n;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry_safe(t, n, &tr->tracers, list) {
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}
}
1971 
1972 /**
1973  * register_tracer - register a tracer with the ftrace system.
1974  * @type: the plugin for the tracer
1975  *
1976  * Register a new plugin tracer.
1977  */
register_tracer(struct tracer * type)1978 int __init register_tracer(struct tracer *type)
1979 {
1980 	struct trace_array *tr;
1981 	struct tracer *t;
1982 	int ret = 0;
1983 
1984 	if (!type->name) {
1985 		pr_info("Tracer must have a name\n");
1986 		return -1;
1987 	}
1988 
1989 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1990 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1991 		return -1;
1992 	}
1993 
1994 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1995 		pr_warn("Can not register tracer %s due to lockdown\n",
1996 			   type->name);
1997 		return -EPERM;
1998 	}
1999 
2000 	mutex_lock(&trace_types_lock);
2001 
2002 	for (t = trace_types; t; t = t->next) {
2003 		if (strcmp(type->name, t->name) == 0) {
2004 			/* already found */
2005 			pr_info("Tracer %s already registered\n",
2006 				type->name);
2007 			ret = -1;
2008 			goto out;
2009 		}
2010 	}
2011 
2012 	/* store the tracer for __set_tracer_option */
2013 	if (type->flags)
2014 		type->flags->trace = type;
2015 
2016 	ret = do_run_tracer_selftest(type);
2017 	if (ret < 0)
2018 		goto out;
2019 
2020 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2021 		ret = add_tracer(tr, type);
2022 		if (ret < 0) {
2023 			/* The tracer will still exist but without options */
2024 			pr_warn("Failed to create tracer options for %s\n", type->name);
2025 			break;
2026 		}
2027 	}
2028 
2029 	type->next = trace_types;
2030 	trace_types = type;
2031 
2032  out:
2033 	mutex_unlock(&trace_types_lock);
2034 
2035 	if (ret || !default_bootup_tracer)
2036 		return ret;
2037 
2038 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2039 		return 0;
2040 
2041 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2042 	/* Do we want this tracer to start on bootup? */
2043 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2044 	default_bootup_tracer = NULL;
2045 
2046 	apply_trace_boot_options();
2047 
2048 	/* disable other selftests, since this will break it. */
2049 	disable_tracing_selftest("running a tracer");
2050 
2051 	return 0;
2052 }
2053 
/*
 * Clear a single CPU's ring buffer of @buf.  Recording is disabled and
 * in-flight commits are waited for (synchronize_rcu) before resetting.
 */
static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}
2069 
/*
 * Clear the ring buffers of @buf for all online CPUs.  Also restarts
 * the buffer's time_start stamp.  Recording is disabled and in-flight
 * commits are waited for before the reset.
 */
void tracing_reset_online_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset_online_cpus(buffer);

	ring_buffer_record_enable(buffer);
}
2088 
/*
 * Like tracing_reset_online_cpus(), but resets the buffer for every
 * possible CPU, not just the online ones.
 */
static void tracing_reset_all_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset(buffer);

	ring_buffer_record_enable(buffer);
}
2107 
/*
 * Reset the buffers of every trace array that has its clear_trace flag
 * set (both the main buffer and, when configured, the snapshot buffer).
 * Must have trace_types_lock held.
 */
void tracing_reset_all_online_cpus_unlocked(void)
{
	struct trace_array *tr;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->clear_trace)
			continue;
		tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_SNAPSHOT
		tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	}
}
2125 
tracing_reset_all_online_cpus(void)2126 void tracing_reset_all_online_cpus(void)
2127 {
2128 	guard(mutex)(&trace_types_lock);
2129 	tracing_reset_all_online_cpus_unlocked();
2130 }
2131 
/*
 * Return the global trace array's stop counter: nonzero while tracing
 * has been stopped via tracing_stop() and not yet restarted.
 */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
2136 
/*
 * Drop one reference on @tr's stop counter and, when it hits zero,
 * re-enable recording on its buffers.  max_lock is held across the
 * enable to keep the buffers from being switched meanwhile.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
2168 
2169 /**
2170  * tracing_start - quick start of the tracer
2171  *
2172  * If tracing is enabled but was stopped by tracing_stop,
2173  * this will start the tracer back up.
2174  */
tracing_start(void)2175 void tracing_start(void)
2176 
2177 {
2178 	return tracing_start_tr(&global_trace);
2179 }
2180 
/*
 * Take one reference on @tr's stop counter; the first stop disables
 * recording on its buffers.  max_lock is held across the disable to
 * keep the buffers from being switched meanwhile.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
2204 
2205 /**
2206  * tracing_stop - quick stop of the tracer
2207  *
2208  * Light weight way to stop tracing. Use in conjunction with
2209  * tracing_start.
2210  */
tracing_stop(void)2211 void tracing_stop(void)
2212 {
2213 	return tracing_stop_tr(&global_trace);
2214 }
2215 
2216 /*
2217  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2218  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2219  * simplifies those functions and keeps them in sync.
2220  */
trace_handle_return(struct trace_seq * s)2221 enum print_line_t trace_handle_return(struct trace_seq *s)
2222 {
2223 	return trace_seq_has_overflowed(s) ?
2224 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2225 }
2226 EXPORT_SYMBOL_GPL(trace_handle_return);
2227 
/*
 * Current task's migration-disable depth; always 0 on !SMP where the
 * field does not exist.
 */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}
2236 
/*
 * Build the packed trace-context word from @irqs_status (the caller's
 * irq-flag bits) and the current preemption state:
 *
 *   bits 16..31: TRACE_FLAG_* (NMI/hardirq/softirq/BH-off/resched state)
 *   bits  4..7 : migration-disable depth, clamped to 15
 *   bits  0..3 : preempt_count low byte, clamped to 15
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;
	/* BH disabled beyond any currently-serviced softirq */
	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
		trace_flags |= TRACE_FLAG_BH_OFF;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
2262 
/*
 * Public wrapper around __trace_buffer_lock_reserve(): reserve room on
 * @buffer for an event of @type with payload size @len and context
 * @trace_ctx.  May return NULL when the reserve fails.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2271 
/* Per-CPU scratch event page used when events are filtered (see trace_buffered_event_enable()) */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU nesting count: nonzero means the scratch event is already in use */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of users that currently want event buffering enabled */
static int trace_buffered_event_ref;
2275 
2276 /**
2277  * trace_buffered_event_enable - enable buffering events
2278  *
2279  * When events are being filtered, it is quicker to use a temporary
2280  * buffer to write the event data into if there's a likely chance
2281  * that it will not be committed. The discard of the ring buffer
2282  * is not as fast as committing, and is much slower than copying
2283  * a commit.
2284  *
2285  * When an event is to be filtered, allocate per cpu buffers to
2286  * write the event data into, and if the event is filtered and discarded
2287  * it is simply dropped, otherwise, the entire data is to be committed
2288  * in one shot.
2289  */
trace_buffered_event_enable(void)2290 void trace_buffered_event_enable(void)
2291 {
2292 	struct ring_buffer_event *event;
2293 	struct page *page;
2294 	int cpu;
2295 
2296 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2297 
2298 	if (trace_buffered_event_ref++)
2299 		return;
2300 
2301 	for_each_tracing_cpu(cpu) {
2302 		page = alloc_pages_node(cpu_to_node(cpu),
2303 					GFP_KERNEL | __GFP_NORETRY, 0);
2304 		/* This is just an optimization and can handle failures */
2305 		if (!page) {
2306 			pr_err("Failed to allocate event buffer\n");
2307 			break;
2308 		}
2309 
2310 		event = page_address(page);
2311 		memset(event, 0, sizeof(*event));
2312 
2313 		per_cpu(trace_buffered_event, cpu) = event;
2314 
2315 		scoped_guard(preempt,) {
2316 			if (cpu == smp_processor_id() &&
2317 			    __this_cpu_read(trace_buffered_event) !=
2318 			    per_cpu(trace_buffered_event, cpu))
2319 				WARN_ON_ONCE(1);
2320 		}
2321 	}
2322 }
2323 
/* IPI callback: mark this CPU's buffered event as available again */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}
2328 
/* IPI callback: mark this CPU's buffered event as in use (unavailable) */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2333 
2334 /**
2335  * trace_buffered_event_disable - disable buffering events
2336  *
2337  * When a filter is removed, it is faster to not use the buffered
2338  * events, and to commit directly into the ring buffer. Free up
2339  * the temp buffers when there are no more users. This requires
2340  * special synchronization with current events.
2341  */
trace_buffered_event_disable(void)2342 void trace_buffered_event_disable(void)
2343 {
2344 	int cpu;
2345 
2346 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2347 
2348 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2349 		return;
2350 
2351 	if (--trace_buffered_event_ref)
2352 		return;
2353 
2354 	/* For each CPU, set the buffer as used. */
2355 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2356 			 NULL, true);
2357 
2358 	/* Wait for all current users to finish */
2359 	synchronize_rcu();
2360 
2361 	for_each_tracing_cpu(cpu) {
2362 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2363 		per_cpu(trace_buffered_event, cpu) = NULL;
2364 	}
2365 
2366 	/*
2367 	 * Wait for all CPUs that potentially started checking if they can use
2368 	 * their event buffer only after the previous synchronize_rcu() call and
2369 	 * they still read a valid pointer from trace_buffered_event. It must be
2370 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2371 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2372 	 */
2373 	synchronize_rcu();
2374 
2375 	/* For each CPU, relinquish the buffer */
2376 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2377 			 true);
2378 }
2379 
/* Scratch buffer to hold event data for triggers when the real reserve fails */
static struct trace_buffer *temp_buffer;
2381 
/*
 * Reserve an event for a trace event file.  When filtering applies to
 * @trace_file (and no_filter_buffering is not requested), try the
 * per-CPU buffered event first, since discarding a filtered-out event
 * from there is cheaper than discarding a ring buffer commit.  Falls
 * back to a normal ring buffer reserve and, if that fails but triggers
 * still need to see the event data, to the global temp_buffer.
 *
 * On success, preemption is left disabled; it is re-enabled when the
 * event is committed or discarded.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2464 
/* Serializes use of tracepoint_print_iter in output_printk() */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Serializes tracepoint_printk updates in tracepoint_printk_sysctl() */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2467 
/*
 * Print a trace event via printk (used when tracepoint_printk is
 * enabled).  Formats the event with its ->trace() callback into the
 * shared tracepoint_print_iter, serialized by tracepoint_iter_lock.
 * Soft-disabled events and events failing their filter are skipped.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event_file *file;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	file = fbuffer->trace_file;
	/* Honor soft-disable and per-file filters */
	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
		return;

	event = &fbuffer->trace_file->event_call->event;

	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	/* NUL-terminate the seq buffer before handing it to printk */
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
2502 
/*
 * Sysctl handler for tracepoint_printk.  When the value actually
 * changes, flip the static key that routes trace events through
 * output_printk().  The value is forced back to 0 when no print
 * iterator was ever allocated.
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	guard(mutex)(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* Nothing to do if the value did not change */
	if (save_tracepoint_printk == tracepoint_printk)
		return ret;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

	return ret;
}
2532 
/*
 * Commit a previously reserved trace event.  Event triggers may
 * discard the event; otherwise it is optionally mirrored to printk
 * (tp_printk) and to registered exporters before being committed to
 * the ring buffer.  Trigger post-calls run in either case.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
			fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	/* Run any triggers that fired, even for discarded events */
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2557 
2558 /*
2559  * Skip 3:
2560  *
2561  *   trace_buffer_unlock_commit_regs()
2562  *   trace_event_buffer_commit()
2563  *   trace_event_raw_event_xxx()
2564  */
2565 # define STACK_SKIP 3
2566 
/*
 * Commit @event and then record kernel and user stack traces via
 * ftrace_trace_stack()/ftrace_trace_userstack().  When @regs is set,
 * the kernel unwind starts from the register state, so no frames need
 * to be skipped; otherwise STACK_SKIP drops the commit-path frames.
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2584 
2585 /*
2586  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2587  */
2588 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2589 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2590 				   struct ring_buffer_event *event)
2591 {
2592 	__buffer_unlock_commit(buffer, event);
2593 }
2594 
/*
 * Record a TRACE_FN entry with @ip and @parent_ip into @tr's buffer.
 * When @fregs is supplied, room for FTRACE_REGS_MAX_ARGS long-sized
 * argument values is reserved after the entry and (when the arch
 * supports argument access) filled from the register state.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* Only reserve space for the argument array when fregs is given */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}
2625 
2626 #ifdef CONFIG_STACKTRACE
2627 
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Entries per nesting level: 4K entries divided among the levels */
#define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)

/* One saved kernel stack trace */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


/* One stack slot per nesting level (normal/softirq/irq/NMI) */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU stack storage and nesting index used by __ftrace_trace_stack() */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2644 
/*
 * Record a kernel stack trace as a TRACE_STACK event on @buffer.
 *
 * Uses preallocated per-CPU stack storage with FTRACE_KSTACK_NESTING
 * slots, so traces taken from nested contexts (e.g. an irq interrupting
 * a normal trace) do not clobber each other.  Guarded against tracer
 * recursion via the trace recursion bits.  @skip drops that many
 * leading frames; when @regs is set the unwind starts from the saved
 * register state instead.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}
2730 
/*
 * Record a kernel stack trace for @tr, handling the case where RCU is
 * not currently watching: the trace is then wrapped in
 * ct_irq_enter_irqson()/ct_irq_exit_irqson(), except in NMI context
 * where that is not allowed and the trace is dropped instead.
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
		return;
	}

	/* With generic entry, RCU should always be watching here */
	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
		return;

	/*
	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and ct_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	ct_irq_enter_irqson();
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
	ct_irq_exit_irqson();
}
2757 
2758 /**
2759  * trace_dump_stack - record a stack back trace in the trace buffer
2760  * @skip: Number of functions to skip (helper handlers)
2761  */
trace_dump_stack(int skip)2762 void trace_dump_stack(int skip)
2763 {
2764 	if (tracing_disabled || tracing_selftest_running)
2765 		return;
2766 
2767 #ifndef CONFIG_UNWINDER_ORC
2768 	/* Skip 1 to skip this function. */
2769 	skip++;
2770 #endif
2771 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
2772 				tracing_gen_ctx(), skip, NULL);
2773 }
2774 EXPORT_SYMBOL_GPL(trace_dump_stack);
2775 
2776 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU recursion guard for user stack tracing */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the current task's user-space stack trace as a
 * TRACE_USER_STACK event, when the USERSTACKTRACE option is set.
 * Skipped in NMI context (user stack walking may fault) and guarded
 * against recursion with a per-CPU counter.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
}
2821 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* User stack tracing not supported: no-op stub */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
2827 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2828 
2829 #endif /* CONFIG_STACKTRACE */
2830 
2831 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)2832 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2833 			  unsigned long long delta)
2834 {
2835 	entry->bottom_delta_ts = delta & U32_MAX;
2836 	entry->top_delta_ts = (delta >> 32);
2837 }
2838 
/*
 * Write a TRACE_FUNC_REPEATS entry summarizing @last_info: the
 * repeated call site, its repeat count, and the time elapsed since the
 * last recorded call (stored as two 32-bit halves of the delta).
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	/* Delta from the last recorded call to this event's timestamp */
	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}
2864 
trace_iterator_increment(struct trace_iterator * iter)2865 static void trace_iterator_increment(struct trace_iterator *iter)
2866 {
2867 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2868 
2869 	iter->idx++;
2870 	if (buf_iter)
2871 		ring_buffer_iter_advance(buf_iter);
2872 }
2873 
/*
 * Peek at the next entry on @cpu without consuming it.  Uses the
 * iterator's per-CPU ring buffer iterator when one exists, otherwise
 * peeks the live buffer directly.  On success fills *ts with the
 * entry's timestamp and, when @lost_events is given, the number of
 * dropped events ((unsigned long)-1 when the buffer iterator dropped
 * events).  Also updates iter->ent_size (0 when nothing is pending).
 */
static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
		unsigned long *lost_events)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

	if (buf_iter) {
		event = ring_buffer_iter_peek(buf_iter, ts);
		if (lost_events)
			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
				(unsigned long)-1 : 0;
	} else {
		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
					 lost_events);
	}

	if (event) {
		iter->ent_size = ring_buffer_event_length(event);
		return ring_buffer_event_data(event);
	}
	iter->ent_size = 0;
	return NULL;
}
2898 
/*
 * Find the pending entry with the earliest timestamp across all
 * tracing CPUs (or directly on iter->cpu_file when the iterator is
 * bound to a single CPU).  Reports the winning entry's CPU, timestamp
 * and lost-event count through the out parameters, and leaves
 * iter->ent_size set to its size.  Returns NULL when all buffers are
 * empty.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	/* peek_next_entry() overwrote ent_size per CPU; restore the winner's */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
2958 
2959 #define STATIC_FMT_BUF_SIZE	128
2960 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2961 
trace_iter_expand_format(struct trace_iterator * iter)2962 char *trace_iter_expand_format(struct trace_iterator *iter)
2963 {
2964 	char *tmp;
2965 
2966 	/*
2967 	 * iter->tr is NULL when used with tp_printk, which makes
2968 	 * this get called where it is not safe to call krealloc().
2969 	 */
2970 	if (!iter->tr || iter->fmt == static_fmt_buf)
2971 		return NULL;
2972 
2973 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2974 		       GFP_KERNEL);
2975 	if (tmp) {
2976 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
2977 		iter->fmt = tmp;
2978 	}
2979 
2980 	return tmp;
2981 }
2982 
/* Returns true if the string is safe to dereference from an event */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	/* Strings registered with tracepoint_string() are kept around */
	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	/* Dynamic events or events without a module cannot be vouched for */
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}
3028 
/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe.
 *
 * Return: true if the event is unsafe and should be ignored,
 *         false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Only events flagged at boot as needing runtime string checks */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		/* The field stores a string pointer; load it from the raw event */
		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
3111 
/*
 * Rewrite @fmt so every bare "%p" conversion becomes "%px", defeating
 * pointer hashing in the output. Only done when the HASH_PTR trace flag
 * is cleared; otherwise (and for tp_printk, where iter->tr is NULL) the
 * original format is returned untouched. The rewritten copy lives in
 * iter->fmt, which is grown on demand.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* Worst case this iteration writes 3 bytes ("%px") */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* The buffer may have moved; rebase the write cursor */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				/* "%%" is a literal percent; copy and skip it */
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
3150 
#define STATIC_TEMP_BUF_SIZE	128
/* Fallback entry-copy buffer for contexts where kmalloc() is unsafe */
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);

/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			/* Grow the copy buffer to fit the current entry */
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
3199 
3200 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3201 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3202 {
3203 	iter->ent = __find_next_entry(iter, &iter->cpu,
3204 				      &iter->lost_events, &iter->ts);
3205 
3206 	if (iter->ent)
3207 		trace_iterator_increment(iter);
3208 
3209 	return iter->ent ? iter : NULL;
3210 }
3211 
/*
 * Remove the next entry on the iterator's current CPU from the ring
 * buffer, storing its timestamp and lost-event count in the iterator.
 */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
3217 
/* seq_file ->next(): walk the iterator forward until it reaches *pos. */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int target = (int)*pos;
	void *ent = iter;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > target)
		return NULL;

	/* A fresh iterator (idx < 0) must first load an entry */
	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);

	while (ent && iter->idx < target)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
3244 
/*
 * Reset @cpu's buffer iterator to the start of its buffer, skipping any
 * entries stamped before the buffer's time_start and recording how many
 * were skipped for entry accounting.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	/* Remember the stale entries so they are excluded from the counts */
	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
3275 
/*
 * The current tracer is copied to avoid a global locking
 * all around.
 */
/*
 * seq_file ->start(): sync the iterator with the current tracer, then
 * position it at *pos — either by walking forward from a full reset
 * (when *pos moved) or by continuing from the previous position,
 * reusing a leftover trace_seq if the last ->show() overflowed.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	/* Snapshot reads are refused while the tracer itself uses the snapshot */
	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/* Position changed: restart from scratch and walk to *pos */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
3336 
s_stop(struct seq_file * m,void * p)3337 static void s_stop(struct seq_file *m, void *p)
3338 {
3339 	struct trace_iterator *iter = m->private;
3340 
3341 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
3342 		return;
3343 
3344 	trace_access_unlock(iter->cpu_file);
3345 	trace_event_read_unlock();
3346 }
3347 
3348 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)3349 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3350 		      unsigned long *entries, int cpu)
3351 {
3352 	unsigned long count;
3353 
3354 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3355 	/*
3356 	 * If this buffer has skipped entries, then we hold all
3357 	 * entries for the trace and we need to ignore the
3358 	 * ones before the time stamp.
3359 	 */
3360 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3361 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3362 		/* total is the same as the entries */
3363 		*total = count;
3364 	} else
3365 		*total = count +
3366 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3367 	*entries = count;
3368 }
3369 
3370 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)3371 get_total_entries(struct array_buffer *buf,
3372 		  unsigned long *total, unsigned long *entries)
3373 {
3374 	unsigned long t, e;
3375 	int cpu;
3376 
3377 	*total = 0;
3378 	*entries = 0;
3379 
3380 	for_each_tracing_cpu(cpu) {
3381 		get_total_entries_cpu(buf, &t, &e, cpu);
3382 		*total += t;
3383 		*entries += e;
3384 	}
3385 }
3386 
trace_total_entries_cpu(struct trace_array * tr,int cpu)3387 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3388 {
3389 	unsigned long total, entries;
3390 
3391 	if (!tr)
3392 		tr = &global_trace;
3393 
3394 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3395 
3396 	return entries;
3397 }
3398 
trace_total_entries(struct trace_array * tr)3399 unsigned long trace_total_entries(struct trace_array *tr)
3400 {
3401 	unsigned long total, entries;
3402 
3403 	if (!tr)
3404 		tr = &global_trace;
3405 
3406 	get_total_entries(&tr->array_buffer, &total, &entries);
3407 
3408 	return entries;
3409 }
3410 
/* Emit the column legend used by the latency-format output. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char * const header[] = {
		"#                    _------=> CPU#            \n",
		"#                   / _-----=> irqs-off/BH-disabled\n",
		"#                  | / _----=> need-resched    \n",
		"#                  || / _---=> hardirq/softirq \n",
		"#                  ||| / _--=> preempt-depth   \n",
		"#                  |||| / _-=> migrate-disable \n",
		"#                  ||||| /     delay           \n",
		"#  cmd     pid     |||||| time  |   caller     \n",
		"#     \\   /        ||||||  \\    |    /       \n",
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(header); i++)
		seq_puts(m, header[i]);
}
3423 
/* Print the entries-in-buffer/entries-written summary line. */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long entries, total;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
3434 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)3435 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3436 				   unsigned int flags)
3437 {
3438 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
3439 
3440 	print_event_info(buf, m);
3441 
3442 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3443 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3444 }
3445 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)3446 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3447 				       unsigned int flags)
3448 {
3449 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
3450 	static const char space[] = "            ";
3451 	int prec = tgid ? 12 : 2;
3452 
3453 	print_event_info(buf, m);
3454 
3455 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
3456 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3457 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3458 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3459 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
3460 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
3461 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3462 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
3463 }
3464 
/*
 * Print the verbose latency-trace banner: tracer name, kernel release,
 * measured latency, entry counts, preemption model and the task that
 * caused the max latency, plus the critical section boundaries when
 * they were recorded.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	/* Show where the recorded critical section started and ended */
	if (data->critical_start) {
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
3516 
/*
 * Print a one-time "CPU buffer started" annotation the first time output
 * is produced for the iterator's current CPU. Requires both the ANNOTATE
 * trace flag and the TRACE_FILE_ANNOTATE iterator flag.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* Already announced this CPU */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	/* CPUs whose stale entries were skipped get no annotation */
	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				iter->cpu);
}
3543 
#ifdef CONFIG_FTRACE_SYSCALLS
/* True when @event is the syscall enter or exit trace event */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
	       (event->funcs == &exit_syscall_print_funcs);

}
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
/* Without syscall tracing there are no syscall events */
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
#endif /* CONFIG_FTRACE_SYSCALLS */
3559 
/*
 * Print the current entry in the default human-readable format,
 * dispatching to the event's registered trace() callback when one
 * exists. Field-by-field printing is used instead when the FIELDS
 * flag is set, or when a persistent buffer's text_delta makes the
 * event's print_fmt string pointers unsafe to follow.
 */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	/* No formatter registered for this event type */
	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3605 
print_raw_fmt(struct trace_iterator * iter)3606 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3607 {
3608 	struct trace_array *tr = iter->tr;
3609 	struct trace_seq *s = &iter->seq;
3610 	struct trace_entry *entry;
3611 	struct trace_event *event;
3612 
3613 	entry = iter->ent;
3614 
3615 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3616 		trace_seq_printf(s, "%d %d %llu ",
3617 				 entry->pid, iter->cpu, iter->ts);
3618 
3619 	if (trace_seq_has_overflowed(s))
3620 		return TRACE_TYPE_PARTIAL_LINE;
3621 
3622 	event = ftrace_find_event(entry->type);
3623 	if (event)
3624 		return event->funcs->raw(iter, 0, event);
3625 
3626 	trace_seq_printf(s, "%d ?\n", entry->type);
3627 
3628 	return trace_handle_return(s);
3629 }
3630 
print_hex_fmt(struct trace_iterator * iter)3631 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3632 {
3633 	struct trace_array *tr = iter->tr;
3634 	struct trace_seq *s = &iter->seq;
3635 	unsigned char newline = '\n';
3636 	struct trace_entry *entry;
3637 	struct trace_event *event;
3638 
3639 	entry = iter->ent;
3640 
3641 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3642 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3643 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3644 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3645 		if (trace_seq_has_overflowed(s))
3646 			return TRACE_TYPE_PARTIAL_LINE;
3647 	}
3648 
3649 	event = ftrace_find_event(entry->type);
3650 	if (event) {
3651 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3652 		if (ret != TRACE_TYPE_HANDLED)
3653 			return ret;
3654 	}
3655 
3656 	SEQ_PUT_FIELD(s, newline);
3657 
3658 	return trace_handle_return(s);
3659 }
3660 
print_bin_fmt(struct trace_iterator * iter)3661 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3662 {
3663 	struct trace_array *tr = iter->tr;
3664 	struct trace_seq *s = &iter->seq;
3665 	struct trace_entry *entry;
3666 	struct trace_event *event;
3667 
3668 	entry = iter->ent;
3669 
3670 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3671 		SEQ_PUT_FIELD(s, entry->pid);
3672 		SEQ_PUT_FIELD(s, iter->cpu);
3673 		SEQ_PUT_FIELD(s, iter->ts);
3674 		if (trace_seq_has_overflowed(s))
3675 			return TRACE_TYPE_PARTIAL_LINE;
3676 	}
3677 
3678 	event = ftrace_find_event(entry->type);
3679 	return event ? event->funcs->binary(iter, 0, event) :
3680 		TRACE_TYPE_HANDLED;
3681 }
3682 
trace_empty(struct trace_iterator * iter)3683 int trace_empty(struct trace_iterator *iter)
3684 {
3685 	struct ring_buffer_iter *buf_iter;
3686 	int cpu;
3687 
3688 	/* If we are looking at one CPU buffer, only check that one */
3689 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3690 		cpu = iter->cpu_file;
3691 		buf_iter = trace_buffer_iter(iter, cpu);
3692 		if (buf_iter) {
3693 			if (!ring_buffer_iter_empty(buf_iter))
3694 				return 0;
3695 		} else {
3696 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3697 				return 0;
3698 		}
3699 		return 1;
3700 	}
3701 
3702 	for_each_tracing_cpu(cpu) {
3703 		buf_iter = trace_buffer_iter(iter, cpu);
3704 		if (buf_iter) {
3705 			if (!ring_buffer_iter_empty(buf_iter))
3706 				return 0;
3707 		} else {
3708 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3709 				return 0;
3710 		}
3711 	}
3712 
3713 	return 1;
3714 }
3715 
/*  Called with trace_event_read_lock() held. */
/*
 * Format the iterator's current entry into iter->seq, choosing the
 * output style in priority order: lost-event notice first, then the
 * tracer's own print_line(), then printk-msg-only shortcuts, then the
 * bin/hex/raw flag formats, and finally the default text format.
 */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	if (iter->lost_events) {
		/* (unsigned long)-1 means "events dropped, count unknown" */
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* Let the tracer override the output format entirely */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	if (iter->ent->type == TRACE_BPUTS &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_printk_msg_only(iter);

	if (trace_flags & TRACE_ITER(BIN))
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER(HEX))
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER(RAW))
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
3766 
trace_latency_header(struct seq_file * m)3767 void trace_latency_header(struct seq_file *m)
3768 {
3769 	struct trace_iterator *iter = m->private;
3770 	struct trace_array *tr = iter->tr;
3771 
3772 	/* print nothing if the buffers are empty */
3773 	if (trace_empty(iter))
3774 		return;
3775 
3776 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3777 		print_trace_header(m, iter);
3778 
3779 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3780 		print_lat_help_header(m);
3781 }
3782 
trace_default_header(struct seq_file * m)3783 void trace_default_header(struct seq_file *m)
3784 {
3785 	struct trace_iterator *iter = m->private;
3786 	struct trace_array *tr = iter->tr;
3787 	unsigned long trace_flags = tr->trace_flags;
3788 
3789 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
3790 		return;
3791 
3792 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3793 		/* print nothing if the buffers are empty */
3794 		if (trace_empty(iter))
3795 			return;
3796 		print_trace_header(m, iter);
3797 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
3798 			print_lat_help_header(m);
3799 	} else {
3800 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
3801 			if (trace_flags & TRACE_ITER(IRQ_INFO))
3802 				print_func_help_header_irq(iter->array_buffer,
3803 							   m, trace_flags);
3804 			else
3805 				print_func_help_header(iter->array_buffer, m,
3806 						       trace_flags);
3807 		}
3808 	}
3809 }
3810 
/* Warn in the header when function tracing has been disabled by a bug. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3818 
3819 #ifdef CONFIG_TRACER_SNAPSHOT
show_snapshot_main_help(struct seq_file * m)3820 static void show_snapshot_main_help(struct seq_file *m)
3821 {
3822 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3823 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3824 		    "#                      Takes a snapshot of the main buffer.\n"
3825 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3826 		    "#                      (Doesn't have to be '2' works with any number that\n"
3827 		    "#                       is not a '0' or '1')\n");
3828 }
3829 
show_snapshot_percpu_help(struct seq_file * m)3830 static void show_snapshot_percpu_help(struct seq_file *m)
3831 {
3832 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3833 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3834 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3835 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3836 #else
3837 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3838 		    "#                     Must use main snapshot file to allocate.\n");
3839 #endif
3840 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3841 		    "#                      (Doesn't have to be '2' works with any number that\n"
3842 		    "#                       is not a '0' or '1')\n");
3843 }
3844 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3845 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3846 {
3847 	if (iter->tr->allocated_snapshot)
3848 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3849 	else
3850 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3851 
3852 	seq_puts(m, "# Snapshot commands:\n");
3853 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3854 		show_snapshot_main_help(m);
3855 	else
3856 		show_snapshot_percpu_help(m);
3857 }
3858 #else
/* Stub for !CONFIG_TRACER_SNAPSHOT builds. Should never be called. */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3861 #endif
3862 
/*
 * seq_file ->show() callback for the "trace" file.
 *
 * Three cases, keyed off the iterator state:
 *  - iter->ent == NULL: at the start of output; print the header
 *    (tracer name, warnings, snapshot help or the default header).
 *  - iter->leftover: a previous line overflowed the seq_file buffer;
 *    re-emit the already-formatted trace_seq content.
 *  - otherwise: format the next trace entry into iter->seq and copy
 *    it to the seq_file, remembering any overflow in iter->leftover.
 */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* The line did not fit; flag it instead of emitting garbage */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 *  ret is 0 if seq_file write succeeded.
		 *        -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
3910 
3911 /*
3912  * Should be used after trace_array_get(), trace_types_lock
3913  * ensures that i_cdev was already initialized.
3914  */
tracing_get_cpu(struct inode * inode)3915 int tracing_get_cpu(struct inode *inode)
3916 {
3917 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3918 		return (long)inode->i_cdev - 1;
3919 	return RING_BUFFER_ALL_CPUS;
3920 }
3921 
/* seq_file operations backing reads of the "trace" file. */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
3928 
3929 /*
3930  * Note, as iter itself can be allocated and freed in different
3931  * ways, this function is only used to free its content, and not
3932  * the iterator itself. The only requirement to all the allocations
3933  * is that it must zero all fields (kzalloc), as freeing works with
3934  * ethier allocated content or NULL.
3935  */
free_trace_iter_content(struct trace_iterator * iter)3936 static void free_trace_iter_content(struct trace_iterator *iter)
3937 {
3938 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
3939 	if (iter->fmt != static_fmt_buf)
3940 		kfree(iter->fmt);
3941 
3942 	kfree(iter->temp);
3943 	kfree(iter->buffer_iter);
3944 	mutex_destroy(&iter->mutex);
3945 	free_cpumask_var(iter->started);
3946 }
3947 
/*
 * Allocate and initialize a trace_iterator for reading the "trace"
 * (or "snapshot", when @snapshot is true) file.
 *
 * Sets up per-CPU ring buffer iterators, selects the main or snapshot
 * array buffer, and optionally pauses tracing (pause-on-trace option).
 * Returns the iterator, or an ERR_PTR() on failure.  On failure all
 * partially allocated state is torn down here.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	/* iter is freed by seq_release_private() */
	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4055 
tracing_open_generic(struct inode * inode,struct file * filp)4056 int tracing_open_generic(struct inode *inode, struct file *filp)
4057 {
4058 	int ret;
4059 
4060 	ret = tracing_check_open_get_tr(NULL);
4061 	if (ret)
4062 		return ret;
4063 
4064 	filp->private_data = inode->i_private;
4065 	return 0;
4066 }
4067 
4068 /*
4069  * Open and update trace_array ref count.
4070  * Must have the current trace_array passed to it.
4071  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4072 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4073 {
4074 	struct trace_array *tr = inode->i_private;
4075 	int ret;
4076 
4077 	ret = tracing_check_open_get_tr(tr);
4078 	if (ret)
4079 		return ret;
4080 
4081 	if ((filp->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
4082 		trace_array_put(tr);
4083 		return -EACCES;
4084 	}
4085 
4086 	filp->private_data = inode->i_private;
4087 
4088 	return 0;
4089 }
4090 
4091 /*
4092  * The private pointer of the inode is the trace_event_file.
4093  * Update the tr ref count associated to it.
4094  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4095 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4096 {
4097 	struct trace_event_file *file = inode->i_private;
4098 	int ret;
4099 
4100 	ret = tracing_check_open_get_tr(file->tr);
4101 	if (ret)
4102 		return ret;
4103 
4104 	guard(mutex)(&event_mutex);
4105 
4106 	/* Fail if the file is marked for removal */
4107 	if (file->flags & EVENT_FILE_FL_FREED) {
4108 		trace_array_put(file->tr);
4109 		return -ENODEV;
4110 	} else {
4111 		event_file_get(file);
4112 	}
4113 
4114 	filp->private_data = inode->i_private;
4115 
4116 	return 0;
4117 }
4118 
tracing_release_file_tr(struct inode * inode,struct file * filp)4119 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4120 {
4121 	struct trace_event_file *file = inode->i_private;
4122 
4123 	trace_array_put(file->tr);
4124 	event_file_put(file);
4125 
4126 	return 0;
4127 }
4128 
/* Release for single_open() based event files: drop refs, then seq cleanup. */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);

	return single_release(inode, filp);
}
4134 
/*
 * Release for the "trace" file: tear down the trace_iterator built by
 * __tracing_open().  Finishes all per-CPU ring buffer iterators,
 * notifies the tracer, restarts tracing if it was paused at open time,
 * and drops the trace_array reference.
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
4172 
tracing_release_generic_tr(struct inode * inode,struct file * file)4173 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4174 {
4175 	struct trace_array *tr = inode->i_private;
4176 
4177 	trace_array_put(tr);
4178 	return 0;
4179 }
4180 
tracing_single_release_tr(struct inode * inode,struct file * file)4181 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4182 {
4183 	struct trace_array *tr = inode->i_private;
4184 
4185 	trace_array_put(tr);
4186 
4187 	return single_release(inode, file);
4188 }
4189 
4190 static bool update_last_data_if_empty(struct trace_array *tr);
4191 
/*
 * Open handler for the "trace" file.
 *
 * Opening with O_TRUNC for write erases the buffer contents (the
 * snapshot buffer when the current tracer uses one); opening for read
 * builds a trace_iterator via __tracing_open().
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* On any failure, release the reference taken above */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
4233 
4234 /*
4235  * Some tracers are not suitable for instance buffers.
4236  * A tracer is always available for the global array (toplevel)
4237  * or if it explicitly states that it is.
4238  */
4239 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4240 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4241 {
4242 	/* arrays with mapped buffer range do not have snapshots */
4243 	if (tr->range_addr_start && tracer_uses_snapshot(t))
4244 		return false;
4245 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4246 }
4247 
4248 /* Find the next tracer that this trace array may use */
4249 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4250 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4251 {
4252 	while (t && !trace_ok_for_array(t, tr))
4253 		t = t->next;
4254 
4255 	return t;
4256 }
4257 
4258 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4259 t_next(struct seq_file *m, void *v, loff_t *pos)
4260 {
4261 	struct trace_array *tr = m->private;
4262 	struct tracer *t = v;
4263 
4264 	(*pos)++;
4265 
4266 	if (t)
4267 		t = get_tracer_for_array(tr, t->next);
4268 
4269 	return t;
4270 }
4271 
/* seq ->start: take trace_types_lock and walk to position *pos. */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l = 0;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	while (t && l < *pos)
		t = t_next(m, t, &l);

	return t;
}
4286 
/* seq ->stop: drop the lock taken in t_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
4291 
t_show(struct seq_file * m,void * v)4292 static int t_show(struct seq_file *m, void *v)
4293 {
4294 	struct tracer *t = v;
4295 
4296 	if (!t)
4297 		return 0;
4298 
4299 	seq_puts(m, t->name);
4300 	if (t->next)
4301 		seq_putc(m, ' ');
4302 	else
4303 		seq_putc(m, '\n');
4304 
4305 	return 0;
4306 }
4307 
/* seq_file operations backing the "available_tracers" file. */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
4314 
show_traces_open(struct inode * inode,struct file * file)4315 static int show_traces_open(struct inode *inode, struct file *file)
4316 {
4317 	struct trace_array *tr = inode->i_private;
4318 	struct seq_file *m;
4319 	int ret;
4320 
4321 	ret = tracing_check_open_get_tr(tr);
4322 	if (ret)
4323 		return ret;
4324 
4325 	ret = seq_open(file, &show_traces_seq_ops);
4326 	if (ret) {
4327 		trace_array_put(tr);
4328 		return ret;
4329 	}
4330 
4331 	m = file->private_data;
4332 	m->private = tr;
4333 
4334 	return 0;
4335 }
4336 
tracing_seq_release(struct inode * inode,struct file * file)4337 static int tracing_seq_release(struct inode *inode, struct file *file)
4338 {
4339 	struct trace_array *tr = inode->i_private;
4340 
4341 	trace_array_put(tr);
4342 	return seq_release(inode, file);
4343 }
4344 
/*
 * Write stub for the "trace" file: writes are a no-op (clearing is
 * done via O_TRUNC at open time), but report the full count consumed.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
4351 
/* lseek for tracing files: seq_lseek for readers, position reset for writers. */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	if (file->f_mode & FMODE_READ)
		return seq_lseek(file, offset, whence);

	file->f_pos = 0;
	return 0;
}
4363 
/* File operations for the "trace" file. */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= copy_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};

/* File operations for the "available_tracers" file. */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
4380 
/*
 * Read handler for "tracing_cpumask": format the instance's tracing
 * cpumask as a bitmap string.  mask_str is auto-freed via __free(kfree)
 * on every return path.
 */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	char *mask_str __free(kfree) = NULL;
	int len;

	/* Size the buffer by formatting to a NULL destination first */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}
4402 
/*
 * Apply a new tracing cpumask to @tr: enable/disable per-CPU recording
 * for every CPU whose mask bit flips, then install the new mask.
 * Runs under max_lock with interrupts disabled so the mask change is
 * atomic with respect to buffer swaps.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
4440 
4441 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4442 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4443 		      size_t count, loff_t *ppos)
4444 {
4445 	struct trace_array *tr = file_inode(filp)->i_private;
4446 	cpumask_var_t tracing_cpumask_new;
4447 	int err;
4448 
4449 	if (count == 0 || count > KMALLOC_MAX_SIZE)
4450 		return -EINVAL;
4451 
4452 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4453 		return -ENOMEM;
4454 
4455 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4456 	if (err)
4457 		goto err_free;
4458 
4459 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4460 	if (err)
4461 		goto err_free;
4462 
4463 	free_cpumask_var(tracing_cpumask_new);
4464 
4465 	return count;
4466 
4467 err_free:
4468 	free_cpumask_var(tracing_cpumask_new);
4469 
4470 	return err;
4471 }
4472 
/* File operations for the "tracing_cpumask" file. */
static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};
4480 
/*
 * Show handler for "trace_options": list every global option (prefixed
 * with "no" when clear), followed by the current tracer's own options.
 */
static int tracing_trace_options_show(struct seq_file *m, void *v)
{
	struct tracer_opt *trace_opts;
	struct trace_array *tr = m->private;
	struct tracer_flags *flags;
	u32 tracer_flags;
	int i;

	guard(mutex)(&trace_types_lock);

	for (i = 0; trace_options[i]; i++) {
		if (tr->trace_flags & (1ULL << i))
			seq_printf(m, "%s\n", trace_options[i]);
		else
			seq_printf(m, "no%s\n", trace_options[i]);
	}

	/* The current tracer may have no options of its own */
	flags = tr->current_trace_flags;
	if (!flags || !flags->opts)
		return 0;

	tracer_flags = flags->val;
	trace_opts = flags->opts;

	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
	}

	return 0;
}
4514 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)4515 static int __set_tracer_option(struct trace_array *tr,
4516 			       struct tracer_flags *tracer_flags,
4517 			       struct tracer_opt *opts, int neg)
4518 {
4519 	struct tracer *trace = tracer_flags->trace;
4520 	int ret = 0;
4521 
4522 	if (trace->set_flag)
4523 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4524 	if (ret)
4525 		return ret;
4526 
4527 	if (neg)
4528 		tracer_flags->val &= ~opts->bit;
4529 	else
4530 		tracer_flags->val |= opts->bit;
4531 	return 0;
4532 }
4533 
4534 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)4535 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4536 {
4537 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
4538 	struct tracer_opt *opts = NULL;
4539 	int i;
4540 
4541 	if (!tracer_flags || !tracer_flags->opts)
4542 		return 0;
4543 
4544 	for (i = 0; tracer_flags->opts[i].name; i++) {
4545 		opts = &tracer_flags->opts[i];
4546 
4547 		if (strcmp(cmp, opts->name) == 0)
4548 			return __set_tracer_option(tr, tracer_flags, opts, neg);
4549 	}
4550 
4551 	return -EINVAL;
4552 }
4553 
4554 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u64 mask,int set)4555 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
4556 {
4557 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
4558 		return -1;
4559 
4560 	return 0;
4561 }
4562 
/*
 * Set or clear one global trace option bit on @tr and perform the
 * side effects the option requires (cmdline/tgid recording, printk
 * redirection, overwrite mode, fork following, ...).
 *
 * Returns 0 on success, -EINVAL if the tracer or option rejects the
 * change, -ENOMEM if required state could not be allocated.
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	/* These options touch state guarded by event_mutex */
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* Options that may refuse or fully handle the change themselves */
	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * It's flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set.
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Options with additional side effects after the bit is set */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		if (trace_alloc_tgid_map() < 0) {
			/* Roll the flag back if the tgid map allocation failed */
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}
4657 
/*
 * Parse one option token (optionally prefixed with "no" to negate) and
 * apply it as either a global trace flag or a tracer-specific option.
 *
 * @option may be a slice of a larger comma-separated buffer: strstrip()
 * can replace its first trailing whitespace with '\0', which is undone
 * at the end so the caller's buffer can be reparsed later.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* str_has_prefix() returns the prefix length (2) or 0 */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4696 
apply_trace_boot_options(void)4697 static void __init apply_trace_boot_options(void)
4698 {
4699 	char *buf = trace_boot_options_buf;
4700 	char *option;
4701 
4702 	while (true) {
4703 		option = strsep(&buf, ",");
4704 
4705 		if (!option)
4706 			break;
4707 
4708 		if (*option)
4709 			trace_set_options(&global_trace, option);
4710 
4711 		/* Put back the comma to allow this to be called again */
4712 		if (buf)
4713 			*(buf - 1) = ',';
4714 	}
4715 }
4716 
4717 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)4718 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4719 			size_t cnt, loff_t *ppos)
4720 {
4721 	struct seq_file *m = filp->private_data;
4722 	struct trace_array *tr = m->private;
4723 	char buf[64];
4724 	int ret;
4725 
4726 	if (cnt >= sizeof(buf))
4727 		return -EINVAL;
4728 
4729 	if (copy_from_user(buf, ubuf, cnt))
4730 		return -EFAULT;
4731 
4732 	buf[cnt] = 0;
4733 
4734 	ret = trace_set_options(tr, buf);
4735 	if (ret < 0)
4736 		return ret;
4737 
4738 	*ppos += cnt;
4739 
4740 	return cnt;
4741 }
4742 
tracing_trace_options_open(struct inode * inode,struct file * file)4743 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4744 {
4745 	struct trace_array *tr = inode->i_private;
4746 	int ret;
4747 
4748 	ret = tracing_check_open_get_tr(tr);
4749 	if (ret)
4750 		return ret;
4751 
4752 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4753 	if (ret < 0)
4754 		trace_array_put(tr);
4755 
4756 	return ret;
4757 }
4758 
/* File operations for the "trace_options" file. */
static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
4766 
4767 static const char readme_msg[] =
4768 	"tracing mini-HOWTO:\n\n"
4769 	"By default tracefs removes all OTH file permission bits.\n"
4770 	"When mounting tracefs an optional group id can be specified\n"
4771 	"which adds the group to every directory and file in tracefs:\n\n"
4772 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4773 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4774 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4775 	" Important files:\n"
4776 	"  trace\t\t\t- The static contents of the buffer\n"
4777 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4778 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4779 	"  current_tracer\t- function and latency tracers\n"
4780 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4781 	"  error_log\t- error log for failed commands (that support it)\n"
4782 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4783 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4784 	"  trace_clock\t\t- change the clock used to order events\n"
4785 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4786 	"      global:   Synced across CPUs but slows tracing down.\n"
4787 	"     counter:   Not a clock, but just an increment\n"
4788 	"      uptime:   Jiffy counter from time of boot\n"
4789 	"        perf:   Same clock that perf events use\n"
4790 #ifdef CONFIG_X86_64
4791 	"     x86-tsc:   TSC cycle counter\n"
4792 #endif
4793 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4794 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4795 	"    absolute:   Absolute (standalone) timestamp\n"
4796 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4797 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4798 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4799 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4800 	"\t\t\t  Remove sub-buffer with rmdir\n"
4801 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4802 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4803 	"\t\t\t  option name\n"
4804 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4805 #ifdef CONFIG_DYNAMIC_FTRACE
4806 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4807 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4808 	"\t\t\t  functions\n"
4809 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4810 	"\t     modules: Can select a group via module\n"
4811 	"\t      Format: :mod:<module-name>\n"
4812 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4813 	"\t    triggers: a command to perform when function is hit\n"
4814 	"\t      Format: <function>:<trigger>[:count]\n"
4815 	"\t     trigger: traceon, traceoff\n"
4816 	"\t\t      enable_event:<system>:<event>\n"
4817 	"\t\t      disable_event:<system>:<event>\n"
4818 #ifdef CONFIG_STACKTRACE
4819 	"\t\t      stacktrace\n"
4820 #endif
4821 #ifdef CONFIG_TRACER_SNAPSHOT
4822 	"\t\t      snapshot\n"
4823 #endif
4824 	"\t\t      dump\n"
4825 	"\t\t      cpudump\n"
4826 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4827 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4828 	"\t     The first one will disable tracing every time do_fault is hit\n"
4829 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4830 	"\t       The first time do trap is hit and it disables tracing, the\n"
4831 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4832 	"\t       the counter will not decrement. It only decrements when the\n"
4833 	"\t       trigger did work\n"
4834 	"\t     To remove trigger without count:\n"
4835 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4836 	"\t     To remove trigger with a count:\n"
4837 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4838 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4839 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4840 	"\t    modules: Can select a group via module command :mod:\n"
4841 	"\t    Does not accept triggers\n"
4842 #endif /* CONFIG_DYNAMIC_FTRACE */
4843 #ifdef CONFIG_FUNCTION_TRACER
4844 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4845 	"\t\t    (function)\n"
4846 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4847 	"\t\t    (function)\n"
4848 #endif
4849 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4850 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4851 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4852 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4853 #endif
4854 #ifdef CONFIG_TRACER_SNAPSHOT
4855 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4856 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4857 	"\t\t\t  information\n"
4858 #endif
4859 #ifdef CONFIG_STACK_TRACER
4860 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4861 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4862 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4863 	"\t\t\t  new trace)\n"
4864 #ifdef CONFIG_DYNAMIC_FTRACE
4865 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4866 	"\t\t\t  traces\n"
4867 #endif
4868 #endif /* CONFIG_STACK_TRACER */
4869 #ifdef CONFIG_DYNAMIC_EVENTS
4870 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4871 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4872 #endif
4873 #ifdef CONFIG_KPROBE_EVENTS
4874 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4875 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4876 #endif
4877 #ifdef CONFIG_UPROBE_EVENTS
4878 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4879 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4880 #endif
4881 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4882     defined(CONFIG_FPROBE_EVENTS)
4883 	"\t  accepts: event-definitions (one definition per line)\n"
4884 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4885 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4886 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4887 #endif
4888 #ifdef CONFIG_FPROBE_EVENTS
4889 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4890 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4891 #endif
4892 #ifdef CONFIG_HIST_TRIGGERS
4893 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4894 #endif
4895 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4896 	"\t           -:[<group>/][<event>]\n"
4897 #ifdef CONFIG_KPROBE_EVENTS
4898 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4899   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4900 #endif
4901 #ifdef CONFIG_UPROBE_EVENTS
4902   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4903 #endif
4904 	"\t     args: <name>=fetcharg[:type]\n"
4905 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4906 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4907 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4908 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4909 	"\t           <argname>[->field[->field|.field...]],\n"
4910 #endif
4911 #else
4912 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4913 #endif
4914 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4915 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
4916 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4917 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4918 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4919 #ifdef CONFIG_HIST_TRIGGERS
4920 	"\t    field: <stype> <name>;\n"
4921 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4922 	"\t           [unsigned] char/int/long\n"
4923 #endif
4924 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
4925 	"\t            of the <attached-group>/<attached-event>.\n"
4926 #endif
4927 	"  set_event\t\t- Enables events by name written into it\n"
4928 	"\t\t\t  Can enable module events via: :mod:<module>\n"
4929 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4930 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4931 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4932 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4933 	"\t\t\t  events\n"
4934 	"      filter\t\t- If set, only events passing filter are traced\n"
4935 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4936 	"\t\t\t  <event>:\n"
4937 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4938 	"      filter\t\t- If set, only events passing filter are traced\n"
4939 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4940 	"\t    Format: <trigger>[:count][if <filter>]\n"
4941 	"\t   trigger: traceon, traceoff\n"
4942 	"\t            enable_event:<system>:<event>\n"
4943 	"\t            disable_event:<system>:<event>\n"
4944 #ifdef CONFIG_HIST_TRIGGERS
4945 	"\t            enable_hist:<system>:<event>\n"
4946 	"\t            disable_hist:<system>:<event>\n"
4947 #endif
4948 #ifdef CONFIG_STACKTRACE
4949 	"\t\t    stacktrace\n"
4950 #endif
4951 #ifdef CONFIG_TRACER_SNAPSHOT
4952 	"\t\t    snapshot\n"
4953 #endif
4954 #ifdef CONFIG_HIST_TRIGGERS
4955 	"\t\t    hist (see below)\n"
4956 #endif
4957 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4958 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4959 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4960 	"\t                  events/block/block_unplug/trigger\n"
4961 	"\t   The first disables tracing every time block_unplug is hit.\n"
4962 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4963 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4964 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4965 	"\t   Like function triggers, the counter is only decremented if it\n"
4966 	"\t    enabled or disabled tracing.\n"
4967 	"\t   To remove a trigger without a count:\n"
4968 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4969 	"\t   To remove a trigger with a count:\n"
4970 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4971 	"\t   Filters can be ignored when removing a trigger.\n"
4972 #ifdef CONFIG_HIST_TRIGGERS
4973 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4974 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4975 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4976 	"\t            [:values=<field1[,field2,...]>]\n"
4977 	"\t            [:sort=<field1[,field2,...]>]\n"
4978 	"\t            [:size=#entries]\n"
4979 	"\t            [:pause][:continue][:clear]\n"
4980 	"\t            [:name=histname1]\n"
4981 	"\t            [:nohitcount]\n"
4982 	"\t            [:<handler>.<action>]\n"
4983 	"\t            [if <filter>]\n\n"
4984 	"\t    Note, special fields can be used as well:\n"
4985 	"\t            common_timestamp - to record current timestamp\n"
4986 	"\t            common_cpu - to record the CPU the event happened on\n"
4987 	"\n"
4988 	"\t    A hist trigger variable can be:\n"
4989 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
4990 	"\t        - a reference to another variable e.g. y=$x,\n"
4991 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
4992 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4993 	"\n"
4994 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4995 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
4996 	"\t    variable reference, field or numeric literal.\n"
4997 	"\n"
4998 	"\t    When a matching event is hit, an entry is added to a hash\n"
4999 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5000 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5001 	"\t    correspond to fields in the event's format description.  Keys\n"
5002 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5003 	"\t    Compound keys consisting of up to two fields can be specified\n"
5004 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5005 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5006 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5007 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5008 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5009 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5010 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5011 	"\t    its histogram data will be shared with other triggers of the\n"
5012 	"\t    same name, and trigger hits will update this common data.\n\n"
5013 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5014 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5015 	"\t    triggers attached to an event, there will be a table for each\n"
5016 	"\t    trigger in the output.  The table displayed for a named\n"
5017 	"\t    trigger will be the same as any other instance having the\n"
5018 	"\t    same name.  The default format used to display a given field\n"
5019 	"\t    can be modified by appending any of the following modifiers\n"
5020 	"\t    to the field name, as applicable:\n\n"
5021 	"\t            .hex        display a number as a hex value\n"
5022 	"\t            .sym        display an address as a symbol\n"
5023 	"\t            .sym-offset display an address as a symbol and offset\n"
5024 	"\t            .execname   display a common_pid as a program name\n"
5025 	"\t            .syscall    display a syscall id as a syscall name\n"
5026 	"\t            .log2       display log2 value rather than raw number\n"
5027 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5028 	"\t            .usecs      display a common_timestamp in microseconds\n"
5029 	"\t            .percent    display a number of percentage value\n"
5030 	"\t            .graph      display a bar-graph of a value\n\n"
5031 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5032 	"\t    trigger or to start a hist trigger but not log any events\n"
5033 	"\t    until told to do so.  'continue' can be used to start or\n"
5034 	"\t    restart a paused hist trigger.\n\n"
5035 	"\t    The 'clear' parameter will clear the contents of a running\n"
5036 	"\t    hist trigger and leave its current paused/active state\n"
5037 	"\t    unchanged.\n\n"
5038 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5039 	"\t    raw hitcount in the histogram.\n\n"
5040 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5041 	"\t    have one event conditionally start and stop another event's\n"
5042 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5043 	"\t    the enable_event and disable_event triggers.\n\n"
5044 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5045 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5046 	"\t        <handler>.<action>\n\n"
5047 	"\t    The available handlers are:\n\n"
5048 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5049 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5050 	"\t        onchange(var)            - invoke action if var changes\n\n"
5051 	"\t    The available actions are:\n\n"
5052 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5053 	"\t        save(field,...)                      - save current event fields\n"
5054 #ifdef CONFIG_TRACER_SNAPSHOT
5055 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5056 #endif
5057 #ifdef CONFIG_SYNTH_EVENTS
5058 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5059 	"\t  Write into this file to define/undefine new synthetic events.\n"
5060 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5061 #endif
5062 #endif
5063 ;
5064 
5065 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5066 tracing_readme_read(struct file *filp, char __user *ubuf,
5067 		       size_t cnt, loff_t *ppos)
5068 {
5069 	return simple_read_from_buffer(ubuf, cnt, ppos,
5070 					readme_msg, strlen(readme_msg));
5071 }
5072 
/* File operations for the read-only "README" help-text file. */
static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};
5078 
5079 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5080 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5081 update_eval_map(union trace_eval_map_item *ptr)
5082 {
5083 	if (!ptr->map.eval_string) {
5084 		if (ptr->tail.next) {
5085 			ptr = ptr->tail.next;
5086 			/* Set ptr to the next real item (skip head) */
5087 			ptr++;
5088 		} else
5089 			return NULL;
5090 	}
5091 	return ptr;
5092 }
5093 
/* seq_file ->next: advance to the following eval map entry. */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *item = v;

	(*pos)++;

	/*
	 * Paranoid! If v already points at the end, don't increment past it.
	 * This really should never happen.
	 */
	item = update_eval_map(item);
	if (WARN_ON_ONCE(!item))
		return NULL;

	/* Step to the next slot, skipping array boundaries as needed. */
	return update_eval_map(item + 1);
}
5112 
/* seq_file ->start: take the lock and walk to position *pos. */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t l;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	if (item)
		item++;	/* the first real entry follows the head item */

	for (l = 0; item && l < *pos; )
		item = eval_map_next(m, item, &l);

	return item;
}
5130 
/* seq_file ->stop: drop the lock taken in eval_map_start(). */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
5135 
eval_map_show(struct seq_file * m,void * v)5136 static int eval_map_show(struct seq_file *m, void *v)
5137 {
5138 	union trace_eval_map_item *ptr = v;
5139 
5140 	seq_printf(m, "%s %ld (%s)\n",
5141 		   ptr->map.eval_string, ptr->map.eval_value,
5142 		   ptr->map.system);
5143 
5144 	return 0;
5145 }
5146 
/* seq_file iterator over the eval string-to-value maps. */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
5153 
tracing_eval_map_open(struct inode * inode,struct file * filp)5154 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5155 {
5156 	int ret;
5157 
5158 	ret = tracing_check_open_get_tr(NULL);
5159 	if (ret)
5160 		return ret;
5161 
5162 	return seq_open(filp, &tracing_eval_map_seq_ops);
5163 }
5164 
/* File operations for the "eval_map" tracefs file. */
static const struct file_operations tracing_eval_map_fops = {
	.open		= tracing_eval_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
5171 
5172 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5173 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5174 {
5175 	/* Return tail of array given the head */
5176 	return ptr + ptr->head.length + 1;
5177 }
5178 
/*
 * Copy the eval maps in [@start, @start + @len) into a freshly allocated
 * array and link that array onto the tail of the trace_eval_maps list,
 * so the "eval_map" file can show them.  @mod is recorded in the head
 * item (NULL for core kernel maps — TODO confirm against callers).
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Walk array-to-array via the tail links to find the end. */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	/* Fill in the head item, then copy the maps after it. */
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* Zeroed tail item terminates this array (tail.next == NULL). */
	memset(map_array, 0, sizeof(*map_array));
}
5226 
/* Create the read-only "eval_map" file in the tracefs directory. */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
5232 
#else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Without the "eval_map" file, these are no-ops. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5238 
/*
 * Update all trace events with the eval maps in [@start, @start + @len)
 * and, when there are entries, also record them for the "eval_map" file.
 * NOTE(review): with @len <= 0, trace_event_update_all() is still called
 * when BTF type-tag support is built in — presumably so the sanitizer
 * pass runs even without maps; confirm against trace_event_update_all().
 */
static void
trace_event_update_with_eval_map(struct module *mod,
				 struct trace_eval_map **start,
				 int len)
{
	struct trace_eval_map **map;

	/* Always run sanitizer only if btf_type_tag attr exists. */
	if (len <= 0) {
		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
		      __has_attribute(btf_type_tag)))
			return;
	}

	map = start;

	trace_event_update_all(map, len);

	/* Nothing to save for the "eval_map" file. */
	if (len <= 0)
		return;

	trace_insert_eval_map_file(mod, start, len);
}
5263 
/* Read the current tracer's name (plus newline) for this instance. */
static ssize_t
tracing_set_trace_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[MAX_TRACER_SIZE+2];
	int r;

	/* Hold trace_types_lock so current_trace cannot change mid-read. */
	scoped_guard(mutex, &trace_types_lock) {
		r = sprintf(buf, "%s\n", tr->current_trace->name);
	}

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
5278 
/*
 * Reset @tr's buffer and initialize it with tracer @t.
 * Returns the tracer's ->init() result (0 on success).
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
	/* Start the new tracer with an empty buffer. */
	tracing_reset_online_cpus(&tr->array_buffer);
	update_last_data_if_empty(tr);
	return t->init(tr);
}
5285 
/* Record @val as the per-CPU entry count for every tracing CPU of @buf. */
static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
5293 
update_buffer_entries(struct array_buffer * buf,int cpu)5294 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5295 {
5296 	if (cpu == RING_BUFFER_ALL_CPUS) {
5297 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5298 	} else {
5299 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5300 	}
5301 }
5302 
5303 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Resize @trace_buf's ring buffer(s) to match the per-CPU entry counts
 * recorded in @size_buf, for @cpu_id or all CPUs.  On success the entry
 * counts in @trace_buf are updated to the copied sizes.  Returns 0 or
 * the first negative error from ring_buffer_resize().
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			/* Stop at the first failure; earlier CPUs stay resized. */
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
5329 #endif /* CONFIG_TRACER_SNAPSHOT */
5330 
/*
 * Resize @tr's main (and, if allocated, snapshot) ring buffer to @size
 * entries for @cpu (or RING_BUFFER_ALL_CPUS).  Tracing is stopped for
 * the duration.  Returns 0 on success or a negative error.
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	trace_set_ring_buffer_expanded(tr);

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->allocated_snapshot)
		goto out;

	/* Keep the snapshot buffer the same size as the main buffer. */
	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
	if (ret < 0) {
		/*
		 * Snapshot resize failed: put the main buffer back to the
		 * original per-CPU sizes still recorded in its data.
		 */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->snapshot_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_SNAPSHOT */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
5393 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5394 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5395 				  unsigned long size, int cpu_id)
5396 {
5397 	guard(mutex)(&trace_types_lock);
5398 
5399 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5400 		/* make sure, this cpu is enabled in the mask */
5401 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5402 			return -EINVAL;
5403 	}
5404 
5405 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5406 }
5407 
/* One module's text base address and name, saved from a previous boot. */
struct trace_mod_entry {
	unsigned long	mod_addr;
	char		mod_name[MODULE_NAME_LEN];
};

/*
 * Persistent ring buffer scratch area: the saved trace clock, kernel
 * text address, and a sorted array of per-module text addresses used
 * by trace_adjust_address() to map old addresses to the current boot.
 */
struct trace_scratch {
	unsigned int		clock_id;
	unsigned long		text_addr;
	unsigned long		nr_entries;
	struct trace_mod_entry	entries[];
};

/* Serializes updates to the trace_scratch module entries. */
static DEFINE_MUTEX(scratch_mutex);
5421 
/*
 * __inline_bsearch() comparator: does address @key fall within
 * [ent[0].mod_addr, ent[1].mod_addr)?  The entries are sorted and the
 * search covers nr_entries - 1 items, so ent[1] is always valid.
 */
static int cmp_mod_entry(const void *key, const void *pivot)
{
	unsigned long addr = (unsigned long)key;
	const struct trace_mod_entry *ent = pivot;

	if (addr < ent[0].mod_addr)
		return -1;

	/* 0 (match) when addr < ent[1].mod_addr, else 1 (search higher). */
	return addr >= ent[1].mod_addr;
}
5432 
/**
 * trace_adjust_address() - Adjust prev boot address to current address.
 * @tr: Persistent ring buffer's trace_array.
 * @addr: Address in @tr which is adjusted.
 *
 * Returns the address translated to the current boot, or @addr itself
 * when no translation applies.
 */
unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned long raddr;
	int idx = 0, nr_entries;

	/* If we don't have last boot delta, return the address */
	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return addr;

	/* tr->module_delta must be protected by rcu. */
	guard(rcu)();
	tscratch = tr->scratch;
	/* if there is no tscratch, module_delta must be NULL. */
	module_delta = READ_ONCE(tr->module_delta);
	if (!module_delta || !tscratch->nr_entries ||
	    tscratch->entries[0].mod_addr > addr) {
		/* Not a module address: try the core kernel text delta. */
		raddr = addr + tr->text_delta;
		/* Only accept the shifted address if it lands in the kernel. */
		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
			is_kernel_rodata(raddr) ? raddr : addr;
	}

	/* Note that entries must be sorted. */
	nr_entries = tscratch->nr_entries;
	if (nr_entries == 1 ||
	    tscratch->entries[nr_entries - 1].mod_addr < addr)
		idx = nr_entries - 1;
	else {
		/* Find the entry whose [mod_addr, next mod_addr) holds addr. */
		entry = __inline_bsearch((void *)addr,
				tscratch->entries,
				nr_entries - 1,
				sizeof(tscratch->entries[0]),
				cmp_mod_entry);
		if (entry)
			idx = entry - tscratch->entries;
	}

	return addr + module_delta->delta[idx];
}
5479 
5480 #ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: append @mod's text base address and
 * name to the trace_scratch of the trace_array passed in @data.
 * Returns 0 on success, -1 when there is no scratch or it is full
 * (which stops the iteration).
 */
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned int size;

	tscratch = tr->scratch;
	if (!tscratch)
		return -1;
	size = tr->scratch_size;

	/* Bail out if one more entry would overflow the scratch area. */
	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
		return -1;

	entry = &tscratch->entries[tscratch->nr_entries];

	tscratch->nr_entries++;

	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(entry->mod_name, mod->name);

	return 0;
}
#else
/* Without CONFIG_MODULES there are no module entries to save. */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
#endif
5511 
/*
 * Transition a persistent ("boot") buffer that still holds last-boot
 * data over to recording current-boot data: clear the buffers, rebuild
 * the module list in the scratch area, and drop the module deltas.
 */
static void update_last_data(struct trace_array *tr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;

	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
		return;

	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return;

	/* Only if the buffer has previous boot data clear and update it. */
	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;

	/* If this is a backup instance, mark it for autoremove. */
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		tr->free_on_close = true;

	/* Reset the module list and reload them */
	if (tr->scratch) {
		/* NOTE(review): shadows the outer tscratch intentionally? */
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = tr->clock_id;
		memset(tscratch->entries, 0,
		       flex_array_size(tscratch, entries, tscratch->nr_entries));
		tscratch->nr_entries = 0;

		/* guard scope ends with this block, after the mod walk. */
		guard(mutex)(&scratch_mutex);
		module_for_each_mod(save_mod, tr);
	}

	/*
	 * Need to clear all CPU buffers as there cannot be events
	 * from the previous boot mixed with events with this boot
	 * as that will cause a confusing trace. Need to clear all
	 * CPU buffers, even for those that may currently be offline.
	 */
	tracing_reset_all_cpus(&tr->array_buffer);

	/* Using current data now */
	tr->text_delta = 0;

	if (!tr->scratch)
		return;

	tscratch = tr->scratch;
	/* Deltas no longer apply; publish NULL before freeing via RCU. */
	module_delta = READ_ONCE(tr->module_delta);
	WRITE_ONCE(tr->module_delta, NULL);
	kfree_rcu(module_delta, rcu);

	/* Set the persistent ring buffer meta data to this address */
	tscratch->text_addr = (unsigned long)_text;
}
5565 
/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 * @tr: The tracing instance
 *
 * To save on memory when the tracing is never used on a system with it
 * configured in. The ring buffers are set to a minimum size. But once
 * a user starts to use the tracing facility, then they need to grow
 * to their default size.
 *
 * This function is to be called when a tracer is about to be used.
 *
 * Returns 0 on success, or a negative error from resizing the buffer.
 */
int tracing_update_buffers(struct trace_array *tr)
{
	int ret = 0;

	/* NULL means the top-level (global) trace instance. */
	if (!tr)
		tr = &global_trace;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	/* Only expand once; the flag is set by the resize itself. */
	if (!tr->ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	return ret;
}
5593 
/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	if (tr->current_trace == &nop_trace)
		return;

	tr->current_trace->enabled--;

	/* Give the outgoing tracer a chance to tear down its state. */
	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;
}
5611 
5612 static bool tracer_options_updated;
5613 
/*
 * Switch @tr's current tracer to the registered tracer named @buf.
 * Must be callable from contexts holding no tracing locks; takes
 * trace_types_lock itself.  Returns 0 on success, -EINVAL for an
 * unknown/disallowed tracer, -EBUSY when pipe readers or a conditional
 * snapshot block the switch, or an init/resize error.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *trace = NULL;
	struct tracers *t;
	bool had_max_tr;
	int ret;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	/* Expand the ring buffer to its full size on first real use. */
	if (!tr->ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;
		ret = 0;
	}

	/* Look the tracer up by name in this instance's tracer list. */
	list_for_each_entry(t, &tr->tracers, list) {
		if (strcmp(t->tracer->name, buf) == 0) {
			trace = t->tracer;
			break;
		}
	}
	if (!trace)
		return -EINVAL;

	/* Nothing to do when the tracer is already current. */
	if (trace == tr->current_trace)
		return 0;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A pending conditional snapshot blocks snapshot-using tracers. */
	if (tracer_uses_snapshot(trace)) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		ret = tr->cond_snapshot ? -EBUSY : 0;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			return ret;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && trace->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			trace->name);
		return -EINVAL;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(trace, tr))
		return -EINVAL;

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref)
		return -EBUSY;

	trace_branch_disable();

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	had_max_tr = tracer_uses_snapshot(tr->current_trace);

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	if (had_max_tr && !tracer_uses_snapshot(trace)) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
		tracing_disarm_snapshot(tr);
	}

	if (!had_max_tr && tracer_uses_snapshot(trace)) {
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;
	}

	/* Per-instance flag overrides take precedence over tracer defaults. */
	tr->current_trace_flags = t->flags ? : t->tracer->flags;

	if (trace->init) {
		ret = tracer_init(trace, tr);
		if (ret) {
			/* Init failed: undo the snapshot arm and stay on nop. */
			if (tracer_uses_snapshot(trace))
				tracing_disarm_snapshot(tr);
			tr->current_trace_flags = nop_trace.flags;
			return ret;
		}
	}

	tr->current_trace = trace;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);

	return 0;
}
5721 
5722 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5723 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5724 			size_t cnt, loff_t *ppos)
5725 {
5726 	struct trace_array *tr = filp->private_data;
5727 	char buf[MAX_TRACER_SIZE+1];
5728 	char *name;
5729 	size_t ret;
5730 	int err;
5731 
5732 	ret = cnt;
5733 
5734 	if (cnt > MAX_TRACER_SIZE)
5735 		cnt = MAX_TRACER_SIZE;
5736 
5737 	if (copy_from_user(buf, ubuf, cnt))
5738 		return -EFAULT;
5739 
5740 	buf[cnt] = 0;
5741 
5742 	name = strim(buf);
5743 
5744 	err = tracing_set_tracer(tr, name);
5745 	if (err)
5746 		return err;
5747 
5748 	*ppos += ret;
5749 
5750 	return ret;
5751 }
5752 
5753 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)5754 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5755 		   size_t cnt, loff_t *ppos)
5756 {
5757 	char buf[64];
5758 	int r;
5759 
5760 	r = snprintf(buf, sizeof(buf), "%ld\n",
5761 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5762 	if (r > sizeof(buf))
5763 		r = sizeof(buf);
5764 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5765 }
5766 
5767 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)5768 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5769 		    size_t cnt, loff_t *ppos)
5770 {
5771 	unsigned long val;
5772 	int ret;
5773 
5774 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5775 	if (ret)
5776 		return ret;
5777 
5778 	*ptr = val * 1000;
5779 
5780 	return cnt;
5781 }
5782 
5783 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5784 tracing_thresh_read(struct file *filp, char __user *ubuf,
5785 		    size_t cnt, loff_t *ppos)
5786 {
5787 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5788 }
5789 
static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	/* Serialize against tracer changes while the threshold is updated. */
	guard(mutex)(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		return ret;

	/* Give the current tracer a chance to react to the new threshold. */
	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			return ret;
	}

	return cnt;
}
5810 
5811 #ifdef CONFIG_TRACER_MAX_TRACE
5812 
5813 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5814 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5815 		     size_t cnt, loff_t *ppos)
5816 {
5817 	struct trace_array *tr = filp->private_data;
5818 
5819 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
5820 }
5821 
5822 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5823 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5824 		      size_t cnt, loff_t *ppos)
5825 {
5826 	struct trace_array *tr = filp->private_data;
5827 
5828 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
5829 }
5830 
5831 #endif
5832 
open_pipe_on_cpu(struct trace_array * tr,int cpu)5833 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5834 {
5835 	if (cpu == RING_BUFFER_ALL_CPUS) {
5836 		if (cpumask_empty(tr->pipe_cpumask)) {
5837 			cpumask_setall(tr->pipe_cpumask);
5838 			return 0;
5839 		}
5840 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5841 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
5842 		return 0;
5843 	}
5844 	return -EBUSY;
5845 }
5846 
close_pipe_on_cpu(struct trace_array * tr,int cpu)5847 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5848 {
5849 	if (cpu == RING_BUFFER_ALL_CPUS) {
5850 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
5851 		cpumask_clear(tr->pipe_cpumask);
5852 	} else {
5853 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5854 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5855 	}
5856 }
5857 
/*
 * Open a trace_pipe file: allocate and initialize the iterator used by
 * the read/poll/splice handlers, and claim the chosen CPU (or all CPUs)
 * in tr->pipe_cpumask so that only one consumer exists per CPU.
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	/* Takes a reference on @tr, dropped again on the failure paths. */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	guard(mutex)(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc_obj(*iter);
	if (!iter) {
		ret = -ENOMEM;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	/* Let the tracer hook the pipe open if it wants to. */
	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* Open pipe readers block tracer changes (see tracing_set_tracer()). */
	tr->trace_ref++;

	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	return ret;
}
5923 
/* Release a trace_pipe file: undo everything tracing_open_pipe() set up. */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	scoped_guard(mutex, &trace_types_lock) {
		/* Allow tracer changes again once this reader is gone. */
		tr->trace_ref--;

		if (iter->trace->pipe_close)
			iter->trace->pipe_close(iter);
		/* Give back this reader's claim on the CPU (or all CPUs). */
		close_pipe_on_cpu(tr, iter->cpu_file);
	}

	free_trace_iter_content(iter);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}
5944 
5945 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)5946 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5947 {
5948 	struct trace_array *tr = iter->tr;
5949 
5950 	/* Iterators are static, they should be filled or empty */
5951 	if (trace_buffer_iter(iter, iter->cpu_file))
5952 		return EPOLLIN | EPOLLRDNORM;
5953 
5954 	if (tr->trace_flags & TRACE_ITER(BLOCK))
5955 		/*
5956 		 * Always select as readable when in blocking mode
5957 		 */
5958 		return EPOLLIN | EPOLLRDNORM;
5959 	else
5960 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5961 					     filp, poll_table, iter->tr->buffer_percent);
5962 }
5963 
5964 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)5965 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5966 {
5967 	struct trace_iterator *iter = filp->private_data;
5968 
5969 	return trace_poll(iter, filp, poll_table);
5970 }
5971 
/* Must be called with iter->mutex held. */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	/*
	 * Sleep until the buffer has something to consume.  Returns a
	 * negative error (or -EAGAIN for O_NONBLOCK), otherwise 1 --
	 * callers still re-check trace_empty() afterwards.
	 */
	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Drop iter->mutex while sleeping so other pipe ops can run. */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
6008 
update_last_data_if_empty(struct trace_array * tr)6009 static bool update_last_data_if_empty(struct trace_array *tr)
6010 {
6011 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6012 		return false;
6013 
6014 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6015 		return false;
6016 
6017 	/*
6018 	 * If the buffer contains the last boot data and all per-cpu
6019 	 * buffers are empty, reset it from the kernel side.
6020 	 */
6021 	update_last_data(tr);
6022 	return true;
6023 }
6024 
6025 /*
6026  * Consumer reader.
6027  */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	guard(mutex)(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		return sret;

	trace_seq_init(&iter->seq);

	/* A tracer may provide its own read implementation. */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			return sret;
	}

waitagain:
	/* A drained last-boot buffer reads as EOF once it has been reset. */
	if (update_last_data_if_empty(iter->tr))
		return 0;

	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		return sret;

	/* stop when tracing is finished */
	if (trace_empty(iter))
		return 0;

	/* Never format more than one trace_seq buffer's worth per call. */
	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
		cnt = TRACE_SEQ_BUFFER_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills entire trace_seq in one shot,
			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
			 * In this case, we need to consume it, otherwise, loop will peek
			 * this event next time, resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

	return sret;
}
6131 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6132 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6133 				     unsigned int idx)
6134 {
6135 	__free_page(spd->pages[idx]);
6136 }
6137 
/*
 * Format as many trace lines into iter->seq as fit in @rem bytes,
 * consuming entries as they are printed.  Returns how many bytes of
 * the request are still unfilled (0 when the request is satisfied or
 * the buffer ran out of entries).
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		/* Remember the length so a partial line can be rolled back. */
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* A whole line would not fit in what remains: drop it. */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
6184 
/* splice() support for trace_pipe: format entries straight into pipe pages. */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* A tracer may provide its own splice implementation. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  min((size_t)trace_seq_used(&iter->seq),
						  (size_t)PAGE_SIZE));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = ret;

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	/* Only splice if at least one page was filled. */
	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
6271 
6272 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6273 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6274 			 size_t cnt, loff_t *ppos)
6275 {
6276 	struct inode *inode = file_inode(filp);
6277 	struct trace_array *tr = inode->i_private;
6278 	char buf[64];
6279 	int r;
6280 
6281 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6282 
6283 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6284 }
6285 
6286 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6287 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6288 			  size_t cnt, loff_t *ppos)
6289 {
6290 	struct inode *inode = file_inode(filp);
6291 	struct trace_array *tr = inode->i_private;
6292 	unsigned long val;
6293 	int ret;
6294 
6295 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6296 	if (ret)
6297 		return ret;
6298 
6299 	if (val > SYSCALL_FAULT_USER_MAX)
6300 		val = SYSCALL_FAULT_USER_MAX;
6301 
6302 	tr->syscall_buf_sz = val;
6303 
6304 	*ppos += cnt;
6305 
6306 	return cnt;
6307 }
6308 
6309 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6310 tracing_entries_read(struct file *filp, char __user *ubuf,
6311 		     size_t cnt, loff_t *ppos)
6312 {
6313 	struct inode *inode = file_inode(filp);
6314 	struct trace_array *tr = inode->i_private;
6315 	int cpu = tracing_get_cpu(inode);
6316 	char buf[64];
6317 	int r = 0;
6318 	ssize_t ret;
6319 
6320 	mutex_lock(&trace_types_lock);
6321 
6322 	if (cpu == RING_BUFFER_ALL_CPUS) {
6323 		int cpu, buf_size_same;
6324 		unsigned long size;
6325 
6326 		size = 0;
6327 		buf_size_same = 1;
6328 		/* check if all cpu sizes are same */
6329 		for_each_tracing_cpu(cpu) {
6330 			/* fill in the size from first enabled cpu */
6331 			if (size == 0)
6332 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6333 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6334 				buf_size_same = 0;
6335 				break;
6336 			}
6337 		}
6338 
6339 		if (buf_size_same) {
6340 			if (!tr->ring_buffer_expanded)
6341 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6342 					    size >> 10,
6343 					    trace_buf_size >> 10);
6344 			else
6345 				r = sprintf(buf, "%lu\n", size >> 10);
6346 		} else
6347 			r = sprintf(buf, "X\n");
6348 	} else
6349 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6350 
6351 	mutex_unlock(&trace_types_lock);
6352 
6353 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6354 	return ret;
6355 }
6356 
6357 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6358 tracing_entries_write(struct file *filp, const char __user *ubuf,
6359 		      size_t cnt, loff_t *ppos)
6360 {
6361 	struct inode *inode = file_inode(filp);
6362 	struct trace_array *tr = inode->i_private;
6363 	unsigned long val;
6364 	int ret;
6365 
6366 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6367 	if (ret)
6368 		return ret;
6369 
6370 	/* must have at least 1 entry */
6371 	if (!val)
6372 		return -EINVAL;
6373 
6374 	/* value is in KB */
6375 	val <<= 10;
6376 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6377 	if (ret < 0)
6378 		return ret;
6379 
6380 	*ppos += cnt;
6381 
6382 	return cnt;
6383 }
6384 
6385 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6386 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6387 				size_t cnt, loff_t *ppos)
6388 {
6389 	struct trace_array *tr = filp->private_data;
6390 	char buf[64];
6391 	int r, cpu;
6392 	unsigned long size = 0, expanded_size = 0;
6393 
6394 	mutex_lock(&trace_types_lock);
6395 	for_each_tracing_cpu(cpu) {
6396 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6397 		if (!tr->ring_buffer_expanded)
6398 			expanded_size += trace_buf_size >> 10;
6399 	}
6400 	if (tr->ring_buffer_expanded)
6401 		r = sprintf(buf, "%lu\n", size);
6402 	else
6403 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6404 	mutex_unlock(&trace_types_lock);
6405 
6406 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6407 }
6408 
6409 #define LAST_BOOT_HEADER ((void *)1)
6410 
/*
 * seq_file ->next for the last-boot listing: position 0 yields the
 * header sentinel, positions >= 1 map to the scratch area's module
 * entries (shifted down by one for the header).
 */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int index = *pos;

	(*pos)++;

	/* Entered with *pos == 0: emit the header line first. */
	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* *pos 0 is for the header, 1 is for the first module */
	index--;

	if (index >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[index];
}
6434 
/* seq_file ->start: hold scratch_mutex for the whole traversal (dropped in l_stop()). */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&scratch_mutex);

	return l_next(m, NULL, pos);
}
6441 
/* seq_file ->stop: release the lock taken in l_start(). */
static void l_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&scratch_mutex);
}
6446 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)6447 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6448 {
6449 	struct trace_scratch *tscratch = tr->scratch;
6450 
6451 	/*
6452 	 * Do not leak KASLR address. This only shows the KASLR address of
6453 	 * the last boot. When the ring buffer is started, the LAST_BOOT
6454 	 * flag gets cleared, and this should only report "current".
6455 	 * Otherwise it shows the KASLR address from the previous boot which
6456 	 * should not be the same as the current boot.
6457 	 */
6458 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6459 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
6460 	else
6461 		seq_puts(m, "# Current\n");
6462 }
6463 
l_show(struct seq_file * m,void * v)6464 static int l_show(struct seq_file *m, void *v)
6465 {
6466 	struct trace_array *tr = m->private;
6467 	struct trace_mod_entry *entry = v;
6468 
6469 	if (v == LAST_BOOT_HEADER) {
6470 		show_last_boot_header(m, tr);
6471 		return 0;
6472 	}
6473 
6474 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
6475 	return 0;
6476 }
6477 
/* seq_file operations for listing last-boot kernel/module offsets. */
static const struct seq_operations last_boot_seq_ops = {
	.start		= l_start,
	.next		= l_next,
	.stop		= l_stop,
	.show		= l_show,
};
6484 
tracing_last_boot_open(struct inode * inode,struct file * file)6485 static int tracing_last_boot_open(struct inode *inode, struct file *file)
6486 {
6487 	struct trace_array *tr = inode->i_private;
6488 	struct seq_file *m;
6489 	int ret;
6490 
6491 	ret = tracing_check_open_get_tr(tr);
6492 	if (ret)
6493 		return ret;
6494 
6495 	ret = seq_open(file, &last_boot_seq_ops);
6496 	if (ret) {
6497 		trace_array_put(tr);
6498 		return ret;
6499 	}
6500 
6501 	m = file->private_data;
6502 	m->private = tr;
6503 
6504 	return 0;
6505 }
6506 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)6507 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6508 {
6509 	struct trace_array *tr = inode->i_private;
6510 	int cpu = tracing_get_cpu(inode);
6511 	int ret;
6512 
6513 	ret = tracing_check_open_get_tr(tr);
6514 	if (ret)
6515 		return ret;
6516 
6517 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6518 	if (ret < 0)
6519 		__trace_array_put(tr);
6520 	return ret;
6521 }
6522 
6523 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6524 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6525 			  size_t cnt, loff_t *ppos)
6526 {
6527 	/*
6528 	 * There is no need to read what the user has written, this function
6529 	 * is just to make sure that there is no error when "echo" is used
6530 	 */
6531 
6532 	*ppos += cnt;
6533 
6534 	return cnt;
6535 }
6536 
6537 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)6538 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6539 {
6540 	struct trace_array *tr = inode->i_private;
6541 
6542 	/* disable tracing ? */
6543 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
6544 		tracer_tracing_off(tr);
6545 	/* resize the ring buffer to 0 */
6546 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6547 
6548 	trace_array_put(tr);
6549 
6550 	return 0;
6551 }
6552 
6553 #define TRACE_MARKER_MAX_SIZE		4096
6554 
/*
 * Record a user-supplied marker string of @cnt bytes as a TRACE_PRINT
 * event in @tr's ring buffer, appending a '\n' if the text does not
 * already end with one.  Returns the number of bytes recorded, or
 * -EBADF if an event could not be reserved.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Ensure the recorded text is newline-terminated, then NUL-terminated. */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	/* Trigger post-processing must run after the commit. */
	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
6616 
/* One per-CPU staging buffer for copying data in from user space. */
struct trace_user_buf {
	char		*buf;	/* kmalloc'ed buffer of trace_user_buf_info::size bytes */
};
6620 
6621 static DEFINE_MUTEX(trace_user_buffer_mutex);
6622 static struct trace_user_buf_info *trace_user_buffer;
6623 
/**
 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
 * @tinfo: The descriptor to free up
 *
 * Frees any data allocated in the trace info descriptor.
 */
void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
	char *buf;
	int cpu;

	/* Nothing to do if the per-CPU array was never allocated. */
	if (!tinfo || !tinfo->tbuf)
		return;

	/* kfree(NULL) is fine for CPUs whose buffer allocation failed. */
	for_each_possible_cpu(cpu) {
		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		kfree(buf);
	}
	free_percpu(tinfo->tbuf);
}
6644 
/*
 * Allocate the per-CPU staging buffers for @tinfo, each @size bytes,
 * on the CPU's local node.  On partial failure the buffers allocated
 * so far are left in place; callers clean up via
 * trace_user_fault_destroy() / user_buffer_free().
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	char *buf;
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/* Clear each buffer in case of error */
	for_each_possible_cpu(cpu) {
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
	}

	for_each_possible_cpu(cpu) {
		buf = kmalloc_node(size, GFP_KERNEL,
				   cpu_to_node(cpu));
		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}
6674 
/* For internal use. Free and reinitialize */
static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Release the per-CPU buffers, the descriptor, and NULL the caller's pointer. */
	trace_user_fault_destroy(*tinfo);
	kfree(*tinfo);
	*tinfo = NULL;
}
6684 
/* For internal use. Initialize and allocate */
static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
{
	bool alloc = false;
	int ret;

	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Allocate the descriptor itself only if the caller has none yet. */
	if (!*tinfo) {
		alloc = true;
		*tinfo = kzalloc_obj(**tinfo);
		if (!*tinfo)
			return -ENOMEM;
	}

	ret = user_fault_buffer_enable(*tinfo, size);
	/* Only free the descriptor here if it was allocated above. */
	if (ret < 0 && alloc)
		user_buffer_free(tinfo);

	return ret;
}
6706 
/* For internal use, dereference and free if necessary */
static void user_buffer_put(struct trace_user_buf_info **tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
		return;

	/* Free only when the last reference is dropped. */
	if (--(*tinfo)->ref)
		return;

	user_buffer_free(tinfo);
}
6720 
/**
 * trace_user_fault_init - Allocate or reference a per CPU buffer
 * @tinfo: A pointer to the trace buffer descriptor
 * @size: The size to allocate each per CPU buffer
 *
 * Create a per CPU buffer that can be used to copy from user space
 * in a task context. When calling trace_user_fault_read(), preemption
 * must be disabled, and it will enable preemption and copy user
 * space data to the buffer. If any schedule switches occur, it will
 * retry until it succeeds without a schedule switch knowing the buffer
 * is still valid.
 *
 * Returns 0 on success, negative on failure.
 */
int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
{
	int ret;

	if (!tinfo)
		return -EINVAL;

	guard(mutex)(&trace_user_buffer_mutex);

	ret = user_buffer_init(&tinfo, size);
	/* On failure, release any per-CPU buffers that were allocated. */
	if (ret < 0)
		trace_user_fault_destroy(tinfo);

	return ret;
}
6750 
/**
 * trace_user_fault_get - up the ref count for the user buffer
 * @tinfo: A pointer to the trace buffer descriptor
 *
 * Ups the ref count of the trace buffer.
 *
 * Returns the new ref count, or -1 if @tinfo is NULL.
 */
int trace_user_fault_get(struct trace_user_buf_info *tinfo)
{
	if (!tinfo)
		return -1;

	guard(mutex)(&trace_user_buffer_mutex);

	tinfo->ref++;
	return tinfo->ref;
}
6769 
/**
 * trace_user_fault_put - dereference a per cpu trace buffer
 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
 *
 * Decrement the ref count of @tinfo. Unlike user_buffer_put(), this
 * does not free the buffer when the count reaches zero.
 *
 * Returns the new refcount (negative on error).
 */
int trace_user_fault_put(struct trace_user_buf_info *tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	/* Dropping a reference that was never taken is a caller bug */
	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
		return -1;

	--tinfo->ref;
	return tinfo->ref;
}
6788 
/**
 * trace_user_fault_read - Read user space into a per CPU buffer
 * @tinfo: The @tinfo initialized by trace_user_fault_init()
 * @ptr: The user space pointer to read
 * @size: The size of user space to read.
 * @copy_func: Optional function to use to copy from user space
 * @data: Data to pass to copy_func if it was supplied
 *
 * Preemption must be disabled when this is called, and must not
 * be enabled while using the returned buffer.
 * This does the copying from user space into a per CPU buffer.
 *
 * The @size must not be greater than the size passed in to
 * trace_user_fault_init().
 *
 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
 * otherwise it will call @copy_func. It will call @copy_func with:
 *
 *   buffer: the per CPU buffer of the @tinfo.
 *   ptr: The pointer @ptr to user space to read
 *   size: The @size of the ptr to read
 *   data: The @data parameter
 *
 * It is expected that @copy_func will return 0 on success and non zero
 * if there was a fault.
 *
 * Returns a pointer to the buffer with the content read from @ptr.
 *   Preemption must remain disabled while the caller accesses the
 *   buffer returned by this function.
 * Returns NULL if there was a fault, or the size passed in is
 *   greater than the size passed to trace_user_fault_init().
 */
char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
			     const char __user *ptr, size_t size,
			     trace_user_buf_copy copy_func, void *data)
{
	int cpu = smp_processor_id();
	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
	unsigned int cnt;
	int trys = 0;
	int ret;

	lockdep_assert_preemption_disabled();

	/*
	 * It's up to the caller to not try to copy more than it said
	 * it would.
	 */
	if (size > tinfo->size)
		return NULL;

	/*
	 * This acts similar to a seqcount. The per CPU context switches are
	 * recorded, migration is disabled and preemption is enabled. The
	 * read of the user space memory is copied into the per CPU buffer.
	 * Preemption is disabled again, and if the per CPU context switches count
	 * is still the same, it means the buffer has not been corrupted.
	 * If the count is different, it is assumed the buffer is corrupted
	 * and reading must be tried again.
	 */

	do {
		/*
		 * It is possible that something is trying to migrate this
		 * task. What happens then, is when preemption is enabled,
		 * the migration thread will preempt this task, try to
		 * migrate it, fail, then let it run again. That will
		 * cause this to loop again and never succeed.
		 * On failures, enable and disable preemption with
		 * migration enabled, to allow the migration thread to
		 * migrate this task.
		 */
		if (trys) {
			preempt_enable_notrace();
			preempt_disable_notrace();
			/* May now be on a different CPU; refresh the buffer */
			cpu = smp_processor_id();
			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		}

		/*
		 * If for some reason, copy_from_user() always causes a context
		 * switch, this would then cause an infinite loop.
		 * If this task is preempted by another user space task, it
		 * will cause this task to try again. But just in case something
		 * changes where the copying from user space causes another task
		 * to run, prevent this from going into an infinite loop.
		 * 100 tries should be plenty.
		 */
		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
			return NULL;

		/* Read the current CPU context switch counter */
		cnt = nr_context_switches_cpu(cpu);

		/*
		 * Preemption is going to be enabled, but this task must
		 * remain on this CPU.
		 */
		migrate_disable();

		/*
		 * Now preemption is being enabled and another task can come in
		 * and use the same buffer and corrupt our data.
		 */
		preempt_enable_notrace();

		/* Make sure preemption is enabled here */
		lockdep_assert_preemption_enabled();

		if (copy_func) {
			ret = copy_func(buffer, ptr, size, data);
		} else {
			/*
			 * NOTE(review): __copy_from_user() skips access_ok();
			 * presumably callers only pass pointers from paths where
			 * the range was already validated - confirm.
			 */
			ret = __copy_from_user(buffer, ptr, size);
		}

		preempt_disable_notrace();
		migrate_enable();

		/* if it faulted, no need to test if the buffer was corrupted */
		if (ret)
			return NULL;

		/*
		 * Preemption is disabled again, now check the per CPU context
		 * switch counter. If it doesn't match, then another user space
		 * process may have scheduled in and corrupted our buffer. In that
		 * case the copying must be retried.
		 */
	} while (nr_context_switches_cpu(cpu) != cnt);

	return buffer;
}
6921 
/*
 * Write handler for the trace_marker file: copy the user's text via the
 * shared per CPU buffer and hand it to write_marker_to_buffer().
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* Reject counts that would be negative when treated as signed */
	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Oversized writes are truncated (unlike the raw marker interface) */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}
6967 
/*
 * Commit @cnt bytes of raw marker payload (the tag id followed by its
 * data) from @buf into @tr's ring buffer as a TRACE_RAW_DATA event.
 * Returns the number of bytes committed, or a negative errno.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/* cnt includes both the entry->id and the data behind it. */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
7002 
/*
 * Write handler for the trace_marker_raw file: a binary payload of an
 * unsigned int tag id followed by arbitrary data.
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}
7046 
/* Open handler shared by the trace_marker and trace_marker_raw files */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		/*
		 * First opener allocates the shared user-copy buffer.
		 * NOTE(review): assumes user_buffer_init() leaves ->ref
		 * holding this opener's reference - verify.
		 */
		if (!trace_user_buffer) {
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	/* Drop the buffer reference taken above if the open fails */
	if (ret < 0)
		user_buffer_put(&trace_user_buffer);
	return ret;
}
7067 
/* Release handler shared by the trace_marker and trace_marker_raw files */
static int tracing_mark_release(struct inode *inode, struct file *file)
{
	/* Drop this opener's reference on the shared user-copy buffer */
	user_buffer_put(&trace_user_buffer);
	return tracing_release_generic_tr(inode, file);
}
7073 
tracing_clock_show(struct seq_file * m,void * v)7074 static int tracing_clock_show(struct seq_file *m, void *v)
7075 {
7076 	struct trace_array *tr = m->private;
7077 	int i;
7078 
7079 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7080 		seq_printf(m,
7081 			"%s%s%s%s", i ? " " : "",
7082 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7083 			i == tr->clock_id ? "]" : "");
7084 	seq_putc(m, '\n');
7085 
7086 	return 0;
7087 }
7088 
/* Set the trace clock of @tr to the clock named @clockstr */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	/* Look the name up in the table of available clocks */
	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* The snapshot buffer must use the same clock as the live buffer */
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	update_last_data_if_empty(tr);

	/* Keep the scratch area's recorded clock id in sync */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}
7127 
/* Write handler for the trace_clock file: accepts a clock name */
static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *fpos)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	char buf[64];
	const char *clockstr;
	int ret;

	/* Leave room for the terminating NUL */
	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	/* Trim whitespace, e.g. the trailing newline from echo */
	clockstr = strstrip(buf);

	ret = tracing_set_clock(tr, clockstr);
	if (ret)
		return ret;

	*fpos += cnt;

	return cnt;
}
7155 
/* Open handler for trace_clock; writing requires a writable trace array */
static int tracing_clock_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	/* Checks tracefs lockdown and takes a reference on @tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
		trace_array_put(tr);
		return -EACCES;
	}

	ret = single_open(file, tracing_clock_show, inode->i_private);
	/* single_open() failed: drop the reference taken above */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7176 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7177 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7178 {
7179 	struct trace_array *tr = m->private;
7180 
7181 	guard(mutex)(&trace_types_lock);
7182 
7183 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7184 		seq_puts(m, "delta [absolute]\n");
7185 	else
7186 		seq_puts(m, "[delta] absolute\n");
7187 
7188 	return 0;
7189 }
7190 
/* Open handler for the timestamp_mode file */
static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	/* Checks tracefs lockdown and takes a reference on @tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
	/* single_open() failed: drop the reference taken above */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7206 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7207 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7208 {
7209 	if (rbe == this_cpu_read(trace_buffered_event))
7210 		return ring_buffer_time_stamp(buffer);
7211 
7212 	return ring_buffer_event_time_stamp(buffer, rbe);
7213 }
7214 
/*
 * Per-open-file state for the binary per CPU buffer files (and the
 * snapshot_raw file). Field roles below are inferred from names and
 * usage in tracing_buffers_read() (not visible here) - verify.
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	void			*spare;		/* spare page for ring buffer reads */
	unsigned int		spare_cpu;	/* CPU the spare belongs to */
	unsigned int		spare_size;	/* size of the spare buffer */
	unsigned int		read;		/* bytes of the spare already consumed */
};
7222 
7223 #ifdef CONFIG_TRACER_SNAPSHOT
/* Open handler for the snapshot file; readers get a full trace iterator */
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	struct seq_file *m;
	int ret;

	/* Checks tracefs lockdown and takes a reference on @tr */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, true);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
	} else {
		/* Writes still need the seq_file to hold the private data */
		ret = -ENOMEM;
		m = kzalloc_obj(*m);
		if (!m)
			goto out;
		iter = kzalloc_obj(*iter);
		if (!iter) {
			kfree(m);
			goto out;
		}
		ret = 0;

		/* Point the iterator at the snapshot buffer, not the live one */
		iter->tr = tr;
		iter->array_buffer = &tr->snapshot_buffer;
		iter->cpu_file = tracing_get_cpu(inode);
		m->private = iter;
		file->private_data = m;
	}
out:
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7264 
tracing_swap_cpu_buffer(void * tr)7265 static void tracing_swap_cpu_buffer(void *tr)
7266 {
7267 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7268 }
7269 
7270 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7271 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7272 		       loff_t *ppos)
7273 {
7274 	struct seq_file *m = filp->private_data;
7275 	struct trace_iterator *iter = m->private;
7276 	struct trace_array *tr = iter->tr;
7277 	unsigned long val;
7278 	int ret;
7279 
7280 	ret = tracing_update_buffers(tr);
7281 	if (ret < 0)
7282 		return ret;
7283 
7284 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7285 	if (ret)
7286 		return ret;
7287 
7288 	guard(mutex)(&trace_types_lock);
7289 
7290 	if (tracer_uses_snapshot(tr->current_trace))
7291 		return -EBUSY;
7292 
7293 	local_irq_disable();
7294 	arch_spin_lock(&tr->max_lock);
7295 	if (tr->cond_snapshot)
7296 		ret = -EBUSY;
7297 	arch_spin_unlock(&tr->max_lock);
7298 	local_irq_enable();
7299 	if (ret)
7300 		return ret;
7301 
7302 	switch (val) {
7303 	case 0:
7304 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7305 			return -EINVAL;
7306 		if (tr->allocated_snapshot)
7307 			free_snapshot(tr);
7308 		break;
7309 	case 1:
7310 /* Only allow per-cpu swap if the ring buffer supports it */
7311 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7312 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7313 			return -EINVAL;
7314 #endif
7315 		if (tr->allocated_snapshot)
7316 			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
7317 					&tr->array_buffer, iter->cpu_file);
7318 
7319 		ret = tracing_arm_snapshot_locked(tr);
7320 		if (ret)
7321 			return ret;
7322 
7323 		/* Now, we're going to swap */
7324 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7325 			local_irq_disable();
7326 			update_max_tr(tr, current, smp_processor_id(), NULL);
7327 			local_irq_enable();
7328 		} else {
7329 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7330 						 (void *)tr, 1);
7331 		}
7332 		tracing_disarm_snapshot(tr);
7333 		break;
7334 	default:
7335 		if (tr->allocated_snapshot) {
7336 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7337 				tracing_reset_online_cpus(&tr->snapshot_buffer);
7338 			else
7339 				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
7340 		}
7341 		break;
7342 	}
7343 
7344 	if (ret >= 0) {
7345 		*ppos += cnt;
7346 		ret = cnt;
7347 	}
7348 
7349 	return ret;
7350 }
7351 
/* Release handler for the snapshot file */
static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	int ret;

	ret = tracing_release(inode, file);

	/* Readers were set up by __tracing_open(); tracing_release() did it all */
	if (file->f_mode & FMODE_READ)
		return ret;

	/* If write only, the seq_file is just a stub */
	if (m)
		kfree(m->private);
	kfree(m);

	return 0;
}
7369 
7370 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7371 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7372 				    size_t count, loff_t *ppos);
7373 static int tracing_buffers_release(struct inode *inode, struct file *file);
7374 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7375 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7376 
/*
 * Open handler for the per CPU snapshot_raw file: shares the handlers of
 * the live raw buffer file, but points the iterator at the snapshot buffer.
 */
static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
	struct ftrace_buffer_info *info;
	int ret;

	/* The following checks for tracefs lockdown */
	ret = tracing_buffers_open(inode, filp);
	if (ret < 0)
		return ret;

	info = filp->private_data;

	/* The current tracer owns the snapshot buffer; don't fight it */
	if (tracer_uses_snapshot(info->iter.trace)) {
		tracing_buffers_release(inode, filp);
		return -EBUSY;
	}

	info->iter.snapshot = true;
	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;

	return ret;
}
7399 
7400 #endif /* CONFIG_TRACER_SNAPSHOT */
7401 
7402 
/*
 * tracefs file_operations tables pairing the handlers implemented in
 * this file with generic seq_file/llseek helpers.
 */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
#endif

static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};

static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_syscall_buf_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_syscall_buf_read,
	.write		= tracing_syscall_buf_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

/* trace_marker and trace_marker_raw share open/release handlers */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};

static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_release,
};

static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};

static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};

/* Shares the read/release/splice paths of the live raw buffer files */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
7523 
/*
 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer containing the value to write
 * @cnt: The number of bytes available in @ubuf
 * @ppos: The current "file" position
 *
 * This function implements the write interface for a struct trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param structure that
 * defines where to write the value, the min and the max acceptable values,
 * and a lock to protect the write.
 */
static ssize_t
trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_min_max_param *param = filp->private_data;
	u64 val;
	int err;

	if (!param)
		return -EFAULT;

	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
	if (err)
		return err;

	/* The lock is optional; only taken when the param declares one */
	if (param->lock)
		mutex_lock(param->lock);

	if (param->min && val < *param->min)
		err = -EINVAL;

	if (param->max && val > *param->max)
		err = -EINVAL;

	/* Only store the value when it passed both range checks */
	if (!err)
		*param->val = val;

	if (param->lock)
		mutex_unlock(param->lock);

	if (err)
		return err;

	return cnt;
}
7570 
/*
 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function implements the read interface for a struct trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param struct with valid
 * data.
 */
static ssize_t
trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_min_max_param *param = filp->private_data;
	char buf[U64_STR_SIZE];
	int len;
	u64 val;

	if (!param)
		return -EFAULT;

	val = *param->val;

	/* Never hand out more than the formatted buffer can hold */
	if (cnt > sizeof(buf))
		cnt = sizeof(buf);

	len = snprintf(buf, sizeof(buf), "%llu\n", val);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
7602 
/* Generic fops for any file backed by a trace_min_max_param */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};

/* Maximum number of entries kept per trace array in tracing/error_log */
#define TRACING_LOG_ERRS_MAX	8
/* Maximum length of the "<loc>: error: " location string */
#define TRACING_LOG_LOC_MAX	128

#define CMD_PREFIX "  Command: "

struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() timestamp when logged */
};

struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};

/* Protects every trace_array's err_log list and n_err_log_entries */
static DEFINE_MUTEX(tracing_err_log_lock);
7629 
alloc_tracing_log_err(int len)7630 static struct tracing_log_err *alloc_tracing_log_err(int len)
7631 {
7632 	struct tracing_log_err *err;
7633 
7634 	err = kzalloc_obj(*err);
7635 	if (!err)
7636 		return ERR_PTR(-ENOMEM);
7637 
7638 	err->cmd = kzalloc(len, GFP_KERNEL);
7639 	if (!err->cmd) {
7640 		kfree(err);
7641 		return ERR_PTR(-ENOMEM);
7642 	}
7643 
7644 	return err;
7645 }
7646 
/* Free a tracing_log_err and its separately allocated cmd string */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
7652 
/*
 * Get a tracing_log_err to fill in: allocate new entries until the log
 * holds TRACING_LOG_ERRS_MAX of them, then recycle the oldest entry,
 * giving it a fresh cmd buffer of @len bytes.
 * Called with tracing_err_log_lock held (see tracing_log_err()).
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
						   int len)
{
	struct tracing_log_err *err;
	char *cmd;

	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
		err = alloc_tracing_log_err(len);
		if (PTR_ERR(err) != -ENOMEM)
			tr->n_err_log_entries++;

		return err;
	}
	/* Log is full: steal the oldest entry and replace its cmd buffer */
	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return ERR_PTR(-ENOMEM);
	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
	kfree(err->cmd);
	err->cmd = cmd;
	/* Unlink it; the caller re-adds it at the tail */
	list_del(&err->list);

	return err;
}
7676 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Finds the position of the first occurrence of @str within @cmd.  The
 * return value can be passed to tracing_log_err() for caret placement
 * within @cmd.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *hit;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);

	return hit ? hit - cmd : 0;
}
7702 
/**
 * tracing_log_err - write an error to the tracing error log
 * @tr: The associated trace array for the error (NULL for top level array)
 * @loc: A string describing where the error occurred
 * @cmd: The tracing command that caused the error
 * @errs: The array of loc-specific static error strings
 * @type: The index into errs[], which produces the specific static err string
 * @pos: The position the caret should be placed in the cmd
 *
 * Writes an error into tracing/error_log of the form:
 *
 * <loc>: error: <text>
 *   Command: <cmd>
 *              ^
 *
 * tracing/error_log is a small log file containing the last
 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
 * unless there has been a tracing error, and the error log can be
 * cleared and have its memory freed by writing the empty string in
 * truncation mode to it i.e. echo > tracing/error_log.
 *
 * NOTE: the @errs array along with the @type param are used to
 * produce a static error string - this string is not copied and saved
 * when the error is logged - only a pointer to it is saved.  See
 * existing callers for examples of how static strings are typically
 * defined for use with tracing_log_err().
 */
void tracing_log_err(struct trace_array *tr,
		     const char *loc, const char *cmd,
		     const char **errs, u8 type, u16 pos)
{
	struct tracing_log_err *err;
	int len = 0;

	if (!tr)
		tr = &global_trace;

	/* Room for "\n" CMD_PREFIX <cmd> "\n" plus NULs (sizeof over-counts) */
	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;

	guard(mutex)(&tracing_err_log_lock);

	err = get_tracing_log_err(tr, len);
	if (PTR_ERR(err) == -ENOMEM)
		return;

	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);

	err->info.errs = errs;
	err->info.type = type;
	err->info.pos = pos;
	err->info.ts = local_clock();

	list_add_tail(&err->list, &tr->err_log);
}
7758 
clear_tracing_err_log(struct trace_array * tr)7759 static void clear_tracing_err_log(struct trace_array *tr)
7760 {
7761 	struct tracing_log_err *err, *next;
7762 
7763 	guard(mutex)(&tracing_err_log_lock);
7764 
7765 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7766 		list_del(&err->list);
7767 		free_tracing_log_err(err);
7768 	}
7769 
7770 	tr->n_err_log_entries = 0;
7771 }
7772 
/* seq_file start: take tracing_err_log_lock for the whole iteration */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	/* Released in tracing_err_log_seq_stop() */
	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}
7781 
/* seq_file next: advance to the next logged error */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}
7788 
/* seq_file stop: drop the lock taken in tracing_err_log_seq_start() */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
7793 
/*
 * Emit the caret line underneath the echoed command: pad past the
 * CMD_PREFIX plus @pos characters of the command, then print "^".
 */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	unsigned int indent = sizeof(CMD_PREFIX) - 1 + pos;
	unsigned int i;

	for (i = 0; i < indent; i++)
		seq_putc(m, ' ');
	seq_puts(m, "^\n");
}
7804 
/*
 * Print one error log entry as:
 *
 *   [sec.usec] <loc>: error: <static err text>
 *     Command: <cmd>
 *                ^
 */
static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		/* @type indexes the static string array saved at log time */
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* Split the local_clock() nanosecond timestamp */
		nsec = do_div(sec, NSEC_PER_SEC);
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}
7823 
/* seq_file iterator for the tracing/error_log file */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
7830 
/* Open tracing/error_log; truncating writers clear the log */
static int tracing_err_log_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret = 0;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was opened for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
		clear_tracing_err_log(tr);

	if (file->f_mode & FMODE_READ) {
		ret = seq_open(file, &tracing_err_log_seq_ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = tr;
		} else {
			/* Undo the ref taken by tracing_check_open_get_tr() */
			trace_array_put(tr);
		}
	}
	return ret;
}
7855 
/*
 * Writes are accepted but discarded: the only meaningful "write" is
 * truncation (echo > error_log), which is handled at open time.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7862 
/* Release tracing/error_log: drop the trace_array ref and seq state */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	/* seq_open() was only done for readers in tracing_err_log_open() */
	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
7874 
/* File operations for tracing/error_log */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
7882 
/* Open trace_pipe_raw: allocate a per-open buffer-info iterator */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	/* The inode encodes which CPU file this is (or RING_BUFFER_ALL_CPUS) */
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	/* Count this reader; dropped in tracing_buffers_release() */
	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7921 
/* Poll trace_pipe_raw for readable data on the iterator's CPU */
static __poll_t
tracing_buffers_poll(struct file *filp, poll_table *poll_table)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;

	return trace_poll(iter, filp, poll_table);
}
7930 
/*
 * Read raw (binary) sub-buffer data from the per-CPU ring buffer.
 * A leftover partially-read sub-buffer ("spare" page) is drained
 * before a new page is pulled from the ring buffer.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	/* Raw reads of the live buffer conflict with an active snapshot tracer */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* No page available: wait for data unless non-blocking/closed */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	/* Copy out the unread remainder of the sub-buffer, capped at @count */
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* copy_to_user() returns the number of bytes NOT copied */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8019 
/* flush(): mark the iterator closed and wake any blocked readers */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8033 
/* Release trace_pipe_raw: drop refs and free the spare read page */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	guard(mutex)(&trace_types_lock);

	/* Balances the increment in tracing_buffers_open() */
	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	return 0;
}
8052 
/* A ring buffer read page shared with the pipe layer via splice */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page came from */
	void			*page;		/* read page borrowed from @buffer */
	int			cpu;		/* CPU the page was allocated for */
	refcount_t		refcount;	/* pipe buffers sharing this page */
};
8059 
buffer_ref_release(struct buffer_ref * ref)8060 static void buffer_ref_release(struct buffer_ref *ref)
8061 {
8062 	if (!refcount_dec_and_test(&ref->refcount))
8063 		return;
8064 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8065 	kfree(ref);
8066 }
8067 
/* Pipe buffer consumed: drop its reference on the buffer_ref */
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	buffer_ref_release(ref);
	buf->private = 0;
}
8076 
/* Take an extra reference when a pipe buffer is duplicated */
static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	/* Refuse rather than risk overflowing the refcount */
	if (refcount_read(&ref->refcount) > INT_MAX/2)
		return false;

	refcount_inc(&ref->refcount);
	return true;
}
8088 
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};
8094 
8095 /*
8096  * Callback from splice_to_pipe(), if we need to release some pages
8097  * at the end of the spd in case we error'ed out in filling the pipe.
8098  */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	/* Balances the initial ref taken when the page was added to the spd */
	buffer_ref_release(ref);
	spd->partial[i].private = 0;
}
8107 
/*
 * Splice whole ring buffer sub-buffers into @pipe without copying:
 * each page is wrapped in a refcounted buffer_ref and handed to the
 * pipe; the page returns to the ring buffer when the last ref drops.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	/* Offset and length must be sub-buffer aligned */
	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	if (*ppos & (page_size - 1))
		return -EINVAL;

	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		/* Swap a full sub-buffer out of the ring buffer into ref->page */
		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		if (ret)
			goto out;

		/* Already waited once and found new data; don't wait again */
		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8228 
/*
 * ioctl on trace_pipe_raw: TRACE_MMAP_IOCTL_GET_READER advances the
 * mmap'ed reader page (blocking unless O_NONBLOCK); cmd 0 wakes all
 * waiters; any other cmd is rejected.
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		if (!(file->f_flags & O_NONBLOCK)) {
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8264 
8265 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Account a new user mapping of @tr's buffer so snapshot use is
 * excluded while it exists. Returns -EBUSY if a snapshot is active
 * or the mapped count would overflow.
 */
static int get_snapshot_map(struct trace_array *tr)
{
	int err = 0;

	/*
	 * Called with mmap_lock held. lockdep would be unhappy if we would now
	 * take trace_types_lock. Instead use the specific
	 * snapshot_trigger_lock.
	 */
	spin_lock(&tr->snapshot_trigger_lock);

	if (tr->snapshot || tr->mapped == UINT_MAX)
		err = -EBUSY;
	else
		tr->mapped++;

	spin_unlock(&tr->snapshot_trigger_lock);

	/* Wait for update_max_tr() to observe iter->tr->mapped */
	if (tr->mapped == 1)
		synchronize_rcu();

	return err;

}
/* Undo get_snapshot_map(): drop the user-mapped count */
static void put_snapshot_map(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->mapped))
		tr->mapped--;
	spin_unlock(&tr->snapshot_trigger_lock);
}
8298 #else
/* !CONFIG_TRACER_SNAPSHOT: no snapshot exclusion needed for mmap */
static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
8301 #endif
8302 
8303 /*
8304  * This is called when a VMA is duplicated (e.g., on fork()) to increment
8305  * the user_mapped counter without remapping pages.
8306  */
static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	/* Bump the mapping count only; the pages are already mapped */
	ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
}
8314 
/* VMA torn down: unmap the ring buffer and release snapshot exclusion */
static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
	put_snapshot_map(iter->tr);
}
8323 
/* Reject splitting the VMA (e.g. partial munmap or mprotect of a range) */
static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}
8332 
/* VMA callbacks for user mappings of trace_pipe_raw */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.open		= tracing_buffers_mmap_open,
	.close		= tracing_buffers_mmap_close,
	.may_split      = tracing_buffers_may_split,
};
8338 
/* Map the per-CPU ring buffer (meta page + sub-buffers) into user space */
static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	int ret = 0;

	/* A memmap'ed and backup buffers are not supported for user space mmap */
	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
		return -ENODEV;

	/* Exclude snapshot swapping while the mapping exists */
	ret = get_snapshot_map(iter->tr);
	if (ret)
		return ret;

	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
	if (ret)
		put_snapshot_map(iter->tr);

	vma->vm_ops = &tracing_buffers_vmops;

	return ret;
}
8361 
/* File operations for per-CPU trace_pipe_raw */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.flush		= tracing_buffers_flush,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl = tracing_buffers_ioctl,
	.mmap		= tracing_buffers_mmap,
};
8372 
/*
 * Read per-CPU ring buffer statistics (entries, overruns, bytes,
 * timestamps, dropped and read events) as a text report.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc_obj(*s);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
8436 
/* File operations for per-CPU stats */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
8443 
8444 #ifdef CONFIG_DYNAMIC_FTRACE
8445 
/* Report dynamic ftrace bookkeeping (patched sites, pages, timings) */
static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	ssize_t ret;
	char *buf;
	int r;

	/* 512 should be plenty to hold the amount needed */
#define DYN_INFO_BUF_SIZE	512

	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
		      "%ld pages:%ld groups: %ld\n"
		      "ftrace boot update time = %llu (ns)\n"
		      "ftrace module total update time = %llu (ns)\n",
		      ftrace_update_tot_cnt,
		      ftrace_number_of_pages,
		      ftrace_number_of_groups,
		      ftrace_update_time,
		      ftrace_total_mod_time);

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	kfree(buf);
	return ret;
}
8475 
/* File operations for dyn_ftrace_total_info */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
8481 #endif /* CONFIG_DYNAMIC_FTRACE */
8482 
8483 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Probe callback for "snapshot": take a snapshot on every hit */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8491 
8492 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8493 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8494 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8495 		      void *data)
8496 {
8497 	struct ftrace_func_mapper *mapper = data;
8498 	long *count = NULL;
8499 
8500 	if (mapper)
8501 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8502 
8503 	if (count) {
8504 
8505 		if (*count <= 0)
8506 			return;
8507 
8508 		(*count)--;
8509 	}
8510 
8511 	tracing_snapshot_instance(tr);
8512 }
8513 
8514 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8515 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8516 		      struct ftrace_probe_ops *ops, void *data)
8517 {
8518 	struct ftrace_func_mapper *mapper = data;
8519 	long *count = NULL;
8520 
8521 	seq_printf(m, "%ps:", (void *)ip);
8522 
8523 	seq_puts(m, "snapshot");
8524 
8525 	if (mapper)
8526 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8527 
8528 	if (count)
8529 		seq_printf(m, ":count=%ld\n", *count);
8530 	else
8531 		seq_puts(m, ":unlimited\n");
8532 
8533 	return 0;
8534 }
8535 
/* Allocate the func mapper on first use and record this ip's count */
static int
ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
		     unsigned long ip, void *init_data, void **data)
{
	struct ftrace_func_mapper *mapper = *data;

	if (!mapper) {
		mapper = allocate_ftrace_func_mapper();
		if (!mapper)
			return -ENOMEM;
		*data = mapper;
	}

	/* init_data carries the snapshot count parsed from the command */
	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
}
8551 
/*
 * Free callback: @ip == 0 means the whole probe is going away, so the
 * entire mapper is freed; otherwise only this ip's entry is removed.
 */
static void
ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
		     unsigned long ip, void *data)
{
	struct ftrace_func_mapper *mapper = data;

	if (!ip) {
		if (!mapper)
			return;
		free_ftrace_func_mapper(mapper, NULL);
		return;
	}

	ftrace_func_mapper_remove_ip(mapper, ip);
}
8567 
/* Probe ops for "<func>:snapshot" (no count, no per-ip state) */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};
8572 
/* Probe ops for "<func>:snapshot:N" (per-ip countdown via func mapper) */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
	.init			= ftrace_snapshot_init,
	.free			= ftrace_snapshot_free,
};
8579 
/*
 * Parse and register/unregister a "<func>:snapshot[:count]" command
 * written to set_ftrace_filter. A leading '!' in @glob removes the
 * probe. The snapshot buffer is armed while any probe is registered.
 */
static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;
	char *number;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;

	if (glob[0] == '!') {
		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
		if (!ret)
			tracing_disarm_snapshot(tr);

		return ret;
	}

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	/* Empty count (e.g. "func:snapshot:") behaves like no count */
	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	ret = tracing_arm_snapshot(tr);
	if (ret < 0)
		return ret;

	ret = register_ftrace_function_probe(glob, tr, ops, count);
	/* Keep arm/disarm balanced if registration failed */
	if (ret < 0)
		tracing_disarm_snapshot(tr);

	return ret < 0 ? ret : 0;
}
8633 
/* set_ftrace_filter command: "<func>:snapshot[:count]" */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};
8638 
/* Register the "snapshot" filter command at boot */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
8643 #else
/* Snapshot command unavailable without TRACER_SNAPSHOT + DYNAMIC_FTRACE */
static inline __init int register_snapshot_cmd(void) { return 0; }
8645 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8646 
/* Return the tracefs directory dentry for @tr (NULL for the top level) */
static struct dentry *tracing_get_dentry(struct trace_array *tr)
{
	/* Top directory uses NULL as the parent */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return NULL;

	if (WARN_ON(!tr->dir))
		return ERR_PTR(-ENODEV);

	/* All sub buffers have a descriptor */
	return tr->dir;
}
8659 
/* Lazily create and cache @tr's "per_cpu" tracefs directory */
static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	MEM_FAIL(!tr->percpu_dir,
		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}
8678 
/*
 * Create a tracefs file whose inode encodes @cpu in i_cdev, offset by
 * one so CPU 0 is distinguishable from "no CPU set".
 */
struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}
8689 
/* Populate per_cpu/cpuN with the per-CPU trace files for @tr */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &tracing_entries_fops);

	/* Boot-mapped (range) buffers expose their meta data instead */
	if (tr->range_addr_start)
		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &tracing_buffer_meta_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->range_addr_start) {
		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
				      tr, cpu, &snapshot_fops);

		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &snapshot_raw_fops);
	}
#endif
}
8737 
8738 #ifdef CONFIG_FTRACE_SELFTEST
8739 /* Let selftest have access to static functions in this file */
8740 #include "trace_selftest.c"
8741 #endif
8742 
8743 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8744 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8745 			loff_t *ppos)
8746 {
8747 	struct trace_option_dentry *topt = filp->private_data;
8748 	char *buf;
8749 
8750 	if (topt->flags->val & topt->opt->bit)
8751 		buf = "1\n";
8752 	else
8753 		buf = "0\n";
8754 
8755 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8756 }
8757 
8758 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8759 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8760 			 loff_t *ppos)
8761 {
8762 	struct trace_option_dentry *topt = filp->private_data;
8763 	unsigned long val;
8764 	int ret;
8765 
8766 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8767 	if (ret)
8768 		return ret;
8769 
8770 	if (val != 0 && val != 1)
8771 		return -EINVAL;
8772 
8773 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8774 		guard(mutex)(&trace_types_lock);
8775 		ret = __set_tracer_option(topt->tr, topt->flags,
8776 					  topt->opt, !val);
8777 		if (ret)
8778 			return ret;
8779 	}
8780 
8781 	*ppos += cnt;
8782 
8783 	return cnt;
8784 }
8785 
tracing_open_options(struct inode * inode,struct file * filp)8786 static int tracing_open_options(struct inode *inode, struct file *filp)
8787 {
8788 	struct trace_option_dentry *topt = inode->i_private;
8789 	int ret;
8790 
8791 	ret = tracing_check_open_get_tr(topt->tr);
8792 	if (ret)
8793 		return ret;
8794 
8795 	filp->private_data = inode->i_private;
8796 	return 0;
8797 }
8798 
tracing_release_options(struct inode * inode,struct file * file)8799 static int tracing_release_options(struct inode *inode, struct file *file)
8800 {
8801 	struct trace_option_dentry *topt = file->private_data;
8802 
8803 	trace_array_put(topt->tr);
8804 	return 0;
8805 }
8806 
/* File operations for the per-tracer option files under "options/" */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};
8814 
8815 /*
8816  * In order to pass in both the trace_array descriptor as well as the index
8817  * to the flag that the trace option file represents, the trace_array
8818  * has a character array of trace_flags_index[], which holds the index
8819  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8820  * The address of this character array is passed to the flag option file
8821  * read/write callbacks.
8822  *
8823  * In order to extract both the index and the trace_array descriptor,
8824  * get_tr_index() uses the following algorithm.
8825  *
8826  *   idx = *ptr;
8827  *
8828  * As the pointer itself contains the address of the index (remember
8829  * index[1] == 1).
8830  *
8831  * Then to get the trace_array descriptor, by subtracting that index
8832  * from the ptr, we get to the start of the index itself.
8833  *
8834  *   ptr - idx == &index[0]
8835  *
8836  * Then a simple container_of() from that pointer gets us to the
8837  * trace_array descriptor.
8838  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)8839 static void get_tr_index(void *data, struct trace_array **ptr,
8840 			 unsigned int *pindex)
8841 {
8842 	*pindex = *(unsigned char *)data;
8843 
8844 	*ptr = container_of(data - *pindex, struct trace_array,
8845 			    trace_flags_index);
8846 }
8847 
8848 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8849 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8850 			loff_t *ppos)
8851 {
8852 	void *tr_index = filp->private_data;
8853 	struct trace_array *tr;
8854 	unsigned int index;
8855 	char *buf;
8856 
8857 	get_tr_index(tr_index, &tr, &index);
8858 
8859 	if (tr->trace_flags & (1ULL << index))
8860 		buf = "1\n";
8861 	else
8862 		buf = "0\n";
8863 
8864 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8865 }
8866 
8867 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8868 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8869 			 loff_t *ppos)
8870 {
8871 	void *tr_index = filp->private_data;
8872 	struct trace_array *tr;
8873 	unsigned int index;
8874 	unsigned long val;
8875 	int ret;
8876 
8877 	get_tr_index(tr_index, &tr, &index);
8878 
8879 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8880 	if (ret)
8881 		return ret;
8882 
8883 	if (val != 0 && val != 1)
8884 		return -EINVAL;
8885 
8886 	mutex_lock(&event_mutex);
8887 	mutex_lock(&trace_types_lock);
8888 	ret = set_tracer_flag(tr, 1ULL << index, val);
8889 	mutex_unlock(&trace_types_lock);
8890 	mutex_unlock(&event_mutex);
8891 
8892 	if (ret < 0)
8893 		return ret;
8894 
8895 	*ppos += cnt;
8896 
8897 	return cnt;
8898 }
8899 
/* File operations for the core trace-flag option files under "options/" */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
8906 
/*
 * Wrapper around tracefs_create_file() that warns on failure.
 * Returns the new dentry, or NULL if the file could not be created.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry;

	dentry = tracefs_create_file(name, mode, parent, data, fops);
	if (!dentry)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return dentry;
}
8921 
8922 
trace_options_init_dentry(struct trace_array * tr)8923 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8924 {
8925 	struct dentry *d_tracer;
8926 
8927 	if (tr->options)
8928 		return tr->options;
8929 
8930 	d_tracer = tracing_get_dentry(tr);
8931 	if (IS_ERR(d_tracer))
8932 		return NULL;
8933 
8934 	tr->options = tracefs_create_dir("options", d_tracer);
8935 	if (!tr->options) {
8936 		pr_warn("Could not create tracefs directory 'options'\n");
8937 		return NULL;
8938 	}
8939 
8940 	return tr->options;
8941 }
8942 
8943 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8944 create_trace_option_file(struct trace_array *tr,
8945 			 struct trace_option_dentry *topt,
8946 			 struct tracer_flags *flags,
8947 			 struct tracer_opt *opt)
8948 {
8949 	struct dentry *t_options;
8950 
8951 	t_options = trace_options_init_dentry(tr);
8952 	if (!t_options)
8953 		return;
8954 
8955 	topt->flags = flags;
8956 	topt->opt = opt;
8957 	topt->tr = tr;
8958 
8959 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8960 					t_options, topt, &trace_options_fops);
8961 }
8962 
8963 static int
create_trace_option_files(struct trace_array * tr,struct tracer * tracer,struct tracer_flags * flags)8964 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
8965 			  struct tracer_flags *flags)
8966 {
8967 	struct trace_option_dentry *topts;
8968 	struct trace_options *tr_topts;
8969 	struct tracer_opt *opts;
8970 	int cnt;
8971 
8972 	if (!flags || !flags->opts)
8973 		return 0;
8974 
8975 	opts = flags->opts;
8976 
8977 	for (cnt = 0; opts[cnt].name; cnt++)
8978 		;
8979 
8980 	topts = kzalloc_objs(*topts, cnt + 1);
8981 	if (!topts)
8982 		return 0;
8983 
8984 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8985 			    GFP_KERNEL);
8986 	if (!tr_topts) {
8987 		kfree(topts);
8988 		return -ENOMEM;
8989 	}
8990 
8991 	tr->topts = tr_topts;
8992 	tr->topts[tr->nr_topts].tracer = tracer;
8993 	tr->topts[tr->nr_topts].topts = topts;
8994 	tr->nr_topts++;
8995 
8996 	for (cnt = 0; opts[cnt].name; cnt++) {
8997 		create_trace_option_file(tr, &topts[cnt], flags,
8998 					 &opts[cnt]);
8999 		MEM_FAIL(topts[cnt].entry == NULL,
9000 			  "Failed to create trace option: %s",
9001 			  opts[cnt].name);
9002 	}
9003 	return 0;
9004 }
9005 
get_global_flags_val(struct tracer * tracer)9006 static int get_global_flags_val(struct tracer *tracer)
9007 {
9008 	struct tracers *t;
9009 
9010 	list_for_each_entry(t, &global_trace.tracers, list) {
9011 		if (t->tracer != tracer)
9012 			continue;
9013 		if (!t->flags)
9014 			return -1;
9015 		return t->flags->val;
9016 	}
9017 	return -1;
9018 }
9019 
add_tracer_options(struct trace_array * tr,struct tracers * t)9020 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9021 {
9022 	struct tracer *tracer = t->tracer;
9023 	struct tracer_flags *flags = t->flags ?: tracer->flags;
9024 
9025 	if (!flags)
9026 		return 0;
9027 
9028 	/* Only add tracer options after update_tracer_options finish */
9029 	if (!tracer_options_updated)
9030 		return 0;
9031 
9032 	return create_trace_option_files(tr, tracer, flags);
9033 }
9034 
add_tracer(struct trace_array * tr,struct tracer * tracer)9035 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9036 {
9037 	struct tracer_flags *flags;
9038 	struct tracers *t;
9039 	int ret;
9040 
9041 	/* Only enable if the directory has been created already. */
9042 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9043 		return 0;
9044 
9045 	/*
9046 	 * If this is an instance, only create flags for tracers
9047 	 * the instance may have.
9048 	 */
9049 	if (!trace_ok_for_array(tracer, tr))
9050 		return 0;
9051 
9052 	t = kmalloc_obj(*t);
9053 	if (!t)
9054 		return -ENOMEM;
9055 
9056 	t->tracer = tracer;
9057 	t->flags = NULL;
9058 	list_add(&t->list, &tr->tracers);
9059 
9060 	flags = tracer->flags;
9061 	if (!flags) {
9062 		if (!tracer->default_flags)
9063 			return 0;
9064 
9065 		/*
9066 		 * If the tracer defines default flags, it means the flags are
9067 		 * per trace instance.
9068 		 */
9069 		flags = kmalloc_obj(*flags);
9070 		if (!flags)
9071 			return -ENOMEM;
9072 
9073 		*flags = *tracer->default_flags;
9074 		flags->trace = tracer;
9075 
9076 		t->flags = flags;
9077 
9078 		/* If this is an instance, inherit the global_trace flags */
9079 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9080 			int val = get_global_flags_val(tracer);
9081 			if (!WARN_ON_ONCE(val < 0))
9082 				flags->val = val;
9083 		}
9084 	}
9085 
9086 	ret = add_tracer_options(tr, t);
9087 	if (ret < 0) {
9088 		list_del(&t->list);
9089 		kfree(t->flags);
9090 		kfree(t);
9091 	}
9092 
9093 	return ret;
9094 }
9095 
9096 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9097 create_trace_option_core_file(struct trace_array *tr,
9098 			      const char *option, long index)
9099 {
9100 	struct dentry *t_options;
9101 
9102 	t_options = trace_options_init_dentry(tr);
9103 	if (!t_options)
9104 		return NULL;
9105 
9106 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9107 				 (void *)&tr->trace_flags_index[index],
9108 				 &trace_options_core_fops);
9109 }
9110 
create_trace_options_dir(struct trace_array * tr)9111 static void create_trace_options_dir(struct trace_array *tr)
9112 {
9113 	struct dentry *t_options;
9114 	bool top_level = tr == &global_trace;
9115 	int i;
9116 
9117 	t_options = trace_options_init_dentry(tr);
9118 	if (!t_options)
9119 		return;
9120 
9121 	for (i = 0; trace_options[i]; i++) {
9122 		if (top_level ||
9123 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9124 			create_trace_option_core_file(tr, trace_options[i], i);
9125 		}
9126 	}
9127 }
9128 
9129 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9130 rb_simple_read(struct file *filp, char __user *ubuf,
9131 	       size_t cnt, loff_t *ppos)
9132 {
9133 	struct trace_array *tr = filp->private_data;
9134 	char buf[64];
9135 	int r;
9136 
9137 	r = tracer_tracing_is_on(tr);
9138 	r = sprintf(buf, "%d\n", r);
9139 
9140 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9141 }
9142 
9143 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9144 rb_simple_write(struct file *filp, const char __user *ubuf,
9145 		size_t cnt, loff_t *ppos)
9146 {
9147 	struct trace_array *tr = filp->private_data;
9148 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9149 	unsigned long val;
9150 	int ret;
9151 
9152 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9153 	if (ret)
9154 		return ret;
9155 
9156 	if (buffer) {
9157 		guard(mutex)(&trace_types_lock);
9158 		if (!!val == tracer_tracing_is_on(tr)) {
9159 			val = 0; /* do nothing */
9160 		} else if (val) {
9161 			tracer_tracing_on(tr);
9162 			if (tr->current_trace->start)
9163 				tr->current_trace->start(tr);
9164 		} else {
9165 			tracer_tracing_off(tr);
9166 			if (tr->current_trace->stop)
9167 				tr->current_trace->stop(tr);
9168 			/* Wake up any waiters */
9169 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9170 		}
9171 	}
9172 
9173 	(*ppos)++;
9174 
9175 	return cnt;
9176 }
9177 
/* fops backing the tracing on/off control (rb_simple_read/write above) */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9185 
9186 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9187 buffer_percent_read(struct file *filp, char __user *ubuf,
9188 		    size_t cnt, loff_t *ppos)
9189 {
9190 	struct trace_array *tr = filp->private_data;
9191 	char buf[64];
9192 	int r;
9193 
9194 	r = tr->buffer_percent;
9195 	r = sprintf(buf, "%d\n", r);
9196 
9197 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9198 }
9199 
9200 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9201 buffer_percent_write(struct file *filp, const char __user *ubuf,
9202 		     size_t cnt, loff_t *ppos)
9203 {
9204 	struct trace_array *tr = filp->private_data;
9205 	unsigned long val;
9206 	int ret;
9207 
9208 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9209 	if (ret)
9210 		return ret;
9211 
9212 	if (val > 100)
9213 		return -EINVAL;
9214 
9215 	tr->buffer_percent = val;
9216 
9217 	(*ppos)++;
9218 
9219 	return cnt;
9220 }
9221 
/* fops for the buffer_percent control (buffer_percent_read/write above) */
static const struct file_operations buffer_percent_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_percent_read,
	.write		= buffer_percent_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9229 
9230 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9231 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9232 {
9233 	struct trace_array *tr = filp->private_data;
9234 	size_t size;
9235 	char buf[64];
9236 	int order;
9237 	int r;
9238 
9239 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9240 	size = (PAGE_SIZE << order) / 1024;
9241 
9242 	r = sprintf(buf, "%zd\n", size);
9243 
9244 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9245 }
9246 
/*
 * Change the ring buffer's sub-buffer (page) size. The value written is
 * in KB and is rounded up to a power-of-two number of system pages
 * (order 0..7, i.e. 1 to 128 pages). The snapshot buffer, when
 * allocated, is kept at the same order; if that fails the main buffer
 * is rolled back, and if even the rollback fails tracing is disabled.
 *
 * Returns the number of bytes consumed, or a negative errno.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	/* Keep the snapshot buffer's sub-buffer order in sync */
	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
 out_max:
#endif
	(*ppos)++;
 out:
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}
9318 
/* fops for the sub-buffer size control (buffer_subbuf_size_read/write) */
static const struct file_operations buffer_subbuf_size_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_subbuf_size_read,
	.write		= buffer_subbuf_size_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9326 
/*
 * Parent directory under which trace instances are created; NULL until
 * the instances directory has been set up (see trace_array_create_dir()).
 */
static struct dentry *trace_instance_dir;

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9331 
#ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: for a module named in the previous
 * boot's scratch table, record the delta between its current text base
 * and its previous load address (or clear the delta when the module is
 * going away). Always returns 0 to continue iteration.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	struct trace_array *tr = data;
	int i;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	for (i = 0; i < tscratch->nr_entries; i++) {
		entry = &tscratch->entries[i];
		if (strcmp(mod->name, entry->mod_name))
			continue;
		if (mod->state == MODULE_STATE_GOING)
			module_delta->delta[i] = 0;
		else
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		break;
	}
	return 0;
}
#else
/* Module support disabled: nothing to compute */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
9362 
mod_addr_comp(const void * a,const void * b,const void * data)9363 static int mod_addr_comp(const void *a, const void *b, const void *data)
9364 {
9365 	const struct trace_mod_entry *e1 = a;
9366 	const struct trace_mod_entry *e2 = b;
9367 
9368 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9369 }
9370 
/*
 * Validate and adopt the persistent "scratch" area of a boot-mapped ring
 * buffer: compute the kernel-text and per-module address deltas between
 * the boot that wrote the buffer and this one, and restore the previous
 * boot's trace clock. On any inconsistency the whole scratch area is
 * wiped so stale data is never trusted.
 */
static void setup_trace_scratch(struct trace_array *tr,
				struct trace_scratch *tscratch, unsigned int size)
{
	struct trace_module_delta *module_delta;
	struct trace_mod_entry *entry;
	int i, nr_entries;

	if (!tscratch)
		return;

	tr->scratch = tscratch;
	tr->scratch_size = size;

	/* Delta between the previous boot's kernel text and this one's */
	if (tscratch->text_addr)
		tr->text_delta = (unsigned long)_text - tscratch->text_addr;

	/* Reject a module table claiming more entries than fit in @size */
	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
		goto reset;

	/* Check if each module name is a valid string */
	for (i = 0; i < tscratch->nr_entries; i++) {
		int n;

		entry = &tscratch->entries[i];

		for (n = 0; n < MODULE_NAME_LEN; n++) {
			if (entry->mod_name[n] == '\0')
				break;
			if (!isprint(entry->mod_name[n]))
				goto reset;
		}
		/* Name was not NUL terminated within the buffer */
		if (n == MODULE_NAME_LEN)
			goto reset;
	}

	/* Sort the entries so that we can find appropriate module from address. */
	nr_entries = tscratch->nr_entries;
	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
	       mod_addr_comp, NULL, NULL);

	if (IS_ENABLED(CONFIG_MODULES)) {
		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
		if (!module_delta) {
			pr_info("module_delta allocation failed. Not able to decode module address.");
			goto reset;
		}
		init_rcu_head(&module_delta->rcu);
	} else
		module_delta = NULL;
	WRITE_ONCE(tr->module_delta, module_delta);

	/* Scan modules to make text delta for modules. */
	module_for_each_mod(make_mod_delta, tr);

	/* Set trace_clock as the same of the previous boot. */
	if (tscratch->clock_id != tr->clock_id) {
		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
			pr_info("the previous trace_clock info is not valid.");
			goto reset;
		}
	}
	return;
 reset:
	/* Invalid trace modules */
	memset(tscratch, 0, size);
}
9438 
/*
 * Allocate the ring buffer and per-CPU data of @buf for instance @tr.
 * When the instance describes a boot-mapped address range, the buffer is
 * placed in that range and a scratch area (sized for 128 modules) is
 * reserved and adopted via setup_trace_scratch(); otherwise a normal
 * ring buffer of @size bytes is allocated.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size)
{
	enum ring_buffer_flags rb_flags;
	struct trace_scratch *tscratch;
	unsigned int scratch_size = 0;

	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	if (tr->range_addr_start && tr->range_addr_size) {
		/* Add scratch buffer to handle 128 modules */
		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
						      tr->range_addr_start,
						      tr->range_addr_size,
						      struct_size(tscratch, entries, 128));

		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
		setup_trace_scratch(tr, tscratch, scratch_size);

		/*
		 * This is basically the same as a mapped buffer,
		 * with the same restrictions.
		 */
		tr->mapped++;
	} else {
		buf->buffer = ring_buffer_alloc(size, rb_flags);
	}
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/*
	 * Allocate the first page for all buffers.
	 * NOTE(review): this uses tr->array_buffer rather than @buf, so a
	 * call for a snapshot buffer re-applies the main buffer's entry
	 * count instead of its own — confirm this is intended.
	 */
	set_buffer_entries(&tr->array_buffer,
			   ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}
9484 
free_trace_buffer(struct array_buffer * buf)9485 static void free_trace_buffer(struct array_buffer *buf)
9486 {
9487 	if (buf->buffer) {
9488 		ring_buffer_free(buf->buffer);
9489 		buf->buffer = NULL;
9490 		free_percpu(buf->data);
9491 		buf->data = NULL;
9492 	}
9493 }
9494 
allocate_trace_buffers(struct trace_array * tr,unsigned long size)9495 static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
9496 {
9497 	int ret;
9498 
9499 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9500 	if (ret)
9501 		return ret;
9502 
9503 #ifdef CONFIG_TRACER_SNAPSHOT
9504 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9505 	if (tr->range_addr_start)
9506 		return 0;
9507 
9508 	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
9509 				    allocate_snapshot ? size : 1);
9510 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9511 		free_trace_buffer(&tr->array_buffer);
9512 		return -ENOMEM;
9513 	}
9514 	tr->allocated_snapshot = allocate_snapshot;
9515 
9516 	allocate_snapshot = false;
9517 #endif
9518 
9519 	return 0;
9520 }
9521 
/* Release all ring buffers and the module delta table owned by @tr */
static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->array_buffer);
	kfree(tr->module_delta);

#ifdef CONFIG_TRACER_SNAPSHOT
	free_trace_buffer(&tr->snapshot_buffer);
#endif
}
9534 
init_trace_flags_index(struct trace_array * tr)9535 static void init_trace_flags_index(struct trace_array *tr)
9536 {
9537 	int i;
9538 
9539 	/* Used by the trace options files */
9540 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9541 		tr->trace_flags_index[i] = i;
9542 }
9543 
__update_tracer(struct trace_array * tr)9544 static int __update_tracer(struct trace_array *tr)
9545 {
9546 	struct tracer *t;
9547 	int ret = 0;
9548 
9549 	for (t = trace_types; t && !ret; t = t->next)
9550 		ret = add_tracer(tr, t);
9551 
9552 	return ret;
9553 }
9554 
__update_tracer_options(struct trace_array * tr)9555 static __init int __update_tracer_options(struct trace_array *tr)
9556 {
9557 	struct tracers *t;
9558 	int ret = 0;
9559 
9560 	list_for_each_entry(t, &tr->tracers, list) {
9561 		ret = add_tracer_options(tr, t);
9562 		if (ret < 0)
9563 			break;
9564 	}
9565 
9566 	return ret;
9567 }
9568 
update_tracer_options(void)9569 static __init void update_tracer_options(void)
9570 {
9571 	struct trace_array *tr;
9572 
9573 	guard(mutex)(&trace_types_lock);
9574 	tracer_options_updated = true;
9575 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
9576 		__update_tracer_options(tr);
9577 }
9578 
9579 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9580 struct trace_array *trace_array_find(const char *instance)
9581 {
9582 	struct trace_array *tr, *found = NULL;
9583 
9584 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9585 		if (tr->name && strcmp(tr->name, instance) == 0) {
9586 			found = tr;
9587 			break;
9588 		}
9589 	}
9590 
9591 	return found;
9592 }
9593 
trace_array_find_get(const char * instance)9594 struct trace_array *trace_array_find_get(const char *instance)
9595 {
9596 	struct trace_array *tr;
9597 
9598 	guard(mutex)(&trace_types_lock);
9599 	tr = trace_array_find(instance);
9600 	if (tr && __trace_array_get(tr) < 0)
9601 		tr = NULL;
9602 
9603 	return tr;
9604 }
9605 
trace_array_create_dir(struct trace_array * tr)9606 static int trace_array_create_dir(struct trace_array *tr)
9607 {
9608 	int ret;
9609 
9610 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9611 	if (!tr->dir)
9612 		return -EINVAL;
9613 
9614 	ret = event_trace_add_tracer(tr->dir, tr);
9615 	if (ret) {
9616 		tracefs_remove(tr->dir);
9617 		return ret;
9618 	}
9619 
9620 	init_tracer_tracefs(tr, tr->dir);
9621 	ret = __update_tracer(tr);
9622 	if (ret) {
9623 		event_trace_del_tracer(tr);
9624 		tracefs_remove(tr->dir);
9625 		return ret;
9626 	}
9627 	return 0;
9628 }
9629 
/*
 * Allocate and register a new trace instance.
 * @name:    instance name (duplicated)
 * @systems: event systems to expose (NULL means all)
 * @range_addr_start, @range_addr_size: physical range for boot-mapped
 *           ring buffers (0 for normal allocation)
 *
 * Returns the new trace_array with one reference held, or an ERR_PTR()
 * on failure.
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* Inherit the global flags, minus those that must start cleared */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* New instances start with the no-op tracer */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is expanded by default */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	trace_array_init_autoremove(tr);

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	/* Before tracefs is ready, only register the early events */
	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
9729 
trace_array_create(const char * name)9730 static struct trace_array *trace_array_create(const char *name)
9731 {
9732 	return trace_array_create_systems(name, NULL, 0, 0);
9733 }
9734 
instance_mkdir(const char * name)9735 static int instance_mkdir(const char *name)
9736 {
9737 	struct trace_array *tr;
9738 	int ret;
9739 
9740 	guard(mutex)(&event_mutex);
9741 	guard(mutex)(&trace_types_lock);
9742 
9743 	ret = -EEXIST;
9744 	if (trace_array_find(name))
9745 		return -EEXIST;
9746 
9747 	tr = trace_array_create(name);
9748 
9749 	ret = PTR_ERR_OR_ZERO(tr);
9750 
9751 	return ret;
9752 }
9753 
9754 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)9755 static u64 map_pages(unsigned long start, unsigned long size)
9756 {
9757 	unsigned long vmap_start, vmap_end;
9758 	struct vm_struct *area;
9759 	int ret;
9760 
9761 	area = get_vm_area(size, VM_IOREMAP);
9762 	if (!area)
9763 		return 0;
9764 
9765 	vmap_start = (unsigned long) area->addr;
9766 	vmap_end = vmap_start + size;
9767 
9768 	ret = vmap_page_range(vmap_start, vmap_end,
9769 			      start, pgprot_nx(PAGE_KERNEL));
9770 	if (ret < 0) {
9771 		free_vm_area(area);
9772 		return 0;
9773 	}
9774 
9775 	return (u64)vmap_start;
9776 }
9777 #else
map_pages(unsigned long start,unsigned long size)9778 static inline u64 map_pages(unsigned long start, unsigned long size)
9779 {
9780 	return 0;
9781 }
9782 #endif
9783 
9784 /**
9785  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9786  * @name: The name of the trace array to be looked up/created.
9787  * @systems: A list of systems to create event directories for (NULL for all)
9788  *
9789  * Returns pointer to trace array with given name.
9790  * NULL, if it cannot be created.
9791  *
9792  * NOTE: This function increments the reference counter associated with the
9793  * trace array returned. This makes sure it cannot be freed while in use.
9794  * Use trace_array_put() once the trace array is no longer needed.
9795  * If the trace_array is to be freed, trace_array_destroy() needs to
9796  * be called after the trace_array_put(), or simply let user space delete
9797  * it from the tracefs instances directory. But until the
9798  * trace_array_put() is called, user space can not delete it.
9799  *
9800  */
trace_array_get_by_name(const char * name,const char * systems)9801 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9802 {
9803 	struct trace_array *tr;
9804 
9805 	guard(mutex)(&event_mutex);
9806 	guard(mutex)(&trace_types_lock);
9807 
9808 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9809 		if (tr->name && strcmp(tr->name, name) == 0) {
9810 			/* if this fails, @tr is going to be removed. */
9811 			if (__trace_array_get(tr) < 0)
9812 				tr = NULL;
9813 			return tr;
9814 		}
9815 	}
9816 
9817 	tr = trace_array_create_systems(name, systems, 0, 0);
9818 
9819 	if (IS_ERR(tr))
9820 		tr = NULL;
9821 	else
9822 		tr->ref++;
9823 
9824 	return tr;
9825 }
9826 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9827 
/*
 * Tear down and free a trace instance. Caller must hold event_mutex
 * and trace_types_lock. Returns -EBUSY if the instance still has
 * users, 0 on success. The teardown order below matters: tracers and
 * events are stopped before their files and buffers are released.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* Redirect trace_printk() output back to the top-level buffer. */
	if (printk_trace == tr)
		update_printk_trace(&global_trace);

	/* Must be done before disabling all the flags */
	if (update_marker_trace(tr, 0))
		synchronize_rcu();

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1ULL << i, 0);
	}

	/* Stop tracing and detach everything hanging off this instance. */
	trace_array_cancel_autoremove(tr);
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);
	free_tracers(tr);

	/* A reserve_mem backed instance hands its memory range back. */
	if (tr->range_name) {
		reserve_mem_release_by_name(tr->range_name);
		kfree(tr->range_name);
	}
	/* Backup instances copied their buffer into vmalloc()ed memory. */
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		vfree((void *)tr->range_addr_start);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
9883 
trace_array_destroy(struct trace_array * this_tr)9884 int trace_array_destroy(struct trace_array *this_tr)
9885 {
9886 	struct trace_array *tr;
9887 
9888 	if (!this_tr)
9889 		return -EINVAL;
9890 
9891 	guard(mutex)(&event_mutex);
9892 	guard(mutex)(&trace_types_lock);
9893 
9894 
9895 	/* Making sure trace array exists before destroying it. */
9896 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9897 		if (tr == this_tr)
9898 			return __remove_instance(tr);
9899 	}
9900 
9901 	return -ENODEV;
9902 }
9903 EXPORT_SYMBOL_GPL(trace_array_destroy);
9904 
instance_rmdir(const char * name)9905 static int instance_rmdir(const char *name)
9906 {
9907 	struct trace_array *tr;
9908 
9909 	guard(mutex)(&event_mutex);
9910 	guard(mutex)(&trace_types_lock);
9911 
9912 	tr = trace_array_find(name);
9913 	if (!tr)
9914 		return -ENODEV;
9915 
9916 	return __remove_instance(tr);
9917 }
9918 
create_trace_instances(struct dentry * d_tracer)9919 static __init void create_trace_instances(struct dentry *d_tracer)
9920 {
9921 	struct trace_array *tr;
9922 
9923 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9924 							 instance_mkdir,
9925 							 instance_rmdir);
9926 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9927 		return;
9928 
9929 	guard(mutex)(&event_mutex);
9930 	guard(mutex)(&trace_types_lock);
9931 
9932 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9933 		if (!tr->name)
9934 			continue;
9935 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9936 			     "Failed to create instance directory\n"))
9937 			return;
9938 	}
9939 }
9940 
/*
 * Populate @d_tracer with the control files for trace instance @tr.
 * Read-only (e.g. last-boot backup) instances get a reduced, mostly
 * read-only set of files; writable instances get the full set.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	umode_t writable_mode = TRACE_MODE_WRITE;
	int cpu;

	/* Read-only instances expose would-be-writable files read-only. */
	if (trace_array_is_readonly(tr))
		writable_mode = TRACE_MODE_READ;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			  tr, &show_traces_fops);

	trace_create_file("current_tracer", writable_mode, d_tracer,
			  tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
			  tr, &tracing_cpumask_fops);

	/* Options are used for changing print-format even for readonly instance. */
	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", writable_mode, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("trace_clock", writable_mode, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default: readers are woken when the buffer is half full. */
	tr->buffer_percent = 50;

	trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	create_trace_options_dir(tr);

	/* Only boot-mapped (persistent) buffers carry last-boot info. */
	if (tr->range_addr_start)
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	/* Read-only instance has above files only. */
	if (trace_array_is_readonly(tr))
		return;

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/* Cache the marker event file; used by the trace_marker write path. */
	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_percent_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_syscall_buf_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Only non-mapped buffers have snapshot buffers. */
	if (!tr->range_addr_start)
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	ftrace_init_tracefs(tr, d_tracer);
}
10035 
10036 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)10037 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10038 {
10039 	struct vfsmount *mnt;
10040 	struct file_system_type *type;
10041 	struct fs_context *fc;
10042 	int ret;
10043 
10044 	/*
10045 	 * To maintain backward compatibility for tools that mount
10046 	 * debugfs to get to the tracing facility, tracefs is automatically
10047 	 * mounted to the debugfs/tracing directory.
10048 	 */
10049 	type = get_fs_type("tracefs");
10050 	if (!type)
10051 		return NULL;
10052 
10053 	fc = fs_context_for_submount(type, mntpt);
10054 	put_filesystem(type);
10055 	if (IS_ERR(fc))
10056 		return ERR_CAST(fc);
10057 
10058 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10059 
10060 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
10061 	if (!ret)
10062 		mnt = fc_mount(fc);
10063 	else
10064 		mnt = ERR_PTR(ret);
10065 
10066 	put_fs_context(fc);
10067 	return mnt;
10068 }
10069 #endif
10070 
10071 /**
10072  * tracing_init_dentry - initialize top level trace array
10073  *
10074  * This is called when creating files or directories in the tracing
10075  * directory. It is called via fs_initcall() by any of the boot up code
10076  * and expects to return the dentry of the top level tracing directory.
10077  */
tracing_init_dentry(void)10078 int tracing_init_dentry(void)
10079 {
10080 	struct trace_array *tr = &global_trace;
10081 
10082 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10083 		pr_warn("Tracing disabled due to lockdown\n");
10084 		return -EPERM;
10085 	}
10086 
10087 	/* The top level trace array uses  NULL as parent */
10088 	if (tr->dir)
10089 		return 0;
10090 
10091 	if (WARN_ON(!tracefs_initialized()))
10092 		return -ENODEV;
10093 
10094 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10095 	/*
10096 	 * As there may still be users that expect the tracing
10097 	 * files to exist in debugfs/tracing, we must automount
10098 	 * the tracefs file system there, so older tools still
10099 	 * work with the newer kernel.
10100 	 */
10101 	tr->dir = debugfs_create_automount("tracing", NULL,
10102 					   trace_automount, NULL);
10103 #endif
10104 
10105 	return 0;
10106 }
10107 
10108 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10109 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10110 
10111 struct workqueue_struct *trace_init_wq __initdata;
10112 static struct work_struct eval_map_work __initdata;
10113 static struct work_struct tracerfs_init_work __initdata;
10114 
eval_map_work_func(struct work_struct * work)10115 static void __init eval_map_work_func(struct work_struct *work)
10116 {
10117 	int len;
10118 
10119 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10120 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10121 }
10122 
trace_eval_init(void)10123 static int __init trace_eval_init(void)
10124 {
10125 	INIT_WORK(&eval_map_work, eval_map_work_func);
10126 
10127 	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
10128 	if (!trace_init_wq) {
10129 		pr_err("Unable to allocate trace_init_wq\n");
10130 		/* Do work here */
10131 		eval_map_work_func(&eval_map_work);
10132 		return -ENOMEM;
10133 	}
10134 
10135 	queue_work(trace_init_wq, &eval_map_work);
10136 	return 0;
10137 }
10138 
10139 subsys_initcall(trace_eval_init);
10140 
trace_eval_sync(void)10141 static int __init trace_eval_sync(void)
10142 {
10143 	/* Make sure the eval map updates are finished */
10144 	if (trace_init_wq)
10145 		destroy_workqueue(trace_init_wq);
10146 	return 0;
10147 }
10148 
10149 late_initcall_sync(trace_eval_sync);
10150 
10151 
10152 #ifdef CONFIG_MODULES
10153 
module_exists(const char * module)10154 bool module_exists(const char *module)
10155 {
10156 	/* All modules have the symbol __this_module */
10157 	static const char this_mod[] = "__this_module";
10158 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10159 	unsigned long val;
10160 	int n;
10161 
10162 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10163 
10164 	if (n > sizeof(modname) - 1)
10165 		return false;
10166 
10167 	val = module_kallsyms_lookup_name(modname);
10168 	return val != 0;
10169 }
10170 
/*
 * Called as a module loads: hand its eval maps to the event code so
 * event field types get their enum/sizeof references resolved.
 */
static void trace_module_add_evals(struct module *mod)
{
	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	/* Even if no trace_evals, this need to sanitize field types. */
	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}
10183 
10184 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Called as a module unloads: unlink and free the eval map block that
 * belongs to @mod from the trace_eval_maps list. The list intersperses
 * map entries with "tail" bookkeeping entries, hence the
 * trace_eval_jmp_to_tail() hops while walking it.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	/* The module registered no eval maps: nothing to remove. */
	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	/* Nothing on the list belongs to this module. */
	if (!map)
		return;

	/* Splice the module's block out of the list and free it. */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
10210 #else
/* Without CONFIG_TRACE_EVAL_MAP_FILE there is nothing to tear down. */
static inline void trace_module_remove_evals(struct module *mod) { }
10212 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10213 
/*
 * Account for module load in any boot-mapped (persistent) trace
 * instance: on load, save the module info into the instance's scratch
 * area; for buffers carried over from the previous boot, update the
 * stored module delta instead. @add is true on MODULE_STATE_COMING.
 */
static void trace_module_record(struct module *mod, bool add)
{
	struct trace_array *tr;
	unsigned long flags;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		/* Only the boot/last-boot flags matter here. */
		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
		/* Update any persistent trace array that has already been started */
		if (flags == TRACE_ARRAY_FL_BOOT && add) {
			guard(mutex)(&scratch_mutex);
			save_mod(mod, tr);
		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
			/* Update delta if the module loaded in previous boot */
			make_mod_delta(mod, tr);
		}
	}
}
10231 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10232 static int trace_module_notify(struct notifier_block *self,
10233 			       unsigned long val, void *data)
10234 {
10235 	struct module *mod = data;
10236 
10237 	switch (val) {
10238 	case MODULE_STATE_COMING:
10239 		trace_module_add_evals(mod);
10240 		trace_module_record(mod, true);
10241 		break;
10242 	case MODULE_STATE_GOING:
10243 		trace_module_remove_evals(mod);
10244 		trace_module_record(mod, false);
10245 		break;
10246 	}
10247 
10248 	return NOTIFY_OK;
10249 }
10250 
10251 static struct notifier_block trace_module_nb = {
10252 	.notifier_call = trace_module_notify,
10253 	.priority = 0,
10254 };
10255 #endif /* CONFIG_MODULES */
10256 
/*
 * Create the top-level tracefs files and the instances directory.
 * Runs from trace_init_wq when available, otherwise called directly
 * at fs_initcall time (see tracer_init_tracefs()).
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	/* Track module load/unload for eval maps and persistent buffers. */
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}
10295 
tracer_init_tracefs(void)10296 static __init int tracer_init_tracefs(void)
10297 {
10298 	int ret;
10299 
10300 	trace_access_lock_init();
10301 
10302 	ret = tracing_init_dentry();
10303 	if (ret)
10304 		return 0;
10305 
10306 	if (trace_init_wq) {
10307 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10308 		queue_work(trace_init_wq, &tracerfs_init_work);
10309 	} else {
10310 		tracer_init_tracefs_work_func(NULL);
10311 	}
10312 
10313 	if (rv_init_interface())
10314 		pr_err("RV: Error while creating the RV interface\n");
10315 
10316 	return 0;
10317 }
10318 
10319 fs_initcall(tracer_init_tracefs);
10320 
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* Panic notifier; runs early (priority just below INT_MAX), see below. */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Same handler registered on the die (oops) notifier chain. */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10333 
10334 /*
10335  * The idea is to execute the following die/panic callback early, in order
10336  * to avoid showing irrelevant information in the trace (like other panic
10337  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10338  * warnings get disabled (to prevent potential log flooding).
10339  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10340 static int trace_die_panic_handler(struct notifier_block *self,
10341 				unsigned long ev, void *unused)
10342 {
10343 	if (!ftrace_dump_on_oops_enabled())
10344 		return NOTIFY_DONE;
10345 
10346 	/* The die notifier requires DIE_OOPS to trigger */
10347 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10348 		return NOTIFY_DONE;
10349 
10350 	ftrace_dump(DUMP_PARAM);
10351 
10352 	return NOTIFY_DONE;
10353 }
10354 
10355 /*
10356  * printk is set to max of 1024, we really don't need it that big.
10357  * Nothing should be printing 1000 characters anyway.
10358  */
10359 #define TRACE_MAX_PRINT		1000
10360 
10361 /*
10362  * Define here KERN_TRACE so that we have one place to modify
10363  * it if we decide to change what log level the ftrace dump
10364  * should be at.
10365  */
10366 #define KERN_TRACE		KERN_EMERG
10367 
/*
 * Print the contents of @s to the console at KERN_TRACE (KERN_EMERG)
 * level, clamped to TRACE_MAX_PRINT bytes, then reset the sequence.
 * Used by the oops/panic dump path.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	/* Reset so the sequence can be reused for the next line. */
	trace_seq_init(s);
}
10390 
/*
 * Set up @iter to walk the buffers of @tr, mirroring the normal trace
 * file open path. Static temp/fmt buffers are used because this may
 * run from dump/oops context where kmalloc() cannot be used.
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	/* Give the current tracer a chance to set up its private state. */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
10415 
/* Initialize @iter to iterate over the top-level (global) trace buffer. */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
10420 
/*
 * Dump the contents of @tr's ring buffer to the console. Called from
 * the die/panic/sysrq paths with interrupts disabled for the duration;
 * tracing on @tr is turned off and its buffer kept disabled while the
 * dump runs.
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enable */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	/* DUMP_ORIG restricts the dump to the CPU that triggered it. */
	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* Dumping can take a while; keep the NMI watchdog quiet. */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the user-symbol flag and re-enable the buffer. */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
10507 
/*
 * Parse the ftrace_dump_on_oops parameter and dump the requested
 * buffers. The first comma-separated token may be 0/1/2/orig_cpu and
 * applies to the global buffer; remaining tokens name instances,
 * optionally suffixed "=2" or "=orig_cpu" to dump only the oopsing
 * CPU of that instance.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Work on a copy: strsep() modifies the string it parses. */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* Split "instance=mode" into the name and optional mode. */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
10548 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10549 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10550 {
10551 	static atomic_t dump_running;
10552 
10553 	/* Only allow one dump user at a time. */
10554 	if (atomic_inc_return(&dump_running) != 1) {
10555 		atomic_dec(&dump_running);
10556 		return;
10557 	}
10558 
10559 	switch (oops_dump_mode) {
10560 	case DUMP_ALL:
10561 		ftrace_dump_one(&global_trace, DUMP_ALL);
10562 		break;
10563 	case DUMP_ORIG:
10564 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10565 		break;
10566 	case DUMP_PARAM:
10567 		ftrace_dump_by_param();
10568 		break;
10569 	case DUMP_NONE:
10570 		break;
10571 	default:
10572 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10573 		ftrace_dump_one(&global_trace, DUMP_ALL);
10574 	}
10575 
10576 	atomic_dec(&dump_running);
10577 }
10578 EXPORT_SYMBOL_GPL(ftrace_dump);
10579 
10580 #define WRITE_BUFSIZE  4096
10581 
/*
 * trace_parse_run_command - run @createfn on each command line written
 * from user space.
 * @file: file being written to (unused here)
 * @buffer: user-space buffer holding the commands
 * @count: number of bytes in @buffer
 * @ppos: file position (unused here)
 * @createfn: callback invoked with each parsed command string
 *
 * Input is split on '\n'; anything after a '#' on a line is stripped
 * as a comment. A single line longer than WRITE_BUFSIZE - 2 is
 * rejected with -EINVAL. Returns the number of bytes consumed or a
 * negative error (including any error returned by @createfn).
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	/* Copy the input in WRITE_BUFSIZE-1 sized chunks. */
	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * A partial line at the end of this
					 * chunk: leave @done where it is so
					 * the next chunk re-reads the whole
					 * line - unless the line already
					 * fills the entire buffer.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
10640 
10641 #ifdef CONFIG_TRACER_SNAPSHOT
tr_needs_alloc_snapshot(const char * name)10642 __init static bool tr_needs_alloc_snapshot(const char *name)
10643 {
10644 	char *test;
10645 	int len = strlen(name);
10646 	bool ret;
10647 
10648 	if (!boot_snapshot_index)
10649 		return false;
10650 
10651 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10652 	    boot_snapshot_info[len] == '\t')
10653 		return true;
10654 
10655 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10656 	if (!test)
10657 		return false;
10658 
10659 	sprintf(test, "\t%s\t", name);
10660 	ret = strstr(boot_snapshot_info, test) == NULL;
10661 	kfree(test);
10662 	return ret;
10663 }
10664 
do_allocate_snapshot(const char * name)10665 __init static void do_allocate_snapshot(const char *name)
10666 {
10667 	if (!tr_needs_alloc_snapshot(name))
10668 		return;
10669 
10670 	/*
10671 	 * When allocate_snapshot is set, the next call to
10672 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10673 	 * will allocate the snapshot buffer. That will also clear
10674 	 * this flag.
10675 	 */
10676 	allocate_snapshot = true;
10677 }
10678 #else
/* Snapshot support is not configured in: nothing to allocate. */
static inline void do_allocate_snapshot(const char *name) { }
10680 #endif
10681 
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)10682 __init static int backup_instance_area(const char *backup,
10683 				       unsigned long *addr, phys_addr_t *size)
10684 {
10685 	struct trace_array *backup_tr;
10686 	void *allocated_vaddr = NULL;
10687 
10688 	backup_tr = trace_array_get_by_name(backup, NULL);
10689 	if (!backup_tr) {
10690 		pr_warn("Tracing: Instance %s is not found.\n", backup);
10691 		return -ENOENT;
10692 	}
10693 
10694 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
10695 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
10696 		trace_array_put(backup_tr);
10697 		return -EINVAL;
10698 	}
10699 
10700 	*size = backup_tr->range_addr_size;
10701 
10702 	allocated_vaddr = vzalloc(*size);
10703 	if (!allocated_vaddr) {
10704 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
10705 			backup, (unsigned long)*size);
10706 		trace_array_put(backup_tr);
10707 		return -ENOMEM;
10708 	}
10709 
10710 	memcpy(allocated_vaddr,
10711 		(void *)backup_tr->range_addr_start, (size_t)*size);
10712 	*addr = (unsigned long)allocated_vaddr;
10713 
10714 	trace_array_put(backup_tr);
10715 	return 0;
10716 }
10717 
enable_instances(void)10718 __init static void enable_instances(void)
10719 {
10720 	struct trace_array *tr;
10721 	bool memmap_area = false;
10722 	char *curr_str;
10723 	char *name;
10724 	char *str;
10725 	char *tok;
10726 
10727 	/* A tab is always appended */
10728 	boot_instance_info[boot_instance_index - 1] = '\0';
10729 	str = boot_instance_info;
10730 
10731 	while ((curr_str = strsep(&str, "\t"))) {
10732 		phys_addr_t start = 0;
10733 		phys_addr_t size = 0;
10734 		unsigned long addr = 0;
10735 		bool traceprintk = false;
10736 		bool traceoff = false;
10737 		char *flag_delim;
10738 		char *addr_delim;
10739 		char *rname __free(kfree) = NULL;
10740 		char *backup;
10741 
10742 		tok = strsep(&curr_str, ",");
10743 
10744 		name = strsep(&tok, "=");
10745 		backup = tok;
10746 
10747 		flag_delim = strchr(name, '^');
10748 		addr_delim = strchr(name, '@');
10749 
10750 		if (addr_delim)
10751 			*addr_delim++ = '\0';
10752 
10753 		if (flag_delim)
10754 			*flag_delim++ = '\0';
10755 
10756 		if (backup) {
10757 			if (backup_instance_area(backup, &addr, &size) < 0)
10758 				continue;
10759 		}
10760 
10761 		if (flag_delim) {
10762 			char *flag;
10763 
10764 			while ((flag = strsep(&flag_delim, "^"))) {
10765 				if (strcmp(flag, "traceoff") == 0) {
10766 					traceoff = true;
10767 				} else if ((strcmp(flag, "printk") == 0) ||
10768 					   (strcmp(flag, "traceprintk") == 0) ||
10769 					   (strcmp(flag, "trace_printk") == 0)) {
10770 					traceprintk = true;
10771 				} else {
10772 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10773 						flag, name);
10774 				}
10775 			}
10776 		}
10777 
10778 		tok = addr_delim;
10779 		if (tok && isdigit(*tok)) {
10780 			start = memparse(tok, &tok);
10781 			if (!start) {
10782 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10783 					name);
10784 				continue;
10785 			}
10786 			if (*tok != ':') {
10787 				pr_warn("Tracing: No size specified for instance %s\n", name);
10788 				continue;
10789 			}
10790 			tok++;
10791 			size = memparse(tok, &tok);
10792 			if (!size) {
10793 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10794 					name);
10795 				continue;
10796 			}
10797 			memmap_area = true;
10798 		} else if (tok) {
10799 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10800 				start = 0;
10801 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10802 				continue;
10803 			}
10804 			rname = kstrdup(tok, GFP_KERNEL);
10805 		}
10806 
10807 		if (start) {
10808 			/* Start and size must be page aligned */
10809 			if (start & ~PAGE_MASK) {
10810 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10811 				continue;
10812 			}
10813 			if (size & ~PAGE_MASK) {
10814 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10815 				continue;
10816 			}
10817 
10818 			if (memmap_area)
10819 				addr = map_pages(start, size);
10820 			else
10821 				addr = (unsigned long)phys_to_virt(start);
10822 			if (addr) {
10823 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10824 					name, &start, (unsigned long)size);
10825 			} else {
10826 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10827 				continue;
10828 			}
10829 		} else {
10830 			/* Only non mapped buffers have snapshot buffers */
10831 			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
10832 				do_allocate_snapshot(name);
10833 		}
10834 
10835 		tr = trace_array_create_systems(name, NULL, addr, size);
10836 		if (IS_ERR(tr)) {
10837 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10838 			continue;
10839 		}
10840 
10841 		if (traceoff)
10842 			tracer_tracing_off(tr);
10843 
10844 		if (traceprintk)
10845 			update_printk_trace(tr);
10846 
10847 		/*
10848 		 * memmap'd buffers can not be freed.
10849 		 */
10850 		if (memmap_area) {
10851 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10852 			tr->ref++;
10853 		}
10854 
10855 		/*
10856 		 * Backup buffers can be freed but need vfree().
10857 		 */
10858 		if (backup) {
10859 			tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY;
10860 			trace_array_start_autoremove();
10861 		}
10862 
10863 		if (start || backup) {
10864 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10865 			tr->range_name = no_free_ptr(rname);
10866 		}
10867 
10868 		/*
10869 		 * Save the events to start and enabled them after all boot instances
10870 		 * have been created.
10871 		 */
10872 		tr->boot_events = curr_str;
10873 	}
10874 
10875 	/* Enable the events after all boot instances have been created */
10876 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10877 
10878 		if (!tr->boot_events || !(*tr->boot_events)) {
10879 			tr->boot_events = NULL;
10880 			continue;
10881 		}
10882 
10883 		curr_str = tr->boot_events;
10884 
10885 		/* Clear the instance if this is a persistent buffer */
10886 		if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)
10887 			update_last_data(tr);
10888 
10889 		while ((tok = strsep(&curr_str, ","))) {
10890 			early_enable_events(tr, tok, true);
10891 		}
10892 		tr->boot_events = NULL;
10893 	}
10894 }
10895 
/*
 * Allocate and initialize the global (top-level) trace array at early boot.
 *
 * Called once from early_trace_init().  Sets up cpumasks, the per-CPU ring
 * buffers, the temp buffer used by event triggers, saved-cmdline tracking,
 * notifiers and the bootstrap "nop" tracer, then clears tracing_disabled.
 *
 * Ordering in this function matters: trace_printk buffers must exist before
 * the main ring buffer is allocated, and current_trace must be set before
 * any tracer is registered (see the inline comments below).
 *
 * Returns 0 on success or a negative errno; on failure all partially
 * allocated state is unwound through the goto ladder at the bottom.
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	/* Kernel lockdown (e.g. secure boot) disallows tracefs entirely. */
	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	/* Honor "traceoff" set earlier (e.g. by command line options). */
	if (global_trace.buffer_disabled)
		tracing_off();

	/* trace_clock= boot parameter; fall back to default on error. */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	/* Dump the trace buffer contents on panic / die for post-mortem. */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

	/* Error unwind: each label frees what was allocated above it. */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
11039 
#ifdef CONFIG_FUNCTION_TRACER
/*
 * Used to set module cached ftrace filtering at boot up.
 * Returns the top-level (global) trace array; the returned pointer is
 * to a static object and must not be freed.
 */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif
11047 
ftrace_boot_snapshot(void)11048 void __init ftrace_boot_snapshot(void)
11049 {
11050 #ifdef CONFIG_TRACER_SNAPSHOT
11051 	struct trace_array *tr;
11052 
11053 	if (!snapshot_at_boot)
11054 		return;
11055 
11056 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11057 		if (!tr->allocated_snapshot)
11058 			continue;
11059 
11060 		tracing_snapshot_instance(tr);
11061 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11062 	}
11063 #endif
11064 }
11065 
early_trace_init(void)11066 void __init early_trace_init(void)
11067 {
11068 	if (tracepoint_printk) {
11069 		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
11070 		if (MEM_FAIL(!tracepoint_print_iter,
11071 			     "Failed to allocate trace iterator\n"))
11072 			tracepoint_printk = 0;
11073 		else
11074 			static_key_enable(&tracepoint_printk_key.key);
11075 	}
11076 	tracer_alloc_buffers();
11077 
11078 	init_events();
11079 }
11080 
trace_init(void)11081 void __init trace_init(void)
11082 {
11083 	trace_event_init();
11084 
11085 	if (boot_instance_index)
11086 		enable_instances();
11087 }
11088 
clear_boot_tracer(void)11089 __init static void clear_boot_tracer(void)
11090 {
11091 	/*
11092 	 * The default tracer at boot buffer is an init section.
11093 	 * This function is called in lateinit. If we did not
11094 	 * find the boot tracer, then clear it out, to prevent
11095 	 * later registration from accessing the buffer that is
11096 	 * about to be freed.
11097 	 */
11098 	if (!default_bootup_tracer)
11099 		return;
11100 
11101 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11102 	       default_bootup_tracer);
11103 	default_bootup_tracer = NULL;
11104 }
11105 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	/* Warn loudly: timestamps from an unstable clock can mislead. */
	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
11127 
late_trace_init(void)11128 __init static int late_trace_init(void)
11129 {
11130 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11131 		static_key_disable(&tracepoint_printk_key.key);
11132 		tracepoint_printk = 0;
11133 	}
11134 
11135 	if (traceoff_after_boot)
11136 		tracing_off();
11137 
11138 	tracing_set_default_clock();
11139 	clear_boot_tracer();
11140 	return 0;
11141 }
11142 
11143 late_initcall_sync(late_trace_init);
11144