xref: /linux/kernel/trace/trace.c (revision 0074281bb6316108e0cff094bd4db78ab3eee236)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * ring buffer based function tracer
4   *
5   * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6   * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7   *
8   * Originally taken from the RT patch by:
9   *    Arnaldo Carvalho de Melo <acme@redhat.com>
10   *
11   * Based on code from the latency_tracer, that is:
12   *  Copyright (C) 2004-2006 Ingo Molnar
13   *  Copyright (C) 2004 Nadia Yvette Chambers
14   */
15  #include <linux/ring_buffer.h>
16  #include <linux/utsname.h>
17  #include <linux/stacktrace.h>
18  #include <linux/writeback.h>
19  #include <linux/kallsyms.h>
20  #include <linux/security.h>
21  #include <linux/seq_file.h>
22  #include <linux/irqflags.h>
23  #include <linux/debugfs.h>
24  #include <linux/tracefs.h>
25  #include <linux/pagemap.h>
26  #include <linux/hardirq.h>
27  #include <linux/linkage.h>
28  #include <linux/uaccess.h>
29  #include <linux/cleanup.h>
30  #include <linux/vmalloc.h>
31  #include <linux/ftrace.h>
32  #include <linux/module.h>
33  #include <linux/percpu.h>
34  #include <linux/splice.h>
35  #include <linux/kdebug.h>
36  #include <linux/string.h>
37  #include <linux/mount.h>
38  #include <linux/rwsem.h>
39  #include <linux/slab.h>
40  #include <linux/ctype.h>
41  #include <linux/init.h>
42  #include <linux/panic_notifier.h>
43  #include <linux/poll.h>
44  #include <linux/nmi.h>
45  #include <linux/fs.h>
46  #include <linux/trace.h>
47  #include <linux/sched/clock.h>
48  #include <linux/sched/rt.h>
49  #include <linux/fsnotify.h>
50  #include <linux/irq_work.h>
51  #include <linux/workqueue.h>
52  #include <linux/sort.h>
53  #include <linux/io.h> /* vmap_page_range() */
54  #include <linux/fs_context.h>
55  
56  #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57  
58  #include "trace.h"
59  #include "trace_output.h"
60  
61  #ifdef CONFIG_FTRACE_STARTUP_TEST
62  /*
63   * We need to change this state when a selftest is running.
64   * A selftest will look into the ring buffer to count the
65   * entries inserted during the selftest, although concurrent
66   * insertions into the ring buffer, such as trace_printk(), could occur
67   * at the same time, giving false positive or negative results.
68   */
69  static bool __read_mostly tracing_selftest_running;
70  
71  /*
72   * If boot-time tracing including tracers/events via kernel cmdline
73   * is running, we do not want to run SELFTEST.
74   */
75  bool __read_mostly tracing_selftest_disabled;
76  
77  void __init disable_tracing_selftest(const char *reason)
78  {
79  	if (!tracing_selftest_disabled) {
80  		tracing_selftest_disabled = true;
81  		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82  	}
83  }
84  #else
85  #define tracing_selftest_running	0
86  #define tracing_selftest_disabled	0
87  #endif
88  
89  /* Pipe tracepoints to printk */
90  static struct trace_iterator *tracepoint_print_iter;
91  int tracepoint_printk;
92  static bool tracepoint_printk_stop_on_boot __initdata;
93  static bool traceoff_after_boot __initdata;
94  static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95  
96  /* For tracers that don't implement custom flags */
97  static struct tracer_opt dummy_tracer_opt[] = {
98  	{ }
99  };
100  
101  static int
102  dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103  {
104  	return 0;
105  }
106  
107  /*
108   * To prevent the comm cache from being overwritten when no
109   * tracing is active, only save the comm when a trace event
110   * occurred.
111   */
112  DEFINE_PER_CPU(bool, trace_taskinfo_save);
113  
114  /*
115   * Kill all tracing for good (never come back).
116   * It is initialized to 1 but will turn to zero if the initialization
117   * of the tracer is successful. But that is the only place that sets
118   * this back to zero.
119   */
120  static int tracing_disabled = 1;
121  
122  cpumask_var_t __read_mostly	tracing_buffer_mask;
123  
124  #define MAX_TRACER_SIZE		100
125  /*
126   * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127   *
128   * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129   * is set, then ftrace_dump is called. This will output the contents
130   * of the ftrace buffers to the console.  This is very useful for
131   * capturing traces that lead to crashes and outputting them to a
132   * serial console.
133   *
134   * It is off by default, but you can enable it either by specifying
135   * "ftrace_dump_on_oops" on the kernel command line, or by setting
136   * /proc/sys/kernel/ftrace_dump_on_oops.
137   * Set 1 if you want to dump the buffers of all CPUs
138   * Set 2 if you want to dump the buffer of the CPU that triggered the oops
139   * Set an instance name if you want to dump a specific trace instance
140   * Multiple instance dump is also supported, and instances are separated
141   * by commas.
142   */
143  /* Set to string format zero to disable by default */
144  char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
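/*
 * Illustrative examples (a sketch derived from the comment above and from
 * set_ftrace_dump_on_oops() below; the instance names are hypothetical):
 *
 *   ftrace_dump_on_oops            dump the buffers of all CPUs
 *   ftrace_dump_on_oops=2          dump only the CPU that triggered the oops
 *   ftrace_dump_on_oops=foo,bar    dump the "foo" and "bar" trace instances
 */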
145  
146  /* When set, tracing will stop when a WARN*() is hit */
147  static int __disable_trace_on_warning;
148  
149  int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150  			     void *buffer, size_t *lenp, loff_t *ppos);
151  static const struct ctl_table trace_sysctl_table[] = {
152  	{
153  		.procname	= "ftrace_dump_on_oops",
154  		.data		= &ftrace_dump_on_oops,
155  		.maxlen		= MAX_TRACER_SIZE,
156  		.mode		= 0644,
157  		.proc_handler	= proc_dostring,
158  	},
159  	{
160  		.procname	= "traceoff_on_warning",
161  		.data		= &__disable_trace_on_warning,
162  		.maxlen		= sizeof(__disable_trace_on_warning),
163  		.mode		= 0644,
164  		.proc_handler	= proc_dointvec,
165  	},
166  	{
167  		.procname	= "tracepoint_printk",
168  		.data		= &tracepoint_printk,
169  		.maxlen		= sizeof(tracepoint_printk),
170  		.mode		= 0644,
171  		.proc_handler	= tracepoint_printk_sysctl,
172  	},
173  };
174  
175  static int __init init_trace_sysctls(void)
176  {
177  	register_sysctl_init("kernel", trace_sysctl_table);
178  	return 0;
179  }
180  subsys_initcall(init_trace_sysctls);
181  
182  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183  /* Map of enums to their values, for "eval_map" file */
184  struct trace_eval_map_head {
185  	struct module			*mod;
186  	unsigned long			length;
187  };
188  
189  union trace_eval_map_item;
190  
191  struct trace_eval_map_tail {
192  	/*
193  	 * "end" is first and points to NULL as it must be different
194  	 * than "mod" or "eval_string"
195  	 */
196  	union trace_eval_map_item	*next;
197  	const char			*end;	/* points to NULL */
198  };
199  
200  static DEFINE_MUTEX(trace_eval_mutex);
201  
202  /*
203   * The trace_eval_maps are saved in an array with two extra elements,
204   * one at the beginning, and one at the end. The beginning item contains
205   * the count of the saved maps (head.length), and the module they
206   * belong to if not built in (head.mod). The ending item contains a
207   * pointer to the next array of saved eval_map items.
208   */
209  union trace_eval_map_item {
210  	struct trace_eval_map		map;
211  	struct trace_eval_map_head	head;
212  	struct trace_eval_map_tail	tail;
213  };
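/*
 * Layout sketch of one saved array (illustrative only, derived from the
 * comment above):
 *
 *   [ head ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *      |                                         |
 *      +-- head.length = N, head.mod             +-- tail.next --> next array
 *                                                    (or NULL if this is the last one)
 */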
214  
215  static union trace_eval_map_item *trace_eval_maps;
216  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217  
218  int tracing_set_tracer(struct trace_array *tr, const char *buf);
219  static void ftrace_trace_userstack(struct trace_array *tr,
220  				   struct trace_buffer *buffer,
221  				   unsigned int trace_ctx);
222  
223  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224  static char *default_bootup_tracer;
225  
226  static bool allocate_snapshot;
227  static bool snapshot_at_boot;
228  
229  static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230  static int boot_instance_index;
231  
232  static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233  static int boot_snapshot_index;
234  
235  static int __init set_cmdline_ftrace(char *str)
236  {
237  	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238  	default_bootup_tracer = bootup_tracer_buf;
239  	/* We are using ftrace early, expand it */
240  	trace_set_ring_buffer_expanded(NULL);
241  	return 1;
242  }
243  __setup("ftrace=", set_cmdline_ftrace);
244  
245  int ftrace_dump_on_oops_enabled(void)
246  {
247  	if (!strcmp("0", ftrace_dump_on_oops))
248  		return 0;
249  	else
250  		return 1;
251  }
252  
253  static int __init set_ftrace_dump_on_oops(char *str)
254  {
255  	if (!*str) {
256  		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257  		return 1;
258  	}
259  
260  	if (*str == ',') {
261  		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262  		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263  		return 1;
264  	}
265  
266  	if (*str++ == '=') {
267  		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268  		return 1;
269  	}
270  
271  	return 0;
272  }
273  __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274  
275  static int __init stop_trace_on_warning(char *str)
276  {
277  	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278  		__disable_trace_on_warning = 1;
279  	return 1;
280  }
281  __setup("traceoff_on_warning", stop_trace_on_warning);
282  
283  static int __init boot_alloc_snapshot(char *str)
284  {
285  	char *slot = boot_snapshot_info + boot_snapshot_index;
286  	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287  	int ret;
288  
289  	if (str[0] == '=') {
290  		str++;
291  		if (strlen(str) >= left)
292  			return -1;
293  
294  		ret = snprintf(slot, left, "%s\t", str);
295  		boot_snapshot_index += ret;
296  	} else {
297  		allocate_snapshot = true;
298  		/* We also need the main ring buffer expanded */
299  		trace_set_ring_buffer_expanded(NULL);
300  	}
301  	return 1;
302  }
303  __setup("alloc_snapshot", boot_alloc_snapshot);
304  
305  
306  static int __init boot_snapshot(char *str)
307  {
308  	snapshot_at_boot = true;
309  	boot_alloc_snapshot(str);
310  	return 1;
311  }
312  __setup("ftrace_boot_snapshot", boot_snapshot);
313  
314  
315  static int __init boot_instance(char *str)
316  {
317  	char *slot = boot_instance_info + boot_instance_index;
318  	int left = sizeof(boot_instance_info) - boot_instance_index;
319  	int ret;
320  
321  	if (strlen(str) >= left)
322  		return -1;
323  
324  	ret = snprintf(slot, left, "%s\t", str);
325  	boot_instance_index += ret;
326  
327  	return 1;
328  }
329  __setup("trace_instance=", boot_instance);
330  
331  
332  static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333  
334  static int __init set_trace_boot_options(char *str)
335  {
336  	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337  	return 1;
338  }
339  __setup("trace_options=", set_trace_boot_options);
340  
341  static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342  static char *trace_boot_clock __initdata;
343  
344  static int __init set_trace_boot_clock(char *str)
345  {
346  	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347  	trace_boot_clock = trace_boot_clock_buf;
348  	return 1;
349  }
350  __setup("trace_clock=", set_trace_boot_clock);
351  
352  static int __init set_tracepoint_printk(char *str)
353  {
354  	/* Ignore the "tp_printk_stop_on_boot" param */
355  	if (*str == '_')
356  		return 0;
357  
358  	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359  		tracepoint_printk = 1;
360  	return 1;
361  }
362  __setup("tp_printk", set_tracepoint_printk);
363  
364  static int __init set_tracepoint_printk_stop(char *str)
365  {
366  	tracepoint_printk_stop_on_boot = true;
367  	return 1;
368  }
369  __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370  
371  static int __init set_traceoff_after_boot(char *str)
372  {
373  	traceoff_after_boot = true;
374  	return 1;
375  }
376  __setup("traceoff_after_boot", set_traceoff_after_boot);
377  
378  unsigned long long ns2usecs(u64 nsec)
379  {
380  	nsec += 500;
381  	do_div(nsec, 1000);
382  	return nsec;
383  }
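/*
 * Note (illustrative): the +500 in ns2usecs() above rounds to the nearest
 * microsecond, e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */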
384  
385  static void
386  trace_process_export(struct trace_export *export,
387  	       struct ring_buffer_event *event, int flag)
388  {
389  	struct trace_entry *entry;
390  	unsigned int size = 0;
391  
392  	if (export->flags & flag) {
393  		entry = ring_buffer_event_data(event);
394  		size = ring_buffer_event_length(event);
395  		export->write(export, entry, size);
396  	}
397  }
398  
399  static DEFINE_MUTEX(ftrace_export_lock);
400  
401  static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402  
403  static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404  static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405  static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406  
407  static inline void ftrace_exports_enable(struct trace_export *export)
408  {
409  	if (export->flags & TRACE_EXPORT_FUNCTION)
410  		static_branch_inc(&trace_function_exports_enabled);
411  
412  	if (export->flags & TRACE_EXPORT_EVENT)
413  		static_branch_inc(&trace_event_exports_enabled);
414  
415  	if (export->flags & TRACE_EXPORT_MARKER)
416  		static_branch_inc(&trace_marker_exports_enabled);
417  }
418  
419  static inline void ftrace_exports_disable(struct trace_export *export)
420  {
421  	if (export->flags & TRACE_EXPORT_FUNCTION)
422  		static_branch_dec(&trace_function_exports_enabled);
423  
424  	if (export->flags & TRACE_EXPORT_EVENT)
425  		static_branch_dec(&trace_event_exports_enabled);
426  
427  	if (export->flags & TRACE_EXPORT_MARKER)
428  		static_branch_dec(&trace_marker_exports_enabled);
429  }
430  
431  static void ftrace_exports(struct ring_buffer_event *event, int flag)
432  {
433  	struct trace_export *export;
434  
435  	guard(preempt_notrace)();
436  
437  	export = rcu_dereference_raw_check(ftrace_exports_list);
438  	while (export) {
439  		trace_process_export(export, event, flag);
440  		export = rcu_dereference_raw_check(export->next);
441  	}
442  }
443  
444  static inline void
445  add_trace_export(struct trace_export **list, struct trace_export *export)
446  {
447  	rcu_assign_pointer(export->next, *list);
448  	/*
449  	 * We are adding the export to the list, but another
450  	 * CPU might be walking that list. We need to make sure
451  	 * the export->next pointer is valid before another CPU sees
452  	 * the export pointer included in the list.
453  	 */
454  	rcu_assign_pointer(*list, export);
455  }
456  
457  static inline int
458  rm_trace_export(struct trace_export **list, struct trace_export *export)
459  {
460  	struct trace_export **p;
461  
462  	for (p = list; *p != NULL; p = &(*p)->next)
463  		if (*p == export)
464  			break;
465  
466  	if (*p != export)
467  		return -1;
468  
469  	rcu_assign_pointer(*p, (*p)->next);
470  
471  	return 0;
472  }
473  
474  static inline void
475  add_ftrace_export(struct trace_export **list, struct trace_export *export)
476  {
477  	ftrace_exports_enable(export);
478  
479  	add_trace_export(list, export);
480  }
481  
482  static inline int
483  rm_ftrace_export(struct trace_export **list, struct trace_export *export)
484  {
485  	int ret;
486  
487  	ret = rm_trace_export(list, export);
488  	ftrace_exports_disable(export);
489  
490  	return ret;
491  }
492  
493  int register_ftrace_export(struct trace_export *export)
494  {
495  	if (WARN_ON_ONCE(!export->write))
496  		return -1;
497  
498  	guard(mutex)(&ftrace_export_lock);
499  
500  	add_ftrace_export(&ftrace_exports_list, export);
501  
502  	return 0;
503  }
504  EXPORT_SYMBOL_GPL(register_ftrace_export);
505  
506  int unregister_ftrace_export(struct trace_export *export)
507  {
508  	guard(mutex)(&ftrace_export_lock);
509  	return rm_ftrace_export(&ftrace_exports_list, export);
510  }
511  EXPORT_SYMBOL_GPL(unregister_ftrace_export);
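/*
 * Minimal usage sketch for the export API above (illustrative only, not part
 * of this file; the my_* names are hypothetical and the ->write() prototype
 * is inferred from how trace_process_export() invokes it):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to an out-of-band channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);	// e.g. in module init
 *	...
 *	unregister_ftrace_export(&my_export);	// e.g. in module exit
 */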
512  
513  /* trace_flags holds trace_options default values */
514  #define TRACE_DEFAULT_FLAGS						\
515  	(FUNCTION_DEFAULT_FLAGS |					\
516  	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
517  	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
518  	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
519  	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
520  	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
521  	 TRACE_ITER_COPY_MARKER)
522  
523  /* trace_options that are only supported by global_trace */
524  #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
525  	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526  
527  /* trace_flags that are default zero for instances */
528  #define ZEROED_TRACE_FLAGS \
529  	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530  	 TRACE_ITER_COPY_MARKER)
531  
532  /*
533   * The global_trace is the descriptor that holds the top-level tracing
534   * buffers for the live tracing.
535   */
536  static struct trace_array global_trace = {
537  	.trace_flags = TRACE_DEFAULT_FLAGS,
538  };
539  
540  static struct trace_array *printk_trace = &global_trace;
541  
542  /* List of trace_arrays interested in the top level trace_marker */
543  static LIST_HEAD(marker_copies);
544  
545  static __always_inline bool printk_binsafe(struct trace_array *tr)
546  {
547  	/*
548  	 * The binary format of trace_printk() can cause a crash if used
549  	 * by a buffer from another boot. Force the use of the
550  	 * non-binary version of trace_printk() if the trace_printk
551  	 * buffer is a boot-mapped ring buffer.
552  	 */
553  	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554  }
555  
556  static void update_printk_trace(struct trace_array *tr)
557  {
558  	if (printk_trace == tr)
559  		return;
560  
561  	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562  	printk_trace = tr;
563  	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564  }
565  
566  /* Returns true if the status of tr changed */
567  static bool update_marker_trace(struct trace_array *tr, int enabled)
568  {
569  	lockdep_assert_held(&event_mutex);
570  
571  	if (enabled) {
572  		if (!list_empty(&tr->marker_list))
573  			return false;
574  
575  		list_add_rcu(&tr->marker_list, &marker_copies);
576  		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577  		return true;
578  	}
579  
580  	if (list_empty(&tr->marker_list))
581  		return false;
582  
583  	list_del_init(&tr->marker_list);
584  	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585  	return true;
586  }
587  
588  void trace_set_ring_buffer_expanded(struct trace_array *tr)
589  {
590  	if (!tr)
591  		tr = &global_trace;
592  	tr->ring_buffer_expanded = true;
593  }
594  
595  LIST_HEAD(ftrace_trace_arrays);
596  
597  int trace_array_get(struct trace_array *this_tr)
598  {
599  	struct trace_array *tr;
600  
601  	guard(mutex)(&trace_types_lock);
602  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603  		if (tr == this_tr) {
604  			tr->ref++;
605  			return 0;
606  		}
607  	}
608  
609  	return -ENODEV;
610  }
611  
612  static void __trace_array_put(struct trace_array *this_tr)
613  {
614  	WARN_ON(!this_tr->ref);
615  	this_tr->ref--;
616  }
617  
618  /**
619   * trace_array_put - Decrement the reference counter for this trace array.
620   * @this_tr : pointer to the trace array
621   *
622   * NOTE: Use this when we no longer need the trace array returned by
623   * trace_array_get_by_name(). This ensures the trace array can be later
624   * destroyed.
625   *
626   */
627  void trace_array_put(struct trace_array *this_tr)
628  {
629  	if (!this_tr)
630  		return;
631  
632  	guard(mutex)(&trace_types_lock);
633  	__trace_array_put(this_tr);
634  }
635  EXPORT_SYMBOL_GPL(trace_array_put);
636  
637  int tracing_check_open_get_tr(struct trace_array *tr)
638  {
639  	int ret;
640  
641  	ret = security_locked_down(LOCKDOWN_TRACEFS);
642  	if (ret)
643  		return ret;
644  
645  	if (tracing_disabled)
646  		return -ENODEV;
647  
648  	if (tr && trace_array_get(tr) < 0)
649  		return -ENODEV;
650  
651  	return 0;
652  }
653  
654  /**
655   * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
656   * @filtered_pids: The list of pids to check
657   * @search_pid: The PID to find in @filtered_pids
658   *
659   * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
660   */
661  bool
662  trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
663  {
664  	return trace_pid_list_is_set(filtered_pids, search_pid);
665  }
666  
667  /**
668   * trace_ignore_this_task - should a task be ignored for tracing
669   * @filtered_pids: The list of pids to check
670   * @filtered_no_pids: The list of pids not to be traced
671   * @task: The task that should be ignored if not filtered
672   *
673   * Checks if @task should be traced or not from @filtered_pids.
674   * Returns true if @task should *NOT* be traced.
675   * Returns false if @task should be traced.
676   */
677  bool
678  trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679  		       struct trace_pid_list *filtered_no_pids,
680  		       struct task_struct *task)
681  {
682  	/*
683  	 * If filtered_no_pids is not empty, and the task's pid is listed
684  	 * in filtered_no_pids, then return true.
685  	 * Otherwise, if filtered_pids is empty, that means we can
686  	 * trace all tasks. If it has content, then only trace pids
687  	 * within filtered_pids.
688  	 */
689  
690  	return (filtered_pids &&
691  		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
692  		(filtered_no_pids &&
693  		 trace_find_filtered_pid(filtered_no_pids, task->pid));
694  }
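/*
 * Typical call pattern (illustrative sketch; pid_list and no_pid_list stand
 * in for whatever filter lists the caller tracks):
 *
 *	if (trace_ignore_this_task(pid_list, no_pid_list, current))
 *		return;		// do not record this event for this task
 */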
695  
696  /**
697   * trace_filter_add_remove_task - Add or remove a task from a pid_list
698   * @pid_list: The list to modify
699   * @self: The current task for fork or NULL for exit
700   * @task: The task to add or remove
701   *
702   * If adding a task, if @self is defined, the task is only added if @self
703   * is also included in @pid_list. This happens on fork and tasks should
704   * only be added when the parent is listed. If @self is NULL, then the
705   * @task pid will be removed from the list, which would happen on exit
706   * of a task.
707   */
708  void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709  				  struct task_struct *self,
710  				  struct task_struct *task)
711  {
712  	if (!pid_list)
713  		return;
714  
715  	/* For forks, we only add if the forking task is listed */
716  	if (self) {
717  		if (!trace_find_filtered_pid(pid_list, self->pid))
718  			return;
719  	}
720  
721  	/* "self" is set for forks, and NULL for exits */
722  	if (self)
723  		trace_pid_list_set(pid_list, task->pid);
724  	else
725  		trace_pid_list_clear(pid_list, task->pid);
726  }
727  
728  /**
729   * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730   * @pid_list: The pid list to show
731   * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732   * @pos: The position of the file
733   *
734   * This is used by the seq_file "next" operation to iterate the pids
735   * listed in a trace_pid_list structure.
736   *
737   * Returns the pid+1 as we want to display pid of zero, but NULL would
738   * stop the iteration.
739   */
740  void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741  {
742  	long pid = (unsigned long)v;
743  	unsigned int next;
744  
745  	(*pos)++;
746  
747  	/* pid already is +1 of the actual previous bit */
748  	if (trace_pid_list_next(pid_list, pid, &next) < 0)
749  		return NULL;
750  
751  	pid = next;
752  
753  	/* Return pid + 1 to allow zero to be represented */
754  	return (void *)(pid + 1);
755  }
756  
757  /**
758   * trace_pid_start - Used for seq_file to start reading pid lists
759   * @pid_list: The pid list to show
760   * @pos: The position of the file
761   *
762   * This is used by seq_file "start" operation to start the iteration
763   * of listing pids.
764   *
765   * Returns the pid+1 as we want to display pid of zero, but NULL would
766   * stop the iteration.
767   */
768  void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769  {
770  	unsigned long pid;
771  	unsigned int first;
772  	loff_t l = 0;
773  
774  	if (trace_pid_list_first(pid_list, &first) < 0)
775  		return NULL;
776  
777  	pid = first;
778  
779  	/* Return pid + 1 so that zero can be the exit value */
780  	for (pid++; pid && l < *pos;
781  	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782  		;
783  	return (void *)pid;
784  }
785  
786  /**
787   * trace_pid_show - show the current pid in seq_file processing
788   * @m: The seq_file structure to write into
789   * @v: A void pointer of the pid (+1) value to display
790   *
791   * Can be directly used by seq_file operations to display the current
792   * pid value.
793   */
794  int trace_pid_show(struct seq_file *m, void *v)
795  {
796  	unsigned long pid = (unsigned long)v - 1;
797  
798  	seq_printf(m, "%lu\n", pid);
799  	return 0;
800  }
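/*
 * Illustrative seq_file wiring for the three helpers above (a sketch, not
 * part of this file; the my_* names are hypothetical and it assumes the pid
 * list is reachable from m->private - real users also take care of the
 * RCU/locking around the list in start/stop):
 *
 *	static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void my_pid_seq_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,
 *		.next	= my_pid_seq_next,
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */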
801  
802  /* 128 should be much more than enough */
803  #define PID_BUF_SIZE		127
804  
805  int trace_pid_write(struct trace_pid_list *filtered_pids,
806  		    struct trace_pid_list **new_pid_list,
807  		    const char __user *ubuf, size_t cnt)
808  {
809  	struct trace_pid_list *pid_list;
810  	struct trace_parser parser;
811  	unsigned long val;
812  	int nr_pids = 0;
813  	ssize_t read = 0;
814  	ssize_t ret;
815  	loff_t pos;
816  	pid_t pid;
817  
818  	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
819  		return -ENOMEM;
820  
821  	/*
822  	 * Always recreate a new array. The write is an all or nothing
823  	 * operation. Always create a new array when adding new pids by
824  	 * the user. If the operation fails, then the current list is
825  	 * not modified.
826  	 */
827  	pid_list = trace_pid_list_alloc();
828  	if (!pid_list) {
829  		trace_parser_put(&parser);
830  		return -ENOMEM;
831  	}
832  
833  	if (filtered_pids) {
834  		/* copy the current bits to the new max */
835  		ret = trace_pid_list_first(filtered_pids, &pid);
836  		while (!ret) {
837  			trace_pid_list_set(pid_list, pid);
838  			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
839  			nr_pids++;
840  		}
841  	}
842  
843  	ret = 0;
844  	while (cnt > 0) {
845  
846  		pos = 0;
847  
848  		ret = trace_get_user(&parser, ubuf, cnt, &pos);
849  		if (ret < 0)
850  			break;
851  
852  		read += ret;
853  		ubuf += ret;
854  		cnt -= ret;
855  
856  		if (!trace_parser_loaded(&parser))
857  			break;
858  
859  		ret = -EINVAL;
860  		if (kstrtoul(parser.buffer, 0, &val))
861  			break;
862  
863  		pid = (pid_t)val;
864  
865  		if (trace_pid_list_set(pid_list, pid) < 0) {
866  			ret = -1;
867  			break;
868  		}
869  		nr_pids++;
870  
871  		trace_parser_clear(&parser);
872  		ret = 0;
873  	}
874  	trace_parser_put(&parser);
875  
876  	if (ret < 0) {
877  		trace_pid_list_free(pid_list);
878  		return ret;
879  	}
880  
881  	if (!nr_pids) {
882  		/* Cleared the list of pids */
883  		trace_pid_list_free(pid_list);
884  		pid_list = NULL;
885  	}
886  
887  	*new_pid_list = pid_list;
888  
889  	return read;
890  }
891  
892  static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
893  {
894  	u64 ts;
895  
896  	/* Early boot up does not have a buffer yet */
897  	if (!buf->buffer)
898  		return trace_clock_local();
899  
900  	ts = ring_buffer_time_stamp(buf->buffer);
901  	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
902  
903  	return ts;
904  }
905  
906  u64 ftrace_now(int cpu)
907  {
908  	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
909  }
910  
911  /**
912   * tracing_is_enabled - Show if global_trace has been enabled
913   *
914   * Shows if the global trace has been enabled or not. It uses the
915   * mirror flag "buffer_disabled" to be used in fast paths such as for
916   * the irqsoff tracer. But it may be inaccurate due to races. If you
917   * need to know the accurate state, use tracing_is_on() which is a little
918   * slower, but accurate.
919   */
920  int tracing_is_enabled(void)
921  {
922  	/*
923  	 * For quick access (irqsoff uses this in fast path), just
924  	 * return the mirror variable of the state of the ring buffer.
925  	 * It's a little racy, but we don't really care.
926  	 */
927  	return !global_trace.buffer_disabled;
928  }
929  
930  /*
931   * trace_buf_size is the size in bytes that is allocated
932   * for a buffer. Note, the number of bytes is always rounded
933   * to page size.
934   *
935   * This number is purposely set to a low number of 16384.
936   * If a dump on oops happens, it is much appreciated not to
937   * have to wait for all that output. Anyway, this is configurable
938   * at both boot time and run time.
939   */
940  #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
941  
942  static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
943  
944  /* trace_types holds a link list of available tracers. */
945  static struct tracer		*trace_types __read_mostly;
946  
947  /*
948   * trace_types_lock is used to protect the trace_types list.
949   */
950  DEFINE_MUTEX(trace_types_lock);
951  
952  /*
953   * serialize access to the ring buffer
954   *
955   * The ring buffer serializes readers, but that is only low-level protection.
956   * The validity of events (returned by ring_buffer_peek(), etc.)
957   * is not protected by the ring buffer.
958   *
959   * The content of events may become garbage if we allow another process to
960   * consume these events concurrently:
961   *   A) the page of the consumed events may become a normal page
962   *      (not a reader page) in the ring buffer, and this page will be rewritten
963   *      by the events producer.
964   *   B) The page of the consumed events may become a page for splice_read,
965   *      and this page will be returned to the system.
966   *
967   * These primitives allow multiple processes to access different cpu ring
968   * buffers concurrently.
969   *
970   * These primitives don't distinguish read-only and read-consume access.
971   * Multiple read-only accesses are also serialized.
972   */
973  
974  #ifdef CONFIG_SMP
975  static DECLARE_RWSEM(all_cpu_access_lock);
976  static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
977  
978  static inline void trace_access_lock(int cpu)
979  {
980  	if (cpu == RING_BUFFER_ALL_CPUS) {
981  		/* gain it for accessing the whole ring buffer. */
982  		down_write(&all_cpu_access_lock);
983  	} else {
984  		/* gain it for accessing a cpu ring buffer. */
985  
986  		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
987  		down_read(&all_cpu_access_lock);
988  
989  		/* Secondly block other access to this @cpu ring buffer. */
990  		mutex_lock(&per_cpu(cpu_access_lock, cpu));
991  	}
992  }
993  
994  static inline void trace_access_unlock(int cpu)
995  {
996  	if (cpu == RING_BUFFER_ALL_CPUS) {
997  		up_write(&all_cpu_access_lock);
998  	} else {
999  		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1000  		up_read(&all_cpu_access_lock);
1001  	}
1002  }
1003  
1004  static inline void trace_access_lock_init(void)
1005  {
1006  	int cpu;
1007  
1008  	for_each_possible_cpu(cpu)
1009  		mutex_init(&per_cpu(cpu_access_lock, cpu));
1010  }
1011  
1012  #else
1013  
1014  static DEFINE_MUTEX(access_lock);
1015  
1016  static inline void trace_access_lock(int cpu)
1017  {
1018  	(void)cpu;
1019  	mutex_lock(&access_lock);
1020  }
1021  
1022  static inline void trace_access_unlock(int cpu)
1023  {
1024  	(void)cpu;
1025  	mutex_unlock(&access_lock);
1026  }
1027  
1028  static inline void trace_access_lock_init(void)
1029  {
1030  }
1031  
1032  #endif
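/*
 * Typical reader-side pattern for the locks above (illustrative sketch;
 * cpu may be a specific CPU or RING_BUFFER_ALL_CPUS):
 *
 *	trace_access_lock(cpu);
 *	// peek at or consume events from the selected per-cpu buffer(s)
 *	trace_access_unlock(cpu);
 */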
1033  
1034  #ifdef CONFIG_STACKTRACE
1035  static void __ftrace_trace_stack(struct trace_array *tr,
1036  				 struct trace_buffer *buffer,
1037  				 unsigned int trace_ctx,
1038  				 int skip, struct pt_regs *regs);
1039  static inline void ftrace_trace_stack(struct trace_array *tr,
1040  				      struct trace_buffer *buffer,
1041  				      unsigned int trace_ctx,
1042  				      int skip, struct pt_regs *regs);
1043  
1044  #else
1045  static inline void __ftrace_trace_stack(struct trace_array *tr,
1046  					struct trace_buffer *buffer,
1047  					unsigned int trace_ctx,
1048  					int skip, struct pt_regs *regs)
1049  {
1050  }
1051  static inline void ftrace_trace_stack(struct trace_array *tr,
1052  				      struct trace_buffer *buffer,
1053  				      unsigned long trace_ctx,
1054  				      int skip, struct pt_regs *regs)
1055  {
1056  }
1057  
1058  #endif
1059  
1060  static __always_inline void
1061  trace_event_setup(struct ring_buffer_event *event,
1062  		  int type, unsigned int trace_ctx)
1063  {
1064  	struct trace_entry *ent = ring_buffer_event_data(event);
1065  
1066  	tracing_generic_entry_update(ent, type, trace_ctx);
1067  }
1068  
1069  static __always_inline struct ring_buffer_event *
1070  __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1071  			  int type,
1072  			  unsigned long len,
1073  			  unsigned int trace_ctx)
1074  {
1075  	struct ring_buffer_event *event;
1076  
1077  	event = ring_buffer_lock_reserve(buffer, len);
1078  	if (event != NULL)
1079  		trace_event_setup(event, type, trace_ctx);
1080  
1081  	return event;
1082  }
1083  
1084  void tracer_tracing_on(struct trace_array *tr)
1085  {
1086  	if (tr->array_buffer.buffer)
1087  		ring_buffer_record_on(tr->array_buffer.buffer);
1088  	/*
1089  	 * This flag is looked at when buffers haven't been allocated
1090  	 * yet, or by some tracers (like irqsoff), that just want to
1091  	 * know if the ring buffer has been disabled, but it can handle
1092  	 * races of where it gets disabled but we still do a record.
1093  	 * As the check is in the fast path of the tracers, it is more
1094  	 * important to be fast than accurate.
1095  	 */
1096  	tr->buffer_disabled = 0;
1097  }
1098  
1099  /**
1100   * tracing_on - enable tracing buffers
1101   *
1102   * This function enables tracing buffers that may have been
1103   * disabled with tracing_off.
1104   */
1105  void tracing_on(void)
1106  {
1107  	tracer_tracing_on(&global_trace);
1108  }
1109  EXPORT_SYMBOL_GPL(tracing_on);
1110  
1111  
1112  static __always_inline void
1113  __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1114  {
1115  	__this_cpu_write(trace_taskinfo_save, true);
1116  
1117  	/* If this is the temp buffer, we need to commit fully */
1118  	if (this_cpu_read(trace_buffered_event) == event) {
1119  		/* Length is in event->array[0] */
1120  		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1121  		/* Release the temp buffer */
1122  		this_cpu_dec(trace_buffered_event_cnt);
1123  		/* ring_buffer_unlock_commit() enables preemption */
1124  		preempt_enable_notrace();
1125  	} else
1126  		ring_buffer_unlock_commit(buffer);
1127  }
1128  
1129  int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1130  		       const char *str, int size)
1131  {
1132  	struct ring_buffer_event *event;
1133  	struct trace_buffer *buffer;
1134  	struct print_entry *entry;
1135  	unsigned int trace_ctx;
1136  	int alloc;
1137  
1138  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1139  		return 0;
1140  
1141  	if (unlikely(tracing_selftest_running && tr == &global_trace))
1142  		return 0;
1143  
1144  	if (unlikely(tracing_disabled))
1145  		return 0;
1146  
1147  	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1148  
1149  	trace_ctx = tracing_gen_ctx();
1150  	buffer = tr->array_buffer.buffer;
1151  	guard(ring_buffer_nest)(buffer);
1152  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1153  					    trace_ctx);
1154  	if (!event)
1155  		return 0;
1156  
1157  	entry = ring_buffer_event_data(event);
1158  	entry->ip = ip;
1159  
1160  	memcpy(&entry->buf, str, size);
1161  
1162  	/* Add a newline if necessary */
1163  	if (entry->buf[size - 1] != '\n') {
1164  		entry->buf[size] = '\n';
1165  		entry->buf[size + 1] = '\0';
1166  	} else
1167  		entry->buf[size] = '\0';
1168  
1169  	__buffer_unlock_commit(buffer, event);
1170  	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1171  	return size;
1172  }
1173  EXPORT_SYMBOL_GPL(__trace_array_puts);
1174  
1175  /**
1176   * __trace_puts - write a constant string into the trace buffer.
1177   * @ip:	   The address of the caller
1178   * @str:   The constant string to write
1179   * @size:  The size of the string.
1180   */
1181  int __trace_puts(unsigned long ip, const char *str, int size)
1182  {
1183  	return __trace_array_puts(printk_trace, ip, str, size);
1184  }
1185  EXPORT_SYMBOL_GPL(__trace_puts);
1186  
1187  /**
1188   * __trace_bputs - write the pointer to a constant string into trace buffer
1189   * @ip:	   The address of the caller
1190   * @str:   The constant string to write to the buffer to
1191   */
1192  int __trace_bputs(unsigned long ip, const char *str)
1193  {
1194  	struct trace_array *tr = READ_ONCE(printk_trace);
1195  	struct ring_buffer_event *event;
1196  	struct trace_buffer *buffer;
1197  	struct bputs_entry *entry;
1198  	unsigned int trace_ctx;
1199  	int size = sizeof(struct bputs_entry);
1200  
1201  	if (!printk_binsafe(tr))
1202  		return __trace_puts(ip, str, strlen(str));
1203  
1204  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1205  		return 0;
1206  
1207  	if (unlikely(tracing_selftest_running || tracing_disabled))
1208  		return 0;
1209  
1210  	trace_ctx = tracing_gen_ctx();
1211  	buffer = tr->array_buffer.buffer;
1212  
1213  	guard(ring_buffer_nest)(buffer);
1214  	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1215  					    trace_ctx);
1216  	if (!event)
1217  		return 0;
1218  
1219  	entry = ring_buffer_event_data(event);
1220  	entry->ip			= ip;
1221  	entry->str			= str;
1222  
1223  	__buffer_unlock_commit(buffer, event);
1224  	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1225  
1226  	return 1;
1227  }
1228  EXPORT_SYMBOL_GPL(__trace_bputs);
1229  
1230  #ifdef CONFIG_TRACER_SNAPSHOT
1231  static void tracing_snapshot_instance_cond(struct trace_array *tr,
1232  					   void *cond_data)
1233  {
1234  	struct tracer *tracer = tr->current_trace;
1235  	unsigned long flags;
1236  
1237  	if (in_nmi()) {
1238  		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1239  		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1240  		return;
1241  	}
1242  
1243  	if (!tr->allocated_snapshot) {
1244  		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1245  		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1246  		tracer_tracing_off(tr);
1247  		return;
1248  	}
1249  
1250  	/* Note, snapshot can not be used when the tracer uses it */
1251  	if (tracer->use_max_tr) {
1252  		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1253  		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1254  		return;
1255  	}
1256  
1257  	if (tr->mapped) {
1258  		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1259  		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1260  		return;
1261  	}
1262  
1263  	local_irq_save(flags);
1264  	update_max_tr(tr, current, smp_processor_id(), cond_data);
1265  	local_irq_restore(flags);
1266  }
1267  
1268  void tracing_snapshot_instance(struct trace_array *tr)
1269  {
1270  	tracing_snapshot_instance_cond(tr, NULL);
1271  }
1272  
1273  /**
1274   * tracing_snapshot - take a snapshot of the current buffer.
1275   *
1276   * This causes a swap between the snapshot buffer and the current live
1277   * tracing buffer. You can use this to take snapshots of the live
1278   * trace when some condition is triggered, but continue to trace.
1279   *
1280   * Note, make sure to allocate the snapshot with either
1281   * a tracing_snapshot_alloc(), or by doing it manually
1282   * with: echo 1 > /sys/kernel/tracing/snapshot
1283   *
1284   * If the snapshot buffer is not allocated, it will stop tracing.
1285   * Basically making a permanent snapshot.
1286   */
1287  void tracing_snapshot(void)
1288  {
1289  	struct trace_array *tr = &global_trace;
1290  
1291  	tracing_snapshot_instance(tr);
1292  }
1293  EXPORT_SYMBOL_GPL(tracing_snapshot);
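/*
 * Minimal usage sketch (illustrative; "suspect_condition" is hypothetical).
 * The snapshot buffer must already be allocated, e.g. via
 * tracing_snapshot_alloc() from a context that may sleep:
 *
 *	if (suspect_condition)
 *		tracing_snapshot();	// preserve the trace leading up to it
 */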
1294  
1295  /**
1296   * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1297   * @tr:		The tracing instance to snapshot
1298   * @cond_data:	The data to be tested conditionally, and possibly saved
1299   *
1300   * This is the same as tracing_snapshot() except that the snapshot is
1301   * conditional - the snapshot will only happen if the
1302   * cond_snapshot.update() implementation receiving the cond_data
1303   * returns true, which means that the trace array's cond_snapshot
1304   * update() operation used the cond_data to determine whether the
1305   * snapshot should be taken, and if it was, presumably saved it along
1306   * with the snapshot.
1307   */
1308  void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1309  {
1310  	tracing_snapshot_instance_cond(tr, cond_data);
1311  }
1312  EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1313  
1314  /**
1315   * tracing_cond_snapshot_data - get the user data associated with a snapshot
1316   * @tr:		The tracing instance
1317   *
1318   * When the user enables a conditional snapshot using
1319   * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1320   * with the snapshot.  This accessor is used to retrieve it.
1321   *
1322   * Should not be called from cond_snapshot.update(), since it takes
1323   * the tr->max_lock lock, which the code calling
1324   * cond_snapshot.update() has already done.
1325   *
1326   * Returns the cond_data associated with the trace array's snapshot.
1327   */
1328  void *tracing_cond_snapshot_data(struct trace_array *tr)
1329  {
1330  	void *cond_data = NULL;
1331  
1332  	local_irq_disable();
1333  	arch_spin_lock(&tr->max_lock);
1334  
1335  	if (tr->cond_snapshot)
1336  		cond_data = tr->cond_snapshot->cond_data;
1337  
1338  	arch_spin_unlock(&tr->max_lock);
1339  	local_irq_enable();
1340  
1341  	return cond_data;
1342  }
1343  EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1344  
1345  static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1346  					struct array_buffer *size_buf, int cpu_id);
1347  static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1348  
1349  int tracing_alloc_snapshot_instance(struct trace_array *tr)
1350  {
1351  	int order;
1352  	int ret;
1353  
1354  	if (!tr->allocated_snapshot) {
1355  
1356  		/* Make the snapshot buffer have the same order as main buffer */
1357  		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1358  		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1359  		if (ret < 0)
1360  			return ret;
1361  
1362  		/* allocate spare buffer */
1363  		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1364  				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1365  		if (ret < 0)
1366  			return ret;
1367  
1368  		tr->allocated_snapshot = true;
1369  	}
1370  
1371  	return 0;
1372  }
1373  
1374  static void free_snapshot(struct trace_array *tr)
1375  {
1376  	/*
1377  	 * We don't free the ring buffer; instead, we resize it because
1378  	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1379  	 * we want to preserve it.
1380  	 */
1381  	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1382  	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1383  	set_buffer_entries(&tr->max_buffer, 1);
1384  	tracing_reset_online_cpus(&tr->max_buffer);
1385  	tr->allocated_snapshot = false;
1386  }
1387  
1388  static int tracing_arm_snapshot_locked(struct trace_array *tr)
1389  {
1390  	int ret;
1391  
1392  	lockdep_assert_held(&trace_types_lock);
1393  
1394  	spin_lock(&tr->snapshot_trigger_lock);
1395  	if (tr->snapshot == UINT_MAX || tr->mapped) {
1396  		spin_unlock(&tr->snapshot_trigger_lock);
1397  		return -EBUSY;
1398  	}
1399  
1400  	tr->snapshot++;
1401  	spin_unlock(&tr->snapshot_trigger_lock);
1402  
1403  	ret = tracing_alloc_snapshot_instance(tr);
1404  	if (ret) {
1405  		spin_lock(&tr->snapshot_trigger_lock);
1406  		tr->snapshot--;
1407  		spin_unlock(&tr->snapshot_trigger_lock);
1408  	}
1409  
1410  	return ret;
1411  }
1412  
1413  int tracing_arm_snapshot(struct trace_array *tr)
1414  {
1415  	guard(mutex)(&trace_types_lock);
1416  	return tracing_arm_snapshot_locked(tr);
1417  }
1418  
1419  void tracing_disarm_snapshot(struct trace_array *tr)
1420  {
1421  	spin_lock(&tr->snapshot_trigger_lock);
1422  	if (!WARN_ON(!tr->snapshot))
1423  		tr->snapshot--;
1424  	spin_unlock(&tr->snapshot_trigger_lock);
1425  }
1426  
1427  /**
1428   * tracing_alloc_snapshot - allocate snapshot buffer.
1429   *
1430   * This only allocates the snapshot buffer if it isn't already
1431   * allocated - it doesn't also take a snapshot.
1432   *
1433   * This is meant to be used in cases where the snapshot buffer needs
1434   * to be set up for events that can't sleep but need to be able to
1435   * trigger a snapshot.
1436   */
1437  int tracing_alloc_snapshot(void)
1438  {
1439  	struct trace_array *tr = &global_trace;
1440  	int ret;
1441  
1442  	ret = tracing_alloc_snapshot_instance(tr);
1443  	WARN_ON(ret < 0);
1444  
1445  	return ret;
1446  }
1447  EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1448  
1449  /**
1450   * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1451   *
1452   * This is similar to tracing_snapshot(), but it will allocate the
1453   * snapshot buffer if it isn't already allocated. Use this only
1454   * where it is safe to sleep, as the allocation may sleep.
1455   *
1456   * This causes a swap between the snapshot buffer and the current live
1457   * tracing buffer. You can use this to take snapshots of the live
1458   * trace when some condition is triggered, but continue to trace.
1459   */
1460  void tracing_snapshot_alloc(void)
1461  {
1462  	int ret;
1463  
1464  	ret = tracing_alloc_snapshot();
1465  	if (ret < 0)
1466  		return;
1467  
1468  	tracing_snapshot();
1469  }
1470  EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1471  
1472  /**
1473   * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1474   * @tr:		The tracing instance
1475   * @cond_data:	User data to associate with the snapshot
1476   * @update:	Implementation of the cond_snapshot update function
1477   *
1478   * Check whether the conditional snapshot for the given instance has
1479   * already been enabled, or if the current tracer is already using a
1480   * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1481   * save the cond_data and update function inside.
1482   *
1483   * Returns 0 if successful, error otherwise.
1484   */
1485  int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1486  				 cond_update_fn_t update)
1487  {
1488  	struct cond_snapshot *cond_snapshot __free(kfree) =
1489  		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1490  	int ret;
1491  
1492  	if (!cond_snapshot)
1493  		return -ENOMEM;
1494  
1495  	cond_snapshot->cond_data = cond_data;
1496  	cond_snapshot->update = update;
1497  
1498  	guard(mutex)(&trace_types_lock);
1499  
1500  	if (tr->current_trace->use_max_tr)
1501  		return -EBUSY;
1502  
1503  	/*
1504  	 * The cond_snapshot can only change to NULL without the
1505  	 * trace_types_lock. We don't care if we race with it going
1506  	 * to NULL, but we want to make sure that it's not set to
1507  	 * something other than NULL when we get here, which we can
1508  	 * do safely with only holding the trace_types_lock and not
1509  	 * having to take the max_lock.
1510  	 */
1511  	if (tr->cond_snapshot)
1512  		return -EBUSY;
1513  
1514  	ret = tracing_arm_snapshot_locked(tr);
1515  	if (ret)
1516  		return ret;
1517  
1518  	local_irq_disable();
1519  	arch_spin_lock(&tr->max_lock);
1520  	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1521  	arch_spin_unlock(&tr->max_lock);
1522  	local_irq_enable();
1523  
1524  	return 0;
1525  }
1526  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
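/*
 * Conditional snapshot usage sketch (illustrative only; the my_* names are
 * hypothetical and the update() prototype is assumed to match the
 * cond_update_fn_t typedef):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true if this trigger should really take a snapshot
 *		return my_check(cond_data);
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);	// snapshots only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */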
1527  
1528  /**
1529   * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1530   * @tr:		The tracing instance
1531   *
1532   * Check whether the conditional snapshot for the given instance is
1533   * enabled; if so, free the cond_snapshot associated with it,
1534   * otherwise return -EINVAL.
1535   *
1536   * Returns 0 if successful, error otherwise.
1537   */
1538  int tracing_snapshot_cond_disable(struct trace_array *tr)
1539  {
1540  	int ret = 0;
1541  
1542  	local_irq_disable();
1543  	arch_spin_lock(&tr->max_lock);
1544  
1545  	if (!tr->cond_snapshot)
1546  		ret = -EINVAL;
1547  	else {
1548  		kfree(tr->cond_snapshot);
1549  		tr->cond_snapshot = NULL;
1550  	}
1551  
1552  	arch_spin_unlock(&tr->max_lock);
1553  	local_irq_enable();
1554  
1555  	tracing_disarm_snapshot(tr);
1556  
1557  	return ret;
1558  }
1559  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1560  #else
1561  void tracing_snapshot(void)
1562  {
1563  	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1564  }
1565  EXPORT_SYMBOL_GPL(tracing_snapshot);
1566  void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1567  {
1568  	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1569  }
1570  EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1571  int tracing_alloc_snapshot(void)
1572  {
1573  	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1574  	return -ENODEV;
1575  }
1576  EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1577  void tracing_snapshot_alloc(void)
1578  {
1579  	/* Give warning */
1580  	tracing_snapshot();
1581  }
1582  EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1583  void *tracing_cond_snapshot_data(struct trace_array *tr)
1584  {
1585  	return NULL;
1586  }
1587  EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1588  int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1589  {
1590  	return -ENODEV;
1591  }
1592  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1593  int tracing_snapshot_cond_disable(struct trace_array *tr)
1594  {
1595  	return false;
1596  }
1597  EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1598  #define free_snapshot(tr)	do { } while (0)
1599  #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1600  #endif /* CONFIG_TRACER_SNAPSHOT */
1601  
1602  void tracer_tracing_off(struct trace_array *tr)
1603  {
1604  	if (tr->array_buffer.buffer)
1605  		ring_buffer_record_off(tr->array_buffer.buffer);
1606  	/*
1607  	 * This flag is looked at when buffers haven't been allocated
1608  	 * yet, or by some tracers (like irqsoff), that just want to
1609  	 * know if the ring buffer has been disabled, but it can handle
1610  	 * races of where it gets disabled but we still do a record.
1611  	 * As the check is in the fast path of the tracers, it is more
1612  	 * important to be fast than accurate.
1613  	 */
1614  	tr->buffer_disabled = 1;
1615  }
1616  
1617  /**
1618   * tracer_tracing_disable() - temporarily disable writes to the buffer
1619   * @tr: The trace array to disable its buffer for
1620   *
1621   * Expects tracer_tracing_enable() to re-enable tracing.
1622   * The difference between this and tracer_tracing_off() is that this
1623   * is a counter and can nest, whereas tracer_tracing_off() can
1624   * be called multiple times and a single tracer_tracing_on() will
1625   * enable it.
1626   */
1627  void tracer_tracing_disable(struct trace_array *tr)
1628  {
1629  	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1630  		return;
1631  
1632  	ring_buffer_record_disable(tr->array_buffer.buffer);
1633  }
1634  
1635  /**
1636   * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1637   * @tr: The trace array that had tracer_tracing_disable() called on it
1638   *
1639   * This is called after tracer_tracing_disable() has been called on @tr,
1640   * when it's safe to re-enable tracing.
1641   */
1642  void tracer_tracing_enable(struct trace_array *tr)
1643  {
1644  	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1645  		return;
1646  
1647  	ring_buffer_record_enable(tr->array_buffer.buffer);
1648  }
1649  
1650  /**
1651   * tracing_off - turn off tracing buffers
1652   *
1653   * This function stops the tracing buffers from recording data.
1654   * It does not disable any overhead the tracers themselves may
1655   * be causing. This function simply causes all recording to
1656   * the ring buffers to fail.
1657   */
1658  void tracing_off(void)
1659  {
1660  	tracer_tracing_off(&global_trace);
1661  }
1662  EXPORT_SYMBOL_GPL(tracing_off);
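/*
 * A minimal illustrative sketch of how kernel code typically uses this
 * export: freeze the ring buffers right after a suspected problem so the
 * trace ends at the point of interest. my_dev_poll(), read_hw_status()
 * and HW_BAD_STATE are hypothetical; tracing_off() and trace_printk()
 * are the real interfaces.
 *
 *	static void my_dev_poll(struct my_dev *dev)
 *	{
 *		if (read_hw_status(dev) == HW_BAD_STATE) {
 *			trace_printk("bad hw state, freezing trace\n");
 *			tracing_off();		// buffers keep their contents
 *		}
 *	}
 */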
1663  
1664  void disable_trace_on_warning(void)
1665  {
1666  	if (__disable_trace_on_warning) {
1667  		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1668  			"Disabling tracing due to warning\n");
1669  		tracing_off();
1670  	}
1671  }
1672  
1673  /**
1674   * tracer_tracing_is_on - show real state of ring buffer enabled
1675   * @tr : the trace array to know if ring buffer is enabled
1676   *
1677   * Shows real state of the ring buffer if it is enabled or not.
1678   */
1679  bool tracer_tracing_is_on(struct trace_array *tr)
1680  {
1681  	if (tr->array_buffer.buffer)
1682  		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1683  	return !tr->buffer_disabled;
1684  }
1685  
1686  /**
1687   * tracing_is_on - show state of ring buffers enabled
1688   */
1689  int tracing_is_on(void)
1690  {
1691  	return tracer_tracing_is_on(&global_trace);
1692  }
1693  EXPORT_SYMBOL_GPL(tracing_is_on);
1694  
1695  static int __init set_buf_size(char *str)
1696  {
1697  	unsigned long buf_size;
1698  
1699  	if (!str)
1700  		return 0;
1701  	buf_size = memparse(str, &str);
1702  	/*
1703  	 * nr_entries can not be zero and the startup
1704  	 * tests require some buffer space. Therefore
1705  	 * ensure we have at least 4096 bytes of buffer.
1706  	 */
1707  	trace_buf_size = max(4096UL, buf_size);
1708  	return 1;
1709  }
1710  __setup("trace_buf_size=", set_buf_size);
1711  
1712  static int __init set_tracing_thresh(char *str)
1713  {
1714  	unsigned long threshold;
1715  	int ret;
1716  
1717  	if (!str)
1718  		return 0;
1719  	ret = kstrtoul(str, 0, &threshold);
1720  	if (ret < 0)
1721  		return 0;
1722  	tracing_thresh = threshold * 1000;
1723  	return 1;
1724  }
1725  __setup("tracing_thresh=", set_tracing_thresh);
1726  
1727  unsigned long nsecs_to_usecs(unsigned long nsecs)
1728  {
1729  	return nsecs / 1000;
1730  }
1731  
1732  /*
1733   * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1734   * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1735   * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1736   * of strings in the order that the evals (enum) were defined.
1737   */
1738  #undef C
1739  #define C(a, b) b
1740  
1741  /* These must match the bit positions in trace_iterator_flags */
1742  static const char *trace_options[] = {
1743  	TRACE_FLAGS
1744  	NULL
1745  };
1746  
1747  static struct {
1748  	u64 (*func)(void);
1749  	const char *name;
1750  	int in_ns;		/* is this clock in nanoseconds? */
1751  } trace_clocks[] = {
1752  	{ trace_clock_local,		"local",	1 },
1753  	{ trace_clock_global,		"global",	1 },
1754  	{ trace_clock_counter,		"counter",	0 },
1755  	{ trace_clock_jiffies,		"uptime",	0 },
1756  	{ trace_clock,			"perf",		1 },
1757  	{ ktime_get_mono_fast_ns,	"mono",		1 },
1758  	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1759  	{ ktime_get_boot_fast_ns,	"boot",		1 },
1760  	{ ktime_get_tai_fast_ns,	"tai",		1 },
1761  	ARCH_TRACE_CLOCKS
1762  };
1763  
1764  bool trace_clock_in_ns(struct trace_array *tr)
1765  {
1766  	if (trace_clocks[tr->clock_id].in_ns)
1767  		return true;
1768  
1769  	return false;
1770  }
1771  
1772  /*
1773   * trace_parser_get_init - gets the buffer for trace parser
1774   */
1775  int trace_parser_get_init(struct trace_parser *parser, int size)
1776  {
1777  	memset(parser, 0, sizeof(*parser));
1778  
1779  	parser->buffer = kmalloc(size, GFP_KERNEL);
1780  	if (!parser->buffer)
1781  		return 1;
1782  
1783  	parser->size = size;
1784  	return 0;
1785  }
1786  
1787  /*
1788   * trace_parser_put - frees the buffer for trace parser
1789   */
1790  void trace_parser_put(struct trace_parser *parser)
1791  {
1792  	kfree(parser->buffer);
1793  	parser->buffer = NULL;
1794  }
1795  
1796  /*
1797   * trace_get_user - reads the user input string separated by space
1798   * (matched by isspace(ch))
1799   *
1800   * For each string found the 'struct trace_parser' is updated,
1801   * and the function returns.
1802   *
1803   * Returns number of bytes read.
1804   *
1805   * See kernel/trace/trace.h for 'struct trace_parser' details.
1806   */
1807  int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1808  	size_t cnt, loff_t *ppos)
1809  {
1810  	char ch;
1811  	size_t read = 0;
1812  	ssize_t ret;
1813  
1814  	if (!*ppos)
1815  		trace_parser_clear(parser);
1816  
1817  	ret = get_user(ch, ubuf++);
1818  	if (ret)
1819  		return ret;
1820  
1821  	read++;
1822  	cnt--;
1823  
1824  	/*
1825  	 * The parser is not finished with the last write,
1826  	 * continue reading the user input without skipping spaces.
1827  	 */
1828  	if (!parser->cont) {
1829  		/* skip white space */
1830  		while (cnt && isspace(ch)) {
1831  			ret = get_user(ch, ubuf++);
1832  			if (ret)
1833  				return ret;
1834  			read++;
1835  			cnt--;
1836  		}
1837  
1838  		parser->idx = 0;
1839  
1840  		/* only spaces were written */
1841  		if (isspace(ch) || !ch) {
1842  			*ppos += read;
1843  			return read;
1844  		}
1845  	}
1846  
1847  	/* read the non-space input */
1848  	while (cnt && !isspace(ch) && ch) {
1849  		if (parser->idx < parser->size - 1)
1850  			parser->buffer[parser->idx++] = ch;
1851  		else
1852  			return -EINVAL;
1853  
1854  		ret = get_user(ch, ubuf++);
1855  		if (ret)
1856  			return ret;
1857  		read++;
1858  		cnt--;
1859  	}
1860  
1861  	/* We either got finished input or we have to wait for another call. */
1862  	if (isspace(ch) || !ch) {
1863  		parser->buffer[parser->idx] = 0;
1864  		parser->cont = false;
1865  	} else if (parser->idx < parser->size - 1) {
1866  		parser->cont = true;
1867  		parser->buffer[parser->idx++] = ch;
1868  		/* Make sure the parsed string always terminates with '\0'. */
1869  		parser->buffer[parser->idx] = 0;
1870  	} else {
1871  		return -EINVAL;
1872  	}
1873  
1874  	*ppos += read;
1875  	return read;
1876  }
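/*
 * A minimal sketch of how the parser helpers above are meant to be used
 * from a tracefs write handler. The handler itself and do_something()
 * are hypothetical; trace_parser_get_init(), trace_get_user(),
 * trace_parser_loaded() (from trace.h) and trace_parser_put() are the
 * real helpers.
 *
 *	struct trace_parser parser;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (ret >= 0 && trace_parser_loaded(&parser))
 *		do_something(parser.buffer);	// one space-separated token
 *
 *	trace_parser_put(&parser);
 *	return ret;
 */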
1877  
1878  /* TODO add a seq_buf_to_buffer() */
1879  static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1880  {
1881  	int len;
1882  
1883  	if (trace_seq_used(s) <= s->readpos)
1884  		return -EBUSY;
1885  
1886  	len = trace_seq_used(s) - s->readpos;
1887  	if (cnt > len)
1888  		cnt = len;
1889  	memcpy(buf, s->buffer + s->readpos, cnt);
1890  
1891  	s->readpos += cnt;
1892  	return cnt;
1893  }
1894  
1895  unsigned long __read_mostly	tracing_thresh;
1896  
1897  #ifdef CONFIG_TRACER_MAX_TRACE
1898  static const struct file_operations tracing_max_lat_fops;
1899  
1900  #ifdef LATENCY_FS_NOTIFY
1901  
1902  static struct workqueue_struct *fsnotify_wq;
1903  
1904  static void latency_fsnotify_workfn(struct work_struct *work)
1905  {
1906  	struct trace_array *tr = container_of(work, struct trace_array,
1907  					      fsnotify_work);
1908  	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1909  }
1910  
1911  static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1912  {
1913  	struct trace_array *tr = container_of(iwork, struct trace_array,
1914  					      fsnotify_irqwork);
1915  	queue_work(fsnotify_wq, &tr->fsnotify_work);
1916  }
1917  
1918  static void trace_create_maxlat_file(struct trace_array *tr,
1919  				     struct dentry *d_tracer)
1920  {
1921  	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1922  	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1923  	tr->d_max_latency = trace_create_file("tracing_max_latency",
1924  					      TRACE_MODE_WRITE,
1925  					      d_tracer, tr,
1926  					      &tracing_max_lat_fops);
1927  }
1928  
1929  __init static int latency_fsnotify_init(void)
1930  {
1931  	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1932  				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1933  	if (!fsnotify_wq) {
1934  		pr_err("Unable to allocate tr_max_lat_wq\n");
1935  		return -ENOMEM;
1936  	}
1937  	return 0;
1938  }
1939  
1940  late_initcall_sync(latency_fsnotify_init);
1941  
1942  void latency_fsnotify(struct trace_array *tr)
1943  {
1944  	if (!fsnotify_wq)
1945  		return;
1946  	/*
1947  	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1948  	 * possible that we are called from __schedule() or do_idle(), which
1949  	 * could cause a deadlock.
1950  	 */
1951  	irq_work_queue(&tr->fsnotify_irqwork);
1952  }
1953  
1954  #else /* !LATENCY_FS_NOTIFY */
1955  
1956  #define trace_create_maxlat_file(tr, d_tracer)				\
1957  	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1958  			  d_tracer, tr, &tracing_max_lat_fops)
1959  
1960  #endif
1961  
1962  /*
1963   * Copy the new maximum trace into the separate maximum-trace
1964   * structure. (this way the maximum trace is permanently saved,
1965   * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1966   */
1967  static void
1968  __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1969  {
1970  	struct array_buffer *trace_buf = &tr->array_buffer;
1971  	struct array_buffer *max_buf = &tr->max_buffer;
1972  	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1973  	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1974  
1975  	max_buf->cpu = cpu;
1976  	max_buf->time_start = data->preempt_timestamp;
1977  
1978  	max_data->saved_latency = tr->max_latency;
1979  	max_data->critical_start = data->critical_start;
1980  	max_data->critical_end = data->critical_end;
1981  
1982  	strscpy(max_data->comm, tsk->comm);
1983  	max_data->pid = tsk->pid;
1984  	/*
1985  	 * If tsk == current, then use current_uid(), as that does not use
1986  	 * RCU. The irq tracer can be called out of RCU scope.
1987  	 */
1988  	if (tsk == current)
1989  		max_data->uid = current_uid();
1990  	else
1991  		max_data->uid = task_uid(tsk);
1992  
1993  	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1994  	max_data->policy = tsk->policy;
1995  	max_data->rt_priority = tsk->rt_priority;
1996  
1997  	/* record this tasks comm */
1998  	tracing_record_cmdline(tsk);
1999  	latency_fsnotify(tr);
2000  }
2001  
2002  /**
2003   * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2004   * @tr: tracer
2005   * @tsk: the task with the latency
2006   * @cpu: The cpu that initiated the trace.
2007   * @cond_data: User data associated with a conditional snapshot
2008   *
2009   * Flip the buffers between the @tr and the max_tr and record information
2010   * about which task was the cause of this latency.
2011   */
2012  void
2013  update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2014  	      void *cond_data)
2015  {
2016  	if (tr->stop_count)
2017  		return;
2018  
2019  	WARN_ON_ONCE(!irqs_disabled());
2020  
2021  	if (!tr->allocated_snapshot) {
2022  		/* Only the nop tracer should hit this when disabling */
2023  		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2024  		return;
2025  	}
2026  
2027  	arch_spin_lock(&tr->max_lock);
2028  
2029  	/* Inherit the recordable setting from array_buffer */
2030  	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2031  		ring_buffer_record_on(tr->max_buffer.buffer);
2032  	else
2033  		ring_buffer_record_off(tr->max_buffer.buffer);
2034  
2035  #ifdef CONFIG_TRACER_SNAPSHOT
2036  	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2037  		arch_spin_unlock(&tr->max_lock);
2038  		return;
2039  	}
2040  #endif
2041  	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2042  
2043  	__update_max_tr(tr, tsk, cpu);
2044  
2045  	arch_spin_unlock(&tr->max_lock);
2046  
2047  	/* Any waiters on the old snapshot buffer need to wake up */
2048  	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2049  }
2050  
2051  /**
2052   * update_max_tr_single - only copy one trace over, and reset the rest
2053   * @tr: tracer
2054   * @tsk: task with the latency
2055   * @cpu: the cpu of the buffer to copy.
2056   *
2057   * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2058   */
2059  void
2060  update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2061  {
2062  	int ret;
2063  
2064  	if (tr->stop_count)
2065  		return;
2066  
2067  	WARN_ON_ONCE(!irqs_disabled());
2068  	if (!tr->allocated_snapshot) {
2069  		/* Only the nop tracer should hit this when disabling */
2070  		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2071  		return;
2072  	}
2073  
2074  	arch_spin_lock(&tr->max_lock);
2075  
2076  	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2077  
2078  	if (ret == -EBUSY) {
2079  		/*
2080  		 * We failed to swap the buffer due to a commit taking
2081  		 * place on this CPU. We fail to record, but we reset
2082  		 * the max trace buffer (no one writes directly to it)
2083  		 * and flag that it failed.
2084  		 * Another reason is that a resize is in progress.
2085  		 */
2086  		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2087  			"Failed to swap buffers due to commit or resize in progress\n");
2088  	}
2089  
2090  	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2091  
2092  	__update_max_tr(tr, tsk, cpu);
2093  	arch_spin_unlock(&tr->max_lock);
2094  }
2095  
2096  #endif /* CONFIG_TRACER_MAX_TRACE */
2097  
2098  struct pipe_wait {
2099  	struct trace_iterator		*iter;
2100  	int				wait_index;
2101  };
2102  
2103  static bool wait_pipe_cond(void *data)
2104  {
2105  	struct pipe_wait *pwait = data;
2106  	struct trace_iterator *iter = pwait->iter;
2107  
2108  	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2109  		return true;
2110  
2111  	return iter->closed;
2112  }
2113  
2114  static int wait_on_pipe(struct trace_iterator *iter, int full)
2115  {
2116  	struct pipe_wait pwait;
2117  	int ret;
2118  
2119  	/* Iterators are static, they should be filled or empty */
2120  	if (trace_buffer_iter(iter, iter->cpu_file))
2121  		return 0;
2122  
2123  	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2124  	pwait.iter = iter;
2125  
2126  	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2127  			       wait_pipe_cond, &pwait);
2128  
2129  #ifdef CONFIG_TRACER_MAX_TRACE
2130  	/*
2131  	 * Make sure this is still the snapshot buffer, as if a snapshot were
2132  	 * to happen, this would now be the main buffer.
2133  	 */
2134  	if (iter->snapshot)
2135  		iter->array_buffer = &iter->tr->max_buffer;
2136  #endif
2137  	return ret;
2138  }
2139  
2140  #ifdef CONFIG_FTRACE_STARTUP_TEST
2141  static bool selftests_can_run;
2142  
2143  struct trace_selftests {
2144  	struct list_head		list;
2145  	struct tracer			*type;
2146  };
2147  
2148  static LIST_HEAD(postponed_selftests);
2149  
2150  static int save_selftest(struct tracer *type)
2151  {
2152  	struct trace_selftests *selftest;
2153  
2154  	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2155  	if (!selftest)
2156  		return -ENOMEM;
2157  
2158  	selftest->type = type;
2159  	list_add(&selftest->list, &postponed_selftests);
2160  	return 0;
2161  }
2162  
2163  static int run_tracer_selftest(struct tracer *type)
2164  {
2165  	struct trace_array *tr = &global_trace;
2166  	struct tracer *saved_tracer = tr->current_trace;
2167  	int ret;
2168  
2169  	if (!type->selftest || tracing_selftest_disabled)
2170  		return 0;
2171  
2172  	/*
2173  	 * If a tracer registers early in boot up (before scheduling is
2174  	 * initialized and such), then do not run its selftests yet.
2175  	 * Instead, run it a little later in the boot process.
2176  	 */
2177  	if (!selftests_can_run)
2178  		return save_selftest(type);
2179  
2180  	if (!tracing_is_on()) {
2181  		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2182  			type->name);
2183  		return 0;
2184  	}
2185  
2186  	/*
2187  	 * Run a selftest on this tracer.
2188  	 * Here we reset the trace buffer, and set the current
2189  	 * tracer to be this tracer. The tracer can then run some
2190  	 * internal tracing to verify that everything is in order.
2191  	 * If we fail, we do not register this tracer.
2192  	 */
2193  	tracing_reset_online_cpus(&tr->array_buffer);
2194  
2195  	tr->current_trace = type;
2196  
2197  #ifdef CONFIG_TRACER_MAX_TRACE
2198  	if (type->use_max_tr) {
2199  		/* If we expanded the buffers, make sure the max is expanded too */
2200  		if (tr->ring_buffer_expanded)
2201  			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2202  					   RING_BUFFER_ALL_CPUS);
2203  		tr->allocated_snapshot = true;
2204  	}
2205  #endif
2206  
2207  	/* the test is responsible for initializing and enabling */
2208  	pr_info("Testing tracer %s: ", type->name);
2209  	ret = type->selftest(type, tr);
2210  	/* the test is responsible for resetting too */
2211  	tr->current_trace = saved_tracer;
2212  	if (ret) {
2213  		printk(KERN_CONT "FAILED!\n");
2214  		/* Add the warning after printing 'FAILED' */
2215  		WARN_ON(1);
2216  		return -1;
2217  	}
2218  	/* Only reset on passing, to avoid touching corrupted buffers */
2219  	tracing_reset_online_cpus(&tr->array_buffer);
2220  
2221  #ifdef CONFIG_TRACER_MAX_TRACE
2222  	if (type->use_max_tr) {
2223  		tr->allocated_snapshot = false;
2224  
2225  		/* Shrink the max buffer again */
2226  		if (tr->ring_buffer_expanded)
2227  			ring_buffer_resize(tr->max_buffer.buffer, 1,
2228  					   RING_BUFFER_ALL_CPUS);
2229  	}
2230  #endif
2231  
2232  	printk(KERN_CONT "PASSED\n");
2233  	return 0;
2234  }
2235  
2236  static int do_run_tracer_selftest(struct tracer *type)
2237  {
2238  	int ret;
2239  
2240  	/*
2241  	 * Tests can take a long time, especially if they are run one after the
2242  	 * other, as does happen during bootup when all the tracers are
2243  	 * registered. This could cause the soft lockup watchdog to trigger.
2244  	 */
2245  	cond_resched();
2246  
2247  	tracing_selftest_running = true;
2248  	ret = run_tracer_selftest(type);
2249  	tracing_selftest_running = false;
2250  
2251  	return ret;
2252  }
2253  
2254  static __init int init_trace_selftests(void)
2255  {
2256  	struct trace_selftests *p, *n;
2257  	struct tracer *t, **last;
2258  	int ret;
2259  
2260  	selftests_can_run = true;
2261  
2262  	guard(mutex)(&trace_types_lock);
2263  
2264  	if (list_empty(&postponed_selftests))
2265  		return 0;
2266  
2267  	pr_info("Running postponed tracer tests:\n");
2268  
2269  	tracing_selftest_running = true;
2270  	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2271  		/* This loop can take minutes when sanitizers are enabled, so
2272  		 * lets make sure we allow RCU processing.
2273  		 */
2274  		cond_resched();
2275  		ret = run_tracer_selftest(p->type);
2276  		/* If the test fails, then warn and remove from available_tracers */
2277  		if (ret < 0) {
2278  			WARN(1, "tracer: %s failed selftest, disabling\n",
2279  			     p->type->name);
2280  			last = &trace_types;
2281  			for (t = trace_types; t; t = t->next) {
2282  				if (t == p->type) {
2283  					*last = t->next;
2284  					break;
2285  				}
2286  				last = &t->next;
2287  			}
2288  		}
2289  		list_del(&p->list);
2290  		kfree(p);
2291  	}
2292  	tracing_selftest_running = false;
2293  
2294  	return 0;
2295  }
2296  core_initcall(init_trace_selftests);
2297  #else
2298  static inline int do_run_tracer_selftest(struct tracer *type)
2299  {
2300  	return 0;
2301  }
2302  #endif /* CONFIG_FTRACE_STARTUP_TEST */
2303  
2304  static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2305  
2306  static void __init apply_trace_boot_options(void);
2307  
2308  /**
2309   * register_tracer - register a tracer with the ftrace system.
2310   * @type: the plugin for the tracer
2311   *
2312   * Register a new plugin tracer.
2313   */
2314  int __init register_tracer(struct tracer *type)
2315  {
2316  	struct tracer *t;
2317  	int ret = 0;
2318  
2319  	if (!type->name) {
2320  		pr_info("Tracer must have a name\n");
2321  		return -1;
2322  	}
2323  
2324  	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2325  		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2326  		return -1;
2327  	}
2328  
2329  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2330  		pr_warn("Can not register tracer %s due to lockdown\n",
2331  			   type->name);
2332  		return -EPERM;
2333  	}
2334  
2335  	mutex_lock(&trace_types_lock);
2336  
2337  	for (t = trace_types; t; t = t->next) {
2338  		if (strcmp(type->name, t->name) == 0) {
2339  			/* already found */
2340  			pr_info("Tracer %s already registered\n",
2341  				type->name);
2342  			ret = -1;
2343  			goto out;
2344  		}
2345  	}
2346  
2347  	if (!type->set_flag)
2348  		type->set_flag = &dummy_set_flag;
2349  	if (!type->flags) {
2350  		/*allocate a dummy tracer_flags*/
2351  		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2352  		if (!type->flags) {
2353  			ret = -ENOMEM;
2354  			goto out;
2355  		}
2356  		type->flags->val = 0;
2357  		type->flags->opts = dummy_tracer_opt;
2358  	} else
2359  		if (!type->flags->opts)
2360  			type->flags->opts = dummy_tracer_opt;
2361  
2362  	/* store the tracer for __set_tracer_option */
2363  	type->flags->trace = type;
2364  
2365  	ret = do_run_tracer_selftest(type);
2366  	if (ret < 0)
2367  		goto out;
2368  
2369  	type->next = trace_types;
2370  	trace_types = type;
2371  	add_tracer_options(&global_trace, type);
2372  
2373   out:
2374  	mutex_unlock(&trace_types_lock);
2375  
2376  	if (ret || !default_bootup_tracer)
2377  		return ret;
2378  
2379  	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2380  		return 0;
2381  
2382  	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2383  	/* Do we want this tracer to start on bootup? */
2384  	tracing_set_tracer(&global_trace, type->name);
2385  	default_bootup_tracer = NULL;
2386  
2387  	apply_trace_boot_options();
2388  
2389  	/* disable other selftests, since this will break it. */
2390  	disable_tracing_selftest("running a tracer");
2391  
2392  	return 0;
2393  }
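/*
 * A minimal sketch of how the in-tree tracers use register_tracer(),
 * assuming the usual struct tracer fields from trace.h; my_tracer_init()
 * and my_tracer_reset() are hypothetical.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */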
2394  
2395  static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2396  {
2397  	struct trace_buffer *buffer = buf->buffer;
2398  
2399  	if (!buffer)
2400  		return;
2401  
2402  	ring_buffer_record_disable(buffer);
2403  
2404  	/* Make sure all commits have finished */
2405  	synchronize_rcu();
2406  	ring_buffer_reset_cpu(buffer, cpu);
2407  
2408  	ring_buffer_record_enable(buffer);
2409  }
2410  
2411  void tracing_reset_online_cpus(struct array_buffer *buf)
2412  {
2413  	struct trace_buffer *buffer = buf->buffer;
2414  
2415  	if (!buffer)
2416  		return;
2417  
2418  	ring_buffer_record_disable(buffer);
2419  
2420  	/* Make sure all commits have finished */
2421  	synchronize_rcu();
2422  
2423  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2424  
2425  	ring_buffer_reset_online_cpus(buffer);
2426  
2427  	ring_buffer_record_enable(buffer);
2428  }
2429  
2430  static void tracing_reset_all_cpus(struct array_buffer *buf)
2431  {
2432  	struct trace_buffer *buffer = buf->buffer;
2433  
2434  	if (!buffer)
2435  		return;
2436  
2437  	ring_buffer_record_disable(buffer);
2438  
2439  	/* Make sure all commits have finished */
2440  	synchronize_rcu();
2441  
2442  	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443  
2444  	ring_buffer_reset(buffer);
2445  
2446  	ring_buffer_record_enable(buffer);
2447  }
2448  
2449  /* Must have trace_types_lock held */
2450  void tracing_reset_all_online_cpus_unlocked(void)
2451  {
2452  	struct trace_array *tr;
2453  
2454  	lockdep_assert_held(&trace_types_lock);
2455  
2456  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2457  		if (!tr->clear_trace)
2458  			continue;
2459  		tr->clear_trace = false;
2460  		tracing_reset_online_cpus(&tr->array_buffer);
2461  #ifdef CONFIG_TRACER_MAX_TRACE
2462  		tracing_reset_online_cpus(&tr->max_buffer);
2463  #endif
2464  	}
2465  }
2466  
2467  void tracing_reset_all_online_cpus(void)
2468  {
2469  	guard(mutex)(&trace_types_lock);
2470  	tracing_reset_all_online_cpus_unlocked();
2471  }
2472  
2473  int is_tracing_stopped(void)
2474  {
2475  	return global_trace.stop_count;
2476  }
2477  
2478  static void tracing_start_tr(struct trace_array *tr)
2479  {
2480  	struct trace_buffer *buffer;
2481  
2482  	if (tracing_disabled)
2483  		return;
2484  
2485  	guard(raw_spinlock_irqsave)(&tr->start_lock);
2486  	if (--tr->stop_count) {
2487  		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2488  			/* Someone screwed up their debugging */
2489  			tr->stop_count = 0;
2490  		}
2491  		return;
2492  	}
2493  
2494  	/* Prevent the buffers from switching */
2495  	arch_spin_lock(&tr->max_lock);
2496  
2497  	buffer = tr->array_buffer.buffer;
2498  	if (buffer)
2499  		ring_buffer_record_enable(buffer);
2500  
2501  #ifdef CONFIG_TRACER_MAX_TRACE
2502  	buffer = tr->max_buffer.buffer;
2503  	if (buffer)
2504  		ring_buffer_record_enable(buffer);
2505  #endif
2506  
2507  	arch_spin_unlock(&tr->max_lock);
2508  }
2509  
2510  /**
2511   * tracing_start - quick start of the tracer
2512   *
2513   * If tracing is enabled but was stopped by tracing_stop,
2514   * this will start the tracer back up.
2515   */
2516  void tracing_start(void)
2517  
2518  {
2519  	return tracing_start_tr(&global_trace);
2520  }
2521  
2522  static void tracing_stop_tr(struct trace_array *tr)
2523  {
2524  	struct trace_buffer *buffer;
2525  
2526  	guard(raw_spinlock_irqsave)(&tr->start_lock);
2527  	if (tr->stop_count++)
2528  		return;
2529  
2530  	/* Prevent the buffers from switching */
2531  	arch_spin_lock(&tr->max_lock);
2532  
2533  	buffer = tr->array_buffer.buffer;
2534  	if (buffer)
2535  		ring_buffer_record_disable(buffer);
2536  
2537  #ifdef CONFIG_TRACER_MAX_TRACE
2538  	buffer = tr->max_buffer.buffer;
2539  	if (buffer)
2540  		ring_buffer_record_disable(buffer);
2541  #endif
2542  
2543  	arch_spin_unlock(&tr->max_lock);
2544  }
2545  
2546  /**
2547   * tracing_stop - quick stop of the tracer
2548   *
2549   * Light weight way to stop tracing. Use in conjunction with
2550   * tracing_start.
2551   */
2552  void tracing_stop(void)
2553  {
2554  	return tracing_stop_tr(&global_trace);
2555  }
2556  
2557  /*
2558   * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2559   * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2560   * simplifies those functions and keeps them in sync.
2561   */
2562  enum print_line_t trace_handle_return(struct trace_seq *s)
2563  {
2564  	return trace_seq_has_overflowed(s) ?
2565  		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2566  }
2567  EXPORT_SYMBOL_GPL(trace_handle_return);
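/*
 * A minimal sketch of the typical caller: a trace_event output callback
 * that ends with trace_handle_return() so an overflowed trace_seq is
 * reported as a partial line. The event name and the printed value are
 * hypothetical; the callback signature matches struct trace_event_functions.
 *
 *	static enum print_line_t
 *	trace_my_event_print(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "my_event: %d\n", 42);
 *		return trace_handle_return(s);
 *	}
 */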
2568  
2569  static unsigned short migration_disable_value(void)
2570  {
2571  #if defined(CONFIG_SMP)
2572  	return current->migration_disabled;
2573  #else
2574  	return 0;
2575  #endif
2576  }
2577  
2578  unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2579  {
2580  	unsigned int trace_flags = irqs_status;
2581  	unsigned int pc;
2582  
2583  	pc = preempt_count();
2584  
2585  	if (pc & NMI_MASK)
2586  		trace_flags |= TRACE_FLAG_NMI;
2587  	if (pc & HARDIRQ_MASK)
2588  		trace_flags |= TRACE_FLAG_HARDIRQ;
2589  	if (in_serving_softirq())
2590  		trace_flags |= TRACE_FLAG_SOFTIRQ;
2591  	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2592  		trace_flags |= TRACE_FLAG_BH_OFF;
2593  
2594  	if (tif_need_resched())
2595  		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2596  	if (test_preempt_need_resched())
2597  		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2598  	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2599  		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2600  	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2601  		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2602  }
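/*
 * The return value above packs the tracing context into a single word:
 *
 *	bits  0- 3: preempt-disable depth (pc & 0xff, clamped to 0xf)
 *	bits  4- 7: migration-disable depth (clamped to 0xf)
 *	bits 16-31: TRACE_FLAG_* bits (irqs_status plus NMI/HARDIRQ/...)
 *
 * For example, a hard-irq context with a preempt-disable depth of 2
 * yields at least (TRACE_FLAG_HARDIRQ << 16) | 2.
 */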
2603  
2604  struct ring_buffer_event *
2605  trace_buffer_lock_reserve(struct trace_buffer *buffer,
2606  			  int type,
2607  			  unsigned long len,
2608  			  unsigned int trace_ctx)
2609  {
2610  	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2611  }
2612  
2613  DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2614  DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2615  static int trace_buffered_event_ref;
2616  
2617  /**
2618   * trace_buffered_event_enable - enable buffering events
2619   *
2620   * When events are being filtered, it is quicker to use a temporary
2621   * buffer to write the event data into if there's a likely chance
2622   * that it will not be committed. The discard of the ring buffer
2623   * is not as fast as committing, and is much slower than copying
2624   * a commit.
2625   *
2626   * When an event is to be filtered, allocate per cpu buffers to
2627   * write the event data into, and if the event is filtered and discarded
2628   * it is simply dropped, otherwise, the entire data is to be committed
2629   * in one shot.
2630   */
2631  void trace_buffered_event_enable(void)
2632  {
2633  	struct ring_buffer_event *event;
2634  	struct page *page;
2635  	int cpu;
2636  
2637  	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2638  
2639  	if (trace_buffered_event_ref++)
2640  		return;
2641  
2642  	for_each_tracing_cpu(cpu) {
2643  		page = alloc_pages_node(cpu_to_node(cpu),
2644  					GFP_KERNEL | __GFP_NORETRY, 0);
2645  		/* This is just an optimization and can handle failures */
2646  		if (!page) {
2647  			pr_err("Failed to allocate event buffer\n");
2648  			break;
2649  		}
2650  
2651  		event = page_address(page);
2652  		memset(event, 0, sizeof(*event));
2653  
2654  		per_cpu(trace_buffered_event, cpu) = event;
2655  
2656  		scoped_guard(preempt,) {
2657  			if (cpu == smp_processor_id() &&
2658  			    __this_cpu_read(trace_buffered_event) !=
2659  			    per_cpu(trace_buffered_event, cpu))
2660  				WARN_ON_ONCE(1);
2661  		}
2662  	}
2663  }
2664  
2665  static void enable_trace_buffered_event(void *data)
2666  {
2667  	this_cpu_dec(trace_buffered_event_cnt);
2668  }
2669  
2670  static void disable_trace_buffered_event(void *data)
2671  {
2672  	this_cpu_inc(trace_buffered_event_cnt);
2673  }
2674  
2675  /**
2676   * trace_buffered_event_disable - disable buffering events
2677   *
2678   * When a filter is removed, it is faster to not use the buffered
2679   * events, and to commit directly into the ring buffer. Free up
2680   * the temp buffers when there are no more users. This requires
2681   * special synchronization with current events.
2682   */
2683  void trace_buffered_event_disable(void)
2684  {
2685  	int cpu;
2686  
2687  	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2688  
2689  	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2690  		return;
2691  
2692  	if (--trace_buffered_event_ref)
2693  		return;
2694  
2695  	/* For each CPU, set the buffer as used. */
2696  	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2697  			 NULL, true);
2698  
2699  	/* Wait for all current users to finish */
2700  	synchronize_rcu();
2701  
2702  	for_each_tracing_cpu(cpu) {
2703  		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2704  		per_cpu(trace_buffered_event, cpu) = NULL;
2705  	}
2706  
2707  	/*
2708  	 * Wait for all CPUs that potentially started checking if they can use
2709  	 * their event buffer only after the previous synchronize_rcu() call and
2710  	 * they still read a valid pointer from trace_buffered_event. It must be
2711  	 * ensured they don't see cleared trace_buffered_event_cnt else they
2712  	 * could wrongly decide to use the pointed-to buffer which is now freed.
2713  	 */
2714  	synchronize_rcu();
2715  
2716  	/* For each CPU, relinquish the buffer */
2717  	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2718  			 true);
2719  }
2720  
2721  static struct trace_buffer *temp_buffer;
2722  
2723  struct ring_buffer_event *
2724  trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2725  			  struct trace_event_file *trace_file,
2726  			  int type, unsigned long len,
2727  			  unsigned int trace_ctx)
2728  {
2729  	struct ring_buffer_event *entry;
2730  	struct trace_array *tr = trace_file->tr;
2731  	int val;
2732  
2733  	*current_rb = tr->array_buffer.buffer;
2734  
2735  	if (!tr->no_filter_buffering_ref &&
2736  	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2737  		preempt_disable_notrace();
2738  		/*
2739  		 * Filtering is on, so try to use the per cpu buffer first.
2740  		 * This buffer will simulate a ring_buffer_event,
2741  		 * where the type_len is zero and the array[0] will
2742  		 * hold the full length.
2743  		 * (see include/linux/ring_buffer.h for details on
2744  		 *  how the ring_buffer_event is structured).
2745  		 *
2746  		 * Using a temp buffer during filtering and copying it
2747  		 * on a matched filter is quicker than writing directly
2748  		 * into the ring buffer and then discarding it when
2749  		 * it doesn't match. That is because the discard
2750  		 * requires several atomic operations to get right.
2751  		 * Copying on match and doing nothing on a failed match
2752  		 * is still quicker than no copy on match, but having
2753  		 * to discard out of the ring buffer on a failed match.
2754  		 */
2755  		if ((entry = __this_cpu_read(trace_buffered_event))) {
2756  			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2757  
2758  			val = this_cpu_inc_return(trace_buffered_event_cnt);
2759  
2760  			/*
2761  			 * Preemption is disabled, but interrupts and NMIs
2762  			 * can still come in now. If that happens after
2763  			 * the above increment, then it will have to go
2764  			 * back to the old method of allocating the event
2765  			 * on the ring buffer, and if the filter fails, it
2766  			 * will have to call ring_buffer_discard_commit()
2767  			 * to remove it.
2768  			 *
2769  			 * Need to also check the unlikely case that the
2770  			 * length is bigger than the temp buffer size.
2771  			 * If that happens, then the reserve is pretty much
2772  			 * guaranteed to fail, as the ring buffer currently
2773  			 * only allows events less than a page. But that may
2774  			 * change in the future, so let the ring buffer reserve
2775  			 * handle the failure in that case.
2776  			 */
2777  			if (val == 1 && likely(len <= max_len)) {
2778  				trace_event_setup(entry, type, trace_ctx);
2779  				entry->array[0] = len;
2780  				/* Return with preemption disabled */
2781  				return entry;
2782  			}
2783  			this_cpu_dec(trace_buffered_event_cnt);
2784  		}
2785  		/* __trace_buffer_lock_reserve() disables preemption */
2786  		preempt_enable_notrace();
2787  	}
2788  
2789  	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2790  					    trace_ctx);
2791  	/*
2792  	 * If tracing is off, but we have triggers enabled
2793  	 * we still need to look at the event data. Use the temp_buffer
2794  	 * to store the trace event for the trigger to use. It's recursive
2795  	 * safe and will not be recorded anywhere.
2796  	 */
2797  	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2798  		*current_rb = temp_buffer;
2799  		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2800  						    trace_ctx);
2801  	}
2802  	return entry;
2803  }
2804  EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2805  
2806  static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2807  static DEFINE_MUTEX(tracepoint_printk_mutex);
2808  
2809  static void output_printk(struct trace_event_buffer *fbuffer)
2810  {
2811  	struct trace_event_call *event_call;
2812  	struct trace_event_file *file;
2813  	struct trace_event *event;
2814  	unsigned long flags;
2815  	struct trace_iterator *iter = tracepoint_print_iter;
2816  
2817  	/* We should never get here if iter is NULL */
2818  	if (WARN_ON_ONCE(!iter))
2819  		return;
2820  
2821  	event_call = fbuffer->trace_file->event_call;
2822  	if (!event_call || !event_call->event.funcs ||
2823  	    !event_call->event.funcs->trace)
2824  		return;
2825  
2826  	file = fbuffer->trace_file;
2827  	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2828  	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2829  	     !filter_match_preds(file->filter, fbuffer->entry)))
2830  		return;
2831  
2832  	event = &fbuffer->trace_file->event_call->event;
2833  
2834  	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2835  	trace_seq_init(&iter->seq);
2836  	iter->ent = fbuffer->entry;
2837  	event_call->event.funcs->trace(iter, 0, event);
2838  	trace_seq_putc(&iter->seq, 0);
2839  	printk("%s", iter->seq.buffer);
2840  
2841  	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2842  }
2843  
2844  int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2845  			     void *buffer, size_t *lenp,
2846  			     loff_t *ppos)
2847  {
2848  	int save_tracepoint_printk;
2849  	int ret;
2850  
2851  	guard(mutex)(&tracepoint_printk_mutex);
2852  	save_tracepoint_printk = tracepoint_printk;
2853  
2854  	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2855  
2856  	/*
2857  	 * This will force exiting early, as tracepoint_printk
2858  	 * is always zero when tracepoint_printk_iter is not allocated
2859  	 */
2860  	if (!tracepoint_print_iter)
2861  		tracepoint_printk = 0;
2862  
2863  	if (save_tracepoint_printk == tracepoint_printk)
2864  		return ret;
2865  
2866  	if (tracepoint_printk)
2867  		static_key_enable(&tracepoint_printk_key.key);
2868  	else
2869  		static_key_disable(&tracepoint_printk_key.key);
2870  
2871  	return ret;
2872  }
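/*
 * As a usage note (assuming the standard sysctl wiring): this handler
 * backs the kernel.tracepoint_printk sysctl, so the redirect of enabled
 * tracepoints to printk can be toggled at runtime with something like
 *
 *	sysctl kernel.tracepoint_printk=1
 *
 * and it only has an effect when tracepoint_print_iter was allocated,
 * i.e. when the kernel was booted with the "tp_printk" parameter.
 */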
2873  
2874  void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2875  {
2876  	enum event_trigger_type tt = ETT_NONE;
2877  	struct trace_event_file *file = fbuffer->trace_file;
2878  
2879  	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2880  			fbuffer->entry, &tt))
2881  		goto discard;
2882  
2883  	if (static_key_false(&tracepoint_printk_key.key))
2884  		output_printk(fbuffer);
2885  
2886  	if (static_branch_unlikely(&trace_event_exports_enabled))
2887  		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2888  
2889  	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2890  			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2891  
2892  discard:
2893  	if (tt)
2894  		event_triggers_post_call(file, tt);
2895  
2896  }
2897  EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2898  
2899  /*
2900   * Skip 3:
2901   *
2902   *   trace_buffer_unlock_commit_regs()
2903   *   trace_event_buffer_commit()
2904   *   trace_event_raw_event_xxx()
2905   */
2906  # define STACK_SKIP 3
2907  
2908  void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2909  				     struct trace_buffer *buffer,
2910  				     struct ring_buffer_event *event,
2911  				     unsigned int trace_ctx,
2912  				     struct pt_regs *regs)
2913  {
2914  	__buffer_unlock_commit(buffer, event);
2915  
2916  	/*
2917  	 * If regs is not set, then skip the necessary functions.
2918  	 * Note, we can still get here via blktrace, wakeup tracer
2919  	 * and mmiotrace, but that's ok if they lose a function or
2920  	 * two. They are not that meaningful.
2921  	 */
2922  	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2923  	ftrace_trace_userstack(tr, buffer, trace_ctx);
2924  }
2925  
2926  /*
2927   * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2928   */
2929  void
2930  trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2931  				   struct ring_buffer_event *event)
2932  {
2933  	__buffer_unlock_commit(buffer, event);
2934  }
2935  
2936  void
2937  trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2938  	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2939  {
2940  	struct trace_buffer *buffer = tr->array_buffer.buffer;
2941  	struct ring_buffer_event *event;
2942  	struct ftrace_entry *entry;
2943  	int size = sizeof(*entry);
2944  
2945  	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2946  
2947  	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2948  					    trace_ctx);
2949  	if (!event)
2950  		return;
2951  	entry	= ring_buffer_event_data(event);
2952  	entry->ip			= ip;
2953  	entry->parent_ip		= parent_ip;
2954  
2955  #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2956  	if (fregs) {
2957  		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2958  			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2959  	}
2960  #endif
2961  
2962  	if (static_branch_unlikely(&trace_function_exports_enabled))
2963  		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2964  	__buffer_unlock_commit(buffer, event);
2965  }
2966  
2967  #ifdef CONFIG_STACKTRACE
2968  
2969  /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2970  #define FTRACE_KSTACK_NESTING	4
2971  
2972  #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2973  
2974  struct ftrace_stack {
2975  	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2976  };
2977  
2978  
2979  struct ftrace_stacks {
2980  	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2981  };
2982  
2983  static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2984  static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2985  
2986  static void __ftrace_trace_stack(struct trace_array *tr,
2987  				 struct trace_buffer *buffer,
2988  				 unsigned int trace_ctx,
2989  				 int skip, struct pt_regs *regs)
2990  {
2991  	struct ring_buffer_event *event;
2992  	unsigned int size, nr_entries;
2993  	struct ftrace_stack *fstack;
2994  	struct stack_entry *entry;
2995  	int stackidx;
2996  
2997  	/*
2998  	 * Add one, for this function and the call to save_stack_trace()
2999  	 * If regs is set, then these functions will not be in the way.
3000  	 */
3001  #ifndef CONFIG_UNWINDER_ORC
3002  	if (!regs)
3003  		skip++;
3004  #endif
3005  
3006  	guard(preempt_notrace)();
3007  
3008  	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3009  
3010  	/* This should never happen. If it does, yell once and skip */
3011  	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3012  		goto out;
3013  
3014  	/*
3015  	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3016  	 * interrupt will either see the value pre increment or post
3017  	 * increment. If the interrupt happens pre increment it will have
3018  	 * restored the counter when it returns.  We just need a barrier to
3019  	 * keep gcc from moving things around.
3020  	 */
3021  	barrier();
3022  
3023  	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3024  	size = ARRAY_SIZE(fstack->calls);
3025  
3026  	if (regs) {
3027  		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3028  						   size, skip);
3029  	} else {
3030  		nr_entries = stack_trace_save(fstack->calls, size, skip);
3031  	}
3032  
3033  #ifdef CONFIG_DYNAMIC_FTRACE
3034  	/* Mark entry of stack trace as trampoline code */
3035  	if (tr->ops && tr->ops->trampoline) {
3036  		unsigned long tramp_start = tr->ops->trampoline;
3037  		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3038  		unsigned long *calls = fstack->calls;
3039  
3040  		for (int i = 0; i < nr_entries; i++) {
3041  			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3042  				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3043  		}
3044  	}
3045  #endif
3046  
3047  	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3048  				    struct_size(entry, caller, nr_entries),
3049  				    trace_ctx);
3050  	if (!event)
3051  		goto out;
3052  	entry = ring_buffer_event_data(event);
3053  
3054  	entry->size = nr_entries;
3055  	memcpy(&entry->caller, fstack->calls,
3056  	       flex_array_size(entry, caller, nr_entries));
3057  
3058  	__buffer_unlock_commit(buffer, event);
3059  
3060   out:
3061  	/* Again, don't let gcc optimize things here */
3062  	barrier();
3063  	__this_cpu_dec(ftrace_stack_reserve);
3064  }
3065  
3066  static inline void ftrace_trace_stack(struct trace_array *tr,
3067  				      struct trace_buffer *buffer,
3068  				      unsigned int trace_ctx,
3069  				      int skip, struct pt_regs *regs)
3070  {
3071  	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3072  		return;
3073  
3074  	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3075  }
3076  
3077  void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3078  		   int skip)
3079  {
3080  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3081  
3082  	if (rcu_is_watching()) {
3083  		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3084  		return;
3085  	}
3086  
3087  	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3088  		return;
3089  
3090  	/*
3091  	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3092  	 * but if the above rcu_is_watching() failed, then the NMI
3093  	 * triggered someplace critical, and ct_irq_enter() should
3094  	 * not be called from NMI.
3095  	 */
3096  	if (unlikely(in_nmi()))
3097  		return;
3098  
3099  	ct_irq_enter_irqson();
3100  	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3101  	ct_irq_exit_irqson();
3102  }
3103  
3104  /**
3105   * trace_dump_stack - record a stack back trace in the trace buffer
3106   * @skip: Number of functions to skip (helper handlers)
3107   */
3108  void trace_dump_stack(int skip)
3109  {
3110  	if (tracing_disabled || tracing_selftest_running)
3111  		return;
3112  
3113  #ifndef CONFIG_UNWINDER_ORC
3114  	/* Skip 1 to skip this function. */
3115  	skip++;
3116  #endif
3117  	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3118  				tracing_gen_ctx(), skip, NULL);
3119  }
3120  EXPORT_SYMBOL_GPL(trace_dump_stack);
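/*
 * A minimal usage sketch: trace_dump_stack() is meant for ad-hoc
 * debugging, so dropping a call such as
 *
 *	trace_dump_stack(0);
 *
 * into a code path records the kernel stack of every hit in the trace
 * buffer instead of flooding the console the way dump_stack() would.
 */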
3121  
3122  #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3123  static DEFINE_PER_CPU(int, user_stack_count);
3124  
3125  static void
3126  ftrace_trace_userstack(struct trace_array *tr,
3127  		       struct trace_buffer *buffer, unsigned int trace_ctx)
3128  {
3129  	struct ring_buffer_event *event;
3130  	struct userstack_entry *entry;
3131  
3132  	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3133  		return;
3134  
3135  	/*
3136  	 * NMIs cannot handle page faults, even with fixups.
3137  	 * Saving the user stack can (and often does) fault.
3138  	 */
3139  	if (unlikely(in_nmi()))
3140  		return;
3141  
3142  	/*
3143  	 * prevent recursion, since the user stack tracing may
3144  	 * trigger other kernel events.
3145  	 */
3146  	guard(preempt)();
3147  	if (__this_cpu_read(user_stack_count))
3148  		return;
3149  
3150  	__this_cpu_inc(user_stack_count);
3151  
3152  	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3153  					    sizeof(*entry), trace_ctx);
3154  	if (!event)
3155  		goto out_drop_count;
3156  	entry	= ring_buffer_event_data(event);
3157  
3158  	entry->tgid		= current->tgid;
3159  	memset(&entry->caller, 0, sizeof(entry->caller));
3160  
3161  	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3162  	__buffer_unlock_commit(buffer, event);
3163  
3164   out_drop_count:
3165  	__this_cpu_dec(user_stack_count);
3166  }
3167  #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3168  static void ftrace_trace_userstack(struct trace_array *tr,
3169  				   struct trace_buffer *buffer,
3170  				   unsigned int trace_ctx)
3171  {
3172  }
3173  #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3174  
3175  #endif /* CONFIG_STACKTRACE */
3176  
3177  static inline void
3178  func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3179  			  unsigned long long delta)
3180  {
3181  	entry->bottom_delta_ts = delta & U32_MAX;
3182  	entry->top_delta_ts = (delta >> 32);
3183  }
3184  
3185  void trace_last_func_repeats(struct trace_array *tr,
3186  			     struct trace_func_repeats *last_info,
3187  			     unsigned int trace_ctx)
3188  {
3189  	struct trace_buffer *buffer = tr->array_buffer.buffer;
3190  	struct func_repeats_entry *entry;
3191  	struct ring_buffer_event *event;
3192  	u64 delta;
3193  
3194  	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3195  					    sizeof(*entry), trace_ctx);
3196  	if (!event)
3197  		return;
3198  
3199  	delta = ring_buffer_event_time_stamp(buffer, event) -
3200  		last_info->ts_last_call;
3201  
3202  	entry = ring_buffer_event_data(event);
3203  	entry->ip = last_info->ip;
3204  	entry->parent_ip = last_info->parent_ip;
3205  	entry->count = last_info->count;
3206  	func_repeats_set_delta_ts(entry, delta);
3207  
3208  	__buffer_unlock_commit(buffer, event);
3209  }
3210  
3211  /* created for use with alloc_percpu */
3212  struct trace_buffer_struct {
3213  	int nesting;
3214  	char buffer[4][TRACE_BUF_SIZE];
3215  };
3216  
3217  static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3218  
3219  /*
3220   * This allows for lockless recording.  If we're nested too deeply, then
3221   * this returns NULL.
3222   */
3223  static char *get_trace_buf(void)
3224  {
3225  	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3226  
3227  	if (!trace_percpu_buffer || buffer->nesting >= 4)
3228  		return NULL;
3229  
3230  	buffer->nesting++;
3231  
3232  	/* Interrupts must see nesting incremented before we use the buffer */
3233  	barrier();
3234  	return &buffer->buffer[buffer->nesting - 1][0];
3235  }
3236  
3237  static void put_trace_buf(void)
3238  {
3239  	/* Don't let the decrement of nesting leak before this */
3240  	barrier();
3241  	this_cpu_dec(trace_percpu_buffer->nesting);
3242  }
3243  
3244  static int alloc_percpu_trace_buffer(void)
3245  {
3246  	struct trace_buffer_struct __percpu *buffers;
3247  
3248  	if (trace_percpu_buffer)
3249  		return 0;
3250  
3251  	buffers = alloc_percpu(struct trace_buffer_struct);
3252  	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3253  		return -ENOMEM;
3254  
3255  	trace_percpu_buffer = buffers;
3256  	return 0;
3257  }
3258  
3259  static int buffers_allocated;
3260  
3261  void trace_printk_init_buffers(void)
3262  {
3263  	if (buffers_allocated)
3264  		return;
3265  
3266  	if (alloc_percpu_trace_buffer())
3267  		return;
3268  
3269  	/* trace_printk() is for debug use only. Don't use it in production. */
3270  
3271  	pr_warn("\n");
3272  	pr_warn("**********************************************************\n");
3273  	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3274  	pr_warn("**                                                      **\n");
3275  	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3276  	pr_warn("**                                                      **\n");
3277  	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3278  	pr_warn("** unsafe for production use.                           **\n");
3279  	pr_warn("**                                                      **\n");
3280  	pr_warn("** If you see this message and you are not debugging    **\n");
3281  	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3282  	pr_warn("**                                                      **\n");
3283  	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3284  	pr_warn("**********************************************************\n");
3285  
3286  	/* Expand the buffers to set size */
3287  	tracing_update_buffers(&global_trace);
3288  
3289  	buffers_allocated = 1;
3290  
3291  	/*
3292  	 * trace_printk_init_buffers() can be called by modules.
3293  	 * If that happens, then we need to start cmdline recording
3294  	 * directly here. If the global_trace.buffer is already
3295  	 * allocated here, then this was called by module code.
3296  	 */
3297  	if (global_trace.array_buffer.buffer)
3298  		tracing_start_cmdline_record();
3299  }
3300  EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
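/*
 * A minimal usage sketch of what triggers the banner above: a debug-only
 * call such as
 *
 *	trace_printk("entered %s with flags=%lx\n", __func__, flags);
 *
 * writes into the ring buffer (visible in the tracefs "trace" file) with
 * much less overhead than printk(); "flags" here is hypothetical.
 */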
3301  
3302  void trace_printk_start_comm(void)
3303  {
3304  	/* Start tracing comms if trace printk is set */
3305  	if (!buffers_allocated)
3306  		return;
3307  	tracing_start_cmdline_record();
3308  }
3309  
3310  static void trace_printk_start_stop_comm(int enabled)
3311  {
3312  	if (!buffers_allocated)
3313  		return;
3314  
3315  	if (enabled)
3316  		tracing_start_cmdline_record();
3317  	else
3318  		tracing_stop_cmdline_record();
3319  }
3320  
3321  /**
3322   * trace_vbprintk - write binary msg to tracing buffer
3323   * @ip:    The address of the caller
3324   * @fmt:   The string format to write to the buffer
3325   * @args:  Arguments for @fmt
3326   */
3327  int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3328  {
3329  	struct ring_buffer_event *event;
3330  	struct trace_buffer *buffer;
3331  	struct trace_array *tr = READ_ONCE(printk_trace);
3332  	struct bprint_entry *entry;
3333  	unsigned int trace_ctx;
3334  	char *tbuffer;
3335  	int len = 0, size;
3336  
3337  	if (!printk_binsafe(tr))
3338  		return trace_vprintk(ip, fmt, args);
3339  
3340  	if (unlikely(tracing_selftest_running || tracing_disabled))
3341  		return 0;
3342  
3343  	/* Don't pollute graph traces with trace_vprintk internals */
3344  	pause_graph_tracing();
3345  
3346  	trace_ctx = tracing_gen_ctx();
3347  	guard(preempt_notrace)();
3348  
3349  	tbuffer = get_trace_buf();
3350  	if (!tbuffer) {
3351  		len = 0;
3352  		goto out_nobuffer;
3353  	}
3354  
3355  	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3356  
3357  	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3358  		goto out_put;
3359  
3360  	size = sizeof(*entry) + sizeof(u32) * len;
3361  	buffer = tr->array_buffer.buffer;
3362  	scoped_guard(ring_buffer_nest, buffer) {
3363  		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3364  						    trace_ctx);
3365  		if (!event)
3366  			goto out_put;
3367  		entry = ring_buffer_event_data(event);
3368  		entry->ip			= ip;
3369  		entry->fmt			= fmt;
3370  
3371  		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3372  		__buffer_unlock_commit(buffer, event);
3373  		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3374  	}
3375  out_put:
3376  	put_trace_buf();
3377  
3378  out_nobuffer:
3379  	unpause_graph_tracing();
3380  
3381  	return len;
3382  }
3383  EXPORT_SYMBOL_GPL(trace_vbprintk);
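/*
 * Note on the binary path above: trace_vbprintk() records only the format
 * pointer and the binary arguments (via vbin_printf()), and the string is
 * only rendered when the buffer is read. That is also why printk_binsafe()
 * falls back to trace_vprintk() for buffers where a saved format pointer
 * may not remain valid (for example, a buffer that persists across a
 * reboot).
 */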
3384  
3385  static __printf(3, 0)
3386  int __trace_array_vprintk(struct trace_buffer *buffer,
3387  			  unsigned long ip, const char *fmt, va_list args)
3388  {
3389  	struct ring_buffer_event *event;
3390  	int len = 0, size;
3391  	struct print_entry *entry;
3392  	unsigned int trace_ctx;
3393  	char *tbuffer;
3394  
3395  	if (tracing_disabled)
3396  		return 0;
3397  
3398  	/* Don't pollute graph traces with trace_vprintk internals */
3399  	pause_graph_tracing();
3400  
3401  	trace_ctx = tracing_gen_ctx();
3402  	guard(preempt_notrace)();
3403  
3404  
3405  	tbuffer = get_trace_buf();
3406  	if (!tbuffer) {
3407  		len = 0;
3408  		goto out_nobuffer;
3409  	}
3410  
3411  	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3412  
3413  	size = sizeof(*entry) + len + 1;
3414  	scoped_guard(ring_buffer_nest, buffer) {
3415  		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3416  						    trace_ctx);
3417  		if (!event)
3418  			goto out;
3419  		entry = ring_buffer_event_data(event);
3420  		entry->ip = ip;
3421  
3422  		memcpy(&entry->buf, tbuffer, len + 1);
3423  		__buffer_unlock_commit(buffer, event);
3424  		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3425  	}
3426  out:
3427  	put_trace_buf();
3428  
3429  out_nobuffer:
3430  	unpause_graph_tracing();
3431  
3432  	return len;
3433  }
3434  
3435  int trace_array_vprintk(struct trace_array *tr,
3436  			unsigned long ip, const char *fmt, va_list args)
3437  {
3438  	if (tracing_selftest_running && tr == &global_trace)
3439  		return 0;
3440  
3441  	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3442  }
3443  
3444  /**
3445   * trace_array_printk - Print a message to a specific instance
3446   * @tr: The instance trace_array descriptor
3447   * @ip: The instruction pointer that this is called from.
3448   * @fmt: The format to print (printf format)
3449   *
3450   * If a subsystem sets up its own instance, they have the right to
3451   * printk strings into their tracing instance buffer using this
3452   * function. Note, this function will not write into the top level
3453   * buffer (use trace_printk() for that), as writing into the top level
3454   * buffer should only have events that can be individually disabled.
3455   * trace_printk() is only used for debugging a kernel, and should not
3456   * trace_printk() is only used for debugging a kernel, and should never
3457   * be incorporated into production code.
3458   * trace_array_printk() can be used, as it will not add noise to the
3459   * top level tracing buffer.
3460   *
3461   * Note, trace_array_init_printk() must be called on @tr before this
3462   * can be used.
3463   */
3464  int trace_array_printk(struct trace_array *tr,
3465  		       unsigned long ip, const char *fmt, ...)
3466  {
3467  	int ret;
3468  	va_list ap;
3469  
3470  	if (!tr)
3471  		return -ENOENT;
3472  
3473  	/* This is only allowed for created instances */
3474  	if (tr == &global_trace)
3475  		return 0;
3476  
3477  	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3478  		return 0;
3479  
3480  	va_start(ap, fmt);
3481  	ret = trace_array_vprintk(tr, ip, fmt, ap);
3482  	va_end(ap);
3483  	return ret;
3484  }
3485  EXPORT_SYMBOL_GPL(trace_array_printk);
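/*
 * Sketch of the usage described above, assuming the subsystem has already
 * created or looked up its own instance "tr" (for example with
 * trace_array_get_by_name()); the message text and "delta" are illustrative:
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "reset took %llu ns\n", delta);
 */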
3486  
3487  /**
3488   * trace_array_init_printk - Initialize buffers for trace_array_printk()
3489   * @tr: The trace array to initialize the buffers for
3490   *
3491   * As trace_array_printk() only writes into instances, calls to it are OK
3492   * to have in the kernel (unlike trace_printk()). This needs to be called
3493   * before trace_array_printk() can be used on a trace_array.
3494   */
3495  int trace_array_init_printk(struct trace_array *tr)
3496  {
3497  	if (!tr)
3498  		return -ENOENT;
3499  
3500  	/* This is only allowed for created instances */
3501  	if (tr == &global_trace)
3502  		return -EINVAL;
3503  
3504  	return alloc_percpu_trace_buffer();
3505  }
3506  EXPORT_SYMBOL_GPL(trace_array_init_printk);
3507  
3508  int trace_array_printk_buf(struct trace_buffer *buffer,
3509  			   unsigned long ip, const char *fmt, ...)
3510  {
3511  	int ret;
3512  	va_list ap;
3513  
3514  	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3515  		return 0;
3516  
3517  	va_start(ap, fmt);
3518  	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3519  	va_end(ap);
3520  	return ret;
3521  }
3522  
3523  int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3524  {
3525  	return trace_array_vprintk(printk_trace, ip, fmt, args);
3526  }
3527  EXPORT_SYMBOL_GPL(trace_vprintk);
3528  
3529  static void trace_iterator_increment(struct trace_iterator *iter)
3530  {
3531  	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3532  
3533  	iter->idx++;
3534  	if (buf_iter)
3535  		ring_buffer_iter_advance(buf_iter);
3536  }
3537  
3538  static struct trace_entry *
3539  peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3540  		unsigned long *lost_events)
3541  {
3542  	struct ring_buffer_event *event;
3543  	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3544  
3545  	if (buf_iter) {
3546  		event = ring_buffer_iter_peek(buf_iter, ts);
3547  		if (lost_events)
3548  			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3549  				(unsigned long)-1 : 0;
3550  	} else {
3551  		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3552  					 lost_events);
3553  	}
3554  
3555  	if (event) {
3556  		iter->ent_size = ring_buffer_event_length(event);
3557  		return ring_buffer_event_data(event);
3558  	}
3559  	iter->ent_size = 0;
3560  	return NULL;
3561  }
3562  
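/*
 * Merge step for reading the buffers: peek at the next entry of every
 * per-CPU buffer (or only the requested CPU for a per_cpu trace file) and
 * return the one with the smallest timestamp, along with its CPU,
 * timestamp and lost-event count.
 */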
3563  static struct trace_entry *
3564  __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3565  		  unsigned long *missing_events, u64 *ent_ts)
3566  {
3567  	struct trace_buffer *buffer = iter->array_buffer->buffer;
3568  	struct trace_entry *ent, *next = NULL;
3569  	unsigned long lost_events = 0, next_lost = 0;
3570  	int cpu_file = iter->cpu_file;
3571  	u64 next_ts = 0, ts;
3572  	int next_cpu = -1;
3573  	int next_size = 0;
3574  	int cpu;
3575  
3576  	/*
3577  	 * If we are in a per_cpu trace file, don't bother iterating over
3578  	 * all CPUs; peek at that CPU directly.
3579  	 */
3580  	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3581  		if (ring_buffer_empty_cpu(buffer, cpu_file))
3582  			return NULL;
3583  		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3584  		if (ent_cpu)
3585  			*ent_cpu = cpu_file;
3586  
3587  		return ent;
3588  	}
3589  
3590  	for_each_tracing_cpu(cpu) {
3591  
3592  		if (ring_buffer_empty_cpu(buffer, cpu))
3593  			continue;
3594  
3595  		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3596  
3597  		/*
3598  		 * Pick the entry with the smallest timestamp:
3599  		 */
3600  		if (ent && (!next || ts < next_ts)) {
3601  			next = ent;
3602  			next_cpu = cpu;
3603  			next_ts = ts;
3604  			next_lost = lost_events;
3605  			next_size = iter->ent_size;
3606  		}
3607  	}
3608  
3609  	iter->ent_size = next_size;
3610  
3611  	if (ent_cpu)
3612  		*ent_cpu = next_cpu;
3613  
3614  	if (ent_ts)
3615  		*ent_ts = next_ts;
3616  
3617  	if (missing_events)
3618  		*missing_events = next_lost;
3619  
3620  	return next;
3621  }
3622  
3623  #define STATIC_FMT_BUF_SIZE	128
3624  static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3625  
3626  char *trace_iter_expand_format(struct trace_iterator *iter)
3627  {
3628  	char *tmp;
3629  
3630  	/*
3631  	 * iter->tr is NULL when used with tp_printk, which makes
3632  	 * this get called where it is not safe to call krealloc().
3633  	 */
3634  	if (!iter->tr || iter->fmt == static_fmt_buf)
3635  		return NULL;
3636  
3637  	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3638  		       GFP_KERNEL);
3639  	if (tmp) {
3640  		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3641  		iter->fmt = tmp;
3642  	}
3643  
3644  	return tmp;
3645  }
3646  
3647  /* Returns true if the string is safe to dereference from an event */
3648  static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3649  {
3650  	unsigned long addr = (unsigned long)str;
3651  	struct trace_event *trace_event;
3652  	struct trace_event_call *event;
3653  
3654  	/* OK if part of the event data */
3655  	if ((addr >= (unsigned long)iter->ent) &&
3656  	    (addr < (unsigned long)iter->ent + iter->ent_size))
3657  		return true;
3658  
3659  	/* OK if part of the temp seq buffer */
3660  	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3661  	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3662  		return true;
3663  
3664  	/* Core rodata can not be freed */
3665  	if (is_kernel_rodata(addr))
3666  		return true;
3667  
3668  	if (trace_is_tracepoint_string(str))
3669  		return true;
3670  
3671  	/*
3672  	 * Now this could be a module event, referencing core module
3673  	 * data, which is OK.
3674  	 */
3675  	if (!iter->ent)
3676  		return false;
3677  
3678  	trace_event = ftrace_find_event(iter->ent->type);
3679  	if (!trace_event)
3680  		return false;
3681  
3682  	event = container_of(trace_event, struct trace_event_call, event);
3683  	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3684  		return false;
3685  
3686  	/* Would rather have rodata, but this will suffice */
3687  	if (within_module_core(addr, event->module))
3688  		return true;
3689  
3690  	return false;
3691  }
3692  
3693  /**
3694   * ignore_event - Check dereferenced fields while writing to the seq buffer
3695   * @iter: The iterator that holds the seq buffer and the event being printed
3696   *
3697   * At boot up, test_event_printk() will flag any event that dereferences
3698   * a string with "%s" that does exist in the ring buffer. It may still
3699   * a string with "%s" that does not exist in the ring buffer. It may still
3700   * rodata that never gets freed. But if the string pointer is pointing
3701   * to something that was allocated, there's a chance that it can be freed
3702   * by the time the user reads the trace. This would cause a bad memory
3703   * access by the kernel and possibly crash the system.
3704   *
3705   * This function will check if the event has any fields flagged as needing
3706   * to be checked at runtime and perform those checks.
3707   *
3708   * If it is found that a field is unsafe, it will write into the @iter->seq
3709   * a message stating what was found to be unsafe.
3710   *
3711   * @return: true if the event is unsafe and should be ignored,
3712   *          false otherwise.
3713   */
3714  bool ignore_event(struct trace_iterator *iter)
3715  {
3716  	struct ftrace_event_field *field;
3717  	struct trace_event *trace_event;
3718  	struct trace_event_call *event;
3719  	struct list_head *head;
3720  	struct trace_seq *seq;
3721  	const void *ptr;
3722  
3723  	trace_event = ftrace_find_event(iter->ent->type);
3724  
3725  	seq = &iter->seq;
3726  
3727  	if (!trace_event) {
3728  		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3729  		return true;
3730  	}
3731  
3732  	event = container_of(trace_event, struct trace_event_call, event);
3733  	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3734  		return false;
3735  
3736  	head = trace_get_fields(event);
3737  	if (!head) {
3738  		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3739  				 trace_event_name(event));
3740  		return true;
3741  	}
3742  
3743  	/* Offsets are from the iter->ent that points to the raw event */
3744  	ptr = iter->ent;
3745  
3746  	list_for_each_entry(field, head, link) {
3747  		const char *str;
3748  		bool good;
3749  
3750  		if (!field->needs_test)
3751  			continue;
3752  
3753  		str = *(const char **)(ptr + field->offset);
3754  
3755  		good = trace_safe_str(iter, str);
3756  
3757  		/*
3758  		 * If you hit this warning, it is likely that the
3759  		 * trace event in question used %s on a string that
3760  		 * was saved at the time of the event, but may not be
3761  		 * around when the trace is read. Use __string(),
3762  		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3763  		 * instead. See samples/trace_events/trace-events-sample.h
3764  		 * for reference.
3765  		 */
3766  		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3767  			      trace_event_name(event), field->name)) {
3768  			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3769  					 trace_event_name(event), field->name);
3770  			return true;
3771  		}
3772  	}
3773  	return false;
3774  }
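/*
 * For reference, the safe pattern that the warning above recommends is to
 * copy the string into the event itself. A minimal, hypothetical event
 * definition (see samples/trace_events/trace-events-sample.h):
 *
 *	TRACE_EVENT(sample_name,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name);),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */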
3775  
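/*
 * When the instance has the hash-ptr option cleared, rewrite the event's
 * print format so pointers are shown unhashed: for example "ptr=%p"
 * becomes "ptr=%px", while a literal "%%p" is left untouched.
 */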
3776  const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3777  {
3778  	const char *p, *new_fmt;
3779  	char *q;
3780  
3781  	if (WARN_ON_ONCE(!fmt))
3782  		return fmt;
3783  
3784  	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3785  		return fmt;
3786  
3787  	p = fmt;
3788  	new_fmt = q = iter->fmt;
3789  	while (*p) {
3790  		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3791  			if (!trace_iter_expand_format(iter))
3792  				return fmt;
3793  
3794  			q += iter->fmt - new_fmt;
3795  			new_fmt = iter->fmt;
3796  		}
3797  
3798  		*q++ = *p++;
3799  
3800  		/* Replace %p with %px */
3801  		if (p[-1] == '%') {
3802  			if (p[0] == '%') {
3803  				*q++ = *p++;
3804  			} else if (p[0] == 'p' && !isalnum(p[1])) {
3805  				*q++ = *p++;
3806  				*q++ = 'x';
3807  			}
3808  		}
3809  	}
3810  	*q = '\0';
3811  
3812  	return new_fmt;
3813  }
3814  
3815  #define STATIC_TEMP_BUF_SIZE	128
3816  static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3817  
3818  /* Find the next real entry, without updating the iterator itself */
3819  struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3820  					  int *ent_cpu, u64 *ent_ts)
3821  {
3822  	/* __find_next_entry will reset ent_size */
3823  	int ent_size = iter->ent_size;
3824  	struct trace_entry *entry;
3825  
3826  	/*
3827  	 * If called from ftrace_dump(), then the iter->temp buffer
3828  	 * will be the static_temp_buf and not created from kmalloc.
3829  	 * If the entry size is greater than the buffer, we can
3830  	 * not save it. Just return NULL in that case. This is only
3831  	 * used to add markers when two consecutive events' time
3832  	 * stamps have a large delta. See trace_print_lat_context()
3833  	 */
3834  	if (iter->temp == static_temp_buf &&
3835  	    STATIC_TEMP_BUF_SIZE < ent_size)
3836  		return NULL;
3837  
3838  	/*
3839  	 * The __find_next_entry() may call peek_next_entry(), which may
3840  	 * call ring_buffer_peek() that may make the contents of iter->ent
3841  	 * undefined. Need to copy iter->ent now.
3842  	 */
3843  	if (iter->ent && iter->ent != iter->temp) {
3844  		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3845  		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3846  			void *temp;
3847  			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3848  			if (!temp)
3849  				return NULL;
3850  			kfree(iter->temp);
3851  			iter->temp = temp;
3852  			iter->temp_size = iter->ent_size;
3853  		}
3854  		memcpy(iter->temp, iter->ent, iter->ent_size);
3855  		iter->ent = iter->temp;
3856  	}
3857  	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3858  	/* Put back the original ent_size */
3859  	iter->ent_size = ent_size;
3860  
3861  	return entry;
3862  }
3863  
3864  /* Find the next real entry, and increment the iterator to the next entry */
3865  void *trace_find_next_entry_inc(struct trace_iterator *iter)
3866  {
3867  	iter->ent = __find_next_entry(iter, &iter->cpu,
3868  				      &iter->lost_events, &iter->ts);
3869  
3870  	if (iter->ent)
3871  		trace_iterator_increment(iter);
3872  
3873  	return iter->ent ? iter : NULL;
3874  }
3875  
3876  static void trace_consume(struct trace_iterator *iter)
3877  {
3878  	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3879  			    &iter->lost_events);
3880  }
3881  
3882  static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3883  {
3884  	struct trace_iterator *iter = m->private;
3885  	int i = (int)*pos;
3886  	void *ent;
3887  
3888  	WARN_ON_ONCE(iter->leftover);
3889  
3890  	(*pos)++;
3891  
3892  	/* can't go backwards */
3893  	if (iter->idx > i)
3894  		return NULL;
3895  
3896  	if (iter->idx < 0)
3897  		ent = trace_find_next_entry_inc(iter);
3898  	else
3899  		ent = iter;
3900  
3901  	while (ent && iter->idx < i)
3902  		ent = trace_find_next_entry_inc(iter);
3903  
3904  	iter->pos = *pos;
3905  
3906  	return ent;
3907  }
3908  
3909  void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3910  {
3911  	struct ring_buffer_iter *buf_iter;
3912  	unsigned long entries = 0;
3913  	u64 ts;
3914  
3915  	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3916  
3917  	buf_iter = trace_buffer_iter(iter, cpu);
3918  	if (!buf_iter)
3919  		return;
3920  
3921  	ring_buffer_iter_reset(buf_iter);
3922  
3923  	/*
3924  	 * With the max latency tracers, it is possible that a reset
3925  	 * never took place on a CPU. This is evident from the timestamp
3926  	 * being before the start of the buffer.
3927  	 */
3928  	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3929  		if (ts >= iter->array_buffer->time_start)
3930  			break;
3931  		entries++;
3932  		ring_buffer_iter_advance(buf_iter);
3933  		/* This could be a big loop */
3934  		cond_resched();
3935  	}
3936  
3937  	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3938  }
3939  
3940  /*
3941   * The current tracer is copied into the iterator to avoid taking
3942   * a global lock around the whole iteration.
3943   */
3944  static void *s_start(struct seq_file *m, loff_t *pos)
3945  {
3946  	struct trace_iterator *iter = m->private;
3947  	struct trace_array *tr = iter->tr;
3948  	int cpu_file = iter->cpu_file;
3949  	void *p = NULL;
3950  	loff_t l = 0;
3951  	int cpu;
3952  
3953  	mutex_lock(&trace_types_lock);
3954  	if (unlikely(tr->current_trace != iter->trace)) {
3955  		/* Close iter->trace before switching to the new current tracer */
3956  		if (iter->trace->close)
3957  			iter->trace->close(iter);
3958  		iter->trace = tr->current_trace;
3959  		/* Reopen the new current tracer */
3960  		if (iter->trace->open)
3961  			iter->trace->open(iter);
3962  	}
3963  	mutex_unlock(&trace_types_lock);
3964  
3965  #ifdef CONFIG_TRACER_MAX_TRACE
3966  	if (iter->snapshot && iter->trace->use_max_tr)
3967  		return ERR_PTR(-EBUSY);
3968  #endif
3969  
3970  	if (*pos != iter->pos) {
3971  		iter->ent = NULL;
3972  		iter->cpu = 0;
3973  		iter->idx = -1;
3974  
3975  		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3976  			for_each_tracing_cpu(cpu)
3977  				tracing_iter_reset(iter, cpu);
3978  		} else
3979  			tracing_iter_reset(iter, cpu_file);
3980  
3981  		iter->leftover = 0;
3982  		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3983  			;
3984  
3985  	} else {
3986  		/*
3987  		 * If we overflowed the seq_file before, then we want
3988  		 * to just reuse the trace_seq buffer again.
3989  		 */
3990  		if (iter->leftover)
3991  			p = iter;
3992  		else {
3993  			l = *pos - 1;
3994  			p = s_next(m, p, &l);
3995  		}
3996  	}
3997  
3998  	trace_event_read_lock();
3999  	trace_access_lock(cpu_file);
4000  	return p;
4001  }
4002  
4003  static void s_stop(struct seq_file *m, void *p)
4004  {
4005  	struct trace_iterator *iter = m->private;
4006  
4007  #ifdef CONFIG_TRACER_MAX_TRACE
4008  	if (iter->snapshot && iter->trace->use_max_tr)
4009  		return;
4010  #endif
4011  
4012  	trace_access_unlock(iter->cpu_file);
4013  	trace_event_read_unlock();
4014  }
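/*
 * s_start(), s_next() and s_stop() above, together with s_show() below,
 * implement the seq_file interface behind the tracefs "trace" file (wired
 * up in tracer_seq_ops further down). *pos counts entries, and
 * iter->leftover lets s_show() re-emit a line that overflowed the seq_file
 * buffer on the previous read.
 */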
4015  
4016  static void
4017  get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4018  		      unsigned long *entries, int cpu)
4019  {
4020  	unsigned long count;
4021  
4022  	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4023  	/*
4024  	 * If this buffer has skipped entries, then we hold all
4025  	 * entries for the trace and we need to ignore the
4026  	 * ones before the time stamp.
4027  	 */
4028  	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4029  		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4030  		/* total is the same as the entries */
4031  		*total = count;
4032  	} else
4033  		*total = count +
4034  			ring_buffer_overrun_cpu(buf->buffer, cpu);
4035  	*entries = count;
4036  }
4037  
4038  static void
4039  get_total_entries(struct array_buffer *buf,
4040  		  unsigned long *total, unsigned long *entries)
4041  {
4042  	unsigned long t, e;
4043  	int cpu;
4044  
4045  	*total = 0;
4046  	*entries = 0;
4047  
4048  	for_each_tracing_cpu(cpu) {
4049  		get_total_entries_cpu(buf, &t, &e, cpu);
4050  		*total += t;
4051  		*entries += e;
4052  	}
4053  }
4054  
4055  unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4056  {
4057  	unsigned long total, entries;
4058  
4059  	if (!tr)
4060  		tr = &global_trace;
4061  
4062  	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4063  
4064  	return entries;
4065  }
4066  
4067  unsigned long trace_total_entries(struct trace_array *tr)
4068  {
4069  	unsigned long total, entries;
4070  
4071  	if (!tr)
4072  		tr = &global_trace;
4073  
4074  	get_total_entries(&tr->array_buffer, &total, &entries);
4075  
4076  	return entries;
4077  }
4078  
4079  static void print_lat_help_header(struct seq_file *m)
4080  {
4081  	seq_puts(m, "#                    _------=> CPU#            \n"
4082  		    "#                   / _-----=> irqs-off/BH-disabled\n"
4083  		    "#                  | / _----=> need-resched    \n"
4084  		    "#                  || / _---=> hardirq/softirq \n"
4085  		    "#                  ||| / _--=> preempt-depth   \n"
4086  		    "#                  |||| / _-=> migrate-disable \n"
4087  		    "#                  ||||| /     delay           \n"
4088  		    "#  cmd     pid     |||||| time  |   caller     \n"
4089  		    "#     \\   /        ||||||  \\    |    /       \n");
4090  }
4091  
4092  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4093  {
4094  	unsigned long total;
4095  	unsigned long entries;
4096  
4097  	get_total_entries(buf, &total, &entries);
4098  	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4099  		   entries, total, num_online_cpus());
4100  	seq_puts(m, "#\n");
4101  }
4102  
4103  static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4104  				   unsigned int flags)
4105  {
4106  	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4107  
4108  	print_event_info(buf, m);
4109  
4110  	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4111  	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4112  }
4113  
4114  static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4115  				       unsigned int flags)
4116  {
4117  	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4118  	static const char space[] = "            ";
4119  	int prec = tgid ? 12 : 2;
4120  
4121  	print_event_info(buf, m);
4122  
4123  	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4124  	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4125  	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4126  	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4127  	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4128  	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4129  	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4130  	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4131  }
4132  
4133  void
4134  print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4135  {
4136  	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4137  	struct array_buffer *buf = iter->array_buffer;
4138  	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4139  	struct tracer *type = iter->trace;
4140  	unsigned long entries;
4141  	unsigned long total;
4142  	const char *name = type->name;
4143  
4144  	get_total_entries(buf, &total, &entries);
4145  
4146  	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4147  		   name, init_utsname()->release);
4148  	seq_puts(m, "# -----------------------------------"
4149  		 "---------------------------------\n");
4150  	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4151  		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4152  		   nsecs_to_usecs(data->saved_latency),
4153  		   entries,
4154  		   total,
4155  		   buf->cpu,
4156  		   preempt_model_str(),
4157  		   /* These are reserved for later use */
4158  		   0, 0, 0, 0);
4159  #ifdef CONFIG_SMP
4160  	seq_printf(m, " #P:%d)\n", num_online_cpus());
4161  #else
4162  	seq_puts(m, ")\n");
4163  #endif
4164  	seq_puts(m, "#    -----------------\n");
4165  	seq_printf(m, "#    | task: %.16s-%d "
4166  		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4167  		   data->comm, data->pid,
4168  		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4169  		   data->policy, data->rt_priority);
4170  	seq_puts(m, "#    -----------------\n");
4171  
4172  	if (data->critical_start) {
4173  		seq_puts(m, "#  => started at: ");
4174  		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4175  		trace_print_seq(m, &iter->seq);
4176  		seq_puts(m, "\n#  => ended at:   ");
4177  		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4178  		trace_print_seq(m, &iter->seq);
4179  		seq_puts(m, "\n#\n");
4180  	}
4181  
4182  	seq_puts(m, "#\n");
4183  }
4184  
4185  static void test_cpu_buff_start(struct trace_iterator *iter)
4186  {
4187  	struct trace_seq *s = &iter->seq;
4188  	struct trace_array *tr = iter->tr;
4189  
4190  	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4191  		return;
4192  
4193  	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4194  		return;
4195  
4196  	if (cpumask_available(iter->started) &&
4197  	    cpumask_test_cpu(iter->cpu, iter->started))
4198  		return;
4199  
4200  	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4201  		return;
4202  
4203  	if (cpumask_available(iter->started))
4204  		cpumask_set_cpu(iter->cpu, iter->started);
4205  
4206  	/* Don't print started cpu buffer for the first entry of the trace */
4207  	if (iter->idx > 1)
4208  		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4209  				iter->cpu);
4210  }
4211  
4212  static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4213  {
4214  	struct trace_array *tr = iter->tr;
4215  	struct trace_seq *s = &iter->seq;
4216  	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4217  	struct trace_entry *entry;
4218  	struct trace_event *event;
4219  
4220  	entry = iter->ent;
4221  
4222  	test_cpu_buff_start(iter);
4223  
4224  	event = ftrace_find_event(entry->type);
4225  
4226  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4227  		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4228  			trace_print_lat_context(iter);
4229  		else
4230  			trace_print_context(iter);
4231  	}
4232  
4233  	if (trace_seq_has_overflowed(s))
4234  		return TRACE_TYPE_PARTIAL_LINE;
4235  
4236  	if (event) {
4237  		if (tr->trace_flags & TRACE_ITER_FIELDS)
4238  			return print_event_fields(iter, event);
4239  		/*
4240  		 * For TRACE_EVENT() events, the print_fmt is not
4241  		 * safe to use if the array has delta offsets
4242  		 * Force printing via the fields.
4243  		 */
4244  		if ((tr->text_delta) &&
4245  		    event->type > __TRACE_LAST_TYPE)
4246  			return print_event_fields(iter, event);
4247  
4248  		return event->funcs->trace(iter, sym_flags, event);
4249  	}
4250  
4251  	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4252  
4253  	return trace_handle_return(s);
4254  }
4255  
4256  static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4257  {
4258  	struct trace_array *tr = iter->tr;
4259  	struct trace_seq *s = &iter->seq;
4260  	struct trace_entry *entry;
4261  	struct trace_event *event;
4262  
4263  	entry = iter->ent;
4264  
4265  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4266  		trace_seq_printf(s, "%d %d %llu ",
4267  				 entry->pid, iter->cpu, iter->ts);
4268  
4269  	if (trace_seq_has_overflowed(s))
4270  		return TRACE_TYPE_PARTIAL_LINE;
4271  
4272  	event = ftrace_find_event(entry->type);
4273  	if (event)
4274  		return event->funcs->raw(iter, 0, event);
4275  
4276  	trace_seq_printf(s, "%d ?\n", entry->type);
4277  
4278  	return trace_handle_return(s);
4279  }
4280  
4281  static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4282  {
4283  	struct trace_array *tr = iter->tr;
4284  	struct trace_seq *s = &iter->seq;
4285  	unsigned char newline = '\n';
4286  	struct trace_entry *entry;
4287  	struct trace_event *event;
4288  
4289  	entry = iter->ent;
4290  
4291  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4292  		SEQ_PUT_HEX_FIELD(s, entry->pid);
4293  		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4294  		SEQ_PUT_HEX_FIELD(s, iter->ts);
4295  		if (trace_seq_has_overflowed(s))
4296  			return TRACE_TYPE_PARTIAL_LINE;
4297  	}
4298  
4299  	event = ftrace_find_event(entry->type);
4300  	if (event) {
4301  		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4302  		if (ret != TRACE_TYPE_HANDLED)
4303  			return ret;
4304  	}
4305  
4306  	SEQ_PUT_FIELD(s, newline);
4307  
4308  	return trace_handle_return(s);
4309  }
4310  
4311  static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4312  {
4313  	struct trace_array *tr = iter->tr;
4314  	struct trace_seq *s = &iter->seq;
4315  	struct trace_entry *entry;
4316  	struct trace_event *event;
4317  
4318  	entry = iter->ent;
4319  
4320  	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4321  		SEQ_PUT_FIELD(s, entry->pid);
4322  		SEQ_PUT_FIELD(s, iter->cpu);
4323  		SEQ_PUT_FIELD(s, iter->ts);
4324  		if (trace_seq_has_overflowed(s))
4325  			return TRACE_TYPE_PARTIAL_LINE;
4326  	}
4327  
4328  	event = ftrace_find_event(entry->type);
4329  	return event ? event->funcs->binary(iter, 0, event) :
4330  		TRACE_TYPE_HANDLED;
4331  }
4332  
4333  int trace_empty(struct trace_iterator *iter)
4334  {
4335  	struct ring_buffer_iter *buf_iter;
4336  	int cpu;
4337  
4338  	/* If we are looking at one CPU buffer, only check that one */
4339  	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4340  		cpu = iter->cpu_file;
4341  		buf_iter = trace_buffer_iter(iter, cpu);
4342  		if (buf_iter) {
4343  			if (!ring_buffer_iter_empty(buf_iter))
4344  				return 0;
4345  		} else {
4346  			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4347  				return 0;
4348  		}
4349  		return 1;
4350  	}
4351  
4352  	for_each_tracing_cpu(cpu) {
4353  		buf_iter = trace_buffer_iter(iter, cpu);
4354  		if (buf_iter) {
4355  			if (!ring_buffer_iter_empty(buf_iter))
4356  				return 0;
4357  		} else {
4358  			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4359  				return 0;
4360  		}
4361  	}
4362  
4363  	return 1;
4364  }
4365  
4366  /*  Called with trace_event_read_lock() held. */
4367  enum print_line_t print_trace_line(struct trace_iterator *iter)
4368  {
4369  	struct trace_array *tr = iter->tr;
4370  	unsigned long trace_flags = tr->trace_flags;
4371  	enum print_line_t ret;
4372  
4373  	if (iter->lost_events) {
4374  		if (iter->lost_events == (unsigned long)-1)
4375  			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4376  					 iter->cpu);
4377  		else
4378  			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4379  					 iter->cpu, iter->lost_events);
4380  		if (trace_seq_has_overflowed(&iter->seq))
4381  			return TRACE_TYPE_PARTIAL_LINE;
4382  	}
4383  
4384  	if (iter->trace && iter->trace->print_line) {
4385  		ret = iter->trace->print_line(iter);
4386  		if (ret != TRACE_TYPE_UNHANDLED)
4387  			return ret;
4388  	}
4389  
4390  	if (iter->ent->type == TRACE_BPUTS &&
4391  			trace_flags & TRACE_ITER_PRINTK &&
4392  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4393  		return trace_print_bputs_msg_only(iter);
4394  
4395  	if (iter->ent->type == TRACE_BPRINT &&
4396  			trace_flags & TRACE_ITER_PRINTK &&
4397  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4398  		return trace_print_bprintk_msg_only(iter);
4399  
4400  	if (iter->ent->type == TRACE_PRINT &&
4401  			trace_flags & TRACE_ITER_PRINTK &&
4402  			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4403  		return trace_print_printk_msg_only(iter);
4404  
4405  	if (trace_flags & TRACE_ITER_BIN)
4406  		return print_bin_fmt(iter);
4407  
4408  	if (trace_flags & TRACE_ITER_HEX)
4409  		return print_hex_fmt(iter);
4410  
4411  	if (trace_flags & TRACE_ITER_RAW)
4412  		return print_raw_fmt(iter);
4413  
4414  	return print_trace_fmt(iter);
4415  }
4416  
4417  void trace_latency_header(struct seq_file *m)
4418  {
4419  	struct trace_iterator *iter = m->private;
4420  	struct trace_array *tr = iter->tr;
4421  
4422  	/* print nothing if the buffers are empty */
4423  	if (trace_empty(iter))
4424  		return;
4425  
4426  	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4427  		print_trace_header(m, iter);
4428  
4429  	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4430  		print_lat_help_header(m);
4431  }
4432  
4433  void trace_default_header(struct seq_file *m)
4434  {
4435  	struct trace_iterator *iter = m->private;
4436  	struct trace_array *tr = iter->tr;
4437  	unsigned long trace_flags = tr->trace_flags;
4438  
4439  	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4440  		return;
4441  
4442  	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4443  		/* print nothing if the buffers are empty */
4444  		if (trace_empty(iter))
4445  			return;
4446  		print_trace_header(m, iter);
4447  		if (!(trace_flags & TRACE_ITER_VERBOSE))
4448  			print_lat_help_header(m);
4449  	} else {
4450  		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4451  			if (trace_flags & TRACE_ITER_IRQ_INFO)
4452  				print_func_help_header_irq(iter->array_buffer,
4453  							   m, trace_flags);
4454  			else
4455  				print_func_help_header(iter->array_buffer, m,
4456  						       trace_flags);
4457  		}
4458  	}
4459  }
4460  
4461  static void test_ftrace_alive(struct seq_file *m)
4462  {
4463  	if (!ftrace_is_dead())
4464  		return;
4465  	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4466  		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4467  }
4468  
4469  #ifdef CONFIG_TRACER_MAX_TRACE
4470  static void show_snapshot_main_help(struct seq_file *m)
4471  {
4472  	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4473  		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4474  		    "#                      Takes a snapshot of the main buffer.\n"
4475  		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4476  		    "#                      (Doesn't have to be '2' works with any number that\n"
4477  		    "#                       is not a '0' or '1')\n");
4478  }
4479  
4480  static void show_snapshot_percpu_help(struct seq_file *m)
4481  {
4482  	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4483  #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4484  	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4485  		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4486  #else
4487  	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4488  		    "#                     Must use main snapshot file to allocate.\n");
4489  #endif
4490  	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4491  		    "#                      (Doesn't have to be '2' works with any number that\n"
4492  		    "#                       is not a '0' or '1')\n");
4493  }
4494  
4495  static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4496  {
4497  	if (iter->tr->allocated_snapshot)
4498  		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4499  	else
4500  		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4501  
4502  	seq_puts(m, "# Snapshot commands:\n");
4503  	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4504  		show_snapshot_main_help(m);
4505  	else
4506  		show_snapshot_percpu_help(m);
4507  }
4508  #else
4509  /* Should never be called */
4510  static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4511  #endif
4512  
4513  static int s_show(struct seq_file *m, void *v)
4514  {
4515  	struct trace_iterator *iter = v;
4516  	int ret;
4517  
4518  	if (iter->ent == NULL) {
4519  		if (iter->tr) {
4520  			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4521  			seq_puts(m, "#\n");
4522  			test_ftrace_alive(m);
4523  		}
4524  		if (iter->snapshot && trace_empty(iter))
4525  			print_snapshot_help(m, iter);
4526  		else if (iter->trace && iter->trace->print_header)
4527  			iter->trace->print_header(m);
4528  		else
4529  			trace_default_header(m);
4530  
4531  	} else if (iter->leftover) {
4532  		/*
4533  		 * If we filled the seq_file buffer earlier, we
4534  		 * want to just show it now.
4535  		 */
4536  		ret = trace_print_seq(m, &iter->seq);
4537  
4538  		/* ret should this time be zero, but you never know */
4539  		iter->leftover = ret;
4540  
4541  	} else {
4542  		ret = print_trace_line(iter);
4543  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4544  			iter->seq.full = 0;
4545  			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4546  		}
4547  		ret = trace_print_seq(m, &iter->seq);
4548  		/*
4549  		 * If we overflow the seq_file buffer, then it will
4550  		 * ask us for this data again at start up.
4551  		 * Use that instead.
4552  		 *  ret is 0 if seq_file write succeeded.
4553  		 *        -1 otherwise.
4554  		 */
4555  		iter->leftover = ret;
4556  	}
4557  
4558  	return 0;
4559  }
4560  
4561  /*
4562   * Should be used after trace_array_get(), trace_types_lock
4563   * ensures that i_cdev was already initialized.
4564   */
4565  static inline int tracing_get_cpu(struct inode *inode)
4566  {
4567  	if (inode->i_cdev) /* See trace_create_cpu_file() */
4568  		return (long)inode->i_cdev - 1;
4569  	return RING_BUFFER_ALL_CPUS;
4570  }
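/*
 * trace_create_cpu_file() stores (cpu + 1) in i_cdev, so for example the
 * per_cpu/cpu2/trace file yields 2 here, while the top-level files leave
 * i_cdev NULL and operate on all CPUs.
 */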
4571  
4572  static const struct seq_operations tracer_seq_ops = {
4573  	.start		= s_start,
4574  	.next		= s_next,
4575  	.stop		= s_stop,
4576  	.show		= s_show,
4577  };
4578  
4579  /*
4580   * Note, as iter itself can be allocated and freed in different
4581   * ways, this function is only used to free its content, and not
4582   * the iterator itself. The only requirement to all the allocations
4583   * the iterator itself. The only requirement on all the allocations
4584   * is that they zero all fields (kzalloc), as freeing works with
4585   * either allocated content or NULL.
4586  static void free_trace_iter_content(struct trace_iterator *iter)
4587  {
4588  	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4589  	if (iter->fmt != static_fmt_buf)
4590  		kfree(iter->fmt);
4591  
4592  	kfree(iter->temp);
4593  	kfree(iter->buffer_iter);
4594  	mutex_destroy(&iter->mutex);
4595  	free_cpumask_var(iter->started);
4596  }
4597  
4598  static struct trace_iterator *
4599  __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4600  {
4601  	struct trace_array *tr = inode->i_private;
4602  	struct trace_iterator *iter;
4603  	int cpu;
4604  
4605  	if (tracing_disabled)
4606  		return ERR_PTR(-ENODEV);
4607  
4608  	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4609  	if (!iter)
4610  		return ERR_PTR(-ENOMEM);
4611  
4612  	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4613  				    GFP_KERNEL);
4614  	if (!iter->buffer_iter)
4615  		goto release;
4616  
4617  	/*
4618  	 * trace_find_next_entry() may need to save off iter->ent.
4619  	 * It will place it into the iter->temp buffer. As most
4620  	 * events are less than 128, allocate a buffer of that size.
4621  	 * If one is greater, then trace_find_next_entry() will
4622  	 * allocate a new buffer to adjust for the bigger iter->ent.
4623  	 * It's not critical if it fails to get allocated here.
4624  	 */
4625  	iter->temp = kmalloc(128, GFP_KERNEL);
4626  	if (iter->temp)
4627  		iter->temp_size = 128;
4628  
4629  	/*
4630  	 * trace_event_printf() may need to modify the given format
4631  	 * string to replace %p with %px so that it shows the real address
4632  	 * instead of a hashed value. However, that is only needed for
4633  	 * event tracing; other tracers may not need it. Defer the
4634  	 * allocation until it is needed.
4635  	 */
4636  	iter->fmt = NULL;
4637  	iter->fmt_size = 0;
4638  
4639  	mutex_lock(&trace_types_lock);
4640  	iter->trace = tr->current_trace;
4641  
4642  	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4643  		goto fail;
4644  
4645  	iter->tr = tr;
4646  
4647  #ifdef CONFIG_TRACER_MAX_TRACE
4648  	/* Currently only the top directory has a snapshot */
4649  	if (tr->current_trace->print_max || snapshot)
4650  		iter->array_buffer = &tr->max_buffer;
4651  	else
4652  #endif
4653  		iter->array_buffer = &tr->array_buffer;
4654  	iter->snapshot = snapshot;
4655  	iter->pos = -1;
4656  	iter->cpu_file = tracing_get_cpu(inode);
4657  	mutex_init(&iter->mutex);
4658  
4659  	/* Notify the tracer early; before we stop tracing. */
4660  	if (iter->trace->open)
4661  		iter->trace->open(iter);
4662  
4663  	/* Annotate start of buffers if we had overruns */
4664  	if (ring_buffer_overruns(iter->array_buffer->buffer))
4665  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4666  
4667  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4668  	if (trace_clocks[tr->clock_id].in_ns)
4669  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4670  
4671  	/*
4672  	 * If pause-on-trace is enabled, then stop the trace while
4673  	 * dumping, unless this is the "snapshot" file
4674  	 */
4675  	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4676  		tracing_stop_tr(tr);
4677  
4678  	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4679  		for_each_tracing_cpu(cpu) {
4680  			iter->buffer_iter[cpu] =
4681  				ring_buffer_read_start(iter->array_buffer->buffer,
4682  						       cpu, GFP_KERNEL);
4683  			tracing_iter_reset(iter, cpu);
4684  		}
4685  	} else {
4686  		cpu = iter->cpu_file;
4687  		iter->buffer_iter[cpu] =
4688  			ring_buffer_read_start(iter->array_buffer->buffer,
4689  					       cpu, GFP_KERNEL);
4690  		tracing_iter_reset(iter, cpu);
4691  	}
4692  
4693  	mutex_unlock(&trace_types_lock);
4694  
4695  	return iter;
4696  
4697   fail:
4698  	mutex_unlock(&trace_types_lock);
4699  	free_trace_iter_content(iter);
4700  release:
4701  	seq_release_private(inode, file);
4702  	return ERR_PTR(-ENOMEM);
4703  }
4704  
4705  int tracing_open_generic(struct inode *inode, struct file *filp)
4706  {
4707  	int ret;
4708  
4709  	ret = tracing_check_open_get_tr(NULL);
4710  	if (ret)
4711  		return ret;
4712  
4713  	filp->private_data = inode->i_private;
4714  	return 0;
4715  }
4716  
4717  bool tracing_is_disabled(void)
4718  {
4719  	return (tracing_disabled) ? true : false;
4720  }
4721  
4722  /*
4723   * Open and update trace_array ref count.
4724   * Must have the current trace_array passed to it.
4725   */
4726  int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4727  {
4728  	struct trace_array *tr = inode->i_private;
4729  	int ret;
4730  
4731  	ret = tracing_check_open_get_tr(tr);
4732  	if (ret)
4733  		return ret;
4734  
4735  	filp->private_data = inode->i_private;
4736  
4737  	return 0;
4738  }
4739  
4740  /*
4741   * The private pointer of the inode is the trace_event_file.
4742   * Update the tr ref count associated to it.
4743   * Update the tr ref count associated with it.
4744  int tracing_open_file_tr(struct inode *inode, struct file *filp)
4745  {
4746  	struct trace_event_file *file = inode->i_private;
4747  	int ret;
4748  
4749  	ret = tracing_check_open_get_tr(file->tr);
4750  	if (ret)
4751  		return ret;
4752  
4753  	guard(mutex)(&event_mutex);
4754  
4755  	/* Fail if the file is marked for removal */
4756  	if (file->flags & EVENT_FILE_FL_FREED) {
4757  		trace_array_put(file->tr);
4758  		return -ENODEV;
4759  	} else {
4760  		event_file_get(file);
4761  	}
4762  
4763  	filp->private_data = inode->i_private;
4764  
4765  	return 0;
4766  }
4767  
4768  int tracing_release_file_tr(struct inode *inode, struct file *filp)
4769  {
4770  	struct trace_event_file *file = inode->i_private;
4771  
4772  	trace_array_put(file->tr);
4773  	event_file_put(file);
4774  
4775  	return 0;
4776  }
4777  
4778  int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4779  {
4780  	tracing_release_file_tr(inode, filp);
4781  	return single_release(inode, filp);
4782  }
4783  
4784  static int tracing_mark_open(struct inode *inode, struct file *filp)
4785  {
4786  	stream_open(inode, filp);
4787  	return tracing_open_generic_tr(inode, filp);
4788  }
4789  
4790  static int tracing_release(struct inode *inode, struct file *file)
4791  {
4792  	struct trace_array *tr = inode->i_private;
4793  	struct seq_file *m = file->private_data;
4794  	struct trace_iterator *iter;
4795  	int cpu;
4796  
4797  	if (!(file->f_mode & FMODE_READ)) {
4798  		trace_array_put(tr);
4799  		return 0;
4800  	}
4801  
4802  	/* Writes do not use seq_file */
4803  	iter = m->private;
4804  	mutex_lock(&trace_types_lock);
4805  
4806  	for_each_tracing_cpu(cpu) {
4807  		if (iter->buffer_iter[cpu])
4808  			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4809  	}
4810  
4811  	if (iter->trace && iter->trace->close)
4812  		iter->trace->close(iter);
4813  
4814  	if (!iter->snapshot && tr->stop_count)
4815  		/* reenable tracing if it was previously enabled */
4816  		tracing_start_tr(tr);
4817  
4818  	__trace_array_put(tr);
4819  
4820  	mutex_unlock(&trace_types_lock);
4821  
4822  	free_trace_iter_content(iter);
4823  	seq_release_private(inode, file);
4824  
4825  	return 0;
4826  }
4827  
4828  int tracing_release_generic_tr(struct inode *inode, struct file *file)
4829  {
4830  	struct trace_array *tr = inode->i_private;
4831  
4832  	trace_array_put(tr);
4833  	return 0;
4834  }
4835  
4836  static int tracing_single_release_tr(struct inode *inode, struct file *file)
4837  {
4838  	struct trace_array *tr = inode->i_private;
4839  
4840  	trace_array_put(tr);
4841  
4842  	return single_release(inode, file);
4843  }
4844  
4845  static int tracing_open(struct inode *inode, struct file *file)
4846  {
4847  	struct trace_array *tr = inode->i_private;
4848  	struct trace_iterator *iter;
4849  	int ret;
4850  
4851  	ret = tracing_check_open_get_tr(tr);
4852  	if (ret)
4853  		return ret;
4854  
4855  	/* If this file was open for write, then erase contents */
4856  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4857  		int cpu = tracing_get_cpu(inode);
4858  		struct array_buffer *trace_buf = &tr->array_buffer;
4859  
4860  #ifdef CONFIG_TRACER_MAX_TRACE
4861  		if (tr->current_trace->print_max)
4862  			trace_buf = &tr->max_buffer;
4863  #endif
4864  
4865  		if (cpu == RING_BUFFER_ALL_CPUS)
4866  			tracing_reset_online_cpus(trace_buf);
4867  		else
4868  			tracing_reset_cpu(trace_buf, cpu);
4869  	}
4870  
4871  	if (file->f_mode & FMODE_READ) {
4872  		iter = __tracing_open(inode, file, false);
4873  		if (IS_ERR(iter))
4874  			ret = PTR_ERR(iter);
4875  		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4876  			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4877  	}
4878  
4879  	if (ret < 0)
4880  		trace_array_put(tr);
4881  
4882  	return ret;
4883  }
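/*
 * The O_TRUNC branch above is what makes a plain "echo > trace" (or
 * ": > trace") from the shell clear the buffer: the file is opened
 * write-only with truncation, so the selected CPU buffer(s) are reset
 * before any new reader starts.
 */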
4884  
4885  /*
4886   * Some tracers are not suitable for instance buffers.
4887   * A tracer is always available for the global array (toplevel)
4888   * or if it explicitly states that it is.
4889   */
4890  static bool
4891  trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4892  {
4893  #ifdef CONFIG_TRACER_SNAPSHOT
4894  	/* arrays with mapped buffer range do not have snapshots */
4895  	if (tr->range_addr_start && t->use_max_tr)
4896  		return false;
4897  #endif
4898  	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4899  }
4900  
4901  /* Find the next tracer that this trace array may use */
4902  static struct tracer *
4903  get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4904  {
4905  	while (t && !trace_ok_for_array(t, tr))
4906  		t = t->next;
4907  
4908  	return t;
4909  }
4910  
4911  static void *
4912  t_next(struct seq_file *m, void *v, loff_t *pos)
4913  {
4914  	struct trace_array *tr = m->private;
4915  	struct tracer *t = v;
4916  
4917  	(*pos)++;
4918  
4919  	if (t)
4920  		t = get_tracer_for_array(tr, t->next);
4921  
4922  	return t;
4923  }
4924  
4925  static void *t_start(struct seq_file *m, loff_t *pos)
4926  {
4927  	struct trace_array *tr = m->private;
4928  	struct tracer *t;
4929  	loff_t l = 0;
4930  
4931  	mutex_lock(&trace_types_lock);
4932  
4933  	t = get_tracer_for_array(tr, trace_types);
4934  	for (; t && l < *pos; t = t_next(m, t, &l))
4935  			;
4936  
4937  	return t;
4938  }
4939  
4940  static void t_stop(struct seq_file *m, void *p)
4941  {
4942  	mutex_unlock(&trace_types_lock);
4943  }
4944  
4945  static int t_show(struct seq_file *m, void *v)
4946  {
4947  	struct tracer *t = v;
4948  
4949  	if (!t)
4950  		return 0;
4951  
4952  	seq_puts(m, t->name);
4953  	if (t->next)
4954  		seq_putc(m, ' ');
4955  	else
4956  		seq_putc(m, '\n');
4957  
4958  	return 0;
4959  }
4960  
4961  static const struct seq_operations show_traces_seq_ops = {
4962  	.start		= t_start,
4963  	.next		= t_next,
4964  	.stop		= t_stop,
4965  	.show		= t_show,
4966  };
4967  
4968  static int show_traces_open(struct inode *inode, struct file *file)
4969  {
4970  	struct trace_array *tr = inode->i_private;
4971  	struct seq_file *m;
4972  	int ret;
4973  
4974  	ret = tracing_check_open_get_tr(tr);
4975  	if (ret)
4976  		return ret;
4977  
4978  	ret = seq_open(file, &show_traces_seq_ops);
4979  	if (ret) {
4980  		trace_array_put(tr);
4981  		return ret;
4982  	}
4983  
4984  	m = file->private_data;
4985  	m->private = tr;
4986  
4987  	return 0;
4988  }
4989  
4990  static int tracing_seq_release(struct inode *inode, struct file *file)
4991  {
4992  	struct trace_array *tr = inode->i_private;
4993  
4994  	trace_array_put(tr);
4995  	return seq_release(inode, file);
4996  }
4997  
4998  static ssize_t
4999  tracing_write_stub(struct file *filp, const char __user *ubuf,
5000  		   size_t count, loff_t *ppos)
5001  {
5002  	return count;
5003  }
5004  
5005  loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5006  {
5007  	int ret;
5008  
5009  	if (file->f_mode & FMODE_READ)
5010  		ret = seq_lseek(file, offset, whence);
5011  	else
5012  		file->f_pos = ret = 0;
5013  
5014  	return ret;
5015  }
5016  
5017  static const struct file_operations tracing_fops = {
5018  	.open		= tracing_open,
5019  	.read		= seq_read,
5020  	.read_iter	= seq_read_iter,
5021  	.splice_read	= copy_splice_read,
5022  	.write		= tracing_write_stub,
5023  	.llseek		= tracing_lseek,
5024  	.release	= tracing_release,
5025  };
5026  
5027  static const struct file_operations show_traces_fops = {
5028  	.open		= show_traces_open,
5029  	.read		= seq_read,
5030  	.llseek		= seq_lseek,
5031  	.release	= tracing_seq_release,
5032  };
5033  
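/*
 * The tracing_cpumask file shows and sets which CPUs are traced, using
 * the usual hex bitmask format. Illustrative usage from user space:
 *
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *   # echo 0f > /sys/kernel/tracing/tracing_cpumask   (trace CPUs 0-3 only)
 */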
5034  static ssize_t
5035  tracing_cpumask_read(struct file *filp, char __user *ubuf,
5036  		     size_t count, loff_t *ppos)
5037  {
5038  	struct trace_array *tr = file_inode(filp)->i_private;
5039  	char *mask_str __free(kfree) = NULL;
5040  	int len;
5041  
5042  	len = snprintf(NULL, 0, "%*pb\n",
5043  		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5044  	mask_str = kmalloc(len, GFP_KERNEL);
5045  	if (!mask_str)
5046  		return -ENOMEM;
5047  
5048  	len = snprintf(mask_str, len, "%*pb\n",
5049  		       cpumask_pr_args(tr->tracing_cpumask));
5050  	if (len >= count)
5051  		return -EINVAL;
5052  
5053  	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5054  }
5055  
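/*
 * Apply a new tracing cpumask to @tr: recording is disabled on CPUs
 * being removed from the mask and enabled on CPUs being added, before
 * the mask itself is copied over.
 */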
5056  int tracing_set_cpumask(struct trace_array *tr,
5057  			cpumask_var_t tracing_cpumask_new)
5058  {
5059  	int cpu;
5060  
5061  	if (!tr)
5062  		return -EINVAL;
5063  
5064  	local_irq_disable();
5065  	arch_spin_lock(&tr->max_lock);
5066  	for_each_tracing_cpu(cpu) {
5067  		/*
5068  		 * Increase/decrease the disabled counter if we are
5069  		 * about to flip a bit in the cpumask:
5070  		 */
5071  		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5072  				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5073  			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5074  #ifdef CONFIG_TRACER_MAX_TRACE
5075  			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5076  #endif
5077  		}
5078  		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5079  				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5080  			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5081  #ifdef CONFIG_TRACER_MAX_TRACE
5082  			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5083  #endif
5084  		}
5085  	}
5086  	arch_spin_unlock(&tr->max_lock);
5087  	local_irq_enable();
5088  
5089  	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5090  
5091  	return 0;
5092  }
5093  
5094  static ssize_t
5095  tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5096  		      size_t count, loff_t *ppos)
5097  {
5098  	struct trace_array *tr = file_inode(filp)->i_private;
5099  	cpumask_var_t tracing_cpumask_new;
5100  	int err;
5101  
5102  	if (count == 0 || count > KMALLOC_MAX_SIZE)
5103  		return -EINVAL;
5104  
5105  	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5106  		return -ENOMEM;
5107  
5108  	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5109  	if (err)
5110  		goto err_free;
5111  
5112  	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5113  	if (err)
5114  		goto err_free;
5115  
5116  	free_cpumask_var(tracing_cpumask_new);
5117  
5118  	return count;
5119  
5120  err_free:
5121  	free_cpumask_var(tracing_cpumask_new);
5122  
5123  	return err;
5124  }
5125  
5126  static const struct file_operations tracing_cpumask_fops = {
5127  	.open		= tracing_open_generic_tr,
5128  	.read		= tracing_cpumask_read,
5129  	.write		= tracing_cpumask_write,
5130  	.release	= tracing_release_generic_tr,
5131  	.llseek		= generic_file_llseek,
5132  };
5133  
5134  static int tracing_trace_options_show(struct seq_file *m, void *v)
5135  {
5136  	struct tracer_opt *trace_opts;
5137  	struct trace_array *tr = m->private;
5138  	u32 tracer_flags;
5139  	int i;
5140  
5141  	guard(mutex)(&trace_types_lock);
5142  
5143  	tracer_flags = tr->current_trace->flags->val;
5144  	trace_opts = tr->current_trace->flags->opts;
5145  
5146  	for (i = 0; trace_options[i]; i++) {
5147  		if (tr->trace_flags & (1 << i))
5148  			seq_printf(m, "%s\n", trace_options[i]);
5149  		else
5150  			seq_printf(m, "no%s\n", trace_options[i]);
5151  	}
5152  
5153  	for (i = 0; trace_opts[i].name; i++) {
5154  		if (tracer_flags & trace_opts[i].bit)
5155  			seq_printf(m, "%s\n", trace_opts[i].name);
5156  		else
5157  			seq_printf(m, "no%s\n", trace_opts[i].name);
5158  	}
5159  
5160  	return 0;
5161  }
5162  
5163  static int __set_tracer_option(struct trace_array *tr,
5164  			       struct tracer_flags *tracer_flags,
5165  			       struct tracer_opt *opts, int neg)
5166  {
5167  	struct tracer *trace = tracer_flags->trace;
5168  	int ret;
5169  
5170  	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5171  	if (ret)
5172  		return ret;
5173  
5174  	if (neg)
5175  		tracer_flags->val &= ~opts->bit;
5176  	else
5177  		tracer_flags->val |= opts->bit;
5178  	return 0;
5179  }
5180  
5181  /* Try to assign a tracer specific option */
5182  static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5183  {
5184  	struct tracer *trace = tr->current_trace;
5185  	struct tracer_flags *tracer_flags = trace->flags;
5186  	struct tracer_opt *opts = NULL;
5187  	int i;
5188  
5189  	for (i = 0; tracer_flags->opts[i].name; i++) {
5190  		opts = &tracer_flags->opts[i];
5191  
5192  		if (strcmp(cmp, opts->name) == 0)
5193  			return __set_tracer_option(tr, trace->flags, opts, neg);
5194  	}
5195  
5196  	return -EINVAL;
5197  }
5198  
5199  /* Some tracers require overwrite to stay enabled */
5200  int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5201  {
5202  	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5203  		return -1;
5204  
5205  	return 0;
5206  }
5207  
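/*
 * Set or clear a single TRACE_ITER_* flag for @tr and apply any side
 * effects tied to it (cmdline/tgid recording, fork following, buffer
 * overwrite mode, trace_printk routing). Returns 0 on success or a
 * negative errno if the current tracer rejects the change.
 */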
5208  int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5209  {
5210  	if ((mask == TRACE_ITER_RECORD_TGID) ||
5211  	    (mask == TRACE_ITER_RECORD_CMD) ||
5212  	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5213  	    (mask == TRACE_ITER_COPY_MARKER))
5214  		lockdep_assert_held(&event_mutex);
5215  
5216  	/* do nothing if flag is already set */
5217  	if (!!(tr->trace_flags & mask) == !!enabled)
5218  		return 0;
5219  
5220  	/* Give the tracer a chance to approve the change */
5221  	if (tr->current_trace->flag_changed)
5222  		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5223  			return -EINVAL;
5224  
5225  	if (mask == TRACE_ITER_TRACE_PRINTK) {
5226  		if (enabled) {
5227  			update_printk_trace(tr);
5228  		} else {
5229  			/*
5230  			 * The global_trace cannot clear this.
5231  			 * Its flag only gets cleared if another instance sets it.
5232  			 */
5233  			if (printk_trace == &global_trace)
5234  				return -EINVAL;
5235  			/*
5236  			 * An instance must always have it set.
5237  			 * By default, that's the global_trace instance.
5238  			 */
5239  			if (printk_trace == tr)
5240  				update_printk_trace(&global_trace);
5241  		}
5242  	}
5243  
5244  	if (mask == TRACE_ITER_COPY_MARKER)
5245  		update_marker_trace(tr, enabled);
5246  
5247  	if (enabled)
5248  		tr->trace_flags |= mask;
5249  	else
5250  		tr->trace_flags &= ~mask;
5251  
5252  	if (mask == TRACE_ITER_RECORD_CMD)
5253  		trace_event_enable_cmd_record(enabled);
5254  
5255  	if (mask == TRACE_ITER_RECORD_TGID) {
5256  
5257  		if (trace_alloc_tgid_map() < 0) {
5258  			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5259  			return -ENOMEM;
5260  		}
5261  
5262  		trace_event_enable_tgid_record(enabled);
5263  	}
5264  
5265  	if (mask == TRACE_ITER_EVENT_FORK)
5266  		trace_event_follow_fork(tr, enabled);
5267  
5268  	if (mask == TRACE_ITER_FUNC_FORK)
5269  		ftrace_pid_follow_fork(tr, enabled);
5270  
5271  	if (mask == TRACE_ITER_OVERWRITE) {
5272  		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5273  #ifdef CONFIG_TRACER_MAX_TRACE
5274  		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5275  #endif
5276  	}
5277  
5278  	if (mask == TRACE_ITER_PRINTK) {
5279  		trace_printk_start_stop_comm(enabled);
5280  		trace_printk_control(enabled);
5281  	}
5282  
5283  	return 0;
5284  }
5285  
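/*
 * Parse a single option string: a "no" prefix clears the option, core
 * trace_options are matched first and tracer-specific options second.
 * Illustrative usage via the trace_options file:
 *
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 */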
5286  int trace_set_options(struct trace_array *tr, char *option)
5287  {
5288  	char *cmp;
5289  	int neg = 0;
5290  	int ret;
5291  	size_t orig_len = strlen(option);
5292  	int len;
5293  
5294  	cmp = strstrip(option);
5295  
5296  	len = str_has_prefix(cmp, "no");
5297  	if (len)
5298  		neg = 1;
5299  
5300  	cmp += len;
5301  
5302  	mutex_lock(&event_mutex);
5303  	mutex_lock(&trace_types_lock);
5304  
5305  	ret = match_string(trace_options, -1, cmp);
5306  	/* If no option could be set, test the specific tracer options */
5307  	if (ret < 0)
5308  		ret = set_tracer_option(tr, cmp, neg);
5309  	else
5310  		ret = set_tracer_flag(tr, 1 << ret, !neg);
5311  
5312  	mutex_unlock(&trace_types_lock);
5313  	mutex_unlock(&event_mutex);
5314  
5315  	/*
5316  	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5317  	 * turn it back into a space.
5318  	 */
5319  	if (orig_len > strlen(option))
5320  		option[strlen(option)] = ' ';
5321  
5322  	return ret;
5323  }
5324  
5325  static void __init apply_trace_boot_options(void)
5326  {
5327  	char *buf = trace_boot_options_buf;
5328  	char *option;
5329  
5330  	while (true) {
5331  		option = strsep(&buf, ",");
5332  
5333  		if (!option)
5334  			break;
5335  
5336  		if (*option)
5337  			trace_set_options(&global_trace, option);
5338  
5339  		/* Put back the comma to allow this to be called again */
5340  		if (buf)
5341  			*(buf - 1) = ',';
5342  	}
5343  }
5344  
5345  static ssize_t
5346  tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5347  			size_t cnt, loff_t *ppos)
5348  {
5349  	struct seq_file *m = filp->private_data;
5350  	struct trace_array *tr = m->private;
5351  	char buf[64];
5352  	int ret;
5353  
5354  	if (cnt >= sizeof(buf))
5355  		return -EINVAL;
5356  
5357  	if (copy_from_user(buf, ubuf, cnt))
5358  		return -EFAULT;
5359  
5360  	buf[cnt] = 0;
5361  
5362  	ret = trace_set_options(tr, buf);
5363  	if (ret < 0)
5364  		return ret;
5365  
5366  	*ppos += cnt;
5367  
5368  	return cnt;
5369  }
5370  
5371  static int tracing_trace_options_open(struct inode *inode, struct file *file)
5372  {
5373  	struct trace_array *tr = inode->i_private;
5374  	int ret;
5375  
5376  	ret = tracing_check_open_get_tr(tr);
5377  	if (ret)
5378  		return ret;
5379  
5380  	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5381  	if (ret < 0)
5382  		trace_array_put(tr);
5383  
5384  	return ret;
5385  }
5386  
5387  static const struct file_operations tracing_iter_fops = {
5388  	.open		= tracing_trace_options_open,
5389  	.read		= seq_read,
5390  	.llseek		= seq_lseek,
5391  	.release	= tracing_single_release_tr,
5392  	.write		= tracing_trace_options_write,
5393  };
5394  
5395  static const char readme_msg[] =
5396  	"tracing mini-HOWTO:\n\n"
5397  	"By default tracefs removes all OTH file permission bits.\n"
5398  	"When mounting tracefs an optional group id can be specified\n"
5399  	"which adds the group to every directory and file in tracefs:\n\n"
5400  	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5401  	"# echo 0 > tracing_on : quick way to disable tracing\n"
5402  	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5403  	" Important files:\n"
5404  	"  trace\t\t\t- The static contents of the buffer\n"
5405  	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5406  	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5407  	"  current_tracer\t- function and latency tracers\n"
5408  	"  available_tracers\t- list of configured tracers for current_tracer\n"
5409  	"  error_log\t- error log for failed commands (that support it)\n"
5410  	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5411  	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5412  	"  trace_clock\t\t- change the clock used to order events\n"
5413  	"       local:   Per cpu clock but may not be synced across CPUs\n"
5414  	"      global:   Synced across CPUs but slows tracing down.\n"
5415  	"     counter:   Not a clock, but just an increment\n"
5416  	"      uptime:   Jiffy counter from time of boot\n"
5417  	"        perf:   Same clock that perf events use\n"
5418  #ifdef CONFIG_X86_64
5419  	"     x86-tsc:   TSC cycle counter\n"
5420  #endif
5421  	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5422  	"       delta:   Delta difference against a buffer-wide timestamp\n"
5423  	"    absolute:   Absolute (standalone) timestamp\n"
5424  	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5425  	"\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5426  	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5427  	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5428  	"\t\t\t  Remove sub-buffer with rmdir\n"
5429  	"  trace_options\t\t- Set format or modify how tracing happens\n"
5430  	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5431  	"\t\t\t  option name\n"
5432  	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5433  #ifdef CONFIG_DYNAMIC_FTRACE
5434  	"\n  available_filter_functions - list of functions that can be filtered on\n"
5435  	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5436  	"\t\t\t  functions\n"
5437  	"\t     accepts: func_full_name or glob-matching-pattern\n"
5438  	"\t     modules: Can select a group via module\n"
5439  	"\t      Format: :mod:<module-name>\n"
5440  	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5441  	"\t    triggers: a command to perform when function is hit\n"
5442  	"\t      Format: <function>:<trigger>[:count]\n"
5443  	"\t     trigger: traceon, traceoff\n"
5444  	"\t\t      enable_event:<system>:<event>\n"
5445  	"\t\t      disable_event:<system>:<event>\n"
5446  #ifdef CONFIG_STACKTRACE
5447  	"\t\t      stacktrace\n"
5448  #endif
5449  #ifdef CONFIG_TRACER_SNAPSHOT
5450  	"\t\t      snapshot\n"
5451  #endif
5452  	"\t\t      dump\n"
5453  	"\t\t      cpudump\n"
5454  	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5455  	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5456  	"\t     The first one will disable tracing every time do_fault is hit\n"
5457  	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5458  	"\t       The first time do_trap is hit and it disables tracing, the\n"
5459  	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5460  	"\t       the counter will not decrement. It only decrements when the\n"
5461  	"\t       trigger did work\n"
5462  	"\t     To remove trigger without count:\n"
5463  	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5464  	"\t     To remove trigger with a count:\n"
5465  	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5466  	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5467  	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5468  	"\t    modules: Can select a group via module command :mod:\n"
5469  	"\t    Does not accept triggers\n"
5470  #endif /* CONFIG_DYNAMIC_FTRACE */
5471  #ifdef CONFIG_FUNCTION_TRACER
5472  	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5473  	"\t\t    (function)\n"
5474  	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5475  	"\t\t    (function)\n"
5476  #endif
5477  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5478  	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5479  	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5480  	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5481  #endif
5482  #ifdef CONFIG_TRACER_SNAPSHOT
5483  	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5484  	"\t\t\t  snapshot buffer. Read the contents for more\n"
5485  	"\t\t\t  information\n"
5486  #endif
5487  #ifdef CONFIG_STACK_TRACER
5488  	"  stack_trace\t\t- Shows the max stack trace when active\n"
5489  	"  stack_max_size\t- Shows current max stack size that was traced\n"
5490  	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5491  	"\t\t\t  new trace)\n"
5492  #ifdef CONFIG_DYNAMIC_FTRACE
5493  	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5494  	"\t\t\t  traces\n"
5495  #endif
5496  #endif /* CONFIG_STACK_TRACER */
5497  #ifdef CONFIG_DYNAMIC_EVENTS
5498  	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5499  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5500  #endif
5501  #ifdef CONFIG_KPROBE_EVENTS
5502  	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5503  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5504  #endif
5505  #ifdef CONFIG_UPROBE_EVENTS
5506  	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5507  	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5508  #endif
5509  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5510      defined(CONFIG_FPROBE_EVENTS)
5511  	"\t  accepts: event-definitions (one definition per line)\n"
5512  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5513  	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5514  	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5515  #endif
5516  #ifdef CONFIG_FPROBE_EVENTS
5517  	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5518  	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5519  #endif
5520  #ifdef CONFIG_HIST_TRIGGERS
5521  	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5522  #endif
5523  	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5524  	"\t           -:[<group>/][<event>]\n"
5525  #ifdef CONFIG_KPROBE_EVENTS
5526  	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5527    "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5528  #endif
5529  #ifdef CONFIG_UPROBE_EVENTS
5530    "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5531  #endif
5532  	"\t     args: <name>=fetcharg[:type]\n"
5533  	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5534  #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5535  	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5536  #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5537  	"\t           <argname>[->field[->field|.field...]],\n"
5538  #endif
5539  #else
5540  	"\t           $stack<index>, $stack, $retval, $comm,\n"
5541  #endif
5542  	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5543  	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5544  	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5545  	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5546  	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5547  #ifdef CONFIG_HIST_TRIGGERS
5548  	"\t    field: <stype> <name>;\n"
5549  	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5550  	"\t           [unsigned] char/int/long\n"
5551  #endif
5552  	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5553  	"\t            of the <attached-group>/<attached-event>.\n"
5554  #endif
5555  	"  set_event\t\t- Enables events by name written into it\n"
5556  	"\t\t\t  Can enable module events via: :mod:<module>\n"
5557  	"  events/\t\t- Directory containing all trace event subsystems:\n"
5558  	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5559  	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5560  	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5561  	"\t\t\t  events\n"
5562  	"      filter\t\t- If set, only events passing filter are traced\n"
5563  	"  events/<system>/<event>/\t- Directory containing control files for\n"
5564  	"\t\t\t  <event>:\n"
5565  	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5566  	"      filter\t\t- If set, only events passing filter are traced\n"
5567  	"      trigger\t\t- If set, a command to perform when event is hit\n"
5568  	"\t    Format: <trigger>[:count][if <filter>]\n"
5569  	"\t   trigger: traceon, traceoff\n"
5570  	"\t            enable_event:<system>:<event>\n"
5571  	"\t            disable_event:<system>:<event>\n"
5572  #ifdef CONFIG_HIST_TRIGGERS
5573  	"\t            enable_hist:<system>:<event>\n"
5574  	"\t            disable_hist:<system>:<event>\n"
5575  #endif
5576  #ifdef CONFIG_STACKTRACE
5577  	"\t\t    stacktrace\n"
5578  #endif
5579  #ifdef CONFIG_TRACER_SNAPSHOT
5580  	"\t\t    snapshot\n"
5581  #endif
5582  #ifdef CONFIG_HIST_TRIGGERS
5583  	"\t\t    hist (see below)\n"
5584  #endif
5585  	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5586  	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5587  	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5588  	"\t                  events/block/block_unplug/trigger\n"
5589  	"\t   The first disables tracing every time block_unplug is hit.\n"
5590  	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5591  	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5592  	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5593  	"\t   Like function triggers, the counter is only decremented if it\n"
5594  	"\t    enabled or disabled tracing.\n"
5595  	"\t   To remove a trigger without a count:\n"
5596  	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5597  	"\t   To remove a trigger with a count:\n"
5598  	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5599  	"\t   Filters can be ignored when removing a trigger.\n"
5600  #ifdef CONFIG_HIST_TRIGGERS
5601  	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5602  	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5603  	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5604  	"\t            [:values=<field1[,field2,...]>]\n"
5605  	"\t            [:sort=<field1[,field2,...]>]\n"
5606  	"\t            [:size=#entries]\n"
5607  	"\t            [:pause][:continue][:clear]\n"
5608  	"\t            [:name=histname1]\n"
5609  	"\t            [:nohitcount]\n"
5610  	"\t            [:<handler>.<action>]\n"
5611  	"\t            [if <filter>]\n\n"
5612  	"\t    Note, special fields can be used as well:\n"
5613  	"\t            common_timestamp - to record current timestamp\n"
5614  	"\t            common_cpu - to record the CPU the event happened on\n"
5615  	"\n"
5616  	"\t    A hist trigger variable can be:\n"
5617  	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5618  	"\t        - a reference to another variable e.g. y=$x,\n"
5619  	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5620  	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5621  	"\n"
5622  	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5623  	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5624  	"\t    variable reference, field or numeric literal.\n"
5625  	"\n"
5626  	"\t    When a matching event is hit, an entry is added to a hash\n"
5627  	"\t    table using the key(s) and value(s) named, and the value of a\n"
5628  	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5629  	"\t    correspond to fields in the event's format description.  Keys\n"
5630  	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5631  	"\t    Compound keys consisting of up to two fields can be specified\n"
5632  	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5633  	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5634  	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5635  	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5636  	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5637  	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5638  	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5639  	"\t    its histogram data will be shared with other triggers of the\n"
5640  	"\t    same name, and trigger hits will update this common data.\n\n"
5641  	"\t    Reading the 'hist' file for the event will dump the hash\n"
5642  	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5643  	"\t    triggers attached to an event, there will be a table for each\n"
5644  	"\t    trigger in the output.  The table displayed for a named\n"
5645  	"\t    trigger will be the same as any other instance having the\n"
5646  	"\t    same name.  The default format used to display a given field\n"
5647  	"\t    can be modified by appending any of the following modifiers\n"
5648  	"\t    to the field name, as applicable:\n\n"
5649  	"\t            .hex        display a number as a hex value\n"
5650  	"\t            .sym        display an address as a symbol\n"
5651  	"\t            .sym-offset display an address as a symbol and offset\n"
5652  	"\t            .execname   display a common_pid as a program name\n"
5653  	"\t            .syscall    display a syscall id as a syscall name\n"
5654  	"\t            .log2       display log2 value rather than raw number\n"
5655  	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5656  	"\t            .usecs      display a common_timestamp in microseconds\n"
5657  	"\t            .percent    display a number as a percentage value\n"
5658  	"\t            .graph      display a bar-graph of a value\n\n"
5659  	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5660  	"\t    trigger or to start a hist trigger but not log any events\n"
5661  	"\t    until told to do so.  'continue' can be used to start or\n"
5662  	"\t    restart a paused hist trigger.\n\n"
5663  	"\t    The 'clear' parameter will clear the contents of a running\n"
5664  	"\t    hist trigger and leave its current paused/active state\n"
5665  	"\t    unchanged.\n\n"
5666  	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5667  	"\t    raw hitcount in the histogram.\n\n"
5668  	"\t    The enable_hist and disable_hist triggers can be used to\n"
5669  	"\t    have one event conditionally start and stop another event's\n"
5670  	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5671  	"\t    the enable_event and disable_event triggers.\n\n"
5672  	"\t    Hist trigger handlers and actions are executed whenever a\n"
5673  	"\t    histogram entry is added or updated.  They take the form:\n\n"
5674  	"\t        <handler>.<action>\n\n"
5675  	"\t    The available handlers are:\n\n"
5676  	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5677  	"\t        onmax(var)               - invoke if var exceeds current max\n"
5678  	"\t        onchange(var)            - invoke action if var changes\n\n"
5679  	"\t    The available actions are:\n\n"
5680  	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5681  	"\t        save(field,...)                      - save current event fields\n"
5682  #ifdef CONFIG_TRACER_SNAPSHOT
5683  	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5684  #endif
5685  #ifdef CONFIG_SYNTH_EVENTS
5686  	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5687  	"\t  Write into this file to define/undefine new synthetic events.\n"
5688  	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5689  #endif
5690  #endif
5691  ;
5692  
5693  static ssize_t
5694  tracing_readme_read(struct file *filp, char __user *ubuf,
5695  		       size_t cnt, loff_t *ppos)
5696  {
5697  	return simple_read_from_buffer(ubuf, cnt, ppos,
5698  					readme_msg, strlen(readme_msg));
5699  }
5700  
5701  static const struct file_operations tracing_readme_fops = {
5702  	.open		= tracing_open_generic,
5703  	.read		= tracing_readme_read,
5704  	.llseek		= generic_file_llseek,
5705  };
5706  
5707  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5708  static union trace_eval_map_item *
5709  update_eval_map(union trace_eval_map_item *ptr)
5710  {
5711  	if (!ptr->map.eval_string) {
5712  		if (ptr->tail.next) {
5713  			ptr = ptr->tail.next;
5714  			/* Set ptr to the next real item (skip head) */
5715  			ptr++;
5716  		} else
5717  			return NULL;
5718  	}
5719  	return ptr;
5720  }
5721  
5722  static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5723  {
5724  	union trace_eval_map_item *ptr = v;
5725  
5726  	/*
5727  	 * Paranoid! If ptr points to end, we don't want to increment past it.
5728  	 * This really should never happen.
5729  	 */
5730  	(*pos)++;
5731  	ptr = update_eval_map(ptr);
5732  	if (WARN_ON_ONCE(!ptr))
5733  		return NULL;
5734  
5735  	ptr++;
5736  	ptr = update_eval_map(ptr);
5737  
5738  	return ptr;
5739  }
5740  
5741  static void *eval_map_start(struct seq_file *m, loff_t *pos)
5742  {
5743  	union trace_eval_map_item *v;
5744  	loff_t l = 0;
5745  
5746  	mutex_lock(&trace_eval_mutex);
5747  
5748  	v = trace_eval_maps;
5749  	if (v)
5750  		v++;
5751  
5752  	while (v && l < *pos) {
5753  		v = eval_map_next(m, v, &l);
5754  	}
5755  
5756  	return v;
5757  }
5758  
5759  static void eval_map_stop(struct seq_file *m, void *v)
5760  {
5761  	mutex_unlock(&trace_eval_mutex);
5762  }
5763  
5764  static int eval_map_show(struct seq_file *m, void *v)
5765  {
5766  	union trace_eval_map_item *ptr = v;
5767  
5768  	seq_printf(m, "%s %ld (%s)\n",
5769  		   ptr->map.eval_string, ptr->map.eval_value,
5770  		   ptr->map.system);
5771  
5772  	return 0;
5773  }
5774  
5775  static const struct seq_operations tracing_eval_map_seq_ops = {
5776  	.start		= eval_map_start,
5777  	.next		= eval_map_next,
5778  	.stop		= eval_map_stop,
5779  	.show		= eval_map_show,
5780  };
5781  
5782  static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5783  {
5784  	int ret;
5785  
5786  	ret = tracing_check_open_get_tr(NULL);
5787  	if (ret)
5788  		return ret;
5789  
5790  	return seq_open(filp, &tracing_eval_map_seq_ops);
5791  }
5792  
5793  static const struct file_operations tracing_eval_map_fops = {
5794  	.open		= tracing_eval_map_open,
5795  	.read		= seq_read,
5796  	.llseek		= seq_lseek,
5797  	.release	= seq_release,
5798  };
5799  
5800  static inline union trace_eval_map_item *
5801  trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5802  {
5803  	/* Return tail of array given the head */
5804  	return ptr + ptr->head.length + 1;
5805  }
5806  
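/*
 * Layout of one block appended to trace_eval_maps (len + 2 items):
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail (zeroed) ]
 *
 * The zeroed tail item doubles as the link whose tail.next is filled
 * in when the next module's block is chained on.
 */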
5807  static void
5808  trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5809  			   int len)
5810  {
5811  	struct trace_eval_map **stop;
5812  	struct trace_eval_map **map;
5813  	union trace_eval_map_item *map_array;
5814  	union trace_eval_map_item *ptr;
5815  
5816  	stop = start + len;
5817  
5818  	/*
5819  	 * The trace_eval_maps contains the map plus a head and tail item,
5820  	 * where the head holds the module and length of array, and the
5821  	 * tail holds a pointer to the next list.
5822  	 */
5823  	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5824  	if (!map_array) {
5825  		pr_warn("Unable to allocate trace eval mapping\n");
5826  		return;
5827  	}
5828  
5829  	guard(mutex)(&trace_eval_mutex);
5830  
5831  	if (!trace_eval_maps)
5832  		trace_eval_maps = map_array;
5833  	else {
5834  		ptr = trace_eval_maps;
5835  		for (;;) {
5836  			ptr = trace_eval_jmp_to_tail(ptr);
5837  			if (!ptr->tail.next)
5838  				break;
5839  			ptr = ptr->tail.next;
5840  
5841  		}
5842  		ptr->tail.next = map_array;
5843  	}
5844  	map_array->head.mod = mod;
5845  	map_array->head.length = len;
5846  	map_array++;
5847  
5848  	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5849  		map_array->map = **map;
5850  		map_array++;
5851  	}
5852  	memset(map_array, 0, sizeof(*map_array));
5853  }
5854  
5855  static void trace_create_eval_file(struct dentry *d_tracer)
5856  {
5857  	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5858  			  NULL, &tracing_eval_map_fops);
5859  }
5860  
5861  #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5862  static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5863  static inline void trace_insert_eval_map_file(struct module *mod,
5864  			      struct trace_eval_map **start, int len) { }
5865  #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5866  
5867  static void
5868  trace_event_update_with_eval_map(struct module *mod,
5869  				 struct trace_eval_map **start,
5870  				 int len)
5871  {
5872  	struct trace_eval_map **map;
5873  
5874  	/* Always run sanitizer only if btf_type_tag attr exists. */
5875  	if (len <= 0) {
5876  		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5877  		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5878  		      __has_attribute(btf_type_tag)))
5879  			return;
5880  	}
5881  
5882  	map = start;
5883  
5884  	trace_event_update_all(map, len);
5885  
5886  	if (len <= 0)
5887  		return;
5888  
5889  	trace_insert_eval_map_file(mod, start, len);
5890  }
5891  
5892  static ssize_t
5893  tracing_set_trace_read(struct file *filp, char __user *ubuf,
5894  		       size_t cnt, loff_t *ppos)
5895  {
5896  	struct trace_array *tr = filp->private_data;
5897  	char buf[MAX_TRACER_SIZE+2];
5898  	int r;
5899  
5900  	scoped_guard(mutex, &trace_types_lock) {
5901  		r = sprintf(buf, "%s\n", tr->current_trace->name);
5902  	}
5903  
5904  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5905  }
5906  
5907  int tracer_init(struct tracer *t, struct trace_array *tr)
5908  {
5909  	tracing_reset_online_cpus(&tr->array_buffer);
5910  	return t->init(tr);
5911  }
5912  
5913  static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5914  {
5915  	int cpu;
5916  
5917  	for_each_tracing_cpu(cpu)
5918  		per_cpu_ptr(buf->data, cpu)->entries = val;
5919  }
5920  
5921  static void update_buffer_entries(struct array_buffer *buf, int cpu)
5922  {
5923  	if (cpu == RING_BUFFER_ALL_CPUS) {
5924  		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5925  	} else {
5926  		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5927  	}
5928  }
5929  
5930  #ifdef CONFIG_TRACER_MAX_TRACE
5931  /* resize @trace_buf's buffer to the size of @size_buf's entries */
5932  static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5933  					struct array_buffer *size_buf, int cpu_id)
5934  {
5935  	int cpu, ret = 0;
5936  
5937  	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5938  		for_each_tracing_cpu(cpu) {
5939  			ret = ring_buffer_resize(trace_buf->buffer,
5940  				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5941  			if (ret < 0)
5942  				break;
5943  			per_cpu_ptr(trace_buf->data, cpu)->entries =
5944  				per_cpu_ptr(size_buf->data, cpu)->entries;
5945  		}
5946  	} else {
5947  		ret = ring_buffer_resize(trace_buf->buffer,
5948  				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5949  		if (ret == 0)
5950  			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5951  				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5952  	}
5953  
5954  	return ret;
5955  }
5956  #endif /* CONFIG_TRACER_MAX_TRACE */
5957  
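/*
 * Resize the main ring buffer and, when a snapshot is allocated, the
 * max buffer as well. Tracing is stopped for the duration, and if the
 * max buffer resize fails the main buffer is put back to its previous
 * size so the two stay in sync.
 */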
5958  static int __tracing_resize_ring_buffer(struct trace_array *tr,
5959  					unsigned long size, int cpu)
5960  {
5961  	int ret;
5962  
5963  	/*
5964  	 * If kernel or user changes the size of the ring buffer
5965  	 * we use the size that was given, and we can forget about
5966  	 * expanding it later.
5967  	 */
5968  	trace_set_ring_buffer_expanded(tr);
5969  
5970  	/* May be called before buffers are initialized */
5971  	if (!tr->array_buffer.buffer)
5972  		return 0;
5973  
5974  	/* Do not allow tracing while resizing ring buffer */
5975  	tracing_stop_tr(tr);
5976  
5977  	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5978  	if (ret < 0)
5979  		goto out_start;
5980  
5981  #ifdef CONFIG_TRACER_MAX_TRACE
5982  	if (!tr->allocated_snapshot)
5983  		goto out;
5984  
5985  	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5986  	if (ret < 0) {
5987  		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5988  						     &tr->array_buffer, cpu);
5989  		if (r < 0) {
5990  			/*
5991  			 * AARGH! We are left with different
5992  			 * size max buffer!!!!
5993  			 * The max buffer is our "snapshot" buffer.
5994  			 * When a tracer needs a snapshot (one of the
5995  			 * latency tracers), it swaps the max buffer
5996  			 * with the saved snapshot. We succeeded in
5997  			 * updating the size of the main buffer, but failed to
5998  			 * update the size of the max buffer. But when we tried
5999  			 * to reset the main buffer to the original size, we
6000  			 * failed there too. This is very unlikely to
6001  			 * happen, but if it does, warn and kill all
6002  			 * tracing.
6003  			 */
6004  			WARN_ON(1);
6005  			tracing_disabled = 1;
6006  		}
6007  		goto out_start;
6008  	}
6009  
6010  	update_buffer_entries(&tr->max_buffer, cpu);
6011  
6012   out:
6013  #endif /* CONFIG_TRACER_MAX_TRACE */
6014  
6015  	update_buffer_entries(&tr->array_buffer, cpu);
6016   out_start:
6017  	tracing_start_tr(tr);
6018  	return ret;
6019  }
6020  
6021  ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6022  				  unsigned long size, int cpu_id)
6023  {
6024  	guard(mutex)(&trace_types_lock);
6025  
6026  	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6027  		/* make sure this cpu is enabled in the mask */
6028  		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6029  			return -EINVAL;
6030  	}
6031  
6032  	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6033  }
6034  
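/*
 * The persistent ring buffer keeps a small scratch area describing the
 * boot it was recorded in: the clock used, the kernel text address and
 * a list of module text addresses (kept sorted so trace_adjust_address()
 * can bsearch it; see save_mod() below).
 */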
6035  struct trace_mod_entry {
6036  	unsigned long	mod_addr;
6037  	char		mod_name[MODULE_NAME_LEN];
6038  };
6039  
6040  struct trace_scratch {
6041  	unsigned int		clock_id;
6042  	unsigned long		text_addr;
6043  	unsigned long		nr_entries;
6044  	struct trace_mod_entry	entries[];
6045  };
6046  
6047  static DEFINE_MUTEX(scratch_mutex);
6048  
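/*
 * bsearch() comparator over the sorted trace_mod_entry array: a key
 * address matches entry N when it falls in [N.mod_addr, N+1.mod_addr).
 */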
6049  static int cmp_mod_entry(const void *key, const void *pivot)
6050  {
6051  	unsigned long addr = (unsigned long)key;
6052  	const struct trace_mod_entry *ent = pivot;
6053  
6054  	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6055  		return 0;
6056  	else
6057  		return addr - ent->mod_addr;
6058  }
6059  
6060  /**
6061   * trace_adjust_address() - Adjust prev boot address to current address.
6062   * @tr: Persistent ring buffer's trace_array.
6063   * @addr: Address in @tr which is adjusted.
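 *
 * Return: the address translated to the current boot, or @addr itself
 * if no adjustment applies.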
6064   */
6065  unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6066  {
6067  	struct trace_module_delta *module_delta;
6068  	struct trace_scratch *tscratch;
6069  	struct trace_mod_entry *entry;
6070  	unsigned long raddr;
6071  	int idx = 0, nr_entries;
6072  
6073  	/* If we don't have last boot delta, return the address */
6074  	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6075  		return addr;
6076  
6077  	/* tr->module_delta must be protected by rcu. */
6078  	guard(rcu)();
6079  	tscratch = tr->scratch;
6080  	/* if there is no tscratch, module_delta must be NULL. */
6081  	module_delta = READ_ONCE(tr->module_delta);
6082  	if (!module_delta || !tscratch->nr_entries ||
6083  	    tscratch->entries[0].mod_addr > addr) {
6084  		raddr = addr + tr->text_delta;
6085  		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6086  			is_kernel_rodata(raddr) ? raddr : addr;
6087  	}
6088  
6089  	/* Note that entries must be sorted. */
6090  	nr_entries = tscratch->nr_entries;
6091  	if (nr_entries == 1 ||
6092  	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6093  		idx = nr_entries - 1;
6094  	else {
6095  		entry = __inline_bsearch((void *)addr,
6096  				tscratch->entries,
6097  				nr_entries - 1,
6098  				sizeof(tscratch->entries[0]),
6099  				cmp_mod_entry);
6100  		if (entry)
6101  			idx = entry - tscratch->entries;
6102  	}
6103  
6104  	return addr + module_delta->delta[idx];
6105  }
6106  
6107  #ifdef CONFIG_MODULES
6108  static int save_mod(struct module *mod, void *data)
6109  {
6110  	struct trace_array *tr = data;
6111  	struct trace_scratch *tscratch;
6112  	struct trace_mod_entry *entry;
6113  	unsigned int size;
6114  
6115  	tscratch = tr->scratch;
6116  	if (!tscratch)
6117  		return -1;
6118  	size = tr->scratch_size;
6119  
6120  	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6121  		return -1;
6122  
6123  	entry = &tscratch->entries[tscratch->nr_entries];
6124  
6125  	tscratch->nr_entries++;
6126  
6127  	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6128  	strscpy(entry->mod_name, mod->name);
6129  
6130  	return 0;
6131  }
6132  #else
6133  static int save_mod(struct module *mod, void *data)
6134  {
6135  	return 0;
6136  }
6137  #endif
6138  
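/*
 * Switch a persistent ("last boot") buffer over to recording data from
 * the current boot: clear all CPU buffers, drop the saved address
 * deltas and re-record the currently loaded modules in the scratch
 * area.
 */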
6139  static void update_last_data(struct trace_array *tr)
6140  {
6141  	struct trace_module_delta *module_delta;
6142  	struct trace_scratch *tscratch;
6143  
6144  	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6145  		return;
6146  
6147  	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6148  		return;
6149  
6150  	/* Only if the buffer has previous boot data, clear and update it. */
6151  	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6152  
6153  	/* Reset the module list and reload them */
6154  	if (tr->scratch) {
6155  		struct trace_scratch *tscratch = tr->scratch;
6156  
6157  		tscratch->clock_id = tr->clock_id;
6158  		memset(tscratch->entries, 0,
6159  		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6160  		tscratch->nr_entries = 0;
6161  
6162  		guard(mutex)(&scratch_mutex);
6163  		module_for_each_mod(save_mod, tr);
6164  	}
6165  
6166  	/*
6167  	 * Need to clear all CPU buffers as there cannot be events
6168  	 * from the previous boot mixed with events with this boot
6169  	 * as that will cause a confusing trace. Need to clear all
6170  	 * CPU buffers, even for those that may currently be offline.
6171  	 */
6172  	tracing_reset_all_cpus(&tr->array_buffer);
6173  
6174  	/* Using current data now */
6175  	tr->text_delta = 0;
6176  
6177  	if (!tr->scratch)
6178  		return;
6179  
6180  	tscratch = tr->scratch;
6181  	module_delta = READ_ONCE(tr->module_delta);
6182  	WRITE_ONCE(tr->module_delta, NULL);
6183  	kfree_rcu(module_delta, rcu);
6184  
6185  	/* Set the persistent ring buffer meta data to this address */
6186  	tscratch->text_addr = (unsigned long)_text;
6187  }
6188  
6189  /**
6190   * tracing_update_buffers - used by tracing facility to expand ring buffers
6191   * @tr: The tracing instance
6192   *
6193   * To save memory when tracing is never used on a system that has it
6194   * configured in, the ring buffers are set to a minimum size. Once
6195   * a user starts to use the tracing facility, they need to grow
6196   * to their default size.
6197   *
6198   * This function is to be called when a tracer is about to be used.
6199   */
6200  int tracing_update_buffers(struct trace_array *tr)
6201  {
6202  	int ret = 0;
6203  
6204  	guard(mutex)(&trace_types_lock);
6205  
6206  	update_last_data(tr);
6207  
6208  	if (!tr->ring_buffer_expanded)
6209  		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6210  						RING_BUFFER_ALL_CPUS);
6211  	return ret;
6212  }
6213  
6214  struct trace_option_dentry;
6215  
6216  static void
6217  create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6218  
6219  /*
6220   * Used to clear out the tracer before deletion of an instance.
6221   * Must have trace_types_lock held.
6222   */
6223  static void tracing_set_nop(struct trace_array *tr)
6224  {
6225  	if (tr->current_trace == &nop_trace)
6226  		return;
6227  
6228  	tr->current_trace->enabled--;
6229  
6230  	if (tr->current_trace->reset)
6231  		tr->current_trace->reset(tr);
6232  
6233  	tr->current_trace = &nop_trace;
6234  }
6235  
6236  static bool tracer_options_updated;
6237  
6238  static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6239  {
6240  	/* Only enable if the directory has been created already. */
6241  	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6242  		return;
6243  
6244  	/* Only create trace option files after update_tracer_options finishes */
6245  	if (!tracer_options_updated)
6246  		return;
6247  
6248  	create_trace_option_files(tr, t);
6249  }
6250  
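/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, tear down the current tracer, arm or
 * disarm the snapshot buffer as the new tracer requires, then call its
 * init routine.
 */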
6251  int tracing_set_tracer(struct trace_array *tr, const char *buf)
6252  {
6253  	struct tracer *t;
6254  #ifdef CONFIG_TRACER_MAX_TRACE
6255  	bool had_max_tr;
6256  #endif
6257  	int ret;
6258  
6259  	guard(mutex)(&trace_types_lock);
6260  
6261  	update_last_data(tr);
6262  
6263  	if (!tr->ring_buffer_expanded) {
6264  		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6265  						RING_BUFFER_ALL_CPUS);
6266  		if (ret < 0)
6267  			return ret;
6268  		ret = 0;
6269  	}
6270  
6271  	for (t = trace_types; t; t = t->next) {
6272  		if (strcmp(t->name, buf) == 0)
6273  			break;
6274  	}
6275  	if (!t)
6276  		return -EINVAL;
6277  
6278  	if (t == tr->current_trace)
6279  		return 0;
6280  
6281  #ifdef CONFIG_TRACER_SNAPSHOT
6282  	if (t->use_max_tr) {
6283  		local_irq_disable();
6284  		arch_spin_lock(&tr->max_lock);
6285  		ret = tr->cond_snapshot ? -EBUSY : 0;
6286  		arch_spin_unlock(&tr->max_lock);
6287  		local_irq_enable();
6288  		if (ret)
6289  			return ret;
6290  	}
6291  #endif
6292  	/* Some tracers won't work on kernel command line */
6293  	if (system_state < SYSTEM_RUNNING && t->noboot) {
6294  		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6295  			t->name);
6296  		return -EINVAL;
6297  	}
6298  
6299  	/* Some tracers are only allowed for the top level buffer */
6300  	if (!trace_ok_for_array(t, tr))
6301  		return -EINVAL;
6302  
6303  	/* If trace pipe files are being read, we can't change the tracer */
6304  	if (tr->trace_ref)
6305  		return -EBUSY;
6306  
6307  	trace_branch_disable();
6308  
6309  	tr->current_trace->enabled--;
6310  
6311  	if (tr->current_trace->reset)
6312  		tr->current_trace->reset(tr);
6313  
6314  #ifdef CONFIG_TRACER_MAX_TRACE
6315  	had_max_tr = tr->current_trace->use_max_tr;
6316  
6317  	/* Current trace needs to be nop_trace before synchronize_rcu */
6318  	tr->current_trace = &nop_trace;
6319  
6320  	if (had_max_tr && !t->use_max_tr) {
6321  		/*
6322  		 * We need to make sure that the update_max_tr sees that
6323  		 * current_trace changed to nop_trace to keep it from
6324  		 * swapping the buffers after we resize it.
6325  		 * The update_max_tr is called with interrupts disabled,
6326  		 * so a synchronize_rcu() is sufficient.
6327  		 */
6328  		synchronize_rcu();
6329  		free_snapshot(tr);
6330  		tracing_disarm_snapshot(tr);
6331  	}
6332  
6333  	if (!had_max_tr && t->use_max_tr) {
6334  		ret = tracing_arm_snapshot_locked(tr);
6335  		if (ret)
6336  			return ret;
6337  	}
6338  #else
6339  	tr->current_trace = &nop_trace;
6340  #endif
6341  
6342  	if (t->init) {
6343  		ret = tracer_init(t, tr);
6344  		if (ret) {
6345  #ifdef CONFIG_TRACER_MAX_TRACE
6346  			if (t->use_max_tr)
6347  				tracing_disarm_snapshot(tr);
6348  #endif
6349  			return ret;
6350  		}
6351  	}
6352  
6353  	tr->current_trace = t;
6354  	tr->current_trace->enabled++;
6355  	trace_branch_enable(tr);
6356  
6357  	return 0;
6358  }
6359  
6360  static ssize_t
6361  tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6362  			size_t cnt, loff_t *ppos)
6363  {
6364  	struct trace_array *tr = filp->private_data;
6365  	char buf[MAX_TRACER_SIZE+1];
6366  	char *name;
6367  	size_t ret;
6368  	int err;
6369  
6370  	ret = cnt;
6371  
6372  	if (cnt > MAX_TRACER_SIZE)
6373  		cnt = MAX_TRACER_SIZE;
6374  
6375  	if (copy_from_user(buf, ubuf, cnt))
6376  		return -EFAULT;
6377  
6378  	buf[cnt] = 0;
6379  
6380  	name = strim(buf);
6381  
6382  	err = tracing_set_tracer(tr, name);
6383  	if (err)
6384  		return err;
6385  
6386  	*ppos += ret;
6387  
6388  	return ret;
6389  }
6390  
6391  static ssize_t
6392  tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6393  		   size_t cnt, loff_t *ppos)
6394  {
6395  	char buf[64];
6396  	int r;
6397  
6398  	r = snprintf(buf, sizeof(buf), "%ld\n",
6399  		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6400  	if (r > sizeof(buf))
6401  		r = sizeof(buf);
6402  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6403  }
6404  
6405  static ssize_t
6406  tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6407  		    size_t cnt, loff_t *ppos)
6408  {
6409  	unsigned long val;
6410  	int ret;
6411  
6412  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6413  	if (ret)
6414  		return ret;
6415  
6416  	*ptr = val * 1000;
6417  
6418  	return cnt;
6419  }
6420  
6421  static ssize_t
6422  tracing_thresh_read(struct file *filp, char __user *ubuf,
6423  		    size_t cnt, loff_t *ppos)
6424  {
6425  	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6426  }
6427  
6428  static ssize_t
6429  tracing_thresh_write(struct file *filp, const char __user *ubuf,
6430  		     size_t cnt, loff_t *ppos)
6431  {
6432  	struct trace_array *tr = filp->private_data;
6433  	int ret;
6434  
6435  	guard(mutex)(&trace_types_lock);
6436  	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6437  	if (ret < 0)
6438  		return ret;
6439  
6440  	if (tr->current_trace->update_thresh) {
6441  		ret = tr->current_trace->update_thresh(tr);
6442  		if (ret < 0)
6443  			return ret;
6444  	}
6445  
6446  	return cnt;
6447  }
6448  
6449  #ifdef CONFIG_TRACER_MAX_TRACE
6450  
6451  static ssize_t
6452  tracing_max_lat_read(struct file *filp, char __user *ubuf,
6453  		     size_t cnt, loff_t *ppos)
6454  {
6455  	struct trace_array *tr = filp->private_data;
6456  
6457  	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6458  }
6459  
6460  static ssize_t
6461  tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6462  		      size_t cnt, loff_t *ppos)
6463  {
6464  	struct trace_array *tr = filp->private_data;
6465  
6466  	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6467  }
6468  
6469  #endif
6470  
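/*
 * trace_pipe is an exclusive consumer: either one reader per CPU or a
 * single reader for all CPUs, tracked in tr->pipe_cpumask.
 */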
6471  static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6472  {
6473  	if (cpu == RING_BUFFER_ALL_CPUS) {
6474  		if (cpumask_empty(tr->pipe_cpumask)) {
6475  			cpumask_setall(tr->pipe_cpumask);
6476  			return 0;
6477  		}
6478  	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6479  		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6480  		return 0;
6481  	}
6482  	return -EBUSY;
6483  }
6484  
6485  static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6486  {
6487  	if (cpu == RING_BUFFER_ALL_CPUS) {
6488  		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6489  		cpumask_clear(tr->pipe_cpumask);
6490  	} else {
6491  		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6492  		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6493  	}
6494  }
6495  
6496  static int tracing_open_pipe(struct inode *inode, struct file *filp)
6497  {
6498  	struct trace_array *tr = inode->i_private;
6499  	struct trace_iterator *iter;
6500  	int cpu;
6501  	int ret;
6502  
6503  	ret = tracing_check_open_get_tr(tr);
6504  	if (ret)
6505  		return ret;
6506  
6507  	guard(mutex)(&trace_types_lock);
6508  	cpu = tracing_get_cpu(inode);
6509  	ret = open_pipe_on_cpu(tr, cpu);
6510  	if (ret)
6511  		goto fail_pipe_on_cpu;
6512  
6513  	/* create a buffer to store the information to pass to userspace */
6514  	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6515  	if (!iter) {
6516  		ret = -ENOMEM;
6517  		goto fail_alloc_iter;
6518  	}
6519  
6520  	trace_seq_init(&iter->seq);
6521  	iter->trace = tr->current_trace;
6522  
6523  	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6524  		ret = -ENOMEM;
6525  		goto fail;
6526  	}
6527  
6528  	/* trace pipe does not show start of buffer */
6529  	cpumask_setall(iter->started);
6530  
6531  	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6532  		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6533  
6534  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6535  	if (trace_clocks[tr->clock_id].in_ns)
6536  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6537  
6538  	iter->tr = tr;
6539  	iter->array_buffer = &tr->array_buffer;
6540  	iter->cpu_file = cpu;
6541  	mutex_init(&iter->mutex);
6542  	filp->private_data = iter;
6543  
6544  	if (iter->trace->pipe_open)
6545  		iter->trace->pipe_open(iter);
6546  
6547  	nonseekable_open(inode, filp);
6548  
6549  	tr->trace_ref++;
6550  
6551  	return ret;
6552  
6553  fail:
6554  	kfree(iter);
6555  fail_alloc_iter:
6556  	close_pipe_on_cpu(tr, cpu);
6557  fail_pipe_on_cpu:
6558  	__trace_array_put(tr);
6559  	return ret;
6560  }
6561  
6562  static int tracing_release_pipe(struct inode *inode, struct file *file)
6563  {
6564  	struct trace_iterator *iter = file->private_data;
6565  	struct trace_array *tr = inode->i_private;
6566  
6567  	scoped_guard(mutex, &trace_types_lock) {
6568  		tr->trace_ref--;
6569  
6570  		if (iter->trace->pipe_close)
6571  			iter->trace->pipe_close(iter);
6572  		close_pipe_on_cpu(tr, iter->cpu_file);
6573  	}
6574  
6575  	free_trace_iter_content(iter);
6576  	kfree(iter);
6577  
6578  	trace_array_put(tr);
6579  
6580  	return 0;
6581  }
6582  
6583  static __poll_t
6584  trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6585  {
6586  	struct trace_array *tr = iter->tr;
6587  
6588  	/* Iterators are static; they should be either filled or empty */
6589  	if (trace_buffer_iter(iter, iter->cpu_file))
6590  		return EPOLLIN | EPOLLRDNORM;
6591  
6592  	if (tr->trace_flags & TRACE_ITER_BLOCK)
6593  		/*
6594  		 * Always select as readable when in blocking mode
6595  		 */
6596  		return EPOLLIN | EPOLLRDNORM;
6597  	else
6598  		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6599  					     filp, poll_table, iter->tr->buffer_percent);
6600  }
6601  
6602  static __poll_t
6603  tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6604  {
6605  	struct trace_iterator *iter = filp->private_data;
6606  
6607  	return trace_poll(iter, filp, poll_table);
6608  }
6609  
6610  /* Must be called with iter->mutex held. */
6611  static int tracing_wait_pipe(struct file *filp)
6612  {
6613  	struct trace_iterator *iter = filp->private_data;
6614  	int ret;
6615  
6616  	while (trace_empty(iter)) {
6617  
6618  		if ((filp->f_flags & O_NONBLOCK)) {
6619  			return -EAGAIN;
6620  		}
6621  
6622  		/*
6623  		 * We block until we read something and tracing is disabled.
6624  		 * We still block if tracing is disabled, but we have never
6625  		 * read anything. This allows a user to cat this file, and
6626  		 * then enable tracing. But after we have read something,
6627  		 * we give an EOF when tracing is again disabled.
6628  		 *
6629  		 * iter->pos will be 0 if we haven't read anything.
6630  		 */
6631  		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6632  			break;
6633  
6634  		mutex_unlock(&iter->mutex);
6635  
6636  		ret = wait_on_pipe(iter, 0);
6637  
6638  		mutex_lock(&iter->mutex);
6639  
6640  		if (ret)
6641  			return ret;
6642  	}
6643  
6644  	return 1;
6645  }
6646  
6647  static bool update_last_data_if_empty(struct trace_array *tr)
6648  {
6649  	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6650  		return false;
6651  
6652  	if (!ring_buffer_empty(tr->array_buffer.buffer))
6653  		return false;
6654  
6655  	/*
6656  	 * If the buffer contains the last boot data and all per-cpu
6657  	 * buffers are empty, reset it from the kernel side.
6658  	 */
6659  	update_last_data(tr);
6660  	return true;
6661  }
6662  
6663  /*
6664   * Consumer reader.
6665   */
6666  static ssize_t
6667  tracing_read_pipe(struct file *filp, char __user *ubuf,
6668  		  size_t cnt, loff_t *ppos)
6669  {
6670  	struct trace_iterator *iter = filp->private_data;
6671  	ssize_t sret;
6672  
6673  	/*
6674  	 * Avoid more than one consumer on a single file descriptor.
6675  	 * This is just a matter of trace coherency; the ring buffer itself
6676  	 * is protected.
6677  	 */
6678  	guard(mutex)(&iter->mutex);
6679  
6680  	/* return any leftover data */
6681  	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6682  	if (sret != -EBUSY)
6683  		return sret;
6684  
6685  	trace_seq_init(&iter->seq);
6686  
6687  	if (iter->trace->read) {
6688  		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6689  		if (sret)
6690  			return sret;
6691  	}
6692  
6693  waitagain:
6694  	if (update_last_data_if_empty(iter->tr))
6695  		return 0;
6696  
6697  	sret = tracing_wait_pipe(filp);
6698  	if (sret <= 0)
6699  		return sret;
6700  
6701  	/* stop when tracing is finished */
6702  	if (trace_empty(iter))
6703  		return 0;
6704  
6705  	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6706  		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6707  
6708  	/* reset all but tr, trace, and overruns */
6709  	trace_iterator_reset(iter);
6710  	cpumask_clear(iter->started);
6711  	trace_seq_init(&iter->seq);
6712  
6713  	trace_event_read_lock();
6714  	trace_access_lock(iter->cpu_file);
6715  	while (trace_find_next_entry_inc(iter) != NULL) {
6716  		enum print_line_t ret;
6717  		int save_len = iter->seq.seq.len;
6718  
6719  		ret = print_trace_line(iter);
6720  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721  			/*
6722  			 * If one print_trace_line() fills the entire trace_seq in one shot,
6723  			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6724  			 * In this case, we need to consume it, otherwise the loop will peek
6725  			 * at this event again next time, resulting in an infinite loop.
6726  			 */
6727  			if (save_len == 0) {
6728  				iter->seq.full = 0;
6729  				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6730  				trace_consume(iter);
6731  				break;
6732  			}
6733  
6734  			/* In other cases, don't print partial lines */
6735  			iter->seq.seq.len = save_len;
6736  			break;
6737  		}
6738  		if (ret != TRACE_TYPE_NO_CONSUME)
6739  			trace_consume(iter);
6740  
6741  		if (trace_seq_used(&iter->seq) >= cnt)
6742  			break;
6743  
6744  		/*
6745  		 * Setting the full flag means we reached the trace_seq buffer
6746  		 * size, so we should have left via the partial output condition
6747  		 * above. One of the trace_seq_* functions is not being used properly.
6748  		 */
6749  		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6750  			  iter->ent->type);
6751  	}
6752  	trace_access_unlock(iter->cpu_file);
6753  	trace_event_read_unlock();
6754  
6755  	/* Now copy what we have to the user */
6756  	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6757  	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6758  		trace_seq_init(&iter->seq);
6759  
6760  	/*
6761  	 * If there was nothing to send to user, in spite of consuming trace
6762  	 * entries, go back to wait for more entries.
6763  	 */
6764  	if (sret == -EBUSY)
6765  		goto waitagain;
6766  
6767  	return sret;
6768  }
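/*
 * Editorial note (not part of the original source): a minimal sketch of how
 * the consumer read path above is typically exercised from user space,
 * assuming the default tracefs mount point:
 *
 *   cd /sys/kernel/tracing
 *   echo 1 > events/sched/sched_switch/enable
 *   cat trace_pipe          # blocks until data arrives; reads are consuming
 *
 * Unlike the "trace" file, every line read here is removed from the ring
 * buffer, which is why tracing_read_pipe() loops in tracing_wait_pipe()
 * instead of returning EOF on an empty buffer.
 */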
6769  
6770  static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6771  				     unsigned int idx)
6772  {
6773  	__free_page(spd->pages[idx]);
6774  }
6775  
6776  static size_t
6777  tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6778  {
6779  	size_t count;
6780  	int save_len;
6781  	int ret;
6782  
6783  	/* Seq buffer is page-sized, exactly what we need. */
6784  	for (;;) {
6785  		save_len = iter->seq.seq.len;
6786  		ret = print_trace_line(iter);
6787  
6788  		if (trace_seq_has_overflowed(&iter->seq)) {
6789  			iter->seq.seq.len = save_len;
6790  			break;
6791  		}
6792  
6793  		/*
6794  		 * This should not be hit, because it should only
6795  		 * be set if the iter->seq overflowed. But check it
6796  		 * anyway to be safe.
6797  		 */
6798  		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6799  			iter->seq.seq.len = save_len;
6800  			break;
6801  		}
6802  
6803  		count = trace_seq_used(&iter->seq) - save_len;
6804  		if (rem < count) {
6805  			rem = 0;
6806  			iter->seq.seq.len = save_len;
6807  			break;
6808  		}
6809  
6810  		if (ret != TRACE_TYPE_NO_CONSUME)
6811  			trace_consume(iter);
6812  		rem -= count;
6813  		if (!trace_find_next_entry_inc(iter))	{
6814  			rem = 0;
6815  			iter->ent = NULL;
6816  			break;
6817  		}
6818  	}
6819  
6820  	return rem;
6821  }
6822  
6823  static ssize_t tracing_splice_read_pipe(struct file *filp,
6824  					loff_t *ppos,
6825  					struct pipe_inode_info *pipe,
6826  					size_t len,
6827  					unsigned int flags)
6828  {
6829  	struct page *pages_def[PIPE_DEF_BUFFERS];
6830  	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6831  	struct trace_iterator *iter = filp->private_data;
6832  	struct splice_pipe_desc spd = {
6833  		.pages		= pages_def,
6834  		.partial	= partial_def,
6835  		.nr_pages	= 0, /* This gets updated below. */
6836  		.nr_pages_max	= PIPE_DEF_BUFFERS,
6837  		.ops		= &default_pipe_buf_ops,
6838  		.spd_release	= tracing_spd_release_pipe,
6839  	};
6840  	ssize_t ret;
6841  	size_t rem;
6842  	unsigned int i;
6843  
6844  	if (splice_grow_spd(pipe, &spd))
6845  		return -ENOMEM;
6846  
6847  	mutex_lock(&iter->mutex);
6848  
6849  	if (iter->trace->splice_read) {
6850  		ret = iter->trace->splice_read(iter, filp,
6851  					       ppos, pipe, len, flags);
6852  		if (ret)
6853  			goto out_err;
6854  	}
6855  
6856  	ret = tracing_wait_pipe(filp);
6857  	if (ret <= 0)
6858  		goto out_err;
6859  
6860  	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6861  		ret = -EFAULT;
6862  		goto out_err;
6863  	}
6864  
6865  	trace_event_read_lock();
6866  	trace_access_lock(iter->cpu_file);
6867  
6868  	/* Fill as many pages as possible. */
6869  	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6870  		spd.pages[i] = alloc_page(GFP_KERNEL);
6871  		if (!spd.pages[i])
6872  			break;
6873  
6874  		rem = tracing_fill_pipe_page(rem, iter);
6875  
6876  		/* Copy the data into the page, so we can start over. */
6877  		ret = trace_seq_to_buffer(&iter->seq,
6878  					  page_address(spd.pages[i]),
6879  					  min((size_t)trace_seq_used(&iter->seq),
6880  						  (size_t)PAGE_SIZE));
6881  		if (ret < 0) {
6882  			__free_page(spd.pages[i]);
6883  			break;
6884  		}
6885  		spd.partial[i].offset = 0;
6886  		spd.partial[i].len = ret;
6887  
6888  		trace_seq_init(&iter->seq);
6889  	}
6890  
6891  	trace_access_unlock(iter->cpu_file);
6892  	trace_event_read_unlock();
6893  	mutex_unlock(&iter->mutex);
6894  
6895  	spd.nr_pages = i;
6896  
6897  	if (i)
6898  		ret = splice_to_pipe(pipe, &spd);
6899  	else
6900  		ret = 0;
6901  out:
6902  	splice_shrink_spd(&spd);
6903  	return ret;
6904  
6905  out_err:
6906  	mutex_unlock(&iter->mutex);
6907  	goto out;
6908  }
6909  
6910  static ssize_t
6911  tracing_entries_read(struct file *filp, char __user *ubuf,
6912  		     size_t cnt, loff_t *ppos)
6913  {
6914  	struct inode *inode = file_inode(filp);
6915  	struct trace_array *tr = inode->i_private;
6916  	int cpu = tracing_get_cpu(inode);
6917  	char buf[64];
6918  	int r = 0;
6919  	ssize_t ret;
6920  
6921  	mutex_lock(&trace_types_lock);
6922  
6923  	if (cpu == RING_BUFFER_ALL_CPUS) {
6924  		int cpu, buf_size_same;
6925  		unsigned long size;
6926  
6927  		size = 0;
6928  		buf_size_same = 1;
6929  		/* check if all CPU buffer sizes are the same */
6930  		for_each_tracing_cpu(cpu) {
6931  			/* fill in the size from first enabled cpu */
6932  			if (size == 0)
6933  				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6934  			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6935  				buf_size_same = 0;
6936  				break;
6937  			}
6938  		}
6939  
6940  		if (buf_size_same) {
6941  			if (!tr->ring_buffer_expanded)
6942  				r = sprintf(buf, "%lu (expanded: %lu)\n",
6943  					    size >> 10,
6944  					    trace_buf_size >> 10);
6945  			else
6946  				r = sprintf(buf, "%lu\n", size >> 10);
6947  		} else
6948  			r = sprintf(buf, "X\n");
6949  	} else
6950  		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6951  
6952  	mutex_unlock(&trace_types_lock);
6953  
6954  	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6955  	return ret;
6956  }
6957  
6958  static ssize_t
6959  tracing_entries_write(struct file *filp, const char __user *ubuf,
6960  		      size_t cnt, loff_t *ppos)
6961  {
6962  	struct inode *inode = file_inode(filp);
6963  	struct trace_array *tr = inode->i_private;
6964  	unsigned long val;
6965  	int ret;
6966  
6967  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6968  	if (ret)
6969  		return ret;
6970  
6971  	/* must have at least 1 entry */
6972  	if (!val)
6973  		return -EINVAL;
6974  
6975  	/* value is in KB */
6976  	val <<= 10;
6977  	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6978  	if (ret < 0)
6979  		return ret;
6980  
6981  	*ppos += cnt;
6982  
6983  	return cnt;
6984  }
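/*
 * Editorial note (not part of the original source): tracing_entries_write()
 * backs the "buffer_size_kb" files; the value written is interpreted in KB
 * and applied per CPU. A sketch, assuming the default tracefs mount:
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb               # all CPUs
 *   echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb  # cpu0 only
 */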
6985  
6986  static ssize_t
6987  tracing_total_entries_read(struct file *filp, char __user *ubuf,
6988  				size_t cnt, loff_t *ppos)
6989  {
6990  	struct trace_array *tr = filp->private_data;
6991  	char buf[64];
6992  	int r, cpu;
6993  	unsigned long size = 0, expanded_size = 0;
6994  
6995  	mutex_lock(&trace_types_lock);
6996  	for_each_tracing_cpu(cpu) {
6997  		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6998  		if (!tr->ring_buffer_expanded)
6999  			expanded_size += trace_buf_size >> 10;
7000  	}
7001  	if (tr->ring_buffer_expanded)
7002  		r = sprintf(buf, "%lu\n", size);
7003  	else
7004  		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7005  	mutex_unlock(&trace_types_lock);
7006  
7007  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7008  }
7009  
7010  #define LAST_BOOT_HEADER ((void *)1)
7011  
7012  static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7013  {
7014  	struct trace_array *tr = m->private;
7015  	struct trace_scratch *tscratch = tr->scratch;
7016  	unsigned int index = *pos;
7017  
7018  	(*pos)++;
7019  
7020  	if (*pos == 1)
7021  		return LAST_BOOT_HEADER;
7022  
7023  	/* Only show offsets of the last boot data */
7024  	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7025  		return NULL;
7026  
7027  	/* *pos 0 is for the header, 1 is for the first module */
7028  	index--;
7029  
7030  	if (index >= tscratch->nr_entries)
7031  		return NULL;
7032  
7033  	return &tscratch->entries[index];
7034  }
7035  
7036  static void *l_start(struct seq_file *m, loff_t *pos)
7037  {
7038  	mutex_lock(&scratch_mutex);
7039  
7040  	return l_next(m, NULL, pos);
7041  }
7042  
7043  static void l_stop(struct seq_file *m, void *p)
7044  {
7045  	mutex_unlock(&scratch_mutex);
7046  }
7047  
7048  static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7049  {
7050  	struct trace_scratch *tscratch = tr->scratch;
7051  
7052  	/*
7053  	 * Do not leak KASLR address. This only shows the KASLR address of
7054  	 * the last boot. When the ring buffer is started, the LAST_BOOT
7055  	 * flag gets cleared, and this should only report "current".
7056  	 * Otherwise it shows the KASLR address from the previous boot which
7057  	 * should not be the same as the current boot.
7058  	 */
7059  	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7060  		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7061  	else
7062  		seq_puts(m, "# Current\n");
7063  }
7064  
7065  static int l_show(struct seq_file *m, void *v)
7066  {
7067  	struct trace_array *tr = m->private;
7068  	struct trace_mod_entry *entry = v;
7069  
7070  	if (v == LAST_BOOT_HEADER) {
7071  		show_last_boot_header(m, tr);
7072  		return 0;
7073  	}
7074  
7075  	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7076  	return 0;
7077  }
7078  
7079  static const struct seq_operations last_boot_seq_ops = {
7080  	.start		= l_start,
7081  	.next		= l_next,
7082  	.stop		= l_stop,
7083  	.show		= l_show,
7084  };
7085  
7086  static int tracing_last_boot_open(struct inode *inode, struct file *file)
7087  {
7088  	struct trace_array *tr = inode->i_private;
7089  	struct seq_file *m;
7090  	int ret;
7091  
7092  	ret = tracing_check_open_get_tr(tr);
7093  	if (ret)
7094  		return ret;
7095  
7096  	ret = seq_open(file, &last_boot_seq_ops);
7097  	if (ret) {
7098  		trace_array_put(tr);
7099  		return ret;
7100  	}
7101  
7102  	m = file->private_data;
7103  	m->private = tr;
7104  
7105  	return 0;
7106  }
7107  
7108  static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7109  {
7110  	struct trace_array *tr = inode->i_private;
7111  	int cpu = tracing_get_cpu(inode);
7112  	int ret;
7113  
7114  	ret = tracing_check_open_get_tr(tr);
7115  	if (ret)
7116  		return ret;
7117  
7118  	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7119  	if (ret < 0)
7120  		__trace_array_put(tr);
7121  	return ret;
7122  }
7123  
7124  static ssize_t
7125  tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7126  			  size_t cnt, loff_t *ppos)
7127  {
7128  	/*
7129  	 * There is no need to read what the user has written; this function
7130  	 * only exists so that using "echo" on the file does not return an error.
7131  	 */
7132  
7133  	*ppos += cnt;
7134  
7135  	return cnt;
7136  }
7137  
7138  static int
7139  tracing_free_buffer_release(struct inode *inode, struct file *filp)
7140  {
7141  	struct trace_array *tr = inode->i_private;
7142  
7143  	/* disable tracing ? */
7144  	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7145  		tracer_tracing_off(tr);
7146  	/* resize the ring buffer to 0 */
7147  	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7148  
7149  	trace_array_put(tr);
7150  
7151  	return 0;
7152  }
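/*
 * Editorial note (not part of the original source): the pair of handlers
 * above back the "free_buffer" file. A sketch of the intended use, assuming
 * the default tracefs mount: a tracing process keeps the file open, and the
 * release handler shrinks the ring buffer when the process exits or is
 * killed, optionally stopping tracing first (TRACE_ITER_STOP_ON_FREE):
 *
 *   exec 9> /sys/kernel/tracing/free_buffer   # hold the fd open while tracing
 *   ...                                       # run the traced workload
 *   exec 9>&-                                 # closing it frees the buffer
 */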
7153  
7154  #define TRACE_MARKER_MAX_SIZE		4096
7155  
7156  static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7157  				      size_t cnt, unsigned long ip)
7158  {
7159  	struct ring_buffer_event *event;
7160  	enum event_trigger_type tt = ETT_NONE;
7161  	struct trace_buffer *buffer;
7162  	struct print_entry *entry;
7163  	int meta_size;
7164  	ssize_t written;
7165  	size_t size;
7166  	int len;
7167  
7168  /* Used in tracing_mark_raw_write() as well */
7169  #define FAULTED_STR "<faulted>"
7170  #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7171  
7172  	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7173   again:
7174  	size = cnt + meta_size;
7175  
7176  	/* If less than "<faulted>", then make sure we can still add that */
7177  	if (cnt < FAULTED_SIZE)
7178  		size += FAULTED_SIZE - cnt;
7179  
7180  	buffer = tr->array_buffer.buffer;
7181  	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7182  					    tracing_gen_ctx());
7183  	if (unlikely(!event)) {
7184  		/*
7185  		 * If the size was greater than what was allowed, then
7186  		 * make it smaller and try again.
7187  		 */
7188  		if (size > ring_buffer_max_event_size(buffer)) {
7189  			/* A size with cnt < FAULTED_SIZE should never be bigger than max */
7190  			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7191  				return -EBADF;
7192  			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7193  			/* The above should only happen once */
7194  			if (WARN_ON_ONCE(cnt + meta_size == size))
7195  				return -EBADF;
7196  			goto again;
7197  		}
7198  
7199  		/* Ring buffer disabled, return as if not open for write */
7200  		return -EBADF;
7201  	}
7202  
7203  	entry = ring_buffer_event_data(event);
7204  	entry->ip = ip;
7205  
7206  	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7207  	if (len) {
7208  		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7209  		cnt = FAULTED_SIZE;
7210  		written = -EFAULT;
7211  	} else
7212  		written = cnt;
7213  
7214  	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7215  		/* do not add \n before testing triggers, but add \0 */
7216  		entry->buf[cnt] = '\0';
7217  		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7218  	}
7219  
7220  	if (entry->buf[cnt - 1] != '\n') {
7221  		entry->buf[cnt] = '\n';
7222  		entry->buf[cnt + 1] = '\0';
7223  	} else
7224  		entry->buf[cnt] = '\0';
7225  
7226  	if (static_branch_unlikely(&trace_marker_exports_enabled))
7227  		ftrace_exports(event, TRACE_EXPORT_MARKER);
7228  	__buffer_unlock_commit(buffer, event);
7229  
7230  	if (tt)
7231  		event_triggers_post_call(tr->trace_marker_file, tt);
7232  
7233  	return written;
7234  }
7235  
7236  static ssize_t
7237  tracing_mark_write(struct file *filp, const char __user *ubuf,
7238  					size_t cnt, loff_t *fpos)
7239  {
7240  	struct trace_array *tr = filp->private_data;
7241  	ssize_t written = -ENODEV;
7242  	unsigned long ip;
7243  
7244  	if (tracing_disabled)
7245  		return -EINVAL;
7246  
7247  	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7248  		return -EINVAL;
7249  
7250  	if ((ssize_t)cnt < 0)
7251  		return -EINVAL;
7252  
7253  	if (cnt > TRACE_MARKER_MAX_SIZE)
7254  		cnt = TRACE_MARKER_MAX_SIZE;
7255  
7256  	/* The selftests expect the IP to be the address of this function */
7257  	ip = _THIS_IP_;
7258  
7259  	/* The global trace_marker can go to multiple instances */
7260  	if (tr == &global_trace) {
7261  		guard(rcu)();
7262  		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7263  			written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7264  			if (written < 0)
7265  				break;
7266  		}
7267  	} else {
7268  		written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7269  	}
7270  
7271  	return written;
7272  }
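/*
 * Editorial note (not part of the original source): tracing_mark_write()
 * backs the "trace_marker" file. A sketch of typical use, assuming the
 * default tracefs mount; writes longer than TRACE_MARKER_MAX_SIZE are
 * truncated and the text shows up as a print event in the trace:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * From C, applications usually keep the fd open and use plain write(2):
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *   write(fd, "frame start", 11);
 */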
7273  
7274  static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7275  					  const char __user *ubuf, size_t cnt)
7276  {
7277  	struct ring_buffer_event *event;
7278  	struct trace_buffer *buffer;
7279  	struct raw_data_entry *entry;
7280  	ssize_t written;
7281  	int size;
7282  	int len;
7283  
7284  #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7285  
7286  	size = sizeof(*entry) + cnt;
7287  	if (cnt < FAULT_SIZE_ID)
7288  		size += FAULT_SIZE_ID - cnt;
7289  
7290  	buffer = tr->array_buffer.buffer;
7291  
7292  	if (size > ring_buffer_max_event_size(buffer))
7293  		return -EINVAL;
7294  
7295  	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7296  					    tracing_gen_ctx());
7297  	if (!event)
7298  		/* Ring buffer disabled, return as if not open for write */
7299  		return -EBADF;
7300  
7301  	entry = ring_buffer_event_data(event);
7302  
7303  	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7304  	if (len) {
7305  		entry->id = -1;
7306  		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7307  		written = -EFAULT;
7308  	} else
7309  		written = cnt;
7310  
7311  	__buffer_unlock_commit(buffer, event);
7312  
7313  	return written;
7314  }
7315  
7316  static ssize_t
7317  tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7318  					size_t cnt, loff_t *fpos)
7319  {
7320  	struct trace_array *tr = filp->private_data;
7321  	ssize_t written = -ENODEV;
7322  
7323  #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7324  
7325  	if (tracing_disabled)
7326  		return -EINVAL;
7327  
7328  	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7329  		return -EINVAL;
7330  
7331  	/* The marker must at least have a tag id */
7332  	if (cnt < sizeof(unsigned int))
7333  		return -EINVAL;
7334  
7335  	/* The global trace_marker_raw can go to multiple instances */
7336  	if (tr == &global_trace) {
7337  		guard(rcu)();
7338  		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7339  			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7340  			if (written < 0)
7341  				break;
7342  		}
7343  	} else {
7344  		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7345  	}
7346  
7347  	return written;
7348  }
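/*
 * Editorial note (not part of the original source): tracing_mark_raw_write()
 * backs the "trace_marker_raw" binary interface. A sketch under the
 * assumption that the payload starts with a 4-byte id (as enforced by the
 * cnt < sizeof(unsigned int) check above), followed by opaque data:
 *
 *   struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));
 */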
7349  
7350  static int tracing_clock_show(struct seq_file *m, void *v)
7351  {
7352  	struct trace_array *tr = m->private;
7353  	int i;
7354  
7355  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7356  		seq_printf(m,
7357  			"%s%s%s%s", i ? " " : "",
7358  			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7359  			i == tr->clock_id ? "]" : "");
7360  	seq_putc(m, '\n');
7361  
7362  	return 0;
7363  }
7364  
7365  int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7366  {
7367  	int i;
7368  
7369  	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7370  		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7371  			break;
7372  	}
7373  	if (i == ARRAY_SIZE(trace_clocks))
7374  		return -EINVAL;
7375  
7376  	guard(mutex)(&trace_types_lock);
7377  
7378  	tr->clock_id = i;
7379  
7380  	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7381  
7382  	/*
7383  	 * The new clock may not be consistent with the previous clock.
7384  	 * Reset the buffer so that it doesn't have incomparable timestamps.
7385  	 */
7386  	tracing_reset_online_cpus(&tr->array_buffer);
7387  
7388  #ifdef CONFIG_TRACER_MAX_TRACE
7389  	if (tr->max_buffer.buffer)
7390  		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7391  	tracing_reset_online_cpus(&tr->max_buffer);
7392  #endif
7393  
7394  	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7395  		struct trace_scratch *tscratch = tr->scratch;
7396  
7397  		tscratch->clock_id = i;
7398  	}
7399  
7400  	return 0;
7401  }
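/*
 * Editorial note (not part of the original source): tracing_set_clock() is
 * what a write to the "trace_clock" file ends up calling. A sketch, assuming
 * the default tracefs mount; note the comment above about the buffer being
 * reset when the clock changes:
 *
 *   cat /sys/kernel/tracing/trace_clock      # e.g. "[local] global ... mono ..."
 *   echo mono > /sys/kernel/tracing/trace_clock
 */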
7402  
7403  static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7404  				   size_t cnt, loff_t *fpos)
7405  {
7406  	struct seq_file *m = filp->private_data;
7407  	struct trace_array *tr = m->private;
7408  	char buf[64];
7409  	const char *clockstr;
7410  	int ret;
7411  
7412  	if (cnt >= sizeof(buf))
7413  		return -EINVAL;
7414  
7415  	if (copy_from_user(buf, ubuf, cnt))
7416  		return -EFAULT;
7417  
7418  	buf[cnt] = 0;
7419  
7420  	clockstr = strstrip(buf);
7421  
7422  	ret = tracing_set_clock(tr, clockstr);
7423  	if (ret)
7424  		return ret;
7425  
7426  	*fpos += cnt;
7427  
7428  	return cnt;
7429  }
7430  
7431  static int tracing_clock_open(struct inode *inode, struct file *file)
7432  {
7433  	struct trace_array *tr = inode->i_private;
7434  	int ret;
7435  
7436  	ret = tracing_check_open_get_tr(tr);
7437  	if (ret)
7438  		return ret;
7439  
7440  	ret = single_open(file, tracing_clock_show, inode->i_private);
7441  	if (ret < 0)
7442  		trace_array_put(tr);
7443  
7444  	return ret;
7445  }
7446  
7447  static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7448  {
7449  	struct trace_array *tr = m->private;
7450  
7451  	guard(mutex)(&trace_types_lock);
7452  
7453  	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7454  		seq_puts(m, "delta [absolute]\n");
7455  	else
7456  		seq_puts(m, "[delta] absolute\n");
7457  
7458  	return 0;
7459  }
7460  
7461  static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7462  {
7463  	struct trace_array *tr = inode->i_private;
7464  	int ret;
7465  
7466  	ret = tracing_check_open_get_tr(tr);
7467  	if (ret)
7468  		return ret;
7469  
7470  	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7471  	if (ret < 0)
7472  		trace_array_put(tr);
7473  
7474  	return ret;
7475  }
7476  
7477  u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7478  {
7479  	if (rbe == this_cpu_read(trace_buffered_event))
7480  		return ring_buffer_time_stamp(buffer);
7481  
7482  	return ring_buffer_event_time_stamp(buffer, rbe);
7483  }
7484  
7485  /*
7486   * Set or disable using the per CPU trace_buffered_event when possible.
7487   */
7488  int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7489  {
7490  	guard(mutex)(&trace_types_lock);
7491  
7492  	if (set && tr->no_filter_buffering_ref++)
7493  		return 0;
7494  
7495  	if (!set) {
7496  		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7497  			return -EINVAL;
7498  
7499  		--tr->no_filter_buffering_ref;
7500  	}
7501  
7502  	return 0;
7503  }
7504  
7505  struct ftrace_buffer_info {
7506  	struct trace_iterator	iter;
7507  	void			*spare;
7508  	unsigned int		spare_cpu;
7509  	unsigned int		spare_size;
7510  	unsigned int		read;
7511  };
7512  
7513  #ifdef CONFIG_TRACER_SNAPSHOT
7514  static int tracing_snapshot_open(struct inode *inode, struct file *file)
7515  {
7516  	struct trace_array *tr = inode->i_private;
7517  	struct trace_iterator *iter;
7518  	struct seq_file *m;
7519  	int ret;
7520  
7521  	ret = tracing_check_open_get_tr(tr);
7522  	if (ret)
7523  		return ret;
7524  
7525  	if (file->f_mode & FMODE_READ) {
7526  		iter = __tracing_open(inode, file, true);
7527  		if (IS_ERR(iter))
7528  			ret = PTR_ERR(iter);
7529  	} else {
7530  		/* Writes still need the seq_file to hold the private data */
7531  		ret = -ENOMEM;
7532  		m = kzalloc(sizeof(*m), GFP_KERNEL);
7533  		if (!m)
7534  			goto out;
7535  		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7536  		if (!iter) {
7537  			kfree(m);
7538  			goto out;
7539  		}
7540  		ret = 0;
7541  
7542  		iter->tr = tr;
7543  		iter->array_buffer = &tr->max_buffer;
7544  		iter->cpu_file = tracing_get_cpu(inode);
7545  		m->private = iter;
7546  		file->private_data = m;
7547  	}
7548  out:
7549  	if (ret < 0)
7550  		trace_array_put(tr);
7551  
7552  	return ret;
7553  }
7554  
7555  static void tracing_swap_cpu_buffer(void *tr)
7556  {
7557  	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7558  }
7559  
7560  static ssize_t
7561  tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7562  		       loff_t *ppos)
7563  {
7564  	struct seq_file *m = filp->private_data;
7565  	struct trace_iterator *iter = m->private;
7566  	struct trace_array *tr = iter->tr;
7567  	unsigned long val;
7568  	int ret;
7569  
7570  	ret = tracing_update_buffers(tr);
7571  	if (ret < 0)
7572  		return ret;
7573  
7574  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7575  	if (ret)
7576  		return ret;
7577  
7578  	guard(mutex)(&trace_types_lock);
7579  
7580  	if (tr->current_trace->use_max_tr)
7581  		return -EBUSY;
7582  
7583  	local_irq_disable();
7584  	arch_spin_lock(&tr->max_lock);
7585  	if (tr->cond_snapshot)
7586  		ret = -EBUSY;
7587  	arch_spin_unlock(&tr->max_lock);
7588  	local_irq_enable();
7589  	if (ret)
7590  		return ret;
7591  
7592  	switch (val) {
7593  	case 0:
7594  		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7595  			return -EINVAL;
7596  		if (tr->allocated_snapshot)
7597  			free_snapshot(tr);
7598  		break;
7599  	case 1:
7600  /* Only allow per-cpu swap if the ring buffer supports it */
7601  #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7602  		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7603  			return -EINVAL;
7604  #endif
7605  		if (tr->allocated_snapshot)
7606  			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7607  					&tr->array_buffer, iter->cpu_file);
7608  
7609  		ret = tracing_arm_snapshot_locked(tr);
7610  		if (ret)
7611  			return ret;
7612  
7613  		/* Now, we're going to swap */
7614  		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7615  			local_irq_disable();
7616  			update_max_tr(tr, current, smp_processor_id(), NULL);
7617  			local_irq_enable();
7618  		} else {
7619  			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7620  						 (void *)tr, 1);
7621  		}
7622  		tracing_disarm_snapshot(tr);
7623  		break;
7624  	default:
7625  		if (tr->allocated_snapshot) {
7626  			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7627  				tracing_reset_online_cpus(&tr->max_buffer);
7628  			else
7629  				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7630  		}
7631  		break;
7632  	}
7633  
7634  	if (ret >= 0) {
7635  		*ppos += cnt;
7636  		ret = cnt;
7637  	}
7638  
7639  	return ret;
7640  }
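/*
 * Editorial note (not part of the original source): the switch statement in
 * tracing_snapshot_write() implements the documented "snapshot" file
 * semantics. A sketch, assuming the default tracefs mount:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate (if needed) and swap
 *   cat /sys/kernel/tracing/snapshot        # read the snapshotted buffer
 *   echo 2 > /sys/kernel/tracing/snapshot   # clear snapshot, keep allocation
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */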
7641  
7642  static int tracing_snapshot_release(struct inode *inode, struct file *file)
7643  {
7644  	struct seq_file *m = file->private_data;
7645  	int ret;
7646  
7647  	ret = tracing_release(inode, file);
7648  
7649  	if (file->f_mode & FMODE_READ)
7650  		return ret;
7651  
7652  	/* If write only, the seq_file is just a stub */
7653  	if (m)
7654  		kfree(m->private);
7655  	kfree(m);
7656  
7657  	return 0;
7658  }
7659  
7660  static int tracing_buffers_open(struct inode *inode, struct file *filp);
7661  static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7662  				    size_t count, loff_t *ppos);
7663  static int tracing_buffers_release(struct inode *inode, struct file *file);
7664  static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7665  		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7666  
7667  static int snapshot_raw_open(struct inode *inode, struct file *filp)
7668  {
7669  	struct ftrace_buffer_info *info;
7670  	int ret;
7671  
7672  	/* The following checks for tracefs lockdown */
7673  	ret = tracing_buffers_open(inode, filp);
7674  	if (ret < 0)
7675  		return ret;
7676  
7677  	info = filp->private_data;
7678  
7679  	if (info->iter.trace->use_max_tr) {
7680  		tracing_buffers_release(inode, filp);
7681  		return -EBUSY;
7682  	}
7683  
7684  	info->iter.snapshot = true;
7685  	info->iter.array_buffer = &info->iter.tr->max_buffer;
7686  
7687  	return ret;
7688  }
7689  
7690  #endif /* CONFIG_TRACER_SNAPSHOT */
7691  
7692  
7693  static const struct file_operations tracing_thresh_fops = {
7694  	.open		= tracing_open_generic,
7695  	.read		= tracing_thresh_read,
7696  	.write		= tracing_thresh_write,
7697  	.llseek		= generic_file_llseek,
7698  };
7699  
7700  #ifdef CONFIG_TRACER_MAX_TRACE
7701  static const struct file_operations tracing_max_lat_fops = {
7702  	.open		= tracing_open_generic_tr,
7703  	.read		= tracing_max_lat_read,
7704  	.write		= tracing_max_lat_write,
7705  	.llseek		= generic_file_llseek,
7706  	.release	= tracing_release_generic_tr,
7707  };
7708  #endif
7709  
7710  static const struct file_operations set_tracer_fops = {
7711  	.open		= tracing_open_generic_tr,
7712  	.read		= tracing_set_trace_read,
7713  	.write		= tracing_set_trace_write,
7714  	.llseek		= generic_file_llseek,
7715  	.release	= tracing_release_generic_tr,
7716  };
7717  
7718  static const struct file_operations tracing_pipe_fops = {
7719  	.open		= tracing_open_pipe,
7720  	.poll		= tracing_poll_pipe,
7721  	.read		= tracing_read_pipe,
7722  	.splice_read	= tracing_splice_read_pipe,
7723  	.release	= tracing_release_pipe,
7724  };
7725  
7726  static const struct file_operations tracing_entries_fops = {
7727  	.open		= tracing_open_generic_tr,
7728  	.read		= tracing_entries_read,
7729  	.write		= tracing_entries_write,
7730  	.llseek		= generic_file_llseek,
7731  	.release	= tracing_release_generic_tr,
7732  };
7733  
7734  static const struct file_operations tracing_buffer_meta_fops = {
7735  	.open		= tracing_buffer_meta_open,
7736  	.read		= seq_read,
7737  	.llseek		= seq_lseek,
7738  	.release	= tracing_seq_release,
7739  };
7740  
7741  static const struct file_operations tracing_total_entries_fops = {
7742  	.open		= tracing_open_generic_tr,
7743  	.read		= tracing_total_entries_read,
7744  	.llseek		= generic_file_llseek,
7745  	.release	= tracing_release_generic_tr,
7746  };
7747  
7748  static const struct file_operations tracing_free_buffer_fops = {
7749  	.open		= tracing_open_generic_tr,
7750  	.write		= tracing_free_buffer_write,
7751  	.release	= tracing_free_buffer_release,
7752  };
7753  
7754  static const struct file_operations tracing_mark_fops = {
7755  	.open		= tracing_mark_open,
7756  	.write		= tracing_mark_write,
7757  	.release	= tracing_release_generic_tr,
7758  };
7759  
7760  static const struct file_operations tracing_mark_raw_fops = {
7761  	.open		= tracing_mark_open,
7762  	.write		= tracing_mark_raw_write,
7763  	.release	= tracing_release_generic_tr,
7764  };
7765  
7766  static const struct file_operations trace_clock_fops = {
7767  	.open		= tracing_clock_open,
7768  	.read		= seq_read,
7769  	.llseek		= seq_lseek,
7770  	.release	= tracing_single_release_tr,
7771  	.write		= tracing_clock_write,
7772  };
7773  
7774  static const struct file_operations trace_time_stamp_mode_fops = {
7775  	.open		= tracing_time_stamp_mode_open,
7776  	.read		= seq_read,
7777  	.llseek		= seq_lseek,
7778  	.release	= tracing_single_release_tr,
7779  };
7780  
7781  static const struct file_operations last_boot_fops = {
7782  	.open		= tracing_last_boot_open,
7783  	.read		= seq_read,
7784  	.llseek		= seq_lseek,
7785  	.release	= tracing_seq_release,
7786  };
7787  
7788  #ifdef CONFIG_TRACER_SNAPSHOT
7789  static const struct file_operations snapshot_fops = {
7790  	.open		= tracing_snapshot_open,
7791  	.read		= seq_read,
7792  	.write		= tracing_snapshot_write,
7793  	.llseek		= tracing_lseek,
7794  	.release	= tracing_snapshot_release,
7795  };
7796  
7797  static const struct file_operations snapshot_raw_fops = {
7798  	.open		= snapshot_raw_open,
7799  	.read		= tracing_buffers_read,
7800  	.release	= tracing_buffers_release,
7801  	.splice_read	= tracing_buffers_splice_read,
7802  };
7803  
7804  #endif /* CONFIG_TRACER_SNAPSHOT */
7805  
7806  /*
7807   * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7808   * @filp: The active open file structure
7809   * @ubuf: The userspace provided buffer to read the value from
7810   * @cnt: The maximum number of bytes to read
7811   * @ppos: The current "file" position
7812   *
7813   * This function implements the write interface for a struct trace_min_max_param.
7814   * The filp->private_data must point to a trace_min_max_param structure that
7815   * defines where to write the value, the min and the max acceptable values,
7816   * and a lock to protect the write.
7817   */
7818  static ssize_t
7819  trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7820  {
7821  	struct trace_min_max_param *param = filp->private_data;
7822  	u64 val;
7823  	int err;
7824  
7825  	if (!param)
7826  		return -EFAULT;
7827  
7828  	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7829  	if (err)
7830  		return err;
7831  
7832  	if (param->lock)
7833  		mutex_lock(param->lock);
7834  
7835  	if (param->min && val < *param->min)
7836  		err = -EINVAL;
7837  
7838  	if (param->max && val > *param->max)
7839  		err = -EINVAL;
7840  
7841  	if (!err)
7842  		*param->val = val;
7843  
7844  	if (param->lock)
7845  		mutex_unlock(param->lock);
7846  
7847  	if (err)
7848  		return err;
7849  
7850  	return cnt;
7851  }
7852  
7853  /*
7854   * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7855   * @filp: The active open file structure
7856   * @ubuf: The userspace provided buffer to read value into
7857   * @cnt: The maximum number of bytes to read
7858   * @ppos: The current "file" position
7859   *
7860   * This function implements the read interface for a struct trace_min_max_param.
7861   * The filp->private_data must point to a trace_min_max_param struct with valid
7862   * data.
7863   */
7864  static ssize_t
7865  trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7866  {
7867  	struct trace_min_max_param *param = filp->private_data;
7868  	char buf[U64_STR_SIZE];
7869  	int len;
7870  	u64 val;
7871  
7872  	if (!param)
7873  		return -EFAULT;
7874  
7875  	val = *param->val;
7876  
7877  	if (cnt > sizeof(buf))
7878  		cnt = sizeof(buf);
7879  
7880  	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7881  
7882  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7883  }
7884  
7885  const struct file_operations trace_min_max_fops = {
7886  	.open		= tracing_open_generic,
7887  	.read		= trace_min_max_read,
7888  	.write		= trace_min_max_write,
7889  };
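/*
 * Editorial note (not part of the original source): a minimal sketch of how
 * trace_min_max_fops is meant to be wired up, based on the descriptions in
 * the comments above. The names below (my_val, my_min, my_max, my_lock,
 * my_param, "my_knob", parent_dir) are hypothetical, not taken from this file:
 *
 *   static u64 my_val, my_min = 1, my_max = 1000;
 *   static DEFINE_MUTEX(my_lock);
 *
 *   static struct trace_min_max_param my_param = {
 *           .lock = &my_lock,
 *           .val  = &my_val,
 *           .min  = &my_min,
 *           .max  = &my_max,
 *   };
 *
 *   tracefs_create_file("my_knob", TRACE_MODE_WRITE, parent_dir,
 *                       &my_param, &trace_min_max_fops);
 */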
7890  
7891  #define TRACING_LOG_ERRS_MAX	8
7892  #define TRACING_LOG_LOC_MAX	128
7893  
7894  #define CMD_PREFIX "  Command: "
7895  
7896  struct err_info {
7897  	const char	**errs;	/* ptr to loc-specific array of err strings */
7898  	u8		type;	/* index into errs -> specific err string */
7899  	u16		pos;	/* caret position */
7900  	u64		ts;
7901  };
7902  
7903  struct tracing_log_err {
7904  	struct list_head	list;
7905  	struct err_info		info;
7906  	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7907  	char			*cmd;                     /* what caused err */
7908  };
7909  
7910  static DEFINE_MUTEX(tracing_err_log_lock);
7911  
7912  static struct tracing_log_err *alloc_tracing_log_err(int len)
7913  {
7914  	struct tracing_log_err *err;
7915  
7916  	err = kzalloc(sizeof(*err), GFP_KERNEL);
7917  	if (!err)
7918  		return ERR_PTR(-ENOMEM);
7919  
7920  	err->cmd = kzalloc(len, GFP_KERNEL);
7921  	if (!err->cmd) {
7922  		kfree(err);
7923  		return ERR_PTR(-ENOMEM);
7924  	}
7925  
7926  	return err;
7927  }
7928  
7929  static void free_tracing_log_err(struct tracing_log_err *err)
7930  {
7931  	kfree(err->cmd);
7932  	kfree(err);
7933  }
7934  
7935  static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7936  						   int len)
7937  {
7938  	struct tracing_log_err *err;
7939  	char *cmd;
7940  
7941  	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7942  		err = alloc_tracing_log_err(len);
7943  		if (PTR_ERR(err) != -ENOMEM)
7944  			tr->n_err_log_entries++;
7945  
7946  		return err;
7947  	}
7948  	cmd = kzalloc(len, GFP_KERNEL);
7949  	if (!cmd)
7950  		return ERR_PTR(-ENOMEM);
7951  	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7952  	kfree(err->cmd);
7953  	err->cmd = cmd;
7954  	list_del(&err->list);
7955  
7956  	return err;
7957  }
7958  
7959  /**
7960   * err_pos - find the position of a string within a command for error careting
7961   * @cmd: The tracing command that caused the error
7962   * @str: The string to position the caret at within @cmd
7963   *
7964   * Finds the position of the first occurrence of @str within @cmd.  The
7965   * return value can be passed to tracing_log_err() for caret placement
7966   * within @cmd.
7967   *
7968   * Returns the index within @cmd of the first occurrence of @str or 0
7969   * if @str was not found.
7970   */
7971  unsigned int err_pos(char *cmd, const char *str)
7972  {
7973  	char *found;
7974  
7975  	if (WARN_ON(!strlen(cmd)))
7976  		return 0;
7977  
7978  	found = strstr(cmd, str);
7979  	if (found)
7980  		return found - cmd;
7981  
7982  	return 0;
7983  }
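/*
 * Editorial note (not part of the original source): an illustrative call,
 * with a made-up command string:
 *
 *   err_pos("hist:keys=foo", "foo");   // returns 10, the offset of "foo"
 *   err_pos("hist:keys=foo", "bar");   // returns 0, "bar" is not present
 */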
7984  
7985  /**
7986   * tracing_log_err - write an error to the tracing error log
7987   * @tr: The associated trace array for the error (NULL for top level array)
7988   * @loc: A string describing where the error occurred
7989   * @cmd: The tracing command that caused the error
7990   * @errs: The array of loc-specific static error strings
7991   * @type: The index into errs[], which produces the specific static err string
7992   * @pos: The position the caret should be placed in the cmd
7993   *
7994   * Writes an error into tracing/error_log of the form:
7995   *
7996   * <loc>: error: <text>
7997   *   Command: <cmd>
7998   *              ^
7999   *
8000   * tracing/error_log is a small log file containing the last
8001   * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8002   * unless there has been a tracing error, and the error log can be
8003   * cleared and have its memory freed by writing the empty string in
8004   * truncation mode to it i.e. echo > tracing/error_log.
8005   *
8006   * NOTE: the @errs array along with the @type param are used to
8007   * produce a static error string - this string is not copied and saved
8008   * when the error is logged - only a pointer to it is saved.  See
8009   * existing callers for examples of how static strings are typically
8010   * defined for use with tracing_log_err().
8011   */
8012  void tracing_log_err(struct trace_array *tr,
8013  		     const char *loc, const char *cmd,
8014  		     const char **errs, u8 type, u16 pos)
8015  {
8016  	struct tracing_log_err *err;
8017  	int len = 0;
8018  
8019  	if (!tr)
8020  		tr = &global_trace;
8021  
8022  	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8023  
8024  	guard(mutex)(&tracing_err_log_lock);
8025  
8026  	err = get_tracing_log_err(tr, len);
8027  	if (PTR_ERR(err) == -ENOMEM)
8028  		return;
8029  
8030  	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8031  	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8032  
8033  	err->info.errs = errs;
8034  	err->info.type = type;
8035  	err->info.pos = pos;
8036  	err->info.ts = local_clock();
8037  
8038  	list_add_tail(&err->list, &tr->err_log);
8039  }
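/*
 * Editorial note (not part of the original source): a sketch of how a caller
 * typically uses tracing_log_err() together with err_pos(); the error table
 * and command string below are hypothetical, not from a real tracer:
 *
 *   static const char *my_errs[] = { "Missing key", "Duplicate key" };
 *
 *   tracing_log_err(tr, "my_tracer", cmd, my_errs, 1, err_pos(cmd, "keys"));
 *
 * which would show up in tracing/error_log roughly as:
 *
 *   [   12.345678] my_tracer: error: Duplicate key
 *     Command: <cmd>
 *                   ^
 */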
8040  
8041  static void clear_tracing_err_log(struct trace_array *tr)
8042  {
8043  	struct tracing_log_err *err, *next;
8044  
8045  	guard(mutex)(&tracing_err_log_lock);
8046  
8047  	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8048  		list_del(&err->list);
8049  		free_tracing_log_err(err);
8050  	}
8051  
8052  	tr->n_err_log_entries = 0;
8053  }
8054  
8055  static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8056  {
8057  	struct trace_array *tr = m->private;
8058  
8059  	mutex_lock(&tracing_err_log_lock);
8060  
8061  	return seq_list_start(&tr->err_log, *pos);
8062  }
8063  
8064  static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8065  {
8066  	struct trace_array *tr = m->private;
8067  
8068  	return seq_list_next(v, &tr->err_log, pos);
8069  }
8070  
8071  static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8072  {
8073  	mutex_unlock(&tracing_err_log_lock);
8074  }
8075  
8076  static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8077  {
8078  	u16 i;
8079  
8080  	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8081  		seq_putc(m, ' ');
8082  	for (i = 0; i < pos; i++)
8083  		seq_putc(m, ' ');
8084  	seq_puts(m, "^\n");
8085  }
8086  
8087  static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8088  {
8089  	struct tracing_log_err *err = v;
8090  
8091  	if (err) {
8092  		const char *err_text = err->info.errs[err->info.type];
8093  		u64 sec = err->info.ts;
8094  		u32 nsec;
8095  
8096  		nsec = do_div(sec, NSEC_PER_SEC);
8097  		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8098  			   err->loc, err_text);
8099  		seq_printf(m, "%s", err->cmd);
8100  		tracing_err_log_show_pos(m, err->info.pos);
8101  	}
8102  
8103  	return 0;
8104  }
8105  
8106  static const struct seq_operations tracing_err_log_seq_ops = {
8107  	.start  = tracing_err_log_seq_start,
8108  	.next   = tracing_err_log_seq_next,
8109  	.stop   = tracing_err_log_seq_stop,
8110  	.show   = tracing_err_log_seq_show
8111  };
8112  
8113  static int tracing_err_log_open(struct inode *inode, struct file *file)
8114  {
8115  	struct trace_array *tr = inode->i_private;
8116  	int ret = 0;
8117  
8118  	ret = tracing_check_open_get_tr(tr);
8119  	if (ret)
8120  		return ret;
8121  
8122  	/* If this file was opened for write, then erase contents */
8123  	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8124  		clear_tracing_err_log(tr);
8125  
8126  	if (file->f_mode & FMODE_READ) {
8127  		ret = seq_open(file, &tracing_err_log_seq_ops);
8128  		if (!ret) {
8129  			struct seq_file *m = file->private_data;
8130  			m->private = tr;
8131  		} else {
8132  			trace_array_put(tr);
8133  		}
8134  	}
8135  	return ret;
8136  }
8137  
8138  static ssize_t tracing_err_log_write(struct file *file,
8139  				     const char __user *buffer,
8140  				     size_t count, loff_t *ppos)
8141  {
8142  	return count;
8143  }
8144  
8145  static int tracing_err_log_release(struct inode *inode, struct file *file)
8146  {
8147  	struct trace_array *tr = inode->i_private;
8148  
8149  	trace_array_put(tr);
8150  
8151  	if (file->f_mode & FMODE_READ)
8152  		seq_release(inode, file);
8153  
8154  	return 0;
8155  }
8156  
8157  static const struct file_operations tracing_err_log_fops = {
8158  	.open           = tracing_err_log_open,
8159  	.write		= tracing_err_log_write,
8160  	.read           = seq_read,
8161  	.llseek         = tracing_lseek,
8162  	.release        = tracing_err_log_release,
8163  };
8164  
8165  static int tracing_buffers_open(struct inode *inode, struct file *filp)
8166  {
8167  	struct trace_array *tr = inode->i_private;
8168  	struct ftrace_buffer_info *info;
8169  	int ret;
8170  
8171  	ret = tracing_check_open_get_tr(tr);
8172  	if (ret)
8173  		return ret;
8174  
8175  	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8176  	if (!info) {
8177  		trace_array_put(tr);
8178  		return -ENOMEM;
8179  	}
8180  
8181  	mutex_lock(&trace_types_lock);
8182  
8183  	info->iter.tr		= tr;
8184  	info->iter.cpu_file	= tracing_get_cpu(inode);
8185  	info->iter.trace	= tr->current_trace;
8186  	info->iter.array_buffer = &tr->array_buffer;
8187  	info->spare		= NULL;
8188  	/* Force reading ring buffer for first read */
8189  	info->read		= (unsigned int)-1;
8190  
8191  	filp->private_data = info;
8192  
8193  	tr->trace_ref++;
8194  
8195  	mutex_unlock(&trace_types_lock);
8196  
8197  	ret = nonseekable_open(inode, filp);
8198  	if (ret < 0)
8199  		trace_array_put(tr);
8200  
8201  	return ret;
8202  }
8203  
8204  static __poll_t
8205  tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8206  {
8207  	struct ftrace_buffer_info *info = filp->private_data;
8208  	struct trace_iterator *iter = &info->iter;
8209  
8210  	return trace_poll(iter, filp, poll_table);
8211  }
8212  
8213  static ssize_t
8214  tracing_buffers_read(struct file *filp, char __user *ubuf,
8215  		     size_t count, loff_t *ppos)
8216  {
8217  	struct ftrace_buffer_info *info = filp->private_data;
8218  	struct trace_iterator *iter = &info->iter;
8219  	void *trace_data;
8220  	int page_size;
8221  	ssize_t ret = 0;
8222  	ssize_t size;
8223  
8224  	if (!count)
8225  		return 0;
8226  
8227  #ifdef CONFIG_TRACER_MAX_TRACE
8228  	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8229  		return -EBUSY;
8230  #endif
8231  
8232  	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8233  
8234  	/* Make sure the spare matches the current sub buffer size */
8235  	if (info->spare) {
8236  		if (page_size != info->spare_size) {
8237  			ring_buffer_free_read_page(iter->array_buffer->buffer,
8238  						   info->spare_cpu, info->spare);
8239  			info->spare = NULL;
8240  		}
8241  	}
8242  
8243  	if (!info->spare) {
8244  		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8245  							  iter->cpu_file);
8246  		if (IS_ERR(info->spare)) {
8247  			ret = PTR_ERR(info->spare);
8248  			info->spare = NULL;
8249  		} else {
8250  			info->spare_cpu = iter->cpu_file;
8251  			info->spare_size = page_size;
8252  		}
8253  	}
8254  	if (!info->spare)
8255  		return ret;
8256  
8257  	/* Do we have previous read data to read? */
8258  	if (info->read < page_size)
8259  		goto read;
8260  
8261   again:
8262  	trace_access_lock(iter->cpu_file);
8263  	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8264  				    info->spare,
8265  				    count,
8266  				    iter->cpu_file, 0);
8267  	trace_access_unlock(iter->cpu_file);
8268  
8269  	if (ret < 0) {
8270  		if (trace_empty(iter) && !iter->closed) {
8271  			if (update_last_data_if_empty(iter->tr))
8272  				return 0;
8273  
8274  			if ((filp->f_flags & O_NONBLOCK))
8275  				return -EAGAIN;
8276  
8277  			ret = wait_on_pipe(iter, 0);
8278  			if (ret)
8279  				return ret;
8280  
8281  			goto again;
8282  		}
8283  		return 0;
8284  	}
8285  
8286  	info->read = 0;
8287   read:
8288  	size = page_size - info->read;
8289  	if (size > count)
8290  		size = count;
8291  	trace_data = ring_buffer_read_page_data(info->spare);
8292  	ret = copy_to_user(ubuf, trace_data + info->read, size);
8293  	if (ret == size)
8294  		return -EFAULT;
8295  
8296  	size -= ret;
8297  
8298  	*ppos += size;
8299  	info->read += size;
8300  
8301  	return size;
8302  }
8303  
8304  static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8305  {
8306  	struct ftrace_buffer_info *info = file->private_data;
8307  	struct trace_iterator *iter = &info->iter;
8308  
8309  	iter->closed = true;
8310  	/* Make sure the waiters see the new wait_index */
8311  	(void)atomic_fetch_inc_release(&iter->wait_index);
8312  
8313  	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8314  
8315  	return 0;
8316  }
8317  
8318  static int tracing_buffers_release(struct inode *inode, struct file *file)
8319  {
8320  	struct ftrace_buffer_info *info = file->private_data;
8321  	struct trace_iterator *iter = &info->iter;
8322  
8323  	guard(mutex)(&trace_types_lock);
8324  
8325  	iter->tr->trace_ref--;
8326  
8327  	__trace_array_put(iter->tr);
8328  
8329  	if (info->spare)
8330  		ring_buffer_free_read_page(iter->array_buffer->buffer,
8331  					   info->spare_cpu, info->spare);
8332  	kvfree(info);
8333  
8334  	return 0;
8335  }
8336  
8337  struct buffer_ref {
8338  	struct trace_buffer	*buffer;
8339  	void			*page;
8340  	int			cpu;
8341  	refcount_t		refcount;
8342  };
8343  
8344  static void buffer_ref_release(struct buffer_ref *ref)
8345  {
8346  	if (!refcount_dec_and_test(&ref->refcount))
8347  		return;
8348  	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8349  	kfree(ref);
8350  }
8351  
8352  static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8353  				    struct pipe_buffer *buf)
8354  {
8355  	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8356  
8357  	buffer_ref_release(ref);
8358  	buf->private = 0;
8359  }
8360  
8361  static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8362  				struct pipe_buffer *buf)
8363  {
8364  	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8365  
8366  	if (refcount_read(&ref->refcount) > INT_MAX/2)
8367  		return false;
8368  
8369  	refcount_inc(&ref->refcount);
8370  	return true;
8371  }
8372  
8373  /* Pipe buffer operations for a buffer. */
8374  static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8375  	.release		= buffer_pipe_buf_release,
8376  	.get			= buffer_pipe_buf_get,
8377  };
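/*
 * Editor's note: each page handed to splice_to_pipe() below carries a
 * buffer_ref in pipe_buffer->private. The .get/.release callbacks keep the
 * underlying ring-buffer read page alive until the last pipe reference is
 * dropped, which is what makes the splice path zero-copy.
 */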
8378  
8379  /*
8380   * Callback from splice_to_pipe(); releases any pages left in the spd
8381   * if we errored out while filling the pipe.
8382   */
8383  static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8384  {
8385  	struct buffer_ref *ref =
8386  		(struct buffer_ref *)spd->partial[i].private;
8387  
8388  	buffer_ref_release(ref);
8389  	spd->partial[i].private = 0;
8390  }
8391  
8392  static ssize_t
8393  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8394  			    struct pipe_inode_info *pipe, size_t len,
8395  			    unsigned int flags)
8396  {
8397  	struct ftrace_buffer_info *info = file->private_data;
8398  	struct trace_iterator *iter = &info->iter;
8399  	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8400  	struct page *pages_def[PIPE_DEF_BUFFERS];
8401  	struct splice_pipe_desc spd = {
8402  		.pages		= pages_def,
8403  		.partial	= partial_def,
8404  		.nr_pages_max	= PIPE_DEF_BUFFERS,
8405  		.ops		= &buffer_pipe_buf_ops,
8406  		.spd_release	= buffer_spd_release,
8407  	};
8408  	struct buffer_ref *ref;
8409  	bool woken = false;
8410  	int page_size;
8411  	int entries, i;
8412  	ssize_t ret = 0;
8413  
8414  #ifdef CONFIG_TRACER_MAX_TRACE
8415  	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8416  		return -EBUSY;
8417  #endif
8418  
8419  	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8420  	if (*ppos & (page_size - 1))
8421  		return -EINVAL;
8422  
8423  	if (len & (page_size - 1)) {
8424  		if (len < page_size)
8425  			return -EINVAL;
8426  		len &= (~(page_size - 1));
8427  	}
8428  
8429  	if (splice_grow_spd(pipe, &spd))
8430  		return -ENOMEM;
8431  
8432   again:
8433  	trace_access_lock(iter->cpu_file);
8434  	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8435  
8436  	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8437  		struct page *page;
8438  		int r;
8439  
8440  		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8441  		if (!ref) {
8442  			ret = -ENOMEM;
8443  			break;
8444  		}
8445  
8446  		refcount_set(&ref->refcount, 1);
8447  		ref->buffer = iter->array_buffer->buffer;
8448  		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8449  		if (IS_ERR(ref->page)) {
8450  			ret = PTR_ERR(ref->page);
8451  			ref->page = NULL;
8452  			kfree(ref);
8453  			break;
8454  		}
8455  		ref->cpu = iter->cpu_file;
8456  
8457  		r = ring_buffer_read_page(ref->buffer, ref->page,
8458  					  len, iter->cpu_file, 1);
8459  		if (r < 0) {
8460  			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8461  						   ref->page);
8462  			kfree(ref);
8463  			break;
8464  		}
8465  
8466  		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8467  
8468  		spd.pages[i] = page;
8469  		spd.partial[i].len = page_size;
8470  		spd.partial[i].offset = 0;
8471  		spd.partial[i].private = (unsigned long)ref;
8472  		spd.nr_pages++;
8473  		*ppos += page_size;
8474  
8475  		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8476  	}
8477  
8478  	trace_access_unlock(iter->cpu_file);
8479  	spd.nr_pages = i;
8480  
8481  	/* did we read anything? */
8482  	if (!spd.nr_pages) {
8483  
8484  		if (ret)
8485  			goto out;
8486  
8487  		if (woken)
8488  			goto out;
8489  
8490  		ret = -EAGAIN;
8491  		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8492  			goto out;
8493  
8494  		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8495  		if (ret)
8496  			goto out;
8497  
8498  		/* No need to wait after waking up when tracing is off */
8499  		if (!tracer_tracing_is_on(iter->tr))
8500  			goto out;
8501  
8502  		/* Iterate one more time to collect any new data then exit */
8503  		woken = true;
8504  
8505  		goto again;
8506  	}
8507  
8508  	ret = splice_to_pipe(pipe, &spd);
8509  out:
8510  	splice_shrink_spd(&spd);
8511  
8512  	return ret;
8513  }
8514  
8515  static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8516  {
8517  	struct ftrace_buffer_info *info = file->private_data;
8518  	struct trace_iterator *iter = &info->iter;
8519  	int err;
8520  
8521  	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8522  		if (!(file->f_flags & O_NONBLOCK)) {
8523  			err = ring_buffer_wait(iter->array_buffer->buffer,
8524  					       iter->cpu_file,
8525  					       iter->tr->buffer_percent,
8526  					       NULL, NULL);
8527  			if (err)
8528  				return err;
8529  		}
8530  
8531  		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8532  						  iter->cpu_file);
8533  	} else if (cmd) {
8534  		return -ENOTTY;
8535  	}
8536  
8537  	/*
8538  	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8539  	 * waiters
8540  	 */
8541  	guard(mutex)(&trace_types_lock);
8542  
8543  	/* Make sure the waiters see the new wait_index */
8544  	(void)atomic_fetch_inc_release(&iter->wait_index);
8545  
8546  	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8547  
8548  	return 0;
8549  }
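/*
 * Editor's note: illustrative user-space usage of the mmap interface above
 * (a sketch; the path and the omitted error handling are assumptions, not
 * part of this file):
 *
 *	fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	meta = mmap(NULL, meta_len, PROT_READ, MAP_SHARED, fd, 0);
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);   <- swap in the next reader page
 *	ioctl(fd, 0);                             <- only wake up blocked waiters
 */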
8550  
8551  #ifdef CONFIG_TRACER_MAX_TRACE
8552  static int get_snapshot_map(struct trace_array *tr)
8553  {
8554  	int err = 0;
8555  
8556  	/*
8557  	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8558  	 * take trace_types_lock. Instead use the specific
8559  	 * snapshot_trigger_lock.
8560  	 */
8561  	spin_lock(&tr->snapshot_trigger_lock);
8562  
8563  	if (tr->snapshot || tr->mapped == UINT_MAX)
8564  		err = -EBUSY;
8565  	else
8566  		tr->mapped++;
8567  
8568  	spin_unlock(&tr->snapshot_trigger_lock);
8569  
8570  	/* Wait for update_max_tr() to observe iter->tr->mapped */
8571  	if (tr->mapped == 1)
8572  		synchronize_rcu();
8573  
8574  	return err;
8575  
8576  }
8577  static void put_snapshot_map(struct trace_array *tr)
8578  {
8579  	spin_lock(&tr->snapshot_trigger_lock);
8580  	if (!WARN_ON(!tr->mapped))
8581  		tr->mapped--;
8582  	spin_unlock(&tr->snapshot_trigger_lock);
8583  }
8584  #else
8585  static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8586  static inline void put_snapshot_map(struct trace_array *tr) { }
8587  #endif
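/*
 * Editor's note: user mappings and snapshot swapping exclude each other.
 * get_snapshot_map() returns -EBUSY while a snapshot is in use, and the
 * snapshot code in turn refuses to swap buffers while tr->mapped is non-zero,
 * since update_max_tr() must not pull pages out from under a live mapping.
 */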
8588  
8589  static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8590  {
8591  	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8592  	struct trace_iterator *iter = &info->iter;
8593  
8594  	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8595  	put_snapshot_map(iter->tr);
8596  }
8597  
8598  static const struct vm_operations_struct tracing_buffers_vmops = {
8599  	.close		= tracing_buffers_mmap_close,
8600  };
8601  
8602  static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8603  {
8604  	struct ftrace_buffer_info *info = filp->private_data;
8605  	struct trace_iterator *iter = &info->iter;
8606  	int ret = 0;
8607  
8608  	/* A memmap'ed buffer is not supported for user space mmap */
8609  	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8610  		return -ENODEV;
8611  
8612  	ret = get_snapshot_map(iter->tr);
8613  	if (ret)
8614  		return ret;
8615  
8616  	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8617  	if (ret)
8618  		put_snapshot_map(iter->tr);
8619  
8620  	vma->vm_ops = &tracing_buffers_vmops;
8621  
8622  	return ret;
8623  }
8624  
8625  static const struct file_operations tracing_buffers_fops = {
8626  	.open		= tracing_buffers_open,
8627  	.read		= tracing_buffers_read,
8628  	.poll		= tracing_buffers_poll,
8629  	.release	= tracing_buffers_release,
8630  	.flush		= tracing_buffers_flush,
8631  	.splice_read	= tracing_buffers_splice_read,
8632  	.unlocked_ioctl = tracing_buffers_ioctl,
8633  	.mmap		= tracing_buffers_mmap,
8634  };
8635  
8636  static ssize_t
8637  tracing_stats_read(struct file *filp, char __user *ubuf,
8638  		   size_t count, loff_t *ppos)
8639  {
8640  	struct inode *inode = file_inode(filp);
8641  	struct trace_array *tr = inode->i_private;
8642  	struct array_buffer *trace_buf = &tr->array_buffer;
8643  	int cpu = tracing_get_cpu(inode);
8644  	struct trace_seq *s;
8645  	unsigned long cnt;
8646  	unsigned long long t;
8647  	unsigned long usec_rem;
8648  
8649  	s = kmalloc(sizeof(*s), GFP_KERNEL);
8650  	if (!s)
8651  		return -ENOMEM;
8652  
8653  	trace_seq_init(s);
8654  
8655  	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8656  	trace_seq_printf(s, "entries: %ld\n", cnt);
8657  
8658  	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8659  	trace_seq_printf(s, "overrun: %ld\n", cnt);
8660  
8661  	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8662  	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8663  
8664  	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8665  	trace_seq_printf(s, "bytes: %ld\n", cnt);
8666  
8667  	if (trace_clocks[tr->clock_id].in_ns) {
8668  		/* local or global for trace_clock */
8669  		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8670  		usec_rem = do_div(t, USEC_PER_SEC);
8671  		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8672  								t, usec_rem);
8673  
8674  		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8675  		usec_rem = do_div(t, USEC_PER_SEC);
8676  		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8677  	} else {
8678  		/* counter or tsc mode for trace_clock */
8679  		trace_seq_printf(s, "oldest event ts: %llu\n",
8680  				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8681  
8682  		trace_seq_printf(s, "now ts: %llu\n",
8683  				ring_buffer_time_stamp(trace_buf->buffer));
8684  	}
8685  
8686  	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8687  	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8688  
8689  	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8690  	trace_seq_printf(s, "read events: %ld\n", cnt);
8691  
8692  	count = simple_read_from_buffer(ubuf, count, ppos,
8693  					s->buffer, trace_seq_used(s));
8694  
8695  	kfree(s);
8696  
8697  	return count;
8698  }
8699  
8700  static const struct file_operations tracing_stats_fops = {
8701  	.open		= tracing_open_generic_tr,
8702  	.read		= tracing_stats_read,
8703  	.llseek		= generic_file_llseek,
8704  	.release	= tracing_release_generic_tr,
8705  };
8706  
8707  #ifdef CONFIG_DYNAMIC_FTRACE
8708  
8709  static ssize_t
8710  tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8711  		  size_t cnt, loff_t *ppos)
8712  {
8713  	ssize_t ret;
8714  	char *buf;
8715  	int r;
8716  
8717  	/* 512 should be plenty to hold the amount needed */
8718  #define DYN_INFO_BUF_SIZE	512
8719  
8720  	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8721  	if (!buf)
8722  		return -ENOMEM;
8723  
8724  	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8725  		      "%ld pages:%ld groups: %ld\n"
8726  		      "ftrace boot update time = %llu (ns)\n"
8727  		      "ftrace module total update time = %llu (ns)\n",
8728  		      ftrace_update_tot_cnt,
8729  		      ftrace_number_of_pages,
8730  		      ftrace_number_of_groups,
8731  		      ftrace_update_time,
8732  		      ftrace_total_mod_time);
8733  
8734  	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8735  	kfree(buf);
8736  	return ret;
8737  }
8738  
8739  static const struct file_operations tracing_dyn_info_fops = {
8740  	.open		= tracing_open_generic,
8741  	.read		= tracing_read_dyn_info,
8742  	.llseek		= generic_file_llseek,
8743  };
8744  #endif /* CONFIG_DYNAMIC_FTRACE */
8745  
8746  #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8747  static void
8748  ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8749  		struct trace_array *tr, struct ftrace_probe_ops *ops,
8750  		void *data)
8751  {
8752  	tracing_snapshot_instance(tr);
8753  }
8754  
8755  static void
8756  ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8757  		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8758  		      void *data)
8759  {
8760  	struct ftrace_func_mapper *mapper = data;
8761  	long *count = NULL;
8762  
8763  	if (mapper)
8764  		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8765  
8766  	if (count) {
8767  
8768  		if (*count <= 0)
8769  			return;
8770  
8771  		(*count)--;
8772  	}
8773  
8774  	tracing_snapshot_instance(tr);
8775  }
8776  
8777  static int
8778  ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8779  		      struct ftrace_probe_ops *ops, void *data)
8780  {
8781  	struct ftrace_func_mapper *mapper = data;
8782  	long *count = NULL;
8783  
8784  	seq_printf(m, "%ps:", (void *)ip);
8785  
8786  	seq_puts(m, "snapshot");
8787  
8788  	if (mapper)
8789  		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8790  
8791  	if (count)
8792  		seq_printf(m, ":count=%ld\n", *count);
8793  	else
8794  		seq_puts(m, ":unlimited\n");
8795  
8796  	return 0;
8797  }
8798  
8799  static int
8800  ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8801  		     unsigned long ip, void *init_data, void **data)
8802  {
8803  	struct ftrace_func_mapper *mapper = *data;
8804  
8805  	if (!mapper) {
8806  		mapper = allocate_ftrace_func_mapper();
8807  		if (!mapper)
8808  			return -ENOMEM;
8809  		*data = mapper;
8810  	}
8811  
8812  	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8813  }
8814  
8815  static void
8816  ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8817  		     unsigned long ip, void *data)
8818  {
8819  	struct ftrace_func_mapper *mapper = data;
8820  
8821  	if (!ip) {
8822  		if (!mapper)
8823  			return;
8824  		free_ftrace_func_mapper(mapper, NULL);
8825  		return;
8826  	}
8827  
8828  	ftrace_func_mapper_remove_ip(mapper, ip);
8829  }
8830  
8831  static struct ftrace_probe_ops snapshot_probe_ops = {
8832  	.func			= ftrace_snapshot,
8833  	.print			= ftrace_snapshot_print,
8834  };
8835  
8836  static struct ftrace_probe_ops snapshot_count_probe_ops = {
8837  	.func			= ftrace_count_snapshot,
8838  	.print			= ftrace_snapshot_print,
8839  	.init			= ftrace_snapshot_init,
8840  	.free			= ftrace_snapshot_free,
8841  };
8842  
8843  static int
8844  ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8845  			       char *glob, char *cmd, char *param, int enable)
8846  {
8847  	struct ftrace_probe_ops *ops;
8848  	void *count = (void *)-1;
8849  	char *number;
8850  	int ret;
8851  
8852  	if (!tr)
8853  		return -ENODEV;
8854  
8855  	/* hash funcs only work with set_ftrace_filter */
8856  	if (!enable)
8857  		return -EINVAL;
8858  
8859  	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8860  
8861  	if (glob[0] == '!') {
8862  		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8863  		if (!ret)
8864  			tracing_disarm_snapshot(tr);
8865  
8866  		return ret;
8867  	}
8868  
8869  	if (!param)
8870  		goto out_reg;
8871  
8872  	number = strsep(&param, ":");
8873  
8874  	if (!strlen(number))
8875  		goto out_reg;
8876  
8877  	/*
8878  	 * We use the callback data field (which is a pointer)
8879  	 * as our counter.
8880  	 */
8881  	ret = kstrtoul(number, 0, (unsigned long *)&count);
8882  	if (ret)
8883  		return ret;
8884  
8885   out_reg:
8886  	ret = tracing_arm_snapshot(tr);
8887  	if (ret < 0)
8888  		return ret;
8889  
8890  	ret = register_ftrace_function_probe(glob, tr, ops, count);
8891  	if (ret < 0)
8892  		tracing_disarm_snapshot(tr);
8893  
8894  	return ret < 0 ? ret : 0;
8895  }
8896  
8897  static struct ftrace_func_command ftrace_snapshot_cmd = {
8898  	.name			= "snapshot",
8899  	.func			= ftrace_trace_snapshot_callback,
8900  };
8901  
8902  static __init int register_snapshot_cmd(void)
8903  {
8904  	return register_ftrace_command(&ftrace_snapshot_cmd);
8905  }
8906  #else
8907  static inline __init int register_snapshot_cmd(void) { return 0; }
8908  #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8909  
8910  static struct dentry *tracing_get_dentry(struct trace_array *tr)
8911  {
8912  	/* Top directory uses NULL as the parent */
8913  	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8914  		return NULL;
8915  
8916  	if (WARN_ON(!tr->dir))
8917  		return ERR_PTR(-ENODEV);
8918  
8919  	/* All sub buffers have a descriptor */
8920  	return tr->dir;
8921  }
8922  
8923  static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8924  {
8925  	struct dentry *d_tracer;
8926  
8927  	if (tr->percpu_dir)
8928  		return tr->percpu_dir;
8929  
8930  	d_tracer = tracing_get_dentry(tr);
8931  	if (IS_ERR(d_tracer))
8932  		return NULL;
8933  
8934  	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8935  
8936  	MEM_FAIL(!tr->percpu_dir,
8937  		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8938  
8939  	return tr->percpu_dir;
8940  }
8941  
8942  static struct dentry *
8943  trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8944  		      void *data, long cpu, const struct file_operations *fops)
8945  {
8946  	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8947  
8948  	if (ret) /* See tracing_get_cpu() */
8949  		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8950  	return ret;
8951  }
8952  
8953  static void
8954  tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8955  {
8956  	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8957  	struct dentry *d_cpu;
8958  	char cpu_dir[30]; /* 30 characters should be more than enough */
8959  
8960  	if (!d_percpu)
8961  		return;
8962  
8963  	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8964  	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8965  	if (!d_cpu) {
8966  		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8967  		return;
8968  	}
8969  
8970  	/* per cpu trace_pipe */
8971  	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8972  				tr, cpu, &tracing_pipe_fops);
8973  
8974  	/* per cpu trace */
8975  	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8976  				tr, cpu, &tracing_fops);
8977  
8978  	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8979  				tr, cpu, &tracing_buffers_fops);
8980  
8981  	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8982  				tr, cpu, &tracing_stats_fops);
8983  
8984  	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8985  				tr, cpu, &tracing_entries_fops);
8986  
8987  	if (tr->range_addr_start)
8988  		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8989  				      tr, cpu, &tracing_buffer_meta_fops);
8990  #ifdef CONFIG_TRACER_SNAPSHOT
8991  	if (!tr->range_addr_start) {
8992  		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8993  				      tr, cpu, &snapshot_fops);
8994  
8995  		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8996  				      tr, cpu, &snapshot_raw_fops);
8997  	}
8998  #endif
8999  }
9000  
9001  #ifdef CONFIG_FTRACE_SELFTEST
9002  /* Let selftest have access to static functions in this file */
9003  #include "trace_selftest.c"
9004  #endif
9005  
9006  static ssize_t
9007  trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9008  			loff_t *ppos)
9009  {
9010  	struct trace_option_dentry *topt = filp->private_data;
9011  	char *buf;
9012  
9013  	if (topt->flags->val & topt->opt->bit)
9014  		buf = "1\n";
9015  	else
9016  		buf = "0\n";
9017  
9018  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9019  }
9020  
9021  static ssize_t
9022  trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9023  			 loff_t *ppos)
9024  {
9025  	struct trace_option_dentry *topt = filp->private_data;
9026  	unsigned long val;
9027  	int ret;
9028  
9029  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9030  	if (ret)
9031  		return ret;
9032  
9033  	if (val != 0 && val != 1)
9034  		return -EINVAL;
9035  
9036  	if (!!(topt->flags->val & topt->opt->bit) != val) {
9037  		guard(mutex)(&trace_types_lock);
9038  		ret = __set_tracer_option(topt->tr, topt->flags,
9039  					  topt->opt, !val);
9040  		if (ret)
9041  			return ret;
9042  	}
9043  
9044  	*ppos += cnt;
9045  
9046  	return cnt;
9047  }
9048  
9049  static int tracing_open_options(struct inode *inode, struct file *filp)
9050  {
9051  	struct trace_option_dentry *topt = inode->i_private;
9052  	int ret;
9053  
9054  	ret = tracing_check_open_get_tr(topt->tr);
9055  	if (ret)
9056  		return ret;
9057  
9058  	filp->private_data = inode->i_private;
9059  	return 0;
9060  }
9061  
9062  static int tracing_release_options(struct inode *inode, struct file *file)
9063  {
9064  	struct trace_option_dentry *topt = file->private_data;
9065  
9066  	trace_array_put(topt->tr);
9067  	return 0;
9068  }
9069  
9070  static const struct file_operations trace_options_fops = {
9071  	.open = tracing_open_options,
9072  	.read = trace_options_read,
9073  	.write = trace_options_write,
9074  	.llseek	= generic_file_llseek,
9075  	.release = tracing_release_options,
9076  };
9077  
9078  /*
9079   * In order to pass in both the trace_array descriptor as well as the index
9080   * to the flag that the trace option file represents, the trace_array
9081   * has a character array of trace_flags_index[], which holds the index
9082   * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9083   * The address of this character array is passed to the flag option file
9084   * read/write callbacks.
9085   *
9086   * In order to extract both the index and the trace_array descriptor,
9087   * get_tr_index() uses the following algorithm.
9088   *
9089   *   idx = *ptr;
9090   *
9091   * As the pointer itself contains the address of the index (remember
9092   * index[1] == 1).
9093   *
9094   * Then to get the trace_array descriptor, by subtracting that index
9095   * from the ptr, we get to the start of the index itself.
9096   *
9097   *   ptr - idx == &index[0]
9098   *
9099   * Then a simple container_of() from that pointer gets us to the
9100   * trace_array descriptor.
9101   */
9102  static void get_tr_index(void *data, struct trace_array **ptr,
9103  			 unsigned int *pindex)
9104  {
9105  	*pindex = *(unsigned char *)data;
9106  
9107  	*ptr = container_of(data - *pindex, struct trace_array,
9108  			    trace_flags_index);
9109  }
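/*
 * Editor's note, a worked example of the scheme described above: if this
 * option file was created for bit 3, data points at trace_flags_index[3],
 * so *data == 3 and data - 3 == &trace_flags_index[0]; container_of() then
 * recovers the enclosing trace_array.
 */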
9110  
9111  static ssize_t
9112  trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9113  			loff_t *ppos)
9114  {
9115  	void *tr_index = filp->private_data;
9116  	struct trace_array *tr;
9117  	unsigned int index;
9118  	char *buf;
9119  
9120  	get_tr_index(tr_index, &tr, &index);
9121  
9122  	if (tr->trace_flags & (1 << index))
9123  		buf = "1\n";
9124  	else
9125  		buf = "0\n";
9126  
9127  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9128  }
9129  
9130  static ssize_t
9131  trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9132  			 loff_t *ppos)
9133  {
9134  	void *tr_index = filp->private_data;
9135  	struct trace_array *tr;
9136  	unsigned int index;
9137  	unsigned long val;
9138  	int ret;
9139  
9140  	get_tr_index(tr_index, &tr, &index);
9141  
9142  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9143  	if (ret)
9144  		return ret;
9145  
9146  	if (val != 0 && val != 1)
9147  		return -EINVAL;
9148  
9149  	mutex_lock(&event_mutex);
9150  	mutex_lock(&trace_types_lock);
9151  	ret = set_tracer_flag(tr, 1 << index, val);
9152  	mutex_unlock(&trace_types_lock);
9153  	mutex_unlock(&event_mutex);
9154  
9155  	if (ret < 0)
9156  		return ret;
9157  
9158  	*ppos += cnt;
9159  
9160  	return cnt;
9161  }
9162  
9163  static const struct file_operations trace_options_core_fops = {
9164  	.open = tracing_open_generic,
9165  	.read = trace_options_core_read,
9166  	.write = trace_options_core_write,
9167  	.llseek = generic_file_llseek,
9168  };
9169  
9170  struct dentry *trace_create_file(const char *name,
9171  				 umode_t mode,
9172  				 struct dentry *parent,
9173  				 void *data,
9174  				 const struct file_operations *fops)
9175  {
9176  	struct dentry *ret;
9177  
9178  	ret = tracefs_create_file(name, mode, parent, data, fops);
9179  	if (!ret)
9180  		pr_warn("Could not create tracefs '%s' entry\n", name);
9181  
9182  	return ret;
9183  }
9184  
9185  
9186  static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9187  {
9188  	struct dentry *d_tracer;
9189  
9190  	if (tr->options)
9191  		return tr->options;
9192  
9193  	d_tracer = tracing_get_dentry(tr);
9194  	if (IS_ERR(d_tracer))
9195  		return NULL;
9196  
9197  	tr->options = tracefs_create_dir("options", d_tracer);
9198  	if (!tr->options) {
9199  		pr_warn("Could not create tracefs directory 'options'\n");
9200  		return NULL;
9201  	}
9202  
9203  	return tr->options;
9204  }
9205  
9206  static void
9207  create_trace_option_file(struct trace_array *tr,
9208  			 struct trace_option_dentry *topt,
9209  			 struct tracer_flags *flags,
9210  			 struct tracer_opt *opt)
9211  {
9212  	struct dentry *t_options;
9213  
9214  	t_options = trace_options_init_dentry(tr);
9215  	if (!t_options)
9216  		return;
9217  
9218  	topt->flags = flags;
9219  	topt->opt = opt;
9220  	topt->tr = tr;
9221  
9222  	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9223  					t_options, topt, &trace_options_fops);
9224  
9225  }
9226  
9227  static void
9228  create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9229  {
9230  	struct trace_option_dentry *topts;
9231  	struct trace_options *tr_topts;
9232  	struct tracer_flags *flags;
9233  	struct tracer_opt *opts;
9234  	int cnt;
9235  	int i;
9236  
9237  	if (!tracer)
9238  		return;
9239  
9240  	flags = tracer->flags;
9241  
9242  	if (!flags || !flags->opts)
9243  		return;
9244  
9245  	/*
9246  	 * If this is an instance, only create flags for tracers
9247  	 * the instance may have.
9248  	 */
9249  	if (!trace_ok_for_array(tracer, tr))
9250  		return;
9251  
9252  	for (i = 0; i < tr->nr_topts; i++) {
9253  		/* Make sure there's no duplicate flags. */
9254  		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9255  			return;
9256  	}
9257  
9258  	opts = flags->opts;
9259  
9260  	for (cnt = 0; opts[cnt].name; cnt++)
9261  		;
9262  
9263  	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9264  	if (!topts)
9265  		return;
9266  
9267  	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9268  			    GFP_KERNEL);
9269  	if (!tr_topts) {
9270  		kfree(topts);
9271  		return;
9272  	}
9273  
9274  	tr->topts = tr_topts;
9275  	tr->topts[tr->nr_topts].tracer = tracer;
9276  	tr->topts[tr->nr_topts].topts = topts;
9277  	tr->nr_topts++;
9278  
9279  	for (cnt = 0; opts[cnt].name; cnt++) {
9280  		create_trace_option_file(tr, &topts[cnt], flags,
9281  					 &opts[cnt]);
9282  		MEM_FAIL(topts[cnt].entry == NULL,
9283  			  "Failed to create trace option: %s",
9284  			  opts[cnt].name);
9285  	}
9286  }
9287  
9288  static struct dentry *
9289  create_trace_option_core_file(struct trace_array *tr,
9290  			      const char *option, long index)
9291  {
9292  	struct dentry *t_options;
9293  
9294  	t_options = trace_options_init_dentry(tr);
9295  	if (!t_options)
9296  		return NULL;
9297  
9298  	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9299  				 (void *)&tr->trace_flags_index[index],
9300  				 &trace_options_core_fops);
9301  }
9302  
9303  static void create_trace_options_dir(struct trace_array *tr)
9304  {
9305  	struct dentry *t_options;
9306  	bool top_level = tr == &global_trace;
9307  	int i;
9308  
9309  	t_options = trace_options_init_dentry(tr);
9310  	if (!t_options)
9311  		return;
9312  
9313  	for (i = 0; trace_options[i]; i++) {
9314  		if (top_level ||
9315  		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9316  			create_trace_option_core_file(tr, trace_options[i], i);
9317  	}
9318  }
9319  
9320  static ssize_t
9321  rb_simple_read(struct file *filp, char __user *ubuf,
9322  	       size_t cnt, loff_t *ppos)
9323  {
9324  	struct trace_array *tr = filp->private_data;
9325  	char buf[64];
9326  	int r;
9327  
9328  	r = tracer_tracing_is_on(tr);
9329  	r = sprintf(buf, "%d\n", r);
9330  
9331  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9332  }
9333  
9334  static ssize_t
9335  rb_simple_write(struct file *filp, const char __user *ubuf,
9336  		size_t cnt, loff_t *ppos)
9337  {
9338  	struct trace_array *tr = filp->private_data;
9339  	struct trace_buffer *buffer = tr->array_buffer.buffer;
9340  	unsigned long val;
9341  	int ret;
9342  
9343  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9344  	if (ret)
9345  		return ret;
9346  
9347  	if (buffer) {
9348  		guard(mutex)(&trace_types_lock);
9349  		if (!!val == tracer_tracing_is_on(tr)) {
9350  			val = 0; /* do nothing */
9351  		} else if (val) {
9352  			tracer_tracing_on(tr);
9353  			if (tr->current_trace->start)
9354  				tr->current_trace->start(tr);
9355  		} else {
9356  			tracer_tracing_off(tr);
9357  			if (tr->current_trace->stop)
9358  				tr->current_trace->stop(tr);
9359  			/* Wake up any waiters */
9360  			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9361  		}
9362  	}
9363  
9364  	(*ppos)++;
9365  
9366  	return cnt;
9367  }
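/*
 * Editor's note: these callbacks back the per-instance "tracing_on" file
 * created in init_tracer_tracefs() below, so e.g.
 * "echo 0 > /sys/kernel/tracing/tracing_on" stops ring-buffer writes and
 * wakes any blocked readers without tearing down the current tracer.
 */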
9368  
9369  static const struct file_operations rb_simple_fops = {
9370  	.open		= tracing_open_generic_tr,
9371  	.read		= rb_simple_read,
9372  	.write		= rb_simple_write,
9373  	.release	= tracing_release_generic_tr,
9374  	.llseek		= default_llseek,
9375  };
9376  
9377  static ssize_t
9378  buffer_percent_read(struct file *filp, char __user *ubuf,
9379  		    size_t cnt, loff_t *ppos)
9380  {
9381  	struct trace_array *tr = filp->private_data;
9382  	char buf[64];
9383  	int r;
9384  
9385  	r = tr->buffer_percent;
9386  	r = sprintf(buf, "%d\n", r);
9387  
9388  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9389  }
9390  
9391  static ssize_t
9392  buffer_percent_write(struct file *filp, const char __user *ubuf,
9393  		     size_t cnt, loff_t *ppos)
9394  {
9395  	struct trace_array *tr = filp->private_data;
9396  	unsigned long val;
9397  	int ret;
9398  
9399  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9400  	if (ret)
9401  		return ret;
9402  
9403  	if (val > 100)
9404  		return -EINVAL;
9405  
9406  	tr->buffer_percent = val;
9407  
9408  	(*ppos)++;
9409  
9410  	return cnt;
9411  }
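/*
 * Editor's note: buffer_percent controls when blocked readers are woken,
 * e.g. "echo 50 > buffer_percent" makes wait_on_pipe() sleep until a per-CPU
 * buffer is at least half full, while 0 wakes readers as soon as any data
 * is available.
 */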
9412  
9413  static const struct file_operations buffer_percent_fops = {
9414  	.open		= tracing_open_generic_tr,
9415  	.read		= buffer_percent_read,
9416  	.write		= buffer_percent_write,
9417  	.release	= tracing_release_generic_tr,
9418  	.llseek		= default_llseek,
9419  };
9420  
9421  static ssize_t
9422  buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9423  {
9424  	struct trace_array *tr = filp->private_data;
9425  	size_t size;
9426  	char buf[64];
9427  	int order;
9428  	int r;
9429  
9430  	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9431  	size = (PAGE_SIZE << order) / 1024;
9432  
9433  	r = sprintf(buf, "%zd\n", size);
9434  
9435  	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9436  }
9437  
9438  static ssize_t
9439  buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9440  			 size_t cnt, loff_t *ppos)
9441  {
9442  	struct trace_array *tr = filp->private_data;
9443  	unsigned long val;
9444  	int old_order;
9445  	int order;
9446  	int pages;
9447  	int ret;
9448  
9449  	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9450  	if (ret)
9451  		return ret;
9452  
9453  	val *= 1024; /* value passed in is in KB */
9454  
9455  	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9456  	order = fls(pages - 1);
9457  
9458  	/* limit between 1 and 128 system pages */
9459  	if (order < 0 || order > 7)
9460  		return -EINVAL;
9461  
9462  	/* Do not allow tracing while changing the order of the ring buffer */
9463  	tracing_stop_tr(tr);
9464  
9465  	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9466  	if (old_order == order)
9467  		goto out;
9468  
9469  	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9470  	if (ret)
9471  		goto out;
9472  
9473  #ifdef CONFIG_TRACER_MAX_TRACE
9474  
9475  	if (!tr->allocated_snapshot)
9476  		goto out_max;
9477  
9478  	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9479  	if (ret) {
9480  		/* Put back the old order */
9481  		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9482  		if (WARN_ON_ONCE(cnt)) {
9483  			/*
9484  			 * AARGH! We are left with different orders!
9485  			 * The max buffer is our "snapshot" buffer.
9486  			 * When a tracer needs a snapshot (one of the
9487  			 * latency tracers), it swaps the max buffer
9488  			 * with the saved snap shot. We succeeded to
9489  			 * update the order of the main buffer, but failed to
9490  			 * update the order of the max buffer. But when we tried
9491  			 * to reset the main buffer to the original size, we
9492  			 * failed there too. This is very unlikely to
9493  			 * happen, but if it does, warn and kill all
9494  			 * tracing.
9495  			 */
9496  			tracing_disabled = 1;
9497  		}
9498  		goto out;
9499  	}
9500   out_max:
9501  #endif
9502  	(*ppos)++;
9503   out:
9504  	if (ret)
9505  		cnt = ret;
9506  	tracing_start_tr(tr);
9507  	return cnt;
9508  }
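/*
 * Editor's note, a worked example of the mapping above (assuming 4K pages):
 * writing "8" gives val = 8192, pages = DIV_ROUND_UP(8192, 4096) = 2 and
 * order = fls(1) = 1, i.e. two-page sub-buffers; any value needing more than
 * order 7 (128 pages) is rejected with -EINVAL.
 */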
9509  
9510  static const struct file_operations buffer_subbuf_size_fops = {
9511  	.open		= tracing_open_generic_tr,
9512  	.read		= buffer_subbuf_size_read,
9513  	.write		= buffer_subbuf_size_write,
9514  	.release	= tracing_release_generic_tr,
9515  	.llseek		= default_llseek,
9516  };
9517  
9518  static struct dentry *trace_instance_dir;
9519  
9520  static void
9521  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9522  
9523  #ifdef CONFIG_MODULES
9524  static int make_mod_delta(struct module *mod, void *data)
9525  {
9526  	struct trace_module_delta *module_delta;
9527  	struct trace_scratch *tscratch;
9528  	struct trace_mod_entry *entry;
9529  	struct trace_array *tr = data;
9530  	int i;
9531  
9532  	tscratch = tr->scratch;
9533  	module_delta = READ_ONCE(tr->module_delta);
9534  	for (i = 0; i < tscratch->nr_entries; i++) {
9535  		entry = &tscratch->entries[i];
9536  		if (strcmp(mod->name, entry->mod_name))
9537  			continue;
9538  		if (mod->state == MODULE_STATE_GOING)
9539  			module_delta->delta[i] = 0;
9540  		else
9541  			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9542  						 - entry->mod_addr;
9543  		break;
9544  	}
9545  	return 0;
9546  }
9547  #else
9548  static int make_mod_delta(struct module *mod, void *data)
9549  {
9550  	return 0;
9551  }
9552  #endif
9553  
9554  static int mod_addr_comp(const void *a, const void *b, const void *data)
9555  {
9556  	const struct trace_mod_entry *e1 = a;
9557  	const struct trace_mod_entry *e2 = b;
9558  
9559  	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9560  }
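/*
 * Editor's note: mod_addr_comp() never returns 0; that is acceptable for
 * sort_r() here because the recorded module base addresses are distinct.
 */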
9561  
9562  static void setup_trace_scratch(struct trace_array *tr,
9563  				struct trace_scratch *tscratch, unsigned int size)
9564  {
9565  	struct trace_module_delta *module_delta;
9566  	struct trace_mod_entry *entry;
9567  	int i, nr_entries;
9568  
9569  	if (!tscratch)
9570  		return;
9571  
9572  	tr->scratch = tscratch;
9573  	tr->scratch_size = size;
9574  
9575  	if (tscratch->text_addr)
9576  		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9577  
9578  	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9579  		goto reset;
9580  
9581  	/* Check if each module name is a valid string */
9582  	for (i = 0; i < tscratch->nr_entries; i++) {
9583  		int n;
9584  
9585  		entry = &tscratch->entries[i];
9586  
9587  		for (n = 0; n < MODULE_NAME_LEN; n++) {
9588  			if (entry->mod_name[n] == '\0')
9589  				break;
9590  			if (!isprint(entry->mod_name[n]))
9591  				goto reset;
9592  		}
9593  		if (n == MODULE_NAME_LEN)
9594  			goto reset;
9595  	}
9596  
9597  	/* Sort the entries so that we can find the appropriate module from an address. */
9598  	nr_entries = tscratch->nr_entries;
9599  	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9600  	       mod_addr_comp, NULL, NULL);
9601  
9602  	if (IS_ENABLED(CONFIG_MODULES)) {
9603  		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9604  		if (!module_delta) {
9605  			pr_info("module_delta allocation failed. Not able to decode module address.");
9606  			goto reset;
9607  		}
9608  		init_rcu_head(&module_delta->rcu);
9609  	} else
9610  		module_delta = NULL;
9611  	WRITE_ONCE(tr->module_delta, module_delta);
9612  
9613  	/* Scan modules to compute the text delta for each module. */
9614  	module_for_each_mod(make_mod_delta, tr);
9615  
9616  	/* Set trace_clock to the same clock as the previous boot. */
9617  	if (tscratch->clock_id != tr->clock_id) {
9618  		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9619  		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9620  			pr_info("the previous trace_clock info is not valid.");
9621  			goto reset;
9622  		}
9623  	}
9624  	return;
9625   reset:
9626  	/* Invalid trace modules */
9627  	memset(tscratch, 0, size);
9628  }
9629  
9630  static int
9631  allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9632  {
9633  	enum ring_buffer_flags rb_flags;
9634  	struct trace_scratch *tscratch;
9635  	unsigned int scratch_size = 0;
9636  
9637  	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9638  
9639  	buf->tr = tr;
9640  
9641  	if (tr->range_addr_start && tr->range_addr_size) {
9642  		/* Add scratch buffer to handle 128 modules */
9643  		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9644  						      tr->range_addr_start,
9645  						      tr->range_addr_size,
9646  						      struct_size(tscratch, entries, 128));
9647  
9648  		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9649  		setup_trace_scratch(tr, tscratch, scratch_size);
9650  
9651  		/*
9652  		 * This is basically the same as a mapped buffer,
9653  		 * with the same restrictions.
9654  		 */
9655  		tr->mapped++;
9656  	} else {
9657  		buf->buffer = ring_buffer_alloc(size, rb_flags);
9658  	}
9659  	if (!buf->buffer)
9660  		return -ENOMEM;
9661  
9662  	buf->data = alloc_percpu(struct trace_array_cpu);
9663  	if (!buf->data) {
9664  		ring_buffer_free(buf->buffer);
9665  		buf->buffer = NULL;
9666  		return -ENOMEM;
9667  	}
9668  
9669  	/* Allocate the first page for all buffers */
9670  	set_buffer_entries(&tr->array_buffer,
9671  			   ring_buffer_size(tr->array_buffer.buffer, 0));
9672  
9673  	return 0;
9674  }
9675  
9676  static void free_trace_buffer(struct array_buffer *buf)
9677  {
9678  	if (buf->buffer) {
9679  		ring_buffer_free(buf->buffer);
9680  		buf->buffer = NULL;
9681  		free_percpu(buf->data);
9682  		buf->data = NULL;
9683  	}
9684  }
9685  
9686  static int allocate_trace_buffers(struct trace_array *tr, int size)
9687  {
9688  	int ret;
9689  
9690  	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9691  	if (ret)
9692  		return ret;
9693  
9694  #ifdef CONFIG_TRACER_MAX_TRACE
9695  	/* Fixed (memory-mapped) buffer trace arrays do not have snapshot buffers */
9696  	if (tr->range_addr_start)
9697  		return 0;
9698  
9699  	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9700  				    allocate_snapshot ? size : 1);
9701  	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9702  		free_trace_buffer(&tr->array_buffer);
9703  		return -ENOMEM;
9704  	}
9705  	tr->allocated_snapshot = allocate_snapshot;
9706  
9707  	allocate_snapshot = false;
9708  #endif
9709  
9710  	return 0;
9711  }
9712  
9713  static void free_trace_buffers(struct trace_array *tr)
9714  {
9715  	if (!tr)
9716  		return;
9717  
9718  	free_trace_buffer(&tr->array_buffer);
9719  	kfree(tr->module_delta);
9720  
9721  #ifdef CONFIG_TRACER_MAX_TRACE
9722  	free_trace_buffer(&tr->max_buffer);
9723  #endif
9724  }
9725  
9726  static void init_trace_flags_index(struct trace_array *tr)
9727  {
9728  	int i;
9729  
9730  	/* Used by the trace options files */
9731  	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9732  		tr->trace_flags_index[i] = i;
9733  }
9734  
9735  static void __update_tracer_options(struct trace_array *tr)
9736  {
9737  	struct tracer *t;
9738  
9739  	for (t = trace_types; t; t = t->next)
9740  		add_tracer_options(tr, t);
9741  }
9742  
9743  static void update_tracer_options(struct trace_array *tr)
9744  {
9745  	guard(mutex)(&trace_types_lock);
9746  	tracer_options_updated = true;
9747  	__update_tracer_options(tr);
9748  }
9749  
9750  /* Must have trace_types_lock held */
9751  struct trace_array *trace_array_find(const char *instance)
9752  {
9753  	struct trace_array *tr, *found = NULL;
9754  
9755  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9756  		if (tr->name && strcmp(tr->name, instance) == 0) {
9757  			found = tr;
9758  			break;
9759  		}
9760  	}
9761  
9762  	return found;
9763  }
9764  
9765  struct trace_array *trace_array_find_get(const char *instance)
9766  {
9767  	struct trace_array *tr;
9768  
9769  	guard(mutex)(&trace_types_lock);
9770  	tr = trace_array_find(instance);
9771  	if (tr)
9772  		tr->ref++;
9773  
9774  	return tr;
9775  }
9776  
9777  static int trace_array_create_dir(struct trace_array *tr)
9778  {
9779  	int ret;
9780  
9781  	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9782  	if (!tr->dir)
9783  		return -EINVAL;
9784  
9785  	ret = event_trace_add_tracer(tr->dir, tr);
9786  	if (ret) {
9787  		tracefs_remove(tr->dir);
9788  		return ret;
9789  	}
9790  
9791  	init_tracer_tracefs(tr, tr->dir);
9792  	__update_tracer_options(tr);
9793  
9794  	return ret;
9795  }
9796  
9797  static struct trace_array *
9798  trace_array_create_systems(const char *name, const char *systems,
9799  			   unsigned long range_addr_start,
9800  			   unsigned long range_addr_size)
9801  {
9802  	struct trace_array *tr;
9803  	int ret;
9804  
9805  	ret = -ENOMEM;
9806  	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9807  	if (!tr)
9808  		return ERR_PTR(ret);
9809  
9810  	tr->name = kstrdup(name, GFP_KERNEL);
9811  	if (!tr->name)
9812  		goto out_free_tr;
9813  
9814  	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9815  		goto out_free_tr;
9816  
9817  	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9818  		goto out_free_tr;
9819  
9820  	if (systems) {
9821  		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9822  		if (!tr->system_names)
9823  			goto out_free_tr;
9824  	}
9825  
9826  	/* Only for boot up memory mapped ring buffers */
9827  	tr->range_addr_start = range_addr_start;
9828  	tr->range_addr_size = range_addr_size;
9829  
9830  	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9831  
9832  	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9833  
9834  	raw_spin_lock_init(&tr->start_lock);
9835  
9836  	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9837  #ifdef CONFIG_TRACER_MAX_TRACE
9838  	spin_lock_init(&tr->snapshot_trigger_lock);
9839  #endif
9840  	tr->current_trace = &nop_trace;
9841  
9842  	INIT_LIST_HEAD(&tr->systems);
9843  	INIT_LIST_HEAD(&tr->events);
9844  	INIT_LIST_HEAD(&tr->hist_vars);
9845  	INIT_LIST_HEAD(&tr->err_log);
9846  	INIT_LIST_HEAD(&tr->marker_list);
9847  
9848  #ifdef CONFIG_MODULES
9849  	INIT_LIST_HEAD(&tr->mod_events);
9850  #endif
9851  
9852  	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9853  		goto out_free_tr;
9854  
9855  	/* The ring buffer is expanded by default */
9856  	trace_set_ring_buffer_expanded(tr);
9857  
9858  	if (ftrace_allocate_ftrace_ops(tr) < 0)
9859  		goto out_free_tr;
9860  
9861  	ftrace_init_trace_array(tr);
9862  
9863  	init_trace_flags_index(tr);
9864  
9865  	if (trace_instance_dir) {
9866  		ret = trace_array_create_dir(tr);
9867  		if (ret)
9868  			goto out_free_tr;
9869  	} else
9870  		__trace_early_add_events(tr);
9871  
9872  	list_add(&tr->list, &ftrace_trace_arrays);
9873  
9874  	tr->ref++;
9875  
9876  	return tr;
9877  
9878   out_free_tr:
9879  	ftrace_free_ftrace_ops(tr);
9880  	free_trace_buffers(tr);
9881  	free_cpumask_var(tr->pipe_cpumask);
9882  	free_cpumask_var(tr->tracing_cpumask);
9883  	kfree_const(tr->system_names);
9884  	kfree(tr->range_name);
9885  	kfree(tr->name);
9886  	kfree(tr);
9887  
9888  	return ERR_PTR(ret);
9889  }
9890  
9891  static struct trace_array *trace_array_create(const char *name)
9892  {
9893  	return trace_array_create_systems(name, NULL, 0, 0);
9894  }
9895  
9896  static int instance_mkdir(const char *name)
9897  {
9898  	struct trace_array *tr;
9899  	int ret;
9900  
9901  	guard(mutex)(&event_mutex);
9902  	guard(mutex)(&trace_types_lock);
9903  
9904  	ret = -EEXIST;
9905  	if (trace_array_find(name))
9906  		return -EEXIST;
9907  
9908  	tr = trace_array_create(name);
9909  
9910  	ret = PTR_ERR_OR_ZERO(tr);
9911  
9912  	return ret;
9913  }
9914  
9915  #ifdef CONFIG_MMU
9916  static u64 map_pages(unsigned long start, unsigned long size)
9917  {
9918  	unsigned long vmap_start, vmap_end;
9919  	struct vm_struct *area;
9920  	int ret;
9921  
9922  	area = get_vm_area(size, VM_IOREMAP);
9923  	if (!area)
9924  		return 0;
9925  
9926  	vmap_start = (unsigned long) area->addr;
9927  	vmap_end = vmap_start + size;
9928  
9929  	ret = vmap_page_range(vmap_start, vmap_end,
9930  			      start, pgprot_nx(PAGE_KERNEL));
9931  	if (ret < 0) {
9932  		free_vm_area(area);
9933  		return 0;
9934  	}
9935  
9936  	return (u64)vmap_start;
9937  }
9938  #else
9939  static inline u64 map_pages(unsigned long start, unsigned long size)
9940  {
9941  	return 0;
9942  }
9943  #endif
9944  
9945  /**
9946   * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9947   * @name: The name of the trace array to be looked up/created.
9948   * @systems: A list of systems to create event directories for (NULL for all)
9949   *
9950   * Returns pointer to trace array with given name.
9951   * NULL, if it cannot be created.
9952   *
9953   * NOTE: This function increments the reference counter associated with the
9954   * trace array returned. This makes sure it cannot be freed while in use.
9955   * Use trace_array_put() once the trace array is no longer needed.
9956   * If the trace_array is to be freed, trace_array_destroy() needs to
9957   * be called after the trace_array_put(), or simply let user space delete
9958   * it from the tracefs instances directory. But until the
9959   * trace_array_put() is called, user space can not delete it.
9960   *
9961   */
9962  struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9963  {
9964  	struct trace_array *tr;
9965  
9966  	guard(mutex)(&event_mutex);
9967  	guard(mutex)(&trace_types_lock);
9968  
9969  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9970  		if (tr->name && strcmp(tr->name, name) == 0) {
9971  			tr->ref++;
9972  			return tr;
9973  		}
9974  	}
9975  
9976  	tr = trace_array_create_systems(name, systems, 0, 0);
9977  
9978  	if (IS_ERR(tr))
9979  		tr = NULL;
9980  	else
9981  		tr->ref++;
9982  
9983  	return tr;
9984  }
9985  EXPORT_SYMBOL_GPL(trace_array_get_by_name);
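/*
 * Editor's note: an illustrative in-kernel usage sketch (the instance name is
 * an assumption, not part of this file), following the rules in the kernel-doc
 * above:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_inst", NULL);
 *	if (tr) {
 *		... use the instance ...
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);	<- optional: delete the instance
 *	}
 */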
9986  
9987  static int __remove_instance(struct trace_array *tr)
9988  {
9989  	int i;
9990  
9991  	/* Reference counter for a newly created trace array = 1. */
9992  	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9993  		return -EBUSY;
9994  
9995  	list_del(&tr->list);
9996  
9997  	/* Disable all the flags that were enabled coming in */
9998  	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9999  		if ((1 << i) & ZEROED_TRACE_FLAGS)
10000  			set_tracer_flag(tr, 1 << i, 0);
10001  	}
10002  
10003  	if (printk_trace == tr)
10004  		update_printk_trace(&global_trace);
10005  
10006  	if (update_marker_trace(tr, 0))
10007  		synchronize_rcu();
10008  
10009  	tracing_set_nop(tr);
10010  	clear_ftrace_function_probes(tr);
10011  	event_trace_del_tracer(tr);
10012  	ftrace_clear_pids(tr);
10013  	ftrace_destroy_function_files(tr);
10014  	tracefs_remove(tr->dir);
10015  	free_percpu(tr->last_func_repeats);
10016  	free_trace_buffers(tr);
10017  	clear_tracing_err_log(tr);
10018  
10019  	if (tr->range_name) {
10020  		reserve_mem_release_by_name(tr->range_name);
10021  		kfree(tr->range_name);
10022  	}
10023  
10024  	for (i = 0; i < tr->nr_topts; i++) {
10025  		kfree(tr->topts[i].topts);
10026  	}
10027  	kfree(tr->topts);
10028  
10029  	free_cpumask_var(tr->pipe_cpumask);
10030  	free_cpumask_var(tr->tracing_cpumask);
10031  	kfree_const(tr->system_names);
10032  	kfree(tr->name);
10033  	kfree(tr);
10034  
10035  	return 0;
10036  }
10037  
10038  int trace_array_destroy(struct trace_array *this_tr)
10039  {
10040  	struct trace_array *tr;
10041  
10042  	if (!this_tr)
10043  		return -EINVAL;
10044  
10045  	guard(mutex)(&event_mutex);
10046  	guard(mutex)(&trace_types_lock);
10047  
10048  
10049  	/* Make sure the trace array exists before destroying it. */
10050  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10051  		if (tr == this_tr)
10052  			return __remove_instance(tr);
10053  	}
10054  
10055  	return -ENODEV;
10056  }
10057  EXPORT_SYMBOL_GPL(trace_array_destroy);
10058  
10059  static int instance_rmdir(const char *name)
10060  {
10061  	struct trace_array *tr;
10062  
10063  	guard(mutex)(&event_mutex);
10064  	guard(mutex)(&trace_types_lock);
10065  
10066  	tr = trace_array_find(name);
10067  	if (!tr)
10068  		return -ENODEV;
10069  
10070  	return __remove_instance(tr);
10071  }
10072  
10073  static __init void create_trace_instances(struct dentry *d_tracer)
10074  {
10075  	struct trace_array *tr;
10076  
10077  	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10078  							 instance_mkdir,
10079  							 instance_rmdir);
10080  	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10081  		return;
10082  
10083  	guard(mutex)(&event_mutex);
10084  	guard(mutex)(&trace_types_lock);
10085  
10086  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10087  		if (!tr->name)
10088  			continue;
10089  		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10090  			     "Failed to create instance directory\n"))
10091  			return;
10092  	}
10093  }
10094  
10095  static void
10096  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10097  {
10098  	int cpu;
10099  
10100  	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10101  			tr, &show_traces_fops);
10102  
10103  	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10104  			tr, &set_tracer_fops);
10105  
10106  	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10107  			  tr, &tracing_cpumask_fops);
10108  
10109  	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10110  			  tr, &tracing_iter_fops);
10111  
10112  	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10113  			  tr, &tracing_fops);
10114  
10115  	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10116  			  tr, &tracing_pipe_fops);
10117  
10118  	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10119  			  tr, &tracing_entries_fops);
10120  
10121  	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10122  			  tr, &tracing_total_entries_fops);
10123  
10124  	trace_create_file("free_buffer", 0200, d_tracer,
10125  			  tr, &tracing_free_buffer_fops);
10126  
10127  	trace_create_file("trace_marker", 0220, d_tracer,
10128  			  tr, &tracing_mark_fops);
10129  
10130  	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10131  
10132  	trace_create_file("trace_marker_raw", 0220, d_tracer,
10133  			  tr, &tracing_mark_raw_fops);
10134  
10135  	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10136  			  &trace_clock_fops);
10137  
10138  	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10139  			  tr, &rb_simple_fops);
10140  
10141  	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10142  			  &trace_time_stamp_mode_fops);
10143  
10144  	tr->buffer_percent = 50;
10145  
10146  	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10147  			tr, &buffer_percent_fops);
10148  
10149  	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10150  			  tr, &buffer_subbuf_size_fops);
10151  
10152  	create_trace_options_dir(tr);
10153  
10154  #ifdef CONFIG_TRACER_MAX_TRACE
10155  	trace_create_maxlat_file(tr, d_tracer);
10156  #endif
10157  
10158  	if (ftrace_create_function_files(tr, d_tracer))
10159  		MEM_FAIL(1, "Could not allocate function filter files");
10160  
10161  	if (tr->range_addr_start) {
10162  		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10163  				  tr, &last_boot_fops);
10164  #ifdef CONFIG_TRACER_SNAPSHOT
10165  	} else {
10166  		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10167  				  tr, &snapshot_fops);
10168  #endif
10169  	}
10170  
10171  	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10172  			  tr, &tracing_err_log_fops);
10173  
10174  	for_each_tracing_cpu(cpu)
10175  		tracing_init_tracefs_percpu(tr, cpu);
10176  
10177  	ftrace_init_tracefs(tr, d_tracer);
10178  }
10179  
10180  #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10181  static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10182  {
10183  	struct vfsmount *mnt;
10184  	struct file_system_type *type;
10185  	struct fs_context *fc;
10186  	int ret;
10187  
10188  	/*
10189  	 * To maintain backward compatibility for tools that mount
10190  	 * debugfs to get to the tracing facility, tracefs is automatically
10191  	 * mounted to the debugfs/tracing directory.
10192  	 */
10193  	type = get_fs_type("tracefs");
10194  	if (!type)
10195  		return NULL;
10196  
10197  	fc = fs_context_for_submount(type, mntpt);
10198  	put_filesystem(type);
10199  	if (IS_ERR(fc))
10200  		return ERR_CAST(fc);
10201  
10202  	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10203  
10204  	ret = vfs_parse_fs_string(fc, "source",
10205  				  "tracefs", strlen("tracefs"));
10206  	if (!ret)
10207  		mnt = fc_mount(fc);
10208  	else
10209  		mnt = ERR_PTR(ret);
10210  
10211  	put_fs_context(fc);
10212  	return mnt;
10213  }
10214  #endif
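
/*
 * Tools that still reach the tracing directory through debugfs can be
 * switched to the native mount point instead of relying on the
 * deprecated automount above, for example:
 *
 *	mount -t tracefs nodev /sys/kernel/tracing
 */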
10215  
10216  /**
10217   * tracing_init_dentry - initialize top level trace array
10218   *
10219   * This is called when creating files or directories in the tracing
10220   * directory. It is called via fs_initcall() by any of the boot up code
10221   * and returns zero on success or a negative error code on failure.
10222   */
10223  int tracing_init_dentry(void)
10224  {
10225  	struct trace_array *tr = &global_trace;
10226  
10227  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10228  		pr_warn("Tracing disabled due to lockdown\n");
10229  		return -EPERM;
10230  	}
10231  
10232  	/* The top level trace array uses NULL as parent */
10233  	if (tr->dir)
10234  		return 0;
10235  
10236  	if (WARN_ON(!tracefs_initialized()))
10237  		return -ENODEV;
10238  
10239  #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10240  	/*
10241  	 * As there may still be users that expect the tracing
10242  	 * files to exist in debugfs/tracing, we must automount
10243  	 * the tracefs file system there, so older tools still
10244  	 * work with the newer kernel.
10245  	 */
10246  	tr->dir = debugfs_create_automount("tracing", NULL,
10247  					   trace_automount, NULL);
10248  #endif
10249  
10250  	return 0;
10251  }
10252  
10253  extern struct trace_eval_map *__start_ftrace_eval_maps[];
10254  extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10255  
10256  static struct workqueue_struct *eval_map_wq __initdata;
10257  static struct work_struct eval_map_work __initdata;
10258  static struct work_struct tracerfs_init_work __initdata;
10259  
10260  static void __init eval_map_work_func(struct work_struct *work)
10261  {
10262  	int len;
10263  
10264  	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10265  	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10266  }
10267  
10268  static int __init trace_eval_init(void)
10269  {
10270  	INIT_WORK(&eval_map_work, eval_map_work_func);
10271  
10272  	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10273  	if (!eval_map_wq) {
10274  		pr_err("Unable to allocate eval_map_wq\n");
10275  		/* Do work here */
10276  		eval_map_work_func(&eval_map_work);
10277  		return -ENOMEM;
10278  	}
10279  
10280  	queue_work(eval_map_wq, &eval_map_work);
10281  	return 0;
10282  }
10283  
10284  subsys_initcall(trace_eval_init);
10285  
10286  static int __init trace_eval_sync(void)
10287  {
10288  	/* Make sure the eval map updates are finished */
10289  	if (eval_map_wq)
10290  		destroy_workqueue(eval_map_wq);
10291  	return 0;
10292  }
10293  
10294  late_initcall_sync(trace_eval_sync);
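
/*
 * The eval maps handled by the work queue above come from
 * TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF() uses in trace event headers.
 * A minimal sketch, with a purely hypothetical enum value:
 *
 *	TRACE_DEFINE_ENUM(MY_STATE_RUNNING);
 *
 * This records the enum's numeric value so that print formats using it
 * (e.g. in __print_symbolic()) can be resolved by user space tools
 * reading the event's format file.
 */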
10295  
10296  
10297  #ifdef CONFIG_MODULES
10298  
10299  bool module_exists(const char *module)
10300  {
10301  	/* All modules have the symbol __this_module */
10302  	static const char this_mod[] = "__this_module";
10303  	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10304  	unsigned long val;
10305  	int n;
10306  
10307  	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10308  
10309  	if (n > sizeof(modname) - 1)
10310  		return false;
10311  
10312  	val = module_kallsyms_lookup_name(modname);
10313  	return val != 0;
10314  }
10315  
10316  static void trace_module_add_evals(struct module *mod)
10317  {
10318  	/*
10319  	 * Modules with bad taint do not have events created, do
10320  	 * not bother with enums either.
10321  	 */
10322  	if (trace_module_has_bad_taint(mod))
10323  		return;
10324  
10325  	/* Even with no trace_evals, this is still needed to sanitize field types. */
10326  	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10327  }
10328  
10329  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10330  static void trace_module_remove_evals(struct module *mod)
10331  {
10332  	union trace_eval_map_item *map;
10333  	union trace_eval_map_item **last = &trace_eval_maps;
10334  
10335  	if (!mod->num_trace_evals)
10336  		return;
10337  
10338  	guard(mutex)(&trace_eval_mutex);
10339  
10340  	map = trace_eval_maps;
10341  
10342  	while (map) {
10343  		if (map->head.mod == mod)
10344  			break;
10345  		map = trace_eval_jmp_to_tail(map);
10346  		last = &map->tail.next;
10347  		map = map->tail.next;
10348  	}
10349  	if (!map)
10350  		return;
10351  
10352  	*last = trace_eval_jmp_to_tail(map)->tail.next;
10353  	kfree(map);
10354  }
10355  #else
10356  static inline void trace_module_remove_evals(struct module *mod) { }
10357  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10358  
10359  static void trace_module_record(struct module *mod, bool add)
10360  {
10361  	struct trace_array *tr;
10362  	unsigned long flags;
10363  
10364  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10365  		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10366  		/* Update any persistent trace array that has already been started */
10367  		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10368  			guard(mutex)(&scratch_mutex);
10369  			save_mod(mod, tr);
10370  		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10371  			/* Update delta if the module was loaded in the previous boot */
10372  			make_mod_delta(mod, tr);
10373  		}
10374  	}
10375  }
10376  
10377  static int trace_module_notify(struct notifier_block *self,
10378  			       unsigned long val, void *data)
10379  {
10380  	struct module *mod = data;
10381  
10382  	switch (val) {
10383  	case MODULE_STATE_COMING:
10384  		trace_module_add_evals(mod);
10385  		trace_module_record(mod, true);
10386  		break;
10387  	case MODULE_STATE_GOING:
10388  		trace_module_remove_evals(mod);
10389  		trace_module_record(mod, false);
10390  		break;
10391  	}
10392  
10393  	return NOTIFY_OK;
10394  }
10395  
10396  static struct notifier_block trace_module_nb = {
10397  	.notifier_call = trace_module_notify,
10398  	.priority = 0,
10399  };
10400  #endif /* CONFIG_MODULES */
10401  
10402  static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10403  {
10404  
10405  	event_trace_init();
10406  
10407  	init_tracer_tracefs(&global_trace, NULL);
10408  	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10409  
10410  	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10411  			&global_trace, &tracing_thresh_fops);
10412  
10413  	trace_create_file("README", TRACE_MODE_READ, NULL,
10414  			NULL, &tracing_readme_fops);
10415  
10416  	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10417  			NULL, &tracing_saved_cmdlines_fops);
10418  
10419  	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10420  			  NULL, &tracing_saved_cmdlines_size_fops);
10421  
10422  	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10423  			NULL, &tracing_saved_tgids_fops);
10424  
10425  	trace_create_eval_file(NULL);
10426  
10427  #ifdef CONFIG_MODULES
10428  	register_module_notifier(&trace_module_nb);
10429  #endif
10430  
10431  #ifdef CONFIG_DYNAMIC_FTRACE
10432  	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10433  			NULL, &tracing_dyn_info_fops);
10434  #endif
10435  
10436  	create_trace_instances(NULL);
10437  
10438  	update_tracer_options(&global_trace);
10439  }
10440  
10441  static __init int tracer_init_tracefs(void)
10442  {
10443  	int ret;
10444  
10445  	trace_access_lock_init();
10446  
10447  	ret = tracing_init_dentry();
10448  	if (ret)
10449  		return 0;
10450  
10451  	if (eval_map_wq) {
10452  		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10453  		queue_work(eval_map_wq, &tracerfs_init_work);
10454  	} else {
10455  		tracer_init_tracefs_work_func(NULL);
10456  	}
10457  
10458  	rv_init_interface();
10459  
10460  	return 0;
10461  }
10462  
10463  fs_initcall(tracer_init_tracefs);
10464  
10465  static int trace_die_panic_handler(struct notifier_block *self,
10466  				unsigned long ev, void *unused);
10467  
10468  static struct notifier_block trace_panic_notifier = {
10469  	.notifier_call = trace_die_panic_handler,
10470  	.priority = INT_MAX - 1,
10471  };
10472  
10473  static struct notifier_block trace_die_notifier = {
10474  	.notifier_call = trace_die_panic_handler,
10475  	.priority = INT_MAX - 1,
10476  };
10477  
10478  /*
10479   * The idea is to execute the following die/panic callback early, in order
10480   * to avoid showing irrelevant information in the trace (like other panic
10481   * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10482   * warnings get disabled (to prevent potential log flooding).
10483   */
10484  static int trace_die_panic_handler(struct notifier_block *self,
10485  				unsigned long ev, void *unused)
10486  {
10487  	if (!ftrace_dump_on_oops_enabled())
10488  		return NOTIFY_DONE;
10489  
10490  	/* The die notifier requires DIE_OOPS to trigger */
10491  	if (self == &trace_die_notifier && ev != DIE_OOPS)
10492  		return NOTIFY_DONE;
10493  
10494  	ftrace_dump(DUMP_PARAM);
10495  
10496  	return NOTIFY_DONE;
10497  }
10498  
10499  /*
10500   * printk is limited to a max of 1024 characters; we really don't need it that big.
10501   * Nothing should be printing 1000 characters anyway.
10502   */
10503  #define TRACE_MAX_PRINT		1000
10504  
10505  /*
10506   * Define here KERN_TRACE so that we have one place to modify
10507   * it if we decide to change what log level the ftrace dump
10508   * should be at.
10509   */
10510  #define KERN_TRACE		KERN_EMERG
10511  
10512  void
10513  trace_printk_seq(struct trace_seq *s)
10514  {
10515  	/* Probably should print a warning here. */
10516  	if (s->seq.len >= TRACE_MAX_PRINT)
10517  		s->seq.len = TRACE_MAX_PRINT;
10518  
10519  	/*
10520  	 * More paranoid code. Although the buffer size is set to
10521  	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10522  	 * an extra layer of protection.
10523  	 */
10524  	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10525  		s->seq.len = s->seq.size - 1;
10526  
10527  	/* Should already be NUL-terminated, but be paranoid anyway. */
10528  	s->buffer[s->seq.len] = 0;
10529  
10530  	printk(KERN_TRACE "%s", s->buffer);
10531  
10532  	trace_seq_init(s);
10533  }
10534  
10535  static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10536  {
10537  	iter->tr = tr;
10538  	iter->trace = iter->tr->current_trace;
10539  	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10540  	iter->array_buffer = &tr->array_buffer;
10541  
10542  	if (iter->trace && iter->trace->open)
10543  		iter->trace->open(iter);
10544  
10545  	/* Annotate start of buffers if we had overruns */
10546  	if (ring_buffer_overruns(iter->array_buffer->buffer))
10547  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10548  
10549  	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10550  	if (trace_clocks[iter->tr->clock_id].in_ns)
10551  		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10552  
10553  	/* Can not use kmalloc for iter.temp and iter.fmt */
10554  	iter->temp = static_temp_buf;
10555  	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10556  	iter->fmt = static_fmt_buf;
10557  	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10558  }
10559  
10560  void trace_init_global_iter(struct trace_iterator *iter)
10561  {
10562  	trace_init_iter(iter, &global_trace);
10563  }
10564  
10565  static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10566  {
10567  	/* use static because iter can be a bit big for the stack */
10568  	static struct trace_iterator iter;
10569  	unsigned int old_userobj;
10570  	unsigned long flags;
10571  	int cnt = 0;
10572  
10573  	/*
10574  	 * Always turn off tracing when we dump.
10575  	 * We don't need to show trace output of what happens
10576  	 * between multiple crashes.
10577  	 *
10578  	 * If the user does a sysrq-z, then they can re-enable
10579  	 * tracing with echo 1 > tracing_on.
10580  	 */
10581  	tracer_tracing_off(tr);
10582  
10583  	local_irq_save(flags);
10584  
10585  	/* Simulate the iterator */
10586  	trace_init_iter(&iter, tr);
10587  
10588  	/* While dumping, do not allow the buffer to be enabled */
10589  	tracer_tracing_disable(tr);
10590  
10591  	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10592  
10593  	/* don't look at user memory in panic mode */
10594  	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10595  
10596  	if (dump_mode == DUMP_ORIG)
10597  		iter.cpu_file = raw_smp_processor_id();
10598  	else
10599  		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10600  
10601  	if (tr == &global_trace)
10602  		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10603  	else
10604  		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10605  
10606  	/* Did function tracer already get disabled? */
10607  	if (ftrace_is_dead()) {
10608  		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10609  		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10610  	}
10611  
10612  	/*
10613  	 * We need to stop all tracing on all CPUs to read
10614  	 * the next buffer. This is a bit expensive, but it is
10615  	 * not done often. We read everything we can,
10616  	 * and then release the locks again.
10617  	 */
10618  
10619  	while (!trace_empty(&iter)) {
10620  
10621  		if (!cnt)
10622  			printk(KERN_TRACE "---------------------------------\n");
10623  
10624  		cnt++;
10625  
10626  		trace_iterator_reset(&iter);
10627  		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10628  
10629  		if (trace_find_next_entry_inc(&iter) != NULL) {
10630  			int ret;
10631  
10632  			ret = print_trace_line(&iter);
10633  			if (ret != TRACE_TYPE_NO_CONSUME)
10634  				trace_consume(&iter);
10635  		}
10636  		touch_nmi_watchdog();
10637  
10638  		trace_printk_seq(&iter.seq);
10639  	}
10640  
10641  	if (!cnt)
10642  		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10643  	else
10644  		printk(KERN_TRACE "---------------------------------\n");
10645  
10646  	tr->trace_flags |= old_userobj;
10647  
10648  	tracer_tracing_enable(tr);
10649  	local_irq_restore(flags);
10650  }
10651  
10652  static void ftrace_dump_by_param(void)
10653  {
10654  	bool first_param = true;
10655  	char dump_param[MAX_TRACER_SIZE];
10656  	char *buf, *token, *inst_name;
10657  	struct trace_array *tr;
10658  
10659  	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10660  	buf = dump_param;
10661  
10662  	while ((token = strsep(&buf, ",")) != NULL) {
10663  		if (first_param) {
10664  			first_param = false;
10665  			if (!strcmp("0", token))
10666  				continue;
10667  			else if (!strcmp("1", token)) {
10668  				ftrace_dump_one(&global_trace, DUMP_ALL);
10669  				continue;
10670  			}
10671  			else if (!strcmp("2", token) ||
10672  			  !strcmp("orig_cpu", token)) {
10673  				ftrace_dump_one(&global_trace, DUMP_ORIG);
10674  				continue;
10675  			}
10676  		}
10677  
10678  		inst_name = strsep(&token, "=");
10679  		tr = trace_array_find(inst_name);
10680  		if (!tr) {
10681  			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10682  			continue;
10683  		}
10684  
10685  		if (token && (!strcmp("2", token) ||
10686  			  !strcmp("orig_cpu", token)))
10687  			ftrace_dump_one(tr, DUMP_ORIG);
10688  		else
10689  			ftrace_dump_one(tr, DUMP_ALL);
10690  	}
10691  }
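
/*
 * With the parsing above, the ftrace_dump_on_oops command line parameter
 * can select the global buffer, a single CPU, or named instances. The
 * instance name "foo" below is purely illustrative:
 *
 *	ftrace_dump_on_oops			dump all CPUs of the global buffer
 *	ftrace_dump_on_oops=orig_cpu		dump only the CPU that oopsed
 *	ftrace_dump_on_oops=1,foo=orig_cpu	also dump instance "foo", one CPU
 */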
10692  
10693  void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10694  {
10695  	static atomic_t dump_running;
10696  
10697  	/* Only allow one dump user at a time. */
10698  	if (atomic_inc_return(&dump_running) != 1) {
10699  		atomic_dec(&dump_running);
10700  		return;
10701  	}
10702  
10703  	switch (oops_dump_mode) {
10704  	case DUMP_ALL:
10705  		ftrace_dump_one(&global_trace, DUMP_ALL);
10706  		break;
10707  	case DUMP_ORIG:
10708  		ftrace_dump_one(&global_trace, DUMP_ORIG);
10709  		break;
10710  	case DUMP_PARAM:
10711  		ftrace_dump_by_param();
10712  		break;
10713  	case DUMP_NONE:
10714  		break;
10715  	default:
10716  		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10717  		ftrace_dump_one(&global_trace, DUMP_ALL);
10718  	}
10719  
10720  	atomic_dec(&dump_running);
10721  }
10722  EXPORT_SYMBOL_GPL(ftrace_dump);
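
/*
 * As a debugging aid, callers sometimes dump the buffer to the console
 * when they hit a state they cannot recover from. A minimal sketch (the
 * condition is hypothetical):
 *
 *	if (WARN_ON_ONCE(device_state_corrupted))
 *		ftrace_dump(DUMP_ALL);
 *
 * Note that dumping turns tracing off, so this is a one-shot diagnostic.
 */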
10723  
10724  #define WRITE_BUFSIZE  4096
10725  
10726  ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10727  				size_t count, loff_t *ppos,
10728  				int (*createfn)(const char *))
10729  {
10730  	char *kbuf __free(kfree) = NULL;
10731  	char *buf, *tmp;
10732  	int ret = 0;
10733  	size_t done = 0;
10734  	size_t size;
10735  
10736  	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10737  	if (!kbuf)
10738  		return -ENOMEM;
10739  
10740  	while (done < count) {
10741  		size = count - done;
10742  
10743  		if (size >= WRITE_BUFSIZE)
10744  			size = WRITE_BUFSIZE - 1;
10745  
10746  		if (copy_from_user(kbuf, buffer + done, size))
10747  			return -EFAULT;
10748  
10749  		kbuf[size] = '\0';
10750  		buf = kbuf;
10751  		do {
10752  			tmp = strchr(buf, '\n');
10753  			if (tmp) {
10754  				*tmp = '\0';
10755  				size = tmp - buf + 1;
10756  			} else {
10757  				size = strlen(buf);
10758  				if (done + size < count) {
10759  					if (buf != kbuf)
10760  						break;
10761  					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10762  					pr_warn("Line length is too long: Should be less than %d\n",
10763  						WRITE_BUFSIZE - 2);
10764  					return -EINVAL;
10765  				}
10766  			}
10767  			done += size;
10768  
10769  			/* Remove comments */
10770  			tmp = strchr(buf, '#');
10771  
10772  			if (tmp)
10773  				*tmp = '\0';
10774  
10775  			ret = createfn(buf);
10776  			if (ret)
10777  				return ret;
10778  			buf += size;
10779  
10780  		} while (done < count);
10781  	}
10782  	return done;
10783  }
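
/*
 * This helper backs the write() path of the dynamic event files, so a
 * single user write may carry several commands separated by newlines,
 * with '#' starting a comment. An illustrative buffer (the probe names
 * and locations are hypothetical):
 *
 *	"# two probes in one write\n"
 *	"p:my_open do_sys_openat2\n"
 *	"r:my_open_ret do_sys_openat2 $retval\n"
 *
 * createfn() is then invoked once per parsed line.
 */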
10784  
10785  #ifdef CONFIG_TRACER_MAX_TRACE
10786  __init static bool tr_needs_alloc_snapshot(const char *name)
10787  {
10788  	char *test;
10789  	int len = strlen(name);
10790  	bool ret;
10791  
10792  	if (!boot_snapshot_index)
10793  		return false;
10794  
10795  	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10796  	    boot_snapshot_info[len] == '\t')
10797  		return true;
10798  
10799  	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10800  	if (!test)
10801  		return false;
10802  
10803  	sprintf(test, "\t%s\t", name);
10804  	ret = strstr(boot_snapshot_info, test) == NULL;
10805  	kfree(test);
10806  	return ret;
10807  }
10808  
10809  __init static void do_allocate_snapshot(const char *name)
10810  {
10811  	if (!tr_needs_alloc_snapshot(name))
10812  		return;
10813  
10814  	/*
10815  	 * When allocate_snapshot is set, the next call to
10816  	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10817  	 * will allocate the snapshot buffer. That will also clear
10818  	 * this flag.
10819  	 */
10820  	allocate_snapshot = true;
10821  }
10822  #else
10823  static inline void do_allocate_snapshot(const char *name) { }
10824  #endif
10825  
10826  __init static void enable_instances(void)
10827  {
10828  	struct trace_array *tr;
10829  	bool memmap_area = false;
10830  	char *curr_str;
10831  	char *name;
10832  	char *str;
10833  	char *tok;
10834  
10835  	/* A tab is always appended */
10836  	boot_instance_info[boot_instance_index - 1] = '\0';
10837  	str = boot_instance_info;
10838  
10839  	while ((curr_str = strsep(&str, "\t"))) {
10840  		phys_addr_t start = 0;
10841  		phys_addr_t size = 0;
10842  		unsigned long addr = 0;
10843  		bool traceprintk = false;
10844  		bool traceoff = false;
10845  		char *flag_delim;
10846  		char *addr_delim;
10847  		char *rname __free(kfree) = NULL;
10848  
10849  		tok = strsep(&curr_str, ",");
10850  
10851  		flag_delim = strchr(tok, '^');
10852  		addr_delim = strchr(tok, '@');
10853  
10854  		if (addr_delim)
10855  			*addr_delim++ = '\0';
10856  
10857  		if (flag_delim)
10858  			*flag_delim++ = '\0';
10859  
10860  		name = tok;
10861  
10862  		if (flag_delim) {
10863  			char *flag;
10864  
10865  			while ((flag = strsep(&flag_delim, "^"))) {
10866  				if (strcmp(flag, "traceoff") == 0) {
10867  					traceoff = true;
10868  				} else if ((strcmp(flag, "printk") == 0) ||
10869  					   (strcmp(flag, "traceprintk") == 0) ||
10870  					   (strcmp(flag, "trace_printk") == 0)) {
10871  					traceprintk = true;
10872  				} else {
10873  					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10874  						flag, name);
10875  				}
10876  			}
10877  		}
10878  
10879  		tok = addr_delim;
10880  		if (tok && isdigit(*tok)) {
10881  			start = memparse(tok, &tok);
10882  			if (!start) {
10883  				pr_warn("Tracing: Invalid boot instance address for %s\n",
10884  					name);
10885  				continue;
10886  			}
10887  			if (*tok != ':') {
10888  				pr_warn("Tracing: No size specified for instance %s\n", name);
10889  				continue;
10890  			}
10891  			tok++;
10892  			size = memparse(tok, &tok);
10893  			if (!size) {
10894  				pr_warn("Tracing: Invalid boot instance size for %s\n",
10895  					name);
10896  				continue;
10897  			}
10898  			memmap_area = true;
10899  		} else if (tok) {
10900  			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10901  				start = 0;
10902  				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10903  				continue;
10904  			}
10905  			rname = kstrdup(tok, GFP_KERNEL);
10906  		}
10907  
10908  		if (start) {
10909  			/* Start and size must be page aligned */
10910  			if (start & ~PAGE_MASK) {
10911  				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10912  				continue;
10913  			}
10914  			if (size & ~PAGE_MASK) {
10915  				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10916  				continue;
10917  			}
10918  
10919  			if (memmap_area)
10920  				addr = map_pages(start, size);
10921  			else
10922  				addr = (unsigned long)phys_to_virt(start);
10923  			if (addr) {
10924  				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10925  					name, &start, (unsigned long)size);
10926  			} else {
10927  				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10928  				continue;
10929  			}
10930  		} else {
10931  			/* Only non mapped buffers have snapshot buffers */
10932  			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10933  				do_allocate_snapshot(name);
10934  		}
10935  
10936  		tr = trace_array_create_systems(name, NULL, addr, size);
10937  		if (IS_ERR(tr)) {
10938  			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10939  			continue;
10940  		}
10941  
10942  		if (traceoff)
10943  			tracer_tracing_off(tr);
10944  
10945  		if (traceprintk)
10946  			update_printk_trace(tr);
10947  
10948  		/*
10949  		 * memmap'd buffers can not be freed.
10950  		 */
10951  		if (memmap_area) {
10952  			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10953  			tr->ref++;
10954  		}
10955  
10956  		if (start) {
10957  			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10958  			tr->range_name = no_free_ptr(rname);
10959  		}
10960  
10961  		while ((tok = strsep(&curr_str, ","))) {
10962  			early_enable_events(tr, tok, true);
10963  		}
10964  	}
10965  }
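
/*
 * Putting the parsing above together, the boot command line can create
 * instances with flags, a backing memory region, and an initial set of
 * events. The names, address and size below are only examples:
 *
 *	trace_instance=foo^traceoff^traceprintk@0x1000000:2M,sched:sched_switch
 *	trace_instance=bar@my_rmem,irq
 *
 * The first maps "foo" to a fixed physical region and leaves its tracing
 * disabled; the second binds "bar" to a reserve_mem region named "my_rmem"
 * and enables the irq event subsystem.
 */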
10966  
10967  __init static int tracer_alloc_buffers(void)
10968  {
10969  	int ring_buf_size;
10970  	int ret = -ENOMEM;
10971  
10972  
10973  	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10974  		pr_warn("Tracing disabled due to lockdown\n");
10975  		return -EPERM;
10976  	}
10977  
10978  	/*
10979  	 * Make sure we don't accidentally add more trace options
10980  	 * than we have bits for.
10981  	 */
10982  	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10983  
10984  	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10985  		return -ENOMEM;
10986  
10987  	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10988  		goto out_free_buffer_mask;
10989  
10990  	/* Only allocate trace_printk buffers if a trace_printk exists */
10991  	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10992  		/* Must be called before global_trace.buffer is allocated */
10993  		trace_printk_init_buffers();
10994  
10995  	/* To save memory, keep the ring buffer size to its minimum */
10996  	if (global_trace.ring_buffer_expanded)
10997  		ring_buf_size = trace_buf_size;
10998  	else
10999  		ring_buf_size = 1;
11000  
11001  	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11002  	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11003  
11004  	raw_spin_lock_init(&global_trace.start_lock);
11005  
11006  	/*
11007  	 * The prepare callback allocates some memory for the ring buffer. We
11008  	 * don't free the buffer if the CPU goes down. If we were to free
11009  	 * the buffer, then the user would lose any trace that was in the
11010  	 * buffer. The memory will be removed once the "instance" is removed.
11011  	 */
11012  	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11013  				      "trace/RB:prepare", trace_rb_cpu_prepare,
11014  				      NULL);
11015  	if (ret < 0)
11016  		goto out_free_cpumask;
11017  	/* Used for event triggers */
11018  	ret = -ENOMEM;
11019  	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11020  	if (!temp_buffer)
11021  		goto out_rm_hp_state;
11022  
11023  	if (trace_create_savedcmd() < 0)
11024  		goto out_free_temp_buffer;
11025  
11026  	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11027  		goto out_free_savedcmd;
11028  
11029  	/* TODO: make the number of buffers hot pluggable with CPUS */
11030  	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11031  		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11032  		goto out_free_pipe_cpumask;
11033  	}
11034  	if (global_trace.buffer_disabled)
11035  		tracing_off();
11036  
11037  	if (trace_boot_clock) {
11038  		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11039  		if (ret < 0)
11040  			pr_warn("Trace clock %s not defined, going back to default\n",
11041  				trace_boot_clock);
11042  	}
11043  
11044  	/*
11045  	 * register_tracer() might reference current_trace, so it
11046  	 * needs to be set before we register anything. This is
11047  	 * just a bootstrap of current_trace anyway.
11048  	 */
11049  	global_trace.current_trace = &nop_trace;
11050  
11051  	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11052  #ifdef CONFIG_TRACER_MAX_TRACE
11053  	spin_lock_init(&global_trace.snapshot_trigger_lock);
11054  #endif
11055  	ftrace_init_global_array_ops(&global_trace);
11056  
11057  #ifdef CONFIG_MODULES
11058  	INIT_LIST_HEAD(&global_trace.mod_events);
11059  #endif
11060  
11061  	init_trace_flags_index(&global_trace);
11062  
11063  	register_tracer(&nop_trace);
11064  
11065  	/* Function tracing may start here (via kernel command line) */
11066  	init_function_trace();
11067  
11068  	/* All seems OK, enable tracing */
11069  	tracing_disabled = 0;
11070  
11071  	atomic_notifier_chain_register(&panic_notifier_list,
11072  				       &trace_panic_notifier);
11073  
11074  	register_die_notifier(&trace_die_notifier);
11075  
11076  	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11077  
11078  	INIT_LIST_HEAD(&global_trace.systems);
11079  	INIT_LIST_HEAD(&global_trace.events);
11080  	INIT_LIST_HEAD(&global_trace.hist_vars);
11081  	INIT_LIST_HEAD(&global_trace.err_log);
11082  	list_add(&global_trace.marker_list, &marker_copies);
11083  	list_add(&global_trace.list, &ftrace_trace_arrays);
11084  
11085  	apply_trace_boot_options();
11086  
11087  	register_snapshot_cmd();
11088  
11089  	return 0;
11090  
11091  out_free_pipe_cpumask:
11092  	free_cpumask_var(global_trace.pipe_cpumask);
11093  out_free_savedcmd:
11094  	trace_free_saved_cmdlines_buffer();
11095  out_free_temp_buffer:
11096  	ring_buffer_free(temp_buffer);
11097  out_rm_hp_state:
11098  	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11099  out_free_cpumask:
11100  	free_cpumask_var(global_trace.tracing_cpumask);
11101  out_free_buffer_mask:
11102  	free_cpumask_var(tracing_buffer_mask);
11103  	return ret;
11104  }
11105  
11106  #ifdef CONFIG_FUNCTION_TRACER
11107  /* Used to set module cached ftrace filtering at boot up */
11108  __init struct trace_array *trace_get_global_array(void)
11109  {
11110  	return &global_trace;
11111  }
11112  #endif
11113  
11114  void __init ftrace_boot_snapshot(void)
11115  {
11116  #ifdef CONFIG_TRACER_MAX_TRACE
11117  	struct trace_array *tr;
11118  
11119  	if (!snapshot_at_boot)
11120  		return;
11121  
11122  	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11123  		if (!tr->allocated_snapshot)
11124  			continue;
11125  
11126  		tracing_snapshot_instance(tr);
11127  		trace_array_puts(tr, "** Boot snapshot taken **\n");
11128  	}
11129  #endif
11130  }
11131  
11132  void __init early_trace_init(void)
11133  {
11134  	if (tracepoint_printk) {
11135  		tracepoint_print_iter =
11136  			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11137  		if (MEM_FAIL(!tracepoint_print_iter,
11138  			     "Failed to allocate trace iterator\n"))
11139  			tracepoint_printk = 0;
11140  		else
11141  			static_key_enable(&tracepoint_printk_key.key);
11142  	}
11143  	tracer_alloc_buffers();
11144  
11145  	init_events();
11146  }
11147  
11148  void __init trace_init(void)
11149  {
11150  	trace_event_init();
11151  
11152  	if (boot_instance_index)
11153  		enable_instances();
11154  }
11155  
11156  __init static void clear_boot_tracer(void)
11157  {
11158  	/*
11159  	 * The buffer holding the default boot-up tracer name is in an init
11160  	 * section. This function is called at late init. If we did not
11161  	 * find the boot tracer, then clear it out, to prevent
11162  	 * later registration from accessing the buffer that is
11163  	 * about to be freed.
11164  	 */
11165  	if (!default_bootup_tracer)
11166  		return;
11167  
11168  	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11169  	       default_bootup_tracer);
11170  	default_bootup_tracer = NULL;
11171  }
11172  
11173  #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11174  __init static void tracing_set_default_clock(void)
11175  {
11176  	/* sched_clock_stable() is determined in late_initcall */
11177  	if (!trace_boot_clock && !sched_clock_stable()) {
11178  		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11179  			pr_warn("Can not set tracing clock due to lockdown\n");
11180  			return;
11181  		}
11182  
11183  		printk(KERN_WARNING
11184  		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11185  		       "If you want to keep using the local clock, then add:\n"
11186  		       "  \"trace_clock=local\"\n"
11187  		       "on the kernel command line\n");
11188  		tracing_set_clock(&global_trace, "global");
11189  	}
11190  }
11191  #else
11192  static inline void tracing_set_default_clock(void) { }
11193  #endif
11194  
11195  __init static int late_trace_init(void)
11196  {
11197  	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11198  		static_key_disable(&tracepoint_printk_key.key);
11199  		tracepoint_printk = 0;
11200  	}
11201  
11202  	if (traceoff_after_boot)
11203  		tracing_off();
11204  
11205  	tracing_set_default_clock();
11206  	clear_boot_tracer();
11207  	return 0;
11208  }
11209  
11210  late_initcall_sync(late_trace_init);
11211