1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer-triggered IRQ and thread.
5  *
6  * Based on "hwlat_detector" tracer by:
7  *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
8  *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
9  *   With feedback from Clark Williams <williams@redhat.com>
10  *
11  * And also based on the rtsl tracer presented on:
12  *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
13  *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
14  *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
15  *
16  * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
17  */
18 
19 #include <linux/kthread.h>
20 #include <linux/tracefs.h>
21 #include <linux/uaccess.h>
22 #include <linux/cpumask.h>
23 #include <linux/delay.h>
24 #include <linux/sched/clock.h>
25 #include <uapi/linux/sched/types.h>
26 #include <linux/sched.h>
27 #include "trace.h"
28 
29 #ifdef CONFIG_X86_LOCAL_APIC
30 #include <asm/trace/irq_vectors.h>
31 #undef TRACE_INCLUDE_PATH
32 #undef TRACE_INCLUDE_FILE
33 #endif /* CONFIG_X86_LOCAL_APIC */
34 
35 #include <trace/events/irq.h>
36 #include <trace/events/sched.h>
37 
38 #define CREATE_TRACE_POINTS
39 #include <trace/events/osnoise.h>
40 
41 /*
42  * Default values.
43  */
44 #define BANNER			"osnoise: "
45 #define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
46 #define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */
47 
48 #define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
49 #define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
50 
51 /*
52  * osnoise/options entries.
53  */
54 enum osnoise_options_index {
55 	OSN_DEFAULTS = 0,
56 	OSN_WORKLOAD,
57 	OSN_PANIC_ON_STOP,
58 	OSN_PREEMPT_DISABLE,
59 	OSN_IRQ_DISABLE,
60 	OSN_MAX
61 };
62 
63 static const char * const osnoise_options_str[OSN_MAX] = {
64 							"DEFAULTS",
65 							"OSNOISE_WORKLOAD",
66 							"PANIC_ON_STOP",
67 							"OSNOISE_PREEMPT_DISABLE",
68 							"OSNOISE_IRQ_DISABLE" };
69 
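/*
 * osnoise_options is a bitmask indexed by osnoise_options_index: the 0x2
 * default sets only the OSN_WORKLOAD bit, so the dispatcher workload is
 * enabled by default. Individual options are checked with test_bit(), e.g.:
 *
 *	if (test_bit(OSN_WORKLOAD, &osnoise_options))
 *		...
 */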
70 #define OSN_DEFAULT_OPTIONS		0x2
71 static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
72 
73 /*
74  * trace_array of the enabled osnoise/timerlat instances.
75  */
76 struct osnoise_instance {
77 	struct list_head	list;
78 	struct trace_array	*tr;
79 };
80 
81 static struct list_head osnoise_instances;
82 
83 static bool osnoise_has_registered_instances(void)
84 {
85 	return !!list_first_or_null_rcu(&osnoise_instances,
86 					struct osnoise_instance,
87 					list);
88 }
89 
90 /*
91  * osnoise_instance_registered - check if a tr is already registered
92  */
93 static int osnoise_instance_registered(struct trace_array *tr)
94 {
95 	struct osnoise_instance *inst;
96 	int found = 0;
97 
98 	rcu_read_lock();
99 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
100 		if (inst->tr == tr)
101 			found = 1;
102 	}
103 	rcu_read_unlock();
104 
105 	return found;
106 }
107 
108 /*
109  * osnoise_register_instance - register a new trace instance
110  *
111  * Register a trace_array *tr in the list of instances running
112  * osnoise/timerlat tracers.
113  */
114 static int osnoise_register_instance(struct trace_array *tr)
115 {
116 	struct osnoise_instance *inst;
117 
118 	/*
119 	 * register/unregister serialization is provided by trace's
120 	 * trace_types_lock.
121 	 */
122 	lockdep_assert_held(&trace_types_lock);
123 
124 	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
125 	if (!inst)
126 		return -ENOMEM;
127 
128 	INIT_LIST_HEAD_RCU(&inst->list);
129 	inst->tr = tr;
130 	list_add_tail_rcu(&inst->list, &osnoise_instances);
131 
132 	return 0;
133 }
134 
135 /*
 * osnoise_unregister_instance - unregister a registered trace instance
137  *
138  * Remove the trace_array *tr from the list of instances running
139  * osnoise/timerlat tracers.
140  */
141 static void osnoise_unregister_instance(struct trace_array *tr)
142 {
143 	struct osnoise_instance *inst;
144 	int found = 0;
145 
146 	/*
147 	 * register/unregister serialization is provided by trace's
148 	 * trace_types_lock.
149 	 */
150 	list_for_each_entry_rcu(inst, &osnoise_instances, list,
151 				lockdep_is_held(&trace_types_lock)) {
152 		if (inst->tr == tr) {
153 			list_del_rcu(&inst->list);
154 			found = 1;
155 			break;
156 		}
157 	}
158 
159 	if (!found)
160 		return;
161 
162 	kvfree_rcu_mightsleep(inst);
163 }
164 
165 /*
166  * NMI runtime info.
167  */
168 struct osn_nmi {
169 	u64	count;
170 	u64	delta_start;
171 };
172 
173 /*
174  * IRQ runtime info.
175  */
176 struct osn_irq {
177 	u64	count;
178 	u64	arrival_time;
179 	u64	delta_start;
180 };
181 
182 #define IRQ_CONTEXT	0
183 #define THREAD_CONTEXT	1
184 #define THREAD_URET	2
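/*
 * The context values above are reported in timerlat_sample.context: the
 * timer latency is measured at the timer IRQ handler (IRQ_CONTEXT) and at
 * the timerlat thread (THREAD_CONTEXT); THREAD_URET is used by the
 * user-space workload case, at the return to user-space.
 */
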
185 /*
 * softirq runtime info.
187  */
188 struct osn_softirq {
189 	u64	count;
190 	u64	arrival_time;
191 	u64	delta_start;
192 };
193 
194 /*
195  * thread runtime info.
196  */
197 struct osn_thread {
198 	u64	count;
199 	u64	arrival_time;
200 	u64	delta_start;
201 };
202 
203 /*
204  * Runtime information: this structure saves the runtime information used by
205  * one sampling thread.
206  */
207 struct osnoise_variables {
208 	struct task_struct	*kthread;
209 	bool			sampling;
210 	pid_t			pid;
211 	struct osn_nmi		nmi;
212 	struct osn_irq		irq;
213 	struct osn_softirq	softirq;
214 	struct osn_thread	thread;
215 	local_t			int_counter;
216 };
217 
218 /*
219  * Per-cpu runtime information.
220  */
221 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
222 
223 /*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
225  */
226 static inline struct osnoise_variables *this_cpu_osn_var(void)
227 {
228 	return this_cpu_ptr(&per_cpu_osnoise_var);
229 }
230 
231 /*
232  * Protect the interface.
233  */
234 static struct mutex interface_lock;
235 
236 #ifdef CONFIG_TIMERLAT_TRACER
237 /*
238  * Runtime information for the timer mode.
239  */
240 struct timerlat_variables {
241 	struct task_struct	*kthread;
242 	struct hrtimer		timer;
243 	u64			rel_period;
244 	u64			abs_period;
245 	bool			tracing_thread;
246 	u64			count;
247 	bool			uthread_migrate;
248 };
249 
250 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
251 
252 /*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
254  */
255 static inline struct timerlat_variables *this_cpu_tmr_var(void)
256 {
257 	return this_cpu_ptr(&per_cpu_timerlat_var);
258 }
259 
260 /*
 * tlat_var_reset - Reset the values of all per-cpu timerlat_variables
262  */
263 static inline void tlat_var_reset(void)
264 {
265 	struct timerlat_variables *tlat_var;
266 	int cpu;
267 
268 	/* Synchronize with the timerlat interfaces */
269 	mutex_lock(&interface_lock);
270 	/*
271 	 * So far, all the values are initialized as 0, so
272 	 * zeroing the structure is perfect.
273 	 */
274 	for_each_cpu(cpu, cpu_online_mask) {
275 		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
276 		if (tlat_var->kthread)
277 			hrtimer_cancel(&tlat_var->timer);
278 		memset(tlat_var, 0, sizeof(*tlat_var));
279 	}
280 	mutex_unlock(&interface_lock);
281 }
282 #else /* CONFIG_TIMERLAT_TRACER */
283 #define tlat_var_reset()	do {} while (0)
284 #endif /* CONFIG_TIMERLAT_TRACER */
285 
286 /*
 * osn_var_reset - Reset the values of all per-cpu osnoise_variables
288  */
289 static inline void osn_var_reset(void)
290 {
291 	struct osnoise_variables *osn_var;
292 	int cpu;
293 
294 	/*
295 	 * So far, all the values are initialized as 0, so
296 	 * zeroing the structure is perfect.
297 	 */
298 	for_each_cpu(cpu, cpu_online_mask) {
299 		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
300 		memset(osn_var, 0, sizeof(*osn_var));
301 	}
302 }
303 
304 /*
 * osn_var_reset_all - Reset the values of all per-cpu osnoise and timerlat variables
306  */
307 static inline void osn_var_reset_all(void)
308 {
309 	osn_var_reset();
310 	tlat_var_reset();
311 }
312 
313 /*
314  * Tells NMIs to call back to the osnoise tracer to record timestamps.
315  */
316 bool trace_osnoise_callback_enabled;
317 
318 /*
319  * Tracer data.
320  */
321 static struct osnoise_data {
322 	u64	sample_period;		/* total sampling period */
323 	u64	sample_runtime;		/* active sampling portion of period */
324 	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
325 	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
326 #ifdef CONFIG_TIMERLAT_TRACER
327 	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if latency >= print_stack */
329 	int	timerlat_tracer;	/* timerlat tracer */
330 #endif
	bool	tainted;		/* inform users and developers about a problem */
332 } osnoise_data = {
333 	.sample_period			= DEFAULT_SAMPLE_PERIOD,
334 	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
335 	.stop_tracing			= 0,
336 	.stop_tracing_total		= 0,
337 #ifdef CONFIG_TIMERLAT_TRACER
338 	.print_stack			= 0,
339 	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
340 	.timerlat_tracer		= 0,
341 #endif
342 };
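
/*
 * Note: the period/runtime/stop values above are kept in microseconds, as
 * configured via tracefs; run_osnoise() converts them to nanoseconds
 * (NSEC_PER_USEC) before entering the sampling loop.
 */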
343 
344 #ifdef CONFIG_TIMERLAT_TRACER
345 static inline bool timerlat_enabled(void)
346 {
347 	return osnoise_data.timerlat_tracer;
348 }
349 
350 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
351 {
352 	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
353 	/*
	 * If timerlat is enabled, but the timer IRQ handler has not
	 * run yet to set tracing_thread, do not trace.
356 	 */
357 	if (!tlat_var->tracing_thread) {
358 		osn_var->softirq.arrival_time = 0;
359 		osn_var->softirq.delta_start = 0;
360 		return 0;
361 	}
362 	return 1;
363 }
364 
365 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
366 {
367 	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
368 	/*
	 * If timerlat is enabled, but the timer IRQ handler has not
	 * run yet to set tracing_thread, do not trace.
371 	 */
372 	if (!tlat_var->tracing_thread) {
373 		osn_var->thread.delta_start = 0;
374 		osn_var->thread.arrival_time = 0;
375 		return 0;
376 	}
377 	return 1;
378 }
379 #else /* CONFIG_TIMERLAT_TRACER */
380 static inline bool timerlat_enabled(void)
381 {
382 	return false;
383 }
384 
385 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
386 {
387 	return 1;
388 }
389 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
390 {
391 	return 1;
392 }
393 #endif
394 
395 #ifdef CONFIG_PREEMPT_RT
396 /*
397  * Print the osnoise header info.
398  */
399 static void print_osnoise_headers(struct seq_file *s)
400 {
401 	if (osnoise_data.tainted)
402 		seq_puts(s, "# osnoise is tainted!\n");
403 
404 	seq_puts(s, "#                                _-------=> irqs-off\n");
405 	seq_puts(s, "#                               / _------=> need-resched\n");
406 	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
407 	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
408 	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
409 	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
410 	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
411 
412 	seq_puts(s, "#                              |||||| /          ");
413 	seq_puts(s, "                                     MAX\n");
414 
415 	seq_puts(s, "#                              ||||| /                         ");
416 	seq_puts(s, "                    SINGLE      Interference counters:\n");
417 
418 	seq_puts(s, "#                              |||||||               RUNTIME   ");
419 	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
420 
421 	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
422 	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
423 
424 	seq_puts(s, "#              | |         |   |||||||      |           |      ");
425 	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
426 }
427 #else /* CONFIG_PREEMPT_RT */
428 static void print_osnoise_headers(struct seq_file *s)
429 {
430 	if (osnoise_data.tainted)
431 		seq_puts(s, "# osnoise is tainted!\n");
432 
433 	seq_puts(s, "#                                _-----=> irqs-off\n");
434 	seq_puts(s, "#                               / _----=> need-resched\n");
435 	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
436 	seq_puts(s, "#                              || / _--=> preempt-depth\n");
437 	seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
438 	seq_puts(s, "                    MAX\n");
439 	seq_puts(s, "#                              |||| /     delay               ");
440 	seq_puts(s, "                    SINGLE      Interference counters:\n");
441 
442 	seq_puts(s, "#                              |||||               RUNTIME   ");
443 	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
444 
445 	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
446 	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
447 
448 	seq_puts(s, "#              | |         |   |||||      |           |      ");
449 	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
450 }
451 #endif /* CONFIG_PREEMPT_RT */
452 
453 /*
454  * osnoise_taint - report an osnoise error.
455  */
456 #define osnoise_taint(msg) ({							\
457 	struct osnoise_instance *inst;						\
458 	struct trace_buffer *buffer;						\
459 										\
460 	rcu_read_lock();							\
461 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
462 		buffer = inst->tr->array_buffer.buffer;				\
463 		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
464 	}									\
465 	rcu_read_unlock();							\
466 	osnoise_data.tainted = true;						\
467 })
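
/*
 * Example use, as in get_int_safe_duration() below:
 *
 *	if (duration < 0)
 *		osnoise_taint("Negative duration!\n");
 */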
468 
469 /*
470  * Record an osnoise_sample into the tracer buffer.
471  */
472 static void
473 __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
474 {
475 	struct ring_buffer_event *event;
476 	struct osnoise_entry *entry;
477 
478 	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
479 					  tracing_gen_ctx());
480 	if (!event)
481 		return;
482 	entry	= ring_buffer_event_data(event);
483 	entry->runtime		= sample->runtime;
484 	entry->noise		= sample->noise;
485 	entry->max_sample	= sample->max_sample;
486 	entry->hw_count		= sample->hw_count;
487 	entry->nmi_count	= sample->nmi_count;
488 	entry->irq_count	= sample->irq_count;
489 	entry->softirq_count	= sample->softirq_count;
490 	entry->thread_count	= sample->thread_count;
491 
492 	trace_buffer_unlock_commit_nostack(buffer, event);
493 }
494 
495 /*
496  * Record an osnoise_sample on all osnoise instances and fire trace event.
497  */
498 static void record_osnoise_sample(struct osnoise_sample *sample)
499 {
500 	struct osnoise_instance *inst;
501 	struct trace_buffer *buffer;
502 
503 	trace_osnoise_sample(sample);
504 
505 	rcu_read_lock();
506 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
507 		buffer = inst->tr->array_buffer.buffer;
508 		__record_osnoise_sample(sample, buffer);
509 	}
510 	rcu_read_unlock();
511 }
512 
513 #ifdef CONFIG_TIMERLAT_TRACER
514 /*
515  * Print the timerlat header info.
516  */
517 #ifdef CONFIG_PREEMPT_RT
518 static void print_timerlat_headers(struct seq_file *s)
519 {
520 	seq_puts(s, "#                                _-------=> irqs-off\n");
521 	seq_puts(s, "#                               / _------=> need-resched\n");
522 	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
523 	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
524 	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
525 	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
526 	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
527 	seq_puts(s, "#                              |||||| /\n");
528 	seq_puts(s, "#                              |||||||             ACTIVATION\n");
529 	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
530 	seq_puts(s, "       CONTEXT                LATENCY\n");
531 	seq_puts(s, "#              | |         |   |||||||      |         |      ");
532 	seq_puts(s, "            |                       |\n");
533 }
534 #else /* CONFIG_PREEMPT_RT */
535 static void print_timerlat_headers(struct seq_file *s)
536 {
537 	seq_puts(s, "#                                _-----=> irqs-off\n");
538 	seq_puts(s, "#                               / _----=> need-resched\n");
539 	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
540 	seq_puts(s, "#                              || / _--=> preempt-depth\n");
541 	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
542 	seq_puts(s, "#                              |||| /     delay\n");
543 	seq_puts(s, "#                              |||||            ACTIVATION\n");
544 	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
545 	seq_puts(s, "      CONTEXT                 LATENCY\n");
546 	seq_puts(s, "#              | |         |   |||||      |         |      ");
547 	seq_puts(s, "            |                       |\n");
548 }
549 #endif /* CONFIG_PREEMPT_RT */
550 
551 static void
552 __record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
553 {
554 	struct ring_buffer_event *event;
555 	struct timerlat_entry *entry;
556 
557 	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
558 					  tracing_gen_ctx());
559 	if (!event)
560 		return;
561 	entry	= ring_buffer_event_data(event);
562 	entry->seqnum			= sample->seqnum;
563 	entry->context			= sample->context;
564 	entry->timer_latency		= sample->timer_latency;
565 
566 	trace_buffer_unlock_commit_nostack(buffer, event);
567 }
568 
569 /*
570  * Record an timerlat_sample into the tracer buffer.
571  */
572 static void record_timerlat_sample(struct timerlat_sample *sample)
573 {
574 	struct osnoise_instance *inst;
575 	struct trace_buffer *buffer;
576 
577 	trace_timerlat_sample(sample);
578 
579 	rcu_read_lock();
580 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
581 		buffer = inst->tr->array_buffer.buffer;
582 		__record_timerlat_sample(sample, buffer);
583 	}
584 	rcu_read_unlock();
585 }
586 
587 #ifdef CONFIG_STACKTRACE
588 
589 #define	MAX_CALLS	256
590 
591 /*
 * Stack trace will take place only at IRQ level, so there is
 * no need to control nesting here.
594  */
595 struct trace_stack {
596 	int		stack_size;
597 	int		nr_entries;
598 	unsigned long	calls[MAX_CALLS];
599 };
600 
601 static DEFINE_PER_CPU(struct trace_stack, trace_stack);
602 
603 /*
604  * timerlat_save_stack - save a stack trace without printing
605  *
606  * Save the current stack trace without printing. The
607  * stack will be printed later, after the end of the measurement.
608  */
609 static void timerlat_save_stack(int skip)
610 {
611 	unsigned int size, nr_entries;
612 	struct trace_stack *fstack;
613 
614 	fstack = this_cpu_ptr(&trace_stack);
615 
616 	size = ARRAY_SIZE(fstack->calls);
617 
618 	nr_entries = stack_trace_save(fstack->calls, size, skip);
619 
620 	fstack->stack_size = nr_entries * sizeof(unsigned long);
621 	fstack->nr_entries = nr_entries;
625 }
626 
627 static void
628 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
629 {
630 	struct ring_buffer_event *event;
631 	struct stack_entry *entry;
632 
633 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
634 					  tracing_gen_ctx());
635 	if (!event)
636 		return;
637 
638 	entry = ring_buffer_event_data(event);
639 
640 	memcpy(&entry->caller, fstack->calls, size);
641 	entry->size = fstack->nr_entries;
642 
643 	trace_buffer_unlock_commit_nostack(buffer, event);
644 }
645 
646 /*
647  * timerlat_dump_stack - dump a stack trace previously saved
648  */
649 static void timerlat_dump_stack(u64 latency)
650 {
651 	struct osnoise_instance *inst;
652 	struct trace_buffer *buffer;
653 	struct trace_stack *fstack;
654 	unsigned int size;
655 
656 	/*
657 	 * trace only if latency > print_stack config, if enabled.
658 	 */
659 	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
660 		return;
661 
662 	preempt_disable_notrace();
663 	fstack = this_cpu_ptr(&trace_stack);
664 	size = fstack->stack_size;
665 
666 	rcu_read_lock();
667 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
668 		buffer = inst->tr->array_buffer.buffer;
669 		__timerlat_dump_stack(buffer, fstack, size);
670 
671 	}
672 	rcu_read_unlock();
673 	preempt_enable_notrace();
674 }
675 #else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency) do {} while (0)
677 #define timerlat_save_stack(a) do {} while (0)
678 #endif /* CONFIG_STACKTRACE */
679 #endif /* CONFIG_TIMERLAT_TRACER */
680 
681 /*
682  * Macros to encapsulate the time capturing infrastructure.
683  */
684 #define time_get()	trace_clock_local()
685 #define time_to_us(x)	div_u64(x, 1000)
686 #define time_sub(a, b)	((a) - (b))
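
/*
 * A minimal usage sketch of the macros above: the duration of a window, in
 * microseconds, is time_to_us(time_sub(time_get(), start)), where start was
 * previously saved with time_get() or set_int_safe_time().
 */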
687 
688 /*
689  * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
690  *
691  * If an IRQ is preempted by an NMI, its delta_start is pushed forward
692  * to discount the NMI interference.
693  *
694  * See get_int_safe_duration().
695  */
696 static inline void
697 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
698 {
699 	if (osn_var->irq.delta_start)
700 		osn_var->irq.delta_start += duration;
701 }
702 
703 #ifndef CONFIG_PREEMPT_RT
704 /*
705  * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
706  *
707  * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
708  * forward to discount the interference.
709  *
710  * See get_int_safe_duration().
711  */
712 static inline void
713 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
714 {
715 	if (osn_var->softirq.delta_start)
716 		osn_var->softirq.delta_start += duration;
717 }
718 #else /* CONFIG_PREEMPT_RT */
719 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
720 #endif
721 
722 /*
723  * cond_move_thread_delta_start - Forward the delta_start of a running thread
724  *
725  * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start
726  * is pushed forward to discount the interference.
727  *
728  * See get_int_safe_duration().
729  */
730 static inline void
731 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
732 {
733 	if (osn_var->thread.delta_start)
734 		osn_var->thread.delta_start += duration;
735 }
736 
737 /*
738  * get_int_safe_duration - Get the duration of a window
739  *
 * The irq, softirq and thread variables need to have their duration without
741  * the interference from higher priority interrupts. Instead of keeping a
742  * variable to discount the interrupt interference from these variables, the
743  * starting time of these variables are pushed forward with the interrupt's
744  * duration. In this way, a single variable is used to:
745  *
746  *   - Know if a given window is being measured.
747  *   - Account its duration.
748  *   - Discount the interference.
749  *
750  * To avoid getting inconsistent values, e.g.,:
751  *
752  *	now = time_get()
753  *		--->	interrupt!
754  *			delta_start -= int duration;
755  *		<---
756  *	duration = now - delta_start;
757  *
758  *	result: negative duration if the variable duration before the
759  *	interrupt was smaller than the interrupt execution.
760  *
761  * A counter of interrupts is used. If the counter increased, try
762  * to capture an interference safe duration.
763  */
764 static inline s64
765 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
766 {
767 	u64 int_counter, now;
768 	s64 duration;
769 
770 	do {
771 		int_counter = local_read(&osn_var->int_counter);
772 		/* synchronize with interrupts */
773 		barrier();
774 
775 		now = time_get();
776 		duration = (now - *delta_start);
777 
778 		/* synchronize with interrupts */
779 		barrier();
780 	} while (int_counter != local_read(&osn_var->int_counter));
781 
782 	/*
	 * This is evidence of race conditions that cause
784 	 * a value to be "discounted" too much.
785 	 */
786 	if (duration < 0)
787 		osnoise_taint("Negative duration!\n");
788 
789 	*delta_start = 0;
790 
791 	return duration;
792 }
793 
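/*
 * Typical caller pattern for the function above, as in
 * osnoise_trace_irq_exit(): save the start of the window with
 * set_int_safe_time(osn_var, &osn_var->irq.delta_start) at entry, then get
 * duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start) at
 * exit; the call also zeroes delta_start.
 */
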
794 /*
796  * set_int_safe_time - Save the current time on *time, aware of interference
797  *
798  * Get the time, taking into consideration a possible interference from
799  * higher priority interrupts.
800  *
801  * See get_int_safe_duration() for an explanation.
802  */
803 static u64
804 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
805 {
806 	u64 int_counter;
807 
808 	do {
809 		int_counter = local_read(&osn_var->int_counter);
810 		/* synchronize with interrupts */
811 		barrier();
812 
813 		*time = time_get();
814 
815 		/* synchronize with interrupts */
816 		barrier();
817 	} while (int_counter != local_read(&osn_var->int_counter));
818 
819 	return int_counter;
820 }
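
/*
 * The returned int_counter lets the caller detect whether an interrupt
 * arrived between two time readings: run_osnoise() compares the counters of
 * consecutive samples and, when no interference was counted, classifies the
 * remaining noise as hardware-related (hw_count).
 */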
821 
822 #ifdef CONFIG_TIMERLAT_TRACER
823 /*
824  * copy_int_safe_time - Copy *src into *desc aware of interference
825  */
826 static u64
827 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
828 {
829 	u64 int_counter;
830 
831 	do {
832 		int_counter = local_read(&osn_var->int_counter);
833 		/* synchronize with interrupts */
834 		barrier();
835 
836 		*dst = *src;
837 
838 		/* synchronize with interrupts */
839 		barrier();
840 	} while (int_counter != local_read(&osn_var->int_counter));
841 
842 	return int_counter;
843 }
844 #endif /* CONFIG_TIMERLAT_TRACER */
845 
846 /*
847  * trace_osnoise_callback - NMI entry/exit callback
848  *
 * This function is called at the NMI entry and exit code. The bool enter
 * distinguishes between the two cases. This function is used to note an NMI
 * occurrence, to compute the noise caused by the NMI, and to remove the noise
852  * it is potentially causing on other interference variables.
853  */
854 void trace_osnoise_callback(bool enter)
855 {
856 	struct osnoise_variables *osn_var = this_cpu_osn_var();
857 	u64 duration;
858 
859 	if (!osn_var->sampling)
860 		return;
861 
862 	/*
863 	 * Currently trace_clock_local() calls sched_clock() and the
864 	 * generic version is not NMI safe.
865 	 */
866 	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
867 		if (enter) {
868 			osn_var->nmi.delta_start = time_get();
869 			local_inc(&osn_var->int_counter);
870 		} else {
871 			duration = time_get() - osn_var->nmi.delta_start;
872 
873 			trace_nmi_noise(osn_var->nmi.delta_start, duration);
874 
875 			cond_move_irq_delta_start(osn_var, duration);
876 			cond_move_softirq_delta_start(osn_var, duration);
877 			cond_move_thread_delta_start(osn_var, duration);
878 		}
879 	}
880 
881 	if (enter)
882 		osn_var->nmi.count++;
883 }
884 
885 /*
886  * osnoise_trace_irq_entry - Note the starting of an IRQ
887  *
888  * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
890  * The arrival_time is used to report... the arrival time. The delta_start
891  * is used to compute the duration at the IRQ exit handler. See
892  * cond_move_irq_delta_start().
893  */
894 void osnoise_trace_irq_entry(int id)
895 {
896 	struct osnoise_variables *osn_var = this_cpu_osn_var();
897 
898 	if (!osn_var->sampling)
899 		return;
900 	/*
901 	 * This value will be used in the report, but not to compute
902 	 * the execution time, so it is safe to get it unsafe.
903 	 */
904 	osn_var->irq.arrival_time = time_get();
905 	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
906 	osn_var->irq.count++;
907 
908 	local_inc(&osn_var->int_counter);
909 }
910 
911 /*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
913  *
 * Computes the duration of the IRQ noise and traces it. It also discounts the
 * interference from other sources of noise that could currently be accounted.
916  */
917 void osnoise_trace_irq_exit(int id, const char *desc)
918 {
919 	struct osnoise_variables *osn_var = this_cpu_osn_var();
920 	s64 duration;
921 
922 	if (!osn_var->sampling)
923 		return;
924 
925 	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
926 	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
927 	osn_var->irq.arrival_time = 0;
928 	cond_move_softirq_delta_start(osn_var, duration);
929 	cond_move_thread_delta_start(osn_var, duration);
930 }
931 
932 /*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
936  */
937 static void trace_irqentry_callback(void *data, int irq,
938 				    struct irqaction *action)
939 {
940 	osnoise_trace_irq_entry(irq);
941 }
942 
943 /*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
947  */
948 static void trace_irqexit_callback(void *data, int irq,
949 				   struct irqaction *action, int ret)
950 {
951 	osnoise_trace_irq_exit(irq, action->name);
952 }
953 
954 /*
955  * arch specific register function.
956  */
957 int __weak osnoise_arch_register(void)
958 {
959 	return 0;
960 }
961 
962 /*
963  * arch specific unregister function.
964  */
965 void __weak osnoise_arch_unregister(void)
966 {
967 	return;
968 }
969 
970 /*
971  * hook_irq_events - Hook IRQ handling events
972  *
973  * This function hooks the IRQ related callbacks to the respective trace
974  * events.
975  */
976 static int hook_irq_events(void)
977 {
978 	int ret;
979 
980 	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
981 	if (ret)
982 		goto out_err;
983 
984 	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
985 	if (ret)
986 		goto out_unregister_entry;
987 
988 	ret = osnoise_arch_register();
989 	if (ret)
990 		goto out_irq_exit;
991 
992 	return 0;
993 
994 out_irq_exit:
995 	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
996 out_unregister_entry:
997 	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
998 out_err:
999 	return -EINVAL;
1000 }
1001 
1002 /*
1003  * unhook_irq_events - Unhook IRQ handling events
1004  *
 * This function unhooks the IRQ related callbacks from the respective trace
1006  * events.
1007  */
1008 static void unhook_irq_events(void)
1009 {
1010 	osnoise_arch_unregister();
1011 	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1012 	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1013 }
1014 
1015 #ifndef CONFIG_PREEMPT_RT
1016 /*
1017  * trace_softirq_entry_callback - Note the starting of a softirq
1018  *
1019  * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
1021  * to save the statistics. The arrival_time is used to report... the
1022  * arrival time. The delta_start is used to compute the duration at the
1023  * softirq exit handler. See cond_move_softirq_delta_start().
1024  */
1025 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
1026 {
1027 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1028 
1029 	if (!osn_var->sampling)
1030 		return;
1031 	/*
1032 	 * This value will be used in the report, but not to compute
1033 	 * the execution time, so it is safe to get it unsafe.
1034 	 */
1035 	osn_var->softirq.arrival_time = time_get();
1036 	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
1037 	osn_var->softirq.count++;
1038 
1039 	local_inc(&osn_var->int_counter);
1040 }
1041 
1042 /*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise and traces it. It also discounts
 * the interference from other sources of noise that could currently be accounted.
1047  */
1048 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
1049 {
1050 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1051 	s64 duration;
1052 
1053 	if (!osn_var->sampling)
1054 		return;
1055 
1056 	if (unlikely(timerlat_enabled()))
1057 		if (!timerlat_softirq_exit(osn_var))
1058 			return;
1059 
1060 	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
1061 	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
1062 	cond_move_thread_delta_start(osn_var, duration);
1063 	osn_var->softirq.arrival_time = 0;
1064 }
1065 
1066 /*
1067  * hook_softirq_events - Hook softirq handling events
1068  *
1069  * This function hooks the softirq related callbacks to the respective trace
1070  * events.
1071  */
1072 static int hook_softirq_events(void)
1073 {
1074 	int ret;
1075 
1076 	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1077 	if (ret)
1078 		goto out_err;
1079 
1080 	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1081 	if (ret)
1082 		goto out_unreg_entry;
1083 
1084 	return 0;
1085 
1086 out_unreg_entry:
1087 	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1088 out_err:
1089 	return -EINVAL;
1090 }
1091 
1092 /*
1093  * unhook_softirq_events - Unhook softirq handling events
1094  *
 * This function unhooks the softirq related callbacks from the respective trace
1096  * events.
1097  */
1098 static void unhook_softirq_events(void)
1099 {
1100 	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1101 	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1102 }
1103 #else /* CONFIG_PREEMPT_RT */
1104 /*
 * softirqs run as threads on PREEMPT_RT.
1106  */
1107 static int hook_softirq_events(void)
1108 {
1109 	return 0;
1110 }
1111 static void unhook_softirq_events(void)
1112 {
1113 }
1114 #endif
1115 
1116 /*
1117  * thread_entry - Record the starting of a thread noise window
1118  *
1119  * It saves the context switch time for a noisy thread, and increments
1120  * the interference counters.
1121  */
1122 static void
1123 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
1124 {
1125 	if (!osn_var->sampling)
1126 		return;
1127 	/*
1128 	 * The arrival time will be used in the report, but not to compute
1129 	 * the execution time, so it is safe to get it unsafe.
1130 	 */
1131 	osn_var->thread.arrival_time = time_get();
1132 
1133 	set_int_safe_time(osn_var, &osn_var->thread.delta_start);
1134 
1135 	osn_var->thread.count++;
1136 	local_inc(&osn_var->int_counter);
1137 }
1138 
1139 /*
1140  * thread_exit - Report the end of a thread noise window
1141  *
1142  * It computes the total noise from a thread, tracing if needed.
1143  */
1144 static void
1145 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
1146 {
1147 	s64 duration;
1148 
1149 	if (!osn_var->sampling)
1150 		return;
1151 
1152 	if (unlikely(timerlat_enabled()))
1153 		if (!timerlat_thread_exit(osn_var))
1154 			return;
1155 
1156 	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
1157 
1158 	trace_thread_noise(t, osn_var->thread.arrival_time, duration);
1159 
1160 	osn_var->thread.arrival_time = 0;
1161 }
1162 
1163 #ifdef CONFIG_TIMERLAT_TRACER
1164 /*
 * osnoise_stop_exception - Stop tracing and the tracer, reporting an exception.
1166  */
1167 static __always_inline void osnoise_stop_exception(char *msg, int cpu)
1168 {
1169 	struct osnoise_instance *inst;
1170 	struct trace_array *tr;
1171 
1172 	rcu_read_lock();
1173 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1174 		tr = inst->tr;
1175 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1176 				       "stop tracing hit on cpu %d due to exception: %s\n",
1177 				       smp_processor_id(),
1178 				       msg);
1179 
1180 		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1181 			panic("tracer hit on cpu %d due to exception: %s\n",
1182 			      smp_processor_id(),
1183 			      msg);
1184 
1185 		tracer_tracing_off(tr);
1186 	}
1187 	rcu_read_unlock();
1188 }
1189 
1190 /*
1191  * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
1192  *
 * This function is hooked to the sched:sched_migrate_task trace event, and monitors
1194  * timerlat user-space thread migration.
1195  */
1196 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
1197 {
1198 	struct osnoise_variables *osn_var;
1199 	long cpu = task_cpu(p);
1200 
1201 	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
1202 	if (osn_var->pid == p->pid && dest_cpu != cpu) {
1203 		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
1204 		osnoise_taint("timerlat user-thread migrated\n");
1205 		osnoise_stop_exception("timerlat user-thread migrated", cpu);
1206 	}
1207 }
1208 
1209 static bool monitor_enabled;
1210 
1211 static int register_migration_monitor(void)
1212 {
1213 	int ret = 0;
1214 
1215 	/*
1216 	 * Timerlat thread migration check is only required when running timerlat in user-space.
1217 	 * Thus, enable callback only if timerlat is set with no workload.
1218 	 */
1219 	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
1220 		if (WARN_ON_ONCE(monitor_enabled))
1221 			return 0;
1222 
1223 		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1224 		if (!ret)
1225 			monitor_enabled = true;
1226 	}
1227 
1228 	return ret;
1229 }
1230 
1231 static void unregister_migration_monitor(void)
1232 {
1233 	if (!monitor_enabled)
1234 		return;
1235 
1236 	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1237 	monitor_enabled = false;
1238 }
1239 #else
1240 static int register_migration_monitor(void)
1241 {
1242 	return 0;
1243 }
1244 static void unregister_migration_monitor(void) {}
1245 #endif
1246 /*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
1248  *
1249  * This function is hooked to the sched:sched_switch trace event, and it is
1250  * used to record the beginning and to report the end of a thread noise window.
1251  */
1252 static void
1253 trace_sched_switch_callback(void *data, bool preempt,
1254 			    struct task_struct *p,
1255 			    struct task_struct *n,
1256 			    unsigned int prev_state)
1257 {
1258 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1259 	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
1260 
1261 	if ((p->pid != osn_var->pid) || !workload)
1262 		thread_exit(osn_var, p);
1263 
1264 	if ((n->pid != osn_var->pid) || !workload)
1265 		thread_entry(osn_var, n);
1266 }
1267 
1268 /*
1269  * hook_thread_events - Hook the instrumentation for thread noise
1270  *
1271  * Hook the osnoise tracer callbacks to handle the noise from other
1272  * threads on the necessary kernel events.
1273  */
1274 static int hook_thread_events(void)
1275 {
1276 	int ret;
1277 
1278 	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
1279 	if (ret)
1280 		return -EINVAL;
1281 
1282 	ret = register_migration_monitor();
1283 	if (ret)
1284 		goto out_unreg;
1285 
1286 	return 0;
1287 
1288 out_unreg:
1289 	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1290 	return -EINVAL;
1291 }
1292 
1293 /*
1294  * unhook_thread_events - unhook the instrumentation for thread noise
1295  *
 * Unhook the osnoise tracer callbacks that handle the noise from other
1297  * threads on the necessary kernel events.
1298  */
1299 static void unhook_thread_events(void)
1300 {
1301 	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1302 	unregister_migration_monitor();
1303 }
1304 
1305 /*
1306  * save_osn_sample_stats - Save the osnoise_sample statistics
1307  *
1308  * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
1310  * before and after the osnoise sampling.
1311  */
1312 static void
1313 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1314 {
1315 	s->nmi_count = osn_var->nmi.count;
1316 	s->irq_count = osn_var->irq.count;
1317 	s->softirq_count = osn_var->softirq.count;
1318 	s->thread_count = osn_var->thread.count;
1319 }
1320 
1321 /*
1322  * diff_osn_sample_stats - Compute the osnoise_sample statistics
1323  *
1324  * After a sample period, compute the difference on the osnoise_sample
1325  * statistics. The struct osnoise_sample *s contains the statistics saved via
1326  * save_osn_sample_stats() before the osnoise sampling.
1327  */
1328 static void
1329 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1330 {
1331 	s->nmi_count = osn_var->nmi.count - s->nmi_count;
1332 	s->irq_count = osn_var->irq.count - s->irq_count;
1333 	s->softirq_count = osn_var->softirq.count - s->softirq_count;
1334 	s->thread_count = osn_var->thread.count - s->thread_count;
1335 }
1336 
1337 /*
1338  * osnoise_stop_tracing - Stop tracing and the tracer.
1339  */
1340 static __always_inline void osnoise_stop_tracing(void)
1341 {
1342 	struct osnoise_instance *inst;
1343 	struct trace_array *tr;
1344 
1345 	rcu_read_lock();
1346 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1347 		tr = inst->tr;
1348 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1349 				"stop tracing hit on cpu %d\n", smp_processor_id());
1350 
1351 		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1352 			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
1353 
1354 		tracer_tracing_off(tr);
1355 	}
1356 	rcu_read_unlock();
1357 }
1358 
1359 /*
 * osnoise_has_tracing_on - Check if there is at least one instance with tracing on
1361  */
1362 static __always_inline int osnoise_has_tracing_on(void)
1363 {
1364 	struct osnoise_instance *inst;
1365 	int trace_is_on = 0;
1366 
1367 	rcu_read_lock();
1368 	list_for_each_entry_rcu(inst, &osnoise_instances, list)
1369 		trace_is_on += tracer_tracing_is_on(inst->tr);
1370 	rcu_read_unlock();
1371 
1372 	return trace_is_on;
1373 }
1374 
1375 /*
1376  * notify_new_max_latency - Notify a new max latency via fsnotify interface.
1377  */
1378 static void notify_new_max_latency(u64 latency)
1379 {
1380 	struct osnoise_instance *inst;
1381 	struct trace_array *tr;
1382 
1383 	rcu_read_lock();
1384 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1385 		tr = inst->tr;
1386 		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
1387 			tr->max_latency = latency;
1388 			latency_fsnotify(tr);
1389 		}
1390 	}
1391 	rcu_read_unlock();
1392 }
1393 
1394 /*
1395  * run_osnoise - Sample the time and look for osnoise
1396  *
1397  * Used to capture the time, looking for potential osnoise latency repeatedly.
1398  * Different from hwlat_detector, it is called with preemption and interrupts
1399  * enabled. This allows irqs, softirqs and threads to run, interfering on the
1400  * osnoise sampling thread, as they would do with a regular thread.
1401  */
1402 static int run_osnoise(void)
1403 {
1404 	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
1405 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1406 	u64 start, sample, last_sample;
1407 	u64 last_int_count, int_count;
1408 	s64 noise = 0, max_noise = 0;
1409 	s64 total, last_total = 0;
1410 	struct osnoise_sample s;
1411 	bool disable_preemption;
1412 	unsigned int threshold;
1413 	u64 runtime, stop_in;
1414 	u64 sum_noise = 0;
1415 	int hw_count = 0;
1416 	int ret = -1;
1417 
1418 	/*
1419 	 * Disabling preemption is only required if IRQs are enabled,
	 * and the option is set on.
1421 	 */
1422 	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
1423 
1424 	/*
1425 	 * Considers the current thread as the workload.
1426 	 */
1427 	osn_var->pid = current->pid;
1428 
1429 	/*
1430 	 * Save the current stats for the diff
1431 	 */
1432 	save_osn_sample_stats(osn_var, &s);
1433 
1434 	/*
1435 	 * if threshold is 0, use the default value of 1 us.
1436 	 */
1437 	threshold = tracing_thresh ? : 1000;
1438 
1439 	/*
1440 	 * Apply PREEMPT and IRQ disabled options.
1441 	 */
1442 	if (disable_irq)
1443 		local_irq_disable();
1444 
1445 	if (disable_preemption)
1446 		preempt_disable();
1447 
1448 	/*
1449 	 * Make sure NMIs see sampling first
1450 	 */
1451 	osn_var->sampling = true;
1452 	barrier();
1453 
1454 	/*
1455 	 * Transform the *_us config to nanoseconds to avoid the
1456 	 * division on the main loop.
1457 	 */
1458 	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
1459 	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
1460 
1461 	/*
	 * Start timestamp
1463 	 */
1464 	start = time_get();
1465 
1466 	/*
1467 	 * "previous" loop.
1468 	 */
1469 	last_int_count = set_int_safe_time(osn_var, &last_sample);
1470 
1471 	do {
1472 		/*
1473 		 * Get sample!
1474 		 */
1475 		int_count = set_int_safe_time(osn_var, &sample);
1476 
1477 		noise = time_sub(sample, last_sample);
1478 
1479 		/*
1480 		 * This shouldn't happen.
1481 		 */
1482 		if (noise < 0) {
1483 			osnoise_taint("negative noise!");
1484 			goto out;
1485 		}
1486 
1487 		/*
1488 		 * Sample runtime.
1489 		 */
1490 		total = time_sub(sample, start);
1491 
1492 		/*
1493 		 * Check for possible overflows.
1494 		 */
1495 		if (total < last_total) {
1496 			osnoise_taint("total overflow!");
1497 			break;
1498 		}
1499 
1500 		last_total = total;
1501 
1502 		if (noise >= threshold) {
1503 			int interference = int_count - last_int_count;
1504 
1505 			if (noise > max_noise)
1506 				max_noise = noise;
1507 
1508 			if (!interference)
1509 				hw_count++;
1510 
1511 			sum_noise += noise;
1512 
1513 			trace_sample_threshold(last_sample, noise, interference);
1514 
1515 			if (osnoise_data.stop_tracing)
1516 				if (noise > stop_in)
1517 					osnoise_stop_tracing();
1518 		}
1519 
1520 		/*
1521 		 * In some cases, notably when running on a nohz_full CPU with
		 * a stopped tick, PREEMPT_RCU or PREEMPT_LAZY have no way to
1523 		 * account for QSs. This will eventually cause unwarranted
1524 		 * noise as RCU forces preemption as the means of ending the
1525 		 * current grace period.  We avoid this by calling
1526 		 * rcu_momentary_eqs(), which performs a zero duration EQS
1527 		 * allowing RCU to end the current grace period. This call
1528 		 * shouldn't be wrapped inside an RCU critical section.
1529 		 *
1530 		 * Normally QSs for other cases are handled through cond_resched().
1531 		 * For simplicity, however, we call rcu_momentary_eqs() for all
1532 		 * configurations here.
1533 		 */
1534 		if (!disable_irq)
1535 			local_irq_disable();
1536 
1537 		rcu_momentary_eqs();
1538 
1539 		if (!disable_irq)
1540 			local_irq_enable();
1541 
1542 		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish, unless set not to do so.
1545 		 */
1546 		if (!disable_irq && !disable_preemption)
1547 			cond_resched();
1548 
1549 		last_sample = sample;
1550 		last_int_count = int_count;
1551 
1552 	} while (total < runtime && !kthread_should_stop());
1553 
1554 	/*
	 * Finish the above from the viewpoint of interrupts.
1556 	 */
1557 	barrier();
1558 
1559 	osn_var->sampling = false;
1560 
1561 	/*
1562 	 * Make sure sampling data is no longer updated.
1563 	 */
1564 	barrier();
1565 
1566 	/*
1567 	 * Return to the preemptive state.
1568 	 */
1569 	if (disable_preemption)
1570 		preempt_enable();
1571 
1572 	if (disable_irq)
1573 		local_irq_enable();
1574 
1575 	/*
1576 	 * Save noise info.
1577 	 */
1578 	s.noise = time_to_us(sum_noise);
1579 	s.runtime = time_to_us(total);
1580 	s.max_sample = time_to_us(max_noise);
1581 	s.hw_count = hw_count;
1582 
1583 	/* Save interference stats info */
1584 	diff_osn_sample_stats(osn_var, &s);
1585 
1586 	record_osnoise_sample(&s);
1587 
1588 	notify_new_max_latency(max_noise);
1589 
1590 	if (osnoise_data.stop_tracing_total)
1591 		if (s.noise > osnoise_data.stop_tracing_total)
1592 			osnoise_stop_tracing();
1593 
1594 	return 0;
1595 out:
1596 	return ret;
1597 }
1598 
1599 static struct cpumask osnoise_cpumask;
1600 static struct cpumask save_cpumask;
1601 static struct cpumask kthread_cpumask;
1602 
1603 /*
1604  * osnoise_sleep - sleep until the next period
1605  */
1606 static void osnoise_sleep(bool skip_period)
1607 {
1608 	u64 interval;
1609 	ktime_t wake_time;
1610 
1611 	mutex_lock(&interface_lock);
1612 	if (skip_period)
1613 		interval = osnoise_data.sample_period;
1614 	else
1615 		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
1616 	mutex_unlock(&interface_lock);
1617 
1618 	/*
	 * Differently from hwlat_detector, the osnoise tracer can run
1620 	 * without a pause because preemption is on.
1621 	 */
1622 	if (!interval) {
1623 		/* Let synchronize_rcu_tasks() make progress */
1624 		cond_resched_tasks_rcu_qs();
1625 		return;
1626 	}
1627 
1628 	wake_time = ktime_add_us(ktime_get(), interval);
1629 	__set_current_state(TASK_INTERRUPTIBLE);
1630 
1631 	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
1632 		if (kthread_should_stop())
1633 			break;
1634 	}
1635 }
1636 
1637 /*
1638  * osnoise_migration_pending - checks if the task needs to migrate
1639  *
1640  * osnoise/timerlat threads are per-cpu. If there is a pending request to
1641  * migrate the thread away from the current CPU, something bad has happened.
1642  * Play the good citizen and leave.
1643  *
1644  * Returns 0 if it is safe to continue, 1 otherwise.
1645  */
1646 static inline int osnoise_migration_pending(void)
1647 {
1648 	if (!current->migration_pending)
1649 		return 0;
1650 
1651 	/*
1652 	 * If migration is pending, there is a task waiting for the
1653 	 * tracer to enable migration. The tracer does not allow migration,
1654 	 * thus: taint and leave to unblock the blocked thread.
1655 	 */
1656 	osnoise_taint("migration requested to osnoise threads, leaving.");
1657 
1658 	/*
1659 	 * Unset this thread from the threads managed by the interface.
1660 	 * The tracers are responsible for cleaning their env before
1661 	 * exiting.
1662 	 */
1663 	mutex_lock(&interface_lock);
1664 	this_cpu_osn_var()->kthread = NULL;
1665 	cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
1666 	mutex_unlock(&interface_lock);
1667 
1668 	return 1;
1669 }
1670 
1671 /*
1672  * osnoise_main - The osnoise detection kernel thread
1673  *
1674  * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1675  * every period.
1676  */
1677 static int osnoise_main(void *data)
1678 {
1679 	unsigned long flags;
1680 
1681 	/*
1682 	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
1684 	 *
1685 	 * To work around this limitation, disable migration and remove the
1686 	 * flag.
1687 	 */
1688 	migrate_disable();
1689 	raw_spin_lock_irqsave(&current->pi_lock, flags);
1690 	current->flags &= ~(PF_NO_SETAFFINITY);
1691 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1692 
1693 	while (!kthread_should_stop()) {
1694 		if (osnoise_migration_pending())
1695 			break;
1696 
1697 		/* skip a period if tracing is off on all instances */
1698 		if (!osnoise_has_tracing_on()) {
1699 			osnoise_sleep(true);
1700 			continue;
1701 		}
1702 
1703 		run_osnoise();
1704 		osnoise_sleep(false);
1705 	}
1706 
1707 	migrate_enable();
1708 	return 0;
1709 }
1710 
1711 #ifdef CONFIG_TIMERLAT_TRACER
1712 /*
1713  * timerlat_irq - hrtimer handler for timerlat.
1714  */
1715 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1716 {
1717 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1718 	struct timerlat_variables *tlat;
1719 	struct timerlat_sample s;
1720 	u64 now;
1721 	u64 diff;
1722 
1723 	/*
1724 	 * I am not sure if the timer was armed for this CPU. So, get
1725 	 * the timerlat struct from the timer itself, not from this
1726 	 * CPU.
1727 	 */
1728 	tlat = container_of(timer, struct timerlat_variables, timer);
1729 
1730 	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1731 
1732 	/*
	 * Enable the osnoise: events for thread and softirq.
1734 	 */
1735 	tlat->tracing_thread = true;
1736 
1737 	osn_var->thread.arrival_time = time_get();
1738 
1739 	/*
1740 	 * A hardirq is running: the timer IRQ. It is for sure preempting
1741 	 * a thread, and potentially preempting a softirq.
1742 	 *
1743 	 * At this point, it is not interesting to know the duration of the
1744 	 * preempted thread (and maybe softirq), but how much time they will
1745 	 * delay the beginning of the execution of the timer thread.
1746 	 *
1747 	 * To get the correct (net) delay added by the softirq, its delta_start
1748 	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
1756 	 *
1757 	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
1758 	 * on RT, moving the thread is enough.
1759 	 */
1760 	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
1761 		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1762 				   &osn_var->softirq.delta_start);
1763 
1764 		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1765 				    &osn_var->irq.delta_start);
1766 	} else {
1767 		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1768 				    &osn_var->irq.delta_start);
1769 	}
1770 
1771 	/*
	 * Compare the current time with the expected time.
1773 	 */
1774 	diff = now - tlat->abs_period;
1775 
1776 	tlat->count++;
1777 	s.seqnum = tlat->count;
1778 	s.timer_latency = diff;
1779 	s.context = IRQ_CONTEXT;
1780 
1781 	record_timerlat_sample(&s);
1782 
1783 	if (osnoise_data.stop_tracing) {
1784 		if (time_to_us(diff) >= osnoise_data.stop_tracing) {
1785 
1786 			/*
1787 			 * At this point, if stop_tracing is set and <= print_stack,
1788 			 * print_stack is set and would be printed in the thread handler.
1789 			 *
1790 			 * Thus, print the stack trace as it is helpful to define the
1791 			 * root cause of an IRQ latency.
1792 			 */
1793 			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
1794 				timerlat_save_stack(0);
1795 				timerlat_dump_stack(time_to_us(diff));
1796 			}
1797 
1798 			osnoise_stop_tracing();
1799 			notify_new_max_latency(diff);
1800 
1801 			wake_up_process(tlat->kthread);
1802 
1803 			return HRTIMER_NORESTART;
1804 		}
1805 	}
1806 
1807 	wake_up_process(tlat->kthread);
1808 
1809 	if (osnoise_data.print_stack)
1810 		timerlat_save_stack(0);
1811 
1812 	return HRTIMER_NORESTART;
1813 }
1814 
1815 /*
1816  * wait_next_period - Wait for the next period for timerlat
1817  */
1818 static int wait_next_period(struct timerlat_variables *tlat)
1819 {
1820 	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * NSEC_PER_USEC;
1822 
1823 	now = hrtimer_cb_get_time(&tlat->timer);
1824 	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1825 
1826 	/*
1827 	 * Save the next abs_period.
1828 	 */
1829 	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1830 
1831 	/*
1832 	 * If the new abs_period is in the past, skip the activation.
1833 	 */
1834 	while (ktime_compare(now, next_abs_period) > 0) {
1835 		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1836 		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1837 	}
1838 
1839 	set_current_state(TASK_INTERRUPTIBLE);
1840 
1841 	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1842 	schedule();
1843 	return 1;
1844 }
1845 
1846 /*
 * timerlat_main - Timerlat main
1848  */
1849 static int timerlat_main(void *data)
1850 {
1851 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1852 	struct timerlat_variables *tlat = this_cpu_tmr_var();
1853 	struct timerlat_sample s;
1854 	struct sched_param sp;
1855 	unsigned long flags;
1856 	u64 now, diff;
1857 
1858 	/*
1859 	 * Make the thread RT, that is how cyclictest is usually used.
1860 	 */
1861 	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1862 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1863 
1864 	/*
1865 	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
1867 	 *
1868 	 * To work around this limitation, disable migration and remove the
1869 	 * flag.
1870 	 */
1871 	migrate_disable();
1872 	raw_spin_lock_irqsave(&current->pi_lock, flags);
1873 	current->flags &= ~(PF_NO_SETAFFINITY);
1874 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1875 
1876 	tlat->count = 0;
1877 	tlat->tracing_thread = false;
1878 
1879 	hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1880 	tlat->kthread = current;
1881 	osn_var->pid = current->pid;
1882 	/*
	 * Annotate the arrival time.
1884 	 */
1885 	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1886 
1887 	wait_next_period(tlat);
1888 
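	/*
	 * The first wait only aligns the thread with the timer period;
	 * enable sampling from here on, so the hooks start accounting
	 * noise for this CPU.
	 */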
1889 	osn_var->sampling = 1;
1890 
1891 	while (!kthread_should_stop()) {
1892 
1893 		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1894 		diff = now - tlat->abs_period;
1895 
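		/*
		 * The THREAD_CONTEXT latency includes the IRQ latency plus the
		 * scheduling and wakeup latency of this thread.
		 */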
1896 		s.seqnum = tlat->count;
1897 		s.timer_latency = diff;
1898 		s.context = THREAD_CONTEXT;
1899 
1900 		record_timerlat_sample(&s);
1901 
1902 		notify_new_max_latency(diff);
1903 
1904 		timerlat_dump_stack(time_to_us(diff));
1905 
1906 		tlat->tracing_thread = false;
1907 		if (osnoise_data.stop_tracing_total)
1908 			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1909 				osnoise_stop_tracing();
1910 
1911 		if (osnoise_migration_pending())
1912 			break;
1913 
1914 		wait_next_period(tlat);
1915 	}
1916 
1917 	hrtimer_cancel(&tlat->timer);
1918 	migrate_enable();
1919 	return 0;
1920 }
1921 #else /* CONFIG_TIMERLAT_TRACER */
1922 static int timerlat_main(void *data)
1923 {
1924 	return 0;
1925 }
1926 #endif /* CONFIG_TIMERLAT_TRACER */
1927 
1928 /*
1929  * stop_kthread - stop a workload thread
1930  */
1931 static void stop_kthread(unsigned int cpu)
1932 {
1933 	struct task_struct *kthread;
1934 
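	/*
	 * xchg() ensures that only one of the possibly concurrent stoppers
	 * (e.g., tracer stop and CPU hotplug) sees the kthread pointer and
	 * stops the thread.
	 */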
1935 	kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1936 	if (kthread) {
1937 		if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1938 		    !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1939 			kthread_stop(kthread);
1940 		} else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1941 			/*
1942 			 * This is a user thread waiting on the timerlat_fd. We need
1943 			 * to close all users, and the best way to guarantee this is
			 * by killing the thread. NOTE: this is a purpose-specific file.
1945 			 */
1946 			kill_pid(kthread->thread_pid, SIGKILL, 1);
1947 			put_task_struct(kthread);
1948 		}
1949 	} else {
		/* no workload: there is no kthread to stop */
		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
			/*
			 * sampling is set when the osnoise tracer runs
			 * without a workload; disable it here.
			 */
1955 			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1956 			barrier();
1957 		}
1958 	}
1959 }
1960 
1961 /*
 * stop_per_cpu_kthreads - Stop per-cpu threads
 *
 * Stop the osnoise sampling threads. Use this on unload and at system
1965  * shutdown.
1966  */
1967 static void stop_per_cpu_kthreads(void)
1968 {
1969 	int cpu;
1970 
1971 	cpus_read_lock();
1972 
1973 	for_each_online_cpu(cpu)
1974 		stop_kthread(cpu);
1975 
1976 	cpus_read_unlock();
1977 }
1978 
1979 /*
 * start_kthread - Start a workload thread
1981  */
1982 static int start_kthread(unsigned int cpu)
1983 {
1984 	struct task_struct *kthread;
1985 	void *main = osnoise_main;
1986 	char comm[24];
1987 
1988 	/* Do not start a new thread if it is already running */
1989 	if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1990 		return 0;
1991 
1992 	if (timerlat_enabled()) {
1993 		snprintf(comm, 24, "timerlat/%d", cpu);
1994 		main = timerlat_main;
1995 	} else {
1996 		/* if no workload, just return */
1997 		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1998 			per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
1999 			barrier();
2000 			return 0;
2001 		}
2002 		snprintf(comm, 24, "osnoise/%d", cpu);
2003 	}
2004 
2005 	kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2006 
2007 	if (IS_ERR(kthread)) {
2008 		pr_err(BANNER "could not start sampling thread\n");
2009 		return -ENOMEM;
2010 	}
2011 
2012 	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2013 	cpumask_set_cpu(cpu, &kthread_cpumask);
2014 
2015 	return 0;
2016 }
2017 
2018 /*
 * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
 *
 * This starts the kernel threads that will look for osnoise on the
 * allowed cpus.
2023  */
2024 static int start_per_cpu_kthreads(void)
2025 {
2026 	struct cpumask *current_mask = &save_cpumask;
2027 	int retval = 0;
2028 	int cpu;
2029 
2030 	if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2031 		if (timerlat_enabled())
2032 			return 0;
2033 	}
2034 
2035 	cpus_read_lock();
2036 	/*
2037 	 * Run only on online CPUs in which osnoise is allowed to run.
2038 	 */
2039 	cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2040 
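	/*
	 * Stop any kthread left over from a previous run before starting
	 * the new set, e.g., after the cpumask was changed.
	 */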
2041 	for_each_possible_cpu(cpu) {
2042 		if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2043 			struct task_struct *kthread;
2044 
2045 			kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2046 			if (!WARN_ON(!kthread))
2047 				kthread_stop(kthread);
2048 		}
2049 	}
2050 
2051 	for_each_cpu(cpu, current_mask) {
2052 		retval = start_kthread(cpu);
2053 		if (retval) {
2054 			cpus_read_unlock();
2055 			stop_per_cpu_kthreads();
2056 			return retval;
2057 		}
2058 	}
2059 
2060 	cpus_read_unlock();
2061 
2062 	return retval;
2063 }
2064 
2065 #ifdef CONFIG_HOTPLUG_CPU
2066 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2067 {
2068 	unsigned int cpu = smp_processor_id();
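	/*
	 * This work is queued with schedule_work_on(), so it runs on a
	 * kworker bound to the CPU being brought online.
	 */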
2069 
2070 	guard(mutex)(&trace_types_lock);
2071 
2072 	if (!osnoise_has_registered_instances())
2073 		return;
2074 
2075 	guard(mutex)(&interface_lock);
2076 	guard(cpus_read_lock)();
2077 
2078 	if (!cpu_online(cpu))
2079 		return;
2080 
2081 	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2082 		return;
2083 
2084 	start_kthread(cpu);
2085 }
2086 
2087 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2088 
2089 /*
2090  * osnoise_cpu_init - CPU hotplug online callback function
2091  */
2092 static int osnoise_cpu_init(unsigned int cpu)
2093 {
2094 	schedule_work_on(cpu, &osnoise_hotplug_work);
2095 	return 0;
2096 }
2097 
2098 /*
2099  * osnoise_cpu_die - CPU hotplug offline callback function
2100  */
2101 static int osnoise_cpu_die(unsigned int cpu)
2102 {
2103 	stop_kthread(cpu);
2104 	return 0;
2105 }
2106 
2107 static void osnoise_init_hotplug_support(void)
2108 {
2109 	int ret;
2110 
2111 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2112 				osnoise_cpu_init, osnoise_cpu_die);
2113 	if (ret < 0)
		pr_warn(BANNER "Failed to initialize CPU hotplug support\n");
2115 
2116 	return;
2117 }
2118 #else /* CONFIG_HOTPLUG_CPU */
2119 static void osnoise_init_hotplug_support(void)
2120 {
2121 	return;
2122 }
2123 #endif /* CONFIG_HOTPLUG_CPU */
2124 
2125 /*
2126  * seq file functions for the osnoise/options file.
2127  */
2128 static void *s_options_start(struct seq_file *s, loff_t *pos)
2129 {
2130 	int option = *pos;
2131 
2132 	mutex_lock(&interface_lock);
2133 
2134 	if (option >= OSN_MAX)
2135 		return NULL;
2136 
2137 	return pos;
2138 }
2139 
2140 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2141 {
2142 	int option = ++(*pos);
2143 
2144 	if (option >= OSN_MAX)
2145 		return NULL;
2146 
2147 	return pos;
2148 }
2149 
2150 static int s_options_show(struct seq_file *s, void *v)
2151 {
2152 	loff_t *pos = v;
2153 	int option = *pos;
2154 
2155 	if (option == OSN_DEFAULTS) {
2156 		if (osnoise_options == OSN_DEFAULT_OPTIONS)
2157 			seq_printf(s, "%s", osnoise_options_str[option]);
2158 		else
2159 			seq_printf(s, "NO_%s", osnoise_options_str[option]);
2160 		goto out;
2161 	}
2162 
2163 	if (test_bit(option, &osnoise_options))
2164 		seq_printf(s, "%s", osnoise_options_str[option]);
2165 	else
2166 		seq_printf(s, "NO_%s", osnoise_options_str[option]);
2167 
2168 out:
2169 	if (option != OSN_MAX)
2170 		seq_puts(s, " ");
2171 
2172 	return 0;
2173 }
2174 
2175 static void s_options_stop(struct seq_file *s, void *v)
2176 {
2177 	seq_puts(s, "\n");
2178 	mutex_unlock(&interface_lock);
2179 }
2180 
2181 static const struct seq_operations osnoise_options_seq_ops = {
2182 	.start		= s_options_start,
2183 	.next		= s_options_next,
2184 	.show		= s_options_show,
2185 	.stop		= s_options_stop
2186 };
2187 
2188 static int osnoise_options_open(struct inode *inode, struct file *file)
2189 {
2190 	return seq_open(file, &osnoise_options_seq_ops);
2191 };
2192 
2193 /**
2194  * osnoise_options_write - Write function for "options" entry
2195  * @filp: The active open file structure
2196  * @ubuf: The user buffer that contains the value to write
2197  * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @filp
2199  *
2200  * Writing the option name sets the option, writing the "NO_"
2201  * prefix in front of the option name disables it.
2202  *
2203  * Writing "DEFAULTS" resets the option values to the default ones.
2204  */
2205 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2206 				     size_t cnt, loff_t *ppos)
2207 {
2208 	int running, option, enable, retval;
2209 	char buf[256], *option_str;
2210 
2211 	if (cnt >= 256)
2212 		return -EINVAL;
2213 
2214 	if (copy_from_user(buf, ubuf, cnt))
2215 		return -EFAULT;
2216 
2217 	buf[cnt] = 0;
2218 
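	/*
	 * strncmp() returns 0 on a match: a buffer starting with "NO_"
	 * disables the option.
	 */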
2219 	if (strncmp(buf, "NO_", 3)) {
2220 		option_str = strstrip(buf);
2221 		enable = true;
2222 	} else {
2223 		option_str = strstrip(&buf[3]);
2224 		enable = false;
2225 	}
2226 
2227 	option = match_string(osnoise_options_str, OSN_MAX, option_str);
2228 	if (option < 0)
2229 		return -EINVAL;
2230 
2231 	/*
2232 	 * trace_types_lock is taken to avoid concurrency on start/stop.
2233 	 */
2234 	mutex_lock(&trace_types_lock);
2235 	running = osnoise_has_registered_instances();
2236 	if (running)
2237 		stop_per_cpu_kthreads();
2238 
2239 	mutex_lock(&interface_lock);
2240 	/*
	 * Avoid CPU hotplug operations that might read options.
2242 	 */
2243 	cpus_read_lock();
2244 
2245 	retval = cnt;
2246 
2247 	if (enable) {
2248 		if (option == OSN_DEFAULTS)
2249 			osnoise_options = OSN_DEFAULT_OPTIONS;
2250 		else
2251 			set_bit(option, &osnoise_options);
2252 	} else {
2253 		if (option == OSN_DEFAULTS)
2254 			retval = -EINVAL;
2255 		else
2256 			clear_bit(option, &osnoise_options);
2257 	}
2258 
2259 	cpus_read_unlock();
2260 	mutex_unlock(&interface_lock);
2261 
2262 	if (running)
2263 		start_per_cpu_kthreads();
2264 	mutex_unlock(&trace_types_lock);
2265 
2266 	return retval;
2267 }
2268 
2269 /*
2270  * osnoise_cpus_read - Read function for reading the "cpus" file
2271  * @filp: The active open file structure
2272  * @ubuf: The userspace provided buffer to read value into
 * @count: The maximum number of bytes to read
2274  * @ppos: The current "file" position
2275  *
2276  * Prints the "cpus" output into the user-provided buffer.
2277  */
2278 static ssize_t
2279 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2280 		  loff_t *ppos)
2281 {
2282 	char *mask_str __free(kfree) = NULL;
2283 	int len;
2284 
2285 	guard(mutex)(&interface_lock);
2286 
2287 	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2288 	mask_str = kmalloc(len, GFP_KERNEL);
2289 	if (!mask_str)
2290 		return -ENOMEM;
2291 
2292 	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2293 	if (len >= count)
2294 		return -EINVAL;
2295 
2296 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2297 
2298 	return count;
2299 }
2300 
2301 /*
2302  * osnoise_cpus_write - Write function for "cpus" entry
2303  * @filp: The active open file structure
2304  * @ubuf: The user buffer that contains the value to write
2305  * @count: The maximum number of bytes to write to "file"
 * @ppos: The current position in @filp
2307  *
2308  * This function provides a write implementation for the "cpus"
 * interface to the osnoise tracer. By default, the mask lists all CPUs,
 * allowing osnoise threads to run on any online CPU of the system.
 * Writing to this interface restricts the execution of osnoise to the
 * given set of CPUs. Why not use "tracing_cpumask"? Because the user
 * might be interested in tracing what is running on other CPUs: for
 * instance, one might run osnoise on one HT CPU while observing what
 * is running on the sibling HT CPU.
2316  */
2317 static ssize_t
2318 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2319 		   loff_t *ppos)
2320 {
2321 	cpumask_var_t osnoise_cpumask_new;
2322 	int running, err;
2323 	char *buf __free(kfree) = NULL;
2324 
	/*
	 * Allocate count + 1 bytes so the buffer can be NUL-terminated,
	 * as cpulist_parse() expects a string.
	 */
	buf = kmalloc(count + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, ubuf, count))
		return -EFAULT;
	buf[count] = '\0';
2331 
2332 	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2333 		return -ENOMEM;
2334 
2335 	err = cpulist_parse(buf, osnoise_cpumask_new);
2336 	if (err)
2337 		goto err_free;
2338 
2339 	/*
2340 	 * trace_types_lock is taken to avoid concurrency on start/stop.
2341 	 */
2342 	mutex_lock(&trace_types_lock);
2343 	running = osnoise_has_registered_instances();
2344 	if (running)
2345 		stop_per_cpu_kthreads();
2346 
2347 	mutex_lock(&interface_lock);
2348 	/*
2349 	 * osnoise_cpumask is read by CPU hotplug operations.
2350 	 */
2351 	cpus_read_lock();
2352 
2353 	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2354 
2355 	cpus_read_unlock();
2356 	mutex_unlock(&interface_lock);
2357 
2358 	if (running)
2359 		start_per_cpu_kthreads();
2360 	mutex_unlock(&trace_types_lock);
2361 
2362 	free_cpumask_var(osnoise_cpumask_new);
2363 	return count;
2364 
2365 err_free:
2366 	free_cpumask_var(osnoise_cpumask_new);
2367 
2368 	return err;
2369 }
2370 
2371 #ifdef CONFIG_TIMERLAT_TRACER
2372 static int timerlat_fd_open(struct inode *inode, struct file *file)
2373 {
2374 	struct osnoise_variables *osn_var;
2375 	struct timerlat_variables *tlat;
2376 	long cpu = (long) inode->i_cdev;
2377 
2378 	mutex_lock(&interface_lock);
2379 
2380 	/*
2381 	 * This file is accessible only if timerlat is enabled, and
2382 	 * NO_OSNOISE_WORKLOAD is set.
2383 	 */
2384 	if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2385 		mutex_unlock(&interface_lock);
2386 		return -EINVAL;
2387 	}
2388 
2389 	migrate_disable();
2390 
2391 	osn_var = this_cpu_osn_var();
2392 
2393 	/*
	 * osn_var->pid works as a lock granting a single user access to
	 * this file.
2395 	 */
2396 	if (osn_var->pid) {
2397 		mutex_unlock(&interface_lock);
2398 		migrate_enable();
2399 		return -EBUSY;
2400 	}
2401 
2402 	/*
	 * timerlat is a per-cpu tracer. Check whether the user-space task
	 * is pinned to a single CPU as well. The tracer later monitors if
	 * the task migrates, and disables the tracer if it does. Still, it
	 * is worth doing this basic acceptance test to catch an obviously
	 * wrong setup early.
2408 	 */
	if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
2410 		mutex_unlock(&interface_lock);
2411 		migrate_enable();
2412 		return -EPERM;
2413 	}
2414 
2415 	/*
2416 	 * From now on, it is good to go.
2417 	 */
2418 	file->private_data = inode->i_cdev;
2419 
2420 	get_task_struct(current);
2421 
2422 	osn_var->kthread = current;
2423 	osn_var->pid = current->pid;
2424 
2425 	/*
2426 	 * Setup is done.
2427 	 */
2428 	mutex_unlock(&interface_lock);
2429 
2430 	tlat = this_cpu_tmr_var();
2431 	tlat->count = 0;
2432 
2433 	hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
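	/*
	 * The timer is not armed here: it is started by wait_next_period()
	 * on the first read() of this file.
	 */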
2434 
2435 	migrate_enable();
2436 	return 0;
2437 };
2438 
2439 /*
2440  * timerlat_fd_read - Read function for "timerlat_fd" file
2441  * @file: The active open file structure
2442  * @ubuf: The userspace provided buffer to read value into
 * @count: The maximum number of bytes to read
2444  * @ppos: The current "file" position
2445  *
2446  * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
2447  */
2448 static ssize_t
2449 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2450 		  loff_t *ppos)
2451 {
2452 	long cpu = (long) file->private_data;
2453 	struct osnoise_variables *osn_var;
2454 	struct timerlat_variables *tlat;
2455 	struct timerlat_sample s;
2456 	s64 diff;
2457 	u64 now;
2458 
2459 	migrate_disable();
2460 
2461 	tlat = this_cpu_tmr_var();
2462 
2463 	/*
2464 	 * While in user-space, the thread is migratable. There is nothing
2465 	 * we can do about it.
2466 	 * So, if the thread is running on another CPU, stop the machinery.
2467 	 */
2468 	if (cpu == smp_processor_id()) {
2469 		if (tlat->uthread_migrate) {
2470 			migrate_enable();
2471 			return -EINVAL;
2472 		}
2473 	} else {
2474 		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2475 		osnoise_taint("timerlat user thread migrate\n");
2476 		osnoise_stop_tracing();
2477 		migrate_enable();
2478 		return -EINVAL;
2479 	}
2480 
2481 	osn_var = this_cpu_osn_var();
2482 
2483 	/*
	 * The user-space timerlat runs in a different order: read() first
	 * accounts for the previous activation, then sleeps waiting for
	 * the next one.
	 *
	 * So, skip the accounting if we are entering read() before the
	 * first wakeup from the timerlat IRQ:
2490 	 */
2491 	if (likely(osn_var->sampling)) {
2492 		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2493 		diff = now - tlat->abs_period;
2494 
2495 		/*
2496 		 * it was not a timer firing, but some other signal?
2497 		 */
2498 		if (diff < 0)
2499 			goto out;
2500 
2501 		s.seqnum = tlat->count;
2502 		s.timer_latency = diff;
2503 		s.context = THREAD_URET;
2504 
2505 		record_timerlat_sample(&s);
2506 
2507 		notify_new_max_latency(diff);
2508 
2509 		tlat->tracing_thread = false;
2510 		if (osnoise_data.stop_tracing_total)
2511 			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2512 				osnoise_stop_tracing();
2513 	} else {
2514 		tlat->tracing_thread = false;
2515 		tlat->kthread = current;
2516 
		/* Annotate the current time, so the next period drifts from now */
2518 		tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2519 
2520 		osn_var->sampling = 1;
2521 	}
2522 
2523 	/* wait for the next period */
2524 	wait_next_period(tlat);
2525 
2526 	/* This is the wakeup from this cycle */
2527 	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2528 	diff = now - tlat->abs_period;
2529 
2530 	/*
2531 	 * it was not a timer firing, but some other signal?
2532 	 */
2533 	if (diff < 0)
2534 		goto out;
2535 
2536 	s.seqnum = tlat->count;
2537 	s.timer_latency = diff;
2538 	s.context = THREAD_CONTEXT;
2539 
2540 	record_timerlat_sample(&s);
2541 
2542 	if (osnoise_data.stop_tracing_total) {
2543 		if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2544 			timerlat_dump_stack(time_to_us(diff));
2545 			notify_new_max_latency(diff);
2546 			osnoise_stop_tracing();
2547 		}
2548 	}
2549 
2550 out:
2551 	migrate_enable();
2552 	return 0;
2553 }
2554 
2555 static int timerlat_fd_release(struct inode *inode, struct file *file)
2556 {
2557 	struct osnoise_variables *osn_var;
2558 	struct timerlat_variables *tlat_var;
2559 	long cpu = (long) file->private_data;
2560 
2561 	migrate_disable();
2562 	mutex_lock(&interface_lock);
2563 
2564 	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2565 	tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2566 
2567 	if (tlat_var->kthread)
2568 		hrtimer_cancel(&tlat_var->timer);
2569 	memset(tlat_var, 0, sizeof(*tlat_var));
2570 
2571 	osn_var->sampling = 0;
2572 	osn_var->pid = 0;
2573 
2574 	/*
2575 	 * We are leaving, not being stopped... see stop_kthread();
2576 	 */
2577 	if (osn_var->kthread) {
2578 		put_task_struct(osn_var->kthread);
2579 		osn_var->kthread = NULL;
2580 	}
2581 
2582 	mutex_unlock(&interface_lock);
2583 	migrate_enable();
2584 	return 0;
2585 }
2586 #endif
2587 
2588 /*
2589  * osnoise/runtime_us: cannot be greater than the period.
2590  */
2591 static struct trace_min_max_param osnoise_runtime = {
2592 	.lock	= &interface_lock,
2593 	.val	= &osnoise_data.sample_runtime,
2594 	.max	= &osnoise_data.sample_period,
2595 	.min	= NULL,
2596 };
2597 
2598 /*
2599  * osnoise/period_us: cannot be smaller than the runtime.
2600  */
2601 static struct trace_min_max_param osnoise_period = {
2602 	.lock	= &interface_lock,
2603 	.val	= &osnoise_data.sample_period,
2604 	.max	= NULL,
2605 	.min	= &osnoise_data.sample_runtime,
2606 };
2607 
2608 /*
2609  * osnoise/stop_tracing_us: no limit.
2610  */
2611 static struct trace_min_max_param osnoise_stop_tracing_in = {
2612 	.lock	= &interface_lock,
2613 	.val	= &osnoise_data.stop_tracing,
2614 	.max	= NULL,
2615 	.min	= NULL,
2616 };
2617 
2618 /*
2619  * osnoise/stop_tracing_total_us: no limit.
2620  */
2621 static struct trace_min_max_param osnoise_stop_tracing_total = {
2622 	.lock	= &interface_lock,
2623 	.val	= &osnoise_data.stop_tracing_total,
2624 	.max	= NULL,
2625 	.min	= NULL,
2626 };
2627 
2628 #ifdef CONFIG_TIMERLAT_TRACER
2629 /*
2630  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2631  * latency is higher than val.
2632  */
2633 static struct trace_min_max_param osnoise_print_stack = {
2634 	.lock	= &interface_lock,
2635 	.val	= &osnoise_data.print_stack,
2636 	.max	= NULL,
2637 	.min	= NULL,
2638 };
2639 
2640 /*
2641  * osnoise/timerlat_period: min 100 us, max 1 s
2642  */
2643 static u64 timerlat_min_period = 100;
2644 static u64 timerlat_max_period = 1000000;
2645 static struct trace_min_max_param timerlat_period = {
2646 	.lock	= &interface_lock,
2647 	.val	= &osnoise_data.timerlat_period,
2648 	.max	= &timerlat_max_period,
2649 	.min	= &timerlat_min_period,
2650 };
2651 
2652 static const struct file_operations timerlat_fd_fops = {
2653 	.open		= timerlat_fd_open,
2654 	.read		= timerlat_fd_read,
2655 	.release	= timerlat_fd_release,
2656 	.llseek		= generic_file_llseek,
2657 };
2658 #endif
2659 
2660 static const struct file_operations cpus_fops = {
2661 	.open		= tracing_open_generic,
2662 	.read		= osnoise_cpus_read,
2663 	.write		= osnoise_cpus_write,
2664 	.llseek		= generic_file_llseek,
2665 };
2666 
2667 static const struct file_operations osnoise_options_fops = {
2668 	.open		= osnoise_options_open,
2669 	.read		= seq_read,
2670 	.llseek		= seq_lseek,
2671 	.release	= seq_release,
2672 	.write		= osnoise_options_write
2673 };
2674 
2675 #ifdef CONFIG_TIMERLAT_TRACER
2676 #ifdef CONFIG_STACKTRACE
2677 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2678 {
2679 	struct dentry *tmp;
2680 
2681 	tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2682 				  &osnoise_print_stack, &trace_min_max_fops);
2683 	if (!tmp)
2684 		return -ENOMEM;
2685 
2686 	return 0;
2687 }
2688 #else /* CONFIG_STACKTRACE */
2689 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2690 {
2691 	return 0;
2692 }
2693 #endif /* CONFIG_STACKTRACE */
2694 
2695 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2696 {
2697 	struct dentry *timerlat_fd;
2698 	struct dentry *per_cpu;
2699 	struct dentry *cpu_dir;
2700 	char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2701 	long cpu;
2702 
2703 	/*
	 * Why not use the tracing instance per_cpu/ dir?
	 *
	 * Because osnoise/timerlat have a single workload, having
	 * multiple files like these would be a waste of memory.
2708 	 */
2709 	per_cpu = tracefs_create_dir("per_cpu", top_dir);
2710 	if (!per_cpu)
2711 		return -ENOMEM;
2712 
2713 	for_each_possible_cpu(cpu) {
2714 		snprintf(cpu_str, 30, "cpu%ld", cpu);
2715 		cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2716 		if (!cpu_dir)
2717 			goto out_clean;
2718 
2719 		timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2720 						cpu_dir, NULL, &timerlat_fd_fops);
2721 		if (!timerlat_fd)
2722 			goto out_clean;
2723 
		/* Record the CPU in i_cdev; timerlat_fd_open() reads it back */
2725 		d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2726 	}
2727 
2728 	return 0;
2729 
2730 out_clean:
2731 	tracefs_remove(per_cpu);
2732 	return -ENOMEM;
2733 }
2734 
2735 /*
2736  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2737  */
2738 static int init_timerlat_tracefs(struct dentry *top_dir)
2739 {
2740 	struct dentry *tmp;
2741 	int retval;
2742 
2743 	tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2744 				  &timerlat_period, &trace_min_max_fops);
2745 	if (!tmp)
2746 		return -ENOMEM;
2747 
2748 	retval = osnoise_create_cpu_timerlat_fd(top_dir);
2749 	if (retval)
2750 		return retval;
2751 
2752 	return init_timerlat_stack_tracefs(top_dir);
2753 }
2754 #else /* CONFIG_TIMERLAT_TRACER */
2755 static int init_timerlat_tracefs(struct dentry *top_dir)
2756 {
2757 	return 0;
2758 }
2759 #endif /* CONFIG_TIMERLAT_TRACER */
2760 
2761 /*
2762  * init_tracefs - A function to initialize the tracefs interface files
2763  *
2764  * This function creates entries in tracefs for "osnoise" and "timerlat".
2765  * It creates these directories in the tracing directory, and within that
 * directory the user can change and view the configs.
2767  */
2768 static int init_tracefs(void)
2769 {
2770 	struct dentry *top_dir;
2771 	struct dentry *tmp;
2772 	int ret;
2773 
2774 	ret = tracing_init_dentry();
2775 	if (ret)
2776 		return -ENOMEM;
2777 
2778 	top_dir = tracefs_create_dir("osnoise", NULL);
2779 	if (!top_dir)
2780 		return 0;
2781 
2782 	tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2783 				  &osnoise_period, &trace_min_max_fops);
2784 	if (!tmp)
2785 		goto err;
2786 
2787 	tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2788 				  &osnoise_runtime, &trace_min_max_fops);
2789 	if (!tmp)
2790 		goto err;
2791 
2792 	tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2793 				  &osnoise_stop_tracing_in, &trace_min_max_fops);
2794 	if (!tmp)
2795 		goto err;
2796 
2797 	tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2798 				  &osnoise_stop_tracing_total, &trace_min_max_fops);
2799 	if (!tmp)
2800 		goto err;
2801 
2802 	tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2803 	if (!tmp)
2804 		goto err;
2805 
2806 	tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2807 				&osnoise_options_fops);
2808 	if (!tmp)
2809 		goto err;
2810 
2811 	ret = init_timerlat_tracefs(top_dir);
2812 	if (ret)
2813 		goto err;
2814 
2815 	return 0;
2816 
2817 err:
2818 	tracefs_remove(top_dir);
2819 	return -ENOMEM;
2820 }
2821 
2822 static int osnoise_hook_events(void)
2823 {
2824 	int retval;
2825 
2826 	/*
2827 	 * Trace is already hooked, we are re-enabling from
2828 	 * a stop_tracing_*.
2829 	 */
2830 	if (trace_osnoise_callback_enabled)
2831 		return 0;
2832 
2833 	retval = hook_irq_events();
2834 	if (retval)
2835 		return -EINVAL;
2836 
2837 	retval = hook_softirq_events();
2838 	if (retval)
2839 		goto out_unhook_irq;
2840 
2841 	retval = hook_thread_events();
2842 	/*
2843 	 * All fine!
2844 	 */
2845 	if (!retval)
2846 		return 0;
2847 
2848 	unhook_softirq_events();
2849 out_unhook_irq:
2850 	unhook_irq_events();
2851 	return -EINVAL;
2852 }
2853 
2854 static void osnoise_unhook_events(void)
2855 {
2856 	unhook_thread_events();
2857 	unhook_softirq_events();
2858 	unhook_irq_events();
2859 }
2860 
2861 /*
 * osnoise_workload_start - start the workload and hook the events
2863  */
2864 static int osnoise_workload_start(void)
2865 {
2866 	int retval;
2867 
2868 	/*
2869 	 * Instances need to be registered after calling workload
2870 	 * start. Hence, if there is already an instance, the
2871 	 * workload was already registered. Otherwise, this
2872 	 * code is on the way to register the first instance,
2873 	 * and the workload will start.
2874 	 */
2875 	if (osnoise_has_registered_instances())
2876 		return 0;
2877 
2878 	osn_var_reset_all();
2879 
2880 	retval = osnoise_hook_events();
2881 	if (retval)
2882 		return retval;
2883 
2884 	/*
2885 	 * Make sure that ftrace_nmi_enter/exit() see reset values
2886 	 * before enabling trace_osnoise_callback_enabled.
2887 	 */
2888 	barrier();
2889 	trace_osnoise_callback_enabled = true;
2890 
2891 	retval = start_per_cpu_kthreads();
2892 	if (retval) {
2893 		trace_osnoise_callback_enabled = false;
2894 		/*
2895 		 * Make sure that ftrace_nmi_enter/exit() see
2896 		 * trace_osnoise_callback_enabled as false before continuing.
2897 		 */
2898 		barrier();
2899 
2900 		osnoise_unhook_events();
2901 		return retval;
2902 	}
2903 
2904 	return 0;
2905 }
2906 
2907 /*
2908  * osnoise_workload_stop - stop the workload and unhook the events
2909  */
2910 static void osnoise_workload_stop(void)
2911 {
2912 	/*
2913 	 * Instances need to be unregistered before calling
2914 	 * stop. Hence, if there is a registered instance, more
2915 	 * than one instance is running, and the workload will not
2916 	 * yet stop. Otherwise, this code is on the way to disable
2917 	 * the last instance, and the workload can stop.
2918 	 */
2919 	if (osnoise_has_registered_instances())
2920 		return;
2921 
2922 	/*
2923 	 * If callbacks were already disabled in a previous stop
 * call, there is no need to disable them again.
2925 	 *
2926 	 * For instance, this happens when tracing is stopped via:
2927 	 * echo 0 > tracing_on
2928 	 * echo nop > current_tracer.
2929 	 */
2930 	if (!trace_osnoise_callback_enabled)
2931 		return;
2932 
2933 	trace_osnoise_callback_enabled = false;
2934 	/*
2935 	 * Make sure that ftrace_nmi_enter/exit() see
2936 	 * trace_osnoise_callback_enabled as false before continuing.
2937 	 */
2938 	barrier();
2939 
2940 	stop_per_cpu_kthreads();
2941 
2942 	osnoise_unhook_events();
2943 }
2944 
2945 static void osnoise_tracer_start(struct trace_array *tr)
2946 {
2947 	int retval;
2948 
2949 	/*
2950 	 * If the instance is already registered, there is no need to
2951 	 * register it again.
2952 	 */
2953 	if (osnoise_instance_registered(tr))
2954 		return;
2955 
2956 	retval = osnoise_workload_start();
2957 	if (retval)
2958 		pr_err(BANNER "Error starting osnoise tracer\n");
2959 
2960 	osnoise_register_instance(tr);
2961 }
2962 
2963 static void osnoise_tracer_stop(struct trace_array *tr)
2964 {
2965 	osnoise_unregister_instance(tr);
2966 	osnoise_workload_stop();
2967 }
2968 
2969 static int osnoise_tracer_init(struct trace_array *tr)
2970 {
2971 	/*
2972 	 * Only allow osnoise tracer if timerlat tracer is not running
2973 	 * already.
2974 	 */
2975 	if (timerlat_enabled())
2976 		return -EBUSY;
2977 
2978 	tr->max_latency = 0;
2979 
2980 	osnoise_tracer_start(tr);
2981 	return 0;
2982 }
2983 
2984 static void osnoise_tracer_reset(struct trace_array *tr)
2985 {
2986 	osnoise_tracer_stop(tr);
2987 }
2988 
2989 static struct tracer osnoise_tracer __read_mostly = {
2990 	.name		= "osnoise",
2991 	.init		= osnoise_tracer_init,
2992 	.reset		= osnoise_tracer_reset,
2993 	.start		= osnoise_tracer_start,
2994 	.stop		= osnoise_tracer_stop,
2995 	.print_header	= print_osnoise_headers,
2996 	.allow_instances = true,
2997 };
2998 
2999 #ifdef CONFIG_TIMERLAT_TRACER
3000 static void timerlat_tracer_start(struct trace_array *tr)
3001 {
3002 	int retval;
3003 
3004 	/*
3005 	 * If the instance is already registered, there is no need to
3006 	 * register it again.
3007 	 */
3008 	if (osnoise_instance_registered(tr))
3009 		return;
3010 
3011 	retval = osnoise_workload_start();
3012 	if (retval)
3013 		pr_err(BANNER "Error starting timerlat tracer\n");
3014 
3015 	osnoise_register_instance(tr);
3016 
3017 	return;
3018 }
3019 
3020 static void timerlat_tracer_stop(struct trace_array *tr)
3021 {
3022 	int cpu;
3023 
3024 	osnoise_unregister_instance(tr);
3025 
3026 	/*
3027 	 * Instruct the threads to stop only if this is the last instance.
3028 	 */
3029 	if (!osnoise_has_registered_instances()) {
3030 		for_each_online_cpu(cpu)
3031 			per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3032 	}
3033 
3034 	osnoise_workload_stop();
3035 }
3036 
3037 static int timerlat_tracer_init(struct trace_array *tr)
3038 {
3039 	/*
3040 	 * Only allow timerlat tracer if osnoise tracer is not running already.
3041 	 */
3042 	if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3043 		return -EBUSY;
3044 
3045 	/*
3046 	 * If this is the first instance, set timerlat_tracer to block
3047 	 * osnoise tracer start.
3048 	 */
3049 	if (!osnoise_has_registered_instances())
3050 		osnoise_data.timerlat_tracer = 1;
3051 
3052 	tr->max_latency = 0;
3053 	timerlat_tracer_start(tr);
3054 
3055 	return 0;
3056 }
3057 
3058 static void timerlat_tracer_reset(struct trace_array *tr)
3059 {
3060 	timerlat_tracer_stop(tr);
3061 
3062 	/*
3063 	 * If this is the last instance, reset timerlat_tracer allowing
3064 	 * osnoise to be started.
3065 	 */
3066 	if (!osnoise_has_registered_instances())
3067 		osnoise_data.timerlat_tracer = 0;
3068 }
3069 
3070 static struct tracer timerlat_tracer __read_mostly = {
3071 	.name		= "timerlat",
3072 	.init		= timerlat_tracer_init,
3073 	.reset		= timerlat_tracer_reset,
3074 	.start		= timerlat_tracer_start,
3075 	.stop		= timerlat_tracer_stop,
3076 	.print_header	= print_timerlat_headers,
3077 	.allow_instances = true,
3078 };
3079 
3080 __init static int init_timerlat_tracer(void)
3081 {
3082 	return register_tracer(&timerlat_tracer);
3083 }
3084 #else /* CONFIG_TIMERLAT_TRACER */
3085 __init static int init_timerlat_tracer(void)
3086 {
3087 	return 0;
3088 }
3089 #endif /* CONFIG_TIMERLAT_TRACER */
3090 
3091 __init static int init_osnoise_tracer(void)
3092 {
3093 	int ret;
3094 
3095 	mutex_init(&interface_lock);
3096 
3097 	cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3098 
3099 	ret = register_tracer(&osnoise_tracer);
3100 	if (ret) {
3101 		pr_err(BANNER "Error registering osnoise!\n");
3102 		return ret;
3103 	}
3104 
3105 	ret = init_timerlat_tracer();
3106 	if (ret) {
3107 		pr_err(BANNER "Error registering timerlat!\n");
3108 		return ret;
3109 	}
3110 
3111 	osnoise_init_hotplug_support();
3112 
3113 	INIT_LIST_HEAD_RCU(&osnoise_instances);
3114 
3115 	init_tracefs();
3116 
3117 	return 0;
3118 }
3119 late_initcall(init_osnoise_tracer);
3120