1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2025 - Google LLC
4 * Author: Vincent Donnefort <vdonnefort@google.com>
5 */
6
7 #include <linux/kstrtox.h>
8 #include <linux/lockdep.h>
9 #include <linux/mutex.h>
10 #include <linux/tracefs.h>
11 #include <linux/trace_remote.h>
12 #include <linux/trace_seq.h>
13 #include <linux/types.h>
14
15 #include "trace.h"
16
17 #define TRACEFS_DIR "remotes"
18 #define TRACEFS_MODE_WRITE 0640
19 #define TRACEFS_MODE_READ 0440
20
/* How a trace_remote_iterator reads the remote's ring-buffer */
enum tri_type {
	TRI_CONSUMING,		/* Events are consumed as they are read (trace_pipe) */
	TRI_NONCONSUMING,	/* Events are only peeked at, buffer untouched (trace) */
};
25
/* State backing one open "trace" or "trace_pipe" file on a remote */
struct trace_remote_iterator {
	struct trace_remote *remote;		/* Remote being read */
	struct trace_seq seq;			/* Text rendering of events */
	struct delayed_work poll_work;		/* TRI_CONSUMING: periodic remote poll */
	unsigned long lost_events;		/* Events dropped before @evt */
	u64 ts;					/* Timestamp of @evt */
	struct ring_buffer_iter *rb_iter;	/* TRI_NONCONSUMING, single-CPU */
	struct ring_buffer_iter **rb_iters;	/* TRI_NONCONSUMING, RING_BUFFER_ALL_CPUS */
	struct remote_event_hdr *evt;		/* Event currently being rendered */
	int cpu;				/* CPU being read, or RING_BUFFER_ALL_CPUS */
	int evt_cpu;				/* CPU @evt was peeked from */
	loff_t pos;				/* seq_file position (trace view) */
	enum tri_type type;			/* Consuming vs non-consuming reader */
};
40
/*
 * A remote entity (e.g. firmware or hypervisor) writing events into a
 * Tracefs-compatible ring-buffer, read by the kernel.
 */
struct trace_remote {
	struct trace_remote_callbacks *cbs;	/* Hooks controlling the remote */
	void *priv;				/* Opaque data passed to each callback */
	struct trace_buffer *trace_buffer;	/* Kernel-side view; NULL when unloaded */
	struct trace_buffer_desc *trace_buffer_desc; /* Buffer layout from the remote */
	struct dentry *dentry;			/* remotes/<name>/ Tracefs directory */
	struct eventfs_inode *eventfs;		/* eventfs handle */
	struct remote_event *events;		/* Events supported by this remote */
	unsigned long nr_events;
	unsigned long trace_buffer_size;	/* Per-CPU buffer size, in bytes */
	struct ring_buffer_remote rb_remote;
	struct mutex lock;			/* Serializes all remote state changes */
	struct rw_semaphore reader_lock;	/* Global reader exclusion */
	struct rw_semaphore *pcpu_reader_locks;	/* Per-CPU reader exclusion, lazily allocated */
	unsigned int nr_readers;		/* Live iterators, pins the loaded buffer */
	unsigned int poll_ms;			/* Polling period for consuming readers */
	bool tracing_on;			/* Remote currently allowed to write events */
};
59
trace_remote_loaded(struct trace_remote * remote)60 static bool trace_remote_loaded(struct trace_remote *remote)
61 {
62 return !!remote->trace_buffer;
63 }
64
/*
 * Map the remote's trace buffer and wrap it in a kernel remote ring-buffer.
 * No-op when already loaded. Returns 0 on success, negative error otherwise.
 */
static int trace_remote_load(struct trace_remote *remote)
{
	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
	struct trace_buffer_desc *desc;

	lockdep_assert_held(&remote->lock);

	if (trace_remote_loaded(remote))
		return 0;

	/* Ask the remote to allocate/share its per-CPU ring-buffers */
	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	rb_remote->desc = desc;
	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
	rb_remote->priv = remote->priv;
	rb_remote->reset = remote->cbs->reset;
	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
	if (!remote->trace_buffer) {
		/* Undo the remote-side allocation on failure */
		remote->cbs->unload_trace_buffer(desc, remote->priv);
		return -ENOMEM;
	}

	remote->trace_buffer_desc = desc;

	return 0;
}
93
trace_remote_try_unload(struct trace_remote * remote)94 static void trace_remote_try_unload(struct trace_remote *remote)
95 {
96 lockdep_assert_held(&remote->lock);
97
98 if (!trace_remote_loaded(remote))
99 return;
100
101 /* The buffer is being read or writable */
102 if (remote->nr_readers || remote->tracing_on)
103 return;
104
105 /* The buffer has readable data */
106 if (!ring_buffer_empty(remote->trace_buffer))
107 return;
108
109 ring_buffer_free(remote->trace_buffer);
110 remote->trace_buffer = NULL;
111 remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
112 }
113
/*
 * Allow the remote to write events, loading the ring-buffer first if needed.
 * No-op when tracing is already on.
 */
static int trace_remote_enable_tracing(struct trace_remote *remote)
{
	int ret;

	lockdep_assert_held(&remote->lock);

	if (remote->tracing_on)
		return 0;

	ret = trace_remote_load(remote);
	if (ret)
		return ret;

	ret = remote->cbs->enable_tracing(true, remote->priv);
	if (ret) {
		/* Drop the buffer we just loaded if nothing else uses it */
		trace_remote_try_unload(remote);
		return ret;
	}

	remote->tracing_on = true;

	return 0;
}
137
/*
 * Stop the remote from writing events, collect what was already written and
 * unload the buffer if nothing else keeps it alive.
 */
static int trace_remote_disable_tracing(struct trace_remote *remote)
{
	int ret;

	lockdep_assert_held(&remote->lock);

	if (!remote->tracing_on)
		return 0;

	ret = remote->cbs->enable_tracing(false, remote->priv);
	if (ret)
		return ret;

	/* Flush the last events written before tracing was stopped */
	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
	remote->tracing_on = false;
	trace_remote_try_unload(remote);

	return 0;
}
157
trace_remote_reset(struct trace_remote * remote,int cpu)158 static void trace_remote_reset(struct trace_remote *remote, int cpu)
159 {
160 lockdep_assert_held(&remote->lock);
161
162 if (!trace_remote_loaded(remote))
163 return;
164
165 if (cpu == RING_BUFFER_ALL_CPUS)
166 ring_buffer_reset(remote->trace_buffer);
167 else
168 ring_buffer_reset_cpu(remote->trace_buffer, cpu);
169
170 trace_remote_try_unload(remote);
171 }
172
173 static ssize_t
tracing_on_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)174 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
175 {
176 struct seq_file *seq = filp->private_data;
177 struct trace_remote *remote = seq->private;
178 unsigned long val;
179 int ret;
180
181 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
182 if (ret)
183 return ret;
184
185 guard(mutex)(&remote->lock);
186
187 ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
188 if (ret)
189 return ret;
190
191 return cnt;
192 }
tracing_on_show(struct seq_file * s,void * unused)193 static int tracing_on_show(struct seq_file *s, void *unused)
194 {
195 struct trace_remote *remote = s->private;
196
197 seq_printf(s, "%d\n", remote->tracing_on);
198
199 return 0;
200 }
201 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
202
buffer_size_kb_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)203 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
204 loff_t *ppos)
205 {
206 struct seq_file *seq = filp->private_data;
207 struct trace_remote *remote = seq->private;
208 unsigned long val;
209 int ret;
210
211 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
212 if (ret)
213 return ret;
214
215 /* KiB to Bytes */
216 if (!val || check_shl_overflow(val, 10, &val))
217 return -EINVAL;
218
219 guard(mutex)(&remote->lock);
220
221 if (trace_remote_loaded(remote))
222 return -EBUSY;
223
224 remote->trace_buffer_size = val;
225
226 return cnt;
227 }
228
buffer_size_kb_show(struct seq_file * s,void * unused)229 static int buffer_size_kb_show(struct seq_file *s, void *unused)
230 {
231 struct trace_remote *remote = s->private;
232
233 seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
234 trace_remote_loaded(remote) ? "loaded" : "unloaded");
235
236 return 0;
237 }
238 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
239
/*
 * Take a reader reference on @remote, loading the ring-buffer if necessary.
 * The first single-CPU reader also allocates the per-CPU reader locks.
 * Must be balanced with trace_remote_put().
 */
static int trace_remote_get(struct trace_remote *remote, int cpu)
{
	int ret;

	/* Refuse new readers instead of wrapping the refcount */
	if (remote->nr_readers == UINT_MAX)
		return -EBUSY;

	ret = trace_remote_load(remote);
	if (ret)
		return ret;

	if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
		int lock_cpu;

		remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
						    GFP_KERNEL);
		if (!remote->pcpu_reader_locks) {
			/* Nothing else references the buffer we may have just loaded */
			trace_remote_try_unload(remote);
			return -ENOMEM;
		}

		for_each_possible_cpu(lock_cpu)
			init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
	}

	remote->nr_readers++;

	return 0;
}
269
trace_remote_put(struct trace_remote * remote)270 static void trace_remote_put(struct trace_remote *remote)
271 {
272 if (WARN_ON(!remote->nr_readers))
273 return;
274
275 remote->nr_readers--;
276 if (remote->nr_readers)
277 return;
278
279 kfree(remote->pcpu_reader_locks);
280 remote->pcpu_reader_locks = NULL;
281
282 trace_remote_try_unload(remote);
283 }
284
__poll_remote(struct work_struct * work)285 static void __poll_remote(struct work_struct *work)
286 {
287 struct delayed_work *dwork = to_delayed_work(work);
288 struct trace_remote_iterator *iter;
289
290 iter = container_of(dwork, struct trace_remote_iterator, poll_work);
291 ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
292 schedule_delayed_work((struct delayed_work *)work,
293 msecs_to_jiffies(iter->remote->poll_ms));
294 }
295
/*
 * Release the ring-buffer iterator(s) of a non-consuming reader. @cpu is the
 * CPU the iterator was created for, or RING_BUFFER_ALL_CPUS, in which case
 * the whole per-CPU iterator array is walked and freed.
 */
static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		ring_buffer_read_finish(iter->rb_iter);
		return;
	}

	/* @cpu is reused as the loop cursor from here on */
	for_each_possible_cpu(cpu) {
		if (iter->rb_iters[cpu])
			ring_buffer_read_finish(iter->rb_iters[cpu]);
	}

	kfree(iter->rb_iters);
}
310
/*
 * Create the ring-buffer iterator(s) backing a non-consuming reader: a single
 * one for @cpu, or one per possible CPU for RING_BUFFER_ALL_CPUS.
 * Returns 0 or -ENOMEM; nothing is left allocated on failure.
 */
static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);

		return iter->rb_iter ? 0 : -ENOMEM;
	}

	iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
	if (!iter->rb_iters)
		return -ENOMEM;

	/* @cpu is reused as the loop cursor from here on */
	for_each_possible_cpu(cpu) {
		iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
							     GFP_KERNEL);
		if (!iter->rb_iters[cpu]) {
			/* Unwind the iterators created so far */
			__free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
			return -ENOMEM;
		}
	}

	return 0;
}
334
/*
 * Create a reader iterator on @remote for @cpu (or RING_BUFFER_ALL_CPUS).
 * A non-consuming iterator on an unloaded remote returns NULL (nothing to
 * read); errors are returned as ERR_PTRs. On success the iterator holds a
 * reader reference, released by trace_remote_iter_free().
 */
static struct trace_remote_iterator
*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
{
	struct trace_remote_iterator *iter = NULL;
	int ret;

	lockdep_assert_held(&remote->lock);

	if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
		return NULL;

	ret = trace_remote_get(remote, cpu);
	if (ret)
		return ERR_PTR(ret);

	/* Test the CPU */
	ret = ring_buffer_poll_remote(remote->trace_buffer, cpu);
	if (ret)
		goto err;

	iter = kzalloc_obj(*iter);
	if (iter) {
		iter->remote = remote;
		iter->cpu = cpu;
		iter->type = type;
		trace_seq_init(&iter->seq);

		switch (type) {
		case TRI_CONSUMING:
			/* Keep the kernel copy of the buffer up to date */
			INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
			schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
			break;
		case TRI_NONCONSUMING:
			ret = __alloc_ring_buffer_iter(iter, cpu);
			break;
		}

		if (ret)
			goto err;

		return iter;
	}
	ret = -ENOMEM;

err:
	/* Drop the iterator and the reader reference taken above */
	kfree(iter);
	trace_remote_put(remote);

	return ERR_PTR(ret);
}
385
/*
 * Release an iterator created by trace_remote_iter() and drop its reader
 * reference. NULL-safe.
 */
static void trace_remote_iter_free(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote;

	if (!iter)
		return;

	remote = iter->remote;

	lockdep_assert_held(&remote->lock);

	switch (iter->type) {
	case TRI_CONSUMING:
		/* Stop the polling worker before the iterator goes away */
		cancel_delayed_work_sync(&iter->poll_work);
		break;
	case TRI_NONCONSUMING:
		__free_ring_buffer_iter(iter, iter->cpu);
		break;
	}

	kfree(iter);
	trace_remote_put(remote);
}
409
/*
 * Acquire the reader locks for a read section. A consuming all-CPUs reader
 * takes the global lock exclusively; everyone else shares it. Single-CPU
 * readers additionally take their per-CPU lock (exclusively when consuming).
 * Paired with trace_remote_iter_read_finished().
 */
static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Acquire global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		down_write(&remote->reader_lock);
	else
		down_read(&remote->reader_lock);

	if (cpu == RING_BUFFER_ALL_CPUS)
		return;

	/*
	 * No need for the remote lock here, iter holds a reference on
	 * remote->nr_readers
	 */

	/* Get the per-CPU one */
	if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
		return;

	if (iter->type == TRI_CONSUMING)
		down_write(&remote->pcpu_reader_locks[cpu]);
	else
		down_read(&remote->pcpu_reader_locks[cpu]);
}
438
/*
 * Release the locks taken by trace_remote_iter_read_start(), in reverse
 * order: per-CPU lock first, then the global reader lock.
 */
static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Release per-CPU reader lock */
	if (cpu != RING_BUFFER_ALL_CPUS) {
		/*
		 * No need for the remote lock here, iter holds a reference on
		 * remote->nr_readers
		 */
		if (iter->type == TRI_CONSUMING)
			up_write(&remote->pcpu_reader_locks[cpu]);
		else
			up_read(&remote->pcpu_reader_locks[cpu]);
	}

	/* Release global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		up_write(&remote->reader_lock);
	else
		up_read(&remote->reader_lock);
}
462
__get_rb_iter(struct trace_remote_iterator * iter,int cpu)463 static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
464 {
465 return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
466 }
467
468 static struct ring_buffer_event *
__peek_event(struct trace_remote_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)469 __peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
470 {
471 struct ring_buffer_event *rb_evt;
472 struct ring_buffer_iter *rb_iter;
473
474 switch (iter->type) {
475 case TRI_CONSUMING:
476 return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
477 case TRI_NONCONSUMING:
478 rb_iter = __get_rb_iter(iter, cpu);
479 rb_evt = ring_buffer_iter_peek(rb_iter, ts);
480 if (!rb_evt)
481 return NULL;
482
483 *lost_events = ring_buffer_iter_dropped(rb_iter);
484
485 return rb_evt;
486 }
487
488 return NULL;
489 }
490
/*
 * Select the next event to render into iter->evt/evt_cpu/ts/lost_events.
 * For a single-CPU iterator that is the head of that CPU's buffer; for
 * RING_BUFFER_ALL_CPUS the event with the oldest timestamp across all CPUs
 * wins. Returns true when an event was found.
 */
static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	struct ring_buffer_event *rb_evt;
	int cpu = iter->cpu;

	if (cpu != RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			return false;

		rb_evt = __peek_event(iter, cpu, &iter->ts, &iter->lost_events);
		if (!rb_evt)
			return false;

		iter->evt_cpu = cpu;
		iter->evt = ring_buffer_event_data(rb_evt);
		return true;
	}

	/* U64_MAX doubles as the "no event found yet" marker */
	iter->ts = U64_MAX;
	for_each_possible_cpu(cpu) {
		unsigned long lost_events;
		u64 ts;

		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			continue;

		rb_evt = __peek_event(iter, cpu, &ts, &lost_events);
		if (!rb_evt)
			continue;

		/* Keep only the oldest event seen so far */
		if (ts >= iter->ts)
			continue;

		iter->ts = ts;
		iter->evt_cpu = cpu;
		iter->evt = ring_buffer_event_data(rb_evt);
		iter->lost_events = lost_events;
	}

	return iter->ts != U64_MAX;
}
533
trace_remote_iter_move(struct trace_remote_iterator * iter)534 static void trace_remote_iter_move(struct trace_remote_iterator *iter)
535 {
536 struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
537
538 switch (iter->type) {
539 case TRI_CONSUMING:
540 ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
541 break;
542 case TRI_NONCONSUMING:
543 ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
544 break;
545 }
546 }
547
548 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id);
549
/*
 * Render the current event (iter->evt) into iter->seq: optional lost-events
 * banner, "[cpu] seconds.microseconds: " prefix, then the event's own print
 * handler (or an UNKNOWN line for unregistered ids).
 * Returns -EOVERFLOW when the seq buffer overflowed.
 */
static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	struct remote_event *evt;
	unsigned long usecs_rem;
	u64 ts = iter->ts;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->evt_cpu, iter->lost_events);

	/* ns -> us, then split into seconds and the microsecond remainder */
	do_div(ts, 1000);
	usecs_rem = do_div(ts, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
			 ts, usecs_rem);

	evt = trace_remote_find_event(iter->remote, iter->evt->id);
	if (!evt)
		trace_seq_printf(&iter->seq, "UNKNOWN id=%d\n", iter->evt->id);
	else
		evt->print(iter->evt, &iter->seq);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}
574
trace_pipe_open(struct inode * inode,struct file * filp)575 static int trace_pipe_open(struct inode *inode, struct file *filp)
576 {
577 struct trace_remote *remote = inode->i_private;
578 struct trace_remote_iterator *iter;
579 int cpu = tracing_get_cpu(inode);
580
581 guard(mutex)(&remote->lock);
582
583 iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
584 if (IS_ERR(iter))
585 return PTR_ERR(iter);
586
587 filp->private_data = iter;
588
589 return IS_ERR(iter) ? PTR_ERR(iter) : 0;
590 }
591
trace_pipe_release(struct inode * inode,struct file * filp)592 static int trace_pipe_release(struct inode *inode, struct file *filp)
593 {
594 struct trace_remote_iterator *iter = filp->private_data;
595 struct trace_remote *remote = iter->remote;
596
597 guard(mutex)(&remote->lock);
598
599 trace_remote_iter_free(iter);
600
601 return 0;
602 }
603
/*
 * Read handler for "trace_pipe": copy rendered text out of iter->seq, and
 * whenever it runs dry, block for new events and refill it. The loop is
 * expressed with a backwards goto to the copy step.
 */
static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int ret;

copy_to_user:
	/* -EBUSY means the seq buffer is empty: go refill it */
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	trace_seq_init(&iter->seq);

	/* Wait for new events to show up */
	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	trace_remote_iter_read_start(iter);

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

		/* The event didn't fit: roll it back and flush what we have */
		if (trace_remote_iter_print_event(iter)) {
			iter->seq.seq.len = prev_len;
			break;
		}

		trace_remote_iter_move(iter);
	}

	trace_remote_iter_read_finished(iter);

	goto copy_to_user;
}
638
/* File operations for the consuming "trace_pipe" files */
static const struct file_operations trace_pipe_fops = {
	.open		= trace_pipe_open,
	.read		= trace_pipe_read,
	.release	= trace_pipe_release,
};
644
/*
 * seq_file .next: advance to the next event, or NULL at the end of the
 * buffer. Also keeps iter->pos in sync for trace_start() resumes.
 */
static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;

	++*pos;

	if (!iter || !trace_remote_iter_read_event(iter))
		return NULL;

	trace_remote_iter_move(iter);
	iter->pos++;

	return iter;
}
659
/*
 * seq_file .start: take the reader locks and position the iterator at *pos.
 * A NULL iter (file opened on an unloaded remote) yields an empty read.
 */
static void *trace_start(struct seq_file *m, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;
	loff_t i;

	if (!iter)
		return NULL;

	trace_remote_iter_read_start(iter);

	if (!*pos) {
		/* Restart from the first event */
		iter->pos = -1;
		return trace_next(m, NULL, &i);
	}

	/* Resume: step forward from the last reached position to *pos */
	i = iter->pos;
	while (i < *pos) {
		iter = trace_next(m, NULL, &i);
		if (!iter)
			return NULL;
	}

	return iter;
}
684
trace_show(struct seq_file * m,void * v)685 static int trace_show(struct seq_file *m, void *v)
686 {
687 struct trace_remote_iterator *iter = v;
688
689 trace_seq_init(&iter->seq);
690
691 if (trace_remote_iter_print_event(iter)) {
692 seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
693 return 0;
694 }
695
696 return trace_print_seq(m, &iter->seq);
697 }
698
trace_stop(struct seq_file * m,void * v)699 static void trace_stop(struct seq_file *m, void *v)
700 {
701 struct trace_remote_iterator *iter = m->private;
702
703 if (iter)
704 trace_remote_iter_read_finished(iter);
705 }
706
/* seq_file operations backing the non-consuming "trace" files */
static const struct seq_operations trace_sops = {
	.start		= trace_start,
	.next		= trace_next,
	.show		= trace_show,
	.stop		= trace_stop,
};
713
/*
 * Open handler for "trace": set up a non-consuming iterator and hand it to
 * the seq_file machinery. Write-only opens (used to reset the buffer) need
 * no iterator.
 */
static int trace_open(struct inode *inode, struct file *filp)
{
	struct trace_remote *remote = inode->i_private;
	struct trace_remote_iterator *iter = NULL;
	int cpu = tracing_get_cpu(inode);
	int ret;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	guard(mutex)(&remote->lock);

	/* iter may be NULL when the remote is not loaded: empty read */
	iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = seq_open(filp, &trace_sops);
	if (ret) {
		trace_remote_iter_free(iter);
		return ret;
	}

	((struct seq_file *)filp->private_data)->private = (void *)iter;

	return 0;
}
740
/*
 * Release handler for "trace": tear down the seq_file and free the iterator
 * (which may be NULL when the remote was not loaded at open time).
 */
static int trace_release(struct inode *inode, struct file *filp)
{
	struct trace_remote_iterator *iter;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	iter = ((struct seq_file *)filp->private_data)->private;
	seq_release(inode, filp);

	if (!iter)
		return 0;

	guard(mutex)(&iter->remote->lock);

	trace_remote_iter_free(iter);

	return 0;
}
760
trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)761 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
762 {
763 struct inode *inode = file_inode(filp);
764 struct trace_remote *remote = inode->i_private;
765 int cpu = tracing_get_cpu(inode);
766
767 guard(mutex)(&remote->lock);
768
769 trace_remote_reset(remote, cpu);
770
771 return cnt;
772 }
773
/* File operations for the non-consuming "trace" files (read view + reset) */
static const struct file_operations trace_fops = {
	.open		= trace_open,
	.write		= trace_write,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.release	= trace_release,
};
781
trace_remote_init_tracefs(const char * name,struct trace_remote * remote)782 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
783 {
784 struct dentry *remote_d, *percpu_d, *d;
785 static struct dentry *root;
786 static DEFINE_MUTEX(lock);
787 bool root_inited = false;
788 int cpu;
789
790 guard(mutex)(&lock);
791
792 if (!root) {
793 root = tracefs_create_dir(TRACEFS_DIR, NULL);
794 if (!root) {
795 pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
796 return -ENOMEM;
797 }
798 root_inited = true;
799 }
800
801 remote_d = tracefs_create_dir(name, root);
802 if (!remote_d) {
803 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
804 goto err;
805 }
806
807 d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
808 if (!d)
809 goto err;
810
811 d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
812 &buffer_size_kb_fops);
813 if (!d)
814 goto err;
815
816 d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
817 if (!d)
818 goto err;
819
820 d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
821 if (!d)
822 goto err;
823
824 percpu_d = tracefs_create_dir("per_cpu", remote_d);
825 if (!percpu_d) {
826 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
827 goto err;
828 }
829
830 for_each_possible_cpu(cpu) {
831 struct dentry *cpu_d;
832 char cpu_name[16];
833
834 snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
835 cpu_d = tracefs_create_dir(cpu_name, percpu_d);
836 if (!cpu_d) {
837 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
838 name, cpu);
839 goto err;
840 }
841
842 d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
843 &trace_pipe_fops);
844 if (!d)
845 goto err;
846
847 d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
848 &trace_fops);
849 if (!d)
850 goto err;
851 }
852
853 remote->dentry = remote_d;
854
855 return 0;
856
857 err:
858 if (root_inited) {
859 tracefs_remove(root);
860 root = NULL;
861 } else {
862 tracefs_remove(remote_d);
863 }
864
865 return -ENOMEM;
866 }
867
868 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
869 struct remote_event *events, size_t nr_events);
870
871 /**
872 * trace_remote_register() - Register a Tracefs remote
873 * @name: Name of the remote, used for the Tracefs remotes/ directory.
874 * @cbs: Set of callbacks used to control the remote.
875 * @priv: Private data, passed to each callback from @cbs.
876 * @events: Array of events. &remote_event.name and &remote_event.id must be
877 * filled by the caller.
878 * @nr_events: Number of events in the @events array.
879 *
880 * A trace remote is an entity, outside of the kernel (most likely firmware or
881 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
882 * The kernel would then act as a reader.
883 *
884 * The registered remote will be found under the Tracefs directory
885 * remotes/<name>.
886 *
887 * Return: 0 on success, negative error code on failure.
888 */
trace_remote_register(const char * name,struct trace_remote_callbacks * cbs,void * priv,struct remote_event * events,size_t nr_events)889 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
890 struct remote_event *events, size_t nr_events)
891 {
892 struct trace_remote *remote;
893 int ret;
894
895 remote = kzalloc_obj(*remote);
896 if (!remote)
897 return -ENOMEM;
898
899 remote->cbs = cbs;
900 remote->priv = priv;
901 remote->trace_buffer_size = 7 << 10;
902 remote->poll_ms = 100;
903 mutex_init(&remote->lock);
904 init_rwsem(&remote->reader_lock);
905
906 if (trace_remote_init_tracefs(name, remote)) {
907 kfree(remote);
908 return -ENOMEM;
909 }
910
911 ret = trace_remote_register_events(name, remote, events, nr_events);
912 if (ret) {
913 pr_err("Failed to register events for trace remote '%s' (%d)\n",
914 name, ret);
915 return ret;
916 }
917
918 ret = cbs->init ? cbs->init(remote->dentry, priv) : 0;
919 if (ret)
920 pr_err("Init failed for trace remote '%s' (%d)\n", name, ret);
921
922 return ret;
923 }
924 EXPORT_SYMBOL_GPL(trace_remote_register);
925
926 /**
927 * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
928 * @desc: Descriptor of the per-CPU ring-buffers, originally filled by
929 * trace_remote_alloc_buffer()
930 *
931 * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
932 */
trace_remote_free_buffer(struct trace_buffer_desc * desc)933 void trace_remote_free_buffer(struct trace_buffer_desc *desc)
934 {
935 struct ring_buffer_desc *rb_desc;
936 int cpu;
937
938 for_each_ring_buffer_desc(rb_desc, cpu, desc) {
939 unsigned int id;
940
941 free_page(rb_desc->meta_va);
942
943 for (id = 0; id < rb_desc->nr_page_va; id++)
944 free_page(rb_desc->page_va[id]);
945 }
946 }
947 EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
948
949 /**
950 * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
951 * @desc: Uninitialized trace_buffer_desc
952 * @desc_size: Size of the trace_buffer_desc. Must be at least equal to
953 * trace_buffer_desc_size()
954 * @buffer_size: Size in bytes of each per-CPU ring-buffer
955 * @cpumask: CPUs to allocate a ring-buffer for
956 *
957 * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
958 * for each CPU from @cpumask and fill @desc. Most likely called from
959 * &trace_remote_callbacks.load_trace_buffer.
960 *
961 * Return: 0 on success, negative error code on failure.
962 */
trace_remote_alloc_buffer(struct trace_buffer_desc * desc,size_t desc_size,size_t buffer_size,const struct cpumask * cpumask)963 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
964 const struct cpumask *cpumask)
965 {
966 unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
967 void *desc_end = desc + desc_size;
968 struct ring_buffer_desc *rb_desc;
969 int cpu, ret = -ENOMEM;
970
971 if (desc_size < struct_size(desc, __data, 0))
972 return -EINVAL;
973
974 desc->nr_cpus = 0;
975 desc->struct_len = struct_size(desc, __data, 0);
976
977 rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
978
979 for_each_cpu(cpu, cpumask) {
980 unsigned int id;
981
982 if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
983 ret = -EINVAL;
984 goto err;
985 }
986
987 rb_desc->cpu = cpu;
988 rb_desc->nr_page_va = 0;
989 rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
990 if (!rb_desc->meta_va)
991 goto err;
992
993 for (id = 0; id < nr_pages; id++) {
994 rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
995 if (!rb_desc->page_va[id])
996 goto err;
997
998 rb_desc->nr_page_va++;
999 }
1000 desc->nr_cpus++;
1001 desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
1002 desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
1003 rb_desc = __next_ring_buffer_desc(rb_desc);
1004 }
1005
1006 return 0;
1007
1008 err:
1009 trace_remote_free_buffer(desc);
1010 return ret;
1011 }
1012 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
1013
1014 static int
trace_remote_enable_event(struct trace_remote * remote,struct remote_event * evt,bool enable)1015 trace_remote_enable_event(struct trace_remote *remote, struct remote_event *evt, bool enable)
1016 {
1017 int ret;
1018
1019 lockdep_assert_held(&remote->lock);
1020
1021 if (evt->enabled == enable)
1022 return 0;
1023
1024 ret = remote->cbs->enable_event(evt->id, enable, remote->priv);
1025 if (ret)
1026 return ret;
1027
1028 evt->enabled = enable;
1029
1030 return 0;
1031 }
1032
remote_event_enable_show(struct seq_file * s,void * unused)1033 static int remote_event_enable_show(struct seq_file *s, void *unused)
1034 {
1035 struct remote_event *evt = s->private;
1036
1037 seq_printf(s, "%d\n", evt->enabled);
1038
1039 return 0;
1040 }
1041
remote_event_enable_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)1042 static ssize_t remote_event_enable_write(struct file *filp, const char __user *ubuf,
1043 size_t count, loff_t *ppos)
1044 {
1045 struct seq_file *seq = filp->private_data;
1046 struct remote_event *evt = seq->private;
1047 struct trace_remote *remote = evt->remote;
1048 u8 enable;
1049 int ret;
1050
1051 ret = kstrtou8_from_user(ubuf, count, 10, &enable);
1052 if (ret)
1053 return ret;
1054
1055 guard(mutex)(&remote->lock);
1056
1057 ret = trace_remote_enable_event(remote, evt, enable);
1058 if (ret)
1059 return ret;
1060
1061 return count;
1062 }
1063 DEFINE_SHOW_STORE_ATTRIBUTE(remote_event_enable);
1064
remote_event_id_show(struct seq_file * s,void * unused)1065 static int remote_event_id_show(struct seq_file *s, void *unused)
1066 {
1067 struct remote_event *evt = s->private;
1068
1069 seq_printf(s, "%d\n", evt->id);
1070
1071 return 0;
1072 }
1073 DEFINE_SHOW_ATTRIBUTE(remote_event_id);
1074
/*
 * "events/<evt>/format" read handler: emit the event layout in the
 * conventional Tracefs format (name, ID, fields, print fmt). Field offsets
 * start right after the common remote_event_hdr.
 */
static int remote_event_format_show(struct seq_file *s, void *unused)
{
	size_t offset = sizeof(struct remote_event_hdr);
	struct remote_event *evt = s->private;
	struct trace_event_fields *field;

	seq_printf(s, "name: %s\n", evt->name);
	seq_printf(s, "ID: %d\n", evt->id);
	seq_puts(s,
		 "format:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\n");

	/* The fields array is terminated by a NULL name */
	field = &evt->fields[0];
	while (field->name) {
		seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n",
			   field->type, field->name, offset, field->size,
			   field->is_signed);
		offset += field->size;
		field++;
	}

	if (field != &evt->fields[0])
		seq_puts(s, "\n");

	seq_printf(s, "print fmt: %s\n", evt->print_fmt);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(remote_event_format);
1103
remote_event_callback(const char * name,umode_t * mode,void ** data,const struct file_operations ** fops)1104 static int remote_event_callback(const char *name, umode_t *mode, void **data,
1105 const struct file_operations **fops)
1106 {
1107 if (!strcmp(name, "enable")) {
1108 *mode = TRACEFS_MODE_WRITE;
1109 *fops = &remote_event_enable_fops;
1110 return 1;
1111 }
1112
1113 if (!strcmp(name, "id")) {
1114 *mode = TRACEFS_MODE_READ;
1115 *fops = &remote_event_id_fops;
1116 return 1;
1117 }
1118
1119 if (!strcmp(name, "format")) {
1120 *mode = TRACEFS_MODE_READ;
1121 *fops = &remote_event_format_fops;
1122 return 1;
1123 }
1124
1125 return 0;
1126 }
1127
/*
 * "events/enable" write handler: apply the requested state to every event of
 * the remote. Per-event failures are ignored (best effort) —
 * NOTE(review): confirm errors should not be propagated to the writer.
 */
static ssize_t remote_events_dir_enable_write(struct file *filp, const char __user *ubuf,
					      size_t count, loff_t *ppos)
{
	struct trace_remote *remote = file_inode(filp)->i_private;
	int i, ret;
	u8 enable;

	ret = kstrtou8_from_user(ubuf, count, 10, &enable);
	if (ret)
		return ret;

	guard(mutex)(&remote->lock);

	for (i = 0; i < remote->nr_events; i++) {
		struct remote_event *evt = &remote->events[i];

		trace_remote_enable_event(remote, evt, enable);
	}

	return count;
}
1149
remote_events_dir_enable_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)1150 static ssize_t remote_events_dir_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1151 loff_t *ppos)
1152 {
1153 struct trace_remote *remote = file_inode(filp)->i_private;
1154 const char enabled_char[] = {'0', '1', 'X'};
1155 char enabled_str[] = " \n";
1156 int i, enabled = -1;
1157
1158 guard(mutex)(&remote->lock);
1159
1160 for (i = 0; i < remote->nr_events; i++) {
1161 struct remote_event *evt = &remote->events[i];
1162
1163 if (enabled == -1) {
1164 enabled = evt->enabled;
1165 } else if (enabled != evt->enabled) {
1166 enabled = 2;
1167 break;
1168 }
1169 }
1170
1171 enabled_str[0] = enabled_char[enabled == -1 ? 0 : enabled];
1172
1173 return simple_read_from_buffer(ubuf, cnt, ppos, enabled_str, 2);
1174 }
1175
/* File operations for the events-directory level "enable" control file. */
static const struct file_operations remote_events_dir_enable_fops = {
	.write = remote_events_dir_enable_write,
	.read = remote_events_dir_enable_read,
};
1180
1181 static ssize_t
remote_events_dir_header_page_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)1182 remote_events_dir_header_page_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1183 {
1184 struct trace_seq *s;
1185 int ret;
1186
1187 s = kmalloc(sizeof(*s), GFP_KERNEL);
1188 if (!s)
1189 return -ENOMEM;
1190
1191 trace_seq_init(s);
1192
1193 ring_buffer_print_page_header(NULL, s);
1194 ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s));
1195 kfree(s);
1196
1197 return ret;
1198 }
1199
/* File operations for the events-directory "header_page" file (read-only). */
static const struct file_operations remote_events_dir_header_page_fops = {
	.read = remote_events_dir_header_page_read,
};
1203
1204 static ssize_t
remote_events_dir_header_event_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)1205 remote_events_dir_header_event_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1206 {
1207 struct trace_seq *s;
1208 int ret;
1209
1210 s = kmalloc(sizeof(*s), GFP_KERNEL);
1211 if (!s)
1212 return -ENOMEM;
1213
1214 trace_seq_init(s);
1215
1216 ring_buffer_print_entry_header(s);
1217 ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s));
1218 kfree(s);
1219
1220 return ret;
1221 }
1222
/* File operations for the events-directory "header_event" file (read-only). */
static const struct file_operations remote_events_dir_header_event_fops = {
	.read = remote_events_dir_header_event_read,
};
1226
/*
 * eventfs callback resolving the files of the top-level events directory
 * (enable/header_page/header_event) to their mode and file_operations.
 * Returns 1 when @name is handled, 0 otherwise.
 */
static int remote_events_dir_callback(const char *name, umode_t *mode, void **data,
				      const struct file_operations **fops)
{
	static const struct {
		const char			*name;
		umode_t				mode;
		const struct file_operations	*fops;
	} files[] = {
		{ "enable",		TRACEFS_MODE_WRITE,	&remote_events_dir_enable_fops },
		{ "header_page",	TRACEFS_MODE_READ,	&remote_events_dir_header_page_fops },
		{ "header_event",	TRACEFS_MODE_READ,	&remote_events_dir_header_event_fops },
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(files); i++) {
		if (strcmp(name, files[i].name))
			continue;

		*mode = files[i].mode;
		*fops = files[i].fops;
		return 1;
	}

	return 0;
}
1250
trace_remote_init_eventfs(const char * remote_name,struct trace_remote * remote,struct remote_event * evt)1251 static int trace_remote_init_eventfs(const char *remote_name, struct trace_remote *remote,
1252 struct remote_event *evt)
1253 {
1254 struct eventfs_inode *eventfs = remote->eventfs;
1255 static struct eventfs_entry dir_entries[] = {
1256 {
1257 .name = "enable",
1258 .callback = remote_events_dir_callback,
1259 }, {
1260 .name = "header_page",
1261 .callback = remote_events_dir_callback,
1262 }, {
1263 .name = "header_event",
1264 .callback = remote_events_dir_callback,
1265 }
1266 };
1267 static struct eventfs_entry entries[] = {
1268 {
1269 .name = "enable",
1270 .callback = remote_event_callback,
1271 }, {
1272 .name = "id",
1273 .callback = remote_event_callback,
1274 }, {
1275 .name = "format",
1276 .callback = remote_event_callback,
1277 }
1278 };
1279 bool eventfs_create = false;
1280
1281 if (!eventfs) {
1282 eventfs = eventfs_create_events_dir("events", remote->dentry, dir_entries,
1283 ARRAY_SIZE(dir_entries), remote);
1284 if (IS_ERR(eventfs))
1285 return PTR_ERR(eventfs);
1286
1287 /*
1288 * Create similar hierarchy as local events even if a single system is supported at
1289 * the moment
1290 */
1291 eventfs = eventfs_create_dir(remote_name, eventfs, NULL, 0, NULL);
1292 if (IS_ERR(eventfs))
1293 return PTR_ERR(eventfs);
1294
1295 remote->eventfs = eventfs;
1296 eventfs_create = true;
1297 }
1298
1299 eventfs = eventfs_create_dir(evt->name, eventfs, entries, ARRAY_SIZE(entries), evt);
1300 if (IS_ERR(eventfs)) {
1301 if (eventfs_create) {
1302 eventfs_remove_events_dir(remote->eventfs);
1303 remote->eventfs = NULL;
1304 }
1305 return PTR_ERR(eventfs);
1306 }
1307
1308 return 0;
1309 }
1310
/*
 * Bind the @events array to @remote. Events must not already belong to a
 * remote and must be sorted by ascending ID (trace_remote_find_event()
 * binary-searches the array).
 *
 * The whole array is validated before anything is modified, so a failure
 * (-EEXIST or -EINVAL) leaves every event untouched instead of
 * half-attached.
 */
static int trace_remote_attach_events(struct trace_remote *remote, struct remote_event *events,
				      size_t nr_events)
{
	size_t i;

	/* Validate first: don't mutate any event until all checks pass */
	for (i = 0; i < nr_events; i++) {
		if (events[i].remote)
			return -EEXIST;

		/* We need events to be sorted for efficient lookup */
		if (i && events[i].id <= events[i - 1].id)
			return -EINVAL;
	}

	for (i = 0; i < nr_events; i++)
		events[i].remote = remote;

	remote->events = events;
	remote->nr_events = nr_events;

	return 0;
}
1334
/*
 * Attach @events to @remote and create their tracefs/eventfs hierarchy.
 *
 * eventfs creation failures are deliberately non-fatal (only a warning):
 * the events still function, they just cannot be controlled individually
 * from tracefs. Returns 0 on success or a negative errno from the attach.
 */
static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
					struct remote_event *events, size_t nr_events)
{
	size_t i;
	int ret;

	ret = trace_remote_attach_events(remote, events, nr_events);
	if (ret)
		return ret;

	for (i = 0; i < nr_events; i++) {
		struct remote_event *evt = &events[i];

		ret = trace_remote_init_eventfs(remote_name, remote, evt);
		if (ret)
			pr_warn("Failed to init eventfs for event '%s' (%d)\n",
				evt->name, ret);
	}

	return 0;
}
1355
__cmp_events(const void * key,const void * data)1356 static int __cmp_events(const void *key, const void *data)
1357 {
1358 const struct remote_event *evt = data;
1359 int id = (int)((long)key);
1360
1361 return id - (int)evt->id;
1362 }
1363
trace_remote_find_event(struct trace_remote * remote,unsigned short id)1364 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id)
1365 {
1366 return bsearch((const void *)(unsigned long)id, remote->events, remote->nr_events,
1367 sizeof(*remote->events), __cmp_events);
1368 }
1369