xref: /linux/kernel/trace/simple_ring_buffer.c (revision e4bf304f000e6fcceaf60b1455a5124b783b3a66)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2025 - Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/atomic.h>
8 #include <linux/simple_ring_buffer.h>
9 
10 #include <asm/barrier.h>
11 #include <asm/local.h>
12 
/*
 * State of a link between two buffer pages, encoded in the low bits of the
 * struct list_head pointer (pointers are sufficiently aligned for two bits).
 */
enum simple_rb_link_type {
	SIMPLE_RB_LINK_NORMAL		= 0,	/* Plain link, no special meaning */
	SIMPLE_RB_LINK_HEAD		= 1,	/* The pointed-to page is the head page */
	SIMPLE_RB_LINK_HEAD_MOVING		/* The writer is moving the head; spin */
};

/*
 * Mask clearing the state bits, recovering the real pointer value. The
 * expansion is fully parenthesized so it is safe in any expression context.
 */
#define SIMPLE_RB_LINK_MASK (~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING))
20 
/*
 * Tag the link of @bpage so that the page it points to becomes the head page.
 * Only the low state bits are rewritten; the pointer target is unchanged.
 */
static void simple_bpage_set_head_link(struct simple_buffer_page *bpage)
{
	unsigned long link = (unsigned long)bpage->link.next;

	link &= SIMPLE_RB_LINK_MASK;
	link |= SIMPLE_RB_LINK_HEAD;

	/*
	 * Paired with simple_rb_find_head() to order access between the head
	 * link and overrun. It ensures we always report an up-to-date value
	 * after swapping the reader page.
	 */
	smp_store_release(&bpage->link.next, (struct list_head *)link);
}
35 
/*
 * Atomically redirect the HEAD-tagged link of @bpage to @dst, tagging the new
 * link with @new_type.
 *
 * Returns true on success, or false if the link of @bpage was no longer
 * HEAD-tagged, i.e. the other side moved the head first.
 */
static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage,
					 struct simple_buffer_page *dst,
					 enum simple_rb_link_type new_type)
{
	unsigned long *link = (unsigned long *)(&bpage->link.next);
	unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD;
	unsigned long new = (unsigned long)(&dst->link) | new_type;

	return try_cmpxchg(link, &old, new);
}
46 
simple_bpage_set_normal_link(struct simple_buffer_page * bpage)47 static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage)
48 {
49 	unsigned long link = (unsigned long)bpage->link.next;
50 
51 	WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK));
52 }
53 
simple_bpage_from_link(struct list_head * link)54 static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link)
55 {
56 	unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK;
57 
58 	return container_of((struct list_head *)ptr, struct simple_buffer_page, link);
59 }
60 
simple_bpage_next_page(struct simple_buffer_page * bpage)61 static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage)
62 {
63 	return simple_bpage_from_link(bpage->link.next);
64 }
65 
/*
 * Reset the write/commit state of @bpage. The page data itself is left
 * untouched; only the counters are cleared.
 */
static void simple_bpage_reset(struct simple_buffer_page *bpage)
{
	bpage->write = 0;
	bpage->entries = 0;

	local_set(&bpage->page->commit, 0);
}
73 
simple_bpage_init(struct simple_buffer_page * bpage,void * page)74 static void simple_bpage_init(struct simple_buffer_page *bpage, void *page)
75 {
76 	INIT_LIST_HEAD(&bpage->link);
77 	bpage->page = (struct buffer_data_page *)page;
78 
79 	simple_bpage_reset(bpage);
80 }
81 
/*
 * Writer-side increment of a meta-page counter. WRITE_ONCE makes the update
 * safe against a concurrent reader of the meta page. Arguments are
 * parenthesized so expressions passed in expand safely.
 */
#define simple_rb_meta_inc(__meta, __inc)		\
	WRITE_ONCE((__meta), (__meta) + (__inc))
84 
simple_rb_loaded(struct simple_rb_per_cpu * cpu_buffer)85 static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer)
86 {
87 	return !!cpu_buffer->bpages;
88 }
89 
/*
 * Walk the ring from the cached head_page until the page whose predecessor
 * link is HEAD-tagged is found, then cache it in cpu_buffer->head_page.
 * Spins in place while the writer holds the link in the HEAD_MOVING state.
 *
 * Returns 0 on success or -EBUSY if no head was found after walking the ring
 * twice.
 */
static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer)
{
	/* Two full laps: the head may move once while we chase it */
	int retry = cpu_buffer->nr_pages * 2;
	struct simple_buffer_page *head;

	head = cpu_buffer->head_page;

	while (retry--) {
		unsigned long link;

spin:
		/* See smp_store_release in simple_bpage_set_head_link() */
		link = (unsigned long)smp_load_acquire(&head->link.prev->next);

		switch (link & ~SIMPLE_RB_LINK_MASK) {
		/* Found the head */
		case SIMPLE_RB_LINK_HEAD:
			cpu_buffer->head_page = head;
			return 0;
		/* The writer caught the head, we can spin, that won't be long */
		case SIMPLE_RB_LINK_HEAD_MOVING:
			goto spin;
		}

		head = simple_bpage_next_page(head);
	}

	return -EBUSY;
}
119 
120 /**
121  * simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader
122  * @cpu_buffer: A simple_rb_per_cpu
123  *
124  * This function enables consuming reading. It ensures the current head page will not be overwritten
125  * and can be safely read.
126  *
127  * Returns 0 on success, -ENODEV if @cpu_buffer was unloaded or -EBUSY if we failed to catch the
128  * head page.
129  */
simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu * cpu_buffer)130 int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer)
131 {
132 	struct simple_buffer_page *last, *head, *reader;
133 	unsigned long overrun;
134 	int retry = 8;
135 	int ret;
136 
137 	if (!simple_rb_loaded(cpu_buffer))
138 		return -ENODEV;
139 
140 	reader = cpu_buffer->reader_page;
141 
142 	do {
143 		/* Run after the writer to find the head */
144 		ret = simple_rb_find_head(cpu_buffer);
145 		if (ret)
146 			return ret;
147 
148 		head = cpu_buffer->head_page;
149 
150 		/* Connect the reader page around the header page */
151 		reader->link.next = head->link.next;
152 		reader->link.prev = head->link.prev;
153 
154 		/* The last page before the head */
155 		last = simple_bpage_from_link(head->link.prev);
156 
157 		/* The reader page points to the new header page */
158 		simple_bpage_set_head_link(reader);
159 
160 		overrun = cpu_buffer->meta->overrun;
161 	} while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--);
162 
163 	if (!retry)
164 		return -EINVAL;
165 
166 	cpu_buffer->head_page = simple_bpage_from_link(reader->link.next);
167 	cpu_buffer->head_page->link.prev = &reader->link;
168 	cpu_buffer->reader_page = head;
169 	cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun;
170 	cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id;
171 	cpu_buffer->last_overrun = overrun;
172 
173 	return 0;
174 }
175 EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page);
176 
/*
 * Advance the writer's tail to the next page. If that page is the current
 * head (overwrite mode), steal it: account its entries as overrun and move
 * the head one page forward.
 *
 * Returns the new, reset tail page.
 */
static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer)
{
	struct simple_buffer_page *tail, *new_tail;

	tail = cpu_buffer->tail_page;
	new_tail = simple_bpage_next_page(tail);

	/* HEAD_MOVING makes a concurrent simple_rb_find_head() spin until we're done */
	if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) {
		/*
		 * Oh no! we've caught the head. There is none anymore and
		 * swap_reader will spin until we set the new one. Overrun must
		 * be written first, to make sure we report the correct number
		 * of lost events.
		 */
		simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries);
		simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1);

		simple_bpage_set_head_link(new_tail);
		simple_bpage_set_normal_link(tail);
	}

	simple_bpage_reset(new_tail);
	cpu_buffer->tail_page = new_tail;

	simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1);

	return new_tail;
}
205 
rb_event_size(unsigned long length)206 static unsigned long rb_event_size(unsigned long length)
207 {
208 	struct ring_buffer_event *event;
209 
210 	return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
211 }
212 
/*
 * Write a TIME_EXTEND event at @event carrying the full @delta, split between
 * time_delta (low bits) and array[0] (high bits). Returns the location right
 * after the 8-byte time-extend event, where the actual event goes.
 */
static struct ring_buffer_event *
rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta)
{
	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
	event->time_delta = delta & TS_MASK;
	event->array[0] = delta >> TS_SHIFT;

	return (struct ring_buffer_event *)((unsigned long)event + 8);
}
222 
/*
 * Reserve room for an event of @length bytes on the tail page, moving the
 * tail to the next page first if the event (plus a possible time-extend
 * event) does not fit. Writer exclusivity is provided by the
 * SIMPLE_RB_WRITING status taken in simple_ring_buffer_reserve().
 */
static struct ring_buffer_event *
simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp)
{
	unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
	struct simple_buffer_page *tail = cpu_buffer->tail_page;
	struct ring_buffer_event *event;
	u32 write, prev_write;
	u64 time_delta;

	time_delta = timestamp - cpu_buffer->write_stamp;

	/* Delta too large for the event header: prepend a time-extend event */
	if (test_time_stamp(time_delta))
		ts_ext_size = 8;

	prev_write = tail->write;
	write = prev_write + event_size + ts_ext_size;

	if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE)))
		tail = simple_rb_move_tail(cpu_buffer);

	/*
	 * First event on the page: store the absolute timestamp in the page
	 * header and start with a zero delta, no time-extend needed.
	 */
	if (!tail->entries) {
		tail->page->time_stamp = timestamp;
		time_delta = 0;
		ts_ext_size = 0;
		write = event_size;
		prev_write = 0;
	}

	tail->write = write;
	tail->entries++;

	cpu_buffer->write_stamp = timestamp;

	event = (struct ring_buffer_event *)(tail->page->data + prev_write);
	if (ts_ext_size) {
		/* The delta has been consumed by the time-extend event */
		event = rb_event_add_ts_extend(event, time_delta);
		time_delta = 0;
	}

	/* type_len == 0: the event length lives in array[0] */
	event->type_len = 0;
	event->time_delta = time_delta;
	event->array[0] = event_size - RB_EVNT_HDR_SIZE;

	return event;
}
268 
/**
 * simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer
 * @cpu_buffer:	A simple_rb_per_cpu
 * @length:	Size of the entry in bytes
 * @timestamp:	Timestamp of the entry
 *
 * Must be paired with simple_ring_buffer_commit() on success.
 *
 * Returns the address of the entry where to write data or NULL
 */
void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
				 u64 timestamp)
{
	struct ring_buffer_event *rb_event;

	/* Take writer exclusivity; fails when tracing is disabled or a write is in flight */
	if (cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING) != SIMPLE_RB_READY)
		return NULL;

	rb_event = simple_rb_reserve_next(cpu_buffer, length, timestamp);

	/* Payload starts after array[0], which holds the event length */
	return &rb_event->array[1];
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve);
290 
/**
 * simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve()
 * @cpu_buffer:	The simple_rb_per_cpu where the entry has been reserved
 */
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer)
{
	/* Publish the reserved data by pushing the commit mark to the write mark */
	local_set(&cpu_buffer->tail_page->page->commit,
		  cpu_buffer->tail_page->write);
	simple_rb_meta_inc(cpu_buffer->meta->entries, 1);

	/*
	 * Paired with simple_rb_enable_tracing() to ensure data is
	 * written to the ring-buffer before teardown.
	 */
	smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY);
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_commit);
308 
/*
 * Flip the buffer status between SIMPLE_RB_UNAVAILABLE and SIMPLE_RB_READY.
 * Disabling spins until any in-flight writer (SIMPLE_RB_WRITING) has
 * committed.
 *
 * Returns the status observed before the transition.
 */
static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
{
	u32 prev_status;

	if (enable)
		return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY);

	/* Wait for the buffer to be released */
	do {
		/* Acquire pairs with the release in simple_ring_buffer_commit() */
		prev_status = cmpxchg_acquire(&cpu_buffer->status,
					      SIMPLE_RB_READY,
					      SIMPLE_RB_UNAVAILABLE);
	} while (prev_status == SIMPLE_RB_WRITING);

	return prev_status;
}
325 
/**
 * simple_ring_buffer_reset - Reset @cpu_buffer
 * @cpu_buffer: A simple_rb_per_cpu
 *
 * This will not clear the content of the data, only reset counters and pointers
 *
 * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded.
 */
int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
{
	struct simple_buffer_page *bpage;
	u32 prev_status;
	int ret;

	if (!simple_rb_loaded(cpu_buffer))
		return -ENODEV;

	/* Quiesce the writer while we rewrite the counters */
	prev_status = simple_rb_enable_tracing(cpu_buffer, false);

	/*
	 * NOTE(review): on this error path tracing stays disabled even when
	 * prev_status was SIMPLE_RB_READY — confirm this is intended.
	 */
	ret = simple_rb_find_head(cpu_buffer);
	if (ret)
		return ret;

	/* Collapse tail onto head, then reset every page around the ring */
	bpage = cpu_buffer->tail_page = cpu_buffer->head_page;
	do {
		simple_bpage_reset(bpage);
		bpage = simple_bpage_next_page(bpage);
	} while (bpage != cpu_buffer->head_page);

	simple_bpage_reset(cpu_buffer->reader_page);

	cpu_buffer->last_overrun = 0;
	cpu_buffer->write_stamp = 0;

	cpu_buffer->meta->reader.read = 0;
	cpu_buffer->meta->reader.lost_events = 0;
	cpu_buffer->meta->entries = 0;
	cpu_buffer->meta->overrun = 0;
	cpu_buffer->meta->read = 0;
	cpu_buffer->meta->pages_lost = 0;
	cpu_buffer->meta->pages_touched = 0;

	/* Restore tracing only if it was enabled when we came in */
	if (prev_status == SIMPLE_RB_READY)
		simple_rb_enable_tracing(cpu_buffer, true);

	return 0;
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
374 
simple_ring_buffer_init_mm(struct simple_rb_per_cpu * cpu_buffer,struct simple_buffer_page * bpages,const struct ring_buffer_desc * desc,void * (* load_page)(unsigned long va),void (* unload_page)(void * va))375 int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
376 			       struct simple_buffer_page *bpages,
377 			       const struct ring_buffer_desc *desc,
378 			       void *(*load_page)(unsigned long va),
379 			       void (*unload_page)(void *va))
380 {
381 	struct simple_buffer_page *bpage = bpages;
382 	int ret = 0;
383 	void *page;
384 	int i;
385 
386 	/* At least 1 reader page and two pages in the ring-buffer */
387 	if (desc->nr_page_va < 3)
388 		return -EINVAL;
389 
390 	memset(cpu_buffer, 0, sizeof(*cpu_buffer));
391 
392 	cpu_buffer->meta = load_page(desc->meta_va);
393 	if (!cpu_buffer->meta)
394 		return -EINVAL;
395 
396 	memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
397 	cpu_buffer->meta->meta_page_size = PAGE_SIZE;
398 	cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
399 
400 	/* The reader page is not part of the ring initially */
401 	page = load_page(desc->page_va[0]);
402 	if (!page) {
403 		unload_page(cpu_buffer->meta);
404 		return -EINVAL;
405 	}
406 
407 	simple_bpage_init(bpage, page);
408 	bpage->id = 0;
409 
410 	cpu_buffer->nr_pages = 1;
411 
412 	cpu_buffer->reader_page = bpage;
413 	cpu_buffer->tail_page = bpage + 1;
414 	cpu_buffer->head_page = bpage + 1;
415 
416 	for (i = 1; i < desc->nr_page_va; i++) {
417 		page = load_page(desc->page_va[i]);
418 		if (!page) {
419 			ret = -EINVAL;
420 			break;
421 		}
422 
423 		simple_bpage_init(++bpage, page);
424 
425 		bpage->link.next = &(bpage + 1)->link;
426 		bpage->link.prev = &(bpage - 1)->link;
427 		bpage->id = i;
428 
429 		cpu_buffer->nr_pages = i + 1;
430 	}
431 
432 	if (ret) {
433 		for (i--; i >= 0; i--)
434 			unload_page((void *)desc->page_va[i]);
435 		unload_page(cpu_buffer->meta);
436 
437 		return ret;
438 	}
439 
440 	/* Close the ring */
441 	bpage->link.next = &cpu_buffer->tail_page->link;
442 	cpu_buffer->tail_page->link.prev = &bpage->link;
443 
444 	/* The last init'ed page points to the head page */
445 	simple_bpage_set_head_link(bpage);
446 
447 	cpu_buffer->bpages = bpages;
448 
449 	return 0;
450 }
451 
/* Identity mapping: the VA is already a directly usable kernel pointer. */
static void *__load_page(unsigned long page)
{
	return (void *)page;
}
456 
/* Identity counterpart of __load_page(): nothing to unmap. */
static void __unload_page(void *page) { }
458 
/**
 * simple_ring_buffer_init - Init @cpu_buffer based on @desc
 * @cpu_buffer:	A simple_rb_per_cpu buffer to init, allocated by the caller.
 * @bpages:	Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
 * @desc:	A ring_buffer_desc
 *
 * Returns 0 on success or -EINVAL if the content of @desc is invalid
 */
int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
			    const struct ring_buffer_desc *desc)
{
	/* Pages are directly addressable here: use the identity load/unload helpers */
	return simple_ring_buffer_init_mm(cpu_buffer, bpages, desc, __load_page, __unload_page);
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
473 
/*
 * Tear down @cpu_buffer: stop the writer, then hand the meta page and every
 * data page back to @unload_page. Clearing bpages marks the buffer unloaded
 * for simple_rb_loaded().
 */
void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
				  void (*unload_page)(void *))
{
	int p;

	if (!simple_rb_loaded(cpu_buffer))
		return;

	/* No writer can be in flight past this point */
	simple_rb_enable_tracing(cpu_buffer, false);

	unload_page(cpu_buffer->meta);
	for (p = 0; p < cpu_buffer->nr_pages; p++)
		unload_page(cpu_buffer->bpages[p].page);

	cpu_buffer->bpages = NULL;
}
490 
491 /**
492  * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
493  * @cpu_buffer:	A simple_rb_per_cpu that will be deleted.
494  */
simple_ring_buffer_unload(struct simple_rb_per_cpu * cpu_buffer)495 void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
496 {
497 	return simple_ring_buffer_unload_mm(cpu_buffer, __unload_page);
498 }
499 EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
500 
501 /**
502  * simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer
503  * @cpu_buffer: A simple_rb_per_cpu
504  * @enable:	True to enable tracing, False to disable it
505  *
506  * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded
507  */
simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu * cpu_buffer,bool enable)508 int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
509 {
510 	if (!simple_rb_loaded(cpu_buffer))
511 		return -ENODEV;
512 
513 	simple_rb_enable_tracing(cpu_buffer, enable);
514 
515 	return 0;
516 }
517 EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing);
518