1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2025 - Google LLC
4 * Author: Vincent Donnefort <vdonnefort@google.com>
5 */
6
7 #include <linux/atomic.h>
8 #include <linux/simple_ring_buffer.h>
9
10 #include <asm/barrier.h>
11 #include <asm/local.h>
12
/*
 * The two low bits of a page's ->next link encode the link state; since
 * struct list_head holds pointers it is at least 4-byte aligned, so those
 * bits are always clear in a genuine link pointer.
 */
enum simple_rb_link_type {
	SIMPLE_RB_LINK_NORMAL		= 0,	/* Plain link */
	SIMPLE_RB_LINK_HEAD		= 1,	/* The linked page is the ring head */
	SIMPLE_RB_LINK_HEAD_MOVING	= 2,	/* The writer is moving the head */
};

/* Strip the state bits to recover the pointer; fully parenthesized for safe expansion */
#define SIMPLE_RB_LINK_MASK (~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING))
20
/*
 * Tag @bpage's next link as HEAD: the page it points to is the current head
 * of the ring (where the reader side looks for data, see
 * simple_rb_find_head()). The pointer bits are preserved, only the low state
 * bits change.
 */
static void simple_bpage_set_head_link(struct simple_buffer_page *bpage)
{
	unsigned long link = (unsigned long)bpage->link.next;

	/* Clear any previous state bits, then mark the successor as head */
	link &= SIMPLE_RB_LINK_MASK;
	link |= SIMPLE_RB_LINK_HEAD;

	/*
	 * Paired with simple_rb_find_head() to order access between the head
	 * link and overrun. It ensures we always report an up-to-date value
	 * after swapping the reader page.
	 */
	smp_store_release(&bpage->link.next, (struct list_head *)link);
}
35
/*
 * Atomically replace @bpage's HEAD-tagged next link with a link of @new_type
 * pointing at @dst.
 *
 * Returns true on success, false if @bpage's link was not (or no longer)
 * tagged HEAD — i.e. a concurrent reader/writer moved the head first.
 */
static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage,
					 struct simple_buffer_page *dst,
					 enum simple_rb_link_type new_type)
{
	unsigned long *link = (unsigned long *)(&bpage->link.next);
	/* Expected: the current pointer bits with the HEAD tag set */
	unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD;
	unsigned long new = (unsigned long)(&dst->link) | new_type;

	return try_cmpxchg(link, &old, new);
}
46
/*
 * Clear the state bits from @bpage's next link, turning it back into a plain
 * (SIMPLE_RB_LINK_NORMAL) pointer.
 */
static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage)
{
	unsigned long link = (unsigned long)bpage->link.next;

	WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK));
}
53
simple_bpage_from_link(struct list_head * link)54 static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link)
55 {
56 unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK;
57
58 return container_of((struct list_head *)ptr, struct simple_buffer_page, link);
59 }
60
simple_bpage_next_page(struct simple_buffer_page * bpage)61 static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage)
62 {
63 return simple_bpage_from_link(bpage->link.next);
64 }
65
/*
 * Make @bpage ready for writing again: discard the write position, the entry
 * count and the committed length. The page content itself is left in place.
 */
static void simple_bpage_reset(struct simple_buffer_page *bpage)
{
	bpage->write = 0;
	bpage->entries = 0;

	local_set(&bpage->page->commit, 0);
}
73
simple_bpage_init(struct simple_buffer_page * bpage,void * page)74 static void simple_bpage_init(struct simple_buffer_page *bpage, void *page)
75 {
76 INIT_LIST_HEAD(&bpage->link);
77 bpage->page = (struct buffer_data_page *)page;
78
79 simple_bpage_reset(bpage);
80 }
81
/*
 * Bump a meta-page counter. Plain WRITE_ONCE (not atomic): there is a single
 * writer per cpu_buffer; the macro only guards against torn reads. Arguments
 * are parenthesized so expression arguments expand safely.
 */
#define simple_rb_meta_inc(__meta, __inc)	\
	WRITE_ONCE((__meta), ((__meta) + (__inc)))
84
simple_rb_loaded(struct simple_rb_per_cpu * cpu_buffer)85 static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer)
86 {
87 return !!cpu_buffer->bpages;
88 }
89
/*
 * Catch up with the writer: walk the ring from the cached head until the page
 * whose predecessor link carries the HEAD tag is found, then update
 * cpu_buffer->head_page.
 *
 * Returns 0 on success or -EBUSY if no head was found after walking the ring
 * twice.
 */
static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer)
{
	/* The writer may move the head while we walk: allow two full laps */
	int retry = cpu_buffer->nr_pages * 2;
	struct simple_buffer_page *head;

	head = cpu_buffer->head_page;

	while (retry--) {
		unsigned long link;

spin:
		/* See smp_store_release in simple_bpage_set_head_link() */
		link = (unsigned long)smp_load_acquire(&head->link.prev->next);

		switch (link & ~SIMPLE_RB_LINK_MASK) {
		/* Found the head */
		case SIMPLE_RB_LINK_HEAD:
			cpu_buffer->head_page = head;
			return 0;
		/* The writer caught the head, we can spin, that won't be long */
		case SIMPLE_RB_LINK_HEAD_MOVING:
			goto spin;
		}

		head = simple_bpage_next_page(head);
	}

	return -EBUSY;
}
119
120 /**
121 * simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader
122 * @cpu_buffer: A simple_rb_per_cpu
123 *
124 * This function enables consuming reading. It ensures the current head page will not be overwritten
125 * and can be safely read.
126 *
 * Returns 0 on success, -ENODEV if @cpu_buffer was unloaded, -EBUSY if we failed to catch the
 * head page, or -EINVAL if the reader page could not be swapped in after several attempts.
129 */
simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu * cpu_buffer)130 int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer)
131 {
132 struct simple_buffer_page *last, *head, *reader;
133 unsigned long overrun;
134 int retry = 8;
135 int ret;
136
137 if (!simple_rb_loaded(cpu_buffer))
138 return -ENODEV;
139
140 reader = cpu_buffer->reader_page;
141
142 do {
143 /* Run after the writer to find the head */
144 ret = simple_rb_find_head(cpu_buffer);
145 if (ret)
146 return ret;
147
148 head = cpu_buffer->head_page;
149
150 /* Connect the reader page around the header page */
151 reader->link.next = head->link.next;
152 reader->link.prev = head->link.prev;
153
154 /* The last page before the head */
155 last = simple_bpage_from_link(head->link.prev);
156
157 /* The reader page points to the new header page */
158 simple_bpage_set_head_link(reader);
159
160 overrun = cpu_buffer->meta->overrun;
161 } while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--);
162
163 if (!retry)
164 return -EINVAL;
165
166 cpu_buffer->head_page = simple_bpage_from_link(reader->link.next);
167 cpu_buffer->head_page->link.prev = &reader->link;
168 cpu_buffer->reader_page = head;
169 cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun;
170 cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id;
171 cpu_buffer->last_overrun = overrun;
172
173 return 0;
174 }
175 EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page);
176
/*
 * Advance the writer to the next page of the ring. If that page is the head,
 * the writer overruns the reader: the head is pushed one page forward and the
 * overwritten entries are accounted as lost.
 *
 * Returns the new tail page, reset and ready for writing.
 */
static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer)
{
	struct simple_buffer_page *tail, *new_tail;

	tail = cpu_buffer->tail_page;
	new_tail = simple_bpage_next_page(tail);

	/* Only succeeds when new_tail is the current head */
	if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) {
		/*
		 * Oh no! we've caught the head. There is none anymore and
		 * swap_reader will spin until we set the new one. Overrun must
		 * be written first, to make sure we report the correct number
		 * of lost events.
		 */
		simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries);
		simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1);

		simple_bpage_set_head_link(new_tail);
		simple_bpage_set_normal_link(tail);
	}

	simple_bpage_reset(new_tail);
	cpu_buffer->tail_page = new_tail;

	simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1);

	return new_tail;
}
205
rb_event_size(unsigned long length)206 static unsigned long rb_event_size(unsigned long length)
207 {
208 struct ring_buffer_event *event;
209
210 return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
211 }
212
/*
 * Write a TIME_EXTEND event at @event carrying @delta (low bits in
 * time_delta, the remainder in array[0]) and return the location for the
 * actual event, right after the 8-byte extension.
 */
static struct ring_buffer_event *
rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta)
{
	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
	event->time_delta = delta & TS_MASK;
	event->array[0] = delta >> TS_SHIFT;

	return (struct ring_buffer_event *)((unsigned long)event + 8);
}
222
/*
 * Reserve room on the tail page for an event with @length bytes of payload,
 * moving the tail to the next page when the current one is full, and
 * prepending a TIME_EXTEND event when the delta from the previous event does
 * not fit the event header.
 *
 * Returns the event with its header initialized; writing the payload is left
 * to the caller.
 */
static struct ring_buffer_event *
simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp)
{
	unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
	struct simple_buffer_page *tail = cpu_buffer->tail_page;
	struct ring_buffer_event *event;
	u32 write, prev_write;
	u64 time_delta;

	time_delta = timestamp - cpu_buffer->write_stamp;

	/* Delta too large for the header: plan for a TIME_EXTEND event */
	if (test_time_stamp(time_delta))
		ts_ext_size = 8;

	prev_write = tail->write;
	write = prev_write + event_size + ts_ext_size;

	if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE)))
		tail = simple_rb_move_tail(cpu_buffer);

	if (!tail->entries) {
		/* First event on the page: it carries the absolute timestamp */
		tail->page->time_stamp = timestamp;
		time_delta = 0;
		ts_ext_size = 0;
		write = event_size;
		prev_write = 0;
	}

	tail->write = write;
	tail->entries++;

	cpu_buffer->write_stamp = timestamp;

	event = (struct ring_buffer_event *)(tail->page->data + prev_write);
	if (ts_ext_size) {
		event = rb_event_add_ts_extend(event, time_delta);
		time_delta = 0;
	}

	event->type_len = 0;
	event->time_delta = time_delta;
	event->array[0] = event_size - RB_EVNT_HDR_SIZE;

	return event;
}
268
269 /**
270 * simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer
271 * @cpu_buffer: A simple_rb_per_cpu
272 * @length: Size of the entry in bytes
273 * @timestamp: Timestamp of the entry
274 *
275 * Returns the address of the entry where to write data or NULL
276 */
void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
				 u64 timestamp)
{
	struct ring_buffer_event *event;
	u32 prev;

	/* Take the writer slot; fails if the buffer is unavailable or busy */
	prev = cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING);
	if (prev != SIMPLE_RB_READY)
		return NULL;

	event = simple_rb_reserve_next(cpu_buffer, length, timestamp);

	/* Payload starts right after the event header */
	return &event->array[1];
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve);
290
291 /**
292 * simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve()
293 * @cpu_buffer: The simple_rb_per_cpu where the entry has been reserved
294 */
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer)
{
	/* Publish the reserved data to the reader side */
	local_set(&cpu_buffer->tail_page->page->commit,
		  cpu_buffer->tail_page->write);
	simple_rb_meta_inc(cpu_buffer->meta->entries, 1);

	/*
	 * Paired with simple_rb_enable_tracing() to ensure data is
	 * written to the ring-buffer before teardown.
	 */
	smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY);
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_commit);
308
/*
 * Flip cpu_buffer->status between READY and UNAVAILABLE.
 *
 * When disabling, spin while a writer is mid-flight (SIMPLE_RB_WRITING); the
 * acquire pairs with the release in simple_ring_buffer_commit() so all
 * committed data is visible once this returns.
 *
 * Returns the previous status.
 */
static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
{
	u32 prev_status;

	if (enable)
		return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY);

	/* Wait for the buffer to be released */
	do {
		prev_status = cmpxchg_acquire(&cpu_buffer->status,
					      SIMPLE_RB_READY,
					      SIMPLE_RB_UNAVAILABLE);
	} while (prev_status == SIMPLE_RB_WRITING);

	return prev_status;
}
325
326 /**
327 * simple_ring_buffer_reset - Reset @cpu_buffer
328 * @cpu_buffer: A simple_rb_per_cpu
329 *
330 * This will not clear the content of the data, only reset counters and pointers
331 *
332 * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded.
333 */
simple_ring_buffer_reset(struct simple_rb_per_cpu * cpu_buffer)334 int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
335 {
336 struct simple_buffer_page *bpage;
337 u32 prev_status;
338 int ret;
339
340 if (!simple_rb_loaded(cpu_buffer))
341 return -ENODEV;
342
343 prev_status = simple_rb_enable_tracing(cpu_buffer, false);
344
345 ret = simple_rb_find_head(cpu_buffer);
346 if (ret)
347 return ret;
348
349 bpage = cpu_buffer->tail_page = cpu_buffer->head_page;
350 do {
351 simple_bpage_reset(bpage);
352 bpage = simple_bpage_next_page(bpage);
353 } while (bpage != cpu_buffer->head_page);
354
355 simple_bpage_reset(cpu_buffer->reader_page);
356
357 cpu_buffer->last_overrun = 0;
358 cpu_buffer->write_stamp = 0;
359
360 cpu_buffer->meta->reader.read = 0;
361 cpu_buffer->meta->reader.lost_events = 0;
362 cpu_buffer->meta->entries = 0;
363 cpu_buffer->meta->overrun = 0;
364 cpu_buffer->meta->read = 0;
365 cpu_buffer->meta->pages_lost = 0;
366 cpu_buffer->meta->pages_touched = 0;
367
368 if (prev_status == SIMPLE_RB_READY)
369 simple_rb_enable_tracing(cpu_buffer, true);
370
371 return 0;
372 }
373 EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
374
simple_ring_buffer_init_mm(struct simple_rb_per_cpu * cpu_buffer,struct simple_buffer_page * bpages,const struct ring_buffer_desc * desc,void * (* load_page)(unsigned long va),void (* unload_page)(void * va))375 int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
376 struct simple_buffer_page *bpages,
377 const struct ring_buffer_desc *desc,
378 void *(*load_page)(unsigned long va),
379 void (*unload_page)(void *va))
380 {
381 struct simple_buffer_page *bpage = bpages;
382 int ret = 0;
383 void *page;
384 int i;
385
386 /* At least 1 reader page and two pages in the ring-buffer */
387 if (desc->nr_page_va < 3)
388 return -EINVAL;
389
390 memset(cpu_buffer, 0, sizeof(*cpu_buffer));
391
392 cpu_buffer->meta = load_page(desc->meta_va);
393 if (!cpu_buffer->meta)
394 return -EINVAL;
395
396 memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
397 cpu_buffer->meta->meta_page_size = PAGE_SIZE;
398 cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
399
400 /* The reader page is not part of the ring initially */
401 page = load_page(desc->page_va[0]);
402 if (!page) {
403 unload_page(cpu_buffer->meta);
404 return -EINVAL;
405 }
406
407 simple_bpage_init(bpage, page);
408 bpage->id = 0;
409
410 cpu_buffer->nr_pages = 1;
411
412 cpu_buffer->reader_page = bpage;
413 cpu_buffer->tail_page = bpage + 1;
414 cpu_buffer->head_page = bpage + 1;
415
416 for (i = 1; i < desc->nr_page_va; i++) {
417 page = load_page(desc->page_va[i]);
418 if (!page) {
419 ret = -EINVAL;
420 break;
421 }
422
423 simple_bpage_init(++bpage, page);
424
425 bpage->link.next = &(bpage + 1)->link;
426 bpage->link.prev = &(bpage - 1)->link;
427 bpage->id = i;
428
429 cpu_buffer->nr_pages = i + 1;
430 }
431
432 if (ret) {
433 for (i--; i >= 0; i--)
434 unload_page((void *)desc->page_va[i]);
435 unload_page(cpu_buffer->meta);
436
437 return ret;
438 }
439
440 /* Close the ring */
441 bpage->link.next = &cpu_buffer->tail_page->link;
442 cpu_buffer->tail_page->link.prev = &bpage->link;
443
444 /* The last init'ed page points to the head page */
445 simple_bpage_set_head_link(bpage);
446
447 cpu_buffer->bpages = bpages;
448
449 return 0;
450 }
451
/* Identity "mm": the provided VA is already a dereferenceable pointer. */
static void *__load_page(unsigned long page)
{
	void *va = (void *)page;

	return va;
}
456
/* Identity "mm": nothing was mapped by __load_page(), nothing to undo. */
static void __unload_page(void *page)
{
}
458
459 /**
460 * simple_ring_buffer_init - Init @cpu_buffer based on @desc
461 * @cpu_buffer: A simple_rb_per_cpu buffer to init, allocated by the caller.
462 * @bpages: Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
463 * @desc: A ring_buffer_desc
464 *
465 * Returns 0 on success or -EINVAL if the content of @desc is invalid
466 */
int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
			    const struct ring_buffer_desc *desc)
{
	/* Identity memory model: the page VAs in @desc are used directly */
	return simple_ring_buffer_init_mm(cpu_buffer, bpages, desc, __load_page, __unload_page);
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
473
simple_ring_buffer_unload_mm(struct simple_rb_per_cpu * cpu_buffer,void (* unload_page)(void *))474 void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
475 void (*unload_page)(void *))
476 {
477 int p;
478
479 if (!simple_rb_loaded(cpu_buffer))
480 return;
481
482 simple_rb_enable_tracing(cpu_buffer, false);
483
484 unload_page(cpu_buffer->meta);
485 for (p = 0; p < cpu_buffer->nr_pages; p++)
486 unload_page(cpu_buffer->bpages[p].page);
487
488 cpu_buffer->bpages = NULL;
489 }
490
491 /**
492 * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
493 * @cpu_buffer: A simple_rb_per_cpu that will be deleted.
494 */
simple_ring_buffer_unload(struct simple_rb_per_cpu * cpu_buffer)495 void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
496 {
497 return simple_ring_buffer_unload_mm(cpu_buffer, __unload_page);
498 }
499 EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
500
501 /**
502 * simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer
503 * @cpu_buffer: A simple_rb_per_cpu
504 * @enable: True to enable tracing, False to disable it
505 *
506 * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded
507 */
int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
{
	int ret = -ENODEV;

	if (simple_rb_loaded(cpu_buffer)) {
		simple_rb_enable_tracing(cpu_buffer, enable);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing);
518