// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"

static struct {
	bool back2back;
	int batch_cnt;
	bool sampled;
	int sample_rate;
	int ringbuf_sz; /* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz; /* per-CPU size, in pages */
	bool overwrite;
	bool bench_producer;
} args = {
	.back2back = false,
	.batch_cnt = 500,
	.sampled = false,
	.sample_rate = 500,
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
	.overwrite = false,
	.bench_producer = false,
};

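/* Argp keys for the long-only options below; the values lie well outside
 * the printable ASCII range, so they cannot collide with short option keys.
 */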
enum {
	ARG_RB_BACK2BACK = 2000,
	ARG_RB_USE_OUTPUT = 2001,
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
	ARG_RB_OVERWRITE = 2005,
	ARG_RB_BENCH_PRODUCER = 2006,
};

static const struct argp_option opts[] = {
	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
	{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case ARG_RB_BACK2BACK:
		args.back2back = true;
		break;
	case ARG_RB_USE_OUTPUT:
		args.ringbuf_use_output = true;
		break;
	case ARG_RB_BATCH_CNT:
		args.batch_cnt = strtol(arg, NULL, 10);
		if (args.batch_cnt < 0) {
			fprintf(stderr, "Invalid batch count.\n");
			argp_usage(state);
		}
		break;
	case ARG_RB_SAMPLED:
		args.sampled = true;
		break;
	case ARG_RB_SAMPLE_RATE:
		args.sample_rate = strtol(arg, NULL, 10);
		if (args.sample_rate < 0) {
			fprintf(stderr, "Invalid sample rate.\n");
			argp_usage(state);
		}
		break;
	case ARG_RB_OVERWRITE:
		args.overwrite = true;
		break;
	case ARG_RB_BENCH_PRODUCER:
		args.bench_producer = true;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

/* exported into benchmark runner */
const struct argp bench_ringbufs_argp = {
	.options = opts,
	.parser = parse_arg,
};

/* RINGBUF-LIBBPF benchmark */

static struct counter buf_hits;

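/*
 * Trigger one batch of records: each getpgid() syscall invokes the
 * benchmark's BPF program (attached during setup), which submits
 * batch_cnt records into the ring buffer or perf buffer.
 */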
static inline void bufs_trigger_batch(void)
{
	(void)syscall(__NR_getpgid);
}

static void bufs_validate(void)
{
	if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
		fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
		exit(1);
	}

	if (args.overwrite && !args.bench_producer) {
		fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
		exit(1);
	}

	if (args.bench_producer && env.consumer_cnt != 0) {
		fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.back2back) {
		fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.sampled) {
		fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (!args.bench_producer && env.consumer_cnt != 1) {
		fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
		exit(1);
	}

	if (args.back2back && env.producer_cnt > 1) {
		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
		exit(1);
	}
}

static void *bufs_sample_producer(void *input)
{
	if (args.back2back) {
		/* initial batch to get everything started */
		bufs_trigger_batch();
		return NULL;
	}

	while (true)
		bufs_trigger_batch();
	return NULL;
}

static struct ringbuf_libbpf_ctx {
	struct ringbuf_bench *skel;
	struct ring_buffer *ringbuf;
} ringbuf_libbpf_ctx;

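/*
 * In --rb-bench-producer mode throughput is counted on the BPF side
 * (records successfully reserved and submitted); otherwise it is the
 * number of records seen by the user-space consumer callback.
 */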
static void ringbuf_libbpf_measure(struct bench_res *res)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	if (args.bench_producer)
		res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
	else
		res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
	__u32 flags;
	struct bpf_map *ringbuf;
	struct ringbuf_bench *skel;

	setup_libbpf();

	skel = ringbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;
	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
	skel->rodata->bench_producer = args.bench_producer;

	if (args.sampled)
		/* record data + header take 16 bytes */
		skel->rodata->wakeup_data_size = args.sample_rate * 16;

	ringbuf = skel->maps.ringbuf;
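	/* In overwrite mode a full ring buffer overwrites the oldest
	 * records instead of dropping new ones.
	 */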
	if (args.overwrite) {
		flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
		bpf_map__set_map_flags(ringbuf, flags);
	}

	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);

	if (ringbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static int buf_process_sample(void *ctx, void *data, size_t len)
{
	atomic_inc(&buf_hits.value);
	return 0;
}

static void ringbuf_libbpf_setup(void)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
	struct bpf_link *link;
	int map_fd;

	ctx->skel = ringbuf_setup_skeleton();

	map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
	if (!ctx->ringbuf) {
		fprintf(stderr, "failed to create ringbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

static void *ringbuf_libbpf_consumer(void *input)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* RINGBUF-CUSTOM benchmark */
struct ringbuf_custom {
	__u64 *consumer_pos;
	__u64 *producer_pos;
	__u64 mask;
	void *data;
	int map_fd;
};

static struct ringbuf_custom_ctx {
	struct ringbuf_bench *skel;
	struct ringbuf_custom ringbuf;
	int epoll_fd;
	struct epoll_event event;
} ringbuf_custom_ctx;

static void ringbuf_custom_measure(struct bench_res *res)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static void ringbuf_custom_setup(void)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	const size_t page_size = getpagesize();
	struct bpf_link *link;
	struct ringbuf_custom *r;
	void *tmp;
	int err;

	ctx->skel = ringbuf_setup_skeleton();

	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (ctx->epoll_fd < 0) {
		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
		exit(1);
	}

	r = &ctx->ringbuf;
	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	r->mask = args.ringbuf_sz - 1;

	/* Map writable consumer page */
	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   r->map_fd, 0);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
		exit(1);
	}
	r->consumer_pos = tmp;

	/* Map read-only producer page and data pages. The data area is
	 * mapped twice back to back, so that records wrapping around the
	 * end of the ring can still be read as one contiguous chunk.
	 */
	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
		   r->map_fd, page_size);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
		exit(1);
	}
	r->producer_pos = tmp;
	r->data = tmp + page_size;

	ctx->event.events = EPOLLIN;
	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
	if (err < 0) {
		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

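/* Record header flag bits and length-prefix size, mirroring the kernel's
 * BPF_RINGBUF_BUSY_BIT, BPF_RINGBUF_DISCARD_BIT, and record header layout.
 */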
#define RINGBUF_BUSY_BIT (1 << 31)
#define RINGBUF_DISCARD_BIT (1 << 30)
#define RINGBUF_META_LEN 8

static inline int roundup_len(__u32 len)
{
	/* clear out top 2 bits */
	len <<= 2;
	len >>= 2;
	/* add length prefix */
	len += RINGBUF_META_LEN;
	/* round up to 8 byte alignment */
	return (len + 7) / 8 * 8;
}

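/*
 * Minimal ring buffer consumer loop: load-acquire the producer position,
 * walk records until one still has the busy bit set (not yet committed),
 * then store-release the updated consumer position so the kernel can
 * reuse the consumed space.
 */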
static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
{
	unsigned long cons_pos, prod_pos;
	int *len_ptr, len;
	bool got_new_data;

	cons_pos = smp_load_acquire(r->consumer_pos);
	while (true) {
		got_new_data = false;
		prod_pos = smp_load_acquire(r->producer_pos);
		while (cons_pos < prod_pos) {
			len_ptr = r->data + (cons_pos & r->mask);
			len = smp_load_acquire(len_ptr);

			/* sample not committed yet, bail out for now */
			if (len & RINGBUF_BUSY_BIT)
				return;

			got_new_data = true;
			cons_pos += roundup_len(len);

			atomic_inc(&buf_hits.value);
		}
		if (got_new_data)
			smp_store_release(r->consumer_pos, cons_pos);
		else
			break;
	}
}

static void *ringbuf_custom_consumer(void *input)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	int cnt;

	do {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
		if (cnt > 0)
			ringbuf_custom_process_ring(&ctx->ringbuf);
	} while (cnt >= 0);
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-LIBBPF benchmark */
static struct perfbuf_libbpf_ctx {
	struct perfbuf_bench *skel;
	struct perf_buffer *perfbuf;
} perfbuf_libbpf_ctx;

static void perfbuf_measure(struct bench_res *res)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
	struct perfbuf_bench *skel;

	setup_libbpf();

	skel = perfbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;

	if (perfbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void *input_ctx, int cpu,
			   struct perf_event_header *e)
{
	switch (e->type) {
	case PERF_RECORD_SAMPLE:
		atomic_inc(&buf_hits.value);
		break;
	case PERF_RECORD_LOST:
		break;
	default:
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static void perfbuf_libbpf_setup(void)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_event_attr attr;
	struct bpf_link *link;

	ctx->skel = perfbuf_setup_skeleton();

	memset(&attr, 0, sizeof(attr));
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	/* notify only every Nth sample */
	if (args.sampled) {
		attr.sample_period = args.sample_rate;
		attr.wakeup_events = args.sample_rate;
	} else {
		attr.sample_period = 1;
		attr.wakeup_events = 1;
	}

	if (args.sample_rate > args.batch_cnt) {
		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
			args.sample_rate, args.batch_cnt);
		exit(1);
	}

	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
					    args.perfbuf_sz, &attr,
					    perfbuf_process_sample_raw, NULL, NULL);
	if (!ctx->perfbuf) {
		fprintf(stderr, "failed to create perfbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

static void *perfbuf_libbpf_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "perfbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-CUSTOM benchmark */

/* Copies of internal libbpf definitions; they must stay in sync with the
 * libbpf version in use, since the custom consumer below pokes at
 * struct perf_buffer internals directly.
 */
struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* perf event FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

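/*
 * Consume perf buffers directly from their mmap'ed pages:
 * ring_buffer_read_head() performs the load-acquire of data_head and
 * ring_buffer_write_tail() the store-release of data_tail required by
 * the perf mmap protocol; mmap_size is a power of two, so it can double
 * as an offset mask.
 */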
static void *perfbuf_custom_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_buffer *pb = ctx->perfbuf;
	struct perf_cpu_buf *cpu_buf;
	struct perf_event_mmap_page *header;
	size_t mmap_mask = pb->mmap_size - 1;
	struct perf_event_header *ehdr;
	__u64 data_head, data_tail;
	size_t ehdr_size;
	void *base;
	int i, cnt;

	while (true) {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
		if (cnt <= 0) {
			fprintf(stderr, "perf epoll failed: %d\n", -errno);
			exit(1);
		}

		for (i = 0; i < cnt; ++i) {
			cpu_buf = pb->events[i].data.ptr;
			header = cpu_buf->base;
			base = ((void *)header) + pb->page_size;

			data_head = ring_buffer_read_head(header);
			data_tail = header->data_tail;
			while (data_head != data_tail) {
				ehdr = base + (data_tail & mmap_mask);
				ehdr_size = ehdr->size;

				if (ehdr->type == PERF_RECORD_SAMPLE)
					atomic_inc(&buf_hits.value);

				data_tail += ehdr_size;
			}
			ring_buffer_write_tail(header, data_tail);
		}
	}
	return NULL;
}

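/* Benchmark definitions picked up by the common runner in bench.c. */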
const struct bench bench_rb_libbpf = {
	.name = "rb-libbpf",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = ringbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_libbpf_consumer,
	.measure = ringbuf_libbpf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_rb_custom = {
	.name = "rb-custom",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = ringbuf_custom_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_custom_consumer,
	.measure = ringbuf_custom_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_libbpf = {
	.name = "pb-libbpf",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_libbpf_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_custom = {
	.name = "pb-custom",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_custom_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};