1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <unistd.h>
6 #include <stdint.h>
7 #include "bpf_util.h"
8 #include "bench.h"
9 #include "trigger_bench.skel.h"
10 #include "trace_helpers.h"
11
/* largest value parse_arg() accepts for --trig-batch-iters */
#define MAX_TRIG_BATCH_ITERS 1000
13
14 static struct {
15 __u32 batch_iters;
16 } args = {
17 .batch_iters = 100,
18 };
19
/* argp key identifying the --trig-batch-iters option in parse_arg() */
enum {
	ARG_TRIG_BATCH_ITERS = 7000,
};
23
24 static const struct argp_option opts[] = {
25 { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
26 "Number of in-kernel iterations per one driver test run"},
27 {},
28 };
29
parse_arg(int key,char * arg,struct argp_state * state)30 static error_t parse_arg(int key, char *arg, struct argp_state *state)
31 {
32 long ret;
33
34 switch (key) {
35 case ARG_TRIG_BATCH_ITERS:
36 ret = strtol(arg, NULL, 10);
37 if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
38 fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
39 1, MAX_TRIG_BATCH_ITERS);
40 argp_usage(state);
41 }
42 args.batch_iters = ret;
43 break;
44 default:
45 return ARGP_ERR_UNKNOWN;
46 }
47
48 return 0;
49 }
50
51 const struct argp bench_trigger_batch_argp = {
52 .options = opts,
53 .parser = parse_arg,
54 };
55
/* number of counter buckets; adjust slot shift in inc_counter() if changing */
#define MAX_BUCKETS 256
58
59 #pragma GCC diagnostic ignored "-Wattributes"
60
/* BPF triggering benchmarks: state shared by the setup, producer and measure callbacks */
static struct trigger_ctx {
	/* skeleton opened by setup_ctx() or usetup() */
	struct trigger_bench *skel;
	/* when true, trigger_measure() reads base_hits instead of the skeleton's bss */
	bool usermode_counters;
	/* when non-zero, trigger_producer_batch() runs this fd instead of trigger_driver */
	int driver_prog_fd;
} ctx;
67
68 static struct counter base_hits[MAX_BUCKETS];
69
inc_counter(struct counter * counters)70 static __always_inline void inc_counter(struct counter *counters)
71 {
72 static __thread int tid = 0;
73 unsigned slot;
74
75 if (unlikely(tid == 0))
76 tid = sys_gettid();
77
78 /* multiplicative hashing, it's fast */
79 slot = 2654435769U * tid;
80 slot >>= 24;
81
82 atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
83 }
84
sum_and_reset_counters(struct counter * counters)85 static long sum_and_reset_counters(struct counter *counters)
86 {
87 int i;
88 long sum = 0;
89
90 for (i = 0; i < MAX_BUCKETS; i++)
91 sum += atomic_swap(&counters[i].value, 0);
92 return sum;
93 }
94
trigger_validate(void)95 static void trigger_validate(void)
96 {
97 if (env.consumer_cnt != 0) {
98 fprintf(stderr, "benchmark doesn't support consumer!\n");
99 exit(1);
100 }
101 }
102
trigger_producer(void * input)103 static void *trigger_producer(void *input)
104 {
105 if (ctx.usermode_counters) {
106 while (true) {
107 (void)syscall(__NR_getpgid);
108 inc_counter(base_hits);
109 }
110 } else {
111 while (true)
112 (void)syscall(__NR_getpgid);
113 }
114 return NULL;
115 }
116
trigger_producer_batch(void * input)117 static void *trigger_producer_batch(void *input)
118 {
119 int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
120
121 while (true)
122 bpf_prog_test_run_opts(fd, NULL);
123
124 return NULL;
125 }
126
trigger_measure(struct bench_res * res)127 static void trigger_measure(struct bench_res *res)
128 {
129 if (ctx.usermode_counters)
130 res->hits = sum_and_reset_counters(base_hits);
131 else
132 res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
133 }
134
setup_ctx(void)135 static void setup_ctx(void)
136 {
137 setup_libbpf();
138
139 ctx.skel = trigger_bench__open();
140 if (!ctx.skel) {
141 fprintf(stderr, "failed to open skeleton\n");
142 exit(1);
143 }
144
145 /* default "driver" BPF program */
146 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
147
148 ctx.skel->rodata->batch_iters = args.batch_iters;
149 }
150
load_ctx(void)151 static void load_ctx(void)
152 {
153 int err;
154
155 err = trigger_bench__load(ctx.skel);
156 if (err) {
157 fprintf(stderr, "failed to open skeleton\n");
158 exit(1);
159 }
160 }
161
/*
 * Attach @prog via bpf_program__attach(); exit with status 1 if no link is
 * returned. The link pointer itself is not stored anywhere.
 */
static void attach_bpf(struct bpf_program *prog)
{
	if (bpf_program__attach(prog))
		return;

	fprintf(stderr, "failed to attach program!\n");
	exit(1);
}
172
trigger_syscall_count_setup(void)173 static void trigger_syscall_count_setup(void)
174 {
175 ctx.usermode_counters = true;
176 }
177
178 /* Batched, staying mostly in-kernel triggering setups */
trigger_kernel_count_setup(void)179 static void trigger_kernel_count_setup(void)
180 {
181 setup_ctx();
182 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
183 bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
184 load_ctx();
185 /* override driver program */
186 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
187 }
188
trigger_kprobe_setup(void)189 static void trigger_kprobe_setup(void)
190 {
191 setup_ctx();
192 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
193 load_ctx();
194 attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
195 }
196
trigger_kretprobe_setup(void)197 static void trigger_kretprobe_setup(void)
198 {
199 setup_ctx();
200 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
201 load_ctx();
202 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
203 }
204
trigger_kprobe_multi_setup(void)205 static void trigger_kprobe_multi_setup(void)
206 {
207 setup_ctx();
208 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
209 load_ctx();
210 attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
211 }
212
trigger_kretprobe_multi_setup(void)213 static void trigger_kretprobe_multi_setup(void)
214 {
215 setup_ctx();
216 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
217 load_ctx();
218 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
219 }
220
trigger_fentry_setup(void)221 static void trigger_fentry_setup(void)
222 {
223 setup_ctx();
224 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
225 load_ctx();
226 attach_bpf(ctx.skel->progs.bench_trigger_fentry);
227 }
228
trigger_fexit_setup(void)229 static void trigger_fexit_setup(void)
230 {
231 setup_ctx();
232 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
233 load_ctx();
234 attach_bpf(ctx.skel->progs.bench_trigger_fexit);
235 }
236
trigger_fmodret_setup(void)237 static void trigger_fmodret_setup(void)
238 {
239 setup_ctx();
240 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
241 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
242 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
243 load_ctx();
244 /* override driver program */
245 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
246 attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
247 }
248
trigger_tp_setup(void)249 static void trigger_tp_setup(void)
250 {
251 setup_ctx();
252 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
253 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
254 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
255 load_ctx();
256 /* override driver program */
257 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
258 attach_bpf(ctx.skel->progs.bench_trigger_tp);
259 }
260
trigger_rawtp_setup(void)261 static void trigger_rawtp_setup(void)
262 {
263 setup_ctx();
264 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
265 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
266 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
267 load_ctx();
268 /* override driver program */
269 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
270 attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
271 }
272
273 /* make sure call is not inlined and not avoided by compiler, so __weak and
274 * inline asm volatile in the body of the function
275 *
276 * There is a performance difference between uprobing at nop location vs other
277 * instructions. So use two different targets, one of which starts with nop
278 * and another doesn't.
279 *
280 * GCC doesn't generate stack setup preamble for these functions due to them
281 * having no input arguments and doing nothing in the body.
282 */
uprobe_target_nop(void)283 __nocf_check __weak void uprobe_target_nop(void)
284 {
285 asm volatile ("nop");
286 }
287
opaque_noop_func(void)288 __weak void opaque_noop_func(void)
289 {
290 }
291
uprobe_target_push(void)292 __nocf_check __weak int uprobe_target_push(void)
293 {
294 /* overhead of function call is negligible compared to uprobe
295 * triggering, so this shouldn't affect benchmark results much
296 */
297 opaque_noop_func();
298 return 1;
299 }
300
uprobe_target_ret(void)301 __nocf_check __weak void uprobe_target_ret(void)
302 {
303 asm volatile ("");
304 }
305
uprobe_producer_count(void * input)306 static void *uprobe_producer_count(void *input)
307 {
308 while (true) {
309 uprobe_target_nop();
310 inc_counter(base_hits);
311 }
312 return NULL;
313 }
314
/* Producer thread body: call uprobe_target_nop() forever; @input is unused. */
static void *uprobe_producer_nop(void *input)
{
	for (;;)
		uprobe_target_nop();

	return NULL;
}
321
/* Producer thread body: call uprobe_target_push() forever; @input is unused. */
static void *uprobe_producer_push(void *input)
{
	for (;;)
		uprobe_target_push();

	return NULL;
}
328
/* Producer thread body: call uprobe_target_ret() forever; @input is unused. */
static void *uprobe_producer_ret(void *input)
{
	for (;;)
		uprobe_target_ret();

	return NULL;
}
335
336 #ifdef __x86_64__
uprobe_target_nop5(void)337 __nocf_check __weak void uprobe_target_nop5(void)
338 {
339 asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
340 }
341
/* Producer thread body: call uprobe_target_nop5() forever; @input is unused. */
static void *uprobe_producer_nop5(void *input)
{
	for (;;)
		uprobe_target_nop5();

	return NULL;
}
348 #endif
349
usetup(bool use_retprobe,bool use_multi,void * target_addr)350 static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
351 {
352 size_t uprobe_offset;
353 struct bpf_link *link;
354 int err;
355
356 setup_libbpf();
357
358 ctx.skel = trigger_bench__open();
359 if (!ctx.skel) {
360 fprintf(stderr, "failed to open skeleton\n");
361 exit(1);
362 }
363
364 if (use_multi)
365 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
366 else
367 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
368
369 err = trigger_bench__load(ctx.skel);
370 if (err) {
371 fprintf(stderr, "failed to load skeleton\n");
372 exit(1);
373 }
374
375 uprobe_offset = get_uprobe_offset(target_addr);
376 if (use_multi) {
377 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
378 .retprobe = use_retprobe,
379 .cnt = 1,
380 .offsets = &uprobe_offset,
381 );
382 link = bpf_program__attach_uprobe_multi(
383 ctx.skel->progs.bench_trigger_uprobe_multi,
384 -1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
385 ctx.skel->links.bench_trigger_uprobe_multi = link;
386 } else {
387 link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
388 use_retprobe,
389 -1 /* all PIDs */,
390 "/proc/self/exe",
391 uprobe_offset);
392 ctx.skel->links.bench_trigger_uprobe = link;
393 }
394 if (!link) {
395 fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
396 exit(1);
397 }
398 }
399
usermode_count_setup(void)400 static void usermode_count_setup(void)
401 {
402 ctx.usermode_counters = true;
403 }
404
uprobe_nop_setup(void)405 static void uprobe_nop_setup(void)
406 {
407 usetup(false, false /* !use_multi */, &uprobe_target_nop);
408 }
409
uretprobe_nop_setup(void)410 static void uretprobe_nop_setup(void)
411 {
412 usetup(true, false /* !use_multi */, &uprobe_target_nop);
413 }
414
uprobe_push_setup(void)415 static void uprobe_push_setup(void)
416 {
417 usetup(false, false /* !use_multi */, &uprobe_target_push);
418 }
419
uretprobe_push_setup(void)420 static void uretprobe_push_setup(void)
421 {
422 usetup(true, false /* !use_multi */, &uprobe_target_push);
423 }
424
uprobe_ret_setup(void)425 static void uprobe_ret_setup(void)
426 {
427 usetup(false, false /* !use_multi */, &uprobe_target_ret);
428 }
429
uretprobe_ret_setup(void)430 static void uretprobe_ret_setup(void)
431 {
432 usetup(true, false /* !use_multi */, &uprobe_target_ret);
433 }
434
uprobe_multi_nop_setup(void)435 static void uprobe_multi_nop_setup(void)
436 {
437 usetup(false, true /* use_multi */, &uprobe_target_nop);
438 }
439
uretprobe_multi_nop_setup(void)440 static void uretprobe_multi_nop_setup(void)
441 {
442 usetup(true, true /* use_multi */, &uprobe_target_nop);
443 }
444
uprobe_multi_push_setup(void)445 static void uprobe_multi_push_setup(void)
446 {
447 usetup(false, true /* use_multi */, &uprobe_target_push);
448 }
449
uretprobe_multi_push_setup(void)450 static void uretprobe_multi_push_setup(void)
451 {
452 usetup(true, true /* use_multi */, &uprobe_target_push);
453 }
454
uprobe_multi_ret_setup(void)455 static void uprobe_multi_ret_setup(void)
456 {
457 usetup(false, true /* use_multi */, &uprobe_target_ret);
458 }
459
uretprobe_multi_ret_setup(void)460 static void uretprobe_multi_ret_setup(void)
461 {
462 usetup(true, true /* use_multi */, &uprobe_target_ret);
463 }
464
465 #ifdef __x86_64__
uprobe_nop5_setup(void)466 static void uprobe_nop5_setup(void)
467 {
468 usetup(false, false /* !use_multi */, &uprobe_target_nop5);
469 }
470
uretprobe_nop5_setup(void)471 static void uretprobe_nop5_setup(void)
472 {
473 usetup(true, false /* !use_multi */, &uprobe_target_nop5);
474 }
475
uprobe_multi_nop5_setup(void)476 static void uprobe_multi_nop5_setup(void)
477 {
478 usetup(false, true /* use_multi */, &uprobe_target_nop5);
479 }
480
uretprobe_multi_nop5_setup(void)481 static void uretprobe_multi_nop5_setup(void)
482 {
483 usetup(true, true /* use_multi */, &uprobe_target_nop5);
484 }
485 #endif
486
487 const struct bench bench_trig_syscall_count = {
488 .name = "trig-syscall-count",
489 .validate = trigger_validate,
490 .setup = trigger_syscall_count_setup,
491 .producer_thread = trigger_producer,
492 .measure = trigger_measure,
493 .report_progress = hits_drops_report_progress,
494 .report_final = hits_drops_report_final,
495 };
496
497 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */
498 #define BENCH_TRIG_KERNEL(KIND, NAME) \
499 const struct bench bench_trig_##KIND = { \
500 .name = "trig-" NAME, \
501 .setup = trigger_##KIND##_setup, \
502 .producer_thread = trigger_producer_batch, \
503 .measure = trigger_measure, \
504 .report_progress = hits_drops_report_progress, \
505 .report_final = hits_drops_report_final, \
506 .argp = &bench_trigger_batch_argp, \
507 }
508
509 BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
510 BENCH_TRIG_KERNEL(kprobe, "kprobe");
511 BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
512 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
513 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
514 BENCH_TRIG_KERNEL(fentry, "fentry");
515 BENCH_TRIG_KERNEL(fexit, "fexit");
516 BENCH_TRIG_KERNEL(fmodret, "fmodret");
517 BENCH_TRIG_KERNEL(tp, "tp");
518 BENCH_TRIG_KERNEL(rawtp, "rawtp");
519
520 /* uprobe benchmarks */
521 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
522 const struct bench bench_trig_##KIND = { \
523 .name = "trig-" NAME, \
524 .validate = trigger_validate, \
525 .setup = KIND##_setup, \
526 .producer_thread = uprobe_producer_##PRODUCER, \
527 .measure = trigger_measure, \
528 .report_progress = hits_drops_report_progress, \
529 .report_final = hits_drops_report_final, \
530 }
531
532 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
533 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
534 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
535 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
536 BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
537 BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
538 BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
539 BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
540 BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
541 BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
542 BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
543 BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
544 BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
545 #ifdef __x86_64__
546 BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
547 BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
548 BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
549 BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
550 #endif
551