1febf8a37SArnaldo Carvalho de Melo // SPDX-License-Identifier: GPL-2.0
2febf8a37SArnaldo Carvalho de Melo /*
3febf8a37SArnaldo Carvalho de Melo * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
4febf8a37SArnaldo Carvalho de Melo *
5febf8a37SArnaldo Carvalho de Melo * This exactly matches what is marshalled into the raw_syscall:sys_enter
6febf8a37SArnaldo Carvalho de Melo * payload expected by the 'perf trace' beautifiers.
7febf8a37SArnaldo Carvalho de Melo */
8febf8a37SArnaldo Carvalho de Melo
9*29d16de2SArnaldo Carvalho de Melo #include "vmlinux.h"
1014e4b9f4SIan Rogers
1159f3bd78SArnaldo Carvalho de Melo #include <bpf/bpf_helpers.h>
1214e4b9f4SIan Rogers #include <linux/limits.h>
13262b54b6SArnaldo Carvalho de Melo
/*
 * Round @x up to the next multiple of @a. The mask is (@a - 1), converted to
 * the type of @x, so @a is expected to be a power of two.
 */
#define __PERF_ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
#define PERF_ALIGN(x, a)		__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
16262b54b6SArnaldo Carvalho de Melo
/**
 * is_power_of_2() - check if a value is a power of two
 * @n: the value to check
 *
 * Determine whether some value is a power of two, where zero is *not*
 * considered a power of two. Every use of @n is parenthesized so that a
 * compound expression argument (e.g. "a & b") keeps its meaning; note that
 * @n is evaluated more than once. The expansion stays a constant expression
 * when @n is one, so it can be used in _Static_assert().
 *
 * Return: true if @n is a power of 2, otherwise false.
 */
#define is_power_of_2(n) ((n) != 0 && (((n) & ((n) - 1)) == 0))
2614e4b9f4SIan Rogers
2714e4b9f4SIan Rogers #define MAX_CPUS 4096
2814e4b9f4SIan Rogers
2914e4b9f4SIan Rogers #define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */
305e6da6beSIan Rogers
3114e4b9f4SIan Rogers /* bpf-output associated map */
32febf8a37SArnaldo Carvalho de Melo struct __augmented_syscalls__ {
334cae8675SArnaldo Carvalho de Melo __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
343803a229SArnaldo Carvalho de Melo __type(key, int);
353803a229SArnaldo Carvalho de Melo __type(value, __u32);
363803a229SArnaldo Carvalho de Melo __uint(max_entries, MAX_CPUS);
373803a229SArnaldo Carvalho de Melo } __augmented_syscalls__ SEC(".maps");
3814e4b9f4SIan Rogers
3914e4b9f4SIan Rogers /*
4014e4b9f4SIan Rogers * What to augment at entry?
4114e4b9f4SIan Rogers *
4214e4b9f4SIan Rogers * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
4314e4b9f4SIan Rogers */
443803a229SArnaldo Carvalho de Melo struct syscalls_sys_enter {
453803a229SArnaldo Carvalho de Melo __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
463803a229SArnaldo Carvalho de Melo __type(key, __u32);
473803a229SArnaldo Carvalho de Melo __type(value, __u32);
483803a229SArnaldo Carvalho de Melo __uint(max_entries, 512);
493803a229SArnaldo Carvalho de Melo } syscalls_sys_enter SEC(".maps");
5014e4b9f4SIan Rogers
5114e4b9f4SIan Rogers /*
5214e4b9f4SIan Rogers * What to augment at exit?
5314e4b9f4SIan Rogers *
5414e4b9f4SIan Rogers * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
5514e4b9f4SIan Rogers */
563803a229SArnaldo Carvalho de Melo struct syscalls_sys_exit {
57febf8a37SArnaldo Carvalho de Melo __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
58febf8a37SArnaldo Carvalho de Melo __type(key, __u32);
59febf8a37SArnaldo Carvalho de Melo __type(value, __u32);
60febf8a37SArnaldo Carvalho de Melo __uint(max_entries, 512);
61febf8a37SArnaldo Carvalho de Melo } syscalls_sys_exit SEC(".maps");
62febf8a37SArnaldo Carvalho de Melo
/* Layout of the raw_syscalls:sys_enter tracepoint payload as read by this file */
struct syscall_enter_args {
	unsigned long long common_tp_fields;	/* common tracepoint header */
	long		   syscall_nr;		/* syscall number */
	unsigned long	   args[6];		/* the six raw syscall arguments */
};
68febf8a37SArnaldo Carvalho de Melo
/* Layout of the raw_syscalls:sys_exit tracepoint payload as read by this file */
struct syscall_exit_args {
	unsigned long long common_tp_fields;	/* common tracepoint header */
	long		   syscall_nr;		/* syscall number */
	long		   ret;			/* syscall return value */
};
7479ef68c7SArnaldo Carvalho de Melo
/*
 * Desired design of maximum size and alignment (see RFC2553)
 */
#define SS_MAXSIZE	128	/* Implementation specific max size */

typedef unsigned short sa_family_t;

/*
 * FIXME: Should come from system headers
 *
 * The definition uses anonymous union and struct in order to control the
 * default alignment: the inner struct provides the SS_MAXSIZE bytes of
 * storage, the pointer member of the union provides the alignment.
 */
struct sockaddr_storage {
	union {
		struct {
			sa_family_t	ss_family; /* address family */
			/* Following field(s) are implementation specific */
			char __data[SS_MAXSIZE - sizeof(unsigned short)];
				/* space to achieve desired size, */
				/* _SS_MAXSIZE value minus size of ss_family */
		};
		void *__align; /* implementation specific desired alignment */
	};
};
10014e4b9f4SIan Rogers
10114e4b9f4SIan Rogers struct augmented_arg {
10214e4b9f4SIan Rogers unsigned int size;
10314e4b9f4SIan Rogers int err;
10414e4b9f4SIan Rogers union {
10514e4b9f4SIan Rogers char value[PATH_MAX];
10614e4b9f4SIan Rogers struct sockaddr_storage saddr;
107ed9a77baSArnaldo Carvalho de Melo };
1086f563674SArnaldo Carvalho de Melo };
10959f3bd78SArnaldo Carvalho de Melo
110212b9ab6SArnaldo Carvalho de Melo struct pids_filtered {
1116f563674SArnaldo Carvalho de Melo __uint(type, BPF_MAP_TYPE_HASH);
1122ad926dbSArnaldo Carvalho de Melo __type(key, pid_t);
113212b9ab6SArnaldo Carvalho de Melo __type(value, bool);
114212b9ab6SArnaldo Carvalho de Melo __uint(max_entries, 64);
115a9cd6c67SArnaldo Carvalho de Melo } pids_filtered SEC(".maps");
11659f3bd78SArnaldo Carvalho de Melo
1176f563674SArnaldo Carvalho de Melo struct augmented_args_payload {
11859f3bd78SArnaldo Carvalho de Melo struct syscall_enter_args args;
119c265784dSArnaldo Carvalho de Melo struct augmented_arg arg, arg2; // We have to reserve space for two arguments (rename, etc)
12014e4b9f4SIan Rogers };
12114e4b9f4SIan Rogers
12214e4b9f4SIan Rogers // We need more tmp space than the BPF stack can give us
12314e4b9f4SIan Rogers struct augmented_args_tmp {
12414e4b9f4SIan Rogers __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
12514e4b9f4SIan Rogers __type(key, int);
12659f3bd78SArnaldo Carvalho de Melo __type(value, struct augmented_args_payload);
127c265784dSArnaldo Carvalho de Melo __uint(max_entries, 1);
128c265784dSArnaldo Carvalho de Melo } augmented_args_tmp SEC(".maps");
129c265784dSArnaldo Carvalho de Melo
130c265784dSArnaldo Carvalho de Melo struct beauty_map_enter {
131c265784dSArnaldo Carvalho de Melo __uint(type, BPF_MAP_TYPE_HASH);
132c265784dSArnaldo Carvalho de Melo __type(key, int);
133e051c2f6SArnaldo Carvalho de Melo __type(value, __u32[6]);
134e051c2f6SArnaldo Carvalho de Melo __uint(max_entries, 512);
135e051c2f6SArnaldo Carvalho de Melo } beauty_map_enter SEC(".maps");
13614e4b9f4SIan Rogers
137e051c2f6SArnaldo Carvalho de Melo struct beauty_payload_enter {
138e051c2f6SArnaldo Carvalho de Melo struct syscall_enter_args args;
1390c95a7ffSArnaldo Carvalho de Melo struct augmented_arg aug_args[6];
1402ad926dbSArnaldo Carvalho de Melo };
1410c95a7ffSArnaldo Carvalho de Melo
1422ad926dbSArnaldo Carvalho de Melo struct beauty_payload_enter_map {
1435069211eSThomas Richter __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
1440c95a7ffSArnaldo Carvalho de Melo __type(key, int);
1452ad926dbSArnaldo Carvalho de Melo __type(value, struct beauty_payload_enter);
146deaf4da4SArnaldo Carvalho de Melo __uint(max_entries, 1);
147deaf4da4SArnaldo Carvalho de Melo } beauty_payload_enter_map SEC(".maps");
1482ad926dbSArnaldo Carvalho de Melo
augmented_args_payload(void)149deaf4da4SArnaldo Carvalho de Melo static inline struct augmented_args_payload *augmented_args_payload(void)
1502ad926dbSArnaldo Carvalho de Melo {
1512ad926dbSArnaldo Carvalho de Melo int key = 0;
1527d964231SArnaldo Carvalho de Melo return bpf_map_lookup_elem(&augmented_args_tmp, &key);
1532ad926dbSArnaldo Carvalho de Melo }
1542ad926dbSArnaldo Carvalho de Melo
augmented__output(void * ctx,struct augmented_args_payload * args,int len)155deaf4da4SArnaldo Carvalho de Melo static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
156deaf4da4SArnaldo Carvalho de Melo {
157deaf4da4SArnaldo Carvalho de Melo /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
158deaf4da4SArnaldo Carvalho de Melo return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
159deaf4da4SArnaldo Carvalho de Melo }
1602ad926dbSArnaldo Carvalho de Melo
augmented__beauty_output(void * ctx,void * data,int len)1612ad926dbSArnaldo Carvalho de Melo static inline int augmented__beauty_output(void *ctx, void *data, int len)
1620c95a7ffSArnaldo Carvalho de Melo {
1630c95a7ffSArnaldo Carvalho de Melo return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
1642ad926dbSArnaldo Carvalho de Melo }
1650c95a7ffSArnaldo Carvalho de Melo
1660c95a7ffSArnaldo Carvalho de Melo static inline
augmented_arg__read_str(struct augmented_arg * augmented_arg,const void * arg,unsigned int arg_len)1675e6da6beSIan Rogers unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
1685834da7fSArnaldo Carvalho de Melo {
1695834da7fSArnaldo Carvalho de Melo unsigned int augmented_len = sizeof(*augmented_arg);
1705834da7fSArnaldo Carvalho de Melo int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg);
1715834da7fSArnaldo Carvalho de Melo
1725834da7fSArnaldo Carvalho de Melo augmented_arg->size = augmented_arg->err = 0;
1736ff8fff4SArnaldo Carvalho de Melo /*
174212b9ab6SArnaldo Carvalho de Melo * probe_read_str may return < 0, e.g. -EFAULT
1756f563674SArnaldo Carvalho de Melo * So we leave that in the augmented_arg->size that userspace will
1766ff8fff4SArnaldo Carvalho de Melo */
1776ff8fff4SArnaldo Carvalho de Melo if (string_len > 0) {
1786ff8fff4SArnaldo Carvalho de Melo augmented_len -= sizeof(augmented_arg->value) - string_len;
1795e6da6beSIan Rogers _Static_assert(is_power_of_2(sizeof(augmented_arg->value)), "sizeof(augmented_arg->value) needs to be a power of two");
180212b9ab6SArnaldo Carvalho de Melo augmented_len &= sizeof(augmented_arg->value) - 1;
181212b9ab6SArnaldo Carvalho de Melo augmented_arg->size = string_len;
182c265784dSArnaldo Carvalho de Melo } else {
183212b9ab6SArnaldo Carvalho de Melo /*
184212b9ab6SArnaldo Carvalho de Melo * So that username notice the error while still being able
185212b9ab6SArnaldo Carvalho de Melo * to skip this augmented arg record
186212b9ab6SArnaldo Carvalho de Melo */
187212b9ab6SArnaldo Carvalho de Melo augmented_arg->err = string_len;
188212b9ab6SArnaldo Carvalho de Melo augmented_len = offsetof(struct augmented_arg, value);
189212b9ab6SArnaldo Carvalho de Melo }
190262b54b6SArnaldo Carvalho de Melo
19118364804SArnaldo Carvalho de Melo return augmented_len;
192212b9ab6SArnaldo Carvalho de Melo }
1935069211eSThomas Richter
194212b9ab6SArnaldo Carvalho de Melo SEC("tp/raw_syscalls/sys_enter")
syscall_unaugmented(struct syscall_enter_args * args)195e051c2f6SArnaldo Carvalho de Melo int syscall_unaugmented(struct syscall_enter_args *args)
196212b9ab6SArnaldo Carvalho de Melo {
197212b9ab6SArnaldo Carvalho de Melo return 1;
1985e6da6beSIan Rogers }
1993c475bc0SArnaldo Carvalho de Melo
/*
 * These will be tail_called from SEC("tp/raw_syscalls/sys_enter"), so they
 * will find in augmented_args_tmp what was read by that raw_syscalls:sys_enter
 * handler and go on from there, reading the pointer arguments of each
 * syscall, e.g. reading the first syscall arg as a string for open's
 * filename.
 */
2063c475bc0SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_connect")
sys_enter_connect(struct syscall_enter_args * args)2073c475bc0SArnaldo Carvalho de Melo int sys_enter_connect(struct syscall_enter_args *args)
2083c475bc0SArnaldo Carvalho de Melo {
20918364804SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
2103c475bc0SArnaldo Carvalho de Melo const void *sockaddr_arg = (const void *)args->args[1];
2115069211eSThomas Richter unsigned int socklen = args->args[2];
2123c475bc0SArnaldo Carvalho de Melo unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
213e051c2f6SArnaldo Carvalho de Melo
2143c475bc0SArnaldo Carvalho de Melo if (augmented_args == NULL)
2153c475bc0SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
2165e6da6beSIan Rogers
2176ff8fff4SArnaldo Carvalho de Melo _Static_assert(is_power_of_2(sizeof(augmented_args->arg.saddr)), "sizeof(augmented_args->arg.saddr) needs to be a power of two");
2186ff8fff4SArnaldo Carvalho de Melo socklen &= sizeof(augmented_args->arg.saddr) - 1;
219c265784dSArnaldo Carvalho de Melo
2206ff8fff4SArnaldo Carvalho de Melo bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
2216ff8fff4SArnaldo Carvalho de Melo augmented_args->arg.size = socklen;
2226ff8fff4SArnaldo Carvalho de Melo augmented_args->arg.err = 0;
2236ff8fff4SArnaldo Carvalho de Melo
2246ff8fff4SArnaldo Carvalho de Melo return augmented__output(args, augmented_args, len + socklen);
2256ff8fff4SArnaldo Carvalho de Melo }
2262ad926dbSArnaldo Carvalho de Melo
2276ff8fff4SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_sendto")
sys_enter_sendto(struct syscall_enter_args * args)228e051c2f6SArnaldo Carvalho de Melo int sys_enter_sendto(struct syscall_enter_args *args)
2296ff8fff4SArnaldo Carvalho de Melo {
2306ff8fff4SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
2315e6da6beSIan Rogers const void *sockaddr_arg = (const void *)args->args[4];
232236dd583SArnaldo Carvalho de Melo unsigned int socklen = args->args[5];
233236dd583SArnaldo Carvalho de Melo unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
234c265784dSArnaldo Carvalho de Melo
235236dd583SArnaldo Carvalho de Melo if (augmented_args == NULL)
236236dd583SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
237236dd583SArnaldo Carvalho de Melo
238236dd583SArnaldo Carvalho de Melo socklen &= sizeof(augmented_args->arg.saddr) - 1;
239236dd583SArnaldo Carvalho de Melo
240236dd583SArnaldo Carvalho de Melo bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
2412ad926dbSArnaldo Carvalho de Melo
242236dd583SArnaldo Carvalho de Melo return augmented__output(args, augmented_args, len + socklen);
243e051c2f6SArnaldo Carvalho de Melo }
244236dd583SArnaldo Carvalho de Melo
245236dd583SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_open")
sys_enter_open(struct syscall_enter_args * args)2465e6da6beSIan Rogers int sys_enter_open(struct syscall_enter_args *args)
247cfa9ac73SArnaldo Carvalho de Melo {
248cfa9ac73SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
249c265784dSArnaldo Carvalho de Melo const void *filename_arg = (const void *)args->args[0];
250cfa9ac73SArnaldo Carvalho de Melo unsigned int len = sizeof(augmented_args->args);
251cfa9ac73SArnaldo Carvalho de Melo
252cfa9ac73SArnaldo Carvalho de Melo if (augmented_args == NULL)
253cfa9ac73SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
254cfa9ac73SArnaldo Carvalho de Melo
255cfa9ac73SArnaldo Carvalho de Melo len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
256cfa9ac73SArnaldo Carvalho de Melo
2572ad926dbSArnaldo Carvalho de Melo return augmented__output(args, augmented_args, len);
2582ad926dbSArnaldo Carvalho de Melo }
259cfa9ac73SArnaldo Carvalho de Melo
260e051c2f6SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_openat")
sys_enter_openat(struct syscall_enter_args * args)261cfa9ac73SArnaldo Carvalho de Melo int sys_enter_openat(struct syscall_enter_args *args)
262cfa9ac73SArnaldo Carvalho de Melo {
2635e6da6beSIan Rogers struct augmented_args_payload *augmented_args = augmented_args_payload();
2648d5da264SArnaldo Carvalho de Melo const void *filename_arg = (const void *)args->args[1];
2658d5da264SArnaldo Carvalho de Melo unsigned int len = sizeof(augmented_args->args);
266c265784dSArnaldo Carvalho de Melo
2678d5da264SArnaldo Carvalho de Melo if (augmented_args == NULL)
2688d5da264SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
2698d5da264SArnaldo Carvalho de Melo
2708d5da264SArnaldo Carvalho de Melo len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
2718d5da264SArnaldo Carvalho de Melo
2728d5da264SArnaldo Carvalho de Melo return augmented__output(args, augmented_args, len);
2738d5da264SArnaldo Carvalho de Melo }
2742ad926dbSArnaldo Carvalho de Melo
2752ad926dbSArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_rename")
sys_enter_rename(struct syscall_enter_args * args)2768d5da264SArnaldo Carvalho de Melo int sys_enter_rename(struct syscall_enter_args *args)
277e051c2f6SArnaldo Carvalho de Melo {
2788d5da264SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
2798d5da264SArnaldo Carvalho de Melo const void *oldpath_arg = (const void *)args->args[0],
280a9cd6c67SArnaldo Carvalho de Melo *newpath_arg = (const void *)args->args[1];
281a9cd6c67SArnaldo Carvalho de Melo unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
282a9cd6c67SArnaldo Carvalho de Melo
283a9cd6c67SArnaldo Carvalho de Melo if (augmented_args == NULL)
284a9cd6c67SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
285a9cd6c67SArnaldo Carvalho de Melo
286a9cd6c67SArnaldo Carvalho de Melo len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
287a9cd6c67SArnaldo Carvalho de Melo
288a9cd6c67SArnaldo Carvalho de Melo oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
289a9cd6c67SArnaldo Carvalho de Melo augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
290a9cd6c67SArnaldo Carvalho de Melo len += augmented_args->arg.size;
2915e6da6beSIan Rogers
292a9cd6c67SArnaldo Carvalho de Melo /* Every read from userspace is limited to value size */
293a9cd6c67SArnaldo Carvalho de Melo if (augmented_args->arg.size > sizeof(augmented_args->arg.value))
294a9cd6c67SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
295a9cd6c67SArnaldo Carvalho de Melo
296a9cd6c67SArnaldo Carvalho de Melo struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
297a9cd6c67SArnaldo Carvalho de Melo
298a9cd6c67SArnaldo Carvalho de Melo newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
299a9cd6c67SArnaldo Carvalho de Melo arg2->size = newpath_len;
300a9cd6c67SArnaldo Carvalho de Melo
3015069211eSThomas Richter len += newpath_len;
302a9cd6c67SArnaldo Carvalho de Melo
303a9cd6c67SArnaldo Carvalho de Melo return augmented__output(args, augmented_args, len);
304a9cd6c67SArnaldo Carvalho de Melo }
305a9cd6c67SArnaldo Carvalho de Melo
306a9cd6c67SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_renameat2")
sys_enter_renameat2(struct syscall_enter_args * args)307a9cd6c67SArnaldo Carvalho de Melo int sys_enter_renameat2(struct syscall_enter_args *args)
308a9cd6c67SArnaldo Carvalho de Melo {
309a9cd6c67SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
310a9cd6c67SArnaldo Carvalho de Melo const void *oldpath_arg = (const void *)args->args[1],
311a9cd6c67SArnaldo Carvalho de Melo *newpath_arg = (const void *)args->args[3];
312a9cd6c67SArnaldo Carvalho de Melo unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
313a9cd6c67SArnaldo Carvalho de Melo
314a9cd6c67SArnaldo Carvalho de Melo if (augmented_args == NULL)
3155069211eSThomas Richter return 1; /* Failure: don't filter */
316a9cd6c67SArnaldo Carvalho de Melo
317a9cd6c67SArnaldo Carvalho de Melo len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
318a9cd6c67SArnaldo Carvalho de Melo
319a9cd6c67SArnaldo Carvalho de Melo oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
320a9cd6c67SArnaldo Carvalho de Melo augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
321a9cd6c67SArnaldo Carvalho de Melo len += augmented_args->arg.size;
322a9cd6c67SArnaldo Carvalho de Melo
3235e6da6beSIan Rogers /* Every read from userspace is limited to value size */
3246ac73820SArnaldo Carvalho de Melo if (augmented_args->arg.size > sizeof(augmented_args->arg.value))
3256ac73820SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
3266ac73820SArnaldo Carvalho de Melo
3276ac73820SArnaldo Carvalho de Melo struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
3286ac73820SArnaldo Carvalho de Melo
3296ac73820SArnaldo Carvalho de Melo newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
3306ac73820SArnaldo Carvalho de Melo arg2->size = newpath_len;
3316ac73820SArnaldo Carvalho de Melo
3326ac73820SArnaldo Carvalho de Melo len += newpath_len;
3336ac73820SArnaldo Carvalho de Melo
3346ac73820SArnaldo Carvalho de Melo return augmented__output(args, augmented_args, len);
3356ac73820SArnaldo Carvalho de Melo }
3366ac73820SArnaldo Carvalho de Melo
3375069211eSThomas Richter #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
3386ac73820SArnaldo Carvalho de Melo
3396ac73820SArnaldo Carvalho de Melo // we need just the start, get the size to then copy it
3406ac73820SArnaldo Carvalho de Melo struct perf_event_attr_size {
3416ac73820SArnaldo Carvalho de Melo __u32 type;
3426ac73820SArnaldo Carvalho de Melo /*
3436ac73820SArnaldo Carvalho de Melo * Size of the attr structure, for fwd/bwd compat.
34414e4b9f4SIan Rogers */
34514e4b9f4SIan Rogers __u32 size;
34614e4b9f4SIan Rogers };
34714e4b9f4SIan Rogers
34814e4b9f4SIan Rogers SEC("tp/syscalls/sys_enter_perf_event_open")
sys_enter_perf_event_open(struct syscall_enter_args * args)34914e4b9f4SIan Rogers int sys_enter_perf_event_open(struct syscall_enter_args *args)
35014e4b9f4SIan Rogers {
35114e4b9f4SIan Rogers struct augmented_args_payload *augmented_args = augmented_args_payload();
35214e4b9f4SIan Rogers const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
35314e4b9f4SIan Rogers unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
3545e6da6beSIan Rogers
355febf8a37SArnaldo Carvalho de Melo if (augmented_args == NULL)
356febf8a37SArnaldo Carvalho de Melo goto failure;
3576f563674SArnaldo Carvalho de Melo
358deaf4da4SArnaldo Carvalho de Melo if (bpf_probe_read_user(&augmented_args->arg.value, sizeof(*attr), attr) < 0)
359deaf4da4SArnaldo Carvalho de Melo goto failure;
360deaf4da4SArnaldo Carvalho de Melo
361deaf4da4SArnaldo Carvalho de Melo attr_read = (const struct perf_event_attr_size *)augmented_args->arg.value;
362deaf4da4SArnaldo Carvalho de Melo
363deaf4da4SArnaldo Carvalho de Melo __u32 size = attr_read->size;
364deaf4da4SArnaldo Carvalho de Melo
365deaf4da4SArnaldo Carvalho de Melo if (!size)
366deaf4da4SArnaldo Carvalho de Melo size = PERF_ATTR_SIZE_VER0;
36759f3bd78SArnaldo Carvalho de Melo
36801128065SArnaldo Carvalho de Melo if (size > sizeof(augmented_args->arg.value))
36901128065SArnaldo Carvalho de Melo goto failure;
37001128065SArnaldo Carvalho de Melo
371c265784dSArnaldo Carvalho de Melo // Now that we read attr->size and tested it against the size limits, read it completely
37259f3bd78SArnaldo Carvalho de Melo if (bpf_probe_read_user(&augmented_args->arg.value, size, attr) < 0)
37359f3bd78SArnaldo Carvalho de Melo goto failure;
374febf8a37SArnaldo Carvalho de Melo
3755069211eSThomas Richter return augmented__output(args, augmented_args, len + size);
376b27b38edSArnaldo Carvalho de Melo failure:
377bf134ca6SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
378bf134ca6SArnaldo Carvalho de Melo }
379bf134ca6SArnaldo Carvalho de Melo
3804d39c89fSIngo Molnar SEC("tp/syscalls/sys_enter_clock_nanosleep")
sys_enter_clock_nanosleep(struct syscall_enter_args * args)381bf134ca6SArnaldo Carvalho de Melo int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
382bf134ca6SArnaldo Carvalho de Melo {
383bf134ca6SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
384bf134ca6SArnaldo Carvalho de Melo const void *rqtp_arg = (const void *)args->args[2];
385b27b38edSArnaldo Carvalho de Melo unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
386febf8a37SArnaldo Carvalho de Melo __u32 size = sizeof(struct timespec64);
387febf8a37SArnaldo Carvalho de Melo
3885e6da6beSIan Rogers if (augmented_args == NULL)
389febf8a37SArnaldo Carvalho de Melo goto failure;
390febf8a37SArnaldo Carvalho de Melo
391b27b38edSArnaldo Carvalho de Melo if (size > sizeof(augmented_args->arg.value))
392b27b38edSArnaldo Carvalho de Melo goto failure;
393b27b38edSArnaldo Carvalho de Melo
394b27b38edSArnaldo Carvalho de Melo bpf_probe_read_user(&augmented_args->arg.value, size, rqtp_arg);
395b27b38edSArnaldo Carvalho de Melo
3965069211eSThomas Richter return augmented__output(args, augmented_args, len + size);
397bf134ca6SArnaldo Carvalho de Melo failure:
398bf134ca6SArnaldo Carvalho de Melo return 1; /* Failure: don't filter */
399bf134ca6SArnaldo Carvalho de Melo }
4004d39c89fSIngo Molnar
401bf134ca6SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_nanosleep")
sys_enter_nanosleep(struct syscall_enter_args * args)402bf134ca6SArnaldo Carvalho de Melo int sys_enter_nanosleep(struct syscall_enter_args *args)
403bf134ca6SArnaldo Carvalho de Melo {
404bf134ca6SArnaldo Carvalho de Melo struct augmented_args_payload *augmented_args = augmented_args_payload();
405bf134ca6SArnaldo Carvalho de Melo const void *req_arg = (const void *)args->args[0];
406b27b38edSArnaldo Carvalho de Melo unsigned int len = sizeof(augmented_args->args);
407febf8a37SArnaldo Carvalho de Melo __u32 size = sizeof(struct timespec64);
408febf8a37SArnaldo Carvalho de Melo
40914e4b9f4SIan Rogers if (augmented_args == NULL)
410 goto failure;
411
412 if (size > sizeof(augmented_args->arg.value))
413 goto failure;
414
415 bpf_probe_read_user(&augmented_args->arg.value, size, req_arg);
416
417 return augmented__output(args, augmented_args, len + size);
418 failure:
419 return 1; /* Failure: don't filter */
420 }
421
getpid(void)422 static pid_t getpid(void)
423 {
424 return bpf_get_current_pid_tgid();
425 }
426
pid_filter__has(struct pids_filtered * pids,pid_t pid)427 static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
428 {
429 return bpf_map_lookup_elem(pids, &pid) != NULL;
430 }
431
augment_sys_enter(void * ctx,struct syscall_enter_args * args)432 static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
433 {
434 bool augmented, do_output = false;
435 int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
436 u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
437 s64 aug_size, size;
438 unsigned int nr, *beauty_map;
439 struct beauty_payload_enter *payload;
440 void *arg, *payload_offset;
441
442 /* fall back to do predefined tail call */
443 if (args == NULL)
444 return 1;
445
446 /* use syscall number to get beauty_map entry */
447 nr = (__u32)args->syscall_nr;
448 beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr);
449
450 /* set up payload for output */
451 payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
452 payload_offset = (void *)&payload->aug_args;
453
454 if (beauty_map == NULL || payload == NULL)
455 return 1;
456
457 /* copy the sys_enter header, which has the syscall_nr */
458 __builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
459
460 /*
461 * Determine what type of argument and how many bytes to read from user space, using the
462 * value in the beauty_map. This is the relation of parameter type and its corresponding
463 * value in the beauty map, and how many bytes we read eventually:
464 *
465 * string: 1 -> size of string
466 * struct: size of struct -> size of struct
467 * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
468 */
469 for (int i = 0; i < 6; i++) {
470 arg = (void *)args->args[i];
471 augmented = false;
472 size = beauty_map[i];
473 aug_size = size; /* size of the augmented data read from user space */
474
475 if (size == 0 || arg == NULL)
476 continue;
477
478 if (size == 1) { /* string */
479 aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
480 /* minimum of 0 to pass the verifier */
481 if (aug_size < 0)
482 aug_size = 0;
483
484 augmented = true;
485 } else if (size > 0 && size <= value_size) { /* struct */
486 if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
487 augmented = true;
488 } else if ((int)size < 0 && size >= -6) { /* buffer */
489 index = -(size + 1);
490 barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
491 index &= 7; // Satisfy the bounds checking with the verifier in some kernels.
492 aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
493
494 if (aug_size > 0) {
495 if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
496 augmented = true;
497 }
498 }
499
500 /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */
501 if (aug_size > value_size)
502 aug_size = value_size;
503
504 /* write data to payload */
505 if (augmented) {
506 int written = offsetof(struct augmented_arg, value) + aug_size;
507
508 if (written < 0 || written > sizeof(struct augmented_arg))
509 return 1;
510
511 ((struct augmented_arg *)payload_offset)->size = aug_size;
512 output += written;
513 payload_offset += written;
514 do_output = true;
515 }
516 }
517
518 if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter))
519 return 1;
520
521 return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
522 }
523
524 SEC("tp/raw_syscalls/sys_enter")
sys_enter(struct syscall_enter_args * args)525 int sys_enter(struct syscall_enter_args *args)
526 {
527 struct augmented_args_payload *augmented_args;
528 /*
529 * We start len, the amount of data that will be in the perf ring
530 * buffer, if this is not filtered out by one of pid_filter__has(),
531 * syscall->enabled, etc, with the non-augmented raw syscall payload,
532 * i.e. sizeof(augmented_args->args).
533 *
534 * We'll add to this as we add augmented syscalls right after that
535 * initial, non-augmented raw_syscalls:sys_enter payload.
536 */
537
538 if (pid_filter__has(&pids_filtered, getpid()))
539 return 0;
540
541 augmented_args = augmented_args_payload();
542 if (augmented_args == NULL)
543 return 1;
544
545 bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args);
546
547 /*
548 * Jump to syscall specific augmenter, even if the default one,
549 * "!raw_syscalls:unaugmented" that will just return 1 to return the
550 * unaugmented tracepoint payload.
551 */
552 if (augment_sys_enter(args, &augmented_args->args))
553 bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
554
555 // If not found on the PROG_ARRAY syscalls map, then we're filtering it:
556 return 0;
557 }
558
559 SEC("tp/raw_syscalls/sys_exit")
sys_exit(struct syscall_exit_args * args)560 int sys_exit(struct syscall_exit_args *args)
561 {
562 struct syscall_exit_args exit_args;
563
564 if (pid_filter__has(&pids_filtered, getpid()))
565 return 0;
566
567 bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
568 /*
569 * Jump to syscall specific return augmenter, even if the default one,
570 * "!raw_syscalls:unaugmented" that will just return 1 to return the
571 * unaugmented tracepoint payload.
572 */
573 bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
574 /*
575 * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
576 */
577 return 0;
578 }
579
580 char _license[] SEC("license") = "GPL";
581