xref: /linux/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c (revision 29d16de26df17e94dd0310dc83c37f82b48a59cf)
1febf8a37SArnaldo Carvalho de Melo // SPDX-License-Identifier: GPL-2.0
2febf8a37SArnaldo Carvalho de Melo /*
3febf8a37SArnaldo Carvalho de Melo  * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
4febf8a37SArnaldo Carvalho de Melo  *
5febf8a37SArnaldo Carvalho de Melo  * This exactly matches what is marshalled into the raw_syscall:sys_enter
6febf8a37SArnaldo Carvalho de Melo  * payload expected by the 'perf trace' beautifiers.
7febf8a37SArnaldo Carvalho de Melo  */
8febf8a37SArnaldo Carvalho de Melo 
9*29d16de2SArnaldo Carvalho de Melo #include "vmlinux.h"
1014e4b9f4SIan Rogers 
1159f3bd78SArnaldo Carvalho de Melo #include <bpf/bpf_helpers.h>
1214e4b9f4SIan Rogers #include <linux/limits.h>
13262b54b6SArnaldo Carvalho de Melo 
/*
 * PERF_ALIGN() - round @x up to the next multiple of @a.
 *
 * @a must be a power of two: the rounding is done by adding (@a - 1) and then
 * clearing the low bits with a mask. @a is converted to the type of @x before
 * the mask is built, so the mask has the same width as @x.
 *
 * __typeof__ is spelled with underscores so that the macro also builds when
 * the compiler runs in a strict ISO mode (e.g. -std=c11), where the plain
 * 'typeof' keyword is not recognised. @x is expanded more than once, so it
 * must not have side effects.
 */
#define __PERF_ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define PERF_ALIGN(x, a)		__PERF_ALIGN_MASK(x, (__typeof__(x))(a) - 1)
16262b54b6SArnaldo Carvalho de Melo 
/**
 * is_power_of_2() - check if a value is a power of two
 * @n: the value to check
 *
 * Determine whether some value is a power of two, where zero is *not*
 * considered a power of two.  Return: true if @n is a power of 2, otherwise
 * false.
 *
 * Every use of @n is parenthesised so that an argument that is itself an
 * expression (e.g. a | b) is evaluated as a unit instead of being re-parsed
 * by operator precedence. @n is expanded more than once, so it must not have
 * side effects. The expansion stays an integer constant expression when @n
 * is one, which is what the _Static_assert() users in this file rely on.
 */
#define is_power_of_2(n) ((n) != 0 && (((n) & ((n) - 1)) == 0))
2614e4b9f4SIan Rogers 
/* Number of entries in the __augmented_syscalls__ perf event array */
#define MAX_CPUS  4096

/*
 * For buffer augmentation in perf trace: upper bound on the number of bytes
 * copied from user space for an argument described as a buffer.
 */
#define TRACE_AUG_MAX_BUF 32
305e6da6beSIan Rogers 
3114e4b9f4SIan Rogers /* bpf-output associated map */
32febf8a37SArnaldo Carvalho de Melo struct __augmented_syscalls__ {
334cae8675SArnaldo Carvalho de Melo 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
343803a229SArnaldo Carvalho de Melo 	__type(key, int);
353803a229SArnaldo Carvalho de Melo 	__type(value, __u32);
363803a229SArnaldo Carvalho de Melo 	__uint(max_entries, MAX_CPUS);
373803a229SArnaldo Carvalho de Melo } __augmented_syscalls__ SEC(".maps");
3814e4b9f4SIan Rogers 
3914e4b9f4SIan Rogers /*
4014e4b9f4SIan Rogers  * What to augment at entry?
4114e4b9f4SIan Rogers  *
4214e4b9f4SIan Rogers  * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
4314e4b9f4SIan Rogers  */
443803a229SArnaldo Carvalho de Melo struct syscalls_sys_enter {
453803a229SArnaldo Carvalho de Melo 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
463803a229SArnaldo Carvalho de Melo 	__type(key, __u32);
473803a229SArnaldo Carvalho de Melo 	__type(value, __u32);
483803a229SArnaldo Carvalho de Melo 	__uint(max_entries, 512);
493803a229SArnaldo Carvalho de Melo } syscalls_sys_enter SEC(".maps");
5014e4b9f4SIan Rogers 
5114e4b9f4SIan Rogers /*
5214e4b9f4SIan Rogers  * What to augment at exit?
5314e4b9f4SIan Rogers  *
5414e4b9f4SIan Rogers  * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
5514e4b9f4SIan Rogers  */
563803a229SArnaldo Carvalho de Melo struct syscalls_sys_exit {
57febf8a37SArnaldo Carvalho de Melo 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
58febf8a37SArnaldo Carvalho de Melo 	__type(key, __u32);
59febf8a37SArnaldo Carvalho de Melo 	__type(value, __u32);
60febf8a37SArnaldo Carvalho de Melo 	__uint(max_entries, 512);
61febf8a37SArnaldo Carvalho de Melo } syscalls_sys_exit SEC(".maps");
62febf8a37SArnaldo Carvalho de Melo 
/*
 * Context handed to the sys_enter programs in this file: the common
 * tracepoint fields, the syscall number and the six raw syscall arguments.
 * The member order defines the record layout and must not change.
 */
struct syscall_enter_args {
	unsigned long long common_tp_fields;
	long		   syscall_nr;	/* which syscall is being entered */
	unsigned long	   args[6];	/* raw, unbeautified argument values */
};
68febf8a37SArnaldo Carvalho de Melo 
/*
 * Context handed to the sys_exit program in this file: the common tracepoint
 * fields, the syscall number and the syscall return value.
 * The member order defines the record layout and must not change.
 */
struct syscall_exit_args {
	unsigned long long common_tp_fields;
	long		   syscall_nr;	/* which syscall is returning */
	long		   ret;		/* its return value */
};
7479ef68c7SArnaldo Carvalho de Melo 
/*
 * Desired design of maximum size and alignment (see RFC2553)
 */
#define SS_MAXSIZE   128     /* Implementation specific max size */

typedef unsigned short sa_family_t;

/*
 * FIXME: Should come from system headers
 *
 * The definition uses anonymous union and struct in order to control the
 * default alignment: the union with a pointer member gives the whole object
 * pointer alignment, while the inner struct lays out the family followed by
 * padding up to SS_MAXSIZE bytes.
 */
struct sockaddr_storage {
	union {
		struct {
			/* address family */
			sa_family_t    ss_family;
			/*
			 * Following field(s) are implementation specific:
			 * space to achieve the desired size, i.e. the
			 * SS_MAXSIZE value minus the size of ss_family.
			 */
			char __data[SS_MAXSIZE - sizeof(sa_family_t)];
		};
		/* implementation specific desired alignment */
		void *__align;
	};
};
10014e4b9f4SIan Rogers 
10114e4b9f4SIan Rogers struct augmented_arg {
10214e4b9f4SIan Rogers 	unsigned int	size;
10314e4b9f4SIan Rogers 	int		err;
10414e4b9f4SIan Rogers 	union {
10514e4b9f4SIan Rogers 		char   value[PATH_MAX];
10614e4b9f4SIan Rogers 		struct sockaddr_storage saddr;
107ed9a77baSArnaldo Carvalho de Melo 	};
1086f563674SArnaldo Carvalho de Melo };
10959f3bd78SArnaldo Carvalho de Melo 
110212b9ab6SArnaldo Carvalho de Melo struct pids_filtered {
1116f563674SArnaldo Carvalho de Melo 	__uint(type, BPF_MAP_TYPE_HASH);
1122ad926dbSArnaldo Carvalho de Melo 	__type(key, pid_t);
113212b9ab6SArnaldo Carvalho de Melo 	__type(value, bool);
114212b9ab6SArnaldo Carvalho de Melo 	__uint(max_entries, 64);
115a9cd6c67SArnaldo Carvalho de Melo } pids_filtered SEC(".maps");
11659f3bd78SArnaldo Carvalho de Melo 
1176f563674SArnaldo Carvalho de Melo struct augmented_args_payload {
11859f3bd78SArnaldo Carvalho de Melo 	struct syscall_enter_args args;
119c265784dSArnaldo Carvalho de Melo 	struct augmented_arg arg, arg2; // We have to reserve space for two arguments (rename, etc)
12014e4b9f4SIan Rogers };
12114e4b9f4SIan Rogers 
12214e4b9f4SIan Rogers // We need more tmp space than the BPF stack can give us
12314e4b9f4SIan Rogers struct augmented_args_tmp {
12414e4b9f4SIan Rogers 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
12514e4b9f4SIan Rogers 	__type(key, int);
12659f3bd78SArnaldo Carvalho de Melo 	__type(value, struct augmented_args_payload);
127c265784dSArnaldo Carvalho de Melo 	__uint(max_entries, 1);
128c265784dSArnaldo Carvalho de Melo } augmented_args_tmp SEC(".maps");
129c265784dSArnaldo Carvalho de Melo 
130c265784dSArnaldo Carvalho de Melo struct beauty_map_enter {
131c265784dSArnaldo Carvalho de Melo 	__uint(type, BPF_MAP_TYPE_HASH);
132c265784dSArnaldo Carvalho de Melo 	__type(key, int);
133e051c2f6SArnaldo Carvalho de Melo 	__type(value, __u32[6]);
134e051c2f6SArnaldo Carvalho de Melo 	__uint(max_entries, 512);
135e051c2f6SArnaldo Carvalho de Melo } beauty_map_enter SEC(".maps");
13614e4b9f4SIan Rogers 
137e051c2f6SArnaldo Carvalho de Melo struct beauty_payload_enter {
138e051c2f6SArnaldo Carvalho de Melo 	struct syscall_enter_args args;
1390c95a7ffSArnaldo Carvalho de Melo 	struct augmented_arg aug_args[6];
1402ad926dbSArnaldo Carvalho de Melo };
1410c95a7ffSArnaldo Carvalho de Melo 
1422ad926dbSArnaldo Carvalho de Melo struct beauty_payload_enter_map {
1435069211eSThomas Richter 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
1440c95a7ffSArnaldo Carvalho de Melo 	__type(key, int);
1452ad926dbSArnaldo Carvalho de Melo 	__type(value, struct beauty_payload_enter);
146deaf4da4SArnaldo Carvalho de Melo 	__uint(max_entries, 1);
147deaf4da4SArnaldo Carvalho de Melo } beauty_payload_enter_map SEC(".maps");
1482ad926dbSArnaldo Carvalho de Melo 
augmented_args_payload(void)149deaf4da4SArnaldo Carvalho de Melo static inline struct augmented_args_payload *augmented_args_payload(void)
1502ad926dbSArnaldo Carvalho de Melo {
1512ad926dbSArnaldo Carvalho de Melo 	int key = 0;
1527d964231SArnaldo Carvalho de Melo 	return bpf_map_lookup_elem(&augmented_args_tmp, &key);
1532ad926dbSArnaldo Carvalho de Melo }
1542ad926dbSArnaldo Carvalho de Melo 
augmented__output(void * ctx,struct augmented_args_payload * args,int len)155deaf4da4SArnaldo Carvalho de Melo static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
156deaf4da4SArnaldo Carvalho de Melo {
157deaf4da4SArnaldo Carvalho de Melo 	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
158deaf4da4SArnaldo Carvalho de Melo 	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
159deaf4da4SArnaldo Carvalho de Melo }
1602ad926dbSArnaldo Carvalho de Melo 
augmented__beauty_output(void * ctx,void * data,int len)1612ad926dbSArnaldo Carvalho de Melo static inline int augmented__beauty_output(void *ctx, void *data, int len)
1620c95a7ffSArnaldo Carvalho de Melo {
1630c95a7ffSArnaldo Carvalho de Melo 	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
1642ad926dbSArnaldo Carvalho de Melo }
1650c95a7ffSArnaldo Carvalho de Melo 
1660c95a7ffSArnaldo Carvalho de Melo static inline
augmented_arg__read_str(struct augmented_arg * augmented_arg,const void * arg,unsigned int arg_len)1675e6da6beSIan Rogers unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
1685834da7fSArnaldo Carvalho de Melo {
1695834da7fSArnaldo Carvalho de Melo 	unsigned int augmented_len = sizeof(*augmented_arg);
1705834da7fSArnaldo Carvalho de Melo 	int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg);
1715834da7fSArnaldo Carvalho de Melo 
1725834da7fSArnaldo Carvalho de Melo 	augmented_arg->size = augmented_arg->err = 0;
1736ff8fff4SArnaldo Carvalho de Melo 	/*
174212b9ab6SArnaldo Carvalho de Melo 	 * probe_read_str may return < 0, e.g. -EFAULT
1756f563674SArnaldo Carvalho de Melo 	 * So we leave that in the augmented_arg->size that userspace will
1766ff8fff4SArnaldo Carvalho de Melo 	 */
1776ff8fff4SArnaldo Carvalho de Melo 	if (string_len > 0) {
1786ff8fff4SArnaldo Carvalho de Melo 		augmented_len -= sizeof(augmented_arg->value) - string_len;
1795e6da6beSIan Rogers 		_Static_assert(is_power_of_2(sizeof(augmented_arg->value)), "sizeof(augmented_arg->value) needs to be a power of two");
180212b9ab6SArnaldo Carvalho de Melo 		augmented_len &= sizeof(augmented_arg->value) - 1;
181212b9ab6SArnaldo Carvalho de Melo 		augmented_arg->size = string_len;
182c265784dSArnaldo Carvalho de Melo 	} else {
183212b9ab6SArnaldo Carvalho de Melo 		/*
184212b9ab6SArnaldo Carvalho de Melo 		 * So that username notice the error while still being able
185212b9ab6SArnaldo Carvalho de Melo 		 * to skip this augmented arg record
186212b9ab6SArnaldo Carvalho de Melo 		 */
187212b9ab6SArnaldo Carvalho de Melo 		augmented_arg->err = string_len;
188212b9ab6SArnaldo Carvalho de Melo 		augmented_len = offsetof(struct augmented_arg, value);
189212b9ab6SArnaldo Carvalho de Melo 	}
190262b54b6SArnaldo Carvalho de Melo 
19118364804SArnaldo Carvalho de Melo 	return augmented_len;
192212b9ab6SArnaldo Carvalho de Melo }
1935069211eSThomas Richter 
194212b9ab6SArnaldo Carvalho de Melo SEC("tp/raw_syscalls/sys_enter")
syscall_unaugmented(struct syscall_enter_args * args)195e051c2f6SArnaldo Carvalho de Melo int syscall_unaugmented(struct syscall_enter_args *args)
196212b9ab6SArnaldo Carvalho de Melo {
197212b9ab6SArnaldo Carvalho de Melo 	return 1;
1985e6da6beSIan Rogers }
1993c475bc0SArnaldo Carvalho de Melo 
2003c475bc0SArnaldo Carvalho de Melo /*
201c265784dSArnaldo Carvalho de Melo  * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in
2023c475bc0SArnaldo Carvalho de Melo  * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go
2033c475bc0SArnaldo Carvalho de Melo  * on from there, reading the first syscall arg as a string, i.e. open's
2043c475bc0SArnaldo Carvalho de Melo  * filename.
2053c475bc0SArnaldo Carvalho de Melo  */
2063c475bc0SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_connect")
sys_enter_connect(struct syscall_enter_args * args)2073c475bc0SArnaldo Carvalho de Melo int sys_enter_connect(struct syscall_enter_args *args)
2083c475bc0SArnaldo Carvalho de Melo {
20918364804SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
2103c475bc0SArnaldo Carvalho de Melo 	const void *sockaddr_arg = (const void *)args->args[1];
2115069211eSThomas Richter 	unsigned int socklen = args->args[2];
2123c475bc0SArnaldo Carvalho de Melo 	unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
213e051c2f6SArnaldo Carvalho de Melo 
2143c475bc0SArnaldo Carvalho de Melo         if (augmented_args == NULL)
2153c475bc0SArnaldo Carvalho de Melo                 return 1; /* Failure: don't filter */
2165e6da6beSIan Rogers 
2176ff8fff4SArnaldo Carvalho de Melo 	_Static_assert(is_power_of_2(sizeof(augmented_args->arg.saddr)), "sizeof(augmented_args->arg.saddr) needs to be a power of two");
2186ff8fff4SArnaldo Carvalho de Melo 	socklen &= sizeof(augmented_args->arg.saddr) - 1;
219c265784dSArnaldo Carvalho de Melo 
2206ff8fff4SArnaldo Carvalho de Melo 	bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
2216ff8fff4SArnaldo Carvalho de Melo 	augmented_args->arg.size = socklen;
2226ff8fff4SArnaldo Carvalho de Melo 	augmented_args->arg.err = 0;
2236ff8fff4SArnaldo Carvalho de Melo 
2246ff8fff4SArnaldo Carvalho de Melo 	return augmented__output(args, augmented_args, len + socklen);
2256ff8fff4SArnaldo Carvalho de Melo }
2262ad926dbSArnaldo Carvalho de Melo 
2276ff8fff4SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_sendto")
sys_enter_sendto(struct syscall_enter_args * args)228e051c2f6SArnaldo Carvalho de Melo int sys_enter_sendto(struct syscall_enter_args *args)
2296ff8fff4SArnaldo Carvalho de Melo {
2306ff8fff4SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
2315e6da6beSIan Rogers 	const void *sockaddr_arg = (const void *)args->args[4];
232236dd583SArnaldo Carvalho de Melo 	unsigned int socklen = args->args[5];
233236dd583SArnaldo Carvalho de Melo 	unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
234c265784dSArnaldo Carvalho de Melo 
235236dd583SArnaldo Carvalho de Melo         if (augmented_args == NULL)
236236dd583SArnaldo Carvalho de Melo                 return 1; /* Failure: don't filter */
237236dd583SArnaldo Carvalho de Melo 
238236dd583SArnaldo Carvalho de Melo 	socklen &= sizeof(augmented_args->arg.saddr) - 1;
239236dd583SArnaldo Carvalho de Melo 
240236dd583SArnaldo Carvalho de Melo 	bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
2412ad926dbSArnaldo Carvalho de Melo 
242236dd583SArnaldo Carvalho de Melo 	return augmented__output(args, augmented_args, len + socklen);
243e051c2f6SArnaldo Carvalho de Melo }
244236dd583SArnaldo Carvalho de Melo 
245236dd583SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_open")
sys_enter_open(struct syscall_enter_args * args)2465e6da6beSIan Rogers int sys_enter_open(struct syscall_enter_args *args)
247cfa9ac73SArnaldo Carvalho de Melo {
248cfa9ac73SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
249c265784dSArnaldo Carvalho de Melo 	const void *filename_arg = (const void *)args->args[0];
250cfa9ac73SArnaldo Carvalho de Melo 	unsigned int len = sizeof(augmented_args->args);
251cfa9ac73SArnaldo Carvalho de Melo 
252cfa9ac73SArnaldo Carvalho de Melo         if (augmented_args == NULL)
253cfa9ac73SArnaldo Carvalho de Melo                 return 1; /* Failure: don't filter */
254cfa9ac73SArnaldo Carvalho de Melo 
255cfa9ac73SArnaldo Carvalho de Melo 	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
256cfa9ac73SArnaldo Carvalho de Melo 
2572ad926dbSArnaldo Carvalho de Melo 	return augmented__output(args, augmented_args, len);
2582ad926dbSArnaldo Carvalho de Melo }
259cfa9ac73SArnaldo Carvalho de Melo 
260e051c2f6SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_openat")
sys_enter_openat(struct syscall_enter_args * args)261cfa9ac73SArnaldo Carvalho de Melo int sys_enter_openat(struct syscall_enter_args *args)
262cfa9ac73SArnaldo Carvalho de Melo {
2635e6da6beSIan Rogers 	struct augmented_args_payload *augmented_args = augmented_args_payload();
2648d5da264SArnaldo Carvalho de Melo 	const void *filename_arg = (const void *)args->args[1];
2658d5da264SArnaldo Carvalho de Melo 	unsigned int len = sizeof(augmented_args->args);
266c265784dSArnaldo Carvalho de Melo 
2678d5da264SArnaldo Carvalho de Melo         if (augmented_args == NULL)
2688d5da264SArnaldo Carvalho de Melo                 return 1; /* Failure: don't filter */
2698d5da264SArnaldo Carvalho de Melo 
2708d5da264SArnaldo Carvalho de Melo 	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
2718d5da264SArnaldo Carvalho de Melo 
2728d5da264SArnaldo Carvalho de Melo 	return augmented__output(args, augmented_args, len);
2738d5da264SArnaldo Carvalho de Melo }
2742ad926dbSArnaldo Carvalho de Melo 
2752ad926dbSArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_rename")
sys_enter_rename(struct syscall_enter_args * args)2768d5da264SArnaldo Carvalho de Melo int sys_enter_rename(struct syscall_enter_args *args)
277e051c2f6SArnaldo Carvalho de Melo {
2788d5da264SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
2798d5da264SArnaldo Carvalho de Melo 	const void *oldpath_arg = (const void *)args->args[0],
280a9cd6c67SArnaldo Carvalho de Melo 		   *newpath_arg = (const void *)args->args[1];
281a9cd6c67SArnaldo Carvalho de Melo 	unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
282a9cd6c67SArnaldo Carvalho de Melo 
283a9cd6c67SArnaldo Carvalho de Melo         if (augmented_args == NULL)
284a9cd6c67SArnaldo Carvalho de Melo                 return 1; /* Failure: don't filter */
285a9cd6c67SArnaldo Carvalho de Melo 
286a9cd6c67SArnaldo Carvalho de Melo 	len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
287a9cd6c67SArnaldo Carvalho de Melo 
288a9cd6c67SArnaldo Carvalho de Melo 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
289a9cd6c67SArnaldo Carvalho de Melo 	augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
290a9cd6c67SArnaldo Carvalho de Melo 	len += augmented_args->arg.size;
2915e6da6beSIan Rogers 
292a9cd6c67SArnaldo Carvalho de Melo 	/* Every read from userspace is limited to value size */
293a9cd6c67SArnaldo Carvalho de Melo 	if (augmented_args->arg.size > sizeof(augmented_args->arg.value))
294a9cd6c67SArnaldo Carvalho de Melo 		return 1; /* Failure: don't filter */
295a9cd6c67SArnaldo Carvalho de Melo 
296a9cd6c67SArnaldo Carvalho de Melo 	struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
297a9cd6c67SArnaldo Carvalho de Melo 
298a9cd6c67SArnaldo Carvalho de Melo 	newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
299a9cd6c67SArnaldo Carvalho de Melo 	arg2->size = newpath_len;
300a9cd6c67SArnaldo Carvalho de Melo 
3015069211eSThomas Richter 	len += newpath_len;
302a9cd6c67SArnaldo Carvalho de Melo 
303a9cd6c67SArnaldo Carvalho de Melo 	return augmented__output(args, augmented_args, len);
304a9cd6c67SArnaldo Carvalho de Melo }
305a9cd6c67SArnaldo Carvalho de Melo 
306a9cd6c67SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_renameat2")
sys_enter_renameat2(struct syscall_enter_args * args)307a9cd6c67SArnaldo Carvalho de Melo int sys_enter_renameat2(struct syscall_enter_args *args)
308a9cd6c67SArnaldo Carvalho de Melo {
309a9cd6c67SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
310a9cd6c67SArnaldo Carvalho de Melo 	const void *oldpath_arg = (const void *)args->args[1],
311a9cd6c67SArnaldo Carvalho de Melo 		   *newpath_arg = (const void *)args->args[3];
312a9cd6c67SArnaldo Carvalho de Melo 	unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
313a9cd6c67SArnaldo Carvalho de Melo 
314a9cd6c67SArnaldo Carvalho de Melo         if (augmented_args == NULL)
3155069211eSThomas Richter                 return 1; /* Failure: don't filter */
316a9cd6c67SArnaldo Carvalho de Melo 
317a9cd6c67SArnaldo Carvalho de Melo 	len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
318a9cd6c67SArnaldo Carvalho de Melo 
319a9cd6c67SArnaldo Carvalho de Melo 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
320a9cd6c67SArnaldo Carvalho de Melo 	augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
321a9cd6c67SArnaldo Carvalho de Melo 	len += augmented_args->arg.size;
322a9cd6c67SArnaldo Carvalho de Melo 
3235e6da6beSIan Rogers 	/* Every read from userspace is limited to value size */
3246ac73820SArnaldo Carvalho de Melo 	if (augmented_args->arg.size > sizeof(augmented_args->arg.value))
3256ac73820SArnaldo Carvalho de Melo 		return 1; /* Failure: don't filter */
3266ac73820SArnaldo Carvalho de Melo 
3276ac73820SArnaldo Carvalho de Melo 	struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
3286ac73820SArnaldo Carvalho de Melo 
3296ac73820SArnaldo Carvalho de Melo 	newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
3306ac73820SArnaldo Carvalho de Melo 	arg2->size = newpath_len;
3316ac73820SArnaldo Carvalho de Melo 
3326ac73820SArnaldo Carvalho de Melo 	len += newpath_len;
3336ac73820SArnaldo Carvalho de Melo 
3346ac73820SArnaldo Carvalho de Melo 	return augmented__output(args, augmented_args, len);
3356ac73820SArnaldo Carvalho de Melo }
3366ac73820SArnaldo Carvalho de Melo 
3375069211eSThomas Richter #define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */
3386ac73820SArnaldo Carvalho de Melo 
3396ac73820SArnaldo Carvalho de Melo // we need just the start, get the size to then copy it
3406ac73820SArnaldo Carvalho de Melo struct perf_event_attr_size {
3416ac73820SArnaldo Carvalho de Melo         __u32                   type;
3426ac73820SArnaldo Carvalho de Melo         /*
3436ac73820SArnaldo Carvalho de Melo          * Size of the attr structure, for fwd/bwd compat.
34414e4b9f4SIan Rogers          */
34514e4b9f4SIan Rogers         __u32                   size;
34614e4b9f4SIan Rogers };
34714e4b9f4SIan Rogers 
34814e4b9f4SIan Rogers SEC("tp/syscalls/sys_enter_perf_event_open")
sys_enter_perf_event_open(struct syscall_enter_args * args)34914e4b9f4SIan Rogers int sys_enter_perf_event_open(struct syscall_enter_args *args)
35014e4b9f4SIan Rogers {
35114e4b9f4SIan Rogers 	struct augmented_args_payload *augmented_args = augmented_args_payload();
35214e4b9f4SIan Rogers 	const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
35314e4b9f4SIan Rogers 	unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
3545e6da6beSIan Rogers 
355febf8a37SArnaldo Carvalho de Melo         if (augmented_args == NULL)
356febf8a37SArnaldo Carvalho de Melo 		goto failure;
3576f563674SArnaldo Carvalho de Melo 
358deaf4da4SArnaldo Carvalho de Melo 	if (bpf_probe_read_user(&augmented_args->arg.value, sizeof(*attr), attr) < 0)
359deaf4da4SArnaldo Carvalho de Melo 		goto failure;
360deaf4da4SArnaldo Carvalho de Melo 
361deaf4da4SArnaldo Carvalho de Melo 	attr_read = (const struct perf_event_attr_size *)augmented_args->arg.value;
362deaf4da4SArnaldo Carvalho de Melo 
363deaf4da4SArnaldo Carvalho de Melo 	__u32 size = attr_read->size;
364deaf4da4SArnaldo Carvalho de Melo 
365deaf4da4SArnaldo Carvalho de Melo 	if (!size)
366deaf4da4SArnaldo Carvalho de Melo 		size = PERF_ATTR_SIZE_VER0;
36759f3bd78SArnaldo Carvalho de Melo 
36801128065SArnaldo Carvalho de Melo 	if (size > sizeof(augmented_args->arg.value))
36901128065SArnaldo Carvalho de Melo                 goto failure;
37001128065SArnaldo Carvalho de Melo 
371c265784dSArnaldo Carvalho de Melo 	// Now that we read attr->size and tested it against the size limits, read it completely
37259f3bd78SArnaldo Carvalho de Melo 	if (bpf_probe_read_user(&augmented_args->arg.value, size, attr) < 0)
37359f3bd78SArnaldo Carvalho de Melo 		goto failure;
374febf8a37SArnaldo Carvalho de Melo 
3755069211eSThomas Richter 	return augmented__output(args, augmented_args, len + size);
376b27b38edSArnaldo Carvalho de Melo failure:
377bf134ca6SArnaldo Carvalho de Melo 	return 1; /* Failure: don't filter */
378bf134ca6SArnaldo Carvalho de Melo }
379bf134ca6SArnaldo Carvalho de Melo 
3804d39c89fSIngo Molnar SEC("tp/syscalls/sys_enter_clock_nanosleep")
sys_enter_clock_nanosleep(struct syscall_enter_args * args)381bf134ca6SArnaldo Carvalho de Melo int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
382bf134ca6SArnaldo Carvalho de Melo {
383bf134ca6SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
384bf134ca6SArnaldo Carvalho de Melo 	const void *rqtp_arg = (const void *)args->args[2];
385b27b38edSArnaldo Carvalho de Melo 	unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
386febf8a37SArnaldo Carvalho de Melo 	__u32 size = sizeof(struct timespec64);
387febf8a37SArnaldo Carvalho de Melo 
3885e6da6beSIan Rogers         if (augmented_args == NULL)
389febf8a37SArnaldo Carvalho de Melo 		goto failure;
390febf8a37SArnaldo Carvalho de Melo 
391b27b38edSArnaldo Carvalho de Melo 	if (size > sizeof(augmented_args->arg.value))
392b27b38edSArnaldo Carvalho de Melo                 goto failure;
393b27b38edSArnaldo Carvalho de Melo 
394b27b38edSArnaldo Carvalho de Melo 	bpf_probe_read_user(&augmented_args->arg.value, size, rqtp_arg);
395b27b38edSArnaldo Carvalho de Melo 
3965069211eSThomas Richter 	return augmented__output(args, augmented_args, len + size);
397bf134ca6SArnaldo Carvalho de Melo failure:
398bf134ca6SArnaldo Carvalho de Melo 	return 1; /* Failure: don't filter */
399bf134ca6SArnaldo Carvalho de Melo }
4004d39c89fSIngo Molnar 
401bf134ca6SArnaldo Carvalho de Melo SEC("tp/syscalls/sys_enter_nanosleep")
sys_enter_nanosleep(struct syscall_enter_args * args)402bf134ca6SArnaldo Carvalho de Melo int sys_enter_nanosleep(struct syscall_enter_args *args)
403bf134ca6SArnaldo Carvalho de Melo {
404bf134ca6SArnaldo Carvalho de Melo 	struct augmented_args_payload *augmented_args = augmented_args_payload();
405bf134ca6SArnaldo Carvalho de Melo 	const void *req_arg = (const void *)args->args[0];
406b27b38edSArnaldo Carvalho de Melo 	unsigned int len = sizeof(augmented_args->args);
407febf8a37SArnaldo Carvalho de Melo 	__u32 size = sizeof(struct timespec64);
408febf8a37SArnaldo Carvalho de Melo 
40914e4b9f4SIan Rogers         if (augmented_args == NULL)
410 		goto failure;
411 
412 	if (size > sizeof(augmented_args->arg.value))
413                 goto failure;
414 
415 	bpf_probe_read_user(&augmented_args->arg.value, size, req_arg);
416 
417 	return augmented__output(args, augmented_args, len + size);
418 failure:
419 	return 1; /* Failure: don't filter */
420 }
421 
getpid(void)422 static pid_t getpid(void)
423 {
424 	return bpf_get_current_pid_tgid();
425 }
426 
pid_filter__has(struct pids_filtered * pids,pid_t pid)427 static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
428 {
429 	return bpf_map_lookup_elem(pids, &pid) != NULL;
430 }
431 
augment_sys_enter(void * ctx,struct syscall_enter_args * args)432 static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
433 {
434 	bool augmented, do_output = false;
435 	int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
436 	u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
437 	s64 aug_size, size;
438 	unsigned int nr, *beauty_map;
439 	struct beauty_payload_enter *payload;
440 	void *arg, *payload_offset;
441 
442 	/* fall back to do predefined tail call */
443 	if (args == NULL)
444 		return 1;
445 
446 	/* use syscall number to get beauty_map entry */
447 	nr             = (__u32)args->syscall_nr;
448 	beauty_map     = bpf_map_lookup_elem(&beauty_map_enter, &nr);
449 
450 	/* set up payload for output */
451 	payload        = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
452 	payload_offset = (void *)&payload->aug_args;
453 
454 	if (beauty_map == NULL || payload == NULL)
455 		return 1;
456 
457 	/* copy the sys_enter header, which has the syscall_nr */
458 	__builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
459 
460 	/*
461 	 * Determine what type of argument and how many bytes to read from user space, using the
462 	 * value in the beauty_map. This is the relation of parameter type and its corresponding
463 	 * value in the beauty map, and how many bytes we read eventually:
464 	 *
465 	 * string: 1			      -> size of string
466 	 * struct: size of struct	      -> size of struct
467 	 * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
468 	 */
469 	for (int i = 0; i < 6; i++) {
470 		arg = (void *)args->args[i];
471 		augmented = false;
472 		size = beauty_map[i];
473 		aug_size = size; /* size of the augmented data read from user space */
474 
475 		if (size == 0 || arg == NULL)
476 			continue;
477 
478 		if (size == 1) { /* string */
479 			aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
480 			/* minimum of 0 to pass the verifier */
481 			if (aug_size < 0)
482 				aug_size = 0;
483 
484 			augmented = true;
485 		} else if (size > 0 && size <= value_size) { /* struct */
486 			if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
487 				augmented = true;
488 		} else if ((int)size < 0 && size >= -6) { /* buffer */
489 			index = -(size + 1);
490 			barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
491 			index &= 7;	    // Satisfy the bounds checking with the verifier in some kernels.
492 			aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
493 
494 			if (aug_size > 0) {
495 				if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
496 					augmented = true;
497 			}
498 		}
499 
500 		/* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */
501 		if (aug_size > value_size)
502 			aug_size = value_size;
503 
504 		/* write data to payload */
505 		if (augmented) {
506 			int written = offsetof(struct augmented_arg, value) + aug_size;
507 
508 			if (written < 0 || written > sizeof(struct augmented_arg))
509 				return 1;
510 
511 			((struct augmented_arg *)payload_offset)->size = aug_size;
512 			output += written;
513 			payload_offset += written;
514 			do_output = true;
515 		}
516 	}
517 
518 	if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter))
519 		return 1;
520 
521 	return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
522 }
523 
524 SEC("tp/raw_syscalls/sys_enter")
sys_enter(struct syscall_enter_args * args)525 int sys_enter(struct syscall_enter_args *args)
526 {
527 	struct augmented_args_payload *augmented_args;
528 	/*
529 	 * We start len, the amount of data that will be in the perf ring
530 	 * buffer, if this is not filtered out by one of pid_filter__has(),
531 	 * syscall->enabled, etc, with the non-augmented raw syscall payload,
532 	 * i.e. sizeof(augmented_args->args).
533 	 *
534 	 * We'll add to this as we add augmented syscalls right after that
535 	 * initial, non-augmented raw_syscalls:sys_enter payload.
536 	 */
537 
538 	if (pid_filter__has(&pids_filtered, getpid()))
539 		return 0;
540 
541 	augmented_args = augmented_args_payload();
542 	if (augmented_args == NULL)
543 		return 1;
544 
545 	bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args);
546 
547 	/*
548 	 * Jump to syscall specific augmenter, even if the default one,
549 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
550 	 * unaugmented tracepoint payload.
551 	 */
552 	if (augment_sys_enter(args, &augmented_args->args))
553 		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
554 
555 	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
556 	return 0;
557 }
558 
559 SEC("tp/raw_syscalls/sys_exit")
sys_exit(struct syscall_exit_args * args)560 int sys_exit(struct syscall_exit_args *args)
561 {
562 	struct syscall_exit_args exit_args;
563 
564 	if (pid_filter__has(&pids_filtered, getpid()))
565 		return 0;
566 
567 	bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args);
568 	/*
569 	 * Jump to syscall specific return augmenter, even if the default one,
570 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
571 	 * unaugmented tracepoint payload.
572 	 */
573 	bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
574 	/*
575 	 * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
576 	 */
577 	return 0;
578 }
579 
580 char _license[] SEC("license") = "GPL";
581