// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"

#ifndef NULL
#define NULL 0
#endif

#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define MAX_ERRNO 4095
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
#define IS_ERR_VALUE(x) ((unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO)

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

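/*
 * Runtime knobs, populated from userspace before the programs are
 * attached (for example through a libbpf skeleton along the lines of
 * skel->bss->bpf_config.<field> = ...; the loader code is not part of
 * this file and the access pattern here is illustrative). Marked
 * volatile so clang does not constant-fold the zero defaults.
 */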
volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

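/*
 * CO-RE "flavor" definitions: libbpf ignores the ___52 suffix when
 * matching these against kernel BTF, so they describe older kernels'
 * layouts of kernfs_iattrs and kernfs_node::id. Which layout is
 * actually present is probed at runtime with bpf_core_field_exists().
 */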
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

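/*
 * Single-slot per-CPU array used as scratch space: the event structs are
 * far larger than the 512-byte BPF stack, so each program builds its
 * record in this heap slot before emitting it.
 */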
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

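/*
 * Read up to max bytes from src. Returns the number of bytes copied, or
 * 0 on failure. The explicit len > 1 / len == 1 split looks redundant
 * but presumably helps the verifier prove the length is bounded.
 */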
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
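	/* UNROLL is defined by build variants targeting verifiers without
	 * bounded-loop support; with it set, loops are fully unrolled.
	 */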
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

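/*
 * Walk up the real_parent chain, recording pid/exec_id/start_time for up
 * to MAX_ANCESTORS ancestors and stopping at init (pid 0/1).
 */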
static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
#pragma unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

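/*
 * Copy cgroup path components leaf-to-root into payload, one
 * NUL-terminated part per iteration. *root_pos is set to the payload
 * offset where cgroup_root_node's component starts. Returns the new
 * payload cursor.
 */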
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (filepart_length <= MAX_PATH) {
			barrier_var(filepart_length);
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

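/*
 * Fetch the cgroupfs inode number for a kernfs node, handling both the
 * old union kernfs_node_id layout and the current plain u64 id.
 */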
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

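/*
 * The pids controller's subsys id is resolved via CO-RE enum relocation
 * so the program works across kernels where the enum value differs; the
 * local value below is only a placeholder. CONFIG_CGROUP_PIDS is filled
 * in from the running kernel's config by libbpf (__kconfig).
 */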
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};

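/*
 * Record root and process cgroup inodes, mtimes, and names into
 * cgroup_data/payload. With cgroup-v1 resolution enabled, prefer the
 * hierarchy that has the pids controller attached over the default one.
 */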
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

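/*
 * Fill the common per-event metadata (ids, times, comm) and append the
 * comm string to payload. Returns the advanced payload cursor.
 */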
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}

static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

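/*
 * Track kill() calls for signals in KILL_SIGNALS. Records for the same
 * target pid are batched per sender in var_tpid_to_data: a repeat kill
 * within STALE_INFO seconds only bumps kill_count; otherwise the slot is
 * overwritten with fresh data.
 */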
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read(&arr_struct->array[i],
						       sizeof(arr_struct->array[i]), kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

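/*
 * Self-instrumentation: each hook brackets its body with
 * bpf_stats_enter()/bpf_stats_exit() to count executions and accumulate
 * elapsed time per function id in the bpf_func_stats map.
 */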
static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

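/*
 * Write the dentry's path components into payload, walking d_parent
 * toward the root. The parts land leaf-first and NUL-separated;
 * userspace is presumably expected to reassemble them. Returns the
 * total number of bytes written.
 */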
static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
						     BPF_CORE_READ(filp_dentry, d_name.name));
		barrier_var(filepart_length);
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}

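/*
 * sysctl writes: capture the value being written and the proc file's
 * name, then emit a SYSCTL_EVENT perf sample.
 */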
548 SEC("kprobe/proc_sys_write")
BPF_KPROBE(kprobe__proc_sys_write,struct file * filp,const char * buf,size_t count,loff_t * ppos)549 ssize_t BPF_KPROBE(kprobe__proc_sys_write,
550 struct file* filp, const char* buf,
551 size_t count, loff_t* ppos)
552 {
553 struct bpf_func_stats_ctx stats_ctx;
554 bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
555
556 u32 pid = get_userspace_pid();
557 int zero = 0;
558 struct var_sysctl_data_t* sysctl_data =
559 bpf_map_lookup_elem(&data_heap, &zero);
560 if (!sysctl_data)
561 goto out;
562
563 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
564 sysctl_data->meta.type = SYSCTL_EVENT;
565 void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
566 payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
567
568 populate_ancestors(task, &sysctl_data->ancestors_info);
569
570 sysctl_data->sysctl_val_length = 0;
571 sysctl_data->sysctl_path_length = 0;
572
573 size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
574 barrier_var(sysctl_val_length);
575 if (sysctl_val_length <= CTL_MAXNAME) {
576 barrier_var(sysctl_val_length);
577 sysctl_data->sysctl_val_length = sysctl_val_length;
578 payload += sysctl_val_length;
579 }
580
581 size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
582 BPF_CORE_READ(filp, f_path.dentry, d_name.name));
583 barrier_var(sysctl_path_length);
584 if (sysctl_path_length <= MAX_PATH) {
585 barrier_var(sysctl_path_length);
586 sysctl_data->sysctl_path_length = sysctl_path_length;
587 payload += sysctl_path_length;
588 }
589
590 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
591 unsigned long data_len = payload - (void*)sysctl_data;
592 data_len = data_len > sizeof(struct var_sysctl_data_t)
593 ? sizeof(struct var_sysctl_data_t)
594 : data_len;
595 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
596 out:
597 bpf_stats_exit(&stats_ctx);
598 return 0;
599 }
600
601 SEC("tracepoint/syscalls/sys_enter_kill")
tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter * ctx)602 int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
603 {
604 struct bpf_func_stats_ctx stats_ctx;
605
606 bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
607 int pid = ctx->args[0];
608 int sig = ctx->args[1];
609 int ret = trace_var_sys_kill(ctx, pid, sig);
610 bpf_stats_exit(&stats_ctx);
611 return ret;
612 };
613
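/*
 * Process exit: if the exiting tgid was a kill target, flush every
 * buffered kill record for it (annotated with the target's comm and
 * cgroup name) and drop the batching entry.
 */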
614 SEC("raw_tracepoint/sched_process_exit")
raw_tracepoint__sched_process_exit(void * ctx)615 int raw_tracepoint__sched_process_exit(void* ctx)
616 {
617 int zero = 0;
618 struct bpf_func_stats_ctx stats_ctx;
619 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
620
621 u32 tpid = get_userspace_pid();
622
623 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
624 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
625
626 if (arr_struct == NULL || kill_data == NULL)
627 goto out;
628
629 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
630 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
631
632 #ifdef UNROLL
633 #pragma unroll
634 #endif
635 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
636 struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
637
638 if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
639 bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
640 void* payload = kill_data->payload;
641 size_t offset = kill_data->payload_length;
642 if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
643 return 0;
644 payload += offset;
645
646 kill_data->kill_target_name_length = 0;
647 kill_data->kill_target_cgroup_proc_length = 0;
648
649 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
650 barrier_var(comm_length);
651 if (comm_length <= TASK_COMM_LEN) {
652 barrier_var(comm_length);
653 kill_data->kill_target_name_length = comm_length;
654 payload += comm_length;
655 }
656
657 size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
658 BPF_CORE_READ(proc_kernfs, name));
659 barrier_var(cgroup_proc_length);
660 if (cgroup_proc_length <= KILL_TARGET_LEN) {
661 barrier_var(cgroup_proc_length);
662 kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
663 payload += cgroup_proc_length;
664 }
665
666 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
667 unsigned long data_len = (void*)payload - (void*)kill_data;
668 data_len = data_len > sizeof(struct var_kill_data_t)
669 ? sizeof(struct var_kill_data_t)
670 : data_len;
671 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
672 }
673 }
674 bpf_map_delete_elem(&var_tpid_to_data, &tpid);
675 out:
676 bpf_stats_exit(&stats_ctx);
677 return 0;
678 }
679
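/*
 * exec: unless the binary's inode is in disallowed_exec_inodes (or
 * filtered out by INODE_FILTER), emit an EXEC_EVENT carrying the binary
 * path, command line, and optionally the environment.
 */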
680 SEC("raw_tracepoint/sched_process_exec")
raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args * ctx)681 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
682 {
683 struct bpf_func_stats_ctx stats_ctx;
684 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
685
686 struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
687 u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
688
689 bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
690 if (should_filter_binprm != NULL)
691 goto out;
692
693 int zero = 0;
694 struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
695 if (!proc_exec_data)
696 goto out;
697
	if (INODE_FILTER && inode != INODE_FILTER)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	proc_exec_data->meta.type = EXEC_EVENT;
	proc_exec_data->bin_path_length = 0;
	proc_exec_data->cmdline_length = 0;
	proc_exec_data->environment_length = 0;
	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
					      proc_exec_data->payload);
	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);

	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);

	const char* filename = BPF_CORE_READ(bprm, filename);
	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
	barrier_var(bin_path_length);
	if (bin_path_length <= MAX_FILENAME_LEN) {
		barrier_var(bin_path_length);
		proc_exec_data->bin_path_length = bin_path_length;
		payload += bin_path_length;
	}

	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
						     arg_end - arg_start, MAX_ARGS_LEN);

	if (cmdline_length <= MAX_ARGS_LEN) {
		barrier_var(cmdline_length);
		proc_exec_data->cmdline_length = cmdline_length;
		payload += cmdline_length;
	}

	if (READ_ENVIRON_FROM_EXEC) {
		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
		unsigned long env_len = probe_read_lim(payload, env_start,
						       env_end - env_start, MAX_ENVIRON_LEN);
		if (env_len <= MAX_ENVIRON_LEN) {
			proc_exec_data->environment_length = env_len;
			payload += env_len;
		}
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
	unsigned long data_len = payload - (void*)proc_exec_data;
	data_len = data_len > sizeof(struct var_exec_data_t)
			   ? sizeof(struct var_exec_data_t)
			   : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

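/*
 * Return of do_filp_open(): report writable, non-tmpfile opens of
 * regular files on allowed devices/inodes as FMOD_OPEN filemod events.
 */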
760 SEC("kretprobe/do_filp_open")
kprobe_ret__do_filp_open(struct pt_regs * ctx)761 int kprobe_ret__do_filp_open(struct pt_regs* ctx)
762 {
763 struct bpf_func_stats_ctx stats_ctx;
764 bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
765
766 struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
767
768 if (filp == NULL || IS_ERR(filp))
769 goto out;
770 unsigned int flags = BPF_CORE_READ(filp, f_flags);
771 if ((flags & (O_RDWR | O_WRONLY)) == 0)
772 goto out;
773 if ((flags & O_TMPFILE) > 0)
774 goto out;
775 struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
776 umode_t mode = BPF_CORE_READ(file_inode, i_mode);
777 if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
778 S_ISSOCK(mode))
779 goto out;
780
781 struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
782 u32 device_id = 0;
783 u64 file_ino = 0;
784 if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
785 goto out;
786
787 int zero = 0;
788 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
789 if (!filemod_data)
790 goto out;
791
792 u32 pid = get_userspace_pid();
793 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
794
795 filemod_data->meta.type = FILEMOD_EVENT;
796 filemod_data->fmod_type = FMOD_OPEN;
797 filemod_data->dst_flags = flags;
798 filemod_data->src_inode = 0;
799 filemod_data->dst_inode = file_ino;
800 filemod_data->src_device_id = 0;
801 filemod_data->dst_device_id = device_id;
802 filemod_data->src_filepath_length = 0;
803 filemod_data->dst_filepath_length = 0;
804
805 void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
806 filemod_data->payload);
807 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
808
809 size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
810 barrier_var(len);
811 if (len <= MAX_FILEPATH_LENGTH) {
812 barrier_var(len);
813 payload += len;
814 filemod_data->dst_filepath_length = len;
815 }
816 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
817 unsigned long data_len = payload - (void*)filemod_data;
818 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
819 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
820 out:
821 bpf_stats_exit(&stats_ctx);
822 return 0;
823 }
824
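/* Hard links: record source and destination dentries as FMOD_LINK. */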
825 SEC("kprobe/vfs_link")
BPF_KPROBE(kprobe__vfs_link,struct dentry * old_dentry,struct inode * dir,struct dentry * new_dentry,struct inode ** delegated_inode)826 int BPF_KPROBE(kprobe__vfs_link,
827 struct dentry* old_dentry, struct inode* dir,
828 struct dentry* new_dentry, struct inode** delegated_inode)
829 {
830 struct bpf_func_stats_ctx stats_ctx;
831 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
832
833 u32 src_device_id = 0;
834 u64 src_file_ino = 0;
835 u32 dst_device_id = 0;
836 u64 dst_file_ino = 0;
837 if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
838 !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
839 goto out;
840
841 int zero = 0;
842 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
843 if (!filemod_data)
844 goto out;
845
846 u32 pid = get_userspace_pid();
847 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
848
849 filemod_data->meta.type = FILEMOD_EVENT;
850 filemod_data->fmod_type = FMOD_LINK;
851 filemod_data->dst_flags = 0;
852 filemod_data->src_inode = src_file_ino;
853 filemod_data->dst_inode = dst_file_ino;
854 filemod_data->src_device_id = src_device_id;
855 filemod_data->dst_device_id = dst_device_id;
856 filemod_data->src_filepath_length = 0;
857 filemod_data->dst_filepath_length = 0;
858
859 void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
860 filemod_data->payload);
861 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
862
863 size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
864 barrier_var(len);
865 if (len <= MAX_FILEPATH_LENGTH) {
866 barrier_var(len);
867 payload += len;
868 filemod_data->src_filepath_length = len;
869 }
870
871 len = read_absolute_file_path_from_dentry(new_dentry, payload);
872 barrier_var(len);
873 if (len <= MAX_FILEPATH_LENGTH) {
874 barrier_var(len);
875 payload += len;
876 filemod_data->dst_filepath_length = len;
877 }
878
879 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
880 unsigned long data_len = payload - (void*)filemod_data;
881 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
882 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
883 out:
884 bpf_stats_exit(&stats_ctx);
885 return 0;
886 }
887
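/* Symlinks: record the target string and the new dentry as FMOD_SYMLINK. */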
888 SEC("kprobe/vfs_symlink")
BPF_KPROBE(kprobe__vfs_symlink,struct inode * dir,struct dentry * dentry,const char * oldname)889 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
890 const char* oldname)
891 {
892 struct bpf_func_stats_ctx stats_ctx;
893 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
894
895 u32 dst_device_id = 0;
896 u64 dst_file_ino = 0;
897 if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
898 goto out;
899
900 int zero = 0;
901 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
902 if (!filemod_data)
903 goto out;
904
905 u32 pid = get_userspace_pid();
906 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
907
908 filemod_data->meta.type = FILEMOD_EVENT;
909 filemod_data->fmod_type = FMOD_SYMLINK;
910 filemod_data->dst_flags = 0;
911 filemod_data->src_inode = 0;
912 filemod_data->dst_inode = dst_file_ino;
913 filemod_data->src_device_id = 0;
914 filemod_data->dst_device_id = dst_device_id;
915 filemod_data->src_filepath_length = 0;
916 filemod_data->dst_filepath_length = 0;
917
918 void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
919 filemod_data->payload);
920 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
921
922 size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
923 barrier_var(len);
924 if (len <= MAX_FILEPATH_LENGTH) {
925 barrier_var(len);
926 payload += len;
927 filemod_data->src_filepath_length = len;
928 }
929 len = read_absolute_file_path_from_dentry(dentry, payload);
930 barrier_var(len);
931 if (len <= MAX_FILEPATH_LENGTH) {
932 barrier_var(len);
933 payload += len;
934 filemod_data->dst_filepath_length = len;
935 }
936 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
937 unsigned long data_len = payload - (void*)filemod_data;
938 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
939 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
940 out:
941 bpf_stats_exit(&stats_ctx);
942 return 0;
943 }
944
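/* Fork: emit a FORK_EVENT linking the child to its parent's identity. */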
945 SEC("raw_tracepoint/sched_process_fork")
raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args * ctx)946 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
947 {
948 struct bpf_func_stats_ctx stats_ctx;
949 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
950
951 int zero = 0;
952 struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
953 if (!fork_data)
954 goto out;
955
956 struct task_struct* parent = (struct task_struct*)ctx->args[0];
957 struct task_struct* child = (struct task_struct*)ctx->args[1];
958 fork_data->meta.type = FORK_EVENT;
959
960 void* payload = populate_var_metadata(&fork_data->meta, child,
961 BPF_CORE_READ(child, pid), fork_data->payload);
962 fork_data->parent_pid = BPF_CORE_READ(parent, pid);
963 fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
964 fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
965 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
966
967 unsigned long data_len = payload - (void*)fork_data;
968 data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
969 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
970 out:
971 bpf_stats_exit(&stats_ctx);
972 return 0;
973 }
974 char _license[] SEC("license") = "GPL";
975