// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <test_progs.h>
#include <bpf/btf.h>
#include "task_local_storage_helpers.h"
#include "task_local_storage.skel.h"
#include "task_local_storage_exit_creds.skel.h"
#include "task_ls_recursion.skel.h"
#include "task_storage_nodeadlock.skel.h"
#include "uptr_test_common.h"
#include "task_ls_uptr.skel.h"
#include "uptr_update_failure.skel.h"
#include "uptr_failure.skel.h"
#include "uptr_map_failure.skel.h"

static void test_sys_enter_exit(void)
{
	struct task_local_storage *skel;
	int err;

	skel = task_local_storage__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	skel->bss->target_pid = sys_gettid();

	err = task_local_storage__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	sys_gettid();
	sys_gettid();

	/* 3x syscalls: 1x attach and 2x gettid */
	ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
	ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
	ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
out:
	task_local_storage__destroy(skel);
}

static void test_exit_creds(void)
{
	struct task_local_storage_exit_creds *skel;
	int err, run_count, sync_rcu_calls = 0;
	const int MAX_SYNC_RCU_CALLS = 1000;

	skel = task_local_storage_exit_creds__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	err = task_local_storage_exit_creds__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* trigger at least one exit_creds() */
	if (CHECK_FAIL(system("ls > /dev/null")))
		goto out;

	/* kern_sync_rcu is not enough on its own as the read section we want
	 * to wait for may start after we enter synchronize_rcu, so our call
	 * won't wait for the section to finish. Loop on the run counter
	 * as well to ensure the program has run.
	 */
	do {
		kern_sync_rcu();
		run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST);
	} while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS);

	ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS,
		   "sync_rcu count too high");
	ASSERT_NEQ(run_count, 0, "run_count");
	ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
	ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
out:
	task_local_storage_exit_creds__destroy(skel);
}
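
/* The task_ls_recursion programs re-enter the task local storage helpers
 * from within a storage update (see the comment in BPF_PROG(on_update)
 * referenced below).  test_recursion() triggers sys_enter once for this
 * task and then checks the resulting map values, the nr_del_errs counter
 * (exactly one bpf_task_storage_delete is expected to have returned busy)
 * and the recursion_misses stats to make sure recursion is handled without
 * deadlock.
 */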
static void test_recursion(void)
{
	int err, map_fd, prog_fd, task_fd;
	struct task_ls_recursion *skel;
	struct bpf_prog_info info;
	__u32 info_len = sizeof(info);
	long value;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
		return;

	skel = task_ls_recursion__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	err = task_ls_recursion__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* trigger sys_enter, make sure it does not cause deadlock */
	skel->bss->test_pid = getpid();
	sys_gettid();
	skel->bss->test_pid = 0;
	task_ls_recursion__detach(skel);

	/* Refer to the comment in BPF_PROG(on_update) for
	 * the explanation of the values 201 and 100.
	 */
	map_fd = bpf_map__fd(skel->maps.map_a);
	err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
	ASSERT_OK(err, "lookup map_a");
	ASSERT_EQ(value, 201, "map_a value");
	ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");

	map_fd = bpf_map__fd(skel->maps.map_b);
	err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
	ASSERT_OK(err, "lookup map_b");
	ASSERT_EQ(value, 100, "map_b value");

	prog_fd = bpf_program__fd(skel->progs.on_update);
	memset(&info, 0, sizeof(info));
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");

	prog_fd = bpf_program__fd(skel->progs.on_enter);
	memset(&info, 0, sizeof(info));
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");

out:
	close(task_fd);
	task_ls_recursion__destroy(skel);
}

static bool stop;

static void waitall(const pthread_t *tids, int nr)
{
	int i;

	stop = true;
	for (i = 0; i < nr; i++)
		pthread_join(tids[i], NULL);
}

static void *sock_create_loop(void *arg)
{
	struct task_storage_nodeadlock *skel = arg;
	int fd;

	while (!stop) {
		fd = socket(AF_INET, SOCK_STREAM, 0);
		close(fd);
		if (skel->bss->nr_get_errs || skel->bss->nr_del_errs)
			stop = true;
	}

	return NULL;
}
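
/* test_nodeadlock() stress tests task storage under preemption:
 * sock_create_loop() above creates and closes sockets in a tight loop from
 * many threads pinned to one CPU, so the sleepable socket_post_create
 * program in task_storage_nodeadlock runs back to back and can be
 * preempted while using task storage.  The BPF side counts busy returns
 * from bpf_task_storage_get()/bpf_task_storage_delete() in nr_get_errs and
 * nr_del_errs; both counters and recursion_misses are expected to stay 0.
 */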
static void test_nodeadlock(void)
{
	struct task_storage_nodeadlock *skel;
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	const int nr_threads = 32;
	pthread_t tids[nr_threads];
	int i, prog_fd, err;
	cpu_set_t old, new;

	/* Pin all threads to one cpu to increase the chance of preemption
	 * in a sleepable bpf prog.
	 */
	CPU_ZERO(&new);
	CPU_SET(0, &new);
	err = sched_getaffinity(getpid(), sizeof(old), &old);
	if (!ASSERT_OK(err, "getaffinity"))
		return;
	err = sched_setaffinity(getpid(), sizeof(new), &new);
	if (!ASSERT_OK(err, "setaffinity"))
		return;

	skel = task_storage_nodeadlock__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		goto done;

	/* Unnecessary recursion and deadlock detection are reproducible
	 * in the preemptible kernel.
	 */
	if (!skel->kconfig->CONFIG_PREEMPTION) {
		test__skip();
		goto done;
	}

	err = task_storage_nodeadlock__attach(skel);
	ASSERT_OK(err, "attach prog");

	for (i = 0; i < nr_threads; i++) {
		err = pthread_create(&tids[i], NULL, sock_create_loop, skel);
		if (err) {
			/* Only assert once here to avoid excessive
			 * PASS printing during test failure.
			 */
			ASSERT_OK(err, "pthread_create");
			waitall(tids, i);
			goto done;
		}
	}

	/* With 32 threads, 1s is enough to reproduce the issue */
	sleep(1);
	waitall(tids, nr_threads);

	info_len = sizeof(info);
	prog_fd = bpf_program__fd(skel->progs.socket_post_create);
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "prog recursion");

	ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy");
	ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");

done:
	task_storage_nodeadlock__destroy(skel);
	sched_setaffinity(getpid(), sizeof(old), &old);
}
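
/* The uptr tests below pin user memory (the udata/udata2 globals) into a
 * task local storage map whose value embeds user pointers.  As a rough
 * sketch (uptr_test_common.h is authoritative), the value layout looks
 * like:
 *
 *	struct user_data {
 *		int a;
 *		int b;
 *		int result;
 *		int nested_result;
 *	};
 *
 *	struct value_type {
 *		struct user_data __uptr *udata;
 *		struct nested_udata nested;
 *	};
 *
 * where nested wraps another __uptr to a struct user_data.  The
 * task_ls_uptr programs read a/b and write result/nested_result through
 * these pointers; check_udata2() below clears the result fields, makes a
 * syscall to run the attached program and checks what was written back.
 */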
static struct user_data udata __attribute__((aligned(16))) = {
	.a = 1,
	.b = 2,
};

static struct user_data udata2 __attribute__((aligned(16))) = {
	.a = 3,
	.b = 4,
};

static void check_udata2(int expected)
{
	udata2.result = udata2.nested_result = 0;
	usleep(1);
	ASSERT_EQ(udata2.result, expected, "udata2.result");
	ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result");
}

static void test_uptr_basic(void)
{
	int map_fd, parent_task_fd, ev_fd;
	struct value_type value = {};
	struct task_ls_uptr *skel;
	pid_t child_pid, my_tid;
	__u64 ev_dummy_data = 1;
	int err;

	my_tid = sys_gettid();
	parent_task_fd = sys_pidfd_open(my_tid, 0);
	if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd"))
		return;

	ev_fd = eventfd(0, 0);
	if (!ASSERT_OK_FD(ev_fd, "ev_fd")) {
		close(parent_task_fd);
		return;
	}

	skel = task_ls_uptr__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);
	value.udata = &udata;
	value.nested.udata = &udata;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_OK(err, "update_elem(udata)"))
		goto out;

	err = task_ls_uptr__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	child_pid = fork();
	if (!ASSERT_NEQ(child_pid, -1, "fork"))
		goto out;

	/* Call syscall in the child process, but access the map value of
	 * the parent process in the BPF program to check if the user kptr
	 * is translated/mapped correctly.
	 */
	if (child_pid == 0) {
		/* child */

		/* Overwrite the user_data in the child process to check if
		 * the BPF program accesses the user_data of the parent.
		 */
		udata.a = 0;
		udata.b = 0;

		/* Wait for the parent to set child_pid */
		read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));
		exit(0);
	}

	skel->bss->parent_pid = my_tid;
	skel->bss->target_pid = child_pid;

	write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));

	err = waitpid(child_pid, NULL, 0);
	ASSERT_EQ(err, child_pid, "waitpid");
	ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result");
	ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result");

	skel->bss->target_pid = my_tid;

	/* update_elem: uptr changes from udata to udata2 */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(MAGIC_VALUE + udata2.a + udata2.b);

	/* update_elem: uptr changes from udata2 to NULL */
	memset(&value, 0, sizeof(value));
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(0);

	/* update_elem: uptr changes from NULL to udata2 */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(MAGIC_VALUE + udata2.a + udata2.b);

	/* Check if user programs can access the value of user kptrs
	 * through bpf_map_lookup_elem(). Make sure the kernel value is not
	 * leaked.
	 */
	err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value);
	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
		goto out;
	ASSERT_EQ(value.udata, NULL, "value.udata");
	ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata");

	/* delete_elem */
	err = bpf_map_delete_elem(map_fd, &parent_task_fd);
	ASSERT_OK(err, "delete_elem(udata2)");
	check_udata2(0);

	/* update_elem: add uptr back to test map_free */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
	ASSERT_OK(err, "update_elem(udata2)");

out:
	task_ls_uptr__destroy(skel);
	close(ev_fd);
	close(parent_task_fd);
}

static void test_uptr_across_pages(void)
{
	int page_size = getpagesize();
	struct value_type value = {};
	struct task_ls_uptr *skel;
	int err, task_fd, map_fd;
	void *mem;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_OK_FD(task_fd, "task_fd"))
		return;

	mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) {
		close(task_fd);
		return;
	}

	skel = task_ls_uptr__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);
	value.udata = mem + page_size - offsetof(struct user_data, b);
	err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
	if (!ASSERT_ERR(err, "update_elem(udata)"))
		goto out;
	ASSERT_EQ(errno, EOPNOTSUPP, "errno");

	value.udata = mem + page_size - sizeof(struct user_data);
	err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
	ASSERT_OK(err, "update_elem(udata)");

out:
	task_ls_uptr__destroy(skel);
	close(task_fd);
	munmap(mem, page_size * 2);
}
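
/* test_uptr_update_failure() walks the expected error paths when updating
 * a task storage element that carries a uptr from user space: BPF_F_LOCK
 * is rejected with EOPNOTSUPP, BPF_EXIST fails with ENOENT while no
 * element exists, and a second BPF_NOEXIST update fails with EEXIST.
 * test_uptr_map_failure() then re-creates map variants from the
 * uptr_map_failure object with bpf_map_create() and checks that invalid
 * uptr value layouts are rejected with the expected errno.
 */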
static void test_uptr_update_failure(void)
{
	struct value_lock_type value = {};
	struct uptr_update_failure *skel;
	int err, task_fd, map_fd;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_OK_FD(task_fd, "task_fd"))
		return;

	skel = uptr_update_failure__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);

	value.udata = &udata;
	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK);
	if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)"))
		goto out;
	ASSERT_EQ(errno, EOPNOTSUPP, "errno");

	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST);
	if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)"))
		goto out;
	ASSERT_EQ(errno, ENOENT, "errno");

	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)"))
		goto out;

	value.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)"))
		goto out;
	ASSERT_EQ(errno, EEXIST, "errno");

out:
	uptr_update_failure__destroy(skel);
	close(task_fd);
}

static void test_uptr_map_failure(const char *map_name, int expected_errno)
{
	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
	struct uptr_map_failure *skel;
	struct bpf_map *map;
	struct btf *btf;
	int map_fd, err;

	skel = uptr_map_failure__open();
	if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open"))
		return;

	map = bpf_object__find_map_by_name(skel->obj, map_name);
	btf = bpf_object__btf(skel->obj);
	err = btf__load_into_kernel(btf);
	if (!ASSERT_OK(err, "btf__load_into_kernel"))
		goto done;

	create_attr.map_flags = bpf_map__map_flags(map);
	create_attr.btf_fd = btf__fd(btf);
	create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map);
	create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map);
	map_fd = bpf_map_create(bpf_map__type(map), map_name,
				bpf_map__key_size(map), bpf_map__value_size(map),
				0, &create_attr);
	if (ASSERT_ERR_FD(map_fd, "map_create"))
		ASSERT_EQ(errno, expected_errno, "errno");
	else
		close(map_fd);

done:
	uptr_map_failure__destroy(skel);
}

void test_task_local_storage(void)
{
	if (test__start_subtest("sys_enter_exit"))
		test_sys_enter_exit();
	if (test__start_subtest("exit_creds"))
		test_exit_creds();
	if (test__start_subtest("recursion"))
		test_recursion();
	if (test__start_subtest("nodeadlock"))
		test_nodeadlock();
	if (test__start_subtest("uptr_basic"))
		test_uptr_basic();
	if (test__start_subtest("uptr_across_pages"))
		test_uptr_across_pages();
	if (test__start_subtest("uptr_update_failure"))
		test_uptr_update_failure();
	if (test__start_subtest("uptr_map_failure_e2big")) {
		if (getpagesize() == PAGE_SIZE)
			test_uptr_map_failure("large_uptr_map", E2BIG);
		else
			test__skip();
	}
	if (test__start_subtest("uptr_map_failure_size0"))
		test_uptr_map_failure("empty_uptr_map", EINVAL);
	if (test__start_subtest("uptr_map_failure_kstruct"))
		test_uptr_map_failure("kstruct_uptr_map", EINVAL);
	RUN_TESTS(uptr_failure);
}