1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2024 Meta 3 4 #include <poll.h> 5 #include <test_progs.h> 6 #include "network_helpers.h" 7 #include "sock_iter_batch.skel.h" 8 9 #define TEST_NS "sock_iter_batch_netns" 10 #define TEST_CHILD_NS "sock_iter_batch_child_netns" 11 12 static const int init_batch_size = 16; 13 static const int nr_soreuse = 4; 14 15 struct iter_out { 16 int idx; 17 __u64 cookie; 18 } __packed; 19 20 struct sock_count { 21 __u64 cookie; 22 int count; 23 }; 24 25 static int insert(__u64 cookie, struct sock_count counts[], int counts_len) 26 { 27 int insert = -1; 28 int i = 0; 29 30 for (; i < counts_len; i++) { 31 if (!counts[i].cookie) { 32 insert = i; 33 } else if (counts[i].cookie == cookie) { 34 insert = i; 35 break; 36 } 37 } 38 if (insert < 0) 39 return insert; 40 41 counts[insert].cookie = cookie; 42 counts[insert].count++; 43 44 return counts[insert].count; 45 } 46 47 static int read_n(int iter_fd, int n, struct sock_count counts[], 48 int counts_len) 49 { 50 struct iter_out out; 51 int nread = 1; 52 int i = 0; 53 54 for (; nread > 0 && (n < 0 || i < n); i++) { 55 nread = read(iter_fd, &out, sizeof(out)); 56 if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread")) 57 break; 58 ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert"); 59 } 60 61 ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n"); 62 63 return i; 64 } 65 66 static __u64 socket_cookie(int fd) 67 { 68 __u64 cookie; 69 socklen_t cookie_len = sizeof(cookie); 70 71 if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, 72 &cookie_len), "getsockopt(SO_COOKIE)")) 73 return 0; 74 return cookie; 75 } 76 77 static bool was_seen(int fd, struct sock_count counts[], int counts_len) 78 { 79 __u64 cookie = socket_cookie(fd); 80 int i = 0; 81 82 for (; cookie && i < counts_len; i++) 83 if (cookie == counts[i].cookie) 84 return true; 85 86 return false; 87 } 88 89 static int get_seen_socket(int *fds, struct sock_count counts[], int n) 90 { 91 int i = 0; 92 93 for (; i < n; i++) 94 if (was_seen(fds[i], counts, n)) 95 return i; 96 return -1; 97 } 98 99 static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n) 100 { 101 int i, nread, iter_fd; 102 int nth_sock_idx = -1; 103 struct iter_out out; 104 105 iter_fd = bpf_iter_create(bpf_link__fd(link)); 106 if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) 107 return -1; 108 109 for (; n >= 0; n--) { 110 nread = read(iter_fd, &out, sizeof(out)); 111 if (!nread || !ASSERT_GE(nread, 1, "nread")) 112 goto done; 113 } 114 115 for (i = 0; i < fds_len && nth_sock_idx < 0; i++) 116 if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie) 117 nth_sock_idx = i; 118 done: 119 close(iter_fd); 120 return nth_sock_idx; 121 } 122 123 static void destroy(int fd) 124 { 125 struct sock_iter_batch *skel = NULL; 126 __u64 cookie = socket_cookie(fd); 127 struct bpf_link *link = NULL; 128 int iter_fd = -1; 129 int nread; 130 __u64 out; 131 132 skel = sock_iter_batch__open(); 133 if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) 134 goto done; 135 136 skel->rodata->destroy_cookie = cookie; 137 138 if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load")) 139 goto done; 140 141 link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL); 142 if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) 143 goto done; 144 145 iter_fd = bpf_iter_create(bpf_link__fd(link)); 146 if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) 147 goto done; 148 149 /* Delete matching socket. */ 150 nread = read(iter_fd, &out, sizeof(out)); 151 ASSERT_GE(nread, 0, "nread"); 152 if (nread) 153 ASSERT_EQ(out, cookie, "cookie matches"); 154 done: 155 if (iter_fd >= 0) 156 close(iter_fd); 157 bpf_link__destroy(link); 158 sock_iter_batch__destroy(skel); 159 close(fd); 160 } 161 162 static int get_seen_count(int fd, struct sock_count counts[], int n) 163 { 164 __u64 cookie = socket_cookie(fd); 165 int count = 0; 166 int i = 0; 167 168 for (; cookie && !count && i < n; i++) 169 if (cookie == counts[i].cookie) 170 count = counts[i].count; 171 172 return count; 173 } 174 175 static void check_n_were_seen_once(int *fds, int fds_len, int n, 176 struct sock_count counts[], int counts_len) 177 { 178 int seen_once = 0; 179 int seen_cnt; 180 int i = 0; 181 182 for (; i < fds_len; i++) { 183 /* Skip any sockets that were closed or that weren't seen 184 * exactly once. 185 */ 186 if (fds[i] < 0) 187 continue; 188 seen_cnt = get_seen_count(fds[i], counts, counts_len); 189 if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt")) 190 seen_once++; 191 } 192 193 ASSERT_EQ(seen_once, n, "seen_once"); 194 } 195 196 static int accept_from_one(struct pollfd *server_poll_fds, 197 int server_poll_fds_len) 198 { 199 static const int poll_timeout_ms = 5000; /* 5s */ 200 int ret; 201 int i; 202 203 ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms); 204 if (!ASSERT_EQ(ret, 1, "poll")) 205 return -1; 206 207 for (i = 0; i < server_poll_fds_len; i++) 208 if (server_poll_fds[i].revents & POLLIN) 209 return accept(server_poll_fds[i].fd, NULL, NULL); 210 211 return -1; 212 } 213 214 static int *connect_to_server(int family, int sock_type, const char *addr, 215 __u16 port, int nr_connects, int *server_fds, 216 int server_fds_len) 217 { 218 struct pollfd *server_poll_fds = NULL; 219 int *established_socks = NULL; 220 int i; 221 222 server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds)); 223 if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds")) 224 return NULL; 225 226 for (i = 0; i < server_fds_len; i++) { 227 server_poll_fds[i].fd = server_fds[i]; 228 server_poll_fds[i].events = POLLIN; 229 } 230 231 i = 0; 232 233 established_socks = malloc(sizeof(*established_socks) * nr_connects*2); 234 if (!ASSERT_OK_PTR(established_socks, "established_socks")) 235 goto error; 236 237 while (nr_connects--) { 238 established_socks[i] = connect_to_addr_str(family, sock_type, 239 addr, port, NULL); 240 if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str")) 241 goto error; 242 i++; 243 established_socks[i] = accept_from_one(server_poll_fds, 244 server_fds_len); 245 if (!ASSERT_OK_FD(established_socks[i], "accept_from_one")) 246 goto error; 247 i++; 248 } 249 250 free(server_poll_fds); 251 return established_socks; 252 error: 253 free_fds(established_socks, i); 254 free(server_poll_fds); 255 return NULL; 256 } 257 258 static void remove_seen(int family, int sock_type, const char *addr, __u16 port, 259 int *socks, int socks_len, int *established_socks, 260 int established_socks_len, struct sock_count *counts, 261 int counts_len, struct bpf_link *link, int iter_fd) 262 { 263 int close_idx; 264 265 /* Iterate through the first socks_len - 1 sockets. */ 266 read_n(iter_fd, socks_len - 1, counts, counts_len); 267 268 /* Make sure we saw socks_len - 1 sockets exactly once. */ 269 check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, 270 counts_len); 271 272 /* Close a socket we've already seen to remove it from the bucket. */ 273 close_idx = get_seen_socket(socks, counts, counts_len); 274 if (!ASSERT_GE(close_idx, 0, "close_idx")) 275 return; 276 close(socks[close_idx]); 277 socks[close_idx] = -1; 278 279 /* Iterate through the rest of the sockets. */ 280 read_n(iter_fd, -1, counts, counts_len); 281 282 /* Make sure the last socket wasn't skipped and that there were no 283 * repeats. 284 */ 285 check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, 286 counts_len); 287 } 288 289 static void remove_seen_established(int family, int sock_type, const char *addr, 290 __u16 port, int *listen_socks, 291 int listen_socks_len, int *established_socks, 292 int established_socks_len, 293 struct sock_count *counts, int counts_len, 294 struct bpf_link *link, int iter_fd) 295 { 296 int close_idx; 297 298 /* Iterate through all listening sockets. */ 299 read_n(iter_fd, listen_socks_len, counts, counts_len); 300 301 /* Make sure we saw all listening sockets exactly once. */ 302 check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, 303 counts, counts_len); 304 305 /* Leave one established socket. */ 306 read_n(iter_fd, established_socks_len - 1, counts, counts_len); 307 308 /* Close a socket we've already seen to remove it from the bucket. */ 309 close_idx = get_nth_socket(established_socks, established_socks_len, 310 link, listen_socks_len + 1); 311 if (!ASSERT_GE(close_idx, 0, "close_idx")) 312 return; 313 destroy(established_socks[close_idx]); 314 established_socks[close_idx] = -1; 315 316 /* Iterate through the rest of the sockets. */ 317 read_n(iter_fd, -1, counts, counts_len); 318 319 /* Make sure the last socket wasn't skipped and that there were no 320 * repeats. 321 */ 322 check_n_were_seen_once(established_socks, established_socks_len, 323 established_socks_len - 1, counts, counts_len); 324 } 325 326 static void remove_unseen(int family, int sock_type, const char *addr, 327 __u16 port, int *socks, int socks_len, 328 int *established_socks, int established_socks_len, 329 struct sock_count *counts, int counts_len, 330 struct bpf_link *link, int iter_fd) 331 { 332 int close_idx; 333 334 /* Iterate through the first socket. */ 335 read_n(iter_fd, 1, counts, counts_len); 336 337 /* Make sure we saw a socket from fds. */ 338 check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); 339 340 /* Close what would be the next socket in the bucket to exercise the 341 * condition where we need to skip past the first cookie we remembered. 342 */ 343 close_idx = get_nth_socket(socks, socks_len, link, 1); 344 if (!ASSERT_GE(close_idx, 0, "close_idx")) 345 return; 346 close(socks[close_idx]); 347 socks[close_idx] = -1; 348 349 /* Iterate through the rest of the sockets. */ 350 read_n(iter_fd, -1, counts, counts_len); 351 352 /* Make sure the remaining sockets were seen exactly once and that we 353 * didn't repeat the socket that was already seen. 354 */ 355 check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, 356 counts_len); 357 } 358 359 static void remove_unseen_established(int family, int sock_type, 360 const char *addr, __u16 port, 361 int *listen_socks, int listen_socks_len, 362 int *established_socks, 363 int established_socks_len, 364 struct sock_count *counts, int counts_len, 365 struct bpf_link *link, int iter_fd) 366 { 367 int close_idx; 368 369 /* Iterate through all listening sockets. */ 370 read_n(iter_fd, listen_socks_len, counts, counts_len); 371 372 /* Make sure we saw all listening sockets exactly once. */ 373 check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, 374 counts, counts_len); 375 376 /* Iterate through the first established socket. */ 377 read_n(iter_fd, 1, counts, counts_len); 378 379 /* Make sure we saw one established socks. */ 380 check_n_were_seen_once(established_socks, established_socks_len, 1, 381 counts, counts_len); 382 383 /* Close what would be the next socket in the bucket to exercise the 384 * condition where we need to skip past the first cookie we remembered. 385 */ 386 close_idx = get_nth_socket(established_socks, established_socks_len, 387 link, listen_socks_len + 1); 388 if (!ASSERT_GE(close_idx, 0, "close_idx")) 389 return; 390 391 destroy(established_socks[close_idx]); 392 established_socks[close_idx] = -1; 393 394 /* Iterate through the rest of the sockets. */ 395 read_n(iter_fd, -1, counts, counts_len); 396 397 /* Make sure the remaining sockets were seen exactly once and that we 398 * didn't repeat the socket that was already seen. 399 */ 400 check_n_were_seen_once(established_socks, established_socks_len, 401 established_socks_len - 1, counts, counts_len); 402 } 403 404 static void remove_all(int family, int sock_type, const char *addr, 405 __u16 port, int *socks, int socks_len, 406 int *established_socks, int established_socks_len, 407 struct sock_count *counts, int counts_len, 408 struct bpf_link *link, int iter_fd) 409 { 410 int close_idx, i; 411 412 /* Iterate through the first socket. */ 413 read_n(iter_fd, 1, counts, counts_len); 414 415 /* Make sure we saw a socket from fds. */ 416 check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); 417 418 /* Close all remaining sockets to exhaust the list of saved cookies and 419 * exit without putting any sockets into the batch on the next read. 420 */ 421 for (i = 0; i < socks_len - 1; i++) { 422 close_idx = get_nth_socket(socks, socks_len, link, 1); 423 if (!ASSERT_GE(close_idx, 0, "close_idx")) 424 return; 425 close(socks[close_idx]); 426 socks[close_idx] = -1; 427 } 428 429 /* Make sure there are no more sockets returned */ 430 ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); 431 } 432 433 static void remove_all_established(int family, int sock_type, const char *addr, 434 __u16 port, int *listen_socks, 435 int listen_socks_len, int *established_socks, 436 int established_socks_len, 437 struct sock_count *counts, int counts_len, 438 struct bpf_link *link, int iter_fd) 439 { 440 int *close_idx = NULL; 441 int i; 442 443 /* Iterate through all listening sockets. */ 444 read_n(iter_fd, listen_socks_len, counts, counts_len); 445 446 /* Make sure we saw all listening sockets exactly once. */ 447 check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, 448 counts, counts_len); 449 450 /* Iterate through the first established socket. */ 451 read_n(iter_fd, 1, counts, counts_len); 452 453 /* Make sure we saw one established socks. */ 454 check_n_were_seen_once(established_socks, established_socks_len, 1, 455 counts, counts_len); 456 457 /* Close all remaining sockets to exhaust the list of saved cookies and 458 * exit without putting any sockets into the batch on the next read. 459 */ 460 close_idx = malloc(sizeof(int) * (established_socks_len - 1)); 461 if (!ASSERT_OK_PTR(close_idx, "close_idx malloc")) 462 return; 463 for (i = 0; i < established_socks_len - 1; i++) { 464 close_idx[i] = get_nth_socket(established_socks, 465 established_socks_len, link, 466 listen_socks_len + i); 467 if (!ASSERT_GE(close_idx[i], 0, "close_idx")) 468 return; 469 } 470 471 for (i = 0; i < established_socks_len - 1; i++) { 472 destroy(established_socks[close_idx[i]]); 473 established_socks[close_idx[i]] = -1; 474 } 475 476 /* Make sure there are no more sockets returned */ 477 ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); 478 free(close_idx); 479 } 480 481 static void add_some(int family, int sock_type, const char *addr, __u16 port, 482 int *socks, int socks_len, int *established_socks, 483 int established_socks_len, struct sock_count *counts, 484 int counts_len, struct bpf_link *link, int iter_fd) 485 { 486 int *new_socks = NULL; 487 488 /* Iterate through the first socks_len - 1 sockets. */ 489 read_n(iter_fd, socks_len - 1, counts, counts_len); 490 491 /* Make sure we saw socks_len - 1 sockets exactly once. */ 492 check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, 493 counts_len); 494 495 /* Double the number of sockets in the bucket. */ 496 new_socks = start_reuseport_server(family, sock_type, addr, port, 0, 497 socks_len); 498 if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) 499 goto done; 500 501 /* Iterate through the rest of the sockets. */ 502 read_n(iter_fd, -1, counts, counts_len); 503 504 /* Make sure each of the original sockets was seen exactly once. */ 505 check_n_were_seen_once(socks, socks_len, socks_len, counts, 506 counts_len); 507 done: 508 free_fds(new_socks, socks_len); 509 } 510 511 static void add_some_established(int family, int sock_type, const char *addr, 512 __u16 port, int *listen_socks, 513 int listen_socks_len, int *established_socks, 514 int established_socks_len, 515 struct sock_count *counts, 516 int counts_len, struct bpf_link *link, 517 int iter_fd) 518 { 519 int *new_socks = NULL; 520 521 /* Iterate through all listening sockets. */ 522 read_n(iter_fd, listen_socks_len, counts, counts_len); 523 524 /* Make sure we saw all listening sockets exactly once. */ 525 check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, 526 counts, counts_len); 527 528 /* Iterate through the first established_socks_len - 1 sockets. */ 529 read_n(iter_fd, established_socks_len - 1, counts, counts_len); 530 531 /* Make sure we saw established_socks_len - 1 sockets exactly once. */ 532 check_n_were_seen_once(established_socks, established_socks_len, 533 established_socks_len - 1, counts, counts_len); 534 535 /* Double the number of established sockets in the bucket. */ 536 new_socks = connect_to_server(family, sock_type, addr, port, 537 established_socks_len / 2, listen_socks, 538 listen_socks_len); 539 if (!ASSERT_OK_PTR(new_socks, "connect_to_server")) 540 goto done; 541 542 /* Iterate through the rest of the sockets. */ 543 read_n(iter_fd, -1, counts, counts_len); 544 545 /* Make sure each of the original sockets was seen exactly once. */ 546 check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, 547 counts, counts_len); 548 check_n_were_seen_once(established_socks, established_socks_len, 549 established_socks_len, counts, counts_len); 550 done: 551 free_fds(new_socks, established_socks_len); 552 } 553 554 static void force_realloc(int family, int sock_type, const char *addr, 555 __u16 port, int *socks, int socks_len, 556 int *established_socks, int established_socks_len, 557 struct sock_count *counts, int counts_len, 558 struct bpf_link *link, int iter_fd) 559 { 560 int *new_socks = NULL; 561 562 /* Iterate through the first socket just to initialize the batch. */ 563 read_n(iter_fd, 1, counts, counts_len); 564 565 /* Double the number of sockets in the bucket to force a realloc on the 566 * next read. 567 */ 568 new_socks = start_reuseport_server(family, sock_type, addr, port, 0, 569 socks_len); 570 if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) 571 goto done; 572 573 /* Iterate through the rest of the sockets. */ 574 read_n(iter_fd, -1, counts, counts_len); 575 576 /* Make sure each socket from the first set was seen exactly once. */ 577 check_n_were_seen_once(socks, socks_len, socks_len, counts, 578 counts_len); 579 done: 580 free_fds(new_socks, socks_len); 581 } 582 583 static void force_realloc_established(int family, int sock_type, 584 const char *addr, __u16 port, 585 int *listen_socks, int listen_socks_len, 586 int *established_socks, 587 int established_socks_len, 588 struct sock_count *counts, int counts_len, 589 struct bpf_link *link, int iter_fd) 590 { 591 /* Iterate through all sockets to trigger a realloc. */ 592 read_n(iter_fd, -1, counts, counts_len); 593 594 /* Make sure each socket was seen exactly once. */ 595 check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, 596 counts, counts_len); 597 check_n_were_seen_once(established_socks, established_socks_len, 598 established_socks_len, counts, counts_len); 599 } 600 601 struct test_case { 602 void (*test)(int family, int sock_type, const char *addr, __u16 port, 603 int *socks, int socks_len, int *established_socks, 604 int established_socks_len, struct sock_count *counts, 605 int counts_len, struct bpf_link *link, int iter_fd); 606 const char *description; 607 int ehash_buckets; 608 int connections; 609 int init_socks; 610 int max_socks; 611 int sock_type; 612 int family; 613 }; 614 615 static struct test_case resume_tests[] = { 616 { 617 .description = "udp: resume after removing a seen socket", 618 .init_socks = nr_soreuse, 619 .max_socks = nr_soreuse, 620 .sock_type = SOCK_DGRAM, 621 .family = AF_INET6, 622 .test = remove_seen, 623 }, 624 { 625 .description = "udp: resume after removing one unseen socket", 626 .init_socks = nr_soreuse, 627 .max_socks = nr_soreuse, 628 .sock_type = SOCK_DGRAM, 629 .family = AF_INET6, 630 .test = remove_unseen, 631 }, 632 { 633 .description = "udp: resume after removing all unseen sockets", 634 .init_socks = nr_soreuse, 635 .max_socks = nr_soreuse, 636 .sock_type = SOCK_DGRAM, 637 .family = AF_INET6, 638 .test = remove_all, 639 }, 640 { 641 .description = "udp: resume after adding a few sockets", 642 .init_socks = nr_soreuse, 643 .max_socks = nr_soreuse, 644 .sock_type = SOCK_DGRAM, 645 /* Use AF_INET so that new sockets are added to the head of the 646 * bucket's list. 647 */ 648 .family = AF_INET, 649 .test = add_some, 650 }, 651 { 652 .description = "udp: force a realloc to occur", 653 .init_socks = init_batch_size, 654 .max_socks = init_batch_size * 2, 655 .sock_type = SOCK_DGRAM, 656 /* Use AF_INET6 so that new sockets are added to the tail of the 657 * bucket's list, needing to be added to the next batch to force 658 * a realloc. 659 */ 660 .family = AF_INET6, 661 .test = force_realloc, 662 }, 663 { 664 .description = "tcp: resume after removing a seen socket (listening)", 665 .init_socks = nr_soreuse, 666 .max_socks = nr_soreuse, 667 .sock_type = SOCK_STREAM, 668 .family = AF_INET6, 669 .test = remove_seen, 670 }, 671 { 672 .description = "tcp: resume after removing one unseen socket (listening)", 673 .init_socks = nr_soreuse, 674 .max_socks = nr_soreuse, 675 .sock_type = SOCK_STREAM, 676 .family = AF_INET6, 677 .test = remove_unseen, 678 }, 679 { 680 .description = "tcp: resume after removing all unseen sockets (listening)", 681 .init_socks = nr_soreuse, 682 .max_socks = nr_soreuse, 683 .sock_type = SOCK_STREAM, 684 .family = AF_INET6, 685 .test = remove_all, 686 }, 687 { 688 .description = "tcp: resume after adding a few sockets (listening)", 689 .init_socks = nr_soreuse, 690 .max_socks = nr_soreuse, 691 .sock_type = SOCK_STREAM, 692 /* Use AF_INET so that new sockets are added to the head of the 693 * bucket's list. 694 */ 695 .family = AF_INET, 696 .test = add_some, 697 }, 698 { 699 .description = "tcp: force a realloc to occur (listening)", 700 .init_socks = init_batch_size, 701 .max_socks = init_batch_size * 2, 702 .sock_type = SOCK_STREAM, 703 /* Use AF_INET6 so that new sockets are added to the tail of the 704 * bucket's list, needing to be added to the next batch to force 705 * a realloc. 706 */ 707 .family = AF_INET6, 708 .test = force_realloc, 709 }, 710 { 711 .description = "tcp: resume after removing a seen socket (established)", 712 /* Force all established sockets into one bucket */ 713 .ehash_buckets = 1, 714 .connections = nr_soreuse, 715 .init_socks = nr_soreuse, 716 /* Room for connect()ed and accept()ed sockets */ 717 .max_socks = nr_soreuse * 3, 718 .sock_type = SOCK_STREAM, 719 .family = AF_INET6, 720 .test = remove_seen_established, 721 }, 722 { 723 .description = "tcp: resume after removing one unseen socket (established)", 724 /* Force all established sockets into one bucket */ 725 .ehash_buckets = 1, 726 .connections = nr_soreuse, 727 .init_socks = nr_soreuse, 728 /* Room for connect()ed and accept()ed sockets */ 729 .max_socks = nr_soreuse * 3, 730 .sock_type = SOCK_STREAM, 731 .family = AF_INET6, 732 .test = remove_unseen_established, 733 }, 734 { 735 .description = "tcp: resume after removing all unseen sockets (established)", 736 /* Force all established sockets into one bucket */ 737 .ehash_buckets = 1, 738 .connections = nr_soreuse, 739 .init_socks = nr_soreuse, 740 /* Room for connect()ed and accept()ed sockets */ 741 .max_socks = nr_soreuse * 3, 742 .sock_type = SOCK_STREAM, 743 .family = AF_INET6, 744 .test = remove_all_established, 745 }, 746 { 747 .description = "tcp: resume after adding a few sockets (established)", 748 /* Force all established sockets into one bucket */ 749 .ehash_buckets = 1, 750 .connections = nr_soreuse, 751 .init_socks = nr_soreuse, 752 /* Room for connect()ed and accept()ed sockets */ 753 .max_socks = nr_soreuse * 3, 754 .sock_type = SOCK_STREAM, 755 .family = AF_INET6, 756 .test = add_some_established, 757 }, 758 { 759 .description = "tcp: force a realloc to occur (established)", 760 /* Force all established sockets into one bucket */ 761 .ehash_buckets = 1, 762 /* Bucket size will need to double when going from listening to 763 * established sockets. 764 */ 765 .connections = init_batch_size, 766 .init_socks = nr_soreuse, 767 /* Room for connect()ed and accept()ed sockets */ 768 .max_socks = nr_soreuse + (init_batch_size * 2), 769 .sock_type = SOCK_STREAM, 770 .family = AF_INET6, 771 .test = force_realloc_established, 772 }, 773 }; 774 775 static void do_resume_test(struct test_case *tc) 776 { 777 struct sock_iter_batch *skel = NULL; 778 struct sock_count *counts = NULL; 779 static const __u16 port = 10001; 780 struct nstoken *nstoken = NULL; 781 struct bpf_link *link = NULL; 782 int *established_fds = NULL; 783 int err, iter_fd = -1; 784 const char *addr; 785 int *fds = NULL; 786 787 if (tc->ehash_buckets) { 788 SYS_NOFAIL("ip netns del " TEST_CHILD_NS); 789 SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d", 790 tc->ehash_buckets); 791 SYS(done, "ip netns add %s", TEST_CHILD_NS); 792 SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS); 793 nstoken = open_netns(TEST_CHILD_NS); 794 if (!ASSERT_OK_PTR(nstoken, "open_child_netns")) 795 goto done; 796 } 797 798 counts = calloc(tc->max_socks, sizeof(*counts)); 799 if (!ASSERT_OK_PTR(counts, "counts")) 800 goto done; 801 skel = sock_iter_batch__open(); 802 if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) 803 goto done; 804 805 /* Prepare a bucket of sockets in the kernel hashtable */ 806 addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1"; 807 fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0, 808 tc->init_socks); 809 if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) 810 goto done; 811 if (tc->connections) { 812 established_fds = connect_to_server(tc->family, tc->sock_type, 813 addr, port, 814 tc->connections, fds, 815 tc->init_socks); 816 if (!ASSERT_OK_PTR(established_fds, "connect_to_server")) 817 goto done; 818 } 819 skel->rodata->ports[0] = 0; 820 skel->rodata->ports[1] = 0; 821 skel->rodata->sf = tc->family; 822 skel->rodata->ss = 0; 823 824 err = sock_iter_batch__load(skel); 825 if (!ASSERT_OK(err, "sock_iter_batch__load")) 826 goto done; 827 828 link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ? 829 skel->progs.iter_tcp_soreuse : 830 skel->progs.iter_udp_soreuse, 831 NULL); 832 if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) 833 goto done; 834 835 iter_fd = bpf_iter_create(bpf_link__fd(link)); 836 if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) 837 goto done; 838 839 tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, 840 established_fds, tc->connections*2, counts, tc->max_socks, 841 link, iter_fd); 842 done: 843 close_netns(nstoken); 844 SYS_NOFAIL("ip netns del " TEST_CHILD_NS); 845 SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0"); 846 free(counts); 847 free_fds(fds, tc->init_socks); 848 free_fds(established_fds, tc->connections*2); 849 if (iter_fd >= 0) 850 close(iter_fd); 851 bpf_link__destroy(link); 852 sock_iter_batch__destroy(skel); 853 } 854 855 static void do_resume_tests(void) 856 { 857 int i; 858 859 for (i = 0; i < ARRAY_SIZE(resume_tests); i++) { 860 if (test__start_subtest(resume_tests[i].description)) { 861 do_resume_test(&resume_tests[i]); 862 } 863 } 864 } 865 866 static void do_test(int sock_type, bool onebyone) 867 { 868 int err, i, nread, to_read, total_read, iter_fd = -1; 869 struct iter_out outputs[nr_soreuse]; 870 struct bpf_link *link = NULL; 871 struct sock_iter_batch *skel; 872 int first_idx, second_idx; 873 int *fds[2] = {}; 874 875 skel = sock_iter_batch__open(); 876 if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) 877 return; 878 879 /* Prepare 2 buckets of sockets in the kernel hashtable */ 880 for (i = 0; i < ARRAY_SIZE(fds); i++) { 881 int local_port; 882 883 fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0, 884 nr_soreuse); 885 if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server")) 886 goto done; 887 local_port = get_socket_local_port(*fds[i]); 888 if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) 889 goto done; 890 skel->rodata->ports[i] = ntohs(local_port); 891 } 892 skel->rodata->sf = AF_INET6; 893 if (sock_type == SOCK_STREAM) 894 skel->rodata->ss = TCP_LISTEN; 895 896 err = sock_iter_batch__load(skel); 897 if (!ASSERT_OK(err, "sock_iter_batch__load")) 898 goto done; 899 900 link = bpf_program__attach_iter(sock_type == SOCK_STREAM ? 901 skel->progs.iter_tcp_soreuse : 902 skel->progs.iter_udp_soreuse, 903 NULL); 904 if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) 905 goto done; 906 907 iter_fd = bpf_iter_create(bpf_link__fd(link)); 908 if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) 909 goto done; 910 911 /* Test reading a bucket (either from fds[0] or fds[1]). 912 * Only read "nr_soreuse - 1" number of sockets 913 * from a bucket and leave one socket out from 914 * that bucket on purpose. 915 */ 916 to_read = (nr_soreuse - 1) * sizeof(*outputs); 917 total_read = 0; 918 first_idx = -1; 919 do { 920 nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); 921 if (nread <= 0 || nread % sizeof(*outputs)) 922 break; 923 total_read += nread; 924 925 if (first_idx == -1) 926 first_idx = outputs[0].idx; 927 for (i = 0; i < nread / sizeof(*outputs); i++) 928 ASSERT_EQ(outputs[i].idx, first_idx, "first_idx"); 929 } while (total_read < to_read); 930 ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread"); 931 ASSERT_EQ(total_read, to_read, "total_read"); 932 933 free_fds(fds[first_idx], nr_soreuse); 934 fds[first_idx] = NULL; 935 936 /* Read the "whole" second bucket */ 937 to_read = nr_soreuse * sizeof(*outputs); 938 total_read = 0; 939 second_idx = !first_idx; 940 do { 941 nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); 942 if (nread <= 0 || nread % sizeof(*outputs)) 943 break; 944 total_read += nread; 945 946 for (i = 0; i < nread / sizeof(*outputs); i++) 947 ASSERT_EQ(outputs[i].idx, second_idx, "second_idx"); 948 } while (total_read <= to_read); 949 ASSERT_EQ(nread, 0, "nread"); 950 /* Both so_reuseport ports should be in different buckets, so 951 * total_read must equal to the expected to_read. 952 * 953 * For a very unlikely case, both ports collide at the same bucket, 954 * the bucket offset (i.e. 3) will be skipped and it cannot 955 * expect the to_read number of bytes. 956 */ 957 if (skel->bss->bucket[0] != skel->bss->bucket[1]) 958 ASSERT_EQ(total_read, to_read, "total_read"); 959 960 done: 961 for (i = 0; i < ARRAY_SIZE(fds); i++) 962 free_fds(fds[i], nr_soreuse); 963 if (iter_fd < 0) 964 close(iter_fd); 965 bpf_link__destroy(link); 966 sock_iter_batch__destroy(skel); 967 } 968 969 void test_sock_iter_batch(void) 970 { 971 struct nstoken *nstoken = NULL; 972 973 SYS_NOFAIL("ip netns del " TEST_NS); 974 SYS(done, "ip netns add %s", TEST_NS); 975 SYS(done, "ip -net %s link set dev lo up", TEST_NS); 976 977 nstoken = open_netns(TEST_NS); 978 if (!ASSERT_OK_PTR(nstoken, "open_netns")) 979 goto done; 980 981 if (test__start_subtest("tcp")) { 982 do_test(SOCK_STREAM, true); 983 do_test(SOCK_STREAM, false); 984 } 985 if (test__start_subtest("udp")) { 986 do_test(SOCK_DGRAM, true); 987 do_test(SOCK_DGRAM, false); 988 } 989 do_resume_tests(); 990 close_netns(nstoken); 991 992 done: 993 SYS_NOFAIL("ip netns del " TEST_NS); 994 } 995