xref: /linux/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c (revision 6e375b236317c19cf3e4da40285ef5b2f0da1899)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2024 Meta
3 
4 #include <poll.h>
5 #include <test_progs.h>
6 #include "network_helpers.h"
7 #include "sock_iter_batch.skel.h"
8 
/* Network namespace created and entered by test_sock_iter_batch(). */
9 #define TEST_NS "sock_iter_batch_netns"
/* Extra namespace that do_resume_test() creates when a test case sets
 * ehash_buckets.
 */
10 #define TEST_CHILD_NS "sock_iter_batch_child_netns"
11 
/* Socket count used by the "force a realloc" test cases; presumably mirrors
 * the iterator's initial batch capacity in the kernel - TODO confirm.
 */
12 static const int init_batch_size = 16;
/* Number of reuseport sockets started per port/bucket. */
13 static const int nr_soreuse = 4;
14 
/* One record as read() back from the iterator fd. read_n(), get_nth_socket()
 * and do_test() all consume whole instances of this struct.
 */
15 struct iter_out {
16 	int idx;	/* compared with the fds[] bucket index in do_test() */
17 	__u64 cookie;	/* compared with SO_COOKIE values of the test's sockets */
18 } __packed;
19 
/* Tally of how many times one socket was returned by the iterator. */
20 struct sock_count {
21 	__u64 cookie;	/* socket cookie; insert() treats 0 as an unused slot */
22 	int count;	/* incremented by insert() on every sighting */
23 };
24 
25 static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
26 {
27 	int insert = -1;
28 	int i = 0;
29 
30 	for (; i < counts_len; i++) {
31 		if (!counts[i].cookie) {
32 			insert = i;
33 		} else if (counts[i].cookie == cookie) {
34 			insert = i;
35 			break;
36 		}
37 	}
38 	if (insert < 0)
39 		return insert;
40 
41 	counts[insert].cookie = cookie;
42 	counts[insert].count++;
43 
44 	return counts[insert].count;
45 }
46 
47 static int read_n(int iter_fd, int n, struct sock_count counts[],
48 		  int counts_len)
49 {
50 	struct iter_out out;
51 	int nread = 1;
52 	int i = 0;
53 
54 	for (; nread > 0 && (n < 0 || i < n); i++) {
55 		nread = read(iter_fd, &out, sizeof(out));
56 		if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
57 			break;
58 		ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
59 	}
60 
61 	ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
62 
63 	return i;
64 }
65 
66 static __u64 socket_cookie(int fd)
67 {
68 	__u64 cookie;
69 	socklen_t cookie_len = sizeof(cookie);
70 
71 	if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
72 				  &cookie_len), "getsockopt(SO_COOKIE)"))
73 		return 0;
74 	return cookie;
75 }
76 
77 static bool was_seen(int fd, struct sock_count counts[], int counts_len)
78 {
79 	__u64 cookie = socket_cookie(fd);
80 	int i = 0;
81 
82 	for (; cookie && i < counts_len; i++)
83 		if (cookie == counts[i].cookie)
84 			return true;
85 
86 	return false;
87 }
88 
89 static int get_seen_socket(int *fds, struct sock_count counts[], int n)
90 {
91 	int i = 0;
92 
93 	for (; i < n; i++)
94 		if (was_seen(fds[i], counts, n))
95 			return i;
96 	return -1;
97 }
98 
99 static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
100 {
101 	int i, nread, iter_fd;
102 	int nth_sock_idx = -1;
103 	struct iter_out out;
104 
105 	iter_fd = bpf_iter_create(bpf_link__fd(link));
106 	if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
107 		return -1;
108 
109 	for (; n >= 0; n--) {
110 		nread = read(iter_fd, &out, sizeof(out));
111 		if (!nread || !ASSERT_GE(nread, 1, "nread"))
112 			goto done;
113 	}
114 
115 	for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
116 		if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
117 			nth_sock_idx = i;
118 done:
119 	close(iter_fd);
120 	return nth_sock_idx;
121 }
122 
123 static void destroy(int fd)
124 {
125 	struct sock_iter_batch *skel = NULL;
126 	__u64 cookie = socket_cookie(fd);
127 	struct bpf_link *link = NULL;
128 	int iter_fd = -1;
129 	int nread;
130 	__u64 out;
131 
132 	skel = sock_iter_batch__open();
133 	if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
134 		goto done;
135 
136 	skel->rodata->destroy_cookie = cookie;
137 
138 	if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
139 		goto done;
140 
141 	link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
142 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
143 		goto done;
144 
145 	iter_fd = bpf_iter_create(bpf_link__fd(link));
146 	if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
147 		goto done;
148 
149 	/* Delete matching socket. */
150 	nread = read(iter_fd, &out, sizeof(out));
151 	ASSERT_GE(nread, 0, "nread");
152 	if (nread)
153 		ASSERT_EQ(out, cookie, "cookie matches");
154 done:
155 	if (iter_fd >= 0)
156 		close(iter_fd);
157 	bpf_link__destroy(link);
158 	sock_iter_batch__destroy(skel);
159 	close(fd);
160 }
161 
162 static int get_seen_count(int fd, struct sock_count counts[], int n)
163 {
164 	__u64 cookie = socket_cookie(fd);
165 	int count = 0;
166 	int i = 0;
167 
168 	for (; cookie && !count && i < n; i++)
169 		if (cookie == counts[i].cookie)
170 			count = counts[i].count;
171 
172 	return count;
173 }
174 
175 static void check_n_were_seen_once(int *fds, int fds_len, int n,
176 				   struct sock_count counts[], int counts_len)
177 {
178 	int seen_once = 0;
179 	int seen_cnt;
180 	int i = 0;
181 
182 	for (; i < fds_len; i++) {
183 		/* Skip any sockets that were closed or that weren't seen
184 		 * exactly once.
185 		 */
186 		if (fds[i] < 0)
187 			continue;
188 		seen_cnt = get_seen_count(fds[i], counts, counts_len);
189 		if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
190 			seen_once++;
191 	}
192 
193 	ASSERT_EQ(seen_once, n, "seen_once");
194 }
195 
196 static int accept_from_one(struct pollfd *server_poll_fds,
197 			   int server_poll_fds_len)
198 {
199 	static const int poll_timeout_ms = 5000; /* 5s */
200 	int ret;
201 	int i;
202 
203 	ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
204 	if (!ASSERT_EQ(ret, 1, "poll"))
205 		return -1;
206 
207 	for (i = 0; i < server_poll_fds_len; i++)
208 		if (server_poll_fds[i].revents & POLLIN)
209 			return accept(server_poll_fds[i].fd, NULL, NULL);
210 
211 	return -1;
212 }
213 
214 static int *connect_to_server(int family, int sock_type, const char *addr,
215 			      __u16 port, int nr_connects, int *server_fds,
216 			      int server_fds_len)
217 {
218 	struct pollfd *server_poll_fds = NULL;
219 	int *established_socks = NULL;
220 	int i;
221 
222 	server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
223 	if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
224 		return NULL;
225 
226 	for (i = 0; i < server_fds_len; i++) {
227 		server_poll_fds[i].fd = server_fds[i];
228 		server_poll_fds[i].events = POLLIN;
229 	}
230 
231 	i = 0;
232 
233 	established_socks = malloc(sizeof(*established_socks) * nr_connects*2);
234 	if (!ASSERT_OK_PTR(established_socks, "established_socks"))
235 		goto error;
236 
237 	while (nr_connects--) {
238 		established_socks[i] = connect_to_addr_str(family, sock_type,
239 							   addr, port, NULL);
240 		if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
241 			goto error;
242 		i++;
243 		established_socks[i] = accept_from_one(server_poll_fds,
244 						       server_fds_len);
245 		if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
246 			goto error;
247 		i++;
248 	}
249 
250 	free(server_poll_fds);
251 	return established_socks;
252 error:
253 	free_fds(established_socks, i);
254 	free(server_poll_fds);
255 	return NULL;
256 }
257 
258 static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
259 			int *socks, int socks_len, int *established_socks,
260 			int established_socks_len, struct sock_count *counts,
261 			int counts_len, struct bpf_link *link, int iter_fd)
262 {
263 	int close_idx;
264 
265 	/* Iterate through the first socks_len - 1 sockets. */
266 	read_n(iter_fd, socks_len - 1, counts, counts_len);
267 
268 	/* Make sure we saw socks_len - 1 sockets exactly once. */
269 	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
270 			       counts_len);
271 
272 	/* Close a socket we've already seen to remove it from the bucket. */
273 	close_idx = get_seen_socket(socks, counts, counts_len);
274 	if (!ASSERT_GE(close_idx, 0, "close_idx"))
275 		return;
276 	close(socks[close_idx]);
277 	socks[close_idx] = -1;
278 
279 	/* Iterate through the rest of the sockets. */
280 	read_n(iter_fd, -1, counts, counts_len);
281 
282 	/* Make sure the last socket wasn't skipped and that there were no
283 	 * repeats.
284 	 */
285 	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
286 			       counts_len);
287 }
288 
289 static void remove_seen_established(int family, int sock_type, const char *addr,
290 				    __u16 port, int *listen_socks,
291 				    int listen_socks_len, int *established_socks,
292 				    int established_socks_len,
293 				    struct sock_count *counts, int counts_len,
294 				    struct bpf_link *link, int iter_fd)
295 {
296 	int close_idx;
297 
298 	/* Iterate through all listening sockets. */
299 	read_n(iter_fd, listen_socks_len, counts, counts_len);
300 
301 	/* Make sure we saw all listening sockets exactly once. */
302 	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
303 			       counts, counts_len);
304 
305 	/* Leave one established socket. */
306 	read_n(iter_fd, established_socks_len - 1, counts, counts_len);
307 
308 	/* Close a socket we've already seen to remove it from the bucket. */
309 	close_idx = get_nth_socket(established_socks, established_socks_len,
310 				   link, listen_socks_len + 1);
311 	if (!ASSERT_GE(close_idx, 0, "close_idx"))
312 		return;
313 	destroy(established_socks[close_idx]);
314 	established_socks[close_idx] = -1;
315 
316 	/* Iterate through the rest of the sockets. */
317 	read_n(iter_fd, -1, counts, counts_len);
318 
319 	/* Make sure the last socket wasn't skipped and that there were no
320 	 * repeats.
321 	 */
322 	check_n_were_seen_once(established_socks, established_socks_len,
323 			       established_socks_len - 1, counts, counts_len);
324 }
325 
326 static void remove_unseen(int family, int sock_type, const char *addr,
327 			  __u16 port, int *socks, int socks_len,
328 			  int *established_socks, int established_socks_len,
329 			  struct sock_count *counts, int counts_len,
330 			  struct bpf_link *link, int iter_fd)
331 {
332 	int close_idx;
333 
334 	/* Iterate through the first socket. */
335 	read_n(iter_fd, 1, counts, counts_len);
336 
337 	/* Make sure we saw a socket from fds. */
338 	check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
339 
340 	/* Close what would be the next socket in the bucket to exercise the
341 	 * condition where we need to skip past the first cookie we remembered.
342 	 */
343 	close_idx = get_nth_socket(socks, socks_len, link, 1);
344 	if (!ASSERT_GE(close_idx, 0, "close_idx"))
345 		return;
346 	close(socks[close_idx]);
347 	socks[close_idx] = -1;
348 
349 	/* Iterate through the rest of the sockets. */
350 	read_n(iter_fd, -1, counts, counts_len);
351 
352 	/* Make sure the remaining sockets were seen exactly once and that we
353 	 * didn't repeat the socket that was already seen.
354 	 */
355 	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
356 			       counts_len);
357 }
358 
359 static void remove_unseen_established(int family, int sock_type,
360 				      const char *addr, __u16 port,
361 				      int *listen_socks, int listen_socks_len,
362 				      int *established_socks,
363 				      int established_socks_len,
364 				      struct sock_count *counts, int counts_len,
365 				      struct bpf_link *link, int iter_fd)
366 {
367 	int close_idx;
368 
369 	/* Iterate through all listening sockets. */
370 	read_n(iter_fd, listen_socks_len, counts, counts_len);
371 
372 	/* Make sure we saw all listening sockets exactly once. */
373 	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
374 			       counts, counts_len);
375 
376 	/* Iterate through the first established socket. */
377 	read_n(iter_fd, 1, counts, counts_len);
378 
379 	/* Make sure we saw one established socks. */
380 	check_n_were_seen_once(established_socks, established_socks_len, 1,
381 			       counts, counts_len);
382 
383 	/* Close what would be the next socket in the bucket to exercise the
384 	 * condition where we need to skip past the first cookie we remembered.
385 	 */
386 	close_idx = get_nth_socket(established_socks, established_socks_len,
387 				   link, listen_socks_len + 1);
388 	if (!ASSERT_GE(close_idx, 0, "close_idx"))
389 		return;
390 
391 	destroy(established_socks[close_idx]);
392 	established_socks[close_idx] = -1;
393 
394 	/* Iterate through the rest of the sockets. */
395 	read_n(iter_fd, -1, counts, counts_len);
396 
397 	/* Make sure the remaining sockets were seen exactly once and that we
398 	 * didn't repeat the socket that was already seen.
399 	 */
400 	check_n_were_seen_once(established_socks, established_socks_len,
401 			       established_socks_len - 1, counts, counts_len);
402 }
403 
404 static void remove_all(int family, int sock_type, const char *addr,
405 		       __u16 port, int *socks, int socks_len,
406 		       int *established_socks, int established_socks_len,
407 		       struct sock_count *counts, int counts_len,
408 		       struct bpf_link *link, int iter_fd)
409 {
410 	int close_idx, i;
411 
412 	/* Iterate through the first socket. */
413 	read_n(iter_fd, 1, counts, counts_len);
414 
415 	/* Make sure we saw a socket from fds. */
416 	check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
417 
418 	/* Close all remaining sockets to exhaust the list of saved cookies and
419 	 * exit without putting any sockets into the batch on the next read.
420 	 */
421 	for (i = 0; i < socks_len - 1; i++) {
422 		close_idx = get_nth_socket(socks, socks_len, link, 1);
423 		if (!ASSERT_GE(close_idx, 0, "close_idx"))
424 			return;
425 		close(socks[close_idx]);
426 		socks[close_idx] = -1;
427 	}
428 
429 	/* Make sure there are no more sockets returned */
430 	ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
431 }
432 
433 static void remove_all_established(int family, int sock_type, const char *addr,
434 				   __u16 port, int *listen_socks,
435 				   int listen_socks_len, int *established_socks,
436 				   int established_socks_len,
437 				   struct sock_count *counts, int counts_len,
438 				   struct bpf_link *link, int iter_fd)
439 {
440 	int *close_idx = NULL;
441 	int i;
442 
443 	/* Iterate through all listening sockets. */
444 	read_n(iter_fd, listen_socks_len, counts, counts_len);
445 
446 	/* Make sure we saw all listening sockets exactly once. */
447 	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
448 			       counts, counts_len);
449 
450 	/* Iterate through the first established socket. */
451 	read_n(iter_fd, 1, counts, counts_len);
452 
453 	/* Make sure we saw one established socks. */
454 	check_n_were_seen_once(established_socks, established_socks_len, 1,
455 			       counts, counts_len);
456 
457 	/* Close all remaining sockets to exhaust the list of saved cookies and
458 	 * exit without putting any sockets into the batch on the next read.
459 	 */
460 	close_idx = malloc(sizeof(int) * (established_socks_len - 1));
461 	if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
462 		return;
463 	for (i = 0; i < established_socks_len - 1; i++) {
464 		close_idx[i] = get_nth_socket(established_socks,
465 					      established_socks_len, link,
466 					      listen_socks_len + i);
467 		if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
468 			return;
469 	}
470 
471 	for (i = 0; i < established_socks_len - 1; i++) {
472 		destroy(established_socks[close_idx[i]]);
473 		established_socks[close_idx[i]] = -1;
474 	}
475 
476 	/* Make sure there are no more sockets returned */
477 	ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
478 	free(close_idx);
479 }
480 
481 static void add_some(int family, int sock_type, const char *addr, __u16 port,
482 		     int *socks, int socks_len, int *established_socks,
483 		     int established_socks_len, struct sock_count *counts,
484 		     int counts_len, struct bpf_link *link, int iter_fd)
485 {
486 	int *new_socks = NULL;
487 
488 	/* Iterate through the first socks_len - 1 sockets. */
489 	read_n(iter_fd, socks_len - 1, counts, counts_len);
490 
491 	/* Make sure we saw socks_len - 1 sockets exactly once. */
492 	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
493 			       counts_len);
494 
495 	/* Double the number of sockets in the bucket. */
496 	new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
497 					   socks_len);
498 	if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
499 		goto done;
500 
501 	/* Iterate through the rest of the sockets. */
502 	read_n(iter_fd, -1, counts, counts_len);
503 
504 	/* Make sure each of the original sockets was seen exactly once. */
505 	check_n_were_seen_once(socks, socks_len, socks_len, counts,
506 			       counts_len);
507 done:
508 	free_fds(new_socks, socks_len);
509 }
510 
511 static void add_some_established(int family, int sock_type, const char *addr,
512 				 __u16 port, int *listen_socks,
513 				 int listen_socks_len, int *established_socks,
514 				 int established_socks_len,
515 				 struct sock_count *counts,
516 				 int counts_len, struct bpf_link *link,
517 				 int iter_fd)
518 {
519 	int *new_socks = NULL;
520 
521 	/* Iterate through all listening sockets. */
522 	read_n(iter_fd, listen_socks_len, counts, counts_len);
523 
524 	/* Make sure we saw all listening sockets exactly once. */
525 	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
526 			       counts, counts_len);
527 
528 	/* Iterate through the first established_socks_len - 1 sockets. */
529 	read_n(iter_fd, established_socks_len - 1, counts, counts_len);
530 
531 	/* Make sure we saw established_socks_len - 1 sockets exactly once. */
532 	check_n_were_seen_once(established_socks, established_socks_len,
533 			       established_socks_len - 1, counts, counts_len);
534 
535 	/* Double the number of established sockets in the bucket. */
536 	new_socks = connect_to_server(family, sock_type, addr, port,
537 				      established_socks_len / 2, listen_socks,
538 				      listen_socks_len);
539 	if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
540 		goto done;
541 
542 	/* Iterate through the rest of the sockets. */
543 	read_n(iter_fd, -1, counts, counts_len);
544 
545 	/* Make sure each of the original sockets was seen exactly once. */
546 	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
547 			       counts, counts_len);
548 	check_n_were_seen_once(established_socks, established_socks_len,
549 			       established_socks_len, counts, counts_len);
550 done:
551 	free_fds(new_socks, established_socks_len);
552 }
553 
554 static void force_realloc(int family, int sock_type, const char *addr,
555 			  __u16 port, int *socks, int socks_len,
556 			  int *established_socks, int established_socks_len,
557 			  struct sock_count *counts, int counts_len,
558 			  struct bpf_link *link, int iter_fd)
559 {
560 	int *new_socks = NULL;
561 
562 	/* Iterate through the first socket just to initialize the batch. */
563 	read_n(iter_fd, 1, counts, counts_len);
564 
565 	/* Double the number of sockets in the bucket to force a realloc on the
566 	 * next read.
567 	 */
568 	new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
569 					   socks_len);
570 	if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
571 		goto done;
572 
573 	/* Iterate through the rest of the sockets. */
574 	read_n(iter_fd, -1, counts, counts_len);
575 
576 	/* Make sure each socket from the first set was seen exactly once. */
577 	check_n_were_seen_once(socks, socks_len, socks_len, counts,
578 			       counts_len);
579 done:
580 	free_fds(new_socks, socks_len);
581 }
582 
583 static void force_realloc_established(int family, int sock_type,
584 				      const char *addr, __u16 port,
585 				      int *listen_socks, int listen_socks_len,
586 				      int *established_socks,
587 				      int established_socks_len,
588 				      struct sock_count *counts, int counts_len,
589 				      struct bpf_link *link, int iter_fd)
590 {
591 	/* Iterate through all sockets to trigger a realloc. */
592 	read_n(iter_fd, -1, counts, counts_len);
593 
594 	/* Make sure each socket was seen exactly once. */
595 	check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
596 			       counts, counts_len);
597 	check_n_were_seen_once(established_socks, established_socks_len,
598 			       established_socks_len, counts, counts_len);
599 }
600 
/* One resume scenario; do_resume_test() sets up the sockets described here
 * and then invokes ->test.
 */
601 struct test_case {
	/* Scenario body. Receives the reuseport sockets, the established
	 * sockets (NULL when connections == 0), the zeroed counts array of
	 * max_socks entries, the iterator link and an open iterator fd.
	 */
602 	void (*test)(int family, int sock_type, const char *addr, __u16 port,
603 		     int *socks, int socks_len, int *established_socks,
604 		     int established_socks_len, struct sock_count *counts,
605 		     int counts_len, struct bpf_link *link, int iter_fd);
606 	const char *description;	/* subtest name */
607 	int ehash_buckets;	/* non-zero: value for tcp_child_ehash_entries, run in child netns */
608 	int connections;	/* connect()/accept() pairs to create; 0 for none */
609 	int init_socks;		/* reuseport sockets started before the test */
610 	int max_socks;		/* number of entries in the counts array */
611 	int sock_type;		/* SOCK_STREAM selects the TCP iterator, otherwise UDP */
612 	int family;		/* AF_INET6 uses "::1", otherwise "127.0.0.1" */
613 };
614 
615 static struct test_case resume_tests[] = {
616 	{
617 		.description = "udp: resume after removing a seen socket",
618 		.init_socks = nr_soreuse,
619 		.max_socks = nr_soreuse,
620 		.sock_type = SOCK_DGRAM,
621 		.family = AF_INET6,
622 		.test = remove_seen,
623 	},
624 	{
625 		.description = "udp: resume after removing one unseen socket",
626 		.init_socks = nr_soreuse,
627 		.max_socks = nr_soreuse,
628 		.sock_type = SOCK_DGRAM,
629 		.family = AF_INET6,
630 		.test = remove_unseen,
631 	},
632 	{
633 		.description = "udp: resume after removing all unseen sockets",
634 		.init_socks = nr_soreuse,
635 		.max_socks = nr_soreuse,
636 		.sock_type = SOCK_DGRAM,
637 		.family = AF_INET6,
638 		.test = remove_all,
639 	},
640 	{
641 		.description = "udp: resume after adding a few sockets",
642 		.init_socks = nr_soreuse,
643 		.max_socks = nr_soreuse,
644 		.sock_type = SOCK_DGRAM,
645 		/* Use AF_INET so that new sockets are added to the head of the
646 		 * bucket's list.
647 		 */
648 		.family = AF_INET,
649 		.test = add_some,
650 	},
651 	{
652 		.description = "udp: force a realloc to occur",
653 		.init_socks = init_batch_size,
654 		.max_socks = init_batch_size * 2,
655 		.sock_type = SOCK_DGRAM,
656 		/* Use AF_INET6 so that new sockets are added to the tail of the
657 		 * bucket's list, needing to be added to the next batch to force
658 		 * a realloc.
659 		 */
660 		.family = AF_INET6,
661 		.test = force_realloc,
662 	},
663 	{
664 		.description = "tcp: resume after removing a seen socket (listening)",
665 		.init_socks = nr_soreuse,
666 		.max_socks = nr_soreuse,
667 		.sock_type = SOCK_STREAM,
668 		.family = AF_INET6,
669 		.test = remove_seen,
670 	},
671 	{
672 		.description = "tcp: resume after removing one unseen socket (listening)",
673 		.init_socks = nr_soreuse,
674 		.max_socks = nr_soreuse,
675 		.sock_type = SOCK_STREAM,
676 		.family = AF_INET6,
677 		.test = remove_unseen,
678 	},
679 	{
680 		.description = "tcp: resume after removing all unseen sockets (listening)",
681 		.init_socks = nr_soreuse,
682 		.max_socks = nr_soreuse,
683 		.sock_type = SOCK_STREAM,
684 		.family = AF_INET6,
685 		.test = remove_all,
686 	},
687 	{
688 		.description = "tcp: resume after adding a few sockets (listening)",
689 		.init_socks = nr_soreuse,
690 		.max_socks = nr_soreuse,
691 		.sock_type = SOCK_STREAM,
692 		/* Use AF_INET so that new sockets are added to the head of the
693 		 * bucket's list.
694 		 */
695 		.family = AF_INET,
696 		.test = add_some,
697 	},
698 	{
699 		.description = "tcp: force a realloc to occur (listening)",
700 		.init_socks = init_batch_size,
701 		.max_socks = init_batch_size * 2,
702 		.sock_type = SOCK_STREAM,
703 		/* Use AF_INET6 so that new sockets are added to the tail of the
704 		 * bucket's list, needing to be added to the next batch to force
705 		 * a realloc.
706 		 */
707 		.family = AF_INET6,
708 		.test = force_realloc,
709 	},
710 	{
711 		.description = "tcp: resume after removing a seen socket (established)",
712 		/* Force all established sockets into one bucket */
713 		.ehash_buckets = 1,
714 		.connections = nr_soreuse,
715 		.init_socks = nr_soreuse,
716 		/* Room for connect()ed and accept()ed sockets */
717 		.max_socks = nr_soreuse * 3,
718 		.sock_type = SOCK_STREAM,
719 		.family = AF_INET6,
720 		.test = remove_seen_established,
721 	},
722 	{
723 		.description = "tcp: resume after removing one unseen socket (established)",
724 		/* Force all established sockets into one bucket */
725 		.ehash_buckets = 1,
726 		.connections = nr_soreuse,
727 		.init_socks = nr_soreuse,
728 		/* Room for connect()ed and accept()ed sockets */
729 		.max_socks = nr_soreuse * 3,
730 		.sock_type = SOCK_STREAM,
731 		.family = AF_INET6,
732 		.test = remove_unseen_established,
733 	},
734 	{
735 		.description = "tcp: resume after removing all unseen sockets (established)",
736 		/* Force all established sockets into one bucket */
737 		.ehash_buckets = 1,
738 		.connections = nr_soreuse,
739 		.init_socks = nr_soreuse,
740 		/* Room for connect()ed and accept()ed sockets */
741 		.max_socks = nr_soreuse * 3,
742 		.sock_type = SOCK_STREAM,
743 		.family = AF_INET6,
744 		.test = remove_all_established,
745 	},
746 	{
747 		.description = "tcp: resume after adding a few sockets (established)",
748 		/* Force all established sockets into one bucket */
749 		.ehash_buckets = 1,
750 		.connections = nr_soreuse,
751 		.init_socks = nr_soreuse,
752 		/* Room for connect()ed and accept()ed sockets */
753 		.max_socks = nr_soreuse * 3,
754 		.sock_type = SOCK_STREAM,
755 		.family = AF_INET6,
756 		.test = add_some_established,
757 	},
758 	{
759 		.description = "tcp: force a realloc to occur (established)",
760 		/* Force all established sockets into one bucket */
761 		.ehash_buckets = 1,
762 		/* Bucket size will need to double when going from listening to
763 		 * established sockets.
764 		 */
765 		.connections = init_batch_size,
766 		.init_socks = nr_soreuse,
767 		/* Room for connect()ed and accept()ed sockets */
768 		.max_socks = nr_soreuse + (init_batch_size * 2),
769 		.sock_type = SOCK_STREAM,
770 		.family = AF_INET6,
771 		.test = force_realloc_established,
772 	},
773 };
774 
775 static void do_resume_test(struct test_case *tc)
776 {
777 	struct sock_iter_batch *skel = NULL;
778 	struct sock_count *counts = NULL;
779 	static const __u16 port = 10001;
780 	struct nstoken *nstoken = NULL;
781 	struct bpf_link *link = NULL;
782 	int *established_fds = NULL;
783 	int err, iter_fd = -1;
784 	const char *addr;
785 	int *fds = NULL;
786 
787 	if (tc->ehash_buckets) {
788 		SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
789 		SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
790 		    tc->ehash_buckets);
791 		SYS(done, "ip netns add %s", TEST_CHILD_NS);
792 		SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
793 		nstoken = open_netns(TEST_CHILD_NS);
794 		if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
795 			goto done;
796 	}
797 
798 	counts = calloc(tc->max_socks, sizeof(*counts));
799 	if (!ASSERT_OK_PTR(counts, "counts"))
800 		goto done;
801 	skel = sock_iter_batch__open();
802 	if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
803 		goto done;
804 
805 	/* Prepare a bucket of sockets in the kernel hashtable */
806 	addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
807 	fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
808 				     tc->init_socks);
809 	if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
810 		goto done;
811 	if (tc->connections) {
812 		established_fds = connect_to_server(tc->family, tc->sock_type,
813 						    addr, port,
814 						    tc->connections, fds,
815 						    tc->init_socks);
816 		if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
817 			goto done;
818 	}
819 	skel->rodata->ports[0] = 0;
820 	skel->rodata->ports[1] = 0;
821 	skel->rodata->sf = tc->family;
822 	skel->rodata->ss = 0;
823 
824 	err = sock_iter_batch__load(skel);
825 	if (!ASSERT_OK(err, "sock_iter_batch__load"))
826 		goto done;
827 
828 	link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
829 					skel->progs.iter_tcp_soreuse :
830 					skel->progs.iter_udp_soreuse,
831 					NULL);
832 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
833 		goto done;
834 
835 	iter_fd = bpf_iter_create(bpf_link__fd(link));
836 	if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
837 		goto done;
838 
839 	tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
840 		 established_fds, tc->connections*2, counts, tc->max_socks,
841 		 link, iter_fd);
842 done:
843 	close_netns(nstoken);
844 	SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
845 	SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
846 	free(counts);
847 	free_fds(fds, tc->init_socks);
848 	free_fds(established_fds, tc->connections*2);
849 	if (iter_fd >= 0)
850 		close(iter_fd);
851 	bpf_link__destroy(link);
852 	sock_iter_batch__destroy(skel);
853 }
854 
855 static void do_resume_tests(void)
856 {
857 	int i;
858 
859 	for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
860 		if (test__start_subtest(resume_tests[i].description)) {
861 			do_resume_test(&resume_tests[i]);
862 		}
863 	}
864 }
865 
866 static void do_test(int sock_type, bool onebyone)
867 {
868 	int err, i, nread, to_read, total_read, iter_fd = -1;
869 	struct iter_out outputs[nr_soreuse];
870 	struct bpf_link *link = NULL;
871 	struct sock_iter_batch *skel;
872 	int first_idx, second_idx;
873 	int *fds[2] = {};
874 
875 	skel = sock_iter_batch__open();
876 	if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
877 		return;
878 
879 	/* Prepare 2 buckets of sockets in the kernel hashtable */
880 	for (i = 0; i < ARRAY_SIZE(fds); i++) {
881 		int local_port;
882 
883 		fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0,
884 						nr_soreuse);
885 		if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server"))
886 			goto done;
887 		local_port = get_socket_local_port(*fds[i]);
888 		if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
889 			goto done;
890 		skel->rodata->ports[i] = ntohs(local_port);
891 	}
892 	skel->rodata->sf = AF_INET6;
893 	if (sock_type == SOCK_STREAM)
894 		skel->rodata->ss = TCP_LISTEN;
895 
896 	err = sock_iter_batch__load(skel);
897 	if (!ASSERT_OK(err, "sock_iter_batch__load"))
898 		goto done;
899 
900 	link = bpf_program__attach_iter(sock_type == SOCK_STREAM ?
901 					skel->progs.iter_tcp_soreuse :
902 					skel->progs.iter_udp_soreuse,
903 					NULL);
904 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
905 		goto done;
906 
907 	iter_fd = bpf_iter_create(bpf_link__fd(link));
908 	if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
909 		goto done;
910 
911 	/* Test reading a bucket (either from fds[0] or fds[1]).
912 	 * Only read "nr_soreuse - 1" number of sockets
913 	 * from a bucket and leave one socket out from
914 	 * that bucket on purpose.
915 	 */
916 	to_read = (nr_soreuse - 1) * sizeof(*outputs);
917 	total_read = 0;
918 	first_idx = -1;
919 	do {
920 		nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
921 		if (nread <= 0 || nread % sizeof(*outputs))
922 			break;
923 		total_read += nread;
924 
925 		if (first_idx == -1)
926 			first_idx = outputs[0].idx;
927 		for (i = 0; i < nread / sizeof(*outputs); i++)
928 			ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
929 	} while (total_read < to_read);
930 	ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
931 	ASSERT_EQ(total_read, to_read, "total_read");
932 
933 	free_fds(fds[first_idx], nr_soreuse);
934 	fds[first_idx] = NULL;
935 
936 	/* Read the "whole" second bucket */
937 	to_read = nr_soreuse * sizeof(*outputs);
938 	total_read = 0;
939 	second_idx = !first_idx;
940 	do {
941 		nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
942 		if (nread <= 0 || nread % sizeof(*outputs))
943 			break;
944 		total_read += nread;
945 
946 		for (i = 0; i < nread / sizeof(*outputs); i++)
947 			ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
948 	} while (total_read <= to_read);
949 	ASSERT_EQ(nread, 0, "nread");
950 	/* Both so_reuseport ports should be in different buckets, so
951 	 * total_read must equal to the expected to_read.
952 	 *
953 	 * For a very unlikely case, both ports collide at the same bucket,
954 	 * the bucket offset (i.e. 3) will be skipped and it cannot
955 	 * expect the to_read number of bytes.
956 	 */
957 	if (skel->bss->bucket[0] != skel->bss->bucket[1])
958 		ASSERT_EQ(total_read, to_read, "total_read");
959 
960 done:
961 	for (i = 0; i < ARRAY_SIZE(fds); i++)
962 		free_fds(fds[i], nr_soreuse);
963 	if (iter_fd < 0)
964 		close(iter_fd);
965 	bpf_link__destroy(link);
966 	sock_iter_batch__destroy(skel);
967 }
968 
969 void test_sock_iter_batch(void)
970 {
971 	struct nstoken *nstoken = NULL;
972 
973 	SYS_NOFAIL("ip netns del " TEST_NS);
974 	SYS(done, "ip netns add %s", TEST_NS);
975 	SYS(done, "ip -net %s link set dev lo up", TEST_NS);
976 
977 	nstoken = open_netns(TEST_NS);
978 	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
979 		goto done;
980 
981 	if (test__start_subtest("tcp")) {
982 		do_test(SOCK_STREAM, true);
983 		do_test(SOCK_STREAM, false);
984 	}
985 	if (test__start_subtest("udp")) {
986 		do_test(SOCK_DGRAM, true);
987 		do_test(SOCK_DGRAM, false);
988 	}
989 	do_resume_tests();
990 	close_netns(nstoken);
991 
992 done:
993 	SYS_NOFAIL("ip netns del " TEST_NS);
994 }
995