1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 /*
4  * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5  * Covers:
 *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7  *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8  *  3. BPF reuseport helper - bpf_sk_select_reuseport
9  */
10 
11 #include <linux/compiler.h>
12 #include <errno.h>
13 #include <error.h>
14 #include <limits.h>
15 #include <netinet/in.h>
16 #include <pthread.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <unistd.h>
21 #include <linux/vm_sockets.h>
22 
23 #include <bpf/bpf.h>
24 #include <bpf/libbpf.h>
25 
26 #include "bpf_util.h"
27 #include "test_progs.h"
28 #include "test_sockmap_listen.skel.h"
29 
30 #include "sockmap_helpers.h"
31 
/* Values that are not usable socket descriptors must be refused by the
 * map: -1 is expected to fail with EINVAL and INT_MAX with EBADF.
 */
static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
				int family, int sotype, int mapfd)
{
	u64 bad_fd = -1;
	u32 slot = 0;
	int rc;

	rc = bpf_map_update_elem(mapfd, &slot, &bad_fd, BPF_NOEXIST);
	if (rc == 0 || errno != EINVAL)
		FAIL_ERRNO("map_update: expected EINVAL");

	bad_fd = INT_MAX;
	rc = bpf_map_update_elem(mapfd, &slot, &bad_fd, BPF_NOEXIST);
	if (rc == 0 || errno != EBADF)
		FAIL_ERRNO("map_update: expected EBADF");
}
49 
/* Insert a freshly created socket (neither bound nor listening).
 * For SOCK_STREAM the update is expected to fail with EOPNOTSUPP;
 * for any other socket type it is expected to succeed.
 */
static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
			       int family, int sotype, int mapfd)
{
	u32 slot = 0;
	u64 fd_val;
	int sk, rc;

	sk = xsocket(family, sotype, 0);
	if (sk == -1)
		return;

	fd_val = sk;
	errno = 0;
	rc = bpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	if (sotype != SOCK_STREAM) {
		if (rc)
			FAIL_ERRNO("map_update: expected success");
	} else if (!rc || errno != EOPNOTSUPP) {
		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
	}
	xclose(sk);
}
71 
/* Insert a socket that has been bound to loopback but is not listening.
 * The update is expected to fail with EOPNOTSUPP.
 */
static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
			      int family, int sotype, int mapfd)
{
	struct sockaddr_storage ss;
	socklen_t ss_len = 0;
	u32 slot = 0;
	u64 fd_val;
	int sk, rc;

	init_addr_loopback(family, &ss, &ss_len);

	sk = xsocket(family, sotype, 0);
	if (sk == -1)
		return;

	rc = xbind(sk, sockaddr(&ss), ss_len);
	if (!rc) {
		fd_val = sk;
		errno = 0;
		rc = bpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
		if (!rc || errno != EOPNOTSUPP)
			FAIL_ERRNO("map_update: expected EOPNOTSUPP");
	}

	xclose(sk);
}
99 
/* Insert a socket obtained from socket_loopback() at key 0; the update
 * helper reports any failure itself.
 */
static void test_insert(struct test_sockmap_listen *skel __always_unused,
			int family, int sotype, int mapfd)
{
	u32 slot = 0;
	u64 fd_val;
	int sk;

	sk = socket_loopback(family, sotype);
	if (sk < 0)
		return;

	fd_val = sk;
	xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	xclose(sk);
}
116 
/* Insert a loopback socket at key 0 and delete the entry again while
 * the socket is still open.
 */
static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	u32 slot = 0;
	u64 fd_val;
	int sk;

	sk = socket_loopback(family, sotype);
	if (sk < 0)
		return;

	fd_val = sk;
	xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	xbpf_map_delete_elem(mapfd, &slot);
	xclose(sk);
}
134 
/* Insert a loopback socket at key 0, close the socket, then try to
 * delete the entry. The delete must fail because the entry is expected
 * to be gone once the socket is closed.
 */
static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
				    int family, int sotype, int mapfd)
{
	int err, s;
	u64 value;
	u32 key;

	s = socket_loopback(family, sotype);
	if (s < 0)
		return;

	key = 0;
	value = s;
	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);

	xclose(s);

	errno = 0;
	err = bpf_map_delete_elem(mapfd, &key);
	/* SOCKMAP and SOCKHASH return different error codes, accept both */
	if (!err || (errno != EINVAL && errno != ENOENT))
		FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
}
158 
/* After inserting a socket, a lookup on the same key must yield the
 * socket's SO_COOKIE value rather than the file descriptor.
 */
static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	u64 sk_cookie, map_val;
	socklen_t optlen = sizeof(sk_cookie);
	u32 slot = 0;
	int sk;

	sk = socket_loopback(family, sotype);
	if (sk < 0)
		return;

	map_val = sk;
	xbpf_map_update_elem(mapfd, &slot, &map_val, BPF_NOEXIST);

	xgetsockopt(sk, SOL_SOCKET, SO_COOKIE, &sk_cookie, &optlen);

	/* map_val is overwritten with whatever the map reports for the key */
	xbpf_map_lookup_elem(mapfd, &slot, &map_val);

	if (map_val != sk_cookie)
		FAIL("map_lookup: have %#llx, want %#llx",
		     (unsigned long long)map_val,
		     (unsigned long long)sk_cookie);

	xclose(sk);
}
187 
/* Insert and delete a socket at key 0, then verify that a lookup on
 * that key fails with ENOENT.
 */
static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	u32 slot = 0;
	u64 map_val;
	int sk, rc;

	sk = socket_loopback(family, sotype);
	if (sk < 0)
		return;

	map_val = sk;
	xbpf_map_update_elem(mapfd, &slot, &map_val, BPF_NOEXIST);
	xbpf_map_delete_elem(mapfd, &slot);

	errno = 0;
	rc = bpf_map_lookup_elem(mapfd, &slot, &map_val);
	if (rc == 0 || errno != ENOENT)
		FAIL_ERRNO("map_lookup: expected ENOENT");

	xclose(sk);
}
211 
/* Create a private single-entry SOCKMAP whose value size is 32 bits,
 * insert a socket, and verify that looking it up fails with ENOSPC.
 * The map passed in by the caller is not used.
 */
static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	u32 slot = 0, fd32;
	int sk, rc;

	sk = socket_loopback(family, sotype);
	if (sk < 0)
		return;

	/* Replace the caller's map with a temporary 32-bit-value one */
	mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(slot),
			       sizeof(fd32), 1, NULL);
	if (mapfd < 0) {
		FAIL_ERRNO("map_create");
		goto out_sock;
	}

	fd32 = sk;
	xbpf_map_update_elem(mapfd, &slot, &fd32, BPF_NOEXIST);

	errno = 0;
	rc = bpf_map_lookup_elem(mapfd, &slot, &fd32);
	if (rc == 0 || errno != ENOSPC)
		FAIL_ERRNO("map_lookup: expected ENOSPC");

	xclose(mapfd);
out_sock:
	xclose(sk);
}
242 
/* Insert one socket at key 0 with BPF_NOEXIST, then replace it with a
 * second socket using BPF_EXIST.
 */
static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
				 int family, int sotype, int mapfd)
{
	int first, second;
	u32 slot = 0;
	u64 fd_val;

	first = socket_loopback(family, sotype);
	if (first < 0)
		return;

	second = socket_loopback(family, sotype);
	if (second >= 0) {
		fd_val = first;
		xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);

		fd_val = second;
		xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_EXIST);

		xclose(second);
	}

	xclose(first);
}
268 
269 /* Exercise the code path where we destroy child sockets that never
270  * got accept()'ed, aka orphans, when parent socket gets closed.
271  */
do_destroy_orphan_child(int family,int sotype,int mapfd)272 static void do_destroy_orphan_child(int family, int sotype, int mapfd)
273 {
274 	struct sockaddr_storage addr;
275 	socklen_t len;
276 	int err, s, c;
277 	u64 value;
278 	u32 key;
279 
280 	s = socket_loopback(family, sotype);
281 	if (s < 0)
282 		return;
283 
284 	len = sizeof(addr);
285 	err = xgetsockname(s, sockaddr(&addr), &len);
286 	if (err)
287 		goto close_srv;
288 
289 	key = 0;
290 	value = s;
291 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
292 
293 	c = xsocket(family, sotype, 0);
294 	if (c == -1)
295 		goto close_srv;
296 
297 	xconnect(c, sockaddr(&addr), len);
298 	xclose(c);
299 close_srv:
300 	xclose(s);
301 }
302 
test_destroy_orphan_child(struct test_sockmap_listen * skel,int family,int sotype,int mapfd)303 static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
304 				      int family, int sotype, int mapfd)
305 {
306 	int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
307 	int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
308 	const struct test {
309 		int progfd;
310 		enum bpf_attach_type atype;
311 	} tests[] = {
312 		{ -1, -1 },
313 		{ msg_verdict, BPF_SK_MSG_VERDICT },
314 		{ skb_verdict, BPF_SK_SKB_VERDICT },
315 	};
316 	const struct test *t;
317 
318 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
319 		if (t->progfd != -1 &&
320 		    xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
321 			return;
322 
323 		do_destroy_orphan_child(family, sotype, mapfd);
324 
325 		if (t->progfd != -1)
326 			xbpf_prog_detach2(t->progfd, mapfd, t->atype);
327 	}
328 }
329 
330 /* Perform a passive open after removing listening socket from SOCKMAP
331  * to ensure that callbacks get restored properly.
332  */
test_clone_after_delete(struct test_sockmap_listen * skel __always_unused,int family,int sotype,int mapfd)333 static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
334 				    int family, int sotype, int mapfd)
335 {
336 	struct sockaddr_storage addr;
337 	socklen_t len;
338 	int err, s, c;
339 	u64 value;
340 	u32 key;
341 
342 	s = socket_loopback(family, sotype);
343 	if (s < 0)
344 		return;
345 
346 	len = sizeof(addr);
347 	err = xgetsockname(s, sockaddr(&addr), &len);
348 	if (err)
349 		goto close_srv;
350 
351 	key = 0;
352 	value = s;
353 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 	xbpf_map_delete_elem(mapfd, &key);
355 
356 	c = xsocket(family, sotype, 0);
357 	if (c < 0)
358 		goto close_srv;
359 
360 	xconnect(c, sockaddr(&addr), len);
361 	xclose(c);
362 close_srv:
363 	xclose(s);
364 }
365 
366 /* Check that child socket that got created while parent was in a
367  * SOCKMAP, but got accept()'ed only after the parent has been removed
368  * from SOCKMAP, gets cloned without parent psock state or callbacks.
369  */
test_accept_after_delete(struct test_sockmap_listen * skel __always_unused,int family,int sotype,int mapfd)370 static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
371 				     int family, int sotype, int mapfd)
372 {
373 	struct sockaddr_storage addr;
374 	const u32 zero = 0;
375 	int err, s, c, p;
376 	socklen_t len;
377 	u64 value;
378 
379 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
380 	if (s == -1)
381 		return;
382 
383 	len = sizeof(addr);
384 	err = xgetsockname(s, sockaddr(&addr), &len);
385 	if (err)
386 		goto close_srv;
387 
388 	value = s;
389 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
390 	if (err)
391 		goto close_srv;
392 
393 	c = xsocket(family, sotype, 0);
394 	if (c == -1)
395 		goto close_srv;
396 
397 	/* Create child while parent is in sockmap */
398 	err = xconnect(c, sockaddr(&addr), len);
399 	if (err)
400 		goto close_cli;
401 
402 	/* Remove parent from sockmap */
403 	err = xbpf_map_delete_elem(mapfd, &zero);
404 	if (err)
405 		goto close_cli;
406 
407 	p = xaccept_nonblock(s, NULL, NULL);
408 	if (p == -1)
409 		goto close_cli;
410 
411 	/* Check that child sk_user_data is not set */
412 	value = p;
413 	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
414 
415 	xclose(p);
416 close_cli:
417 	xclose(c);
418 close_srv:
419 	xclose(s);
420 }
421 
422 /* Check that child socket that got created and accepted while parent
423  * was in a SOCKMAP is cloned without parent psock state or callbacks.
424  */
test_accept_before_delete(struct test_sockmap_listen * skel __always_unused,int family,int sotype,int mapfd)425 static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
426 				      int family, int sotype, int mapfd)
427 {
428 	struct sockaddr_storage addr;
429 	const u32 zero = 0, one = 1;
430 	int err, s, c, p;
431 	socklen_t len;
432 	u64 value;
433 
434 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
435 	if (s == -1)
436 		return;
437 
438 	len = sizeof(addr);
439 	err = xgetsockname(s, sockaddr(&addr), &len);
440 	if (err)
441 		goto close_srv;
442 
443 	value = s;
444 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
445 	if (err)
446 		goto close_srv;
447 
448 	c = xsocket(family, sotype, 0);
449 	if (c == -1)
450 		goto close_srv;
451 
452 	/* Create & accept child while parent is in sockmap */
453 	err = xconnect(c, sockaddr(&addr), len);
454 	if (err)
455 		goto close_cli;
456 
457 	p = xaccept_nonblock(s, NULL, NULL);
458 	if (p == -1)
459 		goto close_cli;
460 
461 	/* Check that child sk_user_data is not set */
462 	value = p;
463 	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
464 
465 	xclose(p);
466 close_cli:
467 	xclose(c);
468 close_srv:
469 	xclose(s);
470 }
471 
/* State shared between a test's main thread and the helper thread it
 * spawns (connect_accept_thread() or listen_thread()).
 */
struct connect_accept_ctx {
	int sockfd;		/* socket the helper thread operates on */
	unsigned int done;	/* set to 1 by the helper thread on exit */
	unsigned int nr_iter;	/* upper bound on helper loop iterations */
};
477 
is_thread_done(struct connect_accept_ctx * ctx)478 static bool is_thread_done(struct connect_accept_ctx *ctx)
479 {
480 	return READ_ONCE(ctx->done);
481 }
482 
connect_accept_thread(void * arg)483 static void *connect_accept_thread(void *arg)
484 {
485 	struct connect_accept_ctx *ctx = arg;
486 	struct sockaddr_storage addr;
487 	int family, socktype;
488 	socklen_t len;
489 	int err, i, s;
490 
491 	s = ctx->sockfd;
492 
493 	len = sizeof(addr);
494 	err = xgetsockname(s, sockaddr(&addr), &len);
495 	if (err)
496 		goto done;
497 
498 	len = sizeof(family);
499 	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
500 	if (err)
501 		goto done;
502 
503 	len = sizeof(socktype);
504 	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
505 	if (err)
506 		goto done;
507 
508 	for (i = 0; i < ctx->nr_iter; i++) {
509 		int c, p;
510 
511 		c = xsocket(family, socktype, 0);
512 		if (c < 0)
513 			break;
514 
515 		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
516 		if (err) {
517 			xclose(c);
518 			break;
519 		}
520 
521 		p = xaccept_nonblock(s, NULL, NULL);
522 		if (p < 0) {
523 			xclose(c);
524 			break;
525 		}
526 
527 		xclose(p);
528 		xclose(c);
529 	}
530 done:
531 	WRITE_ONCE(ctx->done, 1);
532 	return NULL;
533 }
534 
/* While a helper thread runs 1000 connect/accept rounds against a
 * listening socket, keep inserting that socket into the map and
 * deleting it again until the helper reports completion or a map
 * operation fails.
 */
static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
					int family, int sotype, int mapfd)
{
	struct connect_accept_ctx ctx = { 0 };
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);
	pthread_t worker;
	u32 slot = 0;
	u64 fd_val;
	int srv;

	srv = socket_loopback(family, sotype | SOCK_NONBLOCK);
	if (srv < 0)
		return;

	if (xgetsockname(srv, sockaddr(&srv_addr), &srv_len))
		goto out_srv;

	ctx.nr_iter = 1000;
	ctx.sockfd = srv;

	if (xpthread_create(&worker, NULL, connect_accept_thread, &ctx))
		goto out_srv;

	fd_val = srv;
	while (!is_thread_done(&ctx)) {
		/* Stop at the first failing map operation */
		if (xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST) ||
		    xbpf_map_delete_elem(mapfd, &slot))
			break;
	}

	xpthread_join(worker, NULL);
out_srv:
	xclose(srv);
}
577 
listen_thread(void * arg)578 static void *listen_thread(void *arg)
579 {
580 	struct sockaddr unspec = { AF_UNSPEC };
581 	struct connect_accept_ctx *ctx = arg;
582 	int err, i, s;
583 
584 	s = ctx->sockfd;
585 
586 	for (i = 0; i < ctx->nr_iter; i++) {
587 		err = xlisten(s, 1);
588 		if (err)
589 			break;
590 		err = xconnect(s, &unspec, sizeof(unspec));
591 		if (err)
592 			break;
593 	}
594 
595 	WRITE_ONCE(ctx->done, 1);
596 	return NULL;
597 }
598 
/* Race map insert/delete against a helper thread that keeps toggling
 * the socket between listening and unhashed (see listen_thread()).
 * Updates may fail with EOPNOTSUPP and deletes with EINVAL/ENOENT;
 * any other error fails the test.
 */
static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
				    int family, int socktype, int mapfd)
{
	struct connect_accept_ctx ctx = { 0 };
	const u32 zero = 0;
	const int one = 1;
	pthread_t t;
	int err, s;
	u64 value;

	s = xsocket(family, socktype, 0);
	if (s < 0)
		return;

	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	if (err)
		goto close;

	ctx.sockfd = s;
	ctx.nr_iter = 10000;

	/* Use the reporting wrapper so a thread-creation failure is logged
	 * instead of silently skipping the test body.
	 */
	err = xpthread_create(&t, NULL, listen_thread, &ctx);
	if (err)
		goto close;

	value = s;
	while (!is_thread_done(&ctx)) {
		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
		/* Expecting EOPNOTSUPP before listen() */
		if (err && errno != EOPNOTSUPP) {
			FAIL_ERRNO("map_update");
			break;
		}

		err = bpf_map_delete_elem(mapfd, &zero);
		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
		if (err && errno != EINVAL && errno != ENOENT) {
			FAIL_ERRNO("map_delete");
			break;
		}
	}

	xpthread_join(t, NULL);
close:
	xclose(s);
}
645 
zero_verdict_count(int mapfd)646 static void zero_verdict_count(int mapfd)
647 {
648 	unsigned int zero = 0;
649 	int key;
650 
651 	key = SK_DROP;
652 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
653 	key = SK_PASS;
654 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
655 }
656 
/* Direction exercised by the redirect tests */
enum redir_mode {
	REDIR_INGRESS,
	REDIR_EGRESS,
};

/* Map a redirect mode to the label used as a prefix in log messages.
 * Values outside the enum yield "unknown".
 */
static const char *redir_mode_str(enum redir_mode mode)
{
	if (mode == REDIR_INGRESS)
		return "ingress";
	if (mode == REDIR_EGRESS)
		return "egress";
	return "unknown";
}
673 
redir_to_connected(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)674 static void redir_to_connected(int family, int sotype, int sock_mapfd,
675 			       int verd_mapfd, enum redir_mode mode)
676 {
677 	const char *log_prefix = redir_mode_str(mode);
678 	int s, c0, c1, p0, p1;
679 	unsigned int pass;
680 	int err, n;
681 	u32 key;
682 	char b;
683 
684 	zero_verdict_count(verd_mapfd);
685 
686 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
687 	if (s < 0)
688 		return;
689 
690 	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
691 	if (err)
692 		goto close_srv;
693 
694 	err = add_to_sockmap(sock_mapfd, p0, p1);
695 	if (err)
696 		goto close;
697 
698 	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
699 	if (n < 0)
700 		FAIL_ERRNO("%s: write", log_prefix);
701 	if (n == 0)
702 		FAIL("%s: incomplete write", log_prefix);
703 	if (n < 1)
704 		goto close;
705 
706 	key = SK_PASS;
707 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
708 	if (err)
709 		goto close;
710 	if (pass != 1)
711 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
712 	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
713 	if (n < 0)
714 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
715 	if (n == 0)
716 		FAIL("%s: incomplete recv", log_prefix);
717 
718 close:
719 	xclose(p1);
720 	xclose(c1);
721 	xclose(p0);
722 	xclose(c0);
723 close_srv:
724 	xclose(s);
725 }
726 
test_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)727 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
728 					struct bpf_map *inner_map, int family,
729 					int sotype)
730 {
731 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
732 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
733 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
734 	int sock_map = bpf_map__fd(inner_map);
735 	int err;
736 
737 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
738 	if (err)
739 		return;
740 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
741 	if (err)
742 		goto detach;
743 
744 	redir_to_connected(family, sotype, sock_map, verdict_map,
745 			   REDIR_INGRESS);
746 
747 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
748 detach:
749 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
750 }
751 
test_msg_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)752 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
753 					struct bpf_map *inner_map, int family,
754 					int sotype)
755 {
756 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
757 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
758 	int sock_map = bpf_map__fd(inner_map);
759 	int err;
760 
761 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
762 	if (err)
763 		return;
764 
765 	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
766 
767 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
768 }
769 
redir_to_listening(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)770 static void redir_to_listening(int family, int sotype, int sock_mapfd,
771 			       int verd_mapfd, enum redir_mode mode)
772 {
773 	const char *log_prefix = redir_mode_str(mode);
774 	struct sockaddr_storage addr;
775 	int s, c, p, err, n;
776 	unsigned int drop;
777 	socklen_t len;
778 	u32 key;
779 
780 	zero_verdict_count(verd_mapfd);
781 
782 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
783 	if (s < 0)
784 		return;
785 
786 	len = sizeof(addr);
787 	err = xgetsockname(s, sockaddr(&addr), &len);
788 	if (err)
789 		goto close_srv;
790 
791 	c = xsocket(family, sotype, 0);
792 	if (c < 0)
793 		goto close_srv;
794 	err = xconnect(c, sockaddr(&addr), len);
795 	if (err)
796 		goto close_cli;
797 
798 	p = xaccept_nonblock(s, NULL, NULL);
799 	if (p < 0)
800 		goto close_cli;
801 
802 	err = add_to_sockmap(sock_mapfd, s, p);
803 	if (err)
804 		goto close_peer;
805 
806 	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
807 	if (n < 0 && errno != EACCES)
808 		FAIL_ERRNO("%s: write", log_prefix);
809 	if (n == 0)
810 		FAIL("%s: incomplete write", log_prefix);
811 	if (n < 1)
812 		goto close_peer;
813 
814 	key = SK_DROP;
815 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
816 	if (err)
817 		goto close_peer;
818 	if (drop != 1)
819 		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
820 
821 close_peer:
822 	xclose(p);
823 close_cli:
824 	xclose(c);
825 close_srv:
826 	xclose(s);
827 }
828 
test_skb_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)829 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
830 					struct bpf_map *inner_map, int family,
831 					int sotype)
832 {
833 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
834 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
835 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
836 	int sock_map = bpf_map__fd(inner_map);
837 	int err;
838 
839 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
840 	if (err)
841 		return;
842 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
843 	if (err)
844 		goto detach;
845 
846 	redir_to_listening(family, sotype, sock_map, verdict_map,
847 			   REDIR_INGRESS);
848 
849 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
850 detach:
851 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
852 }
853 
test_msg_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)854 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
855 					struct bpf_map *inner_map, int family,
856 					int sotype)
857 {
858 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
859 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
860 	int sock_map = bpf_map__fd(inner_map);
861 	int err;
862 
863 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
864 	if (err)
865 		return;
866 
867 	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
868 
869 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
870 }
871 
/* Store 3 (the payload length without its NUL terminator) under key 0
 * of parser_map, send the 4-byte buffer "abc\0" on c1 and expect exactly
 * 3 bytes to arrive on c0. parser_map[0] is reset to 0 on the way out.
 * NOTE(review): presumably the stream parser program reads parser_map[0]
 * as the message length to pass on - confirm against the BPF program.
 */
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
	int srv, cli0 = -1, cli1 = -1, peer0 = -1, peer1 = -1;
	char payload[] = "abc";
	int key = 0, value = sizeof(payload) - 1;
	int n;

	if (xbpf_map_update_elem(parser_map, &key, &value, 0))
		return;

	srv = socket_loopback(family, sotype | SOCK_NONBLOCK);
	if (srv < 0)
		goto reset_parser_map;

	if (create_socket_pairs(srv, family, sotype, &cli0, &cli1, &peer0,
				&peer1))
		goto out_srv;

	if (add_to_sockmap(sock_map, peer0, peer1))
		goto out_pairs;

	n = xsend(cli1, payload, sizeof(payload), 0);
	/* A negative result is not treated as a short write here */
	if (n >= 0 && (size_t)n < sizeof(payload))
		FAIL("incomplete write");

	n = xrecv_nonblock(cli0, payload, sizeof(payload), 0);
	if (n != sizeof(payload) - 1)
		FAIL("expect %zu, received %d", sizeof(payload) - 1, n);

out_pairs:
	xclose(cli0);
	xclose(peer0);
	xclose(cli1);
	xclose(peer1);
out_srv:
	xclose(srv);

reset_parser_map:
	key = 0;
	value = 0;
	xbpf_map_update_elem(parser_map, &key, &value, 0);
}
917 
test_skb_redir_partial(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)918 static void test_skb_redir_partial(struct test_sockmap_listen *skel,
919 				   struct bpf_map *inner_map, int family,
920 				   int sotype)
921 {
922 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
923 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
924 	int parser_map = bpf_map__fd(skel->maps.parser_map);
925 	int sock_map = bpf_map__fd(inner_map);
926 	int err;
927 
928 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
929 	if (err)
930 		return;
931 
932 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
933 	if (err)
934 		goto detach;
935 
936 	redir_partial(family, sotype, sock_map, parser_map);
937 
938 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
939 detach:
940 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
941 }
942 
test_reuseport_select_listening(int family,int sotype,int sock_map,int verd_map,int reuseport_prog)943 static void test_reuseport_select_listening(int family, int sotype,
944 					    int sock_map, int verd_map,
945 					    int reuseport_prog)
946 {
947 	struct sockaddr_storage addr;
948 	unsigned int pass;
949 	int s, c, err;
950 	socklen_t len;
951 	u64 value;
952 	u32 key;
953 
954 	zero_verdict_count(verd_map);
955 
956 	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
957 				      reuseport_prog);
958 	if (s < 0)
959 		return;
960 
961 	len = sizeof(addr);
962 	err = xgetsockname(s, sockaddr(&addr), &len);
963 	if (err)
964 		goto close_srv;
965 
966 	key = 0;
967 	value = s;
968 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
969 	if (err)
970 		goto close_srv;
971 
972 	c = xsocket(family, sotype, 0);
973 	if (c < 0)
974 		goto close_srv;
975 	err = xconnect(c, sockaddr(&addr), len);
976 	if (err)
977 		goto close_cli;
978 
979 	if (sotype == SOCK_STREAM) {
980 		int p;
981 
982 		p = xaccept_nonblock(s, NULL, NULL);
983 		if (p < 0)
984 			goto close_cli;
985 		xclose(p);
986 	} else {
987 		char b = 'a';
988 		ssize_t n;
989 
990 		n = xsend(c, &b, sizeof(b), 0);
991 		if (n == -1)
992 			goto close_cli;
993 
994 		n = xrecv_nonblock(s, &b, sizeof(b), 0);
995 		if (n == -1)
996 			goto close_cli;
997 	}
998 
999 	key = SK_PASS;
1000 	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1001 	if (err)
1002 		goto close_cli;
1003 	if (pass != 1)
1004 		FAIL("want pass count 1, have %d", pass);
1005 
1006 close_cli:
1007 	xclose(c);
1008 close_srv:
1009 	xclose(s);
1010 }
1011 
/* Replace sock_map[0] with a connected socket and check that a new
 * connection attempt to the reuseport server is refused: the client
 * must see ECONNREFUSED and the SK_DROP counter must read 1.
 *
 * Every error path releases exactly the sockets opened so far.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		/* p0 is open from here on, so failures must release it too */
		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_peer0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_peer0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	/* addr may have been overwritten with c0's address above */
	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		/* For datagrams the refusal surfaces on the receive */
		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}
1116 
/* Redirecting across reuseport groups must not be allowed: the client
 * is expected to see ECONNREFUSED and the SK_DROP counter to read 1.
 */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
					int verd_map, int reuseport_prog)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);
	int srv1, srv2, cli, err;
	u32 verdict_key = SK_DROP;
	unsigned int drop;

	zero_verdict_count(verd_map);

	/* Two listeners, each forming a reuseport group of its own */
	srv1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (srv1 < 0)
		return;

	srv2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (srv2 < 0)
		goto out_srv1;

	if (add_to_sockmap(sock_map, srv1, srv2))
		goto out_srv2;

	/* Target srv2; the reuseport program picks srv1 via sock_map[0] */
	if (xgetsockname(srv2, sockaddr(&srv_addr), &srv_len))
		goto out_srv2;

	cli = xsocket(family, sotype, 0);
	if (cli < 0)
		goto out_srv2;

	err = connect(cli, sockaddr(&srv_addr), srv_len);
	if (sotype == SOCK_DGRAM) {
		char byte = 'a';
		ssize_t n;

		if (xsend(cli, &byte, sizeof(byte), 0) == -1)
			goto out_cli;

		n = recv_timeout(cli, &byte, sizeof(byte), 0, IO_TIMEOUT_SEC);
		err = (n == -1);
	}
	if (!err || errno != ECONNREFUSED) {
		FAIL_ERRNO("connect: expected ECONNREFUSED");
		goto out_cli;
	}

	/* A drop is expected: no redirect outside of the reuseport group */
	if (xbpf_map_lookup_elem(verd_map, &verdict_key, &drop))
		goto out_cli;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

out_cli:
	xclose(cli);
out_srv2:
	xclose(srv2);
out_srv1:
	xclose(srv1);
}
1184 
/*
 * Build one test-table initializer: the function pointer, its name as a
 * string literal, then any extra members supplied by the caller.
 */
#define TEST(fn, ...) { fn, #fn, __VA_ARGS__ }
1189 
test_ops_cleanup(const struct bpf_map * map)1190 static void test_ops_cleanup(const struct bpf_map *map)
1191 {
1192 	int err, mapfd;
1193 	u32 key;
1194 
1195 	mapfd = bpf_map__fd(map);
1196 
1197 	for (key = 0; key < bpf_map__max_entries(map); key++) {
1198 		err = bpf_map_delete_elem(mapfd, &key);
1199 		if (err && errno != EINVAL && errno != ENOENT)
1200 			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1201 	}
1202 }
1203 
/* Map an address family to the label used in subtest names. */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";
	if (family == AF_UNIX)
		return "Unix";
	if (family == AF_VSOCK)
		return "VSOCK";

	return "unknown";
}
1219 
map_type_str(const struct bpf_map * map)1220 static const char *map_type_str(const struct bpf_map *map)
1221 {
1222 	int type;
1223 
1224 	if (!map)
1225 		return "invalid";
1226 	type = bpf_map__type(map);
1227 
1228 	switch (type) {
1229 	case BPF_MAP_TYPE_SOCKMAP:
1230 		return "sockmap";
1231 	case BPF_MAP_TYPE_SOCKHASH:
1232 		return "sockhash";
1233 	default:
1234 		return "unknown";
1235 	}
1236 }
1237 
/* Map a socket type to the label used in subtest names. */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_DGRAM)
		return "UDP";
	if (sotype == SOCK_STREAM)
		return "TCP";

	return "unknown";
}
1249 
test_ops(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1250 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1251 		     int family, int sotype)
1252 {
1253 	const struct op_test {
1254 		void (*fn)(struct test_sockmap_listen *skel,
1255 			   int family, int sotype, int mapfd);
1256 		const char *name;
1257 		int sotype;
1258 	} tests[] = {
1259 		/* insert */
1260 		TEST(test_insert_invalid),
1261 		TEST(test_insert_opened),
1262 		TEST(test_insert_bound, SOCK_STREAM),
1263 		TEST(test_insert),
1264 		/* delete */
1265 		TEST(test_delete_after_insert),
1266 		TEST(test_delete_after_close),
1267 		/* lookup */
1268 		TEST(test_lookup_after_insert),
1269 		TEST(test_lookup_after_delete),
1270 		TEST(test_lookup_32_bit_value),
1271 		/* update */
1272 		TEST(test_update_existing),
1273 		/* races with insert/delete */
1274 		TEST(test_destroy_orphan_child, SOCK_STREAM),
1275 		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1276 		TEST(test_race_insert_listen, SOCK_STREAM),
1277 		/* child clone */
1278 		TEST(test_clone_after_delete, SOCK_STREAM),
1279 		TEST(test_accept_after_delete, SOCK_STREAM),
1280 		TEST(test_accept_before_delete, SOCK_STREAM),
1281 	};
1282 	const char *family_name, *map_name, *sotype_name;
1283 	const struct op_test *t;
1284 	char s[MAX_TEST_NAME];
1285 	int map_fd;
1286 
1287 	family_name = family_str(family);
1288 	map_name = map_type_str(map);
1289 	sotype_name = sotype_str(sotype);
1290 	map_fd = bpf_map__fd(map);
1291 
1292 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1293 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1294 			 sotype_name, t->name);
1295 
1296 		if (t->sotype != 0 && t->sotype != sotype)
1297 			continue;
1298 
1299 		if (!test__start_subtest(s))
1300 			continue;
1301 
1302 		t->fn(skel, family, sotype, map_fd);
1303 		test_ops_cleanup(map);
1304 	}
1305 }
1306 
test_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1307 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1308 		       int family, int sotype)
1309 {
1310 	const struct redir_test {
1311 		void (*fn)(struct test_sockmap_listen *skel,
1312 			   struct bpf_map *map, int family, int sotype);
1313 		const char *name;
1314 	} tests[] = {
1315 		TEST(test_skb_redir_to_connected),
1316 		TEST(test_skb_redir_to_listening),
1317 		TEST(test_skb_redir_partial),
1318 		TEST(test_msg_redir_to_connected),
1319 		TEST(test_msg_redir_to_listening),
1320 	};
1321 	const char *family_name, *map_name;
1322 	const struct redir_test *t;
1323 	char s[MAX_TEST_NAME];
1324 
1325 	family_name = family_str(family);
1326 	map_name = map_type_str(map);
1327 
1328 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1329 		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1330 			 t->name);
1331 
1332 		if (!test__start_subtest(s))
1333 			continue;
1334 
1335 		t->fn(skel, map, family, sotype);
1336 	}
1337 }
1338 
pairs_redir_to_connected(int cli0,int peer0,int cli1,int peer1,int sock_mapfd,int nop_mapfd,int verd_mapfd,enum redir_mode mode)1339 static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
1340 				     int sock_mapfd, int nop_mapfd,
1341 				     int verd_mapfd, enum redir_mode mode)
1342 {
1343 	const char *log_prefix = redir_mode_str(mode);
1344 	unsigned int pass;
1345 	int err, n;
1346 	u32 key;
1347 	char b;
1348 
1349 	zero_verdict_count(verd_mapfd);
1350 
1351 	err = add_to_sockmap(sock_mapfd, peer0, peer1);
1352 	if (err)
1353 		return;
1354 
1355 	if (nop_mapfd >= 0) {
1356 		err = add_to_sockmap(nop_mapfd, cli0, cli1);
1357 		if (err)
1358 			return;
1359 	}
1360 
1361 	n = write(cli1, "a", 1);
1362 	if (n < 0)
1363 		FAIL_ERRNO("%s: write", log_prefix);
1364 	if (n == 0)
1365 		FAIL("%s: incomplete write", log_prefix);
1366 	if (n < 1)
1367 		return;
1368 
1369 	key = SK_PASS;
1370 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1371 	if (err)
1372 		return;
1373 	if (pass != 1)
1374 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1375 
1376 	n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
1377 	if (n < 0)
1378 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1379 	if (n == 0)
1380 		FAIL("%s: incomplete recv", log_prefix);
1381 }
1382 
/*
 * Create two non-blocking AF_UNIX socket pairs of type @sotype, run
 * pairs_redir_to_connected() on them without a nop map, and close all four
 * sockets again. Returns silently when the first socketpair() fails.
 */
static void unix_redir_to_connected(int sotype, int sock_mapfd,
			       int verd_mapfd, enum redir_mode mode)
{
	int pair0[2], pair1[2];

	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, pair0))
		return;

	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, pair1))
		goto close_pair0;

	/* Index 0 plays the client role, index 1 the peer role */
	pairs_redir_to_connected(pair0[0], pair0[1], pair1[0], pair1[1],
				 sock_mapfd, -1, verd_mapfd, mode);

	xclose(pair1[0]);
	xclose(pair1[1]);
close_pair0:
	xclose(pair0[0]);
	xclose(pair0[1]);
}
1405 
unix_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int sotype)1406 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1407 					struct bpf_map *inner_map, int sotype)
1408 {
1409 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1410 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1411 	int sock_map = bpf_map__fd(inner_map);
1412 	int err;
1413 
1414 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1415 	if (err)
1416 		return;
1417 
1418 	skel->bss->test_ingress = false;
1419 	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1420 	skel->bss->test_ingress = true;
1421 	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1422 
1423 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1424 }
1425 
test_unix_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int sotype)1426 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1427 			    int sotype)
1428 {
1429 	const char *family_name, *map_name;
1430 	char s[MAX_TEST_NAME];
1431 
1432 	family_name = family_str(AF_UNIX);
1433 	map_name = map_type_str(map);
1434 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1435 	if (!test__start_subtest(s))
1436 		return;
1437 	unix_skb_redir_to_connected(skel, map, sotype);
1438 }
1439 
1440 /* Returns two connected loopback vsock sockets */
vsock_socketpair_connectible(int sotype,int * v0,int * v1)1441 static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
1442 {
1443 	struct sockaddr_storage addr;
1444 	socklen_t len = sizeof(addr);
1445 	int s, p, c;
1446 
1447 	s = socket_loopback(AF_VSOCK, sotype);
1448 	if (s < 0)
1449 		return -1;
1450 
1451 	c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
1452 	if (c == -1)
1453 		goto close_srv;
1454 
1455 	if (getsockname(s, sockaddr(&addr), &len) < 0)
1456 		goto close_cli;
1457 
1458 	if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
1459 		FAIL_ERRNO("connect");
1460 		goto close_cli;
1461 	}
1462 
1463 	len = sizeof(addr);
1464 	p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
1465 	if (p < 0)
1466 		goto close_cli;
1467 
1468 	if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
1469 		FAIL_ERRNO("poll_connect");
1470 		goto close_acc;
1471 	}
1472 
1473 	*v0 = p;
1474 	*v1 = c;
1475 
1476 	return 0;
1477 
1478 close_acc:
1479 	close(p);
1480 close_cli:
1481 	close(c);
1482 close_srv:
1483 	close(s);
1484 
1485 	return -1;
1486 }
1487 
vsock_unix_redir_connectible(int sock_mapfd,int verd_mapfd,enum redir_mode mode,int sotype)1488 static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
1489 					 enum redir_mode mode, int sotype)
1490 {
1491 	const char *log_prefix = redir_mode_str(mode);
1492 	char a = 'a', b = 'b';
1493 	int u0, u1, v0, v1;
1494 	int sfd[2];
1495 	unsigned int pass;
1496 	int err, n;
1497 	u32 key;
1498 
1499 	zero_verdict_count(verd_mapfd);
1500 
1501 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
1502 		return;
1503 
1504 	u0 = sfd[0];
1505 	u1 = sfd[1];
1506 
1507 	err = vsock_socketpair_connectible(sotype, &v0, &v1);
1508 	if (err) {
1509 		FAIL("vsock_socketpair_connectible() failed");
1510 		goto close_uds;
1511 	}
1512 
1513 	err = add_to_sockmap(sock_mapfd, u0, v0);
1514 	if (err) {
1515 		FAIL("add_to_sockmap failed");
1516 		goto close_vsock;
1517 	}
1518 
1519 	n = write(v1, &a, sizeof(a));
1520 	if (n < 0)
1521 		FAIL_ERRNO("%s: write", log_prefix);
1522 	if (n == 0)
1523 		FAIL("%s: incomplete write", log_prefix);
1524 	if (n < 1)
1525 		goto out;
1526 
1527 	n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
1528 	if (n < 0)
1529 		FAIL("%s: recv() err, errno=%d", log_prefix, errno);
1530 	if (n == 0)
1531 		FAIL("%s: incomplete recv", log_prefix);
1532 	if (b != a)
1533 		FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
1534 
1535 	key = SK_PASS;
1536 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1537 	if (err)
1538 		goto out;
1539 	if (pass != 1)
1540 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1541 out:
1542 	key = 0;
1543 	bpf_map_delete_elem(sock_mapfd, &key);
1544 	key = 1;
1545 	bpf_map_delete_elem(sock_mapfd, &key);
1546 
1547 close_vsock:
1548 	close(v0);
1549 	close(v1);
1550 
1551 close_uds:
1552 	close(u0);
1553 	close(u1);
1554 }
1555 
vsock_unix_skb_redir_connectible(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int sotype)1556 static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
1557 					     struct bpf_map *inner_map,
1558 					     int sotype)
1559 {
1560 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1561 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1562 	int sock_map = bpf_map__fd(inner_map);
1563 	int err;
1564 
1565 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1566 	if (err)
1567 		return;
1568 
1569 	skel->bss->test_ingress = false;
1570 	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
1571 	skel->bss->test_ingress = true;
1572 	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
1573 
1574 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1575 }
1576 
test_vsock_redir(struct test_sockmap_listen * skel,struct bpf_map * map)1577 static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
1578 {
1579 	const char *family_name, *map_name;
1580 	char s[MAX_TEST_NAME];
1581 
1582 	family_name = family_str(AF_VSOCK);
1583 	map_name = map_type_str(map);
1584 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1585 	if (!test__start_subtest(s))
1586 		return;
1587 
1588 	vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
1589 	vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
1590 }
1591 
test_reuseport(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1592 static void test_reuseport(struct test_sockmap_listen *skel,
1593 			   struct bpf_map *map, int family, int sotype)
1594 {
1595 	const struct reuseport_test {
1596 		void (*fn)(int family, int sotype, int socket_map,
1597 			   int verdict_map, int reuseport_prog);
1598 		const char *name;
1599 		int sotype;
1600 	} tests[] = {
1601 		TEST(test_reuseport_select_listening),
1602 		TEST(test_reuseport_select_connected),
1603 		TEST(test_reuseport_mixed_groups),
1604 	};
1605 	int socket_map, verdict_map, reuseport_prog;
1606 	const char *family_name, *map_name, *sotype_name;
1607 	const struct reuseport_test *t;
1608 	char s[MAX_TEST_NAME];
1609 
1610 	family_name = family_str(family);
1611 	map_name = map_type_str(map);
1612 	sotype_name = sotype_str(sotype);
1613 
1614 	socket_map = bpf_map__fd(map);
1615 	verdict_map = bpf_map__fd(skel->maps.verdict_map);
1616 	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1617 
1618 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1619 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1620 			 sotype_name, t->name);
1621 
1622 		if (t->sotype != 0 && t->sotype != sotype)
1623 			continue;
1624 
1625 		if (!test__start_subtest(s))
1626 			continue;
1627 
1628 		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1629 	}
1630 }
1631 
/*
 * Build a pair of non-blocking inet sockets connected to each other.
 *
 * The first socket comes from socket_loopback(); a second socket is
 * connected to its address, and the first is then connected back to the
 * second's address. On success 0 is returned, the first socket is stored in
 * *s and the second in *c. On failure the non-zero result of the failing
 * step is returned and both sockets are closed.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	struct sockaddr_storage addr;
	socklen_t len = sizeof(addr);
	int peer, cli, err;

	peer = socket_loopback(family, type | SOCK_NONBLOCK);
	if (peer < 0)
		return peer;

	err = xgetsockname(peer, sockaddr(&addr), &len);
	if (err)
		goto close_peer;

	cli = xsocket(family, type | SOCK_NONBLOCK, 0);
	if (cli < 0) {
		err = cli;
		goto close_peer;
	}

	/* Each step runs only if the previous one succeeded */
	err = xconnect(cli, sockaddr(&addr), len);
	if (!err)
		err = xgetsockname(cli, sockaddr(&addr), &len);
	if (!err)
		err = xconnect(peer, sockaddr(&addr), len);
	if (err)
		goto close_cli;

	*s = peer;
	*c = cli;
	return 0;

close_cli:
	xclose(cli);
close_peer:
	xclose(peer);
	return err;
}
1673 
/*
 * Create two connected SOCK_DGRAM inet socket pairs, run
 * pairs_redir_to_connected() on them without a nop map, and close all four
 * sockets again.
 */
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
				   enum redir_mode mode)
{
	int cli0, peer0, cli1, peer1;

	if (inet_socketpair(family, SOCK_DGRAM, &peer0, &cli0))
		return;

	if (inet_socketpair(family, SOCK_DGRAM, &peer1, &cli1))
		goto close_pair0;

	pairs_redir_to_connected(cli0, peer0, cli1, peer1, sock_mapfd, -1,
				 verd_mapfd, mode);

	xclose(cli1);
	xclose(peer1);
close_pair0:
	xclose(cli0);
	xclose(peer0);
}
1695 
udp_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1696 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1697 				       struct bpf_map *inner_map, int family)
1698 {
1699 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1700 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1701 	int sock_map = bpf_map__fd(inner_map);
1702 	int err;
1703 
1704 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1705 	if (err)
1706 		return;
1707 
1708 	skel->bss->test_ingress = false;
1709 	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1710 	skel->bss->test_ingress = true;
1711 	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1712 
1713 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1714 }
1715 
test_udp_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1716 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1717 			   int family)
1718 {
1719 	const char *family_name, *map_name;
1720 	char s[MAX_TEST_NAME];
1721 
1722 	family_name = family_str(family);
1723 	map_name = map_type_str(map);
1724 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1725 	if (!test__start_subtest(s))
1726 		return;
1727 	udp_skb_redir_to_connected(skel, map, family);
1728 }
1729 
/*
 * Pair an AF_UNIX datagram socketpair (pair 0) with a connected inet
 * datagram pair (pair 1) and run pairs_redir_to_connected() on them without
 * a nop map. All four sockets are closed afterwards.
 *
 * NOTE(review): @type is accepted but not used; both pairs are always
 * SOCK_DGRAM. Confirm whether callers passing SOCK_STREAM expect otherwise.
 */
static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
					int verd_mapfd, enum redir_mode mode)
{
	int unix_pair[2];
	int inet_cli, inet_peer;

	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, unix_pair))
		return;

	if (inet_socketpair(family, SOCK_DGRAM, &inet_peer, &inet_cli))
		goto close_unix;

	pairs_redir_to_connected(unix_pair[0], unix_pair[1], inet_cli,
				 inet_peer, sock_mapfd, -1, verd_mapfd, mode);

	xclose(inet_cli);
	xclose(inet_peer);
close_unix:
	xclose(unix_pair[0]);
	xclose(unix_pair[1]);
}
1753 
inet_unix_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1754 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1755 					    struct bpf_map *inner_map, int family)
1756 {
1757 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1758 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1759 	int sock_map = bpf_map__fd(inner_map);
1760 	int err;
1761 
1762 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1763 	if (err)
1764 		return;
1765 
1766 	skel->bss->test_ingress = false;
1767 	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1768 				    REDIR_EGRESS);
1769 	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1770 				    REDIR_EGRESS);
1771 	skel->bss->test_ingress = true;
1772 	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1773 				    REDIR_INGRESS);
1774 	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1775 				    REDIR_INGRESS);
1776 
1777 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1778 }
1779 
/*
 * Pair a connected inet datagram pair (pair 0) with an AF_UNIX datagram
 * socketpair (pair 1) and run pairs_redir_to_connected() on them, forwarding
 * @nop_mapfd. All four sockets are closed afterwards.
 *
 * NOTE(review): @type is accepted but not used; both pairs are always
 * SOCK_DGRAM. Confirm whether callers passing SOCK_STREAM expect otherwise.
 */
static void unix_inet_redir_to_connected(int family, int type,
					int sock_mapfd, int nop_mapfd,
					int verd_mapfd,
					enum redir_mode mode)
{
	int inet_cli, inet_peer;
	int unix_pair[2];

	if (inet_socketpair(family, SOCK_DGRAM, &inet_peer, &inet_cli))
		return;

	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, unix_pair))
		goto close_inet;

	pairs_redir_to_connected(inet_cli, inet_peer, unix_pair[0],
				 unix_pair[1], sock_mapfd, nop_mapfd,
				 verd_mapfd, mode);

	xclose(unix_pair[0]);
	xclose(unix_pair[1]);
close_inet:
	xclose(inet_cli);
	xclose(inet_peer);
}
1807 
unix_inet_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1808 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1809 					    struct bpf_map *inner_map, int family)
1810 {
1811 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1812 	int nop_map = bpf_map__fd(skel->maps.nop_map);
1813 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1814 	int sock_map = bpf_map__fd(inner_map);
1815 	int err;
1816 
1817 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1818 	if (err)
1819 		return;
1820 
1821 	skel->bss->test_ingress = false;
1822 	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1823 				     sock_map, -1, verdict_map,
1824 				     REDIR_EGRESS);
1825 	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1826 				     sock_map, -1, verdict_map,
1827 				     REDIR_EGRESS);
1828 
1829 	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1830 				     sock_map, nop_map, verdict_map,
1831 				     REDIR_EGRESS);
1832 	unix_inet_redir_to_connected(family, SOCK_STREAM,
1833 				     sock_map, nop_map, verdict_map,
1834 				     REDIR_EGRESS);
1835 	skel->bss->test_ingress = true;
1836 	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1837 				     sock_map, -1, verdict_map,
1838 				     REDIR_INGRESS);
1839 	unix_inet_redir_to_connected(family, SOCK_STREAM,
1840 				     sock_map, -1, verdict_map,
1841 				     REDIR_INGRESS);
1842 
1843 	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1844 				     sock_map, nop_map, verdict_map,
1845 				     REDIR_INGRESS);
1846 	unix_inet_redir_to_connected(family, SOCK_STREAM,
1847 				     sock_map, nop_map, verdict_map,
1848 				     REDIR_INGRESS);
1849 
1850 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1851 }
1852 
test_udp_unix_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1853 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1854 				int family)
1855 {
1856 	const char *family_name, *map_name;
1857 	char s[MAX_TEST_NAME];
1858 
1859 	family_name = family_str(family);
1860 	map_name = map_type_str(map);
1861 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1862 	if (!test__start_subtest(s))
1863 		return;
1864 	inet_unix_skb_redir_to_connected(skel, map, family);
1865 	unix_inet_skb_redir_to_connected(skel, map, family);
1866 }
1867 
/*
 * Run every inet test group for one map and address family: map operations
 * (stream, then datagram), stream redirects, reuseport (stream, then
 * datagram), UDP redirects and UDP <-> unix redirects.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	size_t i;

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_ops(skel, map, family, sotypes[i]);

	test_redir(skel, map, family, SOCK_STREAM);

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_reuseport(skel, map, family, sotypes[i]);

	test_udp_redir(skel, map, family);
	test_udp_unix_redir(skel, map, family);
}
1879 
serial_test_sockmap_listen(void)1880 void serial_test_sockmap_listen(void)
1881 {
1882 	struct test_sockmap_listen *skel;
1883 
1884 	skel = test_sockmap_listen__open_and_load();
1885 	if (!skel) {
1886 		FAIL("skeleton open/load failed");
1887 		return;
1888 	}
1889 
1890 	skel->bss->test_sockmap = true;
1891 	run_tests(skel, skel->maps.sock_map, AF_INET);
1892 	run_tests(skel, skel->maps.sock_map, AF_INET6);
1893 	test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
1894 	test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
1895 	test_vsock_redir(skel, skel->maps.sock_map);
1896 
1897 	skel->bss->test_sockmap = false;
1898 	run_tests(skel, skel->maps.sock_hash, AF_INET);
1899 	run_tests(skel, skel->maps.sock_hash, AF_INET6);
1900 	test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
1901 	test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
1902 	test_vsock_redir(skel, skel->maps.sock_hash);
1903 
1904 	test_sockmap_listen__destroy(skel);
1905 }
1906