xref: /linux/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Test for sockmap/sockhash redirection.
4  *
5  * BPF_MAP_TYPE_SOCKMAP
6  * BPF_MAP_TYPE_SOCKHASH
7  *	x
8  * sk_msg-to-egress
9  * sk_msg-to-ingress
10  * sk_skb-to-egress
11  * sk_skb-to-ingress
12  *	x
13  * AF_INET, SOCK_STREAM
14  * AF_INET6, SOCK_STREAM
15  * AF_INET, SOCK_DGRAM
16  * AF_INET6, SOCK_DGRAM
17  * AF_UNIX, SOCK_STREAM
18  * AF_UNIX, SOCK_DGRAM
19  * AF_VSOCK, SOCK_STREAM
20  * AF_VSOCK, SOCK_SEQPACKET
21  */
22 
23 #include <errno.h>
24 #include <error.h>
25 #include <sched.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 
29 #include <netinet/in.h>
30 #include <sys/socket.h>
31 #include <sys/types.h>
32 #include <sys/un.h>
33 #include <linux/string.h>
34 #include <linux/vm_sockets.h>
35 
36 #include <bpf/bpf.h>
37 #include <bpf/libbpf.h>
38 
39 #include "linux/const.h"
40 #include "test_progs.h"
41 #include "sockmap_helpers.h"
42 #include "test_sockmap_redir.skel.h"
43 
44 /* The meaning of SUPPORTED is "will redirect packet as expected".
45  */
46 #define SUPPORTED		_BITUL(0)
47 
48 /* Note on sk_skb-to-ingress ->af_vsock:
49  *
50  * Peer socket may receive the packet some time after the return from sendmsg().
51  * In a typical usage scenario, recvmsg() will block until the redirected packet
52  * appears in the destination queue, or timeout if the packet was dropped. By
53  * that point, the verdict map has already been updated to reflect what has
54  * happened.
55  *
56  * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
57  * takes place. Which means we may race the execution of the verdict logic and
58  * read map_verd before it has been updated, i.e. we might observe
59  * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
60  *
61  * This confuses the selftest logic: if there was no packet dropped, where's the
62  * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
63  * (which implies the verdict program has not been run) just re-read the verdict
64  * map again.
65  */
66 #define UNSUPPORTED_RACY_VERD	_BITUL(1)
67 
/* Which verdict program is exercised and where it redirects the packet. */
enum prog_type {
	SK_MSG_EGRESS	= 0,	/* sk_msg verdict, no BPF_F_INGRESS */
	SK_MSG_INGRESS	= 1,	/* sk_msg verdict, BPF_F_INGRESS */
	SK_SKB_EGRESS	= 2,	/* sk_skb verdict, no BPF_F_INGRESS */
	SK_SKB_INGRESS	= 3,	/* sk_skb verdict, BPF_F_INGRESS */
};
74 
/* Values for redir_spec.idx_send: index into socket_spec.in[] to send on. */
enum {
	SEND_INNER = 0,
	SEND_OUTER = 1,
};
79 
/* Values for redir_spec.idx_recv: index into socket_spec.out[] to receive on. */
enum {
	RECV_INNER = 0,
	RECV_OUTER = 1,
};
84 
/* File descriptors of the BPF maps used by one redirection test. */
struct maps {
	int in, out;	/* in: map the verdict prog is attached to, holds sd_in;
			 * out: map holding sd_out
			 */
	int verd;	/* verdict counters, keyed by SK_DROP / SK_PASS */
};
90 
91 struct combo_spec {
92 	enum prog_type prog_type;
93 	const char *in, *out;
94 };
95 
96 struct redir_spec {
97 	const char *name;
98 	int idx_send;
99 	int idx_recv;
100 	enum prog_type prog_type;
101 };
102 
/*
 * A socket flavour (family/type plus optional send flags) together with
 * the two socket pairs created for it: in[] and out[].
 */
struct socket_spec {
	int family, sotype, send_flags;
	int in[2], out[2];
};
110 
socket_spec_pairs(struct socket_spec * s)111 static int socket_spec_pairs(struct socket_spec *s)
112 {
113 	return create_socket_pairs(s->family, s->sotype,
114 				   &s->in[0], &s->out[0],
115 				   &s->in[1], &s->out[1]);
116 }
117 
socket_spec_close(struct socket_spec * s)118 static void socket_spec_close(struct socket_spec *s)
119 {
120 	xclose(s->in[0]);
121 	xclose(s->in[1]);
122 	xclose(s->out[0]);
123 	xclose(s->out[1]);
124 }
125 
get_redir_params(struct redir_spec * redir,struct test_sockmap_redir * skel,int * prog_fd,enum bpf_attach_type * attach_type,int * redirect_flags)126 static void get_redir_params(struct redir_spec *redir,
127 			     struct test_sockmap_redir *skel, int *prog_fd,
128 			     enum bpf_attach_type *attach_type,
129 			     int *redirect_flags)
130 {
131 	enum prog_type type = redir->prog_type;
132 	struct bpf_program *prog;
133 	bool sk_msg;
134 
135 	sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
136 	prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
137 
138 	*prog_fd = bpf_program__fd(prog);
139 	*attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
140 
141 	if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
142 		*redirect_flags = BPF_F_INGRESS;
143 	else
144 		*redirect_flags = 0;
145 }
146 
/*
 * Attempt to read a single byte from @fd and check the outcome.
 *
 * @prefix:         label prepended to failure messages
 * @fd:             socket to read from
 * @flags:          recv() flags, e.g. MSG_DONTWAIT and/or MSG_OOB
 * @expect_success: when true, a negative return is reported as a failure;
 *                  when false, any non-negative return is reported
 *
 * With @expect_success false the caller asserts that nothing can be read.
 * recv() returning 0 and recv() returning 1 both mean the call did not
 * fail, so both must be flagged; checking only for 0 would let queued data
 * slip through unnoticed.
 */
static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
{
	ssize_t n;
	char buf;

	errno = 0;
	n = recv(fd, &buf, 1, flags);
	if (n < 0 && expect_success)
		FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
	if (n >= 0 && !expect_success)
		FAIL("%s: expected failure: retval=%zd", prefix, n);
}
159 
handle_unsupported(int sd_send,int sd_peer,int sd_in,int sd_out,int sd_recv,int map_verd,int status)160 static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
161 			       int sd_recv, int map_verd, int status)
162 {
163 	unsigned int drop, pass;
164 	char recv_buf;
165 	ssize_t n;
166 
167 get_verdict:
168 	if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
169 	    xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
170 		return;
171 
172 	if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
173 		sched_yield();
174 		goto get_verdict;
175 	}
176 
177 	if (pass != 0) {
178 		FAIL("unsupported: wanted verdict pass 0, have %u", pass);
179 		return;
180 	}
181 
182 	/* If nothing was dropped, packet should have reached the peer */
183 	if (drop == 0) {
184 		errno = 0;
185 		n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
186 		if (n != 1)
187 			FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
188 	}
189 
190 	/* Ensure queues are empty */
191 	try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
192 	if (sd_in != sd_send)
193 		try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
194 
195 	try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
196 	if (sd_recv != sd_out)
197 		try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
198 }
199 
test_send_redir_recv(int sd_send,int send_flags,int sd_peer,int sd_in,int sd_out,int sd_recv,struct maps * maps,int status)200 static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
201 				 int sd_in, int sd_out, int sd_recv,
202 				 struct maps *maps, int status)
203 {
204 	unsigned int drop, pass;
205 	char *send_buf = "ab";
206 	char recv_buf = '\0';
207 	ssize_t n, len = 1;
208 
209 	/* Zero out the verdict map */
210 	if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
211 	    xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
212 		return;
213 
214 	if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
215 		return;
216 
217 	if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
218 		goto del_in;
219 
220 	/* Last byte is OOB data when send_flags has MSG_OOB bit set */
221 	if (send_flags & MSG_OOB)
222 		len++;
223 	n = send(sd_send, send_buf, len, send_flags);
224 	if (n >= 0 && n < len)
225 		FAIL("incomplete send");
226 	if (n < 0) {
227 		/* sk_msg redirect combo not supported? */
228 		if (status & SUPPORTED || errno != EACCES)
229 			FAIL_ERRNO("send");
230 		goto out;
231 	}
232 
233 	if (!(status & SUPPORTED)) {
234 		handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
235 				   maps->verd, status);
236 		goto out;
237 	}
238 
239 	errno = 0;
240 	n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
241 	if (n != 1) {
242 		FAIL_ERRNO("recv_timeout()");
243 		goto out;
244 	}
245 
246 	/* Check verdict _after_ recv(); af_vsock may need time to catch up */
247 	if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
248 	    xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
249 		goto out;
250 
251 	if (drop != 0 || pass != 1)
252 		FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
253 		     drop, pass);
254 
255 	if (recv_buf != send_buf[0])
256 		FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
257 
258 	if (send_flags & MSG_OOB) {
259 		/* Fail reading OOB while in sockmap */
260 		try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
261 			 MSG_OOB | MSG_DONTWAIT, false);
262 
263 		/* Remove sd_out from sockmap */
264 		xbpf_map_delete_elem(maps->out, &u32(0));
265 
266 		/* Check that OOB was dropped on redirect */
267 		try_recv("recv(sd_out, MSG_OOB)", sd_out,
268 			 MSG_OOB | MSG_DONTWAIT, false);
269 
270 		goto del_in;
271 	}
272 out:
273 	xbpf_map_delete_elem(maps->out, &u32(0));
274 del_in:
275 	xbpf_map_delete_elem(maps->in, &u32(0));
276 }
277 
is_redir_supported(enum prog_type type,const char * in,const char * out)278 static int is_redir_supported(enum prog_type type, const char *in,
279 			      const char *out)
280 {
281 	/* Matching based on strings returned by socket_kind_to_str():
282 	 * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
283 	 * Plus a wildcard: any
284 	 * Not in use: u_seq, v_dgr
285 	 */
286 	struct combo_spec *c, combos[] = {
287 		/* Send to local: TCP -> any, but vsock */
288 		{ SK_MSG_INGRESS,	"tcp",	"tcp"	},
289 		{ SK_MSG_INGRESS,	"tcp",	"udp"	},
290 		{ SK_MSG_INGRESS,	"tcp",	"u_str"	},
291 		{ SK_MSG_INGRESS,	"tcp",	"u_dgr"	},
292 
293 		/* Send to egress: TCP -> TCP */
294 		{ SK_MSG_EGRESS,	"tcp",	"tcp"	},
295 
296 		/* Ingress to egress: any -> any */
297 		{ SK_SKB_EGRESS,	"any",	"any"	},
298 
299 		/* Ingress to local: any -> any, but vsock */
300 		{ SK_SKB_INGRESS,	"any",	"tcp"	},
301 		{ SK_SKB_INGRESS,	"any",	"udp"	},
302 		{ SK_SKB_INGRESS,	"any",	"u_str"	},
303 		{ SK_SKB_INGRESS,	"any",	"u_dgr"	},
304 	};
305 
306 	for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
307 		if (c->prog_type == type &&
308 		    (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
309 		    (!strcmp(c->out, "any") || strstarts(out, c->out)))
310 			return SUPPORTED;
311 	}
312 
313 	return 0;
314 }
315 
get_support_status(enum prog_type type,const char * in,const char * out)316 static int get_support_status(enum prog_type type, const char *in,
317 			      const char *out)
318 {
319 	int status = is_redir_supported(type, in, out);
320 
321 	if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
322 		status |= UNSUPPORTED_RACY_VERD;
323 
324 	return status;
325 }
326 
test_socket(enum bpf_map_type type,struct redir_spec * redir,struct maps * maps,struct socket_spec * s_in,struct socket_spec * s_out)327 static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
328 			struct maps *maps, struct socket_spec *s_in,
329 			struct socket_spec *s_out)
330 {
331 	int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
332 	const char *in_str, *out_str;
333 	char s[MAX_TEST_NAME];
334 
335 	fd_in = s_in->in[0];
336 	fd_out = s_out->out[0];
337 	fd_send = s_in->in[redir->idx_send];
338 	fd_peer = s_in->in[redir->idx_send ^ 1];
339 	fd_recv = s_out->out[redir->idx_recv];
340 	flags = s_in->send_flags;
341 
342 	in_str = socket_kind_to_str(fd_in);
343 	out_str = socket_kind_to_str(fd_out);
344 	status = get_support_status(redir->prog_type, in_str, out_str);
345 
346 	snprintf(s, sizeof(s),
347 		 "%-4s %-17s %-5s %s %-5s%6s",
348 		 /* hash sk_skb-to-ingress u_str → v_str (OOB) */
349 		 type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
350 		 redir->name,
351 		 in_str,
352 		 status & SUPPORTED ? "→" : " ",
353 		 out_str,
354 		 (flags & MSG_OOB) ? "(OOB)" : "");
355 
356 	if (!test__start_subtest(s))
357 		return;
358 
359 	test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
360 			     maps, status);
361 }
362 
test_redir(enum bpf_map_type type,struct redir_spec * redir,struct maps * maps)363 static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
364 		       struct maps *maps)
365 {
366 	struct socket_spec *s, sockets[] = {
367 		{ AF_INET, SOCK_STREAM },
368 		// { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
369 		{ AF_INET6, SOCK_STREAM },
370 		{ AF_INET, SOCK_DGRAM },
371 		{ AF_INET6, SOCK_DGRAM },
372 		{ AF_UNIX, SOCK_STREAM },
373 		{ AF_UNIX, SOCK_STREAM, MSG_OOB },
374 		{ AF_UNIX, SOCK_DGRAM },
375 		// { AF_UNIX, SOCK_SEQPACKET},	/* Unsupported BPF_MAP_UPDATE_ELEM */
376 		{ AF_VSOCK, SOCK_STREAM },
377 		// { AF_VSOCK, SOCK_DGRAM },	/* Unsupported socket() */
378 		{ AF_VSOCK, SOCK_SEQPACKET },
379 	};
380 
381 	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
382 		if (socket_spec_pairs(s))
383 			goto out;
384 
385 	/* Intra-proto */
386 	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
387 		test_socket(type, redir, maps, s, s);
388 
389 	/* Cross-proto */
390 	for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
391 		for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
392 			struct socket_spec *out = &sockets[j];
393 			struct socket_spec *in = &sockets[i];
394 
395 			/* Skip intra-proto and between variants */
396 			if (out->send_flags ||
397 			    (in->family == out->family &&
398 			     in->sotype == out->sotype))
399 				continue;
400 
401 			test_socket(type, redir, maps, in, out);
402 		}
403 	}
404 out:
405 	while (--s >= sockets)
406 		socket_spec_close(s);
407 }
408 
test_map(enum bpf_map_type type)409 static void test_map(enum bpf_map_type type)
410 {
411 	struct redir_spec *r, redirs[] = {
412 		{ "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
413 		{ "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
414 		{ "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
415 		{ "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
416 	};
417 
418 	for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
419 		enum bpf_attach_type attach_type;
420 		struct test_sockmap_redir *skel;
421 		struct maps maps;
422 		int prog_fd;
423 
424 		skel = test_sockmap_redir__open_and_load();
425 		if (!skel) {
426 			FAIL("open_and_load");
427 			return;
428 		}
429 
430 		switch (type) {
431 		case BPF_MAP_TYPE_SOCKMAP:
432 			maps.in = bpf_map__fd(skel->maps.nop_map);
433 			maps.out = bpf_map__fd(skel->maps.sock_map);
434 			break;
435 		case BPF_MAP_TYPE_SOCKHASH:
436 			maps.in = bpf_map__fd(skel->maps.nop_hash);
437 			maps.out = bpf_map__fd(skel->maps.sock_hash);
438 			break;
439 		default:
440 			FAIL("Unsupported bpf_map_type");
441 			return;
442 		}
443 
444 		skel->bss->redirect_type = type;
445 		maps.verd = bpf_map__fd(skel->maps.verdict_map);
446 		get_redir_params(r, skel, &prog_fd, &attach_type,
447 				 &skel->bss->redirect_flags);
448 
449 		if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
450 			return;
451 
452 		test_redir(type, r, &maps);
453 
454 		if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
455 			return;
456 
457 		test_sockmap_redir__destroy(skel);
458 	}
459 }
460 
serial_test_sockmap_redir(void)461 void serial_test_sockmap_redir(void)
462 {
463 	test_map(BPF_MAP_TYPE_SOCKMAP);
464 	test_map(BPF_MAP_TYPE_SOCKHASH);
465 }
466