1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "netlink_helpers.h"
28 #include "test_tc_neigh_fib.skel.h"
29 #include "test_tc_neigh.skel.h"
30 #include "test_tc_peer.skel.h"
31 #include "test_tc_dtime.skel.h"
32 
/* Fallback definition for toolchains whose headers lack TCP_TX_DELAY. */
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif

/* Names of the three network namespaces the test creates. */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 endpoint addresses and the TCP/UDP port used for IPv4 connectivity.
 * NOTE(review): the IP4_TUN_* addresses are not used in this part of the
 * file; presumably they belong to a tunnel based test further down.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses and port above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* IPv4 link-local addresses assigned to src_fwd (SLL) and dst_fwd (DLL) in
 * the fwd namespace; IP4_NET is the /16 routed towards them by the endpoints.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses given to dst_fwd and dst when veth devices are used. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes get_ifaddr() reads from /sys/class/net/<dev>/address. */
#define IFADDR_STR_LEN 18
/* Extra command line arguments handed to the ping command in test_ping(). */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* Timeout, in milliseconds, passed to connect_to_fd() and settimeo(). */
#define TIMEOUT_MILLIS 10000
/* Multiplier used to convert the seconds part of a timespec to nanoseconds. */
#define NSEC_PER_SEC 1000000000ULL
65 
/* Print a printf-style message to stderr, prefixed with the source file,
 * line number and the string form of the current errno value.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
69 
/* NULL-terminated list of the namespaces that are created and deleted. */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
71 
/* Overwrite the beginning of an existing file with the string @newval.
 *
 * @path:   file to open (must already exist, it is opened with "r+")
 * @newval: NUL-terminated string to write (the terminator is not written)
 *
 * Returns 0 on success and -1 if the file cannot be opened or the data
 * cannot be written.
 */
static int write_file(const char *path, const char *newval)
{
	int ret = 0;
	FILE *f;

	f = fopen(path, "r+");
	if (!f)
		return -1;

	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		ret = -1;
	}

	/* stdio buffers the data, so the underlying write() usually only
	 * happens when the stream is flushed by fclose(). A value rejected
	 * by the kernel is therefore reported here, not by fwrite().
	 */
	if (fclose(f) && !ret) {
		log_err("writing to %s failed", path);
		ret = -1;
	}

	return ret;
}
87 
netns_setup_namespaces(const char * verb)88 static int netns_setup_namespaces(const char *verb)
89 {
90 	const char * const *ns = namespaces;
91 	char cmd[128];
92 
93 	while (*ns) {
94 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
95 		if (!ASSERT_OK(system(cmd), cmd))
96 			return -1;
97 		ns++;
98 	}
99 	return 0;
100 }
101 
netns_setup_namespaces_nofail(const char * verb)102 static void netns_setup_namespaces_nofail(const char *verb)
103 {
104 	const char * const *ns = namespaces;
105 	char cmd[128];
106 
107 	while (*ns) {
108 		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
109 		system(cmd);
110 		ns++;
111 	}
112 }
113 
/* Kind of device pair used to connect the namespaces. */
enum dev_mode {
	MODE_VETH,	/* links created with "ip link add ... type veth" */
	MODE_NETKIT,	/* links created through create_netkit() */
};
118 
/* Input/output of netns_setup_links_and_routes(): dev_mode selects the
 * device type, the ifindex_* members are filled in with the interface
 * indexes of the created devices "src", "src_fwd", "dst" and "dst_fwd".
 */
struct netns_setup_result {
	enum dev_mode dev_mode;
	int ifindex_src;
	int ifindex_src_fwd;
	int ifindex_dst;
	int ifindex_dst_fwd;
};
126 
get_ifaddr(const char * name,char * ifaddr)127 static int get_ifaddr(const char *name, char *ifaddr)
128 {
129 	char path[PATH_MAX];
130 	FILE *f;
131 	int ret;
132 
133 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
134 	f = fopen(path, "r");
135 	if (!ASSERT_OK_PTR(f, path))
136 		return -1;
137 
138 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
139 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
140 		fclose(f);
141 		return -1;
142 	}
143 	fclose(f);
144 	return 0;
145 }
146 
/* Create a netkit device pair through an RTM_NEWLINK netlink request.
 *
 * @mode: value stored in the IFLA_NETKIT_MODE attribute
 * @prim: interface name of the primary device
 * @peer: interface name of the peer device
 *
 * Returns 0 on success, otherwise the error reported by rtnl_open() or
 * rtnl_talk().
 */
static int create_netkit(int mode, char *prim, char *peer)
{
	struct rtattr *linkinfo, *data, *peer_info;
	struct rtnl_handle rth = { .fd = -1 };
	const char *type = "netkit";
	struct {
		struct nlmsghdr n;
		struct ifinfomsg i;
		char buf[1024];
	} req = {};
	int err;

	err = rtnl_open(&rth, 0);
	if (!ASSERT_OK(err, "open_rtnetlink"))
		return err;

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	/* NLM_F_EXCL: fail instead of reusing an already existing device */
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.n.nlmsg_type = RTM_NEWLINK;
	req.i.ifi_family = AF_UNSPEC;

	/* Attribute layout:
	 *   IFLA_IFNAME = prim
	 *   IFLA_LINKINFO
	 *     IFLA_INFO_KIND = "netkit"
	 *     IFLA_INFO_DATA
	 *       IFLA_NETKIT_MODE = mode
	 *       IFLA_NETKIT_PEER_INFO
	 *         (zeroed struct ifinfomsg sized gap)
	 *         IFLA_IFNAME = peer
	 */
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
	linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
	peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
	/* Skip over sizeof(struct ifinfomsg) bytes of the zeroed buffer
	 * before the peer attributes; presumably the kernel expects an
	 * ifinfomsg header at the start of the peer info - TODO confirm.
	 */
	req.n.nlmsg_len += sizeof(struct ifinfomsg);
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
	addattr_nest_end(&req.n, peer_info);
	addattr_nest_end(&req.n, data);
	addattr_nest_end(&req.n, linkinfo);

	err = rtnl_talk(&rth, &req.n, NULL);
	ASSERT_OK(err, "talk_rtnetlink");
	rtnl_close(&rth);
	return err;
}
186 
/* Create the src/src_fwd and dst/dst_fwd device pairs, move them into the
 * src, fwd and dst namespaces and configure addresses, routes and (for
 * veth) static neighbour entries.
 *
 * @result: dev_mode is read to pick veth or netkit devices; the ifindex_*
 *          members are filled in with the indexes of the created devices.
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
	struct nstoken *nstoken = NULL;
	/* One extra, zero-initialized byte keeps the address NUL-terminated */
	char src_fwd_addr[IFADDR_STR_LEN+1] = {};
	int err;

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip link add src type veth peer name src_fwd");
		SYS(fail, "ip link add dst type veth peer name dst_fwd");

		SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
		SYS(fail, "ip link set dst address " MAC_DST);
	} else if (result->dev_mode == MODE_NETKIT) {
		err = create_netkit(NETKIT_L3, "src", "src_fwd");
		if (!ASSERT_OK(err, "create_ifindex_src"))
			goto fail;
		err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
		if (!ASSERT_OK(err, "create_ifindex_dst"))
			goto fail;
	}

	/* Remember the MAC of src_fwd for the neighbour entries added in
	 * the src namespace below.
	 */
	if (get_ifaddr("src_fwd", src_fwd_addr))
		goto fail;

	/* Record the interface indexes before the devices are moved out of
	 * the current namespace.
	 */
	result->ifindex_src = if_nametoindex("src");
	if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
		goto fail;

	result->ifindex_src_fwd = if_nametoindex("src_fwd");
	if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
		goto fail;

	result->ifindex_dst = if_nametoindex("dst");
	if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
		goto fail;

	result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
	if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
		goto fail;

	SYS(fail, "ip link set src netns " NS_SRC);
	SYS(fail, "ip link set src_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst netns " NS_DST);

	/** setup in 'src' namespace */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto fail;

	SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
	SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
	SYS(fail, "ip link set dev src up");

	SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");

	/* Static neighbour entries are only installed for veth devices */
	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
		    src_fwd_addr);
		SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
		    src_fwd_addr);
	}

	close_netns(nstoken);

	/** setup in 'fwd' namespace */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto fail;

	/* The fwd netns automatically gets a v6 LL address / routes, but also
	 * needs v4 one in order to start ARP probing. IP4_NET route is added
	 * to the endpoints so that the ARP processing will reply.
	 */
	SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
	SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
	SYS(fail, "ip link set dev src_fwd up");
	SYS(fail, "ip link set dev dst_fwd up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
	SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");

	close_netns(nstoken);

	/** setup in 'dst' namespace */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		goto fail;

	SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
	SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
	SYS(fail, "ip link set dev dst up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
		SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
	}

	close_netns(nstoken);

	return 0;
fail:
	/* nstoken is NULL when the failure happened outside a namespace or
	 * when open_netns() itself failed.
	 */
	if (nstoken)
		close_netns(nstoken);
	return -1;
}
301 
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)302 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
303 {
304 	char err_str[128], ifname[16];
305 	int err;
306 
307 	qdisc_hook->ifindex = ifindex;
308 	qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
309 	err = bpf_tc_hook_create(qdisc_hook);
310 	snprintf(err_str, sizeof(err_str),
311 		 "qdisc add dev %s clsact",
312 		 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
313 	err_str[sizeof(err_str) - 1] = 0;
314 	ASSERT_OK(err, err_str);
315 
316 	return err;
317 }
318 
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)319 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
320 			     enum bpf_tc_attach_point xgress,
321 			     const struct bpf_program *prog, int priority)
322 {
323 	LIBBPF_OPTS(bpf_tc_opts, tc_attach);
324 	char err_str[128], ifname[16];
325 	int err;
326 
327 	qdisc_hook->attach_point = xgress;
328 	tc_attach.prog_fd = bpf_program__fd(prog);
329 	tc_attach.priority = priority;
330 	err = bpf_tc_attach(qdisc_hook, &tc_attach);
331 	snprintf(err_str, sizeof(err_str),
332 		 "filter add dev %s %s prio %d bpf da %s",
333 		 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
334 		 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
335 		 priority, bpf_program__name(prog));
336 	err_str[sizeof(err_str) - 1] = 0;
337 	ASSERT_OK(err, err_str);
338 
339 	return err;
340 }
341 
/* Call qdisc_clsact_create() and jump to the caller's "fail" label on
 * error. The expansion site must provide an "err" variable, which
 * receives the return value, and a "fail" label.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({		\
	if ((err = qdisc_clsact_create(qdisc_hook, ifindex)))	\
		goto fail;					\
})
346 
/* Call xgress_filter_add() and jump to the caller's "fail" label on
 * error. The expansion site must provide an "err" variable, which
 * receives the return value, and a "fail" label.
 */
#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({		\
	if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority)))	\
		goto fail;							\
})
351 
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)352 static int netns_load_bpf(const struct bpf_program *src_prog,
353 			  const struct bpf_program *dst_prog,
354 			  const struct bpf_program *chk_prog,
355 			  const struct netns_setup_result *setup_result)
356 {
357 	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
358 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
359 	int err;
360 
361 	/* tc qdisc add dev src_fwd clsact */
362 	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
363 	/* tc filter add dev src_fwd ingress bpf da src_prog */
364 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
365 	/* tc filter add dev src_fwd egress bpf da chk_prog */
366 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
367 
368 	/* tc qdisc add dev dst_fwd clsact */
369 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
370 	/* tc filter add dev dst_fwd ingress bpf da dst_prog */
371 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
372 	/* tc filter add dev dst_fwd egress bpf da chk_prog */
373 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
374 
375 	return 0;
376 fail:
377 	return -1;
378 }
379 
/* Check TCP connectivity from the src to the dst namespace.
 *
 * A listener is started on @addr:@port inside NS_DST, a client connects
 * from inside NS_SRC, and one message is written by the client and read
 * back from the accepted socket.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *ns;
	int nbytes;

	ns = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(ns, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto done;

	/* Switch over to the client side namespace */
	close_netns(ns);
	ns = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(ns, "setns src"))
		goto done;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto done;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto done;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto done;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto done;

	nbytes = read(conn_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

done:
	/* ns is NULL when re-entering a namespace failed above */
	if (ns)
		close_netns(ns);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
428 
test_ping(int family,const char * addr)429 static int test_ping(int family, const char *addr)
430 {
431 	SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
432 	return 0;
433 fail:
434 	return -1;
435 }
436 
test_connectivity(void)437 static void test_connectivity(void)
438 {
439 	test_tcp(AF_INET, IP4_DST, IP4_PORT);
440 	test_ping(AF_INET, IP4_DST);
441 	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
442 	test_ping(AF_INET6, IP6_DST);
443 }
444 
/* Enable or disable IPv4 and IPv6 forwarding through procfs.
 *
 * @enable: true writes "1", false writes "0" to both sysctl files
 *
 * The files are resolved in whichever network namespace the caller is
 * currently in. Returns 0 on success or the write_file() error.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* Label the assertion with the value that was actually written */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
459 
rcv_tstamp(int fd,const char * expected,size_t s)460 static void rcv_tstamp(int fd, const char *expected, size_t s)
461 {
462 	struct __kernel_timespec pkt_ts = {};
463 	char ctl[CMSG_SPACE(sizeof(pkt_ts))];
464 	struct timespec now_ts;
465 	struct msghdr msg = {};
466 	__u64 now_ns, pkt_ns;
467 	struct cmsghdr *cmsg;
468 	struct iovec iov;
469 	char data[32];
470 	int ret;
471 
472 	iov.iov_base = data;
473 	iov.iov_len = sizeof(data);
474 	msg.msg_iov = &iov;
475 	msg.msg_iovlen = 1;
476 	msg.msg_control = &ctl;
477 	msg.msg_controllen = sizeof(ctl);
478 
479 	ret = recvmsg(fd, &msg, 0);
480 	if (!ASSERT_EQ(ret, s, "recvmsg"))
481 		return;
482 	ASSERT_STRNEQ(data, expected, s, "expected rcv data");
483 
484 	cmsg = CMSG_FIRSTHDR(&msg);
485 	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
486 	    cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
487 		memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
488 
489 	pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
490 	ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
491 
492 	ret = clock_gettime(CLOCK_REALTIME, &now_ts);
493 	ASSERT_OK(ret, "clock_gettime");
494 	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
495 
496 	if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
497 		ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
498 			  "check rcv tstamp");
499 }
500 
snd_tstamp(int fd,char * b,size_t s)501 static void snd_tstamp(int fd, char *b, size_t s)
502 {
503 	struct sock_txtime opt = { .clockid = CLOCK_TAI };
504 	char ctl[CMSG_SPACE(sizeof(__u64))];
505 	struct timespec now_ts;
506 	struct msghdr msg = {};
507 	struct cmsghdr *cmsg;
508 	struct iovec iov;
509 	__u64 now_ns;
510 	int ret;
511 
512 	ret = clock_gettime(CLOCK_TAI, &now_ts);
513 	ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
514 	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
515 
516 	iov.iov_base = b;
517 	iov.iov_len = s;
518 	msg.msg_iov = &iov;
519 	msg.msg_iovlen = 1;
520 	msg.msg_control = &ctl;
521 	msg.msg_controllen = sizeof(ctl);
522 
523 	cmsg = CMSG_FIRSTHDR(&msg);
524 	cmsg->cmsg_level = SOL_SOCKET;
525 	cmsg->cmsg_type = SCM_TXTIME;
526 	cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
527 	*(__u64 *)CMSG_DATA(cmsg) = now_ns;
528 
529 	ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
530 	ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
531 
532 	ret = sendmsg(fd, &msg, 0);
533 	ASSERT_EQ(ret, s, "sendmsg");
534 }
535 
/* Exchange one message between NS_SRC and NS_DST and check its timestamp.
 *
 * @family: address family handed to start_server()
 * @type:   SOCK_STREAM or a datagram socket type
 * @addr:   address the server binds to inside NS_DST
 * @port:   port the server binds to
 *
 * For SOCK_STREAM the client writes the payload and the accepted socket
 * is checked with rcv_tstamp(); otherwise the payload is sent with
 * snd_tstamp() and checked on the server socket itself.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int srv_fd, conn_fd = -1, cli_fd = -1, one = 1, rc, sent;
	char payload[] = "testing testing";
	struct nstoken *ns;

	/* The server socket lives in the dst namespace */
	ns = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(ns, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(ns);

	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* Request a receive timestamp for every skb */
	rc = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &one,
			sizeof(one));
	if (!ASSERT_OK(rc, "setsockopt(SO_TIMESTAMPNS_NEW)"))
		goto done;

	if (type == SOCK_STREAM) {
		/* Make the kernel set an EDT on the rst/ack packets that
		 * are sent from its ctl_sk.
		 */
		rc = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &one,
				sizeof(one));
		if (!ASSERT_OK(rc, "setsockopt(TCP_TX_DELAY)"))
			goto done;
	}

	/* The client socket is created in the src namespace */
	ns = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(ns, "setns src"))
		goto done;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(ns);

	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto done;

	if (type != SOCK_STREAM) {
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
		goto done;
	}

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto done;

	sent = write(cli_fd, payload, sizeof(payload));
	if (ASSERT_EQ(sent, sizeof(payload), "send to server"))
		rcv_tstamp(conn_fd, payload, sizeof(payload));

done:
	close(srv_fd);
	if (conn_fd != -1)
		close(conn_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
599 
/* Attach the test_tc_dtime programs in all three namespaces.
 *
 * @skel:         loaded skeleton providing the programs
 * @setup_result: supplies the interface indexes of the four devices
 *
 * In NS_SRC and NS_DST the ingress_host/egress_host programs are attached
 * to "src" and "dst". In NS_FWD both dst_fwd and src_fwd get the prio 100
 * and prio 101 fwdns programs on ingress and on egress.
 *
 * Returns 0 on success, -1 when a namespace cannot be entered, or the
 * error of the failing hook/filter operation. Hooks that were already
 * installed are not removed on failure.
 */
static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
				const struct netns_setup_result *setup_result)
{
	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
	struct nstoken *nstoken;
	int err;

	/* Note: QDISC_CLSACT_CREATE and XGRESS_FILTER_ADD store their
	 * result in err and jump to the fail label on error.
	 */

	/* setup ns_src tc progs */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
		return -1;
	/* tc qdisc add dev src clsact */
	QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
	/* tc filter add dev src ingress bpf da ingress_host */
	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
	/* tc filter add dev src egress bpf da egress_host */
	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
	close_netns(nstoken);

	/* setup ns_dst tc progs */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
		return -1;
	/* tc qdisc add dev dst clsact */
	QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
	/* tc filter add dev dst ingress bpf da ingress_host */
	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
	/* tc filter add dev dst egress bpf da egress_host */
	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
	close_netns(nstoken);

	/* setup ns_fwd tc progs */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
		return -1;
	/* tc qdisc add dev dst_fwd clsact */
	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
	/* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio100, 100);
	/* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio101, 101);
	/* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio100, 100);
	/* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio101, 101);

	/* tc qdisc add dev src_fwd clsact */
	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
	/* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio100, 100);
	/* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio101, 101);
	/* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio100, 100);
	/* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio101, 101);
	close_netns(nstoken);
	return 0;

fail:
	/* Leave whichever namespace was entered when the failure occurred */
	close_netns(nstoken);
	return err;
}
674 
/* Indexes into the per-test dtimes[] and errs[] counter arrays kept in the
 * test_tc_dtime skeleton's bss. SET_DTIME is the exclusive upper bound of
 * the dtimes checks; __MAX_CNT is the exclusive upper bound of the errs
 * checks.
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};
685 
/* Printable names of the counters, indexed by the counter enum above
 * (one entry for each value below __MAX_CNT).
 */
const char *cnt_names[] = {
	"ingress_fwdns_p100",
	"ingress_fwdns_p101",
	"egress_fwdns_p100",
	"egress_fwdns_p101",
	"ingress_endhost",
	"egress_endhost",
	"set_dtime",
};
695 
/* Identifiers of the individual dtime sub-tests. The value is stored in
 * the skeleton's bss "test" variable and selects the row of dtimes[] and
 * errs[] that is inspected. The *_RT_FWD variants are used when bpf_fwd
 * is false, i.e. when forwarding is left to the kernel.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};
709 
/* Printable names of the sub-tests, indexed by the test enum above.
 * NOTE(review): there is no entry for UKN_TEST, so that value must not be
 * used as an index into this table.
 */
const char *test_names[] = {
	"tcp ip6 clear dtime",
	"tcp ip4",
	"tcp ip6",
	"udp ip4",
	"udp ip6",
	"tcp ip4 rt fwd",
	"tcp ip6 rt fwd",
	"udp ip4 rt fwd",
	"udp ip6 rt fwd",
};
721 
dtime_cnt_str(int test,int cnt)722 static const char *dtime_cnt_str(int test, int cnt)
723 {
724 	static char name[64];
725 
726 	snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
727 
728 	return name;
729 }
730 
dtime_err_str(int test,int cnt)731 static const char *dtime_err_str(int test, int cnt)
732 {
733 	static char name[64];
734 
735 	snprintf(name, sizeof(name), "%s %s errs", test_names[test],
736 		 cnt_names[cnt]);
737 
738 	return name;
739 }
740 
test_tcp_clear_dtime(struct test_tc_dtime * skel)741 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
742 {
743 	int i, t = TCP_IP6_CLEAR_DTIME;
744 	__u32 *dtimes = skel->bss->dtimes[t];
745 	__u32 *errs = skel->bss->errs[t];
746 
747 	skel->bss->test = t;
748 	test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
749 
750 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
751 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
752 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
753 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
754 	ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
755 		  dtime_cnt_str(t, EGRESS_FWDNS_P100));
756 	ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
757 		  dtime_cnt_str(t, EGRESS_FWDNS_P101));
758 	ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
759 		  dtime_cnt_str(t, EGRESS_ENDHOST));
760 	ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
761 		  dtime_cnt_str(t, INGRESS_ENDHOST));
762 
763 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
764 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
765 }
766 
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)767 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
768 {
769 	__u32 *dtimes, *errs;
770 	const char *addr;
771 	int i, t;
772 
773 	if (family == AF_INET) {
774 		t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
775 		addr = IP4_DST;
776 	} else {
777 		t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
778 		addr = IP6_DST;
779 	}
780 
781 	dtimes = skel->bss->dtimes[t];
782 	errs = skel->bss->errs[t];
783 
784 	skel->bss->test = t;
785 	test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
786 
787 	/* fwdns_prio100 prog does not read delivery_time_type, so
788 	 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
789 	 */
790 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
791 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
792 	for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
793 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
794 
795 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
796 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
797 }
798 
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)799 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
800 {
801 	__u32 *dtimes, *errs;
802 	const char *addr;
803 	int i, t;
804 
805 	if (family == AF_INET) {
806 		t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
807 		addr = IP4_DST;
808 	} else {
809 		t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
810 		addr = IP6_DST;
811 	}
812 
813 	dtimes = skel->bss->dtimes[t];
814 	errs = skel->bss->errs[t];
815 
816 	skel->bss->test = t;
817 	test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
818 
819 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
820 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
821 	/* non mono delivery time is not forwarded */
822 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
823 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
824 	for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
825 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
826 
827 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
828 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
829 }
830 
test_tc_redirect_dtime(struct netns_setup_result * setup_result)831 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
832 {
833 	struct test_tc_dtime *skel;
834 	struct nstoken *nstoken;
835 	int err;
836 
837 	skel = test_tc_dtime__open();
838 	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
839 		return;
840 
841 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
842 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
843 
844 	err = test_tc_dtime__load(skel);
845 	if (!ASSERT_OK(err, "test_tc_dtime__load"))
846 		goto done;
847 
848 	if (netns_load_dtime_bpf(skel, setup_result))
849 		goto done;
850 
851 	nstoken = open_netns(NS_FWD);
852 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
853 		goto done;
854 	err = set_forwarding(false);
855 	close_netns(nstoken);
856 	if (!ASSERT_OK(err, "disable forwarding"))
857 		goto done;
858 
859 	test_tcp_clear_dtime(skel);
860 
861 	test_tcp_dtime(skel, AF_INET, true);
862 	test_tcp_dtime(skel, AF_INET6, true);
863 	test_udp_dtime(skel, AF_INET, true);
864 	test_udp_dtime(skel, AF_INET6, true);
865 
866 	/* Test the kernel ip[6]_forward path instead
867 	 * of bpf_redirect_neigh().
868 	 */
869 	nstoken = open_netns(NS_FWD);
870 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
871 		goto done;
872 	err = set_forwarding(true);
873 	close_netns(nstoken);
874 	if (!ASSERT_OK(err, "enable forwarding"))
875 		goto done;
876 
877 	test_tcp_dtime(skel, AF_INET, false);
878 	test_tcp_dtime(skel, AF_INET6, false);
879 	test_udp_dtime(skel, AF_INET, false);
880 	test_udp_dtime(skel, AF_INET6, false);
881 
882 done:
883 	test_tc_dtime__destroy(skel);
884 }
885 
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)886 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
887 {
888 	struct nstoken *nstoken = NULL;
889 	struct test_tc_neigh_fib *skel = NULL;
890 
891 	nstoken = open_netns(NS_FWD);
892 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
893 		return;
894 
895 	skel = test_tc_neigh_fib__open();
896 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
897 		goto done;
898 
899 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
900 		goto done;
901 
902 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
903 			   skel->progs.tc_chk, setup_result))
904 		goto done;
905 
906 	/* bpf_fib_lookup() checks if forwarding is enabled */
907 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
908 		goto done;
909 
910 	test_connectivity();
911 
912 done:
913 	if (skel)
914 		test_tc_neigh_fib__destroy(skel);
915 	close_netns(nstoken);
916 }
917 
test_tc_redirect_neigh(struct netns_setup_result * setup_result)918 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
919 {
920 	struct nstoken *nstoken = NULL;
921 	struct test_tc_neigh *skel = NULL;
922 	int err;
923 
924 	nstoken = open_netns(NS_FWD);
925 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
926 		return;
927 
928 	skel = test_tc_neigh__open();
929 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
930 		goto done;
931 
932 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
933 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
934 
935 	err = test_tc_neigh__load(skel);
936 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
937 		goto done;
938 
939 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
940 			   skel->progs.tc_chk, setup_result))
941 		goto done;
942 
943 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
944 		goto done;
945 
946 	test_connectivity();
947 
948 done:
949 	if (skel)
950 		test_tc_neigh__destroy(skel);
951 	close_netns(nstoken);
952 }
953 
test_tc_redirect_peer(struct netns_setup_result * setup_result)954 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
955 {
956 	struct nstoken *nstoken;
957 	struct test_tc_peer *skel;
958 	int err;
959 
960 	nstoken = open_netns(NS_FWD);
961 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
962 		return;
963 
964 	skel = test_tc_peer__open();
965 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
966 		goto done;
967 
968 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
969 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
970 
971 	err = test_tc_peer__load(skel);
972 	if (!ASSERT_OK(err, "test_tc_peer__load"))
973 		goto done;
974 
975 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
976 			   skel->progs.tc_chk, setup_result))
977 		goto done;
978 
979 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
980 		goto done;
981 
982 	test_connectivity();
983 
984 done:
985 	if (skel)
986 		test_tc_peer__destroy(skel);
987 	close_netns(nstoken);
988 }
989 
tun_open(char * name)990 static int tun_open(char *name)
991 {
992 	struct ifreq ifr;
993 	int fd, err;
994 
995 	fd = open("/dev/net/tun", O_RDWR);
996 	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
997 		return -1;
998 
999 	memset(&ifr, 0, sizeof(ifr));
1000 
1001 	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1002 	if (*name)
1003 		strncpy(ifr.ifr_name, name, IFNAMSIZ);
1004 
1005 	err = ioctl(fd, TUNSETIFF, &ifr);
1006 	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1007 		goto fail;
1008 
1009 	SYS(fail, "ip link set dev %s up", name);
1010 
1011 	return fd;
1012 fail:
1013 	close(fd);
1014 	return -1;
1015 }
1016 
/* Direction chosen for a single pass of tun_relay_loop(). */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};

/*
 * Copy data between @src_fd and @target_fd, one read()/write() pair per
 * pass, until an I/O call fails.
 *
 * Each pass blocks in select() until at least one descriptor is readable.
 * When @src_fd is readable it is serviced first; otherwise @target_fd is.
 *
 * Returns 1 after logging when select(), read() or write() fails (a short
 * write counts as a failure). There is no success return: the loop only
 * ends on error or when the process is terminated.
 */
static int tun_relay_loop(int src_fd, int target_fd)
{
	const int nfds = 1 + MAX(src_fd, target_fd);

	for (;;) {
		char buf[1500];
		int direction, nread, nwrite;
		fd_set rfds;

		/* select() rewrites the set in place, so rebuild it from
		 * scratch on every pass.
		 */
		FD_ZERO(&rfds);
		FD_SET(src_fd, &rfds);
		FD_SET(target_fd, &rfds);

		if (select(nfds, &rfds, NULL, NULL, NULL) < 0) {
			log_err("select failed");
			return 1;
		}

		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;

		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
		if (nread < 0) {
			log_err("read failed");
			return 1;
		}

		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
		if (nwrite != nread) {
			log_err("write failed");
			return 1;
		}
	}
}
1056 
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1057 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1058 {
1059 	LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1060 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1061 	struct test_tc_peer *skel = NULL;
1062 	struct nstoken *nstoken = NULL;
1063 	int err;
1064 	int tunnel_pid = -1;
1065 	int src_fd, target_fd = -1;
1066 	int ifindex;
1067 
1068 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1069 	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1070 	 * expose the L2 headers encapsulating the IP packet to BPF and hence
1071 	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1072 	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1073 	 * but that requires much more complicated setup.
1074 	 */
1075 	nstoken = open_netns(NS_SRC);
1076 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1077 		return;
1078 
1079 	src_fd = tun_open("tun_src");
1080 	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1081 		goto fail;
1082 
1083 	close_netns(nstoken);
1084 
1085 	nstoken = open_netns(NS_FWD);
1086 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1087 		goto fail;
1088 
1089 	target_fd = tun_open("tun_fwd");
1090 	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1091 		goto fail;
1092 
1093 	tunnel_pid = fork();
1094 	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1095 		goto fail;
1096 
1097 	if (tunnel_pid == 0)
1098 		exit(tun_relay_loop(src_fd, target_fd));
1099 
1100 	skel = test_tc_peer__open();
1101 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1102 		goto fail;
1103 
1104 	ifindex = if_nametoindex("tun_fwd");
1105 	if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1106 		goto fail;
1107 
1108 	skel->rodata->IFINDEX_SRC = ifindex;
1109 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1110 
1111 	err = test_tc_peer__load(skel);
1112 	if (!ASSERT_OK(err, "test_tc_peer__load"))
1113 		goto fail;
1114 
1115 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1116 	 * towards dst, and "tc_dst" to redirect packets
1117 	 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1118 	 */
1119 	/* tc qdisc add dev tun_fwd clsact */
1120 	QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1121 	/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1122 	XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1123 
1124 	/* tc qdisc add dev dst_fwd clsact */
1125 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1126 	/* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1127 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1128 	/* tc filter add dev dst_fwd egress bpf da tc_chk */
1129 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1130 
1131 	/* Setup route and neigh tables */
1132 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1133 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1134 
1135 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1136 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1137 
1138 	SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1139 	SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1140 	    " dev tun_src scope global");
1141 	SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1142 	SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1143 	SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1144 	    " dev tun_src scope global");
1145 	SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1146 
1147 	SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1148 	SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1149 
1150 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1151 		goto fail;
1152 
1153 	test_connectivity();
1154 
1155 fail:
1156 	if (tunnel_pid > 0) {
1157 		kill(tunnel_pid, SIGTERM);
1158 		waitpid(tunnel_pid, NULL, 0);
1159 	}
1160 	if (src_fd >= 0)
1161 		close(src_fd);
1162 	if (target_fd >= 0)
1163 		close(target_fd);
1164 	if (skel)
1165 		test_tc_peer__destroy(skel);
1166 	if (nstoken)
1167 		close_netns(nstoken);
1168 }
1169 
/*
 * RUN_TEST(name, mode) - run test_<name>() as the subtest called "<name>".
 *
 * When test__start_subtest() selects the subtest, the namespaces are
 * created with netns_setup_namespaces("add"); if that succeeds, links and
 * routes are set up into a local setup_result (dev_mode = mode) and, on
 * success, test_<name>(&setup_result) is called. The namespaces are deleted
 * again whenever the "add" step succeeded, regardless of later failures.
 */
#define RUN_TEST(name, mode)							\
	({									\
		struct netns_setup_result setup_result = { .dev_mode = mode, };	\
										\
		if (test__start_subtest(#name) &&				\
		    ASSERT_OK(netns_setup_namespaces("add"),			\
			      "setup namespaces")) {				\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
				      "setup links and routes")) {		\
				test_ ## name(&setup_result);			\
			}							\
			netns_setup_namespaces("delete");			\
		}								\
	})
1181 
test_tc_redirect_run_tests(void * arg)1182 static void *test_tc_redirect_run_tests(void *arg)
1183 {
1184 	netns_setup_namespaces_nofail("delete");
1185 
1186 	RUN_TEST(tc_redirect_peer, MODE_VETH);
1187 	RUN_TEST(tc_redirect_peer, MODE_NETKIT);
1188 	RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1189 	RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
1190 	RUN_TEST(tc_redirect_neigh, MODE_VETH);
1191 	RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1192 	RUN_TEST(tc_redirect_dtime, MODE_VETH);
1193 	return NULL;
1194 }
1195 
test_tc_redirect(void)1196 void test_tc_redirect(void)
1197 {
1198 	pthread_t test_thread;
1199 	int err;
1200 
1201 	/* Run the tests in their own thread to isolate the namespace changes
1202 	 * so they do not affect the environment of other tests.
1203 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1204 	 */
1205 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1206 	if (ASSERT_OK(err, "pthread_create"))
1207 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1208 }
1209