1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
11 */
12
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "netlink_helpers.h"
28 #include "test_tc_neigh_fib.skel.h"
29 #include "test_tc_neigh.skel.h"
30 #include "test_tc_peer.skel.h"
31 #include "test_tc_dtime.skel.h"
32
/* Fallback definition used when the included headers do not provide it. */
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif

/* Names of the three network namespaces (client, forwarder, server). */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/*
 * IPv4 addresses of the src and dst endpoints and the TCP port used by
 * test_connectivity(). The *_TUN_* addresses are not referenced in this part
 * of the file; presumably they belong to a tunnel test further down.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses and port above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/*
 * IPv4 link-local addresses put on src_fwd (SLL) and dst_fwd (DLL) in the fwd
 * namespace, and the network that the endpoints get a /16 route for.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses given to the veth devices in MODE_VETH. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
#define MAC_SRC_FWD "00:33:44:55:66:77"
#define MAC_SRC "00:44:55:66:77:88"

/* Number of bytes get_ifaddr() reads from /sys/class/net/<dev>/address. */
#define IFADDR_STR_LEN 18
/* Options passed to the ping command in test_ping(). */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* Timeout handed to connect_to_fd() and settimeo(). */
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
67
/*
 * Print MSG (a printf-style format literal, with optional arguments) to
 * stderr, prefixed with the source file, line number and strerror(errno),
 * and followed by a newline.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
71
/* NULL-terminated list of the namespace names handled by this test. */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
/* Namespace handles, one per entry of namespaces[] (same order). */
static struct netns_obj *netns_objs[3];
74
/*
 * Write the string @newval at the start of the existing file @path (used by
 * set_forwarding() for /proc/sys entries).
 *
 * Returns 0 on success, -1 if the file cannot be opened or the data cannot
 * be written out.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;
	int ret = 0;

	f = fopen(path, "r+");
	if (!f)
		return -1;

	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		ret = -1;
	}

	/*
	 * stdio buffers the data, so the underlying write may only be issued
	 * (and rejected) when the stream is flushed by fclose(). Treat a
	 * failing fclose() as a failed write instead of reporting success.
	 */
	if (fclose(f) && !ret) {
		log_err("writing to %s failed", path);
		ret = -1;
	}

	return ret;
}
90
netns_setup_namespaces(const char * verb)91 static int netns_setup_namespaces(const char *verb)
92 {
93 struct netns_obj **ns_obj = netns_objs;
94 const char * const *ns = namespaces;
95
96 while (*ns) {
97 if (strcmp(verb, "add") == 0) {
98 *ns_obj = netns_new(*ns, false);
99 if (!ASSERT_OK_PTR(*ns_obj, "netns_new"))
100 return -1;
101 } else {
102 if (!ASSERT_OK_PTR(*ns_obj, "netns_obj is NULL"))
103 return -1;
104 netns_free(*ns_obj);
105 *ns_obj = NULL;
106 }
107 ns++;
108 ns_obj++;
109 }
110 return 0;
111 }
112
netns_setup_namespaces_nofail(const char * verb)113 static void netns_setup_namespaces_nofail(const char *verb)
114 {
115 struct netns_obj **ns_obj = netns_objs;
116 const char * const *ns = namespaces;
117
118 while (*ns) {
119 if (strcmp(verb, "add") == 0) {
120 *ns_obj = netns_new(*ns, false);
121 } else {
122 if (*ns_obj)
123 netns_free(*ns_obj);
124 *ns_obj = NULL;
125 }
126 ns++;
127 ns_obj++;
128 }
129 }
130
/* Kind of device pair netns_setup_links_and_routes() creates. */
enum dev_mode {
	MODE_VETH,	/* veth pairs created with "ip link add ... type veth" */
	MODE_NETKIT,	/* netkit pairs created with create_netkit() */
};
135
/*
 * Input/output of netns_setup_links_and_routes(): dev_mode selects the device
 * type, the ifindex_* members are filled in with the if_nametoindex() value
 * of the "src", "src_fwd", "dst" and "dst_fwd" devices.
 */
struct netns_setup_result {
	enum dev_mode dev_mode;
	int ifindex_src;
	int ifindex_src_fwd;
	int ifindex_dst;
	int ifindex_dst_fwd;
};
143
get_ifaddr(const char * name,char * ifaddr)144 static int get_ifaddr(const char *name, char *ifaddr)
145 {
146 char path[PATH_MAX];
147 FILE *f;
148 int ret;
149
150 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
151 f = fopen(path, "r");
152 if (!ASSERT_OK_PTR(f, path))
153 return -1;
154
155 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
156 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
157 fclose(f);
158 return -1;
159 }
160 fclose(f);
161 return 0;
162 }
163
create_netkit(int mode,char * prim,char * peer)164 static int create_netkit(int mode, char *prim, char *peer)
165 {
166 struct rtattr *linkinfo, *data, *peer_info;
167 struct rtnl_handle rth = { .fd = -1 };
168 const char *type = "netkit";
169 struct {
170 struct nlmsghdr n;
171 struct ifinfomsg i;
172 char buf[1024];
173 } req = {};
174 int err;
175
176 err = rtnl_open(&rth, 0);
177 if (!ASSERT_OK(err, "open_rtnetlink"))
178 return err;
179
180 memset(&req, 0, sizeof(req));
181 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
182 req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
183 req.n.nlmsg_type = RTM_NEWLINK;
184 req.i.ifi_family = AF_UNSPEC;
185
186 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
187 linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
188 addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
189 data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
190 addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
191 peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
192 req.n.nlmsg_len += sizeof(struct ifinfomsg);
193 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
194 addattr_nest_end(&req.n, peer_info);
195 addattr_nest_end(&req.n, data);
196 addattr_nest_end(&req.n, linkinfo);
197
198 err = rtnl_talk(&rth, &req.n, NULL);
199 ASSERT_OK(err, "talk_rtnetlink");
200 rtnl_close(&rth);
201 return err;
202 }
203
netns_setup_links_and_routes(struct netns_setup_result * result)204 static int netns_setup_links_and_routes(struct netns_setup_result *result)
205 {
206 struct nstoken *nstoken = NULL;
207 char src_fwd_addr[IFADDR_STR_LEN+1] = {};
208 char src_addr[IFADDR_STR_LEN + 1] = {};
209 int err;
210
211 if (result->dev_mode == MODE_VETH) {
212 SYS(fail, "ip link add src address " MAC_SRC " type veth "
213 "peer name src_fwd address " MAC_SRC_FWD);
214 SYS(fail, "ip link add dst address " MAC_DST " type veth "
215 "peer name dst_fwd address " MAC_DST_FWD);
216 } else if (result->dev_mode == MODE_NETKIT) {
217 err = create_netkit(NETKIT_L3, "src", "src_fwd");
218 if (!ASSERT_OK(err, "create_ifindex_src"))
219 goto fail;
220 err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
221 if (!ASSERT_OK(err, "create_ifindex_dst"))
222 goto fail;
223 }
224
225 if (get_ifaddr("src_fwd", src_fwd_addr))
226 goto fail;
227
228 if (get_ifaddr("src", src_addr))
229 goto fail;
230
231 result->ifindex_src = if_nametoindex("src");
232 if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
233 goto fail;
234
235 result->ifindex_src_fwd = if_nametoindex("src_fwd");
236 if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
237 goto fail;
238
239 result->ifindex_dst = if_nametoindex("dst");
240 if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
241 goto fail;
242
243 result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
244 if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
245 goto fail;
246
247 SYS(fail, "ip link set src netns " NS_SRC);
248 SYS(fail, "ip link set src_fwd netns " NS_FWD);
249 SYS(fail, "ip link set dst_fwd netns " NS_FWD);
250 SYS(fail, "ip link set dst netns " NS_DST);
251
252 /** setup in 'src' namespace */
253 nstoken = open_netns(NS_SRC);
254 if (!ASSERT_OK_PTR(nstoken, "setns src"))
255 goto fail;
256
257 SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
258 SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
259 SYS(fail, "ip link set dev src up");
260
261 SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
262 SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
263 SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
264
265 if (result->dev_mode == MODE_VETH) {
266 SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
267 src_fwd_addr);
268 SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
269 src_fwd_addr);
270 }
271
272 close_netns(nstoken);
273
274 /** setup in 'fwd' namespace */
275 nstoken = open_netns(NS_FWD);
276 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
277 goto fail;
278
279 /* The fwd netns automatically gets a v6 LL address / routes, but also
280 * needs v4 one in order to start ARP probing. IP4_NET route is added
281 * to the endpoints so that the ARP processing will reply.
282 */
283 SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
284 SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
285 SYS(fail, "ip link set dev src_fwd up");
286 SYS(fail, "ip link set dev dst_fwd up");
287
288 SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
289 SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
290 SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
291 SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
292
293 if (result->dev_mode == MODE_VETH) {
294 SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
295 SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
296 SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
297 SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
298 }
299
300 close_netns(nstoken);
301
302 /** setup in 'dst' namespace */
303 nstoken = open_netns(NS_DST);
304 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
305 goto fail;
306
307 SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
308 SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
309 SYS(fail, "ip link set dev dst up");
310 SYS(fail, "ip link set dev lo up");
311
312 SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
313 SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
314 SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
315
316 if (result->dev_mode == MODE_VETH) {
317 SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
318 SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
319 }
320
321 close_netns(nstoken);
322
323 return 0;
324 fail:
325 if (nstoken)
326 close_netns(nstoken);
327 return -1;
328 }
329
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)330 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
331 {
332 char err_str[128], ifname[16];
333 int err;
334
335 qdisc_hook->ifindex = ifindex;
336 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
337 err = bpf_tc_hook_create(qdisc_hook);
338 snprintf(err_str, sizeof(err_str),
339 "qdisc add dev %s clsact",
340 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
341 err_str[sizeof(err_str) - 1] = 0;
342 ASSERT_OK(err, err_str);
343
344 return err;
345 }
346
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)347 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
348 enum bpf_tc_attach_point xgress,
349 const struct bpf_program *prog, int priority)
350 {
351 LIBBPF_OPTS(bpf_tc_opts, tc_attach);
352 char err_str[128], ifname[16];
353 int err;
354
355 qdisc_hook->attach_point = xgress;
356 tc_attach.prog_fd = bpf_program__fd(prog);
357 tc_attach.priority = priority;
358 err = bpf_tc_attach(qdisc_hook, &tc_attach);
359 snprintf(err_str, sizeof(err_str),
360 "filter add dev %s %s prio %d bpf da %s",
361 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
362 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
363 priority, bpf_program__name(prog));
364 err_str[sizeof(err_str) - 1] = 0;
365 ASSERT_OK(err, err_str);
366
367 return err;
368 }
369
/*
 * Call qdisc_clsact_create() and jump to the caller's "fail" label on error.
 * The caller must have an "int err" in scope; it receives the result.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) do {			\
	err = qdisc_clsact_create(qdisc_hook, ifindex);			\
	if (err)							\
		goto fail;						\
} while (0)
374
/*
 * Call xgress_filter_add() and jump to the caller's "fail" label on error.
 * The caller must have an "int err" in scope; it receives the result.
 */
#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) do {	\
	err = xgress_filter_add(qdisc_hook, xgress, prog, priority);	\
	if (err)							\
		goto fail;						\
} while (0)
379
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)380 static int netns_load_bpf(const struct bpf_program *src_prog,
381 const struct bpf_program *dst_prog,
382 const struct bpf_program *chk_prog,
383 const struct netns_setup_result *setup_result)
384 {
385 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
386 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
387 int err;
388
389 /* tc qdisc add dev src_fwd clsact */
390 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
391 /* tc filter add dev src_fwd ingress bpf da src_prog */
392 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
393 /* tc filter add dev src_fwd egress bpf da chk_prog */
394 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
395
396 /* tc qdisc add dev dst_fwd clsact */
397 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
398 /* tc filter add dev dst_fwd ingress bpf da dst_prog */
399 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
400 /* tc filter add dev dst_fwd egress bpf da chk_prog */
401 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
402
403 return 0;
404 fail:
405 return -1;
406 }
407
/*
 * Start a TCP server on @addr:@port inside NS_DST, connect to it from NS_SRC
 * and check that one message travels from the client to the accepted socket.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto cleanup;

	/* Switch over to the client side namespace. */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto cleanup;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto cleanup;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto cleanup;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto cleanup;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto cleanup;

	nbytes = read(conn_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

cleanup:
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
456
test_ping(int family,const char * addr)457 static int test_ping(int family, const char *addr)
458 {
459 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
460 return 0;
461 fail:
462 return -1;
463 }
464
test_connectivity(void)465 static void test_connectivity(void)
466 {
467 test_tcp(AF_INET, IP4_DST, IP4_PORT);
468 test_ping(AF_INET, IP4_DST);
469 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
470 test_ping(AF_INET6, IP6_DST);
471 }
472
/*
 * Enable or disable IPv4 and IPv6 forwarding in the current network
 * namespace by writing "1"/"0" to the corresponding /proc/sys files.
 *
 * Returns 0 on success, otherwise the write_file() error.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* The assertion label reflects the value actually being written. */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
487
/*
 * Receive one message of exactly @s bytes from @fd, compare it with
 * @expected and extract the SO_TIMESTAMPNS receive timestamp (0 if the first
 * control message is not one).
 *
 * With @tstamp set, the timestamp in nanoseconds is stored there and left
 * for the caller to judge. With @tstamp NULL, the timestamp is asserted to
 * be non-zero, not in the future and less than 5 seconds old (as measured
 * against CLOCK_REALTIME).
 *
 * Returns 0 once a message of the right size was received, -1 otherwise.
 */
static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
	struct timespec rx_ts = {};
	char cbuf[CMSG_SPACE(sizeof(rx_ts))];
	char payload[32];
	struct iovec vec = {
		.iov_base = payload,
		.iov_len = sizeof(payload),
	};
	struct msghdr mh = {
		.msg_iov = &vec,
		.msg_iovlen = 1,
		.msg_control = &cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	__u64 rx_ns, wall_ns;
	struct timespec wall;
	struct cmsghdr *hdr;
	int ret;

	ret = recvmsg(fd, &mh, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return -1;
	ASSERT_STRNEQ(payload, expected, s, "expected rcv data");

	hdr = CMSG_FIRSTHDR(&mh);
	if (hdr && hdr->cmsg_level == SOL_SOCKET &&
	    hdr->cmsg_type == SO_TIMESTAMPNS)
		memcpy(&rx_ts, CMSG_DATA(hdr), sizeof(rx_ts));

	rx_ns = rx_ts.tv_sec * NSEC_PER_SEC + rx_ts.tv_nsec;
	if (tstamp) {
		/* caller will check the tstamp itself */
		*tstamp = rx_ns;
		return 0;
	}

	ASSERT_NEQ(rx_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &wall);
	ASSERT_OK(ret, "clock_gettime");
	wall_ns = wall.tv_sec * NSEC_PER_SEC + wall.tv_nsec;

	if (ASSERT_GE(wall_ns, rx_ns, "check rcv tstamp"))
		ASSERT_LT(wall_ns - rx_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");

	return 0;
}
535
/*
 * Receive @s bytes from @fd and let __rcv_tstamp() itself assert on the
 * payload and on the receive timestamp; the return value is not needed.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	(void)__rcv_tstamp(fd, expected, s, NULL);
}
540
wait_netstamp_needed_key(void)541 static int wait_netstamp_needed_key(void)
542 {
543 int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
544 char buf[] = "testing testing";
545 struct nstoken *nstoken;
546 __u64 tstamp = 0;
547
548 nstoken = open_netns(NS_DST);
549 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
550 return -1;
551
552 srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
553 if (!ASSERT_GE(srv_fd, 0, "start_server"))
554 goto done;
555
556 err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS,
557 &opt, sizeof(opt));
558 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
559 goto done;
560
561 cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
562 if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
563 goto done;
564
565 again:
566 n = write(cli_fd, buf, sizeof(buf));
567 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
568 goto done;
569 err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
570 if (!ASSERT_OK(err, "__rcv_tstamp"))
571 goto done;
572 if (!tstamp && nretries++ < 5) {
573 sleep(1);
574 printf("netstamp_needed_key retry#%d\n", nretries);
575 goto again;
576 }
577
578 done:
579 if (!tstamp && srv_fd != -1) {
580 close(srv_fd);
581 srv_fd = -1;
582 }
583 if (cli_fd != -1)
584 close(cli_fd);
585 close_netns(nstoken);
586 return srv_fd;
587 }
588
snd_tstamp(int fd,char * b,size_t s)589 static void snd_tstamp(int fd, char *b, size_t s)
590 {
591 struct sock_txtime opt = { .clockid = CLOCK_TAI };
592 char ctl[CMSG_SPACE(sizeof(__u64))];
593 struct timespec now_ts;
594 struct msghdr msg = {};
595 struct cmsghdr *cmsg;
596 struct iovec iov;
597 __u64 now_ns;
598 int ret;
599
600 ret = clock_gettime(CLOCK_TAI, &now_ts);
601 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
602 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
603
604 iov.iov_base = b;
605 iov.iov_len = s;
606 msg.msg_iov = &iov;
607 msg.msg_iovlen = 1;
608 msg.msg_control = &ctl;
609 msg.msg_controllen = sizeof(ctl);
610
611 cmsg = CMSG_FIRSTHDR(&msg);
612 cmsg->cmsg_level = SOL_SOCKET;
613 cmsg->cmsg_type = SCM_TXTIME;
614 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
615 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
616
617 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
618 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
619
620 ret = sendmsg(fd, &msg, 0);
621 ASSERT_EQ(ret, s, "sendmsg");
622 }
623
/*
 * Start a @type server on @addr:@port inside NS_DST, connect from NS_SRC and
 * pass one message to the server, checking the receive timestamp on the
 * server side. For SOCK_STREAM the data goes through an accepted connection;
 * otherwise it is sent with snd_tstamp().
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int one = 1, conn_fd = -1, cli_fd = -1, srv_fd, err;
	char payload[] = "testing testing";
	struct nstoken *tok;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(tok);

	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* Ensure the kernel puts the (rcv) timestamp for all skb */
	err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS,
			 &one, sizeof(one));
	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
		goto cleanup;

	if (type == SOCK_STREAM) {
		/* Ensure the kernel set EDT when sending out rst/ack
		 * from the kernel's ctl_sk.
		 */
		err = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &one,
				 sizeof(one));
		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
			goto cleanup;
	}

	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto cleanup;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(tok);

	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto cleanup;

	if (type != SOCK_STREAM) {
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
	} else {
		int nbytes;

		conn_fd = accept(srv_fd, NULL, NULL);
		if (!ASSERT_GE(conn_fd, 0, "accept"))
			goto cleanup;

		nbytes = write(cli_fd, payload, sizeof(payload));
		if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
			goto cleanup;
		rcv_tstamp(conn_fd, payload, sizeof(payload));
	}

cleanup:
	close(srv_fd);
	if (conn_fd != -1)
		close(conn_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
687
netns_load_dtime_bpf(struct test_tc_dtime * skel,const struct netns_setup_result * setup_result)688 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
689 const struct netns_setup_result *setup_result)
690 {
691 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
692 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
693 LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
694 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
695 struct nstoken *nstoken;
696 int err;
697
698 /* setup ns_src tc progs */
699 nstoken = open_netns(NS_SRC);
700 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
701 return -1;
702 /* tc qdisc add dev src clsact */
703 QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
704 /* tc filter add dev src ingress bpf da ingress_host */
705 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
706 /* tc filter add dev src egress bpf da egress_host */
707 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
708 close_netns(nstoken);
709
710 /* setup ns_dst tc progs */
711 nstoken = open_netns(NS_DST);
712 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
713 return -1;
714 /* tc qdisc add dev dst clsact */
715 QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
716 /* tc filter add dev dst ingress bpf da ingress_host */
717 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
718 /* tc filter add dev dst egress bpf da egress_host */
719 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
720 close_netns(nstoken);
721
722 /* setup ns_fwd tc progs */
723 nstoken = open_netns(NS_FWD);
724 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
725 return -1;
726 /* tc qdisc add dev dst_fwd clsact */
727 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
728 /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
729 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
730 skel->progs.ingress_fwdns_prio100, 100);
731 /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
732 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
733 skel->progs.ingress_fwdns_prio101, 101);
734 /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
735 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
736 skel->progs.egress_fwdns_prio100, 100);
737 /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
738 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
739 skel->progs.egress_fwdns_prio101, 101);
740
741 /* tc qdisc add dev src_fwd clsact */
742 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
743 /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
744 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
745 skel->progs.ingress_fwdns_prio100, 100);
746 /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
747 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
748 skel->progs.ingress_fwdns_prio101, 101);
749 /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
750 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
751 skel->progs.egress_fwdns_prio100, 100);
752 /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
753 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
754 skel->progs.egress_fwdns_prio101, 101);
755 close_netns(nstoken);
756 return 0;
757
758 fail:
759 close_netns(nstoken);
760 return err;
761 }
762
/*
 * Indexes into the per-test dtimes[] and errs[] counter arrays kept in the
 * test_tc_dtime skeleton's bss, and into cnt_names[].
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,	/* upper bound of the dtimes[] checks; errs[] is still checked */
	__MAX_CNT,	/* number of counters, bound of the errs[] checks */
};
773
774 const char *cnt_names[] = {
775 "ingress_fwdns_p100",
776 "ingress_fwdns_p101",
777 "egress_fwdns_p100",
778 "egress_fwdns_p101",
779 "ingress_endhost",
780 "egress_endhost",
781 "set_dtime",
782 };
783
/*
 * Identifier of each dtime sub-test. The value is written to the skeleton's
 * bss->test, selects the dtimes[]/errs[] row that gets checked, indexes
 * test_names[] and is added to 50000 to form the server port.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,	/* NOTE(review): not used in the visible code and has no test_names[] entry */
	__NR_TESTS,
};
797
798 const char *test_names[] = {
799 "tcp ip6 clear dtime",
800 "tcp ip4",
801 "tcp ip6",
802 "udp ip4",
803 "udp ip6",
804 "tcp ip4 rt fwd",
805 "tcp ip6 rt fwd",
806 "udp ip4 rt fwd",
807 "udp ip6 rt fwd",
808 };
809
dtime_cnt_str(int test,int cnt)810 static const char *dtime_cnt_str(int test, int cnt)
811 {
812 static char name[64];
813
814 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
815
816 return name;
817 }
818
dtime_err_str(int test,int cnt)819 static const char *dtime_err_str(int test, int cnt)
820 {
821 static char name[64];
822
823 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
824 cnt_names[cnt]);
825
826 return name;
827 }
828
test_tcp_clear_dtime(struct test_tc_dtime * skel)829 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
830 {
831 int i, t = TCP_IP6_CLEAR_DTIME;
832 __u32 *dtimes = skel->bss->dtimes[t];
833 __u32 *errs = skel->bss->errs[t];
834
835 skel->bss->test = t;
836 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
837
838 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
839 dtime_cnt_str(t, INGRESS_FWDNS_P100));
840 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
841 dtime_cnt_str(t, INGRESS_FWDNS_P101));
842 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
843 dtime_cnt_str(t, EGRESS_FWDNS_P100));
844 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
845 dtime_cnt_str(t, EGRESS_FWDNS_P101));
846 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
847 dtime_cnt_str(t, EGRESS_ENDHOST));
848 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
849 dtime_cnt_str(t, INGRESS_ENDHOST));
850
851 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
852 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
853 }
854
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)855 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
856 {
857 __u32 *dtimes, *errs;
858 const char *addr;
859 int i, t;
860
861 if (family == AF_INET) {
862 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
863 addr = IP4_DST;
864 } else {
865 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
866 addr = IP6_DST;
867 }
868
869 dtimes = skel->bss->dtimes[t];
870 errs = skel->bss->errs[t];
871
872 skel->bss->test = t;
873 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
874
875 /* fwdns_prio100 prog does not read delivery_time_type, so
876 * kernel puts the (rcv) timestamp in __sk_buff->tstamp
877 */
878 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
879 dtime_cnt_str(t, INGRESS_FWDNS_P100));
880 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
881 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
882
883 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
884 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
885 }
886
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)887 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
888 {
889 __u32 *dtimes, *errs;
890 const char *addr;
891 int i, t;
892
893 if (family == AF_INET) {
894 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
895 addr = IP4_DST;
896 } else {
897 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
898 addr = IP6_DST;
899 }
900
901 dtimes = skel->bss->dtimes[t];
902 errs = skel->bss->errs[t];
903
904 skel->bss->test = t;
905 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
906
907 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
908 dtime_cnt_str(t, INGRESS_FWDNS_P100));
909 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
910 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
911
912 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
913 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
914 }
915
test_tc_redirect_dtime(struct netns_setup_result * setup_result)916 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
917 {
918 struct test_tc_dtime *skel;
919 struct nstoken *nstoken;
920 int hold_tstamp_fd, err;
921
922 /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
923 * is no delay in the kernel net_enable_timestamp().
924 * This ensures the following tests must have
925 * non zero rcv tstamp in the recvmsg().
926 */
927 hold_tstamp_fd = wait_netstamp_needed_key();
928 if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
929 return;
930
931 skel = test_tc_dtime__open();
932 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
933 goto done;
934
935 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
936 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
937
938 err = test_tc_dtime__load(skel);
939 if (!ASSERT_OK(err, "test_tc_dtime__load"))
940 goto done;
941
942 if (netns_load_dtime_bpf(skel, setup_result))
943 goto done;
944
945 nstoken = open_netns(NS_FWD);
946 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
947 goto done;
948 err = set_forwarding(false);
949 close_netns(nstoken);
950 if (!ASSERT_OK(err, "disable forwarding"))
951 goto done;
952
953 test_tcp_clear_dtime(skel);
954
955 test_tcp_dtime(skel, AF_INET, true);
956 test_tcp_dtime(skel, AF_INET6, true);
957 test_udp_dtime(skel, AF_INET, true);
958 test_udp_dtime(skel, AF_INET6, true);
959
960 /* Test the kernel ip[6]_forward path instead
961 * of bpf_redirect_neigh().
962 */
963 nstoken = open_netns(NS_FWD);
964 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
965 goto done;
966 err = set_forwarding(true);
967 close_netns(nstoken);
968 if (!ASSERT_OK(err, "enable forwarding"))
969 goto done;
970
971 test_tcp_dtime(skel, AF_INET, false);
972 test_tcp_dtime(skel, AF_INET6, false);
973 test_udp_dtime(skel, AF_INET, false);
974 test_udp_dtime(skel, AF_INET6, false);
975
976 done:
977 test_tc_dtime__destroy(skel);
978 close(hold_tstamp_fd);
979 }
980
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)981 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
982 {
983 struct nstoken *nstoken = NULL;
984 struct test_tc_neigh_fib *skel = NULL;
985
986 nstoken = open_netns(NS_FWD);
987 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
988 return;
989
990 skel = test_tc_neigh_fib__open();
991 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
992 goto done;
993
994 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
995 goto done;
996
997 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
998 skel->progs.tc_chk, setup_result))
999 goto done;
1000
1001 /* bpf_fib_lookup() checks if forwarding is enabled */
1002 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
1003 goto done;
1004
1005 test_connectivity();
1006
1007 done:
1008 if (skel)
1009 test_tc_neigh_fib__destroy(skel);
1010 close_netns(nstoken);
1011 }
1012
test_tc_redirect_neigh(struct netns_setup_result * setup_result)1013 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
1014 {
1015 struct nstoken *nstoken = NULL;
1016 struct test_tc_neigh *skel = NULL;
1017 int err;
1018
1019 nstoken = open_netns(NS_FWD);
1020 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1021 return;
1022
1023 skel = test_tc_neigh__open();
1024 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
1025 goto done;
1026
1027 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1028 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1029
1030 err = test_tc_neigh__load(skel);
1031 if (!ASSERT_OK(err, "test_tc_neigh__load"))
1032 goto done;
1033
1034 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1035 skel->progs.tc_chk, setup_result))
1036 goto done;
1037
1038 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1039 goto done;
1040
1041 test_connectivity();
1042
1043 done:
1044 if (skel)
1045 test_tc_neigh__destroy(skel);
1046 close_netns(nstoken);
1047 }
1048
test_tc_redirect_peer(struct netns_setup_result * setup_result)1049 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
1050 {
1051 struct nstoken *nstoken;
1052 struct test_tc_peer *skel;
1053 int err;
1054
1055 nstoken = open_netns(NS_FWD);
1056 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1057 return;
1058
1059 skel = test_tc_peer__open();
1060 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1061 goto done;
1062
1063 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1064 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1065
1066 err = test_tc_peer__load(skel);
1067 if (!ASSERT_OK(err, "test_tc_peer__load"))
1068 goto done;
1069
1070 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1071 skel->progs.tc_chk, setup_result))
1072 goto done;
1073
1074 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1075 goto done;
1076
1077 test_connectivity();
1078
1079 done:
1080 if (skel)
1081 test_tc_peer__destroy(skel);
1082 close_netns(nstoken);
1083 }
1084
tun_open(char * name)1085 static int tun_open(char *name)
1086 {
1087 struct ifreq ifr;
1088 int fd, err;
1089
1090 fd = open("/dev/net/tun", O_RDWR);
1091 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
1092 return -1;
1093
1094 memset(&ifr, 0, sizeof(ifr));
1095
1096 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1097 if (*name)
1098 strncpy(ifr.ifr_name, name, IFNAMSIZ);
1099
1100 err = ioctl(fd, TUNSETIFF, &ifr);
1101 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1102 goto fail;
1103
1104 SYS(fail, "ip link set dev %s up", name);
1105
1106 return fd;
1107 fail:
1108 close(fd);
1109 return -1;
1110 }
1111
/* Direction in which tun_relay_loop() forwards the packet it just read. */
enum {
	SRC_TO_TARGET,	/* read from src_fd, write to target_fd */
	TARGET_TO_SRC,	/* read from target_fd, write to src_fd */
};
1116
tun_relay_loop(int src_fd,int target_fd)1117 static int tun_relay_loop(int src_fd, int target_fd)
1118 {
1119 fd_set rfds, wfds;
1120
1121 FD_ZERO(&rfds);
1122 FD_ZERO(&wfds);
1123
1124 for (;;) {
1125 char buf[1500];
1126 int direction, nread, nwrite;
1127
1128 FD_SET(src_fd, &rfds);
1129 FD_SET(target_fd, &rfds);
1130
1131 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
1132 log_err("select failed");
1133 return 1;
1134 }
1135
1136 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
1137
1138 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
1139 if (nread < 0) {
1140 log_err("read failed");
1141 return 1;
1142 }
1143
1144 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
1145 if (nwrite != nread) {
1146 log_err("write failed");
1147 return 1;
1148 }
1149 }
1150 }
1151
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1152 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1153 {
1154 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1155 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1156 struct test_tc_peer *skel = NULL;
1157 struct nstoken *nstoken = NULL;
1158 int err;
1159 int tunnel_pid = -1;
1160 int src_fd, target_fd = -1;
1161 int ifindex;
1162
1163 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1164 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1165 * expose the L2 headers encapsulating the IP packet to BPF and hence
1166 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1167 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1168 * but that requires much more complicated setup.
1169 */
1170 nstoken = open_netns(NS_SRC);
1171 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1172 return;
1173
1174 src_fd = tun_open("tun_src");
1175 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1176 goto fail;
1177
1178 close_netns(nstoken);
1179
1180 nstoken = open_netns(NS_FWD);
1181 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1182 goto fail;
1183
1184 target_fd = tun_open("tun_fwd");
1185 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1186 goto fail;
1187
1188 tunnel_pid = fork();
1189 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1190 goto fail;
1191
1192 if (tunnel_pid == 0)
1193 exit(tun_relay_loop(src_fd, target_fd));
1194
1195 skel = test_tc_peer__open();
1196 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1197 goto fail;
1198
1199 ifindex = if_nametoindex("tun_fwd");
1200 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1201 goto fail;
1202
1203 skel->rodata->IFINDEX_SRC = ifindex;
1204 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1205
1206 err = test_tc_peer__load(skel);
1207 if (!ASSERT_OK(err, "test_tc_peer__load"))
1208 goto fail;
1209
1210 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1211 * towards dst, and "tc_dst" to redirect packets
1212 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1213 */
1214 /* tc qdisc add dev tun_fwd clsact */
1215 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1216 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1217 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1218
1219 /* tc qdisc add dev dst_fwd clsact */
1220 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1221 /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1222 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1223 /* tc filter add dev dst_fwd egress bpf da tc_chk */
1224 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1225
1226 /* Setup route and neigh tables */
1227 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1228 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1229
1230 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1231 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1232
1233 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1234 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1235 " dev tun_src scope global");
1236 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1237 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1238 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1239 " dev tun_src scope global");
1240 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1241
1242 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1243 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1244
1245 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1246 goto fail;
1247
1248 test_connectivity();
1249
1250 fail:
1251 if (tunnel_pid > 0) {
1252 kill(tunnel_pid, SIGTERM);
1253 waitpid(tunnel_pid, NULL, 0);
1254 }
1255 if (src_fd >= 0)
1256 close(src_fd);
1257 if (target_fd >= 0)
1258 close(target_fd);
1259 if (skel)
1260 test_tc_peer__destroy(skel);
1261 if (nstoken)
1262 close_netns(nstoken);
1263 }
1264
/*
 * Run subtest test_<name>() with a setup_result whose dev_mode is @mode.
 *
 * Nothing happens unless test__start_subtest() selects the subtest. When it
 * does, the namespaces are added, links and routes are set up, and the test
 * body runs only if both steps succeed. The namespaces are deleted again
 * whenever adding them succeeded, regardless of the later steps.
 */
#define RUN_TEST(name, mode)								\
	({										\
		struct netns_setup_result setup_result = { .dev_mode = mode, };	\
		if (test__start_subtest(#name) &&					\
		    ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) {	\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),	\
				      "setup links and routes")) {			\
				test_ ## name(&setup_result);				\
			}								\
			netns_setup_namespaces("delete");				\
		}									\
	})
1276
test_tc_redirect_run_tests(void * arg)1277 static void *test_tc_redirect_run_tests(void *arg)
1278 {
1279 netns_setup_namespaces_nofail("delete");
1280
1281 RUN_TEST(tc_redirect_peer, MODE_VETH);
1282 RUN_TEST(tc_redirect_peer, MODE_NETKIT);
1283 RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1284 RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
1285 RUN_TEST(tc_redirect_neigh, MODE_VETH);
1286 RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1287 RUN_TEST(tc_redirect_dtime, MODE_VETH);
1288 return NULL;
1289 }
1290
test_tc_redirect(void)1291 void test_tc_redirect(void)
1292 {
1293 pthread_t test_thread;
1294 int err;
1295
1296 /* Run the tests in their own thread to isolate the namespace changes
1297 * so they do not affect the environment of other tests.
1298 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1299 */
1300 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1301 if (ASSERT_OK(err, "pthread_create"))
1302 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1303 }
1304