1 // SPDX-License-Identifier: GPL-2.0-only
2 #define _GNU_SOURCE
3
4 #include <errno.h>
5 #include <stdbool.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sched.h>
10
11 #include <arpa/inet.h>
12 #include <sys/mount.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/un.h>
16 #include <sys/eventfd.h>
17
18 #include <linux/err.h>
19 #include <linux/in.h>
20 #include <linux/in6.h>
21 #include <linux/limits.h>
22
23 #include <linux/ip.h>
24 #include <netinet/udp.h>
25 #include <netinet/tcp.h>
26 #include <net/if.h>
27
28 #include "bpf_util.h"
29 #include "network_helpers.h"
30 #include "test_progs.h"
31
32 #ifdef TRAFFIC_MONITOR
33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
35 #include <pcap/pcap.h>
36 #include <pcap/dlt.h>
37 #endif
38
39 #ifndef IPPROTO_MPTCP
40 #define IPPROTO_MPTCP 262
41 #endif
42
/* Description of the current errno value: "None" when errno is 0,
 * strerror(errno) otherwise.
 */
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
/* Print a printf-style message to stderr, prefixed with the source file,
 * line number and the current errno description, and followed by a newline.
 * errno is saved before printing and restored afterwards so that the
 * caller still sees the value that was set by the failing call.
 */
#define log_err(MSG, ...) ({						\
	int __save = errno;						\
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n",		\
		__FILE__, __LINE__, clean_errno(),			\
		##__VA_ARGS__);						\
	errno = __save;							\
})
51
/* Global IPv4/TCP packet template: Ethernet protocol ETH_P_IP, IP header
 * length of 5 words, IP protocol TCP, total length MAGIC_BYTES and a TCP
 * data offset of 5 words.
 * NOTE(review): urg_ptr = 123 looks like a marker value consumers can match
 * on; confirm against the users of this template.
 */
struct ipv4_packet pkt_v4 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
	.iph.ihl = 5,
	.iph.protocol = IPPROTO_TCP,
	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
60
/* Global IPv6/TCP packet template: Ethernet protocol ETH_P_IPV6, next header
 * TCP, payload length MAGIC_BYTES and a TCP data offset of 5 words.
 * NOTE(review): urg_ptr = 123 looks like a marker value consumers can match
 * on; confirm against the users of this template.
 */
struct ipv6_packet pkt_v6 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
	.iph.nexthdr = IPPROTO_TCP,
	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
68
/* Zero-initialized options substituted by the helpers below whenever a
 * caller passes opts == NULL.
 */
static const struct network_helper_opts default_opts;
70
/* Apply one timeout to both the receive and the send side of socket @fd.
 *
 * @timeout_ms: timeout in milliseconds when > 0; any other value selects
 *              the default of 3 seconds.
 *
 * Returns 0 on success, or -1 after logging if either setsockopt() fails.
 */
int settimeo(int fd, int timeout_ms)
{
	struct timeval tv;

	if (timeout_ms > 0) {
		tv.tv_sec = timeout_ms / 1000;
		tv.tv_usec = (timeout_ms % 1000) * 1000;
	} else {
		tv.tv_sec = 3;
		tv.tv_usec = 0;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_RCVTIMEO");
		return -1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_SNDTIMEO");
		return -1;
	}

	return 0;
}
94
/* Close @fd while keeping errno at the value it had before the close(), so
 * that the error which led to the cleanup is the one callers observe.
 */
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
96
start_server_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
98 const struct network_helper_opts *opts)
99 {
100 int fd;
101
102 if (!opts)
103 opts = &default_opts;
104
105 fd = socket(addr->ss_family, type, opts->proto);
106 if (fd < 0) {
107 log_err("Failed to create server socket");
108 return -1;
109 }
110
111 if (settimeo(fd, opts->timeout_ms))
112 goto error_close;
113
114 if (opts->post_socket_cb &&
115 opts->post_socket_cb(fd, opts->cb_opts)) {
116 log_err("Failed to call post_socket_cb");
117 goto error_close;
118 }
119
120 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
121 log_err("Failed to bind socket");
122 goto error_close;
123 }
124
125 if (type == SOCK_STREAM) {
126 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
127 log_err("Failed to listed on socket");
128 goto error_close;
129 }
130 }
131
132 return fd;
133
134 error_close:
135 save_errno_close(fd);
136 return -1;
137 }
138
/* Build a socket address from @family, @addr_str and @port, then start a
 * server on it through start_server_addr().
 *
 * @opts may be NULL, in which case the zeroed default options are used.
 *
 * Returns the server fd, or -1 if the address could not be built or the
 * server could not be started.
 */
int start_server_str(int family, int type, const char *addr_str, __u16 port,
		     const struct network_helper_opts *opts)
{
	struct sockaddr_storage ss;
	socklen_t ss_len;
	int err;

	err = make_sockaddr(family, addr_str, port, &ss, &ss_len);
	if (err)
		return -1;

	return start_server_addr(type, &ss, ss_len, opts ? opts : &default_opts);
}
153
/* Convenience wrapper around start_server_str() for callers that only need
 * to choose a timeout; every other option is left zeroed.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server(int family, int type, const char *addr_str, __u16 port,
		 int timeout_ms)
{
	return start_server_str(family, type, addr_str, port,
				&(struct network_helper_opts){
					.timeout_ms = timeout_ms,
				});
}
163
reuseport_cb(int fd,void * opts)164 static int reuseport_cb(int fd, void *opts)
165 {
166 int on = 1;
167
168 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
169 }
170
/* Start @nr_listens servers that share one address through SO_REUSEPORT.
 *
 * The first server is bound to the address built from @family, @addr_str
 * and @port. Its bound address is then read back with getsockname() and
 * used for the remaining servers (presumably so that a kernel-assigned port
 * is shared when @port is 0 - confirm with callers).
 *
 * Returns a malloc'ed array of @nr_listens fds that the caller releases
 * with free_fds(), or NULL on failure or when @nr_listens is 0. On failure
 * every socket opened so far is closed.
 */
int *start_reuseport_server(int family, int type, const char *addr_str,
			    __u16 port, int timeout_ms, unsigned int nr_listens)
{
	struct network_helper_opts opts = {
		.timeout_ms = timeout_ms,
		.post_socket_cb = reuseport_cb,
	};
	struct sockaddr_storage ss;
	unsigned int opened = 0;
	socklen_t ss_len;
	int *fds;

	if (nr_listens == 0)
		return NULL;

	if (make_sockaddr(family, addr_str, port, &ss, &ss_len) != 0)
		return NULL;

	fds = malloc(nr_listens * sizeof(*fds));
	if (fds == NULL)
		return NULL;

	fds[0] = start_server_addr(type, &ss, ss_len, &opts);
	if (fds[0] == -1)
		goto cleanup;
	opened = 1;

	/* Switch to the address the first socket actually got bound to. */
	if (getsockname(fds[0], (struct sockaddr *)&ss, &ss_len) != 0)
		goto cleanup;

	while (opened < nr_listens) {
		fds[opened] = start_server_addr(type, &ss, ss_len, &opts);
		if (fds[opened] == -1)
			goto cleanup;
		opened++;
	}

	return fds;

cleanup:
	free_fds(fds, opened);
	return NULL;
}
213
/* Close the first @nr_close_fds descriptors stored in @fds (last one first)
 * and free the array itself. A NULL @fds is ignored.
 */
void free_fds(int *fds, unsigned int nr_close_fds)
{
	unsigned int i;

	if (fds == NULL)
		return;

	for (i = nr_close_fds; i > 0; i--)
		close(fds[i - 1]);

	free(fds);
}
222
/* Open a TCP connection to the address @server_fd is bound to, sending
 * @data_len bytes of @data with MSG_FASTOPEN as part of the connect.
 *
 * @timeout_ms is applied to the new socket through settimeo().
 *
 * Returns the connected client fd, or -1 on failure (the client socket is
 * closed and errno is preserved across the close).
 */
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
		     int timeout_ms)
{
	struct sockaddr_storage addr;
	socklen_t addrlen = sizeof(addr);
	ssize_t ret;
	int fd;

	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
		log_err("Failed to get server addr");
		return -1;
	}

	fd = socket(addr.ss_family, SOCK_STREAM, 0);
	if (fd < 0) {
		log_err("Failed to create client socket");
		return -1;
	}

	if (settimeo(fd, timeout_ms))
		goto error_close;

	ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
		     addrlen);
	/* Check the error case explicitly rather than relying on -1 being
	 * converted to a large unsigned value in the comparison.
	 */
	if (ret < 0 || (size_t)ret != data_len) {
		/* log_err() already appends the trailing newline */
		log_err("sendto(data, %u) != %zd", data_len, ret);
		goto error_close;
	}

	return fd;

error_close:
	save_errno_close(fd);
	return -1;
}
259
client_socket(int family,int type,const struct network_helper_opts * opts)260 int client_socket(int family, int type,
261 const struct network_helper_opts *opts)
262 {
263 int fd;
264
265 if (!opts)
266 opts = &default_opts;
267
268 fd = socket(family, type, opts->proto);
269 if (fd < 0) {
270 log_err("Failed to create client socket");
271 return -1;
272 }
273
274 if (settimeo(fd, opts->timeout_ms))
275 goto error_close;
276
277 if (opts->post_socket_cb &&
278 opts->post_socket_cb(fd, opts->cb_opts))
279 goto error_close;
280
281 return fd;
282
283 error_close:
284 save_errno_close(fd);
285 return -1;
286 }
287
connect_to_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)288 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
289 const struct network_helper_opts *opts)
290 {
291 int fd;
292
293 if (!opts)
294 opts = &default_opts;
295
296 fd = client_socket(addr->ss_family, type, opts);
297 if (fd < 0) {
298 log_err("Failed to create client socket");
299 return -1;
300 }
301
302 if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
303 log_err("Failed to connect to server");
304 save_errno_close(fd);
305 return -1;
306 }
307
308 return fd;
309 }
310
/* Build a socket address from @family, @addr_str and @port, then connect to
 * it through connect_to_addr().
 *
 * @opts may be NULL, in which case the zeroed default options are used.
 *
 * Returns the connected fd, or -1 if the address could not be built or the
 * connection failed.
 */
int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
			const struct network_helper_opts *opts)
{
	struct sockaddr_storage ss;
	socklen_t ss_len;
	int err;

	err = make_sockaddr(family, addr_str, port, &ss, &ss_len);
	if (err)
		return -1;

	return connect_to_addr(type, &ss, ss_len, opts ? opts : &default_opts);
}
325
connect_to_fd_opts(int server_fd,const struct network_helper_opts * opts)326 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
327 {
328 struct sockaddr_storage addr;
329 socklen_t addrlen, optlen;
330 int type;
331
332 if (!opts)
333 opts = &default_opts;
334
335 optlen = sizeof(type);
336 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
337 log_err("getsockopt(SOL_TYPE)");
338 return -1;
339 }
340
341 addrlen = sizeof(addr);
342 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
343 log_err("Failed to get server addr");
344 return -1;
345 }
346
347 return connect_to_addr(type, &addr, addrlen, opts);
348 }
349
connect_to_fd(int server_fd,int timeout_ms)350 int connect_to_fd(int server_fd, int timeout_ms)
351 {
352 struct network_helper_opts opts = {
353 .timeout_ms = timeout_ms,
354 };
355 socklen_t optlen;
356 int protocol;
357
358 optlen = sizeof(protocol);
359 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
360 log_err("getsockopt(SOL_PROTOCOL)");
361 return -1;
362 }
363 opts.proto = protocol;
364
365 return connect_to_fd_opts(server_fd, &opts);
366 }
367
/* Connect the existing socket @client_fd to the address @server_fd is bound
 * to, after applying @timeout_ms to @client_fd.
 *
 * Returns 0 on success and -1 on failure. @client_fd is never closed here.
 */
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);
	int rc;

	rc = settimeo(client_fd, timeout_ms);
	if (rc)
		return -1;

	rc = getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len);
	if (rc) {
		log_err("Failed to get server addr");
		return -1;
	}

	rc = connect(client_fd, (const struct sockaddr *)&srv_addr, srv_len);
	if (rc) {
		log_err("Failed to connect to server");
		return -1;
	}

	return 0;
}
388
/* Fill @addr with a socket address for @family.
 *
 * @family:   AF_INET, AF_INET6 or AF_UNIX.
 * @addr_str: textual IPv4/IPv6 address, or the name of an abstract unix
 *            socket. May be NULL for AF_INET/AF_INET6, which leaves the
 *            address part zeroed; it is required for AF_UNIX.
 * @port:     port in host byte order (ignored for AF_UNIX).
 * @addr:     storage receiving the address.
 * @len:      if non-NULL, receives the number of valid bytes in @addr.
 *
 * Returns 0 on success. Returns -1 when the address string cannot be
 * parsed, when an AF_UNIX name is missing or does not fit into sun_path, or
 * when @family is not one of the supported families.
 */
int make_sockaddr(int family, const char *addr_str, __u16 port,
		  struct sockaddr_storage *addr, socklen_t *len)
{
	if (family == AF_INET) {
		struct sockaddr_in *sin = (void *)addr;

		memset(addr, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
			log_err("inet_pton(AF_INET, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin);
		return 0;
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (void *)addr;

		memset(addr, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
			log_err("inet_pton(AF_INET6, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin6);
		return 0;
	} else if (family == AF_UNIX) {
		/* Note that we always use abstract unix sockets to avoid having
		 * to clean up leftover files.
		 */
		struct sockaddr_un *sun = (void *)addr;
		size_t name_len;

		if (!addr_str) {
			errno = EINVAL;
			log_err("AF_UNIX requires a socket name");
			return -1;
		}

		/* The first byte of sun_path is the NUL that marks an
		 * abstract socket, so the name has one byte less available.
		 */
		name_len = strlen(addr_str);
		if (name_len > sizeof(sun->sun_path) - 1) {
			errno = ENAMETOOLONG;
			log_err("AF_UNIX name too long: %s", addr_str);
			return -1;
		}

		memset(addr, 0, sizeof(*sun));
		sun->sun_family = family;
		/* sun_path[0] stays 0 from the memset above. Abstract names
		 * are length-delimited, so no terminator is copied.
		 */
		memcpy(sun->sun_path + 1, addr_str, name_len);
		if (len)
			*len = offsetof(struct sockaddr_un, sun_path) + 1 + name_len;
		return 0;
	}
	return -1;
}
436
/* Return the ping command line prefix to use for @family.
 *
 * For AF_INET6 this is "ping6" when the shell can locate such a binary with
 * "which" and "ping -6" otherwise, since on some systems plain ping has no
 * IPv6 support. Every other family gets "ping".
 *
 * The returned string is a literal and must not be modified or freed.
 */
char *ping_command(int family)
{
	if (family != AF_INET6)
		return "ping";

	return system("which ping6 >/dev/null 2>&1") == 0 ? "ping6" : "ping -6";
}
448
/* Append the calling thread's id, zero-padded to 7 digits, to the string
 * held in @str.
 *
 * @sz is the total size of the buffer behind @str. The existing text plus
 * 7 digits plus the terminating NUL must fit into it.
 *
 * Returns 0 on success, or -1 if @str is NULL or the buffer is too small
 * (in which case @str is left untouched).
 */
int append_tid(char *str, size_t sz)
{
	size_t used;
	char *tail;

	if (str == NULL)
		return -1;

	used = strlen(str);
	if (used + 8 > sz)
		return -1;

	tail = str + used;
	sprintf(tail, "%07d", gettid());
	tail[7] = '\0';

	return 0;
}
465
/* Delete the network namespace @name by running "ip netns del" with its
 * output discarded.
 *
 * Returns the system() status of the command, or -1 if the command string
 * could not be allocated.
 */
int remove_netns(const char *name)
{
	char *cmd = NULL;
	int status;

	if (asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	status = system(cmd);
	free(cmd);

	return status;
}
481
/* Create the network namespace @name with "ip netns add" and bring its
 * loopback interface up.
 *
 * Returns 0 on success, the non-zero system() status of whichever command
 * failed, or -1 if a command string could not be allocated. If allocating
 * the second command fails, the freshly created namespace is removed again.
 */
int make_netns(const char *name)
{
	char *cmd = NULL;
	int status;

	if (asprintf(&cmd, "ip netns add %s", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	status = system(cmd);
	free(cmd);
	if (status != 0)
		return status;

	if (asprintf(&cmd, "ip -n %s link set lo up", name) < 0) {
		log_err("Failed to malloc cmd for setting up lo");
		remove_netns(name);
		return -1;
	}

	status = system(cmd);
	free(cmd);

	return status;
}
511
/* Handle returned by open_netns() and consumed by close_netns(). */
struct nstoken {
	/* fd of the network namespace the caller was in before open_netns()
	 * switched away; close_netns() uses it to switch back.
	 */
	int orig_netns_fd;
};
515
open_netns(const char * name)516 struct nstoken *open_netns(const char *name)
517 {
518 int nsfd;
519 char nspath[PATH_MAX];
520 int err;
521 struct nstoken *token;
522
523 token = calloc(1, sizeof(struct nstoken));
524 if (!token) {
525 log_err("Failed to malloc token");
526 return NULL;
527 }
528
529 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
530 if (token->orig_netns_fd == -1) {
531 log_err("Failed to open(/proc/self/ns/net)");
532 goto fail;
533 }
534
535 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
536 nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
537 if (nsfd == -1) {
538 log_err("Failed to open(%s)", nspath);
539 goto fail;
540 }
541
542 err = setns(nsfd, CLONE_NEWNET);
543 close(nsfd);
544 if (err) {
545 log_err("Failed to setns(nsfd)");
546 goto fail;
547 }
548
549 return token;
550 fail:
551 if (token->orig_netns_fd != -1)
552 close(token->orig_netns_fd);
553 free(token);
554 return NULL;
555 }
556
close_netns(struct nstoken * token)557 void close_netns(struct nstoken *token)
558 {
559 if (!token)
560 return;
561
562 if (setns(token->orig_netns_fd, CLONE_NEWNET))
563 log_err("Failed to setns(orig_netns_fd)");
564 close(token->orig_netns_fd);
565 free(token);
566 }
567
open_tuntap(const char * dev_name,bool need_mac)568 int open_tuntap(const char *dev_name, bool need_mac)
569 {
570 int err = 0;
571 struct ifreq ifr;
572 int fd = open("/dev/net/tun", O_RDWR);
573
574 if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
575 return -1;
576
577 ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
578 strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
579 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
580
581 err = ioctl(fd, TUNSETIFF, &ifr);
582 if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
583 close(fd);
584 return -1;
585 }
586
587 err = fcntl(fd, F_SETFL, O_NONBLOCK);
588 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
589 close(fd);
590 return -1;
591 }
592
593 return fd;
594 }
595
/* Return the local port @sock_fd is bound to.
 *
 * The value is taken unchanged from sin_port/sin6_port, i.e. it is not
 * converted with ntohs() here.
 *
 * Returns the port for AF_INET and AF_INET6 sockets, the negative
 * getsockname() result if that call fails, or -1 for any other family.
 */
int get_socket_local_port(int sock_fd)
{
	struct sockaddr_storage ss;
	socklen_t ss_len = sizeof(ss);
	int rc;

	rc = getsockname(sock_fd, (struct sockaddr *)&ss, &ss_len);
	if (rc < 0)
		return rc;

	switch (ss.ss_family) {
	case AF_INET:
		return ((struct sockaddr_in *)&ss)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *)&ss)->sin6_port;
	default:
		return -1;
	}
}
618
get_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)619 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
620 {
621 struct ifreq ifr = {0};
622 int sockfd, err;
623
624 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
625 if (sockfd < 0)
626 return -errno;
627
628 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
629
630 ring_param->cmd = ETHTOOL_GRINGPARAM;
631 ifr.ifr_data = (char *)ring_param;
632
633 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
634 err = errno;
635 close(sockfd);
636 return -err;
637 }
638
639 close(sockfd);
640 return 0;
641 }
642
set_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)643 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
644 {
645 struct ifreq ifr = {0};
646 int sockfd, err;
647
648 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
649 if (sockfd < 0)
650 return -errno;
651
652 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
653
654 ring_param->cmd = ETHTOOL_SRINGPARAM;
655 ifr.ifr_data = (char *)ring_param;
656
657 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
658 err = errno;
659 close(sockfd);
660 return -err;
661 }
662
663 close(sockfd);
664 return 0;
665 }
666
/* Arguments shared between send_recv_data() and its server thread. */
struct send_recv_arg {
	/* listening socket the server thread accept()s on */
	int fd;
	/* number of bytes the server thread has to send */
	uint32_t bytes;
	/* set to non-zero by either side to make the other side's loop end;
	 * accessed with READ_ONCE()/WRITE_ONCE()
	 */
	int stop;
};
672
send_recv_server(void * arg)673 static void *send_recv_server(void *arg)
674 {
675 struct send_recv_arg *a = (struct send_recv_arg *)arg;
676 ssize_t nr_sent = 0, bytes = 0;
677 char batch[1500];
678 int err = 0, fd;
679
680 fd = accept(a->fd, NULL, NULL);
681 while (fd == -1) {
682 if (errno == EINTR)
683 continue;
684 err = -errno;
685 goto done;
686 }
687
688 if (settimeo(fd, 0)) {
689 err = -errno;
690 goto done;
691 }
692
693 while (bytes < a->bytes && !READ_ONCE(a->stop)) {
694 nr_sent = send(fd, &batch,
695 MIN(a->bytes - bytes, sizeof(batch)), 0);
696 if (nr_sent == -1 && errno == EINTR)
697 continue;
698 if (nr_sent == -1) {
699 err = -errno;
700 break;
701 }
702 bytes += nr_sent;
703 }
704
705 if (bytes != a->bytes) {
706 log_err("send %zd expected %u", bytes, a->bytes);
707 if (!err)
708 err = bytes > a->bytes ? -E2BIG : -EINTR;
709 }
710
711 done:
712 if (fd >= 0)
713 close(fd);
714 if (err) {
715 WRITE_ONCE(a->stop, 1);
716 return ERR_PTR(err);
717 }
718 return NULL;
719 }
720
send_recv_data(int lfd,int fd,uint32_t total_bytes)721 int send_recv_data(int lfd, int fd, uint32_t total_bytes)
722 {
723 ssize_t nr_recv = 0, bytes = 0;
724 struct send_recv_arg arg = {
725 .fd = lfd,
726 .bytes = total_bytes,
727 .stop = 0,
728 };
729 pthread_t srv_thread;
730 void *thread_ret;
731 char batch[1500];
732 int err = 0;
733
734 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
735 if (err) {
736 log_err("Failed to pthread_create");
737 return err;
738 }
739
740 /* recv total_bytes */
741 while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
742 nr_recv = recv(fd, &batch,
743 MIN(total_bytes - bytes, sizeof(batch)), 0);
744 if (nr_recv == -1 && errno == EINTR)
745 continue;
746 if (nr_recv == -1) {
747 err = -errno;
748 break;
749 }
750 bytes += nr_recv;
751 }
752
753 if (bytes != total_bytes) {
754 log_err("recv %zd expected %u", bytes, total_bytes);
755 if (!err)
756 err = bytes > total_bytes ? -E2BIG : -EINTR;
757 }
758
759 WRITE_ONCE(arg.stop, 1);
760 pthread_join(srv_thread, &thread_ret);
761 if (IS_ERR(thread_ret)) {
762 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
763 err = err ? : PTR_ERR(thread_ret);
764 }
765
766 return err;
767 }
768
769 #ifdef TRAFFIC_MONITOR
/* State of one traffic monitor, created by traffic_monitor_start() and
 * destroyed by traffic_monitor_stop().
 */
struct tmonitor_ctx {
	/* capture handle packets are read from */
	pcap_t *pcap;
	/* dumper writing captured packets to pkt_fname */
	pcap_dumper_t *dumper;
	/* thread running traffic_monitor_thread() */
	pthread_t thread;
	/* eventfd written by traffic_monitor_stop() to wake the thread */
	int wake_fd;

	/* set by traffic_monitor_stop(); polled by the monitor thread */
	volatile bool done;
	/* path of the packet file */
	char pkt_fname[PATH_MAX];
	/* selectable fd of pcap, used in the thread's select() */
	int pcap_fd;
};
780
/* Default print callback of the traffic monitor: formats to stdout and
 * returns the vfprintf() result.
 */
static int __base_pr(const char *format, va_list args)
{
	int written = vfprintf(stdout, format, args);

	return written;
}
785
/* Print callback currently used by tm_print(); accessed with atomic
 * builtins and replaceable through traffic_monitor_set_print().
 */
static tm_print_fn_t __tm_pr = __base_pr;
787
/* Install @fn as the print callback of the traffic monitor and return the
 * callback that was installed before. The swap is one relaxed atomic
 * exchange. tm_print() prints nothing while the callback is NULL.
 */
tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
{
	return __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
}
796
tm_print(const char * format,...)797 void tm_print(const char *format, ...)
798 {
799 tm_print_fn_t print_fn;
800 va_list args;
801
802 print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
803 if (!print_fn)
804 return;
805
806 va_start(args, format);
807 print_fn(format, args);
808 va_end(args);
809 }
810
811 /* Is this packet captured with a Ethernet protocol type? */
is_ethernet(const u_char * packet)812 static bool is_ethernet(const u_char *packet)
813 {
814 u16 arphdr_type;
815
816 memcpy(&arphdr_type, packet + 8, 2);
817 arphdr_type = ntohs(arphdr_type);
818
819 /* Except the following cases, the protocol type contains the
820 * Ethernet protocol type for the packet.
821 *
822 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
823 */
824 switch (arphdr_type) {
825 case 770: /* ARPHRD_FRAD */
826 case 778: /* ARPHDR_IPGRE */
827 case 803: /* ARPHRD_IEEE80211_RADIOTAP */
828 tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
829 return false;
830 }
831 return true;
832 }
833
/* Short labels for packet types, indexed by the packet type value that
 * pkt_type_str() receives (read from byte 10 of the SLL2 header by the
 * monitor thread).
 */
static const char * const pkt_types[] = {
	"In",
	"B",			/* Broadcast */
	"M",			/* Multicast */
	"C",			/* Captured with the promiscuous mode */
	"Out",
};
841
/* Map a packet type value to its label from pkt_types, or to "Unknown" when
 * the value is outside the table.
 */
static const char *pkt_type_str(u16 pkt_type)
{
	return pkt_type < ARRAY_SIZE(pkt_types) ? pkt_types[pkt_type] : "Unknown";
}
848
849 #define MAX_FLAGS_STRLEN 21
850 /* Show the information of the transport layer in the packet */
show_transport(const u_char * packet,u16 len,u32 ifindex,const char * src_addr,const char * dst_addr,u16 proto,bool ipv6,u8 pkt_type)851 static void show_transport(const u_char *packet, u16 len, u32 ifindex,
852 const char *src_addr, const char *dst_addr,
853 u16 proto, bool ipv6, u8 pkt_type)
854 {
855 char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
856 const char *transport_str;
857 u16 src_port, dst_port;
858 struct udphdr *udp;
859 struct tcphdr *tcp;
860
861 ifname = if_indextoname(ifindex, _ifname);
862 if (!ifname) {
863 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
864 ifname = _ifname;
865 }
866
867 if (proto == IPPROTO_UDP) {
868 udp = (struct udphdr *)packet;
869 src_port = ntohs(udp->source);
870 dst_port = ntohs(udp->dest);
871 transport_str = "UDP";
872 } else if (proto == IPPROTO_TCP) {
873 tcp = (struct tcphdr *)packet;
874 src_port = ntohs(tcp->source);
875 dst_port = ntohs(tcp->dest);
876 transport_str = "TCP";
877 } else if (proto == IPPROTO_ICMP) {
878 tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
879 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
880 packet[0], packet[1]);
881 return;
882 } else if (proto == IPPROTO_ICMPV6) {
883 tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
884 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
885 packet[0], packet[1]);
886 return;
887 } else {
888 tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
889 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
890 src_addr, dst_addr, proto);
891 return;
892 }
893
894 /* TCP or UDP*/
895
896 if (proto == IPPROTO_TCP)
897 snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
898 tcp->fin ? ", FIN" : "",
899 tcp->syn ? ", SYN" : "",
900 tcp->rst ? ", RST" : "",
901 tcp->ack ? ", ACK" : "");
902
903 if (ipv6)
904 tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
905 ifname, pkt_type_str(pkt_type), src_addr, src_port,
906 dst_addr, dst_port, transport_str, len, flags);
907 else
908 tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
909 ifname, pkt_type_str(pkt_type), src_addr, src_port,
910 dst_addr, dst_port, transport_str, len, flags);
911 }
912
/* Print a captured IPv6 packet.
 *
 * @packet points at the IPv6 header. The addresses are converted to text
 * ("<invalid>" if that fails) and the bytes following the fixed header are
 * passed to show_transport() together with the next-header value and the
 * payload length from the header.
 */
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN];
	struct ipv6hdr *ip6 = (struct ipv6hdr *)packet;
	const char *src, *dst;

	src = inet_ntop(AF_INET6, &ip6->saddr, src_buf, sizeof(src_buf));
	dst = inet_ntop(AF_INET6, &ip6->daddr, dst_buf, sizeof(dst_buf));

	show_transport(packet + sizeof(struct ipv6hdr),
		       ntohs(ip6->payload_len), ifindex,
		       src ? src : "<invalid>",
		       dst ? dst : "<invalid>",
		       (u_char)ip6->nexthdr, true, pkt_type);
}
931
/* Print a captured IPv4 packet.
 *
 * @packet points at the IPv4 header. The addresses are converted to text
 * ("<invalid>" if that fails) and the transport header is passed to
 * show_transport() together with the protocol number and the total length
 * field of the IP header.
 */
static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
	struct iphdr *pkt = (struct iphdr *)packet;
	const char *src, *dst;
	size_t hdr_len;
	u_char proto;

	src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
	if (!src)
		src = "<invalid>";
	dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
	if (!dst)
		dst = "<invalid>";
	proto = pkt->protocol;

	/* The header is ihl 32-bit words long and may carry options, so the
	 * transport header does not necessarily follow the fixed part.
	 * A bogus ihl below the minimum falls back to the fixed size.
	 */
	hdr_len = (size_t)pkt->ihl * 4;
	if (hdr_len < sizeof(struct iphdr))
		hdr_len = sizeof(struct iphdr);

	show_transport(packet + hdr_len,
		       ntohs(pkt->tot_len),
		       ifindex, src, dst, proto, false, pkt_type);
}
950
traffic_monitor_thread(void * arg)951 static void *traffic_monitor_thread(void *arg)
952 {
953 char *ifname, _ifname[IF_NAMESIZE];
954 const u_char *packet, *payload;
955 struct tmonitor_ctx *ctx = arg;
956 pcap_dumper_t *dumper = ctx->dumper;
957 int fd = ctx->pcap_fd, nfds, r;
958 int wake_fd = ctx->wake_fd;
959 struct pcap_pkthdr header;
960 pcap_t *pcap = ctx->pcap;
961 u32 ifindex;
962 fd_set fds;
963 u16 proto;
964 u8 ptype;
965
966 nfds = (fd > wake_fd ? fd : wake_fd) + 1;
967 FD_ZERO(&fds);
968
969 while (!ctx->done) {
970 FD_SET(fd, &fds);
971 FD_SET(wake_fd, &fds);
972 r = select(nfds, &fds, NULL, NULL, NULL);
973 if (!r)
974 continue;
975 if (r < 0) {
976 if (errno == EINTR)
977 continue;
978 log_err("Fail to select on pcap fd and wake fd");
979 break;
980 }
981
982 /* This instance of pcap is non-blocking */
983 packet = pcap_next(pcap, &header);
984 if (!packet)
985 continue;
986
987 /* According to the man page of pcap_dump(), first argument
988 * is the pcap_dumper_t pointer even it's argument type is
989 * u_char *.
990 */
991 pcap_dump((u_char *)dumper, &header, packet);
992
993 /* Not sure what other types of packets look like. Here, we
994 * parse only Ethernet and compatible packets.
995 */
996 if (!is_ethernet(packet))
997 continue;
998
999 /* Skip SLL2 header
1000 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
1001 *
1002 * Although the document doesn't mention that, the payload
1003 * doesn't include the Ethernet header. The payload starts
1004 * from the first byte of the network layer header.
1005 */
1006 payload = packet + 20;
1007
1008 memcpy(&proto, packet, 2);
1009 proto = ntohs(proto);
1010 memcpy(&ifindex, packet + 4, 4);
1011 ifindex = ntohl(ifindex);
1012 ptype = packet[10];
1013
1014 if (proto == ETH_P_IPV6) {
1015 show_ipv6_packet(payload, ifindex, ptype);
1016 } else if (proto == ETH_P_IP) {
1017 show_ipv4_packet(payload, ifindex, ptype);
1018 } else {
1019 ifname = if_indextoname(ifindex, _ifname);
1020 if (!ifname) {
1021 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
1022 ifname = _ifname;
1023 }
1024
1025 tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
1026 ifname, pkt_type_str(ptype), proto);
1027 }
1028 }
1029
1030 return NULL;
1031 }
1032
1033 /* Prepare the pcap handle to capture packets.
1034 *
1035 * This pcap is non-blocking and immediate mode is enabled to receive
1036 * captured packets as soon as possible. The snaplen is set to 1024 bytes
1037 * to limit the size of captured content. The format of the link-layer
1038 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
1039 * technologies.
1040 */
traffic_monitor_prepare_pcap(void)1041 static pcap_t *traffic_monitor_prepare_pcap(void)
1042 {
1043 char errbuf[PCAP_ERRBUF_SIZE];
1044 pcap_t *pcap;
1045 int r;
1046
1047 /* Listen on all NICs in the namespace */
1048 pcap = pcap_create("any", errbuf);
1049 if (!pcap) {
1050 log_err("Failed to open pcap: %s", errbuf);
1051 return NULL;
1052 }
1053 /* Limit the size of the packet (first N bytes) */
1054 r = pcap_set_snaplen(pcap, 1024);
1055 if (r) {
1056 log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
1057 goto error;
1058 }
1059 /* To receive packets as fast as possible */
1060 r = pcap_set_immediate_mode(pcap, 1);
1061 if (r) {
1062 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
1063 goto error;
1064 }
1065 r = pcap_setnonblock(pcap, 1, errbuf);
1066 if (r) {
1067 log_err("Failed to set nonblock: %s", errbuf);
1068 goto error;
1069 }
1070 r = pcap_activate(pcap);
1071 if (r) {
1072 log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
1073 goto error;
1074 }
1075 /* Determine the format of the link-layer header */
1076 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
1077 if (r) {
1078 log_err("Failed to set datalink: %s", pcap_geterr(pcap));
1079 goto error;
1080 }
1081
1082 return pcap;
1083 error:
1084 pcap_close(pcap);
1085 return NULL;
1086 }
1087
/* Write a file-name friendly version of the test name into @buf.
 *
 * The result is "<test_name>__<subtest_name>" when @subtest_name is given
 * and "<test_name>" otherwise, truncated to fit @len bytes, with every '/'
 * and ' ' replaced by '_'.
 */
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
	char *c;

	if (subtest_name)
		snprintf(buf, len, "%s__%s", test_name, subtest_name);
	else
		snprintf(buf, len, "%s", test_name);

	for (c = buf; *c; c++) {
		if (*c == '/' || *c == ' ')
			*c = '_';
	}
}
1101
1102 #define PCAP_DIR "/tmp/tmon_pcap"
1103
1104 /* Start to monitor the network traffic in the given network namespace.
1105 *
1106 * netns: the name of the network namespace to monitor. If NULL, the
1107 * current network namespace is monitored.
1108 * test_name: the name of the running test.
1109 * subtest_name: the name of the running subtest if there is. It should be
1110 * NULL if it is not a subtest.
1111 *
1112 * This function will start a thread to capture packets going through NICs
1113 * in the give network namespace.
1114 */
traffic_monitor_start(const char * netns,const char * test_name,const char * subtest_name)1115 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
1116 const char *subtest_name)
1117 {
1118 struct nstoken *nstoken = NULL;
1119 struct tmonitor_ctx *ctx;
1120 char test_name_buf[64];
1121 static int tmon_seq;
1122 int r;
1123
1124 if (netns) {
1125 nstoken = open_netns(netns);
1126 if (!nstoken)
1127 return NULL;
1128 }
1129 ctx = malloc(sizeof(*ctx));
1130 if (!ctx) {
1131 log_err("Failed to malloc ctx");
1132 goto fail_ctx;
1133 }
1134 memset(ctx, 0, sizeof(*ctx));
1135
1136 encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
1137 snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
1138 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
1139 test_name_buf, netns ? netns : "unknown");
1140
1141 r = mkdir(PCAP_DIR, 0755);
1142 if (r && errno != EEXIST) {
1143 log_err("Failed to create " PCAP_DIR);
1144 goto fail_pcap;
1145 }
1146
1147 ctx->pcap = traffic_monitor_prepare_pcap();
1148 if (!ctx->pcap)
1149 goto fail_pcap;
1150 ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
1151 if (ctx->pcap_fd < 0) {
1152 log_err("Failed to get pcap fd");
1153 goto fail_dumper;
1154 }
1155
1156 /* Create a packet file */
1157 ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
1158 if (!ctx->dumper) {
1159 log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
1160 goto fail_dumper;
1161 }
1162
1163 /* Create an eventfd to wake up the monitor thread */
1164 ctx->wake_fd = eventfd(0, 0);
1165 if (ctx->wake_fd < 0) {
1166 log_err("Failed to create eventfd");
1167 goto fail_eventfd;
1168 }
1169
1170 r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
1171 if (r) {
1172 log_err("Failed to create thread");
1173 goto fail;
1174 }
1175
1176 close_netns(nstoken);
1177
1178 return ctx;
1179
1180 fail:
1181 close(ctx->wake_fd);
1182
1183 fail_eventfd:
1184 pcap_dump_close(ctx->dumper);
1185 unlink(ctx->pkt_fname);
1186
1187 fail_dumper:
1188 pcap_close(ctx->pcap);
1189
1190 fail_pcap:
1191 free(ctx);
1192
1193 fail_ctx:
1194 close_netns(nstoken);
1195
1196 return NULL;
1197 }
1198
traffic_monitor_release(struct tmonitor_ctx * ctx)1199 static void traffic_monitor_release(struct tmonitor_ctx *ctx)
1200 {
1201 pcap_close(ctx->pcap);
1202 pcap_dump_close(ctx->dumper);
1203
1204 close(ctx->wake_fd);
1205
1206 free(ctx);
1207 }
1208
1209 /* Stop the network traffic monitor.
1210 *
1211 * ctx: the context returned by traffic_monitor_start()
1212 */
traffic_monitor_stop(struct tmonitor_ctx * ctx)1213 void traffic_monitor_stop(struct tmonitor_ctx *ctx)
1214 {
1215 __u64 w = 1;
1216
1217 if (!ctx)
1218 return;
1219
1220 /* Stop the monitor thread */
1221 ctx->done = true;
1222 /* Wake up the background thread. */
1223 write(ctx->wake_fd, &w, sizeof(w));
1224 pthread_join(ctx->thread, NULL);
1225
1226 tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
1227
1228 traffic_monitor_release(ctx);
1229 }
1230
1231 #endif /* TRAFFIC_MONITOR */
1232