1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Test for sockmap/sockhash redirection. 4 * 5 * BPF_MAP_TYPE_SOCKMAP 6 * BPF_MAP_TYPE_SOCKHASH 7 * x 8 * sk_msg-to-egress 9 * sk_msg-to-ingress 10 * sk_skb-to-egress 11 * sk_skb-to-ingress 12 * x 13 * AF_INET, SOCK_STREAM 14 * AF_INET6, SOCK_STREAM 15 * AF_INET, SOCK_DGRAM 16 * AF_INET6, SOCK_DGRAM 17 * AF_UNIX, SOCK_STREAM 18 * AF_UNIX, SOCK_DGRAM 19 * AF_VSOCK, SOCK_STREAM 20 * AF_VSOCK, SOCK_SEQPACKET 21 */ 22 23 #include <errno.h> 24 #include <error.h> 25 #include <sched.h> 26 #include <stdio.h> 27 #include <unistd.h> 28 29 #include <netinet/in.h> 30 #include <sys/socket.h> 31 #include <sys/types.h> 32 #include <sys/un.h> 33 #include <linux/string.h> 34 #include <linux/vm_sockets.h> 35 36 #include <bpf/bpf.h> 37 #include <bpf/libbpf.h> 38 39 #include "linux/const.h" 40 #include "test_progs.h" 41 #include "sockmap_helpers.h" 42 #include "test_sockmap_redir.skel.h" 43 44 /* The meaning of SUPPORTED is "will redirect packet as expected". 45 */ 46 #define SUPPORTED _BITUL(0) 47 48 /* Note on sk_skb-to-ingress ->af_vsock: 49 * 50 * Peer socket may receive the packet some time after the return from sendmsg(). 51 * In a typical usage scenario, recvmsg() will block until the redirected packet 52 * appears in the destination queue, or timeout if the packet was dropped. By 53 * that point, the verdict map has already been updated to reflect what has 54 * happened. 55 * 56 * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg() 57 * takes place. Which means we may race the execution of the verdict logic and 58 * read map_verd before it has been updated, i.e. we might observe 59 * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. 60 * 61 * This confuses the selftest logic: if there was no packet dropped, where's the 62 * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 63 * (which implies the verdict program has not been ran) just re-read the verdict 64 * map again. 65 */ 66 #define UNSUPPORTED_RACY_VERD _BITUL(1) 67 68 enum prog_type { 69 SK_MSG_EGRESS, 70 SK_MSG_INGRESS, 71 SK_SKB_EGRESS, 72 SK_SKB_INGRESS, 73 }; 74 75 enum { 76 SEND_INNER = 0, 77 SEND_OUTER, 78 }; 79 80 enum { 81 RECV_INNER = 0, 82 RECV_OUTER, 83 }; 84 85 struct maps { 86 int in; 87 int out; 88 int verd; 89 }; 90 91 struct combo_spec { 92 enum prog_type prog_type; 93 const char *in, *out; 94 }; 95 96 struct redir_spec { 97 const char *name; 98 int idx_send; 99 int idx_recv; 100 enum prog_type prog_type; 101 }; 102 103 struct socket_spec { 104 int family; 105 int sotype; 106 int send_flags; 107 int in[2]; 108 int out[2]; 109 }; 110 111 static int socket_spec_pairs(struct socket_spec *s) 112 { 113 return create_socket_pairs(s->family, s->sotype, 114 &s->in[0], &s->out[0], 115 &s->in[1], &s->out[1]); 116 } 117 118 static void socket_spec_close(struct socket_spec *s) 119 { 120 xclose(s->in[0]); 121 xclose(s->in[1]); 122 xclose(s->out[0]); 123 xclose(s->out[1]); 124 } 125 126 static void get_redir_params(struct redir_spec *redir, 127 struct test_sockmap_redir *skel, int *prog_fd, 128 enum bpf_attach_type *attach_type, 129 int *redirect_flags) 130 { 131 enum prog_type type = redir->prog_type; 132 struct bpf_program *prog; 133 bool sk_msg; 134 135 sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; 136 prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict; 137 138 *prog_fd = bpf_program__fd(prog); 139 *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; 140 141 if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS) 142 *redirect_flags = BPF_F_INGRESS; 143 else 144 *redirect_flags = 0; 145 } 146 147 static void try_recv(const char *prefix, int fd, int flags, bool expect_success) 148 { 149 ssize_t n; 150 char buf; 151 152 errno = 0; 153 n = recv(fd, &buf, 1, flags); 154 if (n < 0 && expect_success) 155 FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); 156 if (!n && !expect_success) 157 FAIL("%s: expected failure: retval=%zd", prefix, n); 158 } 159 160 static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out, 161 int sd_recv, int map_verd, int status) 162 { 163 unsigned int drop, pass; 164 char recv_buf; 165 ssize_t n; 166 167 get_verdict: 168 if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || 169 xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) 170 return; 171 172 if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { 173 sched_yield(); 174 goto get_verdict; 175 } 176 177 if (pass != 0) { 178 FAIL("unsupported: wanted verdict pass 0, have %u", pass); 179 return; 180 } 181 182 /* If nothing was dropped, packet should have reached the peer */ 183 if (drop == 0) { 184 errno = 0; 185 n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); 186 if (n != 1) 187 FAIL_ERRNO("unsupported: packet missing, retval=%zd", n); 188 } 189 190 /* Ensure queues are empty */ 191 try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); 192 if (sd_in != sd_send) 193 try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); 194 195 try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); 196 if (sd_recv != sd_out) 197 try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); 198 } 199 200 static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, 201 int sd_in, int sd_out, int sd_recv, 202 struct maps *maps, int status) 203 { 204 unsigned int drop, pass; 205 char *send_buf = "ab"; 206 char recv_buf = '\0'; 207 ssize_t n, len = 1; 208 209 /* Zero out the verdict map */ 210 if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || 211 xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) 212 return; 213 214 if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) 215 return; 216 217 if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) 218 goto del_in; 219 220 /* Last byte is OOB data when send_flags has MSG_OOB bit set */ 221 if (send_flags & MSG_OOB) 222 len++; 223 n = send(sd_send, send_buf, len, send_flags); 224 if (n >= 0 && n < len) 225 FAIL("incomplete send"); 226 if (n < 0) { 227 /* sk_msg redirect combo not supported? */ 228 if (status & SUPPORTED || errno != EACCES) 229 FAIL_ERRNO("send"); 230 goto out; 231 } 232 233 if (!(status & SUPPORTED)) { 234 handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, 235 maps->verd, status); 236 goto out; 237 } 238 239 errno = 0; 240 n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); 241 if (n != 1) { 242 FAIL_ERRNO("recv_timeout()"); 243 goto out; 244 } 245 246 /* Check verdict _after_ recv(); af_vsock may need time to catch up */ 247 if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || 248 xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) 249 goto out; 250 251 if (drop != 0 || pass != 1) 252 FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", 253 drop, pass); 254 255 if (recv_buf != send_buf[0]) 256 FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]); 257 258 if (send_flags & MSG_OOB) { 259 /* Fail reading OOB while in sockmap */ 260 try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, 261 MSG_OOB | MSG_DONTWAIT, false); 262 263 /* Remove sd_out from sockmap */ 264 xbpf_map_delete_elem(maps->out, &u32(0)); 265 266 /* Check that OOB was dropped on redirect */ 267 try_recv("recv(sd_out, MSG_OOB)", sd_out, 268 MSG_OOB | MSG_DONTWAIT, false); 269 270 goto del_in; 271 } 272 out: 273 xbpf_map_delete_elem(maps->out, &u32(0)); 274 del_in: 275 xbpf_map_delete_elem(maps->in, &u32(0)); 276 } 277 278 static int is_redir_supported(enum prog_type type, const char *in, 279 const char *out) 280 { 281 /* Matching based on strings returned by socket_kind_to_str(): 282 * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq 283 * Plus a wildcard: any 284 * Not in use: u_seq, v_dgr 285 */ 286 struct combo_spec *c, combos[] = { 287 /* Send to local: TCP -> any, but vsock */ 288 { SK_MSG_INGRESS, "tcp", "tcp" }, 289 { SK_MSG_INGRESS, "tcp", "udp" }, 290 { SK_MSG_INGRESS, "tcp", "u_str" }, 291 { SK_MSG_INGRESS, "tcp", "u_dgr" }, 292 293 /* Send to egress: TCP -> TCP */ 294 { SK_MSG_EGRESS, "tcp", "tcp" }, 295 296 /* Ingress to egress: any -> any */ 297 { SK_SKB_EGRESS, "any", "any" }, 298 299 /* Ingress to local: any -> any, but vsock */ 300 { SK_SKB_INGRESS, "any", "tcp" }, 301 { SK_SKB_INGRESS, "any", "udp" }, 302 { SK_SKB_INGRESS, "any", "u_str" }, 303 { SK_SKB_INGRESS, "any", "u_dgr" }, 304 }; 305 306 for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { 307 if (c->prog_type == type && 308 (!strcmp(c->in, "any") || strstarts(in, c->in)) && 309 (!strcmp(c->out, "any") || strstarts(out, c->out))) 310 return SUPPORTED; 311 } 312 313 return 0; 314 } 315 316 static int get_support_status(enum prog_type type, const char *in, 317 const char *out) 318 { 319 int status = is_redir_supported(type, in, out); 320 321 if (type == SK_SKB_INGRESS && strstarts(out, "v_")) 322 status |= UNSUPPORTED_RACY_VERD; 323 324 return status; 325 } 326 327 static void test_socket(enum bpf_map_type type, struct redir_spec *redir, 328 struct maps *maps, struct socket_spec *s_in, 329 struct socket_spec *s_out) 330 { 331 int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; 332 const char *in_str, *out_str; 333 char s[MAX_TEST_NAME]; 334 335 fd_in = s_in->in[0]; 336 fd_out = s_out->out[0]; 337 fd_send = s_in->in[redir->idx_send]; 338 fd_peer = s_in->in[redir->idx_send ^ 1]; 339 fd_recv = s_out->out[redir->idx_recv]; 340 flags = s_in->send_flags; 341 342 in_str = socket_kind_to_str(fd_in); 343 out_str = socket_kind_to_str(fd_out); 344 status = get_support_status(redir->prog_type, in_str, out_str); 345 346 snprintf(s, sizeof(s), 347 "%-4s %-17s %-5s %s %-5s%6s", 348 /* hash sk_skb-to-ingress u_str → v_str (OOB) */ 349 type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", 350 redir->name, 351 in_str, 352 status & SUPPORTED ? "→" : " ", 353 out_str, 354 (flags & MSG_OOB) ? "(OOB)" : ""); 355 356 if (!test__start_subtest(s)) 357 return; 358 359 test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, 360 maps, status); 361 } 362 363 static void test_redir(enum bpf_map_type type, struct redir_spec *redir, 364 struct maps *maps) 365 { 366 struct socket_spec *s, sockets[] = { 367 { AF_INET, SOCK_STREAM }, 368 // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ 369 { AF_INET6, SOCK_STREAM }, 370 { AF_INET, SOCK_DGRAM }, 371 { AF_INET6, SOCK_DGRAM }, 372 { AF_UNIX, SOCK_STREAM }, 373 { AF_UNIX, SOCK_STREAM, MSG_OOB }, 374 { AF_UNIX, SOCK_DGRAM }, 375 // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */ 376 { AF_VSOCK, SOCK_STREAM }, 377 // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ 378 { AF_VSOCK, SOCK_SEQPACKET }, 379 }; 380 381 for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) 382 if (socket_spec_pairs(s)) 383 goto out; 384 385 /* Intra-proto */ 386 for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) 387 test_socket(type, redir, maps, s, s); 388 389 /* Cross-proto */ 390 for (int i = 0; i < ARRAY_SIZE(sockets); i++) { 391 for (int j = 0; j < ARRAY_SIZE(sockets); j++) { 392 struct socket_spec *out = &sockets[j]; 393 struct socket_spec *in = &sockets[i]; 394 395 /* Skip intra-proto and between variants */ 396 if (out->send_flags || 397 (in->family == out->family && 398 in->sotype == out->sotype)) 399 continue; 400 401 test_socket(type, redir, maps, in, out); 402 } 403 } 404 out: 405 while (--s >= sockets) 406 socket_spec_close(s); 407 } 408 409 static void test_map(enum bpf_map_type type) 410 { 411 struct redir_spec *r, redirs[] = { 412 { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, 413 { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, 414 { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, 415 { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, 416 }; 417 418 for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { 419 enum bpf_attach_type attach_type; 420 struct test_sockmap_redir *skel; 421 struct maps maps; 422 int prog_fd; 423 424 skel = test_sockmap_redir__open_and_load(); 425 if (!skel) { 426 FAIL("open_and_load"); 427 return; 428 } 429 430 switch (type) { 431 case BPF_MAP_TYPE_SOCKMAP: 432 maps.in = bpf_map__fd(skel->maps.nop_map); 433 maps.out = bpf_map__fd(skel->maps.sock_map); 434 break; 435 case BPF_MAP_TYPE_SOCKHASH: 436 maps.in = bpf_map__fd(skel->maps.nop_hash); 437 maps.out = bpf_map__fd(skel->maps.sock_hash); 438 break; 439 default: 440 FAIL("Unsupported bpf_map_type"); 441 return; 442 } 443 444 skel->bss->redirect_type = type; 445 maps.verd = bpf_map__fd(skel->maps.verdict_map); 446 get_redir_params(r, skel, &prog_fd, &attach_type, 447 &skel->bss->redirect_flags); 448 449 if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) 450 return; 451 452 test_redir(type, r, &maps); 453 454 if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) 455 return; 456 457 test_sockmap_redir__destroy(skel); 458 } 459 } 460 461 void serial_test_sockmap_redir(void) 462 { 463 test_map(BPF_MAP_TYPE_SOCKMAP); 464 test_map(BPF_MAP_TYPE_SOCKHASH); 465 } 466