1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3
4 #include "vmlinux.h"
5 #include "bpf_tracing_net.h"
6 #include <bpf/bpf_core_read.h>
7 #include <bpf/bpf_helpers.h>
8 #include <bpf/bpf_tracing.h>
9 #include "bpf_misc.h"
10
/* Declared as an external __kconfig symbol; no use of it is visible in this part of the file. */
extern unsigned long CONFIG_HZ __kconfig;

/*
 * Device name and interface index that binddev_test() binds the socket to.
 * Both are const from the program's side; presumably filled in by the
 * user-space side of the test before load -- TODO confirm against the loader.
 */
const volatile char veth[IFNAMSIZ];
const volatile int veth_ifindex;

/*
 * Success counters.  Each one is bumped by a hook below only when all of
 * the checks run for that hook return 0.
 */
int nr_listen;			/* BPF_SOCK_OPS_TCP_LISTEN_CB */
int nr_passive;			/* BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB */
int nr_active;			/* BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB */
int nr_connect;			/* BPF_SOCK_OPS_TCP_CONNECT_CB */
int nr_binddev;			/* binddev_test() from socket_post_create */
int nr_socket_post_create;	/* bpf_test_sockopt() from socket_post_create */
int nr_fin_wait1;		/* BPF_SOCK_OPS_STATE_CB with args[1] == BPF_TCP_CLOSE_WAIT */
23
/*
 * One table entry describing how a single socket option is exercised.
 * The bpf_loop() callbacks below return 1 when they reach an entry whose
 * opt is 0, so such an entry marks the end of a table.
 */
struct sockopt_test {
	int opt;		/* option name passed to bpf_{get,set}sockopt() */
	int new;		/* value written by bpf_test_sockopt_int() */
	int restore;		/* if non-zero, written back at the end instead of the value read first */
	int expected;		/* value that must be read back after writing 'new' */
	int tcp_expected;	/* if non-zero, replaces 'expected' when sk_type is SOCK_STREAM */
	unsigned int flip:1;	/* if set, the entry is handled by bpf_test_sockopt_flip() */
};
32
/*
 * Congestion-control names used by the TCP_CONGESTION branch of
 * bpf_test_tcp_sockopt(): setting not_exist_cc is required to fail, and
 * cubic_cc / reno_cc are the two names the test switches between.
 */
static const char not_exist_cc[] = "not_exist";
static const char cubic_cc[] = "cubic";
static const char reno_cc[] = "reno";
36
/*
 * Options exercised at level SOL_SOCKET by bpf_test_socket_sockopt().
 * For SO_SNDBUF and SO_RCVBUF the value read back is expected to be twice
 * the value written.  The trailing { .opt = 0 } entry ends the table.
 */
static const struct sockopt_test sol_socket_tests[] = {
	{ .opt = SO_REUSEADDR, .flip = 1, },
	{ .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, },
	{ .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, },
	{ .opt = SO_KEEPALIVE, .flip = 1, },
	{ .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, },
	{ .opt = SO_REUSEPORT, .flip = 1, },
	{ .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, },
	{ .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, },
	{ .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, },
	{ .opt = SO_TXREHASH, .flip = 1, },
	{ .opt = 0, },
};
50
/*
 * Options exercised at level IPPROTO_TCP by bpf_test_tcp_sockopt().
 * The TCP_CONGESTION entry carries no values because that option is
 * handled by a dedicated string-based branch in bpf_test_tcp_sockopt().
 * Entries with .restore write that value back at the end instead of the
 * value that was read before the test.  The trailing { .opt = 0 } entry
 * ends the table.
 */
static const struct sockopt_test sol_tcp_tests[] = {
	{ .opt = TCP_NODELAY, .flip = 1, },
	{ .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, },
	{ .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, },
	{ .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, },
	{ .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, },
	{ .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, },
	{ .opt = TCP_CONGESTION, },
	{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
	{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
	{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
	{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
	  .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
	{ .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, },
	{ .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, },
	{ .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, },
	{ .opt = 0, },
};
69
/*
 * Options exercised at level IPPROTO_IP by bpf_test_ip_sockopt().
 * For IP_TOS the read-back on SOCK_STREAM sockets is expected to be 0xe0
 * rather than 0xe1 (presumably the low ECN bits are not kept on TCP
 * sockets -- TODO confirm).  The trailing { .opt = 0 } entry ends the table.
 */
static const struct sockopt_test sol_ip_tests[] = {
	{ .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
	{ .opt = 0, },
};
74
/*
 * Options exercised at level IPPROTO_IPV6 by bpf_test_ipv6_sockopt().
 * For IPV6_TCLASS the read-back on SOCK_STREAM sockets is expected to be
 * 0xe0 rather than 0xe1.  The trailing { .opt = 0 } entry ends the table.
 */
static const struct sockopt_test sol_ipv6_tests[] = {
	{ .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
	{ .opt = IPV6_AUTOFLOWLABEL, .flip = 1, },
	{ .opt = 0, },
};
80
/* State handed to the bpf_loop() callbacks by bpf_test_sockopt(). */
struct loop_ctx {
	void *ctx;		/* first argument for bpf_{get,set}sockopt() */
	struct sock *sk;	/* socket whose options are being tested */
};
85
bpf_test_sockopt_flip(void * ctx,struct sock * sk,const struct sockopt_test * t,int level)86 static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
87 const struct sockopt_test *t,
88 int level)
89 {
90 int old, tmp, new, opt = t->opt;
91
92 opt = t->opt;
93
94 if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
95 return 1;
96 /* kernel initialized txrehash to 255 */
97 if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1)
98 old = 1;
99
100 new = !old;
101 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
102 return 1;
103 if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
104 tmp != new)
105 return 1;
106
107 if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
108 return 1;
109
110 return 0;
111 }
112
bpf_test_sockopt_int(void * ctx,struct sock * sk,const struct sockopt_test * t,int level)113 static int bpf_test_sockopt_int(void *ctx, struct sock *sk,
114 const struct sockopt_test *t,
115 int level)
116 {
117 int old, tmp, new, expected, opt;
118
119 opt = t->opt;
120 new = t->new;
121 if (sk->sk_type == SOCK_STREAM && t->tcp_expected)
122 expected = t->tcp_expected;
123 else
124 expected = t->expected;
125
126 if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)) ||
127 old == new)
128 return 1;
129
130 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
131 return 1;
132 if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
133 tmp != expected)
134 return 1;
135
136 if (t->restore)
137 old = t->restore;
138 if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
139 return 1;
140
141 return 0;
142 }
143
/*
 * bpf_loop() callback: run entry @i of sol_socket_tests at SOL_SOCKET.
 * Returns 1 for an out-of-range index, for the terminating entry, or when
 * the selected test helper fails; returns 0 when the entry passes.
 */
static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;

	if (i >= ARRAY_SIZE(sol_socket_tests))
		return 1;

	entry = &sol_socket_tests[i];
	if (entry->opt == 0)
		return 1;

	if (!entry->flip)
		return bpf_test_sockopt_int(lc->ctx, lc->sk, entry, SOL_SOCKET);

	return bpf_test_sockopt_flip(lc->ctx, lc->sk, entry, SOL_SOCKET);
}
160
/*
 * bpf_loop() callback: run entry @i of sol_ip_tests at IPPROTO_IP.
 * Returns 1 for an out-of-range index, for the terminating entry, or when
 * the selected test helper fails; returns 0 when the entry passes.
 */
static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;

	if (i >= ARRAY_SIZE(sol_ip_tests))
		return 1;

	entry = &sol_ip_tests[i];
	if (entry->opt == 0)
		return 1;

	if (!entry->flip)
		return bpf_test_sockopt_int(lc->ctx, lc->sk, entry, IPPROTO_IP);

	return bpf_test_sockopt_flip(lc->ctx, lc->sk, entry, IPPROTO_IP);
}
177
/*
 * bpf_loop() callback: run entry @i of sol_ipv6_tests at IPPROTO_IPV6.
 * Returns 1 for an out-of-range index, for the terminating entry, or when
 * the selected test helper fails; returns 0 when the entry passes.
 */
static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;

	if (i >= ARRAY_SIZE(sol_ipv6_tests))
		return 1;

	entry = &sol_ipv6_tests[i];
	if (entry->opt == 0)
		return 1;

	if (!entry->flip)
		return bpf_test_sockopt_int(lc->ctx, lc->sk, entry, IPPROTO_IPV6);

	return bpf_test_sockopt_flip(lc->ctx, lc->sk, entry, IPPROTO_IPV6);
}
194
/*
 * bpf_loop() callback: run entry @i of sol_tcp_tests at IPPROTO_TCP.
 *
 * Every option except TCP_CONGESTION goes through the generic flip/int
 * helpers.  TCP_CONGESTION takes a string, so it is tested inline:
 * an unknown name must be rejected, then the algorithm is switched to
 * "reno" (if currently "cubic") or to "cubic" (otherwise), verified, and
 * the name read at the start is written back.
 *
 * Returns 1 for an out-of-range index, for the terminating entry, or on
 * any failure; returns 0 when the entry passes.
 */
static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;
	char saved_cc[16], cur_cc[16];
	const char *want_cc;
	int want_cc_len;
	struct sock *sk;
	void *ctx;

	if (i >= ARRAY_SIZE(sol_tcp_tests))
		return 1;

	entry = &sol_tcp_tests[i];
	if (entry->opt == 0)
		return 1;

	ctx = lc->ctx;
	sk = lc->sk;

	if (entry->opt != TCP_CONGESTION) {
		if (entry->flip)
			return bpf_test_sockopt_flip(ctx, sk, entry, IPPROTO_TCP);

		return bpf_test_sockopt_int(ctx, sk, entry, IPPROTO_TCP);
	}

	/* Setting a congestion control that does not exist has to fail. */
	if (!bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
			    (void *)not_exist_cc, sizeof(not_exist_cc)))
		return 1;

	if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
			   saved_cc, sizeof(saved_cc)))
		return 1;

	/* Pick whichever of cubic/reno is not the one read above. */
	if (!bpf_strncmp(saved_cc, sizeof(saved_cc), cubic_cc)) {
		want_cc = reno_cc;
		want_cc_len = sizeof(reno_cc);
	} else {
		want_cc = cubic_cc;
		want_cc_len = sizeof(cubic_cc);
	}

	if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
			   (void *)want_cc, want_cc_len))
		return 1;

	if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
			   cur_cc, sizeof(cur_cc)))
		return 1;
	if (bpf_strncmp(cur_cc, sizeof(cur_cc), want_cc))
		return 1;

	/* Write back the name that was read at the start. */
	if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
			   saved_cc, sizeof(saved_cc)))
		return 1;

	return 0;
}
245
bpf_test_sockopt(void * ctx,struct sock * sk)246 static int bpf_test_sockopt(void *ctx, struct sock *sk)
247 {
248 struct loop_ctx lc = { .ctx = ctx, .sk = sk, };
249 __u16 family, proto;
250 int n;
251
252 family = sk->sk_family;
253 proto = sk->sk_protocol;
254
255 n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0);
256 if (n != ARRAY_SIZE(sol_socket_tests))
257 return -1;
258
259 if (proto == IPPROTO_TCP) {
260 n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0);
261 if (n != ARRAY_SIZE(sol_tcp_tests))
262 return -1;
263 }
264
265 if (family == AF_INET) {
266 n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0);
267 if (n != ARRAY_SIZE(sol_ip_tests))
268 return -1;
269 } else {
270 n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0);
271 if (n != ARRAY_SIZE(sol_ipv6_tests))
272 return -1;
273 }
274
275 return 0;
276 }
277
binddev_test(void * ctx)278 static int binddev_test(void *ctx)
279 {
280 const char empty_ifname[] = "";
281 int ifindex, zero = 0;
282
283 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
284 (void *)veth, sizeof(veth)))
285 return -1;
286 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
287 &ifindex, sizeof(int)) ||
288 ifindex != veth_ifindex)
289 return -1;
290
291 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
292 (void *)empty_ifname, sizeof(empty_ifname)))
293 return -1;
294 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
295 &ifindex, sizeof(int)) ||
296 ifindex != 0)
297 return -1;
298
299 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
300 (void *)&veth_ifindex, sizeof(int)))
301 return -1;
302 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
303 &ifindex, sizeof(int)) ||
304 ifindex != veth_ifindex)
305 return -1;
306
307 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
308 &zero, sizeof(int)))
309 return -1;
310 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
311 &ifindex, sizeof(int)) ||
312 ifindex != 0)
313 return -1;
314
315 return 0;
316 }
317
test_tcp_maxseg(void * ctx,struct sock * sk)318 static int test_tcp_maxseg(void *ctx, struct sock *sk)
319 {
320 int val = 1314, tmp;
321
322 if (sk->sk_state != TCP_ESTABLISHED)
323 return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG,
324 &val, sizeof(val));
325
326 if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG, &tmp, sizeof(tmp)) ||
327 tmp > val)
328 return -1;
329
330 return 0;
331 }
332
test_tcp_saved_syn(void * ctx,struct sock * sk)333 static int test_tcp_saved_syn(void *ctx, struct sock *sk)
334 {
335 __u8 saved_syn[20];
336 int one = 1;
337
338 if (sk->sk_state == TCP_LISTEN)
339 return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_SAVE_SYN,
340 &one, sizeof(one));
341
342 return bpf_getsockopt(ctx, IPPROTO_TCP, TCP_SAVED_SYN,
343 saved_syn, sizeof(saved_syn));
344 }
345
346 SEC("lsm_cgroup/socket_post_create")
BPF_PROG(socket_post_create,struct socket * sock,int family,int type,int protocol,int kern)347 int BPF_PROG(socket_post_create, struct socket *sock, int family,
348 int type, int protocol, int kern)
349 {
350 struct sock *sk = sock->sk;
351
352 if (!sk)
353 return 1;
354
355 nr_socket_post_create += !bpf_test_sockopt(sk, sk);
356 nr_binddev += !binddev_test(sk);
357
358 return 1;
359 }
360
/*
 * cgroup getsockopt hook.
 *
 * Only SOL_TCP / TCP_BPF_SOCK_OPS_CB_FLAGS requests on a non-NULL socket
 * are handled; everything else returns 1 without touching the context.
 * For the handled option, the tcp_sock's bpf_sock_ops_cb_flags is stored
 * into the option buffer and ctx->retval is set to 0, provided the buffer
 * has room for an int.  Always returns 1.
 */
SEC("cgroup/getsockopt")
int _getsockopt(struct bpf_sockopt *ctx)
{
	struct bpf_sock *sk = ctx->sk;
	int *optval = ctx->optval;
	struct tcp_sock *tp;

	/* Ignore requests without a socket or for any other option. */
	if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
		return 1;

	tp = bpf_core_cast(sk, struct tcp_sock);
	/* Only store through optval when an int fits before optval_end. */
	if (ctx->optval + sizeof(int) <= ctx->optval_end) {
		*optval = tp->bpf_sock_ops_cb_flags;
		ctx->retval = 0;
	}
	return 1;
}
378
379 SEC("sockops")
skops_sockopt(struct bpf_sock_ops * skops)380 int skops_sockopt(struct bpf_sock_ops *skops)
381 {
382 struct bpf_sock *bpf_sk = skops->sk;
383 struct sock *sk;
384 int flags;
385
386 if (!bpf_sk)
387 return 1;
388
389 sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
390 if (!sk)
391 return 1;
392
393 switch (skops->op) {
394 case BPF_SOCK_OPS_TCP_LISTEN_CB:
395 nr_listen += !(bpf_test_sockopt(skops, sk) ||
396 test_tcp_maxseg(skops, sk) ||
397 test_tcp_saved_syn(skops, sk));
398 break;
399 case BPF_SOCK_OPS_TCP_CONNECT_CB:
400 nr_connect += !(bpf_test_sockopt(skops, sk) ||
401 test_tcp_maxseg(skops, sk));
402 break;
403 case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
404 nr_active += !(bpf_test_sockopt(skops, sk) ||
405 test_tcp_maxseg(skops, sk));
406 break;
407 case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
408 nr_passive += !(bpf_test_sockopt(skops, sk) ||
409 test_tcp_maxseg(skops, sk) ||
410 test_tcp_saved_syn(skops, sk));
411 flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
412 bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
413 break;
414 case BPF_SOCK_OPS_STATE_CB:
415 if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
416 nr_fin_wait1 += !bpf_test_sockopt(skops, sk);
417 break;
418 }
419
420 return 1;
421 }
422
/* License string, placed in the "license" section via SEC(). */
char _license[] SEC("license") = "GPL";
424