1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3
4 #include <stddef.h>
5 #include <errno.h>
6 #include <stdbool.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <linux/tcp.h>
10 #include <linux/socket.h>
11 #include <linux/bpf.h>
12 #include <linux/types.h>
13 #include <bpf/bpf_helpers.h>
14 #include <bpf/bpf_endian.h>
15 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
16 #include "test_tcp_hdr_options.h"
17 #include "bpf_misc.h"
18
/* Kind of the test TCP option to write/search for; TCPOPT_EXP uses the
 * experimental-option layout with a 16-bit magic.
 */
__u8 test_kind = TCPOPT_EXP;
/* Magic placed after kind/len when test_kind == TCPOPT_EXP */
__u16 test_magic = 0xeB9F;
/* Snapshot of bpf_sock_ops_cb_flags taken at passive establish;
 * read back by the user-space side of the test.
 */
__u32 inherit_cb_flags = 0;
22
/* Options this program writes on the passive (server) side */
struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out = {};

/* Options received by the passive side (filled in by this program,
 * checked by user space)
 */
struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in = {};

/* Options this program writes on the active (client) side */
struct bpf_test_option active_syn_out = {};
struct bpf_test_option active_fin_out = {};

/* Options received by the active side */
struct bpf_test_option active_estab_in = {};
struct bpf_test_option active_fin_in = {};
34
/* Per-socket state (active/passive, syncookie/fastopen/resend_syn),
 * created at connection establishment and consulted on later callbacks.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");
41
skops_want_cookie(const struct bpf_sock_ops * skops)42 static bool skops_want_cookie(const struct bpf_sock_ops *skops)
43 {
44 return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
45 }
46
skops_current_mss(const struct bpf_sock_ops * skops)47 static bool skops_current_mss(const struct bpf_sock_ops *skops)
48 {
49 return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
50 }
51
/* Total on-the-wire length of the test option for the given flag set,
 * including kind/len (and magic when using TCPOPT_EXP).  Returns 0 when
 * no flags are set, i.e. nothing needs to be written.
 */
static __u8 option_total_len(__u8 flags)
{
	__u8 bit, nr_bytes = 1;	/* the flags byte itself */

	if (flags == 0)
		return 0;

	/* Each flag other than RESEND contributes one payload byte */
	for (bit = OPTION_RESEND + 1; bit < __NR_OPTION_FLAGS; bit++)
		if (TEST_OPTION_FLAGS(flags, bit))
			nr_bytes++;

	if (test_kind == TCPOPT_EXP)
		return nr_bytes + TCP_BPF_EXPOPT_BASE_LEN;

	return nr_bytes + 2;	/* +1 kind, +1 kind-len */
}
68
/* Serialize test_opt into data: flags byte first, then one byte per
 * flag that carries a value (same order parse_test_option expects).
 */
static void write_test_option(const struct bpf_test_option *test_opt,
			      __u8 *data)
{
	__u8 *p = data;

	*p++ = test_opt->flags;

	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
		*p++ = test_opt->max_delack_ms;

	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
		*p++ = test_opt->rand;
}
81
/* Write the test option into the outgoing TCP header.  Depending on
 * test_kind it is emitted either as an experimental option
 * (kind/len/magic/data) or as a regular option (kind/len/data).
 * Returns CG_OK on success; records the error and returns via
 * RET_CG_ERR otherwise.
 */
static int store_option(struct bpf_sock_ops *skops,
			const struct bpf_test_option *test_opt)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} write_opt;
	int err;

	if (test_kind == TCPOPT_EXP) {
		write_opt.exprm.kind = TCPOPT_EXP;
		write_opt.exprm.len = option_total_len(test_opt->flags);
		write_opt.exprm.magic = __bpf_htons(test_magic);
		/* Zero the data area before packing the used bytes */
		write_opt.exprm.data32 = 0;
		write_test_option(test_opt, write_opt.exprm.data);
		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
					sizeof(write_opt.exprm), 0);
	} else {
		write_opt.regular.kind = test_kind;
		write_opt.regular.len = option_total_len(test_opt->flags);
		write_opt.regular.data32 = 0;
		write_test_option(test_opt, write_opt.regular.data);
		err = bpf_store_hdr_opt(skops, &write_opt.regular,
					sizeof(write_opt.regular), 0);
	}

	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}
113
parse_test_option(struct bpf_test_option * opt,const __u8 * start)114 static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
115 {
116 opt->flags = *start++;
117
118 if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
119 opt->max_delack_ms = *start++;
120
121 if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
122 opt->rand = *start++;
123
124 return 0;
125 }
126
/* Search the incoming TCP header (or the saved SYN when from_syn is
 * true) for the test option and decode it into test_opt.
 * Returns 0 on success or the negative error from bpf_load_hdr_opt()
 * (e.g. -ENOMSG when the option is not present).
 */
static int load_option(struct bpf_sock_ops *skops,
		       struct bpf_test_option *test_opt, bool from_syn)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} search_opt;
	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;

	if (test_kind == TCPOPT_EXP) {
		/* For experimental options, kind/len/magic form the
		 * search key (len 4 == kind + len + 2-byte magic).
		 */
		search_opt.exprm.kind = TCPOPT_EXP;
		search_opt.exprm.len = 4;
		search_opt.exprm.magic = __bpf_htons(test_magic);
		search_opt.exprm.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
				       sizeof(search_opt.exprm), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.exprm.data);
	} else {
		/* Regular option: search by kind only (len 0 means
		 * "any length")
		 */
		search_opt.regular.kind = test_kind;
		search_opt.regular.len = 0;
		search_opt.regular.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
				       sizeof(search_opt.regular), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.regular.data);
	}
}
157
/* HDR_OPT_LEN_CB for a SYNACK: reserve space for passive_synack_out,
 * but only if the peer's SYN (saved by TCP_SAVE_SYN) carried the test
 * option.
 */
static int synack_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option test_opt = {};
	__u8 optlen;
	int err;

	if (!passive_synack_out.flags)
		return CG_OK;

	err = load_option(skops, &test_opt, true);

	/* bpf_test_option is not found */
	if (err == -ENOMSG)
		return CG_OK;

	if (err)
		RET_CG_ERR(err);

	optlen = option_total_len(passive_synack_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
185
write_synack_opt(struct bpf_sock_ops * skops)186 static int write_synack_opt(struct bpf_sock_ops *skops)
187 {
188 struct bpf_test_option opt;
189
190 if (!passive_synack_out.flags)
191 /* We should not even be called since no header
192 * space has been reserved.
193 */
194 RET_CG_ERR(0);
195
196 opt = passive_synack_out;
197 if (skops_want_cookie(skops))
198 SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
199
200 return store_option(skops, &opt);
201 }
202
syn_opt_len(struct bpf_sock_ops * skops)203 static int syn_opt_len(struct bpf_sock_ops *skops)
204 {
205 __u8 optlen;
206 int err;
207
208 if (!active_syn_out.flags)
209 return CG_OK;
210
211 optlen = option_total_len(active_syn_out.flags);
212 if (optlen) {
213 err = bpf_reserve_hdr_opt(skops, optlen, 0);
214 if (err)
215 RET_CG_ERR(err);
216 }
217
218 return CG_OK;
219 }
220
write_syn_opt(struct bpf_sock_ops * skops)221 static int write_syn_opt(struct bpf_sock_ops *skops)
222 {
223 if (!active_syn_out.flags)
224 RET_CG_ERR(0);
225
226 return store_option(skops, &active_syn_out);
227 }
228
/* HDR_OPT_LEN_CB for a FIN: reserve space for the side-specific FIN
 * option (active_fin_out or passive_fin_out), chosen from the per-sk
 * storage created at establish time.
 */
static int fin_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;
	__u8 optlen;
	int err;

	if (!skops->sk)
		RET_CG_ERR(0);

	/* Lookup only (no create): storage must already exist */
	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	optlen = option_total_len(opt->flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
257
/* WRITE_HDR_OPT_CB for a FIN: emit the side-specific FIN option picked
 * via the per-sk storage.  Errors out if called with nothing to write.
 */
static int write_fin_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	/* Space should only have been reserved when flags are set */
	if (!opt->flags)
		RET_CG_ERR(0);

	return store_option(skops, opt);
}
280
resend_in_ack(struct bpf_sock_ops * skops)281 static int resend_in_ack(struct bpf_sock_ops *skops)
282 {
283 struct hdr_stg *hdr_stg;
284
285 if (!skops->sk)
286 return -1;
287
288 hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
289 if (!hdr_stg)
290 return -1;
291
292 return !!hdr_stg->resend_syn;
293 }
294
nodata_opt_len(struct bpf_sock_ops * skops)295 static int nodata_opt_len(struct bpf_sock_ops *skops)
296 {
297 int resend;
298
299 resend = resend_in_ack(skops);
300 if (resend < 0)
301 RET_CG_ERR(0);
302
303 if (resend)
304 return syn_opt_len(skops);
305
306 return CG_OK;
307 }
308
write_nodata_opt(struct bpf_sock_ops * skops)309 static int write_nodata_opt(struct bpf_sock_ops *skops)
310 {
311 int resend;
312
313 resend = resend_in_ack(skops);
314 if (resend < 0)
315 RET_CG_ERR(0);
316
317 if (resend)
318 return write_syn_opt(skops);
319
320 return CG_OK;
321 }
322
/* HDR_OPT_LEN_CB for a data segment.  Intentionally identical to the
 * no-data case; kept separate mostly to demonstrate dispatching on
 * skops->skb_len.
 */
static int data_opt_len(struct bpf_sock_ops *skops)
{
	return nodata_opt_len(skops);
}
330
/* WRITE_HDR_OPT_CB for a data segment — same handling as no-data */
static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}
335
current_mss_opt_len(struct bpf_sock_ops * skops)336 static int current_mss_opt_len(struct bpf_sock_ops *skops)
337 {
338 /* Reserve maximum that may be needed */
339 int err;
340
341 err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
342 if (err)
343 RET_CG_ERR(err);
344
345 return CG_OK;
346 }
347
/* Dispatch BPF_SOCK_OPS_HDR_OPT_LEN_CB by packet type.  The SYNACK
 * test must come before the plain SYN test because TCPHDR_SYNACK
 * includes the SYN bit.
 */
static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return synack_opt_len(skops);

	if (tcp_flags & TCPHDR_SYN)
		return syn_opt_len(skops);

	if (tcp_flags & TCPHDR_FIN)
		return fin_opt_len(skops);

	if (skops_current_mss(skops))
		/* The kernel is calculating the MSS */
		return current_mss_opt_len(skops);

	if (skops->skb_len)
		return data_opt_len(skops);

	return nodata_opt_len(skops);
}
370
/* Dispatch BPF_SOCK_OPS_WRITE_HDR_OPT_CB by packet type (SYNACK before
 * SYN, same ordering reason as handle_hdr_opt_len).  For non-SYN/FIN
 * packets, distinguish data vs pure-ACK by comparing skb_len against
 * the TCP header length.
 */
static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);
	struct tcphdr *th;

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return write_synack_opt(skops);

	if (tcp_flags & TCPHDR_SYN)
		return write_syn_opt(skops);

	if (tcp_flags & TCPHDR_FIN)
		return write_fin_opt(skops);

	/* Bounds check required by the verifier before reading *th */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (skops->skb_len > tcp_hdrlen(th))
		return write_data_opt(skops);

	return write_nodata_opt(skops);
}
394
/* Cap this socket's delayed-ACK timeout at max_delack_ms (the value we
 * advertised to the peer), converted to microseconds for setsockopt.
 */
static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 delack_us = max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &delack_us, sizeof(delack_us));
}
402
/* Raise this socket's minimum RTO to the peer's advertised max
 * delayed-ACK time (in ms), converted to microseconds.
 */
static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 rto_min_us = peer_max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN,
			      &rto_min_us, sizeof(rto_min_us));
}
410
/* ACTIVE_ESTABLISHED_CB: parse the SYNACK's test option, create the
 * per-sk state, decide whether the SYN option must be resent in ACK
 * (peer asked via OPTION_RESEND, i.e. syncookie mode), and apply the
 * negotiated delack/RTO settings.
 */
static int handle_active_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {
		.active = true,
	};
	int err;

	/* -ENOMSG (option absent) is acceptable; anything else is not */
	err = load_option(skops, &active_estab_in, false);
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
						OPTION_RESEND);
	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
					      &init_stg,
					      BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (init_stg.resend_syn)
		/* Don't clear the write_hdr cb now because
		 * the ACK may get lost and retransmit may
		 * be needed.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn if this
		 * resend_syn option has received by the peer.
		 *
		 * The header option will be resent until a valid
		 * packet is received at handle_parse_hdr()
		 * and all hdr cb flags will be cleared in
		 * handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
	else if (!active_fin_out.flags)
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);

	if (active_syn_out.max_delack_ms) {
		err = set_delack_max(skops, active_syn_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (active_estab_in.max_delack_ms) {
		err = set_rto_min(skops, active_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
461
/* PASSIVE_ESTABLISHED_CB: recover the client's test option (from the
 * saved SYN, or from the ACK when the server was in syncookie mode),
 * create the per-sk state, and apply the negotiated delack/RTO
 * settings.  Also snapshots the inherited cb flags for user space.
 */
static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {};
	struct tcphdr *th;
	int err;

	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;

	err = load_option(skops, &passive_estab_in, true);
	if (err == -ENOENT) {
		/* saved_syn is not found. It was in syncookie mode.
		 * We have asked the active side to resend the options
		 * in ACK, so try to find the bpf_test_option from ACK now.
		 */
		err = load_option(skops, &passive_estab_in, false);
		init_stg.syncookie = true;
	}

	/* ENOMSG: The bpf_test_option is not found which is fine.
	 * Bail out now for all other errors.
	 */
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	/* Bounds check required by the verifier before reading *th */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Fastopen */

		/* Cannot clear cb_flags to stop write_hdr cb.
		 * synack is not sent yet for fast open.
		 * Even it was, the synack may need to be retransmitted.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn
		 * if synack has reached the peer.
		 * All cb_flags will be cleared in handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
		init_stg.fastopen = true;
	} else if (!passive_fin_out.flags) {
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);
	}

	if (!skops->sk ||
	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
				BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (passive_synack_out.max_delack_ms) {
		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (passive_estab_in.max_delack_ms) {
		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
527
/* PARSE_HDR_OPT_CB: a valid post-3WHS packet arrived.  Clear the
 * PARSE_ALL_HDR / write-header cb flags set up during establish once
 * the resent-SYN-option or fastopen-SYNACK is known to have reached
 * the peer, and capture the FIN option when the packet carries FIN.
 */
static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;
	struct tcphdr *th;

	if (!skops->sk)
		RET_CG_ERR(0);

	/* Bounds check required by the verifier before reading *th */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->resend_syn || hdr_stg->fastopen)
		/* The PARSE_ALL_HDR cb flag was turned on
		 * to ensure that the previously written
		 * options have reached the peer.
		 * Those previously written option includes:
		 *     - Active side: resend_syn in ACK during syncookie
		 *       or
		 *     - Passive side: SYNACK during fastopen
		 *
		 * A valid packet has been received here after
		 * the 3WHS, so the PARSE_ALL_HDR cb flag
		 * can be cleared now.
		 */
		clear_parse_all_hdr_cb_flags(skops);

	if (hdr_stg->resend_syn && !active_fin_out.flags)
		/* Active side resent the syn option in ACK
		 * because the server was in syncookie mode.
		 * A valid packet has been received, so
		 * clear header cb flags if there is no
		 * more option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (hdr_stg->fastopen && !passive_fin_out.flags)
		/* Passive side was in fastopen.
		 * A valid packet has been received, so
		 * the SYNACK has reached the peer.
		 * Clear header cb flags if there is no more
		 * option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (th->fin) {
		struct bpf_test_option *fin_opt;
		int err;

		/* Record the peer's FIN option on the matching side */
		if (hdr_stg->active)
			fin_opt = &active_fin_in;
		else
			fin_opt = &passive_fin_in;

		err = load_option(skops, fin_opt, false);
		if (err && err != -ENOMSG)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
593
SEC("sockops")
/* Program entry: route each sockops event to its handler.  LISTEN
 * enables TCP_SAVE_SYN (so the SYN options can be read later) and the
 * header callbacks; CONNECT enables the header callbacks on the active
 * side.
 */
int estab(struct bpf_sock_ops *skops)
{
	int true_val = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &true_val, sizeof(true_val));
		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		set_hdr_cb_flags(skops, 0);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		return handle_active_estab(skops);
	}

	return CG_OK;
}
622
/* GPL license declaration, required to use GPL-only BPF helpers */
char _license[] SEC("license") = "GPL";
624