1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/tcp.h> 4 #include <linux/rcupdate.h> 5 #include <net/tcp.h> 6 #include <net/busy_poll.h> 7 8 void tcp_fastopen_init_key_once(struct net *net) 9 { 10 u8 key[TCP_FASTOPEN_KEY_LENGTH]; 11 struct tcp_fastopen_context *ctxt; 12 13 rcu_read_lock(); 14 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); 15 if (ctxt) { 16 rcu_read_unlock(); 17 return; 18 } 19 rcu_read_unlock(); 20 21 /* tcp_fastopen_reset_cipher publishes the new context 22 * atomically, so we allow this race happening here. 23 * 24 * All call sites of tcp_fastopen_cookie_gen also check 25 * for a valid cookie, so this is an acceptable risk. 26 */ 27 get_random_bytes(key, sizeof(key)); 28 tcp_fastopen_reset_cipher(net, NULL, key, NULL); 29 } 30 31 static void tcp_fastopen_ctx_free(struct rcu_head *head) 32 { 33 struct tcp_fastopen_context *ctx = 34 container_of(head, struct tcp_fastopen_context, rcu); 35 36 kfree_sensitive(ctx); 37 } 38 39 void tcp_fastopen_destroy_cipher(struct sock *sk) 40 { 41 struct tcp_fastopen_context *ctx; 42 43 ctx = rcu_dereference_protected( 44 inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1); 45 if (ctx) 46 call_rcu(&ctx->rcu, tcp_fastopen_ctx_free); 47 } 48 49 void tcp_fastopen_ctx_destroy(struct net *net) 50 { 51 struct tcp_fastopen_context *ctxt; 52 53 ctxt = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, NULL)); 54 55 if (ctxt) 56 call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); 57 } 58 59 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, 60 void *primary_key, void *backup_key) 61 { 62 struct tcp_fastopen_context *ctx, *octx; 63 struct fastopen_queue *q; 64 int err = 0; 65 66 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 67 if (!ctx) { 68 err = -ENOMEM; 69 goto out; 70 } 71 72 ctx->key[0].key[0] = get_unaligned_le64(primary_key); 73 ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8); 74 if (backup_key) { 75 ctx->key[1].key[0] = get_unaligned_le64(backup_key); 76 ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8); 77 ctx->num = 2; 78 } else { 79 ctx->num = 1; 80 } 81 82 if (sk) { 83 q = &inet_csk(sk)->icsk_accept_queue.fastopenq; 84 octx = unrcu_pointer(xchg(&q->ctx, RCU_INITIALIZER(ctx))); 85 } else { 86 octx = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, 87 RCU_INITIALIZER(ctx))); 88 } 89 90 if (octx) 91 call_rcu(&octx->rcu, tcp_fastopen_ctx_free); 92 out: 93 return err; 94 } 95 96 int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, 97 u64 *key) 98 { 99 struct tcp_fastopen_context *ctx; 100 int n_keys = 0, i; 101 102 rcu_read_lock(); 103 if (icsk) 104 ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); 105 else 106 ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); 107 if (ctx) { 108 n_keys = tcp_fastopen_context_len(ctx); 109 for (i = 0; i < n_keys; i++) { 110 put_unaligned_le64(ctx->key[i].key[0], key + (i * 2)); 111 put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1); 112 } 113 } 114 rcu_read_unlock(); 115 116 return n_keys; 117 } 118 119 static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, 120 struct sk_buff *syn, 121 const siphash_key_t *key, 122 struct tcp_fastopen_cookie *foc) 123 { 124 BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); 125 126 if (req->rsk_ops->family == AF_INET) { 127 const struct iphdr *iph = ip_hdr(syn); 128 129 foc->val[0] = cpu_to_le64(siphash(&iph->saddr, 130 sizeof(iph->saddr) + 131 sizeof(iph->daddr), 132 key)); 133 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 134 return true; 135 } 136 #if IS_ENABLED(CONFIG_IPV6) 137 if (req->rsk_ops->family == AF_INET6) { 138 const struct ipv6hdr *ip6h = ipv6_hdr(syn); 139 140 foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr, 141 sizeof(ip6h->saddr) + 142 sizeof(ip6h->daddr), 143 key)); 144 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 145 return true; 146 } 147 #endif 148 return false; 149 } 150 151 /* Generate the fastopen cookie by applying SipHash to both the source and 152 * destination addresses. 153 */ 154 static void tcp_fastopen_cookie_gen(struct sock *sk, 155 struct request_sock *req, 156 struct sk_buff *syn, 157 struct tcp_fastopen_cookie *foc) 158 { 159 struct tcp_fastopen_context *ctx; 160 161 rcu_read_lock(); 162 ctx = tcp_fastopen_get_ctx(sk); 163 if (ctx) 164 __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc); 165 rcu_read_unlock(); 166 } 167 168 /* If an incoming SYN or SYNACK frame contains a payload and/or FIN, 169 * queue this additional data / FIN. 170 */ 171 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) 172 { 173 struct tcp_sock *tp = tcp_sk(sk); 174 175 if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) 176 return; 177 178 skb = skb_clone(skb, GFP_ATOMIC); 179 if (!skb) 180 return; 181 182 tcp_cleanup_skb(skb); 183 /* segs_in has been initialized to 1 in tcp_create_openreq_child(). 184 * Hence, reset segs_in to 0 before calling tcp_segs_in() 185 * to avoid double counting. Also, tcp_segs_in() expects 186 * skb->len to include the tcp_hdrlen. Hence, it should 187 * be called before __skb_pull(). 188 */ 189 tp->segs_in = 0; 190 tcp_segs_in(tp, skb); 191 __skb_pull(skb, tcp_hdrlen(skb)); 192 sk_forced_mem_schedule(sk, skb->truesize); 193 skb_set_owner_r(skb, sk); 194 195 TCP_SKB_CB(skb)->seq++; 196 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; 197 198 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 199 tcp_add_receive_queue(sk, skb); 200 tp->syn_data_acked = 1; 201 202 /* u64_stats_update_begin(&tp->syncp) not needed here, 203 * as we certainly are not changing upper 32bit value (0) 204 */ 205 tp->bytes_received = skb->len; 206 207 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 208 tcp_fin(sk); 209 } 210 211 /* returns 0 - no key match, 1 for primary, 2 for backup */ 212 static int tcp_fastopen_cookie_gen_check(struct sock *sk, 213 struct request_sock *req, 214 struct sk_buff *syn, 215 struct tcp_fastopen_cookie *orig, 216 struct tcp_fastopen_cookie *valid_foc) 217 { 218 struct tcp_fastopen_cookie search_foc = { .len = -1 }; 219 struct tcp_fastopen_cookie *foc = valid_foc; 220 struct tcp_fastopen_context *ctx; 221 int i, ret = 0; 222 223 rcu_read_lock(); 224 ctx = tcp_fastopen_get_ctx(sk); 225 if (!ctx) 226 goto out; 227 for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { 228 __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc); 229 if (tcp_fastopen_cookie_match(foc, orig)) { 230 ret = i + 1; 231 goto out; 232 } 233 foc = &search_foc; 234 } 235 out: 236 rcu_read_unlock(); 237 return ret; 238 } 239 240 static struct sock *tcp_fastopen_create_child(struct sock *sk, 241 struct sk_buff *skb, 242 struct request_sock *req) 243 { 244 struct tcp_sock *tp; 245 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 246 struct sock *child; 247 bool own_req; 248 249 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, 250 NULL, &own_req); 251 if (!child) 252 return NULL; 253 254 spin_lock(&queue->fastopenq.lock); 255 queue->fastopenq.qlen++; 256 spin_unlock(&queue->fastopenq.lock); 257 258 /* Initialize the child socket. Have to fix some values to take 259 * into account the child is a Fast Open socket and is created 260 * only out of the bits carried in the SYN packet. 261 */ 262 tp = tcp_sk(child); 263 264 rcu_assign_pointer(tp->fastopen_rsk, req); 265 tcp_rsk(req)->tfo_listener = true; 266 267 /* RFC1323: The window in SYN & SYN/ACK segments is never 268 * scaled. So correct it appropriately. 269 */ 270 tp->snd_wnd = ntohs(tcp_hdr(skb)->window); 271 tp->max_window = tp->snd_wnd; 272 273 /* Activate the retrans timer so that SYNACK can be retransmitted. 274 * The request socket is not added to the ehash 275 * because it's been added to the accept queue directly. 276 */ 277 req->timeout = tcp_timeout_init(child); 278 tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS, 279 req->timeout, false); 280 281 refcount_set(&req->rsk_refcnt, 2); 282 283 sk_mark_napi_id_set(child, skb); 284 285 /* Now finish processing the fastopen child socket. */ 286 tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb); 287 288 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 289 290 tcp_fastopen_add_skb(child, skb); 291 292 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; 293 tp->rcv_wup = tp->rcv_nxt; 294 /* tcp_conn_request() is sending the SYNACK, 295 * and queues the child into listener accept queue. 296 */ 297 return child; 298 } 299 300 static bool tcp_fastopen_queue_check(struct sock *sk) 301 { 302 struct fastopen_queue *fastopenq; 303 int max_qlen; 304 305 /* Make sure the listener has enabled fastopen, and we don't 306 * exceed the max # of pending TFO requests allowed before trying 307 * to validating the cookie in order to avoid burning CPU cycles 308 * unnecessarily. 309 * 310 * XXX (TFO) - The implication of checking the max_qlen before 311 * processing a cookie request is that clients can't differentiate 312 * between qlen overflow causing Fast Open to be disabled 313 * temporarily vs a server not supporting Fast Open at all. 314 */ 315 fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; 316 max_qlen = READ_ONCE(fastopenq->max_qlen); 317 if (max_qlen == 0) 318 return false; 319 320 if (fastopenq->qlen >= max_qlen) { 321 struct request_sock *req1; 322 spin_lock(&fastopenq->lock); 323 req1 = fastopenq->rskq_rst_head; 324 if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { 325 __NET_INC_STATS(sock_net(sk), 326 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); 327 spin_unlock(&fastopenq->lock); 328 return false; 329 } 330 fastopenq->rskq_rst_head = req1->dl_next; 331 fastopenq->qlen--; 332 spin_unlock(&fastopenq->lock); 333 reqsk_put(req1); 334 } 335 return true; 336 } 337 338 static bool tcp_fastopen_no_cookie(const struct sock *sk, 339 const struct dst_entry *dst, 340 int flag) 341 { 342 return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || 343 tcp_sk(sk)->fastopen_no_cookie || 344 (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); 345 } 346 347 /* Returns true if we should perform Fast Open on the SYN. The cookie (foc) 348 * may be updated and return the client in the SYN-ACK later. E.g., Fast Open 349 * cookie request (foc->len == 0). 350 */ 351 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, 352 struct request_sock *req, 353 struct tcp_fastopen_cookie *foc, 354 const struct dst_entry *dst) 355 { 356 bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; 357 int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); 358 struct tcp_fastopen_cookie valid_foc = { .len = -1 }; 359 struct sock *child; 360 int ret = 0; 361 362 if (foc->len == 0) /* Client requests a cookie */ 363 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); 364 365 if (!((tcp_fastopen & TFO_SERVER_ENABLE) && 366 (syn_data || foc->len >= 0) && 367 tcp_fastopen_queue_check(sk))) { 368 foc->len = -1; 369 return NULL; 370 } 371 372 if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) 373 goto fastopen; 374 375 if (foc->len == 0) { 376 /* Client requests a cookie. */ 377 tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc); 378 } else if (foc->len > 0) { 379 ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc, 380 &valid_foc); 381 if (!ret) { 382 NET_INC_STATS(sock_net(sk), 383 LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 384 } else { 385 /* Cookie is valid. Create a (full) child socket to 386 * accept the data in SYN before returning a SYN-ACK to 387 * ack the data. If we fail to create the socket, fall 388 * back and ack the ISN only but includes the same 389 * cookie. 390 * 391 * Note: Data-less SYN with valid cookie is allowed to 392 * send data in SYN_RECV state. 393 */ 394 fastopen: 395 child = tcp_fastopen_create_child(sk, skb, req); 396 if (child) { 397 if (ret == 2) { 398 valid_foc.exp = foc->exp; 399 *foc = valid_foc; 400 NET_INC_STATS(sock_net(sk), 401 LINUX_MIB_TCPFASTOPENPASSIVEALTKEY); 402 } else { 403 foc->len = -1; 404 } 405 NET_INC_STATS(sock_net(sk), 406 LINUX_MIB_TCPFASTOPENPASSIVE); 407 tcp_sk(child)->syn_fastopen_child = 1; 408 return child; 409 } 410 NET_INC_STATS(sock_net(sk), 411 LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 412 } 413 } 414 valid_foc.exp = foc->exp; 415 *foc = valid_foc; 416 return NULL; 417 } 418 419 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, 420 struct tcp_fastopen_cookie *cookie) 421 { 422 const struct dst_entry *dst; 423 424 tcp_fastopen_cache_get(sk, mss, cookie); 425 426 /* Firewall blackhole issue check */ 427 if (tcp_fastopen_active_should_disable(sk)) { 428 cookie->len = -1; 429 return false; 430 } 431 432 dst = __sk_dst_get(sk); 433 434 if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) { 435 cookie->len = -1; 436 return true; 437 } 438 if (cookie->len > 0) 439 return true; 440 tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE; 441 return false; 442 } 443 444 /* This function checks if we want to defer sending SYN until the first 445 * write(). We defer under the following conditions: 446 * 1. fastopen_connect sockopt is set 447 * 2. we have a valid cookie 448 * Return value: return true if we want to defer until application writes data 449 * return false if we want to send out SYN immediately 450 */ 451 bool tcp_fastopen_defer_connect(struct sock *sk, int *err) 452 { 453 struct tcp_fastopen_cookie cookie = { .len = 0 }; 454 struct tcp_sock *tp = tcp_sk(sk); 455 u16 mss; 456 457 if (tp->fastopen_connect && !tp->fastopen_req) { 458 if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { 459 inet_set_bit(DEFER_CONNECT, sk); 460 return true; 461 } 462 463 /* Alloc fastopen_req in order for FO option to be included 464 * in SYN 465 */ 466 tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), 467 sk->sk_allocation); 468 if (tp->fastopen_req) 469 tp->fastopen_req->cookie = cookie; 470 else 471 *err = -ENOBUFS; 472 } 473 return false; 474 } 475 EXPORT_IPV6_MOD(tcp_fastopen_defer_connect); 476 477 /* 478 * The following code block is to deal with middle box issues with TFO: 479 * Middlebox firewall issues can potentially cause server's data being 480 * blackholed after a successful 3WHS using TFO. 481 * The proposed solution is to disable active TFO globally under the 482 * following circumstances: 483 * 1. client side TFO socket receives out of order FIN 484 * 2. client side TFO socket receives out of order RST 485 * 3. client side TFO socket has timed out three times consecutively during 486 * or after handshake 487 * We disable active side TFO globally for 1hr at first. Then if it 488 * happens again, we disable it for 2h, then 4h, 8h, ... 489 * And we reset the timeout back to 1hr when we see a successful active 490 * TFO connection with data exchanges. 491 */ 492 493 /* Disable active TFO and record current jiffies and 494 * tfo_active_disable_times 495 */ 496 void tcp_fastopen_active_disable(struct sock *sk) 497 { 498 struct net *net = sock_net(sk); 499 500 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) 501 return; 502 503 /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ 504 WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); 505 506 /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). 507 * We want net->ipv4.tfo_active_disable_stamp to be updated first. 508 */ 509 smp_mb__before_atomic(); 510 atomic_inc(&net->ipv4.tfo_active_disable_times); 511 512 NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); 513 } 514 515 /* Calculate timeout for tfo active disable 516 * Return true if we are still in the active TFO disable period 517 * Return false if timeout already expired and we should use active TFO 518 */ 519 bool tcp_fastopen_active_should_disable(struct sock *sk) 520 { 521 unsigned int tfo_bh_timeout = 522 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); 523 unsigned long timeout; 524 int tfo_da_times; 525 int multiplier; 526 527 if (!tfo_bh_timeout) 528 return false; 529 530 tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); 531 if (!tfo_da_times) 532 return false; 533 534 /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ 535 smp_rmb(); 536 537 /* Limit timeout to max: 2^6 * initial timeout */ 538 multiplier = 1 << min(tfo_da_times - 1, 6); 539 540 /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ 541 timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + 542 multiplier * tfo_bh_timeout * HZ; 543 if (time_before(jiffies, timeout)) 544 return true; 545 546 /* Mark check bit so we can check for successful active TFO 547 * condition and reset tfo_active_disable_times 548 */ 549 tcp_sk(sk)->syn_fastopen_ch = 1; 550 return false; 551 } 552 553 /* Disable active TFO if FIN is the only packet in the ofo queue 554 * and no data is received. 555 * Also check if we can reset tfo_active_disable_times if data is 556 * received successfully on a marked active TFO sockets opened on 557 * a non-loopback interface 558 */ 559 void tcp_fastopen_active_disable_ofo_check(struct sock *sk) 560 { 561 struct tcp_sock *tp = tcp_sk(sk); 562 struct dst_entry *dst; 563 struct sk_buff *skb; 564 565 if (!tp->syn_fastopen) 566 return; 567 568 if (!tp->data_segs_in) { 569 skb = skb_rb_first(&tp->out_of_order_queue); 570 if (skb && !skb_rb_next(skb)) { 571 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { 572 tcp_fastopen_active_disable(sk); 573 return; 574 } 575 } 576 } else if (tp->syn_fastopen_ch && 577 atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { 578 dst = sk_dst_get(sk); 579 if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) 580 atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); 581 dst_release(dst); 582 } 583 } 584 585 void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired) 586 { 587 u32 timeouts = inet_csk(sk)->icsk_retransmits; 588 struct tcp_sock *tp = tcp_sk(sk); 589 590 /* Broken middle-boxes may black-hole Fast Open connection during or 591 * even after the handshake. Be extremely conservative and pause 592 * Fast Open globally after hitting the third consecutive timeout or 593 * exceeding the configured timeout limit. 594 */ 595 if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) && 596 (timeouts == 2 || (timeouts < 2 && expired))) { 597 tcp_fastopen_active_disable(sk); 598 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); 599 } 600 } 601