1 /* 2 * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include "internal/quic_ackm.h" 11 #include "internal/uint_set.h" 12 #include "internal/common.h" 13 #include <assert.h> 14 15 DEFINE_LIST_OF(tx_history, OSSL_ACKM_TX_PKT); 16 17 /* 18 * TX Packet History 19 * ***************** 20 * 21 * The TX Packet History object tracks information about packets which have been 22 * sent for which we later expect to receive an ACK. It is essentially a simple 23 * database keeping a list of packet information structures in packet number 24 * order which can also be looked up directly by packet number. 25 * 26 * We currently only allow packets to be appended to the list (i.e. the packet 27 * numbers of the packets appended to the list must monotonically increase), as 28 * we should not currently need more general functionality such as a sorted list 29 * insert. 30 */ 31 struct tx_pkt_history_st { 32 /* A linked list of all our packets. */ 33 OSSL_LIST(tx_history) 34 packets; 35 36 /* 37 * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *) 38 * 39 * Invariant: A packet is in this map if and only if it is in the linked 40 * list. 41 */ 42 LHASH_OF(OSSL_ACKM_TX_PKT) *map; 43 44 /* 45 * The lowest packet number which may currently be added to the history list 46 * (inclusive). We do not allow packet numbers to be added to the history 47 * list non-monotonically, so packet numbers must be greater than or equal 48 * to this value. 49 */ 50 uint64_t watermark; 51 52 /* 53 * Packet number of the highest packet info structure we have yet appended 54 * to the list. This is usually one less than watermark, except when we have 55 * not added any packet yet. 56 */ 57 uint64_t highest_sent; 58 }; 59 60 DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT); 61 62 static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt) 63 { 64 /* Using low bits of the packet number as the hash should be enough */ 65 return (unsigned long)pkt->pkt_num; 66 } 67 68 static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a, 69 const OSSL_ACKM_TX_PKT *b) 70 { 71 if (a->pkt_num < b->pkt_num) 72 return -1; 73 if (a->pkt_num > b->pkt_num) 74 return 1; 75 return 0; 76 } 77 78 static int 79 tx_pkt_history_init(struct tx_pkt_history_st *h) 80 { 81 ossl_list_tx_history_init(&h->packets); 82 h->watermark = 0; 83 h->highest_sent = 0; 84 85 h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare); 86 if (h->map == NULL) 87 return 0; 88 89 return 1; 90 } 91 92 static void 93 tx_pkt_history_destroy(struct tx_pkt_history_st *h) 94 { 95 lh_OSSL_ACKM_TX_PKT_free(h->map); 96 h->map = NULL; 97 ossl_list_tx_history_init(&h->packets); 98 } 99 100 static int 101 tx_pkt_history_add_actual(struct tx_pkt_history_st *h, 102 OSSL_ACKM_TX_PKT *pkt) 103 { 104 OSSL_ACKM_TX_PKT *existing; 105 106 /* 107 * There should not be any existing packet with this number 108 * in our mapping. 109 */ 110 existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt); 111 if (!ossl_assert(existing == NULL)) 112 return 0; 113 114 /* Should not already be in a list. */ 115 if (!ossl_assert(ossl_list_tx_history_next(pkt) == NULL 116 && ossl_list_tx_history_prev(pkt) == NULL)) 117 return 0; 118 119 lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt); 120 121 ossl_list_tx_history_insert_tail(&h->packets, pkt); 122 return 1; 123 } 124 125 /* Adds a packet information structure to the history list. */ 126 static int 127 tx_pkt_history_add(struct tx_pkt_history_st *h, 128 OSSL_ACKM_TX_PKT *pkt) 129 { 130 if (!ossl_assert(pkt->pkt_num >= h->watermark)) 131 return 0; 132 133 if (tx_pkt_history_add_actual(h, pkt) < 1) 134 return 0; 135 136 h->watermark = pkt->pkt_num + 1; 137 h->highest_sent = pkt->pkt_num; 138 return 1; 139 } 140 141 /* Retrieve a packet information structure by packet number. */ 142 static OSSL_ACKM_TX_PKT * 143 tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num) 144 { 145 OSSL_ACKM_TX_PKT key; 146 147 key.pkt_num = pkt_num; 148 149 return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key); 150 } 151 152 /* Remove a packet information structure from the history log. */ 153 static int 154 tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num) 155 { 156 OSSL_ACKM_TX_PKT key, *pkt; 157 key.pkt_num = pkt_num; 158 159 pkt = tx_pkt_history_by_pkt_num(h, pkt_num); 160 if (pkt == NULL) 161 return 0; 162 163 ossl_list_tx_history_remove(&h->packets, pkt); 164 lh_OSSL_ACKM_TX_PKT_delete(h->map, &key); 165 return 1; 166 } 167 168 /* 169 * RX Packet Number Tracking 170 * ************************* 171 * 172 * **Background.** The RX side of the ACK manager must track packets we have 173 * received for which we have to generate ACK frames. Broadly, this means we 174 * store a set of packet numbers which we have received but which we do not know 175 * for a fact that the transmitter knows we have received. 176 * 177 * This must handle various situations: 178 * 179 * 1. We receive a packet but have not sent an ACK yet, so the transmitter 180 * does not know whether we have received it or not yet. 181 * 182 * 2. We receive a packet and send an ACK which is lost. We do not 183 * immediately know that the ACK was lost and the transmitter does not know 184 * that we have received the packet. 185 * 186 * 3. We receive a packet and send an ACK which is received by the 187 * transmitter. The transmitter does not immediately respond with an ACK, 188 * or responds with an ACK which is lost. The transmitter knows that we 189 * have received the packet, but we do not know for sure that it knows, 190 * because the ACK we sent could have been lost. 191 * 192 * 4. We receive a packet and send an ACK which is received by the 193 * transmitter. The transmitter subsequently sends us an ACK which confirms 194 * its receipt of the ACK we sent, and we successfully receive that ACK, so 195 * we know that the transmitter knows, that we received the original 196 * packet. 197 * 198 * Only when we reach case (4) are we relieved of any need to track a given 199 * packet number we have received, because only in this case do we know for sure 200 * that the peer knows we have received the packet. Having reached case (4) we 201 * will never again need to generate an ACK containing the PN in question, but 202 * until we reach that point, we must keep track of the PN as not having been 203 * provably ACKed, as we may have to keep generating ACKs for the given PN not 204 * just until the transmitter receives one, but until we know that it has 205 * received one. This will be referred to herein as "provably ACKed". 206 * 207 * **Duplicate handling.** The above discusses the case where we have received a 208 * packet with a given PN but are at best unsure whether the sender knows we 209 * have received it or not. However, we must also handle the case where we have 210 * yet to receive a packet with a given PN in the first place. The reason for 211 * this is because of the requirement expressed by RFC 9000 s. 12.3: 212 * 213 * "A receiver MUST discard a newly unprotected packet unless it is certain 214 * that it has not processed another packet with the same packet number from 215 * the same packet number space." 216 * 217 * We must ensure we never process a duplicate PN. As such, each possible PN we 218 * can receive must exist in one of the following logical states: 219 * 220 * - We have never processed this PN before 221 * (so if we receive such a PN, it can be processed) 222 * 223 * - We have processed this PN but it has not yet been provably ACKed 224 * (and should therefore be in any future ACK frame generated; 225 * if we receive such a PN again, it must be ignored) 226 * 227 * - We have processed this PN and it has been provably ACKed 228 * (if we receive such a PN again, it must be ignored) 229 * 230 * However, if we were to track this state for every PN ever used in the history 231 * of a connection, the amount of state required would increase unboundedly as 232 * the connection goes on (for example, we would have to store a set of every PN 233 * ever received.) 234 * 235 * RFC 9000 s. 12.3 continues: 236 * 237 * "Endpoints that track all individual packets for the purposes of detecting 238 * duplicates are at risk of accumulating excessive state. The data required 239 * for detecting duplicates can be limited by maintaining a minimum packet 240 * number below which all packets are immediately dropped." 241 * 242 * Moreover, RFC 9000 s. 13.2.3 states that: 243 * 244 * "A receiver MUST retain an ACK Range unless it can ensure that it will not 245 * subsequently accept packets with numbers in that range. Maintaining a 246 * minimum packet number that increases as ranges are discarded is one way to 247 * achieve this with minimal state." 248 * 249 * This touches on a subtlety of the original requirement quoted above: the 250 * receiver MUST discard a packet unless it is certain that it has not processed 251 * another packet with the same PN. However, this does not forbid the receiver 252 * from also discarding some PNs even though it has not yet processed them. In 253 * other words, implementations must be conservative and err in the direction of 254 * assuming a packet is a duplicate, but it is acceptable for this to come at 255 * the cost of falsely identifying some packets as duplicates. 256 * 257 * This allows us to bound the amount of state we must keep, and we adopt the 258 * suggested strategy quoted above to do so. We define a watermark PN below 259 * which all PNs are in the same state. This watermark is only ever increased. 260 * Thus the PNs the state for which needs to be explicitly tracked is limited to 261 * only a small number of recent PNs, and all older PNs have an assumed state. 262 * 263 * Any given PN thus falls into one of the following states: 264 * 265 * - (A) The PN is above the watermark but we have not yet received it. 266 * 267 * If we receive such a PN, we should process it and record the PN as 268 * received. 269 * 270 * - (B) The PN is above the watermark and we have received it. 271 * 272 * The PN should be included in any future ACK frame we generate. 273 * If we receive such a PN again, we should ignore it. 274 * 275 * - (C) The PN is below the watermark. 276 * 277 * We do not know whether a packet with the given PN was received or 278 * not. To be safe, if we receive such a packet, it is not processed. 279 * 280 * Note that state (C) corresponds to both "we have processed this PN and it has 281 * been provably ACKed" logical state and a subset of the PNs in the "we have 282 * never processed this PN before" logical state (namely all PNs which were lost 283 * and never received, but which are not recent enough to be above the 284 * watermark). The reason we can merge these states and avoid tracking states 285 * for the PNs in this state is because the provably ACKed and never-received 286 * states are functionally identical in terms of how we need to handle them: we 287 * don't need to do anything for PNs in either of these states, so we don't have 288 * to care about PNs in this state nor do we have to care about distinguishing 289 * the two states for a given PN. 290 * 291 * Note that under this scheme provably ACKed PNs are by definition always below 292 * the watermark; therefore, it follows that when a PN becomes provably ACKed, 293 * the watermark must be immediately increased to exceed it (otherwise we would 294 * keep reporting it in future ACK frames). 295 * 296 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when 297 * to advance the watermark: 298 * 299 * "When a packet containing an ACK frame is sent, the Largest Acknowledged 300 * field in that frame can be saved. When a packet containing an ACK frame is 301 * acknowledged, the receiver can stop acknowledging packets less than or 302 * equal to the Largest Acknowledged field in the sent ACK frame." 303 * 304 * This is where our scheme's false positives arise. When a packet containing an 305 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably 306 * acked, and the watermark is bumped accordingly. However, the Largest 307 * Acknowledged field does not imply that all lower PNs have been received, 308 * because there may be gaps expressed in the ranges of PNs expressed by that 309 * and previous ACK frames. Thus, some unreceived PNs may be moved below the 310 * watermark, and we may subsequently reject those PNs as possibly being 311 * duplicates even though we have not actually received those PNs. Since we bump 312 * the watermark when a PN becomes provably ACKed, it follows that an unreceived 313 * PN falls below the watermark (and thus becomes a false positive for the 314 * purposes of duplicate detection) when a higher-numbered PN becomes provably 315 * ACKed. 316 * 317 * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0, 318 * n) will no longer be processed. Although datagrams may be reordered in the 319 * network, a PN we receive can only become provably ACKed after our own 320 * subsequently generated ACK frame is sent in a future TX packet, and then we 321 * receive another RX PN acknowledging that TX packet. This means that a given RX 322 * PN can only become provably ACKed at least 1 RTT after it is received; it is 323 * unlikely that any reordered datagrams will still be "in the network" (and not 324 * lost) by this time. If this does occur for whatever reason and a late PN is 325 * received, the packet will be discarded unprocessed and the PN is simply 326 * handled as though lost (a "written off" PN). 327 * 328 * **Data structure.** Our state for the RX handling side of the ACK manager, as 329 * discussed above, mainly comprises: 330 * 331 * a) a logical set of PNs, and 332 * b) a monotonically increasing PN counter (the watermark). 333 * 334 * For (a), we define a data structure which stores a logical set of PNs, which 335 * we use to keep track of which PNs we have received but which have not yet 336 * been provably ACKed, and thus will later need to generate an ACK frame for. 337 * 338 * The correspondence with the logical states discussed above is as follows. A 339 * PN is in state (C) if it is below the watermark; otherwise it is in state (B) 340 * if it is in the logical set of PNs, and in state (A) otherwise. 341 * 342 * Note that PNs are only removed from the PN set (when they become provably 343 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs 344 * from the PN set any other way would be ambiguous as it would be 345 * indistinguishable from a PN we have not yet received and risk us processing a 346 * duplicate packet. In other words, for a given PN: 347 * 348 * - State (A) can transition to state (B) or (C) 349 * - State (B) can transition to state (C) only 350 * - State (C) is the terminal state 351 * 352 * We can query the logical set data structure for PNs which have been received 353 * but which have not been provably ACKed when we want to generate ACK frames. 354 * Since ACK frames can be lost and/or we might not know that the peer has 355 * successfully received them, we might generate multiple ACK frames covering a 356 * given PN until that PN becomes provably ACKed and we finally remove it from 357 * our set (by bumping the watermark) as no longer being our concern. 358 * 359 * The data structure used is the UINT_SET structure defined in uint_set.h, 360 * which is used as a PN set. We use the following operations of the structure: 361 * 362 * Insert Range: Used when we receive a new PN. 363 * 364 * Remove Range: Used when bumping the watermark. 365 * 366 * Query: Used to determine if a PN is in the set. 367 * 368 * **Possible duplicates.** A PN is considered a possible duplicate when either: 369 * 370 * a) its PN is already in the PN set (i.e. has already been received), or 371 * b) its PN is below the watermark (i.e. was provably ACKed or written off). 372 * 373 * A packet with a given PN is considered 'processable' when that PN is not 374 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable). 375 * 376 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes 377 * provably ACKed. This occurs when an ACK frame is received by the TX side of 378 * the ACK manager; thus, there is necessary interaction between the TX and RX 379 * sides of the ACK manager. 380 * 381 * This is implemented as follows. When a packet is queued as sent in the TX 382 * side of the ACK manager, it may optionally have a Largest Acked value set on 383 * it. The user of the ACK manager should do this if the packet being 384 * transmitted contains an ACK frame, by setting the field to the Largest Acked 385 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID. 386 * When a TX packet is eventually acknowledged which has this field set, it is 387 * used to update the state of the RX side of the ACK manager by bumping the 388 * watermark accordingly. 389 */ 390 struct rx_pkt_history_st { 391 UINT_SET set; 392 393 /* 394 * Invariant: PNs below this are not in the set. 395 * Invariant: This is monotonic and only ever increases. 396 */ 397 QUIC_PN watermark; 398 }; 399 400 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 401 QUIC_PN watermark); 402 403 static void rx_pkt_history_init(struct rx_pkt_history_st *h) 404 { 405 ossl_uint_set_init(&h->set); 406 h->watermark = 0; 407 } 408 409 static void rx_pkt_history_destroy(struct rx_pkt_history_st *h) 410 { 411 ossl_uint_set_destroy(&h->set); 412 } 413 414 /* 415 * Limit the number of ACK ranges we store to prevent resource consumption DoS 416 * attacks. 417 */ 418 #define MAX_RX_ACK_RANGES 32 419 420 static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h) 421 { 422 QUIC_PN highest = QUIC_PN_INVALID; 423 424 while (ossl_list_uint_set_num(&h->set) > MAX_RX_ACK_RANGES) { 425 UINT_RANGE r = ossl_list_uint_set_head(&h->set)->range; 426 427 highest = (highest == QUIC_PN_INVALID) 428 ? r.end 429 : ossl_quic_pn_max(highest, r.end); 430 431 ossl_uint_set_remove(&h->set, &r); 432 } 433 434 /* 435 * Bump watermark to cover all PNs we removed to avoid accidental 436 * reprocessing of packets. 437 */ 438 if (highest != QUIC_PN_INVALID) 439 rx_pkt_history_bump_watermark(h, highest + 1); 440 } 441 442 static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h, 443 QUIC_PN pn) 444 { 445 UINT_RANGE r; 446 447 r.start = pn; 448 r.end = pn; 449 450 if (pn < h->watermark) 451 return 1; /* consider this a success case */ 452 453 if (ossl_uint_set_insert(&h->set, &r) != 1) 454 return 0; 455 456 rx_pkt_history_trim_range_count(h); 457 return 1; 458 } 459 460 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 461 QUIC_PN watermark) 462 { 463 UINT_RANGE r; 464 465 if (watermark <= h->watermark) 466 return 1; 467 468 /* Remove existing PNs below the watermark. */ 469 r.start = 0; 470 r.end = watermark - 1; 471 if (ossl_uint_set_remove(&h->set, &r) != 1) 472 return 0; 473 474 h->watermark = watermark; 475 return 1; 476 } 477 478 /* 479 * ACK Manager Implementation 480 * ************************** 481 * Implementation of the ACK manager proper. 482 */ 483 484 /* Constants used by the ACK manager; see RFC 9002. */ 485 #define K_GRANULARITY (1 * OSSL_TIME_MS) 486 #define K_PKT_THRESHOLD 3 487 #define K_TIME_THRESHOLD_NUM 9 488 #define K_TIME_THRESHOLD_DEN 8 489 490 /* The maximum number of times we allow PTO to be doubled. */ 491 #define MAX_PTO_COUNT 16 492 493 /* Default maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */ 494 #define DEFAULT_TX_MAX_ACK_DELAY ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY) 495 496 struct ossl_ackm_st { 497 /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */ 498 struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM]; 499 500 /* Our list of received PNs which are not yet provably acked. */ 501 struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM]; 502 503 /* Polymorphic dependencies that we consume. */ 504 OSSL_TIME (*now)(void *arg); 505 void *now_arg; 506 OSSL_STATM *statm; 507 const OSSL_CC_METHOD *cc_method; 508 OSSL_CC_DATA *cc_data; 509 510 /* RFC 9002 variables. */ 511 uint32_t pto_count; 512 QUIC_PN largest_acked_pkt[QUIC_PN_SPACE_NUM]; 513 OSSL_TIME time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM]; 514 OSSL_TIME loss_time[QUIC_PN_SPACE_NUM]; 515 OSSL_TIME loss_detection_deadline; 516 517 /* Lowest PN which is still not known to be ACKed. */ 518 QUIC_PN lowest_unacked_pkt[QUIC_PN_SPACE_NUM]; 519 520 /* Time at which we got our first RTT sample, or 0. */ 521 OSSL_TIME first_rtt_sample; 522 523 /* 524 * A packet's num_bytes are added to this if it is inflight, 525 * and removed again once ack'd/lost/discarded. 526 */ 527 uint64_t bytes_in_flight; 528 529 /* 530 * A packet's num_bytes are added to this if it is both inflight and 531 * ack-eliciting, and removed again once ack'd/lost/discarded. 532 */ 533 uint64_t ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM]; 534 535 /* Count of ECN-CE events. */ 536 uint64_t peer_ecnce[QUIC_PN_SPACE_NUM]; 537 538 /* Set to 1 when the handshake is confirmed. */ 539 char handshake_confirmed; 540 541 /* Set to 1 when attached to server channel */ 542 char is_server; 543 544 /* Set to 1 when the peer has completed address validation. */ 545 char peer_completed_addr_validation; 546 547 /* Set to 1 when a PN space has been discarded. */ 548 char discarded[QUIC_PN_SPACE_NUM]; 549 550 /* Set to 1 when we think an ACK frame should be generated. */ 551 char rx_ack_desired[QUIC_PN_SPACE_NUM]; 552 553 /* Set to 1 if an ACK frame has ever been generated. */ 554 char rx_ack_generated[QUIC_PN_SPACE_NUM]; 555 556 /* Probe request counts for reporting to the user. */ 557 OSSL_ACKM_PROBE_INFO pending_probe; 558 559 /* Generated ACK frames for each PN space. */ 560 OSSL_QUIC_FRAME_ACK ack[QUIC_PN_SPACE_NUM]; 561 OSSL_QUIC_ACK_RANGE ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES]; 562 563 /* Other RX state. */ 564 /* Largest PN we have RX'd. */ 565 QUIC_PN rx_largest_pn[QUIC_PN_SPACE_NUM]; 566 567 /* Time at which the PN in rx_largest_pn was RX'd. */ 568 OSSL_TIME rx_largest_time[QUIC_PN_SPACE_NUM]; 569 570 /* 571 * ECN event counters. Each time we receive a packet with a given ECN label, 572 * the corresponding ECN counter here is incremented. 573 */ 574 uint64_t rx_ect0[QUIC_PN_SPACE_NUM]; 575 uint64_t rx_ect1[QUIC_PN_SPACE_NUM]; 576 uint64_t rx_ecnce[QUIC_PN_SPACE_NUM]; 577 578 /* 579 * Number of ACK-eliciting packets since last ACK. We use this to defer 580 * emitting ACK frames until a threshold number of ACK-eliciting packets 581 * have been received. 582 */ 583 uint32_t rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM]; 584 585 /* 586 * The ACK frame coalescing deadline at which we should flush any unsent ACK 587 * frames. 588 */ 589 OSSL_TIME rx_ack_flush_deadline[QUIC_PN_SPACE_NUM]; 590 591 /* 592 * The RX maximum ACK delay (the maximum amount of time our peer might 593 * wait to send us an ACK after receiving an ACK-eliciting packet). 594 */ 595 OSSL_TIME rx_max_ack_delay; 596 597 /* 598 * The TX maximum ACK delay (the maximum amount of time we allow ourselves 599 * to wait before generating an ACK after receiving an ACK-eliciting 600 * packet). 601 */ 602 OSSL_TIME tx_max_ack_delay; 603 604 /* Callbacks for deadline updates. */ 605 void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg); 606 void *loss_detection_deadline_cb_arg; 607 608 void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg); 609 void *ack_deadline_cb_arg; 610 }; 611 612 static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y) 613 { 614 return x < y ? x : y; 615 } 616 617 /* 618 * Get TX history for a given packet number space. Must not have been 619 * discarded. 620 */ 621 static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space) 622 { 623 assert(!ackm->discarded[pkt_space]); 624 625 return &ackm->tx_history[pkt_space]; 626 } 627 628 /* 629 * Get RX history for a given packet number space. Must not have been 630 * discarded. 631 */ 632 static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space) 633 { 634 assert(!ackm->discarded[pkt_space]); 635 636 return &ackm->rx_history[pkt_space]; 637 } 638 639 /* Does the newly-acknowledged list contain any ack-eliciting packet? */ 640 static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt) 641 { 642 for (; pkt != NULL; pkt = pkt->anext) 643 if (pkt->is_ack_eliciting) 644 return 1; 645 646 return 0; 647 } 648 649 /* Return number of ACK-eliciting bytes in flight across all PN spaces. */ 650 static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm) 651 { 652 int i; 653 uint64_t total = 0; 654 655 for (i = 0; i < QUIC_PN_SPACE_NUM; ++i) 656 total += ackm->ack_eliciting_bytes_in_flight[i]; 657 658 return total; 659 } 660 661 /* Return 1 if the range contains the given PN. */ 662 static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn) 663 { 664 return pn >= range->start && pn <= range->end; 665 } 666 667 /* 668 * Given a logical representation of an ACK frame 'ack', create a singly-linked 669 * list of the newly ACK'd frames; that is, of frames which are matched by the 670 * list of PN ranges contained in the ACK frame. The packet structures in the 671 * list returned are removed from the TX history list. Returns a pointer to the 672 * list head (or NULL) if empty. 673 */ 674 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm, 675 const OSSL_QUIC_FRAME_ACK *ack, 676 int pkt_space) 677 { 678 OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev; 679 struct tx_pkt_history_st *h; 680 size_t ridx = 0; 681 682 assert(ack->num_ack_ranges > 0); 683 684 /* 685 * Our history list is a list of packets sorted in ascending order 686 * by packet number. 687 * 688 * ack->ack_ranges is a list of packet number ranges in descending order. 689 * 690 * Walk through our history list from the end in order to efficiently detect 691 * membership in the specified ack ranges. As an optimization, we use our 692 * hashtable to try and skip to the first matching packet. This may fail if 693 * the ACK ranges given include nonexistent packets. 694 */ 695 h = get_tx_history(ackm, pkt_space); 696 697 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 698 if (pkt == NULL) 699 pkt = ossl_list_tx_history_tail(&h->packets); 700 701 for (; pkt != NULL; pkt = pprev) { 702 /* 703 * Save prev value as it will be zeroed if we remove the packet from the 704 * history list below. 705 */ 706 pprev = ossl_list_tx_history_prev(pkt); 707 708 for (;; ++ridx) { 709 if (ridx >= ack->num_ack_ranges) { 710 /* 711 * We have exhausted all ranges so stop here, even if there are 712 * more packets to look at. 713 */ 714 goto stop; 715 } 716 717 if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) { 718 /* We have matched this range. */ 719 tx_pkt_history_remove(h, pkt->pkt_num); 720 721 *fixup = pkt; 722 fixup = &pkt->anext; 723 *fixup = NULL; 724 break; 725 } else if (pkt->pkt_num > ack->ack_ranges[ridx].end) { 726 /* 727 * We have not reached this range yet in our list, so do not 728 * advance ridx. 729 */ 730 break; 731 } else { 732 /* 733 * We have moved beyond this range, so advance to the next range 734 * and try matching again. 735 */ 736 assert(pkt->pkt_num < ack->ack_ranges[ridx].start); 737 continue; 738 } 739 } 740 } 741 stop: 742 743 return acked_pkts; 744 } 745 746 /* 747 * Create a singly-linked list of newly detected-lost packets in the given 748 * packet number space. Returns the head of the list or NULL if no packets were 749 * detected lost. The packets in the list are removed from the TX history list. 750 */ 751 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm, 752 int pkt_space) 753 { 754 OSSL_ACKM_TX_PKT *lost_pkts = NULL, **fixup = &lost_pkts, *pkt, *pnext; 755 OSSL_TIME loss_delay, lost_send_time, now; 756 OSSL_RTT_INFO rtt; 757 struct tx_pkt_history_st *h; 758 759 assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID); 760 761 ossl_statm_get_rtt_info(ackm->statm, &rtt); 762 763 ackm->loss_time[pkt_space] = ossl_time_zero(); 764 765 loss_delay = ossl_time_multiply(ossl_time_max(rtt.latest_rtt, 766 rtt.smoothed_rtt), 767 K_TIME_THRESHOLD_NUM); 768 loss_delay = ossl_time_divide(loss_delay, K_TIME_THRESHOLD_DEN); 769 770 /* Minimum time of K_GRANULARITY before packets are deemed lost. */ 771 loss_delay = ossl_time_max(loss_delay, ossl_ticks2time(K_GRANULARITY)); 772 773 /* Packets sent before this time are deemed lost. */ 774 now = ackm->now(ackm->now_arg); 775 lost_send_time = ossl_time_subtract(now, loss_delay); 776 777 h = get_tx_history(ackm, pkt_space); 778 pkt = ossl_list_tx_history_head(&h->packets); 779 780 for (; pkt != NULL; pkt = pnext) { 781 assert(pkt_space == pkt->pkt_space); 782 783 /* 784 * Save prev value as it will be zeroed if we remove the packet from the 785 * history list below. 786 */ 787 pnext = ossl_list_tx_history_next(pkt); 788 789 if (pkt->pkt_num > ackm->largest_acked_pkt[pkt_space]) 790 continue; 791 792 /* 793 * Mark packet as lost, or set time when it should be marked. 794 */ 795 if (ossl_time_compare(pkt->time, lost_send_time) <= 0 796 || ackm->largest_acked_pkt[pkt_space] 797 >= pkt->pkt_num + K_PKT_THRESHOLD) { 798 tx_pkt_history_remove(h, pkt->pkt_num); 799 800 *fixup = pkt; 801 fixup = &pkt->lnext; 802 *fixup = NULL; 803 } else { 804 if (ossl_time_is_zero(ackm->loss_time[pkt_space])) 805 ackm->loss_time[pkt_space] = ossl_time_add(pkt->time, loss_delay); 806 else 807 ackm->loss_time[pkt_space] = ossl_time_min(ackm->loss_time[pkt_space], 808 ossl_time_add(pkt->time, loss_delay)); 809 } 810 } 811 812 return lost_pkts; 813 } 814 815 static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace) 816 { 817 OSSL_TIME time = ackm->loss_time[QUIC_PN_SPACE_INITIAL]; 818 int i, space = QUIC_PN_SPACE_INITIAL; 819 820 for (i = space + 1; i < QUIC_PN_SPACE_NUM; ++i) 821 if (ossl_time_is_zero(time) 822 || ossl_time_compare(ackm->loss_time[i], time) == -1) { 823 time = ackm->loss_time[i]; 824 space = i; 825 } 826 827 *pspace = space; 828 return time; 829 } 830 831 static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space) 832 { 833 OSSL_RTT_INFO rtt; 834 OSSL_TIME duration; 835 OSSL_TIME pto_timeout = ossl_time_infinite(), t; 836 int pto_space = QUIC_PN_SPACE_INITIAL, i; 837 838 ossl_statm_get_rtt_info(ackm->statm, &rtt); 839 840 duration 841 = ossl_time_add(rtt.smoothed_rtt, 842 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 843 ossl_ticks2time(K_GRANULARITY))); 844 845 duration 846 = ossl_time_multiply(duration, 847 (uint64_t)1 << min_u32(ackm->pto_count, 848 MAX_PTO_COUNT)); 849 850 /* Anti-deadlock PTO starts from the current time. */ 851 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 852 assert(!ackm->peer_completed_addr_validation); 853 854 *space = ackm->discarded[QUIC_PN_SPACE_INITIAL] 855 ? QUIC_PN_SPACE_HANDSHAKE 856 : QUIC_PN_SPACE_INITIAL; 857 return ossl_time_add(ackm->now(ackm->now_arg), duration); 858 } 859 860 for (i = QUIC_PN_SPACE_INITIAL; i < QUIC_PN_SPACE_NUM; ++i) { 861 /* 862 * RFC 9002 section 6.2.2.1 keep probe timeout armed until 863 * handshake is confirmed (client sees HANDSHAKE_DONE message 864 * from server). 865 */ 866 if (ackm->ack_eliciting_bytes_in_flight[i] == 0 && (ackm->handshake_confirmed == 1 || ackm->is_server == 1)) 867 continue; 868 869 if (i == QUIC_PN_SPACE_APP) { 870 /* Skip application data until handshake confirmed. */ 871 if (!ackm->handshake_confirmed) 872 break; 873 874 /* Include max_ack_delay and backoff for app data. */ 875 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) { 876 uint64_t factor 877 = (uint64_t)1 << min_u32(ackm->pto_count, MAX_PTO_COUNT); 878 879 duration 880 = ossl_time_add(duration, 881 ossl_time_multiply(ackm->rx_max_ack_delay, 882 factor)); 883 } 884 } 885 886 /* 887 * Only re-arm timer if stack has sent at least one ACK eliciting frame. 888 * If stack has sent no ACK eliciting frame at given encryption level then 889 * particular timer is zero and we must not attempt to set it. Timer keeps 890 * time since epoch (Jan 1 1970) and we must not set timer to past. 891 */ 892 if (!ossl_time_is_zero(ackm->time_of_last_ack_eliciting_pkt[i])) { 893 t = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[i], duration); 894 if (ossl_time_compare(t, pto_timeout) < 0) { 895 pto_timeout = t; 896 pto_space = i; 897 } 898 } 899 } 900 901 *space = pto_space; 902 return pto_timeout; 903 } 904 905 static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm, 906 OSSL_TIME deadline) 907 { 908 ackm->loss_detection_deadline = deadline; 909 910 if (ackm->loss_detection_deadline_cb != NULL) 911 ackm->loss_detection_deadline_cb(deadline, 912 ackm->loss_detection_deadline_cb_arg); 913 } 914 915 static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm) 916 { 917 int space; 918 OSSL_TIME earliest_loss_time, timeout; 919 920 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &space); 921 if (!ossl_time_is_zero(earliest_loss_time)) { 922 /* Time threshold loss detection. */ 923 ackm_set_loss_detection_timer_actual(ackm, earliest_loss_time); 924 return 1; 925 } 926 927 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0 928 && ackm->peer_completed_addr_validation) { 929 /* 930 * Nothing to detect lost, so no timer is set. However, the client 931 * needs to arm the timer if the server might be blocked by the 932 * anti-amplification limit. 933 */ 934 ackm_set_loss_detection_timer_actual(ackm, ossl_time_zero()); 935 return 1; 936 } 937 938 timeout = ackm_get_pto_time_and_space(ackm, &space); 939 ackm_set_loss_detection_timer_actual(ackm, timeout); 940 return 1; 941 } 942 943 static int ackm_in_persistent_congestion(OSSL_ACKM *ackm, 944 const OSSL_ACKM_TX_PKT *lpkt) 945 { 946 /* TODO(QUIC FUTURE): Persistent congestion not currently implemented. */ 947 return 0; 948 } 949 950 static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space, 951 const OSSL_ACKM_TX_PKT *lpkt, int pseudo) 952 { 953 const OSSL_ACKM_TX_PKT *p, *pnext; 954 OSSL_RTT_INFO rtt; 955 QUIC_PN largest_pn_lost = 0; 956 OSSL_CC_LOSS_INFO loss_info = { 0 }; 957 uint32_t flags = 0; 958 959 for (p = lpkt; p != NULL; p = pnext) { 960 pnext = p->lnext; 961 962 if (p->is_inflight) { 963 ackm->bytes_in_flight -= p->num_bytes; 964 if (p->is_ack_eliciting) 965 ackm->ack_eliciting_bytes_in_flight[p->pkt_space] 966 -= p->num_bytes; 967 968 if (p->pkt_num > largest_pn_lost) 969 largest_pn_lost = p->pkt_num; 970 971 if (!pseudo) { 972 /* 973 * If this is pseudo-loss (e.g. during connection retry) we do not 974 * inform the CC as it is not a real loss and not reflective of 975 * network conditions. 976 */ 977 loss_info.tx_time = p->time; 978 loss_info.tx_size = p->num_bytes; 979 980 ackm->cc_method->on_data_lost(ackm->cc_data, &loss_info); 981 } 982 } 983 984 p->on_lost(p->cb_arg); 985 } 986 987 /* 988 * Persistent congestion can only be considered if we have gotten at least 989 * one RTT sample. 990 */ 991 ossl_statm_get_rtt_info(ackm->statm, &rtt); 992 if (!ossl_time_is_zero(ackm->first_rtt_sample) 993 && ackm_in_persistent_congestion(ackm, lpkt)) 994 flags |= OSSL_CC_LOST_FLAG_PERSISTENT_CONGESTION; 995 996 ackm->cc_method->on_data_lost_finished(ackm->cc_data, flags); 997 } 998 999 static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt) 1000 { 1001 const OSSL_ACKM_TX_PKT *anext; 1002 QUIC_PN last_pn_acked = 0; 1003 OSSL_CC_ACK_INFO ainfo = { 0 }; 1004 1005 for (; apkt != NULL; apkt = anext) { 1006 if (apkt->is_inflight) { 1007 ackm->bytes_in_flight -= apkt->num_bytes; 1008 if (apkt->is_ack_eliciting) 1009 ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space] 1010 -= apkt->num_bytes; 1011 1012 if (apkt->pkt_num > last_pn_acked) 1013 last_pn_acked = apkt->pkt_num; 1014 1015 if (apkt->largest_acked != QUIC_PN_INVALID) 1016 /* 1017 * This can fail, but it is monotonic; worst case we try again 1018 * next time. 1019 */ 1020 rx_pkt_history_bump_watermark(get_rx_history(ackm, 1021 apkt->pkt_space), 1022 apkt->largest_acked + 1); 1023 } 1024 1025 ainfo.tx_time = apkt->time; 1026 ainfo.tx_size = apkt->num_bytes; 1027 1028 anext = apkt->anext; 1029 apkt->on_acked(apkt->cb_arg); /* may free apkt */ 1030 1031 if (apkt->is_inflight) 1032 ackm->cc_method->on_data_acked(ackm->cc_data, &ainfo); 1033 } 1034 } 1035 1036 OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg), 1037 void *now_arg, 1038 OSSL_STATM *statm, 1039 const OSSL_CC_METHOD *cc_method, 1040 OSSL_CC_DATA *cc_data, 1041 int is_server) 1042 { 1043 OSSL_ACKM *ackm; 1044 int i; 1045 1046 ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM)); 1047 if (ackm == NULL) 1048 return NULL; 1049 1050 for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) { 1051 ackm->largest_acked_pkt[i] = QUIC_PN_INVALID; 1052 ackm->rx_ack_flush_deadline[i] = ossl_time_infinite(); 1053 if (tx_pkt_history_init(&ackm->tx_history[i]) < 1) 1054 goto err; 1055 } 1056 1057 for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i) 1058 rx_pkt_history_init(&ackm->rx_history[i]); 1059 1060 ackm->now = now; 1061 ackm->now_arg = now_arg; 1062 ackm->statm = statm; 1063 ackm->cc_method = cc_method; 1064 ackm->cc_data = cc_data; 1065 ackm->is_server = (char)is_server; 1066 1067 ackm->rx_max_ack_delay = ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY); 1068 ackm->tx_max_ack_delay = DEFAULT_TX_MAX_ACK_DELAY; 1069 1070 return ackm; 1071 1072 err: 1073 while (--i >= 0) 1074 tx_pkt_history_destroy(&ackm->tx_history[i]); 1075 1076 OPENSSL_free(ackm); 1077 return NULL; 1078 } 1079 1080 void ossl_ackm_free(OSSL_ACKM *ackm) 1081 { 1082 size_t i; 1083 1084 if (ackm == NULL) 1085 return; 1086 1087 for (i = 0; i < OSSL_NELEM(ackm->tx_history); ++i) 1088 if (!ackm->discarded[i]) { 1089 tx_pkt_history_destroy(&ackm->tx_history[i]); 1090 rx_pkt_history_destroy(&ackm->rx_history[i]); 1091 } 1092 1093 OPENSSL_free(ackm); 1094 } 1095 1096 int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt) 1097 { 1098 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt->pkt_space); 1099 1100 /* Time must be set and not move backwards. */ 1101 if (ossl_time_is_zero(pkt->time) 1102 || ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space], 1103 pkt->time) 1104 > 0) 1105 return 0; 1106 1107 /* Must have non-zero number of bytes. */ 1108 if (pkt->num_bytes == 0) 1109 return 0; 1110 1111 /* Does not make any sense for a non-in-flight packet to be ACK-eliciting. */ 1112 if (!pkt->is_inflight && pkt->is_ack_eliciting) 1113 return 0; 1114 1115 if (tx_pkt_history_add(h, pkt) == 0) 1116 return 0; 1117 1118 if (pkt->is_inflight) { 1119 if (pkt->is_ack_eliciting) { 1120 ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time; 1121 ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space] 1122 += pkt->num_bytes; 1123 } 1124 1125 ackm->bytes_in_flight += pkt->num_bytes; 1126 ackm_set_loss_detection_timer(ackm); 1127 1128 ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes); 1129 } 1130 1131 return 1; 1132 } 1133 1134 int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes) 1135 { 1136 /* No-op on the client. */ 1137 return 1; 1138 } 1139 1140 static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1141 int pkt_space) 1142 { 1143 struct tx_pkt_history_st *h; 1144 OSSL_ACKM_TX_PKT *pkt; 1145 OSSL_CC_ECN_INFO ecn_info = { 0 }; 1146 1147 /* 1148 * If the ECN-CE counter reported by the peer has increased, this could 1149 * be a new congestion event. 1150 */ 1151 if (ack->ecnce > ackm->peer_ecnce[pkt_space]) { 1152 ackm->peer_ecnce[pkt_space] = ack->ecnce; 1153 1154 h = get_tx_history(ackm, pkt_space); 1155 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 1156 if (pkt == NULL) 1157 return; 1158 1159 ecn_info.largest_acked_time = pkt->time; 1160 ackm->cc_method->on_ecn(ackm->cc_data, &ecn_info); 1161 } 1162 } 1163 1164 int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1165 int pkt_space, OSSL_TIME rx_time) 1166 { 1167 OSSL_ACKM_TX_PKT *na_pkts, *lost_pkts; 1168 int must_set_timer = 0; 1169 1170 if (ackm->largest_acked_pkt[pkt_space] == QUIC_PN_INVALID) 1171 ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end; 1172 else 1173 ackm->largest_acked_pkt[pkt_space] 1174 = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space], 1175 ack->ack_ranges[0].end); 1176 1177 /* 1178 * If we get an ACK in the handshake space, address validation is completed. 1179 * Make sure we update the timer, even if no packets were ACK'd. 1180 */ 1181 if (!ackm->peer_completed_addr_validation 1182 && pkt_space == QUIC_PN_SPACE_HANDSHAKE) { 1183 ackm->peer_completed_addr_validation = 1; 1184 must_set_timer = 1; 1185 } 1186 1187 /* 1188 * Find packets that are newly acknowledged and remove them from the list. 1189 */ 1190 na_pkts = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space); 1191 if (na_pkts == NULL) { 1192 if (must_set_timer) 1193 ackm_set_loss_detection_timer(ackm); 1194 1195 return 1; 1196 } 1197 1198 /* 1199 * Update the RTT if the largest acknowledged is newly acked and at least 1200 * one ACK-eliciting packet was newly acked. 1201 * 1202 * First packet in the list is always the one with the largest PN. 1203 */ 1204 if (na_pkts->pkt_num == ack->ack_ranges[0].end && ack_includes_ack_eliciting(na_pkts)) { 1205 OSSL_TIME now = ackm->now(ackm->now_arg), ack_delay; 1206 if (ossl_time_is_zero(ackm->first_rtt_sample)) 1207 ackm->first_rtt_sample = now; 1208 1209 /* Enforce maximum ACK delay. */ 1210 ack_delay = ack->delay_time; 1211 if (ackm->handshake_confirmed) 1212 ack_delay = ossl_time_min(ack_delay, ackm->rx_max_ack_delay); 1213 1214 ossl_statm_update_rtt(ackm->statm, ack_delay, 1215 ossl_time_subtract(now, na_pkts->time)); 1216 } 1217 1218 /* 1219 * Process ECN information if present. 1220 * 1221 * We deliberately do most ECN processing in the ACKM rather than the 1222 * congestion controller to avoid having to give the congestion controller 1223 * access to ACKM internal state. 1224 */ 1225 if (ack->ecn_present) 1226 ackm_process_ecn(ackm, ack, pkt_space); 1227 1228 /* Handle inferred loss. */ 1229 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1230 if (lost_pkts != NULL) 1231 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1232 1233 ackm_on_pkts_acked(ackm, na_pkts); 1234 1235 /* 1236 * Reset pto_count unless the client is unsure if the server validated the 1237 * client's address. 1238 */ 1239 if (ackm->peer_completed_addr_validation) 1240 ackm->pto_count = 0; 1241 1242 ackm_set_loss_detection_timer(ackm); 1243 return 1; 1244 } 1245 1246 int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space) 1247 { 1248 OSSL_ACKM_TX_PKT *pkt, *pnext; 1249 uint64_t num_bytes_invalidated = 0; 1250 1251 if (ackm->discarded[pkt_space]) 1252 return 0; 1253 1254 if (pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1255 ackm->peer_completed_addr_validation = 1; 1256 1257 for (pkt = ossl_list_tx_history_head(&get_tx_history(ackm, pkt_space)->packets); 1258 pkt != NULL; pkt = pnext) { 1259 pnext = ossl_list_tx_history_next(pkt); 1260 if (pkt->is_inflight) { 1261 ackm->bytes_in_flight -= pkt->num_bytes; 1262 num_bytes_invalidated += pkt->num_bytes; 1263 } 1264 1265 pkt->on_discarded(pkt->cb_arg); /* may free pkt */ 1266 } 1267 1268 tx_pkt_history_destroy(&ackm->tx_history[pkt_space]); 1269 rx_pkt_history_destroy(&ackm->rx_history[pkt_space]); 1270 1271 if (num_bytes_invalidated > 0) 1272 ackm->cc_method->on_data_invalidated(ackm->cc_data, 1273 num_bytes_invalidated); 1274 1275 ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero(); 1276 ackm->loss_time[pkt_space] = ossl_time_zero(); 1277 ackm->pto_count = 0; 1278 ackm->discarded[pkt_space] = 1; 1279 ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0; 1280 ackm_set_loss_detection_timer(ackm); 1281 return 1; 1282 } 1283 1284 int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm) 1285 { 1286 ackm->handshake_confirmed = 1; 1287 ackm->peer_completed_addr_validation = 1; 1288 ackm_set_loss_detection_timer(ackm); 1289 return 1; 1290 } 1291 1292 static void ackm_queue_probe_anti_deadlock_handshake(OSSL_ACKM *ackm) 1293 { 1294 ++ackm->pending_probe.anti_deadlock_handshake; 1295 } 1296 1297 static void ackm_queue_probe_anti_deadlock_initial(OSSL_ACKM *ackm) 1298 { 1299 ++ackm->pending_probe.anti_deadlock_initial; 1300 } 1301 1302 static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space) 1303 { 1304 /* 1305 * TODO(QUIC FUTURE): We are allowed to send either one or two probe 1306 * packets here. 1307 * Determine a strategy for when we should send two probe packets. 1308 */ 1309 ++ackm->pending_probe.pto[pkt_space]; 1310 } 1311 1312 int ossl_ackm_on_timeout(OSSL_ACKM *ackm) 1313 { 1314 int pkt_space; 1315 OSSL_TIME earliest_loss_time; 1316 OSSL_ACKM_TX_PKT *lost_pkts; 1317 1318 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space); 1319 if (!ossl_time_is_zero(earliest_loss_time)) { 1320 /* Time threshold loss detection. */ 1321 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1322 if (lost_pkts != NULL) 1323 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1324 ackm_set_loss_detection_timer(ackm); 1325 return 1; 1326 } 1327 1328 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 1329 assert(!ackm->peer_completed_addr_validation); 1330 /* 1331 * Client sends an anti-deadlock packet: Initial is padded to earn more 1332 * anti-amplification credit. A handshake packet proves address 1333 * ownership. 1334 */ 1335 if (ackm->discarded[QUIC_PN_SPACE_INITIAL]) 1336 ackm_queue_probe_anti_deadlock_handshake(ackm); 1337 else 1338 ackm_queue_probe_anti_deadlock_initial(ackm); 1339 } else { 1340 /* 1341 * PTO. The user of the ACKM should send new data if available, else 1342 * retransmit old data, or if neither is available, send a single PING 1343 * frame. 1344 */ 1345 ackm_get_pto_time_and_space(ackm, &pkt_space); 1346 ackm_queue_probe(ackm, pkt_space); 1347 } 1348 1349 ++ackm->pto_count; 1350 ackm_set_loss_detection_timer(ackm); 1351 return 1; 1352 } 1353 1354 OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm) 1355 { 1356 return ackm->loss_detection_deadline; 1357 } 1358 1359 OSSL_ACKM_PROBE_INFO *ossl_ackm_get0_probe_request(OSSL_ACKM *ackm) 1360 { 1361 return &ackm->pending_probe; 1362 } 1363 1364 int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn) 1365 { 1366 struct tx_pkt_history_st *h; 1367 OSSL_ACKM_TX_PKT *p; 1368 1369 h = get_tx_history(ackm, pkt_space); 1370 p = ossl_list_tx_history_tail(&h->packets); 1371 if (p != NULL) { 1372 *pn = p->pkt_num; 1373 return 1; 1374 } 1375 1376 return 0; 1377 } 1378 1379 /* Number of ACK-eliciting packets RX'd before we always emit an ACK. */ 1380 #define PKTS_BEFORE_ACK 2 1381 1382 /* 1383 * Return 1 if emission of an ACK frame is currently desired. 1384 * 1385 * This occurs when one or more of the following conditions occurs: 1386 * 1387 * - We have flagged that we want to send an ACK frame 1388 * (for example, due to the packet threshold count being exceeded), or 1389 * 1390 * - We have exceeded the ACK flush deadline, meaning that 1391 * we have received at least one ACK-eliciting packet, but held off on 1392 * sending an ACK frame immediately in the hope that more ACK-eliciting 1393 * packets might come in, but not enough did and we are now requesting 1394 * transmission of an ACK frame anyway. 1395 * 1396 */ 1397 int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space) 1398 { 1399 return ackm->rx_ack_desired[pkt_space] 1400 || (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]) 1401 && ossl_time_compare(ackm->now(ackm->now_arg), 1402 ackm->rx_ack_flush_deadline[pkt_space]) 1403 >= 0); 1404 } 1405 1406 /* 1407 * Returns 1 if an ACK frame matches a given packet number. 1408 */ 1409 static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num) 1410 { 1411 size_t i; 1412 1413 for (i = 0; i < ack->num_ack_ranges; ++i) 1414 if (range_contains(&ack->ack_ranges[i], pkt_num)) 1415 return 1; 1416 1417 return 0; 1418 } 1419 1420 /* 1421 * Returns 1 iff a PN (which we have just received) was previously reported as 1422 * implied missing (by us, in an ACK frame we previously generated). 1423 */ 1424 static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num) 1425 { 1426 /* 1427 * A PN is implied missing if it is not greater than the highest PN in our 1428 * generated ACK frame, but is not matched by the frame. 1429 */ 1430 return ackm->ack[pkt_space].num_ack_ranges > 0 1431 && pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end 1432 && !ack_contains(&ackm->ack[pkt_space], pkt_num); 1433 } 1434 1435 /* 1436 * Returns 1 iff our RX of a PN newly establishes the implication of missing 1437 * packets. 1438 */ 1439 static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space) 1440 { 1441 struct rx_pkt_history_st *h; 1442 1443 h = get_rx_history(ackm, pkt_space); 1444 1445 if (ossl_list_uint_set_is_empty(&h->set)) 1446 return 0; 1447 1448 /* 1449 * The second condition here establishes that the highest PN range in our RX 1450 * history comprises only a single PN. If there is more than one, then this 1451 * function will have returned 1 during a previous call to 1452 * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus 1453 * we only return 1 when the missing PN condition is newly established. 1454 * 1455 * The third condition here establishes that the highest PN range in our RX 1456 * history is beyond (and does not border) the highest PN we have yet 1457 * reported in any ACK frame. Thus there is a gap of at least one PN between 1458 * the PNs we have ACK'd previously and the PN we have just received. 1459 */ 1460 return ackm->ack[pkt_space].num_ack_ranges > 0 1461 && ossl_list_uint_set_tail(&h->set)->range.start 1462 == ossl_list_uint_set_tail(&h->set)->range.end 1463 && ossl_list_uint_set_tail(&h->set)->range.start 1464 > ackm->ack[pkt_space].ack_ranges[0].end + 1; 1465 } 1466 1467 static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space, 1468 OSSL_TIME deadline) 1469 { 1470 ackm->rx_ack_flush_deadline[pkt_space] = deadline; 1471 1472 if (ackm->ack_deadline_cb != NULL) 1473 ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space), 1474 pkt_space, ackm->ack_deadline_cb_arg); 1475 } 1476 1477 /* Explicitly flags that we want to generate an ACK frame. */ 1478 static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space) 1479 { 1480 ackm->rx_ack_desired[pkt_space] = 1; 1481 1482 /* Cancel deadline. */ 1483 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1484 } 1485 1486 static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm, 1487 OSSL_TIME rx_time, int pkt_space, 1488 int was_missing) 1489 { 1490 OSSL_TIME tx_max_ack_delay; 1491 1492 if (ackm->rx_ack_desired[pkt_space]) 1493 /* ACK generation already requested so nothing to do. */ 1494 return; 1495 1496 ++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]; 1497 1498 if (!ackm->rx_ack_generated[pkt_space] 1499 || was_missing 1500 || ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] 1501 >= PKTS_BEFORE_ACK 1502 || ackm_has_newly_missing(ackm, pkt_space)) { 1503 /* 1504 * Either: 1505 * 1506 * - We have never yet generated an ACK frame, meaning that this 1507 * is the first ever packet received, which we should always 1508 * acknowledge immediately, or 1509 * 1510 * - We previously reported the PN that we have just received as 1511 * missing in a previous ACK frame (meaning that we should report 1512 * the fact that we now have it to the peer immediately), or 1513 * 1514 * - We have exceeded the ACK-eliciting packet threshold count 1515 * for the purposes of ACK coalescing, so request transmission 1516 * of an ACK frame, or 1517 * 1518 * - The PN we just received and added to our PN RX history 1519 * newly implies one or more missing PNs, in which case we should 1520 * inform the peer by sending an ACK frame immediately. 1521 * 1522 * We do not test the ACK flush deadline here because it is tested 1523 * separately in ossl_ackm_is_ack_desired. 1524 */ 1525 ackm_queue_ack(ackm, pkt_space); 1526 return; 1527 } 1528 1529 /* 1530 * Not emitting an ACK yet. 1531 * 1532 * Update the ACK flush deadline. 1533 * 1534 * RFC 9000 s. 13.2.1: "An endpoint MUST acknowledge all ack-eliciting 1535 * Initial and Handshake packets immediately"; don't delay ACK generation if 1536 * we are using the Initial or Handshake PN spaces. 1537 */ 1538 tx_max_ack_delay = ackm->tx_max_ack_delay; 1539 if (pkt_space == QUIC_PN_SPACE_INITIAL 1540 || pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1541 tx_max_ack_delay = ossl_time_zero(); 1542 1543 if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])) 1544 ackm_set_flush_deadline(ackm, pkt_space, 1545 ossl_time_add(rx_time, tx_max_ack_delay)); 1546 else 1547 ackm_set_flush_deadline(ackm, pkt_space, 1548 ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space], 1549 ossl_time_add(rx_time, 1550 tx_max_ack_delay))); 1551 } 1552 1553 int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt) 1554 { 1555 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt->pkt_space); 1556 int was_missing; 1557 1558 if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1) 1559 /* PN has already been processed or written off, no-op. */ 1560 return 1; 1561 1562 /* 1563 * Record the largest PN we have RX'd and the time we received it. 1564 * We use this to calculate the ACK delay field of ACK frames. 1565 */ 1566 if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) { 1567 ackm->rx_largest_pn[pkt->pkt_space] = pkt->pkt_num; 1568 ackm->rx_largest_time[pkt->pkt_space] = pkt->time; 1569 } 1570 1571 /* 1572 * If the PN we just received was previously implied missing by virtue of 1573 * being omitted from a previous ACK frame generated, we skip any packet 1574 * count thresholds or coalescing delays and emit a new ACK frame 1575 * immediately. 1576 */ 1577 was_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num); 1578 1579 /* 1580 * Add the packet number to our history list of PNs we have not yet provably 1581 * acked. 1582 */ 1583 if (rx_pkt_history_add_pn(h, pkt->pkt_num) != 1) 1584 return 0; 1585 1586 /* 1587 * Receiving this packet may or may not cause us to emit an ACK frame. 1588 * We may not emit an ACK frame yet if we have not yet received a threshold 1589 * number of packets. 1590 */ 1591 if (pkt->is_ack_eliciting) 1592 ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space, was_missing); 1593 1594 /* Update the ECN counters according to which ECN signal we got, if any. */ 1595 switch (pkt->ecn) { 1596 case OSSL_ACKM_ECN_ECT0: 1597 ++ackm->rx_ect0[pkt->pkt_space]; 1598 break; 1599 case OSSL_ACKM_ECN_ECT1: 1600 ++ackm->rx_ect1[pkt->pkt_space]; 1601 break; 1602 case OSSL_ACKM_ECN_ECNCE: 1603 ++ackm->rx_ecnce[pkt->pkt_space]; 1604 break; 1605 default: 1606 break; 1607 } 1608 1609 return 1; 1610 } 1611 1612 static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space, 1613 OSSL_QUIC_FRAME_ACK *ack) 1614 { 1615 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1616 UINT_SET_ITEM *x; 1617 size_t i = 0; 1618 1619 /* 1620 * Copy out ranges from the PN set, starting at the end, until we reach our 1621 * maximum number of ranges. 1622 */ 1623 for (x = ossl_list_uint_set_tail(&h->set); 1624 x != NULL && i < OSSL_NELEM(ackm->ack_ranges); 1625 x = ossl_list_uint_set_prev(x), ++i) { 1626 ackm->ack_ranges[pkt_space][i].start = x->range.start; 1627 ackm->ack_ranges[pkt_space][i].end = x->range.end; 1628 } 1629 1630 ack->ack_ranges = ackm->ack_ranges[pkt_space]; 1631 ack->num_ack_ranges = i; 1632 } 1633 1634 const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm, 1635 int pkt_space) 1636 { 1637 OSSL_QUIC_FRAME_ACK *ack = &ackm->ack[pkt_space]; 1638 OSSL_TIME now = ackm->now(ackm->now_arg); 1639 1640 ackm_fill_rx_ack_ranges(ackm, pkt_space, ack); 1641 1642 if (!ossl_time_is_zero(ackm->rx_largest_time[pkt_space]) 1643 && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0 1644 && pkt_space == QUIC_PN_SPACE_APP) 1645 ack->delay_time = ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]); 1646 else 1647 ack->delay_time = ossl_time_zero(); 1648 1649 ack->ect0 = ackm->rx_ect0[pkt_space]; 1650 ack->ect1 = ackm->rx_ect1[pkt_space]; 1651 ack->ecnce = ackm->rx_ecnce[pkt_space]; 1652 ack->ecn_present = 1; 1653 1654 ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0; 1655 1656 ackm->rx_ack_generated[pkt_space] = 1; 1657 ackm->rx_ack_desired[pkt_space] = 0; 1658 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1659 return ack; 1660 } 1661 1662 OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space) 1663 { 1664 if (ackm->rx_ack_desired[pkt_space]) 1665 /* Already desired, deadline is now. */ 1666 return ossl_time_zero(); 1667 1668 return ackm->rx_ack_flush_deadline[pkt_space]; 1669 } 1670 1671 int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space) 1672 { 1673 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1674 1675 return pn >= h->watermark && ossl_uint_set_query(&h->set, pn) == 0; 1676 } 1677 1678 void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm, 1679 void (*fn)(OSSL_TIME deadline, 1680 void *arg), 1681 void *arg) 1682 { 1683 ackm->loss_detection_deadline_cb = fn; 1684 ackm->loss_detection_deadline_cb_arg = arg; 1685 } 1686 1687 void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm, 1688 void (*fn)(OSSL_TIME deadline, 1689 int pkt_space, 1690 void *arg), 1691 void *arg) 1692 { 1693 ackm->ack_deadline_cb = fn; 1694 ackm->ack_deadline_cb_arg = arg; 1695 } 1696 1697 int ossl_ackm_mark_packet_pseudo_lost(OSSL_ACKM *ackm, 1698 int pkt_space, QUIC_PN pn) 1699 { 1700 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt_space); 1701 OSSL_ACKM_TX_PKT *pkt; 1702 1703 pkt = tx_pkt_history_by_pkt_num(h, pn); 1704 if (pkt == NULL) 1705 return 0; 1706 1707 tx_pkt_history_remove(h, pkt->pkt_num); 1708 pkt->lnext = NULL; 1709 ackm_on_pkts_lost(ackm, pkt_space, pkt, /*pseudo=*/1); 1710 return 1; 1711 } 1712 1713 OSSL_TIME ossl_ackm_get_pto_duration(OSSL_ACKM *ackm) 1714 { 1715 OSSL_TIME duration; 1716 OSSL_RTT_INFO rtt; 1717 1718 ossl_statm_get_rtt_info(ackm->statm, &rtt); 1719 1720 duration = ossl_time_add(rtt.smoothed_rtt, 1721 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 1722 ossl_ticks2time(K_GRANULARITY))); 1723 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) 1724 duration = ossl_time_add(duration, ackm->rx_max_ack_delay); 1725 1726 return duration; 1727 } 1728 1729 QUIC_PN ossl_ackm_get_largest_acked(OSSL_ACKM *ackm, int pkt_space) 1730 { 1731 return ackm->largest_acked_pkt[pkt_space]; 1732 } 1733 1734 void ossl_ackm_set_rx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME rx_max_ack_delay) 1735 { 1736 ackm->rx_max_ack_delay = rx_max_ack_delay; 1737 } 1738 1739 void ossl_ackm_set_tx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME tx_max_ack_delay) 1740 { 1741 ackm->tx_max_ack_delay = tx_max_ack_delay; 1742 } 1743