/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef _NET_GRO_H
#define _NET_GRO_H

#include <linux/indirect_call_wrapper.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/hotdata.h>

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD	(MAX_HEADER + 128)

struct napi_gro_cb {
	union {
		struct {
			/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
			void	*frag0;

			/* Length of frag0. */
			unsigned int frag0_len;
		};

		struct {
			/* used in skb_gro_receive() slow path */
			struct sk_buff *last;

			/* jiffies when first packet was created/queued */
			unsigned long age;
		};
	};

	/* This indicates where we are processing relative to skb->data. */
	int	data_offset;

	/* This is non-zero if the packet cannot be merged with the new skb. */
	u16	flush;

	/* Number of segments aggregated. */
	u16	count;

	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
	u16	proto;

	u16	pad;

/* Used in napi_gro_cb::free */
#define NAPI_GRO_FREE		  1
#define NAPI_GRO_FREE_STOLEN_HEAD 2

	/* portion of the cb set to zero at every gro iteration */
	struct_group(zeroed,

		/* Start offset for remote checksum offload */
		u16	gro_remcsum_start;

		/* This is non-zero if the packet may be of the same flow. */
		u8	same_flow:1;

		/* Used in tunnel GRO receive */
		u8	encap_mark:1;

		/* GRO checksum is valid */
		u8	csum_valid:1;

		/* Number of checksums via CHECKSUM_UNNECESSARY */
		u8	csum_cnt:3;

		/* Free the skb? */
		u8	free:2;

		/* Used in foo-over-udp, set in udp[46]_gro_receive */
		u8	is_ipv6:1;

		/* Used in GRE, set in fou/gue_gro_receive */
		u8	is_fou:1;

		/* Used to determine if ipid_offset can be ignored */
		u8	ip_fixedid:1;

		/* Number of gro_receive callbacks this packet already went through */
		u8	recursion_counter:4;

		/* GRO is done by frag_list pointer chaining. */
		u8	is_flist:1;
	);

	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
	__wsum	csum;

	/* L3 offsets */
	union {
		struct {
			u16 network_offset;
			u16 inner_network_offset;
		};
		u16 network_offsets[2];
	};
};

#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
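
/*
 * A minimal sketch (illustrative only, not part of this header) of why the
 * per-iteration state above is wrapped in struct_group(): the GRO entry
 * point can reset every one of those fields with a single memset before
 * dispatching the skb to the protocol gro_receive handlers, along the
 * lines of:
 *
 *	memset(&NAPI_GRO_CB(skb)->zeroed, 0,
 *	       sizeof(NAPI_GRO_CB(skb)->zeroed));
 */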

#define GRO_RECURSION_LIMIT 15
static inline int gro_recursion_inc_test(struct sk_buff *skb)
{
	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
}

typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
					       struct list_head *head,
					       struct sk_buff *skb)
{
	if (unlikely(gro_recursion_inc_test(skb))) {
		NAPI_GRO_CB(skb)->flush |= 1;
		return NULL;
	}

	return cb(head, skb);
}

typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
					    struct sk_buff *);
static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
						  struct sock *sk,
						  struct list_head *head,
						  struct sk_buff *skb)
{
	if (unlikely(gro_recursion_inc_test(skb))) {
		NAPI_GRO_CB(skb)->flush |= 1;
		return NULL;
	}

	return cb(sk, head, skb);
}

static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
{
	return NAPI_GRO_CB(skb)->data_offset;
}

static inline unsigned int skb_gro_len(const struct sk_buff *skb)
{
	return skb->len - NAPI_GRO_CB(skb)->data_offset;
}

static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
{
	NAPI_GRO_CB(skb)->data_offset += len;
}

static inline void *skb_gro_header_fast(const struct sk_buff *skb,
					unsigned int offset)
{
	return NAPI_GRO_CB(skb)->frag0 + offset;
}

static inline bool skb_gro_may_pull(const struct sk_buff *skb,
				    unsigned int hlen)
{
	return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}

static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
					unsigned int offset)
{
	if (!pskb_may_pull(skb, hlen))
		return NULL;

	return skb->data + offset;
}

static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
				   unsigned int offset)
{
	void *ptr;

	ptr = skb_gro_header_fast(skb, offset);
	if (!skb_gro_may_pull(skb, hlen))
		ptr = skb_gro_header_slow(skb, hlen, offset);
	return ptr;
}

static inline int skb_gro_receive_network_offset(const struct sk_buff *skb)
{
	return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark];
}

static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
	if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
		return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb));

	return skb->data + skb_gro_receive_network_offset(skb);
}

static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
					     int proto)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
				  skb_gro_len(skb), proto, 0);
}

static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
					  const void *start, unsigned int len)
{
	if (NAPI_GRO_CB(skb)->csum_valid)
		NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len,
						wsum_negate(NAPI_GRO_CB(skb)->csum)));
}
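
/*
 * A minimal sketch of the usual header-access pattern in a gro_receive
 * handler (illustrative only; the TCP offload code follows this shape).
 * skb_gro_header() tries the frag0 fast path first and falls back to
 * pskb_may_pull() via skb_gro_header_slow() when frag0 is too short:
 *
 *	unsigned int off = skb_gro_offset(skb);
 *	unsigned int hlen = off + sizeof(struct tcphdr);
 *	struct tcphdr *th = skb_gro_header(skb, hlen, off);
 *
 *	if (unlikely(!th))
 *		goto out;
 *
 * where "out" flushes the skb instead of trying to aggregate it.
 */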

/* GRO checksum functions. These are logical equivalents of the normal
 * checksum functions (in skbuff.h) except that they operate on the GRO
 * offsets and fields in sk_buff.
 */

__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);

static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
{
	return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
}

static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
						      bool zero_okay,
						      __sum16 check)
{
	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
		skb_checksum_start_offset(skb) <
		 skb_gro_offset(skb)) &&
		!skb_at_gro_remcsum_start(skb) &&
		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
		(!zero_okay || check));
}

static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
							   __wsum psum)
{
	if (NAPI_GRO_CB(skb)->csum_valid &&
	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
		return 0;

	NAPI_GRO_CB(skb)->csum = psum;

	return __skb_gro_checksum_complete(skb);
}

static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
{
	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
		/* Consume a checksum from CHECKSUM_UNNECESSARY */
		NAPI_GRO_CB(skb)->csum_cnt--;
	} else {
		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
		 * verified a new top level checksum or an encapsulated one
		 * during GRO. This saves work if we fall back to the normal path.
		 */
		__skb_incr_checksum_unnecessary(skb);
	}
}

#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
				    compute_pseudo)			\
({									\
	__sum16 __ret = 0;						\
	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_gro_checksum_validate_complete(skb,	\
				compute_pseudo(skb, proto));		\
	if (!__ret)							\
		skb_gro_incr_csum_unnecessary(skb);			\
	__ret;								\
})

#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)

#define skb_gro_checksum_validate_zero_check(skb, proto, check,	\
					     compute_pseudo)		\
	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)

#define skb_gro_checksum_simple_validate(skb)				\
	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)

static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
{
	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
		!NAPI_GRO_CB(skb)->csum_valid);
}

static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
					      __wsum pseudo)
{
	NAPI_GRO_CB(skb)->csum = ~pseudo;
	NAPI_GRO_CB(skb)->csum_valid = 1;
}

#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo)	\
do {									\
	if (__skb_gro_checksum_convert_check(skb))			\
		__skb_gro_checksum_convert(skb,				\
					   compute_pseudo(skb, proto));	\
} while (0)

struct gro_remcsum {
	int offset;
	__wsum delta;
};

static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
{
	grc->offset = 0;
	grc->delta = 0;
}
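
/*
 * A minimal sketch of how the validation macros above are typically used
 * (illustrative only; udp4_gro_receive follows this shape): a zero UDP
 * checksum is acceptable, otherwise the checksum is verified against the
 * IPv4 pseudo header; a non-zero, device-unverified checksum is then
 * converted so nested (tunnel/remcsum) processing can reuse it:
 *
 *	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
 *						 inet_gro_compute_pseudo))
 *		goto flush;
 *	else if (uh->check)
 *		skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
 *					     inet_gro_compute_pseudo);
 */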

static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
					    unsigned int off, size_t hdrlen,
					    int start, int offset,
					    struct gro_remcsum *grc,
					    bool nopartial)
{
	__wsum delta;
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);

	if (!nopartial) {
		NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
		return ptr;
	}

	ptr = skb_gro_header(skb, off + plen, off);
	if (!ptr)
		return NULL;

	delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
			       start, offset);

	/* Adjust skb->csum since we changed the packet */
	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);

	grc->offset = off + hdrlen + offset;
	grc->delta = delta;

	return ptr;
}

static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
					   struct gro_remcsum *grc)
{
	void *ptr;
	size_t plen = grc->offset + sizeof(u16);

	if (!grc->delta)
		return;

	ptr = skb_gro_header(skb, plen, grc->offset);
	if (!ptr)
		return;

	remcsum_unadjust((__sum16 *)ptr, grc->delta);
}

#ifdef CONFIG_XFRM_OFFLOAD
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
{
	if (PTR_ERR(pp) != -EINPROGRESS)
		NAPI_GRO_CB(skb)->flush |= flush;
}
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
					       struct sk_buff *pp,
					       int flush,
					       struct gro_remcsum *grc)
{
	if (PTR_ERR(pp) != -EINPROGRESS) {
		NAPI_GRO_CB(skb)->flush |= flush;
		skb_gro_remcsum_cleanup(skb, grc);
		skb->remcsum_offload = 0;
	}
}
#else
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
{
	NAPI_GRO_CB(skb)->flush |= flush;
}
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
					       struct sk_buff *pp,
					       int flush,
					       struct gro_remcsum *grc)
{
	NAPI_GRO_CB(skb)->flush |= flush;
	skb_gro_remcsum_cleanup(skb, grc);
	skb->remcsum_offload = 0;
}
#endif

INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));

INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
							   struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
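
/*
 * A minimal sketch of how the flush helpers above are used at the tail of
 * a tunnel gro_receive callback (illustrative only; fou/gue and vxlan
 * follow this shape).  With XFRM offload, ERR_PTR(-EINPROGRESS) means the
 * skb is still owned by the crypto layer, so flush state and remcsum
 * bookkeeping must be left untouched in that case:
 *
 *	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 *	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 *	return pp;
 */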

#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
({								\
	unlikely(gro_recursion_inc_test(skb)) ?			\
		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
})

struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
				struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);

static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
{
	struct udphdr *uh;
	unsigned int hlen, off;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*uh);
	uh = skb_gro_header(skb, hlen, off);

	return uh;
}

static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
					    int proto)
{
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
					    skb_gro_len(skb), proto, 0));
}

static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2,
				 struct sk_buff *p, bool outer)
{
	const u32 id = ntohl(*(__be32 *)&iph->id);
	const u32 id2 = ntohl(*(__be32 *)&iph2->id);
	const u16 ipid_offset = (id >> 16) - (id2 >> 16);
	const u16 count = NAPI_GRO_CB(p)->count;
	const u32 df = id & IP_DF;
	int flush;

	/* All fields must match except length and checksum. */
	flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF));

	if (flush | (outer && df))
		return flush;

	/* When we receive our second frame we can decide whether we continue
	 * this flow as an atomic flow with a fixed ID or as one that uses an
	 * incrementing ID.
	 */
	if (count == 1 && df && !ipid_offset)
		NAPI_GRO_CB(p)->ip_fixedid = true;

	return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid);
}

static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2)
{
	/* <Version:4><Traffic_Class:8><Flow_Label:20> */
	__be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2;

	/* Flush if the Traffic Class or Hop Limit fields differ. */
	return !!((first_word & htonl(0x0FF00000)) |
		(__force __be32)(iph->hop_limit ^ iph2->hop_limit));
}

static inline int __gro_receive_network_flush(const void *th, const void *th2,
					      struct sk_buff *p, const u16 diff,
					      bool outer)
{
	const void *nh = th - diff;
	const void *nh2 = th2 - diff;

	if (((struct iphdr *)nh)->version == 6)
		return ipv6_gro_flush(nh, nh2);
	else
		return inet_gro_flush(nh, nh2, p, outer);
}

static inline int gro_receive_network_flush(const void *th, const void *th2,
					    struct sk_buff *p)
{
	const bool encap_mark = NAPI_GRO_CB(p)->encap_mark;
	int off = skb_transport_offset(p);
	int flush;

	flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark);
	if (encap_mark)
		flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false);

	return flush;
}

int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
void __gro_flush(struct gro_node *gro, bool flush_old);

static inline void gro_flush(struct gro_node *gro, bool flush_old)
{
	if (!gro->bitmask)
		return;

	__gro_flush(gro, flush_old);
}

static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
	gro_flush(&napi->gro, flush_old);
}
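
/*
 * A minimal sketch of how the network-flush helpers above are consumed by
 * transport-level GRO (illustrative only; the TCP/UDP offload code follows
 * this shape): while comparing the candidate's transport header th with a
 * held packet's header th2, L3 mismatches (TTL/ToS, hop limit, IP ID
 * progression) are folded into the transport flush decision:
 *
 *	flush |= gro_receive_network_flush(th, th2, p);
 */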

/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
static inline void gro_normal_list(struct gro_node *gro)
{
	if (!gro->rx_count)
		return;
	netif_receive_skb_list_internal(&gro->rx_list);
	INIT_LIST_HEAD(&gro->rx_list);
	gro->rx_count = 0;
}

/* Queue one GRO_NORMAL SKB up for list processing. If the batch size is
 * exceeded, pass the whole batch up to the stack.
 */
static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb,
				  int segs)
{
	list_add_tail(&skb->list, &gro->rx_list);
	gro->rx_count += segs;
	if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
		gro_normal_list(gro);
}

void gro_init(struct gro_node *gro);
void gro_cleanup(struct gro_node *gro);

/* This function is an alternative to the 'inet_iif' and 'inet_sdif'
 * functions for cases where we cannot rely on the fields of IPCB.
 *
 * The caller must verify that skb_valid_dst(skb) is false and that skb->dev
 * is initialized.
 * The caller must hold the RCU read lock.
 */
static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
{
	*iif = inet_iif(skb) ?: skb->dev->ifindex;
	*sdif = 0;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (netif_is_l3_slave(skb->dev)) {
		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);

		*sdif = *iif;
		*iif = master ? master->ifindex : 0;
	}
#endif
}

/* This function is an alternative to the 'inet6_iif' and 'inet6_sdif'
 * functions for cases where we cannot rely on the fields of IP6CB.
 *
 * The caller must verify that skb_valid_dst(skb) is false and that skb->dev
 * is initialized.
 * The caller must hold the RCU read lock.
 */
static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
{
	/* using skb->dev->ifindex because skb_dst(skb) is not initialized */
	*iif = skb->dev->ifindex;
	*sdif = 0;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (netif_is_l3_slave(skb->dev)) {
		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);

		*sdif = *iif;
		*iif = master ? master->ifindex : 0;
	}
#endif
}

struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);

#endif /* _NET_GRO_H */
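
/*
 * A minimal sketch of how the batching helpers above are driven by the GRO
 * core (illustrative only): a completed held packet is queued with
 * gro_normal_one(), crediting all of its aggregated segments, and the batch
 * is handed to the stack once net_hotdata.gro_normal_batch is reached:
 *
 *	gro_normal_one(gro, skb, NAPI_GRO_CB(skb)->count);
 */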