// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_output.c - Common IPsec encapsulation code.
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/gso.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_route.h>
#endif

#include "xfrm_inout.h"

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);

/* Make sure the skb has enough headroom for the outer encapsulation plus
 * link-layer headers of the current dst, and enough tailroom for the
 * device.  The head is expanded (GFP_ATOMIC, we may be in softirq) only
 * when the existing buffer is too small.
 *
 * Returns 0 on success or the error from pskb_expand_head().
 */
static int xfrm_skb_check_space(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
		- skb_headroom(skb);
	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);

	/* A non-positive delta means that side already has enough room;
	 * clamp to zero so we never ask pskb_expand_head() to shrink.
	 */
	if (nhead <= 0) {
		if (ntail <= 0)
			return 0;
		nhead = 0;
	} else if (ntail < 0)
		ntail = 0;

	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
}

/* Children define the path of the packet through the
 * Linux networking.  Thus, destinations are stackable.
 */

/* Step one level down the stacked xfrm dst chain: take a reference on the
 * child dst, drop the skb's current dst, and return the child.
 */
static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));

	skb_dst_drop(skb);
	return child;
}

/* Add encapsulation header.
 *
 * The IP header will be moved forward to make space for the encapsulation
 * header.
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;

	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	/* mac_header is repurposed to point at the protocol field of the
	 * moved IP header so x->type->output can patch the nexthdr value
	 * (see the block comment above xfrm4_prepare_output).
	 */
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Walk the IPv6 extension-header chain to find the offset at which a
 * Mobile IPv6 header of @type (IPPROTO_ROUTING or IPPROTO_DSTOPTS) must
 * be inserted.  On return *nexthdr points at the nexthdr byte of the
 * header immediately preceding the insertion point.
 *
 * Returns the byte offset from the network header, or -EINVAL if the
 * chain is truncated or the options run past IPV6_MAXPLEN.
 */
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			/* A routing header whose type is not 0 marks the
			 * insertion point when placing a routing header.
			 */
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It is better to try to find by the end of
			 * XXX: packet if HAO exists.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			if (found_rhdr)
				return offset;

			break;
		default:
			return offset;
		}

		/* Advance to the next extension header, bailing out if it
		 * would run past the end of the packet data.
		 */
		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
/* Pick the offset at which the outer header is inserted: Mobile IPv6
 * types use the MIPv6-specific walk above, everything else inserts at
 * the first fragmentable position.
 */
static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
{
	switch (x->type->proto) {
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
#endif
	default:
		break;
	}

	return ip6_find_1stfragopt(skb, prevhdr);
}
#endif

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	/* mac_header points at the nexthdr byte of the header preceding
	 * the insertion point so x->type->output can patch it.
	 */
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	/* Same header shuffling as xfrm6_transport_output(), minus the
	 * inner transport header bookkeeping.
	 */
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	/* Inner IPv4 options cannot ride in the BEET outer header; they
	 * are carried in a pseudo header (IPPROTO_BEETPH) instead.
	 * NOTE(review): the "& 4" padding math mirrors the BEET decap
	 * side — confirm against the input path before touching it.
	 */
	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	/* BEET: addresses come from SA state, not from the inner packet. */
	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	/* IPv6 packets at or below IPV6_MIN_MTU never get DF set on the
	 * outer IPv4 header (see frag_off computation below).
	 */
	bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	/* Propagate the inner ECN bits into the outer header. */
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
			    0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* Add an outer IPv6 tunnel header; the IPv6 analogue of
 * xfrm4_tunnel_encap_add(), with DSCP/ECN handling on the traffic class.
 */
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

/* Build the outer IPv6 BEET header; inner IPv4 options (when the inner
 * selector family is IPv4) travel in a BEET pseudo header, as in
 * xfrm4_beet_encap_add().
 */
static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	/* BEET: addresses come from SA state, not from the inner packet. */
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	/* Extract inner-header metadata (protocol, tos, ...) before the
	 * inner header is overwritten by the outer one.
	 */
	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->props.mode) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

/* IPv6 counterpart of xfrm4_prepare_output(); also sets ignore_df since
 * the tunnel handles fragmentation decisions itself.
 */
static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->props.mode) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

/* Dispatch to the per-mode, per-family output routine for this SA.
 * Modes not handled here (e.g. IPTFS) go through the pluggable
 * x->mode_cbs->prepare_output callback.
 */
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->props.mode) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->props.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->props.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->props.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->props.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		if (x->props.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		if (x->mode_cbs && x->mode_cbs->prepare_output)
			return x->mode_cbs->prepare_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
/* Exported thin wrapper so pktgen can exercise the mode output path. */
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif

/* Apply one (or more stacked) transforms to the skb.  Called with @err > 0
 * for a fresh packet; @err <= 0 indicates we are resuming after async
 * crypto completion, so jump straight to the resume path.  Each loop
 * iteration processes one SA in the bundle and pops one dst level.
 */
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	/* Packet-offload SAs are transformed entirely in hardware; skip
	 * the software transform and just walk the dst stack.
	 */
	if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		/* State validity, lifetime and replay counters are all
		 * protected by x->lock.
		 */
		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;
		x->lastused = ktime_get_real_seconds();

		spin_unlock_bh(&x->lock);

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			/* Crypto offload: the driver fills in the ESP
			 * payload later; only build the encap header.
			 */
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			/* May complete asynchronously (-EINPROGRESS), in
			 * which case processing resumes via
			 * xfrm_output_resume().
			 */
			err = x->type->output(x, skb);
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
		/* Stop at a tunnel-mode state: the packet must re-enter the
		 * stack (routing/netfilter) before further transforms.
		 */
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

/* Drive the transform loop, re-injecting the packet into the local output
 * and POST_ROUTING paths between tunnel-mode transforms.  Also the entry
 * point used by async crypto completion handlers.
 */
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, sk, skb);
		if (unlikely(err != 1))
			goto out;

		/* No more transforms pending: hand off to the normal
		 * output path.
		 */
		if (!skb_dst(skb)->xfrm)
			return dst_output(net, sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

/* Fast path for packet-offload tunnel mode: bypass the stack and hand the
 * skb straight to the offloading netdevice.
 */
static int xfrm_dev_direct_output(struct sock *sk, struct xfrm_state *x,
				  struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net *net = xs_net(x);
	int err;

	dst = skb_dst_pop(skb);
	if (!dst) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}
	skb_dst_set(skb, dst);
	nf_reset_ct(skb);

	err = skb_dst(skb)->ops->local_out(net, sk, skb);
	if (unlikely(err != 1)) {
		kfree_skb(skb);
		return err;
	}

	/* In transport mode, network destination is
	 * directly reachable, while in tunnel mode,
	 * inner packet network may not be.  In packet
	 * offload type, HW is responsible for hard
	 * header packet mangling so directly xmit skb
	 * to netdevice.
	 */
	skb->dev = x->xso.dev;
	__skb_push(skb, skb->dev->hard_header_len);
	return dev_queue_xmit(skb);
}

/* netfilter okfn: (re)start the transform loop for a fresh packet. */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}

/* Software-segment a GSO skb and push each resulting segment through
 * xfrm_output2() individually; consumes @skb in all cases.
 */
static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			/* Drop the remaining, not-yet-sent segments. */
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}

/* For partial checksum offload, the outer header checksum is calculated
 * by software and the inner header checksum is calculated by hardware.
 * This requires hardware to know the inner packet type to calculate
 * the inner header checksum. Save inner ip protocol here to avoid
 * traversing the packet in the vendor's xmit code.
 * For IPsec tunnel mode save the ip protocol from the IP header of the
 * plain text packet. Otherwise If the encap type is IPIP, just save
 * skb->inner_ipproto in any other case get the ip protocol from the IP
 * header.
 */
static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
{
	struct xfrm_offload *xo = xfrm_offload(skb);
	const struct ethhdr *eth;

	if (!xo)
		return;

	if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
		/* Tunnel mode: the whole inner IP header is still at the
		 * front of the skb; read its protocol/nexthdr directly.
		 */
		switch (skb_dst(skb)->ops->family) {
		case AF_INET:
			xo->inner_ipproto = ip_hdr(skb)->protocol;
			break;
		case AF_INET6:
			xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			break;
		}

		return;
	}
	if (x->outer_mode.encap == XFRM_MODE_IPTFS) {
		xo->inner_ipproto = IPPROTO_AGGFRAG;
		return;
	}

	/* non-Tunnel Mode */
	if (!skb->encapsulation)
		return;

	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
		xo->inner_ipproto = skb->inner_ipproto;
		return;
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
		return;

	/* ENCAP_TYPE_ETHER: dig the protocol out of the inner L2 header. */
	eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (ntohs(eth->h_proto)) {
	case ETH_P_IPV6:
		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
		break;
	case ETH_P_IP:
		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
		break;
	}
}

/* Main entry point of the IPsec output path.  Initializes the per-family
 * control block, handles full packet offload and crypto offload setup
 * (secpath, GSO, checksum), then enters the transform loop via
 * xfrm_output2().  Consumes @skb on error.
 */
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int family;
	int err;

	/* For packet offload the outer header is built by hardware, so the
	 * control block to reset is that of the inner (dst ops) family.
	 */
	family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
		: skb_dst(skb)->ops->family;

	switch (family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
		if (!xfrm_dev_offload_ok(skb, x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -EHOSTUNREACH;
		}

		/* Exclusive direct xmit for tunnel mode, as
		 * some filtering or matching rules may apply
		 * in transport mode.
		 * Locally generated packets also require
		 * the normal XFRM path for L2 header setup,
		 * as the hardware needs the L2 header to match
		 * for encryption, so skip direct output as well.
		 */
		if (x->props.mode == XFRM_MODE_TUNNEL && !skb->sk)
			return xfrm_dev_direct_output(sk, x, skb);

		return xfrm_output_resume(sk, skb, 0);
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		/* Crypto offload: record the SA in the secpath so the
		 * driver can find it at xmit time.
		 */
		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
			if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		/* Hardware computes the ESP checksum; skip software help. */
		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

/* Enforce the tunnel PMTU for IPv4: if a DF packet exceeds the dst MTU,
 * signal the sender (local error for full sockets, ICMP FRAG_NEEDED
 * otherwise) and return -EMSGSIZE.  Skipped when the tunnel-size check
 * already ran (IPSKB_XFRM_TUNNEL_SIZE) or DF is not set.
 */
int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;

	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
		goto out;

	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
		goto out;

	mtu = dst_mtu(skb_dst(skb));
	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
		skb->protocol = htons(ETH_P_IP);

		if (skb->sk && sk_fullsock(skb->sk))
			xfrm_local_error(skb, mtu);
		else
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(xfrm4_tunnel_check_size);

/* Capture IPv4 inner-header metadata before encapsulation overwrites it.
 * BEET mode cannot represent inner fragments, so reject them outright.
 */
static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	if (x->outer_mode.encap == XFRM_MODE_BEET &&
	    ip_is_fragment(ip_hdr(skb))) {
		net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
		return -EAFNOSUPPORT;
	}

	err = xfrm4_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;

	xfrm4_extract_header(skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 analogue of xfrm4_tunnel_check_size(); the effective MTU is
 * clamped to IPV6_MIN_MTU, and oversized packets are reported via
 * RXPMTU, local error or ICMPV6_PKT_TOOBIG depending on the socket.
 */
int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;
	struct dst_entry *dst = skb_dst(skb);
	struct sock *sk = skb_to_full_sk(skb);

	if (skb->ignore_df)
		goto out;

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
		skb->dev = dst->dev;
		skb->protocol = htons(ETH_P_IPV6);

		if (xfrm6_local_dontfrag(sk))
			xfrm6_local_rxpmtu(skb, mtu);
		else if (sk)
			xfrm_local_error(skb, mtu);
		else
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(xfrm6_tunnel_check_size);
#endif

/* Capture IPv6 inner-header metadata before encapsulation overwrites it. */
static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm6_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;

	xfrm6_extract_header(skb);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Dispatch on the inner packet's protocol to the per-family extractor. */
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return xfrm4_extract_output(x, skb);
	case htons(ETH_P_IPV6):
		return xfrm6_extract_output(x, skb);
	}

	return -EAFNOSUPPORT;
}

/* Deliver a PMTU error to the local sender via the per-family afinfo
 * handler.  Silently does nothing for protocols other than IPv4/IPv6.
 */
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6) &&
		 skb->sk->sk_family == AF_INET6)
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		afinfo->local_error(skb, mtu);
		/* NOTE(review): unlock pairs with the RCU read lock that
		 * xfrm_state_get_afinfo() is expected to take on success —
		 * confirm against its definition in xfrm_state.c.
		 */
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);