// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_output.c - Common IPsec encapsulation code.
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/gso.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
#endif

#include "xfrm_inout.h"

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);

static int xfrm_skb_check_space(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
		- skb_headroom(skb);
	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);

	if (nhead <= 0) {
		if (ntail <= 0)
			return 0;
		nhead = 0;
	} else if (ntail < 0)
		ntail = 0;

	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
}

/* Children define the path of the packet through the Linux networking
 * stack. Thus, destinations are stackable.
 */

static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));

	skb_dst_drop(skb);
	return child;
}

/* Add encapsulation header.
 *
 * The IP header will be moved forward to make space for the encapsulation
 * header.
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;

	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It is better to try to find by the end of
			 * XXX: packet if HAO exists.
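			 * (HAO is the Home Address destination option
			 * defined by Mobile IPv6, RFC 6275.)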
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			if (found_rhdr)
				return offset;

			break;
		default:
			return offset;
		}

		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
{
	switch (x->type->proto) {
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
#endif
	default:
		break;
	}

	return ip6_find_1stfragopt(skb, prevhdr);
}
#endif

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
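 * In BEET mode the inner IP addresses are not carried on the wire; the
 * peer reconstructs them from the SA, so the header built below takes its
 * addresses from x->props.saddr and x->id.daddr.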
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* Whether the inner DS field is disclosed to the outer header
	 * depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP.
	 */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
			    0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
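 * (The mac_header is reused here as a cursor to the nexthdr/protocol byte
 * rather than pointing at a link-layer header.)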
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->props.mode) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->props.mode) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->props.mode) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->props.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->props.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->props.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->props.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		if (x->props.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		if (x->mode_cbs && x->mode_cbs->prepare_output)
			return x->mode_cbs->prepare_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif

static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;
		x->lastused = ktime_get_real_seconds();

		spin_unlock_bh(&x->lock);

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

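		/* With a hardware offload context attached, let the offload
		 * type build the outer header; otherwise run the software
		 * transform, which may complete asynchronously and resume
		 * later via xfrm_output_resume().
		 */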
		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, sk, skb);
		if (unlikely(err != 1))
			goto out;

		if (!skb_dst(skb)->xfrm)
			return dst_output(net, sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

static int xfrm_dev_direct_output(struct sock *sk, struct xfrm_state *x,
				  struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net *net = xs_net(x);
	int err;

	dst = skb_dst_pop(skb);
	if (!dst) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}
	skb_dst_set(skb, dst);
	nf_reset_ct(skb);

	err = skb_dst(skb)->ops->local_out(net, sk, skb);
	if (unlikely(err != 1)) {
		kfree_skb(skb);
		return err;
	}

	/* In transport mode the network destination is directly
	 * reachable, while in tunnel mode the inner network may not
	 * be. With packet offload the HW is responsible for building
	 * the hard header, so transmit the skb straight to the
	 * netdevice.
	 */
	skb->dev = x->xso.dev;
	__skb_push(skb, skb->dev->hard_header_len);
	return dev_queue_xmit(skb);
}

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}

static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}

/* For partial checksum offload, the outer header checksum is calculated
 * by software and the inner header checksum is calculated by hardware.
 * This requires hardware to know the inner packet type to calculate
 * the inner header checksum. Save the inner IP protocol here to avoid
 * traversing the packet in the vendor's xmit code.
 * For IPsec tunnel mode, save the IP protocol taken from the IP header
 * of the plaintext packet. Otherwise, if the encap type is IPPROTO,
 * just save skb->inner_ipproto; in any other case take the IP protocol
 * from the inner IP header.
 */
static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
{
	struct xfrm_offload *xo = xfrm_offload(skb);
	const struct ethhdr *eth;

	if (!xo)
		return;

	if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
		switch (x->outer_mode.family) {
		case AF_INET:
			xo->inner_ipproto = ip_hdr(skb)->protocol;
			break;
		case AF_INET6:
			xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			break;
		}

		return;
	}
	if (x->outer_mode.encap == XFRM_MODE_IPTFS) {
		xo->inner_ipproto = IPPROTO_AGGFRAG;
		return;
	}

	/* non-Tunnel Mode */
	if (!skb->encapsulation)
		return;

	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
		xo->inner_ipproto = skb->inner_ipproto;
		return;
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
		return;

	eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (ntohs(eth->h_proto)) {
	case ETH_P_IPV6:
		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
		break;
	case ETH_P_IP:
		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
		break;
	}
}

int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int family;
	int err;

	family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
		: skb_dst(skb)->ops->family;

	switch (family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
		if (!xfrm_dev_offload_ok(skb, x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -EHOSTUNREACH;
		}

		/* Exclusive direct xmit for tunnel mode, as
		 * some filtering or matching rules may apply
		 * in transport mode.
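		 * (Transport-mode packets therefore keep to the
		 * xfrm_output_resume() path below so the POST_ROUTING
		 * netfilter hook still runs.)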
		 */
		if (x->props.mode == XFRM_MODE_TUNNEL)
			return xfrm_dev_direct_output(sk, x, skb);

		return xfrm_output_resume(sk, skb, 0);
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
			if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;

	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
		goto out;

	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
		goto out;

	mtu = dst_mtu(skb_dst(skb));
	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
		skb->protocol = htons(ETH_P_IP);

		if (skb->sk && sk_fullsock(skb->sk))
			xfrm_local_error(skb, mtu);
		else
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(xfrm4_tunnel_check_size);

static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	if (x->outer_mode.encap == XFRM_MODE_BEET &&
	    ip_is_fragment(ip_hdr(skb))) {
		net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
		return -EAFNOSUPPORT;
	}

	err = xfrm4_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;

	xfrm4_extract_header(skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;
	struct dst_entry *dst = skb_dst(skb);
	struct sock *sk = skb_to_full_sk(skb);

	if (skb->ignore_df)
		goto out;

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
		skb->dev = dst->dev;
		skb->protocol = htons(ETH_P_IPV6);

		if (xfrm6_local_dontfrag(sk))
			ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
		else if (sk)
			xfrm_local_error(skb, mtu);
		else
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(xfrm6_tunnel_check_size);
#endif

static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm6_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;

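	/* Stash the inner header fields (DS field, hop limit, flow label)
	 * in the skb control block for the encapsulation code to use.
	 */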
	xfrm6_extract_header(skb);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return xfrm4_extract_output(x, skb);
	case htons(ETH_P_IPV6):
		return xfrm6_extract_output(x, skb);
	}

	return -EAFNOSUPPORT;
}

void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6) &&
		 skb->sk->sk_family == AF_INET6)
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		afinfo->local_error(skb, mtu);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);