// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/gso.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		/* Make sure idev stays alive */
		rcu_read_lock();
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			rcu_read_unlock();
			return -ENOMEM;
		}
		rcu_read_unlock();
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
95 */ 96 if (newskb) 97 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 98 net, sk, newskb, NULL, newskb->dev, 99 dev_loopback_xmit); 100 101 if (hdr->hop_limit == 0) { 102 IP6_INC_STATS(net, idev, 103 IPSTATS_MIB_OUTDISCARDS); 104 kfree_skb(skb); 105 return 0; 106 } 107 } 108 109 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); 110 if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && 111 !(dev->flags & IFF_LOOPBACK)) { 112 kfree_skb(skb); 113 return 0; 114 } 115 } 116 117 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 118 int res = lwtunnel_xmit(skb); 119 120 if (res != LWTUNNEL_XMIT_CONTINUE) 121 return res; 122 } 123 124 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 125 126 rcu_read_lock(); 127 nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); 128 neigh = __ipv6_neigh_lookup_noref(dev, nexthop); 129 130 if (IS_ERR_OR_NULL(neigh)) { 131 if (unlikely(!neigh)) 132 neigh = __neigh_create(&nd_tbl, nexthop, dev, false); 133 if (IS_ERR(neigh)) { 134 rcu_read_unlock(); 135 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); 136 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); 137 return -EINVAL; 138 } 139 } 140 sock_confirm_neigh(skb, neigh); 141 ret = neigh_output(neigh, skb, false); 142 rcu_read_unlock(); 143 return ret; 144 } 145 146 static int 147 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, 148 struct sk_buff *skb, unsigned int mtu) 149 { 150 struct sk_buff *segs, *nskb; 151 netdev_features_t features; 152 int ret = 0; 153 154 /* Please see corresponding comment in ip_finish_output_gso 155 * describing the cases where GSO segment length exceeds the 156 * egress MTU. 157 */ 158 features = netif_skb_features(skb); 159 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 160 if (IS_ERR_OR_NULL(segs)) { 161 kfree_skb(skb); 162 return -ENOMEM; 163 } 164 165 consume_skb(skb); 166 167 skb_list_walk_safe(segs, segs, nskb) { 168 int err; 169 170 skb_mark_not_on_list(segs); 171 /* Last GSO segment can be smaller than gso_size (and MTU). 172 * Adding a fragment header would produce an "atomic fragment", 173 * which is considered harmful (RFC-8021). Avoid that. 174 */ 175 err = segs->len > mtu ? 
176 ip6_fragment(net, sk, segs, ip6_finish_output2) : 177 ip6_finish_output2(net, sk, segs); 178 if (err && ret == 0) 179 ret = err; 180 } 181 182 return ret; 183 } 184 185 static int ip6_finish_output_gso(struct net *net, struct sock *sk, 186 struct sk_buff *skb, unsigned int mtu) 187 { 188 if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && 189 !skb_gso_validate_network_len(skb, mtu)) 190 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); 191 192 return ip6_finish_output2(net, sk, skb); 193 } 194 195 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 196 { 197 unsigned int mtu; 198 199 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 200 /* Policy lookup after SNAT yielded a new policy */ 201 if (skb_dst(skb)->xfrm) { 202 IP6CB(skb)->flags |= IP6SKB_REROUTED; 203 return dst_output(net, sk, skb); 204 } 205 #endif 206 207 mtu = ip6_skb_dst_mtu(skb); 208 if (skb_is_gso(skb)) 209 return ip6_finish_output_gso(net, sk, skb, mtu); 210 211 if (skb->len > mtu || 212 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 213 return ip6_fragment(net, sk, skb, ip6_finish_output2); 214 215 return ip6_finish_output2(net, sk, skb); 216 } 217 218 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 219 { 220 int ret; 221 222 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 223 switch (ret) { 224 case NET_XMIT_SUCCESS: 225 case NET_XMIT_CN: 226 return __ip6_finish_output(net, sk, skb) ? : ret; 227 default: 228 kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); 229 return ret; 230 } 231 } 232 233 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 234 { 235 struct dst_entry *dst = skb_dst(skb); 236 struct net_device *dev = dst_dev(dst), *indev = skb->dev; 237 struct inet6_dev *idev = ip6_dst_idev(dst); 238 239 skb->protocol = htons(ETH_P_IPV6); 240 skb->dev = dev; 241 242 if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { 243 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 244 kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); 245 return 0; 246 } 247 248 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, 249 net, sk, skb, indev, dev, 250 ip6_finish_output, 251 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 252 } 253 EXPORT_SYMBOL(ip6_output); 254 255 bool ip6_autoflowlabel(struct net *net, const struct sock *sk) 256 { 257 if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) 258 return ip6_default_np_autolabel(net); 259 return inet6_test_bit(AUTOFLOWLABEL, sk); 260 } 261 262 /* 263 * xmit an sk_buff (used by TCP and SCTP) 264 * Note : socket lock is not held for SYNACK packets, but might be modified 265 * by calls to skb_set_owner_w() and ipv6_local_error(), 266 * which are using proper atomic operations or spinlocks. 
267 */ 268 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 269 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) 270 { 271 struct net *net = sock_net(sk); 272 const struct ipv6_pinfo *np = inet6_sk(sk); 273 struct in6_addr *first_hop = &fl6->daddr; 274 struct dst_entry *dst = skb_dst(skb); 275 struct net_device *dev = dst_dev(dst); 276 struct inet6_dev *idev = ip6_dst_idev(dst); 277 struct hop_jumbo_hdr *hop_jumbo; 278 int hoplen = sizeof(*hop_jumbo); 279 unsigned int head_room; 280 struct ipv6hdr *hdr; 281 u8 proto = fl6->flowi6_proto; 282 int seg_len = skb->len; 283 int hlimit = -1; 284 u32 mtu; 285 286 head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); 287 if (opt) 288 head_room += opt->opt_nflen + opt->opt_flen; 289 290 if (unlikely(head_room > skb_headroom(skb))) { 291 /* Make sure idev stays alive */ 292 rcu_read_lock(); 293 skb = skb_expand_head(skb, head_room); 294 if (!skb) { 295 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 296 rcu_read_unlock(); 297 return -ENOBUFS; 298 } 299 rcu_read_unlock(); 300 } 301 302 if (opt) { 303 seg_len += opt->opt_nflen + opt->opt_flen; 304 305 if (opt->opt_flen) 306 ipv6_push_frag_opts(skb, opt, &proto); 307 308 if (opt->opt_nflen) 309 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, 310 &fl6->saddr); 311 } 312 313 if (unlikely(seg_len > IPV6_MAXPLEN)) { 314 hop_jumbo = skb_push(skb, hoplen); 315 316 hop_jumbo->nexthdr = proto; 317 hop_jumbo->hdrlen = 0; 318 hop_jumbo->tlv_type = IPV6_TLV_JUMBO; 319 hop_jumbo->tlv_len = 4; 320 hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen); 321 322 proto = IPPROTO_HOPOPTS; 323 seg_len = 0; 324 IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO; 325 } 326 327 skb_push(skb, sizeof(struct ipv6hdr)); 328 skb_reset_network_header(skb); 329 hdr = ipv6_hdr(skb); 330 331 /* 332 * Fill in the IPv6 header 333 */ 334 if (np) 335 hlimit = READ_ONCE(np->hop_limit); 336 if (hlimit < 0) 337 hlimit = ip6_dst_hoplimit(dst); 338 339 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 340 ip6_autoflowlabel(net, sk), fl6)); 341 342 hdr->payload_len = htons(seg_len); 343 hdr->nexthdr = proto; 344 hdr->hop_limit = hlimit; 345 346 hdr->saddr = fl6->saddr; 347 hdr->daddr = *first_hop; 348 349 skb->protocol = htons(ETH_P_IPV6); 350 skb->priority = priority; 351 skb->mark = mark; 352 353 mtu = dst_mtu(dst); 354 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 355 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); 356 357 /* if egress device is enslaved to an L3 master device pass the 358 * skb to its handler for processing 359 */ 360 skb = l3mdev_ip6_out((struct sock *)sk, skb); 361 if (unlikely(!skb)) 362 return 0; 363 364 /* hooks should never assume socket lock is held. 
365 * we promote our socket to non const 366 */ 367 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 368 net, (struct sock *)sk, skb, NULL, dev, 369 dst_output); 370 } 371 372 skb->dev = dev; 373 /* ipv6_local_error() does not require socket lock, 374 * we promote our socket to non const 375 */ 376 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); 377 378 IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); 379 kfree_skb(skb); 380 return -EMSGSIZE; 381 } 382 EXPORT_SYMBOL(ip6_xmit); 383 384 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 385 { 386 struct ip6_ra_chain *ra; 387 struct sock *last = NULL; 388 389 read_lock(&ip6_ra_lock); 390 for (ra = ip6_ra_chain; ra; ra = ra->next) { 391 struct sock *sk = ra->sk; 392 if (sk && ra->sel == sel && 393 (!sk->sk_bound_dev_if || 394 sk->sk_bound_dev_if == skb->dev->ifindex)) { 395 396 if (inet6_test_bit(RTALERT_ISOLATE, sk) && 397 !net_eq(sock_net(sk), dev_net(skb->dev))) { 398 continue; 399 } 400 if (last) { 401 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 402 if (skb2) 403 rawv6_rcv(last, skb2); 404 } 405 last = sk; 406 } 407 } 408 409 if (last) { 410 rawv6_rcv(last, skb); 411 read_unlock(&ip6_ra_lock); 412 return 1; 413 } 414 read_unlock(&ip6_ra_lock); 415 return 0; 416 } 417 418 static int ip6_forward_proxy_check(struct sk_buff *skb) 419 { 420 struct ipv6hdr *hdr = ipv6_hdr(skb); 421 u8 nexthdr = hdr->nexthdr; 422 __be16 frag_off; 423 int offset; 424 425 if (ipv6_ext_hdr(nexthdr)) { 426 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 427 if (offset < 0) 428 return 0; 429 } else 430 offset = sizeof(struct ipv6hdr); 431 432 if (nexthdr == IPPROTO_ICMPV6) { 433 struct icmp6hdr *icmp6; 434 435 if (!pskb_may_pull(skb, (skb_network_header(skb) + 436 offset + 1 - skb->data))) 437 return 0; 438 439 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 440 441 switch (icmp6->icmp6_type) { 442 case NDISC_ROUTER_SOLICITATION: 443 case NDISC_ROUTER_ADVERTISEMENT: 444 case NDISC_NEIGHBOUR_SOLICITATION: 445 case NDISC_NEIGHBOUR_ADVERTISEMENT: 446 case NDISC_REDIRECT: 447 /* For reaction involving unicast neighbor discovery 448 * message destined to the proxied address, pass it to 449 * input function. 450 */ 451 return 1; 452 default: 453 break; 454 } 455 } 456 457 /* 458 * The proxying router can't forward traffic sent to a link-local 459 * address, so signal the sender and discard the packet. This 460 * behavior is clarified by the MIPv6 specification. 
461 */ 462 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 463 dst_link_failure(skb); 464 return -1; 465 } 466 467 return 0; 468 } 469 470 static inline int ip6_forward_finish(struct net *net, struct sock *sk, 471 struct sk_buff *skb) 472 { 473 #ifdef CONFIG_NET_SWITCHDEV 474 if (skb->offload_l3_fwd_mark) { 475 consume_skb(skb); 476 return 0; 477 } 478 #endif 479 480 skb_clear_tstamp(skb); 481 return dst_output(net, sk, skb); 482 } 483 484 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 485 { 486 if (skb->len <= mtu) 487 return false; 488 489 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ 490 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) 491 return true; 492 493 if (skb->ignore_df) 494 return false; 495 496 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) 497 return false; 498 499 return true; 500 } 501 502 int ip6_forward(struct sk_buff *skb) 503 { 504 struct dst_entry *dst = skb_dst(skb); 505 struct ipv6hdr *hdr = ipv6_hdr(skb); 506 struct inet6_skb_parm *opt = IP6CB(skb); 507 struct net *net = dev_net(dst_dev(dst)); 508 struct net_device *dev; 509 struct inet6_dev *idev; 510 SKB_DR(reason); 511 u32 mtu; 512 513 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); 514 if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) 515 goto error; 516 517 if (skb->pkt_type != PACKET_HOST) 518 goto drop; 519 520 if (unlikely(skb->sk)) 521 goto drop; 522 523 if (skb_warn_if_lro(skb)) 524 goto drop; 525 526 if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && 527 (!idev || !READ_ONCE(idev->cnf.disable_policy)) && 528 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 529 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 530 goto drop; 531 } 532 533 skb_forward_csum(skb); 534 535 /* 536 * We DO NOT make any processing on 537 * RA packets, pushing them to user level AS IS 538 * without ane WARRANTY that application will be able 539 * to interpret them. The reason is that we 540 * cannot make anything clever here. 541 * 542 * We are not end-node, so that if packet contains 543 * AH/ESP, we cannot make anything. 544 * Defragmentation also would be mistake, RA packets 545 * cannot be fragmented, because there is no warranty 546 * that different fragments will go along one path. --ANK 547 */ 548 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { 549 if (ip6_call_ra_chain(skb, ntohs(opt->ra))) 550 return 0; 551 } 552 553 /* 554 * check and decrement ttl 555 */ 556 if (hdr->hop_limit <= 1) { 557 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 558 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); 559 560 kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); 561 return -ETIMEDOUT; 562 } 563 564 /* XXX: idev->cnf.proxy_ndp? */ 565 if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && 566 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 567 int proxied = ip6_forward_proxy_check(skb); 568 if (proxied > 0) { 569 /* It's tempting to decrease the hop limit 570 * here by 1, as we do at the end of the 571 * function too. 572 * 573 * But that would be incorrect, as proxying is 574 * not forwarding. The ip6_input function 575 * will handle this packet locally, and it 576 * depends on the hop limit being unchanged. 577 * 578 * One example is the NDP hop limit, that 579 * always has to stay 255, but other would be 580 * similar checks around RA packets, where the 581 * user can even change the desired limit. 
582 */ 583 return ip6_input(skb); 584 } else if (proxied < 0) { 585 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 586 goto drop; 587 } 588 } 589 590 if (!xfrm6_route_forward(skb)) { 591 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 592 SKB_DR_SET(reason, XFRM_POLICY); 593 goto drop; 594 } 595 dst = skb_dst(skb); 596 dev = dst_dev(dst); 597 /* IPv6 specs say nothing about it, but it is clear that we cannot 598 send redirects to source routed frames. 599 We don't send redirects to frames decapsulated from IPsec. 600 */ 601 if (IP6CB(skb)->iif == dev->ifindex && 602 opt->srcrt == 0 && !skb_sec_path(skb)) { 603 struct in6_addr *target = NULL; 604 struct inet_peer *peer; 605 struct rt6_info *rt; 606 607 /* 608 * incoming and outgoing devices are the same 609 * send a redirect. 610 */ 611 612 rt = dst_rt6_info(dst); 613 if (rt->rt6i_flags & RTF_GATEWAY) 614 target = &rt->rt6i_gateway; 615 else 616 target = &hdr->daddr; 617 618 rcu_read_lock(); 619 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); 620 621 /* Limit redirects both by destination (here) 622 and by source (inside ndisc_send_redirect) 623 */ 624 if (inet_peer_xrlim_allow(peer, 1*HZ)) 625 ndisc_send_redirect(skb, target); 626 rcu_read_unlock(); 627 } else { 628 int addrtype = ipv6_addr_type(&hdr->saddr); 629 630 /* This check is security critical. */ 631 if (addrtype == IPV6_ADDR_ANY || 632 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 633 goto error; 634 if (addrtype & IPV6_ADDR_LINKLOCAL) { 635 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 636 ICMPV6_NOT_NEIGHBOUR, 0); 637 goto error; 638 } 639 } 640 641 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 642 643 mtu = ip6_dst_mtu_maybe_forward(dst, true); 644 if (mtu < IPV6_MIN_MTU) 645 mtu = IPV6_MIN_MTU; 646 647 if (ip6_pkt_too_big(skb, mtu)) { 648 /* Again, force OUTPUT device used as source address */ 649 skb->dev = dev; 650 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 651 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); 652 __IP6_INC_STATS(net, ip6_dst_idev(dst), 653 IPSTATS_MIB_FRAGFAILS); 654 kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); 655 return -EMSGSIZE; 656 } 657 658 if (skb_cow(skb, dev->hard_header_len)) { 659 __IP6_INC_STATS(net, ip6_dst_idev(dst), 660 IPSTATS_MIB_OUTDISCARDS); 661 goto drop; 662 } 663 664 hdr = ipv6_hdr(skb); 665 666 /* Mangling hops number delayed to point after skb COW */ 667 668 hdr->hop_limit--; 669 670 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 671 net, NULL, skb, skb->dev, dev, 672 ip6_forward_finish); 673 674 error: 675 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 676 SKB_DR_SET(reason, IP_INADDRERRORS); 677 drop: 678 kfree_skb_reason(skb, reason); 679 return -EINVAL; 680 } 681 682 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 683 { 684 to->pkt_type = from->pkt_type; 685 to->priority = from->priority; 686 to->protocol = from->protocol; 687 skb_dst_drop(to); 688 skb_dst_set(to, dst_clone(skb_dst(from))); 689 to->dev = from->dev; 690 to->mark = from->mark; 691 692 skb_copy_hash(to, from); 693 694 #ifdef CONFIG_NET_SCHED 695 to->tc_index = from->tc_index; 696 #endif 697 nf_copy(to, from); 698 skb_ext_copy(to, from); 699 skb_copy_secmark(to, from); 700 } 701 702 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, 703 u8 nexthdr, __be32 frag_id, 704 struct ip6_fraglist_iter *iter) 705 { 706 unsigned int first_len; 707 struct frag_hdr *fh; 708 709 /* BUILD HEADER */ 710 *prevhdr = NEXTHDR_FRAGMENT; 711 iter->tmp_hdr = 
kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 712 if (!iter->tmp_hdr) 713 return -ENOMEM; 714 715 iter->frag = skb_shinfo(skb)->frag_list; 716 skb_frag_list_init(skb); 717 718 iter->offset = 0; 719 iter->hlen = hlen; 720 iter->frag_id = frag_id; 721 iter->nexthdr = nexthdr; 722 723 __skb_pull(skb, hlen); 724 fh = __skb_push(skb, sizeof(struct frag_hdr)); 725 __skb_push(skb, hlen); 726 skb_reset_network_header(skb); 727 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); 728 729 fh->nexthdr = nexthdr; 730 fh->reserved = 0; 731 fh->frag_off = htons(IP6_MF); 732 fh->identification = frag_id; 733 734 first_len = skb_pagelen(skb); 735 skb->data_len = first_len - skb_headlen(skb); 736 skb->len = first_len; 737 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 738 739 return 0; 740 } 741 EXPORT_SYMBOL(ip6_fraglist_init); 742 743 void ip6_fraglist_prepare(struct sk_buff *skb, 744 struct ip6_fraglist_iter *iter) 745 { 746 struct sk_buff *frag = iter->frag; 747 unsigned int hlen = iter->hlen; 748 struct frag_hdr *fh; 749 750 frag->ip_summed = CHECKSUM_NONE; 751 skb_reset_transport_header(frag); 752 fh = __skb_push(frag, sizeof(struct frag_hdr)); 753 __skb_push(frag, hlen); 754 skb_reset_network_header(frag); 755 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); 756 iter->offset += skb->len - hlen - sizeof(struct frag_hdr); 757 fh->nexthdr = iter->nexthdr; 758 fh->reserved = 0; 759 fh->frag_off = htons(iter->offset); 760 if (frag->next) 761 fh->frag_off |= htons(IP6_MF); 762 fh->identification = iter->frag_id; 763 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 764 ip6_copy_metadata(frag, skb); 765 } 766 EXPORT_SYMBOL(ip6_fraglist_prepare); 767 768 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, 769 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, 770 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) 771 { 772 state->prevhdr = prevhdr; 773 state->nexthdr = nexthdr; 774 state->frag_id = frag_id; 775 776 state->hlen = hlen; 777 state->mtu = mtu; 778 779 state->left = skb->len - hlen; /* Space per frame */ 780 state->ptr = hlen; /* Where to start from */ 781 782 state->hroom = hdr_room; 783 state->troom = needed_tailroom; 784 785 state->offset = 0; 786 } 787 EXPORT_SYMBOL(ip6_frag_init); 788 789 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) 790 { 791 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; 792 struct sk_buff *frag; 793 struct frag_hdr *fh; 794 unsigned int len; 795 796 len = state->left; 797 /* IF: it doesn't fit, use 'mtu' - the data space left */ 798 if (len > state->mtu) 799 len = state->mtu; 800 /* IF: we are not sending up to and including the packet end 801 then align the next start on an eight byte boundary */ 802 if (len < state->left) 803 len &= ~7; 804 805 /* Allocate buffer */ 806 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + 807 state->hroom + state->troom, GFP_ATOMIC); 808 if (!frag) 809 return ERR_PTR(-ENOMEM); 810 811 /* 812 * Set up data on packet 813 */ 814 815 ip6_copy_metadata(frag, skb); 816 skb_reserve(frag, state->hroom); 817 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); 818 skb_reset_network_header(frag); 819 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); 820 frag->transport_header = (frag->network_header + state->hlen + 821 sizeof(struct frag_hdr)); 822 823 /* 824 * Charge the memory for the fragment to any owner 825 * it might possess 826 */ 827 if (skb->sk) 828 
skb_set_owner_w(frag, skb->sk); 829 830 /* 831 * Copy the packet header into the new buffer. 832 */ 833 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); 834 835 fragnexthdr_offset = skb_network_header(frag); 836 fragnexthdr_offset += prevhdr - skb_network_header(skb); 837 *fragnexthdr_offset = NEXTHDR_FRAGMENT; 838 839 /* 840 * Build fragment header. 841 */ 842 fh->nexthdr = state->nexthdr; 843 fh->reserved = 0; 844 fh->identification = state->frag_id; 845 846 /* 847 * Copy a block of the IP datagram. 848 */ 849 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag), 850 len)); 851 state->left -= len; 852 853 fh->frag_off = htons(state->offset); 854 if (state->left > 0) 855 fh->frag_off |= htons(IP6_MF); 856 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 857 858 state->ptr += len; 859 state->offset += len; 860 861 return frag; 862 } 863 EXPORT_SYMBOL(ip6_frag_next); 864 865 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 866 int (*output)(struct net *, struct sock *, struct sk_buff *)) 867 { 868 struct sk_buff *frag; 869 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 870 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 871 inet6_sk(skb->sk) : NULL; 872 u8 tstamp_type = skb->tstamp_type; 873 struct ip6_frag_state state; 874 unsigned int mtu, hlen, nexthdr_offset; 875 ktime_t tstamp = skb->tstamp; 876 int hroom, err = 0; 877 __be32 frag_id; 878 u8 *prevhdr, nexthdr = 0; 879 880 err = ip6_find_1stfragopt(skb, &prevhdr); 881 if (err < 0) 882 goto fail; 883 hlen = err; 884 nexthdr = *prevhdr; 885 nexthdr_offset = prevhdr - skb_network_header(skb); 886 887 mtu = ip6_skb_dst_mtu(skb); 888 889 /* We must not fragment if the socket is set to force MTU discovery 890 * or if the skb it not generated by a local socket. 891 */ 892 if (unlikely(!skb->ignore_df && skb->len > mtu)) 893 goto fail_toobig; 894 895 if (IP6CB(skb)->frag_max_size) { 896 if (IP6CB(skb)->frag_max_size > mtu) 897 goto fail_toobig; 898 899 /* don't send fragments larger than what we received */ 900 mtu = IP6CB(skb)->frag_max_size; 901 if (mtu < IPV6_MIN_MTU) 902 mtu = IPV6_MIN_MTU; 903 } 904 905 if (np) { 906 u32 frag_size = READ_ONCE(np->frag_size); 907 908 if (frag_size && frag_size < mtu) 909 mtu = frag_size; 910 } 911 if (mtu < hlen + sizeof(struct frag_hdr) + 8) 912 goto fail_toobig; 913 mtu -= hlen + sizeof(struct frag_hdr); 914 915 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, 916 &ipv6_hdr(skb)->saddr); 917 918 if (skb->ip_summed == CHECKSUM_PARTIAL && 919 (err = skb_checksum_help(skb))) 920 goto fail; 921 922 prevhdr = skb_network_header(skb) + nexthdr_offset; 923 hroom = LL_RESERVED_SPACE(rt->dst.dev); 924 if (skb_has_frag_list(skb)) { 925 unsigned int first_len = skb_pagelen(skb); 926 struct ip6_fraglist_iter iter; 927 struct sk_buff *frag2; 928 929 if (first_len - hlen > mtu || 930 ((first_len - hlen) & 7) || 931 skb_cloned(skb) || 932 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) 933 goto slow_path; 934 935 skb_walk_frags(skb, frag) { 936 /* Correct geometry. */ 937 if (frag->len > mtu || 938 ((frag->len & 7) && frag->next) || 939 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) 940 goto slow_path_clean; 941 942 /* Partially cloned skb? 
*/ 943 if (skb_shared(frag)) 944 goto slow_path_clean; 945 946 BUG_ON(frag->sk); 947 if (skb->sk) { 948 frag->sk = skb->sk; 949 frag->destructor = sock_wfree; 950 } 951 skb->truesize -= frag->truesize; 952 } 953 954 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, 955 &iter); 956 if (err < 0) 957 goto fail; 958 959 /* We prevent @rt from being freed. */ 960 rcu_read_lock(); 961 962 for (;;) { 963 /* Prepare header of the next frame, 964 * before previous one went down. */ 965 if (iter.frag) 966 ip6_fraglist_prepare(skb, &iter); 967 968 skb_set_delivery_time(skb, tstamp, tstamp_type); 969 err = output(net, sk, skb); 970 if (!err) 971 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 972 IPSTATS_MIB_FRAGCREATES); 973 974 if (err || !iter.frag) 975 break; 976 977 skb = ip6_fraglist_next(&iter); 978 } 979 980 kfree(iter.tmp_hdr); 981 982 if (err == 0) { 983 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 984 IPSTATS_MIB_FRAGOKS); 985 rcu_read_unlock(); 986 return 0; 987 } 988 989 kfree_skb_list(iter.frag); 990 991 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 992 IPSTATS_MIB_FRAGFAILS); 993 rcu_read_unlock(); 994 return err; 995 996 slow_path_clean: 997 skb_walk_frags(skb, frag2) { 998 if (frag2 == frag) 999 break; 1000 frag2->sk = NULL; 1001 frag2->destructor = NULL; 1002 skb->truesize += frag2->truesize; 1003 } 1004 } 1005 1006 slow_path: 1007 /* 1008 * Fragment the datagram. 1009 */ 1010 1011 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, 1012 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, 1013 &state); 1014 1015 /* 1016 * Keep copying data until we run out. 1017 */ 1018 1019 while (state.left > 0) { 1020 frag = ip6_frag_next(skb, &state); 1021 if (IS_ERR(frag)) { 1022 err = PTR_ERR(frag); 1023 goto fail; 1024 } 1025 1026 /* 1027 * Put this fragment into the sending queue. 1028 */ 1029 skb_set_delivery_time(frag, tstamp, tstamp_type); 1030 err = output(net, sk, frag); 1031 if (err) 1032 goto fail; 1033 1034 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 1035 IPSTATS_MIB_FRAGCREATES); 1036 } 1037 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 1038 IPSTATS_MIB_FRAGOKS); 1039 consume_skb(skb); 1040 return err; 1041 1042 fail_toobig: 1043 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1044 err = -EMSGSIZE; 1045 1046 fail: 1047 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 1048 IPSTATS_MIB_FRAGFAILS); 1049 kfree_skb(skb); 1050 return err; 1051 } 1052 1053 static inline int ip6_rt_check(const struct rt6key *rt_key, 1054 const struct in6_addr *fl_addr, 1055 const struct in6_addr *addr_cache) 1056 { 1057 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 1058 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 1059 } 1060 1061 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 1062 struct dst_entry *dst, 1063 const struct flowi6 *fl6) 1064 { 1065 struct ipv6_pinfo *np = inet6_sk(sk); 1066 struct rt6_info *rt; 1067 1068 if (!dst) 1069 goto out; 1070 1071 if (dst->ops->family != AF_INET6) { 1072 dst_release(dst); 1073 return NULL; 1074 } 1075 1076 rt = dst_rt6_info(dst); 1077 /* Yes, checking route validity in not connected 1078 * case is not very simple. Take into account, 1079 * that we do not support routing by source, TOS, 1080 * and MSG_DONTROUTE --ANK (980726) 1081 * 1082 * 1. ip6_rt_check(): If route was host route, 1083 * check that cached destination is current. 1084 * If it is network route, we still may 1085 * check its validity using saved pointer 1086 * to the last used address: daddr_cache. 
1087 * We do not want to save whole address now, 1088 * (because main consumer of this service 1089 * is tcp, which has not this problem), 1090 * so that the last trick works only on connected 1091 * sockets. 1092 * 2. oif also should be the same. 1093 */ 1094 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 1095 #ifdef CONFIG_IPV6_SUBTREES 1096 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 1097 #endif 1098 (fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) { 1099 dst_release(dst); 1100 dst = NULL; 1101 } 1102 1103 out: 1104 return dst; 1105 } 1106 1107 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, 1108 struct dst_entry **dst, struct flowi6 *fl6) 1109 { 1110 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1111 struct neighbour *n; 1112 struct rt6_info *rt; 1113 #endif 1114 int err; 1115 int flags = 0; 1116 1117 /* The correct way to handle this would be to do 1118 * ip6_route_get_saddr, and then ip6_route_output; however, 1119 * the route-specific preferred source forces the 1120 * ip6_route_output call _before_ ip6_route_get_saddr. 1121 * 1122 * In source specific routing (no src=any default route), 1123 * ip6_route_output will fail given src=any saddr, though, so 1124 * that's why we try it again later. 1125 */ 1126 if (ipv6_addr_any(&fl6->saddr)) { 1127 struct fib6_info *from; 1128 struct rt6_info *rt; 1129 1130 *dst = ip6_route_output(net, sk, fl6); 1131 rt = (*dst)->error ? NULL : dst_rt6_info(*dst); 1132 1133 rcu_read_lock(); 1134 from = rt ? rcu_dereference(rt->from) : NULL; 1135 err = ip6_route_get_saddr(net, from, &fl6->daddr, 1136 sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, 1137 fl6->flowi6_l3mdev, 1138 &fl6->saddr); 1139 rcu_read_unlock(); 1140 1141 if (err) 1142 goto out_err_release; 1143 1144 /* If we had an erroneous initial result, pretend it 1145 * never existed and let the SA-enabled version take 1146 * over. 1147 */ 1148 if ((*dst)->error) { 1149 dst_release(*dst); 1150 *dst = NULL; 1151 } 1152 1153 if (fl6->flowi6_oif) 1154 flags |= RT6_LOOKUP_F_IFACE; 1155 } 1156 1157 if (!*dst) 1158 *dst = ip6_route_output_flags(net, sk, fl6, flags); 1159 1160 err = (*dst)->error; 1161 if (err) 1162 goto out_err_release; 1163 1164 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1165 /* 1166 * Here if the dst entry we've looked up 1167 * has a neighbour entry that is in the INCOMPLETE 1168 * state and the src address from the flow is 1169 * marked as OPTIMISTIC, we release the found 1170 * dst entry and replace it instead with the 1171 * dst entry of the nexthop router 1172 */ 1173 rt = dst_rt6_info(*dst); 1174 rcu_read_lock(); 1175 n = __ipv6_neigh_lookup_noref(rt->dst.dev, 1176 rt6_nexthop(rt, &fl6->daddr)); 1177 err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? 
-EINVAL : 0; 1178 rcu_read_unlock(); 1179 1180 if (err) { 1181 struct inet6_ifaddr *ifp; 1182 struct flowi6 fl_gw6; 1183 int redirect; 1184 1185 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 1186 (*dst)->dev, 1); 1187 1188 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 1189 if (ifp) 1190 in6_ifa_put(ifp); 1191 1192 if (redirect) { 1193 /* 1194 * We need to get the dst entry for the 1195 * default router instead 1196 */ 1197 dst_release(*dst); 1198 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 1199 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 1200 *dst = ip6_route_output(net, sk, &fl_gw6); 1201 err = (*dst)->error; 1202 if (err) 1203 goto out_err_release; 1204 } 1205 } 1206 #endif 1207 if (ipv6_addr_v4mapped(&fl6->saddr) && 1208 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { 1209 err = -EAFNOSUPPORT; 1210 goto out_err_release; 1211 } 1212 1213 return 0; 1214 1215 out_err_release: 1216 dst_release(*dst); 1217 *dst = NULL; 1218 1219 if (err == -ENETUNREACH) 1220 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 1221 return err; 1222 } 1223 1224 /** 1225 * ip6_dst_lookup - perform route lookup on flow 1226 * @net: Network namespace to perform lookup in 1227 * @sk: socket which provides route info 1228 * @dst: pointer to dst_entry * for result 1229 * @fl6: flow to lookup 1230 * 1231 * This function performs a route lookup on the given flow. 1232 * 1233 * It returns zero on success, or a standard errno code on error. 1234 */ 1235 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1236 struct flowi6 *fl6) 1237 { 1238 *dst = NULL; 1239 return ip6_dst_lookup_tail(net, sk, dst, fl6); 1240 } 1241 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1242 1243 /** 1244 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1245 * @net: Network namespace to perform lookup in 1246 * @sk: socket which provides route info 1247 * @fl6: flow to lookup 1248 * @final_dst: final destination address for ipsec lookup 1249 * 1250 * This function performs a route lookup on the given flow. 1251 * 1252 * It returns a valid dst pointer on success, or a pointer encoded 1253 * error code. 1254 */ 1255 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 1256 const struct in6_addr *final_dst) 1257 { 1258 struct dst_entry *dst = NULL; 1259 int err; 1260 1261 err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 1262 if (err) 1263 return ERR_PTR(err); 1264 if (final_dst) 1265 fl6->daddr = *final_dst; 1266 1267 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 1268 } 1269 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1270 1271 /** 1272 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1273 * @sk: socket which provides the dst cache and route info 1274 * @fl6: flow to lookup 1275 * @final_dst: final destination address for ipsec lookup 1276 * @connected: whether @sk is connected or not 1277 * 1278 * This function performs a route lookup on the given flow with the 1279 * possibility of using the cached route in the socket if it is valid. 1280 * It will take the socket dst lock when operating on the dst cache. 1281 * As a result, this function can only be used in process context. 1282 * 1283 * In addition, for a connected socket, cache the dst in the socket 1284 * if the current cache is not valid. 1285 * 1286 * It returns a valid dst pointer on success, or a pointer encoded 1287 * error code. 
1288 */ 1289 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1290 const struct in6_addr *final_dst, 1291 bool connected) 1292 { 1293 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1294 1295 dst = ip6_sk_dst_check(sk, dst, fl6); 1296 if (dst) 1297 return dst; 1298 1299 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 1300 if (connected && !IS_ERR(dst)) 1301 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 1302 1303 return dst; 1304 } 1305 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1306 1307 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1308 gfp_t gfp) 1309 { 1310 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1311 } 1312 1313 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1314 gfp_t gfp) 1315 { 1316 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1317 } 1318 1319 static void ip6_append_data_mtu(unsigned int *mtu, 1320 int *maxfraglen, 1321 unsigned int fragheaderlen, 1322 struct sk_buff *skb, 1323 struct rt6_info *rt, 1324 unsigned int orig_mtu) 1325 { 1326 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1327 if (!skb) { 1328 /* first fragment, reserve header_len */ 1329 *mtu = orig_mtu - rt->dst.header_len; 1330 1331 } else { 1332 /* 1333 * this fragment is not first, the headers 1334 * space is regarded as data space. 1335 */ 1336 *mtu = orig_mtu; 1337 } 1338 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1339 + fragheaderlen - sizeof(struct frag_hdr); 1340 } 1341 } 1342 1343 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1344 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, 1345 struct rt6_info *rt) 1346 { 1347 struct ipv6_pinfo *np = inet6_sk(sk); 1348 unsigned int mtu, frag_size; 1349 struct ipv6_txoptions *nopt, *opt = ipc6->opt; 1350 1351 /* callers pass dst together with a reference, set it first so 1352 * ip6_cork_release() can put it down even in case of an error. 1353 */ 1354 cork->base.dst = &rt->dst; 1355 1356 /* 1357 * setup for corking 1358 */ 1359 if (opt) { 1360 if (WARN_ON(v6_cork->opt)) 1361 return -EINVAL; 1362 1363 nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); 1364 if (unlikely(!nopt)) 1365 return -ENOBUFS; 1366 1367 nopt->tot_len = sizeof(*opt); 1368 nopt->opt_flen = opt->opt_flen; 1369 nopt->opt_nflen = opt->opt_nflen; 1370 1371 nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); 1372 if (opt->dst0opt && !nopt->dst0opt) 1373 return -ENOBUFS; 1374 1375 nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); 1376 if (opt->dst1opt && !nopt->dst1opt) 1377 return -ENOBUFS; 1378 1379 nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); 1380 if (opt->hopopt && !nopt->hopopt) 1381 return -ENOBUFS; 1382 1383 nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); 1384 if (opt->srcrt && !nopt->srcrt) 1385 return -ENOBUFS; 1386 1387 /* need source address above miyazawa*/ 1388 } 1389 v6_cork->hop_limit = ipc6->hlimit; 1390 v6_cork->tclass = ipc6->tclass; 1391 v6_cork->dontfrag = ipc6->dontfrag; 1392 if (rt->dst.flags & DST_XFRM_TUNNEL) 1393 mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? 1394 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 1395 else 1396 mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? 
1397 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); 1398 1399 frag_size = READ_ONCE(np->frag_size); 1400 if (frag_size && frag_size < mtu) 1401 mtu = frag_size; 1402 1403 cork->base.fragsize = mtu; 1404 cork->base.gso_size = ipc6->gso_size; 1405 cork->base.tx_flags = 0; 1406 cork->base.mark = ipc6->sockc.mark; 1407 cork->base.priority = ipc6->sockc.priority; 1408 sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); 1409 if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { 1410 cork->base.flags |= IPCORK_TS_OPT_ID; 1411 cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; 1412 } 1413 cork->base.length = 0; 1414 cork->base.transmit_time = ipc6->sockc.transmit_time; 1415 1416 return 0; 1417 } 1418 1419 static int __ip6_append_data(struct sock *sk, 1420 struct sk_buff_head *queue, 1421 struct inet_cork_full *cork_full, 1422 struct inet6_cork *v6_cork, 1423 struct page_frag *pfrag, 1424 int getfrag(void *from, char *to, int offset, 1425 int len, int odd, struct sk_buff *skb), 1426 void *from, size_t length, int transhdrlen, 1427 unsigned int flags) 1428 { 1429 struct sk_buff *skb, *skb_prev = NULL; 1430 struct inet_cork *cork = &cork_full->base; 1431 struct flowi6 *fl6 = &cork_full->fl.u.ip6; 1432 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; 1433 struct ubuf_info *uarg = NULL; 1434 int exthdrlen = 0; 1435 int dst_exthdrlen = 0; 1436 int hh_len; 1437 int copy; 1438 int err; 1439 int offset = 0; 1440 bool zc = false; 1441 u32 tskey = 0; 1442 struct rt6_info *rt = dst_rt6_info(cork->dst); 1443 bool paged, hold_tskey = false, extra_uref = false; 1444 struct ipv6_txoptions *opt = v6_cork->opt; 1445 int csummode = CHECKSUM_NONE; 1446 unsigned int maxnonfragsize, headersize; 1447 unsigned int wmem_alloc_delta = 0; 1448 1449 skb = skb_peek_tail(queue); 1450 if (!skb) { 1451 exthdrlen = opt ? opt->opt_flen : 0; 1452 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 1453 } 1454 1455 paged = !!cork->gso_size; 1456 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; 1457 orig_mtu = mtu; 1458 1459 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1460 1461 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1462 (opt ? opt->opt_nflen : 0); 1463 1464 headersize = sizeof(struct ipv6hdr) + 1465 (opt ? 
opt->opt_flen + opt->opt_nflen : 0) + 1466 rt->rt6i_nfheader_len; 1467 1468 if (mtu <= fragheaderlen || 1469 ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) 1470 goto emsgsize; 1471 1472 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - 1473 sizeof(struct frag_hdr); 1474 1475 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit 1476 * the first fragment 1477 */ 1478 if (headersize + transhdrlen > mtu) 1479 goto emsgsize; 1480 1481 if (cork->length + length > mtu - headersize && v6_cork->dontfrag && 1482 (sk->sk_protocol == IPPROTO_UDP || 1483 sk->sk_protocol == IPPROTO_ICMPV6 || 1484 sk->sk_protocol == IPPROTO_RAW)) { 1485 ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 1486 sizeof(struct ipv6hdr)); 1487 goto emsgsize; 1488 } 1489 1490 if (ip6_sk_ignore_df(sk)) 1491 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; 1492 else 1493 maxnonfragsize = mtu; 1494 1495 if (cork->length + length > maxnonfragsize - headersize) { 1496 emsgsize: 1497 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); 1498 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); 1499 return -EMSGSIZE; 1500 } 1501 1502 /* CHECKSUM_PARTIAL only with no extension headers and when 1503 * we are not going to fragment 1504 */ 1505 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && 1506 headersize == sizeof(struct ipv6hdr) && 1507 length <= mtu - headersize && 1508 (!(flags & MSG_MORE) || cork->gso_size) && 1509 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 1510 csummode = CHECKSUM_PARTIAL; 1511 1512 if ((flags & MSG_ZEROCOPY) && length) { 1513 struct msghdr *msg = from; 1514 1515 if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { 1516 if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) 1517 return -EINVAL; 1518 1519 /* Leave uarg NULL if can't zerocopy, callers should 1520 * be able to handle it. 1521 */ 1522 if ((rt->dst.dev->features & NETIF_F_SG) && 1523 csummode == CHECKSUM_PARTIAL) { 1524 paged = true; 1525 zc = true; 1526 uarg = msg->msg_ubuf; 1527 } 1528 } else if (sock_flag(sk, SOCK_ZEROCOPY)) { 1529 uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb), 1530 false); 1531 if (!uarg) 1532 return -ENOBUFS; 1533 extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ 1534 if (rt->dst.dev->features & NETIF_F_SG && 1535 csummode == CHECKSUM_PARTIAL) { 1536 paged = true; 1537 zc = true; 1538 } else { 1539 uarg_to_msgzc(uarg)->zerocopy = 0; 1540 skb_zcopy_set(skb, uarg, &extra_uref); 1541 } 1542 } 1543 } else if ((flags & MSG_SPLICE_PAGES) && length) { 1544 if (inet_test_bit(HDRINCL, sk)) 1545 return -EPERM; 1546 if (rt->dst.dev->features & NETIF_F_SG && 1547 getfrag == ip_generic_getfrag) 1548 /* We need an empty buffer to attach stuff to */ 1549 paged = true; 1550 else 1551 flags &= ~MSG_SPLICE_PAGES; 1552 } 1553 1554 if (cork->tx_flags & SKBTX_ANY_TSTAMP && 1555 READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { 1556 if (cork->flags & IPCORK_TS_OPT_ID) { 1557 tskey = cork->ts_opt_id; 1558 } else { 1559 tskey = atomic_inc_return(&sk->sk_tskey) - 1; 1560 hold_tskey = true; 1561 } 1562 } 1563 1564 /* 1565 * Let's try using as much space as possible. 1566 * Use MTU if total length of the message fits into the MTU. 1567 * Otherwise, we need to reserve fragment header and 1568 * fragment alignment (= 8-15 octects, in total). 1569 * 1570 * Note that we may need to "move" the data from the tail 1571 * of the buffer to the new fragment when we split 1572 * the message. 
1573 * 1574 * FIXME: It may be fragmented into multiple chunks 1575 * at once if non-fragmentable extension headers 1576 * are too large. 1577 * --yoshfuji 1578 */ 1579 1580 cork->length += length; 1581 if (!skb) 1582 goto alloc_new_skb; 1583 1584 while (length > 0) { 1585 /* Check if the remaining data fits into current packet. */ 1586 copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len; 1587 if (copy < length) 1588 copy = maxfraglen - skb->len; 1589 1590 if (copy <= 0) { 1591 char *data; 1592 unsigned int datalen; 1593 unsigned int fraglen; 1594 unsigned int fraggap; 1595 unsigned int alloclen, alloc_extra; 1596 unsigned int pagedlen; 1597 alloc_new_skb: 1598 /* There's no room in the current skb */ 1599 if (skb) 1600 fraggap = skb->len - maxfraglen; 1601 else 1602 fraggap = 0; 1603 /* update mtu and maxfraglen if necessary */ 1604 if (!skb || !skb_prev) 1605 ip6_append_data_mtu(&mtu, &maxfraglen, 1606 fragheaderlen, skb, rt, 1607 orig_mtu); 1608 1609 skb_prev = skb; 1610 1611 /* 1612 * If remaining data exceeds the mtu, 1613 * we know we need more fragment(s). 1614 */ 1615 datalen = length + fraggap; 1616 1617 if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen) 1618 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1619 fraglen = datalen + fragheaderlen; 1620 pagedlen = 0; 1621 1622 alloc_extra = hh_len; 1623 alloc_extra += dst_exthdrlen; 1624 alloc_extra += rt->dst.trailer_len; 1625 1626 /* We just reserve space for fragment header. 1627 * Note: this may be overallocation if the message 1628 * (without MSG_MORE) fits into the MTU. 1629 */ 1630 alloc_extra += sizeof(struct frag_hdr); 1631 1632 if ((flags & MSG_MORE) && 1633 !(rt->dst.dev->features&NETIF_F_SG)) 1634 alloclen = mtu; 1635 else if (!paged && 1636 (fraglen + alloc_extra < SKB_MAX_ALLOC || 1637 !(rt->dst.dev->features & NETIF_F_SG))) 1638 alloclen = fraglen; 1639 else { 1640 alloclen = fragheaderlen + transhdrlen; 1641 pagedlen = datalen - transhdrlen; 1642 } 1643 alloclen += alloc_extra; 1644 1645 if (datalen != length + fraggap) { 1646 /* 1647 * this is not the last fragment, the trailer 1648 * space is regarded as data space. 1649 */ 1650 datalen += rt->dst.trailer_len; 1651 } 1652 1653 fraglen = datalen + fragheaderlen; 1654 1655 copy = datalen - transhdrlen - fraggap - pagedlen; 1656 /* [!] NOTE: copy may be negative if pagedlen>0 1657 * because then the equation may reduces to -fraggap. 
1658 */ 1659 if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) { 1660 err = -EINVAL; 1661 goto error; 1662 } 1663 if (transhdrlen) { 1664 skb = sock_alloc_send_skb(sk, alloclen, 1665 (flags & MSG_DONTWAIT), &err); 1666 } else { 1667 skb = NULL; 1668 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 1669 2 * sk->sk_sndbuf) 1670 skb = alloc_skb(alloclen, 1671 sk->sk_allocation); 1672 if (unlikely(!skb)) 1673 err = -ENOBUFS; 1674 } 1675 if (!skb) 1676 goto error; 1677 /* 1678 * Fill in the control structures 1679 */ 1680 skb->protocol = htons(ETH_P_IPV6); 1681 skb->ip_summed = csummode; 1682 skb->csum = 0; 1683 /* reserve for fragmentation and ipsec header */ 1684 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1685 dst_exthdrlen); 1686 1687 /* 1688 * Find where to start putting bytes 1689 */ 1690 data = skb_put(skb, fraglen - pagedlen); 1691 skb_set_network_header(skb, exthdrlen); 1692 data += fragheaderlen; 1693 skb->transport_header = (skb->network_header + 1694 fragheaderlen); 1695 if (fraggap) { 1696 skb->csum = skb_copy_and_csum_bits( 1697 skb_prev, maxfraglen, 1698 data + transhdrlen, fraggap); 1699 skb_prev->csum = csum_sub(skb_prev->csum, 1700 skb->csum); 1701 data += fraggap; 1702 pskb_trim_unique(skb_prev, maxfraglen); 1703 } 1704 if (copy > 0 && 1705 INDIRECT_CALL_1(getfrag, ip_generic_getfrag, 1706 from, data + transhdrlen, offset, 1707 copy, fraggap, skb) < 0) { 1708 err = -EFAULT; 1709 kfree_skb(skb); 1710 goto error; 1711 } else if (flags & MSG_SPLICE_PAGES) { 1712 copy = 0; 1713 } 1714 1715 offset += copy; 1716 length -= copy + transhdrlen; 1717 transhdrlen = 0; 1718 exthdrlen = 0; 1719 dst_exthdrlen = 0; 1720 1721 /* Only the initial fragment is time stamped */ 1722 skb_shinfo(skb)->tx_flags = cork->tx_flags; 1723 cork->tx_flags = 0; 1724 skb_shinfo(skb)->tskey = tskey; 1725 tskey = 0; 1726 skb_zcopy_set(skb, uarg, &extra_uref); 1727 1728 if ((flags & MSG_CONFIRM) && !skb_prev) 1729 skb_set_dst_pending_confirm(skb, 1); 1730 1731 /* 1732 * Put the packet on the pending queue 1733 */ 1734 if (!skb->destructor) { 1735 skb->destructor = sock_wfree; 1736 skb->sk = sk; 1737 wmem_alloc_delta += skb->truesize; 1738 } 1739 __skb_queue_tail(queue, skb); 1740 continue; 1741 } 1742 1743 if (copy > length) 1744 copy = length; 1745 1746 if (!(rt->dst.dev->features&NETIF_F_SG) && 1747 skb_tailroom(skb) >= copy) { 1748 unsigned int off; 1749 1750 off = skb->len; 1751 if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, 1752 from, skb_put(skb, copy), 1753 offset, copy, off, skb) < 0) { 1754 __skb_trim(skb, off); 1755 err = -EFAULT; 1756 goto error; 1757 } 1758 } else if (flags & MSG_SPLICE_PAGES) { 1759 struct msghdr *msg = from; 1760 1761 err = -EIO; 1762 if (WARN_ON_ONCE(copy > msg->msg_iter.count)) 1763 goto error; 1764 1765 err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 1766 sk->sk_allocation); 1767 if (err < 0) 1768 goto error; 1769 copy = err; 1770 wmem_alloc_delta += copy; 1771 } else if (!zc) { 1772 int i = skb_shinfo(skb)->nr_frags; 1773 1774 err = -ENOMEM; 1775 if (!sk_page_frag_refill(sk, pfrag)) 1776 goto error; 1777 1778 skb_zcopy_downgrade_managed(skb); 1779 if (!skb_can_coalesce(skb, i, pfrag->page, 1780 pfrag->offset)) { 1781 err = -EMSGSIZE; 1782 if (i == MAX_SKB_FRAGS) 1783 goto error; 1784 1785 __skb_fill_page_desc(skb, i, pfrag->page, 1786 pfrag->offset, 0); 1787 skb_shinfo(skb)->nr_frags = ++i; 1788 get_page(pfrag->page); 1789 } 1790 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1791 if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, 1792 from, 1793 
page_address(pfrag->page) + pfrag->offset, 1794 offset, copy, skb->len, skb) < 0) 1795 goto error_efault; 1796 1797 pfrag->offset += copy; 1798 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1799 skb->len += copy; 1800 skb->data_len += copy; 1801 skb->truesize += copy; 1802 wmem_alloc_delta += copy; 1803 } else { 1804 err = skb_zerocopy_iter_dgram(skb, from, copy); 1805 if (err < 0) 1806 goto error; 1807 } 1808 offset += copy; 1809 length -= copy; 1810 } 1811 1812 if (wmem_alloc_delta) 1813 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1814 return 0; 1815 1816 error_efault: 1817 err = -EFAULT; 1818 error: 1819 net_zcopy_put_abort(uarg, extra_uref); 1820 cork->length -= length; 1821 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1822 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1823 if (hold_tskey) 1824 atomic_dec(&sk->sk_tskey); 1825 return err; 1826 } 1827 1828 int ip6_append_data(struct sock *sk, 1829 int getfrag(void *from, char *to, int offset, int len, 1830 int odd, struct sk_buff *skb), 1831 void *from, size_t length, int transhdrlen, 1832 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1833 struct rt6_info *rt, unsigned int flags) 1834 { 1835 struct inet_sock *inet = inet_sk(sk); 1836 struct ipv6_pinfo *np = inet6_sk(sk); 1837 int exthdrlen; 1838 int err; 1839 1840 if (flags&MSG_PROBE) 1841 return 0; 1842 if (skb_queue_empty(&sk->sk_write_queue)) { 1843 /* 1844 * setup for corking 1845 */ 1846 dst_hold(&rt->dst); 1847 err = ip6_setup_cork(sk, &inet->cork, &np->cork, 1848 ipc6, rt); 1849 if (err) 1850 return err; 1851 1852 inet->cork.fl.u.ip6 = *fl6; 1853 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 1854 length += exthdrlen; 1855 transhdrlen += exthdrlen; 1856 } else { 1857 transhdrlen = 0; 1858 } 1859 1860 return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, 1861 &np->cork, sk_page_frag(sk), getfrag, 1862 from, length, transhdrlen, flags); 1863 } 1864 EXPORT_SYMBOL_GPL(ip6_append_data); 1865 1866 static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) 1867 { 1868 struct dst_entry *dst = cork->base.dst; 1869 1870 cork->base.dst = NULL; 1871 skb_dst_set(skb, dst); 1872 } 1873 1874 static void ip6_cork_release(struct inet_cork_full *cork, 1875 struct inet6_cork *v6_cork) 1876 { 1877 if (v6_cork->opt) { 1878 struct ipv6_txoptions *opt = v6_cork->opt; 1879 1880 kfree(opt->dst0opt); 1881 kfree(opt->dst1opt); 1882 kfree(opt->hopopt); 1883 kfree(opt->srcrt); 1884 kfree(opt); 1885 v6_cork->opt = NULL; 1886 } 1887 1888 if (cork->base.dst) { 1889 dst_release(cork->base.dst); 1890 cork->base.dst = NULL; 1891 } 1892 } 1893 1894 struct sk_buff *__ip6_make_skb(struct sock *sk, 1895 struct sk_buff_head *queue, 1896 struct inet_cork_full *cork, 1897 struct inet6_cork *v6_cork) 1898 { 1899 struct sk_buff *skb, *tmp_skb; 1900 struct sk_buff **tail_skb; 1901 struct in6_addr *final_dst; 1902 struct net *net = sock_net(sk); 1903 struct ipv6hdr *hdr; 1904 struct ipv6_txoptions *opt = v6_cork->opt; 1905 struct rt6_info *rt = dst_rt6_info(cork->base.dst); 1906 struct flowi6 *fl6 = &cork->fl.u.ip6; 1907 unsigned char proto = fl6->flowi6_proto; 1908 1909 skb = __skb_dequeue(queue); 1910 if (!skb) 1911 goto out; 1912 tail_skb = &(skb_shinfo(skb)->frag_list); 1913 1914 /* move skb->data to ip header from ext header */ 1915 if (skb->data < skb_network_header(skb)) 1916 __skb_pull(skb, skb_network_offset(skb)); 1917 while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1918 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1919 
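		/* Each queued skb is chained onto the head skb's frag_list and
		 * its length/truesize are folded into the head skb; the
		 * per-fragment socket ownership (sk/destructor) is dropped
		 * just below.
		 */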
*tail_skb = tmp_skb; 1920 tail_skb = &(tmp_skb->next); 1921 skb->len += tmp_skb->len; 1922 skb->data_len += tmp_skb->len; 1923 skb->truesize += tmp_skb->truesize; 1924 tmp_skb->destructor = NULL; 1925 tmp_skb->sk = NULL; 1926 } 1927 1928 /* Allow local fragmentation. */ 1929 skb->ignore_df = ip6_sk_ignore_df(sk); 1930 __skb_pull(skb, skb_network_header_len(skb)); 1931 1932 final_dst = &fl6->daddr; 1933 if (opt && opt->opt_flen) 1934 ipv6_push_frag_opts(skb, opt, &proto); 1935 if (opt && opt->opt_nflen) 1936 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); 1937 1938 skb_push(skb, sizeof(struct ipv6hdr)); 1939 skb_reset_network_header(skb); 1940 hdr = ipv6_hdr(skb); 1941 1942 ip6_flow_hdr(hdr, v6_cork->tclass, 1943 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1944 ip6_autoflowlabel(net, sk), fl6)); 1945 hdr->hop_limit = v6_cork->hop_limit; 1946 hdr->nexthdr = proto; 1947 hdr->saddr = fl6->saddr; 1948 hdr->daddr = *final_dst; 1949 1950 skb->priority = cork->base.priority; 1951 skb->mark = cork->base.mark; 1952 if (sk_is_tcp(sk)) 1953 skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); 1954 else 1955 skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid); 1956 1957 ip6_cork_steal_dst(skb, cork); 1958 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); 1959 if (proto == IPPROTO_ICMPV6) { 1960 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1961 u8 icmp6_type; 1962 1963 if (sk->sk_socket->type == SOCK_RAW && 1964 !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH)) 1965 icmp6_type = fl6->fl6_icmp_type; 1966 else 1967 icmp6_type = icmp6_hdr(skb)->icmp6_type; 1968 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); 1969 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1970 } 1971 1972 ip6_cork_release(cork, v6_cork); 1973 out: 1974 return skb; 1975 } 1976 1977 int ip6_send_skb(struct sk_buff *skb) 1978 { 1979 struct net *net = sock_net(skb->sk); 1980 struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); 1981 int err; 1982 1983 rcu_read_lock(); 1984 err = ip6_local_out(net, skb->sk, skb); 1985 if (err) { 1986 if (err > 0) 1987 err = net_xmit_errno(err); 1988 if (err) 1989 IP6_INC_STATS(net, rt->rt6i_idev, 1990 IPSTATS_MIB_OUTDISCARDS); 1991 } 1992 1993 rcu_read_unlock(); 1994 return err; 1995 } 1996 1997 int ip6_push_pending_frames(struct sock *sk) 1998 { 1999 struct sk_buff *skb; 2000 2001 skb = ip6_finish_skb(sk); 2002 if (!skb) 2003 return 0; 2004 2005 return ip6_send_skb(skb); 2006 } 2007 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 2008 2009 static void __ip6_flush_pending_frames(struct sock *sk, 2010 struct sk_buff_head *queue, 2011 struct inet_cork_full *cork, 2012 struct inet6_cork *v6_cork) 2013 { 2014 struct sk_buff *skb; 2015 2016 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 2017 if (skb_dst(skb)) 2018 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 2019 IPSTATS_MIB_OUTDISCARDS); 2020 kfree_skb(skb); 2021 } 2022 2023 ip6_cork_release(cork, v6_cork); 2024 } 2025 2026 void ip6_flush_pending_frames(struct sock *sk) 2027 { 2028 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 2029 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 2030 } 2031 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 2032 2033 struct sk_buff *ip6_make_skb(struct sock *sk, 2034 int getfrag(void *from, char *to, int offset, 2035 int len, int odd, struct sk_buff *skb), 2036 void *from, size_t length, int transhdrlen, 2037 struct ipcm6_cookie *ipc6, struct rt6_info *rt, 2038 unsigned int flags, struct inet_cork_full *cork) 2039 { 2040 struct inet6_cork v6_cork; 
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}