1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58
59 #include <asm/uaccess.h>
60
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64
/*
 * Forward declarations of file-local helpers that are referenced below
 * (from the dst_ops tables, the route templates and the lookup code)
 * ahead of their definitions.
 */
static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

/* Handlers installed in the route templates and in ip6_dst_ops_template. */
static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers called from rt6_route_rcv(). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
90
ipv6_cow_metrics(struct dst_entry * dst,unsigned long old)91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122 }
123
ip6_neigh_lookup(const struct dst_entry * dst,const void * daddr)124 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
125 {
126 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
127 if (n)
128 return n;
129 return neigh_create(&nd_tbl, daddr, dst->dev);
130 }
131
rt6_bind_neighbour(struct rt6_info * rt,struct net_device * dev)132 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
133 {
134 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
135 if (!n) {
136 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
137 if (IS_ERR(n))
138 return PTR_ERR(n);
139 }
140 dst_set_neighbour(&rt->dst, n);
141
142 return 0;
143 }
144
145 static struct dst_ops ip6_dst_ops_template = {
146 .family = AF_INET6,
147 .protocol = cpu_to_be16(ETH_P_IPV6),
148 .gc = ip6_dst_gc,
149 .gc_thresh = 1024,
150 .check = ip6_dst_check,
151 .default_advmss = ip6_default_advmss,
152 .mtu = ip6_mtu,
153 .cow_metrics = ipv6_cow_metrics,
154 .destroy = ip6_dst_destroy,
155 .ifdown = ip6_dst_ifdown,
156 .negative_advice = ip6_negative_advice,
157 .link_failure = ip6_link_failure,
158 .update_pmtu = ip6_rt_update_pmtu,
159 .local_out = __ip6_local_out,
160 .neigh_lookup = ip6_neigh_lookup,
161 };
162
ip6_blackhole_mtu(const struct dst_entry * dst)163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
164 {
165 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167 return mtu ? : dst->dev->mtu;
168 }
169
/* dst_ops->update_pmtu hook for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
173
/*
 * dst_ops->cow_metrics hook for blackhole routes: always returns NULL,
 * i.e. never hands out a writable metrics array.
 */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}
179
180 static struct dst_ops ip6_dst_blackhole_ops = {
181 .family = AF_INET6,
182 .protocol = cpu_to_be16(ETH_P_IPV6),
183 .destroy = ip6_dst_destroy,
184 .check = ip6_dst_check,
185 .mtu = ip6_blackhole_mtu,
186 .default_advmss = ip6_default_advmss,
187 .update_pmtu = ip6_rt_blackhole_update_pmtu,
188 .cow_metrics = ip6_rt_blackhole_cow_metrics,
189 .neigh_lookup = ip6_neigh_lookup,
190 };
191
/*
 * Metrics array with the hop-limit slot preset to 255 and every other
 * slot zero. Slots are indexed by RTAX_* minus one.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};
195
196 static struct rt6_info ip6_null_entry_template = {
197 .dst = {
198 .__refcnt = ATOMIC_INIT(1),
199 .__use = 1,
200 .obsolete = -1,
201 .error = -ENETUNREACH,
202 .input = ip6_pkt_discard,
203 .output = ip6_pkt_discard_out,
204 },
205 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
206 .rt6i_protocol = RTPROT_KERNEL,
207 .rt6i_metric = ~(u32) 0,
208 .rt6i_ref = ATOMIC_INIT(1),
209 };
210
211 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
212
213 static int ip6_pkt_prohibit(struct sk_buff *skb);
214 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
215
216 static struct rt6_info ip6_prohibit_entry_template = {
217 .dst = {
218 .__refcnt = ATOMIC_INIT(1),
219 .__use = 1,
220 .obsolete = -1,
221 .error = -EACCES,
222 .input = ip6_pkt_prohibit,
223 .output = ip6_pkt_prohibit_out,
224 },
225 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
226 .rt6i_protocol = RTPROT_KERNEL,
227 .rt6i_metric = ~(u32) 0,
228 .rt6i_ref = ATOMIC_INIT(1),
229 };
230
231 static struct rt6_info ip6_blk_hole_entry_template = {
232 .dst = {
233 .__refcnt = ATOMIC_INIT(1),
234 .__use = 1,
235 .obsolete = -1,
236 .error = -EINVAL,
237 .input = dst_discard,
238 .output = dst_discard,
239 },
240 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
241 .rt6i_protocol = RTPROT_KERNEL,
242 .rt6i_metric = ~(u32) 0,
243 .rt6i_ref = ATOMIC_INIT(1),
244 };
245
246 #endif
247
248 /* allocate dst with ip6_dst_ops */
ip6_dst_alloc(struct dst_ops * ops,struct net_device * dev,int flags)249 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
250 struct net_device *dev,
251 int flags)
252 {
253 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
254
255 if (rt)
256 memset(&rt->rt6i_table, 0,
257 sizeof(*rt) - sizeof(struct dst_entry));
258
259 return rt;
260 }
261
ip6_dst_destroy(struct dst_entry * dst)262 static void ip6_dst_destroy(struct dst_entry *dst)
263 {
264 struct rt6_info *rt = (struct rt6_info *)dst;
265 struct inet6_dev *idev = rt->rt6i_idev;
266 struct inet_peer *peer = rt->rt6i_peer;
267
268 if (!(rt->dst.flags & DST_HOST))
269 dst_destroy_metrics_generic(dst);
270
271 if (idev) {
272 rt->rt6i_idev = NULL;
273 in6_dev_put(idev);
274 }
275 if (peer) {
276 rt->rt6i_peer = NULL;
277 inet_putpeer(peer);
278 }
279 }
280
281 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
282
rt6_peer_genid(void)283 static u32 rt6_peer_genid(void)
284 {
285 return atomic_read(&__rt6_peer_genid);
286 }
287
rt6_bind_peer(struct rt6_info * rt,int create)288 void rt6_bind_peer(struct rt6_info *rt, int create)
289 {
290 struct inet_peer *peer;
291
292 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
293 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
294 inet_putpeer(peer);
295 else
296 rt->rt6i_peer_genid = rt6_peer_genid();
297 }
298
ip6_dst_ifdown(struct dst_entry * dst,struct net_device * dev,int how)299 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
300 int how)
301 {
302 struct rt6_info *rt = (struct rt6_info *)dst;
303 struct inet6_dev *idev = rt->rt6i_idev;
304 struct net_device *loopback_dev =
305 dev_net(dev)->loopback_dev;
306
307 if (dev != loopback_dev && idev && idev->dev == dev) {
308 struct inet6_dev *loopback_idev =
309 in6_dev_get(loopback_dev);
310 if (loopback_idev) {
311 rt->rt6i_idev = loopback_idev;
312 in6_dev_put(idev);
313 }
314 }
315 }
316
rt6_check_expired(const struct rt6_info * rt)317 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
318 {
319 return (rt->rt6i_flags & RTF_EXPIRES) &&
320 time_after(jiffies, rt->dst.expires);
321 }
322
rt6_need_strict(const struct in6_addr * daddr)323 static inline int rt6_need_strict(const struct in6_addr *daddr)
324 {
325 return ipv6_addr_type(daddr) &
326 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
327 }
328
329 /*
330 * Route lookup. Any table->tb6_lock is implied.
331 */
332
rt6_device_match(struct net * net,struct rt6_info * rt,const struct in6_addr * saddr,int oif,int flags)333 static inline struct rt6_info *rt6_device_match(struct net *net,
334 struct rt6_info *rt,
335 const struct in6_addr *saddr,
336 int oif,
337 int flags)
338 {
339 struct rt6_info *local = NULL;
340 struct rt6_info *sprt;
341
342 if (!oif && ipv6_addr_any(saddr))
343 goto out;
344
345 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
346 struct net_device *dev = sprt->dst.dev;
347
348 if (oif) {
349 if (dev->ifindex == oif)
350 return sprt;
351 if (dev->flags & IFF_LOOPBACK) {
352 if (!sprt->rt6i_idev ||
353 sprt->rt6i_idev->dev->ifindex != oif) {
354 if (flags & RT6_LOOKUP_F_IFACE && oif)
355 continue;
356 if (local && (!oif ||
357 local->rt6i_idev->dev->ifindex == oif))
358 continue;
359 }
360 local = sprt;
361 }
362 } else {
363 if (ipv6_chk_addr(net, saddr, dev,
364 flags & RT6_LOOKUP_F_IFACE))
365 return sprt;
366 }
367 }
368
369 if (oif) {
370 if (local)
371 return local;
372
373 if (flags & RT6_LOOKUP_F_IFACE)
374 return net->ipv6.ip6_null_entry;
375 }
376 out:
377 return rt;
378 }
379
380 #ifdef CONFIG_IPV6_ROUTER_PREF
rt6_probe(struct rt6_info * rt)381 static void rt6_probe(struct rt6_info *rt)
382 {
383 struct neighbour *neigh;
384 /*
385 * Okay, this does not seem to be appropriate
386 * for now, however, we need to check if it
387 * is really so; aka Router Reachability Probing.
388 *
389 * Router Reachability Probe MUST be rate-limited
390 * to no more than one per minute.
391 */
392 rcu_read_lock();
393 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
394 if (!neigh || (neigh->nud_state & NUD_VALID))
395 goto out;
396 read_lock_bh(&neigh->lock);
397 if (!(neigh->nud_state & NUD_VALID) &&
398 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
399 struct in6_addr mcaddr;
400 struct in6_addr *target;
401
402 neigh->updated = jiffies;
403 read_unlock_bh(&neigh->lock);
404
405 target = (struct in6_addr *)&neigh->primary_key;
406 addrconf_addr_solict_mult(target, &mcaddr);
407 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
408 } else {
409 read_unlock_bh(&neigh->lock);
410 }
411 out:
412 rcu_read_unlock();
413 }
414 #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
418 #endif
419
420 /*
421 * Default Router Selection (RFC 2461 6.3.6)
422 */
rt6_check_dev(struct rt6_info * rt,int oif)423 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
424 {
425 struct net_device *dev = rt->dst.dev;
426 if (!oif || dev->ifindex == oif)
427 return 2;
428 if ((dev->flags & IFF_LOOPBACK) &&
429 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
430 return 1;
431 return 0;
432 }
433
rt6_check_neigh(struct rt6_info * rt)434 static inline int rt6_check_neigh(struct rt6_info *rt)
435 {
436 struct neighbour *neigh;
437 int m;
438
439 rcu_read_lock();
440 neigh = dst_get_neighbour_noref(&rt->dst);
441 if (rt->rt6i_flags & RTF_NONEXTHOP ||
442 !(rt->rt6i_flags & RTF_GATEWAY))
443 m = 1;
444 else if (neigh) {
445 read_lock_bh(&neigh->lock);
446 if (neigh->nud_state & NUD_VALID)
447 m = 2;
448 #ifdef CONFIG_IPV6_ROUTER_PREF
449 else if (neigh->nud_state & NUD_FAILED)
450 m = 0;
451 #endif
452 else
453 m = 1;
454 read_unlock_bh(&neigh->lock);
455 } else
456 m = 0;
457 rcu_read_unlock();
458 return m;
459 }
460
rt6_score_route(struct rt6_info * rt,int oif,int strict)461 static int rt6_score_route(struct rt6_info *rt, int oif,
462 int strict)
463 {
464 int m, n;
465
466 m = rt6_check_dev(rt, oif);
467 if (!m && (strict & RT6_LOOKUP_F_IFACE))
468 return -1;
469 #ifdef CONFIG_IPV6_ROUTER_PREF
470 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
471 #endif
472 n = rt6_check_neigh(rt);
473 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
474 return -1;
475 return m;
476 }
477
find_match(struct rt6_info * rt,int oif,int strict,int * mpri,struct rt6_info * match)478 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
479 int *mpri, struct rt6_info *match)
480 {
481 int m;
482
483 if (rt6_check_expired(rt))
484 goto out;
485
486 m = rt6_score_route(rt, oif, strict);
487 if (m < 0)
488 goto out;
489
490 if (m > *mpri) {
491 if (strict & RT6_LOOKUP_F_REACHABLE)
492 rt6_probe(match);
493 *mpri = m;
494 match = rt;
495 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
496 rt6_probe(rt);
497 }
498
499 out:
500 return match;
501 }
502
/*
 * Scan the routes of @fn that share @metric, starting at @rr_head and
 * then wrapping around from fn->leaf up to (not including) @rr_head, and
 * return the best one according to find_match(), or NULL.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *best = NULL;
	struct rt6_info *rt;
	int mpri = -1;

	/* From the round-robin head to the end of the equal-metric run. */
	rt = rr_head;
	while (rt && rt->rt6i_metric == metric) {
		best = find_match(rt, oif, strict, &mpri, best);
		rt = rt->dst.rt6_next;
	}

	/* Then from the first leaf up to the round-robin head. */
	rt = fn->leaf;
	while (rt && rt != rr_head && rt->rt6i_metric == metric) {
		best = find_match(rt, oif, strict, &mpri, best);
		rt = rt->dst.rt6_next;
	}

	return best;
}
520
rt6_select(struct fib6_node * fn,int oif,int strict)521 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
522 {
523 struct rt6_info *match, *rt0;
524 struct net *net;
525
526 rt0 = fn->rr_ptr;
527 if (!rt0)
528 fn->rr_ptr = rt0 = fn->leaf;
529
530 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
531
532 if (!match &&
533 (strict & RT6_LOOKUP_F_REACHABLE)) {
534 struct rt6_info *next = rt0->dst.rt6_next;
535
536 /* no entries matched; do round-robin */
537 if (!next || next->rt6i_metric != rt0->rt6i_metric)
538 next = fn->leaf;
539
540 if (next != rt0)
541 fn->rr_ptr = next;
542 }
543
544 net = dev_net(rt0->dst.dev);
545 return match ? match : net->ipv6.ip6_null_entry;
546 }
547
548 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router
 * @gwaddr.
 *
 * @opt points at the option and @len is its size in bytes. After the
 * sanity checks the matching route is looked up; a zero lifetime deletes
 * it, a non-zero lifetime adds it when missing or refreshes its
 * preference bits, and the expiry is then set from the lifetime.
 *
 * Returns 0 on success (including "nothing to do") or -EINVAL for a
 * malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->dst.expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
620 #endif
621
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup higher up the tree.
 *
 * Expands inside a function that provides the variables 'rt' and 'fn'
 * and the labels 'restart' and 'out'. It only acts when 'rt' is the
 * namespace's null entry. In that case it walks upwards from 'fn':
 *   - at a node flagged RTN_TL_ROOT it gives up and jumps to 'out';
 *   - otherwise it moves to the parent, or, when the parent has a
 *     subtree other than the node just left, to the result of looking
 *     'saddr' up in that subtree;
 *   - at the first node flagged RTN_RTINFO it jumps to 'restart'.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
639
/*
 * Lookup callback passed to fib6_rule_lookup(): resolve @fl6 in @table.
 *
 * The table's read lock is held for the whole lookup. The route returned
 * has been passed through dst_use(), which this code relies on to account
 * the use; it may be the namespace's null entry when nothing matched.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	/* Re-entered by BACKTRACK() with fn moved to another node. */
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
659
/*
 * Exported lookup entry point: run @fl6 through fib6_rule_lookup() with
 * ip6_pol_route_lookup() as the per-table lookup and return its result.
 */
struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
666
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,int strict)667 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
668 const struct in6_addr *saddr, int oif, int strict)
669 {
670 struct flowi6 fl6 = {
671 .flowi6_oif = oif,
672 .daddr = *daddr,
673 };
674 struct dst_entry *dst;
675 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
676
677 if (saddr) {
678 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
679 flags |= RT6_LOOKUP_F_HAS_SADDR;
680 }
681
682 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
683 if (dst->error == 0)
684 return (struct rt6_info *) dst;
685
686 dst_release(dst);
687
688 return NULL;
689 }
690
691 EXPORT_SYMBOL(rt6_lookup);
692
693 /* ip6_ins_rt is called with FREE table->tb6_lock.
694 It takes new route entry, the addition fails by any reason the
695 route is freed. In any case, if caller does not hold it, it may
696 be destroyed.
697 */
698
__ip6_ins_rt(struct rt6_info * rt,struct nl_info * info)699 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
700 {
701 int err;
702 struct fib6_table *table;
703
704 table = rt->rt6i_table;
705 write_lock_bh(&table->tb6_lock);
706 err = fib6_add(&table->tb6_root, rt, info);
707 write_unlock_bh(&table->tb6_lock);
708
709 return err;
710 }
711
/*
 * Insert @rt into its table using an nl_info that carries only the
 * network namespace of the route's device. Returns __ip6_ins_rt()'s
 * result.
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_ins_rt(rt, &info);
}
719
/*
 * Build an RTF_CACHE copy of @ort for destination @daddr and bind a
 * neighbour entry to it.
 *
 * For copies without RTF_GATEWAY the destination itself becomes the
 * gateway address, and RTF_ANYCAST is set when @daddr equals the address
 * of a non-/128 original. With CONFIG_IPV6_SUBTREES, a copy that has a
 * source prefix is narrowed to exactly @saddr (when given).
 *
 * Returns the new route, NULL when ip6_rt_copy() returned NULL, or NULL
 * after freeing the copy when no neighbour could be bound.
 */
static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		/* One retry is allowed, and only when not in softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (ort->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			rt->rt6i_gateway = *daddr;
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
			struct net *net = dev_net(rt->dst.dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/*
				 * Temporarily set elasticity to 1 and the
				 * minimum interval to 0, run the dst garbage
				 * collector, restore both sysctls and try to
				 * bind again.
				 */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
782
rt6_alloc_clone(struct rt6_info * ort,const struct in6_addr * daddr)783 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
784 const struct in6_addr *daddr)
785 {
786 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
787
788 if (rt) {
789 rt->rt6i_flags |= RTF_CACHE;
790 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
791 }
792 return rt;
793 }
794
/*
 * Core policy-routing lookup shared by the input and output paths.
 *
 * Selects a route for @fl6 in @table, restricted to interface @oif when
 * RT6_LOOKUP_F_IFACE is set in @flags. Unless forwarding is enabled in
 * devconf_all, the first pass also asks for RT6_LOOKUP_F_REACHABLE.
 *
 * When the selected route is neither the null entry nor already an
 * RTF_CACHE entry, a copy is created (rt6_alloc_cow() or
 * rt6_alloc_clone()) and inserted into the table with the lock dropped.
 * If that fails the whole lookup is retried, up to three attempts.
 *
 * Returns a held route with lastuse/__use updated; this may be the
 * namespace's null entry.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Pin the route so it survives while the table lock is dropped. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	/* Swap the reference from the original route to the copy (or null entry). */
	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/*
	 * Reached with the table lock held. If this pass asked for
	 * reachability, redo the lookup once without that requirement.
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
864
/* Input-path lookup callback: ip6_pol_route() keyed on the flow's incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
870
ip6_route_input(struct sk_buff * skb)871 void ip6_route_input(struct sk_buff *skb)
872 {
873 const struct ipv6hdr *iph = ipv6_hdr(skb);
874 struct net *net = dev_net(skb->dev);
875 int flags = RT6_LOOKUP_F_HAS_SADDR;
876 struct flowi6 fl6 = {
877 .flowi6_iif = skb->dev->ifindex,
878 .daddr = iph->daddr,
879 .saddr = iph->saddr,
880 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
881 .flowi6_mark = skb->mark,
882 .flowi6_proto = iph->nexthdr,
883 };
884
885 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
886 flags |= RT6_LOOKUP_F_IFACE;
887
888 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
889 }
890
/* Output-path lookup callback: ip6_pol_route() keyed on the flow's outgoing interface. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
896
ip6_route_output(struct net * net,const struct sock * sk,struct flowi6 * fl6)897 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
898 struct flowi6 *fl6)
899 {
900 int flags = 0;
901
902 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
903 flags |= RT6_LOOKUP_F_IFACE;
904
905 if (!ipv6_addr_any(&fl6->saddr))
906 flags |= RT6_LOOKUP_F_HAS_SADDR;
907 else if (sk)
908 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
909
910 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
911 }
912
913 EXPORT_SYMBOL(ip6_route_output);
914
ip6_blackhole_route(struct net * net,struct dst_entry * dst_orig)915 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
916 {
917 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
918 struct dst_entry *new = NULL;
919
920 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
921 if (rt) {
922 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
923
924 new = &rt->dst;
925
926 new->__use = 1;
927 new->input = dst_discard;
928 new->output = dst_discard;
929
930 if (dst_metrics_read_only(&ort->dst))
931 new->_metrics = ort->dst._metrics;
932 else
933 dst_copy_metrics(new, &ort->dst);
934 rt->rt6i_idev = ort->rt6i_idev;
935 if (rt->rt6i_idev)
936 in6_dev_hold(rt->rt6i_idev);
937 rt->dst.expires = 0;
938
939 rt->rt6i_gateway = ort->rt6i_gateway;
940 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
941 rt->rt6i_metric = 0;
942
943 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
944 #ifdef CONFIG_IPV6_SUBTREES
945 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
946 #endif
947
948 dst_free(new);
949 }
950
951 dst_release(dst_orig);
952 return new ? new : ERR_PTR(-ENOMEM);
953 }
954
955 /*
956 * Destination cache support functions
957 */
958
/*
 * dst_ops->check hook: a cached dst is still valid when its route is
 * linked to a fib6 node whose serial number equals @cookie.
 *
 * On a valid entry whose peer generation is stale, a peer is bound if the
 * route has none (without creating one) and the generation id is
 * refreshed. Returns @dst when valid, NULL otherwise.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (!rt->rt6i_node || rt->rt6i_node->fn_sernum != cookie)
		return NULL;

	if (rt->rt6i_peer_genid != rt6_peer_genid()) {
		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 0);
		rt->rt6i_peer_genid = rt6_peer_genid();
	}

	return dst;
}
975
ip6_negative_advice(struct dst_entry * dst)976 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
977 {
978 struct rt6_info *rt = (struct rt6_info *) dst;
979
980 if (rt) {
981 if (rt->rt6i_flags & RTF_CACHE) {
982 if (rt6_check_expired(rt)) {
983 ip6_del_rt(rt);
984 dst = NULL;
985 }
986 } else {
987 dst_release(dst);
988 dst = NULL;
989 }
990 }
991 return dst;
992 }
993
ip6_link_failure(struct sk_buff * skb)994 static void ip6_link_failure(struct sk_buff *skb)
995 {
996 struct rt6_info *rt;
997
998 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
999
1000 rt = (struct rt6_info *) skb_dst(skb);
1001 if (rt) {
1002 if (rt->rt6i_flags & RTF_CACHE) {
1003 dst_set_expires(&rt->dst, 0);
1004 rt->rt6i_flags |= RTF_EXPIRES;
1005 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1006 rt->rt6i_node->fn_sernum = -1;
1007 }
1008 }
1009
/*
 * dst_ops->update_pmtu hook: lower the path MTU of a /128 route.
 *
 * Ignored unless @mtu is below the current dst MTU and the destination
 * prefix length is 128. Values below IPV6_MIN_MTU are clamped to it and
 * RTAX_FEATURE_ALLFRAG is added to the route's features.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu >= dst_mtu(dst) || rt6->rt6i_dst.plen != 128)
		return;

	rt6->rt6i_flags |= RTF_MODIFIED;

	if (mtu < IPV6_MIN_MTU) {
		u32 features = dst_metric(dst, RTAX_FEATURES);

		mtu = IPV6_MIN_MTU;
		dst_metric_set(dst, RTAX_FEATURES,
			       features | RTAX_FEATURE_ALLFRAG);
	}

	dst_metric_set(dst, RTAX_MTU, mtu);
}
1025
ip6_default_advmss(const struct dst_entry * dst)1026 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1027 {
1028 struct net_device *dev = dst->dev;
1029 unsigned int mtu = dst_mtu(dst);
1030 struct net *net = dev_net(dev);
1031
1032 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1033
1034 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1035 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1036
1037 /*
1038 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1039 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1040 * IPV6_MAXPLEN is also valid and means: "any MSS,
1041 * rely only on pmtu discovery"
1042 */
1043 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1044 mtu = IPV6_MAXPLEN;
1045 return mtu;
1046 }
1047
ip6_mtu(const struct dst_entry * dst)1048 static unsigned int ip6_mtu(const struct dst_entry *dst)
1049 {
1050 struct inet6_dev *idev;
1051 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1052
1053 if (mtu)
1054 return mtu;
1055
1056 mtu = IPV6_MIN_MTU;
1057
1058 rcu_read_lock();
1059 idev = __in6_dev_get(dst->dev);
1060 if (idev)
1061 mtu = idev->cnf.mtu6;
1062 rcu_read_unlock();
1063
1064 return mtu;
1065 }
1066
/*
 * Singly linked list (chained through dst->next) of the dsts created by
 * icmp6_dst_alloc(). All accesses in this file hold icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1069
icmp6_dst_alloc(struct net_device * dev,struct neighbour * neigh,struct flowi6 * fl6)1070 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1071 struct neighbour *neigh,
1072 struct flowi6 *fl6)
1073 {
1074 struct dst_entry *dst;
1075 struct rt6_info *rt;
1076 struct inet6_dev *idev = in6_dev_get(dev);
1077 struct net *net = dev_net(dev);
1078
1079 if (unlikely(!idev))
1080 return ERR_PTR(-ENODEV);
1081
1082 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1083 if (unlikely(!rt)) {
1084 in6_dev_put(idev);
1085 dst = ERR_PTR(-ENOMEM);
1086 goto out;
1087 }
1088
1089 if (neigh)
1090 neigh_hold(neigh);
1091 else {
1092 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1093 if (IS_ERR(neigh)) {
1094 in6_dev_put(idev);
1095 dst_free(&rt->dst);
1096 return ERR_CAST(neigh);
1097 }
1098 }
1099
1100 rt->dst.flags |= DST_HOST;
1101 rt->dst.output = ip6_output;
1102 dst_set_neighbour(&rt->dst, neigh);
1103 atomic_set(&rt->dst.__refcnt, 1);
1104 rt->rt6i_dst.addr = fl6->daddr;
1105 rt->rt6i_dst.plen = 128;
1106 rt->rt6i_idev = idev;
1107 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1108
1109 spin_lock_bh(&icmp6_dst_lock);
1110 rt->dst.next = icmp6_dst_gc_list;
1111 icmp6_dst_gc_list = &rt->dst;
1112 spin_unlock_bh(&icmp6_dst_lock);
1113
1114 fib6_force_start_gc(net);
1115
1116 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1117
1118 out:
1119 return dst;
1120 }
1121
icmp6_dst_gc(void)1122 int icmp6_dst_gc(void)
1123 {
1124 struct dst_entry *dst, **pprev;
1125 int more = 0;
1126
1127 spin_lock_bh(&icmp6_dst_lock);
1128 pprev = &icmp6_dst_gc_list;
1129
1130 while ((dst = *pprev) != NULL) {
1131 if (!atomic_read(&dst->__refcnt)) {
1132 *pprev = dst->next;
1133 dst_free(dst);
1134 } else {
1135 pprev = &dst->next;
1136 ++more;
1137 }
1138 }
1139
1140 spin_unlock_bh(&icmp6_dst_lock);
1141
1142 return more;
1143 }
1144
icmp6_clean_all(int (* func)(struct rt6_info * rt,void * arg),void * arg)1145 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1146 void *arg)
1147 {
1148 struct dst_entry *dst, **pprev;
1149
1150 spin_lock_bh(&icmp6_dst_lock);
1151 pprev = &icmp6_dst_gc_list;
1152 while ((dst = *pprev) != NULL) {
1153 struct rt6_info *rt = (struct rt6_info *) dst;
1154 if (func(rt, arg)) {
1155 *pprev = dst->next;
1156 dst_free(dst);
1157 } else {
1158 pprev = &dst->next;
1159 }
1160 }
1161 spin_unlock_bh(&icmp6_dst_lock);
1162 }
1163
ip6_dst_gc(struct dst_ops * ops)1164 static int ip6_dst_gc(struct dst_ops *ops)
1165 {
1166 unsigned long now = jiffies;
1167 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1168 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1169 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1170 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1171 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1172 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1173 int entries;
1174
1175 entries = dst_entries_get_fast(ops);
1176 if (time_after(rt_last_gc + rt_min_interval, now) &&
1177 entries <= rt_max_size)
1178 goto out;
1179
1180 net->ipv6.ip6_rt_gc_expire++;
1181 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1182 net->ipv6.ip6_rt_last_gc = now;
1183 entries = dst_entries_get_slow(ops);
1184 if (entries < ops->gc_thresh)
1185 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1186 out:
1187 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1188 return entries > rt_max_size;
1189 }
1190
1191 /* Clean host part of a prefix. Not necessary in radix tree,
1192 but results in cleaner routing tables.
1193
1194 Remove it only when all the things will work!
1195 */
1196
ip6_dst_hoplimit(struct dst_entry * dst)1197 int ip6_dst_hoplimit(struct dst_entry *dst)
1198 {
1199 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1200 if (hoplimit == 0) {
1201 struct net_device *dev = dst->dev;
1202 struct inet6_dev *idev;
1203
1204 rcu_read_lock();
1205 idev = __in6_dev_get(dev);
1206 if (idev)
1207 hoplimit = idev->cnf.hop_limit;
1208 else
1209 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1210 rcu_read_unlock();
1211 }
1212 return hoplimit;
1213 }
1214 EXPORT_SYMBOL(ip6_dst_hoplimit);
1215
1216 /*
1217 *
1218 */
1219
ip6_route_add(struct fib6_config * cfg)1220 int ip6_route_add(struct fib6_config *cfg)
1221 {
1222 int err;
1223 struct net *net = cfg->fc_nlinfo.nl_net;
1224 struct rt6_info *rt = NULL;
1225 struct net_device *dev = NULL;
1226 struct inet6_dev *idev = NULL;
1227 struct fib6_table *table;
1228 int addr_type;
1229
1230 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1231 return -EINVAL;
1232 #ifndef CONFIG_IPV6_SUBTREES
1233 if (cfg->fc_src_len)
1234 return -EINVAL;
1235 #endif
1236 if (cfg->fc_ifindex) {
1237 err = -ENODEV;
1238 dev = dev_get_by_index(net, cfg->fc_ifindex);
1239 if (!dev)
1240 goto out;
1241 idev = in6_dev_get(dev);
1242 if (!idev)
1243 goto out;
1244 }
1245
1246 if (cfg->fc_metric == 0)
1247 cfg->fc_metric = IP6_RT_PRIO_USER;
1248
1249 err = -ENOBUFS;
1250 if (cfg->fc_nlinfo.nlh &&
1251 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1252 table = fib6_get_table(net, cfg->fc_table);
1253 if (!table) {
1254 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1255 table = fib6_new_table(net, cfg->fc_table);
1256 }
1257 } else {
1258 table = fib6_new_table(net, cfg->fc_table);
1259 }
1260
1261 if (!table)
1262 goto out;
1263
1264 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1265
1266 if (!rt) {
1267 err = -ENOMEM;
1268 goto out;
1269 }
1270
1271 rt->dst.obsolete = -1;
1272 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
1273 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1274 0;
1275
1276 if (cfg->fc_protocol == RTPROT_UNSPEC)
1277 cfg->fc_protocol = RTPROT_BOOT;
1278 rt->rt6i_protocol = cfg->fc_protocol;
1279
1280 addr_type = ipv6_addr_type(&cfg->fc_dst);
1281
1282 if (addr_type & IPV6_ADDR_MULTICAST)
1283 rt->dst.input = ip6_mc_input;
1284 else if (cfg->fc_flags & RTF_LOCAL)
1285 rt->dst.input = ip6_input;
1286 else
1287 rt->dst.input = ip6_forward;
1288
1289 rt->dst.output = ip6_output;
1290
1291 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1292 rt->rt6i_dst.plen = cfg->fc_dst_len;
1293 if (rt->rt6i_dst.plen == 128)
1294 rt->dst.flags |= DST_HOST;
1295
1296 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1297 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1298 if (!metrics) {
1299 err = -ENOMEM;
1300 goto out;
1301 }
1302 dst_init_metrics(&rt->dst, metrics, 0);
1303 }
1304 #ifdef CONFIG_IPV6_SUBTREES
1305 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1306 rt->rt6i_src.plen = cfg->fc_src_len;
1307 #endif
1308
1309 rt->rt6i_metric = cfg->fc_metric;
1310
1311 /* We cannot add true routes via loopback here,
1312 they would result in kernel looping; promote them to reject routes
1313 */
1314 if ((cfg->fc_flags & RTF_REJECT) ||
1315 (dev && (dev->flags & IFF_LOOPBACK) &&
1316 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1317 !(cfg->fc_flags & RTF_LOCAL))) {
1318 /* hold loopback dev/idev if we haven't done so. */
1319 if (dev != net->loopback_dev) {
1320 if (dev) {
1321 dev_put(dev);
1322 in6_dev_put(idev);
1323 }
1324 dev = net->loopback_dev;
1325 dev_hold(dev);
1326 idev = in6_dev_get(dev);
1327 if (!idev) {
1328 err = -ENODEV;
1329 goto out;
1330 }
1331 }
1332 rt->dst.output = ip6_pkt_discard_out;
1333 rt->dst.input = ip6_pkt_discard;
1334 rt->dst.error = -ENETUNREACH;
1335 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1336 goto install_route;
1337 }
1338
1339 if (cfg->fc_flags & RTF_GATEWAY) {
1340 const struct in6_addr *gw_addr;
1341 int gwa_type;
1342
1343 gw_addr = &cfg->fc_gateway;
1344 rt->rt6i_gateway = *gw_addr;
1345 gwa_type = ipv6_addr_type(gw_addr);
1346
1347 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1348 struct rt6_info *grt;
1349
1350 /* IPv6 strictly inhibits using not link-local
1351 addresses as nexthop address.
1352 Otherwise, router will not able to send redirects.
1353 It is very good, but in some (rare!) circumstances
1354 (SIT, PtP, NBMA NOARP links) it is handy to allow
1355 some exceptions. --ANK
1356 */
1357 err = -EINVAL;
1358 if (!(gwa_type & IPV6_ADDR_UNICAST))
1359 goto out;
1360
1361 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1362
1363 err = -EHOSTUNREACH;
1364 if (!grt)
1365 goto out;
1366 if (dev) {
1367 if (dev != grt->dst.dev) {
1368 dst_release(&grt->dst);
1369 goto out;
1370 }
1371 } else {
1372 dev = grt->dst.dev;
1373 idev = grt->rt6i_idev;
1374 dev_hold(dev);
1375 in6_dev_hold(grt->rt6i_idev);
1376 }
1377 if (!(grt->rt6i_flags & RTF_GATEWAY))
1378 err = 0;
1379 dst_release(&grt->dst);
1380
1381 if (err)
1382 goto out;
1383 }
1384 err = -EINVAL;
1385 if (!dev || (dev->flags & IFF_LOOPBACK))
1386 goto out;
1387 }
1388
1389 err = -ENODEV;
1390 if (!dev)
1391 goto out;
1392
1393 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1394 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1395 err = -EINVAL;
1396 goto out;
1397 }
1398 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1399 rt->rt6i_prefsrc.plen = 128;
1400 } else
1401 rt->rt6i_prefsrc.plen = 0;
1402
1403 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1404 err = rt6_bind_neighbour(rt, dev);
1405 if (err)
1406 goto out;
1407 }
1408
1409 rt->rt6i_flags = cfg->fc_flags;
1410
1411 install_route:
1412 if (cfg->fc_mx) {
1413 struct nlattr *nla;
1414 int remaining;
1415
1416 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1417 int type = nla_type(nla);
1418
1419 if (type) {
1420 if (type > RTAX_MAX) {
1421 err = -EINVAL;
1422 goto out;
1423 }
1424
1425 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1426 }
1427 }
1428 }
1429
1430 rt->dst.dev = dev;
1431 rt->rt6i_idev = idev;
1432 rt->rt6i_table = table;
1433
1434 cfg->fc_nlinfo.nl_net = dev_net(dev);
1435
1436 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1437
1438 out:
1439 if (dev)
1440 dev_put(dev);
1441 if (idev)
1442 in6_dev_put(idev);
1443 if (rt)
1444 dst_free(&rt->dst);
1445 return err;
1446 }
1447
__ip6_del_rt(struct rt6_info * rt,struct nl_info * info)1448 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1449 {
1450 int err;
1451 struct fib6_table *table;
1452 struct net *net = dev_net(rt->dst.dev);
1453
1454 if (rt == net->ipv6.ip6_null_entry)
1455 return -ENOENT;
1456
1457 table = rt->rt6i_table;
1458 write_lock_bh(&table->tb6_lock);
1459
1460 err = fib6_del(rt, info);
1461 dst_release(&rt->dst);
1462
1463 write_unlock_bh(&table->tb6_lock);
1464
1465 return err;
1466 }
1467
ip6_del_rt(struct rt6_info * rt)1468 int ip6_del_rt(struct rt6_info *rt)
1469 {
1470 struct nl_info info = {
1471 .nl_net = dev_net(rt->dst.dev),
1472 };
1473 return __ip6_del_rt(rt, &info);
1474 }
1475
ip6_route_del(struct fib6_config * cfg)1476 static int ip6_route_del(struct fib6_config *cfg)
1477 {
1478 struct fib6_table *table;
1479 struct fib6_node *fn;
1480 struct rt6_info *rt;
1481 int err = -ESRCH;
1482
1483 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1484 if (!table)
1485 return err;
1486
1487 read_lock_bh(&table->tb6_lock);
1488
1489 fn = fib6_locate(&table->tb6_root,
1490 &cfg->fc_dst, cfg->fc_dst_len,
1491 &cfg->fc_src, cfg->fc_src_len);
1492
1493 if (fn) {
1494 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1495 if (cfg->fc_ifindex &&
1496 (!rt->dst.dev ||
1497 rt->dst.dev->ifindex != cfg->fc_ifindex))
1498 continue;
1499 if (cfg->fc_flags & RTF_GATEWAY &&
1500 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1501 continue;
1502 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1503 continue;
1504 dst_hold(&rt->dst);
1505 read_unlock_bh(&table->tb6_lock);
1506
1507 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1508 }
1509 }
1510 read_unlock_bh(&table->tb6_lock);
1511
1512 return err;
1513 }
1514
1515 /*
1516 * Handle redirects
1517 */
1518 struct ip6rd_flowi {
1519 struct flowi6 fl6;
1520 struct in6_addr gateway;
1521 };
1522
__ip6_route_redirect(struct net * net,struct fib6_table * table,struct flowi6 * fl6,int flags)1523 static struct rt6_info *__ip6_route_redirect(struct net *net,
1524 struct fib6_table *table,
1525 struct flowi6 *fl6,
1526 int flags)
1527 {
1528 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1529 struct rt6_info *rt;
1530 struct fib6_node *fn;
1531
1532 /*
1533 * Get the "current" route for this destination and
1534 * check if the redirect has come from approriate router.
1535 *
1536 * RFC 2461 specifies that redirects should only be
1537 * accepted if they come from the nexthop to the target.
1538 * Due to the way the routes are chosen, this notion
1539 * is a bit fuzzy and one might need to check all possible
1540 * routes.
1541 */
1542
1543 read_lock_bh(&table->tb6_lock);
1544 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1545 restart:
1546 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1547 /*
1548 * Current route is on-link; redirect is always invalid.
1549 *
1550 * Seems, previous statement is not true. It could
1551 * be node, which looks for us as on-link (f.e. proxy ndisc)
1552 * But then router serving it might decide, that we should
1553 * know truth 8)8) --ANK (980726).
1554 */
1555 if (rt6_check_expired(rt))
1556 continue;
1557 if (!(rt->rt6i_flags & RTF_GATEWAY))
1558 continue;
1559 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1560 continue;
1561 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1562 continue;
1563 break;
1564 }
1565
1566 if (!rt)
1567 rt = net->ipv6.ip6_null_entry;
1568 BACKTRACK(net, &fl6->saddr);
1569 out:
1570 dst_hold(&rt->dst);
1571
1572 read_unlock_bh(&table->tb6_lock);
1573
1574 return rt;
1575 };
1576
ip6_route_redirect(const struct in6_addr * dest,const struct in6_addr * src,const struct in6_addr * gateway,struct net_device * dev)1577 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1578 const struct in6_addr *src,
1579 const struct in6_addr *gateway,
1580 struct net_device *dev)
1581 {
1582 int flags = RT6_LOOKUP_F_HAS_SADDR;
1583 struct net *net = dev_net(dev);
1584 struct ip6rd_flowi rdfl = {
1585 .fl6 = {
1586 .flowi6_oif = dev->ifindex,
1587 .daddr = *dest,
1588 .saddr = *src,
1589 },
1590 };
1591
1592 rdfl.gateway = *gateway;
1593
1594 if (rt6_need_strict(dest))
1595 flags |= RT6_LOOKUP_F_IFACE;
1596
1597 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1598 flags, __ip6_route_redirect);
1599 }
1600
/*
 * Process a redirect for @dest that was sent by @saddr.
 *
 * @neigh is the neighbour entry of the new next hop and @lladdr its
 * link-layer address; @on_link is non-zero when the target itself is the
 * destination (no gateway involved).
 *
 * The redirect is dropped (with a rate-limited debug message) when
 * ip6_route_redirect() only finds the null entry.  Otherwise the neighbour
 * is updated, the current route is confirmed and, unless it already uses
 * @neigh, a host cache route through @neigh is inserted, netevent
 * listeners are notified and a previous RTF_CACHE route is deleted.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct net *net = dev_net(neigh->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt;
	u32 update_flags = NEIGH_UPDATE_F_WEAK_OVERRIDE |
			   NEIGH_UPDATE_F_OVERRIDE;

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	The redirect is accepted from here on.  When the target is
	 *	not on-link the new next hop is also marked as a router.
	 */
	if (!on_link)
		update_flags |= NEIGH_UPDATE_F_OVERRIDE_ISROUTER |
				NEIGH_UPDATE_F_ISROUTER;

	neigh_update(neigh, lladdr, NUD_STALE, update_flags);

	/*
	 * Redirects are only generated in response to data packets, so
	 * receiving one shows that the current next hop is reachable.
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (dst_get_neighbour_noref_raw(&rt->dst) == neigh)
		goto out;

	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (!on_link)
		nrt->rt6i_flags |= RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		/* ip6_del_rt() ends up releasing our reference on rt. */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1666
1667 /*
1668 * Handle ICMP "packet too big" messages
1669 * i.e. Path MTU discovery
1670 */
1671
/*
 * Store a learned path MTU in @rt: set the RTAX_MTU metric, add
 * RTAX_FEATURE_ALLFRAG when @allfrag is set, arm the expiry timer with
 * ip6_rt_mtu_expires and OR @flags into the route flags.
 */
static void rt6_pmtu_store(struct rt6_info *rt, struct net *net, u32 pmtu,
			   int allfrag, u32 flags)
{
	dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
	if (allfrag) {
		u32 features = dst_metric(&rt->dst, RTAX_FEATURES);

		dst_metric_set(&rt->dst, RTAX_FEATURES,
			       features | RTAX_FEATURE_ALLFRAG);
	}
	dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
	rt->rt6i_flags |= flags;
}

/*
 * Apply a "packet too big" report of @pmtu for the flow @saddr -> @daddr,
 * looked up with output interface @ifindex (0 for any).
 *
 * Expired routes found on the way are deleted and the lookup is repeated.
 * Nothing happens when @pmtu is not smaller than the route's current MTU.
 * A value below IPV6_MIN_MTU is raised to IPV6_MIN_MTU and the ALLFRAG
 * feature is requested (RFC 2460).  An RTF_CACHE route is updated in
 * place; for any other route a host route carrying the new MTU is created
 * and inserted, so that the old value comes back once it expires.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	for (;;) {
		rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
		if (!rt)
			return;
		if (!rt6_check_expired(rt))
			break;
		ip6_del_rt(rt);
	}

	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/*
	 * Too-big messages are only sent in response to data packets, so
	 * the next hop is evidently reachable.
	 */
	dst_confirm(&rt->dst);

	if (rt->rt6i_flags & RTF_CACHE) {
		rt6_pmtu_store(rt, net, pmtu, allfrag,
			       RTF_MODIFIED|RTF_EXPIRES);
		goto out;
	}

	/*
	 * Network route:
	 *  - connected route (no neighbour bound, not NONEXTHOP): COW;
	 *  - gatewayed or NONEXTHOP route: clone.
	 */
	if (dst_get_neighbour_noref_raw(&rt->dst) ||
	    (rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_clone(rt, daddr);
	else
		nrt = rt6_alloc_cow(rt, daddr, saddr);

	if (nrt) {
		/*
		 * The reduced MTU lives for ip6_rt_mtu_expires; once the
		 * entry expires a larger path MTU can be discovered again
		 * (cf. RFC 1981).
		 */
		rt6_pmtu_store(nrt, net, pmtu, allfrag,
			       RTF_DYNAMIC|RTF_EXPIRES);
		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1756
/*
 * Entry point for path MTU updates received on @dev.
 *
 * RFC 1981 requires the sender to shrink packets on the path that caused
 * the Packet Too Big message, but the interface the original packet left
 * through is not known in general.  The update is therefore applied twice:
 * to the route an unconstrained lookup returns (future packets), and to
 * the route through the receiving interface (covers packets forced out of
 * it, e.g. with SO_BINDTODEVICE).
 */
void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);
	const int oifs[] = { 0, dev->ifindex };
	unsigned int i;

	for (i = 0; i < sizeof(oifs) / sizeof(oifs[0]); i++)
		rt6_do_pmtu_disc(daddr, saddr, net, pmtu, oifs[i]);
}
1777
1778 /*
1779 * Misc support functions
1780 */
1781
ip6_rt_copy(const struct rt6_info * ort,const struct in6_addr * dest)1782 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1783 const struct in6_addr *dest)
1784 {
1785 struct net *net = dev_net(ort->dst.dev);
1786 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1787 ort->dst.dev, 0);
1788
1789 if (rt) {
1790 rt->dst.input = ort->dst.input;
1791 rt->dst.output = ort->dst.output;
1792 rt->dst.flags |= DST_HOST;
1793
1794 rt->rt6i_dst.addr = *dest;
1795 rt->rt6i_dst.plen = 128;
1796 dst_copy_metrics(&rt->dst, &ort->dst);
1797 rt->dst.error = ort->dst.error;
1798 rt->rt6i_idev = ort->rt6i_idev;
1799 if (rt->rt6i_idev)
1800 in6_dev_hold(rt->rt6i_idev);
1801 rt->dst.lastuse = jiffies;
1802 rt->dst.expires = 0;
1803
1804 rt->rt6i_gateway = ort->rt6i_gateway;
1805 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1806 rt->rt6i_metric = 0;
1807
1808 #ifdef CONFIG_IPV6_SUBTREES
1809 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1810 #endif
1811 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1812 rt->rt6i_table = ort->rt6i_table;
1813 }
1814 return rt;
1815 }
1816
1817 #ifdef CONFIG_IPV6_ROUTE_INFO
rt6_get_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,int ifindex)1818 static struct rt6_info *rt6_get_route_info(struct net *net,
1819 const struct in6_addr *prefix, int prefixlen,
1820 const struct in6_addr *gwaddr, int ifindex)
1821 {
1822 struct fib6_node *fn;
1823 struct rt6_info *rt = NULL;
1824 struct fib6_table *table;
1825
1826 table = fib6_get_table(net, RT6_TABLE_INFO);
1827 if (!table)
1828 return NULL;
1829
1830 write_lock_bh(&table->tb6_lock);
1831 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1832 if (!fn)
1833 goto out;
1834
1835 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1836 if (rt->dst.dev->ifindex != ifindex)
1837 continue;
1838 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1839 continue;
1840 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1841 continue;
1842 dst_hold(&rt->dst);
1843 break;
1844 }
1845 out:
1846 write_unlock_bh(&table->tb6_lock);
1847 return rt;
1848 }
1849
rt6_add_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,int ifindex,unsigned pref)1850 static struct rt6_info *rt6_add_route_info(struct net *net,
1851 const struct in6_addr *prefix, int prefixlen,
1852 const struct in6_addr *gwaddr, int ifindex,
1853 unsigned pref)
1854 {
1855 struct fib6_config cfg = {
1856 .fc_table = RT6_TABLE_INFO,
1857 .fc_metric = IP6_RT_PRIO_USER,
1858 .fc_ifindex = ifindex,
1859 .fc_dst_len = prefixlen,
1860 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1861 RTF_UP | RTF_PREF(pref),
1862 .fc_nlinfo.pid = 0,
1863 .fc_nlinfo.nlh = NULL,
1864 .fc_nlinfo.nl_net = net,
1865 };
1866
1867 cfg.fc_dst = *prefix;
1868 cfg.fc_gateway = *gwaddr;
1869
1870 /* We should treat it as a default route if prefix length is 0. */
1871 if (!prefixlen)
1872 cfg.fc_flags |= RTF_DEFAULT;
1873
1874 ip6_route_add(&cfg);
1875
1876 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1877 }
1878 #endif
1879
rt6_get_dflt_router(const struct in6_addr * addr,struct net_device * dev)1880 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1881 {
1882 struct rt6_info *rt;
1883 struct fib6_table *table;
1884
1885 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1886 if (!table)
1887 return NULL;
1888
1889 write_lock_bh(&table->tb6_lock);
1890 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1891 if (dev == rt->dst.dev &&
1892 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1893 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1894 break;
1895 }
1896 if (rt)
1897 dst_hold(&rt->dst);
1898 write_unlock_bh(&table->tb6_lock);
1899 return rt;
1900 }
1901
rt6_add_dflt_router(const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)1902 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1903 struct net_device *dev,
1904 unsigned int pref)
1905 {
1906 struct fib6_config cfg = {
1907 .fc_table = RT6_TABLE_DFLT,
1908 .fc_metric = IP6_RT_PRIO_USER,
1909 .fc_ifindex = dev->ifindex,
1910 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1911 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1912 .fc_nlinfo.pid = 0,
1913 .fc_nlinfo.nlh = NULL,
1914 .fc_nlinfo.nl_net = dev_net(dev),
1915 };
1916
1917 cfg.fc_gateway = *gwaddr;
1918
1919 ip6_route_add(&cfg);
1920
1921 return rt6_get_dflt_router(gwaddr, dev);
1922 }
1923
rt6_purge_dflt_routers(struct net * net)1924 void rt6_purge_dflt_routers(struct net *net)
1925 {
1926 struct rt6_info *rt;
1927 struct fib6_table *table;
1928
1929 /* NOTE: Keep consistent with rt6_get_dflt_router */
1930 table = fib6_get_table(net, RT6_TABLE_DFLT);
1931 if (!table)
1932 return;
1933
1934 restart:
1935 read_lock_bh(&table->tb6_lock);
1936 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1937 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1938 dst_hold(&rt->dst);
1939 read_unlock_bh(&table->tb6_lock);
1940 ip6_del_rt(rt);
1941 goto restart;
1942 }
1943 }
1944 read_unlock_bh(&table->tb6_lock);
1945 }
1946
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)1947 static void rtmsg_to_fib6_config(struct net *net,
1948 struct in6_rtmsg *rtmsg,
1949 struct fib6_config *cfg)
1950 {
1951 memset(cfg, 0, sizeof(*cfg));
1952
1953 cfg->fc_table = RT6_TABLE_MAIN;
1954 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1955 cfg->fc_metric = rtmsg->rtmsg_metric;
1956 cfg->fc_expires = rtmsg->rtmsg_info;
1957 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1958 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1959 cfg->fc_flags = rtmsg->rtmsg_flags;
1960
1961 cfg->fc_nlinfo.nl_net = net;
1962
1963 cfg->fc_dst = rtmsg->rtmsg_dst;
1964 cfg->fc_src = rtmsg->rtmsg_src;
1965 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1966 }
1967
/*
 * Handle the SIOCADDRT / SIOCDELRT route ioctls.
 *
 * Returns -EINVAL for any other command, -EPERM without CAP_NET_ADMIN,
 * -EFAULT when the struct in6_rtmsg cannot be copied from @arg, and
 * otherwise the result of ip6_route_add() / ip6_route_del(), which run
 * under the RTNL lock.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
		return -EINVAL;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)))
		return -EFAULT;

	rtmsg_to_fib6_config(net, &rtmsg, &cfg);

	rtnl_lock();
	if (cmd == SIOCADDRT)
		err = ip6_route_add(&cfg);	/* Add a route */
	else
		err = ip6_route_del(&cfg);	/* Delete a route */
	rtnl_unlock();

	return err;
}
2004
2005 /*
2006 * Drop the packet on the floor
2007 */
2008
/*
 * Account for, answer and free a packet that cannot be routed.
 *
 * @ipstats_mib_noroutes selects the counter to bump: for
 * IPSTATS_MIB_INNOROUTES a packet whose destination is the unspecified
 * address is counted as IPSTATS_MIB_INADDRERRORS instead; for
 * IPSTATS_MIB_OUTNOROUTES the counter is used as given; any other value
 * counts nothing.  An ICMPv6 destination-unreachable with @code is then
 * sent and the skb is freed.  Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	struct dst_entry *dst = skb_dst(skb);
	int counter = ipstats_mib_noroutes;

	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) == IPV6_ADDR_ANY)
			counter = IPSTATS_MIB_INADDRERRORS;
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      counter);
		break;
	}

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2031
ip6_pkt_discard(struct sk_buff * skb)2032 static int ip6_pkt_discard(struct sk_buff *skb)
2033 {
2034 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2035 }
2036
ip6_pkt_discard_out(struct sk_buff * skb)2037 static int ip6_pkt_discard_out(struct sk_buff *skb)
2038 {
2039 skb->dev = skb_dst(skb)->dev;
2040 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2041 }
2042
2043 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2044
ip6_pkt_prohibit(struct sk_buff * skb)2045 static int ip6_pkt_prohibit(struct sk_buff *skb)
2046 {
2047 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2048 }
2049
ip6_pkt_prohibit_out(struct sk_buff * skb)2050 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2051 {
2052 skb->dev = skb_dst(skb)->dev;
2053 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2054 }
2055
2056 #endif
2057
2058 /*
2059 * Allocate a dst for local (unicast / anycast) address.
2060 */
2061
/*
 * Build the host route for a local address @addr owned by @idev.
 *
 * The route is allocated on the namespace's loopback device, holds a
 * reference on @idev, is flagged RTF_ANYCAST or RTF_LOCAL depending on
 * @anycast, is assigned to RT6_TABLE_LOCAL and starts with a reference
 * count of 1.
 *
 * Returns the route, ERR_PTR(-ENOMEM) when no dst could be allocated, or
 * the error from rt6_bind_neighbour() wrapped in ERR_PTR().
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt;
	int err;

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, net->loopback_dev, 0);
	if (!rt) {
		if (net_ratelimit())
			pr_warning("IPv6: Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	rt->rt6i_flags |= anycast ? RTF_ANYCAST : RTF_LOCAL;

	err = rt6_bind_neighbour(rt, rt->dst.dev);
	if (err) {
		dst_free(&rt->dst);
		return ERR_PTR(err);
	}

	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2105
ip6_route_get_saddr(struct net * net,struct rt6_info * rt,const struct in6_addr * daddr,unsigned int prefs,struct in6_addr * saddr)2106 int ip6_route_get_saddr(struct net *net,
2107 struct rt6_info *rt,
2108 const struct in6_addr *daddr,
2109 unsigned int prefs,
2110 struct in6_addr *saddr)
2111 {
2112 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2113 int err = 0;
2114 if (rt->rt6i_prefsrc.plen)
2115 *saddr = rt->rt6i_prefsrc.addr;
2116 else
2117 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2118 daddr, prefs, saddr);
2119 return err;
2120 }
2121
2122 /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() by rt6_remove_prefsrc():
 * the address being removed plus the device and namespace it belongs to.
 */
struct arg_dev_net_ip {
	struct net_device	*dev;
	struct net		*net;
	struct in6_addr		*addr;
};
2128
fib6_remove_prefsrc(struct rt6_info * rt,void * arg)2129 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2130 {
2131 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2132 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2133 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2134
2135 if (((void *)rt->dst.dev == dev || !dev) &&
2136 rt != net->ipv6.ip6_null_entry &&
2137 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2138 /* remove prefsrc entry */
2139 rt->rt6i_prefsrc.plen = 0;
2140 }
2141 return 0;
2142 }
2143
rt6_remove_prefsrc(struct inet6_ifaddr * ifp)2144 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2145 {
2146 struct net *net = dev_net(ifp->idev->dev);
2147 struct arg_dev_net_ip adni = {
2148 .dev = ifp->idev->dev,
2149 .net = net,
2150 .addr = &ifp->addr,
2151 };
2152 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2153 }
2154
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;
	struct net		*net;
};
2159
fib6_ifdown(struct rt6_info * rt,void * arg)2160 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2161 {
2162 const struct arg_dev_net *adn = arg;
2163 const struct net_device *dev = adn->dev;
2164
2165 if ((rt->dst.dev == dev || !dev) &&
2166 rt != adn->net->ipv6.ip6_null_entry)
2167 return -1;
2168
2169 return 0;
2170 }
2171
rt6_ifdown(struct net * net,struct net_device * dev)2172 void rt6_ifdown(struct net *net, struct net_device *dev)
2173 {
2174 struct arg_dev_net adn = {
2175 .dev = dev,
2176 .net = net,
2177 };
2178
2179 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2180 icmp6_clean_all(fib6_ifdown, &adn);
2181 }
2182
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
2188
rt6_mtu_change_route(struct rt6_info * rt,void * p_arg)2189 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2190 {
2191 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2192 struct inet6_dev *idev;
2193
2194 /* In IPv6 pmtu discovery is not optional,
2195 so that RTAX_MTU lock cannot disable it.
2196 We still use this lock to block changes
2197 caused by addrconf/ndisc.
2198 */
2199
2200 idev = __in6_dev_get(arg->dev);
2201 if (!idev)
2202 return 0;
2203
2204 /* For administrative MTU increase, there is no way to discover
2205 IPv6 PMTU increase, so PMTU increase should be updated here.
2206 Since RFC 1981 doesn't include administrative MTU increase
2207 update PMTU increase is a MUST. (i.e. jumbo frame)
2208 */
2209 /*
2210 If new MTU is less than route PMTU, this new MTU will be the
2211 lowest MTU in the path, update the route PMTU to reflect PMTU
2212 decreases; if new MTU is greater than route PMTU, and the
2213 old MTU is the lowest MTU in the path, update the route PMTU
2214 to reflect the increase. In this case if the other nodes' MTU
2215 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2216 PMTU discouvery.
2217 */
2218 if (rt->dst.dev == arg->dev &&
2219 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2220 (dst_mtu(&rt->dst) >= arg->mtu ||
2221 (dst_mtu(&rt->dst) < arg->mtu &&
2222 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2223 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2224 }
2225 return 0;
2226 }
2227
rt6_mtu_change(struct net_device * dev,unsigned mtu)2228 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2229 {
2230 struct rt6_mtu_change_arg arg = {
2231 .dev = dev,
2232 .mtu = mtu,
2233 };
2234
2235 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2236 }
2237
2238 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2239 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2240 [RTA_OIF] = { .type = NLA_U32 },
2241 [RTA_IIF] = { .type = NLA_U32 },
2242 [RTA_PRIORITY] = { .type = NLA_U32 },
2243 [RTA_METRICS] = { .type = NLA_NESTED },
2244 };
2245
rtm_to_fib6_config(struct sk_buff * skb,struct nlmsghdr * nlh,struct fib6_config * cfg)2246 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2247 struct fib6_config *cfg)
2248 {
2249 struct rtmsg *rtm;
2250 struct nlattr *tb[RTA_MAX+1];
2251 int err;
2252
2253 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2254 if (err < 0)
2255 goto errout;
2256
2257 err = -EINVAL;
2258 rtm = nlmsg_data(nlh);
2259 memset(cfg, 0, sizeof(*cfg));
2260
2261 cfg->fc_table = rtm->rtm_table;
2262 cfg->fc_dst_len = rtm->rtm_dst_len;
2263 cfg->fc_src_len = rtm->rtm_src_len;
2264 cfg->fc_flags = RTF_UP;
2265 cfg->fc_protocol = rtm->rtm_protocol;
2266
2267 if (rtm->rtm_type == RTN_UNREACHABLE)
2268 cfg->fc_flags |= RTF_REJECT;
2269
2270 if (rtm->rtm_type == RTN_LOCAL)
2271 cfg->fc_flags |= RTF_LOCAL;
2272
2273 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2274 cfg->fc_nlinfo.nlh = nlh;
2275 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2276
2277 if (tb[RTA_GATEWAY]) {
2278 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2279 cfg->fc_flags |= RTF_GATEWAY;
2280 }
2281
2282 if (tb[RTA_DST]) {
2283 int plen = (rtm->rtm_dst_len + 7) >> 3;
2284
2285 if (nla_len(tb[RTA_DST]) < plen)
2286 goto errout;
2287
2288 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2289 }
2290
2291 if (tb[RTA_SRC]) {
2292 int plen = (rtm->rtm_src_len + 7) >> 3;
2293
2294 if (nla_len(tb[RTA_SRC]) < plen)
2295 goto errout;
2296
2297 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2298 }
2299
2300 if (tb[RTA_PREFSRC])
2301 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2302
2303 if (tb[RTA_OIF])
2304 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2305
2306 if (tb[RTA_PRIORITY])
2307 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2308
2309 if (tb[RTA_METRICS]) {
2310 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2311 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2312 }
2313
2314 if (tb[RTA_TABLE])
2315 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2316
2317 err = 0;
2318 errout:
2319 return err;
2320 }
2321
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh,void * arg)2322 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2323 {
2324 struct fib6_config cfg;
2325 int err;
2326
2327 err = rtm_to_fib6_config(skb, nlh, &cfg);
2328 if (err < 0)
2329 return err;
2330
2331 return ip6_route_del(&cfg);
2332 }
2333
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,void * arg)2334 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335 {
2336 struct fib6_config cfg;
2337 int err;
2338
2339 err = rtm_to_fib6_config(skb, nlh, &cfg);
2340 if (err < 0)
2341 return err;
2342
2343 return ip6_route_add(&cfg);
2344 }
2345
rt6_nlmsg_size(void)2346 static inline size_t rt6_nlmsg_size(void)
2347 {
2348 return NLMSG_ALIGN(sizeof(struct rtmsg))
2349 + nla_total_size(16) /* RTA_SRC */
2350 + nla_total_size(16) /* RTA_DST */
2351 + nla_total_size(16) /* RTA_GATEWAY */
2352 + nla_total_size(16) /* RTA_PREFSRC */
2353 + nla_total_size(4) /* RTA_TABLE */
2354 + nla_total_size(4) /* RTA_IIF */
2355 + nla_total_size(4) /* RTA_OIF */
2356 + nla_total_size(4) /* RTA_PRIORITY */
2357 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2358 + nla_total_size(sizeof(struct rta_cacheinfo));
2359 }
2360
/*
 * Append one route message describing @rt to @skb.
 *
 * @net:    namespace, passed on to ip6mr_get_route()/ip6_route_get_saddr()
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as a /128 RTA_DST instead of the
 *          route's own destination prefix
 * @src:    same for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index; 0 means none
 * @type:   netlink message type
 * @pid:    netlink pid for the message header
 * @seq:    netlink sequence number for the message header
 * @prefix: non-zero if only RTF_PREFIX_RT routes are wanted
 * @nowait: passed on to ip6mr_get_route()
 * @flags:  netlink message flags
 *
 * Returns 1 when @prefix is set and @rt is not a prefix route (nothing
 * is written), 0 when ip6mr_get_route() returned 0 in the !nowait case,
 * the nlmsg_end() result on success, and -EMSGSIZE when the message
 * could not be started or completed (the partial message is cancelled).
 *
 * Note: the NLA_PUT*() macros jump to nla_put_failure on error, so they
 * must not be used while a lock is held.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Route flags, when set, override the stored protocol. */
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/*
	 * Use nla_put() directly rather than NLA_PUT(): the macro would
	 * jump to nla_put_failure with the RCU read lock still held.
	 */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n && nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
		rcu_read_unlock();
		goto nla_put_failure;
	}
	rcu_read_unlock();

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Remaining lifetime in jiffies, clamped to INT_MAX; 0 = no expiry. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2497
rt6_dump_route(struct rt6_info * rt,void * p_arg)2498 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2499 {
2500 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2501 int prefix;
2502
2503 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2504 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2505 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2506 } else
2507 prefix = 0;
2508
2509 return rt6_fill_node(arg->net,
2510 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2511 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2512 prefix, 0, NLM_F_MULTI);
2513 }
2514
inet6_rtm_getroute(struct sk_buff * in_skb,struct nlmsghdr * nlh,void * arg)2515 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2516 {
2517 struct net *net = sock_net(in_skb->sk);
2518 struct nlattr *tb[RTA_MAX+1];
2519 struct rt6_info *rt;
2520 struct sk_buff *skb;
2521 struct rtmsg *rtm;
2522 struct flowi6 fl6;
2523 int err, iif = 0;
2524
2525 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2526 if (err < 0)
2527 goto errout;
2528
2529 err = -EINVAL;
2530 memset(&fl6, 0, sizeof(fl6));
2531
2532 if (tb[RTA_SRC]) {
2533 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2534 goto errout;
2535
2536 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2537 }
2538
2539 if (tb[RTA_DST]) {
2540 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2541 goto errout;
2542
2543 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2544 }
2545
2546 if (tb[RTA_IIF])
2547 iif = nla_get_u32(tb[RTA_IIF]);
2548
2549 if (tb[RTA_OIF])
2550 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2551
2552 if (iif) {
2553 struct net_device *dev;
2554 dev = __dev_get_by_index(net, iif);
2555 if (!dev) {
2556 err = -ENODEV;
2557 goto errout;
2558 }
2559 }
2560
2561 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2562 if (!skb) {
2563 err = -ENOBUFS;
2564 goto errout;
2565 }
2566
2567 /* Reserve room for dummy headers, this skb can pass
2568 through good chunk of routing engine.
2569 */
2570 skb_reset_mac_header(skb);
2571 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2572
2573 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2574 skb_dst_set(skb, &rt->dst);
2575
2576 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2577 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2578 nlh->nlmsg_seq, 0, 0, 0);
2579 if (err < 0) {
2580 kfree_skb(skb);
2581 goto errout;
2582 }
2583
2584 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2585 errout:
2586 return err;
2587 }
2588
inet6_rt_notify(int event,struct rt6_info * rt,struct nl_info * info)2589 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2590 {
2591 struct sk_buff *skb;
2592 struct net *net = info->nl_net;
2593 u32 seq;
2594 int err;
2595
2596 err = -ENOBUFS;
2597 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2598
2599 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2600 if (!skb)
2601 goto errout;
2602
2603 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2604 event, info->pid, seq, 0, 0, 0);
2605 if (err < 0) {
2606 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2607 WARN_ON(err == -EMSGSIZE);
2608 kfree_skb(skb);
2609 goto errout;
2610 }
2611 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2612 info->nlh, gfp_any());
2613 return;
2614 errout:
2615 if (err < 0)
2616 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2617 }
2618
ip6_route_dev_notify(struct notifier_block * this,unsigned long event,void * data)2619 static int ip6_route_dev_notify(struct notifier_block *this,
2620 unsigned long event, void *data)
2621 {
2622 struct net_device *dev = (struct net_device *)data;
2623 struct net *net = dev_net(dev);
2624
2625 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2626 net->ipv6.ip6_null_entry->dst.dev = dev;
2627 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2628 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2629 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2630 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2631 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2632 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2633 #endif
2634 }
2635
2636 return NOTIFY_OK;
2637 }
2638
2639 /*
2640 * /proc
2641 */
2642
2643 #ifdef CONFIG_PROC_FS
2644
/*
 * NOTE(review): nothing in the /proc code visible below references this
 * structure (the handlers use seq_file) -- confirm whether it is still
 * needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2653
rt6_info_route(struct rt6_info * rt,void * p_arg)2654 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2655 {
2656 struct seq_file *m = p_arg;
2657 struct neighbour *n;
2658
2659 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2660
2661 #ifdef CONFIG_IPV6_SUBTREES
2662 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2663 #else
2664 seq_puts(m, "00000000000000000000000000000000 00 ");
2665 #endif
2666 rcu_read_lock();
2667 n = dst_get_neighbour_noref(&rt->dst);
2668 if (n) {
2669 seq_printf(m, "%pi6", n->primary_key);
2670 } else {
2671 seq_puts(m, "00000000000000000000000000000000");
2672 }
2673 rcu_read_unlock();
2674 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2675 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2676 rt->dst.__use, rt->rt6i_flags,
2677 rt->dst.dev ? rt->dst.dev->name : "");
2678 return 0;
2679 }
2680
ipv6_route_show(struct seq_file * m,void * v)2681 static int ipv6_route_show(struct seq_file *m, void *v)
2682 {
2683 struct net *net = (struct net *)m->private;
2684 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2685 return 0;
2686 }
2687
ipv6_route_open(struct inode * inode,struct file * file)2688 static int ipv6_route_open(struct inode *inode, struct file *file)
2689 {
2690 return single_open_net(inode, file, ipv6_route_show);
2691 }
2692
2693 static const struct file_operations ipv6_route_proc_fops = {
2694 .owner = THIS_MODULE,
2695 .open = ipv6_route_open,
2696 .read = seq_read,
2697 .llseek = seq_lseek,
2698 .release = single_release_net,
2699 };
2700
rt6_stats_seq_show(struct seq_file * seq,void * v)2701 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2702 {
2703 struct net *net = (struct net *)seq->private;
2704 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2705 net->ipv6.rt6_stats->fib_nodes,
2706 net->ipv6.rt6_stats->fib_route_nodes,
2707 net->ipv6.rt6_stats->fib_rt_alloc,
2708 net->ipv6.rt6_stats->fib_rt_entries,
2709 net->ipv6.rt6_stats->fib_rt_cache,
2710 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2711 net->ipv6.rt6_stats->fib_discarded_routes);
2712
2713 return 0;
2714 }
2715
rt6_stats_seq_open(struct inode * inode,struct file * file)2716 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2717 {
2718 return single_open_net(inode, file, rt6_stats_seq_show);
2719 }
2720
2721 static const struct file_operations rt6_stats_seq_fops = {
2722 .owner = THIS_MODULE,
2723 .open = rt6_stats_seq_open,
2724 .read = seq_read,
2725 .llseek = seq_lseek,
2726 .release = single_release_net,
2727 };
2728 #endif /* CONFIG_PROC_FS */
2729
2730 #ifdef CONFIG_SYSCTL
2731
2732 static
ipv6_sysctl_rtcache_flush(ctl_table * ctl,int write,void __user * buffer,size_t * lenp,loff_t * ppos)2733 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2734 void __user *buffer, size_t *lenp, loff_t *ppos)
2735 {
2736 struct net *net;
2737 int delay;
2738 if (!write)
2739 return -EINVAL;
2740
2741 net = (struct net *)ctl->extra1;
2742 delay = net->ipv6.sysctl.flush_delay;
2743 proc_dointvec(ctl, write, buffer, lenp, ppos);
2744 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2745 return 0;
2746 }
2747
2748 ctl_table ipv6_route_table_template[] = {
2749 {
2750 .procname = "flush",
2751 .data = &init_net.ipv6.sysctl.flush_delay,
2752 .maxlen = sizeof(int),
2753 .mode = 0200,
2754 .proc_handler = ipv6_sysctl_rtcache_flush
2755 },
2756 {
2757 .procname = "gc_thresh",
2758 .data = &ip6_dst_ops_template.gc_thresh,
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
2761 .proc_handler = proc_dointvec,
2762 },
2763 {
2764 .procname = "max_size",
2765 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
2768 .proc_handler = proc_dointvec,
2769 },
2770 {
2771 .procname = "gc_min_interval",
2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
2775 .proc_handler = proc_dointvec_jiffies,
2776 },
2777 {
2778 .procname = "gc_timeout",
2779 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
2782 .proc_handler = proc_dointvec_jiffies,
2783 },
2784 {
2785 .procname = "gc_interval",
2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
2789 .proc_handler = proc_dointvec_jiffies,
2790 },
2791 {
2792 .procname = "gc_elasticity",
2793 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
2796 .proc_handler = proc_dointvec,
2797 },
2798 {
2799 .procname = "mtu_expires",
2800 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
2803 .proc_handler = proc_dointvec_jiffies,
2804 },
2805 {
2806 .procname = "min_adv_mss",
2807 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
2810 .proc_handler = proc_dointvec,
2811 },
2812 {
2813 .procname = "gc_min_interval_ms",
2814 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2815 .maxlen = sizeof(int),
2816 .mode = 0644,
2817 .proc_handler = proc_dointvec_ms_jiffies,
2818 },
2819 { }
2820 };
2821
ipv6_route_sysctl_init(struct net * net)2822 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2823 {
2824 struct ctl_table *table;
2825
2826 table = kmemdup(ipv6_route_table_template,
2827 sizeof(ipv6_route_table_template),
2828 GFP_KERNEL);
2829
2830 if (table) {
2831 table[0].data = &net->ipv6.sysctl.flush_delay;
2832 table[0].extra1 = net;
2833 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2834 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2835 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2836 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2837 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2838 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2839 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2840 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2841 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2842 }
2843
2844 return table;
2845 }
2846 #endif
2847
ip6_route_net_init(struct net * net)2848 static int __net_init ip6_route_net_init(struct net *net)
2849 {
2850 int ret = -ENOMEM;
2851
2852 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2853 sizeof(net->ipv6.ip6_dst_ops));
2854
2855 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2856 goto out_ip6_dst_ops;
2857
2858 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2859 sizeof(*net->ipv6.ip6_null_entry),
2860 GFP_KERNEL);
2861 if (!net->ipv6.ip6_null_entry)
2862 goto out_ip6_dst_entries;
2863 net->ipv6.ip6_null_entry->dst.path =
2864 (struct dst_entry *)net->ipv6.ip6_null_entry;
2865 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2866 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2867 ip6_template_metrics, true);
2868
2869 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2870 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2871 sizeof(*net->ipv6.ip6_prohibit_entry),
2872 GFP_KERNEL);
2873 if (!net->ipv6.ip6_prohibit_entry)
2874 goto out_ip6_null_entry;
2875 net->ipv6.ip6_prohibit_entry->dst.path =
2876 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2877 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2878 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2879 ip6_template_metrics, true);
2880
2881 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2882 sizeof(*net->ipv6.ip6_blk_hole_entry),
2883 GFP_KERNEL);
2884 if (!net->ipv6.ip6_blk_hole_entry)
2885 goto out_ip6_prohibit_entry;
2886 net->ipv6.ip6_blk_hole_entry->dst.path =
2887 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2888 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2889 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2890 ip6_template_metrics, true);
2891 #endif
2892
2893 net->ipv6.sysctl.flush_delay = 0;
2894 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2895 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2896 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2897 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2898 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2899 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2900 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2901
2902 #ifdef CONFIG_PROC_FS
2903 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2904 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2905 #endif
2906 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2907
2908 ret = 0;
2909 out:
2910 return ret;
2911
2912 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913 out_ip6_prohibit_entry:
2914 kfree(net->ipv6.ip6_prohibit_entry);
2915 out_ip6_null_entry:
2916 kfree(net->ipv6.ip6_null_entry);
2917 #endif
2918 out_ip6_dst_entries:
2919 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2920 out_ip6_dst_ops:
2921 goto out;
2922 }
2923
ip6_route_net_exit(struct net * net)2924 static void __net_exit ip6_route_net_exit(struct net *net)
2925 {
2926 #ifdef CONFIG_PROC_FS
2927 proc_net_remove(net, "ipv6_route");
2928 proc_net_remove(net, "rt6_stats");
2929 #endif
2930 kfree(net->ipv6.ip6_null_entry);
2931 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 kfree(net->ipv6.ip6_prohibit_entry);
2933 kfree(net->ipv6.ip6_blk_hole_entry);
2934 #endif
2935 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2936 }
2937
2938 static struct pernet_operations ip6_route_net_ops = {
2939 .init = ip6_route_net_init,
2940 .exit = ip6_route_net_exit,
2941 };
2942
2943 static struct notifier_block ip6_route_dev_notifier = {
2944 .notifier_call = ip6_route_dev_notify,
2945 .priority = 0,
2946 };
2947
ip6_route_init(void)2948 int __init ip6_route_init(void)
2949 {
2950 int ret;
2951
2952 ret = -ENOMEM;
2953 ip6_dst_ops_template.kmem_cachep =
2954 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2955 SLAB_HWCACHE_ALIGN, NULL);
2956 if (!ip6_dst_ops_template.kmem_cachep)
2957 goto out;
2958
2959 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2960 if (ret)
2961 goto out_kmem_cache;
2962
2963 ret = register_pernet_subsys(&ip6_route_net_ops);
2964 if (ret)
2965 goto out_dst_entries;
2966
2967 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2968
2969 /* Registering of the loopback is done before this portion of code,
2970 * the loopback reference in rt6_info will not be taken, do it
2971 * manually for init_net */
2972 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2973 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2974 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2975 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2976 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2977 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2978 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2979 #endif
2980 ret = fib6_init();
2981 if (ret)
2982 goto out_register_subsys;
2983
2984 ret = xfrm6_init();
2985 if (ret)
2986 goto out_fib6_init;
2987
2988 ret = fib6_rules_init();
2989 if (ret)
2990 goto xfrm6_init;
2991
2992 ret = -ENOBUFS;
2993 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2994 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2995 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2996 goto fib6_rules_init;
2997
2998 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2999 if (ret)
3000 goto fib6_rules_init;
3001
3002 out:
3003 return ret;
3004
3005 fib6_rules_init:
3006 fib6_rules_cleanup();
3007 xfrm6_init:
3008 xfrm6_fini();
3009 out_fib6_init:
3010 fib6_gc_cleanup();
3011 out_register_subsys:
3012 unregister_pernet_subsys(&ip6_route_net_ops);
3013 out_dst_entries:
3014 dst_entries_destroy(&ip6_dst_blackhole_ops);
3015 out_kmem_cache:
3016 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3017 goto out;
3018 }
3019
ip6_route_cleanup(void)3020 void ip6_route_cleanup(void)
3021 {
3022 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3023 fib6_rules_cleanup();
3024 xfrm6_fini();
3025 fib6_gc_cleanup();
3026 unregister_pernet_subsys(&ip6_route_net_ops);
3027 dst_entries_destroy(&ip6_dst_blackhole_ops);
3028 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3029 }
3030