xref: /linux/net/ipv6/ndisc.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Neighbour Discovery for IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *	Mike Shaver		<shaver@ingenia.com>
9  */
10 
11 /*
12  *	Changes:
13  *
14  *	Alexey I. Froloff		:	RFC6106 (DNSSL) support
15  *	Pierre Ynard			:	export userland ND options
16  *						through netlink (RDNSS support)
17  *	Lars Fenneberg			:	fixed MTU setting on receipt
18  *						of an RA.
19  *	Janos Farkas			:	kmalloc failure checks
20  *	Alexey Kuznetsov		:	state machine reworked
21  *						and moved to net/core.
22  *	Pekka Savola			:	RFC2461 validation
23  *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
24  */
25 
26 #define pr_fmt(fmt) "ICMPv6: " fmt
27 
28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/sched.h>
34 #include <linux/net.h>
35 #include <linux/in6.h>
36 #include <linux/route.h>
37 #include <linux/init.h>
38 #include <linux/rcupdate.h>
39 #include <linux/slab.h>
40 #ifdef CONFIG_SYSCTL
41 #include <linux/sysctl.h>
42 #endif
43 
44 #include <linux/if_addr.h>
45 #include <linux/if_ether.h>
46 #include <linux/if_arp.h>
47 #include <linux/ipv6.h>
48 #include <linux/icmpv6.h>
49 #include <linux/jhash.h>
50 
51 #include <net/sock.h>
52 #include <net/snmp.h>
53 
54 #include <net/ipv6.h>
55 #include <net/protocol.h>
56 #include <net/ndisc.h>
57 #include <net/ip6_route.h>
58 #include <net/addrconf.h>
59 #include <net/icmp.h>
60 
61 #include <net/netlink.h>
62 #include <linux/rtnetlink.h>
63 
64 #include <net/flow.h>
65 #include <net/ip6_checksum.h>
66 #include <net/inet_common.h>
67 #include <linux/proc_fs.h>
68 
69 #include <linux/netfilter.h>
70 #include <linux/netfilter_ipv6.h>
71 
72 static u32 ndisc_hash(const void *pkey,
73 		      const struct net_device *dev,
74 		      __u32 *hash_rnd);
75 static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
76 static bool ndisc_allow_add(const struct net_device *dev,
77 			    struct netlink_ext_ack *extack);
78 static int ndisc_constructor(struct neighbour *neigh);
79 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
80 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
81 static int pndisc_constructor(struct pneigh_entry *n);
82 static void pndisc_destructor(struct pneigh_entry *n);
83 static void pndisc_redo(struct sk_buff *skb);
84 static int ndisc_is_multicast(const void *pkey);
85 
86 static const struct neigh_ops ndisc_generic_ops = {
87 	.family =		AF_INET6,
88 	.solicit =		ndisc_solicit,
89 	.error_report =		ndisc_error_report,
90 	.output =		neigh_resolve_output,
91 	.connected_output =	neigh_connected_output,
92 };
93 
94 static const struct neigh_ops ndisc_hh_ops = {
95 	.family =		AF_INET6,
96 	.solicit =		ndisc_solicit,
97 	.error_report =		ndisc_error_report,
98 	.output =		neigh_resolve_output,
99 	.connected_output =	neigh_resolve_output,
100 };
101 
102 
103 static const struct neigh_ops ndisc_direct_ops = {
104 	.family =		AF_INET6,
105 	.output =		neigh_direct_output,
106 	.connected_output =	neigh_direct_output,
107 };
108 
109 struct neigh_table nd_tbl = {
110 	.family =	AF_INET6,
111 	.key_len =	sizeof(struct in6_addr),
112 	.protocol =	cpu_to_be16(ETH_P_IPV6),
113 	.hash =		ndisc_hash,
114 	.key_eq =	ndisc_key_eq,
115 	.constructor =	ndisc_constructor,
116 	.pconstructor =	pndisc_constructor,
117 	.pdestructor =	pndisc_destructor,
118 	.proxy_redo =	pndisc_redo,
119 	.is_multicast =	ndisc_is_multicast,
120 	.allow_add  =   ndisc_allow_add,
121 	.id =		"ndisc_cache",
122 	.parms = {
123 		.tbl			= &nd_tbl,
124 		.reachable_time		= ND_REACHABLE_TIME,
125 		.data = {
126 			[NEIGH_VAR_MCAST_PROBES] = 3,
127 			[NEIGH_VAR_UCAST_PROBES] = 3,
128 			[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
129 			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
130 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
131 			[NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
132 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
133 			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
134 			[NEIGH_VAR_PROXY_QLEN] = 64,
135 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
136 			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
137 		},
138 	},
139 	.gc_interval =	  30 * HZ,
140 	.gc_thresh1 =	 128,
141 	.gc_thresh2 =	 512,
142 	.gc_thresh3 =	1024,
143 };
144 EXPORT_SYMBOL_GPL(nd_tbl);
145 
__ndisc_fill_addr_option(struct sk_buff * skb,int type,const void * data,int data_len,int pad)146 void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
147 			      int data_len, int pad)
148 {
149 	int space = __ndisc_opt_addr_space(data_len, pad);
150 	u8 *opt = skb_put(skb, space);
151 
152 	opt[0] = type;
153 	opt[1] = space>>3;
154 
155 	memset(opt + 2, 0, pad);
156 	opt   += pad;
157 	space -= pad;
158 
159 	memcpy(opt+2, data, data_len);
160 	data_len += 2;
161 	opt += data_len;
162 	space -= data_len;
163 	if (space > 0)
164 		memset(opt, 0, space);
165 }
166 EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
167 
/*
 * Append a link-layer address option of @type carrying @data to @skb, sized
 * and padded for skb->dev, then let the device's ndisc ops add anything
 * further for a message of @icmp6_type.
 */
static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
					  const void *data, u8 icmp6_type)
{
	struct net_device *dev = skb->dev;
	int pad = ndisc_addr_option_pad(dev->type);

	__ndisc_fill_addr_option(skb, type, data, dev->addr_len, pad);
	ndisc_ops_fill_addr_option(dev, skb, icmp6_type);
}
175 
ndisc_fill_redirect_addr_option(struct sk_buff * skb,void * ha,const u8 * ops_data)176 static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
177 						   void *ha,
178 						   const u8 *ops_data)
179 {
180 	ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
181 	ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
182 }
183 
ndisc_next_option(struct nd_opt_hdr * cur,struct nd_opt_hdr * end)184 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
185 					    struct nd_opt_hdr *end)
186 {
187 	int type;
188 	if (!cur || !end || cur >= end)
189 		return NULL;
190 	type = cur->nd_opt_type;
191 	do {
192 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
193 	} while (cur < end && cur->nd_opt_type != type);
194 	return cur <= end && cur->nd_opt_type == type ? cur : NULL;
195 }
196 
ndisc_is_useropt(const struct net_device * dev,struct nd_opt_hdr * opt)197 static inline int ndisc_is_useropt(const struct net_device *dev,
198 				   struct nd_opt_hdr *opt)
199 {
200 	return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
201 		opt->nd_opt_type == ND_OPT_RDNSS ||
202 		opt->nd_opt_type == ND_OPT_DNSSL ||
203 		opt->nd_opt_type == ND_OPT_6CO ||
204 		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
205 		opt->nd_opt_type == ND_OPT_PREF64;
206 }
207 
ndisc_next_useropt(const struct net_device * dev,struct nd_opt_hdr * cur,struct nd_opt_hdr * end)208 static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
209 					     struct nd_opt_hdr *cur,
210 					     struct nd_opt_hdr *end)
211 {
212 	if (!cur || !end || cur >= end)
213 		return NULL;
214 	do {
215 		cur = ((void *)cur) + (cur->nd_opt_len << 3);
216 	} while (cur < end && !ndisc_is_useropt(dev, cur));
217 	return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
218 }
219 
/*
 * Walk the @opt_len bytes of ND options starting at @opt and record what was
 * found in @ndopts (which is zeroed first).
 *
 * Returns @ndopts on success.  Returns NULL when @opt or @ndopts is NULL,
 * when @opt_len is negative, or when an option is truncated or has a zero
 * length field.
 */
struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
					  u8 *opt, int opt_len,
					  struct ndisc_options *ndopts)
{
	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;

	if (!nd_opt || opt_len < 0 || !ndopts)
		return NULL;

	memset(ndopts, 0, sizeof(*ndopts));

	while (opt_len) {
		int l;

		if (opt_len < sizeof(struct nd_opt_hdr))
			return NULL;

		/* the length field counts units of 8 bytes */
		l = nd_opt->nd_opt_len << 3;
		if (l == 0 || opt_len < l)
			return NULL;

		/* options claimed by the device's ndisc ops are skipped here */
		if (!ndisc_ops_parse_options(dev, nd_opt, ndopts)) {
			const int type = nd_opt->nd_opt_type;
			bool unknown = false;

			switch (type) {
			case ND_OPT_SOURCE_LL_ADDR:
			case ND_OPT_TARGET_LL_ADDR:
			case ND_OPT_MTU:
			case ND_OPT_NONCE:
			case ND_OPT_REDIRECT_HDR:
				/* only the first occurrence is kept */
				if (!ndopts->nd_opt_array[type]) {
					ndopts->nd_opt_array[type] = nd_opt;
				} else {
					net_dbg_ratelimited("%s: duplicated ND6 option found: type=%d\n",
							    __func__, type);
				}
				break;
			case ND_OPT_PREFIX_INFO:
				/* remember first and last occurrence */
				ndopts->nd_opts_pi_end = nd_opt;
				if (!ndopts->nd_opt_array[type])
					ndopts->nd_opt_array[type] = nd_opt;
				break;
#ifdef CONFIG_IPV6_ROUTE_INFO
			case ND_OPT_ROUTE_INFO:
				/* remember first and last occurrence */
				ndopts->nd_opts_ri_end = nd_opt;
				if (!ndopts->nd_opts_ri)
					ndopts->nd_opts_ri = nd_opt;
				break;
#endif
			default:
				unknown = true;
			}

			if (ndisc_is_useropt(dev, nd_opt)) {
				ndopts->nd_useropts_end = nd_opt;
				if (!ndopts->nd_useropts)
					ndopts->nd_useropts = nd_opt;
			} else if (unknown) {
				/*
				 * Unknown options must be silently ignored,
				 * to accommodate future extension to the
				 * protocol.
				 */
				net_dbg_ratelimited("%s: ignored unsupported option; type=%d, len=%d\n",
						    __func__, type, nd_opt->nd_opt_len);
			}
		}

		opt_len -= l;
		nd_opt = (void *)nd_opt + l;
	}

	return ndopts;
}
286 
ndisc_mc_map(const struct in6_addr * addr,char * buf,struct net_device * dev,int dir)287 int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
288 {
289 	switch (dev->type) {
290 	case ARPHRD_ETHER:
291 	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
292 	case ARPHRD_FDDI:
293 		ipv6_eth_mc_map(addr, buf);
294 		return 0;
295 	case ARPHRD_ARCNET:
296 		ipv6_arcnet_mc_map(addr, buf);
297 		return 0;
298 	case ARPHRD_INFINIBAND:
299 		ipv6_ib_mc_map(addr, dev->broadcast, buf);
300 		return 0;
301 	case ARPHRD_IPGRE:
302 		return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
303 	default:
304 		if (dir) {
305 			memcpy(buf, dev->broadcast, dev->addr_len);
306 			return 0;
307 		}
308 	}
309 	return -EINVAL;
310 }
311 EXPORT_SYMBOL(ndisc_mc_map);
312 
ndisc_hash(const void * pkey,const struct net_device * dev,__u32 * hash_rnd)313 static u32 ndisc_hash(const void *pkey,
314 		      const struct net_device *dev,
315 		      __u32 *hash_rnd)
316 {
317 	return ndisc_hashfn(pkey, dev, hash_rnd);
318 }
319 
/* nd_tbl.key_eq callback: delegates to neigh_key_eq128(). */
static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
{
	bool same = neigh_key_eq128(n, pkey);

	return same;
}
324 
ndisc_constructor(struct neighbour * neigh)325 static int ndisc_constructor(struct neighbour *neigh)
326 {
327 	struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
328 	struct net_device *dev = neigh->dev;
329 	struct inet6_dev *in6_dev;
330 	struct neigh_parms *parms;
331 	bool is_multicast = ipv6_addr_is_multicast(addr);
332 
333 	in6_dev = in6_dev_get(dev);
334 	if (!in6_dev) {
335 		return -EINVAL;
336 	}
337 
338 	parms = in6_dev->nd_parms;
339 	__neigh_parms_put(neigh->parms);
340 	neigh->parms = neigh_parms_clone(parms);
341 
342 	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
343 	if (!dev->header_ops) {
344 		neigh->nud_state = NUD_NOARP;
345 		neigh->ops = &ndisc_direct_ops;
346 		neigh->output = neigh_direct_output;
347 	} else {
348 		if (is_multicast) {
349 			neigh->nud_state = NUD_NOARP;
350 			ndisc_mc_map(addr, neigh->ha, dev, 1);
351 		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
352 			neigh->nud_state = NUD_NOARP;
353 			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
354 			if (dev->flags&IFF_LOOPBACK)
355 				neigh->type = RTN_LOCAL;
356 		} else if (dev->flags&IFF_POINTOPOINT) {
357 			neigh->nud_state = NUD_NOARP;
358 			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
359 		}
360 		if (dev->header_ops->cache)
361 			neigh->ops = &ndisc_hh_ops;
362 		else
363 			neigh->ops = &ndisc_generic_ops;
364 		if (neigh->nud_state&NUD_VALID)
365 			neigh->output = neigh->ops->connected_output;
366 		else
367 			neigh->output = neigh->ops->output;
368 	}
369 	in6_dev_put(in6_dev);
370 	return 0;
371 }
372 
pndisc_constructor(struct pneigh_entry * n)373 static int pndisc_constructor(struct pneigh_entry *n)
374 {
375 	struct in6_addr *addr = (struct in6_addr *)&n->key;
376 	struct net_device *dev = n->dev;
377 	struct in6_addr maddr;
378 
379 	if (!dev)
380 		return -EINVAL;
381 
382 	addrconf_addr_solict_mult(addr, &maddr);
383 	return ipv6_dev_mc_inc(dev, &maddr);
384 }
385 
pndisc_destructor(struct pneigh_entry * n)386 static void pndisc_destructor(struct pneigh_entry *n)
387 {
388 	struct in6_addr *addr = (struct in6_addr *)&n->key;
389 	struct net_device *dev = n->dev;
390 	struct in6_addr maddr;
391 
392 	if (!dev)
393 		return;
394 
395 	addrconf_addr_solict_mult(addr, &maddr);
396 	ipv6_dev_mc_dec(dev, &maddr);
397 }
398 
399 /* called with rtnl held */
ndisc_allow_add(const struct net_device * dev,struct netlink_ext_ack * extack)400 static bool ndisc_allow_add(const struct net_device *dev,
401 			    struct netlink_ext_ack *extack)
402 {
403 	struct inet6_dev *idev = __in6_dev_get(dev);
404 
405 	if (!idev || idev->cnf.disable_ipv6) {
406 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
407 		return false;
408 	}
409 
410 	return true;
411 }
412 
ndisc_alloc_skb(struct net_device * dev,int len)413 static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
414 				       int len)
415 {
416 	int hlen = LL_RESERVED_SPACE(dev);
417 	int tlen = dev->needed_tailroom;
418 	struct sk_buff *skb;
419 
420 	skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
421 	if (!skb)
422 		return NULL;
423 
424 	skb->protocol = htons(ETH_P_IPV6);
425 	skb->dev = dev;
426 
427 	skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
428 	skb_reset_transport_header(skb);
429 
430 	/* Manually assign socket ownership as we avoid calling
431 	 * sock_alloc_send_pskb() to bypass wmem buffer limits
432 	 */
433 	rcu_read_lock();
434 	skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
435 	rcu_read_unlock();
436 
437 	return skb;
438 }
439 
ip6_nd_hdr(struct sk_buff * skb,const struct in6_addr * saddr,const struct in6_addr * daddr,int hop_limit,int len)440 static void ip6_nd_hdr(struct sk_buff *skb,
441 		       const struct in6_addr *saddr,
442 		       const struct in6_addr *daddr,
443 		       int hop_limit, int len)
444 {
445 	struct ipv6hdr *hdr;
446 	struct inet6_dev *idev;
447 	unsigned tclass;
448 
449 	rcu_read_lock();
450 	idev = __in6_dev_get(skb->dev);
451 	tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0;
452 	rcu_read_unlock();
453 
454 	skb_push(skb, sizeof(*hdr));
455 	skb_reset_network_header(skb);
456 	hdr = ipv6_hdr(skb);
457 
458 	ip6_flow_hdr(hdr, tclass, 0);
459 
460 	hdr->payload_len = htons(len);
461 	hdr->nexthdr = IPPROTO_ICMPV6;
462 	hdr->hop_limit = hop_limit;
463 
464 	hdr->saddr = *saddr;
465 	hdr->daddr = *daddr;
466 }
467 
ndisc_send_skb(struct sk_buff * skb,const struct in6_addr * daddr,const struct in6_addr * saddr)468 void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
469 		    const struct in6_addr *saddr)
470 {
471 	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
472 	struct dst_entry *dst = skb_dst(skb);
473 	struct net_device *dev;
474 	struct inet6_dev *idev;
475 	struct net *net;
476 	struct sock *sk;
477 	int err;
478 	u8 type;
479 
480 	type = icmp6h->icmp6_type;
481 
482 	rcu_read_lock();
483 
484 	net = dev_net_rcu(skb->dev);
485 	sk = net->ipv6.ndisc_sk;
486 	if (!dst) {
487 		struct flowi6 fl6;
488 		int oif = skb->dev->ifindex;
489 
490 		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
491 		dst = icmp6_dst_alloc(skb->dev, &fl6);
492 		if (IS_ERR(dst)) {
493 			rcu_read_unlock();
494 			kfree_skb(skb);
495 			return;
496 		}
497 
498 		skb_dst_set(skb, dst);
499 	}
500 
501 	icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
502 					      IPPROTO_ICMPV6,
503 					      csum_partial(icmp6h,
504 							   skb->len, 0));
505 
506 	ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
507 
508 	dev = dst_dev(dst);
509 	idev = __in6_dev_get(dev);
510 	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
511 
512 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
513 		      net, sk, skb, NULL, dev,
514 		      dst_output);
515 	if (!err) {
516 		ICMP6MSGOUT_INC_STATS(net, idev, type);
517 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
518 	}
519 
520 	rcu_read_unlock();
521 }
522 EXPORT_SYMBOL(ndisc_send_skb);
523 
ndisc_send_na(struct net_device * dev,const struct in6_addr * daddr,const struct in6_addr * solicited_addr,bool router,bool solicited,bool override,bool inc_opt)524 void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
525 		   const struct in6_addr *solicited_addr,
526 		   bool router, bool solicited, bool override, bool inc_opt)
527 {
528 	struct sk_buff *skb;
529 	struct in6_addr tmpaddr;
530 	struct inet6_ifaddr *ifp;
531 	const struct in6_addr *src_addr;
532 	struct nd_msg *msg;
533 	int optlen = 0;
534 
535 	/* for anycast or proxy, solicited_addr != src_addr */
536 	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
537 	if (ifp) {
538 		src_addr = solicited_addr;
539 		if (ifp->flags & IFA_F_OPTIMISTIC)
540 			override = false;
541 		inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao);
542 		in6_ifa_put(ifp);
543 	} else {
544 		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
545 				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
546 				       &tmpaddr))
547 			return;
548 		src_addr = &tmpaddr;
549 	}
550 
551 	if (!dev->addr_len)
552 		inc_opt = false;
553 	if (inc_opt)
554 		optlen += ndisc_opt_addr_space(dev,
555 					       NDISC_NEIGHBOUR_ADVERTISEMENT);
556 
557 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
558 	if (!skb)
559 		return;
560 
561 	msg = skb_put(skb, sizeof(*msg));
562 	*msg = (struct nd_msg) {
563 		.icmph = {
564 			.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
565 			.icmp6_router = router,
566 			.icmp6_solicited = solicited,
567 			.icmp6_override = override,
568 		},
569 		.target = *solicited_addr,
570 	};
571 
572 	if (inc_opt)
573 		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
574 				       dev->dev_addr,
575 				       NDISC_NEIGHBOUR_ADVERTISEMENT);
576 
577 	ndisc_send_skb(skb, daddr, src_addr);
578 }
579 
ndisc_send_unsol_na(struct net_device * dev)580 static void ndisc_send_unsol_na(struct net_device *dev)
581 {
582 	struct inet6_dev *idev;
583 	struct inet6_ifaddr *ifa;
584 
585 	idev = in6_dev_get(dev);
586 	if (!idev)
587 		return;
588 
589 	read_lock_bh(&idev->lock);
590 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
591 		/* skip tentative addresses until dad completes */
592 		if (ifa->flags & IFA_F_TENTATIVE &&
593 		    !(ifa->flags & IFA_F_OPTIMISTIC))
594 			continue;
595 
596 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
597 			      /*router=*/ !!idev->cnf.forwarding,
598 			      /*solicited=*/ false, /*override=*/ true,
599 			      /*inc_opt=*/ true);
600 	}
601 	read_unlock_bh(&idev->lock);
602 
603 	in6_dev_put(idev);
604 }
605 
/*
 * Build (but do not send) a Neighbour Solicitation for @solicit.
 *
 * A source link-layer address option is added when the device has a
 * link-layer address and @saddr is not the unspecified address.  A nonce
 * option carrying the first 6 bytes of @nonce is added when @nonce is
 * non-zero.
 *
 * Returns the new skb, or NULL if @saddr is NULL or allocation fails.
 */
struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
				const struct in6_addr *saddr, u64 nonce)
{
	const bool with_nonce = nonce != 0;
	struct sk_buff *skb;
	struct nd_msg *msg;
	bool with_sllao;
	int optlen = 0;

	if (!saddr)
		return NULL;

	with_sllao = dev->addr_len && !ipv6_addr_any(saddr);

	if (with_sllao)
		optlen += ndisc_opt_addr_space(dev,
					       NDISC_NEIGHBOUR_SOLICITATION);
	if (with_nonce)
		optlen += 8;

	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
	if (!skb)
		return NULL;

	msg = skb_put(skb, sizeof(*msg));
	*msg = (struct nd_msg) {
		.icmph = {
			.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
		},
		.target = *solicit,
	};

	if (with_sllao)
		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
				       dev->dev_addr,
				       NDISC_NEIGHBOUR_SOLICITATION);

	if (with_nonce) {
		/* 8-byte option: type, length (in 8-byte units), 6 nonce bytes */
		u8 *nonce_opt = skb_put(skb, 8);

		nonce_opt[0] = ND_OPT_NONCE;
		nonce_opt[1] = 8 >> 3;
		memcpy(nonce_opt + 2, &nonce, 6);
	}

	return skb;
}
EXPORT_SYMBOL(ndisc_ns_create);
652 
/*
 * Build a Neighbour Solicitation for @solicit with ndisc_ns_create() and
 * send it to @daddr.  When @saddr is NULL a link-local address of @dev is
 * looked up with ipv6_get_lladdr(); nothing is sent if that lookup fails or
 * if the message cannot be built.
 */
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
		   const struct in6_addr *daddr, const struct in6_addr *saddr,
		   u64 nonce)
{
	struct in6_addr lladdr;
	struct sk_buff *skb;

	if (!saddr) {
		if (ipv6_get_lladdr(dev, &lladdr,
				    (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)))
			return;
		saddr = &lladdr;
	}

	skb = ndisc_ns_create(dev, solicit, saddr, nonce);
	if (!skb)
		return;

	ndisc_send_skb(skb, daddr, saddr);
}
672 
ndisc_send_rs(struct net_device * dev,const struct in6_addr * saddr,const struct in6_addr * daddr)673 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
674 		   const struct in6_addr *daddr)
675 {
676 	struct sk_buff *skb;
677 	struct rs_msg *msg;
678 	int send_sllao = dev->addr_len;
679 	int optlen = 0;
680 
681 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
682 	/*
683 	 * According to section 2.2 of RFC 4429, we must not
684 	 * send router solicitations with a sllao from
685 	 * optimistic addresses, but we may send the solicitation
686 	 * if we don't include the sllao.  So here we check
687 	 * if our address is optimistic, and if so, we
688 	 * suppress the inclusion of the sllao.
689 	 */
690 	if (send_sllao) {
691 		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
692 							   dev, 1);
693 		if (ifp) {
694 			if (ifp->flags & IFA_F_OPTIMISTIC)  {
695 				send_sllao = 0;
696 			}
697 			in6_ifa_put(ifp);
698 		} else {
699 			send_sllao = 0;
700 		}
701 	}
702 #endif
703 	if (send_sllao)
704 		optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
705 
706 	skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
707 	if (!skb)
708 		return;
709 
710 	msg = skb_put(skb, sizeof(*msg));
711 	*msg = (struct rs_msg) {
712 		.icmph = {
713 			.icmp6_type = NDISC_ROUTER_SOLICITATION,
714 		},
715 	};
716 
717 	if (send_sllao)
718 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
719 				       dev->dev_addr,
720 				       NDISC_ROUTER_SOLICITATION);
721 
722 	ndisc_send_skb(skb, daddr, saddr);
723 }
724 
725 
/*
 * neigh_ops.error_report callback: report a link failure on the skb's dst
 * and then free the skb.  @neigh is not used.
 *
 *	"The sender MUST return an ICMP
 *	 destination unreachable"
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	dst_link_failure(skb);
	kfree_skb(skb);
}
735 
736 /* Called with locked neigh: either read or both */
737 
ndisc_solicit(struct neighbour * neigh,struct sk_buff * skb)738 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
739 {
740 	struct in6_addr *saddr = NULL;
741 	struct in6_addr mcaddr;
742 	struct net_device *dev = neigh->dev;
743 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
744 	int probes = atomic_read(&neigh->probes);
745 
746 	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
747 					   dev, false, 1,
748 					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
749 		saddr = &ipv6_hdr(skb)->saddr;
750 	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
751 	if (probes < 0) {
752 		if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) {
753 			net_dbg_ratelimited("%s: trying to ucast probe in NUD_INVALID: %pI6\n",
754 					    __func__, target);
755 		}
756 		ndisc_send_ns(dev, target, target, saddr, 0);
757 	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
758 		neigh_app_ns(neigh);
759 	} else {
760 		addrconf_addr_solict_mult(target, &mcaddr);
761 		ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
762 	}
763 }
764 
pndisc_is_router(const void * pkey,struct net_device * dev)765 static int pndisc_is_router(const void *pkey,
766 			    struct net_device *dev)
767 {
768 	struct pneigh_entry *n;
769 	int ret = -1;
770 
771 	n = pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
772 	if (n)
773 		ret = !!(READ_ONCE(n->flags) & NTF_ROUTER);
774 
775 	return ret;
776 }
777 
/*
 * Apply a neighbour update via neigh_update() and then notify the device's
 * ndisc ops of it through ndisc_ops_update().
 */
void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
		  const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
		  struct ndisc_options *ndopts)
{
	neigh_update(neigh, lladdr, new, flags, 0);

	/* report ndisc ops about neighbour update */
	ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}
786 
ndisc_recv_ns(struct sk_buff * skb)787 static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
788 {
789 	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
790 	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
791 	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
792 	u8 *lladdr = NULL;
793 	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
794 				    offsetof(struct nd_msg, opt));
795 	struct ndisc_options ndopts;
796 	struct net_device *dev = skb->dev;
797 	struct inet6_ifaddr *ifp;
798 	struct inet6_dev *idev = NULL;
799 	struct neighbour *neigh;
800 	int dad = ipv6_addr_any(saddr);
801 	int is_router = -1;
802 	SKB_DR(reason);
803 	u64 nonce = 0;
804 	bool inc;
805 
806 	if (skb->len < sizeof(struct nd_msg))
807 		return SKB_DROP_REASON_PKT_TOO_SMALL;
808 
809 	if (ipv6_addr_is_multicast(&msg->target)) {
810 		net_dbg_ratelimited("NS: multicast target address\n");
811 		return reason;
812 	}
813 
814 	/*
815 	 * RFC2461 7.1.1:
816 	 * DAD has to be destined for solicited node multicast address.
817 	 */
818 	if (dad && !ipv6_addr_is_solict_mult(daddr)) {
819 		net_dbg_ratelimited("NS: bad DAD packet (wrong destination)\n");
820 		return reason;
821 	}
822 
823 	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
824 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
825 
826 	if (ndopts.nd_opts_src_lladdr) {
827 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
828 		if (!lladdr) {
829 			net_dbg_ratelimited("NS: invalid link-layer address length\n");
830 			return reason;
831 		}
832 
833 		/* RFC2461 7.1.1:
834 		 *	If the IP source address is the unspecified address,
835 		 *	there MUST NOT be source link-layer address option
836 		 *	in the message.
837 		 */
838 		if (dad) {
839 			net_dbg_ratelimited("NS: bad DAD packet (link-layer address option)\n");
840 			return reason;
841 		}
842 	}
843 	if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
844 		memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
845 
846 	inc = ipv6_addr_is_multicast(daddr);
847 
848 	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
849 	if (ifp) {
850 have_ifp:
851 		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
852 			if (dad) {
853 				if (nonce != 0 && ifp->dad_nonce == nonce) {
854 					u8 *np = (u8 *)&nonce;
855 					/* Matching nonce if looped back */
856 					net_dbg_ratelimited("%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
857 							    ifp->idev->dev->name, &ifp->addr, np);
858 					goto out;
859 				}
860 				/*
861 				 * We are colliding with another node
862 				 * who is doing DAD
863 				 * so fail our DAD process
864 				 */
865 				addrconf_dad_failure(skb, ifp);
866 				return reason;
867 			} else {
868 				/*
869 				 * This is not a dad solicitation.
870 				 * If we are an optimistic node,
871 				 * we should respond.
872 				 * Otherwise, we should ignore it.
873 				 */
874 				if (!(ifp->flags & IFA_F_OPTIMISTIC))
875 					goto out;
876 			}
877 		}
878 
879 		idev = ifp->idev;
880 	} else {
881 		struct net *net = dev_net(dev);
882 
883 		/* perhaps an address on the master device */
884 		if (netif_is_l3_slave(dev)) {
885 			struct net_device *mdev;
886 
887 			mdev = netdev_master_upper_dev_get_rcu(dev);
888 			if (mdev) {
889 				ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
890 				if (ifp)
891 					goto have_ifp;
892 			}
893 		}
894 
895 		idev = in6_dev_get(dev);
896 		if (!idev) {
897 			/* XXX: count this drop? */
898 			return reason;
899 		}
900 
901 		if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
902 		    (READ_ONCE(idev->cnf.forwarding) &&
903 		     (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) ||
904 		      READ_ONCE(idev->cnf.proxy_ndp)) &&
905 		     (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
906 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
907 			    skb->pkt_type != PACKET_HOST &&
908 			    inc &&
909 			    NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
910 				/*
911 				 * for anycast or proxy,
912 				 * sender should delay its response
913 				 * by a random time between 0 and
914 				 * MAX_ANYCAST_DELAY_TIME seconds.
915 				 * (RFC2461) -- yoshfuji
916 				 */
917 				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
918 				if (n)
919 					pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
920 				goto out;
921 			}
922 		} else {
923 			SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST);
924 			goto out;
925 		}
926 	}
927 
928 	if (is_router < 0)
929 		is_router = READ_ONCE(idev->cnf.forwarding);
930 
931 	if (dad) {
932 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
933 			      !!is_router, false, (ifp != NULL), true);
934 		goto out;
935 	}
936 
937 	if (inc)
938 		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
939 	else
940 		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
941 
942 	/*
943 	 *	update / create cache entry
944 	 *	for the source address
945 	 */
946 	neigh = __neigh_lookup(&nd_tbl, saddr, dev,
947 			       !inc || lladdr || !dev->addr_len);
948 	if (neigh)
949 		ndisc_update(dev, neigh, lladdr, NUD_STALE,
950 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
951 			     NEIGH_UPDATE_F_OVERRIDE,
952 			     NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
953 	if (neigh || !dev->header_ops) {
954 		ndisc_send_na(dev, saddr, &msg->target, !!is_router,
955 			      true, (ifp != NULL && inc), inc);
956 		if (neigh)
957 			neigh_release(neigh);
958 		reason = SKB_CONSUMED;
959 	}
960 
961 out:
962 	if (ifp)
963 		in6_ifa_put(ifp);
964 	else
965 		in6_dev_put(idev);
966 	return reason;
967 }
968 
accept_untracked_na(struct net_device * dev,struct in6_addr * saddr)969 static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
970 {
971 	struct inet6_dev *idev = __in6_dev_get(dev);
972 
973 	switch (READ_ONCE(idev->cnf.accept_untracked_na)) {
974 	case 0: /* Don't accept untracked na (absent in neighbor cache) */
975 		return 0;
976 	case 1: /* Create new entries from na if currently untracked */
977 		return 1;
978 	case 2: /* Create new entries from untracked na only if saddr is in the
979 		 * same subnet as an address configured on the interface that
980 		 * received the na
981 		 */
982 		return !!ipv6_chk_prefix(saddr, dev);
983 	default:
984 		return 0;
985 	}
986 }
987 
/*
 * Handle a received Neighbour Advertisement (NA).
 *
 * The message is rejected when it is too short, when the target is a
 * multicast address, when a solicited NA was sent to a multicast
 * destination, when drop_unsolicited_na applies, or when its options are
 * malformed.  An NA for one of our own addresses triggers DAD failure
 * handling (tentative address) or just a warning.  Otherwise the neighbour
 * cache entry for the target is updated; on a forwarding interface a
 * missing entry may be created if accept_untracked_na() allows it.
 *
 * Returns SKB_CONSUMED when the neighbour entry was updated, a specific
 * SKB_DROP_REASON_* for short packets and bad options, and the default
 * reason set up by SKB_DR() in every other case.
 */
static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
{
	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
	u8 *lladdr = NULL;
	/* bytes between the start of the option area and the end of the skb */
	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
				    offsetof(struct nd_msg, opt));
	struct ndisc_options ndopts;
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	struct inet6_ifaddr *ifp;
	struct neighbour *neigh;
	SKB_DR(reason);
	u8 new_state;

	if (skb->len < sizeof(struct nd_msg))
		return SKB_DROP_REASON_PKT_TOO_SMALL;

	if (ipv6_addr_is_multicast(&msg->target)) {
		net_dbg_ratelimited("NA: target address is multicast\n");
		return reason;
	}

	if (ipv6_addr_is_multicast(daddr) &&
	    msg->icmph.icmp6_solicited) {
		net_dbg_ratelimited("NA: solicited NA is multicasted\n");
		return reason;
	}

	/* For some 802.11 wireless deployments (and possibly other networks),
	 * there will be a NA proxy and unsolicited packets are attacks
	 * and thus should not be accepted.
	 * drop_unsolicited_na takes precedence over accept_untracked_na
	 */
	if (!msg->icmph.icmp6_solicited && idev &&
	    READ_ONCE(idev->cnf.drop_unsolicited_na))
		return reason;

	if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;

	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
		if (!lladdr) {
			net_dbg_ratelimited("NA: invalid link-layer address length\n");
			return reason;
		}
	}
	/* Is the advertised target one of our own addresses on this device? */
	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
	if (ifp) {
		if (skb->pkt_type != PACKET_LOOPBACK
		    && (ifp->flags & IFA_F_TENTATIVE)) {
				/* NOTE(review): no in6_ifa_put() on this path -
				 * presumably addrconf_dad_failure() consumes the
				 * ifp reference; confirm against its definition.
				 */
				addrconf_dad_failure(skb, ifp);
				return reason;
		}
		/* What should we make now? The advertisement
		   is invalid, but ndisc specs say nothing
		   about it. It could be misconfiguration, or
		   a smart proxy agent tries to help us :-)

		   We should not print the error if NA has been
		   received from loopback - it is just our own
		   unsolicited advertisement.
		 */
		if (skb->pkt_type != PACKET_LOOPBACK)
			net_warn_ratelimited("NA: %pM advertised our address %pI6c on %s!\n",
					     eth_hdr(skb)->h_source, &ifp->addr,
					     ifp->idev->dev->name);
		in6_ifa_put(ifp);
		return reason;
	}

	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);

	/* RFC 9131 updates original Neighbour Discovery RFC 4861.
	 * NAs with Target LL Address option without a corresponding
	 * entry in the neighbour cache can now create a STALE neighbour
	 * cache entry on routers.
	 *
	 *   entry accept  fwding  solicited        behaviour
	 * ------- ------  ------  ---------    ----------------------
	 * present      X       X         0     Set state to STALE
	 * present      X       X         1     Set state to REACHABLE
	 *  absent      0       X         X     Do nothing
	 *  absent      1       0         X     Do nothing
	 *  absent      1       1         X     Add a new STALE entry
	 *
	 * Note that we don't do a (daddr == all-routers-mcast) check.
	 */
	new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
	if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) {
		if (accept_untracked_na(dev, saddr)) {
			neigh = neigh_create(&nd_tbl, &msg->target, dev);
			new_state = NUD_STALE;
		}
	}

	/* neigh may be NULL (no entry) or an error pointer from neigh_create() */
	if (neigh && !IS_ERR(neigh)) {
		u8 old_flags = neigh->flags;
		struct net *net = dev_net(dev);

		if (READ_ONCE(neigh->nud_state) & NUD_FAILED)
			goto out;

		/*
		 * Don't update the neighbor cache entry on a proxy NA from
		 * ourselves because either the proxied node is off link or it
		 * has already sent a NA to us.
		 */
		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
		    READ_ONCE(net->ipv6.devconf_all->forwarding) &&
		    READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
		    pneigh_lookup(&nd_tbl, net, &msg->target, dev)) {
			/* XXX: idev->cnf.proxy_ndp */
			goto out;
		}

		ndisc_update(dev, neigh, lladdr,
			     new_state,
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
			     (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
			     NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);

		/* NTF_ROUTER was set before the update and is clear now */
		if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
			/*
			 * Change: router to host
			 */
			rt6_clean_tohost(dev_net(dev),  saddr);
		}
		reason = SKB_CONSUMED;
out:
		/* drop the reference taken by neigh_lookup()/neigh_create() */
		neigh_release(neigh);
	}
	return reason;
}
1126 
ndisc_recv_rs(struct sk_buff * skb)1127 static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
1128 {
1129 	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
1130 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1131 	struct neighbour *neigh;
1132 	struct inet6_dev *idev;
1133 	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1134 	struct ndisc_options ndopts;
1135 	u8 *lladdr = NULL;
1136 	SKB_DR(reason);
1137 
1138 	if (skb->len < sizeof(*rs_msg))
1139 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1140 
1141 	idev = __in6_dev_get(skb->dev);
1142 	if (!idev) {
1143 		net_err_ratelimited("RS: can't find in6 device\n");
1144 		return reason;
1145 	}
1146 
1147 	/* Don't accept RS if we're not in router mode */
1148 	if (!READ_ONCE(idev->cnf.forwarding))
1149 		goto out;
1150 
1151 	/*
1152 	 * Don't update NCE if src = ::;
1153 	 * this implies that the source node has no ip address assigned yet.
1154 	 */
1155 	if (ipv6_addr_any(saddr))
1156 		goto out;
1157 
1158 	/* Parse ND options */
1159 	if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts))
1160 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1161 
1162 	if (ndopts.nd_opts_src_lladdr) {
1163 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1164 					     skb->dev);
1165 		if (!lladdr)
1166 			goto out;
1167 	}
1168 
1169 	neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1170 	if (neigh) {
1171 		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1172 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1173 			     NEIGH_UPDATE_F_OVERRIDE|
1174 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
1175 			     NDISC_ROUTER_SOLICITATION, &ndopts);
1176 		neigh_release(neigh);
1177 		reason = SKB_CONSUMED;
1178 	}
1179 out:
1180 	return reason;
1181 }
1182 
ndisc_ra_useropt(struct sk_buff * ra,struct nd_opt_hdr * opt)1183 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1184 {
1185 	struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1186 	struct sk_buff *skb;
1187 	struct nlmsghdr *nlh;
1188 	struct nduseroptmsg *ndmsg;
1189 	struct net *net = dev_net(ra->dev);
1190 	int err;
1191 	int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1192 				    + (opt->nd_opt_len << 3));
1193 	size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1194 
1195 	skb = nlmsg_new(msg_size, GFP_ATOMIC);
1196 	if (!skb) {
1197 		err = -ENOBUFS;
1198 		goto errout;
1199 	}
1200 
1201 	nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1202 	if (!nlh) {
1203 		goto nla_put_failure;
1204 	}
1205 
1206 	ndmsg = nlmsg_data(nlh);
1207 	ndmsg->nduseropt_family = AF_INET6;
1208 	ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1209 	ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1210 	ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1211 	ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1212 
1213 	memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1214 
1215 	if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr))
1216 		goto nla_put_failure;
1217 	nlmsg_end(skb, nlh);
1218 
1219 	rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1220 	return;
1221 
1222 nla_put_failure:
1223 	nlmsg_free(skb);
1224 	err = -EMSGSIZE;
1225 errout:
1226 	rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1227 }
1228 
/*
 * Handle a received Router Advertisement (RA).
 *
 * Processing is staged, and the skip_* labels let later stages run when an
 * earlier one is disabled:
 *   - validation (link-local source, length, node type, option parsing);
 *   - managed/otherconf flags and default router handling (until
 *     skip_defrtr);
 *   - retransmit / reachable timer updates (until skip_linkparms);
 *   - neighbour cache update for the sender;
 *   - route information, prefix information, MTU and user options.
 *
 * Returns SKB_CONSUMED once the sender's neighbour entry has been updated,
 * a specific SKB_DROP_REASON_* for short packets and bad options, and the
 * default reason set up by SKB_DR() otherwise.
 */
static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
{
	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
	bool send_ifinfo_notify = false;
	struct neighbour *neigh = NULL;
	struct ndisc_options ndopts;
	struct fib6_info *rt = NULL;
	struct inet6_dev *in6_dev;
	struct fib6_table *table;
	u32 defrtr_usr_metric;
	unsigned int pref = 0;
	__u32 old_if_flags;
	struct net *net;
	SKB_DR(reason);
	int lifetime;
	int optlen;

	/* options start right after the fixed RA header */
	__u8 *opt = (__u8 *)(ra_msg + 1);

	optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
		sizeof(struct ra_msg);

	net_dbg_ratelimited("RA: %s, dev: %s\n", __func__, skb->dev->name);
	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("RA: source address is not link-local\n");
		return reason;
	}
	if (optlen < 0)
		return SKB_DROP_REASON_PKT_TOO_SMALL;

#ifdef CONFIG_IPV6_NDISC_NODETYPE
	if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
		net_dbg_ratelimited("RA: from host or unauthorized router\n");
		return reason;
	}
#endif

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev) {
		net_err_ratelimited("RA: can't find inet6 device for %s\n", skb->dev->name);
		return reason;
	}

	if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts))
		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;

	/* Even when RAs are not accepted, the sender's neighbour entry is
	 * still refreshed below (skip_linkparms).
	 */
	if (!ipv6_accept_ra(in6_dev)) {
		net_dbg_ratelimited("RA: %s, did not accept ra for dev: %s\n", __func__,
				    skb->dev->name);
		goto skip_linkparms;
	}

#ifdef CONFIG_IPV6_NDISC_NODETYPE
	/* skip link-specific parameters from interior routers */
	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
		net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__,
				    skb->dev->name);
		goto skip_linkparms;
	}
#endif

	if (in6_dev->if_flags & IF_RS_SENT) {
		/*
		 *	flag that an RA was received after an RS was sent
		 *	out on this interface.
		 */
		in6_dev->if_flags |= IF_RA_RCVD;
	}

	/*
	 * Remember the managed/otherconf flags from most recently
	 * received RA message (RFC 2462) -- yoshfuji
	 */
	old_if_flags = in6_dev->if_flags;
	in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
				IF_RA_OTHERCONF)) |
				(ra_msg->icmph.icmp6_addrconf_managed ?
					IF_RA_MANAGED : 0) |
				(ra_msg->icmph.icmp6_addrconf_other ?
					IF_RA_OTHERCONF : 0);

	if (old_if_flags != in6_dev->if_flags)
		send_ifinfo_notify = true;

	if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
		net_dbg_ratelimited("RA: %s, defrtr is false for dev: %s\n", __func__,
				    skb->dev->name);
		goto skip_defrtr;
	}

	/* A zero lifetime is always accepted: it withdraws the default router */
	lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
	if (lifetime != 0 &&
	    lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
		net_dbg_ratelimited("RA: router lifetime (%ds) is too short: %s\n", lifetime,
				    skb->dev->name);
		goto skip_defrtr;
	}

	/* Do not accept RA with source-addr found on local machine unless
	 * accept_ra_from_local is set to true.
	 */
	net = dev_net(in6_dev->dev);
	if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
	    ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
		net_dbg_ratelimited("RA from local address detected on dev: %s: default router ignored\n",
				    skb->dev->name);
		goto skip_defrtr;
	}

#ifdef CONFIG_IPV6_ROUTER_PREF
	pref = ra_msg->icmph.icmp6_router_pref;
	/* 10b is handled as if it were 00b (medium) */
	if (pref == ICMPV6_ROUTER_PREF_INVALID ||
	    !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref))
		pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
	/* routes added from RAs do not use nexthop objects */
	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
	if (rt) {
		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
					 rt->fib6_nh->fib_nh_dev, NULL,
					  &ipv6_hdr(skb)->saddr);
		if (!neigh) {
			net_err_ratelimited("RA: %s got default router without neighbour\n",
					    __func__);
			fib6_info_release(rt);
			/* NOTE(review): returns without reaching "out", so a
			 * pending send_ifinfo_notify is not delivered here.
			 */
			return reason;
		}
	}
	/* Set default route metric as specified by user */
	defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric;
	/* delete the route if lifetime is 0 or if metric needs change */
	if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) {
		ip6_del_rt(net, rt, false);
		rt = NULL;
	}

	net_dbg_ratelimited("RA: rt: %p  lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime,
			    defrtr_usr_metric, skb->dev->name);
	if (!rt && lifetime) {
		net_dbg_ratelimited("RA: adding default router\n");

		/* drop the neighbour of a route deleted above, if any */
		if (neigh)
			neigh_release(neigh);

		rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
					 skb->dev, pref, defrtr_usr_metric,
					 lifetime);
		if (!rt) {
			net_err_ratelimited("RA: %s failed to add default route\n", __func__);
			/* neigh was already released above; as with the other
			 * early returns, no ifinfo notification is sent.
			 */
			return reason;
		}

		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
					 rt->fib6_nh->fib_nh_dev, NULL,
					  &ipv6_hdr(skb)->saddr);
		if (!neigh) {
			net_err_ratelimited("RA: %s got default router without neighbour\n",
					    __func__);
			fib6_info_release(rt);
			return reason;
		}
		neigh->flags |= NTF_ROUTER;
	} else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) {
		/* existing route kept; only its preference changed */
		struct nl_info nlinfo = {
			.nl_net = net,
		};
		rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
		inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
	}

	if (rt) {
		/* refresh the expiry of the default route under the table lock */
		table = rt->fib6_table;
		spin_lock_bh(&table->tb6_lock);

		fib6_set_expires(rt, jiffies + (HZ * lifetime));
		fib6_add_gc_list(rt);

		spin_unlock_bh(&table->tb6_lock);
	}
	if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 &&
	    ra_msg->icmph.icmp6_hop_limit) {
		if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <=
		    ra_msg->icmph.icmp6_hop_limit) {
			WRITE_ONCE(in6_dev->cnf.hop_limit,
				   ra_msg->icmph.icmp6_hop_limit);
			/* NOTE(review): rt may be NULL here; presumably
			 * fib6_metric_set() tolerates that - confirm.
			 */
			fib6_metric_set(rt, RTAX_HOPLIMIT,
					ra_msg->icmph.icmp6_hop_limit);
		} else {
			net_dbg_ratelimited("RA: Got route advertisement with lower hop_limit than minimum\n");
		}
	}

skip_defrtr:

	/*
	 *	Update Reachable Time and Retrans Timer
	 */

	if (in6_dev->nd_parms) {
		/* advertised values are in milliseconds; converted to jiffies below */
		unsigned long rtime = ntohl(ra_msg->retrans_timer);

		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
			rtime = (rtime*HZ)/1000;
			if (rtime < HZ/100)
				rtime = HZ/100;
			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
			in6_dev->tstamp = jiffies;
			send_ifinfo_notify = true;
		}

		rtime = ntohl(ra_msg->reachable_time);
		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
			rtime = (rtime*HZ)/1000;

			if (rtime < HZ/10)
				rtime = HZ/10;

			if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
				NEIGH_VAR_SET(in6_dev->nd_parms,
					      BASE_REACHABLE_TIME, rtime);
				NEIGH_VAR_SET(in6_dev->nd_parms,
					      GC_STALETIME, 3 * rtime);
				in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
				in6_dev->tstamp = jiffies;
				send_ifinfo_notify = true;
			}
		}
	}

skip_linkparms:

	/*
	 *	Process options.
	 */

	/* neigh is already set when a default router was found or added above */
	if (!neigh)
		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
				       skb->dev, 1);
	if (neigh) {
		u8 *lladdr = NULL;
		if (ndopts.nd_opts_src_lladdr) {
			lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
						     skb->dev);
			if (!lladdr) {
				net_dbg_ratelimited("RA: invalid link-layer address length\n");
				goto out;
			}
		}
		ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE|
			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
			     NEIGH_UPDATE_F_ISROUTER,
			     NDISC_ROUTER_ADVERTISEMENT, &ndopts);
		reason = SKB_CONSUMED;
	}

	if (!ipv6_accept_ra(in6_dev)) {
		net_dbg_ratelimited("RA: %s, accept_ra is false for dev: %s\n", __func__,
				    skb->dev->name);
		goto out;
	}

#ifdef CONFIG_IPV6_ROUTE_INFO
	if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
	    ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
			  in6_dev->dev, 0)) {
		net_dbg_ratelimited("RA from local address detected on dev: %s: router info ignored.\n",
				    skb->dev->name);
		goto skip_routeinfo;
	}

	if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) {
		struct nd_opt_hdr *p;
		/* walk every Route Information option, filtering by sysctls */
		for (p = ndopts.nd_opts_ri;
		     p;
		     p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
			struct route_info *ri = (struct route_info *)p;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
			if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
			    ri->prefix_len == 0)
				continue;
#endif
			if (ri->prefix_len == 0 &&
			    !READ_ONCE(in6_dev->cnf.accept_ra_defrtr))
				continue;
			if (ri->lifetime != 0 &&
			    ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft))
				continue;
			if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen))
				continue;
			if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen))
				continue;
			rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
				      &ipv6_hdr(skb)->saddr);
		}
	}

skip_routeinfo:
#endif

#ifdef CONFIG_IPV6_NDISC_NODETYPE
	/* skip link-specific ndopts from interior routers */
	if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
		net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
				    __func__, skb->dev->name);
		goto out;
	}
#endif

	if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) {
		struct nd_opt_hdr *p;
		/* hand every Prefix Information option to addrconf */
		for (p = ndopts.nd_opts_pi;
		     p;
		     p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
			addrconf_prefix_rcv(skb->dev, (u8 *)p,
					    (p->nd_opt_len) << 3,
					    ndopts.nd_opts_src_lladdr != NULL);
		}
	}

	if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) {
		__be32 n;
		u32 mtu;

		/* the 32-bit MTU sits 2 bytes past the option header; copy it
		 * out with memcpy rather than dereferencing in place
		 */
		memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
		mtu = ntohl(n);

		/* ra_mtu records the advertised value even if it is rejected below */
		if (in6_dev->ra_mtu != mtu) {
			in6_dev->ra_mtu = mtu;
			send_ifinfo_notify = true;
		}

		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
			net_dbg_ratelimited("RA: invalid mtu: %d\n", mtu);
		} else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
			WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
			fib6_metric_set(rt, RTAX_MTU, mtu);
			rt6_mtu_change(skb->dev, mtu);
		}
	}

	if (ndopts.nd_useropts) {
		struct nd_opt_hdr *p;
		/* forward each user option to user space over netlink */
		for (p = ndopts.nd_useropts;
		     p;
		     p = ndisc_next_useropt(skb->dev, p,
					    ndopts.nd_useropts_end)) {
			ndisc_ra_useropt(skb, p);
		}
	}

	/* target link-layer address and redirected header options do not
	 * belong in an RA; only log them
	 */
	if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
		net_dbg_ratelimited("RA: invalid RA options\n");
	}
out:
	/* Send a notify if RA changed managed/otherconf flags or
	 * timer settings or ra_mtu value
	 */
	if (send_ifinfo_notify)
		inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);

	/* NOTE(review): rt may be NULL here; presumably fib6_info_release()
	 * accepts NULL - confirm.
	 */
	fib6_info_release(rt);
	if (neigh)
		neigh_release(neigh);
	return reason;
}
1597 
ndisc_redirect_rcv(struct sk_buff * skb)1598 static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb)
1599 {
1600 	struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
1601 	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
1602 				    offsetof(struct rd_msg, opt));
1603 	struct ndisc_options ndopts;
1604 	SKB_DR(reason);
1605 	u8 *hdr;
1606 
1607 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1608 	switch (skb->ndisc_nodetype) {
1609 	case NDISC_NODETYPE_HOST:
1610 	case NDISC_NODETYPE_NODEFAULT:
1611 		net_dbg_ratelimited("Redirect: from host or unauthorized router\n");
1612 		return reason;
1613 	}
1614 #endif
1615 
1616 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1617 		net_dbg_ratelimited("Redirect: source address is not link-local\n");
1618 		return reason;
1619 	}
1620 
1621 	if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
1622 		return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
1623 
1624 	if (!ndopts.nd_opts_rh) {
1625 		ip6_redirect_no_header(skb, dev_net(skb->dev),
1626 					skb->dev->ifindex);
1627 		return reason;
1628 	}
1629 
1630 	hdr = (u8 *)ndopts.nd_opts_rh;
1631 	hdr += 8;
1632 	if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
1633 		return SKB_DROP_REASON_PKT_TOO_SMALL;
1634 
1635 	return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
1636 }
1637 
ndisc_fill_redirect_hdr_option(struct sk_buff * skb,struct sk_buff * orig_skb,int rd_len)1638 static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
1639 					   struct sk_buff *orig_skb,
1640 					   int rd_len)
1641 {
1642 	u8 *opt = skb_put(skb, rd_len);
1643 
1644 	memset(opt, 0, 8);
1645 	*(opt++) = ND_OPT_REDIRECT_HDR;
1646 	*(opt++) = (rd_len >> 3);
1647 	opt += 6;
1648 
1649 	skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
1650 		      rd_len - 8);
1651 }
1652 
/*
 * Send a Redirect to the source of @skb, telling it to use @target as the
 * next hop for the destination of @skb.
 *
 * Nothing is sent when the device has no usable link-local address, when
 * @target is neither the packet's destination nor a link-local unicast
 * address, when the route back to the source fails or goes via a gateway,
 * when the per-peer rate limit (one per second) is exceeded, or when no
 * neighbour entry exists for @target.
 */
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
{
	struct net_device *dev = skb->dev;
	struct net *net = dev_net_rcu(dev);
	struct sock *sk = net->ipv6.ndisc_sk;
	int optlen = 0;
	struct inet_peer *peer;
	struct sk_buff *buff;
	struct rd_msg *msg;
	struct in6_addr saddr_buf;
	struct rt6_info *rt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int rd_len;
	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
	   ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
	bool ret;

	/* for an L3 master device, use the device the packet arrived on */
	if (netif_is_l3_master(dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev)
			return;
	}

	/* the redirect is sent from our link-local address on dev */
	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
		net_dbg_ratelimited("Redirect: no link-local address on %s\n", dev->name);
		return;
	}

	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
	    ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("Redirect: target address is not link-local unicast\n");
		return;
	}

	/* route the redirect back to the source of the offending packet */
	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		return;
	}
	/* NOTE(review): no dst_release() on the error path below - presumably
	 * xfrm_lookup() releases dst itself on failure; confirm.
	 */
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
	if (IS_ERR(dst))
		return;

	rt = dst_rt6_info(dst);

	if (rt->rt6i_flags & RTF_GATEWAY) {
		net_dbg_ratelimited("Redirect: destination is not a neighbour\n");
		goto release;
	}

	/* rate limit: at most one redirect per second per source address */
	peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
	ret = inet_peer_xrlim_allow(peer, 1*HZ);

	if (!ret)
		goto release;

	if (dev->addr_len) {
		struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
		if (!neigh) {
			net_dbg_ratelimited("Redirect: no neigh for target address\n");
			goto release;
		}

		/* copy the link-layer address out under the neighbour lock */
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID) {
			memcpy(ha_buf, neigh->ha, dev->addr_len);
			read_unlock_bh(&neigh->lock);
			ha = ha_buf;
			optlen += ndisc_redirect_opt_addr_space(dev, neigh,
								ops_data_buf,
								&ops_data);
		} else
			read_unlock_bh(&neigh->lock);

		neigh_release(neigh);
	}

	/* Redirected Header option: as much of the original packet (plus the
	 * 8-byte option header) as fits within IPV6_MIN_MTU, rounded down to
	 * a multiple of 8 bytes
	 */
	rd_len = min_t(unsigned int,
		       IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
		       skb->len + 8);
	rd_len &= ~0x7;
	optlen += rd_len;

	buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
	if (!buff)
		goto release;

	msg = skb_put(buff, sizeof(*msg));
	*msg = (struct rd_msg) {
		.icmph = {
			.icmp6_type = NDISC_REDIRECT,
		},
		.target = *target,
		.dest = ipv6_hdr(skb)->daddr,
	};

	/*
	 *	include target_address option
	 */

	if (ha)
		ndisc_fill_redirect_addr_option(buff, ha, ops_data);

	/*
	 *	build redirect option and copy skb over to the new packet.
	 */

	if (rd_len)
		ndisc_fill_redirect_hdr_option(buff, skb, rd_len);

	/* dst is attached to buff here, so it is not released on this path */
	skb_dst_set(buff, dst);
	ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
	return;

release:
	dst_release(dst);
}
1774 
/*
 * Run Neighbour Solicitation processing on @skb and then free it, passing
 * the result of ndisc_recv_ns() as the drop reason.
 * NOTE(review): presumably used to replay a deferred proxy NS; the caller
 * is registered outside this function - confirm.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	enum skb_drop_reason reason = ndisc_recv_ns(skb);

	kfree_skb_reason(skb, reason);
}
1781 
/*
 * Return non-zero when the key, interpreted as an IPv6 address, is a
 * multicast address.
 */
static int ndisc_is_multicast(const void *pkey)
{
	const struct in6_addr *addr = pkey;

	return ipv6_addr_is_multicast(addr);
}
1786 
ndisc_suppress_frag_ndisc(struct sk_buff * skb)1787 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
1788 {
1789 	struct inet6_dev *idev = __in6_dev_get(skb->dev);
1790 
1791 	if (!idev)
1792 		return true;
1793 	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
1794 	    READ_ONCE(idev->cnf.suppress_frag_ndisc)) {
1795 		net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
1796 		return true;
1797 	}
1798 	return false;
1799 }
1800 
ndisc_rcv(struct sk_buff * skb)1801 enum skb_drop_reason ndisc_rcv(struct sk_buff *skb)
1802 {
1803 	struct nd_msg *msg;
1804 	SKB_DR(reason);
1805 
1806 	if (ndisc_suppress_frag_ndisc(skb))
1807 		return SKB_DROP_REASON_IPV6_NDISC_FRAG;
1808 
1809 	if (skb_linearize(skb))
1810 		return SKB_DROP_REASON_NOMEM;
1811 
1812 	msg = (struct nd_msg *)skb_transport_header(skb);
1813 
1814 	__skb_push(skb, skb->data - skb_transport_header(skb));
1815 
1816 	if (ipv6_hdr(skb)->hop_limit != 255) {
1817 		net_dbg_ratelimited("NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit);
1818 		return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT;
1819 	}
1820 
1821 	if (msg->icmph.icmp6_code != 0) {
1822 		net_dbg_ratelimited("NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code);
1823 		return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE;
1824 	}
1825 
1826 	switch (msg->icmph.icmp6_type) {
1827 	case NDISC_NEIGHBOUR_SOLICITATION:
1828 		memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1829 		reason = ndisc_recv_ns(skb);
1830 		break;
1831 
1832 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
1833 		reason = ndisc_recv_na(skb);
1834 		break;
1835 
1836 	case NDISC_ROUTER_SOLICITATION:
1837 		reason = ndisc_recv_rs(skb);
1838 		break;
1839 
1840 	case NDISC_ROUTER_ADVERTISEMENT:
1841 		reason = ndisc_router_discovery(skb);
1842 		break;
1843 
1844 	case NDISC_REDIRECT:
1845 		reason = ndisc_redirect_rcv(skb);
1846 		break;
1847 	}
1848 
1849 	return reason;
1850 }
1851 
ndisc_netdev_event(struct notifier_block * this,unsigned long event,void * ptr)1852 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1853 {
1854 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1855 	struct netdev_notifier_change_info *change_info;
1856 	struct net *net = dev_net(dev);
1857 	struct inet6_dev *idev;
1858 	bool evict_nocarrier;
1859 
1860 	switch (event) {
1861 	case NETDEV_CHANGEADDR:
1862 		neigh_changeaddr(&nd_tbl, dev);
1863 		fib6_run_gc(0, net, false);
1864 		fallthrough;
1865 	case NETDEV_UP:
1866 		idev = in6_dev_get(dev);
1867 		if (!idev)
1868 			break;
1869 		if (READ_ONCE(idev->cnf.ndisc_notify) ||
1870 		    READ_ONCE(net->ipv6.devconf_all->ndisc_notify))
1871 			ndisc_send_unsol_na(dev);
1872 		in6_dev_put(idev);
1873 		break;
1874 	case NETDEV_CHANGE:
1875 		idev = in6_dev_get(dev);
1876 		if (!idev)
1877 			evict_nocarrier = true;
1878 		else {
1879 			evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) &&
1880 					  READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier);
1881 			in6_dev_put(idev);
1882 		}
1883 
1884 		change_info = ptr;
1885 		if (change_info->flags_changed & IFF_NOARP)
1886 			neigh_changeaddr(&nd_tbl, dev);
1887 		if (evict_nocarrier && !netif_carrier_ok(dev))
1888 			neigh_carrier_down(&nd_tbl, dev);
1889 		break;
1890 	case NETDEV_DOWN:
1891 		neigh_ifdown(&nd_tbl, dev);
1892 		fib6_run_gc(0, net, false);
1893 		break;
1894 	case NETDEV_NOTIFY_PEERS:
1895 		ndisc_send_unsol_na(dev);
1896 		break;
1897 	default:
1898 		break;
1899 	}
1900 
1901 	return NOTIFY_DONE;
1902 }
1903 
/* Notifier block that routes netdevice events to ndisc_netdev_event(). */
static struct notifier_block ndisc_netdev_notifier = {
	.notifier_call = ndisc_netdev_event,
	/* priority is set 5 below ADDRCONF_NOTIFY_PRIORITY */
	.priority = ADDRCONF_NOTIFY_PRIORITY - 5,
};
1908 
1909 #ifdef CONFIG_SYSCTL
ndisc_warn_deprecated_sysctl(const struct ctl_table * ctl,const char * func,const char * dev_name)1910 static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
1911 					 const char *func, const char *dev_name)
1912 {
1913 	static char warncomm[TASK_COMM_LEN];
1914 	static int warned;
1915 	if (strcmp(warncomm, current->comm) && warned < 5) {
1916 		strscpy(warncomm, current->comm);
1917 		pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
1918 			warncomm, func,
1919 			dev_name, ctl->procname,
1920 			dev_name, ctl->procname);
1921 		warned++;
1922 	}
1923 }
1924 
/*
 * sysctl handler for the neighbour retransmit / reachable time entries.
 *
 * Picks the conversion helper from the entry name (warning first for the
 * two deprecated names).  After a successful write to a per-device entry it
 * re-randomises reachable_time when BASE_REACHABLE_TIME was the entry
 * written, updates the device timestamp and sends an RTM_NEWLINK
 * notification.
 *
 * Returns the helper's result, or -1 for an unrecognised entry name.
 */
int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	struct net_device *dev = ctl->extra1;
	const char *name = ctl->procname;
	struct inet6_dev *idev;
	int ret;

	if (strcmp(name, "retrans_time") == 0) {
		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
		ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
	} else if (strcmp(name, "base_reachable_time") == 0) {
		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
		ret = neigh_proc_dointvec_jiffies(ctl, write,
						  buffer, lenp, ppos);
	} else if (strcmp(name, "retrans_time_ms") == 0 ||
		   strcmp(name, "base_reachable_time_ms") == 0) {
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
						     buffer, lenp, ppos);
	} else {
		ret = -1;
	}

	/* only a successful write to a per-device entry needs follow-up */
	if (!write || ret != 0 || !dev)
		return ret;

	idev = in6_dev_get(dev);
	if (!idev)
		return ret;

	if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
		idev->nd_parms->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
	WRITE_ONCE(idev->tstamp, jiffies);
	inet6_ifinfo_notify(RTM_NEWLINK, idev);
	in6_dev_put(idev);

	return ret;
}
1960 
1961 
1962 #endif
1963 
ndisc_net_init(struct net * net)1964 static int __net_init ndisc_net_init(struct net *net)
1965 {
1966 	struct ipv6_pinfo *np;
1967 	struct sock *sk;
1968 	int err;
1969 
1970 	err = inet_ctl_sock_create(&sk, PF_INET6,
1971 				   SOCK_RAW, IPPROTO_ICMPV6, net);
1972 	if (err < 0) {
1973 		net_err_ratelimited("NDISC: Failed to initialize the control socket (err %d)\n",
1974 				    err);
1975 		return err;
1976 	}
1977 
1978 	net->ipv6.ndisc_sk = sk;
1979 
1980 	np = inet6_sk(sk);
1981 	np->hop_limit = 255;
1982 	/* Do not loopback ndisc messages */
1983 	inet6_clear_bit(MC6_LOOP, sk);
1984 
1985 	return 0;
1986 }
1987 
ndisc_net_exit(struct net * net)1988 static void __net_exit ndisc_net_exit(struct net *net)
1989 {
1990 	inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1991 }
1992 
/*
 * Per-network-namespace hooks for neighbour discovery:
 * ndisc_net_init() runs as .init and ndisc_net_exit() as .exit.
 */
static struct pernet_operations ndisc_net_ops = {
	.init = ndisc_net_init,
	.exit = ndisc_net_exit,
};
1997 
ndisc_init(void)1998 int __init ndisc_init(void)
1999 {
2000 	int err;
2001 
2002 	err = register_pernet_subsys(&ndisc_net_ops);
2003 	if (err)
2004 		return err;
2005 	/*
2006 	 * Initialize the neighbour table
2007 	 */
2008 	neigh_table_init(NEIGH_ND_TABLE, &nd_tbl);
2009 
2010 #ifdef CONFIG_SYSCTL
2011 	err = neigh_sysctl_register(NULL, &nd_tbl.parms,
2012 				    ndisc_ifinfo_sysctl_change);
2013 	if (err)
2014 		goto out_unregister_pernet;
2015 out:
2016 #endif
2017 	return err;
2018 
2019 #ifdef CONFIG_SYSCTL
2020 out_unregister_pernet:
2021 	unregister_pernet_subsys(&ndisc_net_ops);
2022 	goto out;
2023 #endif
2024 }
2025 
ndisc_late_init(void)2026 int __init ndisc_late_init(void)
2027 {
2028 	return register_netdevice_notifier(&ndisc_netdev_notifier);
2029 }
2030 
/*
 * Counterpart of ndisc_late_init(): unregister ndisc_netdev_notifier.
 */
void ndisc_late_cleanup(void)
{
	unregister_netdevice_notifier(&ndisc_netdev_notifier);
}
2035 
/*
 * Counterpart of ndisc_init(): undo its steps in reverse order.
 */
void ndisc_cleanup(void)
{
#ifdef CONFIG_SYSCTL
	/* Drop the default neighbour sysctls registered by ndisc_init(). */
	neigh_sysctl_unregister(&nd_tbl.parms);
#endif
	/* Tear down the ND neighbour table before the pernet ops go away. */
	neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
	unregister_pernet_subsys(&ndisc_net_ops);
}
2044