xref: /src/sys/netinet6/nd6_nbr.c (revision cf275806b6eddd66a3d82be56b3672dc5afab525)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	$KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $
32  */
33 
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 #include "opt_ipsec.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/counter.h>
41 #include <sys/eventhandler.h>
42 #include <sys/malloc.h>
43 #include <sys/libkern.h>
44 #include <sys/lock.h>
45 #include <sys/rwlock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/time.h>
50 #include <sys/kernel.h>
51 #include <sys/errno.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/callout.h>
56 #include <sys/refcount.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/if_dl.h>
61 #include <net/if_var.h>
62 #include <net/if_private.h>
63 #include <net/route.h>
64 #include <net/vnet.h>
65 
66 #include <netinet/in.h>
67 #include <netinet/in_var.h>
68 #include <net/if_llatbl.h>
69 #include <netinet6/in6_var.h>
70 #include <netinet6/in6_ifattach.h>
71 #include <netinet/ip6.h>
72 #include <netinet6/ip6_var.h>
73 #include <netinet6/scope6_var.h>
74 #include <netinet6/nd6.h>
75 #include <netinet/icmp6.h>
76 #include <netinet/ip_carp.h>
77 #include <netinet6/send.h>
78 
79 #include <machine/atomic.h>
80 
/*
 * Reinterpret a generic socket-address pointer as a link-level
 * (struct sockaddr_dl) address.  The argument is parenthesized so that
 * an expression argument is evaluated completely before the cast is
 * applied.
 */
#define	SDL(s)	((struct sockaddr_dl *)(s))
82 
83 MALLOC_DECLARE(M_IP6NDP);
84 
85 struct dadq;
86 static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
87 static void nd6_dad_add(struct dadq *dp);
88 static void nd6_dad_del(struct dadq *dp);
89 static void nd6_dad_rele(struct dadq *);
90 static void nd6_dad_starttimer(struct dadq *, int);
91 static void nd6_dad_stoptimer(struct dadq *);
92 static void nd6_dad_timer(void *);
93 static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
94 static void nd6_dad_ns_output(struct dadq *);
95 static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
96 static void nd6_dad_na_input(struct ifaddr *);
97 static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
98     const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
99 static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
100     const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
101 static void nd6_queue_add(struct ifaddr *, struct in6_addr *,
102     struct in6_addr *, struct sockaddr_dl *, int, uint32_t);
103 
104 static struct ifaddr *nd6_proxy_fill_sdl(struct ifnet *,
105     const struct in6_addr *, struct sockaddr_dl *);
106 
107 VNET_DEFINE_STATIC(int, dad_enhanced) = 1;
108 #define	V_dad_enhanced			VNET(dad_enhanced)
109 
110 SYSCTL_DECL(_net_inet6_ip6);
111 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
112     &VNET_NAME(dad_enhanced), 0,
113     "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
114 
115 VNET_DEFINE_STATIC(int, dad_maxtry) = 15;	/* max # of *tries* to
116 						   transmit DAD packet */
117 #define	V_dad_maxtry			VNET(dad_maxtry)
118 
119 VNET_DEFINE_STATIC(int, nd6_onlink_ns_rfc4861) = 0;
120 #define	V_nd6_onlink_ns_rfc4861		VNET(nd6_onlink_ns_rfc4861)
121 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
122     nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW,
123     &VNET_NAME(nd6_onlink_ns_rfc4861), 0,
124     "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861");
125 
126 struct nd_queue {
127 	TAILQ_ENTRY(nd_queue) ndq_list;
128 	struct ifaddr		*ndq_ifa;
129 	struct in6_addr		ndq_daddr;
130 	struct in6_addr		ndq_taddr;
131 	struct sockaddr_dl	ndq_sdl;
132 	uint32_t		ndq_flags;
133 	struct callout		ndq_callout;
134 };
135 
136 /*
137  * Input a Neighbor Solicitation Message.
138  *
139  * Based on RFC 2461
140  * Based on RFC 2462 (duplicate address detection)
141  */
142 void
nd6_ns_input(struct mbuf * m,int off,int icmp6len)143 nd6_ns_input(struct mbuf *m, int off, int icmp6len)
144 {
145 	struct ifnet *ifp;
146 	struct ip6_hdr *ip6;
147 	struct nd_neighbor_solicit *nd_ns;
148 	struct in6_addr daddr6, myaddr6, saddr6, taddr6;
149 	struct ifaddr *ifa;
150 	struct sockaddr_dl proxydl;
151 	union nd_opts ndopts;
152 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
153 	char *lladdr;
154 	int lladdrlen, rflag, tentative, tlladdr;
155 	uint32_t dflags;
156 
157 	ifa = NULL;
158 
159 	/* RFC 6980: Nodes MUST silently ignore fragments */
160 	if(m->m_flags & M_FRAGMENTED)
161 		goto freeit;
162 
163 	ifp = m->m_pkthdr.rcvif;
164 	ip6 = mtod(m, struct ip6_hdr *);
165 	if (__predict_false(ip6->ip6_hlim != 255)) {
166 		ICMP6STAT_INC(icp6s_invlhlim);
167 		nd6log((LOG_ERR,
168 		    "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
169 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
170 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
171 		goto bads;
172 	}
173 
174 	if (m->m_len < off + icmp6len) {
175 		m = m_pullup(m, off + icmp6len);
176 		if (m == NULL) {
177 			IP6STAT_INC(ip6s_exthdrtoolong);
178 			return;
179 		}
180 	}
181 	ip6 = mtod(m, struct ip6_hdr *);
182 	nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
183 
184 	saddr6 = ip6->ip6_src;
185 	daddr6 = ip6->ip6_dst;
186 	taddr6 = nd_ns->nd_ns_target;
187 	if (in6_setscope(&taddr6, ifp, NULL) != 0)
188 		goto bad;
189 
190 	rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
191 	if (ifp->if_inet6->nd_flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
192 		rflag = 0;
193 
194 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
195 		/* dst has to be a solicited node multicast address. */
196 		if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
197 		    /* don't check ifindex portion */
198 		    daddr6.s6_addr32[1] == 0 &&
199 		    daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
200 		    daddr6.s6_addr8[12] == 0xff) {
201 			; /* good */
202 		} else {
203 			nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
204 			    "(wrong ip6 dst)\n"));
205 			goto bad;
206 		}
207 	} else if (!V_nd6_onlink_ns_rfc4861) {
208 		struct sockaddr_in6 src_sa6;
209 
210 		/*
211 		 * According to recent IETF discussions, it is not a good idea
212 		 * to accept a NS from an address which would not be deemed
213 		 * to be a neighbor otherwise.  This point is expected to be
214 		 * clarified in future revisions of the specification.
215 		 */
216 		bzero(&src_sa6, sizeof(src_sa6));
217 		src_sa6.sin6_family = AF_INET6;
218 		src_sa6.sin6_len = sizeof(src_sa6);
219 		src_sa6.sin6_addr = saddr6;
220 		if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) {
221 			nd6log((LOG_INFO, "nd6_ns_input: "
222 				"NS packet from non-neighbor\n"));
223 			goto bad;
224 		}
225 	}
226 
227 	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
228 		nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n"));
229 		goto bad;
230 	}
231 
232 	icmp6len -= sizeof(*nd_ns);
233 	nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
234 	if (nd6_options(&ndopts) < 0) {
235 		nd6log((LOG_INFO,
236 		    "nd6_ns_input: invalid ND option, ignored\n"));
237 		/* nd6_options have incremented stats */
238 		goto freeit;
239 	}
240 
241 	lladdr = NULL;
242 	lladdrlen = 0;
243 	if (ndopts.nd_opts_src_lladdr) {
244 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
245 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
246 	}
247 
248 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
249 		nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
250 		    "(link-layer address option)\n"));
251 		goto bad;
252 	}
253 
254 	/*
255 	 * Attaching target link-layer address to the NA?
256 	 * (RFC 2461 7.2.4)
257 	 *
258 	 * NS IP dst is unicast/anycast			MUST NOT add
259 	 * NS IP dst is solicited-node multicast	MUST add
260 	 *
261 	 * In implementation, we add target link-layer address by default.
262 	 * We do not add one in MUST NOT cases.
263 	 */
264 	tlladdr = 0;
265 	if (IN6_IS_ADDR_MULTICAST(&daddr6))
266 		tlladdr |= ND6_NA_OPT_LLA;
267 
268 	/*
269 	 * Target address (taddr6) must be either:
270 	 * (1) Valid unicast/anycast address for my receiving interface,
271 	 * (2) Unicast address for which I'm offering proxy service, or
272 	 * (3) "tentative" address on which DAD is being performed.
273 	 */
274 	/* (1) and (3) check. */
275 	if (ifp->if_carp) {
276 		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
277 		if (ifa != NULL)
278 			tlladdr |= ND6_NA_CARP_MASTER;
279 	} else
280 		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
281 
282 	/* (2) check. */
283 	dflags = 0;
284 	if (ifa == NULL) {
285 		if ((ifa = nd6_proxy_fill_sdl(ifp, &taddr6, &proxydl)) != NULL)
286 			dflags |= ND6_QUEUE_FLAG_PROXY;
287 	}
288 	if (ifa == NULL) {
289 		/*
290 		 * We've got an NS packet, and we don't have that address
291 		 * assigned for us.  We MUST silently ignore it.
292 		 * See RFC2461 7.2.3.
293 		 */
294 		goto freeit;
295 	}
296 	if ((((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) != 0)
297 		dflags |= ND6_QUEUE_FLAG_ANYCAST;
298 	myaddr6 = *IFA_IN6(ifa);
299 	tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
300 	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
301 		goto freeit;
302 
303 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
304 		nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
305 		    "(if %d, NS packet %d)\n",
306 		    ip6_sprintf(ip6bufs, &taddr6),
307 		    ifp->if_addrlen, lladdrlen - 2));
308 		goto bad;
309 	}
310 
311 	if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
312 		nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
313 		    ip6_sprintf(ip6bufs, &saddr6)));
314 		goto freeit;
315 	}
316 
317 	/*
318 	 * We have neighbor solicitation packet, with target address equals to
319 	 * one of my tentative address.
320 	 *
321 	 * src addr	how to process?
322 	 * ---		---
323 	 * multicast	of course, invalid (rejected in ip6_input)
324 	 * unicast	somebody is doing address resolution -> ignore
325 	 * unspec	dup address detection
326 	 *
327 	 * The processing is defined in RFC 2462.
328 	 */
329 	if (tentative) {
330 		/*
331 		 * If source address is unspecified address, it is for
332 		 * duplicate address detection.
333 		 *
334 		 * If not, the packet is for addess resolution;
335 		 * silently ignore it.
336 		 */
337 		if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
338 			nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
339 
340 		goto freeit;
341 	}
342 
343 	/*
344 	 * If the Target Address is either an anycast address or a unicast
345 	 * address for which the node is providing proxy service, or the Target
346 	 * Link-Layer Address option is not included, the Override flag SHOULD
347 	 * be set to zero.  Otherwise, the Override flag SHOULD be set to one.
348 	 */
349 	if (dflags == 0 && (tlladdr & ND6_NA_OPT_LLA) != 0)
350 		rflag |= ND_NA_FLAG_OVERRIDE;
351 	/*
352 	 * If the source address is unspecified address, entries must not
353 	 * be created or updated.
354 	 * It looks that sender is performing DAD. nd6_na_output() will
355 	 * send NA toward all-node multicast address, to tell the sender
356 	 * that I'm using the address.
357 	 * S bit ("solicited") must be zero.
358 	 */
359 	if (!IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
360 		nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
361 		    ND_NEIGHBOR_SOLICIT, 0);
362 		rflag |= ND_NA_FLAG_SOLICITED;
363 	}
364 
365 	/*
366 	 * RFC 4861, anycast or proxy NA sent in response to a NS SHOULD
367 	 * be delayed by a random time between 0 and MAX_ANYCAST_DELAY_TIME
368 	 * to reduce the probability of network congestion.
369 	 */
370 	if (dflags == 0)
371 		nd6_na_output_fib(ifp, &saddr6, &taddr6, rflag, tlladdr, NULL, M_GETFIB(m));
372 	else
373 		nd6_queue_add(ifa, &saddr6, &taddr6, &proxydl, arc4random() %
374 		    (MAX_ANYCAST_DELAY_TIME * hz), dflags);
375  freeit:
376 	if (ifa != NULL)
377 		ifa_free(ifa);
378 	m_freem(m);
379 	return;
380 
381  bad:
382 	nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
383 		ip6_sprintf(ip6bufs, &saddr6)));
384 	nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
385 		ip6_sprintf(ip6bufs, &daddr6)));
386 	nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
387 		ip6_sprintf(ip6bufs, &taddr6)));
388  bads:
389 	ICMP6STAT_INC(icp6s_badns);
390 	if (ifa != NULL)
391 		ifa_free(ifa);
392 	m_freem(m);
393 }
394 
395 static struct ifaddr *
nd6_proxy_fill_sdl(struct ifnet * ifp,const struct in6_addr * taddr6,struct sockaddr_dl * sdl)396 nd6_proxy_fill_sdl(struct ifnet *ifp, const struct in6_addr *taddr6,
397     struct sockaddr_dl *sdl)
398 {
399 	struct ifaddr *ifa;
400 	struct llentry *ln;
401 
402 	ifa = NULL;
403 	ln = nd6_lookup(taddr6, LLE_SF(AF_INET6, 0), ifp);
404 	if (ln == NULL)
405 		return (ifa);
406 	if ((ln->la_flags & (LLE_PUB | LLE_VALID)) == (LLE_PUB | LLE_VALID)) {
407 		link_init_sdl(ifp, (struct sockaddr *)sdl, ifp->if_type);
408 		sdl->sdl_alen = ifp->if_addrlen;
409 		bcopy(ln->ll_addr, &sdl->sdl_data, ifp->if_addrlen);
410 		LLE_RUNLOCK(ln);
411 		ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
412 		    IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
413 	} else
414 		LLE_RUNLOCK(ln);
415 
416 	return (ifa);
417 }
418 
419 /*
420  * Output a Neighbor Solicitation Message. Caller specifies:
421  *	- ICMP6 header source IP6 address
422  *	- ND6 header target IP6 address
423  *	- ND6 header source datalink address
424  *
425  * Based on RFC 2461
426  * Based on RFC 2462 (duplicate address detection)
427  *
428  *    ln - for source address determination
429  * nonce - If non-NULL, NS is used for duplicate address detection and
430  *         the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
431  */
432 static void
nd6_ns_output_fib(struct ifnet * ifp,const struct in6_addr * saddr6,const struct in6_addr * daddr6,const struct in6_addr * taddr6,uint8_t * nonce,u_int fibnum)433 nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
434     const struct in6_addr *daddr6, const struct in6_addr *taddr6,
435     uint8_t *nonce, u_int fibnum)
436 {
437 	struct mbuf *m;
438 	struct m_tag *mtag;
439 	struct ip6_hdr *ip6;
440 	struct nd_neighbor_solicit *nd_ns;
441 	struct ip6_moptions im6o;
442 	int icmp6len;
443 	int maxlen;
444 
445 	NET_EPOCH_ASSERT();
446 
447 	if (IN6_IS_ADDR_MULTICAST(taddr6))
448 		return;
449 
450 	/* estimate the size of message */
451 	maxlen = sizeof(*ip6) + sizeof(*nd_ns);
452 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
453 	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
454 	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
455 	    __func__, max_linkhdr, maxlen, MCLBYTES));
456 
457 	if (max_linkhdr + maxlen > MHLEN)
458 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
459 	else
460 		m = m_gethdr(M_NOWAIT, MT_DATA);
461 	if (m == NULL)
462 		return;
463 	M_SETFIB(m, fibnum);
464 
465 	icmp6len = sizeof(*nd_ns);
466 	m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
467 	m->m_data += max_linkhdr;	/* or M_ALIGN() equivalent? */
468 
469 	/* fill neighbor solicitation packet */
470 	ip6 = mtod(m, struct ip6_hdr *);
471 	ip6->ip6_flow = 0;
472 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
473 	ip6->ip6_vfc |= IPV6_VERSION;
474 	/* ip6->ip6_plen will be set later */
475 	ip6->ip6_nxt = IPPROTO_ICMPV6;
476 	ip6->ip6_hlim = 255;
477 	if (daddr6)
478 		ip6->ip6_dst = *daddr6;
479 	else {
480 		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
481 		ip6->ip6_dst.s6_addr16[1] = 0;
482 		ip6->ip6_dst.s6_addr32[1] = 0;
483 		ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
484 		ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3];
485 		ip6->ip6_dst.s6_addr8[12] = 0xff;
486 		if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
487 			goto bad;
488 	}
489 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
490 		m->m_flags |= M_MCAST;
491 		im6o.im6o_multicast_ifp = ifp;
492 		im6o.im6o_multicast_hlim = 255;
493 		im6o.im6o_multicast_loop = 0;
494 	}
495 	if (nonce == NULL) {
496 		char ip6buf[INET6_ADDRSTRLEN];
497 		struct ifaddr *ifa = NULL;
498 
499 		/*
500 		 * RFC2461 7.2.2:
501 		 * "If the source address of the packet prompting the
502 		 * solicitation is the same as one of the addresses assigned
503 		 * to the outgoing interface, that address SHOULD be placed
504 		 * in the IP Source Address of the outgoing solicitation.
505 		 * Otherwise, any one of the addresses assigned to the
506 		 * interface should be used."
507 		 *
508 		 * We use the source address for the prompting packet
509 		 * (saddr6), if saddr6 belongs to the outgoing interface.
510 		 * Otherwise, we perform the source address selection as usual.
511 		 */
512 		if (saddr6 != NULL)
513 			ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6);
514 		if (ifa == NULL) {
515 			int error;
516 
517 			error = in6_selectsrc_nbr(fibnum, &ip6->ip6_dst, &im6o,
518 			    ifp, &ip6->ip6_src);
519 			if (error) {
520 				nd6log((LOG_DEBUG, "%s: source can't be "
521 				    "determined: dst=%s, error=%d\n", __func__,
522 				    ip6_sprintf(ip6buf, &ip6->ip6_dst),
523 				    error));
524 				goto bad;
525 			}
526 		} else
527 			ip6->ip6_src = *saddr6;
528 
529 		if (ifp->if_carp != NULL) {
530 			/*
531 			 * Check that selected source address belongs to
532 			 * CARP addresses.
533 			 */
534 			if (ifa == NULL)
535 				ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
536 				    &ip6->ip6_src);
537 			/*
538 			 * Do not send NS for CARP address if we are not
539 			 * the CARP master.
540 			 */
541 			if (ifa != NULL && ifa->ifa_carp != NULL &&
542 			    !(*carp_master_p)(ifa)) {
543 				nd6log((LOG_DEBUG,
544 				    "nd6_ns_output: NS from BACKUP CARP address %s\n",
545 				    ip6_sprintf(ip6buf, &ip6->ip6_src)));
546 				ifa_free(ifa);
547 				goto bad;
548 			}
549 		}
550 		if (ifa != NULL)
551 			ifa_free(ifa);
552 	} else {
553 		/*
554 		 * Source address for DAD packet must always be IPv6
555 		 * unspecified address. (0::0)
556 		 * We actually don't have to 0-clear the address (we did it
557 		 * above), but we do so here explicitly to make the intention
558 		 * clearer.
559 		 */
560 		bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
561 	}
562 	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
563 	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
564 	nd_ns->nd_ns_code = 0;
565 	nd_ns->nd_ns_reserved = 0;
566 	nd_ns->nd_ns_target = *taddr6;
567 	in6_clearscope(&nd_ns->nd_ns_target); /* XXX */
568 
569 	/*
570 	 * Add source link-layer address option.
571 	 *
572 	 *				spec		implementation
573 	 *				---		---
574 	 * DAD packet			MUST NOT	do not add the option
575 	 * there's no link layer address:
576 	 *				impossible	do not add the option
577 	 * there's link layer address:
578 	 *	Multicast NS		MUST add one	add the option
579 	 *	Unicast NS		SHOULD add one	add the option
580 	 */
581 	if (nonce == NULL) {
582 		struct nd_opt_hdr *nd_opt;
583 		char *mac;
584 		int optlen;
585 
586 		mac = NULL;
587 		if (ifp->if_carp)
588 			mac = (*carp_macmatch6_p)(ifp, m, &ip6->ip6_src);
589 		if (mac == NULL)
590 			mac = nd6_ifptomac(ifp);
591 
592 		if (mac != NULL) {
593 			nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
594 			optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
595 			/* 8 byte alignments... */
596 			optlen = (optlen + 7) & ~7;
597 			m->m_pkthdr.len += optlen;
598 			m->m_len += optlen;
599 			icmp6len += optlen;
600 			bzero(nd_opt, optlen);
601 			nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
602 			nd_opt->nd_opt_len = optlen >> 3;
603 			bcopy(mac, nd_opt + 1, ifp->if_addrlen);
604 		}
605 	}
606 	/*
607 	 * Add a Nonce option (RFC 3971) to detect looped back NS messages.
608 	 * This behavior is documented as Enhanced Duplicate Address
609 	 * Detection in RFC 7527.
610 	 * net.inet6.ip6.dad_enhanced=0 disables this.
611 	 */
612 	if (V_dad_enhanced != 0 && nonce != NULL) {
613 		int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
614 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
615 		/* 8-byte alignment is required. */
616 		optlen = (optlen + 7) & ~7;
617 
618 		m->m_pkthdr.len += optlen;
619 		m->m_len += optlen;
620 		icmp6len += optlen;
621 		bzero((caddr_t)nd_opt, optlen);
622 		nd_opt->nd_opt_type = ND_OPT_NONCE;
623 		nd_opt->nd_opt_len = optlen >> 3;
624 		bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
625 	}
626 	ip6->ip6_plen = htons((u_short)icmp6len);
627 	nd_ns->nd_ns_cksum = 0;
628 	nd_ns->nd_ns_cksum =
629 	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len);
630 
631 	if (send_sendso_input_hook != NULL) {
632 		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
633 			sizeof(unsigned short), M_NOWAIT);
634 		if (mtag == NULL)
635 			goto bad;
636 		*(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type;
637 		m_tag_prepend(m, mtag);
638 	}
639 
640 	ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
641 	    &im6o, NULL, NULL);
642 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
643 	icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
644 	ICMP6STAT_INC2(icp6s_outhist, ND_NEIGHBOR_SOLICIT);
645 
646 	return;
647 
648   bad:
649 	m_freem(m);
650 }
651 
652 #ifndef BURN_BRIDGES
653 void
nd6_ns_output(struct ifnet * ifp,const struct in6_addr * saddr6,const struct in6_addr * daddr6,const struct in6_addr * taddr6,uint8_t * nonce)654 nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
655     const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce)
656 {
657 
658 	nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB);
659 }
660 #endif
661 /*
662  * Neighbor advertisement input handling.
663  *
664  * Based on RFC 2461
665  * Based on RFC 2462 (duplicate address detection)
666  */
667 void
nd6_na_input(struct mbuf * m,int off,int icmp6len)668 nd6_na_input(struct mbuf *m, int off, int icmp6len)
669 {
670 	struct ifnet *ifp;
671 	struct ip6_hdr *ip6;
672 	struct ifaddr *ifa;
673 	struct llentry *ln;
674 	struct mbuf *chain;
675 	struct nd_neighbor_advert *nd_na;
676 	struct in6_addr daddr6, taddr6;
677 	union nd_opts ndopts;
678 	u_char linkhdr[LLE_MAX_LINKHDR];
679 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
680 	char *lladdr;
681 	size_t linkhdrsize;
682 	int flags, is_override, is_router, is_solicited;
683 	int lladdr_off, lladdrlen, checklink;
684 	bool flush_holdchain = false;
685 
686 	NET_EPOCH_ASSERT();
687 
688 	chain = NULL;
689 	ln = NULL;
690 	checklink = 0;
691 
692 	/* RFC 6980: Nodes MUST silently ignore fragments */
693 	if(m->m_flags & M_FRAGMENTED)
694 		goto freeit;
695 
696 	ifp = m->m_pkthdr.rcvif;
697 	ip6 = mtod(m, struct ip6_hdr *);
698 	if (__predict_false(ip6->ip6_hlim != 255)) {
699 		ICMP6STAT_INC(icp6s_invlhlim);
700 		nd6log((LOG_ERR,
701 		    "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
702 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
703 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
704 		goto bad;
705 	}
706 
707 	if (m->m_len < off + icmp6len) {
708 		m = m_pullup(m, off + icmp6len);
709 		if (m == NULL) {
710 			IP6STAT_INC(ip6s_exthdrtoolong);
711 			return;
712 		}
713 	}
714 	ip6 = mtod(m, struct ip6_hdr *);
715 	nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
716 
717 	flags = nd_na->nd_na_flags_reserved;
718 	is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
719 	is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
720 	is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
721 
722 	taddr6 = nd_na->nd_na_target;
723 	if (in6_setscope(&taddr6, ifp, NULL))
724 		goto bad;	/* XXX: impossible */
725 
726 	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
727 		nd6log((LOG_ERR,
728 		    "nd6_na_input: invalid target address %s\n",
729 		    ip6_sprintf(ip6bufs, &taddr6)));
730 		goto bad;
731 	}
732 
733 	daddr6 = ip6->ip6_dst;
734 	if (IN6_IS_ADDR_MULTICAST(&daddr6))
735 		if (is_solicited) {
736 			nd6log((LOG_ERR,
737 			    "nd6_na_input: a solicited adv is multicasted\n"));
738 			goto bad;
739 		}
740 
741 	icmp6len -= sizeof(*nd_na);
742 	nd6_option_init(nd_na + 1, icmp6len, &ndopts);
743 	if (nd6_options(&ndopts) < 0) {
744 		nd6log((LOG_INFO,
745 		    "nd6_na_input: invalid ND option, ignored\n"));
746 		/* nd6_options have incremented stats */
747 		goto freeit;
748 	}
749 
750 	lladdr = NULL;
751 	lladdrlen = 0;
752 	if (ndopts.nd_opts_tgt_lladdr) {
753 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
754 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
755 	}
756 
757 	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
758 	if (ifa != NULL && ifa->ifa_carp != NULL) {
759 		/*
760 		 * Silently ignore NAs for CARP addresses if we are not
761 		 * the CARP master.
762 		 */
763 		if (!(*carp_master_p)(ifa)) {
764 			nd6log((LOG_DEBUG,
765 			    "nd6_na_input: NA for BACKUP CARP address %s\n",
766 			    ip6_sprintf(ip6bufs, &taddr6)));
767 			ifa_free(ifa);
768 			goto freeit;
769 		}
770 	}
771 	/*
772 	 * Target address matches one of my interface address.
773 	 *
774 	 * If my address is tentative, this means that there's somebody
775 	 * already using the same address as mine.  This indicates DAD failure.
776 	 * This is defined in RFC 2462.
777 	 *
778 	 * Otherwise, process as defined in RFC 2461.
779 	 */
780 	if (ifa
781 	 && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
782 		nd6_dad_na_input(ifa);
783 		ifa_free(ifa);
784 		goto freeit;
785 	}
786 
787 	/* Just for safety, maybe unnecessary. */
788 	if (ifa) {
789 		ifa_free(ifa);
790 		log(LOG_ERR,
791 		    "nd6_na_input: duplicate IP6 address %s\n",
792 		    ip6_sprintf(ip6bufs, &taddr6));
793 		goto freeit;
794 	}
795 
796 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
797 		nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
798 		    "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
799 		    ifp->if_addrlen, lladdrlen - 2));
800 		goto bad;
801 	}
802 
803 	/*
804 	 * If no neighbor cache entry is found, NA SHOULD silently be
805 	 * discarded.
806 	 */
807 	ln = nd6_lookup(&taddr6, LLE_SF(AF_INET6, LLE_EXCLUSIVE), ifp);
808 	if (ln == NULL) {
809 		goto freeit;
810 	}
811 
812 	/*
813 	 * Do not try to override static entry.
814 	 */
815 	if (ln->la_flags & LLE_STATIC)
816 		goto freeit;
817 
818 	if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
819 		/*
820 		 * If the link-layer has address, and no lladdr option came,
821 		 * discard the packet.
822 		 */
823 		if (ifp->if_addrlen && lladdr == NULL) {
824 			goto freeit;
825 		}
826 
827 		/*
828 		 * Record link-layer address, and update the state.
829 		 */
830 		if (!nd6_try_set_entry_addr(ifp, ln, lladdr))
831 			goto freeit;
832 
833 		flush_holdchain = true;
834 		if (is_solicited)
835 			nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
836 		else
837 			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
838 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
839 		if ((ln->ln_router = is_router) != 0) {
840 			/*
841 			 * This means a router's state has changed from
842 			 * non-reachable to probably reachable, and might
843 			 * affect the status of associated prefixes..
844 			 */
845 			checklink = 1;
846 		}
847 	} else {
848 		int llchange;
849 
850 		/*
851 		 * Check if the link-layer address has changed or not.
852 		 */
853 		if (lladdr == NULL)
854 			llchange = 0;
855 		else {
856 			if (ln->la_flags & LLE_VALID) {
857 				if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
858 					llchange = 1;
859 				else
860 					llchange = 0;
861 			} else
862 				llchange = 1;
863 		}
864 
865 		/*
866 		 * This is VERY complex.  Look at it with care.
867 		 *
868 		 * override solicit lladdr llchange	action
869 		 *					(L: record lladdr)
870 		 *
871 		 *	0	0	n	--	(2c)
872 		 *	0	0	y	n	(2b) L
873 		 *	0	0	y	y	(1)    REACHABLE->STALE
874 		 *	0	1	n	--	(2c)   *->REACHABLE
875 		 *	0	1	y	n	(2b) L *->REACHABLE
876 		 *	0	1	y	y	(1)    REACHABLE->STALE
877 		 *	1	0	n	--	(2a)
878 		 *	1	0	y	n	(2a) L
879 		 *	1	0	y	y	(2a) L *->STALE
880 		 *	1	1	n	--	(2a)   *->REACHABLE
881 		 *	1	1	y	n	(2a) L *->REACHABLE
882 		 *	1	1	y	y	(2a) L *->REACHABLE
883 		 */
884 		if (!is_override && (lladdr != NULL && llchange)) {  /* (1) */
885 			/*
886 			 * If state is REACHABLE, make it STALE.
887 			 * no other updates should be done.
888 			 */
889 			if (ln->ln_state == ND6_LLINFO_REACHABLE)
890 				nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
891 			goto freeit;
892 		} else if (is_override				   /* (2a) */
893 			|| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
894 			|| lladdr == NULL) {			   /* (2c) */
895 			/*
896 			 * Update link-local address, if any.
897 			 */
898 			if (lladdr != NULL) {
899 				linkhdrsize = sizeof(linkhdr);
900 				if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
901 				    linkhdr, &linkhdrsize, &lladdr_off) != 0)
902 					goto freeit;
903 				if (lltable_try_set_entry_addr(ifp, ln, linkhdr,
904 				    linkhdrsize, lladdr_off) == 0)
905 					goto freeit;
906 				EVENTHANDLER_INVOKE(lle_event, ln,
907 				    LLENTRY_RESOLVED);
908 			}
909 
910 			/*
911 			 * If solicited, make the state REACHABLE.
912 			 * If not solicited and the link-layer address was
913 			 * changed, make it STALE.
914 			 */
915 			if (is_solicited)
916 				nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
917 			else {
918 				if (lladdr != NULL && llchange)
919 					nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
920 			}
921 		}
922 
923 		if (ln->ln_router && !is_router) {
924 			/*
925 			 * The peer dropped the router flag.
926 			 * Remove the sender from the Default Router List and
927 			 * update the Destination Cache entries.
928 			 */
929 			struct ifnet *nd6_ifp;
930 
931 			nd6_ifp = lltable_get_ifp(ln->lle_tbl);
932 			if (!defrouter_remove(&ln->r_l3addr.addr6, nd6_ifp) &&
933 			    (nd6_ifp->if_inet6->nd_flags &
934 			     ND6_IFF_ACCEPT_RTADV) != 0)
935 				/*
936 				 * Even if the neighbor is not in the default
937 				 * router list, the neighbor may be used as a
938 				 * next hop for some destinations (e.g. redirect
939 				 * case). So we must call rt6_flush explicitly.
940 				 */
941 				rt6_flush(&ip6->ip6_src, ifp);
942 		}
943 		ln->ln_router = is_router;
944 	}
945         /* XXX - QL
946 	 *  Does this matter?
947 	 *  rt->rt_flags &= ~RTF_REJECT;
948 	 */
949 	ln->la_asked = 0;
950 	if (ln->la_hold != NULL)
951 		chain = nd6_grab_holdchain(ln);
952  freeit:
953 	if (ln != NULL)
954 		LLE_WUNLOCK(ln);
955 
956 	if (chain != NULL)
957 		nd6_flush_holdchain(ifp, ln, chain);
958 	if (flush_holdchain)
959 		nd6_flush_children_holdchain(ifp, ln);
960 
961 	if (checklink)
962 		pfxlist_onlink_check();
963 
964 	m_freem(m);
965 	return;
966 
967  bad:
968 	if (ln != NULL)
969 		LLE_WUNLOCK(ln);
970 
971 	ICMP6STAT_INC(icp6s_badna);
972 	m_freem(m);
973 }
974 
975 /*
976  * Neighbor advertisement output handling.
977  *
978  * Based on RFC 2461
979  *
980  * tlladdr:
981  * - 0x01 if include target link-layer address
982  * - 0x02 if target address is CARP MASTER
983  * sdl0 - sockaddr_dl (= proxy NA) or NULL
984  */
985 static void
nd6_na_output_fib(struct ifnet * ifp,const struct in6_addr * daddr6_0,const struct in6_addr * taddr6,u_long flags,int tlladdr,struct sockaddr * sdl0,u_int fibnum)986 nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
987     const struct in6_addr *taddr6, u_long flags, int tlladdr,
988     struct sockaddr *sdl0, u_int fibnum)
989 {
990 	struct mbuf *m;
991 	struct m_tag *mtag;
992 	struct ip6_hdr *ip6;
993 	struct nd_neighbor_advert *nd_na;
994 	struct ip6_moptions im6o;
995 	struct in6_addr daddr6;
996 
997 	NET_EPOCH_ASSERT();
998 
999 	int icmp6len, maxlen, error;
1000 	caddr_t mac = NULL;
1001 
1002 	daddr6 = *daddr6_0;	/* make a local copy for modification */
1003 
1004 	/* estimate the size of message */
1005 	maxlen = sizeof(*ip6) + sizeof(*nd_na);
1006 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
1007 	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
1008 	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
1009 	    __func__, max_linkhdr, maxlen, MCLBYTES));
1010 
1011 	if (max_linkhdr + maxlen > MHLEN)
1012 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1013 	else
1014 		m = m_gethdr(M_NOWAIT, MT_DATA);
1015 	if (m == NULL)
1016 		return;
1017 	M_SETFIB(m, fibnum);
1018 
1019 	icmp6len = sizeof(*nd_na);
1020 	m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
1021 	m->m_data += max_linkhdr;	/* or M_ALIGN() equivalent? */
1022 
1023 	/* fill neighbor advertisement packet */
1024 	ip6 = mtod(m, struct ip6_hdr *);
1025 	ip6->ip6_flow = 0;
1026 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1027 	ip6->ip6_vfc |= IPV6_VERSION;
1028 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1029 	ip6->ip6_hlim = 255;
1030 
1031 	if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
1032 		/* reply to DAD */
1033 		daddr6 = in6addr_linklocal_allnodes;
1034 		if (in6_setscope(&daddr6, ifp, NULL))
1035 			goto bad;
1036 
1037 		flags &= ~ND_NA_FLAG_SOLICITED;
1038 	}
1039 	if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
1040 		m->m_flags |= M_MCAST;
1041 		im6o.im6o_multicast_ifp = ifp;
1042 		im6o.im6o_multicast_hlim = 255;
1043 		im6o.im6o_multicast_loop = 0;
1044 	}
1045 
1046 	ip6->ip6_dst = daddr6;
1047 	error = in6_selectsrc_nbr(fibnum, &daddr6, &im6o, ifp, &ip6->ip6_src);
1048 	if (error) {
1049 		char ip6buf[INET6_ADDRSTRLEN];
1050 		nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
1051 		    "determined: dst=%s, error=%d\n",
1052 		    ip6_sprintf(ip6buf, &daddr6), error));
1053 		goto bad;
1054 	}
1055 	nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
1056 	nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
1057 	nd_na->nd_na_code = 0;
1058 	nd_na->nd_na_target = *taddr6;
1059 	in6_clearscope(&nd_na->nd_na_target); /* XXX */
1060 
1061 	/*
1062 	 * If we respond from CARP address, we need to prepare mac address
1063 	 * for carp_output().
1064 	 */
1065 	if (ifp->if_carp && (tlladdr & ND6_NA_CARP_MASTER))
1066 		mac = (*carp_macmatch6_p)(ifp, m, taddr6);
1067 	/*
1068 	 * "tlladdr" indicates NS's condition for adding tlladdr or not.
1069 	 * see nd6_ns_input() for details.
1070 	 * Basically, if NS packet is sent to unicast/anycast addr,
1071 	 * target lladdr option SHOULD NOT be included.
1072 	 */
1073 	if (tlladdr & ND6_NA_OPT_LLA) {
1074 		/*
1075 		 * sdl0 != NULL indicates proxy NA.  If we do proxy, use
1076 		 * lladdr in sdl0.  If we are not proxying (sending NA for
1077 		 * my address) use lladdr configured for the interface.
1078 		 */
1079 		if (sdl0 == NULL) {
1080 			if (mac == NULL)
1081 				mac = nd6_ifptomac(ifp);
1082 		} else if (sdl0->sa_family == AF_LINK) {
1083 			struct sockaddr_dl *sdl;
1084 			sdl = (struct sockaddr_dl *)sdl0;
1085 			if (sdl->sdl_alen == ifp->if_addrlen)
1086 				mac = LLADDR(sdl);
1087 		}
1088 	}
1089 	if ((tlladdr & ND6_NA_OPT_LLA) && mac != NULL) {
1090 		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
1091 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
1092 
1093 		/* roundup to 8 bytes alignment! */
1094 		optlen = (optlen + 7) & ~7;
1095 
1096 		m->m_pkthdr.len += optlen;
1097 		m->m_len += optlen;
1098 		icmp6len += optlen;
1099 		bzero((caddr_t)nd_opt, optlen);
1100 		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1101 		nd_opt->nd_opt_len = optlen >> 3;
1102 		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
1103 	} else
1104 		flags &= ~ND_NA_FLAG_OVERRIDE;
1105 
1106 	ip6->ip6_plen = htons((u_short)icmp6len);
1107 	nd_na->nd_na_flags_reserved = flags;
1108 	nd_na->nd_na_cksum = 0;
1109 	nd_na->nd_na_cksum =
1110 	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len);
1111 
1112 	if (send_sendso_input_hook != NULL) {
1113 		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
1114 		    sizeof(unsigned short), M_NOWAIT);
1115 		if (mtag == NULL)
1116 			goto bad;
1117 		*(unsigned short *)(mtag + 1) = nd_na->nd_na_type;
1118 		m_tag_prepend(m, mtag);
1119 	}
1120 
1121 	ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL);
1122 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
1123 	icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
1124 	ICMP6STAT_INC2(icp6s_outhist, ND_NEIGHBOR_ADVERT);
1125 
1126 	return;
1127 
1128   bad:
1129 	m_freem(m);
1130 }
1131 
1132 #ifndef BURN_BRIDGES
1133 void
nd6_na_output(struct ifnet * ifp,const struct in6_addr * daddr6_0,const struct in6_addr * taddr6,u_long flags,int tlladdr,struct sockaddr * sdl0)1134 nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
1135     const struct in6_addr *taddr6, u_long flags, int tlladdr,
1136     struct sockaddr *sdl0)
1137 {
1138 
1139 	nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0,
1140 	    RT_DEFAULT_FIB);
1141 }
1142 #endif
1143 
1144 caddr_t
nd6_ifptomac(struct ifnet * ifp)1145 nd6_ifptomac(struct ifnet *ifp)
1146 {
1147 	switch (ifp->if_type) {
1148 	case IFT_ETHER:
1149 	case IFT_IEEE1394:
1150 	case IFT_L2VLAN:
1151 	case IFT_INFINIBAND:
1152 	case IFT_BRIDGE:
1153 		return IF_LLADDR(ifp);
1154 	default:
1155 		return NULL;
1156 	}
1157 }
1158 
1159 struct dadq {
1160 	TAILQ_ENTRY(dadq) dad_list;
1161 	struct ifaddr *dad_ifa;
1162 	int dad_count;		/* max NS to send */
1163 	int dad_ns_tcount;	/* # of trials to send NS */
1164 	int dad_ns_ocount;	/* NS sent so far */
1165 	int dad_ns_icount;
1166 	int dad_na_icount;
1167 	int dad_ns_lcount;	/* looped back NS */
1168 	int dad_loopbackprobe;	/* probing state for loopback detection */
1169 	struct callout dad_timer_ch;
1170 	struct vnet *dad_vnet;
1171 	u_int dad_refcnt;
1172 #define	ND_OPT_NONCE_LEN32 \
1173 		((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
1174 	uint32_t dad_nonce[ND_OPT_NONCE_LEN32];
1175 	bool dad_ondadq;	/* on dadq? Protected by DADQ_WLOCK. */
1176 };
1177 
1178 VNET_DEFINE_STATIC(TAILQ_HEAD(, dadq), dadq);
1179 VNET_DEFINE_STATIC(struct rwlock, dad_rwlock);
1180 #define	V_dadq			VNET(dadq)
1181 #define	V_dad_rwlock		VNET(dad_rwlock)
1182 
1183 #define	DADQ_LOCKPTR()		(&V_dad_rwlock)
1184 #define	DADQ_LOCK_INIT()	rw_init(DADQ_LOCKPTR(), "nd6 DAD queue")
1185 #define	DADQ_RLOCK()		rw_rlock(DADQ_LOCKPTR())
1186 #define	DADQ_RUNLOCK()		rw_runlock(DADQ_LOCKPTR())
1187 #define	DADQ_WLOCK()		rw_wlock(DADQ_LOCKPTR())
1188 #define	DADQ_WUNLOCK()		rw_wunlock(DADQ_LOCKPTR())
1189 
1190 #define	DADQ_LOCK_ASSERT()	rw_assert(DADQ_LOCKPTR(), RA_LOCKED);
1191 #define	DADQ_RLOCK_ASSERT()	rw_assert(DADQ_LOCKPTR(), RA_RLOCKED);
1192 #define	DADQ_WLOCK_ASSERT()	rw_assert(DADQ_LOCKPTR(), RA_WLOCKED);
1193 
1194 static void
nd6_dad_add(struct dadq * dp)1195 nd6_dad_add(struct dadq *dp)
1196 {
1197 	DADQ_WLOCK_ASSERT();
1198 
1199 	TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
1200 	dp->dad_ondadq = true;
1201 }
1202 
1203 static void
nd6_dad_del(struct dadq * dp)1204 nd6_dad_del(struct dadq *dp)
1205 {
1206 	DADQ_WLOCK_ASSERT();
1207 
1208 	if (dp->dad_ondadq) {
1209 		/*
1210 		 * Remove dp from the dadq and release the dadq's
1211 		 * reference.
1212 		 */
1213 		TAILQ_REMOVE(&V_dadq, dp, dad_list);
1214 		dp->dad_ondadq = false;
1215 		nd6_dad_rele(dp);
1216 	}
1217 }
1218 
1219 static struct dadq *
nd6_dad_find(struct ifaddr * ifa,struct nd_opt_nonce * n)1220 nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
1221 {
1222 	struct dadq *dp;
1223 
1224 	DADQ_LOCK_ASSERT();
1225 
1226 	TAILQ_FOREACH(dp, &V_dadq, dad_list) {
1227 		if (dp->dad_ifa != ifa)
1228 			continue;
1229 
1230 		/*
1231 		 * Skip if the nonce matches the received one.
1232 		 * +2 in the length is required because of type and
1233 		 * length fields are included in a header.
1234 		 */
1235 		if (n != NULL &&
1236 		    n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
1237 		    memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
1238 		    ND_OPT_NONCE_LEN) == 0) {
1239 			dp->dad_ns_lcount++;
1240 			continue;
1241 		}
1242 		break;
1243 	}
1244 
1245 	return (dp);
1246 }
1247 
1248 static void
nd6_dad_starttimer(struct dadq * dp,int ticks)1249 nd6_dad_starttimer(struct dadq *dp, int ticks)
1250 {
1251 	DADQ_WLOCK_ASSERT();
1252 
1253 	callout_reset(&dp->dad_timer_ch, ticks, nd6_dad_timer, dp);
1254 }
1255 
1256 static void
nd6_dad_stoptimer(struct dadq * dp)1257 nd6_dad_stoptimer(struct dadq *dp)
1258 {
1259 	callout_drain(&dp->dad_timer_ch);
1260 }
1261 
1262 static void
nd6_dad_rele(struct dadq * dp)1263 nd6_dad_rele(struct dadq *dp)
1264 {
1265 	if (refcount_release(&dp->dad_refcnt)) {
1266 		KASSERT(!dp->dad_ondadq, ("dp %p still on DAD queue", dp));
1267 		ifa_free(dp->dad_ifa);
1268 		free(dp, M_IP6NDP);
1269 	}
1270 }
1271 
1272 void
nd6_dad_init(void)1273 nd6_dad_init(void)
1274 {
1275 	DADQ_LOCK_INIT();
1276 	TAILQ_INIT(&V_dadq);
1277 }
1278 
1279 /*
1280  * Start Duplicate Address Detection (DAD) for specified interface address.
1281  */
1282 void
nd6_dad_start(struct ifaddr * ifa,int delay)1283 nd6_dad_start(struct ifaddr *ifa, int delay)
1284 {
1285 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
1286 	struct dadq *dp;
1287 	char ip6buf[INET6_ADDRSTRLEN];
1288 
1289 	KASSERT((ia->ia6_flags & IN6_IFF_TENTATIVE) != 0,
1290 	    ("starting DAD on non-tentative address %p", ifa));
1291 
1292 	/*
1293 	 * If we don't need DAD, don't do it.
1294 	 * There are several cases:
1295 	 * - DAD is disabled globally or on the interface
1296 	 * - the interface address is anycast
1297 	 */
1298 	if ((ia->ia6_flags & IN6_IFF_ANYCAST) != 0 ||
1299 	    V_ip6_dad_count == 0 ||
1300 	    (ifa->ifa_ifp->if_inet6->nd_flags & ND6_IFF_NO_DAD) != 0) {
1301 		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1302 		return;
1303 	}
1304 	if ((ifa->ifa_ifp->if_flags & IFF_UP) == 0 ||
1305 	    (ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1306 	    (ifa->ifa_ifp->if_inet6->nd_flags & ND6_IFF_IFDISABLED) != 0)
1307 		return;
1308 
1309 	DADQ_WLOCK();
1310 	if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
1311 		/*
1312 		 * DAD is already in progress.  Let the existing entry
1313 		 * finish it.
1314 		 */
1315 		DADQ_WUNLOCK();
1316 		return;
1317 	}
1318 
1319 	dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
1320 	if (dp == NULL) {
1321 		log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
1322 			"%s(%s)\n",
1323 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1324 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1325 		return;
1326 	}
1327 	callout_init_rw(&dp->dad_timer_ch, DADQ_LOCKPTR(),
1328 	    CALLOUT_RETURNUNLOCKED);
1329 #ifdef VIMAGE
1330 	dp->dad_vnet = curvnet;
1331 #endif
1332 	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
1333 	    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
1334 
1335 	/*
1336 	 * Send NS packet for DAD, ip6_dad_count times.
1337 	 * Note that we must delay the first transmission, if this is the
1338 	 * first packet to be sent from the interface after interface
1339 	 * (re)initialization.
1340 	 */
1341 	dp->dad_ifa = ifa;
1342 	ifa_ref(dp->dad_ifa);
1343 	dp->dad_count = V_ip6_dad_count;
1344 	dp->dad_ns_icount = dp->dad_na_icount = 0;
1345 	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
1346 	dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
1347 
1348 	/* Add this to the dadq and add a reference for the dadq. */
1349 	refcount_init(&dp->dad_refcnt, 1);
1350 	nd6_dad_add(dp);
1351 	nd6_dad_starttimer(dp, delay);
1352 	DADQ_WUNLOCK();
1353 }
1354 
1355 /*
1356  * terminate DAD unconditionally.  used for address removals.
1357  */
1358 void
nd6_dad_stop(struct ifaddr * ifa)1359 nd6_dad_stop(struct ifaddr *ifa)
1360 {
1361 	struct dadq *dp;
1362 
1363 	DADQ_WLOCK();
1364 	dp = nd6_dad_find(ifa, NULL);
1365 	if (dp == NULL) {
1366 		DADQ_WUNLOCK();
1367 		/* DAD wasn't started yet */
1368 		return;
1369 	}
1370 
1371 	/*
1372 	 * Acquire a temporary reference so that we can safely stop the callout.
1373 	 */
1374 	(void)refcount_acquire(&dp->dad_refcnt);
1375 	nd6_dad_del(dp);
1376 	DADQ_WUNLOCK();
1377 
1378 	nd6_dad_stoptimer(dp);
1379 	nd6_dad_rele(dp);
1380 }
1381 
1382 static void
nd6_dad_timer(void * arg)1383 nd6_dad_timer(void *arg)
1384 {
1385 	struct dadq *dp = arg;
1386 	struct ifaddr *ifa = dp->dad_ifa;
1387 	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
1388 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
1389 	char ip6buf[INET6_ADDRSTRLEN];
1390 	struct epoch_tracker et;
1391 
1392 	CURVNET_SET(dp->dad_vnet);
1393 	KASSERT(ia != NULL, ("DAD entry %p with no address", dp));
1394 
1395 	NET_EPOCH_ENTER(et);
1396 	if (ifp->if_inet6->nd_flags & ND6_IFF_IFDISABLED) {
1397 		/* Do not need DAD for ifdisabled interface. */
1398 		log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
1399 		    "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
1400 		goto err;
1401 	}
1402 	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
1403 		log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
1404 			"%s(%s)\n",
1405 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1406 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1407 		goto err;
1408 	}
1409 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
1410 		log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
1411 			"%s(%s)\n",
1412 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1413 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1414 		goto err;
1415 	}
1416 
1417 	/* Stop DAD if the interface is down even after dad_maxtry attempts. */
1418 	if ((dp->dad_ns_tcount > V_dad_maxtry) &&
1419 	    (((ifp->if_flags & IFF_UP) == 0) ||
1420 	     ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
1421 		nd6log((LOG_INFO, "%s: could not run DAD "
1422 		    "because the interface was down or not running.\n",
1423 		    if_name(ifa->ifa_ifp)));
1424 		goto err;
1425 	}
1426 
1427 	/* Need more checks? */
1428 	if (dp->dad_ns_ocount < dp->dad_count) {
1429 		/*
1430 		 * We have more NS to go.  Send NS packet for DAD.
1431 		 */
1432 		nd6_dad_starttimer(dp,
1433 		    (long)ifa->ifa_ifp->if_inet6->nd_retrans * hz / 1000);
1434 		nd6_dad_ns_output(dp);
1435 		goto done;
1436 	} else {
1437 		/*
1438 		 * We have transmitted sufficient number of DAD packets.
1439 		 * See what we've got.
1440 		 */
1441 		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) {
1442 			/* We've seen NS or NA, means DAD has failed. */
1443 			nd6_dad_duplicated(ifa, dp);
1444 		} else if (V_dad_enhanced != 0 &&
1445 		    dp->dad_ns_lcount > 0 &&
1446 		    dp->dad_ns_lcount > dp->dad_loopbackprobe) {
1447 			/*
1448 			 * Sec. 4.1 in RFC 7527 requires transmission of
1449 			 * additional probes until the loopback condition
1450 			 * becomes clear when a looped back probe is detected.
1451 			 */
1452 			log(LOG_ERR, "%s: a looped back NS message is "
1453 			    "detected during DAD for %s.  "
1454 			    "Another DAD probes are being sent.\n",
1455 			    if_name(ifa->ifa_ifp),
1456 			    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
1457 			dp->dad_loopbackprobe = dp->dad_ns_lcount;
1458 			/*
1459 			 * Send an NS immediately and increase dad_count by
1460 			 * V_nd6_mmaxtries - 1.
1461 			 */
1462 			dp->dad_count =
1463 			    dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
1464 			nd6_dad_starttimer(dp,
1465 			    (long)ifa->ifa_ifp->if_inet6->nd_retrans * hz / 1000);
1466 			nd6_dad_ns_output(dp);
1467 			goto done;
1468 		} else {
1469 			/*
1470 			 * We are done with DAD.  No NA came, no NS came.
1471 			 * No duplicate address found.  Check IFDISABLED flag
1472 			 * again in case that it is changed between the
1473 			 * beginning of this function and here.
1474 			 *
1475 			 * Reset DAD failures counter if using stable addresses.
1476 			 */
1477 			if ((ifp->if_inet6->nd_flags & ND6_IFF_IFDISABLED) == 0) {
1478 				ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1479 				if ((ifp->if_inet6->nd_flags & ND6_IFF_STABLEADDR) && !(ia->ia6_flags & IN6_IFF_TEMPORARY))
1480 					atomic_store_int(&DAD_FAILURES(ifp), 0);
1481 				/*
1482 				 * RFC 9131 Section 6.1.2: The first advertisement
1483 				 * SHOULD be sent as soon as an address changes the
1484 				 * state from tentative to preferred.
1485 				 */
1486 				nd6_grand_start(ifa, ND6_QUEUE_FLAG_NEWGUA);
1487 			}
1488 
1489 			nd6log((LOG_DEBUG,
1490 			    "%s: DAD complete for %s - no duplicates found\n",
1491 			    if_name(ifa->ifa_ifp),
1492 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
1493 			if (dp->dad_ns_lcount > 0)
1494 				log(LOG_ERR, "%s: DAD completed while "
1495 				    "a looped back NS message is detected "
1496 				    "during DAD for %s.\n",
1497 				    if_name(ifa->ifa_ifp),
1498 				    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
1499 		}
1500 	}
1501 err:
1502 	nd6_dad_del(dp);
1503 	DADQ_WUNLOCK();
1504 done:
1505 	NET_EPOCH_EXIT(et);
1506 	CURVNET_RESTORE();
1507 }
1508 
1509 static void
nd6_dad_duplicated(struct ifaddr * ifa,struct dadq * dp)1510 nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
1511 {
1512 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
1513 	struct ifnet *ifp;
1514 	char ip6buf[INET6_ADDRSTRLEN];
1515 
1516 	ifp = ifa->ifa_ifp;
1517 
1518 	log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
1519 	    "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
1520 	    if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
1521 	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
1522 	    dp->dad_na_icount);
1523 
1524 	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
1525 	ia->ia6_flags |= IN6_IFF_DUPLICATED;
1526 
1527 	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
1528 	    if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
1529 
1530 	/*
1531 	 * For RFC 7217 stable addresses, increment failure counter here if we still have retries.
1532 	 * More addresses will be generated as long as retries are not exhausted.
1533 	 */
1534 	if ((ifp->if_inet6->nd_flags & ND6_IFF_STABLEADDR) && !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
1535 		u_int dad_failures = atomic_load_int(&DAD_FAILURES(ifp));
1536 
1537 		if (dad_failures <= V_ip6_stableaddr_maxretries) {
1538 			atomic_add_int(&DAD_FAILURES(ifp), 1);
1539 			/* if retries exhausted, output an informative error message */
1540 			if (dad_failures == V_ip6_stableaddr_maxretries)
1541 				log(LOG_ERR, "%s: manual intervention required, consider disabling \"stableaddr\" on the interface"
1542 				    " or checking hostuuid for uniqueness\n",
1543 				    if_name(ifp));
1544 		}
1545 	} else {
1546 		log(LOG_ERR, "%s: manual intervention required\n",
1547 		    if_name(ifp));
1548 	}
1549 
1550 	/*
1551 	 * If the address is a link-local address formed from an interface
1552 	 * identifier based on the hardware address which is supposed to be
1553 	 * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
1554 	 * operation on the interface SHOULD be disabled.
1555 	 * [RFC 4862, Section 5.4.5]
1556 	 */
1557 	if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
1558 		struct in6_addr in6;
1559 
1560 		/*
1561 		 * To avoid over-reaction, we only apply this logic when we are
1562 		 * very sure that hardware addresses are supposed to be unique.
1563 		 */
1564 		switch (ifp->if_type) {
1565 		case IFT_ETHER:
1566 		case IFT_ATM:
1567 		case IFT_IEEE1394:
1568 		case IFT_INFINIBAND:
1569 			in6 = ia->ia_addr.sin6_addr;
1570 			if (in6_get_hw_ifid(ifp, &in6) == 0 &&
1571 			    IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
1572 				ifp->if_inet6->nd_flags |= ND6_IFF_IFDISABLED;
1573 				log(LOG_ERR, "%s: possible hardware address "
1574 				    "duplication detected, disable IPv6\n",
1575 				    if_name(ifp));
1576 			}
1577 			break;
1578 		}
1579 	}
1580 }
1581 
1582 /*
1583  * Transmit a neighbour solicitation for the purpose of DAD.  Returns with the
1584  * DAD queue unlocked.
1585  */
1586 static void
nd6_dad_ns_output(struct dadq * dp)1587 nd6_dad_ns_output(struct dadq *dp)
1588 {
1589 	struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa;
1590 	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
1591 	int i;
1592 
1593 	DADQ_WLOCK_ASSERT();
1594 
1595 	dp->dad_ns_tcount++;
1596 	if ((ifp->if_flags & IFF_UP) == 0) {
1597 		DADQ_WUNLOCK();
1598 		return;
1599 	}
1600 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1601 		DADQ_WUNLOCK();
1602 		return;
1603 	}
1604 
1605 	dp->dad_ns_ocount++;
1606 	if (V_dad_enhanced != 0) {
1607 		for (i = 0; i < ND_OPT_NONCE_LEN32; i++)
1608 			dp->dad_nonce[i] = arc4random();
1609 		/*
1610 		 * XXXHRS: Note that in the case that
1611 		 * DupAddrDetectTransmits > 1, multiple NS messages with
1612 		 * different nonces can be looped back in an unexpected
1613 		 * order.  The current implementation recognizes only
1614 		 * the latest nonce on the sender side.  Practically it
1615 		 * should work well in almost all cases.
1616 		 */
1617 	}
1618 	DADQ_WUNLOCK();
1619 	nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr,
1620 	    (uint8_t *)&dp->dad_nonce[0]);
1621 }
1622 
1623 static void
nd6_dad_ns_input(struct ifaddr * ifa,struct nd_opt_nonce * ndopt_nonce)1624 nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
1625 {
1626 	struct dadq *dp;
1627 
1628 	if (ifa == NULL)
1629 		panic("ifa == NULL in nd6_dad_ns_input");
1630 
1631 	/* Ignore Nonce option when Enhanced DAD is disabled. */
1632 	if (V_dad_enhanced == 0)
1633 		ndopt_nonce = NULL;
1634 	DADQ_RLOCK();
1635 	dp = nd6_dad_find(ifa, ndopt_nonce);
1636 	if (dp != NULL)
1637 		dp->dad_ns_icount++;
1638 	DADQ_RUNLOCK();
1639 }
1640 
1641 static void
nd6_dad_na_input(struct ifaddr * ifa)1642 nd6_dad_na_input(struct ifaddr *ifa)
1643 {
1644 	struct dadq *dp;
1645 
1646 	if (ifa == NULL)
1647 		panic("ifa == NULL in nd6_dad_na_input");
1648 
1649 	DADQ_RLOCK();
1650 	dp = nd6_dad_find(ifa, NULL);
1651 	if (dp != NULL)
1652 		dp->dad_na_icount++;
1653 	DADQ_RUNLOCK();
1654 }
1655 
1656 static void
nd6_queue_rel(void * arg)1657 nd6_queue_rel(void *arg)
1658 {
1659 	struct nd_queue *ndq = arg;
1660 	struct ifaddr *ifa;
1661 
1662 	ifa = ndq->ndq_ifa;
1663 	IF_ADDR_WLOCK_ASSERT(ifa->ifa_ifp);
1664 
1665 	/* Remove ndq from the nd_queue list and free it */
1666 	TAILQ_REMOVE(&ifa->ifa_ifp->if_inet6->nd_queue, ndq, ndq_list);
1667 	IF_ADDR_WUNLOCK(ifa->ifa_ifp);
1668 
1669 	free(ndq, M_IP6NDP);
1670 	ifa_free(ifa);
1671 }
1672 
1673 static void
nd6_queue_timer(void * arg)1674 nd6_queue_timer(void *arg)
1675 {
1676 	struct nd_queue *ndq = arg;
1677 	struct ifaddr *ifa = ndq->ndq_ifa;
1678 	struct ifnet *ifp;
1679 	struct in6_addr daddr, taddr;
1680 	struct sockaddr_dl sdl;
1681 	struct epoch_tracker et;
1682 	int delay, tlladdr;
1683 	u_long flags;
1684 	bool proxy;
1685 
1686 	KASSERT(ifa != NULL, ("ND6 queue entry %p with no address", ndq));
1687 
1688 	ifp = ifa->ifa_ifp;
1689 	CURVNET_SET(ifp->if_vnet);
1690 	NET_EPOCH_ENTER(et);
1691 
1692 	daddr = ndq->ndq_daddr;
1693 	taddr = ndq->ndq_taddr;
1694 	tlladdr = ND6_NA_OPT_LLA;
1695 	flags = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
1696 	if ((ifp->if_inet6->nd_flags & ND6_IFF_ACCEPT_RTADV) != 0 && V_ip6_norbit_raif)
1697 		flags &= ~ND_NA_FLAG_ROUTER;
1698 
1699 	/*
1700 	 * RFC 9131 Section 6.1.2: If the address is preferred,
1701 	 * then the Override flag SHOULD NOT be set.
1702 	 */
1703 	if ((ndq->ndq_flags & ND6_QUEUE_FLAG_NEWGUA) != 0) {
1704 		/*
1705 		 * XXX: If the address is in the Optimistic state,
1706 		 * then the Override flag MUST NOT be set.
1707 		 * We don't support RFC 4429 yet.
1708 		 */
1709 		if ((ifp->if_inet6->nd_flags & ND6_IFF_PREFER_SOURCE) == 0)
1710 			flags |= ND_NA_FLAG_OVERRIDE;
1711 	}
1712 	/*
1713 	 * RFC 4861 Section 7.2.6: if link-layer address changed,
1714 	 * The Override flag MAY be set to either zero or one.
1715 	 */
1716 	if ((ndq->ndq_flags & ND6_QUEUE_FLAG_LLADDR) != 0)
1717 		flags |= ND_NA_FLAG_OVERRIDE;
1718 	/* anycast advertisement delay rule (RFC 4861 7.2.7, SHOULD) */
1719 	if ((ndq->ndq_flags & ND6_QUEUE_FLAG_ANYCAST) != 0)
1720 		flags |= ND_NA_FLAG_SOLICITED;
1721 	/* proxy advertisement delay rule (RFC 4861 7.2.8, SHOULD) */
1722 	proxy = false;
1723 	if ((ndq->ndq_flags & ND6_QUEUE_FLAG_PROXY) != 0) {
1724 		flags |= ND_NA_FLAG_SOLICITED;
1725 		sdl = ndq->ndq_sdl;
1726 		proxy = true;
1727 	}
1728 
1729 	/*
1730 	 * if it was GRAND, wait at least a RetransTimer
1731 	 * before removing from queue.
1732 	 */
1733 	if ((ndq->ndq_flags & ND6_QUEUE_GRAND_MASK) != 0) {
1734 		delay = ifp->if_inet6->nd_retrans * hz / 1000;
1735 		callout_reset(&ndq->ndq_callout, delay, nd6_queue_rel, ndq);
1736 		IF_ADDR_WUNLOCK(ifp);
1737 	} else
1738 		nd6_queue_rel(ndq);
1739 
1740 	if (__predict_true(in6_setscope(&daddr, ifp, NULL) == 0))
1741 		nd6_na_output_fib(ifp, &daddr, &taddr, flags, tlladdr,
1742 		    proxy ? (struct sockaddr *)&sdl : NULL, ifp->if_fib);
1743 
1744 	NET_EPOCH_EXIT(et);
1745 	CURVNET_RESTORE();
1746 }
1747 
1748 /*
1749  * Queue a delayed IPv6 Neighbor Advertisement.
1750  *
1751  * daddr: destination address (who the NA is sent to)
1752  * taddr: target address being advertised (used for proxy NAs)
1753  * sdl: link-layer address (used for proxy NAs)
1754  */
1755 static void
nd6_queue_add(struct ifaddr * ifa,struct in6_addr * daddr,struct in6_addr * taddr,struct sockaddr_dl * sdl,int delay,uint32_t flags)1756 nd6_queue_add(struct ifaddr *ifa, struct in6_addr *daddr,
1757     struct in6_addr *taddr, struct sockaddr_dl *sdl, int delay, uint32_t flags)
1758 {
1759 	struct nd_queue *ndq = NULL;
1760 	struct ifnet *ifp;
1761 	struct in6_ifextra *ext;
1762 	char ip6buf[INET6_ADDRSTRLEN];
1763 
1764 	NET_EPOCH_ASSERT();
1765 
1766 	ifp = ifa->ifa_ifp;
1767 	ext = ifp->if_inet6;
1768 	IF_ADDR_WLOCK(ifp);
1769 	/*
1770 	 * if request comes from GRAND, check whether another delayed
1771 	 * GRAND NA exists in the queue.
1772 	 * If it exists, cancel previous one and reuse its ndq.
1773 	 */
1774 	if ((flags & ND6_QUEUE_GRAND_MASK) != 0) {
1775 		TAILQ_FOREACH(ndq, &ext->nd_queue, ndq_list) {
1776 			if (ndq->ndq_ifa == ifa &&
1777 			    (flags & ND6_QUEUE_GRAND_MASK) != 0)
1778 				break;
1779 		}
1780 	}
1781 	if (ndq == NULL) {
1782 		ndq = malloc(sizeof(*ndq), M_IP6NDP, M_NOWAIT | M_ZERO);
1783 		if (ndq == NULL) {
1784 			log(LOG_ERR, "%s: memory allocation failed for %s(%s)\n",
1785 			    __func__, ip6_sprintf(ip6buf, IFA_IN6(ifa)),
1786 			    ifp ? if_name(ifp) : "???");
1787 			IF_ADDR_WUNLOCK(ifp);
1788 			return;
1789 		}
1790 
1791 		callout_init_mtx(&ndq->ndq_callout, &ifp->if_addr_lock,
1792 		    CALLOUT_TRYLOCK | CALLOUT_RETURNUNLOCKED);
1793 		ifa_ref(ifa);
1794 		ndq->ndq_ifa = ifa;
1795 		TAILQ_INSERT_TAIL(&ext->nd_queue, ndq, ndq_list);
1796 	}
1797 
1798 	memcpy(&ndq->ndq_daddr, daddr, sizeof(struct in6_addr));
1799 	/*
1800 	 * For proxy NAs, the target address (taddr) being advertised differs from
1801 	 * the interface address (ifa), so we must explicitly store both the proxy
1802 	 * target address and its link-layer address (sdl).
1803 	 * For non-proxy NAs, use the interface address (ifa) itself as the target.
1804 	 */
1805 	if ((flags & ND6_QUEUE_FLAG_PROXY) != 0) {
1806 		memcpy(&ndq->ndq_taddr, taddr, sizeof(struct in6_addr));
1807 		memcpy(&ndq->ndq_sdl, sdl, sizeof(struct sockaddr_dl));
1808 	} else
1809 		memcpy(&ndq->ndq_taddr, IFA_IN6(ifa), sizeof(struct in6_addr));
1810 	ndq->ndq_flags = flags;
1811 
1812 	nd6log((LOG_DEBUG, "%s: delay IPv6 NA for %s\n", if_name(ifp),
1813 	    ip6_sprintf(ip6buf, IFA_IN6(ifa))));
1814 	callout_reset(&ndq->ndq_callout, delay, nd6_queue_timer, ndq);
1815 	IF_ADDR_WUNLOCK(ifp);
1816 }
1817 
1818 /*
1819  * Start Gratuitous Neighbor Discovery (GRAND) for specified address.
1820  * Called after DAD completes and by interface link layer change event.
1821  */
1822 void
nd6_grand_start(struct ifaddr * ifa,uint32_t flags)1823 nd6_grand_start(struct ifaddr *ifa, uint32_t flags)
1824 {
1825 	struct nd_queue *ndq;
1826 	struct in6_ifextra *ext = ifa->ifa_ifp->if_inet6;
1827 	struct in6_addr daddr = IN6ADDR_ANY_INIT;
1828 	int delay, count = 0;
1829 
1830 	NET_EPOCH_ASSERT();
1831 	/* If we don't need GRAND, don't do it. */
1832 	if (V_ip6_grand_count == 0 ||
1833 	    ifa->ifa_carp != NULL)
1834 		return;
1835 
1836 	/* Check if new address is global */
1837 	if ((flags & ND6_QUEUE_FLAG_NEWGUA) != 0 &&
1838 	    in6_addrscope(IFA_IN6(ifa)) != IPV6_ADDR_SCOPE_GLOBAL)
1839 		return;
1840 
1841 	/*
1842 	 * RFC 9131 Section 6.1.2: These advertisements MUST be
1843 	 * separated by at least RetransTimer seconds.
1844 	 */
1845 	TAILQ_FOREACH(ndq, &ext->nd_queue, ndq_list) {
1846 		/*
1847 		 * RFC 9131 Section 6.1.2: a node SHOULD send
1848 		 * up to MAX_NEIGHBOR_ADVERTISEMENT Neighbor Advertisement messages.
1849 		 * Make sure we don't queue GRAND more than V_ip6_grand_count
1850 		 * per interface.
1851 		 * Since this limitation only applies to GRAND, don't
1852 		 * count non-GRAND ndq.
1853 		 */
1854 		if ((ndq->ndq_flags & ND6_QUEUE_GRAND_MASK) == 0)
1855 			continue;
1856 
1857 		count++;
1858 		if (count >= V_ip6_grand_count)
1859 			return;
1860 	}
1861 
1862 	/*
1863 	 * RFC 9131 Section 6.1.2: if new global address added,
1864 	 * use the all-routers multicast address.
1865 	 */
1866 	if ((flags & ND6_QUEUE_FLAG_NEWGUA) != 0)
1867 		daddr = in6addr_linklocal_allrouters;
1868 
1869 	/*
1870 	 * RFC 4861 Section 7.2.6: if link-layer address changed,
1871 	 * use the all-nodes multicast address.
1872 	 */
1873 	if ((flags & ND6_QUEUE_FLAG_LLADDR) != 0)
1874 		daddr = in6addr_linklocal_allnodes;
1875 
1876 	delay = ext->nd_retrans * hz / 1000;
1877 	nd6_queue_add(ifa, &daddr, NULL, NULL, count * delay, flags);
1878 }
1879 
1880 /*
1881  * drain nd6 queue. used for address removals.
1882  */
1883 void
nd6_queue_stop(struct ifaddr * ifa)1884 nd6_queue_stop(struct ifaddr *ifa)
1885 {
1886 	struct nd_queue *ndq, *dndq;
1887 	struct ifnet *ifp;
1888 
1889 	ifp = ifa->ifa_ifp;
1890 	IF_ADDR_WLOCK(ifp);
1891 	TAILQ_FOREACH_SAFE(ndq, &ifp->if_inet6->nd_queue, ndq_list, dndq) {
1892 		if (ndq->ndq_ifa != ifa)
1893 			continue;
1894 
1895 		callout_stop(&ndq->ndq_callout);
1896 
1897 		/* Remove ndq from the nd_queue list and free it */
1898 		TAILQ_REMOVE(&ifa->ifa_ifp->if_inet6->nd_queue, ndq, ndq_list);
1899 		free(ndq, M_IP6NDP);
1900 		ifa_free(ifa);
1901 	}
1902 	IF_ADDR_WUNLOCK(ifp);
1903 }
1904