1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2007-2009 Bruce Simpson.
5 * Copyright (c) 2005 Robert N. M. Watson.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote
17 * products derived from this software without specific prior written
18 * permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * IPv4 multicast socket, group, and socket option processing module.
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/protosw.h>
47 #include <sys/sysctl.h>
48 #include <sys/ktr.h>
49 #include <sys/taskqueue.h>
50 #include <sys/tree.h>
51
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <net/if_dl.h>
55 #include <net/route.h>
56 #include <net/route/nhop.h>
57 #include <net/vnet.h>
58
59 #include <net/ethernet.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in_fib.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <net/if_private.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/igmp_var.h>
69 #include <netinet/ip_mroute.h>
70
71 #ifndef KTR_IGMPV3
72 #define KTR_IGMPV3 KTR_INET
73 #endif
74
/*
 * Overlay of the socket address layouts used in this file, so one object
 * can be viewed as a generic sockaddr, a link-layer sockaddr_dl or an
 * AF_INET sockaddr_in.  The __SOCKUNION_DECLARED guard keeps this from
 * clashing with an identical declaration that may already be in scope.
 */
#ifndef __SOCKUNION_DECLARED
union sockunion {
	struct sockaddr_storage	ss;
	struct sockaddr		sa;
	struct sockaddr_dl	sdl;
	struct sockaddr_in	sin;
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
85
/*
 * Allocation type tags for the objects allocated and freed in this file:
 * socket-layer filters and their sources (M_INMFILTER), in_multi group
 * records (M_IPMADDR), ip_moptions (M_IPMOPTS) and IGMP-layer sources
 * (M_IPMSOURCE).
 */
static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
    "IPv4 multicast PCB-layer source filter");
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
    "IPv4 multicast IGMP-layer source filter");
92
/*
 * Locking:
 *
 * - Lock order is: IN_MULTI_LOCK, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK,
 *                  IF_ADDR_LOCK.
 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
 *   it can be taken by code in net/if.c also.
 *   NOTE(review): inm_lookup() in this file brackets the lookup with
 *   NET_EPOCH_ENTER()/NET_EPOCH_EXIT() rather than taking IF_ADDR_LOCK
 *   explicitly -- confirm whether this bullet is still accurate.
 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
 *
 * struct in_multi is covered by IN_MULTI_LIST_LOCK.  There isn't strictly
 * any need for in_multi itself to be virtualized -- it is bound to an ifp
 * anyway no matter what happens.
 */
/* Presumably the mutex behind the IN_MULTI_LIST_LOCK macros -- TODO confirm. */
struct mtx in_multi_list_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF);

/* Protects inm_free_list, the queue of in_multi records awaiting release. */
struct mtx in_multi_free_mtx;
MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF);

/* Presumably the sx lock behind the IN_MULTI_LOCK macros -- TODO confirm. */
struct sx in_multi_sx;
SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx");
114
/*
 * Functions with non-static linkage defined in this file should be
 * declared in in_var.h:
 *  imo_multi_filter()
 *  in_joingroup()
 *  in_joingroup_locked()
 *  in_leavegroup()
 *  in_leavegroup_locked()
 * and ip_var.h:
 *  inp_freemoptions()
 *  inp_getmoptions()
 *  inp_setmoptions()
 *
 * What follows are forward declarations of the file-local helpers:
 * imf_*() operate on socket-layer filters (struct in_mfilter), imo_*() on
 * a socket's ip_moptions, inm_*()/ims_*() on IGMP-layer group and source
 * state, and inp_*() are the per-inpcb socket option handlers.
 */
static void	imf_commit(struct in_mfilter *);
static int	imf_get_source(struct in_mfilter *imf,
		    const struct sockaddr_in *psin,
		    struct in_msource **);
static struct in_msource *
		imf_graft(struct in_mfilter *, const uint8_t,
		    const struct sockaddr_in *);
static void	imf_leave(struct in_mfilter *);
static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
static void	imf_purge(struct in_mfilter *);
static void	imf_rollback(struct in_mfilter *);
static void	imf_reap(struct in_mfilter *);
static struct in_mfilter *
		imo_match_group(const struct ip_moptions *,
		    const struct ifnet *, const struct sockaddr *);
static struct in_msource *
		imo_match_source(struct in_mfilter *, const struct sockaddr *);
static void	ims_merge(struct ip_msource *ims,
		    const struct in_msource *lims, const int rollback);
static int	in_getmulti(struct ifnet *, const struct in_addr *,
		    struct in_multi **);
static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
		    const int noalloc, struct ip_msource **pims);
#ifdef KTR
/* Only compiled when kernel tracing (KTR) is enabled. */
static int	inm_is_ifp_detached(const struct in_multi *);
#endif
static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
static void	inm_purge(struct in_multi *);
static void	inm_reap(struct in_multi *);
static void	inm_release(struct in_multi *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
167
/* Parent node: net.inet.ip.mcast */
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv4 multicast");

/*
 * Upper bound on the number of IGMP-layer sources kept per group; checked
 * against inm_nsrc before a new ip_msource is allocated.
 */
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
    CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0,
    "Max source filters per group");

/*
 * Upper bound on the number of socket-layer sources kept per filter;
 * checked against imf_nsrc before a new in_msource is allocated.
 */
static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
    CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0,
    "Max source filters per socket");

/* Default loopback setting for multicast datagrams (non-static linkage). */
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
    &in_mcast_loop, 0, "Loopback multicast datagrams by default");

/* Read-only node served by sysctl_ip_mcast_filters(). */
static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
    "Per-interface stack-wide source filters");
189
190 #ifdef KTR
191 /*
192 * Inline function which wraps assertions for a valid ifp.
193 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
194 * is detached.
195 */
196 static int __inline
inm_is_ifp_detached(const struct in_multi * inm)197 inm_is_ifp_detached(const struct in_multi *inm)
198 {
199 struct ifnet *ifp;
200
201 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
202 ifp = inm->inm_ifma->ifma_ifp;
203 if (ifp != NULL) {
204 /*
205 * Sanity check that netinet's notion of ifp is the
206 * same as net's.
207 */
208 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
209 }
210
211 return (ifp == NULL);
212 }
213 #endif
214
/*
 * Interface detach can happen in a taskqueue thread context, so we must use a
 * dedicated thread to avoid deadlocks when draining inm_release tasks.
 *
 * inm_free_list collects in_multi records whose release has been deferred
 * (guarded by in_multi_free_mtx); inm_free_task runs inm_release_task() on
 * the dedicated taskqueue to release them.
 */
TASKQUEUE_DEFINE_THREAD(inm_free);
static struct in_multi_head inm_free_list = SLIST_HEAD_INITIALIZER();
static void inm_release_task(void *arg __unused, int pending __unused);
static struct task inm_free_task = TASK_INITIALIZER(0, inm_release_task, NULL);
223
224 void
inm_release_wait(void * arg __unused)225 inm_release_wait(void *arg __unused)
226 {
227
228 /*
229 * Make sure all pending multicast addresses are freed before
230 * the VNET or network device is destroyed:
231 */
232 taskqueue_drain(taskqueue_inm_free, &inm_free_task);
233 }
234 #ifdef VIMAGE
235 /* XXX-BZ FIXME, see D24914. */
236 VNET_SYSUNINIT(inm_release_wait, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, inm_release_wait, NULL);
237 #endif
238
239 void
inm_release_list_deferred(struct in_multi_head * inmh)240 inm_release_list_deferred(struct in_multi_head *inmh)
241 {
242
243 if (SLIST_EMPTY(inmh))
244 return;
245 mtx_lock(&in_multi_free_mtx);
246 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele);
247 mtx_unlock(&in_multi_free_mtx);
248 taskqueue_enqueue(taskqueue_inm_free, &inm_free_task);
249 }
250
251 void
inm_disconnect(struct in_multi * inm)252 inm_disconnect(struct in_multi *inm)
253 {
254 struct ifnet *ifp;
255 struct ifmultiaddr *ifma, *ll_ifma;
256
257 ifp = inm->inm_ifp;
258 IF_ADDR_WLOCK_ASSERT(ifp);
259 ifma = inm->inm_ifma;
260
261 if_ref(ifp);
262 if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
263 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
264 ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
265 }
266 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
267 if ((ll_ifma = ifma->ifma_llifma) != NULL) {
268 MPASS(ifma != ll_ifma);
269 ifma->ifma_llifma = NULL;
270 MPASS(ll_ifma->ifma_llifma == NULL);
271 MPASS(ll_ifma->ifma_ifp == ifp);
272 if (--ll_ifma->ifma_refcount == 0) {
273 if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
274 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
275 ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
276 }
277 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
278 if_freemulti(ll_ifma);
279 }
280 }
281 }
282
283 void
inm_release_deferred(struct in_multi * inm)284 inm_release_deferred(struct in_multi *inm)
285 {
286 struct in_multi_head tmp;
287
288 IN_MULTI_LIST_LOCK_ASSERT();
289 MPASS(inm->inm_refcount > 0);
290 if (--inm->inm_refcount == 0) {
291 SLIST_INIT(&tmp);
292 inm_disconnect(inm);
293 inm->inm_ifma->ifma_protospec = NULL;
294 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele);
295 inm_release_list_deferred(&tmp);
296 }
297 }
298
299 static void
inm_release_task(void * arg __unused,int pending __unused)300 inm_release_task(void *arg __unused, int pending __unused)
301 {
302 struct in_multi_head inm_free_tmp;
303 struct in_multi *inm, *tinm;
304
305 SLIST_INIT(&inm_free_tmp);
306 mtx_lock(&in_multi_free_mtx);
307 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele);
308 mtx_unlock(&in_multi_free_mtx);
309 IN_MULTI_LOCK();
310 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) {
311 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele);
312 MPASS(inm);
313 inm_release(inm);
314 }
315 IN_MULTI_UNLOCK();
316 }
317
318 /*
319 * Initialize an in_mfilter structure to a known state at t0, t1
320 * with an empty source filter list.
321 */
322 static __inline void
imf_init(struct in_mfilter * imf,const int st0,const int st1)323 imf_init(struct in_mfilter *imf, const int st0, const int st1)
324 {
325 memset(imf, 0, sizeof(struct in_mfilter));
326 RB_INIT(&imf->imf_sources);
327 imf->imf_st[0] = st0;
328 imf->imf_st[1] = st1;
329 }
330
331 struct in_mfilter *
ip_mfilter_alloc(const int mflags,const int st0,const int st1)332 ip_mfilter_alloc(const int mflags, const int st0, const int st1)
333 {
334 struct in_mfilter *imf;
335
336 imf = malloc(sizeof(*imf), M_INMFILTER, mflags);
337 if (imf != NULL)
338 imf_init(imf, st0, st1);
339
340 return (imf);
341 }
342
343 void
ip_mfilter_free(struct in_mfilter * imf)344 ip_mfilter_free(struct in_mfilter *imf)
345 {
346
347 imf_purge(imf);
348 free(imf, M_INMFILTER);
349 }
350
351 /*
352 * Function for looking up an in_multi record for an IPv4 multicast address
353 * on a given interface. ifp must be valid. If no record found, return NULL.
354 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held.
355 */
356 struct in_multi *
inm_lookup_locked(struct ifnet * ifp,const struct in_addr ina)357 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
358 {
359 struct ifmultiaddr *ifma;
360 struct in_multi *inm;
361
362 IN_MULTI_LIST_LOCK_ASSERT();
363 IF_ADDR_LOCK_ASSERT(ifp);
364
365 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
366 inm = inm_ifmultiaddr_get_inm(ifma);
367 if (inm == NULL)
368 continue;
369 if (inm->inm_addr.s_addr == ina.s_addr)
370 return (inm);
371 }
372 return (NULL);
373 }
374
375 /*
376 * Wrapper for inm_lookup_locked().
377 * The IF_ADDR_LOCK will be taken on ifp and released on return.
378 */
379 struct in_multi *
inm_lookup(struct ifnet * ifp,const struct in_addr ina)380 inm_lookup(struct ifnet *ifp, const struct in_addr ina)
381 {
382 struct epoch_tracker et;
383 struct in_multi *inm;
384
385 IN_MULTI_LIST_LOCK_ASSERT();
386 NET_EPOCH_ENTER(et);
387
388 inm = inm_lookup_locked(ifp, ina);
389 NET_EPOCH_EXIT(et);
390
391 return (inm);
392 }
393
394 /*
395 * Find an IPv4 multicast group entry for this ip_moptions instance
396 * which matches the specified group, and optionally an interface.
397 * Return its index into the array, or -1 if not found.
398 */
399 static struct in_mfilter *
imo_match_group(const struct ip_moptions * imo,const struct ifnet * ifp,const struct sockaddr * group)400 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
401 const struct sockaddr *group)
402 {
403 const struct sockaddr_in *gsin;
404 struct in_mfilter *imf;
405 struct in_multi *inm;
406
407 gsin = (const struct sockaddr_in *)group;
408
409 IP_MFILTER_FOREACH(imf, &imo->imo_head) {
410 inm = imf->imf_inm;
411 if (inm == NULL)
412 continue;
413 if ((ifp == NULL || (inm->inm_ifp == ifp)) &&
414 in_hosteq(inm->inm_addr, gsin->sin_addr)) {
415 break;
416 }
417 }
418 return (imf);
419 }
420
421 /*
422 * Find an IPv4 multicast source entry for this imo which matches
423 * the given group index for this socket, and source address.
424 *
425 * NOTE: This does not check if the entry is in-mode, merely if
426 * it exists, which may not be the desired behaviour.
427 */
428 static struct in_msource *
imo_match_source(struct in_mfilter * imf,const struct sockaddr * src)429 imo_match_source(struct in_mfilter *imf, const struct sockaddr *src)
430 {
431 struct ip_msource find;
432 struct ip_msource *ims;
433 const sockunion_t *psa;
434
435 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
436
437 /* Source trees are keyed in host byte order. */
438 psa = (const sockunion_t *)src;
439 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
440 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
441
442 return ((struct in_msource *)ims);
443 }
444
445 /*
446 * Perform filtering for multicast datagrams on a socket by group and source.
447 *
448 * Returns 0 if a datagram should be allowed through, or various error codes
449 * if the socket was not a member of the group, or the source was muted, etc.
450 */
451 int
imo_multi_filter(const struct ip_moptions * imo,const struct ifnet * ifp,const struct sockaddr * group,const struct sockaddr * src)452 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
453 const struct sockaddr *group, const struct sockaddr *src)
454 {
455 struct in_mfilter *imf;
456 struct in_msource *ims;
457 int mode;
458
459 KASSERT(ifp != NULL, ("%s: null ifp", __func__));
460
461 imf = imo_match_group(imo, ifp, group);
462 if (imf == NULL)
463 return (MCAST_NOTGMEMBER);
464
465 /*
466 * Check if the source was included in an (S,G) join.
467 * Allow reception on exclusive memberships by default,
468 * reject reception on inclusive memberships by default.
469 * Exclude source only if an in-mode exclude filter exists.
470 * Include source only if an in-mode include filter exists.
471 * NOTE: We are comparing group state here at IGMP t1 (now)
472 * with socket-layer t0 (since last downcall).
473 */
474 mode = imf->imf_st[1];
475 ims = imo_match_source(imf, src);
476
477 if ((ims == NULL && mode == MCAST_INCLUDE) ||
478 (ims != NULL && ims->imsl_st[0] == MCAST_EXCLUDE))
479 return (MCAST_NOTSMEMBER);
480
481 return (MCAST_PASS);
482 }
483
/*
 * Find and return a reference to an in_multi record for (ifp, group),
 * and bump its reference count.
 * If one does not exist, try to allocate it, and update link-layer multicast
 * filters on ifp to listen for group.
 * Assumes the IN_MULTI lock is held across the call.
 *
 * ifp   is the interface on which the group is wanted.
 * group is the IPv4 group address.
 * pinm  receives the record on success; it is not written on failure.
 *
 * A newly allocated record starts with a reference count of 1.
 * Return 0 if successful, otherwise return an appropriate error code:
 * whatever if_addmulti() reports, or ENOMEM if the record itself could
 * not be allocated.
 */
static int
in_getmulti(struct ifnet *ifp, const struct in_addr *group,
    struct in_multi **pinm)
{
	struct sockaddr_in gsin;
	struct ifmultiaddr *ifma;
	struct in_ifinfo *ii;
	struct in_multi *inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	ii = (struct in_ifinfo *)ifp->if_inet;
	/* Fast path: the group already has a record on this interface. */
	IN_MULTI_LIST_LOCK();
	inm = inm_lookup(ifp, *group);
	if (inm != NULL) {
		/*
		 * If we already joined this group, just bump the
		 * refcount and return it.
		 */
		KASSERT(inm->inm_refcount >= 1,
		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
		inm_acquire_locked(inm);
		*pinm = inm;
	}
	IN_MULTI_LIST_UNLOCK();
	if (inm != NULL)
		return (0);

	/* Build the AF_INET sockaddr form of the group for the ifnet layer. */
	memset(&gsin, 0, sizeof(gsin));
	gsin.sin_family = AF_INET;
	gsin.sin_len = sizeof(struct sockaddr_in);
	gsin.sin_addr = *group;

	/*
	 * Check if a link-layer group is already associated
	 * with this network-layer group on the given ifnet.
	 * Note that if_addmulti() is called with the list lock dropped.
	 */
	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
	if (error != 0)
		return (error);

	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
	IN_MULTI_LIST_LOCK();
	IF_ADDR_WLOCK(ifp);

	/*
	 * If the ifma already carries a protospec pointer, treat it as the
	 * existing network-layer group association: bump its refcount and
	 * return it.  With INVARIANTS, first verify that it really is a
	 * consistent AF_INET in_multi for (ifp, group) and panic otherwise.
	 */
	if (ifma->ifma_protospec != NULL) {
		inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
		    __func__));
		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
		    ("%s: ifma not AF_INET", __func__));
		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
		    !in_hosteq(inm->inm_addr, *group)) {
			char addrbuf[INET_ADDRSTRLEN];

			panic("%s: ifma %p is inconsistent with %p (%s)",
			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
		}
#endif
		inm_acquire_locked(inm);
		*pinm = inm;
		goto out_locked;
	}

	IF_ADDR_WLOCK_ASSERT(ifp);

	/*
	 * A new in_multi record is needed; allocate and initialize it.
	 * We DO NOT perform an IGMP join as the in_ layer may need to
	 * push an initial source list down to IGMP to support SSM.
	 *
	 * The initial source filter state is INCLUDE, {} as per the RFC.
	 *
	 * M_NOWAIT is used because locks are held here.
	 */
	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
	if (inm == NULL) {
		/* Back out: drop both locks, then undo if_addmulti(). */
		IF_ADDR_WUNLOCK(ifp);
		IN_MULTI_LIST_UNLOCK();
		if_delmulti_ifma(ifma);
		return (ENOMEM);
	}
	inm->inm_addr = *group;
	inm->inm_ifp = ifp;
	inm->inm_igi = ii->ii_igmp;
	inm->inm_ifma = ifma;
	inm->inm_refcount = 1;
	inm->inm_state = IGMP_NOT_MEMBER;
	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	RB_INIT(&inm->inm_srcs);

	/* Link the ifma back to its new network-layer record. */
	ifma->ifma_protospec = inm;

	*pinm = inm;
 out_locked:
	IF_ADDR_WUNLOCK(ifp);
	IN_MULTI_LIST_UNLOCK();
	return (0);
}
601
602 /*
603 * Drop a reference to an in_multi record.
604 *
605 * If the refcount drops to 0, free the in_multi record and
606 * delete the underlying link-layer membership.
607 */
608 static void
inm_release(struct in_multi * inm)609 inm_release(struct in_multi *inm)
610 {
611 struct ifmultiaddr *ifma;
612 struct ifnet *ifp;
613
614 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
615 MPASS(inm->inm_refcount == 0);
616 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
617
618 ifma = inm->inm_ifma;
619 ifp = inm->inm_ifp;
620
621 /* XXX this access is not covered by IF_ADDR_LOCK */
622 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
623 if (ifp != NULL) {
624 CURVNET_SET(ifp->if_vnet);
625 inm_purge(inm);
626 free(inm, M_IPMADDR);
627 if_delmulti_ifma_flags(ifma, 1);
628 CURVNET_RESTORE();
629 if_rele(ifp);
630 } else {
631 inm_purge(inm);
632 free(inm, M_IPMADDR);
633 if_delmulti_ifma_flags(ifma, 1);
634 }
635 }
636
637 /*
638 * Clear recorded source entries for a group.
639 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
640 * FIXME: Should reap.
641 */
642 void
inm_clear_recorded(struct in_multi * inm)643 inm_clear_recorded(struct in_multi *inm)
644 {
645 struct ip_msource *ims;
646
647 IN_MULTI_LIST_LOCK_ASSERT();
648
649 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
650 if (ims->ims_stp) {
651 ims->ims_stp = 0;
652 --inm->inm_st[1].iss_rec;
653 }
654 }
655 KASSERT(inm->inm_st[1].iss_rec == 0,
656 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
657 }
658
659 /*
660 * Record a source as pending for a Source-Group IGMPv3 query.
661 * This lives here as it modifies the shared tree.
662 *
663 * inm is the group descriptor.
664 * naddr is the address of the source to record in network-byte order.
665 *
666 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
667 * lazy-allocate a source node in response to an SG query.
668 * Otherwise, no allocation is performed. This saves some memory
669 * with the trade-off that the source will not be reported to the
670 * router if joined in the window between the query response and
671 * the group actually being joined on the local host.
672 *
673 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
674 * This turns off the allocation of a recorded source entry if
675 * the group has not been joined.
676 *
677 * Return 0 if the source didn't exist or was already marked as recorded.
678 * Return 1 if the source was marked as recorded by this function.
679 * Return <0 if any error occurred (negated errno code).
680 */
681 int
inm_record_source(struct in_multi * inm,const in_addr_t naddr)682 inm_record_source(struct in_multi *inm, const in_addr_t naddr)
683 {
684 struct ip_msource find;
685 struct ip_msource *ims, *nims;
686
687 IN_MULTI_LIST_LOCK_ASSERT();
688
689 find.ims_haddr = ntohl(naddr);
690 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
691 if (ims && ims->ims_stp)
692 return (0);
693 if (ims == NULL) {
694 if (inm->inm_nsrc == in_mcast_maxgrpsrc)
695 return (-ENOSPC);
696 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
697 M_NOWAIT | M_ZERO);
698 if (nims == NULL)
699 return (-ENOMEM);
700 nims->ims_haddr = find.ims_haddr;
701 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
702 ++inm->inm_nsrc;
703 ims = nims;
704 }
705
706 /*
707 * Mark the source as recorded and update the recorded
708 * source count.
709 */
710 ++ims->ims_stp;
711 ++inm->inm_st[1].iss_rec;
712
713 return (1);
714 }
715
716 /*
717 * Return a pointer to an in_msource owned by an in_mfilter,
718 * given its source address.
719 * Lazy-allocate if needed. If this is a new entry its filter state is
720 * undefined at t0.
721 *
722 * imf is the filter set being modified.
723 * haddr is the source address in *host* byte-order.
724 *
725 * SMPng: May be called with locks held; malloc must not block.
726 */
727 static int
imf_get_source(struct in_mfilter * imf,const struct sockaddr_in * psin,struct in_msource ** plims)728 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
729 struct in_msource **plims)
730 {
731 struct ip_msource find;
732 struct ip_msource *ims, *nims;
733 struct in_msource *lims;
734 int error;
735
736 error = 0;
737 ims = NULL;
738 lims = NULL;
739
740 /* key is host byte order */
741 find.ims_haddr = ntohl(psin->sin_addr.s_addr);
742 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
743 lims = (struct in_msource *)ims;
744 if (lims == NULL) {
745 if (imf->imf_nsrc == in_mcast_maxsocksrc)
746 return (ENOSPC);
747 nims = malloc(sizeof(struct in_msource), M_INMFILTER,
748 M_NOWAIT | M_ZERO);
749 if (nims == NULL)
750 return (ENOMEM);
751 lims = (struct in_msource *)nims;
752 lims->ims_haddr = find.ims_haddr;
753 lims->imsl_st[0] = MCAST_UNDEFINED;
754 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
755 ++imf->imf_nsrc;
756 }
757
758 *plims = lims;
759
760 return (error);
761 }
762
763 /*
764 * Graft a source entry into an existing socket-layer filter set,
765 * maintaining any required invariants and checking allocations.
766 *
767 * The source is marked as being in the new filter mode at t1.
768 *
769 * Return the pointer to the new node, otherwise return NULL.
770 */
771 static struct in_msource *
imf_graft(struct in_mfilter * imf,const uint8_t st1,const struct sockaddr_in * psin)772 imf_graft(struct in_mfilter *imf, const uint8_t st1,
773 const struct sockaddr_in *psin)
774 {
775 struct ip_msource *nims;
776 struct in_msource *lims;
777
778 nims = malloc(sizeof(struct in_msource), M_INMFILTER,
779 M_NOWAIT | M_ZERO);
780 if (nims == NULL)
781 return (NULL);
782 lims = (struct in_msource *)nims;
783 lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
784 lims->imsl_st[0] = MCAST_UNDEFINED;
785 lims->imsl_st[1] = st1;
786 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
787 ++imf->imf_nsrc;
788
789 return (lims);
790 }
791
792 /*
793 * Prune a source entry from an existing socket-layer filter set,
794 * maintaining any required invariants and checking allocations.
795 *
796 * The source is marked as being left at t1, it is not freed.
797 *
798 * Return 0 if no error occurred, otherwise return an errno value.
799 */
800 static int
imf_prune(struct in_mfilter * imf,const struct sockaddr_in * psin)801 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
802 {
803 struct ip_msource find;
804 struct ip_msource *ims;
805 struct in_msource *lims;
806
807 /* key is host byte order */
808 find.ims_haddr = ntohl(psin->sin_addr.s_addr);
809 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
810 if (ims == NULL)
811 return (ENOENT);
812 lims = (struct in_msource *)ims;
813 lims->imsl_st[1] = MCAST_UNDEFINED;
814 return (0);
815 }
816
817 /*
818 * Revert socket-layer filter set deltas at t1 to t0 state.
819 */
820 static void
imf_rollback(struct in_mfilter * imf)821 imf_rollback(struct in_mfilter *imf)
822 {
823 struct ip_msource *ims, *tims;
824 struct in_msource *lims;
825
826 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
827 lims = (struct in_msource *)ims;
828 if (lims->imsl_st[0] == lims->imsl_st[1]) {
829 /* no change at t1 */
830 continue;
831 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
832 /* revert change to existing source at t1 */
833 lims->imsl_st[1] = lims->imsl_st[0];
834 } else {
835 /* revert source added t1 */
836 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
837 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
838 free(ims, M_INMFILTER);
839 imf->imf_nsrc--;
840 }
841 }
842 imf->imf_st[1] = imf->imf_st[0];
843 }
844
845 /*
846 * Mark socket-layer filter set as INCLUDE {} at t1.
847 */
848 static void
imf_leave(struct in_mfilter * imf)849 imf_leave(struct in_mfilter *imf)
850 {
851 struct ip_msource *ims;
852 struct in_msource *lims;
853
854 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
855 lims = (struct in_msource *)ims;
856 lims->imsl_st[1] = MCAST_UNDEFINED;
857 }
858 imf->imf_st[1] = MCAST_INCLUDE;
859 }
860
861 /*
862 * Mark socket-layer filter set deltas as committed.
863 */
864 static void
imf_commit(struct in_mfilter * imf)865 imf_commit(struct in_mfilter *imf)
866 {
867 struct ip_msource *ims;
868 struct in_msource *lims;
869
870 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
871 lims = (struct in_msource *)ims;
872 lims->imsl_st[0] = lims->imsl_st[1];
873 }
874 imf->imf_st[0] = imf->imf_st[1];
875 }
876
877 /*
878 * Reap unreferenced sources from socket-layer filter set.
879 */
880 static void
imf_reap(struct in_mfilter * imf)881 imf_reap(struct in_mfilter *imf)
882 {
883 struct ip_msource *ims, *tims;
884 struct in_msource *lims;
885
886 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
887 lims = (struct in_msource *)ims;
888 if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
889 (lims->imsl_st[1] == MCAST_UNDEFINED)) {
890 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
891 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
892 free(ims, M_INMFILTER);
893 imf->imf_nsrc--;
894 }
895 }
896 }
897
898 /*
899 * Purge socket-layer filter set.
900 */
901 static void
imf_purge(struct in_mfilter * imf)902 imf_purge(struct in_mfilter *imf)
903 {
904 struct ip_msource *ims, *tims;
905
906 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
907 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
908 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
909 free(ims, M_INMFILTER);
910 imf->imf_nsrc--;
911 }
912 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
913 KASSERT(RB_EMPTY(&imf->imf_sources),
914 ("%s: imf_sources not empty", __func__));
915 }
916
917 /*
918 * Look up a source filter entry for a multicast group.
919 *
920 * inm is the group descriptor to work with.
921 * haddr is the host-byte-order IPv4 address to look up.
922 * noalloc may be non-zero to suppress allocation of sources.
923 * *pims will be set to the address of the retrieved or allocated source.
924 *
925 * SMPng: NOTE: may be called with locks held.
926 * Return 0 if successful, otherwise return a non-zero error code.
927 */
928 static int
inm_get_source(struct in_multi * inm,const in_addr_t haddr,const int noalloc,struct ip_msource ** pims)929 inm_get_source(struct in_multi *inm, const in_addr_t haddr,
930 const int noalloc, struct ip_msource **pims)
931 {
932 struct ip_msource find;
933 struct ip_msource *ims, *nims;
934
935 find.ims_haddr = haddr;
936 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
937 if (ims == NULL && !noalloc) {
938 if (inm->inm_nsrc == in_mcast_maxgrpsrc)
939 return (ENOSPC);
940 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
941 M_NOWAIT | M_ZERO);
942 if (nims == NULL)
943 return (ENOMEM);
944 nims->ims_haddr = haddr;
945 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
946 ++inm->inm_nsrc;
947 ims = nims;
948 #ifdef KTR
949 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__,
950 haddr, ims);
951 #endif
952 }
953
954 *pims = ims;
955 return (0);
956 }
957
958 /*
959 * Merge socket-layer source into IGMP-layer source.
960 * If rollback is non-zero, perform the inverse of the merge.
961 */
962 static void
ims_merge(struct ip_msource * ims,const struct in_msource * lims,const int rollback)963 ims_merge(struct ip_msource *ims, const struct in_msource *lims,
964 const int rollback)
965 {
966 int n = rollback ? -1 : 1;
967
968 if (lims->imsl_st[0] == MCAST_EXCLUDE) {
969 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x",
970 __func__, n, ims->ims_haddr);
971 ims->ims_st[1].ex -= n;
972 } else if (lims->imsl_st[0] == MCAST_INCLUDE) {
973 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x",
974 __func__, n, ims->ims_haddr);
975 ims->ims_st[1].in -= n;
976 }
977
978 if (lims->imsl_st[1] == MCAST_EXCLUDE) {
979 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x",
980 __func__, n, ims->ims_haddr);
981 ims->ims_st[1].ex += n;
982 } else if (lims->imsl_st[1] == MCAST_INCLUDE) {
983 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x",
984 __func__, n, ims->ims_haddr);
985 ims->ims_st[1].in += n;
986 }
987 }
988
989 /*
990 * Atomically update the global in_multi state, when a membership's
991 * filter list is being updated in any way.
992 *
993 * imf is the per-inpcb-membership group filter pointer.
994 * A fake imf may be passed for in-kernel consumers.
995 *
996 * XXX This is a candidate for a set-symmetric-difference style loop
997 * which would eliminate the repeated lookup from root of ims nodes,
998 * as they share the same key space.
999 *
1000 * If any error occurred this function will back out of refcounts
1001 * and return a non-zero value.
1002 */
1003 static int
inm_merge(struct in_multi * inm,struct in_mfilter * imf)1004 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1005 {
1006 struct ip_msource *ims, *nims;
1007 struct in_msource *lims;
1008 int schanged, error;
1009 int nsrc0, nsrc1;
1010
1011 schanged = 0;
1012 error = 0;
1013 nsrc1 = nsrc0 = 0;
1014 IN_MULTI_LIST_LOCK_ASSERT();
1015
1016 /*
1017 * Update the source filters first, as this may fail.
1018 * Maintain count of in-mode filters at t0, t1. These are
1019 * used to work out if we transition into ASM mode or not.
1020 * Maintain a count of source filters whose state was
1021 * actually modified by this operation.
1022 */
1023 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1024 lims = (struct in_msource *)ims;
1025 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
1026 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
1027 if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
1028 error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
1029 ++schanged;
1030 if (error)
1031 break;
1032 ims_merge(nims, lims, 0);
1033 }
1034 if (error) {
1035 struct ip_msource *bims;
1036
1037 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
1038 lims = (struct in_msource *)ims;
1039 if (lims->imsl_st[0] == lims->imsl_st[1])
1040 continue;
1041 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
1042 if (bims == NULL)
1043 continue;
1044 ims_merge(bims, lims, 1);
1045 }
1046 goto out_reap;
1047 }
1048
1049 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
1050 __func__, nsrc0, nsrc1);
1051
1052 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
1053 if (imf->imf_st[0] == imf->imf_st[1] &&
1054 imf->imf_st[1] == MCAST_INCLUDE) {
1055 if (nsrc1 == 0) {
1056 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1057 --inm->inm_st[1].iss_in;
1058 }
1059 }
1060
1061 /* Handle filter mode transition on socket. */
1062 if (imf->imf_st[0] != imf->imf_st[1]) {
1063 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
1064 __func__, imf->imf_st[0], imf->imf_st[1]);
1065
1066 if (imf->imf_st[0] == MCAST_EXCLUDE) {
1067 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
1068 --inm->inm_st[1].iss_ex;
1069 } else if (imf->imf_st[0] == MCAST_INCLUDE) {
1070 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1071 --inm->inm_st[1].iss_in;
1072 }
1073
1074 if (imf->imf_st[1] == MCAST_EXCLUDE) {
1075 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
1076 inm->inm_st[1].iss_ex++;
1077 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
1078 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
1079 inm->inm_st[1].iss_in++;
1080 }
1081 }
1082
1083 /*
1084 * Track inm filter state in terms of listener counts.
1085 * If there are any exclusive listeners, stack-wide
1086 * membership is exclusive.
1087 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1088 * If no listeners remain, state is undefined at t1,
1089 * and the IGMP lifecycle for this group should finish.
1090 */
1091 if (inm->inm_st[1].iss_ex > 0) {
1092 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1093 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1094 } else if (inm->inm_st[1].iss_in > 0) {
1095 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1096 inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1097 } else {
1098 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1099 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1100 }
1101
1102 /* Decrement ASM listener count on transition out of ASM mode. */
1103 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1104 if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1105 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
1106 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1107 --inm->inm_st[1].iss_asm;
1108 }
1109 }
1110
1111 /* Increment ASM listener count on transition to ASM mode. */
1112 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1113 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1114 inm->inm_st[1].iss_asm++;
1115 }
1116
1117 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1118 inm_print(inm);
1119
1120 out_reap:
1121 if (schanged > 0) {
1122 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1123 inm_reap(inm);
1124 }
1125 return (error);
1126 }
1127
1128 /*
1129 * Mark an in_multi's filter set deltas as committed.
1130 * Called by IGMP after a state change has been enqueued.
1131 */
1132 void
inm_commit(struct in_multi * inm)1133 inm_commit(struct in_multi *inm)
1134 {
1135 struct ip_msource *ims;
1136
1137 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1138 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1139 inm_print(inm);
1140
1141 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1142 ims->ims_st[0] = ims->ims_st[1];
1143 }
1144 inm->inm_st[0] = inm->inm_st[1];
1145 }
1146
1147 /*
1148 * Reap unreferenced nodes from an in_multi's filter set.
1149 */
1150 static void
inm_reap(struct in_multi * inm)1151 inm_reap(struct in_multi *inm)
1152 {
1153 struct ip_msource *ims, *tims;
1154
1155 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1156 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1157 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1158 ims->ims_stp != 0)
1159 continue;
1160 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1161 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1162 free(ims, M_IPMSOURCE);
1163 inm->inm_nsrc--;
1164 }
1165 }
1166
1167 /*
1168 * Purge all source nodes from an in_multi's filter set.
1169 */
1170 static void
inm_purge(struct in_multi * inm)1171 inm_purge(struct in_multi *inm)
1172 {
1173 struct ip_msource *ims, *tims;
1174
1175 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1176 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1177 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1178 free(ims, M_IPMSOURCE);
1179 inm->inm_nsrc--;
1180 }
1181 mbufq_drain(&inm->inm_scq);
1182 }
1183
/*
 * Join a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock around in_joingroup_locked() and passes its
 * result straight back to the caller.
 *
 * SMPng: XXX: in_joingroup() is called from in_control().  Fortunately,
 * ifp is unlikely to have been detached at this point, so we assume
 * it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();
	return (rc);
}
1203
1204 /*
1205 * Join a multicast group; real entry point.
1206 *
1207 * Only preserves atomicity at inm level.
1208 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1209 *
1210 * If the IGMP downcall fails, the group is not joined, and an error
1211 * code is returned.
1212 */
1213 int
in_joingroup_locked(struct ifnet * ifp,const struct in_addr * gina,struct in_mfilter * imf,struct in_multi ** pinm)1214 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1215 /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1216 {
1217 struct in_mfilter timf;
1218 struct in_multi *inm;
1219 int error;
1220
1221 IN_MULTI_LOCK_ASSERT();
1222 IN_MULTI_LIST_UNLOCK_ASSERT();
1223
1224 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
1225 ntohl(gina->s_addr), ifp, ifp->if_xname);
1226
1227 error = 0;
1228 inm = NULL;
1229
1230 /*
1231 * If no imf was specified (i.e. kernel consumer),
1232 * fake one up and assume it is an ASM join.
1233 */
1234 if (imf == NULL) {
1235 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1236 imf = &timf;
1237 }
1238
1239 error = in_getmulti(ifp, gina, &inm);
1240 if (error) {
1241 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1242 return (error);
1243 }
1244 IN_MULTI_LIST_LOCK();
1245 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1246 error = inm_merge(inm, imf);
1247 if (error) {
1248 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1249 goto out_inm_release;
1250 }
1251
1252 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1253 error = igmp_change_state(inm);
1254 if (error) {
1255 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1256 goto out_inm_release;
1257 }
1258
1259 out_inm_release:
1260 if (error) {
1261 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1262 IF_ADDR_WLOCK(ifp);
1263 inm_release_deferred(inm);
1264 IF_ADDR_WUNLOCK(ifp);
1265 } else {
1266 *pinm = inm;
1267 }
1268 IN_MULTI_LIST_UNLOCK();
1269
1270 return (error);
1271 }
1272
/*
 * Leave a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock around in_leavegroup_locked() and passes its
 * result straight back to the caller.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();
	return (rc);
}
1287
/*
 * Leave a multicast group; real entry point.
 * All source filters will be expunged.
 *
 * The IN_MULTI lock must be held and the IN_MULTI list lock must not be
 * (both asserted below); the list lock is taken and dropped internally.
 *
 * Only preserves atomicity at inm level.
 *
 * Holding the write lock for the INP which contains imf
 * is highly advisable. We can't assert for it as imf does not
 * contain a back-pointer to the owning inp.
 *
 * Note: This is not the same as inm_release(*) as this function also
 * makes a state change downcall into IGMP.
 *
 * The group reference is released regardless of the outcome; the value
 * returned is the result of igmp_change_state().
 */
int
in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct in_mfilter timf;
	int error;

	IN_MULTI_LOCK_ASSERT();
	IN_MULTI_LIST_UNLOCK_ASSERT();

	error = 0;

	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
	    inm, ntohl(inm->inm_addr.s_addr),
	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
	    imf);

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM leave.
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
		imf = &timf;
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 *
	 * As this particular invocation should not cause any memory
	 * to be allocated, and there is no opportunity to roll back
	 * the transaction, it MUST NOT fail.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	IN_MULTI_LIST_LOCK();
	error = inm_merge(inm, imf);
	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));

	/*
	 * The downcall runs in the vnet of the group's interface.
	 * NOTE(review): inm->inm_ifp is dereferenced here without a NULL
	 * check, although the trace above allows for a detached ifp;
	 * confirm that callers never reach this point with one.
	 */
	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	CURVNET_SET(inm->inm_ifp->if_vnet);
	error = igmp_change_state(inm);
	/* Drop the reference even if the downcall failed. */
	IF_ADDR_WLOCK(inm->inm_ifp);
	inm_release_deferred(inm);
	IF_ADDR_WUNLOCK(inm->inm_ifp);
	IN_MULTI_LIST_UNLOCK();
	CURVNET_RESTORE();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

	/* This trace is emitted after the release above has been issued. */
	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);

	return (error);
}
1353
1354 /*#ifndef BURN_BRIDGES*/
1355
/*
 * Block or unblock an ASM multicast source on an inpcb.
 * This implements the delta-based API described in RFC 3678.
 *
 * Handles IP_BLOCK_SOURCE / IP_UNBLOCK_SOURCE (struct ip_mreq_source)
 * and MCAST_BLOCK_SOURCE / MCAST_UNBLOCK_SOURCE (struct group_source_req).
 *
 * The delta-based API applies only to exclusive-mode memberships.
 * An IGMP downcall will be performed.
 *
 * inp is passed in unlocked; its write lock is acquired through
 * inp_findmoptions() and released before returning on every path that
 * gets that far.
 *
 * Return 0 if successful, otherwise return an appropriate error code:
 * a sooptcopyin() failure, EINVAL for malformed addresses, a
 * non-multicast group or a non-exclusive membership, EADDRNOTAVAIL for
 * an unknown interface index, a group the socket is not a member of, or
 * a source that is already blocked / not blocked, EOPNOTSUPP for an
 * unknown option, ENOMEM if the new filter entry cannot be allocated,
 * or an error from imf_prune(), inm_merge() or igmp_change_state().
 */
static int
inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
{
	struct epoch_tracker et;
	struct group_source_req gsr;
	sockunion_t *gsa, *ssa;
	struct ifnet *ifp;
	struct in_mfilter *imf;
	struct ip_moptions *imo;
	struct in_msource *ims;
	struct in_multi *inm;
	uint16_t fmode;
	int error, doblock;

	ifp = NULL;
	error = 0;
	doblock = 0;

	/* Both API flavours are normalized into gsr (group + source). */
	memset(&gsr, 0, sizeof(struct group_source_req));
	gsa = (sockunion_t *)&gsr.gsr_group;
	ssa = (sockunion_t *)&gsr.gsr_source;

	switch (sopt->sopt_name) {
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE: {
		struct ip_mreq_source mreqs;

		error = sooptcopyin(sopt, &mreqs,
		    sizeof(struct ip_mreq_source),
		    sizeof(struct ip_mreq_source));
		if (error)
			return (error);

		gsa->sin.sin_family = AF_INET;
		gsa->sin.sin_len = sizeof(struct sockaddr_in);
		gsa->sin.sin_addr = mreqs.imr_multiaddr;

		ssa->sin.sin_family = AF_INET;
		ssa->sin.sin_len = sizeof(struct sockaddr_in);
		ssa->sin.sin_addr = mreqs.imr_sourceaddr;

		/*
		 * A null interface address leaves ifp NULL; otherwise the
		 * interface is looked up by address.  No reference is
		 * taken on the result.
		 */
		if (!in_nullhost(mreqs.imr_interface)) {
			NET_EPOCH_ENTER(et);
			INADDR_TO_IFP(mreqs.imr_interface, ifp);
			/* XXXGL: ifref? */
			NET_EPOCH_EXIT(et);
		}
		if (sopt->sopt_name == IP_BLOCK_SOURCE)
			doblock = 1;

		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
		break;
	    }

	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		error = sooptcopyin(sopt, &gsr,
		    sizeof(struct group_source_req),
		    sizeof(struct group_source_req));
		if (error)
			return (error);

		/* Both sockaddrs must be well-formed IPv4 addresses. */
		if (gsa->sin.sin_family != AF_INET ||
		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
			return (EINVAL);

		if (ssa->sin.sin_family != AF_INET ||
		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
			return (EINVAL);

		/* Here the interface is named by index and must exist. */
		NET_EPOCH_ENTER(et);
		ifp = ifnet_byindex(gsr.gsr_interface);
		NET_EPOCH_EXIT(et);
		if (ifp == NULL)
			return (EADDRNOTAVAIL);

		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
			doblock = 1;
		break;

	default:
		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
		    __func__, sopt->sopt_name);
		return (EOPNOTSUPP);
		break;
	}

	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
		return (EINVAL);

	IN_MULTI_LOCK();

	/*
	 * Check if we are actually a member of this group.
	 * inp_findmoptions() returns with the INP write lock held; from
	 * here on every exit goes through out_inp_locked.
	 */
	imo = inp_findmoptions(inp);
	imf = imo_match_group(imo, ifp, &gsa->sa);
	if (imf == NULL) {
		error = EADDRNOTAVAIL;
		goto out_inp_locked;
	}
	inm = imf->imf_inm;

	/*
	 * Attempting to use the delta-based API on an
	 * non exclusive-mode membership is an error.
	 */
	fmode = imf->imf_st[0];
	if (fmode != MCAST_EXCLUDE) {
		error = EINVAL;
		goto out_inp_locked;
	}

	/*
	 * Deal with error cases up-front:
	 *  Asked to block, but already blocked; or
	 *  Asked to unblock, but nothing to unblock.
	 * If adding a new block entry, allocate it.
	 */
	ims = imo_match_source(imf, &ssa->sa);
	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
		CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__,
		    ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not ");
		error = EADDRNOTAVAIL;
		goto out_inp_locked;
	}

	INP_WLOCK_ASSERT(inp);

	/*
	 * Begin state merge transaction at socket layer.
	 */
	if (doblock) {
		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
		ims = imf_graft(imf, fmode, &ssa->sin);
		if (ims == NULL)
			error = ENOMEM;
	} else {
		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
		error = imf_prune(imf, &ssa->sin);
	}

	if (error) {
		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
		goto out_imf_rollback;
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	IN_MULTI_LIST_LOCK();
	error = inm_merge(inm, imf);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
		IN_MULTI_LIST_UNLOCK();
		goto out_imf_rollback;
	}

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	error = igmp_change_state(inm);
	IN_MULTI_LIST_UNLOCK();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

	/*
	 * End of the socket-layer transaction: roll the filter back on any
	 * failure, commit it otherwise, then reap the filter in both cases.
	 */
out_imf_rollback:
	if (error)
		imf_rollback(imf);
	else
		imf_commit(imf);

	imf_reap(imf);

	/* Locks are released in the reverse order of acquisition. */
out_inp_locked:
	INP_WUNLOCK(inp);
	IN_MULTI_UNLOCK();
	return (error);
}
1544
1545 /*
1546 * Given an inpcb, return its multicast options structure pointer. Accepts
1547 * an unlocked inpcb pointer, but will return it locked. May sleep.
1548 *
1549 * SMPng: NOTE: Returns with the INP write lock held.
1550 */
1551 static struct ip_moptions *
inp_findmoptions(struct inpcb * inp)1552 inp_findmoptions(struct inpcb *inp)
1553 {
1554 struct ip_moptions *imo;
1555
1556 INP_WLOCK(inp);
1557 if (inp->inp_moptions != NULL)
1558 return (inp->inp_moptions);
1559
1560 INP_WUNLOCK(inp);
1561
1562 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1563
1564 imo->imo_multicast_ifp = NULL;
1565 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1566 imo->imo_multicast_vif = -1;
1567 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1568 imo->imo_multicast_loop = in_mcast_loop;
1569 STAILQ_INIT(&imo->imo_head);
1570
1571 INP_WLOCK(inp);
1572 if (inp->inp_moptions != NULL) {
1573 free(imo, M_IPMOPTS);
1574 return (inp->inp_moptions);
1575 }
1576 inp->inp_moptions = imo;
1577 return (imo);
1578 }
1579
1580 void
inp_freemoptions(struct ip_moptions * imo)1581 inp_freemoptions(struct ip_moptions *imo)
1582 {
1583 struct in_mfilter *imf;
1584 struct in_multi *inm;
1585 struct ifnet *ifp;
1586
1587 if (imo == NULL)
1588 return;
1589
1590 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
1591 ip_mfilter_remove(&imo->imo_head, imf);
1592
1593 imf_leave(imf);
1594 if ((inm = imf->imf_inm) != NULL) {
1595 if ((ifp = inm->inm_ifp) != NULL) {
1596 CURVNET_SET(ifp->if_vnet);
1597 (void)in_leavegroup(inm, imf);
1598 CURVNET_RESTORE();
1599 } else {
1600 (void)in_leavegroup(inm, imf);
1601 }
1602 }
1603 ip_mfilter_free(imf);
1604 }
1605 free(imo, M_IPMOPTS);
1606 }
1607
/*
 * Atomically get source filters on a socket for an IPv4 multicast group.
 * Called with INP lock held; returns with lock released.
 *
 * The request (struct __msfilterreq) is copied in from sopt.  On success
 * the filter mode at t1 and the number of in-mode sources are copied
 * back out through sopt, and up to msfr_nsrcs source addresses are
 * copied to the user buffer msfr_srcs when one was supplied.
 *
 * Returns 0 on success; a sooptcopyin()/copyout()/sooptcopyout() error;
 * EINVAL if the interface index is unknown; EADDRNOTAVAIL if the socket
 * is not a member of the group; EAGAIN if the membership's t1 state is
 * undefined; ENOBUFS if the temporary buffer cannot be allocated.
 */
static int
inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct epoch_tracker et;
	struct __msfilterreq msfr;
	sockunion_t *gsa;
	struct ifnet *ifp;
	struct ip_moptions *imo;
	struct in_mfilter *imf;
	struct ip_msource *ims;
	struct in_msource *lims;
	struct sockaddr_in *psin;
	struct sockaddr_storage *ptss;
	struct sockaddr_storage *tss;
	int error;
	size_t nsrcs, ncsrcs;

	INP_WLOCK_ASSERT(inp);

	imo = inp->inp_moptions;
	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));

	/*
	 * The lock is dropped for the copyin and the interface lookup and
	 * re-taken below; imo was sampled before the drop.
	 */
	INP_WUNLOCK(inp);

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	NET_EPOCH_ENTER(et);
	ifp = ifnet_byindex(msfr.msfr_ifindex);
	NET_EPOCH_EXIT(et);	/* XXXGL: unsafe ifnet pointer left */
	if (ifp == NULL)
		return (EINVAL);

	INP_WLOCK(inp);

	/*
	 * Lookup group on the socket.
	 */
	gsa = (sockunion_t *)&msfr.msfr_group;
	imf = imo_match_group(imo, ifp, &gsa->sa);
	if (imf == NULL) {
		INP_WUNLOCK(inp);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Ignore memberships which are in limbo.
	 */
	if (imf->imf_st[1] == MCAST_UNDEFINED) {
		INP_WUNLOCK(inp);
		return (EAGAIN);
	}
	msfr.msfr_fmode = imf->imf_st[1];

	/*
	 * If the user specified a buffer, copy out the source filter
	 * entries to userland gracefully.
	 * We only copy out the number of entries which userland
	 * has asked for, but we always tell userland how big the
	 * buffer really needs to be.
	 *
	 * The requested count is clamped to in_mcast_maxsocksrc, which
	 * also bounds the size of the temporary buffer.  The allocation
	 * is made while the INP lock is held and uses M_NOWAIT.
	 */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
	tss = NULL;
	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (tss == NULL) {
			INP_WUNLOCK(inp);
			return (ENOBUFS);
		}
	}

	/*
	 * Count number of sources in-mode at t0.
	 * If buffer space exists and remains, copy out source entries.
	 * nsrcs counts the free slots left in tss; ncsrcs counts every
	 * in-mode source, whether or not it fitted.
	 */
	nsrcs = msfr.msfr_nsrcs;
	ncsrcs = 0;
	ptss = tss;
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
		    lims->imsl_st[0] != imf->imf_st[0])
			continue;
		++ncsrcs;
		if (tss != NULL && nsrcs > 0) {
			psin = (struct sockaddr_in *)ptss;
			psin->sin_family = AF_INET;
			psin->sin_len = sizeof(struct sockaddr_in);
			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
			psin->sin_port = 0;
			++ptss;
			--nsrcs;
		}
	}

	INP_WUNLOCK(inp);

	/*
	 * The whole (clamped) requested length is copied out; slots that
	 * were not filled above are still zero from M_ZERO.
	 */
	if (tss != NULL) {
		error = copyout(tss, msfr.msfr_srcs,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		free(tss, M_TEMP);
		if (error)
			return (error);
	}

	/* Report the total number of in-mode sources to the caller. */
	msfr.msfr_nsrcs = ncsrcs;
	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));

	return (error);
}
1726
/*
 * Return the IP multicast options in response to user getsockopt().
 *
 * inp is passed in unlocked.  Its write lock is taken here and is
 * released on every path before anything is copied out to sopt; for
 * IP_MSFILTER the release is done by inp_get_source_filters().
 *
 * Returns 0 on success; EOPNOTSUPP if the socket is neither SOCK_RAW
 * nor SOCK_DGRAM; EADDRNOTAVAIL for IP_MSFILTER on a socket without
 * multicast options; ENOPROTOOPT for an unknown option; otherwise the
 * error from sooptcopyout() or inp_get_source_filters().
 */
int
inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct ip_mreqn mreqn;
	struct ip_moptions *imo;
	struct ifnet *ifp;
	struct in_ifaddr *ia;
	int error, optval;
	u_char coptval;

	INP_WLOCK(inp);
	imo = inp->inp_moptions;
	/* If socket is neither of type SOCK_RAW or SOCK_DGRAM reject it. */
	if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}

	error = 0;
	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF:
		/* -1 is reported when the socket has no multicast options. */
		if (imo != NULL)
			optval = imo->imo_multicast_vif;
		else
			optval = -1;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_IF:
		/*
		 * An explicitly configured address wins; otherwise the
		 * index of the configured interface and, if one is found
		 * by IFP_TO_IA(), an address of it are reported.
		 * With nothing configured the reply is all zeroes.
		 */
		memset(&mreqn, 0, sizeof(struct ip_mreqn));
		if (imo != NULL) {
			ifp = imo->imo_multicast_ifp;
			if (!in_nullhost(imo->imo_multicast_addr)) {
				mreqn.imr_address = imo->imo_multicast_addr;
			} else if (ifp != NULL) {
				struct epoch_tracker et;

				mreqn.imr_ifindex = ifp->if_index;
				NET_EPOCH_ENTER(et);
				IFP_TO_IA(ifp, ia);
				if (ia != NULL)
					mreqn.imr_address =
					    IA_SIN(ia)->sin_addr;
				NET_EPOCH_EXIT(et);
			}
		}
		INP_WUNLOCK(inp);
		/*
		 * The caller's buffer size selects the reply format: a full
		 * struct ip_mreqn, or just the struct in_addr.
		 */
		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
			error = sooptcopyout(sopt, &mreqn,
			    sizeof(struct ip_mreqn));
		} else {
			error = sooptcopyout(sopt, &mreqn.imr_address,
			    sizeof(struct in_addr));
		}
		break;

	case IP_MULTICAST_TTL:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
		else
			optval = coptval = imo->imo_multicast_ttl;
		INP_WUNLOCK(inp);
		/* Reply as u_char or int, matching the caller's buffer size. */
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_LOOP:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
		else
			optval = coptval = imo->imo_multicast_loop;
		INP_WUNLOCK(inp);
		/* Reply as u_char or int, matching the caller's buffer size. */
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MSFILTER:
		if (imo == NULL) {
			error = EADDRNOTAVAIL;
			INP_WUNLOCK(inp);
		} else {
			/* Called locked; returns with the lock released. */
			error = inp_get_source_filters(inp, sopt);
		}
		break;

	default:
		INP_WUNLOCK(inp);
		error = ENOPROTOOPT;
		break;
	}

	/* Every case above must have dropped the lock by now. */
	INP_UNLOCK_ASSERT(inp);

	return (error);
}
1831
1832 /*
1833 * Look up the ifnet to join a multicast group membership via legacy
1834 * IP_ADD_MEMBERSHIP or via more modern MCAST_JOIN_GROUP.
1835 *
1836 * If the interface index was specified explicitly, just use it. If the
1837 * address was specified (legacy), try to find matching interface. Else
1838 * (index == 0 && no address) do a route lookup. If that fails for a modern
1839 * MCAST_JOIN_GROUP return failure, for legacy IP_ADD_MEMBERSHIP find first
1840 * multicast capable interface.
1841 */
1842 static struct ifnet *
inp_lookup_mcast_ifp(const struct inpcb * inp,const struct in_addr maddr,const struct in_addr * ina,const u_int index)1843 inp_lookup_mcast_ifp(const struct inpcb *inp, const struct in_addr maddr,
1844 const struct in_addr *ina, const u_int index)
1845 {
1846 struct ifnet *ifp;
1847 struct nhop_object *nh;
1848
1849 NET_EPOCH_ASSERT();
1850
1851 if (index != 0)
1852 return (ifnet_byindex_ref(index));
1853
1854 if (ina != NULL && !in_nullhost(*ina)) {
1855 INADDR_TO_IFP(*ina, ifp);
1856 if (ifp != NULL)
1857 if_ref(ifp);
1858 return (ifp);
1859 }
1860
1861 nh = fib4_lookup(inp->inp_inc.inc_fibnum, maddr, 0, NHR_NONE, 0);
1862 if (nh != NULL) {
1863 ifp = nh->nh_ifp;
1864 if_ref(ifp);
1865 return (ifp);
1866 }
1867
1868 if (ina != NULL) {
1869 struct in_ifaddr *ia;
1870
1871 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1872 if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK) &&
1873 (ia->ia_ifp->if_flags & IFF_MULTICAST)) {
1874 ifp = ia->ia_ifp;
1875 if_ref(ifp);
1876 return (ifp);
1877 }
1878 }
1879 }
1880
1881 return (NULL);
1882 }
1883
1884 /*
1885 * Join an IPv4 multicast group, possibly with a source.
1886 */
1887 static int
inp_join_group(struct inpcb * inp,struct sockopt * sopt)1888 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1889 {
1890 struct group_source_req gsr;
1891 sockunion_t *gsa, *ssa;
1892 struct ifnet *ifp;
1893 struct in_mfilter *imf;
1894 struct ip_moptions *imo;
1895 struct in_multi *inm;
1896 struct in_msource *lims;
1897 struct epoch_tracker et;
1898 int error, is_new;
1899
1900 ifp = NULL;
1901 lims = NULL;
1902 error = 0;
1903
1904 memset(&gsr, 0, sizeof(struct group_source_req));
1905 gsa = (sockunion_t *)&gsr.gsr_group;
1906 gsa->ss.ss_family = AF_UNSPEC;
1907 ssa = (sockunion_t *)&gsr.gsr_source;
1908 ssa->ss.ss_family = AF_UNSPEC;
1909
1910 switch (sopt->sopt_name) {
1911 case IP_ADD_MEMBERSHIP: {
1912 struct ip_mreqn mreqn;
1913 bool mreq;
1914
1915 mreq = (sopt->sopt_valsize != sizeof(struct ip_mreqn));
1916
1917 error = sooptcopyin(sopt, &mreqn,
1918 mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn),
1919 mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn));
1920 if (error)
1921 return (error);
1922
1923 gsa->sin.sin_family = AF_INET;
1924 gsa->sin.sin_len = sizeof(struct sockaddr_in);
1925 gsa->sin.sin_addr = mreqn.imr_multiaddr;
1926 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1927 return (EINVAL);
1928
1929 NET_EPOCH_ENTER(et);
1930 ifp = inp_lookup_mcast_ifp(inp, mreqn.imr_multiaddr,
1931 mreq ? &mreqn.imr_address : NULL,
1932 mreq ? 0 : mreqn.imr_ifindex);
1933 NET_EPOCH_EXIT(et);
1934 break;
1935 }
1936 case IP_ADD_SOURCE_MEMBERSHIP: {
1937 struct ip_mreq_source mreqs;
1938
1939 error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source),
1940 sizeof(struct ip_mreq_source));
1941 if (error)
1942 return (error);
1943
1944 gsa->sin.sin_family = ssa->sin.sin_family = AF_INET;
1945 gsa->sin.sin_len = ssa->sin.sin_len =
1946 sizeof(struct sockaddr_in);
1947
1948 gsa->sin.sin_addr = mreqs.imr_multiaddr;
1949 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1950 return (EINVAL);
1951
1952 ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1953
1954 NET_EPOCH_ENTER(et);
1955 ifp = inp_lookup_mcast_ifp(inp, mreqs.imr_multiaddr,
1956 &mreqs.imr_interface, 0);
1957 NET_EPOCH_EXIT(et);
1958 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
1959 __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
1960 break;
1961 }
1962
1963 case MCAST_JOIN_GROUP:
1964 case MCAST_JOIN_SOURCE_GROUP:
1965 if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1966 error = sooptcopyin(sopt, &gsr,
1967 sizeof(struct group_req),
1968 sizeof(struct group_req));
1969 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1970 error = sooptcopyin(sopt, &gsr,
1971 sizeof(struct group_source_req),
1972 sizeof(struct group_source_req));
1973 }
1974 if (error)
1975 return (error);
1976
1977 if (gsa->sin.sin_family != AF_INET ||
1978 gsa->sin.sin_len != sizeof(struct sockaddr_in))
1979 return (EINVAL);
1980
1981 /*
1982 * Overwrite the port field if present, as the sockaddr
1983 * being copied in may be matched with a binary comparison.
1984 */
1985 gsa->sin.sin_port = 0;
1986 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1987 if (ssa->sin.sin_family != AF_INET ||
1988 ssa->sin.sin_len != sizeof(struct sockaddr_in))
1989 return (EINVAL);
1990 ssa->sin.sin_port = 0;
1991 }
1992
1993 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1994 return (EINVAL);
1995
1996 NET_EPOCH_ENTER(et);
1997 ifp = inp_lookup_mcast_ifp(inp, gsa->sin.sin_addr, NULL,
1998 gsr.gsr_interface);
1999 NET_EPOCH_EXIT(et);
2000 if (ifp == NULL)
2001 return (EADDRNOTAVAIL);
2002 break;
2003
2004 default:
2005 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2006 __func__, sopt->sopt_name);
2007 return (EOPNOTSUPP);
2008 break;
2009 }
2010
2011 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2012 if (ifp != NULL)
2013 if_rele(ifp);
2014 return (EADDRNOTAVAIL);
2015 }
2016
2017 IN_MULTI_LOCK();
2018
2019 /*
2020 * Find the membership in the membership list.
2021 */
2022 imo = inp_findmoptions(inp);
2023 imf = imo_match_group(imo, ifp, &gsa->sa);
2024 if (imf == NULL) {
2025 is_new = 1;
2026 inm = NULL;
2027
2028 if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) {
2029 error = ENOMEM;
2030 goto out_inp_locked;
2031 }
2032 } else {
2033 is_new = 0;
2034 inm = imf->imf_inm;
2035
2036 if (ssa->ss.ss_family != AF_UNSPEC) {
2037 /*
2038 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2039 * is an error. On an existing inclusive membership,
2040 * it just adds the source to the filter list.
2041 */
2042 if (imf->imf_st[1] != MCAST_INCLUDE) {
2043 error = EINVAL;
2044 goto out_inp_locked;
2045 }
2046 /*
2047 * Throw out duplicates.
2048 *
2049 * XXX FIXME: This makes a naive assumption that
2050 * even if entries exist for *ssa in this imf,
2051 * they will be rejected as dupes, even if they
2052 * are not valid in the current mode (in-mode).
2053 *
2054 * in_msource is transactioned just as for anything
2055 * else in SSM -- but note naive use of inm_graft()
2056 * below for allocating new filter entries.
2057 *
2058 * This is only an issue if someone mixes the
2059 * full-state SSM API with the delta-based API,
2060 * which is discouraged in the relevant RFCs.
2061 */
2062 lims = imo_match_source(imf, &ssa->sa);
2063 if (lims != NULL /*&&
2064 lims->imsl_st[1] == MCAST_INCLUDE*/) {
2065 error = EADDRNOTAVAIL;
2066 goto out_inp_locked;
2067 }
2068 } else {
2069 /*
2070 * MCAST_JOIN_GROUP on an existing exclusive
2071 * membership is an error; return EADDRINUSE
2072 * to preserve 4.4BSD API idempotence, and
2073 * avoid tedious detour to code below.
2074 * NOTE: This is bending RFC 3678 a bit.
2075 *
2076 * On an existing inclusive membership, this is also
2077 * an error; if you want to change filter mode,
2078 * you must use the userland API setsourcefilter().
2079 * XXX We don't reject this for imf in UNDEFINED
2080 * state at t1, because allocation of a filter
2081 * is atomic with allocation of a membership.
2082 */
2083 error = EINVAL;
2084 if (imf->imf_st[1] == MCAST_EXCLUDE)
2085 error = EADDRINUSE;
2086 goto out_inp_locked;
2087 }
2088 }
2089
2090 /*
2091 * Begin state merge transaction at socket layer.
2092 */
2093 INP_WLOCK_ASSERT(inp);
2094
2095 /*
2096 * Graft new source into filter list for this inpcb's
2097 * membership of the group. The in_multi may not have
2098 * been allocated yet if this is a new membership, however,
2099 * the in_mfilter slot will be allocated and must be initialized.
2100 *
2101 * Note: Grafting of exclusive mode filters doesn't happen
2102 * in this path.
2103 * XXX: Should check for non-NULL lims (node exists but may
2104 * not be in-mode) for interop with full-state API.
2105 */
2106 if (ssa->ss.ss_family != AF_UNSPEC) {
2107 /* Membership starts in IN mode */
2108 if (is_new) {
2109 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2110 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
2111 if (imf == NULL) {
2112 error = ENOMEM;
2113 goto out_inp_locked;
2114 }
2115 } else {
2116 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2117 }
2118 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2119 if (lims == NULL) {
2120 CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2121 __func__);
2122 error = ENOMEM;
2123 goto out_inp_locked;
2124 }
2125 } else {
2126 /* No address specified; Membership starts in EX mode */
2127 if (is_new) {
2128 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
2129 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
2130 if (imf == NULL) {
2131 error = ENOMEM;
2132 goto out_inp_locked;
2133 }
2134 }
2135 }
2136
2137 /*
2138 * Begin state merge transaction at IGMP layer.
2139 */
2140 if (is_new) {
2141 in_pcbref(inp);
2142 INP_WUNLOCK(inp);
2143
2144 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2145 &imf->imf_inm);
2146
2147 INP_WLOCK(inp);
2148 if (in_pcbrele_wlocked(inp)) {
2149 error = ENXIO;
2150 goto out_inp_unlocked;
2151 }
2152 if (error) {
2153 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
2154 __func__);
2155 goto out_inp_locked;
2156 }
2157 /*
2158 * NOTE: Refcount from in_joingroup_locked()
2159 * is protecting membership.
2160 */
2161 ip_mfilter_insert(&imo->imo_head, imf);
2162 } else {
2163 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2164 IN_MULTI_LIST_LOCK();
2165 error = inm_merge(inm, imf);
2166 if (error) {
2167 CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2168 __func__);
2169 IN_MULTI_LIST_UNLOCK();
2170 imf_rollback(imf);
2171 imf_reap(imf);
2172 goto out_inp_locked;
2173 }
2174 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2175 error = igmp_change_state(inm);
2176 IN_MULTI_LIST_UNLOCK();
2177 if (error) {
2178 CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2179 __func__);
2180 imf_rollback(imf);
2181 imf_reap(imf);
2182 goto out_inp_locked;
2183 }
2184 }
2185
2186 imf_commit(imf);
2187 imf = NULL;
2188
2189 out_inp_locked:
2190 INP_WUNLOCK(inp);
2191 out_inp_unlocked:
2192 IN_MULTI_UNLOCK();
2193
2194 if (is_new && imf) {
2195 if (imf->imf_inm != NULL) {
2196 IN_MULTI_LIST_LOCK();
2197 IF_ADDR_WLOCK(ifp);
2198 inm_release_deferred(imf->imf_inm);
2199 IF_ADDR_WUNLOCK(ifp);
2200 IN_MULTI_LIST_UNLOCK();
2201 }
2202 ip_mfilter_free(imf);
2203 }
2204 if_rele(ifp);
2205 return (error);
2206 }
2207
2208 /*
2209 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2210 */
2211 static int
inp_leave_group(struct inpcb * inp,struct sockopt * sopt)2212 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2213 {
2214 struct epoch_tracker et;
2215 struct group_source_req gsr;
2216 struct ip_mreq_source mreqs;
2217 sockunion_t *gsa, *ssa;
2218 struct ifnet *ifp;
2219 struct in_mfilter *imf;
2220 struct ip_moptions *imo;
2221 struct in_msource *ims;
2222 struct in_multi *inm;
2223 int error;
2224 bool is_final;
2225
2226 ifp = NULL;
2227 error = 0;
2228 is_final = true;
2229
2230 memset(&gsr, 0, sizeof(struct group_source_req));
2231 gsa = (sockunion_t *)&gsr.gsr_group;
2232 gsa->ss.ss_family = AF_UNSPEC;
2233 ssa = (sockunion_t *)&gsr.gsr_source;
2234 ssa->ss.ss_family = AF_UNSPEC;
2235
2236 switch (sopt->sopt_name) {
2237 case IP_DROP_MEMBERSHIP:
2238 case IP_DROP_SOURCE_MEMBERSHIP:
2239 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2240 error = sooptcopyin(sopt, &mreqs,
2241 sizeof(struct ip_mreq),
2242 sizeof(struct ip_mreq));
2243 /*
2244 * Swap interface and sourceaddr arguments,
2245 * as ip_mreq and ip_mreq_source are laid
2246 * out differently.
2247 */
2248 mreqs.imr_interface = mreqs.imr_sourceaddr;
2249 mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2250 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2251 error = sooptcopyin(sopt, &mreqs,
2252 sizeof(struct ip_mreq_source),
2253 sizeof(struct ip_mreq_source));
2254 }
2255 if (error)
2256 return (error);
2257
2258 gsa->sin.sin_family = AF_INET;
2259 gsa->sin.sin_len = sizeof(struct sockaddr_in);
2260 gsa->sin.sin_addr = mreqs.imr_multiaddr;
2261
2262 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2263 ssa->sin.sin_family = AF_INET;
2264 ssa->sin.sin_len = sizeof(struct sockaddr_in);
2265 ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2266 }
2267
2268 /*
2269 * Attempt to look up hinted ifp from interface address.
2270 * Fallthrough with null ifp iff lookup fails, to
2271 * preserve 4.4BSD mcast API idempotence.
2272 * XXX NOTE WELL: The RFC 3678 API is preferred because
2273 * using an IPv4 address as a key is racy.
2274 */
2275 if (!in_nullhost(mreqs.imr_interface)) {
2276 NET_EPOCH_ENTER(et);
2277 INADDR_TO_IFP(mreqs.imr_interface, ifp);
2278 /* XXXGL ifref? */
2279 NET_EPOCH_EXIT(et);
2280 }
2281 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
2282 __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
2283
2284 break;
2285
2286 case MCAST_LEAVE_GROUP:
2287 case MCAST_LEAVE_SOURCE_GROUP:
2288 if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2289 error = sooptcopyin(sopt, &gsr,
2290 sizeof(struct group_req),
2291 sizeof(struct group_req));
2292 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2293 error = sooptcopyin(sopt, &gsr,
2294 sizeof(struct group_source_req),
2295 sizeof(struct group_source_req));
2296 }
2297 if (error)
2298 return (error);
2299
2300 if (gsa->sin.sin_family != AF_INET ||
2301 gsa->sin.sin_len != sizeof(struct sockaddr_in))
2302 return (EINVAL);
2303
2304 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2305 if (ssa->sin.sin_family != AF_INET ||
2306 ssa->sin.sin_len != sizeof(struct sockaddr_in))
2307 return (EINVAL);
2308 }
2309
2310 NET_EPOCH_ENTER(et);
2311 ifp = ifnet_byindex(gsr.gsr_interface);
2312 NET_EPOCH_EXIT(et); /* XXXGL: unsafe ifp */
2313 if (ifp == NULL)
2314 return (EADDRNOTAVAIL);
2315 break;
2316
2317 default:
2318 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2319 __func__, sopt->sopt_name);
2320 return (EOPNOTSUPP);
2321 break;
2322 }
2323
2324 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2325 return (EINVAL);
2326
2327 IN_MULTI_LOCK();
2328
2329 /*
2330 * Find the membership in the membership list.
2331 */
2332 imo = inp_findmoptions(inp);
2333 imf = imo_match_group(imo, ifp, &gsa->sa);
2334 if (imf == NULL) {
2335 error = EADDRNOTAVAIL;
2336 goto out_inp_locked;
2337 }
2338 inm = imf->imf_inm;
2339
2340 if (ssa->ss.ss_family != AF_UNSPEC)
2341 is_final = false;
2342
2343 /*
2344 * Begin state merge transaction at socket layer.
2345 */
2346 INP_WLOCK_ASSERT(inp);
2347
2348 /*
2349 * If we were instructed only to leave a given source, do so.
2350 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2351 */
2352 if (is_final) {
2353 ip_mfilter_remove(&imo->imo_head, imf);
2354 imf_leave(imf);
2355
2356 /*
2357 * Give up the multicast address record to which
2358 * the membership points.
2359 */
2360 (void) in_leavegroup_locked(imf->imf_inm, imf);
2361 } else {
2362 if (imf->imf_st[0] == MCAST_EXCLUDE) {
2363 error = EADDRNOTAVAIL;
2364 goto out_inp_locked;
2365 }
2366 ims = imo_match_source(imf, &ssa->sa);
2367 if (ims == NULL) {
2368 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent",
2369 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not ");
2370 error = EADDRNOTAVAIL;
2371 goto out_inp_locked;
2372 }
2373 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2374 error = imf_prune(imf, &ssa->sin);
2375 if (error) {
2376 CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2377 __func__);
2378 goto out_inp_locked;
2379 }
2380 }
2381
2382 /*
2383 * Begin state merge transaction at IGMP layer.
2384 */
2385 if (!is_final) {
2386 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2387 IN_MULTI_LIST_LOCK();
2388 error = inm_merge(inm, imf);
2389 if (error) {
2390 CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2391 __func__);
2392 IN_MULTI_LIST_UNLOCK();
2393 imf_rollback(imf);
2394 imf_reap(imf);
2395 goto out_inp_locked;
2396 }
2397
2398 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2399 error = igmp_change_state(inm);
2400 IN_MULTI_LIST_UNLOCK();
2401 if (error) {
2402 CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2403 __func__);
2404 imf_rollback(imf);
2405 imf_reap(imf);
2406 goto out_inp_locked;
2407 }
2408 }
2409 imf_commit(imf);
2410 imf_reap(imf);
2411
2412 out_inp_locked:
2413 INP_WUNLOCK(inp);
2414
2415 if (is_final && imf)
2416 ip_mfilter_free(imf);
2417
2418 IN_MULTI_UNLOCK();
2419 return (error);
2420 }
2421
2422 /*
2423 * Select the interface for transmitting IPv4 multicast datagrams.
2424 *
2425 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2426 * may be passed to this socket option. An address of INADDR_ANY or an
2427 * interface index of 0 is used to remove a previous selection.
2428 * When no interface is selected, one is chosen for every send.
2429 */
2430 static int
inp_set_multicast_if(struct inpcb * inp,struct sockopt * sopt)2431 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2432 {
2433 struct in_addr addr;
2434 struct ip_mreqn mreqn;
2435 struct ifnet *ifp;
2436 struct ip_moptions *imo;
2437 int error;
2438
2439 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2440 /*
2441 * An interface index was specified using the
2442 * Linux-derived ip_mreqn structure.
2443 */
2444 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2445 sizeof(struct ip_mreqn));
2446 if (error)
2447 return (error);
2448
2449 if (mreqn.imr_ifindex < 0)
2450 return (EINVAL);
2451
2452 if (mreqn.imr_ifindex == 0) {
2453 ifp = NULL;
2454 } else {
2455 struct epoch_tracker et;
2456
2457 NET_EPOCH_ENTER(et);
2458 ifp = ifnet_byindex(mreqn.imr_ifindex);
2459 NET_EPOCH_EXIT(et); /* XXXGL: unsafe ifp */
2460 if (ifp == NULL)
2461 return (EADDRNOTAVAIL);
2462 }
2463 } else {
2464 /*
2465 * An interface was specified by IPv4 address.
2466 * This is the traditional BSD usage.
2467 */
2468 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2469 sizeof(struct in_addr));
2470 if (error)
2471 return (error);
2472 if (in_nullhost(addr)) {
2473 ifp = NULL;
2474 } else {
2475 struct epoch_tracker et;
2476
2477 NET_EPOCH_ENTER(et);
2478 INADDR_TO_IFP(addr, ifp);
2479 /* XXXGL ifref? */
2480 NET_EPOCH_EXIT(et);
2481 if (ifp == NULL)
2482 return (EADDRNOTAVAIL);
2483 }
2484 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp,
2485 ntohl(addr.s_addr));
2486 }
2487
2488 /* Reject interfaces which do not support multicast. */
2489 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2490 return (EOPNOTSUPP);
2491
2492 imo = inp_findmoptions(inp);
2493 imo->imo_multicast_ifp = ifp;
2494 imo->imo_multicast_addr.s_addr = INADDR_ANY;
2495 INP_WUNLOCK(inp);
2496
2497 return (0);
2498 }
2499
2500 /*
2501 * Atomically set source filters on a socket for an IPv4 multicast group.
2502 */
2503 static int
inp_set_source_filters(struct inpcb * inp,struct sockopt * sopt)2504 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2505 {
2506 struct epoch_tracker et;
2507 struct __msfilterreq msfr;
2508 sockunion_t *gsa;
2509 struct ifnet *ifp;
2510 struct in_mfilter *imf;
2511 struct ip_moptions *imo;
2512 struct in_multi *inm;
2513 int error;
2514
2515 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2516 sizeof(struct __msfilterreq));
2517 if (error)
2518 return (error);
2519
2520 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
2521 return (ENOBUFS);
2522
2523 if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
2524 msfr.msfr_fmode != MCAST_INCLUDE))
2525 return (EINVAL);
2526
2527 if (msfr.msfr_group.ss_family != AF_INET ||
2528 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2529 return (EINVAL);
2530
2531 gsa = (sockunion_t *)&msfr.msfr_group;
2532 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2533 return (EINVAL);
2534
2535 gsa->sin.sin_port = 0; /* ignore port */
2536
2537 NET_EPOCH_ENTER(et);
2538 ifp = ifnet_byindex(msfr.msfr_ifindex);
2539 NET_EPOCH_EXIT(et); /* XXXGL: unsafe ifp */
2540 if (ifp == NULL)
2541 return (EADDRNOTAVAIL);
2542
2543 IN_MULTI_LOCK();
2544
2545 /*
2546 * Take the INP write lock.
2547 * Check if this socket is a member of this group.
2548 */
2549 imo = inp_findmoptions(inp);
2550 imf = imo_match_group(imo, ifp, &gsa->sa);
2551 if (imf == NULL) {
2552 error = EADDRNOTAVAIL;
2553 goto out_inp_locked;
2554 }
2555 inm = imf->imf_inm;
2556
2557 /*
2558 * Begin state merge transaction at socket layer.
2559 */
2560 INP_WLOCK_ASSERT(inp);
2561
2562 imf->imf_st[1] = msfr.msfr_fmode;
2563
2564 /*
2565 * Apply any new source filters, if present.
2566 * Make a copy of the user-space source vector so
2567 * that we may copy them with a single copyin. This
2568 * allows us to deal with page faults up-front.
2569 */
2570 if (msfr.msfr_nsrcs > 0) {
2571 struct in_msource *lims;
2572 struct sockaddr_in *psin;
2573 struct sockaddr_storage *kss, *pkss;
2574 int i;
2575
2576 INP_WUNLOCK(inp);
2577
2578 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2579 __func__, (unsigned long)msfr.msfr_nsrcs);
2580 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2581 M_TEMP, M_WAITOK);
2582 error = copyin(msfr.msfr_srcs, kss,
2583 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2584 if (error) {
2585 free(kss, M_TEMP);
2586 return (error);
2587 }
2588
2589 INP_WLOCK(inp);
2590
2591 /*
2592 * Mark all source filters as UNDEFINED at t1.
2593 * Restore new group filter mode, as imf_leave()
2594 * will set it to INCLUDE.
2595 */
2596 imf_leave(imf);
2597 imf->imf_st[1] = msfr.msfr_fmode;
2598
2599 /*
2600 * Update socket layer filters at t1, lazy-allocating
2601 * new entries. This saves a bunch of memory at the
2602 * cost of one RB_FIND() per source entry; duplicate
2603 * entries in the msfr_nsrcs vector are ignored.
2604 * If we encounter an error, rollback transaction.
2605 *
2606 * XXX This too could be replaced with a set-symmetric
2607 * difference like loop to avoid walking from root
2608 * every time, as the key space is common.
2609 */
2610 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2611 psin = (struct sockaddr_in *)pkss;
2612 if (psin->sin_family != AF_INET) {
2613 error = EAFNOSUPPORT;
2614 break;
2615 }
2616 if (psin->sin_len != sizeof(struct sockaddr_in)) {
2617 error = EINVAL;
2618 break;
2619 }
2620 error = imf_get_source(imf, psin, &lims);
2621 if (error)
2622 break;
2623 lims->imsl_st[1] = imf->imf_st[1];
2624 }
2625 free(kss, M_TEMP);
2626 }
2627
2628 if (error)
2629 goto out_imf_rollback;
2630
2631 INP_WLOCK_ASSERT(inp);
2632
2633 /*
2634 * Begin state merge transaction at IGMP layer.
2635 */
2636 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2637 IN_MULTI_LIST_LOCK();
2638 error = inm_merge(inm, imf);
2639 if (error) {
2640 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2641 IN_MULTI_LIST_UNLOCK();
2642 goto out_imf_rollback;
2643 }
2644
2645 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2646 error = igmp_change_state(inm);
2647 IN_MULTI_LIST_UNLOCK();
2648 if (error)
2649 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2650
2651 out_imf_rollback:
2652 if (error)
2653 imf_rollback(imf);
2654 else
2655 imf_commit(imf);
2656
2657 imf_reap(imf);
2658
2659 out_inp_locked:
2660 INP_WUNLOCK(inp);
2661 IN_MULTI_UNLOCK();
2662 return (error);
2663 }
2664
2665 /*
2666 * Set the IP multicast options in response to user setsockopt().
2667 *
2668 * Many of the socket options handled in this function duplicate the
2669 * functionality of socket options in the regular unicast API. However,
2670 * it is not possible to merge the duplicate code, because the idempotence
2671 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2672 * the effects of these options must be treated as separate and distinct.
2673 *
2674 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2675 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2676 * is refactored to no longer use vifs.
2677 */
2678 int
inp_setmoptions(struct inpcb * inp,struct sockopt * sopt)2679 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2680 {
2681 struct ip_moptions *imo;
2682 int error;
2683
2684 error = 0;
2685
2686 /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */
2687 if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2688 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)
2689 return (EOPNOTSUPP);
2690
2691 switch (sopt->sopt_name) {
2692 case IP_MULTICAST_VIF: {
2693 int vifi;
2694 /*
2695 * Select a multicast VIF for transmission.
2696 * Only useful if multicast forwarding is active.
2697 */
2698 if (legal_vif_num == NULL) {
2699 error = EOPNOTSUPP;
2700 break;
2701 }
2702 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2703 if (error)
2704 break;
2705 if (!legal_vif_num(vifi) && (vifi != -1)) {
2706 error = EINVAL;
2707 break;
2708 }
2709 imo = inp_findmoptions(inp);
2710 imo->imo_multicast_vif = vifi;
2711 INP_WUNLOCK(inp);
2712 break;
2713 }
2714
2715 case IP_MULTICAST_IF:
2716 error = inp_set_multicast_if(inp, sopt);
2717 break;
2718
2719 case IP_MULTICAST_TTL: {
2720 u_char ttl;
2721
2722 /*
2723 * Set the IP time-to-live for outgoing multicast packets.
2724 * The original multicast API required a char argument,
2725 * which is inconsistent with the rest of the socket API.
2726 * We allow either a char or an int.
2727 */
2728 if (sopt->sopt_valsize == sizeof(u_char)) {
2729 error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2730 sizeof(u_char));
2731 if (error)
2732 break;
2733 } else {
2734 u_int ittl;
2735
2736 error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2737 sizeof(u_int));
2738 if (error)
2739 break;
2740 if (ittl > 255) {
2741 error = EINVAL;
2742 break;
2743 }
2744 ttl = (u_char)ittl;
2745 }
2746 imo = inp_findmoptions(inp);
2747 imo->imo_multicast_ttl = ttl;
2748 INP_WUNLOCK(inp);
2749 break;
2750 }
2751
2752 case IP_MULTICAST_LOOP: {
2753 u_char loop;
2754
2755 /*
2756 * Set the loopback flag for outgoing multicast packets.
2757 * Must be zero or one. The original multicast API required a
2758 * char argument, which is inconsistent with the rest
2759 * of the socket API. We allow either a char or an int.
2760 */
2761 if (sopt->sopt_valsize == sizeof(u_char)) {
2762 error = sooptcopyin(sopt, &loop, sizeof(u_char),
2763 sizeof(u_char));
2764 if (error)
2765 break;
2766 } else {
2767 u_int iloop;
2768
2769 error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2770 sizeof(u_int));
2771 if (error)
2772 break;
2773 loop = (u_char)iloop;
2774 }
2775 imo = inp_findmoptions(inp);
2776 imo->imo_multicast_loop = !!loop;
2777 INP_WUNLOCK(inp);
2778 break;
2779 }
2780
2781 case IP_ADD_MEMBERSHIP:
2782 case IP_ADD_SOURCE_MEMBERSHIP:
2783 case MCAST_JOIN_GROUP:
2784 case MCAST_JOIN_SOURCE_GROUP:
2785 error = inp_join_group(inp, sopt);
2786 break;
2787
2788 case IP_DROP_MEMBERSHIP:
2789 case IP_DROP_SOURCE_MEMBERSHIP:
2790 case MCAST_LEAVE_GROUP:
2791 case MCAST_LEAVE_SOURCE_GROUP:
2792 error = inp_leave_group(inp, sopt);
2793 break;
2794
2795 case IP_BLOCK_SOURCE:
2796 case IP_UNBLOCK_SOURCE:
2797 case MCAST_BLOCK_SOURCE:
2798 case MCAST_UNBLOCK_SOURCE:
2799 error = inp_block_unblock_source(inp, sopt);
2800 break;
2801
2802 case IP_MSFILTER:
2803 error = inp_set_source_filters(inp, sopt);
2804 break;
2805
2806 default:
2807 error = EOPNOTSUPP;
2808 break;
2809 }
2810
2811 INP_UNLOCK_ASSERT(inp);
2812
2813 return (error);
2814 }
2815
2816 /*
2817 * Expose IGMP's multicast filter mode and source list(s) to userland,
2818 * keyed by (ifindex, group).
2819 * The filter mode is written out as a uint32_t, followed by
2820 * 0..n of struct in_addr.
2821 * For use by ifmcstat(8).
2822 * SMPng: NOTE: unlocked read of ifindex space.
2823 */
2824 static int
sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)2825 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2826 {
2827 struct in_addr src, group;
2828 struct epoch_tracker et;
2829 struct ifnet *ifp;
2830 struct ifmultiaddr *ifma;
2831 struct in_multi *inm;
2832 struct ip_msource *ims;
2833 int *name;
2834 int retval;
2835 u_int namelen;
2836 uint32_t fmode, ifindex;
2837
2838 name = (int *)arg1;
2839 namelen = arg2;
2840
2841 if (req->newptr != NULL)
2842 return (EPERM);
2843
2844 if (namelen != 2)
2845 return (EINVAL);
2846
2847 group.s_addr = name[1];
2848 if (!IN_MULTICAST(ntohl(group.s_addr))) {
2849 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast",
2850 __func__, ntohl(group.s_addr));
2851 return (EINVAL);
2852 }
2853
2854 ifindex = name[0];
2855 NET_EPOCH_ENTER(et);
2856 ifp = ifnet_byindex(ifindex);
2857 if (ifp == NULL) {
2858 NET_EPOCH_EXIT(et);
2859 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2860 __func__, ifindex);
2861 return (ENOENT);
2862 }
2863
2864 retval = sysctl_wire_old_buffer(req,
2865 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2866 if (retval) {
2867 NET_EPOCH_EXIT(et);
2868 return (retval);
2869 }
2870
2871 IN_MULTI_LIST_LOCK();
2872
2873 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2874 inm = inm_ifmultiaddr_get_inm(ifma);
2875 if (inm == NULL)
2876 continue;
2877 if (!in_hosteq(inm->inm_addr, group))
2878 continue;
2879 fmode = inm->inm_st[1].iss_fmode;
2880 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2881 if (retval != 0)
2882 break;
2883 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2884 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
2885 ims->ims_haddr);
2886 /*
2887 * Only copy-out sources which are in-mode.
2888 */
2889 if (fmode != ims_get_mode(inm, ims, 1)) {
2890 CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2891 __func__);
2892 continue;
2893 }
2894 src.s_addr = htonl(ims->ims_haddr);
2895 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2896 if (retval != 0)
2897 break;
2898 }
2899 }
2900
2901 IN_MULTI_LIST_UNLOCK();
2902 NET_EPOCH_EXIT(et);
2903
2904 return (retval);
2905 }
2906
2907 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)
2908
2909 static const char *inm_modestrs[] = {
2910 [MCAST_UNDEFINED] = "un",
2911 [MCAST_INCLUDE] = "in",
2912 [MCAST_EXCLUDE] = "ex",
2913 };
2914 _Static_assert(MCAST_UNDEFINED == 0 &&
2915 MCAST_EXCLUDE + 1 == nitems(inm_modestrs),
2916 "inm_modestrs: no longer matches #defines");
2917
2918 static const char *
inm_mode_str(const int mode)2919 inm_mode_str(const int mode)
2920 {
2921
2922 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2923 return (inm_modestrs[mode]);
2924 return ("??");
2925 }
2926
2927 static const char *inm_statestrs[] = {
2928 [IGMP_NOT_MEMBER] = "not-member",
2929 [IGMP_SILENT_MEMBER] = "silent",
2930 [IGMP_REPORTING_MEMBER] = "reporting",
2931 [IGMP_IDLE_MEMBER] = "idle",
2932 [IGMP_LAZY_MEMBER] = "lazy",
2933 [IGMP_SLEEPING_MEMBER] = "sleeping",
2934 [IGMP_AWAKENING_MEMBER] = "awakening",
2935 [IGMP_G_QUERY_PENDING_MEMBER] = "query-pending",
2936 [IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending",
2937 [IGMP_LEAVING_MEMBER] = "leaving",
2938 };
2939 _Static_assert(IGMP_NOT_MEMBER == 0 &&
2940 IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs),
2941 "inm_statetrs: no longer matches #defines");
2942
2943 static const char *
inm_state_str(const int state)2944 inm_state_str(const int state)
2945 {
2946
2947 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2948 return (inm_statestrs[state]);
2949 return ("??");
2950 }
2951
2952 /*
2953 * Dump an in_multi structure to the console.
2954 */
2955 void
inm_print(const struct in_multi * inm)2956 inm_print(const struct in_multi *inm)
2957 {
2958 int t;
2959 char addrbuf[INET_ADDRSTRLEN];
2960
2961 if ((ktr_mask & KTR_IGMPV3) == 0)
2962 return;
2963
2964 printf("%s: --- begin inm %p ---\n", __func__, inm);
2965 printf("addr %s ifp %p(%s) ifma %p\n",
2966 inet_ntoa_r(inm->inm_addr, addrbuf),
2967 inm->inm_ifp,
2968 inm->inm_ifp->if_xname,
2969 inm->inm_ifma);
2970 printf("timer %u state %s refcount %u scq.len %u\n",
2971 inm->inm_timer,
2972 inm_state_str(inm->inm_state),
2973 inm->inm_refcount,
2974 inm->inm_scq.mq_len);
2975 printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2976 inm->inm_igi,
2977 inm->inm_nsrc,
2978 inm->inm_sctimer,
2979 inm->inm_scrv);
2980 for (t = 0; t < 2; t++) {
2981 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2982 inm_mode_str(inm->inm_st[t].iss_fmode),
2983 inm->inm_st[t].iss_asm,
2984 inm->inm_st[t].iss_ex,
2985 inm->inm_st[t].iss_in,
2986 inm->inm_st[t].iss_rec);
2987 }
2988 printf("%s: --- end inm %p ---\n", __func__, inm);
2989 }
2990
2991 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */
2992
/*
 * Stub for builds where the KTR_IGMPV3 variant above is compiled out.
 * Keeps the symbol available to callers; does nothing.
 */
void
inm_print(const struct in_multi *inm)
{
	/* Intentionally empty. */
}
2998
2999 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */
3000
/*
 * Instantiate the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through their ims_link field and ordered by
 * ip_msource_cmp().
 */
3001 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
3002