xref: /src/sys/net/if_geneve.c (revision f4e5b45b11628416f596b3aec2ccd3056800a171)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2025-2026 Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/hash.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/module.h>
39 #include <sys/refcount.h>
40 #include <sys/rmlock.h>
41 #include <sys/priv.h>
42 #include <sys/proc.h>
43 #include <sys/queue.h>
44 #include <sys/sdt.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sockio.h>
48 #include <sys/sx.h>
49 #include <sys/systm.h>
50 #include <sys/counter.h>
51 #include <sys/jail.h>
52 
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_private.h>
58 #include <net/if_arp.h>
59 #include <net/if_clone.h>
60 #include <net/if_media.h>
61 #include <net/if_types.h>
62 #include <net/netisr.h>
63 #include <net/route.h>
64 #include <net/route/nhop.h>
65 
66 #include <netinet/in.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_pcb.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet6/in6_var.h>
75 #include <netinet6/scope6_var.h>
76 #include <netinet/udp.h>
77 #include <netinet/udp_var.h>
78 #include <netinet/in_fib.h>
79 #include <netinet6/in6_fib.h>
80 #include <netinet/ip_ecn.h>
81 #include <net/if_geneve.h>
82 
83 #include <netlink/netlink.h>
84 #include <netlink/netlink_ctl.h>
85 #include <netlink/netlink_var.h>
86 #include <netlink/netlink_route.h>
87 #include <netlink/route/route_var.h>
88 
89 #include <security/mac/mac_framework.h>
90 
/* DTrace SDT provider under which this driver's probes are grouped. */
SDT_PROVIDER_DEFINE(if_geneve);

/*
 * List-head type for chains of softcs; used for the per-socket VNI hash
 * buckets (struct geneve_socket::gnvso_vni_hash).
 */
struct geneve_softc;
LIST_HEAD(geneve_softc_head, geneve_softc);

/* Global sx lock; per its description it covers interface start/stop. */
static struct sx geneve_sx;
SX_SYSINIT(geneve, &geneve_sx, "GENEVE global start/stop lock");

/*
 * NOTE(review): presumably the osd(9) jail slot registered together with
 * geneve_prison_remove(); it is assigned outside this part of the file.
 */
static unsigned geneve_osd_jail_slot;
100 
/*
 * Storage for one socket address that can be accessed generically (sa) or
 * as an IPv4 (sin) or IPv6 (sin6) address without casting.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
};
106 
/*
 * One slot of a socket's multicast membership table
 * (struct geneve_socket::gnvso_mc).
 *
 * NOTE(review): the per-field descriptions are inferred from the field
 * names; the code that fills the slots is outside this part of the file.
 */
struct geneve_socket_mc_info {
	union sockaddr_union	gnvsomc_saddr;	/* presumably source address */
	union sockaddr_union	gnvsomc_gaddr;	/* presumably group address */
	int			gnvsomc_ifidx;	/* presumably interface index */
	int			gnvsomc_users;	/* presumably slot use count */
};
113 
/*
 * Largest encapsulated (inner) packet size: the maximum IP packet size
 * minus the worst-case IPv4 header, the UDP header and
 * sizeof(struct genevehdr).
 */
#define GENEVE_MAX_L3MTU	(IP_MAXPACKET - \
	    60 /* Maximum IPv4 header len */ - \
	    sizeof(struct udphdr) - \
	    sizeof(struct genevehdr))
/* The same limit with room for an Ethernet header plus one VLAN tag. */
#define GENEVE_MAX_MTU		(GENEVE_MAX_L3MTU - \
	    ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)

/* Capability bits grouped as the driver's basic set. */
#define GENEVE_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU | IFCAP_NV)

#define GENEVE_VERSION	0
/*
 * NOTE(review): only a valid bit mask if GENEVE_VNI_MAX (defined outside
 * this file) is a power of two -- confirm against if_geneve.h.
 */
#define GENEVE_VNI_MASK	(GENEVE_VNI_MAX - 1)

/*
 * NOTE(review): presumably the VNI sits above 8 reserved bits in a 32-bit
 * header word (RFC 8926); the users of this shift are outside this chunk.
 */
#define GENEVE_HDR_VNI_SHIFT	8

/* Number of multicast membership slots per socket (gnvso_mc[]). */
#define GENEVE_SO_MC_MAX_GROUPS		32

/* Per-socket VNI hash: 2^SHIFT buckets, bucket chosen by VNI modulo size. */
#define GENEVE_SO_VNI_HASH_SHIFT	6
#define GENEVE_SO_VNI_HASH_SIZE		(1 << GENEVE_SO_VNI_HASH_SHIFT)
#define GENEVE_SO_VNI_HASH(_vni)	((_vni) % GENEVE_SO_VNI_HASH_SIZE)
134 
/*
 * State wrapped around one kernel socket.  Instances are linked on
 * geneve_socket_list, looked up by local address (gnvso_laddr) and
 * reference-counted so they can be shared.
 */
struct geneve_socket {
	struct socket			*gnvso_sock;	/* closed in geneve_socket_destroy() */
	struct rmlock			gnvso_lock;	/* see GENEVE_SO_*LOCK() */
	u_int				gnvso_refcnt;	/* see GENEVE_SO_ACQUIRE/RELEASE */
	union sockaddr_union		gnvso_laddr;	/* lookup key */
	LIST_ENTRY(geneve_socket)	gnvso_entry;	/* geneve_socket_list linkage */
	/* Softcs attached to this socket, hashed by VNI. */
	struct geneve_softc_head	gnvso_vni_hash[GENEVE_SO_VNI_HASH_SIZE];
	/* Multicast membership slots. */
	struct geneve_socket_mc_info	gnvso_mc[GENEVE_SO_MC_MAX_GROUPS];
};
144 
/*
 * Per-socket rmlock wrappers.  Read lock/unlock take the caller's
 * struct rm_priotracker (_p); the write variants do not.
 */
#define GENEVE_SO_RLOCK(_gnvso, _p)	rm_rlock(&(_gnvso)->gnvso_lock, (_p))
#define GENEVE_SO_RUNLOCK(_gnvso, _p)	rm_runlock(&(_gnvso)->gnvso_lock, (_p))
#define GENEVE_SO_WLOCK(_gnvso)		rm_wlock(&(_gnvso)->gnvso_lock)
#define GENEVE_SO_WUNLOCK(_gnvso)		rm_wunlock(&(_gnvso)->gnvso_lock)
/* Assert the lock is held in any mode / in write mode. */
#define GENEVE_SO_LOCK_ASSERT(_gnvso) \
    rm_assert(&(_gnvso)->gnvso_lock, RA_LOCKED)
#define GENEVE_SO_LOCK_WASSERT(_gnvso) \
    rm_assert(&(_gnvso)->gnvso_lock, RA_WLOCKED)

/*
 * Socket reference counting.  GENEVE_SO_RELEASE() evaluates to the result
 * of refcount_release(), i.e. non-zero when the last reference was dropped.
 */
#define GENEVE_SO_ACQUIRE(_gnvso)		refcount_acquire(&(_gnvso)->gnvso_refcnt)
#define GENEVE_SO_RELEASE(_gnvso)		refcount_release(&(_gnvso)->gnvso_refcnt)
156 
/*
 * Forwarding table entry: maps an inner Ethernet MAC address to the remote
 * tunnel endpoint it was seen behind.
 */
struct gnv_ftable_entry {
	LIST_ENTRY(gnv_ftable_entry)	gnvfe_hash;	/* hash bucket linkage */
	/*
	 * GENEVE_FE_FLAG_* bits.
	 * NOTE(review): geneve_ftable_entry_init() takes the flags as
	 * uint32_t, so only the low 16 bits are kept here.
	 */
	uint16_t			gnvfe_flags;
	uint8_t				gnvfe_mac[ETHER_ADDR_LEN];
	union sockaddr_union		gnvfe_raddr;	/* remote endpoint */
	/* Compared against time_uptime when expiring dynamic entries. */
	time_t				gnvfe_expire;
};

#define GENEVE_FE_FLAG_DYNAMIC		0x01
#define GENEVE_FE_FLAG_STATIC		0x02

/* Non-zero when the entry carries GENEVE_FE_FLAG_DYNAMIC. */
#define GENEVE_FE_IS_DYNAMIC(_fe) \
    ((_fe)->gnvfe_flags & GENEVE_FE_FLAG_DYNAMIC)

/*
 * Forwarding table geometry: 2^SHIFT buckets.  The bucket index is the
 * keyed MAC hash reduced modulo the bucket count.
 */
#define GENEVE_SC_FTABLE_SHIFT		9
#define GENEVE_SC_FTABLE_SIZE		(1 << GENEVE_SC_FTABLE_SHIFT)
#define GENEVE_SC_FTABLE_MASK		(GENEVE_SC_FTABLE_SIZE - 1)
#define GENEVE_SC_FTABLE_HASH(_sc, _mac)	\
    (geneve_mac_hash(_sc, _mac) % GENEVE_SC_FTABLE_SIZE)

/* List-head type of one forwarding table bucket. */
LIST_HEAD(geneve_ftable_head, gnv_ftable_entry);
178 
/* Per-interface statistics. */
struct geneve_statistics {
	/* Learning attempts rejected because the table was at its limit. */
	uint32_t	ftable_nospace;
	/*
	 * Bumped each time geneve_ftable_update_locked() drops the read lock
	 * and takes the write lock instead.
	 */
	uint32_t	ftable_lock_upgrade_failed;
	/* NOTE(review): presumably checksum/TSO offload counters; updated outside this chunk. */
	counter_u64_t	txcsum;
	counter_u64_t	tso;
	counter_u64_t	rxcsum;
};
186 
/* Per-interface state of one geneve clone. */
struct geneve_softc {
	/* Linkage on a struct geneve_softc_head list. */
	LIST_ENTRY(geneve_softc)	gnv_entry;

	struct ifnet			*gnv_ifp;
	/* GENEVE_FLAG_* bits defined below. */
	uint32_t			gnv_flags;
#define GENEVE_FLAG_INIT		0x0001
#define GENEVE_FLAG_RUNNING		0x0002
#define GENEVE_FLAG_TEARDOWN		0x0004
#define GENEVE_FLAG_LEARN		0x0008
#define GENEVE_FLAG_USER_MTU		0x0010
#define GENEVE_FLAG_TTL_INHERIT		0x0020
#define GENEVE_FLAG_DSCP_INHERIT	0x0040
#define GENEVE_FLAG_COLLECT_METADATA	0x0080

	int				gnv_reqcap;
	int				gnv_reqcap2;
	struct geneve_socket		*gnv_sock;
	union sockaddr_union		gnv_src_addr;
	/*
	 * Default destination; its port is also applied to addresses learned
	 * by geneve_ftable_learn().
	 */
	union sockaddr_union		gnv_dst_addr;
	uint32_t			gnv_fibnum;
	uint32_t			gnv_vni;
	uint32_t			gnv_port_hash_key;
	uint16_t			gnv_proto;
	uint16_t			gnv_min_port;
	uint16_t			gnv_max_port;
	uint8_t				gnv_ttl;
	enum ifla_geneve_df		gnv_df;

	/* Lookup table from MAC address to forwarding entry. */
	uint32_t			gnv_ftable_cnt;		/* entries currently stored */
	uint32_t			gnv_ftable_max;		/* limit enforced when learning */
	uint32_t			gnv_ftable_timeout;	/* added to time_uptime for expiry */
	uint32_t			gnv_ftable_hash_key;	/* random, set in geneve_ftable_init() */
	struct geneve_ftable_head	*gnv_ftable;		/* GENEVE_SC_FTABLE_SIZE buckets */

	/* Derived from gnv_dst_addr. */
	struct gnv_ftable_entry		gnv_default_fe;

	struct ip_moptions		*gnv_im4o;
	struct ip6_moptions		*gnv_im6o;

	/* Taken through GENEVE_RLOCK()/GENEVE_WLOCK(). */
	struct rmlock			gnv_lock;
	/* Managed through GENEVE_ACQUIRE()/GENEVE_RELEASE(). */
	volatile u_int			gnv_refcnt;

	int				gnv_so_mc_index;
	struct geneve_statistics	gnv_stats;
	struct callout			gnv_callout;
	struct ether_addr		gnv_hwaddr;
	int				gnv_mc_ifindex;
	struct ifnet			*gnv_mc_ifp;
	struct ifmedia			gnv_media;
	char				gnv_mc_ifname[IFNAMSIZ];

	/* For rate limiting errors on the tx fast path. */
	struct timeval			err_time;
	int				err_pps;
};
244 
/*
 * Per-interface rmlock wrappers.  Read lock/unlock take the caller's
 * struct rm_priotracker (_p); the write variants do not.
 */
#define GENEVE_RLOCK(_sc, _p)	rm_rlock(&(_sc)->gnv_lock, (_p))
#define GENEVE_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->gnv_lock, (_p))
#define GENEVE_WLOCK(_sc)	rm_wlock(&(_sc)->gnv_lock)
#define GENEVE_WUNLOCK(_sc)	rm_wunlock(&(_sc)->gnv_lock)
#define GENEVE_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->gnv_lock)
#define GENEVE_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->gnv_lock, RA_LOCKED)
#define GENEVE_LOCK_WASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_WLOCKED)
/*
 * Release the lock in whichever mode the caller currently holds it.  Needed
 * after geneve_ftable_update_locked(), which may have replaced the caller's
 * read lock with the write lock.
 */
#define GENEVE_UNLOCK(_sc, _p) do {		\
    if (GENEVE_LOCK_WOWNED(_sc))		\
	GENEVE_WUNLOCK(_sc);			\
    else					\
	GENEVE_RUNLOCK(_sc, _p);		\
} while (0)

/* Softc reference counting. */
#define GENEVE_ACQUIRE(_sc)	refcount_acquire(&(_sc)->gnv_refcnt)
#define GENEVE_RELEASE(_sc)	refcount_release(&(_sc)->gnv_refcnt)

/* Const-preserving casts from a generic sockaddr pointer. */
#define	SATOCONSTSIN(sa)	((const struct sockaddr_in *)(sa))
#define	SATOCONSTSIN6(sa)	((const struct sockaddr_in6 *)(sa))
264 
/*
 * Per-packet information passed between the input helpers
 * (geneve_input_ether(), geneve_input_inherit()).
 *
 * NOTE(review): field meanings are inferred from their names; the code
 * that fills and consumes them is outside this part of the file.
 */
struct geneve_pkt_info {
	u_int		isr;		/* presumably the netisr protocol number */
	uint16_t	ethertype;
	uint8_t		ecn;
	uint8_t		ttl;
};
271 
272 struct nl_parsed_geneve {
273 	/* essential */
274 	uint32_t			ifla_vni;
275 	uint16_t			ifla_proto;
276 	struct sockaddr			*ifla_local;
277 	struct sockaddr			*ifla_remote;
278 	uint16_t			ifla_local_port;
279 	uint16_t			ifla_remote_port;
280 
281 	/* optional */
282 	struct ifla_geneve_port_range	ifla_port_range;
283 	enum ifla_geneve_df		ifla_df;
284 	uint8_t				ifla_ttl;
285 	bool				ifla_ttl_inherit;
286 	bool				ifla_dscp_inherit;
287 	bool				ifla_external;
288 
289 	/* l2 specific */
290 	bool				ifla_ftable_learn;
291 	bool				ifla_ftable_flush;
292 	uint32_t			ifla_ftable_max;
293 	uint32_t			ifla_ftable_timeout;
294 	uint32_t			ifla_ftable_count;	/* read-only */
295 
296 	/* multicast specific */
297 	char				*ifla_mc_ifname;
298 	uint32_t			ifla_mc_ifindex;	/* read-only */
299 };
300 
301 /* The multicast-based learning parts of the code are taken from if_vxlan */
302 static int	geneve_ftable_addr_cmp(const uint8_t *, const uint8_t *);
303 static void	geneve_ftable_init(struct geneve_softc *);
304 static void	geneve_ftable_fini(struct geneve_softc *);
305 static void	geneve_ftable_flush(struct geneve_softc *, int);
306 static void	geneve_ftable_expire(struct geneve_softc *);
307 static int	geneve_ftable_update_locked(struct geneve_softc *,
308 		    const union sockaddr_union *, const uint8_t *,
309 		    struct rm_priotracker *);
310 static int	geneve_ftable_learn(struct geneve_softc *,
311 		    const struct sockaddr *, const uint8_t *);
312 
313 static struct gnv_ftable_entry *
314 		geneve_ftable_entry_alloc(void);
315 static void	geneve_ftable_entry_free(struct gnv_ftable_entry *);
316 static void	geneve_ftable_entry_init(struct geneve_softc *,
317 		    struct gnv_ftable_entry *, const uint8_t *,
318 		    const struct sockaddr *, uint32_t);
319 static void	geneve_ftable_entry_destroy(struct geneve_softc *,
320 		    struct gnv_ftable_entry *);
321 static int	geneve_ftable_entry_insert(struct geneve_softc *,
322 		    struct gnv_ftable_entry *);
323 static struct gnv_ftable_entry *
324 		geneve_ftable_entry_lookup(struct geneve_softc *,
325 		    const uint8_t *);
326 
327 static struct geneve_socket *
328 		geneve_socket_alloc(union sockaddr_union *laddr);
329 static void	geneve_socket_destroy(struct geneve_socket *);
330 static void	geneve_socket_release(struct geneve_socket *);
331 static struct geneve_socket *
332 		geneve_socket_lookup(union sockaddr_union *);
333 static void	geneve_socket_insert(struct geneve_socket *);
334 static int	geneve_socket_init(struct geneve_socket *, struct ifnet *);
335 static int	geneve_socket_bind(struct geneve_socket *, struct ifnet *);
336 static int	geneve_socket_create(struct ifnet *, int,
337 		    const union sockaddr_union *, struct geneve_socket **);
338 static int	geneve_socket_set_df(struct geneve_socket *, bool);
339 
340 static struct geneve_socket *
341 		geneve_socket_mc_lookup(const union sockaddr_union *);
342 static int	geneve_sockaddr_mc_info_match(
343 		    const struct geneve_socket_mc_info *,
344 		    const union sockaddr_union *,
345 		    const union sockaddr_union *, int);
346 static int	geneve_socket_mc_join_group(struct geneve_socket *,
347 		    const union sockaddr_union *, const union sockaddr_union *,
348 		    int *, union sockaddr_union *);
349 static int	geneve_socket_mc_leave_group(struct geneve_socket *,
350 		    const union sockaddr_union *,
351 		    const union sockaddr_union *, int);
352 static int	geneve_socket_mc_add_group(struct geneve_socket *,
353 		    const union sockaddr_union *,
354 		    const union sockaddr_union *, int, int *);
355 static void	geneve_socket_mc_release_group(struct geneve_socket *, int);
356 
357 static struct geneve_softc *
358 		geneve_socket_lookup_softc_locked(struct geneve_socket *,
359 		    uint32_t);
360 static struct geneve_softc *
361 		geneve_socket_lookup_softc(struct geneve_socket *, uint32_t);
362 static int	geneve_socket_insert_softc(struct geneve_socket *,
363 		    struct geneve_softc *);
364 static void	geneve_socket_remove_softc(struct geneve_socket *,
365 		    struct geneve_softc *);
366 
367 static struct ifnet *
368 		geneve_multicast_if_ref(struct geneve_softc *, uint32_t);
369 static void	geneve_free_multicast(struct geneve_softc *);
370 static int	geneve_setup_multicast_interface(struct geneve_softc *);
371 
372 static int	geneve_setup_multicast(struct geneve_softc *);
373 static int	geneve_setup_socket(struct geneve_softc *);
374 static void	geneve_setup_interface_hdrlen(struct geneve_softc *);
375 static int	geneve_valid_init_config(struct geneve_softc *);
376 static void	geneve_init_complete(struct geneve_softc *);
377 static void	geneve_init(void *);
378 static void	geneve_release(struct geneve_softc *);
379 static void	geneve_teardown_wait(struct geneve_softc *);
380 static void	geneve_teardown_locked(struct geneve_softc *);
381 static void	geneve_teardown(struct geneve_softc *);
382 static void	geneve_timer(void *);
383 
384 static int	geneve_flush_ftable(struct geneve_softc *, bool);
385 static uint16_t	geneve_get_local_port(struct geneve_softc *);
386 static uint16_t	geneve_get_remote_port(struct geneve_softc *);
387 
388 static int	geneve_set_vni_nl(struct geneve_softc *, struct nl_pstate *,
389 		    uint32_t);
390 static int	geneve_set_local_addr_nl(struct geneve_softc *, struct nl_pstate *,
391 		    struct sockaddr *);
392 static int	geneve_set_remote_addr_nl(struct geneve_softc *, struct nl_pstate *,
393 		    struct sockaddr *);
394 static int	geneve_set_local_port_nl(struct geneve_softc *, struct nl_pstate *,
395 		    uint16_t);
396 static int	geneve_set_remote_port_nl(struct geneve_softc *, struct nl_pstate *,
397 		    uint16_t);
398 static int	geneve_set_port_range_nl(struct geneve_softc *, struct nl_pstate *,
399 		    struct ifla_geneve_port_range);
400 static int	geneve_set_df_nl(struct geneve_softc *, struct nl_pstate *,
401 		    enum ifla_geneve_df);
402 static int	geneve_set_ttl_nl(struct geneve_softc *, struct nl_pstate *,
403 		    uint8_t);
404 static int	geneve_set_ttl_inherit_nl(struct geneve_softc *, struct nl_pstate *,
405 		    bool);
406 static int	geneve_set_dscp_inherit_nl(struct geneve_softc *, struct nl_pstate *,
407 		    bool);
408 static int	geneve_set_collect_metadata_nl(struct geneve_softc *,
409 		    struct nl_pstate *, bool);
410 static int	geneve_set_learn_nl(struct geneve_softc *, struct nl_pstate *,
411 		    bool);
412 static int	geneve_set_ftable_max_nl(struct geneve_softc *, struct nl_pstate *,
413 		    uint32_t);
414 static int	geneve_set_ftable_timeout_nl(struct geneve_softc *,
415 		    struct nl_pstate *, uint32_t);
416 static int	geneve_set_mc_if_nl(struct geneve_softc *, struct nl_pstate *,
417 		    char *);
418 static int	geneve_flush_ftable_nl(struct geneve_softc *, struct nl_pstate *,
419 		    bool);
420 static void	geneve_get_local_addr_nl(struct geneve_softc *, struct nl_writer *);
421 static void	geneve_get_remote_addr_nl(struct geneve_softc *, struct nl_writer *);
422 
423 static int	geneve_ioctl_ifflags(struct geneve_softc *);
424 static int	geneve_ioctl(struct ifnet *, u_long, caddr_t);
425 
426 static uint16_t geneve_pick_source_port(struct geneve_softc *, struct mbuf *);
427 static void	geneve_encap_header(struct geneve_softc *, struct mbuf *,
428 		    int, uint16_t, uint16_t, uint16_t);
429 static uint16_t	geneve_get_ethertype(struct mbuf *);
430 static int	geneve_inherit_l3_hdr(struct mbuf *, struct geneve_softc *,
431 		    uint16_t, uint8_t *, uint8_t *, u_short *);
432 #ifdef INET
433 static int	geneve_encap4(struct geneve_softc *,
434 		    const union sockaddr_union *, struct mbuf *);
435 #endif
436 #ifdef INET6
437 static int	geneve_encap6(struct geneve_softc *,
438 		    const union sockaddr_union *, struct mbuf *);
439 #endif
440 static int	geneve_transmit(struct ifnet *, struct mbuf *);
441 static void	geneve_qflush(struct ifnet *);
442 static int	geneve_output(struct ifnet *, struct mbuf *,
443 		    const struct sockaddr *, struct route *);
444 static uint32_t	geneve_map_etype_to_af(uint32_t);
445 static bool	geneve_udp_input(struct mbuf *, int, struct inpcb *,
446 		    const struct sockaddr *, void *);
447 static int	geneve_input_ether(struct geneve_softc *, struct mbuf **,
448 		    const struct sockaddr *, struct geneve_pkt_info *);
449 static int	geneve_input_inherit(struct geneve_softc *,
450 		    struct mbuf **, int, struct geneve_pkt_info *);
451 static int	geneve_next_option(struct geneve_socket *, struct genevehdr *,
452 		    struct mbuf **);
453 static void	geneve_input_csum(struct mbuf *m, struct ifnet *ifp,
454 		    counter_u64_t rxcsum);
455 
456 static void	geneve_stats_alloc(struct geneve_softc *);
457 static void	geneve_stats_free(struct geneve_softc *);
458 static void	geneve_set_default_config(struct geneve_softc *);
459 static int	geneve_set_reqcap(struct geneve_softc *, struct ifnet *, int,
460 		    int);
461 static void	geneve_set_hwcaps(struct geneve_softc *);
462 static int	geneve_clone_create(struct if_clone *, char *, size_t,
463 		    struct ifc_data *, struct ifnet **);
464 static int	geneve_clone_destroy(struct if_clone *, struct ifnet *,
465 		    uint32_t);
466 static int	geneve_clone_create_nl(struct if_clone *, char *, size_t,
467 		    struct ifc_data_nl *);
468 static int	geneve_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
469 static void	geneve_clone_dump_nl(struct ifnet *, struct nl_writer *);
470 
471 static uint32_t geneve_mac_hash(struct geneve_softc *, const uint8_t *);
472 static int	geneve_media_change(struct ifnet *);
473 static void	geneve_media_status(struct ifnet *, struct ifmediareq *);
474 
475 static int	geneve_sockaddr_cmp(const union sockaddr_union *,
476 		    const struct sockaddr *);
477 static void	geneve_sockaddr_copy(union sockaddr_union *,
478 		    const struct sockaddr *);
479 static int	geneve_sockaddr_in_equal(const union sockaddr_union *,
480 		    const struct sockaddr *);
481 static void	geneve_sockaddr_in_copy(union sockaddr_union *,
482 		    const struct sockaddr *);
483 static int	geneve_sockaddr_supported(const union sockaddr_union *, int);
484 static int	geneve_sockaddr_in_any(const union sockaddr_union *);
485 
486 static int	geneve_can_change_config(struct geneve_softc *);
487 static int	geneve_check_proto(uint16_t);
488 static int	geneve_check_multicast_addr(const union sockaddr_union *);
489 static int	geneve_check_sockaddr(const union sockaddr_union *, const int);
490 
491 static int	geneve_prison_remove(void *, void *);
492 static void	vnet_geneve_load(void);
493 static void	vnet_geneve_unload(void);
494 static void	geneve_module_init(void);
495 static void	geneve_module_deinit(void);
496 static int	geneve_modevent(module_t, int, void *);
497 
498 
/* Driver name; also the short name of the M_GENEVE malloc type. */
static const char geneve_name[] = "geneve";
static MALLOC_DEFINE(M_GENEVE, geneve_name,
    "Generic Network Virtualization Encapsulation Interface");
#define MTAG_GENEVE_LOOP	0x93d66dc0 /* geneve mtag */

/* Per-vnet interface cloner handle. */
VNET_DEFINE_STATIC(struct if_clone *, geneve_cloner);
#define	V_geneve_cloner	VNET(geneve_cloner)

/*
 * Protects geneve_socket_list, including the reference count changes that
 * decide whether a socket is added to or removed from it.
 */
static struct mtx geneve_list_mtx;
#define GENEVE_LIST_LOCK()	mtx_lock(&geneve_list_mtx)
#define GENEVE_LIST_UNLOCK()	mtx_unlock(&geneve_list_mtx)

/* All geneve sockets, searched by local address. */
static LIST_HEAD(, geneve_socket) geneve_socket_list = LIST_HEAD_INITIALIZER(geneve_socket_list);

/* Default maximum number of addresses in the forwarding table. */
#define GENEVE_FTABLE_MAX	2000

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#define GENEVE_FTABLE_TIMEOUT	(20 * 60)

/* Maximum timeout (in seconds) of addresses learned in the forwarding table. */
#define GENEVE_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)

/* Number of seconds between pruning attempts of the forwarding table. */
#define GENEVE_FTABLE_PRUNE	(5 * 60)

/* Current pruning period in seconds; starts at GENEVE_FTABLE_PRUNE. */
static int geneve_ftable_prune_period = GENEVE_FTABLE_PRUNE;
526 
/*
 * Attribute table of geneve_create_parser: only IFLA_GENEVE_PROTOCOL is
 * recognized.  _OUT() yields the offset inside struct nl_parsed_geneve
 * where the callback stores the decoded value.
 */
#define _OUT(_field)	offsetof(struct nl_parsed_geneve, _field)
static const struct nlattr_parser nla_p_geneve_create[] = {
	{ .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
};
#undef _OUT
NL_DECLARE_ATTR_PARSER(geneve_create_parser, nla_p_geneve_create);

/*
 * Attribute table of geneve_modify_parser: the full set of IFLA_GENEVE_*
 * attributes decoded into struct nl_parsed_geneve.
 *
 * NOTE(review): netlink attribute tables are expected to be sorted by
 * attribute type; the IFLA_GENEVE_* values are defined outside this file,
 * so the ordering cannot be confirmed from here.
 */
#define _OUT(_field)	offsetof(struct nl_parsed_geneve, _field)
static const struct nlattr_parser nla_p_geneve[] = {
	{ .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
	{ .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
	{ .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
	{ .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = nlattr_get_uint16 },
	{ .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = nlattr_get_uint16 },
	/* Copied as raw bytes; .arg carries the size of the destination. */
	{ .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range),
		.arg = (void *)sizeof(struct ifla_geneve_port_range), .cb = nlattr_get_bytes },
	/* Stored as a single byte; the destination field should be one byte wide. */
	{ .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = nlattr_get_uint8 },
	{ .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = nlattr_get_uint8 },
	{ .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = nlattr_get_bool },
	{ .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = nlattr_get_bool },
	{ .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = nlattr_get_bool },
	{ .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = nlattr_get_bool },
	{ .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = nlattr_get_bool },
	{ .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = nlattr_get_string },
};
#undef _OUT
NL_DECLARE_ATTR_PARSER(geneve_modify_parser, nla_p_geneve);

/* Both parsers gathered in one array. */
static const struct nlhdr_parser *all_parsers[] = {
	&geneve_create_parser, &geneve_modify_parser,
};
561 
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns the difference of the first pair of bytes that differ (a minus
 * b), or 0 when all ETHER_ADDR_LEN bytes are equal.
 */
static int
geneve_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int diff;

	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			return (diff);
	}

	return (0);
}
572 
573 static void
geneve_ftable_init(struct geneve_softc * sc)574 geneve_ftable_init(struct geneve_softc *sc)
575 {
576 	int i;
577 
578 	sc->gnv_ftable = malloc(sizeof(struct geneve_ftable_head) *
579 	    GENEVE_SC_FTABLE_SIZE, M_GENEVE, M_ZERO | M_WAITOK);
580 
581 	for (i = 0; i < GENEVE_SC_FTABLE_SIZE; i++)
582 		LIST_INIT(&sc->gnv_ftable[i]);
583 	sc->gnv_ftable_hash_key = arc4random();
584 }
585 
586 static void
geneve_ftable_fini(struct geneve_softc * sc)587 geneve_ftable_fini(struct geneve_softc *sc)
588 {
589 	int i;
590 
591 	for (i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) {
592 		KASSERT(LIST_EMPTY(&sc->gnv_ftable[i]),
593 		    ("%s: geneve %p ftable[%d] not empty", __func__, sc, i));
594 	}
595 	MPASS(sc->gnv_ftable_cnt == 0);
596 
597 	free(sc->gnv_ftable, M_GENEVE);
598 	sc->gnv_ftable = NULL;
599 }
600 
601 static void
geneve_ftable_flush(struct geneve_softc * sc,int all)602 geneve_ftable_flush(struct geneve_softc *sc, int all)
603 {
604 	struct gnv_ftable_entry *fe, *tfe;
605 
606 	for (int i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) {
607 		LIST_FOREACH_SAFE(fe, &sc->gnv_ftable[i], gnvfe_hash, tfe) {
608 			if (all || GENEVE_FE_IS_DYNAMIC(fe))
609 				geneve_ftable_entry_destroy(sc, fe);
610 		}
611 	}
612 }
613 
614 static void
geneve_ftable_expire(struct geneve_softc * sc)615 geneve_ftable_expire(struct geneve_softc *sc)
616 {
617 	struct gnv_ftable_entry *fe, *tfe;
618 
619 	GENEVE_LOCK_WASSERT(sc);
620 
621 	for (int i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) {
622 		LIST_FOREACH_SAFE(fe, &sc->gnv_ftable[i], gnvfe_hash, tfe) {
623 			if (GENEVE_FE_IS_DYNAMIC(fe) &&
624 			    time_uptime >= fe->gnvfe_expire)
625 				geneve_ftable_entry_destroy(sc, fe);
626 		}
627 	}
628 }
629 
630 static int
geneve_ftable_update_locked(struct geneve_softc * sc,const union sockaddr_union * unsa,const uint8_t * mac,struct rm_priotracker * tracker)631 geneve_ftable_update_locked(struct geneve_softc *sc,
632     const union sockaddr_union *unsa, const uint8_t *mac,
633     struct rm_priotracker *tracker)
634 {
635 	struct gnv_ftable_entry *fe;
636 	int error;
637 
638 	GENEVE_LOCK_ASSERT(sc);
639 
640 again:
641 	/*
642 	 * A forwarding entry for this MAC address might already exist. If
643 	 * so, update it, otherwise create a new one. We may have to upgrade
644 	 * the lock if we have to change or create an entry.
645 	 */
646 	fe = geneve_ftable_entry_lookup(sc, mac);
647 	if (fe != NULL) {
648 		fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
649 
650 		if (!GENEVE_FE_IS_DYNAMIC(fe) ||
651 		    geneve_sockaddr_in_equal(&fe->gnvfe_raddr, &unsa->sa))
652 			return (0);
653 		if (!GENEVE_LOCK_WOWNED(sc)) {
654 			GENEVE_RUNLOCK(sc, tracker);
655 			GENEVE_WLOCK(sc);
656 			sc->gnv_stats.ftable_lock_upgrade_failed++;
657 			goto again;
658 		}
659 		geneve_sockaddr_in_copy(&fe->gnvfe_raddr, &unsa->sa);
660 		return (0);
661 	}
662 
663 	if (!GENEVE_LOCK_WOWNED(sc)) {
664 		GENEVE_RUNLOCK(sc, tracker);
665 		GENEVE_WLOCK(sc);
666 		sc->gnv_stats.ftable_lock_upgrade_failed++;
667 		goto again;
668 	}
669 
670 	if (sc->gnv_ftable_cnt >= sc->gnv_ftable_max) {
671 		sc->gnv_stats.ftable_nospace++;
672 		return (ENOSPC);
673 	}
674 
675 	fe = geneve_ftable_entry_alloc();
676 	if (fe == NULL)
677 		return (ENOMEM);
678 
679 	geneve_ftable_entry_init(sc, fe, mac, &unsa->sa, GENEVE_FE_FLAG_DYNAMIC);
680 
681 	/* The prior lookup failed, so the insert should not. */
682 	error = geneve_ftable_entry_insert(sc, fe);
683 	MPASS(error == 0);
684 
685 	return (error);
686 }
687 
688 static int
geneve_ftable_learn(struct geneve_softc * sc,const struct sockaddr * sa,const uint8_t * mac)689 geneve_ftable_learn(struct geneve_softc *sc, const struct sockaddr *sa,
690     const uint8_t *mac)
691 {
692 	struct rm_priotracker tracker;
693 	union sockaddr_union unsa;
694 	int error;
695 
696 	/*
697 	 * The source port may be randomly selected by the remote host, so
698 	 * use the port of the default destination address.
699 	 */
700 	geneve_sockaddr_copy(&unsa, sa);
701 	unsa.sin.sin_port = sc->gnv_dst_addr.sin.sin_port;
702 
703 #ifdef INET6
704 	if (unsa.sa.sa_family == AF_INET6) {
705 		error = sa6_embedscope(&unsa.sin6, V_ip6_use_defzone);
706 		if (error)
707 			return (error);
708 	}
709 #endif
710 
711 	GENEVE_RLOCK(sc, &tracker);
712 	error = geneve_ftable_update_locked(sc, &unsa, mac, &tracker);
713 	GENEVE_UNLOCK(sc, &tracker);
714 
715 	return (error);
716 }
717 
718 static struct gnv_ftable_entry *
geneve_ftable_entry_alloc(void)719 geneve_ftable_entry_alloc(void)
720 {
721 	struct gnv_ftable_entry *fe;
722 
723 	fe = malloc(sizeof(*fe), M_GENEVE, M_ZERO | M_NOWAIT);
724 
725 	return (fe);
726 }
727 
728 static void
geneve_ftable_entry_free(struct gnv_ftable_entry * fe)729 geneve_ftable_entry_free(struct gnv_ftable_entry *fe)
730 {
731 
732 	free(fe, M_GENEVE);
733 }
734 
735 static void
geneve_ftable_entry_init(struct geneve_softc * sc,struct gnv_ftable_entry * fe,const uint8_t * mac,const struct sockaddr * sa,uint32_t flags)736 geneve_ftable_entry_init(struct geneve_softc *sc, struct gnv_ftable_entry *fe,
737     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
738 {
739 
740 	fe->gnvfe_flags = flags;
741 	fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
742 	memcpy(fe->gnvfe_mac, mac, ETHER_ADDR_LEN);
743 	geneve_sockaddr_copy(&fe->gnvfe_raddr, sa);
744 }
745 
746 static void
geneve_ftable_entry_destroy(struct geneve_softc * sc,struct gnv_ftable_entry * fe)747 geneve_ftable_entry_destroy(struct geneve_softc *sc,
748     struct gnv_ftable_entry *fe)
749 {
750 
751 	sc->gnv_ftable_cnt--;
752 	LIST_REMOVE(fe, gnvfe_hash);
753 	geneve_ftable_entry_free(fe);
754 }
755 
756 static int
geneve_ftable_entry_insert(struct geneve_softc * sc,struct gnv_ftable_entry * fe)757 geneve_ftable_entry_insert(struct geneve_softc *sc,
758     struct gnv_ftable_entry *fe)
759 {
760 	struct gnv_ftable_entry *lfe;
761 	uint32_t hash;
762 	int dir;
763 
764 	GENEVE_LOCK_WASSERT(sc);
765 	hash = GENEVE_SC_FTABLE_HASH(sc, fe->gnvfe_mac);
766 
767 	lfe = LIST_FIRST(&sc->gnv_ftable[hash]);
768 	if (lfe == NULL) {
769 		LIST_INSERT_HEAD(&sc->gnv_ftable[hash], fe, gnvfe_hash);
770 		goto out;
771 	}
772 
773 	do {
774 		dir = geneve_ftable_addr_cmp(fe->gnvfe_mac, lfe->gnvfe_mac);
775 		if (dir == 0)
776 			return (EEXIST);
777 		if (dir > 0) {
778 			LIST_INSERT_BEFORE(lfe, fe, gnvfe_hash);
779 			goto out;
780 		} else if (LIST_NEXT(lfe, gnvfe_hash) == NULL) {
781 			LIST_INSERT_AFTER(lfe, fe, gnvfe_hash);
782 			goto out;
783 		} else
784 			lfe = LIST_NEXT(lfe, gnvfe_hash);
785 	} while (lfe != NULL);
786 
787 out:
788 	sc->gnv_ftable_cnt++;
789 
790 	return (0);
791 }
792 
793 static struct gnv_ftable_entry *
geneve_ftable_entry_lookup(struct geneve_softc * sc,const uint8_t * mac)794 geneve_ftable_entry_lookup(struct geneve_softc *sc, const uint8_t *mac)
795 {
796 	struct gnv_ftable_entry *fe;
797 	uint32_t hash;
798 	int dir;
799 
800 	GENEVE_LOCK_ASSERT(sc);
801 
802 	hash = GENEVE_SC_FTABLE_HASH(sc, mac);
803 	LIST_FOREACH(fe, &sc->gnv_ftable[hash], gnvfe_hash) {
804 		dir = geneve_ftable_addr_cmp(mac, fe->gnvfe_mac);
805 		if (dir == 0)
806 			return (fe);
807 		if (dir > 0)
808 			break;
809 	}
810 
811 	return (NULL);
812 }
813 
814 static struct geneve_socket *
geneve_socket_alloc(union sockaddr_union * laddr)815 geneve_socket_alloc(union sockaddr_union *laddr)
816 {
817 	struct geneve_socket *gnvso;
818 
819 	gnvso = malloc(sizeof(*gnvso), M_GENEVE, M_WAITOK | M_ZERO);
820 	rm_init(&gnvso->gnvso_lock, "genevesorm");
821 	refcount_init(&gnvso->gnvso_refcnt, 0);
822 	for (int i = 0; i < GENEVE_SO_VNI_HASH_SIZE; i++)
823 		LIST_INIT(&gnvso->gnvso_vni_hash[i]);
824 	gnvso->gnvso_laddr = *laddr;
825 
826 	return (gnvso);
827 }
828 
829 static void
geneve_socket_destroy(struct geneve_socket * gnvso)830 geneve_socket_destroy(struct geneve_socket *gnvso)
831 {
832 	struct socket *so;
833 
834 	so = gnvso->gnvso_sock;
835 	if (so != NULL) {
836 		gnvso->gnvso_sock = NULL;
837 		soclose(so);
838 	}
839 
840 	rm_destroy(&gnvso->gnvso_lock);
841 	free(gnvso, M_GENEVE);
842 }
843 
844 static void
geneve_socket_release(struct geneve_socket * gnvso)845 geneve_socket_release(struct geneve_socket *gnvso)
846 {
847 	int destroy;
848 
849 	GENEVE_LIST_LOCK();
850 	destroy = GENEVE_SO_RELEASE(gnvso);
851 	if (destroy != 0)
852 		LIST_REMOVE(gnvso, gnvso_entry);
853 	GENEVE_LIST_UNLOCK();
854 
855 	if (destroy != 0)
856 		geneve_socket_destroy(gnvso);
857 }
858 
859 static struct geneve_socket *
geneve_socket_lookup(union sockaddr_union * unsa)860 geneve_socket_lookup(union sockaddr_union *unsa)
861 {
862 	struct geneve_socket *gnvso;
863 
864 	GENEVE_LIST_LOCK();
865 	LIST_FOREACH(gnvso, &geneve_socket_list, gnvso_entry) {
866 		if (geneve_sockaddr_cmp(&gnvso->gnvso_laddr, &unsa->sa) == 0) {
867 			GENEVE_SO_ACQUIRE(gnvso);
868 			break;
869 		}
870 	}
871 	GENEVE_LIST_UNLOCK();
872 
873 	return (gnvso);
874 }
875 
876 static void
geneve_socket_insert(struct geneve_socket * gnvso)877 geneve_socket_insert(struct geneve_socket *gnvso)
878 {
879 
880 	GENEVE_LIST_LOCK();
881 	GENEVE_SO_ACQUIRE(gnvso);
882 	LIST_INSERT_HEAD(&geneve_socket_list, gnvso, gnvso_entry);
883 	GENEVE_LIST_UNLOCK();
884 }
885 
886 static int
geneve_socket_init(struct geneve_socket * gnvso,struct ifnet * ifp)887 geneve_socket_init(struct geneve_socket *gnvso, struct ifnet *ifp)
888 {
889 	struct thread *td;
890 	int error;
891 
892 	td = curthread;
893 	error = socreate(gnvso->gnvso_laddr.sa.sa_family, &gnvso->gnvso_sock,
894 	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
895 	if (error) {
896 		if_printf(ifp, "cannot create socket: %d\n", error);
897 		return (error);
898 	}
899 
900 	/*
901 	 * XXX: If Geneve traffic is shared with other UDP listeners on
902 	 * the same IP address, tunnel endpoints SHOULD implement a mechanism
903 	 * to ensure ICMP return traffic arising from network errors is
904 	 * directed to the correct listener. Unfortunately,
905 	 * udp_set_kernel_tunneling does not handle icmp errors from transit
906 	 * devices other than specified source.
907 	 */
908 	error = udp_set_kernel_tunneling(gnvso->gnvso_sock,
909 	    geneve_udp_input, NULL, gnvso);
910 	if (error)
911 		if_printf(ifp, "cannot set tunneling function: %d\n", error);
912 
913 	return (error);
914 }
915 
916 static int
geneve_socket_bind(struct geneve_socket * gnvso,struct ifnet * ifp)917 geneve_socket_bind(struct geneve_socket *gnvso, struct ifnet *ifp)
918 {
919 	union sockaddr_union laddr;
920 	int error;
921 
922 	laddr = gnvso->gnvso_laddr;
923 	error = sobind(gnvso->gnvso_sock, &laddr.sa, curthread);
924 	if (error)
925 		return (error);
926 
927 	return (0);
928 }
929 
930 static int
geneve_socket_create(struct ifnet * ifp,int multicast,const union sockaddr_union * unsa,struct geneve_socket ** xgnvso)931 geneve_socket_create(struct ifnet *ifp, int multicast,
932     const union sockaddr_union *unsa, struct geneve_socket **xgnvso)
933 {
934 	union sockaddr_union laddr;
935 	struct geneve_socket *gnvso;
936 	int error;
937 
938 	laddr = *unsa;
939 
940 	/*
941 	 * If this socket will be multicast, then only the local port
942 	 * must be specified when binding.
943 	 */
944 	if (multicast != 0) {
945 		switch (laddr.sa.sa_family) {
946 #ifdef INET
947 		case AF_INET:
948 			laddr.sin.sin_addr.s_addr = INADDR_ANY;
949 			break;
950 #endif
951 #ifdef INET6
952 		case AF_INET6:
953 			laddr.sin6.sin6_addr = in6addr_any;
954 			break;
955 #endif
956 		default:
957 			return (EAFNOSUPPORT);
958 		}
959 	}
960 	gnvso = geneve_socket_alloc(&laddr);
961 	if (gnvso == NULL)
962 		return (ENOMEM);
963 
964 	error = geneve_socket_init(gnvso, ifp);
965 	if (error)
966 		goto fail;
967 
968 	error = geneve_socket_bind(gnvso, ifp);
969 	if (error)
970 		goto fail;
971 
972 	/*
973 	 * There is a small window between the bind completing and
974 	 * inserting the socket, so that a concurrent create may fail.
975 	 * Let's not worry about that for now.
976 	 */
977 	if_printf(ifp, "new geneve socket inserted to socket list\n");
978 	geneve_socket_insert(gnvso);
979 	*xgnvso = gnvso;
980 
981 	return (0);
982 
983 fail:
984 	if_printf(ifp, "can't create new socket (error: %d)\n", error);
985 	geneve_socket_destroy(gnvso);
986 
987 	return (error);
988 }
989 
990 static struct geneve_socket *
geneve_socket_mc_lookup(const union sockaddr_union * unsa)991 geneve_socket_mc_lookup(const union sockaddr_union *unsa)
992 {
993 	union sockaddr_union laddr;
994 
995 	laddr = *unsa;
996 
997 	switch (laddr.sa.sa_family) {
998 #ifdef INET
999 	case AF_INET:
1000 		laddr.sin.sin_addr.s_addr = INADDR_ANY;
1001 		break;
1002 #endif
1003 #ifdef INET6
1004 	case AF_INET6:
1005 		laddr.sin6.sin6_addr = in6addr_any;
1006 		break;
1007 #endif
1008 	default:
1009 		return (NULL);
1010 	}
1011 
1012 	return (geneve_socket_lookup(&laddr));
1013 }
1014 
1015 static int
geneve_sockaddr_mc_info_match(const struct geneve_socket_mc_info * mc,const union sockaddr_union * group,const union sockaddr_union * local,int ifidx)1016 geneve_sockaddr_mc_info_match(const struct geneve_socket_mc_info *mc,
1017     const union sockaddr_union *group, const union sockaddr_union *local,
1018     int ifidx)
1019 {
1020 
1021 	if (!geneve_sockaddr_in_any(local) &&
1022 	    !geneve_sockaddr_in_equal(&mc->gnvsomc_saddr, &local->sa))
1023 		return (0);
1024 	if (!geneve_sockaddr_in_equal(&mc->gnvsomc_gaddr, &group->sa))
1025 		return (0);
1026 	if (ifidx != 0 && ifidx != mc->gnvsomc_ifidx)
1027 		return (0);
1028 
1029 	return (1);
1030 }
1031 
1032 static int
geneve_socket_mc_join_group(struct geneve_socket * gnvso,const union sockaddr_union * group,const union sockaddr_union * local,int * ifidx,union sockaddr_union * source)1033 geneve_socket_mc_join_group(struct geneve_socket *gnvso,
1034     const union sockaddr_union *group, const union sockaddr_union *local,
1035     int *ifidx, union sockaddr_union *source)
1036 {
1037 	struct sockopt sopt;
1038 	int error;
1039 
1040 	*source = *local;
1041 
1042 	if (group->sa.sa_family == AF_INET) {
1043 		struct ip_mreq mreq;
1044 
1045 		mreq.imr_multiaddr = group->sin.sin_addr;
1046 		mreq.imr_interface = local->sin.sin_addr;
1047 
1048 		memset(&sopt, 0, sizeof(sopt));
1049 		sopt.sopt_dir = SOPT_SET;
1050 		sopt.sopt_level = IPPROTO_IP;
1051 		sopt.sopt_name = IP_ADD_MEMBERSHIP;
1052 		sopt.sopt_val = &mreq;
1053 		sopt.sopt_valsize = sizeof(mreq);
1054 		error = sosetopt(gnvso->gnvso_sock, &sopt);
1055 		if (error)
1056 			return (error);
1057 
1058 		/*
1059 		 * BMV: Ideally, there would be a formal way for us to get
1060 		 * the local interface that was selected based on the
1061 		 * imr_interface address. We could then update *ifidx so
1062 		 * geneve_sockaddr_mc_info_match() would return a match for
1063 		 * later creates that explicitly set the multicast interface.
1064 		 *
1065 		 * If we really need to, we can of course look in the INP's
1066 		 * membership list:
1067 		 *     sotoinpcb(gnvso->gnvso_sock)->inp_moptions->
1068 		 *         imo_head[]->imf_inm->inm_ifp
1069 		 * similarly to imo_match_group().
1070 		 */
1071 		source->sin.sin_addr = local->sin.sin_addr;
1072 
1073 	} else if (group->sa.sa_family == AF_INET6) {
1074 		struct ipv6_mreq mreq;
1075 
1076 		mreq.ipv6mr_multiaddr = group->sin6.sin6_addr;
1077 		mreq.ipv6mr_interface = *ifidx;
1078 
1079 		memset(&sopt, 0, sizeof(sopt));
1080 		sopt.sopt_dir = SOPT_SET;
1081 		sopt.sopt_level = IPPROTO_IPV6;
1082 		sopt.sopt_name = IPV6_JOIN_GROUP;
1083 		sopt.sopt_val = &mreq;
1084 		sopt.sopt_valsize = sizeof(mreq);
1085 		error = sosetopt(gnvso->gnvso_sock, &sopt);
1086 
1087 		/*
1088 		 * BMV: As with IPv4, we would really like to know what
1089 		 * interface in6p_lookup_mcast_ifp() selected.
1090 		 */
1091 	} else
1092 		error = EAFNOSUPPORT;
1093 
1094 	return (error);
1095 }
1096 
1097 static int
geneve_socket_mc_leave_group(struct geneve_socket * gnvso,const union sockaddr_union * group,const union sockaddr_union * source,int ifidx)1098 geneve_socket_mc_leave_group(struct geneve_socket *gnvso,
1099     const union sockaddr_union *group, const union sockaddr_union *source,
1100     int ifidx)
1101 {
1102 	struct sockopt sopt;
1103 	int error;
1104 
1105 	memset(&sopt, 0, sizeof(sopt));
1106 	sopt.sopt_dir = SOPT_SET;
1107 
1108 	if (group->sa.sa_family == AF_INET) {
1109 		struct ip_mreq mreq;
1110 
1111 		mreq.imr_multiaddr = group->sin.sin_addr;
1112 		mreq.imr_interface = source->sin.sin_addr;
1113 
1114 		sopt.sopt_level = IPPROTO_IP;
1115 		sopt.sopt_name = IP_DROP_MEMBERSHIP;
1116 		sopt.sopt_val = &mreq;
1117 		sopt.sopt_valsize = sizeof(mreq);
1118 		error = sosetopt(gnvso->gnvso_sock, &sopt);
1119 	} else if (group->sa.sa_family == AF_INET6) {
1120 		struct ipv6_mreq mreq;
1121 
1122 		mreq.ipv6mr_multiaddr = group->sin6.sin6_addr;
1123 		mreq.ipv6mr_interface = ifidx;
1124 
1125 		sopt.sopt_level = IPPROTO_IPV6;
1126 		sopt.sopt_name = IPV6_LEAVE_GROUP;
1127 		sopt.sopt_val = &mreq;
1128 		sopt.sopt_valsize = sizeof(mreq);
1129 		error = sosetopt(gnvso->gnvso_sock, &sopt);
1130 	} else
1131 		error = EAFNOSUPPORT;
1132 
1133 	return (error);
1134 }
1135 
/*
 * Register one more user of a multicast group on a shared socket, joining
 * the group on the socket if no matching membership is recorded yet.
 *
 * gnvso - socket whose membership table (gnvso_mc[]) is updated.
 * group - multicast group address.
 * local - local (source) address requested by the caller.
 * ifidx - requested multicast interface index, 0 for "any".
 * idx   - on success, receives the membership table slot that was used.
 *
 * Returns 0 on success, ENOSPC when the table has no free slot, or the
 * error from joining the group.
 */
static int
geneve_socket_mc_add_group(struct geneve_socket *gnvso,
    const union sockaddr_union *group, const union sockaddr_union *local,
    int ifidx, int *idx)
{
	union sockaddr_union source;
	struct geneve_socket_mc_info *mc;
	int i, empty, error;

	/*
	 * Within a socket, the same multicast group may be used by multiple
	 * interfaces, each with a different network identifier. But a socket
	 * may only join a multicast group once, so keep track of the users
	 * here.
	 */

	/* Pass 1: reuse an existing membership, counting free slots. */
	GENEVE_SO_WLOCK(gnvso);
	for (empty = 0, i = 0; i < GENEVE_SO_MC_MAX_GROUPS; i++) {
		mc = &gnvso->gnvso_mc[i];

		if (mc->gnvsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			empty++;
			continue;
		}
		/* Jumps to "out" with the socket lock still held. */
		if (geneve_sockaddr_mc_info_match(mc, group, local, ifidx))
			goto out;
	}
	GENEVE_SO_WUNLOCK(gnvso);

	if (empty == 0)
		return (ENOSPC);

	/* The join itself is performed without the socket lock held. */
	error = geneve_socket_mc_join_group(gnvso, group, local, &ifidx, &source);
	if (error)
		return (error);

	/*
	 * Pass 2: record the new membership in the first free slot.  The
	 * lock was dropped above, so the free slots seen in pass 1 may be
	 * gone by now.
	 */
	GENEVE_SO_WLOCK(gnvso);
	for (i = 0; i < GENEVE_SO_MC_MAX_GROUPS; i++) {
		mc = &gnvso->gnvso_mc[i];

		if (mc->gnvsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			geneve_sockaddr_copy(&mc->gnvsomc_gaddr, &group->sa);
			geneve_sockaddr_copy(&mc->gnvsomc_saddr, &source.sa);
			mc->gnvsomc_ifidx = ifidx;
			goto out;
		}
	}
	GENEVE_SO_WUNLOCK(gnvso);

	/* No slot left after all: undo the join before reporting ENOSPC. */
	error = geneve_socket_mc_leave_group(gnvso, group, &source, ifidx);
	MPASS(error == 0);

	return (ENOSPC);

out:
	/* Reached with the socket write lock held and mc/i naming the slot. */
	mc->gnvsomc_users++;
	GENEVE_SO_WUNLOCK(gnvso);
	*idx = i;

	return (0);
}
1197 
1198 static void
geneve_socket_mc_release_group(struct geneve_socket * vso,int idx)1199 geneve_socket_mc_release_group(struct geneve_socket *vso, int idx)
1200 {
1201 	union sockaddr_union group, source;
1202 	struct geneve_socket_mc_info *mc;
1203 	int ifidx, leave;
1204 
1205 	KASSERT(idx >= 0 && idx < GENEVE_SO_MC_MAX_GROUPS,
1206 	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1207 
1208 	leave = 0;
1209 	mc = &vso->gnvso_mc[idx];
1210 
1211 	GENEVE_SO_WLOCK(vso);
1212 	mc->gnvsomc_users--;
1213 	if (mc->gnvsomc_users == 0) {
1214 		group = mc->gnvsomc_gaddr;
1215 		source = mc->gnvsomc_saddr;
1216 		ifidx = mc->gnvsomc_ifidx;
1217 		memset(mc, 0, sizeof(*mc));
1218 		leave = 1;
1219 	}
1220 	GENEVE_SO_WUNLOCK(vso);
1221 
1222 	if (leave != 0) {
1223 		/*
1224 		 * Our socket's membership in this group may have already
1225 		 * been removed if we joined through an interface that's
1226 		 * been detached.
1227 		 */
1228 		geneve_socket_mc_leave_group(vso, &group, &source, ifidx);
1229 	}
1230 }
1231 
1232 static struct geneve_softc *
geneve_socket_lookup_softc_locked(struct geneve_socket * gnvso,uint32_t vni)1233 geneve_socket_lookup_softc_locked(struct geneve_socket *gnvso, uint32_t vni)
1234 {
1235 	struct geneve_softc *sc;
1236 	uint32_t hash;
1237 
1238 	GENEVE_SO_LOCK_ASSERT(gnvso);
1239 	hash = GENEVE_SO_VNI_HASH(vni);
1240 
1241 	LIST_FOREACH(sc, &gnvso->gnvso_vni_hash[hash], gnv_entry) {
1242 		if (sc->gnv_vni == vni) {
1243 			GENEVE_ACQUIRE(sc);
1244 			break;
1245 		}
1246 	}
1247 
1248 	return (sc);
1249 }
1250 
/*
 * Find the softc attached to a socket for a given VNI, taking the socket
 * read lock around the search.
 *
 * Returns what geneve_socket_lookup_softc_locked() returns: the referenced
 * softc, or NULL when there is no match.
 */
static struct geneve_softc *
geneve_socket_lookup_softc(struct geneve_socket *gnvso, uint32_t vni)
{
	struct rm_priotracker tracker;
	struct geneve_softc *sc;

	GENEVE_SO_RLOCK(gnvso, &tracker);
	sc = geneve_socket_lookup_softc_locked(gnvso, vni);
	GENEVE_SO_RUNLOCK(gnvso, &tracker);

	return (sc);
}
1263 
1264 static int
geneve_socket_insert_softc(struct geneve_socket * gnvso,struct geneve_softc * sc)1265 geneve_socket_insert_softc(struct geneve_socket *gnvso, struct geneve_softc *sc)
1266 {
1267 	struct geneve_softc *tsc;
1268 	uint32_t vni, hash;
1269 
1270 	vni = sc->gnv_vni;
1271 	hash = GENEVE_SO_VNI_HASH(vni);
1272 
1273 	GENEVE_SO_WLOCK(gnvso);
1274 	tsc = geneve_socket_lookup_softc_locked(gnvso, vni);
1275 	if (tsc != NULL) {
1276 		GENEVE_SO_WUNLOCK(gnvso);
1277 		geneve_release(tsc);
1278 		return (EEXIST);
1279 	}
1280 
1281 	GENEVE_ACQUIRE(sc);
1282 	LIST_INSERT_HEAD(&gnvso->gnvso_vni_hash[hash], sc, gnv_entry);
1283 	GENEVE_SO_WUNLOCK(gnvso);
1284 
1285 	return (0);
1286 }
1287 
/*
 * Detach a softc from a socket's VNI hash and release one softc reference
 * (presumably the one taken by geneve_socket_insert_softc()).  The release
 * happens after the socket lock has been dropped.
 */
static void
geneve_socket_remove_softc(struct geneve_socket *gnvso, struct geneve_softc *sc)
{

	GENEVE_SO_WLOCK(gnvso);
	LIST_REMOVE(sc, gnv_entry);
	GENEVE_SO_WUNLOCK(gnvso);

	geneve_release(sc);
}
1298 
1299 static struct ifnet *
geneve_multicast_if_ref(struct geneve_softc * sc,uint32_t af)1300 geneve_multicast_if_ref(struct geneve_softc *sc, uint32_t af)
1301 {
1302 	struct ifnet *ifp;
1303 
1304 	GENEVE_LOCK_ASSERT(sc);
1305 
1306 	ifp = NULL;
1307 	if (af == AF_INET && sc->gnv_im4o != NULL)
1308 		ifp = sc->gnv_im4o->imo_multicast_ifp;
1309 	else if (af == AF_INET6 && sc->gnv_im6o != NULL)
1310 		ifp = sc->gnv_im6o->im6o_multicast_ifp;
1311 
1312 	if (ifp != NULL)
1313 		if_ref(ifp);
1314 
1315 	return (ifp);
1316 }
1317 
1318 static void
geneve_free_multicast(struct geneve_softc * sc)1319 geneve_free_multicast(struct geneve_softc *sc)
1320 {
1321 
1322 	if (sc->gnv_mc_ifp != NULL) {
1323 		if_rele(sc->gnv_mc_ifp);
1324 		sc->gnv_mc_ifp = NULL;
1325 		sc->gnv_mc_ifindex = 0;
1326 	}
1327 
1328 	if (sc->gnv_im4o != NULL) {
1329 		free(sc->gnv_im4o, M_GENEVE);
1330 		sc->gnv_im4o = NULL;
1331 	}
1332 
1333 	if (sc->gnv_im6o != NULL) {
1334 		free(sc->gnv_im6o, M_GENEVE);
1335 		sc->gnv_im6o = NULL;
1336 	}
1337 }
1338 
1339 static int
geneve_setup_multicast_interface(struct geneve_softc * sc)1340 geneve_setup_multicast_interface(struct geneve_softc *sc)
1341 {
1342 	struct ifnet *ifp;
1343 
1344 	ifp = ifunit_ref(sc->gnv_mc_ifname);
1345 	if (ifp == NULL) {
1346 		if_printf(sc->gnv_ifp, "multicast interface %s does not exist\n",
1347 		    sc->gnv_mc_ifname);
1348 		return (ENOENT);
1349 	}
1350 
1351 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1352 		if_printf(sc->gnv_ifp, "interface %s does not support multicast\n",
1353 		    sc->gnv_mc_ifname);
1354 		if_rele(ifp);
1355 		return (ENOTSUP);
1356 	}
1357 
1358 	sc->gnv_mc_ifp = ifp;
1359 	sc->gnv_mc_ifindex = ifp->if_index;
1360 
1361 	return (0);
1362 }
1363 
1364 static int
geneve_setup_multicast(struct geneve_softc * sc)1365 geneve_setup_multicast(struct geneve_softc *sc)
1366 {
1367 	const union sockaddr_union *group;
1368 	int error;
1369 
1370 	group = &sc->gnv_dst_addr;
1371 	error = 0;
1372 
1373 	if (sc->gnv_mc_ifname[0] != '\0') {
1374 		error = geneve_setup_multicast_interface(sc);
1375 		if (error)
1376 			return (error);
1377 	}
1378 
1379 	/*
1380 	 * Initialize an multicast options structure that is sufficiently
1381 	 * populated for use in the respective IP output routine. This
1382 	 * structure is typically stored in the socket, but our sockets
1383 	 * may be shared among multiple interfaces.
1384 	 */
1385 	if (group->sa.sa_family == AF_INET) {
1386 		sc->gnv_im4o = malloc(sizeof(struct ip_moptions), M_GENEVE,
1387 		    M_ZERO | M_WAITOK);
1388 		sc->gnv_im4o->imo_multicast_ifp = sc->gnv_mc_ifp;
1389 		sc->gnv_im4o->imo_multicast_ttl = sc->gnv_ttl;
1390 		sc->gnv_im4o->imo_multicast_vif = -1;
1391 	} else if (group->sa.sa_family == AF_INET6) {
1392 		sc->gnv_im6o = malloc(sizeof(struct ip6_moptions), M_GENEVE,
1393 		    M_ZERO | M_WAITOK);
1394 		sc->gnv_im6o->im6o_multicast_ifp = sc->gnv_mc_ifp;
1395 		sc->gnv_im6o->im6o_multicast_hlim = sc->gnv_ttl;
1396 	}
1397 
1398 	return (error);
1399 }
1400 
1401 static int
geneve_setup_socket(struct geneve_softc * sc)1402 geneve_setup_socket(struct geneve_softc *sc)
1403 {
1404 	struct geneve_socket *gnvso;
1405 	struct ifnet *ifp;
1406 	union sockaddr_union *saddr, *daddr;
1407 	int multicast, error;
1408 
1409 	gnvso = NULL;
1410 	ifp = sc->gnv_ifp;
1411 	saddr = &sc->gnv_src_addr;
1412 	daddr = &sc->gnv_dst_addr;
1413 	multicast = geneve_check_multicast_addr(daddr);
1414 	MPASS(multicast != EINVAL);
1415 	sc->gnv_so_mc_index = -1;
1416 
1417 	/* Try to create the socket. If that fails, attempt to use an existing one. */
1418 	error = geneve_socket_create(ifp, multicast, saddr, &gnvso);
1419 	if (error) {
1420 		if (multicast != 0)
1421 			gnvso = geneve_socket_mc_lookup(saddr);
1422 		else
1423 			gnvso = geneve_socket_lookup(saddr);
1424 
1425 		if (gnvso == NULL) {
1426 			if_printf(ifp, "can't find existing socket\n");
1427 			goto out;
1428 		}
1429 	}
1430 
1431 	if (sc->gnv_df == IFLA_GENEVE_DF_SET) {
1432 		error = geneve_socket_set_df(gnvso, true);
1433 		if (error)
1434 			goto out;
1435 	}
1436 
1437 	if (multicast != 0) {
1438 		error = geneve_setup_multicast(sc);
1439 		if (error)
1440 			goto out;
1441 
1442 		error = geneve_socket_mc_add_group(gnvso, daddr, saddr,
1443 		    sc->gnv_mc_ifindex, &sc->gnv_so_mc_index);
1444 		if (error)
1445 			goto out;
1446 	}
1447 
1448 	sc->gnv_sock = gnvso;
1449 	error = geneve_socket_insert_softc(gnvso, sc);
1450 	if (error) {
1451 		sc->gnv_sock = NULL;
1452 		if_printf(ifp, "network identifier %d already exists\n", sc->gnv_vni);
1453 		goto out;
1454 	}
1455 
1456 	return (0);
1457 
1458 out:
1459 	if (gnvso != NULL) {
1460 		if (sc->gnv_so_mc_index != -1) {
1461 			geneve_socket_mc_release_group(gnvso, sc->gnv_so_mc_index);
1462 			sc->gnv_so_mc_index = -1;
1463 		}
1464 		if (multicast != 0)
1465 			geneve_free_multicast(sc);
1466 		geneve_socket_release(gnvso);
1467 	}
1468 
1469 	return (error);
1470 }
1471 
1472 static void
geneve_setup_interface_hdrlen(struct geneve_softc * sc)1473 geneve_setup_interface_hdrlen(struct geneve_softc *sc)
1474 {
1475 	struct ifnet *ifp;
1476 
1477 	GENEVE_LOCK_WASSERT(sc);
1478 
1479 	ifp = sc->gnv_ifp;
1480 	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct geneveudphdr);
1481 	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
1482 		ifp->if_hdrlen += ETHER_HDR_LEN;
1483 
1484 	if (sc->gnv_dst_addr.sa.sa_family == AF_INET)
1485 		ifp->if_hdrlen += sizeof(struct ip);
1486 	else
1487 		ifp->if_hdrlen += sizeof(struct ip6_hdr);
1488 
1489 	if ((sc->gnv_flags & GENEVE_FLAG_USER_MTU) == 0)
1490 		ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
1491 }
1492 
1493 static int
geneve_socket_set_df(struct geneve_socket * gnvso,bool df)1494 geneve_socket_set_df(struct geneve_socket *gnvso, bool df)
1495 {
1496 	struct sockopt sopt;
1497 	int optval;
1498 
1499 	memset(&sopt, 0, sizeof(sopt));
1500 	sopt.sopt_dir = SOPT_SET;
1501 
1502 	switch (gnvso->gnvso_laddr.sa.sa_family) {
1503 	case AF_INET:
1504 		sopt.sopt_level = IPPROTO_IP;
1505 		sopt.sopt_name = IP_DONTFRAG;
1506 		break;
1507 
1508 	case AF_INET6:
1509 		sopt.sopt_level = IPPROTO_IPV6;
1510 		sopt.sopt_name = IPV6_DONTFRAG;
1511 		break;
1512 
1513 	default:
1514 		return (EAFNOSUPPORT);
1515 	}
1516 
1517 	optval = df ? 1 : 0;
1518 	sopt.sopt_val = &optval;
1519 	sopt.sopt_valsize = sizeof(optval);
1520 
1521 	return (sosetopt(gnvso->gnvso_sock, &sopt));
1522 }
1523 
1524 static int
geneve_valid_init_config(struct geneve_softc * sc)1525 geneve_valid_init_config(struct geneve_softc *sc)
1526 {
1527 	const char *reason;
1528 
1529 	if (sc->gnv_vni >= GENEVE_VNI_MAX) {
1530 		if_printf(sc->gnv_ifp, "%u", sc->gnv_vni);
1531 		reason = "invalid virtual network identifier specified";
1532 		goto fail;
1533 	}
1534 
1535 	if (geneve_sockaddr_supported(&sc->gnv_src_addr, 1) == 0) {
1536 		reason = "source address type is not supported";
1537 		goto fail;
1538 	}
1539 
1540 	if (geneve_sockaddr_supported(&sc->gnv_dst_addr, 0) == 0) {
1541 		reason = "destination address type is not supported";
1542 		goto fail;
1543 	}
1544 
1545 	if (geneve_sockaddr_in_any(&sc->gnv_dst_addr) != 0) {
1546 		reason = "no valid destination address specified";
1547 		goto fail;
1548 	}
1549 
1550 	if (geneve_check_multicast_addr(&sc->gnv_dst_addr) == 0 &&
1551 	    sc->gnv_mc_ifname[0] != '\0') {
1552 		reason = "can only specify interface with a group address";
1553 		goto fail;
1554 	}
1555 
1556 	if (geneve_sockaddr_in_any(&sc->gnv_src_addr) == 0) {
1557 		if (&sc->gnv_src_addr.sa.sa_family ==
1558 		    &sc->gnv_dst_addr.sa.sa_family) {
1559 			reason = "source and destination address must both be either IPv4 or IPv6";
1560 			goto fail;
1561 		}
1562 	}
1563 
1564 	if (sc->gnv_src_addr.sin.sin_port == 0) {
1565 		reason = "local port not specified";
1566 		goto fail;
1567 	}
1568 
1569 	if (sc->gnv_dst_addr.sin.sin_port == 0) {
1570 		reason = "remote port not specified";
1571 		goto fail;
1572 	}
1573 
1574 	return (0);
1575 
1576 fail:
1577 	if_printf(sc->gnv_ifp, "cannot initialize interface: %s\n", reason);
1578 	return (EINVAL);
1579 }
1580 
1581 static void
geneve_init_complete(struct geneve_softc * sc)1582 geneve_init_complete(struct geneve_softc *sc)
1583 {
1584 
1585 	GENEVE_WLOCK(sc);
1586 	sc->gnv_flags |= GENEVE_FLAG_RUNNING;
1587 	sc->gnv_flags &= ~GENEVE_FLAG_INIT;
1588 	wakeup(sc);
1589 	GENEVE_WUNLOCK(sc);
1590 }
1591 
/*
 * Bring the interface up.
 *
 * Serialized by geneve_sx.  Does nothing if the softc is already marked
 * GENEVE_FLAG_RUNNING.  Otherwise the configuration is validated, the
 * softc is attached to a socket, and for Ethernet-mode tunnels the default
 * forwarding entry is initialized and the forwarding table timer armed.
 * On success the interface is marked IFF_DRV_RUNNING and its link state
 * set to up.
 *
 * xsc is the softc, passed as void *.
 *
 * NOTE(review): validation and socket setup failures jump to "out", which
 * calls geneve_init_complete(); as visible in this file that sets
 * GENEVE_FLAG_RUNNING regardless of the outcome.  Confirm that this is
 * intended.
 */
static void
geneve_init(void *xsc)
{
	/* All-zero MAC used as the key of the default forwarding entry. */
	static const uint8_t empty_mac[ETHER_ADDR_LEN];
	struct geneve_softc *sc;
	struct ifnet *ifp;

	sc = xsc;
	sx_xlock(&geneve_sx);
	GENEVE_WLOCK(sc);
	ifp = sc->gnv_ifp;
	if (sc->gnv_flags & GENEVE_FLAG_RUNNING) {
		GENEVE_WUNLOCK(sc);
		sx_xunlock(&geneve_sx);
		return;
	}
	/* Mark the init in progress; cleared by geneve_init_complete(). */
	sc->gnv_flags |= GENEVE_FLAG_INIT;
	GENEVE_WUNLOCK(sc);

	if (geneve_valid_init_config(sc) != 0)
		goto out;

	if (geneve_setup_socket(sc) != 0)
		goto out;

	/* Initialize the default forwarding entry. */
	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
		geneve_ftable_entry_init(sc, &sc->gnv_default_fe, empty_mac,
		    &sc->gnv_dst_addr.sa, GENEVE_FE_FLAG_STATIC);

		/* Arm the periodic forwarding table timer. */
		GENEVE_WLOCK(sc);
		callout_reset(&sc->gnv_callout, geneve_ftable_prune_period * hz,
		    geneve_timer, sc);
		GENEVE_WUNLOCK(sc);
	}
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	if_link_state_change(ifp, LINK_STATE_UP);

out:
	geneve_init_complete(sc);
	sx_xunlock(&geneve_sx);
}
1634 
/*
 * Drop one reference on the softc and, when GENEVE_RELEASE() reports a
 * non-zero result, wake up threads sleeping on it.
 */
static void
geneve_release(struct geneve_softc *sc)
{

	if (GENEVE_RELEASE(sc) == 0)
		return;

	/*
	 * Once our reference is gone the softc may be destroyed at any
	 * moment, so the wakeup cannot be serialized with the softc lock.
	 * Sleepers use a timeout, which makes a missed wakeup unfortunate
	 * but not fatal.
	 */
	wakeup(sc);
}
1647 
/*
 * Sleep until GENEVE_FLAG_TEARDOWN is no longer set on the softc.
 *
 * The caller must hold the softc write lock.  Each sleep is bounded by a
 * timeout of hz ticks, after which the flag is checked again.
 */
static void
geneve_teardown_wait(struct geneve_softc *sc)
{

	GENEVE_LOCK_WASSERT(sc);
	while (sc->gnv_flags & GENEVE_FLAG_TEARDOWN)
		rm_sleep(sc, &sc->gnv_lock, 0, "gnvtrn", hz);
}
1656 
/*
 * Tear the interface down.
 *
 * Entered with geneve_sx exclusively held, the softc write-locked and
 * GENEVE_FLAG_TEARDOWN already set.  The softc lock is dropped inside this
 * function and is NOT held on return; geneve_sx stays held.
 *
 * Steps: mark the interface down, detach from the socket (VNI hash and
 * multicast membership), wait for all softc references to go away, drain
 * the timer, free multicast state, release the socket, and finally clear
 * GENEVE_FLAG_TEARDOWN and wake up waiters.
 *
 * NOTE(review): IFF_DRV_RUNNING, which geneve_init() sets, is not cleared
 * here; confirm that it is cleared elsewhere.
 */
static void
geneve_teardown_locked(struct geneve_softc *sc)
{
	struct ifnet *ifp;
	struct geneve_socket *gnvso;

	sx_assert(&geneve_sx, SA_XLOCKED);
	GENEVE_LOCK_WASSERT(sc);
	MPASS(sc->gnv_flags & GENEVE_FLAG_TEARDOWN);

	ifp = sc->gnv_ifp;
	ifp->if_flags &= ~IFF_UP;
	sc->gnv_flags &= ~GENEVE_FLAG_RUNNING;

	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
		callout_stop(&sc->gnv_callout);
	/* Take the socket away from the softc while still locked. */
	gnvso = sc->gnv_sock;
	sc->gnv_sock = NULL;

	GENEVE_WUNLOCK(sc);
	if_link_state_change(ifp, LINK_STATE_DOWN);

	if (gnvso != NULL) {
		geneve_socket_remove_softc(gnvso, sc);

		if (sc->gnv_so_mc_index != -1) {
			geneve_socket_mc_release_group(gnvso, sc->gnv_so_mc_index);
			sc->gnv_so_mc_index = -1;
		}
	}

	/* Wait, re-checking every hz ticks, for all references to drain. */
	GENEVE_WLOCK(sc);
	while (sc->gnv_refcnt != 0)
		rm_sleep(sc, &sc->gnv_lock, 0, "gnvdrn", hz);
	GENEVE_WUNLOCK(sc);

	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
		callout_drain(&sc->gnv_callout);

	geneve_free_multicast(sc);
	if (gnvso != NULL)
		geneve_socket_release(gnvso);

	/* Teardown finished: let geneve_teardown_wait() sleepers continue. */
	GENEVE_WLOCK(sc);
	sc->gnv_flags &= ~GENEVE_FLAG_TEARDOWN;
	wakeup(sc);
	GENEVE_WUNLOCK(sc);
}
1705 
1706 static void
geneve_teardown(struct geneve_softc * sc)1707 geneve_teardown(struct geneve_softc *sc)
1708 {
1709 
1710 	sx_xlock(&geneve_sx);
1711 	GENEVE_WLOCK(sc);
1712 	if (sc->gnv_flags & GENEVE_FLAG_TEARDOWN) {
1713 		geneve_teardown_wait(sc);
1714 		GENEVE_WUNLOCK(sc);
1715 		sx_xunlock(&geneve_sx);
1716 		return;
1717 	}
1718 
1719 	sc->gnv_flags |= GENEVE_FLAG_TEARDOWN;
1720 	geneve_teardown_locked(sc);
1721 	sx_xunlock(&geneve_sx);
1722 }
1723 
1724 static void
geneve_timer(void * xsc)1725 geneve_timer(void *xsc)
1726 {
1727 	struct geneve_softc *sc;
1728 
1729 	sc = xsc;
1730 	GENEVE_LOCK_WASSERT(sc);
1731 
1732 	geneve_ftable_expire(sc);
1733 	callout_schedule(&sc->gnv_callout, geneve_ftable_prune_period * hz);
1734 }
1735 
1736 static int
geneve_ioctl_ifflags(struct geneve_softc * sc)1737 geneve_ioctl_ifflags(struct geneve_softc *sc)
1738 {
1739 	struct ifnet *ifp;
1740 
1741 	ifp = sc->gnv_ifp;
1742 
1743 	if ((ifp->if_flags & IFF_UP) != 0) {
1744 		if ((sc->gnv_flags & GENEVE_FLAG_RUNNING) == 0)
1745 			geneve_init(sc);
1746 	} else {
1747 		if (sc->gnv_flags & GENEVE_FLAG_RUNNING)
1748 			geneve_teardown(sc);
1749 	}
1750 
1751 	return (0);
1752 }
1753 
/*
 * Flush the forwarding table under the softc write lock.
 *
 * The flush argument is passed through unchanged to geneve_ftable_flush().
 * Always returns 0.
 */
static int
geneve_flush_ftable(struct geneve_softc *sc, bool flush)
{

	GENEVE_WLOCK(sc);
	geneve_ftable_flush(sc, flush);
	GENEVE_WUNLOCK(sc);

	return (0);
}
1764 
1765 static uint16_t
geneve_get_local_port(struct geneve_softc * sc)1766 geneve_get_local_port(struct geneve_softc *sc)
1767 {
1768 	uint16_t port = 0;
1769 
1770 	GENEVE_LOCK_ASSERT(sc);
1771 
1772 	switch (sc->gnv_src_addr.sa.sa_family) {
1773 	case AF_INET:
1774 		port = ntohs(sc->gnv_src_addr.sin.sin_port);
1775 		break;
1776 	case AF_INET6:
1777 		port = ntohs(sc->gnv_src_addr.sin6.sin6_port);
1778 		break;
1779 	}
1780 
1781 	return (port);
1782 }
1783 
1784 static uint16_t
geneve_get_remote_port(struct geneve_softc * sc)1785 geneve_get_remote_port(struct geneve_softc *sc)
1786 {
1787 	uint16_t port = 0;
1788 
1789 	GENEVE_LOCK_ASSERT(sc);
1790 
1791 	switch (sc->gnv_dst_addr.sa.sa_family) {
1792 	case AF_INET:
1793 		port = ntohs(sc->gnv_dst_addr.sin.sin_port);
1794 		break;
1795 	case AF_INET6:
1796 		port = ntohs(sc->gnv_dst_addr.sin6.sin6_port);
1797 		break;
1798 	}
1799 
1800 	return (port);
1801 }
1802 
1803 /* Netlink Helpers */
1804 static int
geneve_set_vni_nl(struct geneve_softc * sc,struct nl_pstate * npt,uint32_t vni)1805 geneve_set_vni_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint32_t vni)
1806 {
1807 	int error;
1808 
1809 	error = 0;
1810 	if (vni >= GENEVE_VNI_MAX) {
1811 		error = EINVAL;
1812 		goto ret;
1813 	}
1814 
1815 	GENEVE_WLOCK(sc);
1816 	if (geneve_can_change_config(sc))
1817 		sc->gnv_vni = vni;
1818 	else
1819 		error = EBUSY;
1820 	GENEVE_WUNLOCK(sc);
1821 
1822 ret:
1823 	if (error == EINVAL)
1824 		nlmsg_report_err_msg(npt, "geneve vni is invalid: %u", vni);
1825 
1826 	if (error == EBUSY)
1827 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
1828 
1829 	return (error);
1830 }
1831 
1832 static int
geneve_set_local_addr_nl(struct geneve_softc * sc,struct nl_pstate * npt,struct sockaddr * sa)1833 geneve_set_local_addr_nl(struct geneve_softc *sc, struct nl_pstate *npt,
1834     struct sockaddr *sa)
1835 {
1836 	union sockaddr_union *unsa = (union sockaddr_union *)sa;
1837 	int error;
1838 
1839 	error = geneve_check_sockaddr(unsa, sa->sa_len);
1840 	if (error != 0)
1841 		goto ret;
1842 
1843 	error = geneve_check_multicast_addr(unsa);
1844 	if (error != 0)
1845 		goto ret;
1846 
1847 #ifdef INET6
1848 	if (unsa->sa.sa_family == AF_INET6) {
1849 		error = sa6_embedscope(&unsa->sin6, V_ip6_use_defzone);
1850 		if (error != 0)
1851 			goto ret;
1852 	}
1853 #endif
1854 
1855 	GENEVE_WLOCK(sc);
1856 	if (geneve_can_change_config(sc)) {
1857 		geneve_sockaddr_in_copy(&sc->gnv_src_addr, &unsa->sa);
1858 		geneve_set_hwcaps(sc);
1859 	} else
1860 		error = EBUSY;
1861 	GENEVE_WUNLOCK(sc);
1862 
1863 ret:
1864 	if (error == EINVAL)
1865 		nlmsg_report_err_msg(npt, "local address is invalid.");
1866 
1867 	if (error == EAFNOSUPPORT)
1868 		nlmsg_report_err_msg(npt, "address family is not supported.");
1869 
1870 	if (error == EBUSY)
1871 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
1872 
1873 	return (error);
1874 }
1875 
1876 static int
geneve_set_remote_addr_nl(struct geneve_softc * sc,struct nl_pstate * npt,struct sockaddr * sa)1877 geneve_set_remote_addr_nl(struct geneve_softc *sc, struct nl_pstate *npt,
1878     struct sockaddr *sa)
1879 {
1880 	union sockaddr_union *unsa = (union sockaddr_union *)sa;
1881 	int error;
1882 
1883 	error = geneve_check_sockaddr(unsa, sa->sa_len);
1884 	if (error != 0)
1885 		goto ret;
1886 
1887 #ifdef INET6
1888 	if (unsa->sa.sa_family == AF_INET6) {
1889 		error = sa6_embedscope(&unsa->sin6, V_ip6_use_defzone);
1890 		if (error != 0)
1891 			goto ret;
1892 	}
1893 #endif
1894 
1895 	GENEVE_WLOCK(sc);
1896 	if (geneve_can_change_config(sc)) {
1897 		geneve_sockaddr_in_copy(&sc->gnv_dst_addr, &unsa->sa);
1898 		geneve_setup_interface_hdrlen(sc);
1899 	} else
1900 		error = EBUSY;
1901 	GENEVE_WUNLOCK(sc);
1902 
1903 ret:
1904 	if (error == EINVAL)
1905 		nlmsg_report_err_msg(npt, "remote address is invalid.");
1906 
1907 	if (error == EAFNOSUPPORT)
1908 		nlmsg_report_err_msg(npt, "address family is not supported.");
1909 
1910 	if (error == EBUSY)
1911 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
1912 
1913 	return (error);
1914 }
1915 
1916 static int
geneve_set_local_port_nl(struct geneve_softc * sc,struct nl_pstate * npt,uint16_t port)1917 geneve_set_local_port_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint16_t port)
1918 {
1919 	int error;
1920 
1921 	error = 0;
1922 	if (port == 0 || port > UINT16_MAX) {
1923 		error = EINVAL;
1924 		goto ret;
1925 	}
1926 
1927 	GENEVE_WLOCK(sc);
1928 	if (geneve_can_change_config(sc) == 0) {
1929 		GENEVE_WUNLOCK(sc);
1930 		error = EBUSY;
1931 		goto ret;
1932 	}
1933 
1934 	switch (sc->gnv_src_addr.sa.sa_family) {
1935 	case AF_INET:
1936 		sc->gnv_src_addr.sin.sin_port = htons(port);
1937 		break;
1938 	case AF_INET6:
1939 		sc->gnv_src_addr.sin6.sin6_port = htons(port);
1940 		break;
1941 	}
1942 	GENEVE_WUNLOCK(sc);
1943 
1944 ret:
1945 	if (error == EINVAL)
1946 		nlmsg_report_err_msg(npt, "local port is invalid: %u", port);
1947 
1948 	if (error == EBUSY)
1949 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
1950 
1951 	return (error);
1952 }
1953 
1954 static int
geneve_set_remote_port_nl(struct geneve_softc * sc,struct nl_pstate * npt,uint16_t port)1955 geneve_set_remote_port_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint16_t port)
1956 {
1957 	int error;
1958 
1959 	error = 0;
1960 	if (port == 0 || port > UINT16_MAX) {
1961 		error = EINVAL;
1962 		goto ret;
1963 	}
1964 
1965 	GENEVE_WLOCK(sc);
1966 	if (geneve_can_change_config(sc) == 0) {
1967 		GENEVE_WUNLOCK(sc);
1968 		error = EBUSY;
1969 		goto ret;
1970 	}
1971 
1972 	switch (sc->gnv_dst_addr.sa.sa_family) {
1973 	case AF_INET:
1974 		sc->gnv_dst_addr.sin.sin_port = htons(port);
1975 		break;
1976 	case AF_INET6:
1977 		sc->gnv_dst_addr.sin6.sin6_port = htons(port);
1978 		break;
1979 	}
1980 	GENEVE_WUNLOCK(sc);
1981 
1982 ret:
1983 	if (error == EINVAL)
1984 		nlmsg_report_err_msg(npt, "remote port is invalid: %u", port);
1985 
1986 	if (error == EBUSY)
1987 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
1988 
1989 	return (error);
1990 }
1991 
1992 static int
geneve_set_port_range_nl(struct geneve_softc * sc,struct nl_pstate * npt,struct ifla_geneve_port_range port_range)1993 geneve_set_port_range_nl(struct geneve_softc *sc, struct nl_pstate *npt,
1994     struct ifla_geneve_port_range port_range)
1995 {
1996 	int error;
1997 
1998 	error = 0;
1999 	if (port_range.low <= 0 || port_range.high > UINT16_MAX ||
2000 	    port_range.high < port_range.low) {
2001 		error = EINVAL;
2002 		goto ret;
2003 	}
2004 
2005 	GENEVE_WLOCK(sc);
2006 	if (geneve_can_change_config(sc)) {
2007 		sc->gnv_min_port = port_range.low;
2008 		sc->gnv_max_port = port_range.high;
2009 	} else
2010 		error = EBUSY;
2011 	GENEVE_WUNLOCK(sc);
2012 
2013 ret:
2014 	if (error == EINVAL)
2015 		nlmsg_report_err_msg(npt, "port range is invalid: %u-%u",
2016 		    port_range.low, port_range.high);
2017 
2018 	if (error == EBUSY)
2019 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
2020 
2021 	return (error);
2022 }
2023 
2024 static int
geneve_set_df_nl(struct geneve_softc * sc,struct nl_pstate * npt,enum ifla_geneve_df df)2025 geneve_set_df_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2026     enum ifla_geneve_df df)
2027 {
2028 	int error;
2029 
2030 	error = 0;
2031 	GENEVE_WLOCK(sc);
2032 	if (geneve_can_change_config(sc))
2033 		sc->gnv_df = df;
2034 	else
2035 		error = EBUSY;
2036 	GENEVE_WUNLOCK(sc);
2037 
2038 	if (error == EBUSY)
2039 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
2040 
2041 	return (error);
2042 }
2043 
2044 static int
geneve_set_ttl_nl(struct geneve_softc * sc,struct nl_pstate * npt __unused,uint8_t ttl)2045 geneve_set_ttl_nl(struct geneve_softc *sc, struct nl_pstate *npt __unused,
2046     uint8_t ttl)
2047 {
2048 
2049 	GENEVE_WLOCK(sc);
2050 	sc->gnv_ttl = ttl;
2051 	if (sc->gnv_im4o != NULL)
2052 		sc->gnv_im4o->imo_multicast_ttl = sc->gnv_ttl;
2053 	if (sc->gnv_im6o != NULL)
2054 		sc->gnv_im6o->im6o_multicast_hlim = sc->gnv_ttl;
2055 	GENEVE_WUNLOCK(sc);
2056 
2057 	return (0);
2058 }
2059 
2060 static int
geneve_set_ttl_inherit_nl(struct geneve_softc * sc,struct nl_pstate * npt __unused,bool inherit)2061 geneve_set_ttl_inherit_nl(struct geneve_softc *sc,
2062     struct nl_pstate *npt __unused, bool inherit)
2063 {
2064 
2065 	GENEVE_WLOCK(sc);
2066 	if (inherit)
2067 		sc->gnv_flags |= GENEVE_FLAG_TTL_INHERIT;
2068 	else
2069 		sc->gnv_flags &= ~GENEVE_FLAG_TTL_INHERIT;
2070 	GENEVE_WUNLOCK(sc);
2071 
2072 	return (0);
2073 }
2074 
2075 static int
geneve_set_dscp_inherit_nl(struct geneve_softc * sc,struct nl_pstate * npt __unused,bool inherit)2076 geneve_set_dscp_inherit_nl(struct geneve_softc *sc,
2077     struct nl_pstate *npt __unused, bool inherit)
2078 {
2079 
2080 	GENEVE_WLOCK(sc);
2081 	if (inherit)
2082 		sc->gnv_flags |= GENEVE_FLAG_DSCP_INHERIT;
2083 	else
2084 		sc->gnv_flags &= ~GENEVE_FLAG_DSCP_INHERIT;
2085 	GENEVE_WUNLOCK(sc);
2086 
2087 	return (0);
2088 }
2089 
2090 static int
geneve_set_collect_metadata_nl(struct geneve_softc * sc,struct nl_pstate * npt __unused,bool external)2091 geneve_set_collect_metadata_nl(struct geneve_softc *sc,
2092     struct nl_pstate *npt __unused, bool external)
2093 {
2094 
2095 	GENEVE_WLOCK(sc);
2096 	if (external)
2097 		sc->gnv_flags |= GENEVE_FLAG_COLLECT_METADATA;
2098 	else
2099 		sc->gnv_flags &= ~GENEVE_FLAG_COLLECT_METADATA;
2100 	GENEVE_WUNLOCK(sc);
2101 
2102 	return (0);
2103 }
2104 
2105 static int
geneve_set_learn_nl(struct geneve_softc * sc,struct nl_pstate * npt,bool learn)2106 geneve_set_learn_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2107     bool learn)
2108 {
2109 
2110 	GENEVE_WLOCK(sc);
2111 	if (learn)
2112 		sc->gnv_flags |= GENEVE_FLAG_LEARN;
2113 	else
2114 		sc->gnv_flags &= ~GENEVE_FLAG_LEARN;
2115 	GENEVE_WUNLOCK(sc);
2116 
2117 	return (0);
2118 }
2119 
2120 static int
geneve_set_ftable_max_nl(struct geneve_softc * sc,struct nl_pstate * npt,uint32_t max)2121 geneve_set_ftable_max_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2122     uint32_t max)
2123 {
2124 	int error;
2125 
2126 	error = 0;
2127 	GENEVE_WLOCK(sc);
2128 	if (max <= GENEVE_FTABLE_MAX)
2129 		sc->gnv_ftable_max = max;
2130 	else
2131 		error = EINVAL;
2132 	GENEVE_WUNLOCK(sc);
2133 
2134 	if (error == EINVAL)
2135 		nlmsg_report_err_msg(npt,
2136 		    "maximum number of entries in the table can not be more than %u",
2137 		    GENEVE_FTABLE_MAX);
2138 
2139 	return (error);
2140 }
2141 
2142 static int
geneve_set_ftable_timeout_nl(struct geneve_softc * sc,struct nl_pstate * npt,uint32_t timeout)2143 geneve_set_ftable_timeout_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2144     uint32_t timeout)
2145 {
2146 	int error;
2147 
2148 	error = 0;
2149 	GENEVE_WLOCK(sc);
2150 	if (timeout <= GENEVE_FTABLE_MAX_TIMEOUT)
2151 		sc->gnv_ftable_timeout = timeout;
2152 	else
2153 		error = EINVAL;
2154 	GENEVE_WUNLOCK(sc);
2155 
2156 	if (error == EINVAL)
2157 		nlmsg_report_err_msg(npt,
2158 		    "maximum timeout for stale entries in the table can not be more than %u",
2159 		    GENEVE_FTABLE_MAX_TIMEOUT);
2160 
2161 	return (error);
2162 }
2163 
2164 static int
geneve_set_mc_if_nl(struct geneve_softc * sc,struct nl_pstate * npt,char * ifname)2165 geneve_set_mc_if_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2166     char *ifname)
2167 {
2168 	int error;
2169 
2170 	error = 0;
2171 	GENEVE_WLOCK(sc);
2172 	if (geneve_can_change_config(sc)) {
2173 		strlcpy(sc->gnv_mc_ifname, ifname, IFNAMSIZ);
2174 		geneve_set_hwcaps(sc);
2175 	} else
2176 		error = EBUSY;
2177 	GENEVE_WUNLOCK(sc);
2178 
2179 	if (error == EBUSY)
2180 		nlmsg_report_err_msg(npt, "geneve interface is busy.");
2181 
2182 	return (error);
2183 }
2184 
2185 static int
geneve_flush_ftable_nl(struct geneve_softc * sc,struct nl_pstate * npt,bool flush)2186 geneve_flush_ftable_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2187     bool flush)
2188 {
2189 
2190 	return (geneve_flush_ftable(sc, flush));
2191 }
2192 
2193 static void
geneve_get_local_addr_nl(struct geneve_softc * sc,struct nl_writer * nw)2194 geneve_get_local_addr_nl(struct geneve_softc *sc, struct nl_writer *nw)
2195 {
2196 	struct sockaddr *sa;
2197 
2198 	GENEVE_LOCK_ASSERT(sc);
2199 
2200 	sa = &sc->gnv_src_addr.sa;
2201 	if (sa->sa_family == AF_INET) {
2202 		const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
2203 		nlattr_add_in_addr(nw, IFLA_GENEVE_LOCAL, in4);
2204 	} else if (sa->sa_family == AF_INET6) {
2205 		const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
2206 		nlattr_add_in6_addr(nw, IFLA_GENEVE_LOCAL, in6);
2207 	}
2208 }
2209 
2210 static void
geneve_get_remote_addr_nl(struct geneve_softc * sc,struct nl_writer * nw)2211 geneve_get_remote_addr_nl(struct geneve_softc *sc, struct nl_writer *nw)
2212 {
2213 	struct sockaddr *sa;
2214 
2215 	GENEVE_LOCK_ASSERT(sc);
2216 
2217 	sa = &sc->gnv_dst_addr.sa;
2218 	if (sa->sa_family == AF_INET) {
2219 		const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
2220 		nlattr_add_in_addr(nw, IFLA_GENEVE_REMOTE, in4);
2221 	} else if (sa->sa_family == AF_INET6) {
2222 		const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
2223 		nlattr_add_in6_addr(nw, IFLA_GENEVE_REMOTE, in6);
2224 	}
2225 }
2226 
2227 static int
geneve_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)2228 geneve_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2229 {
2230 	struct rm_priotracker tracker;
2231 	struct geneve_softc *sc;
2232 	struct siocsifcapnv_driver_data *drv_ioctl_data, drv_ioctl_data_d;
2233 	struct ifreq *ifr;
2234 	int max, error;
2235 
2236 	CURVNET_ASSERT_SET();
2237 
2238 	error = 0;
2239 	sc = ifp->if_softc;
2240 	ifr = (struct ifreq *)data;
2241 
2242 	switch (cmd) {
2243 	case SIOCADDMULTI:
2244 	case SIOCDELMULTI:
2245 		break;
2246 
2247 	case SIOCGDRVSPEC:
2248 		break;
2249 	case SIOCSDRVSPEC:
2250 		error = priv_check(curthread, PRIV_NET_GENEVE);
2251 		if (error)
2252 			return (error);
2253 		break;
2254 	}
2255 
2256 	switch (cmd) {
2257 	case SIOCSIFFLAGS:
2258 		error = geneve_ioctl_ifflags(sc);
2259 		break;
2260 
2261 	case SIOCSIFMEDIA:
2262 	case SIOCGIFMEDIA:
2263 		if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2264 			error = ifmedia_ioctl(ifp, ifr, &sc->gnv_media, cmd);
2265 		else
2266 			error = EINVAL;
2267 		break;
2268 
2269 	case SIOCSIFMTU:
2270 		if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2271 			max = GENEVE_MAX_MTU;
2272 		else
2273 			max = GENEVE_MAX_L3MTU;
2274 
2275 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > max)
2276 			error = EINVAL;
2277 		else {
2278 			GENEVE_WLOCK(sc);
2279 			ifp->if_mtu = ifr->ifr_mtu;
2280 			sc->gnv_flags |= GENEVE_FLAG_USER_MTU;
2281 			GENEVE_WUNLOCK(sc);
2282 		}
2283 		break;
2284 
2285 	case SIOCGIFCAPNV:
2286 		break;
2287 	case SIOCSIFCAP:
2288 		drv_ioctl_data = &drv_ioctl_data_d;
2289 		drv_ioctl_data->reqcap = ifr->ifr_reqcap;
2290 		drv_ioctl_data->reqcap2 = if_getcapenable2(ifp);
2291 		drv_ioctl_data->nvcap = NULL;
2292 		/* FALLTHROUGH */
2293 	case SIOCSIFCAPNV:
2294 		if (cmd == SIOCSIFCAPNV)
2295 			drv_ioctl_data = (struct siocsifcapnv_driver_data *)data;
2296 
2297 		GENEVE_WLOCK(sc);
2298 		error = geneve_set_reqcap(sc, ifp, drv_ioctl_data->reqcap,
2299 		    drv_ioctl_data->reqcap2);
2300 		if (error == 0)
2301 			geneve_set_hwcaps(sc);
2302 		GENEVE_WUNLOCK(sc);
2303 		break;
2304 
2305 	case SIOCGTUNFIB:
2306 		GENEVE_RLOCK(sc, &tracker);
2307 		ifr->ifr_fib = sc->gnv_fibnum;
2308 		GENEVE_RUNLOCK(sc, &tracker);
2309 		break;
2310 
2311 	case SIOCSTUNFIB:
2312 		if ((error = priv_check(curthread, PRIV_NET_GENEVE)) != 0)
2313 			break;
2314 
2315 		if (ifr->ifr_fib >= rt_numfibs)
2316 			error = EINVAL;
2317 		else {
2318 			GENEVE_WLOCK(sc);
2319 			sc->gnv_fibnum = ifr->ifr_fib;
2320 			GENEVE_WUNLOCK(sc);
2321 		}
2322 		break;
2323 
2324 	case SIOCSIFADDR:
2325 		ifp->if_flags |= IFF_UP;
2326 		/* FALLTHROUGH */
2327 	case SIOCGIFADDR:
2328 		if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2329 			error = ether_ioctl(ifp, cmd, data);
2330 		break;
2331 
2332 	default:
2333 		if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2334 			error = ether_ioctl(ifp, cmd, data);
2335 		else
2336 			error = EINVAL;
2337 		break;
2338 	}
2339 
2340 	return (error);
2341 }
2342 
2343 static uint16_t
geneve_pick_source_port(struct geneve_softc * sc,struct mbuf * m)2344 geneve_pick_source_port(struct geneve_softc *sc, struct mbuf *m)
2345 {
2346 	int range;
2347 	uint32_t hash;
2348 
2349 	range = sc->gnv_max_port - sc->gnv_min_port + 1;
2350 
2351 	/* RFC 8926 Section 3.3-2.2.1 */
2352 	if (M_HASHTYPE_ISHASH(m))
2353 		hash = m->m_pkthdr.flowid;
2354 	else
2355 		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, sc->gnv_port_hash_key);
2356 
2357 	return (sc->gnv_min_port + (hash % range));
2358 }
2359 
2360 static void
geneve_encap_header(struct geneve_softc * sc,struct mbuf * m,int ipoff,uint16_t srcport,uint16_t dstport,uint16_t proto)2361 geneve_encap_header(struct geneve_softc *sc, struct mbuf *m, int ipoff,
2362     uint16_t srcport, uint16_t dstport, uint16_t proto)
2363 {
2364 	struct geneveudphdr *hdr;
2365 	struct udphdr *udph;
2366 	struct genevehdr *gnvh;
2367 	int len;
2368 
2369 	len = m->m_pkthdr.len - ipoff;
2370 	MPASS(len >= sizeof(struct geneveudphdr));
2371 	hdr = mtodo(m, ipoff);
2372 
2373 	udph = &hdr->geneve_udp;
2374 	udph->uh_sport = srcport;
2375 	udph->uh_dport = dstport;
2376 	udph->uh_ulen = htons(len);
2377 	udph->uh_sum = 0;
2378 
2379 	gnvh = &hdr->geneve_hdr;
2380 	gnvh->geneve_ver = 0;
2381 	gnvh->geneve_optlen = 0;
2382 	gnvh->geneve_critical = 0;
2383 	gnvh->geneve_control = 0;
2384 	gnvh->geneve_flags = 0;
2385 	gnvh->geneve_proto = proto;
2386 	gnvh->geneve_vni = htonl(sc->gnv_vni << GENEVE_HDR_VNI_SHIFT);
2387 }
2388 
2389 /* Return the CSUM_INNER_* equivalent of CSUM_* caps. */
2390 static uint32_t
csum_flags_to_inner_flags(uint32_t csum_flags_in,const uint32_t encap)2391 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
2392 {
2393 	uint32_t csum_flags = encap;
2394 	const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
2395 
2396 	/*
2397 	 * csum_flags can request either v4 or v6 offload but not both.
2398 	 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
2399 	 * so those bits are no good to detect the IP version.  Other bits are
2400 	 * always set with CSUM_TSO and we use those to figure out the IP
2401 	 * version.
2402 	 */
2403 	if (csum_flags_in & v4) {
2404 		if (csum_flags_in & CSUM_IP)
2405 			csum_flags |= CSUM_INNER_IP;
2406 		if (csum_flags_in & CSUM_IP_UDP)
2407 			csum_flags |= CSUM_INNER_IP_UDP;
2408 		if (csum_flags_in & CSUM_IP_TCP)
2409 			csum_flags |= CSUM_INNER_IP_TCP;
2410 		if (csum_flags_in & CSUM_IP_TSO)
2411 			csum_flags |= CSUM_INNER_IP_TSO;
2412 	} else {
2413 #ifdef INVARIANTS
2414 		const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;
2415 		MPASS((csum_flags_in & v6) != 0);
2416 #endif
2417 		if (csum_flags_in & CSUM_IP6_UDP)
2418 			csum_flags |= CSUM_INNER_IP6_UDP;
2419 		if (csum_flags_in & CSUM_IP6_TCP)
2420 			csum_flags |= CSUM_INNER_IP6_TCP;
2421 		if (csum_flags_in & CSUM_IP6_TSO)
2422 			csum_flags |= CSUM_INNER_IP6_TSO;
2423 	}
2424 
2425 	return (csum_flags);
2426 }
2427 
2428 static uint16_t
geneve_get_ethertype(struct mbuf * m)2429 geneve_get_ethertype(struct mbuf *m)
2430 {
2431 	struct ip *ip;
2432 	struct ip6_hdr *ip6;
2433 
2434 	/*
2435 	 * We should pullup, but we're only interested in the first byte, so
2436 	 * that'll always be contiguous.
2437 	 */
2438 	ip = mtod(m, struct ip *);
2439 	if (ip->ip_v == IPVERSION)
2440 		return (ETHERTYPE_IP);
2441 
2442 	ip6 = mtod(m, struct ip6_hdr *);
2443 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION)
2444 		return (ETHERTYPE_IPV6);
2445 
2446 	return (0);
2447 }
2448 
2449 /* RFC 8926 Section 4.4.2. DSCP, ECN, and TTL */
2450 static int
geneve_inherit_l3_hdr(struct mbuf * m,struct geneve_softc * sc,uint16_t proto,uint8_t * tos,uint8_t * ttl,u_short * ip_off)2451 geneve_inherit_l3_hdr(struct mbuf *m, struct geneve_softc *sc, uint16_t proto,
2452     uint8_t *tos, uint8_t *ttl, u_short *ip_off)
2453 {
2454 	struct ether_header *eh;
2455 	struct ip *ip_inner, iphdr;
2456 	struct ip6_hdr *ip6_inner, ip6hdr;
2457 	int offset;
2458 
2459 	*tos = 0;
2460 	*ttl = sc->gnv_ttl;
2461 	if (sc->gnv_df == IFLA_GENEVE_DF_SET)
2462 		*ip_off = htons(IP_DF);
2463 	else
2464 		*ip_off = 0;
2465 
2466 	/* Set offset and address family if proto is ethernet */
2467 	if (proto == GENEVE_PROTO_ETHER) {
2468 		eh = mtod(m, struct ether_header *);
2469 		if (eh->ether_type == htons(ETHERTYPE_IP)) {
2470 			if (m->m_pkthdr.len < ETHER_HDR_LEN + sizeof(struct ip)) {
2471 				m_freem(m);
2472 				return (EINVAL);
2473 			}
2474 			proto = ETHERTYPE_IP;
2475 		} else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
2476 			if (m->m_pkthdr.len < ETHER_HDR_LEN + sizeof(struct ip6_hdr)) {
2477 				m_freem(m);
2478 				return (EINVAL);
2479 			}
2480 			proto = ETHERTYPE_IPV6;
2481 		} else
2482 			return (0);
2483 
2484 		offset = ETHER_HDR_LEN;
2485 	} else
2486 		offset = 0;
2487 
2488 	switch (proto) {
2489 	case ETHERTYPE_IP:
2490 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2491 			m_copydata(m, offset, sizeof(struct ip), (caddr_t)&iphdr);
2492 			ip_inner = &iphdr;
2493 		} else
2494 			ip_inner = mtodo(m, offset);
2495 
2496 		*tos = ip_inner->ip_tos;
2497 		if (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT)
2498 			*ttl = ip_inner->ip_ttl;
2499 		if (sc->gnv_df == IFLA_GENEVE_DF_INHERIT)
2500 			*ip_off = ip_inner->ip_off;
2501 		break;
2502 
2503 	case ETHERTYPE_IPV6:
2504 		if (__predict_false(m->m_len < offset + sizeof(struct ip6_hdr))) {
2505 			m_copydata(m, offset, sizeof(struct ip6_hdr), (caddr_t)&ip6hdr);
2506 			ip6_inner = &ip6hdr;
2507 		} else
2508 			ip6_inner = mtodo(m, offset);
2509 
2510 		*tos = IPV6_TRAFFIC_CLASS(ip6_inner);
2511 		if (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT)
2512 			*ttl = ip6_inner->ip6_hlim;
2513 		break;
2514 	}
2515 
2516 	return (0);
2517 }
2518 
2519 #ifdef INET
2520 static int
geneve_encap4(struct geneve_softc * sc,const union sockaddr_union * funsa,struct mbuf * m)2521 geneve_encap4(struct geneve_softc *sc, const union sockaddr_union *funsa,
2522     struct mbuf *m)
2523 {
2524 	struct ifnet *ifp;
2525 	struct ip *ip;
2526 	struct in_addr srcaddr, dstaddr;
2527 	struct route route, *ro;
2528 	struct sockaddr_in *sin;
2529 	int plen, error;
2530 	uint32_t csum_flags;
2531 	uint16_t srcport, dstport, proto;
2532 	u_short ip_off;
2533 	uint8_t tos, ecn, ttl;
2534 	bool mcast;
2535 
2536 	NET_EPOCH_ASSERT();
2537 
2538 	ifp = sc->gnv_ifp;
2539 	srcaddr = sc->gnv_src_addr.sin.sin_addr;
2540 	srcport = htons(geneve_pick_source_port(sc, m));
2541 	dstaddr = funsa->sin.sin_addr;
2542 	dstport = funsa->sin.sin_port;
2543 	plen = m->m_pkthdr.len;
2544 
2545 	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2546 		proto = sc->gnv_proto;
2547 	else
2548 		proto = geneve_get_ethertype(m);
2549 
2550 	error = geneve_inherit_l3_hdr(m, sc, proto, &tos, &ttl, &ip_off);
2551 	if (error) {
2552 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2553 		return (error);
2554 	}
2555 
2556 	M_PREPEND(m, sizeof(struct ip) + sizeof(struct geneveudphdr), M_NOWAIT);
2557 	if (m == NULL) {
2558 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2559 		return (ENOBUFS);
2560 	}
2561 	ip = mtod(m, struct ip *);
2562 
2563 	ecn = (tos & IPTOS_ECN_MASK);
2564 	ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &ecn);
2565 	if (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT)
2566 		ip->ip_tos |= (tos & ~IPTOS_ECN_MASK);
2567 
2568 	ip->ip_len = htons(m->m_pkthdr.len);
2569 	ip->ip_off = ip_off;
2570 	ip->ip_ttl = ttl;
2571 	ip->ip_p = IPPROTO_UDP;
2572 	ip->ip_sum = 0;
2573 	ip->ip_src = srcaddr;
2574 	ip->ip_dst = dstaddr;
2575 
2576 	geneve_encap_header(sc, m, sizeof(struct ip), srcport, dstport, htons(proto));
2577 	mcast = (m->m_flags & (M_MCAST | M_BCAST));
2578 	m->m_flags &= ~(M_MCAST | M_BCAST);
2579 
2580 	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
2581 	if (m->m_pkthdr.csum_flags != 0) {
2582 		/*
2583 		 * HW checksum (L3 and/or L4) or TSO has been requested.
2584 		 * Look up the ifnet for the outbound route and verify that the
2585 		 * outbound ifnet can perform the requested operation on the inner frame.
2586 		 */
2587 		memset(&route, 0, sizeof(route));
2588 		ro = &route;
2589 		sin = (struct sockaddr_in *)&ro->ro_dst;
2590 		sin->sin_family = AF_INET;
2591 		sin->sin_len = sizeof(*sin);
2592 		sin->sin_addr = ip->ip_dst;
2593 		ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 0);
2594 		if (ro->ro_nh == NULL) {
2595 			m_freem(m);
2596 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2597 			return (EHOSTUNREACH);
2598 		}
2599 
2600 		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
2601 		    CSUM_ENCAP_GENEVE);
2602 		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != csum_flags) {
2603 			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
2604 				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
2605 
2606 				if_printf(ifp, "interface %s is missing hwcaps "
2607 				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
2608 				    "hwassist 0x%08x\n", nh_ifp->if_xname,
2609 				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
2610 				    m->m_pkthdr.csum_flags, csum_flags,
2611 				    (uint32_t)nh_ifp->if_hwassist);
2612 			}
2613 			m_freem(m);
2614 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2615 			return (ENXIO);
2616 		}
2617 		m->m_pkthdr.csum_flags = csum_flags;
2618 		if (csum_flags & (CSUM_INNER_IP | CSUM_INNER_IP_UDP |
2619 		    CSUM_INNER_IP6_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
2620 			counter_u64_add(sc->gnv_stats.txcsum, 1);
2621 			if (csum_flags & CSUM_INNER_TSO)
2622 				counter_u64_add(sc->gnv_stats.tso, 1);
2623 		}
2624 	} else
2625 		ro = NULL;
2626 
2627 	error = ip_output(m, NULL, ro, 0, sc->gnv_im4o, NULL);
2628 	if (error == 0) {
2629 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2630 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
2631 		if (mcast)
2632 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2633 	} else
2634 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2635 
2636 	return (error);
2637 }
2638 #endif
2639 
2640 #ifdef INET6
2641 static int
geneve_encap6(struct geneve_softc * sc,const union sockaddr_union * funsa,struct mbuf * m)2642 geneve_encap6(struct geneve_softc *sc, const union sockaddr_union *funsa,
2643     struct mbuf *m)
2644 {
2645 	struct ifnet *ifp;
2646 	struct ip6_hdr *ip6;
2647 	struct ip6_pktopts opts;
2648 	struct sockaddr_in6 *sin6;
2649 	struct route_in6 route, *ro;
2650 	const struct in6_addr *srcaddr, *dstaddr;
2651 	int plen, error;
2652 	uint32_t csum_flags;
2653 	uint16_t srcport, dstport, proto;
2654 	u_short ip6_df;
2655 	uint8_t tos, ecn, etos, ttl;
2656 	bool mcast;
2657 
2658 	NET_EPOCH_ASSERT();
2659 
2660 	ifp = sc->gnv_ifp;
2661 	srcaddr = &sc->gnv_src_addr.sin6.sin6_addr;
2662 	srcport = htons(geneve_pick_source_port(sc, m));
2663 	dstaddr = &funsa->sin6.sin6_addr;
2664 	dstport = funsa->sin6.sin6_port;
2665 	plen = m->m_pkthdr.len;
2666 
2667 	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2668 		proto = sc->gnv_proto;
2669 	else
2670 		proto = geneve_get_ethertype(m);
2671 
2672 	error = geneve_inherit_l3_hdr(m, sc, proto, &tos, &ttl, &ip6_df);
2673 	if (error) {
2674 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2675 			return (error);
2676 	}
2677 
2678 	ip6_initpktopts(&opts);
2679 	if (ip6_df)
2680 		opts.ip6po_flags = IP6PO_DONTFRAG;
2681 
2682 	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct geneveudphdr), M_NOWAIT);
2683 	if (m == NULL) {
2684 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2685 		return (ENOBUFS);
2686 	}
2687 
2688 	ip6 = mtod(m, struct ip6_hdr *);
2689 	ip6->ip6_flow = 0;
2690 	ip6->ip6_vfc = IPV6_VERSION;
2691 
2692 	ecn = (tos & IPTOS_ECN_MASK);
2693 	ip_ecn_ingress(ECN_ALLOWED, &etos, &ecn);
2694 	ip6->ip6_flow |= htonl((u_int32_t)etos << IPV6_FLOWLABEL_LEN);
2695 	if (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT)
2696 		ip6->ip6_flow |= htonl((u_int32_t)tos << IPV6_FLOWLABEL_LEN);
2697 
2698 	ip6->ip6_plen = 0;
2699 	ip6->ip6_nxt = IPPROTO_UDP;
2700 	ip6->ip6_hlim = ttl;
2701 	ip6->ip6_src = *srcaddr;
2702 	ip6->ip6_dst = *dstaddr;
2703 
2704 	geneve_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport,
2705 	    htons(proto));
2706 	mcast = (m->m_flags & (M_MCAST | M_BCAST));
2707 	m->m_flags &= ~(M_MCAST | M_BCAST);
2708 
2709 	ro = NULL;
2710 	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
2711 	if (mcast || m->m_pkthdr.csum_flags != 0) {
2712 		/*
2713 		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
2714 		 * up the ifnet for the outbound route and verify that the
2715 		 * outbound ifnet can perform the requested operation on the
2716 		 * inner frame.
2717 		 * XXX: There's a rare scenario with ipv6 over multicast
2718 		 * underlay where, when mc_ifname is set, it causes panics
2719 		 * inside a jail. We'll force geneve to select its own outbound
2720 		 * interface to avoid this.
2721 		 */
2722 		memset(&route, 0, sizeof(route));
2723 		ro = &route;
2724 		sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
2725 		sin6->sin6_family = AF_INET6;
2726 		sin6->sin6_len = sizeof(*sin6);
2727 		sin6->sin6_addr = ip6->ip6_dst;
2728 		ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, NHR_NONE, 0);
2729 		if (ro->ro_nh == NULL) {
2730 			m_freem(m);
2731 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2732 			return (EHOSTUNREACH);
2733 		}
2734 	}
2735 	if (m->m_pkthdr.csum_flags != 0) {
2736 		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
2737 		    CSUM_ENCAP_GENEVE);
2738 		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != csum_flags) {
2739 			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
2740 				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
2741 
2742 				if_printf(ifp, "interface %s is missing hwcaps "
2743 				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
2744 				    "hwassist 0x%08x\n", nh_ifp->if_xname,
2745 				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
2746 				    m->m_pkthdr.csum_flags, csum_flags,
2747 				    (uint32_t)nh_ifp->if_hwassist);
2748 			}
2749 			m_freem(m);
2750 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2751 			return (ENXIO);
2752 		}
2753 		m->m_pkthdr.csum_flags = csum_flags;
2754 		if (csum_flags &
2755 		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
2756 		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
2757 			counter_u64_add(sc->gnv_stats.txcsum, 1);
2758 			if (csum_flags & CSUM_INNER_TSO)
2759 				counter_u64_add(sc->gnv_stats.tso, 1);
2760 		}
2761 	} else if (ntohs(dstport) != V_zero_checksum_port) {
2762 		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
2763 
2764 		hdr->uh_sum = in6_cksum_pseudo(ip6,
2765 		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
2766 		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2767 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
2768 	}
2769 	error = ip6_output(m, &opts, ro, 0, sc->gnv_im6o, NULL, NULL);
2770 	if (error == 0) {
2771 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2772 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
2773 		if (mcast)
2774 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2775 	} else
2776 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2777 
2778 	return (error);
2779 }
2780 #endif
2781 
2782 static int
geneve_transmit(struct ifnet * ifp,struct mbuf * m)2783 geneve_transmit(struct ifnet *ifp, struct mbuf *m)
2784 {
2785 	struct rm_priotracker tracker;
2786 	union sockaddr_union unsa;
2787 	struct geneve_softc *sc;
2788 	struct gnv_ftable_entry *fe;
2789 	struct ifnet *mcifp;
2790 	struct ether_header *eh;
2791 	uint32_t af;
2792 	int error;
2793 
2794 	mcifp = NULL;
2795 	sc = ifp->if_softc;
2796 	GENEVE_RLOCK(sc, &tracker);
2797 	M_SETFIB(m, sc->gnv_fibnum);
2798 
2799 	if ((sc->gnv_flags & GENEVE_FLAG_RUNNING) == 0) {
2800 		GENEVE_RUNLOCK(sc, &tracker);
2801 		m_freem(m);
2802 		return (ENETDOWN);
2803 	}
2804 	if (__predict_false(if_tunnel_check_nesting(ifp, m,
2805 	    MTAG_GENEVE_LOOP, 1) != 0)) {
2806 		GENEVE_RUNLOCK(sc, &tracker);
2807 		m_freem(m);
2808 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2809 		return (ELOOP);
2810 	}
2811 
2812 	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
2813 		fe = NULL;
2814 		eh = mtod(m, struct ether_header *);
2815 
2816 		ETHER_BPF_MTAP(ifp, m);
2817 		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2818 			fe = geneve_ftable_entry_lookup(sc, eh->ether_dhost);
2819 		if (fe == NULL)
2820 			fe = &sc->gnv_default_fe;
2821 		geneve_sockaddr_copy(&unsa, &fe->gnvfe_raddr.sa);
2822 	} else
2823 		geneve_sockaddr_copy(&unsa, &sc->gnv_dst_addr.sa);
2824 
2825 	af = unsa.sa.sa_family;
2826 	if (geneve_check_multicast_addr(&unsa) != 0)
2827 		mcifp = geneve_multicast_if_ref(sc, af);
2828 
2829 	GENEVE_ACQUIRE(sc);
2830 	GENEVE_RUNLOCK(sc, &tracker);
2831 
2832 	switch (af) {
2833 #ifdef INET
2834 	case AF_INET:
2835 		error = geneve_encap4(sc, &unsa, m);
2836 		break;
2837 #endif
2838 #ifdef INET6
2839 	case AF_INET6:
2840 		error = geneve_encap6(sc, &unsa, m);
2841 		break;
2842 #endif
2843 	default:
2844 		error = EAFNOSUPPORT;
2845 	}
2846 
2847 	geneve_release(sc);
2848 	if (mcifp != NULL)
2849 		if_rele(mcifp);
2850 
2851 	return (error);
2852 }
2853 
2854 static int
geneve_output(struct ifnet * ifp,struct mbuf * m,const struct sockaddr * dst,struct route * ro)2855 geneve_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
2856     struct route *ro)
2857 {
2858 	uint32_t af;
2859 	int error;
2860 
2861 #ifdef MAC
2862 	error = mac_ifnet_check_transmit(ifp, m);
2863 	if (error) {
2864 		m_freem(m);
2865 		return (error);
2866 	}
2867 #endif
2868 
2869 	/* BPF writes need to be handled specially. */
2870 	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
2871 		memmove(&af, dst->sa_data, sizeof(af));
2872 	else
2873 		af = RO_GET_FAMILY(ro, dst);
2874 
2875 	BPF_MTAP2(ifp, &af, sizeof(af), m);
2876 	error = (ifp->if_transmit)(ifp, m);
2877 	if (error)
2878 		return (ENOBUFS);
2879 	return (0);
2880 }
2881 
2882 static int
geneve_next_option(struct geneve_socket * gnvso,struct genevehdr * gnvh,struct mbuf ** m0)2883 geneve_next_option(struct geneve_socket *gnvso, struct genevehdr *gnvh,
2884 	struct mbuf **m0)
2885 {
2886 	int optlen, error;
2887 
2888 	error = 0;
2889 	/*
2890 	 * We MUST NOT forward the packet if control (O) bit is set
2891 	 * and currently there is not standard specification for it.
2892 	 * Therefore, we drop it.
2893 	 */
2894 	if (gnvh->geneve_control)
2895 		return (EINVAL);
2896 
2897 	optlen = gnvh->geneve_optlen;
2898 	if (optlen == 0)
2899 		return (error);
2900 
2901 	/*
2902 	 * XXX: Geneve options processing
2903 	 * We MUST drop the packet if there are options to process
2904 	 * and we are not able to process it.
2905 	 */
2906 	if (gnvh->geneve_critical)
2907 		error = EINVAL;
2908 
2909 	return (error);
2910 }
2911 
2912 static void
geneve_qflush(struct ifnet * ifp __unused)2913 geneve_qflush(struct ifnet *ifp __unused)
2914 {
2915 }
2916 
2917 static void
geneve_input_csum(struct mbuf * m,struct ifnet * ifp,counter_u64_t rxcsum)2918 geneve_input_csum(struct mbuf *m, struct ifnet *ifp, counter_u64_t rxcsum)
2919 {
2920 	uint32_t csum_flags;
2921 
2922 	if ((((ifp->if_capenable & IFCAP_RXCSUM) != 0 &&
2923 	    (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) != 0) ||
2924 	    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0 &&
2925 	    (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) == 0))) {
2926 		csum_flags = 0;
2927 
2928 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
2929 			csum_flags |= CSUM_L3_CALC;
2930 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
2931 			csum_flags |= CSUM_L3_VALID;
2932 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
2933 			csum_flags |= CSUM_L4_CALC;
2934 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
2935 			csum_flags |= CSUM_L4_VALID;
2936 		m->m_pkthdr.csum_flags = csum_flags;
2937 		counter_u64_add(rxcsum, 1);
2938 	} else {
2939 		/* clear everything */
2940 		m->m_pkthdr.csum_flags = 0;
2941 		m->m_pkthdr.csum_data = 0;
2942 	}
2943 }
2944 
2945 static uint32_t
geneve_map_etype_to_af(uint32_t ethertype)2946 geneve_map_etype_to_af(uint32_t ethertype)
2947 {
2948 
2949 	if (ethertype == ETHERTYPE_IP)
2950 		return (AF_INET);
2951 	if (ethertype == ETHERTYPE_IPV6)
2952 		return (AF_INET6);
2953 	if (ethertype == ETHERTYPE_ARP)
2954 		return (AF_LINK);
2955 	return (0);
2956 }
2957 
2958 static bool
geneve_udp_input(struct mbuf * m,int offset,struct inpcb * inpcb,const struct sockaddr * srcsa,void * xgnvso)2959 geneve_udp_input(struct mbuf *m, int offset, struct inpcb *inpcb,
2960     const struct sockaddr *srcsa, void *xgnvso)
2961 {
2962 	struct geneve_socket *gnvso;
2963 	struct geneve_pkt_info info;
2964 	struct genevehdr *gnvh, gnvhdr;
2965 	struct geneve_softc *sc;
2966 	struct ip *iphdr;
2967 	struct ip6_hdr *ip6hdr;
2968 	struct ifnet *ifp;
2969 	int32_t plen, af;
2970 	uint32_t vni;
2971 	uint16_t optlen, proto;
2972 	int error;
2973 
2974 	M_ASSERTPKTHDR(m);
2975 	plen = m->m_pkthdr.len;
2976 	gnvso = xgnvso;
2977 
2978 	if (m->m_pkthdr.len < offset + sizeof(struct geneveudphdr))
2979 		return (false);
2980 
2981 	/* Get ECN and TTL values for future processing */
2982 	memset(&info, 0, sizeof(info));
2983 	info.ethertype = geneve_get_ethertype(m);
2984 	if (info.ethertype == ETHERTYPE_IP) {
2985 		iphdr = mtodo(m, offset - sizeof(struct ip));
2986 		info.ecn = (iphdr->ip_tos & IPTOS_ECN_MASK);
2987 		info.ttl = iphdr->ip_ttl;
2988 	} else if (info.ethertype == ETHERTYPE_IPV6) {
2989 		ip6hdr = mtodo(m, offset - sizeof(struct ip6_hdr));
2990 		info.ecn = IPV6_ECN(ip6hdr);
2991 		info.ttl = ip6hdr->ip6_hlim;
2992 	}
2993 
2994 	/* Get geneve header */
2995 	offset += sizeof(struct udphdr);
2996 	if (__predict_false(m->m_len < offset + sizeof(struct genevehdr))) {
2997 		m_copydata(m, offset, sizeof(struct genevehdr), (caddr_t)&gnvhdr);
2998 		gnvh = &gnvhdr;
2999 	} else
3000 		gnvh = mtodo(m, offset);
3001 
3002 	/*
3003 	 * Drop if there is a reserved bit or unknown version set in the header.
3004 	 * As defined in RFC 8926 3.4
3005 	 */
3006 	if (gnvh->geneve_ver != htons(GENEVE_VERSION) ||
3007 	    gnvh->geneve_vni & ~GENEVE_VNI_MASK)
3008 		return (false);
3009 
3010 	/*
3011 	 * The length of the option fields, expressed in 4-byte multiples, not
3012 	 * including the 8-byte fixed tunnel header.
3013 	 */
3014 	optlen = ntohs(gnvh->geneve_optlen) * 4;
3015 	error = geneve_next_option(gnvso, gnvh, &m);
3016 	if (error != 0)
3017 		return (false);
3018 
3019 	vni = ntohl(gnvh->geneve_vni) >> GENEVE_HDR_VNI_SHIFT;
3020 	sc = geneve_socket_lookup_softc(gnvso, vni);
3021 	if (sc == NULL)
3022 		return (false);
3023 
3024 	if ((sc->gnv_flags & GENEVE_FLAG_RUNNING) == 0)
3025 		goto out;
3026 
3027 	proto = ntohs(gnvh->geneve_proto);
3028 	m_adj(m, offset + sizeof(struct genevehdr) + optlen);
3029 
3030 	/* if next protocol is ethernet, check its ethertype and learn it */
3031 	if (proto == GENEVE_PROTO_ETHER) {
3032 		offset = ETHER_HDR_LEN;
3033 		error = geneve_input_ether(sc, &m, srcsa, &info);
3034 		if (error != 0)
3035 			goto out;
3036 	} else {
3037 		info.ethertype = proto;
3038 		af = geneve_map_etype_to_af(info.ethertype);
3039 		offset = 0;
3040 	}
3041 
3042 	error = geneve_input_inherit(sc, &m, offset, &info);
3043 	if (error != 0)
3044 		goto out;
3045 
3046 	ifp = sc->gnv_ifp;
3047 	if (ifp == m->m_pkthdr.rcvif)
3048 		/* XXX Does not catch more complex loops. */
3049 		goto out;
3050 
3051 	m_clrprotoflags(m);
3052 	m->m_pkthdr.rcvif = ifp;
3053 	M_SETFIB(m, ifp->if_fib);
3054 	geneve_input_csum(m, ifp, sc->gnv_stats.rxcsum);
3055 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
3056 	if_inc_counter(ifp, IFCOUNTER_IBYTES, plen);
3057 	if (sc->gnv_mc_ifp != NULL)
3058 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
3059 
3060 	MPASS(m != NULL);
3061 
3062 	if (proto == GENEVE_PROTO_ETHER)
3063 		(*ifp->if_input)(ifp, m);
3064 	else {
3065 		BPF_MTAP2(ifp, &af, sizeof(af), m);
3066 		netisr_dispatch_src(info.isr, (uintptr_t)xgnvso, m);
3067 	}
3068 
3069 	m = NULL;
3070 out:
3071 	geneve_release(sc);
3072 	if (m != NULL) {
3073 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
3074 		m_freem(m);
3075 	}
3076 
3077 	return (true);
3078 }
3079 
3080 static int
geneve_input_ether(struct geneve_softc * sc,struct mbuf ** m0,const struct sockaddr * sa,struct geneve_pkt_info * info)3081 geneve_input_ether(struct geneve_softc *sc, struct mbuf **m0,
3082     const struct sockaddr *sa, struct geneve_pkt_info *info)
3083 {
3084 	struct mbuf *m;
3085 	struct ether_header *eh;
3086 
3087 	m = *m0;
3088 
3089 	if (sc->gnv_proto != GENEVE_PROTO_ETHER)
3090 		return (EPROTOTYPE);
3091 
3092 	if (m->m_pkthdr.len < ETHER_HDR_LEN)
3093 		return (EINVAL);
3094 
3095 	if (m->m_len < ETHER_HDR_LEN &&
3096 	    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
3097 		*m0 = NULL;
3098 		return (ENOBUFS);
3099 	}
3100 
3101 	eh = mtod(m, struct ether_header *);
3102 	info->ethertype = ntohs(eh->ether_type);
3103 	if (sc->gnv_flags & GENEVE_FLAG_LEARN)
3104 		geneve_ftable_learn(sc, sa, eh->ether_shost);
3105 
3106 	*m0 = m;
3107 	return (0);
3108 }
3109 
3110 static int
geneve_input_inherit(struct geneve_softc * sc,struct mbuf ** m0,int offset,struct geneve_pkt_info * info)3111 geneve_input_inherit(struct geneve_softc *sc, struct mbuf **m0,
3112     int offset, struct geneve_pkt_info *info)
3113 {
3114 	struct mbuf *m;
3115 	struct ip *iphdr;
3116 	struct ip6_hdr *ip6hdr;
3117 	uint8_t itos;
3118 
3119 	m = *m0;
3120 
3121 	switch (info->ethertype) {
3122 	case ETHERTYPE_IP:
3123 		offset += sizeof(struct ip);
3124 		if (m->m_pkthdr.len < offset)
3125 			return (EINVAL);
3126 
3127 		if (m->m_len < offset &&
3128 		    (m = m_pullup(m, offset)) == NULL) {
3129 			*m0 = NULL;
3130 			return (ENOBUFS);
3131 		}
3132 		iphdr = mtodo(m, offset - sizeof(struct ip));
3133 
3134 		if (ip_ecn_egress(ECN_COMPLETE, &info->ecn, &iphdr->ip_tos) == 0) {
3135 			*m0 = NULL;
3136 			return (ENOBUFS);
3137 		}
3138 
3139 		if ((sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) != 0 && info->ttl > 0)
3140 			iphdr->ip_ttl = info->ttl;
3141 
3142 		info->isr = NETISR_IP;
3143 		break;
3144 
3145 	case ETHERTYPE_IPV6:
3146 		offset += sizeof(struct ip6_hdr);
3147 		if (m->m_pkthdr.len < offset)
3148 			return (EINVAL);
3149 
3150 		if (m->m_len < offset &&
3151 		    (m = m_pullup(m, offset)) == NULL) {
3152 			*m0 = NULL;
3153 			return (ENOBUFS);
3154 		}
3155 		ip6hdr = mtodo(m, offset - sizeof(struct ip6_hdr));
3156 
3157 		itos = (ntohl(ip6hdr->ip6_flow) >> IPV6_FLOWLABEL_LEN) & 0xff;
3158 		if (ip_ecn_egress(ECN_COMPLETE, &info->ecn, &itos) == 0) {
3159 			*m0 = NULL;
3160 			return (ENOBUFS);
3161 		}
3162 		ip6hdr->ip6_flow |= htonl((uint32_t)itos << IPV6_FLOWLABEL_LEN);
3163 
3164 		if ((sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) && (info->ttl > 0))
3165 			ip6hdr->ip6_hlim = info->ttl;
3166 
3167 		info->isr = NETISR_IPV6;
3168 		break;
3169 
3170 	case ETHERTYPE_ARP:
3171 		if (sc->gnv_proto == GENEVE_PROTO_INHERIT)
3172 			return (EINVAL);
3173 
3174 		offset += sizeof(struct arphdr);
3175 		if (m->m_pkthdr.len < offset)
3176 			return (EINVAL);
3177 
3178 		if (m->m_len < offset &&
3179 		    (m = m_pullup(m, offset)) == NULL) {
3180 			*m0 = NULL;
3181 			return (ENOBUFS);
3182 		}
3183 		info->isr = NETISR_ARP;
3184 		break;
3185 
3186 	default:
3187 		if_inc_counter(sc->gnv_ifp, IFCOUNTER_NOPROTO, 1);
3188 		return (EINVAL);
3189 	}
3190 
3191 	*m0 = m;
3192 	return (0);
3193 }
3194 
3195 static void
geneve_stats_alloc(struct geneve_softc * sc)3196 geneve_stats_alloc(struct geneve_softc *sc)
3197 {
3198 	struct geneve_statistics *stats = &sc->gnv_stats;
3199 
3200 	stats->txcsum = counter_u64_alloc(M_WAITOK);
3201 	stats->tso = counter_u64_alloc(M_WAITOK);
3202 	stats->rxcsum = counter_u64_alloc(M_WAITOK);
3203 }
3204 
3205 static void
geneve_stats_free(struct geneve_softc * sc)3206 geneve_stats_free(struct geneve_softc *sc)
3207 {
3208 	struct geneve_statistics *stats = &sc->gnv_stats;
3209 
3210 	counter_u64_free(stats->txcsum);
3211 	counter_u64_free(stats->tso);
3212 	counter_u64_free(stats->rxcsum);
3213 }
3214 
3215 static void
geneve_set_default_config(struct geneve_softc * sc)3216 geneve_set_default_config(struct geneve_softc *sc)
3217 {
3218 
3219 	sc->gnv_flags |= GENEVE_FLAG_LEARN;
3220 
3221 	sc->gnv_vni = GENEVE_VNI_MAX;
3222 	sc->gnv_ttl = V_ip_defttl;
3223 
3224 	sc->gnv_src_addr.sin.sin_port = htons(GENEVE_UDPPORT);
3225 	sc->gnv_dst_addr.sin.sin_port = htons(GENEVE_UDPPORT);
3226 
3227 	/*
3228 	 * RFC 8926 Section 3.3, the entire 16-bit range MAY
3229 	 * be used to maximize entropy.
3230 	 */
3231 	sc->gnv_min_port = V_ipport_firstauto;
3232 	sc->gnv_max_port = V_ipport_lastauto;
3233 
3234 	sc->gnv_proto = GENEVE_PROTO_ETHER;
3235 
3236 	sc->gnv_ftable_max = GENEVE_FTABLE_MAX;
3237 	sc->gnv_ftable_timeout = GENEVE_FTABLE_TIMEOUT;
3238 }
3239 
3240 static int
geneve_set_reqcap(struct geneve_softc * sc,struct ifnet * ifp,int reqcap,int reqcap2)3241 geneve_set_reqcap(struct geneve_softc *sc, struct ifnet *ifp, int reqcap,
3242     int reqcap2)
3243 {
3244 	int mask = reqcap ^ ifp->if_capenable;
3245 
3246 	/* Disable TSO if tx checksums are disabled. */
3247 	if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
3248 	    reqcap & IFCAP_TSO4) {
3249 		reqcap &= ~IFCAP_TSO4;
3250 		if_printf(ifp, "tso4 disabled due to -txcsum.\n");
3251 	}
3252 	if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
3253 	    reqcap & IFCAP_TSO6) {
3254 		reqcap &= ~IFCAP_TSO6;
3255 		if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
3256 	}
3257 
3258 	/* Do not enable TSO if tx checksums are disabled. */
3259 	if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
3260 	    !(reqcap & IFCAP_TXCSUM)) {
3261 		if_printf(ifp, "enable txcsum first.\n");
3262 		return (EAGAIN);
3263 	}
3264 	if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
3265 	    !(reqcap & IFCAP_TXCSUM_IPV6)) {
3266 		if_printf(ifp, "enable txcsum6 first.\n");
3267 		return (EAGAIN);
3268 	}
3269 
3270 	sc->gnv_reqcap = reqcap;
3271 	sc->gnv_reqcap2 = reqcap2;
3272 	return (0);
3273 }
3274 
3275 /*
3276  * A GENEVE interface inherits the capabilities of the genevedev or the interface
3277  * hosting the genevelocal address.
3278  */
3279 static void
geneve_set_hwcaps(struct geneve_softc * sc)3280 geneve_set_hwcaps(struct geneve_softc *sc)
3281 {
3282 	struct epoch_tracker et;
3283 	struct ifnet *p, *ifp;
3284 	struct ifaddr *ifa;
3285 	u_long hwa;
3286 	int cap, ena;
3287 	bool rel;
3288 
3289 	/* reset caps */
3290 	ifp = sc->gnv_ifp;
3291 	ifp->if_capabilities &= GENEVE_BASIC_IFCAPS;
3292 	ifp->if_capenable &= GENEVE_BASIC_IFCAPS;
3293 	ifp->if_hwassist = 0;
3294 
3295 	NET_EPOCH_ENTER(et);
3296 	CURVNET_SET(ifp->if_vnet);
3297 
3298 	p = NULL;
3299 	rel = false;
3300 	if (sc->gnv_mc_ifname[0] != '\0') {
3301 		rel = true;
3302 		p = ifunit_ref(sc->gnv_mc_ifname);
3303 	} else if (geneve_sockaddr_in_any(&sc->gnv_src_addr) == 0) {
3304 		if (sc->gnv_src_addr.sa.sa_family == AF_INET) {
3305 			struct sockaddr_in in4 = sc->gnv_src_addr.sin;
3306 
3307 			in4.sin_port = 0;
3308 			ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
3309 			if (ifa != NULL)
3310 				p = ifa->ifa_ifp;
3311 		} else if (sc->gnv_src_addr.sa.sa_family == AF_INET6) {
3312 			struct sockaddr_in6 in6 = sc->gnv_src_addr.sin6;
3313 
3314 			in6.sin6_port = 0;
3315 			ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
3316 			if (ifa != NULL)
3317 				p = ifa->ifa_ifp;
3318 		}
3319 	}
3320 	if (p == NULL) {
3321 		CURVNET_RESTORE();
3322 		NET_EPOCH_EXIT(et);
3323 		return;
3324 	}
3325 
3326 	cap = ena = hwa = 0;
3327 
3328 	/* checksum offload */
3329 	if ((p->if_capabilities2 & IFCAP2_BIT(IFCAP2_GENEVE_HWCSUM)) != 0)
3330 		cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3331 	if ((p->if_capenable2 & IFCAP2_BIT(IFCAP2_GENEVE_HWCSUM)) != 0) {
3332 		ena |= sc->gnv_reqcap & p->if_capenable & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3333 		if (ena & IFCAP_TXCSUM) {
3334 			if (p->if_hwassist & CSUM_INNER_IP)
3335 				hwa |= CSUM_IP;
3336 			if (p->if_hwassist & CSUM_INNER_IP_UDP)
3337 				hwa |= CSUM_IP_UDP;
3338 			if (p->if_hwassist & CSUM_INNER_IP_TCP)
3339 				hwa |= CSUM_IP_TCP;
3340 		}
3341 		if (ena & IFCAP_TXCSUM_IPV6) {
3342 			if (p->if_hwassist & CSUM_INNER_IP6_UDP)
3343 				hwa |= CSUM_IP6_UDP;
3344 			if (p->if_hwassist & CSUM_INNER_IP6_TCP)
3345 				hwa |= CSUM_IP6_TCP;
3346 		}
3347 	}
3348 
3349 	/* hardware TSO */
3350 	if ((p->if_capabilities2 & IFCAP2_BIT(IFCAP2_GENEVE_HWTSO)) != 0) {
3351 		cap |= p->if_capabilities & IFCAP_TSO;
3352 		if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
3353 			ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
3354 		else
3355 			ifp->if_hw_tsomax = p->if_hw_tsomax;
3356 		ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
3357 		ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
3358 	}
3359 	if ((p->if_capenable2 & IFCAP2_BIT(IFCAP2_GENEVE_HWTSO)) != 0) {
3360 		ena |= sc->gnv_reqcap & p->if_capenable & IFCAP_TSO;
3361 		if (ena & IFCAP_TSO) {
3362 			if (p->if_hwassist & CSUM_INNER_IP_TSO)
3363 				hwa |= CSUM_IP_TSO;
3364 			if (p->if_hwassist & CSUM_INNER_IP6_TSO)
3365 				hwa |= CSUM_IP6_TSO;
3366 		}
3367 	}
3368 
3369 	ifp->if_capabilities |= cap;
3370 	ifp->if_capenable |= ena;
3371 	ifp->if_hwassist |= hwa;
3372 	if (rel)
3373 		if_rele(p);
3374 
3375 	CURVNET_RESTORE();
3376 	NET_EPOCH_EXIT(et);
3377 }
3378 
3379 static int
geneve_clone_create_nl(struct if_clone * ifc,char * name,size_t len,struct ifc_data_nl * ifd)3380 geneve_clone_create_nl(struct if_clone *ifc, char *name, size_t len,
3381     struct ifc_data_nl *ifd)
3382 {
3383 	struct nl_parsed_link *lattrs = ifd->lattrs;
3384 	struct nl_pstate *npt = ifd->npt;
3385 	struct nl_parsed_geneve attrs = {};
3386 	int error;
3387 
3388 	if ((lattrs->ifla_idata == NULL) ||
3389 	    (!nl_has_attr(ifd->bm, IFLA_LINKINFO))) {
3390 		nlmsg_report_err_msg(npt, "geneve protocol is required");
3391 		return (ENOTSUP);
3392 	}
3393 
3394 	error = nl_parse_nested(lattrs->ifla_idata, &geneve_create_parser, npt, &attrs);
3395 	if (error != 0)
3396 		return (error);
3397 	if (geneve_check_proto(attrs.ifla_proto)) {
3398 		nlmsg_report_err_msg(npt, "Unsupported ethertype: 0x%04X", attrs.ifla_proto);
3399 		return (ENOTSUP);
3400 	}
3401 
3402 	struct geneve_params gnvp = { .ifla_proto = attrs.ifla_proto };
3403 	struct ifc_data ifd_new = {
3404 		.flags = IFC_F_SYSSPACE,
3405 		.unit = ifd->unit,
3406 		.params = &gnvp
3407 	};
3408 
3409 	return (geneve_clone_create(ifc, name, len, &ifd_new, &ifd->ifp));
3410 }
3411 
3412 static int
geneve_clone_modify_nl(struct ifnet * ifp,struct ifc_data_nl * ifd)3413 geneve_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
3414 {
3415 	struct geneve_softc *sc = ifp->if_softc;
3416 	struct nl_parsed_link *lattrs = ifd->lattrs;
3417 	struct nl_pstate *npt = ifd->npt;
3418 	struct nl_parsed_geneve params;
3419 	struct nlattr *attrs = lattrs->ifla_idata;
3420 	struct nlattr_bmask bm;
3421 	int error = 0;
3422 
3423 	if ((attrs == NULL) ||
3424 	    (nl_has_attr(ifd->bm, IFLA_LINKINFO) == 0)) {
3425 		error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
3426 		return (error);
3427 	}
3428 
3429 	error = priv_check(curthread, PRIV_NET_GENEVE);
3430 	if (error)
3431 		return (error);
3432 
3433 	/* make sure ignored attributes by nl_parse will not cause panics */
3434 	memset(&params, 0, sizeof(params));
3435 
3436 	nl_get_attrs_bmask_raw(NLA_DATA(attrs), NLA_DATA_LEN(attrs), &bm);
3437 	error = nl_parse_nested(attrs, &geneve_modify_parser, npt, &params);
3438 
3439 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_ID))
3440 		error = geneve_set_vni_nl(sc, npt, params.ifla_vni);
3441 
3442 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_LOCAL))
3443 		error = geneve_set_local_addr_nl(sc, npt, params.ifla_local);
3444 
3445 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_REMOTE))
3446 		error = geneve_set_remote_addr_nl(sc, npt, params.ifla_remote);
3447 
3448 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_LOCAL_PORT))
3449 		error = geneve_set_local_port_nl(sc, npt, params.ifla_local_port);
3450 
3451 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_PORT))
3452 		error = geneve_set_remote_port_nl(sc, npt, params.ifla_remote_port);
3453 
3454 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_PORT_RANGE))
3455 		error = geneve_set_port_range_nl(sc, npt, params.ifla_port_range);
3456 
3457 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_DF))
3458 		error = geneve_set_df_nl(sc, npt, params.ifla_df);
3459 
3460 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_TTL))
3461 		error = geneve_set_ttl_nl(sc, npt, params.ifla_ttl);
3462 
3463 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_TTL_INHERIT))
3464 		error = geneve_set_ttl_inherit_nl(sc, npt, params.ifla_ttl_inherit);
3465 
3466 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_DSCP_INHERIT))
3467 		error = geneve_set_dscp_inherit_nl(sc, npt, params.ifla_dscp_inherit);
3468 
3469 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_COLLECT_METADATA))
3470 		error = geneve_set_collect_metadata_nl(sc, npt, params.ifla_external);
3471 
3472 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_LEARN))
3473 		error = geneve_set_learn_nl(sc, npt, params.ifla_ftable_learn);
3474 
3475 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_FLUSH))
3476 		error = geneve_flush_ftable_nl(sc, npt, params.ifla_ftable_flush);
3477 
3478 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_MAX))
3479 		error = geneve_set_ftable_max_nl(sc, npt, params.ifla_ftable_max);
3480 
3481 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_TIMEOUT))
3482 		error = geneve_set_ftable_timeout_nl(sc, npt, params.ifla_ftable_timeout);
3483 
3484 	if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_MC_IFNAME))
3485 		error = geneve_set_mc_if_nl(sc, npt, params.ifla_mc_ifname);
3486 
3487 	if (error == 0)
3488 		error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
3489 
3490 	return (error);
3491 }
3492 
3493 static void
geneve_clone_dump_nl(struct ifnet * ifp,struct nl_writer * nw)3494 geneve_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw)
3495 {
3496 	struct geneve_softc *sc;
3497 	struct rm_priotracker tracker;
3498 	int off, off2;
3499 
3500 	nlattr_add_u32(nw, IFLA_LINK, ifp->if_index);
3501 	nlattr_add_string(nw, IFLA_IFNAME, ifp->if_xname);
3502 
3503 	off = nlattr_add_nested(nw, IFLA_LINKINFO);
3504 	if (off == 0)
3505 		return;
3506 
3507 	nlattr_add_string(nw, IFLA_INFO_KIND, "geneve");
3508 	off2 = nlattr_add_nested(nw, IFLA_INFO_DATA);
3509 	if (off2 == 0) {
3510 		nlattr_set_len(nw, off);
3511 		return;
3512 	}
3513 
3514 	sc = ifp->if_softc;
3515 	GENEVE_RLOCK(sc, &tracker);
3516 
3517 	nlattr_add_u32(nw, IFLA_GENEVE_ID, sc->gnv_vni);
3518 	nlattr_add_u16(nw, IFLA_GENEVE_PROTOCOL, sc->gnv_proto);
3519 	geneve_get_local_addr_nl(sc, nw);
3520 	geneve_get_remote_addr_nl(sc, nw);
3521 	nlattr_add_u16(nw, IFLA_GENEVE_LOCAL_PORT, geneve_get_local_port(sc));
3522 	nlattr_add_u16(nw, IFLA_GENEVE_PORT, geneve_get_remote_port(sc));
3523 
3524 	const struct ifla_geneve_port_range port_range = {
3525 		.low = sc->gnv_min_port,
3526 		.high = sc->gnv_max_port
3527 	};
3528 	nlattr_add(nw, IFLA_GENEVE_PORT_RANGE, sizeof(port_range), &port_range);
3529 
3530 	nlattr_add_u8(nw, IFLA_GENEVE_DF, (uint8_t)sc->gnv_df);
3531 	nlattr_add_u8(nw, IFLA_GENEVE_TTL, sc->gnv_ttl);
3532 	nlattr_add_bool(nw, IFLA_GENEVE_TTL_INHERIT,
3533 	    sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT);
3534 	nlattr_add_bool(nw, IFLA_GENEVE_DSCP_INHERIT,
3535 	    sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT);
3536 	nlattr_add_bool(nw, IFLA_GENEVE_COLLECT_METADATA,
3537 	    sc->gnv_flags & GENEVE_FLAG_COLLECT_METADATA);
3538 
3539 	nlattr_add_bool(nw, IFLA_GENEVE_FTABLE_LEARN,
3540 	    sc->gnv_flags & GENEVE_FLAG_LEARN);
3541 	nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_MAX, sc->gnv_ftable_max);
3542 	nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_TIMEOUT, sc->gnv_ftable_timeout);
3543 	nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_COUNT, sc->gnv_ftable_cnt);
3544 	nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_NOSPACE_CNT, sc->gnv_stats.ftable_nospace);
3545 	nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_LOCK_UP_FAIL_CNT,
3546 	    sc->gnv_stats.ftable_lock_upgrade_failed);
3547 
3548 	nlattr_add_string(nw, IFLA_GENEVE_MC_IFNAME, sc->gnv_mc_ifname);
3549 	nlattr_add_u32(nw, IFLA_GENEVE_MC_IFINDEX, sc->gnv_mc_ifindex);
3550 
3551 	nlattr_add_u64(nw, IFLA_GENEVE_TXCSUM_CNT,
3552 	    counter_u64_fetch(sc->gnv_stats.txcsum));
3553 	nlattr_add_u64(nw, IFLA_GENEVE_TSO_CNT,
3554 	    counter_u64_fetch(sc->gnv_stats.tso));
3555 	nlattr_add_u64(nw, IFLA_GENEVE_RXCSUM_CNT,
3556 	    counter_u64_fetch(sc->gnv_stats.rxcsum));
3557 
3558 	nlattr_set_len(nw, off2);
3559 	nlattr_set_len(nw, off);
3560 
3561 	GENEVE_RUNLOCK(sc, &tracker);
3562 }
3563 
3564 static int
geneve_clone_create(struct if_clone * ifc,char * name,size_t len,struct ifc_data * ifd,struct ifnet ** ifpp)3565 geneve_clone_create(struct if_clone *ifc, char *name, size_t len,
3566     struct ifc_data *ifd, struct ifnet **ifpp)
3567 {
3568 	struct geneve_softc *sc;
3569 	struct geneve_params gnvp;
3570 	struct ifnet *ifp;
3571 	int error;
3572 
3573 	sc = malloc(sizeof(struct geneve_softc), M_GENEVE, M_WAITOK | M_ZERO);
3574 	sc->gnv_fibnum = curthread->td_proc->p_fibnum;
3575 	geneve_set_default_config(sc);
3576 
3577 	if (ifd != NULL) {
3578 		error = ifc_copyin(ifd, &gnvp, sizeof(gnvp));
3579 		if (error != 0 ||
3580 		    (error = geneve_check_proto(gnvp.ifla_proto)) != 0) {
3581 			free(sc, M_GENEVE);
3582 			return (error);
3583 		}
3584 
3585 		sc->gnv_proto = gnvp.ifla_proto;
3586 	}
3587 
3588 	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
3589 		ifp = if_alloc(IFT_ETHER);
3590 		ifp->if_flags |= IFF_SIMPLEX | IFF_BROADCAST;
3591 		geneve_ftable_init(sc);
3592 		callout_init_rw(&sc->gnv_callout, &sc->gnv_lock, 0);
3593 	} else if (sc->gnv_proto == GENEVE_PROTO_INHERIT) {
3594 		ifp = if_alloc(IFT_TUNNEL);
3595 		ifp->if_flags |= IFF_NOARP;
3596 	} else {
3597 		free(sc, M_GENEVE);
3598 		return (EINVAL);
3599 	}
3600 
3601 	geneve_stats_alloc(sc);
3602 	sc->gnv_ifp = ifp;
3603 	rm_init(&sc->gnv_lock, "geneverm");
3604 	sc->gnv_port_hash_key = arc4random();
3605 
3606 	ifp->if_softc = sc;
3607 	if_initname(ifp, geneve_name, ifd->unit);
3608 	ifp->if_flags |= IFF_MULTICAST;
3609 	ifp->if_init = geneve_init;
3610 	ifp->if_ioctl = geneve_ioctl;
3611 	ifp->if_transmit = geneve_transmit;
3612 	ifp->if_qflush = geneve_qflush;
3613 	ifp->if_capabilities = GENEVE_BASIC_IFCAPS;
3614 	ifp->if_capenable = GENEVE_BASIC_IFCAPS;
3615 	sc->gnv_reqcap = -1;
3616 	geneve_set_hwcaps(sc);
3617 
3618 	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
3619 		ifmedia_init(&sc->gnv_media, 0, geneve_media_change, geneve_media_status);
3620 		ifmedia_add(&sc->gnv_media, IFM_ETHER | IFM_AUTO, 0, NULL);
3621 		ifmedia_set(&sc->gnv_media, IFM_ETHER | IFM_AUTO);
3622 
3623 		ether_gen_addr(ifp, &sc->gnv_hwaddr);
3624 		ether_ifattach(ifp, sc->gnv_hwaddr.octet);
3625 
3626 		ifp->if_baudrate = 0;
3627 	} else {
3628 		ifp->if_output = geneve_output;
3629 
3630 		if_attach(ifp);
3631 		bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
3632 	}
3633 
3634 	GENEVE_WLOCK(sc);
3635 	geneve_setup_interface_hdrlen(sc);
3636 	GENEVE_WUNLOCK(sc);
3637 	*ifpp = ifp;
3638 
3639 	return (0);
3640 }
3641 
3642 static int
geneve_clone_destroy(struct if_clone * ifc,struct ifnet * ifp,uint32_t flags)3643 geneve_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
3644 {
3645 	struct geneve_softc *sc;
3646 
3647 	sc = if_getsoftc(ifp);
3648 	geneve_teardown(sc);
3649 
3650 	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
3651 		geneve_ftable_flush(sc, 1);
3652 
3653 		ether_ifdetach(ifp);
3654 		if_free(ifp);
3655 		ifmedia_removeall(&sc->gnv_media);
3656 
3657 		geneve_ftable_fini(sc);
3658 	} else {
3659 		bpfdetach(ifp);
3660 		if_detach(ifp);
3661 		if_free(ifp);
3662 	}
3663 
3664 	rm_destroy(&sc->gnv_lock);
3665 	geneve_stats_free(sc);
3666 	free(sc, M_GENEVE);
3667 
3668 	return (0);
3669 }
3670 
3671 /* BMV: Taken from if_bridge. */
3672 static uint32_t
geneve_mac_hash(struct geneve_softc * sc,const uint8_t * addr)3673 geneve_mac_hash(struct geneve_softc *sc, const uint8_t *addr)
3674 {
3675 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->gnv_ftable_hash_key;
3676 
3677 	b += addr[5] << 8;
3678 	b += addr[4];
3679 	a += addr[3] << 24;
3680 	a += addr[2] << 16;
3681 	a += addr[1] << 8;
3682 	a += addr[0];
3683 
3684 /*
3685  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3686  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3687  */
3688 #define	mix(a, b, c)							\
3689 do {									\
3690 	a -= b; a -= c; a ^= (c >> 13);					\
3691 	b -= c; b -= a; b ^= (a << 8);					\
3692 	c -= a; c -= b; c ^= (b >> 13);					\
3693 	a -= b; a -= c; a ^= (c >> 12);					\
3694 	b -= c; b -= a; b ^= (a << 16);					\
3695 	c -= a; c -= b; c ^= (b >> 5);					\
3696 	a -= b; a -= c; a ^= (c >> 3);					\
3697 	b -= c; b -= a; b ^= (a << 10);					\
3698 	c -= a; c -= b; c ^= (b >> 15);					\
3699 } while (0)
3700 
3701 	mix(a, b, c);
3702 
3703 #undef mix
3704 
3705 	return (c);
3706 }
3707 
/*
 * ifmedia change callback.  Media change requests are accepted but have
 * no effect; the function always returns 0.
 */
static int
geneve_media_change(struct ifnet *ifp)
{

	/* Ignore. */
	(void)ifp;
	return (0);
}
3715 
3716 static void
geneve_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)3717 geneve_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3718 {
3719 
3720 	ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
3721 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3722 }
3723 
3724 static int
geneve_sockaddr_cmp(const union sockaddr_union * unsa,const struct sockaddr * sa)3725 geneve_sockaddr_cmp(const union sockaddr_union *unsa,
3726     const struct sockaddr *sa)
3727 {
3728 
3729 	return (memcmp(&unsa->sa, sa, unsa->sa.sa_len));
3730 }
3731 
3732 static void
geneve_sockaddr_copy(union sockaddr_union * dst,const struct sockaddr * sa)3733 geneve_sockaddr_copy(union sockaddr_union *dst,
3734     const struct sockaddr *sa)
3735 {
3736 
3737 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3738 	memset(dst, 0, sizeof(*dst));
3739 
3740 	if (sa->sa_family == AF_INET) {
3741 		dst->sin = *SATOCONSTSIN(sa);
3742 		dst->sin.sin_len = sizeof(struct sockaddr_in);
3743 	} else if (sa->sa_family == AF_INET6) {
3744 		dst->sin6 = *SATOCONSTSIN6(sa);
3745 		dst->sin6.sin6_len = sizeof(struct sockaddr_in6);
3746 	}
3747 }
3748 
3749 static int
geneve_sockaddr_in_equal(const union sockaddr_union * unsa,const struct sockaddr * sa)3750 geneve_sockaddr_in_equal(const union sockaddr_union *unsa,
3751     const struct sockaddr *sa)
3752 {
3753 	int equal;
3754 
3755 	if (sa->sa_family == AF_INET) {
3756 		const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3757 		equal = in4->s_addr == unsa->sin.sin_addr.s_addr;
3758 	} else if (sa->sa_family == AF_INET6) {
3759 		const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3760 		equal = IN6_ARE_ADDR_EQUAL(in6, &unsa->sin6.sin6_addr);
3761 	} else
3762 		equal = 0;
3763 
3764 	return (equal);
3765 }
3766 
3767 static void
geneve_sockaddr_in_copy(union sockaddr_union * dst,const struct sockaddr * sa)3768 geneve_sockaddr_in_copy(union sockaddr_union *dst,
3769     const struct sockaddr *sa)
3770 {
3771 
3772 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3773 
3774 	if (sa->sa_family == AF_INET) {
3775 		const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3776 		dst->sin.sin_family = AF_INET;
3777 		dst->sin.sin_len = sizeof(struct sockaddr_in);
3778 		dst->sin.sin_addr = *in4;
3779 	} else if (sa->sa_family == AF_INET6) {
3780 		const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3781 		dst->sin6.sin6_family = AF_INET6;
3782 		dst->sin6.sin6_len = sizeof(struct sockaddr_in6);
3783 		dst->sin6.sin6_addr = *in6;
3784 	}
3785 }
3786 
3787 static int
geneve_sockaddr_supported(const union sockaddr_union * gnvaddr,int unspec)3788 geneve_sockaddr_supported(const union sockaddr_union *gnvaddr, int unspec)
3789 {
3790 	const struct sockaddr *sa;
3791 	int supported;
3792 
3793 	sa = &gnvaddr->sa;
3794 	supported = 0;
3795 
3796 	if (sa->sa_family == AF_UNSPEC && unspec != 0) {
3797 		supported = 1;
3798 	} else if (sa->sa_family == AF_INET) {
3799 		supported = 1;
3800 	} else if (sa->sa_family == AF_INET6) {
3801 		supported = 1;
3802 	}
3803 
3804 	return (supported);
3805 }
3806 
3807 static int
geneve_sockaddr_in_any(const union sockaddr_union * gnvaddr)3808 geneve_sockaddr_in_any(const union sockaddr_union *gnvaddr)
3809 {
3810 	const struct sockaddr *sa;
3811 	int any;
3812 
3813 	sa = &gnvaddr->sa;
3814 
3815 	if (sa->sa_family == AF_INET) {
3816 		const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3817 		any = in4->s_addr == INADDR_ANY;
3818 	} else if (sa->sa_family == AF_INET6) {
3819 		const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3820 		any = IN6_IS_ADDR_UNSPECIFIED(in6);
3821 	} else
3822 		any = -1;
3823 
3824 	return (any);
3825 }
3826 
3827 static int
geneve_can_change_config(struct geneve_softc * sc)3828 geneve_can_change_config(struct geneve_softc *sc)
3829 {
3830 
3831 	GENEVE_LOCK_ASSERT(sc);
3832 
3833 	if (sc->gnv_flags & GENEVE_FLAG_RUNNING)
3834 		return (0);
3835 	if (sc->gnv_flags & (GENEVE_FLAG_INIT | GENEVE_FLAG_TEARDOWN))
3836 		return (0);
3837 	if (sc->gnv_flags & GENEVE_FLAG_COLLECT_METADATA)
3838 		return (0);
3839 
3840 	return (1);
3841 }
3842 
3843 static int
geneve_check_proto(uint16_t proto)3844 geneve_check_proto(uint16_t proto)
3845 {
3846 	int error;
3847 
3848 	switch (proto) {
3849 	case GENEVE_PROTO_ETHER:
3850 	case GENEVE_PROTO_INHERIT:
3851 		error = 0;
3852 		break;
3853 
3854 	default:
3855 		error = EAFNOSUPPORT;
3856 		break;
3857 	}
3858 
3859 	return (error);
3860 }
3861 
3862 static int
geneve_check_multicast_addr(const union sockaddr_union * sa)3863 geneve_check_multicast_addr(const union sockaddr_union *sa)
3864 {
3865 	int mc;
3866 
3867 	if (sa->sa.sa_family == AF_INET) {
3868 		const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3869 		mc = IN_MULTICAST(ntohl(in4->s_addr));
3870 	} else if (sa->sa.sa_family == AF_INET6) {
3871 		const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3872 		mc = IN6_IS_ADDR_MULTICAST(in6);
3873 	} else
3874 		mc = EINVAL;
3875 
3876 	return (mc);
3877 }
3878 
3879 static int
geneve_check_sockaddr(const union sockaddr_union * sa,const int len)3880 geneve_check_sockaddr(const union sockaddr_union *sa, const int len)
3881 {
3882 	int error;
3883 
3884 	error = 0;
3885 	switch (sa->sa.sa_family) {
3886 	case AF_INET:
3887 	case AF_INET6:
3888 		if (len < sizeof(struct sockaddr))
3889 			error = EINVAL;
3890 		break;
3891 
3892 	default:
3893 		error = EAFNOSUPPORT;
3894 	}
3895 
3896 	return (error);
3897 }
3898 
3899 static int
geneve_prison_remove(void * obj,void * data __unused)3900 geneve_prison_remove(void *obj, void *data __unused)
3901 {
3902 #ifdef VIMAGE
3903 	struct prison *pr;
3904 
3905 	pr = obj;
3906 	if (prison_owns_vnet(pr)) {
3907 		CURVNET_SET(pr->pr_vnet);
3908 		if (V_geneve_cloner != NULL) {
3909 			ifc_detach_cloner(V_geneve_cloner);
3910 			V_geneve_cloner = NULL;
3911 		}
3912 		CURVNET_RESTORE();
3913 	}
3914 #endif
3915 	return (0);
3916 }
3917 
3918 static void
vnet_geneve_load(void)3919 vnet_geneve_load(void)
3920 {
3921 	struct if_clone_addreq_v2 req = {
3922 		.version = 2,
3923 		.flags = IFC_F_AUTOUNIT,
3924 		.match_f = NULL,
3925 		.create_f = geneve_clone_create,
3926 		.destroy_f = geneve_clone_destroy,
3927 		.create_nl_f = geneve_clone_create_nl,
3928 		.modify_nl_f = geneve_clone_modify_nl,
3929 		.dump_nl_f = geneve_clone_dump_nl,
3930 	};
3931 	V_geneve_cloner = ifc_attach_cloner(geneve_name, (struct if_clone_addreq *)&req);
3932 }
3933 VNET_SYSINIT(vnet_geneve_load, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_geneve_load, NULL);
3934 
3935 static void
vnet_geneve_unload(void)3936 vnet_geneve_unload(void)
3937 {
3938 
3939 	if (V_geneve_cloner != NULL)
3940 		ifc_detach_cloner(V_geneve_cloner);
3941 }
3942 VNET_SYSUNINIT(vnet_geneve_unload, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_geneve_unload, NULL);
3943 
3944 static void
geneve_module_init(void)3945 geneve_module_init(void)
3946 {
3947 	mtx_init(&geneve_list_mtx, "geneve list", NULL, MTX_DEF);
3948 	osd_method_t methods[PR_MAXMETHOD] = {
3949 		[PR_METHOD_REMOVE] = geneve_prison_remove,
3950 	};
3951 
3952 	geneve_osd_jail_slot = osd_jail_register(NULL, methods);
3953 	NL_VERIFY_PARSERS(all_parsers);
3954 }
3955 
3956 static void
geneve_module_deinit(void)3957 geneve_module_deinit(void)
3958 {
3959 	struct if_clone *clone;
3960 	VNET_ITERATOR_DECL(vnet_iter);
3961 
3962 	VNET_LIST_RLOCK();
3963 	VNET_FOREACH(vnet_iter) {
3964 		clone = VNET_VNET(vnet_iter, geneve_cloner);
3965 		if (clone != NULL) {
3966 			ifc_detach_cloner(clone);
3967 			VNET_VNET(vnet_iter, geneve_cloner) = NULL;
3968 		}
3969 	}
3970 	VNET_LIST_RUNLOCK();
3971 	NET_EPOCH_WAIT();
3972 	MPASS(LIST_EMPTY(&geneve_socket_list));
3973 	mtx_destroy(&geneve_list_mtx);
3974 	if (geneve_osd_jail_slot != 0)
3975 		osd_jail_deregister(geneve_osd_jail_slot);
3976 }
3977 
3978 static int
geneve_modevent(module_t mod,int type,void * unused)3979 geneve_modevent(module_t mod, int type, void *unused)
3980 {
3981 	int error;
3982 
3983 	error = 0;
3984 
3985 	switch (type) {
3986 	case MOD_LOAD:
3987 		geneve_module_init();
3988 		break;
3989 
3990 	case MOD_UNLOAD:
3991 		geneve_module_deinit();
3992 		break;
3993 
3994 	default:
3995 		error = ENOTSUP;
3996 		break;
3997 	}
3998 
3999 	return (error);
4000 }
4001 
4002 static moduledata_t geneve_mod = {
4003 	"if_geneve",
4004 	geneve_modevent,
4005 	0
4006 };
4007 
4008 DECLARE_MODULE(if_geneve, geneve_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
4009 MODULE_VERSION(if_geneve, 1);
4010