xref: /src/sys/net/if_gre.c (revision afbfc2a617ee16b4c3bafef869690b594f812690)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
5  * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  *
34  * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_inet.h"
39 #include "opt_inet6.h"
40 #include "opt_rss.h"
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/mbuf.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sockio.h>
53 #include <sys/sx.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/systm.h>
57 
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_private.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/vnet.h>
66 #include <net/route.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/in_pcb.h>
70 #ifdef INET
71 #include <netinet/in_var.h>
72 #include <netinet/ip.h>
73 #include <netinet/ip_var.h>
74 #ifdef RSS
75 #include <netinet/in_rss.h>
76 #endif
77 #endif
78 
79 #ifdef INET6
80 #include <netinet/ip6.h>
81 #include <netinet6/in6_var.h>
82 #include <netinet6/ip6_var.h>
83 #ifdef RSS
84 #include <netinet6/in6_rss.h>
85 #endif
86 #endif
87 
88 #include <netinet/ip_encap.h>
89 #include <netinet/udp.h>
90 #include <net/bpf.h>
91 #include <net/if_gre.h>
92 
93 #include <netlink/netlink.h>
94 #include <netlink/netlink_ctl.h>
95 #include <netlink/netlink_var.h>
96 #include <netlink/netlink_route.h>
97 #include <netlink/route/route_var.h>
98 
99 #include <machine/in_cksum.h>
100 #include <security/mac/mac_framework.h>
101 
102 #define	GREMTU			1476
103 
104 static const char grename[] = "gre";
105 MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
106 
107 static struct sx gre_ioctl_sx;
108 SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
109 #define GRE_LOCK_ASSERT() sx_assert(&gre_ioctl_sx, SA_XLOCKED);
110 
/* Interface cloner callbacks (classic and netlink flavours). */
static int	gre_clone_create(struct if_clone *, char *, size_t,
		    struct ifc_data *, struct ifnet **);
static int	gre_clone_destroy(struct if_clone *, struct ifnet *,
		    uint32_t);
static int	gre_clone_create_nl(struct if_clone *, char *, size_t,
		    struct ifc_data_nl *);
static int	gre_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
static void	gre_clone_dump_nl(struct ifnet *, struct nl_writer *);
/* Per-vnet handle of the attached "gre" cloner. */
VNET_DEFINE_STATIC(struct if_clone *, gre_cloner);
#define	V_gre_cloner	VNET(gre_cloner)

#ifdef VIMAGE
/* Installed as if_reassign by gre_clone_create(). */
static void	gre_reassign(struct ifnet *, struct vnet *, char *);
#endif
/* ifnet methods installed by gre_clone_create(). */
static void	gre_qflush(struct ifnet *);
static int	gre_transmit(struct ifnet *, struct mbuf *);
static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
static int	gre_output(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *);
static void	gre_delete_tunnel(struct gre_softc *);
static int	gre_set_addr_nl(struct gre_softc *, struct nl_pstate *,
		    struct sockaddr *, struct sockaddr *);

/* Option setters; all of them assert that gre_ioctl_sx is held. */
static int	gre_set_flags(struct gre_softc *, uint32_t);
static int	gre_set_key(struct gre_softc *, uint32_t);
static int	gre_set_udp_sport(struct gre_softc *, uint16_t);
static int	gre_setopts(struct gre_softc *, u_long, uint32_t);

/* Netlink wrappers: take gre_ioctl_sx and report errors through nl_pstate. */
static int	gre_set_flags_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
static int	gre_set_key_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
static int	gre_set_encap_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
static int	gre_set_udp_sport_nl(struct gre_softc *, struct nl_pstate *, uint16_t);
143 
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Generic Routing Encapsulation");
#ifndef MAX_GRE_NEST
/*
 * This macro controls the default upper limit on the nesting of gre tunnels.
 * Because a large value combined with a careless configuration may crash
 * the system, nesting is not allowed by default.
 * If you need to configure nested gre tunnels, you can define this macro
 * in your kernel configuration file.  If you do so, be careful to configure
 * the tunnels so that they do not form a loop.
 */
#define MAX_GRE_NEST 1
#endif

/*
 * Per-vnet nesting limit, passed to if_tunnel_check_nesting() by
 * gre_transmit() and exported read-write as the "max_nesting" sysctl
 * under the gre node declared above.
 */
VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST;
#define	V_max_gre_nesting	VNET(max_gre_nesting)
SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
163 
/*
 * Destination for the nested IFLA_GRE_* attributes parsed by
 * gre_modify_parser.  gre_clone_modify_nl() zeroes it before parsing and
 * only consumes the fields whose attribute was present in the request.
 */
struct nl_parsed_gre {
	struct sockaddr		*ifla_local;		/* IFLA_GRE_LOCAL */
	struct sockaddr		*ifla_remote;		/* IFLA_GRE_REMOTE */
	uint32_t		ifla_flags;		/* IFLA_GRE_FLAGS */
	uint32_t		ifla_okey;		/* IFLA_GRE_OKEY */
	uint32_t		ifla_encap_type;	/* IFLA_GRE_ENCAP_TYPE */
	uint16_t		ifla_encap_sport;	/* IFLA_GRE_ENCAP_SPORT */
};

/* Attribute type -> field offset and decoder callback. */
#define _OUT(_field)	offsetof(struct nl_parsed_gre, _field)
static const struct nlattr_parser nla_p_gre[] = {
	{ .type = IFLA_GRE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
	{ .type = IFLA_GRE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
	{ .type = IFLA_GRE_FLAGS, .off = _OUT(ifla_flags), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GRE_OKEY, .off = _OUT(ifla_okey), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GRE_ENCAP_TYPE, .off = _OUT(ifla_encap_type), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GRE_ENCAP_SPORT, .off = _OUT(ifla_encap_sport), .cb = nlattr_get_uint16 },
};
#undef _OUT
NL_DECLARE_ATTR_PARSER(gre_modify_parser, nla_p_gre);

/* Parsers checked by NL_VERIFY_PARSERS() at module load (gremodevent()). */
static const struct nlhdr_parser *all_parsers[] = {
	&gre_modify_parser,
};
188 
189 
190 static void
vnet_gre_init(const void * unused __unused)191 vnet_gre_init(const void *unused __unused)
192 {
193 	struct if_clone_addreq_v2 req = {
194 		.version = 2,
195 		.flags = IFC_F_AUTOUNIT,
196 		.match_f = NULL,
197 		.create_f = gre_clone_create,
198 		.destroy_f = gre_clone_destroy,
199 		.create_nl_f = gre_clone_create_nl,
200 		.modify_nl_f = gre_clone_modify_nl,
201 		.dump_nl_f = gre_clone_dump_nl,
202 	};
203 	V_gre_cloner = ifc_attach_cloner(grename, (struct if_clone_addreq *)&req);
204 #ifdef INET
205 	in_gre_init();
206 #endif
207 #ifdef INET6
208 	in6_gre_init();
209 #endif
210 }
211 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
212     vnet_gre_init, NULL);
213 
214 static void
vnet_gre_uninit(const void * unused __unused)215 vnet_gre_uninit(const void *unused __unused)
216 {
217 
218 	ifc_detach_cloner(V_gre_cloner);
219 #ifdef INET
220 	in_gre_uninit();
221 #endif
222 #ifdef INET6
223 	in6_gre_uninit();
224 #endif
225 	/* XXX: epoch_call drain */
226 }
227 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
228     vnet_gre_uninit, NULL);
229 
230 static int
gre_clone_create_nl(struct if_clone * ifc,char * name,size_t len,struct ifc_data_nl * ifd)231 gre_clone_create_nl(struct if_clone *ifc, char *name, size_t len,
232     struct ifc_data_nl *ifd)
233 {
234 	struct ifc_data ifd_new = {
235 		.flags = IFC_F_SYSSPACE,
236 		.unit = ifd->unit,
237 	};
238 
239 	return (gre_clone_create(ifc, name, len, &ifd_new, &ifd->ifp));
240 }
241 
242 static int
gre_clone_modify_nl(struct ifnet * ifp,struct ifc_data_nl * ifd)243 gre_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
244 {
245 	struct gre_softc *sc = ifp->if_softc;
246 	struct nl_parsed_link *lattrs = ifd->lattrs;
247 	struct nl_pstate *npt = ifd->npt;
248 	struct nl_parsed_gre params;
249 	struct nlattr *attrs = lattrs->ifla_idata;
250 	struct nlattr_bmask bm;
251 	int error = 0;
252 
253 	if ((attrs == NULL) ||
254 	    (nl_has_attr(ifd->bm, IFLA_LINKINFO) == 0)) {
255 		error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
256 		return (error);
257 	}
258 
259 	error = priv_check(curthread, PRIV_NET_GRE);
260 	if (error)
261 		return (error);
262 
263 	/* make sure ignored attributes by nl_parse will not cause panics */
264 	memset(&params, 0, sizeof(params));
265 
266 	nl_get_attrs_bmask_raw(NLA_DATA(attrs), NLA_DATA_LEN(attrs), &bm);
267 	if ((error = nl_parse_nested(attrs, &gre_modify_parser, npt, &params)) != 0)
268 		return (error);
269 
270 	if (nl_has_attr(&bm, IFLA_GRE_LOCAL) && nl_has_attr(&bm, IFLA_GRE_REMOTE))
271 		error = gre_set_addr_nl(sc, npt, params.ifla_local, params.ifla_remote);
272 	else if (nl_has_attr(&bm, IFLA_GRE_LOCAL) || nl_has_attr(&bm, IFLA_GRE_REMOTE)) {
273 		error = EINVAL;
274 		nlmsg_report_err_msg(npt, "Specify both remote and local address together");
275 	}
276 
277 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_FLAGS))
278 		error = gre_set_flags_nl(sc, npt, params.ifla_flags);
279 
280 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_OKEY))
281 		error = gre_set_key_nl(sc, npt, params.ifla_okey);
282 
283 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_ENCAP_TYPE))
284 		error = gre_set_encap_nl(sc, npt, params.ifla_encap_type);
285 
286 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_ENCAP_SPORT))
287 		error = gre_set_udp_sport_nl(sc, npt, params.ifla_encap_sport);
288 
289 	if (error == 0)
290 		error = nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt);
291 
292 	return (error);
293 }
294 
295 static void
gre_clone_dump_nl(struct ifnet * ifp,struct nl_writer * nw)296 gre_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw)
297 {
298 	GRE_RLOCK_TRACKER;
299 	struct gre_softc *sc;
300 	struct ifreq ifr;
301 
302 	nlattr_add_u32(nw, IFLA_LINK, ifp->if_index);
303 	nlattr_add_string(nw, IFLA_IFNAME, ifp->if_xname);
304 
305 	int off = nlattr_add_nested(nw, IFLA_LINKINFO);
306 	if (off == 0)
307 		return;
308 
309 	nlattr_add_string(nw, IFLA_INFO_KIND, "gre");
310 	int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA);
311 	if (off2 == 0) {
312 		nlattr_set_len(nw, off);
313 		return;
314 	}
315 
316 	sc = ifp->if_softc;
317 	GRE_RLOCK();
318 
319 	if (sc->gre_family == AF_INET) {
320 #ifdef INET
321 		if (in_gre_ioctl(sc, SIOCGIFPSRCADDR, (caddr_t)&ifr) == 0)
322 			nlattr_add_in_addr(nw, IFLA_GRE_LOCAL,
323 			    (const struct in_addr *)&ifr.ifr_addr);
324 		if (in_gre_ioctl(sc, SIOCGIFPDSTADDR, (caddr_t)&ifr) == 0)
325 			nlattr_add_in_addr(nw, IFLA_GRE_LOCAL,
326 			    (const struct in_addr *)&ifr.ifr_dstaddr);
327 #endif
328 	} else if (sc->gre_family == AF_INET6) {
329 #ifdef INET6
330 		if (in6_gre_ioctl(sc, SIOCGIFPSRCADDR_IN6, (caddr_t)&ifr) == 0)
331 			nlattr_add_in6_addr(nw, IFLA_GRE_LOCAL,
332 			    (const struct in6_addr *)&ifr.ifr_addr);
333 		if (in6_gre_ioctl(sc, SIOCGIFPDSTADDR_IN6, (caddr_t)&ifr) == 0)
334 			nlattr_add_in6_addr(nw, IFLA_GRE_LOCAL,
335 			    (const struct in6_addr *)&ifr.ifr_dstaddr);
336 #endif
337 	}
338 
339 	nlattr_add_u32(nw, IFLA_GRE_FLAGS, sc->gre_options);
340 	nlattr_add_u32(nw, IFLA_GRE_OKEY, sc->gre_key);
341 	nlattr_add_u32(nw, IFLA_GRE_ENCAP_TYPE,
342 	    sc->gre_options & GRE_UDPENCAP ? IFLA_TUNNEL_GRE_UDP : IFLA_TUNNEL_NONE);
343 	nlattr_add_u16(nw, IFLA_GRE_ENCAP_SPORT, sc->gre_port);
344 
345 	nlattr_set_len(nw, off2);
346 	nlattr_set_len(nw, off);
347 
348 	GRE_RUNLOCK();
349 }
350 
351 static int
gre_clone_create(struct if_clone * ifc,char * name,size_t len,struct ifc_data * ifd,struct ifnet ** ifpp)352 gre_clone_create(struct if_clone *ifc, char *name, size_t len,
353     struct ifc_data *ifd, struct ifnet **ifpp)
354 {
355 	struct gre_softc *sc;
356 
357 	sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
358 	sc->gre_fibnum = curthread->td_proc->p_fibnum;
359 	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
360 	GRE2IFP(sc)->if_softc = sc;
361 	if_initname(GRE2IFP(sc), grename, ifd->unit);
362 
363 	GRE2IFP(sc)->if_mtu = GREMTU;
364 	GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
365 	GRE2IFP(sc)->if_output = gre_output;
366 	GRE2IFP(sc)->if_ioctl = gre_ioctl;
367 	GRE2IFP(sc)->if_transmit = gre_transmit;
368 	GRE2IFP(sc)->if_qflush = gre_qflush;
369 #ifdef VIMAGE
370 	GRE2IFP(sc)->if_reassign = gre_reassign;
371 #endif
372 	GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
373 	GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
374 	if_attach(GRE2IFP(sc));
375 	bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
376 	*ifpp = GRE2IFP(sc);
377 
378 	return (0);
379 }
380 
#ifdef VIMAGE
/*
 * if_reassign method: drop the tunnel configuration (if the interface still
 * has a softc) under the configuration lock.
 */
static void
gre_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
    char *unused __unused)
{
	struct gre_softc *softc;

	sx_xlock(&gre_ioctl_sx);
	if ((softc = ifp->if_softc) != NULL)
		gre_delete_tunnel(softc);
	sx_xunlock(&gre_ioctl_sx);
}
#endif /* VIMAGE */
395 
/*
 * Cloner destroy callback.  Under the configuration lock the tunnel is
 * unconfigured, BPF and the interface are detached and the softc pointer
 * is cleared (gre_ioctl() and gre_reassign() check it for NULL).  After
 * dropping the lock and GRE_WAIT(), the ifnet and softc are freed.
 * Always returns 0.
 */
static int
gre_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
	struct gre_softc *sc;

	sx_xlock(&gre_ioctl_sx);
	sc = ifp->if_softc;
	gre_delete_tunnel(sc);
	bpfdetach(ifp);
	if_detach(ifp);
	ifp->if_softc = NULL;
	sx_xunlock(&gre_ioctl_sx);

	/* NOTE(review): presumably waits out readers in GRE_RLOCK sections. */
	GRE_WAIT();
	if_free(ifp);
	free(sc, M_GRE);

	return (0);
}
415 
416 static int
gre_set_key(struct gre_softc * sc,uint32_t key)417 gre_set_key(struct gre_softc *sc, uint32_t key)
418 {
419 	int error = 0;
420 
421 	GRE_LOCK_ASSERT();
422 
423 	if (sc->gre_key == key)
424 		return (0);
425 	error = gre_setopts(sc, GRESKEY, key);
426 
427 	return (error);
428 }
429 
430 static int
gre_set_flags(struct gre_softc * sc,uint32_t opt)431 gre_set_flags(struct gre_softc *sc, uint32_t opt)
432 {
433 	int error = 0;
434 
435 	GRE_LOCK_ASSERT();
436 
437 	if (opt & ~GRE_OPTMASK)
438 		return (EINVAL);
439 	if (sc->gre_options == opt)
440 		return (0);
441 	error = gre_setopts(sc, GRESOPTS, opt);
442 
443 	return (error);
444 }
445 
446 static int
gre_set_udp_sport(struct gre_softc * sc,uint16_t port)447 gre_set_udp_sport(struct gre_softc *sc, uint16_t port)
448 {
449 	int error = 0;
450 
451 	GRE_LOCK_ASSERT();
452 
453 	if (port != 0 && (port < V_ipport_hifirstauto ||
454 	    port > V_ipport_hilastauto))
455 		return (EINVAL);
456 	if (sc->gre_port == port)
457 		return (0);
458 	if ((sc->gre_options & GRE_UDPENCAP) == 0) {
459 		/*
460 		 * UDP encapsulation is not enabled, thus
461 		 * there is no need to reattach softc.
462 		 */
463 		sc->gre_port = port;
464 		return (0);
465 	}
466 	error = gre_setopts(sc, GRESPORT, port);
467 
468 	return (error);
469 }
470 
471 static int
gre_setopts(struct gre_softc * sc,u_long cmd,uint32_t opt)472 gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t opt)
473 {
474 	int error = 0;
475 
476 	GRE_LOCK_ASSERT();
477 
478 	switch (sc->gre_family) {
479 #ifdef INET
480 	case AF_INET:
481 		error = in_gre_setopts(sc, cmd, opt);
482 		break;
483 #endif
484 #ifdef INET6
485 	case AF_INET6:
486 		error = in6_gre_setopts(sc, cmd, opt);
487 		break;
488 #endif
489 	default:
490 		/*
491 		 * Tunnel is not yet configured.
492 		 * We can just change any parameters.
493 		 */
494 		if (cmd == GRESKEY)
495 			sc->gre_key = opt;
496 		if (cmd == GRESOPTS)
497 			sc->gre_options = opt;
498 		if (cmd == GRESPORT)
499 			sc->gre_port = opt;
500 		break;
501 	}
502 	/*
503 	 * XXX: Do we need to initiate change of interface
504 	 * state here?
505 	 */
506 	return (error);
507 };
508 
509 static int
gre_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)510 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
511 {
512 	struct ifreq *ifr = (struct ifreq *)data;
513 	struct gre_softc *sc;
514 	uint32_t opt;
515 	int error;
516 
517 	switch (cmd) {
518 	case SIOCSIFMTU:
519 		 /* XXX: */
520 		if (ifr->ifr_mtu < 576)
521 			return (EINVAL);
522 		ifp->if_mtu = ifr->ifr_mtu;
523 		return (0);
524 	case SIOCSIFADDR:
525 		ifp->if_flags |= IFF_UP;
526 	case SIOCSIFFLAGS:
527 	case SIOCADDMULTI:
528 	case SIOCDELMULTI:
529 		return (0);
530 	case GRESADDRS:
531 	case GRESADDRD:
532 	case GREGADDRS:
533 	case GREGADDRD:
534 	case GRESPROTO:
535 	case GREGPROTO:
536 		return (EOPNOTSUPP);
537 	}
538 	sx_xlock(&gre_ioctl_sx);
539 	sc = ifp->if_softc;
540 	if (sc == NULL) {
541 		error = ENXIO;
542 		goto end;
543 	}
544 	error = 0;
545 	switch (cmd) {
546 	case SIOCDIFPHYADDR:
547 		if (sc->gre_family == 0)
548 			break;
549 		gre_delete_tunnel(sc);
550 		break;
551 #ifdef INET
552 	case SIOCSIFPHYADDR:
553 	case SIOCGIFPSRCADDR:
554 	case SIOCGIFPDSTADDR:
555 		error = in_gre_ioctl(sc, cmd, data);
556 		break;
557 #endif
558 #ifdef INET6
559 	case SIOCSIFPHYADDR_IN6:
560 	case SIOCGIFPSRCADDR_IN6:
561 	case SIOCGIFPDSTADDR_IN6:
562 		error = in6_gre_ioctl(sc, cmd, data);
563 		break;
564 #endif
565 	case SIOCGTUNFIB:
566 		ifr->ifr_fib = sc->gre_fibnum;
567 		break;
568 	case SIOCSTUNFIB:
569 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
570 			break;
571 		if (ifr->ifr_fib >= rt_numfibs)
572 			error = EINVAL;
573 		else
574 			sc->gre_fibnum = ifr->ifr_fib;
575 		break;
576 	case GRESKEY:
577 	case GRESOPTS:
578 	case GRESPORT:
579 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
580 			break;
581 		if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
582 		    sizeof(opt))) != 0)
583 			break;
584 		if (cmd == GRESKEY)
585 			error = gre_set_key(sc, opt);
586 		else if (cmd == GRESOPTS)
587 			error = gre_set_flags(sc, opt);
588 		else if (cmd == GRESPORT)
589 			error = gre_set_udp_sport(sc, opt);
590 		break;
591 	case GREGKEY:
592 		error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
593 		    sizeof(sc->gre_key));
594 		break;
595 	case GREGOPTS:
596 		error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
597 		    sizeof(sc->gre_options));
598 		break;
599 	case GREGPORT:
600 		error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
601 		    sizeof(sc->gre_port));
602 		break;
603 	default:
604 		error = EINVAL;
605 		break;
606 	}
607 	if (error == 0 && sc->gre_family != 0) {
608 		if (
609 #ifdef INET
610 		    cmd == SIOCSIFPHYADDR ||
611 #endif
612 #ifdef INET6
613 		    cmd == SIOCSIFPHYADDR_IN6 ||
614 #endif
615 		    0) {
616 			if_link_state_change(ifp, LINK_STATE_UP);
617 		}
618 	}
619 end:
620 	sx_xunlock(&gre_ioctl_sx);
621 	return (error);
622 }
623 
/*
 * Unconfigure a tunnel; the configuration lock must be held exclusively.
 *
 * A configured softc (gre_family != 0) is unlinked from its two lists, its
 * prebuilt header is freed after GRE_WAIT(), and the family is reset to 0.
 * The interface is then marked not running and its link state set down.
 */
static void
gre_delete_tunnel(struct gre_softc *sc)
{
	struct gre_socket *gs;

	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
	if (sc->gre_family != 0) {
		CK_LIST_REMOVE(sc, chain);
		CK_LIST_REMOVE(sc, srchash);
		/* NOTE(review): presumably lets in-flight readers drain - confirm. */
		GRE_WAIT();
		free(sc->gre_hdr, M_GRE);
		sc->gre_family = 0;
	}
	/*
	 * If this Tunnel was the last one that could use UDP socket,
	 * we should unlink socket from hash table and close it.
	 */
	if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
		CK_LIST_REMOVE(gs, chain);
		soclose(gs->so);
		/* The gre_socket itself is freed later by gre_sofree(). */
		NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx);
		sc->gre_so = NULL;
	}
	GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
	if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
}
650 
651 struct gre_list *
gre_hashinit(void)652 gre_hashinit(void)
653 {
654 	struct gre_list *hash;
655 	int i;
656 
657 	hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE,
658 	    M_GRE, M_WAITOK);
659 	for (i = 0; i < GRE_HASH_SIZE; i++)
660 		CK_LIST_INIT(&hash[i]);
661 
662 	return (hash);
663 }
664 
665 void
gre_hashdestroy(struct gre_list * hash)666 gre_hashdestroy(struct gre_list *hash)
667 {
668 
669 	free(hash, M_GRE);
670 }
671 
672 void
gre_sofree(epoch_context_t ctx)673 gre_sofree(epoch_context_t ctx)
674 {
675 	struct gre_socket *gs;
676 
677 	gs = __containerof(ctx, struct gre_socket, epoch_ctx);
678 	free(gs, M_GRE);
679 }
680 
/*
 * Add two 16-bit values with end-around carry: if the 16-bit sum wraps,
 * the carry is added back into the result.
 */
static __inline uint16_t
gre_cksum_add(uint16_t sum, uint16_t a)
{
	uint16_t folded;

	folded = (uint16_t)(sum + a);
	/* A wrapped sum is smaller than either operand. */
	if (folded < a)
		folded++;
	return (folded);
}
689 
690 void
gre_update_udphdr(struct gre_softc * sc,struct udphdr * udp,uint16_t csum)691 gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
692 {
693 
694 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
695 	MPASS(sc->gre_options & GRE_UDPENCAP);
696 
697 	udp->uh_dport = htons(GRE_UDPPORT);
698 	udp->uh_sport = htons(sc->gre_port);
699 	udp->uh_sum = csum;
700 	udp->uh_ulen = 0;
701 }
702 
703 void
gre_update_hdr(struct gre_softc * sc,struct grehdr * gh)704 gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
705 {
706 	uint32_t *opts;
707 	uint16_t flags;
708 
709 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
710 
711 	flags = 0;
712 	opts = gh->gre_opts;
713 	if (sc->gre_options & GRE_ENABLE_CSUM) {
714 		flags |= GRE_FLAGS_CP;
715 		sc->gre_hlen += 2 * sizeof(uint16_t);
716 		*opts++ = 0;
717 	}
718 	if (sc->gre_key != 0) {
719 		flags |= GRE_FLAGS_KP;
720 		sc->gre_hlen += sizeof(uint32_t);
721 		*opts++ = htonl(sc->gre_key);
722 	}
723 	if (sc->gre_options & GRE_ENABLE_SEQ) {
724 		flags |= GRE_FLAGS_SP;
725 		sc->gre_hlen += sizeof(uint32_t);
726 		*opts++ = 0;
727 	} else
728 		sc->gre_oseq = 0;
729 	gh->gre_flags = htons(flags);
730 }
731 
/*
 * Input path: validate and strip the GRE header found at offset 'off' in
 * 'm' and hand the inner packet to the matching netisr.
 *
 * m:     received packet; always consumed.
 * off:   offset of the GRE header within the packet.
 * proto: not used here.
 * arg:   the gre_softc the packet was matched to.
 *
 * Packets that are too short, carry unknown flag bits, fail the optional
 * checksum or have an unsupported protocol are dropped and counted as
 * input errors.  Always returns IPPROTO_DONE.
 */
int
gre_input(struct mbuf *m, int off, int proto, void *arg)
{
	struct gre_softc *sc = arg;
	struct grehdr *gh;
	struct ifnet *ifp;
	uint32_t *opts;
#ifdef notyet
	uint32_t key;
#endif
	uint16_t flags;
	int hlen, isr, af;

	ifp = GRE2IFP(sc);
	/* Require room for the base header plus four 32-bit words. */
	hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
	if (m->m_pkthdr.len < hlen)
		goto drop;
	if (m->m_len < hlen) {
		m = m_pullup(m, hlen);
		if (m == NULL)
			goto drop;
	}
	gh = (struct grehdr *)mtodo(m, off);
	flags = ntohs(gh->gre_flags);
	if (flags & ~GRE_FLAGS_MASK)
		goto drop;
	opts = gh->gre_opts;
	/* From here on hlen is the GRE header length: flags + protocol. */
	hlen = 2 * sizeof(uint16_t);
	if (flags & GRE_FLAGS_CP) {
		/* reserved1 field must be zero */
		if (((uint16_t *)opts)[1] != 0)
			goto drop;
		if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
			goto drop;
		hlen += 2 * sizeof(uint16_t);
		opts++;
	}
	if (flags & GRE_FLAGS_KP) {
#ifdef notyet
		/*
		 * XXX: The current implementation uses the key only for
		 * outgoing packets. But we can check the key value here,
		 * or even in the encapcheck function.
		 */
		key = ntohl(*opts);
#endif
		hlen += sizeof(uint32_t);
		opts++;
    }
#ifdef notyet
	} else
		key = 0;

	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
		goto drop;
#endif
	if (flags & GRE_FLAGS_SP) {
#ifdef notyet
		seq = ntohl(*opts);
#endif
		hlen += sizeof(uint32_t);
	}
	switch (ntohs(gh->gre_proto)) {
	case ETHERTYPE_WCCP:
		/*
		 * For WCCP skip an additional 4 bytes if after GRE header
		 * doesn't follow an IP header.
		 */
		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
			hlen += sizeof(uint32_t);
		/* FALLTHROUGH */
	case ETHERTYPE_IP:
		isr = NETISR_IP;
		af = AF_INET;
		break;
	case ETHERTYPE_IPV6:
		isr = NETISR_IPV6;
		af = AF_INET6;
		break;
	default:
		goto drop;
	}
	/* Strip everything up to and including the GRE header. */
	m_adj(m, off + hlen);
	m_clrprotoflags(m);
	m->m_pkthdr.rcvif = ifp;
	M_SETFIB(m, ifp->if_fib);
#ifdef MAC
	mac_ifnet_create_mbuf(ifp, m);
#endif
	BPF_MTAP2(ifp, &af, sizeof(af), m);
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* In monitor mode the packet is counted and tapped, then discarded. */
	if ((ifp->if_flags & IFF_MONITOR) != 0)
		m_freem(m);
	else
		netisr_dispatch(isr, m);
	return (IPPROTO_DONE);
drop:
	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
	m_freem(m);
	return (IPPROTO_DONE);
}
834 
835 static int
836 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
837    struct route *ro)
838 {
839 	uint32_t af;
840 
841 	/* BPF writes need to be handled specially. */
842 	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
843 		bcopy(dst->sa_data, &af, sizeof(af));
844 	else
845 		af = RO_GET_FAMILY(ro, dst);
846 	/*
847 	 * Now save the af in the inbound pkt csum data, this is a cheat since
848 	 * we are using the inbound csum_data field to carry the af over to
849 	 * the gre_transmit() routine, avoiding using yet another mtag.
850 	 */
851 	m->m_pkthdr.csum_data = af;
852 	return (ifp->if_transmit(ifp, m));
853 }
854 
855 static void
856 gre_setseqn(struct grehdr *gh, uint32_t seq)
857 {
858 	uint32_t *opts;
859 	uint16_t flags;
860 
861 	opts = gh->gre_opts;
862 	flags = ntohs(gh->gre_flags);
863 	KASSERT((flags & GRE_FLAGS_SP) != 0,
864 	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
865 	if (flags & GRE_FLAGS_CP)
866 		opts++;
867 	if (flags & GRE_FLAGS_KP)
868 		opts++;
869 	*opts = htonl(seq);
870 }
871 
872 static uint32_t
873 gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
874 {
875 	uint32_t flowid = 0;
876 
877 	if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
878 		return (flowid);
879 	switch (af) {
880 #ifdef INET
881 	case AF_INET:
882 #ifdef RSS
883 		flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
884 		    mtod(m, struct ip *)->ip_dst);
885 		break;
886 #endif
887 		flowid = mtod(m, struct ip *)->ip_src.s_addr ^
888 		    mtod(m, struct ip *)->ip_dst.s_addr;
889 		break;
890 #endif
891 #ifdef INET6
892 	case AF_INET6:
893 #ifdef RSS
894 		flowid = rss_hash_ip6_2tuple(
895 		    &mtod(m, struct ip6_hdr *)->ip6_src,
896 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
897 		break;
898 #endif
899 		flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
900 		    mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
901 		break;
902 #endif
903 	default:
904 		break;
905 	}
906 	return (flowid);
907 }
908 
/* mbuf tag value passed to if_tunnel_check_nesting() for this driver. */
#define	MTAG_GRE	1307983903
/*
 * if_transmit method: encapsulate 'm' and send it through the tunnel.
 *
 * The inner address family is taken from m_pkthdr.csum_data, where
 * gre_output() stored it.  The prebuilt header template (sc->gre_hdr,
 * sc->gre_hlen bytes) is prepended and the per-packet fields are patched:
 * UDP source port, length and checksum seed when UDP encapsulation is on,
 * the GRE protocol, and the optional sequence number and checksum.  The
 * result is handed to the output routine of the tunnel's outer family.
 *
 * Returns 0 on success or an errno value; output counters are updated
 * either way.
 */
static int
gre_transmit(struct ifnet *ifp, struct mbuf *m)
{
	GRE_RLOCK_TRACKER;
	struct gre_softc *sc;
	struct grehdr *gh;
	struct udphdr *uh;
	uint32_t af, flowid;
	int error, len;
	uint16_t proto;

	len = 0;
	GRE_RLOCK();
#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m);
	if (error) {
		m_freem(m);
		goto drop;
	}
#endif
	/* Default error for the state checks below. */
	error = ENETDOWN;
	sc = ifp->if_softc;
	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
	    (ifp->if_flags & IFF_UP) == 0 ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    sc->gre_family == 0 ||
	    (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE,
		V_max_gre_nesting)) != 0) {
		m_freem(m);
		goto drop;
	}
	af = m->m_pkthdr.csum_data;
	BPF_MTAP2(ifp, &af, sizeof(af), m);
	m->m_flags &= ~(M_BCAST|M_MCAST);
	/* Computed before prepending: it reads the inner header at mtod(m). */
	flowid = gre_flowid(sc, m, af);
	M_SETFIB(m, sc->gre_fibnum);
	M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen);
	/* Determine GRE proto */
	switch (af) {
#ifdef INET
	case AF_INET:
		proto = htons(ETHERTYPE_IP);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
		goto drop;
	}
	/* Determine offset of GRE header */
	switch (sc->gre_family) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
		goto drop;
	}
	if (sc->gre_options & GRE_UDPENCAP) {
		/* The UDP header sits between the outer IP and GRE headers. */
		uh = (struct udphdr *)mtodo(m, len);
		/* Mix the flow id into the source port taken from the template. */
		uh->uh_sport |= htons(V_ipport_hifirstauto) |
		    (flowid >> 16) | (flowid & 0xFFFF);
		uh->uh_sport = htons(ntohs(uh->uh_sport) %
		    V_ipport_hilastauto);
		uh->uh_ulen = htons(m->m_pkthdr.len - len);
		uh->uh_sum = gre_cksum_add(uh->uh_sum,
		    htons(m->m_pkthdr.len - len + IPPROTO_UDP));
		m->m_pkthdr.csum_flags = sc->gre_csumflags;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
		len += sizeof(struct udphdr);
	}
	gh = (struct grehdr *)mtodo(m, len);
	gh->gre_proto = proto;
	if (sc->gre_options & GRE_ENABLE_SEQ)
		gre_setseqn(gh, sc->gre_oseq++);
	if (sc->gre_options & GRE_ENABLE_CSUM) {
		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
		    m->m_pkthdr.len, len);
	}
	/* From here on len is the byte count reported in IFCOUNTER_OBYTES. */
	len = m->m_pkthdr.len - len;
	switch (sc->gre_family) {
#ifdef INET
	case AF_INET:
		error = in_gre_output(m, af, sc->gre_hlen);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		error = in6_gre_output(m, af, sc->gre_hlen, flowid);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
	}
drop:
	if (error)
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
	else {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
	}
	GRE_RUNLOCK();
	return (error);
}
1033 
1034 static void
1035 gre_qflush(struct ifnet *ifp __unused)
1036 {
1037 
1038 }
1039 
1040 static int
1041 gre_set_addr_nl(struct gre_softc *sc, struct nl_pstate *npt,
1042     struct sockaddr *src, struct sockaddr *dst)
1043 {
1044 #if defined(INET) || defined(INET6)
1045 	union {
1046 #ifdef INET
1047 		struct in_aliasreq in;
1048 #endif
1049 #ifdef INET6
1050 		struct in6_aliasreq in6;
1051 #endif
1052 	} aliasreq;
1053 #endif
1054 	int error;
1055 
1056 	/* XXX: this sanity check runs again in in[6]_gre_ioctl */
1057 	if (src->sa_family != dst->sa_family)
1058 		error = EADDRNOTAVAIL;
1059 #ifdef INET
1060 	else if (src->sa_family == AF_INET) {
1061 		memcpy(&aliasreq.in.ifra_addr, src, sizeof(struct sockaddr_in));
1062 		memcpy(&aliasreq.in.ifra_dstaddr, dst, sizeof(struct sockaddr_in));
1063 		sx_xlock(&gre_ioctl_sx);
1064 		error = in_gre_ioctl(sc, SIOCSIFPHYADDR, (caddr_t)&aliasreq.in);
1065 		sx_xunlock(&gre_ioctl_sx);
1066 	}
1067 #endif
1068 #ifdef INET6
1069 	else if (src->sa_family == AF_INET6) {
1070 		memcpy(&aliasreq.in6.ifra_addr, src, sizeof(struct sockaddr_in6));
1071 		memcpy(&aliasreq.in6.ifra_dstaddr, dst, sizeof(struct sockaddr_in6));
1072 		sx_xlock(&gre_ioctl_sx);
1073 		error = in6_gre_ioctl(sc, SIOCSIFPHYADDR_IN6, (caddr_t)&aliasreq.in6);
1074 		sx_xunlock(&gre_ioctl_sx);
1075 	}
1076 #endif
1077 	else
1078 		error = EAFNOSUPPORT;
1079 
1080 	if (error == EADDRNOTAVAIL)
1081 		nlmsg_report_err_msg(npt, "address is invalid");
1082 	if (error == EEXIST)
1083 		nlmsg_report_err_msg(npt, "remote and local addresses are the same");
1084 	if (error == EAFNOSUPPORT)
1085 		nlmsg_report_err_msg(npt, "address family is not supported");
1086 
1087 	return (error);
1088 }
1089 
1090 static int
1091 gre_set_flags_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t opt)
1092 {
1093 	int error = 0;
1094 
1095 	sx_xlock(&gre_ioctl_sx);
1096 	error = gre_set_flags(sc, opt);
1097 	sx_xunlock(&gre_ioctl_sx);
1098 
1099 	if (error == EINVAL)
1100 		nlmsg_report_err_msg(npt, "gre flags are invalid");
1101 
1102 	return (error);
1103 }
1104 
1105 static int
1106 gre_set_key_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t key)
1107 {
1108 	int error = 0;
1109 
1110 	sx_xlock(&gre_ioctl_sx);
1111 	error = gre_set_key(sc, key);
1112 	sx_xunlock(&gre_ioctl_sx);
1113 
1114 	if (error == EINVAL)
1115 		nlmsg_report_err_msg(npt, "gre key is invalid: %u", key);
1116 
1117 	return (error);
1118 }
1119 
1120 static int
1121 gre_set_encap_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t type)
1122 {
1123 	uint32_t opt;
1124 	int error = 0;
1125 
1126 	sx_xlock(&gre_ioctl_sx);
1127 	opt = sc->gre_options;
1128 	if (type & IFLA_TUNNEL_GRE_UDP)
1129 		opt |= GRE_UDPENCAP;
1130 	else
1131 		opt &= ~GRE_UDPENCAP;
1132 	error = gre_set_flags(sc, opt);
1133 	sx_xunlock(&gre_ioctl_sx);
1134 
1135 	if (error == EEXIST)
1136 		nlmsg_report_err_msg(npt, "same gre tunnel exist");
1137 
1138 	return (error);
1139 }
1140 
1141 
1142 static int
1143 gre_set_udp_sport_nl(struct gre_softc *sc, struct nl_pstate *npt, uint16_t port)
1144 {
1145 	int error = 0;
1146 
1147 	sx_xlock(&gre_ioctl_sx);
1148 	error = gre_set_udp_sport(sc, port);
1149 	sx_xunlock(&gre_ioctl_sx);
1150 
1151 	if (error == EINVAL)
1152 		nlmsg_report_err_msg(npt, "source port is invalid: %u", port);
1153 
1154 	return (error);
1155 }
1156 
1157 
1158 static int
1159 gremodevent(module_t mod, int type, void *data)
1160 {
1161 
1162 	switch (type) {
1163 	case MOD_LOAD:
1164 		NL_VERIFY_PARSERS(all_parsers);
1165 		break;
1166 	case MOD_UNLOAD:
1167 		break;
1168 	default:
1169 		return (EOPNOTSUPP);
1170 	}
1171 	return (0);
1172 }
1173 
1174 static moduledata_t gre_mod = {
1175 	"if_gre",
1176 	gremodevent,
1177 	0
1178 };
1179 
1180 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
1181 MODULE_VERSION(if_gre, 1);
1182