1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1998 The NetBSD Foundation, Inc.
5 * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 *
11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
35 */
36
37 #include <sys/cdefs.h>
38 #include "opt_inet.h"
39 #include "opt_inet6.h"
40 #include "opt_rss.h"
41
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/mbuf.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sockio.h>
53 #include <sys/sx.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/systm.h>
57
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_private.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/vnet.h>
66 #include <net/route.h>
67
68 #include <netinet/in.h>
69 #include <netinet/in_pcb.h>
70 #ifdef INET
71 #include <netinet/in_var.h>
72 #include <netinet/ip.h>
73 #include <netinet/ip_var.h>
74 #ifdef RSS
75 #include <netinet/in_rss.h>
76 #endif
77 #endif
78
79 #ifdef INET6
80 #include <netinet/ip6.h>
81 #include <netinet6/in6_var.h>
82 #include <netinet6/ip6_var.h>
83 #ifdef RSS
84 #include <netinet6/in6_rss.h>
85 #endif
86 #endif
87
88 #include <netinet/ip_encap.h>
89 #include <netinet/udp.h>
90 #include <net/bpf.h>
91 #include <net/if_gre.h>
92
93 #include <netlink/netlink.h>
94 #include <netlink/netlink_ctl.h>
95 #include <netlink/netlink_var.h>
96 #include <netlink/netlink_route.h>
97 #include <netlink/route/route_var.h>
98
99 #include <machine/in_cksum.h>
100 #include <security/mac/mac_framework.h>
101
102 #define GREMTU 1476
103
104 static const char grename[] = "gre";
105 MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
106
107 static struct sx gre_ioctl_sx;
108 SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
109 #define GRE_LOCK_ASSERT() sx_assert(&gre_ioctl_sx, SA_XLOCKED);
110
/* Interface cloner callbacks: legacy create/destroy and netlink variants. */
static int gre_clone_create(struct if_clone *, char *, size_t,
    struct ifc_data *, struct ifnet **);
static int gre_clone_destroy(struct if_clone *, struct ifnet *,
    uint32_t);
static int gre_clone_create_nl(struct if_clone *, char *, size_t,
    struct ifc_data_nl *);
static int gre_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
static void gre_clone_dump_nl(struct ifnet *, struct nl_writer *);
/* Per-vnet handle returned by ifc_attach_cloner() in vnet_gre_init(). */
VNET_DEFINE_STATIC(struct if_clone *, gre_cloner);
#define V_gre_cloner VNET(gre_cloner)

/* ifnet method implementations installed by gre_clone_create(). */
#ifdef VIMAGE
static void gre_reassign(struct ifnet *, struct vnet *, char *);
#endif
static void gre_qflush(struct ifnet *);
static int gre_transmit(struct ifnet *, struct mbuf *);
static int gre_ioctl(struct ifnet *, u_long, caddr_t);
static int gre_output(struct ifnet *, struct mbuf *,
    const struct sockaddr *, struct route *);
static void gre_delete_tunnel(struct gre_softc *);
static int gre_set_addr_nl(struct gre_softc *, struct nl_pstate *,
    struct sockaddr *, struct sockaddr *);

/* Option setters; each asserts that gre_ioctl_sx is held exclusively. */
static int gre_set_flags(struct gre_softc *, uint32_t);
static int gre_set_key(struct gre_softc *, uint32_t);
static int gre_set_udp_sport(struct gre_softc *, uint16_t);
static int gre_setopts(struct gre_softc *, u_long, uint32_t);

/*
 * Netlink wrappers: take gre_ioctl_sx, call the setter above and report
 * a human-readable message through the nl_pstate on failure.
 */
static int gre_set_flags_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
static int gre_set_key_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
static int gre_set_encap_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
static int gre_set_udp_sport_nl(struct gre_softc *, struct nl_pstate *, uint16_t);
143
/* net.link.gre sysctl subtree. */
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Generic Routing Encapsulation");
#ifndef MAX_GRE_NEST
/*
 * This macro sets the default upper limit on the nesting of gre tunnels.
 * Because a large value combined with a careless configuration may crash
 * the system, nesting is not allowed by default.  If you need nested gre
 * tunnels, define this macro in your kernel configuration file; if you do
 * so, take care to configure the tunnels so that they do not form a loop.
 */
#define MAX_GRE_NEST 1
#endif

/*
 * Per-vnet nesting limit, exported read-write as net.link.gre.max_nesting
 * and passed to if_tunnel_check_nesting() in gre_transmit().
 */
VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST;
#define V_max_gre_nesting VNET(max_gre_nesting)
SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
163
/*
 * Destination for the nested IFLA_INFO_DATA attributes of a gre link
 * message; filled by nl_parse_nested() in gre_clone_modify_nl().
 */
struct nl_parsed_gre {
	struct sockaddr *ifla_local;	/* IFLA_GRE_LOCAL */
	struct sockaddr *ifla_remote;	/* IFLA_GRE_REMOTE */
	uint32_t ifla_flags;		/* IFLA_GRE_FLAGS */
	uint32_t ifla_okey;		/* IFLA_GRE_OKEY */
	uint32_t ifla_encap_type;	/* IFLA_GRE_ENCAP_TYPE */
	uint16_t ifla_encap_sport;	/* IFLA_GRE_ENCAP_SPORT */
};

/* Maps each supported attribute type to its field and decoding callback. */
#define _OUT(_field) offsetof(struct nl_parsed_gre, _field)
static const struct nlattr_parser nla_p_gre[] = {
	{ .type = IFLA_GRE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
	{ .type = IFLA_GRE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
	{ .type = IFLA_GRE_FLAGS, .off = _OUT(ifla_flags), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GRE_OKEY, .off = _OUT(ifla_okey), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GRE_ENCAP_TYPE, .off = _OUT(ifla_encap_type), .cb = nlattr_get_uint32 },
	{ .type = IFLA_GRE_ENCAP_SPORT, .off = _OUT(ifla_encap_sport), .cb = nlattr_get_uint16 },
};
#undef _OUT
NL_DECLARE_ATTR_PARSER(gre_modify_parser, nla_p_gre);

/* Parsers checked by NL_VERIFY_PARSERS() when the module is loaded. */
static const struct nlhdr_parser *all_parsers[] = {
	&gre_modify_parser,
};
188
189
190 static void
vnet_gre_init(const void * unused __unused)191 vnet_gre_init(const void *unused __unused)
192 {
193 struct if_clone_addreq_v2 req = {
194 .version = 2,
195 .flags = IFC_F_AUTOUNIT,
196 .match_f = NULL,
197 .create_f = gre_clone_create,
198 .destroy_f = gre_clone_destroy,
199 .create_nl_f = gre_clone_create_nl,
200 .modify_nl_f = gre_clone_modify_nl,
201 .dump_nl_f = gre_clone_dump_nl,
202 };
203 V_gre_cloner = ifc_attach_cloner(grename, (struct if_clone_addreq *)&req);
204 #ifdef INET
205 in_gre_init();
206 #endif
207 #ifdef INET6
208 in6_gre_init();
209 #endif
210 }
211 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
212 vnet_gre_init, NULL);
213
214 static void
vnet_gre_uninit(const void * unused __unused)215 vnet_gre_uninit(const void *unused __unused)
216 {
217
218 ifc_detach_cloner(V_gre_cloner);
219 #ifdef INET
220 in_gre_uninit();
221 #endif
222 #ifdef INET6
223 in6_gre_uninit();
224 #endif
225 /* XXX: epoch_call drain */
226 }
227 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
228 vnet_gre_uninit, NULL);
229
230 static int
gre_clone_create_nl(struct if_clone * ifc,char * name,size_t len,struct ifc_data_nl * ifd)231 gre_clone_create_nl(struct if_clone *ifc, char *name, size_t len,
232 struct ifc_data_nl *ifd)
233 {
234 struct ifc_data ifd_new = {
235 .flags = IFC_F_SYSSPACE,
236 .unit = ifd->unit,
237 };
238
239 return (gre_clone_create(ifc, name, len, &ifd_new, &ifd->ifp));
240 }
241
242 static int
gre_clone_modify_nl(struct ifnet * ifp,struct ifc_data_nl * ifd)243 gre_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
244 {
245 struct gre_softc *sc = ifp->if_softc;
246 struct nl_parsed_link *lattrs = ifd->lattrs;
247 struct nl_pstate *npt = ifd->npt;
248 struct nl_parsed_gre params;
249 struct nlattr *attrs = lattrs->ifla_idata;
250 struct nlattr_bmask bm;
251 int error = 0;
252
253 if ((attrs == NULL) ||
254 (nl_has_attr(ifd->bm, IFLA_LINKINFO) == 0)) {
255 error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
256 return (error);
257 }
258
259 error = priv_check(curthread, PRIV_NET_GRE);
260 if (error)
261 return (error);
262
263 /* make sure ignored attributes by nl_parse will not cause panics */
264 memset(¶ms, 0, sizeof(params));
265
266 nl_get_attrs_bmask_raw(NLA_DATA(attrs), NLA_DATA_LEN(attrs), &bm);
267 if ((error = nl_parse_nested(attrs, &gre_modify_parser, npt, ¶ms)) != 0)
268 return (error);
269
270 if (nl_has_attr(&bm, IFLA_GRE_LOCAL) && nl_has_attr(&bm, IFLA_GRE_REMOTE))
271 error = gre_set_addr_nl(sc, npt, params.ifla_local, params.ifla_remote);
272 else if (nl_has_attr(&bm, IFLA_GRE_LOCAL) || nl_has_attr(&bm, IFLA_GRE_REMOTE)) {
273 error = EINVAL;
274 nlmsg_report_err_msg(npt, "Specify both remote and local address together");
275 }
276
277 if (error == 0 && nl_has_attr(&bm, IFLA_GRE_FLAGS))
278 error = gre_set_flags_nl(sc, npt, params.ifla_flags);
279
280 if (error == 0 && nl_has_attr(&bm, IFLA_GRE_OKEY))
281 error = gre_set_key_nl(sc, npt, params.ifla_okey);
282
283 if (error == 0 && nl_has_attr(&bm, IFLA_GRE_ENCAP_TYPE))
284 error = gre_set_encap_nl(sc, npt, params.ifla_encap_type);
285
286 if (error == 0 && nl_has_attr(&bm, IFLA_GRE_ENCAP_SPORT))
287 error = gre_set_udp_sport_nl(sc, npt, params.ifla_encap_sport);
288
289 if (error == 0)
290 error = nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt);
291
292 return (error);
293 }
294
295 static void
gre_clone_dump_nl(struct ifnet * ifp,struct nl_writer * nw)296 gre_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw)
297 {
298 GRE_RLOCK_TRACKER;
299 struct gre_softc *sc;
300 struct ifreq ifr;
301
302 nlattr_add_u32(nw, IFLA_LINK, ifp->if_index);
303 nlattr_add_string(nw, IFLA_IFNAME, ifp->if_xname);
304
305 int off = nlattr_add_nested(nw, IFLA_LINKINFO);
306 if (off == 0)
307 return;
308
309 nlattr_add_string(nw, IFLA_INFO_KIND, "gre");
310 int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA);
311 if (off2 == 0) {
312 nlattr_set_len(nw, off);
313 return;
314 }
315
316 sc = ifp->if_softc;
317 GRE_RLOCK();
318
319 if (sc->gre_family == AF_INET) {
320 #ifdef INET
321 if (in_gre_ioctl(sc, SIOCGIFPSRCADDR, (caddr_t)&ifr) == 0)
322 nlattr_add_in_addr(nw, IFLA_GRE_LOCAL,
323 (const struct in_addr *)&ifr.ifr_addr);
324 if (in_gre_ioctl(sc, SIOCGIFPDSTADDR, (caddr_t)&ifr) == 0)
325 nlattr_add_in_addr(nw, IFLA_GRE_LOCAL,
326 (const struct in_addr *)&ifr.ifr_dstaddr);
327 #endif
328 } else if (sc->gre_family == AF_INET6) {
329 #ifdef INET6
330 if (in6_gre_ioctl(sc, SIOCGIFPSRCADDR_IN6, (caddr_t)&ifr) == 0)
331 nlattr_add_in6_addr(nw, IFLA_GRE_LOCAL,
332 (const struct in6_addr *)&ifr.ifr_addr);
333 if (in6_gre_ioctl(sc, SIOCGIFPDSTADDR_IN6, (caddr_t)&ifr) == 0)
334 nlattr_add_in6_addr(nw, IFLA_GRE_LOCAL,
335 (const struct in6_addr *)&ifr.ifr_dstaddr);
336 #endif
337 }
338
339 nlattr_add_u32(nw, IFLA_GRE_FLAGS, sc->gre_options);
340 nlattr_add_u32(nw, IFLA_GRE_OKEY, sc->gre_key);
341 nlattr_add_u32(nw, IFLA_GRE_ENCAP_TYPE,
342 sc->gre_options & GRE_UDPENCAP ? IFLA_TUNNEL_GRE_UDP : IFLA_TUNNEL_NONE);
343 nlattr_add_u16(nw, IFLA_GRE_ENCAP_SPORT, sc->gre_port);
344
345 nlattr_set_len(nw, off2);
346 nlattr_set_len(nw, off);
347
348 GRE_RUNLOCK();
349 }
350
351 static int
gre_clone_create(struct if_clone * ifc,char * name,size_t len,struct ifc_data * ifd,struct ifnet ** ifpp)352 gre_clone_create(struct if_clone *ifc, char *name, size_t len,
353 struct ifc_data *ifd, struct ifnet **ifpp)
354 {
355 struct gre_softc *sc;
356
357 sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
358 sc->gre_fibnum = curthread->td_proc->p_fibnum;
359 GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
360 GRE2IFP(sc)->if_softc = sc;
361 if_initname(GRE2IFP(sc), grename, ifd->unit);
362
363 GRE2IFP(sc)->if_mtu = GREMTU;
364 GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
365 GRE2IFP(sc)->if_output = gre_output;
366 GRE2IFP(sc)->if_ioctl = gre_ioctl;
367 GRE2IFP(sc)->if_transmit = gre_transmit;
368 GRE2IFP(sc)->if_qflush = gre_qflush;
369 #ifdef VIMAGE
370 GRE2IFP(sc)->if_reassign = gre_reassign;
371 #endif
372 GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
373 GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
374 if_attach(GRE2IFP(sc));
375 bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
376 *ifpp = GRE2IFP(sc);
377
378 return (0);
379 }
380
#ifdef VIMAGE
/*
 * if_reassign handler: tears the tunnel configuration down under the
 * configuration lock.  Does nothing if the softc is already detached.
 */
static void
gre_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
    char *unused __unused)
{
	struct gre_softc *softc;

	sx_xlock(&gre_ioctl_sx);
	if ((softc = ifp->if_softc) != NULL)
		gre_delete_tunnel(softc);
	sx_xunlock(&gre_ioctl_sx);
}
#endif /* VIMAGE */
395
396 static int
gre_clone_destroy(struct if_clone * ifc,struct ifnet * ifp,uint32_t flags)397 gre_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
398 {
399 struct gre_softc *sc;
400
401 sx_xlock(&gre_ioctl_sx);
402 sc = ifp->if_softc;
403 gre_delete_tunnel(sc);
404 bpfdetach(ifp);
405 if_detach(ifp);
406 ifp->if_softc = NULL;
407 sx_xunlock(&gre_ioctl_sx);
408
409 GRE_WAIT();
410 if_free(ifp);
411 free(sc, M_GRE);
412
413 return (0);
414 }
415
416 static int
gre_set_key(struct gre_softc * sc,uint32_t key)417 gre_set_key(struct gre_softc *sc, uint32_t key)
418 {
419 int error = 0;
420
421 GRE_LOCK_ASSERT();
422
423 if (sc->gre_key == key)
424 return (0);
425 error = gre_setopts(sc, GRESKEY, key);
426
427 return (error);
428 }
429
430 static int
gre_set_flags(struct gre_softc * sc,uint32_t opt)431 gre_set_flags(struct gre_softc *sc, uint32_t opt)
432 {
433 int error = 0;
434
435 GRE_LOCK_ASSERT();
436
437 if (opt & ~GRE_OPTMASK)
438 return (EINVAL);
439 if (sc->gre_options == opt)
440 return (0);
441 error = gre_setopts(sc, GRESOPTS, opt);
442
443 return (error);
444 }
445
446 static int
gre_set_udp_sport(struct gre_softc * sc,uint16_t port)447 gre_set_udp_sport(struct gre_softc *sc, uint16_t port)
448 {
449 int error = 0;
450
451 GRE_LOCK_ASSERT();
452
453 if (port != 0 && (port < V_ipport_hifirstauto ||
454 port > V_ipport_hilastauto))
455 return (EINVAL);
456 if (sc->gre_port == port)
457 return (0);
458 if ((sc->gre_options & GRE_UDPENCAP) == 0) {
459 /*
460 * UDP encapsulation is not enabled, thus
461 * there is no need to reattach softc.
462 */
463 sc->gre_port = port;
464 return (0);
465 }
466 error = gre_setopts(sc, GRESPORT, port);
467
468 return (error);
469 }
470
471 static int
gre_setopts(struct gre_softc * sc,u_long cmd,uint32_t opt)472 gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t opt)
473 {
474 int error = 0;
475
476 GRE_LOCK_ASSERT();
477
478 switch (sc->gre_family) {
479 #ifdef INET
480 case AF_INET:
481 error = in_gre_setopts(sc, cmd, opt);
482 break;
483 #endif
484 #ifdef INET6
485 case AF_INET6:
486 error = in6_gre_setopts(sc, cmd, opt);
487 break;
488 #endif
489 default:
490 /*
491 * Tunnel is not yet configured.
492 * We can just change any parameters.
493 */
494 if (cmd == GRESKEY)
495 sc->gre_key = opt;
496 if (cmd == GRESOPTS)
497 sc->gre_options = opt;
498 if (cmd == GRESPORT)
499 sc->gre_port = opt;
500 break;
501 }
502 /*
503 * XXX: Do we need to initiate change of interface
504 * state here?
505 */
506 return (error);
507 };
508
509 static int
gre_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)510 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
511 {
512 struct ifreq *ifr = (struct ifreq *)data;
513 struct gre_softc *sc;
514 uint32_t opt;
515 int error;
516
517 switch (cmd) {
518 case SIOCSIFMTU:
519 /* XXX: */
520 if (ifr->ifr_mtu < 576)
521 return (EINVAL);
522 ifp->if_mtu = ifr->ifr_mtu;
523 return (0);
524 case SIOCSIFADDR:
525 ifp->if_flags |= IFF_UP;
526 case SIOCSIFFLAGS:
527 case SIOCADDMULTI:
528 case SIOCDELMULTI:
529 return (0);
530 case GRESADDRS:
531 case GRESADDRD:
532 case GREGADDRS:
533 case GREGADDRD:
534 case GRESPROTO:
535 case GREGPROTO:
536 return (EOPNOTSUPP);
537 }
538 sx_xlock(&gre_ioctl_sx);
539 sc = ifp->if_softc;
540 if (sc == NULL) {
541 error = ENXIO;
542 goto end;
543 }
544 error = 0;
545 switch (cmd) {
546 case SIOCDIFPHYADDR:
547 if (sc->gre_family == 0)
548 break;
549 gre_delete_tunnel(sc);
550 break;
551 #ifdef INET
552 case SIOCSIFPHYADDR:
553 case SIOCGIFPSRCADDR:
554 case SIOCGIFPDSTADDR:
555 error = in_gre_ioctl(sc, cmd, data);
556 break;
557 #endif
558 #ifdef INET6
559 case SIOCSIFPHYADDR_IN6:
560 case SIOCGIFPSRCADDR_IN6:
561 case SIOCGIFPDSTADDR_IN6:
562 error = in6_gre_ioctl(sc, cmd, data);
563 break;
564 #endif
565 case SIOCGTUNFIB:
566 ifr->ifr_fib = sc->gre_fibnum;
567 break;
568 case SIOCSTUNFIB:
569 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
570 break;
571 if (ifr->ifr_fib >= rt_numfibs)
572 error = EINVAL;
573 else
574 sc->gre_fibnum = ifr->ifr_fib;
575 break;
576 case GRESKEY:
577 case GRESOPTS:
578 case GRESPORT:
579 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
580 break;
581 if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
582 sizeof(opt))) != 0)
583 break;
584 if (cmd == GRESKEY)
585 error = gre_set_key(sc, opt);
586 else if (cmd == GRESOPTS)
587 error = gre_set_flags(sc, opt);
588 else if (cmd == GRESPORT)
589 error = gre_set_udp_sport(sc, opt);
590 break;
591 case GREGKEY:
592 error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
593 sizeof(sc->gre_key));
594 break;
595 case GREGOPTS:
596 error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
597 sizeof(sc->gre_options));
598 break;
599 case GREGPORT:
600 error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
601 sizeof(sc->gre_port));
602 break;
603 default:
604 error = EINVAL;
605 break;
606 }
607 if (error == 0 && sc->gre_family != 0) {
608 if (
609 #ifdef INET
610 cmd == SIOCSIFPHYADDR ||
611 #endif
612 #ifdef INET6
613 cmd == SIOCSIFPHYADDR_IN6 ||
614 #endif
615 0) {
616 if_link_state_change(ifp, LINK_STATE_UP);
617 }
618 }
619 end:
620 sx_xunlock(&gre_ioctl_sx);
621 return (error);
622 }
623
624 static void
gre_delete_tunnel(struct gre_softc * sc)625 gre_delete_tunnel(struct gre_softc *sc)
626 {
627 struct gre_socket *gs;
628
629 sx_assert(&gre_ioctl_sx, SA_XLOCKED);
630 if (sc->gre_family != 0) {
631 CK_LIST_REMOVE(sc, chain);
632 CK_LIST_REMOVE(sc, srchash);
633 GRE_WAIT();
634 free(sc->gre_hdr, M_GRE);
635 sc->gre_family = 0;
636 }
637 /*
638 * If this Tunnel was the last one that could use UDP socket,
639 * we should unlink socket from hash table and close it.
640 */
641 if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
642 CK_LIST_REMOVE(gs, chain);
643 soclose(gs->so);
644 NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx);
645 sc->gre_so = NULL;
646 }
647 GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
648 if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
649 }
650
651 struct gre_list *
gre_hashinit(void)652 gre_hashinit(void)
653 {
654 struct gre_list *hash;
655 int i;
656
657 hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE,
658 M_GRE, M_WAITOK);
659 for (i = 0; i < GRE_HASH_SIZE; i++)
660 CK_LIST_INIT(&hash[i]);
661
662 return (hash);
663 }
664
665 void
gre_hashdestroy(struct gre_list * hash)666 gre_hashdestroy(struct gre_list *hash)
667 {
668
669 free(hash, M_GRE);
670 }
671
672 void
gre_sofree(epoch_context_t ctx)673 gre_sofree(epoch_context_t ctx)
674 {
675 struct gre_socket *gs;
676
677 gs = __containerof(ctx, struct gre_socket, epoch_ctx);
678 free(gs, M_GRE);
679 }
680
/*
 * One's-complement addition of two 16-bit checksum words: the sum is
 * truncated to 16 bits and a carry out of bit 15 is added back in
 * (end-around carry).
 */
static __inline uint16_t
gre_cksum_add(uint16_t sum, uint16_t a)
{
	uint16_t res;

	res = sum + a;
	/* The truncated sum is smaller than an operand iff a carry occurred. */
	return (res + (res < a));
}
689
690 void
gre_update_udphdr(struct gre_softc * sc,struct udphdr * udp,uint16_t csum)691 gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
692 {
693
694 sx_assert(&gre_ioctl_sx, SA_XLOCKED);
695 MPASS(sc->gre_options & GRE_UDPENCAP);
696
697 udp->uh_dport = htons(GRE_UDPPORT);
698 udp->uh_sport = htons(sc->gre_port);
699 udp->uh_sum = csum;
700 udp->uh_ulen = 0;
701 }
702
703 void
gre_update_hdr(struct gre_softc * sc,struct grehdr * gh)704 gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
705 {
706 uint32_t *opts;
707 uint16_t flags;
708
709 sx_assert(&gre_ioctl_sx, SA_XLOCKED);
710
711 flags = 0;
712 opts = gh->gre_opts;
713 if (sc->gre_options & GRE_ENABLE_CSUM) {
714 flags |= GRE_FLAGS_CP;
715 sc->gre_hlen += 2 * sizeof(uint16_t);
716 *opts++ = 0;
717 }
718 if (sc->gre_key != 0) {
719 flags |= GRE_FLAGS_KP;
720 sc->gre_hlen += sizeof(uint32_t);
721 *opts++ = htonl(sc->gre_key);
722 }
723 if (sc->gre_options & GRE_ENABLE_SEQ) {
724 flags |= GRE_FLAGS_SP;
725 sc->gre_hlen += sizeof(uint32_t);
726 *opts++ = 0;
727 } else
728 sc->gre_oseq = 0;
729 gh->gre_flags = htons(flags);
730 }
731
/*
 * Input path for a GRE packet matched to tunnel softc `arg'.
 *
 * `off' is the offset of the GRE header inside the mbuf.  The header is
 * validated (flag mask, optional checksum), the optional fields are
 * skipped, and the payload is handed to the IPv4 or IPv6 netisr selected
 * by the GRE protocol field.  Packets that fail validation or carry an
 * unsupported protocol are counted as input errors and freed.
 *
 * Always returns IPPROTO_DONE; the mbuf is consumed in every case.
 */
int
gre_input(struct mbuf *m, int off, int proto, void *arg)
{
	struct gre_softc *sc = arg;
	struct grehdr *gh;
	struct ifnet *ifp;
	uint32_t *opts;
#ifdef notyet
	uint32_t key;
#endif
	uint16_t flags;
	int hlen, isr, af;

	ifp = GRE2IFP(sc);
	/*
	 * Require the base header plus four 32-bit words to be present
	 * and contiguous before any optional field is examined.
	 */
	hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
	if (m->m_pkthdr.len < hlen)
		goto drop;
	if (m->m_len < hlen) {
		m = m_pullup(m, hlen);
		if (m == NULL)
			goto drop;
	}
	gh = (struct grehdr *)mtodo(m, off);
	flags = ntohs(gh->gre_flags);
	if (flags & ~GRE_FLAGS_MASK)
		goto drop;
	opts = gh->gre_opts;
	/* From here on hlen counts the GRE header only: flags + protocol. */
	hlen = 2 * sizeof(uint16_t);
	if (flags & GRE_FLAGS_CP) {
		/* reserved1 field must be zero */
		if (((uint16_t *)opts)[1] != 0)
			goto drop;
		/* The checksum covers everything from the GRE header on. */
		if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
			goto drop;
		hlen += 2 * sizeof(uint16_t);
		opts++;
	}
	if (flags & GRE_FLAGS_KP) {
#ifdef notyet
	/*
	 * XXX: The current implementation uses the key only for outgoing
	 * packets. But we can check the key value here, or even in the
	 * encapcheck function.
	 */
		key = ntohl(*opts);
#endif
		hlen += sizeof(uint32_t);
		opts++;
	}
	/*
	 * NOTE(review): the "notyet" fragments are dead code and would not
	 * build as written if enabled (extra closing brace below, `seq' is
	 * not declared in this function).
	 */
#ifdef notyet
	} else
		key = 0;

	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
		goto drop;
#endif
	if (flags & GRE_FLAGS_SP) {
#ifdef notyet
		seq = ntohl(*opts);
#endif
		hlen += sizeof(uint32_t);
	}
	switch (ntohs(gh->gre_proto)) {
	case ETHERTYPE_WCCP:
		/*
		 * For WCCP skip an additional 4 bytes if after GRE header
		 * doesn't follow an IP header.
		 */
		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
			hlen += sizeof(uint32_t);
		/* FALLTHROUGH */
	case ETHERTYPE_IP:
		isr = NETISR_IP;
		af = AF_INET;
		break;
	case ETHERTYPE_IPV6:
		isr = NETISR_IPV6;
		af = AF_INET6;
		break;
	default:
		goto drop;
	}
	/* Strip the outer headers and the GRE header. */
	m_adj(m, off + hlen);
	m_clrprotoflags(m);
	m->m_pkthdr.rcvif = ifp;
	M_SETFIB(m, ifp->if_fib);
#ifdef MAC
	mac_ifnet_create_mbuf(ifp, m);
#endif
	BPF_MTAP2(ifp, &af, sizeof(af), m);
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* In monitor mode the packet is counted and tapped, then discarded. */
	if ((ifp->if_flags & IFF_MONITOR) != 0)
		m_freem(m);
	else
		netisr_dispatch(isr, m);
	return (IPPROTO_DONE);
drop:
	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
	m_freem(m);
	return (IPPROTO_DONE);
}
834
835 static int
836 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
837 struct route *ro)
838 {
839 uint32_t af;
840
841 /* BPF writes need to be handled specially. */
842 if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
843 bcopy(dst->sa_data, &af, sizeof(af));
844 else
845 af = RO_GET_FAMILY(ro, dst);
846 /*
847 * Now save the af in the inbound pkt csum data, this is a cheat since
848 * we are using the inbound csum_data field to carry the af over to
849 * the gre_transmit() routine, avoiding using yet another mtag.
850 */
851 m->m_pkthdr.csum_data = af;
852 return (ifp->if_transmit(ifp, m));
853 }
854
855 static void
856 gre_setseqn(struct grehdr *gh, uint32_t seq)
857 {
858 uint32_t *opts;
859 uint16_t flags;
860
861 opts = gh->gre_opts;
862 flags = ntohs(gh->gre_flags);
863 KASSERT((flags & GRE_FLAGS_SP) != 0,
864 ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
865 if (flags & GRE_FLAGS_CP)
866 opts++;
867 if (flags & GRE_FLAGS_KP)
868 opts++;
869 *opts = htonl(seq);
870 }
871
872 static uint32_t
873 gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
874 {
875 uint32_t flowid = 0;
876
877 if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
878 return (flowid);
879 switch (af) {
880 #ifdef INET
881 case AF_INET:
882 #ifdef RSS
883 flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
884 mtod(m, struct ip *)->ip_dst);
885 break;
886 #endif
887 flowid = mtod(m, struct ip *)->ip_src.s_addr ^
888 mtod(m, struct ip *)->ip_dst.s_addr;
889 break;
890 #endif
891 #ifdef INET6
892 case AF_INET6:
893 #ifdef RSS
894 flowid = rss_hash_ip6_2tuple(
895 &mtod(m, struct ip6_hdr *)->ip6_src,
896 &mtod(m, struct ip6_hdr *)->ip6_dst);
897 break;
898 #endif
899 flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
900 mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
901 break;
902 #endif
903 default:
904 break;
905 }
906 return (flowid);
907 }
908
/* Mbuf tag cookie passed to if_tunnel_check_nesting() to detect nesting. */
#define MTAG_GRE 1307983903
/*
 * if_transmit handler: encapsulate one packet and send it through the
 * outer address family's output routine.
 *
 * The payload address family is taken from m_pkthdr.csum_data, where
 * gre_output() stored it.  The prebuilt outer headers are prepended and
 * then patched per packet: GRE protocol, optional UDP source port, length
 * and checksum, optional GRE sequence number and checksum.
 *
 * Returns 0 on success or an errno value; the mbuf is consumed in every
 * case and the interface output counters are updated accordingly.
 */
static int
gre_transmit(struct ifnet *ifp, struct mbuf *m)
{
	GRE_RLOCK_TRACKER;
	struct gre_softc *sc;
	struct grehdr *gh;
	struct udphdr *uh;
	uint32_t af, flowid;
	int error, len;
	uint16_t proto;

	len = 0;
	GRE_RLOCK();
#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m);
	if (error) {
		m_freem(m);
		goto drop;
	}
#endif
	/* Default error for the state checks; the nesting check sets its own. */
	error = ENETDOWN;
	sc = ifp->if_softc;
	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
	    (ifp->if_flags & IFF_UP) == 0 ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    sc->gre_family == 0 ||
	    (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE,
	    V_max_gre_nesting)) != 0) {
		m_freem(m);
		goto drop;
	}
	/* Payload address family stashed by gre_output(). */
	af = m->m_pkthdr.csum_data;
	BPF_MTAP2(ifp, &af, sizeof(af), m);
	m->m_flags &= ~(M_BCAST|M_MCAST);
	/* Computed before M_PREPEND, while the payload header is at the front. */
	flowid = gre_flowid(sc, m, af);
	M_SETFIB(m, sc->gre_fibnum);
	M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	/* Copy in the prebuilt outer header template. */
	bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen);
	/* Determine GRE proto */
	switch (af) {
#ifdef INET
	case AF_INET:
		proto = htons(ETHERTYPE_IP);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
		goto drop;
	}
	/* Determine offset of GRE header */
	switch (sc->gre_family) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
		goto drop;
	}
	if (sc->gre_options & GRE_UDPENCAP) {
		uh = (struct udphdr *)mtodo(m, len);
		/*
		 * Fold the flow id into the template source port, then
		 * reduce the result modulo V_ipport_hilastauto.
		 */
		uh->uh_sport |= htons(V_ipport_hifirstauto) |
		    (flowid >> 16) | (flowid & 0xFFFF);
		uh->uh_sport = htons(ntohs(uh->uh_sport) %
		    V_ipport_hilastauto);
		uh->uh_ulen = htons(m->m_pkthdr.len - len);
		/* Add the UDP length and protocol to the template checksum. */
		uh->uh_sum = gre_cksum_add(uh->uh_sum,
		    htons(m->m_pkthdr.len - len + IPPROTO_UDP));
		m->m_pkthdr.csum_flags = sc->gre_csumflags;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
		len += sizeof(struct udphdr);
	}
	/* len is now the offset of the GRE header. */
	gh = (struct grehdr *)mtodo(m, len);
	gh->gre_proto = proto;
	if (sc->gre_options & GRE_ENABLE_SEQ)
		gre_setseqn(gh, sc->gre_oseq++);
	if (sc->gre_options & GRE_ENABLE_CSUM) {
		/* The checksum covers the GRE header and the payload. */
		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
		    m->m_pkthdr.len, len);
	}
	/* From here on len is the byte count reported in IFCOUNTER_OBYTES. */
	len = m->m_pkthdr.len - len;
	switch (sc->gre_family) {
#ifdef INET
	case AF_INET:
		error = in_gre_output(m, af, sc->gre_hlen);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		error = in6_gre_output(m, af, sc->gre_hlen, flowid);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
	}
drop:
	if (error)
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
	else {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
	}
	GRE_RUNLOCK();
	return (error);
}
1033
1034 static void
1035 gre_qflush(struct ifnet *ifp __unused)
1036 {
1037
1038 }
1039
1040 static int
1041 gre_set_addr_nl(struct gre_softc *sc, struct nl_pstate *npt,
1042 struct sockaddr *src, struct sockaddr *dst)
1043 {
1044 #if defined(INET) || defined(INET6)
1045 union {
1046 #ifdef INET
1047 struct in_aliasreq in;
1048 #endif
1049 #ifdef INET6
1050 struct in6_aliasreq in6;
1051 #endif
1052 } aliasreq;
1053 #endif
1054 int error;
1055
1056 /* XXX: this sanity check runs again in in[6]_gre_ioctl */
1057 if (src->sa_family != dst->sa_family)
1058 error = EADDRNOTAVAIL;
1059 #ifdef INET
1060 else if (src->sa_family == AF_INET) {
1061 memcpy(&aliasreq.in.ifra_addr, src, sizeof(struct sockaddr_in));
1062 memcpy(&aliasreq.in.ifra_dstaddr, dst, sizeof(struct sockaddr_in));
1063 sx_xlock(&gre_ioctl_sx);
1064 error = in_gre_ioctl(sc, SIOCSIFPHYADDR, (caddr_t)&aliasreq.in);
1065 sx_xunlock(&gre_ioctl_sx);
1066 }
1067 #endif
1068 #ifdef INET6
1069 else if (src->sa_family == AF_INET6) {
1070 memcpy(&aliasreq.in6.ifra_addr, src, sizeof(struct sockaddr_in6));
1071 memcpy(&aliasreq.in6.ifra_dstaddr, dst, sizeof(struct sockaddr_in6));
1072 sx_xlock(&gre_ioctl_sx);
1073 error = in6_gre_ioctl(sc, SIOCSIFPHYADDR_IN6, (caddr_t)&aliasreq.in6);
1074 sx_xunlock(&gre_ioctl_sx);
1075 }
1076 #endif
1077 else
1078 error = EAFNOSUPPORT;
1079
1080 if (error == EADDRNOTAVAIL)
1081 nlmsg_report_err_msg(npt, "address is invalid");
1082 if (error == EEXIST)
1083 nlmsg_report_err_msg(npt, "remote and local addresses are the same");
1084 if (error == EAFNOSUPPORT)
1085 nlmsg_report_err_msg(npt, "address family is not supported");
1086
1087 return (error);
1088 }
1089
1090 static int
1091 gre_set_flags_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t opt)
1092 {
1093 int error = 0;
1094
1095 sx_xlock(&gre_ioctl_sx);
1096 error = gre_set_flags(sc, opt);
1097 sx_xunlock(&gre_ioctl_sx);
1098
1099 if (error == EINVAL)
1100 nlmsg_report_err_msg(npt, "gre flags are invalid");
1101
1102 return (error);
1103 }
1104
1105 static int
1106 gre_set_key_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t key)
1107 {
1108 int error = 0;
1109
1110 sx_xlock(&gre_ioctl_sx);
1111 error = gre_set_key(sc, key);
1112 sx_xunlock(&gre_ioctl_sx);
1113
1114 if (error == EINVAL)
1115 nlmsg_report_err_msg(npt, "gre key is invalid: %u", key);
1116
1117 return (error);
1118 }
1119
1120 static int
1121 gre_set_encap_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t type)
1122 {
1123 uint32_t opt;
1124 int error = 0;
1125
1126 sx_xlock(&gre_ioctl_sx);
1127 opt = sc->gre_options;
1128 if (type & IFLA_TUNNEL_GRE_UDP)
1129 opt |= GRE_UDPENCAP;
1130 else
1131 opt &= ~GRE_UDPENCAP;
1132 error = gre_set_flags(sc, opt);
1133 sx_xunlock(&gre_ioctl_sx);
1134
1135 if (error == EEXIST)
1136 nlmsg_report_err_msg(npt, "same gre tunnel exist");
1137
1138 return (error);
1139 }
1140
1141
1142 static int
1143 gre_set_udp_sport_nl(struct gre_softc *sc, struct nl_pstate *npt, uint16_t port)
1144 {
1145 int error = 0;
1146
1147 sx_xlock(&gre_ioctl_sx);
1148 error = gre_set_udp_sport(sc, port);
1149 sx_xunlock(&gre_ioctl_sx);
1150
1151 if (error == EINVAL)
1152 nlmsg_report_err_msg(npt, "source port is invalid: %u", port);
1153
1154 return (error);
1155 }
1156
1157
1158 static int
1159 gremodevent(module_t mod, int type, void *data)
1160 {
1161
1162 switch (type) {
1163 case MOD_LOAD:
1164 NL_VERIFY_PARSERS(all_parsers);
1165 break;
1166 case MOD_UNLOAD:
1167 break;
1168 default:
1169 return (EOPNOTSUPP);
1170 }
1171 return (0);
1172 }
1173
/* Module glue: name, event handler and (unused) private data. */
static moduledata_t gre_mod = {
	"if_gre",
	gremodevent,
	0
};

/* Register the module and declare its version. */
DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_gre, 1);
1182