1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include <sys/types.h> 33 #include <sys/malloc.h> 34 #include <sys/rmlock.h> 35 #include <sys/socket.h> 36 37 #include <net/if.h> 38 #include <net/route.h> 39 #include <net/route/nhop.h> 40 #include <net/route/route_ctl.h> 41 #include <net/route/route_var.h> 42 #include <netinet6/scope6_var.h> 43 #include <netlink/netlink.h> 44 #include <netlink/netlink_ctl.h> 45 #include <netlink/netlink_route.h> 46 #include <netlink/route/route_var.h> 47 48 #define DEBUG_MOD_NAME nl_route 49 #define DEBUG_MAX_LEVEL LOG_DEBUG3 50 #include <netlink/netlink_debug.h> 51 _DECLARE_DEBUG(LOG_INFO); 52 53 static unsigned char 54 get_rtm_type(const struct nhop_object *nh) 55 { 56 int nh_flags = nh->nh_flags; 57 58 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ 59 if (nh_flags & NHF_BLACKHOLE) 60 return (RTN_BLACKHOLE); 61 else if (nh_flags & NHF_REJECT) 62 return (RTN_PROHIBIT); 63 return (RTN_UNICAST); 64 } 65 66 static uint8_t 67 nl_get_rtm_protocol(const struct nhop_object *nh) 68 { 69 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; 70 int rt_flags; 71 uint8_t origin; 72 73 if (NH_IS_NHGRP(nh)) { 74 origin = nhgrp_get_origin(nhg); 75 if (origin != RTPROT_UNSPEC) 76 return (origin); 77 nh = nhg->nhops[0]; 78 } 79 origin = nhop_get_origin(nh); 80 if (origin != RTPROT_UNSPEC) 81 return (origin); 82 /* TODO: remove guesswork once all kernel users fill in origin */ 83 rt_flags = nhop_get_rtflags(nh); 84 if (rt_flags & RTF_PROTO1) 85 return (RTPROT_ZEBRA); 86 if (rt_flags & RTF_STATIC) 87 return (RTPROT_STATIC); 88 return (RTPROT_KERNEL); 89 } 90 91 static int 92 get_rtmsg_type_from_rtsock(int cmd) 93 { 94 switch (cmd) { 95 case RTM_ADD: 96 case RTM_CHANGE: 97 case RTM_GET: 98 return NL_RTM_NEWROUTE; 99 case RTM_DELETE: 100 return NL_RTM_DELROUTE; 101 } 102 103 return (0); 104 } 105 106 /* 107 * fibnum heuristics 108 * 109 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS 110 * msg rtm_table RTA_TABLE result 111 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS 112 * RTM_GETROUTE/dump 1 - 1 113 * RTM_GETROUTE/get 0 - 0 114 * 115 */ 116 117 static struct nhop_object * 118 rc_get_nhop(const struct rib_cmd_info *rc) 119 { 120 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); 121 } 122 123 static void 124 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) 125 { 126 #ifdef INET6 127 int upper_family; 128 #endif 129 130 switch (nhop_get_neigh_family(nh)) { 131 case AF_LINK: 132 /* onlink prefix, skip */ 133 break; 134 case AF_INET: 135 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 136 break; 137 #ifdef INET6 138 case AF_INET6: 139 upper_family = nhop_get_upper_family(nh); 140 if (upper_family == AF_INET6) { 141 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 142 in6_clearscope(&gw6); 143 144 nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); 145 } else if (upper_family == AF_INET) { 146 /* IPv4 over IPv6 */ 147 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 148 in6_clearscope(&gw6); 149 150 char buf[20]; 151 struct rtvia *via = (struct rtvia *)&buf[0]; 152 via->rtvia_family = AF_INET6; 153 memcpy(via->rtvia_addr, &gw6, 16); 154 nlattr_add(nw, NL_RTA_VIA, 17, via); 155 } 156 break; 157 #endif 158 } 159 } 160 161 static void 162 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) 163 { 164 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); 165 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 166 167 if (nla == NULL) 168 return; 169 nla->nla_type = NL_RTA_METRICS; 170 nla->nla_len = nla_len; 171 nla++; 172 nla->nla_type = NL_RTAX_MTU; 173 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); 174 *((uint32_t *)(nla + 1)) = nh->nh_mtu; 175 } 176 177 static void 178 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) 179 { 180 uint32_t uidx = nhgrp_get_uidx(nhg); 181 uint32_t num_nhops, nh_expire; 182 const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 183 uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); 184 185 if (uidx != 0) 186 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 187 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); 188 189 nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); 190 int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); 191 if (off == 0) 192 return; 193 194 for (int i = 0; i < num_nhops; i++) { 195 int nh_off = nlattr_save_offset(nw); 196 struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); 197 if (rtnh == NULL) 198 return; 199 rtnh->rtnh_flags = 0; 200 rtnh->rtnh_ifindex = if_getindex(wn[i].nh->nh_ifp); 201 rtnh->rtnh_hops = wn[i].weight; 202 dump_rc_nhop_gw(nw, wn[i].nh); 203 uint32_t rtflags = nhop_get_rtflags(wn[i].nh); 204 if (rtflags != base_rtflags) 205 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 206 if (rtflags & RTF_FIXEDMTU) 207 dump_rc_nhop_mtu(nw, wn[i].nh); 208 nh_expire = nhop_get_expire(wn[i].nh); 209 if (nh_expire > 0) 210 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 211 rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); 212 /* 213 * nlattr_add() allocates 4-byte aligned storage, no need to aligh 214 * length here 215 * */ 216 rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; 217 } 218 nlattr_set_len(nw, off); 219 } 220 221 static void 222 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) 223 { 224 const struct nhop_object *nh = rnd->rnd_nhop; 225 uint32_t rtflags, uidx, nh_expire; 226 227 if (NH_IS_NHGRP(rnd->rnd_nhop)) { 228 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); 229 return; 230 } 231 232 rtflags = nhop_get_rtflags(nh); 233 /* 234 * IPv4 over IPv6 235 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), 236 * IPv4 w/ gw 237 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], 238 * Direct route: 239 * ('RTA_OIF', 2) 240 */ 241 if (nh->nh_flags & NHF_GATEWAY) 242 dump_rc_nhop_gw(nw, nh); 243 244 uidx = nhop_get_uidx(nh); 245 if (uidx != 0) 246 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 247 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); 248 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 249 250 if (rtflags & RTF_FIXEDMTU) 251 dump_rc_nhop_mtu(nw, nh); 252 nh_expire = nhop_get_expire(nh); 253 if (nh_expire > 0) 254 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 255 256 /* In any case, fill outgoing interface */ 257 nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp)); 258 259 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) 260 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); 261 } 262 263 /* 264 * Dumps output from a rib command into an rtmsg 265 */ 266 267 static int 268 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, 269 const struct rtentry *rt, struct route_nhop_data *rnd, 270 struct nl_writer *nw) 271 { 272 struct rtmsg *rtm; 273 int error = 0; 274 275 NET_EPOCH_ASSERT(); 276 277 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) 278 goto enomem; 279 280 int family = rt_get_family(rt); 281 int rtm_off = nlattr_save_offset(nw); 282 rtm = nlmsg_reserve_object(nw, struct rtmsg); 283 rtm->rtm_family = family; 284 rtm->rtm_dst_len = 0; 285 rtm->rtm_src_len = 0; 286 rtm->rtm_tos = 0; 287 if (fibnum < 255) 288 rtm->rtm_table = (unsigned char)fibnum; 289 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 290 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); 291 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); 292 293 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); 294 295 int plen = 0; 296 #if defined(INET) || defined(INET6) 297 uint32_t scopeid; 298 #endif 299 switch (family) { 300 #ifdef INET 301 case AF_INET: 302 { 303 struct in_addr addr; 304 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); 305 nlattr_add(nw, NL_RTA_DST, 4, &addr); 306 break; 307 } 308 #endif 309 #ifdef INET6 310 case AF_INET6: 311 { 312 struct in6_addr addr; 313 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); 314 nlattr_add(nw, NL_RTA_DST, 16, &addr); 315 break; 316 } 317 #endif 318 default: 319 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); 320 error = EAFNOSUPPORT; 321 goto flush; 322 } 323 324 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); 325 if (plen > 0) 326 rtm->rtm_dst_len = plen; 327 dump_rc_nhop(nw, rnd, rtm); 328 329 if (nlmsg_end(nw)) 330 return (0); 331 enomem: 332 error = ENOMEM; 333 flush: 334 nlmsg_abort(nw); 335 return (error); 336 } 337 338 static int 339 family_to_group(int family) 340 { 341 switch (family) { 342 case AF_INET: 343 return (RTNLGRP_IPV4_ROUTE); 344 case AF_INET6: 345 return (RTNLGRP_IPV6_ROUTE); 346 } 347 return (0); 348 } 349 350 static void 351 report_operation(uint32_t fibnum, struct rib_cmd_info *rc, 352 struct nlpcb *nlp, struct nlmsghdr *hdr) 353 { 354 struct nl_writer nw; 355 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); 356 357 if (nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, 0, 358 false)) { 359 struct route_nhop_data rnd = { 360 .rnd_nhop = rc_get_nhop(rc), 361 .rnd_weight = rc->rc_nh_weight, 362 }; 363 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); 364 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); 365 switch (rc->rc_cmd) { 366 case RTM_ADD: 367 hdr->nlmsg_type = NL_RTM_NEWROUTE; 368 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; 369 break; 370 case RTM_CHANGE: 371 hdr->nlmsg_type = NL_RTM_NEWROUTE; 372 hdr->nlmsg_flags |= NLM_F_REPLACE; 373 break; 374 case RTM_DELETE: 375 hdr->nlmsg_type = NL_RTM_DELROUTE; 376 break; 377 } 378 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); 379 nlmsg_flush(&nw); 380 } 381 382 rtsock_callback_p->route_f(fibnum, rc); 383 } 384 385 static void 386 set_scope6(struct sockaddr *sa, struct ifnet *ifp) 387 { 388 #ifdef INET6 389 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { 390 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; 391 392 if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) 393 in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); 394 } 395 #endif 396 } 397 398 struct rta_mpath_nh { 399 struct sockaddr *gw; 400 struct ifnet *ifp; 401 uint8_t rtnh_flags; 402 uint8_t rtnh_weight; 403 }; 404 405 #define _IN(_field) offsetof(struct rtnexthop, _field) 406 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) 407 const static struct nlattr_parser nla_p_rtnh[] = { 408 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, 409 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, 410 }; 411 const static struct nlfield_parser nlf_p_rtnh[] = { 412 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, 413 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, 414 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, 415 }; 416 #undef _IN 417 #undef _OUT 418 419 static bool 420 post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused) 421 { 422 struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs; 423 424 set_scope6(attrs->gw, attrs->ifp); 425 return (true); 426 } 427 NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh); 428 429 struct rta_mpath { 430 u_int num_nhops; 431 struct rta_mpath_nh nhops[0]; 432 }; 433 434 static int 435 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, 436 const void *arg, void *target) 437 { 438 struct rta_mpath *mp; 439 struct rtnexthop *rtnh; 440 uint16_t data_len, len; 441 u_int max_nhops; 442 int error; 443 444 data_len = nla->nla_len - sizeof(struct nlattr); 445 max_nhops = data_len / sizeof(struct rtnexthop); 446 447 mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); 448 mp->num_nhops = 0; 449 450 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { 451 struct rta_mpath_nh *mpnh; 452 453 if (__predict_false(rtnh->rtnh_len <= sizeof(*rtnh) || 454 rtnh->rtnh_len > data_len)) { 455 NLMSG_REPORT_ERR_MSG(npt, "%s: bad length %u", 456 __func__, rtnh->rtnh_len); 457 return (EINVAL); 458 } 459 mpnh = &mp->nhops[mp->num_nhops++]; 460 error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, 461 npt, mpnh); 462 if (error != 0) { 463 NLMSG_REPORT_ERR_MSG(npt, 464 "RTA_MULTIPATH: nexthop %u: parse failed", 465 mp->num_nhops - 1); 466 return (error); 467 } 468 len = NL_ITEM_ALIGN(rtnh->rtnh_len); 469 data_len -= len; 470 rtnh = (struct rtnexthop *)((char *)rtnh + len); 471 } 472 if (data_len != 0 || mp->num_nhops == 0) { 473 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); 474 return (EINVAL); 475 } 476 477 *((struct rta_mpath **)target) = mp; 478 return (0); 479 } 480 481 482 struct nl_parsed_route { 483 struct sockaddr *rta_dst; 484 struct sockaddr *rta_gw; 485 struct ifnet *rta_oif; 486 struct rta_mpath *rta_multipath; 487 uint32_t rta_table; 488 uint32_t rta_rtflags; 489 uint32_t rta_nh_id; 490 uint32_t rta_weight; 491 uint32_t rta_expire; 492 uint32_t rtax_mtu; 493 uint8_t rtm_table; 494 uint8_t rtm_family; 495 uint8_t rtm_dst_len; 496 uint8_t rtm_protocol; 497 uint8_t rtm_type; 498 uint32_t rtm_flags; 499 }; 500 501 #define _IN(_field) offsetof(struct rtmsg, _field) 502 #define _OUT(_field) offsetof(struct nl_parsed_route, _field) 503 static struct nlattr_parser nla_p_rtmetrics[] = { 504 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, 505 }; 506 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); 507 508 static const struct nlattr_parser nla_p_rtmsg[] = { 509 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, 510 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, 511 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, 512 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, 513 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, 514 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, 515 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, 516 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, 517 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, 518 { .type = NL_RTA_EXPIRES, .off = _OUT(rta_expire), .cb = nlattr_get_uint32 }, 519 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, 520 }; 521 522 static const struct nlfield_parser nlf_p_rtmsg[] = { 523 { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, 524 { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, 525 { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, 526 { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, 527 { .off_in = _IN(rtm_table), .off_out = _OUT(rtm_table), .cb = nlf_get_u8 }, 528 { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 }, 529 }; 530 #undef _IN 531 #undef _OUT 532 533 static bool 534 post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused) 535 { 536 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs; 537 538 set_scope6(attrs->rta_dst, attrs->rta_oif); 539 set_scope6(attrs->rta_gw, attrs->rta_oif); 540 return (true); 541 } 542 NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg); 543 544 struct netlink_walkargs { 545 struct nl_writer *nw; 546 struct route_nhop_data rnd; 547 struct nlmsghdr hdr; 548 struct nlpcb *nlp; 549 uint32_t fibnum; 550 int family; 551 int error; 552 int count; 553 int dumped; 554 int dumped_tables; 555 }; 556 557 static int 558 dump_rtentry(struct rtentry *rt, void *_arg) 559 { 560 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; 561 int error; 562 563 wa->count++; 564 if (wa->error != 0) 565 return (0); 566 if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp))) 567 return (0); 568 wa->dumped++; 569 570 rt_get_rnd(rt, &wa->rnd); 571 572 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); 573 574 IF_DEBUG_LEVEL(LOG_DEBUG3) { 575 char rtbuf[INET6_ADDRSTRLEN + 5]; 576 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, 577 "Dump %s, error %d", 578 rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error); 579 } 580 wa->error = error; 581 582 return (0); 583 } 584 585 static void 586 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) 587 { 588 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); 589 wa->count = 0; 590 wa->dumped = 0; 591 592 rib_walk(fibnum, family, false, dump_rtentry, wa); 593 594 wa->dumped_tables++; 595 596 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", 597 wa->count, wa->dumped); 598 } 599 600 static int 601 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) 602 { 603 wa->fibnum = fibnum; 604 605 if (family == AF_UNSPEC) { 606 for (int i = 0; i < AF_MAX; i++) { 607 if (rt_tables_get_rnh(fibnum, i) != 0) { 608 wa->family = i; 609 dump_rtable_one(wa, fibnum, i); 610 if (wa->error != 0) 611 break; 612 } 613 } 614 } else { 615 if (rt_tables_get_rnh(fibnum, family) != 0) { 616 wa->family = family; 617 dump_rtable_one(wa, fibnum, family); 618 } 619 } 620 621 return (wa->error); 622 } 623 624 static int 625 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, 626 struct nlmsghdr *hdr, struct nl_pstate *npt) 627 { 628 RIB_RLOCK_TRACKER; 629 struct rib_head *rnh; 630 const struct rtentry *rt; 631 struct route_nhop_data rnd; 632 uint32_t fibnum = attrs->rta_table; 633 sa_family_t family = attrs->rtm_family; 634 635 if (attrs->rta_dst == NULL) { 636 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); 637 return (EINVAL); 638 } 639 640 rnh = rt_tables_get_rnh(fibnum, family); 641 if (rnh == NULL) 642 return (EAFNOSUPPORT); 643 644 RIB_RLOCK(rnh); 645 646 struct sockaddr *dst = attrs->rta_dst; 647 648 if (attrs->rtm_flags & RTM_F_PREFIX) 649 rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd); 650 else 651 rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head); 652 if (rt == NULL) { 653 RIB_RUNLOCK(rnh); 654 return (ESRCH); 655 } 656 657 rt_get_rnd(rt, &rnd); 658 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); 659 660 RIB_RUNLOCK(rnh); 661 662 if (!rt_is_exportable(rt, nlp_get_cred(nlp))) 663 return (ESRCH); 664 665 IF_DEBUG_LEVEL(LOG_DEBUG2) { 666 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; 667 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", 668 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), 669 rt_print_buf(rt, rtbuf, sizeof(rtbuf))); 670 } 671 672 hdr->nlmsg_type = NL_RTM_NEWROUTE; 673 dump_px(fibnum, hdr, rt, &rnd, npt->nw); 674 675 return (0); 676 } 677 678 static int 679 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, 680 struct nlmsghdr *hdr, struct nl_writer *nw) 681 { 682 struct netlink_walkargs wa = { 683 .nlp = nlp, 684 .nw = nw, 685 .hdr.nlmsg_pid = hdr->nlmsg_pid, 686 .hdr.nlmsg_seq = hdr->nlmsg_seq, 687 .hdr.nlmsg_type = NL_RTM_NEWROUTE, 688 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, 689 }; 690 691 if (fibnum == RT_TABLE_UNSPEC) { 692 for (int i = 0; i < V_rt_numfibs; i++) { 693 dump_rtable_fib(&wa, fibnum, family); 694 if (wa.error != 0) 695 break; 696 } 697 } else 698 dump_rtable_fib(&wa, fibnum, family); 699 700 if (wa.error == 0 && wa.dumped_tables == 0) { 701 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); 702 wa.error = ESRCH; 703 // How do we propagate it? 704 } 705 706 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { 707 NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); 708 return (ENOMEM); 709 } 710 711 return (wa.error); 712 } 713 714 static struct nhop_object * 715 finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror) 716 { 717 /* 718 * The following MUST be filled: 719 * nh_ifp, nh_ifa, nh_gw 720 */ 721 if (nh->gw_sa.sa_family == 0) { 722 /* 723 * Empty gateway. Can be direct route with RTA_OIF set. 724 */ 725 if (nh->nh_ifp != NULL) 726 nhop_set_direct_gw(nh, nh->nh_ifp); 727 else { 728 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); 729 *perror = EINVAL; 730 return (NULL); 731 } 732 /* Both nh_ifp and gateway are set */ 733 } else { 734 /* Gateway is set up, we can derive ifp if not set */ 735 if (nh->nh_ifp == NULL) { 736 uint32_t fibnum = nhop_get_fibnum(nh); 737 uint32_t flags = 0; 738 739 if (nh->nh_flags & NHF_GATEWAY) 740 flags = RTF_GATEWAY; 741 else if (nh->nh_flags & NHF_HOST) 742 flags = RTF_HOST; 743 744 struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum); 745 if (ifa == NULL) { 746 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); 747 *perror = EINVAL; 748 return (NULL); 749 } 750 nhop_set_transmit_ifp(nh, ifa->ifa_ifp); 751 } 752 } 753 /* Both nh_ifp and gateway are set */ 754 if (nh->nh_ifa == NULL) { 755 const struct sockaddr *gw_sa = &nh->gw_sa; 756 757 if (gw_sa->sa_family != dst->sa_family) { 758 /* 759 * Use dst as the target for determining the default 760 * preferred ifa IF 761 * 1) the gateway is link-level (e.g. direct route) 762 * 2) the gateway family is different (e.g. IPv4 over IPv6). 763 */ 764 gw_sa = dst; 765 } 766 767 struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); 768 if (ifa == NULL) { 769 /* Try link-level ifa. */ 770 gw_sa = &nh->gw_sa; 771 ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); 772 if (ifa == NULL) { 773 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); 774 *perror = EINVAL; 775 return (NULL); 776 } 777 } 778 nhop_set_src(nh, ifa); 779 } 780 781 return (nhop_get_nhop(nh, perror)); 782 } 783 784 static int 785 get_pxflag(const struct nl_parsed_route *attrs) 786 { 787 int pxflag = 0; 788 switch (attrs->rtm_family) { 789 case AF_INET: 790 if (attrs->rtm_dst_len == 32) 791 pxflag = NHF_HOST; 792 else if (attrs->rtm_dst_len == 0) 793 pxflag = NHF_DEFAULT; 794 break; 795 case AF_INET6: 796 if (attrs->rtm_dst_len == 128) 797 pxflag = NHF_HOST; 798 else if (attrs->rtm_dst_len == 0) 799 pxflag = NHF_DEFAULT; 800 break; 801 } 802 803 return (pxflag); 804 } 805 806 static int 807 get_op_flags(int nlm_flags) 808 { 809 int op_flags = 0; 810 811 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; 812 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; 813 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; 814 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; 815 816 return (op_flags); 817 } 818 819 static int 820 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, 821 struct nl_pstate *npt, struct nhop_object **pnh) 822 { 823 int error; 824 825 if (mpnh->gw == NULL) 826 return (EINVAL); 827 828 struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 829 if (nh == NULL) 830 return (ENOMEM); 831 832 error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); 833 if (error != 0) { 834 nhop_free(nh); 835 return (error); 836 } 837 if (mpnh->ifp != NULL) 838 nhop_set_transmit_ifp(nh, mpnh->ifp); 839 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 840 nhop_set_rtflags(nh, attrs->rta_rtflags); 841 if (attrs->rtm_protocol > RTPROT_STATIC) 842 nhop_set_origin(nh, attrs->rtm_protocol); 843 844 *pnh = finalize_nhop(nh, attrs->rta_dst, &error); 845 846 return (error); 847 } 848 849 static struct nhop_object * 850 create_nexthop_from_attrs(struct nl_parsed_route *attrs, 851 struct nl_pstate *npt, int *perror) 852 { 853 struct nhop_object *nh = NULL; 854 int error = 0; 855 uint32_t nh_expire = 0; 856 857 if (attrs->rta_multipath != NULL) { 858 /* Multipath w/o explicit nexthops */ 859 int num_nhops = attrs->rta_multipath->num_nhops; 860 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); 861 862 for (int i = 0; i < num_nhops; i++) { 863 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; 864 865 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); 866 if (error != 0) { 867 for (int j = 0; j < i; j++) 868 nhop_free(wn[j].nh); 869 break; 870 } 871 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; 872 } 873 if (error == 0) { 874 struct rib_head *rh = nhop_get_rh(wn[0].nh); 875 struct nhgrp_object *nhg; 876 877 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, 878 wn, num_nhops, perror); 879 if (nhg != NULL) { 880 if (attrs->rtm_protocol > RTPROT_STATIC) 881 nhgrp_set_origin(nhg, attrs->rtm_protocol); 882 nhg = nhgrp_get_nhgrp(nhg, perror); 883 } 884 for (int i = 0; i < num_nhops; i++) 885 nhop_free(wn[i].nh); 886 if (nhg != NULL) 887 return ((struct nhop_object *)nhg); 888 error = *perror; 889 } 890 *perror = error; 891 } else { 892 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 893 if (nh == NULL) { 894 *perror = ENOMEM; 895 return (NULL); 896 } 897 if (attrs->rta_gw != NULL) { 898 *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); 899 if (*perror != 0) { 900 nhop_free(nh); 901 return (NULL); 902 } 903 } 904 if (attrs->rta_oif != NULL) 905 nhop_set_transmit_ifp(nh, attrs->rta_oif); 906 if (attrs->rtax_mtu != 0) 907 nhop_set_mtu(nh, attrs->rtax_mtu, true); 908 if (attrs->rta_expire > 0) { 909 nh_expire = attrs->rta_expire - time_second + time_uptime; 910 nhop_set_expire(nh, nh_expire); 911 } 912 if (attrs->rta_rtflags & RTF_BROADCAST) 913 nhop_set_broadcast(nh, true); 914 if (attrs->rtm_protocol > RTPROT_STATIC) 915 nhop_set_origin(nh, attrs->rtm_protocol); 916 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 917 nhop_set_rtflags(nh, attrs->rta_rtflags); 918 919 switch (attrs->rtm_type) { 920 case RTN_UNICAST: 921 break; 922 case RTN_BLACKHOLE: 923 nhop_set_blackhole(nh, RTF_BLACKHOLE); 924 break; 925 case RTN_PROHIBIT: 926 case RTN_UNREACHABLE: 927 nhop_set_blackhole(nh, RTF_REJECT); 928 break; 929 /* TODO: return ENOTSUP for other types if strict option is set */ 930 } 931 932 nh = finalize_nhop(nh, attrs->rta_dst, perror); 933 } 934 935 return (nh); 936 } 937 938 static int 939 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 940 struct nl_pstate *npt) 941 { 942 struct rib_cmd_info rc = {}; 943 struct nhop_object *nh = NULL; 944 int error; 945 946 struct nl_parsed_route attrs = {}; 947 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 948 if (error != 0) 949 return (error); 950 951 /* Check if we have enough data */ 952 if (attrs.rta_dst == NULL) { 953 NL_LOG(LOG_DEBUG, "missing RTA_DST"); 954 return (EINVAL); 955 } 956 957 /* pre-2.6.19 Linux API compatibility */ 958 if (attrs.rtm_table > 0 && attrs.rta_table == 0) 959 attrs.rta_table = attrs.rtm_table; 960 if (attrs.rta_table >= V_rt_numfibs || attrs.rtm_family > AF_MAX) { 961 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 962 return (EINVAL); 963 } 964 965 if (attrs.rta_nh_id != 0) { 966 /* Referenced uindex */ 967 int pxflag = get_pxflag(&attrs); 968 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, 969 pxflag, &error); 970 if (error != 0) 971 return (error); 972 } else { 973 nh = create_nexthop_from_attrs(&attrs, npt, &error); 974 if (error != 0) { 975 NL_LOG(LOG_DEBUG, "Error creating nexthop"); 976 return (error); 977 } 978 } 979 980 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) 981 attrs.rta_weight = RT_DEFAULT_WEIGHT; 982 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; 983 int op_flags = get_op_flags(hdr->nlmsg_flags); 984 985 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, 986 &rnd, op_flags, &rc); 987 if (error == 0) 988 report_operation(attrs.rta_table, &rc, nlp, hdr); 989 return (error); 990 } 991 992 static int 993 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 994 { 995 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; 996 997 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) 998 return (0); 999 1000 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) 1001 return (0); 1002 1003 return (1); 1004 } 1005 1006 static int 1007 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 1008 struct nl_pstate *npt) 1009 { 1010 struct rib_cmd_info rc; 1011 int error; 1012 1013 struct nl_parsed_route attrs = {}; 1014 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 1015 if (error != 0) 1016 return (error); 1017 1018 if (attrs.rta_dst == NULL) { 1019 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); 1020 return (ESRCH); 1021 } 1022 1023 if (attrs.rta_table >= V_rt_numfibs || attrs.rtm_family > AF_MAX) { 1024 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 1025 return (EINVAL); 1026 } 1027 1028 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, 1029 attrs.rtm_dst_len, path_match_func, &attrs, 1030 (attrs.rta_rtflags & RTF_PINNED) ? RTM_F_FORCE : 0, &rc); 1031 if (error == 0) 1032 report_operation(attrs.rta_table, &rc, nlp, hdr); 1033 return (error); 1034 } 1035 1036 static int 1037 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) 1038 { 1039 int error; 1040 1041 struct nl_parsed_route attrs = {}; 1042 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 1043 if (error != 0) 1044 return (error); 1045 1046 if (attrs.rta_table >= V_rt_numfibs || attrs.rtm_family > AF_MAX) { 1047 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 1048 return (EINVAL); 1049 } 1050 1051 if (hdr->nlmsg_flags & NLM_F_DUMP) 1052 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); 1053 else 1054 error = handle_rtm_getroute(nlp, &attrs, hdr, npt); 1055 1056 return (error); 1057 } 1058 1059 void 1060 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) 1061 { 1062 struct nl_writer nw; 1063 int family, nlm_flags = 0; 1064 1065 family = rt_get_family(rc->rc_rt); 1066 1067 /* XXX: check if there are active listeners first */ 1068 1069 /* TODO: consider passing PID/type/seq */ 1070 switch (rc->rc_cmd) { 1071 case RTM_ADD: 1072 nlm_flags = NLM_F_EXCL | NLM_F_CREATE; 1073 break; 1074 case RTM_CHANGE: 1075 nlm_flags = NLM_F_REPLACE; 1076 break; 1077 case RTM_DELETE: 1078 nlm_flags = 0; 1079 break; 1080 } 1081 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1082 char rtbuf[NHOP_PRINT_BUFSIZE] __unused; 1083 FIB_LOG(LOG_DEBUG2, fibnum, family, 1084 "received event %s for %s / nlm_flags=%X", 1085 rib_print_cmd(rc->rc_cmd), 1086 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), 1087 nlm_flags); 1088 } 1089 1090 struct nlmsghdr hdr = { 1091 .nlmsg_flags = nlm_flags, 1092 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), 1093 }; 1094 1095 struct route_nhop_data rnd = { 1096 .rnd_nhop = rc_get_nhop(rc), 1097 .rnd_weight = rc->rc_nh_weight, 1098 }; 1099 1100 uint32_t group_id = family_to_group(family); 1101 if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, 0, 1102 false)) { 1103 NL_LOG(LOG_DEBUG, "error allocating event buffer"); 1104 return; 1105 } 1106 1107 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); 1108 nlmsg_flush(&nw); 1109 } 1110 1111 static const struct rtnl_cmd_handler cmd_handlers[] = { 1112 { 1113 .cmd = NL_RTM_GETROUTE, 1114 .name = "RTM_GETROUTE", 1115 .cb = &rtnl_handle_getroute, 1116 .flags = RTNL_F_ALLOW_NONVNET_JAIL, 1117 }, 1118 { 1119 .cmd = NL_RTM_DELROUTE, 1120 .name = "RTM_DELROUTE", 1121 .cb = &rtnl_handle_delroute, 1122 .priv = PRIV_NET_ROUTE, 1123 .flags = RTNL_F_ALLOW_NONVNET_JAIL, 1124 }, 1125 { 1126 .cmd = NL_RTM_NEWROUTE, 1127 .name = "RTM_NEWROUTE", 1128 .cb = &rtnl_handle_newroute, 1129 .priv = PRIV_NET_ROUTE, 1130 .flags = RTNL_F_ALLOW_NONVNET_JAIL, 1131 } 1132 }; 1133 1134 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; 1135 1136 void 1137 rtnl_routes_init(void) 1138 { 1139 NL_VERIFY_PARSERS(all_parsers); 1140 rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); 1141 } 1142