1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/types.h>
3 #include <linux/skbuff.h>
4 #include <linux/socket.h>
5 #include <linux/sysctl.h>
6 #include <linux/net.h>
7 #include <linux/module.h>
8 #include <linux/if_arp.h>
9 #include <linux/ipv6.h>
10 #include <linux/mpls.h>
11 #include <linux/netconf.h>
12 #include <linux/nospec.h>
13 #include <linux/vmalloc.h>
14 #include <linux/percpu.h>
15 #include <net/gso.h>
16 #include <net/ip.h>
17 #include <net/dst.h>
18 #include <net/sock.h>
19 #include <net/arp.h>
20 #include <net/ip_fib.h>
21 #include <net/netevent.h>
22 #include <net/ip_tunnels.h>
23 #include <net/netns/generic.h>
24 #if IS_ENABLED(CONFIG_IPV6)
25 #include <net/ipv6.h>
26 #endif
27 #include <net/ipv6_stubs.h>
28 #include <net/rtnh.h>
29 #include "internal.h"
30
31 /* max memory we will use for mpls_route */
32 #define MAX_MPLS_ROUTE_MEM 4096
33
34 /* Maximum number of labels to look ahead at when selecting a path of
35 * a multipath route
36 */
37 #define MAX_MP_SELECT_LABELS 4
38
39 #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
40
41 static int label_limit = (1 << 20) - 1;
42 static int ttl_max = 255;
43
#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
/* Extra header length MPLS-in-GRE encapsulation needs: exactly one
 * MPLS shim header (label stack entry).
 */
static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct mpls_shim_hdr);
}

/* Encap ops registered with the IP tunnel core for TUNNEL_ENCAP_MPLS */
static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
	.encap_hlen	= ipgre_mpls_encap_hlen,
};

/* Register MPLS encapsulation with the IP tunnel core.
 * Returns 0 on success or a negative errno from the tunnel core.
 */
static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}

/* Unregister MPLS encapsulation from the IP tunnel core */
static void ipgre_tunnel_encap_del_mpls_ops(void)
{
	ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}
#else
/* Stubs when IP tunnels are not built in: registration succeeds as a no-op */
static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return 0;
}

static void ipgre_tunnel_encap_del_mpls_ops(void)
{
}
#endif
73
74 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
75 struct nlmsghdr *nlh, struct net *net, u32 portid,
76 unsigned int nlm_flags);
77
/* Look up the route installed for label @index on the writer/control path.
 * mpls_dereference() is the project's checked dereference (see internal.h);
 * NOTE(review): presumably it asserts the MPLS table update lock is held —
 * confirm against internal.h.
 *
 * No bounds check here: callers must pass an index already validated
 * against net->mpls.platform_labels (e.g. via mpls_label_ok()).
 */
static struct mpls_route *mpls_route_input(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	return mpls_dereference(net, platform_label[index]);
}
85
/* Fetch the platform label table pointer together with its size as one
 * consistent snapshot for RCU readers.
 *
 * The table pointer and net->mpls.platform_labels are updated separately
 * by resize operations; the seqcount retry loop ensures the pair read
 * here belongs to the same generation, so the caller may safely
 * bounds-check an index against *platform_labels before indexing the
 * returned array.
 */
static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels)
{
	struct mpls_route __rcu **platform_label;
	unsigned int sequence;

	do {
		sequence = read_seqcount_begin(&net->mpls.platform_label_seq);
		platform_label = rcu_dereference(net->mpls.platform_label);
		*platform_labels = net->mpls.platform_labels;
	} while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence));

	return platform_label;
}
99
/* RCU-reader lookup of the route installed for label @index.
 * Returns NULL when the label is beyond the configured table size or
 * has no route.  Caller must be in an RCU read-side critical section.
 */
static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;

	/* Snapshot table pointer + size atomically w.r.t. resizes */
	platform_label = mpls_platform_label_rcu(net, &platform_labels);

	if (index >= platform_labels)
		return NULL;

	return rcu_dereference(platform_label[index]);
}
112
mpls_output_possible(const struct net_device * dev)113 bool mpls_output_possible(const struct net_device *dev)
114 {
115 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
116 }
117 EXPORT_SYMBOL_GPL(mpls_output_possible);
118
/* Address of the "via" (neighbour address) bytes stored after the label
 * array inside a nexthop; rt_via_offset is the per-route offset fixed
 * at allocation time in mpls_rt_alloc().
 */
static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
	return (u8 *)nh + rt->rt_via_offset;
}
123
/* Const wrapper around __mpls_nh_via() for read-only callers; the casts
 * only strip const for the shared offset computation.
 */
static const u8 *mpls_nh_via(const struct mpls_route *rt,
			     const struct mpls_nh *nh)
{
	return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
}
129
/* Bytes of MPLS label stack this nexthop will push on output */
static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
	/* The size of the layer 2.5 labels to be added for this route */
	return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}
135
/* MTU of the output device as seen by the MPLS layer */
unsigned int mpls_dev_mtu(const struct net_device *dev)
{
	/* The amount of data the layer 2 frame can hold */
	return dev->mtu;
}
EXPORT_SYMBOL_GPL(mpls_dev_mtu);
142
mpls_pkt_too_big(const struct sk_buff * skb,unsigned int mtu)143 bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
144 {
145 if (skb->len <= mtu)
146 return false;
147
148 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
149 return false;
150
151 return true;
152 }
153 EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
154
/* Account one transmitted unicast packet against the protocol the skb
 * now carries: MPLS frames hit the per-device MPLS counters, while
 * packets decapsulated to IPv4/IPv6 (penultimate hop popping) hit the
 * corresponding IP output MIBs.  Caller must be in an RCU read section
 * (mpls_dev_rcu / in6_dev_rcu).
 */
void mpls_stats_inc_outucastpkts(struct net *net,
				 struct net_device *dev,
				 const struct sk_buff *skb)
{
	struct mpls_dev *mdev;

	if (skb->protocol == htons(ETH_P_MPLS_UC)) {
		mdev = mpls_dev_rcu(dev);
		if (mdev)
			MPLS_INC_STATS_LEN(mdev, skb->len,
					   tx_packets,
					   tx_bytes);
	} else if (skb->protocol == htons(ETH_P_IP)) {
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct inet6_dev *in6dev = in6_dev_rcu(dev);

		if (in6dev)
			IP6_UPD_PO_STATS(net, in6dev,
					 IPSTATS_MIB_OUT, skb->len);
#endif
	}
}
EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts);
180
/* Compute the multipath load-balancing hash for @skb.
 *
 * Walks up to MAX_MP_SELECT_LABELS entries of the label stack, hashing
 * each unreserved label (RFC 6790: reserved labels must not feed the
 * load balancer).  If an entropy label indicator (ELI) is seen, the
 * label that follows it is the entropy label and hashing stops there.
 * At the bottom of stack, the IPv4/IPv6 addresses and protocol of the
 * payload are folded in when enough data is linearly available.
 */
static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
	struct mpls_entry_decoded dec;
	unsigned int mpls_hdr_len = 0;
	struct mpls_shim_hdr *hdr;
	bool eli_seen = false;
	int label_index;
	u32 hash = 0;

	for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
	     label_index++) {
		/* Each iteration needs one more shim header pulled into
		 * the linear area before it can be read.
		 */
		mpls_hdr_len += sizeof(*hdr);
		if (!pskb_may_pull(skb, mpls_hdr_len))
			break;

		/* Read and decode the current label */
		hdr = mpls_hdr(skb) + label_index;
		dec = mpls_entry_decode(hdr);

		/* RFC6790 - reserved labels MUST NOT be used as keys
		 * for the load-balancing function
		 */
		if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
			hash = jhash_1word(dec.label, hash);

			/* The entropy label follows the entropy label
			 * indicator, so this means that the entropy
			 * label was just added to the hash - no need to
			 * go any deeper either in the label stack or in the
			 * payload
			 */
			if (eli_seen)
				break;
		} else if (dec.label == MPLS_LABEL_ENTROPY) {
			eli_seen = true;
		}

		if (!dec.bos)
			continue;

		/* found bottom label; does skb have room for a header? */
		if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
			const struct iphdr *v4hdr;

			v4hdr = (const struct iphdr *)(hdr + 1);
			if (v4hdr->version == 4) {
				hash = jhash_3words(ntohl(v4hdr->saddr),
						    ntohl(v4hdr->daddr),
						    v4hdr->protocol, hash);
			} else if (v4hdr->version == 6 &&
				   pskb_may_pull(skb, mpls_hdr_len +
						 sizeof(struct ipv6hdr))) {
				const struct ipv6hdr *v6hdr;

				v6hdr = (const struct ipv6hdr *)(hdr + 1);
				hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
				hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
				hash = jhash_1word(v6hdr->nexthdr, hash);
			}
		}

		break;
	}

	return hash;
}
247
/* Return the @index'th nexthop; all nexthops of a route share the same
 * stride rt_nh_size, fixed when the route was allocated.
 */
static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
{
	return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
}
252
253 /* number of alive nexthops (rt->rt_nhn_alive) and the flags for
254 * a next hop (nh->nh_flags) are modified by netdev event handlers.
255 * Since those fields can change at any moment, use READ_ONCE to
256 * access both.
257 */
static const struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
						   struct sk_buff *skb)
{
	u32 hash = 0;
	int nh_index = 0;
	int n = 0;
	u8 alive;

	/* No need to look further into packet if there's only
	 * one path
	 */
	if (rt->rt_nhn == 1)
		return rt->rt_nh;

	/* rt_nhn_alive may change under us (netdev events) - single read */
	alive = READ_ONCE(rt->rt_nhn_alive);
	if (alive == 0)
		return NULL;

	hash = mpls_multipath_hash(rt, skb);
	nh_index = hash % alive;
	/* All hops alive: the hash indexes the nexthop array directly */
	if (alive == rt->rt_nhn)
		goto out;
	/* Otherwise pick the nh_index'th *alive* nexthop, skipping dead
	 * and link-down entries.
	 */
	for_nexthops(rt) {
		unsigned int nh_flags = READ_ONCE(nh->nh_flags);

		if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			continue;
		if (n == nh_index)
			return nh;
		n++;
	} endfor_nexthops(rt);

out:
	return mpls_get_nexthop(rt, nh_index);
}
293
/* Hand the packet over to IPv4/IPv6 after the last label was popped
 * (penultimate hop popping).  Sets skb->protocol and fixes up the
 * TTL/hop-limit according to the route's propagation policy.
 * Returns true when the payload protocol was recognized.
 */
static bool mpls_egress(struct net *net, struct mpls_route *rt,
			struct sk_buff *skb, struct mpls_entry_decoded dec)
{
	enum mpls_payload_type payload_type;
	bool success = false;

	/* The IPv4 code below accesses through the IPv4 header
	 * checksum, which is 12 bytes into the packet.
	 * The IPv6 code below accesses through the IPv6 hop limit
	 * which is 8 bytes into the packet.
	 *
	 * For all supported cases there should always be at least 12
	 * bytes of packet data present. The IPv4 header is 20 bytes
	 * without options and the IPv6 header is always 40 bytes
	 * long.
	 */
	if (!pskb_may_pull(skb, 12))
		return false;

	payload_type = rt->rt_payload_type;
	/* NOTE(review): when unspecified, the IP version field is used
	 * directly as the payload type; this assumes MPT_IPV4/MPT_IPV6
	 * have the enum values 4/6 — confirm against the enum definition.
	 */
	if (payload_type == MPT_UNSPEC)
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		u8 new_ttl;
		skb->protocol = htons(ETH_P_IP);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * TTL, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			new_ttl = dec.ttl;
		else
			new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;

		/* Incrementally update the header checksum for the
		 * TTL change (TTL sits in the high byte of its 16-bit
		 * checksum word).
		 */
		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(new_ttl << 8));
		hdr4->ttl = new_ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
		skb->protocol = htons(ETH_P_IPV6);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * hop limit, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			hdr6->hop_limit = dec.ttl;
		else if (hdr6->hop_limit)
			hdr6->hop_limit = hdr6->hop_limit - 1;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		/* Should have decided which protocol it is by now */
		break;
	}

	return success;
}
365
/* Receive handler for ETH_P_MPLS_UC frames: decode the top label, look
 * up the route, pop the label and either decapsulate to IP (penultimate
 * hop popping) or push the nexthop's label stack and transmit via the
 * neighbour layer.  Always consumes @skb.
 * Returns 0 on successful forward, NET_RX_DROP otherwise.
 */
static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net_rcu(dev);
	struct mpls_shim_hdr *hdr;
	const struct mpls_nh *nh;
	struct mpls_route *rt;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *out_mdev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		goto drop;

	MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets,
			   rx_bytes);

	if (!mdev->input_enabled) {
		MPLS_INC_STATS(mdev, rx_dropped);
		goto drop;
	}

	/* Only forward packets addressed to us */
	if (skb->pkt_type != PACKET_HOST)
		goto err;

	/* We modify the packet below; get a private copy if shared */
	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto err;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto err;

	skb_dst_drop(skb);

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt) {
		MPLS_INC_STATS(mdev, rx_noroute);
		goto drop;
	}

	nh = mpls_select_multipath(rt, skb);
	if (!nh)
		goto err;

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto err;

	skb_forward_csum(skb);

	/* Verify ttl is valid */
	if (dec.ttl <= 1)
		goto err;

	/* Find the output device */
	out_dev = nh->nh_dev;
	if (!mpls_output_possible(out_dev))
		goto tx_err;

	/* Verify the destination can hold the packet */
	new_header_size = mpls_nh_header_size(nh);
	mtu = mpls_dev_mtu(out_dev);
	if (mpls_pkt_too_big(skb, mtu - new_header_size))
		goto tx_err;

	hh_len = LL_RESERVED_SPACE(out_dev);
	if (!out_dev->header_ops)
		hh_len = 0;

	/* Ensure there is enough space for the headers in the skb */
	if (skb_cow(skb, hh_len + new_header_size))
		goto tx_err;

	skb->dev = out_dev;
	skb->protocol = htons(ETH_P_MPLS_UC);

	dec.ttl -= 1;
	if (unlikely(!new_header_size && dec.bos)) {
		/* Penultimate hop popping */
		if (!mpls_egress(net, rt, skb, dec))
			goto err;
	} else {
		bool bos;
		int i;
		skb_push(skb, new_header_size);
		skb_reset_network_header(skb);
		/* Push the new labels */
		hdr = mpls_hdr(skb);
		bos = dec.bos;
		/* Write from innermost to outermost; only the last
		 * (innermost) entry keeps the bottom-of-stack bit.
		 */
		for (i = nh->nh_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(nh->nh_label[i],
						   dec.ttl, 0, bos);
			bos = false;
		}
	}

	mpls_stats_inc_outucastpkts(net, out_dev, skb);

	/* If via wasn't specified then send out using device address */
	if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
		err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
				 out_dev->dev_addr, skb);
	else
		err = neigh_xmit(nh->nh_via_table, out_dev,
				 mpls_nh_via(rt, nh), skb);
	if (err)
		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
				    __func__, err);
	return 0;

tx_err:
	/* Output-side failure: account against the output device */
	out_mdev = out_dev ? mpls_dev_rcu(out_dev) : NULL;
	if (out_mdev)
		MPLS_INC_STATS(out_mdev, tx_errors);
	goto drop;
err:
	/* Input-side failure: account against the receiving device */
	MPLS_INC_STATS(mdev, rx_errors);
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
503
/* Protocol handler hooking mpls_forward() into RX for the MPLS unicast
 * ethertype.
 */
static struct packet_type mpls_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_MPLS_UC),
	.func = mpls_forward,
};
508
/* Netlink attribute policy for AF_MPLS route messages */
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_TTL_PROPAGATE]	= { .type = NLA_U8 },
};
514
/* Parsed parameters of a route add/delete request, filled in from the
 * netlink message before mpls_route_add()/mpls_route_del() act on it.
 */
struct mpls_route_config {
	u32			rc_protocol;	/* originating routing protocol */
	u32			rc_ifindex;	/* requested output interface (0 = unset) */
	u8			rc_via_table;	/* NEIGH_*_TABLE of the via address */
	u8			rc_via_alen;	/* length of rc_via in bytes */
	u8			rc_via[MAX_VIA_ALEN];	/* nexthop (via) address */
	u32			rc_label;	/* incoming label, or LABEL_NOT_SPECIFIED */
	u8			rc_ttl_propagate;	/* MPLS_TTL_PROP_* policy */
	u8			rc_output_labels;	/* number of labels to push */
	u32			rc_output_label[MAX_NEW_LABELS];
	u32			rc_nlflags;	/* NLM_F_* flags from the request */
	enum mpls_payload_type	rc_payload_type;
	struct nl_info		rc_nlinfo;	/* netns + notification context */
	struct rtnexthop	*rc_mp;		/* multipath nexthop list (or NULL) */
	int			rc_mp_len;	/* byte length of rc_mp */
};
531
532 /* all nexthops within a route have the same size based on max
533 * number of labels and max via length for a hop
534 */
/* Allocate a route carrying @num_nh nexthops, each sized to hold up to
 * @max_labels output labels and a via address of up to @max_alen bytes.
 * Returns ERR_PTR(-EINVAL) if the layout would exceed MAX_MPLS_ROUTE_MEM,
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
	u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
	struct mpls_route *rt;
	size_t size;

	size = sizeof(*rt) + num_nh * nh_size;
	if (size > MAX_MPLS_ROUTE_MEM)
		return ERR_PTR(-EINVAL);

	rt = kzalloc(size, GFP_KERNEL);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	rt->rt_nhn = num_nh;
	rt->rt_nhn_alive = num_nh;
	/* Per-nexthop stride and via offset are fixed for the route's
	 * lifetime; mpls_get_nexthop()/__mpls_nh_via() rely on them.
	 */
	rt->rt_nh_size = nh_size;
	rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);

	return rt;
}
556
/* RCU callback: drop each nexthop's device reference and free the route
 * once no reader can still be traversing it.
 */
static void mpls_rt_free_rcu(struct rcu_head *head)
{
	struct mpls_route *rt;

	rt = container_of(head, struct mpls_route, rt_rcu);

	change_nexthops(rt) {
		netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	} endfor_nexthops(rt);

	kfree(rt);
}
569
mpls_rt_free(struct mpls_route * rt)570 static void mpls_rt_free(struct mpls_route *rt)
571 {
572 if (rt)
573 call_rcu(&rt->rt_rcu, mpls_rt_free_rcu);
574 }
575
/* Send an RTM_NEWROUTE/RTM_DELROUTE netlink notification for a change
 * at label @index.  @old/@new describe the transition: new-only = add,
 * old-only = delete, both = replace (NLM_F_REPLACE).
 */
static void mpls_notify_route(struct net *net, unsigned index,
			      struct mpls_route *old, struct mpls_route *new,
			      const struct nl_info *info)
{
	struct nlmsghdr *nlh = info ? info->nlh : NULL;
	unsigned portid = info ? info->portid : 0;
	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
	struct mpls_route *rt = new ? new : old;
	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
	/* Ignore reserved labels for now */
	if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}
589
/* Install @new (may be NULL, meaning delete) at label @index, notify
 * listeners and release the replaced route after an RCU grace period.
 * Uses mpls_dereference() for the writer-side accesses, so the caller
 * must hold the MPLS table update lock that helper checks for.
 */
static void mpls_route_update(struct net *net, unsigned index,
			      struct mpls_route *new,
			      const struct nl_info *info)
{
	struct mpls_route __rcu **platform_label;
	struct mpls_route *rt;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	rt = mpls_dereference(net, platform_label[index]);
	/* Publish the new route before notifying/freeing the old one */
	rcu_assign_pointer(platform_label[index], new);

	mpls_notify_route(net, index, rt, new, info);

	/* If we removed a route free it now */
	mpls_rt_free(rt);
}
606
find_free_label(struct net * net)607 static unsigned int find_free_label(struct net *net)
608 {
609 unsigned int index;
610
611 for (index = MPLS_LABEL_FIRST_UNRESERVED;
612 index < net->mpls.platform_labels;
613 index++) {
614 if (!mpls_route_input(net, index))
615 return index;
616 }
617
618 return LABEL_NOT_SPECIFIED;
619 }
620
#if IS_ENABLED(CONFIG_INET)
/* Resolve the output device for an IPv4 via address through the FIB.
 * On success the device is returned with a reference taken against
 * nh->nh_dev_tracker (released later via netdev_put); on failure an
 * ERR_PTR from the route lookup is propagated.
 */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	struct net_device *dev;
	struct rtable *rt;
	struct in_addr daddr;

	memcpy(&daddr, addr, sizeof(struct in_addr));
	rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
	if (IS_ERR(rt))
		return ERR_CAST(rt);

	dev = rt->dst.dev;
	/* Hold the device before dropping the route that pins it */
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	ip_rt_put(rt);

	return dev;
}
#else
/* No IPv4 support built in: IPv4 via addresses cannot be resolved */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif
649
#if IS_ENABLED(CONFIG_IPV6)
/* Resolve the output device for an IPv6 via address through the IPv6
 * FIB (via the ipv6 stub, which may be absent when the module is not
 * loaded).  On success the device is returned with a reference taken
 * against nh->nh_dev_tracker; on failure an ERR_PTR is returned.
 */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!ipv6_stub)
		return ERR_PTR(-EAFNOSUPPORT);

	memset(&fl6, 0, sizeof(fl6));
	memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
	if (IS_ERR(dst))
		return ERR_CAST(dst);

	dev = dst->dev;
	/* Hold the device before releasing the dst that pins it */
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	dst_release(dst);

	return dev;
}
#else
/* No IPv6 support built in: IPv6 via addresses cannot be resolved */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif
682
/* Determine the output device for a nexthop: either the explicitly
 * requested ifindex @oif, or a FIB lookup on the via address when no
 * interface was given.  On success nh->nh_dev is set and a device
 * reference is held via nh->nh_dev_tracker; otherwise an ERR_PTR is
 * returned (ENODEV when nothing matched).
 */
static struct net_device *find_outdev(struct net *net,
				      struct mpls_route *rt,
				      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;

	if (!oif) {
		switch (nh->nh_via_table) {
		case NEIGH_ARP_TABLE:
			dev = inet_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_ND_TABLE:
			dev = inet6_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_LINK_TABLE:
			/* Link-layer via without an oif: no way to pick
			 * a device, fall through to the !dev error.
			 */
			break;
		}
	} else {
		dev = netdev_get_by_index(net, oif,
					  &nh->nh_dev_tracker, GFP_KERNEL);
	}

	if (!dev)
		return ERR_PTR(-ENODEV);

	if (IS_ERR(dev))
		return dev;

	nh->nh_dev = dev;

	return dev;
}
715
/* Resolve and validate the output device for @nh: it must be MPLS
 * capable and, for link-layer vias, have a matching address length.
 * Marks the nexthop RTNH_F_DEAD / RTNH_F_LINKDOWN according to the
 * device's current state.  On failure the device reference taken by
 * find_outdev() is dropped and nh->nh_dev is cleared.
 */
static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
			      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;
	int err = -ENODEV;

	dev = find_outdev(net, rt, nh, oif);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		/* find_outdev() failed: no reference was kept */
		goto errout;
	}

	/* Ensure this is a supported device */
	err = -EINVAL;
	if (!mpls_dev_get(net, dev))
		goto errout_put;

	if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
	    (dev->addr_len != nh->nh_via_alen))
		goto errout_put;

	if (!(dev->flags & IFF_UP)) {
		nh->nh_flags |= RTNH_F_DEAD;
	} else {
		unsigned int flags;

		flags = netif_get_flags(dev);
		if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
			nh->nh_flags |= RTNH_F_LINKDOWN;
	}

	return 0;

errout_put:
	netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	nh->nh_dev = NULL;
errout:
	return err;
}
755
/* Parse an RTA_VIA attribute into a neighbour table id, address length
 * and address bytes.  Returns 0 on success, -EINVAL on malformed input
 * (with an extack message for length problems).
 */
static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
		       u8 via_addr[], struct netlink_ext_ack *extack)
{
	struct rtvia *via = nla_data(nla);
	int alen;

	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid attribute length for RTA_VIA");
		return -EINVAL;
	}

	alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
	if (alen > MAX_VIA_ALEN) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid address length for RTA_VIA");
		return -EINVAL;
	}

	/* Validate the address family and check the address length
	 * matches it.
	 */
	switch (via->rtvia_family) {
	case AF_PACKET:
		*via_table = NEIGH_LINK_TABLE;
		break;
	case AF_INET:
		*via_table = NEIGH_ARP_TABLE;
		if (alen != 4)
			return -EINVAL;
		break;
	case AF_INET6:
		*via_table = NEIGH_ND_TABLE;
		if (alen != 16)
			return -EINVAL;
		break;
	default:
		/* Unsupported address family */
		return -EINVAL;
	}

	memcpy(via_addr, via->rtvia_addr, alen);
	*via_alen = alen;

	return 0;
}
803
/* Build the single nexthop of a non-multipath route from the parsed
 * request in @cfg: copy labels and via address, then resolve the output
 * device.  Decrements rt_nhn_alive when the device starts out dead or
 * link-down.  Returns 0 on success or a negative errno.
 */
static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
				  struct mpls_route *rt)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_nh *nh = rt->rt_nh;
	int err;
	int i;

	if (!nh)
		return -ENOMEM;

	nh->nh_labels = cfg->rc_output_labels;
	for (i = 0; i < nh->nh_labels; i++)
		nh->nh_label[i] = cfg->rc_output_label[i];

	nh->nh_via_table = cfg->rc_via_table;
	memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
	nh->nh_via_alen = cfg->rc_via_alen;

	err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
	if (err)
		goto errout;

	if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
		rt->rt_nhn_alive--;

	return 0;

errout:
	return err;
}
835
/* Populate one nexthop of a multipath route from its netlink
 * attributes: optional RTA_NEWDST label stack, optional RTA_VIA
 * address, then device resolution.  Returns 0 on success or a
 * negative errno.
 */
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
			 struct mpls_nh *nh, int oif, struct nlattr *via,
			 struct nlattr *newdst, u8 max_labels,
			 struct netlink_ext_ack *extack)
{
	int err;

	if (!nh)
		return -ENOMEM;

	if (newdst) {
		err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
				     nh->nh_label, extack);
		if (err)
			return err;
	}

	if (via) {
		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
				  __mpls_nh_via(rt, nh), extack);
		if (err)
			return err;
	} else {
		/* No via given: transmit using the device address */
		nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	}

	return mpls_nh_assign_dev(net, rt, nh, oif);
}
871
/* Pre-scan a multipath nexthop list: count the nexthops and report the
 * largest via address length and label count seen, so the route can be
 * allocated with a uniform per-nexthop size.  Returns 0 (invalid) on
 * bad label attributes, nexthop-count overflow, or trailing bytes.
 * NOTE(review): @cfg_via_alen is currently unused in this body.
 */
static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
			      u8 cfg_via_alen, u8 *max_via_alen,
			      u8 *max_labels)
{
	int remaining = len;
	u8 nhs = 0;

	*max_via_alen = 0;
	*max_labels = 0;

	while (rtnh_ok(rtnh, remaining)) {
		struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
		int attrlen;
		u8 n_labels = 0;

		attrlen = rtnh_attrlen(rtnh);
		nla = nla_find(attrs, attrlen, RTA_VIA);
		if (nla && nla_len(nla) >=
		    offsetof(struct rtvia, rtvia_addr)) {
			int via_alen = nla_len(nla) -
				offsetof(struct rtvia, rtvia_addr);

			/* Oversized vias are ignored here; the later
			 * nla_get_via() pass rejects them.
			 */
			if (via_alen <= MAX_VIA_ALEN)
				*max_via_alen = max_t(u16, *max_via_alen,
						      via_alen);
		}

		nla = nla_find(attrs, attrlen, RTA_NEWDST);
		if (nla &&
		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
				   NULL, NULL) != 0)
			return 0;

		*max_labels = max_t(u8, *max_labels, n_labels);

		/* number of nexthops is tracked by a u8.
		 * Check for overflow.
		 */
		if (nhs == 255)
			return 0;
		nhs++;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
920
/* Build all nexthops of a multipath route from the RTA_MULTIPATH data
 * in @cfg.  rt_nhn was pre-sized by mpls_count_nexthops(); it is
 * re-counted here as each hop is successfully built so a partially
 * constructed route reflects only the valid entries.  Returns 0 on
 * success or a negative errno.
 */
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
			       struct mpls_route *rt, u8 max_labels,
			       struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = cfg->rc_mp;
	struct nlattr *nla_via, *nla_newdst;
	int remaining = cfg->rc_mp_len;
	int err = 0;

	rt->rt_nhn = 0;

	change_nexthops(rt) {
		int attrlen;

		nla_via = NULL;
		nla_newdst = NULL;

		err = -EINVAL;
		if (!rtnh_ok(rtnh, remaining))
			goto errout;

		/* neither weighted multipath nor any flags
		 * are supported
		 */
		if (rtnh->rtnh_hops || rtnh->rtnh_flags)
			goto errout;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *attrs = rtnh_attrs(rtnh);

			nla_via = nla_find(attrs, attrlen, RTA_VIA);
			nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
		}

		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
				    rtnh->rtnh_ifindex, nla_via, nla_newdst,
				    max_labels, extack);
		if (err)
			goto errout;

		if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			rt->rt_nhn_alive--;

		rtnh = rtnh_next(rtnh, &remaining);
		rt->rt_nhn++;
	} endfor_nexthops(rt);

	return 0;

errout:
	return err;
}
974
/* Validate a user-supplied label index: must be unreserved and within
 * the configured platform label table.  On success *index is clamped
 * with array_index_nospec() so later array accesses cannot be used for
 * speculative out-of-bounds reads (Spectre-v1 hardening).
 */
static bool mpls_label_ok(struct net *net, unsigned int *index,
			  struct netlink_ext_ack *extack)
{
	/* Reserved labels may not be set */
	if (*index < MPLS_LABEL_FIRST_UNRESERVED) {
		NL_SET_ERR_MSG(extack,
			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
		return false;
	}

	/* The full 20 bit range may not be supported. */
	if (*index >= net->mpls.platform_labels) {
		NL_SET_ERR_MSG(extack,
			       "Label >= configured maximum in platform_labels");
		return false;
	}

	/* Must follow both range checks above */
	*index = array_index_nospec(*index, net->mpls.platform_labels);

	return true;
}
996
/* Add or replace the route for a label according to the parsed request
 * in @cfg, honouring the NLM_F_CREATE/REPLACE/EXCL semantics.  If no
 * label was specified and NLM_F_CREATE is set, the first free label is
 * chosen.  Returns 0 on success or a negative errno.
 */
static int mpls_route_add(struct mpls_route_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_route *rt, *old;
	int err = -EINVAL;
	u8 max_via_alen;
	unsigned index;
	u8 max_labels;
	u8 nhs;

	index = cfg->rc_label;

	/* If a label was not specified during insert pick one */
	if ((index == LABEL_NOT_SPECIFIED) &&
	    (cfg->rc_nlflags & NLM_F_CREATE)) {
		index = find_free_label(net);
	}

	if (!mpls_label_ok(net, &index, extack))
		goto errout;

	/* Append makes no sense with mpls */
	err = -EOPNOTSUPP;
	if (cfg->rc_nlflags & NLM_F_APPEND) {
		NL_SET_ERR_MSG(extack, "MPLS does not support route append");
		goto errout;
	}

	err = -EEXIST;
	old = mpls_route_input(net, index);
	if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
		goto errout;

	err = -EEXIST;
	if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
		goto errout;

	err = -ENOENT;
	if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
		goto errout;

	err = -EINVAL;
	/* Size the route for the largest nexthop before building them */
	if (cfg->rc_mp) {
		nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
					  cfg->rc_via_alen, &max_via_alen,
					  &max_labels);
	} else {
		max_via_alen = cfg->rc_via_alen;
		max_labels = cfg->rc_output_labels;
		nhs = 1;
	}

	if (nhs == 0) {
		NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
		goto errout;
	}

	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto errout;
	}

	rt->rt_protocol = cfg->rc_protocol;
	rt->rt_payload_type = cfg->rc_payload_type;
	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;

	if (cfg->rc_mp)
		err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
	else
		err = mpls_nh_build_from_cfg(cfg, rt);
	if (err)
		goto freert;

	/* Publish the route (replacing and freeing any old one) */
	mpls_route_update(net, index, rt, &cfg->rc_nlinfo);

	return 0;

freert:
	mpls_rt_free(rt);
errout:
	return err;
}
1081
mpls_route_del(struct mpls_route_config * cfg,struct netlink_ext_ack * extack)1082 static int mpls_route_del(struct mpls_route_config *cfg,
1083 struct netlink_ext_ack *extack)
1084 {
1085 struct net *net = cfg->rc_nlinfo.nl_net;
1086 unsigned index;
1087 int err = -EINVAL;
1088
1089 index = cfg->rc_label;
1090
1091 if (!mpls_label_ok(net, &index, extack))
1092 goto errout;
1093
1094 mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
1095
1096 err = 0;
1097 errout:
1098 return err;
1099 }
1100
/* Sum the per-CPU MPLS link statistics of @mdev into @stats.
 * Each CPU's counters are read under its u64_stats seqcount retry loop
 * so a consistent snapshot is taken even while that CPU updates them.
 */
static void mpls_get_stats(struct mpls_dev *mdev,
			   struct mpls_link_stats *stats)
{
	struct mpls_pcpu_stats *p;
	int i;

	memset(stats, 0, sizeof(*stats));

	for_each_possible_cpu(i) {
		struct mpls_link_stats local;
		unsigned int start;

		p = per_cpu_ptr(mdev->stats, i);
		do {
			start = u64_stats_fetch_begin(&p->syncp);
			local = p->stats;
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets	+= local.rx_packets;
		stats->rx_bytes		+= local.rx_bytes;
		stats->tx_packets	+= local.tx_packets;
		stats->tx_bytes		+= local.tx_bytes;
		stats->rx_errors	+= local.rx_errors;
		stats->tx_errors	+= local.tx_errors;
		stats->rx_dropped	+= local.rx_dropped;
		stats->tx_dropped	+= local.tx_dropped;
		stats->rx_noroute	+= local.rx_noroute;
	}
}
1130
mpls_fill_stats_af(struct sk_buff * skb,const struct net_device * dev)1131 static int mpls_fill_stats_af(struct sk_buff *skb,
1132 const struct net_device *dev)
1133 {
1134 struct mpls_link_stats *stats;
1135 struct mpls_dev *mdev;
1136 struct nlattr *nla;
1137
1138 mdev = mpls_dev_rcu(dev);
1139 if (!mdev)
1140 return -ENODATA;
1141
1142 nla = nla_reserve_64bit(skb, MPLS_STATS_LINK,
1143 sizeof(struct mpls_link_stats),
1144 MPLS_STATS_UNSPEC);
1145 if (!nla)
1146 return -EMSGSIZE;
1147
1148 stats = nla_data(nla);
1149 mpls_get_stats(mdev, stats);
1150
1151 return 0;
1152 }
1153
mpls_get_stats_af_size(const struct net_device * dev)1154 static size_t mpls_get_stats_af_size(const struct net_device *dev)
1155 {
1156 struct mpls_dev *mdev;
1157
1158 mdev = mpls_dev_rcu(dev);
1159 if (!mdev)
1160 return 0;
1161
1162 return nla_total_size_64bit(sizeof(struct mpls_link_stats));
1163 }
1164
mpls_netconf_fill_devconf(struct sk_buff * skb,struct mpls_dev * mdev,u32 portid,u32 seq,int event,unsigned int flags,int type)1165 static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
1166 u32 portid, u32 seq, int event,
1167 unsigned int flags, int type)
1168 {
1169 struct nlmsghdr *nlh;
1170 struct netconfmsg *ncm;
1171 bool all = false;
1172
1173 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1174 flags);
1175 if (!nlh)
1176 return -EMSGSIZE;
1177
1178 if (type == NETCONFA_ALL)
1179 all = true;
1180
1181 ncm = nlmsg_data(nlh);
1182 ncm->ncm_family = AF_MPLS;
1183
1184 if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0)
1185 goto nla_put_failure;
1186
1187 if ((all || type == NETCONFA_INPUT) &&
1188 nla_put_s32(skb, NETCONFA_INPUT,
1189 READ_ONCE(mdev->input_enabled)) < 0)
1190 goto nla_put_failure;
1191
1192 nlmsg_end(skb, nlh);
1193 return 0;
1194
1195 nla_put_failure:
1196 nlmsg_cancel(skb, nlh);
1197 return -EMSGSIZE;
1198 }
1199
mpls_netconf_msgsize_devconf(int type)1200 static int mpls_netconf_msgsize_devconf(int type)
1201 {
1202 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1203 + nla_total_size(4); /* NETCONFA_IFINDEX */
1204 bool all = false;
1205
1206 if (type == NETCONFA_ALL)
1207 all = true;
1208
1209 if (all || type == NETCONFA_INPUT)
1210 size += nla_total_size(4);
1211
1212 return size;
1213 }
1214
mpls_netconf_notify_devconf(struct net * net,int event,int type,struct mpls_dev * mdev)1215 static void mpls_netconf_notify_devconf(struct net *net, int event,
1216 int type, struct mpls_dev *mdev)
1217 {
1218 struct sk_buff *skb;
1219 int err = -ENOBUFS;
1220
1221 skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL);
1222 if (!skb)
1223 goto errout;
1224
1225 err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
1226 if (err < 0) {
1227 /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
1228 WARN_ON(err == -EMSGSIZE);
1229 kfree_skb(skb);
1230 goto errout;
1231 }
1232
1233 rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
1234 return;
1235 errout:
1236 rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
1237 }
1238
/* Netlink attribute policy for RTM_GETNETCONF requests: userspace may
 * only supply NETCONFA_IFINDEX.
 */
static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
	[NETCONFA_IFINDEX] = { .len = sizeof(int) },
};
1242
mpls_netconf_valid_get_req(struct sk_buff * skb,const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)1243 static int mpls_netconf_valid_get_req(struct sk_buff *skb,
1244 const struct nlmsghdr *nlh,
1245 struct nlattr **tb,
1246 struct netlink_ext_ack *extack)
1247 {
1248 int i, err;
1249
1250 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
1251 NL_SET_ERR_MSG_MOD(extack,
1252 "Invalid header for netconf get request");
1253 return -EINVAL;
1254 }
1255
1256 if (!netlink_strict_get_check(skb))
1257 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
1258 tb, NETCONFA_MAX,
1259 devconf_mpls_policy, extack);
1260
1261 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
1262 tb, NETCONFA_MAX,
1263 devconf_mpls_policy, extack);
1264 if (err)
1265 return err;
1266
1267 for (i = 0; i <= NETCONFA_MAX; i++) {
1268 if (!tb[i])
1269 continue;
1270
1271 switch (i) {
1272 case NETCONFA_IFINDEX:
1273 break;
1274 default:
1275 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
1276 return -EINVAL;
1277 }
1278 }
1279
1280 return 0;
1281 }
1282
mpls_netconf_get_devconf(struct sk_buff * in_skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1283 static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
1284 struct nlmsghdr *nlh,
1285 struct netlink_ext_ack *extack)
1286 {
1287 struct net *net = sock_net(in_skb->sk);
1288 struct nlattr *tb[NETCONFA_MAX + 1];
1289 struct net_device *dev;
1290 struct mpls_dev *mdev;
1291 struct sk_buff *skb;
1292 int ifindex;
1293 int err;
1294
1295 err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack);
1296 if (err < 0)
1297 goto errout;
1298
1299 if (!tb[NETCONFA_IFINDEX]) {
1300 err = -EINVAL;
1301 goto errout;
1302 }
1303
1304 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1305
1306 skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1307 if (!skb) {
1308 err = -ENOBUFS;
1309 goto errout;
1310 }
1311
1312 rcu_read_lock();
1313
1314 dev = dev_get_by_index_rcu(net, ifindex);
1315 if (!dev) {
1316 err = -EINVAL;
1317 goto errout_unlock;
1318 }
1319
1320 mdev = mpls_dev_rcu(dev);
1321 if (!mdev) {
1322 err = -EINVAL;
1323 goto errout_unlock;
1324 }
1325
1326 err = mpls_netconf_fill_devconf(skb, mdev,
1327 NETLINK_CB(in_skb).portid,
1328 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1329 NETCONFA_ALL);
1330 if (err < 0) {
1331 /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
1332 WARN_ON(err == -EMSGSIZE);
1333 goto errout_unlock;
1334 }
1335
1336 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1337
1338 rcu_read_unlock();
1339 errout:
1340 return err;
1341
1342 errout_unlock:
1343 rcu_read_unlock();
1344 kfree_skb(skb);
1345 goto errout;
1346 }
1347
mpls_netconf_dump_devconf(struct sk_buff * skb,struct netlink_callback * cb)1348 static int mpls_netconf_dump_devconf(struct sk_buff *skb,
1349 struct netlink_callback *cb)
1350 {
1351 const struct nlmsghdr *nlh = cb->nlh;
1352 struct net *net = sock_net(skb->sk);
1353 struct {
1354 unsigned long ifindex;
1355 } *ctx = (void *)cb->ctx;
1356 struct net_device *dev;
1357 struct mpls_dev *mdev;
1358 int err = 0;
1359
1360 if (cb->strict_check) {
1361 struct netlink_ext_ack *extack = cb->extack;
1362 struct netconfmsg *ncm;
1363
1364 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
1365 NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
1366 return -EINVAL;
1367 }
1368
1369 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
1370 NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
1371 return -EINVAL;
1372 }
1373 }
1374
1375 rcu_read_lock();
1376 for_each_netdev_dump(net, dev, ctx->ifindex) {
1377 mdev = mpls_dev_rcu(dev);
1378 if (!mdev)
1379 continue;
1380 err = mpls_netconf_fill_devconf(skb, mdev,
1381 NETLINK_CB(cb->skb).portid,
1382 nlh->nlmsg_seq,
1383 RTM_NEWNETCONF,
1384 NLM_F_MULTI,
1385 NETCONFA_ALL);
1386 if (err < 0)
1387 break;
1388 }
1389 rcu_read_unlock();
1390
1391 return err;
1392 }
1393
/* Encode the offset of @field within struct mpls_dev as a placeholder
 * pointer; mpls_dev_sysctl_register() rebases it onto a concrete mdev.
 */
#define MPLS_PERDEV_SYSCTL_OFFSET(field) \
	(&((struct mpls_dev *)0)->field)
1396
mpls_conf_proc(const struct ctl_table * ctl,int write,void * buffer,size_t * lenp,loff_t * ppos)1397 static int mpls_conf_proc(const struct ctl_table *ctl, int write,
1398 void *buffer, size_t *lenp, loff_t *ppos)
1399 {
1400 int oval = *(int *)ctl->data;
1401 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1402
1403 if (write) {
1404 struct mpls_dev *mdev = ctl->extra1;
1405 int i = (int *)ctl->data - (int *)mdev;
1406 struct net *net = ctl->extra2;
1407 int val = *(int *)ctl->data;
1408
1409 if (i == offsetof(struct mpls_dev, input_enabled) &&
1410 val != oval) {
1411 mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
1412 NETCONFA_INPUT, mdev);
1413 }
1414 }
1415
1416 return ret;
1417 }
1418
/* Template for the per-device "net/mpls/conf/<dev>" sysctl directory.
 * .data holds a struct-relative offset (see MPLS_PERDEV_SYSCTL_OFFSET)
 * that is made absolute per device at registration time.
 */
static const struct ctl_table mpls_dev_table[] = {
	{
		.procname	= "input",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_conf_proc,
		.data		= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
	},
};
1428
mpls_dev_sysctl_register(struct net_device * dev,struct mpls_dev * mdev)1429 static int mpls_dev_sysctl_register(struct net_device *dev,
1430 struct mpls_dev *mdev)
1431 {
1432 char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
1433 size_t table_size = ARRAY_SIZE(mpls_dev_table);
1434 struct net *net = dev_net(dev);
1435 struct ctl_table *table;
1436 int i;
1437
1438 table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
1439 if (!table)
1440 goto out;
1441
1442 /* Table data contains only offsets relative to the base of
1443 * the mdev at this point, so make them absolute.
1444 */
1445 for (i = 0; i < table_size; i++) {
1446 table[i].data = (char *)mdev + (uintptr_t)table[i].data;
1447 table[i].extra1 = mdev;
1448 table[i].extra2 = net;
1449 }
1450
1451 snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
1452
1453 mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size);
1454 if (!mdev->sysctl)
1455 goto free;
1456
1457 mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
1458 return 0;
1459
1460 free:
1461 kfree(table);
1462 out:
1463 mdev->sysctl = NULL;
1464 return -ENOBUFS;
1465 }
1466
mpls_dev_sysctl_unregister(struct net_device * dev,struct mpls_dev * mdev)1467 static void mpls_dev_sysctl_unregister(struct net_device *dev,
1468 struct mpls_dev *mdev)
1469 {
1470 struct net *net = dev_net(dev);
1471 const struct ctl_table *table;
1472
1473 if (!mdev->sysctl)
1474 return;
1475
1476 table = mdev->sysctl->ctl_table_arg;
1477 unregister_net_sysctl_table(mdev->sysctl);
1478 kfree(table);
1479
1480 mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
1481 }
1482
mpls_add_dev(struct net_device * dev)1483 static struct mpls_dev *mpls_add_dev(struct net_device *dev)
1484 {
1485 struct mpls_dev *mdev;
1486 int err = -ENOMEM;
1487 int i;
1488
1489 mdev = kzalloc_obj(*mdev);
1490 if (!mdev)
1491 return ERR_PTR(err);
1492
1493 mdev->stats = alloc_percpu(struct mpls_pcpu_stats);
1494 if (!mdev->stats)
1495 goto free;
1496
1497 for_each_possible_cpu(i) {
1498 struct mpls_pcpu_stats *mpls_stats;
1499
1500 mpls_stats = per_cpu_ptr(mdev->stats, i);
1501 u64_stats_init(&mpls_stats->syncp);
1502 }
1503
1504 mdev->dev = dev;
1505
1506 err = mpls_dev_sysctl_register(dev, mdev);
1507 if (err)
1508 goto free;
1509
1510 rcu_assign_pointer(dev->mpls_ptr, mdev);
1511
1512 return mdev;
1513
1514 free:
1515 free_percpu(mdev->stats);
1516 kfree(mdev);
1517 return ERR_PTR(err);
1518 }
1519
mpls_dev_destroy_rcu(struct rcu_head * head)1520 static void mpls_dev_destroy_rcu(struct rcu_head *head)
1521 {
1522 struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu);
1523
1524 free_percpu(mdev->stats);
1525 kfree(mdev);
1526 }
1527
mpls_ifdown(struct net_device * dev,int event)1528 static int mpls_ifdown(struct net_device *dev, int event)
1529 {
1530 struct net *net = dev_net(dev);
1531 unsigned int index;
1532
1533 for (index = 0; index < net->mpls.platform_labels; index++) {
1534 struct mpls_route *rt;
1535 bool nh_del = false;
1536 u8 alive = 0;
1537
1538 rt = mpls_route_input(net, index);
1539 if (!rt)
1540 continue;
1541
1542 if (event == NETDEV_UNREGISTER) {
1543 u8 deleted = 0;
1544
1545 for_nexthops(rt) {
1546 if (!nh->nh_dev || nh->nh_dev == dev)
1547 deleted++;
1548 if (nh->nh_dev == dev)
1549 nh_del = true;
1550 } endfor_nexthops(rt);
1551
1552 /* if there are no more nexthops, delete the route */
1553 if (deleted == rt->rt_nhn) {
1554 mpls_route_update(net, index, NULL, NULL);
1555 continue;
1556 }
1557
1558 if (nh_del) {
1559 size_t size = sizeof(*rt) + rt->rt_nhn *
1560 rt->rt_nh_size;
1561 struct mpls_route *orig = rt;
1562
1563 rt = kmemdup(orig, size, GFP_KERNEL);
1564 if (!rt)
1565 return -ENOMEM;
1566 }
1567 }
1568
1569 change_nexthops(rt) {
1570 unsigned int nh_flags = nh->nh_flags;
1571
1572 if (nh->nh_dev != dev) {
1573 if (nh_del)
1574 netdev_hold(nh->nh_dev, &nh->nh_dev_tracker,
1575 GFP_KERNEL);
1576 goto next;
1577 }
1578
1579 switch (event) {
1580 case NETDEV_DOWN:
1581 case NETDEV_UNREGISTER:
1582 nh_flags |= RTNH_F_DEAD;
1583 fallthrough;
1584 case NETDEV_CHANGE:
1585 nh_flags |= RTNH_F_LINKDOWN;
1586 break;
1587 }
1588 if (event == NETDEV_UNREGISTER)
1589 nh->nh_dev = NULL;
1590
1591 if (nh->nh_flags != nh_flags)
1592 WRITE_ONCE(nh->nh_flags, nh_flags);
1593 next:
1594 if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
1595 alive++;
1596 } endfor_nexthops(rt);
1597
1598 WRITE_ONCE(rt->rt_nhn_alive, alive);
1599
1600 if (nh_del)
1601 mpls_route_update(net, index, rt, NULL);
1602 }
1603
1604 return 0;
1605 }
1606
mpls_ifup(struct net_device * dev,unsigned int flags)1607 static void mpls_ifup(struct net_device *dev, unsigned int flags)
1608 {
1609 struct net *net = dev_net(dev);
1610 unsigned int index;
1611 u8 alive;
1612
1613 for (index = 0; index < net->mpls.platform_labels; index++) {
1614 struct mpls_route *rt;
1615
1616 rt = mpls_route_input(net, index);
1617 if (!rt)
1618 continue;
1619
1620 alive = 0;
1621 change_nexthops(rt) {
1622 unsigned int nh_flags = nh->nh_flags;
1623
1624 if (!(nh_flags & flags)) {
1625 alive++;
1626 continue;
1627 }
1628 if (nh->nh_dev != dev)
1629 continue;
1630 alive++;
1631 nh_flags &= ~flags;
1632 WRITE_ONCE(nh->nh_flags, nh_flags);
1633 } endfor_nexthops(rt);
1634
1635 WRITE_ONCE(rt->rt_nhn_alive, alive);
1636 }
1637 }
1638
mpls_dev_notify(struct notifier_block * this,unsigned long event,void * ptr)1639 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
1640 void *ptr)
1641 {
1642 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1643 struct net *net = dev_net(dev);
1644 struct mpls_dev *mdev;
1645 unsigned int flags;
1646 int err;
1647
1648 mutex_lock(&net->mpls.platform_mutex);
1649
1650 if (event == NETDEV_REGISTER) {
1651 mdev = mpls_add_dev(dev);
1652 if (IS_ERR(mdev)) {
1653 err = PTR_ERR(mdev);
1654 goto err;
1655 }
1656
1657 goto out;
1658 }
1659
1660 mdev = mpls_dev_get(net, dev);
1661 if (!mdev)
1662 goto out;
1663
1664 switch (event) {
1665
1666 case NETDEV_DOWN:
1667 err = mpls_ifdown(dev, event);
1668 if (err)
1669 goto err;
1670 break;
1671 case NETDEV_UP:
1672 flags = netif_get_flags(dev);
1673 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1674 mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
1675 else
1676 mpls_ifup(dev, RTNH_F_DEAD);
1677 break;
1678 case NETDEV_CHANGE:
1679 flags = netif_get_flags(dev);
1680 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
1681 mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
1682 } else {
1683 err = mpls_ifdown(dev, event);
1684 if (err)
1685 goto err;
1686 }
1687 break;
1688 case NETDEV_UNREGISTER:
1689 err = mpls_ifdown(dev, event);
1690 if (err)
1691 goto err;
1692
1693 mdev = mpls_dev_get(net, dev);
1694 if (mdev) {
1695 mpls_dev_sysctl_unregister(dev, mdev);
1696 RCU_INIT_POINTER(dev->mpls_ptr, NULL);
1697 call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
1698 }
1699 break;
1700 case NETDEV_CHANGENAME:
1701 mdev = mpls_dev_get(net, dev);
1702 if (mdev) {
1703 mpls_dev_sysctl_unregister(dev, mdev);
1704 err = mpls_dev_sysctl_register(dev, mdev);
1705 if (err)
1706 goto err;
1707 }
1708 break;
1709 }
1710
1711 out:
1712 mutex_unlock(&net->mpls.platform_mutex);
1713 return NOTIFY_OK;
1714
1715 err:
1716 mutex_unlock(&net->mpls.platform_mutex);
1717 return notifier_from_errno(err);
1718 }
1719
/* Hook mpls_dev_notify() into the netdevice notifier chain. */
static struct notifier_block mpls_dev_notifier = {
	.notifier_call = mpls_dev_notify,
};
1723
nla_put_via(struct sk_buff * skb,u8 table,const void * addr,int alen)1724 static int nla_put_via(struct sk_buff *skb,
1725 u8 table, const void *addr, int alen)
1726 {
1727 static const int table_to_family[NEIGH_NR_TABLES + 1] = {
1728 AF_INET, AF_INET6, AF_PACKET,
1729 };
1730 struct nlattr *nla;
1731 struct rtvia *via;
1732 int family = AF_UNSPEC;
1733
1734 nla = nla_reserve(skb, RTA_VIA, alen + 2);
1735 if (!nla)
1736 return -EMSGSIZE;
1737
1738 if (table <= NEIGH_NR_TABLES)
1739 family = table_to_family[table];
1740
1741 via = nla_data(nla);
1742 via->rtvia_family = family;
1743 memcpy(via->rtvia_addr, addr, alen);
1744 return 0;
1745 }
1746
nla_put_labels(struct sk_buff * skb,int attrtype,u8 labels,const u32 label[])1747 int nla_put_labels(struct sk_buff *skb, int attrtype,
1748 u8 labels, const u32 label[])
1749 {
1750 struct nlattr *nla;
1751 struct mpls_shim_hdr *nla_label;
1752 bool bos;
1753 int i;
1754 nla = nla_reserve(skb, attrtype, labels*4);
1755 if (!nla)
1756 return -EMSGSIZE;
1757
1758 nla_label = nla_data(nla);
1759 bos = true;
1760 for (i = labels - 1; i >= 0; i--) {
1761 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
1762 bos = false;
1763 }
1764
1765 return 0;
1766 }
1767 EXPORT_SYMBOL_GPL(nla_put_labels);
1768
nla_get_labels(const struct nlattr * nla,u8 max_labels,u8 * labels,u32 label[],struct netlink_ext_ack * extack)1769 int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
1770 u32 label[], struct netlink_ext_ack *extack)
1771 {
1772 unsigned len = nla_len(nla);
1773 struct mpls_shim_hdr *nla_label;
1774 u8 nla_labels;
1775 bool bos;
1776 int i;
1777
1778 /* len needs to be an even multiple of 4 (the label size). Number
1779 * of labels is a u8 so check for overflow.
1780 */
1781 if (len & 3 || len / 4 > 255) {
1782 NL_SET_ERR_MSG_ATTR(extack, nla,
1783 "Invalid length for labels attribute");
1784 return -EINVAL;
1785 }
1786
1787 /* Limit the number of new labels allowed */
1788 nla_labels = len/4;
1789 if (nla_labels > max_labels) {
1790 NL_SET_ERR_MSG(extack, "Too many labels");
1791 return -EINVAL;
1792 }
1793
1794 /* when label == NULL, caller wants number of labels */
1795 if (!label)
1796 goto out;
1797
1798 nla_label = nla_data(nla);
1799 bos = true;
1800 for (i = nla_labels - 1; i >= 0; i--, bos = false) {
1801 struct mpls_entry_decoded dec;
1802 dec = mpls_entry_decode(nla_label + i);
1803
1804 /* Ensure the bottom of stack flag is properly set
1805 * and ttl and tc are both clear.
1806 */
1807 if (dec.ttl) {
1808 NL_SET_ERR_MSG_ATTR(extack, nla,
1809 "TTL in label must be 0");
1810 return -EINVAL;
1811 }
1812
1813 if (dec.tc) {
1814 NL_SET_ERR_MSG_ATTR(extack, nla,
1815 "Traffic class in label must be 0");
1816 return -EINVAL;
1817 }
1818
1819 if (dec.bos != bos) {
1820 NL_SET_BAD_ATTR(extack, nla);
1821 if (bos) {
1822 NL_SET_ERR_MSG(extack,
1823 "BOS bit must be set in first label");
1824 } else {
1825 NL_SET_ERR_MSG(extack,
1826 "BOS bit can only be set in first label");
1827 }
1828 return -EINVAL;
1829 }
1830
1831 switch (dec.label) {
1832 case MPLS_LABEL_IMPLNULL:
1833 /* RFC3032: This is a label that an LSR may
1834 * assign and distribute, but which never
1835 * actually appears in the encapsulation.
1836 */
1837 NL_SET_ERR_MSG_ATTR(extack, nla,
1838 "Implicit NULL Label (3) can not be used in encapsulation");
1839 return -EINVAL;
1840 }
1841
1842 label[i] = dec.label;
1843 }
1844 out:
1845 *labels = nla_labels;
1846 return 0;
1847 }
1848 EXPORT_SYMBOL_GPL(nla_get_labels);
1849
rtm_to_route_config(struct sk_buff * skb,struct nlmsghdr * nlh,struct mpls_route_config * cfg,struct netlink_ext_ack * extack)1850 static int rtm_to_route_config(struct sk_buff *skb,
1851 struct nlmsghdr *nlh,
1852 struct mpls_route_config *cfg,
1853 struct netlink_ext_ack *extack)
1854 {
1855 struct rtmsg *rtm;
1856 struct nlattr *tb[RTA_MAX+1];
1857 int index;
1858 int err;
1859
1860 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
1861 rtm_mpls_policy, extack);
1862 if (err < 0)
1863 goto errout;
1864
1865 err = -EINVAL;
1866 rtm = nlmsg_data(nlh);
1867
1868 if (rtm->rtm_family != AF_MPLS) {
1869 NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg");
1870 goto errout;
1871 }
1872 if (rtm->rtm_dst_len != 20) {
1873 NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS");
1874 goto errout;
1875 }
1876 if (rtm->rtm_src_len != 0) {
1877 NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS");
1878 goto errout;
1879 }
1880 if (rtm->rtm_tos != 0) {
1881 NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS");
1882 goto errout;
1883 }
1884 if (rtm->rtm_table != RT_TABLE_MAIN) {
1885 NL_SET_ERR_MSG(extack,
1886 "MPLS only supports the main route table");
1887 goto errout;
1888 }
1889 /* Any value is acceptable for rtm_protocol */
1890
1891 /* As mpls uses destination specific addresses
1892 * (or source specific address in the case of multicast)
1893 * all addresses have universal scope.
1894 */
1895 if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) {
1896 NL_SET_ERR_MSG(extack,
1897 "Invalid route scope - MPLS only supports UNIVERSE");
1898 goto errout;
1899 }
1900 if (rtm->rtm_type != RTN_UNICAST) {
1901 NL_SET_ERR_MSG(extack,
1902 "Invalid route type - MPLS only supports UNICAST");
1903 goto errout;
1904 }
1905 if (rtm->rtm_flags != 0) {
1906 NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS");
1907 goto errout;
1908 }
1909
1910 cfg->rc_label = LABEL_NOT_SPECIFIED;
1911 cfg->rc_protocol = rtm->rtm_protocol;
1912 cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
1913 cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
1914 cfg->rc_nlflags = nlh->nlmsg_flags;
1915 cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
1916 cfg->rc_nlinfo.nlh = nlh;
1917 cfg->rc_nlinfo.nl_net = sock_net(skb->sk);
1918
1919 for (index = 0; index <= RTA_MAX; index++) {
1920 struct nlattr *nla = tb[index];
1921 if (!nla)
1922 continue;
1923
1924 switch (index) {
1925 case RTA_OIF:
1926 cfg->rc_ifindex = nla_get_u32(nla);
1927 break;
1928 case RTA_NEWDST:
1929 if (nla_get_labels(nla, MAX_NEW_LABELS,
1930 &cfg->rc_output_labels,
1931 cfg->rc_output_label, extack))
1932 goto errout;
1933 break;
1934 case RTA_DST:
1935 {
1936 u8 label_count;
1937 if (nla_get_labels(nla, 1, &label_count,
1938 &cfg->rc_label, extack))
1939 goto errout;
1940
1941 if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
1942 &cfg->rc_label, extack))
1943 goto errout;
1944 break;
1945 }
1946 case RTA_GATEWAY:
1947 NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute");
1948 goto errout;
1949 case RTA_VIA:
1950 {
1951 if (nla_get_via(nla, &cfg->rc_via_alen,
1952 &cfg->rc_via_table, cfg->rc_via,
1953 extack))
1954 goto errout;
1955 break;
1956 }
1957 case RTA_MULTIPATH:
1958 {
1959 cfg->rc_mp = nla_data(nla);
1960 cfg->rc_mp_len = nla_len(nla);
1961 break;
1962 }
1963 case RTA_TTL_PROPAGATE:
1964 {
1965 u8 ttl_propagate = nla_get_u8(nla);
1966
1967 if (ttl_propagate > 1) {
1968 NL_SET_ERR_MSG_ATTR(extack, nla,
1969 "RTA_TTL_PROPAGATE can only be 0 or 1");
1970 goto errout;
1971 }
1972 cfg->rc_ttl_propagate = ttl_propagate ?
1973 MPLS_TTL_PROP_ENABLED :
1974 MPLS_TTL_PROP_DISABLED;
1975 break;
1976 }
1977 default:
1978 NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute");
1979 /* Unsupported attribute */
1980 goto errout;
1981 }
1982 }
1983
1984 err = 0;
1985 errout:
1986 return err;
1987 }
1988
mpls_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1989 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1990 struct netlink_ext_ack *extack)
1991 {
1992 struct net *net = sock_net(skb->sk);
1993 struct mpls_route_config *cfg;
1994 int err;
1995
1996 cfg = kzalloc_obj(*cfg);
1997 if (!cfg)
1998 return -ENOMEM;
1999
2000 err = rtm_to_route_config(skb, nlh, cfg, extack);
2001 if (err < 0)
2002 goto out;
2003
2004 mutex_lock(&net->mpls.platform_mutex);
2005 err = mpls_route_del(cfg, extack);
2006 mutex_unlock(&net->mpls.platform_mutex);
2007 out:
2008 kfree(cfg);
2009
2010 return err;
2011 }
2012
2013
mpls_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2014 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
2015 struct netlink_ext_ack *extack)
2016 {
2017 struct net *net = sock_net(skb->sk);
2018 struct mpls_route_config *cfg;
2019 int err;
2020
2021 cfg = kzalloc_obj(*cfg);
2022 if (!cfg)
2023 return -ENOMEM;
2024
2025 err = rtm_to_route_config(skb, nlh, cfg, extack);
2026 if (err < 0)
2027 goto out;
2028
2029 mutex_lock(&net->mpls.platform_mutex);
2030 err = mpls_route_add(cfg, extack);
2031 mutex_unlock(&net->mpls.platform_mutex);
2032 out:
2033 kfree(cfg);
2034
2035 return err;
2036 }
2037
mpls_dump_route(struct sk_buff * skb,u32 portid,u32 seq,int event,u32 label,struct mpls_route * rt,int flags)2038 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
2039 u32 label, struct mpls_route *rt, int flags)
2040 {
2041 struct net_device *dev;
2042 struct nlmsghdr *nlh;
2043 struct rtmsg *rtm;
2044
2045 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
2046 if (nlh == NULL)
2047 return -EMSGSIZE;
2048
2049 rtm = nlmsg_data(nlh);
2050 rtm->rtm_family = AF_MPLS;
2051 rtm->rtm_dst_len = 20;
2052 rtm->rtm_src_len = 0;
2053 rtm->rtm_tos = 0;
2054 rtm->rtm_table = RT_TABLE_MAIN;
2055 rtm->rtm_protocol = rt->rt_protocol;
2056 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2057 rtm->rtm_type = RTN_UNICAST;
2058 rtm->rtm_flags = 0;
2059
2060 if (nla_put_labels(skb, RTA_DST, 1, &label))
2061 goto nla_put_failure;
2062
2063 if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
2064 bool ttl_propagate =
2065 rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
2066
2067 if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
2068 ttl_propagate))
2069 goto nla_put_failure;
2070 }
2071 if (rt->rt_nhn == 1) {
2072 const struct mpls_nh *nh = rt->rt_nh;
2073
2074 if (nh->nh_labels &&
2075 nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
2076 nh->nh_label))
2077 goto nla_put_failure;
2078 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
2079 nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
2080 nh->nh_via_alen))
2081 goto nla_put_failure;
2082 dev = nh->nh_dev;
2083 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
2084 goto nla_put_failure;
2085 if (nh->nh_flags & RTNH_F_LINKDOWN)
2086 rtm->rtm_flags |= RTNH_F_LINKDOWN;
2087 if (nh->nh_flags & RTNH_F_DEAD)
2088 rtm->rtm_flags |= RTNH_F_DEAD;
2089 } else {
2090 struct rtnexthop *rtnh;
2091 struct nlattr *mp;
2092 u8 linkdown = 0;
2093 u8 dead = 0;
2094
2095 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
2096 if (!mp)
2097 goto nla_put_failure;
2098
2099 for_nexthops(rt) {
2100 dev = nh->nh_dev;
2101 if (!dev)
2102 continue;
2103
2104 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
2105 if (!rtnh)
2106 goto nla_put_failure;
2107
2108 rtnh->rtnh_ifindex = dev->ifindex;
2109 if (nh->nh_flags & RTNH_F_LINKDOWN) {
2110 rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
2111 linkdown++;
2112 }
2113 if (nh->nh_flags & RTNH_F_DEAD) {
2114 rtnh->rtnh_flags |= RTNH_F_DEAD;
2115 dead++;
2116 }
2117
2118 if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
2119 nh->nh_labels,
2120 nh->nh_label))
2121 goto nla_put_failure;
2122 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
2123 nla_put_via(skb, nh->nh_via_table,
2124 mpls_nh_via(rt, nh),
2125 nh->nh_via_alen))
2126 goto nla_put_failure;
2127
2128 /* length of rtnetlink header + attributes */
2129 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
2130 } endfor_nexthops(rt);
2131
2132 if (linkdown == rt->rt_nhn)
2133 rtm->rtm_flags |= RTNH_F_LINKDOWN;
2134 if (dead == rt->rt_nhn)
2135 rtm->rtm_flags |= RTNH_F_DEAD;
2136
2137 nla_nest_end(skb, mp);
2138 }
2139
2140 nlmsg_end(skb, nlh);
2141 return 0;
2142
2143 nla_put_failure:
2144 nlmsg_cancel(skb, nlh);
2145 return -EMSGSIZE;
2146 }
2147
2148 #if IS_ENABLED(CONFIG_INET)
mpls_valid_fib_dump_req(struct net * net,const struct nlmsghdr * nlh,struct fib_dump_filter * filter,struct netlink_callback * cb)2149 static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
2150 struct fib_dump_filter *filter,
2151 struct netlink_callback *cb)
2152 {
2153 return ip_valid_fib_dump_req(net, nlh, filter, cb);
2154 }
2155 #else
mpls_valid_fib_dump_req(struct net * net,const struct nlmsghdr * nlh,struct fib_dump_filter * filter,struct netlink_callback * cb)2156 static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
2157 struct fib_dump_filter *filter,
2158 struct netlink_callback *cb)
2159 {
2160 struct netlink_ext_ack *extack = cb->extack;
2161 struct nlattr *tb[RTA_MAX + 1];
2162 struct rtmsg *rtm;
2163 int err, i;
2164
2165 rtm = nlmsg_payload(nlh, sizeof(*rtm));
2166 if (!rtm) {
2167 NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
2168 return -EINVAL;
2169 }
2170
2171 if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
2172 rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type ||
2173 rtm->rtm_flags) {
2174 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request");
2175 return -EINVAL;
2176 }
2177
2178 if (rtm->rtm_protocol) {
2179 filter->protocol = rtm->rtm_protocol;
2180 filter->filter_set = 1;
2181 cb->answer_flags = NLM_F_DUMP_FILTERED;
2182 }
2183
2184 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2185 rtm_mpls_policy, extack);
2186 if (err < 0)
2187 return err;
2188
2189 for (i = 0; i <= RTA_MAX; ++i) {
2190 int ifindex;
2191
2192 if (i == RTA_OIF) {
2193 ifindex = nla_get_u32(tb[i]);
2194 filter->dev = dev_get_by_index_rcu(net, ifindex);
2195 if (!filter->dev)
2196 return -ENODEV;
2197 filter->filter_set = 1;
2198 } else if (tb[i]) {
2199 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
2200 return -EINVAL;
2201 }
2202 }
2203
2204 return 0;
2205 }
2206 #endif
2207
mpls_rt_uses_dev(struct mpls_route * rt,const struct net_device * dev)2208 static bool mpls_rt_uses_dev(struct mpls_route *rt,
2209 const struct net_device *dev)
2210 {
2211 if (rt->rt_nhn == 1) {
2212 struct mpls_nh *nh = rt->rt_nh;
2213
2214 if (nh->nh_dev == dev)
2215 return true;
2216 } else {
2217 for_nexthops(rt) {
2218 if (nh->nh_dev == dev)
2219 return true;
2220 } endfor_nexthops(rt);
2221 }
2222
2223 return false;
2224 }
2225
mpls_dump_routes(struct sk_buff * skb,struct netlink_callback * cb)2226 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
2227 {
2228 const struct nlmsghdr *nlh = cb->nlh;
2229 struct net *net = sock_net(skb->sk);
2230 struct mpls_route __rcu **platform_label;
2231 struct fib_dump_filter filter = {
2232 .rtnl_held = false,
2233 };
2234 unsigned int flags = NLM_F_MULTI;
2235 size_t platform_labels;
2236 unsigned int index;
2237 int err;
2238
2239 rcu_read_lock();
2240
2241 if (cb->strict_check) {
2242 err = mpls_valid_fib_dump_req(net, nlh, &filter, cb);
2243 if (err < 0)
2244 goto err;
2245
2246 /* for MPLS, there is only 1 table with fixed type and flags.
2247 * If either are set in the filter then return nothing.
2248 */
2249 if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) ||
2250 (filter.rt_type && filter.rt_type != RTN_UNICAST) ||
2251 filter.flags)
2252 goto unlock;
2253 }
2254
2255 index = cb->args[0];
2256 if (index < MPLS_LABEL_FIRST_UNRESERVED)
2257 index = MPLS_LABEL_FIRST_UNRESERVED;
2258
2259 platform_label = mpls_platform_label_rcu(net, &platform_labels);
2260
2261 if (filter.filter_set)
2262 flags |= NLM_F_DUMP_FILTERED;
2263
2264 for (; index < platform_labels; index++) {
2265 struct mpls_route *rt;
2266
2267 rt = rcu_dereference(platform_label[index]);
2268 if (!rt)
2269 continue;
2270
2271 if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) ||
2272 (filter.protocol && rt->rt_protocol != filter.protocol))
2273 continue;
2274
2275 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
2276 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2277 index, rt, flags) < 0)
2278 break;
2279 }
2280 cb->args[0] = index;
2281
2282 unlock:
2283 rcu_read_unlock();
2284 return skb->len;
2285
2286 err:
2287 rcu_read_unlock();
2288 return err;
2289 }
2290
lfib_nlmsg_size(struct mpls_route * rt)2291 static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
2292 {
2293 size_t payload =
2294 NLMSG_ALIGN(sizeof(struct rtmsg))
2295 + nla_total_size(4) /* RTA_DST */
2296 + nla_total_size(1); /* RTA_TTL_PROPAGATE */
2297
2298 if (rt->rt_nhn == 1) {
2299 struct mpls_nh *nh = rt->rt_nh;
2300
2301 if (nh->nh_dev)
2302 payload += nla_total_size(4); /* RTA_OIF */
2303 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
2304 payload += nla_total_size(2 + nh->nh_via_alen);
2305 if (nh->nh_labels) /* RTA_NEWDST */
2306 payload += nla_total_size(nh->nh_labels * 4);
2307 } else {
2308 /* each nexthop is packed in an attribute */
2309 size_t nhsize = 0;
2310
2311 for_nexthops(rt) {
2312 if (!nh->nh_dev)
2313 continue;
2314 nhsize += nla_total_size(sizeof(struct rtnexthop));
2315 /* RTA_VIA */
2316 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
2317 nhsize += nla_total_size(2 + nh->nh_via_alen);
2318 if (nh->nh_labels)
2319 nhsize += nla_total_size(nh->nh_labels * 4);
2320 } endfor_nexthops(rt);
2321 /* nested attribute */
2322 payload += nla_total_size(nhsize);
2323 }
2324
2325 return payload;
2326 }
2327
/* Broadcast a route change (@event is RTM_NEWROUTE/RTM_DELROUTE) for
 * @label/@rt to RTNLGRP_MPLS_ROUTE listeners. On any failure the error
 * is recorded on the group with rtnl_set_sk_err() so interested sockets
 * learn they missed a notification.
 */
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags)
{
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	struct sk_buff *skb;
	int err;

	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);
	return;

errout:
	rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}
2353
/* Validate an RTM_GETROUTE request and parse its attributes into @tb.
 *
 * Legacy (non-strict) sockets get a plain deprecated parse; strict
 * sockets additionally have every header field and attribute checked:
 * only an all-zero header (with rtm_dst_len 0 or 20, the fixed MPLS
 * prefix length), the RTM_F_FIB_MATCH flag, and the RTA_DST/RTA_NEWDST
 * attributes are accepted.
 *
 * Returns 0 on success or a negative errno with extack set.
 */
static int mpls_valid_getroute_req(struct sk_buff *skb,
				   const struct nlmsghdr *nlh,
				   struct nlattr **tb,
				   struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	rtm = nlmsg_payload(nlh, sizeof(*rtm));
	if (!rtm) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for get route request");
		return -EINVAL;
	}

	/* Legacy userspace: relaxed parse, no header/attribute policing */
	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_mpls_policy, extack);

	if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) ||
	    rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table ||
	    rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
		return -EINVAL;
	}
	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid flags for get route request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_mpls_policy, extack);
	if (err)
		return err;

	/* Label attributes only make sense with the 20-bit prefix length */
	if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) {
		NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS");
		return -EINVAL;
	}

	/* Reject any attribute other than RTA_DST / RTA_NEWDST */
	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_DST:
		case RTA_NEWDST:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
			return -EINVAL;
		}
	}

	return 0;
}
2411
/* RTM_GETROUTE doit handler: look up the route for the requested
 * incoming label and unicast a single RTM_NEWROUTE reply.
 *
 * Two modes, selected by RTM_F_FIB_MATCH:
 *  - FIB match: dump the route exactly as stored (all nexthops).
 *  - Default:   build a dummy MPLS packet from RTA_NEWDST (if given),
 *               run multipath selection on it, and report only the
 *               nexthop that packet would take.
 *
 * The whole lookup runs under the per-netns platform_mutex so the
 * route cannot be resized/replaced underneath us.
 */
static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	u32 portid = NETLINK_CB(in_skb).portid;
	u32 in_label = LABEL_NOT_SPECIFIED;
	struct nlattr *tb[RTA_MAX + 1];
	struct mpls_route *rt = NULL;
	u32 labels[MAX_NEW_LABELS];
	struct mpls_shim_hdr *hdr;
	unsigned int hdr_size = 0;
	const struct mpls_nh *nh;
	struct net_device *dev;
	struct rtmsg *rtm, *r;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	u8 n_labels;
	int err;

	mutex_lock(&net->mpls.platform_mutex);

	err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(in_nlh);

	if (tb[RTA_DST]) {
		u8 label_count;

		/* RTA_DST carries exactly one label: the incoming label */
		if (nla_get_labels(tb[RTA_DST], 1, &label_count,
				   &in_label, extack)) {
			err = -EINVAL;
			goto errout;
		}

		if (!mpls_label_ok(net, &in_label, extack)) {
			err = -EINVAL;
			goto errout;
		}
	}

	/* LABEL_NOT_SPECIFIED (or out-of-range) falls through to ENETUNREACH */
	if (in_label < net->mpls.platform_labels)
		rt = mpls_route_input(net, in_label);
	if (!rt) {
		err = -ENETUNREACH;
		goto errout;
	}

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		/* Report the stored route verbatim, no path selection */
		skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
		if (!skb) {
			err = -ENOBUFS;
			goto errout;
		}

		err = mpls_dump_route(skb, portid, in_nlh->nlmsg_seq,
				      RTM_NEWROUTE, in_label, rt, 0);
		if (err < 0) {
			/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
			WARN_ON(err == -EMSGSIZE);
			goto errout_free;
		}

		err = rtnl_unicast(skb, net, portid);
		goto errout;
	}

	if (tb[RTA_NEWDST]) {
		/* Optional label stack used as multipath hash input below */
		if (nla_get_labels(tb[RTA_NEWDST], MAX_NEW_LABELS, &n_labels,
				   labels, extack) != 0) {
			err = -EINVAL;
			goto errout;
		}

		hdr_size = n_labels * sizeof(struct mpls_shim_hdr);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	skb->protocol = htons(ETH_P_MPLS_UC);

	if (hdr_size) {
		bool bos;
		int i;

		if (skb_cow(skb, hdr_size)) {
			err = -ENOBUFS;
			goto errout_free;
		}

		skb_reserve(skb, hdr_size);
		skb_push(skb, hdr_size);
		skb_reset_network_header(skb);

		/* Push new labels: encode from the bottom of the stack up,
		 * setting the bottom-of-stack bit only on the last entry.
		 */
		hdr = mpls_hdr(skb);
		bos = true;
		for (i = n_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(labels[i],
						   1, 0, bos);
			bos = false;
		}
	}

	/* Select the nexthop this synthetic packet would be forwarded to */
	nh = mpls_select_multipath(rt, skb);
	if (!nh) {
		err = -ENETUNREACH;
		goto errout_free;
	}

	if (hdr_size) {
		/* Done hashing; strip the dummy stack before reusing the
		 * skb as the netlink reply buffer.
		 */
		skb_pull(skb, hdr_size);
		skb_reset_network_header(skb);
	}

	nlh = nlmsg_put(skb, portid, in_nlh->nlmsg_seq,
			RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto errout_free;
	}

	r = nlmsg_data(nlh);
	r->rtm_family	= AF_MPLS;
	r->rtm_dst_len	= 20;		/* MPLS labels are always 20 bits */
	r->rtm_src_len	= 0;
	r->rtm_table	= RT_TABLE_MAIN;
	r->rtm_type	= RTN_UNICAST;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = rt->rt_protocol;
	r->rtm_flags	= 0;

	if (nla_put_labels(skb, RTA_DST, 1, &in_label))
		goto nla_put_failure;

	if (nh->nh_labels &&
	    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
			   nh->nh_label))
		goto nla_put_failure;

	if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
	    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
			nh->nh_via_alen))
		goto nla_put_failure;
	dev = nh->nh_dev;
	if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	err = rtnl_unicast(skb, net, portid);
errout:
	mutex_unlock(&net->mpls.platform_mutex);
	return err;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	err = -EMSGSIZE;
errout_free:
	mutex_unlock(&net->mpls.platform_mutex);
	kfree_skb(skb);
	return err;
}
2580
/* Resize the per-netns platform label table to hold @limit labels.
 *
 * Allocates the new table and the well-known IPv4/IPv6 explicit-null
 * routes (labels 0 and 2) before taking platform_mutex, so no sleeping
 * allocation happens with the lock held. The old contents are copied
 * over, routes beyond the new limit are deleted, and the new table is
 * published under the seqcount (with BHs off) so lockless readers see
 * a consistent {labels, pointer} pair. The old table is freed after
 * synchronize_rcu().
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int resize_platform_label_table(struct net *net, size_t limit)
{
	size_t size = sizeof(struct mpls_route *) * limit;
	size_t old_limit;
	size_t cp_size;
	struct mpls_route __rcu **labels = NULL, **old;
	struct mpls_route *rt0 = NULL, *rt2 = NULL;
	unsigned index;

	if (size) {
		labels = kvzalloc(size, GFP_KERNEL);
		if (!labels)
			goto nolabels;
	}

	/* In case the predefined labels need to be populated */
	if (limit > MPLS_LABEL_IPV4NULL) {
		struct net_device *lo = net->loopback_dev;

		/* rt0: IPv4 explicit null, delivered locally via loopback */
		rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt0))
			goto nort0;

		rt0->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt0->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt0->rt_protocol = RTPROT_KERNEL;
		rt0->rt_payload_type = MPT_IPV4;
		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt0->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}
	if (limit > MPLS_LABEL_IPV6NULL) {
		struct net_device *lo = net->loopback_dev;

		/* rt2: IPv6 explicit null, delivered locally via loopback */
		rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt2))
			goto nort2;

		rt2->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt2->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt2->rt_protocol = RTPROT_KERNEL;
		rt2->rt_payload_type = MPT_IPV6;
		rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt2->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}

	mutex_lock(&net->mpls.platform_mutex);

	/* Remember the original table */
	old = mpls_dereference(net, net->mpls.platform_label);
	old_limit = net->mpls.platform_labels;

	/* Free any labels beyond the new table */
	for (index = limit; index < old_limit; index++)
		mpls_route_update(net, index, NULL, NULL);

	/* Copy over the old labels */
	cp_size = size;
	if (old_limit < limit)
		cp_size = old_limit * sizeof(struct mpls_route *);

	memcpy(labels, old, cp_size);

	/* If needed set the predefined labels */
	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
	    (limit > MPLS_LABEL_IPV6NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
		rt2 = NULL;	/* ownership moved into the table */
	}

	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
	    (limit > MPLS_LABEL_IPV4NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
		rt0 = NULL;	/* ownership moved into the table */
	}

	/* Update the global pointers: publish {limit, labels} atomically
	 * with respect to seqcount readers; BHs disabled around the write
	 * section.
	 */
	local_bh_disable();
	write_seqcount_begin(&net->mpls.platform_label_seq);
	net->mpls.platform_labels = limit;
	rcu_assign_pointer(net->mpls.platform_label, labels);
	write_seqcount_end(&net->mpls.platform_label_seq);
	local_bh_enable();

	mutex_unlock(&net->mpls.platform_mutex);

	/* Free the null-label routes we ended up not installing */
	mpls_rt_free(rt2);
	mpls_rt_free(rt0);

	if (old) {
		/* Wait out readers still walking the old table */
		synchronize_rcu();
		kvfree(old);
	}
	return 0;

nort2:
	mpls_rt_free(rt0);
nort0:
	kvfree(labels);
nolabels:
	return -ENOMEM;
}
2688
/* Sysctl handler for net.mpls.platform_labels: proxy the read/write
 * through a local int so we can validate the range [0, label_limit]
 * and, on a successful write, resize the platform label table.
 */
static int mpls_platform_labels(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = table->data;
	int labels = net->mpls.platform_labels;
	struct ctl_table ctl = {
		.procname	= table->procname,
		.data		= &labels,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= SYSCTL_ZERO,
		.extra2		= &label_limit,
	};
	int ret;

	ret = proc_dointvec_minmax(&ctl, write, buffer, lenp, ppos);
	if (!ret && write)
		ret = resize_platform_label_table(net, labels);

	return ret;
}
2711
/* Encode the offset of @field within struct net as a pointer value
 * (offsetof-style null-pointer arithmetic). mpls_net_init() turns these
 * offsets back into absolute per-netns addresses by adding the net base.
 */
#define MPLS_NS_SYSCTL_OFFSET(field) \
	(&((struct net *)0)->field)

/* Template for the per-netns net/mpls sysctl directory; .data holds
 * offsets (or NULL for the handler-backed entry) until relocated.
 */
static const struct ctl_table mpls_table[] = {
	{
		/* handled entirely by mpls_platform_labels(); .data is
		 * relocated to the struct net pointer itself (offset 0)
		 */
		.procname	= "platform_labels",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_platform_labels,
	},
	{
		/* boolean: propagate IP TTL into pushed MPLS headers */
		.procname	= "ip_ttl_propagate",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		/* TTL used when not propagating; clamped to [1, 255] */
		.procname	= "default_ttl",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &ttl_max,
	},
};
2742
/* Per-netns init: set up locking, default MPLS state, and register a
 * private copy of the sysctl table with its .data offsets relocated.
 * Returns 0 or -ENOMEM.
 */
static __net_init int mpls_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(mpls_table);
	struct ctl_table *table;
	int i;

	mutex_init(&net->mpls.platform_mutex);
	/* seqcount writers are serialized by platform_mutex */
	seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex);

	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;
	net->mpls.ip_ttl_propagate = 1;
	net->mpls.default_ttl = 255;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Table data contains only offsets relative to the base of
	 * struct net at this point, so make them absolute.
	 */
	for (i = 0; i < table_size; i++)
		table[i].data = (char *)net + (uintptr_t)table[i].data;

	net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
					       table_size);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}
2776
/* Per-netns teardown: unregister the sysctl table, then free every
 * remaining route and the platform label table itself.
 */
static __net_exit void mpls_net_exit(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	const struct ctl_table *table;
	unsigned int index;

	table = net->mpls.ctl->ctl_table_arg;
	unregister_net_sysctl_table(net->mpls.ctl);
	kfree(table);

	/* An rcu grace period has passed since there was a device in
	 * the network namespace (and thus the last in flight packet)
	 * left this network namespace. This is because
	 * unregister_netdevice_many and netdev_run_todo has completed
	 * for each network device that was in this network namespace.
	 *
	 * As such no additional rcu synchronization is necessary when
	 * freeing the platform_label table.
	 */
	mutex_lock(&net->mpls.platform_mutex);

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;

	for (index = 0; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = mpls_dereference(net, platform_label[index]);
		/* Notify listeners/devices of the deletion, then free */
		mpls_notify_route(net, index, rt, NULL, NULL);
		mpls_rt_free(rt);
	}

	mutex_unlock(&net->mpls.platform_mutex);

	kvfree(platform_label);
}
2814
/* Per-network-namespace lifecycle hooks */
static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};

/* AF_MPLS hooks for per-link stats embedded in RTM_GETLINK replies */
static struct rtnl_af_ops mpls_af_ops __read_mostly = {
	.family		   = AF_MPLS,
	.fill_stats_af	   = mpls_fill_stats_af,
	.get_stats_af_size = mpls_get_stats_af_size,
};

/* rtnetlink message handlers; all doit/dumpit callbacks run without
 * the RTNL lock (UNLOCKED flags) — they rely on platform_mutex/RCU.
 */
static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = {
	{THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
	 mpls_netconf_get_devconf, mpls_netconf_dump_devconf,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
};
2837
/* Module init: register, in order, the pernet subsystem, the netdevice
 * notifier, the ETH_P_MPLS_UC packet handler, the AF_MPLS rtnl ops, the
 * rtnetlink message handlers, and the MPLS-over-GRE encap ops. On any
 * failure, everything registered so far is unwound in reverse order.
 */
static int __init mpls_init(void)
{
	int err;

	/* MPLS label stack entries are exactly 32 bits on the wire */
	BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);

	err = register_pernet_subsys(&mpls_net_ops);
	if (err)
		goto out;

	err = register_netdevice_notifier(&mpls_dev_notifier);
	if (err)
		goto out_unregister_pernet;

	dev_add_pack(&mpls_packet_type);

	err = rtnl_af_register(&mpls_af_ops);
	if (err)
		goto out_unregister_dev_type;

	err = rtnl_register_many(mpls_rtnl_msg_handlers);
	if (err)
		goto out_unregister_rtnl_af;

	err = ipgre_tunnel_encap_add_mpls_ops();
	if (err) {
		pr_err("Can't add mpls over gre tunnel ops\n");
		goto out_unregister_rtnl;
	}

	err = 0;
out:
	return err;

out_unregister_rtnl:
	rtnl_unregister_many(mpls_rtnl_msg_handlers);
out_unregister_rtnl_af:
	rtnl_af_unregister(&mpls_af_ops);
out_unregister_dev_type:
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
out_unregister_pernet:
	unregister_pernet_subsys(&mpls_net_ops);
	goto out;
}
module_init(mpls_init);
2884
/* Module exit: tear down everything mpls_init() registered, in reverse
 * order of registration.
 */
static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	rtnl_af_unregister(&mpls_af_ops);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
	ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);
2895
2896 MODULE_DESCRIPTION("MultiProtocol Label Switching");
2897 MODULE_LICENSE("GPL v2");
2898 MODULE_ALIAS_NETPROTO(PF_MPLS);
2899