// SPDX-License-Identifier: GPL-2.0-only
/*
 * VXLAN VNI filter for collect metadata mode
 *
 * Authors: Roopa Prabhu <roopa@nvidia.com>
 *
 */

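/* Note: VNI filter entries are configured over rtnetlink with
 * RTM_NEWTUNNEL/RTM_DELTUNNEL/RTM_GETTUNNEL (PF_BRIDGE family).
 * With iproute2 this corresponds to commands along the lines of
 * (illustrative syntax; see the bridge(8) "vni" subcommand):
 *
 *   bridge vni add dev vxlan0 vni 100-200
 *   bridge vni show
 */
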
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
#include <linux/rhashtable.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/vxlan.h>

#include "vxlan_private.h"

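/* rhashtable compare callback: returns 0 when the vxlan_vni_node at @ptr
 * matches the __be32 VNI used as the lookup key.
 */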
static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct vxlan_vni_node *vnode = ptr;
	__be32 vni = *(__be32 *)arg->key;

	return vnode->vni != vni;
}

const struct rhashtable_params vxlan_vni_rht_params = {
	.head_offset = offsetof(struct vxlan_vni_node, vnode),
	.key_offset = offsetof(struct vxlan_vni_node, vni),
	.key_len = sizeof(__be32),
	.nelem_hint = 3,
	.max_size = VXLAN_N_VID,
	.obj_cmpfn = vxlan_vni_cmp,
	.automatic_shrinking = true,
};

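/* Link (or unlink, if @del is true) a VNI node into the per-socket VNI
 * hash table(s) so incoming packets can be demuxed per VNI. Both the
 * IPv4 and (when enabled) IPv6 sockets are handled under sock_lock.
 */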
static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
				     struct vxlan_vni_node *v,
				     bool del)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_dev_node *node;
	struct vxlan_sock *vs;

	spin_lock(&vn->sock_lock);
	if (del) {
		if (!hlist_unhashed(&v->hlist4.hlist))
			hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		if (!hlist_unhashed(&v->hlist6.hlist))
			hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		goto out;
	}

#if IS_ENABLED(CONFIG_IPV6)
	vs = rtnl_dereference(vxlan->vn6_sock);
	if (vs && v) {
		node = &v->hlist6;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
#endif
	vs = rtnl_dereference(vxlan->vn4_sock);
	if (vs && v) {
		node = &v->hlist4;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
out:
	spin_unlock(&vn->sock_lock);
}

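/* Hash every VNI of this device's VNI group into the given vxlan socket;
 * called when the device is brought up on a new socket.
 */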
void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
			 struct vxlan_sock *vs,
			 bool ipv6)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_dev_node *node;

	if (!vg)
		return;

	spin_lock(&vn->sock_lock);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6)
			node = &v->hlist6;
		else
#endif
			node = &v->hlist4;
		node->vxlan = vxlan;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
	spin_unlock(&vn->sock_lock);
}

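/* Unhash all VNIs of this device's VNI group from the vxlan socket(s);
 * the counterpart of vxlan_vs_add_vnigrp(), used when the device goes down.
 */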
void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_vni_node *v, *tmp;

	if (!vg)
		return;

	spin_lock(&vn->sock_lock);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
	}
	spin_unlock(&vn->sock_lock);
}

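/* Sum the per-cpu counters of a VNI node into @dest, using the u64_stats
 * seqcount to get a consistent snapshot of each cpu's counters.
 */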
static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
				      struct vxlan_vni_stats *dest)
{
	int i;

	memset(dest, 0, sizeof(*dest));
	for_each_possible_cpu(i) {
		struct vxlan_vni_stats_pcpu *pstats;
		struct vxlan_vni_stats temp;
		unsigned int start;

		pstats = per_cpu_ptr(vninode->stats, i);
		do {
			start = u64_stats_fetch_begin(&pstats->syncp);
			memcpy(&temp, &pstats->stats, sizeof(temp));
		} while (u64_stats_fetch_retry(&pstats->syncp, start));

		dest->rx_packets += temp.rx_packets;
		dest->rx_bytes += temp.rx_bytes;
		dest->rx_drops += temp.rx_drops;
		dest->rx_errors += temp.rx_errors;
		dest->tx_packets += temp.tx_packets;
		dest->tx_bytes += temp.tx_bytes;
		dest->tx_drops += temp.tx_drops;
		dest->tx_errors += temp.tx_errors;
	}
}

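/* Bump this cpu's counter of the given @type for a VNI node; @len is only
 * meaningful for the RX/TX byte counters.
 */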
static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
				      int type, unsigned int len)
{
	struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);

	u64_stats_update_begin(&pstats->syncp);
	switch (type) {
	case VXLAN_VNI_STATS_RX:
		pstats->stats.rx_bytes += len;
		pstats->stats.rx_packets++;
		break;
	case VXLAN_VNI_STATS_RX_DROPS:
		pstats->stats.rx_drops++;
		break;
	case VXLAN_VNI_STATS_RX_ERRORS:
		pstats->stats.rx_errors++;
		break;
	case VXLAN_VNI_STATS_TX:
		pstats->stats.tx_bytes += len;
		pstats->stats.tx_packets++;
		break;
	case VXLAN_VNI_STATS_TX_DROPS:
		pstats->stats.tx_drops++;
		break;
	case VXLAN_VNI_STATS_TX_ERRORS:
		pstats->stats.tx_errors++;
		break;
	}
	u64_stats_update_end(&pstats->syncp);
}

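/* Account @len bytes of @type traffic against @vni. Callers on the fast
 * path that already hold the VNI node pass it via @vninode to avoid the
 * hash lookup.
 */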
void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
			   struct vxlan_vni_node *vninode,
			   int type, unsigned int len)
{
	struct vxlan_vni_node *vnode;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return;

	if (vninode) {
		vnode = vninode;
	} else {
		vnode = vxlan_vnifilter_lookup(vxlan, vni);
		if (!vnode)
			return;
	}

	vxlan_vnifilter_stats_add(vnode, type, len);
}

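/* Distance between two VNIs in host byte order; 1 means @vend immediately
 * follows @vbegin, which is what the dump code uses to coalesce ranges.
 */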
static u32 vnirange(struct vxlan_vni_node *vbegin,
		    struct vxlan_vni_node *vend)
{
	return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
}

static size_t vxlan_vnifilter_entry_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct tunnel_msg))
		+ nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
		+ nla_total_size(sizeof(struct in6_addr)); /* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
}

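/* Emit the nested VXLAN_VNIFILTER_ENTRY_STATS attribute for one VNI;
 * on failure the partially built nest is cancelled and -EMSGSIZE returned.
 */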
static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
					const struct vxlan_vni_node *vbegin)
{
	struct vxlan_vni_stats vstats;
	struct nlattr *vstats_attr;

	vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
	if (!vstats_attr)
		goto out_stats_err;

	vxlan_vnifilter_stats_get(vbegin, &vstats);
	if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
			      vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
			      vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
			      vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
			      vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
			      vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
			      vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
			      vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
			      vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
		goto out_stats_err;

	nla_nest_end(skb, vstats_attr);

	return 0;

out_stats_err:
	nla_nest_cancel(skb, vstats_attr);
	return -EMSGSIZE;
}

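/* Fill one VXLAN_VNIFILTER_ENTRY nest describing the VNI range
 * [@vbegin, @vend], its remote group address (if any) and, optionally,
 * its stats. Returns false if the skb ran out of room.
 */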
static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
					struct vxlan_vni_node *vbegin,
					struct vxlan_vni_node *vend,
					bool fill_stats)
{
	struct nlattr *ventry;
	u32 vs = be32_to_cpu(vbegin->vni);
	u32 ve = 0;

	if (vbegin != vend)
		ve = be32_to_cpu(vend->vni);

	ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
	if (!ventry)
		return false;

	if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
		goto out_err;

	if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
		goto out_err;

	if (!vxlan_addr_any(&vbegin->remote_ip)) {
		if (vbegin->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
					    vbegin->remote_ip.sin.sin_addr.s_addr))
				goto out_err;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
					     &vbegin->remote_ip.sin6.sin6_addr))
				goto out_err;
#endif
		}
	}

	if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
		goto out_err;

	nla_nest_end(skb, ventry);

	return true;

out_err:
	nla_nest_cancel(skb, ventry);

	return false;
}

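/* Send an RTM_NEWTUNNEL/RTM_DELTUNNEL notification for a single VNI to
 * RTNLGRP_TUNNEL listeners.
 */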
static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode, int cmd)
{
	struct tunnel_msg *tmsg;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct net *net = dev_net(vxlan->dev);
	int err = -ENOBUFS;

	skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out_err;

	err = -EMSGSIZE;
	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
	if (!nlh)
		goto out_err;
	tmsg = nlmsg_data(nlh);
	memset(tmsg, 0, sizeof(*tmsg));
	tmsg->family = AF_BRIDGE;
	tmsg->ifindex = vxlan->dev->ifindex;

	if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
		goto out_err;

	nlmsg_end(skb, nlh);
	rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);

	return;

out_err:
	rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);

	kfree_skb(skb);
}

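/* Dump the VNI filter entries of one device. Consecutive VNIs that share
 * the same remote IP are coalesced into a single start/end entry unless
 * per-VNI stats were requested. cb->args[1] carries the resume index.
 */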
static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
				    struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct tunnel_msg *new_tmsg, *tmsg;
	int idx = 0, s_idx = cb->args[1];
	struct vxlan_vni_group *vg;
	struct nlmsghdr *nlh;
	bool dump_stats;
	int err = 0;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EINVAL;

	/* RCU needed because of the vni locking rules (rcu || rtnl) */
	vg = rcu_dereference(vxlan->vnigrp);
	if (!vg || !vg->num_vnis)
		return 0;

	tmsg = nlmsg_data(cb->nlh);
	dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;
	new_tmsg = nlmsg_data(nlh);
	memset(new_tmsg, 0, sizeof(*new_tmsg));
	new_tmsg->family = PF_BRIDGE;
	new_tmsg->ifindex = dev->ifindex;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		if (idx < s_idx) {
			idx++;
			continue;
		}
		if (!vbegin) {
			vbegin = v;
			vend = v;
			continue;
		}
		if (!dump_stats && vnirange(vend, v) == 1 &&
		    vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
			goto update_end;
		} else {
			if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
							 dump_stats)) {
				err = -EMSGSIZE;
				break;
			}
			idx += vnirange(vbegin, vend) + 1;
			vbegin = v;
		}
update_end:
		vend = v;
	}

	if (!err && vbegin) {
		if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
			err = -EMSGSIZE;
	}

	cb->args[1] = err ? idx : 0;

	nlmsg_end(skb, nlh);

	return err;
}

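/* RTM_GETTUNNEL dump handler: dump a single device if tmsg->ifindex is
 * set, otherwise walk all vxlan devices in the namespace.
 */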
static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx = 0, err = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct net_device *dev;

	if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) {
		NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
		return -EINVAL;
	}

	tmsg = nlmsg_data(cb->nlh);

	if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
		return -EINVAL;
	}

	rcu_read_lock();
	if (tmsg->ifindex) {
		dev = dev_get_by_index_rcu(net, tmsg->ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out_err;
		}
		if (!netif_is_vxlan(dev)) {
			NL_SET_ERR_MSG(cb->extack,
				       "The device is not a vxlan device");
			err = -EINVAL;
			goto out_err;
		}
		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
		/* if the dump completed without an error we return 0 here */
		if (err != -EMSGSIZE)
			goto out_err;
	} else {
		for_each_netdev_rcu(net, dev) {
			if (!netif_is_vxlan(dev))
				continue;
			if (idx < s_idx)
				goto skip;
			err = vxlan_vnifilter_dump_dev(dev, skb, cb);
			if (err == -EMSGSIZE)
				break;
skip:
			idx++;
		}
	}
	cb->args[0] = idx;
	rcu_read_unlock();

	return skb->len;

out_err:
	rcu_read_unlock();

	return err;
}

static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY,
					  .len = sizeof_field(struct iphdr, daddr) },
	[VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY,
					   .len = sizeof(struct in6_addr) },
};

static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
};

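/* Install (and/or remove) the all-zeros-mac default fdb entry for @vni
 * under the fdb hash lock: the new @remote_ip is added first, then
 * @old_remote_ip is deleted, both within one critical section.
 */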
static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					  union vxlan_addr *old_remote_ip,
					  union vxlan_addr *remote_ip,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	u32 hash_index;
	int err = 0;

	hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err) {
			spin_unlock_bh(&vxlan->hash_lock[hash_index]);
			return err;
		}
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);
	}
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}

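/* Resolve the effective remote IP for a VNI (per-VNI group if given,
 * else the device default), update the default fdb entry accordingly and
 * fix up multicast group membership if the device is up. *changed is set
 * when anything was modified.
 */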
static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *newrip = NULL, *oldrip = NULL;
	union vxlan_addr old_remote_ip;
	int ret = 0;

	memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));

	/* if a per-vni remote ip is not present, use the vxlan device's
	 * default dst remote ip for the fdb entry
	 */
	if (group && !vxlan_addr_any(group)) {
		newrip = group;
	} else {
		if (!vxlan_addr_any(&dst->remote_ip))
			newrip = &dst->remote_ip;
	}

	/* if an old remote ip exists and there is no new one,
	 * explicitly delete the old entry
	 */
	if (!newrip && !vxlan_addr_any(&old_remote_ip))
		oldrip = &old_remote_ip;

	if (!newrip && !oldrip)
		return 0;

	if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
		return 0;

	ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     oldrip, newrip,
					     extack);
	if (ret)
		goto out;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&old_remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &old_remote_ip,
				      vxlan->default_dst.remote_ifindex)) {
			ret = vxlan_igmp_leave(vxlan, &old_remote_ip, 0);
			if (ret)
				goto out;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			if (ret == -EADDRINUSE)
				ret = 0;
			if (ret)
				goto out;
		}
	}

	*changed = true;

	return 0;
out:
	return ret;
}

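/* Propagate a change of the device default remote IP to every VNI that
 * has no per-VNI remote IP of its own.
 */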
int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
			       union vxlan_addr *old_remote_ip,
			       union vxlan_addr *new_remote_ip,
			       struct netlink_ext_ack *extack)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_group *vg;
	struct vxlan_vni_node *vent;
	int ret;

	vg = rtnl_dereference(vxlan->vnigrp);

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (vxlan_addr_any(&vent->remote_ip)) {
			ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
							     old_remote_ip,
							     new_remote_ip,
							     extack);
			if (ret)
				return ret;
		}
	}

	return 0;
}

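/* Remove the default fdb entry added for this VNI and leave its multicast
 * group if no other user of the group remains.
 */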
static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;

	/* if a per-vni remote_ip is not present, delete the default dst
	 * remote_ip previously added for this vni
	 */
	if (!vxlan_addr_any(&vninode->remote_ip) ||
	    !vxlan_addr_any(&dst->remote_ip)) {
		u32 hash_index = fdb_head_index(vxlan, all_zeros_mac,
						vninode->vni);

		spin_lock_bh(&vxlan->hash_lock[hash_index]);
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   (vxlan_addr_any(&vninode->remote_ip) ?
				    dst->remote_ip : vninode->remote_ip),
				   vxlan->cfg.dst_port,
				   vninode->vni, vninode->vni,
				   dst->remote_ifindex,
				   true);
		spin_unlock_bh(&vxlan->hash_lock[hash_index]);
	}

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&vninode->remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &vninode->remote_ip,
				      dst->remote_ifindex)) {
			vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
		}
	}
}

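/* Update an already existing VNI entry (called when an add request matches
 * a VNI that is present); notifies listeners when the entry changed.
 */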
static int vxlan_vni_update(struct vxlan_dev *vxlan,
			    struct vxlan_vni_group *vg,
			    __be32 vni, union vxlan_addr *group,
			    bool *changed,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	int ret;

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
					 vxlan_vni_rht_params);
	if (!vninode)
		return 0;

	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
				     extack);
	if (ret)
		return ret;

	if (*changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return 0;
}

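/* Insert a VNI node into the group's list, kept sorted by ascending VNI
 * so that dumps can walk it in order and coalesce adjacent ranges.
 */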
static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_node *vent;

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
			continue;
		else
			break;
	}
	list_add_rcu(&v->vlist, hpos);
	vg->num_vnis++;
}

static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	list_del_rcu(&v->vlist);
	vg->num_vnis--;
}

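/* Allocate a VNI node and its per-cpu stats; returns NULL on allocation
 * failure.
 */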
static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
					      __be32 vni)
{
	struct vxlan_vni_node *vninode;

	vninode = kzalloc(sizeof(*vninode), GFP_KERNEL);
	if (!vninode)
		return NULL;
	vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
	if (!vninode->stats) {
		kfree(vninode);
		return NULL;
	}
	vninode->vni = vni;
	vninode->hlist4.vxlan = vxlan;
#if IS_ENABLED(CONFIG_IPV6)
	vninode->hlist6.vxlan = vxlan;
#endif

	return vninode;
}

static void vxlan_vni_free(struct vxlan_vni_node *vninode)
{
	free_percpu(vninode->stats);
	kfree(vninode);
}

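/* Add a VNI (with optional remote @group) to the device's VNI filter:
 * existing entries are updated in place, new ones are checked for
 * conflicts, hashed, linked into the sorted list and, if the device is
 * up, hashed into the underlying socket(s).
 */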
static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	bool changed = false;
	int err = 0;

	if (vxlan_vnifilter_lookup(vxlan, v))
		return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	vninode = vxlan_vni_alloc(vxlan, v);
	if (!vninode)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &vninode->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		vxlan_vni_free(vninode);
		return err;
	}

	__vxlan_vni_add_list(vg, vninode);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, false);

	err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return err;
}

static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
{
	struct vxlan_vni_node *v;

	v = container_of(rcu, struct vxlan_vni_node, rcu);
	vxlan_vni_free(v);
}

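/* Delete a VNI from the device's VNI filter and free the node after an
 * RCU grace period.
 */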
static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	int err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
					 vxlan_vni_rht_params);
	if (!vninode) {
		err = -ENOENT;
		goto out;
	}

	vxlan_vni_delete_group(vxlan, vninode);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &vninode->vnode,
				     vxlan_vni_rht_params);
	if (err)
		goto out;

	__vxlan_vni_del_list(vg, vninode);

	vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, true);

	call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);

	return 0;
out:
	return err;
}

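/* Apply RTM_NEWTUNNEL/RTM_DELTUNNEL to an inclusive range of VNIs,
 * stopping at the first failure.
 */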
static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
			     __u32 end_vni, union vxlan_addr *group,
			     int cmd, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_group *vg;
	int v, err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	for (v = start_vni; v <= end_vni; v++) {
		switch (cmd) {
		case RTM_NEWTUNNEL:
			err = vxlan_vni_add(vxlan, vg, v, group, extack);
			break;
		case RTM_DELTUNNEL:
			err = vxlan_vni_del(vxlan, vg, v, extack);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			goto out;
	}

	return 0;
out:
	return err;
}

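/* Parse one VXLAN_VNIFILTER_ENTRY attribute (start/end VNI plus optional
 * IPv4/IPv6 group) and apply it via vxlan_vni_add_del().
 */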
static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
				    struct nlattr *nlvnifilter,
				    int cmd, struct netlink_ext_ack *extack)
{
	struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
	u32 vni_start = 0, vni_end = 0;
	union vxlan_addr group;
	int err;

	err = nla_parse_nested(vattrs,
			       VXLAN_VNIFILTER_ENTRY_MAX,
			       nlvnifilter, vni_filter_entry_policy,
			       extack);
	if (err)
		return err;

	if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
		vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
		vni_end = vni_start;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
		vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);

	if (!vni_start && !vni_end) {
		NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
				    "Neither VNI start nor end found in VNI entry");
		return -EINVAL;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
		group.sin.sin_addr.s_addr =
			nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
		group.sa.sa_family = AF_INET;
	} else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
		group.sin6.sin6_addr =
			nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
		group.sa.sa_family = AF_INET6;
	} else {
		memset(&group, 0, sizeof(group));
	}

	if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
		NL_SET_ERR_MSG(extack,
			       "Local interface required for multicast remote group");

		return -EINVAL;
	}

	err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
				extack);
	if (err)
		return err;

	return 0;
}

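/* Tear down the VNI group on device uninit: unhash and unlink every VNI,
 * notify listeners and free the nodes via RCU.
 */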
void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_vni_group *vg;

	vg = rtnl_dereference(vxlan->vnigrp);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
				       vxlan_vni_rht_params);
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		__vxlan_vni_del_list(vg, v);
		vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
		call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
	}
	rhashtable_destroy(&vg->vni_hash);
	kfree(vg);
}

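/* Allocate and initialise the per-device VNI group (hash table plus
 * sorted list).
 */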
int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg;
	int ret;

	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
	if (!vg)
		return -ENOMEM;
	ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
	if (ret) {
		kfree(vg);
		return ret;
	}
	INIT_LIST_HEAD(&vg->vni_list);
	rcu_assign_pointer(vxlan->vnigrp, vg);

	return 0;
}

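/* RTM_NEWTUNNEL/RTM_DELTUNNEL doit handler: validate the tunnel_msg
 * header, resolve the vxlan device and process each nested
 * VXLAN_VNIFILTER_ENTRY attribute in the request.
 */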
static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
				   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct vxlan_dev *vxlan;
	struct net_device *dev;
	struct nlattr *attr;
	int err, vnis = 0;
	int rem;

	/* this should validate the header and check for remaining bytes */
	err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
			  vni_filter_policy, extack);
	if (err < 0)
		return err;

	tmsg = nlmsg_data(nlh);
	dev = __dev_get_by_index(net, tmsg->ifindex);
	if (!dev)
		return -ENODEV;

	if (!netif_is_vxlan(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
		return -EINVAL;
	}

	vxlan = netdev_priv(dev);

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EOPNOTSUPP;

	nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
		switch (nla_type(attr)) {
		case VXLAN_VNIFILTER_ENTRY:
			err = vxlan_process_vni_filter(vxlan, attr,
						       nlh->nlmsg_type, extack);
			break;
		default:
			continue;
		}
		vnis++;
		if (err)
			break;
	}

	if (!vnis) {
		NL_SET_ERR_MSG_MOD(extack, "No VNIs found to process");
		err = -EINVAL;
	}

	return err;
}

static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0},
};

int vxlan_vnifilter_init(void)
{
	return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers);
}

void vxlan_vnifilter_uninit(void)
{
	rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers);
}