1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *	Vxlan vni filter for collect metadata mode
4  *
5  *	Authors: Roopa Prabhu <roopa@nvidia.com>
6  *
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/etherdevice.h>
12 #include <linux/rhashtable.h>
13 #include <net/rtnetlink.h>
14 #include <net/net_namespace.h>
15 #include <net/sock.h>
16 #include <net/vxlan.h>
17 
18 #include "vxlan_private.h"
19 
vxlan_vni_cmp(struct rhashtable_compare_arg * arg,const void * ptr)20 static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
21 				const void *ptr)
22 {
23 	const struct vxlan_vni_node *vnode = ptr;
24 	__be32 vni = *(__be32 *)arg->key;
25 
26 	return vnode->vni != vni;
27 }
28 
29 const struct rhashtable_params vxlan_vni_rht_params = {
30 	.head_offset = offsetof(struct vxlan_vni_node, vnode),
31 	.key_offset = offsetof(struct vxlan_vni_node, vni),
32 	.key_len = sizeof(__be32),
33 	.nelem_hint = 3,
34 	.max_size = VXLAN_N_VID,
35 	.obj_cmpfn = vxlan_vni_cmp,
36 	.automatic_shrinking = true,
37 };
38 
vxlan_vs_add_del_vninode(struct vxlan_dev * vxlan,struct vxlan_vni_node * v,bool del)39 static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
40 				     struct vxlan_vni_node *v,
41 				     bool del)
42 {
43 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
44 	struct vxlan_dev_node *node;
45 	struct vxlan_sock *vs;
46 
47 	spin_lock(&vn->sock_lock);
48 	if (del) {
49 		if (!hlist_unhashed(&v->hlist4.hlist))
50 			hlist_del_init_rcu(&v->hlist4.hlist);
51 #if IS_ENABLED(CONFIG_IPV6)
52 		if (!hlist_unhashed(&v->hlist6.hlist))
53 			hlist_del_init_rcu(&v->hlist6.hlist);
54 #endif
55 		goto out;
56 	}
57 
58 #if IS_ENABLED(CONFIG_IPV6)
59 	vs = rtnl_dereference(vxlan->vn6_sock);
60 	if (vs && v) {
61 		node = &v->hlist6;
62 		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
63 	}
64 #endif
65 	vs = rtnl_dereference(vxlan->vn4_sock);
66 	if (vs && v) {
67 		node = &v->hlist4;
68 		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
69 	}
70 out:
71 	spin_unlock(&vn->sock_lock);
72 }
73 
vxlan_vs_add_vnigrp(struct vxlan_dev * vxlan,struct vxlan_sock * vs,bool ipv6)74 void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
75 			 struct vxlan_sock *vs,
76 			 bool ipv6)
77 {
78 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
79 	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
80 	struct vxlan_vni_node *v, *tmp;
81 	struct vxlan_dev_node *node;
82 
83 	if (!vg)
84 		return;
85 
86 	spin_lock(&vn->sock_lock);
87 	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
88 #if IS_ENABLED(CONFIG_IPV6)
89 		if (ipv6)
90 			node = &v->hlist6;
91 		else
92 #endif
93 			node = &v->hlist4;
94 		node->vxlan = vxlan;
95 		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
96 	}
97 	spin_unlock(&vn->sock_lock);
98 }
99 
vxlan_vs_del_vnigrp(struct vxlan_dev * vxlan)100 void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
101 {
102 	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
103 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
104 	struct vxlan_vni_node *v, *tmp;
105 
106 	if (!vg)
107 		return;
108 
109 	spin_lock(&vn->sock_lock);
110 	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
111 		hlist_del_init_rcu(&v->hlist4.hlist);
112 #if IS_ENABLED(CONFIG_IPV6)
113 		hlist_del_init_rcu(&v->hlist6.hlist);
114 #endif
115 	}
116 	spin_unlock(&vn->sock_lock);
117 }
118 
vxlan_vnifilter_stats_get(const struct vxlan_vni_node * vninode,struct vxlan_vni_stats * dest)119 static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
120 				      struct vxlan_vni_stats *dest)
121 {
122 	int i;
123 
124 	memset(dest, 0, sizeof(*dest));
125 	for_each_possible_cpu(i) {
126 		struct vxlan_vni_stats_pcpu *pstats;
127 		struct vxlan_vni_stats temp;
128 		unsigned int start;
129 
130 		pstats = per_cpu_ptr(vninode->stats, i);
131 		do {
132 			start = u64_stats_fetch_begin(&pstats->syncp);
133 			memcpy(&temp, &pstats->stats, sizeof(temp));
134 		} while (u64_stats_fetch_retry(&pstats->syncp, start));
135 
136 		dest->rx_packets += temp.rx_packets;
137 		dest->rx_bytes += temp.rx_bytes;
138 		dest->rx_drops += temp.rx_drops;
139 		dest->rx_errors += temp.rx_errors;
140 		dest->tx_packets += temp.tx_packets;
141 		dest->tx_bytes += temp.tx_bytes;
142 		dest->tx_drops += temp.tx_drops;
143 		dest->tx_errors += temp.tx_errors;
144 	}
145 }
146 
vxlan_vnifilter_stats_add(struct vxlan_vni_node * vninode,int type,unsigned int len)147 static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
148 				      int type, unsigned int len)
149 {
150 	struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);
151 
152 	u64_stats_update_begin(&pstats->syncp);
153 	switch (type) {
154 	case VXLAN_VNI_STATS_RX:
155 		pstats->stats.rx_bytes += len;
156 		pstats->stats.rx_packets++;
157 		break;
158 	case VXLAN_VNI_STATS_RX_DROPS:
159 		pstats->stats.rx_drops++;
160 		break;
161 	case VXLAN_VNI_STATS_RX_ERRORS:
162 		pstats->stats.rx_errors++;
163 		break;
164 	case VXLAN_VNI_STATS_TX:
165 		pstats->stats.tx_bytes += len;
166 		pstats->stats.tx_packets++;
167 		break;
168 	case VXLAN_VNI_STATS_TX_DROPS:
169 		pstats->stats.tx_drops++;
170 		break;
171 	case VXLAN_VNI_STATS_TX_ERRORS:
172 		pstats->stats.tx_errors++;
173 		break;
174 	}
175 	u64_stats_update_end(&pstats->syncp);
176 }
177 
/* Account one event against a VNI's statistics.
 *
 * @vxlan: device; nothing is counted unless VXLAN_F_VNIFILTER is set
 * @vni: VNI used to look the node up when @vninode is NULL
 * @vninode: node to account against, or NULL to look it up by @vni
 * @type: VXLAN_VNI_STATS_* selector
 * @len: byte count passed through to the counter update
 *
 * Silently returns when the lookup finds no node.
 */
void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
			   struct vxlan_vni_node *vninode,
			   int type, unsigned int len)
{
	struct vxlan_vni_node *target = vninode;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return;

	if (!target) {
		target = vxlan_vnifilter_lookup(vxlan, vni);
		if (!target)
			return;
	}

	vxlan_vnifilter_stats_add(target, type, len);
}
197 
vnirange(struct vxlan_vni_node * vbegin,struct vxlan_vni_node * vend)198 static u32 vnirange(struct vxlan_vni_node *vbegin,
199 		    struct vxlan_vni_node *vend)
200 {
201 	return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
202 }
203 
vxlan_vnifilter_entry_nlmsg_size(void)204 static size_t vxlan_vnifilter_entry_nlmsg_size(void)
205 {
206 	return NLMSG_ALIGN(sizeof(struct tunnel_msg))
207 		+ nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
208 		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
209 		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
210 		+ nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
211 }
212 
__vnifilter_entry_fill_stats(struct sk_buff * skb,const struct vxlan_vni_node * vbegin)213 static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
214 					const struct vxlan_vni_node *vbegin)
215 {
216 	struct vxlan_vni_stats vstats;
217 	struct nlattr *vstats_attr;
218 
219 	vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
220 	if (!vstats_attr)
221 		goto out_stats_err;
222 
223 	vxlan_vnifilter_stats_get(vbegin, &vstats);
224 	if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
225 			      vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
226 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
227 			      vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
228 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
229 			      vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
230 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
231 			      vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
232 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
233 			      vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
234 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
235 			      vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
236 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
237 			      vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
238 	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
239 			      vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
240 		goto out_stats_err;
241 
242 	nla_nest_end(skb, vstats_attr);
243 
244 	return 0;
245 
246 out_stats_err:
247 	nla_nest_cancel(skb, vstats_attr);
248 	return -EMSGSIZE;
249 }
250 
vxlan_fill_vni_filter_entry(struct sk_buff * skb,struct vxlan_vni_node * vbegin,struct vxlan_vni_node * vend,bool fill_stats)251 static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
252 					struct vxlan_vni_node *vbegin,
253 					struct vxlan_vni_node *vend,
254 					bool fill_stats)
255 {
256 	struct nlattr *ventry;
257 	u32 vs = be32_to_cpu(vbegin->vni);
258 	u32 ve = 0;
259 
260 	if (vbegin != vend)
261 		ve = be32_to_cpu(vend->vni);
262 
263 	ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
264 	if (!ventry)
265 		return false;
266 
267 	if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
268 		goto out_err;
269 
270 	if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
271 		goto out_err;
272 
273 	if (!vxlan_addr_any(&vbegin->remote_ip)) {
274 		if (vbegin->remote_ip.sa.sa_family == AF_INET) {
275 			if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
276 					    vbegin->remote_ip.sin.sin_addr.s_addr))
277 				goto out_err;
278 #if IS_ENABLED(CONFIG_IPV6)
279 		} else {
280 			if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
281 					     &vbegin->remote_ip.sin6.sin6_addr))
282 				goto out_err;
283 #endif
284 		}
285 	}
286 
287 	if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
288 		goto out_err;
289 
290 	nla_nest_end(skb, ventry);
291 
292 	return true;
293 
294 out_err:
295 	nla_nest_cancel(skb, ventry);
296 
297 	return false;
298 }
299 
vxlan_vnifilter_notify(const struct vxlan_dev * vxlan,struct vxlan_vni_node * vninode,int cmd)300 static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
301 				   struct vxlan_vni_node *vninode, int cmd)
302 {
303 	struct tunnel_msg *tmsg;
304 	struct sk_buff *skb;
305 	struct nlmsghdr *nlh;
306 	struct net *net = dev_net(vxlan->dev);
307 	int err = -ENOBUFS;
308 
309 	skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
310 	if (!skb)
311 		goto out_err;
312 
313 	err = -EMSGSIZE;
314 	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
315 	if (!nlh)
316 		goto out_err;
317 	tmsg = nlmsg_data(nlh);
318 	memset(tmsg, 0, sizeof(*tmsg));
319 	tmsg->family = AF_BRIDGE;
320 	tmsg->ifindex = vxlan->dev->ifindex;
321 
322 	if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
323 		goto out_err;
324 
325 	nlmsg_end(skb, nlh);
326 	rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);
327 
328 	return;
329 
330 out_err:
331 	rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);
332 
333 	kfree_skb(skb);
334 }
335 
vxlan_vnifilter_dump_dev(const struct net_device * dev,struct sk_buff * skb,struct netlink_callback * cb)336 static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
337 				    struct sk_buff *skb,
338 				    struct netlink_callback *cb)
339 {
340 	struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
341 	struct vxlan_dev *vxlan = netdev_priv(dev);
342 	struct tunnel_msg *new_tmsg, *tmsg;
343 	int idx = 0, s_idx = cb->args[1];
344 	struct vxlan_vni_group *vg;
345 	struct nlmsghdr *nlh;
346 	bool dump_stats;
347 	int err = 0;
348 
349 	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
350 		return -EINVAL;
351 
352 	/* RCU needed because of the vni locking rules (rcu || rtnl) */
353 	vg = rcu_dereference(vxlan->vnigrp);
354 	if (!vg || !vg->num_vnis)
355 		return 0;
356 
357 	tmsg = nlmsg_data(cb->nlh);
358 	dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);
359 
360 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
361 			RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
362 	if (!nlh)
363 		return -EMSGSIZE;
364 	new_tmsg = nlmsg_data(nlh);
365 	memset(new_tmsg, 0, sizeof(*new_tmsg));
366 	new_tmsg->family = PF_BRIDGE;
367 	new_tmsg->ifindex = dev->ifindex;
368 
369 	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
370 		if (idx < s_idx) {
371 			idx++;
372 			continue;
373 		}
374 		if (!vbegin) {
375 			vbegin = v;
376 			vend = v;
377 			continue;
378 		}
379 		if (!dump_stats && vnirange(vend, v) == 1 &&
380 		    vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
381 			goto update_end;
382 		} else {
383 			if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
384 							 dump_stats)) {
385 				err = -EMSGSIZE;
386 				break;
387 			}
388 			idx += vnirange(vbegin, vend) + 1;
389 			vbegin = v;
390 		}
391 update_end:
392 		vend = v;
393 	}
394 
395 	if (!err && vbegin) {
396 		if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
397 			err = -EMSGSIZE;
398 	}
399 
400 	cb->args[1] = err ? idx : 0;
401 
402 	nlmsg_end(skb, nlh);
403 
404 	return err;
405 }
406 
vxlan_vnifilter_dump(struct sk_buff * skb,struct netlink_callback * cb)407 static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
408 {
409 	int idx = 0, err = 0, s_idx = cb->args[0];
410 	struct net *net = sock_net(skb->sk);
411 	struct tunnel_msg *tmsg;
412 	struct net_device *dev;
413 
414 	if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) {
415 		NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
416 		return -EINVAL;
417 	}
418 
419 	tmsg = nlmsg_data(cb->nlh);
420 
421 	if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
422 		NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
423 		return -EINVAL;
424 	}
425 
426 	rcu_read_lock();
427 	if (tmsg->ifindex) {
428 		dev = dev_get_by_index_rcu(net, tmsg->ifindex);
429 		if (!dev) {
430 			err = -ENODEV;
431 			goto out_err;
432 		}
433 		if (!netif_is_vxlan(dev)) {
434 			NL_SET_ERR_MSG(cb->extack,
435 				       "The device is not a vxlan device");
436 			err = -EINVAL;
437 			goto out_err;
438 		}
439 		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
440 		/* if the dump completed without an error we return 0 here */
441 		if (err != -EMSGSIZE)
442 			goto out_err;
443 	} else {
444 		for_each_netdev_rcu(net, dev) {
445 			if (!netif_is_vxlan(dev))
446 				continue;
447 			if (idx < s_idx)
448 				goto skip;
449 			err = vxlan_vnifilter_dump_dev(dev, skb, cb);
450 			if (err == -EMSGSIZE)
451 				break;
452 skip:
453 			idx++;
454 		}
455 	}
456 	cb->args[0] = idx;
457 	rcu_read_unlock();
458 
459 	return skb->len;
460 
461 out_err:
462 	rcu_read_unlock();
463 
464 	return err;
465 }
466 
467 static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
468 	[VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
469 	[VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
470 	[VXLAN_VNIFILTER_ENTRY_GROUP]	= { .type = NLA_BINARY,
471 					    .len = sizeof_field(struct iphdr, daddr) },
472 	[VXLAN_VNIFILTER_ENTRY_GROUP6]	= { .type = NLA_BINARY,
473 					    .len = sizeof(struct in6_addr) },
474 };
475 
476 static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
477 	[VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
478 };
479 
/* Replace the all-zeros-MAC fdb entry of @vni, under the fdb hash lock of
 * that entry's bucket.
 *
 * @old_remote_ip: remote to delete; skipped when NULL or the "any" address
 * @remote_ip: remote to add; skipped when NULL or the "any" address
 *
 * The new remote is added first; if that fails the old one is left in
 * place and the error is returned. Returns 0 otherwise.
 */
static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					  union vxlan_addr *old_remote_ip,
					  union vxlan_addr *remote_ip,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	u32 bucket = fdb_head_index(vxlan, all_zeros_mac, vni);
	int err = 0;

	spin_lock_bh(&vxlan->hash_lock[bucket]);

	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err)
			goto unlock;
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip))
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);

unlock:
	spin_unlock_bh(&vxlan->hash_lock[bucket]);

	return err;
}
519 
/* Apply a (possibly new) remote group to a VNI node.
 *
 * @vxlan: owning device
 * @vninode: node being updated
 * @group: requested per-VNI remote, may be NULL or the "any" address
 * @create: true when called for a freshly created node
 * @changed: set to true when the update was carried out completely
 * @extack: netlink extended ack for error reporting
 *
 * Updates the default fdb entry of the VNI, stores @group in the node and,
 * when the device is up, leaves the old multicast group (if no longer used)
 * and joins the new one.
 *
 * Returns 0 on success or when there is nothing to do, a negative errno
 * otherwise.
 */
static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *new_rip = NULL, *old_rip = NULL;
	union vxlan_addr prev_ip;
	int err;

	/* remember the current remote; the node is overwritten below */
	memcpy(&prev_ip, &vninode->remote_ip, sizeof(prev_ip));

	/* if per vni remote ip is not present use vxlan dev
	 * default dst remote ip for fdb entry
	 */
	if (group && !vxlan_addr_any(group))
		new_rip = group;
	else if (!vxlan_addr_any(&dst->remote_ip))
		new_rip = &dst->remote_ip;

	/* if old rip exists, and no newrip,
	 * explicitly delete old rip
	 */
	if (!new_rip) {
		if (vxlan_addr_any(&prev_ip))
			return 0;	/* neither old nor new remote */
		old_rip = &prev_ip;
	}

	/* NOTE(review): old_rip is only set when new_rip is NULL, so this
	 * condition looks unreachable; kept as in the original.
	 */
	if (!create && old_rip && new_rip && vxlan_addr_equal(old_rip, new_rip))
		return 0;

	err = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     old_rip, new_rip,
					     extack);
	if (err)
		return err;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&prev_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &prev_ip,
				      dst->remote_ifindex)) {
			err = vxlan_igmp_leave(vxlan, &prev_ip, 0);
			if (err)
				return err;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			err = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			/* already joined is not an error */
			if (err && err != -EADDRINUSE)
				return err;
		}
	}

	*changed = true;

	return 0;
}
591 
vxlan_vnilist_update_group(struct vxlan_dev * vxlan,union vxlan_addr * old_remote_ip,union vxlan_addr * new_remote_ip,struct netlink_ext_ack * extack)592 int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
593 			       union vxlan_addr *old_remote_ip,
594 			       union vxlan_addr *new_remote_ip,
595 			       struct netlink_ext_ack *extack)
596 {
597 	struct list_head *headp, *hpos;
598 	struct vxlan_vni_group *vg;
599 	struct vxlan_vni_node *vent;
600 	int ret;
601 
602 	vg = rtnl_dereference(vxlan->vnigrp);
603 
604 	headp = &vg->vni_list;
605 	list_for_each_prev(hpos, headp) {
606 		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
607 		if (vxlan_addr_any(&vent->remote_ip)) {
608 			ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
609 							     old_remote_ip,
610 							     new_remote_ip,
611 							     extack);
612 			if (ret)
613 				return ret;
614 		}
615 	}
616 
617 	return 0;
618 }
619 
vxlan_vni_delete_group(struct vxlan_dev * vxlan,struct vxlan_vni_node * vninode)620 static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
621 				   struct vxlan_vni_node *vninode)
622 {
623 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
624 	struct vxlan_rdst *dst = &vxlan->default_dst;
625 
626 	/* if per vni remote_ip not present, delete the
627 	 * default dst remote_ip previously added for this vni
628 	 */
629 	if (!vxlan_addr_any(&vninode->remote_ip) ||
630 	    !vxlan_addr_any(&dst->remote_ip)) {
631 		u32 hash_index = fdb_head_index(vxlan, all_zeros_mac,
632 						vninode->vni);
633 
634 		spin_lock_bh(&vxlan->hash_lock[hash_index]);
635 		__vxlan_fdb_delete(vxlan, all_zeros_mac,
636 				   (vxlan_addr_any(&vninode->remote_ip) ?
637 				   dst->remote_ip : vninode->remote_ip),
638 				   vxlan->cfg.dst_port,
639 				   vninode->vni, vninode->vni,
640 				   dst->remote_ifindex,
641 				   true);
642 		spin_unlock_bh(&vxlan->hash_lock[hash_index]);
643 	}
644 
645 	if (vxlan->dev->flags & IFF_UP) {
646 		if (vxlan_addr_multicast(&vninode->remote_ip) &&
647 		    !vxlan_group_used(vn, vxlan, vninode->vni,
648 				      &vninode->remote_ip,
649 				      dst->remote_ifindex)) {
650 			vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
651 		}
652 	}
653 }
654 
/* Update the remote group of an existing VNI.
 *
 * @vxlan: owning device
 * @vg: VNI group whose hash table is searched
 * @vni: VNI to update
 * @group: requested per-VNI remote
 * @changed: out flag, set by vxlan_vni_update_group() when the node was
 *	     actually modified
 * @extack: netlink extended ack for error reporting
 *
 * Returns 0 when the VNI is not found or the update succeeded, a negative
 * errno when the group update failed. An RTM_NEWTUNNEL notification is sent
 * only when the update reported a change.
 */
static int vxlan_vni_update(struct vxlan_dev *vxlan,
			    struct vxlan_vni_group *vg,
			    __be32 vni, union vxlan_addr *group,
			    bool *changed,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	int ret;

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
					 vxlan_vni_rht_params);
	if (!vninode)
		return 0;

	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
				     extack);
	if (ret)
		return ret;

	/* test the flag, not the pointer: the pointer is always set by the
	 * caller, so checking it alone would notify on every no-op update
	 */
	if (changed && *changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return 0;
}
679 
__vxlan_vni_add_list(struct vxlan_vni_group * vg,struct vxlan_vni_node * v)680 static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
681 				 struct vxlan_vni_node *v)
682 {
683 	struct list_head *headp, *hpos;
684 	struct vxlan_vni_node *vent;
685 
686 	headp = &vg->vni_list;
687 	list_for_each_prev(hpos, headp) {
688 		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
689 		if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
690 			continue;
691 		else
692 			break;
693 	}
694 	list_add_rcu(&v->vlist, hpos);
695 	vg->num_vnis++;
696 }
697 
__vxlan_vni_del_list(struct vxlan_vni_group * vg,struct vxlan_vni_node * v)698 static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
699 				 struct vxlan_vni_node *v)
700 {
701 	list_del_rcu(&v->vlist);
702 	vg->num_vnis--;
703 }
704 
vxlan_vni_alloc(struct vxlan_dev * vxlan,__be32 vni)705 static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
706 					      __be32 vni)
707 {
708 	struct vxlan_vni_node *vninode;
709 
710 	vninode = kzalloc(sizeof(*vninode), GFP_KERNEL);
711 	if (!vninode)
712 		return NULL;
713 	vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
714 	if (!vninode->stats) {
715 		kfree(vninode);
716 		return NULL;
717 	}
718 	vninode->vni = vni;
719 	vninode->hlist4.vxlan = vxlan;
720 #if IS_ENABLED(CONFIG_IPV6)
721 	vninode->hlist6.vxlan = vxlan;
722 #endif
723 
724 	return vninode;
725 }
726 
vxlan_vni_free(struct vxlan_vni_node * vninode)727 static void vxlan_vni_free(struct vxlan_vni_node *vninode)
728 {
729 	free_percpu(vninode->stats);
730 	kfree(vninode);
731 }
732 
/* Add one VNI to the device's filter, or update it when it already exists.
 *
 * @vxlan: owning device
 * @vg: VNI group receiving the node
 * @vni: VNI in host byte order
 * @group: requested per-VNI remote
 * @extack: netlink extended ack for error reporting
 *
 * A new node is inserted into the hash table and the sorted list, hooked to
 * the device sockets when the device is up, and then gets its remote group
 * applied. An RTM_NEWTUNNEL notification is sent when the group update
 * reported a change.
 *
 * Returns 0 on success, the vxlan_vni_in_use() error when the VNI is taken,
 * -ENOMEM on allocation failure, or the hash insert / group update error.
 */
static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	__be32 be_vni = cpu_to_be32(vni);
	struct vxlan_vni_node *node;
	bool changed = false;
	int err;

	/* existing VNI: this is an update request */
	if (vxlan_vnifilter_lookup(vxlan, be_vni))
		return vxlan_vni_update(vxlan, vg, be_vni, group, &changed,
					extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, be_vni);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	node = vxlan_vni_alloc(vxlan, be_vni);
	if (!node)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &node->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		vxlan_vni_free(node);
		return err;
	}

	__vxlan_vni_add_list(vg, node);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, node, false);

	err = vxlan_vni_update_group(vxlan, node, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, node, RTM_NEWTUNNEL);

	return err;
}
777 
vxlan_vni_node_rcu_free(struct rcu_head * rcu)778 static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
779 {
780 	struct vxlan_vni_node *v;
781 
782 	v = container_of(rcu, struct vxlan_vni_node, rcu);
783 	vxlan_vni_free(v);
784 }
785 
/* Remove one VNI from the device's filter.
 *
 * @vxlan: owning device
 * @vg: ignored on entry; the group is re-read from @vxlan
 * @vni: VNI in host byte order
 * @extack: netlink extended ack (not used here)
 *
 * Tears down the node's remote-group state, removes it from the hash table
 * and the list, sends RTM_DELTUNNEL, unhooks it from the sockets when the
 * device is up and frees it after an RCU grace period.
 *
 * Returns 0 on success, -ENOENT when the VNI is not present, or the hash
 * removal error.
 */
static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	__be32 be_vni = cpu_to_be32(vni);
	struct vxlan_vni_node *node;
	int err;

	vg = rtnl_dereference(vxlan->vnigrp);

	node = rhashtable_lookup_fast(&vg->vni_hash, &be_vni,
				      vxlan_vni_rht_params);
	if (!node)
		return -ENOENT;

	vxlan_vni_delete_group(vxlan, node);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &node->vnode,
				     vxlan_vni_rht_params);
	if (err)
		return err;

	__vxlan_vni_del_list(vg, node);

	vxlan_vnifilter_notify(vxlan, node, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, node, true);

	/* readers may still hold the node; free after a grace period */
	call_rcu(&node->rcu, vxlan_vni_node_rcu_free);

	return 0;
}
824 
/* Add or delete every VNI in the inclusive range [@start_vni, @end_vni].
 *
 * @vxlan: owning device
 * @start_vni: first VNI, host byte order
 * @end_vni: last VNI, host byte order; nothing is done when it is smaller
 *	     than @start_vni
 * @group: per-VNI remote passed to the add path
 * @cmd: RTM_NEWTUNNEL to add, RTM_DELTUNNEL to delete
 * @extack: netlink extended ack for error reporting
 *
 * Stops at the first failing VNI and returns its error; VNIs already
 * processed are not rolled back. Returns -EOPNOTSUPP for any other @cmd
 * and 0 on success.
 */
static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
			     __u32 end_vni, union vxlan_addr *group,
			     int cmd, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_group *vg;
	u32 v;	/* same width and signedness as the range bounds */
	int err;

	vg = rtnl_dereference(vxlan->vnigrp);

	for (v = start_vni; v <= end_vni; v++) {
		switch (cmd) {
		case RTM_NEWTUNNEL:
			err = vxlan_vni_add(vxlan, vg, v, group, extack);
			break;
		case RTM_DELTUNNEL:
			err = vxlan_vni_del(vxlan, vg, v, extack);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;

		/* stop explicitly on the last VNI: with end_vni == U32_MAX
		 * the increment would wrap and "v <= end_vni" never fails
		 */
		if (v == end_vni)
			break;
	}

	return 0;
}
854 
vxlan_process_vni_filter(struct vxlan_dev * vxlan,struct nlattr * nlvnifilter,int cmd,struct netlink_ext_ack * extack)855 static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
856 				    struct nlattr *nlvnifilter,
857 				    int cmd, struct netlink_ext_ack *extack)
858 {
859 	struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
860 	u32 vni_start = 0, vni_end = 0;
861 	union vxlan_addr group;
862 	int err;
863 
864 	err = nla_parse_nested(vattrs,
865 			       VXLAN_VNIFILTER_ENTRY_MAX,
866 			       nlvnifilter, vni_filter_entry_policy,
867 			       extack);
868 	if (err)
869 		return err;
870 
871 	if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
872 		vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
873 		vni_end = vni_start;
874 	}
875 
876 	if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
877 		vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);
878 
879 	if (!vni_start && !vni_end) {
880 		NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
881 				    "vni start nor end found in vni entry");
882 		return -EINVAL;
883 	}
884 
885 	if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
886 		group.sin.sin_addr.s_addr =
887 			nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
888 		group.sa.sa_family = AF_INET;
889 	} else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
890 		group.sin6.sin6_addr =
891 			nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
892 		group.sa.sa_family = AF_INET6;
893 	} else {
894 		memset(&group, 0, sizeof(group));
895 	}
896 
897 	if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
898 		NL_SET_ERR_MSG(extack,
899 			       "Local interface required for multicast remote group");
900 
901 		return -EINVAL;
902 	}
903 
904 	err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
905 				extack);
906 	if (err)
907 		return err;
908 
909 	return 0;
910 }
911 
vxlan_vnigroup_uninit(struct vxlan_dev * vxlan)912 void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
913 {
914 	struct vxlan_vni_node *v, *tmp;
915 	struct vxlan_vni_group *vg;
916 
917 	vg = rtnl_dereference(vxlan->vnigrp);
918 	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
919 		rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
920 				       vxlan_vni_rht_params);
921 		hlist_del_init_rcu(&v->hlist4.hlist);
922 #if IS_ENABLED(CONFIG_IPV6)
923 		hlist_del_init_rcu(&v->hlist6.hlist);
924 #endif
925 		__vxlan_vni_del_list(vg, v);
926 		vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
927 		call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
928 	}
929 	rhashtable_destroy(&vg->vni_hash);
930 	kfree(vg);
931 }
932 
vxlan_vnigroup_init(struct vxlan_dev * vxlan)933 int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
934 {
935 	struct vxlan_vni_group *vg;
936 	int ret;
937 
938 	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
939 	if (!vg)
940 		return -ENOMEM;
941 	ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
942 	if (ret) {
943 		kfree(vg);
944 		return ret;
945 	}
946 	INIT_LIST_HEAD(&vg->vni_list);
947 	rcu_assign_pointer(vxlan->vnigrp, vg);
948 
949 	return 0;
950 }
951 
vxlan_vnifilter_process(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)952 static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
953 				   struct netlink_ext_ack *extack)
954 {
955 	struct net *net = sock_net(skb->sk);
956 	struct tunnel_msg *tmsg;
957 	struct vxlan_dev *vxlan;
958 	struct net_device *dev;
959 	struct nlattr *attr;
960 	int err, vnis = 0;
961 	int rem;
962 
963 	/* this should validate the header and check for remaining bytes */
964 	err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
965 			  vni_filter_policy, extack);
966 	if (err < 0)
967 		return err;
968 
969 	tmsg = nlmsg_data(nlh);
970 	dev = __dev_get_by_index(net, tmsg->ifindex);
971 	if (!dev)
972 		return -ENODEV;
973 
974 	if (!netif_is_vxlan(dev)) {
975 		NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
976 		return -EINVAL;
977 	}
978 
979 	vxlan = netdev_priv(dev);
980 
981 	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
982 		return -EOPNOTSUPP;
983 
984 	nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
985 		switch (nla_type(attr)) {
986 		case VXLAN_VNIFILTER_ENTRY:
987 			err = vxlan_process_vni_filter(vxlan, attr,
988 						       nlh->nlmsg_type, extack);
989 			break;
990 		default:
991 			continue;
992 		}
993 		vnis++;
994 		if (err)
995 			break;
996 	}
997 
998 	if (!vnis) {
999 		NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
1000 		err = -EINVAL;
1001 	}
1002 
1003 	return err;
1004 }
1005 
1006 static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = {
1007 	{THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0},
1008 	{THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0},
1009 	{THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0},
1010 };
1011 
vxlan_vnifilter_init(void)1012 int vxlan_vnifilter_init(void)
1013 {
1014 	return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers);
1015 }
1016 
vxlan_vnifilter_uninit(void)1017 void vxlan_vnifilter_uninit(void)
1018 {
1019 	rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers);
1020 }
1021