/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>

struct mr6_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};

/* Big lock, protecting the vif table, the mrt cache and the mroute socket
   state.  Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme.  The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock.  The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
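/*
 * An illustrative sketch (not part of the original file) of the lock
 * discipline the two comments above describe, using only identifiers
 * declared in this file.  Per-packet readers on the data path:
 *
 *	read_lock(&mrt_lock);
 *	... look up vif6_table / mfc6_cache_array ...
 *	read_unlock(&mrt_lock);
 *
 * Control-path writers (process context, BHs disabled):
 *
 *	write_lock_bh(&mrt_lock);
 *	... add or remove a vif or a resolved cache entry ...
 *	write_unlock_bh(&mrt_lock);
 *
 * The unresolved queue is touched from both contexts and therefore takes
 * the strong lock in both:
 *
 *	spin_lock_bh(&mfc_unres_lock);
 *	... queue, resolve or expire an unresolved entry ...
 *	spin_unlock_bh(&mfc_unres_lock);
 */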

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	struct ip6mr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ip6mr_free_table(net->ipv6.mrt6);
}
#endif

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/ip6_mr_cache and /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mrt->mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
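
/*
 * For orientation, a sketch of the register packet that pim6_rcv() above
 * unwraps (the authoritative format is the PIM-SM spec, RFC 4601):
 *
 *	+------------------+-------------------+------------------------+
 *	| outer IPv6 hdr   | PIM register hdr  | inner IPv6 packet      |
 *	| next hdr = 103   | struct pimreghdr  | multicast destination  |
 *	+------------------+-------------------+------------------------+
 *
 * The checksum test above accepts a packet if the checksum over just the
 * register header or over the whole packet verifies; the inner packet is
 * then re-injected on the pim6reg device via skb_tunnel_rx()/netif_rx().
 */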

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding--;

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing the queued skbs
   and reporting the error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer handler for the unresolved queue. */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the oifs list.  Called with mrt_lock held for writing. */

static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
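
/*
 * A worked example of the encoding above (illustrative values only):
 * with ttls = { [1] = 1, [3] = 64 } and every other slot 0 or 255,
 * minvif becomes 1 and maxvif becomes 4, so ip6_mr_forward() will later
 * transmit on mif 1 for any packet but on mif 3 only while the packet's
 * hop_limit is greater than 64; a stored ttl of 255 means "never forward
 * on this mif".
 */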

static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special-purpose VIF in PIM:
		 * all packets are sent to the daemon.
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev)
		in6_dev->cnf.mc_forwarding++;

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish the update by writing the critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone from the queued into the resolved state
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd.  We could use netlink for this,
 *	but pim6sd expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate the old header, fix the length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (mrt->mroute6_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to the userspace multicast routing daemon
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
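
/*
 * The other end of ip6mr_cache_report() is the daemon's MRT6_INIT raw
 * ICMPv6 socket.  A minimal userspace sketch, assuming "fd" is that
 * socket (the buffer size here is arbitrary):
 *
 *	unsigned char buf[8192];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	... locate the struct mrt6msg built above and dispatch on
 *	    im6_msgtype: MRT6MSG_NOCACHE means resolve the route and
 *	    answer with MRT6_ADD_MFC, MRT6MSG_WRONGMIF drives PIM assert
 *	    processing, MRT6MSG_WHOLEPKT carries a register candidate ...
 */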

/*
 *	Queue a packet for resolution.  The packet is attached to a
 *	locked, unresolved cache entry.
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect the first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by userspace
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, &list);
		}
	}
	unregister_netdevice_many(&list);

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip6_mr_vif");
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip6_mr_cache");
	proc_net_remove(net, "ip6_mr_vif");
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list.  If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr6_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	} else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}

/*
 *	Socket options and virtual interface manipulation.  The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think.  Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches.  These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(mrt, &mfc);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (enabling PIM also enables asserts)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION, which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
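
/*
 * A minimal sketch of the userspace side of the options handled above
 * (error handling omitted; the mif field values and "eth0" are only
 * placeholders):
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *
 *	struct mif6ctl mif = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),
 *	};
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 * MRT6_ADD_MFC then installs (source, group) entries via struct mf6cctl
 * in the same way, and MRT6_DONE tears everything down again (see
 * ip6mr_sk_done()).
 */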
1704 
1705 /*
1706  *	Getsock opt support for the multicast routing system.
1707  */
1708 
ip6_mroute_getsockopt(struct sock * sk,int optname,char __user * optval,int __user * optlen)1709 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1710 			  int __user *optlen)
1711 {
1712 	int olr;
1713 	int val;
1714 	struct net *net = sock_net(sk);
1715 	struct mr6_table *mrt;
1716 
1717 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1718 	if (mrt == NULL)
1719 		return -ENOENT;
1720 
1721 	switch (optname) {
1722 	case MRT6_VERSION:
1723 		val = 0x0305;
1724 		break;
1725 #ifdef CONFIG_IPV6_PIMSM_V2
1726 	case MRT6_PIM:
1727 		val = mrt->mroute_do_pim;
1728 		break;
1729 #endif
1730 	case MRT6_ASSERT:
1731 		val = mrt->mroute_do_assert;
1732 		break;
1733 	default:
1734 		return -ENOPROTOOPT;
1735 	}
1736 
1737 	if (get_user(olr, optlen))
1738 		return -EFAULT;
1739 
1740 	olr = min_t(int, olr, sizeof(int));
1741 	if (olr < 0)
1742 		return -EINVAL;
1743 
1744 	if (put_user(olr, optlen))
1745 		return -EFAULT;
1746 	if (copy_to_user(optval, &val, olr))
1747 		return -EFAULT;
1748 	return 0;
1749 }
1750 
1751 /*
1752  *	The IP multicast ioctl support routines.
1753  */
1754 
ip6mr_ioctl(struct sock * sk,int cmd,void __user * arg)1755 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1756 {
1757 	struct sioc_sg_req6 sr;
1758 	struct sioc_mif_req6 vr;
1759 	struct mif_device *vif;
1760 	struct mfc6_cache *c;
1761 	struct net *net = sock_net(sk);
1762 	struct mr6_table *mrt;
1763 
1764 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1765 	if (mrt == NULL)
1766 		return -ENOENT;
1767 
1768 	switch (cmd) {
1769 	case SIOCGETMIFCNT_IN6:
1770 		if (copy_from_user(&vr, arg, sizeof(vr)))
1771 			return -EFAULT;
1772 		if (vr.mifi >= mrt->maxvif)
1773 			return -EINVAL;
1774 		read_lock(&mrt_lock);
1775 		vif = &mrt->vif6_table[vr.mifi];
1776 		if (MIF_EXISTS(mrt, vr.mifi)) {
1777 			vr.icount = vif->pkt_in;
1778 			vr.ocount = vif->pkt_out;
1779 			vr.ibytes = vif->bytes_in;
1780 			vr.obytes = vif->bytes_out;
1781 			read_unlock(&mrt_lock);
1782 
1783 			if (copy_to_user(arg, &vr, sizeof(vr)))
1784 				return -EFAULT;
1785 			return 0;
1786 		}
1787 		read_unlock(&mrt_lock);
1788 		return -EADDRNOTAVAIL;
1789 	case SIOCGETSGCNT_IN6:
1790 		if (copy_from_user(&sr, arg, sizeof(sr)))
1791 			return -EFAULT;
1792 
1793 		read_lock(&mrt_lock);
1794 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1795 		if (c) {
1796 			sr.pktcnt = c->mfc_un.res.pkt;
1797 			sr.bytecnt = c->mfc_un.res.bytes;
1798 			sr.wrong_if = c->mfc_un.res.wrong_if;
1799 			read_unlock(&mrt_lock);
1800 
1801 			if (copy_to_user(arg, &sr, sizeof(sr)))
1802 				return -EFAULT;
1803 			return 0;
1804 		}
1805 		read_unlock(&mrt_lock);
1806 		return -EADDRNOTAVAIL;
1807 	default:
1808 		return -ENOIOCTLCMD;
1809 	}
1810 }
1811 
1812 #ifdef CONFIG_COMPAT
1813 struct compat_sioc_sg_req6 {
1814 	struct sockaddr_in6 src;
1815 	struct sockaddr_in6 grp;
1816 	compat_ulong_t pktcnt;
1817 	compat_ulong_t bytecnt;
1818 	compat_ulong_t wrong_if;
1819 };
1820 
1821 struct compat_sioc_mif_req6 {
1822 	mifi_t	mifi;
1823 	compat_ulong_t icount;
1824 	compat_ulong_t ocount;
1825 	compat_ulong_t ibytes;
1826 	compat_ulong_t obytes;
1827 };
1828 
ip6mr_compat_ioctl(struct sock * sk,unsigned int cmd,void __user * arg)1829 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1830 {
1831 	struct compat_sioc_sg_req6 sr;
1832 	struct compat_sioc_mif_req6 vr;
1833 	struct mif_device *vif;
1834 	struct mfc6_cache *c;
1835 	struct net *net = sock_net(sk);
1836 	struct mr6_table *mrt;
1837 
1838 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1839 	if (mrt == NULL)
1840 		return -ENOENT;
1841 
1842 	switch (cmd) {
1843 	case SIOCGETMIFCNT_IN6:
1844 		if (copy_from_user(&vr, arg, sizeof(vr)))
1845 			return -EFAULT;
1846 		if (vr.mifi >= mrt->maxvif)
1847 			return -EINVAL;
1848 		read_lock(&mrt_lock);
1849 		vif = &mrt->vif6_table[vr.mifi];
1850 		if (MIF_EXISTS(mrt, vr.mifi)) {
1851 			vr.icount = vif->pkt_in;
1852 			vr.ocount = vif->pkt_out;
1853 			vr.ibytes = vif->bytes_in;
1854 			vr.obytes = vif->bytes_out;
1855 			read_unlock(&mrt_lock);
1856 
1857 			if (copy_to_user(arg, &vr, sizeof(vr)))
1858 				return -EFAULT;
1859 			return 0;
1860 		}
1861 		read_unlock(&mrt_lock);
1862 		return -EADDRNOTAVAIL;
1863 	case SIOCGETSGCNT_IN6:
1864 		if (copy_from_user(&sr, arg, sizeof(sr)))
1865 			return -EFAULT;
1866 
1867 		read_lock(&mrt_lock);
1868 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1869 		if (c) {
1870 			sr.pktcnt = c->mfc_un.res.pkt;
1871 			sr.bytecnt = c->mfc_un.res.bytes;
1872 			sr.wrong_if = c->mfc_un.res.wrong_if;
1873 			read_unlock(&mrt_lock);
1874 
1875 			if (copy_to_user(arg, &sr, sizeof(sr)))
1876 				return -EFAULT;
1877 			return 0;
1878 		}
1879 		read_unlock(&mrt_lock);
1880 		return -EADDRNOTAVAIL;
1881 	default:
1882 		return -ENOIOCTLCMD;
1883 	}
1884 }
1885 #endif
1886 
ip6mr_forward2_finish(struct sk_buff * skb)1887 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1888 {
1889 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1890 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
1891 	return dst_output(skb);
1892 }
1893 
1894 /*
1895  *	Processing handlers for ip6mr_forward
1896  */
1897 
ip6mr_forward2(struct net * net,struct mr6_table * mrt,struct sk_buff * skb,struct mfc6_cache * c,int vifi)1898 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1899 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1900 {
1901 	struct ipv6hdr *ipv6h;
1902 	struct mif_device *vif = &mrt->vif6_table[vifi];
1903 	struct net_device *dev;
1904 	struct dst_entry *dst;
1905 	struct flowi6 fl6;
1906 
1907 	if (vif->dev == NULL)
1908 		goto out_free;
1909 
1910 #ifdef CONFIG_IPV6_PIMSM_V2
1911 	if (vif->flags & MIFF_REGISTER) {
1912 		vif->pkt_out++;
1913 		vif->bytes_out += skb->len;
1914 		vif->dev->stats.tx_bytes += skb->len;
1915 		vif->dev->stats.tx_packets++;
1916 		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1917 		goto out_free;
1918 	}
1919 #endif
1920 
1921 	ipv6h = ipv6_hdr(skb);
1922 
1923 	fl6 = (struct flowi6) {
1924 		.flowi6_oif = vif->link,
1925 		.daddr = ipv6h->daddr,
1926 	};
1927 
1928 	dst = ip6_route_output(net, NULL, &fl6);
1929 	if (dst->error) {
1930 		dst_release(dst);
1931 		goto out_free;
1932 	}
1933 
1934 	skb_dst_drop(skb);
1935 	skb_dst_set(skb, dst);
1936 
1937 	/*
1938 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1939 	 * not only before forwarding, but after forwarding on all output
1940 	 * interfaces. It is clear, if mrouter runs a multicasting
1941 	 * program, it should receive packets not depending to what interface
1942 	 * program is joined.
1943 	 * If we will not make it, the program will have to join on all
1944 	 * interfaces. On the other hand, multihoming host (or router, but
1945 	 * not mrouter) cannot join to more than one interface - it will
1946 	 * result in receiving multiple packets.
1947 	 */
1948 	dev = vif->dev;
1949 	skb->dev = dev;
1950 	vif->pkt_out++;
1951 	vif->bytes_out += skb->len;
1952 
1953 	/* We are about to write */
1954 	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

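/*
 * Map a net_device back to its mif index by a linear scan of the vif
 * table; returns -1 if the device is not a configured mif.
 */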
static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

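/*
 * Fan a resolved packet out to every mif whose TTL threshold it
 * passes. All but the last matching mif get an skb_clone(); the
 * original skb is consumed by the final ip6mr_forward2() call (or
 * freed), so the caller must not touch it afterwards.
 */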
static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from the RPT to
		       the SPT, so we cannot check that the packet arrived
		       on an oif. That is unfortunate, but otherwise we
		       would need to move a pretty large chunk of pimd into
		       the kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

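/*
 * Called from the IPv6 multicast input path once multicast forwarding
 * has been enabled. A packet with no resolved (S,G) entry is queued
 * via ip6mr_cache_unresolved() and reported to the daemon, which is
 * expected to install a route with MRT6_ADD_MFC; resolved packets go
 * straight to ip6_mr_forward().
 */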
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


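/*
 * Fill the rtnetlink attributes for one cache entry: RTA_IIF for the
 * parent mif plus an RTA_MULTIPATH nexthop list with one entry per
 * forwarding mif, carrying that mif's TTL threshold in rtnh_hops.
 */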
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

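/*
 * Resolve an RTM_GETROUTE request against the multicast cache. On a
 * cache miss (unless nowait is set, in which case we return -EAGAIN)
 * a minimal dummy IPv6 header carrying only the source and group
 * addresses is queued as an unresolved entry, so a reply can be
 * generated once the daemon installs the route.
 */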
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

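/*
 * Build one RTM_NEWROUTE dump message for a cache entry. The family
 * is RTNL_FAMILY_IP6MR rather than AF_INET6 so that dump consumers
 * can distinguish multicast routes from ordinary IPv6 ones.
 */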
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 pid, u32 seq, struct mfc6_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags    = 0;

	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);

	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

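/*
 * Netlink dump callback. The (table, hash line, entry) position
 * reached so far is saved in cb->args[0..2] so an interrupted dump
 * resumes where the previous skb filled up.
 */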
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).pid,
						      cb->nlh->nlmsg_seq,
						      mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}