/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

#ifndef _IP_SET_HASH_GEN_H
#define _IP_SET_HASH_GEN_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>

#define __ipset_dereference(p)		\
	rcu_dereference_protected(p, 1)
#define ipset_dereference_nfnl(p)	\
	rcu_dereference_protected(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
#define ipset_dereference_set(p, set)	\
	rcu_dereference_protected(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
		lockdep_is_held(&(set)->lock))
#define ipset_dereference_bh_nfnl(p)	\
	rcu_dereference_bh_check(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))

/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32).
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE			4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE			(3 * AHASH_INIT_SIZE)
/* Max number of elements in the array block when tuned */
#define AHASH_MAX_TUNED			64
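/* Illustrative note (not part of the original source): a bucket starts
 * with room for AHASH_INIT_SIZE (4) elements and grows in steps of 4 up
 * to AHASH_MAX_SIZE (12), or up to AHASH_MAX_TUNED (64) when the multi
 * heuristic below raises the limit.
 */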

/* Max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h)			((h)->ahash_max)

static u8
tune_ahash_max(u8 curr, u32 multi)
{
	u32 n;

	if (multi < curr)
		return curr;

	n = curr + AHASH_INIT_SIZE;
	/* Currently, at listing one hash bucket must fit into a message.
	 * Therefore we have a hard limit here.
	 */
	return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
}
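/* Example (illustrative): with ahash_max == 12 and a clash counter
 * multi >= 12, the limit is raised to 16; once the next step would
 * exceed AHASH_MAX_TUNED (64), the current value is kept instead.
 */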

#define TUNE_AHASH_MAX(h, multi)	\
	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
#define AHASH_MAX(h)			AHASH_MAX_SIZE
#define TUNE_AHASH_MAX(h, multi)
#endif

/* A hash bucket */
struct hbucket {
	struct rcu_head rcu;	/* for call_rcu */
	/* Which positions are used in the array */
	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
	unsigned char value[]	/* the array of the values */
		__aligned(__alignof__(u64));
};

/* Region size for locking == 2^HTABLE_REGION_BITS */
#define HTABLE_REGION_BITS	10
#define ahash_numof_locks(htable_bits)		\
	((htable_bits) < HTABLE_REGION_BITS ? 1	\
		: jhash_size((htable_bits) - HTABLE_REGION_BITS))
#define ahash_sizeof_regions(htable_bits)		\
	(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
#define ahash_region(n, htable_bits)		\
	((n) % ahash_numof_locks(htable_bits))
#define ahash_bucket_start(h, htable_bits)	\
	((htable_bits) < HTABLE_REGION_BITS ? 0	\
		: (h) * jhash_size(HTABLE_REGION_BITS))
#define ahash_bucket_end(h, htable_bits)	\
	((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits)	\
		: ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
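/* Worked example (illustrative, not from the original source): with
 * htable_bits == 12 and HTABLE_REGION_BITS == 10, ahash_numof_locks()
 * is jhash_size(2) == 4, so the 4096 buckets are split into 4 regions
 * of 1024 buckets each; bucket key 2500 falls into region 2500 % 4 == 0,
 * and region 1 spans buckets [1024, 2048).
 */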

struct htable_gc {
	struct delayed_work dwork;
	struct ip_set *set;	/* Set the gc belongs to */
	u32 region;		/* Last gc run position */
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
	atomic_t ref;		/* References for resizing */
	atomic_t uref;		/* References for dumping and gc */
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	u32 maxelem;		/* Maxelem per region */
	struct ip_set_region *hregion;	/* Region locks and ext sizes */
	struct hbucket __rcu *bucket[]; /* hashtable buckets */
};

#define hbucket(h, i)		((h)->bucket[i])
#define ext_size(n, dsize)	\
	(sizeof(struct hbucket) + (n) * (dsize))

#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT		1
#endif

/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
	u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
	u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
};

/* Compute the hash table size */
static size_t
htable_size(u8 hbits)
{
	size_t hsize;

	/* We must fit both into u32 in jhash and size_t */
	if (hbits > 31)
		return 0;
	hsize = jhash_size(hbits);
	if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
	    < hsize)
		return 0;

	return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}

/* Compute htable_bits from the user input parameter hashsize */
static u8
htable_bits(u32 hashsize)
{
	/* Assume that hashsize == 2^htable_bits */
	u8 bits = fls(hashsize - 1);

	if (jhash_size(bits) != hashsize)
		/* Round up to the first 2^n value */
		bits = fls(hashsize);

	return bits;
}
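/* Example (illustrative): hashsize 1024 gives fls(1023) == 10 and
 * jhash_size(10) == 1024, so 10 is returned unchanged; hashsize 1000 is
 * rounded up to the next power of two via fls(1000) == 10, i.e. 1024
 * buckets.
 */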

#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i)		(cidr[i])
#else
#define __CIDR(cidr, i)		(cidr)
#endif

/* cidr + 1 is stored in net_prefixes to support /0 */
#define NCIDR_PUT(cidr)		((cidr) + 1)
#define NCIDR_GET(cidr)		((cidr) - 1)

#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
#define DCIDR_PUT(cidr)		((cidr) - 1)
#define DCIDR_GET(cidr, i)	(__CIDR(cidr, i) + 1)
#else
#define DCIDR_PUT(cidr)		(cidr)
#define DCIDR_GET(cidr, i)	__CIDR(cidr, i)
#endif

#define INIT_CIDR(cidr, host_mask)	\
	DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))

#ifdef IP_SET_HASH_WITH_NET0
/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
#define NLEN			(HOST_MASK + 1)
#define CIDR_POS(c)		((c) - 1)
#else
/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
#define NLEN			HOST_MASK
#define CIDR_POS(c)		((c) - 2)
#endif

#else
#define NLEN			0
#endif /* IP_SET_HASH_WITH_NETS */

#define SET_ELEM_EXPIRED(set, d)	\
	(SET_WITH_TIMEOUT(set) &&	\
	 ip_set_timeout_expired(ext_timeout(d, set)))

#endif /* _IP_SET_HASH_GEN_H */

#ifndef MTYPE
#error "MTYPE is not defined!"
#endif

#ifndef HTYPE
#error "HTYPE is not defined!"
#endif

#ifndef HOST_MASK
#error "HOST_MASK is not defined!"
#endif

/* Family dependent templates */

#undef ahash_data
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
#undef mtype_data_next
#undef mtype_elem

#undef mtype_ahash_destroy
#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt

#undef mtype_add
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
#undef mtype_resize
#undef mtype_ext_size
#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
#undef mtype_data_match

#undef htype
#undef HKEY

#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
#else
#define mtype_do_data_match(d)	1
#endif
#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)

#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)

#define mtype_add		IPSET_TOKEN(MTYPE, _add)
#define mtype_del		IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test		IPSET_TOKEN(MTYPE, _test)
#define mtype_uref		IPSET_TOKEN(MTYPE, _uref)
#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
#define mtype_ext_size		IPSET_TOKEN(MTYPE, _ext_size)
#define mtype_resize_ad		IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head		IPSET_TOKEN(MTYPE, _head)
#define mtype_list		IPSET_TOKEN(MTYPE, _list)
#define mtype_gc_do		IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)

#ifndef HKEY_DATALEN
#define HKEY_DATALEN		sizeof(struct mtype_elem)
#endif

#define htype			MTYPE

#define HKEY(data, initval, htable_bits)			\
({								\
	const u32 *__k = (const u32 *)data;			\
	u32 __l = HKEY_DATALEN / sizeof(u32);			\
								\
	BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);		\
								\
	jhash2(__k, __l, initval) & jhash_mask(htable_bits);	\
})
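/* For illustration only: HKEY() treats the element as an array of
 * HKEY_DATALEN / 4 u32 words, hashes them with jhash2() and masks the
 * result, so with htable_bits == 10 the returned bucket key always lies
 * in the range [0, 1023].
 */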

/* The generic hash structure */
struct htype {
	struct htable __rcu *table; /* the hash table */
	struct htable_gc gc;	/* gc workqueue */
	u32 maxelem;		/* max elements in the hash */
	u32 initval;		/* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;		/* markmask value for mark mask to store */
#endif
#ifdef IP_SET_HASH_WITH_MULTI
	u8 ahash_max;		/* max elements in an array block */
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;		/* netmask value for subnets to store */
#endif
	struct list_head ad;	/* Resize add|del backlist */
	struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
	struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};

/* ADD|DEL entries saved during resize */
struct mtype_resize_ad {
	struct list_head list;
	enum ipset_adt ad;	/* ADD|DEL element */
	struct mtype_elem d;	/* Element value */
	struct ip_set_ext ext;	/* Extensions for ADD */
	struct ip_set_ext mext;	/* Target extensions for ADD */
	u32 flags;		/* Flags for ADD */
};

#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
 * sized networks. cidr == real cidr + 1 to support /0.
 */
static void
mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
	int i, j;

	spin_lock_bh(&set->lock);
	/* Add in increasing prefix order, so larger cidr first */
	for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
		if (j != -1) {
			continue;
		} else if (h->nets[i].cidr[n] < cidr) {
			j = i;
		} else if (h->nets[i].cidr[n] == cidr) {
			h->nets[CIDR_POS(cidr)].nets[n]++;
			goto unlock;
		}
	}
	if (j != -1) {
		for (; i > j; i--)
			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
	}
	h->nets[i].cidr[n] = cidr;
	h->nets[CIDR_POS(cidr)].nets[n] = 1;
unlock:
	spin_unlock_bh(&set->lock);
}
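/* Illustrative example (not from the original source): after adding /24,
 * /16 and /8 networks, cidr[] is kept ordered as 25, 17, 9 (cidr + 1,
 * largest prefix first), so mtype_test_cidrs() below probes the more
 * specific networks first.
 */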

static void
mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
	u8 i, j, net_end = NLEN - 1;

	spin_lock_bh(&set->lock);
	for (i = 0; i < NLEN; i++) {
		if (h->nets[i].cidr[n] != cidr)
			continue;
		h->nets[CIDR_POS(cidr)].nets[n]--;
		if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
			goto unlock;
		for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
		h->nets[j].cidr[n] = 0;
		goto unlock;
	}
unlock:
	spin_unlock_bh(&set->lock);
}
#endif

/* Calculate the actual memory size of the set data */
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
	return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}

/* Get the ith element from the array block n */
#define ahash_data(n, i, dsize)	\
	((struct mtype_elem *)((n)->value + ((i) * (dsize))))

static void
mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
{
	int i;

	for (i = 0; i < n->pos; i++)
		if (test_bit(i, n->used))
			ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}

/* Flush a hash type of set: destroy all elements */
static void
mtype_flush(struct ip_set *set)
{
	struct htype *h = set->data;
	struct htable *t;
	struct hbucket *n;
	u32 r, i;

	t = ipset_dereference_nfnl(h->table);
	for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
		spin_lock_bh(&t->hregion[r].lock);
		for (i = ahash_bucket_start(r, t->htable_bits);
		     i < ahash_bucket_end(r, t->htable_bits); i++) {
			n = __ipset_dereference(hbucket(t, i));
			if (!n)
				continue;
			if (set->extensions & IPSET_EXT_DESTROY)
				mtype_ext_cleanup(set, n);
			/* FIXME: use slab cache */
			rcu_assign_pointer(hbucket(t, i), NULL);
			kfree_rcu(n, rcu);
		}
		t->hregion[r].ext_size = 0;
		t->hregion[r].elements = 0;
		spin_unlock_bh(&t->hregion[r].lock);
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(h->nets));
#endif
}

/* Destroy the hashtable part of the set */
static void
mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
{
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = __ipset_dereference(hbucket(t, i));
		if (!n)
			continue;
		if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
			mtype_ext_cleanup(set, n);
		/* FIXME: use slab cache */
		kfree(n);
	}

	ip_set_free(t->hregion);
	ip_set_free(t);
}

/* Destroy a hash type of set */
static void
mtype_destroy(struct ip_set *set)
{
	struct htype *h = set->data;
	struct list_head *l, *lt;

	if (SET_WITH_TIMEOUT(set))
		cancel_delayed_work_sync(&h->gc.dwork);

	mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
	list_for_each_safe(l, lt, &h->ad) {
		list_del(l);
		kfree(l);
	}
	kfree(h);

	set->data = NULL;
}

static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
	const struct htype *x = a->data;
	const struct htype *y = b->data;

	/* Resizing changes htable_bits, so we ignore it */
	return x->maxelem == y->maxelem &&
	       a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
	       x->netmask == y->netmask &&
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	       x->markmask == y->markmask &&
#endif
	       a->extensions == b->extensions;
}

static void
mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
	struct hbucket *n, *tmp;
	struct mtype_elem *data;
	u32 i, j, d;
	size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 k;
#endif
	u8 htable_bits = t->htable_bits;

	spin_lock_bh(&t->hregion[r].lock);
	for (i = ahash_bucket_start(r, htable_bits);
	     i < ahash_bucket_end(r, htable_bits); i++) {
		n = __ipset_dereference(hbucket(t, i));
		if (!n)
			continue;
		for (j = 0, d = 0; j < n->pos; j++) {
			if (!test_bit(j, n->used)) {
				d++;
				continue;
			}
			data = ahash_data(n, j, dsize);
			if (!ip_set_timeout_expired(ext_timeout(data, set)))
				continue;
			pr_debug("expired %u/%u\n", i, j);
			clear_bit(j, n->used);
			smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
			for (k = 0; k < IPSET_NET_COUNT; k++)
				mtype_del_cidr(set, h,
					NCIDR_PUT(DCIDR_GET(data->cidr, k)),
					k);
#endif
			t->hregion[r].elements--;
			ip_set_ext_destroy(set, data);
			d++;
		}
		if (d >= AHASH_INIT_SIZE) {
			if (d >= n->size) {
				t->hregion[r].ext_size -=
					ext_size(n->size, dsize);
				rcu_assign_pointer(hbucket(t, i), NULL);
				kfree_rcu(n, rcu);
				continue;
			}
			tmp = kzalloc(sizeof(*tmp) +
				(n->size - AHASH_INIT_SIZE) * dsize,
				GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements. */
				continue;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, d = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + d * dsize,
				       data, dsize);
				set_bit(d, tmp->used);
				d++;
			}
			tmp->pos = d;
			t->hregion[r].ext_size -=
				ext_size(AHASH_INIT_SIZE, dsize);
			rcu_assign_pointer(hbucket(t, i), tmp);
			kfree_rcu(n, rcu);
		}
	}
	spin_unlock_bh(&t->hregion[r].lock);
}

static void
mtype_gc(struct work_struct *work)
{
	struct htable_gc *gc;
	struct ip_set *set;
	struct htype *h;
	struct htable *t;
	u32 r, numof_locks;
	unsigned int next_run;

	gc = container_of(work, struct htable_gc, dwork.work);
	set = gc->set;
	h = set->data;

	spin_lock_bh(&set->lock);
	t = ipset_dereference_set(h->table, set);
	atomic_inc(&t->uref);
	numof_locks = ahash_numof_locks(t->htable_bits);
	r = gc->region++;
	if (r >= numof_locks) {
		r = gc->region = 0;
	}
	next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
	if (next_run < HZ/10)
		next_run = HZ/10;
	spin_unlock_bh(&set->lock);

	mtype_gc_do(set, h, t, r);

	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by expire: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}

	queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
}

static void
mtype_gc_init(struct htable_gc *gc)
{
	INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
	queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}

static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags);
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags);

/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures.
 */
static int
mtype_resize(struct ip_set *set, bool retried)
{
	struct htype *h = set->data;
	struct htable *t, *orig;
	u8 htable_bits;
	size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 flags;
	struct mtype_elem *tmp;
#endif
	struct mtype_elem *data;
	struct mtype_elem *d;
	struct hbucket *n, *m;
	struct list_head *l, *lt;
	struct mtype_resize_ad *x;
	u32 i, j, r, nr, key;
	int ret;

#ifdef IP_SET_HASH_WITH_NETS
	tmp = kmalloc(dsize, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;
#endif
	orig = ipset_dereference_bh_nfnl(h->table);
	htable_bits = orig->htable_bits;

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warn("Cannot increase the hashsize of set %s further\n",
			set->name);
		ret = -IPSET_ERR_HASH_FULL;
		goto out;
	}
	t = ip_set_alloc(htable_size(htable_bits));
	if (!t) {
		ret = -ENOMEM;
		goto out;
	}
	t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
	if (!t->hregion) {
		ip_set_free(t);
		ret = -ENOMEM;
		goto out;
	}
	t->htable_bits = htable_bits;
	t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
	for (i = 0; i < ahash_numof_locks(htable_bits); i++)
		spin_lock_init(&t->hregion[i].lock);

	/* There can't be another parallel resizing,
	 * but dumping, gc, kernel side add/del are possible
	 */
	orig = ipset_dereference_bh_nfnl(h->table);
	atomic_set(&orig->ref, 1);
	atomic_inc(&orig->uref);
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
		/* Expire may replace a hbucket with another one */
		rcu_read_lock_bh();
		for (i = ahash_bucket_start(r, orig->htable_bits);
		     i < ahash_bucket_end(r, orig->htable_bits); i++) {
			n = __ipset_dereference(hbucket(orig, i));
			if (!n)
				continue;
			for (j = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				if (SET_ELEM_EXPIRED(set, data))
					continue;
#ifdef IP_SET_HASH_WITH_NETS
				/* We have readers running parallel with us,
				 * so the live data cannot be modified.
				 */
				flags = 0;
				memcpy(tmp, data, dsize);
				data = tmp;
				mtype_data_reset_flags(data, &flags);
#endif
				key = HKEY(data, h->initval, htable_bits);
				m = __ipset_dereference(hbucket(t, key));
				nr = ahash_region(key, htable_bits);
				if (!m) {
					m = kzalloc(sizeof(*m) +
					    AHASH_INIT_SIZE * dsize,
					    GFP_ATOMIC);
					if (!m) {
						ret = -ENOMEM;
						goto cleanup;
					}
					m->size = AHASH_INIT_SIZE;
					t->hregion[nr].ext_size +=
						ext_size(AHASH_INIT_SIZE,
							 dsize);
					RCU_INIT_POINTER(hbucket(t, key), m);
				} else if (m->pos >= m->size) {
					struct hbucket *ht;

					if (m->size >= AHASH_MAX(h)) {
						ret = -EAGAIN;
					} else {
						ht = kzalloc(sizeof(*ht) +
						(m->size + AHASH_INIT_SIZE)
						* dsize,
						GFP_ATOMIC);
						if (!ht)
							ret = -ENOMEM;
					}
					if (ret < 0)
						goto cleanup;
					memcpy(ht, m, sizeof(struct hbucket) +
					       m->size * dsize);
					ht->size = m->size + AHASH_INIT_SIZE;
					t->hregion[nr].ext_size +=
						ext_size(AHASH_INIT_SIZE,
							 dsize);
					kfree(m);
					m = ht;
					RCU_INIT_POINTER(hbucket(t, key), ht);
				}
				d = ahash_data(m, m->pos, dsize);
				memcpy(d, data, dsize);
				set_bit(m->pos++, m->used);
				t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
				mtype_data_reset_flags(d, &flags);
#endif
			}
		}
		rcu_read_unlock_bh();
	}

	/* There can't be any other writer. */
	rcu_assign_pointer(h->table, t);

	/* Give time to other readers of the set */
	synchronize_rcu();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	/* Add/delete elements processed by the SET target during resize.
	 * Kernel-side add cannot trigger a resize and userspace actions
	 * are serialized by the mutex.
	 */
	list_for_each_safe(l, lt, &h->ad) {
		x = list_entry(l, struct mtype_resize_ad, list);
		if (x->ad == IPSET_ADD) {
			mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
		} else {
			mtype_del(set, &x->d, NULL, NULL, 0);
		}
		list_del(l);
		kfree(l);
	}
	/* If there's nobody else using the table, destroy it */
	if (atomic_dec_and_test(&orig->uref)) {
		pr_debug("Table destroy by resize %p\n", orig);
		mtype_ahash_destroy(set, orig, false);
	}

out:
#ifdef IP_SET_HASH_WITH_NETS
	kfree(tmp);
#endif
	return ret;

cleanup:
	rcu_read_unlock_bh();
	atomic_set(&orig->ref, 0);
	atomic_dec(&orig->uref);
	mtype_ahash_destroy(set, t, false);
	if (ret == -EAGAIN)
		goto retry;
	goto out;
}

/* Get the current number of elements and ext_size in the set */
static void
mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
{
	struct htype *h = set->data;
	const struct htable *t;
	u32 i, j, r;
	struct hbucket *n;
	struct mtype_elem *data;

	t = rcu_dereference_bh(h->table);
	for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
		for (i = ahash_bucket_start(r, t->htable_bits);
		     i < ahash_bucket_end(r, t->htable_bits); i++) {
			n = rcu_dereference_bh(hbucket(t, i));
			if (!n)
				continue;
			for (j = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, set->dsize);
				if (!SET_ELEM_EXPIRED(set, data))
					(*elements)++;
			}
		}
		*ext_size += t->hregion[r].ext_size;
	}
}

/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code.
 */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n, *old = ERR_PTR(-ENOENT);
	int i, j = -1, ret;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	bool deleted = false, forceadd = false, reuse = false;
	u32 r, key, multi = 0, elements, maxelem;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	r = ahash_region(key, t->htable_bits);
	atomic_inc(&t->uref);
	elements = t->hregion[r].elements;
	maxelem = t->maxelem;
	if (elements >= maxelem) {
		u32 e;
		if (SET_WITH_TIMEOUT(set)) {
			rcu_read_unlock_bh();
			mtype_gc_do(set, h, t, r);
			rcu_read_lock_bh();
		}
		maxelem = h->maxelem;
		elements = 0;
		for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
			elements += t->hregion[e].elements;
		if (elements >= maxelem && SET_WITH_FORCEADD(set))
			forceadd = true;
	}
	rcu_read_unlock_bh();

	spin_lock_bh(&t->hregion[r].lock);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		if (forceadd || elements >= maxelem)
			goto set_full;
		old = NULL;
		n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
			    GFP_ATOMIC);
		if (!n) {
			ret = -ENOMEM;
			goto unlock;
		}
		n->size = AHASH_INIT_SIZE;
		t->hregion[r].ext_size +=
			ext_size(AHASH_INIT_SIZE, set->dsize);
		goto copy_elem;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			/* Reuse first deleted entry */
			if (j == -1) {
				deleted = reuse = true;
				j = i;
			}
			continue;
		}
		data = ahash_data(n, i, set->dsize);
		if (mtype_data_equal(data, d, &multi)) {
			if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
				/* Just the extensions could be overwritten */
				j = i;
				goto overwrite_extensions;
			}
			ret = -IPSET_ERR_EXIST;
			goto unlock;
		}
		/* Reuse first timed out entry */
		if (SET_ELEM_EXPIRED(set, data) && j == -1) {
			j = i;
			reuse = true;
		}
	}
	if (reuse || forceadd) {
		if (j == -1)
			j = 0;
		data = ahash_data(n, j, set->dsize);
		if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
			for (i = 0; i < IPSET_NET_COUNT; i++)
				mtype_del_cidr(set, h,
					NCIDR_PUT(DCIDR_GET(data->cidr, i)),
					i);
#endif
			ip_set_ext_destroy(set, data);
			t->hregion[r].elements--;
		}
		goto copy_data;
	}
	if (elements >= maxelem)
		goto set_full;
	/* Create a new slot */
	if (n->pos >= n->size) {
		TUNE_AHASH_MAX(h, multi);
		if (n->size >= AHASH_MAX(h)) {
			/* Trigger rehashing */
			mtype_data_next(&h->next, d);
			ret = -EAGAIN;
			goto resize;
		}
		old = n;
		n = kzalloc(sizeof(*n) +
			    (old->size + AHASH_INIT_SIZE) * set->dsize,
			    GFP_ATOMIC);
		if (!n) {
			ret = -ENOMEM;
			goto unlock;
		}
		memcpy(n, old, sizeof(struct hbucket) +
		       old->size * set->dsize);
		n->size = old->size + AHASH_INIT_SIZE;
		t->hregion[r].ext_size +=
			ext_size(AHASH_INIT_SIZE, set->dsize);
	}

copy_elem:
	j = n->pos++;
	data = ahash_data(n, j, set->dsize);
copy_data:
	t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
	for (i = 0; i < IPSET_NET_COUNT; i++)
		mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
	memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
#ifdef IP_SET_HASH_WITH_NETS
	mtype_data_set_flags(data, flags);
#endif
	if (SET_WITH_COUNTER(set))
		ip_set_init_counter(ext_counter(data, set), ext);
	if (SET_WITH_COMMENT(set))
		ip_set_init_comment(set, ext_comment(data, set), ext);
	if (SET_WITH_SKBINFO(set))
		ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
	/* Must come last for the case when timed out entry is reused */
	if (SET_WITH_TIMEOUT(set))
		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
	smp_mb__before_atomic();
	set_bit(j, n->used);
	if (old != ERR_PTR(-ENOENT)) {
		rcu_assign_pointer(hbucket(t, key), n);
		if (old)
			kfree_rcu(old, rcu);
	}
	ret = 0;
resize:
	spin_unlock_bh(&t->hregion[r].lock);
	if (atomic_read(&t->ref) && ext->target) {
		/* Resize is in process and kernel side add, save values */
		struct mtype_resize_ad *x;

		x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
		if (!x)
			/* Don't bother */
			goto out;
		x->ad = IPSET_ADD;
		memcpy(&x->d, value, sizeof(struct mtype_elem));
		memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
		memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
		x->flags = flags;
		spin_lock_bh(&set->lock);
		list_add_tail(&x->list, &h->ad);
		spin_unlock_bh(&set->lock);
	}
	goto out;

set_full:
	if (net_ratelimit())
		pr_warn("Set %s is full, maxelem %u reached\n",
			set->name, maxelem);
	ret = -IPSET_ERR_HASH_FULL;
unlock:
	spin_unlock_bh(&t->hregion[r].lock);
out:
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by add: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}
	return ret;
}

/* Delete an element from the hash and free up space if possible.
 */
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n;
	struct mtype_resize_ad *x = NULL;
	int i, j, k, r, ret = -IPSET_ERR_EXIST;
	u32 key, multi = 0;
	size_t dsize = set->dsize;

	/* Userspace add and resize is excluded by the mutex.
	 * Kernelspace add does not trigger resize.
	 */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	r = ahash_region(key, t->htable_bits);
	atomic_inc(&t->uref);
	rcu_read_unlock_bh();

	spin_lock_bh(&t->hregion[r].lock);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n)
		goto out;
	for (i = 0, k = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			k++;
			continue;
		}
		data = ahash_data(n, i, dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		if (SET_ELEM_EXPIRED(set, data))
			goto out;

		ret = 0;
		clear_bit(i, n->used);
		smp_mb__after_atomic();
		if (i + 1 == n->pos)
			n->pos--;
		t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
		for (j = 0; j < IPSET_NET_COUNT; j++)
			mtype_del_cidr(set, h,
				       NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
		ip_set_ext_destroy(set, data);

		if (atomic_read(&t->ref) && ext->target) {
			/* Resize is in process and kernel side del,
			 * save values
			 */
			x = kzalloc(sizeof(struct mtype_resize_ad),
				    GFP_ATOMIC);
			if (x) {
				x->ad = IPSET_DEL;
				memcpy(&x->d, value,
				       sizeof(struct mtype_elem));
				x->flags = flags;
			}
		}
		for (; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				k++;
		}
		if (n->pos == 0 && k == 0) {
			t->hregion[r].ext_size -= ext_size(n->size, dsize);
			rcu_assign_pointer(hbucket(t, key), NULL);
			kfree_rcu(n, rcu);
		} else if (k >= AHASH_INIT_SIZE) {
			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
					(n->size - AHASH_INIT_SIZE) * dsize,
					GFP_ATOMIC);
			if (!tmp)
				goto out;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, k = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + k * dsize, data, dsize);
				set_bit(k, tmp->used);
				k++;
			}
			tmp->pos = k;
			t->hregion[r].ext_size -=
				ext_size(AHASH_INIT_SIZE, dsize);
			rcu_assign_pointer(hbucket(t, key), tmp);
			kfree_rcu(n, rcu);
		}
		goto out;
	}

out:
	spin_unlock_bh(&t->hregion[r].lock);
	if (x) {
		spin_lock_bh(&set->lock);
		list_add(&x->list, &h->ad);
		spin_unlock_bh(&set->lock);
	}
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by del: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}
	return ret;
}

static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
	if (!ip_set_match_extensions(set, ext, mext, flags, data))
		return 0;
	/* nomatch entries return -ENOTEMPTY */
	return mtype_do_data_match(data);
}

#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
 * sizes added to the set
 */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = rcu_dereference_bh(h->table);
	struct hbucket *n;
	struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
	struct mtype_elem orig = *d;
	int ret, i, j = 0, k;
#else
	int ret, i, j = 0;
#endif
	u32 key, multi = 0;

	pr_debug("test by nets\n");
	for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
		mtype_data_reset_elem(d, &orig);
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
		for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
		     k++) {
			mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
					   true);
#else
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
		key = HKEY(d, h->initval, t->htable_bits);
		n = rcu_dereference_bh(hbucket(t, key));
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			data = ahash_data(n, i, set->dsize);
			if (!mtype_data_equal(data, d, &multi))
				continue;
			ret = mtype_data_match(data, ext, mext, set, flags);
			if (ret != 0)
				return ret;
#ifdef IP_SET_HASH_WITH_MULTI
			/* No match, reset multiple match flag */
			multi = 0;
#endif
		}
#if IPSET_NET_COUNT == 2
		}
#endif
	}
	return 0;
}
#endif

/* Test whether the element is added to the set */
static int
mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	   struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	struct mtype_elem *d = value;
	struct hbucket *n;
	struct mtype_elem *data;
	int i, ret = 0;
	u32 key, multi = 0;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes
	 */
	for (i = 0; i < IPSET_NET_COUNT; i++)
		if (DCIDR_GET(d->cidr, i) != HOST_MASK)
			break;
	if (i == IPSET_NET_COUNT) {
		ret = mtype_test_cidrs(set, d, ext, mext, flags);
		goto out;
	}
#endif

	key = HKEY(d, h->initval, t->htable_bits);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used))
			continue;
		data = ahash_data(n, i, set->dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		ret = mtype_data_match(data, ext, mext, set, flags);
		if (ret != 0)
			goto out;
	}
out:
	rcu_read_unlock_bh();
	return ret;
}

/* Reply a HEADER request: fill out the header part of the set */
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
	struct htype *h = set->data;
	const struct htable *t;
	struct nlattr *nested;
	size_t memsize;
	u32 elements = 0;
	size_t ext_size = 0;
	u8 htable_bits;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	mtype_ext_size(set, &elements, &ext_size);
	memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
	htable_bits = t->htable_bits;
	rcu_read_unlock_bh();

	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
			  htonl(jhash_size(htable_bits))) ||
	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
		goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK &&
	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
		goto nla_put_failure;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
		goto nla_put_failure;
#endif
	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
		goto nla_put_failure;
	if (unlikely(ip_set_put_flags(skb, set)))
		goto nla_put_failure;
	nla_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}

/* Make possible to run dumping parallel with resizing */
static void
mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
{
	struct htype *h = set->data;
	struct htable *t;

	if (start) {
		rcu_read_lock_bh();
		t = ipset_dereference_bh_nfnl(h->table);
		atomic_inc(&t->uref);
		cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
		rcu_read_unlock_bh();
	} else if (cb->args[IPSET_CB_PRIVATE]) {
		t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
		if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
			pr_debug("Table destroy after resize by dump: %p\n",
				 t);
			mtype_ahash_destroy(set, t, false);
		}
		cb->args[IPSET_CB_PRIVATE] = 0;
	}
}

/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
	   struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct htable *t;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct mtype_elem *e;
	u32 first = cb->args[IPSET_CB_ARG0];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i, ret = 0;

	atd = nla_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;

	pr_debug("list hash set %s\n", set->name);
	t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
	/* Expire may replace a hbucket with another one */
	rcu_read_lock();
	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
	     cb->args[IPSET_CB_ARG0]++) {
		cond_resched_rcu();
		incomplete = skb_tail_pointer(skb);
		n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
			 cb->args[IPSET_CB_ARG0], t, n);
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			e = ahash_data(n, i, set->dsize);
			if (SET_ELEM_EXPIRED(set, e))
				continue;
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[IPSET_CB_ARG0], n, i, e);
			nested = nla_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[IPSET_CB_ARG0] == first) {
					nla_nest_cancel(skb, atd);
					ret = -EMSGSIZE;
					goto out;
				}
				goto nla_put_failure;
			}
			if (mtype_data_list(skb, e))
				goto nla_put_failure;
			if (ip_set_put_extensions(skb, set, e, true))
				goto nla_put_failure;
			nla_nest_end(skb, nested);
		}
	}
	nla_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[IPSET_CB_ARG0] = 0;

	goto out;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
		pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
			set->name);
		cb->args[IPSET_CB_ARG0] = 0;
		ret = -EMSGSIZE;
	} else {
		nla_nest_end(skb, atd);
	}
out:
	rcu_read_unlock();
	return ret;
}

static int
IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
			  const struct xt_action_param *par,
			  enum ipset_adt adt, struct ip_set_adt_opt *opt);

static int
IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
			  enum ipset_adt adt, u32 *lineno, u32 flags,
			  bool retried);

static const struct ip_set_type_variant mtype_variant = {
	.kadt	= mtype_kadt,
	.uadt	= mtype_uadt,
	.adt	= {
		[IPSET_ADD] = mtype_add,
		[IPSET_DEL] = mtype_del,
		[IPSET_TEST] = mtype_test,
	},
	.destroy = mtype_destroy,
	.flush	= mtype_flush,
	.head	= mtype_head,
	.list	= mtype_list,
	.uref	= mtype_uref,
	.resize	= mtype_resize,
	.same_set = mtype_same_set,
	.region_lock = true,
};

#ifdef IP_SET_EMIT_CREATE
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
			    struct nlattr *tb[], u32 flags)
{
	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;
#endif
	u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;
#endif
	size_t hsize;
	struct htype *h;
	struct htable *t;
	u32 i;

	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");

#ifdef IP_SET_PROTO_UNDEF
	if (set->family != NFPROTO_UNSPEC)
		return -IPSET_ERR_INVALID_FAMILY;
#else
	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
		return -IPSET_ERR_INVALID_FAMILY;
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

#ifdef IP_SET_HASH_WITH_MARKMASK
	/* Separated condition in order to avoid directive in argument list */
	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
		return -IPSET_ERR_PROTOCOL;

	markmask = 0xffffffff;
	if (tb[IPSET_ATTR_MARKMASK]) {
		markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
		if (markmask == 0)
			return -IPSET_ERR_INVALID_MARKMASK;
	}
#endif

#ifdef IP_SET_HASH_WITH_NETMASK
	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
		    netmask == 0)
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

	hsize = sizeof(*h);
	h = kzalloc(hsize, GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	hbits = htable_bits(hashsize);
	hsize = htable_size(hbits);
	if (hsize == 0) {
		kfree(h);
		return -ENOMEM;
	}
	t = ip_set_alloc(hsize);
	if (!t) {
		kfree(h);
		return -ENOMEM;
	}
	t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
	if (!t->hregion) {
		ip_set_free(t);
		kfree(h);
		return -ENOMEM;
	}
	h->gc.set = set;
	for (i = 0; i < ahash_numof_locks(hbits); i++)
		spin_lock_init(&t->hregion[i].lock);
	h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
	h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	h->markmask = markmask;
#endif
	get_random_bytes(&h->initval, sizeof(h->initval));

	t->htable_bits = hbits;
	t->maxelem = h->maxelem / ahash_numof_locks(hbits);
	RCU_INIT_POINTER(h->table, t);

	INIT_LIST_HEAD(&h->ad);
	set->data = h;
#ifndef IP_SET_PROTO_UNDEF
	if (set->family == NFPROTO_IPV4) {
#endif
		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
	} else {
		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
	}
#endif
	set->timeout = IPSET_NO_TIMEOUT;
	if (tb[IPSET_ATTR_TIMEOUT]) {
		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
		if (set->family == NFPROTO_IPV4)
#endif
			IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
		else
			IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
	}
	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(t->htable_bits),
		 t->htable_bits, h->maxelem, set->data, t);

	return 0;
}
#endif /* IP_SET_EMIT_CREATE */

#undef HKEY_DATALEN