/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
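
/* Example configuration from userspace (illustrative only; exact syntax
 * depends on the iproute2 "tc" version, and "eth0" is a placeholder):
 *
 *	# 100ms delay with 10ms jitter, 25% correlated with the previous delay
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 *	# 0.3% random packet loss, plus 1% corruption and 1% duplication
 *	tc qdisc change dev eth0 root netem loss 0.3% corrupt 1% duplicate 1%
 */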

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
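
/* Note on the arithmetic above: state->rho holds the correlation
 * coefficient scaled to 32-bit fixed point.  Each new sample is a
 * fixed-point blend of a fresh uniform random value U and the previous
 * output:
 *
 *	answer = ((2^32 - rho) * U + rho * last) >> 32,  rho = state->rho + 1
 *
 * so state->rho == 0 yields independent samples (handled by the early
 * return), while values close to 2^32 make each sample closely track the
 * previous one.
 */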

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}

	return false;
}

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model):
		 * extract a value from the Markov 4-state loss generator
		 * and drop the packet when the generator returns true.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * extract a value from the Gilbert-Elliot loss generator
		 * and drop the packet when the generator returns true.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
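
/* Note on the table path above: table entries are deviations from the mean
 * expressed in units of NETEM_DIST_SCALE per standard deviation, so the
 * result is (to rounding) mu + sigma * t / NETEM_DIST_SCALE.  The product is
 * computed in two parts, (sigma % NETEM_DIST_SCALE) * t and
 * (sigma / NETEM_DIST_SCALE) * t, to keep the intermediate values from
 * overflowing for large sigma.
 */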

static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}
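
/* The calculation above treats q->rate as bytes per second: the transmission
 * time is len * NSEC_PER_SEC / rate nanoseconds, converted to scheduler
 * ticks.  For example (purely illustrative), a 1500 byte packet at
 * rate 125000 (1 Mbit/s) takes 1500 * 10^9 / 125000 ns = 12 ms, before any
 * packet/cell overhead adjustment.
 */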

static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < sch->limit)) {
		skb = skb_peek_tail(list);
		/* Optimize for add at tail */
		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
			return qdisc_enqueue_tail(nskb, sch);

		skb_queue_reverse_walk(list, skb) {
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);
		sch->qstats.backlog += qdisc_pkt_len(nskb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_reshape_fail(nskb, sch);
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight;
				 * calculate this time bonus and subtract it
				 * from the delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		ret = tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.backlog += qdisc_pkt_len(skb);
		sch->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			return ret;
		}
	}

	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (!len && q->qdisc && q->qdisc->ops->drop)
		len = q->qdisc->ops->drop(q->qdisc);
	if (len)
		sch->qstats.drops++;

	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* has the scheduled send time been reached? */
		if (cb->time_to_send <= psched_get_time()) {
			__skb_unlink(skb, &sch->q);
			sch->qstats.backlog -= qdisc_pkt_len(skb);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
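/* (Illustrative note: these tables are usually generated offline, e.g. by the
 * distribution files and maketable utility shipped with iproute2, and are
 * passed in via the TCA_NETEM_DELAY_DIST attribute; entries are consumed by
 * tabledist() above.)
 */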
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
};
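
/* netem's TCA_OPTIONS payload starts with a fixed struct tc_netem_qopt,
 * optionally followed by the nested attributes described in netem_policy.
 * parse_attr() below therefore skips NLA_ALIGN(len) bytes of fixed header
 * before handing the remainder to nla_parse(); an options payload that
 * contains only the fixed header is accepted as well.
 */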

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");