xref: /linux/net/sched/sch_red.c (revision c6fbb759)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
23 /*	Parameters, settable by user:
24 	-----------------------------
25 
26 	limit		- bytes (must be > qth_max + burst)
27 
	Hard limit on queue length; it should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	In practice, this limit will never be reached
	if RED works correctly.
34  */
35 
/* Private per-qdisc state of a RED instance (see red_qdisc_ops.priv_size). */
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	/* TC_RED_* bits tested by red_use_ecn(), red_use_harddrop(), red_use_nodrop(). */
	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	/* Runs red_adaptative_timer(); armed in __red_change() for TC_RED_ADAPTATIVE. */
	struct timer_list	adapt_timer;
	/* Back-pointer to the owning qdisc, set in red_init() and read by the timer. */
	struct Qdisc		*sch;
	/* Parameters installed by red_set_parms() in __red_change(). */
	struct red_parms	parms;
	/* Running state (qavg, idle tracking) used by the red_*() helpers. */
	struct red_vars		vars;
	/* Drop/mark counters reported by red_dump_stats(). */
	struct red_stats	stats;
	/* Child qdisc; &noop_qdisc until __red_change() or red_graft() installs one. */
	struct Qdisc		*qdisc;
	/* qevent run from the congestion_drop path of red_enqueue(). */
	struct tcf_qevent	qe_early_drop;
	/* qevent run after a packet has been CE-marked in red_enqueue(). */
	struct tcf_qevent	qe_mark;
};
52 
/*
 * Every flag bit this qdisc accepts: the historic flags plus TC_RED_NODROP.
 * Used as the valid mask for TCA_RED_FLAGS in red_policy, passed to
 * red_get_flags() in __red_change(), and used as the mask in red_dump().
 */
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
54 
55 static inline int red_use_ecn(struct red_sched_data *q)
56 {
57 	return q->flags & TC_RED_ECN;
58 }
59 
60 static inline int red_use_harddrop(struct red_sched_data *q)
61 {
62 	return q->flags & TC_RED_HARDDROP;
63 }
64 
65 static int red_use_nodrop(struct red_sched_data *q)
66 {
67 	return q->flags & TC_RED_NODROP;
68 }
69 
70 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
71 		       struct sk_buff **to_free)
72 {
73 	struct red_sched_data *q = qdisc_priv(sch);
74 	struct Qdisc *child = q->qdisc;
75 	int ret;
76 
77 	q->vars.qavg = red_calc_qavg(&q->parms,
78 				     &q->vars,
79 				     child->qstats.backlog);
80 
81 	if (red_is_idling(&q->vars))
82 		red_end_of_idle_period(&q->vars);
83 
84 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
85 	case RED_DONT_MARK:
86 		break;
87 
88 	case RED_PROB_MARK:
89 		qdisc_qstats_overlimit(sch);
90 		if (!red_use_ecn(q)) {
91 			q->stats.prob_drop++;
92 			goto congestion_drop;
93 		}
94 
95 		if (INET_ECN_set_ce(skb)) {
96 			q->stats.prob_mark++;
97 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
98 			if (!skb)
99 				return NET_XMIT_CN | ret;
100 		} else if (!red_use_nodrop(q)) {
101 			q->stats.prob_drop++;
102 			goto congestion_drop;
103 		}
104 
105 		/* Non-ECT packet in ECN nodrop mode: queue it. */
106 		break;
107 
108 	case RED_HARD_MARK:
109 		qdisc_qstats_overlimit(sch);
110 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
111 			q->stats.forced_drop++;
112 			goto congestion_drop;
113 		}
114 
115 		if (INET_ECN_set_ce(skb)) {
116 			q->stats.forced_mark++;
117 			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
118 			if (!skb)
119 				return NET_XMIT_CN | ret;
120 		} else if (!red_use_nodrop(q)) {
121 			q->stats.forced_drop++;
122 			goto congestion_drop;
123 		}
124 
125 		/* Non-ECT packet in ECN nodrop mode: queue it. */
126 		break;
127 	}
128 
129 	ret = qdisc_enqueue(skb, child, to_free);
130 	if (likely(ret == NET_XMIT_SUCCESS)) {
131 		qdisc_qstats_backlog_inc(sch, skb);
132 		sch->q.qlen++;
133 	} else if (net_xmit_drop_count(ret)) {
134 		q->stats.pdrop++;
135 		qdisc_qstats_drop(sch);
136 	}
137 	return ret;
138 
139 congestion_drop:
140 	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
141 	if (!skb)
142 		return NET_XMIT_CN | ret;
143 
144 	qdisc_drop(skb, sch, to_free);
145 	return NET_XMIT_CN;
146 }
147 
148 static struct sk_buff *red_dequeue(struct Qdisc *sch)
149 {
150 	struct sk_buff *skb;
151 	struct red_sched_data *q = qdisc_priv(sch);
152 	struct Qdisc *child = q->qdisc;
153 
154 	skb = child->dequeue(child);
155 	if (skb) {
156 		qdisc_bstats_update(sch, skb);
157 		qdisc_qstats_backlog_dec(sch, skb);
158 		sch->q.qlen--;
159 	} else {
160 		if (!red_is_idling(&q->vars))
161 			red_start_of_idle_period(&q->vars);
162 	}
163 	return skb;
164 }
165 
166 static struct sk_buff *red_peek(struct Qdisc *sch)
167 {
168 	struct red_sched_data *q = qdisc_priv(sch);
169 	struct Qdisc *child = q->qdisc;
170 
171 	return child->ops->peek(child);
172 }
173 
174 static void red_reset(struct Qdisc *sch)
175 {
176 	struct red_sched_data *q = qdisc_priv(sch);
177 
178 	qdisc_reset(q->qdisc);
179 	red_restart(&q->vars);
180 }
181 
182 static int red_offload(struct Qdisc *sch, bool enable)
183 {
184 	struct red_sched_data *q = qdisc_priv(sch);
185 	struct net_device *dev = qdisc_dev(sch);
186 	struct tc_red_qopt_offload opt = {
187 		.handle = sch->handle,
188 		.parent = sch->parent,
189 	};
190 
191 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
192 		return -EOPNOTSUPP;
193 
194 	if (enable) {
195 		opt.command = TC_RED_REPLACE;
196 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
197 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
198 		opt.set.probability = q->parms.max_P;
199 		opt.set.limit = q->limit;
200 		opt.set.is_ecn = red_use_ecn(q);
201 		opt.set.is_harddrop = red_use_harddrop(q);
202 		opt.set.is_nodrop = red_use_nodrop(q);
203 		opt.set.qstats = &sch->qstats;
204 	} else {
205 		opt.command = TC_RED_DESTROY;
206 	}
207 
208 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
209 }
210 
211 static void red_destroy(struct Qdisc *sch)
212 {
213 	struct red_sched_data *q = qdisc_priv(sch);
214 
215 	tcf_qevent_destroy(&q->qe_mark, sch);
216 	tcf_qevent_destroy(&q->qe_early_drop, sch);
217 	del_timer_sync(&q->adapt_timer);
218 	red_offload(sch, false);
219 	qdisc_put(q->qdisc);
220 }
221 
/*
 * Netlink validation policy for the nested TCA_RED_* attributes, used by
 * red_init() and red_change() when parsing TCA_OPTIONS.
 */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	/* Strict validation applies to attributes from TCA_RED_FLAGS onward. */
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	/* Only bits in TC_RED_SUPPORTED_FLAGS may be selected. */
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
231 
/*
 * Apply an already-parsed set of TCA_RED_* attributes to the qdisc.
 * Shared by red_init() and red_change().
 *
 * TCA_RED_PARMS and TCA_RED_STAB are mandatory.  When the requested limit
 * is non-zero a fresh bfifo child with that limit replaces the current
 * child; the old child is released after the tree lock is dropped.
 *
 * Returns 0 on success, -EINVAL for missing/invalid parameters, or the
 * error from red_get_flags(), fifo_create_dflt() or red_validate_flags().
 */
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	/* TCA_RED_MAX_P is optional; 0 is passed on when it is absent. */
	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	/* Build the replacement child before taking the tree lock. */
	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	/* Keep the bits outside the selector, take the selected ones from value. */
	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;	/* nothing in q has been modified yet */

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	/* Stop any pending adaptation run; re-arm only in adaptative mode. */
	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	/* An empty (possibly just-installed) child means RED starts out idle. */
	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	/* The offload result is not checked here. */
	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	/* Release the child created above; it was never installed. */
	if (child)
		qdisc_put(child);
	return err;
}
317 
318 static inline void red_adaptative_timer(struct timer_list *t)
319 {
320 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
321 	struct Qdisc *sch = q->sch;
322 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
323 
324 	spin_lock(root_lock);
325 	red_adaptative_algo(&q->parms, &q->vars);
326 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
327 	spin_unlock(root_lock);
328 }
329 
330 static int red_init(struct Qdisc *sch, struct nlattr *opt,
331 		    struct netlink_ext_ack *extack)
332 {
333 	struct red_sched_data *q = qdisc_priv(sch);
334 	struct nlattr *tb[TCA_RED_MAX + 1];
335 	int err;
336 
337 	q->qdisc = &noop_qdisc;
338 	q->sch = sch;
339 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
340 
341 	if (!opt)
342 		return -EINVAL;
343 
344 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
345 					  extack);
346 	if (err < 0)
347 		return err;
348 
349 	err = __red_change(sch, tb, extack);
350 	if (err)
351 		return err;
352 
353 	err = tcf_qevent_init(&q->qe_early_drop, sch,
354 			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
355 			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
356 	if (err)
357 		return err;
358 
359 	return tcf_qevent_init(&q->qe_mark, sch,
360 			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
361 			       tb[TCA_RED_MARK_BLOCK], extack);
362 }
363 
364 static int red_change(struct Qdisc *sch, struct nlattr *opt,
365 		      struct netlink_ext_ack *extack)
366 {
367 	struct red_sched_data *q = qdisc_priv(sch);
368 	struct nlattr *tb[TCA_RED_MAX + 1];
369 	int err;
370 
371 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
372 					  extack);
373 	if (err < 0)
374 		return err;
375 
376 	err = tcf_qevent_validate_change(&q->qe_early_drop,
377 					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
378 	if (err)
379 		return err;
380 
381 	err = tcf_qevent_validate_change(&q->qe_mark,
382 					 tb[TCA_RED_MARK_BLOCK], extack);
383 	if (err)
384 		return err;
385 
386 	return __red_change(sch, tb, extack);
387 }
388 
389 static int red_dump_offload_stats(struct Qdisc *sch)
390 {
391 	struct tc_red_qopt_offload hw_stats = {
392 		.command = TC_RED_STATS,
393 		.handle = sch->handle,
394 		.parent = sch->parent,
395 		{
396 			.stats.bstats = &sch->bstats,
397 			.stats.qstats = &sch->qstats,
398 		},
399 	};
400 
401 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
402 }
403 
404 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
405 {
406 	struct red_sched_data *q = qdisc_priv(sch);
407 	struct nlattr *opts = NULL;
408 	struct tc_red_qopt opt = {
409 		.limit		= q->limit,
410 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
411 				  q->userbits,
412 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
413 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
414 		.Wlog		= q->parms.Wlog,
415 		.Plog		= q->parms.Plog,
416 		.Scell_log	= q->parms.Scell_log,
417 	};
418 	int err;
419 
420 	err = red_dump_offload_stats(sch);
421 	if (err)
422 		goto nla_put_failure;
423 
424 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
425 	if (opts == NULL)
426 		goto nla_put_failure;
427 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
428 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
429 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
430 			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
431 	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
432 	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
433 		goto nla_put_failure;
434 	return nla_nest_end(skb, opts);
435 
436 nla_put_failure:
437 	nla_nest_cancel(skb, opts);
438 	return -EMSGSIZE;
439 }
440 
441 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
442 {
443 	struct red_sched_data *q = qdisc_priv(sch);
444 	struct net_device *dev = qdisc_dev(sch);
445 	struct tc_red_xstats st = {0};
446 
447 	if (sch->flags & TCQ_F_OFFLOADED) {
448 		struct tc_red_qopt_offload hw_stats_request = {
449 			.command = TC_RED_XSTATS,
450 			.handle = sch->handle,
451 			.parent = sch->parent,
452 			{
453 				.xstats = &q->stats,
454 			},
455 		};
456 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
457 					      &hw_stats_request);
458 	}
459 	st.early = q->stats.prob_drop + q->stats.forced_drop;
460 	st.pdrop = q->stats.pdrop;
461 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
462 
463 	return gnet_stats_copy_app(d, &st, sizeof(st));
464 }
465 
466 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
467 			  struct sk_buff *skb, struct tcmsg *tcm)
468 {
469 	struct red_sched_data *q = qdisc_priv(sch);
470 
471 	tcm->tcm_handle |= TC_H_MIN(1);
472 	tcm->tcm_info = q->qdisc->handle;
473 	return 0;
474 }
475 
476 static void red_graft_offload(struct Qdisc *sch,
477 			      struct Qdisc *new, struct Qdisc *old,
478 			      struct netlink_ext_ack *extack)
479 {
480 	struct tc_red_qopt_offload graft_offload = {
481 		.handle		= sch->handle,
482 		.parent		= sch->parent,
483 		.child_handle	= new->handle,
484 		.command	= TC_RED_GRAFT,
485 	};
486 
487 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
488 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
489 }
490 
491 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
492 		     struct Qdisc **old, struct netlink_ext_ack *extack)
493 {
494 	struct red_sched_data *q = qdisc_priv(sch);
495 
496 	if (new == NULL)
497 		new = &noop_qdisc;
498 
499 	*old = qdisc_replace(sch, new, &q->qdisc);
500 
501 	red_graft_offload(sch, new, *old, extack);
502 	return 0;
503 }
504 
505 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
506 {
507 	struct red_sched_data *q = qdisc_priv(sch);
508 	return q->qdisc;
509 }
510 
511 static unsigned long red_find(struct Qdisc *sch, u32 classid)
512 {
513 	return 1;
514 }
515 
516 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
517 {
518 	if (!walker->stop) {
519 		tc_qdisc_stats_dump(sch, 1, walker);
520 	}
521 }
522 
/* Class operations for RED, wired into red_qdisc_ops.cl_ops. */
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};
530 
/*
 * Qdisc operations table for "red"; registered in red_module_init() and
 * unregistered in red_module_exit().
 */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
546 
547 static int __init red_module_init(void)
548 {
549 	return register_qdisc(&red_qdisc_ops);
550 }
551 
552 static void __exit red_module_exit(void)
553 {
554 	unregister_qdisc(&red_qdisc_ops);
555 }
556 
/* Hook the init/exit routines into the module loader. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
561