/*	$OpenBSD: ifq.c,v 1.53 2023/11/10 15:51:24 bluhm Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NKSTAT > 0
#include <sys/kstat.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

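/*
 * ifq_serialize() queues a task on the ifq task list and makes sure it
 * eventually runs without two CPUs ever doing ifq work at the same time.
 * The first context to find the serializer free claims it and drains the
 * whole task list; everyone else just enqueues their task and returns,
 * relying on the current serializer to pick it up. Each task is copied
 * to the stack before it runs so it can safely re-add itself.
 */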
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

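/*
 * ifq_start() decides between transmitting now and deferring: once at
 * least min(if_txmit, ifq_maxlen) packets are queued the start routine
 * runs immediately in the serializer, while shorter queues are handed to
 * the bundle task on the softnet taskq so several small enqueues can be
 * coalesced into a single start call.
 */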
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_set_oactive(struct ifqueue *ifq)
{
	if (ifq->ifq_oactive)
		return;

	mtx_enter(&ifq->ifq_mtx);
	if (!ifq->ifq_oactive) {
		ifq->ifq_oactive = 1;
		ifq->ifq_oactives++;
	}
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

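/*
 * ifq_barrier() guarantees that work previously submitted to the ifq
 * serializer has finished running. It cancels a pending bundle task,
 * and if the serializer is currently busy it queues a task that signals
 * a condition variable and sleeps until that task has executed.
 */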
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

#if NKSTAT > 0
struct ifq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;
	struct kstat_kv kd_maxqlen;
	struct kstat_kv kd_oactive;
	struct kstat_kv kd_oactives;
};

static const struct ifq_kstat_data ifq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("maxqlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
	KSTAT_KV_INITIALIZER("oactives", KSTAT_KV_T_COUNTER32),
};

int
ifq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifqueue *ifq = ks->ks_softc;
	struct ifq_kstat_data *kd = dst;

	*kd = ifq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
	kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
	kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;
	kstat_kv_u32(&kd->kd_oactives) = ifq->ifq_oactives;

	return (0);
}
#endif

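/*
 * ifq_init() wires a transmit queue up to its interface: it initialises
 * the per-queue mutexes, installs priq as the default queueing
 * discipline, zeroes the counters, sets up the start/restart/bundle
 * tasks, applies IFQ_MAXLEN if the driver did not pick its own limit,
 * and, with kstat enabled, publishes the counters as a "txq" kstat.
 */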
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(idx);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_init_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
	    "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
	KASSERT(ifq->ifq_kstat != NULL);
	kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
	ifq->ifq_kstat->ks_softc = ifq;
	ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
	ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
	kstat_install(ifq->ifq_kstat);
#endif
}

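/*
 * ifq_attach() replaces the current queueing discipline with newops.
 * The old discipline is purged under the ifq mutex, the new queue is
 * installed, and the purged packets are re-enqueued through the new
 * ops; anything the new discipline refuses is counted as a qdrop and
 * freed once the mutex has been released.
 */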
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

#if NKSTAT > 0
	kstat_destroy(ifq->ifq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

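/*
 * ifq_enqueue() hands a packet to the current discipline. The discipline
 * may accept it (NULL comes back), reject it outright (the same mbuf
 * comes back), or accept it while returning a different mbuf to drop in
 * its place. Counters are updated accordingly and the dropped mbuf, if
 * any, is freed outside the mutex. ENOBUFS is returned only when the
 * packet itself was rejected.
 */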
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

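/*
 * Dequeue follows a begin/commit/rollback protocol so a driver can look
 * at the head packet before deciding whether it fits in the hardware
 * ring. ifq_deq_begin() takes the ifq mutex and peeks at the head,
 * stashing the discipline's cookie in ph_cookie; ifq_deq_commit()
 * removes the packet and drops the mutex, while ifq_deq_rollback()
 * leaves it queued. ifq_deq_leave() also frees any mbufs the discipline
 * handed to ifq_mfreem()/ifq_mfreeml() while the mutex was held.
 */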
static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

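/*
 * ifq_deq_sleep() is a blocking dequeue: if the queue is empty it sleeps
 * on the ifq until a packet shows up, the sleep is interrupted, or the
 * *alive flag goes to zero (EIO). With nbio set it returns EWOULDBLOCK
 * instead of sleeping. The *sleeping counter is bumped around the sleep
 * so the producer side can tell whether a wakeup is needed.
 */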
int
ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
    const char *wmesg, volatile unsigned int *sleeping,
    volatile unsigned int *alive)
{
	struct mbuf *m;
	void *cookie;
	int error = 0;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 && nbio)
		error = EWOULDBLOCK;
	else {
		for (;;) {
			m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
			if (m != NULL) {
				ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
				ifq->ifq_len--;
				*mp = m;
				break;
			}

			(*sleeping)++;
			error = msleep_nsec(ifq, &ifq->ifq_mtx,
			    priority, wmesg, INFSLP);
			(*sleeping)--;
			if (error != 0)
				break;
			if (!(*alive)) {
				error = EIO;
				break;
			}
		}
	}
	ifq_deq_leave(ifq);

	return (error);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	if (ifq_empty(ifq))
		return (0);

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

void
ifq_init_maxlen(struct ifqueue *ifq, unsigned int maxlen)
{
	/* this is not MP safe, use only during attach */
	ifq->ifq_maxlen = maxlen;
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

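/*
 * ifq_mfreem() and ifq_mfreeml() let a queueing discipline discard
 * packets while the ifq mutex is held: the mbufs are only moved to the
 * ifq_free list here and are actually freed later by ifq_deq_leave(),
 * once the mutex has been released.
 */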
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

#if NKSTAT > 0
struct ifiq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_fdrops;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;

	struct kstat_kv kd_enqueues;
	struct kstat_kv kd_dequeues;
};

static const struct ifiq_kstat_data ifiq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("fdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),

	KSTAT_KV_INITIALIZER("enqueues",
	    KSTAT_KV_T_COUNTER64),
	KSTAT_KV_INITIALIZER("dequeues",
	    KSTAT_KV_T_COUNTER64),
};

int
ifiq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifiqueue *ifiq = ks->ks_softc;
	struct ifiq_kstat_data *kd = dst;

	*kd = ifiq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
	kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops;
	kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);

	kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
	kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;

	return (0);
}
#endif

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(idx);
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_fdrops = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
	    "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
	KASSERT(ifiq->ifiq_kstat != NULL);
	kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
	ifiq->ifiq_kstat->ks_softc = ifiq;
	ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
	ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
	kstat_install(ifiq->ifiq_kstat);
#endif
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
#if NKSTAT > 0
	kstat_destroy(ifiq->ifiq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

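/*
 * ifiq_input() is the driver side of the receive queue. It stamps every
 * mbuf with the interface index and rdomain, runs the list through bpf
 * (counting filter drops as fdrops), and then either appends the list to
 * ifiq_ml and schedules ifiq_process() on softnet, or drops it when the
 * interface is in monitor mode or the backlog already exceeds
 * ifiq_maxlen_drop. The return value tells the driver whether the queue
 * has grown past ifiq_maxlen_return and backpressure should be applied.
 */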
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	uint64_t fdrops = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
				m_freem(m);
				fdrops++;
			} else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			ifiq->ifiq_fdrops += fdrops;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;
	ifiq->ifiq_fdrops += fdrops;

	len = ml_len(&ifiq->ifiq_ml);
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
		if (len > ifiq_maxlen_drop)
			ifiq->ifiq_qdrops += ml_len(ml);
		else {
			ifiq->ifiq_enqueues++;
			ml_enlist(&ifiq->ifiq_ml, ml);
		}
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	struct ifnet *ifp = ifiq->ifiq_if;
#if NBPFILTER > 0
	caddr_t if_bpf = ifp->if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets++;
			ifiq->ifiq_bytes += m->m_pkthdr.len;
			ifiq->ifiq_fdrops++;
			mtx_leave(&ifiq->ifiq_mtx);

			m_freem(m);
			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets++;
	ifiq->ifiq_bytes += m->m_pkthdr.len;
	ifiq->ifiq_enqueues++;
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

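/*
 * ifiq_process() runs on the softnet taskq: it detaches the list of
 * pending mbufs from the ifiq under the mutex and feeds it to
 * if_input_process() for protocol input.
 */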
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_dequeues++;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

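/*
 * priq_enq() appends the packet to the list matching its pf priority.
 * When the ifq is already at ifq_maxlen it tries to make room by
 * dropping the head of a strictly lower-priority list; if no such
 * packet exists the new mbuf itself is returned to the caller as the
 * drop.
 */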
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

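/*
 * priq_deq_begin() peeks at the first packet of the highest-priority
 * non-empty list, scanning from IFQ_MAXPRIO down to 0, and hands the
 * list back through the cookie so priq_deq_commit() can pop exactly
 * that packet.
 */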
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}
999