/*	$OpenBSD: ifq.c,v 1.55 2024/11/20 02:18:45 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NKSTAT > 0
#include <sys/kstat.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

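/*
 * ifq_serialize() is the heart of the ifq task serialiser.  A task is
 * appended to ifq_task_list under ifq_task_mtx, and whichever CPU finds
 * ifq_serializer unclaimed takes ownership and drains the list, running
 * each task with the mutex released.  The task struct is copied to the
 * stack before it runs so the queued struct task can safely be re-added
 * while its function is still executing.  Work submitted while another
 * CPU owns the serialiser is simply left on the list for that CPU to
 * run, so the submitting context never blocks on the work itself.
 */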
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

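/*
 * ifq_start() decides between kicking the transmit path immediately and
 * deferring it.  If at least min(if_txmit, ifq_maxlen) packets are
 * queued, any pending bundle task is cancelled and the start routine is
 * run via the serialiser right away; otherwise the bundle task is
 * scheduled on the softnet taskq so small bursts of packets can be
 * pushed to the hardware in one go.
 */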
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

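/*
 * The oactive flag is how a driver tells the stack that its transmit
 * ring is full and further starts are pointless until completions free
 * up descriptors.  ifq_set_oactive() takes ifq_mtx itself, while
 * ifq_deq_set_oactive() is for callers that already hold it.  The
 * ifq_oactives counter only tracks how often the flag was raised, for
 * the kstat below.
 */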
void
ifq_set_oactive(struct ifqueue *ifq)
{
	if (ifq->ifq_oactive)
		return;

	mtx_enter(&ifq->ifq_mtx);
	if (!ifq->ifq_oactive) {
		ifq->ifq_oactive = 1;
		ifq->ifq_oactives++;
	}
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_deq_set_oactive(struct ifqueue *ifq)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	if (!ifq->ifq_oactive) {
		ifq->ifq_oactive = 1;
		ifq->ifq_oactives++;
	}
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

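/*
 * ifq_barrier() guarantees that any work the serialiser had already
 * accepted has finished running.  It cancels a pending bundle task,
 * and if a CPU currently owns the serialiser it queues a barrier task
 * and sleeps on a cond until that task signals it.  If no serialiser
 * is active there is nothing to wait for and it returns immediately.
 */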
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

#if NKSTAT > 0
struct ifq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;
	struct kstat_kv kd_maxqlen;
	struct kstat_kv kd_oactive;
	struct kstat_kv kd_oactives;
};

static const struct ifq_kstat_data ifq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("maxqlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
	KSTAT_KV_INITIALIZER("oactives", KSTAT_KV_T_COUNTER32),
};

int
ifq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifqueue *ifq = ks->ks_softc;
	struct ifq_kstat_data *kd = dst;

	*kd = ifq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
	kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
	kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;
	kstat_kv_u32(&kd->kd_oactives) = ifq->ifq_oactives;

	return (0);
}
#endif

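/*
 * ifq_init() wires a transmit queue to its interface: counters are
 * zeroed, the serialiser machinery and its tasks are set up, priq is
 * installed as the default queueing discipline, and ifq_maxlen falls
 * back to IFQ_MAXLEN if the driver has not configured it.  With kstat
 * enabled, a per-queue "txq" kstat is also published.
 */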
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(idx);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_init_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
	    "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
	KASSERT(ifq->ifq_kstat != NULL);
	kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
	ifq->ifq_kstat->ks_softc = ifq;
	ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
	ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
	kstat_install(ifq->ifq_kstat);
#endif
}

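/*
 * ifq_attach() swaps the queueing discipline on a live ifq.  The new
 * discipline state is allocated first, then under ifq_mtx the old
 * discipline is purged, the ops and state pointers are switched, and
 * the purged packets are fed back through the new enqueue op; packets
 * the new discipline refuses are counted as qdrops.  The old state is
 * freed and the rejected mbufs are released only after the mutex has
 * been dropped.
 */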
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

#if NKSTAT > 0
	kstat_destroy(ifq->ifq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

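/*
 * ifq_enqueue() interprets the discipline's enqueue op as follows: a
 * NULL return means the packet was accepted, a different mbuf means
 * the packet was accepted but something else was dropped to make room,
 * and getting the same mbuf back means the packet itself was refused.
 * Only the last case is reported to the caller, as ENOBUFS; any
 * returned mbuf is freed outside the mutex.
 */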
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

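/*
 * ifq_deq_begin(), ifq_deq_commit() and ifq_deq_rollback() let a driver
 * peek at the next packet while it decides whether it can actually be
 * transmitted.  The dequeue cookie is stashed in ph_cookie and ifq_mtx
 * stays held between begin and commit/rollback.  A minimal sketch of a
 * qstart routine using this, where sc_ring_fits() and sc_encap() stand
 * in for hypothetical driver-specific code:
 *
 *	for (;;) {
 *		m = ifq_deq_begin(ifq);
 *		if (m == NULL)
 *			break;
 *		if (!sc_ring_fits(sc, m)) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *		sc_encap(sc, m);
 *	}
 *
 * ifq_dequeue() below is the simple begin-then-commit case.
 */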
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

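/*
 * ifq_deq_sleep() is a blocking variant of ifq_dequeue() for readers
 * running in process context.  With nbio set it fails immediately with
 * EWOULDBLOCK when the queue is empty; otherwise it sleeps on the ifq
 * until a packet arrives, the sleep fails (e.g. it is interrupted), or
 * *alive goes to zero because the device is going away, in which case
 * it returns EIO.
 */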
int
ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
    const char *wmesg, volatile unsigned int *sleeping,
    volatile unsigned int *alive)
{
	struct mbuf *m;
	void *cookie;
	int error = 0;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 && nbio)
		error = EWOULDBLOCK;
	else {
		for (;;) {
			m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
			if (m != NULL) {
				ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
				ifq->ifq_len--;
				*mp = m;
				break;
			}

			(*sleeping)++;
			error = msleep_nsec(ifq, &ifq->ifq_mtx,
			    priority, wmesg, INFSLP);
			(*sleeping)--;
			if (error != 0)
				break;
			if (!(*alive)) {
				error = EIO;
				break;
			}
		}
	}
	ifq_deq_leave(ifq);

	return (error);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	if (ifq_empty(ifq))
		return (0);

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

void
ifq_init_maxlen(struct ifqueue *ifq, unsigned int maxlen)
{
	/* this is not MP safe, use only during attach */
	ifq->ifq_maxlen = maxlen;
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

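/*
 * ifq_q_enter()/ifq_q_leave() give queueing discipline code locked
 * access to its private state.  The ops pointer is compared first so a
 * caller racing against ifq_attach() replacing the discipline gets
 * NULL back instead of another discipline's state; ifq_q_leave() just
 * drops the mutex again.
 */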
void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

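/*
 * ifq_mfreem() and ifq_mfreeml() are for discipline code that has to
 * drop packets while ifq_mtx is already held, e.g. from a dequeue op.
 * The mbufs are parked on ifq_free and only actually freed once the
 * mutex has been released in ifq_deq_leave(), so the freeing never
 * happens under the queue lock.
 */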
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

#if NKSTAT > 0
struct ifiq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_fdrops;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;

	struct kstat_kv kd_enqueues;
	struct kstat_kv kd_dequeues;
};

static const struct ifiq_kstat_data ifiq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("fdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),

	KSTAT_KV_INITIALIZER("enqueues",
	    KSTAT_KV_T_COUNTER64),
	KSTAT_KV_INITIALIZER("dequeues",
	    KSTAT_KV_T_COUNTER64),
};

int
ifiq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifiqueue *ifiq = ks->ks_softc;
	struct ifiq_kstat_data *kd = dst;

	*kd = ifiq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
	kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops;
	kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);

	kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
	kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;

	return (0);
}
#endif

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(idx);
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_fdrops = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
	    "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
	KASSERT(ifiq->ifiq_kstat != NULL);
	kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
	ifiq->ifiq_kstat->ks_softc = ifiq;
	ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
	ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
	kstat_install(ifiq->ifiq_kstat);
#endif
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
#if NKSTAT > 0
	kstat_destroy(ifiq->ifiq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

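/*
 * ifiq_input() takes a list of received packets from a driver.  The
 * backlog already sitting on ifiq_ml decides what happens: above
 * ifiq_maxlen_drop the whole list is dropped, otherwise it is appended
 * and the softnet task is scheduled to process it.  On interfaces in
 * monitor mode the packets are only counted and then freed.  The
 * return value is non-zero once the backlog exceeds ifiq_maxlen_return,
 * which the caller can treat as a backpressure hint.
 */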
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	uint64_t fdrops = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
				m_freem(m);
				fdrops++;
			} else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			ifiq->ifiq_fdrops += fdrops;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;
	ifiq->ifiq_fdrops += fdrops;

	len = ml_len(&ifiq->ifiq_ml);
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
		if (len > ifiq_maxlen_drop)
			ifiq->ifiq_qdrops += ml_len(ml);
		else {
			ifiq->ifiq_enqueues++;
			ml_enlist(&ifiq->ifiq_ml, ml);
		}
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

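/*
 * ifiq_enqueue() is the single packet counterpart of ifiq_input(): the
 * mbuf is tagged with the interface, counted, offered to bpf and
 * appended to ifiq_ml, and the softnet task is scheduled.
 * ifiq_process() then moves the accumulated list off the queue in one
 * go and feeds it to if_input_process().
 */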
int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	struct ifnet *ifp = ifiq->ifiq_if;
#if NBPFILTER > 0
	caddr_t if_bpf = ifp->if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets++;
			ifiq->ifiq_bytes += m->m_pkthdr.len;
			ifiq->ifiq_fdrops++;
			mtx_leave(&ifiq->ifiq_mtx);

			m_freem(m);
			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets++;
	ifiq->ifiq_bytes += m->m_pkthdr.len;
	ifiq->ifiq_enqueues++;
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_dequeues++;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

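/*
 * priq is the default discipline: one mbuf_list per priority level,
 * indexed by the packet's pf.prio.  Dequeue always serves the highest
 * priority non-empty list first.  When the ifq is full, enqueueing
 * tries to make room by dropping from a strictly lower priority list;
 * if there is none, the new packet itself is rejected.  priq_idx()
 * spreads flows across the hardware transmit queues using ph_flowid
 * when M_FLOWID is set.
 */
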
unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}