/* $OpenBSD: ifq.c,v 1.55 2024/11/20 02:18:45 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NKSTAT > 0
#include <sys/kstat.h>
#endif

/*
 * priq glue
 */
unsigned int priq_idx(unsigned int, const struct mbuf *);
struct mbuf *priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf *priq_deq_begin(struct ifqueue *, void **);
void priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void priq_purge(struct ifqueue *, struct mbuf_list *);

void *priq_alloc(unsigned int, void *);
void priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */
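
/*
 * The serialiser below lets work associated with an ifqueue (most
 * importantly the interface start routine) run without the caller
 * holding a lock, while still guaranteeing that only one CPU executes
 * that work at a time. Tasks are appended to ifq_task_list under
 * ifq_task_mtx; whichever CPU finds ifq_serializer unclaimed takes it
 * and runs tasks until the list drains.
 *
 * A minimal usage sketch, assuming a hypothetical driver softc and
 * task (neither is part of this file):
 *
 *	task_set(&sc->sc_tick, mydrv_tick, sc);
 *	ifq_serialize(&ifp->if_snd, &sc->sc_tick);
 *
 * mydrv_tick() then runs serialised against the queue's start and
 * restart tasks, so it never races the driver's if_qstart routine.
 */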

void ifq_start_task(void *);
void ifq_restart_task(void *);
void ifq_barrier_task(void *);
void ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_set_oactive(struct ifqueue *ifq)
{
	if (ifq->ifq_oactive)
		return;

	mtx_enter(&ifq->ifq_mtx);
	if (!ifq->ifq_oactive) {
		ifq->ifq_oactive = 1;
		ifq->ifq_oactives++;
	}
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_deq_set_oactive(struct ifqueue *ifq)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	if (!ifq->ifq_oactive) {
		ifq->ifq_oactive = 1;
		ifq->ifq_oactives++;
	}
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */
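
/*
 * Rough shape of the transmit side implemented below (a sketch, not a
 * contract): the stack hands a packet to if_enqueue(), which lands in
 * ifq_enqueue() and is accepted or dropped by the current queueing
 * discipline; ifq_start() then either runs the start task right away
 * (when the queue has already built up enough packets) or defers it to
 * the softnet bundle task, giving more packets a chance to arrive so
 * if_qstart is called less often. Roughly, the caller side looks like
 * this (illustrative only):
 *
 *	error = ifq_enqueue(&ifp->if_snd, m);
 *	if (error == 0)
 *		ifq_start(&ifp->if_snd);
 */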

#if NKSTAT > 0
struct ifq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;
	struct kstat_kv kd_maxqlen;
	struct kstat_kv kd_oactive;
	struct kstat_kv kd_oactives;
};

static const struct ifq_kstat_data ifq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("maxqlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
	KSTAT_KV_INITIALIZER("oactives", KSTAT_KV_T_COUNTER32),
};

int
ifq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifqueue *ifq = ks->ks_softc;
	struct ifq_kstat_data *kd = dst;

	*kd = ifq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
	kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
	kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;
	kstat_kv_u32(&kd->kd_oactives) = ifq->ifq_oactives;

	return (0);
}
#endif

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(idx);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_init_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
	    "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
	KASSERT(ifq->ifq_kstat != NULL);
	kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
	ifq->ifq_kstat->ks_softc = ifq;
	ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
	ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
	kstat_install(ifq->ifq_kstat);
#endif
}

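/*
 * ifq_attach() swaps the queueing discipline on a live ifqueue, for
 * example when queue configuration replaces the default priq with a
 * different set of ifq_ops. Packets held by the old discipline are
 * purged and offered to the new one; anything the new discipline
 * refuses is counted as a qdrop and freed. A hedged sketch of a
 * caller (the ops pointer and state argument are illustrative, not
 * defined in this file):
 *
 *	void *state = mydisc_config(ifp);
 *	ifq_attach(&ifp->if_snd, ifq_mydisc_ops, state);
 */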
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

#if NKSTAT > 0
	kstat_destroy(ifq->ifq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

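/*
 * ifq_deq_begin() lets a driver look at the next packet without
 * removing it from the queue yet. If the packet can be given to the
 * hardware (e.g. its DMA map loads and there is ring space), the
 * driver takes it with ifq_deq_commit(); otherwise ifq_deq_rollback()
 * leaves it queued for a later attempt. A sketch of the pattern,
 * assuming a hypothetical mydrv_load_mbuf() helper:
 *
 *	m = ifq_deq_begin(ifq);
 *	if (m == NULL)
 *		return;
 *	if (mydrv_load_mbuf(sc, m) != 0) {
 *		ifq_deq_rollback(ifq, m);
 *		ifq_set_oactive(ifq);
 *		return;
 *	}
 *	ifq_deq_commit(ifq, m);
 */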
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

int
ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
    const char *wmesg, volatile unsigned int *sleeping,
    volatile unsigned int *alive)
{
	struct mbuf *m;
	void *cookie;
	int error = 0;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 && nbio)
		error = EWOULDBLOCK;
	else {
		for (;;) {
			m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
			if (m != NULL) {
				ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
				ifq->ifq_len--;
				*mp = m;
				break;
			}

			(*sleeping)++;
			error = msleep_nsec(ifq, &ifq->ifq_mtx,
			    priority, wmesg, INFSLP);
			(*sleeping)--;
			if (error != 0)
				break;
			if (!(*alive)) {
				error = EIO;
				break;
			}
		}
	}
	ifq_deq_leave(ifq);

	return (error);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	if (ifq_empty(ifq))
		return (0);

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

void
ifq_init_maxlen(struct ifqueue *ifq, unsigned int maxlen)
{
	/* this is not MP safe, use only during attach */
	ifq->ifq_maxlen = maxlen;
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

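/*
 * ifq_q_enter()/ifq_q_leave() give a queueing discipline access to its
 * private state under the ifq mutex, but only while the ifq is still
 * using that discipline's ops; otherwise NULL is returned and no lock
 * is held. A sketch from a discipline's point of view (the names are
 * hypothetical):
 *
 *	struct mydisc *md;
 *
 *	md = ifq_q_enter(ifq, ifq_mydisc_ops);
 *	if (md == NULL)
 *		return (ENXIO);
 *	... read or update md ...
 *	ifq_q_leave(ifq, md);
 */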
void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

#if NKSTAT > 0
struct ifiq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_fdrops;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;

	struct kstat_kv kd_enqueues;
	struct kstat_kv kd_dequeues;
};

static const struct ifiq_kstat_data ifiq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("fdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),

	KSTAT_KV_INITIALIZER("enqueues",
	    KSTAT_KV_T_COUNTER64),
	KSTAT_KV_INITIALIZER("dequeues",
	    KSTAT_KV_T_COUNTER64),
};

int
ifiq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifiqueue *ifiq = ks->ks_softc;
	struct ifiq_kstat_data *kd = dst;

	*kd = ifiq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
	kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops;
	kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);

	kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
	kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;

	return (0);
}
#endif

static void ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(idx);
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_fdrops = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
	    "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
	KASSERT(ifiq->ifiq_kstat != NULL);
	kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
	ifiq->ifiq_kstat->ks_softc = ifiq;
	ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
	ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
	kstat_install(ifiq->ifiq_kstat);
#endif
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
#if NKSTAT > 0
	kstat_destroy(ifiq->ifiq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

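/*
 * ifiq_input() is the driver-facing entry point for received packets.
 * The mbufs on ml are stamped with the interface index and rdomain,
 * offered to bpf, appended to the ifiq, and left for the softnet task
 * to process. The return value is a congestion hint: non-zero means
 * the queue is getting long and the caller should back off, e.g. by
 * refilling its rx ring less aggressively. A sketch of a driver rx
 * path (all driver-side names are illustrative):
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *
 *	while ((m = mydrv_rxeof(sc)) != NULL)
 *		ml_enqueue(&ml, m);
 *
 *	if (ifiq_input(rxr->rxr_ifiq, &ml))
 *		rxr->rxr_slowdown = 1;
 */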
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	uint64_t fdrops = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
				m_freem(m);
				fdrops++;
			} else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			ifiq->ifiq_fdrops += fdrops;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;
	ifiq->ifiq_fdrops += fdrops;

	len = ml_len(&ifiq->ifiq_ml);
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
		if (len > ifiq_maxlen_drop)
			ifiq->ifiq_qdrops += ml_len(ml);
		else {
			ifiq->ifiq_enqueues++;
			ml_enlist(&ifiq->ifiq_ml, ml);
		}
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	struct ifnet *ifp = ifiq->ifiq_if;
#if NBPFILTER > 0
	caddr_t if_bpf = ifp->if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets++;
			ifiq->ifiq_bytes += m->m_pkthdr.len;
			ifiq->ifiq_fdrops++;
			mtx_leave(&ifiq->ifiq_mtx);

			m_freem(m);
			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets++;
	ifiq->ifiq_bytes += m->m_pkthdr.len;
	ifiq->ifiq_enqueues++;
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_dequeues++;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
	/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

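/*
 * priq_enq() implements the priority queue's drop policy. When the
 * ifq is full, it tries to make room by dropping a packet from a
 * strictly lower-priority list and returning that mbuf to the caller;
 * if no lower-priority packet exists, the new mbuf itself is returned
 * and counted as the drop. For example, a prio 6 packet arriving at a
 * full queue holding only prio 3 traffic displaces one of the prio 3
 * mbufs, while a prio 0 packet arriving at the same queue is dropped.
 */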
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}