xref: /netbsd/share/man/man9/altq.9 (revision 6550d01e)
1.\"	$NetBSD: altq.9,v 1.14 2007/06/24 19:26:58 rumble Exp $
2.\"	$OpenBSD: altq.9,v 1.4 2001/07/12 12:41:42 itojun Exp $
3.\"
4.\" Copyright (C) 2001
5.\" Sony Computer Science Laboratories Inc.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\"
16.\" THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
17.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19.\" ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
20.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26.\" SUCH DAMAGE.
27.\"
28.Dd October 12, 2006
29.Dt ALTQ 9
30.Os
31.\"
32.Sh NAME
33.Nm ALTQ
34.Nd kernel interfaces for manipulating output queues on network interfaces
35.Sh SYNOPSIS
36.In sys/types.h
37.In sys/socket.h
38.In net/if.h
39.Ft void \"macro
40.Fn IFQ_ENQUEUE "struct ifaltq *ifq" "struct mbuf *m" "struct altq_pktattr *pattr" "int err"
41.Ft void \"macro
42.Fn IFQ_DEQUEUE "struct ifaltq *ifq" "struct mbuf *m"
43.Ft void \"macro
44.Fn IFQ_POLL "struct ifaltq *ifq" "struct mbuf *m"
45.Ft void \"macro
46.Fn IFQ_PURGE "struct ifaltq *ifq"
47.Ft void \"macro
48.Fn IFQ_CLASSIFY "struct ifaltq *ifq" "struct mbuf *m" "int af" "struct altq_pktattr *pattr"
49.Ft int \"macro
50.Fn IFQ_IS_EMPTY "struct ifaltq *ifq"
51.Ft void \"macro
52.Fn IFQ_SET_MAXLEN "struct ifaltq *ifq" "int len"
53.Ft void \"macro
54.Fn IFQ_INC_LEN "struct ifaltq *ifq"
55.Ft void \"macro
56.Fn IFQ_DEC_LEN "struct ifaltq *ifq"
57.Ft void \"macro
58.Fn IFQ_INC_DROPS "struct ifaltq *ifq"
59.Ft void \"macro
60.Fn IFQ_SET_READY "struct ifaltq *ifq"
61.Sh DESCRIPTION
62The
63.Nm
64system is a framework to manage queueing disciplines on network
65interfaces.
66.Nm
67introduces new macros to manipulate output queues.
68The output queue macros are used to abstract queue operations and not to
69touch the internal fields of the output queue structure.
70The macros are independent from the
71.Nm
72implementation, and compatible with the traditional
73.Dv ifqueue
74macros for ease of transition.
75.Pp
76.Fn IFQ_ENQUEUE
77enqueues a packet
78.Fa m
79to the queue
80.Fa ifq .
81The underlying queueing discipline may discard the packet.
82.Fa err
83is set to 0 on success, or
84.Dv ENOBUFS
85if the packet is discarded.
86.Fa m
87will be freed by the device driver on success or by the queueing discipline on
88failure, so the caller should not touch
89.Fa m
90after calling
91.Fn IFQ_ENQUEUE .
92.Pp
93.Fn IFQ_DEQUEUE
94dequeues a packet from the queue.
95The dequeued packet is returned in
96.Fa m ,
97or
98.Fa m
99is set to
100.Dv NULL
101if no packet is dequeued.
102The caller must always check
103.Fa m
104since a non-empty queue could return
105.Dv NULL
106under rate-limiting.
107.Pp
108.Fn IFQ_POLL
109returns the next packet without removing it from the queue.
110It is guaranteed by the underlying queueing discipline that
111.Fn IFQ_DEQUEUE
112immediately after
113.Fn IFQ_POLL
114returns the same packet.
115.Pp
116.Fn IFQ_PURGE
117discards all the packets in the queue.
118The purge operation is needed since a non-work conserving queue cannot be
119emptied by a dequeue loop.
120.Pp
121.Fn IFQ_CLASSIFY
122classifies a packet to a scheduling class, and returns the result in
123.Fa pattr .
124.Pp
125.Fn IFQ_IS_EMPTY
126can be used to check if the queue is empty.
127Note that
128.Fn IFQ_DEQUEUE
129could still return
130.Dv NULL
131if the queueing discipline is non-work conserving.
132.Pp
133.Fn IFQ_SET_MAXLEN
134sets the queue length limit of the default FIFO queue.
135.Pp
136.Fn IFQ_INC_LEN
137and
138.Fn IFQ_DEC_LEN
139increment or decrement the current queue length in packets.
140.Pp
141.Fn IFQ_INC_DROPS
142increments the drop counter and is equivalent to
143.Fn IF_DROP .
144It is defined for naming consistency.
145.Pp
146.Fn IFQ_SET_READY
147sets a flag to indicate this driver is converted to use the new macros.
148.Nm
149can be enabled only on interfaces with this flag.
150.Sh COMPATIBILITY
151.Ss ifaltq structure
152In order to keep compatibility with the existing code, the new
153output queue structure
154.Dv ifaltq
155has the same fields.
156The traditional
157.Fn IF_XXX
158macros and the code directly referencing the fields within
159.Dv if_snd
160still work with
161.Dv ifaltq .
162(Once we finish conversions of all the drivers, we no longer need
163these fields.)
164.Bd -literal
165            ##old-style##                           ##new-style##
166                                       |
167 struct ifqueue {                      | struct ifaltq {
168    struct mbuf *ifq_head;             |    struct mbuf *ifq_head;
169    struct mbuf *ifq_tail;             |    struct mbuf *ifq_tail;
170    int          ifq_len;              |    int          ifq_len;
171    int          ifq_maxlen;           |    int          ifq_maxlen;
172    int          ifq_drops;            |    int          ifq_drops;
173 };                                    |    /* altq related fields */
174                                       |    ......
175                                       | };
176                                       |
177.Ed
178The new structure replaces
179.Dv struct ifqueue
180in
181.Dv struct ifnet .
182.Bd -literal
183            ##old-style##                           ##new-style##
184                                       |
185 struct ifnet {                        | struct ifnet {
186     ....                              |     ....
187                                       |
188     struct ifqueue if_snd;            |     struct ifaltq if_snd;
189                                       |
190     ....                              |     ....
191 };                                    | };
192                                       |
193.Ed
194The (simplified) new
195.Fn IFQ_XXX
196macros look like:
197.Bd -literal
198	#ifdef ALTQ
199	#define IFQ_DEQUEUE(ifq, m)			\e
200		if (ALTQ_IS_ENABLED((ifq)))		\e
201			ALTQ_DEQUEUE((ifq), (m));	\e
202		else					\e
203			IF_DEQUEUE((ifq), (m));
204	#else
205	#define IFQ_DEQUEUE(ifq, m)	IF_DEQUEUE((ifq), (m));
206	#endif
207.Ed
208.Ss Enqueue operation
209The semantics of the enqueue operation are changed.
210In the new style,
211enqueue and packet drop are combined since they cannot be easily
212separated in many queueing disciplines.
213The new enqueue operation corresponds to the following macro that is
214written with the old macros.
215.Bd -literal
216#define	IFQ_ENQUEUE(ifq, m, pattr, err)                   \e
217do {                                                      \e
218        if (ALTQ_IS_ENABLED((ifq)))                       \e
219                ALTQ_ENQUEUE((ifq), (m), (pattr), (err)); \e
220        else {                                            \e
221                if (IF_QFULL((ifq))) {                    \e
222                        m_freem((m));                     \e
223                        (err) = ENOBUFS;                  \e
224                } else {                                  \e
225                        IF_ENQUEUE((ifq), (m));           \e
226                        (err) = 0;                        \e
227                }                                         \e
228        }                                                 \e
229        if ((err))                                        \e
230                (ifq)-\*[Gt]ifq_drops++;                       \e
231} while (/*CONSTCOND*/ 0)
232.Ed
233.Pp
234.Fn IFQ_ENQUEUE
235does the following:
236.Bl -hyphen -compact
237.It
238queue a packet
239.It
240drop (and free) a packet if the enqueue operation fails
241.El
242If the enqueue operation fails,
243.Fa err
244is set to
245.Dv ENOBUFS .
246.Fa m
247is freed by the queueing discipline.
248The caller should not touch the mbuf after calling
249.Fn IFQ_ENQUEUE ,
250so the caller may need to copy the
251.Fa m_pkthdr.len
252or
253.Fa m_flags
254field beforehand for statistics.
255The caller should not use
256.Fn senderr
257since the mbuf has already been freed.
258.Pp
259The new style
260.Fn if_output
261looks as follows:
262.Bd -literal
263            ##old-style##                           ##new-style##
264                                       |
265 int                                   | int
266 ether_output(ifp, m0, dst, rt0)       | ether_output(ifp, m0, dst, rt0)
267 {                                     | {
268     ......                            |     ......
269                                       |
270                                       |     mflags = m-\*[Gt]m_flags;
271                                       |     len = m-\*[Gt]m_pkthdr.len;
272     s = splimp();                     |     s = splimp();
273     if (IF_QFULL(\*[Am]ifp-\*[Gt]if_snd)) {     |     IFQ_ENQUEUE(\*[Am]ifp-\*[Gt]if_snd, m,
274                                       |                 NULL, error);
275         IF_DROP(\*[Am]ifp-\*[Gt]if_snd);        |     if (error != 0) {
276         splx(s);                      |         splx(s);
277         senderr(ENOBUFS);             |         return (error);
278     }                                 |     }
279     IF_ENQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);      |
280     ifp-\*[Gt]if_obytes +=                 |     ifp-\*[Gt]if_obytes += len;
281                    m-\*[Gt]m_pkthdr.len;   |
282     if (m-\*[Gt]m_flags \*[Am] M_MCAST)         |     if (mflags \*[Am] M_MCAST)
283         ifp-\*[Gt]if_omcasts++;            |         ifp-\*[Gt]if_omcasts++;
284                                       |
285     if ((ifp-\*[Gt]if_flags \*[Am] IFF_OACTIVE) |     if ((ifp-\*[Gt]if_flags \*[Am] IFF_OACTIVE)
286         == 0)                         |         == 0)
287         (*ifp-\*[Gt]if_start)(ifp);        |         (*ifp-\*[Gt]if_start)(ifp);
288     splx(s);                          |     splx(s);
289     return (error);                   |     return (error);
290                                       |
291 bad:                                  | bad:
292     if (m)                            |     if (m)
293         m_freem(m);                   |         m_freem(m);
294     return (error);                   |     return (error);
295 }                                     | }
296                                       |
297.Ed
298.Ss Classifier
299The classifier mechanism is currently implemented in
300.Fn if_output .
301.Dv struct altq_pktattr
302is used to store the classifier result, and it is passed to the enqueue
303function.
304(We will change the method to tag the classifier result to mbuf in the future.)
305.Bd -literal
306int
307ether_output(ifp, m0, dst, rt0)
308{
309	......
310	struct altq_pktattr pktattr;
311
312	......
313
314	/* classify the packet before prepending link-headers */
315	IFQ_CLASSIFY(\*[Am]ifp-\*[Gt]if_snd, m, dst-\*[Gt]sa_family, \*[Am]pktattr);
316
317	/* prepend link-level headers */
318	......
319
320	IFQ_ENQUEUE(\*[Am]ifp-\*[Gt]if_snd, m, \*[Am]pktattr, error);
321
322	......
323}
324.Ed
325.Sh HOW TO CONVERT THE EXISTING DRIVERS
326First, make sure the corresponding
327.Fn if_output
328is already converted to the new style.
329.Pp
330Look for
331.Fa if_snd
332in the driver.
333You will probably need to make changes to the lines that include
334.Fa if_snd .
335.Ss Empty check operation
336If the code checks
337.Fa ifq_head
338to see whether the queue is empty or not, use
339.Fn IFQ_IS_EMPTY .
340.Bd -literal
341            ##old-style##                           ##new-style##
342                                       |
343 if (ifp-\*[Gt]if_snd.ifq_head != NULL)     | if (IFQ_IS_EMPTY(\*[Am]ifp-\*[Gt]if_snd) == 0)
344                                       |
345.Ed
346Note that
347.Fn IFQ_POLL
348can be used for the same purpose, but
349.Fn IFQ_POLL
350could be costly for a complex scheduling algorithm since
351.Fn IFQ_POLL
352needs to run the scheduling algorithm to select the next packet.
353On the other hand,
354.Fn IFQ_IS_EMPTY
355checks only if there is any packet stored in the queue.
356Another difference is that even when
357.Fn IFQ_IS_EMPTY
358is
359.Dv false ,
360.Fn IFQ_DEQUEUE
361could still return
362.Dv NULL
363if the queue is under rate-limiting.
364.Ss Dequeue operation
365Replace
366.Fn IF_DEQUEUE
367by
368.Fn IFQ_DEQUEUE .
369Always check whether the dequeued mbuf is
370.Dv NULL
371or not.
372Note that even when
373.Fn IFQ_IS_EMPTY
374is
375.Dv false ,
376.Fn IFQ_DEQUEUE
377could return
378.Dv NULL
379due to rate-limiting.
380.Bd -literal
381            ##old-style##                           ##new-style##
382                                       |
383 IF_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);          | IFQ_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);
384                                       | if (m == NULL)
385                                       |     return;
386                                       |
387.Ed
388A driver is supposed to call
389.Fn if_start
390from transmission complete interrupts in order to trigger the next dequeue.
391.Ss Poll-and-dequeue operation
392If the code polls the packet at the head of the queue and actually uses
393the packet before dequeueing it, use
394.Fn IFQ_POLL
395and
396.Fn IFQ_DEQUEUE .
397.Bd -literal
398            ##old-style##                           ##new-style##
399                                       |
400 m = ifp-\*[Gt]if_snd.ifq_head;             | IFQ_POLL(\*[Am]ifp-\*[Gt]if_snd, m);
401 if (m != NULL) {                      | if (m != NULL) {
402                                       |
403     /* use m to get resources */      |     /* use m to get resources */
404     if (something goes wrong)         |     if (something goes wrong)
405         return;                       |         return;
406                                       |
407     IF_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);      |     IFQ_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);
408                                       |
409     /* kick the hardware */           |     /* kick the hardware */
410 }                                     | }
411                                       |
412.Ed
413It is guaranteed that
414.Fn IFQ_DEQUEUE
415immediately after
416.Fn IFQ_POLL
417returns the same packet.
418Note that they need to be guarded by
419.Fn splimp
420if called from outside of
421.Fn if_start .
422.Ss Eliminating IF_PREPEND
423If the code uses
424.Fn IF_PREPEND ,
425you have to eliminate it since the prepend operation is not possible for many
426queueing disciplines.
427A common use of
428.Fn IF_PREPEND
429is to cancel the previous dequeue operation.
430You have to convert the logic into poll-and-dequeue.
431.Bd -literal
432            ##old-style##                           ##new-style##
433                                       |
434 IF_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);          | IFQ_POLL(\*[Am]ifp-\*[Gt]if_snd, m);
435 if (m != NULL) {                      | if (m != NULL) {
436                                       |
437     if (something_goes_wrong) {       |     if (something_goes_wrong) {
438         IF_PREPEND(\*[Am]ifp-\*[Gt]if_snd, m);  |
439         return;                       |         return;
440     }                                 |     }
441                                       |
442                                       |     /* at this point, the driver
443                                       |      * is committed to send this
444                                       |      * packet.
445                                       |      */
446                                       |     IFQ_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);
447                                       |
448     /* kick the hardware */           |     /* kick the hardware */
449 }                                     | }
450                                       |
451.Ed
452.Ss Purge operation
453Use
454.Fn IFQ_PURGE
455to empty the queue.
456Note that a non-work conserving queue cannot be emptied by a dequeue loop.
457.Bd -literal
458            ##old-style##                           ##new-style##
459                                       |
460 while (ifp-\*[Gt]if_snd.ifq_head != NULL) {|  IFQ_PURGE(\*[Am]ifp-\*[Gt]if_snd);
461     IF_DEQUEUE(\*[Am]ifp-\*[Gt]if_snd, m);      |
462     m_freem(m);                       |
463 }                                     |
464                                       |
465.Ed
466.Ss Attach routine
467Use
468.Fn IFQ_SET_MAXLEN
469to set
470.Fa ifq_maxlen
471to
472.Fa len .
473Add
474.Fn IFQ_SET_READY
475to show this driver is converted to the new style.
476(This is used to distinguish new-style drivers.)
477.Bd -literal
478            ##old-style##                           ##new-style##
479                                       |
480 ifp-\*[Gt]if_snd.ifq_maxlen = qsize;       | IFQ_SET_MAXLEN(\*[Am]ifp-\*[Gt]if_snd, qsize);
481                                       | IFQ_SET_READY(\*[Am]ifp-\*[Gt]if_snd);
482 if_attach(ifp);                       | if_attach(ifp);
483                                       |
484.Ed
485.Ss Other issues
486The new macros for statistics:
487.Bd -literal
488            ##old-style##                           ##new-style##
489                                       |
490 IF_DROP(\*[Am]ifp-\*[Gt]if_snd);                | IFQ_INC_DROPS(\*[Am]ifp-\*[Gt]if_snd);
491                                       |
492 ifp-\*[Gt]if_snd.ifq_len++;                | IFQ_INC_LEN(\*[Am]ifp-\*[Gt]if_snd);
493                                       |
494 ifp-\*[Gt]if_snd.ifq_len--;                | IFQ_DEC_LEN(\*[Am]ifp-\*[Gt]if_snd);
495                                       |
496.Ed
497Some drivers instruct the hardware to invoke transmission complete
498interrupts only when it thinks necessary.
499Rate-limiting breaks this assumption.
500.Ss How to convert drivers using multiple ifqueues
501Some (pseudo) devices (such as slip) have another
502.Dv ifqueue
503to prioritize packets.
504It is possible to eliminate the second queue
505since
506.Nm
507provides more flexible mechanisms but the following shows
508how to keep the original behavior.
509.Bd -literal
510struct sl_softc {
511	struct	ifnet sc_if;		/* network-visible interface */
512	...
513	struct	ifqueue sc_fastq;	/* interactive output queue */
514	...
515};
516.Ed
517The driver doesn't compile in the new model since it has the following
518line
519.Po
520.Fa if_snd
521is no longer of type
522.Dv struct ifqueue
523.Pc .
524.Bd -literal
525	struct ifqueue *ifq = \*[Am]ifp-\*[Gt]if_snd;
526.Ed
527A simple way is to use the original
528.Fn IF_XXX
529macros for
530.Fa sc_fastq
531and use the new
532.Fn IFQ_XXX
533macros for
534.Fa if_snd .
535The enqueue operation looks like:
536.Bd -literal
537            ##old-style##                           ##new-style##
538                                       |
539 struct ifqueue *ifq = \*[Am]ifp-\*[Gt]if_snd;   | struct ifqueue *ifq = NULL;
540                                       |
541 if (ip-\*[Gt]ip_tos \*[Am] IPTOS_LOWDELAY)      | if ((ip-\*[Gt]ip_tos \*[Am] IPTOS_LOWDELAY) \*[Am]\*[Am]
542     ifq = \*[Am]sc-\*[Gt]sc_fastq;              | !ALTQ_IS_ENABLED(\*[Am]sc-\*[Gt]sc_if.if_snd)) {
543                                       |     ifq = \*[Am]sc-\*[Gt]sc_fastq;
544 if (IF_QFULL(ifq)) {                  |     if (IF_QFULL(ifq)) {
545     IF_DROP(ifq);                     |         IF_DROP(ifq);
546     m_freem(m);                       |         m_freem(m);
547     splx(s);                          |         error = ENOBUFS;
548     sc-\*[Gt]sc_if.if_oerrors++;           |     } else {
549     return (ENOBUFS);                 |         IF_ENQUEUE(ifq, m);
550 }                                     |         error = 0;
551 IF_ENQUEUE(ifq, m);                   |     }
552                                       | } else
553                                       |     IFQ_ENQUEUE(\*[Am]sc-\*[Gt]sc_if.if_snd,
554                                       |                 m, NULL, error);
555                                       |
556                                       | if (error) {
557                                       |     splx(s);
558                                       |     sc-\*[Gt]sc_if.if_oerrors++;
559                                       |     return (error);
560                                       | }
561 if ((sc-\*[Gt]sc_oqlen =                   | if ((sc-\*[Gt]sc_oqlen =
562      sc-\*[Gt]sc_ttyp-\*[Gt]t_outq.c_cc) == 0)  |      sc-\*[Gt]sc_ttyp-\*[Gt]t_outq.c_cc) == 0)
563     slstart(sc-\*[Gt]sc_ttyp);             |     slstart(sc-\*[Gt]sc_ttyp);
564 splx(s);                              | splx(s);
565                                       |
566.Ed
567The dequeue operation looks like:
568.Bd -literal
569            ##old-style##                           ##new-style##
570                                       |
571 s = splimp();                         | s = splimp();
572 IF_DEQUEUE(\*[Am]sc-\*[Gt]sc_fastq, m);         | IF_DEQUEUE(\*[Am]sc-\*[Gt]sc_fastq, m);
573 if (m == NULL)                        | if (m == NULL)
574     IF_DEQUEUE(\*[Am]sc-\*[Gt]sc_if.if_snd, m); |     IFQ_DEQUEUE(\*[Am]sc-\*[Gt]sc_if.if_snd, m);
575 splx(s);                              | splx(s);
576                                       |
577.Ed
578.Sh QUEUEING DISCIPLINES
579Queueing disciplines need to maintain
580.Fa ifq_len
581.Po
582used by
583.Fn IFQ_IS_EMPTY
584.Pc .
585Queueing disciplines also need to guarantee the same mbuf is returned if
586.Fn IFQ_DEQUEUE
587is called immediately after
588.Fn IFQ_POLL .
589.Sh SEE ALSO
590.Xr pf 4 ,
591.Xr altq.conf 5 ,
592.Xr pf.conf 5 ,
593.Xr altqd 8 ,
594.Xr tbrconfig 8
595.Sh HISTORY
596The
597.Nm
598system first appeared in March 1997.
599