/*	$OpenBSD: ifq.h,v 1.33 2021/03/10 10:21:48 jsg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_

struct ifnet;
struct kstat;

struct ifq_ops;

struct ifqueue {
	struct ifnet		*ifq_if;
	struct taskq		*ifq_softnet;
	union {
		void			*_ifq_softc;
		/*
		 * a ring's sndq is found by looking up an array of pointers.
		 * by default we only have one sndq and the default drivers
		 * don't use ifq_softc, so we can borrow it for the map until
		 * we need to allocate a proper map.
		 */
		struct ifqueue		*_ifq_ifqs[1];
	} _ifq_ptr;
#define ifq_softc		 _ifq_ptr._ifq_softc
#define ifq_ifqs		 _ifq_ptr._ifq_ifqs

	/* mbuf handling */
	struct mutex		 ifq_mtx;
	const struct ifq_ops	*ifq_ops;
	void			*ifq_q;
	struct mbuf_list	 ifq_free;
	unsigned int		 ifq_len;
	unsigned int		 ifq_oactive;

	/* statistics */
	uint64_t		 ifq_packets;
	uint64_t		 ifq_bytes;
	uint64_t		 ifq_qdrops;
	uint64_t		 ifq_errors;
	uint64_t		 ifq_mcasts;

	struct kstat		*ifq_kstat;

	/* work serialisation */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;
	struct task		 ifq_bundle;

	/* work to be serialised */
	struct task		 ifq_start;
	struct task		 ifq_restart;

	/* properties */
	unsigned int		 ifq_maxlen;
	unsigned int		 ifq_idx;
};

struct ifiqueue {
	struct ifnet		*ifiq_if;
	struct taskq		*ifiq_softnet;
	union {
		void			*_ifiq_softc;
		struct ifiqueue		*_ifiq_ifiqs[1];
	} _ifiq_ptr;
#define ifiq_softc		 _ifiq_ptr._ifiq_softc
#define ifiq_ifiqs		 _ifiq_ptr._ifiq_ifiqs

	struct mutex		 ifiq_mtx;
	struct mbuf_list	 ifiq_ml;
	struct task		 ifiq_task;
	unsigned int		 ifiq_pressure;

	/* counters */
	uint64_t		 ifiq_packets;
	uint64_t		 ifiq_bytes;
	uint64_t		 ifiq_qdrops;
	uint64_t		 ifiq_errors;
	uint64_t		 ifiq_mcasts;
	uint64_t		 ifiq_noproto;

	struct kstat		*ifiq_kstat;

	/* properties */
	unsigned int		 ifiq_idx;
};

#ifdef _KERNEL

#define IFQ_MAXLEN		256

/*
 *
 * Interface Send Queues
 *
 * struct ifqueue sits between the network stack and a driver's
 * transmission of packets. The high level view is that when the stack
 * has finished generating a packet it hands it to a driver for
 * transmission. It does this by queueing the packet on an ifqueue and
 * notifying the driver to start transmission of the queued packets.
 *
 * A network device may have multiple contexts for the transmission
 * of packets, ie, independent transmit rings. Such a network device,
 * represented by a struct ifnet, would then have multiple ifqueue
 * structures, each of which maps to an independent transmit ring.
 *
 * struct ifqueue also provides the point where conditioning of
 * traffic (ie, priq and hfsc) is implemented, and provides some
 * infrastructure to assist in the implementation of network drivers.
 *
 * = ifq API
 *
 * The ifq API provides functions for three distinct consumers:
 *
 * 1. The network stack
 * 2. Traffic QoS/conditioning implementations
 * 3. Network drivers
 *
 * == Network Stack API
 *
 * The network stack is responsible for initialising and destroying
 * the ifqueue structures, changing the traffic conditioner on an
 * interface, enqueuing packets for transmission, and notifying
 * the driver to start transmission of a particular ifqueue.
 *
 * === ifq_init()
 *
 * During if_attach(), the network stack calls ifq_init() to initialise
 * the ifqueue structure. By default it configures the priq traffic
 * conditioner.
 *
 * === ifq_destroy()
 *
 * The network stack calls ifq_destroy() during if_detach() to tear down
 * the ifqueue structure. It frees the traffic conditioner state, and
 * frees any mbufs that were left queued.
 *
 * === ifq_attach()
 *
 * ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All the pending mbufs are removed from the previous
 * conditioner and requeued on the new one.
 *
 * === ifq_idx()
 *
 * ifq_idx() selects a specific ifqueue from the current ifnet
 * structure for use in the transmission of the mbuf.
 *
 * === ifq_enqueue()
 *
 * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
 * current traffic conditioner may drop a packet to make space on the
 * queue.
 *
 * === ifq_start()
 *
 * Once a packet has been successfully queued with ifq_enqueue(),
 * the network card is notified with a call to ifq_start().
 * Calls to ifq_start() run in the ifqueue serialisation context,
 * guaranteeing that only one instance of ifp->if_qstart() will be
 * running on behalf of a specific ifqueue in the system at any point
 * in time.
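 *
 * Taken together, the stack side of transmission looks roughly like
 * the following sketch. This is a simplified view of what
 * if_enqueue() does; error counting is omitted:
 *
 *	struct ifqueue *ifq;
 *	int error;
 *
 *	// pick one of the interface's transmit queues for this mbuf
 *	ifq = ifp->if_ifqs[ifq_idx(&ifp->if_snd, ifp->if_nifqs, m)];
 *
 *	error = ifq_enqueue(ifq, m);
 *	if (error)
 *		return (error);
 *
 *	ifq_start(ifq);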
 *
 * == Traffic conditioner API
 *
 * The majority of interaction between struct ifqueue and a traffic
 * conditioner occurs via the callbacks a traffic conditioner provides
 * in an instance of struct ifq_ops.
 *
 * XXX document ifqop_*
 *
 * The ifqueue API implements the locking on behalf of the conditioning
 * implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue
 * lock needs to be taken to allow for a consistent or safe view. The
 * queue lock may be taken and released with ifq_q_enter() and
 * ifq_q_leave().
 *
 * === ifq_q_enter()
 *
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
 * ifq_ops did not match the current conditioner.
 *
 * === ifq_q_leave()
 *
 * The queue lock acquired with ifq_q_enter() is released with
 * ifq_q_leave().
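 *
 * For example, code that wants to look inside the priq conditioner
 * could do something like the following sketch, where struct priq
 * stands for the conditioner's private state:
 *
 *	struct priq *pq;
 *
 *	pq = ifq_q_enter(ifq, ifq_priq_ops);
 *	if (pq == NULL)
 *		return;		// a different conditioner is attached
 *
 *	// inspect pq while the queue lock is held
 *
 *	ifq_q_leave(ifq, pq);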
 *
 * === ifq_mfreem() and ifq_mfreeml()
 *
 * A goal of the API is to avoid freeing an mbuf while mutexes are
 * held. Because the ifq API manages the lock on behalf of the backend
 * ifqops, the backend should not directly free mbufs. If a conditioner
 * backend needs to drop a packet during the handling of ifqop_deq_begin,
 * it may free it by calling ifq_mfreem(). This accounts for the drop,
 * and schedules the free of the mbuf outside the hold of ifq_mtx.
 * ifq_mfreeml() takes an mbuf list as an argument instead.
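 *
 * For example, a conditioner that expires stale packets from within
 * its deq_begin handler could drop them as in this sketch, where
 * pq_first(), pq_remove(), and TOO_OLD() are illustrative, not part
 * of the kernel:
 *
 *	while ((m = pq_first(pq)) != NULL && TOO_OLD(m)) {
 *		pq_remove(pq, m);
 *		ifq_mfreem(ifq, m);
 *	}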
 *
 *
 * == Network Driver API
 *
 * The API used by network drivers is mostly documented in the
 * ifq_dequeue(9) manpage except for ifq_serialize().
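 *
 * For reference, the transactional dequeue that manpage describes
 * follows this pattern (a sketch; NO_SPACE_FOR() stands in for a
 * driver's ring space check):
 *
 *	m = ifq_deq_begin(ifq);
 *	if (m == NULL)
 *		return;
 *
 *	if (NO_SPACE_FOR(ring, m)) {
 *		ifq_deq_rollback(ifq, m);
 *		ifq_set_oactive(ifq);
 *		return;
 *	}
 *
 *	ifq_deq_commit(ifq, m);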
 *
 * === ifq_serialize()
 *
 * A driver may run arbitrary work in the ifqueue serialisation context
 * via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set().
 *
 * The work will be run in series with any other work dispatched by
 * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
 *
 * Because the work may be run on another CPU, the lifetime of the
 * task and the work it represents can extend beyond the end of the
 * call to ifq_serialize() that dispatched it.
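 *
 * For example, a driver could run ring maintenance serialised with
 * its start routine like this (a sketch; drv_maintain and
 * sc_maintain_task are illustrative):
 *
 *	task_set(&sc->sc_maintain_task, drv_maintain, sc);
 *	ifq_serialize(ifq, &sc->sc_maintain_task);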
 *
 *
 * = ifqueue work serialisation
 *
 * ifqueues provide a mechanism to dispatch work to be run in a single
 * context. Work in this mechanism is represented by task structures.
 *
 * The tasks are run in a context similar to a taskq serviced by a
 * single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, they will be queued
 * to be run by the first CPU, and the second CPU will return immediately.
 *
 * = MP Safe Network Drivers
 *
 * An MP safe network driver is one whose start routine can be
 * called by the network stack without holding the big kernel lock.
 *
 * == Attach
 *
 * A driver advertises its ability to run its start routine without
 * the kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
 * before calling if_attach(). Advertising an MPSAFE start routine
 * also implies that the driver understands that a network card can
 * have multiple rings or transmit queues, and therefore provides an
 * if_qstart function (which takes an ifqueue pointer) instead of an
 * if_start function (which takes an ifnet pointer).
 *
 * If the hardware supports multiple transmit rings, it advertises
 * support for multiple rings to the network stack with if_attach_queues()
 * after the call to if_attach(). if_attach_queues() allocates a struct
 * ifqueue for each hardware ring, which can then be initialised by
 * the driver with data for each ring.
 *
 *	void	drv_start(struct ifqueue *);
 *
 *	void
 *	drv_attach()
 *	{
 *	...
 *		ifp->if_xflags = IFXF_MPSAFE;
 *		ifp->if_qstart = drv_start;
 *		if_attach(ifp);
 *
 *		if_attach_queues(ifp, DRV_NUM_TX_RINGS);
 *		for (i = 0; i < DRV_NUM_TX_RINGS; i++) {
 *			struct ifqueue *ifq = ifp->if_ifqs[i];
 *			struct drv_tx_ring *ring = &sc->sc_tx_rings[i];
 *
 *			ifq->ifq_softc = ring;
 *			ring->ifq = ifq;
 *		}
 *	}
 *
 * The network stack will then call ifp->if_qstart via ifq_start()
 * to guarantee there is only one instance of that function running
 * for each ifq in the system, and to serialise it with other work
 * the driver may provide.
 *
 * == Initialise
 *
 * When the stack requests an interface be brought up (ie, drv_ioctl()
 * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags),
 * drivers should set IFF_RUNNING in ifp->if_flags, and then call
 * ifq_clr_oactive() against each ifq, as in the sketch below.
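 *
 * A sketch of that part of a hypothetical drv_up() follows; the
 * hardware setup that precedes it is elided:
 *
 *	SET(ifp->if_flags, IFF_RUNNING);
 *
 *	for (i = 0; i < sc->sc_num_queues; i++)
 *		ifq_clr_oactive(ifp->if_ifqs[i]);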
 *
 * == if_start
 *
 * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
 * ifq_is_oactive() does not return true, and, via a call to ifq_len(),
 * that there are packets pending transmission. Drivers are therefore
 * no longer responsible for doing these checks themselves.
 *
 * If a driver should not transmit packets while its link is down, use
 * ifq_purge() to flush pending packets from the transmit queue.
 *
 * Drivers for hardware should use the following pattern to transmit
 * packets:
 *
 *	void
 *	drv_start(struct ifqueue *ifq)
 *	{
 *		struct drv_tx_ring *ring = ifq->ifq_softc;
 *		struct ifnet *ifp = ifq->ifq_if;
 *		struct drv_softc *sc = ifp->if_softc;
 *		struct mbuf *m;
 *
 *		if (NO_LINK) {
 *			ifq_purge(ifq);
 *			return;
 *		}
 *
 *		for (;;) {
 *			if (NO_SPACE(ring)) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_dequeue(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (drv_encap(sc, ring, m) != 0) { // map and fill ring
 *				m_freem(m);
 *				continue;
 *			}
 *
 *			bpf_mtap();
 *		}
 *
 *		drv_kick(ring); // notify hw of new descriptors on the ring
 *	}
 *
 * == Transmission completion
 *
 * The following pattern should be used for transmit queue interrupt
 * processing:
 *
 *	void
 *	drv_txeof(struct drv_tx_ring *ring)
 *	{
 *		struct ifqueue *ifq = ring->ifq;
 *
 *		while (COMPLETED_PKTS(ring)) {
 *			// unmap packets, m_freem() the mbufs.
 *		}
 *
 *		if (ifq_is_oactive(ifq))
 *			ifq_restart(ifq);
 *	}
 *
 * == Stop
 *
 * Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
 * should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
 * routine is not running before freeing any resources it uses:
 *
 *	void
 *	drv_down(struct drv_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *		struct ifqueue *ifq;
 *		int i;
 *
 *		CLR(ifp->if_flags, IFF_RUNNING);
 *		DISABLE_INTERRUPTS();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_barrier(ifq);
 *		}
 *
 *		intr_barrier(sc->sc_ih);
 *
 *		FREE_RESOURCES();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_clr_oactive(ifq);
 *		}
 *	}
 *
 */

struct ifq_ops {
	unsigned int		 (*ifqop_idx)(unsigned int,
				    const struct mbuf *);
	struct mbuf		*(*ifqop_enq)(struct ifqueue *, struct mbuf *);
	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *, void **);
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	void			 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
	void			*(*ifqop_alloc)(unsigned int, void *);
	void			 (*ifqop_free)(unsigned int, void *);
};
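
/*
 * A traffic conditioner provides its callbacks in a const ifq_ops
 * instance, e.g. for a hypothetical fifo conditioner (the fifo_*
 * names are illustrative, not an existing implementation):
 *
 *	static const struct ifq_ops fifo_ops = {
 *		fifo_idx,		// pick a transmit queue for an mbuf
 *		fifo_enq,		// queue an mbuf, or return one to drop
 *		fifo_deq_begin,		// begin a transactional dequeue
 *		fifo_deq_commit,	// commit the transactional dequeue
 *		fifo_purge,		// move all queued mbufs onto a list
 *		fifo_alloc,		// allocate conditioner state
 *		fifo_free,		// free conditioner state
 *	};
 */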

extern const struct ifq_ops * const ifq_priq_ops;

/*
 * Interface send queues.
 */

void		 ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
void		 ifq_add_data(struct ifqueue *, struct if_data *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
void		 ifq_start(struct ifqueue *);
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
int		 ifq_hdatalen(struct ifqueue *);
void		 ifq_mfreem(struct ifqueue *, struct mbuf *);
void		 ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int	 ifq_purge(struct ifqueue *);
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);
void		 ifq_serialize(struct ifqueue *, struct task *);
void		 ifq_barrier(struct ifqueue *);

int		 ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
		     const char *, volatile unsigned int *,
		     volatile unsigned int *);

#define	ifq_len(_ifq)			((_ifq)->ifq_len)
#define	ifq_empty(_ifq)			(ifq_len(_ifq) == 0)
#define	ifq_set_maxlen(_ifq, _l)	((_ifq)->ifq_maxlen = (_l))
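
/*
 * Drivers typically size the send queue to match their tx ring
 * during attach, e.g. (a sketch; DRV_NUM_TX_DESC is illustrative,
 * and many rings keep one descriptor slot unused):
 *
 *	ifq_set_maxlen(&ifp->if_snd, DRV_NUM_TX_DESC - 1);
 */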

static inline int
ifq_is_priq(struct ifqueue *ifq)
{
	return (ifq->ifq_ops == ifq_priq_ops);
}

static inline void
ifq_set_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 1;
}

static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 0;
}

static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	return (ifq->ifq_oactive);
}

static inline void
ifq_restart(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_restart);
}

static inline unsigned int
ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
{
	return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
}

/* ifiq */

void		 ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
void		 ifiq_destroy(struct ifiqueue *);
int		 ifiq_input(struct ifiqueue *, struct mbuf_list *);
int		 ifiq_enqueue(struct ifiqueue *, struct mbuf *);
void		 ifiq_add_data(struct ifiqueue *, struct if_data *);

#define	ifiq_len(_ifiq)			ml_len(&(_ifiq)->ifiq_ml)
#define	ifiq_empty(_ifiq)		ml_empty(&(_ifiq)->ifiq_ml)
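
/*
 * A driver's rx interrupt handler typically gathers completed
 * packets on an mbuf_list and passes them to the stack in a single
 * ifiq_input() call, e.g. (a sketch; the DRV_RX_PKT ring accessor
 * and the ring member names are illustrative):
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *	struct mbuf *m;
 *
 *	while ((m = DRV_RX_PKT(ring)) != NULL)
 *		ml_enqueue(&ml, m);
 *
 *	// a non-zero return asks the driver to slow the ring down
 *	if (ifiq_input(ring->ifiq, &ml))
 *		if_rxr_livelocked(&ring->rxr);
 */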

#endif /* _KERNEL */

#endif /* _NET_IFQ_H_ */