/*	$OpenBSD: ifq.h,v 1.4 2015/12/29 12:35:43 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_

struct ifnet;

struct ifq_ops;

struct ifqueue {
	struct ifnet		*ifq_if;

	/* mbuf handling */
	struct mutex		 ifq_mtx;
	uint64_t		 ifq_drops;
	const struct ifq_ops	*ifq_ops;
	void			*ifq_q;
	unsigned int		 ifq_len;
	unsigned int		 ifq_oactive;

	/* work serialisation */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;

	/* work to be serialised */
	struct task		 ifq_start;
	struct task		 ifq_restart;

	unsigned int		 ifq_maxlen;
};

#ifdef _KERNEL

#define IFQ_MAXLEN		256

/*
 * Interface Send Queues
 *
 * struct ifqueue sits between the network stack and a driver's
 * transmission of packets. The high level view is that when the stack
 * has finished generating a packet it hands it to a driver for
 * transmission. It does this by queueing the packet on an ifqueue and
 * notifying the driver to start transmission of the queued packets.
 *
 * struct ifqueue is also the point where conditioning of traffic
 * (ie, priq and hfsc) is implemented, and it provides some
 * infrastructure to assist in the implementation of network drivers.
 *
 * = ifq API
 *
 * The ifq API provides functions for three distinct consumers:
 *
 * 1. The network stack
 * 2. Traffic QoS/conditioning implementations
 * 3. Network drivers
 *
 * == Network Stack API
 *
 * The network stack is responsible for initialising and destroying
 * the ifqueue structure, changing the traffic conditioner on an
 * interface queue, enqueuing packets for transmission, and notifying
 * the driver to start transmission.
 *
 * === ifq_init()
 *
 * During if_attach(), the network stack calls ifq_init() to initialise
 * the ifqueue structure. By default it configures the priq traffic
 * conditioner.
 *
 * === ifq_destroy()
 *
 * The network stack calls ifq_destroy() during if_detach() to tear
 * down the ifqueue structure. It frees the traffic conditioner state,
 * and frees any mbufs that were left queued.
 *
 * === ifq_attach()
 *
 * ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All the pending mbufs are removed from the previous
 * conditioner and requeued on the new one.
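 *
 * A minimal sketch of switching an interface to the stock priq
 * conditioner (assuming priq needs no allocation argument):
 *
 * 	ifq_attach(&ifp->if_snd, ifq_priq_ops, NULL);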
 *
 * === ifq_enqueue() and ifq_enqueue_try()
 *
 * ifq_enqueue() and ifq_enqueue_try() attempt to fit an mbuf onto the
 * ifqueue. If the current traffic conditioner rejects the packet it
 * won't be queued and will be counted as a drop. ifq_enqueue() will
 * free the mbuf on the caller's behalf if the packet is rejected.
 * ifq_enqueue_try() does not free the mbuf, allowing the caller to
 * reuse it.
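 *
 * A sketch of the differing free semantics (assuming both return
 * non-zero on rejection):
 *
 * 	if (ifq_enqueue(&ifp->if_snd, m) != 0)
 * 		return (ENOBUFS);	// m has already been freed
 *
 * 	if (ifq_enqueue_try(&ifp->if_snd, m) != 0)
 * 		m_freem(m);		// m is still the caller's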
 *
 * === ifq_start()
 *
 * Once a packet has been successfully queued with ifq_enqueue() or
 * ifq_enqueue_try(), the network card is notified with a call to
 * if_start(). If an interface is marked with IFXF_MPSAFE in its
 * if_xflags field, if_start() calls ifq_start() to dispatch the
 * interface's start routine. Calls to ifq_start() run in the ifqueue
 * serialisation context, guaranteeing that only one instance of
 * ifp->if_start() will be running in the system at any point in time.
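 *
 * A sketch of the resulting stack side flow:
 *
 * 	if (ifq_enqueue(&ifp->if_snd, m) != 0)
 * 		return (ENOBUFS);
 *
 * 	if_start(ifp);	// on IFXF_MPSAFE interfaces this goes
 * 			// through ifq_start() and the serialiser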
 *
 *
 * == Traffic conditioners API
 *
 * The majority of interaction between struct ifqueue and a traffic
 * conditioner occurs via the callbacks a traffic conditioner provides
 * in an instance of struct ifq_ops.
 *
 * XXX document ifqop_*
 *
 * The ifqueue API implements the locking on behalf of the conditioning
 * implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue
 * lock needs to be taken to allow for a consistent or safe view. The
 * queue lock may be taken and released with ifq_q_enter() and
 * ifq_q_leave().
 *
 * === ifq_q_enter()
 *
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
 * ifq_ops did not match the current conditioner.
 *
 * === ifq_q_leave()
 *
 * The queue lock acquired with ifq_q_enter() is released with
 * ifq_q_leave().
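 *
 * A minimal sketch, assuming the caller implements priq and so knows
 * the layout of its internal state (struct priq is hypothetical here):
 *
 * 	struct priq *pq;
 *
 * 	pq = ifq_q_enter(&ifp->if_snd, ifq_priq_ops);
 * 	if (pq != NULL) {
 * 		// inspect pq under the queue lock
 * 		ifq_q_leave(&ifp->if_snd, pq);
 * 	}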
 *
 *
 * == Network Driver API
 *
 * The API used by network drivers is mostly documented in the
 * ifq_dequeue(9) manpage except for ifq_serialize(),
 * ifq_is_serialized(), and IFQ_ASSERT_SERIALIZED().
 *
 * === ifq_serialize()
 *
 * A driver may run arbitrary work in the ifqueue serialiser context
 * via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set().
 *
 * The work will be run in series with any other work dispatched by
 * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
 *
 * Because the work may be run on another CPU, the lifetime of the
 * task and the work it represents can extend beyond the end of the
 * call to ifq_serialize() that dispatched it.
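 *
 * A minimal sketch, using a hypothetical driver task prepared once at
 * attach time:
 *
 * 	task_set(&sc->sc_drain_task, drv_drain, sc);	// at attach
 *
 * 	ifq_serialize(&ifp->if_snd, &sc->sc_drain_task);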
 *
 * === ifq_is_serialized()
 *
 * This function returns whether the caller is currently within the
 * ifqueue serialiser context.
 *
 * === IFQ_ASSERT_SERIALIZED()
 *
 * This macro asserts that the caller is currently within the
 * specified ifqueue serialiser context.
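 *
 * A sketch of guarding code that must run serialised (drv_txreset()
 * is hypothetical):
 *
 * 	void
 * 	drv_txreset(struct drv_softc *sc)
 * 	{
 * 		IFQ_ASSERT_SERIALIZED(&sc->sc_if.if_snd);
 *
 * 		// safe to touch state owned by the start routine
 * 	}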
 *
 *
 * = ifqueue work serialisation
 *
 * ifqueues provide a mechanism to dispatch work to be run in a single
 * context. Work in this mechanism is represented by task structures.
 *
 * The tasks are run in a context similar to a taskq serviced by a
 * single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, they will be
 * queued to be run by the first CPU, and the second CPU will return
 * immediately.
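 *
 * An illustrative interleaving (a sketch, not code):
 *
 * 	CPU0: ifq_serialize(ifq, t0)	-> runs t0 immediately
 * 	CPU1: ifq_serialize(ifq, t1)	-> queues t1, returns at once
 * 	CPU0:				-> picks up t1 and runs it too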
 *
 * = MP Safe Network Drivers
 *
 * An MP safe network driver is one whose start routine can be
 * called by the network stack without holding the big kernel lock.
 *
 * == Attach
 *
 * A driver advertises its ability to run its start routine without
 * the big kernel lock by setting the IFXF_MPSAFE flag in
 * ifp->if_xflags before calling if_attach():
 *
 * 	ifp->if_xflags = IFXF_MPSAFE;
 * 	ifp->if_start = drv_start;
 * 	if_attach(ifp);
 *
 * The network stack will then wrap its calls to ifp->if_start with
 * ifq_start() to guarantee there is only one instance of that function
 * running in the system and to serialise it with other work the driver
 * may provide.
 *
 * == Initialise
 *
 * When the stack requests an interface be brought up (ie, drv_ioctl()
 * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
 * drivers should set IFF_RUNNING in ifp->if_flags and call
 * ifq_clr_oactive().
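 *
 * A sketch of the up path (drv_up() and ENABLE_HARDWARE() are
 * hypothetical placeholders):
 *
 * 	void
 * 	drv_up(struct drv_softc *sc)
 * 	{
 * 		struct ifnet *ifp = &sc->sc_if;
 *
 * 		ENABLE_HARDWARE();	// rings, interrupts, etc.
 *
 * 		SET(ifp->if_flags, IFF_RUNNING);
 * 		ifq_clr_oactive(&ifp->if_snd);
 * 	}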
 *
 * == if_start
 *
 * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
 * ifq_is_oactive() does not return true, and that there are pending
 * packets to transmit via a call to ifq_len(). Therefore, drivers are
 * no longer responsible for doing these checks themselves.
 *
 * If a driver should not transmit packets while its link is down, it
 * should use ifq_purge() to flush pending packets from the transmit
 * queue.
 *
 * Drivers for hardware should use the following pattern to transmit
 * packets:
 *
 * 	void
 * 	drv_start(struct ifnet *ifp)
 * 	{
 * 		struct drv_softc *sc = ifp->if_softc;
 * 		struct mbuf *m;
 *
 * 		if (NO_LINK) {
 * 			ifq_purge(&ifp->if_snd);
 * 			return;
 * 		}
 *
 * 		for (;;) {
 * 			if (NO_SPACE) {
 * 				ifq_set_oactive(&ifp->if_snd);
 * 				break;
 * 			}
 *
 * 			m = ifq_dequeue(&ifp->if_snd);
 * 			if (m == NULL)
 * 				break;
 *
 * 			if (drv_encap(sc, m) != 0) { // map and fill ring
 * 				m_freem(m);
 * 				continue;
 * 			}
 *
 * 			if (ifp->if_bpf)
 * 				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 * 		}
 *
 * 		drv_kick(sc); // notify hw of new descriptors on the ring
 * 	}
 *
 * == Transmission completion
 *
 * The following pattern should be used for transmit queue interrupt
 * processing:
 *
 * 	void
 * 	drv_txeof(struct drv_softc *sc)
 * 	{
 * 		struct ifnet *ifp = &sc->sc_if;
 *
 * 		while (COMPLETED_PKTS) {
 * 			// unmap packets, m_freem() the mbufs.
 * 		}
 *
 * 		if (ifq_is_oactive(&ifp->if_snd))
 * 			ifq_restart(&ifp->if_snd);
 * 	}
 *
 * == Stop
 *
 * Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
 * should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
 * routine is not running before freeing any resources it uses:
 *
 * 	void
 * 	drv_down(struct drv_softc *sc)
 * 	{
 * 		struct ifnet *ifp = &sc->sc_if;
 *
 * 		CLR(ifp->if_flags, IFF_RUNNING);
 * 		DISABLE_INTERRUPTS();
 *
 * 		ifq_barrier(&ifp->if_snd);
 * 		intr_barrier(sc->sc_ih);
 *
 * 		FREE_RESOURCES();
 *
 * 		ifq_clr_oactive(&ifp->if_snd);
 * 	}
 *
 */

struct ifq_ops {
	void			*(*ifqop_alloc)(void *);
	void			 (*ifqop_free)(void *);
	int			 (*ifqop_enq)(struct ifqueue *, struct mbuf *);
	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *, void **);
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	void			 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
};
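
/*
 * A sketch of a conditioner providing these callbacks, with
 * hypothetical fifo_* implementations in the field order above:
 *
 * 	static const struct ifq_ops fifo_ops = {
 * 		fifo_alloc,
 * 		fifo_free,
 * 		fifo_enq,
 * 		fifo_deq_begin,
 * 		fifo_deq_commit,
 * 		fifo_purge,
 * 	};
 *
 * 	ifq_attach(&ifp->if_snd, &fifo_ops, NULL);
 */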

/*
 * Interface send queues.
 */

void		 ifq_init(struct ifqueue *, struct ifnet *);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
int		 ifq_enqueue_try(struct ifqueue *, struct mbuf *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
unsigned int	 ifq_purge(struct ifqueue *);
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);
void		 ifq_serialize(struct ifqueue *, struct task *);
int		 ifq_is_serialized(struct ifqueue *);
void		 ifq_barrier(struct ifqueue *);

#define	ifq_len(_ifq)			((_ifq)->ifq_len)
#define	ifq_empty(_ifq)			(ifq_len(_ifq) == 0)
#define	ifq_set_maxlen(_ifq, _l)	((_ifq)->ifq_maxlen = (_l))
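
/*
 * A common sizing sketch: bound the queue by the size of the tx ring
 * (sc_ntxdesc is a hypothetical driver field):
 *
 * 	ifq_set_maxlen(&ifp->if_snd, sc->sc_ntxdesc - 1);
 */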

static inline void
ifq_set_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 1;
}

static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 0;
}

static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	return (ifq->ifq_oactive);
}

static inline void
ifq_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

static inline void
ifq_restart(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_restart);
}

#define IFQ_ASSERT_SERIALIZED(_ifq)	KASSERT(ifq_is_serialized(_ifq))

extern const struct ifq_ops * const ifq_priq_ops;

#endif /* _KERNEL */

#endif /* _NET_IFQ_H_ */