/*	$OpenBSD: ifq.h,v 1.41 2023/11/10 15:51:24 bluhm Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_

struct ifnet;
struct kstat;

struct ifq_ops;

struct ifqueue {
	struct ifnet		*ifq_if;
	struct taskq		*ifq_softnet;
	union {
		void			*_ifq_softc;
		/*
		 * a ring's sndq is found by looking up an array of pointers.
		 * by default we only have one sndq and the default drivers
		 * don't use ifq_softc, so we can borrow it for the map until
		 * we need to allocate a proper map.
		 */
		struct ifqueue		*_ifq_ifqs[1];
	} _ifq_ptr;
#define ifq_softc	 _ifq_ptr._ifq_softc
#define ifq_ifqs	 _ifq_ptr._ifq_ifqs

	/* mbuf handling */
	struct mutex		 ifq_mtx;
	const struct ifq_ops	*ifq_ops;
	void			*ifq_q;
	struct mbuf_list	 ifq_free;
	unsigned int		 ifq_len;
	unsigned int		 ifq_oactive;

	/* statistics */
	uint64_t		 ifq_packets;
	uint64_t		 ifq_bytes;
	uint64_t		 ifq_qdrops;
	uint64_t		 ifq_errors;
	uint64_t		 ifq_mcasts;
	uint32_t		 ifq_oactives;

	struct kstat		*ifq_kstat;

	/* work serialisation */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;
	struct task		 ifq_bundle;

	/* work to be serialised */
	struct task		 ifq_start;
	struct task		 ifq_restart;

	/* properties */
	unsigned int		 ifq_maxlen;
	unsigned int		 ifq_idx;
};

struct ifiqueue {
	struct ifnet		*ifiq_if;
	struct taskq		*ifiq_softnet;
	union {
		void			*_ifiq_softc;
		struct ifiqueue		*_ifiq_ifiqs[1];
	} _ifiq_ptr;
#define ifiq_softc	 _ifiq_ptr._ifiq_softc
#define ifiq_ifiqs	 _ifiq_ptr._ifiq_ifiqs

	struct mutex		 ifiq_mtx;
	struct mbuf_list	 ifiq_ml;
	struct task		 ifiq_task;
	unsigned int		 ifiq_pressure;

	/* counters */
	uint64_t		 ifiq_packets;
	uint64_t		 ifiq_bytes;
	uint64_t		 ifiq_fdrops;
	uint64_t		 ifiq_qdrops;
	uint64_t		 ifiq_errors;
	uint64_t		 ifiq_mcasts;
	uint64_t		 ifiq_noproto;

	/* number of times a list of packets was put on ifiq_ml */
	uint64_t		 ifiq_enqueues;
	/* number of times a list of packets was pulled off ifiq_ml */
	uint64_t		 ifiq_dequeues;

	struct kstat		*ifiq_kstat;

	/* properties */
	unsigned int		 ifiq_idx;
};

#ifdef _KERNEL

#define IFQ_MAXLEN		256

/*
 * Interface Send Queues
 *
 * struct ifqueue sits between the network stack and a driver's
 * transmission of packets. The high level view is that when the stack
 * has finished generating a packet it hands it to a driver for
 * transmission. It does this by queueing the packet on an ifqueue and
 * notifying the driver to start transmission of the queued packets.
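 *
 * A minimal sketch of that handoff (not a fixed part of this API;
 * m and ifp are assumed to be a finished mbuf and its outgoing
 * interface, and if_nifqs is the number of ifqueues on ifp):
 *
 *	struct ifqueue *ifq;
 *	int error;
 *
 *	ifq = ifp->if_ifqs[ifq_idx(ifp->if_ifqs[0], ifp->if_nifqs, m)];
 *	error = ifq_enqueue(ifq, m);
 *	if (error != 0)
 *		return (error); // the mbuf was dropped and freed
 *	ifq_start(ifq);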
 *
 * A network device may have multiple contexts for the transmission
 * of packets, ie, independent transmit rings. Such a network device,
 * represented by a struct ifnet, would then have multiple ifqueue
 * structures, each of which maps to an independent transmit ring.
 *
 * struct ifqueue also provides the point where conditioning of
 * traffic (ie, priq and hfsc) is implemented, and provides some
 * infrastructure to assist in the implementation of network drivers.
 *
 * = ifq API
 *
 * The ifq API provides functions for three distinct consumers:
 *
 * 1. The network stack
 * 2. Traffic QoS/conditioning implementations
 * 3. Network drivers
 *
 * == Network Stack API
 *
 * The network stack is responsible for initialising and destroying
 * the ifqueue structures, changing the traffic conditioner on an
 * interface, enqueuing packets for transmission, and notifying
 * the driver to start transmission of a particular ifqueue.
 *
 * === ifq_init()
 *
 * During if_attach(), the network stack calls ifq_init() to initialise
 * the ifqueue structure. By default it configures the priq traffic
 * conditioner.
 *
 * === ifq_destroy()
 *
 * The network stack calls ifq_destroy() during if_detach() to tear
 * down the ifqueue structure. It frees the traffic conditioner state,
 * and frees any mbufs that were left queued.
 *
 * === ifq_attach()
 *
 * ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All the pending mbufs are removed from the previous
 * conditioner and requeued on the new one.
 *
 * === ifq_idx()
 *
 * ifq_idx() selects a specific ifqueue from the current ifnet
 * structure for use in the transmission of the mbuf.
 *
 * === ifq_enqueue()
 *
 * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
 * current traffic conditioner may drop a packet to make space on the
 * queue.
 *
 * === ifq_start()
 *
 * Once a packet has been successfully queued with ifq_enqueue(),
 * the network card is notified with a call to ifq_start().
 * Calls to ifq_start() run in the ifqueue serialisation context,
 * guaranteeing that only one instance of ifp->if_qstart() will be
 * running on behalf of a specific ifqueue in the system at any point
 * in time.
 *
 * == Traffic conditioners API
 *
 * The majority of interaction between struct ifqueue and a traffic
 * conditioner occurs via the callbacks a traffic conditioner provides
 * in an instance of struct ifq_ops.
 *
 * XXX document ifqop_*
 *
 * The ifqueue API implements the locking on behalf of the conditioning
 * implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue
 * lock needs to be taken to allow for a consistent or safe view. The
 * queue lock may be taken and released with ifq_q_enter() and
 * ifq_q_leave().
 *
 * === ifq_q_enter()
 *
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
 * ifq_ops did not match the current conditioner.
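 *
 * For example, a minimal sketch of inspecting the priq conditioner,
 * assuming a hypothetical struct priq describing its internals:
 *
 *	struct priq *pq;
 *
 *	pq = ifq_q_enter(ifq, ifq_priq_ops);
 *	if (pq == NULL)
 *		return; // a different conditioner is attached
 *
 *	// read pq state here, under the queue lock
 *
 *	ifq_q_leave(ifq, pq);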
 *
 * === ifq_q_leave()
 *
 * The queue lock acquired with ifq_q_enter() is released with
 * ifq_q_leave().
 *
 * === ifq_mfreem() and ifq_mfreeml()
 *
 * A goal of the API is to avoid freeing an mbuf while mutexes are
 * held. Because the ifq API manages the lock on behalf of the backend
 * ifq_ops, the backend should not directly free mbufs. If a conditioner
 * backend needs to drop a packet during the handling of ifqop_deq_begin,
 * it may free it by calling ifq_mfreem(). This accounts for the drop,
 * and schedules the free of the mbuf outside the hold of ifq_mtx.
 * ifq_mfreeml() takes an mbuf list as an argument instead.
 *
 *
 * == Network Driver API
 *
 * The API used by network drivers is mostly documented in the
 * ifq_dequeue(9) manpage except for ifq_serialize().
 *
 * === ifq_serialize()
 *
 * A driver may run arbitrary work in the ifqueue serialiser context
 * via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set().
 *
 * The work will be run in series with any other work dispatched by
 * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
 *
 * Because the work may be run on another CPU, the lifetime of the
 * task and the work it represents can extend beyond the end of the
 * call to ifq_serialize() that dispatched it.
 *
 *
 * = ifqueue work serialisation
 *
 * ifqueues provide a mechanism to dispatch work to be run in a single
 * context. Work in this mechanism is represented by task structures.
 *
 * The tasks are run in a context similar to a taskq serviced by a
 * single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, they will be
 * queued to be run by the first CPU. The second CPU will return
 * immediately.
 *
 * = MP Safe Network Drivers
 *
 * An MP safe network driver is one in which its start routine can be
 * called by the network stack without holding the big kernel lock.
 *
 * == Attach
 *
 * A driver advertises its ability to run its start routine without
 * the kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
 * before calling if_attach(). Advertising an MPSAFE start routine
 * also implies that the driver understands that a network card can
 * have multiple rings or transmit queues, and therefore provides an
 * if_qstart function (which takes an ifqueue pointer) instead of an
 * if_start function (which takes an ifnet pointer).
 *
 * If the hardware supports multiple transmit rings, it advertises
 * support for multiple rings to the network stack with if_attach_queues()
 * after the call to if_attach(). if_attach_queues() allocates a struct
 * ifqueue for each hardware ring, which can then be initialised by
 * the driver with data for each ring.
 *
 *	void	drv_start(struct ifqueue *);
 *
 *	void
 *	drv_attach()
 *	{
 *	...
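 *		// both IFXF_MPSAFE and if_qstart must be set up
 *		// before if_attach() is called below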
 *		ifp->if_xflags = IFXF_MPSAFE;
 *		ifp->if_qstart = drv_start;
 *		if_attach(ifp);
 *
 *		if_attach_queues(ifp, DRV_NUM_TX_RINGS);
 *		for (i = 0; i < DRV_NUM_TX_RINGS; i++) {
 *			struct ifqueue *ifq = ifp->if_ifqs[i];
 *			struct drv_tx_ring *ring = &sc->sc_tx_rings[i];
 *
 *			ifq->ifq_softc = ring;
 *			ring->ifq = ifq;
 *		}
 *	}
 *
 * The network stack will then call ifp->if_qstart via ifq_start()
 * to guarantee there is only one instance of that function running
 * for each ifq in the system, and to serialise it with other work
 * the driver may provide.
 *
 * == Initialise
 *
 * When the stack requests an interface be brought up (ie, drv_ioctl()
 * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
 * drivers should set IFF_RUNNING in ifp->if_flags, and then call
 * ifq_clr_oactive() against each ifq.
 *
 * == if_start
 *
 * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
 * ifq_is_oactive() does not return true, and that there are pending
 * packets to transmit via a call to ifq_len(). Therefore, drivers are
 * no longer responsible for doing this themselves.
 *
 * If a driver should not transmit packets while its link is down, it
 * should use ifq_purge() to flush pending packets from the transmit
 * queue.
 *
 * Drivers for hardware should use the following pattern to transmit
 * packets:
 *
 *	void
 *	drv_start(struct ifqueue *ifq)
 *	{
 *		struct drv_tx_ring *ring = ifq->ifq_softc;
 *		struct ifnet *ifp = ifq->ifq_if;
 *		struct drv_softc *sc = ifp->if_softc;
 *		struct mbuf *m;
 *		int kick = 0;
 *
 *		if (NO_LINK) {
 *			ifq_purge(ifq);
 *			return;
 *		}
 *
 *		for (;;) {
 *			if (NO_SPACE(ring)) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_dequeue(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (drv_encap(sc, ring, m) != 0) { // map and fill ring
 *				m_freem(m);
 *				continue;
 *			}
 *
 *			if (ifp->if_bpf != NULL)
 *				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 *
 *			kick = 1;
 *		}
 *
 *		if (kick)
 *			drv_kick(ring); // notify hw of new descriptors
 *	}
 *
 * == Transmission completion
 *
 * The following pattern should be used for transmit queue interrupt
 * processing:
 *
 *	void
 *	drv_txeof(struct drv_tx_ring *ring)
 *	{
 *		struct ifqueue *ifq = ring->ifq;
 *
 *		while (COMPLETED_PKTS(ring)) {
 *			// unmap packets, m_freem() the mbufs.
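 *			// reclaiming descriptors here is what lets
 *			// NO_SPACE() in drv_start() clear again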
 *		}
 *
 *		if (ifq_is_oactive(ifq))
 *			ifq_restart(ifq);
 *	}
 *
 * == Stop
 *
 * Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
 * should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
 * routine is not running before freeing any resources it uses:
 *
 *	void
 *	drv_down(struct drv_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *		struct ifqueue *ifq;
 *		int i;
 *
 *		CLR(ifp->if_flags, IFF_RUNNING);
 *		DISABLE_INTERRUPTS();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_barrier(ifq);
 *		}
 *
 *		intr_barrier(sc->sc_ih);
 *
 *		FREE_RESOURCES();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_clr_oactive(ifq);
 *		}
 *	}
 */

struct ifq_ops {
	unsigned int		 (*ifqop_idx)(unsigned int,
				    const struct mbuf *);
	struct mbuf		*(*ifqop_enq)(struct ifqueue *, struct mbuf *);
	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *, void **);
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	void			 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
	void			*(*ifqop_alloc)(unsigned int, void *);
	void			 (*ifqop_free)(unsigned int, void *);
};

extern const struct ifq_ops * const ifq_priq_ops;

/*
 * Interface send queues.
 */

void		 ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
void		 ifq_add_data(struct ifqueue *, struct if_data *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
void		 ifq_start(struct ifqueue *);
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
int		 ifq_hdatalen(struct ifqueue *);
void		 ifq_init_maxlen(struct ifqueue *, unsigned int);
void		 ifq_mfreem(struct ifqueue *, struct mbuf *);
void		 ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int	 ifq_purge(struct ifqueue *);
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);
void		 ifq_serialize(struct ifqueue *, struct task *);
void		 ifq_barrier(struct ifqueue *);
void		 ifq_set_oactive(struct ifqueue *);

int		 ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
		     const char *, volatile unsigned int *,
		     volatile unsigned int *);

#define ifq_len(_ifq)		READ_ONCE((_ifq)->ifq_len)
#define ifq_empty(_ifq)		(ifq_len(_ifq) == 0)

static inline int
ifq_is_priq(struct ifqueue *ifq)
{
	return (ifq->ifq_ops == ifq_priq_ops);
}

static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 0;
}

static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	return (ifq->ifq_oactive);
}

static inline void
ifq_restart(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_restart);
}

static inline unsigned int
ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
{
	return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
}
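
/*
 * A minimal sketch of the two-stage dequeue API declared above,
 * assuming a hypothetical drv_encap() that can refuse a packet when
 * the transmit ring is full:
 *
 *	m = ifq_deq_begin(ifq);
 *	if (m == NULL)
 *		return;
 *
 *	if (drv_encap(sc, ring, m) != 0) {
 *		ifq_deq_rollback(ifq, m); // leave the mbuf on the queue
 *		return;
 *	}
 *
 *	ifq_deq_commit(ifq, m); // the mbuf now belongs to the driver
 */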

/* ifiq */

void		 ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
void		 ifiq_destroy(struct ifiqueue *);
int		 ifiq_input(struct ifiqueue *, struct mbuf_list *);
int		 ifiq_enqueue(struct ifiqueue *, struct mbuf *);
void		 ifiq_add_data(struct ifiqueue *, struct if_data *);

#define ifiq_len(_ifiq)		READ_ONCE(ml_len(&(_ifiq)->ifiq_ml))
#define ifiq_empty(_ifiq)	(ifiq_len(_ifiq) == 0)

#endif /* _KERNEL */

#endif /* _NET_IFQ_H_ */