/*	$OpenBSD: ifq.h,v 1.33 2021/03/10 10:21:48 jsg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_

struct ifnet;
struct kstat;

struct ifq_ops;

struct ifqueue {
	struct ifnet		*ifq_if;
	struct taskq		*ifq_softnet;
	union {
		void			*_ifq_softc;
		/*
		 * a ring's sndq is found by looking up an array of pointers.
		 * by default we only have one sndq and the default drivers
		 * don't use ifq_softc, so we can borrow it for the map until
		 * we need to allocate a proper map.
		 */
		struct ifqueue		*_ifq_ifqs[1];
	} _ifq_ptr;
#define ifq_softc	 _ifq_ptr._ifq_softc
#define ifq_ifqs	 _ifq_ptr._ifq_ifqs

	/* mbuf handling */
	struct mutex		 ifq_mtx;
	const struct ifq_ops	*ifq_ops;
	void			*ifq_q;
	struct mbuf_list	 ifq_free;
	unsigned int		 ifq_len;
	unsigned int		 ifq_oactive;

	/* statistics */
	uint64_t		 ifq_packets;
	uint64_t		 ifq_bytes;
	uint64_t		 ifq_qdrops;
	uint64_t		 ifq_errors;
	uint64_t		 ifq_mcasts;

	struct kstat		*ifq_kstat;

	/* work serialisation */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;
	struct task		 ifq_bundle;

	/* work to be serialised */
	struct task		 ifq_start;
	struct task		 ifq_restart;

	/* properties */
	unsigned int		 ifq_maxlen;
	unsigned int		 ifq_idx;
};

struct ifiqueue {
	struct ifnet		*ifiq_if;
	struct taskq		*ifiq_softnet;
	union {
		void			*_ifiq_softc;
		struct ifiqueue		*_ifiq_ifiqs[1];
	} _ifiq_ptr;
#define ifiq_softc	 _ifiq_ptr._ifiq_softc
#define ifiq_ifiqs	 _ifiq_ptr._ifiq_ifiqs

	struct mutex		 ifiq_mtx;
	struct mbuf_list	 ifiq_ml;
	struct task		 ifiq_task;
	unsigned int		 ifiq_pressure;

	/* counters */
	uint64_t		 ifiq_packets;
	uint64_t		 ifiq_bytes;
	uint64_t		 ifiq_qdrops;
	uint64_t		 ifiq_errors;
	uint64_t		 ifiq_mcasts;
	uint64_t		 ifiq_noproto;

	struct kstat		*ifiq_kstat;

	/* properties */
	unsigned int		 ifiq_idx;
};

#ifdef _KERNEL

#define IFQ_MAXLEN	256

/*
 * Interface Send Queues
 *
 * struct ifqueue sits between the network stack and a driver's
 * transmission of packets. The high level view is that when the stack
 * has finished generating a packet it hands it to a driver for
 * transmission. It does this by queueing the packet on an ifqueue and
 * notifying the driver to start transmission of the queued packets.
 *
 * A network device may have multiple contexts for the transmission
 * of packets, ie, independent transmit rings. Such a network device,
 * represented by a struct ifnet, would then have multiple ifqueue
 * structures, each of which maps to an independent transmit ring.
 *
 * struct ifqueue also provides the point where conditioning of
 * traffic (ie, priq and hfsc) is implemented, and provides some
 * infrastructure to assist in the implementation of network drivers.
 *
 * = ifq API
 *
 * The ifq API provides functions for three distinct consumers:
 *
 * 1. The network stack
 * 2. Traffic QoS/conditioning implementations
 * 3. Network drivers
 *
 * == Network Stack API
 *
 * The network stack is responsible for initialising and destroying
 * the ifqueue structures, changing the traffic conditioner on an
 * interface, enqueuing packets for transmission, and notifying
 * the driver to start transmission of a particular ifqueue.
 *
 * === ifq_init()
 *
 * During if_attach(), the network stack calls ifq_init() to initialise
 * the ifqueue structure. By default it configures the priq traffic
 * conditioner.
 *
 * === ifq_destroy()
 *
 * The network stack calls ifq_destroy() during if_detach() to tear down
 * the ifqueue structure. It frees the traffic conditioner state, and
 * frees any mbufs that were left queued.
 *
 * === ifq_attach()
 *
 * ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All the pending mbufs are removed from the previous
 * conditioner and requeued on the new one.
 *
 * === ifq_idx()
 *
 * ifq_idx() selects a specific ifqueue from the current ifnet
 * structure for use in the transmission of the mbuf.
 *
 * === ifq_enqueue()
 *
 * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
 * current traffic conditioner may drop a packet to make space on the
 * queue.
 *
 * === ifq_start()
 *
 * Once a packet has been successfully queued with ifq_enqueue(),
 * the network card is notified with a call to ifq_start().
 * Calls to ifq_start() run in the ifqueue serialisation context,
 * guaranteeing that only one instance of ifp->if_qstart() will be
 * running on behalf of a specific ifqueue in the system at any point
 * in time.
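 *
 * Put together, the calls above combine roughly as follows. This is
 * only an illustrative sketch, not the stack's actual transmit entry
 * point; stack_transmit() is a made-up name:
 *
 *	int
 *	stack_transmit(struct ifnet *ifp, struct mbuf *m)
 *	{
 *		struct ifqueue *ifq = &ifp->if_snd;
 *		int error;
 *
 *		if (ifp->if_nifqs > 1) {
 *			// pick one of the transmit queues for this mbuf
 *			ifq = ifp->if_ifqs[ifq_idx(&ifp->if_snd,
 *			    ifp->if_nifqs, m)];
 *		}
 *
 *		error = ifq_enqueue(ifq, m);
 *		if (error != 0)
 *			return (error); // ifq_enqueue() freed the mbuf
 *
 *		ifq_start(ifq); // notify the driver
 *		return (0);
 *	}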
 *
 * == Traffic conditioners API
 *
 * The majority of interaction between struct ifqueue and a traffic
 * conditioner occurs via the callbacks a traffic conditioner provides
 * in an instance of struct ifq_ops.
 *
 * XXX document ifqop_*
 *
 * The ifqueue API implements the locking on behalf of the conditioning
 * implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue
 * lock needs to be taken to allow for a consistent or safe view. The
 * queue lock may be taken and released with ifq_q_enter() and
 * ifq_q_leave().
 *
 * === ifq_q_enter()
 *
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
 * ifq_ops did not match the current conditioner.
 *
 * === ifq_q_leave()
 *
 * The queue lock acquired with ifq_q_enter() is released with
 * ifq_q_leave().
 *
 * === ifq_mfreem() and ifq_mfreeml()
 *
 * A goal of the API is to avoid freeing an mbuf while mutexes are
 * held. Because the ifq API manages the lock on behalf of the backend
 * ifqops, the backend should not directly free mbufs. If a conditioner
 * backend needs to drop a packet during the handling of ifqop_deq_begin,
 * it may free it by calling ifq_mfreem(). This accounts for the drop,
 * and schedules the free of the mbuf outside the hold of ifq_mtx.
 * ifq_mfreeml() takes an mbuf list as an argument instead.
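 *
 * For example, a hypothetical conditioner backend that ages out stale
 * packets while searching for the next packet to transmit could drop
 * them from its ifqop_deq_begin handler as sketched below. The drv_q
 * structure and the drv_q_* helpers are illustrative only and are not
 * part of this API:
 *
 *	struct mbuf *
 *	drv_q_deq_begin(struct ifqueue *ifq, void **cookiep)
 *	{
 *		struct drv_q *dq = ifq->ifq_q;
 *		struct mbuf *m;
 *
 *		while ((m = drv_q_first(dq)) != NULL) {
 *			if (!drv_q_stale(dq, m))
 *				break;
 *
 *			// account for the drop; the mbuf is freed
 *			// after ifq_mtx has been released.
 *			drv_q_remove(dq, m);
 *			ifq_mfreem(ifq, m);
 *		}
 *
 *		*cookiep = m;
 *		return (m);
 *	}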
 *
 *
 * == Network Driver API
 *
 * The API used by network drivers is mostly documented in the
 * ifq_dequeue(9) manpage except for ifq_serialize().
 *
 * === ifq_serialize()
 *
 * A driver may run arbitrary work in the ifqueue serialiser context
 * via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set.
 *
 * The work will be run in series with any other work dispatched by
 * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
 *
 * Because the work may be run on another CPU, the lifetime of the
 * task and the work it represents can extend beyond the end of the
 * call to ifq_serialize() that dispatched it.
 *
 *
 * = ifqueue work serialisation
 *
 * ifqueues provide a mechanism to dispatch work to be run in a single
 * context. Work in this mechanism is represented by task structures.
 *
 * The tasks are run in a context similar to a taskq serviced by a
 * single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, that work will be
 * queued to be run by the first CPU, and the second CPU returns
 * immediately.
 *
 * = MP Safe Network Drivers
 *
 * An MP safe network driver is one whose start routine can be called
 * by the network stack without holding the big kernel lock.
 *
 * == Attach
 *
 * A driver advertises its ability to run its start routine without
 * the kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
 * before calling if_attach(). Advertising an MPSAFE start routine
 * also implies that the driver understands that a network card can
 * have multiple rings or transmit queues, and therefore provides an
 * if_qstart function (which takes an ifqueue pointer) instead of an
 * if_start function (which takes an ifnet pointer).
 *
 * If the hardware supports multiple transmit rings, it advertises
 * support for multiple rings to the network stack with if_attach_queues()
 * after the call to if_attach(). if_attach_queues() allocates a struct
 * ifqueue for each hardware ring, which can then be initialised by
 * the driver with data for each ring.
 *
 *	void	drv_start(struct ifqueue *);
 *
 *	void
 *	drv_attach()
 *	{
 *		...
 *		ifp->if_xflags = IFXF_MPSAFE;
 *		ifp->if_qstart = drv_start;
 *		if_attach(ifp);
 *
 *		if_attach_queues(ifp, DRV_NUM_TX_RINGS);
 *		for (i = 0; i < DRV_NUM_TX_RINGS; i++) {
 *			struct ifqueue *ifq = ifp->if_ifqs[i];
 *			struct drv_tx_ring *ring = &sc->sc_tx_rings[i];
 *
 *			ifq->ifq_softc = ring;
 *			ring->ifq = ifq;
 *		}
 *	}
 *
 * The network stack will then call ifp->if_qstart via ifq_start()
 * to guarantee there is only one instance of that function running
 * for each ifq in the system, and to serialise it with other work
 * the driver may provide.
 *
 * == Initialise
 *
 * When the stack requests an interface be brought up (ie, drv_ioctl()
 * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
 * drivers should set IFF_RUNNING in ifp->if_flags, and then call
 * ifq_clr_oactive() against each ifq.
 *
 * == if_start
 *
 * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
 * ifq_is_oactive() does not return true, and that there are pending
 * packets to transmit via a call to ifq_len(). Therefore, drivers are
 * no longer responsible for doing this themselves.
 *
 * If a driver should not transmit packets while its link is down, use
 * ifq_purge() to flush pending packets from the transmit queue.
 *
 * Drivers for hardware should use the following pattern to transmit
 * packets:
 *
 *	void
 *	drv_start(struct ifqueue *ifq)
 *	{
 *		struct drv_tx_ring *ring = ifq->ifq_softc;
 *		struct ifnet *ifp = ifq->ifq_if;
 *		struct drv_softc *sc = ifp->if_softc;
 *		struct mbuf *m;
 *
 *		if (NO_LINK) {
 *			ifq_purge(ifq);
 *			return;
 *		}
 *
 *		for (;;) {
 *			if (NO_SPACE(ring)) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_dequeue(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (drv_encap(sc, ring, m) != 0) { // map and fill ring
 *				m_freem(m);
 *				continue;
 *			}
 *
 *			bpf_mtap();
 *		}
 *
 *		drv_kick(ring); // notify hw of new descriptors on the ring
 *	}
 *
 * == Transmission completion
 *
 * The following pattern should be used for transmit queue interrupt
 * processing:
 *
 *	void
 *	drv_txeof(struct drv_tx_ring *ring)
 *	{
 *		struct ifqueue *ifq = ring->ifq;
 *
 *		while (COMPLETED_PKTS(ring)) {
 *			// unmap packets, m_freem() the mbufs.
 *		}
 *
 *		if (ifq_is_oactive(ifq))
 *			ifq_restart(ifq);
 *	}
 *
 * == Stop
 *
 * When an interface is brought down (ie, IFF_UP is cleared in
 * ifp->if_flags), the driver should clear IFF_RUNNING in ifp->if_flags
 * and guarantee that its start routine is not running before freeing
 * any resources it uses:
 *
 *	void
 *	drv_down(struct drv_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *		struct ifqueue *ifq;
 *		int i;
 *
 *		CLR(ifp->if_flags, IFF_RUNNING);
 *		DISABLE_INTERRUPTS();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_barrier(ifq);
 *		}
 *
 *		intr_barrier(sc->sc_ih);
 *
 *		FREE_RESOURCES();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_clr_oactive(ifq);
 *		}
 *	}
 *
 */

struct ifq_ops {
	unsigned int		 (*ifqop_idx)(unsigned int,
				    const struct mbuf *);
	struct mbuf		*(*ifqop_enq)(struct ifqueue *, struct mbuf *);
	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *, void **);
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	void			 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
	void			*(*ifqop_alloc)(unsigned int, void *);
	void			 (*ifqop_free)(unsigned int, void *);
};

extern const struct ifq_ops * const ifq_priq_ops;
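
/*
 * As noted by the XXX above, the ifqop_* callbacks are not yet
 * documented. The following sketch of a hypothetical "fifo"
 * conditioner's ifq_ops summarises what each callback is expected to
 * do; the fifo_* names are illustrative only and no such conditioner
 * exists:
 *
 *	const struct ifq_ops fifo_ops = {
 *		fifo_idx,	// map an mbuf to one of nifqs queues
 *		fifo_enq,	// queue an mbuf; return the mbuf that
 *				// should be dropped (it is counted and
 *				// freed by the caller), or NULL
 *		fifo_deq_begin,	// peek at the next mbuf to transmit and
 *				// set the cookie; do not remove it yet
 *		fifo_deq_commit, // remove the mbuf returned by deq_begin
 *		fifo_purge,	// move all queued mbufs onto the given
 *				// mbuf_list for the caller to free
 *		fifo_alloc,	// allocate per-queue conditioner state
 *		fifo_free,	// free state returned by fifo_alloc
 *	};
 *
 * A conditioner is installed on a queue by passing its ifq_ops (and an
 * argument for ifqop_alloc) to ifq_attach().
 */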

/*
 * Interface send queues.
 */

void		 ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
void		 ifq_add_data(struct ifqueue *, struct if_data *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
void		 ifq_start(struct ifqueue *);
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
int		 ifq_hdatalen(struct ifqueue *);
void		 ifq_mfreem(struct ifqueue *, struct mbuf *);
void		 ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int	 ifq_purge(struct ifqueue *);
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);
void		 ifq_serialize(struct ifqueue *, struct task *);
void		 ifq_barrier(struct ifqueue *);

int		 ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
		     const char *, volatile unsigned int *,
		     volatile unsigned int *);

#define ifq_len(_ifq)			((_ifq)->ifq_len)
#define ifq_empty(_ifq)			(ifq_len(_ifq) == 0)
#define ifq_set_maxlen(_ifq, _l)	((_ifq)->ifq_maxlen = (_l))

static inline int
ifq_is_priq(struct ifqueue *ifq)
{
	return (ifq->ifq_ops == ifq_priq_ops);
}

static inline void
ifq_set_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 1;
}

static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 0;
}

static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	return (ifq->ifq_oactive);
}

static inline void
ifq_restart(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_restart);
}

static inline unsigned int
ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
{
	return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
}

/* ifiq */

void		 ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
void		 ifiq_destroy(struct ifiqueue *);
int		 ifiq_input(struct ifiqueue *, struct mbuf_list *);
int		 ifiq_enqueue(struct ifiqueue *, struct mbuf *);
void		 ifiq_add_data(struct ifiqueue *, struct if_data *);

#define ifiq_len(_ifiq)		ml_len(&(_ifiq)->ifiq_ml)
#define ifiq_empty(_ifiq)	ml_empty(&(_ifiq)->ifiq_ml)

#endif /* _KERNEL */

#endif /* _NET_IFQ_H_ */