1 /* 2 * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * $FreeBSD: head/sys/dev/netmap/netmap_kern.h 238985 2012-08-02 11:59:43Z luigi $ 29 * 30 * The header contains the definitions of constants and function 31 * prototypes used only in kernelspace. 
 */

#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_

#define WITH_VALE	// comment out to disable VALE support

/* branch prediction hints (gcc/clang builtins) */
#define likely(x)	__builtin_expect((long)!!(x), 1L)
#define unlikely(x)	__builtin_expect((long)!!(x), 0L)

/*
 * Lock abstractions: both the per-ring lock type and the global
 * netmap lock map onto the DragonFly lockmgr lock.
 */
#define NM_LOCK_T	struct lock
#define NMG_LOCK_T	struct lock
#define NMG_LOCK_INIT()	lockinit(&netmap_global_lock, \
				"netmap global lock", 0, LK_CANRECURSE)
#define NMG_LOCK_DESTROY()	lockuninit(&netmap_global_lock)
#define NMG_LOCK()	lockmgr(&netmap_global_lock, LK_EXCLUSIVE)
#define NMG_UNLOCK()	lockmgr(&netmap_global_lock, LK_RELEASE)
#define NMG_LOCK_ASSERT()	KKASSERT(lockstatus(&netmap_global_lock, NULL) != 0)

/* wait queue / mbuf accessors, platform-specific spellings */
#define NM_SELINFO_T	struct kqinfo
#define MBUF_LEN(m)	((m)->m_pkthdr.len)
#define MBUF_IFP(m)	((m)->m_pkthdr.rcvif)
#define NM_SEND_UP(ifp, m)	((ifp)->if_input(ifp, m, NULL, -1))

#define NM_ATOMIC_T	volatile int // XXX ?
/* atomic operations */
#include <machine/atomic.h>
/* returns non-zero if the flag was already set (acquire failed) */
#define NM_ATOMIC_TEST_AND_SET(p)	(!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p)		atomic_store_rel_int((p), 0)

#define prefetch(x)	__builtin_prefetch(x)

/* linux-style memory barrier names mapped to cpu fences */
#define mb()	cpu_mfence()
#define rmb()	cpu_lfence()
#define wmb()	cpu_sfence()

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_NETMAP);
#endif

// XXX linux struct, not used in FreeBSD
struct net_device_ops {
};
struct hrtimer {
};

#define IFCAP_NETMAP	0x8000	/* XXX move to <net/if.h> */

/* ND() is compiled out; D() logs with timestamp, function and line */
#define ND(format, ...)
#define D(format, ...)						\
	do {							\
		struct timeval __xxts;				\
		microtime(&__xxts);				\
		kprintf("%03d.%06d %s [%d] " format "\n",	\
		(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec,	\
		__FUNCTION__, __LINE__, ##__VA_ARGS__);		\
	} while (0)

/* rate limited, lps indicates how many per second */
#define RD(lps, format, ...)					\
	do {							\
		static int t0, __cnt;				\
		if (t0 != time_second) {			\
			t0 = time_second;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < lps)				\
			D(format, ##__VA_ARGS__);		\
	} while (0)

struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;

const char *nm_dump_buf(char *p, int len, int lim, char *dst);

#include <net/netmap/netmap_mbq.h>

extern NMG_LOCK_T	netmap_global_lock;

/*
 * private, kernel view of a ring. Keeps track of the status of
 * a ring across system calls.
 *
 *	nr_hwcur	index of the next buffer to refill.
 *			It corresponds to ring->cur - ring->reserved
 *
 *	nr_hwavail	the number of slots "owned" by userspace.
 *			nr_hwavail =:= ring->avail + ring->reserved
 *
 * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots.
 * This is so that, on a reset, buffers owned by userspace are not
 * modified by the kernel. In particular:
 * RX rings: the next empty buffer (hwcur + hwavail + hwofs) coincides with
 *	the next empty buffer as known by the hardware (next_to_check or so).
 * TX rings: hwcur + hwofs coincides with next_to_send
 *
 * Clients cannot issue concurrent syscall on a ring. The system
 * detects this and reports an error using two flags,
 * NKR_WBUSY and NKR_RBUSY
 * For received packets, slot->flags is set to nkr_slot_flags
 * so we can provide a proper initial value (e.g. set NS_FORWARD
 * when operating in 'transparent' mode).
 *
 * The following fields are used to implement lock-free copy of packets
 * from input to output ports in VALE switch:
 *	nkr_hwlease	buffer after the last one being copied.
 *			A writer in nm_bdg_flush reserves N buffers
 *			from nr_hwlease, advances it, then does the
 *			copy outside the lock.
 *			In RX rings (used for VALE ports),
 *			nkr_hwcur + nkr_hwavail <= nkr_hwlease < nkr_hwcur+N-1
 *			In TX rings (used for NIC or host stack ports)
 *			nkr_hwcur <= nkr_hwlease < nkr_hwcur+ nkr_hwavail
 *	nkr_leases	array of nkr_num_slots where writers can report
 *			completion of their block. NR_NOSLOT (~0) indicates
 *			that the writer has not finished yet
 *	nkr_lease_idx	index of next free slot in nr_leases, to be assigned
 *
 * The kring is manipulated by txsync/rxsync and generic netmap function.
 * q_lock is used to arbitrate access to the kring from within the netmap
 * code, and this and other protections guarantee that there is never
 * more than 1 concurrent call to txsync or rxsync. So we are free
 * to manipulate the kring from within txsync/rxsync without any extra
 * locks.
 */
struct netmap_kring {
	struct netmap_ring *ring;
	uint32_t nr_hwcur;	/* next buffer to refill (see comment above) */
	uint32_t nr_hwavail;	/* slots "owned" by userspace */
	uint32_t nr_kflags;	/* private driver flags */
	int32_t nr_hwreserved;
#define NKR_PENDINTR	0x1	// Pending interrupt.
	uint32_t nkr_num_slots;	/* size of the ring */
	int32_t	nkr_hwofs;	/* offset between NIC and netmap ring */

	uint16_t nkr_slot_flags;	/* initial value for flags */
	struct netmap_adapter *na;	/* backpointer to owning adapter */
	struct nm_bdg_fwd *nkr_ft;	/* forwarding table (VALE) */
	uint32_t *nkr_leases;		/* lease array, nkr_num_slots entries */
#define NR_NOSLOT	((uint32_t)~0)	/* lease not completed yet */
	uint32_t nkr_hwlease;		/* first slot not yet leased */
	uint32_t nkr_lease_idx;		/* next free entry in nkr_leases[] */

	NM_SELINFO_T si;	/* poll/select wait queue */
	NM_LOCK_T q_lock;	/* protects kring and ring. */
	NM_ATOMIC_T nr_busy;	/* prevent concurrent syscalls */

	volatile int nkr_stopped;	/* non-zero while ring is disabled */

	/* support for adapters without native netmap support.
	 * On tx rings we preallocate an array of tx buffers
	 * (same size as the netmap ring), on rx rings we
	 * store incoming packets in a queue.
	 * XXX who writes to the rx queue ?
	 */
	struct mbuf **tx_pool;
	u_int nr_ntc;		/* Emulation of a next-to-clean RX ring pointer. */
	struct mbq rx_queue;	/* A queue for intercepted rx mbufs. */

} __attribute__((__aligned__(64)));	/* cache-line aligned */


/* return the next index, with wraparound */
static inline uint32_t
nm_next(uint32_t i, uint32_t lim)
{
	return unlikely (i == lim) ? 0 : i + 1;
}

/*
 *
 * Here is the layout for the Rx and Tx rings.

       RxRING                            TxRING

      +-----------------+            +-----------------+
      |                 |            |                 |
      |XXX free slot XXX|            |XXX free slot XXX|
      +-----------------+            +-----------------+
      |                 |<-hwcur     |                 |<-hwcur
      | reserved    h   |            | (ready          |
      +-----------  w  -+            |  to be          |
 cur->|             a   |            |  sent)       h  |
      |             v   |            +----------    w  |
      |             a   |       cur->| (being        a |
      |             i   |            |  prepared)    v |
      | avail       l   |            |               a |
      +-----------------+            +  a  ------    i +
      |                 | ...        |  v            l |<-hwlease
      | (being          | ...        |  a            a | ...
      |  prepared)      | ...        |  i            i | ...
      +-----------------+ ...        |  l            l | ...
      |                 |<-hwlease   +-----------------+
      |                 |            |                 |
      |                 |            |                 |
      |                 |            |                 |
      |                 |            |                 |
      +-----------------+            +-----------------+

 * The cur/avail (user view) and hwcur/hwavail (kernel view)
 * are used in the normal operation of the card.
 *
 * When a ring is the output of a switch port (Rx ring for
 * a VALE port, Tx ring for the host stack or NIC), slots
 * are reserved in blocks through 'hwlease' which points
 * to the next unused slot.
 * On an Rx ring, hwlease is always after hwavail,
 * and completions cause avail to advance.
 * On a Tx ring, hwlease is always between cur and hwavail,
 * and completions cause cur to advance.
 *
 * nm_kr_space() returns the maximum number of slots that
 * can be assigned.
 * nm_kr_lease() reserves the required number of buffers,
 *    advances nkr_hwlease and also returns an entry in
 *    a circular array where completions should be reported.
 */




enum txrx { NR_RX = 0, NR_TX = 1 };

/*
 * The "struct netmap_adapter" extends the "struct adapter"
 * (or equivalent) device descriptor.
 * It contains all base fields needed to support netmap operation.
 * There are in fact different types of netmap adapters
 * (native, generic, VALE switch...) so a netmap_adapter is
 * just the first field in the derived type.
 */
struct netmap_adapter {
	/*
	 * On linux we do not have a good way to tell if an interface
	 * is netmap-capable. So we use the following trick:
	 * NA(ifp) points here, and the first entry (which hopefully
	 * always exists and is at least 32 bits) contains a magic
	 * value which we can use to detect that the interface is good.
	 */
	uint32_t magic;
	uint32_t na_flags;	/* future place for IFCAP_NETMAP */
#define NAF_SKIP_INTR	1	/* use the regular interrupt handler.
				 * useful during initialization
				 */
#define NAF_SW_ONLY	2	/* forward packets only to sw adapter */
#define NAF_BDG_MAYSLEEP 4	/* the bridge is allowed to sleep when
				 * forwarding packets coming from this
				 * interface
				 */
#define NAF_MEM_OWNER	8	/* the adapter is responsible for the
				 * deallocation of the memory allocator
				 */
#define NAF_NATIVE_ON	16	/* the adapter is native and the attached
				 * interface is in netmap mode
				 */
	int active_fds;		/* number of user-space descriptors using this
				 interface, which is equal to the number of
				 struct netmap_if objs in the mapped region. */

	u_int num_rx_rings;	/* number of adapter receive rings */
	u_int num_tx_rings;	/* number of adapter transmit rings */

	u_int num_tx_desc;	/* number of descriptor in each queue */
	u_int num_rx_desc;

	/* tx_rings and rx_rings are private but allocated
	 * as a contiguous chunk of memory. Each array has
	 * N+1 entries, for the adapter queues and for the host queue.
	 */
	struct netmap_kring *tx_rings;	/* array of TX rings. */
	struct netmap_kring *rx_rings;	/* array of RX rings. */
	void *tailroom;			/* space below the rings array */
					/* (used for leases) */


	NM_SELINFO_T tx_si, rx_si;	/* global wait queues */

	/* copy of if_qflush and if_transmit pointers, to intercept
	 * packets from the network stack when netmap is active.
	 */
	int (*if_transmit)(struct ifnet *, struct mbuf *);

	/* references to the ifnet and device routines, used by
	 * the generic netmap functions.
	 */
	struct ifnet *ifp;	/* adapter is ifp->if_softc */

	/* private cleanup */
	void (*nm_dtor)(struct netmap_adapter *);

	int (*nm_register)(struct netmap_adapter *, int onoff);

	int (*nm_txsync)(struct netmap_adapter *, u_int ring, int flags);
	int (*nm_rxsync)(struct netmap_adapter *, u_int ring, int flags);
#define NAF_FORCE_READ    1
#define NAF_FORCE_RECLAIM 2
	/* return configuration information */
	int (*nm_config)(struct netmap_adapter *,
		u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
	int (*nm_krings_create)(struct netmap_adapter *);
	void (*nm_krings_delete)(struct netmap_adapter *);
	int (*nm_notify)(struct netmap_adapter *,
		u_int ring, enum txrx, int flags);
#define NAF_GLOBAL_NOTIFY 4
#define NAF_DISABLE_NOTIFY 8

	/* standard refcount to control the lifetime of the adapter
	 * (it should be equal to the lifetime of the corresponding ifp)
	 */
	int na_refcount;

	/* memory allocator (opaque)
	 * We also cache a pointer to the lut_entry for translating
	 * buffer addresses, and the total number of buffers.
	 */
	struct netmap_mem_d *nm_mem;
	struct lut_entry *na_lut;
	uint32_t na_lut_objtotal;	/* max buffer index */

	/* used internally.
	 * If non-null, the interface cannot be bound
	 * from userspace
	 */
	void *na_private;
};

/*
 * If the NIC is owned by the kernel
 * (i.e., bridge), neither another bridge nor user can use it;
 * if the NIC is owned by a user, only users can share it.
 * Evaluation must be done under NMG_LOCK().
 */
#define NETMAP_OWNED_BY_KERN(na)	(na->na_private)
#define NETMAP_OWNED_BY_ANY(na) \
	(NETMAP_OWNED_BY_KERN(na) || (na->active_fds > 0))


/*
 * derived netmap adapters for various types of ports
 */
struct netmap_vp_adapter {	/* VALE software port */
	struct netmap_adapter up;

	/*
	 * Bridge support:
	 *
	 * bdg_port is the port number used in the bridge;
	 * na_bdg points to the bridge this NA is attached to.
	 */
	int bdg_port;
	struct nm_bridge *na_bdg;
	int retry;
};

struct netmap_hw_adapter {	/* physical device */
	struct netmap_adapter up;

	struct net_device_ops nm_ndo;	// XXX linux only
};

struct netmap_generic_adapter {	/* non-native device */
	struct netmap_hw_adapter up;

	/* Pointer to a previously used netmap adapter. */
	struct netmap_adapter *prev;

	/* generic netmap adapters support:
	 * a net_device_ops struct overrides ndo_select_queue(),
	 * save_if_input saves the if_input hook (FreeBSD),
	 * mit_timer and mit_pending implement rx interrupt mitigation,
	 */
	struct net_device_ops generic_ndo;
	void (*save_if_input)(struct ifnet *, struct mbuf *,
	    const struct pktinfo *, int);

	struct hrtimer mit_timer;
	int mit_pending;
};

#ifdef WITH_VALE

/* bridge wrapper for non VALE ports. It is used to connect real devices to the bridge.
 *
 * The real device must already have its own netmap adapter (hwna).  The
 * bridge wrapper and the hwna adapter share the same set of netmap rings and
 * buffers, but they have two separate sets of krings descriptors, with tx/rx
 * meanings swapped:
 *
 *                                  netmap
 *           bwrap     krings       rings       krings      hwna
 *         +------+   +------+     +-----+     +------+   +------+
 *         |tx_rings->|      |\   /|     |----|      |<-tx_rings|
 *         |      |   +------+ \ / +-----+    +------+   |      |
 *         |      |             X                        |      |
 *         |      |            / \                       |      |
 *         |      |   +------+/   \+-----+    +------+   |      |
 *         |rx_rings->|      |     |     |----|      |<-rx_rings|
 *         |      |   +------+     +-----+    +------+   |      |
 *         +------+                                      +------+
 *
 * - packets coming from the bridge go to the brwap rx rings, which are also the
 *   hwna tx rings.  The bwrap notify callback will then complete the hwna tx
 *   (see netmap_bwrap_notify).
 * - packets coming from the outside go to the hwna rx rings, which are also the
 *   bwrap tx rings.  The (overwritten) hwna notify method will then complete
 *   the bridge tx (see netmap_bwrap_intr_notify).
 *
 * The bridge wrapper may optionally connect the hwna 'host' rings to the
 * bridge. This is done by using a second port in the bridge and connecting it
 * to the 'host' netmap_vp_adapter contained in the netmap_bwrap_adapter.
 * The brwap host adapter cross-links the hwna host rings in the same way as shown above.
 *
 * - packets coming from the bridge and directed to host stack are handled by the
 *   bwrap host notify callback (see netmap_bwrap_host_notify)
 * - packets coming from the host stack are still handled by the overwritten
 *   hwna notify callback (netmap_bwrap_intr_notify), but are diverted to the
 *   host adapter depending on the ring number.
 *
 */
struct netmap_bwrap_adapter {
	struct netmap_vp_adapter up;	/* port as seen on the bridge */
	struct netmap_vp_adapter host;	/* for host rings */
	struct netmap_adapter *hwna;	/* the underlying device */

	/* backup of the hwna notify callback */
	int (*save_notify)(struct netmap_adapter *,
			u_int ring, enum txrx, int flags);
	/* When we attach a physical interface to the bridge, we
	 * allow the controlling process to terminate, so we need
	 * a place to store the netmap_priv_d data structure.
	 * This is only done when physical interfaces are attached to a bridge.
	 */
	struct netmap_priv_d *na_kpriv;
};


/*
 * Available space in the ring. Only used in VALE code
 */
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
{
	int space;

	if (is_rx) {
		/* slots between hwlease and (hwcur + hwreserved), wrapped */
		int busy = k->nkr_hwlease - k->nr_hwcur + k->nr_hwreserved;
		if (busy < 0)
			busy += k->nkr_num_slots;
		space = k->nkr_num_slots - 1 - busy;
	} else {
		space = k->nr_hwcur + k->nr_hwavail - k->nkr_hwlease;
		if (space < 0)
			space += k->nkr_num_slots;
	}
#if 0
	// sanity check
	/* NOTE(review): 'busy' is scoped to the is_rx branch above, so this
	 * disabled block would not compile if re-enabled as-is.
	 */
	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_hwavail >= k->nkr_num_slots ||
		busy < 0 ||
		busy >= k->nkr_num_slots) {
		D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d",
			k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
#endif
	return space;
}




/* make a lease on the kring for N positions. return the
 * lease index
 */
static inline uint32_t
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
{
	uint32_t lim = k->nkr_num_slots - 1;
	uint32_t lease_idx = k->nkr_lease_idx;

	/* mark the lease as not yet completed, then take the slot */
	k->nkr_leases[lease_idx] = NR_NOSLOT;
	k->nkr_lease_idx = nm_next(lease_idx, lim);

	if (n > nm_kr_space(k, is_rx)) {
		D("invalid request for %d slots", n);
		panic("x");
	}
	/* XXX verify that there are n slots */
	k->nkr_hwlease += n;
	if (k->nkr_hwlease > lim)
		k->nkr_hwlease -= lim + 1;	/* wraparound */

	/* consistency check on ring indexes (diagnostic only) */
	if (k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_hwavail >= k->nkr_num_slots ||
		k->nkr_lease_idx >= k->nkr_num_slots) {
		D("invalid kring %s, cur %d avail %d lease %d lease_idx %d lim %d",
			k->na->ifp->if_xname,
			k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
	return lease_idx;
}

#endif /* WITH_VALE */

/* return update position */
static inline uint32_t
nm_kr_rxpos(struct netmap_kring *k)
{
	uint32_t pos = k->nr_hwcur + k->nr_hwavail;
	if (pos >= k->nkr_num_slots)
		pos -= k->nkr_num_slots;	/* wraparound */
#if 0
	if (pos >= k->nkr_num_slots ||
		k->nkr_hwlease >= k->nkr_num_slots ||
		k->nr_hwcur >= k->nkr_num_slots ||
		k->nr_hwavail >= k->nkr_num_slots ||
		k->nkr_lease_idx >= k->nkr_num_slots) {
		D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d",
			k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
			k->nkr_lease_idx, k->nkr_num_slots);
	}
#endif
	return pos;
}


/*
 * protect against multiple threads using the same ring.
 * also check that the ring has not been stopped.
 * We only care for 0 or !=0 as a return code.
 */
#define NM_KR_BUSY	1
#define NM_KR_STOPPED	2

/* release the ring: clear the busy flag (release semantics) */
static __inline void nm_kr_put(struct netmap_kring *kr)
{
	NM_ATOMIC_CLEAR(&kr->nr_busy);
}

/* try to acquire the ring: returns 0 on success,
 * NM_KR_BUSY or NM_KR_STOPPED otherwise.
 */
static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
	/* check a first time without taking the lock
	 * to avoid starvation for nm_kr_get()
	 */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		return NM_KR_STOPPED;
	}
	if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
		return NM_KR_BUSY;
	/* check a second time with lock held */
	if (unlikely(kr->nkr_stopped)) {
		ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
		nm_kr_put(kr);
		return NM_KR_STOPPED;
	}
	return 0;
}


/*
 * The following are support routines used by individual drivers to
 * support netmap operation.
 *
 * netmap_attach() initializes a struct netmap_adapter, allocating the
 * 	struct netmap_ring's and the struct selinfo.
 *
 * netmap_detach() frees the memory allocated by netmap_attach().
 *
 * netmap_transmit() replaces the if_transmit routine of the interface,
 *	and is used to intercept packets coming from the stack.
 *
 * netmap_load_map/netmap_reload_map are helper routines to set/reset
 *	the dmamap for a packet buffer
 *
 * netmap_reset() is a helper routine to be called in the driver
 *	when reinitializing a ring.
 */

int netmap_attach(struct netmap_adapter *);
int netmap_attach_common(struct netmap_adapter *);
void netmap_detach_common(struct netmap_adapter *na);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
	enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);


/*
 * Support routines to be used with the VALE switch
 */
int netmap_update_config(struct netmap_adapter *na);
int netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom);
void netmap_krings_delete(struct netmap_adapter *na);

struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	uint16_t ringid, int *err);



/* clamp *v into [lo, hi]; msg is used in diagnostics */
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);

#ifdef WITH_VALE
/*
 * The following bridge-related interfaces are used by other kernel modules
 * In the version that only supports unicast or broadcast, the lookup
 * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
 * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
 * XXX in practice "unknown" might be handled same as broadcast.
646 */ 647 typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len, 648 uint8_t *ring_nr, struct netmap_vp_adapter *); 649 u_int netmap_bdg_learning(char *, u_int, uint8_t *, 650 struct netmap_vp_adapter *); 651 652 #define NM_BDG_MAXPORTS 254 /* up to 254 */ 653 #define NM_BDG_BROADCAST NM_BDG_MAXPORTS 654 #define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) 655 656 #define NM_NAME "vale" /* prefix for bridge port name */ 657 658 659 /* these are redefined in case of no VALE support */ 660 int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create); 661 void netmap_init_bridges(void); 662 int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func); 663 664 #else /* !WITH_VALE */ 665 #define netmap_get_bdg_na(_1, _2, _3) 0 666 #define netmap_init_bridges(_1) 667 #define netmap_bdg_ctl(_1, _2) EINVAL 668 #endif /* !WITH_VALE */ 669 670 /* Various prototypes */ 671 struct dev_kqfilter_args; /* XXX this shouldn't be here */ 672 int netmap_kqfilter(struct dev_kqfilter_args *ap); 673 674 675 int netmap_init(void); 676 void netmap_fini(void); 677 int netmap_get_memory(struct netmap_priv_d* p); 678 void netmap_dtor(void *data); 679 int netmap_dtor_locked(struct netmap_priv_d *priv); 680 681 struct dev_ioctl_args; /* XXX this shouldn't be here */ 682 int netmap_ioctl(struct dev_ioctl_args *ap); 683 684 /* netmap_adapter creation/destruction */ 685 #define NM_IFPNAME(ifp) ((ifp) ? 
(ifp)->if_xname : "zombie") 686 #define NM_DEBUG_PUTGET 1 687 688 #ifdef NM_DEBUG_PUTGET 689 690 #define NM_DBG(f) __##f 691 692 void __netmap_adapter_get(struct netmap_adapter *na); 693 694 #define netmap_adapter_get(na) \ 695 do { \ 696 struct netmap_adapter *__na = na; \ 697 D("getting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \ 698 __netmap_adapter_get(__na); \ 699 } while (0) 700 701 int __netmap_adapter_put(struct netmap_adapter *na); 702 703 #define netmap_adapter_put(na) \ 704 do { \ 705 struct netmap_adapter *__na = na; \ 706 D("putting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \ 707 __netmap_adapter_put(__na); \ 708 } while (0) 709 710 #else /* !NM_DEBUG_PUTGET */ 711 712 #define NM_DBG(f) f 713 void netmap_adapter_get(struct netmap_adapter *na); 714 int netmap_adapter_put(struct netmap_adapter *na); 715 716 #endif /* !NM_DEBUG_PUTGET */ 717 718 719 720 extern u_int netmap_buf_size; 721 #define NETMAP_BUF_SIZE netmap_buf_size // XXX remove 722 extern int netmap_mitigate; 723 extern int netmap_no_pendintr; 724 extern u_int netmap_total_buffers; 725 extern char *netmap_buffer_base; 726 extern int netmap_verbose; // XXX debugging 727 enum { /* verbose flags */ 728 NM_VERB_ON = 1, /* generic verbose */ 729 NM_VERB_HOST = 0x2, /* verbose host stack */ 730 NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */ 731 NM_VERB_TXSYNC = 0x20, 732 NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */ 733 NM_VERB_TXINTR = 0x200, 734 NM_VERB_NIC_RXSYNC = 0x1000, /* verbose on rx/tx intr (driver) */ 735 NM_VERB_NIC_TXSYNC = 0x2000, 736 }; 737 738 extern int netmap_txsync_retry; 739 extern int netmap_generic_mit; 740 extern int netmap_generic_ringsize; 741 742 /* 743 * NA returns a pointer to the struct netmap adapter from the ifp, 744 * WNA is used to write it. 
 */
#ifndef WNA
#define	WNA(_ifp)	(_ifp)->if_unused7	/* XXX better name ;) */
#endif
#define	NA(_ifp)	((struct netmap_adapter *)WNA(_ifp))

/*
 * Macros to determine if an interface is netmap capable or netmap enabled.
 * See the magic field in struct netmap_adapter.
 */
/*
 * on FreeBSD just use if_capabilities and if_capenable.
 */
#define NETMAP_CAPABLE(ifp)	(NA(ifp) && \
	(ifp)->if_capabilities & IFCAP_NETMAP )

#define	NETMAP_SET_CAPABLE(ifp)				\
	(ifp)->if_capabilities |= IFCAP_NETMAP

/* Callback invoked by the dma machinery after a successfull dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
    __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
{
}

/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
 * XXX can we do it without a callback ?
 */
static inline void
netmap_load_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map)
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE,
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}

/* update the map when a buffer changes. */
static inline void
netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	if (map) {
		bus_dmamap_unload(tag, map);
		bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE,
		    netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
	}
}

/*
 * functions to map NIC to KRING indexes (n2k) and vice versa (k2n)
 */

/* NIC index -> kring index: add nkr_hwofs, wrap into [0, n) */
static inline int
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx += kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}


/* kring index -> NIC index: subtract nkr_hwofs, wrap into [0, n) */
static inline int
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;
	idx -= kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;
	else if (idx < n)
		return idx;
	else
		return idx - n;
}


/* Entries of the look-up table. */
struct lut_entry {
	void *vaddr;		/* virtual address. */
	vm_paddr_t paddr;	/* physical address. */
};

struct netmap_obj_pool;
extern struct lut_entry *netmap_buffer_lut;
#define NMB_VA(i)	(netmap_buffer_lut[i].vaddr)
#define NMB_PA(i)	(netmap_buffer_lut[i].paddr)

/*
 * NMB return the virtual address of a buffer (buffer 0 on bad index)
 * PNMB also fills the physical address
 */
static inline void *
NMB(struct netmap_slot *slot)
{
	uint32_t i = slot->buf_idx;
	return (unlikely(i >= netmap_total_buffers)) ? NMB_VA(0) : NMB_VA(i);
}

static inline void *
PNMB(struct netmap_slot *slot, uint64_t *pp)
{
	uint32_t i = slot->buf_idx;
	void *ret = (i >= netmap_total_buffers) ? NMB_VA(0) : NMB_VA(i);

	*pp = (i >= netmap_total_buffers) ? NMB_PA(0) : NMB_PA(i);
	return ret;
}

/* Generic version of NMB, which uses device-specific memory.
 */
static inline void *
BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
	struct lut_entry *lut = na->na_lut;
	uint32_t i = slot->buf_idx;
	/* fall back to buffer 0 on an out-of-range index */
	return (unlikely(i >= na->na_lut_objtotal)) ?
		lut[0].vaddr : lut[i].vaddr;
}

/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
int netmap_common_irq(struct ifnet *, u_int, u_int *work_done);


void netmap_txsync_to_host(struct netmap_adapter *na);
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
void netmap_disable_ring(struct netmap_kring *kr);


/* Structure associated to each thread which registered an interface.
 *
 * The first 4 fields of this structure are written by NIOCREGIF and
 * read by poll() and NIOC?XSYNC.
 * There is low contention among writers (actually, a correct user program
 * should have no contention among writers) and among writers and readers,
 * so we use a single global lock to protect the structure initialization.
 * Since initialization involves the allocation of memory, we reuse the memory
 * allocator lock.
 * Read access to the structure is lock free. Readers must check that
 * np_nifp is not NULL before using the other fields.
 * If np_nifp is NULL initialization has not been performed, so they should
 * return an error to userlevel.
 *
 * The ref_done field is used to regulate access to the refcount in the
 * memory allocator. The refcount must be incremented at most once for
 * each open("/dev/netmap"). The increment is performed by the first
 * function that calls netmap_get_memory() (currently called by
 * mmap(), NIOCGINFO and NIOCREGIF).
 * If the refcount is incremented, it is then decremented when the
 * private structure is destroyed.
 */
struct netmap_priv_d {
	struct netmap_if * volatile np_nifp;	/* netmap if descriptor. */

	struct netmap_adapter	*np_na;
	int		np_ringid;	/* from the ioctl */
	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
	uint16_t	np_txpoll;

	struct netmap_mem_d *np_mref;	/* use with NMG_LOCK held */
	/* np_refcount is only used on FreeBSD */
	int		np_refcount;	/* use with NMG_LOCK held */
};


/*
 * generic netmap emulation for devices that do not have
 * native netmap support.
 * XXX generic_netmap_register() is only exported to implement
 * nma_is_generic().
 */
int generic_netmap_register(struct netmap_adapter *na, int enable);
int generic_netmap_attach(struct ifnet *ifp);

int netmap_catch_rx(struct netmap_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m,
	const struct pktinfo *, int);
void netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);

/* true iff the adapter uses the generic (emulated) backend */
static __inline int
nma_is_generic(struct netmap_adapter *na)
{
	return na->nm_register == generic_netmap_register;
}

/*
 * netmap_mitigation API. This is used by the generic adapter
 * to reduce the number of interrupt requests/selwakeup
 * to clients on incoming packets.
 */
void netmap_mitigation_init(struct netmap_generic_adapter *na);
void netmap_mitigation_start(struct netmap_generic_adapter *na);
void netmap_mitigation_restart(struct netmap_generic_adapter *na);
int netmap_mitigation_active(struct netmap_generic_adapter *na);
void netmap_mitigation_cleanup(struct netmap_generic_adapter *na);

// int generic_timer_handler(struct hrtimer *t);

#endif /* _NET_NETMAP_KERN_H_ */