/* LWIP service - ethif.c - ethernet interfaces */
/*
 * The most important aspect of this module is to maintain a send queue for the
 * interface. This send queue consists of packets to send. At times, the user
 * may request a change to the driver configuration. While configuration
 * requests would ideally be enqueued in the send queue, this has proven too
 * problematic to work in practice, especially since out-of-memory conditions
 * may prevent configuration requests from being accepted immediately in such a
 * model. Instead, we take a simple and blunt approach: configuration requests
 * "cut in line" and thus take precedence over pending packets in the send
 * queue. This may not always be entirely correct: for example, packets may be
 * transmitted with the old ethernet address after the network device has
 * already been reconfigured to receive from a new ethernet address. However,
 * this should not be a real problem, and we take care explicitly of perhaps
 * the most problematic case: packets not getting checksummed due to checksum
 * offloading configuration changes.
 *
 * Even with this blunt approach, we maintain three concurrent configurations:
 * the active, the pending, and the wanted configuration. The active one is
 * the last known active configuration at the network driver. It is used not
 * only to report whether the device is in RUNNING state, but also to replay
 * the active configuration to a restarted driver. The pending configuration
 * is a partially new configuration that has been given to ndev to send to the
 * driver, but not yet acknowledged by the driver. Finally, the wanted
 * configuration is the latest one that has yet to be given to ndev.
 *
 * Each configuration has a bitmask indicating which part of the configuration
 * has changed, in order to limit work on the driver side. This is also the
 * reason that the pending and wanted configurations are separate: if e.g. a
 * media change is pending at the driver, and the user also requests a mode
 * change, we do not want the media change to be repeated after it has been
 * acknowledged by the driver, just to change the mode as well. In this example
 * the pending configuration will have NDEV_SET_MEDIA set, and the wanted
 * configuration will have NDEV_SET_MODE set. Once acknowledged, the pending
 * bitmask is cleared and the wanted bitmask is tested to see if another
 * configuration change should be given to ndev. Technically, this could lead
 * to starvation of actual packet transmission, but we expect configuration
 * changes to be very rare, since they are always user initiated.
 *
 * It is important to note for understanding the code that for some fields
 * (mode, flags, caps), the three configurations are cascading: even though the
 * wanted configuration may not have NDEV_SET_MODE set, its mode field will
 * still contain the most recently requested mode; that is, the mode in the
 * pending configuration if that one has NDEV_SET_MODE set, or otherwise the
 * mode in the active configuration. For that reason, we carefully merge
 * configuration requests into the next level (wanted -> pending -> active),
 * updating just the fields that have been changed by the previous level. This
 * approach simplifies obtaining current values a lot, but is not very obvious.
 *
 * Also, we never send multiple configuration requests at once, even though
 * ndev would let us do that: we use a single array for the list of multicast
 * ethernet addresses that we send to the driver, which the driver may retrieve
 * (using a memory grant) at any time. We necessarily recompute the multicast
 * list before sending a configuration request, and thus, sending multiple
 * requests at once may lead to the driver retrieving a corrupted list.
56 */ 57 58 #include "lwip.h" 59 #include "ethif.h" 60 61 #include "lwip/etharp.h" 62 #include "lwip/ethip6.h" 63 #include "lwip/igmp.h" 64 #include "lwip/mld6.h" 65 66 #include <net/if_media.h> 67 68 #define ETHIF_MAX_MTU 1500 /* maximum MTU value for ethernet */ 69 #define ETHIF_DEF_MTU ETHIF_MAX_MTU /* default MTU value that we use */ 70 71 #define ETHIF_MCAST_MAX 8 /* maximum number of multicast addresses */ 72 73 struct ethif { 74 struct ifdev ethif_ifdev; /* interface device, MUST be first */ 75 ndev_id_t ethif_ndev; /* network device ID */ 76 unsigned int ethif_flags; /* interface flags (ETHIFF_) */ 77 uint32_t ethif_caps; /* driver capabilities (NDEV_CAPS_) */ 78 uint32_t ethif_media; /* driver-reported media type (IFM_) */ 79 struct ndev_conf ethif_active; /* active configuration (at driver) */ 80 struct ndev_conf ethif_pending; /* pending configuration (at ndev) */ 81 struct ndev_conf ethif_wanted; /* desired configuration (waiting) */ 82 struct ndev_hwaddr ethif_mclist[ETHIF_MCAST_MAX]; /* multicast list */ 83 struct { /* send queue (packet/conf refs) */ 84 struct pbuf *es_head; /* first (oldest) request reference */ 85 struct pbuf **es_unsentp; /* ptr-ptr to first unsent request */ 86 struct pbuf **es_tailp; /* ptr-ptr for adding new requests */ 87 unsigned int es_count; /* buffer count, see ETHIF_PBUF_.. 
*/ 88 } ethif_snd; 89 struct { /* receive queue (packets) */ 90 struct pbuf *er_head; /* first (oldest) request buffer */ 91 struct pbuf **er_tailp; /* ptr-ptr for adding new requests */ 92 } ethif_rcv; 93 SIMPLEQ_ENTRY(ethif) ethif_next; /* next in free list */ 94 } ethif_array[NR_NDEV]; /* any other value would be suboptimal */ 95 96 #define ethif_get_name(ethif) (ifdev_get_name(&(ethif)->ethif_ifdev)) 97 #define ethif_get_netif(ethif) (ifdev_get_netif(&(ethif)->ethif_ifdev)) 98 99 #define ETHIFF_DISABLED 0x01 /* driver has disappeared */ 100 #define ETHIFF_FIRST_CONF 0x02 /* first configuration request sent */ 101 102 /* 103 * Send queue limit settings. Both are counted in number of pbuf objects. 104 * ETHIF_PBUF_MIN is the minimum number of pbuf objects that can always be 105 * enqueued on a particular interface's send queue. It should be at least the 106 * number of pbufs for one single packet after being reduced to the ndev limit, 107 * so NDEV_IOV_MAX (8) is a natural fit. The ETHIF_PBUF_MAX_n values define 108 * the maximum number of pbufs that may be used by all interface send queues 109 * combined, whichever of the two is smaller. The resulting number must be set 110 * fairly high, because at any time there may be a lot of active TCP sockets 111 * that all generate a (multi-pbuf) packet as a result of a clock tick. It is 112 * currently a function of the size of the buffer pool, capped to a value that 113 * is a function of the number of TCP sockets (assuming one packet per socket; 114 * up to MSS/BUFSIZE+1 data pbufs, one header pbuf, one extra as margin). The 115 * difference between the per-interface guaranteed minimum and the global 116 * maximum is what makes up a pool of "spares", which are really just tokens 117 * allowing for enqueuing of that many pbufs. 
118 */ 119 #define ETHIF_PBUF_MIN (NDEV_IOV_MAX) 120 #define ETHIF_PBUF_MAX_1 (mempool_cur_buffers() >> 1) 121 #define ETHIF_PBUF_MAX_2 (NR_TCPSOCK * (TCP_MSS / MEMPOOL_BUFSIZE + 3)) 122 123 static unsigned int ethif_spares; 124 125 static SIMPLEQ_HEAD(, ethif) ethif_freelist; /* free ethif objects */ 126 127 static const struct ifdev_ops ethif_ops; 128 129 #ifdef INET6 130 static ip6_addr_t ethif_ip6addr_allnodes_ll; 131 #endif /* INET6 */ 132 133 /* 134 * Initialize the ethernet interfaces module. 135 */ 136 void 137 ethif_init(void) 138 { 139 unsigned int slot; 140 141 /* Initialize the list of free ethif objects. */ 142 SIMPLEQ_INIT(ðif_freelist); 143 144 for (slot = 0; slot < __arraycount(ethif_array); slot++) 145 SIMPLEQ_INSERT_TAIL(ðif_freelist, ðif_array[slot], 146 ethif_next); 147 148 /* Initialize the number of in-use spare tokens. */ 149 ethif_spares = 0; 150 151 #ifdef INET6 152 /* Preinitialize the link-local all-nodes IPv6 multicast address. */ 153 ip6_addr_set_allnodes_linklocal(ðif_ip6addr_allnodes_ll); 154 #endif /* INET6 */ 155 } 156 157 /* 158 * As the result of some event, the NetBSD-style interface flags for this 159 * interface may have changed. Recompute and update the flags as appropriate. 160 */ 161 static void 162 ethif_update_ifflags(struct ethif * ethif) 163 { 164 unsigned int ifflags; 165 166 ifflags = ifdev_get_ifflags(ðif->ethif_ifdev); 167 168 /* These are the flags that we might update here. */ 169 ifflags &= ~(IFF_RUNNING | IFF_ALLMULTI); 170 171 /* 172 * For us, the RUNNING flag indicates that -as far as we know- the 173 * network device is fully operational and has its I/O engines running. 174 * This is a reflection of the current state, not of any intention, so 175 * we look at the active configuration here. We use the same approach 176 * for one other receive state flags here (ALLMULTI). 
177 */ 178 if ((ethif->ethif_flags & 179 (ETHIFF_DISABLED | ETHIFF_FIRST_CONF)) == 0 && 180 ethif->ethif_active.nconf_mode != NDEV_MODE_DOWN) { 181 ifflags |= IFF_RUNNING; 182 183 if (ethif->ethif_active.nconf_mode & NDEV_MODE_MCAST_ALL) 184 ifflags |= IFF_ALLMULTI; 185 } 186 187 ifdev_update_ifflags(ðif->ethif_ifdev, ifflags); 188 } 189 190 /* 191 * Add a multicast hardware receive address into the set of hardware addresses 192 * in the given configuration, if the given address is not already in the 193 * configuration's set. Adjust the configuration's mode as needed. Return 194 * TRUE If the address was added, and FALSE if the address could not be added 195 * due to a full list (of 'max' elements), in which case the mode is changed 196 * from receiving from listed multicast addresses to receiving from all 197 * multicast addresses. 198 */ 199 static int 200 ethif_add_mcast(struct ndev_conf * nconf, unsigned int max, 201 struct ndev_hwaddr * hwaddr) 202 { 203 unsigned int slot; 204 205 /* 206 * See if the hardware address is already in the list we produced so 207 * far. This makes the multicast list generation O(n^2) but we do not 208 * expect many entries nor is the list size large anyway. 209 */ 210 for (slot = 0; slot < nconf->nconf_mccount; slot++) 211 if (!memcmp(&nconf->nconf_mclist[slot], hwaddr, 212 sizeof(*hwaddr))) 213 return TRUE; 214 215 if (nconf->nconf_mccount < max) { 216 memcpy(&nconf->nconf_mclist[slot], hwaddr, sizeof(*hwaddr)); 217 nconf->nconf_mccount++; 218 219 nconf->nconf_mode |= NDEV_MODE_MCAST_LIST; 220 221 return TRUE; 222 } else { 223 nconf->nconf_mode &= ~NDEV_MODE_MCAST_LIST; 224 nconf->nconf_mode |= NDEV_MODE_MCAST_ALL; 225 226 return FALSE; 227 } 228 } 229 230 /* 231 * Add the ethernet hardware address derived from the given IPv4 multicast 232 * address, to the list of multicast addresses. 
233 */ 234 static int 235 ethif_add_mcast_v4(struct ndev_conf * nconf, unsigned int max, 236 const ip4_addr_t * ip4addr) 237 { 238 struct ndev_hwaddr hwaddr; 239 240 /* 01:00:05:xx:xx:xx with the lower 23 bits of the IPv4 address. */ 241 hwaddr.nhwa_addr[0] = LL_IP4_MULTICAST_ADDR_0; 242 hwaddr.nhwa_addr[1] = LL_IP4_MULTICAST_ADDR_1; 243 hwaddr.nhwa_addr[2] = LL_IP4_MULTICAST_ADDR_2; 244 hwaddr.nhwa_addr[3] = (ip4_addr_get_u32(ip4addr) >> 16) & 0x7f; 245 hwaddr.nhwa_addr[4] = (ip4_addr_get_u32(ip4addr) >> 8) & 0xff; 246 hwaddr.nhwa_addr[5] = (ip4_addr_get_u32(ip4addr) >> 0) & 0xff; 247 248 return ethif_add_mcast(nconf, max, &hwaddr); 249 } 250 251 /* 252 * Add the ethernet hardware address derived from the given IPv6 multicast 253 * address, to the list of multicast addresses. 254 */ 255 static int 256 ethif_add_mcast_v6(struct ndev_conf * nconf, unsigned int max, 257 const ip6_addr_t * ip6addr) 258 { 259 struct ndev_hwaddr hwaddr; 260 261 /* 33:33:xx:xx:xx:xx with the lower 32 bits of the IPv6 address. */ 262 hwaddr.nhwa_addr[0] = LL_IP6_MULTICAST_ADDR_0; 263 hwaddr.nhwa_addr[1] = LL_IP6_MULTICAST_ADDR_1; 264 memcpy(&hwaddr.nhwa_addr[2], &ip6addr->addr[3], sizeof(uint32_t)); 265 266 return ethif_add_mcast(nconf, max, &hwaddr); 267 } 268 269 /* 270 * Set up the multicast mode for a configuration that is to be sent to a 271 * network driver, generating a multicast receive address list for the driver 272 * as applicable. 273 */ 274 static void 275 ethif_gen_mcast(struct ethif * ethif, struct ndev_conf * nconf) 276 { 277 struct igmp_group *group4; 278 struct mld_group *group6; 279 unsigned int max; 280 281 /* Make sure that multicast is supported at all for this interface. */ 282 if (!(ethif->ethif_caps & NDEV_CAP_MCAST)) 283 return; 284 285 /* Make sure the mode is being (re)configured to be up. */ 286 if (!(nconf->nconf_set & NDEV_SET_MODE) || 287 nconf->nconf_mode == NDEV_MODE_DOWN) 288 return; 289 290 /* Recompute the desired multicast flags. 
*/ 291 nconf->nconf_mode &= ~(NDEV_MODE_MCAST_LIST | NDEV_MODE_MCAST_ALL); 292 293 /* If promiscuous mode is enabled, receive all multicast packets. */ 294 if (nconf->nconf_mode & NDEV_MODE_PROMISC) { 295 nconf->nconf_mode |= NDEV_MODE_MCAST_ALL; 296 297 return; 298 } 299 300 /* 301 * Map all IGMP/MLD6 multicast addresses to ethernet addresses, merging 302 * any duplicates to save slots. We have to add the MLD6 all-nodes 303 * multicast address ourselves, which also means the list is never 304 * empty unless compiling with USE_INET6=no. If the list is too small 305 * for all addresses, opt to receive all multicast packets instead. 306 */ 307 nconf->nconf_mclist = ethif->ethif_mclist; 308 nconf->nconf_mccount = 0; 309 max = __arraycount(ethif->ethif_mclist); 310 311 for (group4 = netif_igmp_data(ethif_get_netif(ethif)); group4 != NULL; 312 group4 = group4->next) 313 if (!ethif_add_mcast_v4(nconf, max, &group4->group_address)) 314 return; 315 316 #ifdef INET6 317 if (!ethif_add_mcast_v6(nconf, max, ðif_ip6addr_allnodes_ll)) 318 return; 319 #endif /* INET6 */ 320 321 for (group6 = netif_mld6_data(ethif_get_netif(ethif)); group6 != NULL; 322 group6 = group6->next) 323 if (!ethif_add_mcast_v6(nconf, max, &group6->group_address)) 324 return; 325 } 326 327 /* 328 * Merge a source configuration into a destination configuration, copying any 329 * fields intended to be set from the source into the destination and clearing 330 * the "set" mask in the source, without changing the source fields, so that 331 * the source will reflect the destination's contents. 
332 */ 333 static void 334 ethif_merge_conf(struct ndev_conf * dconf, struct ndev_conf * sconf) 335 { 336 337 dconf->nconf_set |= sconf->nconf_set; 338 339 if (sconf->nconf_set & NDEV_SET_MODE) 340 dconf->nconf_mode = sconf->nconf_mode; 341 if (sconf->nconf_set & NDEV_SET_CAPS) 342 dconf->nconf_caps = sconf->nconf_caps; 343 if (sconf->nconf_set & NDEV_SET_FLAGS) 344 dconf->nconf_flags = sconf->nconf_flags; 345 if (sconf->nconf_set & NDEV_SET_MEDIA) 346 dconf->nconf_media = sconf->nconf_media; 347 if (sconf->nconf_set & NDEV_SET_HWADDR) 348 memcpy(&dconf->nconf_hwaddr, &sconf->nconf_hwaddr, 349 sizeof(dconf->nconf_hwaddr)); 350 351 sconf->nconf_set = 0; 352 } 353 354 /* 355 * Return TRUE if we can and should try to pass a configuration request to the 356 * ndev layer on this interface, or FALSE otherwise. 357 */ 358 static int 359 ethif_can_conf(struct ethif * ethif) 360 { 361 362 /* Is there a configuration change waiting? The common case is no. */ 363 if (ethif->ethif_wanted.nconf_set == 0) 364 return FALSE; 365 366 /* 367 * Is there a configuration change pending already? Then wait for it 368 * to be acknowledged first. 369 */ 370 if (ethif->ethif_pending.nconf_set != 0) 371 return FALSE; 372 373 /* Make sure the interface is in the appropriate state. */ 374 if (ethif->ethif_flags & ETHIFF_DISABLED) 375 return FALSE; 376 377 /* First let all current packet send requests finish. */ 378 return (ethif->ethif_snd.es_unsentp == ðif->ethif_snd.es_head); 379 } 380 381 /* 382 * Return TRUE if we can and should try to pass the next unsent packet send 383 * request to the ndev layer on this interface, or FALSE otherwise. 384 */ 385 static int 386 ethif_can_send(struct ethif * ethif) 387 { 388 389 /* Is there anything to hand to ndev at all? The common case is no. */ 390 if (*ethif->ethif_snd.es_unsentp == NULL) 391 return FALSE; 392 393 /* 394 * Is there a configuration change pending? Then we cannot send 395 * packets yet. 
Always let all configuration changes through first. 396 */ 397 if (ethif->ethif_pending.nconf_set != 0 || 398 ethif->ethif_wanted.nconf_set != 0) 399 return FALSE; 400 401 /* Make sure the interface is in the appropriate state. */ 402 if ((ethif->ethif_flags & (ETHIFF_DISABLED | ETHIFF_FIRST_CONF)) != 0) 403 return FALSE; 404 405 return TRUE; 406 } 407 408 /* 409 * Return TRUE if we can and should try to receive packets on this interface 410 * and are ready to accept received packets, or FALSE otherwise. 411 */ 412 static int 413 ethif_can_recv(struct ethif * ethif) 414 { 415 416 if ((ethif->ethif_flags & (ETHIFF_DISABLED | ETHIFF_FIRST_CONF)) != 0) 417 return FALSE; 418 419 /* 420 * We do not check the link status here. There is no reason not to 421 * spawn receive requests, or accept received packets, while the link 422 * is reported to be down. 423 */ 424 return ifdev_is_up(ðif->ethif_ifdev); 425 } 426 427 /* 428 * Polling function, invoked after each message loop iteration. Check whether 429 * any configuration change or packets can be sent to the driver, and whether 430 * any new packet receive requests can be enqueued at the driver. 431 */ 432 static void 433 ethif_poll(struct ifdev * ifdev) 434 { 435 struct ethif *ethif = (struct ethif *)ifdev; 436 struct pbuf *pbuf, *pref; 437 438 /* 439 * If a configuration request is desired, see if we can send it to the 440 * driver now. Otherwise, attempt to send any packets if possible. 441 * In both cases, a failure of the ndev call indicates that we should 442 * try again later. 443 */ 444 if (ethif_can_conf(ethif)) { 445 ethif_gen_mcast(ethif, ðif->ethif_wanted); 446 447 /* 448 * On success, move the wanted configuration into the pending 449 * slot. Otherwise, try again on the next poll iteration. 
450 */ 451 if (ndev_conf(ethif->ethif_ndev, ðif->ethif_wanted) == OK) 452 ethif_merge_conf(ðif->ethif_pending, 453 ðif->ethif_wanted); 454 } else { 455 while (ethif_can_send(ethif)) { 456 pref = *ethif->ethif_snd.es_unsentp; 457 458 if (pref->type == PBUF_REF) 459 pbuf = (struct pbuf *)pref->payload; 460 else 461 pbuf = pref; 462 463 if (ndev_send(ethif->ethif_ndev, pbuf) == OK) 464 ethif->ethif_snd.es_unsentp = 465 pchain_end(pref); 466 else 467 break; 468 } 469 } 470 471 /* 472 * Attempt to create additional receive requests for the driver, if 473 * applicable. We currently do not set a limit on the maximum number 474 * of concurrently pending receive requests here, because the maximum 475 * in ndev is already quite low. That may have to be changed one day. 476 */ 477 while (ethif_can_recv(ethif) && ndev_can_recv(ethif->ethif_ndev)) { 478 /* 479 * Allocate a buffer for the network device driver to copy the 480 * received packet into. Allocation may fail if no buffers are 481 * available at this time; in that case simply try again later. 482 * We add room for a VLAN tag even though we do not support 483 * such tags just yet. 484 */ 485 if ((pbuf = pchain_alloc(PBUF_RAW, ETH_PAD_LEN + ETH_HDR_LEN + 486 ETHIF_MAX_MTU + NDEV_ETH_PACKET_TAG)) == NULL) 487 break; 488 489 /* 490 * Effectively throw away two bytes in order to align TCP/IP 491 * header fields to 32 bits. See the short discussion in 492 * lwipopts.h as to why we are not using lwIP's ETH_PAD_SIZE. 493 */ 494 util_pbuf_header(pbuf, -ETH_PAD_LEN); 495 496 /* 497 * Send the request to the driver. This may still fail due to 498 * grant allocation failure, in which case we try again later. 499 */ 500 if (ndev_recv(ethif->ethif_ndev, pbuf) != OK) { 501 pbuf_free(pbuf); 502 503 break; 504 } 505 506 /* 507 * Hold on to the packet buffer until the receive request 508 * completes or is aborted, or the driver disappears. 
509 */ 510 *ethif->ethif_rcv.er_tailp = pbuf; 511 ethif->ethif_rcv.er_tailp = pchain_end(pbuf); 512 } 513 } 514 515 /* 516 * Complete the link-layer header of the packet by filling in a source address. 517 * This is relevant for BPF-generated packets only, and thus we can safely 518 * modify the given pbuf. 519 */ 520 static void 521 ethif_hdrcmplt(struct ifdev * ifdev, struct pbuf * pbuf) 522 { 523 struct netif *netif; 524 525 /* Make sure there is an ethernet packet header at all. */ 526 if (pbuf->len < ETH_HDR_LEN) 527 return; 528 529 netif = ifdev_get_netif(ifdev); 530 531 /* 532 * Insert the source ethernet address into the packet. The source 533 * address is located right after the destination address at the start 534 * of the packet. 535 */ 536 memcpy((uint8_t *)pbuf->payload + netif->hwaddr_len, netif->hwaddr, 537 netif->hwaddr_len); 538 } 539 540 /* 541 * Return TRUE if the given additional number of spare tokens may be used, or 542 * FALSE if the limit has been reached. Each spare token represents one 543 * enqueued pbuf. The limit must be such that we do not impede normal traffic 544 * but also do not spend the entire buffer pool on enqueued packets. 545 */ 546 static int 547 ethif_can_spare(unsigned int spares) 548 { 549 unsigned int max; 550 551 /* 552 * Use the configured maximum, which depends on the current size of the 553 * buffer pool. 554 */ 555 max = ETHIF_PBUF_MAX_1; 556 557 /* 558 * However, limit the total to a value based on the maximum number of 559 * TCP packets that can, in the worst case, be expected to queue up at 560 * any single moment. 561 */ 562 if (max > ETHIF_PBUF_MAX_2) 563 max = ETHIF_PBUF_MAX_2; 564 565 return (spares + ethif_spares <= max - ETHIF_PBUF_MIN * NR_NDEV); 566 } 567 568 /* 569 * Process a packet as output on an ethernet interface. 
570 */ 571 static err_t 572 ethif_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif) 573 { 574 struct ethif *ethif = (struct ethif *)ifdev; 575 struct pbuf *pref, *pcopy; 576 size_t padding; 577 unsigned int count, spares; 578 579 /* Packets must never be sent on behalf of another interface. */ 580 assert(netif == NULL); 581 582 /* 583 * The caller already rejects packets while the interface or link is 584 * down. We do want to keep enqueuing packets while the driver is 585 * restarting, so do not check ETHIFF_DISABLED or ETHIFF_FIRST_CONF. 586 */ 587 588 /* 589 * Reject oversized packets immediately. This should not happen. 590 * Undersized packets are padded below. 591 */ 592 if (pbuf->tot_len > NDEV_ETH_PACKET_MAX) { 593 printf("LWIP: attempt to send oversized ethernet packet " 594 "(size %u)\n", pbuf->tot_len); 595 util_stacktrace(); 596 597 return ERR_MEM; 598 } 599 600 /* 601 * The original lwIP idea for processing output packets is that we make 602 * a copy of the packet here, so that lwIP is free to do whatever it 603 * wants with the original packet (e.g., keep on the TCP retransmission 604 * queue). More recently, lwIP has made progress towards allowing the 605 * packet to be referenced only, decreasing the reference count only 606 * once the packet has been actually sent. For many embedded systems, 607 * that change now allows zero-copy transmission with direct DMA from 608 * the provided packet buffer. We are not so lucky: we have to make an 609 * additional inter-process copy anyway. We do however use the same 610 * referencing system to avoid having to make yet another copy of the 611 * packet here. 612 * 613 * There was previously a check on (pbuf->ref > 1) here, to ensure that 614 * we would never enqueue packets that are retransmitted while we were 615 * still in the process of sending the initial copy. 
Now that for ARP 616 * and NDP queuing, packets are referenced rather than copied (lwIP 617 * patch #9272), we can no longer perform that check: packets may 618 * legitimately have a reference count of 2 at this point. The second 619 * reference will be dropped by the caller immediately after we return. 620 */ 621 622 /* 623 * There are two cases in which we need to make a copy of the packet 624 * after all: 625 * 626 * 1) in the case that the packet needs to be padded in order to reach 627 * the minimum ethernet packet size (for drivers' convenience); 628 * 2) in the (much more exceptional) case that the given pbuf chain 629 * exceeds the maximum vector size for network driver requests. 630 */ 631 if (NDEV_ETH_PACKET_MIN > pbuf->tot_len) 632 padding = NDEV_ETH_PACKET_MIN - pbuf->tot_len; 633 else 634 padding = 0; 635 636 count = pbuf_clen(pbuf); 637 638 if (padding != 0 || count > NDEV_IOV_MAX) { 639 pcopy = pchain_alloc(PBUF_RAW, pbuf->tot_len + padding); 640 if (pcopy == NULL) { 641 ifdev_output_drop(ifdev); 642 643 return ERR_MEM; 644 } 645 646 if (pbuf_copy(pcopy, pbuf) != ERR_OK) 647 panic("unexpected pbuf copy failure"); 648 649 if (padding > 0) { 650 /* 651 * This restriction can be lifted if needed, but it 652 * involves hairy pbuf traversal and our standard pool 653 * size should be way in excess of the minimum packet 654 * size. 655 */ 656 assert(pcopy->len == pbuf->tot_len + padding); 657 658 memset((char *)pcopy->payload + pbuf->tot_len, 0, 659 padding); 660 } 661 662 count = pbuf_clen(pcopy); 663 assert(count <= NDEV_IOV_MAX); 664 665 pbuf = pcopy; 666 } else 667 pcopy = NULL; 668 669 /* 670 * Restrict the size of the send queue, so that it will not exhaust the 671 * buffer pool. 
672 */ 673 if (ethif->ethif_snd.es_count >= ETHIF_PBUF_MIN) 674 spares = count; 675 else if (ethif->ethif_snd.es_count + count > ETHIF_PBUF_MIN) 676 spares = ethif->ethif_snd.es_count + count - ETHIF_PBUF_MIN; 677 else 678 spares = 0; 679 680 if (spares > 0 && !ethif_can_spare(spares)) { 681 if (pcopy != NULL) 682 pbuf_free(pcopy); 683 684 ifdev_output_drop(ifdev); 685 686 return ERR_MEM; 687 } 688 689 /* 690 * A side effect of the referencing approach is that we cannot touch 691 * the last pbuf's "next" pointer. Thus, we need another way of 692 * linking together the buffers on the send queue. We use a linked 693 * list of PBUF_REF-type buffers for this instead. However, do this 694 * only when we have not made a copy of the original pbuf, because then 695 * we might as well use the copy instead. 696 */ 697 if (pcopy == NULL) { 698 if ((pref = pbuf_alloc(PBUF_RAW, 0, PBUF_REF)) == NULL) { 699 ifdev_output_drop(ifdev); 700 701 return ERR_MEM; 702 } 703 704 pbuf_ref(pbuf); 705 706 pref->payload = pbuf; 707 pref->tot_len = 0; 708 pref->len = count; 709 } else 710 pref = pcopy; 711 712 /* If the send queue was empty so far, set the IFF_OACTIVE flag. */ 713 if (ethif->ethif_snd.es_head == NULL) 714 ifdev_update_ifflags(ðif->ethif_ifdev, 715 ifdev_get_ifflags(ðif->ethif_ifdev) | IFF_OACTIVE); 716 717 /* 718 * Enqueue the packet on the send queue. It will be sent from the 719 * polling function as soon as possible. TODO: see if sending it from 720 * here makes any performance difference at all. 721 */ 722 *ethif->ethif_snd.es_tailp = pref; 723 ethif->ethif_snd.es_tailp = pchain_end(pref); 724 725 ethif->ethif_snd.es_count += count; 726 ethif_spares += spares; 727 728 return ERR_OK; 729 } 730 731 /* 732 * Transmit an ethernet packet on an ethernet interface, as requested by lwIP. 
733 */ 734 static err_t 735 ethif_linkoutput(struct netif * netif, struct pbuf * pbuf) 736 { 737 struct ifdev *ifdev = netif_get_ifdev(netif); 738 739 /* 740 * Let ifdev make the callback to our output function, so that it can 741 * pass the packet to BPF devices and generically update statistics. 742 */ 743 return ifdev_output(ifdev, pbuf, NULL /*netif*/, TRUE /*to_bpf*/, 744 TRUE /*hdrcmplt*/); 745 } 746 747 /* 748 * The multicast address list has changed. See to it that the change will make 749 * it to the network driver at some point. 750 */ 751 static err_t 752 ethif_set_mcast(struct ethif * ethif) 753 { 754 755 /* 756 * Simply generate a mode change request, unless the interface is down. 757 * Once the mode change request is about to be sent to the driver, we 758 * will recompute the multicast settings. 759 */ 760 if (ifdev_is_up(ðif->ethif_ifdev)) 761 ethif->ethif_wanted.nconf_set |= NDEV_SET_MODE; 762 763 return ERR_OK; 764 } 765 766 /* 767 * An IPv4 multicast address has been added to or removed from the list of IPv4 768 * multicast addresses. 769 */ 770 static err_t 771 ethif_set_mcast_v4(struct netif * netif, const ip4_addr_t * group __unused, 772 enum netif_mac_filter_action action __unused) 773 { 774 775 return ethif_set_mcast((struct ethif *)netif_get_ifdev(netif)); 776 } 777 778 /* 779 * An IPv6 multicast address has been added to or removed from the list of IPv6 780 * multicast addresses. 781 */ 782 static err_t 783 ethif_set_mcast_v6(struct netif * netif, const ip6_addr_t * group __unused, 784 enum netif_mac_filter_action action __unused) 785 { 786 787 return ethif_set_mcast((struct ethif *)netif_get_ifdev(netif)); 788 } 789 790 /* 791 * Initialization function for an ethernet-type netif interface, called from 792 * lwIP at interface creation time. 793 */ 794 static err_t 795 ethif_init_netif(struct ifdev * ifdev, struct netif * netif) 796 { 797 struct ethif *ethif = (struct ethif *)ifdev; 798 799 /* 800 * Fill in a dummy name. 
Since it is only two characters, do not 801 * bother trying to reuse part of the given name. If this name is ever 802 * actually used anywhere, the dummy should suffice for debugging. 803 */ 804 netif->name[0] = 'e'; 805 netif->name[1] = 'n'; 806 807 netif->linkoutput = ethif_linkoutput; 808 809 memset(netif->hwaddr, 0, sizeof(netif->hwaddr)); 810 811 /* 812 * Set the netif flags, partially based on the capabilities reported by 813 * the network device driver. The reason that we do this now is that 814 * lwIP tests for some of these flags and starts appropriate submodules 815 * (e.g., IGMP) right after returning from this function. If we set 816 * the flags later, we also have to take over management of those 817 * submodules, which is something we'd rather avoid. For this reason 818 * in particular, we also do not support capability mask changes after 819 * driver restarts - see ethif_enable(). 820 */ 821 netif->flags = NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET; 822 823 if (ethif->ethif_caps & NDEV_CAP_BCAST) 824 netif->flags |= NETIF_FLAG_BROADCAST; 825 826 if (ethif->ethif_caps & NDEV_CAP_MCAST) { 827 /* The IGMP code adds the all-stations multicast entry. */ 828 netif->igmp_mac_filter = ethif_set_mcast_v4; 829 830 netif->flags |= NETIF_FLAG_IGMP; 831 832 /* For MLD6 we have to add the all-nodes entry ourselves. */ 833 netif->mld_mac_filter = ethif_set_mcast_v6; 834 835 netif->flags |= NETIF_FLAG_MLD6; 836 } 837 838 return ERR_OK; 839 } 840 841 /* 842 * The ndev layer reports that a new network device driver has appeared, with 843 * the given ndev identifier, a driver-given name, and a certain set of 844 * capabilities. Create a new ethernet interface object for it. On success, 845 * return a pointer to the object (for later callbacks from ndev). In that 846 * case, the ndev layer will always immediately call ethif_enable() afterwards. 847 * On failure, return NULL, in which case ndev will forget about the driver. 
848 */ 849 struct ethif * 850 ethif_add(ndev_id_t id, const char * name, uint32_t caps) 851 { 852 struct ethif *ethif; 853 unsigned int ifflags; 854 int r; 855 856 /* 857 * First make sure that the interface name is valid, unique, and not 858 * reserved for virtual interface types. 859 */ 860 if ((r = ifdev_check_name(name, NULL /*vtype_slot*/)) != OK) { 861 /* 862 * There is some risk in printing bad stuff, but this may help 863 * in preventing serious driver writer frustration.. 864 */ 865 printf("LWIP: invalid driver name '%s' (%d)\n", name, r); 866 867 return NULL; 868 } 869 870 /* Then see if there is a free ethernet interface object available. */ 871 if (SIMPLEQ_EMPTY(ðif_freelist)) { 872 printf("LWIP: out of slots for driver name '%s'\n", name); 873 874 return NULL; 875 } 876 877 /* 878 * All good; set up the interface. First initialize the object, since 879 * adding the interface to lwIP might spawn some activity right away. 880 */ 881 ethif = SIMPLEQ_FIRST(ðif_freelist); 882 SIMPLEQ_REMOVE_HEAD(ðif_freelist, ethif_next); 883 884 /* Initialize the ethif structure. */ 885 memset(ethif, 0, sizeof(*ethif)); 886 ethif->ethif_ndev = id; 887 ethif->ethif_flags = ETHIFF_DISABLED; 888 ethif->ethif_caps = caps; 889 890 ethif->ethif_snd.es_head = NULL; 891 ethif->ethif_snd.es_unsentp = ðif->ethif_snd.es_head; 892 ethif->ethif_snd.es_tailp = ðif->ethif_snd.es_head; 893 ethif->ethif_snd.es_count = 0; 894 895 ethif->ethif_rcv.er_head = NULL; 896 ethif->ethif_rcv.er_tailp = ðif->ethif_rcv.er_head; 897 898 /* 899 * Set all the three configurations to the same initial values. Since 900 * any change to the configuration will go through all three, this 901 * allows us to obtain various parts of the status (in particular, the 902 * mode, flags, enabled capabilities, and media type selection) from 903 * any of the three without having to consult the others. 
Note that 904 * the hardware address is set to a indeterminate initial value, as it 905 * is left to the network driver unless specifically overridden. 906 */ 907 ethif->ethif_active.nconf_set = 0; 908 ethif->ethif_active.nconf_mode = NDEV_MODE_DOWN; 909 ethif->ethif_active.nconf_flags = 0; 910 ethif->ethif_active.nconf_caps = 0; 911 ethif->ethif_active.nconf_media = 912 IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, 0); 913 memcpy(ðif->ethif_pending, ðif->ethif_active, 914 sizeof(ethif->ethif_pending)); 915 memcpy(ðif->ethif_wanted, ðif->ethif_pending, 916 sizeof(ethif->ethif_wanted)); 917 918 /* 919 * Compute the initial NetBSD-style interface flags. The IFF_SIMPLEX 920 * interface flag is always enabled because we do not support network 921 * drivers that are receiving their own packets. In particular, lwIP 922 * currently does not deal well with receiving back its own multicast 923 * packets, which leads to IPv6 DAD failures. The other two flags 924 * (IFF_BROADCAST, IFF_MULTICAST) denote capabilities, not enabled 925 * receipt modes. 926 */ 927 ifflags = IFF_SIMPLEX; 928 if (caps & NDEV_CAP_BCAST) 929 ifflags |= IFF_BROADCAST; 930 if (caps & NDEV_CAP_MCAST) 931 ifflags |= IFF_MULTICAST; 932 933 /* Finally, add the interface to ifdev and lwIP. This cannot fail. */ 934 ifdev_add(ðif->ethif_ifdev, name, ifflags, IFT_ETHER, ETH_HDR_LEN, 935 ETHARP_HWADDR_LEN, DLT_EN10MB, ETHIF_DEF_MTU, 936 ND6_IFF_PERFORMNUD | ND6_IFF_AUTO_LINKLOCAL, ðif_ops); 937 938 return ethif; 939 } 940 941 /* 942 * The link status and/or media type of an ethernet interface has changed. 943 */ 944 static void 945 ethif_set_status(struct ethif * ethif, uint32_t link, uint32_t media) 946 { 947 unsigned int iflink; 948 949 /* We save the media type locally for now. */ 950 ethif->ethif_media = media; 951 952 /* Let the ifdev module handle the details of the link change. 
*/ 953 switch (link) { 954 case NDEV_LINK_UP: iflink = LINK_STATE_UP; break; 955 case NDEV_LINK_DOWN: iflink = LINK_STATE_DOWN; break; 956 default: iflink = LINK_STATE_UNKNOWN; break; 957 } 958 959 ifdev_update_link(ðif->ethif_ifdev, iflink); 960 } 961 962 /* 963 * The ndev layer reports that a previously added or disabled network device 964 * driver has been (re)enabled. Start by initializing the driver. Return TRUE 965 * if the interface could indeed be enabled, or FALSE if it should be forgotten 966 * altogether after all. 967 */ 968 int 969 ethif_enable(struct ethif * ethif, const char * name, 970 const struct ndev_hwaddr * hwaddr, uint8_t hwaddr_len, uint32_t caps, 971 uint32_t link, uint32_t media) 972 { 973 int r; 974 975 assert(ethif->ethif_flags & ETHIFF_DISABLED); 976 977 /* 978 * One disadvantage of keeping service labels and ethernet driver names 979 * disjunct is that the ethernet driver may mess with its name between 980 * restarts. Ultimately we may end up renaming our ethernet drivers 981 * such that their labels match their names, in which case we no longer 982 * need the drivers themselves to produce a name, and we can retire 983 * this check. 984 */ 985 if (name != NULL && strcmp(ethif_get_name(ethif), name)) { 986 printf("LWIP: driver '%s' restarted with name '%s'\n", 987 ethif_get_name(ethif), name); 988 989 return FALSE; 990 } 991 992 /* 993 * The hardware address length is just a sanity check for now. After 994 * the initialization reply, we assume the same length is used for all 995 * addresses, which is also the maximum, namely 48 bits (six bytes). 
996 */ 997 if (hwaddr_len != ETHARP_HWADDR_LEN) { 998 printf("LWIP: driver '%s' reports hwaddr length %u\n", 999 ethif_get_name(ethif), hwaddr_len); 1000 1001 return FALSE; 1002 } 1003 1004 /* 1005 * If the driver has changed its available capabilities as a result of 1006 * a restart, we have a problem: we may already have configured the 1007 * interface's netif object to make use of of some of those 1008 * capabilities. TODO: we can deal with some cases (e.g., disappearing 1009 * checksum offloading capabilities) with some effort, and with other 1010 * cases (e.g., disappearing multicast support) with a LOT more effort. 1011 */ 1012 if (ethif->ethif_caps != caps) { 1013 printf("LWIP: driver '%s' changed capabilities\n", 1014 ethif_get_name(ethif)); 1015 1016 return FALSE; 1017 } 1018 1019 /* 1020 * Set the hardware address on the interface, unless a request is 1021 * currently pending to change it, in which case the new address has 1022 * been set already and we do not want to revert that change. If not, 1023 * we always set the address, because it may have changed as part of a 1024 * driver restart and we do not want to get out of sync with it, nor 1025 * can we necessarily change it back. 1026 */ 1027 if (!(ethif->ethif_active.nconf_set & NDEV_SET_HWADDR) && 1028 !(ethif->ethif_pending.nconf_set & NDEV_SET_HWADDR)) 1029 ifdev_update_hwaddr(ðif->ethif_ifdev, hwaddr->nhwa_addr, 1030 (name == NULL) /*is_factory*/); 1031 1032 /* 1033 * At this point, only one more thing can fail: it is possible that we 1034 * do not manage to send the first configuration request due to memory 1035 * shortage. This is extremely unlikely to happen, so send the conf 1036 * request first and forget the entire driver if it fails. 1037 */ 1038 /* 1039 * Always generate a new multicast list before sending a configuration 1040 * request, and at no other time (since there may be a grant for it). 
1041 */ 1042 ethif_gen_mcast(ethif, ðif->ethif_active); 1043 1044 if ((r = ndev_conf(ethif->ethif_ndev, ðif->ethif_active)) != OK) { 1045 printf("LWIP: sending first configuration to '%s' failed " 1046 "(%d)\n", ethif_get_name(ethif), r); 1047 1048 return FALSE; 1049 } 1050 1051 ethif_set_status(ethif, link, media); 1052 1053 ethif->ethif_flags &= ~ETHIFF_DISABLED; 1054 ethif->ethif_flags |= ETHIFF_FIRST_CONF; 1055 1056 return TRUE; 1057 } 1058 1059 /* 1060 * The configuration change stored in the "pending" slot of the given ethif 1061 * object has been acknowledged by the network device driver (or the driver has 1062 * died, see ethif_disable()). Apply changes to the "active" slot of the given 1063 * ethif object, as well as previously delayed changes to lwIP through netif. 1064 */ 1065 static void 1066 ethif_post_conf(struct ethif * ethif) 1067 { 1068 struct ndev_conf *nconf; 1069 unsigned int flags; 1070 1071 nconf = ðif->ethif_pending; 1072 1073 /* 1074 * Now that the driver configuration has changed, we know that the 1075 * new checksum settings will be applied to all sent and received 1076 * packets, and we can disable checksumming flags in netif as desired. 1077 * Enabling checksumming flags has already been done earlier on. 
1078 */ 1079 if (nconf->nconf_set & NDEV_SET_CAPS) { 1080 flags = ethif_get_netif(ethif)->chksum_flags; 1081 1082 if (nconf->nconf_caps & NDEV_CAP_CS_IP4_TX) 1083 flags &= ~NETIF_CHECKSUM_GEN_IP; 1084 if (nconf->nconf_caps & NDEV_CAP_CS_IP4_RX) 1085 flags &= ~NETIF_CHECKSUM_CHECK_IP; 1086 if (nconf->nconf_caps & NDEV_CAP_CS_UDP_TX) 1087 flags &= ~NETIF_CHECKSUM_GEN_UDP; 1088 if (nconf->nconf_caps & NDEV_CAP_CS_UDP_RX) 1089 flags &= ~NETIF_CHECKSUM_CHECK_UDP; 1090 if (nconf->nconf_caps & NDEV_CAP_CS_TCP_TX) 1091 flags &= ~NETIF_CHECKSUM_GEN_TCP; 1092 if (nconf->nconf_caps & NDEV_CAP_CS_TCP_RX) 1093 flags &= ~NETIF_CHECKSUM_CHECK_TCP; 1094 1095 NETIF_SET_CHECKSUM_CTRL(ethif_get_netif(ethif), flags); 1096 } 1097 1098 /* 1099 * Merge any individual parts of the now acknowledged configuration 1100 * changes into the active configuration. The result is that we are 1101 * able to reapply these changes at any time should the network driver 1102 * be restarted. In addition, by only setting bits for fields that 1103 * have actually changed, we can later tell whether the user wanted the 1104 * change or ethif should just take over what the driver reports after 1105 * a restart; this is important for HW-address and media settings. 1106 */ 1107 ethif_merge_conf(ðif->ethif_active, ðif->ethif_pending); 1108 } 1109 1110 /* 1111 * All receive requests have been canceled at the ndev layer, because the 1112 * network device driver has been restarted or shut down. Clear the receive 1113 * queue, freeing any packets in it. 
1114 */ 1115 static void 1116 ethif_drain(struct ethif * ethif) 1117 { 1118 struct pbuf *pbuf, **pnext; 1119 1120 while ((pbuf = ethif->ethif_rcv.er_head) != NULL) { 1121 pnext = pchain_end(pbuf); 1122 1123 if ((ethif->ethif_rcv.er_head = *pnext) == NULL) 1124 ethif->ethif_rcv.er_tailp = ðif->ethif_rcv.er_head; 1125 1126 *pnext = NULL; 1127 pbuf_free(pbuf); 1128 } 1129 } 1130 1131 /* 1132 * The network device driver has stopped working (i.e., crashed), but has not 1133 * been shut down completely, and is expect to come back later. 1134 */ 1135 void 1136 ethif_disable(struct ethif * ethif) 1137 { 1138 1139 /* 1140 * We assume, optimistically, that a new instance of the driver will be 1141 * brought up soon after which we can continue operating as before. As 1142 * such, we do not want to change most of the user-visible state until 1143 * we know for sure that our optimism was in vain. In particular, we 1144 * do *not* want to change the following parts of the state here: 1145 * 1146 * - the contents of the send queue; 1147 * - the state of the interface (up or down); 1148 * - the state and media type of the physical link. 1149 * 1150 * The main user-visible indication of the crash will be that the 1151 * interface does not have the IFF_RUNNING flag set. 1152 */ 1153 1154 /* 1155 * If a configuration request was pending, it will be lost now. Highly 1156 * unintuitively, make the requested configuration the *active* one, 1157 * just as though the request completed successfully. This works, 1158 * because once the driver comes back, the active configuration will be 1159 * replayed as initial configuration. Therefore, by pretending that 1160 * the current request went through, we ensure that it too will be sent 1161 * to the new instance--before anything else is allowed to happen. 1162 */ 1163 if (ethif->ethif_pending.nconf_set != 0) 1164 ethif_post_conf(ethif); 1165 1166 /* 1167 * Any packet send requests have been lost, too, and likewise forgotten 1168 * by ndev. 
Thus, we need to forget that we sent any packets, so that 1169 * they will be resent after the driver comes back up. That *may* 1170 * cause packet duplication, but that is preferable over packet loss. 1171 */ 1172 ethif->ethif_snd.es_unsentp = ðif->ethif_snd.es_head; 1173 1174 /* 1175 * We fully restart the receive queue, because all receive requests 1176 * have been forgotten by ndev as well now and it is easier to simply 1177 * reconstruct the receive queue in its entirety later on. 1178 */ 1179 ethif_drain(ethif); 1180 1181 /* Make sure we do not attempt to initiate new requests for now. */ 1182 ethif->ethif_flags &= ~ETHIFF_FIRST_CONF; 1183 ethif->ethif_flags |= ETHIFF_DISABLED; 1184 } 1185 1186 /* 1187 * Dequeue and discard the packet at the head of the send queue. 1188 */ 1189 static void 1190 ethif_dequeue_send(struct ethif * ethif) 1191 { 1192 struct pbuf *pref, *pbuf, **pnext; 1193 unsigned int count, spares; 1194 1195 /* 1196 * The send queue is a linked list of reference buffers, each of which 1197 * links to the actual packet. Dequeue the first reference buffer. 1198 */ 1199 pref = ethif->ethif_snd.es_head; 1200 assert(pref != NULL); 1201 1202 pnext = pchain_end(pref); 1203 1204 if (ethif->ethif_snd.es_unsentp == pnext) 1205 ethif->ethif_snd.es_unsentp = ðif->ethif_snd.es_head; 1206 1207 if ((ethif->ethif_snd.es_head = *pnext) == NULL) 1208 ethif->ethif_snd.es_tailp = ðif->ethif_snd.es_head; 1209 1210 /* Do this before possibly calling pbuf_clen() below.. */ 1211 *pnext = NULL; 1212 1213 /* 1214 * If we never made a copy of the original packet, we now have it 1215 * pointed to by a reference buffer. If so, decrease the reference 1216 * count of the actual packet, thereby freeing it if lwIP itself was 1217 * already done with. Otherwise, the copy of the packet is the 1218 * reference buffer itself. In both cases we need to free that buffer. 
1219 */ 1220 if (pref->type == PBUF_REF) { 1221 pbuf = (struct pbuf *)pref->payload; 1222 1223 pbuf_free(pbuf); 1224 1225 count = pref->len; 1226 } else 1227 count = pbuf_clen(pref); 1228 1229 assert(count > 0); 1230 assert(ethif->ethif_snd.es_count >= count); 1231 ethif->ethif_snd.es_count -= count; 1232 1233 if (ethif->ethif_snd.es_count >= ETHIF_PBUF_MIN) 1234 spares = count; 1235 else if (ethif->ethif_snd.es_count + count > ETHIF_PBUF_MIN) 1236 spares = ethif->ethif_snd.es_count + count - ETHIF_PBUF_MIN; 1237 else 1238 spares = 0; 1239 1240 assert(ethif_spares >= spares); 1241 ethif_spares -= spares; 1242 1243 /* Free the reference buffer as well. */ 1244 pbuf_free(pref); 1245 1246 /* If the send queue is now empty, clear the IFF_OACTIVE flag. */ 1247 if (ethif->ethif_snd.es_head == NULL) 1248 ifdev_update_ifflags(ðif->ethif_ifdev, 1249 ifdev_get_ifflags(ðif->ethif_ifdev) & ~IFF_OACTIVE); 1250 } 1251 1252 /* 1253 * The ndev layer reports that a network device driver has been permanently 1254 * shut down. Remove the corresponding ethernet interface from the system. 1255 */ 1256 void 1257 ethif_remove(struct ethif * ethif) 1258 { 1259 int r; 1260 1261 /* Clear the send and receive queues. */ 1262 while (ethif->ethif_snd.es_head != NULL) 1263 ethif_dequeue_send(ethif); 1264 1265 ethif_drain(ethif); 1266 1267 /* Let the ifdev module deal with most other removal aspects. */ 1268 if ((r = ifdev_remove(ðif->ethif_ifdev)) != OK) 1269 panic("unable to remove ethernet interface: %d", r); 1270 1271 /* Finally, readd the ethif object to the free list. */ 1272 SIMPLEQ_INSERT_HEAD(ðif_freelist, ethif, ethif_next); 1273 } 1274 1275 /* 1276 * The ndev layer reports that the (oldest) pending configuration request has 1277 * completed with the given result. 1278 */ 1279 void 1280 ethif_configured(struct ethif * ethif, int32_t result) 1281 { 1282 1283 /* 1284 * The driver is not supposed to return failure in response to a 1285 * configure result. 
If it does, we have no proper way to recover, as 1286 * we may already have applied part of the new configuration to netif. 1287 * For now, just report failure and then pretend success. 1288 */ 1289 if (result < 0) { 1290 printf("LWIP: driver '%s' replied with conf result %d\n", 1291 ethif_get_name(ethif), result); 1292 1293 result = 0; 1294 } 1295 1296 if (ethif->ethif_flags & ETHIFF_FIRST_CONF) 1297 ethif->ethif_flags &= ~ETHIFF_FIRST_CONF; 1298 else 1299 ethif_post_conf(ethif); 1300 1301 /* 1302 * For now, the result is simply a boolean value indicating whether the 1303 * driver is using the all-multicast receive mode instead of the 1304 * multicast-list receive mode. We can turn it into a bitmap later. 1305 */ 1306 if (result != 0) { 1307 ethif->ethif_active.nconf_mode &= ~NDEV_MODE_MCAST_LIST; 1308 ethif->ethif_active.nconf_mode |= NDEV_MODE_MCAST_ALL; 1309 } 1310 1311 /* The interface flags may have changed now, so update them. */ 1312 ethif_update_ifflags(ethif); 1313 1314 /* Regular operation will resume from the polling function. */ 1315 } 1316 1317 /* 1318 * The ndev layer reports that the first packet on the send queue has been 1319 * successfully transmitted with 'result' set to OK, or dropped if 'result' is 1320 * negative. The latter may happen if the interface was taken down while there 1321 * were still packets in transit. 1322 */ 1323 void 1324 ethif_sent(struct ethif * ethif, int32_t result) 1325 { 1326 1327 ethif_dequeue_send(ethif); 1328 1329 if (result < 0) 1330 ifdev_output_drop(ðif->ethif_ifdev); 1331 1332 /* More requests may be sent from the polling function now. */ 1333 } 1334 1335 /* 1336 * The ndev layer reports that the first buffer on the receive queue has been 1337 * filled with a packet of 'result' bytes, or if 'result' is negative, the 1338 * receive request has been aborted. 
1339 */ 1340 void 1341 ethif_received(struct ethif * ethif, int32_t result) 1342 { 1343 struct pbuf *pbuf, *pwalk, **pnext; 1344 size_t left; 1345 1346 /* 1347 * Start by removing the first buffer chain off the receive queue. The 1348 * ndev layer guarantees that there ever was a receive request at all. 1349 */ 1350 if ((pbuf = ethif->ethif_rcv.er_head) == NULL) 1351 panic("driver received packet but queue empty"); 1352 1353 pnext = pchain_end(pbuf); 1354 1355 if ((ethif->ethif_rcv.er_head = *pnext) == NULL) 1356 ethif->ethif_rcv.er_tailp = ðif->ethif_rcv.er_head; 1357 *pnext = NULL; 1358 1359 /* Decide if we can and should deliver a packet to the layers above. */ 1360 if (result <= 0 || !ethif_can_recv(ethif)) { 1361 pbuf_free(pbuf); 1362 1363 return; 1364 } 1365 1366 if (result > pbuf->tot_len) { 1367 printf("LWIP: driver '%s' returned bad packet size (%zd)\n", 1368 ethif_get_name(ethif), (ssize_t)result); 1369 1370 pbuf_free(pbuf); 1371 1372 return; 1373 } 1374 1375 /* 1376 * The packet often does not use all of the buffers, or at least not 1377 * all of the last buffer. Adjust lengths for the buffers that contain 1378 * part of the packet, and free the remaining (unused) buffers, if any. 1379 */ 1380 left = (size_t)result; 1381 1382 for (pwalk = pbuf; ; pwalk = pwalk->next) { 1383 pwalk->tot_len = left; 1384 if (pwalk->len > left) 1385 pwalk->len = left; 1386 left -= pwalk->len; 1387 if (left == 0) 1388 break; 1389 } 1390 1391 if (pwalk->next != NULL) { 1392 pbuf_free(pwalk->next); 1393 1394 pwalk->next = NULL; 1395 } 1396 1397 /* 1398 * Finally, hand off the packet to the layers above. We go through 1399 * ifdev so that it can pass the packet to BPF devices and update 1400 * statistics and all that. 1401 */ 1402 ifdev_input(ðif->ethif_ifdev, pbuf, NULL /*netif*/, 1403 TRUE /*to_bpf*/); 1404 } 1405 1406 /* 1407 * The ndev layer reports a network driver status update. If anything has 1408 * changed since the last status, we may have to take action. 
The given 1409 * statistics counters are relative to the previous status report. 1410 */ 1411 void 1412 ethif_status(struct ethif * ethif, uint32_t link, uint32_t media, 1413 uint32_t oerror, uint32_t coll, uint32_t ierror, uint32_t iqdrop) 1414 { 1415 struct if_data *ifdata; 1416 1417 ethif_set_status(ethif, link, media); 1418 1419 ifdata = ifdev_get_ifdata(ðif->ethif_ifdev); 1420 ifdata->ifi_oerrors += oerror; 1421 ifdata->ifi_collisions += coll; 1422 ifdata->ifi_ierrors += ierror; 1423 ifdata->ifi_iqdrops += iqdrop; 1424 } 1425 1426 /* 1427 * Set NetBSD-style interface flags (IFF_) for an ethernet interface. 1428 */ 1429 static int 1430 ethif_set_ifflags(struct ifdev * ifdev, unsigned int ifflags) 1431 { 1432 struct ethif *ethif = (struct ethif *)ifdev; 1433 uint32_t mode, flags; 1434 1435 /* 1436 * We do not support IFF_NOARP at this time, because lwIP does not: the 1437 * idea of IFF_NOARP is that only static ARP entries are used, but lwIP 1438 * does not support separating static from dynamic ARP operation. The 1439 * flag does not appear to be particularly widely used anyway. 1440 */ 1441 if ((ifflags & ~(IFF_UP | IFF_DEBUG | IFF_LINK0 | IFF_LINK1 | 1442 IFF_LINK2)) != 0) 1443 return EINVAL; 1444 1445 mode = ethif->ethif_wanted.nconf_mode; 1446 if ((ifflags & IFF_UP) && mode == NDEV_MODE_DOWN) { 1447 mode = NDEV_MODE_UP; 1448 1449 /* Always enable broadcast receipt when supported. */ 1450 if (ethif->ethif_caps & NDEV_CAP_BCAST) 1451 mode |= NDEV_MODE_BCAST; 1452 1453 if (ifdev_is_promisc(ifdev)) 1454 mode |= NDEV_MODE_PROMISC; 1455 1456 /* 1457 * The multicast flags will be set right before we send the 1458 * request to the driver. 
1459 */ 1460 } else if (!(ifflags & IFF_UP) && mode != NDEV_MODE_DOWN) 1461 ethif->ethif_wanted.nconf_mode = NDEV_MODE_DOWN; 1462 1463 if (mode != ethif->ethif_wanted.nconf_mode) { 1464 ethif->ethif_wanted.nconf_mode = mode; 1465 ethif->ethif_wanted.nconf_set |= NDEV_SET_MODE; 1466 } 1467 1468 /* 1469 * Some of the interface flags (UP, DEBUG, PROMISC, LINK[0-2]) are a 1470 * reflection of the intended state as set by userland before, so that 1471 * a userland utility will never not see the flag it just set (or the 1472 * other way around). These flags therefore do not necessarily reflect 1473 * what is actually going on at that moment. We cannot have both. 1474 */ 1475 flags = 0; 1476 if (ifflags & IFF_DEBUG) 1477 flags |= NDEV_FLAG_DEBUG; 1478 if (ifflags & IFF_LINK0) 1479 flags |= NDEV_FLAG_LINK0; 1480 if (ifflags & IFF_LINK1) 1481 flags |= NDEV_FLAG_LINK1; 1482 if (ifflags & IFF_LINK2) 1483 flags |= NDEV_FLAG_LINK2; 1484 1485 if (flags != ethif->ethif_wanted.nconf_flags) { 1486 ethif->ethif_wanted.nconf_flags = flags; 1487 ethif->ethif_wanted.nconf_set |= NDEV_SET_FLAGS; 1488 } 1489 1490 /* The changes will be picked up from the polling function. */ 1491 return OK; 1492 } 1493 1494 /* 1495 * Convert a bitmask of ndev-layer capabilities (NDEV_CAP_) to NetBSD-style 1496 * interface capabilities (IFCAP_). 
1497 */ 1498 static uint64_t 1499 ethif_cap_to_ifcap(uint32_t caps) 1500 { 1501 uint64_t ifcap; 1502 1503 ifcap = 0; 1504 if (caps & NDEV_CAP_CS_IP4_TX) 1505 ifcap |= IFCAP_CSUM_IPv4_Tx; 1506 if (caps & NDEV_CAP_CS_IP4_RX) 1507 ifcap |= IFCAP_CSUM_IPv4_Rx; 1508 if (caps & NDEV_CAP_CS_UDP_TX) 1509 ifcap |= IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx; 1510 if (caps & NDEV_CAP_CS_UDP_RX) 1511 ifcap |= IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx; 1512 if (caps & NDEV_CAP_CS_TCP_TX) 1513 ifcap |= IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx; 1514 if (caps & NDEV_CAP_CS_TCP_RX) 1515 ifcap |= IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx; 1516 1517 return ifcap; 1518 } 1519 1520 /* 1521 * Retrieve potential and enabled NetBSD-style interface capabilities (IFCAP_). 1522 */ 1523 static void 1524 ethif_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap, uint64_t * ifena) 1525 { 1526 struct ethif *ethif = (struct ethif *)ifdev; 1527 1528 *ifcap = ethif_cap_to_ifcap(ethif->ethif_caps); 1529 *ifena = ethif_cap_to_ifcap(ethif->ethif_wanted.nconf_caps); 1530 } 1531 1532 /* 1533 * Set NetBSD-style enabled interface capabilities (IFCAP_). 1534 */ 1535 static int 1536 ethif_set_ifcap(struct ifdev * ifdev, uint64_t ifcap) 1537 { 1538 struct ethif *ethif = (struct ethif *)ifdev; 1539 unsigned int flags; 1540 uint32_t caps; 1541 1542 if (ifcap & ~(IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx | 1543 IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx | 1544 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx | 1545 IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx | 1546 IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx)) 1547 return EINVAL; 1548 1549 /* 1550 * Some IPv4/IPv6 flags need to be set together in order to be picked 1551 * up. Unfortunately, that is all we can do given that lwIP does not 1552 * distinguish IPv4/IPv6 when it comes to TCP/UDP checksum flags. 
1553 */ 1554 caps = 0; 1555 if (ifcap & IFCAP_CSUM_IPv4_Tx) 1556 caps |= NDEV_CAP_CS_IP4_TX; 1557 if (ifcap & IFCAP_CSUM_IPv4_Rx) 1558 caps |= NDEV_CAP_CS_IP4_RX; 1559 if ((ifcap & (IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx)) == 1560 (IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx)) 1561 caps |= NDEV_CAP_CS_UDP_TX; 1562 if ((ifcap & (IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx)) == 1563 (IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx)) 1564 caps |= NDEV_CAP_CS_UDP_RX; 1565 if ((ifcap & (IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx)) == 1566 (IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx)) 1567 caps |= NDEV_CAP_CS_TCP_TX; 1568 if ((ifcap & (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx)) == 1569 (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx)) 1570 caps |= NDEV_CAP_CS_TCP_RX; 1571 1572 /* 1573 * When changing checksumming capabilities, we have to make sure that 1574 * we only ever checksum too much and never too little. This means 1575 * that we enable any checksum options in netif here, and disable any 1576 * checksum options in netif only after driver configuration. 1577 * 1578 * Note that we have to draw the line somewhere with this kind of 1579 * self-protection, and that line is short of TCP retransmission: we 1580 * see it as lwIP's job to compute checksums for retransmitted TCP 1581 * packets if they were saved across checksum changes. Even though 1582 * lwIP may not care, there is little we can do about that anyway. 
1583 */ 1584 if (ethif->ethif_wanted.nconf_caps != caps) { 1585 flags = ethif_get_netif(ethif)->chksum_flags; 1586 1587 if (!(caps & NDEV_CAP_CS_IP4_TX)) 1588 flags |= NETIF_CHECKSUM_GEN_IP; 1589 if (!(caps & NDEV_CAP_CS_IP4_RX)) 1590 flags |= NETIF_CHECKSUM_CHECK_IP; 1591 if (!(caps & NDEV_CAP_CS_UDP_TX)) 1592 flags |= NETIF_CHECKSUM_GEN_UDP; 1593 if (!(caps & NDEV_CAP_CS_UDP_RX)) 1594 flags |= NETIF_CHECKSUM_CHECK_UDP; 1595 if (!(caps & NDEV_CAP_CS_TCP_TX)) 1596 flags |= NETIF_CHECKSUM_GEN_TCP; 1597 if (!(caps & NDEV_CAP_CS_TCP_RX)) 1598 flags |= NETIF_CHECKSUM_CHECK_TCP; 1599 1600 NETIF_SET_CHECKSUM_CTRL(ethif_get_netif(ethif), flags); 1601 1602 ethif->ethif_wanted.nconf_caps = caps; 1603 ethif->ethif_wanted.nconf_set |= NDEV_SET_CAPS; 1604 } 1605 1606 /* The changes will be picked up from the polling function. */ 1607 return OK; 1608 } 1609 1610 /* 1611 * Retrieve NetBSD-style interface media type (IFM_). Return both the current 1612 * media type selection and the driver-reported active media type. 1613 */ 1614 static void 1615 ethif_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive) 1616 { 1617 struct ethif *ethif = (struct ethif *)ifdev; 1618 1619 /* 1620 * For the current select, report back whatever the user gave us, even 1621 * if it has not reached the driver at all yet. 1622 */ 1623 *ifcurrent = (int)ethif->ethif_wanted.nconf_media; 1624 *ifactive = (int)ethif->ethif_media; 1625 } 1626 1627 /* 1628 * Set current NetBSD-style interface media type (IFM_). 1629 */ 1630 static int 1631 ethif_set_ifmedia(struct ifdev * ifdev, int ifmedia) 1632 { 1633 struct ethif *ethif = (struct ethif *)ifdev; 1634 1635 /* 1636 * We currently completely lack the infrastructure to suspend the 1637 * current IOCTL call until the driver replies (or disappears). 1638 * Therefore we have no choice but to return success here, even if the 1639 * driver cannot accept the change. 
The driver does notify us of media 1640 * changes, so the user may observe the new active media type later. 1641 * Also note that the new media type may not be the requested type, 1642 * which is why we do not perform any checks against the wanted or 1643 * active media types. 1644 */ 1645 ethif->ethif_wanted.nconf_media = (uint32_t)ifmedia; 1646 ethif->ethif_wanted.nconf_set |= NDEV_SET_MEDIA; 1647 1648 /* The change will be picked up from the polling function. */ 1649 return OK; 1650 } 1651 1652 /* 1653 * Enable or disable promiscuous mode on the interface. 1654 */ 1655 static void 1656 ethif_set_promisc(struct ifdev * ifdev, int promisc) 1657 { 1658 struct ethif *ethif = (struct ethif *)ifdev; 1659 1660 if (ethif->ethif_wanted.nconf_mode != NDEV_MODE_DOWN) { 1661 if (promisc) 1662 ethif->ethif_wanted.nconf_mode |= NDEV_MODE_PROMISC; 1663 else 1664 ethif->ethif_wanted.nconf_mode &= ~NDEV_MODE_PROMISC; 1665 ethif->ethif_wanted.nconf_set |= NDEV_SET_MODE; 1666 } 1667 1668 /* The change will be picked up from the polling function. */ 1669 } 1670 1671 /* 1672 * Set the hardware address on the interface. 1673 */ 1674 static int 1675 ethif_set_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr) 1676 { 1677 struct ethif *ethif = (struct ethif *)ifdev; 1678 1679 if (!(ethif->ethif_caps & NDEV_CAP_HWADDR)) 1680 return EINVAL; 1681 1682 memcpy(ðif->ethif_wanted.nconf_hwaddr.nhwa_addr, hwaddr, 1683 ETHARP_HWADDR_LEN); 1684 ethif->ethif_wanted.nconf_set |= NDEV_SET_HWADDR; 1685 1686 /* The change will be picked up from the polling function. */ 1687 return OK; 1688 } 1689 1690 /* 1691 * Set the Maximum Transmission Unit for this interface. Return TRUE if the 1692 * new value is acceptable, in which case the caller will do the rest. Return 1693 * FALSE otherwise. 
1694 */ 1695 static int 1696 ethif_set_mtu(struct ifdev * ifdev __unused, unsigned int mtu) 1697 { 1698 1699 return (mtu <= ETHIF_MAX_MTU); 1700 } 1701 1702 static const struct ifdev_ops ethif_ops = { 1703 .iop_init = ethif_init_netif, 1704 .iop_input = netif_input, 1705 .iop_output = ethif_output, 1706 .iop_output_v4 = etharp_output, 1707 .iop_output_v6 = ethip6_output, 1708 .iop_hdrcmplt = ethif_hdrcmplt, 1709 .iop_poll = ethif_poll, 1710 .iop_set_ifflags = ethif_set_ifflags, 1711 .iop_get_ifcap = ethif_get_ifcap, 1712 .iop_set_ifcap = ethif_set_ifcap, 1713 .iop_get_ifmedia = ethif_get_ifmedia, 1714 .iop_set_ifmedia = ethif_set_ifmedia, 1715 .iop_set_promisc = ethif_set_promisc, 1716 .iop_set_hwaddr = ethif_set_hwaddr, 1717 .iop_set_mtu = ethif_set_mtu, 1718 }; 1719