1 /* 2 * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. 3 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 4 * Copyright (c) 2003 Jonathan Lemon. All rights reserved. 5 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. 9 * 10 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright 11 * into this one around July 8 2004. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 33 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 35 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/msgport.h> 44 #include <sys/proc.h> 45 #include <sys/interrupt.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/socketvar.h> 49 #include <net/if.h> 50 #include <net/if_var.h> 51 #include <net/netisr2.h> 52 #include <machine/cpufunc.h> 53 #include <machine/smp.h> 54 55 #include <sys/thread2.h> 56 #include <sys/msgport2.h> 57 #include <net/netmsg2.h> 58 #include <sys/mplock2.h> 59 60 #include <vm/vm_extern.h> 61 62 static void netmsg_service_port_init(lwkt_port_t); 63 static void netmsg_service_loop(void *arg); 64 static void netisr_hashfn0(struct mbuf **mp, int hoff); 65 static void netisr_nohashck(struct mbuf *, const struct pktinfo *); 66 67 struct netmsg_port_registration { 68 TAILQ_ENTRY(netmsg_port_registration) npr_entry; 69 lwkt_port_t npr_port; 70 }; 71 72 struct netisr_rollup { 73 TAILQ_ENTRY(netisr_rollup) ru_entry; 74 netisr_ru_t ru_func; 75 int ru_prio; 76 void *ru_key; 77 }; 78 79 struct netmsg_rollup { 80 struct netmsg_base base; 81 netisr_ru_t func; 82 int prio; 83 void *key; 84 }; 85 86 struct netmsg_barrier { 87 struct netmsg_base base; 88 volatile cpumask_t *br_cpumask; 89 volatile uint32_t br_done; 90 }; 91 92 #define NETISR_BR_NOTDONE 0x1 93 #define NETISR_BR_WAITDONE 0x80000000 94 95 struct netisr_barrier { 96 struct netmsg_barrier *br_msgs[MAXCPU]; 97 int br_isset; 98 }; 99 100 struct netisr_data { 101 struct thread thread; 102 #ifdef INVARIANTS 103 void *netlastfunc; 104 #endif 105 TAILQ_HEAD(, netisr_rollup) netrulist; 106 }; 107 108 static struct netisr_data *netisr_data[MAXCPU]; 109 110 static struct netisr netisrs[NETISR_MAX]; 111 static TAILQ_HEAD(,netmsg_port_registration) netreglist; 112 113 /* Per-CPU thread to handle any protocol. */ 114 struct thread *netisr_threads[MAXCPU]; 115 116 lwkt_port netisr_afree_rport; 117 lwkt_port netisr_afree_free_so_rport; 118 lwkt_port netisr_adone_rport; 119 lwkt_port netisr_apanic_rport; 120 lwkt_port netisr_sync_port; 121 122 static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t); 123 124 SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr"); 125 126 static int netisr_rollup_limit = 32; 127 SYSCTL_INT(_net_netisr, OID_AUTO, rollup_limit, CTLFLAG_RW, 128 &netisr_rollup_limit, 0, "Message to process before rollup"); 129 130 int netisr_ncpus; 131 TUNABLE_INT("net.netisr.ncpus", &netisr_ncpus); 132 SYSCTL_INT(_net_netisr, OID_AUTO, ncpus, CTLFLAG_RD, 133 &netisr_ncpus, 0, "# of CPUs to handle network messages"); 134 135 /* 136 * netisr_afree_rport replymsg function, only used to handle async 137 * messages which the sender has abandoned to their fate. 138 */ 139 static void 140 netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) 141 { 142 kfree(msg, M_LWKTMSG); 143 } 144 145 static void 146 netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg) 147 { 148 sofree(((netmsg_t)msg)->base.nm_so); 149 kfree(msg, M_LWKTMSG); 150 } 151 152 /* 153 * We need a custom putport function to handle the case where the 154 * message target is the current thread's message port. This case 155 * can occur when the TCP or UDP stack does a direct callback to NFS and NFS 156 * then turns around and executes a network operation synchronously. 157 * 158 * To prevent deadlocking, we must execute these self-referential messages 159 * synchronously, effectively turning the message into a glorified direct 160 * procedure call back into the protocol stack. The operation must be 161 * complete on return or we will deadlock, so panic if it isn't. 162 * 163 * However, the target function is under no obligation to immediately 164 * reply the message. It may forward it elsewhere. 165 */ 166 static int 167 netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) 168 { 169 netmsg_base_t nmsg = (void *)lmsg; 170 171 if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) { 172 nmsg->nm_dispatch((netmsg_t)nmsg); 173 return(EASYNC); 174 } else { 175 return(netmsg_fwd_port_fn(port, lmsg)); 176 } 177 } 178 179 /* 180 * UNIX DOMAIN sockets still have to run their uipc functions synchronously, 181 * because they depend on the user proc context for a number of things 182 * (like creds) which we have not yet incorporated into the message structure. 183 * 184 * However, we maintain or message/port abstraction. Having a special 185 * synchronous port which runs the commands synchronously gives us the 186 * ability to serialize operations in one place later on when we start 187 * removing the BGL. 188 */ 189 static int 190 netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg) 191 { 192 netmsg_base_t nmsg = (void *)lmsg; 193 194 KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0); 195 196 lmsg->ms_target_port = port; /* required for abort */ 197 nmsg->nm_dispatch((netmsg_t)nmsg); 198 return(EASYNC); 199 } 200 201 static void 202 netisr_init(void) 203 { 204 int i; 205 206 if (netisr_ncpus <= 0 || netisr_ncpus > ncpus) { 207 /* Default. */ 208 netisr_ncpus = ncpus; 209 } 210 if (netisr_ncpus > NETISR_CPUMAX) 211 netisr_ncpus = NETISR_CPUMAX; 212 213 TAILQ_INIT(&netreglist); 214 215 /* 216 * Create default per-cpu threads for generic protocol handling. 217 */ 218 for (i = 0; i < ncpus; ++i) { 219 struct netisr_data *nd; 220 221 nd = (void *)kmem_alloc3(&kernel_map, sizeof(*nd), 222 VM_SUBSYS_GD, KM_CPU(i)); 223 memset(nd, 0, sizeof(*nd)); 224 TAILQ_INIT(&nd->netrulist); 225 netisr_data[i] = nd; 226 227 lwkt_create(netmsg_service_loop, NULL, &netisr_threads[i], 228 &nd->thread, TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU, 229 i, "netisr %d", i); 230 netmsg_service_port_init(&netisr_threads[i]->td_msgport); 231 lwkt_schedule(netisr_threads[i]); 232 } 233 234 /* 235 * The netisr_afree_rport is a special reply port which automatically 236 * frees the replied message. The netisr_adone_rport simply marks 237 * the message as being done. The netisr_apanic_rport panics if 238 * the message is replied to. 239 */ 240 lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); 241 lwkt_initport_replyonly(&netisr_afree_free_so_rport, 242 netisr_autofree_free_so_reply); 243 lwkt_initport_replyonly_null(&netisr_adone_rport); 244 lwkt_initport_panic(&netisr_apanic_rport); 245 246 /* 247 * The netisr_syncport is a special port which executes the message 248 * synchronously and waits for it if EASYNC is returned. 249 */ 250 lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); 251 } 252 SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); 253 254 /* 255 * Finish initializing the message port for a netmsg service. This also 256 * registers the port for synchronous cleanup operations such as when an 257 * ifnet is being destroyed. There is no deregistration API yet. 258 */ 259 static void 260 netmsg_service_port_init(lwkt_port_t port) 261 { 262 struct netmsg_port_registration *reg; 263 264 /* 265 * Override the putport function. Our custom function checks for 266 * self-references and executes such commands synchronously. 267 */ 268 if (netmsg_fwd_port_fn == NULL) 269 netmsg_fwd_port_fn = port->mp_putport; 270 KKASSERT(netmsg_fwd_port_fn == port->mp_putport); 271 port->mp_putport = netmsg_put_port; 272 273 /* 274 * Keep track of ports using the netmsg API so we can synchronize 275 * certain operations (such as freeing an ifnet structure) across all 276 * consumers. 277 */ 278 reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); 279 reg->npr_port = port; 280 TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); 281 } 282 283 /* 284 * This function synchronizes the caller with all netmsg services. For 285 * example, if an interface is being removed we must make sure that all 286 * packets related to that interface complete processing before the structure 287 * can actually be freed. This sort of synchronization is an alternative to 288 * ref-counting the netif, removing the ref counting overhead in favor of 289 * placing additional overhead in the netif freeing sequence (where it is 290 * inconsequential). 291 */ 292 void 293 netmsg_service_sync(void) 294 { 295 struct netmsg_port_registration *reg; 296 struct netmsg_base smsg; 297 298 netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); 299 300 TAILQ_FOREACH(reg, &netreglist, npr_entry) { 301 lwkt_domsg(reg->npr_port, &smsg.lmsg, 0); 302 } 303 } 304 305 /* 306 * The netmsg function simply replies the message. API semantics require 307 * EASYNC to be returned if the netmsg function disposes of the message. 308 */ 309 void 310 netmsg_sync_handler(netmsg_t msg) 311 { 312 lwkt_replymsg(&msg->lmsg, 0); 313 } 314 315 /* 316 * Generic netmsg service loop. Some protocols may roll their own but all 317 * must do the basic command dispatch function call done here. 318 */ 319 static void 320 netmsg_service_loop(void *arg) 321 { 322 netmsg_base_t msg; 323 thread_t td = curthread; 324 int limit; 325 struct netisr_data *nd = netisr_data[mycpuid]; 326 327 td->td_type = TD_TYPE_NETISR; 328 329 while ((msg = lwkt_waitport(&td->td_msgport, 0))) { 330 struct netisr_rollup *ru; 331 332 /* 333 * Run up to 512 pending netmsgs. 334 */ 335 limit = netisr_rollup_limit; 336 do { 337 KASSERT(msg->nm_dispatch != NULL, 338 ("netmsg_service isr %d badmsg", 339 msg->lmsg.u.ms_result)); 340 /* 341 * Don't match so_port, if the msg explicitly 342 * asks us to ignore its so_port. 343 */ 344 if ((msg->lmsg.ms_flags & MSGF_IGNSOPORT) == 0 && 345 msg->nm_so && 346 msg->nm_so->so_port != &td->td_msgport) { 347 /* 348 * Sockets undergoing connect or disconnect 349 * ops can change ports on us. Chase the 350 * port. 351 */ 352 #ifdef foo 353 /* 354 * This could be quite common for protocols 355 * which support asynchronous pru_connect, 356 * e.g. TCP, so kprintf socket port chasing 357 * could be too verbose for the console. 358 */ 359 kprintf("%s: Warning, port changed so=%p\n", 360 __func__, msg->nm_so); 361 #endif 362 lwkt_forwardmsg(msg->nm_so->so_port, 363 &msg->lmsg); 364 } else { 365 /* 366 * We are on the correct port, dispatch it. 367 */ 368 #ifdef INVARIANTS 369 nd->netlastfunc = msg->nm_dispatch; 370 #endif 371 msg->nm_dispatch((netmsg_t)msg); 372 } 373 if (--limit == 0) 374 break; 375 } while ((msg = lwkt_getport(&td->td_msgport)) != NULL); 376 377 /* 378 * Run all registered rollup functions for this cpu 379 * (e.g. tcp_willblock()). 380 */ 381 TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) 382 ru->ru_func(); 383 } 384 } 385 386 /* 387 * Forward a packet to a netisr service function. 388 * 389 * If the packet has not been assigned to a protocol thread we call 390 * the port characterization function to assign it. The caller must 391 * clear M_HASH (or not have set it in the first place) if the caller 392 * wishes the packet to be recharacterized. 393 */ 394 int 395 netisr_queue(int num, struct mbuf *m) 396 { 397 struct netisr *ni; 398 struct netmsg_packet *pmsg; 399 lwkt_port_t port; 400 401 KASSERT((num > 0 && num <= NELEM(netisrs)), 402 ("Bad isr %d", num)); 403 404 ni = &netisrs[num]; 405 if (ni->ni_handler == NULL) { 406 kprintf("%s: Unregistered isr %d\n", __func__, num); 407 m_freem(m); 408 return (EIO); 409 } 410 411 /* 412 * Figure out which protocol thread to send to. This does not 413 * have to be perfect but performance will be really good if it 414 * is correct. Major protocol inputs such as ip_input() will 415 * re-characterize the packet as necessary. 416 */ 417 if ((m->m_flags & M_HASH) == 0) { 418 ni->ni_hashfn(&m, 0); 419 if (m == NULL) 420 return (EIO); 421 if ((m->m_flags & M_HASH) == 0) { 422 kprintf("%s(%d): packet hash failed\n", 423 __func__, num); 424 m_freem(m); 425 return (EIO); 426 } 427 } 428 429 /* 430 * Get the protocol port based on the packet hash, initialize 431 * the netmsg, and send it off. 432 */ 433 port = netisr_hashport(m->m_pkthdr.hash); 434 pmsg = &m->m_hdr.mh_netmsg; 435 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 436 0, ni->ni_handler); 437 pmsg->nm_packet = m; 438 pmsg->base.lmsg.u.ms_result = num; 439 lwkt_sendmsg(port, &pmsg->base.lmsg); 440 441 return (0); 442 } 443 444 /* 445 * Run a netisr service function on the packet. 446 * 447 * The packet must have been correctly characterized! 448 */ 449 int 450 netisr_handle(int num, struct mbuf *m) 451 { 452 struct netisr *ni; 453 struct netmsg_packet *pmsg; 454 lwkt_port_t port; 455 456 /* 457 * Get the protocol port based on the packet hash 458 */ 459 KASSERT((m->m_flags & M_HASH), ("packet not characterized")); 460 port = netisr_hashport(m->m_pkthdr.hash); 461 KASSERT(&curthread->td_msgport == port, ("wrong msgport")); 462 463 KASSERT((num > 0 && num <= NELEM(netisrs)), ("bad isr %d", num)); 464 ni = &netisrs[num]; 465 if (ni->ni_handler == NULL) { 466 kprintf("%s: unregistered isr %d\n", __func__, num); 467 m_freem(m); 468 return EIO; 469 } 470 471 /* 472 * Initialize the netmsg, and run the handler directly. 473 */ 474 pmsg = &m->m_hdr.mh_netmsg; 475 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 476 0, ni->ni_handler); 477 pmsg->nm_packet = m; 478 pmsg->base.lmsg.u.ms_result = num; 479 ni->ni_handler((netmsg_t)&pmsg->base); 480 481 return 0; 482 } 483 484 /* 485 * Pre-characterization of a deeper portion of the packet for the 486 * requested isr. 487 * 488 * The base of the ISR type (e.g. IP) that we want to characterize is 489 * at (hoff) relative to the beginning of the mbuf. This allows 490 * e.g. ether_characterize() to not have to adjust the m_data/m_len. 491 */ 492 void 493 netisr_characterize(int num, struct mbuf **mp, int hoff) 494 { 495 struct netisr *ni; 496 struct mbuf *m; 497 498 /* 499 * Validation 500 */ 501 m = *mp; 502 KKASSERT(m != NULL); 503 504 if (num < 0 || num >= NETISR_MAX) { 505 if (num == NETISR_MAX) { 506 m_sethash(m, 0); 507 return; 508 } 509 panic("Bad isr %d", num); 510 } 511 512 /* 513 * Valid netisr? 514 */ 515 ni = &netisrs[num]; 516 if (ni->ni_handler == NULL) { 517 kprintf("%s: Unregistered isr %d\n", __func__, num); 518 m_freem(m); 519 *mp = NULL; 520 } 521 522 /* 523 * Characterize the packet 524 */ 525 if ((m->m_flags & M_HASH) == 0) { 526 ni->ni_hashfn(mp, hoff); 527 m = *mp; 528 if (m && (m->m_flags & M_HASH) == 0) { 529 kprintf("%s(%d): packet hash failed\n", 530 __func__, num); 531 } 532 } 533 } 534 535 void 536 netisr_register(int num, netisr_fn_t handler, netisr_hashfn_t hashfn) 537 { 538 struct netisr *ni; 539 540 KASSERT((num > 0 && num <= NELEM(netisrs)), 541 ("netisr_register: bad isr %d", num)); 542 KKASSERT(handler != NULL); 543 544 if (hashfn == NULL) 545 hashfn = netisr_hashfn0; 546 547 ni = &netisrs[num]; 548 549 ni->ni_handler = handler; 550 ni->ni_hashck = netisr_nohashck; 551 ni->ni_hashfn = hashfn; 552 netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL); 553 } 554 555 void 556 netisr_register_hashcheck(int num, netisr_hashck_t hashck) 557 { 558 struct netisr *ni; 559 560 KASSERT((num > 0 && num <= NELEM(netisrs)), 561 ("netisr_register: bad isr %d", num)); 562 563 ni = &netisrs[num]; 564 ni->ni_hashck = hashck; 565 } 566 567 static void 568 netisr_register_rollup_dispatch(netmsg_t nmsg) 569 { 570 struct netmsg_rollup *nm = (struct netmsg_rollup *)nmsg; 571 int cpuid = mycpuid; 572 struct netisr_data *nd = netisr_data[cpuid]; 573 struct netisr_rollup *new_ru, *ru; 574 575 new_ru = kmalloc(sizeof(*new_ru), M_TEMP, M_WAITOK|M_ZERO); 576 new_ru->ru_func = nm->func; 577 new_ru->ru_prio = nm->prio; 578 579 /* 580 * Higher priority "rollup" appears first 581 */ 582 TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) { 583 if (ru->ru_prio < new_ru->ru_prio) { 584 TAILQ_INSERT_BEFORE(ru, new_ru, ru_entry); 585 goto done; 586 } 587 } 588 TAILQ_INSERT_TAIL(&nd->netrulist, new_ru, ru_entry); 589 done: 590 if (cpuid == 0) 591 nm->key = new_ru; 592 KKASSERT(nm->key != NULL); 593 new_ru->ru_key = nm->key; 594 595 netisr_forwardmsg_all(&nm->base, cpuid + 1); 596 } 597 598 struct netisr_rollup * 599 netisr_register_rollup(netisr_ru_t func, int prio) 600 { 601 struct netmsg_rollup nm; 602 603 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 604 netisr_register_rollup_dispatch); 605 nm.func = func; 606 nm.prio = prio; 607 nm.key = NULL; 608 netisr_domsg_global(&nm.base); 609 610 KKASSERT(nm.key != NULL); 611 return (nm.key); 612 } 613 614 static void 615 netisr_unregister_rollup_dispatch(netmsg_t nmsg) 616 { 617 struct netmsg_rollup *nm = (struct netmsg_rollup *)nmsg; 618 int cpuid = mycpuid; 619 struct netisr_data *nd = netisr_data[cpuid]; 620 struct netisr_rollup *ru; 621 622 TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) { 623 if (ru->ru_key == nm->key) 624 break; 625 } 626 if (ru == NULL) 627 panic("netisr: no rullup for %p", nm->key); 628 629 TAILQ_REMOVE(&nd->netrulist, ru, ru_entry); 630 kfree(ru, M_TEMP); 631 632 netisr_forwardmsg_all(&nm->base, cpuid + 1); 633 } 634 635 void 636 netisr_unregister_rollup(struct netisr_rollup *key) 637 { 638 struct netmsg_rollup nm; 639 640 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 641 netisr_unregister_rollup_dispatch); 642 nm.key = key; 643 netisr_domsg_global(&nm.base); 644 } 645 646 /* 647 * Return a default protocol control message processing thread port 648 */ 649 lwkt_port_t 650 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, 651 void *extra __unused, int *cpuid) 652 { 653 *cpuid = 0; 654 return netisr_cpuport(*cpuid); 655 } 656 657 /* 658 * This is a default netisr packet characterization function which 659 * sets M_HASH. If a netisr is registered with a NULL hashfn function 660 * this one is assigned. 661 * 662 * This function makes no attempt to validate the packet. 663 */ 664 static void 665 netisr_hashfn0(struct mbuf **mp, int hoff __unused) 666 { 667 668 m_sethash(*mp, 0); 669 } 670 671 /* 672 * schednetisr() is used to call the netisr handler from the appropriate 673 * netisr thread for polling and other purposes. 674 * 675 * This function may be called from a hard interrupt or IPI and must be 676 * MP SAFE and non-blocking. We use a fixed per-cpu message instead of 677 * trying to allocate one. We must get ourselves onto the target cpu 678 * to safely check the MSGF_DONE bit on the message but since the message 679 * will be sent to that cpu anyway this does not add any extra work beyond 680 * what lwkt_sendmsg() would have already had to do to schedule the target 681 * thread. 682 */ 683 static void 684 schednetisr_remote(void *data) 685 { 686 int num = (int)(intptr_t)data; 687 struct netisr *ni = &netisrs[num]; 688 lwkt_port_t port = &netisr_threads[0]->td_msgport; 689 netmsg_base_t pmsg; 690 691 pmsg = &netisrs[num].ni_netmsg; 692 if (pmsg->lmsg.ms_flags & MSGF_DONE) { 693 netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler); 694 pmsg->lmsg.u.ms_result = num; 695 lwkt_sendmsg(port, &pmsg->lmsg); 696 } 697 } 698 699 void 700 schednetisr(int num) 701 { 702 KASSERT((num > 0 && num <= NELEM(netisrs)), 703 ("schednetisr: bad isr %d", num)); 704 KKASSERT(netisrs[num].ni_handler != NULL); 705 if (mycpu->gd_cpuid != 0) { 706 lwkt_send_ipiq(globaldata_find(0), 707 schednetisr_remote, (void *)(intptr_t)num); 708 } else { 709 crit_enter(); 710 schednetisr_remote((void *)(intptr_t)num); 711 crit_exit(); 712 } 713 } 714 715 static void 716 netisr_barrier_dispatch(netmsg_t nmsg) 717 { 718 struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg; 719 720 ATOMIC_CPUMASK_NANDBIT(*msg->br_cpumask, mycpu->gd_cpuid); 721 if (CPUMASK_TESTZERO(*msg->br_cpumask)) 722 wakeup(msg->br_cpumask); 723 724 for (;;) { 725 uint32_t done = msg->br_done; 726 727 cpu_ccfence(); 728 if ((done & NETISR_BR_NOTDONE) == 0) 729 break; 730 731 tsleep_interlock(&msg->br_done, 0); 732 if (atomic_cmpset_int(&msg->br_done, 733 done, done | NETISR_BR_WAITDONE)) 734 tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0); 735 } 736 737 lwkt_replymsg(&nmsg->lmsg, 0); 738 } 739 740 struct netisr_barrier * 741 netisr_barrier_create(void) 742 { 743 struct netisr_barrier *br; 744 745 br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO); 746 return br; 747 } 748 749 void 750 netisr_barrier_set(struct netisr_barrier *br) 751 { 752 volatile cpumask_t other_cpumask; 753 int i, cur_cpuid; 754 755 ASSERT_NETISR0; 756 KKASSERT(!br->br_isset); 757 758 other_cpumask = mycpu->gd_other_cpus; 759 CPUMASK_ANDMASK(other_cpumask, smp_active_mask); 760 cur_cpuid = mycpuid; 761 762 for (i = 0; i < ncpus; ++i) { 763 struct netmsg_barrier *msg; 764 765 if (i == cur_cpuid) 766 continue; 767 768 msg = kmalloc(sizeof(struct netmsg_barrier), 769 M_LWKTMSG, M_WAITOK); 770 771 /* 772 * Don't use priority message here; mainly to keep 773 * it ordered w/ the previous data packets sent by 774 * the caller. 775 */ 776 netmsg_init(&msg->base, NULL, &netisr_afree_rport, 0, 777 netisr_barrier_dispatch); 778 msg->br_cpumask = &other_cpumask; 779 msg->br_done = NETISR_BR_NOTDONE; 780 781 KKASSERT(br->br_msgs[i] == NULL); 782 br->br_msgs[i] = msg; 783 } 784 785 for (i = 0; i < ncpus; ++i) { 786 if (i == cur_cpuid) 787 continue; 788 lwkt_sendmsg(netisr_cpuport(i), &br->br_msgs[i]->base.lmsg); 789 } 790 791 while (CPUMASK_TESTNZERO(other_cpumask)) { 792 tsleep_interlock(&other_cpumask, 0); 793 if (CPUMASK_TESTNZERO(other_cpumask)) 794 tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0); 795 } 796 br->br_isset = 1; 797 } 798 799 void 800 netisr_barrier_rem(struct netisr_barrier *br) 801 { 802 int i, cur_cpuid; 803 804 ASSERT_NETISR0; 805 KKASSERT(br->br_isset); 806 807 cur_cpuid = mycpuid; 808 for (i = 0; i < ncpus; ++i) { 809 struct netmsg_barrier *msg = br->br_msgs[i]; 810 uint32_t done; 811 812 msg = br->br_msgs[i]; 813 br->br_msgs[i] = NULL; 814 815 if (i == cur_cpuid) 816 continue; 817 818 done = atomic_swap_int(&msg->br_done, 0); 819 if (done & NETISR_BR_WAITDONE) 820 wakeup(&msg->br_done); 821 } 822 br->br_isset = 0; 823 } 824 825 static void 826 netisr_nohashck(struct mbuf *m, const struct pktinfo *pi __unused) 827 { 828 m->m_flags &= ~M_HASH; 829 } 830 831 void 832 netisr_hashcheck(int num, struct mbuf *m, const struct pktinfo *pi) 833 { 834 struct netisr *ni; 835 836 if (num < 0 || num >= NETISR_MAX) 837 panic("Bad isr %d", num); 838 839 /* 840 * Valid netisr? 841 */ 842 ni = &netisrs[num]; 843 if (ni->ni_handler == NULL) 844 panic("Unregistered isr %d", num); 845 846 ni->ni_hashck(m, pi); 847 } 848