1 /* 2 * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. 3 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 4 * Copyright (c) 2003 Jonathan Lemon. All rights reserved. 5 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. 9 * 10 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright 11 * into this one around July 8 2004. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 33 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 35 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/msgport.h> 44 #include <sys/proc.h> 45 #include <sys/interrupt.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/socketvar.h> 49 #include <net/if.h> 50 #include <net/if_var.h> 51 #include <net/netisr2.h> 52 #include <machine/cpufunc.h> 53 #include <machine/smp.h> 54 55 #include <sys/thread2.h> 56 #include <sys/msgport2.h> 57 #include <net/netmsg2.h> 58 59 #include <vm/vm_extern.h> 60 61 static void netmsg_service_port_init(lwkt_port_t); 62 static void netmsg_service_loop(void *arg); 63 static void netisr_hashfn0(struct mbuf **mp, int hoff); 64 static void netisr_nohashck(struct mbuf *, const struct pktinfo *); 65 66 struct netmsg_port_registration { 67 TAILQ_ENTRY(netmsg_port_registration) npr_entry; 68 lwkt_port_t npr_port; 69 }; 70 71 struct netisr_rollup { 72 TAILQ_ENTRY(netisr_rollup) ru_entry; 73 netisr_ru_t ru_func; 74 int ru_prio; 75 void *ru_key; 76 }; 77 78 struct netmsg_rollup { 79 struct netmsg_base base; 80 netisr_ru_t func; 81 int prio; 82 void *key; 83 }; 84 85 struct netmsg_barrier { 86 struct netmsg_base base; 87 volatile cpumask_t *br_cpumask; 88 volatile uint32_t br_done; 89 }; 90 91 #define NETISR_BR_NOTDONE 0x1 92 #define NETISR_BR_WAITDONE 0x80000000 93 94 struct netisr_barrier { 95 struct netmsg_barrier *br_msgs[MAXCPU]; 96 int br_isset; 97 }; 98 99 struct netisr_data { 100 struct thread thread; 101 #ifdef INVARIANTS 102 void *netlastfunc; 103 #endif 104 TAILQ_HEAD(, netisr_rollup) netrulist; 105 }; 106 107 static struct netisr_data *netisr_data[MAXCPU]; 108 109 static struct netisr netisrs[NETISR_MAX]; 110 static TAILQ_HEAD(,netmsg_port_registration) netreglist; 111 112 /* Per-CPU thread to handle any protocol. */ 113 struct thread *netisr_threads[MAXCPU]; 114 115 lwkt_port netisr_afree_rport; 116 lwkt_port netisr_afree_free_so_rport; 117 lwkt_port netisr_adone_rport; 118 lwkt_port netisr_apanic_rport; 119 lwkt_port netisr_sync_port; 120 121 static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t); 122 123 SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr"); 124 125 __read_frequently static int netisr_rollup_limit = 32; 126 SYSCTL_INT(_net_netisr, OID_AUTO, rollup_limit, CTLFLAG_RW, 127 &netisr_rollup_limit, 0, "Message to process before rollup"); 128 129 __read_frequently int netisr_ncpus; 130 TUNABLE_INT("net.netisr.ncpus", &netisr_ncpus); 131 SYSCTL_INT(_net_netisr, OID_AUTO, ncpus, CTLFLAG_RD, 132 &netisr_ncpus, 0, "# of CPUs to handle network messages"); 133 134 /* 135 * netisr_afree_rport replymsg function, only used to handle async 136 * messages which the sender has abandoned to their fate. 137 */ 138 static void 139 netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) 140 { 141 kfree(msg, M_LWKTMSG); 142 } 143 144 static void 145 netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg) 146 { 147 sofree(((netmsg_t)msg)->base.nm_so); 148 kfree(msg, M_LWKTMSG); 149 } 150 151 /* 152 * We need a custom putport function to handle the case where the 153 * message target is the current thread's message port. This case 154 * can occur when the TCP or UDP stack does a direct callback to NFS and NFS 155 * then turns around and executes a network operation synchronously. 156 * 157 * To prevent deadlocking, we must execute these self-referential messages 158 * synchronously, effectively turning the message into a glorified direct 159 * procedure call back into the protocol stack. The operation must be 160 * complete on return or we will deadlock, so panic if it isn't. 161 * 162 * However, the target function is under no obligation to immediately 163 * reply the message. It may forward it elsewhere. 164 */ 165 static int 166 netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg) 167 { 168 netmsg_base_t nmsg = (void *)lmsg; 169 170 if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) { 171 nmsg->nm_dispatch((netmsg_t)nmsg); 172 return(EASYNC); 173 } else { 174 return(netmsg_fwd_port_fn(port, lmsg)); 175 } 176 } 177 178 /* 179 * UNIX DOMAIN sockets still have to run their uipc functions synchronously, 180 * because they depend on the user proc context for a number of things 181 * (like creds) which we have not yet incorporated into the message structure. 182 * 183 * However, we maintain or message/port abstraction. Having a special 184 * synchronous port which runs the commands synchronously gives us the 185 * ability to serialize operations in one place later on when we start 186 * removing the BGL. 187 */ 188 static int 189 netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg) 190 { 191 netmsg_base_t nmsg = (void *)lmsg; 192 193 KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0); 194 195 lmsg->ms_target_port = port; /* required for abort */ 196 nmsg->nm_dispatch((netmsg_t)nmsg); 197 return(EASYNC); 198 } 199 200 static void 201 netisr_init(void) 202 { 203 int i; 204 205 if (netisr_ncpus <= 0 || netisr_ncpus > ncpus) { 206 /* Default. */ 207 netisr_ncpus = ncpus; 208 } 209 if (netisr_ncpus > NETISR_CPUMAX) 210 netisr_ncpus = NETISR_CPUMAX; 211 212 TAILQ_INIT(&netreglist); 213 214 /* 215 * Create default per-cpu threads for generic protocol handling. 216 */ 217 for (i = 0; i < ncpus; ++i) { 218 struct netisr_data *nd; 219 220 nd = (void *)kmem_alloc3(kernel_map, sizeof(*nd), 221 VM_SUBSYS_GD, KM_CPU(i)); 222 memset(nd, 0, sizeof(*nd)); 223 TAILQ_INIT(&nd->netrulist); 224 netisr_data[i] = nd; 225 226 lwkt_create(netmsg_service_loop, NULL, &netisr_threads[i], 227 &nd->thread, TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU, 228 i, "netisr %d", i); 229 netmsg_service_port_init(&netisr_threads[i]->td_msgport); 230 lwkt_schedule(netisr_threads[i]); 231 } 232 233 /* 234 * The netisr_afree_rport is a special reply port which automatically 235 * frees the replied message. The netisr_adone_rport simply marks 236 * the message as being done. The netisr_apanic_rport panics if 237 * the message is replied to. 238 */ 239 lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); 240 lwkt_initport_replyonly(&netisr_afree_free_so_rport, 241 netisr_autofree_free_so_reply); 242 lwkt_initport_replyonly_null(&netisr_adone_rport); 243 lwkt_initport_panic(&netisr_apanic_rport); 244 245 /* 246 * The netisr_syncport is a special port which executes the message 247 * synchronously and waits for it if EASYNC is returned. 248 */ 249 lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); 250 } 251 SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); 252 253 /* 254 * Finish initializing the message port for a netmsg service. This also 255 * registers the port for synchronous cleanup operations such as when an 256 * ifnet is being destroyed. There is no deregistration API yet. 257 */ 258 static void 259 netmsg_service_port_init(lwkt_port_t port) 260 { 261 struct netmsg_port_registration *reg; 262 263 /* 264 * Override the putport function. Our custom function checks for 265 * self-references and executes such commands synchronously. 266 */ 267 if (netmsg_fwd_port_fn == NULL) 268 netmsg_fwd_port_fn = port->mp_putport; 269 KKASSERT(netmsg_fwd_port_fn == port->mp_putport); 270 port->mp_putport = netmsg_put_port; 271 272 /* 273 * Keep track of ports using the netmsg API so we can synchronize 274 * certain operations (such as freeing an ifnet structure) across all 275 * consumers. 276 */ 277 reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); 278 reg->npr_port = port; 279 TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); 280 } 281 282 /* 283 * This function synchronizes the caller with all netmsg services. For 284 * example, if an interface is being removed we must make sure that all 285 * packets related to that interface complete processing before the structure 286 * can actually be freed. This sort of synchronization is an alternative to 287 * ref-counting the netif, removing the ref counting overhead in favor of 288 * placing additional overhead in the netif freeing sequence (where it is 289 * inconsequential). 290 */ 291 void 292 netmsg_service_sync(void) 293 { 294 struct netmsg_port_registration *reg; 295 struct netmsg_base smsg; 296 297 netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); 298 299 TAILQ_FOREACH(reg, &netreglist, npr_entry) { 300 lwkt_domsg(reg->npr_port, &smsg.lmsg, 0); 301 } 302 } 303 304 /* 305 * The netmsg function simply replies the message. API semantics require 306 * EASYNC to be returned if the netmsg function disposes of the message. 307 */ 308 void 309 netmsg_sync_handler(netmsg_t msg) 310 { 311 lwkt_replymsg(&msg->lmsg, 0); 312 } 313 314 /* 315 * Generic netmsg service loop. Some protocols may roll their own but all 316 * must do the basic command dispatch function call done here. 317 */ 318 static void 319 netmsg_service_loop(void *arg) 320 { 321 netmsg_base_t msg; 322 thread_t td = curthread; 323 int limit; 324 struct netisr_data *nd = netisr_data[mycpuid]; 325 326 td->td_type = TD_TYPE_NETISR; 327 328 while ((msg = lwkt_waitport(&td->td_msgport, 0))) { 329 struct netisr_rollup *ru; 330 331 /* 332 * Run up to 512 pending netmsgs. 333 */ 334 limit = netisr_rollup_limit; 335 do { 336 KASSERT(msg->nm_dispatch != NULL, 337 ("netmsg_service isr %d badmsg", 338 msg->lmsg.u.ms_result)); 339 /* 340 * Don't match so_port, if the msg explicitly 341 * asks us to ignore its so_port. 342 */ 343 if ((msg->lmsg.ms_flags & MSGF_IGNSOPORT) == 0 && 344 msg->nm_so && 345 msg->nm_so->so_port != &td->td_msgport) { 346 /* 347 * Sockets undergoing connect or disconnect 348 * ops can change ports on us. Chase the 349 * port. 350 */ 351 #ifdef foo 352 /* 353 * This could be quite common for protocols 354 * which support asynchronous pru_connect, 355 * e.g. TCP, so kprintf socket port chasing 356 * could be too verbose for the console. 357 */ 358 kprintf("%s: Warning, port changed so=%p\n", 359 __func__, msg->nm_so); 360 #endif 361 lwkt_forwardmsg(msg->nm_so->so_port, 362 &msg->lmsg); 363 } else { 364 /* 365 * We are on the correct port, dispatch it. 366 */ 367 #ifdef INVARIANTS 368 nd->netlastfunc = msg->nm_dispatch; 369 #endif 370 msg->nm_dispatch((netmsg_t)msg); 371 } 372 if (--limit == 0) 373 break; 374 } while ((msg = lwkt_getport(&td->td_msgport)) != NULL); 375 376 /* 377 * Run all registered rollup functions for this cpu 378 * (e.g. tcp_willblock()). 379 */ 380 TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) 381 ru->ru_func(); 382 } 383 } 384 385 /* 386 * Forward a packet to a netisr service function. 387 * 388 * If the packet has not been assigned to a protocol thread we call 389 * the port characterization function to assign it. The caller must 390 * clear M_HASH (or not have set it in the first place) if the caller 391 * wishes the packet to be recharacterized. 392 */ 393 int 394 netisr_queue(int num, struct mbuf *m) 395 { 396 struct netisr *ni; 397 struct netmsg_packet *pmsg; 398 lwkt_port_t port; 399 400 KASSERT((num > 0 && num <= NELEM(netisrs)), 401 ("Bad isr %d", num)); 402 403 ni = &netisrs[num]; 404 if (ni->ni_handler == NULL) { 405 kprintf("%s: Unregistered isr %d\n", __func__, num); 406 m_freem(m); 407 return (EIO); 408 } 409 410 /* 411 * Figure out which protocol thread to send to. This does not 412 * have to be perfect but performance will be really good if it 413 * is correct. Major protocol inputs such as ip_input() will 414 * re-characterize the packet as necessary. 415 */ 416 if ((m->m_flags & M_HASH) == 0) { 417 ni->ni_hashfn(&m, 0); 418 if (m == NULL) 419 return (EIO); 420 if ((m->m_flags & M_HASH) == 0) { 421 kprintf("%s(%d): packet hash failed\n", 422 __func__, num); 423 m_freem(m); 424 return (EIO); 425 } 426 } 427 428 /* 429 * Get the protocol port based on the packet hash, initialize 430 * the netmsg, and send it off. 431 */ 432 port = netisr_hashport(m->m_pkthdr.hash); 433 pmsg = &m->m_hdr.mh_netmsg; 434 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 435 0, ni->ni_handler); 436 pmsg->nm_packet = m; 437 pmsg->base.lmsg.u.ms_result = num; 438 lwkt_sendmsg(port, &pmsg->base.lmsg); 439 440 return (0); 441 } 442 443 /* 444 * Run a netisr service function on the packet. 445 * 446 * The packet must have been correctly characterized! 447 */ 448 int 449 netisr_handle(int num, struct mbuf *m) 450 { 451 struct netisr *ni; 452 struct netmsg_packet *pmsg; 453 lwkt_port_t port; 454 455 /* 456 * Get the protocol port based on the packet hash 457 */ 458 KASSERT((m->m_flags & M_HASH), ("packet not characterized")); 459 port = netisr_hashport(m->m_pkthdr.hash); 460 KASSERT(&curthread->td_msgport == port, ("wrong msgport")); 461 462 KASSERT((num > 0 && num <= NELEM(netisrs)), ("bad isr %d", num)); 463 ni = &netisrs[num]; 464 if (ni->ni_handler == NULL) { 465 kprintf("%s: unregistered isr %d\n", __func__, num); 466 m_freem(m); 467 return EIO; 468 } 469 470 /* 471 * Initialize the netmsg, and run the handler directly. 472 */ 473 pmsg = &m->m_hdr.mh_netmsg; 474 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 475 0, ni->ni_handler); 476 pmsg->nm_packet = m; 477 pmsg->base.lmsg.u.ms_result = num; 478 ni->ni_handler((netmsg_t)&pmsg->base); 479 480 return 0; 481 } 482 483 /* 484 * Pre-characterization of a deeper portion of the packet for the 485 * requested isr. 486 * 487 * The base of the ISR type (e.g. IP) that we want to characterize is 488 * at (hoff) relative to the beginning of the mbuf. This allows 489 * e.g. ether_characterize() to not have to adjust the m_data/m_len. 490 */ 491 void 492 netisr_characterize(int num, struct mbuf **mp, int hoff) 493 { 494 struct netisr *ni; 495 struct mbuf *m; 496 497 /* 498 * Validation 499 */ 500 m = *mp; 501 KKASSERT(m != NULL); 502 503 if (num < 0 || num >= NETISR_MAX) { 504 if (num == NETISR_MAX) { 505 m_sethash(m, 0); 506 return; 507 } 508 panic("Bad isr %d", num); 509 } 510 511 /* 512 * Valid netisr? 513 */ 514 ni = &netisrs[num]; 515 if (ni->ni_handler == NULL) { 516 kprintf("%s: Unregistered isr %d\n", __func__, num); 517 m_freem(m); 518 *mp = NULL; 519 } 520 521 /* 522 * Characterize the packet 523 */ 524 if ((m->m_flags & M_HASH) == 0) { 525 ni->ni_hashfn(mp, hoff); 526 m = *mp; 527 if (m && (m->m_flags & M_HASH) == 0) { 528 kprintf("%s(%d): packet hash failed\n", 529 __func__, num); 530 } 531 } 532 } 533 534 void 535 netisr_register(int num, netisr_fn_t handler, netisr_hashfn_t hashfn) 536 { 537 struct netisr *ni; 538 539 KASSERT((num > 0 && num <= NELEM(netisrs)), 540 ("netisr_register: bad isr %d", num)); 541 KKASSERT(handler != NULL); 542 543 if (hashfn == NULL) 544 hashfn = netisr_hashfn0; 545 546 ni = &netisrs[num]; 547 548 ni->ni_handler = handler; 549 ni->ni_hashck = netisr_nohashck; 550 ni->ni_hashfn = hashfn; 551 netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL); 552 } 553 554 void 555 netisr_register_hashcheck(int num, netisr_hashck_t hashck) 556 { 557 struct netisr *ni; 558 559 KASSERT((num > 0 && num <= NELEM(netisrs)), 560 ("netisr_register: bad isr %d", num)); 561 562 ni = &netisrs[num]; 563 ni->ni_hashck = hashck; 564 } 565 566 static void 567 netisr_register_rollup_dispatch(netmsg_t nmsg) 568 { 569 struct netmsg_rollup *nm = (struct netmsg_rollup *)nmsg; 570 int cpuid = mycpuid; 571 struct netisr_data *nd = netisr_data[cpuid]; 572 struct netisr_rollup *new_ru, *ru; 573 574 new_ru = kmalloc(sizeof(*new_ru), M_TEMP, M_WAITOK|M_ZERO); 575 new_ru->ru_func = nm->func; 576 new_ru->ru_prio = nm->prio; 577 578 /* 579 * Higher priority "rollup" appears first 580 */ 581 TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) { 582 if (ru->ru_prio < new_ru->ru_prio) { 583 TAILQ_INSERT_BEFORE(ru, new_ru, ru_entry); 584 goto done; 585 } 586 } 587 TAILQ_INSERT_TAIL(&nd->netrulist, new_ru, ru_entry); 588 done: 589 if (cpuid == 0) 590 nm->key = new_ru; 591 KKASSERT(nm->key != NULL); 592 new_ru->ru_key = nm->key; 593 594 netisr_forwardmsg_all(&nm->base, cpuid + 1); 595 } 596 597 struct netisr_rollup * 598 netisr_register_rollup(netisr_ru_t func, int prio) 599 { 600 struct netmsg_rollup nm; 601 602 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 603 netisr_register_rollup_dispatch); 604 nm.func = func; 605 nm.prio = prio; 606 nm.key = NULL; 607 netisr_domsg_global(&nm.base); 608 609 KKASSERT(nm.key != NULL); 610 return (nm.key); 611 } 612 613 static void 614 netisr_unregister_rollup_dispatch(netmsg_t nmsg) 615 { 616 struct netmsg_rollup *nm = (struct netmsg_rollup *)nmsg; 617 int cpuid = mycpuid; 618 struct netisr_data *nd = netisr_data[cpuid]; 619 struct netisr_rollup *ru; 620 621 TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) { 622 if (ru->ru_key == nm->key) 623 break; 624 } 625 if (ru == NULL) 626 panic("netisr: no rullup for %p", nm->key); 627 628 TAILQ_REMOVE(&nd->netrulist, ru, ru_entry); 629 kfree(ru, M_TEMP); 630 631 netisr_forwardmsg_all(&nm->base, cpuid + 1); 632 } 633 634 void 635 netisr_unregister_rollup(struct netisr_rollup *key) 636 { 637 struct netmsg_rollup nm; 638 639 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 640 netisr_unregister_rollup_dispatch); 641 nm.key = key; 642 netisr_domsg_global(&nm.base); 643 } 644 645 /* 646 * Return a default protocol control message processing thread port 647 */ 648 lwkt_port_t 649 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, 650 void *extra __unused, int *cpuid) 651 { 652 *cpuid = 0; 653 return netisr_cpuport(*cpuid); 654 } 655 656 /* 657 * This is a default netisr packet characterization function which 658 * sets M_HASH. If a netisr is registered with a NULL hashfn function 659 * this one is assigned. 660 * 661 * This function makes no attempt to validate the packet. 662 */ 663 static void 664 netisr_hashfn0(struct mbuf **mp, int hoff __unused) 665 { 666 667 m_sethash(*mp, 0); 668 } 669 670 /* 671 * schednetisr() is used to call the netisr handler from the appropriate 672 * netisr thread for polling and other purposes. 673 * 674 * This function may be called from a hard interrupt or IPI and must be 675 * MP SAFE and non-blocking. We use a fixed per-cpu message instead of 676 * trying to allocate one. We must get ourselves onto the target cpu 677 * to safely check the MSGF_DONE bit on the message but since the message 678 * will be sent to that cpu anyway this does not add any extra work beyond 679 * what lwkt_sendmsg() would have already had to do to schedule the target 680 * thread. 681 */ 682 static void 683 schednetisr_remote(void *data) 684 { 685 int num = (int)(intptr_t)data; 686 struct netisr *ni = &netisrs[num]; 687 lwkt_port_t port = &netisr_threads[0]->td_msgport; 688 netmsg_base_t pmsg; 689 690 pmsg = &netisrs[num].ni_netmsg; 691 if (pmsg->lmsg.ms_flags & MSGF_DONE) { 692 netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler); 693 pmsg->lmsg.u.ms_result = num; 694 lwkt_sendmsg(port, &pmsg->lmsg); 695 } 696 } 697 698 void 699 schednetisr(int num) 700 { 701 KASSERT((num > 0 && num <= NELEM(netisrs)), 702 ("schednetisr: bad isr %d", num)); 703 KKASSERT(netisrs[num].ni_handler != NULL); 704 if (mycpu->gd_cpuid != 0) { 705 lwkt_send_ipiq(globaldata_find(0), 706 schednetisr_remote, (void *)(intptr_t)num); 707 } else { 708 crit_enter(); 709 schednetisr_remote((void *)(intptr_t)num); 710 crit_exit(); 711 } 712 } 713 714 static void 715 netisr_barrier_dispatch(netmsg_t nmsg) 716 { 717 struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg; 718 719 ATOMIC_CPUMASK_NANDBIT(*msg->br_cpumask, mycpu->gd_cpuid); 720 if (CPUMASK_TESTZERO(*msg->br_cpumask)) 721 wakeup(msg->br_cpumask); 722 723 for (;;) { 724 uint32_t done = msg->br_done; 725 726 cpu_ccfence(); 727 if ((done & NETISR_BR_NOTDONE) == 0) 728 break; 729 730 tsleep_interlock(&msg->br_done, 0); 731 if (atomic_cmpset_int(&msg->br_done, 732 done, done | NETISR_BR_WAITDONE)) 733 tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0); 734 } 735 736 lwkt_replymsg(&nmsg->lmsg, 0); 737 } 738 739 struct netisr_barrier * 740 netisr_barrier_create(void) 741 { 742 struct netisr_barrier *br; 743 744 br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO); 745 return br; 746 } 747 748 void 749 netisr_barrier_set(struct netisr_barrier *br) 750 { 751 volatile cpumask_t other_cpumask; 752 int i, cur_cpuid; 753 754 ASSERT_NETISR0; 755 KKASSERT(!br->br_isset); 756 757 other_cpumask = mycpu->gd_other_cpus; 758 CPUMASK_ANDMASK(other_cpumask, smp_active_mask); 759 cur_cpuid = mycpuid; 760 761 for (i = 0; i < ncpus; ++i) { 762 struct netmsg_barrier *msg; 763 764 if (i == cur_cpuid) 765 continue; 766 767 msg = kmalloc(sizeof(struct netmsg_barrier), 768 M_LWKTMSG, M_WAITOK); 769 770 /* 771 * Don't use priority message here; mainly to keep 772 * it ordered w/ the previous data packets sent by 773 * the caller. 774 */ 775 netmsg_init(&msg->base, NULL, &netisr_afree_rport, 0, 776 netisr_barrier_dispatch); 777 msg->br_cpumask = &other_cpumask; 778 msg->br_done = NETISR_BR_NOTDONE; 779 780 KKASSERT(br->br_msgs[i] == NULL); 781 br->br_msgs[i] = msg; 782 } 783 784 for (i = 0; i < ncpus; ++i) { 785 if (i == cur_cpuid) 786 continue; 787 lwkt_sendmsg(netisr_cpuport(i), &br->br_msgs[i]->base.lmsg); 788 } 789 790 while (CPUMASK_TESTNZERO(other_cpumask)) { 791 tsleep_interlock(&other_cpumask, 0); 792 if (CPUMASK_TESTNZERO(other_cpumask)) 793 tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0); 794 } 795 br->br_isset = 1; 796 } 797 798 void 799 netisr_barrier_rem(struct netisr_barrier *br) 800 { 801 int i, cur_cpuid; 802 803 ASSERT_NETISR0; 804 KKASSERT(br->br_isset); 805 806 cur_cpuid = mycpuid; 807 for (i = 0; i < ncpus; ++i) { 808 struct netmsg_barrier *msg = br->br_msgs[i]; 809 uint32_t done; 810 811 msg = br->br_msgs[i]; 812 br->br_msgs[i] = NULL; 813 814 if (i == cur_cpuid) 815 continue; 816 817 done = atomic_swap_int(&msg->br_done, 0); 818 if (done & NETISR_BR_WAITDONE) 819 wakeup(&msg->br_done); 820 } 821 br->br_isset = 0; 822 } 823 824 static void 825 netisr_nohashck(struct mbuf *m, const struct pktinfo *pi __unused) 826 { 827 m->m_flags &= ~M_HASH; 828 } 829 830 void 831 netisr_hashcheck(int num, struct mbuf *m, const struct pktinfo *pi) 832 { 833 struct netisr *ni; 834 835 if (num < 0 || num >= NETISR_MAX) 836 panic("Bad isr %d", num); 837 838 /* 839 * Valid netisr? 840 */ 841 ni = &netisrs[num]; 842 if (ni->ni_handler == NULL) 843 panic("Unregistered isr %d", num); 844 845 ni->ni_hashck(m, pi); 846 } 847