1 /* 2 * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. 3 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 4 * Copyright (c) 2003 Jonathan Lemon. All rights reserved. 5 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. 9 * 10 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright 11 * into this one around July 8 2004. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/netisr.c,v 1.49 2008/11/01 10:29:31 sephe Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/msgport.h>
#include <sys/proc.h>
#include <sys/interrupt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <net/netmsg2.h>
#include <sys/mplock2.h>

/*
 * Acquire the Big Giant Lock before running a netisr handler, unless the
 * handler registered itself with NETISR_FLAG_MPSAFE.
 */
#define NETISR_GET_MPLOCK(ni) \
do { \
	if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \
		get_mplock(); \
} while (0)

/* Release the BGL if NETISR_GET_MPLOCK() acquired it for this netisr. */
#define NETISR_REL_MPLOCK(ni) \
do { \
	if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \
		rel_mplock(); \
} while (0)

static void netmsg_sync_func(struct netmsg *msg);

/*
 * One entry per message port using the netmsg API.  The list is walked
 * by netmsg_service_sync() to synchronize against all such ports.
 */
struct netmsg_port_registration {
	TAILQ_ENTRY(netmsg_port_registration) npr_entry;
	lwkt_port_t npr_port;
};

/* Protocol dispatch table, indexed by netisr number (1..NETISR_MAX-1). */
static struct netisr netisrs[NETISR_MAX];

/* All ports registered via netmsg_service_port_init(). */
static TAILQ_HEAD(,netmsg_port_registration) netreglist;

/* Per-CPU thread to handle any protocol. */
struct thread netisr_cpu[MAXCPU];
lwkt_port netisr_afree_rport;	/* replies auto-free the message */
lwkt_port netisr_adone_rport;	/* replies only mark the message done */
lwkt_port netisr_apanic_rport;	/* replies panic */
lwkt_port netisr_sync_port;	/* putmsg runs the message synchronously */

/*
 * The original putport function of the netisr ports, saved so that
 * netmsg_put_port() can forward non-self-referential messages to it.
 */
static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t);

/* BGL handling mode for the netisr threads; see netmsg_service(). */
static int netisr_mpsafe_thread = NETMSG_SERVICE_ADAPTIVE;
TUNABLE_INT("net.netisr.mpsafe_thread", &netisr_mpsafe_thread);

SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr");
SYSCTL_INT(_net_netisr, OID_AUTO, mpsafe_thread, CTLFLAG_RW,
	   &netisr_mpsafe_thread, 0,
	   "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)");

/*
 * Translate a netisr's flags into lwkt message flags: MSGF_MPSAFE if the
 * netisr is MPSAFE, otherwise 0.
 */
static __inline int
NETISR_TO_MSGF(const struct netisr *ni)
{
	int msg_flags = 0;

	if (ni->ni_flags & NETISR_FLAG_MPSAFE)
		msg_flags |= MSGF_MPSAFE;
	return msg_flags;
}

/*
 * netisr_afree_rport replymsg function, only used to handle async
 * messages which the sender has abandoned to their fate.
 */
static void
netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	kfree(msg, M_LWKTMSG);
}

/*
 * We need a custom putport function to handle the case where the
 * message target is the current thread's message port.  This case
 * can occur when the TCP or UDP stack does a direct callback to NFS and NFS
 * then turns around and executes a network operation synchronously.
 *
 * To prevent deadlocking, we must execute these self-referential messages
 * synchronously, effectively turning the message into a glorified direct
 * procedure call back into the protocol stack.  The operation must be
 * complete on return or we will deadlock, so panic if it isn't.
 */
static int
netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_t netmsg = (void *)lmsg;

	/*
	 * A synchronous message targeted at our own port must be run
	 * in-line; queueing it to ourselves would deadlock.
	 */
	if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) {
		netmsg->nm_dispatch(netmsg);
		if ((lmsg->ms_flags & MSGF_DONE) == 0)
			panic("netmsg_put_port: self-referential deadlock on netport");
		return(EASYNC);
	} else {
		return(netmsg_fwd_port_fn(port, lmsg));
	}
}

/*
 * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 * because they depend on the user proc context for a number of things
 * (like creds) which we have not yet incorporated into the message structure.
 *
 * However, we maintain our message/port abstraction.  Having a special
 * synchronous port which runs the commands synchronously gives us the
 * ability to serialize operations in one place later on when we start
 * removing the BGL.
 */
static int
netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_t netmsg = (void *)lmsg;

	KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0);

	lmsg->ms_target_port = port;	/* required for abort */
	netmsg->nm_dispatch(netmsg);
	return(EASYNC);
}

/*
 * Boot-time initialization: create the per-cpu netisr service threads and
 * the special-purpose reply and sync ports.
 */
static void
netisr_init(void)
{
	int i;

	TAILQ_INIT(&netreglist);

	/*
	 * Create default per-cpu threads for generic protocol handling.
	 */
	for (i = 0; i < ncpus; ++i) {
		lwkt_create(netmsg_service_loop, &netisr_mpsafe_thread, NULL,
			    &netisr_cpu[i], TDF_NETWORK | TDF_MPSAFE, i,
			    "netisr_cpu %d", i);
		netmsg_service_port_init(&netisr_cpu[i].td_msgport);
	}

	/*
	 * The netisr_afree_rport is a special reply port which automatically
	 * frees the replied message.  The netisr_adone_rport simply marks
	 * the message as being done.  The netisr_apanic_rport panics if
	 * the message is replied to.
	 */
	lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply);
	lwkt_initport_replyonly_null(&netisr_adone_rport);
	lwkt_initport_panic(&netisr_apanic_rport);

	/*
	 * The netisr_sync_port is a special port which executes the message
	 * synchronously and waits for it if EASYNC is returned.
	 */
	lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport);
}

SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Finish initializing the message port for a netmsg service.  This also
 * registers the port for synchronous cleanup operations such as when an
 * ifnet is being destroyed.  There is no deregistration API yet.
 */
void
netmsg_service_port_init(lwkt_port_t port)
{
	struct netmsg_port_registration *reg;

	/*
	 * Override the putport function.  Our custom function checks for
	 * self-references and executes such commands synchronously.
	 *
	 * The first port initialized supplies the forwarding function that
	 * all subsequent ports are expected to share.
	 */
	if (netmsg_fwd_port_fn == NULL)
		netmsg_fwd_port_fn = port->mp_putport;
	KKASSERT(netmsg_fwd_port_fn == port->mp_putport);
	port->mp_putport = netmsg_put_port;

	/*
	 * Keep track of ports using the netmsg API so we can synchronize
	 * certain operations (such as freeing an ifnet structure) across all
	 * consumers.
	 */
	reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO);
	reg->npr_port = port;
	TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry);
}

/*
 * This function synchronizes the caller with all netmsg services.  For
 * example, if an interface is being removed we must make sure that all
 * packets related to that interface complete processing before the structure
 * can actually be freed.  This sort of synchronization is an alternative to
 * ref-counting the netif, removing the ref counting overhead in favor of
 * placing additional overhead in the netif freeing sequence (where it is
 * inconsequential).
 */
void
netmsg_service_sync(void)
{
	struct netmsg_port_registration *reg;
	struct netmsg smsg;

	netmsg_init(&smsg, NULL, &curthread->td_msgport,
		    MSGF_MPSAFE, netmsg_sync_func);

	/*
	 * lwkt_domsg() blocks until each port has processed the sync
	 * message, so all prior messages on every port are complete on
	 * return.
	 */
	TAILQ_FOREACH(reg, &netreglist, npr_entry) {
		lwkt_domsg(reg->npr_port, &smsg.nm_lmsg, 0);
	}
}

/*
 * The netmsg function simply replies the message.  API semantics require
 * EASYNC to be returned if the netmsg function disposes of the message.
 */
static void
netmsg_sync_func(struct netmsg *msg)
{
	lwkt_replymsg(&msg->nm_lmsg, 0);
}

/*
 * Service a netmsg request and modify the BGL lock state if appropriate.
 * The new BGL lock state is returned (1:locked, 0:unlocked).
 */
int
netmsg_service(struct netmsg *msg, int mpsafe_mode, int mplocked)
{
	/*
	 * If nm_so is non-NULL the message is related to a socket.  Sockets
	 * can migrate between protocol processing threads when they connect,
	 * due to an implied connect during a sendmsg(), or when a connection
	 * is accepted.
	 *
	 * If this occurs any messages already queued to the original thread
	 * or which race the change must be forwarded to the new protocol
	 * processing port.
	 *
	 * MPSAFE - socket changes are synchronous to the current protocol port
	 *	    so the port can only change out from under us if it is
	 *	    already different from the current port anyway so we forward
	 *	    it.  It is possible to chase a changing port, which is fine.
	 */
	if (msg->nm_so && msg->nm_so->so_port != &curthread->td_msgport) {
		lwkt_forwardmsg(msg->nm_so->so_port, &msg->nm_lmsg);
		return(mplocked);
	}

	/*
	 * Adjust the mplock dynamically.
	 */
	switch (mpsafe_mode) {
	case NETMSG_SERVICE_ADAPTIVE: /* Adaptive BGL */
		if (msg->nm_lmsg.ms_flags & MSGF_MPSAFE) {
			if (mplocked) {
				rel_mplock();
				mplocked = 0;
			}
			msg->nm_dispatch(msg);
			/* Leave mpunlocked */
		} else {
			if (!mplocked) {
				get_mplock();
				/* mplocked = 1; not needed */
			}
			msg->nm_dispatch(msg);
			rel_mplock();
			mplocked = 0;
			/* Leave mpunlocked, next msg might be mpsafe */
		}
		break;

	case NETMSG_SERVICE_MPSAFE: /* No BGL */
		if (mplocked) {
			rel_mplock();
			mplocked = 0;
		}
		msg->nm_dispatch(msg);
		/* Leave mpunlocked */
		break;

	default: /* BGL */
		if (!mplocked) {
			get_mplock();
			mplocked = 1;
		}
		msg->nm_dispatch(msg);
		/* Leave mplocked */
		break;
	}
	return mplocked;
}

/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 */
void
netmsg_service_loop(void *arg)
{
	struct netmsg *msg;
	int mplocked, *mpsafe_mode = arg;

	/*
	 * Thread was started with TDF_MPSAFE
	 */
	mplocked = 0;

	/*
	 * Loop on netmsgs
	 */
	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
		mplocked = netmsg_service(msg, *mpsafe_mode, mplocked);
	}
}

/*
 * Call the netisr directly.
 * Queueing may be done in the msg port layer at its discretion.
 */
void
netisr_dispatch(int num, struct mbuf *m)
{
	/* just queue it for now XXX JH */
	netisr_queue(num, m);
}

/*
 * Same as netisr_dispatch(), but always queue.
 * This is either used in places where we are not confident that
 * direct dispatch is possible, or where queueing is required.
376 */ 377 int 378 netisr_queue(int num, struct mbuf *m) 379 { 380 struct netisr *ni; 381 struct netmsg_packet *pmsg; 382 lwkt_port_t port; 383 384 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 385 ("%s: bad isr %d", __func__, num)); 386 387 ni = &netisrs[num]; 388 if (ni->ni_handler == NULL) { 389 kprintf("%s: unregistered isr %d\n", __func__, num); 390 m_freem(m); 391 return (EIO); 392 } 393 394 if ((port = ni->ni_mport(&m)) == NULL) 395 return (EIO); 396 397 pmsg = &m->m_hdr.mh_netmsg; 398 399 netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, 400 NETISR_TO_MSGF(ni), ni->ni_handler); 401 pmsg->nm_packet = m; 402 pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; 403 lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg); 404 return (0); 405 } 406 407 void 408 netisr_register(int num, pkt_portfn_t mportfn, 409 pktinfo_portfn_t mportfn_pktinfo, netisr_fn_t handler, 410 uint32_t flags) 411 { 412 struct netisr *ni; 413 414 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 415 ("netisr_register: bad isr %d", num)); 416 ni = &netisrs[num]; 417 418 ni->ni_mport = mportfn; 419 ni->ni_mport_pktinfo = mportfn_pktinfo; 420 ni->ni_handler = handler; 421 ni->ni_flags = flags; 422 netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 423 NETISR_TO_MSGF(ni), NULL); 424 } 425 426 int 427 netisr_unregister(int num) 428 { 429 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 430 ("unregister_netisr: bad isr number: %d\n", num)); 431 432 /* XXX JH */ 433 return (0); 434 } 435 436 /* 437 * Return message port for default handler thread on CPU 0. 
438 */ 439 lwkt_port_t 440 cpu0_portfn(struct mbuf **mptr) 441 { 442 struct mbuf *m = *mptr; 443 int cpu = 0; 444 445 m->m_pkthdr.hash = cpu; 446 m->m_flags |= M_HASH; 447 return (&netisr_cpu[cpu].td_msgport); 448 } 449 450 lwkt_port_t 451 cpu_portfn(int cpu) 452 { 453 return (&netisr_cpu[cpu].td_msgport); 454 } 455 456 /* 457 * If the current thread is a network protocol thread (TDF_NETWORK), 458 * then return the current thread's message port. 459 * XXX Else, return the current CPU's netisr message port. 460 */ 461 lwkt_port_t 462 cur_netport(void) 463 { 464 if (curthread->td_flags & TDF_NETWORK) 465 return &curthread->td_msgport; 466 else 467 return cpu_portfn(mycpuid); 468 } 469 470 /* ARGSUSED */ 471 lwkt_port_t 472 cpu0_soport(struct socket *so __unused, struct sockaddr *nam __unused, 473 struct mbuf **dummy __unused) 474 { 475 return (&netisr_cpu[0].td_msgport); 476 } 477 478 lwkt_port_t 479 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, 480 void *extra __unused) 481 { 482 return (&netisr_cpu[0].td_msgport); 483 } 484 485 lwkt_port_t 486 sync_soport(struct socket *so __unused, struct sockaddr *nam __unused, 487 struct mbuf **dummy __unused) 488 { 489 return (&netisr_sync_port); 490 } 491 492 /* 493 * schednetisr() is used to call the netisr handler from the appropriate 494 * netisr thread for polling and other purposes. 495 * 496 * This function may be called from a hard interrupt or IPI and must be 497 * MP SAFE and non-blocking. We use a fixed per-cpu message instead of 498 * trying to allocate one. We must get ourselves onto the target cpu 499 * to safely check the MSGF_DONE bit on the message but since the message 500 * will be sent to that cpu anyway this does not add any extra work beyond 501 * what lwkt_sendmsg() would have already had to do to schedule the target 502 * thread. 
503 */ 504 static void 505 schednetisr_remote(void *data) 506 { 507 int num = (int)(intptr_t)data; 508 struct netisr *ni = &netisrs[num]; 509 lwkt_port_t port = &netisr_cpu[0].td_msgport; 510 struct netmsg *pmsg; 511 512 pmsg = &netisrs[num].ni_netmsg; 513 crit_enter(); 514 if (pmsg->nm_lmsg.ms_flags & MSGF_DONE) { 515 netmsg_init(pmsg, NULL, &netisr_adone_rport, 516 NETISR_TO_MSGF(ni), ni->ni_handler); 517 pmsg->nm_lmsg.u.ms_result = num; 518 lwkt_sendmsg(port, &pmsg->nm_lmsg); 519 } 520 crit_exit(); 521 } 522 523 void 524 schednetisr(int num) 525 { 526 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 527 ("schednetisr: bad isr %d", num)); 528 #ifdef SMP 529 if (mycpu->gd_cpuid != 0) { 530 lwkt_send_ipiq(globaldata_find(0), 531 schednetisr_remote, (void *)(intptr_t)num); 532 } else { 533 schednetisr_remote((void *)(intptr_t)num); 534 } 535 #else 536 schednetisr_remote((void *)(intptr_t)num); 537 #endif 538 } 539 540 lwkt_port_t 541 netisr_find_port(int num, struct mbuf **m0) 542 { 543 struct netisr *ni; 544 lwkt_port_t port; 545 struct mbuf *m = *m0; 546 547 *m0 = NULL; 548 549 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 550 ("%s: bad isr %d", __func__, num)); 551 552 ni = &netisrs[num]; 553 if (ni->ni_mport == NULL) { 554 kprintf("%s: unregistered isr %d\n", __func__, num); 555 m_freem(m); 556 return NULL; 557 } 558 559 if ((port = ni->ni_mport(&m)) == NULL) 560 return NULL; 561 562 *m0 = m; 563 return port; 564 } 565 566 void 567 netisr_run(int num, struct mbuf *m) 568 { 569 struct netisr *ni; 570 struct netmsg_packet *pmsg; 571 572 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 573 ("%s: bad isr %d", __func__, num)); 574 575 ni = &netisrs[num]; 576 if (ni->ni_handler == NULL) { 577 kprintf("%s: unregistered isr %d\n", __func__, num); 578 m_freem(m); 579 return; 580 } 581 582 pmsg = &m->m_hdr.mh_netmsg; 583 584 netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, 585 0, ni->ni_handler); 586 
pmsg->nm_packet = m; 587 pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; 588 589 NETISR_GET_MPLOCK(ni); 590 ni->ni_handler(&pmsg->nm_netmsg); 591 NETISR_REL_MPLOCK(ni); 592 } 593 594 lwkt_port_t 595 pktinfo_portfn_cpu0(const struct pktinfo *dummy __unused, 596 struct mbuf *m) 597 { 598 m->m_pkthdr.hash = 0; 599 return &netisr_cpu[0].td_msgport; 600 } 601 602 lwkt_port_t 603 pktinfo_portfn_notsupp(const struct pktinfo *dummy __unused, 604 struct mbuf *m __unused) 605 { 606 return NULL; 607 } 608 609 lwkt_port_t 610 netisr_find_pktinfo_port(const struct pktinfo *pi, struct mbuf *m) 611 { 612 struct netisr *ni; 613 int num = pi->pi_netisr; 614 615 KASSERT(m->m_flags & M_HASH, ("packet does not contain hash\n")); 616 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 617 ("%s: bad isr %d", __func__, num)); 618 619 ni = &netisrs[num]; 620 if (ni->ni_mport_pktinfo == NULL) { 621 kprintf("%s: unregistered isr %d\n", __func__, num); 622 return NULL; 623 } 624 return ni->ni_mport_pktinfo(pi, m); 625 } 626