/*-
 * Copyright (c) 2001-2002 Luigi Rizzo
 *
 * Supported by: the Xorp Project (www.xorp.org)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
 */

#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>

#include <machine/atomic.h>
#include <machine/clock.h>
#include <machine/smp.h>

#include <net/if.h>
#include <net/if_poll.h>
#include <net/netmsg2.h>

/*
 * Polling support for network device drivers.
 *
 * Drivers which support this feature try to register one status polling
 * handler and several TX/RX polling handlers with the polling code.
 * If the interface's if_qpoll is called with a non-NULL second argument,
 * then a register operation is requested, else a deregister operation is
 * requested.  If the requested operation is "register", the driver should
 * set up the ifpoll_info passed in according to its own needs:
 *   ifpoll_info.ifpi_status.status_func == NULL
 *     No status polling handler will be installed on CPU(0)
 *   ifpoll_info.ifpi_rx[n].poll_func == NULL
 *     No RX polling handler will be installed on CPU(n)
 *   ifpoll_info.ifpi_tx[n].poll_func == NULL
 *     No TX polling handler will be installed on CPU(n)
 *
 * All of the registered polling handlers are called only if the interface
 * is marked as IFF_RUNNING and IFF_NPOLLING.  However, the interface's
 * register and deregister function (ifnet.if_qpoll) will be called even
 * if the interface is not marked with IFF_RUNNING.
 *
 * If registration is successful, the driver must disable interrupts,
 * and further I/O is performed through the TX/RX polling handlers, which
 * are invoked (at least once per clock tick) with 3 arguments: the "arg"
 * passed at register time, a struct ifnet pointer, and a "count" limit.
 * The registered serializer will be held before calling the related
 * polling handler.
 *
 * The count limit specifies how much work the handler can do during the
 * call -- typically this is the number of packets to be received or
 * transmitted (drivers are free to interpret this number, as long as the
 * max time spent in the function grows roughly linearly with the count).
 *
 * A second variable controls the sharing of CPU between polling/kernel
 * network processing and other activities (typically userlevel tasks):
 * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the
 * share of CPU allocated to user tasks.  CPU is allocated proportionally
 * to the shares, by dynamically adjusting the "count" (poll_burst).
 *
 * Other parameters should be left to their default values.
 * The following constraints hold:
 *
 *	1 <= poll_burst <= poll_burst_max
 *	1 <= poll_each_burst <= poll_burst_max
 *	MIN_IOPOLL_BURST_MAX <= poll_burst_max <= MAX_IOPOLL_BURST_MAX
 */
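/*
 * Example: a minimal sketch of the driver side of the protocol described
 * above, for a hypothetical "foo" driver.  All foo_* names and the softc
 * layout are illustrative only; the exact ifpoll_info/ifpoll_io/
 * ifpoll_status field layout should be checked against net/if_poll.h.
 */
#if 0
static void
foo_qpoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct foo_softc *sc = ifp->if_softc;

	if (info != NULL) {
		/*
		 * Register: describe which handlers should run.  Handlers
		 * left NULL are simply not installed.
		 */
		info->ifpi_status.status_func = foo_npoll_status;
		info->ifpi_status.serializer = &sc->foo_serializer;

		info->ifpi_rx[0].poll_func = foo_npoll_rx;
		info->ifpi_rx[0].arg = sc;
		info->ifpi_rx[0].serializer = &sc->foo_serializer;

		info->ifpi_tx[0].poll_func = foo_npoll_tx;
		info->ifpi_tx[0].arg = sc;
		info->ifpi_tx[0].serializer = &sc->foo_serializer;

		/* Interrupts must be disabled while polling is active. */
		foo_disable_intr(sc);
	} else {
		/* Deregister: fall back to interrupt driven operation. */
		foo_enable_intr(sc);
	}
}
#endif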
#define IFPOLL_LIST_LEN		128
#define IFPOLL_FREQ_MAX		30000

#define MIN_IOPOLL_BURST_MAX	10
#define MAX_IOPOLL_BURST_MAX	1000
#define IOPOLL_BURST_MAX	150	/* good for 100Mbit net and HZ=1000 */

#define IOPOLL_EACH_BURST	5

#define IFPOLL_FREQ_DEFAULT	2000
#define IOPOLL_FREQ_DEFAULT	IFPOLL_FREQ_DEFAULT
#define STPOLL_FREQ_DEFAULT	100

#define IFPOLL_TXFRAC_DEFAULT	1
#define IFPOLL_STFRAC_DEFAULT	20

#define IFPOLL_RX		0x1
#define IFPOLL_TX		0x2

union ifpoll_time {
	struct timeval		tv;
	uint64_t		tsc;
};

struct iopoll_rec {
	struct lwkt_serialize	*serializer;
	struct ifnet		*ifp;
	void			*arg;
	ifpoll_iofn_t		poll_func;
};

struct iopoll_ctx {
#ifdef IFPOLL_MULTI_SYSTIMER
	struct systimer		pollclock;
#endif

	union ifpoll_time	prev_t;
	uint32_t		short_ticks;		/* statistics */
	uint32_t		lost_polls;		/* statistics */
	uint32_t		suspect;		/* statistics */
	uint32_t		stalled;		/* statistics */
	uint32_t		pending_polls;		/* state */

	struct netmsg		poll_netmsg;

	int			poll_cpuid;
#ifdef IFPOLL_MULTI_SYSTIMER
	int			pollhz;			/* tunable */
#else
	int			poll_type;		/* IFPOLL_{RX,TX} */
#endif
	uint32_t		phase;			/* state */
	int			residual_burst;		/* state */
	uint32_t		poll_each_burst;	/* tunable */
	union ifpoll_time	poll_start_t;		/* state */

	uint32_t		poll_handlers;	/* next free entry in pr[]. */
	struct iopoll_rec	pr[IFPOLL_LIST_LEN];

	struct netmsg		poll_more_netmsg;

	uint32_t		poll_burst;		/* state */
	uint32_t		poll_burst_max;		/* tunable */
	uint32_t		user_frac;		/* tunable */
	uint32_t		kern_frac;		/* state */

	struct sysctl_ctx_list	poll_sysctl_ctx;
	struct sysctl_oid	*poll_sysctl_tree;
} __cachealign;

struct iopoll_comm {
	struct sysctl_ctx_list	sysctl_ctx;
	struct sysctl_oid	*sysctl_tree;
} __cachealign;

struct stpoll_rec {
	struct lwkt_serialize	*serializer;
	struct ifnet		*ifp;
	ifpoll_stfn_t		status_func;
};

struct stpoll_ctx {
#ifdef IFPOLL_MULTI_SYSTIMER
	struct systimer		pollclock;
#endif

	struct netmsg		poll_netmsg;

#ifdef IFPOLL_MULTI_SYSTIMER
	int			pollhz;			/* tunable */
#endif
	uint32_t		poll_handlers;	/* next free entry in pr[]. */
	struct stpoll_rec	pr[IFPOLL_LIST_LEN];

	struct sysctl_ctx_list	poll_sysctl_ctx;
	struct sysctl_oid	*poll_sysctl_tree;
};

struct iopoll_sysctl_netmsg {
	struct netmsg		nmsg;
	struct iopoll_ctx	*ctx;
};

#ifndef IFPOLL_MULTI_SYSTIMER

struct ifpoll_data {
	struct systimer	clock;
	int		txfrac_count;
	int		stfrac_count;
	u_int		tx_cpumask;
	u_int		rx_cpumask;
} __cachealign;

#endif

static struct stpoll_ctx	stpoll_context;
static struct iopoll_comm	*iopoll_common[IFPOLL_CTX_MAX];
static struct iopoll_ctx	*rxpoll_context[IFPOLL_CTX_MAX];
static struct iopoll_ctx	*txpoll_context[IFPOLL_CTX_MAX];

SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0,
	    "Network device polling parameters");

static int	ifpoll_ncpus = IFPOLL_CTX_MAX;

static int	iopoll_burst_max = IOPOLL_BURST_MAX;
static int	iopoll_each_burst = IOPOLL_EACH_BURST;

TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max);
TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst);

#ifdef IFPOLL_MULTI_SYSTIMER

static int	stpoll_hz = STPOLL_FREQ_DEFAULT;
static int	iopoll_hz = IOPOLL_FREQ_DEFAULT;

TUNABLE_INT("net.ifpoll.stpoll_hz", &stpoll_hz);
TUNABLE_INT("net.ifpoll.iopoll_hz", &iopoll_hz);

#else	/* !IFPOLL_MULTI_SYSTIMER */

static struct ifpoll_data ifpoll0;
static int	ifpoll_pollhz = IFPOLL_FREQ_DEFAULT;
static int	ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
static int	ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;
static int	ifpoll_handlers;

TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz);
TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac);
TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac);

static void	sysctl_ifpollhz_handler(struct netmsg *);
static int	sysctl_ifpollhz(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_net_ifpoll, OID_AUTO, pollhz, CTLTYPE_INT | CTLFLAG_RW,
	    0, 0, sysctl_ifpollhz, "I", "Polling frequency");
SYSCTL_INT(_net_ifpoll, OID_AUTO, tx_frac, CTLFLAG_RW,
	   &ifpoll_txfrac, 0, "Every this many cycles poll transmit");
SYSCTL_INT(_net_ifpoll, OID_AUTO, st_frac, CTLFLAG_RW,
	   &ifpoll_stfrac, 0, "Every this many cycles poll status");

#endif	/* IFPOLL_MULTI_SYSTIMER */

void		ifpoll_init_pcpu(int);

#ifndef IFPOLL_MULTI_SYSTIMER
static void	ifpoll_start_handler(struct netmsg *);
static void	ifpoll_stop_handler(struct netmsg *);
static void	ifpoll_handler_addevent(void);
static void	ifpoll_handler_delevent(void);
static void	ifpoll_ipi_handler(void *, int);
static void	ifpoll_systimer(systimer_t, struct intrframe *);
#endif

static void	ifpoll_register_handler(struct netmsg *);
static void	ifpoll_deregister_handler(struct netmsg *);

/*
 * Status polling
 */
static void	stpoll_init(void);
static void	stpoll_handler(struct netmsg *);
static void	stpoll_clock(struct stpoll_ctx *);
#ifdef IFPOLL_MULTI_SYSTIMER
static void	stpoll_systimer(systimer_t, struct intrframe *);
#endif
static int	stpoll_register(struct ifnet *, const struct ifpoll_status *);
static int	stpoll_deregister(struct ifnet *);

#ifdef IFPOLL_MULTI_SYSTIMER
static void	sysctl_stpollhz_handler(struct netmsg *);
static int	sysctl_stpollhz(SYSCTL_HANDLER_ARGS);
#endif

/*
 * RX/TX polling
 */
static struct iopoll_ctx *iopoll_ctx_create(int, int);
static struct iopoll_comm *iopoll_comm_create(int);
static void	iopoll_init(int);
static void	iopoll_handler(struct netmsg *);
static void	iopollmore_handler(struct netmsg *);
static void	iopoll_clock(struct iopoll_ctx *);
#ifdef IFPOLL_MULTI_SYSTIMER
static void	iopoll_systimer(systimer_t, struct intrframe *);
#endif
static int	iopoll_register(struct ifnet *, struct iopoll_ctx *,
		    const struct ifpoll_io *);
static int	iopoll_deregister(struct ifnet *, struct iopoll_ctx *);

static void	iopoll_add_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct iopoll_ctx *);
#ifdef IFPOLL_MULTI_SYSTIMER
static void	sysctl_iopollhz_handler(struct netmsg *);
static int	sysctl_iopollhz(SYSCTL_HANDLER_ARGS);
#endif
static void	sysctl_burstmax_handler(struct netmsg *);
static int	sysctl_burstmax(SYSCTL_HANDLER_ARGS);
static void	sysctl_eachburst_handler(struct netmsg *);
static int	sysctl_eachburst(SYSCTL_HANDLER_ARGS);

static __inline void
ifpoll_sendmsg_oncpu(struct netmsg *msg)
{
	if (msg->nm_lmsg.ms_flags & MSGF_DONE)
		ifnet_sendmsg(&msg->nm_lmsg, mycpuid);
}

static __inline void
sched_stpoll(struct stpoll_ctx *st_ctx)
{
	ifpoll_sendmsg_oncpu(&st_ctx->poll_netmsg);
}

static __inline void
sched_iopoll(struct iopoll_ctx *io_ctx)
{
	ifpoll_sendmsg_oncpu(&io_ctx->poll_netmsg);
}

static __inline void
sched_iopollmore(struct iopoll_ctx *io_ctx)
{
	ifpoll_sendmsg_oncpu(&io_ctx->poll_more_netmsg);
}

static __inline void
ifpoll_time_get(union ifpoll_time *t)
{
	if (tsc_present)
		t->tsc = rdtsc();
	else
		microuptime(&t->tv);
}

/* Return time diff in us */
static __inline int
ifpoll_time_diff(const union ifpoll_time *s, const union ifpoll_time *e)
{
	if (tsc_present) {
		return (((e->tsc - s->tsc) * 1000000) / tsc_frequency);
	} else {
		return ((e->tv.tv_usec - s->tv.tv_usec) +
			(e->tv.tv_sec - s->tv.tv_sec) * 1000000);
	}
}
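/*
 * Illustrative usage of the two helpers above (not compiled in): measure
 * how long a stretch of polling work takes, in microseconds.  On TSC
 * capable hardware the diff is (delta_tsc * 1000000) / tsc_frequency,
 * e.g. 3,000,000 ticks at tsc_frequency = 2 GHz is 1500us.
 */
#if 0
static void
example_measure(void)
{
	union ifpoll_time begin, end;

	ifpoll_time_get(&begin);
	/* ... do some polling work ... */
	ifpoll_time_get(&end);
	kprintf("polling work took %d us\n", ifpoll_time_diff(&begin, &end));
}
#endif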
/*
 * Initialize per-cpu qpolling(4) context.  Called from kern_clock.c.
 */
void
ifpoll_init_pcpu(int cpuid)
{
	if (cpuid >= IFPOLL_CTX_MAX) {
		return;
	} else if (cpuid == 0) {
		if (ifpoll_ncpus > ncpus)
			ifpoll_ncpus = ncpus;
		kprintf("ifpoll_ncpus %d\n", ifpoll_ncpus);

#ifndef IFPOLL_MULTI_SYSTIMER
		systimer_init_periodic_nq(&ifpoll0.clock,
					  ifpoll_systimer, NULL, 1);
#endif

		stpoll_init();
	}
	iopoll_init(cpuid);
}

#ifndef IFPOLL_MULTI_SYSTIMER

#ifdef SMP
static void
ifpoll_ipi_handler(void *arg __unused, int poll)
{
	KKASSERT(mycpuid < ifpoll_ncpus);

	if (poll & IFPOLL_TX)
		iopoll_clock(txpoll_context[mycpuid]);
	if (poll & IFPOLL_RX)
		iopoll_clock(rxpoll_context[mycpuid]);
}
#endif	/* SMP */

static void
ifpoll_systimer(systimer_t info __unused, struct intrframe *frame __unused)
{
#ifdef SMP
	uint32_t cpumask = 0;
#endif

	KKASSERT(mycpuid == 0);

	if (ifpoll0.stfrac_count-- == 0) {
		ifpoll0.stfrac_count = ifpoll_stfrac;
		stpoll_clock(&stpoll_context);
	}

	if (ifpoll0.txfrac_count-- == 0) {
		ifpoll0.txfrac_count = ifpoll_txfrac;

#ifdef SMP
		/* TODO: We may try to piggyback TX on RX */
		cpumask = smp_active_mask & ifpoll0.tx_cpumask;
		if (cpumask != 0) {
			lwkt_send_ipiq2_mask(cpumask, ifpoll_ipi_handler,
					     NULL, IFPOLL_TX);
		}
#else
		iopoll_clock(txpoll_context[0]);
#endif
	}

#ifdef SMP
	cpumask = smp_active_mask & ifpoll0.rx_cpumask;
	if (cpumask != 0) {
		lwkt_send_ipiq2_mask(cpumask, ifpoll_ipi_handler,
				     NULL, IFPOLL_RX);
	}
#else
	iopoll_clock(rxpoll_context[0]);
#endif
}
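/*
 * Worked example (illustrative): the single systimer above fires at
 * ifpoll_pollhz, and the frac counters divide that rate; a counter that
 * is reset to N fires once every N + 1 ticks.  With the defaults
 * (pollhz = 2000, tx_frac = 1, status_frac = 20):
 *
 *	RX polls at	2000 Hz			(every tick)
 *	TX polls at	2000 / (1 + 1)	= 1000 Hz
 *	status polls at	2000 / (20 + 1)	~=  95 Hz
 *
 * iopoll_hz() and stpoll_handler() below compute exactly these divisions.
 */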
static void
ifpoll_start_handler(struct netmsg *nmsg)
{
	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));

	kprintf("ifpoll: start\n");
	systimer_adjust_periodic(&ifpoll0.clock, ifpoll_pollhz);
	lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

static void
ifpoll_stop_handler(struct netmsg *nmsg)
{
	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));

	kprintf("ifpoll: stop\n");
	systimer_adjust_periodic(&ifpoll0.clock, 1);
	lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

static void
ifpoll_handler_addevent(void)
{
	if (atomic_fetchadd_int(&ifpoll_handlers, 1) == 0) {
		struct netmsg *nmsg;

		/* Start systimer */
		nmsg = kmalloc(sizeof(*nmsg), M_LWKTMSG, M_WAITOK);
		netmsg_init(nmsg, &netisr_afree_rport, 0, ifpoll_start_handler);
		ifnet_sendmsg(&nmsg->nm_lmsg, 0);
	}
}

static void
ifpoll_handler_delevent(void)
{
	KKASSERT(ifpoll_handlers > 0);
	if (atomic_fetchadd_int(&ifpoll_handlers, -1) == 1) {
		struct netmsg *nmsg;

		/* Stop systimer */
		nmsg = kmalloc(sizeof(*nmsg), M_LWKTMSG, M_WAITOK);
		netmsg_init(nmsg, &netisr_afree_rport, 0, ifpoll_stop_handler);
		ifnet_sendmsg(&nmsg->nm_lmsg, 0);
	}
}

static void
sysctl_ifpollhz_handler(struct netmsg *nmsg)
{
	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));

	/*
	 * If there is no handler registered, don't adjust the polling
	 * systimer frequency; it will be adjusted once a handler is
	 * registered.
	 */
	ifpoll_pollhz = nmsg->nm_lmsg.u.ms_result;
	if (ifpoll_handlers)
		systimer_adjust_periodic(&ifpoll0.clock, ifpoll_pollhz);

	lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

static int
sysctl_ifpollhz(SYSCTL_HANDLER_ARGS)
{
	struct netmsg nmsg;
	int error, phz;

	phz = ifpoll_pollhz;
	error = sysctl_handle_int(oidp, &phz, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (phz <= 0)
		return EINVAL;
	else if (phz > IFPOLL_FREQ_MAX)
		phz = IFPOLL_FREQ_MAX;

	netmsg_init(&nmsg, &curthread->td_msgport, MSGF_MPSAFE,
		    sysctl_ifpollhz_handler);
	nmsg.nm_lmsg.u.ms_result = phz;

	return ifnet_domsg(&nmsg.nm_lmsg, 0);
}

#endif	/* !IFPOLL_MULTI_SYSTIMER */

int
ifpoll_register(struct ifnet *ifp)
{
	struct ifpoll_info info;
	struct netmsg nmsg;
	int error;

	if (ifp->if_qpoll == NULL) {
		/* Device does not support polling */
		return EOPNOTSUPP;
	}

	/*
	 * Attempt to register.  Interlock with IFF_NPOLLING.
	 */

	ifnet_serialize_all(ifp);

	if (ifp->if_flags & IFF_NPOLLING) {
		/* Already polling */
		ifnet_deserialize_all(ifp);
		return EBUSY;
	}

	bzero(&info, sizeof(info));
	info.ifpi_ifp = ifp;

	ifp->if_flags |= IFF_NPOLLING;
	ifp->if_qpoll(ifp, &info);

	ifnet_deserialize_all(ifp);

	netmsg_init(&nmsg, &curthread->td_msgport, MSGF_MPSAFE,
		    ifpoll_register_handler);
	nmsg.nm_lmsg.u.ms_resultp = &info;

	error = ifnet_domsg(&nmsg.nm_lmsg, 0);
	if (error) {
		if (!ifpoll_deregister(ifp)) {
			if_printf(ifp, "ifpoll_register: "
				  "ifpoll_deregister failed!\n");
		}
	}
	return error;
}

int
ifpoll_deregister(struct ifnet *ifp)
{
	struct netmsg nmsg;
	int error;

	if (ifp->if_qpoll == NULL)
		return EOPNOTSUPP;

	ifnet_serialize_all(ifp);

	if ((ifp->if_flags & IFF_NPOLLING) == 0) {
		ifnet_deserialize_all(ifp);
		return EINVAL;
	}
	ifp->if_flags &= ~IFF_NPOLLING;

	ifnet_deserialize_all(ifp);

	netmsg_init(&nmsg, &curthread->td_msgport, MSGF_MPSAFE,
		    ifpoll_deregister_handler);
	nmsg.nm_lmsg.u.ms_resultp = ifp;

	error = ifnet_domsg(&nmsg.nm_lmsg, 0);
	if (!error) {
		ifnet_serialize_all(ifp);
		ifp->if_qpoll(ifp, NULL);
		ifnet_deserialize_all(ifp);
	}
	return error;
}

static void
ifpoll_register_handler(struct netmsg *nmsg)
{
	const struct ifpoll_info *info = nmsg->nm_lmsg.u.ms_resultp;
	int cpuid = mycpuid, nextcpu;
	int error;

	KKASSERT(cpuid < ifpoll_ncpus);
	KKASSERT(&curthread->td_msgport == ifnet_portfn(cpuid));

	if (cpuid == 0) {
		error = stpoll_register(info->ifpi_ifp, &info->ifpi_status);
		if (error)
			goto failed;
	}

	error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid],
				&info->ifpi_rx[cpuid]);
	if (error)
		goto failed;

	error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid],
				&info->ifpi_tx[cpuid]);
	if (error)
		goto failed;

	nextcpu = cpuid + 1;
	if (nextcpu < ifpoll_ncpus)
		ifnet_forwardmsg(&nmsg->nm_lmsg, nextcpu);
	else
		lwkt_replymsg(&nmsg->nm_lmsg, 0);
	return;
failed:
	lwkt_replymsg(&nmsg->nm_lmsg, error);
}
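/*
 * Both ifpoll_register_handler() above and ifpoll_deregister_handler()
 * below run as a per-CPU message chain: the netmsg is first dispatched to
 * CPU 0's ifnet thread, each CPU performs its own share of the work and
 * forwards the message to the next CPU (ifnet_forwardmsg); the last CPU
 * replies, unblocking ifnet_domsg() in the originating thread.  On a
 * registration failure the chain is aborted and the error is replied
 * immediately, after which ifpoll_register() rolls back through
 * ifpoll_deregister().
 */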
static void
ifpoll_deregister_handler(struct netmsg *nmsg)
{
	struct ifnet *ifp = nmsg->nm_lmsg.u.ms_resultp;
	int cpuid = mycpuid, nextcpu;

	KKASSERT(cpuid < ifpoll_ncpus);
	KKASSERT(&curthread->td_msgport == ifnet_portfn(cpuid));

	/* Ignore errors */
	if (cpuid == 0)
		stpoll_deregister(ifp);
	iopoll_deregister(ifp, rxpoll_context[cpuid]);
	iopoll_deregister(ifp, txpoll_context[cpuid]);

	nextcpu = cpuid + 1;
	if (nextcpu < ifpoll_ncpus)
		ifnet_forwardmsg(&nmsg->nm_lmsg, nextcpu);
	else
		lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

static void
stpoll_init(void)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;

#ifdef IFPOLL_MULTI_SYSTIMER
	st_ctx->pollhz = stpoll_hz;
#endif

	sysctl_ctx_init(&st_ctx->poll_sysctl_ctx);
	st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx,
				   SYSCTL_STATIC_CHILDREN(_net_ifpoll),
				   OID_AUTO, "status", CTLFLAG_RD, 0, "");

#ifdef IFPOLL_MULTI_SYSTIMER
	SYSCTL_ADD_PROC(&st_ctx->poll_sysctl_ctx,
			SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree),
			OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW,
			st_ctx, 0, sysctl_stpollhz, "I",
			"Status polling frequency");
#endif

	SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx,
			SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree),
			OID_AUTO, "handlers", CTLFLAG_RD,
			&st_ctx->poll_handlers, 0,
			"Number of registered status poll handlers");

	netmsg_init(&st_ctx->poll_netmsg, &netisr_adone_rport, MSGF_MPSAFE,
		    stpoll_handler);

#ifdef IFPOLL_MULTI_SYSTIMER
	systimer_init_periodic_nq(&st_ctx->pollclock,
				  stpoll_systimer, st_ctx, 1);
#endif
}

#ifdef IFPOLL_MULTI_SYSTIMER

static void
sysctl_stpollhz_handler(struct netmsg *msg)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;

	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));

	/*
	 * If there is no handler registered, don't adjust the polling
	 * systimer frequency; it will be adjusted once a handler is
	 * registered.
	 */
	st_ctx->pollhz = msg->nm_lmsg.u.ms_result;
	if (st_ctx->poll_handlers)
		systimer_adjust_periodic(&st_ctx->pollclock, st_ctx->pollhz);

	lwkt_replymsg(&msg->nm_lmsg, 0);
}

static int
sysctl_stpollhz(SYSCTL_HANDLER_ARGS)
{
	struct stpoll_ctx *st_ctx = arg1;
	struct netmsg msg;
	int error, phz;

	phz = st_ctx->pollhz;
	error = sysctl_handle_int(oidp, &phz, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (phz <= 0)
		return EINVAL;
	else if (phz > IFPOLL_FREQ_MAX)
		phz = IFPOLL_FREQ_MAX;

	netmsg_init(&msg, &curthread->td_msgport, MSGF_MPSAFE,
		    sysctl_stpollhz_handler);
	msg.nm_lmsg.u.ms_result = phz;

	return ifnet_domsg(&msg.nm_lmsg, 0);
}

#endif	/* IFPOLL_MULTI_SYSTIMER */
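/*
 * Two idioms worth noting around here:
 *
 * 1. sysctl handlers such as sysctl_stpollhz() above validate the new
 *    value in the calling thread, then hand it to CPU 0's ifnet thread
 *    as a netmsg, so the pollhz value and the systimer are only ever
 *    touched from that thread and need no extra locking.
 *
 * 2. stpoll_handler() below uses lwkt_serialize_try() rather than a
 *    blocking enter: an interface whose serializer is currently
 *    contended is simply skipped this tick instead of stalling status
 *    polling for everybody else.
 */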
/*
 * stpoll_handler is scheduled by sched_stpoll when appropriate, typically
 * once per polling systimer tick.
 */
static void
stpoll_handler(struct netmsg *msg)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	struct thread *td = curthread;
	int i, poll_hz;

	KKASSERT(&td->td_msgport == ifnet_portfn(0));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->nm_lmsg, 0);

	if (st_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

#ifdef IFPOLL_MULTI_SYSTIMER
	poll_hz = st_ctx->pollhz;
#else
	poll_hz = ifpoll_pollhz / (ifpoll_stfrac + 1);
#endif

	for (i = 0; i < st_ctx->poll_handlers; ++i) {
		const struct stpoll_rec *rec = &st_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		if (!lwkt_serialize_try(rec->serializer))
			continue;

		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->status_func(ifp, poll_hz);

		lwkt_serialize_exit(rec->serializer);
	}

	crit_exit_quick(td);
}

/*
 * Hook from status poll systimer.  Tries to schedule a status poll.
 */
static void
stpoll_clock(struct stpoll_ctx *st_ctx)
{
	globaldata_t gd = mycpu;

	KKASSERT(gd->gd_cpuid == 0);

	if (st_ctx->poll_handlers == 0)
		return;

	crit_enter_gd(gd);
	sched_stpoll(st_ctx);
	crit_exit_gd(gd);
}

#ifdef IFPOLL_MULTI_SYSTIMER
static void
stpoll_systimer(systimer_t info, struct intrframe *frame __unused)
{
	stpoll_clock(info->data);
}
#endif

static int
stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	int error;

	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));

	if (st_rec->status_func == NULL)
		return 0;

	/*
	 * Check if there is room.
	 */
	if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
		/*
		 * List full, cannot register more entries.
		 * This should never happen; if it does, it is probably a
		 * broken driver trying to register multiple times.  Checking
		 * this at runtime is expensive, and won't solve the problem
		 * anyways, so just report a few times and then give up.
		 */
		static int verbose = 10;	/* XXX */

		if (verbose > 0) {
			kprintf("status poll handlers list full, "
				"maybe a broken driver?\n");
			verbose--;
		}
		error = ENOENT;
	} else {
		struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers];

		rec->ifp = ifp;
		rec->serializer = st_rec->serializer;
		rec->status_func = st_rec->status_func;

		st_ctx->poll_handlers++;

#ifdef IFPOLL_MULTI_SYSTIMER
		if (st_ctx->poll_handlers == 1) {
			systimer_adjust_periodic(&st_ctx->pollclock,
						 st_ctx->pollhz);
		}
#else
		ifpoll_handler_addevent();
#endif
		error = 0;
	}
	return error;
}

static int
stpoll_deregister(struct ifnet *ifp)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	int i, error;

	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));

	for (i = 0; i < st_ctx->poll_handlers; ++i) {
		if (st_ctx->pr[i].ifp == ifp)	/* Found it */
			break;
	}
	if (i == st_ctx->poll_handlers) {
		kprintf("stpoll_deregister: ifp not found!!!\n");
		error = ENOENT;
	} else {
		st_ctx->poll_handlers--;
		if (i < st_ctx->poll_handlers) {
			/* Last entry replaces this one. */
			st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers];
		}

#ifdef IFPOLL_MULTI_SYSTIMER
		if (st_ctx->poll_handlers == 0)
			systimer_adjust_periodic(&st_ctx->pollclock, 1);
#else
		ifpoll_handler_delevent();
#endif
		error = 0;
	}
	return error;
}
#ifndef IFPOLL_MULTI_SYSTIMER
static __inline int
iopoll_hz(struct iopoll_ctx *io_ctx)
{
	int poll_hz;

	poll_hz = ifpoll_pollhz;
	if (io_ctx->poll_type == IFPOLL_TX)
		poll_hz /= ifpoll_txfrac + 1;
	return poll_hz;
}
#endif

static __inline void
iopoll_reset_state(struct iopoll_ctx *io_ctx)
{
	crit_enter();
	io_ctx->poll_burst = 5;
	io_ctx->pending_polls = 0;
	io_ctx->residual_burst = 0;
	io_ctx->phase = 0;
	io_ctx->kern_frac = 0;
	bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t));
	bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t));
	crit_exit();
}

static void
iopoll_init(int cpuid)
{
	KKASSERT(cpuid < IFPOLL_CTX_MAX);

	/* Create iopoll_comm context before TX/RX poll context */
	iopoll_common[cpuid] = iopoll_comm_create(cpuid);

	rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX);
	txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX);
}

static struct iopoll_comm *
iopoll_comm_create(int cpuid)
{
	struct iopoll_comm *comm;
	char cpuid_str[16];

	comm = kmalloc(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO);

	ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid);

	sysctl_ctx_init(&comm->sysctl_ctx);
	comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx,
			    SYSCTL_STATIC_CHILDREN(_net_ifpoll),
			    OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");

	return comm;
}
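/*
 * The comm context above only provides the per-cpu "net.ifpoll.<cpuid>"
 * sysctl node; iopoll_ctx_create() below hangs an "rx" and a "tx" subtree
 * off it.  Roughly (illustrative, one CPU, IFPOLL_MULTI_SYSTIMER not
 * defined):
 *
 *	net.ifpoll.pollhz
 *	net.ifpoll.tx_frac
 *	net.ifpoll.st_frac
 *	net.ifpoll.status.handlers
 *	net.ifpoll.0.rx.{burst_max,each_burst,burst,user_frac,kern_frac,...}
 *	net.ifpoll.0.tx.{burst_max,each_burst,burst,user_frac,kern_frac,...}
 */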
static struct iopoll_ctx *
iopoll_ctx_create(int cpuid, int poll_type)
{
	struct iopoll_comm *comm;
	struct iopoll_ctx *io_ctx;
	const char *poll_type_str;

	KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX);

	/*
	 * Make sure that tunables are in sane state
	 */
	if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX)
		iopoll_burst_max = MIN_IOPOLL_BURST_MAX;
	else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX)
		iopoll_burst_max = MAX_IOPOLL_BURST_MAX;

	if (iopoll_each_burst > iopoll_burst_max)
		iopoll_each_burst = iopoll_burst_max;

	/*
	 * Create the per-cpu polling context
	 */
	io_ctx = kmalloc(sizeof(*io_ctx), M_DEVBUF, M_WAITOK | M_ZERO);

	io_ctx->poll_each_burst = iopoll_each_burst;
	io_ctx->poll_burst_max = iopoll_burst_max;
	io_ctx->user_frac = 50;
#ifdef IFPOLL_MULTI_SYSTIMER
	io_ctx->pollhz = iopoll_hz;
#else
	io_ctx->poll_type = poll_type;
#endif
	io_ctx->poll_cpuid = cpuid;
	iopoll_reset_state(io_ctx);

	netmsg_init(&io_ctx->poll_netmsg, &netisr_adone_rport, MSGF_MPSAFE,
		    iopoll_handler);
	io_ctx->poll_netmsg.nm_lmsg.u.ms_resultp = io_ctx;

	netmsg_init(&io_ctx->poll_more_netmsg, &netisr_adone_rport, MSGF_MPSAFE,
		    iopollmore_handler);
	io_ctx->poll_more_netmsg.nm_lmsg.u.ms_resultp = io_ctx;

	/*
	 * Initialize per-cpu sysctl nodes
	 */
	if (poll_type == IFPOLL_RX)
		poll_type_str = "rx";
	else
		poll_type_str = "tx";

	comm = iopoll_common[cpuid];
	sysctl_ctx_init(&io_ctx->poll_sysctl_ctx);
	io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx,
				   SYSCTL_CHILDREN(comm->sysctl_tree),
				   OID_AUTO, poll_type_str, CTLFLAG_RD, 0, "");

	iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx,
			  SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx);

#ifdef IFPOLL_MULTI_SYSTIMER
	/*
	 * Initialize systimer
	 */
	systimer_init_periodic_nq(&io_ctx->pollclock,
				  iopoll_systimer, io_ctx, 1);
#endif

	return io_ctx;
}

/*
 * Hook from iopoll systimer.  Tries to schedule an iopoll, but keeps
 * track of lost ticks due to the previous handler taking too long.
 * Normally, this should not happen, because polling handlers should
 * run for a short time.  However, in some cases (e.g. when there are
 * changes in link status etc.) the drivers take a very long time
 * (even in the order of milliseconds) to reset and reconfigure the
 * device, causing apparent lost polls.
 *
 * The first part of the code is just for debugging purposes, and tries
 * to count how often hardclock ticks are shorter than they should be,
 * meaning either stray interrupts or delayed events.
 *
 * WARNING! called from fastint or IPI, the MP lock might not be held.
 */
static void
iopoll_clock(struct iopoll_ctx *io_ctx)
{
	globaldata_t gd = mycpu;
	union ifpoll_time t;
	int delta, poll_hz;

	KKASSERT(gd->gd_cpuid == io_ctx->poll_cpuid);

	if (io_ctx->poll_handlers == 0)
		return;

#ifdef IFPOLL_MULTI_SYSTIMER
	poll_hz = io_ctx->pollhz;
#else
	poll_hz = iopoll_hz(io_ctx);
#endif

	ifpoll_time_get(&t);
	delta = ifpoll_time_diff(&io_ctx->prev_t, &t);
	if (delta * poll_hz < 500000)
		io_ctx->short_ticks++;
	else
		io_ctx->prev_t = t;

	if (io_ctx->pending_polls > 100) {
		/*
		 * Too much, assume it has stalled (not always true,
		 * see comment above).
		 */
		io_ctx->stalled++;
		io_ctx->pending_polls = 0;
		io_ctx->phase = 0;
	}

	if (io_ctx->phase <= 2) {
		if (io_ctx->phase != 0)
			io_ctx->suspect++;
		io_ctx->phase = 1;
		crit_enter_gd(gd);
		sched_iopoll(io_ctx);
		crit_exit_gd(gd);
		io_ctx->phase = 2;
	}
	if (io_ctx->pending_polls++ > 0)
		io_ctx->lost_polls++;
}

#ifdef IFPOLL_MULTI_SYSTIMER
static void
iopoll_systimer(systimer_t info, struct intrframe *frame __unused)
{
	iopoll_clock(info->data);
}
#endif
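/*
 * Worked example for the short-tick test in iopoll_clock() above
 * (illustrative): a poll frequency of poll_hz means a nominal tick of
 * 1000000 / poll_hz microseconds, so "delta * poll_hz < 500000" is simply
 * "delta < half a tick".  E.g. with poll_hz = 2000 the nominal tick is
 * 500us, and any delta below 250us is counted in short_ticks and does
 * not advance prev_t.
 */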
/*
 * iopoll_handler is scheduled by sched_iopoll when appropriate, typically
 * once per polling systimer tick.
 *
 * Note that the message is replied immediately in order to allow a new
 * ISR to be scheduled in the handler.
 */
static void
iopoll_handler(struct netmsg *msg)
{
	struct iopoll_ctx *io_ctx;
	struct thread *td = curthread;
	int i, cycles;

	io_ctx = msg->nm_lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->nm_lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	io_ctx->phase = 3;
	if (io_ctx->residual_burst == 0) {
		/* First call in this tick */
		ifpoll_time_get(&io_ctx->poll_start_t);
		io_ctx->residual_burst = io_ctx->poll_burst;
	}
	cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ?
		 io_ctx->residual_burst : io_ctx->poll_each_burst;
	io_ctx->residual_burst -= cycles;

	for (i = 0; i < io_ctx->poll_handlers; i++) {
		const struct iopoll_rec *rec = &io_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		if (!lwkt_serialize_try(rec->serializer))
			continue;

		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->poll_func(ifp, rec->arg, cycles);

		lwkt_serialize_exit(rec->serializer);
	}

	/*
	 * Do a quick exit/enter to catch any higher-priority
	 * interrupt sources.
	 */
	crit_exit_quick(td);
	crit_enter_quick(td);

	sched_iopollmore(io_ctx);
	io_ctx->phase = 4;

	crit_exit_quick(td);
}
/*
 * iopollmore_handler is called after other netisr's, possibly scheduling
 * another iopoll_handler call, or adapting the burst size for the next cycle.
 *
 * It is very bad to fetch large bursts of packets from a single card at once,
 * because the burst could take a long time to be completely processed, leading
 * to unfairness.  To reduce the problem, and also to account better for time
 * spent in network-related processing, we split the burst in smaller chunks
 * of fixed size, giving control to the other netisr's between chunks.  This
 * helps in improving the fairness, reducing livelock and accounting for the
 * work performed in low level handling.
 */
static void
iopollmore_handler(struct netmsg *msg)
{
	struct thread *td = curthread;
	struct iopoll_ctx *io_ctx;
	union ifpoll_time t;
	int kern_load, poll_hz;
	uint32_t pending_polls;

	io_ctx = msg->nm_lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->nm_lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

#ifdef IFPOLL_MULTI_SYSTIMER
	poll_hz = io_ctx->pollhz;
#else
	poll_hz = iopoll_hz(io_ctx);
#endif

	io_ctx->phase = 5;
	if (io_ctx->residual_burst > 0) {
		sched_iopoll(io_ctx);
		crit_exit_quick(td);
		/* Will run immediately on return, followed by netisrs */
		return;
	}

	/* Here we can account time spent in iopoll's in this tick */
	ifpoll_time_get(&t);
	kern_load = ifpoll_time_diff(&io_ctx->poll_start_t, &t);
	kern_load = (kern_load * poll_hz) / 10000;	/* 0..100 */
	io_ctx->kern_frac = kern_load;

	if (kern_load > (100 - io_ctx->user_frac)) {
		/* Try decrease ticks */
		if (io_ctx->poll_burst > 1)
			io_ctx->poll_burst--;
	} else {
		if (io_ctx->poll_burst < io_ctx->poll_burst_max)
			io_ctx->poll_burst++;
	}

	io_ctx->pending_polls--;
	pending_polls = io_ctx->pending_polls;

	if (pending_polls == 0) {
		/* We are done */
		io_ctx->phase = 0;
	} else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks.  Restart processing again, but slightly
		 * reduce the burst size to prevent this from happening again.
		 */
		io_ctx->poll_burst -= (io_ctx->poll_burst / 8);
		if (io_ctx->poll_burst < 1)
			io_ctx->poll_burst = 1;
		sched_iopoll(io_ctx);
		io_ctx->phase = 6;
	}

	crit_exit_quick(td);
}
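/*
 * Worked example for the kern_load computation in iopollmore_handler()
 * above (illustrative): kern_load converts the time spent polling in this
 * tick into a percentage of the tick, since
 * (delta_us / tick_us) * 100 = delta_us * poll_hz / 10000.  E.g. with
 * poll_hz = 2000 (tick = 500us), polling for 250us yields
 * kern_load = 250 * 2000 / 10000 = 50, i.e. 50% of the tick; with
 * user_frac = 50 that is exactly the boundary between growing and
 * shrinking poll_burst.
 */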
static void
iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
		  struct iopoll_ctx *io_ctx)
{
#ifdef IFPOLL_MULTI_SYSTIMER
	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "pollhz",
			CTLTYPE_INT | CTLFLAG_RW, io_ctx, 0, sysctl_iopollhz,
			"I", "Device polling frequency");
#endif

	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
			CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax,
			"IU", "Max Polling burst size");

	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
			CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst,
			"IU", "Max size of each burst");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
			&io_ctx->phase, 0, "Polling phase");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
			&io_ctx->suspect, 0, "suspect event");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
			&io_ctx->stalled, 0, "potential stalls");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
			&io_ctx->poll_burst, 0, "Current polling burst size");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
			&io_ctx->user_frac, 0,
			"Desired user fraction of cpu time");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD,
			&io_ctx->kern_frac, 0,
			"Kernel fraction of cpu time");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
			&io_ctx->short_ticks, 0,
			"Hardclock ticks shorter than they should be");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
			&io_ctx->lost_polls, 0,
			"How many times we would have lost a poll tick");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
			&io_ctx->pending_polls, 0, "Do we need to poll again");

	SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
		       &io_ctx->residual_burst, 0,
		       "# of residual cycles in burst");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
			&io_ctx->poll_handlers, 0,
			"Number of registered poll handlers");
}

#ifdef IFPOLL_MULTI_SYSTIMER

static int
sysctl_iopollhz(SYSCTL_HANDLER_ARGS)
{
	struct iopoll_ctx *io_ctx = arg1;
	struct iopoll_sysctl_netmsg msg;
	struct netmsg *nmsg;
	int error, phz;

	phz = io_ctx->pollhz;
	error = sysctl_handle_int(oidp, &phz, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (phz <= 0)
		return EINVAL;
	else if (phz > IFPOLL_FREQ_MAX)
		phz = IFPOLL_FREQ_MAX;

	nmsg = &msg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, MSGF_MPSAFE,
		    sysctl_iopollhz_handler);
	nmsg->nm_lmsg.u.ms_result = phz;
	msg.ctx = io_ctx;

	return ifnet_domsg(&nmsg->nm_lmsg, io_ctx->poll_cpuid);
}

static void
sysctl_iopollhz_handler(struct netmsg *nmsg)
{
	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
	struct iopoll_ctx *io_ctx;

	io_ctx = msg->ctx;
	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	/*
	 * If polling is disabled or there is no polling handler
	 * registered, don't adjust the polling systimer frequency.
	 * It will be adjusted once there are registered handlers.
	 */
	io_ctx->pollhz = nmsg->nm_lmsg.u.ms_result;
	if (io_ctx->poll_handlers)
		systimer_adjust_periodic(&io_ctx->pollclock, io_ctx->pollhz);

	lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

#endif	/* IFPOLL_MULTI_SYSTIMER */

static void
sysctl_burstmax_handler(struct netmsg *nmsg)
{
	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
	struct iopoll_ctx *io_ctx;

	io_ctx = msg->ctx;
	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	io_ctx->poll_burst_max = nmsg->nm_lmsg.u.ms_result;
	if (io_ctx->poll_each_burst > io_ctx->poll_burst_max)
		io_ctx->poll_each_burst = io_ctx->poll_burst_max;
	if (io_ctx->poll_burst > io_ctx->poll_burst_max)
		io_ctx->poll_burst = io_ctx->poll_burst_max;
	if (io_ctx->residual_burst > io_ctx->poll_burst_max)
		io_ctx->residual_burst = io_ctx->poll_burst_max;

	lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

static int
sysctl_burstmax(SYSCTL_HANDLER_ARGS)
{
	struct iopoll_ctx *io_ctx = arg1;
	struct iopoll_sysctl_netmsg msg;
	struct netmsg *nmsg;
	uint32_t burst_max;
	int error;

	burst_max = io_ctx->poll_burst_max;
	error = sysctl_handle_int(oidp, &burst_max, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (burst_max < MIN_IOPOLL_BURST_MAX)
		burst_max = MIN_IOPOLL_BURST_MAX;
	else if (burst_max > MAX_IOPOLL_BURST_MAX)
		burst_max = MAX_IOPOLL_BURST_MAX;

	nmsg = &msg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, MSGF_MPSAFE,
		    sysctl_burstmax_handler);
	nmsg->nm_lmsg.u.ms_result = burst_max;
	msg.ctx = io_ctx;

	return ifnet_domsg(&nmsg->nm_lmsg, io_ctx->poll_cpuid);
}

static void
sysctl_eachburst_handler(struct netmsg *nmsg)
{
	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
	struct iopoll_ctx *io_ctx;
	uint32_t each_burst;

	io_ctx = msg->ctx;
	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	each_burst = nmsg->nm_lmsg.u.ms_result;
	if (each_burst > io_ctx->poll_burst_max)
		each_burst = io_ctx->poll_burst_max;
	else if (each_burst < 1)
		each_burst = 1;
	io_ctx->poll_each_burst = each_burst;

	lwkt_replymsg(&nmsg->nm_lmsg, 0);
}

static int
sysctl_eachburst(SYSCTL_HANDLER_ARGS)
{
	struct iopoll_ctx *io_ctx = arg1;
	struct iopoll_sysctl_netmsg msg;
	struct netmsg *nmsg;
	uint32_t each_burst;
	int error;

	each_burst = io_ctx->poll_each_burst;
	error = sysctl_handle_int(oidp, &each_burst, 0, req);
	if (error || req->newptr == NULL)
		return error;

	nmsg = &msg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, MSGF_MPSAFE,
		    sysctl_eachburst_handler);
	nmsg->nm_lmsg.u.ms_result = each_burst;
	msg.ctx = io_ctx;

	return ifnet_domsg(&nmsg->nm_lmsg, io_ctx->poll_cpuid);
}
static int
iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx,
		const struct ifpoll_io *io_rec)
{
	int error;

	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	if (io_rec->poll_func == NULL)
		return 0;

	/*
	 * Check if there is room.
	 */
	if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
		/*
		 * List full, cannot register more entries.
		 * This should never happen; if it does, it is probably a
		 * broken driver trying to register multiple times.  Checking
		 * this at runtime is expensive, and won't solve the problem
		 * anyways, so just report a few times and then give up.
		 */
		static int verbose = 10;	/* XXX */

		if (verbose > 0) {
			kprintf("io poll handlers list full, "
				"maybe a broken driver?\n");
			verbose--;
		}
		error = ENOENT;
	} else {
		struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers];

		rec->ifp = ifp;
		rec->serializer = io_rec->serializer;
		rec->arg = io_rec->arg;
		rec->poll_func = io_rec->poll_func;

		io_ctx->poll_handlers++;
		if (io_ctx->poll_handlers == 1) {
#ifdef IFPOLL_MULTI_SYSTIMER
			systimer_adjust_periodic(&io_ctx->pollclock,
						 io_ctx->pollhz);
#else
			u_int *mask;

			if (io_ctx->poll_type == IFPOLL_RX)
				mask = &ifpoll0.rx_cpumask;
			else
				mask = &ifpoll0.tx_cpumask;
			KKASSERT((*mask & mycpu->gd_cpumask) == 0);
			atomic_set_int(mask, mycpu->gd_cpumask);
#endif
		}
#ifndef IFPOLL_MULTI_SYSTIMER
		ifpoll_handler_addevent();
#endif
		error = 0;
	}
	return error;
}

static int
iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx)
{
	int i, error;

	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));

	for (i = 0; i < io_ctx->poll_handlers; ++i) {
		if (io_ctx->pr[i].ifp == ifp)	/* Found it */
			break;
	}
	if (i == io_ctx->poll_handlers) {
		error = ENOENT;
	} else {
		io_ctx->poll_handlers--;
		if (i < io_ctx->poll_handlers) {
			/* Last entry replaces this one. */
			io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers];
		}

		if (io_ctx->poll_handlers == 0) {
#ifdef IFPOLL_MULTI_SYSTIMER
			systimer_adjust_periodic(&io_ctx->pollclock, 1);
#else
			u_int *mask;

			if (io_ctx->poll_type == IFPOLL_RX)
				mask = &ifpoll0.rx_cpumask;
			else
				mask = &ifpoll0.tx_cpumask;
			KKASSERT(*mask & mycpu->gd_cpumask);
			atomic_clear_int(mask, mycpu->gd_cpumask);
#endif
			iopoll_reset_state(io_ctx);
		}
#ifndef IFPOLL_MULTI_SYSTIMER
		ifpoll_handler_delevent();
#endif
		error = 0;
	}
	return error;
}