1 /*- 2 * Copyright (c) 2001-2002 Luigi Rizzo 3 * 4 * Supported by: the Xorp Project (www.xorp.org) 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $ 28 */ 29 30 #include "opt_ifpoll.h" 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/ktr.h> 35 #include <sys/malloc.h> 36 #include <sys/serialize.h> 37 #include <sys/socket.h> 38 #include <sys/sysctl.h> 39 40 #include <sys/thread2.h> 41 #include <sys/msgport2.h> 42 43 #include <machine/atomic.h> 44 #include <machine/clock.h> 45 #include <machine/smp.h> 46 47 #include <net/if.h> 48 #include <net/if_poll.h> 49 #include <net/netmsg2.h> 50 #include <net/netisr2.h> 51 52 /* 53 * Polling support for network device drivers. 54 * 55 * Drivers which support this feature try to register one status polling 56 * handler and several TX/RX polling handlers with the polling code. 57 * If interface's if_npoll is called with non-NULL second argument, then 58 * a register operation is requested, else a deregister operation is 59 * requested. If the requested operation is "register", driver should 60 * setup the ifpoll_info passed in accoding its own needs: 61 * ifpoll_info.ifpi_status.status_func == NULL 62 * No status polling handler will be installed on CPU(0) 63 * ifpoll_info.ifpi_rx[n].poll_func == NULL 64 * No RX polling handler will be installed on CPU(n) 65 * ifpoll_info.ifpi_tx[n].poll_func == NULL 66 * No TX polling handler will be installed on CPU(n) 67 * 68 * RX is polled at the specified polling frequency (net.ifpoll.X.pollhz). 69 * TX and status polling could be done at lower frequency than RX frequency 70 * (net.ifpoll.0.status_frac and net.ifpoll.X.tx_frac). To avoid systimer 71 * staggering at high frequency, RX systimer gives TX and status polling a 72 * piggyback (XXX). 73 * 74 * All of the registered polling handlers are called only if the interface 75 * is marked as 'IFF_RUNNING and IFF_NPOLLING'. However, the interface's 76 * register and deregister function (ifnet.if_npoll) will be called even 77 * if interface is not marked with 'IFF_RUNNING'. 78 * 79 * If registration is successful, the driver must disable interrupts, 80 * and further I/O is performed through the TX/RX polling handler, which 81 * are invoked (at least once per clock tick) with 3 arguments: the "arg" 82 * passed at register time, a struct ifnet pointer, and a "count" limit. 83 * The registered serializer will be held before calling the related 84 * polling handler. 85 * 86 * The count limit specifies how much work the handler can do during the 87 * call -- typically this is the number of packets to be received, or 88 * transmitted, etc. (drivers are free to interpret this number, as long 89 * as the max time spent in the function grows roughly linearly with the 90 * count). 91 * 92 * A second variable controls the sharing of CPU between polling/kernel 93 * network processing, and other activities (typically userlevel tasks): 94 * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the 95 * share of CPU allocated to user tasks. CPU is allocated proportionally 96 * to the shares, by dynamically adjusting the "count" (poll_burst). 97 * 98 * Other parameters can should be left to their default values. 99 * The following constraints hold 100 * 101 * 1 <= poll_burst <= poll_burst_max 102 * 1 <= poll_each_burst <= poll_burst_max 103 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX 104 */ 105 106 #define IFPOLL_LIST_LEN 128 107 #define IFPOLL_FREQ_MAX 30000 108 109 #define MIN_IOPOLL_BURST_MAX 10 110 #define MAX_IOPOLL_BURST_MAX 5000 111 #define IOPOLL_BURST_MAX 250 /* good for 1000Mbit net and HZ=6000 */ 112 113 #define IOPOLL_EACH_BURST 50 114 #define IOPOLL_USER_FRAC 50 115 116 #define IFPOLL_FREQ_DEFAULT 6000 117 118 #define IFPOLL_TXFRAC_DEFAULT 1 /* 1/1 of the pollhz */ 119 #define IFPOLL_STFRAC_DEFAULT 120 /* 1/120 of the pollhz */ 120 121 #define IFPOLL_RX 0x1 122 #define IFPOLL_TX 0x2 123 124 union ifpoll_time { 125 struct timeval tv; 126 uint64_t tsc; 127 }; 128 129 struct iopoll_rec { 130 struct lwkt_serialize *serializer; 131 struct ifnet *ifp; 132 void *arg; 133 ifpoll_iofn_t poll_func; 134 }; 135 136 struct iopoll_ctx { 137 union ifpoll_time prev_t; 138 u_long short_ticks; /* statistics */ 139 u_long lost_polls; /* statistics */ 140 u_long suspect; /* statistics */ 141 u_long stalled; /* statistics */ 142 uint32_t pending_polls; /* state */ 143 144 struct netmsg_base poll_netmsg; 145 struct netmsg_base poll_more_netmsg; 146 147 int poll_cpuid; 148 int pollhz; 149 uint32_t phase; /* state */ 150 int residual_burst; /* state */ 151 uint32_t poll_each_burst; /* tunable */ 152 union ifpoll_time poll_start_t; /* state */ 153 154 uint32_t poll_burst; /* state */ 155 uint32_t poll_burst_max; /* tunable */ 156 uint32_t user_frac; /* tunable */ 157 uint32_t kern_frac; /* state */ 158 159 uint32_t poll_handlers; /* next free entry in pr[]. */ 160 struct iopoll_rec pr[IFPOLL_LIST_LEN]; 161 162 struct sysctl_ctx_list poll_sysctl_ctx; 163 struct sysctl_oid *poll_sysctl_tree; 164 } __cachealign; 165 166 struct poll_comm { 167 struct systimer pollclock; 168 int poll_cpuid; 169 170 int stfrac_count; /* state */ 171 int poll_stfrac; /* tunable */ 172 173 int txfrac_count; /* state */ 174 int poll_txfrac; /* tunable */ 175 176 int pollhz; /* tunable */ 177 178 struct sysctl_ctx_list sysctl_ctx; 179 struct sysctl_oid *sysctl_tree; 180 } __cachealign; 181 182 struct stpoll_rec { 183 struct lwkt_serialize *serializer; 184 struct ifnet *ifp; 185 ifpoll_stfn_t status_func; 186 }; 187 188 struct stpoll_ctx { 189 struct netmsg_base poll_netmsg; 190 191 uint32_t poll_handlers; /* next free entry in pr[]. */ 192 struct stpoll_rec pr[IFPOLL_LIST_LEN]; 193 194 struct sysctl_ctx_list poll_sysctl_ctx; 195 struct sysctl_oid *poll_sysctl_tree; 196 } __cachealign; 197 198 struct iopoll_sysctl_netmsg { 199 struct netmsg_base base; 200 struct iopoll_ctx *ctx; 201 }; 202 203 void ifpoll_init_pcpu(int); 204 static void ifpoll_register_handler(netmsg_t); 205 static void ifpoll_deregister_handler(netmsg_t); 206 207 /* 208 * Status polling 209 */ 210 static void stpoll_init(void); 211 static void stpoll_handler(netmsg_t); 212 static void stpoll_clock(struct stpoll_ctx *); 213 static int stpoll_register(struct ifnet *, const struct ifpoll_status *); 214 static int stpoll_deregister(struct ifnet *); 215 216 /* 217 * RX/TX polling 218 */ 219 static struct iopoll_ctx *iopoll_ctx_create(int, int); 220 static void iopoll_init(int); 221 static void rxpoll_handler(netmsg_t); 222 static void txpoll_handler(netmsg_t); 223 static void rxpollmore_handler(netmsg_t); 224 static void txpollmore_handler(netmsg_t); 225 static void iopoll_clock(struct iopoll_ctx *); 226 static int iopoll_register(struct ifnet *, struct iopoll_ctx *, 227 const struct ifpoll_io *); 228 static int iopoll_deregister(struct ifnet *, struct iopoll_ctx *); 229 230 static void iopoll_add_sysctl(struct sysctl_ctx_list *, 231 struct sysctl_oid_list *, struct iopoll_ctx *, int); 232 static void sysctl_burstmax_handler(netmsg_t); 233 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS); 234 static void sysctl_eachburst_handler(netmsg_t); 235 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS); 236 237 /* 238 * Common functions 239 */ 240 static void poll_comm_init(int); 241 static void poll_comm_start(int); 242 static void poll_comm_adjust_pollhz(struct poll_comm *); 243 static void poll_comm_systimer0(systimer_t, int, struct intrframe *); 244 static void poll_comm_systimer(systimer_t, int, struct intrframe *); 245 static void sysctl_pollhz_handler(netmsg_t); 246 static void sysctl_stfrac_handler(netmsg_t); 247 static void sysctl_txfrac_handler(netmsg_t); 248 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS); 249 static int sysctl_stfrac(SYSCTL_HANDLER_ARGS); 250 static int sysctl_txfrac(SYSCTL_HANDLER_ARGS); 251 static int sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS); 252 static int sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS); 253 254 static struct stpoll_ctx stpoll_context; 255 static struct poll_comm *poll_common[MAXCPU]; 256 static struct iopoll_ctx *rxpoll_context[MAXCPU]; 257 static struct iopoll_ctx *txpoll_context[MAXCPU]; 258 259 SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0, 260 "Network device polling parameters"); 261 262 static int iopoll_burst_max = IOPOLL_BURST_MAX; 263 static int iopoll_each_burst = IOPOLL_EACH_BURST; 264 static int iopoll_user_frac = IOPOLL_USER_FRAC; 265 266 static int ifpoll_pollhz = IFPOLL_FREQ_DEFAULT; 267 static int ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT; 268 static int ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT; 269 270 TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max); 271 TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst); 272 TUNABLE_INT("net.ifpoll.user_frac", &iopoll_user_frac); 273 TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz); 274 TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac); 275 TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac); 276 277 #define IFPOLL_FREQ_ADJ(comm) (((comm)->poll_cpuid * 3) % 50) 278 279 static __inline int 280 poll_comm_pollhz_div(const struct poll_comm *comm, int pollhz) 281 { 282 return pollhz + IFPOLL_FREQ_ADJ(comm); 283 } 284 285 static __inline int 286 poll_comm_pollhz_conv(const struct poll_comm *comm, int pollhz) 287 { 288 return pollhz - IFPOLL_FREQ_ADJ(comm); 289 } 290 291 static __inline void 292 ifpoll_sendmsg_oncpu(netmsg_t msg) 293 { 294 if (msg->lmsg.ms_flags & MSGF_DONE) 295 lwkt_sendmsg(netisr_cpuport(mycpuid), &msg->lmsg); 296 } 297 298 static __inline void 299 sched_stpoll(struct stpoll_ctx *st_ctx) 300 { 301 ifpoll_sendmsg_oncpu((netmsg_t)&st_ctx->poll_netmsg); 302 } 303 304 static __inline void 305 sched_iopoll(struct iopoll_ctx *io_ctx) 306 { 307 ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_netmsg); 308 } 309 310 static __inline void 311 sched_iopollmore(struct iopoll_ctx *io_ctx) 312 { 313 ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_more_netmsg); 314 } 315 316 static __inline void 317 ifpoll_time_get(union ifpoll_time *t) 318 { 319 if (tsc_invariant) 320 t->tsc = rdtsc(); 321 else 322 microuptime(&t->tv); 323 } 324 325 /* Return time diff in us */ 326 static __inline int 327 ifpoll_time_diff(const union ifpoll_time *s, const union ifpoll_time *e) 328 { 329 if (__predict_true(tsc_present)) { 330 return (((e->tsc - s->tsc) * 1000000) / tsc_frequency); 331 } else { 332 return ((e->tv.tv_usec - s->tv.tv_usec) + 333 (e->tv.tv_sec - s->tv.tv_sec) * 1000000); 334 } 335 } 336 337 /* 338 * Initialize per-cpu polling(4) context. Called from kern_clock.c: 339 */ 340 void 341 ifpoll_init_pcpu(int cpuid) 342 { 343 if (cpuid >= ncpus2) 344 return; 345 346 poll_comm_init(cpuid); 347 348 if (cpuid == 0) 349 stpoll_init(); 350 iopoll_init(cpuid); 351 352 poll_comm_start(cpuid); 353 } 354 355 int 356 ifpoll_register(struct ifnet *ifp) 357 { 358 struct ifpoll_info *info; 359 struct netmsg_base nmsg; 360 int error; 361 362 if (ifp->if_npoll == NULL) { 363 /* Device does not support polling */ 364 return EOPNOTSUPP; 365 } 366 367 info = kmalloc(sizeof(*info), M_TEMP, M_WAITOK | M_ZERO); 368 369 /* 370 * Attempt to register. Interlock with IFF_NPOLLING. 371 */ 372 373 ifnet_serialize_all(ifp); 374 375 if (ifp->if_flags & IFF_NPOLLING) { 376 /* Already polling */ 377 ifnet_deserialize_all(ifp); 378 kfree(info, M_TEMP); 379 return EBUSY; 380 } 381 382 info->ifpi_ifp = ifp; 383 384 ifp->if_flags |= IFF_NPOLLING; 385 ifp->if_npoll(ifp, info); 386 387 ifnet_deserialize_all(ifp); 388 389 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 390 0, ifpoll_register_handler); 391 nmsg.lmsg.u.ms_resultp = info; 392 393 error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0); 394 if (error) { 395 if (!ifpoll_deregister(ifp)) { 396 if_printf(ifp, "ifpoll_register: " 397 "ifpoll_deregister failed!\n"); 398 } 399 } 400 401 kfree(info, M_TEMP); 402 return error; 403 } 404 405 int 406 ifpoll_deregister(struct ifnet *ifp) 407 { 408 struct netmsg_base nmsg; 409 int error; 410 411 if (ifp->if_npoll == NULL) 412 return EOPNOTSUPP; 413 414 ifnet_serialize_all(ifp); 415 416 if ((ifp->if_flags & IFF_NPOLLING) == 0) { 417 ifnet_deserialize_all(ifp); 418 return EINVAL; 419 } 420 ifp->if_flags &= ~IFF_NPOLLING; 421 422 ifnet_deserialize_all(ifp); 423 424 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 425 0, ifpoll_deregister_handler); 426 nmsg.lmsg.u.ms_resultp = ifp; 427 428 error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0); 429 if (!error) { 430 ifnet_serialize_all(ifp); 431 ifp->if_npoll(ifp, NULL); 432 ifnet_deserialize_all(ifp); 433 } 434 return error; 435 } 436 437 static void 438 ifpoll_register_handler(netmsg_t nmsg) 439 { 440 const struct ifpoll_info *info = nmsg->lmsg.u.ms_resultp; 441 int cpuid = mycpuid, nextcpu; 442 int error; 443 444 KKASSERT(cpuid < ncpus2); 445 KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid)); 446 447 if (cpuid == 0) { 448 error = stpoll_register(info->ifpi_ifp, &info->ifpi_status); 449 if (error) 450 goto failed; 451 } 452 453 error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid], 454 &info->ifpi_rx[cpuid]); 455 if (error) 456 goto failed; 457 458 error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid], 459 &info->ifpi_tx[cpuid]); 460 if (error) 461 goto failed; 462 463 /* Adjust polling frequency, after all registration is done */ 464 poll_comm_adjust_pollhz(poll_common[cpuid]); 465 466 nextcpu = cpuid + 1; 467 if (nextcpu < ncpus2) 468 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg); 469 else 470 lwkt_replymsg(&nmsg->lmsg, 0); 471 return; 472 failed: 473 lwkt_replymsg(&nmsg->lmsg, error); 474 } 475 476 static void 477 ifpoll_deregister_handler(netmsg_t nmsg) 478 { 479 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 480 int cpuid = mycpuid, nextcpu; 481 482 KKASSERT(cpuid < ncpus2); 483 KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid)); 484 485 /* Ignore errors */ 486 if (cpuid == 0) 487 stpoll_deregister(ifp); 488 iopoll_deregister(ifp, rxpoll_context[cpuid]); 489 iopoll_deregister(ifp, txpoll_context[cpuid]); 490 491 /* Adjust polling frequency, after all deregistration is done */ 492 poll_comm_adjust_pollhz(poll_common[cpuid]); 493 494 nextcpu = cpuid + 1; 495 if (nextcpu < ncpus2) 496 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg); 497 else 498 lwkt_replymsg(&nmsg->lmsg, 0); 499 } 500 501 static void 502 stpoll_init(void) 503 { 504 struct stpoll_ctx *st_ctx = &stpoll_context; 505 const struct poll_comm *comm = poll_common[0]; 506 507 sysctl_ctx_init(&st_ctx->poll_sysctl_ctx); 508 st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx, 509 SYSCTL_CHILDREN(comm->sysctl_tree), 510 OID_AUTO, "status", CTLFLAG_RD, 0, ""); 511 512 SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx, 513 SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree), 514 OID_AUTO, "handlers", CTLFLAG_RD, 515 &st_ctx->poll_handlers, 0, 516 "Number of registered status poll handlers"); 517 518 netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport, 519 0, stpoll_handler); 520 } 521 522 /* 523 * stpoll_handler is scheduled by sched_stpoll when appropriate, typically 524 * once per polling systimer tick. 525 */ 526 static void 527 stpoll_handler(netmsg_t msg) 528 { 529 struct stpoll_ctx *st_ctx = &stpoll_context; 530 struct thread *td = curthread; 531 int i; 532 533 KKASSERT(&td->td_msgport == netisr_cpuport(0)); 534 535 crit_enter_quick(td); 536 537 /* Reply ASAP */ 538 lwkt_replymsg(&msg->lmsg, 0); 539 540 if (st_ctx->poll_handlers == 0) { 541 crit_exit_quick(td); 542 return; 543 } 544 545 for (i = 0; i < st_ctx->poll_handlers; ++i) { 546 const struct stpoll_rec *rec = &st_ctx->pr[i]; 547 struct ifnet *ifp = rec->ifp; 548 549 if (!lwkt_serialize_try(rec->serializer)) 550 continue; 551 552 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) == 553 (IFF_RUNNING | IFF_NPOLLING)) 554 rec->status_func(ifp); 555 556 lwkt_serialize_exit(rec->serializer); 557 } 558 559 crit_exit_quick(td); 560 } 561 562 /* 563 * Hook from status poll systimer. Tries to schedule an status poll. 564 * NOTE: Caller should hold critical section. 565 */ 566 static void 567 stpoll_clock(struct stpoll_ctx *st_ctx) 568 { 569 KKASSERT(mycpuid == 0); 570 571 if (st_ctx->poll_handlers == 0) 572 return; 573 sched_stpoll(st_ctx); 574 } 575 576 static int 577 stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec) 578 { 579 struct stpoll_ctx *st_ctx = &stpoll_context; 580 int error; 581 582 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 583 584 if (st_rec->status_func == NULL) 585 return 0; 586 587 /* 588 * Check if there is room. 589 */ 590 if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) { 591 /* 592 * List full, cannot register more entries. 593 * This should never happen; if it does, it is probably a 594 * broken driver trying to register multiple times. Checking 595 * this at runtime is expensive, and won't solve the problem 596 * anyways, so just report a few times and then give up. 597 */ 598 static int verbose = 10; /* XXX */ 599 600 if (verbose > 0) { 601 kprintf("status poll handlers list full, " 602 "maybe a broken driver ?\n"); 603 verbose--; 604 } 605 error = ENOENT; 606 } else { 607 struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers]; 608 609 rec->ifp = ifp; 610 rec->serializer = st_rec->serializer; 611 rec->status_func = st_rec->status_func; 612 613 st_ctx->poll_handlers++; 614 error = 0; 615 } 616 return error; 617 } 618 619 static int 620 stpoll_deregister(struct ifnet *ifp) 621 { 622 struct stpoll_ctx *st_ctx = &stpoll_context; 623 int i, error; 624 625 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 626 627 for (i = 0; i < st_ctx->poll_handlers; ++i) { 628 if (st_ctx->pr[i].ifp == ifp) /* Found it */ 629 break; 630 } 631 if (i == st_ctx->poll_handlers) { 632 error = ENOENT; 633 } else { 634 st_ctx->poll_handlers--; 635 if (i < st_ctx->poll_handlers) { 636 /* Last entry replaces this one. */ 637 st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers]; 638 } 639 error = 0; 640 } 641 return error; 642 } 643 644 static __inline void 645 iopoll_reset_state(struct iopoll_ctx *io_ctx) 646 { 647 crit_enter(); 648 io_ctx->poll_burst = io_ctx->poll_each_burst; 649 io_ctx->pending_polls = 0; 650 io_ctx->residual_burst = 0; 651 io_ctx->phase = 0; 652 io_ctx->kern_frac = 0; 653 bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t)); 654 bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t)); 655 crit_exit(); 656 } 657 658 static void 659 iopoll_init(int cpuid) 660 { 661 KKASSERT(cpuid < ncpus2); 662 663 rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX); 664 txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX); 665 } 666 667 static struct iopoll_ctx * 668 iopoll_ctx_create(int cpuid, int poll_type) 669 { 670 struct poll_comm *comm; 671 struct iopoll_ctx *io_ctx; 672 const char *poll_type_str; 673 netisr_fn_t handler, more_handler; 674 675 KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX); 676 677 /* 678 * Make sure that tunables are in sane state 679 */ 680 if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX) 681 iopoll_burst_max = MIN_IOPOLL_BURST_MAX; 682 else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX) 683 iopoll_burst_max = MAX_IOPOLL_BURST_MAX; 684 685 if (iopoll_each_burst > iopoll_burst_max) 686 iopoll_each_burst = iopoll_burst_max; 687 688 comm = poll_common[cpuid]; 689 690 /* 691 * Create the per-cpu polling context 692 */ 693 io_ctx = kmalloc_cachealign(sizeof(*io_ctx), M_DEVBUF, 694 M_WAITOK | M_ZERO); 695 696 io_ctx->poll_each_burst = iopoll_each_burst; 697 io_ctx->poll_burst_max = iopoll_burst_max; 698 io_ctx->user_frac = iopoll_user_frac; 699 if (poll_type == IFPOLL_RX) 700 io_ctx->pollhz = comm->pollhz; 701 else 702 io_ctx->pollhz = comm->pollhz / (comm->poll_txfrac + 1); 703 io_ctx->poll_cpuid = cpuid; 704 iopoll_reset_state(io_ctx); 705 706 if (poll_type == IFPOLL_RX) { 707 handler = rxpoll_handler; 708 more_handler = rxpollmore_handler; 709 } else { 710 handler = txpoll_handler; 711 more_handler = txpollmore_handler; 712 } 713 714 netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport, 715 0, handler); 716 io_ctx->poll_netmsg.lmsg.u.ms_resultp = io_ctx; 717 718 netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport, 719 0, more_handler); 720 io_ctx->poll_more_netmsg.lmsg.u.ms_resultp = io_ctx; 721 722 /* 723 * Initialize per-cpu sysctl nodes 724 */ 725 if (poll_type == IFPOLL_RX) 726 poll_type_str = "rx"; 727 else 728 poll_type_str = "tx"; 729 730 sysctl_ctx_init(&io_ctx->poll_sysctl_ctx); 731 io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx, 732 SYSCTL_CHILDREN(comm->sysctl_tree), 733 OID_AUTO, poll_type_str, CTLFLAG_RD, 0, ""); 734 iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx, 735 SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx, poll_type); 736 737 return io_ctx; 738 } 739 740 /* 741 * Hook from iopoll systimer. Tries to schedule an iopoll, but keeps 742 * track of lost ticks due to the previous handler taking too long. 743 * Normally, this should not happen, because polling handler should 744 * run for a short time. However, in some cases (e.g. when there are 745 * changes in link status etc.) the drivers take a very long time 746 * (even in the order of milliseconds) to reset and reconfigure the 747 * device, causing apparent lost polls. 748 * 749 * The first part of the code is just for debugging purposes, and tries 750 * to count how often hardclock ticks are shorter than they should, 751 * meaning either stray interrupts or delayed events. 752 * 753 * WARNING! called from fastint or IPI, the MP lock might not be held. 754 * NOTE: Caller should hold critical section. 755 */ 756 static void 757 iopoll_clock(struct iopoll_ctx *io_ctx) 758 { 759 union ifpoll_time t; 760 int delta; 761 762 KKASSERT(mycpuid == io_ctx->poll_cpuid); 763 764 if (io_ctx->poll_handlers == 0) 765 return; 766 767 ifpoll_time_get(&t); 768 delta = ifpoll_time_diff(&io_ctx->prev_t, &t); 769 if (delta * io_ctx->pollhz < 500000) 770 io_ctx->short_ticks++; 771 else 772 io_ctx->prev_t = t; 773 774 if (io_ctx->pending_polls > 100) { 775 /* 776 * Too much, assume it has stalled (not always true 777 * see comment above). 778 */ 779 io_ctx->stalled++; 780 io_ctx->pending_polls = 0; 781 io_ctx->phase = 0; 782 } 783 784 if (io_ctx->phase <= 2) { 785 if (io_ctx->phase != 0) 786 io_ctx->suspect++; 787 io_ctx->phase = 1; 788 sched_iopoll(io_ctx); 789 io_ctx->phase = 2; 790 } 791 if (io_ctx->pending_polls++ > 0) 792 io_ctx->lost_polls++; 793 } 794 795 /* 796 * rxpoll_handler and txpoll_handler are scheduled by sched_iopoll when 797 * appropriate, typically once per polling systimer tick. 798 * 799 * Note that the message is replied immediately in order to allow a new 800 * ISR to be scheduled in the handler. 801 */ 802 static void 803 rxpoll_handler(netmsg_t msg) 804 { 805 struct iopoll_ctx *io_ctx; 806 struct thread *td = curthread; 807 int i, cycles; 808 809 io_ctx = msg->lmsg.u.ms_resultp; 810 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 811 812 crit_enter_quick(td); 813 814 /* Reply ASAP */ 815 lwkt_replymsg(&msg->lmsg, 0); 816 817 if (io_ctx->poll_handlers == 0) { 818 crit_exit_quick(td); 819 return; 820 } 821 822 io_ctx->phase = 3; 823 if (io_ctx->residual_burst == 0) { 824 /* First call in this tick */ 825 ifpoll_time_get(&io_ctx->poll_start_t); 826 io_ctx->residual_burst = io_ctx->poll_burst; 827 } 828 cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ? 829 io_ctx->residual_burst : io_ctx->poll_each_burst; 830 io_ctx->residual_burst -= cycles; 831 832 for (i = 0; i < io_ctx->poll_handlers; i++) { 833 const struct iopoll_rec *rec = &io_ctx->pr[i]; 834 struct ifnet *ifp = rec->ifp; 835 836 if (!lwkt_serialize_try(rec->serializer)) 837 continue; 838 839 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) == 840 (IFF_RUNNING | IFF_NPOLLING)) 841 rec->poll_func(ifp, rec->arg, cycles); 842 843 lwkt_serialize_exit(rec->serializer); 844 } 845 846 /* 847 * Do a quick exit/enter to catch any higher-priority 848 * interrupt sources. 849 */ 850 crit_exit_quick(td); 851 crit_enter_quick(td); 852 853 sched_iopollmore(io_ctx); 854 io_ctx->phase = 4; 855 856 crit_exit_quick(td); 857 } 858 859 static void 860 txpoll_handler(netmsg_t msg) 861 { 862 struct iopoll_ctx *io_ctx; 863 struct thread *td = curthread; 864 int i; 865 866 io_ctx = msg->lmsg.u.ms_resultp; 867 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 868 869 crit_enter_quick(td); 870 871 /* Reply ASAP */ 872 lwkt_replymsg(&msg->lmsg, 0); 873 874 if (io_ctx->poll_handlers == 0) { 875 crit_exit_quick(td); 876 return; 877 } 878 879 io_ctx->phase = 3; 880 881 for (i = 0; i < io_ctx->poll_handlers; i++) { 882 const struct iopoll_rec *rec = &io_ctx->pr[i]; 883 struct ifnet *ifp = rec->ifp; 884 885 if (!lwkt_serialize_try(rec->serializer)) 886 continue; 887 888 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) == 889 (IFF_RUNNING | IFF_NPOLLING)) 890 rec->poll_func(ifp, rec->arg, -1); 891 892 lwkt_serialize_exit(rec->serializer); 893 } 894 895 /* 896 * Do a quick exit/enter to catch any higher-priority 897 * interrupt sources. 898 */ 899 crit_exit_quick(td); 900 crit_enter_quick(td); 901 902 sched_iopollmore(io_ctx); 903 io_ctx->phase = 4; 904 905 crit_exit_quick(td); 906 } 907 908 /* 909 * rxpollmore_handler and txpollmore_handler are called after other netisr's, 910 * possibly scheduling another rxpoll_handler or txpoll_handler call, or 911 * adapting the burst size for the next cycle. 912 * 913 * It is very bad to fetch large bursts of packets from a single card at once, 914 * because the burst could take a long time to be completely processed leading 915 * to unfairness. To reduce the problem, and also to account better for time 916 * spent in network-related processing, we split the burst in smaller chunks 917 * of fixed size, giving control to the other netisr's between chunks. This 918 * helps in improving the fairness, reducing livelock and accounting for the 919 * work performed in low level handling. 920 */ 921 static void 922 rxpollmore_handler(netmsg_t msg) 923 { 924 struct thread *td = curthread; 925 struct iopoll_ctx *io_ctx; 926 union ifpoll_time t; 927 int kern_load; 928 uint32_t pending_polls; 929 930 io_ctx = msg->lmsg.u.ms_resultp; 931 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 932 933 crit_enter_quick(td); 934 935 /* Replay ASAP */ 936 lwkt_replymsg(&msg->lmsg, 0); 937 938 if (io_ctx->poll_handlers == 0) { 939 crit_exit_quick(td); 940 return; 941 } 942 943 io_ctx->phase = 5; 944 if (io_ctx->residual_burst > 0) { 945 sched_iopoll(io_ctx); 946 crit_exit_quick(td); 947 /* Will run immediately on return, followed by netisrs */ 948 return; 949 } 950 951 /* Here we can account time spent in iopoll's in this tick */ 952 ifpoll_time_get(&t); 953 kern_load = ifpoll_time_diff(&io_ctx->poll_start_t, &t); 954 kern_load = (kern_load * io_ctx->pollhz) / 10000; /* 0..100 */ 955 io_ctx->kern_frac = kern_load; 956 957 if (kern_load > (100 - io_ctx->user_frac)) { 958 /* Try decrease ticks */ 959 if (io_ctx->poll_burst > 1) 960 io_ctx->poll_burst--; 961 } else { 962 if (io_ctx->poll_burst < io_ctx->poll_burst_max) 963 io_ctx->poll_burst++; 964 } 965 966 io_ctx->pending_polls--; 967 pending_polls = io_ctx->pending_polls; 968 969 if (pending_polls == 0) { 970 /* We are done */ 971 io_ctx->phase = 0; 972 } else { 973 /* 974 * Last cycle was long and caused us to miss one or more 975 * hardclock ticks. Restart processing again, but slightly 976 * reduce the burst size to prevent that this happens again. 977 */ 978 io_ctx->poll_burst -= (io_ctx->poll_burst / 8); 979 if (io_ctx->poll_burst < 1) 980 io_ctx->poll_burst = 1; 981 sched_iopoll(io_ctx); 982 io_ctx->phase = 6; 983 } 984 985 crit_exit_quick(td); 986 } 987 988 static void 989 txpollmore_handler(netmsg_t msg) 990 { 991 struct thread *td = curthread; 992 struct iopoll_ctx *io_ctx; 993 uint32_t pending_polls; 994 995 io_ctx = msg->lmsg.u.ms_resultp; 996 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 997 998 crit_enter_quick(td); 999 1000 /* Replay ASAP */ 1001 lwkt_replymsg(&msg->lmsg, 0); 1002 1003 if (io_ctx->poll_handlers == 0) { 1004 crit_exit_quick(td); 1005 return; 1006 } 1007 1008 io_ctx->phase = 5; 1009 1010 io_ctx->pending_polls--; 1011 pending_polls = io_ctx->pending_polls; 1012 1013 if (pending_polls == 0) { 1014 /* We are done */ 1015 io_ctx->phase = 0; 1016 } else { 1017 /* 1018 * Last cycle was long and caused us to miss one or more 1019 * hardclock ticks. Restart processing again. 1020 */ 1021 sched_iopoll(io_ctx); 1022 io_ctx->phase = 6; 1023 } 1024 1025 crit_exit_quick(td); 1026 } 1027 1028 static void 1029 iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent, 1030 struct iopoll_ctx *io_ctx, int poll_type) 1031 { 1032 if (poll_type == IFPOLL_RX) { 1033 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max", 1034 CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax, 1035 "IU", "Max Polling burst size"); 1036 1037 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst", 1038 CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst, 1039 "IU", "Max size of each burst"); 1040 1041 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD, 1042 &io_ctx->poll_burst, 0, "Current polling burst size"); 1043 1044 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW, 1045 &io_ctx->user_frac, 0, "Desired user fraction of cpu time"); 1046 1047 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD, 1048 &io_ctx->kern_frac, 0, "Kernel fraction of cpu time"); 1049 1050 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD, 1051 &io_ctx->residual_burst, 0, 1052 "# of residual cycles in burst"); 1053 } 1054 1055 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD, 1056 &io_ctx->phase, 0, "Polling phase"); 1057 1058 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW, 1059 &io_ctx->suspect, "Suspected events"); 1060 1061 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW, 1062 &io_ctx->stalled, "Potential stalls"); 1063 1064 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW, 1065 &io_ctx->short_ticks, 1066 "Hardclock ticks shorter than they should be"); 1067 1068 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW, 1069 &io_ctx->lost_polls, 1070 "How many times we would have lost a poll tick"); 1071 1072 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD, 1073 &io_ctx->pending_polls, 0, "Do we need to poll again"); 1074 1075 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD, 1076 &io_ctx->poll_handlers, 0, "Number of registered poll handlers"); 1077 } 1078 1079 static void 1080 sysctl_burstmax_handler(netmsg_t nmsg) 1081 { 1082 struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg; 1083 struct iopoll_ctx *io_ctx; 1084 1085 io_ctx = msg->ctx; 1086 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1087 1088 io_ctx->poll_burst_max = nmsg->lmsg.u.ms_result; 1089 if (io_ctx->poll_each_burst > io_ctx->poll_burst_max) 1090 io_ctx->poll_each_burst = io_ctx->poll_burst_max; 1091 if (io_ctx->poll_burst > io_ctx->poll_burst_max) 1092 io_ctx->poll_burst = io_ctx->poll_burst_max; 1093 if (io_ctx->residual_burst > io_ctx->poll_burst_max) 1094 io_ctx->residual_burst = io_ctx->poll_burst_max; 1095 1096 lwkt_replymsg(&nmsg->lmsg, 0); 1097 } 1098 1099 static int 1100 sysctl_burstmax(SYSCTL_HANDLER_ARGS) 1101 { 1102 struct iopoll_ctx *io_ctx = arg1; 1103 struct iopoll_sysctl_netmsg msg; 1104 uint32_t burst_max; 1105 int error; 1106 1107 burst_max = io_ctx->poll_burst_max; 1108 error = sysctl_handle_int(oidp, &burst_max, 0, req); 1109 if (error || req->newptr == NULL) 1110 return error; 1111 if (burst_max < MIN_IOPOLL_BURST_MAX) 1112 burst_max = MIN_IOPOLL_BURST_MAX; 1113 else if (burst_max > MAX_IOPOLL_BURST_MAX) 1114 burst_max = MAX_IOPOLL_BURST_MAX; 1115 1116 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 1117 0, sysctl_burstmax_handler); 1118 msg.base.lmsg.u.ms_result = burst_max; 1119 msg.ctx = io_ctx; 1120 1121 return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid), 1122 &msg.base.lmsg, 0); 1123 } 1124 1125 static void 1126 sysctl_eachburst_handler(netmsg_t nmsg) 1127 { 1128 struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg; 1129 struct iopoll_ctx *io_ctx; 1130 uint32_t each_burst; 1131 1132 io_ctx = msg->ctx; 1133 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1134 1135 each_burst = nmsg->lmsg.u.ms_result; 1136 if (each_burst > io_ctx->poll_burst_max) 1137 each_burst = io_ctx->poll_burst_max; 1138 else if (each_burst < 1) 1139 each_burst = 1; 1140 io_ctx->poll_each_burst = each_burst; 1141 1142 lwkt_replymsg(&nmsg->lmsg, 0); 1143 } 1144 1145 static int 1146 sysctl_eachburst(SYSCTL_HANDLER_ARGS) 1147 { 1148 struct iopoll_ctx *io_ctx = arg1; 1149 struct iopoll_sysctl_netmsg msg; 1150 uint32_t each_burst; 1151 int error; 1152 1153 each_burst = io_ctx->poll_each_burst; 1154 error = sysctl_handle_int(oidp, &each_burst, 0, req); 1155 if (error || req->newptr == NULL) 1156 return error; 1157 1158 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 1159 0, sysctl_eachburst_handler); 1160 msg.base.lmsg.u.ms_result = each_burst; 1161 msg.ctx = io_ctx; 1162 1163 return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid), 1164 &msg.base.lmsg, 0); 1165 } 1166 1167 static int 1168 iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx, 1169 const struct ifpoll_io *io_rec) 1170 { 1171 int error; 1172 1173 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1174 1175 if (io_rec->poll_func == NULL) 1176 return 0; 1177 1178 /* 1179 * Check if there is room. 1180 */ 1181 if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) { 1182 /* 1183 * List full, cannot register more entries. 1184 * This should never happen; if it does, it is probably a 1185 * broken driver trying to register multiple times. Checking 1186 * this at runtime is expensive, and won't solve the problem 1187 * anyways, so just report a few times and then give up. 1188 */ 1189 static int verbose = 10; /* XXX */ 1190 if (verbose > 0) { 1191 kprintf("io poll handlers list full, " 1192 "maybe a broken driver ?\n"); 1193 verbose--; 1194 } 1195 error = ENOENT; 1196 } else { 1197 struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers]; 1198 1199 rec->ifp = ifp; 1200 rec->serializer = io_rec->serializer; 1201 rec->arg = io_rec->arg; 1202 rec->poll_func = io_rec->poll_func; 1203 1204 io_ctx->poll_handlers++; 1205 error = 0; 1206 } 1207 return error; 1208 } 1209 1210 static int 1211 iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx) 1212 { 1213 int i, error; 1214 1215 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1216 1217 for (i = 0; i < io_ctx->poll_handlers; ++i) { 1218 if (io_ctx->pr[i].ifp == ifp) /* Found it */ 1219 break; 1220 } 1221 if (i == io_ctx->poll_handlers) { 1222 error = ENOENT; 1223 } else { 1224 io_ctx->poll_handlers--; 1225 if (i < io_ctx->poll_handlers) { 1226 /* Last entry replaces this one. */ 1227 io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers]; 1228 } 1229 1230 if (io_ctx->poll_handlers == 0) 1231 iopoll_reset_state(io_ctx); 1232 error = 0; 1233 } 1234 return error; 1235 } 1236 1237 static void 1238 poll_comm_init(int cpuid) 1239 { 1240 struct poll_comm *comm; 1241 char cpuid_str[16]; 1242 1243 comm = kmalloc_cachealign(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO); 1244 1245 if (ifpoll_stfrac < 1) 1246 ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT; 1247 if (ifpoll_txfrac < 1) 1248 ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT; 1249 1250 comm->poll_cpuid = cpuid; 1251 comm->pollhz = poll_comm_pollhz_div(comm, ifpoll_pollhz); 1252 comm->poll_stfrac = ifpoll_stfrac - 1; 1253 comm->poll_txfrac = ifpoll_txfrac - 1; 1254 1255 ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid); 1256 1257 sysctl_ctx_init(&comm->sysctl_ctx); 1258 comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx, 1259 SYSCTL_STATIC_CHILDREN(_net_ifpoll), 1260 OID_AUTO, cpuid_str, CTLFLAG_RD, 0, ""); 1261 1262 SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree), 1263 OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW, 1264 comm, 0, sysctl_pollhz, 1265 "I", "Device polling frequency"); 1266 1267 if (cpuid == 0) { 1268 SYSCTL_ADD_PROC(&comm->sysctl_ctx, 1269 SYSCTL_CHILDREN(comm->sysctl_tree), 1270 OID_AUTO, "status_frac", 1271 CTLTYPE_INT | CTLFLAG_RW, 1272 comm, 0, sysctl_stfrac, 1273 "I", "# of cycles before status is polled"); 1274 } 1275 SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree), 1276 OID_AUTO, "tx_frac", CTLTYPE_INT | CTLFLAG_RW, 1277 comm, 0, sysctl_txfrac, 1278 "I", "# of cycles before TX is polled"); 1279 1280 poll_common[cpuid] = comm; 1281 } 1282 1283 static void 1284 poll_comm_start(int cpuid) 1285 { 1286 struct poll_comm *comm = poll_common[cpuid]; 1287 systimer_func_t func; 1288 1289 /* 1290 * Initialize systimer 1291 */ 1292 if (cpuid == 0) 1293 func = poll_comm_systimer0; 1294 else 1295 func = poll_comm_systimer; 1296 systimer_init_periodic_nq(&comm->pollclock, func, comm, 1); 1297 } 1298 1299 static void 1300 _poll_comm_systimer(struct poll_comm *comm) 1301 { 1302 iopoll_clock(rxpoll_context[comm->poll_cpuid]); 1303 if (comm->txfrac_count-- == 0) { 1304 comm->txfrac_count = comm->poll_txfrac; 1305 iopoll_clock(txpoll_context[comm->poll_cpuid]); 1306 } 1307 } 1308 1309 static void 1310 poll_comm_systimer0(systimer_t info, int in_ipi __unused, 1311 struct intrframe *frame __unused) 1312 { 1313 struct poll_comm *comm = info->data; 1314 globaldata_t gd = mycpu; 1315 1316 KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid == 0); 1317 1318 crit_enter_gd(gd); 1319 1320 if (comm->stfrac_count-- == 0) { 1321 comm->stfrac_count = comm->poll_stfrac; 1322 stpoll_clock(&stpoll_context); 1323 } 1324 _poll_comm_systimer(comm); 1325 1326 crit_exit_gd(gd); 1327 } 1328 1329 static void 1330 poll_comm_systimer(systimer_t info, int in_ipi __unused, 1331 struct intrframe *frame __unused) 1332 { 1333 struct poll_comm *comm = info->data; 1334 globaldata_t gd = mycpu; 1335 1336 KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid != 0); 1337 1338 crit_enter_gd(gd); 1339 _poll_comm_systimer(comm); 1340 crit_exit_gd(gd); 1341 } 1342 1343 static void 1344 poll_comm_adjust_pollhz(struct poll_comm *comm) 1345 { 1346 uint32_t handlers; 1347 int pollhz = 1; 1348 1349 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1350 1351 /* 1352 * If there is no polling handler registered, set systimer 1353 * frequency to the lowest value. Polling systimer frequency 1354 * will be adjusted to the requested value, once there are 1355 * registered handlers. 1356 */ 1357 handlers = rxpoll_context[mycpuid]->poll_handlers + 1358 txpoll_context[mycpuid]->poll_handlers; 1359 if (comm->poll_cpuid == 0) 1360 handlers += stpoll_context.poll_handlers; 1361 if (handlers) 1362 pollhz = comm->pollhz; 1363 systimer_adjust_periodic(&comm->pollclock, pollhz); 1364 } 1365 1366 static int 1367 sysctl_pollhz(SYSCTL_HANDLER_ARGS) 1368 { 1369 struct poll_comm *comm = arg1; 1370 struct netmsg_base nmsg; 1371 int error, phz; 1372 1373 phz = poll_comm_pollhz_conv(comm, comm->pollhz); 1374 error = sysctl_handle_int(oidp, &phz, 0, req); 1375 if (error || req->newptr == NULL) 1376 return error; 1377 if (phz <= 0) 1378 return EINVAL; 1379 else if (phz > IFPOLL_FREQ_MAX) 1380 phz = IFPOLL_FREQ_MAX; 1381 1382 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 1383 0, sysctl_pollhz_handler); 1384 nmsg.lmsg.u.ms_result = phz; 1385 1386 return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0); 1387 } 1388 1389 static void 1390 sysctl_pollhz_handler(netmsg_t nmsg) 1391 { 1392 struct poll_comm *comm = poll_common[mycpuid]; 1393 1394 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1395 1396 /* Save polling frequency */ 1397 comm->pollhz = poll_comm_pollhz_div(comm, nmsg->lmsg.u.ms_result); 1398 1399 /* 1400 * Adjust cached pollhz 1401 */ 1402 rxpoll_context[mycpuid]->pollhz = comm->pollhz; 1403 txpoll_context[mycpuid]->pollhz = 1404 comm->pollhz / (comm->poll_txfrac + 1); 1405 1406 /* 1407 * Adjust polling frequency 1408 */ 1409 poll_comm_adjust_pollhz(comm); 1410 1411 lwkt_replymsg(&nmsg->lmsg, 0); 1412 } 1413 1414 static int 1415 sysctl_stfrac(SYSCTL_HANDLER_ARGS) 1416 { 1417 struct poll_comm *comm = arg1; 1418 struct netmsg_base nmsg; 1419 int error, stfrac; 1420 1421 KKASSERT(comm->poll_cpuid == 0); 1422 1423 stfrac = comm->poll_stfrac + 1; 1424 error = sysctl_handle_int(oidp, &stfrac, 0, req); 1425 if (error || req->newptr == NULL) 1426 return error; 1427 if (stfrac < 1) 1428 return EINVAL; 1429 1430 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 1431 0, sysctl_stfrac_handler); 1432 nmsg.lmsg.u.ms_result = stfrac - 1; 1433 1434 return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0); 1435 } 1436 1437 static void 1438 sysctl_stfrac_handler(netmsg_t nmsg) 1439 { 1440 struct poll_comm *comm = poll_common[mycpuid]; 1441 int stfrac = nmsg->lmsg.u.ms_result; 1442 1443 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1444 1445 crit_enter(); 1446 comm->poll_stfrac = stfrac; 1447 if (comm->stfrac_count > comm->poll_stfrac) 1448 comm->stfrac_count = comm->poll_stfrac; 1449 crit_exit(); 1450 1451 lwkt_replymsg(&nmsg->lmsg, 0); 1452 } 1453 1454 static int 1455 sysctl_txfrac(SYSCTL_HANDLER_ARGS) 1456 { 1457 struct poll_comm *comm = arg1; 1458 struct netmsg_base nmsg; 1459 int error, txfrac; 1460 1461 txfrac = comm->poll_txfrac + 1; 1462 error = sysctl_handle_int(oidp, &txfrac, 0, req); 1463 if (error || req->newptr == NULL) 1464 return error; 1465 if (txfrac < 1) 1466 return EINVAL; 1467 1468 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 1469 0, sysctl_txfrac_handler); 1470 nmsg.lmsg.u.ms_result = txfrac - 1; 1471 1472 return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0); 1473 } 1474 1475 static void 1476 sysctl_txfrac_handler(netmsg_t nmsg) 1477 { 1478 struct poll_comm *comm = poll_common[mycpuid]; 1479 int txfrac = nmsg->lmsg.u.ms_result; 1480 1481 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1482 1483 crit_enter(); 1484 comm->poll_txfrac = txfrac; 1485 if (comm->txfrac_count > comm->poll_txfrac) 1486 comm->txfrac_count = comm->poll_txfrac; 1487 crit_exit(); 1488 1489 lwkt_replymsg(&nmsg->lmsg, 0); 1490 } 1491 1492 void 1493 ifpoll_compat_setup(struct ifpoll_compat *cp, 1494 struct sysctl_ctx_list *sysctl_ctx, 1495 struct sysctl_oid *sysctl_tree, 1496 int unit, struct lwkt_serialize *slz) 1497 { 1498 cp->ifpc_stcount = 0; 1499 cp->ifpc_stfrac = ((poll_common[0]->poll_stfrac + 1) * 1500 howmany(IOPOLL_BURST_MAX, IOPOLL_EACH_BURST)) - 1; 1501 1502 cp->ifpc_cpuid = unit % ncpus2; 1503 cp->ifpc_serializer = slz; 1504 1505 if (sysctl_ctx != NULL && sysctl_tree != NULL) { 1506 SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), 1507 OID_AUTO, "npoll_stfrac", CTLTYPE_INT | CTLFLAG_RW, 1508 cp, 0, sysctl_compat_npoll_stfrac, "I", 1509 "polling status frac"); 1510 SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), 1511 OID_AUTO, "npoll_cpuid", CTLTYPE_INT | CTLFLAG_RW, 1512 cp, 0, sysctl_compat_npoll_cpuid, "I", 1513 "polling cpuid"); 1514 } 1515 } 1516 1517 static int 1518 sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS) 1519 { 1520 struct ifpoll_compat *cp = arg1; 1521 int error = 0, stfrac; 1522 1523 lwkt_serialize_enter(cp->ifpc_serializer); 1524 1525 stfrac = cp->ifpc_stfrac + 1; 1526 error = sysctl_handle_int(oidp, &stfrac, 0, req); 1527 if (!error && req->newptr != NULL) { 1528 if (stfrac < 1) { 1529 error = EINVAL; 1530 } else { 1531 cp->ifpc_stfrac = stfrac - 1; 1532 if (cp->ifpc_stcount > cp->ifpc_stfrac) 1533 cp->ifpc_stcount = cp->ifpc_stfrac; 1534 } 1535 } 1536 1537 lwkt_serialize_exit(cp->ifpc_serializer); 1538 return error; 1539 } 1540 1541 static int 1542 sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS) 1543 { 1544 struct ifpoll_compat *cp = arg1; 1545 int error = 0, cpuid; 1546 1547 lwkt_serialize_enter(cp->ifpc_serializer); 1548 1549 cpuid = cp->ifpc_cpuid; 1550 error = sysctl_handle_int(oidp, &cpuid, 0, req); 1551 if (!error && req->newptr != NULL) { 1552 if (cpuid < 0 || cpuid >= ncpus2) 1553 error = EINVAL; 1554 else 1555 cp->ifpc_cpuid = cpuid; 1556 } 1557 1558 lwkt_serialize_exit(cp->ifpc_serializer); 1559 return error; 1560 } 1561