/*-
 * Copyright (c) 2001-2002 Luigi Rizzo
 *
 * Supported by: the Xorp Project (www.xorp.org)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
 */

#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>

#include <machine/atomic.h>
#include <machine/clock.h>
#include <machine/smp.h>

#include <net/if.h>
#include <net/if_poll.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

/*
 * Polling support for network device drivers.
 *
 * Drivers which support this feature try to register one status polling
 * handler and several TX/RX polling handlers with the polling code.
 * If the interface's if_npoll is called with a non-NULL second argument,
 * then a register operation is requested, else a deregister operation is
 * requested.  If the requested operation is "register", the driver should
 * set up the ifpoll_info passed in according to its own needs:
 *   ifpoll_info.ifpi_status.status_func == NULL
 *     No status polling handler will be installed on CPU(0)
 *   ifpoll_info.ifpi_rx[n].poll_func == NULL
 *     No RX polling handler will be installed on CPU(n)
 *   ifpoll_info.ifpi_tx[n].poll_func == NULL
 *     No TX polling handler will be installed on CPU(n)
 *
 * RX is polled at the specified polling frequency (net.ifpoll.X.pollhz).
 * TX and status polling can be done at a lower frequency than RX
 * (net.ifpoll.0.status_frac and net.ifpoll.X.tx_frac).  To avoid systimer
 * staggering at high frequency, the RX systimer gives TX and status
 * polling a piggyback (XXX).
 *
 * All of the registered polling handlers are called only if the interface
 * is marked as 'IFF_RUNNING and IFF_NPOLLING'.  However, the interface's
 * register and deregister function (ifnet.if_npoll) will be called even
 * if the interface is not marked with 'IFF_RUNNING'.
 *
 * If registration is successful, the driver must disable interrupts,
 * and further I/O is performed through the TX/RX polling handlers, which
 * are invoked (at least once per clock tick) with 3 arguments: the "arg"
 * passed at register time, a struct ifnet pointer, and a "count" limit.
 * The registered serializer will be held before calling the related
 * polling handler.
 *
 * The count limit specifies how much work the handler can do during the
 * call -- typically this is the number of packets to be received, or
 * transmitted, etc. (drivers are free to interpret this number, as long
 * as the max time spent in the function grows roughly linearly with the
 * count).
 *
 * A second variable controls the sharing of CPU between polling/kernel
 * network processing, and other activities (typically userlevel tasks):
 * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the
 * share of CPU allocated to user tasks.  CPU is allocated proportionally
 * to the shares, by dynamically adjusting the "count" (poll_burst).
 *
 * Other parameters should be left to their default values.
 * The following constraints hold:
 *
 *	1 <= poll_burst <= poll_burst_max
 *	1 <= poll_each_burst <= poll_burst_max
 *	MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
 */
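/*
 * Illustrative sketch (not part of this file): a hypothetical driver
 * "foo" with a single RX/TX ring could implement its if_npoll method
 * roughly as below.  All foo_* names are assumptions for the example
 * only.  Note that if_npoll is always called with the ifnet fully
 * serialized (see ifpoll_register() and ifpoll_deregister() below).
 *
 *	static void
 *	foo_npoll(struct ifnet *ifp, struct ifpoll_info *info)
 *	{
 *		struct foo_softc *sc = ifp->if_softc;
 *
 *		if (info != NULL) {
 *			info->ifpi_status.status_func = foo_npoll_status;
 *			info->ifpi_status.serializer = &sc->foo_serialize;
 *
 *			info->ifpi_rx[0].poll_func = foo_npoll_rx;
 *			info->ifpi_rx[0].arg = sc;
 *			info->ifpi_rx[0].serializer = &sc->foo_serialize;
 *
 *			info->ifpi_tx[0].poll_func = foo_npoll_tx;
 *			info->ifpi_tx[0].arg = sc;
 *			info->ifpi_tx[0].serializer = &sc->foo_serialize;
 *
 *			if (ifp->if_flags & IFF_RUNNING)
 *				foo_disable_intr(sc);
 *		} else {
 *			if (ifp->if_flags & IFF_RUNNING)
 *				foo_enable_intr(sc);
 *		}
 *	}
 */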
#define IFPOLL_LIST_LEN		128
#define IFPOLL_FREQ_MAX		30000

#define MIN_IOPOLL_BURST_MAX	10
#define MAX_IOPOLL_BURST_MAX	5000
#define IOPOLL_BURST_MAX	250	/* good for 1000Mbit net and HZ=6000 */

#define IOPOLL_EACH_BURST	50
#define IOPOLL_USER_FRAC	50

#define IFPOLL_FREQ_DEFAULT	6000

#define IFPOLL_TXFRAC_DEFAULT	1	/* 1/1 of the pollhz */
#define IFPOLL_STFRAC_DEFAULT	120	/* 1/120 of the pollhz */

#define IFPOLL_RX		0x1
#define IFPOLL_TX		0x2

union ifpoll_time {
	struct timeval		tv;
	uint64_t		tsc;
};

struct iopoll_rec {
	struct lwkt_serialize	*serializer;
	struct ifnet		*ifp;
	void			*arg;
	ifpoll_iofn_t		poll_func;
};

struct iopoll_ctx {
	union ifpoll_time	prev_t;
	u_long			short_ticks;		/* statistics */
	u_long			lost_polls;		/* statistics */
	u_long			suspect;		/* statistics */
	u_long			stalled;		/* statistics */
	uint32_t		pending_polls;		/* state */

	struct netmsg_base	poll_netmsg;
	struct netmsg_base	poll_more_netmsg;

	int			poll_cpuid;
	int			pollhz;
	uint32_t		phase;			/* state */
	int			residual_burst;		/* state */
	uint32_t		poll_each_burst;	/* tunable */
	union ifpoll_time	poll_start_t;		/* state */

	uint32_t		poll_burst;		/* state */
	uint32_t		poll_burst_max;		/* tunable */
	uint32_t		user_frac;		/* tunable */
	uint32_t		kern_frac;		/* state */

	uint32_t		poll_handlers;	/* next free entry in pr[] */
	struct iopoll_rec	pr[IFPOLL_LIST_LEN];

	struct sysctl_ctx_list	poll_sysctl_ctx;
	struct sysctl_oid	*poll_sysctl_tree;
} __cachealign;
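/*
 * Per-cpu polling clock state, shared by the RX/TX/status polling done
 * on that cpu; see poll_comm_systimer() and friends below.
 */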
struct poll_comm {
	struct systimer		pollclock;
	int			poll_cpuid;

	int			stfrac_count;		/* state */
	int			poll_stfrac;		/* tunable */

	int			txfrac_count;		/* state */
	int			poll_txfrac;		/* tunable */

	int			pollhz;			/* tunable */

	struct sysctl_ctx_list	sysctl_ctx;
	struct sysctl_oid	*sysctl_tree;
} __cachealign;

struct stpoll_rec {
	struct lwkt_serialize	*serializer;
	struct ifnet		*ifp;
	ifpoll_stfn_t		status_func;
};

struct stpoll_ctx {
	struct netmsg_base	poll_netmsg;

	uint32_t		poll_handlers;	/* next free entry in pr[] */
	struct stpoll_rec	pr[IFPOLL_LIST_LEN];

	struct sysctl_ctx_list	poll_sysctl_ctx;
	struct sysctl_oid	*poll_sysctl_tree;
} __cachealign;

struct iopoll_sysctl_netmsg {
	struct netmsg_base	base;
	struct iopoll_ctx	*ctx;
};

void		ifpoll_init_pcpu(int);
static void	ifpoll_register_handler(netmsg_t);
static void	ifpoll_deregister_handler(netmsg_t);

/*
 * Status polling
 */
static void	stpoll_init(void);
static void	stpoll_handler(netmsg_t);
static void	stpoll_clock(struct stpoll_ctx *);
static int	stpoll_register(struct ifnet *, const struct ifpoll_status *);
static int	stpoll_deregister(struct ifnet *);

/*
 * RX/TX polling
 */
static struct iopoll_ctx *iopoll_ctx_create(int, int);
static void	iopoll_init(int);
static void	rxpoll_handler(netmsg_t);
static void	txpoll_handler(netmsg_t);
static void	rxpollmore_handler(netmsg_t);
static void	txpollmore_handler(netmsg_t);
static void	iopoll_clock(struct iopoll_ctx *);
static int	iopoll_register(struct ifnet *, struct iopoll_ctx *,
		    const struct ifpoll_io *);
static int	iopoll_deregister(struct ifnet *, struct iopoll_ctx *);

static void	iopoll_add_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct iopoll_ctx *, int);
static void	sysctl_burstmax_handler(netmsg_t);
static int	sysctl_burstmax(SYSCTL_HANDLER_ARGS);
static void	sysctl_eachburst_handler(netmsg_t);
static int	sysctl_eachburst(SYSCTL_HANDLER_ARGS);

/*
 * Common functions
 */
static void	poll_comm_init(int);
static void	poll_comm_start(int);
static void	poll_comm_adjust_pollhz(struct poll_comm *);
static void	poll_comm_systimer0(systimer_t, int, struct intrframe *);
static void	poll_comm_systimer(systimer_t, int, struct intrframe *);
static void	sysctl_pollhz_handler(netmsg_t);
static void	sysctl_stfrac_handler(netmsg_t);
static void	sysctl_txfrac_handler(netmsg_t);
static int	sysctl_pollhz(SYSCTL_HANDLER_ARGS);
static int	sysctl_stfrac(SYSCTL_HANDLER_ARGS);
static int	sysctl_txfrac(SYSCTL_HANDLER_ARGS);
static int	sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS);
static int	sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS);

static struct stpoll_ctx	stpoll_context;
static struct poll_comm		*poll_common[MAXCPU];
static struct iopoll_ctx	*rxpoll_context[MAXCPU];
static struct iopoll_ctx	*txpoll_context[MAXCPU];

SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0,
	    "Network device polling parameters");

static int	iopoll_burst_max = IOPOLL_BURST_MAX;
static int	iopoll_each_burst = IOPOLL_EACH_BURST;
static int	iopoll_user_frac = IOPOLL_USER_FRAC;

static int	ifpoll_pollhz = IFPOLL_FREQ_DEFAULT;
static int	ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
static int	ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;
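/*
 * All of the defaults above may be overridden at boot time via the
 * loader tunables registered below, e.g. net.ifpoll.pollhz="10000" in
 * loader.conf(5).
 */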
TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max);
TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst);
TUNABLE_INT("net.ifpoll.user_frac", &iopoll_user_frac);
TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz);
TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac);
TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac);

#if !defined(KTR_IF_POLL)
#define KTR_IF_POLL		KTR_ALL
#endif
KTR_INFO_MASTER(if_poll);
KTR_INFO(KTR_IF_POLL, if_poll, rx_start, 0, "rx start");
KTR_INFO(KTR_IF_POLL, if_poll, rx_end, 1, "rx end");
KTR_INFO(KTR_IF_POLL, if_poll, tx_start, 2, "tx start");
KTR_INFO(KTR_IF_POLL, if_poll, tx_end, 3, "tx end");
KTR_INFO(KTR_IF_POLL, if_poll, rx_mstart, 4, "rx more start");
KTR_INFO(KTR_IF_POLL, if_poll, rx_mend, 5, "rx more end");
KTR_INFO(KTR_IF_POLL, if_poll, tx_mstart, 6, "tx more start");
KTR_INFO(KTR_IF_POLL, if_poll, tx_mend, 7, "tx more end");
KTR_INFO(KTR_IF_POLL, if_poll, ioclock_start, 8, "ioclock start");
KTR_INFO(KTR_IF_POLL, if_poll, ioclock_end, 9, "ioclock end");
#define logpoll(name)	KTR_LOG(if_poll_ ## name)

#define IFPOLL_FREQ_ADJ(comm)	(((comm)->poll_cpuid * 3) % 50)

static __inline int
poll_comm_pollhz_div(const struct poll_comm *comm, int pollhz)
{
	return pollhz + IFPOLL_FREQ_ADJ(comm);
}

static __inline int
poll_comm_pollhz_conv(const struct poll_comm *comm, int pollhz)
{
	return pollhz - IFPOLL_FREQ_ADJ(comm);
}
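/*
 * IFPOLL_FREQ_ADJ staggers the polling frequency across cpus so that
 * the per-cpu systimers do not all fire at the same instant: cpu0 runs
 * at the requested pollhz, cpu1 at pollhz + 3, cpu2 at pollhz + 6 and
 * so on, wrapping modulo 50.  poll_comm_pollhz_conv() above undoes the
 * adjustment when the frequency is reported back to userland.
 */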
static __inline void
ifpoll_sendmsg_oncpu(netmsg_t msg)
{
	if (msg->lmsg.ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), &msg->lmsg);
}

static __inline void
sched_stpoll(struct stpoll_ctx *st_ctx)
{
	ifpoll_sendmsg_oncpu((netmsg_t)&st_ctx->poll_netmsg);
}

static __inline void
sched_iopoll(struct iopoll_ctx *io_ctx)
{
	ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_netmsg);
}

static __inline void
sched_iopollmore(struct iopoll_ctx *io_ctx)
{
	ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_more_netmsg);
}

static __inline void
ifpoll_time_get(union ifpoll_time *t)
{
	if (tsc_invariant)
		t->tsc = rdtsc();
	else
		microuptime(&t->tv);
}

/* Return time diff in us */
static __inline int
ifpoll_time_diff(const union ifpoll_time *s, const union ifpoll_time *e)
{
	if (tsc_invariant) {
		return (((e->tsc - s->tsc) * 1000000) / tsc_frequency);
	} else {
		return ((e->tv.tv_usec - s->tv.tv_usec) +
			(e->tv.tv_sec - s->tv.tv_sec) * 1000000);
	}
}

/*
 * Initialize per-cpu polling(4) context.  Called from kern_clock.c:
 */
void
ifpoll_init_pcpu(int cpuid)
{
	if (cpuid >= ncpus2)
		return;

	poll_comm_init(cpuid);

	if (cpuid == 0)
		stpoll_init();
	iopoll_init(cpuid);

	poll_comm_start(cpuid);
}
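/*
 * Driver-facing entry points.  ifpoll_register() interlocks with
 * IFF_NPOLLING under the ifnet serializer, has the driver fill in an
 * ifpoll_info via if_npoll, and then circulates a netmsg through the
 * netisr thread of each cpu (starting at cpu0, forwarded cpu by cpu)
 * to install the per-cpu status/RX/TX handlers.  ifpoll_deregister()
 * clears IFF_NPOLLING, removes the handlers through the same kind of
 * netmsg chain, and finally lets the driver re-enable interrupts via
 * if_npoll(ifp, NULL).
 */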
int
ifpoll_register(struct ifnet *ifp)
{
	struct ifpoll_info *info;
	struct netmsg_base nmsg;
	int error;

	if (ifp->if_npoll == NULL) {
		/* Device does not support polling */
		return EOPNOTSUPP;
	}

	info = kmalloc(sizeof(*info), M_TEMP, M_WAITOK | M_ZERO);

	/*
	 * Attempt to register.  Interlock with IFF_NPOLLING.
	 */

	ifnet_serialize_all(ifp);

	if (ifp->if_flags & IFF_NPOLLING) {
		/* Already polling */
		ifnet_deserialize_all(ifp);
		kfree(info, M_TEMP);
		return EBUSY;
	}

	info->ifpi_ifp = ifp;

	ifp->if_flags |= IFF_NPOLLING;
	ifp->if_npoll(ifp, info);

	ifnet_deserialize_all(ifp);

	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
	    0, ifpoll_register_handler);
	nmsg.lmsg.u.ms_resultp = info;

	error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0);
	if (error) {
		if (!ifpoll_deregister(ifp)) {
			if_printf(ifp, "ifpoll_register: "
			    "ifpoll_deregister failed!\n");
		}
	}

	kfree(info, M_TEMP);
	return error;
}

int
ifpoll_deregister(struct ifnet *ifp)
{
	struct netmsg_base nmsg;
	int error;

	if (ifp->if_npoll == NULL)
		return EOPNOTSUPP;

	ifnet_serialize_all(ifp);

	if ((ifp->if_flags & IFF_NPOLLING) == 0) {
		ifnet_deserialize_all(ifp);
		return EINVAL;
	}
	ifp->if_flags &= ~IFF_NPOLLING;

	ifnet_deserialize_all(ifp);

	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
	    0, ifpoll_deregister_handler);
	nmsg.lmsg.u.ms_resultp = ifp;

	error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0);
	if (!error) {
		ifnet_serialize_all(ifp);
		ifp->if_npoll(ifp, NULL);
		ifnet_deserialize_all(ifp);
	}
	return error;
}

static void
ifpoll_register_handler(netmsg_t nmsg)
{
	const struct ifpoll_info *info = nmsg->lmsg.u.ms_resultp;
	int cpuid = mycpuid, nextcpu;
	int error;

	KKASSERT(cpuid < ncpus2);
	KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid));

	if (cpuid == 0) {
		error = stpoll_register(info->ifpi_ifp, &info->ifpi_status);
		if (error)
			goto failed;
	}

	error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid],
	    &info->ifpi_rx[cpuid]);
	if (error)
		goto failed;

	error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid],
	    &info->ifpi_tx[cpuid]);
	if (error)
		goto failed;

	/* Adjust polling frequency, after all registration is done */
	poll_comm_adjust_pollhz(poll_common[cpuid]);

	nextcpu = cpuid + 1;
	if (nextcpu < ncpus2)
		lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg);
	else
		lwkt_replymsg(&nmsg->lmsg, 0);
	return;
failed:
	lwkt_replymsg(&nmsg->lmsg, error);
}

static void
ifpoll_deregister_handler(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	int cpuid = mycpuid, nextcpu;

	KKASSERT(cpuid < ncpus2);
	KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid));

	/* Ignore errors */
	if (cpuid == 0)
		stpoll_deregister(ifp);
	iopoll_deregister(ifp, rxpoll_context[cpuid]);
	iopoll_deregister(ifp, txpoll_context[cpuid]);

	/* Adjust polling frequency, after all deregistration is done */
	poll_comm_adjust_pollhz(poll_common[cpuid]);

	nextcpu = cpuid + 1;
	if (nextcpu < ncpus2)
		lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg);
	else
		lwkt_replymsg(&nmsg->lmsg, 0);
}

static void
stpoll_init(void)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	const struct poll_comm *comm = poll_common[0];

	sysctl_ctx_init(&st_ctx->poll_sysctl_ctx);
	st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx,
	    SYSCTL_CHILDREN(comm->sysctl_tree),
	    OID_AUTO, "status", CTLFLAG_RD, 0, "");

	SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx,
	    SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree),
	    OID_AUTO, "handlers", CTLFLAG_RD,
	    &st_ctx->poll_handlers, 0,
	    "Number of registered status poll handlers");

	netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport,
	    0, stpoll_handler);
}

/*
 * stpoll_handler is scheduled by sched_stpoll when appropriate, typically
 * once per polling systimer tick.
 */
static void
stpoll_handler(netmsg_t msg)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	struct thread *td = curthread;
	int i;

	KKASSERT(&td->td_msgport == netisr_cpuport(0));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	if (st_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		return;
	}

	for (i = 0; i < st_ctx->poll_handlers; ++i) {
		const struct stpoll_rec *rec = &st_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		if (!lwkt_serialize_try(rec->serializer))
			continue;

		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->status_func(ifp);

		lwkt_serialize_exit(rec->serializer);
	}

	crit_exit_quick(td);
}

/*
 * Hook from status poll systimer.  Tries to schedule a status poll.
 * NOTE: Caller should hold critical section.
 */
static void
stpoll_clock(struct stpoll_ctx *st_ctx)
{
	KKASSERT(mycpuid == 0);

	if (st_ctx->poll_handlers == 0)
		return;
	sched_stpoll(st_ctx);
}

static int
stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	int error;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));

	if (st_rec->status_func == NULL)
		return 0;

	/*
	 * Check if there is room.
	 */
	if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
		/*
		 * List full, cannot register more entries.
		 * This should never happen; if it does, it is probably a
		 * broken driver trying to register multiple times.  Checking
		 * this at runtime is expensive, and won't solve the problem
		 * anyways, so just report a few times and then give up.
		 */
		static int verbose = 10;	/* XXX */

		if (verbose > 0) {
			kprintf("status poll handlers list full, "
			    "maybe a broken driver ?\n");
			verbose--;
		}
		error = ENOENT;
	} else {
		struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers];

		rec->ifp = ifp;
		rec->serializer = st_rec->serializer;
		rec->status_func = st_rec->status_func;

		st_ctx->poll_handlers++;
		error = 0;
	}
	return error;
}

static int
stpoll_deregister(struct ifnet *ifp)
{
	struct stpoll_ctx *st_ctx = &stpoll_context;
	int i, error;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));

	for (i = 0; i < st_ctx->poll_handlers; ++i) {
		if (st_ctx->pr[i].ifp == ifp)	/* Found it */
			break;
	}
	if (i == st_ctx->poll_handlers) {
		error = ENOENT;
	} else {
		st_ctx->poll_handlers--;
		if (i < st_ctx->poll_handlers) {
			/* Last entry replaces this one. */
			st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers];
		}
		error = 0;
	}
	return error;
}
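/*
 * RX/TX polling: one iopoll_ctx per cpu per direction, created during
 * boot by iopoll_init() and clocked by the per-cpu poll_comm systimer.
 */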
static __inline void
iopoll_reset_state(struct iopoll_ctx *io_ctx)
{
	crit_enter();
	io_ctx->poll_burst = io_ctx->poll_each_burst;
	io_ctx->pending_polls = 0;
	io_ctx->residual_burst = 0;
	io_ctx->phase = 0;
	io_ctx->kern_frac = 0;
	bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t));
	bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t));
	crit_exit();
}

static void
iopoll_init(int cpuid)
{
	KKASSERT(cpuid < ncpus2);

	rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX);
	txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX);
}

static struct iopoll_ctx *
iopoll_ctx_create(int cpuid, int poll_type)
{
	struct poll_comm *comm;
	struct iopoll_ctx *io_ctx;
	const char *poll_type_str;
	netisr_fn_t handler, more_handler;

	KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX);

	/*
	 * Make sure that tunables are in sane state
	 */
	if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX)
		iopoll_burst_max = MIN_IOPOLL_BURST_MAX;
	else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX)
		iopoll_burst_max = MAX_IOPOLL_BURST_MAX;

	if (iopoll_each_burst > iopoll_burst_max)
		iopoll_each_burst = iopoll_burst_max;

	comm = poll_common[cpuid];

	/*
	 * Create the per-cpu polling context
	 */
	io_ctx = kmalloc_cachealign(sizeof(*io_ctx), M_DEVBUF,
	    M_WAITOK | M_ZERO);

	io_ctx->poll_each_burst = iopoll_each_burst;
	io_ctx->poll_burst_max = iopoll_burst_max;
	io_ctx->user_frac = iopoll_user_frac;
	if (poll_type == IFPOLL_RX)
		io_ctx->pollhz = comm->pollhz;
	else
		io_ctx->pollhz = comm->pollhz / (comm->poll_txfrac + 1);
	io_ctx->poll_cpuid = cpuid;
	iopoll_reset_state(io_ctx);

	if (poll_type == IFPOLL_RX) {
		handler = rxpoll_handler;
		more_handler = rxpollmore_handler;
	} else {
		handler = txpoll_handler;
		more_handler = txpollmore_handler;
	}

	netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport,
	    0, handler);
	io_ctx->poll_netmsg.lmsg.u.ms_resultp = io_ctx;

	netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport,
	    0, more_handler);
	io_ctx->poll_more_netmsg.lmsg.u.ms_resultp = io_ctx;

	/*
	 * Initialize per-cpu sysctl nodes
	 */
	if (poll_type == IFPOLL_RX)
		poll_type_str = "rx";
	else
		poll_type_str = "tx";

	sysctl_ctx_init(&io_ctx->poll_sysctl_ctx);
	io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx,
	    SYSCTL_CHILDREN(comm->sysctl_tree),
	    OID_AUTO, poll_type_str, CTLFLAG_RD, 0, "");
	iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx,
	    SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx, poll_type);

	return io_ctx;
}

/*
 * Hook from iopoll systimer.  Tries to schedule an iopoll, but keeps
 * track of lost ticks due to the previous handler taking too long.
 * Normally, this should not happen, because the polling handler should
 * run for a short time.  However, in some cases (e.g. when there are
 * changes in link status etc.) the drivers take a very long time
 * (even in the order of milliseconds) to reset and reconfigure the
 * device, causing apparent lost polls.
 *
 * The first part of the code is just for debugging purposes, and tries
 * to count how often hardclock ticks are shorter than they should be,
 * meaning either stray interrupts or delayed events.
 *
 * WARNING! called from fastint or IPI, the MP lock might not be held.
 * NOTE: Caller should hold critical section.
 */
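/*
 * A tick is considered short when delta * pollhz < 500000, i.e. when
 * less than half of the expected tick period (1000000 / pollhz us) has
 * elapsed since the previous tick; e.g. at pollhz=6000 the expected
 * period is ~167us and anything below ~83us counts as a short tick.
 */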
static void
iopoll_clock(struct iopoll_ctx *io_ctx)
{
	union ifpoll_time t;
	int delta;

	KKASSERT(mycpuid == io_ctx->poll_cpuid);

	if (io_ctx->poll_handlers == 0)
		return;

	logpoll(ioclock_start);

	ifpoll_time_get(&t);
	delta = ifpoll_time_diff(&io_ctx->prev_t, &t);
	if (delta * io_ctx->pollhz < 500000)
		io_ctx->short_ticks++;
	else
		io_ctx->prev_t = t;

	if (io_ctx->pending_polls > 100) {
		/*
		 * Too much, assume it has stalled (not always true,
		 * see comment above).
		 */
		io_ctx->stalled++;
		io_ctx->pending_polls = 0;
		io_ctx->phase = 0;
	}

	if (io_ctx->phase <= 2) {
		if (io_ctx->phase != 0)
			io_ctx->suspect++;
		io_ctx->phase = 1;
		sched_iopoll(io_ctx);
		io_ctx->phase = 2;
	}
	if (io_ctx->pending_polls++ > 0)
		io_ctx->lost_polls++;

	logpoll(ioclock_end);
}

/*
 * rxpoll_handler and txpoll_handler are scheduled by sched_iopoll when
 * appropriate, typically once per polling systimer tick.
 *
 * Note that the message is replied to immediately in order to allow a
 * new ISR to be scheduled in the handler.
 */
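/*
 * Each rxpoll_handler call processes at most poll_each_burst packets
 * per handler.  If poll_burst is larger, the remainder is kept in
 * residual_burst and rxpollmore_handler reschedules rxpoll_handler
 * until the burst is exhausted; e.g. poll_burst=150 with
 * poll_each_burst=50 is processed in three chunks, letting other
 * netisr messages run in between.
 */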
static void
rxpoll_handler(netmsg_t msg)
{
	struct iopoll_ctx *io_ctx;
	struct thread *td = curthread;
	int i, cycles;

	logpoll(rx_start);

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		logpoll(rx_end);
		return;
	}

	io_ctx->phase = 3;
	if (io_ctx->residual_burst == 0) {
		/* First call in this tick */
		ifpoll_time_get(&io_ctx->poll_start_t);
		io_ctx->residual_burst = io_ctx->poll_burst;
	}
	cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ?
		 io_ctx->residual_burst : io_ctx->poll_each_burst;
	io_ctx->residual_burst -= cycles;

	for (i = 0; i < io_ctx->poll_handlers; i++) {
		const struct iopoll_rec *rec = &io_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		if (!lwkt_serialize_try(rec->serializer))
			continue;

		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->poll_func(ifp, rec->arg, cycles);

		lwkt_serialize_exit(rec->serializer);
	}

	/*
	 * Do a quick exit/enter to catch any higher-priority
	 * interrupt sources.
	 */
	crit_exit_quick(td);
	crit_enter_quick(td);

	sched_iopollmore(io_ctx);
	io_ctx->phase = 4;

	crit_exit_quick(td);

	logpoll(rx_end);
}

static void
txpoll_handler(netmsg_t msg)
{
	struct iopoll_ctx *io_ctx;
	struct thread *td = curthread;
	int i;

	logpoll(tx_start);

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		logpoll(tx_end);
		return;
	}

	io_ctx->phase = 3;

	for (i = 0; i < io_ctx->poll_handlers; i++) {
		const struct iopoll_rec *rec = &io_ctx->pr[i];
		struct ifnet *ifp = rec->ifp;

		if (!lwkt_serialize_try(rec->serializer))
			continue;

		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
		    (IFF_RUNNING | IFF_NPOLLING))
			rec->poll_func(ifp, rec->arg, -1);

		lwkt_serialize_exit(rec->serializer);
	}

	/*
	 * Do a quick exit/enter to catch any higher-priority
	 * interrupt sources.
	 */
	crit_exit_quick(td);
	crit_enter_quick(td);

	sched_iopollmore(io_ctx);
	io_ctx->phase = 4;

	crit_exit_quick(td);

	logpoll(tx_end);
}

/*
 * rxpollmore_handler and txpollmore_handler are called after other netisrs,
 * possibly scheduling another rxpoll_handler or txpoll_handler call, or
 * adapting the burst size for the next cycle.
 *
 * It is very bad to fetch large bursts of packets from a single card at once,
 * because the burst could take a long time to be completely processed leading
 * to unfairness.  To reduce the problem, and also to account better for time
 * spent in network-related processing, we split the burst in smaller chunks
 * of fixed size, giving control to the other netisrs between chunks.  This
 * helps in improving the fairness, reducing livelock and accounting for the
 * work performed in low level handling.
 */
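/*
 * The kern_load computed below is the fraction of the polling tick
 * spent in the handlers, expressed as a percentage: with the elapsed
 * time in us, elapsed * pollhz / 10000 equals
 * elapsed / (1000000 / pollhz) * 100.  E.g. 83us of polling work within
 * a ~167us tick (pollhz=6000) yields a kern_load of 49.
 */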
static void
rxpollmore_handler(netmsg_t msg)
{
	struct thread *td = curthread;
	struct iopoll_ctx *io_ctx;
	union ifpoll_time t;
	int kern_load;
	uint32_t pending_polls;

	logpoll(rx_mstart);

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		logpoll(rx_mend);
		return;
	}

	io_ctx->phase = 5;
	if (io_ctx->residual_burst > 0) {
		sched_iopoll(io_ctx);
		crit_exit_quick(td);
		/* Will run immediately on return, followed by netisrs */
		logpoll(rx_mend);
		return;
	}

	/* Here we can account time spent in iopoll's in this tick */
	ifpoll_time_get(&t);
	kern_load = ifpoll_time_diff(&io_ctx->poll_start_t, &t);
	kern_load = (kern_load * io_ctx->pollhz) / 10000;	/* 0..100 */
	io_ctx->kern_frac = kern_load;

	if (kern_load > (100 - io_ctx->user_frac)) {
		/* Try to decrease ticks */
		if (io_ctx->poll_burst > 1)
			io_ctx->poll_burst--;
	} else {
		if (io_ctx->poll_burst < io_ctx->poll_burst_max)
			io_ctx->poll_burst++;
	}

	io_ctx->pending_polls--;
	pending_polls = io_ctx->pending_polls;

	if (pending_polls == 0) {
		/* We are done */
		io_ctx->phase = 0;
	} else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks.  Restart processing again, but slightly
		 * reduce the burst size to prevent it from happening again.
		 */
		io_ctx->poll_burst -= (io_ctx->poll_burst / 8);
		if (io_ctx->poll_burst < 1)
			io_ctx->poll_burst = 1;
		sched_iopoll(io_ctx);
		io_ctx->phase = 6;
	}

	crit_exit_quick(td);

	logpoll(rx_mend);
}

static void
txpollmore_handler(netmsg_t msg)
{
	struct thread *td = curthread;
	struct iopoll_ctx *io_ctx;
	uint32_t pending_polls;

	logpoll(tx_mstart);

	io_ctx = msg->lmsg.u.ms_resultp;
	KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	crit_enter_quick(td);

	/* Reply ASAP */
	lwkt_replymsg(&msg->lmsg, 0);

	if (io_ctx->poll_handlers == 0) {
		crit_exit_quick(td);
		logpoll(tx_mend);
		return;
	}

	io_ctx->phase = 5;

	io_ctx->pending_polls--;
	pending_polls = io_ctx->pending_polls;

	if (pending_polls == 0) {
		/* We are done */
		io_ctx->phase = 0;
	} else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks.  Restart processing again.
		 */
		sched_iopoll(io_ctx);
		io_ctx->phase = 6;
	}

	crit_exit_quick(td);

	logpoll(tx_mend);
}
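/*
 * The knobs below appear under the per-cpu polling sysctl trees,
 * e.g. net.ifpoll.0.rx.burst_max and net.ifpoll.1.tx.handlers.
 */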
static void
iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
    struct iopoll_ctx *io_ctx, int poll_type)
{
	if (poll_type == IFPOLL_RX) {
		SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
		    CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax,
		    "IU", "Max Polling burst size");

		SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
		    CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst,
		    "IU", "Max size of each burst");

		SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
		    &io_ctx->poll_burst, 0, "Current polling burst size");

		SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
		    &io_ctx->user_frac, 0,
		    "Desired user fraction of cpu time");

		SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD,
		    &io_ctx->kern_frac, 0, "Kernel fraction of cpu time");

		SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst",
		    CTLFLAG_RD, &io_ctx->residual_burst, 0,
		    "# of residual cycles in burst");
	}

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
	    &io_ctx->phase, 0, "Polling phase");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
	    &io_ctx->suspect, "Suspected events");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
	    &io_ctx->stalled, "Potential stalls");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
	    &io_ctx->short_ticks,
	    "Hardclock ticks shorter than they should be");

	SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
	    &io_ctx->lost_polls,
	    "How many times we would have lost a poll tick");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
	    &io_ctx->pending_polls, 0, "Do we need to poll again");

	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
	    &io_ctx->poll_handlers, 0, "Number of registered poll handlers");
}
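/*
 * Writable tunables are not updated from the sysctl handler itself;
 * the new value is instead carried in a netmsg to the owning cpu's
 * netisr thread (see the lwkt_domsg() calls below), so each iopoll_ctx
 * is only ever modified from the thread that polls it.
 */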
static void
sysctl_burstmax_handler(netmsg_t nmsg)
{
	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
	struct iopoll_ctx *io_ctx;

	io_ctx = msg->ctx;
	KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	io_ctx->poll_burst_max = nmsg->lmsg.u.ms_result;
	if (io_ctx->poll_each_burst > io_ctx->poll_burst_max)
		io_ctx->poll_each_burst = io_ctx->poll_burst_max;
	if (io_ctx->poll_burst > io_ctx->poll_burst_max)
		io_ctx->poll_burst = io_ctx->poll_burst_max;
	if (io_ctx->residual_burst > io_ctx->poll_burst_max)
		io_ctx->residual_burst = io_ctx->poll_burst_max;

	lwkt_replymsg(&nmsg->lmsg, 0);
}

static int
sysctl_burstmax(SYSCTL_HANDLER_ARGS)
{
	struct iopoll_ctx *io_ctx = arg1;
	struct iopoll_sysctl_netmsg msg;
	uint32_t burst_max;
	int error;

	burst_max = io_ctx->poll_burst_max;
	error = sysctl_handle_int(oidp, &burst_max, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (burst_max < MIN_IOPOLL_BURST_MAX)
		burst_max = MIN_IOPOLL_BURST_MAX;
	else if (burst_max > MAX_IOPOLL_BURST_MAX)
		burst_max = MAX_IOPOLL_BURST_MAX;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, sysctl_burstmax_handler);
	msg.base.lmsg.u.ms_result = burst_max;
	msg.ctx = io_ctx;

	return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid),
	    &msg.base.lmsg, 0);
}

static void
sysctl_eachburst_handler(netmsg_t nmsg)
{
	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
	struct iopoll_ctx *io_ctx;
	uint32_t each_burst;

	io_ctx = msg->ctx;
	KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	each_burst = nmsg->lmsg.u.ms_result;
	if (each_burst > io_ctx->poll_burst_max)
		each_burst = io_ctx->poll_burst_max;
	else if (each_burst < 1)
		each_burst = 1;
	io_ctx->poll_each_burst = each_burst;

	lwkt_replymsg(&nmsg->lmsg, 0);
}

static int
sysctl_eachburst(SYSCTL_HANDLER_ARGS)
{
	struct iopoll_ctx *io_ctx = arg1;
	struct iopoll_sysctl_netmsg msg;
	uint32_t each_burst;
	int error;

	each_burst = io_ctx->poll_each_burst;
	error = sysctl_handle_int(oidp, &each_burst, 0, req);
	if (error || req->newptr == NULL)
		return error;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, sysctl_eachburst_handler);
	msg.base.lmsg.u.ms_result = each_burst;
	msg.ctx = io_ctx;

	return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid),
	    &msg.base.lmsg, 0);
}

static int
iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx,
    const struct ifpoll_io *io_rec)
{
	int error;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	if (io_rec->poll_func == NULL)
		return 0;

	/*
	 * Check if there is room.
	 */
	if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
		/*
		 * List full, cannot register more entries.
		 * This should never happen; if it does, it is probably a
		 * broken driver trying to register multiple times.  Checking
		 * this at runtime is expensive, and won't solve the problem
		 * anyways, so just report a few times and then give up.
		 */
		static int verbose = 10;	/* XXX */

		if (verbose > 0) {
			kprintf("io poll handlers list full, "
			    "maybe a broken driver ?\n");
			verbose--;
		}
		error = ENOENT;
	} else {
		struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers];

		rec->ifp = ifp;
		rec->serializer = io_rec->serializer;
		rec->arg = io_rec->arg;
		rec->poll_func = io_rec->poll_func;

		io_ctx->poll_handlers++;
		error = 0;
	}
	return error;
}

static int
iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx)
{
	int i, error;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid));

	for (i = 0; i < io_ctx->poll_handlers; ++i) {
		if (io_ctx->pr[i].ifp == ifp)	/* Found it */
			break;
	}
	if (i == io_ctx->poll_handlers) {
		error = ENOENT;
	} else {
		io_ctx->poll_handlers--;
		if (i < io_ctx->poll_handlers) {
			/* Last entry replaces this one. */
			io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers];
		}

		if (io_ctx->poll_handlers == 0)
			iopoll_reset_state(io_ctx);
		error = 0;
	}
	return error;
}
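/*
 * Note that poll_stfrac/poll_txfrac store (frac - 1) and the matching
 * counters count down from that value to 0, so the systimer fires the
 * status/TX clocks once every frac ticks; e.g. the default status_frac
 * of 120 polls status on one out of every 120 polling ticks on cpu0.
 */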
static void
poll_comm_init(int cpuid)
{
	struct poll_comm *comm;
	char cpuid_str[16];

	comm = kmalloc_cachealign(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifpoll_stfrac < 1)
		ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
	if (ifpoll_txfrac < 1)
		ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;

	comm->poll_cpuid = cpuid;
	comm->pollhz = poll_comm_pollhz_div(comm, ifpoll_pollhz);
	comm->poll_stfrac = ifpoll_stfrac - 1;
	comm->poll_txfrac = ifpoll_txfrac - 1;

	ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid);

	sysctl_ctx_init(&comm->sysctl_ctx);
	comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_net_ifpoll),
	    OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");

	SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
	    OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW,
	    comm, 0, sysctl_pollhz,
	    "I", "Device polling frequency");

	if (cpuid == 0) {
		SYSCTL_ADD_PROC(&comm->sysctl_ctx,
		    SYSCTL_CHILDREN(comm->sysctl_tree),
		    OID_AUTO, "status_frac",
		    CTLTYPE_INT | CTLFLAG_RW,
		    comm, 0, sysctl_stfrac,
		    "I", "# of cycles before status is polled");
	}
	SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
	    OID_AUTO, "tx_frac", CTLTYPE_INT | CTLFLAG_RW,
	    comm, 0, sysctl_txfrac,
	    "I", "# of cycles before TX is polled");

	poll_common[cpuid] = comm;
}

static void
poll_comm_start(int cpuid)
{
	struct poll_comm *comm = poll_common[cpuid];
	systimer_func_t func;

	/*
	 * Initialize systimer
	 */
	if (cpuid == 0)
		func = poll_comm_systimer0;
	else
		func = poll_comm_systimer;
	systimer_init_periodic_nq(&comm->pollclock, func, comm, 1);
}
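/*
 * Per-cpu polling systimer handlers.  The pollclock starts out at the
 * lowest possible frequency (1hz) and is only raised to the configured
 * pollhz by poll_comm_adjust_pollhz() once at least one handler has
 * been registered, so an idle pollclock is nearly free.  RX polling is
 * clocked on every tick; TX (and, on cpu0, status) polling piggyback
 * every (poll_txfrac + 1)th and (poll_stfrac + 1)th tick respectively.
 */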
static void
_poll_comm_systimer(struct poll_comm *comm)
{
	iopoll_clock(rxpoll_context[comm->poll_cpuid]);
	if (comm->txfrac_count-- == 0) {
		comm->txfrac_count = comm->poll_txfrac;
		iopoll_clock(txpoll_context[comm->poll_cpuid]);
	}
}

static void
poll_comm_systimer0(systimer_t info, int in_ipi __unused,
    struct intrframe *frame __unused)
{
	struct poll_comm *comm = info->data;
	globaldata_t gd = mycpu;

	KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid == 0);

	crit_enter_gd(gd);

	if (comm->stfrac_count-- == 0) {
		comm->stfrac_count = comm->poll_stfrac;
		stpoll_clock(&stpoll_context);
	}
	_poll_comm_systimer(comm);

	crit_exit_gd(gd);
}

static void
poll_comm_systimer(systimer_t info, int in_ipi __unused,
    struct intrframe *frame __unused)
{
	struct poll_comm *comm = info->data;
	globaldata_t gd = mycpu;

	KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid != 0);

	crit_enter_gd(gd);
	_poll_comm_systimer(comm);
	crit_exit_gd(gd);
}

static void
poll_comm_adjust_pollhz(struct poll_comm *comm)
{
	uint32_t handlers;
	int pollhz = 1;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));

	/*
	 * If there is no polling handler registered, set systimer
	 * frequency to the lowest value.  Polling systimer frequency
	 * will be adjusted to the requested value, once there are
	 * registered handlers.
	 */
	handlers = rxpoll_context[mycpuid]->poll_handlers +
	    txpoll_context[mycpuid]->poll_handlers;
	if (comm->poll_cpuid == 0)
		handlers += stpoll_context.poll_handlers;
	if (handlers)
		pollhz = comm->pollhz;
	systimer_adjust_periodic(&comm->pollclock, pollhz);
}

static int
sysctl_pollhz(SYSCTL_HANDLER_ARGS)
{
	struct poll_comm *comm = arg1;
	struct netmsg_base nmsg;
	int error, phz;

	phz = poll_comm_pollhz_conv(comm, comm->pollhz);
	error = sysctl_handle_int(oidp, &phz, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (phz <= 0)
		return EINVAL;
	else if (phz > IFPOLL_FREQ_MAX)
		phz = IFPOLL_FREQ_MAX;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
	    0, sysctl_pollhz_handler);
	nmsg.lmsg.u.ms_result = phz;

	return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0);
}

static void
sysctl_pollhz_handler(netmsg_t nmsg)
{
	struct poll_comm *comm = poll_common[mycpuid];

	KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));

	/* Save polling frequency */
	comm->pollhz = poll_comm_pollhz_div(comm, nmsg->lmsg.u.ms_result);

	/*
	 * Adjust cached pollhz
	 */
	rxpoll_context[mycpuid]->pollhz = comm->pollhz;
	txpoll_context[mycpuid]->pollhz =
	    comm->pollhz / (comm->poll_txfrac + 1);

	/*
	 * Adjust polling frequency
	 */
	poll_comm_adjust_pollhz(comm);

	lwkt_replymsg(&nmsg->lmsg, 0);
}

static int
sysctl_stfrac(SYSCTL_HANDLER_ARGS)
{
	struct poll_comm *comm = arg1;
	struct netmsg_base nmsg;
	int error, stfrac;

	KKASSERT(comm->poll_cpuid == 0);

	stfrac = comm->poll_stfrac + 1;
	error = sysctl_handle_int(oidp, &stfrac, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (stfrac < 1)
		return EINVAL;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
	    0, sysctl_stfrac_handler);
	nmsg.lmsg.u.ms_result = stfrac - 1;

	return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0);
}

static void
sysctl_stfrac_handler(netmsg_t nmsg)
{
	struct poll_comm *comm = poll_common[mycpuid];
	int stfrac = nmsg->lmsg.u.ms_result;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));

	crit_enter();
	comm->poll_stfrac = stfrac;
	if (comm->stfrac_count > comm->poll_stfrac)
		comm->stfrac_count = comm->poll_stfrac;
	crit_exit();

	lwkt_replymsg(&nmsg->lmsg, 0);
}

static int
sysctl_txfrac(SYSCTL_HANDLER_ARGS)
{
	struct poll_comm *comm = arg1;
	struct netmsg_base nmsg;
	int error, txfrac;

	txfrac = comm->poll_txfrac + 1;
	error = sysctl_handle_int(oidp, &txfrac, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (txfrac < 1)
		return EINVAL;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
	    0, sysctl_txfrac_handler);
	nmsg.lmsg.u.ms_result = txfrac - 1;

	return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0);
}

static void
sysctl_txfrac_handler(netmsg_t nmsg)
{
	struct poll_comm *comm = poll_common[mycpuid];
	int txfrac = nmsg->lmsg.u.ms_result;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid));

	crit_enter();
	comm->poll_txfrac = txfrac;
	if (comm->txfrac_count > comm->poll_txfrac)
		comm->txfrac_count = comm->poll_txfrac;
	crit_exit();

	lwkt_replymsg(&nmsg->lmsg, 0);
}
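/*
 * With the compile-time defaults the compat status frac scales by
 * howmany(IOPOLL_BURST_MAX, IOPOLL_EACH_BURST) == howmany(250, 50) == 5
 * chunks per burst, so a status_frac of 120 yields an initial
 * ifpc_stfrac of 120 * 5 - 1 == 599.
 */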
void
ifpoll_compat_setup(struct ifpoll_compat *cp,
    struct sysctl_ctx_list *sysctl_ctx,
    struct sysctl_oid *sysctl_tree,
    int unit, struct lwkt_serialize *slz)
{
	cp->ifpc_stcount = 0;
	cp->ifpc_stfrac = ((poll_common[0]->poll_stfrac + 1) *
	    howmany(IOPOLL_BURST_MAX, IOPOLL_EACH_BURST)) - 1;

	cp->ifpc_cpuid = unit % ncpus2;
	cp->ifpc_serializer = slz;

	if (sysctl_ctx != NULL && sysctl_tree != NULL) {
		SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
		    OID_AUTO, "npoll_stfrac", CTLTYPE_INT | CTLFLAG_RW,
		    cp, 0, sysctl_compat_npoll_stfrac, "I",
		    "polling status frac");
		SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
		    OID_AUTO, "npoll_cpuid", CTLTYPE_INT | CTLFLAG_RW,
		    cp, 0, sysctl_compat_npoll_cpuid, "I",
		    "polling cpuid");
	}
}

static int
sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS)
{
	struct ifpoll_compat *cp = arg1;
	int error = 0, stfrac;

	lwkt_serialize_enter(cp->ifpc_serializer);

	stfrac = cp->ifpc_stfrac + 1;
	error = sysctl_handle_int(oidp, &stfrac, 0, req);
	if (!error && req->newptr != NULL) {
		if (stfrac < 1) {
			error = EINVAL;
		} else {
			cp->ifpc_stfrac = stfrac - 1;
			if (cp->ifpc_stcount > cp->ifpc_stfrac)
				cp->ifpc_stcount = cp->ifpc_stfrac;
		}
	}

	lwkt_serialize_exit(cp->ifpc_serializer);
	return error;
}

static int
sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS)
{
	struct ifpoll_compat *cp = arg1;
	int error = 0, cpuid;

	lwkt_serialize_enter(cp->ifpc_serializer);

	cpuid = cp->ifpc_cpuid;
	error = sysctl_handle_int(oidp, &cpuid, 0, req);
	if (!error && req->newptr != NULL) {
		if (cpuid < 0 || cpuid >= ncpus2)
			error = EINVAL;
		else
			cp->ifpc_cpuid = cpuid;
	}

	lwkt_serialize_exit(cp->ifpc_serializer);
	return error;
}