1 /*- 2 * Copyright (c) 2001-2002 Luigi Rizzo 3 * 4 * Supported by: the Xorp Project (www.xorp.org) 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $ 28 */ 29 30 #include "opt_ifpoll.h" 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/ktr.h> 35 #include <sys/malloc.h> 36 #include <sys/serialize.h> 37 #include <sys/socket.h> 38 #include <sys/sysctl.h> 39 #include <sys/microtime_pcpu.h> 40 41 #include <sys/thread2.h> 42 #include <sys/msgport2.h> 43 44 #include <net/if.h> 45 #include <net/if_poll.h> 46 #include <net/netmsg2.h> 47 #include <net/netisr2.h> 48 49 /* 50 * Polling support for network device drivers. 51 * 52 * Drivers which support this feature try to register one status polling 53 * handler and several TX/RX polling handlers with the polling code. 54 * If interface's if_npoll is called with non-NULL second argument, then 55 * a register operation is requested, else a deregister operation is 56 * requested. If the requested operation is "register", driver should 57 * setup the ifpoll_info passed in accoding its own needs: 58 * ifpoll_info.ifpi_status.status_func == NULL 59 * No status polling handler will be installed on CPU(0) 60 * ifpoll_info.ifpi_rx[n].poll_func == NULL 61 * No RX polling handler will be installed on CPU(n) 62 * ifpoll_info.ifpi_tx[n].poll_func == NULL 63 * No TX polling handler will be installed on CPU(n) 64 * 65 * RX is polled at the specified polling frequency (net.ifpoll.X.pollhz). 66 * TX and status polling could be done at lower frequency than RX frequency 67 * (net.ifpoll.0.status_frac and net.ifpoll.X.tx_frac). To avoid systimer 68 * staggering at high frequency, RX systimer gives TX and status polling a 69 * piggyback (XXX). 70 * 71 * All of the registered polling handlers are called only if the interface 72 * is marked as 'IFF_RUNNING and IFF_NPOLLING'. However, the interface's 73 * register and deregister function (ifnet.if_npoll) will be called even 74 * if interface is not marked with 'IFF_RUNNING'. 75 * 76 * If registration is successful, the driver must disable interrupts, 77 * and further I/O is performed through the TX/RX polling handler, which 78 * are invoked (at least once per clock tick) with 3 arguments: the "arg" 79 * passed at register time, a struct ifnet pointer, and a "count" limit. 80 * The registered serializer will be held before calling the related 81 * polling handler. 82 * 83 * The count limit specifies how much work the handler can do during the 84 * call -- typically this is the number of packets to be received, or 85 * transmitted, etc. (drivers are free to interpret this number, as long 86 * as the max time spent in the function grows roughly linearly with the 87 * count). 88 * 89 * A second variable controls the sharing of CPU between polling/kernel 90 * network processing, and other activities (typically userlevel tasks): 91 * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the 92 * share of CPU allocated to user tasks. CPU is allocated proportionally 93 * to the shares, by dynamically adjusting the "count" (poll_burst). 94 * 95 * Other parameters can should be left to their default values. 96 * The following constraints hold 97 * 98 * 1 <= poll_burst <= poll_burst_max 99 * 1 <= poll_each_burst <= poll_burst_max 100 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX 101 */ 102 103 #define IFPOLL_LIST_LEN 128 104 #define IFPOLL_FREQ_MAX 30000 105 106 #define MIN_IOPOLL_BURST_MAX 10 107 #define MAX_IOPOLL_BURST_MAX 5000 108 #define IOPOLL_BURST_MAX 250 /* good for 1000Mbit net and HZ=6000 */ 109 110 #define IOPOLL_EACH_BURST 50 111 #define IOPOLL_USER_FRAC 50 112 113 #define IFPOLL_FREQ_DEFAULT 6000 114 115 #define IFPOLL_TXFRAC_DEFAULT 1 /* 1/1 of the pollhz */ 116 #define IFPOLL_STFRAC_DEFAULT 120 /* 1/120 of the pollhz */ 117 118 #define IFPOLL_RX 0x1 119 #define IFPOLL_TX 0x2 120 121 struct iopoll_rec { 122 struct lwkt_serialize *serializer; 123 struct ifnet *ifp; 124 void *arg; 125 ifpoll_iofn_t poll_func; 126 }; 127 128 struct iopoll_ctx { 129 union microtime_pcpu prev_t; 130 u_long short_ticks; /* statistics */ 131 u_long lost_polls; /* statistics */ 132 u_long suspect; /* statistics */ 133 u_long stalled; /* statistics */ 134 uint32_t pending_polls; /* state */ 135 136 struct netmsg_base poll_netmsg; 137 struct netmsg_base poll_more_netmsg; 138 139 int poll_cpuid; 140 int pollhz; 141 uint32_t phase; /* state */ 142 int residual_burst; /* state */ 143 uint32_t poll_each_burst; /* tunable */ 144 union microtime_pcpu poll_start_t; /* state */ 145 146 uint32_t poll_burst; /* state */ 147 uint32_t poll_burst_max; /* tunable */ 148 uint32_t user_frac; /* tunable */ 149 uint32_t kern_frac; /* state */ 150 151 uint32_t poll_handlers; /* next free entry in pr[]. */ 152 struct iopoll_rec pr[IFPOLL_LIST_LEN]; 153 154 struct sysctl_ctx_list poll_sysctl_ctx; 155 struct sysctl_oid *poll_sysctl_tree; 156 } __cachealign; 157 158 struct poll_comm { 159 struct systimer pollclock; 160 int poll_cpuid; 161 162 int stfrac_count; /* state */ 163 int poll_stfrac; /* tunable */ 164 165 int txfrac_count; /* state */ 166 int poll_txfrac; /* tunable */ 167 168 int pollhz; /* tunable */ 169 170 struct sysctl_ctx_list sysctl_ctx; 171 struct sysctl_oid *sysctl_tree; 172 } __cachealign; 173 174 struct stpoll_rec { 175 struct lwkt_serialize *serializer; 176 struct ifnet *ifp; 177 ifpoll_stfn_t status_func; 178 }; 179 180 struct stpoll_ctx { 181 struct netmsg_base poll_netmsg; 182 183 uint32_t poll_handlers; /* next free entry in pr[]. */ 184 struct stpoll_rec pr[IFPOLL_LIST_LEN]; 185 186 struct sysctl_ctx_list poll_sysctl_ctx; 187 struct sysctl_oid *poll_sysctl_tree; 188 } __cachealign; 189 190 struct iopoll_sysctl_netmsg { 191 struct netmsg_base base; 192 struct iopoll_ctx *ctx; 193 }; 194 195 void ifpoll_init_pcpu(int); 196 static void ifpoll_register_handler(netmsg_t); 197 static void ifpoll_deregister_handler(netmsg_t); 198 199 /* 200 * Status polling 201 */ 202 static void stpoll_init(void); 203 static void stpoll_handler(netmsg_t); 204 static void stpoll_clock(struct stpoll_ctx *); 205 static int stpoll_register(struct ifnet *, const struct ifpoll_status *); 206 static int stpoll_deregister(struct ifnet *); 207 208 /* 209 * RX/TX polling 210 */ 211 static struct iopoll_ctx *iopoll_ctx_create(int, int); 212 static void iopoll_init(int); 213 static void rxpoll_handler(netmsg_t); 214 static void txpoll_handler(netmsg_t); 215 static void rxpollmore_handler(netmsg_t); 216 static void txpollmore_handler(netmsg_t); 217 static void iopoll_clock(struct iopoll_ctx *); 218 static int iopoll_register(struct ifnet *, struct iopoll_ctx *, 219 const struct ifpoll_io *); 220 static int iopoll_deregister(struct ifnet *, struct iopoll_ctx *); 221 222 static void iopoll_add_sysctl(struct sysctl_ctx_list *, 223 struct sysctl_oid_list *, struct iopoll_ctx *, int); 224 static void sysctl_burstmax_handler(netmsg_t); 225 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS); 226 static void sysctl_eachburst_handler(netmsg_t); 227 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS); 228 229 /* 230 * Common functions 231 */ 232 static void poll_comm_init(int); 233 static void poll_comm_start(int); 234 static void poll_comm_adjust_pollhz(struct poll_comm *); 235 static void poll_comm_systimer0(systimer_t, int, struct intrframe *); 236 static void poll_comm_systimer(systimer_t, int, struct intrframe *); 237 static void sysctl_pollhz_handler(netmsg_t); 238 static void sysctl_stfrac_handler(netmsg_t); 239 static void sysctl_txfrac_handler(netmsg_t); 240 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS); 241 static int sysctl_stfrac(SYSCTL_HANDLER_ARGS); 242 static int sysctl_txfrac(SYSCTL_HANDLER_ARGS); 243 static int sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS); 244 static int sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS); 245 246 static struct stpoll_ctx stpoll_context; 247 static struct poll_comm *poll_common[MAXCPU]; 248 static struct iopoll_ctx *rxpoll_context[MAXCPU]; 249 static struct iopoll_ctx *txpoll_context[MAXCPU]; 250 251 SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0, 252 "Network device polling parameters"); 253 254 static int iopoll_burst_max = IOPOLL_BURST_MAX; 255 static int iopoll_each_burst = IOPOLL_EACH_BURST; 256 static int iopoll_user_frac = IOPOLL_USER_FRAC; 257 258 static int ifpoll_pollhz = IFPOLL_FREQ_DEFAULT; 259 static int ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT; 260 static int ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT; 261 262 TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max); 263 TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst); 264 TUNABLE_INT("net.ifpoll.user_frac", &iopoll_user_frac); 265 TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz); 266 TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac); 267 TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac); 268 269 #if !defined(KTR_IF_POLL) 270 #define KTR_IF_POLL KTR_ALL 271 #endif 272 KTR_INFO_MASTER(if_poll); 273 KTR_INFO(KTR_IF_POLL, if_poll, rx_start, 0, "rx start"); 274 KTR_INFO(KTR_IF_POLL, if_poll, rx_end, 1, "rx end"); 275 KTR_INFO(KTR_IF_POLL, if_poll, tx_start, 2, "tx start"); 276 KTR_INFO(KTR_IF_POLL, if_poll, tx_end, 3, "tx end"); 277 KTR_INFO(KTR_IF_POLL, if_poll, rx_mstart, 4, "rx more start"); 278 KTR_INFO(KTR_IF_POLL, if_poll, rx_mend, 5, "rx more end"); 279 KTR_INFO(KTR_IF_POLL, if_poll, tx_mstart, 6, "tx more start"); 280 KTR_INFO(KTR_IF_POLL, if_poll, tx_mend, 7, "tx more end"); 281 KTR_INFO(KTR_IF_POLL, if_poll, ioclock_start, 8, "ioclock start"); 282 KTR_INFO(KTR_IF_POLL, if_poll, ioclock_end, 9, "ioclock end"); 283 #define logpoll(name) KTR_LOG(if_poll_ ## name) 284 285 #define IFPOLL_FREQ_ADJ(comm) (((comm)->poll_cpuid * 3) % 50) 286 287 static __inline int 288 poll_comm_pollhz_div(const struct poll_comm *comm, int pollhz) 289 { 290 return pollhz + IFPOLL_FREQ_ADJ(comm); 291 } 292 293 static __inline int 294 poll_comm_pollhz_conv(const struct poll_comm *comm, int pollhz) 295 { 296 return pollhz - IFPOLL_FREQ_ADJ(comm); 297 } 298 299 static __inline void 300 ifpoll_sendmsg_oncpu(netmsg_t msg) 301 { 302 if (msg->lmsg.ms_flags & MSGF_DONE) 303 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), &msg->lmsg); 304 } 305 306 static __inline void 307 sched_stpoll(struct stpoll_ctx *st_ctx) 308 { 309 ifpoll_sendmsg_oncpu((netmsg_t)&st_ctx->poll_netmsg); 310 } 311 312 static __inline void 313 sched_iopoll(struct iopoll_ctx *io_ctx) 314 { 315 ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_netmsg); 316 } 317 318 static __inline void 319 sched_iopollmore(struct iopoll_ctx *io_ctx) 320 { 321 ifpoll_sendmsg_oncpu((netmsg_t)&io_ctx->poll_more_netmsg); 322 } 323 324 /* 325 * Initialize per-cpu polling(4) context. Called from kern_clock.c: 326 */ 327 void 328 ifpoll_init_pcpu(int cpuid) 329 { 330 if (cpuid >= ncpus2) 331 return; 332 333 poll_comm_init(cpuid); 334 335 if (cpuid == 0) 336 stpoll_init(); 337 iopoll_init(cpuid); 338 339 poll_comm_start(cpuid); 340 } 341 342 int 343 ifpoll_register(struct ifnet *ifp) 344 { 345 struct ifpoll_info *info; 346 struct netmsg_base nmsg; 347 int error; 348 349 if (ifp->if_npoll == NULL) { 350 /* Device does not support polling */ 351 return EOPNOTSUPP; 352 } 353 354 info = kmalloc(sizeof(*info), M_TEMP, M_WAITOK | M_ZERO); 355 356 /* 357 * Attempt to register. Interlock with IFF_NPOLLING. 358 */ 359 360 ifnet_serialize_all(ifp); 361 362 if (ifp->if_flags & IFF_NPOLLING) { 363 /* Already polling */ 364 ifnet_deserialize_all(ifp); 365 kfree(info, M_TEMP); 366 return EBUSY; 367 } 368 369 info->ifpi_ifp = ifp; 370 371 ifp->if_flags |= IFF_NPOLLING; 372 ifp->if_npoll(ifp, info); 373 374 ifnet_deserialize_all(ifp); 375 376 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 377 0, ifpoll_register_handler); 378 nmsg.lmsg.u.ms_resultp = info; 379 380 error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0); 381 if (error) { 382 if (!ifpoll_deregister(ifp)) { 383 if_printf(ifp, "ifpoll_register: " 384 "ifpoll_deregister failed!\n"); 385 } 386 } 387 388 kfree(info, M_TEMP); 389 return error; 390 } 391 392 int 393 ifpoll_deregister(struct ifnet *ifp) 394 { 395 struct netmsg_base nmsg; 396 int error; 397 398 if (ifp->if_npoll == NULL) 399 return EOPNOTSUPP; 400 401 ifnet_serialize_all(ifp); 402 403 if ((ifp->if_flags & IFF_NPOLLING) == 0) { 404 ifnet_deserialize_all(ifp); 405 return EINVAL; 406 } 407 ifp->if_flags &= ~IFF_NPOLLING; 408 409 ifnet_deserialize_all(ifp); 410 411 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 412 0, ifpoll_deregister_handler); 413 nmsg.lmsg.u.ms_resultp = ifp; 414 415 error = lwkt_domsg(netisr_cpuport(0), &nmsg.lmsg, 0); 416 if (!error) { 417 ifnet_serialize_all(ifp); 418 ifp->if_npoll(ifp, NULL); 419 ifnet_deserialize_all(ifp); 420 } 421 return error; 422 } 423 424 static void 425 ifpoll_register_handler(netmsg_t nmsg) 426 { 427 const struct ifpoll_info *info = nmsg->lmsg.u.ms_resultp; 428 int cpuid = mycpuid, nextcpu; 429 int error; 430 431 KKASSERT(cpuid < ncpus2); 432 KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid)); 433 434 if (cpuid == 0) { 435 error = stpoll_register(info->ifpi_ifp, &info->ifpi_status); 436 if (error) 437 goto failed; 438 } 439 440 error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid], 441 &info->ifpi_rx[cpuid]); 442 if (error) 443 goto failed; 444 445 error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid], 446 &info->ifpi_tx[cpuid]); 447 if (error) 448 goto failed; 449 450 /* Adjust polling frequency, after all registration is done */ 451 poll_comm_adjust_pollhz(poll_common[cpuid]); 452 453 nextcpu = cpuid + 1; 454 if (nextcpu < ncpus2) 455 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg); 456 else 457 lwkt_replymsg(&nmsg->lmsg, 0); 458 return; 459 failed: 460 lwkt_replymsg(&nmsg->lmsg, error); 461 } 462 463 static void 464 ifpoll_deregister_handler(netmsg_t nmsg) 465 { 466 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 467 int cpuid = mycpuid, nextcpu; 468 469 KKASSERT(cpuid < ncpus2); 470 KKASSERT(&curthread->td_msgport == netisr_cpuport(cpuid)); 471 472 /* Ignore errors */ 473 if (cpuid == 0) 474 stpoll_deregister(ifp); 475 iopoll_deregister(ifp, rxpoll_context[cpuid]); 476 iopoll_deregister(ifp, txpoll_context[cpuid]); 477 478 /* Adjust polling frequency, after all deregistration is done */ 479 poll_comm_adjust_pollhz(poll_common[cpuid]); 480 481 nextcpu = cpuid + 1; 482 if (nextcpu < ncpus2) 483 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nmsg->lmsg); 484 else 485 lwkt_replymsg(&nmsg->lmsg, 0); 486 } 487 488 static void 489 stpoll_init(void) 490 { 491 struct stpoll_ctx *st_ctx = &stpoll_context; 492 const struct poll_comm *comm = poll_common[0]; 493 494 sysctl_ctx_init(&st_ctx->poll_sysctl_ctx); 495 st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx, 496 SYSCTL_CHILDREN(comm->sysctl_tree), 497 OID_AUTO, "status", CTLFLAG_RD, 0, ""); 498 499 SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx, 500 SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree), 501 OID_AUTO, "handlers", CTLFLAG_RD, 502 &st_ctx->poll_handlers, 0, 503 "Number of registered status poll handlers"); 504 505 netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport, 506 0, stpoll_handler); 507 } 508 509 /* 510 * stpoll_handler is scheduled by sched_stpoll when appropriate, typically 511 * once per polling systimer tick. 512 */ 513 static void 514 stpoll_handler(netmsg_t msg) 515 { 516 struct stpoll_ctx *st_ctx = &stpoll_context; 517 struct thread *td = curthread; 518 int i; 519 520 KKASSERT(&td->td_msgport == netisr_cpuport(0)); 521 522 crit_enter_quick(td); 523 524 /* Reply ASAP */ 525 lwkt_replymsg(&msg->lmsg, 0); 526 527 if (st_ctx->poll_handlers == 0) { 528 crit_exit_quick(td); 529 return; 530 } 531 532 for (i = 0; i < st_ctx->poll_handlers; ++i) { 533 const struct stpoll_rec *rec = &st_ctx->pr[i]; 534 struct ifnet *ifp = rec->ifp; 535 536 if (!lwkt_serialize_try(rec->serializer)) 537 continue; 538 539 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) == 540 (IFF_RUNNING | IFF_NPOLLING)) 541 rec->status_func(ifp); 542 543 lwkt_serialize_exit(rec->serializer); 544 } 545 546 crit_exit_quick(td); 547 } 548 549 /* 550 * Hook from status poll systimer. Tries to schedule an status poll. 551 * NOTE: Caller should hold critical section. 552 */ 553 static void 554 stpoll_clock(struct stpoll_ctx *st_ctx) 555 { 556 KKASSERT(mycpuid == 0); 557 558 if (st_ctx->poll_handlers == 0) 559 return; 560 sched_stpoll(st_ctx); 561 } 562 563 static int 564 stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec) 565 { 566 struct stpoll_ctx *st_ctx = &stpoll_context; 567 int error; 568 569 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 570 571 if (st_rec->status_func == NULL) 572 return 0; 573 574 /* 575 * Check if there is room. 576 */ 577 if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) { 578 /* 579 * List full, cannot register more entries. 580 * This should never happen; if it does, it is probably a 581 * broken driver trying to register multiple times. Checking 582 * this at runtime is expensive, and won't solve the problem 583 * anyways, so just report a few times and then give up. 584 */ 585 static int verbose = 10; /* XXX */ 586 587 if (verbose > 0) { 588 kprintf("status poll handlers list full, " 589 "maybe a broken driver ?\n"); 590 verbose--; 591 } 592 error = ENOENT; 593 } else { 594 struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers]; 595 596 rec->ifp = ifp; 597 rec->serializer = st_rec->serializer; 598 rec->status_func = st_rec->status_func; 599 600 st_ctx->poll_handlers++; 601 error = 0; 602 } 603 return error; 604 } 605 606 static int 607 stpoll_deregister(struct ifnet *ifp) 608 { 609 struct stpoll_ctx *st_ctx = &stpoll_context; 610 int i, error; 611 612 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 613 614 for (i = 0; i < st_ctx->poll_handlers; ++i) { 615 if (st_ctx->pr[i].ifp == ifp) /* Found it */ 616 break; 617 } 618 if (i == st_ctx->poll_handlers) { 619 error = ENOENT; 620 } else { 621 st_ctx->poll_handlers--; 622 if (i < st_ctx->poll_handlers) { 623 /* Last entry replaces this one. */ 624 st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers]; 625 } 626 error = 0; 627 } 628 return error; 629 } 630 631 static __inline void 632 iopoll_reset_state(struct iopoll_ctx *io_ctx) 633 { 634 crit_enter(); 635 io_ctx->poll_burst = io_ctx->poll_each_burst; 636 io_ctx->pending_polls = 0; 637 io_ctx->residual_burst = 0; 638 io_ctx->phase = 0; 639 io_ctx->kern_frac = 0; 640 bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t)); 641 bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t)); 642 crit_exit(); 643 } 644 645 static void 646 iopoll_init(int cpuid) 647 { 648 KKASSERT(cpuid < ncpus2); 649 650 rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX); 651 txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX); 652 } 653 654 static struct iopoll_ctx * 655 iopoll_ctx_create(int cpuid, int poll_type) 656 { 657 struct poll_comm *comm; 658 struct iopoll_ctx *io_ctx; 659 const char *poll_type_str; 660 netisr_fn_t handler, more_handler; 661 662 KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX); 663 664 /* 665 * Make sure that tunables are in sane state 666 */ 667 if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX) 668 iopoll_burst_max = MIN_IOPOLL_BURST_MAX; 669 else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX) 670 iopoll_burst_max = MAX_IOPOLL_BURST_MAX; 671 672 if (iopoll_each_burst > iopoll_burst_max) 673 iopoll_each_burst = iopoll_burst_max; 674 675 comm = poll_common[cpuid]; 676 677 /* 678 * Create the per-cpu polling context 679 */ 680 io_ctx = kmalloc_cachealign(sizeof(*io_ctx), M_DEVBUF, 681 M_WAITOK | M_ZERO); 682 683 io_ctx->poll_each_burst = iopoll_each_burst; 684 io_ctx->poll_burst_max = iopoll_burst_max; 685 io_ctx->user_frac = iopoll_user_frac; 686 if (poll_type == IFPOLL_RX) 687 io_ctx->pollhz = comm->pollhz; 688 else 689 io_ctx->pollhz = comm->pollhz / (comm->poll_txfrac + 1); 690 io_ctx->poll_cpuid = cpuid; 691 iopoll_reset_state(io_ctx); 692 693 if (poll_type == IFPOLL_RX) { 694 handler = rxpoll_handler; 695 more_handler = rxpollmore_handler; 696 } else { 697 handler = txpoll_handler; 698 more_handler = txpollmore_handler; 699 } 700 701 netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport, 702 0, handler); 703 io_ctx->poll_netmsg.lmsg.u.ms_resultp = io_ctx; 704 705 netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport, 706 0, more_handler); 707 io_ctx->poll_more_netmsg.lmsg.u.ms_resultp = io_ctx; 708 709 /* 710 * Initialize per-cpu sysctl nodes 711 */ 712 if (poll_type == IFPOLL_RX) 713 poll_type_str = "rx"; 714 else 715 poll_type_str = "tx"; 716 717 sysctl_ctx_init(&io_ctx->poll_sysctl_ctx); 718 io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx, 719 SYSCTL_CHILDREN(comm->sysctl_tree), 720 OID_AUTO, poll_type_str, CTLFLAG_RD, 0, ""); 721 iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx, 722 SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx, poll_type); 723 724 return io_ctx; 725 } 726 727 /* 728 * Hook from iopoll systimer. Tries to schedule an iopoll, but keeps 729 * track of lost ticks due to the previous handler taking too long. 730 * Normally, this should not happen, because polling handler should 731 * run for a short time. However, in some cases (e.g. when there are 732 * changes in link status etc.) the drivers take a very long time 733 * (even in the order of milliseconds) to reset and reconfigure the 734 * device, causing apparent lost polls. 735 * 736 * The first part of the code is just for debugging purposes, and tries 737 * to count how often hardclock ticks are shorter than they should, 738 * meaning either stray interrupts or delayed events. 739 * 740 * WARNING! called from fastint or IPI, the MP lock might not be held. 741 * NOTE: Caller should hold critical section. 742 */ 743 static void 744 iopoll_clock(struct iopoll_ctx *io_ctx) 745 { 746 union microtime_pcpu t; 747 int delta; 748 749 KKASSERT(mycpuid == io_ctx->poll_cpuid); 750 751 if (io_ctx->poll_handlers == 0) 752 return; 753 754 logpoll(ioclock_start); 755 756 microtime_pcpu_get(&t); 757 delta = microtime_pcpu_diff(&io_ctx->prev_t, &t); 758 if (delta * io_ctx->pollhz < 500000) 759 io_ctx->short_ticks++; 760 else 761 io_ctx->prev_t = t; 762 763 if (io_ctx->pending_polls > 100) { 764 /* 765 * Too much, assume it has stalled (not always true 766 * see comment above). 767 */ 768 io_ctx->stalled++; 769 io_ctx->pending_polls = 0; 770 io_ctx->phase = 0; 771 } 772 773 if (io_ctx->phase <= 2) { 774 if (io_ctx->phase != 0) 775 io_ctx->suspect++; 776 io_ctx->phase = 1; 777 sched_iopoll(io_ctx); 778 io_ctx->phase = 2; 779 } 780 if (io_ctx->pending_polls++ > 0) 781 io_ctx->lost_polls++; 782 783 logpoll(ioclock_end); 784 } 785 786 /* 787 * rxpoll_handler and txpoll_handler are scheduled by sched_iopoll when 788 * appropriate, typically once per polling systimer tick. 789 * 790 * Note that the message is replied immediately in order to allow a new 791 * ISR to be scheduled in the handler. 792 */ 793 static void 794 rxpoll_handler(netmsg_t msg) 795 { 796 struct iopoll_ctx *io_ctx; 797 struct thread *td = curthread; 798 int i, cycles; 799 800 logpoll(rx_start); 801 802 io_ctx = msg->lmsg.u.ms_resultp; 803 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 804 805 crit_enter_quick(td); 806 807 /* Reply ASAP */ 808 lwkt_replymsg(&msg->lmsg, 0); 809 810 if (io_ctx->poll_handlers == 0) { 811 crit_exit_quick(td); 812 logpoll(rx_end); 813 return; 814 } 815 816 io_ctx->phase = 3; 817 if (io_ctx->residual_burst == 0) { 818 /* First call in this tick */ 819 microtime_pcpu_get(&io_ctx->poll_start_t); 820 io_ctx->residual_burst = io_ctx->poll_burst; 821 } 822 cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ? 823 io_ctx->residual_burst : io_ctx->poll_each_burst; 824 io_ctx->residual_burst -= cycles; 825 826 for (i = 0; i < io_ctx->poll_handlers; i++) { 827 const struct iopoll_rec *rec = &io_ctx->pr[i]; 828 struct ifnet *ifp = rec->ifp; 829 830 if (!lwkt_serialize_try(rec->serializer)) 831 continue; 832 833 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) == 834 (IFF_RUNNING | IFF_NPOLLING)) 835 rec->poll_func(ifp, rec->arg, cycles); 836 837 lwkt_serialize_exit(rec->serializer); 838 } 839 840 /* 841 * Do a quick exit/enter to catch any higher-priority 842 * interrupt sources. 843 */ 844 crit_exit_quick(td); 845 crit_enter_quick(td); 846 847 sched_iopollmore(io_ctx); 848 io_ctx->phase = 4; 849 850 crit_exit_quick(td); 851 852 logpoll(rx_end); 853 } 854 855 static void 856 txpoll_handler(netmsg_t msg) 857 { 858 struct iopoll_ctx *io_ctx; 859 struct thread *td = curthread; 860 int i; 861 862 logpoll(tx_start); 863 864 io_ctx = msg->lmsg.u.ms_resultp; 865 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 866 867 crit_enter_quick(td); 868 869 /* Reply ASAP */ 870 lwkt_replymsg(&msg->lmsg, 0); 871 872 if (io_ctx->poll_handlers == 0) { 873 crit_exit_quick(td); 874 logpoll(tx_end); 875 return; 876 } 877 878 io_ctx->phase = 3; 879 880 for (i = 0; i < io_ctx->poll_handlers; i++) { 881 const struct iopoll_rec *rec = &io_ctx->pr[i]; 882 struct ifnet *ifp = rec->ifp; 883 884 if (!lwkt_serialize_try(rec->serializer)) 885 continue; 886 887 if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) == 888 (IFF_RUNNING | IFF_NPOLLING)) 889 rec->poll_func(ifp, rec->arg, -1); 890 891 lwkt_serialize_exit(rec->serializer); 892 } 893 894 /* 895 * Do a quick exit/enter to catch any higher-priority 896 * interrupt sources. 897 */ 898 crit_exit_quick(td); 899 crit_enter_quick(td); 900 901 sched_iopollmore(io_ctx); 902 io_ctx->phase = 4; 903 904 crit_exit_quick(td); 905 906 logpoll(tx_end); 907 } 908 909 /* 910 * rxpollmore_handler and txpollmore_handler are called after other netisr's, 911 * possibly scheduling another rxpoll_handler or txpoll_handler call, or 912 * adapting the burst size for the next cycle. 913 * 914 * It is very bad to fetch large bursts of packets from a single card at once, 915 * because the burst could take a long time to be completely processed leading 916 * to unfairness. To reduce the problem, and also to account better for time 917 * spent in network-related processing, we split the burst in smaller chunks 918 * of fixed size, giving control to the other netisr's between chunks. This 919 * helps in improving the fairness, reducing livelock and accounting for the 920 * work performed in low level handling. 921 */ 922 static void 923 rxpollmore_handler(netmsg_t msg) 924 { 925 struct thread *td = curthread; 926 struct iopoll_ctx *io_ctx; 927 union microtime_pcpu t; 928 int kern_load; 929 uint32_t pending_polls; 930 931 logpoll(rx_mstart); 932 933 io_ctx = msg->lmsg.u.ms_resultp; 934 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 935 936 crit_enter_quick(td); 937 938 /* Replay ASAP */ 939 lwkt_replymsg(&msg->lmsg, 0); 940 941 if (io_ctx->poll_handlers == 0) { 942 crit_exit_quick(td); 943 logpoll(rx_mend); 944 return; 945 } 946 947 io_ctx->phase = 5; 948 if (io_ctx->residual_burst > 0) { 949 sched_iopoll(io_ctx); 950 crit_exit_quick(td); 951 /* Will run immediately on return, followed by netisrs */ 952 logpoll(rx_mend); 953 return; 954 } 955 956 /* Here we can account time spent in iopoll's in this tick */ 957 microtime_pcpu_get(&t); 958 kern_load = microtime_pcpu_diff(&io_ctx->poll_start_t, &t); 959 kern_load = (kern_load * io_ctx->pollhz) / 10000; /* 0..100 */ 960 io_ctx->kern_frac = kern_load; 961 962 if (kern_load > (100 - io_ctx->user_frac)) { 963 /* Try decrease ticks */ 964 if (io_ctx->poll_burst > 1) 965 io_ctx->poll_burst--; 966 } else { 967 if (io_ctx->poll_burst < io_ctx->poll_burst_max) 968 io_ctx->poll_burst++; 969 } 970 971 io_ctx->pending_polls--; 972 pending_polls = io_ctx->pending_polls; 973 974 if (pending_polls == 0) { 975 /* We are done */ 976 io_ctx->phase = 0; 977 } else { 978 /* 979 * Last cycle was long and caused us to miss one or more 980 * hardclock ticks. Restart processing again, but slightly 981 * reduce the burst size to prevent that this happens again. 982 */ 983 io_ctx->poll_burst -= (io_ctx->poll_burst / 8); 984 if (io_ctx->poll_burst < 1) 985 io_ctx->poll_burst = 1; 986 sched_iopoll(io_ctx); 987 io_ctx->phase = 6; 988 } 989 990 crit_exit_quick(td); 991 992 logpoll(rx_mend); 993 } 994 995 static void 996 txpollmore_handler(netmsg_t msg) 997 { 998 struct thread *td = curthread; 999 struct iopoll_ctx *io_ctx; 1000 uint32_t pending_polls; 1001 1002 logpoll(tx_mstart); 1003 1004 io_ctx = msg->lmsg.u.ms_resultp; 1005 KKASSERT(&td->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1006 1007 crit_enter_quick(td); 1008 1009 /* Replay ASAP */ 1010 lwkt_replymsg(&msg->lmsg, 0); 1011 1012 if (io_ctx->poll_handlers == 0) { 1013 crit_exit_quick(td); 1014 logpoll(tx_mend); 1015 return; 1016 } 1017 1018 io_ctx->phase = 5; 1019 1020 io_ctx->pending_polls--; 1021 pending_polls = io_ctx->pending_polls; 1022 1023 if (pending_polls == 0) { 1024 /* We are done */ 1025 io_ctx->phase = 0; 1026 } else { 1027 /* 1028 * Last cycle was long and caused us to miss one or more 1029 * hardclock ticks. Restart processing again. 1030 */ 1031 sched_iopoll(io_ctx); 1032 io_ctx->phase = 6; 1033 } 1034 1035 crit_exit_quick(td); 1036 1037 logpoll(tx_mend); 1038 } 1039 1040 static void 1041 iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent, 1042 struct iopoll_ctx *io_ctx, int poll_type) 1043 { 1044 if (poll_type == IFPOLL_RX) { 1045 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max", 1046 CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax, 1047 "IU", "Max Polling burst size"); 1048 1049 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst", 1050 CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst, 1051 "IU", "Max size of each burst"); 1052 1053 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD, 1054 &io_ctx->poll_burst, 0, "Current polling burst size"); 1055 1056 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW, 1057 &io_ctx->user_frac, 0, "Desired user fraction of cpu time"); 1058 1059 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD, 1060 &io_ctx->kern_frac, 0, "Kernel fraction of cpu time"); 1061 1062 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD, 1063 &io_ctx->residual_burst, 0, 1064 "# of residual cycles in burst"); 1065 } 1066 1067 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD, 1068 &io_ctx->phase, 0, "Polling phase"); 1069 1070 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW, 1071 &io_ctx->suspect, "Suspected events"); 1072 1073 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW, 1074 &io_ctx->stalled, "Potential stalls"); 1075 1076 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW, 1077 &io_ctx->short_ticks, 1078 "Hardclock ticks shorter than they should be"); 1079 1080 SYSCTL_ADD_ULONG(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW, 1081 &io_ctx->lost_polls, 1082 "How many times we would have lost a poll tick"); 1083 1084 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD, 1085 &io_ctx->pending_polls, 0, "Do we need to poll again"); 1086 1087 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD, 1088 &io_ctx->poll_handlers, 0, "Number of registered poll handlers"); 1089 } 1090 1091 static void 1092 sysctl_burstmax_handler(netmsg_t nmsg) 1093 { 1094 struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg; 1095 struct iopoll_ctx *io_ctx; 1096 1097 io_ctx = msg->ctx; 1098 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1099 1100 io_ctx->poll_burst_max = nmsg->lmsg.u.ms_result; 1101 if (io_ctx->poll_each_burst > io_ctx->poll_burst_max) 1102 io_ctx->poll_each_burst = io_ctx->poll_burst_max; 1103 if (io_ctx->poll_burst > io_ctx->poll_burst_max) 1104 io_ctx->poll_burst = io_ctx->poll_burst_max; 1105 if (io_ctx->residual_burst > io_ctx->poll_burst_max) 1106 io_ctx->residual_burst = io_ctx->poll_burst_max; 1107 1108 lwkt_replymsg(&nmsg->lmsg, 0); 1109 } 1110 1111 static int 1112 sysctl_burstmax(SYSCTL_HANDLER_ARGS) 1113 { 1114 struct iopoll_ctx *io_ctx = arg1; 1115 struct iopoll_sysctl_netmsg msg; 1116 uint32_t burst_max; 1117 int error; 1118 1119 burst_max = io_ctx->poll_burst_max; 1120 error = sysctl_handle_int(oidp, &burst_max, 0, req); 1121 if (error || req->newptr == NULL) 1122 return error; 1123 if (burst_max < MIN_IOPOLL_BURST_MAX) 1124 burst_max = MIN_IOPOLL_BURST_MAX; 1125 else if (burst_max > MAX_IOPOLL_BURST_MAX) 1126 burst_max = MAX_IOPOLL_BURST_MAX; 1127 1128 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 1129 0, sysctl_burstmax_handler); 1130 msg.base.lmsg.u.ms_result = burst_max; 1131 msg.ctx = io_ctx; 1132 1133 return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid), 1134 &msg.base.lmsg, 0); 1135 } 1136 1137 static void 1138 sysctl_eachburst_handler(netmsg_t nmsg) 1139 { 1140 struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg; 1141 struct iopoll_ctx *io_ctx; 1142 uint32_t each_burst; 1143 1144 io_ctx = msg->ctx; 1145 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1146 1147 each_burst = nmsg->lmsg.u.ms_result; 1148 if (each_burst > io_ctx->poll_burst_max) 1149 each_burst = io_ctx->poll_burst_max; 1150 else if (each_burst < 1) 1151 each_burst = 1; 1152 io_ctx->poll_each_burst = each_burst; 1153 1154 lwkt_replymsg(&nmsg->lmsg, 0); 1155 } 1156 1157 static int 1158 sysctl_eachburst(SYSCTL_HANDLER_ARGS) 1159 { 1160 struct iopoll_ctx *io_ctx = arg1; 1161 struct iopoll_sysctl_netmsg msg; 1162 uint32_t each_burst; 1163 int error; 1164 1165 each_burst = io_ctx->poll_each_burst; 1166 error = sysctl_handle_int(oidp, &each_burst, 0, req); 1167 if (error || req->newptr == NULL) 1168 return error; 1169 1170 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 1171 0, sysctl_eachburst_handler); 1172 msg.base.lmsg.u.ms_result = each_burst; 1173 msg.ctx = io_ctx; 1174 1175 return lwkt_domsg(netisr_cpuport(io_ctx->poll_cpuid), 1176 &msg.base.lmsg, 0); 1177 } 1178 1179 static int 1180 iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx, 1181 const struct ifpoll_io *io_rec) 1182 { 1183 int error; 1184 1185 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1186 1187 if (io_rec->poll_func == NULL) 1188 return 0; 1189 1190 /* 1191 * Check if there is room. 1192 */ 1193 if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) { 1194 /* 1195 * List full, cannot register more entries. 1196 * This should never happen; if it does, it is probably a 1197 * broken driver trying to register multiple times. Checking 1198 * this at runtime is expensive, and won't solve the problem 1199 * anyways, so just report a few times and then give up. 1200 */ 1201 static int verbose = 10; /* XXX */ 1202 if (verbose > 0) { 1203 kprintf("io poll handlers list full, " 1204 "maybe a broken driver ?\n"); 1205 verbose--; 1206 } 1207 error = ENOENT; 1208 } else { 1209 struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers]; 1210 1211 rec->ifp = ifp; 1212 rec->serializer = io_rec->serializer; 1213 rec->arg = io_rec->arg; 1214 rec->poll_func = io_rec->poll_func; 1215 1216 io_ctx->poll_handlers++; 1217 error = 0; 1218 } 1219 return error; 1220 } 1221 1222 static int 1223 iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx) 1224 { 1225 int i, error; 1226 1227 KKASSERT(&curthread->td_msgport == netisr_cpuport(io_ctx->poll_cpuid)); 1228 1229 for (i = 0; i < io_ctx->poll_handlers; ++i) { 1230 if (io_ctx->pr[i].ifp == ifp) /* Found it */ 1231 break; 1232 } 1233 if (i == io_ctx->poll_handlers) { 1234 error = ENOENT; 1235 } else { 1236 io_ctx->poll_handlers--; 1237 if (i < io_ctx->poll_handlers) { 1238 /* Last entry replaces this one. */ 1239 io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers]; 1240 } 1241 1242 if (io_ctx->poll_handlers == 0) 1243 iopoll_reset_state(io_ctx); 1244 error = 0; 1245 } 1246 return error; 1247 } 1248 1249 static void 1250 poll_comm_init(int cpuid) 1251 { 1252 struct poll_comm *comm; 1253 char cpuid_str[16]; 1254 1255 comm = kmalloc_cachealign(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO); 1256 1257 if (ifpoll_stfrac < 1) 1258 ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT; 1259 if (ifpoll_txfrac < 1) 1260 ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT; 1261 1262 comm->poll_cpuid = cpuid; 1263 comm->pollhz = poll_comm_pollhz_div(comm, ifpoll_pollhz); 1264 comm->poll_stfrac = ifpoll_stfrac - 1; 1265 comm->poll_txfrac = ifpoll_txfrac - 1; 1266 1267 ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid); 1268 1269 sysctl_ctx_init(&comm->sysctl_ctx); 1270 comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx, 1271 SYSCTL_STATIC_CHILDREN(_net_ifpoll), 1272 OID_AUTO, cpuid_str, CTLFLAG_RD, 0, ""); 1273 1274 SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree), 1275 OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW, 1276 comm, 0, sysctl_pollhz, 1277 "I", "Device polling frequency"); 1278 1279 if (cpuid == 0) { 1280 SYSCTL_ADD_PROC(&comm->sysctl_ctx, 1281 SYSCTL_CHILDREN(comm->sysctl_tree), 1282 OID_AUTO, "status_frac", 1283 CTLTYPE_INT | CTLFLAG_RW, 1284 comm, 0, sysctl_stfrac, 1285 "I", "# of cycles before status is polled"); 1286 } 1287 SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree), 1288 OID_AUTO, "tx_frac", CTLTYPE_INT | CTLFLAG_RW, 1289 comm, 0, sysctl_txfrac, 1290 "I", "# of cycles before TX is polled"); 1291 1292 poll_common[cpuid] = comm; 1293 } 1294 1295 static void 1296 poll_comm_start(int cpuid) 1297 { 1298 struct poll_comm *comm = poll_common[cpuid]; 1299 systimer_func_t func; 1300 1301 /* 1302 * Initialize systimer 1303 */ 1304 if (cpuid == 0) 1305 func = poll_comm_systimer0; 1306 else 1307 func = poll_comm_systimer; 1308 systimer_init_periodic_nq(&comm->pollclock, func, comm, 1); 1309 } 1310 1311 static void 1312 _poll_comm_systimer(struct poll_comm *comm) 1313 { 1314 iopoll_clock(rxpoll_context[comm->poll_cpuid]); 1315 if (comm->txfrac_count-- == 0) { 1316 comm->txfrac_count = comm->poll_txfrac; 1317 iopoll_clock(txpoll_context[comm->poll_cpuid]); 1318 } 1319 } 1320 1321 static void 1322 poll_comm_systimer0(systimer_t info, int in_ipi __unused, 1323 struct intrframe *frame __unused) 1324 { 1325 struct poll_comm *comm = info->data; 1326 globaldata_t gd = mycpu; 1327 1328 KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid == 0); 1329 1330 crit_enter_gd(gd); 1331 1332 if (comm->stfrac_count-- == 0) { 1333 comm->stfrac_count = comm->poll_stfrac; 1334 stpoll_clock(&stpoll_context); 1335 } 1336 _poll_comm_systimer(comm); 1337 1338 crit_exit_gd(gd); 1339 } 1340 1341 static void 1342 poll_comm_systimer(systimer_t info, int in_ipi __unused, 1343 struct intrframe *frame __unused) 1344 { 1345 struct poll_comm *comm = info->data; 1346 globaldata_t gd = mycpu; 1347 1348 KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid != 0); 1349 1350 crit_enter_gd(gd); 1351 _poll_comm_systimer(comm); 1352 crit_exit_gd(gd); 1353 } 1354 1355 static void 1356 poll_comm_adjust_pollhz(struct poll_comm *comm) 1357 { 1358 uint32_t handlers; 1359 int pollhz = 1; 1360 1361 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1362 1363 /* 1364 * If there is no polling handler registered, set systimer 1365 * frequency to the lowest value. Polling systimer frequency 1366 * will be adjusted to the requested value, once there are 1367 * registered handlers. 1368 */ 1369 handlers = rxpoll_context[mycpuid]->poll_handlers + 1370 txpoll_context[mycpuid]->poll_handlers; 1371 if (comm->poll_cpuid == 0) 1372 handlers += stpoll_context.poll_handlers; 1373 if (handlers) 1374 pollhz = comm->pollhz; 1375 systimer_adjust_periodic(&comm->pollclock, pollhz); 1376 } 1377 1378 static int 1379 sysctl_pollhz(SYSCTL_HANDLER_ARGS) 1380 { 1381 struct poll_comm *comm = arg1; 1382 struct netmsg_base nmsg; 1383 int error, phz; 1384 1385 phz = poll_comm_pollhz_conv(comm, comm->pollhz); 1386 error = sysctl_handle_int(oidp, &phz, 0, req); 1387 if (error || req->newptr == NULL) 1388 return error; 1389 if (phz <= 0) 1390 return EINVAL; 1391 else if (phz > IFPOLL_FREQ_MAX) 1392 phz = IFPOLL_FREQ_MAX; 1393 1394 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 1395 0, sysctl_pollhz_handler); 1396 nmsg.lmsg.u.ms_result = phz; 1397 1398 return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0); 1399 } 1400 1401 static void 1402 sysctl_pollhz_handler(netmsg_t nmsg) 1403 { 1404 struct poll_comm *comm = poll_common[mycpuid]; 1405 1406 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1407 1408 /* Save polling frequency */ 1409 comm->pollhz = poll_comm_pollhz_div(comm, nmsg->lmsg.u.ms_result); 1410 1411 /* 1412 * Adjust cached pollhz 1413 */ 1414 rxpoll_context[mycpuid]->pollhz = comm->pollhz; 1415 txpoll_context[mycpuid]->pollhz = 1416 comm->pollhz / (comm->poll_txfrac + 1); 1417 1418 /* 1419 * Adjust polling frequency 1420 */ 1421 poll_comm_adjust_pollhz(comm); 1422 1423 lwkt_replymsg(&nmsg->lmsg, 0); 1424 } 1425 1426 static int 1427 sysctl_stfrac(SYSCTL_HANDLER_ARGS) 1428 { 1429 struct poll_comm *comm = arg1; 1430 struct netmsg_base nmsg; 1431 int error, stfrac; 1432 1433 KKASSERT(comm->poll_cpuid == 0); 1434 1435 stfrac = comm->poll_stfrac + 1; 1436 error = sysctl_handle_int(oidp, &stfrac, 0, req); 1437 if (error || req->newptr == NULL) 1438 return error; 1439 if (stfrac < 1) 1440 return EINVAL; 1441 1442 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 1443 0, sysctl_stfrac_handler); 1444 nmsg.lmsg.u.ms_result = stfrac - 1; 1445 1446 return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0); 1447 } 1448 1449 static void 1450 sysctl_stfrac_handler(netmsg_t nmsg) 1451 { 1452 struct poll_comm *comm = poll_common[mycpuid]; 1453 int stfrac = nmsg->lmsg.u.ms_result; 1454 1455 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1456 1457 crit_enter(); 1458 comm->poll_stfrac = stfrac; 1459 if (comm->stfrac_count > comm->poll_stfrac) 1460 comm->stfrac_count = comm->poll_stfrac; 1461 crit_exit(); 1462 1463 lwkt_replymsg(&nmsg->lmsg, 0); 1464 } 1465 1466 static int 1467 sysctl_txfrac(SYSCTL_HANDLER_ARGS) 1468 { 1469 struct poll_comm *comm = arg1; 1470 struct netmsg_base nmsg; 1471 int error, txfrac; 1472 1473 txfrac = comm->poll_txfrac + 1; 1474 error = sysctl_handle_int(oidp, &txfrac, 0, req); 1475 if (error || req->newptr == NULL) 1476 return error; 1477 if (txfrac < 1) 1478 return EINVAL; 1479 1480 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 1481 0, sysctl_txfrac_handler); 1482 nmsg.lmsg.u.ms_result = txfrac - 1; 1483 1484 return lwkt_domsg(netisr_cpuport(comm->poll_cpuid), &nmsg.lmsg, 0); 1485 } 1486 1487 static void 1488 sysctl_txfrac_handler(netmsg_t nmsg) 1489 { 1490 struct poll_comm *comm = poll_common[mycpuid]; 1491 int txfrac = nmsg->lmsg.u.ms_result; 1492 1493 KKASSERT(&curthread->td_msgport == netisr_cpuport(comm->poll_cpuid)); 1494 1495 crit_enter(); 1496 comm->poll_txfrac = txfrac; 1497 if (comm->txfrac_count > comm->poll_txfrac) 1498 comm->txfrac_count = comm->poll_txfrac; 1499 crit_exit(); 1500 1501 lwkt_replymsg(&nmsg->lmsg, 0); 1502 } 1503 1504 void 1505 ifpoll_compat_setup(struct ifpoll_compat *cp, 1506 struct sysctl_ctx_list *sysctl_ctx, 1507 struct sysctl_oid *sysctl_tree, 1508 int unit, struct lwkt_serialize *slz) 1509 { 1510 cp->ifpc_stcount = 0; 1511 cp->ifpc_stfrac = ((poll_common[0]->poll_stfrac + 1) * 1512 howmany(IOPOLL_BURST_MAX, IOPOLL_EACH_BURST)) - 1; 1513 1514 cp->ifpc_cpuid = unit % ncpus2; 1515 cp->ifpc_serializer = slz; 1516 1517 if (sysctl_ctx != NULL && sysctl_tree != NULL) { 1518 SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), 1519 OID_AUTO, "npoll_stfrac", CTLTYPE_INT | CTLFLAG_RW, 1520 cp, 0, sysctl_compat_npoll_stfrac, "I", 1521 "polling status frac"); 1522 SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), 1523 OID_AUTO, "npoll_cpuid", CTLTYPE_INT | CTLFLAG_RW, 1524 cp, 0, sysctl_compat_npoll_cpuid, "I", 1525 "polling cpuid"); 1526 } 1527 } 1528 1529 static int 1530 sysctl_compat_npoll_stfrac(SYSCTL_HANDLER_ARGS) 1531 { 1532 struct ifpoll_compat *cp = arg1; 1533 int error = 0, stfrac; 1534 1535 lwkt_serialize_enter(cp->ifpc_serializer); 1536 1537 stfrac = cp->ifpc_stfrac + 1; 1538 error = sysctl_handle_int(oidp, &stfrac, 0, req); 1539 if (!error && req->newptr != NULL) { 1540 if (stfrac < 1) { 1541 error = EINVAL; 1542 } else { 1543 cp->ifpc_stfrac = stfrac - 1; 1544 if (cp->ifpc_stcount > cp->ifpc_stfrac) 1545 cp->ifpc_stcount = cp->ifpc_stfrac; 1546 } 1547 } 1548 1549 lwkt_serialize_exit(cp->ifpc_serializer); 1550 return error; 1551 } 1552 1553 static int 1554 sysctl_compat_npoll_cpuid(SYSCTL_HANDLER_ARGS) 1555 { 1556 struct ifpoll_compat *cp = arg1; 1557 int error = 0, cpuid; 1558 1559 lwkt_serialize_enter(cp->ifpc_serializer); 1560 1561 cpuid = cp->ifpc_cpuid; 1562 error = sysctl_handle_int(oidp, &cpuid, 0, req); 1563 if (!error && req->newptr != NULL) { 1564 if (cpuid < 0 || cpuid >= ncpus2) 1565 error = EINVAL; 1566 else 1567 cp->ifpc_cpuid = cpuid; 1568 } 1569 1570 lwkt_serialize_exit(cp->ifpc_serializer); 1571 return error; 1572 } 1573