xref: /dragonfly/sys/net/if_poll.c (revision b58087dc)
1 /*-
2  * Copyright (c) 2001-2002 Luigi Rizzo
3  *
4  * Supported by: the Xorp Project (www.xorp.org)
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28  */
29 
30 #include "opt_ifpoll.h"
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/ktr.h>
35 #include <sys/malloc.h>
36 #include <sys/serialize.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 
40 #include <sys/thread2.h>
41 #include <sys/msgport2.h>
42 
43 #include <machine/atomic.h>
44 #include <machine/clock.h>
45 #include <machine/smp.h>
46 
47 #include <net/if.h>
48 #include <net/if_poll.h>
49 #include <net/netmsg2.h>
50 
51 /*
52  * Polling support for network device drivers.
53  *
54  * Drivers which support this feature try to register one status polling
55  * handler and several TX/RX polling handlers with the polling code.
56  * If the interface's if_qpoll is called with a non-NULL second argument,
57  * a register operation is requested; otherwise a deregister operation is
58  * requested.  If the requested operation is "register", the driver should
59  * set up the ifpoll_info passed in according to its own needs:
60  *   ifpoll_info.ifpi_status.status_func == NULL
61  *     No status polling handler will be installed on CPU(0)
62  *   ifpoll_info.ifpi_rx[n].poll_func == NULL
63  *     No RX polling handler will be installed on CPU(n)
64  *   ifpoll_info.ifpi_tx[n].poll_func == NULL
65  *     No TX polling handler will be installed on CPU(n)
66  *
67  * RX is polled at the specified polling frequency (net.ifpoll.X.pollhz).
68  * TX and status polling may be done at a lower frequency than RX
69  * (net.ifpoll.0.status_frac and net.ifpoll.X.tx_frac).  To avoid systimer
70  * staggering at high frequency, RX systimer gives TX and status polling a
71  * piggyback (XXX).
72  *
73  * All of the registered polling handlers are called only if the interface
74  * is marked as 'IFF_RUNNING and IFF_NPOLLING'.  However, the interface's
75  * register and deregister function (ifnet.if_qpoll) will be called even
76  * if the interface is not marked with 'IFF_RUNNING'.
77  *
78  * If registration is successful, the driver must disable interrupts,
79  * and further I/O is performed through the TX/RX polling handlers, which
80  * are invoked (at least once per clock tick) with 3 arguments: the "arg"
81  * passed at register time, a struct ifnet pointer, and a "count" limit.
82  * The registered serializer will be held before calling the related
83  * polling handler.
84  *
85  * The count limit specifies how much work the handler can do during the
86  * call -- typically this is the number of packets to be received, or
87  * transmitted, etc. (drivers are free to interpret this number, as long
88  * as the max time spent in the function grows roughly linearly with the
89  * count).
90  *
91  * A second variable controls the sharing of the CPU between polling/kernel
92  * network processing and other activities (typically userlevel tasks):
93  * net.ifpoll.X.{rx,tx}.user_frac (between 0 and 100, default 50) sets the
94  * share of CPU allocated to user tasks.  CPU is allocated proportionally
95  * to the shares, by dynamically adjusting the "count" (poll_burst).
96  *
97  * Other parameters should be left at their default values.
98  * The following constraints hold:
99  *
100  *	1 <= poll_burst <= poll_burst_max
101  *	1 <= poll_each_burst <= poll_burst_max
102  *	MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
103  */
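
/*
 * Illustrative sketch (not part of the original file): one way a driver's
 * if_qpoll handler could fill in the ifpoll_info described above.  The
 * driver name "foo", its softc layout and the foo_npoll_*() handlers are
 * hypothetical.  Entries left NULL are simply not installed (the structure
 * is zeroed by ifpoll_register() before if_qpoll is called), and interrupt
 * enabling/disabling is left to the driver's init path; a NULL info pointer
 * means a deregister request.
 *
 *	static void
 *	foo_qpoll(struct ifnet *ifp, struct ifpoll_info *info)
 *	{
 *		struct foo_softc *sc = ifp->if_softc;
 *
 *		if (info != NULL) {
 *			info->ifpi_status.status_func = foo_npoll_status;
 *			info->ifpi_status.serializer = &sc->sc_serializer;
 *
 *			info->ifpi_rx[0].poll_func = foo_npoll_rx;
 *			info->ifpi_rx[0].arg = sc;
 *			info->ifpi_rx[0].serializer = &sc->sc_serializer;
 *
 *			info->ifpi_tx[0].poll_func = foo_npoll_tx;
 *			info->ifpi_tx[0].arg = sc;
 *			info->ifpi_tx[0].serializer = &sc->sc_serializer;
 *		}
 *	}
 */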
104 
105 #define IFPOLL_LIST_LEN		128
106 #define IFPOLL_FREQ_MAX		30000
107 
108 #define MIN_IOPOLL_BURST_MAX	10
109 #define MAX_IOPOLL_BURST_MAX	1000
110 #define IOPOLL_BURST_MAX	150	/* good for 100Mbit net and HZ=1000 */
111 
112 #define IOPOLL_EACH_BURST	5
113 
114 #define IFPOLL_FREQ_DEFAULT	2000
115 
116 #define IFPOLL_TXFRAC_DEFAULT	1	/* 1/2 of the pollhz */
117 #define IFPOLL_STFRAC_DEFAULT	19	/* 1/20 of the pollhz */
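
/*
 * With the default pollhz above (IFPOLL_FREQ_DEFAULT, 2000Hz) these
 * fractions give status polling at 2000 / (19 + 1) = 100Hz and TX polling
 * at 2000 / (1 + 1) = 1000Hz; stpoll_init() and iopoll_ctx_create() below
 * compute the derived rate as pollhz / (frac + 1).
 */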
118 
119 #define IFPOLL_RX		0x1
120 #define IFPOLL_TX		0x2
121 
122 union ifpoll_time {
123 	struct timeval		tv;
124 	uint64_t		tsc;
125 };
126 
127 struct iopoll_rec {
128 	struct lwkt_serialize	*serializer;
129 	struct ifnet		*ifp;
130 	void			*arg;
131 	ifpoll_iofn_t		poll_func;
132 };
133 
134 struct iopoll_ctx {
135 	union ifpoll_time	prev_t;
136 	uint32_t		short_ticks;		/* statistics */
137 	uint32_t		lost_polls;		/* statistics */
138 	uint32_t		suspect;		/* statistics */
139 	uint32_t		stalled;		/* statistics */
140 	uint32_t		pending_polls;		/* state */
141 
142 	struct netmsg		poll_netmsg;
143 
144 	int			poll_cpuid;
145 	int			pollhz;
146 	uint32_t		phase;			/* state */
147 	int			residual_burst;		/* state */
148 	uint32_t		poll_each_burst;	/* tunable */
149 	union ifpoll_time	poll_start_t;		/* state */
150 
151 	uint32_t		poll_handlers; /* next free entry in pr[]. */
152 	struct iopoll_rec	pr[IFPOLL_LIST_LEN];
153 
154 	struct netmsg		poll_more_netmsg;
155 
156 	uint32_t		poll_burst;		/* state */
157 	uint32_t		poll_burst_max;		/* tunable */
158 	uint32_t		user_frac;		/* tunable */
159 	uint32_t		kern_frac;		/* state */
160 
161 	struct sysctl_ctx_list	poll_sysctl_ctx;
162 	struct sysctl_oid	*poll_sysctl_tree;
163 } __cachealign;
164 
165 struct poll_comm {
166 	struct systimer		pollclock;
167 	int			poll_cpuid;
168 
169 	int			stfrac_count;		/* state */
170 	int			poll_stfrac;		/* tunable */
171 
172 	int			txfrac_count;		/* state */
173 	int			poll_txfrac;		/* tunable */
174 
175 	int			pollhz;			/* tunable */
176 
177 	struct sysctl_ctx_list	sysctl_ctx;
178 	struct sysctl_oid	*sysctl_tree;
179 } __cachealign;
180 
181 struct stpoll_rec {
182 	struct lwkt_serialize	*serializer;
183 	struct ifnet		*ifp;
184 	ifpoll_stfn_t		status_func;
185 };
186 
187 struct stpoll_ctx {
188 	struct netmsg		poll_netmsg;
189 
190 	int			pollhz;
191 
192 	uint32_t		poll_handlers; /* next free entry in pr[]. */
193 	struct stpoll_rec	pr[IFPOLL_LIST_LEN];
194 
195 	struct sysctl_ctx_list	poll_sysctl_ctx;
196 	struct sysctl_oid	*poll_sysctl_tree;
197 };
198 
199 struct iopoll_sysctl_netmsg {
200 	struct netmsg		nmsg;
201 	struct iopoll_ctx	*ctx;
202 };
203 
204 void		ifpoll_init_pcpu(int);
205 static void	ifpoll_register_handler(struct netmsg *);
206 static void	ifpoll_deregister_handler(struct netmsg *);
207 
208 /*
209  * Status polling
210  */
211 static void	stpoll_init(void);
212 static void	stpoll_handler(struct netmsg *);
213 static void	stpoll_clock(struct stpoll_ctx *);
214 static int	stpoll_register(struct ifnet *, const struct ifpoll_status *);
215 static int	stpoll_deregister(struct ifnet *);
216 
217 /*
218  * RX/TX polling
219  */
220 static struct iopoll_ctx *iopoll_ctx_create(int, int);
221 static void	iopoll_init(int);
222 static void	iopoll_handler(struct netmsg *);
223 static void	iopollmore_handler(struct netmsg *);
224 static void	iopoll_clock(struct iopoll_ctx *);
225 static int	iopoll_register(struct ifnet *, struct iopoll_ctx *,
226 		    const struct ifpoll_io *);
227 static int	iopoll_deregister(struct ifnet *, struct iopoll_ctx *);
228 
229 static void	iopoll_add_sysctl(struct sysctl_ctx_list *,
230 		    struct sysctl_oid_list *, struct iopoll_ctx *);
231 static void	sysctl_burstmax_handler(struct netmsg *);
232 static int	sysctl_burstmax(SYSCTL_HANDLER_ARGS);
233 static void	sysctl_eachburst_handler(struct netmsg *);
234 static int	sysctl_eachburst(SYSCTL_HANDLER_ARGS);
235 
236 /*
237  * Common functions
238  */
239 static void	poll_comm_init(int);
240 static void	poll_comm_start(int);
241 static void	poll_comm_adjust_pollhz(struct poll_comm *);
242 static void	poll_comm_systimer0(systimer_t, struct intrframe *);
243 static void	poll_comm_systimer(systimer_t, struct intrframe *);
244 static void	sysctl_pollhz_handler(struct netmsg *);
245 static void	sysctl_stfrac_handler(struct netmsg *);
246 static void	sysctl_txfrac_handler(struct netmsg *);
247 static int	sysctl_pollhz(SYSCTL_HANDLER_ARGS);
248 static int	sysctl_stfrac(SYSCTL_HANDLER_ARGS);
249 static int	sysctl_txfrac(SYSCTL_HANDLER_ARGS);
250 
251 static struct stpoll_ctx	stpoll_context;
252 static struct poll_comm		*poll_common[IFPOLL_CTX_MAX];
253 static struct iopoll_ctx	*rxpoll_context[IFPOLL_CTX_MAX];
254 static struct iopoll_ctx	*txpoll_context[IFPOLL_CTX_MAX];
255 
256 SYSCTL_NODE(_net, OID_AUTO, ifpoll, CTLFLAG_RW, 0,
257 	    "Network device polling parameters");
258 
259 static int	ifpoll_ncpus = IFPOLL_CTX_MAX;
260 
261 static int	iopoll_burst_max = IOPOLL_BURST_MAX;
262 static int	iopoll_each_burst = IOPOLL_EACH_BURST;
263 
264 static int	ifpoll_pollhz = IFPOLL_FREQ_DEFAULT;
265 static int	ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
266 static int	ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;
267 
268 TUNABLE_INT("net.ifpoll.burst_max", &iopoll_burst_max);
269 TUNABLE_INT("net.ifpoll.each_burst", &iopoll_each_burst);
270 TUNABLE_INT("net.ifpoll.pollhz", &ifpoll_pollhz);
271 TUNABLE_INT("net.ifpoll.status_frac", &ifpoll_stfrac);
272 TUNABLE_INT("net.ifpoll.tx_frac", &ifpoll_txfrac);
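
/*
 * These knobs can be set as loader tunables (e.g. net.ifpoll.pollhz=4000 in
 * /boot/loader.conf) and, where a sysctl handler is installed below, changed
 * at runtime.  Example values only; the names follow the per-cpu sysctl
 * trees built in poll_comm_init(), stpoll_init() and iopoll_ctx_create():
 *
 *	sysctl net.ifpoll.0.pollhz=4000
 *	sysctl net.ifpoll.0.rx.user_frac=30
 *	sysctl net.ifpoll.0.rx.burst_max=300
 */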
273 
274 static __inline void
275 ifpoll_sendmsg_oncpu(struct netmsg *msg)
276 {
277 	if (msg->nm_lmsg.ms_flags & MSGF_DONE)
278 		ifnet_sendmsg(&msg->nm_lmsg, mycpuid);
279 }
280 
281 static __inline void
282 sched_stpoll(struct stpoll_ctx *st_ctx)
283 {
284 	ifpoll_sendmsg_oncpu(&st_ctx->poll_netmsg);
285 }
286 
287 static __inline void
288 sched_iopoll(struct iopoll_ctx *io_ctx)
289 {
290 	ifpoll_sendmsg_oncpu(&io_ctx->poll_netmsg);
291 }
292 
293 static __inline void
294 sched_iopollmore(struct iopoll_ctx *io_ctx)
295 {
296 	ifpoll_sendmsg_oncpu(&io_ctx->poll_more_netmsg);
297 }
298 
299 static __inline void
300 ifpoll_time_get(union ifpoll_time *t)
301 {
302 	if (tsc_present)
303 		t->tsc = rdtsc();
304 	else
305 		microuptime(&t->tv);
306 }
307 
308 /* Return time diff in us */
309 static __inline int
310 ifpoll_time_diff(const union ifpoll_time *s, const union ifpoll_time *e)
311 {
312 	if (tsc_present) {
313 		return (((e->tsc - s->tsc) * 1000000) / tsc_frequency);
314 	} else {
315 		return ((e->tv.tv_usec - s->tv.tv_usec) +
316 			(e->tv.tv_sec - s->tv.tv_sec) * 1000000);
317 	}
318 }
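
/*
 * Example of the conversion above: with tsc_frequency at 2GHz, a TSC delta
 * of 2,000,000 cycles yields (2000000 * 1000000) / 2000000000 = 1000us.
 */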
319 
320 /*
321  * Initialize the per-cpu polling context.  Called from kern_clock.c.
322  */
323 void
324 ifpoll_init_pcpu(int cpuid)
325 {
326 	if (cpuid >= IFPOLL_CTX_MAX)
327 		return;
328 
329 	if (cpuid == 0) {
330 		if (ifpoll_ncpus > ncpus)
331 			ifpoll_ncpus = ncpus;
332 		if (bootverbose)
333 			kprintf("ifpoll_ncpus %d\n", ifpoll_ncpus);
334 	}
335 
336 	poll_comm_init(cpuid);
337 
338 	if (cpuid == 0)
339 		stpoll_init();
340 	iopoll_init(cpuid);
341 
342 	poll_comm_start(cpuid);
343 }
344 
345 int
346 ifpoll_register(struct ifnet *ifp)
347 {
348 	struct ifpoll_info info;
349 	struct netmsg nmsg;
350 	int error;
351 
352 	if (ifp->if_qpoll == NULL) {
353 		/* Device does not support polling */
354 		return EOPNOTSUPP;
355 	}
356 
357 	/*
358 	 * Attempt to register.  Interlock with IFF_NPOLLING.
359 	 */
360 
361 	ifnet_serialize_all(ifp);
362 
363 	if (ifp->if_flags & IFF_NPOLLING) {
364 		/* Already polling */
365 		ifnet_deserialize_all(ifp);
366 		return EBUSY;
367 	}
368 
369 	bzero(&info, sizeof(info));
370 	info.ifpi_ifp = ifp;
371 
372 	ifp->if_flags |= IFF_NPOLLING;
373 	ifp->if_qpoll(ifp, &info);
374 
375 	ifnet_deserialize_all(ifp);
376 
377 	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
378 		    0, ifpoll_register_handler);
379 	nmsg.nm_lmsg.u.ms_resultp = &info;
380 
381 	error = ifnet_domsg(&nmsg.nm_lmsg, 0);
382 	if (error) {
383 		if (!ifpoll_deregister(ifp)) {
384 			if_printf(ifp, "ifpoll_register: "
385 				  "ifpoll_deregister failed!\n");
386 		}
387 	}
388 	return error;
389 }
390 
391 int
392 ifpoll_deregister(struct ifnet *ifp)
393 {
394 	struct netmsg nmsg;
395 	int error;
396 
397 	if (ifp->if_qpoll == NULL)
398 		return EOPNOTSUPP;
399 
400 	ifnet_serialize_all(ifp);
401 
402 	if ((ifp->if_flags & IFF_NPOLLING) == 0) {
403 		ifnet_deserialize_all(ifp);
404 		return EINVAL;
405 	}
406 	ifp->if_flags &= ~IFF_NPOLLING;
407 
408 	ifnet_deserialize_all(ifp);
409 
410 	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
411 		    0, ifpoll_deregister_handler);
412 	nmsg.nm_lmsg.u.ms_resultp = ifp;
413 
414 	error = ifnet_domsg(&nmsg.nm_lmsg, 0);
415 	if (!error) {
416 		ifnet_serialize_all(ifp);
417 		ifp->if_qpoll(ifp, NULL);
418 		ifnet_deserialize_all(ifp);
419 	}
420 	return error;
421 }
422 
423 static void
424 ifpoll_register_handler(struct netmsg *nmsg)
425 {
426 	const struct ifpoll_info *info = nmsg->nm_lmsg.u.ms_resultp;
427 	int cpuid = mycpuid, nextcpu;
428 	int error;
429 
430 	KKASSERT(cpuid < ifpoll_ncpus);
431 	KKASSERT(&curthread->td_msgport == ifnet_portfn(cpuid));
432 
433 	if (cpuid == 0) {
434 		error = stpoll_register(info->ifpi_ifp, &info->ifpi_status);
435 		if (error)
436 			goto failed;
437 	}
438 
439 	error = iopoll_register(info->ifpi_ifp, rxpoll_context[cpuid],
440 				&info->ifpi_rx[cpuid]);
441 	if (error)
442 		goto failed;
443 
444 	error = iopoll_register(info->ifpi_ifp, txpoll_context[cpuid],
445 				&info->ifpi_tx[cpuid]);
446 	if (error)
447 		goto failed;
448 
449 	/* Adjust polling frequency, after all registration is done */
450 	poll_comm_adjust_pollhz(poll_common[cpuid]);
451 
452 	nextcpu = cpuid + 1;
453 	if (nextcpu < ifpoll_ncpus)
454 		ifnet_forwardmsg(&nmsg->nm_lmsg, nextcpu);
455 	else
456 		lwkt_replymsg(&nmsg->nm_lmsg, 0);
457 	return;
458 failed:
459 	lwkt_replymsg(&nmsg->nm_lmsg, error);
460 }
461 
462 static void
463 ifpoll_deregister_handler(struct netmsg *nmsg)
464 {
465 	struct ifnet *ifp = nmsg->nm_lmsg.u.ms_resultp;
466 	int cpuid = mycpuid, nextcpu;
467 
468 	KKASSERT(cpuid < ifpoll_ncpus);
469 	KKASSERT(&curthread->td_msgport == ifnet_portfn(cpuid));
470 
471 	/* Ignore errors */
472 	if (cpuid == 0)
473 		stpoll_deregister(ifp);
474 	iopoll_deregister(ifp, rxpoll_context[cpuid]);
475 	iopoll_deregister(ifp, txpoll_context[cpuid]);
476 
477 	/* Adjust polling frequency, after all deregistration is done */
478 	poll_comm_adjust_pollhz(poll_common[cpuid]);
479 
480 	nextcpu = cpuid + 1;
481 	if (nextcpu < ifpoll_ncpus)
482 		ifnet_forwardmsg(&nmsg->nm_lmsg, nextcpu);
483 	else
484 		lwkt_replymsg(&nmsg->nm_lmsg, 0);
485 }
486 
487 static void
488 stpoll_init(void)
489 {
490 	struct stpoll_ctx *st_ctx = &stpoll_context;
491 	const struct poll_comm *comm = poll_common[0];
492 
493 	st_ctx->pollhz = comm->pollhz / (comm->poll_stfrac + 1);
494 
495 	sysctl_ctx_init(&st_ctx->poll_sysctl_ctx);
496 	st_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&st_ctx->poll_sysctl_ctx,
497 				   SYSCTL_CHILDREN(comm->sysctl_tree),
498 				   OID_AUTO, "status", CTLFLAG_RD, 0, "");
499 
500 	SYSCTL_ADD_UINT(&st_ctx->poll_sysctl_ctx,
501 			SYSCTL_CHILDREN(st_ctx->poll_sysctl_tree),
502 			OID_AUTO, "handlers", CTLFLAG_RD,
503 			&st_ctx->poll_handlers, 0,
504 			"Number of registered status poll handlers");
505 
506 	netmsg_init(&st_ctx->poll_netmsg, NULL, &netisr_adone_rport,
507 		    0, stpoll_handler);
508 }
509 
510 /*
511  * stpoll_handler is scheduled by sched_stpoll when appropriate, typically
512  * once per polling systimer tick.
513  */
514 static void
515 stpoll_handler(struct netmsg *msg)
516 {
517 	struct stpoll_ctx *st_ctx = &stpoll_context;
518 	struct thread *td = curthread;
519 	int i;
520 
521 	KKASSERT(&td->td_msgport == ifnet_portfn(0));
522 
523 	crit_enter_quick(td);
524 
525 	/* Reply ASAP */
526 	lwkt_replymsg(&msg->nm_lmsg, 0);
527 
528 	if (st_ctx->poll_handlers == 0) {
529 		crit_exit_quick(td);
530 		return;
531 	}
532 
533 	for (i = 0; i < st_ctx->poll_handlers; ++i) {
534 		const struct stpoll_rec *rec = &st_ctx->pr[i];
535 		struct ifnet *ifp = rec->ifp;
536 
537 		if (!lwkt_serialize_try(rec->serializer))
538 			continue;
539 
540 		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
541 		    (IFF_RUNNING | IFF_NPOLLING))
542 			rec->status_func(ifp, st_ctx->pollhz);
543 
544 		lwkt_serialize_exit(rec->serializer);
545 	}
546 
547 	crit_exit_quick(td);
548 }
549 
550 /*
551  * Hook from the status poll systimer.  Tries to schedule a status poll.
552  * NOTE: Caller should hold critical section.
553  */
554 static void
555 stpoll_clock(struct stpoll_ctx *st_ctx)
556 {
557 	KKASSERT(mycpuid == 0);
558 
559 	if (st_ctx->poll_handlers == 0)
560 		return;
561 	sched_stpoll(st_ctx);
562 }
563 
564 static int
565 stpoll_register(struct ifnet *ifp, const struct ifpoll_status *st_rec)
566 {
567 	struct stpoll_ctx *st_ctx = &stpoll_context;
568 	int error;
569 
570 	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));
571 
572 	if (st_rec->status_func == NULL)
573 		return 0;
574 
575 	/*
576 	 * Check if there is room.
577 	 */
578 	if (st_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
579 		/*
580 		 * List full, cannot register more entries.
581 		 * This should never happen; if it does, it is probably a
582 		 * broken driver trying to register multiple times. Checking
583 		 * this at runtime is expensive, and won't solve the problem
584  * anyway, so just report a few times and then give up.
585 		 */
586 		static int verbose = 10; /* XXX */
587 
588 		if (verbose > 0) {
589 			kprintf("status poll handlers list full, "
590 				"maybe a broken driver ?\n");
591 			verbose--;
592 		}
593 		error = ENOENT;
594 	} else {
595 		struct stpoll_rec *rec = &st_ctx->pr[st_ctx->poll_handlers];
596 
597 		rec->ifp = ifp;
598 		rec->serializer = st_rec->serializer;
599 		rec->status_func = st_rec->status_func;
600 
601 		st_ctx->poll_handlers++;
602 		error = 0;
603 	}
604 	return error;
605 }
606 
607 static int
608 stpoll_deregister(struct ifnet *ifp)
609 {
610 	struct stpoll_ctx *st_ctx = &stpoll_context;
611 	int i, error;
612 
613 	KKASSERT(&curthread->td_msgport == ifnet_portfn(0));
614 
615 	for (i = 0; i < st_ctx->poll_handlers; ++i) {
616 		if (st_ctx->pr[i].ifp == ifp) /* Found it */
617 			break;
618 	}
619 	if (i == st_ctx->poll_handlers) {
620 		kprintf("stpoll_deregister: ifp not found!!!\n");
621 		error = ENOENT;
622 	} else {
623 		st_ctx->poll_handlers--;
624 		if (i < st_ctx->poll_handlers) {
625 			/* Last entry replaces this one. */
626 			st_ctx->pr[i] = st_ctx->pr[st_ctx->poll_handlers];
627 		}
628 		error = 0;
629 	}
630 	return error;
631 }
632 
633 static __inline void
634 iopoll_reset_state(struct iopoll_ctx *io_ctx)
635 {
636 	crit_enter();
637 	io_ctx->poll_burst = 5;
638 	io_ctx->pending_polls = 0;
639 	io_ctx->residual_burst = 0;
640 	io_ctx->phase = 0;
641 	io_ctx->kern_frac = 0;
642 	bzero(&io_ctx->poll_start_t, sizeof(io_ctx->poll_start_t));
643 	bzero(&io_ctx->prev_t, sizeof(io_ctx->prev_t));
644 	crit_exit();
645 }
646 
647 static void
648 iopoll_init(int cpuid)
649 {
650 	KKASSERT(cpuid < IFPOLL_CTX_MAX);
651 
652 	rxpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_RX);
653 	txpoll_context[cpuid] = iopoll_ctx_create(cpuid, IFPOLL_TX);
654 }
655 
656 static struct iopoll_ctx *
657 iopoll_ctx_create(int cpuid, int poll_type)
658 {
659 	struct poll_comm *comm;
660 	struct iopoll_ctx *io_ctx;
661 	const char *poll_type_str;
662 
663 	KKASSERT(poll_type == IFPOLL_RX || poll_type == IFPOLL_TX);
664 
665 	/*
666 	 * Make sure that the tunables are in a sane state
667 	 */
668 	if (iopoll_burst_max < MIN_IOPOLL_BURST_MAX)
669 		iopoll_burst_max = MIN_IOPOLL_BURST_MAX;
670 	else if (iopoll_burst_max > MAX_IOPOLL_BURST_MAX)
671 		iopoll_burst_max = MAX_IOPOLL_BURST_MAX;
672 
673 	if (iopoll_each_burst > iopoll_burst_max)
674 		iopoll_each_burst = iopoll_burst_max;
675 
676 	comm = poll_common[cpuid];
677 
678 	/*
679 	 * Create the per-cpu polling context
680 	 */
681 	io_ctx = kmalloc(sizeof(*io_ctx), M_DEVBUF, M_WAITOK | M_ZERO);
682 
683 	io_ctx->poll_each_burst = iopoll_each_burst;
684 	io_ctx->poll_burst_max = iopoll_burst_max;
685 	io_ctx->user_frac = 50;
686 	if (poll_type == IFPOLL_RX)
687 		io_ctx->pollhz = comm->pollhz;
688 	else
689 		io_ctx->pollhz = comm->pollhz / (comm->poll_txfrac + 1);
690 	io_ctx->poll_cpuid = cpuid;
691 	iopoll_reset_state(io_ctx);
692 
693 	netmsg_init(&io_ctx->poll_netmsg, NULL, &netisr_adone_rport,
694 		    0, iopoll_handler);
695 	io_ctx->poll_netmsg.nm_lmsg.u.ms_resultp = io_ctx;
696 
697 	netmsg_init(&io_ctx->poll_more_netmsg, NULL, &netisr_adone_rport,
698 		    0, iopollmore_handler);
699 	io_ctx->poll_more_netmsg.nm_lmsg.u.ms_resultp = io_ctx;
700 
701 	/*
702 	 * Initialize per-cpu sysctl nodes
703 	 */
704 	if (poll_type == IFPOLL_RX)
705 		poll_type_str = "rx";
706 	else
707 		poll_type_str = "tx";
708 
709 	sysctl_ctx_init(&io_ctx->poll_sysctl_ctx);
710 	io_ctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&io_ctx->poll_sysctl_ctx,
711 				   SYSCTL_CHILDREN(comm->sysctl_tree),
712 				   OID_AUTO, poll_type_str, CTLFLAG_RD, 0, "");
713 	iopoll_add_sysctl(&io_ctx->poll_sysctl_ctx,
714 			  SYSCTL_CHILDREN(io_ctx->poll_sysctl_tree), io_ctx);
715 
716 	return io_ctx;
717 }
718 
719 /*
720  * Hook from iopoll systimer.  Tries to schedule an iopoll, but keeps
721  * track of lost ticks due to the previous handler taking too long.
722  * Normally, this should not happen, because a polling handler should
723  * run for a short time.  However, in some cases (e.g. when there are
724  * changes in link status etc.) the drivers take a very long time
725  * (even on the order of milliseconds) to reset and reconfigure the
726  * device, causing apparent lost polls.
727  *
728  * The first part of the code is just for debugging purposes, and tries
729  * to count how often hardclock ticks are shorter than they should be,
730  * meaning either stray interrupts or delayed events.
731  *
732  * WARNING! called from fastint or IPI, the MP lock might not be held.
733  * NOTE: Caller should hold critical section.
734  */
735 static void
736 iopoll_clock(struct iopoll_ctx *io_ctx)
737 {
738 	union ifpoll_time t;
739 	int delta;
740 
741 	KKASSERT(mycpuid == io_ctx->poll_cpuid);
742 
743 	if (io_ctx->poll_handlers == 0)
744 		return;
745 
746 	ifpoll_time_get(&t);
747 	delta = ifpoll_time_diff(&io_ctx->prev_t, &t);
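	/*
	 * delta is in microseconds and a full poll period is 1000000 / pollhz
	 * microseconds, so "delta * pollhz < 500000" means this tick arrived
	 * less than half a period after the previous one, i.e. a short tick.
	 */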
748 	if (delta * io_ctx->pollhz < 500000)
749 		io_ctx->short_ticks++;
750 	else
751 		io_ctx->prev_t = t;
752 
753 	if (io_ctx->pending_polls > 100) {
754 		/*
755 		 * Too much, assume it has stalled (not always true,
756 		 * see comment above).
757 		 */
758 		io_ctx->stalled++;
759 		io_ctx->pending_polls = 0;
760 		io_ctx->phase = 0;
761 	}
762 
763 	if (io_ctx->phase <= 2) {
764 		if (io_ctx->phase != 0)
765 			io_ctx->suspect++;
766 		io_ctx->phase = 1;
767 		sched_iopoll(io_ctx);
768 		io_ctx->phase = 2;
769 	}
770 	if (io_ctx->pending_polls++ > 0)
771 		io_ctx->lost_polls++;
772 }
773 
774 /*
775  * iopoll_handler is scheduled by sched_iopoll when appropriate, typically
776  * once per polling systimer tick.
777  *
778  * Note that the message is replied immediately in order to allow a new
779  * ISR to be scheduled in the handler.
780  */
781 static void
782 iopoll_handler(struct netmsg *msg)
783 {
784 	struct iopoll_ctx *io_ctx;
785 	struct thread *td = curthread;
786 	int i, cycles;
787 
788 	io_ctx = msg->nm_lmsg.u.ms_resultp;
789 	KKASSERT(&td->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));
790 
791 	crit_enter_quick(td);
792 
793 	/* Reply ASAP */
794 	lwkt_replymsg(&msg->nm_lmsg, 0);
795 
796 	if (io_ctx->poll_handlers == 0) {
797 		crit_exit_quick(td);
798 		return;
799 	}
800 
801 	io_ctx->phase = 3;
802 	if (io_ctx->residual_burst == 0) {
803 		/* First call in this tick */
804 		ifpoll_time_get(&io_ctx->poll_start_t);
805 		io_ctx->residual_burst = io_ctx->poll_burst;
806 	}
807 	cycles = (io_ctx->residual_burst < io_ctx->poll_each_burst) ?
808 		 io_ctx->residual_burst : io_ctx->poll_each_burst;
809 	io_ctx->residual_burst -= cycles;
810 
811 	for (i = 0; i < io_ctx->poll_handlers; i++) {
812 		const struct iopoll_rec *rec = &io_ctx->pr[i];
813 		struct ifnet *ifp = rec->ifp;
814 
815 		if (!lwkt_serialize_try(rec->serializer))
816 			continue;
817 
818 		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
819 		    (IFF_RUNNING | IFF_NPOLLING))
820 			rec->poll_func(ifp, rec->arg, cycles);
821 
822 		lwkt_serialize_exit(rec->serializer);
823 	}
824 
825 	/*
826 	 * Do a quick exit/enter to catch any higher-priority
827 	 * interrupt sources.
828 	 */
829 	crit_exit_quick(td);
830 	crit_enter_quick(td);
831 
832 	sched_iopollmore(io_ctx);
833 	io_ctx->phase = 4;
834 
835 	crit_exit_quick(td);
836 }
837 
838 /*
839  * iopollmore_handler is called after other netisr's, possibly scheduling
840  * another iopoll_handler call, or adapting the burst size for the next cycle.
841  *
842  * It is very bad to fetch large bursts of packets from a single card at once,
843  * because the burst could take a long time to be completely processed, leading
844  * to unfairness.  To reduce the problem, and also to account better for time
845  * spent in network-related processing, we split the burst in smaller chunks
846  * of fixed size, giving control to the other netisr's between chunks.  This
847  * helps in improving the fairness, reducing livelock and accounting for the
848  * work performed in low level handling.
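 *
 * For example, once poll_burst has grown to 150 with poll_each_burst at 5,
 * iopoll_handler() consumes a tick's burst in 30 chunks of 5, rescheduling
 * itself via iopollmore_handler() between chunks so that other netisr
 * messages get a chance to run.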
849  */
850 static void
851 iopollmore_handler(struct netmsg *msg)
852 {
853 	struct thread *td = curthread;
854 	struct iopoll_ctx *io_ctx;
855 	union ifpoll_time t;
856 	int kern_load;
857 	uint32_t pending_polls;
858 
859 	io_ctx = msg->nm_lmsg.u.ms_resultp;
860 	KKASSERT(&td->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));
861 
862 	crit_enter_quick(td);
863 
864 	/* Reply ASAP */
865 	lwkt_replymsg(&msg->nm_lmsg, 0);
866 
867 	if (io_ctx->poll_handlers == 0) {
868 		crit_exit_quick(td);
869 		return;
870 	}
871 
872 	io_ctx->phase = 5;
873 	if (io_ctx->residual_burst > 0) {
874 		sched_iopoll(io_ctx);
875 		crit_exit_quick(td);
876 		/* Will run immediately on return, followed by netisrs */
877 		return;
878 	}
879 
880 	/* Here we can account for the time spent in iopoll handlers in this tick */
881 	ifpoll_time_get(&t);
882 	kern_load = ifpoll_time_diff(&io_ctx->poll_start_t, &t);
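	/*
	 * kern_load above is in microseconds and one poll tick lasts
	 * 1000000 / pollhz microseconds, so (us * pollhz) / 10000 rescales it
	 * to a percentage of the tick (e.g. 250us at pollhz 2000 -> 50).
	 */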
883 	kern_load = (kern_load * io_ctx->pollhz) / 10000; /* 0..100 */
884 	io_ctx->kern_frac = kern_load;
885 
886 	if (kern_load > (100 - io_ctx->user_frac)) {
887 		/* Try to decrease the burst size */
888 		if (io_ctx->poll_burst > 1)
889 			io_ctx->poll_burst--;
890 	} else {
891 		if (io_ctx->poll_burst < io_ctx->poll_burst_max)
892 			io_ctx->poll_burst++;
893 	}
894 
895 	io_ctx->pending_polls--;
896 	pending_polls = io_ctx->pending_polls;
897 
898 	if (pending_polls == 0) {
899 		/* We are done */
900 		io_ctx->phase = 0;
901 	} else {
902 		/*
903 		 * Last cycle was long and caused us to miss one or more
904 		 * hardclock ticks.  Restart processing, slightly reducing the
905 		 * burst size to prevent this from happening again.
906 		 */
907 		io_ctx->poll_burst -= (io_ctx->poll_burst / 8);
908 		if (io_ctx->poll_burst < 1)
909 			io_ctx->poll_burst = 1;
910 		sched_iopoll(io_ctx);
911 		io_ctx->phase = 6;
912 	}
913 
914 	crit_exit_quick(td);
915 }
916 
917 static void
918 iopoll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
919 		  struct iopoll_ctx *io_ctx)
920 {
921 	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
922 			CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_burstmax,
923 			"IU", "Max Polling burst size");
924 
925 	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
926 			CTLTYPE_UINT | CTLFLAG_RW, io_ctx, 0, sysctl_eachburst,
927 			"IU", "Max size of each burst");
928 
929 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
930 			&io_ctx->phase, 0, "Polling phase");
931 
932 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
933 			&io_ctx->suspect, 0, "suspect event");
934 
935 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
936 			&io_ctx->stalled, 0, "potential stalls");
937 
938 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
939 			&io_ctx->poll_burst, 0, "Current polling burst size");
940 
941 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
942 			&io_ctx->user_frac, 0,
943 			"Desired user fraction of cpu time");
944 
945 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "kern_frac", CTLFLAG_RD,
946 			&io_ctx->kern_frac, 0,
947 			"Kernel fraction of cpu time");
948 
949 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
950 			&io_ctx->short_ticks, 0,
951 			"Hardclock ticks shorter than they should be");
952 
953 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
954 			&io_ctx->lost_polls, 0,
955 			"How many times we would have lost a poll tick");
956 
957 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
958 			&io_ctx->pending_polls, 0, "Do we need to poll again");
959 
960 	SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
961 		       &io_ctx->residual_burst, 0,
962 		       "# of residual cycles in burst");
963 
964 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
965 			&io_ctx->poll_handlers, 0,
966 			"Number of registered poll handlers");
967 }
968 
969 static void
970 sysctl_burstmax_handler(struct netmsg *nmsg)
971 {
972 	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
973 	struct iopoll_ctx *io_ctx;
974 
975 	io_ctx = msg->ctx;
976 	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));
977 
978 	io_ctx->poll_burst_max = nmsg->nm_lmsg.u.ms_result;
979 	if (io_ctx->poll_each_burst > io_ctx->poll_burst_max)
980 		io_ctx->poll_each_burst = io_ctx->poll_burst_max;
981 	if (io_ctx->poll_burst > io_ctx->poll_burst_max)
982 		io_ctx->poll_burst = io_ctx->poll_burst_max;
983 	if (io_ctx->residual_burst > io_ctx->poll_burst_max)
984 		io_ctx->residual_burst = io_ctx->poll_burst_max;
985 
986 	lwkt_replymsg(&nmsg->nm_lmsg, 0);
987 }
988 
989 static int
990 sysctl_burstmax(SYSCTL_HANDLER_ARGS)
991 {
992 	struct iopoll_ctx *io_ctx = arg1;
993 	struct iopoll_sysctl_netmsg msg;
994 	struct netmsg *nmsg;
995 	uint32_t burst_max;
996 	int error;
997 
998 	burst_max = io_ctx->poll_burst_max;
999 	error = sysctl_handle_int(oidp, &burst_max, 0, req);
1000 	if (error || req->newptr == NULL)
1001 		return error;
1002 	if (burst_max < MIN_IOPOLL_BURST_MAX)
1003 		burst_max = MIN_IOPOLL_BURST_MAX;
1004 	else if (burst_max > MAX_IOPOLL_BURST_MAX)
1005 		burst_max = MAX_IOPOLL_BURST_MAX;
1006 
1007 	nmsg = &msg.nmsg;
1008 	netmsg_init(nmsg, NULL, &curthread->td_msgport,
1009 		    0, sysctl_burstmax_handler);
1010 	nmsg->nm_lmsg.u.ms_result = burst_max;
1011 	msg.ctx = io_ctx;
1012 
1013 	return ifnet_domsg(&nmsg->nm_lmsg, io_ctx->poll_cpuid);
1014 }
1015 
1016 static void
1017 sysctl_eachburst_handler(struct netmsg *nmsg)
1018 {
1019 	struct iopoll_sysctl_netmsg *msg = (struct iopoll_sysctl_netmsg *)nmsg;
1020 	struct iopoll_ctx *io_ctx;
1021 	uint32_t each_burst;
1022 
1023 	io_ctx = msg->ctx;
1024 	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));
1025 
1026 	each_burst = nmsg->nm_lmsg.u.ms_result;
1027 	if (each_burst > io_ctx->poll_burst_max)
1028 		each_burst = io_ctx->poll_burst_max;
1029 	else if (each_burst < 1)
1030 		each_burst = 1;
1031 	io_ctx->poll_each_burst = each_burst;
1032 
1033 	lwkt_replymsg(&nmsg->nm_lmsg, 0);
1034 }
1035 
1036 static int
1037 sysctl_eachburst(SYSCTL_HANDLER_ARGS)
1038 {
1039 	struct iopoll_ctx *io_ctx = arg1;
1040 	struct iopoll_sysctl_netmsg msg;
1041 	struct netmsg *nmsg;
1042 	uint32_t each_burst;
1043 	int error;
1044 
1045 	each_burst = io_ctx->poll_each_burst;
1046 	error = sysctl_handle_int(oidp, &each_burst, 0, req);
1047 	if (error || req->newptr == NULL)
1048 		return error;
1049 
1050 	nmsg = &msg.nmsg;
1051 	netmsg_init(nmsg, NULL, &curthread->td_msgport,
1052 		    0, sysctl_eachburst_handler);
1053 	nmsg->nm_lmsg.u.ms_result = each_burst;
1054 	msg.ctx = io_ctx;
1055 
1056 	return ifnet_domsg(&nmsg->nm_lmsg, io_ctx->poll_cpuid);
1057 }
1058 
1059 static int
1060 iopoll_register(struct ifnet *ifp, struct iopoll_ctx *io_ctx,
1061 		const struct ifpoll_io *io_rec)
1062 {
1063 	int error;
1064 
1065 	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));
1066 
1067 	if (io_rec->poll_func == NULL)
1068 		return 0;
1069 
1070 	/*
1071 	 * Check if there is room.
1072 	 */
1073 	if (io_ctx->poll_handlers >= IFPOLL_LIST_LEN) {
1074 		/*
1075 		 * List full, cannot register more entries.
1076 		 * This should never happen; if it does, it is probably a
1077 		 * broken driver trying to register multiple times. Checking
1078 		 * this at runtime is expensive, and won't solve the problem
1079 		 * anyway, so just report a few times and then give up.
1080 		 */
1081 		static int verbose = 10; /* XXX */
1082 		if (verbose > 0) {
1083 			kprintf("io poll handlers list full, "
1084 				"maybe a broken driver ?\n");
1085 			verbose--;
1086 		}
1087 		error = ENOENT;
1088 	} else {
1089 		struct iopoll_rec *rec = &io_ctx->pr[io_ctx->poll_handlers];
1090 
1091 		rec->ifp = ifp;
1092 		rec->serializer = io_rec->serializer;
1093 		rec->arg = io_rec->arg;
1094 		rec->poll_func = io_rec->poll_func;
1095 
1096 		io_ctx->poll_handlers++;
1097 		error = 0;
1098 	}
1099 	return error;
1100 }
1101 
1102 static int
1103 iopoll_deregister(struct ifnet *ifp, struct iopoll_ctx *io_ctx)
1104 {
1105 	int i, error;
1106 
1107 	KKASSERT(&curthread->td_msgport == ifnet_portfn(io_ctx->poll_cpuid));
1108 
1109 	for (i = 0; i < io_ctx->poll_handlers; ++i) {
1110 		if (io_ctx->pr[i].ifp == ifp) /* Found it */
1111 			break;
1112 	}
1113 	if (i == io_ctx->poll_handlers) {
1114 		error = ENOENT;
1115 	} else {
1116 		io_ctx->poll_handlers--;
1117 		if (i < io_ctx->poll_handlers) {
1118 			/* Last entry replaces this one. */
1119 			io_ctx->pr[i] = io_ctx->pr[io_ctx->poll_handlers];
1120 		}
1121 
1122 		if (io_ctx->poll_handlers == 0)
1123 			iopoll_reset_state(io_ctx);
1124 		error = 0;
1125 	}
1126 	return error;
1127 }
1128 
1129 static void
1130 poll_comm_init(int cpuid)
1131 {
1132 	struct poll_comm *comm;
1133 	char cpuid_str[16];
1134 
1135 	comm = kmalloc(sizeof(*comm), M_DEVBUF, M_WAITOK | M_ZERO);
1136 
1137 	if (ifpoll_stfrac < 0)
1138 		ifpoll_stfrac = IFPOLL_STFRAC_DEFAULT;
1139 	if (ifpoll_txfrac < 0)
1140 		ifpoll_txfrac = IFPOLL_TXFRAC_DEFAULT;
1141 
1142 	comm->pollhz = ifpoll_pollhz;
1143 	comm->poll_cpuid = cpuid;
1144 	comm->poll_stfrac = ifpoll_stfrac;
1145 	comm->poll_txfrac = ifpoll_txfrac;
1146 
1147 	ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", cpuid);
1148 
1149 	sysctl_ctx_init(&comm->sysctl_ctx);
1150 	comm->sysctl_tree = SYSCTL_ADD_NODE(&comm->sysctl_ctx,
1151 			    SYSCTL_STATIC_CHILDREN(_net_ifpoll),
1152 			    OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");
1153 
1154 	SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
1155 			OID_AUTO, "pollhz", CTLTYPE_INT | CTLFLAG_RW,
1156 			comm, 0, sysctl_pollhz,
1157 			"I", "Device polling frequency");
1158 
1159 	if (cpuid == 0) {
1160 		SYSCTL_ADD_PROC(&comm->sysctl_ctx,
1161 				SYSCTL_CHILDREN(comm->sysctl_tree),
1162 				OID_AUTO, "status_frac",
1163 				CTLTYPE_INT | CTLFLAG_RW,
1164 				comm, 0, sysctl_stfrac,
1165 				"I", "# of cycles before status is polled");
1166 	}
1167 	SYSCTL_ADD_PROC(&comm->sysctl_ctx, SYSCTL_CHILDREN(comm->sysctl_tree),
1168 			OID_AUTO, "tx_frac", CTLTYPE_INT | CTLFLAG_RW,
1169 			comm, 0, sysctl_txfrac,
1170 			"I", "# of cycles before TX is polled");
1171 
1172 	poll_common[cpuid] = comm;
1173 }
1174 
1175 static void
1176 poll_comm_start(int cpuid)
1177 {
1178 	struct poll_comm *comm = poll_common[cpuid];
1179 	void (*func)(systimer_t, struct intrframe *);
1180 
1181 	/*
1182 	 * Initialize systimer
1183 	 */
1184 	if (cpuid == 0)
1185 		func = poll_comm_systimer0;
1186 	else
1187 		func = poll_comm_systimer;
1188 	systimer_init_periodic_nq(&comm->pollclock, func, comm, 1);
1189 }
1190 
1191 static void
1192 _poll_comm_systimer(struct poll_comm *comm)
1193 {
1194 	if (comm->txfrac_count-- == 0) {
1195 		comm->txfrac_count = comm->poll_txfrac;
1196 		iopoll_clock(txpoll_context[comm->poll_cpuid]);
1197 	}
1198 	iopoll_clock(rxpoll_context[comm->poll_cpuid]);
1199 }
1200 
1201 static void
1202 poll_comm_systimer0(systimer_t info, struct intrframe *frame __unused)
1203 {
1204 	struct poll_comm *comm = info->data;
1205 	globaldata_t gd = mycpu;
1206 
1207 	KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid == 0);
1208 
1209 	crit_enter_gd(gd);
1210 
1211 	if (comm->stfrac_count-- == 0) {
1212 		comm->stfrac_count = comm->poll_stfrac;
1213 		stpoll_clock(&stpoll_context);
1214 	}
1215 	_poll_comm_systimer(comm);
1216 
1217 	crit_exit_gd(gd);
1218 }
1219 
1220 static void
1221 poll_comm_systimer(systimer_t info, struct intrframe *frame __unused)
1222 {
1223 	struct poll_comm *comm = info->data;
1224 	globaldata_t gd = mycpu;
1225 
1226 	KKASSERT(comm->poll_cpuid == gd->gd_cpuid && gd->gd_cpuid != 0);
1227 
1228 	crit_enter_gd(gd);
1229 	_poll_comm_systimer(comm);
1230 	crit_exit_gd(gd);
1231 }
1232 
1233 static void
1234 poll_comm_adjust_pollhz(struct poll_comm *comm)
1235 {
1236 	uint32_t handlers;
1237 	int pollhz = 1;
1238 
1239 	KKASSERT(&curthread->td_msgport == ifnet_portfn(comm->poll_cpuid));
1240 
1241 	/*
1242 	 * If there is no polling handler registered, set systimer
1243 	 * frequency to the lowest value.  Polling systimer frequency
1244 	 * will be adjusted to the requested value, once there are
1245 	 * registered handlers.
1246 	 */
1247 	handlers = rxpoll_context[mycpuid]->poll_handlers +
1248 		   txpoll_context[mycpuid]->poll_handlers;
1249 	if (comm->poll_cpuid == 0)
1250 		handlers += stpoll_context.poll_handlers;
1251 	if (handlers)
1252 		pollhz = comm->pollhz;
1253 	systimer_adjust_periodic(&comm->pollclock, pollhz);
1254 }
1255 
1256 static int
1257 sysctl_pollhz(SYSCTL_HANDLER_ARGS)
1258 {
1259 	struct poll_comm *comm = arg1;
1260 	struct netmsg nmsg;
1261 	int error, phz;
1262 
1263 	phz = comm->pollhz;
1264 	error = sysctl_handle_int(oidp, &phz, 0, req);
1265 	if (error || req->newptr == NULL)
1266 		return error;
1267 	if (phz <= 0)
1268 		return EINVAL;
1269 	else if (phz > IFPOLL_FREQ_MAX)
1270 		phz = IFPOLL_FREQ_MAX;
1271 
1272 	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
1273 		    0, sysctl_pollhz_handler);
1274 	nmsg.nm_lmsg.u.ms_result = phz;
1275 
1276 	return ifnet_domsg(&nmsg.nm_lmsg, comm->poll_cpuid);
1277 }
1278 
1279 static void
1280 sysctl_pollhz_handler(struct netmsg *nmsg)
1281 {
1282 	struct poll_comm *comm = poll_common[mycpuid];
1283 
1284 	KKASSERT(&curthread->td_msgport == ifnet_portfn(comm->poll_cpuid));
1285 
1286 	/* Save polling frequency */
1287 	comm->pollhz = nmsg->nm_lmsg.u.ms_result;
1288 
1289 	/*
1290 	 * Adjust cached pollhz
1291 	 */
1292 	rxpoll_context[mycpuid]->pollhz = comm->pollhz;
1293 	txpoll_context[mycpuid]->pollhz =
1294 	    comm->pollhz / (comm->poll_txfrac + 1);
1295 	if (mycpuid == 0)
1296 		stpoll_context.pollhz = comm->pollhz / (comm->poll_stfrac + 1);
1297 
1298 	/*
1299 	 * Adjust polling frequency
1300 	 */
1301 	poll_comm_adjust_pollhz(comm);
1302 
1303 	lwkt_replymsg(&nmsg->nm_lmsg, 0);
1304 }
1305 
1306 static int
1307 sysctl_stfrac(SYSCTL_HANDLER_ARGS)
1308 {
1309 	struct poll_comm *comm = arg1;
1310 	struct netmsg nmsg;
1311 	int error, stfrac;
1312 
1313 	KKASSERT(comm->poll_cpuid == 0);
1314 
1315 	stfrac = comm->poll_stfrac;
1316 	error = sysctl_handle_int(oidp, &stfrac, 0, req);
1317 	if (error || req->newptr == NULL)
1318 		return error;
1319 	if (stfrac < 0)
1320 		return EINVAL;
1321 
1322 	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
1323 		    0, sysctl_stfrac_handler);
1324 	nmsg.nm_lmsg.u.ms_result = stfrac;
1325 
1326 	return ifnet_domsg(&nmsg.nm_lmsg, comm->poll_cpuid);
1327 }
1328 
1329 static void
1330 sysctl_stfrac_handler(struct netmsg *nmsg)
1331 {
1332 	struct poll_comm *comm = poll_common[mycpuid];
1333 	int stfrac = nmsg->nm_lmsg.u.ms_result;
1334 
1335 	KKASSERT(&curthread->td_msgport == ifnet_portfn(comm->poll_cpuid));
1336 
1337 	crit_enter();
1338 	comm->poll_stfrac = stfrac;
1339 	if (comm->stfrac_count > comm->poll_stfrac)
1340 		comm->stfrac_count = comm->poll_stfrac;
1341 	crit_exit();
1342 
1343 	lwkt_replymsg(&nmsg->nm_lmsg, 0);
1344 }
1345 
1346 static int
1347 sysctl_txfrac(SYSCTL_HANDLER_ARGS)
1348 {
1349 	struct poll_comm *comm = arg1;
1350 	struct netmsg nmsg;
1351 	int error, txfrac;
1352 
1353 	txfrac = comm->poll_txfrac;
1354 	error = sysctl_handle_int(oidp, &txfrac, 0, req);
1355 	if (error || req->newptr == NULL)
1356 		return error;
1357 	if (txfrac < 0)
1358 		return EINVAL;
1359 
1360 	netmsg_init(&nmsg, NULL, &curthread->td_msgport,
1361 		    0, sysctl_txfrac_handler);
1362 	nmsg.nm_lmsg.u.ms_result = txfrac;
1363 
1364 	return ifnet_domsg(&nmsg.nm_lmsg, comm->poll_cpuid);
1365 }
1366 
1367 static void
1368 sysctl_txfrac_handler(struct netmsg *nmsg)
1369 {
1370 	struct poll_comm *comm = poll_common[mycpuid];
1371 	int txfrac = nmsg->nm_lmsg.u.ms_result;
1372 
1373 	KKASSERT(&curthread->td_msgport == ifnet_portfn(comm->poll_cpuid));
1374 
1375 	crit_enter();
1376 	comm->poll_txfrac = txfrac;
1377 	if (comm->txfrac_count > comm->poll_txfrac)
1378 		comm->txfrac_count = comm->poll_txfrac;
1379 	crit_exit();
1380 
1381 	lwkt_replymsg(&nmsg->nm_lmsg, 0);
1382 }
1383