/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $
 * $DragonFly: src/sys/kern/kern_intr.c,v 1.22 2005/06/16 21:12:19 dillon Exp $
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/thread2.h>
#include <sys/random.h>

#include <machine/ipl.h>

#include <sys/interrupt.h>

typedef struct intrec {
    struct intrec *next;
    inthand2_t	*handler;
    void	*argument;
    const char	*name;
    int		intr;
} intrec_t;

static intrec_t	*intlists[NHWI+NSWI];
static thread_t ithreads[NHWI+NSWI];
static struct thread ithread_ary[NHWI+NSWI];
static struct random_softc irandom_ary[NHWI+NSWI];
static int irunning[NHWI+NSWI];
static u_int ill_count[NHWI+NSWI];	/* interrupt livelock counter */
static u_int ill_ticks[NHWI+NSWI];	/* track elapsed to calculate freq */
static u_int ill_delta[NHWI+NSWI];	/* track elapsed to calculate freq */
static int ill_state[NHWI+NSWI];	/* current state */
static struct systimer ill_timer[NHWI+NSWI];	/* enforced freq. timer */
static struct systimer ill_rtimer[NHWI+NSWI];	/* recovery timer */

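/*
 * All of the above arrays are indexed by interrupt number: hardware
 * interrupts occupy slots [0, NHWI) and software interrupts occupy
 * slots [NHWI, NHWI + NSWI), matching the range checks performed by
 * register_int() and register_swi() below.
 */
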
#define LIVELOCK_NONE		0
#define LIVELOCK_LIMITED	1

static int livelock_limit = 50000;
static int livelock_fallback = 20000;
SYSCTL_INT(_kern, OID_AUTO, livelock_limit,
        CTLFLAG_RW, &livelock_limit, 0, "Livelock interrupt rate limit");
SYSCTL_INT(_kern, OID_AUTO, livelock_fallback,
        CTLFLAG_RW, &livelock_fallback, 0, "Livelock interrupt fallback rate");

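/*
 * Both limits are CTLFLAG_RW and may be tuned at runtime, e.g. (the
 * values below are illustrative, not recommendations):
 *
 *	sysctl kern.livelock_limit=100000
 *	sysctl kern.livelock_fallback=40000
 */
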
static void ithread_handler(void *arg);

/*
 * Register an SWI or INTerrupt handler.
 */
thread_t
register_swi(int intr, inthand2_t *handler, void *arg, const char *name)
{
    if (intr < NHWI || intr >= NHWI + NSWI)
	panic("register_swi: bad intr %d", intr);
    return(register_int(intr, handler, arg, name));
}

thread_t
register_int(int intr, inthand2_t *handler, void *arg, const char *name)
{
    intrec_t **list;
    intrec_t *rec;
    thread_t td;

    if (intr < 0 || intr >= NHWI + NSWI)
	panic("register_int: bad intr %d", intr);

    rec = malloc(sizeof(intrec_t), M_DEVBUF, M_NOWAIT);
    if (rec == NULL)
	panic("register_int: malloc failed");
    rec->handler = handler;
    rec->argument = arg;
    rec->name = name;
    rec->intr = intr;
    rec->next = NULL;

    list = &intlists[intr];

    /*
     * Create an interrupt thread if necessary, leave it in an unscheduled
     * state.
     */
    if ((td = ithreads[intr]) == NULL) {
	lwkt_create((void *)ithread_handler, (void *)intr, &ithreads[intr],
	    &ithread_ary[intr], TDF_STOPREQ|TDF_INTTHREAD, -1,
	    "ithread %d", intr);
	td = ithreads[intr];
	if (intr >= NHWI && intr < NHWI + NSWI)
	    lwkt_setpri(td, TDPRI_SOFT_NORM);
	else
	    lwkt_setpri(td, TDPRI_INT_MED);
    }

    /*
     * Add the record to the end of the interrupt list.
     */
    crit_enter();	/* token */
    while (*list != NULL)
	list = &(*list)->next;
    *list = rec;
    crit_exit();
    return(td);
}

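/*
 * Example usage from a hypothetical driver attach routine (the names
 * below are illustrative only):
 *
 *	static void
 *	mydev_intr(void *arg)
 *	{
 *		struct mydev_softc *sc = arg;
 *		(process the device's pending events)
 *	}
 *
 *	register_int(sc->sc_irq, mydev_intr, sc, "mydev");
 *
 * The handler runs in the interrupt thread created above, not in raw
 * interrupt context.
 */
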
void
unregister_swi(int intr, inthand2_t *handler)
{
    if (intr < NHWI || intr >= NHWI + NSWI)
	panic("unregister_swi: bad intr %d", intr);
    unregister_int(intr, handler);
}

void
unregister_int(int intr, inthand2_t *handler)
{
    intrec_t **list;
    intrec_t *rec;

    if (intr < 0 || intr >= NHWI + NSWI)
	panic("unregister_int: bad intr %d", intr);
    list = &intlists[intr];
    crit_enter();
    while ((rec = *list) != NULL) {
	if (rec->handler == handler) {
	    *list = rec->next;
	    break;
	}
	list = &rec->next;
    }
    crit_exit();
    if (rec != NULL) {
	free(rec, M_DEVBUF);
    } else {
	printf("warning: unregister_int: int %d handler %p not found\n",
	    intr, handler);
    }
}

void
swi_setpriority(int intr, int pri)
{
    struct thread *td;

    if (intr < NHWI || intr >= NHWI + NSWI)
	panic("swi_setpriority: bad intr %d", intr);
    if ((td = ithreads[intr]) != NULL)
	lwkt_setpri(td, pri);
}

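/*
 * Enable or disable entropy harvesting for an interrupt.  The softc is
 * checked by ithread_handler() on each pass through its loop; when
 * enabled, interrupt timing is fed to the random number generator.
 */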
void
register_randintr(int intr)
{
    struct random_softc *sc = &irandom_ary[intr];
    sc->sc_intr = intr;
    sc->sc_enabled = 1;
}

void
unregister_randintr(int intr)
{
    struct random_softc *sc = &irandom_ary[intr];
    sc->sc_enabled = 0;
}

/*
 * Dispatch an interrupt.  If there's nothing to do we have a stray
 * interrupt and can just return, leaving the interrupt masked.
 *
 * We need to schedule the interrupt and set its irunning[] bit.  If
 * we are not on the interrupt thread's cpu we have to send a message
 * to the correct cpu that will issue the desired action (interlocking
 * with the interrupt thread's critical section).
 *
 * We are NOT in a critical section, which will allow the scheduled
 * interrupt to preempt us.  The MP lock might *NOT* be held here.
 */
static void
sched_ithd_remote(void *arg)
{
    sched_ithd((int)arg);
}

void
sched_ithd(int intr)
{
    thread_t td;

    if ((td = ithreads[intr]) != NULL) {
	if (intlists[intr] == NULL) {
	    printf("sched_ithd: stray interrupt %d\n", intr);
	} else {
	    if (td->td_gd == mycpu) {
		irunning[intr] = 1;
		lwkt_schedule(td);	/* preemption handled internally */
	    } else {
		lwkt_send_ipiq(td->td_gd, sched_ithd_remote, (void *)intr);
	    }
	}
    } else {
	printf("sched_ithd: stray interrupt %d\n", intr);
    }
}

/*
 * This is run from a periodic SYSTIMER (and thus must be MP safe; the BGL
 * might not be held).
 */
static void
ithread_livelock_wakeup(systimer_t info)
{
    int intr = (int)info->data;
    thread_t td;

    if ((td = ithreads[intr]) != NULL)
	lwkt_schedule(td);
}


/*
 * Interrupt threads run this as their main loop.
 *
 * The handler begins execution outside a critical section and with the BGL
 * held.
 *
 * The irunning state starts at 0.  When an interrupt occurs, the hardware
 * interrupt is disabled and sched_ithd() is called to schedule the thread.
 * The HW interrupt remains disabled until all routines have run.  We then
 * call ithread_done() to reenable the HW interrupt and deschedule us until
 * the next interrupt.
 *
 * We are responsible for atomically checking irunning[], and ithread_done()
 * is responsible for atomically checking for platform-specific delayed
 * interrupts.  irunning[] for our irq is only set in the context of our cpu,
 * so a critical section is a sufficient interlock.
 */
#define LIVELOCK_TIMEFRAME(freq)	((freq) >> 2)	/* 1/4 second */
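
/*
 * Worked example: LIVELOCK_TIMEFRAME() is a quarter of the cputimer
 * frequency, i.e. the number of timer counts in roughly 1/4 second.
 * With a hypothetical 1000000 hz cputimer the measurement window is
 * 250000 counts, and an interrupt rate is only computed once at least
 * that many counts have accumulated in ill_delta[].
 */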

static void
ithread_handler(void *arg)
{
    int intr = (int)arg;
    int freq;
    u_int bticks;
    u_int cputicks;
    intrec_t **list = &intlists[intr];
    intrec_t *rec;
    intrec_t *nrec;
    struct random_softc *sc = &irandom_ary[intr];
    globaldata_t gd = mycpu;

    /*
     * The loop must be entered with one critical section held.
     */
    crit_enter_gd(gd);

    for (;;) {
	/*
	 * We can get woken up by the livelock periodic code too; run the
	 * handlers only if there is a real interrupt pending.  XXX
	 *
	 * Clear irunning[] prior to running the handlers to interlock
	 * against new events occurring during processing of existing
	 * events.
	 *
	 * For now run each handler in a critical section.
	 */
	irunning[intr] = 0;
	for (rec = *list; rec; rec = nrec) {
	    nrec = rec->next;
	    rec->handler(rec->argument);
	}

	/*
	 * Do a quick exit/enter to catch any higher-priority
	 * interrupt sources and so user/system/interrupt statistics
	 * work for interrupt threads.
	 */
	crit_exit_gd(gd);
	crit_enter_gd(gd);

	/*
	 * This is our interrupt hook to add rate randomness to the random
	 * number generator.
	 */
	if (sc->sc_enabled)
	    add_interrupt_randomness(intr);

	/*
	 * This is our livelock test.  If we hit the rate limit we
	 * limit ourselves to X interrupts/sec until the rate
	 * falls below 50% of that value, then we unlimit again.
	 *
	 * XXX calling cputimer_count() is expensive but a livelock may
	 * prevent other interrupts from occurring so we cannot use ticks.
	 */
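	/*
	 * Worked example with the defaults: if 15000 interrupts arrive
	 * during a 1/4 second measurement window the computed rate is
	 * 60000 hz, which exceeds livelock_limit (50000), so we switch
	 * to LIVELOCK_LIMITED and throttle to livelock_fallback
	 * (20000 hz).  We unlimit again only once the measured rate
	 * drops below half the fallback, i.e. below 10000 hz.
	 */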
	cputicks = sys_cputimer->count();
	++ill_count[intr];
	bticks = cputicks - ill_ticks[intr];
	ill_ticks[intr] = cputicks;
	if (bticks > sys_cputimer->freq)
	    bticks = sys_cputimer->freq;

	switch(ill_state[intr]) {
	case LIVELOCK_NONE:
	    ill_delta[intr] += bticks;
	    if (ill_delta[intr] < LIVELOCK_TIMEFRAME(sys_cputimer->freq))
		break;
	    freq = (int64_t)ill_count[intr] * sys_cputimer->freq /
		   ill_delta[intr];
	    ill_delta[intr] = 0;
	    ill_count[intr] = 0;
	    if (freq < livelock_limit)
		break;
	    printf("intr %d at %d hz, livelocked! limiting at %d hz\n",
		intr, freq, livelock_fallback);
	    ill_state[intr] = LIVELOCK_LIMITED;
	    bticks = 0;
	    /* force periodic check to avoid stale removal (if ints stop) */
	    systimer_init_periodic(&ill_rtimer[intr], ithread_livelock_wakeup,
				(void *)intr, 1);
	    /* fall through */
	case LIVELOCK_LIMITED:
	    /*
	     * Delay (in us) before rearming the interrupt.  At the
	     * default 20000 hz fallback this is 1 + 1000000 / 20000,
	     * i.e. 51us per interrupt.
	     */
	    systimer_init_oneshot(&ill_timer[intr], ithread_livelock_wakeup,
				(void *)intr, 1 + 1000000 / livelock_fallback);
	    lwkt_deschedule_self(curthread);
	    lwkt_switch();

	    /* in case we were woken up by something else */
	    systimer_del(&ill_timer[intr]);

	    /*
	     * Calculate the interrupt rate (note that due to our delay it
	     * will not exceed livelock_fallback).
	     */
	    ill_delta[intr] += bticks;
	    if (ill_delta[intr] < LIVELOCK_TIMEFRAME(sys_cputimer->freq))
		break;
	    freq = (int64_t)ill_count[intr] * sys_cputimer->freq /
		   ill_delta[intr];
	    ill_delta[intr] = 0;
	    ill_count[intr] = 0;
	    if (freq < (livelock_fallback >> 1)) {
		printf("intr %d at %d hz, removing livelock limit\n",
			intr, freq);
		ill_state[intr] = LIVELOCK_NONE;
		systimer_del(&ill_rtimer[intr]);
	    }
	    break;
	}

	/*
	 * There are two races here.  irunning[] is set by sched_ithd()
	 * in the context of our cpu and is critical-section safe.  We
	 * are responsible for checking it.  ipending is not critical
	 * section safe and must be handled by the platform specific
	 * ithread_done() routine.
	 */
	if (irunning[intr] == 0)
	    ithread_done(intr);
	/* must be in critical section on loop */
    }
    /* not reached */
}

/*
 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
 * The data for this is machine dependent, and the declarations are in
 * machine dependent code.  The layout of intrnames and intrcnt however
 * is machine independent.
 *
 * We do not know the length of intrcnt and intrnames at compile time, so
 * calculate things at run time.
 */
static int
sysctl_intrnames(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, intrnames, eintrnames - intrnames,
	    req));
}

SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
	NULL, 0, sysctl_intrnames, "", "Interrupt Names");

static int
sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, intrcnt,
	    (char *)eintrcnt - (char *)intrcnt, req));
}

SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
	NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");
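
/*
 * Example consumers (illustrative): utilities such as systat, and
 * vmstat -i style tools, read hw.intrnames and hw.intrcnt to display
 * per-interrupt counts; the opaque data is simply the kernel arrays
 * copied out to userland.
 */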