/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $
 * $DragonFly: src/sys/kern/kern_intr.c,v 1.19 2005/02/01 22:41:26 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/thread2.h>
#include <sys/random.h>

#include <machine/ipl.h>

#include <sys/interrupt.h>

typedef struct intrec {
        struct intrec *next;
        inthand2_t *handler;
        intrmask_t *maskptr;    /* LEGACY */
        void *argument;
        const char *name;
        int intr;
} intrec_t;

static intrec_t *intlists[NHWI+NSWI];
static thread_t ithreads[NHWI+NSWI];
static struct thread ithread_ary[NHWI+NSWI];
static struct random_softc irandom_ary[NHWI+NSWI];
static int irunning[NHWI+NSWI];
static u_int ill_count[NHWI+NSWI];      /* interrupt livelock counter */
static u_int ill_ticks[NHWI+NSWI];      /* track elapsed to calculate freq */
static u_int ill_delta[NHWI+NSWI];      /* track elapsed to calculate freq */
static int ill_state[NHWI+NSWI];        /* current state */
static struct systimer ill_timer[NHWI+NSWI];    /* enforced freq. timer */
static struct systimer ill_rtimer[NHWI+NSWI];   /* recovery timer */
static intrmask_t dummy_intr_mask;

#define LIVELOCK_NONE           0
#define LIVELOCK_LIMITED        1

static int livelock_limit = 50000;
static int livelock_fallback = 20000;
SYSCTL_INT(_kern, OID_AUTO, livelock_limit,
        CTLFLAG_RW, &livelock_limit, 0, "Livelock interrupt rate limit");
SYSCTL_INT(_kern, OID_AUTO, livelock_fallback,
        CTLFLAG_RW, &livelock_fallback, 0, "Livelock interrupt fallback rate");

/*
 * TEMPORARY sysctl to allow interrupt handlers to run without the critical
 * section (if set to 0).
 *
 * SEQUENCE OF EVENTS: default to prior operation, test, change the default
 * to 0, test a lot more, then make operation without a critical section
 * mandatory and remove the sysctl code and variable.
 */
static int int_use_crit_section = 1;
SYSCTL_INT(_kern, OID_AUTO, int_use_crit_section,
        CTLFLAG_RW, &int_use_crit_section, 0,
        "Run interrupts entirely within a critical section");

static void ithread_handler(void *arg);

/*
 * Register an SWI or INTerrupt handler.
 *
 * Note that maskptr exists to support legacy spl handling and is not intended
 * to be permanent (because spls are not compatible with BGL removal).
 */
thread_t
register_swi(int intr, inthand2_t *handler, void *arg, const char *name,
        intrmask_t *maskptr)
{
        if (intr < NHWI || intr >= NHWI + NSWI)
                panic("register_swi: bad intr %d", intr);
        return(register_int(intr, handler, arg, name, maskptr));
}

thread_t
register_int(int intr, inthand2_t *handler, void *arg, const char *name,
        intrmask_t *maskptr)
{
        intrec_t **list;
        intrec_t *rec;
        thread_t td;

        if (intr < 0 || intr >= NHWI + NSWI)
                panic("register_int: bad intr %d", intr);
        if (maskptr == NULL)
                maskptr = &dummy_intr_mask;

        rec = malloc(sizeof(intrec_t), M_DEVBUF, M_NOWAIT);
        if (rec == NULL)
                panic("register_int: malloc failed");
        rec->handler = handler;
        rec->maskptr = maskptr;
        rec->argument = arg;
        rec->name = name;
        rec->intr = intr;
        rec->next = NULL;

        list = &intlists[intr];

        /*
         * Create an interrupt thread if necessary, leave it in an unscheduled
         * state.
         */
        if ((td = ithreads[intr]) == NULL) {
                lwkt_create((void *)ithread_handler, (void *)intr,
                        &ithreads[intr], &ithread_ary[intr],
                        TDF_STOPREQ|TDF_INTTHREAD, -1,
                        "ithread %d", intr);
                td = ithreads[intr];
                if (intr >= NHWI && intr < NHWI + NSWI)
                        lwkt_setpri(td, TDPRI_SOFT_NORM);
                else
                        lwkt_setpri(td, TDPRI_INT_MED);
        }

        /*
         * Add the record to the interrupt list.
         */
        crit_enter();   /* token */
        while (*list != NULL)
                list = &(*list)->next;
        *list = rec;
        crit_exit();
        return(td);
}

void
unregister_swi(int intr, inthand2_t *handler)
{
        if (intr < NHWI || intr >= NHWI + NSWI)
                panic("unregister_swi: bad intr %d", intr);
        unregister_int(intr, handler);
}

void
unregister_int(int intr, inthand2_t *handler)
{
        intrec_t **list;
        intrec_t *rec;

        if (intr < 0 || intr >= NHWI + NSWI)
                panic("unregister_int: bad intr %d", intr);
        list = &intlists[intr];
        crit_enter();
        while ((rec = *list) != NULL) {
                if (rec->handler == handler) {
                        *list = rec->next;
                        break;
                }
                list = &rec->next;
        }
        crit_exit();
        if (rec != NULL) {
                free(rec, M_DEVBUF);
        } else {
                printf("warning: unregister_int: int %d handler %p not found\n",
                        intr, handler);
        }
}

void
swi_setpriority(int intr, int pri)
{
        struct thread *td;

        if (intr < NHWI || intr >= NHWI + NSWI)
                panic("swi_setpriority: bad intr %d", intr);
        if ((td = ithreads[intr]) != NULL)
                lwkt_setpri(td, pri);
}

void
register_randintr(int intr)
{
        struct random_softc *sc = &irandom_ary[intr];

        sc->sc_intr = intr;
        sc->sc_enabled = 1;
}

void
unregister_randintr(int intr)
{
        struct random_softc *sc = &irandom_ary[intr];

        sc->sc_enabled = 0;
}
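/*
 * Usage sketch (illustrative only, not compiled): how a hypothetical driver
 * might attach and detach a handler through the registration functions
 * above.  The device name, irq number, softc layout, and sc_ithread member
 * are made up for the example; real drivers normally reach this code via
 * the bus front-ends rather than calling register_int() directly.
 */
#if 0
struct exampledev_softc {
        thread_t sc_ithread;            /* hypothetical driver state */
};

static void
exampledev_intr(void *arg)              /* matches inthand2_t */
{
        struct exampledev_softc *sc = arg;

        /* ack the device and process completed work here */
}

static void
exampledev_attach_intr(struct exampledev_softc *sc, int irq)
{
        /* a NULL maskptr selects the dummy mask (no legacy spl interlock) */
        sc->sc_ithread = register_int(irq, exampledev_intr, sc,
                "exampledev", NULL);
        /* optionally feed interrupt timing into the entropy pool */
        register_randintr(irq);
}

static void
exampledev_detach_intr(struct exampledev_softc *sc, int irq)
{
        unregister_randintr(irq);
        unregister_int(irq, exampledev_intr);
}
#endif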
/*
 * Dispatch an interrupt.  If there's nothing to do we have a stray
 * interrupt and can just return, leaving the interrupt masked.
 *
 * We need to schedule the interrupt thread and set its irunning[] bit.
 * If we are not on the interrupt thread's cpu we have to send a message
 * to the correct cpu that will issue the desired action (interlocking
 * with the interrupt thread's critical section).
 *
 * We are NOT in a critical section, which will allow the scheduled
 * interrupt to preempt us.  The MP lock might *NOT* be held here.
 */
static void
sched_ithd_remote(void *arg)
{
        sched_ithd((int)arg);
}

void
sched_ithd(int intr)
{
        thread_t td;

        if ((td = ithreads[intr]) != NULL) {
                if (intlists[intr] == NULL) {
                        printf("sched_ithd: stray interrupt %d\n", intr);
                } else {
                        if (td->td_gd == mycpu) {
                                irunning[intr] = 1;
                                lwkt_schedule(td); /* preemption handled internally */
                        } else {
                                lwkt_send_ipiq(td->td_gd, sched_ithd_remote,
                                        (void *)intr);
                        }
                }
        } else {
                printf("sched_ithd: stray interrupt %d\n", intr);
        }
}

/*
 * This is run from a periodic SYSTIMER (and thus must be MP safe; the BGL
 * might not be held).
 */
static void
ithread_livelock_wakeup(systimer_t info)
{
        int intr = (int)info->data;
        thread_t td;

        if ((td = ithreads[intr]) != NULL)
                lwkt_schedule(td);
}
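/*
 * Illustrative only: the platform interrupt vector is machine dependent and
 * not part of this file, but per the comments above and below it is expected
 * to mask the source and then hand the interrupt off via sched_ithd().  A
 * minimal sketch, with the masking step reduced to a placeholder:
 */
#if 0
void
example_hw_vector(int intr)
{
        /* 1. mask the interrupt source at the ICU/APIC (platform code) */
        /* 2. schedule the ithread; ithread_done() later unmasks the source */
        sched_ithd(intr);
}
#endif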
/*
 * Interrupt threads run this as their main loop.
 *
 * The handler begins execution outside a critical section and with the BGL
 * held.
 *
 * The irunning state starts at 0.  When an interrupt occurs, the hardware
 * interrupt is disabled and sched_ithd() is called.  The HW interrupt
 * remains disabled until all routines have run.  We then call ithread_done()
 * to reenable the HW interrupt and deschedule us until the next interrupt.
 *
 * We are responsible for atomically checking irunning[], and ithread_done()
 * is responsible for atomically checking for platform-specific delayed
 * interrupts.  irunning[] for our irq is only set in the context of our cpu,
 * so a critical section is a sufficient interlock.
 */
#define LIVELOCK_TIMEFRAME(freq)        ((freq) >> 2)   /* 1/4 second */

static void
ithread_handler(void *arg)
{
        int intr = (int)arg;
        int freq;
        u_int bticks;
        u_int cputicks;
        intrec_t **list = &intlists[intr];
        intrec_t *rec;
        intrec_t *nrec;
        struct random_softc *sc = &irandom_ary[intr];
        globaldata_t gd = mycpu;
        int in_crit_section;    /* REMOVE WHEN TESTING COMPLETE */
        intrmask_t s;

        /*
         * The loop must be entered with one critical section held.
         */
        crit_enter_gd(gd);

        for (;;) {
                /*
                 * Deal with the sysctl variable allowing the interrupt
                 * thread to run without a critical section.  Once this is
                 * proven out it will become the default.  Note that a
                 * critical section is always held as of the top of the loop.
                 */
                in_crit_section = int_use_crit_section;
                if (in_crit_section == 0)
                        crit_exit_gd(gd);

                /*
                 * We can get woken up by the livelock periodic code too; run
                 * the handlers only if there is a real interrupt pending. XXX
                 *
                 * Clear irunning[] prior to running the handlers to interlock
                 * against new events occurring during processing of existing
                 * events.
                 */
                irunning[intr] = 0;
                for (rec = *list; rec; rec = nrec) {
                        nrec = rec->next;
                        s = splq(*rec->maskptr);
                        rec->handler(rec->argument);
                        splx(s);
                }

                /*
                 * This is our interrupt hook to add rate randomness to the
                 * random number generator.
                 */
                if (sc->sc_enabled)
                        add_interrupt_randomness(intr);

                /*
                 * This is our livelock test.  If we hit the rate limit we
                 * limit ourselves to livelock_fallback interrupts/sec until
                 * the rate falls below 50% of that value, then we unlimit
                 * again.
                 *
                 * XXX calling cputimer_count() is expensive but a livelock
                 * may prevent other interrupts from occurring so we cannot
                 * use ticks.
                 */
                cputicks = cputimer_count();
                ++ill_count[intr];
                bticks = cputicks - ill_ticks[intr];
                ill_ticks[intr] = cputicks;
                if (bticks > cputimer_freq)
                        bticks = cputimer_freq;

                switch(ill_state[intr]) {
                case LIVELOCK_NONE:
                        ill_delta[intr] += bticks;
                        if (ill_delta[intr] < LIVELOCK_TIMEFRAME(cputimer_freq))
                                break;
                        freq = (int64_t)ill_count[intr] * cputimer_freq /
                                ill_delta[intr];
                        ill_delta[intr] = 0;
                        ill_count[intr] = 0;
                        if (freq < livelock_limit)
                                break;
                        printf("intr %d at %d hz, livelocked! limiting at %d hz\n",
                                intr, freq, livelock_fallback);
                        ill_state[intr] = LIVELOCK_LIMITED;
                        bticks = 0;
                        /* force periodic check to avoid stale removal (if ints stop) */
                        systimer_init_periodic(&ill_rtimer[intr],
                                ithread_livelock_wakeup, (void *)intr, 1);
                        /* fall through */
                case LIVELOCK_LIMITED:
                        /*
                         * Delay (us) before rearming the interrupt
                         */
                        systimer_init_oneshot(&ill_timer[intr],
                                ithread_livelock_wakeup, (void *)intr,
                                1 + 1000000 / livelock_fallback);
                        lwkt_deschedule_self(curthread);
                        lwkt_switch();

                        /* in case we were woken up by something else */
                        systimer_del(&ill_timer[intr]);

                        /*
                         * Calculate the interrupt rate (note that due to our
                         * delay it will not exceed livelock_fallback).
                         */
                        ill_delta[intr] += bticks;
                        if (ill_delta[intr] < LIVELOCK_TIMEFRAME(cputimer_freq))
                                break;
                        freq = (int64_t)ill_count[intr] * cputimer_freq /
                                ill_delta[intr];
                        ill_delta[intr] = 0;
                        ill_count[intr] = 0;
                        if (freq < (livelock_fallback >> 1)) {
                                printf("intr %d at %d hz, removing livelock limit\n",
                                        intr, freq);
                                ill_state[intr] = LIVELOCK_NONE;
                                systimer_del(&ill_rtimer[intr]);
                        }
                        break;
                }

                /*
                 * There are two races here.  irunning[] is set by sched_ithd()
                 * in the context of our cpu and is critical-section safe.  We
                 * are responsible for checking it.  ipending is not critical-
                 * section safe and must be handled by the platform-specific
                 * ithread_done() routine.
                 */
                if (in_crit_section) {
                        if (irunning[intr] == 0)
                                ithread_done(intr);
                } else {
                        crit_enter_gd(gd);
                        if (irunning[intr] == 0)
                                ithread_done(intr);
                }
                /* must be in critical section on loop */
        }
}
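/*
 * Worked example of the limiter arithmetic above, assuming the default
 * sysctl settings (cputimer_freq itself is platform dependent):
 *
 *      livelock_limit    = 50000   trigger: a sustained rate >= 50000 Hz
 *                                  measured over LIVELOCK_TIMEFRAME
 *                                  (1/4 second)
 *      livelock_fallback = 20000   oneshot delay = 1 + 1000000/20000
 *                                  = 51 us, capping at just under 20000 Hz
 *      exit threshold    = 20000 >> 1 = 10000 Hz (50% of the fallback)
 */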
/*
 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
 * The data for this is machine dependent, and the declarations are in
 * machine dependent code.  The layout of intrnames and intrcnt however is
 * machine independent.
 *
 * We do not know the length of intrcnt and intrnames at compile time, so
 * calculate things at run time.
 */
static int
sysctl_intrnames(SYSCTL_HANDLER_ARGS)
{
        return (sysctl_handle_opaque(oidp, intrnames, eintrnames - intrnames,
                req));
}

SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
        NULL, 0, sysctl_intrnames, "", "Interrupt Names");

static int
sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
{
        return (sysctl_handle_opaque(oidp, intrcnt,
                (char *)eintrcnt - (char *)intrcnt, req));
}

SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
        NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");
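/*
 * Illustrative only (userland, not part of this file): how a consumer such
 * as systat might read hw.intrcnt through sysctlbyname(3).  The element
 * type of intrcnt is assumed to be u_long here, matching the historical
 * BSD declaration; the two-call pattern sizes the buffer first.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        u_long *counts;
        size_t len, i, n;

        /* first call sizes the buffer, second call fills it */
        if (sysctlbyname("hw.intrcnt", NULL, &len, NULL, 0) < 0)
                return (1);
        if ((counts = malloc(len)) == NULL)
                return (1);
        if (sysctlbyname("hw.intrcnt", counts, &len, NULL, 0) < 0)
                return (1);
        n = len / sizeof(*counts);
        for (i = 0; i < n; ++i)
                printf("intr %zu: %lu\n", i, counts[i]);
        free(counts);
        return (0);
}
#endif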