/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/serialize.h>
#include <sys/interrupt.h>
#include <sys/bus.h>
#include <sys/machintr.h>

#include <machine/frame.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

struct intr_info;

typedef struct intrec {
	struct intrec *next;
	struct intr_info *info;
	inthand2_t *handler;
	void *argument;
	char *name;
	int intr;
	int intr_flags;
	struct lwkt_serialize *serializer;
} *intrec_t;

struct intr_info {
	intrec_t	i_reclist;
	struct thread	*i_thread;	/* don't embed struct thread */
	struct random_softc i_random;
	long		i_count;	/* interrupts dispatched */
	int		i_running;
	short		i_mplock_required;
	short		i_flags;
	int		i_fast;
	int		i_slow;
	int		i_state;
	int		i_errorticks;
	unsigned long	i_straycount;
	int		i_cpuid;
	int		i_intr;
};

struct intr_info_block {
	struct intr_info ary[MAXCPU][MAX_INTS];
};

static struct intr_info_block *intr_block;
static struct intr_info *swi_info_ary[MAX_SOFTINTS];

static int max_installed_hard_intr[MAXCPU];

MALLOC_DEFINE(M_INTRMNG, "intrmng", "interrupt management");

#define EMERGENCY_INTR_POLLING_FREQ_MAX 20000

/*
 * Assert that callers into interrupt handlers don't return with
 * dangling tokens, spinlocks, or mp locks.
 */
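/*
 * A typical usage pattern for these macros (a sketch; see
 * ithread_handler() and ithread_fast_handler() below):
 *
 *	TD_INVARIANTS_DECLARE;
 *
 *	TD_INVARIANTS_GET(td);
 *	rec->handler(rec->argument, frame);
 *	TD_INVARIANTS_TEST(td, rec->name);
 */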
#ifdef INVARIANTS

#define TD_INVARIANTS_DECLARE	\
	int spincount;		\
	lwkt_tokref_t curstop

#define TD_INVARIANTS_GET(td)					\
	do {							\
		spincount = (td)->td_gd->gd_spinlocks;		\
		curstop = (td)->td_toks_stop;			\
	} while(0)

#define TD_INVARIANTS_TEST(td, name)					\
	do {								\
		KASSERT(spincount == (td)->td_gd->gd_spinlocks,		\
			("spincount mismatch after interrupt handler %s", \
			name));						\
		KASSERT(curstop == (td)->td_toks_stop,			\
			("token count mismatch after interrupt handler %s", \
			name));						\
	} while(0)

#else

/* !INVARIANTS */

#define TD_INVARIANTS_DECLARE
#define TD_INVARIANTS_GET(td)
#define TD_INVARIANTS_TEST(td, name)

#endif /* INVARIANTS */

static int sysctl_emergency_freq(SYSCTL_HANDLER_ARGS);
static int sysctl_emergency_enable(SYSCTL_HANDLER_ARGS);
static void emergency_intr_timer_callback(systimer_t, int, struct intrframe *);
static void ithread_handler(void *arg);
static void ithread_emergency(void *arg);
static void report_stray_interrupt(struct intr_info *info, const char *func);
static void int_moveto_destcpu(int *, int);
static void int_moveto_origcpu(int, int);
static void sched_ithd_intern(struct intr_info *info);

static struct systimer emergency_intr_timer[MAXCPU];
static struct thread *emergency_intr_thread[MAXCPU];

#define ISTATE_NOTHREAD		0
#define ISTATE_NORMAL		1
#define ISTATE_LIVELOCKED	2

static int livelock_limit = 40000;
static int livelock_limit_hi = 120000;
static int livelock_lowater = 20000;
static int livelock_debug = -1;

SYSCTL_INT(_kern, OID_AUTO, livelock_limit,
	CTLFLAG_RW, &livelock_limit, 0, "Livelock interrupt rate limit");
SYSCTL_INT(_kern, OID_AUTO, livelock_limit_hi,
	CTLFLAG_RW, &livelock_limit_hi, 0,
	"Livelock interrupt rate limit (high frequency)");
SYSCTL_INT(_kern, OID_AUTO, livelock_lowater,
	CTLFLAG_RW, &livelock_lowater, 0, "Livelock low-water mark restore");
SYSCTL_INT(_kern, OID_AUTO, livelock_debug,
	CTLFLAG_RW, &livelock_debug, 0, "Livelock debug intr#");

static int emergency_intr_enable = 0;	/* emergency interrupt polling */
TUNABLE_INT("kern.emergency_intr_enable", &emergency_intr_enable);
SYSCTL_PROC(_kern, OID_AUTO, emergency_intr_enable, CTLTYPE_INT | CTLFLAG_RW,
	0, 0, sysctl_emergency_enable, "I", "Emergency Interrupt Poll Enable");

static int emergency_intr_freq = 10;	/* emergency polling frequency */
TUNABLE_INT("kern.emergency_intr_freq", &emergency_intr_freq);
SYSCTL_PROC(_kern, OID_AUTO, emergency_intr_freq, CTLTYPE_INT | CTLFLAG_RW,
	0, 0, sysctl_emergency_freq, "I", "Emergency Interrupt Poll Frequency");

/*
 * Sysctl support routines
 */
static int
sysctl_emergency_enable(SYSCTL_HANDLER_ARGS)
{
	int error, enabled, cpuid, freq;

	enabled = emergency_intr_enable;
	error = sysctl_handle_int(oidp, &enabled, 0, req);
	if (error || req->newptr == NULL)
		return error;
	emergency_intr_enable = enabled;
	if (emergency_intr_enable)
		freq = emergency_intr_freq;
	else
		freq = 1;

	for (cpuid = 0; cpuid < ncpus; ++cpuid)
		systimer_adjust_periodic(&emergency_intr_timer[cpuid], freq);
	return 0;
}

static int
sysctl_emergency_freq(SYSCTL_HANDLER_ARGS)
{
	int error, phz, cpuid, freq;

	phz = emergency_intr_freq;
	error = sysctl_handle_int(oidp, &phz, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (phz <= 0)
		return EINVAL;
	else if (phz > EMERGENCY_INTR_POLLING_FREQ_MAX)
		phz = EMERGENCY_INTR_POLLING_FREQ_MAX;

	emergency_intr_freq = phz;
	if (emergency_intr_enable)
		freq = emergency_intr_freq;
	else
		freq = 1;

	for (cpuid = 0; cpuid < ncpus; ++cpuid)
		systimer_adjust_periodic(&emergency_intr_timer[cpuid], freq);
	return 0;
}
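/*
 * Example of enabling the poller (a hypothetical invocation from
 * userland; the TUNABLE_INTs above may also be set via loader.conf):
 *
 *	sysctl kern.emergency_intr_freq=100
 *	sysctl kern.emergency_intr_enable=1
 *
 * This polls all handlers not marked INTR_NOPOLL at 100hz.  When
 * disabled the systimers are dropped back to 1hz rather than deleted.
 */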
/*
 * Register an SWI or INTerrupt handler.
 */
void *
register_swi(int intr, inthand2_t *handler, void *arg, const char *name,
	struct lwkt_serialize *serializer, int cpuid)
{
	if (intr < FIRST_SOFTINT || intr >= MAX_INTS)
		panic("register_swi: bad intr %d", intr);

	if (cpuid < 0)
		cpuid = intr % ncpus;
	return(register_int(intr, handler, arg, name, serializer, 0, cpuid));
}

void *
register_swi_mp(int intr, inthand2_t *handler, void *arg, const char *name,
	struct lwkt_serialize *serializer, int cpuid)
{
	if (intr < FIRST_SOFTINT || intr >= MAX_INTS)
		panic("register_swi_mp: bad intr %d", intr);

	if (cpuid < 0)
		cpuid = intr % ncpus;
	return(register_int(intr, handler, arg, name, serializer,
		INTR_MPSAFE, cpuid));
}
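/*
 * Example of registering an MPSAFE hardware interrupt handler with
 * register_int() below ("foo_intr" and "sc" are hypothetical driver
 * names used only for illustration):
 *
 *	sc->intr_rec = register_int(irq, foo_intr, sc, "foo0",
 *				    &sc->serializer, INTR_MPSAFE, mycpuid);
 *
 * The opaque cookie returned is later handed to unregister_int().
 */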
void *
register_int(int intr, inthand2_t *handler, void *arg, const char *name,
	struct lwkt_serialize *serializer, int intr_flags, int cpuid)
{
	struct intr_info *info;
	struct intrec **list;
	intrec_t rec = NULL;
	int orig_cpuid;

	KKASSERT(cpuid >= 0 && cpuid < ncpus);

	if (intr < 0 || intr >= MAX_INTS)
		panic("register_int: bad intr %d", intr);
	if (name == NULL)
		name = "???";
	info = &intr_block->ary[cpuid][intr];

	int_moveto_destcpu(&orig_cpuid, cpuid);

	/*
	 * This intr has been registered as an exclusive one, so
	 * it can't be shared.
	 */
	if (info->i_flags & INTR_EXCL)
		goto done;

	/*
	 * This intr has been registered as a shared one, so it
	 * can't be used for an exclusive handler.
	 */
	list = &info->i_reclist;
	if ((intr_flags & INTR_EXCL) && *list != NULL)
		goto done;

	/*
	 * Construct an interrupt handler record
	 */
	rec = kmalloc(sizeof(struct intrec), M_DEVBUF, M_INTWAIT);
	rec->name = kmalloc(strlen(name) + 1, M_DEVBUF, M_INTWAIT);
	strcpy(rec->name, name);

	rec->info = info;
	rec->handler = handler;
	rec->argument = arg;
	rec->intr = intr;
	rec->intr_flags = intr_flags;
	rec->next = NULL;
	rec->serializer = serializer;

	/*
	 * Create an emergency polling thread and set up a systimer to wake
	 * it up.  objcache may not be operational yet, so use kmalloc().
	 */
	if (emergency_intr_thread[cpuid] == NULL) {
		emergency_intr_thread[cpuid] = kmalloc(sizeof(struct thread),
		    M_DEVBUF, M_INTWAIT | M_ZERO);
		lwkt_create(ithread_emergency, NULL, NULL,
		    emergency_intr_thread[cpuid],
		    TDF_NOSTART | TDF_INTTHREAD, cpuid, "ithreadE %d",
		    cpuid);
		systimer_init_periodic_nq(&emergency_intr_timer[cpuid],
		    emergency_intr_timer_callback,
		    emergency_intr_thread[cpuid],
		    (emergency_intr_enable ? emergency_intr_freq : 1));
	}

	/*
	 * Create an interrupt thread if necessary, leave it in an
	 * unscheduled state.
	 */
	if (info->i_state == ISTATE_NOTHREAD) {
		info->i_state = ISTATE_NORMAL;
		info->i_thread = kmalloc(sizeof(struct thread), M_DEVBUF,
		    M_INTWAIT | M_ZERO);
		lwkt_create(ithread_handler, (void *)(intptr_t)intr, NULL,
		    info->i_thread, TDF_NOSTART | TDF_INTTHREAD, cpuid,
		    "ithread%d %d", intr, cpuid);
		if (intr >= FIRST_SOFTINT)
			lwkt_setpri(info->i_thread, TDPRI_SOFT_NORM);
		else
			lwkt_setpri(info->i_thread, TDPRI_INT_MED);
		info->i_thread->td_preemptable = lwkt_preempt;
	}

	/*
	 * Keep track of how many fast and slow interrupts we have.
	 * Set i_mplock_required if any handler in the chain requires
	 * the MP lock to operate.
	 */
	if ((intr_flags & INTR_MPSAFE) == 0)
		info->i_mplock_required = 1;
	if (intr_flags & INTR_CLOCK)
		++info->i_fast;
	else
		++info->i_slow;

	info->i_flags |= (intr_flags & INTR_EXCL);
	if (info->i_slow + info->i_fast == 1 && (intr_flags & INTR_HIFREQ)) {
		/*
		 * Allow a high frequency interrupt, if this intr is not
		 * shared yet.
		 */
		info->i_flags |= INTR_HIFREQ;
	} else {
		info->i_flags &= ~INTR_HIFREQ;
	}

	/*
	 * Enable random number generation keying off of this interrupt.
	 */
	if ((intr_flags & INTR_NOENTROPY) == 0 &&
	    info->i_random.sc_enabled == 0) {
		info->i_random.sc_enabled = 1;
		info->i_random.sc_intr = intr;
	}

	/*
	 * Add the record to the interrupt list.
	 */
	crit_enter();
	while (*list != NULL)
		list = &(*list)->next;
	*list = rec;
	crit_exit();

	/*
	 * Update max_installed_hard_intr to make the emergency intr poll
	 * a bit more efficient.
	 */
	if (intr < FIRST_SOFTINT) {
		if (max_installed_hard_intr[cpuid] <= intr)
			max_installed_hard_intr[cpuid] = intr + 1;
	}

	if (intr >= FIRST_SOFTINT)
		swi_info_ary[intr - FIRST_SOFTINT] = info;

	/*
	 * Setup the machine level interrupt vector
	 */
	if (intr < FIRST_SOFTINT && info->i_slow + info->i_fast == 1)
		machintr_intr_setup(intr, intr_flags);

done:
	int_moveto_origcpu(orig_cpuid, cpuid);
	return(rec);
}

void
unregister_swi(void *id, int intr, int cpuid)
{
	if (cpuid < 0)
		cpuid = intr % ncpus;

	unregister_int(id, cpuid);
}
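/*
 * Note: the id argument below is the opaque intrec_t cookie returned
 * by register_int()/register_swi() above; the interrupt number is
 * recovered from the record itself.
 */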
void
unregister_int(void *id, int cpuid)
{
	struct intr_info *info;
	struct intrec **list;
	intrec_t rec;
	int intr, orig_cpuid;

	KKASSERT(cpuid >= 0 && cpuid < ncpus);

	intr = ((intrec_t)id)->intr;

	if (intr < 0 || intr >= MAX_INTS)
		panic("unregister_int: bad intr %d", intr);

	info = &intr_block->ary[cpuid][intr];

	int_moveto_destcpu(&orig_cpuid, cpuid);

	/*
	 * Remove the interrupt descriptor, adjust the descriptor count,
	 * and teardown the machine level vector if this was the last
	 * interrupt.
	 */
	crit_enter();
	list = &info->i_reclist;
	while ((rec = *list) != NULL) {
		if (rec == id)
			break;
		list = &rec->next;
	}
	if (rec) {
		intrec_t rec0;

		*list = rec->next;
		if (rec->intr_flags & INTR_CLOCK)
			--info->i_fast;
		else
			--info->i_slow;
		if (intr < FIRST_SOFTINT && info->i_fast + info->i_slow == 0)
			machintr_intr_teardown(intr);

		/*
		 * Clear i_mplock_required if no handlers in the chain
		 * require the MP lock.
		 */
		for (rec0 = info->i_reclist; rec0; rec0 = rec0->next) {
			if ((rec0->intr_flags & INTR_MPSAFE) == 0)
				break;
		}
		if (rec0 == NULL)
			info->i_mplock_required = 0;
	}

	if (info->i_reclist == NULL) {
		info->i_flags = 0;
		if (intr >= FIRST_SOFTINT)
			swi_info_ary[intr - FIRST_SOFTINT] = NULL;
	} else if (info->i_fast + info->i_slow == 1 &&
	    (info->i_reclist->intr_flags & INTR_HIFREQ)) {
		/* Unshared high frequency interrupt. */
		info->i_flags |= INTR_HIFREQ;
	}

	crit_exit();

	int_moveto_origcpu(orig_cpuid, cpuid);

	/*
	 * Free the record.
	 */
	if (rec != NULL) {
		kfree(rec->name, M_DEVBUF);
		kfree(rec, M_DEVBUF);
	} else {
		kprintf("warning: unregister_int: int %d handler for %s "
		    "not found\n", intr, ((intrec_t)id)->name);
	}
}

long
get_interrupt_counter(int intr, int cpuid)
{
	struct intr_info *info;

	KKASSERT(cpuid >= 0 && cpuid < ncpus);

	if (intr < 0 || intr >= MAX_INTS)
		panic("get_interrupt_counter: bad intr %d", intr);
	info = &intr_block->ary[cpuid][intr];
	return(info->i_count);
}

void
register_randintr(int intr)
{
	struct intr_info *info;
	int cpuid;

	if (intr < 0 || intr >= MAX_INTS)
		panic("register_randintr: bad intr %d", intr);

	for (cpuid = 0; cpuid < ncpus; ++cpuid) {
		info = &intr_block->ary[cpuid][intr];
		info->i_random.sc_intr = intr;
		info->i_random.sc_enabled = 1;
	}
}

void
unregister_randintr(int intr)
{
	struct intr_info *info;
	int cpuid;

	if (intr < 0 || intr >= MAX_INTS)
		panic("unregister_randintr: bad intr %d", intr);

	for (cpuid = 0; cpuid < ncpus; ++cpuid) {
		info = &intr_block->ary[cpuid][intr];
		info->i_random.sc_enabled = -1;
	}
}

int
next_registered_randintr(int intr)
{
	struct intr_info *info;

	if (intr < 0 || intr >= MAX_INTS)
		panic("next_registered_randintr: bad intr %d", intr);

	while (intr < MAX_INTS) {
		int cpuid;

		for (cpuid = 0; cpuid < ncpus; ++cpuid) {
			info = &intr_block->ary[cpuid][intr];
			if (info->i_random.sc_enabled > 0)
				return intr;
		}
		++intr;
	}
	return intr;
}
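/*
 * next_registered_randintr() returns MAX_INTS when no further entropy
 * sources exist, so the usual iteration idiom is (a sketch):
 *
 *	for (intr = next_registered_randintr(0); intr < MAX_INTS;
 *	     intr = next_registered_randintr(intr + 1)) {
 *		...
 *	}
 */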
/*
 * Dispatch an interrupt.  If there's nothing to do we have a stray
 * interrupt and can just return, leaving the interrupt masked.
 *
 * We need to schedule the interrupt and set its i_running bit.  If
 * we are not on the interrupt thread's cpu we have to send a message
 * to the correct cpu that will issue the desired action (interlocking
 * with the interrupt thread's critical section).  We do NOT attempt to
 * reschedule interrupts whose i_running bit is already set because
 * this would prematurely wakeup a livelock-limited interrupt thread.
 *
 * i_running is only tested/set on the same cpu as the interrupt thread.
 *
 * We are NOT in a critical section, which will allow the scheduled
 * interrupt to preempt us.  The MP lock might *NOT* be held here.
 */
static void
sched_ithd_remote(void *arg)
{
	sched_ithd_intern(arg);
}

static void
sched_ithd_intern(struct intr_info *info)
{
	++info->i_count;
	if (info->i_state != ISTATE_NOTHREAD) {
		if (info->i_reclist == NULL) {
			report_stray_interrupt(info, "sched_ithd");
		} else {
			if (info->i_thread->td_gd == mycpu) {
				if (info->i_running == 0) {
					info->i_running = 1;
					if (info->i_state != ISTATE_LIVELOCKED) {
						/* MIGHT PREEMPT */
						lwkt_schedule(info->i_thread);
					}
				}
			} else {
				lwkt_send_ipiq(info->i_thread->td_gd,
				    sched_ithd_remote, info);
			}
		}
	} else {
		report_stray_interrupt(info, "sched_ithd");
	}
}

void
sched_ithd_soft(int intr)
{
	struct intr_info *info;

	KKASSERT(intr >= FIRST_SOFTINT && intr < MAX_INTS);

	info = swi_info_ary[intr - FIRST_SOFTINT];
	if (info != NULL) {
		sched_ithd_intern(info);
	} else {
		kprintf("unregistered softint %d got scheduled on cpu%d\n",
		    intr, mycpuid);
	}
}

void
sched_ithd_hard(int intr)
{
	KKASSERT(intr >= 0 && intr < MAX_HARDINTS);
	sched_ithd_intern(&intr_block->ary[mycpuid][intr]);
}

#ifdef _KERNEL_VIRTUAL

void
sched_ithd_hard_virtual(int intr)
{
	KKASSERT(intr >= 0 && intr < MAX_HARDINTS);
	sched_ithd_intern(&intr_block->ary[0][intr]);
}

void *
register_int_virtual(int intr, inthand2_t *handler, void *arg,
	const char *name, struct lwkt_serialize *serializer, int intr_flags)
{
	return register_int(intr, handler, arg, name, serializer, intr_flags,
	    0);
}

void
unregister_int_virtual(void *id)
{
	unregister_int(id, 0);
}

#endif /* _KERNEL_VIRTUAL */

static void
report_stray_interrupt(struct intr_info *info, const char *func)
{
	++info->i_straycount;
	if (info->i_straycount < 10) {
		if (info->i_errorticks == ticks)
			return;
		info->i_errorticks = ticks;
		kprintf("%s: stray interrupt %d on cpu%d\n",
		    func, info->i_intr, mycpuid);
	} else if (info->i_straycount == 10) {
		kprintf("%s: %ld stray interrupts %d on cpu%d - "
		    "there will be no further reports\n", func,
		    info->i_straycount, info->i_intr, mycpuid);
	}
}

/*
 * This is run from a periodic SYSTIMER (and thus must be MP safe, the BGL
 * might not be held).
 */
static void
ithread_livelock_wakeup(systimer_t st, int in_ipi __unused,
	struct intrframe *frame __unused)
{
	struct intr_info *info;

	info = &intr_block->ary[mycpuid][(int)(intptr_t)st->data];
	if (info->i_state != ISTATE_NOTHREAD)
		lwkt_schedule(info->i_thread);
}

/*
 * Schedule ithread within fast intr handler
 *
 * XXX Protect sched_ithd_hard() call with gd_intr_nesting_level?
 * Interrupts aren't enabled, but still...
 */
static __inline void
ithread_fast_sched(int intr, thread_t td)
{
	++td->td_nest_count;

	/*
	 * We are already in a critical section, exit it now to
	 * allow preemption.
	 */
	crit_exit_quick(td);
	sched_ithd_hard(intr);
	crit_enter_quick(td);

	--td->td_nest_count;
}

/*
 * This function is called directly from the ICU or APIC vector code assembly
 * to process an interrupt.  The critical section and interrupt deferral
 * checks have already been done and the function is entered WITH a
 * critical section held (the KKASSERT below depends on it).  The BGL
 * may or may not be held.
 *
 * Must return non-zero if we do not want the vector code to re-enable
 * the interrupt (which we don't if we have to schedule the interrupt).
 */
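/*
 * Return-value contract with the vector assembly, summarized:
 *
 *	0  - every handler ran inline; the vector code re-enables
 *	     the interrupt.
 *	!0 - the interrupt was scheduled to its ithread, which is now
 *	     responsible for re-enabling it.
 */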
int ithread_fast_handler(struct intrframe *frame);

int
ithread_fast_handler(struct intrframe *frame)
{
	int intr;
	struct intr_info *info;
	struct intrec **list;
	int must_schedule;
	int got_mplock;
	TD_INVARIANTS_DECLARE;
	intrec_t rec, nrec;
	globaldata_t gd;
	thread_t td;

	intr = frame->if_vec;
	gd = mycpu;
	td = curthread;

	/* We must be in critical section. */
	KKASSERT(td->td_critcount);

	info = &intr_block->ary[mycpuid][intr];

	/*
	 * If we are not processing any FAST interrupts, just schedule the
	 * thing.
	 */
	if (info->i_fast == 0) {
		++gd->gd_cnt.v_intr;
		ithread_fast_sched(intr, td);
		return(1);
	}

	/*
	 * This should not normally occur since interrupts ought to be
	 * masked if the ithread has been scheduled or is running.
	 */
	if (info->i_running)
		return(1);

	/*
	 * Bump the interrupt nesting level to process any FAST interrupts.
	 * Obtain the MP lock as necessary.  If the MP lock cannot be
	 * obtained, schedule the interrupt thread to deal with the issue
	 * instead.
	 *
	 * To reduce overhead, just leave the MP lock held once it has been
	 * obtained.
	 */
	++gd->gd_intr_nesting_level;
	++gd->gd_cnt.v_intr;
	must_schedule = info->i_slow;
	got_mplock = 0;

	TD_INVARIANTS_GET(td);
	list = &info->i_reclist;

	for (rec = *list; rec; rec = nrec) {
		/* rec may be invalid after call */
		nrec = rec->next;

		if (rec->intr_flags & INTR_CLOCK) {
			if ((rec->intr_flags & INTR_MPSAFE) == 0 &&
			    got_mplock == 0) {
				if (try_mplock() == 0) {
					/*
					 * Couldn't get the MP lock; just
					 * schedule it.
					 */
					must_schedule = 1;
					break;
				}
				got_mplock = 1;
			}
			if (rec->serializer) {
				must_schedule += lwkt_serialize_handler_try(
				    rec->serializer, rec->handler,
				    rec->argument, frame);
			} else {
				rec->handler(rec->argument, frame);
			}
			TD_INVARIANTS_TEST(td, rec->name);
		}
	}

	/*
	 * Cleanup
	 */
	--gd->gd_intr_nesting_level;
	if (got_mplock)
		rel_mplock();

	/*
	 * If we had a problem, or mixed fast and slow interrupt handlers are
	 * registered, schedule the ithread to catch the missed records (it
	 * will just re-run all of them).  A return value of 0 indicates that
	 * all handlers have been run and the interrupt can be re-enabled, and
	 * a non-zero return indicates that the interrupt thread controls
	 * re-enablement.
	 */
	if (must_schedule > 0)
		ithread_fast_sched(intr, td);
	else if (must_schedule == 0)
		++info->i_count;
	return(must_schedule);
}
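/*
 * Note on the serializer path above: lwkt_serialize_handler_try() is
 * expected to return 0 when the handler was run and non-zero when it
 * could not be, which is why its result accumulates into must_schedule
 * and forces the ithread to pick up the missed record.
 */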
/*
 * Interrupt threads run this as their main loop.
 *
 * The handler begins execution outside a critical section and with no
 * MP lock.
 *
 * The i_running state starts at 0.  When an interrupt occurs, the
 * hardware interrupt is disabled and sched_ithd_hard() is called.  The
 * HW interrupt remains disabled until all routines have run.  We then
 * call machintr_intr_enable() to reenable the HW interrupt and
 * deschedule us until the next interrupt.
 *
 * We are responsible for atomically checking i_running.  i_running for
 * our irq is only set in the context of our cpu, so a critical section
 * is a sufficient interlock.
 */
#define LIVELOCK_TIMEFRAME(freq)	((freq) >> 2)	/* 1/4 second */

static void
ithread_handler(void *arg)
{
	struct intr_info *info;
	int use_limit;
	uint32_t lseconds;
	int intr, cpuid = mycpuid;
	int mpheld;
	struct intrec **list;
	intrec_t rec, nrec;
	globaldata_t gd;
	struct systimer ill_timer;	/* enforced freq. timer */
	u_int ill_count;		/* interrupt livelock counter */
	int upper_limit;		/* interrupt livelock upper limit */
	TD_INVARIANTS_DECLARE;

	ill_count = 0;
	intr = (int)(intptr_t)arg;
	info = &intr_block->ary[cpuid][intr];
	list = &info->i_reclist;

	/*
	 * The loop must be entered with one critical section held.  The
	 * thread does not hold the mplock on startup.
	 */
	gd = mycpu;
	lseconds = gd->gd_time_seconds;
	crit_enter_gd(gd);
	mpheld = 0;

	for (;;) {
		/*
		 * The chain is only considered MPSAFE if all its interrupt
		 * handlers are MPSAFE.  However, if intr_mpsafe has been
		 * turned off we always operate with the BGL.
		 */
		if (info->i_mplock_required != mpheld) {
			if (info->i_mplock_required) {
				KKASSERT(mpheld == 0);
				get_mplock();
				mpheld = 1;
			} else {
				KKASSERT(mpheld != 0);
				rel_mplock();
				mpheld = 0;
			}
		}

		TD_INVARIANTS_GET(gd->gd_curthread);

		/*
		 * If an interrupt is pending, clear i_running and execute
		 * the handlers.  Note that certain types of interrupts can
		 * re-trigger and set i_running again.
		 *
		 * Each handler is run in a critical section.  Note that we
		 * run both FAST and SLOW designated service routines.
		 */
		if (info->i_running) {
			++ill_count;
			info->i_running = 0;

			if (*list == NULL)
				report_stray_interrupt(info,
				    "ithread_handler");

			for (rec = *list; rec; rec = nrec) {
				/* rec may be invalid after call */
				nrec = rec->next;
				if (rec->serializer) {
					lwkt_serialize_handler_call(
					    rec->serializer, rec->handler,
					    rec->argument, NULL);
				} else {
					rec->handler(rec->argument, NULL);
				}
				TD_INVARIANTS_TEST(gd->gd_curthread,
				    rec->name);
			}
		}

		/*
		 * This is our interrupt hook to add rate randomness to the
		 * random number generator.
		 */
		if (info->i_random.sc_enabled > 0)
			add_interrupt_randomness(intr);

		/*
		 * Unmask the interrupt to allow it to trigger again.  This
		 * only applies to certain types of interrupts (typically
		 * level-triggered interrupts).  This can result in the
		 * interrupt retriggering, but the retrigger will not be
		 * processed until we cycle our critical section.
		 *
		 * Only unmask interrupts while handlers are installed.  It
		 * is possible to hit a situation where no handlers are
		 * installed due to a device driver livelocking and then
		 * tearing down its interrupt on close (the parallel bus
		 * being a good example).
		 */
		if (intr < FIRST_SOFTINT && *list)
			machintr_intr_enable(intr);

		/*
		 * Do a quick exit/enter to catch any higher-priority
		 * interrupt sources, such as the statclock, so thread time
		 * accounting will still work.  This may also cause an
		 * interrupt to re-trigger.
		 */
		crit_exit_gd(gd);
		crit_enter_gd(gd);

		/*
		 * LIVELOCK STATE MACHINE
		 */
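		/*
		 * State transitions, summarized from the cases below:
		 *
		 *	NORMAL -> LIVELOCKED	when more than upper_limit
		 *				interrupts arrive within one
		 *				second.
		 *	LIVELOCKED -> NORMAL	when the observed rate falls
		 *				back below livelock_lowater.
		 */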
		switch(info->i_state) {
		case ISTATE_NORMAL:
			/*
			 * Reset the count each second.
			 */
			if (lseconds != gd->gd_time_seconds) {
				lseconds = gd->gd_time_seconds;
				ill_count = 0;
			}

			/*
			 * If we did not exceed the frequency limit, we are
			 * done.  If the interrupt has not retriggered we
			 * deschedule ourselves.
			 */
			if (info->i_flags & INTR_HIFREQ)
				upper_limit = livelock_limit_hi;
			else
				upper_limit = livelock_limit;
			if (ill_count <= upper_limit) {
				if (info->i_running == 0) {
					lwkt_deschedule_self(gd->gd_curthread);
					lwkt_switch();
				}
				break;
			}

			/*
			 * Otherwise we are livelocked.  Set up a periodic
			 * systimer to wake the thread up at the limit
			 * frequency.
			 */
			kprintf("intr %d on cpu%d at %d/%d hz, "
			    "livelocked limit engaged!\n",
			    intr, cpuid, ill_count, upper_limit);
			info->i_state = ISTATE_LIVELOCKED;
			if ((use_limit = upper_limit) < 100)
				use_limit = 100;
			else if (use_limit > 500000)
				use_limit = 500000;
			systimer_init_periodic_nq(&ill_timer,
			    ithread_livelock_wakeup,
			    (void *)(intptr_t)intr, use_limit);
			/* fall through */
		case ISTATE_LIVELOCKED:
			/*
			 * Wait for our periodic timer to go off.  Since the
			 * interrupt has re-armed it can still set i_running,
			 * but it will not reschedule us while we are in a
			 * livelocked state.
			 */
			lwkt_deschedule_self(gd->gd_curthread);
			lwkt_switch();

			/*
			 * Check once a second to see if the livelock
			 * condition no longer applies.
			 */
			if (lseconds != gd->gd_time_seconds) {
				lseconds = gd->gd_time_seconds;
				if (ill_count < livelock_lowater) {
					info->i_state = ISTATE_NORMAL;
					systimer_del(&ill_timer);
					kprintf("intr %d on cpu%d at "
					    "%d/%d hz, livelock removed\n",
					    intr, cpuid, ill_count,
					    livelock_lowater);
				} else if (livelock_debug == intr ||
				    (bootverbose && cold)) {
					kprintf("intr %d on cpu%d at "
					    "%d/%d hz, in livelock\n",
					    intr, cpuid, ill_count,
					    livelock_lowater);
				}
				ill_count = 0;
			}
			break;
		}
	}
	/* NOT REACHED */
}
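/*
 * Worked example of the limiter above: with the default livelock_limit
 * of 40000, an interrupt storming at 100000/sec trips the limiter
 * partway into a second.  The ithread is then woken only by the
 * 40000hz systimer until the per-second count drops below
 * livelock_lowater (20000), at which point normal scheduling resumes
 * and the systimer is deleted.
 */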
/*
 * Emergency interrupt polling thread.  The thread begins execution
 * outside a critical section with the BGL held.
 *
 * If emergency interrupt polling is enabled, this thread will
 * execute all system interrupts not marked INTR_NOPOLL at the
 * specified polling frequency.
 *
 * WARNING!  This thread runs *ALL* interrupt service routines that
 * are not marked INTR_NOPOLL, which basically means everything except
 * the 8254 clock interrupt and the ATA interrupt.  It has very high
 * overhead and should only be used in situations where the machine
 * cannot otherwise be made to work.  Due to the severe performance
 * degradation, it should not be enabled on production machines.
 */
static void
ithread_emergency(void *arg __unused)
{
	globaldata_t gd = mycpu;
	struct intr_info *info;
	intrec_t rec, nrec;
	int intr, cpuid = mycpuid;
	TD_INVARIANTS_DECLARE;

	get_mplock();
	crit_enter_gd(gd);
	TD_INVARIANTS_GET(gd->gd_curthread);

	for (;;) {
		for (intr = 0; intr < max_installed_hard_intr[cpuid];
		    ++intr) {
			info = &intr_block->ary[cpuid][intr];
			for (rec = info->i_reclist; rec; rec = nrec) {
				/* rec may be invalid after call */
				nrec = rec->next;
				if ((rec->intr_flags & INTR_NOPOLL) == 0) {
					if (rec->serializer) {
						lwkt_serialize_handler_try(
						    rec->serializer,
						    rec->handler,
						    rec->argument, NULL);
					} else {
						rec->handler(rec->argument,
						    NULL);
					}
					TD_INVARIANTS_TEST(gd->gd_curthread,
					    rec->name);
				}
			}
		}
		lwkt_deschedule_self(gd->gd_curthread);
		lwkt_switch();
	}
	/* NOT REACHED */
}

/*
 * Systimer callback - schedule the emergency interrupt poll thread
 * if emergency polling is enabled.
 */
static
void
emergency_intr_timer_callback(systimer_t info, int in_ipi __unused,
	struct intrframe *frame __unused)
{
	if (emergency_intr_enable)
		lwkt_schedule(info->data);
}
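/*
 * Note: info->data above is the emergency_intr_thread[] pointer that
 * register_int() handed to systimer_init_periodic_nq(), so the wakeup
 * targets the polling thread belonging to the systimer's cpu.
 */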
"/" : ""), rec->name); 1096 len += strlen(buf + len); 1097 } 1098 if (len == 0) { 1099 ksnprintf(buf, sizeof(buf), "irq%d", intr); 1100 len = strlen(buf); 1101 } 1102 error = SYSCTL_OUT(req, buf, len + 1); 1103 } 1104 } 1105 return (error); 1106 } 1107 1108 SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD, 1109 NULL, 0, sysctl_intrnames, "", "Interrupt Names"); 1110 1111 static int 1112 sysctl_intrcnt_all(SYSCTL_HANDLER_ARGS) 1113 { 1114 struct intr_info *info; 1115 int error = 0; 1116 int intr, cpuid; 1117 1118 for (cpuid = 0; cpuid < ncpus; ++cpuid) { 1119 for (intr = 0; intr < MAX_INTS; ++intr) { 1120 info = &intr_block->ary[cpuid][intr]; 1121 1122 error = SYSCTL_OUT(req, &info->i_count, sizeof(info->i_count)); 1123 if (error) 1124 goto failed; 1125 } 1126 } 1127 failed: 1128 return(error); 1129 } 1130 1131 SYSCTL_PROC(_hw, OID_AUTO, intrcnt_all, CTLTYPE_OPAQUE | CTLFLAG_RD, 1132 NULL, 0, sysctl_intrcnt_all, "", "Interrupt Counts"); 1133 1134 SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD, 1135 NULL, 0, sysctl_intrcnt_all, "", "Interrupt Counts"); 1136 1137 static void 1138 int_moveto_destcpu(int *orig_cpuid0, int cpuid) 1139 { 1140 int orig_cpuid = mycpuid; 1141 1142 if (cpuid != orig_cpuid) 1143 lwkt_migratecpu(cpuid); 1144 1145 *orig_cpuid0 = orig_cpuid; 1146 } 1147 1148 static void 1149 int_moveto_origcpu(int orig_cpuid, int cpuid) 1150 { 1151 if (cpuid != orig_cpuid) 1152 lwkt_migratecpu(orig_cpuid); 1153 } 1154 1155 static void 1156 intr_init(void *dummy __unused) 1157 { 1158 int cpuid; 1159 1160 kprintf("Initialize MI interrupts\n"); 1161 1162 intr_block = kmalloc(sizeof(*intr_block), M_INTRMNG, 1163 M_INTWAIT | M_ZERO); 1164 1165 for (cpuid = 0; cpuid < ncpus; ++cpuid) { 1166 int intr; 1167 1168 for (intr = 0; intr < MAX_INTS; ++intr) { 1169 struct intr_info *info = &intr_block->ary[cpuid][intr]; 1170 1171 info->i_cpuid = cpuid; 1172 info->i_intr = intr; 1173 } 1174 } 1175 } 1176 SYSINIT(intr_init, SI_BOOT2_FINISH_PIC, SI_ORDER_ANY, intr_init, NULL); 1177