1 /* 2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved. 3 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD: src/sys/kern/kern_intr.c,v 1.24.2.1 2001/10/14 20:05:50 luigi Exp $ 27 * $DragonFly: src/sys/kern/kern_intr.c,v 1.55 2008/09/01 12:49:00 sephe Exp $ 28 * 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/malloc.h> 34 #include <sys/kernel.h> 35 #include <sys/sysctl.h> 36 #include <sys/thread.h> 37 #include <sys/proc.h> 38 #include <sys/thread2.h> 39 #include <sys/random.h> 40 #include <sys/serialize.h> 41 #include <sys/interrupt.h> 42 #include <sys/bus.h> 43 #include <sys/machintr.h> 44 45 #include <machine/frame.h> 46 47 #include <sys/interrupt.h> 48 49 struct info_info; 50 51 typedef struct intrec { 52 struct intrec *next; 53 struct intr_info *info; 54 inthand2_t *handler; 55 void *argument; 56 char *name; 57 int intr; 58 int intr_flags; 59 struct lwkt_serialize *serializer; 60 } *intrec_t; 61 62 struct intr_info { 63 intrec_t i_reclist; 64 struct thread i_thread; 65 struct random_softc i_random; 66 int i_running; 67 long i_count; /* interrupts dispatched */ 68 int i_mplock_required; 69 int i_fast; 70 int i_slow; 71 int i_state; 72 int i_errorticks; 73 unsigned long i_straycount; 74 } intr_info_ary[MAX_INTS]; 75 76 int max_installed_hard_intr; 77 int max_installed_soft_intr; 78 79 #define EMERGENCY_INTR_POLLING_FREQ_MAX 20000 80 81 static int sysctl_emergency_freq(SYSCTL_HANDLER_ARGS); 82 static int sysctl_emergency_enable(SYSCTL_HANDLER_ARGS); 83 static void emergency_intr_timer_callback(systimer_t, struct intrframe *); 84 static void ithread_handler(void *arg); 85 static void ithread_emergency(void *arg); 86 static void report_stray_interrupt(int intr, struct intr_info *info); 87 static void int_moveto_destcpu(int *, int *, int); 88 static void int_moveto_origcpu(int, int); 89 #ifdef SMP 90 static void intr_get_mplock(void); 91 #endif 92 93 int intr_info_size = sizeof(intr_info_ary) / sizeof(intr_info_ary[0]); 94 95 static struct systimer emergency_intr_timer; 96 static struct thread emergency_intr_thread; 97 98 #define ISTATE_NOTHREAD 0 99 #define ISTATE_NORMAL 1 100 #define ISTATE_LIVELOCKED 2 101 102 #ifdef SMP 103 static int intr_mpsafe = 1; 104 static int intr_migrate = 0; 105 static int intr_migrate_count; 106 TUNABLE_INT("kern.intr_mpsafe", &intr_mpsafe); 107 SYSCTL_INT(_kern, OID_AUTO, intr_mpsafe, 108 CTLFLAG_RW, &intr_mpsafe, 0, "Run INTR_MPSAFE handlers without the BGL"); 109 SYSCTL_INT(_kern, OID_AUTO, intr_migrate, 110 CTLFLAG_RW, &intr_migrate, 0, "Migrate to cpu holding BGL"); 111 SYSCTL_INT(_kern, OID_AUTO, intr_migrate_count, 112 CTLFLAG_RW, &intr_migrate_count, 0, ""); 113 #endif 114 static int livelock_limit = 40000; 115 static int livelock_lowater = 20000; 116 static int livelock_debug = -1; 117 SYSCTL_INT(_kern, OID_AUTO, livelock_limit, 118 CTLFLAG_RW, &livelock_limit, 0, "Livelock interrupt rate limit"); 119 SYSCTL_INT(_kern, OID_AUTO, livelock_lowater, 120 CTLFLAG_RW, &livelock_lowater, 0, "Livelock low-water mark restore"); 121 SYSCTL_INT(_kern, OID_AUTO, livelock_debug, 122 CTLFLAG_RW, &livelock_debug, 0, "Livelock debug intr#"); 123 124 static int emergency_intr_enable = 0; /* emergency interrupt polling */ 125 TUNABLE_INT("kern.emergency_intr_enable", &emergency_intr_enable); 126 SYSCTL_PROC(_kern, OID_AUTO, emergency_intr_enable, CTLTYPE_INT | CTLFLAG_RW, 127 0, 0, sysctl_emergency_enable, "I", "Emergency Interrupt Poll Enable"); 128 129 static int emergency_intr_freq = 10; /* emergency polling frequency */ 130 TUNABLE_INT("kern.emergency_intr_freq", &emergency_intr_freq); 131 SYSCTL_PROC(_kern, OID_AUTO, emergency_intr_freq, CTLTYPE_INT | CTLFLAG_RW, 132 0, 0, sysctl_emergency_freq, "I", "Emergency Interrupt Poll Frequency"); 133 134 /* 135 * Sysctl support routines 136 */ 137 static int 138 sysctl_emergency_enable(SYSCTL_HANDLER_ARGS) 139 { 140 int error, enabled; 141 142 enabled = emergency_intr_enable; 143 error = sysctl_handle_int(oidp, &enabled, 0, req); 144 if (error || req->newptr == NULL) 145 return error; 146 emergency_intr_enable = enabled; 147 if (emergency_intr_enable) { 148 systimer_adjust_periodic(&emergency_intr_timer, 149 emergency_intr_freq); 150 } else { 151 systimer_adjust_periodic(&emergency_intr_timer, 1); 152 } 153 return 0; 154 } 155 156 static int 157 sysctl_emergency_freq(SYSCTL_HANDLER_ARGS) 158 { 159 int error, phz; 160 161 phz = emergency_intr_freq; 162 error = sysctl_handle_int(oidp, &phz, 0, req); 163 if (error || req->newptr == NULL) 164 return error; 165 if (phz <= 0) 166 return EINVAL; 167 else if (phz > EMERGENCY_INTR_POLLING_FREQ_MAX) 168 phz = EMERGENCY_INTR_POLLING_FREQ_MAX; 169 170 emergency_intr_freq = phz; 171 if (emergency_intr_enable) { 172 systimer_adjust_periodic(&emergency_intr_timer, 173 emergency_intr_freq); 174 } else { 175 systimer_adjust_periodic(&emergency_intr_timer, 1); 176 } 177 return 0; 178 } 179 180 /* 181 * Register an SWI or INTerrupt handler. 182 */ 183 void * 184 register_swi(int intr, inthand2_t *handler, void *arg, const char *name, 185 struct lwkt_serialize *serializer) 186 { 187 if (intr < FIRST_SOFTINT || intr >= MAX_INTS) 188 panic("register_swi: bad intr %d", intr); 189 return(register_int(intr, handler, arg, name, serializer, 0)); 190 } 191 192 void * 193 register_swi_mp(int intr, inthand2_t *handler, void *arg, const char *name, 194 struct lwkt_serialize *serializer) 195 { 196 if (intr < FIRST_SOFTINT || intr >= MAX_INTS) 197 panic("register_swi: bad intr %d", intr); 198 return(register_int(intr, handler, arg, name, serializer, INTR_MPSAFE)); 199 } 200 201 void * 202 register_int(int intr, inthand2_t *handler, void *arg, const char *name, 203 struct lwkt_serialize *serializer, int intr_flags) 204 { 205 struct intr_info *info; 206 struct intrec **list; 207 intrec_t rec; 208 int orig_cpuid, cpuid; 209 210 if (intr < 0 || intr >= MAX_INTS) 211 panic("register_int: bad intr %d", intr); 212 if (name == NULL) 213 name = "???"; 214 info = &intr_info_ary[intr]; 215 216 /* 217 * Construct an interrupt handler record 218 */ 219 rec = kmalloc(sizeof(struct intrec), M_DEVBUF, M_INTWAIT); 220 rec->name = kmalloc(strlen(name) + 1, M_DEVBUF, M_INTWAIT); 221 strcpy(rec->name, name); 222 223 rec->info = info; 224 rec->handler = handler; 225 rec->argument = arg; 226 rec->intr = intr; 227 rec->intr_flags = intr_flags; 228 rec->next = NULL; 229 rec->serializer = serializer; 230 231 /* 232 * Create an emergency polling thread and set up a systimer to wake 233 * it up. 234 */ 235 if (emergency_intr_thread.td_kstack == NULL) { 236 lwkt_create(ithread_emergency, NULL, NULL, 237 &emergency_intr_thread, TDF_STOPREQ|TDF_INTTHREAD, -1, 238 "ithread emerg"); 239 systimer_init_periodic_nq(&emergency_intr_timer, 240 emergency_intr_timer_callback, &emergency_intr_thread, 241 (emergency_intr_enable ? emergency_intr_freq : 1)); 242 } 243 244 int_moveto_destcpu(&orig_cpuid, &cpuid, intr); 245 246 /* 247 * Create an interrupt thread if necessary, leave it in an unscheduled 248 * state. 249 */ 250 if (info->i_state == ISTATE_NOTHREAD) { 251 info->i_state = ISTATE_NORMAL; 252 lwkt_create((void *)ithread_handler, (void *)(intptr_t)intr, NULL, 253 &info->i_thread, TDF_STOPREQ|TDF_INTTHREAD|TDF_MPSAFE, -1, 254 "ithread %d", intr); 255 if (intr >= FIRST_SOFTINT) 256 lwkt_setpri(&info->i_thread, TDPRI_SOFT_NORM); 257 else 258 lwkt_setpri(&info->i_thread, TDPRI_INT_MED); 259 info->i_thread.td_preemptable = lwkt_preempt; 260 } 261 262 list = &info->i_reclist; 263 264 /* 265 * Keep track of how many fast and slow interrupts we have. 266 * Set i_mplock_required if any handler in the chain requires 267 * the MP lock to operate. 268 */ 269 if ((intr_flags & INTR_MPSAFE) == 0) 270 info->i_mplock_required = 1; 271 if (intr_flags & INTR_FAST) 272 ++info->i_fast; 273 else 274 ++info->i_slow; 275 276 /* 277 * Enable random number generation keying off of this interrupt. 278 */ 279 if ((intr_flags & INTR_NOENTROPY) == 0 && info->i_random.sc_enabled == 0) { 280 info->i_random.sc_enabled = 1; 281 info->i_random.sc_intr = intr; 282 } 283 284 /* 285 * Add the record to the interrupt list. 286 */ 287 crit_enter(); 288 while (*list != NULL) 289 list = &(*list)->next; 290 *list = rec; 291 crit_exit(); 292 293 /* 294 * Update max_installed_hard_intr to make the emergency intr poll 295 * a bit more efficient. 296 */ 297 if (intr < FIRST_SOFTINT) { 298 if (max_installed_hard_intr <= intr) 299 max_installed_hard_intr = intr + 1; 300 } else { 301 if (max_installed_soft_intr <= intr) 302 max_installed_soft_intr = intr + 1; 303 } 304 305 /* 306 * Setup the machine level interrupt vector 307 */ 308 if (intr < FIRST_SOFTINT && info->i_slow + info->i_fast == 1) { 309 if (machintr_vector_setup(intr, intr_flags)) 310 kprintf("machintr_vector_setup: failed on irq %d\n", intr); 311 } 312 313 int_moveto_origcpu(orig_cpuid, cpuid); 314 315 return(rec); 316 } 317 318 void 319 unregister_swi(void *id) 320 { 321 unregister_int(id); 322 } 323 324 void 325 unregister_int(void *id) 326 { 327 struct intr_info *info; 328 struct intrec **list; 329 intrec_t rec; 330 int intr, orig_cpuid, cpuid; 331 332 intr = ((intrec_t)id)->intr; 333 334 if (intr < 0 || intr >= MAX_INTS) 335 panic("register_int: bad intr %d", intr); 336 337 info = &intr_info_ary[intr]; 338 339 int_moveto_destcpu(&orig_cpuid, &cpuid, intr); 340 341 /* 342 * Remove the interrupt descriptor, adjust the descriptor count, 343 * and teardown the machine level vector if this was the last interrupt. 344 */ 345 crit_enter(); 346 list = &info->i_reclist; 347 while ((rec = *list) != NULL) { 348 if (rec == id) 349 break; 350 list = &rec->next; 351 } 352 if (rec) { 353 intrec_t rec0; 354 355 *list = rec->next; 356 if (rec->intr_flags & INTR_FAST) 357 --info->i_fast; 358 else 359 --info->i_slow; 360 if (intr < FIRST_SOFTINT && info->i_fast + info->i_slow == 0) 361 machintr_vector_teardown(intr); 362 363 /* 364 * Clear i_mplock_required if no handlers in the chain require the 365 * MP lock. 366 */ 367 for (rec0 = info->i_reclist; rec0; rec0 = rec0->next) { 368 if ((rec0->intr_flags & INTR_MPSAFE) == 0) 369 break; 370 } 371 if (rec0 == NULL) 372 info->i_mplock_required = 0; 373 } 374 375 crit_exit(); 376 377 int_moveto_origcpu(orig_cpuid, cpuid); 378 379 /* 380 * Free the record. 381 */ 382 if (rec != NULL) { 383 kfree(rec->name, M_DEVBUF); 384 kfree(rec, M_DEVBUF); 385 } else { 386 kprintf("warning: unregister_int: int %d handler for %s not found\n", 387 intr, ((intrec_t)id)->name); 388 } 389 } 390 391 const char * 392 get_registered_name(int intr) 393 { 394 intrec_t rec; 395 396 if (intr < 0 || intr >= MAX_INTS) 397 panic("register_int: bad intr %d", intr); 398 399 if ((rec = intr_info_ary[intr].i_reclist) == NULL) 400 return(NULL); 401 else if (rec->next) 402 return("mux"); 403 else 404 return(rec->name); 405 } 406 407 int 408 count_registered_ints(int intr) 409 { 410 struct intr_info *info; 411 412 if (intr < 0 || intr >= MAX_INTS) 413 panic("register_int: bad intr %d", intr); 414 info = &intr_info_ary[intr]; 415 return(info->i_fast + info->i_slow); 416 } 417 418 long 419 get_interrupt_counter(int intr) 420 { 421 struct intr_info *info; 422 423 if (intr < 0 || intr >= MAX_INTS) 424 panic("register_int: bad intr %d", intr); 425 info = &intr_info_ary[intr]; 426 return(info->i_count); 427 } 428 429 430 void 431 swi_setpriority(int intr, int pri) 432 { 433 struct intr_info *info; 434 435 if (intr < FIRST_SOFTINT || intr >= MAX_INTS) 436 panic("register_swi: bad intr %d", intr); 437 info = &intr_info_ary[intr]; 438 if (info->i_state != ISTATE_NOTHREAD) 439 lwkt_setpri(&info->i_thread, pri); 440 } 441 442 void 443 register_randintr(int intr) 444 { 445 struct intr_info *info; 446 447 if (intr < 0 || intr >= MAX_INTS) 448 panic("register_randintr: bad intr %d", intr); 449 info = &intr_info_ary[intr]; 450 info->i_random.sc_intr = intr; 451 info->i_random.sc_enabled = 1; 452 } 453 454 void 455 unregister_randintr(int intr) 456 { 457 struct intr_info *info; 458 459 if (intr < 0 || intr >= MAX_INTS) 460 panic("register_swi: bad intr %d", intr); 461 info = &intr_info_ary[intr]; 462 info->i_random.sc_enabled = -1; 463 } 464 465 int 466 next_registered_randintr(int intr) 467 { 468 struct intr_info *info; 469 470 if (intr < 0 || intr >= MAX_INTS) 471 panic("register_swi: bad intr %d", intr); 472 while (intr < MAX_INTS) { 473 info = &intr_info_ary[intr]; 474 if (info->i_random.sc_enabled > 0) 475 break; 476 ++intr; 477 } 478 return(intr); 479 } 480 481 /* 482 * Dispatch an interrupt. If there's nothing to do we have a stray 483 * interrupt and can just return, leaving the interrupt masked. 484 * 485 * We need to schedule the interrupt and set its i_running bit. If 486 * we are not on the interrupt thread's cpu we have to send a message 487 * to the correct cpu that will issue the desired action (interlocking 488 * with the interrupt thread's critical section). We do NOT attempt to 489 * reschedule interrupts whos i_running bit is already set because 490 * this would prematurely wakeup a livelock-limited interrupt thread. 491 * 492 * i_running is only tested/set on the same cpu as the interrupt thread. 493 * 494 * We are NOT in a critical section, which will allow the scheduled 495 * interrupt to preempt us. The MP lock might *NOT* be held here. 496 */ 497 #ifdef SMP 498 499 static void 500 sched_ithd_remote(void *arg) 501 { 502 sched_ithd((int)(intptr_t)arg); 503 } 504 505 #endif 506 507 void 508 sched_ithd(int intr) 509 { 510 struct intr_info *info; 511 512 info = &intr_info_ary[intr]; 513 514 ++info->i_count; 515 if (info->i_state != ISTATE_NOTHREAD) { 516 if (info->i_reclist == NULL) { 517 report_stray_interrupt(intr, info); 518 } else { 519 #ifdef SMP 520 if (info->i_thread.td_gd == mycpu) { 521 if (info->i_running == 0) { 522 info->i_running = 1; 523 if (info->i_state != ISTATE_LIVELOCKED) 524 lwkt_schedule(&info->i_thread); /* MIGHT PREEMPT */ 525 } 526 } else { 527 lwkt_send_ipiq(info->i_thread.td_gd, 528 sched_ithd_remote, (void *)(intptr_t)intr); 529 } 530 #else 531 if (info->i_running == 0) { 532 info->i_running = 1; 533 if (info->i_state != ISTATE_LIVELOCKED) 534 lwkt_schedule(&info->i_thread); /* MIGHT PREEMPT */ 535 } 536 #endif 537 } 538 } else { 539 report_stray_interrupt(intr, info); 540 } 541 } 542 543 static void 544 report_stray_interrupt(int intr, struct intr_info *info) 545 { 546 ++info->i_straycount; 547 if (info->i_straycount < 10) { 548 if (info->i_errorticks == ticks) 549 return; 550 info->i_errorticks = ticks; 551 kprintf("sched_ithd: stray interrupt %d on cpu %d\n", 552 intr, mycpuid); 553 } else if (info->i_straycount == 10) { 554 kprintf("sched_ithd: %ld stray interrupts %d on cpu %d - " 555 "there will be no further reports\n", 556 info->i_straycount, intr, mycpuid); 557 } 558 } 559 560 /* 561 * This is run from a periodic SYSTIMER (and thus must be MP safe, the BGL 562 * might not be held). 563 */ 564 static void 565 ithread_livelock_wakeup(systimer_t st) 566 { 567 struct intr_info *info; 568 569 info = &intr_info_ary[(int)(intptr_t)st->data]; 570 if (info->i_state != ISTATE_NOTHREAD) 571 lwkt_schedule(&info->i_thread); 572 } 573 574 /* 575 * Schedule ithread within fast intr handler 576 * 577 * XXX Protect sched_ithd() call with gd_intr_nesting_level? 578 * Interrupts aren't enabled, but still... 579 */ 580 static __inline void 581 ithread_fast_sched(int intr, thread_t td) 582 { 583 ++td->td_nest_count; 584 585 /* 586 * We are already in critical section, exit it now to 587 * allow preemption. 588 */ 589 crit_exit_quick(td); 590 sched_ithd(intr); 591 crit_enter_quick(td); 592 593 --td->td_nest_count; 594 } 595 596 /* 597 * This function is called directly from the ICU or APIC vector code assembly 598 * to process an interrupt. The critical section and interrupt deferral 599 * checks have already been done but the function is entered WITHOUT 600 * a critical section held. The BGL may or may not be held. 601 * 602 * Must return non-zero if we do not want the vector code to re-enable 603 * the interrupt (which we don't if we have to schedule the interrupt) 604 */ 605 int ithread_fast_handler(struct intrframe *frame); 606 607 int 608 ithread_fast_handler(struct intrframe *frame) 609 { 610 int intr; 611 struct intr_info *info; 612 struct intrec **list; 613 int must_schedule; 614 #ifdef SMP 615 int got_mplock; 616 #endif 617 intrec_t rec, next_rec; 618 globaldata_t gd; 619 thread_t td; 620 621 intr = frame->if_vec; 622 gd = mycpu; 623 td = curthread; 624 625 /* We must be in critical section. */ 626 KKASSERT(td->td_pri >= TDPRI_CRIT); 627 628 info = &intr_info_ary[intr]; 629 630 /* 631 * If we are not processing any FAST interrupts, just schedule the thing. 632 */ 633 if (info->i_fast == 0) { 634 ++gd->gd_cnt.v_intr; 635 ithread_fast_sched(intr, td); 636 return(1); 637 } 638 639 /* 640 * This should not normally occur since interrupts ought to be 641 * masked if the ithread has been scheduled or is running. 642 */ 643 if (info->i_running) 644 return(1); 645 646 /* 647 * Bump the interrupt nesting level to process any FAST interrupts. 648 * Obtain the MP lock as necessary. If the MP lock cannot be obtained, 649 * schedule the interrupt thread to deal with the issue instead. 650 * 651 * To reduce overhead, just leave the MP lock held once it has been 652 * obtained. 653 */ 654 ++gd->gd_intr_nesting_level; 655 ++gd->gd_cnt.v_intr; 656 must_schedule = info->i_slow; 657 #ifdef SMP 658 got_mplock = 0; 659 #endif 660 661 list = &info->i_reclist; 662 for (rec = *list; rec; rec = next_rec) { 663 next_rec = rec->next; /* rec may be invalid after call */ 664 665 if (rec->intr_flags & INTR_FAST) { 666 #ifdef SMP 667 if ((rec->intr_flags & INTR_MPSAFE) == 0 && got_mplock == 0) { 668 if (try_mplock() == 0) { 669 /* Couldn't get the MP lock; just schedule it. */ 670 must_schedule = 1; 671 break; 672 } 673 got_mplock = 1; 674 } 675 #endif 676 if (rec->serializer) { 677 must_schedule += lwkt_serialize_handler_try( 678 rec->serializer, rec->handler, 679 rec->argument, frame); 680 } else { 681 rec->handler(rec->argument, frame); 682 } 683 } 684 } 685 686 /* 687 * Cleanup 688 */ 689 --gd->gd_intr_nesting_level; 690 #ifdef SMP 691 if (got_mplock) 692 rel_mplock(); 693 #endif 694 695 /* 696 * If we had a problem, or mixed fast and slow interrupt handlers are 697 * registered, schedule the ithread to catch the missed records (it 698 * will just re-run all of them). A return value of 0 indicates that 699 * all handlers have been run and the interrupt can be re-enabled, and 700 * a non-zero return indicates that the interrupt thread controls 701 * re-enablement. 702 */ 703 if (must_schedule > 0) 704 ithread_fast_sched(intr, td); 705 else if (must_schedule == 0) 706 ++info->i_count; 707 return(must_schedule); 708 } 709 710 /* 711 * Interrupt threads run this as their main loop. 712 * 713 * The handler begins execution outside a critical section and with the BGL 714 * held. 715 * 716 * The i_running state starts at 0. When an interrupt occurs, the hardware 717 * interrupt is disabled and sched_ithd() The HW interrupt remains disabled 718 * until all routines have run. We then call ithread_done() to reenable 719 * the HW interrupt and deschedule us until the next interrupt. 720 * 721 * We are responsible for atomically checking i_running and ithread_done() 722 * is responsible for atomically checking for platform-specific delayed 723 * interrupts. i_running for our irq is only set in the context of our cpu, 724 * so a critical section is a sufficient interlock. 725 */ 726 #define LIVELOCK_TIMEFRAME(freq) ((freq) >> 2) /* 1/4 second */ 727 728 static void 729 ithread_handler(void *arg) 730 { 731 struct intr_info *info; 732 int use_limit; 733 __uint32_t lseconds; 734 int intr; 735 int mpheld; 736 struct intrec **list; 737 intrec_t rec, nrec; 738 globaldata_t gd; 739 struct systimer ill_timer; /* enforced freq. timer */ 740 u_int ill_count; /* interrupt livelock counter */ 741 742 ill_count = 0; 743 intr = (int)(intptr_t)arg; 744 info = &intr_info_ary[intr]; 745 list = &info->i_reclist; 746 747 /* 748 * The loop must be entered with one critical section held. The thread 749 * is created with TDF_MPSAFE so the MP lock is not held on start. 750 */ 751 gd = mycpu; 752 lseconds = gd->gd_time_seconds; 753 crit_enter_gd(gd); 754 mpheld = 0; 755 756 for (;;) { 757 /* 758 * The chain is only considered MPSAFE if all its interrupt handlers 759 * are MPSAFE. However, if intr_mpsafe has been turned off we 760 * always operate with the BGL. 761 */ 762 #ifdef SMP 763 if (intr_mpsafe == 0) { 764 if (mpheld == 0) { 765 intr_get_mplock(); 766 mpheld = 1; 767 } 768 } else if (info->i_mplock_required != mpheld) { 769 if (info->i_mplock_required) { 770 KKASSERT(mpheld == 0); 771 intr_get_mplock(); 772 mpheld = 1; 773 } else { 774 KKASSERT(mpheld != 0); 775 rel_mplock(); 776 mpheld = 0; 777 } 778 } 779 780 /* 781 * scheduled cpu may have changed, see intr_get_mplock() 782 */ 783 gd = mycpu; 784 #endif 785 786 /* 787 * If an interrupt is pending, clear i_running and execute the 788 * handlers. Note that certain types of interrupts can re-trigger 789 * and set i_running again. 790 * 791 * Each handler is run in a critical section. Note that we run both 792 * FAST and SLOW designated service routines. 793 */ 794 if (info->i_running) { 795 ++ill_count; 796 info->i_running = 0; 797 798 if (*list == NULL) 799 report_stray_interrupt(intr, info); 800 801 for (rec = *list; rec; rec = nrec) { 802 nrec = rec->next; 803 if (rec->serializer) { 804 lwkt_serialize_handler_call(rec->serializer, rec->handler, 805 rec->argument, NULL); 806 } else { 807 rec->handler(rec->argument, NULL); 808 } 809 } 810 } 811 812 /* 813 * This is our interrupt hook to add rate randomness to the random 814 * number generator. 815 */ 816 if (info->i_random.sc_enabled > 0) 817 add_interrupt_randomness(intr); 818 819 /* 820 * Unmask the interrupt to allow it to trigger again. This only 821 * applies to certain types of interrupts (typ level interrupts). 822 * This can result in the interrupt retriggering, but the retrigger 823 * will not be processed until we cycle our critical section. 824 * 825 * Only unmask interrupts while handlers are installed. It is 826 * possible to hit a situation where no handlers are installed 827 * due to a device driver livelocking and then tearing down its 828 * interrupt on close (the parallel bus being a good example). 829 */ 830 if (*list) 831 machintr_intren(intr); 832 833 /* 834 * Do a quick exit/enter to catch any higher-priority interrupt 835 * sources, such as the statclock, so thread time accounting 836 * will still work. This may also cause an interrupt to re-trigger. 837 */ 838 crit_exit_gd(gd); 839 crit_enter_gd(gd); 840 841 /* 842 * LIVELOCK STATE MACHINE 843 */ 844 switch(info->i_state) { 845 case ISTATE_NORMAL: 846 /* 847 * Reset the count each second. 848 */ 849 if (lseconds != gd->gd_time_seconds) { 850 lseconds = gd->gd_time_seconds; 851 ill_count = 0; 852 } 853 854 /* 855 * If we did not exceed the frequency limit, we are done. 856 * If the interrupt has not retriggered we deschedule ourselves. 857 */ 858 if (ill_count <= livelock_limit) { 859 if (info->i_running == 0) { 860 #ifdef SMP 861 if (mpheld && intr_migrate) { 862 rel_mplock(); 863 mpheld = 0; 864 } 865 #endif 866 lwkt_deschedule_self(gd->gd_curthread); 867 lwkt_switch(); 868 } 869 break; 870 } 871 872 /* 873 * Otherwise we are livelocked. Set up a periodic systimer 874 * to wake the thread up at the limit frequency. 875 */ 876 kprintf("intr %d at %d/%d hz, livelocked limit engaged!\n", 877 intr, ill_count, livelock_limit); 878 info->i_state = ISTATE_LIVELOCKED; 879 if ((use_limit = livelock_limit) < 100) 880 use_limit = 100; 881 else if (use_limit > 500000) 882 use_limit = 500000; 883 systimer_init_periodic_nq(&ill_timer, ithread_livelock_wakeup, 884 (void *)(intptr_t)intr, use_limit); 885 /* fall through */ 886 case ISTATE_LIVELOCKED: 887 /* 888 * Wait for our periodic timer to go off. Since the interrupt 889 * has re-armed it can still set i_running, but it will not 890 * reschedule us while we are in a livelocked state. 891 */ 892 lwkt_deschedule_self(gd->gd_curthread); 893 lwkt_switch(); 894 895 /* 896 * Check once a second to see if the livelock condition no 897 * longer applies. 898 */ 899 if (lseconds != gd->gd_time_seconds) { 900 lseconds = gd->gd_time_seconds; 901 if (ill_count < livelock_lowater) { 902 info->i_state = ISTATE_NORMAL; 903 systimer_del(&ill_timer); 904 kprintf("intr %d at %d/%d hz, livelock removed\n", 905 intr, ill_count, livelock_lowater); 906 } else if (livelock_debug == intr || 907 (bootverbose && cold)) { 908 kprintf("intr %d at %d/%d hz, in livelock\n", 909 intr, ill_count, livelock_lowater); 910 } 911 ill_count = 0; 912 } 913 break; 914 } 915 } 916 /* not reached */ 917 } 918 919 #ifdef SMP 920 921 /* 922 * An interrupt thread is trying to get the MP lock. To avoid cpu-bound 923 * code in the kernel on cpu X from interfering we chase the MP lock. 924 */ 925 static void 926 intr_get_mplock(void) 927 { 928 int owner; 929 930 if (intr_migrate == 0) { 931 get_mplock(); 932 return; 933 } 934 while (try_mplock() == 0) { 935 owner = owner_mplock(); 936 if (owner >= 0 && owner != mycpu->gd_cpuid) { 937 lwkt_migratecpu(owner); 938 ++intr_migrate_count; 939 } else { 940 lwkt_switch(); 941 } 942 } 943 } 944 945 #endif 946 947 /* 948 * Emergency interrupt polling thread. The thread begins execution 949 * outside a critical section with the BGL held. 950 * 951 * If emergency interrupt polling is enabled, this thread will 952 * execute all system interrupts not marked INTR_NOPOLL at the 953 * specified polling frequency. 954 * 955 * WARNING! This thread runs *ALL* interrupt service routines that 956 * are not marked INTR_NOPOLL, which basically means everything except 957 * the 8254 clock interrupt and the ATA interrupt. It has very high 958 * overhead and should only be used in situations where the machine 959 * cannot otherwise be made to work. Due to the severe performance 960 * degredation, it should not be enabled on production machines. 961 */ 962 static void 963 ithread_emergency(void *arg __unused) 964 { 965 struct intr_info *info; 966 intrec_t rec, nrec; 967 int intr; 968 969 for (;;) { 970 for (intr = 0; intr < max_installed_hard_intr; ++intr) { 971 info = &intr_info_ary[intr]; 972 for (rec = info->i_reclist; rec; rec = nrec) { 973 if ((rec->intr_flags & INTR_NOPOLL) == 0) { 974 if (rec->serializer) { 975 lwkt_serialize_handler_call(rec->serializer, 976 rec->handler, rec->argument, NULL); 977 } else { 978 rec->handler(rec->argument, NULL); 979 } 980 } 981 nrec = rec->next; 982 } 983 } 984 lwkt_deschedule_self(curthread); 985 lwkt_switch(); 986 } 987 } 988 989 /* 990 * Systimer callback - schedule the emergency interrupt poll thread 991 * if emergency polling is enabled. 992 */ 993 static 994 void 995 emergency_intr_timer_callback(systimer_t info, struct intrframe *frame __unused) 996 { 997 if (emergency_intr_enable) 998 lwkt_schedule(info->data); 999 } 1000 1001 int 1002 ithread_cpuid(int intr) 1003 { 1004 const struct intr_info *info; 1005 1006 KKASSERT(intr >= 0 && intr < MAX_INTS); 1007 info = &intr_info_ary[intr]; 1008 1009 if (info->i_state == ISTATE_NOTHREAD) 1010 return -1; 1011 return info->i_thread.td_gd->gd_cpuid; 1012 } 1013 1014 /* 1015 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt. 1016 * The data for this machine dependent, and the declarations are in machine 1017 * dependent code. The layout of intrnames and intrcnt however is machine 1018 * independent. 1019 * 1020 * We do not know the length of intrcnt and intrnames at compile time, so 1021 * calculate things at run time. 1022 */ 1023 1024 static int 1025 sysctl_intrnames(SYSCTL_HANDLER_ARGS) 1026 { 1027 struct intr_info *info; 1028 intrec_t rec; 1029 int error = 0; 1030 int len; 1031 int intr; 1032 char buf[64]; 1033 1034 for (intr = 0; error == 0 && intr < MAX_INTS; ++intr) { 1035 info = &intr_info_ary[intr]; 1036 1037 len = 0; 1038 buf[0] = 0; 1039 for (rec = info->i_reclist; rec; rec = rec->next) { 1040 ksnprintf(buf + len, sizeof(buf) - len, "%s%s", 1041 (len ? "/" : ""), rec->name); 1042 len += strlen(buf + len); 1043 } 1044 if (len == 0) { 1045 ksnprintf(buf, sizeof(buf), "irq%d", intr); 1046 len = strlen(buf); 1047 } 1048 error = SYSCTL_OUT(req, buf, len + 1); 1049 } 1050 return (error); 1051 } 1052 1053 1054 SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD, 1055 NULL, 0, sysctl_intrnames, "", "Interrupt Names"); 1056 1057 static int 1058 sysctl_intrcnt(SYSCTL_HANDLER_ARGS) 1059 { 1060 struct intr_info *info; 1061 int error = 0; 1062 int intr; 1063 1064 for (intr = 0; intr < max_installed_hard_intr; ++intr) { 1065 info = &intr_info_ary[intr]; 1066 1067 error = SYSCTL_OUT(req, &info->i_count, sizeof(info->i_count)); 1068 if (error) 1069 goto failed; 1070 } 1071 for (intr = FIRST_SOFTINT; intr < max_installed_soft_intr; ++intr) { 1072 info = &intr_info_ary[intr]; 1073 1074 error = SYSCTL_OUT(req, &info->i_count, sizeof(info->i_count)); 1075 if (error) 1076 goto failed; 1077 } 1078 failed: 1079 return(error); 1080 } 1081 1082 SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD, 1083 NULL, 0, sysctl_intrcnt, "", "Interrupt Counts"); 1084 1085 static void 1086 int_moveto_destcpu(int *orig_cpuid0, int *cpuid0, int intr) 1087 { 1088 int orig_cpuid = mycpuid, cpuid; 1089 char envpath[32]; 1090 1091 cpuid = orig_cpuid; 1092 ksnprintf(envpath, sizeof(envpath), "hw.irq.%d.dest", intr); 1093 kgetenv_int(envpath, &cpuid); 1094 if (cpuid >= ncpus) 1095 cpuid = orig_cpuid; 1096 1097 if (cpuid != orig_cpuid) 1098 lwkt_migratecpu(cpuid); 1099 1100 *orig_cpuid0 = orig_cpuid; 1101 *cpuid0 = cpuid; 1102 } 1103 1104 static void 1105 int_moveto_origcpu(int orig_cpuid, int cpuid) 1106 { 1107 if (cpuid != orig_cpuid) 1108 lwkt_migratecpu(orig_cpuid); 1109 } 1110