1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Machine dependent interrupt code for x86. For x86, we have to 30 * deal with different PICs. Thus, we use the passed in vector to lookup 31 * an interrupt source associated with that vector. The interrupt source 32 * describes which PIC the source belongs to and includes methods to handle 33 * that source. 
 */

#include "opt_atpic.h"
#include "opt_ddb.h"
#include "opt_smp.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/vmmeter.h>
#include <machine/clock.h>
#include <machine/intr_machdep.h>
#include <machine/smp.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifndef DEV_ATPIC
#include <machine/segments.h>
#include <machine/frame.h>
#include <dev/ic/i8259.h>
#include <x86/isa/icu.h>
#include <isa/isareg.h>
#endif

#include <vm/vm.h>

/* Signature shared by the PIC enable/EOI methods passed to intr_event_create(). */
typedef void (*mask_fn)(void *);

/* Next free slot in the intrcnt[]/intrnames[] arrays; protected by intrcnt_lock. */
static int intrcnt_index;
/* Interrupt sources indexed by vector; allocated in intr_init_sources(). */
static struct intsrc **interrupt_sources;
#ifdef SMP
/* Scratch copy of interrupt_sources used by intr_balance() for sorting. */
static struct intsrc **interrupt_sorted;
/* Balancing interval in seconds; zero disables automatic balancing. */
static int intrbalance;
SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RWTUN, &intrbalance, 0,
    "Interrupt auto-balance interval (seconds).  Zero disables.");
static struct timeout_task intrbalance_task;
#endif
/* Protects interrupt_sources[] and per-source handler/CPU state. */
static struct sx intrsrc_lock;
/* Protects the list of registered PICs. */
static struct mtx intrpic_lock;
/* Spin lock protecting intrcnt_index and counter name updates. */
static struct mtx intrcnt_lock;
/* All registered interrupt controllers, in registration order. */
static TAILQ_HEAD(pics_head, pic) pics;
/* Number of I/O interrupt vectors; must be final before intr_init_sources(). */
u_int num_io_irqs;

#if defined(SMP) && !defined(EARLY_AP_STARTUP)
#error EARLY_AP_STARTUP required on x86
#endif

/* Width of one entry in intrnames[], including the NUL terminator. */
#define	INTRNAME_LEN	(MAXCOMLEN + 1)
/* Interrupt counters and names exported to userland (vmstat -i). */
u_long *intrcnt;
char *intrnames;
/* Sizes start as pointer sizes; updated once the arrays are allocated. */
size_t sintrcnt = sizeof(intrcnt);
size_t sintrnames = sizeof(intrnames);
int nintrcnt;

static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources");

static int	intr_assign_cpu(void *arg, int cpu);
static void	intr_disable_src(void *arg);
static void	intr_init(void *__dummy);
static int	intr_pic_registered(struct pic *pic);
static void	intrcnt_setname(const char *name, int index);
static void	intrcnt_updatename(struct intsrc *is);
static void	intrcnt_register(struct intsrc *is);

/*
 * SYSINIT levels for SI_SUB_INTR:
 *
 * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init
 * SI_ORDER_SECOND: Xen PICs
 * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges
 * SI_ORDER_FOURTH: Add 8259A PICs
 * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources
 * SI_ORDER_MIDDLE: SMP interrupt counters
 * SI_ORDER_ANY: Enable interrupts on BSP
 */

/*
 * Return non-zero if the given PIC is already on the pics list.
 * Callers must prevent the list from changing (hold intrpic_lock or be
 * single-threaded during early boot).
 */
static int
intr_pic_registered(struct pic *pic)
{
	struct pic *p;

	TAILQ_FOREACH(p, &pics, pics) {
		if (p == pic)
			return (1);
	}
	return (0);
}

/*
 * Register a new interrupt controller (PIC).  This is to support suspend
 * and resume where we suspend/resume controllers rather than individual
 * sources.  This also allows controllers with no active sources (such as
 * 8259As in a system using the APICs) to participate in suspend and resume.
 */
int
intr_register_pic(struct pic *pic)
{
	int error;

	mtx_lock(&intrpic_lock);
	if (intr_pic_registered(pic))
		error = EBUSY;
	else {
		TAILQ_INSERT_TAIL(&pics, pic, pics);
		error = 0;
	}
	mtx_unlock(&intrpic_lock);
	return (error);
}

/*
 * Allocate interrupt source arrays and register interrupt sources
 * once the number of interrupts is known.
 */
static void
intr_init_sources(void *arg)
{
	struct pic *pic;

	MPASS(num_io_irqs > 0);

	interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
	    M_INTR, M_WAITOK | M_ZERO);
#ifdef SMP
	interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted),
	    M_INTR, M_WAITOK | M_ZERO);
#endif

	/*
	 * - 1 ??? dummy counter.
	 * - 2 counters for each I/O interrupt.
	 * - 1 counter for each CPU for lapic timer.
	 * - 1 counter for each CPU for the Hyper-V vmbus driver.
	 * - 8 counters for each CPU for IPI counters for SMP.
	 */
	nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
#ifdef COUNT_IPIS
	if (mp_ncpus > 1)
		nintrcnt += 8 * mp_ncpus;
#endif
	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
	    M_ZERO);
	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTR, M_WAITOK |
	    M_ZERO);
	/* Publish the real array sizes now that they are allocated. */
	sintrcnt = nintrcnt * sizeof(u_long);
	sintrnames = nintrcnt * INTRNAME_LEN;

	/* Slot 0 is the dummy counter mentioned above. */
	intrcnt_setname("???", 0);
	intrcnt_index = 1;

	/*
	 * NB: intrpic_lock is not held here to avoid LORs due to
	 * malloc() in intr_register_source().  However, we are still
	 * single-threaded at this point in startup so the list of
	 * PICs shouldn't change.
	 */
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_register_sources != NULL)
			pic->pic_register_sources(pic);
	}
}
SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
    NULL);

/*
 * Register a new interrupt source with the global interrupt system.
 * The global interrupts need to be disabled when this function is
 * called.  Returns EEXIST if the vector is already claimed.
 */
int
intr_register_source(struct intsrc *isrc)
{
	int error, vector;

	KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
	vector = isrc->is_pic->pic_vector(isrc);
	KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
	    num_io_irqs));
	/* Cheap unlocked check first; re-checked under the lock below. */
	if (interrupt_sources[vector] != NULL)
		return (EEXIST);
	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
	    intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
	    (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
	    vector);
	if (error)
		return (error);
	sx_xlock(&intrsrc_lock);
	/* Re-check now that we hold the lock; a racer may have won. */
	if (interrupt_sources[vector] != NULL) {
		sx_xunlock(&intrsrc_lock);
		intr_event_destroy(isrc->is_event);
		return (EEXIST);
	}
	intrcnt_register(isrc);
	interrupt_sources[vector] = isrc;
	isrc->is_handlers = 0;
	sx_xunlock(&intrsrc_lock);
	return (0);
}

/*
 * Return the interrupt source for a vector, or NULL if the vector is out
 * of range or has no registered source.
 */
struct intsrc *
intr_lookup_source(int vector)
{

	if (vector < 0 || vector >= num_io_irqs)
		return (NULL);
	return (interrupt_sources[vector]);
}

/*
 * Add a handler to the event for a vector.  The first handler added to a
 * source enables the interrupt at the PIC.
 */
int
intr_add_handler(const char *name, int vector, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
    int domain)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_add_handler(isrc->is_event, name, filter, handler,
	    arg, intr_priority(flags), flags, cookiep);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		intrcnt_updatename(isrc);
		isrc->is_handlers++;
		if (isrc->is_handlers == 1) {
			/* First handler: unmask the source at its PIC. */
			isrc->is_domain = domain;
			isrc->is_pic->pic_enable_intr(isrc);
			isrc->is_pic->pic_enable_source(isrc);
		}
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

/*
 * Remove a handler; masks the source at the PIC when the last handler
 * goes away.
 */
int
intr_remove_handler(void *cookie)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_handler_source(cookie);
	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		isrc->is_handlers--;
		if (isrc->is_handlers == 0) {
			isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
			isrc->is_pic->pic_disable_intr(isrc);
		}
		intrcnt_updatename(isrc);
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

/*
 * Set the trigger mode and polarity for a vector via its PIC.
 */
int
intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
{

	struct intsrc *isrc;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
}

/* Disable-and-EOI callback handed to intr_event_create(). */
static void
intr_disable_src(void *arg)
{
	struct intsrc *isrc;

	isrc = arg;
	isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
}

/*
 * Main dispatch for a hardware interrupt: bump counters and hand the
 * frame to the source's interrupt event.
 */
void
intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
{
	struct intr_event *ie;
	int vector;

	/*
	 * We count software interrupts when we process them.  The
	 * code here follows previous practice, but there's an
	 * argument for counting hardware interrupts when they're
	 * processed too.
	 */
	(*isrc->is_count)++;
	VM_CNT_INC(v_intr);

	ie = isrc->is_event;

	/*
	 * XXX: We assume that IRQ 0 is only used for the ISA timer
	 * device (clk).
	 */
	vector = isrc->is_pic->pic_vector(isrc);
	if (vector == 0)
		clkintr_pending = 1;

	/*
	 * For stray interrupts, mask and EOI the source, bump the
	 * stray count, and log the condition.
	 */
	if (intr_event_handle(ie, frame) != 0) {
		isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
		(*isrc->is_straycount)++;
		if (*isrc->is_straycount < INTR_STRAY_LOG_MAX)
			log(LOG_ERR, "stray irq%d\n", vector);
		else if (*isrc->is_straycount == INTR_STRAY_LOG_MAX)
			log(LOG_CRIT,
			    "too many stray irq %d's: not logging anymore\n",
			    vector);
	}
}

/*
 * Resume all registered PICs, in registration order.
 */
void
intr_resume(bool suspend_cancelled)
{
	struct pic *pic;

#ifndef DEV_ATPIC
	/* No 8259A driver compiled in; quiesce the chips by hand. */
	atpic_reset();
#endif
	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_resume != NULL)
			pic->pic_resume(pic, suspend_cancelled);
	}
	mtx_unlock(&intrpic_lock);
}

/*
 * Suspend all registered PICs, in reverse registration order.
 */
void
intr_suspend(void)
{
	struct pic *pic;

	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
		if (pic->pic_suspend != NULL)
			pic->pic_suspend(pic);
	}
	mtx_unlock(&intrpic_lock);
}

/*
 * intr_event callback: bind an interrupt source to a CPU by programming
 * its PIC with the CPU's local APIC ID.
 */
static int
intr_assign_cpu(void *arg, int cpu)
{
#ifdef SMP
	struct intsrc *isrc;
	int error;

	MPASS(mp_ncpus == 1 || smp_started);

	/* Nothing to do if there is only a single CPU. */
	if (mp_ncpus > 1 && cpu != NOCPU) {
		isrc = arg;
		sx_xlock(&intrsrc_lock);
		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
		if (error == 0)
			isrc->is_cpu = cpu;
		sx_xunlock(&intrsrc_lock);
	} else
		error = 0;
	return (error);
#else
	return (EOPNOTSUPP);
#endif
}

/*
 * Store a left-justified, space-padded name in slot 'index' of
 * intrnames[].
 */
static void
intrcnt_setname(const char *name, int index)
{

	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
	    INTRNAME_LEN - 1, name);
}

/* Refresh a source's counter name from its event's full name. */
static void
intrcnt_updatename(struct intsrc *is)
{

	intrcnt_setname(is->is_event->ie_fullname, is->is_index);
}

/*
 * Claim two consecutive intrcnt slots for a source: one for normal
 * interrupts and one for strays.
 */
static void
intrcnt_register(struct intsrc *is)
{
	char straystr[INTRNAME_LEN];

	KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index + 2 <= nintrcnt);
	is->is_index = intrcnt_index;
	intrcnt_index += 2;
	snprintf(straystr, sizeof(straystr), "stray irq%d",
	    is->is_pic->pic_vector(is));
	intrcnt_updatename(is);
	is->is_count = &intrcnt[is->is_index];
	intrcnt_setname(straystr, is->is_index + 1);
	is->is_straycount = &intrcnt[is->is_index + 1];
	mtx_unlock_spin(&intrcnt_lock);
}

/*
 * Claim a single named intrcnt slot (e.g. for the lapic timer) and
 * return a pointer to its counter via countp.
 */
void
intrcnt_add(const char *name, u_long **countp)
{

	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index < nintrcnt);
	*countp = &intrcnt[intrcnt_index];
	intrcnt_setname(name, intrcnt_index);
	intrcnt_index++;
	mtx_unlock_spin(&intrcnt_lock);
}

/* Initialize the locks and PIC list used by this file. */
static void
intr_init(void *dummy __unused)
{

	TAILQ_INIT(&pics);
	mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
	sx_init(&intrsrc_lock, "intrsrc");
	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
}
SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);

static void
intr_init_final(void *dummy __unused)
{

	/*
	 * Enable interrupts on the BSP after all of the interrupt
	 * controllers are initialized.  Device interrupts are still
	 * disabled in the interrupt controllers until interrupt
	 * handlers are registered.  Interrupts are enabled on each AP
	 * after their first context switch.
	 */
	enable_intr();
}
SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);

#ifndef DEV_ATPIC
/* Initialize the two 8259A's to a known-good shutdown state. */
void
atpic_reset(void)
{

	outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
	outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU1, OCW3_SEL | OCW3_RR);

	outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
	outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
	outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2, OCW3_SEL | OCW3_RR);
}
#endif

/* Add a description to an active interrupt handler.
 */
int
intr_describe(u_int vector, void *ih, const char *descr)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_describe_handler(isrc->is_event, ih, descr);
	if (error)
		return (error);
	/* Pick up the new description in the exported counter names. */
	intrcnt_updatename(isrc);
	return (0);
}

/*
 * Ask every registered source's PIC to reprogram its pin (if the PIC
 * supports it), e.g. after changes that invalidate current routing.
 */
void
intr_reprogram(void)
{
	struct intsrc *is;
	u_int v;

	sx_xlock(&intrsrc_lock);
	for (v = 0; v < num_io_irqs; v++) {
		is = interrupt_sources[v];
		if (is == NULL)
			continue;
		if (is->is_pic->pic_reprogram_pin != NULL)
			is->is_pic->pic_reprogram_pin(is);
	}
	sx_xunlock(&intrsrc_lock);
}

#ifdef DDB
/*
 * Dump data about interrupt handlers.  With the "v" modifier, dump
 * verbosely.
 */
DB_SHOW_COMMAND(irqs, db_show_irqs)
{
	struct intsrc **isrc;
	u_int i;
	int verbose;

	if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	isrc = interrupt_sources;
	for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++)
		if (*isrc != NULL)
			db_dump_intr_event((*isrc)->is_event, verbose);
}
#endif

#ifdef SMP
/*
 * Support for balancing interrupt sources across CPUs.  For now we just
 * allocate CPUs round-robin.
 *
 * XXX If the system has a domain without any usable CPUs (e.g., where all
 * APIC IDs are 256 or greater and we do not have an IOMMU) we use
 * intr_no_domain to fall back to assigning interrupts without regard for
 * domain.  Once we can rely on the presence of an IOMMU on all x86 platforms
 * we can revert this.
 */

/* CPUs eligible to receive interrupts; starts with just the BSP. */
cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
/* Per-domain round-robin cursor into the CPU space. */
static int current_cpu[MAXMEMDOM];
/* True if some domain has no usable interrupt CPUs; disables domain affinity. */
static bool intr_no_domain;

/*
 * Reset the per-domain round-robin cursors, advancing each one to a
 * valid CPU if its current position is not usable for that domain.
 */
static void
intr_init_cpus(void)
{
	int i;

	for (i = 0; i < vm_ndomains; i++) {
		if (CPU_OVERLAP(&cpuset_domain[i], &intr_cpus) == 0) {
			intr_no_domain = true;
			printf("%s: unable to route interrupts to CPUs in domain %d\n",
			    __func__, i);
		}

		current_cpu[i] = 0;
		/* With domain affinity disabled, only cursor 0 is used. */
		if (intr_no_domain && i > 0)
			continue;
		if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
		    !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
			intr_next_cpu(i);
	}
}

/*
 * Return the CPU that the next interrupt source should use.  For now
 * this just returns the next local APIC according to round-robin.
 */
u_int
intr_next_cpu(int domain)
{
	u_int apic_id;

	MPASS(mp_ncpus == 1 || smp_started);
	if (mp_ncpus == 1)
		return (PCPU_GET(apic_id));

	if (intr_no_domain)
		domain = 0;
	mtx_lock_spin(&icu_lock);
	/* Return the current cursor's APIC ID, then advance the cursor. */
	apic_id = cpu_apic_ids[current_cpu[domain]];
	do {
		current_cpu[domain]++;
		if (current_cpu[domain] > mp_maxid)
			current_cpu[domain] = 0;
	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
	    (!CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]) &&
	    !intr_no_domain));
	mtx_unlock_spin(&icu_lock);
	return (apic_id);
}

/*
 * Add a CPU to our mask of valid CPUs that can be destinations of
 * interrupts.
637 */ 638 void 639 intr_add_cpu(u_int cpu) 640 { 641 642 if (cpu >= MAXCPU) 643 panic("%s: Invalid CPU ID %u", __func__, cpu); 644 if (bootverbose) 645 printf("INTR: Adding local APIC %d as a target\n", 646 cpu_apic_ids[cpu]); 647 648 CPU_SET(cpu, &intr_cpus); 649 } 650 651 static void 652 intr_smp_startup(void *arg __unused) 653 { 654 655 intr_init_cpus(); 656 return; 657 } 658 SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup, 659 NULL); 660 661 /* 662 * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. 663 */ 664 static int 665 sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) 666 { 667 struct sbuf sbuf; 668 struct intsrc *isrc; 669 u_int i; 670 int error; 671 672 error = sysctl_wire_old_buffer(req, 0); 673 if (error != 0) 674 return (error); 675 676 sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 677 sx_slock(&intrsrc_lock); 678 for (i = 0; i < num_io_irqs; i++) { 679 isrc = interrupt_sources[i]; 680 if (isrc == NULL) 681 continue; 682 sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n", 683 isrc->is_event->ie_fullname, 684 isrc->is_index, 685 isrc->is_cpu, 686 isrc->is_domain, 687 *isrc->is_count); 688 } 689 690 sx_sunlock(&intrsrc_lock); 691 error = sbuf_finish(&sbuf); 692 sbuf_delete(&sbuf); 693 return (error); 694 } 695 SYSCTL_PROC(_hw, OID_AUTO, intrs, 696 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 697 0, 0, sysctl_hw_intrs, "A", 698 "interrupt:number @cpu: count"); 699 700 /* 701 * Compare two, possibly NULL, entries in the interrupt source array 702 * by load. 703 */ 704 static int 705 intrcmp(const void *one, const void *two) 706 { 707 const struct intsrc *i1, *i2; 708 709 i1 = *(const struct intsrc * const *)one; 710 i2 = *(const struct intsrc * const *)two; 711 if (i1 != NULL && i2 != NULL) 712 return (*i1->is_count - *i2->is_count); 713 if (i1 != NULL) 714 return (1); 715 if (i2 != NULL) 716 return (-1); 717 return (0); 718 } 719 720 /* 721 * Balance IRQs across available CPUs according to load. 
 */
static void
intr_balance(void *dummy __unused, int pending __unused)
{
	struct intsrc *isrc;
	int interval;
	u_int cpu;
	int i;

	/* A zero interval disables balancing; just reschedule ourselves. */
	interval = intrbalance;
	if (interval == 0)
		goto out;

	/*
	 * Sort interrupts according to count.
	 */
	sx_xlock(&intrsrc_lock);
	memcpy(interrupt_sorted, interrupt_sources, num_io_irqs *
	    sizeof(interrupt_sorted[0]));
	qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]),
	    intrcmp);

	/*
	 * Restart the scan from the same location to avoid moving in the
	 * common case.
	 */
	intr_init_cpus();

	/*
	 * Assign round-robin from most loaded to least.
	 */
	for (i = num_io_irqs - 1; i >= 0; i--) {
		isrc = interrupt_sorted[i];
		/* Skip empty slots and sources with an explicit CPU binding. */
		if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU)
			continue;
		cpu = current_cpu[isrc->is_domain];
		intr_next_cpu(isrc->is_domain);
		/* Only reprogram the PIC if the source actually moves. */
		if (isrc->is_cpu != cpu &&
		    isrc->is_pic->pic_assign_cpu(isrc,
		    cpu_apic_ids[cpu]) == 0)
			isrc->is_cpu = cpu;
	}
	sx_xunlock(&intrsrc_lock);
out:
	/* Re-arm; poll once a minute for re-enablement when disabled. */
	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task,
	    interval ? hz * interval : hz * 60);

}

/* Schedule the first balancing pass one second after boot. */
static void
intr_balance_init(void *dummy __unused)
{

	TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance,
	    NULL);
	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz);
}
SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL);

#else
/*
 * Always route interrupts to the current processor in the UP case.
 */
u_int
intr_next_cpu(int domain)
{

	return (PCPU_GET(apic_id));
}
#endif