1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * Machine dependent interrupt code for x86. For x86, we have to 33 * deal with different PICs. Thus, we use the passed in vector to lookup 34 * an interrupt source associated with that vector. The interrupt source 35 * describes which PIC the source belongs to and includes methods to handle 36 * that source. 
37 */ 38 39 #include "opt_atpic.h" 40 #include "opt_ddb.h" 41 #include "opt_smp.h" 42 43 #include <sys/param.h> 44 #include <sys/bus.h> 45 #include <sys/interrupt.h> 46 #include <sys/ktr.h> 47 #include <sys/kernel.h> 48 #include <sys/lock.h> 49 #include <sys/malloc.h> 50 #include <sys/mutex.h> 51 #include <sys/proc.h> 52 #include <sys/queue.h> 53 #include <sys/sbuf.h> 54 #include <sys/smp.h> 55 #include <sys/sx.h> 56 #include <sys/sysctl.h> 57 #include <sys/syslog.h> 58 #include <sys/systm.h> 59 #include <sys/taskqueue.h> 60 #include <sys/vmmeter.h> 61 #include <machine/clock.h> 62 #include <machine/intr_machdep.h> 63 #include <machine/smp.h> 64 #ifdef DDB 65 #include <ddb/ddb.h> 66 #endif 67 68 #ifndef DEV_ATPIC 69 #include <machine/segments.h> 70 #include <machine/frame.h> 71 #include <dev/ic/i8259.h> 72 #include <x86/isa/icu.h> 73 #include <isa/isareg.h> 74 #endif 75 76 #include <vm/vm.h> 77 78 #define MAX_STRAY_LOG 5 79 80 typedef void (*mask_fn)(void *); 81 82 static int intrcnt_index; 83 static struct intsrc **interrupt_sources; 84 #ifdef SMP 85 static struct intsrc **interrupt_sorted; 86 static int intrbalance; 87 SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, 88 "Interrupt auto-balance interval (seconds). 
Zero disables."); 89 static struct timeout_task intrbalance_task; 90 #endif 91 static struct sx intrsrc_lock; 92 static struct mtx intrpic_lock; 93 static struct mtx intrcnt_lock; 94 static TAILQ_HEAD(pics_head, pic) pics; 95 u_int num_io_irqs; 96 97 #if defined(SMP) && !defined(EARLY_AP_STARTUP) 98 static int assign_cpu; 99 #endif 100 101 u_long *intrcnt; 102 char *intrnames; 103 size_t sintrcnt = sizeof(intrcnt); 104 size_t sintrnames = sizeof(intrnames); 105 int nintrcnt; 106 107 static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources"); 108 109 static int intr_assign_cpu(void *arg, int cpu); 110 static void intr_disable_src(void *arg); 111 static void intr_init(void *__dummy); 112 static int intr_pic_registered(struct pic *pic); 113 static void intrcnt_setname(const char *name, int index); 114 static void intrcnt_updatename(struct intsrc *is); 115 static void intrcnt_register(struct intsrc *is); 116 117 /* 118 * SYSINIT levels for SI_SUB_INTR: 119 * 120 * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init 121 * SI_ORDER_SECOND: Xen PICs 122 * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges 123 * SI_ORDER_FOURTH: Add 8259A PICs 124 * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources 125 * SI_ORDER_MIDDLE: SMP interrupt counters 126 * SI_ORDER_ANY: Enable interrupts on BSP 127 */ 128 129 static int 130 intr_pic_registered(struct pic *pic) 131 { 132 struct pic *p; 133 134 TAILQ_FOREACH(p, &pics, pics) { 135 if (p == pic) 136 return (1); 137 } 138 return (0); 139 } 140 141 /* 142 * Register a new interrupt controller (PIC). This is to support suspend 143 * and resume where we suspend/resume controllers rather than individual 144 * sources. This also allows controllers with no active sources (such as 145 * 8259As in a system using the APICs) to participate in suspend and resume. 
 */
int
intr_register_pic(struct pic *pic)
{
	int error;

	/*
	 * The duplicate check and the list insertion are done under
	 * intrpic_lock.  A PIC that is already on the list yields EBUSY;
	 * otherwise it is appended and 0 is returned.
	 */
	mtx_lock(&intrpic_lock);
	if (intr_pic_registered(pic))
		error = EBUSY;
	else {
		TAILQ_INSERT_TAIL(&pics, pic, pics);
		error = 0;
	}
	mtx_unlock(&intrpic_lock);
	return (error);
}

/*
 * Allocate interrupt source arrays and register interrupt sources
 * once the number of interrupts is known.
 *
 * Runs as a SYSINIT at SI_SUB_INTR, SI_ORDER_FOURTH + 1.  It sizes and
 * allocates interrupt_sources (and interrupt_sorted on SMP), the intrcnt
 * counter array and the intrnames table, reserves counter slot 0 for the
 * "???" dummy entry, and finally invokes each registered PIC's
 * pic_register_sources method, if it has one.
 */
static void
intr_init_sources(void *arg)
{
	struct pic *pic;

	MPASS(num_io_irqs > 0);

	interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
	    M_INTR, M_WAITOK | M_ZERO);
#ifdef SMP
	interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted),
	    M_INTR, M_WAITOK | M_ZERO);
#endif

	/*
	 * - 1 ??? dummy counter.
	 * - 2 counters for each I/O interrupt.
	 * - 1 counter for each CPU for lapic timer.
	 * - 1 counter for each CPU for the Hyper-V vmbus driver.
	 * - 8 counters for each CPU for IPI counters for SMP.
	 */
	nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
#ifdef COUNT_IPIS
	if (mp_ncpus > 1)
		nintrcnt += 8 * mp_ncpus;
#endif
	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
	    M_ZERO);
	/* Each name occupies a fixed-width slot of MAXCOMLEN + 1 bytes. */
	intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK |
	    M_ZERO);
	sintrcnt = nintrcnt * sizeof(u_long);
	sintrnames = nintrcnt * (MAXCOMLEN + 1);

	/* Slot 0 is the dummy counter; real counters start at index 1. */
	intrcnt_setname("???", 0);
	intrcnt_index = 1;

	/*
	 * NB: intrpic_lock is not held here to avoid LORs due to
	 * malloc() in intr_register_source(). However, we are still
	 * single-threaded at this point in startup so the list of
	 * PICs shouldn't change.
	 */
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_register_sources != NULL)
			pic->pic_register_sources(pic);
	}
}
SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
    NULL);

/*
 * Register a new interrupt source with the global interrupt system.
 * The global interrupts need to be disabled when this function is
 * called.
 *
 * The source's vector is obtained from its PIC's pic_vector method.
 * Returns EEXIST if a source is already registered for that vector, the
 * error from intr_event_create() if the event cannot be created, and 0
 * on success.
 */
int
intr_register_source(struct intsrc *isrc)
{
	int error, vector;

	KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
	vector = isrc->is_pic->pic_vector(isrc);
	KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
	    num_io_irqs));
	/* Unlocked early-out; the slot is checked again under the lock. */
	if (interrupt_sources[vector] != NULL)
		return (EEXIST);
	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
	    intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
	    (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
	    vector);
	if (error)
		return (error);
	sx_xlock(&intrsrc_lock);
	if (interrupt_sources[vector] != NULL) {
		/* The slot was filled in the meantime; undo the event. */
		sx_xunlock(&intrsrc_lock);
		intr_event_destroy(isrc->is_event);
		return (EEXIST);
	}
	intrcnt_register(isrc);
	interrupt_sources[vector] = isrc;
	isrc->is_handlers = 0;
	sx_xunlock(&intrsrc_lock);
	return (0);
}

/*
 * Return the interrupt source registered for 'vector'.  NULL is returned
 * when 'vector' is outside [0, num_io_irqs); for an in-range vector the
 * table entry is returned as is, which is NULL if nothing was stored
 * there (the table is allocated zeroed).
 */
struct intsrc *
intr_lookup_source(int vector)
{

	if (vector < 0 || vector >= num_io_irqs)
		return (NULL);
	return (interrupt_sources[vector]);
}

/*
 * Attach a handler to the interrupt source for 'vector'.
 *
 * Returns EINVAL if no source exists for the vector, otherwise the
 * result of intr_event_add_handler().  On success the source's counter
 * name is refreshed and its handler count is bumped; when this is the
 * first handler, 'domain' is recorded in the source and the PIC's
 * pic_enable_intr and pic_enable_source methods are called, in that
 * order.
 */
int
intr_add_handler(const char *name, int vector, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
    int domain)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_add_handler(isrc->is_event, name, filter, handler,
	    arg, intr_priority(flags), flags, cookiep);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		intrcnt_updatename(isrc);
		isrc->is_handlers++;
		if (isrc->is_handlers == 1) {
			isrc->is_domain = domain;
			isrc->is_pic->pic_enable_intr(isrc);
			isrc->is_pic->pic_enable_source(isrc);
		}
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

/*
 * Detach the handler identified by 'cookie'.
 *
 * The owning source is fetched with intr_handler_source() before the
 * handler is removed.  When the removal succeeds the handler count is
 * decremented; once it reaches zero the PIC's pic_disable_source
 * (with PIC_NO_EOI) and pic_disable_intr methods are called, in that
 * order.  The counter name is refreshed afterwards.  Returns the result
 * of intr_event_remove_handler().
 */
int
intr_remove_handler(void *cookie)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_handler_source(cookie);
	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		isrc->is_handlers--;
		if (isrc->is_handlers == 0) {
			isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
			isrc->is_pic->pic_disable_intr(isrc);
		}
		intrcnt_updatename(isrc);
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

/*
 * Pass a trigger mode / polarity configuration request for 'vector' to
 * the owning PIC's pic_config_intr method.  Returns EINVAL if no source
 * exists for the vector, otherwise whatever the PIC method returns.
 */
int
intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
}

/*
 * Callback supplied to intr_event_create() by intr_register_source().
 * 'arg' is the struct intsrc; its PIC is asked to disable the source
 * with PIC_EOI.
 */
static void
intr_disable_src(void *arg)
{
	struct intsrc *isrc;

	isrc = arg;
	isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
}

/*
 * Account for an interrupt on 'isrc' and hand it to the source's
 * interrupt event.  If intr_event_handle() returns non-zero the
 * interrupt is treated as stray.
 */
void
intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
{
	struct intr_event *ie;
	int vector;

	/*
	 * We count software interrupts when we process them. The
	 * code here follows previous practice, but there's an
	 * argument for counting hardware interrupts when they're
	 * processed too.
	 */
	(*isrc->is_count)++;
	VM_CNT_INC(v_intr);

	ie = isrc->is_event;

	/*
	 * XXX: We assume that IRQ 0 is only used for the ISA timer
	 * device (clk).
	 */
	vector = isrc->is_pic->pic_vector(isrc);
	if (vector == 0)
		clkintr_pending = 1;

	/*
	 * For stray interrupts, mask and EOI the source, bump the
	 * stray count, and log the condition.
	 */
	if (intr_event_handle(ie, frame) != 0) {
		isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
		(*isrc->is_straycount)++;
		if (*isrc->is_straycount < MAX_STRAY_LOG)
			log(LOG_ERR, "stray irq%d\n", vector);
		else if (*isrc->is_straycount == MAX_STRAY_LOG)
			log(LOG_CRIT,
			    "too many stray irq %d's: not logging anymore\n",
			    vector);
	}
}

/*
 * Resume every registered PIC that provides a pic_resume method, walking
 * the list in registration order under intrpic_lock.  'suspend_cancelled'
 * is passed through to each method unchanged.  In kernels built without
 * DEV_ATPIC the 8259As are reset first via atpic_reset().
 */
void
intr_resume(bool suspend_cancelled)
{
	struct pic *pic;

#ifndef DEV_ATPIC
	atpic_reset();
#endif
	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_resume != NULL)
			pic->pic_resume(pic, suspend_cancelled);
	}
	mtx_unlock(&intrpic_lock);
}

/*
 * Suspend every registered PIC that provides a pic_suspend method.  The
 * list is walked in reverse registration order, i.e. the opposite order
 * to intr_resume().
 */
void
intr_suspend(void)
{
	struct pic *pic;

	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
		if (pic->pic_suspend != NULL)
			pic->pic_suspend(pic);
	}
	mtx_unlock(&intrpic_lock);
}

/*
 * CPU assignment callback supplied to intr_event_create().  'arg' is the
 * struct intsrc and 'cpu' indexes cpu_apic_ids[].
 *
 * Without SMP this always returns EOPNOTSUPP.  With SMP, a request for
 * NOCPU, or one made while assignment is not active (single CPU with
 * EARLY_AP_STARTUP, assign_cpu still clear otherwise), is a no-op that
 * returns 0.  Otherwise the PIC's pic_assign_cpu method is called under
 * intrsrc_lock and is_cpu is updated only if it succeeds; its result is
 * returned.
 */
static int
intr_assign_cpu(void *arg, int cpu)
{
#ifdef SMP
	struct intsrc *isrc;
	int error;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);

	/* Nothing to do if there is only a single CPU. */
	if (mp_ncpus > 1 && cpu != NOCPU) {
#else
	/*
	 * Don't do anything during early boot. We will pick up the
	 * assignment once the APs are started.
	 */
	if (assign_cpu && cpu != NOCPU) {
#endif
		isrc = arg;
		sx_xlock(&intrsrc_lock);
		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
		if (error == 0)
			isrc->is_cpu = cpu;
		sx_xunlock(&intrsrc_lock);
	} else
		error = 0;
	return (error);
#else
	return (EOPNOTSUPP);
#endif
}

/*
 * Store 'name' in slot 'index' of the intrnames table.  Slots are
 * MAXCOMLEN + 1 bytes wide; the "%-*s" format left-justifies the name
 * and pads it to MAXCOMLEN characters, and snprintf() bounds the write
 * to the slot.
 */
static void
intrcnt_setname(const char *name, int index)
{

	snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
	    MAXCOMLEN, name);
}

/*
 * Refresh the intrnames entry of 'is' from the current full name of its
 * interrupt event.
 */
static void
intrcnt_updatename(struct intsrc *is)
{

	intrcnt_setname(is->is_event->ie_fullname, is->is_index);
}

/*
 * Reserve two consecutive counter slots for 'is': the first becomes its
 * interrupt counter (is_count), the second its stray interrupt counter
 * (is_straycount), named "stray irq<vector>".  The source must already
 * have an interrupt event.
 */
static void
intrcnt_register(struct intsrc *is)
{
	char straystr[MAXCOMLEN + 1];

	KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index + 2 <= nintrcnt);
	is->is_index = intrcnt_index;
	intrcnt_index += 2;
	snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
	    is->is_pic->pic_vector(is));
	intrcnt_updatename(is);
	is->is_count = &intrcnt[is->is_index];
	intrcnt_setname(straystr, is->is_index + 1);
	is->is_straycount = &intrcnt[is->is_index + 1];
	mtx_unlock_spin(&intrcnt_lock);
}

/*
 * Reserve a single counter slot labelled 'name' and return a pointer to
 * the counter through 'countp'.
 */
void
intrcnt_add(const char *name, u_long **countp)
{

	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index < nintrcnt);
	*countp = &intrcnt[intrcnt_index];
	intrcnt_setname(name, intrcnt_index);
	intrcnt_index++;
	mtx_unlock_spin(&intrcnt_lock);
}

/*
 * First SI_SUB_INTR step: initialize the PIC list and the three locks
 * used by this file.
 */
static void
intr_init(void *dummy __unused)
{

	TAILQ_INIT(&pics);
	mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
	sx_init(&intrsrc_lock, "intrsrc");
	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
}
SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);

/* Last SI_SUB_INTR step (SI_ORDER_ANY). */
static void
intr_init_final(void *dummy __unused)
{

	/*
	 * Enable interrupts on the BSP after all of the interrupt
	 * controllers are initialized. Device interrupts are still
	 * disabled in the interrupt controllers until interrupt
	 * handlers are registered. Interrupts are enabled on each AP
	 * after their first context switch.
	 */
	enable_intr();
}
SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);

#ifndef DEV_ATPIC
/* Initialize the two 8259A's to a known-good shutdown state. */
void
atpic_reset(void)
{

	/*
	 * First controller (IO_ICU1): reset command, then three setup
	 * bytes written to the IMR port, then a mask of 0xff.
	 * NOTE(review): presumably the standard ICW1-ICW4 sequence with
	 * all IRQs left masked -- confirm against the 8259A datasheet.
	 */
	outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
	outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU1, OCW3_SEL | OCW3_RR);

	/* Second controller (IO_ICU2): same sequence, vector base + 8. */
	outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
	outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
	outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2, OCW3_SEL | OCW3_RR);
}
#endif

/* Add a description to an active interrupt handler.
*/ 527 int 528 intr_describe(u_int vector, void *ih, const char *descr) 529 { 530 struct intsrc *isrc; 531 int error; 532 533 isrc = intr_lookup_source(vector); 534 if (isrc == NULL) 535 return (EINVAL); 536 error = intr_event_describe_handler(isrc->is_event, ih, descr); 537 if (error) 538 return (error); 539 intrcnt_updatename(isrc); 540 return (0); 541 } 542 543 void 544 intr_reprogram(void) 545 { 546 struct intsrc *is; 547 u_int v; 548 549 sx_xlock(&intrsrc_lock); 550 for (v = 0; v < num_io_irqs; v++) { 551 is = interrupt_sources[v]; 552 if (is == NULL) 553 continue; 554 if (is->is_pic->pic_reprogram_pin != NULL) 555 is->is_pic->pic_reprogram_pin(is); 556 } 557 sx_xunlock(&intrsrc_lock); 558 } 559 560 #ifdef DDB 561 /* 562 * Dump data about interrupt handlers 563 */ 564 DB_SHOW_COMMAND(irqs, db_show_irqs) 565 { 566 struct intsrc **isrc; 567 u_int i; 568 int verbose; 569 570 if (strcmp(modif, "v") == 0) 571 verbose = 1; 572 else 573 verbose = 0; 574 isrc = interrupt_sources; 575 for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++) 576 if (*isrc != NULL) 577 db_dump_intr_event((*isrc)->is_event, verbose); 578 } 579 #endif 580 581 #ifdef SMP 582 /* 583 * Support for balancing interrupt sources across CPUs. For now we just 584 * allocate CPUs round-robin. 585 */ 586 587 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); 588 static int current_cpu[MAXMEMDOM]; 589 590 static void 591 intr_init_cpus(void) 592 { 593 int i; 594 595 for (i = 0; i < vm_ndomains; i++) { 596 current_cpu[i] = 0; 597 if (!CPU_ISSET(current_cpu[i], &intr_cpus) || 598 !CPU_ISSET(current_cpu[i], &cpuset_domain[i])) 599 intr_next_cpu(i); 600 } 601 } 602 603 /* 604 * Return the CPU that the next interrupt source should use. For now 605 * this just returns the next local APIC according to round-robin. 
606 */ 607 u_int 608 intr_next_cpu(int domain) 609 { 610 u_int apic_id; 611 612 #ifdef EARLY_AP_STARTUP 613 MPASS(mp_ncpus == 1 || smp_started); 614 if (mp_ncpus == 1) 615 return (PCPU_GET(apic_id)); 616 #else 617 /* Leave all interrupts on the BSP during boot. */ 618 if (!assign_cpu) 619 return (PCPU_GET(apic_id)); 620 #endif 621 622 mtx_lock_spin(&icu_lock); 623 apic_id = cpu_apic_ids[current_cpu[domain]]; 624 do { 625 current_cpu[domain]++; 626 if (current_cpu[domain] > mp_maxid) 627 current_cpu[domain] = 0; 628 } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) || 629 !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain])); 630 mtx_unlock_spin(&icu_lock); 631 return (apic_id); 632 } 633 634 /* Attempt to bind the specified IRQ to the specified CPU. */ 635 int 636 intr_bind(u_int vector, u_char cpu) 637 { 638 struct intsrc *isrc; 639 640 isrc = intr_lookup_source(vector); 641 if (isrc == NULL) 642 return (EINVAL); 643 return (intr_event_bind(isrc->is_event, cpu)); 644 } 645 646 /* 647 * Add a CPU to our mask of valid CPUs that can be destinations of 648 * interrupts. 649 */ 650 void 651 intr_add_cpu(u_int cpu) 652 { 653 654 if (cpu >= MAXCPU) 655 panic("%s: Invalid CPU ID", __func__); 656 if (bootverbose) 657 printf("INTR: Adding local APIC %d as a target\n", 658 cpu_apic_ids[cpu]); 659 660 CPU_SET(cpu, &intr_cpus); 661 } 662 663 #ifdef EARLY_AP_STARTUP 664 static void 665 intr_smp_startup(void *arg __unused) 666 { 667 668 intr_init_cpus(); 669 return; 670 } 671 SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup, 672 NULL); 673 674 #else 675 /* 676 * Distribute all the interrupt sources among the available CPUs once the 677 * AP's have been launched. 678 */ 679 static void 680 intr_shuffle_irqs(void *arg __unused) 681 { 682 struct intsrc *isrc; 683 u_int cpu, i; 684 685 intr_init_cpus(); 686 /* Don't bother on UP. */ 687 if (mp_ncpus == 1) 688 return; 689 690 /* Round-robin assign a CPU to each enabled source. 
*/ 691 sx_xlock(&intrsrc_lock); 692 assign_cpu = 1; 693 for (i = 0; i < num_io_irqs; i++) { 694 isrc = interrupt_sources[i]; 695 if (isrc != NULL && isrc->is_handlers > 0) { 696 /* 697 * If this event is already bound to a CPU, 698 * then assign the source to that CPU instead 699 * of picking one via round-robin. Note that 700 * this is careful to only advance the 701 * round-robin if the CPU assignment succeeds. 702 */ 703 cpu = isrc->is_event->ie_cpu; 704 if (cpu == NOCPU) 705 cpu = current_cpu[isrc->is_domain]; 706 if (isrc->is_pic->pic_assign_cpu(isrc, 707 cpu_apic_ids[cpu]) == 0) { 708 isrc->is_cpu = cpu; 709 if (isrc->is_event->ie_cpu == NOCPU) 710 intr_next_cpu(isrc->is_domain); 711 } 712 } 713 } 714 sx_xunlock(&intrsrc_lock); 715 } 716 SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, 717 NULL); 718 #endif 719 720 /* 721 * TODO: Export this information in a non-MD fashion, integrate with vmstat -i. 722 */ 723 static int 724 sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) 725 { 726 struct sbuf sbuf; 727 struct intsrc *isrc; 728 u_int i; 729 int error; 730 731 error = sysctl_wire_old_buffer(req, 0); 732 if (error != 0) 733 return (error); 734 735 sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 736 sx_slock(&intrsrc_lock); 737 for (i = 0; i < num_io_irqs; i++) { 738 isrc = interrupt_sources[i]; 739 if (isrc == NULL) 740 continue; 741 sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n", 742 isrc->is_event->ie_fullname, 743 isrc->is_index, 744 isrc->is_cpu, 745 isrc->is_domain, 746 *isrc->is_count); 747 } 748 749 sx_sunlock(&intrsrc_lock); 750 error = sbuf_finish(&sbuf); 751 sbuf_delete(&sbuf); 752 return (error); 753 } 754 SYSCTL_PROC(_hw, OID_AUTO, intrs, CTLTYPE_STRING | CTLFLAG_RW, 755 0, 0, sysctl_hw_intrs, "A", "interrupt:number @cpu: count"); 756 757 /* 758 * Compare two, possibly NULL, entries in the interrupt source array 759 * by load. 
760 */ 761 static int 762 intrcmp(const void *one, const void *two) 763 { 764 const struct intsrc *i1, *i2; 765 766 i1 = *(const struct intsrc * const *)one; 767 i2 = *(const struct intsrc * const *)two; 768 if (i1 != NULL && i2 != NULL) 769 return (*i1->is_count - *i2->is_count); 770 if (i1 != NULL) 771 return (1); 772 if (i2 != NULL) 773 return (-1); 774 return (0); 775 } 776 777 /* 778 * Balance IRQs across available CPUs according to load. 779 */ 780 static void 781 intr_balance(void *dummy __unused, int pending __unused) 782 { 783 struct intsrc *isrc; 784 int interval; 785 u_int cpu; 786 int i; 787 788 interval = intrbalance; 789 if (interval == 0) 790 goto out; 791 792 /* 793 * Sort interrupts according to count. 794 */ 795 sx_xlock(&intrsrc_lock); 796 memcpy(interrupt_sorted, interrupt_sources, num_io_irqs * 797 sizeof(interrupt_sorted[0])); 798 qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]), 799 intrcmp); 800 801 /* 802 * Restart the scan from the same location to avoid moving in the 803 * common case. 804 */ 805 intr_init_cpus(); 806 807 /* 808 * Assign round-robin from most loaded to least. 809 */ 810 for (i = num_io_irqs - 1; i >= 0; i--) { 811 isrc = interrupt_sorted[i]; 812 if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) 813 continue; 814 cpu = current_cpu[isrc->is_domain]; 815 intr_next_cpu(isrc->is_domain); 816 if (isrc->is_cpu != cpu && 817 isrc->is_pic->pic_assign_cpu(isrc, 818 cpu_apic_ids[cpu]) == 0) 819 isrc->is_cpu = cpu; 820 } 821 sx_xunlock(&intrsrc_lock); 822 out: 823 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, 824 interval ? 
hz * interval : hz * 60); 825 826 } 827 828 static void 829 intr_balance_init(void *dummy __unused) 830 { 831 832 TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance, 833 NULL); 834 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz); 835 } 836 SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL); 837 838 #else 839 /* 840 * Always route interrupts to the current processor in the UP case. 841 */ 842 u_int 843 intr_next_cpu(int domain) 844 { 845 846 return (PCPU_GET(apic_id)); 847 } 848 #endif 849