1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 /* 31 * Machine dependent interrupt code for x86. For x86, we have to 32 * deal with different PICs. Thus, we use the passed in vector to lookup 33 * an interrupt source associated with that vector. The interrupt source 34 * describes which PIC the source belongs to and includes methods to handle 35 * that source. 
 */

#include "opt_atpic.h"
#include "opt_ddb.h"
#include "opt_smp.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/vmmeter.h>
#include <machine/clock.h>
#include <machine/intr_machdep.h>
#include <machine/smp.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifndef DEV_ATPIC
#include <machine/segments.h>
#include <machine/frame.h>
#include <dev/ic/i8259.h>
#include <x86/isa/icu.h>
#include <isa/isareg.h>
#endif

#include <vm/vm.h>

/*
 * Stop logging stray interrupts on a source once it has raised this
 * many; see intr_execute_handlers().
 */
#define	MAX_STRAY_LOG	5

typedef void (*mask_fn)(void *);

/* Next free slot in the intrcnt[]/intrnames[] arrays. */
static int intrcnt_index;
/*
 * Interrupt sources indexed by IRQ vector; allocated once num_io_irqs
 * is known in intr_init_sources().
 */
static struct intsrc **interrupt_sources;
#ifdef SMP
/* Scratch copy of interrupt_sources[], sorted by load in intr_balance(). */
static struct intsrc **interrupt_sorted;
static int intrbalance;
SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RWTUN, &intrbalance, 0,
    "Interrupt auto-balance interval (seconds). Zero disables.");
static struct timeout_task intrbalance_task;
#endif
/* Protects interrupt_sources[] and per-source handler bookkeeping. */
static struct sx intrsrc_lock;
/* Protects the list of registered PICs. */
static struct mtx intrpic_lock;
/* Spin lock protecting the intrcnt[]/intrnames[] allocation cursor. */
static struct mtx intrcnt_lock;
static TAILQ_HEAD(pics_head, pic) pics;
u_int num_io_irqs;

#if defined(SMP) && !defined(EARLY_AP_STARTUP)
/* Nonzero once the APs are running and interrupts may be routed to them. */
static int assign_cpu;
#endif

u_long *intrcnt;
char *intrnames;
/*
 * NB: these two are initialized to pointer sizes only as placeholders;
 * intr_init_sources() overwrites them with the true array sizes.
 */
size_t sintrcnt = sizeof(intrcnt);
size_t sintrnames = sizeof(intrnames);
int nintrcnt;

static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources");

static int	intr_assign_cpu(void *arg, int cpu);
static void	intr_disable_src(void *arg);
static void	intr_init(void *__dummy);
static int	intr_pic_registered(struct pic *pic);
static void	intrcnt_setname(const char *name, int index);
static void	intrcnt_updatename(struct intsrc *is);
static void	intrcnt_register(struct intsrc *is);

/*
 * SYSINIT levels for SI_SUB_INTR:
 *
 * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init
 * SI_ORDER_SECOND: Xen PICs
 * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges
 * SI_ORDER_FOURTH: Add 8259A PICs
 * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources
 * SI_ORDER_MIDDLE: SMP interrupt counters
 * SI_ORDER_ANY: Enable interrupts on BSP
 */

/*
 * Return non-zero if the given PIC is already on the registered list.
 * Callers hold intrpic_lock or are single-threaded at boot.
 */
static int
intr_pic_registered(struct pic *pic)
{
	struct pic *p;

	TAILQ_FOREACH(p, &pics, pics) {
		if (p == pic)
			return (1);
	}
	return (0);
}

/*
 * Register a new interrupt controller (PIC).  This is to support suspend
 * and resume where we suspend/resume controllers rather than individual
 * sources.  This also allows controllers with no active sources (such as
 * 8259As in a system using the APICs) to participate in suspend and resume.
 */
int
intr_register_pic(struct pic *pic)
{
	int error;

	mtx_lock(&intrpic_lock);
	if (intr_pic_registered(pic))
		error = EBUSY;
	else {
		TAILQ_INSERT_TAIL(&pics, pic, pics);
		error = 0;
	}
	mtx_unlock(&intrpic_lock);
	return (error);
}

/*
 * Allocate interrupt source arrays and register interrupt sources
 * once the number of interrupts is known.
 */
static void
intr_init_sources(void *arg)
{
	struct pic *pic;

	MPASS(num_io_irqs > 0);

	interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
	    M_INTR, M_WAITOK | M_ZERO);
#ifdef SMP
	interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted),
	    M_INTR, M_WAITOK | M_ZERO);
#endif

	/*
	 * Size the intrcnt[] array:
	 * - 1 ??? dummy counter.
	 * - 2 counters for each I/O interrupt.
	 * - 1 counter for each CPU for lapic timer.
	 * - 1 counter for each CPU for the Hyper-V vmbus driver.
	 * - 8 counters for each CPU for IPI counters for SMP.
	 */
	nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
#ifdef COUNT_IPIS
	if (mp_ncpus > 1)
		nintrcnt += 8 * mp_ncpus;
#endif
	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
	    M_ZERO);
	intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK |
	    M_ZERO);
	sintrcnt = nintrcnt * sizeof(u_long);
	sintrnames = nintrcnt * (MAXCOMLEN + 1);

	intrcnt_setname("???", 0);
	intrcnt_index = 1;

	/*
	 * NB: intrpic_lock is not held here to avoid LORs due to
	 * malloc() in intr_register_source().  However, we are still
	 * single-threaded at this point in startup so the list of
	 * PICs shouldn't change.
	 */
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_register_sources != NULL)
			pic->pic_register_sources(pic);
	}
}
SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
    NULL);

/*
 * Register a new interrupt source with the global interrupt system.
 * The global interrupts need to be disabled when this function is
 * called.
 *
 * Returns EEXIST if another source already owns the vector; the
 * double-check after taking intrsrc_lock covers a race with a
 * concurrent registration during intr_event_create().
 */
int
intr_register_source(struct intsrc *isrc)
{
	int error, vector;

	KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
	vector = isrc->is_pic->pic_vector(isrc);
	KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
	    num_io_irqs));
	if (interrupt_sources[vector] != NULL)
		return (EEXIST);
	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
	    intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
	    (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
	    vector);
	if (error)
		return (error);
	sx_xlock(&intrsrc_lock);
	if (interrupt_sources[vector] != NULL) {
		sx_xunlock(&intrsrc_lock);
		intr_event_destroy(isrc->is_event);
		return (EEXIST);
	}
	intrcnt_register(isrc);
	interrupt_sources[vector] = isrc;
	isrc->is_handlers = 0;
	sx_xunlock(&intrsrc_lock);
	return (0);
}

/*
 * Look up the interrupt source for an IRQ vector.  Returns NULL for
 * out-of-range vectors or vectors with no registered source.
 */
struct intsrc *
intr_lookup_source(int vector)
{

	if (vector < 0 || vector >= num_io_irqs)
		return (NULL);
	return (interrupt_sources[vector]);
}

/*
 * Add a handler to the interrupt event for 'vector'.  When this is the
 * first handler on the source, record the NUMA domain and unmask the
 * interrupt at the PIC.
 */
int
intr_add_handler(const char *name, int vector, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
    int domain)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_add_handler(isrc->is_event, name, filter, handler,
	    arg, intr_priority(flags), flags, cookiep);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		intrcnt_updatename(isrc);
		isrc->is_handlers++;
		if (isrc->is_handlers == 1) {
			isrc->is_domain = domain;
			isrc->is_pic->pic_enable_intr(isrc);
			isrc->is_pic->pic_enable_source(isrc);
		}
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

/*
 * Remove a previously added handler.  When the last handler goes away,
 * mask the source (without EOI) and disable the interrupt at the PIC.
 */
int
intr_remove_handler(void *cookie)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_handler_source(cookie);
	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		isrc->is_handlers--;
		if (isrc->is_handlers == 0) {
			isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
			isrc->is_pic->pic_disable_intr(isrc);
		}
		intrcnt_updatename(isrc);
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

/* Set the trigger mode and polarity of an interrupt source via its PIC. */
int
intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
}

/*
 * intr_event disable hook: mask the source and send an EOI so further
 * interrupts are held off while the ithread runs.
 */
static void
intr_disable_src(void *arg)
{
	struct intsrc *isrc;

	isrc = arg;
	isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
}

/*
 * Main dispatch for a hardware interrupt: bump counters and hand the
 * frame to the source's interrupt event.
 */
void
intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
{
	struct intr_event *ie;
	int vector;

	/*
	 * We count software interrupts when we process them.  The
	 * code here follows previous practice, but there's an
	 * argument for counting hardware interrupts when they're
	 * processed too.
	 */
	(*isrc->is_count)++;
	VM_CNT_INC(v_intr);

	ie = isrc->is_event;

	/*
	 * XXX: We assume that IRQ 0 is only used for the ISA timer
	 * device (clk).
	 */
	vector = isrc->is_pic->pic_vector(isrc);
	if (vector == 0)
		clkintr_pending = 1;

	/*
	 * For stray interrupts, mask and EOI the source, bump the
	 * stray count, and log the condition.
	 */
	if (intr_event_handle(ie, frame) != 0) {
		isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
		(*isrc->is_straycount)++;
		if (*isrc->is_straycount < MAX_STRAY_LOG)
			log(LOG_ERR, "stray irq%d\n", vector);
		else if (*isrc->is_straycount == MAX_STRAY_LOG)
			log(LOG_CRIT,
			    "too many stray irq %d's: not logging anymore\n",
			    vector);
	}
}

/*
 * Resume all registered PICs in registration order.  Without DEV_ATPIC
 * the 8259As are not on the list, so reset them explicitly first.
 */
void
intr_resume(bool suspend_cancelled)
{
	struct pic *pic;

#ifndef DEV_ATPIC
	atpic_reset();
#endif
	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_resume != NULL)
			pic->pic_resume(pic, suspend_cancelled);
	}
	mtx_unlock(&intrpic_lock);
}

/* Suspend all registered PICs, in reverse order of registration. */
void
intr_suspend(void)
{
	struct pic *pic;

	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
		if (pic->pic_suspend != NULL)
			pic->pic_suspend(pic);
	}
	mtx_unlock(&intrpic_lock);
}

/*
 * intr_event CPU-assignment hook: bind an interrupt source's vector to
 * the local APIC of 'cpu'.  A no-op on UP kernels and during early boot.
 */
static int
intr_assign_cpu(void *arg, int cpu)
{
#ifdef SMP
	struct intsrc *isrc;
	int error;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);

	/* Nothing to do if there is only a single CPU. */
	if (mp_ncpus > 1 && cpu != NOCPU) {
#else
	/*
	 * Don't do anything during early boot.  We will pick up the
	 * assignment once the APs are started.
	 */
	if (assign_cpu && cpu != NOCPU) {
#endif
		isrc = arg;
		sx_xlock(&intrsrc_lock);
		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
		if (error == 0)
			isrc->is_cpu = cpu;
		sx_xunlock(&intrsrc_lock);
	} else
		error = 0;
	return (error);
#else
	return (EOPNOTSUPP);
#endif
}

/* Write 'name', space-padded to MAXCOMLEN, into slot 'index' of intrnames[]. */
static void
intrcnt_setname(const char *name, int index)
{

	snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
	    MAXCOMLEN, name);
}

/* Refresh a source's intrnames[] entry from its event's full name. */
static void
intrcnt_updatename(struct intsrc *is)
{

	intrcnt_setname(is->is_event->ie_fullname, is->is_index);
}

/*
 * Claim a pair of intrcnt[] slots for a source: one for handled
 * interrupts and one (named "stray irqN") for strays.
 */
static void
intrcnt_register(struct intsrc *is)
{
	char straystr[MAXCOMLEN + 1];

	KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index + 2 <= nintrcnt);
	is->is_index = intrcnt_index;
	intrcnt_index += 2;
	snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
	    is->is_pic->pic_vector(is));
	intrcnt_updatename(is);
	is->is_count = &intrcnt[is->is_index];
	intrcnt_setname(straystr, is->is_index + 1);
	is->is_straycount = &intrcnt[is->is_index + 1];
	mtx_unlock_spin(&intrcnt_lock);
}

/* Allocate a single named intrcnt[] slot for a caller-managed counter. */
void
intrcnt_add(const char *name, u_long **countp)
{

	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index < nintrcnt);
	*countp = &intrcnt[intrcnt_index];
	intrcnt_setname(name, intrcnt_index);
	intrcnt_index++;
	mtx_unlock_spin(&intrcnt_lock);
}

/* Set up the locks and PIC list before any PIC registration. */
static void
intr_init(void *dummy __unused)
{

	TAILQ_INIT(&pics);
	mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
	sx_init(&intrsrc_lock, "intrsrc");
	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
}
SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);

static void
intr_init_final(void *dummy __unused)
{

	/*
	 * Enable interrupts on the BSP after all of the interrupt
	 * controllers are initialized.  Device interrupts are still
	 * disabled in the interrupt controllers until interrupt
	 * handlers are registered.  Interrupts are enabled on each AP
	 * after their first context switch.
	 */
	enable_intr();
}
SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);

#ifndef DEV_ATPIC
/* Initialize the two 8259A's to a known-good shutdown state. */
void
atpic_reset(void)
{

	outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
	outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU1, OCW3_SEL | OCW3_RR);

	outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
	outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
	outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2, OCW3_SEL | OCW3_RR);
}
#endif

/* Add a description to an active interrupt handler. */
int
intr_describe(u_int vector, void *ih, const char *descr)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_describe_handler(isrc->is_event, ih, descr);
	if (error)
		return (error);
	intrcnt_updatename(isrc);
	return (0);
}

/*
 * Ask each PIC to reprogram its registered pins, e.g. after routing
 * information has changed.
 */
void
intr_reprogram(void)
{
	struct intsrc *is;
	u_int v;

	sx_xlock(&intrsrc_lock);
	for (v = 0; v < num_io_irqs; v++) {
		is = interrupt_sources[v];
		if (is == NULL)
			continue;
		if (is->is_pic->pic_reprogram_pin != NULL)
			is->is_pic->pic_reprogram_pin(is);
	}
	sx_xunlock(&intrsrc_lock);
}

#ifdef DDB
/*
 * Dump data about interrupt handlers
 */
DB_SHOW_COMMAND(irqs, db_show_irqs)
{
	struct intsrc **isrc;
	u_int i;
	int verbose;

	if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	isrc = interrupt_sources;
	for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++)
		if (*isrc != NULL)
			db_dump_intr_event((*isrc)->is_event, verbose);
}
#endif

#ifdef SMP
/*
 * Support for balancing interrupt sources across CPUs.  For now we just
 * allocate CPUs round-robin.
 */

cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
/* Per-domain round-robin cursor into the CPU set. */
static int current_cpu[MAXMEMDOM];

/*
 * Reset each domain's round-robin cursor to its first CPU that is both
 * an allowed interrupt target and a member of the domain.
 */
static void
intr_init_cpus(void)
{
	int i;

	for (i = 0; i < vm_ndomains; i++) {
		current_cpu[i] = 0;
		if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
		    !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
			intr_next_cpu(i);
	}
}

/*
 * Return the CPU that the next interrupt source should use.  For now
 * this just returns the next local APIC according to round-robin.
 */
u_int
intr_next_cpu(int domain)
{
	u_int apic_id;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	if (mp_ncpus == 1)
		return (PCPU_GET(apic_id));
#else
	/* Leave all interrupts on the BSP during boot. */
	if (!assign_cpu)
		return (PCPU_GET(apic_id));
#endif

	mtx_lock_spin(&icu_lock);
	apic_id = cpu_apic_ids[current_cpu[domain]];
	do {
		current_cpu[domain]++;
		if (current_cpu[domain] > mp_maxid)
			current_cpu[domain] = 0;
	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
	    !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
	mtx_unlock_spin(&icu_lock);
	return (apic_id);
}

/* Attempt to bind the specified IRQ to the specified CPU. */
int
intr_bind(u_int vector, u_char cpu)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	return (intr_event_bind(isrc->is_event, cpu));
}

/*
 * Add a CPU to our mask of valid CPUs that can be destinations of
 * interrupts.
 */
void
intr_add_cpu(u_int cpu)
{

	if (cpu >= MAXCPU)
		panic("%s: Invalid CPU ID", __func__);
	if (bootverbose)
		printf("INTR: Adding local APIC %d as a target\n",
		    cpu_apic_ids[cpu]);

	CPU_SET(cpu, &intr_cpus);
}

#ifdef EARLY_AP_STARTUP
static void
intr_smp_startup(void *arg __unused)
{

	intr_init_cpus();
	return;
}
SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
    NULL);

#else
/*
 * Distribute all the interrupt sources among the available CPUs once the
 * AP's have been launched.
 */
static void
intr_shuffle_irqs(void *arg __unused)
{
	struct intsrc *isrc;
	u_int cpu, i;

	intr_init_cpus();
	/* Don't bother on UP. */
	if (mp_ncpus == 1)
		return;

	/* Round-robin assign a CPU to each enabled source. */
	sx_xlock(&intrsrc_lock);
	assign_cpu = 1;
	for (i = 0; i < num_io_irqs; i++) {
		isrc = interrupt_sources[i];
		if (isrc != NULL && isrc->is_handlers > 0) {
			/*
			 * If this event is already bound to a CPU,
			 * then assign the source to that CPU instead
			 * of picking one via round-robin.  Note that
			 * this is careful to only advance the
			 * round-robin if the CPU assignment succeeds.
			 */
			cpu = isrc->is_event->ie_cpu;
			if (cpu == NOCPU)
				cpu = current_cpu[isrc->is_domain];
			if (isrc->is_pic->pic_assign_cpu(isrc,
			    cpu_apic_ids[cpu]) == 0) {
				isrc->is_cpu = cpu;
				if (isrc->is_event->ie_cpu == NOCPU)
					intr_next_cpu(isrc->is_domain);
			}
		}
	}
	sx_xunlock(&intrsrc_lock);
}
SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
    NULL);
#endif

/*
 * TODO: Export this information in a non-MD fashion, integrate with vmstat -i.
 */
static int
sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct intsrc *isrc;
	u_int i;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	sx_slock(&intrsrc_lock);
	for (i = 0; i < num_io_irqs; i++) {
		isrc = interrupt_sources[i];
		if (isrc == NULL)
			continue;
		sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
		    isrc->is_event->ie_fullname,
		    isrc->is_index,
		    isrc->is_cpu,
		    isrc->is_domain,
		    *isrc->is_count);
	}

	sx_sunlock(&intrsrc_lock);
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
SYSCTL_PROC(_hw, OID_AUTO, intrs,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, 0, sysctl_hw_intrs, "A",
    "interrupt:number @cpu: count");

/*
 * Compare two, possibly NULL, entries in the interrupt source array
 * by load.  NULL entries sort before non-NULL ones.
 *
 * NOTE(review): the u_long counter difference is truncated to int,
 * which can misorder sources whose counts differ by more than INT_MAX
 * — presumably acceptable for a best-effort balance; confirm.
 */
static int
intrcmp(const void *one, const void *two)
{
	const struct intsrc *i1, *i2;

	i1 = *(const struct intsrc * const *)one;
	i2 = *(const struct intsrc * const *)two;
	if (i1 != NULL && i2 != NULL)
		return (*i1->is_count - *i2->is_count);
	if (i1 != NULL)
		return (1);
	if (i2 != NULL)
		return (-1);
	return (0);
}

/*
 * Balance IRQs across available CPUs according to load.  Reschedules
 * itself: every 'intrbalance' seconds when enabled, else every 60s to
 * poll for the knob being turned on.
 */
static void
intr_balance(void *dummy __unused, int pending __unused)
{
	struct intsrc *isrc;
	int interval;
	u_int cpu;
	int i;

	interval = intrbalance;
	if (interval == 0)
		goto out;

	/*
	 * Sort interrupts according to count.
	 */
	sx_xlock(&intrsrc_lock);
	memcpy(interrupt_sorted, interrupt_sources, num_io_irqs *
	    sizeof(interrupt_sorted[0]));
	qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]),
	    intrcmp);

	/*
	 * Restart the scan from the same location to avoid moving in the
	 * common case.
	 */
	intr_init_cpus();

	/*
	 * Assign round-robin from most loaded to least.
	 */
	for (i = num_io_irqs - 1; i >= 0; i--) {
		isrc = interrupt_sorted[i];
		/* Skip empty slots and sources explicitly bound to a CPU. */
		if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU)
			continue;
		cpu = current_cpu[isrc->is_domain];
		intr_next_cpu(isrc->is_domain);
		if (isrc->is_cpu != cpu &&
		    isrc->is_pic->pic_assign_cpu(isrc,
		    cpu_apic_ids[cpu]) == 0)
			isrc->is_cpu = cpu;
	}
	sx_xunlock(&intrsrc_lock);
out:
	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task,
	    interval ? hz * interval : hz * 60);

}

/* Arm the periodic interrupt-balancing task once the APs are up. */
static void
intr_balance_init(void *dummy __unused)
{

	TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance,
	    NULL);
	taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz);
}
SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL);

#else
/*
 * Always route interrupts to the current processor in the UP case.
 */
u_int
intr_next_cpu(int domain)
{

	return (PCPU_GET(apic_id));
}
#endif