xref: /netbsd/sys/arch/x86/x86/intr.c (revision 6550d01e)
1 /*	$NetBSD: intr.c,v 1.70 2011/01/22 14:01:27 tsutsui Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright 2002 (c) Wasabi Systems, Inc.
34  * All rights reserved.
35  *
36  * Written by Frank van der Linden for Wasabi Systems, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *      This product includes software developed for the NetBSD Project by
49  *      Wasabi Systems, Inc.
50  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
51  *    or promote products derived from this software without specific prior
52  *    written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
57  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
58  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
59  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
60  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
61  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
62  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
63  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  */
66 
67 /*-
68  * Copyright (c) 1991 The Regents of the University of California.
69  * All rights reserved.
70  *
71  * This code is derived from software contributed to Berkeley by
72  * William Jolitz.
73  *
74  * Redistribution and use in source and binary forms, with or without
75  * modification, are permitted provided that the following conditions
76  * are met:
77  * 1. Redistributions of source code must retain the above copyright
78  *    notice, this list of conditions and the following disclaimer.
79  * 2. Redistributions in binary form must reproduce the above copyright
80  *    notice, this list of conditions and the following disclaimer in the
81  *    documentation and/or other materials provided with the distribution.
82  * 3. Neither the name of the University nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
87  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
88  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
89  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
90  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
91  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
92  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
93  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
94  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
95  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
96  * SUCH DAMAGE.
97  *
98  *	@(#)isa.c	7.2 (Berkeley) 5/13/91
99  */
100 
101 /*-
102  * Copyright (c) 1993, 1994 Charles Hannum.
103  *
104  * Redistribution and use in source and binary forms, with or without
105  * modification, are permitted provided that the following conditions
106  * are met:
107  * 1. Redistributions of source code must retain the above copyright
108  *    notice, this list of conditions and the following disclaimer.
109  * 2. Redistributions in binary form must reproduce the above copyright
110  *    notice, this list of conditions and the following disclaimer in the
111  *    documentation and/or other materials provided with the distribution.
112  * 3. All advertising materials mentioning features or use of this software
113  *    must display the following acknowledgement:
114  *	This product includes software developed by the University of
115  *	California, Berkeley and its contributors.
116  * 4. Neither the name of the University nor the names of its contributors
117  *    may be used to endorse or promote products derived from this software
118  *    without specific prior written permission.
119  *
120  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
121  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
122  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
123  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
124  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
125  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
126  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
127  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
128  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
129  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
130  * SUCH DAMAGE.
131  *
132  *	@(#)isa.c	7.2 (Berkeley) 5/13/91
133  */
134 
135 #include <sys/cdefs.h>
136 __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.70 2011/01/22 14:01:27 tsutsui Exp $");
137 
138 #include "opt_intrdebug.h"
139 #include "opt_multiprocessor.h"
140 #include "opt_acpi.h"
141 
142 #include <sys/param.h>
143 #include <sys/systm.h>
144 #include <sys/kernel.h>
145 #include <sys/syslog.h>
146 #include <sys/device.h>
147 #include <sys/kmem.h>
148 #include <sys/proc.h>
149 #include <sys/errno.h>
150 #include <sys/intr.h>
151 #include <sys/cpu.h>
152 #include <sys/atomic.h>
153 #include <sys/xcall.h>
154 
155 #include <uvm/uvm_extern.h>
156 
157 #include <machine/i8259.h>
158 #include <machine/pio.h>
159 
160 #include "ioapic.h"
161 #include "lapic.h"
162 #include "pci.h"
163 #include "acpica.h"
164 
165 #if NIOAPIC > 0 || NACPICA > 0
166 #include <machine/i82093var.h>
167 #include <machine/mpbiosvar.h>
168 #include <machine/mpacpi.h>
169 #endif
170 
171 #if NLAPIC > 0
172 #include <machine/i82489var.h>
173 #endif
174 
175 #if NPCI > 0
176 #include <dev/pci/ppbreg.h>
177 #endif
178 
179 #ifdef DDB
180 #include <ddb/db_output.h>
181 #endif
182 
/*
 * Placeholder PIC for software interrupts: there is no hardware behind
 * it, but having a struct pic keeps software interrupt sources symmetric
 * with hardware ones (see cpu_intr_init(), which uses it for SIR_PREEMPT).
 */
struct pic softintr_pic = {
	.pic_name = "softintr_fakepic",
	.pic_type = PIC_SOFT,
	.pic_vecbase = 0,
	.pic_apicid = 0,
	.pic_lock = __SIMPLELOCK_UNLOCKED,
};
190 
191 #if NIOAPIC > 0 || NACPICA > 0
192 static int intr_scan_bus(int, int, int *);
193 #if NPCI > 0
194 static int intr_find_pcibridge(int, pcitag_t *, pci_chipset_tag_t *);
195 #endif
196 #endif
197 
198 /*
199  * Fill in default interrupt table (in case of spurious interrupt
200  * during configuration of kernel), setup interrupt control unit
201  */
void
intr_default_setup(void)
{
	int i;

	/* icu vectors */
	/*
	 * Reserve an IDT vector for each of the legacy i8259 IRQs and
	 * install the corresponding interrupt stub as its gate.
	 */
	for (i = 0; i < NUM_LEGACY_IRQS; i++) {
		idt_vec_reserve(ICU_OFFSET + i);
		setgate(&idt[ICU_OFFSET + i],
		    i8259_stubs[i].ist_entry, 0, SDT_SYS386IGT,
		    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	}

	/*
	 * Eventually might want to check if it's actually there.
	 */
	i8259_default_setup();
}
220 
221 /*
222  * Handle a NMI, possibly a machine check.
223  * return true to panic system, false to ignore.
224  */
int
x86_nmi(void)
{

	/*
	 * Log the contents of I/O ports 0x61 (system control port B,
	 * which latches NMI status bits) and 0x70 for diagnosis.
	 * Returning 0 tells the caller to ignore the NMI rather
	 * than panic.
	 */
	log(LOG_CRIT, "NMI port 61 %x, port 70 %x\n", inb(0x61), inb(0x70));
	return(0);
}
232 
233 /*
234  * Recalculate the interrupt masks from scratch.
235  * During early boot, anything goes and we are always called on the BP.
236  * When the system is up and running:
237  *
238  * => called with ci == curcpu()
239  * => cpu_lock held by the initiator
240  * => interrupts disabled on-chip (PSL_I)
241  *
242  * Do not call printf(), kmem_free() or other "heavyweight" routines
243  * from here.  This routine must be quick and must not block.
244  */
static void
intr_calculatemasks(struct cpu_info *ci)
{
	int irq, level, unusedirqs, intrlevel[MAX_INTR_SOURCES];
	struct intrhand *q;

	/* First, figure out which levels each IRQ uses. */
	unusedirqs = 0xffffffff;
	for (irq = 0; irq < MAX_INTR_SOURCES; irq++) {
		int levels = 0;

		if (ci->ci_isources[irq] == NULL) {
			intrlevel[irq] = 0;
			continue;
		}
		for (q = ci->ci_isources[irq]->is_handlers; q; q = q->ih_next)
			levels |= 1 << q->ih_level;
		intrlevel[irq] = levels;
		if (levels)
			unusedirqs &= ~(1 << irq);
	}

	/* Then figure out which IRQs use each level. */
	/* IRQs with no handlers are masked at every level. */
	for (level = 0; level < NIPL; level++) {
		int irqs = 0;
		for (irq = 0; irq < MAX_INTR_SOURCES; irq++)
			if (intrlevel[irq] & (1 << level))
				irqs |= 1 << irq;
		ci->ci_imask[level] = irqs | unusedirqs;
	}

	/* Each level's mask must include everything masked below it. */
	for (level = 0; level<(NIPL-1); level++)
		ci->ci_imask[level+1] |= ci->ci_imask[level];

	/* Cache the highest/lowest handler priority on each source. */
	for (irq = 0; irq < MAX_INTR_SOURCES; irq++) {
		int maxlevel = IPL_NONE;
		int minlevel = IPL_HIGH;

		if (ci->ci_isources[irq] == NULL)
			continue;
		for (q = ci->ci_isources[irq]->is_handlers; q;
		     q = q->ih_next) {
			if (q->ih_level < minlevel)
				minlevel = q->ih_level;
			if (q->ih_level > maxlevel)
				maxlevel = q->ih_level;
		}
		ci->ci_isources[irq]->is_maxlevel = maxlevel;
		ci->ci_isources[irq]->is_minlevel = minlevel;
	}

	/* The unmask set is simply the complement of the mask set. */
	for (level = 0; level < NIPL; level++)
		ci->ci_iunmask[level] = ~ci->ci_imask[level];
}
299 
300 /*
301  * List to keep track of PCI buses that are probed but not known
302  * to the firmware. Used to
303  *
304  * XXX should maintain one list, not an array and a linked list.
305  */
306 #if (NPCI > 0) && ((NIOAPIC > 0) || NACPICA > 0)
/* A probed PCI bus and the tag of the bridge leading to it. */
struct intr_extra_bus {
	int bus;				/* PCI bus number */
	pcitag_t *pci_bridge_tag;		/* parent bridge tag, or NULL */
	pci_chipset_tag_t pci_chipset_tag;	/* chipset for decomposing tags */
	LIST_ENTRY(intr_extra_bus) list;
};

/* All buses registered via intr_add_pcibus(). */
LIST_HEAD(, intr_extra_bus) intr_extra_buses =
    LIST_HEAD_INITIALIZER(intr_extra_buses);
316 
317 
318 void
319 intr_add_pcibus(struct pcibus_attach_args *pba)
320 {
321 	struct intr_extra_bus *iebp;
322 
323 	iebp = kmem_alloc(sizeof(*iebp), KM_SLEEP);
324 	iebp->bus = pba->pba_bus;
325 	iebp->pci_chipset_tag = pba->pba_pc;
326 	iebp->pci_bridge_tag = pba->pba_bridgetag;
327 	LIST_INSERT_HEAD(&intr_extra_buses, iebp, list);
328 }
329 
/*
 * Look up the PCI bridge leading to `bus'.  Firmware-known buses are
 * found in mp_busses[]; buses discovered later by probing are on the
 * intr_extra_buses list.  Returns 0 and fills in *pci_bridge_tag/*pc
 * on success, ENOENT if the bus or its bridge is unknown.
 */
static int
intr_find_pcibridge(int bus, pcitag_t *pci_bridge_tag,
		    pci_chipset_tag_t *pc)
{
	struct intr_extra_bus *iebp;
	struct mp_bus *mpb;

	if (bus < 0)
		return ENOENT;

	/* Firmware-described bus: consult the MP bus table. */
	if (bus < mp_nbus) {
		mpb = &mp_busses[bus];
		if (mpb->mb_pci_bridge_tag == NULL)
			return ENOENT;
		*pci_bridge_tag = *mpb->mb_pci_bridge_tag;
		*pc = mpb->mb_pci_chipset_tag;
		return 0;
	}

	/* Otherwise search the buses registered by intr_add_pcibus(). */
	LIST_FOREACH(iebp, &intr_extra_buses, list) {
		if (iebp->bus == bus) {
			if (iebp->pci_bridge_tag == NULL)
				return ENOENT;
			*pci_bridge_tag = *iebp->pci_bridge_tag;
			*pc = iebp->pci_chipset_tag;
			return 0;
		}
	}
	return ENOENT;
}
360 #endif
361 
362 #if NIOAPIC > 0 || NACPICA > 0
/*
 * Resolve (bus, pin) to an I/O APIC interrupt handle via the MP/ACPI
 * tables.  For PCI, `pin' encodes (device << 2) | INTx-pin; if the bus
 * itself is not in the tables, walk up through its bridges, applying
 * the standard PCI-PCI bridge swizzle at each hop, until a known bus
 * is reached.  Returns 0 and fills *handle, or ENOENT.
 */
int
intr_find_mpmapping(int bus, int pin, int *handle)
{
#if NPCI > 0
	int dev, func;
	pcitag_t pci_bridge_tag;
	pci_chipset_tag_t pc;
#endif

#if NPCI > 0
	while (intr_scan_bus(bus, pin, handle) != 0) {
		if (intr_find_pcibridge(bus, &pci_bridge_tag,
		    &pc) != 0)
			return ENOENT;
		/* Split the encoded pin, swizzle through the bridge. */
		dev = pin >> 2;
		pin = pin & 3;
		pin = PPB_INTERRUPT_SWIZZLE(pin + 1, dev) - 1;
		/* Re-encode using the bridge's own bus/device. */
		pci_decompose_tag(pc, pci_bridge_tag, &bus,
		    &dev, &func);
		pin |= (dev << 2);
	}
	return 0;
#else
	return intr_scan_bus(bus, pin, handle);
#endif
}
389 
/*
 * Search the MP table interrupt entries of `bus' for one matching
 * `pin'.  On success store the I/O APIC handle in *handle and return
 * 0; return ENOENT if the bus or mapping is unknown.
 */
static int
intr_scan_bus(int bus, int pin, int *handle)
{
	struct mp_intr_map *mip, *intrs;

	if (bus < 0 || bus >= mp_nbus)
		return ENOENT;

	intrs = mp_busses[bus].mb_intrs;
	if (intrs == NULL)
		return ENOENT;

	for (mip = intrs; mip != NULL; mip = mip->next) {
		if (mip->bus_pin == pin) {
#if NACPICA > 0
			/* ACPI link devices must resolve before use. */
			if (mip->linkdev != NULL)
				if (mpacpi_findintr_linkdev(mip) != 0)
					continue;
#endif
			*handle = mip->ioapic_ih;
			return 0;
		}
	}
	return ENOENT;
}
415 #endif
416 
/*
 * Allocate an interrupt source slot on a specific CPU for (pic, pin).
 * i8259 pins map 1:1 onto reserved slots of the primary CPU; other
 * PICs take the first free slot.  Creates and registers the
 * intrsource (with its event counter) if the slot was empty.
 * Returns 0 and stores the slot in *index, or EBUSY/ENOMEM.
 * Called with cpu_lock held.
 */
static int
intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin,
		       int *index)
{
	int slot, i;
	struct intrsource *isp;

	KASSERT(mutex_owned(&cpu_lock));

	if (pic == &i8259_pic) {
		/* Legacy IRQs always live on the boot processor. */
		KASSERT(CPU_IS_PRIMARY(ci));
		slot = pin;
	} else {
		slot = -1;

		/*
		 * intr_allocate_slot has checked for an existing mapping.
		 * Now look for a free slot.
		 */
		for (i = 0; i < MAX_INTR_SOURCES ; i++) {
			if (ci->ci_isources[i] == NULL) {
				slot = i;
				break;
			}
		}
		if (slot == -1) {
			return EBUSY;
		}
	}

	isp = ci->ci_isources[slot];
	if (isp == NULL) {
		/* NOTE(review): KM_SLEEP allocations should not return
		 * NULL; the check below looks defensive — confirm. */
		isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
		if (isp == NULL) {
			return ENOMEM;
		}
		snprintf(isp->is_evname, sizeof (isp->is_evname),
		    "pin %d", pin);
		evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL,
		    pic->pic_name, isp->is_evname);
		ci->ci_isources[slot] = isp;
	}

	*index = slot;
	return 0;
}
463 
464 /*
465  * A simple round-robin allocator to assign interrupts to CPUs.
466  */
static int __noinline
intr_allocate_slot(struct pic *pic, int pin, int level,
		   struct cpu_info **cip, int *index, int *idt_slot)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *lci;
	struct intrsource *isp;
	int slot = 0, idtvec, error;

	KASSERT(mutex_owned(&cpu_lock));

	/* First check if this pin is already used by an interrupt vector. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		for (slot = 0 ; slot < MAX_INTR_SOURCES ; slot++) {
			if ((isp = ci->ci_isources[slot]) == NULL) {
				continue;
			}
			if (isp->is_pic == pic && isp->is_pin == pin) {
				/* Existing source: reuse it as-is. */
				*idt_slot = isp->is_idtvec;
				*index = slot;
				*cip = ci;
				return 0;
			}
		}
	}

	/*
	 * The pic/pin combination doesn't have an existing mapping.
	 * Find a slot for a new interrupt source.  For the i8259 case,
	 * we always use reserved slots of the primary CPU.  Otherwise,
	 * we make an attempt to balance the interrupt load.
	 *
	 * PIC and APIC usage are essentially exclusive, so the reservation
	 * of the ISA slots is ignored when assigning IOAPIC slots.
	 */
	if (pic == &i8259_pic) {
		/*
		 * Must be directed to BP.
		 */
		ci = &cpu_info_primary;
		error = intr_allocate_slot_cpu(ci, pic, pin, &slot);
	} else {
		/*
		 * Find least loaded AP/BP and try to allocate there.
		 */
		ci = NULL;
		for (CPU_INFO_FOREACH(cii, lci)) {
			if ((lci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
				continue;
			}
#if 0
			if (ci == NULL ||
			    ci->ci_nintrhand > lci->ci_nintrhand) {
			    	ci = lci;
			}
#else
			/* Load balancing disabled: everything goes to BP. */
			ci = &cpu_info_primary;
#endif
		}
		KASSERT(ci != NULL);
		error = intr_allocate_slot_cpu(ci, pic, pin, &slot);

		/*
		 * If that did not work, allocate anywhere.
		 */
		if (error != 0) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if ((ci->ci_schedstate.spc_flags &
				    SPCF_NOINTR) != 0) {
					continue;
				}
				error = intr_allocate_slot_cpu(ci, pic,
				    pin, &slot);
				if (error == 0) {
					break;
				}
			}
		}
	}
	if (error != 0) {
		return error;
	}
	KASSERT(ci != NULL);

	/*
	 * Now allocate an IDT vector.
	 * For the 8259 these are reserved up front.
	 */
	if (pic == &i8259_pic) {
		idtvec = ICU_OFFSET + pin;
	} else {
		idtvec = idt_vec_alloc(APIC_LEVEL(level), IDT_INTR_HIGH);
	}
	if (idtvec == 0) {
		/* No vector free: undo the slot allocation above. */
		evcnt_detach(&ci->ci_isources[slot]->is_evcnt);
		kmem_free(ci->ci_isources[slot], sizeof(*(ci->ci_isources[slot])));
		ci->ci_isources[slot] = NULL;
		return EBUSY;
	}
	ci->ci_isources[slot]->is_idtvec = idtvec;
	*idt_slot = idtvec;
	*index = slot;
	*cip = ci;
	return 0;
}
572 
573 static void
574 intr_source_free(struct cpu_info *ci, int slot, struct pic *pic, int idtvec)
575 {
576 	struct intrsource *isp;
577 
578 	isp = ci->ci_isources[slot];
579 
580 	if (isp->is_handlers != NULL)
581 		return;
582 	ci->ci_isources[slot] = NULL;
583 	evcnt_detach(&isp->is_evcnt);
584 	kmem_free(isp, sizeof(*isp));
585 	ci->ci_isources[slot] = NULL;
586 	if (pic != &i8259_pic)
587 		idt_vec_free(idtvec);
588 }
589 
590 #ifdef MULTIPROCESSOR
591 static int intr_biglock_wrapper(void *);
592 
593 /*
594  * intr_biglock_wrapper: grab biglock and call a real interrupt handler.
595  */
596 
597 static int
598 intr_biglock_wrapper(void *vp)
599 {
600 	struct intrhand *ih = vp;
601 	int ret;
602 
603 	KERNEL_LOCK(1, NULL);
604 
605 	ret = (*ih->ih_realfun)(ih->ih_realarg);
606 
607 	KERNEL_UNLOCK_ONE(NULL);
608 
609 	return ret;
610 }
611 #endif /* MULTIPROCESSOR */
612 
613 struct pic *
614 intr_findpic(int num)
615 {
616 #if NIOAPIC > 0
617 	struct ioapic_softc *pic;
618 
619 	pic = ioapic_find_bybase(num);
620 	if (pic != NULL)
621 		return &pic->sc_pic;
622 #endif
623 	if (num < NUM_LEGACY_IRQS)
624 		return &i8259_pic;
625 
626 	return NULL;
627 }
628 
629 /*
630  * Handle per-CPU component of interrupt establish.
631  *
632  * => caller (on initiating CPU) holds cpu_lock on our behalf
633  * => arg1: struct intrhand *ih
634  * => arg2: int idt_vec
635  */
static void
intr_establish_xcall(void *arg1, void *arg2)
{
	struct intrsource *source;
	struct intrstub *stubp;
	struct intrhand *ih;
	struct cpu_info *ci;
	int idt_vec;
	u_long psl;

	ih = arg1;

	/* Must run on the handler's target CPU (or before APs start). */
	KASSERT(ih->ih_cpu == curcpu() || !mp_online);

	ci = ih->ih_cpu;
	source = ci->ci_isources[ih->ih_slot];
	idt_vec = (int)(intptr_t)arg2;

	/* Disable interrupts locally. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Link in the handler and re-calculate masks. */
	*(ih->ih_prevp) = ih;
	intr_calculatemasks(ci);

	/* Hook in new IDT vector and SPL state. */
	if (source->is_resume == NULL || source->is_idtvec != idt_vec) {
		/* Release a previously-used, different vector. */
		if (source->is_idtvec != 0 && source->is_idtvec != idt_vec)
			idt_vec_free(source->is_idtvec);
		source->is_idtvec = idt_vec;
		/* Level-triggered sources use distinct stub entry points. */
		if (source->is_type == IST_LEVEL) {
			stubp = &source->is_pic->pic_level_stubs[ih->ih_slot];
		} else {
			stubp = &source->is_pic->pic_edge_stubs[ih->ih_slot];
		}
		source->is_resume = stubp->ist_resume;
		source->is_recurse = stubp->ist_recurse;
		setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT,
		    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	}

	/* Re-enable interrupts locally. */
	x86_write_psl(psl);
}
681 
/*
 * Register an interrupt handler for (pic, pin) at the given IPL.
 * Returns an opaque handle for intr_disestablish(), or NULL on
 * failure.  May sleep (KM_SLEEP allocation, cross-call to the
 * target CPU).
 */
void *
intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level,
	       int (*handler)(void *), void *arg, bool known_mpsafe)
{
	struct intrhand **p, *q, *ih;
	struct cpu_info *ci;
	int slot, error, idt_vec;
	struct intrsource *source;
#ifdef MULTIPROCESSOR
	/* Handlers above IPL_VM never take the big lock. */
	bool mpsafe = (known_mpsafe || level != IPL_VM);
#endif /* MULTIPROCESSOR */
	uint64_t where;

#ifdef DIAGNOSTIC
	if (legacy_irq != -1 && (legacy_irq < 0 || legacy_irq > 15))
		panic("%s: bad legacy IRQ value", __func__);

	if (legacy_irq == -1 && pic == &i8259_pic)
		panic("intr_establish: non-legacy IRQ on i8259");
#endif

	ih = kmem_alloc(sizeof(*ih), KM_SLEEP);
	if (ih == NULL) {
		printf("%s: can't allocate handler info\n", __func__);
		return NULL;
	}

	/* Find (or create) the interrupt source and an IDT vector. */
	mutex_enter(&cpu_lock);
	error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec);
	if (error != 0) {
		mutex_exit(&cpu_lock);
		kmem_free(ih, sizeof(*ih));
		printf("failed to allocate interrupt slot for PIC %s pin %d\n",
		    pic->pic_name, pin);
		return NULL;
	}

	source = ci->ci_isources[slot];

	/* Sharing a source across PIC types is not supported. */
	if (source->is_handlers != NULL &&
	    source->is_pic->pic_type != pic->pic_type) {
		mutex_exit(&cpu_lock);
		kmem_free(ih, sizeof(*ih));
		printf("%s: can't share intr source between "
		       "different PIC types (legacy_irq %d pin %d slot %d)\n",
		    __func__, legacy_irq, pin, slot);
		return NULL;
	}

	source->is_pin = pin;
	source->is_pic = pic;

	/* Reconcile the requested trigger type with the source's. */
	switch (source->is_type) {
	case IST_NONE:
		source->is_type = type;
		break;
	case IST_EDGE:
	case IST_LEVEL:
		if (source->is_type == type)
			break;
		/* FALLTHROUGH */
	case IST_PULSE:
		if (type != IST_NONE) {
			mutex_exit(&cpu_lock);
			kmem_free(ih, sizeof(*ih));
			intr_source_free(ci, slot, pic, idt_vec);
			printf("%s: pic %s pin %d: can't share "
			       "type %d with %d\n",
				__func__, pic->pic_name, pin,
				source->is_type, type);
			return NULL;
		}
		break;
	default:
		panic("%s: bad intr type %d for pic %s pin %d\n",
		    __func__, source->is_type, pic->pic_name, pin);
		/* NOTREACHED */
	}

	/*
	 * We're now committed.  Mask the interrupt in hardware and
	 * count it for load distribution.
	 */
	(*pic->pic_hwmask)(pic, pin);
	(ci->ci_nintrhand)++;

	/*
	 * Figure out where to put the handler.
	 * This is O(N^2), but we want to preserve the order, and N is
	 * generally small.
	 */
	for (p = &ci->ci_isources[slot]->is_handlers;
	     (q = *p) != NULL && q->ih_level > level;
	     p = &q->ih_next) {
		/* nothing */;
	}

	ih->ih_fun = ih->ih_realfun = handler;
	ih->ih_arg = ih->ih_realarg = arg;
	ih->ih_prevp = p;
	ih->ih_next = *p;
	ih->ih_level = level;
	ih->ih_pin = pin;
	ih->ih_cpu = ci;
	ih->ih_slot = slot;
#ifdef MULTIPROCESSOR
	/* Non-MPSAFE handlers are wrapped to take the big lock. */
	if (!mpsafe) {
		ih->ih_fun = intr_biglock_wrapper;
		ih->ih_arg = ih;
	}
#endif /* MULTIPROCESSOR */

	/*
	 * Call out to the remote CPU to update its interrupt state.
	 * Only make RPCs if the APs are up and running.
	 */
	if (ci == curcpu() || !mp_online) {
		intr_establish_xcall(ih, (void *)(intptr_t)idt_vec);
	} else {
		where = xc_unicast(0, intr_establish_xcall, ih,
		    (void *)(intptr_t)idt_vec, ci);
		xc_wait(where);
	}

	/* All set up, so add a route for the interrupt and unmask it. */
	(*pic->pic_addroute)(pic, ci, pin, idt_vec, type);
	(*pic->pic_hwunmask)(pic, pin);
	mutex_exit(&cpu_lock);

#ifdef INTRDEBUG
	printf("allocated pic %s type %s pin %d level %d to %s slot %d "
	    "idt entry %d\n",
	    pic->pic_name, type == IST_EDGE ? "edge" : "level", pin, level,
	    device_xname(ci->ci_dev), slot, idt_vec);
#endif

	return (ih);
}
820 
821 /*
822  * Called on bound CPU to handle intr_disestablish().
823  *
824  * => caller (on initiating CPU) holds cpu_lock on our behalf
825  * => arg1: struct intrhand *ih
826  * => arg2: unused
827  */
static void
intr_disestablish_xcall(void *arg1, void *arg2)
{
	struct intrhand **p, *q;
	struct cpu_info *ci;
	struct pic *pic;
	struct intrsource *source;
	struct intrhand *ih;
	u_long psl;
	int idtvec;

	ih = arg1;
	ci = ih->ih_cpu;

	/* Must run on the handler's CPU (or before APs start). */
	KASSERT(ci == curcpu() || !mp_online);

	/* Disable interrupts locally. */
	psl = x86_read_psl();
	x86_disable_intr();

	pic = ci->ci_isources[ih->ih_slot]->is_pic;
	source = ci->ci_isources[ih->ih_slot];
	idtvec = source->is_idtvec;

	/* Mask in hardware and cancel any pending delivery of the slot. */
	(*pic->pic_hwmask)(pic, ih->ih_pin);
	atomic_and_32(&ci->ci_ipending, ~(1 << ih->ih_slot));

	/*
	 * Remove the handler from the chain.
	 */
	for (p = &source->is_handlers; (q = *p) != NULL && q != ih;
	     p = &q->ih_next)
		;
	if (q == NULL) {
		x86_write_psl(psl);
		panic("%s: handler not registered", __func__);
		/* NOTREACHED */
	}

	*p = q->ih_next;

	/* Recompute SPL masks, drop the route, then unmask again. */
	intr_calculatemasks(ci);
	(*pic->pic_delroute)(pic, ci, ih->ih_pin, idtvec, source->is_type);
	(*pic->pic_hwunmask)(pic, ih->ih_pin);

	/* Re-enable interrupts. */
	x86_write_psl(psl);

	/* If the source is free we can drop it now. */
	intr_source_free(ci, ih->ih_slot, pic, idtvec);

#ifdef INTRDEBUG
	printf("%s: remove slot %d (pic %s pin %d vec %d)\n",
	    device_xname(ci->ci_dev), ih->ih_slot, pic->pic_name,
	    ih->ih_pin, idtvec);
#endif
}
885 
886 /*
887  * Deregister an interrupt handler.
888  */
889 void
890 intr_disestablish(struct intrhand *ih)
891 {
892 	struct cpu_info *ci;
893 	uint64_t where;
894 
895 	/*
896 	 * Count the removal for load balancing.
897 	 * Call out to the remote CPU to update its interrupt state.
898 	 * Only make RPCs if the APs are up and running.
899 	 */
900 	mutex_enter(&cpu_lock);
901 	ci = ih->ih_cpu;
902 	(ci->ci_nintrhand)--;
903 	KASSERT(ci->ci_nintrhand >= 0);
904 	if (ci == curcpu() || !mp_online) {
905 		intr_disestablish_xcall(ih, NULL);
906 	} else {
907 		where = xc_unicast(0, intr_disestablish_xcall, ih, NULL, ci);
908 		xc_wait(where);
909 	}
910 	mutex_exit(&cpu_lock);
911 	kmem_free(ih, sizeof(*ih));
912 }
913 
/*
 * Format an interrupt handle for display.  Returns a pointer to a
 * static buffer, so the result is not re-entrant and must be used
 * (or copied) before the next call.
 */
const char *
intr_string(int ih)
{
	static char irqstr[64];
#if NIOAPIC > 0
	struct ioapic_softc *pic;
#endif

	if (ih == 0)
		panic("%s: bogus handle 0x%x", __func__, ih);


#if NIOAPIC > 0
	/* APIC-routed handles encode the APIC id and pin. */
	if (ih & APIC_INT_VIA_APIC) {
		pic = ioapic_find(APIC_IRQ_APIC(ih));
		if (pic != NULL) {
			snprintf(irqstr, sizeof(irqstr), "%s pin %d",
			    device_xname(pic->sc_dev), APIC_IRQ_PIN(ih));
		} else {
			snprintf(irqstr, sizeof(irqstr),
			    "apic %d int %d (irq %d)",
			    APIC_IRQ_APIC(ih),
			    APIC_IRQ_PIN(ih),
			    ih&0xff);
		}
	} else
		snprintf(irqstr, sizeof(irqstr), "irq %d", ih&0xff);
#else

	snprintf(irqstr, sizeof(irqstr), "irq %d", ih&0xff);
#endif
	return (irqstr);

}
948 
949 /*
950  * Fake interrupt handler structures for the benefit of symmetry with
951  * other interrupt sources, and the benefit of intr_calculatemasks()
952  */
struct intrhand fake_softclock_intrhand;
struct intrhand fake_softnet_intrhand;
struct intrhand fake_softserial_intrhand;
struct intrhand fake_softbio_intrhand;
struct intrhand fake_timer_intrhand;	/* attached in cpu_intr_init() (LIR_TIMER) */
struct intrhand fake_ipi_intrhand;	/* attached in cpu_intr_init() (LIR_IPI) */
struct intrhand fake_preempt_intrhand;	/* attached in cpu_intr_init() (SIR_PREEMPT) */
960 
961 #if NLAPIC > 0 && defined(MULTIPROCESSOR)
962 static const char *x86_ipi_names[X86_NIPI] = X86_IPI_NAMES;
963 #endif
964 
/* Pass x through on DIAGNOSTIC kernels; constant false otherwise. */
static inline bool
redzone_const_or_false(bool x)
{
#ifndef DIAGNOSTIC
	return false;
#else
	return x;
#endif /* !DIAGNOSTIC */
}
974 
/* Pass x through on DIAGNOSTIC kernels (red zone active); 0 otherwise. */
static inline int
redzone_const_or_zero(int x)
{
	if (redzone_const_or_false(true))
		return x;
	return 0;
}
980 
981 /*
982  * Initialize all handlers that aren't dynamically allocated, and exist
983  * for each CPU.
984  */
void
cpu_intr_init(struct cpu_info *ci)
{
	struct intrsource *isp;
#if NLAPIC > 0 && defined(MULTIPROCESSOR)
	/* NOTE(review): `first' is also used below under plain
	 * "#if NLAPIC > 0" — confirm non-MULTIPROCESSOR LAPIC builds. */
	int i;
	static int first = 1;
#endif
#ifdef INTRSTACKSIZE
	vaddr_t istack;
#endif

#if NLAPIC > 0
	/* Local APIC timer source, backed by a static fake handler. */
	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
	KASSERT(isp != NULL);
	isp->is_recurse = Xrecurse_lapic_ltimer;
	isp->is_resume = Xresume_lapic_ltimer;
	fake_timer_intrhand.ih_level = IPL_CLOCK;
	isp->is_handlers = &fake_timer_intrhand;
	isp->is_pic = &local_pic;
	ci->ci_isources[LIR_TIMER] = isp;
	/* Only the first CPU's timer is counted as a true interrupt. */
	evcnt_attach_dynamic(&isp->is_evcnt,
	    first ? EVCNT_TYPE_INTR : EVCNT_TYPE_MISC, NULL,
	    device_xname(ci->ci_dev), "timer");
	first = 0;

#ifdef MULTIPROCESSOR
	/* Inter-processor interrupt source. */
	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
	KASSERT(isp != NULL);
	isp->is_recurse = Xrecurse_lapic_ipi;
	isp->is_resume = Xresume_lapic_ipi;
	fake_ipi_intrhand.ih_level = IPL_HIGH;
	isp->is_handlers = &fake_ipi_intrhand;
	isp->is_pic = &local_pic;
	ci->ci_isources[LIR_IPI] = isp;

	/* One event counter per IPI type. */
	for (i = 0; i < X86_NIPI; i++)
		evcnt_attach_dynamic(&ci->ci_ipi_events[i], EVCNT_TYPE_MISC,
		    NULL, device_xname(ci->ci_dev), x86_ipi_names[i]);
#endif
#endif

	/* Kernel preemption soft interrupt source. */
	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
	KASSERT(isp != NULL);
	isp->is_recurse = Xpreemptrecurse;
	isp->is_resume = Xpreemptresume;
	fake_preempt_intrhand.ih_level = IPL_PREEMPT;
	isp->is_handlers = &fake_preempt_intrhand;
	isp->is_pic = &softintr_pic;
	ci->ci_isources[SIR_PREEMPT] = isp;

	intr_calculatemasks(ci);

#if defined(INTRSTACKSIZE)
	/*
	 * If the red zone is activated, protect both the top and
	 * the bottom of the stack with an unmapped page.
	 */
	istack = uvm_km_alloc(kernel_map,
	    INTRSTACKSIZE + redzone_const_or_zero(2 * PAGE_SIZE), 0,
	    UVM_KMF_WIRED);
	if (redzone_const_or_false(true)) {
		pmap_kremove(istack, PAGE_SIZE);
		pmap_kremove(istack + INTRSTACKSIZE + PAGE_SIZE, PAGE_SIZE);
		pmap_update(pmap_kernel());
	}
	/* 33 used to be 1.  Arbitrarily reserve 32 more register_t's
	 * of space for ddb(4) to examine some subroutine arguments
	 * and to hunt for the next stack frame.
	 */
	ci->ci_intrstack = (char *)istack + redzone_const_or_zero(PAGE_SIZE) +
	    INTRSTACKSIZE - 33 * sizeof(register_t);
#if defined(__x86_64__)
	/* IST entry 0 points at the interrupt stack, 16-byte aligned. */
	ci->ci_tss.tss_ist[0] = (uintptr_t)ci->ci_intrstack & ~0xf;
#endif /* defined(__x86_64__) */
#endif /* defined(INTRSTACKSIZE) */
	ci->ci_idepth = -1;
}
1063 
1064 #if defined(INTRDEBUG) || defined(DDB)
1065 
1066 #ifdef DDB
1067 #define printf db_printf
1068 #endif
1069 
1070 void
1071 intr_printconfig(void)
1072 {
1073 	int i;
1074 	struct intrhand *ih;
1075 	struct intrsource *isp;
1076 	struct cpu_info *ci;
1077 	CPU_INFO_ITERATOR cii;
1078 
1079 	for (CPU_INFO_FOREACH(cii, ci)) {
1080 		printf("%s: interrupt masks:\n", device_xname(ci->ci_dev));
1081 		for (i = 0; i < NIPL; i++)
1082 			printf("IPL %d mask %lx unmask %lx\n", i,
1083 			    (u_long)ci->ci_imask[i], (u_long)ci->ci_iunmask[i]);
1084 		for (i = 0; i < MAX_INTR_SOURCES; i++) {
1085 			isp = ci->ci_isources[i];
1086 			if (isp == NULL)
1087 				continue;
1088 			printf("%s source %d is pin %d from pic %s maxlevel %d\n",
1089 			    device_xname(ci->ci_dev), i, isp->is_pin,
1090 			    isp->is_pic->pic_name, isp->is_maxlevel);
1091 			for (ih = isp->is_handlers; ih != NULL;
1092 			     ih = ih->ih_next)
1093 				printf("\thandler %p level %d\n",
1094 				    ih->ih_fun, ih->ih_level);
1095 
1096 		}
1097 	}
1098 }
1099 #ifdef DDB
1100 #undef printf
1101 #endif
1102 #endif
1103 
1104 void
1105 softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep)
1106 {
1107 	struct intrsource *isp;
1108 	struct cpu_info *ci;
1109 	u_int sir;
1110 
1111 	ci = l->l_cpu;
1112 
1113 	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
1114 	KASSERT(isp != NULL);
1115 	isp->is_recurse = Xsoftintr;
1116 	isp->is_resume = Xsoftintr;
1117 	isp->is_pic = &softintr_pic;
1118 
1119 	switch (level) {
1120 	case SOFTINT_BIO:
1121 		sir = SIR_BIO;
1122 		fake_softbio_intrhand.ih_level = IPL_SOFTBIO;
1123 		isp->is_handlers = &fake_softbio_intrhand;
1124 		break;
1125 	case SOFTINT_NET:
1126 		sir = SIR_NET;
1127 		fake_softnet_intrhand.ih_level = IPL_SOFTNET;
1128 		isp->is_handlers = &fake_softnet_intrhand;
1129 		break;
1130 	case SOFTINT_SERIAL:
1131 		sir = SIR_SERIAL;
1132 		fake_softserial_intrhand.ih_level = IPL_SOFTSERIAL;
1133 		isp->is_handlers = &fake_softserial_intrhand;
1134 		break;
1135 	case SOFTINT_CLOCK:
1136 		sir = SIR_CLOCK;
1137 		fake_softclock_intrhand.ih_level = IPL_SOFTCLOCK;
1138 		isp->is_handlers = &fake_softclock_intrhand;
1139 		break;
1140 	default:
1141 		panic("softint_init_md");
1142 	}
1143 
1144 	KASSERT(ci->ci_isources[sir] == NULL);
1145 
1146 	*machdep = (1 << sir);
1147 	ci->ci_isources[sir] = isp;
1148 	ci->ci_isources[sir]->is_lwp = l;
1149 
1150 	intr_calculatemasks(ci);
1151 }
1152 
1153 static void
1154 intr_redistribute_xc_t(void *arg1, void *arg2)
1155 {
1156 	struct cpu_info *ci;
1157 	struct intrsource *isp;
1158 	int slot;
1159 	u_long psl;
1160 
1161 	ci = curcpu();
1162 	isp = arg1;
1163 	slot = (int)(intptr_t)arg2;
1164 
1165 	/* Disable interrupts locally. */
1166 	psl = x86_read_psl();
1167 	x86_disable_intr();
1168 
1169 	/* Hook it in and re-calculate masks. */
1170 	ci->ci_isources[slot] = isp;
1171 	intr_calculatemasks(curcpu());
1172 
1173 	/* Re-enable interrupts locally. */
1174 	x86_write_psl(psl);
1175 }
1176 
/*
 * Cross-call handler, stage 1 of source removal: executed on the CPU
 * that currently owns the interrupt (see intr_redistribute()).  Mask
 * the pin at the PIC and re-route it to the new CPU (arg2); arg1 is
 * the interrupt source being migrated.
 */
static void
intr_redistribute_xc_s1(void *arg1, void *arg2)
{
	struct pic *pic;
	struct intrsource *isp;
	struct cpu_info *nci;
	u_long psl;

	isp = arg1;
	nci = arg2;

	/*
	 * Disable interrupts on-chip and mask the pin.  Back out
	 * and let the interrupt be processed if one is pending.
	 */
	pic = isp->is_pic;
	for (;;) {
		psl = x86_read_psl();
		x86_disable_intr();
		/* pic_trymask fails if an interrupt is pending on the pin. */
		if ((*pic->pic_trymask)(pic, isp->is_pin)) {
			break;
		}
		/* Re-enable briefly so the pending interrupt can run. */
		x86_write_psl(psl);
		DELAY(1000);
	}

	/* pic_addroute will unmask the interrupt. */
	(*pic->pic_addroute)(pic, nci, isp->is_pin, isp->is_idtvec,
	    isp->is_type);
	x86_write_psl(psl);
}
1208 
1209 static void
1210 intr_redistribute_xc_s2(void *arg1, void *arg2)
1211 {
1212 	struct cpu_info *ci;
1213 	u_long psl;
1214 	int slot;
1215 
1216 	ci = curcpu();
1217 	slot = (int)(uintptr_t)arg1;
1218 
1219 	/* Disable interrupts locally. */
1220 	psl = x86_read_psl();
1221 	x86_disable_intr();
1222 
1223 	/* Patch out the source and re-calculate masks. */
1224 	ci->ci_isources[slot] = NULL;
1225 	intr_calculatemasks(ci);
1226 
1227 	/* Re-enable interrupts locally. */
1228 	x86_write_psl(psl);
1229 }
1230 
/*
 * Try to migrate one interrupt source (and all of its handlers) off
 * the given CPU onto another CPU that is still accepting interrupts.
 * Returns true if a source was moved, false if nothing (more) could
 * be migrated.  Must be called with cpu_lock held.
 */
static bool
intr_redistribute(struct cpu_info *oci)
{
	struct intrsource *isp;
	struct intrhand *ih;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *nci, *ici;
	int oslot, nslot;
	uint64_t where;

	KASSERT(mutex_owned(&cpu_lock));

	/* Look for an interrupt source that we can migrate. */
	for (oslot = 0; oslot < MAX_INTR_SOURCES; oslot++) {
		if ((isp = oci->ci_isources[oslot]) == NULL) {
			continue;
		}
		/* Only sources routed through the I/O APIC are movable. */
		if (isp->is_pic->pic_type == PIC_IOAPIC) {
			break;
		}
	}
	if (oslot == MAX_INTR_SOURCES) {
		return false;
	}

	/* Find least loaded CPU and try to move there. */
	nci = NULL;
	for (CPU_INFO_FOREACH(cii, ici)) {
		if ((ici->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
			continue;
		}
		/* oci is shielded, so it cannot appear as a candidate. */
		KASSERT(ici != oci);
		if (nci == NULL || nci->ci_nintrhand > ici->ci_nintrhand) {
			nci = ici;
		}
	}
	if (nci == NULL) {
		return false;
	}
	/* Look for a free slot on the chosen CPU. */
	for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) {
		if (nci->ci_isources[nslot] == NULL) {
			break;
		}
	}

	/* If that did not work, allocate anywhere. */
	if (nslot == MAX_INTR_SOURCES) {
		for (CPU_INFO_FOREACH(cii, nci)) {
			if ((nci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
				continue;
			}
			KASSERT(nci != oci);
			for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) {
				if (nci->ci_isources[nslot] == NULL) {
					break;
				}
			}
			if (nslot != MAX_INTR_SOURCES) {
				break;
			}
		}
	}
	/* No CPU anywhere has a free slot: give up. */
	if (nslot == MAX_INTR_SOURCES) {
		return false;
	}

	/*
	 * Now we have new CPU and new slot.  Run a cross-call to set up
	 * the new vector on the target CPU.
	 */
	where = xc_unicast(0, intr_redistribute_xc_t, isp,
	    (void *)(intptr_t)nslot, nci);
	xc_wait(where);

	/*
	 * We're ready to go on the target CPU.  Run a cross call to
	 * reroute the interrupt away from the source CPU.
	 */
	where = xc_unicast(0, intr_redistribute_xc_s1, isp, nci, oci);
	xc_wait(where);

	/* Sleep for (at least) 10ms to allow the change to take hold. */
	(void)kpause("intrdist", false, mstohz(10), NULL);

	/* Complete removal from the source CPU. */
	where = xc_unicast(0, intr_redistribute_xc_s2,
	    (void *)(uintptr_t)oslot, NULL, oci);
	xc_wait(where);

	/* Finally, take care of book-keeping. */
	for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next) {
		oci->ci_nintrhand--;
		nci->ci_nintrhand++;
		ih->ih_cpu = nci;
	}

	return true;
}
1329 
1330 void
1331 cpu_intr_redistribute(void)
1332 {
1333 	CPU_INFO_ITERATOR cii;
1334 	struct cpu_info *ci;
1335 
1336 	KASSERT(mutex_owned(&cpu_lock));
1337 	KASSERT(mp_online);
1338 
1339 	/* Direct interrupts away from shielded CPUs. */
1340 	for (CPU_INFO_FOREACH(cii, ci)) {
1341 		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) {
1342 			continue;
1343 		}
1344 		while (intr_redistribute(ci)) {
1345 			/* nothing */
1346 		}
1347 	}
1348 
1349 	/* XXX should now re-balance */
1350 }
1351 
1352 u_int
1353 cpu_intr_count(struct cpu_info *ci)
1354 {
1355 
1356 	KASSERT(ci->ci_nintrhand >= 0);
1357 
1358 	return ci->ci_nintrhand;
1359 }
1360