1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1996, by Steve Passe
5  * All rights reserved.
6  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. The name of the developer may NOT be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  * 3. Neither the name of the author nor the names of any co-contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Local APIC support on Pentium and later processors.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include "opt_atpic.h"
40 #include "opt_hwpmc_hooks.h"
41 
42 #include "opt_ddb.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/asan.h>
47 #include <sys/bus.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/msan.h>
52 #include <sys/mutex.h>
53 #include <sys/pcpu.h>
54 #include <sys/proc.h>
55 #include <sys/sched.h>
56 #include <sys/smp.h>
57 #include <sys/sysctl.h>
58 #include <sys/timeet.h>
59 #include <sys/timetc.h>
60 
61 #include <vm/vm.h>
62 #include <vm/pmap.h>
63 
64 #include <x86/apicreg.h>
65 #include <machine/clock.h>
66 #include <machine/cpufunc.h>
67 #include <machine/cputypes.h>
68 #include <machine/fpu.h>
69 #include <machine/frame.h>
70 #include <machine/intr_machdep.h>
71 #include <x86/apicvar.h>
72 #include <x86/mca.h>
73 #include <machine/md_var.h>
74 #include <machine/smp.h>
75 #include <machine/specialreg.h>
76 #include <x86/init.h>
77 
78 #ifdef DDB
79 #include <sys/interrupt.h>
80 #include <ddb/ddb.h>
81 #endif
82 
83 #ifdef __amd64__
84 #define	SDT_APIC	SDT_SYSIGT
85 #define	GSEL_APIC	0
86 #else
87 #define	SDT_APIC	SDT_SYS386IGT
88 #define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
89 #endif
90 
91 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");
92 
93 /* Sanity checks on IDT vectors. */
94 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
95 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
96 CTASSERT(APIC_LOCAL_INTS == 240);
97 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
98 
/*
 * I/O interrupts use non-negative IRQ values.  The negative values
 * defined below mark unused IDT entries or IDT entries reserved for
 * a non-I/O interrupt.
 */
104 #define	IRQ_FREE	-1
105 #define	IRQ_TIMER	-2
106 #define	IRQ_SYSCALL	-3
107 #define	IRQ_DTRACE_RET	-4
108 #define	IRQ_EVTCHN	-5
109 
110 enum lat_timer_mode {
111 	LAT_MODE_UNDEF =	0,
112 	LAT_MODE_PERIODIC =	1,
113 	LAT_MODE_ONESHOT =	2,
114 	LAT_MODE_DEADLINE =	3,
115 };
116 
117 /*
118  * Support for local APICs.  Local APICs manage interrupts on each
119  * individual processor as opposed to I/O APICs which receive interrupts
120  * from I/O devices and then forward them on to the local APICs.
121  *
122  * Local APICs can also send interrupts to each other thus providing the
123  * mechanism for IPIs.
124  */
125 
126 struct lvt {
127 	u_int lvt_edgetrigger:1;
128 	u_int lvt_activehi:1;
129 	u_int lvt_masked:1;
130 	u_int lvt_active:1;
131 	u_int lvt_mode:16;
132 	u_int lvt_vector:8;
133 };
134 
135 struct lapic {
136 	struct lvt la_lvts[APIC_LVT_MAX + 1];
137 	struct lvt la_elvts[APIC_ELVT_MAX + 1];
138 	u_int la_id:8;
139 	u_int la_cluster:4;
140 	u_int la_cluster_id:2;
141 	u_int la_present:1;
142 	u_long *la_timer_count;
143 	uint64_t la_timer_period;
144 	enum lat_timer_mode la_timer_mode;
145 	uint32_t lvt_timer_base;
146 	uint32_t lvt_timer_last;
147 	/* Include IDT_SYSCALL to make indexing easier. */
148 	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
149 } static *lapics;
150 
151 /* Global defaults for local APIC LVT entries. */
152 static struct lvt lvts[APIC_LVT_MAX + 1] = {
153 	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
154 	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
155 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
156 	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
157 	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
158 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
159 	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
160 };
161 
162 /* Global defaults for AMD local APIC ELVT entries. */
163 static struct lvt elvts[APIC_ELVT_MAX + 1] = {
164 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
165 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
166 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
167 	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
168 };
169 
170 static inthand_t *ioint_handlers[] = {
171 	NULL,			/* 0 - 31 */
172 	IDTVEC(apic_isr1),	/* 32 - 63 */
173 	IDTVEC(apic_isr2),	/* 64 - 95 */
174 	IDTVEC(apic_isr3),	/* 96 - 127 */
175 	IDTVEC(apic_isr4),	/* 128 - 159 */
176 	IDTVEC(apic_isr5),	/* 160 - 191 */
177 	IDTVEC(apic_isr6),	/* 192 - 223 */
178 	IDTVEC(apic_isr7),	/* 224 - 255 */
179 };
180 
181 static inthand_t *ioint_pti_handlers[] = {
182 	NULL,			/* 0 - 31 */
183 	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
184 	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
185 	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
186 	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
187 	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
188 	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
189 	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
190 };
191 
192 static u_int32_t lapic_timer_divisors[] = {
193 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
194 	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
195 };
196 
197 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);
198 
199 volatile char *lapic_map;
200 vm_paddr_t lapic_paddr = DEFAULT_APIC_BASE;
201 int x2apic_mode;
202 int lapic_eoi_suppression;
203 static int lapic_timer_tsc_deadline;
204 static u_long lapic_timer_divisor, count_freq;
205 static struct eventtimer lapic_et;
206 #ifdef SMP
207 static uint64_t lapic_ipi_wait_mult;
208 static int __read_mostly lapic_ds_idle_timeout = 1000000;
209 #endif
210 unsigned int max_apic_id;
211 
212 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
213     "APIC options");
214 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
215 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
216     &lapic_eoi_suppression, 0, "");
217 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
218     &lapic_timer_tsc_deadline, 0, "");
219 #ifdef SMP
220 SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN,
221     &lapic_ds_idle_timeout, 0,
222     "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)");
223 #endif
224 
225 static void lapic_calibrate_initcount(struct lapic *la);
226 
227 /*
228  * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan
229  * instrumentation.  Otherwise, if x2APIC is not available, use of the global
230  * lapic_map will generate a KCSan false positive.  While the mapping is
231  * shared among all CPUs, the physical access will always take place on the
232  * local CPU's APIC, so there isn't in fact a race here.  Furthermore, the
233  * KCSan warning printf can cause a panic if issued during LAPIC access,
234  * due to attempted recursive use of event timer resources.
235  */
236 
237 static uint32_t __nosanitizethread
238 lapic_read32(enum LAPIC_REGISTERS reg)
239 {
240 	uint32_t res;
241 
242 	if (x2apic_mode) {
243 		res = rdmsr32(MSR_APIC_000 + reg);
244 	} else {
245 		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
246 	}
247 	return (res);
248 }
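/*
 * For example, reading the local APIC ID register (reg 0x2 in the
 * x2APIC numbering used for enum LAPIC_REGISTERS) is a 32-bit load
 * from MMIO offset 0x2 * LAPIC_MEM_MUL (0x20) in xAPIC mode, and
 * rdmsr32(MSR_APIC_000 + 0x2), i.e. MSR 0x802, in x2APIC mode.
 */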
249 
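/*
 * WRMSR to an x2APIC register is not a serializing instruction, so
 * the mfence/lfence pair below keeps the register write ordered
 * after all earlier stores and loads.
 */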
250 static void __nosanitizethread
251 lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
252 {
253 
254 	if (x2apic_mode) {
255 		mfence();
256 		lfence();
257 		wrmsr(MSR_APIC_000 + reg, val);
258 	} else {
259 		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
260 	}
261 }
262 
263 static void __nosanitizethread
264 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
265 {
266 
267 	if (x2apic_mode) {
268 		wrmsr(MSR_APIC_000 + reg, val);
269 	} else {
270 		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
271 	}
272 }
273 
274 #ifdef SMP
275 static uint64_t
276 lapic_read_icr_lo(void)
277 {
278 
279 	return (lapic_read32(LAPIC_ICR_LO));
280 }
281 
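/*
 * The ICR is a single 64-bit MSR in x2APIC mode, but a pair of
 * 32-bit MMIO registers in xAPIC mode, with the destination in
 * ICR_HI bits 31:24.  The xAPIC path below disables interrupts so a
 * nested IPI cannot clobber ICR_HI between the two writes.
 */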
282 static void
283 lapic_write_icr(uint32_t vhi, uint32_t vlo)
284 {
285 	register_t saveintr;
286 	uint64_t v;
287 
288 	if (x2apic_mode) {
289 		v = ((uint64_t)vhi << 32) | vlo;
290 		mfence();
291 		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
292 	} else {
293 		saveintr = intr_disable();
294 		lapic_write32(LAPIC_ICR_HI, vhi);
295 		lapic_write32(LAPIC_ICR_LO, vlo);
296 		intr_restore(saveintr);
297 	}
298 }
299 
300 static void
301 lapic_write_icr_lo(uint32_t vlo)
302 {
303 
304 	if (x2apic_mode) {
305 		mfence();
306 		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
307 	} else {
308 		lapic_write32(LAPIC_ICR_LO, vlo);
309 	}
310 }
311 
312 static void
313 lapic_write_self_ipi(uint32_t vector)
314 {
315 
316 	KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode"));
317 	wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector);
318 }
319 #endif /* SMP */
320 
321 static void
322 lapic_enable_x2apic(void)
323 {
324 	uint64_t apic_base;
325 
326 	apic_base = rdmsr(MSR_APICBASE);
327 	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
328 	wrmsr(MSR_APICBASE, apic_base);
329 }
330 
331 bool
332 lapic_is_x2apic(void)
333 {
334 	uint64_t apic_base;
335 
336 	apic_base = rdmsr(MSR_APICBASE);
337 	return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
338 	    (APICBASE_X2APIC | APICBASE_ENABLED));
339 }
340 
341 static void	lapic_enable(void);
342 static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
343 static void	lapic_timer_oneshot(struct lapic *);
344 static void	lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
345 static void	lapic_timer_periodic(struct lapic *);
346 static void	lapic_timer_deadline(struct lapic *);
347 static void	lapic_timer_stop(struct lapic *);
348 static void	lapic_timer_set_divisor(u_int divisor);
349 static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
350 static int	lapic_et_start(struct eventtimer *et,
351 		    sbintime_t first, sbintime_t period);
352 static int	lapic_et_stop(struct eventtimer *et);
353 static u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
354 static void	lapic_set_tpr(u_int vector);
355 
356 struct pic lapic_pic = { .pic_resume = lapic_resume };
357 
358 static uint32_t
359 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
360 {
361 
362 	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
363 	    APIC_LVT_VECTOR);
364 	if (lvt->lvt_edgetrigger == 0)
365 		value |= APIC_LVT_TM;
366 	if (lvt->lvt_activehi == 0)
367 		value |= APIC_LVT_IIPP_INTALO;
368 	if (lvt->lvt_masked)
369 		value |= APIC_LVT_M;
370 	value |= lvt->lvt_mode;
371 	switch (lvt->lvt_mode) {
372 	case APIC_LVT_DM_NMI:
373 	case APIC_LVT_DM_SMI:
374 	case APIC_LVT_DM_INIT:
375 	case APIC_LVT_DM_EXTINT:
376 		if (!lvt->lvt_edgetrigger && bootverbose) {
377 			printf("lapic%u: Forcing LINT%u to edge trigger\n",
378 			    la->la_id, pin);
379 			value &= ~APIC_LVT_TM;
380 		}
381 		/* Use a vector of 0. */
382 		break;
383 	case APIC_LVT_DM_FIXED:
384 		value |= lvt->lvt_vector;
385 		break;
386 	default:
387 		panic("bad APIC LVT delivery mode: %#x\n", value);
388 	}
389 	return (value);
390 }
391 
392 static uint32_t
393 lvt_mode(struct lapic *la, u_int pin, uint32_t value)
394 {
395 	struct lvt *lvt;
396 
397 	KASSERT(pin <= APIC_LVT_MAX,
398 	    ("%s: pin %u out of range", __func__, pin));
399 	if (la->la_lvts[pin].lvt_active)
400 		lvt = &la->la_lvts[pin];
401 	else
402 		lvt = &lvts[pin];
403 
404 	return (lvt_mode_impl(la, lvt, pin, value));
405 }
406 
407 static uint32_t
408 elvt_mode(struct lapic *la, u_int idx, uint32_t value)
409 {
410 	struct lvt *elvt;
411 
412 	KASSERT(idx <= APIC_ELVT_MAX,
413 	    ("%s: idx %u out of range", __func__, idx));
414 
415 	elvt = &la->la_elvts[idx];
416 	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
417 	KASSERT(elvt->lvt_edgetrigger,
418 	    ("%s: ELVT%u is not edge triggered", __func__, idx));
419 	KASSERT(elvt->lvt_activehi,
420 	    ("%s: ELVT%u is not active high", __func__, idx));
421 	return (lvt_mode_impl(la, elvt, idx, value));
422 }
423 
424 /*
425  * Map the local APIC and setup necessary interrupt vectors.
426  */
427 void
428 lapic_init(vm_paddr_t addr)
429 {
430 #ifdef SMP
431 	uint64_t r, r1, r2, rx;
432 #endif
433 	uint32_t ver;
434 	int i;
435 	bool arat;
436 
437 	TSENTER();
438 
439 	/*
440 	 * Enable x2APIC mode if possible. Map the local APIC
441 	 * registers page.
442 	 *
443 	 * Keep the LAPIC registers page mapped uncached for x2APIC
 * mode too, so that the direct map page attribute is set to
 * uncached.  This is needed to work around a CPU erratum present
 * on all Intel processors.
447 	 */
448 	KASSERT(trunc_page(addr) == addr,
449 	    ("local APIC not aligned on a page boundary"));
450 	lapic_paddr = addr;
451 	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
452 	if (x2apic_mode) {
453 		lapic_enable_x2apic();
454 		lapic_map = NULL;
455 	}
456 
457 	/* Setup the spurious interrupt handler. */
458 	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
459 	    GSEL_APIC);
460 
461 	/* Perform basic initialization of the BSP's local APIC. */
462 	lapic_enable();
463 
464 	/* Set BSP's per-CPU local APIC ID. */
465 	PCPU_SET(apic_id, lapic_id());
466 
467 	/* Local APIC timer interrupt. */
468 	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
469 	    SDT_APIC, SEL_KPL, GSEL_APIC);
470 
471 	/* Local APIC error interrupt. */
472 	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
473 	    SDT_APIC, SEL_KPL, GSEL_APIC);
474 
475 	/* XXX: Thermal interrupt */
476 
477 	/* Local APIC CMCI. */
478 	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
479 	    SDT_APIC, SEL_KPL, GSEL_APIC);
480 
481 	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
482 		/* Set if APIC timer runs in C3. */
483 		arat = (cpu_power_eax & CPUTPM1_ARAT);
484 
485 		bzero(&lapic_et, sizeof(lapic_et));
486 		lapic_et.et_name = "LAPIC";
487 		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
488 		    ET_FLAGS_PERCPU;
489 		lapic_et.et_quality = 600;
490 		if (!arat) {
491 			lapic_et.et_flags |= ET_FLAGS_C3STOP;
492 			lapic_et.et_quality = 100;
493 		}
494 		if ((cpu_feature & CPUID_TSC) != 0 &&
495 		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
496 		    tsc_is_invariant && tsc_freq != 0) {
497 			lapic_timer_tsc_deadline = 1;
498 			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
499 			    &lapic_timer_tsc_deadline);
500 		}
501 
502 		lapic_et.et_frequency = 0;
		/* We don't know the frequency yet, so we have to guess. */
504 		lapic_et.et_min_period = 0x00001000LL;
505 		lapic_et.et_max_period = SBT_1S;
506 		lapic_et.et_start = lapic_et_start;
507 		lapic_et.et_stop = lapic_et_stop;
508 		lapic_et.et_priv = NULL;
509 		et_register(&lapic_et);
510 	}
511 
512 	/*
	 * Set lapic_eoi_suppression after lapic_enable(), so as not to
	 * enable suppression in the hardware prematurely.  Note that we
	 * enable suppression by default even when the system has only
	 * one I/O APIC, since otherwise EOIs are broadcast to all APIC
	 * agents, including CPUs.
	 *
	 * It seems that at least some KVM versions report the
	 * EOI_SUPPRESSION bit but auto-EOI does not work.
521 	 */
522 	ver = lapic_read32(LAPIC_VERSION);
523 	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
524 		lapic_eoi_suppression = 1;
525 		if (vm_guest == VM_GUEST_KVM) {
526 			if (bootverbose)
527 				printf(
528 		       "KVM -- disabling lapic eoi suppression\n");
529 			lapic_eoi_suppression = 0;
530 		}
531 		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
532 		    &lapic_eoi_suppression);
533 	}
534 
535 #ifdef SMP
536 #define	LOOPS	1000
537 	/*
538 	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
539 	 * lapic_ipi_wait_mult contains the number of iterations which
540 	 * approximately delay execution for 1 microsecond (the
541 	 * argument to lapic_ipi_wait() is in microseconds).
542 	 *
543 	 * We assume that TSC is present and already measured.
544 	 * Possible TSC frequency jumps are irrelevant to the
545 	 * calibration loop below, the CPU clock management code is
546 	 * not yet started, and we do not enter sleep states.
547 	 */
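	/*
	 * As a worked example with made-up numbers: if tsc_freq is
	 * 2 GHz and the LOOPS reads below take r = 40000 cycles in
	 * total, each iteration costs 40 cycles (20 ns), and
	 * lapic_ipi_wait_mult = (2e9 * 1000) / (40000 * 1e6) = 50
	 * iterations per microsecond.
	 */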
548 	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
549 	    ("TSC not initialized"));
550 	if (!x2apic_mode) {
551 		r = rdtsc();
552 		for (rx = 0; rx < LOOPS; rx++) {
553 			(void)lapic_read_icr_lo();
554 			ia32_pause();
555 		}
556 		r = rdtsc() - r;
557 		r1 = tsc_freq * LOOPS;
558 		r2 = r * 1000000;
559 		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
560 		if (bootverbose) {
561 			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
562 			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
563 			    (uintmax_t)r, (uintmax_t)tsc_freq);
564 		}
565 	}
566 #undef LOOPS
567 #endif /* SMP */
568 
569 	TSEXIT();
570 }
571 
572 /*
573  * Create a local APIC instance.
574  */
575 void
576 lapic_create(u_int apic_id, int boot_cpu)
577 {
578 	int i;
579 
580 	if (apic_id > max_apic_id) {
		printf("APIC: Ignoring local APIC with ID %u\n", apic_id);
582 		if (boot_cpu)
583 			panic("Can't ignore BSP");
584 		return;
585 	}
586 	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
587 	    apic_id));
588 
589 	/*
590 	 * Assume no local LVT overrides and a cluster of 0 and
591 	 * intra-cluster ID of 0.
592 	 */
593 	lapics[apic_id].la_present = 1;
594 	lapics[apic_id].la_id = apic_id;
595 	for (i = 0; i <= APIC_LVT_MAX; i++) {
596 		lapics[apic_id].la_lvts[i] = lvts[i];
597 		lapics[apic_id].la_lvts[i].lvt_active = 0;
598 	}
599 	for (i = 0; i <= APIC_ELVT_MAX; i++) {
600 		lapics[apic_id].la_elvts[i] = elvts[i];
601 		lapics[apic_id].la_elvts[i].lvt_active = 0;
602 	}
603 	for (i = 0; i <= APIC_NUM_IOINTS; i++)
604 	    lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
605 	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
606 	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
607 	    IRQ_TIMER;
608 #ifdef KDTRACE_HOOKS
609 	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
610 	    IRQ_DTRACE_RET;
611 #endif
612 #ifdef XENHVM
613 	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
614 #endif
615 
616 #ifdef SMP
617 	cpu_add(apic_id, boot_cpu);
618 #endif
619 }
620 
621 static inline uint32_t
622 amd_read_ext_features(void)
623 {
624 	uint32_t version;
625 
626 	if (cpu_vendor_id != CPU_VENDOR_AMD &&
627 	    cpu_vendor_id != CPU_VENDOR_HYGON)
628 		return (0);
629 	version = lapic_read32(LAPIC_VERSION);
630 	if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
631 		return (lapic_read32(LAPIC_EXT_FEATURES));
632 	else
633 		return (0);
634 }
635 
636 static inline uint32_t
637 amd_read_elvt_count(void)
638 {
639 	uint32_t extf;
640 	uint32_t count;
641 
642 	extf = amd_read_ext_features();
643 	count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
644 	count = min(count, APIC_ELVT_MAX + 1);
645 	return (count);
646 }
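/*
 * The ELVT count is an 8-bit field of the extended feature register;
 * e.g. a CPU reporting 4 entries yields count == 4, clamped above to
 * the APIC_ELVT_MAX + 1 slots tracked in struct lapic.
 */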
647 
648 /*
649  * Dump contents of local APIC registers
650  */
651 void
652 lapic_dump(const char* str)
653 {
654 	uint32_t version;
655 	uint32_t maxlvt;
656 	uint32_t extf;
657 	int elvt_count;
658 	int i;
659 
660 	version = lapic_read32(LAPIC_VERSION);
661 	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
662 	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
663 	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
664 	    lapic_read32(LAPIC_ID), version,
665 	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
666 	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
667 		printf(" x2APIC: %d", x2apic_mode);
668 	printf("\n  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
669 	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
670 	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
671 	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
672 	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
673 	    lapic_read32(LAPIC_LVT_ERROR));
674 	if (maxlvt >= APIC_LVT_PMC)
675 		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
676 	printf("\n");
677 	if (maxlvt >= APIC_LVT_CMCI)
678 		printf("   cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
679 	extf = amd_read_ext_features();
680 	if (extf != 0) {
681 		printf("   AMD ext features: 0x%08x", extf);
682 		elvt_count = amd_read_elvt_count();
683 		for (i = 0; i < elvt_count; i++)
684 			printf("%s elvt%d: 0x%08x", (i % 4) ? "" : "\n ", i,
685 			    lapic_read32(LAPIC_EXT_LVT0 + i));
686 		printf("\n");
687 	}
688 }
689 
690 void
691 lapic_xapic_mode(void)
692 {
693 	register_t saveintr;
694 
695 	saveintr = intr_disable();
696 	if (x2apic_mode)
697 		lapic_enable_x2apic();
698 	intr_restore(saveintr);
699 }
700 
701 void
702 lapic_setup(int boot)
703 {
704 	struct lapic *la;
705 	uint32_t version;
706 	uint32_t maxlvt;
707 	register_t saveintr;
708 	int elvt_count;
709 	int i;
710 
711 	saveintr = intr_disable();
712 
713 	la = &lapics[lapic_id()];
714 	KASSERT(la->la_present, ("missing APIC structure"));
715 	version = lapic_read32(LAPIC_VERSION);
716 	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
717 
718 	/* Initialize the TPR to allow all interrupts. */
719 	lapic_set_tpr(0);
720 
721 	/* Setup spurious vector and enable the local APIC. */
722 	lapic_enable();
723 
724 	/* Program LINT[01] LVT entries. */
725 	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
726 	    lapic_read32(LAPIC_LVT_LINT0)));
727 	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
728 	    lapic_read32(LAPIC_LVT_LINT1)));
729 
730 	/* Program the PMC LVT entry if present. */
731 	if (maxlvt >= APIC_LVT_PMC) {
732 		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
733 		    LAPIC_LVT_PCINT));
734 	}
735 
736 	/*
737 	 * Program the timer LVT.  Calibration is deferred until it is certain
738 	 * that we have a reliable timecounter.
739 	 */
740 	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
741 	    lapic_read32(LAPIC_LVT_TIMER));
742 	la->lvt_timer_last = la->lvt_timer_base;
743 	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);
744 
745 	if (boot)
746 		la->la_timer_mode = LAT_MODE_UNDEF;
747 	else if (la->la_timer_mode != LAT_MODE_UNDEF) {
748 		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
749 		    lapic_id()));
750 		switch (la->la_timer_mode) {
751 		case LAT_MODE_PERIODIC:
752 			lapic_timer_set_divisor(lapic_timer_divisor);
753 			lapic_timer_periodic(la);
754 			break;
755 		case LAT_MODE_ONESHOT:
756 			lapic_timer_set_divisor(lapic_timer_divisor);
757 			lapic_timer_oneshot(la);
758 			break;
759 		case LAT_MODE_DEADLINE:
760 			lapic_timer_deadline(la);
761 			break;
762 		default:
763 			panic("corrupted la_timer_mode %p %d", la,
764 			    la->la_timer_mode);
765 		}
766 	}
767 
768 	/* Program error LVT and clear any existing errors. */
769 	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
770 	    lapic_read32(LAPIC_LVT_ERROR)));
771 	lapic_write32(LAPIC_ESR, 0);
772 
773 	/* XXX: Thermal LVT */
774 
775 	/* Program the CMCI LVT entry if present. */
776 	if (maxlvt >= APIC_LVT_CMCI) {
777 		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
778 		    lapic_read32(LAPIC_LVT_CMCI)));
779 	}
780 
781 	elvt_count = amd_read_elvt_count();
782 	for (i = 0; i < elvt_count; i++) {
783 		if (la->la_elvts[i].lvt_active)
784 			lapic_write32(LAPIC_EXT_LVT0 + i,
785 			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
786 	}
787 
788 	intr_restore(saveintr);
789 }
790 
791 static void
792 lapic_intrcnt(void *dummy __unused)
793 {
794 	struct pcpu *pc;
795 	struct lapic *la;
796 	char buf[MAXCOMLEN + 1];
797 
798 	/* If there are no APICs, skip this function. */
799 	if (lapics == NULL)
800 		return;
801 
802 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
803 		la = &lapics[pc->pc_apic_id];
804 		if (!la->la_present)
805 		    continue;
806 
807 		snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
808 		intrcnt_add(buf, &la->la_timer_count);
809 	}
810 }
811 SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL);
812 
813 void
814 lapic_reenable_pmc(void)
815 {
816 #ifdef HWPMC_HOOKS
817 	uint32_t value;
818 
819 	value = lapic_read32(LAPIC_LVT_PCINT);
820 	value &= ~APIC_LVT_M;
821 	lapic_write32(LAPIC_LVT_PCINT, value);
822 #endif
823 }
824 
825 #ifdef HWPMC_HOOKS
826 static void
827 lapic_update_pmc(void *dummy)
828 {
829 	struct lapic *la;
830 
831 	la = &lapics[lapic_id()];
832 	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
833 	    lapic_read32(LAPIC_LVT_PCINT)));
834 }
835 #endif
836 
837 void
838 lapic_calibrate_timer(void)
839 {
840 	struct lapic *la;
841 	register_t intr;
842 
843 #ifdef DEV_ATPIC
844 	/* Fail if the local APIC is not present. */
845 	if (!x2apic_mode && lapic_map == NULL)
846 		return;
847 #endif
848 
849 	intr = intr_disable();
850 	la = &lapics[lapic_id()];
851 
852 	lapic_calibrate_initcount(la);
853 
854 	intr_restore(intr);
855 
856 	if (lapic_timer_tsc_deadline && bootverbose) {
857 		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
858 		    (uintmax_t)tsc_freq);
859 	}
860 }
861 
862 int
863 lapic_enable_pmc(void)
864 {
865 #ifdef HWPMC_HOOKS
866 	u_int32_t maxlvt;
867 
868 #ifdef DEV_ATPIC
869 	/* Fail if the local APIC is not present. */
870 	if (!x2apic_mode && lapic_map == NULL)
871 		return (0);
872 #endif
873 
874 	/* Fail if the PMC LVT is not present. */
875 	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
876 	if (maxlvt < APIC_LVT_PMC)
877 		return (0);
878 
879 	lvts[APIC_LVT_PMC].lvt_masked = 0;
880 
881 #ifdef EARLY_AP_STARTUP
882 	MPASS(mp_ncpus == 1 || smp_started);
883 	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
884 #else
885 #ifdef SMP
886 	/*
887 	 * If hwpmc was loaded at boot time then the APs may not be
888 	 * started yet.  In that case, don't forward the request to
889 	 * them as they will program the lvt when they start.
890 	 */
891 	if (smp_started)
892 		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
893 	else
894 #endif
895 		lapic_update_pmc(NULL);
896 #endif
897 	return (1);
898 #else
899 	return (0);
900 #endif
901 }
902 
903 void
904 lapic_disable_pmc(void)
905 {
906 #ifdef HWPMC_HOOKS
907 	u_int32_t maxlvt;
908 
909 #ifdef DEV_ATPIC
910 	/* Fail if the local APIC is not present. */
911 	if (!x2apic_mode && lapic_map == NULL)
912 		return;
913 #endif
914 
915 	/* Fail if the PMC LVT is not present. */
916 	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
917 	if (maxlvt < APIC_LVT_PMC)
918 		return;
919 
920 	lvts[APIC_LVT_PMC].lvt_masked = 1;
921 
922 #ifdef SMP
923 	/* The APs should always be started when hwpmc is unloaded. */
924 	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
925 #endif
926 	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
927 #endif
928 }
929 
static bool
lapic_calibrate_initcount_cpuid_vm(void)
932 {
933 	u_int regs[4];
934 	uint64_t freq;
935 
936 	/* Get value from CPUID leaf if possible. */
937 	if (vm_guest == VM_GUEST_NO)
938 		return (false);
939 	if (hv_high < 0x40000010)
940 		return (false);
941 	do_cpuid(0x40000010, regs);
942 	freq = (uint64_t)(regs[1]) * 1000;
943 
944 	/* Pick timer divisor. */
945 	lapic_timer_divisor = 2;
946 	do {
947 		if (freq / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
948 			break;
949 		lapic_timer_divisor <<= 1;
950 	} while (lapic_timer_divisor <= 128);
951 	if (lapic_timer_divisor > 128)
952 		return (false);
953 
954 	/* Record divided frequency. */
955 	count_freq = freq / lapic_timer_divisor;
956 	return (true);
957 }
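/*
 * Worked example with hypothetical values: a hypervisor reporting
 * ebx == 1000000 in leaf 0x40000010 advertises a 1 GHz APIC timer;
 * the loop above then settles on lapic_timer_divisor == 2 (1e9 / 2
 * fits in APIC_TIMER_MAX_COUNT), giving count_freq == 500 MHz.
 */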
958 
959 static uint64_t
960 cb_lapic_getcount(void)
961 {
962 
963 	return (APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER));
964 }
965 
966 static void
967 lapic_calibrate_initcount(struct lapic *la)
968 {
969 	uint64_t freq;
970 
971 	if (lapic_calibrate_initcount_cpuid_vm())
972 		goto done;
973 
974 	/* Calibrate the APIC timer frequency. */
975 	lapic_timer_set_divisor(2);
976 	lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
977 	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
978 	freq = clockcalib(cb_lapic_getcount, "lapic");
979 	fpu_kern_leave(curthread, NULL);
980 
981 	/* Pick a different divisor if necessary. */
982 	lapic_timer_divisor = 2;
983 	do {
984 		if (freq * 2 / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
985 			break;
986 		lapic_timer_divisor <<= 1;
987 	} while (lapic_timer_divisor <= 128);
988 	if (lapic_timer_divisor > 128)
989 		panic("lapic: Divisor too big");
990 	count_freq = freq * 2 / lapic_timer_divisor;
991 done:
992 	if (bootverbose) {
993 		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
994 		    lapic_timer_divisor, count_freq);
995 	}
996 }
997 
998 static void
999 lapic_change_mode(struct eventtimer *et, struct lapic *la,
1000     enum lat_timer_mode newmode)
1001 {
1002 	if (la->la_timer_mode == newmode)
1003 		return;
1004 	switch (newmode) {
1005 	case LAT_MODE_PERIODIC:
1006 		lapic_timer_set_divisor(lapic_timer_divisor);
1007 		et->et_frequency = count_freq;
1008 		break;
1009 	case LAT_MODE_DEADLINE:
1010 		et->et_frequency = tsc_freq;
1011 		break;
1012 	case LAT_MODE_ONESHOT:
1013 		lapic_timer_set_divisor(lapic_timer_divisor);
1014 		et->et_frequency = count_freq;
1015 		break;
1016 	default:
1017 		panic("lapic_change_mode %d", newmode);
1018 	}
1019 	la->la_timer_mode = newmode;
1020 	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
1021 	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
1022 }
1023 
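/*
 * Event timer periods are 32.32 fixed-point sbintime values, so the
 * (et_frequency * period) >> 32 computations in lapic_et_start()
 * below convert seconds into timer ticks.  For example, with
 * count_freq == 100 MHz and a 10 ms period (SBT_1S / 100), the
 * programmed count is 10^8 / 100 == 10^6 ticks.
 */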
1024 static int
1025 lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
1026 {
1027 	struct lapic *la;
1028 
1029 	la = &lapics[PCPU_GET(apic_id)];
1030 	if (period != 0) {
1031 		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
1032 		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
1033 		    32;
1034 		lapic_timer_periodic(la);
1035 	} else if (lapic_timer_tsc_deadline) {
1036 		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
1037 		la->la_timer_period = (et->et_frequency * first) >> 32;
1038 		lapic_timer_deadline(la);
1039 	} else {
1040 		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
1041 		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
1042 		    32;
1043 		lapic_timer_oneshot(la);
1044 	}
1045 	return (0);
1046 }
1047 
1048 static int
1049 lapic_et_stop(struct eventtimer *et)
1050 {
1051 	struct lapic *la;
1052 
1053 	la = &lapics[PCPU_GET(apic_id)];
1054 	lapic_timer_stop(la);
1055 	la->la_timer_mode = LAT_MODE_UNDEF;
1056 	return (0);
1057 }
1058 
1059 void
1060 lapic_disable(void)
1061 {
1062 	uint32_t value;
1063 
1064 	/* Software disable the local APIC. */
1065 	value = lapic_read32(LAPIC_SVR);
1066 	value &= ~APIC_SVR_SWEN;
1067 	lapic_write32(LAPIC_SVR, value);
1068 }
1069 
1070 static void
1071 lapic_enable(void)
1072 {
1073 	uint32_t value;
1074 
1075 	/* Program the spurious vector to enable the local APIC. */
1076 	value = lapic_read32(LAPIC_SVR);
1077 	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
1078 	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
1079 	if (lapic_eoi_suppression)
1080 		value |= APIC_SVR_EOI_SUPPRESSION;
1081 	lapic_write32(LAPIC_SVR, value);
1082 }
1083 
1084 /* Reset the local APIC on the BSP during resume. */
1085 static void
1086 lapic_resume(struct pic *pic, bool suspend_cancelled)
1087 {
1088 
1089 	lapic_setup(0);
1090 }
1091 
1092 int
1093 lapic_id(void)
1094 {
1095 	uint32_t v;
1096 
1097 	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
1098 	v = lapic_read32(LAPIC_ID);
1099 	if (!x2apic_mode)
1100 		v >>= APIC_ID_SHIFT;
1101 	return (v);
1102 }
1103 
1104 int
1105 lapic_intr_pending(u_int vector)
1106 {
1107 	uint32_t irr;
1108 
	/*
	 * The IRR registers are an array of registers, each of which
	 * only describes 32 interrupts in its low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index and take
	 * the vector modulo 32 to determine the individual bit to
	 * test.  For example, vector 0x52 (82) is bit 18 of IRR2.
	 */
1116 	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
1117 	return (irr & 1 << (vector % 32));
1118 }
1119 
1120 void
1121 lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
1122 {
1123 	struct lapic *la;
1124 
1125 	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
1126 	    __func__, apic_id));
1127 	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
1128 	    __func__, cluster));
1129 	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
1130 	    ("%s: intra cluster id %u too big", __func__, cluster_id));
1131 	la = &lapics[apic_id];
1132 	la->la_cluster = cluster;
1133 	la->la_cluster_id = cluster_id;
1134 }
1135 
1136 int
1137 lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
1138 {
1139 
1140 	if (pin > APIC_LVT_MAX)
1141 		return (EINVAL);
1142 	if (apic_id == APIC_ID_ALL) {
1143 		lvts[pin].lvt_masked = masked;
1144 		if (bootverbose)
1145 			printf("lapic:");
1146 	} else {
1147 		KASSERT(lapics[apic_id].la_present,
1148 		    ("%s: missing APIC %u", __func__, apic_id));
1149 		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
1150 		lapics[apic_id].la_lvts[pin].lvt_active = 1;
1151 		if (bootverbose)
1152 			printf("lapic%u:", apic_id);
1153 	}
1154 	if (bootverbose)
1155 		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
1156 	return (0);
1157 }
1158 
1159 int
1160 lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
1161 {
1162 	struct lvt *lvt;
1163 
1164 	if (pin > APIC_LVT_MAX)
1165 		return (EINVAL);
1166 	if (apic_id == APIC_ID_ALL) {
1167 		lvt = &lvts[pin];
1168 		if (bootverbose)
1169 			printf("lapic:");
1170 	} else {
1171 		KASSERT(lapics[apic_id].la_present,
1172 		    ("%s: missing APIC %u", __func__, apic_id));
1173 		lvt = &lapics[apic_id].la_lvts[pin];
1174 		lvt->lvt_active = 1;
1175 		if (bootverbose)
1176 			printf("lapic%u:", apic_id);
1177 	}
1178 	lvt->lvt_mode = mode;
1179 	switch (mode) {
1180 	case APIC_LVT_DM_NMI:
1181 	case APIC_LVT_DM_SMI:
1182 	case APIC_LVT_DM_INIT:
1183 	case APIC_LVT_DM_EXTINT:
1184 		lvt->lvt_edgetrigger = 1;
1185 		lvt->lvt_activehi = 1;
1186 		if (mode == APIC_LVT_DM_EXTINT)
1187 			lvt->lvt_masked = 1;
1188 		else
1189 			lvt->lvt_masked = 0;
1190 		break;
1191 	default:
1192 		panic("Unsupported delivery mode: 0x%x\n", mode);
1193 	}
1194 	if (bootverbose) {
1195 		printf(" Routing ");
1196 		switch (mode) {
1197 		case APIC_LVT_DM_NMI:
1198 			printf("NMI");
1199 			break;
1200 		case APIC_LVT_DM_SMI:
1201 			printf("SMI");
1202 			break;
1203 		case APIC_LVT_DM_INIT:
1204 			printf("INIT");
1205 			break;
1206 		case APIC_LVT_DM_EXTINT:
1207 			printf("ExtINT");
1208 			break;
1209 		}
1210 		printf(" -> LINT%u\n", pin);
1211 	}
1212 	return (0);
1213 }
1214 
1215 int
1216 lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
1217 {
1218 
1219 	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
1220 		return (EINVAL);
1221 	if (apic_id == APIC_ID_ALL) {
1222 		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
1223 		if (bootverbose)
1224 			printf("lapic:");
1225 	} else {
1226 		KASSERT(lapics[apic_id].la_present,
1227 		    ("%s: missing APIC %u", __func__, apic_id));
1228 		lapics[apic_id].la_lvts[pin].lvt_active = 1;
1229 		lapics[apic_id].la_lvts[pin].lvt_activehi =
1230 		    (pol == INTR_POLARITY_HIGH);
1231 		if (bootverbose)
1232 			printf("lapic%u:", apic_id);
1233 	}
1234 	if (bootverbose)
1235 		printf(" LINT%u polarity: %s\n", pin,
1236 		    pol == INTR_POLARITY_HIGH ? "high" : "low");
1237 	return (0);
1238 }
1239 
1240 int
1241 lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
1242      enum intr_trigger trigger)
1243 {
1244 
1245 	if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
1246 		return (EINVAL);
1247 	if (apic_id == APIC_ID_ALL) {
1248 		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
1249 		if (bootverbose)
1250 			printf("lapic:");
1251 	} else {
1252 		KASSERT(lapics[apic_id].la_present,
1253 		    ("%s: missing APIC %u", __func__, apic_id));
1254 		lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
1255 		    (trigger == INTR_TRIGGER_EDGE);
1256 		lapics[apic_id].la_lvts[pin].lvt_active = 1;
1257 		if (bootverbose)
1258 			printf("lapic%u:", apic_id);
1259 	}
1260 	if (bootverbose)
1261 		printf(" LINT%u trigger: %s\n", pin,
1262 		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
1263 	return (0);
1264 }
1265 
1266 /*
1267  * Adjust the TPR of the current CPU so that it blocks all interrupts below
1268  * the passed in vector.
1269  */
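/*
 * The TPR compares interrupt priority classes (vector >> 4): e.g. a
 * TPR of 0x40 would inhibit delivery of all vectors up to 0x4f,
 * while the 0 written at setup time lets everything through.
 */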
1270 static void
1271 lapic_set_tpr(u_int vector)
1272 {
1273 #ifdef CHEAP_TPR
1274 	lapic_write32(LAPIC_TPR, vector);
1275 #else
1276 	uint32_t tpr;
1277 
1278 	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
1279 	tpr |= vector;
1280 	lapic_write32(LAPIC_TPR, tpr);
1281 #endif
1282 }
1283 
1284 void
1285 lapic_eoi(void)
1286 {
1287 
1288 	lapic_write32_nofence(LAPIC_EOI, 0);
1289 }
1290 
1291 void
1292 lapic_handle_intr(int vector, struct trapframe *frame)
1293 {
1294 	struct intsrc *isrc;
1295 
1296 	kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
1297 	kmsan_mark(&vector, sizeof(vector), KMSAN_STATE_INITED);
1298 	kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
1299 	trap_check_kstack();
1300 
1301 	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
1302 	    vector));
1303 	intr_execute_handlers(isrc, frame);
1304 }
1305 
1306 void
1307 lapic_handle_timer(struct trapframe *frame)
1308 {
1309 	struct lapic *la;
1310 	struct trapframe *oldframe;
1311 	struct thread *td;
1312 
1313 	/* Send EOI first thing. */
1314 	lapic_eoi();
1315 
1316 	kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
1317 	kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
1318 	trap_check_kstack();
1319 
1320 #if defined(SMP) && !defined(SCHED_ULE)
1321 	/*
1322 	 * Don't do any accounting for the disabled HTT cores, since it
1323 	 * will provide misleading numbers for the userland.
1324 	 *
1325 	 * No locking is necessary here, since even if we lose the race
1326 	 * when hlt_cpus_mask changes it is not a big deal, really.
1327 	 *
1328 	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
1329 	 * and unlike other schedulers it actually schedules threads to
1330 	 * those CPUs.
1331 	 */
1332 	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
1333 		return;
1334 #endif
1335 
1336 	/* Look up our local APIC structure for the tick counters. */
1337 	la = &lapics[PCPU_GET(apic_id)];
1338 	(*la->la_timer_count)++;
1339 	critical_enter();
1340 	if (lapic_et.et_active) {
1341 		td = curthread;
1342 		td->td_intr_nesting_level++;
1343 		oldframe = td->td_intr_frame;
1344 		td->td_intr_frame = frame;
1345 		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
1346 		td->td_intr_frame = oldframe;
1347 		td->td_intr_nesting_level--;
1348 	}
1349 	critical_exit();
1350 }
1351 
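/*
 * lapic_timer_set_divisor() maps the divisor to its DCR encoding via
 * log2: e.g. divisor 8 has ffs(8) == 4 and selects
 * lapic_timer_divisors[3], i.e. APIC_TDCR_8.
 */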
1352 static void
1353 lapic_timer_set_divisor(u_int divisor)
1354 {
1355 
1356 	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
1357 	KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
1358 		("lapic: invalid divisor %u", divisor));
1359 	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
1360 }
1361 
1362 static void
1363 lapic_timer_oneshot(struct lapic *la)
1364 {
1365 	uint32_t value;
1366 
1367 	value = la->lvt_timer_base;
1368 	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1369 	value |= APIC_LVTT_TM_ONE_SHOT;
1370 	la->lvt_timer_last = value;
1371 	lapic_write32(LAPIC_LVT_TIMER, value);
1372 	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
1373 }
1374 
1375 static void
1376 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
1377 {
1378 	uint32_t value;
1379 
1380 	value = la->lvt_timer_base;
1381 	value &= ~APIC_LVTT_TM;
1382 	value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
1383 	la->lvt_timer_last = value;
1384 	lapic_write32(LAPIC_LVT_TIMER, value);
1385 	lapic_write32(LAPIC_ICR_TIMER, count);
1386 }
1387 
1388 static void
1389 lapic_timer_periodic(struct lapic *la)
1390 {
1391 	uint32_t value;
1392 
1393 	value = la->lvt_timer_base;
1394 	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1395 	value |= APIC_LVTT_TM_PERIODIC;
1396 	la->lvt_timer_last = value;
1397 	lapic_write32(LAPIC_LVT_TIMER, value);
1398 	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
1399 }
1400 
1401 static void
1402 lapic_timer_deadline(struct lapic *la)
1403 {
1404 	uint32_t value;
1405 
1406 	value = la->lvt_timer_base;
1407 	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1408 	value |= APIC_LVTT_TM_TSCDLT;
1409 	if (value != la->lvt_timer_last) {
1410 		la->lvt_timer_last = value;
1411 		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
1412 		if (!x2apic_mode)
1413 			mfence();
1414 	}
1415 	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
1416 }
1417 
1418 static void
1419 lapic_timer_stop(struct lapic *la)
1420 {
1421 	uint32_t value;
1422 
1423 	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
1424 		wrmsr(MSR_TSC_DEADLINE, 0);
1425 		mfence();
1426 	} else {
1427 		value = la->lvt_timer_base;
1428 		value &= ~APIC_LVTT_TM;
1429 		value |= APIC_LVT_M;
1430 		la->lvt_timer_last = value;
1431 		lapic_write32(LAPIC_LVT_TIMER, value);
1432 	}
1433 }
1434 
1435 void
1436 lapic_handle_cmc(void)
1437 {
1438 	trap_check_kstack();
1439 
1440 	lapic_eoi();
1441 	cmc_intr();
1442 }
1443 
1444 /*
1445  * Called from the mca_init() to activate the CMC interrupt if this CPU is
1446  * responsible for monitoring any MC banks for CMC events.  Since mca_init()
1447  * is called prior to lapic_setup() during boot, this just needs to unmask
1448  * this CPU's LVT_CMCI entry.
1449  */
1450 void
1451 lapic_enable_cmc(void)
1452 {
1453 	u_int apic_id;
1454 
1455 #ifdef DEV_ATPIC
1456 	if (!x2apic_mode && lapic_map == NULL)
1457 		return;
1458 #endif
1459 	apic_id = PCPU_GET(apic_id);
1460 	KASSERT(lapics[apic_id].la_present,
1461 	    ("%s: missing APIC %u", __func__, apic_id));
1462 	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
1463 	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
1464 }
1465 
1466 int
1467 lapic_enable_mca_elvt(void)
1468 {
1469 	u_int apic_id;
1470 	uint32_t value;
1471 	int elvt_count;
1472 
1473 #ifdef DEV_ATPIC
1474 	if (lapic_map == NULL)
1475 		return (-1);
1476 #endif
1477 
1478 	apic_id = PCPU_GET(apic_id);
1479 	KASSERT(lapics[apic_id].la_present,
1480 	    ("%s: missing APIC %u", __func__, apic_id));
1481 	elvt_count = amd_read_elvt_count();
1482 	if (elvt_count <= APIC_ELVT_MCA)
1483 		return (-1);
1484 
1485 	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
1486 	if ((value & APIC_LVT_M) == 0) {
1487 		if (bootverbose)
1488 			printf("AMD MCE Thresholding Extended LVT is already active\n");
1489 		return (APIC_ELVT_MCA);
1490 	}
1491 	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
1492 	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
1493 	return (APIC_ELVT_MCA);
1494 }
1495 
1496 void
1497 lapic_handle_error(void)
1498 {
1499 	uint32_t esr;
1500 
1501 	trap_check_kstack();
1502 
1503 	/*
1504 	 * Read the contents of the error status register.  Write to
1505 	 * the register first before reading from it to force the APIC
1506 	 * to update its value to indicate any errors that have
1507 	 * occurred since the previous write to the register.
1508 	 */
1509 	lapic_write32(LAPIC_ESR, 0);
1510 	esr = lapic_read32(LAPIC_ESR);
1511 
1512 	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
1513 	lapic_eoi();
1514 }
1515 
1516 u_int
1517 apic_cpuid(u_int apic_id)
1518 {
1519 #ifdef SMP
1520 	return apic_cpuids[apic_id];
1521 #else
1522 	return 0;
1523 #endif
1524 }
1525 
1526 /* Request a free IDT vector to be used by the specified IRQ. */
1527 u_int
1528 apic_alloc_vector(u_int apic_id, u_int irq)
1529 {
1530 	u_int vector;
1531 
1532 	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
1533 
1534 	/*
1535 	 * Search for a free vector.  Currently we just use a very simple
1536 	 * algorithm to find the first free vector.
1537 	 */
1538 	mtx_lock_spin(&icu_lock);
1539 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
1540 		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
1541 			continue;
1542 		lapics[apic_id].la_ioint_irqs[vector] = irq;
1543 		mtx_unlock_spin(&icu_lock);
1544 		return (vector + APIC_IO_INTS);
1545 	}
1546 	mtx_unlock_spin(&icu_lock);
1547 	return (0);
1548 }
1549 
1550 /*
1551  * Request 'count' free contiguous IDT vectors to be used by 'count'
1552  * IRQs.  'count' must be a power of two and the vectors will be
1553  * aligned on a boundary of 'align'.  If the request cannot be
1554  * satisfied, 0 is returned.
1555  */
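/*
 * For example, a device using 4 MSI messages needs count = 4 and
 * align = 4, since MSI encodes the message number in the low bits
 * of a single aligned base vector.
 */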
1556 u_int
1557 apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
1558 {
1559 	u_int first, run, vector;
1560 
1561 	KASSERT(powerof2(count), ("bad count"));
1562 	KASSERT(powerof2(align), ("bad align"));
1563 	KASSERT(align >= count, ("align < count"));
1564 #ifdef INVARIANTS
1565 	for (run = 0; run < count; run++)
1566 		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
1567 		    irqs[run], run));
1568 #endif
1569 
1570 	/*
1571 	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
1572 	 * this just uses a simple first fit algorithm.
1573 	 */
1574 	run = 0;
1575 	first = 0;
1576 	mtx_lock_spin(&icu_lock);
1577 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
1578 		/* Vector is in use, end run. */
1579 		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
1580 			run = 0;
1581 			first = 0;
1582 			continue;
1583 		}
1584 
1585 		/* Start a new run if run == 0 and vector is aligned. */
1586 		if (run == 0) {
1587 			if ((vector & (align - 1)) != 0)
1588 				continue;
1589 			first = vector;
1590 		}
1591 		run++;
1592 
1593 		/* Keep looping if the run isn't long enough yet. */
1594 		if (run < count)
1595 			continue;
1596 
1597 		/* Found a run, assign IRQs and return the first vector. */
1598 		for (vector = 0; vector < count; vector++)
1599 			lapics[apic_id].la_ioint_irqs[first + vector] =
1600 			    irqs[vector];
1601 		mtx_unlock_spin(&icu_lock);
1602 		return (first + APIC_IO_INTS);
1603 	}
1604 	mtx_unlock_spin(&icu_lock);
1605 	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
1606 	return (0);
1607 }
1608 
1609 /*
1610  * Enable a vector for a particular apic_id.  Since all lapics share idt
1611  * entries and ioint_handlers this enables the vector on all lapics.  lapics
1612  * which do not have the vector configured would report spurious interrupts
1613  * should it fire.
1614  */
1615 void
1616 apic_enable_vector(u_int apic_id, u_int vector)
1617 {
1618 
1619 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
1620 	KASSERT(ioint_handlers[vector / 32] != NULL,
1621 	    ("No ISR handler for vector %u", vector));
1622 #ifdef KDTRACE_HOOKS
1623 	KASSERT(vector != IDT_DTRACE_RET,
1624 	    ("Attempt to overwrite DTrace entry"));
1625 #endif
1626 	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
1627 	    SDT_APIC, SEL_KPL, GSEL_APIC);
1628 }
1629 
1630 void
1631 apic_disable_vector(u_int apic_id, u_int vector)
1632 {
1633 
1634 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
1635 #ifdef KDTRACE_HOOKS
1636 	KASSERT(vector != IDT_DTRACE_RET,
1637 	    ("Attempt to overwrite DTrace entry"));
1638 #endif
1639 	KASSERT(ioint_handlers[vector / 32] != NULL,
1640 	    ("No ISR handler for vector %u", vector));
1641 #ifdef notyet
1642 	/*
1643 	 * We can not currently clear the idt entry because other cpus
1644 	 * may have a valid vector at this offset.
1645 	 */
1646 	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
1647 	    SEL_KPL, GSEL_APIC);
1648 #endif
1649 }
1650 
1651 /* Release an APIC vector when it's no longer in use. */
1652 void
1653 apic_free_vector(u_int apic_id, u_int vector, u_int irq)
1654 {
1655 	struct thread *td;
1656 
1657 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
1658 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
1659 	    ("Vector %u does not map to an IRQ line", vector));
1660 	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
1661 	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
1662 	    irq, ("IRQ mismatch"));
1663 #ifdef KDTRACE_HOOKS
1664 	KASSERT(vector != IDT_DTRACE_RET,
1665 	    ("Attempt to overwrite DTrace entry"));
1666 #endif
1667 
1668 	/*
1669 	 * Bind us to the cpu that owned the vector before freeing it so
1670 	 * we don't lose an interrupt delivery race.
1671 	 */
1672 	td = curthread;
1673 	if (!rebooting) {
1674 		thread_lock(td);
1675 		if (sched_is_bound(td))
1676 			panic("apic_free_vector: Thread already bound.\n");
1677 		sched_bind(td, apic_cpuid(apic_id));
1678 		thread_unlock(td);
1679 	}
1680 	mtx_lock_spin(&icu_lock);
1681 	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
1682 	mtx_unlock_spin(&icu_lock);
1683 	if (!rebooting) {
1684 		thread_lock(td);
1685 		sched_unbind(td);
1686 		thread_unlock(td);
1687 	}
1688 }
1689 
1690 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
1691 static u_int
1692 apic_idt_to_irq(u_int apic_id, u_int vector)
1693 {
1694 	int irq;
1695 
1696 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
1697 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
1698 	    ("Vector %u does not map to an IRQ line", vector));
1699 #ifdef KDTRACE_HOOKS
1700 	KASSERT(vector != IDT_DTRACE_RET,
1701 	    ("Attempt to overwrite DTrace entry"));
1702 #endif
1703 	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
1704 	if (irq < 0)
1705 		irq = 0;
1706 	return (irq);
1707 }
1708 
1709 #ifdef DDB
1710 /*
1711  * Dump data about APIC IDT vector mappings.
1712  */
1713 DB_SHOW_COMMAND_FLAGS(apic, db_show_apic, DB_CMD_MEMSAFE)
1714 {
1715 	struct intsrc *isrc;
1716 	int i, verbose;
1717 	u_int apic_id;
1718 	u_int irq;
1719 
1720 	if (strcmp(modif, "vv") == 0)
1721 		verbose = 2;
1722 	else if (strcmp(modif, "v") == 0)
1723 		verbose = 1;
1724 	else
1725 		verbose = 0;
1726 	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
1727 		if (lapics[apic_id].la_present == 0)
1728 			continue;
1729 		db_printf("Interrupts bound to lapic %u\n", apic_id);
1730 		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
1731 			irq = lapics[apic_id].la_ioint_irqs[i];
1732 			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
1733 				continue;
1734 #ifdef KDTRACE_HOOKS
1735 			if (irq == IRQ_DTRACE_RET)
1736 				continue;
1737 #endif
1738 #ifdef XENHVM
1739 			if (irq == IRQ_EVTCHN)
1740 				continue;
1741 #endif
1742 			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
1743 			if (irq == IRQ_TIMER)
1744 				db_printf("lapic timer\n");
1745 			else if (irq < num_io_irqs) {
1746 				isrc = intr_lookup_source(irq);
1747 				if (isrc == NULL || verbose == 0)
1748 					db_printf("IRQ %u\n", irq);
1749 				else
1750 					db_dump_intr_event(isrc->is_event,
1751 					    verbose == 2);
1752 			} else
1753 				db_printf("IRQ %u ???\n", irq);
1754 		}
1755 	}
1756 }
1757 
1758 static void
1759 dump_mask(const char *prefix, uint32_t v, int base)
1760 {
1761 	int i, first;
1762 
1763 	first = 1;
1764 	for (i = 0; i < 32; i++)
1765 		if (v & (1 << i)) {
1766 			if (first) {
1767 				db_printf("%s:", prefix);
1768 				first = 0;
1769 			}
1770 			db_printf(" %02x", base + i);
1771 		}
1772 	if (!first)
1773 		db_printf("\n");
1774 }
1775 
1776 /* Show info from the lapic regs for this CPU. */
1777 DB_SHOW_COMMAND_FLAGS(lapic, db_show_lapic, DB_CMD_MEMSAFE)
1778 {
1779 	uint32_t v;
1780 
1781 	db_printf("lapic ID = %d\n", lapic_id());
1782 	v = lapic_read32(LAPIC_VERSION);
1783 	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
1784 	    v & 0xf);
1785 	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
1786 	v = lapic_read32(LAPIC_SVR);
1787 	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
1788 	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
1789 	db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));
1790 
1791 #define dump_field(prefix, regn, index)					\
1792 	dump_mask(__XSTRING(prefix ## index), 				\
1793 	    lapic_read32(LAPIC_ ## regn ## index),			\
1794 	    index * 32)
1795 
1796 	db_printf("In-service Interrupts:\n");
1797 	dump_field(isr, ISR, 0);
1798 	dump_field(isr, ISR, 1);
1799 	dump_field(isr, ISR, 2);
1800 	dump_field(isr, ISR, 3);
1801 	dump_field(isr, ISR, 4);
1802 	dump_field(isr, ISR, 5);
1803 	dump_field(isr, ISR, 6);
1804 	dump_field(isr, ISR, 7);
1805 
1806 	db_printf("TMR Interrupts:\n");
1807 	dump_field(tmr, TMR, 0);
1808 	dump_field(tmr, TMR, 1);
1809 	dump_field(tmr, TMR, 2);
1810 	dump_field(tmr, TMR, 3);
1811 	dump_field(tmr, TMR, 4);
1812 	dump_field(tmr, TMR, 5);
1813 	dump_field(tmr, TMR, 6);
1814 	dump_field(tmr, TMR, 7);
1815 
1816 	db_printf("IRR Interrupts:\n");
1817 	dump_field(irr, IRR, 0);
1818 	dump_field(irr, IRR, 1);
1819 	dump_field(irr, IRR, 2);
1820 	dump_field(irr, IRR, 3);
1821 	dump_field(irr, IRR, 4);
1822 	dump_field(irr, IRR, 5);
1823 	dump_field(irr, IRR, 6);
1824 	dump_field(irr, IRR, 7);
1825 
1826 #undef dump_field
1827 }
1828 #endif
1829 
1830 /*
1831  * APIC probing support code.  This includes code to manage enumerators.
1832  */
1833 
1834 static SLIST_HEAD(, apic_enumerator) enumerators =
1835 	SLIST_HEAD_INITIALIZER(enumerators);
1836 static struct apic_enumerator *best_enum;
1837 
1838 void
1839 apic_register_enumerator(struct apic_enumerator *enumerator)
1840 {
1841 #ifdef INVARIANTS
1842 	struct apic_enumerator *apic_enum;
1843 
1844 	SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
1845 		if (apic_enum == enumerator)
1846 			panic("%s: Duplicate register of %s", __func__,
1847 			    enumerator->apic_name);
1848 	}
1849 #endif
1850 	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
1851 }
1852 
1853 /*
 * We have to look for CPUs very early because certain subsystems
 * want to know how many CPUs we have very early on in the boot
 * process.
1857  */
1858 static void
1859 apic_init(void *dummy __unused)
1860 {
1861 	struct apic_enumerator *enumerator;
1862 	int retval, best;
1863 
1864 	/* We only support built in local APICs. */
1865 	if (!(cpu_feature & CPUID_APIC))
1866 		return;
1867 
1868 	/* Don't probe if APIC mode is disabled. */
1869 	if (resource_disabled("apic", 0))
1870 		return;
1871 
1872 	/* Probe all the enumerators to find the best match. */
1873 	best_enum = NULL;
1874 	best = 0;
1875 	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
1876 		retval = enumerator->apic_probe();
1877 		if (retval > 0)
1878 			continue;
1879 		if (best_enum == NULL || best < retval) {
1880 			best_enum = enumerator;
1881 			best = retval;
1882 		}
1883 	}
1884 	if (best_enum == NULL) {
1885 		if (bootverbose)
1886 			printf("APIC: Could not find any APICs.\n");
1887 #ifndef DEV_ATPIC
1888 		panic("running without device atpic requires a local APIC");
1889 #endif
1890 		return;
1891 	}
1892 
1893 	if (bootverbose)
1894 		printf("APIC: Using the %s enumerator.\n",
1895 		    best_enum->apic_name);
1896 
1897 #ifdef I686_CPU
1898 	/*
	 * To work around an erratum, we disable the local APIC on some
	 * CPUs during early startup.  We need to turn the local APIC
	 * back on for such CPUs now.
1902 	 */
1903 	ppro_reenable_apic();
1904 #endif
1905 
	/* Probe the CPUs in the system. */
1907 	retval = best_enum->apic_probe_cpus();
1908 	if (retval != 0)
1909 		printf("%s: Failed to probe CPUs: returned %d\n",
1910 		    best_enum->apic_name, retval);
1912 }
1913 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
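/*
 * A probe routine returns a value <= 0 on success, where values closer
 * to zero take precedence in the selection loop above, and a positive
 * value on failure.  A hedged sketch of a hypothetical probe following
 * this convention (the names and the -50 priority are illustrative):
 *
 *	static int
 *	foo_probe(void)
 *	{
 *
 *		if (!foo_table_present())
 *			return (ENXIO);
 *		return (-50);
 *	}
 */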
1914 
1915 /*
1916  * Set up the local APIC.  We have to do this prior to starting up the APs
1917  * in the SMP case.
1918  */
1919 static void
1920 apic_setup_local(void *dummy __unused)
1921 {
1922 	int retval;
1923 
1924 	if (best_enum == NULL)
1925 		return;
1926 
1927 	lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
1928 	    M_WAITOK | M_ZERO);
1929 
1930 	/* Initialize the local APIC. */
1931 	retval = best_enum->apic_setup_local();
1932 	if (retval != 0)
1933 		printf("%s: Failed to setup the local APIC: returned %d\n",
1934 		    best_enum->apic_name, retval);
1935 }
1936 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
1937 
1938 /*
1939  * Set up the I/O APICs.
1940  */
1941 static void
1942 apic_setup_io(void *dummy __unused)
1943 {
1944 	int retval;
1945 
1946 	if (best_enum == NULL)
1947 		return;
1948 
1949 	/*
1950 	 * The local APIC must be registered before other PICs and pseudo
1951 	 * PICs for proper suspend/resume ordering.
1952 	 */
1953 	intr_register_pic(&lapic_pic);
1954 
1955 	retval = best_enum->apic_setup_io();
1956 	if (retval != 0)
1957 		printf("%s: Failed to setup I/O APICs: returned %d\n",
1958 		    best_enum->apic_name, retval);
1959 
1960 	/*
1961 	 * Finish setting up the local APIC on the BSP once we know
1962 	 * how to properly program the LINT pins.  In particular, this
1963 	 * enables EOI suppression mode, if the LAPIC supports it and the
1964 	 * user did not disable it.
1965 	 */
1966 	lapic_setup(1);
1967 	if (bootverbose)
1968 		lapic_dump("BSP");
1969 
1970 	/* Enable the MSI "pic". */
1971 	msi_init();
1972 
1973 #ifdef XENHVM
1974 	xen_intr_alloc_irqs();
1975 #endif
1976 }
1977 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);
1978 
1979 #ifdef SMP
1980 /*
1981  * Inter-Processor Interrupt functions.  The lapic_ipi_*() functions are
1982  * private to the MD code.  The public interface for the rest of the
1983  * kernel is defined in mp_machdep.c.
1984  */
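/*
 * For instance, MI code raises an IPI on all other CPUs through the
 * wrappers in mp_machdep.c, which ultimately dispatch through the
 * ipi_vectored hook initialized at the end of this block:
 *
 *	ipi_all_but_self(IPI_AST);
 */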
1985 
1986 /*
1987  * Wait 'delay' microseconds for an IPI to be sent.  If 'delay' is -1,
1988  * wait forever.
1989  */
1990 int
1991 lapic_ipi_wait(int delay)
1992 {
1993 	uint64_t rx;
1994 
1995 	/* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
1996 	if (x2apic_mode)
1997 		return (1);
1998 
1999 	for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
2000 		if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
2001 		    APIC_DELSTAT_IDLE)
2002 			return (1);
2003 		ia32_pause();
2004 	}
2005 	return (0);
2006 }
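/*
 * Typical usage mirrors native_lapic_ipi_vectored() below: spin for a
 * bounded interval and treat a timeout as a stuck IPI:
 *
 *	if (!lapic_ipi_wait(lapic_ds_idle_timeout))
 *		panic("APIC: Previous IPI is stuck");
 */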
2007 
2008 void
2009 lapic_ipi_raw(register_t icrlo, u_int dest)
2010 {
2011 	uint32_t icrhi;
2012 
2013 	/* XXX: Need more sanity checking of icrlo? */
2014 	KASSERT(x2apic_mode || lapic_map != NULL,
2015 	    ("%s called too early", __func__));
2016 	KASSERT(x2apic_mode ||
2017 	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
2018 	    ("%s: invalid dest field", __func__));
2019 	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
2020 	    ("%s: reserved bits set in ICR LO register", __func__));
2021 
2022 	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
2023 		if (x2apic_mode)
2024 			icrhi = dest;
2025 		else
2026 			icrhi = dest << APIC_ID_SHIFT;
2027 		lapic_write_icr(icrhi, icrlo);
2028 	} else {
2029 		lapic_write_icr_lo(icrlo);
2030 	}
2031 }
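/*
 * An illustrative sketch of composing the ICR low word by hand for a
 * fixed-vector, edge-triggered IPI to a single physical APIC ID (the
 * 'vector' and 'apic_id' variables are illustrative); this mirrors the
 * default case of native_lapic_ipi_vectored() below:
 *
 *	icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
 *	    APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD;
 *	lapic_ipi_raw(icrlo, apic_id);
 */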
2032 
2033 #ifdef DETECT_DEADLOCK
2034 #define	AFTER_SPIN	50
2035 #endif
2036 
2037 static void
2038 native_lapic_ipi_vectored(u_int vector, int dest)
2039 {
2040 	register_t icrlo, destfield;
2041 
2042 	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
2043 	    ("%s: invalid vector %d", __func__, vector));
2044 
2045 	destfield = 0;
2046 	switch (dest) {
2047 	case APIC_IPI_DEST_SELF:
2048 		if (x2apic_mode && vector < IPI_NMI_FIRST) {
2049 			lapic_write_self_ipi(vector);
2050 			return;
2051 		}
2052 		icrlo = APIC_DEST_SELF;
2053 		break;
2054 	case APIC_IPI_DEST_ALL:
2055 		icrlo = APIC_DEST_ALLISELF;
2056 		break;
2057 	case APIC_IPI_DEST_OTHERS:
2058 		icrlo = APIC_DEST_ALLESELF;
2059 		break;
2060 	default:
2061 		icrlo = 0;
2062 		KASSERT(x2apic_mode ||
2063 		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
2064 		    ("%s: invalid destination 0x%x", __func__, dest));
2065 		destfield = dest;
2066 	}
2067 
2068 	/*
2069 	 * NMI IPIs are just fake vectors used to send an NMI.  If one is
2070 	 * passed, use the NMI delivery mode; otherwise specify the vector.
2071 	 */
2072 	if (vector >= IPI_NMI_FIRST)
2073 		icrlo |= APIC_DELMODE_NMI;
2074 	else
2075 		icrlo |= vector | APIC_DELMODE_FIXED;
2076 	icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;
2077 
2078 	/* Wait for an earlier IPI to finish. */
2079 	if (!lapic_ipi_wait(lapic_ds_idle_timeout)) {
2080 		if (KERNEL_PANICKED())
2081 			return;
2082 		else
2083 			panic("APIC: Previous IPI is stuck");
2084 	}
2085 
2086 	lapic_ipi_raw(icrlo, destfield);
2087 
2088 #ifdef DETECT_DEADLOCK
2089 	/* Wait for IPI to be delivered. */
2090 	if (!lapic_ipi_wait(AFTER_SPIN)) {
2091 #ifdef needsattention
2092 		/*
2093 		 * XXX FIXME:
2094 		 *
2095 		 * The above function waits for the message to actually be
2096 		 * delivered.  It breaks out after an arbitrary timeout
2097 		 * since the message should eventually be delivered (at
2098 		 * least in theory), and if it wasn't, we would catch
2099 		 * the failure with the check above when the next IPI is
2100 		 * sent.
2101 		 *
2102 		 * We could skip this wait entirely, EXCEPT it probably
2103 		 * protects us from other routines that assume that the
2104 		 * message was delivered and acted upon when this function
2105 		 * returns.
2106 		 */
2107 		printf("APIC: IPI might be stuck\n");
2108 #else /* !needsattention */
2109 		/* Wait until the message is sent, without a timeout. */
2110 		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
2111 			ia32_pause();
2112 #endif /* needsattention */
2113 	}
2114 #endif /* DETECT_DEADLOCK */
2115 }
2116 
2117 void (*ipi_vectored)(u_int, int) = &native_lapic_ipi_vectored;
2118 #endif /* SMP */
2119 
2120 /*
2121  * Since the IDT is shared by all CPUs, the IPI slot update needs to be
2122  * globally visible.
2123  *
2124  * Consider the case where an IPI is generated immediately after allocation:
2125  *     vector = lapic_ipi_alloc(ipifunc);
2126  *     ipi_selected(other_cpus, vector);
2127  *
2128  * In xAPIC mode a write to ICR_LO has serializing semantics because the
2129  * APIC page is mapped as an uncached region. In x2APIC mode there is an
2130  * explicit 'mfence' before the ICR MSR is written. Therefore in both cases
2131  * the IDT slot update is globally visible before the IPI is delivered.
2132  */
2133 int
2134 lapic_ipi_alloc(inthand_t *ipifunc)
2135 {
2136 	struct gate_descriptor *ip;
2137 	long func;
2138 	int idx, vector;
2139 
2140 	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
2141 	    ("invalid ipifunc %p", ipifunc));
2142 
2143 	vector = -1;
2144 	mtx_lock_spin(&icu_lock);
2145 	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
2146 		ip = &idt[idx];
2147 		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
2148 #ifdef __i386__
2149 		func -= setidt_disp;
2150 #endif
2151 		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
2152 		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
2153 			vector = idx;
2154 			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
2155 			break;
2156 		}
2157 	}
2158 	mtx_unlock_spin(&icu_lock);
2159 	return (vector);
2160 }
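/*
 * A minimal usage sketch (the handler name is hypothetical): allocate a
 * dynamic IPI slot, check for exhaustion, and release the slot with
 * lapic_ipi_free() when done:
 *
 *	vector = lapic_ipi_alloc(&IDTVEC(foo_ipi));
 *	if (vector == -1)
 *		return (ENOSPC);
 *	ipi_selected(other_cpus, vector);
 *	...
 *	lapic_ipi_free(vector);
 */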
2161 
2162 void
2163 lapic_ipi_free(int vector)
2164 {
2165 	struct gate_descriptor *ip;
2166 	long func __diagused;
2167 
2168 	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
2169 	    ("%s: invalid vector %d", __func__, vector));
2170 
2171 	mtx_lock_spin(&icu_lock);
2172 	ip = &idt[vector];
2173 	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
2174 #ifdef __i386__
2175 	func -= setidt_disp;
2176 #endif
2177 	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
2178 	    func != (uintptr_t)&IDTVEC(rsvd_pti),
2179 	    ("invalid idtfunc %#lx", func));
2180 	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
2181 	    SEL_KPL, GSEL_APIC);
2182 	mtx_unlock_spin(&icu_lock);
2183 }
2184