xref: /dragonfly/sys/platform/pc64/apic/lapic.c (revision a3127495)
1 /*
2  * Copyright (c) 1996, by Steve Passe
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. The name of the developer may NOT be used to endorse or promote products
11  *    derived from this software without specific prior written permission.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
26  */
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/bus.h>
32 #include <sys/machintr.h>
33 #include <machine/globaldata.h>
34 #include <machine/clock.h>
35 #include <machine/limits.h>
36 #include <machine/smp.h>
37 #include <machine/md_var.h>
38 #include <machine/pmap.h>
39 #include <machine/specialreg.h>
40 #include <machine_base/apic/lapic.h>
41 #include <machine_base/apic/ioapic.h>
42 #include <machine_base/apic/ioapic_abi.h>
43 #include <machine_base/apic/apicvar.h>
44 #include <machine_base/icu/icu_var.h>
45 #include <machine/segments.h>
46 #include <sys/thread2.h>
47 #include <sys/spinlock2.h>
48 
49 #include <machine/cputypes.h>
50 #include <machine/intr_machdep.h>
51 
52 extern int naps;
53 
54 volatile lapic_t *lapic;
55 
56 static void	lapic_timer_calibrate(void);
57 static void	lapic_timer_set_divisor(int);
58 static void	lapic_timer_fixup_handler(void *);
59 static void	lapic_timer_restart_handler(void *);
60 
61 
62 static int	lapic_timer_enable = 1;
63 TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);
64 
65 static int	lapic_timer_tscdeadline = 1;
66 TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);
67 
68 static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
69 static void	lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
70 static void	lapic_timer_intr_enable(struct cputimer_intr *);
71 static void	lapic_timer_intr_restart(struct cputimer_intr *);
72 static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);
73 
74 static struct cputimer_intr lapic_cputimer_intr = {
75 	.freq = 0,
76 	.reload = lapic_timer_intr_reload,
77 	.enable = lapic_timer_intr_enable,
78 	.config = cputimer_intr_default_config,
79 	.restart = lapic_timer_intr_restart,
80 	.pmfixup = lapic_timer_intr_pmfixup,
81 	.initclock = cputimer_intr_default_initclock,
82 	.pcpuhand = NULL,
83 	.next = SLIST_ENTRY_INITIALIZER,
84 	.name = "lapic",
85 	.type = CPUTIMER_INTR_LAPIC,
86 	.prio = CPUTIMER_INTR_PRIO_LAPIC,
87 	.caps = CPUTIMER_INTR_CAP_NONE,
88 	.priv = NULL
89 };
90 
91 static int		lapic_timer_divisor_idx = -1;
92 static const uint32_t	lapic_timer_divisors[] = {
93 	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
94 	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
95 };
96 #define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))
97 
98 static int	lapic_use_tscdeadline = 0;
99 /* The raw TSC frequency might not fit into a sysclock_t value. */
100 static int	lapic_timer_tscfreq_shift;
101 
102 /*
103  * APIC ID <-> CPU ID mapping structures.
104  */
105 int	cpu_id_to_apic_id[NAPICID];
106 int	apic_id_to_cpu_id[NAPICID];
107 int	lapic_enable = 1;
108 
109 /* Separate cachelines for each cpu's info. */
110 struct deadlines {
111 	uint64_t timestamp;
112 	uint64_t downcount_time;
113 	uint64_t padding[6];
114 };
115 struct deadlines *tsc_deadlines = NULL;
116 
117 /*
118  * Enable LAPIC, configure interrupts.
119  */
120 void
121 lapic_init(boolean_t bsp)
122 {
123 	uint32_t timer;
124 	u_int   temp;
125 
126 	if (bsp) {
127 		/* Decide whether we want to use TSC Deadline mode. */
128 		if (lapic_timer_tscdeadline != 0 &&
129 		    (cpu_feature2 & CPUID2_TSCDLT) &&
130 		    tsc_invariant && tsc_frequency != 0) {
131 			lapic_use_tscdeadline = 1;
132 			tsc_deadlines = kmalloc_cachealign(
133 			    sizeof(struct deadlines) * (naps + 1),
134 			    M_DEVBUF, M_WAITOK | M_ZERO);
135 		}
136 	}
137 
138 	/*
139 	 * Install vectors
140 	 *
141 	 * Since IDT is shared between BSP and APs, these vectors
142 	 * only need to be installed once; we do it on BSP.
143 	 */
144 	if (bsp) {
145 		if (cpu_vendor_id == CPU_VENDOR_AMD &&
146 		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
147 		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
148 			uint32_t tcr;
149 
150 			/*
151 			 * Set the LINTEN bit in the HyperTransport
152 			 * Transaction Control Register.
153 			 *
154 			 * This will cause EXTINT and NMI interrupts
155 			 * routed over the hypertransport bus to be
156 			 * fed into the LAPIC LINT0/LINT1.  If the bit
157 			 * isn't set, the interrupts will go to the
158 			 * general cpu INTR/NMI pins.  On a dual-core
159 			 * cpu the interrupt winds up going to BOTH cpus.
160 			 * The first cpu that does the interrupt ack
161 			 * cycle will get the correct interrupt.  The
162 			 * second cpu that does it will get a spurious
163 			 * interrupt vector (typically IRQ 7).
164 			 */
165 			outl(0x0cf8,
166 			    (1 << 31) |	/* enable */
167 			    (0 << 16) |	/* bus */
168 			    (0x18 << 11) | /* dev (cpu + 0x18) */
169 			    (0 << 8) |	/* func */
170 			    0x68	/* reg */
171 			    );
172 			tcr = inl(0xcfc);
173 			if ((tcr & 0x00010000) == 0) {
174 				kprintf("LAPIC: AMD LINTEN on\n");
175 				outl(0xcfc, tcr|0x00010000);
176 			}
177 			outl(0x0cf8, 0);
178 		}
179 
180 		/* Install a 'Spurious INTerrupt' vector */
181 		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
182 		    SDT_SYSIGT, SEL_KPL, 0);
183 
184 		/* Install a timer vector */
185 		setidt_global(XTIMER_OFFSET, Xtimer,
186 		    SDT_SYSIGT, SEL_KPL, 0);
187 
188 		/* Install an inter-CPU IPI for TLB invalidation */
189 		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
190 		    SDT_SYSIGT, SEL_KPL, 0);
191 
192 		/* Install an inter-CPU IPI for IPIQ messaging */
193 		setidt_global(XIPIQ_OFFSET, Xipiq,
194 		    SDT_SYSIGT, SEL_KPL, 0);
195 
196 		/* Install an inter-CPU IPI for CPU stop/restart */
197 		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
198 		    SDT_SYSIGT, SEL_KPL, 0);
199 
200 		/* Install an inter-CPU IPI for TLB invalidation */
201 		setidt_global(XSNIFF_OFFSET, Xsniff,
202 		    SDT_SYSIGT, SEL_KPL, 0);
203 	}
204 
205 	/*
206 	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
207 	 * aggregate interrupt input from the 8259.  The INTA cycle
208 	 * will be routed to the external controller (the 8259) which
209 	 * is expected to supply the vector.
210 	 *
211 	 * Must be setup edge triggered, active high.
212 	 *
213 	 * Disable LINT0 on BSP, if I/O APIC is enabled.
214 	 *
215 	 * Disable LINT0 on the APs.  It doesn't matter what delivery
216 	 * mode we use because we leave it masked.
217 	 */
218 	temp = lapic->lvt_lint0;
219 	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
220 		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
221 	if (bsp) {
222 		temp |= APIC_LVT_DM_EXTINT;
223 		if (ioapic_enable)
224 			temp |= APIC_LVT_MASKED;
225 	} else {
226 		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
227 	}
228 	lapic->lvt_lint0 = temp;
229 
230 	/*
231 	 * Setup LINT1 as NMI.
232 	 *
233 	 * Must be setup edge trigger, active high.
234 	 *
235 	 * Enable LINT1 on BSP, if I/O APIC is enabled.
236 	 *
237 	 * Disable LINT1 on the APs.
238 	 */
239 	temp = lapic->lvt_lint1;
240 	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
241 		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
242 	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
243 	if (bsp && ioapic_enable)
244 		temp &= ~APIC_LVT_MASKED;
245 	lapic->lvt_lint1 = temp;
246 
247 	/*
248 	 * Mask the LAPIC error interrupt, LAPIC performance counter
249 	 * interrupt.
250 	 */
251 	lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
252 	lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;
253 
254 	/*
255 	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
256 	 */
257 	timer = lapic->lvt_timer;
258 	timer &= ~APIC_LVTT_VECTOR;
259 	timer |= XTIMER_OFFSET;
260 	timer |= APIC_LVTT_MASKED;
261 	lapic->lvt_timer = timer;
262 
263 	/*
264 	 * Set the Task Priority Register as needed.   At the moment allow
265 	 * interrupts on all cpus (the APs will remain CLId until they are
266 	 * ready to deal).
267 	 */
268 	temp = lapic->tpr;
269 	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
270 	lapic->tpr = temp;
271 
272 	/*
273 	 * AMD specific setup
274 	 */
275 	if (cpu_vendor_id == CPU_VENDOR_AMD &&
276 	    (lapic->version & APIC_VER_AMD_EXT_SPACE)) {
277 		uint32_t ext_feat;
278 		uint32_t count;
279 		uint32_t max_count;
280 		uint32_t lvt;
281 		uint32_t i;
282 
283 		ext_feat = lapic->ext_feat;
284 		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
285 		max_count = sizeof(lapic->ext_lvt) / sizeof(lapic->ext_lvt[0]);
286 		if (count > max_count)
287 			count = max_count;
288 		for (i = 0; i < count; ++i) {
289 			lvt = lapic->ext_lvt[i].lvt;
290 
291 			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
292 				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
293 			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;
294 
295 			switch(i) {
296 			case APIC_EXTLVT_IBS:
297 				break;
298 			case APIC_EXTLVT_MCA:
299 				break;
300 			case APIC_EXTLVT_DEI:
301 				break;
302 			case APIC_EXTLVT_SBI:
303 				break;
304 			default:
305 				break;
306 			}
307 			if (bsp) {
308 				kprintf("   LAPIC AMD elvt%d: 0x%08x",
309 					i, lapic->ext_lvt[i].lvt);
310 				if (lapic->ext_lvt[i].lvt != lvt)
311 					kprintf(" -> 0x%08x", lvt);
312 				kprintf("\n");
313 			}
314 			lapic->ext_lvt[i].lvt = lvt;
315 		}
316 	}
317 
318 	/*
319 	 * Enable the LAPIC
320 	 */
321 	temp = lapic->svr;
322 	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
323 	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */
324 
325 	if (lapic->version & APIC_VER_EOI_SUPP) {
326 		if (temp & APIC_SVR_EOI_SUPP) {
327 			temp &= ~APIC_SVR_EOI_SUPP;
328 			if (bsp)
329 				kprintf("    LAPIC disabling EOI supp\n");
330 		}
331 	}
332 
333 	/*
334 	 * Set the spurious interrupt vector.  The low 4 bits of the vector
335 	 * must be 1111.
336 	 */
337 	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
338 		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
339 	temp &= ~APIC_SVR_VECTOR;
340 	temp |= XSPURIOUSINT_OFFSET;
341 
342 	lapic->svr = temp;
343 
344 	/*
345 	 * Pump out a few EOIs to clean out interrupts that got through
346 	 * before we were able to set the TPR.
347 	 */
348 	lapic->eoi = 0;
349 	lapic->eoi = 0;
350 	lapic->eoi = 0;
351 
352 	if (bsp) {
353 		lapic_timer_calibrate();
354 		if (lapic_timer_enable) {
355 			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
356 				/*
357 				 * Local APIC timer will not stop
358 				 * in deep C-state.
359 				 */
360 				lapic_cputimer_intr.caps |=
361 				    CPUTIMER_INTR_CAP_PS;
362 			}
363 			if (lapic_use_tscdeadline) {
364 				lapic_cputimer_intr.reload =
365 				    lapic_timer_tscdlt_reload;
366 			}
367 			cputimer_intr_register(&lapic_cputimer_intr);
368 			cputimer_intr_select(&lapic_cputimer_intr, 0);
369 		}
370 	} else if (!lapic_use_tscdeadline) {
371 		lapic_timer_set_divisor(lapic_timer_divisor_idx);
372 	}
373 
374 	if (bootverbose)
375 		apic_dump("apic_initialize()");
376 }
377 
378 static void
379 lapic_timer_set_divisor(int divisor_idx)
380 {
381 	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
382 	lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
383 }
384 
385 static void
386 lapic_timer_oneshot(u_int count)
387 {
388 	uint32_t value;
389 
390 	value = lapic->lvt_timer;
391 	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
392 	lapic->lvt_timer = value;
393 	lapic->icr_timer = count;
394 }
395 
396 static void
397 lapic_timer_oneshot_quick(u_int count)
398 {
399 	lapic->icr_timer = count;
400 }
401 
402 static void
403 lapic_timer_tscdeadline_quick(uint64_t diff)
404 {
405 	uint64_t val = rdtsc() + diff;
406 
407 	wrmsr(MSR_TSC_DEADLINE, val);
408 	tsc_deadlines[mycpuid].timestamp = val;
409 }
410 
411 static uint64_t
412 lapic_scale_to_tsc(unsigned value, unsigned scale)
413 {
414 	uint64_t val;
415 
416 	val = value;
417 	val *= tsc_frequency;
418 	val += (scale - 1);
419 	val /= scale;
420 	return val;
421 }
422 
423 static void
424 lapic_timer_calibrate(void)
425 {
426 	sysclock_t value;
427 
428 	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
429 	if (lapic_use_tscdeadline) {
430 		lapic_timer_tscfreq_shift = 0;
431 		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
432 			lapic_timer_tscfreq_shift++;
433 		lapic_cputimer_intr.freq =
434 		    tsc_frequency >> lapic_timer_tscfreq_shift;
435 		kprintf(
436 		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
437 		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
438 		return;
439 	}
440 
441 	/* Try to calibrate the local APIC timer. */
442 	for (lapic_timer_divisor_idx = 0;
443 	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
444 	     lapic_timer_divisor_idx++) {
445 		lapic_timer_set_divisor(lapic_timer_divisor_idx);
446 		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
447 		DELAY(2000000);
448 		value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
449 		if (value != APIC_TIMER_MAX_COUNT)
450 			break;
451 	}
452 	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
453 		panic("lapic: no proper timer divisor?!");
454 	lapic_cputimer_intr.freq = value / 2;
455 
456 	kprintf("lapic: divisor index %d, frequency %u Hz\n",
457 		lapic_timer_divisor_idx, lapic_cputimer_intr.freq);
458 }
459 
460 static void
461 lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
462 {
463 	struct globaldata *gd = mycpu;
464 	uint64_t diff, now, val;
465 
466 	if (reload > 1000*1000*1000)
467 		reload = 1000*1000*1000;
468 	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
469 	if (diff < 4)
470 		diff = 4;
471 	if (cpu_vendor_id == CPU_VENDOR_INTEL)
472 		cpu_lfence();
473 	else
474 		cpu_mfence();
475 	now = rdtsc();
476 	val = now + diff;
477 	if (gd->gd_timer_running) {
478 		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
479 		if (deadline == 0 || now > deadline || val < deadline) {
480 			wrmsr(MSR_TSC_DEADLINE, val);
481 			tsc_deadlines[mycpuid].timestamp = val;
482 		}
483 	} else {
484 		gd->gd_timer_running = 1;
485 		wrmsr(MSR_TSC_DEADLINE, val);
486 		tsc_deadlines[mycpuid].timestamp = val;
487 	}
488 }
489 
490 static void
491 lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
492 {
493 	struct globaldata *gd = mycpu;
494 
495 	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
496 	if (reload < 2)
497 		reload = 2;
498 
499 	if (gd->gd_timer_running) {
500 		if (reload < lapic->ccr_timer)
501 			lapic_timer_oneshot_quick(reload);
502 	} else {
503 		gd->gd_timer_running = 1;
504 		lapic_timer_oneshot_quick(reload);
505 	}
506 }
507 
508 static void
509 lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
510 {
511 	uint32_t timer;
512 
513 	timer = lapic->lvt_timer;
514 	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
515 	if (lapic_use_tscdeadline)
516 		timer |= APIC_LVTT_TSCDLT;
517 	lapic->lvt_timer = timer;
518 	if (lapic_use_tscdeadline)
519 		cpu_mfence();
520 
521 	lapic_timer_fixup_handler(NULL);
522 }
523 
524 static void
525 lapic_timer_fixup_handler(void *arg)
526 {
527 	int *started = arg;
528 
529 	if (started != NULL)
530 		*started = 0;
531 
532 	if (cpu_vendor_id == CPU_VENDOR_AMD) {
533 		/*
534 		 * Detect the presence of C1E capability mostly on latest
535 		 * dual-cores (or future) k8 family.  This feature renders
536 		 * the local APIC timer dead, so we disable it by reading
537 		 * the Interrupt Pending Message register and clearing both
538 		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
539 		 *
540 		 * Reference:
541 		 *   "BIOS and Kernel Developer's Guide for AMD NPT
542 		 *    Family 0Fh Processors"
543 		 *   #32559 revision 3.00
544 		 */
545 		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
546 		    (cpu_id & 0x0fff0000) >= 0x00040000) {
547 			uint64_t msr;
548 
549 			msr = rdmsr(0xc0010055);
550 			if (msr & 0x18000000) {
551 				struct globaldata *gd = mycpu;
552 
553 				kprintf("cpu%d: AMD C1E detected\n",
554 					gd->gd_cpuid);
555 				wrmsr(0xc0010055, msr & ~0x18000000ULL);
556 
557 				/*
558 				 * We are kinda stalled;
559 				 * kick start again.
560 				 */
561 				gd->gd_timer_running = 1;
562 				if (lapic_use_tscdeadline) {
563 					/* Maybe reached in Virtual Machines? */
564 					lapic_timer_tscdeadline_quick(5000);
565 				} else {
566 					lapic_timer_oneshot_quick(2);
567 				}
568 
569 				if (started != NULL)
570 					*started = 1;
571 			}
572 		}
573 	}
574 }
575 
576 static void
577 lapic_timer_restart_handler(void *dummy __unused)
578 {
579 	int started;
580 
581 	lapic_timer_fixup_handler(&started);
582 	if (!started) {
583 		struct globaldata *gd = mycpu;
584 
585 		gd->gd_timer_running = 1;
586 		if (lapic_use_tscdeadline) {
587 			/* Maybe reached in Virtual Machines? */
588 			lapic_timer_tscdeadline_quick(5000);
589 		} else {
590 			lapic_timer_oneshot_quick(2);
591 		}
592 	}
593 }
594 
595 /*
596  * This function is called only by ACPICA code currently:
597  * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
598  *   module controls PM.  So once ACPICA is attached, we try
599  *   to apply the fixup to prevent LAPIC timer from hanging.
600  */
601 static void
602 lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
603 {
604 	lwkt_send_ipiq_mask(smp_active_mask,
605 			    lapic_timer_fixup_handler, NULL);
606 }
607 
608 static void
609 lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
610 {
611 	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
612 }
613 
614 
615 /*
616  * dump contents of local APIC registers
617  */
618 void
619 apic_dump(char* str)
620 {
621 	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
622 	kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
623 		lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
624 }
625 
626 /*
627  * Inter Processor Interrupt functions.
628  */
629 
630 /*
631  * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
632  *
633  *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
634  *  vector is any valid SYSTEM INT vector
635  *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
636  *
637  * WARNINGS!
638  *
639  * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
640  * one IPI from being sent to any given cpu at a time.  Thus we no longer
641  * have to process incoming IPIs while waiting for the status to clear.
642  * No deadlock should be possible.
643  *
644  * We now physically disable interrupts for the lapic ICR operation.  If
645  * we do not do this then it looks like an EOI sent to the lapic (which
646  * occurs even with a critical section) can interfere with the command
647  * register ready status and cause an IPI to be lost.
648  *
649  * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
650  * register to busy just before we write to icr_lo, resulting in a lost
651  * issuance.  This only appears to occur on Intel cpus and is not
652  * documented.  It could simply be that cpus are so fast these days that
653  * it was always an issue, but is only now rearing its ugly head.  This
654  * is conjecture.
655  */
656 int
657 apic_ipi(int dest_type, int vector, int delivery_mode)
658 {
659 	uint32_t icr_hi;
660 	uint32_t icr_lo;
661 	int64_t tsc;
662 	int loops = 1;
663 
664 	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
665 		tsc = rdtsc();
666 		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
667 			cpu_pause();
668 			if ((tsc_sclock_t)(rdtsc() -
669 					   (tsc + tsc_frequency)) > 0) {
670 				kprintf("apic_ipi stall cpu %d (sing)\n",
671 					mycpuid);
672 				tsc = rdtsc();
673 				if (++loops > 30)
674 					panic("apic stall");
675 			}
676 		}
677 	}
678 	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
679 	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type |
680 		 APIC_LEVEL_ASSERT | delivery_mode | vector;
681 	lapic->icr_hi = icr_hi;
682 	lapic->icr_lo = icr_lo;
683 
684 	return 0;
685 }
686 
687 /*
688  * Interrupts must be hard-disabled by caller
689  */
690 void
691 single_apic_ipi(int cpu, int vector, int delivery_mode)
692 {
693 	uint32_t  icr_lo;
694 	uint32_t  icr_hi;
695 	int64_t tsc;
696 	int loops = 1;
697 
698 	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
699 		tsc = rdtsc();
700 		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
701 			cpu_pause();
702 			if ((tsc_sclock_t)(rdtsc() -
703 					   (tsc + tsc_frequency)) > 0) {
704 				kprintf("single_apic_ipi stall cpu %d (sing)\n",
705 					mycpuid);
706 				tsc = rdtsc();
707 				if (++loops > 30)
708 					panic("apic stall");
709 			}
710 		}
711 	}
712 	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
713 	icr_hi |= (CPUID_TO_APICID(cpu) << 24);
714 
715 	/* build ICR_LOW */
716 	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
717 		 APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD | delivery_mode | vector;
718 
719 	/* write APIC ICR */
720 	lapic->icr_hi = icr_hi;
721 	lapic->icr_lo = icr_lo;
722 }
723 
#if 0

/*
 * Non-spinning variant of single_apic_ipi() (compiled out).
 *
 * Returns 0 if the apic is busy, 1 if we were able to queue the request.
 *
 * NOT WORKING YET!  The code as-is may end up not queueing an IPI at all
 * to the target, and the scheduler does not 'poll' for IPI messages.
 */
int
single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
{
	unsigned long saved_rflags;
	u_long  hi;
	u_long  lo;

	saved_rflags = read_rflags();
	cpu_disable_intr();

	if ((lapic->icr_lo & APIC_DELSTAT_MASK) == 0) {
		/* destination id goes into the high half first */
		hi = lapic->icr_hi & ~APIC_ID_MASK;
		hi |= (CPUID_TO_APICID(cpu) << 24);
		lapic->icr_hi = hi;

		/* build ICR_LOW */
		lo = (lapic->icr_lo & APIC_RESV2_MASK) |
		     APIC_DEST_DESTFLD | delivery_mode | vector;

		/* write APIC ICR */
		lapic->icr_lo = lo;
		write_rflags(saved_rflags);

		return(1);
	}

	/* still busy with a previous command */
	write_rflags(saved_rflags);
	return(0);
}

#endif
761 
762 /*
763  * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
764  *
765  * target is a bitmask of destination cpus.  Vector is any
766  * valid system INT vector.  Delivery mode may be either
767  * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
768  *
769  * Interrupts must be hard-disabled by caller
770  */
771 void
772 selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
773 {
774 	while (CPUMASK_TESTNZERO(target)) {
775 		int n = BSFCPUMASK(target);
776 		CPUMASK_NANDBIT(target, n);
777 		single_apic_ipi(n, vector, delivery_mode);
778 	}
779 }
780 
781 /*
782  * Load a 'downcount time' in uSeconds.
783  */
784 void
785 set_apic_timer(int us)
786 {
787 	u_int count;
788 
789 	if (lapic_use_tscdeadline) {
790 		uint64_t val;
791 
792 		val = lapic_scale_to_tsc(us, 1000000);
793 		val += rdtsc();
794 		/* No need to arm the lapic here, just track the timeout. */
795 		tsc_deadlines[mycpuid].downcount_time = val;
796 		return;
797 	}
798 
799 	/*
800 	 * When we reach here, lapic timer's frequency
801 	 * must have been calculated as well as the
802 	 * divisor (lapic->dcr_timer is setup during the
803 	 * divisor calculation).
804 	 */
805 	KKASSERT(lapic_cputimer_intr.freq != 0 &&
806 		 lapic_timer_divisor_idx >= 0);
807 
808 	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
809 	lapic_timer_oneshot(count);
810 }
811 
812 
813 /*
814  * Read remaining time in timer, in microseconds (rounded up).
815  */
816 int
817 read_apic_timer(void)
818 {
819 	uint64_t val;
820 
821 	if (lapic_use_tscdeadline) {
822 		uint64_t now;
823 
824 		val = tsc_deadlines[mycpuid].downcount_time;
825 		now = rdtsc();
826 		if (val == 0 || now > val) {
827 			return 0;
828 		} else {
829 			val -= now;
830 			val *= 1000000;
831 			val += (tsc_frequency - 1);
832 			val /= tsc_frequency;
833 			if (val > INT_MAX)
834 				val = INT_MAX;
835 			return val;
836 		}
837 	}
838 
839 	val = lapic->ccr_timer;
840 	if (val == 0)
841 		return 0;
842 
843 	KKASSERT(lapic_cputimer_intr.freq > 0);
844 	val *= 1000000;
845 	val += (lapic_cputimer_intr.freq - 1);
846 	val /= lapic_cputimer_intr.freq;
847 	if (val > INT_MAX)
848 		val = INT_MAX;
849 	return val;
850 }
851 
852 
/*
 * Busy-wait for 'count' microseconds: start the downcount with
 * set_apic_timer() and spin until read_apic_timer() reports 0.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer() != 0)
		continue;	/* spin */
}
863 
864 int
865 lapic_unused_apic_id(int start)
866 {
867 	int i;
868 
869 	for (i = start; i < APICID_MAX; ++i) {
870 		if (APICID_TO_CPUID(i) == -1)
871 			return i;
872 	}
873 	return NAPICID;
874 }
875 
876 void
877 lapic_map(vm_paddr_t lapic_addr)
878 {
879 	lapic = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
880 }
881 
882 static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
883 	TAILQ_HEAD_INITIALIZER(lapic_enumerators);
884 
885 int
886 lapic_config(void)
887 {
888 	struct lapic_enumerator *e;
889 	int error, i, ap_max;
890 
891 	KKASSERT(lapic_enable);
892 
893 	for (i = 0; i < NAPICID; ++i)
894 		APICID_TO_CPUID(i) = -1;
895 
896 	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
897 		error = e->lapic_probe(e);
898 		if (!error)
899 			break;
900 	}
901 	if (e == NULL) {
902 		kprintf("LAPIC: Can't find LAPIC\n");
903 		return ENXIO;
904 	}
905 
906 	error = e->lapic_enumerate(e);
907 	if (error) {
908 		kprintf("LAPIC: enumeration failed\n");
909 		return ENXIO;
910 	}
911 
912 	ap_max = MAXCPU - 1;
913 	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
914 	if (ap_max > MAXCPU - 1)
915 		ap_max = MAXCPU - 1;
916 
917 	if (naps > ap_max) {
918 		kprintf("LAPIC: Warning use only %d out of %d "
919 			"available APs\n",
920 			ap_max, naps);
921 		naps = ap_max;
922 	}
923 
924 	return 0;
925 }
926 
927 void
928 lapic_enumerator_register(struct lapic_enumerator *ne)
929 {
930 	struct lapic_enumerator *e;
931 
932 	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
933 		if (e->lapic_prio < ne->lapic_prio) {
934 			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
935 			return;
936 		}
937 	}
938 	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
939 }
940 
941 void
942 lapic_set_cpuid(int cpu_id, int apic_id)
943 {
944 	CPUID_TO_APICID(cpu_id) = apic_id;
945 	APICID_TO_CPUID(apic_id) = cpu_id;
946 }
947 
948 void
949 lapic_fixup_noioapic(void)
950 {
951 	u_int   temp;
952 
953 	/* Only allowed on BSP */
954 	KKASSERT(mycpuid == 0);
955 	KKASSERT(!ioapic_enable);
956 
957 	temp = lapic->lvt_lint0;
958 	temp &= ~APIC_LVT_MASKED;
959 	lapic->lvt_lint0 = temp;
960 
961 	temp = lapic->lvt_lint1;
962 	temp |= APIC_LVT_MASKED;
963 	lapic->lvt_lint1 = temp;
964 }
965 
966 static void
967 lapic_sysinit(void *dummy __unused)
968 {
969 	if (lapic_enable) {
970 		int error;
971 
972 		error = lapic_config();
973 		if (error)
974 			lapic_enable = 0;
975 	}
976 
977 	if (lapic_enable) {
978 		/* Initialize BSP's local APIC */
979 		lapic_init(TRUE);
980 	} else if (ioapic_enable) {
981 		ioapic_enable = 0;
982 		icu_reinit_noioapic();
983 	}
984 }
985 SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);
986