xref: /openbsd/sys/arch/i386/i386/cpu.c (revision 379777c0)
1 /*	$OpenBSD: cpu.c,v 1.114 2023/10/24 13:20:10 claudio Exp $	*/
2 /* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */
3 
4 /*-
5  * Copyright (c) 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by RedBack Networks Inc.
10  *
11  * Author: Bill Sommerfeld
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 1999 Stefan Grefen
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *      This product includes software developed by the NetBSD
49  *      Foundation, Inc. and its contributors.
50  * 4. Neither the name of The NetBSD Foundation nor the names of its
51  *    contributors may be used to endorse or promote products derived
52  *    from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
55  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 #include "lapic.h"
68 #include "ioapic.h"
69 #include "pvbus.h"
70 
71 #include <sys/param.h>
72 #include <sys/timeout.h>
73 #include <sys/systm.h>
74 #include <sys/device.h>
75 #include <sys/memrange.h>
76 #include <sys/atomic.h>
77 
78 #include <uvm/uvm_extern.h>
79 
80 #include <machine/codepatch.h>
81 #include <machine/cpu_full.h>
82 #include <machine/cpu.h>
83 #include <machine/cpufunc.h>
84 #include <machine/cpuvar.h>
85 #include <machine/pmap.h>
86 #include <machine/mpbiosvar.h>
87 #include <machine/pcb.h>
88 #include <machine/specialreg.h>
89 #include <machine/segments.h>
90 #include <machine/gdt.h>
91 #include <machine/pio.h>
92 
93 #if NLAPIC > 0
94 #include <machine/i82489reg.h>
95 #include <machine/i82489var.h>
96 #endif
97 
98 #if NIOAPIC > 0
99 #include <machine/i82093reg.h>
100 #include <machine/i82093var.h>
101 #endif
102 
103 #if NPVBUS > 0
104 #include <dev/pv/pvvar.h>
105 #endif
106 
107 #include <dev/ic/mc146818reg.h>
108 #include <i386/isa/nvram.h>
109 #include <dev/isa/isareg.h>
110 
111 /* #define CPU_DEBUG */
112 
113 #ifdef CPU_DEBUG
114 #define DPRINTF(x...)	do  { printf(x); } while (0)
115 #else
116 #define DPRINTF(x...)
117 #endif	/* CPU_DEBUG */
118 
119 
120 struct cpu_softc;
121 
122 int     cpu_match(struct device *, void *, void *);
123 void    cpu_attach(struct device *, struct device *, void *);
124 int     cpu_activate(struct device *, int);
125 void	patinit(struct cpu_info *ci);
126 void	cpu_idle_mwait_cycle(void);
127 void	cpu_init_mwait(struct cpu_softc *);
128 void	cpu_init_tss(struct i386tss *, void *, void *);
129 void	cpu_update_nmi_cr3(vaddr_t);
130 
131 u_int cpu_mwait_size, cpu_mwait_states;
132 
133 #ifdef MULTIPROCESSOR
134 int mp_cpu_start(struct cpu_info *);
135 void mp_cpu_start_cleanup(struct cpu_info *);
136 struct cpu_functions mp_cpu_funcs =
137     { mp_cpu_start, NULL, mp_cpu_start_cleanup };
138 #endif
139 
140 /*
141  * Statically-allocated CPU info for the primary CPU (or the only
142  * CPU, on uniprocessors).  The CPU info list is initialized to
143  * point at it.
144  */
145 struct cpu_info_full cpu_info_full_primary = { .cif_cpu = { .ci_self = &cpu_info_primary } };
146 
147 struct cpu_info *cpu_info_list = &cpu_info_primary;
148 
149 #ifdef MULTIPROCESSOR
150 /*
151  * Array of CPU info structures.  Must be statically-allocated because
152  * curproc, etc. are used early.
153  */
154 
155 struct cpu_info *cpu_info[MAXCPUS] = { &cpu_info_primary };
156 
157 void   	cpu_hatch(void *);
158 void   	cpu_boot_secondary(struct cpu_info *);
159 void	cpu_copy_trampoline(void);
160 
161 /*
162  * Runs once per boot once multiprocessor goo has been detected and
163  * the local APIC has been mapped.
164  * Called from mpbios_scan();
165  */
void
cpu_init_first(void)
{
	/* install the AP spin-up trampoline in low physical memory */
	cpu_copy_trampoline();
}
171 #endif
172 
/*
 * Autoconf softc for a cpu device: the generic device plus a pointer
 * to the per-CPU info structure it represents.
 */
struct cpu_softc {
	struct device sc_dev;
	struct cpu_info *sc_info;
};

/* autoconf attachment glue: match/attach/activate entry points */
const struct cfattach cpu_ca = {
	sizeof(struct cpu_softc), cpu_match, cpu_attach, NULL, cpu_activate
};

struct cfdriver cpu_cd = {
	NULL, "cpu", DV_DULL /* XXX DV_CPU */
};
185 
186 void	replacesmap(void);
187 
188 extern int _stac;
189 extern int _clac;
190 
191 u_int32_t mp_pdirpa;
192 
/*
 * One-time kernel text patch: replace the STAC/CLAC codepatch tags
 * with the real 3-byte instructions.  Presumably invoked once SMAP
 * support has been detected on the CPU — confirm against caller.
 */
void
replacesmap(void)
{
	static int replacedone = 0;
	int s;

	/* idempotent: only patch the kernel text once */
	if (replacedone)
		return;
	replacedone = 1;

	/* block interrupts while kernel text is rewritten */
	s = splhigh();

	codepatch_replace(CPTAG_STAC, &_stac, 3);
	codepatch_replace(CPTAG_CLAC, &_clac, 3);

	splx(s);
}
210 
211 int
212 cpu_match(struct device *parent, void *match, void *aux)
213 {
214   	struct cfdata *cf = match;
215 	struct cpu_attach_args *caa = aux;
216 
217 	if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0)
218 		return 0;
219 
220 	if (cf->cf_unit >= MAXCPUS)
221 		return 0;
222 
223 	return 1;
224 }
225 
/*
 * Attach a cpu device.  The boot processor (and the only CPU on
 * uniprocessor kernels) uses the statically-allocated
 * cpu_info_primary; application processors get a freshly allocated,
 * zeroed cpu_info_full which is wired into cpu_info[] and the
 * cpu_info_list.  The role (SP/BP/AP) decides how much hardware
 * initialization happens here versus later in cpu_hatch().
 */
void
cpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct cpu_softc *sc = (void *)self;
	struct cpu_attach_args *caa = (struct cpu_attach_args *)aux;
	struct cpu_info *ci;

#ifdef MULTIPROCESSOR
	int cpunum = sc->sc_dev.dv_unit;
	vaddr_t kstack;
	struct pcb *pcb;
#endif

	if (caa->cpu_role == CPU_ROLE_AP) {
		struct cpu_info_full *cif;

		/* APs: allocate a zeroed cpu_info_full of their own */
		cif = km_alloc(sizeof *cif, &kv_any, &kp_zero, &kd_waitok);
		ci = &cif->cif_cpu;
#ifdef MULTIPROCESSOR
		/* point the cpu_info at the TSSes/GDT embedded in the cif */
		ci->ci_tss = &cif->cif_tss;
		ci->ci_nmi_tss = &cif->cif_nmi_tss;
		ci->ci_gdt = (void *)&cif->cif_gdt;
		cpu_enter_pages(cif);
		if (cpu_info[cpunum] != NULL)
			panic("cpu at apic id %d already attached?", cpunum);
		cpu_info[cpunum] = ci;
#endif
	} else {
		/* SP/BP: use the static primary cpu_info */
		ci = &cpu_info_primary;
#ifdef MULTIPROCESSOR
		/* sanity: we must be running on the CPU being attached */
		if (caa->cpu_apicid != lapic_cpu_number()) {
			panic("%s: running cpu is at apic %d"
			    " instead of at expected %d",
			    sc->sc_dev.dv_xname, lapic_cpu_number(), caa->cpu_apicid);
		}
#endif
	}

	/* record identity and capabilities passed in by the bus scan */
	ci->ci_self = ci;
	sc->sc_info = ci;
	ci->ci_dev = self;
	ci->ci_apicid = caa->cpu_apicid;
	ci->ci_acpi_proc_id = caa->cpu_acpi_proc_id;
#ifdef MULTIPROCESSOR
	ci->ci_cpuid = cpunum;
#else
	ci->ci_cpuid = 0;	/* False for APs, so what, they're not used */
#endif
	ci->ci_signature = caa->cpu_signature;
	ci->ci_feature_flags = caa->feature_flags;
	ci->ci_func = caa->cpu_func;

#ifdef MULTIPROCESSOR
	/*
	 * Allocate UPAGES contiguous pages for the idle PCB and stack.
	 */

	kstack = (vaddr_t)km_alloc(USPACE, &kv_any, &kp_dirty, &kd_nowait);
	if (kstack == 0) {
		if (cpunum == 0) { /* XXX */
			panic("cpu_attach: unable to allocate idle stack for"
			    " primary");
		}
		printf("%s: unable to allocate idle stack\n",
		    sc->sc_dev.dv_xname);
		return;
	}
	/* PCB lives at the bottom of the idle stack allocation */
	pcb = ci->ci_idle_pcb = (struct pcb *)kstack;
	memset(pcb, 0, USPACE);

	/* initial SP: top of the area, minus scratch and a trapframe */
	pcb->pcb_kstack = kstack + USPACE - 16 - sizeof (struct trapframe);
	pcb->pcb_esp = pcb->pcb_ebp = pcb->pcb_kstack;
	pcb->pcb_pmap = pmap_kernel();
	pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa;
#endif
	ci->ci_curpmap = pmap_kernel();

	/* further PCB init done later. */

	printf(": ");

	switch (caa->cpu_role) {
	case CPU_ROLE_SP:
		/* only CPU in the system: init everything right here */
		printf("(uniprocessor)\n");
		ci->ci_flags |= CPUF_PRESENT | CPUF_SP | CPUF_PRIMARY;
#ifndef SMALL_KERNEL
		cpu_ucode_apply(ci);
#endif
		cpu_tsx_disable(ci);
		identifycpu(ci);
#ifdef MTRR
		mem_range_attach();
#endif
		cpu_init(ci);
		cpu_init_mwait(sc);
		break;

	case CPU_ROLE_BP:
		/* boot processor: full init plus APIC bring-up */
		printf("apid %d (boot processor)\n", caa->cpu_apicid);
		ci->ci_flags |= CPUF_PRESENT | CPUF_BSP | CPUF_PRIMARY;
#ifndef SMALL_KERNEL
		cpu_ucode_apply(ci);
#endif
		cpu_tsx_disable(ci);
		identifycpu(ci);
#ifdef MTRR
		mem_range_attach();
#endif
		cpu_init(ci);

#if NLAPIC > 0
		/*
		 * Enable local apic
		 */
		lapic_enable();
		lapic_calibrate_timer(ci);
#endif
#if NIOAPIC > 0
		ioapic_bsp_id = caa->cpu_apicid;
#endif
		cpu_init_mwait(sc);
		break;

	case CPU_ROLE_AP:
		/*
		 * report on an AP; it finishes initializing itself
		 * in cpu_hatch() after being started.
		 */
		printf("apid %d (application processor)\n", caa->cpu_apicid);

#ifdef MULTIPROCESSOR
		gdt_alloc_cpu(ci);
		ci->ci_flags |= CPUF_PRESENT | CPUF_AP;
#ifndef SMALL_KERNEL
		cpu_ucode_apply(ci);
#endif
		cpu_tsx_disable(ci);
		identifycpu(ci);
		clockqueue_init(&ci->ci_queue);
		sched_init_cpu(ci);
		/* link after the primary in the cpu_info list */
		ci->ci_next = cpu_info_list->ci_next;
		cpu_info_list->ci_next = ci;
		ncpus++;
#endif
		break;

	default:
		panic("unknown processor type??");
	}

#ifdef MULTIPROCESSOR
	if (mp_verbose) {
		printf("%s: kstack at 0x%lx for %d bytes\n",
		    ci->ci_dev->dv_xname, kstack, USPACE);
		printf("%s: idle pcb at %p, idle sp at 0x%x\n",
		    ci->ci_dev->dv_xname, pcb, pcb->pcb_esp);
	}
#endif

}
385 
386 /*
387  * Initialize the processor appropriately.
388  */
389 
void
cpu_init(struct cpu_info *ci)
{
	u_int cr4 = 0;

	/* configure the CPU if needed */
	if (ci->cpu_setup != NULL)
		(*ci->cpu_setup)(ci);

	/*
	 * We do this here after identifycpu() because errata may affect
	 * what we do.
	 */
	patinit(ci);

	/*
	 * Enable ring 0 write protection.
	 */
	lcr0(rcr0() | CR0_WP);

	/* accumulate CR4 feature bits, then write CR4 once at the end */
	if (cpu_feature & CPUID_PGE)
		cr4 |= CR4_PGE;	/* enable global TLB caching */

	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP)
		cr4 |= CR4_SMEP;
	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
		cr4 |= CR4_SMAP;
	if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP)
		cr4 |= CR4_UMIP;

	/*
	 * If we have FXSAVE/FXRESTOR, use them.
	 */
	if (cpu_feature & CPUID_FXSR) {
		cr4 |= CR4_OSFXSR;

		/*
		 * If we have SSE/SSE2, enable XMM exceptions.
		 */
		if (cpu_feature & (CPUID_SSE|CPUID_SSE2))
			cr4 |= CR4_OSXMMEXCPT;
	}
	/* no cr4 on most 486s */
	if (cr4 != 0)
		lcr4(rcr4()|cr4);

#ifdef MULTIPROCESSOR
	ci->ci_flags |= CPUF_RUNNING;
	/*
	 * Big hammer: flush all TLB entries, including ones from PTEs
	 * with the G bit set.  This should only be necessary if TLB
	 * shootdown falls far behind.
	 *
	 * Intel Architecture Software Developer's Manual, Volume 3,
	 *	System Programming, section 9.10, "Invalidating the
	 * Translation Lookaside Buffers (TLBS)":
	 * "The following operations invalidate all TLB entries, irrespective
	 * of the setting of the G flag:
	 * ...
	 * "(P6 family processors only): Writing to control register CR4 to
	 * modify the PSE, PGE, or PAE flag."
	 *
	 * (the alternatives not quoted above are not an option here.)
	 *
	 * If PGE is not in use, we reload CR3 for the benefit of
	 * pre-P6-family processors.
	 */

	if (cpu_feature & CPUID_PGE) {
		cr4 = rcr4();
		lcr4(cr4 & ~CR4_PGE);	/* toggle PGE off and back on ... */
		lcr4(cr4);		/* ... to flush global TLB entries */
	} else
		tlbflush();
#endif
}
466 
/*
 * Disable Intel TSX (RTM) and hide the TSX CPUID feature bits on
 * CPUs that expose the TSX_CTRL MSR, so transactional memory cannot
 * be used (a speculation-hardening measure).
 */
void
cpu_tsx_disable(struct cpu_info *ci)
{
	uint64_t msr;
	uint32_t dummy, sefflags_edx;

	/* this runs before identifycpu() populates ci_feature_sefflags_edx */
	if (cpuid_level < 0x07)
		return;
	CPUID_LEAF(0x7, 0, dummy, dummy, dummy, sefflags_edx);

	/* TSX_CTRL availability is reported via the ARCH_CAPABILITIES MSR */
	if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
	    (sefflags_edx & SEFF0EDX_ARCH_CAP)) {
		msr = rdmsr(MSR_ARCH_CAPABILITIES);
		if (msr & ARCH_CAP_TSX_CTRL) {
			/* turn RTM off and clear the TSX CPUID bits */
			msr = rdmsr(MSR_TSX_CTRL);
			msr |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_TSX_CPUID_CLEAR;
			wrmsr(MSR_TSX_CTRL, msr);
		}
	}
}
488 
/*
 * Program the Page Attribute Table: replace the power-on WT entries
 * with WC so pmap can create write-combining mappings, and record
 * the PTE bit pattern that selects them in pmap_pg_wc.
 */
void
patinit(struct cpu_info *ci)
{
	extern int	pmap_pg_wc;
	u_int64_t	reg;

	/* nothing to do without PAT support */
	if ((ci->ci_feature_flags & CPUID_PAT) == 0)
		return;

	/*
	 * Set up PAT bits.
	 * The default pat table is the following:
	 * WB, WT, UC- UC, WB, WT, UC-, UC
	 * We change it to:
	 * WB, WC, UC-, UC, WB, WC, UC-, UC.
	 * i.e change the WT bit to be WC.
	 */
	reg = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);

	wrmsr(MSR_CR_PAT, reg);
	pmap_pg_wc = PG_WC;
}
514 
515 struct timeout rdrand_tmo;
516 void rdrand(void *);
517 
/*
 * Harvest entropy from the RDSEED/RDRAND instructions, mixed with
 * TSC samples, and feed it into the random pool.  When handed a
 * timeout (v != NULL) it reschedules itself every 10ms.
 */
void
rdrand(void *v)
{
	struct timeout *tmo = v;
	extern int      has_rdrand;
	extern int      has_rdseed;
	uint32_t r;
	uint64_t tsc = 0;
	uint8_t valid = 0;
	int i;

	/* nothing to harvest without either instruction */
	if (has_rdrand == 0 && has_rdseed == 0)
		return;

	for (i = 0; i < 4; i++) {
		if (cpu_feature & CPUID_TSC)
			tsc = rdtsc();
		/* prefer RDSEED; CF (-> valid) is clear if it ran dry */
		if (has_rdseed)
			__asm volatile(
			    "rdseed	%0\n\t"
			    "setc	%1\n"
			    : "=r" (r), "=qm" (valid) );
		/* fall back to RDRAND when RDSEED is absent or empty */
		if (has_rdseed == 0 || valid == 0)
			__asm volatile(
			    "rdrand	%0\n\t"
			    "setc	%1\n"
			    : "=r" (r), "=qm" (valid) );
		r ^= tsc;
		r ^= valid;		/* potential rdrand empty */
		if (has_rdrand)
			if (cpu_feature & CPUID_TSC)
				r += rdtsc();	/* potential vmexit latency */
		enqueue_randomness(r);
	}

	if (tmo)
		timeout_add_msec(tmo, 10);
}
556 
557 int
558 cpu_activate(struct device *self, int act)
559 {
560 	struct cpu_softc *sc = (struct cpu_softc *)self;
561 
562 	switch (act) {
563 	case DVACT_RESUME:
564 		if (sc->sc_info->ci_cpuid == 0)
565 			rdrand(NULL);
566 		break;
567 	}
568 
569 	return (0);
570 }
571 
/*
 * Map the special per-CPU pages (TSS+GDT, trampoline stack) into the
 * page tables used on kernel entry, and set up the trampoline and
 * NMI stacks inside the cpu_info_full.
 */
void
cpu_enter_pages(struct cpu_info_full *cif)
{
	vaddr_t	va;
	paddr_t pa;
	extern void Xnmi(void);

	/* The TSS + GDT need to be readable */
	va = (vaddr_t)&cif->cif_tss;
	pmap_extract(pmap_kernel(), va, &pa);
	pmap_enter_special(va, pa, PROT_READ, 0);
	DPRINTF("%s: entered tss+gdt page at va 0x%08x pa 0x%08x\n", __func__,
	    (uint32_t)va, (uint32_t)pa);

	/* The trampoline stack page needs to be read/write */
	va = (vaddr_t)&cif->cif_tramp_stack;
	pmap_extract(pmap_kernel(), va, &pa);
	pmap_enter_special(va, pa, PROT_READ | PROT_WRITE, 0);
	DPRINTF("%s: entered t.stack page at va 0x%08x pa 0x%08x\n", __func__,
	    (uint32_t)va, (uint32_t)pa);

	/* Setup trampoline stack in TSS (top of the page, minus scratch) */
	cif->cif_tss.tss_esp0 = va + sizeof(cif->cif_tramp_stack) - 16;
	cif->cif_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	DPRINTF("%s: cif_tss.tss_esp0 = 0x%08x\n", __func__,
	    (uint32_t)cif->cif_tss.tss_esp0);
	/* interrupt entry starts below a reserved trampframe */
	cif->cif_cpu.ci_intr_esp = cif->cif_tss.tss_esp0 -
	    sizeof(struct trampframe);

	/* Setup NMI stack in NMI TSS */
	va = (vaddr_t)&cif->cif_nmi_stack + sizeof(cif->cif_nmi_stack);
	cpu_init_tss(&cif->cif_nmi_tss, (void *)va, Xnmi);
	DPRINTF("%s: cif_nmi_tss.tss_esp0 = 0x%08x\n", __func__,
	    (uint32_t)cif->cif_nmi_tss.tss_esp0);

	/* empty iomap: base offset beyond the TSS limit disables it */
	cif->cif_tss.tss_ioopt = sizeof(cif->cif_tss) << 16;
	cif->cif_nmi_tss.tss_ioopt = sizeof(cif->cif_nmi_tss) << 16;
}
611 
612 #ifdef MULTIPROCESSOR
613 void
614 cpu_boot_secondary_processors(void)
615 {
616 	struct cpu_info *ci;
617 	u_long i;
618 
619 	for (i = 0; i < MAXCPUS; i++) {
620 		ci = cpu_info[i];
621 		if (ci == NULL)
622 			continue;
623 		if (ci->ci_idle_pcb == NULL)
624 			continue;
625 		if ((ci->ci_flags & CPUF_PRESENT) == 0)
626 			continue;
627 		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
628 			continue;
629 		ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
630 		cpu_boot_secondary(ci);
631 	}
632 }
633 
634 void
635 cpu_init_idle_pcbs(void)
636 {
637 	struct cpu_info *ci;
638 	u_long i;
639 
640 	for (i=0; i < MAXCPUS; i++) {
641 		ci = cpu_info[i];
642 		if (ci == NULL)
643 			continue;
644 		if (ci->ci_idle_pcb == NULL)
645 			continue;
646 		if ((ci->ci_flags & CPUF_PRESENT) == 0)
647 			continue;
648 		i386_init_pcb_tss(ci);
649 	}
650 }
651 
/*
 * Start one application processor and wait (bounded spin) for it to
 * reach cpu_hatch() and set CPUF_RUNNING.
 */
void
cpu_boot_secondary(struct cpu_info *ci)
{
	struct pcb *pcb;
	int i;
	struct pmap *kpm = pmap_kernel();

	if (mp_verbose)
		printf("%s: starting", ci->ci_dev->dv_xname);

	/* XXX move elsewhere, not per CPU. */
	/* the trampoline loads this as the AP's initial CR3 */
	mp_pdirpa = kpm->pm_pdirpa;

	pcb = ci->ci_idle_pcb;

	if (mp_verbose)
		printf(", init idle stack ptr is 0x%x\n", pcb->pcb_esp);

	/* send the startup sequence (see mp_cpu_start()) */
	CPU_STARTUP(ci);

	/*
	 * wait for it to become ready: up to ~1s (100000 * 10us)
	 */
	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
		delay(10);
	}
	if (!(ci->ci_flags & CPUF_RUNNING)) {
		printf("%s failed to become ready\n", ci->ci_dev->dv_xname);
#ifdef DDB
		db_enter();
#endif
	}

	CPU_START_CLEANUP(ci);
}
687 
688 /*
689  * The CPU ends up here when it's ready to run
690  * XXX should share some of this with init386 in machdep.c
691  * for now it jumps into an infinite loop.
692  */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	int s;

	/* bring up this AP's IDT, local APIC and per-CPU GDT */
	cpu_init_idt();
	lapic_enable();
	lapic_set_lvt();
	gdt_init_cpu(ci);

	/* no LDT in the kernel */
	lldt(0);

	/* initialize the FPU before cpu_init() touches CR0/CR4 */
	npxinit(ci);

	ci->ci_curpmap = pmap_kernel();
	cpu_init(ci);		/* sets CPUF_RUNNING for the BP's spin-wait */
#if NPVBUS > 0
	pvbus_init_cpu();
#endif

	/* Re-initialise memory range handling on AP */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->initAP(&mem_range_softc);

	s = splhigh();		/* XXX prevent softints from running here.. */
	lapic_tpr = 0;		/* accept all interrupt priorities */
	intr_enable();
	if (mp_verbose)
		printf("%s: CPU at apid %ld running\n",
		    ci->ci_dev->dv_xname, ci->ci_cpuid);
	splx(s);

	lapic_startclock();

	/* enter the scheduler; this call does not return */
	sched_toidle();
}
730 
/*
 * Copy the AP spin-up trampoline code and its data into the fixed
 * low-memory locations the startup IPI points at, then make the code
 * page read/execute only.
 */
void
cpu_copy_trampoline(void)
{
	/*
	 * Copy boot code.
	 */
	extern u_char cpu_spinup_trampoline[];
	extern u_char cpu_spinup_trampoline_end[];
	extern u_char mp_tramp_data_start[];
	extern u_char mp_tramp_data_end[];

	memcpy((caddr_t)MP_TRAMPOLINE, cpu_spinup_trampoline,
	    cpu_spinup_trampoline_end - cpu_spinup_trampoline);
	memcpy((caddr_t)MP_TRAMP_DATA, mp_tramp_data_start,
	    mp_tramp_data_end - mp_tramp_data_start);

	/* APs must not be able to scribble on their own boot code */
	pmap_write_protect(pmap_kernel(), (vaddr_t)MP_TRAMPOLINE,
	    (vaddr_t)(MP_TRAMPOLINE + NBPG), PROT_READ | PROT_EXEC);
}
750 
751 #endif
752 
753 #ifdef MULTIPROCESSOR
/*
 * Kick one AP out of reset following the classic MP startup recipe:
 * program the CMOS shutdown code and warm-reset vector, then send
 * INIT followed by two STARTUP IPIs pointing at the trampoline.
 * The quoted steps come from the Intel MP specification.
 */
int
mp_cpu_start(struct cpu_info *ci)
{
	unsigned short dwordptr[2];

	/*
	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_JUMP);

	/*
	 * "and the warm reset vector (DWORD based at 40:67) to point
	 * to the AP startup code ..."
	 */

	/* real-mode far pointer: offset 0, segment = trampoline >> 4 */
	dwordptr[0] = 0;
	dwordptr[1] = MP_TRAMPOLINE >> 4;

	pmap_activate(curproc);

	/* temporarily map page 0 to write the warm-reset vector at 40:67 */
	pmap_kenter_pa(0, 0, PROT_READ | PROT_WRITE);
	memcpy((u_int8_t *)0x467, dwordptr, 4);
	pmap_kremove(0, PAGE_SIZE);

#if NLAPIC > 0
	/*
	 * ... prior to executing the following sequence:"
	 */

	if (ci->ci_flags & CPUF_AP) {
		/* INIT IPI, then wait for the AP to settle */
		i386_ipi_init(ci->ci_apicid);

		delay(10000);

		if (cpu_feature & CPUID_APIC) {
			/* two STARTUP IPIs, vector = trampoline page number */
			i386_ipi(MP_TRAMPOLINE / PAGE_SIZE, ci->ci_apicid,
			    LAPIC_DLMODE_STARTUP);
			delay(200);

			i386_ipi(MP_TRAMPOLINE / PAGE_SIZE, ci->ci_apicid,
			    LAPIC_DLMODE_STARTUP);
			delay(200);
		}
	}
#endif
	return (0);
}
803 
/*
 * Undo mp_cpu_start()'s CMOS fiddling once the AP is up.
 */
void
mp_cpu_start_cleanup(struct cpu_info *ci)
{
	/*
	 * Ensure the NVRAM reset byte contains something vaguely sane.
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_RST);
}
814 
815 #endif /* MULTIPROCESSOR */
816 
/*
 * Idle-loop body using MONITOR/MWAIT: arm a monitor on ci_mwait and
 * sleep until either a wakeup write or an interrupt.  The flag
 * protocol below coordinates with cpu_unidle()/cpu_kick() so a
 * wakeup between the checks cannot be lost.
 */
void
cpu_idle_mwait_cycle(void)
{
	struct cpu_info *ci = curcpu();

	if ((read_eflags() & PSL_I) == 0)
		panic("idle with interrupts blocked!");

	/* something already queued? */
	if (!cpu_is_idle(ci))
		return;

	/*
	 * About to idle; setting the MWAIT_IN_IDLE bit tells
	 * cpu_unidle() that it can't be a no-op and tells cpu_kick()
	 * that it doesn't need to use an IPI.  We also set the
	 * MWAIT_KEEP_IDLING bit: those routines clear it to stop
	 * the mwait.  Once they're set, we do a final check of the
	 * queue, in case another cpu called setrunqueue() and added
	 * something to the queue and called cpu_unidle() between
	 * the check in sched_idle() and here.
	 */
	atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY);
	if (cpu_is_idle(ci)) {
		/* monitor first, then re-check flags, then sleep */
		monitor(&ci->ci_mwait, 0, 0);
		if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
			mwait(0, 0);
	}

	/* done idling; let cpu_kick() know that an IPI is required */
	atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING);
}
849 
850 void
851 cpu_init_mwait(struct cpu_softc *sc)
852 {
853 	unsigned int smallest, largest, extensions, c_substates;
854 
855 	if ((cpu_ecxfeature & CPUIDECX_MWAIT) == 0 || cpuid_level < 0x5)
856 		return;
857 
858 	/* get the monitor granularity */
859 	CPUID(0x5, smallest, largest, extensions, cpu_mwait_states);
860 	smallest &= 0xffff;
861 	largest  &= 0xffff;
862 
863 	printf("%s: mwait min=%u, max=%u", sc->sc_dev.dv_xname,
864 	    smallest, largest);
865 	if (extensions & 0x1) {
866 		if (cpu_mwait_states > 0) {
867 			c_substates = cpu_mwait_states;
868 			printf(", C-substates=%u", 0xf & c_substates);
869 			while ((c_substates >>= 4) > 0)
870 				printf(".%u", 0xf & c_substates);
871 		}
872 		if (extensions & 0x2)
873 			printf(", IBE");
874 	} else {
875 		/* substates not supported, forge the default: just C1 */
876 		cpu_mwait_states = 1 << 4;
877 	}
878 
879 	/* paranoia: check the values */
880 	if (smallest < sizeof(int) || largest < smallest ||
881 	    (largest & (sizeof(int)-1)))
882 		printf(" (bogus)");
883 	else
884 		cpu_mwait_size = largest;
885 	printf("\n");
886 
887 	/* enable use of mwait; may be overridden by acpicpu later */
888 	if (cpu_mwait_size > 0)
889 		cpu_idle_cycle_fcn = &cpu_idle_mwait_cycle;
890 }
891 
892 void
893 cpu_init_tss(struct i386tss *tss, void *stack, void *func)
894 {
895 	memset(tss, 0, sizeof *tss);
896 	tss->tss_esp0 = tss->tss_esp = (int)((char *)stack - 16);
897 	tss->tss_ss0 = tss->tss_ss = GSEL(GDATA_SEL, SEL_KPL);
898 	tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
899 	tss->tss_ds = tss->tss_es = tss->tss_ss = GSEL(GDATA_SEL, SEL_KPL);
900 	tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
901 	tss->tss_gs = GSEL(GNULL_SEL, SEL_KPL);
902 	tss->tss_ldt = GSEL(GNULL_SEL, SEL_KPL);
903 	tss->tss_cr3 = pmap_kernel()->pm_pdirpa;
904 	/* PSL_I not set -> no IRQs after task switch */
905 	tss->tss_eflags = PSL_MBO;
906 	tss->tss_eip = (int)func;
907 }
908 
/*
 * Propagate a new page-directory physical address into every CPU's
 * NMI TSS, so NMI task switches load the right address space.
 */
void
cpu_update_nmi_cr3(vaddr_t cr3)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci)
		ci->ci_nmi_tss->tss_cr3 = cr3;
}
918 
919 #ifdef MULTIPROCESSOR
/*
 * Write back and invalidate caches on every CPU: broadcast a WBINVD
 * IPI to the others, then do it locally.
 */
int
wbinvd_on_all_cpus(void)
{
	i386_broadcast_ipi(I386_IPI_WBINVD);
	wbinvd();
	return 0;
}
927 #endif
928