xref: /openbsd/sys/arch/i386/i386/machdep.c (revision fc30b644)
1 /*	$OpenBSD: machdep.c,v 1.671 2024/05/26 13:37:32 kettenis Exp $	*/
2 /*	$NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*-
35  * Copyright (c) 1993, 1994, 1995, 1996 Charles M. Hannum.  All rights reserved.
36  * Copyright (c) 1992 Terrence R. Lambert.
37  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
38  * All rights reserved.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * William Jolitz.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. Neither the name of the University nor the names of its contributors
52  *    may be used to endorse or promote products derived from this software
53  *    without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  *
67  *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
68  */
69 
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/signalvar.h>
73 #include <sys/proc.h>
74 #include <sys/user.h>
75 #include <sys/exec.h>
76 #include <sys/buf.h>
77 #include <sys/reboot.h>
78 #include <sys/conf.h>
79 #include <sys/malloc.h>
80 #include <sys/msgbuf.h>
81 #include <sys/mount.h>
82 #include <sys/device.h>
83 #include <sys/extent.h>
84 #include <sys/sysctl.h>
85 #include <sys/syscallargs.h>
86 #include <sys/core.h>
87 #include <sys/kcore.h>
88 #include <sys/sensors.h>
89 
90 #include <dev/cons.h>
91 #include <stand/boot/bootarg.h>
92 
93 #include <net/if.h>
94 #include <uvm/uvm_extern.h>
95 
96 #include <machine/bus.h>
97 
98 #include <machine/cpu.h>
99 #include <machine/cpu_full.h>
100 #include <machine/cpufunc.h>
101 #include <machine/cpuvar.h>
102 #include <machine/kcore.h>
103 #include <machine/pio.h>
104 #include <machine/psl.h>
105 #include <machine/specialreg.h>
106 #include <machine/biosvar.h>
107 #include <machine/pte.h>
108 #ifdef MULTIPROCESSOR
109 #include <machine/mpbiosvar.h>
110 #endif /* MULTIPROCESSOR */
111 
112 #include <dev/isa/isareg.h>
113 #include <dev/ic/i8042reg.h>
114 #include <i386/isa/isa_machdep.h>
115 
116 #include "acpi.h"
117 #if NACPI > 0
118 #include <dev/acpi/acpivar.h>
119 #endif
120 
121 #include "apm.h"
122 #if NAPM > 0
123 #include <machine/apmvar.h>
124 #endif
125 
126 #ifdef DDB
127 #include <machine/db_machdep.h>
128 #include <ddb/db_extern.h>
129 #endif
130 
131 #include "isa.h"
132 #include "isadma.h"
133 #include "npx.h"
134 #if NNPX > 0
135 extern struct proc *npxproc;
136 #endif
137 
138 #include "bios.h"
139 
140 #ifdef HIBERNATE
141 #include <machine/hibernate_var.h>
142 #endif /* HIBERNATE */
143 
144 #include "ukbd.h"
145 #include "pckbc.h"
146 #if NPCKBC > 0 && NUKBD > 0
147 #include <dev/ic/pckbcvar.h>
148 #endif
149 
/* #define MACHDEP_DEBUG */

#ifdef MACHDEP_DEBUG
/* Debug printf: compiled away entirely unless MACHDEP_DEBUG is defined. */
#define DPRINTF(x...)	do  { printf(x); } while (0)
#else
#define DPRINTF(x...)
#endif	/* MACHDEP_DEBUG */

void	replacesmap(void);
int     intr_handler(struct intrframe *, struct intrhand *);

/* the following is used externally (sysctl_hw) */
char machine[] = MACHINE;

/*
 * switchto vectors
 */
void (*cpu_idle_leave_fcn)(void) = NULL;
void (*cpu_idle_cycle_fcn)(void) = NULL;
void (*cpu_idle_enter_fcn)(void) = NULL;


/* Physical address ranges usable for ISA (24-bit) and 32-bit DMA. */
struct uvm_constraint_range  isa_constraint = { 0x0, 0x00ffffffUL };
struct uvm_constraint_range  dma_constraint = { 0x0, 0xffffffffUL };
struct uvm_constraint_range *uvm_md_constraints[] = {
	&isa_constraint,
	&dma_constraint,
	NULL
};

extern int	boothowto;
int	physmem;		/* total physical memory, in pages */

struct dumpmem dumpmem[VM_PHYSSEG_MAX];
u_int ndumpmem;

/*
 * These variables are needed by /sbin/savecore
 */
u_long	dumpmag = 0x8fca0101;	/* magic number */
int	dumpsize = 0;		/* pages */
long	dumplo = 0;		/* blocks */

int	cpu_class;

/* FPU/SIMD capability flags, consulted by FP save/restore code. */
int	i386_use_fxsave;
int	i386_has_sse;
int	i386_has_sse2;
int	i386_has_xcrypt;	/* VIA PadLock engines (see cyrix3_cpu_setup) */

bootarg_t *bootargp;
paddr_t avail_end;

struct vm_map *exec_map = NULL;
struct vm_map *phys_map = NULL;

#if !defined(SMALL_KERNEL)
int p3_early;
void (*update_cpuspeed)(void) = NULL;
void	via_update_sensor(void *args);
#endif
int kbd_reset;
int lid_action = 1;
int pwr_action = 1;
int forceukbd;

/*
 * safepri is a safe priority for sleep to set for a spin-wait
 * during autoconfiguration or after a panic.
 */
int	safepri = 0;

#if !defined(SMALL_KERNEL)
int bus_clock;
#endif
void (*setperf_setup)(struct cpu_info *);
int setperf_prio = 0;		/* for concurrent handlers */

void (*cpusensors_setup)(struct cpu_info *);

/* Clock hooks: default to the legacy i8254 routines. */
void (*delay_func)(int) = i8254_delay;
void (*initclock_func)(void) = i8254_initclocks;
void (*startclock_func)(void) = i8254_start_both_clocks;

/*
 * Extent maps to manage I/O and ISA memory hole space.  Allocate
 * storage for 16 regions in each, initially.  Later, ioport_malloc_safe
 * will indicate that it's safe to use malloc() to dynamically allocate
 * region descriptors.
 *
 * N.B. At least two regions are _always_ allocated from the iomem
 * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM).
 *
 * The extent maps are not static!  Machine-dependent ISA and EISA
 * routines need access to them for bus address space allocation.
 */
static	long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)];
static	long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)];
struct	extent *ioport_ex;
struct	extent *iomem_ex;
static	int ioport_malloc_safe;

void	dumpsys(void);
int	cpu_dump(void);
void	init386(paddr_t);
void	consinit(void);
void	(*cpuresetfn)(void);

int	bus_mem_add_mapping(bus_addr_t, bus_size_t,
	    int, bus_space_handle_t *);

#ifdef APERTURE
int allowaperture = 0;
#endif

/* Nonzero when the corresponding instruction is available; set elsewhere. */
int has_rdrand;
int has_rdseed;

/* Per-vendor/per-family CPU setup hooks referenced from i386_cpuid_cpus. */
void	winchip_cpu_setup(struct cpu_info *);
void	amd_family5_setperf_setup(struct cpu_info *);
void	amd_family5_setup(struct cpu_info *);
void	amd_family6_setperf_setup(struct cpu_info *);
void	amd_family6_setup(struct cpu_info *);
void	cyrix3_setperf_setup(struct cpu_info *);
void	cyrix3_cpu_setup(struct cpu_info *);
void	cyrix6x86_cpu_setup(struct cpu_info *);
void	natsem6x86_cpu_setup(struct cpu_info *);
void	intel586_cpu_setup(struct cpu_info *);
void	intel686_cpusensors_setup(struct cpu_info *);
void	intel686_setperf_setup(struct cpu_info *);
void	intel686_common_cpu_setup(struct cpu_info *);
void	intel686_cpu_setup(struct cpu_info *);
void	intel686_p4_cpu_setup(struct cpu_info *);
void	intelcore_update_sensor(void *);
void	tm86_cpu_setup(struct cpu_info *);
char *	intel686_cpu_name(int);
char *	cyrix3_cpu_name(int, int);
char *	tm86_cpu_name(int);
void	cyrix3_get_bus_clock(struct cpu_info *);
void	p4_get_bus_clock(struct cpu_info *);
void	p3_get_bus_clock(struct cpu_info *);
void	p4_update_cpuspeed(void);
void	p3_update_cpuspeed(void);
int	pentium_cpuspeed(int *);
void	enter_shared_special_pages(void);
295 
296 static __inline u_char
297 cyrix_read_reg(u_char reg)
298 {
299 	outb(0x22, reg);
300 	return inb(0x23);
301 }
302 
/*
 * Write a Cyrix configuration register: select the register via
 * index port 0x22, then store the new value through data port 0x23.
 */
static __inline void
cyrix_write_reg(u_char reg, u_char data)
{
	outb(0x22, reg);
	outb(0x23, data);
}
309 
310 /*
311  * cpuid instruction.  request in eax, result in eax, ebx, ecx, edx.
312  * requires caller to provide u_int32_t regs[4] array.
313  */
void
cpuid(u_int32_t ax, u_int32_t *regs)
{
	/*
	 * Leaf number is passed in %eax (constraint "0" ties the input
	 * to the "=a" output); the destination array pointer is kept in
	 * %esi ("S") so it survives CPUID's clobbering of the four GPRs.
	 * %ebx/%ecx/%edx are declared clobbered and stored by hand.
	 */
	__asm volatile(
	    "cpuid\n\t"
	    "movl	%%eax, 0(%2)\n\t"
	    "movl	%%ebx, 4(%2)\n\t"
	    "movl	%%ecx, 8(%2)\n\t"
	    "movl	%%edx, 12(%2)\n\t"
	    :"=a" (ax)
	    :"0" (ax), "S" (regs)
	    :"bx", "cx", "dx");
}
327 
328 /*
329  * Machine-dependent startup code
330  */
void
cpu_startup(void)
{
	unsigned i;
	vaddr_t minaddr, maxaddr, va;
	paddr_t pa;

	/*
	 * Initialize error message buffer (at end of core).
	 * (space reserved in pmap_bootstrap)
	 */
	pa = avail_end;
	va = (vaddr_t)msgbufp;
	for (i = 0; i < atop(MSGBUFSIZE); i++) {
		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	initmsgbuf((caddr_t)msgbufp, round_page(MSGBUFSIZE));

	/* Announce the kernel version, then get the clocks going. */
	printf("%s", version);
	startclocks();
	rtcinit();

	printf("real mem  = %llu (%lluMB)\n",
	    (unsigned long long)ptoa((psize_t)physmem),
	    (unsigned long long)ptoa((psize_t)physmem)/1024U/1024U);

	/*
	 * Allocate a submap for exec arguments.  This map effectively
	 * limits the number of processes exec'ing at any time.
	 */
	minaddr = vm_map_min(kernel_map);
	exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
				   16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);

	/*
	 * Allocate a submap for physio
	 */
	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
				   VM_PHYS_SIZE, 0, FALSE, NULL);

	printf("avail mem = %llu (%lluMB)\n",
	    (unsigned long long)ptoa((psize_t)uvmexp.free),
	    (unsigned long long)ptoa((psize_t)uvmexp.free)/1024U/1024U);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();

	/*
	 * Configure the system.
	 */
	if (boothowto & RB_CONFIG) {
#ifdef BOOT_CONFIG
		user_config();
#else
		printf("kernel does not support -c; continuing..\n");
#endif
	}
	/* From here on, extent descriptors may be malloc()ed. */
	ioport_malloc_safe = 1;

#ifndef SMALL_KERNEL
	cpu_ucode_setup();
#endif

	/* enter the IDT and trampoline code in the u-k maps */
	enter_shared_special_pages();

	/* initialize CPU0's TSS and GDT and put them in the u-k maps */
	cpu_enter_pages(&cpu_info_full_primary);
}
405 
406 void
407 enter_shared_special_pages(void)
408 {
409 	extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[];
410 	extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[];
411 	vaddr_t	va;
412 	paddr_t	pa;
413 
414 	/* idt */
415 	pmap_extract(pmap_kernel(), (vaddr_t)idt, &pa);
416 	pmap_enter_special((vaddr_t)idt, pa, PROT_READ, 0);
417 
418 	/* .kutext section */
419 	va = (vaddr_t)__kutext_start;
420 	pa = (paddr_t)__kernel_kutext_phys;
421 	while (va < (vaddr_t)__kutext_end) {
422 		pmap_enter_special(va, pa, PROT_READ | PROT_EXEC, 0);
423 		DPRINTF("%s: entered kutext page va 0x%08lx pa 0x%08lx\n",
424 		    __func__, (unsigned long)va, (unsigned long)pa);
425 		va += PAGE_SIZE;
426 		pa += PAGE_SIZE;
427 	}
428 
429 	/* .kudata section */
430 	va = (vaddr_t)__kudata_start;
431 	pa = (paddr_t)__kernel_kudata_phys;
432 	while (va < (vaddr_t)__kudata_end) {
433 		pmap_enter_special(va, pa, PROT_READ | PROT_WRITE, 0);
434 		DPRINTF("%s: entered kudata page va 0x%08lx pa 0x%08lx\n",
435 		    __func__, (unsigned long)va, (unsigned long)pa);
436 		va += PAGE_SIZE;
437 		pa += PAGE_SIZE;
438 	}
439 }
440 
441 /*
442  * Set up proc0's TSS
443  */
void
i386_proc0_tss_init(void)
{
	struct pcb *pcb;

	/* proc0's PCB becomes the initial curpcb. */
	curpcb = pcb = &proc0.p_addr->u_pcb;
	/* Record the current %cr0 in the PCB. */
	pcb->pcb_cr0 = rcr0();
	/* Top of proc0's kernel stack, minus 16 bytes of slop; the
	 * initial trapframe sits immediately below it. */
	pcb->pcb_kstack = (int)proc0.p_addr + USPACE - 16;
	proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1;
}
454 
455 #ifdef MULTIPROCESSOR
/* Initialize a secondary CPU's idle PCB with the current %cr0 value. */
void
i386_init_pcb_tss(struct cpu_info *ci)
{
	struct pcb *pcb = ci->ci_idle_pcb;

	pcb->pcb_cr0 = rcr0();
}
463 #endif	/* MULTIPROCESSOR */
464 
465 /*
466  * Info for CTL_HW
467  */
char	cpu_model[120];		/* model string exported via sysctl hw.model;
				 * filled in during CPU identification */

/* Printable names indexed by cpu_class (CPUCLASS_*). */
const char *classnames[] = {
	"",
	"486",
	"586",
	"686"
};

/* Model-name prefixes indexed by the CPUID "type" field. */
const char *modifiers[] = {
	"",
	"OverDrive ",
	"Dual ",
	""
};
483 
/*
 * CPU identification table, keyed by the 12-byte CPUID vendor string.
 * Each entry carries the vendor id, a printable vendor name, and one
 * record per CPUID family (4 through 0xF): the cpu class, a table of
 * per-model names (16 models plus a trailing default), and an optional
 * per-family setup hook run during CPU identification.  Families with
 * only a class listed have no model names or setup hook.
 */
const struct cpu_cpuid_nameclass i386_cpuid_cpus[] = {
	{
		"GenuineIntel",
		CPUVENDOR_INTEL,
		"Intel",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				"486DX", "486DX", "486SX", "486DX2", "486SL",
				"486SX2", 0, "486DX2 W/B",
				"486DX4", 0, 0, 0, 0, 0, 0, 0,
				"486"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"Pentium (A-step)", "Pentium (P5)",
				"Pentium (P54C)", "Pentium (P24T)",
				"Pentium/MMX", "Pentium", 0,
				"Pentium (P54C)", "Pentium/MMX",
				0, 0, 0, 0, 0, 0, 0,
				"Pentium"	/* Default */
			},
			intel586_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				"Pentium Pro", "Pentium Pro", 0,
				"Pentium II", "Pentium Pro",
				"Pentium II/Celeron",
				"Celeron",
				"Pentium III",
				"Pentium III",
				"Pentium M",
				"Pentium III Xeon",
				"Pentium III", 0,
				"Pentium M",
				"Core Duo/Solo", 0,
				"Pentium Pro, II or III"	/* Default */
			},
			intel686_cpu_setup
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			CPUCLASS_686,
			{
				"Pentium 4", 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				"Pentium 4"	/* Default */
			},
			intel686_p4_cpu_setup
		} }
	},
	{
		"AuthenticAMD",
		CPUVENDOR_AMD,
		"AMD",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, "Am486DX2 W/T",
				0, 0, 0, "Am486DX2 W/B",
				"Am486DX4 W/T or Am5x86 W/T 150",
				"Am486DX4 W/B or Am5x86 W/B 150", 0, 0,
				0, 0, "Am5x86 W/T 133/160",
				"Am5x86 W/B 133/160",
				"Am486 or Am5x86"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"K5", "K5", "K5", "K5", 0, 0, "K6",
				"K6", "K6-2", "K6-III", 0, 0, 0,
				"K6-2+/III+", 0, 0,
				"K5 or K6"		/* Default */
			},
			amd_family5_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				0, "Athlon Model 1", "Athlon Model 2",
				"Duron Model 3",
				"Athlon Model 4",
				0, "Athlon XP Model 6",
				"Duron Model 7",
				"Athlon XP Model 8",
				0, "Athlon XP Model 10",
				0, 0, 0, 0, 0,
				"K7"		/* Default */
			},
			amd_family6_setup
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, "Athlon64",
				"Opteron or Athlon64FX", 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"AMD64"			/* DEFAULT */
			},
			amd_family6_setup
		} }
	},
	{
		"CyrixInstead",
		CPUVENDOR_CYRIX,
		"Cyrix",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, "MediaGX", 0, 0, 0, 0, "5x86", 0, 0,
				0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, "6x86", 0, "GXm", 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			cyrix6x86_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				"6x86MX", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0,
				"686 class"	/* Default */
			},
			NULL
		} }
	},
	{
		"CentaurHauls",
		CPUVENDOR_IDT,
		"IDT",
		/* Family 4, not available from IDT */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "WinChip C6", 0, 0, 0,
				"WinChip 2", "WinChip 3", 0, 0, 0, 0, 0, 0,
				"WinChip"		/* Default */
			},
			winchip_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, 0, 0,
				"C3 Samuel",
				"C3 Samuel 2/Ezra",
				"C3 Ezra-T",
				"C3 Nehemiah", "C3 Esther", 0, 0, 0, 0, 0,
				"C3"		/* Default */
			},
			cyrix3_cpu_setup
		} }
	},
	{
		"GenuineTMx86",
		CPUVENDOR_TRANSMETA,
		"Transmeta",
		/* Family 4, not available from Transmeta */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "TMS5x00", 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0, 0,
				"TMS5x00"		/* Default */
			},
			tm86_cpu_setup
		},
		/* Family 6, not yet available from Transmeta */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"686 class"		/* Default */
			},
			NULL
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			/* Extended processor family - Transmeta Efficeon */
			CPUCLASS_686,
			{
				0, 0, "TM8000", "TM8000",
				0, 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				"TM8000"	/* Default */
			},
			tm86_cpu_setup
		} }
	},
	{
		"Geode by NSC",
		CPUVENDOR_NS,
		"National Semiconductor",
		/* Family 4, not available from National Semiconductor */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "Geode GX1", 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			natsem6x86_cpu_setup
		} }
	},
	{
		"SiS SiS SiS ",
		CPUVENDOR_SIS,
		"SiS",
		/* Family 4, not available from SiS */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"SiS55x", 0, 0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			NULL
		} }
	}
};
870 
/*
 * Bit-to-name tables used when printing CPU feature flags at attach
 * time.  Each table pairs a feature mask with its printable name.
 */

/* CPUID leaf 1, %edx feature bits. */
const struct cpu_cpuid_feature i386_cpuid_features[] = {
	{ CPUID_FPU,	"FPU" },
	{ CPUID_VME,	"V86" },
	{ CPUID_DE,	"DE" },
	{ CPUID_PSE,	"PSE" },
	{ CPUID_TSC,	"TSC" },
	{ CPUID_MSR,	"MSR" },
	{ CPUID_PAE,	"PAE" },
	{ CPUID_MCE,	"MCE" },
	{ CPUID_CX8,	"CX8" },
	{ CPUID_APIC,	"APIC" },
	{ CPUID_SYS1,	"SYS" },
	{ CPUID_SEP,	"SEP" },
	{ CPUID_MTRR,	"MTRR" },
	{ CPUID_PGE,	"PGE" },
	{ CPUID_MCA,	"MCA" },
	{ CPUID_CMOV,	"CMOV" },
	{ CPUID_PAT,	"PAT" },
	{ CPUID_PSE36,	"PSE36" },
	{ CPUID_PSN,	"PSN" },
	{ CPUID_CFLUSH,	"CFLUSH" },
	{ CPUID_DS,	"DS" },
	{ CPUID_ACPI,	"ACPI" },
	{ CPUID_MMX,	"MMX" },
	{ CPUID_FXSR,	"FXSR" },
	{ CPUID_SSE,	"SSE" },
	{ CPUID_SSE2,	"SSE2" },
	{ CPUID_SS,	"SS" },
	{ CPUID_HTT,	"HTT" },
	{ CPUID_TM,	"TM" },
	{ CPUID_PBE,	"PBE" }
};

/* Extended CPUID leaf 0x80000001, %edx feature bits. */
const struct cpu_cpuid_feature i386_ecpuid_features[] = {
	{ CPUID_MPC,		"MPC" },
	{ CPUID_NXE,		"NXE" },
	{ CPUID_MMXX,		"MMXX" },
	{ CPUID_FFXSR,		"FFXSR" },
	{ CPUID_PAGE1GB,	"PAGE1GB" },
	{ CPUID_RDTSCP,		"RDTSCP" },
	{ CPUID_LONG,		"LONG" },
	{ CPUID_3DNOW2,		"3DNOW2" },
	{ CPUID_3DNOW,		"3DNOW" }
};

/* CPUID leaf 1, %ecx feature bits. */
const struct cpu_cpuid_feature i386_cpuid_ecxfeatures[] = {
	{ CPUIDECX_SSE3,	"SSE3" },
	{ CPUIDECX_PCLMUL,	"PCLMUL" },
	{ CPUIDECX_DTES64,	"DTES64" },
	{ CPUIDECX_MWAIT,	"MWAIT" },
	{ CPUIDECX_DSCPL,	"DS-CPL" },
	{ CPUIDECX_VMX,		"VMX" },
	{ CPUIDECX_SMX,		"SMX" },
	{ CPUIDECX_EST,		"EST" },
	{ CPUIDECX_TM2,		"TM2" },
	{ CPUIDECX_SSSE3,	"SSSE3" },
	{ CPUIDECX_CNXTID,	"CNXT-ID" },
	{ CPUIDECX_SDBG,	"SDBG" },
	{ CPUIDECX_FMA3,	"FMA3" },
	{ CPUIDECX_CX16,	"CX16" },
	{ CPUIDECX_XTPR,	"xTPR" },
	{ CPUIDECX_PDCM,	"PDCM" },
	{ CPUIDECX_PCID,	"PCID" },
	{ CPUIDECX_DCA,		"DCA" },
	{ CPUIDECX_SSE41,	"SSE4.1" },
	{ CPUIDECX_SSE42,	"SSE4.2" },
	{ CPUIDECX_X2APIC,	"x2APIC" },
	{ CPUIDECX_MOVBE,	"MOVBE" },
	{ CPUIDECX_POPCNT,	"POPCNT" },
	{ CPUIDECX_DEADLINE,	"DEADLINE" },
	{ CPUIDECX_AES,		"AES" },
	{ CPUIDECX_XSAVE,	"XSAVE" },
	{ CPUIDECX_OSXSAVE,	"OSXSAVE" },
	{ CPUIDECX_AVX,		"AVX" },
	{ CPUIDECX_F16C,	"F16C" },
	{ CPUIDECX_RDRAND,	"RDRAND" },
	{ CPUIDECX_HV,		"HV" },
};

/* Extended CPUID leaf 0x80000001, %ecx feature bits. */
const struct cpu_cpuid_feature i386_ecpuid_ecxfeatures[] = {
	{ CPUIDECX_LAHF,	"LAHF" },
	{ CPUIDECX_CMPLEG,	"CMPLEG" },
	{ CPUIDECX_SVM,		"SVM" },
	{ CPUIDECX_EAPICSP,	"EAPICSP" },
	{ CPUIDECX_AMCR8,	"AMCR8" },
	{ CPUIDECX_ABM,		"ABM" },
	{ CPUIDECX_SSE4A,	"SSE4A" },
	{ CPUIDECX_MASSE,	"MASSE" },
	{ CPUIDECX_3DNOWP,	"3DNOWP" },
	{ CPUIDECX_OSVW,	"OSVW" },
	{ CPUIDECX_IBS,		"IBS" },
	{ CPUIDECX_XOP,		"XOP" },
	{ CPUIDECX_SKINIT,	"SKINIT" },
	{ CPUIDECX_WDT,		"WDT" },
	{ CPUIDECX_LWP,		"LWP" },
	{ CPUIDECX_FMA4,	"FMA4" },
	{ CPUIDECX_TCE,		"TCE" },
	{ CPUIDECX_NODEID,	"NODEID" },
	{ CPUIDECX_TBM,		"TBM" },
	{ CPUIDECX_TOPEXT,	"TOPEXT" },
	{ CPUIDECX_CPCTR,	"CPCTR" },
	{ CPUIDECX_DBKP,	"DBKP" },
	{ CPUIDECX_PERFTSC,	"PERFTSC" },
	{ CPUIDECX_PCTRL3,	"PCTRL3" },
	{ CPUIDECX_MWAITX,	"MWAITX" },
};

/* Structured extended features (CPUID leaf 7, subleaf 0), %ebx. */
const struct cpu_cpuid_feature cpu_seff0_ebxfeatures[] = {
	{ SEFF0EBX_FSGSBASE,	"FSGSBASE" },
	{ SEFF0EBX_TSC_ADJUST,	"TSC_ADJUST" },
	{ SEFF0EBX_SGX,		"SGX" },
	{ SEFF0EBX_BMI1,	"BMI1" },
	{ SEFF0EBX_HLE,		"HLE" },
	{ SEFF0EBX_AVX2,	"AVX2" },
	{ SEFF0EBX_SMEP,	"SMEP" },
	{ SEFF0EBX_BMI2,	"BMI2" },
	{ SEFF0EBX_ERMS,	"ERMS" },
	{ SEFF0EBX_INVPCID,	"INVPCID" },
	{ SEFF0EBX_RTM,		"RTM" },
	{ SEFF0EBX_PQM,		"PQM" },
	{ SEFF0EBX_MPX,		"MPX" },
	{ SEFF0EBX_AVX512F,	"AVX512F" },
	{ SEFF0EBX_AVX512DQ,	"AVX512DQ" },
	{ SEFF0EBX_RDSEED,	"RDSEED" },
	{ SEFF0EBX_ADX,		"ADX" },
	{ SEFF0EBX_SMAP,	"SMAP" },
	{ SEFF0EBX_AVX512IFMA,	"AVX512IFMA" },
	{ SEFF0EBX_PCOMMIT,	"PCOMMIT" },
	{ SEFF0EBX_CLFLUSHOPT,	"CLFLUSHOPT" },
	{ SEFF0EBX_CLWB,	"CLWB" },
	{ SEFF0EBX_PT,		"PT" },
	{ SEFF0EBX_AVX512PF,	"AVX512PF" },
	{ SEFF0EBX_AVX512ER,	"AVX512ER" },
	{ SEFF0EBX_AVX512CD,	"AVX512CD" },
	{ SEFF0EBX_SHA,		"SHA" },
	{ SEFF0EBX_AVX512BW,	"AVX512BW" },
	{ SEFF0EBX_AVX512VL,	"AVX512VL" },
};

/* Structured extended features (CPUID leaf 7, subleaf 0), %ecx. */
const struct cpu_cpuid_feature cpu_seff0_ecxfeatures[] = {
	{ SEFF0ECX_PREFETCHWT1,	"PREFETCHWT1" },
	{ SEFF0ECX_UMIP,	"UMIP" },
	{ SEFF0ECX_AVX512VBMI,	"AVX512VBMI" },
	{ SEFF0ECX_PKU,		"PKU" },
	{ SEFF0ECX_WAITPKG,	"WAITPKG" },
};

/* Structured extended features (CPUID leaf 7, subleaf 0), %edx. */
const struct cpu_cpuid_feature cpu_seff0_edxfeatures[] = {
	{ SEFF0EDX_AVX512_4FNNIW, "AVX512FNNIW" },
	{ SEFF0EDX_AVX512_4FMAPS, "AVX512FMAPS" },
	{ SEFF0EDX_SRBDS_CTRL,	"SRBDS_CTRL" },
	{ SEFF0EDX_MD_CLEAR,	"MD_CLEAR" },
	{ SEFF0EDX_TSXFA,	"TSXFA" },
	{ SEFF0EDX_IBRS,	"IBRS,IBPB" },
	{ SEFF0EDX_STIBP,	"STIBP" },
	{ SEFF0EDX_L1DF,	"L1DF" },
	 /* SEFF0EDX_ARCH_CAP (not printed) */
	{ SEFF0EDX_SSBD,	"SSBD" },
};

/* Thermal and power management (CPUID leaf 6), %eax. */
const struct cpu_cpuid_feature cpu_tpm_eaxfeatures[] = {
	{ TPM_SENSOR,		"SENSOR" },
	{ TPM_ARAT,		"ARAT" },
};

/* Architectural performance monitoring (CPUID leaf 0xa), %eax. */
const struct cpu_cpuid_feature i386_cpuid_eaxperf[] = {
	{ CPUIDEAX_VERID,	"PERF" },
};

/* Advanced power management (CPUID leaf 0x80000007), %edx. */
const struct cpu_cpuid_feature i386_cpuid_edxapmi[] = {
	{ CPUIDEDX_ITSC,	"ITSC" },
};

/* XSAVE capabilities (CPUID leaf 0xd, subleaf 1), %eax. */
const struct cpu_cpuid_feature cpu_xsave_extfeatures[] = {
	{ XSAVE_XSAVEOPT,	"XSAVEOPT" },
	{ XSAVE_XSAVEC,		"XSAVEC" },
	{ XSAVE_XGETBV1,	"XGETBV1" },
	{ XSAVE_XSAVES,		"XSAVES" },
};
1050 
1051 void
1052 winchip_cpu_setup(struct cpu_info *ci)
1053 {
1054 
1055 	switch ((ci->ci_signature >> 4) & 15) { /* model */
1056 	case 4: /* WinChip C6 */
1057 		ci->ci_feature_flags &= ~CPUID_TSC;
1058 		/* Disable RDTSC instruction from user-level. */
1059 		lcr4(rcr4() | CR4_TSD);
1060 		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
1061 		break;
1062 	}
1063 }
1064 
1065 #if !defined(SMALL_KERNEL)
1066 void
1067 cyrix3_setperf_setup(struct cpu_info *ci)
1068 {
1069 	if (cpu_ecxfeature & CPUIDECX_EST) {
1070 		if (rdmsr(MSR_MISC_ENABLE) & (1 << 16))
1071 			est_init(ci, CPUVENDOR_VIA);
1072 		else
1073 			printf("%s: Enhanced SpeedStep disabled by BIOS\n",
1074 			    ci->ci_dev->dv_xname);
1075 	}
1076 }
1077 #endif
1078 
/*
 * VIA/Centaur C3 and later family-6 setup: fix up feature flags per
 * model and probe/enable the PadLock units (RNG, AES, AES-CTR, SHA,
 * RSA) via the 0xC0000001 CPUID leaf and MSRs 0x110B/0x1107.
 */
void
cyrix3_cpu_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;
	int step = ci->ci_signature & 15;

	u_int64_t msreg;
	u_int32_t regs[4];
	unsigned int val;
#if !defined(SMALL_KERNEL)
	extern void (*pagezero)(void *, size_t);
	extern void i686_pagezero(void *, size_t);

	pagezero = i686_pagezero;

	setperf_setup = cyrix3_setperf_setup;
#endif

	switch (model) {
	/* Possible earlier models */
	case 0: case 1: case 2:
	case 3: case 4: case 5:
		break;

	case 6: /* C3 Samuel 1 */
	case 7: /* C3 Samuel 2 or C3 Ezra */
	case 8: /* C3 Ezra-T */
		/* Extended leaf 0x80000001 %edx bit 31 is 3DNow. */
		cpuid(0x80000001, regs);
		val = regs[3];
		if (val & (1U << 31)) {
			cpu_feature |= CPUID_3DNOW;
		} else {
			cpu_feature &= ~CPUID_3DNOW;
		}
		break;

	case 9:
		if (step < 3)
			break;
		/*
		 * C3 Nehemiah & later: fall through.
		 */

	case 10: /* C7-M Type A */
	case 13: /* C7-M Type D */
	case 15: /* Nano */
#if !defined(SMALL_KERNEL)
		if (CPU_IS_PRIMARY(ci) &&
		    (model == 10 || model == 13 || model == 15)) {
			/* Setup the sensors structures */
			strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
			    sizeof(ci->ci_sensordev.xname));
			ci->ci_sensor.type = SENSOR_TEMP;
			sensor_task_register(ci, via_update_sensor, 5);
			sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
			sensordev_install(&ci->ci_sensordev);
		}
#endif
		/* FALLTHROUGH: these models also get the PadLock probe. */

	default:
		/*
		 * C3 Nehemiah/Esther & later models:
		 * First we check for extended feature flags, and then
		 * (if present) retrieve the ones at 0xC0000001.  In this
		 * bit 2 tells us if the RNG is present.  Bit 3 tells us
		 * if the RNG has been enabled.  In order to use the RNG
		 * we need 3 things:  We need an RNG, we need the FXSR bit
		 * enabled in cr4 (SSE/SSE2 stuff), and we need to have
		 * Bit 6 of MSR 0x110B set to 1 (the default), which will
		 * show up as bit 3 set here.
		 */
		cpuid(0xC0000000, regs); /* Check for RNG */
		val = regs[0];
		if (val >= 0xC0000001) {
			cpuid(0xC0000001, regs);
			val = regs[3];
		} else
			val = 0;

		if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
			printf("%s:", ci->ci_dev->dv_xname);

		/* Enable RNG if present and disabled */
		if (val & C3_CPUID_HAS_RNG) {
			extern int viac3_rnd_present;

			if (!(val & C3_CPUID_DO_RNG)) {
				msreg = rdmsr(0x110B);
				msreg |= 0x40;
				wrmsr(0x110B, msreg);
			}
			viac3_rnd_present = 1;
			printf(" RNG");
		}

		/* Enable AES engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_AES;
#endif /* CRYPTO */
			printf(" AES");
		}

		/* Enable ACE2 engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE2) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE2)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_AESCTR;
#endif /* CRYPTO */
			printf(" AES-CTR");
		}

		/* Enable SHA engine if present and disabled */
		if (val & C3_CPUID_HAS_PHE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PHE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28/**/);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_SHA;
#endif /* CRYPTO */
			printf(" SHA1 SHA256");
		}

		/* Enable MM engine if present and disabled */
		if (val & C3_CPUID_HAS_PMM) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PMM)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28/**/);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_MM;
#endif /* CRYPTO */
			printf(" RSA");
		}

		printf("\n");
		break;
	}
}
1230 
1231 #if !defined(SMALL_KERNEL)
1232 void
1233 via_update_sensor(void *args)
1234 {
1235 	struct cpu_info *ci = (struct cpu_info *) args;
1236 	u_int64_t msr;
1237 
1238 	switch (ci->ci_model) {
1239 	case 0xa:
1240 	case 0xd:
1241 		msr = rdmsr(MSR_C7M_TMTEMPERATURE);
1242 		break;
1243 	case 0xf:
1244 		msr = rdmsr(MSR_CENT_TMTEMPERATURE);
1245 		break;
1246 	}
1247 	ci->ci_sensor.value = (msr & 0xffffff);
1248 	/* micro degrees */
1249 	ci->ci_sensor.value *= 1000000;
1250 	ci->ci_sensor.value += 273150000;
1251 	ci->ci_sensor.flags &= ~SENSOR_FINVALID;
1252 }
1253 #endif
1254 
/*
 * Model-specific setup for Cyrix 6x86 CPUs: apply the M1 "coma bug"
 * register workaround, and disable the unusable TSC on GXm parts.
 */
void
cyrix6x86_cpu_setup(struct cpu_info *ci)
{
	extern int clock_broken_latch;

	switch ((ci->ci_signature >> 4) & 15) { /* model */
	case -1: /* M1 w/o cpuid */
		/*
		 * NOTE(review): this case is unreachable -- the 4-bit
		 * mask above can only yield 0..15.  Kept as-is.
		 */
	case 2:	/* M1 */
		/* set up various cyrix registers */
		/* Enable suspend on halt */
		cyrix_write_reg(0xc2, cyrix_read_reg(0xc2) | 0x08);
		/* enable access to ccr4/ccr5 */
		cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) | 0x10);
		/* cyrix's workaround  for the "coma bug" */
		cyrix_write_reg(0x31, cyrix_read_reg(0x31) | 0xf8);
		cyrix_write_reg(0x32, cyrix_read_reg(0x32) | 0x7f);
		cyrix_read_reg(0x33); cyrix_write_reg(0x33, 0);
		cyrix_write_reg(0x3c, cyrix_read_reg(0x3c) | 0x87);
		/* disable access to ccr4/ccr5 */
		cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) & ~0x10);

		printf("%s: xchg bug workaround performed\n",
		    ci->ci_dev->dv_xname);
		break;	/* fallthrough? */
	case 4:	/* GXm */
		/* Unset the TSC bit until calibrate_delay() gets fixed. */
		clock_broken_latch = 1;
		curcpu()->ci_feature_flags &= ~CPUID_TSC;
		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
		break;
	}
}
1287 
1288 void
1289 natsem6x86_cpu_setup(struct cpu_info *ci)
1290 {
1291 	extern int clock_broken_latch;
1292 	int model = (ci->ci_signature >> 4) & 15;
1293 
1294 	clock_broken_latch = 1;
1295 	switch (model) {
1296 	case 4:
1297 		cpu_feature &= ~CPUID_TSC;
1298 		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
1299 		break;
1300 	}
1301 }
1302 
1303 void
1304 intel586_cpu_setup(struct cpu_info *ci)
1305 {
1306 	if (!cpu_f00f_bug) {
1307 		fix_f00f();
1308 		printf("%s: F00F bug workaround installed\n",
1309 		    ci->ci_dev->dv_xname);
1310 	}
1311 }
1312 
#if !defined(SMALL_KERNEL)
/*
 * setperf backend hook for AMD family 5 parts: attach the K6
 * PowerNow! frequency scaling driver.
 */
void
amd_family5_setperf_setup(struct cpu_info *ci)
{
	k6_powernow_init();
}
#endif
1320 
1321 void
1322 amd_family5_setup(struct cpu_info *ci)
1323 {
1324 	int model = (ci->ci_signature >> 4) & 15;
1325 
1326 	switch (model) {
1327 	case 0:		/* AMD-K5 Model 0 */
1328 		/*
1329 		 * According to the AMD Processor Recognition App Note,
1330 		 * the AMD-K5 Model 0 uses the wrong bit to indicate
1331 		 * support for global PTEs, instead using bit 9 (APIC)
1332 		 * rather than bit 13 (i.e. "0x200" vs. 0x2000".  Oops!).
1333 		 */
1334 		if (cpu_feature & CPUID_APIC)
1335 			cpu_feature = (cpu_feature & ~CPUID_APIC) | CPUID_PGE;
1336 		/*
1337 		 * XXX But pmap_pg_g is already initialized -- need to kick
1338 		 * XXX the pmap somehow.  How does the MP branch do this?
1339 		 */
1340 		break;
1341 	case 12:
1342 	case 13:
1343 #if !defined(SMALL_KERNEL)
1344 		setperf_setup = amd_family5_setperf_setup;
1345 #endif
1346 		break;
1347 	}
1348 }
1349 
1350 #if !defined(SMALL_KERNEL)
1351 void
1352 amd_family6_setperf_setup(struct cpu_info *ci)
1353 {
1354 	int family = (ci->ci_signature >> 8) & 15;
1355 
1356 	switch (family) {
1357 	case 6:
1358 		k7_powernow_init();
1359 		break;
1360 	case 15:
1361 		k8_powernow_init();
1362 		break;
1363 	}
1364 	if (ci->ci_family >= 0x10)
1365 		k1x_init(ci);
1366 }
1367 #endif
1368 
1369 void
1370 amd_family6_setup(struct cpu_info *ci)
1371 {
1372 #if !defined(SMALL_KERNEL)
1373 	int family = (ci->ci_signature >> 8) & 15;
1374 	extern void (*pagezero)(void *, size_t);
1375 	extern void sse2_pagezero(void *, size_t);
1376 	extern void i686_pagezero(void *, size_t);
1377 
1378 	if (cpu_feature & CPUID_SSE2)
1379 		pagezero = sse2_pagezero;
1380 	else
1381 		pagezero = i686_pagezero;
1382 
1383 	setperf_setup = amd_family6_setperf_setup;
1384 
1385 	if (family == 0xf) {
1386 		amd64_errata(ci);
1387 	}
1388 #endif
1389 }
1390 
1391 #if !defined(SMALL_KERNEL)
1392 /*
1393  * Temperature read on the CPU is relative to the maximum
1394  * temperature supported by the CPU, Tj(Max).
1395  * Refer to:
1396  * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
1397  * Section 35 and
1398  * http://www.intel.com/content/dam/www/public/us/en/documents/
1399  * white-papers/cpu-monitoring-dts-peci-paper.pdf
1400  *
1401  * The temperature on Intel CPUs can be between 70 and 105 degC, since
1402  * Westmere we can read the TJmax from the die. For older CPUs we have
1403  * to guess or use undocumented MSRs. Then we subtract the temperature
1404  * portion of thermal status from max to get current temperature.
1405  */
1406 void
1407 intelcore_update_sensor(void *args)
1408 {
1409 	struct cpu_info *ci = (struct cpu_info *) args;
1410 	u_int64_t msr;
1411 	int max = 100;
1412 
1413 	/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
1414 	if (ci->ci_model == 0x0e &&
1415 	    (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
1416 	     MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
1417 		max = 85;
1418 
1419 	/*
1420 	 * Newer CPUs can tell you what their max temperature is.
1421 	 * See: '64-ia-32-architectures-software-developer-
1422 	 * vol-3c-part-3-manual.pdf'
1423 	 */
1424 	if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
1425 	    ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
1426 	    ci->ci_model != 0x35 && ci->ci_model != 0x36)
1427 		max = MSR_TEMPERATURE_TARGET_TJMAX(
1428 		    rdmsr(MSR_TEMPERATURE_TARGET));
1429 
1430 	msr = rdmsr(MSR_THERM_STATUS);
1431 	if (msr & MSR_THERM_STATUS_VALID_BIT) {
1432 		ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
1433 		/* micro degrees */
1434 		ci->ci_sensor.value *= 1000000;
1435 		/* kelvin */
1436 		ci->ci_sensor.value += 273150000;
1437 		ci->ci_sensor.flags &= ~SENSOR_FINVALID;
1438 	} else {
1439 		ci->ci_sensor.value = 0;
1440 		ci->ci_sensor.flags |= SENSOR_FINVALID;
1441 	}
1442 }
1443 
1444 void
1445 intel686_cpusensors_setup(struct cpu_info *ci)
1446 {
1447 	if (!CPU_IS_PRIMARY(ci) || (ci->ci_feature_tpmflags & TPM_SENSOR) == 0)
1448 		return;
1449 
1450 	/* Setup the sensors structures */
1451 	strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
1452 	    sizeof(ci->ci_sensordev.xname));
1453 	ci->ci_sensor.type = SENSOR_TEMP;
1454 	sensor_task_register(ci, intelcore_update_sensor, 5);
1455 	sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
1456 	sensordev_install(&ci->ci_sensordev);
1457 }
1458 #endif
1459 
1460 #if !defined(SMALL_KERNEL)
1461 void
1462 intel686_setperf_setup(struct cpu_info *ci)
1463 {
1464 	int family = (ci->ci_signature >> 8) & 15;
1465 	int step = ci->ci_signature & 15;
1466 
1467 	if (cpu_ecxfeature & CPUIDECX_EST) {
1468 		if (rdmsr(MSR_MISC_ENABLE) & (1 << 16))
1469 			est_init(ci, CPUVENDOR_INTEL);
1470 		else
1471 			printf("%s: Enhanced SpeedStep disabled by BIOS\n",
1472 			    ci->ci_dev->dv_xname);
1473 	} else if ((cpu_feature & (CPUID_ACPI | CPUID_TM)) ==
1474 	    (CPUID_ACPI | CPUID_TM))
1475 		p4tcc_init(family, step);
1476 }
1477 #endif
1478 
1479 void
1480 intel686_common_cpu_setup(struct cpu_info *ci)
1481 {
1482 
1483 #if !defined(SMALL_KERNEL)
1484 	setperf_setup = intel686_setperf_setup;
1485 	cpusensors_setup = intel686_cpusensors_setup;
1486 	{
1487 	extern void (*pagezero)(void *, size_t);
1488 	extern void sse2_pagezero(void *, size_t);
1489 	extern void i686_pagezero(void *, size_t);
1490 
1491 	if (cpu_feature & CPUID_SSE2)
1492 		pagezero = sse2_pagezero;
1493 	else
1494 		pagezero = i686_pagezero;
1495 	}
1496 #endif
1497 	/*
1498 	 * Make sure SYSENTER is disabled.
1499 	 */
1500 	if (cpu_feature & CPUID_SEP)
1501 		wrmsr(MSR_SYSENTER_CS, 0);
1502 }
1503 
1504 void
1505 intel686_cpu_setup(struct cpu_info *ci)
1506 {
1507 	int model = (ci->ci_signature >> 4) & 15;
1508 	int step = ci->ci_signature & 15;
1509 	u_quad_t msr119;
1510 
1511 	intel686_common_cpu_setup(ci);
1512 
1513 	/*
1514 	 * Original PPro returns SYSCALL in CPUID but is non-functional.
1515 	 * From Intel Application Note #485.
1516 	 */
1517 	if ((model == 1) && (step < 3))
1518 		ci->ci_feature_flags &= ~CPUID_SEP;
1519 
1520 	/*
1521 	 * Disable the Pentium3 serial number.
1522 	 */
1523 	if ((model == 7) && (ci->ci_feature_flags & CPUID_PSN)) {
1524 		msr119 = rdmsr(MSR_BBL_CR_CTL);
1525 		msr119 |= 0x0000000000200000LL;
1526 		wrmsr(MSR_BBL_CR_CTL, msr119);
1527 
1528 		printf("%s: disabling processor serial number\n",
1529 			 ci->ci_dev->dv_xname);
1530 		ci->ci_feature_flags &= ~CPUID_PSN;
1531 		ci->ci_level = 2;
1532 	}
1533 
1534 #if !defined(SMALL_KERNEL)
1535 	p3_early = (model == 8 && step == 1) ? 1 : 0;
1536 	update_cpuspeed = p3_update_cpuspeed;
1537 #endif
1538 }
1539 
/*
 * Model-specific setup for Pentium 4 class CPUs: run the common P6
 * setup, then install the P4 cpuspeed update routine.
 */
void
intel686_p4_cpu_setup(struct cpu_info *ci)
{
	intel686_common_cpu_setup(ci);

#if !defined(SMALL_KERNEL)
	update_cpuspeed = p4_update_cpuspeed;
#endif
}
1549 
/*
 * Model-specific setup for Transmeta TM86xx CPUs: initialize LongRun
 * frequency scaling support.
 */
void
tm86_cpu_setup(struct cpu_info *ci)
{
#if !defined(SMALL_KERNEL)
	longrun_init();
#endif
}
1557 
1558 char *
1559 intel686_cpu_name(int model)
1560 {
1561 	char *ret = NULL;
1562 
1563 	switch (model) {
1564 	case 5:
1565 		switch (cpu_cache_edx & 0xFF) {
1566 		case 0x40:
1567 		case 0x41:
1568 			ret = "Celeron";
1569 			break;
1570 		/* 0x42 should not exist in this model. */
1571 		case 0x43:
1572 			ret = "Pentium II";
1573 			break;
1574 		case 0x44:
1575 		case 0x45:
1576 			ret = "Pentium II Xeon";
1577 			break;
1578 		}
1579 		break;
1580 	case 7:
1581 		switch (cpu_cache_edx & 0xFF) {
1582 		/* 0x40 - 0x42 should not exist in this model. */
1583 		case 0x43:
1584 			ret = "Pentium III";
1585 			break;
1586 		case 0x44:
1587 		case 0x45:
1588 			ret = "Pentium III Xeon";
1589 			break;
1590 		}
1591 		break;
1592 	}
1593 
1594 	return (ret);
1595 }
1596 
/*
 * Name VIA/Cyrix C3 model 7 parts; steppings below 8 are Samuel 2
 * cores, 8 and up are Ezra.  Returns NULL for unknown models.
 */
char *
cyrix3_cpu_name(int model, int step)
{
	if (model != 7)
		return NULL;

	return step < 8 ? "C3 Samuel 2" : "C3 Ezra";
}
1612 
1613 /*
1614  * Print identification for the given CPU.
1615  * XXX XXX
1616  * This is not as clean as one might like, because it references
1617  *
1618  * the "cpuid_level" and "cpu_vendor" globals.
1619  * cpuid_level isn't so bad, since both CPU's will hopefully
1620  * be of the same level.
1621  *
1622  * The Intel multiprocessor spec doesn't give us the cpu_vendor
1623  * information; however, the chance of multi-vendor SMP actually
1624  * ever *working* is sufficiently low that it's probably safe to assume
1625  * all processors are of the same vendor.
1626  */
void
identifycpu(struct cpu_info *ci)
{
	const char *name, *modifier, *vendorname, *token;
	int class = CPUCLASS_486, vendor, i, max;
	int family, model, step, modif, cachesize;
	const struct cpu_cpuid_nameclass *cpup = NULL;
	char *brandstr_from, *brandstr_to;
	char *cpu_device = ci->ci_dev->dv_xname;
	int skipspace;
	extern uint32_t cpu_meltdown;
	uint64_t msr, nmsr;

	if (cpuid_level == -1) {
		/* No CPUID instruction at all: report a 486DX. */
		name = "486DX";
		vendor = CPUVENDOR_INTEL;
		vendorname = "Intel";
		model = -1;
		step = -1;
		class = CPUCLASS_486;
		ci->cpu_setup = NULL;
		modifier = "";
		token = "";
	} else {
		/* Decode family/model/stepping from the CPUID signature. */
		max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]);
		modif = (ci->ci_signature >> 12) & 3;
		family = (ci->ci_signature >> 8) & 15;
		ci->ci_family = family;
		model = (ci->ci_signature >> 4) & 15;
		ci->ci_model = model;
		step = ci->ci_signature & 15;
#ifdef CPUDEBUG
		printf("%s: cpuid level %d cache eax %x ebx %x ecx %x edx %x\n",
		    cpu_device, cpuid_level, cpu_cache_eax, cpu_cache_ebx,
		    cpu_cache_ecx, cpu_cache_edx);
#endif
		if (family < CPU_MINFAMILY)
			panic("identifycpu: strange family value");

		/* Match the CPUID vendor string against the name table. */
		for (i = 0; i < max; i++) {
			if (!strncmp(cpu_vendor,
			    i386_cpuid_cpus[i].cpu_id, 12)) {
				cpup = &i386_cpuid_cpus[i];
				break;
			}
		}

		if (cpup == NULL) {
			/* Unknown vendor: derive class from the family. */
			vendor = CPUVENDOR_UNKNOWN;
			if (cpu_vendor[0] != '\0')
				vendorname = &cpu_vendor[0];
			else
				vendorname = "Unknown";
			if (family > CPU_MAXFAMILY)
				family = CPU_MAXFAMILY;
			class = family - 3;
			if (class > CPUCLASS_686)
				class = CPUCLASS_686;
			modifier = "";
			name = "";
			token = "";
			ci->cpu_setup = NULL;
		} else {
			token = cpup->cpu_id;
			vendor = cpup->cpu_vendor;
			vendorname = cpup->cpu_vendorname;
			/*
			 * Special hack for the VIA C3 series.
			 *
			 * VIA bought Centaur Technology from IDT in Aug 1999
			 * and marketed the processors as VIA Cyrix III/C3.
			 */
			if (vendor == CPUVENDOR_IDT && family >= 6) {
				vendor = CPUVENDOR_VIA;
				vendorname = "VIA";
			}
			modifier = modifiers[modif];
			if (family > CPU_MAXFAMILY) {
				family = CPU_MAXFAMILY;
				model = CPU_DEFMODEL;
			} else if (model > CPU_MAXMODEL)
				model = CPU_DEFMODEL;
			i = family - CPU_MINFAMILY;

			/* store extended family/model values for later use */
			if ((vendor == CPUVENDOR_INTEL &&
			    (family == 0x6 || family == 0xf)) ||
			    (vendor == CPUVENDOR_AMD && family == 0xf)) {
				ci->ci_family += (ci->ci_signature >> 20) &
				    0xff;
				ci->ci_model += ((ci->ci_signature >> 16) &
				    0x0f) << 4;
			}

			/* Special hack for the PentiumII/III series. */
			if (vendor == CPUVENDOR_INTEL && family == 6 &&
			    (model == 5 || model == 7)) {
				name = intel686_cpu_name(model);
			/* Special hack for the VIA C3 series. */
			} else if (vendor == CPUVENDOR_VIA && family == 6 &&
			    model == 7) {
				name = cyrix3_cpu_name(model, step);
			/* Special hack for the TMS5x00 series. */
			} else if (vendor == CPUVENDOR_TRANSMETA &&
			    family == 5 && model == 4) {
				name = tm86_cpu_name(model);
			} else
				name = cpup->cpu_family[i].cpu_models[model];
			if (name == NULL) {
				name = cpup->cpu_family[i].cpu_models[CPU_DEFMODEL];
				if (name == NULL)
					name = "";
			}
			class = cpup->cpu_family[i].cpu_class;
			ci->cpu_setup = cpup->cpu_family[i].cpu_setup;
		}
	}

	/* Find the amount of on-chip L2 cache. */
	cachesize = -1;
	if (vendor == CPUVENDOR_INTEL && cpuid_level >= 2 && family < 0xf) {
		int intel_cachetable[] = { 0, 128, 256, 512, 1024, 2048 };

		if ((cpu_cache_edx & 0xFF) >= 0x40 &&
		    (cpu_cache_edx & 0xFF) <= 0x45)
			cachesize = intel_cachetable[(cpu_cache_edx & 0xFF) - 0x40];
	} else if (vendor == CPUVENDOR_AMD && class == CPUCLASS_686) {
		u_int regs[4];
		cpuid(0x80000000, regs);

		if (regs[0] >= 0x80000006) {
			cpuid(0x80000006, regs);
			cachesize = (regs[2] >> 16);
		}
	}

	if (ci->ci_feature_flags & CPUID_CFLUSH) {
		u_int regs[4];

		/* to get the cacheline size you must do cpuid
		 * with eax 0x01
		 */

		cpuid(0x01, regs);
		ci->ci_cflushsz = ((regs[1] >> 8) & 0xff) * 8;
	}

	if (vendor == CPUVENDOR_INTEL) {
		/*
		 * PIII, Core Solo and Core Duo CPUs have known
		 * errata stating:
		 * "Page with PAT set to WC while associated MTRR is UC
		 * may consolidate to UC".
		 * Because of this it is best we just fallback to mtrrs
		 * in this case.
		 */
		if (ci->ci_family == 6 && ci->ci_model < 15)
		    ci->ci_feature_flags &= ~CPUID_PAT;
	}

	/* Remove leading, trailing and duplicated spaces from cpu_brandstr */
	brandstr_from = brandstr_to = cpu_brandstr;
	skipspace = 1;
	while (*brandstr_from != '\0') {
		if (!skipspace || *brandstr_from != ' ') {
			skipspace = 0;
			*(brandstr_to++) = *brandstr_from;
		}
		if (*brandstr_from == ' ')
			skipspace = 1;
		brandstr_from++;
	}
	/* skipspace set here means the copy ended in a space: trim it. */
	if (skipspace && brandstr_to > cpu_brandstr)
		brandstr_to--;
	*brandstr_to = '\0';

	/* Synthesize a brand string when CPUID did not provide one. */
	if (cpu_brandstr[0] == '\0') {
		snprintf(cpu_brandstr, 48 /* sizeof(cpu_brandstr) */,
		    "%s %s%s", vendorname, modifier, name);
	}

	/* Compose the cpu_model identification string. */
	if (cachesize > -1) {
		snprintf(cpu_model, sizeof(cpu_model),
		    "%s (%s%s%s%s-class, %dKB L2 cache)",
		    cpu_brandstr,
		    ((*token) ? "\"" : ""), ((*token) ? token : ""),
		    ((*token) ? "\" " : ""), classnames[class], cachesize);
	} else {
		snprintf(cpu_model, sizeof(cpu_model),
		    "%s (%s%s%s%s-class)",
		    cpu_brandstr,
		    ((*token) ? "\"" : ""), ((*token) ? token : ""),
		    ((*token) ? "\" " : ""), classnames[class]);
	}

	printf("%s: %s", cpu_device, cpu_model);

	if (ci->ci_feature_flags && (ci->ci_feature_flags & CPUID_TSC)) {
		/* Has TSC, check if it's constant */
		switch (vendor) {
		case CPUVENDOR_INTEL:
			if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
			    (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
				ci->ci_flags |= CPUF_CONST_TSC;
			}
			break;
		case CPUVENDOR_VIA:
			if (ci->ci_model >= 0x0f) {
				ci->ci_flags |= CPUF_CONST_TSC;
			}
			break;
		}
		calibrate_cyclecounter();
		/* Print GHz with rounding above ~1 GHz, else plain MHz. */
		if (cpuspeed > 994) {
			int ghz, fr;

			ghz = (cpuspeed + 9) / 1000;
			fr = ((cpuspeed + 9) / 10 ) % 100;
			if (fr)
				printf(" %d.%02d GHz", ghz, fr);
			else
				printf(" %d GHz", ghz);
		} else {
			printf(" %d MHz", cpuspeed);
		}
	}

	if (cpuid_level != -1)
		printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
		    step);

	/* Report the microcode patch level (skipped under a hypervisor). */
	if ((cpu_ecxfeature & CPUIDECX_HV) == 0) {
		uint64_t level = 0;
		uint32_t dummy;

		if (strcmp(cpu_vendor, "AuthenticAMD") == 0 &&
		    ci->ci_family >= 0x0f) {
			level = rdmsr(MSR_PATCH_LEVEL);
		} else if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
		    ci->ci_family >= 6) {
			wrmsr(MSR_BIOS_SIGN, 0);
			CPUID(1, dummy, dummy, dummy, dummy);
			level = rdmsr(MSR_BIOS_SIGN) >> 32;
		}
		if (level != 0)
			printf(", patch %08llx", level);
	}

	printf("\n");

	/* Print the comma-separated feature flag names. */
	if (ci->ci_feature_flags) {
		int numbits = 0;

		printf("%s: ", cpu_device);
		max = sizeof(i386_cpuid_features) /
		    sizeof(i386_cpuid_features[0]);
		for (i = 0; i < max; i++) {
			if (ci->ci_feature_flags &
			    i386_cpuid_features[i].feature_bit) {
				printf("%s%s", (numbits == 0 ? "" : ","),
				    i386_cpuid_features[i].feature_name);
				numbits++;
			}
		}
		max = sizeof(i386_cpuid_ecxfeatures)
			/ sizeof(i386_cpuid_ecxfeatures[0]);
		for (i = 0; i < max; i++) {
			if (cpu_ecxfeature &
			    i386_cpuid_ecxfeatures[i].feature_bit) {
				printf("%s%s", (numbits == 0 ? "" : ","),
				    i386_cpuid_ecxfeatures[i].feature_name);
				numbits++;
			}
		}
		for (i = 0; i < nitems(i386_ecpuid_features); i++) {
			if (ecpu_feature &
			    i386_ecpuid_features[i].feature_bit) {
				printf("%s%s", (numbits == 0 ? "" : ","),
				    i386_ecpuid_features[i].feature_name);
				numbits++;
			}
		}
		for (i = 0; i < nitems(i386_ecpuid_ecxfeatures); i++) {
			if (ecpu_ecxfeature &
			    i386_ecpuid_ecxfeatures[i].feature_bit) {
				printf("%s%s", (numbits == 0 ? "" : ","),
				    i386_ecpuid_ecxfeatures[i].feature_name);
				numbits++;
			}
		}
		for (i = 0; i < nitems(i386_cpuid_eaxperf); i++) {
			if (cpu_perf_eax &
			    i386_cpuid_eaxperf[i].feature_bit) {
				printf("%s%s", (numbits == 0 ? "" : ","),
				    i386_cpuid_eaxperf[i].feature_name);
				numbits++;
			}
		}
		for (i = 0; i < nitems(i386_cpuid_edxapmi); i++) {
			if (cpu_apmi_edx &
			    i386_cpuid_edxapmi[i].feature_bit) {
				printf("%s%s", (numbits == 0 ? "" : ","),
				    i386_cpuid_edxapmi[i].feature_name);
				numbits++;
			}
		}

		if (cpuid_level >= 0x07) {
			u_int dummy;

			/* "Structured Extended Feature Flags" */
			CPUID_LEAF(0x7, 0, dummy,
			    ci->ci_feature_sefflags_ebx,
			    ci->ci_feature_sefflags_ecx,
			    ci->ci_feature_sefflags_edx);
			for (i = 0; i < nitems(cpu_seff0_ebxfeatures); i++)
				if (ci->ci_feature_sefflags_ebx &
				    cpu_seff0_ebxfeatures[i].feature_bit)
					printf("%s%s",
					    (numbits == 0 ? "" : ","),
					    cpu_seff0_ebxfeatures[i].feature_name);
			for (i = 0; i < nitems(cpu_seff0_ecxfeatures); i++)
				if (ci->ci_feature_sefflags_ecx &
				    cpu_seff0_ecxfeatures[i].feature_bit)
					printf("%s%s",
					    (numbits == 0 ? "" : ","),
					    cpu_seff0_ecxfeatures[i].feature_name);
			for (i = 0; i < nitems(cpu_seff0_edxfeatures); i++)
				if (ci->ci_feature_sefflags_edx &
				    cpu_seff0_edxfeatures[i].feature_bit)
					printf("%s%s",
					    (numbits == 0 ? "" : ","),
					    cpu_seff0_edxfeatures[i].feature_name);
		}

		if (!strcmp(cpu_vendor, "GenuineIntel") &&
		    cpuid_level >= 0x06 ) {
			u_int dummy;

			/* CPUID leaf 0x06: thermal/power management flags. */
			CPUID(0x06, ci->ci_feature_tpmflags, dummy,
			    dummy, dummy);
			max = nitems(cpu_tpm_eaxfeatures);
			for (i = 0; i < max; i++)
				if (ci->ci_feature_tpmflags &
				    cpu_tpm_eaxfeatures[i].feature_bit)
					printf(",%s", cpu_tpm_eaxfeatures[i].feature_name);
		}

		/* xsave subfeatures */
		if (cpuid_level >= 0xd) {
			uint32_t dummy, val;

			CPUID_LEAF(0xd, 1, val, dummy, dummy, dummy);
			for (i = 0; i < nitems(cpu_xsave_extfeatures); i++)
				if (val & cpu_xsave_extfeatures[i].feature_bit)
					printf(",%s",
					    cpu_xsave_extfeatures[i].feature_name);
		}

		if (cpu_meltdown)
			printf(",MELTDOWN");

		printf("\n");
	}

	/*
	 * "Mitigation G-2" per AMD's Whitepaper "Software Techniques
	 * for Managing Speculation on AMD Processors"
	 *
	 * By setting MSR C001_1029[1]=1, LFENCE becomes a dispatch
	 * serializing instruction.
	 *
	 * This MSR is available on all AMD families >= 10h, except 11h
	 * where LFENCE is always serializing.
	 */
	if (!strcmp(cpu_vendor, "AuthenticAMD")) {
		if (ci->ci_family >= 0x10 && ci->ci_family != 0x11) {
			nmsr = msr = rdmsr(MSR_DE_CFG);
			nmsr |= DE_CFG_SERIALIZE_LFENCE;
			if (msr != nmsr)
				wrmsr(MSR_DE_CFG, nmsr);
		}
		if (family == 0x17 && ci->ci_model >= 0x31 &&
		    (cpu_ecxfeature & CPUIDECX_HV) == 0) {
			nmsr = msr = rdmsr(MSR_DE_CFG);
			nmsr |= DE_CFG_SERIALIZE_9;
			if (msr != nmsr)
				wrmsr(MSR_DE_CFG, nmsr);
		}
	}

	/*
	 * Attempt to disable Silicon Debug and lock the configuration
	 * if it's enabled and unlocked.
	 */
	if (!strcmp(cpu_vendor, "GenuineIntel") &&
	    (cpu_ecxfeature & CPUIDECX_SDBG)) {
		uint64_t msr;

		msr = rdmsr(IA32_DEBUG_INTERFACE);
		if ((msr & IA32_DEBUG_INTERFACE_ENABLE) &&
		    (msr & IA32_DEBUG_INTERFACE_LOCK) == 0) {
			msr &= IA32_DEBUG_INTERFACE_MASK;
			msr |= IA32_DEBUG_INTERFACE_LOCK;
			wrmsr(IA32_DEBUG_INTERFACE, msr);
		} else if (msr & IA32_DEBUG_INTERFACE_ENABLE)
			printf("%s: cannot disable silicon debug\n",
			    cpu_device);
	}

	/* Record RNG instructions and hook SMAP, once, on the boot CPU. */
	if (CPU_IS_PRIMARY(ci)) {
		if (cpu_ecxfeature & CPUIDECX_RDRAND)
			has_rdrand = 1;
		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
			has_rdseed = 1;
		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
			replacesmap();
	}

#ifndef SMALL_KERNEL
	if (cpuspeed != 0 && cpu_cpuspeed == NULL)
		cpu_cpuspeed = pentium_cpuspeed;
#endif

	cpu_class = class;

	ci->cpu_class = class;

	/*
	 * Enable ring 0 write protection.
	 */
	lcr0(rcr0() | CR0_WP);

	/*
	 * If we have FXSAVE/FXRESTOR, use them.
	 */
	if (cpu_feature & CPUID_FXSR) {
		i386_use_fxsave = 1;
		lcr4(rcr4() | CR4_OSFXSR);

		/*
		 * If we have SSE/SSE2, enable XMM exceptions, and
		 * notify userland.
		 */
		if (cpu_feature & (CPUID_SSE|CPUID_SSE2)) {
			if (cpu_feature & CPUID_SSE)
				i386_has_sse = 1;
			if (cpu_feature & CPUID_SSE2)
				i386_has_sse2 = 1;
			lcr4(rcr4() | CR4_OSXMMEXCPT);
		}
	} else
		i386_use_fxsave = 0;

}
2082 
2083 char *
2084 tm86_cpu_name(int model)
2085 {
2086 	u_int32_t regs[4];
2087 	char *name = NULL;
2088 
2089 	cpuid(0x80860001, regs);
2090 
2091 	switch (model) {
2092 	case 4:
2093 		if (((regs[1] >> 16) & 0xff) >= 0x3)
2094 			name = "TMS5800";
2095 		else
2096 			name = "TMS5600";
2097 	}
2098 
2099 	return name;
2100 }
2101 
2102 #ifndef SMALL_KERNEL
2103 void
2104 cyrix3_get_bus_clock(struct cpu_info *ci)
2105 {
2106 	u_int64_t msr;
2107 	int bus;
2108 
2109 	msr = rdmsr(MSR_EBL_CR_POWERON);
2110 	bus = (msr >> 18) & 0x3;
2111 	switch (bus) {
2112 	case 0:
2113 		bus_clock = BUS100;
2114 		break;
2115 	case 1:
2116 		bus_clock = BUS133;
2117 		break;
2118 	case 2:
2119 		bus_clock = BUS200;
2120 		break;
2121 	case 3:
2122 		bus_clock = BUS166;
2123 		break;
2124 	}
2125 }
2126 
2127 void
2128 p4_get_bus_clock(struct cpu_info *ci)
2129 {
2130 	u_int64_t msr;
2131 	int model, bus;
2132 
2133 	model = (ci->ci_signature >> 4) & 15;
2134 	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
2135 	if (model < 2) {
2136 		bus = (msr >> 21) & 0x7;
2137 		switch (bus) {
2138 		case 0:
2139 			bus_clock = BUS100;
2140 			break;
2141 		case 1:
2142 			bus_clock = BUS133;
2143 			break;
2144 		default:
2145 			printf("%s: unknown Pentium 4 (model %d) "
2146 			    "EBC_FREQUENCY_ID value %d\n",
2147 			    ci->ci_dev->dv_xname, model, bus);
2148 			break;
2149 		}
2150 	} else {
2151 		bus = (msr >> 16) & 0x7;
2152 		switch (bus) {
2153 		case 0:
2154 			bus_clock = (model == 2) ? BUS100 : BUS266;
2155 			break;
2156 		case 1:
2157 			bus_clock = BUS133;
2158 			break;
2159 		case 2:
2160 			bus_clock = BUS200;
2161 			break;
2162 		case 3:
2163 			bus_clock = BUS166;
2164 			break;
2165 		default:
2166 			printf("%s: unknown Pentium 4 (model %d) "
2167 			    "EBC_FREQUENCY_ID value %d\n",
2168 			    ci->ci_dev->dv_xname, model, bus);
2169 			break;
2170 		}
2171 	}
2172 }
2173 
/*
 * Determine the front-side bus clock for P6-family and derived CPUs
 * and record it in bus_clock.  Which MSR holds the FSB code, and how
 * it is encoded, varies per model; models without an FSB are
 * deliberately left alone.
 */
void
p3_get_bus_clock(struct cpu_info *ci)
{
	u_int64_t msr;
	int bus;

	switch (ci->ci_model) {
	case 0x9: /* Pentium M (130 nm, Banias) */
		bus_clock = BUS100;
		break;
	case 0xd: /* Pentium M (90 nm, Dothan) */
		/* FSB code is in the low 3 bits of MSR_FSB_FREQ. */
		msr = rdmsr(MSR_FSB_FREQ);
		bus = (msr >> 0) & 0x7;
		switch (bus) {
		case 0:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		default:
			printf("%s: unknown Pentium M FSB_FREQ value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	case 0x15:	/* EP80579 no FSB */
		break;
	case 0xe: /* Core Duo/Solo */
	case 0xf: /* Core Xeon */
	case 0x16: /* 65nm Celeron */
	case 0x17: /* Core 2 Extreme/45nm Xeon */
	case 0x1d: /* Xeon MP 7400 */
		msr = rdmsr(MSR_FSB_FREQ);
		bus = (msr >> 0) & 0x7;
		switch (bus) {
		case 5:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 3:
			bus_clock = BUS166;
			break;
		case 2:
			bus_clock = BUS200;
			break;
		case 0:
			bus_clock = BUS266;
			break;
		case 4:
			bus_clock = BUS333;
			break;
		default:
			printf("%s: unknown Core FSB_FREQ value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	case 0x1c: /* Atom */
	case 0x26: /* Atom Z6xx */
	case 0x36: /* Atom [DN]2xxx */
		msr = rdmsr(MSR_FSB_FREQ);
		bus = (msr >> 0) & 0x7;
		switch (bus) {
		case 5:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 3:
			bus_clock = BUS166;
			break;
		case 2:
			bus_clock = BUS200;
			break;
		default:
			printf("%s: unknown Atom FSB_FREQ value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	case 0x1: /* Pentium Pro, model 1 */
	case 0x3: /* Pentium II, model 3 */
	case 0x5: /* Pentium II, II Xeon, Celeron, model 5 */
	case 0x6: /* Celeron, model 6 */
	case 0x7: /* Pentium III, III Xeon, model 7 */
	case 0x8: /* Pentium III, III Xeon, Celeron, model 8 */
	case 0xa: /* Pentium III Xeon, model A */
	case 0xb: /* Pentium III, model B */
		/* Classic P6: 2-bit FSB code in EBL_CR_POWERON[19:18]. */
		msr = rdmsr(MSR_EBL_CR_POWERON);
		bus = (msr >> 18) & 0x3;
		switch (bus) {
		case 0:
			bus_clock = BUS66;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 2:
			bus_clock = BUS100;
			break;
		default:
			printf("%s: unknown i686 EBL_CR_POWERON value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	default:
		/* no FSB on modern Intel processors */
		break;
	}
	return;
print_msr:
	/*
	 * Show the EBL_CR_POWERON MSR, so we'll at least have
	 * some extra information, such as clock ratio, etc.
	 */
	printf(" (0x%llx)\n", rdmsr(MSR_EBL_CR_POWERON));
}
2296 
2297 void
2298 p4_update_cpuspeed(void)
2299 {
2300 	struct cpu_info *ci;
2301 	u_int64_t msr;
2302 	int mult;
2303 
2304 	ci = curcpu();
2305 	p4_get_bus_clock(ci);
2306 
2307 	if (bus_clock == 0) {
2308 		printf("p4_update_cpuspeed: unknown bus clock\n");
2309 		return;
2310 	}
2311 
2312 	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
2313 	mult = ((msr >> 24) & 0xff);
2314 
2315 	cpuspeed = (bus_clock * mult) / 100;
2316 }
2317 
2318 void
2319 p3_update_cpuspeed(void)
2320 {
2321 	struct cpu_info *ci;
2322 	u_int64_t msr;
2323 	int mult;
2324 	const u_int8_t mult_code[] = {
2325 	    50, 30, 40, 0, 55, 35, 45, 0, 0, 70, 80, 60, 0, 75, 0, 65 };
2326 
2327 	ci = curcpu();
2328 	p3_get_bus_clock(ci);
2329 
2330 	if (bus_clock == 0) {
2331 		printf("p3_update_cpuspeed: unknown bus clock\n");
2332 		return;
2333 	}
2334 
2335 	msr = rdmsr(MSR_EBL_CR_POWERON);
2336 	mult = (msr >> 22) & 0xf;
2337 	mult = mult_code[mult];
2338 	if (!p3_early)
2339 		mult += ((msr >> 27) & 0x1) * 40;
2340 
2341 	cpuspeed = (bus_clock * mult) / 1000;
2342 }
2343 
2344 int
2345 pentium_cpuspeed(int *freq)
2346 {
2347 	*freq = cpuspeed;
2348 	return (0);
2349 }
2350 #endif	/* !SMALL_KERNEL */
2351 
2352 /*
2353  * Send an interrupt to process.
2354  *
2355  * Stack is set up to allow sigcode stored
2356  * in u. to call routine, followed by kcall
2357  * to sigreturn routine below.  After sigreturn
2358  * resets the signal mask, the stack, and the
2359  * frame pointer, it returns to the user
2360  * specified pc, psl.
2361  */
2362 int
2363 sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip,
2364     int info, int onstack)
2365 {
2366 	struct proc *p = curproc;
2367 	struct trapframe *tf = p->p_md.md_regs;
2368 	struct sigframe *fp, frame;
2369 	register_t sp;
2370 
2371 	/*
2372 	 * Build the argument list for the signal handler.
2373 	 */
2374 	bzero(&frame, sizeof(frame));
2375 	frame.sf_signum = sig;
2376 
2377 	/*
2378 	 * Allocate space for the signal handler context.
2379 	 */
2380 	if ((p->p_sigstk.ss_flags & SS_DISABLE) == 0 &&
2381 	    !sigonstack(tf->tf_esp) && onstack)
2382 		sp = trunc_page((vaddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size);
2383 	else
2384 		sp = tf->tf_esp;
2385 
2386 	frame.sf_sc.sc_fpstate = NULL;
2387 	if (p->p_md.md_flags & MDP_USEDFPU) {
2388 		npxsave_proc(p, 1);
2389 		sp -= sizeof(union savefpu);
2390 		sp &= ~0xf;	/* for XMM regs */
2391 		frame.sf_sc.sc_fpstate = (void *)sp;
2392 		if (copyout(&p->p_addr->u_pcb.pcb_savefpu,
2393 		    (void *)sp, sizeof(union savefpu)))
2394 		    	return 1;
2395 
2396 		/* Signal handlers get a completely clean FP state */
2397 		p->p_md.md_flags &= ~MDP_USEDFPU;
2398 	}
2399 
2400 	fp = (struct sigframe *)sp - 1;
2401 	frame.sf_scp = &fp->sf_sc;
2402 	frame.sf_sip = NULL;
2403 	frame.sf_handler = catcher;
2404 
2405 	/*
2406 	 * Build the signal context to be used by sigreturn.
2407 	 */
2408 	frame.sf_sc.sc_err = tf->tf_err;
2409 	frame.sf_sc.sc_trapno = tf->tf_trapno;
2410 	frame.sf_sc.sc_mask = mask;
2411 	frame.sf_sc.sc_fs = tf->tf_fs;
2412 	frame.sf_sc.sc_gs = tf->tf_gs;
2413 	frame.sf_sc.sc_es = tf->tf_es;
2414 	frame.sf_sc.sc_ds = tf->tf_ds;
2415 	frame.sf_sc.sc_eflags = tf->tf_eflags;
2416 	frame.sf_sc.sc_edi = tf->tf_edi;
2417 	frame.sf_sc.sc_esi = tf->tf_esi;
2418 	frame.sf_sc.sc_ebp = tf->tf_ebp;
2419 	frame.sf_sc.sc_ebx = tf->tf_ebx;
2420 	frame.sf_sc.sc_edx = tf->tf_edx;
2421 	frame.sf_sc.sc_ecx = tf->tf_ecx;
2422 	frame.sf_sc.sc_eax = tf->tf_eax;
2423 	frame.sf_sc.sc_eip = tf->tf_eip;
2424 	frame.sf_sc.sc_cs = tf->tf_cs;
2425 	frame.sf_sc.sc_esp = tf->tf_esp;
2426 	frame.sf_sc.sc_ss = tf->tf_ss;
2427 
2428 	if (info) {
2429 		frame.sf_sip = &fp->sf_si;
2430 		frame.sf_si = *ksip;
2431 	}
2432 
2433 	/* XXX don't copyout siginfo if not needed? */
2434 	frame.sf_sc.sc_cookie = (long)&fp->sf_sc ^ p->p_p->ps_sigcookie;
2435 	if (copyout(&frame, fp, sizeof(frame)) != 0)
2436 		return 1;
2437 
2438 	/*
2439 	 * Build context to run handler in.
2440 	 */
2441 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
2442 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
2443 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
2444 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
2445 	tf->tf_eip = p->p_p->ps_sigcode;
2446 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
2447 	tf->tf_eflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
2448 	tf->tf_esp = (int)fp;
2449 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
2450 
2451 	return 0;
2452 }
2453 
2454 /*
2455  * System call to cleanup state after a signal
2456  * has been taken.  Reset signal mask and
2457  * stack state from context left by sendsig (above).
2458  * Return to previous pc and psl as specified by
2459  * context left by sendsig. Check carefully to
2460  * make sure that the user has not modified the
2461  * psl to gain improper privileges or to cause
2462  * a machine fault.
2463  */
2464 int
2465 sys_sigreturn(struct proc *p, void *v, register_t *retval)
2466 {
2467 	struct sys_sigreturn_args /* {
2468 		syscallarg(struct sigcontext *) sigcntxp;
2469 	} */ *uap = v;
2470 	struct sigcontext ksc, *scp = SCARG(uap, sigcntxp);
2471 	struct trapframe *tf = p->p_md.md_regs;
2472 	int error;
2473 
2474 	if (PROC_PC(p) != p->p_p->ps_sigcoderet) {
2475 		sigexit(p, SIGILL);
2476 		return (EPERM);
2477 	}
2478 
2479 	if ((error = copyin((caddr_t)scp, &ksc, sizeof(*scp))))
2480 		return (error);
2481 
2482 	if (ksc.sc_cookie != ((long)scp ^ p->p_p->ps_sigcookie)) {
2483 		sigexit(p, SIGILL);
2484 		return (EFAULT);
2485 	}
2486 
2487 	/* Prevent reuse of the sigcontext cookie */
2488 	ksc.sc_cookie = 0;
2489 	(void)copyout(&ksc.sc_cookie, (caddr_t)scp +
2490 	    offsetof(struct sigcontext, sc_cookie), sizeof (ksc.sc_cookie));
2491 
2492 	/*
2493 	 * Restore signal ksc.
2494 	 */
2495 	/*
2496 	 * Check for security violations.  If we're returning to
2497 	 * protected mode, the CPU will validate the segment registers
2498 	 * automatically and generate a trap on violations.  We handle
2499 	 * the trap, rather than doing all of the checking here.
2500 	 */
2501 	if (((ksc.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
2502 	    !USERMODE(ksc.sc_cs, ksc.sc_eflags))
2503 		return (EINVAL);
2504 
2505 	tf->tf_fs = ksc.sc_fs;
2506 	tf->tf_gs = ksc.sc_gs;
2507 	tf->tf_es = ksc.sc_es;
2508 	tf->tf_ds = ksc.sc_ds;
2509 	tf->tf_eflags = ksc.sc_eflags;
2510 	tf->tf_edi = ksc.sc_edi;
2511 	tf->tf_esi = ksc.sc_esi;
2512 	tf->tf_ebp = ksc.sc_ebp;
2513 	tf->tf_ebx = ksc.sc_ebx;
2514 	tf->tf_edx = ksc.sc_edx;
2515 	tf->tf_ecx = ksc.sc_ecx;
2516 	tf->tf_eax = ksc.sc_eax;
2517 	tf->tf_eip = ksc.sc_eip;
2518 	tf->tf_cs = ksc.sc_cs;
2519 	tf->tf_esp = ksc.sc_esp;
2520 	tf->tf_ss = ksc.sc_ss;
2521 
2522 	if (p->p_md.md_flags & MDP_USEDFPU)
2523 		npxsave_proc(p, 0);
2524 
2525 	if (ksc.sc_fpstate) {
2526 		union savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
2527 
2528 		if ((error = copyin(ksc.sc_fpstate, sfp, sizeof(*sfp))))
2529 			return (error);
2530 		if (i386_use_fxsave)
2531 			sfp->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask;
2532 		p->p_md.md_flags |= MDP_USEDFPU;
2533 	}
2534 
2535 	p->p_sigmask = ksc.sc_mask & ~sigcantmask;
2536 
2537 	return (EJUSTRETURN);
2538 }
2539 
#ifdef MULTIPROCESSOR
/* force a CPU into the kernel, whether or not it's idle */
void
cpu_kick(struct cpu_info *ci)
{
	/* nothing to do for the CPU we are running on */
	if (ci == curcpu())
		return;

	if (cpu_mwait_size > 0 && (ci->ci_mwait & MWAIT_IN_IDLE)) {
		/*
		 * The target is idling in mwait: clearing the
		 * "keep idling" bit is enough to wake it.
		 */
		atomic_clearbits_int(&ci->ci_mwait, MWAIT_KEEP_IDLING);
	} else {
		/* not idling (or no mwait support): send an IPI */
		i386_send_ipi(ci, I386_IPI_NOP);
	}
}
#endif
2564 
2565 /*
2566  * Notify the current process (p) that it has a signal pending,
2567  * process as soon as possible.
2568  */
2569 void
2570 signotify(struct proc *p)
2571 {
2572 	aston(p);
2573 	cpu_kick(p->p_cpu);
2574 }
2575 
#ifdef MULTIPROCESSOR
void
cpu_unidle(struct cpu_info *ci)
{
	if (cpu_mwait_size > 0 && (ci->ci_mwait & MWAIT_ONLY)) {
		/*
		 * mwait-only idling: dropping the "keep idling" bit
		 * wakes the CPU if it was idle, and is harmless if
		 * it was not.
		 */
		atomic_clearbits_int(&ci->ci_mwait, MWAIT_KEEP_IDLING);
	} else if (ci != curcpu()) {
		/* otherwise a remote CPU needs an IPI to wake up */
		i386_send_ipi(ci, I386_IPI_NOP);
	}
}
#endif
2593 
int	waittime = -1;		/* set >= 0 once boot() has synced disks */
struct pcb dumppcb;		/* registers saved by savectx() in dumpsys() */
2596 
/*
 * Halt, reboot or power down the machine.  `howto' is a mask of RB_*
 * flags controlling sync, crash dump, halt/powerdown and reset.
 * Does not return.
 */
__dead void
boot(int howto)
{
	if ((howto & RB_POWERDOWN) != 0)
		lid_action = 0;

	if ((howto & RB_RESET) != 0)
		goto doreset;

	/* if still cold we got here from an early panic: skip sync/dump */
	if (cold) {
		if ((howto & RB_USERREQ) == 0)
			howto |= RB_HALT;
		goto haltsys;
	}

	boothowto = howto;
	/* waittime guards against syncing twice if boot() is re-entered */
	if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
		waittime = 0;
		vfs_shutdown(curproc);

		if ((howto & RB_TIMEBAD) == 0) {
			resettodr();
		} else {
			printf("WARNING: not updating battery clock\n");
		}
	}
	if_downall();

	uvm_shutdown();
	splhigh();
	cold = 1;

	if ((howto & RB_DUMP) != 0)
		dumpsys();

haltsys:
	config_suspend_all(DVACT_POWERDOWN);

#ifdef MULTIPROCESSOR
	i386_broadcast_ipi(I386_IPI_HALT);
#endif

	if ((howto & RB_HALT) != 0) {
#if NACPI > 0 && !defined(SMALL_KERNEL)
		extern int acpi_enabled;

		if (acpi_enabled) {
			delay(500000);
			if ((howto & RB_POWERDOWN) != 0)
				acpi_powerdown();
		}
#endif

#if NAPM > 0
		if ((howto & RB_POWERDOWN) != 0) {
			int rv;

			printf("\nAttempting to power down...\n");
			/*
			 * Turn off, if we can.  But try to turn disk off and
			 * wait a bit first--some disk drives are slow to
			 * clean up and users have reported disk corruption.
			 *
			 * If apm_set_powstate() fails the first time, don't
			 * try to turn the system off.
			 */
			delay(500000);
			apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
			delay(500000);
			rv = apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
			if (rv == 0 || rv == ENXIO) {
				delay(500000);
				(void) apm_set_powstate(APM_DEV_ALLDEVS,
							APM_SYS_OFF);
			}
		}
#endif
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cnpollc(1);	/* for proper keyboard command handling */
		cngetc();
		cnpollc(0);
	}

doreset:
	printf("rebooting...\n");
	cpu_reset();
	for (;;)
		continue;
	/* NOTREACHED */
}
2689 
2690 /*
2691  * This is called by configure to set dumplo and dumpsize.
2692  * Dumps always skip the first block of disk space
2693  * in case there might be a disk label stored there.
2694  * If there is extra space, put dump at the end to
2695  * reduce the chance that swapping trashes it.
2696  */
2697 void
2698 dumpconf(void)
2699 {
2700 	int nblks;	/* size of dump area */
2701 	int i;
2702 
2703 	if (dumpdev == NODEV ||
2704 	    (nblks = (bdevsw[major(dumpdev)].d_psize)(dumpdev)) == 0)
2705 		return;
2706 	if (nblks <= ctod(1))
2707 		return;
2708 
2709 	/* Always skip the first block, in case there is a label there. */
2710 	if (dumplo < ctod(1))
2711 		dumplo = ctod(1);
2712 
2713 	for (i = 0; i < ndumpmem; i++)
2714 		dumpsize = max(dumpsize, dumpmem[i].end);
2715 
2716 	/* Put dump at end of partition, and make it fit. */
2717 	if (dumpsize > dtoc(nblks - dumplo - 1))
2718 		dumpsize = dtoc(nblks - dumplo - 1);
2719 	if (dumplo < nblks - ctod(dumpsize) - 1)
2720 		dumplo = nblks - ctod(dumpsize) - 1;
2721 }
2722 
2723 /*
2724  * cpu_dump: dump machine-dependent kernel core dump headers.
2725  */
2726 int
2727 cpu_dump(void)
2728 {
2729 	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
2730 	long buf[dbtob(1) / sizeof (long)];
2731 	kcore_seg_t	*segp;
2732 
2733 	dump = bdevsw[major(dumpdev)].d_dump;
2734 
2735 	segp = (kcore_seg_t *)buf;
2736 
2737 	/*
2738 	 * Generate a segment header.
2739 	 */
2740 	CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2741 	segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));
2742 
2743 	return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1)));
2744 }
2745 
2746 /*
2747  * Doadump comes here after turning off memory management and
2748  * getting on the dump stack, either when called above, or by
2749  * the auto-restart code.
2750  */
2751 static vaddr_t dumpspace;
2752 
2753 vaddr_t
2754 reserve_dumppages(vaddr_t p)
2755 {
2756 
2757 	dumpspace = p;
2758 	return (p + PAGE_SIZE);
2759 }
2760 
/*
 * Write all RAM segments listed in dumpmem[] to the dump device,
 * preceded by the cpu_dump() header.  Runs in a degraded environment
 * (may be called from panic); progress is reported on the console.
 */
void
dumpsys(void)
{
	u_int i, j, npg;
	int maddr;
	daddr_t blkno;
	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
	int error;
	char *str;
	extern int msgbufmapped;

	/* Save registers. */
	savectx(&dumppcb);

	msgbufmapped = 0;	/* don't record dump msgs in msgbuf */
	if (dumpdev == NODEV)
		return;

	/*
	 * For dumps during autoconfiguration,
	 * if dump device has already configured...
	 */
	if (dumpsize == 0)
		dumpconf();
	if (dumplo < 0)
		return;
	printf("\ndumping to dev %x, offset %ld\n", dumpdev, dumplo);

	error = (*bdevsw[major(dumpdev)].d_psize)(dumpdev);
	printf("dump ");
	if (error == -1) {
		printf("area unavailable\n");
		return;
	}

#if 0	/* XXX this doesn't work.  grr. */
	/* toss any characters present prior to dump */
	while (sget() != NULL); /*syscons and pccons differ */
#endif

	/* scan through the dumpmem list */
	dump = bdevsw[major(dumpdev)].d_dump;
	error = cpu_dump();
	for (i = 0; !error && i < ndumpmem; i++) {

		npg = dumpmem[i].end - dumpmem[i].start;
		maddr = ptoa(dumpmem[i].start);
		/* +1: block 0 holds the cpu_dump() header */
		blkno = dumplo + btodb(maddr) + 1;
#if 0
		printf("(%d %lld %d) ", maddr, (long long)blkno, npg);
#endif
		for (j = npg; j--; maddr += NBPG, blkno += btodb(NBPG)) {

			/* Print out how many MBs we have more to go. */
			if (dbtob(blkno - dumplo) % (1024 * 1024) < NBPG)
				printf("%ld ",
				    (ptoa(dumpsize) - maddr) / (1024 * 1024));
#if 0
			printf("(%x %lld) ", maddr, (long long)blkno);
#endif
			/* map the physical page through the dumpspace window */
			pmap_enter(pmap_kernel(), dumpspace, maddr,
			    PROT_READ, PMAP_WIRED);
			if ((error = (*dump)(dumpdev, blkno,
			    (caddr_t)dumpspace, NBPG)))
				break;

#if 0	/* XXX this doesn't work.  grr. */
			/* operator aborting dump? */
			if (sget() != NULL) {
				error = EINTR;
				break;
			}
#endif
		}
	}

	switch (error) {

	case 0:		str = "succeeded\n\n";			break;
	case ENXIO:	str = "device bad\n\n";			break;
	case EFAULT:	str = "device not ready\n\n";		break;
	case EINVAL:	str = "area improper\n\n";		break;
	case EIO:	str = "i/o error\n\n";			break;
	case EINTR:	str = "aborted from console\n\n";	break;
	default:	str = "error %d\n\n";			break;
	}
	printf(str, error);

	delay(5000000);		/* 5 seconds */
}
2851 
2852 /*
2853  * Clear registers on exec
2854  */
2855 void
2856 setregs(struct proc *p, struct exec_package *pack, u_long stack,
2857     struct ps_strings *arginfo)
2858 {
2859 	struct pcb *pcb = &p->p_addr->u_pcb;
2860 	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
2861 	struct trapframe *tf = p->p_md.md_regs;
2862 
2863 #if NNPX > 0
2864 	/* If we were using the FPU, forget about it. */
2865 	if (pcb->pcb_fpcpu != NULL)
2866 		npxsave_proc(p, 0);
2867 	p->p_md.md_flags &= ~MDP_USEDFPU;
2868 #endif
2869 
2870 	initcodesegment(&pmap->pm_codeseg);
2871 	setsegment(&pcb->pcb_threadsegs[TSEG_FS], 0,
2872 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
2873 	setsegment(&pcb->pcb_threadsegs[TSEG_GS], 0,
2874 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
2875 
2876 	/*
2877 	 * And update the GDT since we return to the user process
2878 	 * by leaving the syscall (we don't do another pmap_activate()).
2879 	 */
2880 	curcpu()->ci_gdt[GUCODE_SEL].sd = pmap->pm_codeseg;
2881 	curcpu()->ci_gdt[GUFS_SEL].sd = pcb->pcb_threadsegs[TSEG_FS];
2882 	curcpu()->ci_gdt[GUGS_SEL].sd = pcb->pcb_threadsegs[TSEG_GS];
2883 
2884 	/*
2885 	 * And reset the hiexec marker in the pmap.
2886 	 */
2887 	pmap->pm_hiexec = 0;
2888 
2889 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
2890 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
2891 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
2892 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
2893 	tf->tf_edi = 0;
2894 	tf->tf_esi = 0;
2895 	tf->tf_ebp = 0;
2896 	tf->tf_ebx = (int)p->p_p->ps_strings;
2897 	tf->tf_edx = 0;
2898 	tf->tf_ecx = 0;
2899 	tf->tf_eax = 0;
2900 	tf->tf_eip = pack->ep_entry;
2901 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
2902 	tf->tf_eflags = PSL_USERSET;
2903 	tf->tf_esp = stack;
2904 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
2905 }
2906 
2907 /*
2908  * Initialize segments and descriptor tables
2909  */
2910 
2911 /* IDT is now a full page, so we can map it in u-k */
2912 union {
2913 	struct gate_descriptor	idt[NIDT];
2914 	char			align[PAGE_SIZE];
2915 } _idt_region __aligned(PAGE_SIZE);
2916 #define idt_region _idt_region.idt
2917 struct gate_descriptor *idt = idt_region;
2918 
2919 extern  struct user *proc0paddr;
2920 
2921 void
2922 setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
2923     int seg)
2924 {
2925 
2926 	gd->gd_looffset = (int)func;
2927 	gd->gd_selector = GSEL(seg, SEL_KPL);
2928 	gd->gd_stkcpy = args;
2929 	gd->gd_xx = 0;
2930 	gd->gd_type = type;
2931 	gd->gd_dpl = dpl;
2932 	gd->gd_p = 1;
2933 	gd->gd_hioffset = (int)func >> 16;
2934 }
2935 
2936 void
2937 unsetgate(struct gate_descriptor *gd)
2938 {
2939 	gd->gd_p = 0;
2940 	gd->gd_hioffset = 0;
2941 	gd->gd_looffset = 0;
2942 	gd->gd_selector = 0;
2943 	gd->gd_xx = 0;
2944 	gd->gd_stkcpy = 0;
2945 	gd->gd_type = 0;
2946 	gd->gd_dpl = 0;
2947 }
2948 
2949 void
2950 setregion(struct region_descriptor *rd, void *base, size_t limit)
2951 {
2952 	rd->rd_limit = (int)limit;
2953 	rd->rd_base = (int)base;
2954 }
2955 
2956 void
2957 initcodesegment(struct segment_descriptor *cs)
2958 {
2959 	if (cpu_pae) {
2960 		/*
2961 		 * When code execution is managed using NX feature
2962 		 * in pmapae.c, GUCODE_SEL should cover userland.
2963 		 */
2964 		setsegment(cs, 0, atop(VM_MAXUSER_ADDRESS - 1),
2965 		    SDT_MEMERA, SEL_UPL, 1, 1);
2966 	} else {
2967 		/*
2968 		 * For pmap.c's non-PAE/NX line-in-the-sand execution, reset
2969 		 * the code segment limit to I386_MAX_EXE_ADDR in the pmap;
2970 		 * this gets copied into the GDT for GUCODE_SEL by
2971 		 * pmap_activate().  Similarly, reset the base of each of
2972 		 * the two thread data segments to zero in the pcb; they'll
2973 		 * get copied into the GDT for GUFS_SEL and GUGS_SEL.
2974 		 */
2975 		setsegment(cs, 0, atop(I386_MAX_EXE_ADDR - 1),
2976 		    SDT_MEMERA, SEL_UPL, 1, 1);
2977 	}
2978 }
2979 
2980 void
2981 setsegment(struct segment_descriptor *sd, void *base, size_t limit, int type,
2982     int dpl, int def32, int gran)
2983 {
2984 
2985 	sd->sd_lolimit = (int)limit;
2986 	sd->sd_lobase = (int)base;
2987 	sd->sd_type = type;
2988 	sd->sd_dpl = dpl;
2989 	sd->sd_p = 1;
2990 	sd->sd_hilimit = (int)limit >> 16;
2991 	sd->sd_xx = 0;
2992 	sd->sd_def32 = def32;
2993 	sd->sd_gran = gran;
2994 	sd->sd_hibase = (int)base >> 24;
2995 }
2996 
#define	IDTVEC(name)	__CONCAT(X, name)
/* trap and syscall entry points, defined in assembly (locore/vectors) */
extern int IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
    IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
    IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page),
    IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(mchk),
    IDTVEC(simd);

extern int IDTVEC(f00f_redirect);

/* set to 1 by fix_f00f() once the F00F workaround is in place */
int cpu_f00f_bug = 0;
3007 
/*
 * Work around the Pentium "F00F" erratum: relocate the IDT so that
 * its start lies on a read-only page, and redirect the page-fault
 * gate to a handler that recognizes faults caused by the workaround.
 */
void
fix_f00f(void)
{
	struct region_descriptor region;
	vaddr_t va;
	paddr_t pa;
	void *p;

	/* Allocate two new pages */
	va = (vaddr_t)km_alloc(NBPG*2, &kv_any, &kp_zero, &kd_waitok);
	/* IDT starts 7 entries before the page boundary, spanning both pages */
	p = (void *)(va + NBPG - 7*sizeof(*idt));

	/* Copy over old IDT */
	bcopy(idt, p, sizeof(idt_region));
	idt = p;

	/* Fix up paging redirect */
	setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386IGT, SEL_KPL,
	    GCODE_SEL);

	/* Map first page RO */
	pmap_pte_setbits(va, 0, PG_RW);

	/* add k-u read-only mappings XXX old IDT stays in place */
	/* XXX hshoexer: are f00f affected CPUs affected by meltdown? */
	pmap_extract(pmap_kernel(), va, &pa);
	pmap_enter_special(va, pa, PROT_READ, 0);
	pmap_extract(pmap_kernel(), va + PAGE_SIZE, &pa);
	pmap_enter_special(va + PAGE_SIZE, pa, PROT_READ, 0);

	/* Reload idtr */
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);

	/* Tell the rest of the world */
	cpu_f00f_bug = 1;
}
3045 
#ifdef MULTIPROCESSOR
/* Load the IDT register from the current (possibly relocated) `idt'. */
void
cpu_init_idt(void)
{
	struct region_descriptor region;
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
}
#endif /* MULTIPROCESSOR */
3055 
/*
 * Machine-dependent early bootstrap, called with the kernel mapped
 * and proc0's u-area at proc0paddr.  Sets up the boot CPU's GDT/IDT,
 * the I/O extent maps, the console, the pmap, and turns the BIOS
 * memory map from /boot into uvm physload segments.
 *
 * first_avail: first physical address not used by the kernel image.
 */
void
init386(paddr_t first_avail)
{
	int i, kb;
	struct region_descriptor region;
	bios_memmap_t *im;

	proc0.p_addr = proc0paddr;
	cpu_info_primary.ci_self = &cpu_info_primary;
	cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb;
	cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss;
	cpu_info_primary.ci_nmi_tss = &cpu_info_full_primary.cif_nmi_tss;
	cpu_info_primary.ci_gdt = (void *)&cpu_info_full_primary.cif_gdt;

	/* make bootstrap gdt gates and memory segments */
	setsegment(&cpu_info_primary.ci_gdt[GCODE_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GICODE_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GDATA_SEL].sd, 0, 0xfffff,
	    SDT_MEMRWA, SEL_KPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GUCODE_SEL].sd, 0,
	    atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GUDATA_SEL].sd, 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GCPU_SEL].sd, &cpu_info_primary,
	    sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0);
	setsegment(&cpu_info_primary.ci_gdt[GUFS_SEL].sd, 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GUGS_SEL].sd, 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GTSS_SEL].sd,
	    cpu_info_primary.ci_tss, sizeof(struct i386tss)-1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	setsegment(&cpu_info_primary.ci_gdt[GNMITSS_SEL].sd,
	    cpu_info_primary.ci_nmi_tss, sizeof(struct i386tss)-1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);

	/* exceptions */
	setgate(&idt[  0], &IDTVEC(div),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  1], &IDTVEC(dbg),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  2], NULL,             0, SDT_SYSTASKGT, SEL_KPL, GNMITSS_SEL);
	setgate(&idt[  3], &IDTVEC(bpt),     0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
	setgate(&idt[  4], &IDTVEC(ofl),     0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
	setgate(&idt[  5], &IDTVEC(bnd),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  6], &IDTVEC(ill),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  7], &IDTVEC(dna),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  8], &IDTVEC(dble),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  9], &IDTVEC(fpusegm), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 10], &IDTVEC(tss),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 11], &IDTVEC(missing), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 12], &IDTVEC(stk),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 13], &IDTVEC(prot),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 14], &IDTVEC(page),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 15], &IDTVEC(rsvd),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 16], &IDTVEC(fpu),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 17], &IDTVEC(align),   0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 18], &IDTVEC(mchk),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 19], &IDTVEC(simd),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	for (i = 20; i < NRSVIDT; i++)
		setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	for (i = NRSVIDT; i < NIDT; i++)
		unsetgate(&idt[i]);
	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);

	setregion(&region, cpu_info_primary.ci_gdt, GDT_SIZE - 1);
	lgdt(&region);
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);

	/*
	 * Initialize the I/O port and I/O mem extent maps.
	 * Note: we don't have to check the return value since
	 * creation of a fixed extent map will never fail (since
	 * descriptor storage has already been allocated).
	 *
	 * N.B. The iomem extent manages _all_ physical addresses
	 * on the machine.  When the amount of RAM is found, the two
	 * extents of RAM are allocated from the map (0 -> ISA hole
	 * and end of ISA hole -> end of RAM).
	 */
	ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF,
	    (caddr_t)ioport_ex_storage, sizeof(ioport_ex_storage),
	    EX_NOCOALESCE|EX_NOWAIT);
	iomem_ex = extent_create("iomem", 0x0, 0xffffffff, M_DEVBUF,
	    (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage),
	    EX_NOCOALESCE|EX_NOWAIT);

#if NISA > 0
	isa_defaultirq();
#endif

	/*
	 * Attach the glass console early in case we need to display a panic.
	 */
	cninit();

	/*
	 * Saving SSE registers won't work if the save area isn't
	 * 16-byte aligned.
	 */
	if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
		panic("init386: pcb_savefpu not 16-byte aligned");

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);

	/*
	 * Boot arguments are in a single page specified by /boot.
	 *
	 * We require the "new" vector form, as well as memory ranges
	 * to be given in bytes rather than KB.
	 */
	if ((bootapiver & (BAPIV_VECTOR | BAPIV_BMEMMAP)) ==
	    (BAPIV_VECTOR | BAPIV_BMEMMAP)) {
		if (bootargc > NBPG)
			panic("too many boot args");

		if (extent_alloc_region(iomem_ex, (paddr_t)bootargv, bootargc,
		    EX_NOWAIT))
			panic("cannot reserve /boot args memory");

		pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (paddr_t)bootargv,
		    PROT_READ | PROT_WRITE,
		    PROT_READ | PROT_WRITE | PMAP_WIRED);

		bios_getopt();

	} else
		panic("/boot too old: upgrade!");

#ifdef DIAGNOSTIC
	if (bios_memmap == NULL)
		panic("no BIOS memory map supplied");
#endif

	/*
	 * account all the memory passed in the map from /boot
	 * calculate avail_end and count the physmem.
	 */
	avail_end = 0;
	physmem = 0;
#ifdef DEBUG
	printf("memmap:");
#endif
	for(i = 0, im = bios_memmap; im->type != BIOS_MAP_END; im++)
		if (im->type == BIOS_MAP_FREE) {
			paddr_t a, e;
#ifdef DEBUG
			printf(" %llx-%llx", im->addr, im->addr + im->size);
#endif

			/* regions entirely above 4GB are unusable on i386 */
			if (im->addr >= 0x100000000ULL) {
#ifdef DEBUG
				printf("-H");
#endif
				continue;
			}

			a = round_page(im->addr);
			if (im->addr + im->size <= 0xfffff000ULL)
				e = trunc_page(im->addr + im->size);
			else {
#ifdef DEBUG
				printf("-T");
#endif
				e = 0xfffff000;
			}

			/* skip first 16 pages due to SMI corruption */
			if (a < 16 * NBPG)
				a = 16 * NBPG;

#ifdef MULTIPROCESSOR
			/* skip MP trampoline code page */
			if (a < MP_TRAMPOLINE + NBPG)
				a = MP_TRAMPOLINE + NBPG;

			/* skip MP trampoline data page */
			if (a < MP_TRAMP_DATA + NBPG)
				a = MP_TRAMP_DATA + NBPG;
#endif /* MULTIPROCESSOR */

#if NACPI > 0 && !defined(SMALL_KERNEL)
			/* skip ACPI resume trampoline code page */
			if (a < ACPI_TRAMPOLINE + NBPG)
				a = ACPI_TRAMPOLINE + NBPG;

			/* skip ACPI resume trampoline data page */
			if (a < ACPI_TRAMP_DATA + NBPG)
				a = ACPI_TRAMP_DATA + NBPG;
#endif /* ACPI */

#ifdef HIBERNATE
			/* skip hibernate reserved pages */
			if (a < HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE)
				a = HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE;
#endif /* HIBERNATE */

			/* skip shorter than page regions */
			if (a >= e || (e - a) < NBPG) {
#ifdef DEBUG
				printf("-S");
#endif
				continue;
			}

			/*
			 * XXX Some buggy ACPI BIOSes use memory that
			 * they declare as free. Current worst offender
			 * is Supermicro 5019D-FTN4.  Typically the
			 * affected memory areas are small blocks
			 * between areas reserved for ACPI and other
			 * BIOS goo.  So skip areas smaller than 32 MB
			 * above the 16 MB boundary (to avoid
			 * affecting legacy stuff).
			 */
			if (a > 16*1024*1024 && (e - a) < 32*1024*1024) {
#ifdef DEBUG
				printf("-X");
#endif
				continue;
			}

			/* skip legacy IO region */
			if ((a > IOM_BEGIN && a < IOM_END) ||
			    (e > IOM_BEGIN && e < IOM_END)) {
#ifdef DEBUG
				printf("-I");
#endif
				continue;
			}

			if (extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT))
				/* XXX What should we do? */
				printf("\nWARNING: CAN'T ALLOCATE RAM (%lx-%lx)"
				    " FROM IOMEM EXTENT MAP!\n", a, e);

			physmem += atop(e - a);
			dumpmem[i].start = atop(a);
			dumpmem[i].end = atop(e);
			i++;
			avail_end = max(avail_end, e);
		}

	ndumpmem = i;
	/* reserve space at the top of RAM for the message buffer */
	avail_end -= round_page(MSGBUFSIZE);

#ifdef DEBUG
	printf(": %lx\n", avail_end);
#endif
	if (physmem < atop(4 * 1024 * 1024)) {
		printf("\awarning: too little memory available;"
		    "running in degraded mode\npress a key to confirm\n\n");
		cnpollc(1);
		cngetc();
		cnpollc(0);
	}

#ifdef DEBUG
	printf("physload: ");
#endif
	kb = atop(KERNTEXTOFF - KERNBASE);
	if (kb > atop(IOM_END)) {
		paddr_t lim = atop(IOM_END);
#ifdef DEBUG
		printf(" %lx-%x (<16M)", lim, kb);
#endif
		uvm_page_physload(lim, kb, lim, kb, 0);
	}

	/* hand each usable RAM segment to uvm, clipped to avail limits */
	for (i = 0; i < ndumpmem; i++) {
		paddr_t a, e;

		a = dumpmem[i].start;
		e = dumpmem[i].end;
		if (a < atop(first_avail) && e > atop(first_avail))
			a = atop(first_avail);
		if (e > atop(avail_end))
			e = atop(avail_end);

		if (a < e) {
#ifdef DEBUG
				printf(" %lx-%lx", a, e);
#endif
				uvm_page_physload(a, e, a, e, 0);
		}
	}
#ifdef DEBUG
	printf("\n");
#endif

	tlbflush();
#if 0
#if NISADMA > 0
	/*
	 * Some motherboards/BIOSes remap the 384K of RAM that would
	 * normally be covered by the ISA hole to the end of memory
	 * so that it can be used.  However, on a 16M system, this
	 * would cause bounce buffers to be allocated and used.
	 * This is not desirable behaviour, as more than 384K of
	 * bounce buffers might be allocated.  As a work-around,
	 * we round memory down to the nearest 1M boundary if
	 * we're using any isadma devices and the remapped memory
	 * is what puts us over 16M.
	 */
	if (extmem > (15*1024) && extmem < (16*1024)) {
		printf("Warning: ignoring %dk of remapped memory\n",
		    extmem - (15*1024));
		extmem = (15*1024);
	}
#endif
#endif

#ifdef DDB
	db_machine_init();
	ddb_init();
	if (boothowto & RB_KDB)
		db_enter();
#endif

	softintr_init();
}
3379 
3380 /*
3381  * consinit:
3382  * initialize the system console.
3383  */
3384 void
3385 consinit(void)
3386 {
3387 	/* Already done in init386(). */
3388 }
3389 
/*
 * Reset the machine, trying progressively more drastic methods:
 * a machine-specific reset hook, the keyboard controller's reset
 * line, and finally two ways of forcing a triple fault.
 */
void
cpu_reset(void)
{
	struct region_descriptor region;

	intr_disable();

	/* Machine-specific reset routine, if one was registered. */
	if (cpuresetfn)
		(*cpuresetfn)();

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Try to cause a triple fault and watchdog reset by setting the
	 * IDT to point to nothing.
	 */
	bzero((caddr_t)idt, sizeof(idt_region));
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
	/* Divide by zero: the resulting trap has no gate to land in. */
	__asm volatile("divl %0,%1" : : "q" (0), "a" (0));

	/*
	 * Try to cause a triple fault and watchdog reset by unmapping the
	 * entire address space.
	 */
	bzero((caddr_t)PTD, NBPG);
	tlbflush();

	/* Spin until one of the above takes effect. */
	for (;;)
		continue;
	/* NOTREACHED */
}
3430 
/*
 * Initialize the system clock via the configured backend
 * (lapic or i8254).
 */
void
cpu_initclocks(void)
{
	initclock_func();
}
3436 
/* Start the configured clock backend ticking. */
void
cpu_startclock(void)
{
	startclock_func();
}
3442 
/*
 * Request that cpu ci reschedule as soon as possible.
 */
void
need_resched(struct cpu_info *ci)
{
	ci->ci_want_resched = 1;

	/* There's a risk we'll be called before the idle threads start */
	if (ci->ci_curproc) {
		/* Post an AST and prod the CPU so it notices promptly. */
		aston(ci->ci_curproc);
		cpu_kick(ci);
	}
}
3454 
3455 /* Allocate an IDT vector slot within the given range.
3456  * XXX needs locking to avoid MP allocation races.
3457  */
3458 
3459 int
3460 idt_vec_alloc(int low, int high)
3461 {
3462 	int vec;
3463 
3464 	for (vec = low; vec <= high; vec++)
3465 		if (idt[vec].gd_p == 0)
3466 			return (vec);
3467 	return (0);
3468 }
3469 
/*
 * Point IDT slot vec at function, as a 386 interrupt gate in kernel
 * code, reachable from kernel privilege only.
 */
void
idt_vec_set(int vec, void (*function)(void))
{
	setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
}
3475 
/*
 * Release IDT slot vec so idt_vec_alloc() can hand it out again
 * (allocation tests gd_p == 0).
 */
void
idt_vec_free(int vec)
{
	unsetgate(&idt[vec]);
}
3481 
/*
 * Bounds table for the simple integer machdep sysctls, consumed by
 * sysctl_bounded_arr() in the default case of cpu_sysctl().  Only
 * CPU_LIDACTION is writable (range 0-2); the rest are read-only.
 */
const struct sysctl_bounded_args cpuctl_vars[] = {
	{ CPU_LIDACTION, &lid_action, 0, 2 },
	{ CPU_CPUID, &cpu_id, SYSCTL_INT_READONLY },
	{ CPU_OSFXSR, &i386_use_fxsave, SYSCTL_INT_READONLY },
	{ CPU_SSE, &i386_has_sse, SYSCTL_INT_READONLY },
	{ CPU_SSE2, &i386_has_sse2, SYSCTL_INT_READONLY },
	{ CPU_XCRYPT, &i386_has_xcrypt, SYSCTL_INT_READONLY },
};
3490 
3491 /*
3492  * machine dependent system variables.
3493  */
3494 int
3495 cpu_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
3496     size_t newlen, struct proc *p)
3497 {
3498 	dev_t dev;
3499 
3500 	switch (name[0]) {
3501 	case CPU_CONSDEV:
3502 		if (namelen != 1)
3503 			return (ENOTDIR);		/* overloaded */
3504 
3505 		if (cn_tab != NULL)
3506 			dev = cn_tab->cn_dev;
3507 		else
3508 			dev = NODEV;
3509 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
3510 #if NBIOS > 0
3511 	case CPU_BIOS:
3512 		return bios_sysctl(name + 1, namelen - 1, oldp, oldlenp,
3513 		    newp, newlen, p);
3514 #endif
3515 	case CPU_BLK2CHR:
3516 		if (namelen != 2)
3517 			return (ENOTDIR);		/* overloaded */
3518 		dev = blktochr((dev_t)name[1]);
3519 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
3520 	case CPU_CHR2BLK:
3521 		if (namelen != 2)
3522 			return (ENOTDIR);		/* overloaded */
3523 		dev = chrtoblk((dev_t)name[1]);
3524 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
3525 	case CPU_ALLOWAPERTURE:
3526 #ifdef APERTURE
3527 		if (securelevel > 0)
3528 			return (sysctl_int_lower(oldp, oldlenp, newp, newlen,
3529 			    &allowaperture));
3530 		else
3531 			return (sysctl_int(oldp, oldlenp, newp, newlen,
3532 			    &allowaperture));
3533 #else
3534 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
3535 #endif
3536 	case CPU_CPUVENDOR:
3537 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_vendor));
3538 	case CPU_CPUFEATURE:
3539 		return (sysctl_rdint(oldp, oldlenp, newp, curcpu()->ci_feature_flags));
3540 	case CPU_KBDRESET:
3541 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
3542 		    &kbd_reset));
3543 #if NPCKBC > 0 && NUKBD > 0
3544 	case CPU_FORCEUKBD:
3545 		{
3546 		int error;
3547 
3548 		if (forceukbd)
3549 			return (sysctl_rdint(oldp, oldlenp, newp, forceukbd));
3550 
3551 		error = sysctl_int(oldp, oldlenp, newp, newlen, &forceukbd);
3552 		if (forceukbd)
3553 			pckbc_release_console();
3554 		return (error);
3555 		}
3556 #endif
3557 	default:
3558 		return (sysctl_bounded_arr(cpuctl_vars, nitems(cpuctl_vars),
3559 		    name, namelen, oldp, oldlenp, newp, newlen));
3560 	}
3561 	/* NOTREACHED */
3562 }
3563 
/*
 * Map bus space for the given tag: reserve [bpa, bpa+size) in the
 * matching extent map and return a handle in *bshp.  I/O space
 * handles are the port address itself; memory space is mapped into
 * kernel virtual address space, except for the legacy ISA hole which
 * already has a fixed mapping.  Returns 0 or an errno.
 */
int
bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags,
    bus_space_handle_t *bshp)
{
	int error;
	struct extent *ex;

	/*
	 * Pick the appropriate extent map.
	 */
	if (t == I386_BUS_SPACE_IO) {
		ex = ioport_ex;
		/* Port space cannot be linearly mapped into memory. */
		if (flags & BUS_SPACE_MAP_LINEAR)
			return (EINVAL);
	} else if (t == I386_BUS_SPACE_MEM) {
		ex = iomem_ex;
	} else {
		panic("bus_space_map: bad bus space tag");
	}

	/*
	 * Before we go any further, let's make sure that this
	 * region is available.
	 */
	error = extent_alloc_region(ex, bpa, size,
	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0));
	if (error)
		return (error);

	/*
	 * For I/O space, that's all she wrote.
	 */
	if (t == I386_BUS_SPACE_IO) {
		*bshp = bpa;
		return (0);
	}

	/* The ISA hole has a fixed mapping; use it directly. */
	if (IOM_BEGIN <= bpa && bpa <= IOM_END) {
		*bshp = (bus_space_handle_t)ISA_HOLE_VADDR(bpa);
		return (0);
	}

	/*
	 * For memory space, map the bus physical address to
	 * a kernel virtual address.
	 */
	error = bus_mem_add_mapping(bpa, size, flags, bshp);
	if (error) {
		/* Mapping failed: give the extent region back. */
		if (extent_free(ex, bpa, size, EX_NOWAIT |
		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
			printf("bus_space_map: pa 0x%lx, size 0x%lx\n",
			    bpa, size);
			printf("bus_space_map: can't free region\n");
		}
	}

	return (error);
}
3622 
3623 int
3624 _bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size,
3625     int flags, bus_space_handle_t *bshp)
3626 {
3627 	/*
3628 	 * For I/O space, that's all she wrote.
3629 	 */
3630 	if (t == I386_BUS_SPACE_IO) {
3631 		*bshp = bpa;
3632 		return (0);
3633 	}
3634 
3635 	/*
3636 	 * For memory space, map the bus physical address to
3637 	 * a kernel virtual address.
3638 	 */
3639 	return (bus_mem_add_mapping(bpa, size, flags, bshp));
3640 }
3641 
3642 int
3643 bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend,
3644     bus_size_t size, bus_size_t alignment, bus_size_t boundary,
3645     int flags, bus_addr_t *bpap, bus_space_handle_t *bshp)
3646 {
3647 	struct extent *ex;
3648 	u_long bpa;
3649 	int error;
3650 
3651 	/*
3652 	 * Pick the appropriate extent map.
3653 	 */
3654 	if (t == I386_BUS_SPACE_IO) {
3655 		ex = ioport_ex;
3656 	} else if (t == I386_BUS_SPACE_MEM) {
3657 		ex = iomem_ex;
3658 	} else {
3659 		panic("bus_space_alloc: bad bus space tag");
3660 	}
3661 
3662 	/*
3663 	 * Sanity check the allocation against the extent's boundaries.
3664 	 */
3665 	if (rstart < ex->ex_start || rend > ex->ex_end)
3666 		panic("bus_space_alloc: bad region start/end");
3667 
3668 	/*
3669 	 * Do the requested allocation.
3670 	 */
3671 	error = extent_alloc_subregion(ex, rstart, rend, size, alignment, 0,
3672 	    boundary, EX_NOWAIT | (ioport_malloc_safe ?  EX_MALLOCOK : 0),
3673 	    &bpa);
3674 
3675 	if (error)
3676 		return (error);
3677 
3678 	/*
3679 	 * For I/O space, that's all she wrote.
3680 	 */
3681 	if (t == I386_BUS_SPACE_IO) {
3682 		*bshp = *bpap = bpa;
3683 		return (0);
3684 	}
3685 
3686 	/*
3687 	 * For memory space, map the bus physical address to
3688 	 * a kernel virtual address.
3689 	 */
3690 	error = bus_mem_add_mapping(bpa, size, flags, bshp);
3691 	if (error) {
3692 		if (extent_free(iomem_ex, bpa, size, EX_NOWAIT |
3693 		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
3694 			printf("bus_space_alloc: pa 0x%lx, size 0x%lx\n",
3695 			    bpa, size);
3696 			printf("bus_space_alloc: can't free region\n");
3697 		}
3698 	}
3699 
3700 	*bpap = bpa;
3701 
3702 	return (error);
3703 }
3704 
/*
 * Back the physical range [bpa, bpa+size) with a fresh kernel
 * virtual mapping and return a handle in *bshp.  Mappings default
 * to uncached (PMAP_NOCACHE); BUS_SPACE_MAP_CACHEABLE selects
 * normal caching and BUS_SPACE_MAP_PREFETCHABLE selects
 * write-combining.  Returns 0 on success or ENOMEM.
 */
int
bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int flags,
    bus_space_handle_t *bshp)
{
	paddr_t pa, endpa;
	vaddr_t va;
	bus_size_t map_size;
	int pmap_flags = PMAP_NOCACHE;

	/* Page-align the range; the handle keeps the sub-page offset. */
	pa = trunc_page(bpa);
	endpa = round_page(bpa + size);

#ifdef DIAGNOSTIC
	/* endpa == 0 is tolerated: the range may end at the 4GB wrap. */
	if (endpa <= pa && endpa != 0)
		panic("bus_mem_add_mapping: overflow");
#endif

	map_size = endpa - pa;

	va = (vaddr_t)km_alloc(map_size, &kv_any, &kp_none, &kd_nowait);
	if (va == 0)
		return (ENOMEM);

	*bshp = (bus_space_handle_t)(va + (bpa & PGOFSET));

	if (flags & BUS_SPACE_MAP_CACHEABLE)
		pmap_flags = 0;
	else if (flags & BUS_SPACE_MAP_PREFETCHABLE)
		pmap_flags = PMAP_WC;

	/* Enter each page; pmap flags ride in the low bits of pa. */
	for (; map_size > 0;
	    pa += PAGE_SIZE, va += PAGE_SIZE, map_size -= PAGE_SIZE)
		pmap_kenter_pa(va, pa | pmap_flags,
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	return 0;
}
3743 
/*
 * Undo bus_space_map(): release the region from its extent map and,
 * for dynamically mapped memory space, tear down the kernel virtual
 * mapping.  ISA-hole handles have no dynamic mapping to remove.
 */
void
bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size)
{
	struct extent *ex;
	u_long va, endva;
	bus_addr_t bpa;

	/*
	 * Find the correct extent and bus physical address.
	 */
	if (t == I386_BUS_SPACE_IO) {
		ex = ioport_ex;
		/* I/O handles are the port address itself. */
		bpa = bsh;
	} else if (t == I386_BUS_SPACE_MEM) {
		ex = iomem_ex;
		/* Handles inside the ISA hole were direct-mapped. */
		bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
		if (IOM_BEGIN <= bpa && bpa <= IOM_END)
			goto ok;

		va = trunc_page(bsh);
		endva = round_page(bsh + size);

#ifdef DIAGNOSTIC
		if (endva <= va)
			panic("bus_space_unmap: overflow");
#endif

		/* Recover the physical address from the page tables. */
		(void) pmap_extract(pmap_kernel(), va, &bpa);
		bpa += (bsh & PGOFSET);

		pmap_kremove(va, endva - va);
		pmap_update(pmap_kernel());

		/*
		 * Free the kernel virtual mapping.
		 */
		km_free((void *)va, endva - va, &kv_any, &kp_none);
	} else
		panic("bus_space_unmap: bad bus space tag");

ok:
	if (extent_free(ex, bpa, size,
	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
		printf("bus_space_unmap: %s 0x%lx, size 0x%lx\n",
		    (t == I386_BUS_SPACE_IO) ? "port" : "pa", bpa, size);
		printf("bus_space_unmap: can't free region\n");
	}
}
3792 
3793 void
3794 _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size,
3795     bus_addr_t *adrp)
3796 {
3797 	u_long va, endva;
3798 	bus_addr_t bpa;
3799 
3800 	/*
3801 	 * Find the correct bus physical address.
3802 	 */
3803 	if (t == I386_BUS_SPACE_IO) {
3804 		bpa = bsh;
3805 	} else if (t == I386_BUS_SPACE_MEM) {
3806 		bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
3807 		if (IOM_BEGIN <= bpa && bpa <= IOM_END)
3808 			goto ok;
3809 
3810 		va = trunc_page(bsh);
3811 		endva = round_page(bsh + size);
3812 
3813 #ifdef DIAGNOSTIC
3814 		if (endva <= va)
3815 			panic("_bus_space_unmap: overflow");
3816 #endif
3817 
3818 		(void) pmap_extract(pmap_kernel(), va, &bpa);
3819 		bpa += (bsh & PGOFSET);
3820 
3821 		pmap_kremove(va, endva - va);
3822 		pmap_update(pmap_kernel());
3823 
3824 		/*
3825 		 * Free the kernel virtual mapping.
3826 		 */
3827 		km_free((void *)va, endva - va, &kv_any, &kp_none);
3828 	} else
3829 		panic("bus_space_unmap: bad bus space tag");
3830 
3831 ok:
3832 	if (adrp != NULL)
3833 		*adrp = bpa;
3834 }
3835 
3836 void
3837 bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size)
3838 {
3839 
3840 	/* bus_space_unmap() does all that we need to do. */
3841 	bus_space_unmap(t, bsh, size);
3842 }
3843 
3844 int
3845 bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh,
3846     bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp)
3847 {
3848 	*nbshp = bsh + offset;
3849 	return (0);
3850 }
3851 
3852 paddr_t
3853 bus_space_mmap(bus_space_tag_t t, bus_addr_t addr, off_t off, int prot, int flags)
3854 {
3855 	/* Can't mmap I/O space. */
3856 	if (t == I386_BUS_SPACE_IO)
3857 		return (-1);
3858 
3859 	return (addr + off);
3860 }
3861 
#ifdef DIAGNOSTIC
/*
 * Verify that the current interrupt priority level is at least
 * wantipl, reporting a failure through splassert_fail() otherwise.
 * lapic_tpr holds the current spl (see spllower()).
 */
void
splassert_check(int wantipl, const char *func)
{
	if (lapic_tpr < wantipl)
		splassert_fail(wantipl, lapic_tpr, func);
	/*
	 * Asserting IPL_NONE while ci_idepth != 0 — presumably the
	 * interrupt nesting depth — is also a failure.
	 */
	if (wantipl == IPL_NONE && curcpu()->ci_idepth != 0)
		splassert_fail(-1, curcpu()->ci_idepth, func);
}
#endif
3872 
3873 int
3874 copyin32(const uint32_t *uaddr, uint32_t *kaddr)
3875 {
3876 	if ((vaddr_t)uaddr & 0x3)
3877 		return EFAULT;
3878 
3879 	/* copyin(9) is atomic */
3880 	return copyin(uaddr, kaddr, sizeof(uint32_t));
3881 }
3882 
3883 /*
3884  * True if the system has any non-level interrupts which are shared
3885  * on the same pin.
3886  */
3887 int	intr_shared_edge;
3888 
3889 /*
3890  * Software interrupt registration
3891  *
3892  * We hand-code this to ensure that it's atomic.
3893  */
3894 void
3895 softintr(int sir)
3896 {
3897 	struct cpu_info *ci = curcpu();
3898 
3899 	__asm volatile("orl %1, %0" :
3900 	    "=m" (ci->ci_ipending) : "ir" (1 << sir));
3901 }
3902 
3903 /*
3904  * Raise current interrupt priority level, and return the old one.
3905  */
3906 int
3907 splraise(int ncpl)
3908 {
3909 	int ocpl;
3910 
3911 	KASSERT(ncpl >= IPL_NONE);
3912 
3913 	_SPLRAISE(ocpl, ncpl);
3914 	return (ocpl);
3915 }
3916 
3917 /*
3918  * Restore an old interrupt priority level.  If any thereby unmasked
3919  * interrupts are pending, call Xspllower() to process them.
3920  */
3921 void
3922 splx(int ncpl)
3923 {
3924 	_SPLX(ncpl);
3925 }
3926 
3927 /*
3928  * Same as splx(), but we return the old value of spl, for the
3929  * benefit of some splsoftclock() callers.
3930  */
3931 int
3932 spllower(int ncpl)
3933 {
3934 	int ocpl = lapic_tpr;
3935 
3936 	splx(ncpl);
3937 	return (ocpl);
3938 }
3939 
3940 int
3941 intr_handler(struct intrframe *frame, struct intrhand *ih)
3942 {
3943 	int rc;
3944 #ifdef MULTIPROCESSOR
3945 	int need_lock;
3946 
3947 	if (ih->ih_flags & IPL_MPSAFE)
3948 		need_lock = 0;
3949 	else
3950 		need_lock = 1;
3951 
3952 	if (need_lock)
3953 		__mp_lock(&kernel_lock);
3954 #endif
3955 	rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame);
3956 #ifdef MULTIPROCESSOR
3957 	if (need_lock)
3958 		__mp_unlock(&kernel_lock);
3959 #endif
3960 	return rc;
3961 }
3962 
void
intr_barrier(void *ih)
{
	/*
	 * NOTE(review): ih is unused; a scheduler barrier is assumed
	 * sufficient to wait out running handlers — confirm against
	 * sched_barrier() semantics.
	 */
	sched_barrier(NULL);
}
3968 
#ifdef SUSPEND

/* Suspend/resume interrupt hooks: no-op stubs on i386. */
void
intr_enable_wakeup(void)
{
}

void
intr_disable_wakeup(void)
{
}

#endif
3982 
/*
 * Return some weakly unpredictable bits: the current nanosecond
 * count XORed with the shifted seconds count.
 */
unsigned int
cpu_rnd_messybits(void)
{
	struct timespec ts;

	nanotime(&ts);
	return (ts.tv_nsec ^ (ts.tv_sec << 20));
}
3991 
3992 int i386_delay_quality;
3993 
3994 void
3995 delay_init(void(*fn)(int), int fn_quality)
3996 {
3997 	if (fn_quality > i386_delay_quality) {
3998 		delay_func = fn;
3999 		i386_delay_quality = fn_quality;
4000 	}
4001 }
4002 
4003 void
4004 delay_fini(void (*fn)(int))
4005 {
4006 	if (delay_func == fn) {
4007 		delay_func = i8254_delay;
4008 		i386_delay_quality = 0;
4009 	}
4010 }
4011