/*	$OpenBSD: machdep.c,v 1.665 2023/07/25 04:42:02 deraadt Exp $	*/
/*	$NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $	*/

/*-
 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1993, 1994, 1995, 1996 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/exec.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/msgbuf.h>
#include <sys/mount.h>
#include <sys/device.h>
#include <sys/extent.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/core.h>
#include <sys/kcore.h>
#include <sys/sensors.h>

#include <dev/cons.h>
#include <stand/boot/bootarg.h>

#include <net/if.h>
#include <uvm/uvm_extern.h>

#include <machine/bus.h>

#include <machine/cpu.h>
#include <machine/cpu_full.h>
#include <machine/cpufunc.h>
#include <machine/cpuvar.h>
#include <machine/kcore.h>
#include <machine/pio.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/biosvar.h>
#include <machine/pte.h>
#ifdef MULTIPROCESSOR
#include <machine/mpbiosvar.h>
#endif /* MULTIPROCESSOR */

#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>
#include <i386/isa/isa_machdep.h>

#include "acpi.h"
#if NACPI > 0
#include <dev/acpi/acpivar.h>
#endif

#include "apm.h"
#if NAPM > 0
#include <machine/apmvar.h>
#endif

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_extern.h>
#endif

#include "isa.h"
#include "isadma.h"
#include "npx.h"
#if NNPX > 0
extern struct proc *npxproc;
#endif

#include "bios.h"

#ifdef HIBERNATE
#include <machine/hibernate_var.h>
#endif /* HIBERNATE */

#include "ukbd.h"
#include "pckbc.h"
#if NPCKBC > 0 && NUKBD > 0
#include <dev/ic/pckbcvar.h>
#endif

/* #define MACHDEP_DEBUG */

#ifdef MACHDEP_DEBUG
#define DPRINTF(x...)	do { printf(x); } while (0)
#else
#define DPRINTF(x...)
#endif	/* MACHDEP_DEBUG */
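
/*
 * Usage (illustrative), matching the DPRINTF calls in
 * enter_shared_special_pages() below:
 *
 *	DPRINTF("%s: entered kutext page va 0x%08lx pa 0x%08lx\n",
 *	    __func__, (unsigned long)va, (unsigned long)pa);
 */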

void	replacesmap(void);
int     intr_handler(struct intrframe *, struct intrhand *);

/* the following is used externally (sysctl_hw) */
char machine[] = MACHINE;

/*
 * switchto vectors
 */
void (*cpu_idle_leave_fcn)(void) = NULL;
void (*cpu_idle_cycle_fcn)(void) = NULL;
void (*cpu_idle_enter_fcn)(void) = NULL;


struct uvm_constraint_range  isa_constraint = { 0x0, 0x00ffffffUL };
struct uvm_constraint_range  dma_constraint = { 0x0, 0xffffffffUL };
struct uvm_constraint_range *uvm_md_constraints[] = {
	&isa_constraint,
	&dma_constraint,
	NULL
};

extern int	boothowto;
int	physmem;

struct dumpmem dumpmem[VM_PHYSSEG_MAX];
u_int ndumpmem;

/*
 * These variables are needed by /sbin/savecore
 */
u_long	dumpmag = 0x8fca0101;	/* magic number */
int	dumpsize = 0;		/* pages */
long	dumplo = 0;		/* blocks */

int	cpu_class;
int	i386_fpu_present;
int	i386_fpu_exception;
int	i386_fpu_fdivbug;

int	i386_use_fxsave;
int	i386_has_sse;
int	i386_has_sse2;
int	i386_has_xcrypt;

bootarg_t *bootargp;
paddr_t avail_end;

struct vm_map *exec_map = NULL;
struct vm_map *phys_map = NULL;

#if !defined(SMALL_KERNEL)
int p4_model;
int p3_early;
void (*update_cpuspeed)(void) = NULL;
void	via_update_sensor(void *args);
#endif
int kbd_reset;
int lid_action = 1;
int pwr_action = 1;
int forceukbd;

/*
 * safepri is a safe priority for sleep to set for a spin-wait
 * during autoconfiguration or after a panic.
 */
int	safepri = 0;

#if !defined(SMALL_KERNEL)
int bus_clock;
#endif
void (*setperf_setup)(struct cpu_info *);
int setperf_prio = 0;		/* for concurrent handlers */

void (*cpusensors_setup)(struct cpu_info *);

void (*delay_func)(int) = i8254_delay;
void (*initclock_func)(void) = i8254_initclocks;

/*
 * Extent maps to manage I/O and ISA memory hole space.  Allocate
 * storage for 16 regions in each, initially.  Later, ioport_malloc_safe
 * will indicate that it's safe to use malloc() to dynamically allocate
 * region descriptors.
 *
 * N.B. At least two regions are _always_ allocated from the iomem
 * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM).
 *
 * The extent maps are not static!  Machine-dependent ISA and EISA
 * routines need access to them for bus address space allocation.
 */
static	long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)];
static	long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)];
struct	extent *ioport_ex;
struct	extent *iomem_ex;
static	int ioport_malloc_safe;
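
/*
 * Illustrative sketch (not part of the build): how machine-dependent
 * code might reserve a port range from ioport_ex via the extent(9)
 * interfaces.  The port range below is hypothetical:
 *
 *	u_long base = 0x60;
 *	if (extent_alloc_region(ioport_ex, base, 5,
 *	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0)))
 *		printf("io ports 0x%lx-0x%lx busy\n", base, base + 4);
 */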

void	dumpsys(void);
int	cpu_dump(void);
void	init386(paddr_t);
void	consinit(void);
void	(*cpuresetfn)(void);

int	bus_mem_add_mapping(bus_addr_t, bus_size_t,
	    int, bus_space_handle_t *);

#ifdef APERTURE
int allowaperture = 0;
#endif

int has_rdrand;
int has_rdseed;

void	winchip_cpu_setup(struct cpu_info *);
void	amd_family5_setperf_setup(struct cpu_info *);
void	amd_family5_setup(struct cpu_info *);
void	amd_family6_setperf_setup(struct cpu_info *);
void	amd_family6_setup(struct cpu_info *);
void	cyrix3_setperf_setup(struct cpu_info *);
void	cyrix3_cpu_setup(struct cpu_info *);
void	cyrix6x86_cpu_setup(struct cpu_info *);
void	natsem6x86_cpu_setup(struct cpu_info *);
void	intel586_cpu_setup(struct cpu_info *);
void	intel686_cpusensors_setup(struct cpu_info *);
void	intel686_setperf_setup(struct cpu_info *);
void	intel686_common_cpu_setup(struct cpu_info *);
void	intel686_cpu_setup(struct cpu_info *);
void	intel686_p4_cpu_setup(struct cpu_info *);
void	intelcore_update_sensor(void *);
void	tm86_cpu_setup(struct cpu_info *);
char *	intel686_cpu_name(int);
char *	cyrix3_cpu_name(int, int);
char *	tm86_cpu_name(int);
void	cyrix3_get_bus_clock(struct cpu_info *);
void	p4_get_bus_clock(struct cpu_info *);
void	p3_get_bus_clock(struct cpu_info *);
void	p4_update_cpuspeed(void);
void	p3_update_cpuspeed(void);
int	pentium_cpuspeed(int *);
void	enter_shared_special_pages(void);

static __inline u_char
cyrix_read_reg(u_char reg)
{
	outb(0x22, reg);
	return inb(0x23);
}

static __inline void
cyrix_write_reg(u_char reg, u_char data)
{
	outb(0x22, reg);
	outb(0x23, data);
}
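
/*
 * Illustrative use of the 0x22/0x23 index/data pair (see
 * cyrix6x86_cpu_setup() below for the real thing):
 *
 *	u_char ccr2 = cyrix_read_reg(0xc2);
 *	cyrix_write_reg(0xc2, ccr2 | 0x08);	\* enable suspend on halt *\
 */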

/*
 * The cpuid instruction: request in eax, result in eax, ebx, ecx, edx.
 * The caller must provide a u_int32_t regs[4] array.
 */
void
cpuid(u_int32_t ax, u_int32_t *regs)
{
	__asm volatile(
	    "cpuid\n\t"
	    "movl	%%eax, 0(%2)\n\t"
	    "movl	%%ebx, 4(%2)\n\t"
	    "movl	%%ecx, 8(%2)\n\t"
	    "movl	%%edx, 12(%2)\n\t"
	    :"=a" (ax)
	    :"0" (ax), "S" (regs)
	    :"bx", "cx", "dx");
}
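
/*
 * Example use (illustrative): fetching the vendor string from leaf 0.
 * The twelve vendor bytes come back in ebx, edx, ecx order:
 *
 *	u_int32_t regs[4];
 *	char vendor[13];
 *
 *	cpuid(0, regs);
 *	bcopy(&regs[1], &vendor[0], 4);		\* ebx *\
 *	bcopy(&regs[3], &vendor[4], 4);		\* edx *\
 *	bcopy(&regs[2], &vendor[8], 4);		\* ecx *\
 *	vendor[12] = '\0';			\* e.g. "GenuineIntel" *\
 */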

/*
 * Machine-dependent startup code
 */
void
cpu_startup(void)
{
	unsigned i;
	vaddr_t minaddr, maxaddr, va;
	paddr_t pa;

	/*
	 * Initialize error message buffer (at end of core).
	 * (space reserved in pmap_bootstrap)
	 */
	pa = avail_end;
	va = (vaddr_t)msgbufp;
	for (i = 0; i < atop(MSGBUFSIZE); i++) {
		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	initmsgbuf((caddr_t)msgbufp, round_page(MSGBUFSIZE));

	printf("%s", version);
	startclocks();
	rtcinit();

	printf("real mem  = %llu (%lluMB)\n",
	    (unsigned long long)ptoa((psize_t)physmem),
	    (unsigned long long)ptoa((psize_t)physmem)/1024U/1024U);

	/*
	 * Allocate a submap for exec arguments.  This map effectively
	 * limits the number of processes exec'ing at any time.
	 */
	minaddr = vm_map_min(kernel_map);
	exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
				   16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);

	/*
	 * Allocate a submap for physio
	 */
	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
				   VM_PHYS_SIZE, 0, FALSE, NULL);

	printf("avail mem = %llu (%lluMB)\n",
	    (unsigned long long)ptoa((psize_t)uvmexp.free),
	    (unsigned long long)ptoa((psize_t)uvmexp.free)/1024U/1024U);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();

	/*
	 * Configure the system.
	 */
	if (boothowto & RB_CONFIG) {
#ifdef BOOT_CONFIG
		user_config();
#else
		printf("kernel does not support -c; continuing...\n");
#endif
	}
	ioport_malloc_safe = 1;

#ifndef SMALL_KERNEL
	cpu_ucode_setup();
#endif

	/* enter the IDT and trampoline code in the u-k maps */
	enter_shared_special_pages();

	/* initialize CPU0's TSS and GDT and put them in the u-k maps */
	cpu_enter_pages(&cpu_info_full_primary);
}

void
enter_shared_special_pages(void)
{
	extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[];
	extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[];
	vaddr_t	va;
	paddr_t	pa;

	/* idt */
	pmap_extract(pmap_kernel(), (vaddr_t)idt, &pa);
	pmap_enter_special((vaddr_t)idt, pa, PROT_READ, 0);

	/* .kutext section */
	va = (vaddr_t)__kutext_start;
	pa = (paddr_t)__kernel_kutext_phys;
	while (va < (vaddr_t)__kutext_end) {
		pmap_enter_special(va, pa, PROT_READ | PROT_EXEC, 0);
		DPRINTF("%s: entered kutext page va 0x%08lx pa 0x%08lx\n",
		    __func__, (unsigned long)va, (unsigned long)pa);
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
	}

	/* .kudata section */
	va = (vaddr_t)__kudata_start;
	pa = (paddr_t)__kernel_kudata_phys;
	while (va < (vaddr_t)__kudata_end) {
		pmap_enter_special(va, pa, PROT_READ | PROT_WRITE, 0);
		DPRINTF("%s: entered kudata page va 0x%08lx pa 0x%08lx\n",
		    __func__, (unsigned long)va, (unsigned long)pa);
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
}

/*
 * Set up proc0's TSS
 */
void
i386_proc0_tss_init(void)
{
	struct pcb *pcb;

	curpcb = pcb = &proc0.p_addr->u_pcb;
	pcb->pcb_cr0 = rcr0();
	pcb->pcb_kstack = (int)proc0.p_addr + USPACE - 16;
	proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1;
}

#ifdef MULTIPROCESSOR
void
i386_init_pcb_tss(struct cpu_info *ci)
{
	struct pcb *pcb = ci->ci_idle_pcb;

	pcb->pcb_cr0 = rcr0();
}
#endif	/* MULTIPROCESSOR */

/*
 * Info for CTL_HW
 */
char	cpu_model[120];

const char *classnames[] = {
	"",
	"486",
	"586",
	"686"
};

const char *modifiers[] = {
	"",
	"OverDrive ",
	"Dual ",
	""
};

const struct cpu_cpuid_nameclass i386_cpuid_cpus[] = {
	{
		"GenuineIntel",
		CPUVENDOR_INTEL,
		"Intel",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				"486DX", "486DX", "486SX", "486DX2", "486SL",
				"486SX2", 0, "486DX2 W/B",
				"486DX4", 0, 0, 0, 0, 0, 0, 0,
				"486"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"Pentium (A-step)", "Pentium (P5)",
				"Pentium (P54C)", "Pentium (P24T)",
				"Pentium/MMX", "Pentium", 0,
				"Pentium (P54C)", "Pentium/MMX",
				0, 0, 0, 0, 0, 0, 0,
				"Pentium"	/* Default */
			},
			intel586_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				"Pentium Pro", "Pentium Pro", 0,
				"Pentium II", "Pentium Pro",
				"Pentium II/Celeron",
				"Celeron",
				"Pentium III",
				"Pentium III",
				"Pentium M",
				"Pentium III Xeon",
				"Pentium III", 0,
				"Pentium M",
				"Core Duo/Solo", 0,
				"Pentium Pro, II or III"	/* Default */
			},
			intel686_cpu_setup
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			CPUCLASS_686,
			{
				"Pentium 4", 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				"Pentium 4"	/* Default */
			},
			intel686_p4_cpu_setup
		} }
	},
	{
		"AuthenticAMD",
		CPUVENDOR_AMD,
		"AMD",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, "Am486DX2 W/T",
				0, 0, 0, "Am486DX2 W/B",
				"Am486DX4 W/T or Am5x86 W/T 150",
				"Am486DX4 W/B or Am5x86 W/B 150", 0, 0,
				0, 0, "Am5x86 W/T 133/160",
				"Am5x86 W/B 133/160",
				"Am486 or Am5x86"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"K5", "K5", "K5", "K5", 0, 0, "K6",
				"K6", "K6-2", "K6-III", 0, 0, 0,
				"K6-2+/III+", 0, 0,
				"K5 or K6"		/* Default */
			},
			amd_family5_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				0, "Athlon Model 1", "Athlon Model 2",
				"Duron Model 3",
				"Athlon Model 4",
				0, "Athlon XP Model 6",
				"Duron Model 7",
				"Athlon XP Model 8",
				0, "Athlon XP Model 10",
				0, 0, 0, 0, 0,
				"K7"		/* Default */
			},
			amd_family6_setup
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, "Athlon64",
				"Opteron or Athlon64FX", 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"AMD64"			/* DEFAULT */
			},
			amd_family6_setup
		} }
	},
	{
		"CyrixInstead",
		CPUVENDOR_CYRIX,
		"Cyrix",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, "MediaGX", 0, 0, 0, 0, "5x86", 0, 0,
				0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, "6x86", 0, "GXm", 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			cyrix6x86_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				"6x86MX", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0,
				"686 class"	/* Default */
			},
			NULL
		} }
	},
	{
		"CentaurHauls",
		CPUVENDOR_IDT,
		"IDT",
		/* Family 4, not available from IDT */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "WinChip C6", 0, 0, 0,
				"WinChip 2", "WinChip 3", 0, 0, 0, 0, 0, 0,
				"WinChip"		/* Default */
			},
			winchip_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, 0, 0,
				"C3 Samuel",
				"C3 Samuel 2/Ezra",
				"C3 Ezra-T",
				"C3 Nehemiah", "C3 Esther", 0, 0, 0, 0, 0,
				"C3"		/* Default */
			},
			cyrix3_cpu_setup
		} }
	},
	{
		"GenuineTMx86",
		CPUVENDOR_TRANSMETA,
		"Transmeta",
		/* Family 4, not available from Transmeta */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "TMS5x00", 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0, 0,
				"TMS5x00"		/* Default */
			},
			tm86_cpu_setup
		},
		/* Family 6, not yet available from Transmeta */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"686 class"		/* Default */
			},
			NULL
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			/* Extended processor family - Transmeta Efficeon */
			CPUCLASS_686,
			{
				0, 0, "TM8000", "TM8000",
				0, 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				"TM8000"	/* Default */
			},
			tm86_cpu_setup
		} }
	},
	{
		"Geode by NSC",
		CPUVENDOR_NS,
		"National Semiconductor",
		/* Family 4, not available from National Semiconductor */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "Geode GX1", 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			natsem6x86_cpu_setup
		} }
	},
	{
		"SiS SiS SiS ",
		CPUVENDOR_SIS,
		"SiS",
		/* Family 4, not available from SiS */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"SiS55x", 0, 0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			NULL
		} }
	}
};

const struct cpu_cpuid_feature i386_cpuid_features[] = {
	{ CPUID_FPU,	"FPU" },
	{ CPUID_VME,	"V86" },
	{ CPUID_DE,	"DE" },
	{ CPUID_PSE,	"PSE" },
	{ CPUID_TSC,	"TSC" },
	{ CPUID_MSR,	"MSR" },
	{ CPUID_PAE,	"PAE" },
	{ CPUID_MCE,	"MCE" },
	{ CPUID_CX8,	"CX8" },
	{ CPUID_APIC,	"APIC" },
	{ CPUID_SYS1,	"SYS" },
	{ CPUID_SEP,	"SEP" },
	{ CPUID_MTRR,	"MTRR" },
	{ CPUID_PGE,	"PGE" },
	{ CPUID_MCA,	"MCA" },
	{ CPUID_CMOV,	"CMOV" },
	{ CPUID_PAT,	"PAT" },
	{ CPUID_PSE36,	"PSE36" },
	{ CPUID_PSN,	"PSN" },
	{ CPUID_CFLUSH,	"CFLUSH" },
	{ CPUID_DS,	"DS" },
	{ CPUID_ACPI,	"ACPI" },
	{ CPUID_MMX,	"MMX" },
	{ CPUID_FXSR,	"FXSR" },
	{ CPUID_SSE,	"SSE" },
	{ CPUID_SSE2,	"SSE2" },
	{ CPUID_SS,	"SS" },
	{ CPUID_HTT,	"HTT" },
	{ CPUID_TM,	"TM" },
	{ CPUID_PBE,	"PBE" }
};

const struct cpu_cpuid_feature i386_ecpuid_features[] = {
	{ CPUID_MPC,		"MPC" },
	{ CPUID_NXE,		"NXE" },
	{ CPUID_MMXX,		"MMXX" },
	{ CPUID_FFXSR,		"FFXSR" },
	{ CPUID_PAGE1GB,	"PAGE1GB" },
	{ CPUID_RDTSCP,		"RDTSCP" },
	{ CPUID_LONG,		"LONG" },
	{ CPUID_3DNOW2,		"3DNOW2" },
	{ CPUID_3DNOW,		"3DNOW" }
};

const struct cpu_cpuid_feature i386_cpuid_ecxfeatures[] = {
	{ CPUIDECX_SSE3,	"SSE3" },
	{ CPUIDECX_PCLMUL,	"PCLMUL" },
	{ CPUIDECX_DTES64,	"DTES64" },
	{ CPUIDECX_MWAIT,	"MWAIT" },
	{ CPUIDECX_DSCPL,	"DS-CPL" },
	{ CPUIDECX_VMX,		"VMX" },
	{ CPUIDECX_SMX,		"SMX" },
	{ CPUIDECX_EST,		"EST" },
	{ CPUIDECX_TM2,		"TM2" },
	{ CPUIDECX_SSSE3,	"SSSE3" },
	{ CPUIDECX_CNXTID,	"CNXT-ID" },
	{ CPUIDECX_SDBG,	"SDBG" },
	{ CPUIDECX_FMA3,	"FMA3" },
	{ CPUIDECX_CX16,	"CX16" },
	{ CPUIDECX_XTPR,	"xTPR" },
	{ CPUIDECX_PDCM,	"PDCM" },
	{ CPUIDECX_PCID,	"PCID" },
	{ CPUIDECX_DCA,		"DCA" },
	{ CPUIDECX_SSE41,	"SSE4.1" },
	{ CPUIDECX_SSE42,	"SSE4.2" },
	{ CPUIDECX_X2APIC,	"x2APIC" },
	{ CPUIDECX_MOVBE,	"MOVBE" },
	{ CPUIDECX_POPCNT,	"POPCNT" },
	{ CPUIDECX_DEADLINE,	"DEADLINE" },
	{ CPUIDECX_AES,		"AES" },
	{ CPUIDECX_XSAVE,	"XSAVE" },
	{ CPUIDECX_OSXSAVE,	"OSXSAVE" },
	{ CPUIDECX_AVX,		"AVX" },
	{ CPUIDECX_F16C,	"F16C" },
	{ CPUIDECX_RDRAND,	"RDRAND" },
	{ CPUIDECX_HV,		"HV" },
};

const struct cpu_cpuid_feature i386_ecpuid_ecxfeatures[] = {
	{ CPUIDECX_LAHF,	"LAHF" },
	{ CPUIDECX_CMPLEG,	"CMPLEG" },
	{ CPUIDECX_SVM,		"SVM" },
	{ CPUIDECX_EAPICSP,	"EAPICSP" },
	{ CPUIDECX_AMCR8,	"AMCR8" },
	{ CPUIDECX_ABM,		"ABM" },
	{ CPUIDECX_SSE4A,	"SSE4A" },
	{ CPUIDECX_MASSE,	"MASSE" },
	{ CPUIDECX_3DNOWP,	"3DNOWP" },
	{ CPUIDECX_OSVW,	"OSVW" },
	{ CPUIDECX_IBS,		"IBS" },
	{ CPUIDECX_XOP,		"XOP" },
	{ CPUIDECX_SKINIT,	"SKINIT" },
	{ CPUIDECX_WDT,		"WDT" },
	{ CPUIDECX_LWP,		"LWP" },
	{ CPUIDECX_FMA4,	"FMA4" },
	{ CPUIDECX_TCE,		"TCE" },
	{ CPUIDECX_NODEID,	"NODEID" },
	{ CPUIDECX_TBM,		"TBM" },
	{ CPUIDECX_TOPEXT,	"TOPEXT" },
	{ CPUIDECX_CPCTR,	"CPCTR" },
	{ CPUIDECX_DBKP,	"DBKP" },
	{ CPUIDECX_PERFTSC,	"PERFTSC" },
	{ CPUIDECX_PCTRL3,	"PCTRL3" },
	{ CPUIDECX_MWAITX,	"MWAITX" },
};

const struct cpu_cpuid_feature cpu_seff0_ebxfeatures[] = {
	{ SEFF0EBX_FSGSBASE,	"FSGSBASE" },
	{ SEFF0EBX_TSC_ADJUST,	"TSC_ADJUST" },
	{ SEFF0EBX_SGX,		"SGX" },
	{ SEFF0EBX_BMI1,	"BMI1" },
	{ SEFF0EBX_HLE,		"HLE" },
	{ SEFF0EBX_AVX2,	"AVX2" },
	{ SEFF0EBX_SMEP,	"SMEP" },
	{ SEFF0EBX_BMI2,	"BMI2" },
	{ SEFF0EBX_ERMS,	"ERMS" },
	{ SEFF0EBX_INVPCID,	"INVPCID" },
	{ SEFF0EBX_RTM,		"RTM" },
	{ SEFF0EBX_PQM,		"PQM" },
	{ SEFF0EBX_MPX,		"MPX" },
	{ SEFF0EBX_AVX512F,	"AVX512F" },
	{ SEFF0EBX_AVX512DQ,	"AVX512DQ" },
	{ SEFF0EBX_RDSEED,	"RDSEED" },
	{ SEFF0EBX_ADX,		"ADX" },
	{ SEFF0EBX_SMAP,	"SMAP" },
	{ SEFF0EBX_AVX512IFMA,	"AVX512IFMA" },
	{ SEFF0EBX_PCOMMIT,	"PCOMMIT" },
	{ SEFF0EBX_CLFLUSHOPT,	"CLFLUSHOPT" },
	{ SEFF0EBX_CLWB,	"CLWB" },
	{ SEFF0EBX_PT,		"PT" },
	{ SEFF0EBX_AVX512PF,	"AVX512PF" },
	{ SEFF0EBX_AVX512ER,	"AVX512ER" },
	{ SEFF0EBX_AVX512CD,	"AVX512CD" },
	{ SEFF0EBX_SHA,		"SHA" },
	{ SEFF0EBX_AVX512BW,	"AVX512BW" },
	{ SEFF0EBX_AVX512VL,	"AVX512VL" },
};

const struct cpu_cpuid_feature cpu_seff0_ecxfeatures[] = {
	{ SEFF0ECX_PREFETCHWT1,	"PREFETCHWT1" },
	{ SEFF0ECX_UMIP,	"UMIP" },
	{ SEFF0ECX_AVX512VBMI,	"AVX512VBMI" },
	{ SEFF0ECX_PKU,		"PKU" },
	{ SEFF0ECX_WAITPKG,	"WAITPKG" },
};

const struct cpu_cpuid_feature cpu_seff0_edxfeatures[] = {
	{ SEFF0EDX_AVX512_4FNNIW, "AVX512FNNIW" },
	{ SEFF0EDX_AVX512_4FMAPS, "AVX512FMAPS" },
	{ SEFF0EDX_SRBDS_CTRL,	"SRBDS_CTRL" },
	{ SEFF0EDX_MD_CLEAR,	"MD_CLEAR" },
	{ SEFF0EDX_TSXFA,	"TSXFA" },
	{ SEFF0EDX_IBRS,	"IBRS,IBPB" },
	{ SEFF0EDX_STIBP,	"STIBP" },
	{ SEFF0EDX_L1DF,	"L1DF" },
	 /* SEFF0EDX_ARCH_CAP (not printed) */
	{ SEFF0EDX_SSBD,	"SSBD" },
};

const struct cpu_cpuid_feature cpu_tpm_eaxfeatures[] = {
	{ TPM_SENSOR,		"SENSOR" },
	{ TPM_ARAT,		"ARAT" },
};

const struct cpu_cpuid_feature i386_cpuid_eaxperf[] = {
	{ CPUIDEAX_VERID,	"PERF" },
};

const struct cpu_cpuid_feature i386_cpuid_edxapmi[] = {
	{ CPUIDEDX_ITSC,	"ITSC" },
};

const struct cpu_cpuid_feature cpu_xsave_extfeatures[] = {
	{ XSAVE_XSAVEOPT,	"XSAVEOPT" },
	{ XSAVE_XSAVEC,		"XSAVEC" },
	{ XSAVE_XGETBV1,	"XGETBV1" },
	{ XSAVE_XSAVES,		"XSAVES" },
};

void
winchip_cpu_setup(struct cpu_info *ci)
{

	switch ((ci->ci_signature >> 4) & 15) { /* model */
	case 4: /* WinChip C6 */
		ci->ci_feature_flags &= ~CPUID_TSC;
		/* Disable RDTSC instruction from user-level. */
		lcr4(rcr4() | CR4_TSD);
		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
		break;
	}
}

#if !defined(SMALL_KERNEL)
void
cyrix3_setperf_setup(struct cpu_info *ci)
{
	if (cpu_ecxfeature & CPUIDECX_EST) {
		if (rdmsr(MSR_MISC_ENABLE) & (1 << 16))
			est_init(ci, CPUVENDOR_VIA);
		else
			printf("%s: Enhanced SpeedStep disabled by BIOS\n",
			    ci->ci_dev->dv_xname);
	}
}
#endif

void
cyrix3_cpu_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;
	int step = ci->ci_signature & 15;

	u_int64_t msreg;
	u_int32_t regs[4];
	unsigned int val;
#if !defined(SMALL_KERNEL)
	extern void (*pagezero)(void *, size_t);
	extern void i686_pagezero(void *, size_t);

	pagezero = i686_pagezero;

	setperf_setup = cyrix3_setperf_setup;
#endif

	switch (model) {
	/* Possible earlier models */
	case 0: case 1: case 2:
	case 3: case 4: case 5:
		break;

	case 6: /* C3 Samuel 1 */
	case 7: /* C3 Samuel 2 or C3 Ezra */
	case 8: /* C3 Ezra-T */
		cpuid(0x80000001, regs);
		val = regs[3];
		if (val & (1U << 31)) {
			cpu_feature |= CPUID_3DNOW;
		} else {
			cpu_feature &= ~CPUID_3DNOW;
		}
		break;

	case 9:
		if (step < 3)
			break;
		/*
		 * C3 Nehemiah & later: fall through.
		 */

	case 10: /* C7-M Type A */
	case 13: /* C7-M Type D */
	case 15: /* Nano */
#if !defined(SMALL_KERNEL)
		if (CPU_IS_PRIMARY(ci) &&
		    (model == 10 || model == 13 || model == 15)) {
			/* Setup the sensors structures */
			strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
			    sizeof(ci->ci_sensordev.xname));
			ci->ci_sensor.type = SENSOR_TEMP;
			sensor_task_register(ci, via_update_sensor, 5);
			sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
			sensordev_install(&ci->ci_sensordev);
		}
#endif

	default:
		/*
		 * C3 Nehemiah/Esther & later models:
		 * First we check for extended feature flags, and then
		 * (if present) retrieve the ones at 0xC0000001.  In this
		 * bit 2 tells us if the RNG is present.  Bit 3 tells us
		 * if the RNG has been enabled.  In order to use the RNG
		 * we need 3 things:  We need an RNG, we need the FXSR bit
		 * enabled in cr4 (SSE/SSE2 stuff), and we need to have
		 * Bit 6 of MSR 0x110B set to 1 (the default), which will
		 * show up as bit 3 set here.
		 */
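		/*
		 * Illustrative: val == 0x0c means both bit 2 (RNG
		 * present) and bit 3 (RNG enabled) are set, so no
		 * MSR 0x110B write is needed below.
		 */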
		cpuid(0xC0000000, regs); /* Check for RNG */
		val = regs[0];
		if (val >= 0xC0000001) {
			cpuid(0xC0000001, regs);
			val = regs[3];
		} else
			val = 0;

		if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
			printf("%s:", ci->ci_dev->dv_xname);

		/* Enable RNG if present and disabled */
		if (val & C3_CPUID_HAS_RNG) {
			extern int viac3_rnd_present;

			if (!(val & C3_CPUID_DO_RNG)) {
				msreg = rdmsr(0x110B);
				msreg |= 0x40;
				wrmsr(0x110B, msreg);
			}
			viac3_rnd_present = 1;
			printf(" RNG");
		}

		/* Enable AES engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_AES;
#endif /* CRYPTO */
			printf(" AES");
		}

		/* Enable ACE2 engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE2) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE2)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_AESCTR;
#endif /* CRYPTO */
			printf(" AES-CTR");
		}

		/* Enable SHA engine if present and disabled */
		if (val & C3_CPUID_HAS_PHE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PHE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_SHA;
#endif /* CRYPTO */
			printf(" SHA1 SHA256");
		}

		/* Enable MM engine if present and disabled */
		if (val & C3_CPUID_HAS_PMM) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PMM)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_MM;
#endif /* CRYPTO */
			printf(" RSA");
		}

		printf("\n");
		break;
	}
}

#if !defined(SMALL_KERNEL)
void
via_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;

	switch (ci->ci_model) {
	case 0xa:
	case 0xd:
		msr = rdmsr(MSR_C7M_TMTEMPERATURE);
		break;
	case 0xf:
		msr = rdmsr(MSR_CENT_TMTEMPERATURE);
		break;
	}
	ci->ci_sensor.value = (msr & 0xffffff);
	/* micro degrees */
	ci->ci_sensor.value *= 1000000;
	ci->ci_sensor.value += 273150000;
	ci->ci_sensor.flags &= ~SENSOR_FINVALID;
}
#endif

void
cyrix6x86_cpu_setup(struct cpu_info *ci)
{
	extern int clock_broken_latch;

	switch ((ci->ci_signature >> 4) & 15) { /* model */
	case -1: /* M1 w/o cpuid */
	case 2:	/* M1 */
		/* Set up various Cyrix registers. */
		/* Enable suspend on halt */
		cyrix_write_reg(0xc2, cyrix_read_reg(0xc2) | 0x08);
		/* enable access to ccr4/ccr5 */
		cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) | 0x10);
		/* Cyrix's workaround for the "coma bug" */
		cyrix_write_reg(0x31, cyrix_read_reg(0x31) | 0xf8);
		cyrix_write_reg(0x32, cyrix_read_reg(0x32) | 0x7f);
		cyrix_read_reg(0x33); cyrix_write_reg(0x33, 0);
		cyrix_write_reg(0x3c, cyrix_read_reg(0x3c) | 0x87);
		/* disable access to ccr4/ccr5 */
		cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) & ~0x10);

		printf("%s: xchg bug workaround performed\n",
		    ci->ci_dev->dv_xname);
		break;
	case 4:	/* GXm */
		/* Unset the TSC bit until calibrate_delay() gets fixed. */
		clock_broken_latch = 1;
		curcpu()->ci_feature_flags &= ~CPUID_TSC;
		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
		break;
	}
}

void
natsem6x86_cpu_setup(struct cpu_info *ci)
{
	extern int clock_broken_latch;
	int model = (ci->ci_signature >> 4) & 15;

	clock_broken_latch = 1;
	switch (model) {
	case 4:
		cpu_feature &= ~CPUID_TSC;
		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
		break;
	}
}

void
intel586_cpu_setup(struct cpu_info *ci)
{
	if (!cpu_f00f_bug) {
		fix_f00f();
		printf("%s: F00F bug workaround installed\n",
		    ci->ci_dev->dv_xname);
	}
}

#if !defined(SMALL_KERNEL)
void
amd_family5_setperf_setup(struct cpu_info *ci)
{
	k6_powernow_init();
}
#endif

void
amd_family5_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;

	switch (model) {
	case 0:		/* AMD-K5 Model 0 */
		/*
		 * According to the AMD Processor Recognition App Note,
		 * the AMD-K5 Model 0 uses the wrong bit to indicate
		 * support for global PTEs, instead using bit 9 (APIC)
		 * rather than bit 13 (i.e. "0x200" vs. "0x2000".  Oops!).
		 */
		if (cpu_feature & CPUID_APIC)
			cpu_feature = (cpu_feature & ~CPUID_APIC) | CPUID_PGE;
		/*
		 * XXX But pmap_pg_g is already initialized -- need to kick
		 * XXX the pmap somehow.  How does the MP branch do this?
		 */
		break;
	case 12:
	case 13:
#if !defined(SMALL_KERNEL)
		setperf_setup = amd_family5_setperf_setup;
#endif
		break;
	}
}

#if !defined(SMALL_KERNEL)
void
amd_family6_setperf_setup(struct cpu_info *ci)
{
	int family = (ci->ci_signature >> 8) & 15;

	switch (family) {
	case 6:
		k7_powernow_init();
		break;
	case 15:
		k8_powernow_init();
		break;
	}
	if (ci->ci_family >= 0x10)
		k1x_init(ci);
}
#endif

void
amd_family6_setup(struct cpu_info *ci)
{
#if !defined(SMALL_KERNEL)
	int family = (ci->ci_signature >> 8) & 15;
	extern void (*pagezero)(void *, size_t);
	extern void sse2_pagezero(void *, size_t);
	extern void i686_pagezero(void *, size_t);

	if (cpu_feature & CPUID_SSE2)
		pagezero = sse2_pagezero;
	else
		pagezero = i686_pagezero;

	setperf_setup = amd_family6_setperf_setup;

	if (family == 0xf) {
		amd64_errata(ci);
	}
#endif
}

#if !defined(SMALL_KERNEL)
/*
 * Temperature read on the CPU is relative to the maximum
 * temperature supported by the CPU, Tj(Max).
 * Refer to:
 * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
 * Section 35 and
 * http://www.intel.com/content/dam/www/public/us/en/documents/
 * white-papers/cpu-monitoring-dts-peci-paper.pdf
 *
 * The temperature on Intel CPUs can be between 70 and 105 degC, since
 * Westmere we can read the TJmax from the die. For older CPUs we have
 * to guess or use undocumented MSRs. Then we subtract the temperature
 * portion of thermal status from max to get current temperature.
 */
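/*
 * Worked example (illustrative): with Tj(Max) = 100 degC and a thermal
 * status readout of 30, the die temperature is 100 - 30 = 70 degC,
 * stored as 70 * 1000000 + 273150000 = 343150000 micro-kelvin.
 */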
void
intelcore_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;
	int max = 100;

	/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
	if (ci->ci_model == 0x0e &&
	    (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
	     MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
		max = 85;

	/*
	 * Newer CPUs can tell you what their max temperature is.
	 * See: '64-ia-32-architectures-software-developer-
	 * vol-3c-part-3-manual.pdf'
	 */
	if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
	    ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
	    ci->ci_model != 0x35 && ci->ci_model != 0x36)
		max = MSR_TEMPERATURE_TARGET_TJMAX(
		    rdmsr(MSR_TEMPERATURE_TARGET));

	msr = rdmsr(MSR_THERM_STATUS);
	if (msr & MSR_THERM_STATUS_VALID_BIT) {
		ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
		/* micro degrees */
		ci->ci_sensor.value *= 1000000;
		/* kelvin */
		ci->ci_sensor.value += 273150000;
		ci->ci_sensor.flags &= ~SENSOR_FINVALID;
	} else {
		ci->ci_sensor.value = 0;
		ci->ci_sensor.flags |= SENSOR_FINVALID;
	}
}

void
intel686_cpusensors_setup(struct cpu_info *ci)
{
	if (!CPU_IS_PRIMARY(ci) || (ci->ci_feature_tpmflags & TPM_SENSOR) == 0)
		return;

	/* Setup the sensors structures */
	strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
	    sizeof(ci->ci_sensordev.xname));
	ci->ci_sensor.type = SENSOR_TEMP;
	sensor_task_register(ci, intelcore_update_sensor, 5);
	sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
	sensordev_install(&ci->ci_sensordev);
}
#endif

#if !defined(SMALL_KERNEL)
void
intel686_setperf_setup(struct cpu_info *ci)
{
	int family = (ci->ci_signature >> 8) & 15;
	int step = ci->ci_signature & 15;

	if (cpu_ecxfeature & CPUIDECX_EST) {
		if (rdmsr(MSR_MISC_ENABLE) & (1 << 16))
			est_init(ci, CPUVENDOR_INTEL);
		else
			printf("%s: Enhanced SpeedStep disabled by BIOS\n",
			    ci->ci_dev->dv_xname);
	} else if ((cpu_feature & (CPUID_ACPI | CPUID_TM)) ==
	    (CPUID_ACPI | CPUID_TM))
		p4tcc_init(family, step);
}
#endif

void
intel686_common_cpu_setup(struct cpu_info *ci)
{

#if !defined(SMALL_KERNEL)
	setperf_setup = intel686_setperf_setup;
	cpusensors_setup = intel686_cpusensors_setup;
	{
	extern void (*pagezero)(void *, size_t);
	extern void sse2_pagezero(void *, size_t);
	extern void i686_pagezero(void *, size_t);

	if (cpu_feature & CPUID_SSE2)
		pagezero = sse2_pagezero;
	else
		pagezero = i686_pagezero;
	}
#endif
	/*
	 * Make sure SYSENTER is disabled.
	 */
	if (cpu_feature & CPUID_SEP)
		wrmsr(MSR_SYSENTER_CS, 0);
}

void
intel686_cpu_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;
	int step = ci->ci_signature & 15;
	u_quad_t msr119;

	intel686_common_cpu_setup(ci);

	/*
	 * Original PPro returns SYSCALL in CPUID but is non-functional.
	 * From Intel Application Note #485.
	 */
	if ((model == 1) && (step < 3))
		ci->ci_feature_flags &= ~CPUID_SEP;

	/*
	 * Disable the Pentium3 serial number.
	 */
	if ((model == 7) && (ci->ci_feature_flags & CPUID_PSN)) {
		msr119 = rdmsr(MSR_BBL_CR_CTL);
		msr119 |= 0x0000000000200000LL;
		wrmsr(MSR_BBL_CR_CTL, msr119);

		printf("%s: disabling processor serial number\n",
			 ci->ci_dev->dv_xname);
		ci->ci_feature_flags &= ~CPUID_PSN;
		ci->ci_level = 2;
	}

#if !defined(SMALL_KERNEL)
	p3_early = (model == 8 && step == 1) ? 1 : 0;
	update_cpuspeed = p3_update_cpuspeed;
#endif
}

void
intel686_p4_cpu_setup(struct cpu_info *ci)
{
	intel686_common_cpu_setup(ci);

#if !defined(SMALL_KERNEL)
	p4_model = (ci->ci_signature >> 4) & 15;
	update_cpuspeed = p4_update_cpuspeed;
#endif
}

void
tm86_cpu_setup(struct cpu_info *ci)
{
#if !defined(SMALL_KERNEL)
	longrun_init();
#endif
}

char *
intel686_cpu_name(int model)
{
	char *ret = NULL;

	switch (model) {
	case 5:
		switch (cpu_cache_edx & 0xFF) {
		case 0x40:
		case 0x41:
			ret = "Celeron";
			break;
		/* 0x42 should not exist in this model. */
		case 0x43:
			ret = "Pentium II";
			break;
		case 0x44:
		case 0x45:
			ret = "Pentium II Xeon";
			break;
		}
		break;
	case 7:
		switch (cpu_cache_edx & 0xFF) {
		/* 0x40 - 0x42 should not exist in this model. */
		case 0x43:
			ret = "Pentium III";
			break;
		case 0x44:
		case 0x45:
			ret = "Pentium III Xeon";
			break;
		}
		break;
	}

	return (ret);
}
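
/*
 * Illustrative: a family 6 model 5 part whose cpu_cache_edx descriptor
 * byte is 0x43 (512KB L2, per the table in identifycpu()) is named
 * "Pentium II"; the same descriptor on model 7 yields "Pentium III".
 */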

char *
cyrix3_cpu_name(int model, int step)
{
	char	*name = NULL;

	switch (model) {
	case 7:
		if (step < 8)
			name = "C3 Samuel 2";
		else
			name = "C3 Ezra";
		break;
	}
	return name;
}

/*
 * Print identification for the given CPU.
 *
 * XXX This is not as clean as one might like, because it references
 * the "cpuid_level" and "cpu_vendor" globals.  cpuid_level isn't so
 * bad, since both CPUs will hopefully be of the same level.
 *
 * The Intel multiprocessor spec doesn't give us the cpu_vendor
 * information; however, the chance of multi-vendor SMP actually
 * ever *working* is sufficiently low that it's probably safe to assume
 * all processors are of the same vendor.
 */
void
identifycpu(struct cpu_info *ci)
{
	const char *name, *modifier, *vendorname, *token;
	int class = CPUCLASS_486, vendor, i, max;
	int family, model, step, modif, cachesize;
	const struct cpu_cpuid_nameclass *cpup = NULL;
	char *brandstr_from, *brandstr_to;
	char *cpu_device = ci->ci_dev->dv_xname;
	int skipspace;
	extern uint32_t cpu_meltdown;
	uint64_t msr, nmsr;

	if (cpuid_level == -1) {
		name = "486DX";
		vendor = CPUVENDOR_INTEL;
		vendorname = "Intel";
		model = -1;
		step = -1;
		class = CPUCLASS_486;
		ci->cpu_setup = NULL;
		modifier = "";
		token = "";
	} else {
		max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]);
		modif = (ci->ci_signature >> 12) & 3;
		family = (ci->ci_signature >> 8) & 15;
		ci->ci_family = family;
		model = (ci->ci_signature >> 4) & 15;
		ci->ci_model = model;
		step = ci->ci_signature & 15;
#ifdef CPUDEBUG
		printf("%s: cpuid level %d cache eax %x ebx %x ecx %x edx %x\n",
		    cpu_device, cpuid_level, cpu_cache_eax, cpu_cache_ebx,
		    cpu_cache_ecx, cpu_cache_edx);
#endif
		if (family < CPU_MINFAMILY)
			panic("identifycpu: strange family value");

		for (i = 0; i < max; i++) {
			if (!strncmp(cpu_vendor,
			    i386_cpuid_cpus[i].cpu_id, 12)) {
				cpup = &i386_cpuid_cpus[i];
				break;
			}
		}

		if (cpup == NULL) {
			vendor = CPUVENDOR_UNKNOWN;
			if (cpu_vendor[0] != '\0')
				vendorname = &cpu_vendor[0];
			else
				vendorname = "Unknown";
			if (family > CPU_MAXFAMILY)
				family = CPU_MAXFAMILY;
			class = family - 3;
			if (class > CPUCLASS_686)
				class = CPUCLASS_686;
			modifier = "";
			name = "";
			token = "";
			ci->cpu_setup = NULL;
		} else {
			token = cpup->cpu_id;
			vendor = cpup->cpu_vendor;
			vendorname = cpup->cpu_vendorname;
			/*
			 * Special hack for the VIA C3 series.
			 *
			 * VIA bought Centaur Technology from IDT in Aug 1999
			 * and marketed the processors as VIA Cyrix III/C3.
			 */
			if (vendor == CPUVENDOR_IDT && family >= 6) {
				vendor = CPUVENDOR_VIA;
				vendorname = "VIA";
			}
			modifier = modifiers[modif];
			if (family > CPU_MAXFAMILY) {
				family = CPU_MAXFAMILY;
				model = CPU_DEFMODEL;
			} else if (model > CPU_MAXMODEL)
				model = CPU_DEFMODEL;
			i = family - CPU_MINFAMILY;

			/* store extended family/model values for later use */
			if ((vendor == CPUVENDOR_INTEL &&
			    (family == 0x6 || family == 0xf)) ||
			    (vendor == CPUVENDOR_AMD && family == 0xf)) {
				ci->ci_family += (ci->ci_signature >> 20) &
				    0xff;
				ci->ci_model += ((ci->ci_signature >> 16) &
				    0x0f) << 4;
			}
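
			/*
			 * Worked example (illustrative): signature
			 * 0x000906ea has base family 0x6, base model
			 * 0xe, stepping 0xa; the extended model bits
			 * ((sig >> 16) & 0x0f) == 0x9 shift in to give
			 * model 0x9e, so the CPU prints as 06-9e-0a.
			 */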

			/* Special hack for the PentiumII/III series. */
			if (vendor == CPUVENDOR_INTEL && family == 6 &&
			    (model == 5 || model == 7)) {
				name = intel686_cpu_name(model);
			/* Special hack for the VIA C3 series. */
			} else if (vendor == CPUVENDOR_VIA && family == 6 &&
			    model == 7) {
				name = cyrix3_cpu_name(model, step);
			/* Special hack for the TMS5x00 series. */
			} else if (vendor == CPUVENDOR_TRANSMETA &&
			    family == 5 && model == 4) {
				name = tm86_cpu_name(model);
			} else
				name = cpup->cpu_family[i].cpu_models[model];
			if (name == NULL) {
				name = cpup->cpu_family[i].cpu_models[CPU_DEFMODEL];
				if (name == NULL)
					name = "";
			}
			class = cpup->cpu_family[i].cpu_class;
			ci->cpu_setup = cpup->cpu_family[i].cpu_setup;
		}
	}

	/* Find the amount of on-chip L2 cache. */
	cachesize = -1;
	if (vendor == CPUVENDOR_INTEL && cpuid_level >= 2 && family < 0xf) {
		int intel_cachetable[] = { 0, 128, 256, 512, 1024, 2048 };

		if ((cpu_cache_edx & 0xFF) >= 0x40 &&
		    (cpu_cache_edx & 0xFF) <= 0x45)
			cachesize = intel_cachetable[(cpu_cache_edx & 0xFF) - 0x40];
	} else if (vendor == CPUVENDOR_AMD && class == CPUCLASS_686) {
		u_int regs[4];
		cpuid(0x80000000, regs);

		if (regs[0] >= 0x80000006) {
			cpuid(0x80000006, regs);
			cachesize = (regs[2] >> 16);
		}
	}

	if (ci->ci_feature_flags & CPUID_CFLUSH) {
		u_int regs[4];

		/*
		 * To get the cache line size, cpuid must be executed
		 * with eax == 0x01.
		 */
		cpuid(0x01, regs);
		ci->ci_cflushsz = ((regs[1] >> 8) & 0xff) * 8;
	}

	if (vendor == CPUVENDOR_INTEL) {
		/*
		 * PIII, Core Solo and Core Duo CPUs have known
		 * errata stating:
		 * "Page with PAT set to WC while associated MTRR is UC
		 * may consolidate to UC".
		 * Because of this it is best we just fallback to mtrrs
		 * in this case.
		 */
		if (ci->ci_family == 6 && ci->ci_model < 15)
			ci->ci_feature_flags &= ~CPUID_PAT;
	}

	/* Remove leading, trailing and duplicated spaces from cpu_brandstr */
	brandstr_from = brandstr_to = cpu_brandstr;
	skipspace = 1;
	while (*brandstr_from != '\0') {
		if (!skipspace || *brandstr_from != ' ') {
			skipspace = 0;
			*(brandstr_to++) = *brandstr_from;
		}
		if (*brandstr_from == ' ')
			skipspace = 1;
		brandstr_from++;
	}
	if (skipspace && brandstr_to > cpu_brandstr)
		brandstr_to--;
	*brandstr_to = '\0';
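
	/* e.g. "  Intel(R)  Celeron(R)  CPU  " becomes "Intel(R) Celeron(R) CPU" */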

	if (cpu_brandstr[0] == '\0') {
		snprintf(cpu_brandstr, 48 /* sizeof(cpu_brandstr) */,
		    "%s %s%s", vendorname, modifier, name);
	}

	if (cachesize > -1) {
		snprintf(cpu_model, sizeof(cpu_model),
		    "%s (%s%s%s%s-class, %dKB L2 cache)",
		    cpu_brandstr,
		    ((*token) ? "\"" : ""), ((*token) ? token : ""),
		    ((*token) ? "\" " : ""), classnames[class], cachesize);
	} else {
		snprintf(cpu_model, sizeof(cpu_model),
		    "%s (%s%s%s%s-class)",
		    cpu_brandstr,
		    ((*token) ? "\"" : ""), ((*token) ? token : ""),
		    ((*token) ? "\" " : ""), classnames[class]);
	}

	printf("%s: %s", cpu_device, cpu_model);

	if (ci->ci_feature_flags && (ci->ci_feature_flags & CPUID_TSC)) {
		/* Has TSC, check if it's constant */
		switch (vendor) {
		case CPUVENDOR_INTEL:
			if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
			    (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
				ci->ci_flags |= CPUF_CONST_TSC;
			}
			break;
		case CPUVENDOR_VIA:
			if (ci->ci_model >= 0x0f) {
				ci->ci_flags |= CPUF_CONST_TSC;
			}
			break;
		}
		calibrate_cyclecounter();
		if (cpuspeed > 994) {
			int ghz, fr;

			ghz = (cpuspeed + 9) / 1000;
			fr = ((cpuspeed + 9) / 10) % 100;
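			/*
			 * e.g. cpuspeed 2394 -> ghz 2, fr 40 -> "2.40 GHz";
			 * the +9 rounds up, so 995..1000 MHz print "1 GHz".
			 */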
1849 			if (fr)
1850 				printf(" %d.%02d GHz", ghz, fr);
1851 			else
1852 				printf(" %d GHz", ghz);
1853 		} else {
1854 			printf(" %d MHz", cpuspeed);
1855 		}
1856 	}
1857 
1858 	if (cpuid_level != -1)
1859 		printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
1860 		    step);
1861 
1862 	printf("\n");
1863 
1864 	if (ci->ci_feature_flags) {
1865 		int numbits = 0;
1866 
1867 		printf("%s: ", cpu_device);
1868 		max = sizeof(i386_cpuid_features) /
1869 		    sizeof(i386_cpuid_features[0]);
1870 		for (i = 0; i < max; i++) {
1871 			if (ci->ci_feature_flags &
1872 			    i386_cpuid_features[i].feature_bit) {
1873 				printf("%s%s", (numbits == 0 ? "" : ","),
1874 				    i386_cpuid_features[i].feature_name);
1875 				numbits++;
1876 			}
1877 		}
1878 		max = sizeof(i386_cpuid_ecxfeatures)
1879 			/ sizeof(i386_cpuid_ecxfeatures[0]);
1880 		for (i = 0; i < max; i++) {
1881 			if (cpu_ecxfeature &
1882 			    i386_cpuid_ecxfeatures[i].feature_bit) {
1883 				printf("%s%s", (numbits == 0 ? "" : ","),
1884 				    i386_cpuid_ecxfeatures[i].feature_name);
1885 				numbits++;
1886 			}
1887 		}
1888 		for (i = 0; i < nitems(i386_ecpuid_features); i++) {
1889 			if (ecpu_feature &
1890 			    i386_ecpuid_features[i].feature_bit) {
1891 				printf("%s%s", (numbits == 0 ? "" : ","),
1892 				    i386_ecpuid_features[i].feature_name);
1893 				numbits++;
1894 			}
1895 		}
1896 		for (i = 0; i < nitems(i386_ecpuid_ecxfeatures); i++) {
1897 			if (ecpu_ecxfeature &
1898 			    i386_ecpuid_ecxfeatures[i].feature_bit) {
1899 				printf("%s%s", (numbits == 0 ? "" : ","),
1900 				    i386_ecpuid_ecxfeatures[i].feature_name);
1901 				numbits++;
1902 			}
1903 		}
1904 		for (i = 0; i < nitems(i386_cpuid_eaxperf); i++) {
1905 			if (cpu_perf_eax &
1906 			    i386_cpuid_eaxperf[i].feature_bit) {
1907 				printf("%s%s", (numbits == 0 ? "" : ","),
1908 				    i386_cpuid_eaxperf[i].feature_name);
1909 				numbits++;
1910 			}
1911 		}
1912 		for (i = 0; i < nitems(i386_cpuid_edxapmi); i++) {
1913 			if (cpu_apmi_edx &
1914 			    i386_cpuid_edxapmi[i].feature_bit) {
1915 				printf("%s%s", (numbits == 0 ? "" : ","),
1916 				    i386_cpuid_edxapmi[i].feature_name);
1917 				numbits++;
1918 			}
1919 		}
1920 
1921 		if (cpuid_level >= 0x07) {
1922 			u_int dummy;
1923 
1924 			/* "Structured Extended Feature Flags" */
1925 			CPUID_LEAF(0x7, 0, dummy,
1926 			    ci->ci_feature_sefflags_ebx,
1927 			    ci->ci_feature_sefflags_ecx,
1928 			    ci->ci_feature_sefflags_edx);
1929 			for (i = 0; i < nitems(cpu_seff0_ebxfeatures); i++)
1930 				if (ci->ci_feature_sefflags_ebx &
1931 				    cpu_seff0_ebxfeatures[i].feature_bit)
1932 					printf("%s%s",
1933 					    (numbits == 0 ? "" : ","),
1934 					    cpu_seff0_ebxfeatures[i].feature_name);
1935 			for (i = 0; i < nitems(cpu_seff0_ecxfeatures); i++)
1936 				if (ci->ci_feature_sefflags_ecx &
1937 				    cpu_seff0_ecxfeatures[i].feature_bit)
1938 					printf("%s%s",
1939 					    (numbits == 0 ? "" : ","),
1940 					    cpu_seff0_ecxfeatures[i].feature_name);
1941 			for (i = 0; i < nitems(cpu_seff0_edxfeatures); i++)
1942 				if (ci->ci_feature_sefflags_edx &
1943 				    cpu_seff0_edxfeatures[i].feature_bit)
1944 					printf("%s%s",
1945 					    (numbits == 0 ? "" : ","),
1946 					    cpu_seff0_edxfeatures[i].feature_name);
1947 		}
1948 
1949 		if (!strcmp(cpu_vendor, "GenuineIntel") &&
1950 		    cpuid_level >= 0x06 ) {
1951 			u_int dummy;
1952 
1953 			CPUID(0x06, ci->ci_feature_tpmflags, dummy,
1954 			    dummy, dummy);
1955 			max = nitems(cpu_tpm_eaxfeatures);
1956 			for (i = 0; i < max; i++)
1957 				if (ci->ci_feature_tpmflags &
1958 				    cpu_tpm_eaxfeatures[i].feature_bit)
1959 					printf(",%s", cpu_tpm_eaxfeatures[i].feature_name);
1960 		}
1961 
1962 		/* xsave subfeatures */
1963 		if (cpuid_level >= 0xd) {
1964 			uint32_t dummy, val;
1965 
1966 			CPUID_LEAF(0xd, 1, val, dummy, dummy, dummy);
1967 			for (i = 0; i < nitems(cpu_xsave_extfeatures); i++)
1968 				if (val & cpu_xsave_extfeatures[i].feature_bit)
1969 					printf(",%s",
1970 					    cpu_xsave_extfeatures[i].feature_name);
1971 		}
1972 
1973 		if (cpu_meltdown)
1974 			printf(",MELTDOWN");
1975 
1976 		printf("\n");
1977 	}
1978 
1979 	/*
1980 	 * "Mitigation G-2" per AMD's Whitepaper "Software Techniques
1981 	 * for Managing Speculation on AMD Processors"
1982 	 *
1983 	 * By setting MSR C001_1029[1]=1, LFENCE becomes a dispatch
1984 	 * serializing instruction.
1985 	 *
1986 	 * This MSR is available on all AMD families >= 10h, except 11h
1987  	 * where LFENCE is always serializing.
1988 	 */
1989 	if (!strcmp(cpu_vendor, "AuthenticAMD")) {
1990 		if (ci->ci_family >= 0x10 && ci->ci_family != 0x11) {
1991 			nmsr = msr = rdmsr(MSR_DE_CFG);
1992 			nmsr |= DE_CFG_SERIALIZE_LFENCE;
1993 			if (msr != nmsr)
1994 				wrmsr(MSR_DE_CFG, nmsr);
1995 		}
1996 		if (family == 0x17 && ci->ci_model >= 0x31 &&
1997 		    (cpu_ecxfeature & CPUIDECX_HV) == 0) {
1998 			nmsr = msr = rdmsr(MSR_DE_CFG);
1999 			nmsr |= DE_CFG_SERIALIZE_9;
2000 			if (msr != nmsr)
2001 				wrmsr(MSR_DE_CFG, nmsr);
2002 		}
2003 	}
2004 
2005 	/*
2006 	 * Attempt to disable Silicon Debug and lock the configuration
2007 	 * if it's enabled and unlocked.
2008 	 */
2009 	if (!strcmp(cpu_vendor, "GenuineIntel") &&
2010 	    (cpu_ecxfeature & CPUIDECX_SDBG)) {
2011 		uint64_t msr;
2012 
2013 		msr = rdmsr(IA32_DEBUG_INTERFACE);
2014 		if ((msr & IA32_DEBUG_INTERFACE_ENABLE) &&
2015 		    (msr & IA32_DEBUG_INTERFACE_LOCK) == 0) {
2016 			msr &= IA32_DEBUG_INTERFACE_MASK;
2017 			msr |= IA32_DEBUG_INTERFACE_LOCK;
2018 			wrmsr(IA32_DEBUG_INTERFACE, msr);
2019 		} else if (msr & IA32_DEBUG_INTERFACE_ENABLE)
2020 			printf("%s: cannot disable silicon debug\n",
2021 			    cpu_device);
2022 	}
2023 
2024 	if (CPU_IS_PRIMARY(ci)) {
2025 		if (cpu_ecxfeature & CPUIDECX_RDRAND)
2026 			has_rdrand = 1;
2027 		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
2028 			has_rdseed = 1;
2029 		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
2030 			replacesmap();
2031 	}
2032 
2033 #ifndef SMALL_KERNEL
2034 	if (cpuspeed != 0 && cpu_cpuspeed == NULL)
2035 		cpu_cpuspeed = pentium_cpuspeed;
2036 #endif
2037 
2038 	cpu_class = class;
2039 
2040 	ci->cpu_class = class;
2041 
2042 	/*
2043 	 * Enable ring 0 write protection: CR0.WP makes even the kernel
	 * fault on writes to read-only pages, which copy-on-write needs.
2044 	 */
2045 	lcr0(rcr0() | CR0_WP);
2046 
2047 	/*
2048 	 * If we have FXSAVE/FXRESTOR, use them.
2049 	 */
2050 	if (cpu_feature & CPUID_FXSR) {
2051 		i386_use_fxsave = 1;
2052 		lcr4(rcr4() | CR4_OSFXSR);
2053 
2054 		/*
2055 		 * If we have SSE/SSE2, enable XMM exceptions, and
2056 		 * notify userland.
2057 		 */
2058 		if (cpu_feature & (CPUID_SSE|CPUID_SSE2)) {
2059 			if (cpu_feature & CPUID_SSE)
2060 				i386_has_sse = 1;
2061 			if (cpu_feature & CPUID_SSE2)
2062 				i386_has_sse2 = 1;
2063 			lcr4(rcr4() | CR4_OSXMMEXCPT);
2064 		}
2065 	} else
2066 		i386_use_fxsave = 0;
2067 
2068 }
2069 
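/*
 * Map a Transmeta Crusoe model to its marketing name.  CPUID leaf
 * 0x80860001 returns revision information in %ebx (regs[1]); the byte
 * in bits 23:16 distinguishes the TMS5800 (>= 3) from the TMS5600.
 */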
2070 char *
2071 tm86_cpu_name(int model)
2072 {
2073 	u_int32_t regs[4];
2074 	char *name = NULL;
2075 
2076 	cpuid(0x80860001, regs);
2077 
2078 	switch (model) {
2079 	case 4:
2080 		if (((regs[1] >> 16) & 0xff) >= 0x3)
2081 			name = "TMS5800";
2082 		else
2083 			name = "TMS5600";
2084 	}
2085 
2086 	return name;
2087 }
2088 
2089 #ifndef SMALL_KERNEL
2090 void
2091 cyrix3_get_bus_clock(struct cpu_info *ci)
2092 {
2093 	u_int64_t msr;
2094 	int bus;
2095 
2096 	msr = rdmsr(MSR_EBL_CR_POWERON);
2097 	bus = (msr >> 18) & 0x3;
2098 	switch (bus) {
2099 	case 0:
2100 		bus_clock = BUS100;
2101 		break;
2102 	case 1:
2103 		bus_clock = BUS133;
2104 		break;
2105 	case 2:
2106 		bus_clock = BUS200;
2107 		break;
2108 	case 3:
2109 		bus_clock = BUS166;
2110 		break;
2111 	}
2112 }
2113 
2114 void
2115 p4_get_bus_clock(struct cpu_info *ci)
2116 {
2117 	u_int64_t msr;
2118 	int model, bus;
2119 
2120 	model = (ci->ci_signature >> 4) & 15;
2121 	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
2122 	if (model < 2) {
2123 		bus = (msr >> 21) & 0x7;
2124 		switch (bus) {
2125 		case 0:
2126 			bus_clock = BUS100;
2127 			break;
2128 		case 1:
2129 			bus_clock = BUS133;
2130 			break;
2131 		default:
2132 			printf("%s: unknown Pentium 4 (model %d) "
2133 			    "EBC_FREQUENCY_ID value %d\n",
2134 			    ci->ci_dev->dv_xname, model, bus);
2135 			break;
2136 		}
2137 	} else {
2138 		bus = (msr >> 16) & 0x7;
2139 		switch (bus) {
2140 		case 0:
2141 			bus_clock = (model == 2) ? BUS100 : BUS266;
2142 			break;
2143 		case 1:
2144 			bus_clock = BUS133;
2145 			break;
2146 		case 2:
2147 			bus_clock = BUS200;
2148 			break;
2149 		case 3:
2150 			bus_clock = BUS166;
2151 			break;
2152 		default:
2153 			printf("%s: unknown Pentium 4 (model %d) "
2154 			    "EBC_FREQUENCY_ID value %d\n",
2155 			    ci->ci_dev->dv_xname, model, bus);
2156 			break;
2157 		}
2158 	}
2159 }
2160 
2161 void
2162 p3_get_bus_clock(struct cpu_info *ci)
2163 {
2164 	u_int64_t msr;
2165 	int bus;
2166 
2167 	switch (ci->ci_model) {
2168 	case 0x9: /* Pentium M (130 nm, Banias) */
2169 		bus_clock = BUS100;
2170 		break;
2171 	case 0xd: /* Pentium M (90 nm, Dothan) */
2172 		msr = rdmsr(MSR_FSB_FREQ);
2173 		bus = (msr >> 0) & 0x7;
2174 		switch (bus) {
2175 		case 0:
2176 			bus_clock = BUS100;
2177 			break;
2178 		case 1:
2179 			bus_clock = BUS133;
2180 			break;
2181 		default:
2182 			printf("%s: unknown Pentium M FSB_FREQ value %d",
2183 			    ci->ci_dev->dv_xname, bus);
2184 			goto print_msr;
2185 		}
2186 		break;
2187 	case 0x15:	/* EP80579 no FSB */
2188 		break;
2189 	case 0xe: /* Core Duo/Solo */
2190 	case 0xf: /* Core Xeon */
2191 	case 0x16: /* 65nm Celeron */
2192 	case 0x17: /* Core 2 Extreme/45nm Xeon */
2193 	case 0x1d: /* Xeon MP 7400 */
2194 		msr = rdmsr(MSR_FSB_FREQ);
2195 		bus = (msr >> 0) & 0x7;
2196 		switch (bus) {
2197 		case 5:
2198 			bus_clock = BUS100;
2199 			break;
2200 		case 1:
2201 			bus_clock = BUS133;
2202 			break;
2203 		case 3:
2204 			bus_clock = BUS166;
2205 			break;
2206 		case 2:
2207 			bus_clock = BUS200;
2208 			break;
2209 		case 0:
2210 			bus_clock = BUS266;
2211 			break;
2212 		case 4:
2213 			bus_clock = BUS333;
2214 			break;
2215 		default:
2216 			printf("%s: unknown Core FSB_FREQ value %d",
2217 			    ci->ci_dev->dv_xname, bus);
2218 			goto print_msr;
2219 		}
2220 		break;
2221 	case 0x1c: /* Atom */
2222 	case 0x26: /* Atom Z6xx */
2223 	case 0x36: /* Atom [DN]2xxx */
2224 		msr = rdmsr(MSR_FSB_FREQ);
2225 		bus = (msr >> 0) & 0x7;
2226 		switch (bus) {
2227 		case 5:
2228 			bus_clock = BUS100;
2229 			break;
2230 		case 1:
2231 			bus_clock = BUS133;
2232 			break;
2233 		case 3:
2234 			bus_clock = BUS166;
2235 			break;
2236 		case 2:
2237 			bus_clock = BUS200;
2238 			break;
2239 		default:
2240 			printf("%s: unknown Atom FSB_FREQ value %d",
2241 			    ci->ci_dev->dv_xname, bus);
2242 			goto print_msr;
2243 		}
2244 		break;
2245 	case 0x1: /* Pentium Pro, model 1 */
2246 	case 0x3: /* Pentium II, model 3 */
2247 	case 0x5: /* Pentium II, II Xeon, Celeron, model 5 */
2248 	case 0x6: /* Celeron, model 6 */
2249 	case 0x7: /* Pentium III, III Xeon, model 7 */
2250 	case 0x8: /* Pentium III, III Xeon, Celeron, model 8 */
2251 	case 0xa: /* Pentium III Xeon, model A */
2252 	case 0xb: /* Pentium III, model B */
2253 		msr = rdmsr(MSR_EBL_CR_POWERON);
2254 		bus = (msr >> 18) & 0x3;
2255 		switch (bus) {
2256 		case 0:
2257 			bus_clock = BUS66;
2258 			break;
2259 		case 1:
2260 			bus_clock = BUS133;
2261 			break;
2262 		case 2:
2263 			bus_clock = BUS100;
2264 			break;
2265 		default:
2266 			printf("%s: unknown i686 EBL_CR_POWERON value %d",
2267 			    ci->ci_dev->dv_xname, bus);
2268 			goto print_msr;
2269 		}
2270 		break;
2271 	default:
2272 		/* no FSB on modern Intel processors */
2273 		break;
2274 	}
2275 	return;
2276 print_msr:
2277 	/*
2278 	 * Show the EBL_CR_POWERON MSR, so we'll at least have
2279 	 * some extra information, such as clock ratio, etc.
2280 	 */
2281 	printf(" (0x%llx)\n", rdmsr(MSR_EBL_CR_POWERON));
2282 }
2283 
2284 void
2285 p4_update_cpuspeed(void)
2286 {
2287 	struct cpu_info *ci;
2288 	u_int64_t msr;
2289 	int mult;
2290 
2291 	ci = curcpu();
2292 	p4_get_bus_clock(ci);
2293 
2294 	if (bus_clock == 0) {
2295 		printf("p4_update_cpuspeed: unknown bus clock\n");
2296 		return;
2297 	}
2298 
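	/* bits 31:24 of EBC_FREQUENCY_ID hold the integer core-to-bus ratio */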
2299 	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
2300 	mult = ((msr >> 24) & 0xff);
2301 
2302 	cpuspeed = (bus_clock * mult) / 100;
2303 }
2304 
2305 void
2306 p3_update_cpuspeed(void)
2307 {
2308 	struct cpu_info *ci;
2309 	u_int64_t msr;
2310 	int mult;
2311 	const u_int8_t mult_code[] = {
2312 	    50, 30, 40, 0, 55, 35, 45, 0, 0, 70, 80, 60, 0, 75, 0, 65 };
2313 
2314 	ci = curcpu();
2315 	p3_get_bus_clock(ci);
2316 
2317 	if (bus_clock == 0) {
2318 		printf("p3_update_cpuspeed: unknown bus clock\n");
2319 		return;
2320 	}
2321 
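	/*
	 * Bits 25:22 of EBL_CR_POWERON encode the bus ratio; mult_code[]
	 * maps that code to ten times the ratio, and on later parts bit
	 * 27 adds another 4.0.  With bus_clock apparently kept in 10 kHz
	 * units, a 100 MHz FSB (10000) at a 5.0x ratio (code 50) works
	 * out to 10000 * 50 / 1000 = 500 MHz.
	 */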
2322 	msr = rdmsr(MSR_EBL_CR_POWERON);
2323 	mult = (msr >> 22) & 0xf;
2324 	mult = mult_code[mult];
2325 	if (!p3_early)
2326 		mult += ((msr >> 27) & 0x1) * 40;
2327 
2328 	cpuspeed = (bus_clock * mult) / 1000;
2329 }
2330 
2331 int
2332 pentium_cpuspeed(int *freq)
2333 {
2334 	*freq = cpuspeed;
2335 	return (0);
2336 }
2337 #endif	/* !SMALL_KERNEL */
2338 
2339 /*
2340  * Send a signal to the process.
2341  *
2342  * The stack is set up so that the signal trampoline (sigcode)
2343  * calls the handler, followed by a call into the sigreturn
2344  * routine below.  After sigreturn restores the signal mask,
2345  * the stack, and the frame pointer, it returns to the
2346  * user-specified pc and psl, completing delivery of the
2347  * signal.
2348  */
2349 int
2350 sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip,
2351     int info, int onstack)
2352 {
2353 	struct proc *p = curproc;
2354 	struct trapframe *tf = p->p_md.md_regs;
2355 	struct sigframe *fp, frame;
2356 	register_t sp;
2357 
2358 	/*
2359 	 * Build the argument list for the signal handler.
2360 	 */
2361 	bzero(&frame, sizeof(frame));
2362 	frame.sf_signum = sig;
2363 
2364 	/*
2365 	 * Allocate space for the signal handler context.
2366 	 */
2367 	if ((p->p_sigstk.ss_flags & SS_DISABLE) == 0 &&
2368 	    !sigonstack(tf->tf_esp) && onstack)
2369 		sp = trunc_page((vaddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size);
2370 	else
2371 		sp = tf->tf_esp;
2372 
2373 	frame.sf_sc.sc_fpstate = NULL;
2374 	if (p->p_md.md_flags & MDP_USEDFPU) {
2375 		npxsave_proc(p, 1);
2376 		sp -= sizeof(union savefpu);
2377 		sp &= ~0xf;	/* for XMM regs */
2378 		frame.sf_sc.sc_fpstate = (void *)sp;
2379 		if (copyout(&p->p_addr->u_pcb.pcb_savefpu,
2380 		    (void *)sp, sizeof(union savefpu)))
2381 			return 1;
2382 
2383 		/* Signal handlers get a completely clean FP state */
2384 		p->p_md.md_flags &= ~MDP_USEDFPU;
2385 	}
2386 
2387 	fp = (struct sigframe *)sp - 1;
2388 	frame.sf_scp = &fp->sf_sc;
2389 	frame.sf_sip = NULL;
2390 	frame.sf_handler = catcher;
2391 
2392 	/*
2393 	 * Build the signal context to be used by sigreturn.
2394 	 */
2395 	frame.sf_sc.sc_err = tf->tf_err;
2396 	frame.sf_sc.sc_trapno = tf->tf_trapno;
2397 	frame.sf_sc.sc_mask = mask;
2398 	frame.sf_sc.sc_fs = tf->tf_fs;
2399 	frame.sf_sc.sc_gs = tf->tf_gs;
2400 	frame.sf_sc.sc_es = tf->tf_es;
2401 	frame.sf_sc.sc_ds = tf->tf_ds;
2402 	frame.sf_sc.sc_eflags = tf->tf_eflags;
2403 	frame.sf_sc.sc_edi = tf->tf_edi;
2404 	frame.sf_sc.sc_esi = tf->tf_esi;
2405 	frame.sf_sc.sc_ebp = tf->tf_ebp;
2406 	frame.sf_sc.sc_ebx = tf->tf_ebx;
2407 	frame.sf_sc.sc_edx = tf->tf_edx;
2408 	frame.sf_sc.sc_ecx = tf->tf_ecx;
2409 	frame.sf_sc.sc_eax = tf->tf_eax;
2410 	frame.sf_sc.sc_eip = tf->tf_eip;
2411 	frame.sf_sc.sc_cs = tf->tf_cs;
2412 	frame.sf_sc.sc_esp = tf->tf_esp;
2413 	frame.sf_sc.sc_ss = tf->tf_ss;
2414 
2415 	if (info) {
2416 		frame.sf_sip = &fp->sf_si;
2417 		frame.sf_si = *ksip;
2418 	}
2419 
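	/*
	 * The context is stamped with its own address XORed with a
	 * per-process secret so that sys_sigreturn() can verify the
	 * sigcontext was really written here by sendsig() and reject
	 * forged frames.
	 */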
2420 	/* XXX don't copyout siginfo if not needed? */
2421 	frame.sf_sc.sc_cookie = (long)&fp->sf_sc ^ p->p_p->ps_sigcookie;
2422 	if (copyout(&frame, fp, sizeof(frame)) != 0)
2423 		return 1;
2424 
2425 	/*
2426 	 * Build context to run handler in.
2427 	 */
2428 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
2429 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
2430 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
2431 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
2432 	tf->tf_eip = p->p_p->ps_sigcode;
2433 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
2434 	tf->tf_eflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
2435 	tf->tf_esp = (int)fp;
2436 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
2437 
2438 	return 0;
2439 }
2440 
2441 /*
2442  * System call to clean up state after a signal
2443  * has been taken.  Reset signal mask and
2444  * stack state from context left by sendsig (above).
2445  * Return to previous pc and psl as specified by
2446  * context left by sendsig. Check carefully to
2447  * make sure that the user has not modified the
2448  * psl to gain improper privileges or to cause
2449  * a machine fault.
2450  */
2451 int
2452 sys_sigreturn(struct proc *p, void *v, register_t *retval)
2453 {
2454 	struct sys_sigreturn_args /* {
2455 		syscallarg(struct sigcontext *) sigcntxp;
2456 	} */ *uap = v;
2457 	struct sigcontext ksc, *scp = SCARG(uap, sigcntxp);
2458 	struct trapframe *tf = p->p_md.md_regs;
2459 	int error;
2460 
2461 	if (PROC_PC(p) != p->p_p->ps_sigcoderet) {
2462 		sigexit(p, SIGILL);
2463 		return (EPERM);
2464 	}
2465 
2466 	if ((error = copyin((caddr_t)scp, &ksc, sizeof(*scp))))
2467 		return (error);
2468 
2469 	if (ksc.sc_cookie != ((long)scp ^ p->p_p->ps_sigcookie)) {
2470 		sigexit(p, SIGILL);
2471 		return (EFAULT);
2472 	}
2473 
2474 	/* Prevent reuse of the sigcontext cookie */
2475 	ksc.sc_cookie = 0;
2476 	(void)copyout(&ksc.sc_cookie, (caddr_t)scp +
2477 	    offsetof(struct sigcontext, sc_cookie), sizeof (ksc.sc_cookie));
2478 
2479 	/*
2480 	 * Restore the saved signal context.
2481 	 */
2482 	/*
2483 	 * Check for security violations.  If we're returning to
2484 	 * protected mode, the CPU will validate the segment registers
2485 	 * automatically and generate a trap on violations.  We handle
2486 	 * the trap, rather than doing all of the checking here.
2487 	 */
2488 	if (((ksc.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
2489 	    !USERMODE(ksc.sc_cs, ksc.sc_eflags))
2490 		return (EINVAL);
2491 
2492 	tf->tf_fs = ksc.sc_fs;
2493 	tf->tf_gs = ksc.sc_gs;
2494 	tf->tf_es = ksc.sc_es;
2495 	tf->tf_ds = ksc.sc_ds;
2496 	tf->tf_eflags = ksc.sc_eflags;
2497 	tf->tf_edi = ksc.sc_edi;
2498 	tf->tf_esi = ksc.sc_esi;
2499 	tf->tf_ebp = ksc.sc_ebp;
2500 	tf->tf_ebx = ksc.sc_ebx;
2501 	tf->tf_edx = ksc.sc_edx;
2502 	tf->tf_ecx = ksc.sc_ecx;
2503 	tf->tf_eax = ksc.sc_eax;
2504 	tf->tf_eip = ksc.sc_eip;
2505 	tf->tf_cs = ksc.sc_cs;
2506 	tf->tf_esp = ksc.sc_esp;
2507 	tf->tf_ss = ksc.sc_ss;
2508 
2509 	if (p->p_md.md_flags & MDP_USEDFPU)
2510 		npxsave_proc(p, 0);
2511 
2512 	if (ksc.sc_fpstate) {
2513 		union savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
2514 
2515 		if ((error = copyin(ksc.sc_fpstate, sfp, sizeof(*sfp))))
2516 			return (error);
2517 		if (i386_use_fxsave)
2518 			sfp->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask;
2519 		p->p_md.md_flags |= MDP_USEDFPU;
2520 	}
2521 
2522 	p->p_sigmask = ksc.sc_mask & ~sigcantmask;
2523 
2524 	return (EJUSTRETURN);
2525 }
2526 
2527 #ifdef MULTIPROCESSOR
2528 /* force a CPU into the kernel, whether or not it's idle */
2529 void
2530 cpu_kick(struct cpu_info *ci)
2531 {
2532 	/* only need to kick other CPUs */
2533 	if (ci != curcpu()) {
2534 		if (cpu_mwait_size > 0) {
2535 			/*
2536 			 * If not idling, then send an IPI, else
2537 			 * just clear the "keep idling" bit.
2538 			 */
2539 			if ((ci->ci_mwait & MWAIT_IN_IDLE) == 0)
2540 				i386_send_ipi(ci, I386_IPI_NOP);
2541 			else
2542 				atomic_clearbits_int(&ci->ci_mwait,
2543 				    MWAIT_KEEP_IDLING);
2544 		} else {
2545 			/* no mwait, so need an IPI */
2546 			i386_send_ipi(ci, I386_IPI_NOP);
2547 		}
2548 	}
2549 }
2550 #endif
2551 
2552 /*
2553  * Notify the process p that it has a signal pending; it will be
2554  * processed as soon as possible.
2555  */
2556 void
2557 signotify(struct proc *p)
2558 {
2559 	aston(p);
2560 	cpu_kick(p->p_cpu);
2561 }
2562 
2563 #ifdef MULTIPROCESSOR
2564 void
2565 cpu_unidle(struct cpu_info *ci)
2566 {
2567 	if (cpu_mwait_size > 0 && (ci->ci_mwait & MWAIT_ONLY)) {
2568 		/*
2569 		 * Just clear the "keep idling" bit; if it wasn't
2570 		 * idling then we didn't need to do anything anyway.
2571 		 */
2572 		atomic_clearbits_int(&ci->ci_mwait, MWAIT_KEEP_IDLING);
2573 		return;
2574 	}
2575 
2576 	if (ci != curcpu())
2577 		i386_send_ipi(ci, I386_IPI_NOP);
2578 }
2579 #endif
2580 
2581 int	waittime = -1;
2582 struct pcb dumppcb;
2583 
2584 __dead void
2585 boot(int howto)
2586 {
2587 	if ((howto & RB_POWERDOWN) != 0)
2588 		lid_action = 0;
2589 
2590 	if ((howto & RB_RESET) != 0)
2591 		goto doreset;
2592 
2593 	if (cold) {
2594 		if ((howto & RB_USERREQ) == 0)
2595 			howto |= RB_HALT;
2596 		goto haltsys;
2597 	}
2598 
2599 	boothowto = howto;
2600 	if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
2601 		waittime = 0;
2602 		vfs_shutdown(curproc);
2603 
2604 		if ((howto & RB_TIMEBAD) == 0) {
2605 			resettodr();
2606 		} else {
2607 			printf("WARNING: not updating battery clock\n");
2608 		}
2609 	}
2610 	if_downall();
2611 
2612 	uvm_shutdown();
2613 	splhigh();
2614 	cold = 1;
2615 
2616 	if ((howto & RB_DUMP) != 0)
2617 		dumpsys();
2618 
2619 haltsys:
2620 	config_suspend_all(DVACT_POWERDOWN);
2621 
2622 #ifdef MULTIPROCESSOR
2623 	i386_broadcast_ipi(I386_IPI_HALT);
2624 #endif
2625 
2626 	if ((howto & RB_HALT) != 0) {
2627 #if NACPI > 0 && !defined(SMALL_KERNEL)
2628 		extern int acpi_enabled;
2629 
2630 		if (acpi_enabled) {
2631 			delay(500000);
2632 			if ((howto & RB_POWERDOWN) != 0)
2633 				acpi_powerdown();
2634 		}
2635 #endif
2636 
2637 #if NAPM > 0
2638 		if ((howto & RB_POWERDOWN) != 0) {
2639 			int rv;
2640 
2641 			printf("\nAttempting to power down...\n");
2642 			/*
2643 			 * Turn off, if we can.  But try to turn disk off and
2644 			 * wait a bit first--some disk drives are slow to
2645 			 * clean up and users have reported disk corruption.
2646 			 *
2647 			 * If apm_set_powstate() fails the first time, don't
2648 			 * try to turn the system off.
2649 			 */
2650 			delay(500000);
2651 			apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
2652 			delay(500000);
2653 			rv = apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
2654 			if (rv == 0 || rv == ENXIO) {
2655 				delay(500000);
2656 				(void) apm_set_powstate(APM_DEV_ALLDEVS,
2657 							APM_SYS_OFF);
2658 			}
2659 		}
2660 #endif
2661 		printf("\n");
2662 		printf("The operating system has halted.\n");
2663 		printf("Please press any key to reboot.\n\n");
2664 		cnpollc(1);	/* for proper keyboard command handling */
2665 		cngetc();
2666 		cnpollc(0);
2667 	}
2668 
2669 doreset:
2670 	printf("rebooting...\n");
2671 	cpu_reset();
2672 	for (;;)
2673 		continue;
2674 	/* NOTREACHED */
2675 }
2676 
2677 /*
2678  * This is called by configure to set dumplo and dumpsize.
2679  * Dumps always skip the first block of disk space
2680  * in case there might be a disk label stored there.
2681  * If there is extra space, put dump at the end to
2682  * reduce the chance that swapping trashes it.
2683  */
2684 void
2685 dumpconf(void)
2686 {
2687 	int nblks;	/* size of dump area */
2688 	int i;
2689 
2690 	if (dumpdev == NODEV ||
2691 	    (nblks = (bdevsw[major(dumpdev)].d_psize)(dumpdev)) == 0)
2692 		return;
2693 	if (nblks <= ctod(1))
2694 		return;
2695 
2696 	/* Always skip the first block, in case there is a label there. */
2697 	if (dumplo < ctod(1))
2698 		dumplo = ctod(1);
2699 
2700 	for (i = 0; i < ndumpmem; i++)
2701 		dumpsize = max(dumpsize, dumpmem[i].end);
2702 
2703 	/*
	 * Put the dump at the end of the partition and make it fit:
	 * clamp dumpsize (in pages) to the disk blocks left past dumplo,
	 * then push dumplo as near the end as the dump still allows.
	 */
2704 	if (dumpsize > dtoc(nblks - dumplo - 1))
2705 		dumpsize = dtoc(nblks - dumplo - 1);
2706 	if (dumplo < nblks - ctod(dumpsize) - 1)
2707 		dumplo = nblks - ctod(dumpsize) - 1;
2708 }
2709 
2710 /*
2711  * cpu_dump: dump machine-dependent kernel core dump headers.
2712  */
2713 int
2714 cpu_dump(void)
2715 {
2716 	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
2717 	long buf[dbtob(1) / sizeof (long)];
2718 	kcore_seg_t	*segp;
2719 
2720 	dump = bdevsw[major(dumpdev)].d_dump;
2721 
2722 	segp = (kcore_seg_t *)buf;
2723 
2724 	/*
2725 	 * Generate a segment header.
2726 	 */
2727 	CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2728 	segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));
2729 
2730 	return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1)));
2731 }
2732 
2733 /*
2734  * Doadump comes here after turning off memory management and
2735  * getting on the dump stack, either when called above, or by
2736  * the auto-restart code.
2737  */
2738 static vaddr_t dumpspace;
2739 
2740 vaddr_t
2741 reserve_dumppages(vaddr_t p)
2742 {
2743 
2744 	dumpspace = p;
2745 	return (p + PAGE_SIZE);
2746 }
2747 
2748 void
2749 dumpsys(void)
2750 {
2751 	u_int i, j, npg;
2752 	int maddr;
2753 	daddr_t blkno;
2754 	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
2755 	int error;
2756 	char *str;
2757 	extern int msgbufmapped;
2758 
2759 	/* Save registers. */
2760 	savectx(&dumppcb);
2761 
2762 	msgbufmapped = 0;	/* don't record dump msgs in msgbuf */
2763 	if (dumpdev == NODEV)
2764 		return;
2765 
2766 	/*
2767 	 * For dumps during autoconfiguration: if dumpconf() has not
2768 	 * been run yet, compute the dump size and offset now.
2769 	 */
2770 	if (dumpsize == 0)
2771 		dumpconf();
2772 	if (dumplo < 0)
2773 		return;
2774 	printf("\ndumping to dev %x, offset %ld\n", dumpdev, dumplo);
2775 
2776 	error = (*bdevsw[major(dumpdev)].d_psize)(dumpdev);
2777 	printf("dump ");
2778 	if (error == -1) {
2779 		printf("area unavailable\n");
2780 		return;
2781 	}
2782 
2783 #if 0	/* XXX this doesn't work.  grr. */
2784 	/* toss any characters present prior to dump */
2785 	while (sget() != NULL); /*syscons and pccons differ */
2786 #endif
2787 
2788 	/* scan through the dumpmem list */
2789 	dump = bdevsw[major(dumpdev)].d_dump;
2790 	error = cpu_dump();
2791 	for (i = 0; !error && i < ndumpmem; i++) {
2792 
2793 		npg = dumpmem[i].end - dumpmem[i].start;
2794 		maddr = ptoa(dumpmem[i].start);
2795 		blkno = dumplo + btodb(maddr) + 1;
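		/* the "+ 1" skips the header block cpu_dump() wrote at dumplo */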
2796 #if 0
2797 		printf("(%d %lld %d) ", maddr, (long long)blkno, npg);
2798 #endif
2799 		for (j = npg; j--; maddr += NBPG, blkno += btodb(NBPG)) {
2800 
2801 			/* Print out how many MB remain to be dumped. */
2802 			if (dbtob(blkno - dumplo) % (1024 * 1024) < NBPG)
2803 				printf("%ld ",
2804 				    (ptoa(dumpsize) - maddr) / (1024 * 1024));
2805 #if 0
2806 			printf("(%x %lld) ", maddr, (long long)blkno);
2807 #endif
2808 			pmap_enter(pmap_kernel(), dumpspace, maddr,
2809 			    PROT_READ, PMAP_WIRED);
2810 			if ((error = (*dump)(dumpdev, blkno,
2811 			    (caddr_t)dumpspace, NBPG)))
2812 				break;
2813 
2814 #if 0	/* XXX this doesn't work.  grr. */
2815 			/* operator aborting dump? */
2816 			if (sget() != NULL) {
2817 				error = EINTR;
2818 				break;
2819 			}
2820 #endif
2821 		}
2822 	}
2823 
2824 	switch (error) {
2825 
2826 	case 0:		str = "succeeded\n\n";			break;
2827 	case ENXIO:	str = "device bad\n\n";			break;
2828 	case EFAULT:	str = "device not ready\n\n";		break;
2829 	case EINVAL:	str = "area improper\n\n";		break;
2830 	case EIO:	str = "i/o error\n\n";			break;
2831 	case EINTR:	str = "aborted from console\n\n";	break;
2832 	default:	str = "error %d\n\n";			break;
2833 	}
2834 	printf(str, error);
2835 
2836 	delay(5000000);		/* 5 seconds */
2837 }
2838 
2839 /*
2840  * Clear registers on exec
2841  */
2842 void
2843 setregs(struct proc *p, struct exec_package *pack, u_long stack,
2844     struct ps_strings *arginfo)
2845 {
2846 	struct pcb *pcb = &p->p_addr->u_pcb;
2847 	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
2848 	struct trapframe *tf = p->p_md.md_regs;
2849 
2850 #if NNPX > 0
2851 	/* If we were using the FPU, forget about it. */
2852 	if (pcb->pcb_fpcpu != NULL)
2853 		npxsave_proc(p, 0);
2854 	p->p_md.md_flags &= ~MDP_USEDFPU;
2855 #endif
2856 
2857 	initcodesegment(&pmap->pm_codeseg);
2858 	setsegment(&pcb->pcb_threadsegs[TSEG_FS], 0,
2859 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
2860 	setsegment(&pcb->pcb_threadsegs[TSEG_GS], 0,
2861 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
2862 
2863 	/*
2864 	 * And update the GDT since we return to the user process
2865 	 * by leaving the syscall (we don't do another pmap_activate()).
2866 	 */
2867 	curcpu()->ci_gdt[GUCODE_SEL].sd = pmap->pm_codeseg;
2868 	curcpu()->ci_gdt[GUFS_SEL].sd = pcb->pcb_threadsegs[TSEG_FS];
2869 	curcpu()->ci_gdt[GUGS_SEL].sd = pcb->pcb_threadsegs[TSEG_GS];
2870 
2871 	/*
2872 	 * And reset the hiexec marker in the pmap.
2873 	 */
2874 	pmap->pm_hiexec = 0;
2875 
2876 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
2877 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
2878 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
2879 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
2880 	tf->tf_edi = 0;
2881 	tf->tf_esi = 0;
2882 	tf->tf_ebp = 0;
2883 	tf->tf_ebx = (int)p->p_p->ps_strings;
2884 	tf->tf_edx = 0;
2885 	tf->tf_ecx = 0;
2886 	tf->tf_eax = 0;
2887 	tf->tf_eip = pack->ep_entry;
2888 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
2889 	tf->tf_eflags = PSL_USERSET;
2890 	tf->tf_esp = stack;
2891 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
2892 }
2893 
2894 /*
2895  * Initialize segments and descriptor tables
2896  */
2897 
2898 /*
 * The IDT is now a full page, so it can be mapped into the U-K
 * (user-kernel) page tables used by the Meltdown mitigation.
 */
2899 union {
2900 	struct gate_descriptor	idt[NIDT];
2901 	char			align[PAGE_SIZE];
2902 } _idt_region __aligned(PAGE_SIZE);
2903 #define idt_region _idt_region.idt
2904 struct gate_descriptor *idt = idt_region;
2905 
2906 extern  struct user *proc0paddr;
2907 extern struct user *proc0paddr;
2908 void
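/*
 * i386 segment and gate descriptors scatter their base/limit/offset
 * fields across the 8-byte entry (a 286 legacy), hence the paired
 * lo/hi stores in the helpers below.
 */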
2909 setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
2910     int seg)
2911 {
2912 
2913 	gd->gd_looffset = (int)func;
2914 	gd->gd_selector = GSEL(seg, SEL_KPL);
2915 	gd->gd_stkcpy = args;
2916 	gd->gd_xx = 0;
2917 	gd->gd_type = type;
2918 	gd->gd_dpl = dpl;
2919 	gd->gd_p = 1;
2920 	gd->gd_hioffset = (int)func >> 16;
2921 }
2922 
2923 void
2924 unsetgate(struct gate_descriptor *gd)
2925 {
2926 	gd->gd_p = 0;
2927 	gd->gd_hioffset = 0;
2928 	gd->gd_looffset = 0;
2929 	gd->gd_selector = 0;
2930 	gd->gd_xx = 0;
2931 	gd->gd_stkcpy = 0;
2932 	gd->gd_type = 0;
2933 	gd->gd_dpl = 0;
2934 }
2935 
2936 void
2937 setregion(struct region_descriptor *rd, void *base, size_t limit)
2938 {
2939 	rd->rd_limit = (int)limit;
2940 	rd->rd_base = (int)base;
2941 }
2942 
2943 void
2944 initcodesegment(struct segment_descriptor *cs)
2945 {
2946 	if (cpu_pae) {
2947 		/*
2948 		 * When code execution is managed using NX feature
2949 		 * in pmapae.c, GUCODE_SEL should cover userland.
2950 		 */
2951 		setsegment(cs, 0, atop(VM_MAXUSER_ADDRESS - 1),
2952 		    SDT_MEMERA, SEL_UPL, 1, 1);
2953 	} else {
2954 		/*
2955 		 * For pmap.c's non-PAE/NX line-in-the-sand execution, reset
2956 		 * the code segment limit to I386_MAX_EXE_ADDR in the pmap;
2957 		 * this gets copied into the GDT for GUCODE_SEL by
2958 		 * pmap_activate().  Similarly, reset the base of each of
2959 		 * the two thread data segments to zero in the pcb; they'll
2960 		 * get copied into the GDT for GUFS_SEL and GUGS_SEL.
2961 		 */
2962 		setsegment(cs, 0, atop(I386_MAX_EXE_ADDR - 1),
2963 		    SDT_MEMERA, SEL_UPL, 1, 1);
2964 	}
2965 }
2966 
2967 void
2968 setsegment(struct segment_descriptor *sd, void *base, size_t limit, int type,
2969     int dpl, int def32, int gran)
2970 {
2971 
2972 	sd->sd_lolimit = (int)limit;
2973 	sd->sd_lobase = (int)base;
2974 	sd->sd_type = type;
2975 	sd->sd_dpl = dpl;
2976 	sd->sd_p = 1;
2977 	sd->sd_hilimit = (int)limit >> 16;
2978 	sd->sd_xx = 0;
2979 	sd->sd_def32 = def32;
2980 	sd->sd_gran = gran;
2981 	sd->sd_hibase = (int)base >> 24;
2982 }
2983 
2984 #define	IDTVEC(name)	__CONCAT(X, name)
2985 extern int IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
2986     IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
2987     IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page),
2988     IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(mchk),
2989     IDTVEC(simd);
2990 
2991 extern int IDTVEC(f00f_redirect);
2992 
2993 int cpu_f00f_bug = 0;
2994 
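/*
 * Pentium "F00F" erratum: a locked cmpxchg8b with a register operand
 * raises #UD while the bus lock is still held, and the blocked fetch
 * of the gate descriptor wedges the CPU.  The workaround below parks
 * gates 0-6 at the tail of a read-only page so that the descriptor
 * fetch faults instead, and points vector 14 at IDTVEC(f00f_redirect)
 * to turn that fault back into the trap the process deserved.
 */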
2995 void
2996 fix_f00f(void)
2997 {
2998 	struct region_descriptor region;
2999 	vaddr_t va;
3000 	paddr_t pa;
3001 	void *p;
3002 
3003 	/* Allocate two new pages */
3004 	va = (vaddr_t)km_alloc(NBPG*2, &kv_any, &kp_zero, &kd_waitok);
3005 	p = (void *)(va + NBPG - 7*sizeof(*idt));
3006 
3007 	/* Copy over old IDT */
3008 	bcopy(idt, p, sizeof(idt_region));
3009 	idt = p;
3010 
3011 	/* Fix up paging redirect */
3012 	setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386IGT, SEL_KPL,
3013 	    GCODE_SEL);
3014 
3015 	/* Map first page RO */
3016 	pmap_pte_setbits(va, 0, PG_RW);
3017 
3018 	/* add k-u read-only mappings XXX old IDT stays in place */
3019 	/* XXX hshoexer: are f00f affected CPUs affected by meltdown? */
3020 	pmap_extract(pmap_kernel(), va, &pa);
3021 	pmap_enter_special(va, pa, PROT_READ, 0);
3022 	pmap_extract(pmap_kernel(), va + PAGE_SIZE, &pa);
3023 	pmap_enter_special(va + PAGE_SIZE, pa, PROT_READ, 0);
3024 
3025 	/* Reload idtr */
3026 	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
3027 	lidt(&region);
3028 
3029 	/* Tell the rest of the world */
3030 	cpu_f00f_bug = 1;
3031 }
3032 
3033 #ifdef MULTIPROCESSOR
3034 void
3035 cpu_init_idt(void)
3036 {
3037 	struct region_descriptor region;
3038 	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
3039 	lidt(&region);
3040 }
3041 #endif /* MULTIPROCESSOR */
3042 
3043 void
3044 init386(paddr_t first_avail)
3045 {
3046 	int i, kb;
3047 	struct region_descriptor region;
3048 	bios_memmap_t *im;
3049 
3050 	proc0.p_addr = proc0paddr;
3051 	cpu_info_primary.ci_self = &cpu_info_primary;
3052 	cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb;
3053 	cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss;
3054 	cpu_info_primary.ci_nmi_tss = &cpu_info_full_primary.cif_nmi_tss;
3055 	cpu_info_primary.ci_gdt = (void *)&cpu_info_full_primary.cif_gdt;
3056 
3057 	/* make bootstrap gdt gates and memory segments */
3058 	setsegment(&cpu_info_primary.ci_gdt[GCODE_SEL].sd, 0, 0xfffff,
3059 	    SDT_MEMERA, SEL_KPL, 1, 1);
3060 	setsegment(&cpu_info_primary.ci_gdt[GICODE_SEL].sd, 0, 0xfffff,
3061 	    SDT_MEMERA, SEL_KPL, 1, 1);
3062 	setsegment(&cpu_info_primary.ci_gdt[GDATA_SEL].sd, 0, 0xfffff,
3063 	    SDT_MEMRWA, SEL_KPL, 1, 1);
3064 	setsegment(&cpu_info_primary.ci_gdt[GUCODE_SEL].sd, 0,
3065 	    atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1);
3066 	setsegment(&cpu_info_primary.ci_gdt[GUDATA_SEL].sd, 0,
3067 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
3068 	setsegment(&cpu_info_primary.ci_gdt[GCPU_SEL].sd, &cpu_info_primary,
3069 	    sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0);
3070 	setsegment(&cpu_info_primary.ci_gdt[GUFS_SEL].sd, 0,
3071 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
3072 	setsegment(&cpu_info_primary.ci_gdt[GUGS_SEL].sd, 0,
3073 	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
3074 	setsegment(&cpu_info_primary.ci_gdt[GTSS_SEL].sd,
3075 	    cpu_info_primary.ci_tss, sizeof(struct i386tss)-1,
3076 	    SDT_SYS386TSS, SEL_KPL, 0, 0);
3077 	setsegment(&cpu_info_primary.ci_gdt[GNMITSS_SEL].sd,
3078 	    cpu_info_primary.ci_nmi_tss, sizeof(struct i386tss)-1,
3079 	    SDT_SYS386TSS, SEL_KPL, 0, 0);
3080 
3081 	/* exceptions */
3082 	setgate(&idt[  0], &IDTVEC(div),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3083 	setgate(&idt[  1], &IDTVEC(dbg),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3084 	setgate(&idt[  2], NULL,             0, SDT_SYSTASKGT, SEL_KPL, GNMITSS_SEL);
3085 	setgate(&idt[  3], &IDTVEC(bpt),     0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
3086 	setgate(&idt[  4], &IDTVEC(ofl),     0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
3087 	setgate(&idt[  5], &IDTVEC(bnd),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3088 	setgate(&idt[  6], &IDTVEC(ill),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3089 	setgate(&idt[  7], &IDTVEC(dna),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3090 	setgate(&idt[  8], &IDTVEC(dble),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3091 	setgate(&idt[  9], &IDTVEC(fpusegm), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3092 	setgate(&idt[ 10], &IDTVEC(tss),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3093 	setgate(&idt[ 11], &IDTVEC(missing), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3094 	setgate(&idt[ 12], &IDTVEC(stk),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3095 	setgate(&idt[ 13], &IDTVEC(prot),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3096 	setgate(&idt[ 14], &IDTVEC(page),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3097 	setgate(&idt[ 15], &IDTVEC(rsvd),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3098 	setgate(&idt[ 16], &IDTVEC(fpu),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3099 	setgate(&idt[ 17], &IDTVEC(align),   0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3100 	setgate(&idt[ 18], &IDTVEC(mchk),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3101 	setgate(&idt[ 19], &IDTVEC(simd),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3102 	for (i = 20; i < NRSVIDT; i++)
3103 		setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
3104 	for (i = NRSVIDT; i < NIDT; i++)
3105 		unsetgate(&idt[i]);
3106 	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
3107 
3108 	setregion(&region, cpu_info_primary.ci_gdt, GDT_SIZE - 1);
3109 	lgdt(&region);
3110 	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
3111 	lidt(&region);
3112 
3113 	/*
3114 	 * Initialize the I/O port and I/O mem extent maps.
3115 	 * Note: we don't have to check the return value since
3116 	 * creation of a fixed extent map will never fail (the
3117 	 * descriptor storage has already been allocated).
3118 	 *
3119 	 * N.B. The iomem extent manages _all_ physical addresses
3120 	 * on the machine.  When the amount of RAM is found, the two
3121 	 * extents of RAM are allocated from the map (0 -> ISA hole
3122 	 * and end of ISA hole -> end of RAM).
3123 	 */
3124 	ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF,
3125 	    (caddr_t)ioport_ex_storage, sizeof(ioport_ex_storage),
3126 	    EX_NOCOALESCE|EX_NOWAIT);
3127 	iomem_ex = extent_create("iomem", 0x0, 0xffffffff, M_DEVBUF,
3128 	    (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage),
3129 	    EX_NOCOALESCE|EX_NOWAIT);
3130 
3131 #if NISA > 0
3132 	isa_defaultirq();
3133 #endif
3134 
3135 	/*
3136 	 * Attach the glass console early in case we need to display a panic.
3137 	 */
3138 	cninit();
3139 
3140 	/*
3141 	 * Saving SSE registers won't work if the save area isn't
3142 	 * 16-byte aligned.
3143 	 */
3144 	if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
3145 		panic("init386: pcb_savefpu not 16-byte aligned");
3146 
3147 	/* call pmap initialization to make new kernel address space */
3148 	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
3149 
3150 	/*
3151 	 * Boot arguments are in a single page specified by /boot.
3152 	 *
3153 	 * We require the "new" vector form, as well as memory ranges
3154 	 * to be given in bytes rather than KB.
3155 	 */
3156 	if ((bootapiver & (BAPIV_VECTOR | BAPIV_BMEMMAP)) ==
3157 	    (BAPIV_VECTOR | BAPIV_BMEMMAP)) {
3158 		if (bootargc > NBPG)
3159 			panic("too many boot args");
3160 
3161 		if (extent_alloc_region(iomem_ex, (paddr_t)bootargv, bootargc,
3162 		    EX_NOWAIT))
3163 			panic("cannot reserve /boot args memory");
3164 
3165 		pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (paddr_t)bootargv,
3166 		    PROT_READ | PROT_WRITE,
3167 		    PROT_READ | PROT_WRITE | PMAP_WIRED);
3168 
3169 		bios_getopt();
3170 
3171 	} else
3172 		panic("/boot too old: upgrade!");
3173 
3174 #ifdef DIAGNOSTIC
3175 	if (bios_memmap == NULL)
3176 		panic("no BIOS memory map supplied");
3177 #endif
3178 
3179 	/*
3180 	 * Account for all the memory passed in the map from /boot;
3181 	 * calculate avail_end and count the physmem.
3182 	 */
3183 	avail_end = 0;
3184 	physmem = 0;
3185 #ifdef DEBUG
3186 	printf("memmap:");
3187 #endif
3188 	for (i = 0, im = bios_memmap; im->type != BIOS_MAP_END; im++)
3189 		if (im->type == BIOS_MAP_FREE) {
3190 			paddr_t a, e;
3191 #ifdef DEBUG
3192 			printf(" %llx-%llx", im->addr, im->addr + im->size);
3193 #endif
3194 
3195 			if (im->addr >= 0x100000000ULL) {
3196 #ifdef DEBUG
3197 				printf("-H");
3198 #endif
3199 				continue;
3200 			}
3201 
3202 			a = round_page(im->addr);
3203 			if (im->addr + im->size <= 0xfffff000ULL)
3204 				e = trunc_page(im->addr + im->size);
3205 			else {
3206 #ifdef DEBUG
3207 				printf("-T");
3208 #endif
3209 				e = 0xfffff000;
3210 			}
3211 
3212 			/* skip first 16 pages due to SMI corruption */
3213 			if (a < 16 * NBPG)
3214 				a = 16 * NBPG;
3215 
3216 #ifdef MULTIPROCESSOR
3217 			/* skip MP trampoline code page */
3218 			if (a < MP_TRAMPOLINE + NBPG)
3219 				a = MP_TRAMPOLINE + NBPG;
3220 
3221 			/* skip MP trampoline data page */
3222 			if (a < MP_TRAMP_DATA + NBPG)
3223 				a = MP_TRAMP_DATA + NBPG;
3224 #endif /* MULTIPROCESSOR */
3225 
3226 #if NACPI > 0 && !defined(SMALL_KERNEL)
3227 			/* skip ACPI resume trampoline code page */
3228 			if (a < ACPI_TRAMPOLINE + NBPG)
3229 				a = ACPI_TRAMPOLINE + NBPG;
3230 
3231 			/* skip ACPI resume trampoline data page */
3232 			if (a < ACPI_TRAMP_DATA + NBPG)
3233 				a = ACPI_TRAMP_DATA + NBPG;
3234 #endif /* ACPI */
3235 
3236 #ifdef HIBERNATE
3237 			/* skip hibernate reserved pages */
3238 			if (a < HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE)
3239 				a = HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE;
3240 #endif /* HIBERNATE */
3241 
3242 			/* skip regions shorter than a page */
3243 			if (a >= e || (e - a) < NBPG) {
3244 #ifdef DEBUG
3245 				printf("-S");
3246 #endif
3247 				continue;
3248 			}
3249 
3250 			/*
3251 			 * XXX Some buggy ACPI BIOSes use memory that
3252 			 * they declare as free. Current worst offender
3253 			 * is Supermicro 5019D-FTN4.  Typically the
3254 			 * affected memory areas are small blocks
3255 			 * between areas reserved for ACPI and other
3256 			 * BIOS goo.  So skip areas smaller than 32 MB
3257 			 * above the 16 MB boundary (to avoid
3258 			 * affecting legacy stuff).
3259 			 */
3260 			if (a > 16*1024*1024 && (e - a) < 32*1024*1024) {
3261 #ifdef DEBUG
3262 				printf("-X");
3263 #endif
3264 				continue;
3265 			}
3266 
3267 			/* skip legacy IO region */
3268 			if ((a > IOM_BEGIN && a < IOM_END) ||
3269 			    (e > IOM_BEGIN && e < IOM_END)) {
3270 #ifdef DEBUG
3271 				printf("-I");
3272 #endif
3273 				continue;
3274 			}
3275 
3276 			if (extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT))
3277 				/* XXX What should we do? */
3278 				printf("\nWARNING: CAN'T ALLOCATE RAM (%lx-%lx)"
3279 				    " FROM IOMEM EXTENT MAP!\n", a, e);
3280 
3281 			physmem += atop(e - a);
3282 			dumpmem[i].start = atop(a);
3283 			dumpmem[i].end = atop(e);
3284 			i++;
3285 			avail_end = max(avail_end, e);
3286 		}
3287 
3288 	ndumpmem = i;
3289 	avail_end -= round_page(MSGBUFSIZE);
3290 
3291 #ifdef DEBUG
3292 	printf(": %lx\n", avail_end);
3293 #endif
3294 	if (physmem < atop(4 * 1024 * 1024)) {
3295 		printf("\awarning: too little memory available; "
3296 		    "running in degraded mode\npress a key to confirm\n\n");
3297 		cnpollc(1);
3298 		cngetc();
3299 		cnpollc(0);
3300 	}
3301 
3302 #ifdef DEBUG
3303 	printf("physload: ");
3304 #endif
3305 	kb = atop(KERNTEXTOFF - KERNBASE);
3306 	if (kb > atop(IOM_END)) {
3307 		paddr_t lim = atop(IOM_END);
3308 #ifdef DEBUG
3309 		printf(" %lx-%x (<16M)", lim, kb);
3310 #endif
3311 		uvm_page_physload(lim, kb, lim, kb, 0);
3312 	}
3313 
3314 	for (i = 0; i < ndumpmem; i++) {
3315 		paddr_t a, e;
3316 
3317 		a = dumpmem[i].start;
3318 		e = dumpmem[i].end;
3319 		if (a < atop(first_avail) && e > atop(first_avail))
3320 			a = atop(first_avail);
3321 		if (e > atop(avail_end))
3322 			e = atop(avail_end);
3323 
3324 		if (a < e) {
3325 #ifdef DEBUG
3326 			printf(" %lx-%lx", a, e);
3327 #endif
3328 			uvm_page_physload(a, e, a, e, 0);
3329 		}
3330 	}
3331 #ifdef DEBUG
3332 	printf("\n");
3333 #endif
3334 
3335 	tlbflush();
3336 #if 0
3337 #if NISADMA > 0
3338 	/*
3339 	 * Some motherboards/BIOSes remap the 384K of RAM that would
3340 	 * normally be covered by the ISA hole to the end of memory
3341 	 * so that it can be used.  However, on a 16M system, this
3342 	 * would cause bounce buffers to be allocated and used.
3343 	 * This is not desirable behaviour, as more than 384K of
3344 	 * bounce buffers might be allocated.  As a work-around,
3345 	 * we round memory down to the nearest 1M boundary if
3346 	 * we're using any isadma devices and the remapped memory
3347 	 * is what puts us over 16M.
3348 	 */
3349 	if (extmem > (15*1024) && extmem < (16*1024)) {
3350 		printf("Warning: ignoring %dk of remapped memory\n",
3351 		    extmem - (15*1024));
3352 		extmem = (15*1024);
3353 	}
3354 #endif
3355 #endif
3356 
3357 #ifdef DDB
3358 	db_machine_init();
3359 	ddb_init();
3360 	if (boothowto & RB_KDB)
3361 		db_enter();
3362 #endif
3363 
3364 	softintr_init();
3365 }
3366 
3367 /*
3368  * consinit:
3369  * initialize the system console.
3370  */
3371 void
3372 consinit(void)
3373 {
3374 	/* Already done in init386(). */
3375 }
3376 
3377 void
3378 cpu_reset(void)
3379 {
3380 	struct region_descriptor region;
3381 
3382 	intr_disable();
3383 
3384 	if (cpuresetfn)
3385 		(*cpuresetfn)();
3386 
3387 	/*
3388 	 * The keyboard controller has 4 random output pins, one of which is
3389 	 * connected to the RESET pin on the CPU in many PCs.  We tell the
3390 	 * keyboard controller to pulse this line a couple of times.
3391 	 */
3392 	outb(IO_KBD + KBCMDP, KBC_PULSE0);
3393 	delay(100000);
3394 	outb(IO_KBD + KBCMDP, KBC_PULSE0);
3395 	delay(100000);
3396 
3397 	/*
3398 	 * Try to cause a triple fault and watchdog reset by setting the
3399 	 * IDT to point to nothing.
3400 	 */
3401 	bzero((caddr_t)idt, sizeof(idt_region));
3402 	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
3403 	lidt(&region);
3404 	__asm volatile("divl %0,%1" : : "q" (0), "a" (0));
3405 
3406 	/*
3407 	 * Try to cause a triple fault and watchdog reset by unmapping the
3408 	 * entire address space.
3409 	 */
3410 	bzero((caddr_t)PTD, NBPG);
3411 	tlbflush();
3412 
3413 	for (;;)
3414 		continue;
3415 	/* NOTREACHED */
3416 }
3417 
3418 void
3419 cpu_initclocks(void)
3420 {
3421 	(*initclock_func)();		/* lapic or i8254 */
3422 }
3423 
3424 void
3425 need_resched(struct cpu_info *ci)
3426 {
3427 	ci->ci_want_resched = 1;
3428 
3429 	/* There's a risk we'll be called before the idle threads start */
3430 	if (ci->ci_curproc) {
3431 		aston(ci->ci_curproc);
3432 		cpu_kick(ci);
3433 	}
3434 }
3435 
3436 /*
 * Allocate an IDT vector slot within the given range.
3437  * XXX needs locking to avoid MP allocation races.
3438  */
3439 
3440 int
3441 idt_vec_alloc(int low, int high)
3442 {
3443 	int vec;
3444 
3445 	for (vec = low; vec <= high; vec++)
3446 		if (idt[vec].gd_p == 0)
3447 			return (vec);
3448 	return (0);
3449 }
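
/*
 * A sketch of typical use (the stub name is hypothetical): grab a free
 * vector in some range and point it at an interrupt stub,
 *
 *	int vec = idt_vec_alloc(0x20, 0xff);
 *	if (vec != 0)
 *		idt_vec_set(vec, Xhypothetical_intr);
 *
 * idt_vec_alloc() returns 0 when no slot in the range is free.
 */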
3450 
3451 void
3452 idt_vec_set(int vec, void (*function)(void))
3453 {
3454 	setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
3455 }
3456 
3457 void
3458 idt_vec_free(int vec)
3459 {
3460 	unsetgate(&idt[vec]);
3461 }
3462 
3463 const struct sysctl_bounded_args cpuctl_vars[] = {
3464 	{ CPU_LIDACTION, &lid_action, 0, 2 },
3465 	{ CPU_CPUID, &cpu_id, SYSCTL_INT_READONLY },
3466 	{ CPU_OSFXSR, &i386_use_fxsave, SYSCTL_INT_READONLY },
3467 	{ CPU_SSE, &i386_has_sse, SYSCTL_INT_READONLY },
3468 	{ CPU_SSE2, &i386_has_sse2, SYSCTL_INT_READONLY },
3469 	{ CPU_XCRYPT, &i386_has_xcrypt, SYSCTL_INT_READONLY },
3470 };
3471 
3472 /*
3473  * machine dependent system variables.
3474  */
3475 int
3476 cpu_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
3477     size_t newlen, struct proc *p)
3478 {
3479 	dev_t dev;
3480 
3481 	switch (name[0]) {
3482 	case CPU_CONSDEV:
3483 		if (namelen != 1)
3484 			return (ENOTDIR);		/* overloaded */
3485 
3486 		if (cn_tab != NULL)
3487 			dev = cn_tab->cn_dev;
3488 		else
3489 			dev = NODEV;
3490 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
3491 #if NBIOS > 0
3492 	case CPU_BIOS:
3493 		return bios_sysctl(name + 1, namelen - 1, oldp, oldlenp,
3494 		    newp, newlen, p);
3495 #endif
3496 	case CPU_BLK2CHR:
3497 		if (namelen != 2)
3498 			return (ENOTDIR);		/* overloaded */
3499 		dev = blktochr((dev_t)name[1]);
3500 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
3501 	case CPU_CHR2BLK:
3502 		if (namelen != 2)
3503 			return (ENOTDIR);		/* overloaded */
3504 		dev = chrtoblk((dev_t)name[1]);
3505 		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
3506 	case CPU_ALLOWAPERTURE:
3507 #ifdef APERTURE
3508 		if (securelevel > 0)
3509 			return (sysctl_int_lower(oldp, oldlenp, newp, newlen,
3510 			    &allowaperture));
3511 		else
3512 			return (sysctl_int(oldp, oldlenp, newp, newlen,
3513 			    &allowaperture));
3514 #else
3515 		return (sysctl_rdint(oldp, oldlenp, newp, 0));
3516 #endif
3517 	case CPU_CPUVENDOR:
3518 		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_vendor));
3519 	case CPU_CPUFEATURE:
3520 		return (sysctl_rdint(oldp, oldlenp, newp, curcpu()->ci_feature_flags));
3521 	case CPU_KBDRESET:
3522 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
3523 		    &kbd_reset));
3524 #if NPCKBC > 0 && NUKBD > 0
3525 	case CPU_FORCEUKBD:
3526 		{
3527 		int error;
3528 
3529 		if (forceukbd)
3530 			return (sysctl_rdint(oldp, oldlenp, newp, forceukbd));
3531 
3532 		error = sysctl_int(oldp, oldlenp, newp, newlen, &forceukbd);
3533 		if (forceukbd)
3534 			pckbc_release_console();
3535 		return (error);
3536 		}
3537 #endif
3538 	default:
3539 		return (sysctl_bounded_arr(cpuctl_vars, nitems(cpuctl_vars),
3540 		    name, namelen, oldp, oldlenp, newp, newlen));
3541 	}
3542 	/* NOTREACHED */
3543 }
3544 
3545 int
3546 bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags,
3547     bus_space_handle_t *bshp)
3548 {
3549 	int error;
3550 	struct extent *ex;
3551 
3552 	/*
3553 	 * Pick the appropriate extent map.
3554 	 */
3555 	if (t == I386_BUS_SPACE_IO) {
3556 		ex = ioport_ex;
3557 		if (flags & BUS_SPACE_MAP_LINEAR)
3558 			return (EINVAL);
3559 	} else if (t == I386_BUS_SPACE_MEM) {
3560 		ex = iomem_ex;
3561 	} else {
3562 		panic("bus_space_map: bad bus space tag");
3563 	}
3564 
3565 	/*
3566 	 * Before we go any further, let's make sure that this
3567 	 * region is available.
3568 	 */
3569 	error = extent_alloc_region(ex, bpa, size,
3570 	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0));
3571 	if (error)
3572 		return (error);
3573 
3574 	/*
3575 	 * For I/O space, that's all she wrote.
3576 	 */
3577 	if (t == I386_BUS_SPACE_IO) {
3578 		*bshp = bpa;
3579 		return (0);
3580 	}
3581 
3582 	if (IOM_BEGIN <= bpa && bpa <= IOM_END) {
3583 		*bshp = (bus_space_handle_t)ISA_HOLE_VADDR(bpa);
3584 		return (0);
3585 	}
3586 
3587 	/*
3588 	 * For memory space, map the bus physical address to
3589 	 * a kernel virtual address.
3590 	 */
3591 	error = bus_mem_add_mapping(bpa, size, flags, bshp);
3592 	if (error) {
3593 		if (extent_free(ex, bpa, size, EX_NOWAIT |
3594 		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
3595 			printf("bus_space_map: pa 0x%lx, size 0x%lx\n",
3596 			    bpa, size);
3597 			printf("bus_space_map: can't free region\n");
3598 		}
3599 	}
3600 
3601 	return (error);
3602 }
3603 
3604 int
3605 _bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size,
3606     int flags, bus_space_handle_t *bshp)
3607 {
3608 	/*
3609 	 * For I/O space, that's all she wrote.
3610 	 */
3611 	if (t == I386_BUS_SPACE_IO) {
3612 		*bshp = bpa;
3613 		return (0);
3614 	}
3615 
3616 	/*
3617 	 * For memory space, map the bus physical address to
3618 	 * a kernel virtual address.
3619 	 */
3620 	return (bus_mem_add_mapping(bpa, size, flags, bshp));
3621 }
3622 
3623 int
3624 bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend,
3625     bus_size_t size, bus_size_t alignment, bus_size_t boundary,
3626     int flags, bus_addr_t *bpap, bus_space_handle_t *bshp)
3627 {
3628 	struct extent *ex;
3629 	u_long bpa;
3630 	int error;
3631 
3632 	/*
3633 	 * Pick the appropriate extent map.
3634 	 */
3635 	if (t == I386_BUS_SPACE_IO) {
3636 		ex = ioport_ex;
3637 	} else if (t == I386_BUS_SPACE_MEM) {
3638 		ex = iomem_ex;
3639 	} else {
3640 		panic("bus_space_alloc: bad bus space tag");
3641 	}
3642 
3643 	/*
3644 	 * Sanity check the allocation against the extent's boundaries.
3645 	 */
3646 	if (rstart < ex->ex_start || rend > ex->ex_end)
3647 		panic("bus_space_alloc: bad region start/end");
3648 
3649 	/*
3650 	 * Do the requested allocation.
3651 	 */
3652 	error = extent_alloc_subregion(ex, rstart, rend, size, alignment, 0,
3653 	    boundary, EX_NOWAIT | (ioport_malloc_safe ?  EX_MALLOCOK : 0),
3654 	    &bpa);
3655 
3656 	if (error)
3657 		return (error);
3658 
3659 	/*
3660 	 * For I/O space, that's all she wrote.
3661 	 */
3662 	if (t == I386_BUS_SPACE_IO) {
3663 		*bshp = *bpap = bpa;
3664 		return (0);
3665 	}
3666 
3667 	/*
3668 	 * For memory space, map the bus physical address to
3669 	 * a kernel virtual address.
3670 	 */
3671 	error = bus_mem_add_mapping(bpa, size, flags, bshp);
3672 	if (error) {
3673 		if (extent_free(iomem_ex, bpa, size, EX_NOWAIT |
3674 		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
3675 			printf("bus_space_alloc: pa 0x%lx, size 0x%lx\n",
3676 			    bpa, size);
3677 			printf("bus_space_alloc: can't free region\n");
3678 		}
3679 	}
3680 
3681 	*bpap = bpa;
3682 
3683 	return (error);
3684 }
3685 
3686 int
3687 bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int flags,
3688     bus_space_handle_t *bshp)
3689 {
3690 	paddr_t pa, endpa;
3691 	vaddr_t va;
3692 	bus_size_t map_size;
3693 	int pmap_flags = PMAP_NOCACHE;
3694 
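	/*
	 * Device memory defaults to uncached; BUS_SPACE_MAP_CACHEABLE
	 * selects ordinary write-back caching and
	 * BUS_SPACE_MAP_PREFETCHABLE write-combining below.
	 */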
3695 	pa = trunc_page(bpa);
3696 	endpa = round_page(bpa + size);
3697 
3698 #ifdef DIAGNOSTIC
3699 	if (endpa <= pa && endpa != 0)
3700 		panic("bus_mem_add_mapping: overflow");
3701 #endif
3702 
3703 	map_size = endpa - pa;
3704 
3705 	va = (vaddr_t)km_alloc(map_size, &kv_any, &kp_none, &kd_nowait);
3706 	if (va == 0)
3707 		return (ENOMEM);
3708 
3709 	*bshp = (bus_space_handle_t)(va + (bpa & PGOFSET));
3710 
3711 	if (flags & BUS_SPACE_MAP_CACHEABLE)
3712 		pmap_flags = 0;
3713 	else if (flags & BUS_SPACE_MAP_PREFETCHABLE)
3714 		pmap_flags = PMAP_WC;
3715 
3716 	for (; map_size > 0;
3717 	    pa += PAGE_SIZE, va += PAGE_SIZE, map_size -= PAGE_SIZE)
3718 		pmap_kenter_pa(va, pa | pmap_flags,
3719 		    PROT_READ | PROT_WRITE);
3720 	pmap_update(pmap_kernel());
3721 
3722 	return 0;
3723 }
3724 
3725 void
3726 bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size)
3727 {
3728 	struct extent *ex;
3729 	u_long va, endva;
3730 	bus_addr_t bpa;
3731 
3732 	/*
3733 	 * Find the correct extent and bus physical address.
3734 	 */
3735 	if (t == I386_BUS_SPACE_IO) {
3736 		ex = ioport_ex;
3737 		bpa = bsh;
3738 	} else if (t == I386_BUS_SPACE_MEM) {
3739 		ex = iomem_ex;
3740 		bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
3741 		if (IOM_BEGIN <= bpa && bpa <= IOM_END)
3742 			goto ok;
3743 
3744 		va = trunc_page(bsh);
3745 		endva = round_page(bsh + size);
3746 
3747 #ifdef DIAGNOSTIC
3748 		if (endva <= va)
3749 			panic("bus_space_unmap: overflow");
3750 #endif
3751 
3752 		(void) pmap_extract(pmap_kernel(), va, &bpa);
3753 		bpa += (bsh & PGOFSET);
3754 
3755 		pmap_kremove(va, endva - va);
3756 		pmap_update(pmap_kernel());
3757 
3758 		/*
3759 		 * Free the kernel virtual mapping.
3760 		 */
3761 		km_free((void *)va, endva - va, &kv_any, &kp_none);
3762 	} else
3763 		panic("bus_space_unmap: bad bus space tag");
3764 
3765 ok:
3766 	if (extent_free(ex, bpa, size,
3767 	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
3768 		printf("bus_space_unmap: %s 0x%lx, size 0x%lx\n",
3769 		    (t == I386_BUS_SPACE_IO) ? "port" : "pa", bpa, size);
3770 		printf("bus_space_unmap: can't free region\n");
3771 	}
3772 }
3773 
3774 void
3775 _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size,
3776     bus_addr_t *adrp)
3777 {
3778 	u_long va, endva;
3779 	bus_addr_t bpa;
3780 
3781 	/*
3782 	 * Find the correct bus physical address.
3783 	 */
3784 	if (t == I386_BUS_SPACE_IO) {
3785 		bpa = bsh;
3786 	} else if (t == I386_BUS_SPACE_MEM) {
3787 		bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
3788 		if (IOM_BEGIN <= bpa && bpa <= IOM_END)
3789 			goto ok;
3790 
3791 		va = trunc_page(bsh);
3792 		endva = round_page(bsh + size);
3793 
3794 #ifdef DIAGNOSTIC
3795 		if (endva <= va)
3796 			panic("_bus_space_unmap: overflow");
3797 #endif
3798 
3799 		(void) pmap_extract(pmap_kernel(), va, &bpa);
3800 		bpa += (bsh & PGOFSET);
3801 
3802 		pmap_kremove(va, endva - va);
3803 		pmap_update(pmap_kernel());
3804 
3805 		/*
3806 		 * Free the kernel virtual mapping.
3807 		 */
3808 		km_free((void *)va, endva - va, &kv_any, &kp_none);
3809 	} else
3810 		panic("_bus_space_unmap: bad bus space tag");
3811 
3812 ok:
3813 	if (adrp != NULL)
3814 		*adrp = bpa;
3815 }
3816 
3817 void
3818 bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size)
3819 {
3820 
3821 	/* bus_space_unmap() does all that we need to do. */
3822 	bus_space_unmap(t, bsh, size);
3823 }
3824 
3825 int
3826 bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh,
3827     bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp)
3828 {
3829 	*nbshp = bsh + offset;
3830 	return (0);
3831 }
3832 
3833 paddr_t
3834 bus_space_mmap(bus_space_tag_t t, bus_addr_t addr, off_t off, int prot, int flags)
3835 {
3836 	/* Can't mmap I/O space. */
3837 	if (t == I386_BUS_SPACE_IO)
3838 		return (-1);
3839 
3840 	return (addr + off);
3841 }
3842 
3843 #ifdef DIAGNOSTIC
3844 void
3845 splassert_check(int wantipl, const char *func)
3846 {
3847 	if (lapic_tpr < wantipl)
3848 		splassert_fail(wantipl, lapic_tpr, func);
3849 	if (wantipl == IPL_NONE && curcpu()->ci_idepth != 0)
3850 		splassert_fail(-1, curcpu()->ci_idepth, func);
3851 }
3852 #endif
3853 
3854 int
3855 copyin32(const uint32_t *uaddr, uint32_t *kaddr)
3856 {
3857 	if ((vaddr_t)uaddr & 0x3)
3858 		return EFAULT;
3859 
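	/* an aligned 32-bit word never straddles a page boundary */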
3860 	/* copyin(9) is atomic */
3861 	return copyin(uaddr, kaddr, sizeof(uint32_t));
3862 }
3863 
3864 /*
3865  * True if the system has any non-level interrupts which are shared
3866  * on the same pin.
3867  */
3868 int	intr_shared_edge;
3869 
3870 /*
3871  * Software interrupt posting
3872  *
3873  * We hand-code this to ensure that it's atomic.
3874  */
3875 void
3876 softintr(int sir)
3877 {
3878 	struct cpu_info *ci = curcpu();
3879 
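
	/*
	 * A single memory-destination "orl" cannot be torn by an
	 * interrupt arriving on this CPU, which is the atomicity the
	 * comment above relies on.
	 */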
3880 	__asm volatile("orl %1, %0" :
3881 	    "=m" (ci->ci_ipending) : "ir" (1 << sir));
3882 }
3883 
3884 /*
3885  * Raise current interrupt priority level, and return the old one.
3886  */
3887 int
3888 splraise(int ncpl)
3889 {
3890 	int ocpl;
3891 
3892 	KASSERT(ncpl >= IPL_NONE);
3893 
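	/*
	 * The current priority level is kept in lapic_tpr (see
	 * spllower()); _SPLRAISE() hands back the old value and stores
	 * the higher of the two.
	 */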
3894 	_SPLRAISE(ocpl, ncpl);
3895 	return (ocpl);
3896 }
3897 
3898 /*
3899  * Restore an old interrupt priority level.  If any thereby unmasked
3900  * interrupts are pending, call Xspllower() to process them.
3901  */
3902 void
3903 splx(int ncpl)
3904 {
3905 	_SPLX(ncpl);
3906 }
3907 
3908 /*
3909  * Same as splx(), but we return the old value of spl, for the
3910  * benefit of some splsoftclock() callers.
3911  */
3912 int
3913 spllower(int ncpl)
3914 {
3915 	int ocpl = lapic_tpr;
3916 
3917 	splx(ncpl);
3918 	return (ocpl);
3919 }
3920 
3921 int
3922 intr_handler(struct intrframe *frame, struct intrhand *ih)
3923 {
3924 	int rc;
3925 #ifdef MULTIPROCESSOR
3926 	int need_lock;
3927 
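	/*
	 * Handlers not flagged IPL_MPSAFE still assume the big kernel
	 * lock, so take it and release it around the call on their
	 * behalf.
	 */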
3928 	if (ih->ih_flags & IPL_MPSAFE)
3929 		need_lock = 0;
3930 	else
3931 		need_lock = 1;
3932 
3933 	if (need_lock)
3934 		__mp_lock(&kernel_lock);
3935 #endif
3936 	rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame);
3937 #ifdef MULTIPROCESSOR
3938 	if (need_lock)
3939 		__mp_unlock(&kernel_lock);
3940 #endif
3941 	return rc;
3942 }
3943 
3944 void
3945 intr_barrier(void *ih)
3946 {
3947 	sched_barrier(NULL);
3948 }
3949 
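/*
 * Low-grade entropy for the random subsystem: folding the seconds into
 * the upper bits of the nanoseconds makes consecutive calls differ in
 * more than just the low-order bits.
 */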
3950 unsigned int
3951 cpu_rnd_messybits(void)
3952 {
3953 	struct timespec ts;
3954 
3955 	nanotime(&ts);
3956 	return (ts.tv_nsec ^ (ts.tv_sec << 20));
3957 }
3958 
3959 int i386_delay_quality;
3960 
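/*
 * Several timers can back delay(9); whichever registers with the
 * highest quality wins, and delay_fini() falls back to the i8254
 * when the incumbent goes away.
 */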
3961 void
3962 delay_init(void (*fn)(int), int fn_quality)
3963 {
3964 	if (fn_quality > i386_delay_quality) {
3965 		delay_func = fn;
3966 		i386_delay_quality = fn_quality;
3967 	}
3968 }
3969 
3970 void
3971 delay_fini(void (*fn)(int))
3972 {
3973 	if (delay_func == fn) {
3974 		delay_func = i8254_delay;
3975 		i386_delay_quality = 0;
3976 	}
3977 }
3978