xref: /freebsd/sys/i386/i386/vm86.c (revision 42249ef2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1997 Jonathan Lemon
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/priv.h>
35 #include <sys/proc.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 
40 #include <vm/vm.h>
41 #include <vm/pmap.h>
42 #include <vm/vm_map.h>
43 #include <vm/vm_page.h>
44 
45 #include <machine/md_var.h>
46 #include <machine/pcb.h>
47 #include <machine/pcb_ext.h>
48 #include <machine/psl.h>
49 #include <machine/specialreg.h>
50 #include <machine/sysarch.h>
51 
52 extern int vm86pa;
53 extern struct pcb *vm86pcb;
54 
55 static struct mtx vm86_lock;
56 
57 extern int vm86_bioscall(struct vm86frame *);
58 extern void vm86_biosret(struct vm86frame *);
59 
60 void vm86_prepcall(struct vm86frame *);
61 
62 struct system_map {
63 	int		type;
64 	vm_offset_t	start;
65 	vm_offset_t	end;
66 };
67 
68 #define	HLT	0xf4
69 #define	CLI	0xfa
70 #define	STI	0xfb
71 #define	PUSHF	0x9c
72 #define	POPF	0x9d
73 #define	INTn	0xcd
74 #define	IRET	0xcf
75 #define	CALLm	0xff
76 #define OPERAND_SIZE_PREFIX	0x66
77 #define ADDRESS_SIZE_PREFIX	0x67
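/*
 * PUSH_MASK strips PSL_VM, PSL_RF and the real PSL_I from the flags
 * image pushed by an emulated PUSHF; IOPL and a synthetic PSL_I
 * (derived from the virtual interrupt flag) are merged back in below.
 * POP_MASK limits the bits an emulated POPF/IRET may change: PSL_VIP,
 * PSL_VIF, PSL_VM, PSL_RF and PSL_IOPL always keep their current,
 * monitor-controlled values.
 */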
78 #define PUSH_MASK	~(PSL_VM | PSL_RF | PSL_I)
79 #define POP_MASK	~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)
80 
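/*
 * Accessor wrappers for the vm86 address space.  When the current
 * thread is in a critical section (as it is for a kernel-initiated
 * BIOS call, where the vm86 pages are known to be mapped), the memory
 * is touched directly; otherwise the fault-catching fubyte/fuword/
 * suword primitives are used.
 */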
81 static int
82 vm86_suword16(volatile void *base, int word)
83 {
84 
85 	if (curthread->td_critnest != 0) {
86 		*(volatile uint16_t *)base = word;
87 		return (0);
88 	}
89 	return (suword16(base, word));
90 }
91 
92 static int
93 vm86_suword(volatile void *base, long word)
94 {
95 
96 	if (curthread->td_critnest != 0) {
97 		*(volatile long *)base = word;
98 		return (0);
99 	}
100 	return (suword(base, word));
101 }
102 
103 static int
104 vm86_fubyte(volatile const void *base)
105 {
106 
107 	if (curthread->td_critnest != 0)
108 		return (*(volatile const u_char *)base);
109 	return (fubyte(base));
110 }
111 
112 static int
113 vm86_fuword16(volatile const void *base)
114 {
115 
116 	if (curthread->td_critnest != 0)
117 		return (*(volatile const uint16_t *)base);
118 	return (fuword16(base));
119 }
120 
121 static long
122 vm86_fuword(volatile const void *base)
123 {
124 
125 	if (curthread->td_critnest != 0)
126 		return (*(volatile const long *)base);
127 	return (fuword(base));
128 }
129 
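/*
 * Real-mode address arithmetic: a segment:offset pair addresses linear
 * byte (segment << 4) + offset, and an interrupt vector packs the pair
 * with the segment in the high word and the offset in the low word.
 * The PUSH/POP helpers operate on the vm86 stack named by vmf_ss:vmf_sp.
 */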
130 static __inline caddr_t
131 MAKE_ADDR(u_short sel, u_short off)
132 {
133 	return ((caddr_t)((sel << 4) + off));
134 }
135 
136 static __inline void
137 GET_VEC(u_int vec, u_short *sel, u_short *off)
138 {
139 	*sel = vec >> 16;
140 	*off = vec & 0xffff;
141 }
142 
143 static __inline u_int
144 MAKE_VEC(u_short sel, u_short off)
145 {
146 	return ((sel << 16) | off);
147 }
148 
149 static __inline void
150 PUSH(u_short x, struct vm86frame *vmf)
151 {
152 	vmf->vmf_sp -= 2;
153 	vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
154 }
155 
156 static __inline void
157 PUSHL(u_int x, struct vm86frame *vmf)
158 {
159 	vmf->vmf_sp -= 4;
160 	vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
161 }
162 
163 static __inline u_short
164 POP(struct vm86frame *vmf)
165 {
166 	u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
167 
168 	vmf->vmf_sp += 2;
169 	return (x);
170 }
171 
172 static __inline u_int
173 POPL(struct vm86frame *vmf)
174 {
175 	u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
176 
177 	vmf->vmf_sp += 4;
178 	return (x);
179 }
180 
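/*
 * Emulate the flag-manipulation instruction (CLI, STI, PUSHF, POPF,
 * INTn, IRET) that caused a general protection fault while the CPU was
 * in vm86 mode, operating on the virtual interrupt flags rather than
 * the real ones.  Returns 0 (or SIGTRAP when single-stepping) if the
 * instruction was handled here, and SIGBUS if it was not or if a
 * pending virtual interrupt must be delivered by the vm86 monitor.
 */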
181 int
182 vm86_emulate(struct vm86frame *vmf)
183 {
184 	struct vm86_kernel *vm86;
185 	caddr_t addr;
186 	u_char i_byte;
187 	u_int temp_flags;
188 	int inc_ip = 1;
189 	int retcode = 0;
190 
191 	/*
192 	 * pcb_ext contains the address of the extension area, or zero if
193 	 * the extension is not present.  (This check should not be needed,
194 	 * as we can't enter vm86 mode until we set up an extension area)
195 	 */
196 	if (curpcb->pcb_ext == 0)
197 		return (SIGBUS);
198 	vm86 = &curpcb->pcb_ext->ext_vm86;
199 
200 	if (vmf->vmf_eflags & PSL_T)
201 		retcode = SIGTRAP;
202 
203 	addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
204 	i_byte = vm86_fubyte(addr);
205 	if (i_byte == ADDRESS_SIZE_PREFIX) {
206 		i_byte = vm86_fubyte(++addr);
207 		inc_ip++;
208 	}
209 
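	/*
	 * With hardware VME assistance the CPU maintains VIF/VIP in the
	 * real EFLAGS image, so only the instructions it cannot handle
	 * transparently fault into this switch.
	 */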
210 	if (vm86->vm86_has_vme) {
211 		switch (i_byte) {
212 		case OPERAND_SIZE_PREFIX:
213 			i_byte = vm86_fubyte(++addr);
214 			inc_ip++;
215 			switch (i_byte) {
216 			case PUSHF:
217 				if (vmf->vmf_eflags & PSL_VIF)
218 					PUSHL((vmf->vmf_eflags & PUSH_MASK)
219 					    | PSL_IOPL | PSL_I, vmf);
220 				else
221 					PUSHL((vmf->vmf_eflags & PUSH_MASK)
222 					    | PSL_IOPL, vmf);
223 				vmf->vmf_ip += inc_ip;
224 				return (retcode);
225 
226 			case POPF:
227 				temp_flags = POPL(vmf) & POP_MASK;
228 				vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
229 				    | temp_flags | PSL_VM | PSL_I;
230 				vmf->vmf_ip += inc_ip;
231 				if (temp_flags & PSL_I) {
232 					vmf->vmf_eflags |= PSL_VIF;
233 					if (vmf->vmf_eflags & PSL_VIP)
234 						break;
235 				} else {
236 					vmf->vmf_eflags &= ~PSL_VIF;
237 				}
238 				return (retcode);
239 			}
240 			break;
241 
242 		/* VME faults here if VIP is set, but does not set VIF. */
243 		case STI:
244 			vmf->vmf_eflags |= PSL_VIF;
245 			vmf->vmf_ip += inc_ip;
246 			if ((vmf->vmf_eflags & PSL_VIP) == 0) {
247 				uprintf("fatal sti\n");
248 				return (SIGKILL);
249 			}
250 			break;
251 
252 		/* VME if no redirection support */
253 		case INTn:
254 			break;
255 
256 		/* VME if trying to set PSL_T, or PSL_I when VIP is set */
257 		case POPF:
258 			temp_flags = POP(vmf) & POP_MASK;
259 			vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
260 			    | temp_flags | PSL_VM | PSL_I;
261 			vmf->vmf_ip += inc_ip;
262 			if (temp_flags & PSL_I) {
263 				vmf->vmf_eflags |= PSL_VIF;
264 				if (vmf->vmf_eflags & PSL_VIP)
265 					break;
266 			} else {
267 				vmf->vmf_eflags &= ~PSL_VIF;
268 			}
269 			return (retcode);
270 
271 		/* VME if trying to set PSL_T, or PSL_I when VIP is set */
272 		case IRET:
273 			vmf->vmf_ip = POP(vmf);
274 			vmf->vmf_cs = POP(vmf);
275 			temp_flags = POP(vmf) & POP_MASK;
276 			vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
277 			    | temp_flags | PSL_VM | PSL_I;
278 			if (temp_flags & PSL_I) {
279 				vmf->vmf_eflags |= PSL_VIF;
280 				if (vmf->vmf_eflags & PSL_VIP)
281 					break;
282 			} else {
283 				vmf->vmf_eflags &= ~PSL_VIF;
284 			}
285 			return (retcode);
286 
287 		}
288 		return (SIGBUS);
289 	}
290 
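	/*
	 * No VME support: the virtual interrupt state is kept in
	 * vm86->vm86_eflags and every flag-manipulating instruction is
	 * emulated in software below.
	 */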
291 	switch (i_byte) {
292 	case OPERAND_SIZE_PREFIX:
293 		i_byte = vm86_fubyte(++addr);
294 		inc_ip++;
295 		switch (i_byte) {
296 		case PUSHF:
297 			if (vm86->vm86_eflags & PSL_VIF)
298 				PUSHL((vmf->vmf_flags & PUSH_MASK)
299 				    | PSL_IOPL | PSL_I, vmf);
300 			else
301 				PUSHL((vmf->vmf_flags & PUSH_MASK)
302 				    | PSL_IOPL, vmf);
303 			vmf->vmf_ip += inc_ip;
304 			return (retcode);
305 
306 		case POPF:
307 			temp_flags = POPL(vmf) & POP_MASK;
308 			vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
309 			    | temp_flags | PSL_VM | PSL_I;
310 			vmf->vmf_ip += inc_ip;
311 			if (temp_flags & PSL_I) {
312 				vm86->vm86_eflags |= PSL_VIF;
313 				if (vm86->vm86_eflags & PSL_VIP)
314 					break;
315 			} else {
316 				vm86->vm86_eflags &= ~PSL_VIF;
317 			}
318 			return (retcode);
319 		}
320 		return (SIGBUS);
321 
322 	case CLI:
323 		vm86->vm86_eflags &= ~PSL_VIF;
324 		vmf->vmf_ip += inc_ip;
325 		return (retcode);
326 
327 	case STI:
328 		/* if there is a pending interrupt, go to the emulator */
329 		vm86->vm86_eflags |= PSL_VIF;
330 		vmf->vmf_ip += inc_ip;
331 		if (vm86->vm86_eflags & PSL_VIP)
332 			break;
333 		return (retcode);
334 
335 	case PUSHF:
336 		if (vm86->vm86_eflags & PSL_VIF)
337 			PUSH((vmf->vmf_flags & PUSH_MASK)
338 			    | PSL_IOPL | PSL_I, vmf);
339 		else
340 			PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
341 		vmf->vmf_ip += inc_ip;
342 		return (retcode);
343 
344 	case INTn:
345 		i_byte = vm86_fubyte(addr + 1);
346 		if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
347 			break;
348 		if (vm86->vm86_eflags & PSL_VIF)
349 			PUSH((vmf->vmf_flags & PUSH_MASK)
350 			    | PSL_IOPL | PSL_I, vmf);
351 		else
352 			PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
353 		PUSH(vmf->vmf_cs, vmf);
354 		PUSH(vmf->vmf_ip + inc_ip + 1, vmf);	/* return address past INTn */
355 		GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
356 		     &vmf->vmf_cs, &vmf->vmf_ip);
357 		vmf->vmf_flags &= ~PSL_T;
358 		vm86->vm86_eflags &= ~PSL_VIF;
359 		return (retcode);
360 
361 	case IRET:
362 		vmf->vmf_ip = POP(vmf);
363 		vmf->vmf_cs = POP(vmf);
364 		temp_flags = POP(vmf) & POP_MASK;
365 		vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
366 		    | temp_flags | PSL_VM | PSL_I;
367 		if (temp_flags & PSL_I) {
368 			vm86->vm86_eflags |= PSL_VIF;
369 			if (vm86->vm86_eflags & PSL_VIP)
370 				break;
371 		} else {
372 			vm86->vm86_eflags &= ~PSL_VIF;
373 		}
374 		return (retcode);
375 
376 	case POPF:
377 		temp_flags = POP(vmf) & POP_MASK;
378 		vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
379 		    | temp_flags | PSL_VM | PSL_I;
380 		vmf->vmf_ip += inc_ip;
381 		if (temp_flags & PSL_I) {
382 			vm86->vm86_eflags |= PSL_VIF;
383 			if (vm86->vm86_eflags & PSL_VIP)
384 				break;
385 		} else {
386 			vm86->vm86_eflags &= ~PSL_VIF;
387 		}
388 		return (retcode);
389 	}
390 	return (SIGBUS);
391 }
392 
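/*
 * Layout of the dedicated vm86 region: a page table covering the low
 * 1M of real-mode address space plus the 64K high memory area, the PCB
 * used while running vm86 BIOS calls, its extension (including the
 * TSS), an interrupt redirection bitmap and an I/O permission bitmap.
 */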
393 #define PGTABLE_SIZE	((1024 + 64) * 1024 / PAGE_SIZE)
394 #define INTMAP_SIZE	32
395 #define IOMAP_SIZE	ctob(IOPAGES)
396 #define TSS_SIZE \
397 	(sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
398 	 INTMAP_SIZE + IOMAP_SIZE + 1)
399 
400 struct vm86_layout_pae {
401 	uint64_t	vml_pgtbl[PGTABLE_SIZE];
402 	struct 	pcb vml_pcb;
403 	struct	pcb_ext vml_ext;
404 	char	vml_intmap[INTMAP_SIZE];
405 	char	vml_iomap[IOMAP_SIZE];
406 	char	vml_iomap_trailer;
407 };
408 
409 struct vm86_layout_nopae {
410 	uint32_t	vml_pgtbl[PGTABLE_SIZE];
411 	struct 	pcb vml_pcb;
412 	struct	pcb_ext vml_ext;
413 	char	vml_intmap[INTMAP_SIZE];
414 	char	vml_iomap[IOMAP_SIZE];
415 	char	vml_iomap_trailer;
416 };
417 
418 _Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3),
419     "struct vm86_layout_pae exceeds space allocated in locore.s");
420 _Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3),
421     "struct vm86_layout_nopae exceeds space allocated in locore.s");
422 
423 static void
424 vm86_initialize_pae(void)
425 {
426 	int i;
427 	u_int *addr;
428 	struct vm86_layout_pae *vml;
429 	struct pcb *pcb;
430 	struct pcb_ext *ext;
431 	struct soft_segment_descriptor ssd = {
432 		0,			/* segment base address (overwritten) */
433 		0,			/* length (overwritten) */
434 		SDT_SYS386TSS,		/* segment type */
435 		0,			/* privilege level */
436 		1,			/* descriptor present */
437 		0, 0,
438 		0,			/* default operand size (0 = 16-bit) */
439 		0			/* granularity */
440 	};
441 
442 	/*
443 	 * Below is the memory layout that we use for the vm86 region.
444 	 *
445 	 * +--------+
446 	 * |        |
447 	 * |        |
448 	 * | page 0 |
449 	 * |        | +--------+
450 	 * |        | | stack  |
451 	 * +--------+ +--------+ <--------- vm86paddr
452 	 * |        | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
453 	 * |        | +--------+
454 	 * |        | |  PCB   | size: ~240 bytes
455 	 * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
456 	 * |        | +--------+
457 	 * |        | |int map |
458 	 * |        | +--------+
459 	 * +--------+ |        |
460 	 * | page 2 | |  I/O   |
461 	 * +--------+ | bitmap |
462 	 * | page 3 | |        |
463 	 * |        | +--------+
464 	 * +--------+
465 	 */
466 
467 	/*
468 	 * A rudimentary PCB must be installed, in order to get to the
469 	 * PCB extension area.  We use the PCB area as a scratchpad for
470 	 * data storage, the layout of which is shown below.
471 	 *
472 	 * pcb_esi	= new PTD entry 0
473 	 * pcb_ebp	= pointer to frame on vm86 stack
474 	 * pcb_esp	=    stack frame pointer at time of switch
475 	 * pcb_ebx	= va of vm86 page table
476 	 * pcb_eip	=    argument pointer to initial call
477 	 * pcb_vm86[0]	=    saved TSS descriptor, word 0
478 	 * pcb_vm86[1]	=    saved TSS descriptor, word 1
479 	 */
480 #define new_ptd		pcb_esi
481 #define vm86_frame	pcb_ebp
482 #define pgtable_va	pcb_ebx
483 
484 	vml = (struct vm86_layout_pae *)vm86paddr;
485 	pcb = &vml->vml_pcb;
486 	ext = &vml->vml_ext;
487 
488 	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);
489 
490 	bzero(pcb, sizeof(struct pcb));
491 	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
492 	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
493 	pcb->pgtable_va = vm86paddr;
494 	pcb->pcb_flags = PCB_VM86CALL;
495 	pcb->pcb_ext = ext;
496 
497 	bzero(ext, sizeof(struct pcb_ext));
498 	ext->ext_tss.tss_esp0 = vm86paddr;
499 	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
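	/*
	 * The upper 16 bits of tss_ioopt hold the I/O permission bitmap
	 * base as an offset from the start of the TSS.
	 */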
500 	ext->ext_tss.tss_ioopt =
501 		((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
502 	ext->ext_iomap = vml->vml_iomap;
503 	ext->ext_vm86.vm86_intmap = vml->vml_intmap;
504 
505 	if (cpu_feature & CPUID_VME)
506 		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
507 
508 	addr = (u_int *)ext->ext_vm86.vm86_intmap;
509 	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
510 		*addr++ = 0;
511 	vml->vml_iomap_trailer = 0xff;
512 
513 	ssd.ssd_base = (u_int)&ext->ext_tss;
514 	ssd.ssd_limit = TSS_SIZE - 1;
515 	ssdtosd(&ssd, &ext->ext_tssd);
516 
517 	vm86pcb = pcb;
518 
519 #if 0
520         /*
521          * use whatever is leftover of the vm86 page layout as a
522          * message buffer so we can capture early output.
523          */
524         msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
525             ctob(3) - sizeof(struct vm86_layout));
526 #endif
527 }
528 
529 static void
530 vm86_initialize_nopae(void)
531 {
532 	int i;
533 	u_int *addr;
534 	struct vm86_layout_nopae *vml;
535 	struct pcb *pcb;
536 	struct pcb_ext *ext;
537 	struct soft_segment_descriptor ssd = {
538 		0,			/* segment base address (overwritten) */
539 		0,			/* length (overwritten) */
540 		SDT_SYS386TSS,		/* segment type */
541 		0,			/* privilege level */
542 		1,			/* descriptor present */
543 		0, 0,
544 		0,			/* default operand size (0 = 16-bit) */
545 		0			/* granularity */
546 	};
547 
548 	vml = (struct vm86_layout_nopae *)vm86paddr;
549 	pcb = &vml->vml_pcb;
550 	ext = &vml->vml_ext;
551 
552 	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);
553 
554 	bzero(pcb, sizeof(struct pcb));
555 	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
556 	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
557 	pcb->pgtable_va = vm86paddr;
558 	pcb->pcb_flags = PCB_VM86CALL;
559 	pcb->pcb_ext = ext;
560 
561 	bzero(ext, sizeof(struct pcb_ext));
562 	ext->ext_tss.tss_esp0 = vm86paddr;
563 	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
564 	ext->ext_tss.tss_ioopt =
565 		((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
566 	ext->ext_iomap = vml->vml_iomap;
567 	ext->ext_vm86.vm86_intmap = vml->vml_intmap;
568 
569 	if (cpu_feature & CPUID_VME)
570 		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
571 
572 	addr = (u_int *)ext->ext_vm86.vm86_intmap;
573 	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
574 		*addr++ = 0;
575 	vml->vml_iomap_trailer = 0xff;
576 
577 	ssd.ssd_base = (u_int)&ext->ext_tss;
578 	ssd.ssd_limit = TSS_SIZE - 1;
579 	ssdtosd(&ssd, &ext->ext_tssd);
580 
581 	vm86pcb = pcb;
582 
583 #if 0
584         /*
585          * use whatever is leftover of the vm86 page layout as a
586          * message buffer so we can capture early output.
587          */
588         msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
589             ctob(3) - sizeof(struct vm86_layout));
590 #endif
591 }
592 
593 void
594 vm86_initialize(void)
595 {
596 
597 	if (pae_mode)
598 		vm86_initialize_pae();
599 	else
600 		vm86_initialize_nopae();
601 }
602 
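/*
 * Return the kernel virtual address backing the given vm86 page number
 * in this context, or 0 if that page has not been added.
 */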
603 vm_offset_t
604 vm86_getpage(struct vm86context *vmc, int pagenum)
605 {
606 	int i;
607 
608 	for (i = 0; i < vmc->npages; i++)
609 		if (vmc->pmap[i].pte_num == pagenum)
610 			return (vmc->pmap[i].kva);
611 	return (0);
612 }
613 
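/*
 * Associate a kernel page (allocated with malloc(9) if kva is 0) with
 * the given vm86 page number.  A context holds at most VM86_PMAPSIZE
 * pages; duplicate page numbers and overflow are fatal.
 */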
614 vm_offset_t
615 vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
616 {
617 	int i, flags = 0;
618 
619 	for (i = 0; i < vmc->npages; i++)
620 		if (vmc->pmap[i].pte_num == pagenum)
621 			goto overlap;
622 
623 	if (vmc->npages == VM86_PMAPSIZE)
624 		goto full;			/* XXX grow map? */
625 
626 	if (kva == 0) {
627 		kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
628 		flags = VMAP_MALLOC;
629 	}
630 
631 	i = vmc->npages++;
632 	vmc->pmap[i].flags = flags;
633 	vmc->pmap[i].kva = kva;
634 	vmc->pmap[i].pte_num = pagenum;
635 	return (kva);
636 overlap:
637 	panic("vm86_addpage: overlap");
638 full:
639 	panic("vm86_addpage: not enough room");
640 }
641 
642 /*
643  * called from vm86_bioscall, while in vm86 address space, to finalize setup.
644  */
645 void
646 vm86_prepcall(struct vm86frame *vmf)
647 {
648 	struct vm86_kernel *vm86;
649 	uint32_t *stack;
650 	uint8_t *code;
651 
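	/*
	 * Build a tiny real-mode trampoline in the low megabyte: either
	 * "INT n; HLT" for an interrupt call, or a bare HLT whose address
	 * is pushed on the vm86 stack as the far return vector for the
	 * routine the caller supplied in vmf_cs:vmf_ip.
	 */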
652 	code = (void *)0xa00;
653 	stack = (void *)(0x1000 - 2);	/* keep aligned */
654 	if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
655 		/* interrupt call requested */
656 		code[0] = INTn;
657 		code[1] = vmf->vmf_trapno & 0xff;
658 		code[2] = HLT;
659 		vmf->vmf_ip = (uintptr_t)code;
660 		vmf->vmf_cs = 0;
661 	} else {
662 		code[0] = HLT;
663 		stack--;
664 		stack[0] = MAKE_VEC(0, (uintptr_t)code);
665 	}
666 	vmf->vmf_sp = (uintptr_t)stack;
667 	vmf->vmf_ss = 0;
668 	vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
669 	vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;
670 
671 	vm86 = &curpcb->pcb_ext->ext_vm86;
672 	if (!vm86->vm86_has_vme)
673 		vm86->vm86_eflags = vmf->vmf_eflags;  /* save VIF, VIP */
674 }
675 
676 /*
677  * vm86 trap handler; determines whether routine succeeded or not.
678  * Called while in vm86 space, returns to calling process.
679  */
680 void
681 vm86_trap(struct vm86frame *vmf)
682 {
683 	void (*p)(struct vm86frame *);
684 	caddr_t addr;
685 
686 	/* "should not happen" */
687 	if ((vmf->vmf_eflags & PSL_VM) == 0)
688 		panic("vm86_trap called, but not in vm86 mode");
689 
690 	addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
691 	if (*(u_char *)addr == HLT)
692 		vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
693 	else
694 		vmf->vmf_trapno = vmf->vmf_trapno << 16;
695 
696 	p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
697 	    setidt_disp);
698 	p(vmf);
699 }
700 
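/*
 * Issue a real-mode software interrupt through the BIOS call
 * trampoline.  The call is serialized by vm86_lock and runs inside a
 * critical section so that nothing can preempt the borrowed vm86 PCB.
 */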
701 int
702 vm86_intcall(int intnum, struct vm86frame *vmf)
703 {
704 	int (*p)(struct vm86frame *);
705 	int retval;
706 
707 	if (intnum < 0 || intnum > 0xff)
708 		return (EINVAL);
709 
710 	vmf->vmf_trapno = intnum;
711 	p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
712 	    setidt_disp);
713 	mtx_lock(&vm86_lock);
714 	critical_enter();
715 	retval = p(vmf);
716 	critical_exit();
717 	mtx_unlock(&vm86_lock);
718 	return (retval);
719 }
720 
721 /*
722  * struct vm86context contains the page table to use when making
723  * vm86 calls.  If intnum is a valid interrupt number (0-255), then
724  * the "interrupt trampoline" will be used, otherwise we use the
725  * caller's cs:ip routine.
726  */
727 int
728 vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
729 {
730 	uint64_t *pte_pae;
731 	uint32_t *pte_nopae;
732 	int (*p)(struct vm86frame *);
733 	vm_paddr_t page;
734 	int i, entry, retval;
735 
736 	mtx_lock(&vm86_lock);
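	/*
	 * Temporarily install the caller's data pages in the vm86 page
	 * table, remembering the previous PTEs so they can be restored
	 * once the call returns.
	 */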
737 	if (pae_mode) {
738 		pte_pae = (uint64_t *)vm86paddr;
739 		for (i = 0; i < vmc->npages; i++) {
740 			page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE);
741 			entry = vmc->pmap[i].pte_num;
742 			vmc->pmap[i].old_pte = pte_pae[entry];
743 			pte_pae[entry] = page | PG_V | PG_RW | PG_U;
744 			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
745 		}
746 	} else {
747 		pte_nopae = (uint32_t *)vm86paddr;
748 		for (i = 0; i < vmc->npages; i++) {
749 			page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE);
750 			entry = vmc->pmap[i].pte_num;
751 			vmc->pmap[i].old_pte = pte_nopae[entry];
752 			pte_nopae[entry] = page | PG_V | PG_RW | PG_U;
753 			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
754 		}
755 	}
756 
757 	vmf->vmf_trapno = intnum;
758 	p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
759 	    setidt_disp);
760 	critical_enter();
761 	retval = p(vmf);
762 	critical_exit();
763 
764 	if (pae_mode) {
765 		for (i = 0; i < vmc->npages; i++) {
766 			entry = vmc->pmap[i].pte_num;
767 			pte_pae[entry] = vmc->pmap[i].old_pte;
768 			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
769 		}
770 	} else {
771 		for (i = 0; i < vmc->npages; i++) {
772 			entry = vmc->pmap[i].pte_num;
773 			pte_nopae[entry] = vmc->pmap[i].old_pte;
774 			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
775 		}
776 	}
777 	mtx_unlock(&vm86_lock);
778 
779 	return (retval);
780 }
781 
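/*
 * Translate a vm86 segment:offset pair into the kernel virtual address
 * of the backing page in this context, or 0 if it is not mapped.
 */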
782 vm_offset_t
783 vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
784 {
785 	int i, page;
786 	vm_offset_t addr;
787 
788 	addr = (vm_offset_t)MAKE_ADDR(sel, off);
789 	page = addr >> PAGE_SHIFT;
790 	for (i = 0; i < vmc->npages; i++)
791 		if (page == vmc->pmap[i].pte_num)
792 			return (vmc->pmap[i].kva + (addr & PAGE_MASK));
793 	return (0);
794 }
795 
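/*
 * Reverse translation: convert a kernel virtual address within one of
 * the context's pages back into a vm86 segment:offset pair.  Returns 1
 * on success, 0 if the address does not belong to the context.
 */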
796 int
797 vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
798      u_short *off)
799 {
800 	int i;
801 
802 	for (i = 0; i < vmc->npages; i++)
803 		if (kva >= vmc->pmap[i].kva &&
804 		    kva < vmc->pmap[i].kva + PAGE_SIZE) {
805 			*off = kva - vmc->pmap[i].kva;
806 			*sel = vmc->pmap[i].pte_num << 8;
807 			return (1);
808 		}
809 	return (0);
810 }
811 
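/*
 * sysarch(2) back end for the i386_vm86() interface: initialize the
 * per-thread vm86 state, query the VME setting, or (with privilege)
 * issue a real-mode interrupt call on behalf of the process.
 */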
812 int
813 vm86_sysarch(struct thread *td, char *args)
814 {
815 	int error = 0;
816 	struct i386_vm86_args ua;
817 	struct vm86_kernel *vm86;
818 
819 	if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
820 		return (error);
821 
822 	if (td->td_pcb->pcb_ext == 0)
823 		if ((error = i386_extend_pcb(td)) != 0)
824 			return (error);
825 	vm86 = &td->td_pcb->pcb_ext->ext_vm86;
826 
827 	switch (ua.sub_op) {
828 	case VM86_INIT: {
829 		struct vm86_init_args sa;
830 
831 		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
832 			return (error);
833 		if (cpu_feature & CPUID_VME)
834 			vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
835 		else
836 			vm86->vm86_has_vme = 0;
837 		vm86->vm86_inited = 1;
838 		vm86->vm86_debug = sa.debug;
839 		bcopy(&sa.int_map, vm86->vm86_intmap, 32);
840 		}
841 		break;
842 
843 #if 0
844 	case VM86_SET_VME: {
845 		struct vm86_vme_args sa;
846 
847 		if ((cpu_feature & CPUID_VME) == 0)
848 			return (ENODEV);
849 
850 		if (error = copyin(ua.sub_args, &sa, sizeof(sa)))
851 			return (error);
852 		if (sa.state)
853 			load_cr4(rcr4() | CR4_VME);
854 		else
855 			load_cr4(rcr4() & ~CR4_VME);
856 		}
857 		break;
858 #endif
859 
860 	case VM86_GET_VME: {
861 		struct vm86_vme_args sa;
862 
863 		sa.state = (rcr4() & CR4_VME ? 1 : 0);
864 		error = copyout(&sa, ua.sub_args, sizeof(sa));
865 		}
866 		break;
867 
868 	case VM86_INTCALL: {
869 		struct vm86_intcall_args sa;
870 
871 		if ((error = priv_check(td, PRIV_VM86_INTCALL)))
872 			return (error);
873 		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
874 			return (error);
875 		if ((error = vm86_intcall(sa.intnum, &sa.vmf)))
876 			return (error);
877 		error = copyout(&sa, ua.sub_args, sizeof(sa));
878 		}
879 		break;
880 
881 	default:
882 		error = EINVAL;
883 	}
884 	return (error);
885 }
886