1 /*	$NetBSD: trap.c,v 1.276 2015/12/16 18:54:03 maxv Exp $	*/
2 
3 /*-
4  * Copyright (c) 1998, 2000, 2005, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Charles M. Hannum.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1990 The Regents of the University of California.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * the University of Utah, and William Jolitz.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  *	@(#)trap.c	7.4 (Berkeley) 5/13/91
64  */
65 
66 /*
67  * 386 Trap and System call handling
68  */
69 
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.276 2015/12/16 18:54:03 maxv Exp $");
72 
73 #include "opt_ddb.h"
74 #include "opt_kgdb.h"
75 #include "opt_lockdebug.h"
76 #include "opt_multiprocessor.h"
77 #include "opt_vm86.h"
78 #include "opt_kstack_dr0.h"
79 #include "opt_xen.h"
80 #include "opt_dtrace.h"
81 
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/proc.h>
85 #include <sys/acct.h>
86 #include <sys/kauth.h>
87 #include <sys/kernel.h>
88 #include <sys/kmem.h>
89 #include <sys/ras.h>
90 #include <sys/signal.h>
91 #include <sys/syscall.h>
92 #include <sys/cpu.h>
93 #include <sys/ucontext.h>
94 
95 #include <uvm/uvm_extern.h>
96 
97 #include <machine/cpufunc.h>
98 #include <machine/psl.h>
99 #include <machine/reg.h>
100 #include <machine/trap.h>
101 #include <machine/userret.h>
102 #include <machine/db_machdep.h>
103 
104 #include "mca.h"
105 #if NMCA > 0
106 #include <machine/mca_machdep.h>
107 #endif
108 
109 #include <x86/nmi.h>
110 
111 #include "isa.h"
112 
113 #include <sys/kgdb.h>
114 
115 #ifdef KDTRACE_HOOKS
116 #include <sys/dtrace_bsd.h>
117 
/*
 * This is a hook which is initialised by the dtrace module
 * to handle traps which might occur during DTrace probe
 * execution.  trap() offers kernel-mode protection faults and
 * page faults to it before its own handling; a non-zero return
 * means the hook has dealt with the trap.  It stays NULL while
 * no handler is registered.
 */
dtrace_trap_func_t	dtrace_trap_func = NULL;

/*
 * Second DTrace hook; nothing in this file calls it.
 * NOTE(review): presumably the double-fault counterpart of the
 * hook above, installed by the dtrace module -- confirm against
 * its users.
 */
dtrace_doubletrap_func_t	dtrace_doubletrap_func = NULL;
126 #endif
127 
128 
/* Common C entry point for exceptions, faults and traps. */
void trap(struct trapframe *);
/* Builds a trapframe from a saved TSS image and passes it to trap(). */
void trap_tss(struct i386tss *, int, int);
/* Not defined in this file; declared __dead, i.e. it does not return. */
void trap_return_fault_return(struct trapframe *) __dead;
132 
/*
 * Printable names of the machine traps, indexed by trap number.  The
 * number and T_xxx constant each entry stands for are noted beside it;
 * the index must not have T_USER or'ed in.  trap_types holds the number
 * of entries and is the bound an index has to be checked against.
 */
const char * const trap_type[] = {
	"privileged instruction fault",		/*  0 T_PRIVINFLT */
	"breakpoint trap",			/*  1 T_BPTFLT */
	"arithmetic trap",			/*  2 T_ARITHTRAP */
	"asynchronous system trap",		/*  3 T_ASTFLT */
	"protection fault",			/*  4 T_PROTFLT */
	"trace trap",				/*  5 T_TRCTRAP */
	"page fault",				/*  6 T_PAGEFLT */
	"alignment fault",			/*  7 T_ALIGNFLT */
	"integer divide fault",			/*  8 T_DIVIDE */
	"non-maskable interrupt",		/*  9 T_NMI */
	"overflow trap",			/* 10 T_OFLOW */
	"bounds check fault",			/* 11 T_BOUND */
	"FPU not available fault",		/* 12 T_DNA */
	"double fault",				/* 13 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 14 T_FPOPFLT */
	"invalid TSS fault",			/* 15 T_TSSFLT */
	"segment not present fault",		/* 16 T_SEGNPFLT */
	"stack fault",				/* 17 T_STKFLT */
	"machine check fault",			/* 18 T_MCA */
	"SSE FP exception",			/* 19 T_XMM */
	"reserved trap",			/* 20 T_RESERVED */
};
int	trap_types = __arraycount(trap_type);
157 
#ifdef DEBUG
/* When non-zero, trap() prints every trap frame it is entered with. */
int	trapdebug = 0;
#endif
161 
/* IDTVEC(name) expands to the identifier X<name>, e.g. Xosyscall. */
#define	IDTVEC(name)	__CONCAT(X, name)
163 
164 void
trap_tss(struct i386tss * tss,int trapno,int code)165 trap_tss(struct i386tss *tss, int trapno, int code)
166 {
167 	struct trapframe tf;
168 
169 	tf.tf_gs = tss->tss_gs;
170 	tf.tf_fs = tss->tss_fs;
171 	tf.tf_es = tss->__tss_es;
172 	tf.tf_ds = tss->__tss_ds;
173 	tf.tf_edi = tss->__tss_edi;
174 	tf.tf_esi = tss->__tss_esi;
175 	tf.tf_ebp = tss->tss_ebp;
176 	tf.tf_ebx = tss->__tss_ebx;
177 	tf.tf_edx = tss->__tss_edx;
178 	tf.tf_ecx = tss->__tss_ecx;
179 	tf.tf_eax = tss->__tss_eax;
180 	tf.tf_trapno = trapno;
181 	tf.tf_err = code | TC_TSS;
182 	tf.tf_eip = tss->__tss_eip;
183 	tf.tf_cs = tss->__tss_cs;
184 	tf.tf_eflags = tss->__tss_eflags;
185 	tf.tf_esp = tss->tss_esp;
186 	tf.tf_ss = tss->__tss_ss;
187 	trap(&tf);
188 }
189 
190 static void *
onfault_handler(const struct pcb * pcb,const struct trapframe * tf)191 onfault_handler(const struct pcb *pcb, const struct trapframe *tf)
192 {
193 	struct onfault_table {
194 		uintptr_t start;
195 		uintptr_t end;
196 		void *handler;
197 	};
198 	extern const struct onfault_table onfault_table[];
199 	const struct onfault_table *p;
200 	uintptr_t pc;
201 
202 	if (pcb->pcb_onfault != NULL) {
203 		return pcb->pcb_onfault;
204 	}
205 
206 	pc = tf->tf_eip;
207 	for (p = onfault_table; p->start; p++) {
208 		if (p->start <= pc && pc < p->end) {
209 			return p->handler;
210 		}
211 	}
212 	return NULL;
213 }
214 
215 static void
trap_print(const struct trapframe * frame,const lwp_t * l)216 trap_print(const struct trapframe *frame, const lwp_t *l)
217 {
218 	const int type = frame->tf_trapno;
219 
220 	if (frame->tf_trapno < trap_types) {
221 		printf("fatal %s", trap_type[type]);
222 	} else {
223 		printf("unknown trap %d", type);
224 	}
225 	printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
226 
227 	printf("trap type %d code %x eip %x cs %x eflags %x cr2 %lx "
228 	    "ilevel %x esp %x\n",
229 	    type, frame->tf_err, frame->tf_eip, frame->tf_cs, frame->tf_eflags,
230 	    (long)rcr2(), curcpu()->ci_ilevel, frame->tf_esp);
231 
232 	printf("curlwp %p pid %d lid %d lowest kstack %p\n",
233 	    l, l->l_proc->p_pid, l->l_lid, KSTACK_LOWEST_ADDR(l));
234 }
235 
/*
 * check_dr0: panic when %dr6 reports a hit on dr0.
 *
 * Only kernels built with KSTACK_CHECK_DR0 do anything here; without that
 * option the function body is empty.  The hit bit is not cleared, as the
 * panic is not expected to return.
 */
static void
check_dr0(void)
{
#ifdef KSTACK_CHECK_DR0
	const u_int dr0_hit = 1 << 0;	/* dr6 bit for dr0 */

	if ((rdr6() & dr0_hit) == 0)
		return;
	panic("trap on DR0: maybe kernel stack overflow\n");
#endif
}
253 
254 /*
255  * trap(frame): exception, fault, and trap interface to BSD kernel.
256  *
257  * This common code is called from assembly language IDT gate entry routines
258  * that prepare a suitable stack frame, and restore this frame after the
259  * exception has been processed. Note that the effect is as if the arguments
260  * were passed call by reference.
261  */
262 void
trap(struct trapframe * frame)263 trap(struct trapframe *frame)
264 {
265 	struct lwp *l = curlwp;
266 	struct proc *p;
267 	struct pcb *pcb;
268 	extern char fusubail[], kcopy_fault[], return_address_fault[],
269 	    IDTVEC(osyscall)[];
270 	struct trapframe *vframe;
271 	ksiginfo_t ksi;
272 	void *onfault;
273 	int type, error;
274 	uint32_t cr2;
275 	bool pfail;
276 
277 	if (__predict_true(l != NULL)) {
278 		pcb = lwp_getpcb(l);
279 		p = l->l_proc;
280 	} else {
281 		/*
282 		 * this can happen eg. on break points in early on boot.
283 		 */
284 		pcb = NULL;
285 		p = NULL;
286 	}
287 	type = frame->tf_trapno;
288 
289 #ifdef DEBUG
290 	if (trapdebug) {
291 		trap_print(frame, l);
292 	}
293 #endif
294 	if (type != T_NMI &&
295 	    !KERNELMODE(frame->tf_cs, frame->tf_eflags)) {
296 		type |= T_USER;
297 		l->l_md.md_regs = frame;
298 		pcb->pcb_cr2 = 0;
299 		LWP_CACHE_CREDS(l, p);
300 	}
301 
302 #ifdef KDTRACE_HOOKS
303 	/*
304 	 * A trap can occur while DTrace executes a probe. Before
305 	 * executing the probe, DTrace blocks re-scheduling and sets
306 	 * a flag in its per-cpu flags to indicate that it doesn't
307 	 * want to fault. On returning from the the probe, the no-fault
308 	 * flag is cleared and finally re-scheduling is enabled.
309 	 *
310 	 * If the DTrace kernel module has registered a trap handler,
311 	 * call it and if it returns non-zero, assume that it has
312 	 * handled the trap and modified the trap frame so that this
313 	 * function can return normally.
314 	 */
315 	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
316 	    dtrace_trap_func != NULL) {
317 		if ((*dtrace_trap_func)(frame, type)) {
318 			return;
319 		}
320 	}
321 #endif
322 
323 	switch (type) {
324 
325 	case T_ASTFLT:
326 		/*FALLTHROUGH*/
327 
328 	default:
329 	we_re_toast:
330 		if (type == T_TRCTRAP)
331 			check_dr0();
332 		else
333 			trap_print(frame, l);
334 
335 		if (kdb_trap(type, 0, frame))
336 			return;
337 		if (kgdb_trap(type, frame))
338 			return;
339 		/*
340 		 * If this is a breakpoint, don't panic if we're not connected.
341 		 */
342 		if (type == T_BPTFLT && kgdb_disconnected()) {
343 			printf("kgdb: ignored %s\n", trap_type[type]);
344 			return;
345 		}
346 		panic("trap");
347 		/*NOTREACHED*/
348 
349 	case T_PROTFLT:
350 	case T_SEGNPFLT:
351 	case T_ALIGNFLT:
352 	case T_TSSFLT:
353 		if (p == NULL)
354 			goto we_re_toast;
355 		/* Check for copyin/copyout fault. */
356 		onfault = onfault_handler(pcb, frame);
357 		if (onfault != NULL) {
358 copyefault:
359 			error = EFAULT;
360 copyfault:
361 			frame->tf_eip = (uintptr_t)onfault;
362 			frame->tf_eax = error;
363 			return;
364 		}
365 
366 		/*
367 		 * Check for failure during return to user mode.
368 		 * This can happen loading invalid values into the segment
369 		 * registers, or during the 'iret' itself.
370 		 *
371 		 * We do this by looking at the instruction we faulted on.
372 		 * The specific instructions we recognize only happen when
373 		 * returning from a trap, syscall, or interrupt.
374 		 */
375 
376 kernelfault:
377 		KSI_INIT_TRAP(&ksi);
378 		ksi.ksi_signo = SIGSEGV;
379 		ksi.ksi_code = SEGV_ACCERR;
380 		ksi.ksi_trap = type;
381 
382 		switch (*(u_char *)frame->tf_eip) {
383 		case 0xcf:	/* iret */
384 			/*
385 			 * The 'iret' instruction faulted, so we have the
386 			 * 'user' registers saved after the kernel %eip:%cs:%fl
387 			 * of the 'iret' and below that the user %eip:%cs:%fl
388 			 * the 'iret' was processing.
389 			 * We must delete the 3 words of kernel return address
390 			 * from the stack to generate a normal stack frame
391 			 * (eg for sending a SIGSEGV).
392 			 */
393 			vframe = (void *)((int *)frame + 3);
394 			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
395 				goto we_re_toast;
396 			memmove(vframe, frame,
397 			    offsetof(struct trapframe, tf_eip));
398 			/* Set the faulting address to the user %eip */
399 			ksi.ksi_addr = (void *)vframe->tf_eip;
400 			break;
401 		case 0x8e:
402 			switch (*(uint32_t *)frame->tf_eip) {
403 			case 0x8e242c8e:	/* mov (%esp,%gs), then */
404 			case 0x0424648e:	/* mov 0x4(%esp),%fs */
405 			case 0x0824448e:	/* mov 0x8(%esp),%es */
406 			case 0x0c245c8e:	/* mov 0xc(%esp),%ds */
407 				break;
408 			default:
409 				goto we_re_toast;
410 			}
411 			/*
412 			 * We faulted loading one if the user segment registers.
413 			 * The stack frame containing the user registers is
414 			 * still valid and is just below the %eip:%cs:%fl of
415 			 * the kernel fault frame.
416 			 */
417 			vframe = (void *)(&frame->tf_eflags + 1);
418 			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
419 				goto we_re_toast;
420 			/* There is no valid address for the fault */
421 			break;
422 		default:
423 			goto we_re_toast;
424 		}
425 		/*
426 		 * We might have faulted trying to execute the
427 		 * trampoline for a local (nested) signal handler.
428 		 * Only generate SIGSEGV if the user %cs isn't changed.
429 		 * (This is only strictly necessary in the 'iret' case.)
430 		 */
431 		if (!pmap_exec_fixup(&p->p_vmspace->vm_map, vframe, pcb)) {
432 			/* Save outer frame for any signal return */
433 			l->l_md.md_regs = vframe;
434 			(*p->p_emul->e_trapsignal)(l, &ksi);
435 		}
436 		/* Return to user by reloading the user frame */
437 		trap_return_fault_return(vframe);
438 		/* NOTREACHED */
439 
440 	case T_PROTFLT|T_USER:		/* protection fault */
441 	case T_TSSFLT|T_USER:
442 	case T_SEGNPFLT|T_USER:
443 	case T_STKFLT|T_USER:
444 	case T_ALIGNFLT|T_USER:
445 		KSI_INIT_TRAP(&ksi);
446 
447 		ksi.ksi_addr = (void *)rcr2();
448 		switch (type) {
449 		case T_SEGNPFLT|T_USER:
450 		case T_STKFLT|T_USER:
451 			ksi.ksi_signo = SIGBUS;
452 			ksi.ksi_code = BUS_ADRERR;
453 			break;
454 		case T_TSSFLT|T_USER:
455 			ksi.ksi_signo = SIGBUS;
456 			ksi.ksi_code = BUS_OBJERR;
457 			break;
458 		case T_ALIGNFLT|T_USER:
459 			ksi.ksi_signo = SIGBUS;
460 			ksi.ksi_code = BUS_ADRALN;
461 			break;
462 		case T_PROTFLT|T_USER:
463 #ifdef VM86
464 			if (frame->tf_eflags & PSL_VM) {
465 				vm86_gpfault(l, type & ~T_USER);
466 				goto out;
467 			}
468 #endif
469 			/*
470 			 * If pmap_exec_fixup does something,
471 			 * let's retry the trap.
472 			 */
473 			if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, pcb)){
474 				goto out;
475 			}
476 			ksi.ksi_signo = SIGSEGV;
477 			ksi.ksi_code = SEGV_ACCERR;
478 			break;
479 		default:
480 			KASSERT(0);
481 			break;
482 		}
483 		goto trapsignal;
484 
485 	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
486 	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
487 		KSI_INIT_TRAP(&ksi);
488 		ksi.ksi_signo = SIGILL;
489 		ksi.ksi_addr = (void *) frame->tf_eip;
490 		switch (type) {
491 		case T_PRIVINFLT|T_USER:
492 			ksi.ksi_code = ILL_PRVOPC;
493 			break;
494 		case T_FPOPFLT|T_USER:
495 			ksi.ksi_code = ILL_COPROC;
496 			break;
497 		default:
498 			ksi.ksi_code = 0;
499 			break;
500 		}
501 		goto trapsignal;
502 
503 	case T_ASTFLT|T_USER:
504 		/* Allow process switch. */
505 		//curcpu()->ci_data.cpu_nast++;
506 		if (l->l_pflag & LP_OWEUPC) {
507 			l->l_pflag &= ~LP_OWEUPC;
508 			ADDUPROF(l);
509 		}
510 		/* Allow a forced task switch. */
511 		if (curcpu()->ci_want_resched) {
512 			preempt();
513 		}
514 		goto out;
515 
516 	case T_BOUND|T_USER:
517 	case T_OFLOW|T_USER:
518 	case T_DIVIDE|T_USER:
519 		KSI_INIT_TRAP(&ksi);
520 		ksi.ksi_signo = SIGFPE;
521 		ksi.ksi_addr = (void *)frame->tf_eip;
522 		switch (type) {
523 		case T_BOUND|T_USER:
524 			ksi.ksi_code = FPE_FLTSUB;
525 			break;
526 		case T_OFLOW|T_USER:
527 			ksi.ksi_code = FPE_INTOVF;
528 			break;
529 		case T_DIVIDE|T_USER:
530 			ksi.ksi_code = FPE_INTDIV;
531 			break;
532 		default:
533 			ksi.ksi_code = 0;
534 			break;
535 		}
536 		goto trapsignal;
537 
538 	case T_PAGEFLT:
539 		/* Allow page faults in kernel mode. */
540 		if (__predict_false(l == NULL))
541 			goto we_re_toast;
542 
543 		/*
544 		 * fusubail is used by [fs]uswintr() to prevent page faulting
545 		 * from inside the profiling interrupt.
546 		 */
547 		onfault = pcb->pcb_onfault;
548 		if (onfault == fusubail || onfault == return_address_fault) {
549 			goto copyefault;
550 		}
551 		if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) {
552 			goto we_re_toast;
553 		}
554 
555 		cr2 = rcr2();
556 
557 		if (frame->tf_err & PGEX_X) {
558 			/* SMEP might have brought us here */
559 			if (cr2 > VM_MIN_ADDRESS && cr2 <= VM_MAXUSER_ADDRESS)
560 				panic("prevented execution of %p (SMEP)",
561 				    (void *)cr2);
562 		}
563 
564 		goto faultcommon;
565 
566 	case T_PAGEFLT|T_USER: {	/* page fault */
567 		register vaddr_t va;
568 		register struct vmspace *vm;
569 		register struct vm_map *map;
570 		vm_prot_t ftype;
571 		extern struct vm_map *kernel_map;
572 
573 		cr2 = rcr2();
574 faultcommon:
575 		vm = p->p_vmspace;
576 		if (__predict_false(vm == NULL)) {
577 			goto we_re_toast;
578 		}
579 		pcb->pcb_cr2 = cr2;
580 		va = trunc_page((vaddr_t)cr2);
581 		/*
582 		 * It is only a kernel address space fault iff:
583 		 *	1. (type & T_USER) == 0  and
584 		 *	2. pcb_onfault not set or
585 		 *	3. pcb_onfault set but supervisor space fault
586 		 * The last can occur during an exec() copyin where the
587 		 * argument space is lazy-allocated.
588 		 */
589 		if (type == T_PAGEFLT && va >= KERNBASE)
590 			map = kernel_map;
591 		else
592 			map = &vm->vm_map;
593 		if (frame->tf_err & PGEX_W)
594 			ftype = VM_PROT_WRITE;
595 		else if (frame->tf_err & PGEX_X)
596 			ftype = VM_PROT_EXECUTE;
597 		else
598 			ftype = VM_PROT_READ;
599 
600 #ifdef DIAGNOSTIC
601 		if (map == kernel_map && va == 0) {
602 			printf("trap: bad kernel access at %lx\n", va);
603 			goto we_re_toast;
604 		}
605 #endif
606 		/* Fault the original page in. */
607 		onfault = pcb->pcb_onfault;
608 		pcb->pcb_onfault = NULL;
609 		error = uvm_fault(map, va, ftype);
610 		pcb->pcb_onfault = onfault;
611 		if (error == 0) {
612 			if (map != kernel_map && (void *)va >= vm->vm_maxsaddr)
613 				uvm_grow(p, va);
614 
615 			pfail = false;
616 			while (type == T_PAGEFLT) {
617 				/*
618 				 * we need to switch pmap now if we're in
619 				 * the middle of copyin/out.
620 				 *
621 				 * but we don't need to do so for kcopy as
622 				 * it never touch userspace.
623  				 */
624 				kpreempt_disable();
625 				if (curcpu()->ci_want_pmapload) {
626 					onfault = onfault_handler(pcb, frame);
627 					if (onfault != kcopy_fault) {
628 						pmap_load();
629 					}
630 				}
631 				/*
632 				 * We need to keep the pmap loaded and
633 				 * so avoid being preempted until back
634 				 * into the copy functions.  Disable
635 				 * interrupts at the hardware level before
636 				 * re-enabling preemption.  Interrupts
637 				 * will be re-enabled by 'iret' when
638 				 * returning back out of the trap stub.
639 				 * They'll only be re-enabled when the
640 				 * program counter is once again in
641 				 * the copy functions, and so visible
642 				 * to cpu_kpreempt_exit().
643 				 */
644 #ifndef XEN
645 				x86_disable_intr();
646 #endif
647 				l->l_nopreempt--;
648 				if (l->l_nopreempt > 0 || !l->l_dopreempt ||
649 				    pfail) {
650 					return;
651 				}
652 #ifndef XEN
653 				x86_enable_intr();
654 #endif
655 				/*
656 				 * If preemption fails for some reason,
657 				 * don't retry it.  The conditions won't
658 				 * change under our nose.
659 				 */
660 				pfail = kpreempt(0);
661 			}
662 			goto out;
663 		}
664 
665 		if (type == T_PAGEFLT) {
666 			onfault = onfault_handler(pcb, frame);
667 			if (onfault != NULL)
668 				goto copyfault;
669 			printf("uvm_fault(%p, %#lx, %d) -> %#x\n",
670 			    map, va, ftype, error);
671 			goto kernelfault;
672 		}
673 
674 		KSI_INIT_TRAP(&ksi);
675 		ksi.ksi_trap = type & ~T_USER;
676 		ksi.ksi_addr = (void *)cr2;
677 		switch (error) {
678 		case EINVAL:
679 			ksi.ksi_signo = SIGBUS;
680 			ksi.ksi_code = BUS_ADRERR;
681 			break;
682 		case EACCES:
683 			ksi.ksi_signo = SIGSEGV;
684 			ksi.ksi_code = SEGV_ACCERR;
685 			error = EFAULT;
686 			break;
687 		case ENOMEM:
688 			ksi.ksi_signo = SIGKILL;
689 			printf("UVM: pid %d.%d (%s), uid %d killed: "
690 			    "out of swap\n", p->p_pid, l->l_lid, p->p_comm,
691 			    l->l_cred ?  kauth_cred_geteuid(l->l_cred) : -1);
692 			break;
693 		default:
694 			ksi.ksi_signo = SIGSEGV;
695 			ksi.ksi_code = SEGV_MAPERR;
696 			break;
697 		}
698 
699 #ifdef TRAP_SIGDEBUG
700 		printf("pid %d.%d (%s): signal %d at eip %x addr %lx "
701 		    "error %d\n", p->p_pid, l->l_lid, p->p_comm, ksi.ksi_signo,
702 		    frame->tf_eip, va, error);
703 #endif
704 		(*p->p_emul->e_trapsignal)(l, &ksi);
705 		break;
706 	}
707 
708 	case T_TRCTRAP:
709 		/* Check whether they single-stepped into a lcall. */
710 		if (frame->tf_eip == (int)IDTVEC(osyscall))
711 			return;
712 		if (frame->tf_eip == (int)IDTVEC(osyscall) + 1) {
713 			frame->tf_eflags &= ~PSL_T;
714 			return;
715 		}
716 		goto we_re_toast;
717 
718 	case T_BPTFLT|T_USER:		/* bpt instruction fault */
719 	case T_TRCTRAP|T_USER:		/* trace trap */
720 		/*
721 		 * Don't go single-stepping into a RAS.
722 		 */
723 		if (p->p_raslist == NULL ||
724 		    (ras_lookup(p, (void *)frame->tf_eip) == (void *)-1)) {
725 			KSI_INIT_TRAP(&ksi);
726 			ksi.ksi_signo = SIGTRAP;
727 			ksi.ksi_trap = type & ~T_USER;
728 			if (type == (T_BPTFLT|T_USER))
729 				ksi.ksi_code = TRAP_BRKPT;
730 			else
731 				ksi.ksi_code = TRAP_TRACE;
732 			ksi.ksi_addr = (void *)frame->tf_eip;
733 			(*p->p_emul->e_trapsignal)(l, &ksi);
734 		}
735 		break;
736 
737 	case T_NMI:
738 		if (nmi_dispatch(frame))
739 			return;
740 		/* NMI can be hooked up to a pushbutton for debugging */
741 		if (kgdb_trap(type, frame))
742 			return;
743 		if (kdb_trap(type, 0, frame))
744 			return;
745 		/* machine/parity/power fail/"kitchen sink" faults */
746 #if NMCA > 0
747 		mca_nmi();
748 #endif
749 		x86_nmi();
750 	}
751 
752 	if ((type & T_USER) == 0)
753 		return;
754 out:
755 	userret(l);
756 	return;
757 trapsignal:
758 	ksi.ksi_trap = type & ~T_USER;
759 	(*p->p_emul->e_trapsignal)(l, &ksi);
760 	userret(l);
761 }
762 
763 /*
764  * startlwp: start of a new LWP.
765  */
766 void
startlwp(void * arg)767 startlwp(void *arg)
768 {
769 	ucontext_t *uc = arg;
770 	lwp_t *l = curlwp;
771 	int error __diagused;
772 
773 	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
774 	KASSERT(error == 0);
775 
776 	kmem_free(uc, sizeof(ucontext_t));
777 	userret(l);
778 }
779