xref: /original-bsd/sys/i386/i386/trap.c (revision abb91716)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * the University of Utah, and William Jolitz.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)trap.c	7.13 (Berkeley) 10/11/92
11  */
12 
13 /*
14  * 386 Trap and System call handling
15  */
16 
17 #include <machine/cpu.h>
18 #include <machine/psl.h>
19 #include <machine/reg.h>
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/proc.h>
24 #include <sys/user.h>
25 #include <sys/acct.h>
26 #include <sys/kernel.h>
27 #ifdef KTRACE
28 #include <sys/ktrace.h>
29 #endif
30 
31 #include <vm/vm_param.h>
32 #include <vm/pmap.h>
33 #include <vm/vm_map.h>
34 
35 #include <machine/trap.h>
36 #include <machine/dbg.h>
37 
38 
39 struct	sysent sysent[];
40 int	nsysent;
41 unsigned rcr2();
42 extern short cpl;
43 
44 
45 /*
46  * trap(frame):
47  *	Exception, fault, and trap interface to BSD kernel. This
48  * common code is called from assembly language IDT gate entry
49  * routines that prepare a suitable stack frame, and restore this
50  * frame after the exception has been processed. Note that the
51  * effect is as if the arguments were passed call by reference.
52  */
53 
/*ARGSUSED*/
/*
 * User-mode faults become signals via trapsignal(); kernel-mode faults
 * either recover through pcb_onfault / the VM system, or panic.
 * Writes to `frame' members (tf_eip, tf_eflags, ...) change the state
 * restored when the assembly stub returns from the exception.
 */
trap(frame)
	struct trapframe frame;
{
	register int i;			/* signal number to post */
	register struct proc *p = curproc;
	struct timeval syst;		/* system time at entry, for profiling */
	int ucode, type, code, eva;
	extern int cold;		/* nonzero until bootstrap is complete */

	/* any trap this early in boot is unrecoverable */
if(cold) goto we_re_toast;
	frame.tf_eflags &= ~PSL_NT;	/* clear nested trap XXX */
	type = frame.tf_trapno;

	/*
	 * If a copyin()/copyout()-style recovery handler is registered,
	 * bounce any non-page-fault trap straight to it by rewriting the
	 * saved pc.  NOTE(review): 0xc is presumably T_PAGEFLT -- page
	 * faults must still reach vm_fault below even during a copy;
	 * confirm against machine/trap.h.
	 */
	if (curpcb && curpcb->pcb_onfault && frame.tf_trapno != 0xc) {
copyfault:	frame.tf_eip = (int)curpcb->pcb_onfault;
		return;
	}

	syst = p->p_stime;
	/* trap came from user mode: tag the type and remember the frame */
	if (ISPL(frame.tf_cs) == SEL_UPL) {
		type |= T_USER;
		p->p_md.md_regs = (int *)&frame;
		curpcb->pcb_flags |= FM_TRAP;	/* used by sendsig */
	}

	ucode=0;
	eva = rcr2();		/* %cr2: faulting linear address, if any */
	code = frame.tf_err;	/* error code pushed by the CPU (or stub) */
	switch (type) {

	default:
	we_re_toast:
#ifdef KDB
		/*
		 * NOTE(review): `psl' is not declared anywhere in this
		 * function -- the KDB path looks like it cannot compile;
		 * verify before enabling the option.
		 */
		if (kdb_trap(&psl))
			return;
#endif

		/* unexpected kernel trap: dump machine state and panic */
		printf("trap type %d code = %x eip = %x cs = %x eflags = %x ",
			frame.tf_trapno, frame.tf_err, frame.tf_eip,
			frame.tf_cs, frame.tf_eflags);
		printf("cr2 %x cpl %x\n", eva, cpl);
		type &= ~T_USER;
		panic("trap");
		/*NOTREACHED*/

	case T_SEGNPFLT|T_USER:		/* segment not present */
	case T_STKFLT|T_USER:		/* 386bsd */
	case T_PROTFLT|T_USER:		/* protection fault */
		ucode = code + BUS_SEGM_FAULT ;
		i = SIGBUS;
		break;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_RESADFLT|T_USER:		/* reserved addressing fault */
	case T_RESOPFLT|T_USER:		/* reserved operand fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		ucode = type &~ T_USER;
		i = SIGILL;
		break;

	case T_ASTFLT|T_USER:		/* Allow process switch */
	case T_ASTFLT:
		astoff();
		/* post the profiling tick deferred by the clock interrupt */
		if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
			addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
			p->p_flag &= ~SOWEUPC;
		}
		goto out;

	case T_DNA|T_USER:		/* coprocessor not available */
#include "npx.h"
#if NNPX > 0
		/* if a transparent fault (due to context switch "late") */
		if (npxdna()) return;
#endif
		ucode = FPE_FPU_NP_TRAP;
		i = SIGFPE;
		break;

	case T_BOUND|T_USER:
		ucode = FPE_SUBRNG_TRAP;
		i = SIGFPE;
		break;

	case T_OFLOW|T_USER:
		ucode = FPE_INTOVF_TRAP;
		i = SIGFPE;
		break;

	case T_DIVIDE|T_USER:
		ucode = FPE_INTDIV_TRAP;
		i = SIGFPE;
		break;

	case T_ARITHTRAP|T_USER:
		ucode = code;
		i = SIGFPE;
		break;

	case T_PAGEFLT:			/* allow page faults in kernel mode */
		/* protection violation (page was present) in kernel: fatal */
		if (code & PGEX_P) goto we_re_toast;

		/* fall into */
	case T_PAGEFLT|T_USER:		/* page fault */
	    {
		register vm_offset_t va;
		register struct vmspace *vm = p->p_vmspace;
		register vm_map_t map;
		int rv;
		vm_prot_t ftype;
		extern vm_map_t kernel_map;

		va = trunc_page((vm_offset_t)eva);
		/*
		 * It is only a kernel address space fault iff:
		 * 	1. (type & T_USER) == 0  and
		 * 	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 *
		 * NOTE(review): 0xfe000000 is presumably the base of the
		 * kernel's virtual address space -- confirm against pmap.h.
		 */
		if (type == T_PAGEFLT && va >= 0xfe000000)
			map = kernel_map;
		else
			map = &vm->vm_map;
		if (code & PGEX_W)
			ftype = VM_PROT_READ | VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		rv = user_page_fault(p, map, va, ftype, type);

		if (rv == KERN_SUCCESS) {
			/* kernel fault resolved: retry the faulting access */
			if (type == T_PAGEFLT)
				return;
			goto out;
		}

		if (type == T_PAGEFLT) {
			/* unresolved kernel fault: recover via onfault or die */
			if (curpcb->pcb_onfault)
				goto copyfault;
			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
			       map, va, ftype, rv);
			printf("  type %x, code %x\n",
			       type, code);
			goto we_re_toast;
		}
		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
		break;
	    }

	case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
		frame.tf_eflags &= ~PSL_T;

			/* Q: how do we turn it on again? */
		return;

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		frame.tf_eflags &= ~PSL_T;
		i = SIGTRAP;
		break;

#include "isa.h"
#if	NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
		/* machine/parity/power fail/"kitchen sink" faults */
		if(isa_nmi(code) == 0) return;
		else goto we_re_toast;
#endif
	}

	trapsignal(p, i, ucode);
	/* kernel-mode traps are finished once the signal is posted */
	if ((type & T_USER) == 0)
		return;
out:
	/* returning to user mode: deliver signals, reschedule, profile */
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int pl;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		pl = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(pl);
		/* signals may have arrived while we were switched out */
		while (i = CURSIG(p))
			psig(i);
	}
	/* charge the system time consumed here to the profiling buffer */
	if (p->p_stats->p_prof.pr_scale) {
		int ticks;
		struct timeval *tv = &p->p_stime;

		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
	curpcb->pcb_flags &= ~FM_TRAP;	/* used by sendsig */
}
272 
273 /*
274  * syscall(frame):
275  *	System call request from POSIX system call gate interface to kernel.
276  * Like trap(), argument is call by reference.
277  */
/*ARGSUSED*/
syscall(frame)
	volatile struct syscframe frame;
{
	register int *locr0 = ((int *)&frame);	/* XXX set but never used below */
	register caddr_t params;	/* user-stack address of the arguments */
	register int i;
	register struct sysent *callp;	/* dispatch entry for this call */
	register struct proc *p = curproc;
	struct timeval syst;		/* system time at entry, for profiling */
	int error, opc;			/* handler result; pc of the lcall */
	int args[8], rval[2];		/* copied-in args / returned values */
	unsigned int code;		/* system call number (from %eax) */

#ifdef lint
	r0 = 0; r0 = r0; r1 = 0; r1 = r1;
#endif
	syst = p->p_stime;
	/* the syscall gate is reachable from user mode only */
	if (ISPL(frame.sf_cs) != SEL_UPL)
		panic("syscall");

	code = frame.sf_eax;
	p->p_md.md_regs = (int *)&frame;
	curpcb->pcb_flags &= ~FM_TRAP;	/* used by sendsig */
	/* arguments sit on the user stack just above the return address */
	params = (caddr_t)frame.sf_esp + sizeof (int) ;

	/*
	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
	 */
	opc = frame.sf_eip - 7;
	/*
	 * NOTE(review): out-of-range call numbers are vectored to
	 * sysent[63] -- presumably the invalid-syscall (nosys) slot;
	 * confirm against the sysent table.
	 */
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	/* entry 0 takes the real call number as its first argument */
	if (callp == sysent) {
		code = fuword(params);
		params += sizeof (int);
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	}

	/* copy the declared number of argument words in from user space */
	if ((i = callp->sy_narg * sizeof (int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		/* copyin failed: report the error in %eax with carry set */
		frame.sf_eax = error;
		frame.sf_eflags |= PSL_C;	/* carry bit */
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
#endif
		goto done;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
#endif
	rval[0] = 0;
	rval[1] = frame.sf_edx;
	error = (*callp->sy_call)(p, args, rval);
	if (error == ERESTART)
		/* back up the pc so the lcall is re-executed */
		frame.sf_eip = opc;
	else if (error != EJUSTRETURN) {
		if (error) {
			/* failure: errno in %eax, carry set */
			frame.sf_eax = error;
			frame.sf_eflags |= PSL_C;	/* carry bit */
		} else {
			/* success: results in %eax/%edx, carry clear */
			frame.sf_eax = rval[0];
			frame.sf_edx = rval[1];
			frame.sf_eflags &= ~PSL_C;	/* carry bit */
		}
	}
	/* else if (error == EJUSTRETURN) */
		/* nothing to do */
done:
	/*
	 * Reinitialize proc pointer `p' as it may be different
	 * if this is a child returning from fork syscall.
	 */
	p = curproc;
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int pl;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		pl = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(pl);
		/* signals may have arrived while we were switched out */
		while (i = CURSIG(p))
			psig(i);
	}
	/* charge the system time consumed here to the profiling buffer */
	if (p->p_stats->p_prof.pr_scale) {
		int ticks;
		struct timeval *tv = &p->p_stime;

		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.sf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.sf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
395 
/*
 * user_page_fault(p, map, addr, ftype, type):
 *	Resolve a page fault at `addr' in `map' on behalf of process p.
 *	Enforces the stack rlimit for faults in the stack region, faults
 *	the page table page in first when its PDE is invalid, and bumps
 *	page-table wiring via vm_map_pageable.  Returns a KERN_* status
 *	from vm_fault() (KERN_SUCCESS when the fault was resolved, or
 *	KERN_FAILURE when the stack limit is exceeded).
 *	NOTE(review): `addr' is declared caddr_t but the caller in trap()
 *	passes a vm_offset_t -- harmless under K&R promotion, but untidy.
 */
int
user_page_fault (p, map, addr, ftype, type)
struct proc *p;
vm_map_t map;
caddr_t addr;
vm_prot_t ftype;
int type;
{
	struct vmspace *vm;
	vm_offset_t va;		/* page-aligned fault address */
	int rv;
	extern vm_map_t kernel_map;
	unsigned nss, v;	/* new stack size (clicks); PT page address */

	vm = p->p_vmspace;

	va = trunc_page((vm_offset_t)addr);

	/*
	 * XXX: rude hack to make stack limits "work"
	 */
	nss = 0;
	if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
		/* fault is in the stack region: enforce RLIMIT_STACK */
		nss = clrnd(btoc(USRSTACK - (unsigned)va));
		if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
			return (KERN_FAILURE);
	}

	/* check if page table is mapped, if not, fault it first */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
	if (!pde_v(va)) {
		/* v = page holding the PTE for va, via the recursive map */
		v = trunc_page(vtopte(va));
		if ((rv = vm_fault(map, v, ftype, FALSE)) != KERN_SUCCESS)
			return (rv);
		/* check if page table fault, increment wiring */
		vm_map_pageable(map, v, round_page(v+1), FALSE);
	} else
		v = 0;	/* PT page was already mapped */

	if ((rv = vm_fault(map, va, ftype, FALSE)) != KERN_SUCCESS)
		return (rv);

	/*
	 * XXX: continuation of rude stack hack
	 */
	if (nss > vm->vm_ssize)
		vm->vm_ssize = nss;	/* record the grown stack size */
	va = trunc_page(vtopte(va));
	/*
	 * for page table, increment wiring
	 * as long as not a page table fault as well
	 */
	if (!v && type != T_PAGEFLT)
		vm_map_pageable(map, va, round_page(va+1), FALSE);
	return (KERN_SUCCESS);
}
452 
453 int
454 user_write_fault (addr)
455 void *addr;
456 {
457 	if (user_page_fault (curproc, &curproc->p_vmspace->vm_map,
458 			     addr, VM_PROT_READ | VM_PROT_WRITE,
459 			     T_PAGEFLT) == KERN_SUCCESS)
460 		return (0);
461 	else
462 		return (EFAULT);
463 }
464 
465 int
466 copyout (from, to, len)
467 void *from;
468 void *to;
469 u_int len;
470 {
471 	u_int *pte, *pde;
472 	int rest_of_page;
473 	int thistime;
474 	int err;
475 
476 	/* be very careful not to overflow doing this check */
477 	if (to >= (void *)USRSTACK || (void *)USRSTACK - to < len)
478 		return (EFAULT);
479 
480 	pte = (u_int *)vtopte (to);
481 	pde = (u_int *)vtopte (pte);
482 
483 	rest_of_page = PAGE_SIZE - ((int)to & (PAGE_SIZE - 1));
484 
485 	while (1) {
486 		thistime = len;
487 		if (thistime > rest_of_page)
488 			thistime = rest_of_page;
489 
490 		if ((*pde & PG_V) == 0
491 		    || (*pte & (PG_V | PG_UW)) != (PG_V | PG_UW))
492 			if (err = user_write_fault (to))
493 				return (err);
494 
495 		bcopy (from, to, thistime);
496 
497 		len -= thistime;
498 
499 		/*
500 		 * Break out as soon as possible in the common case
501 		 * that the whole transfer is containted in one page.
502 		 */
503 		if (len == 0)
504 			break;
505 
506 		from += thistime;
507 		to += thistime;
508 		pte++;
509 		pde = (u_int *)vtopte (pte);
510 		rest_of_page = PAGE_SIZE;
511 	}
512 
513 	return (0);
514 }
515