/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)trap.c	8.4 (Berkeley) 09/23/93
 */

/*
 * 386 Trap and System call handling
 */

#include <machine/cpu.h>
#include <machine/psl.h>
#include <machine/reg.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <machine/trap.h>
#include <machine/dbg.h>


struct sysent sysent[];
int	nsysent;
unsigned rcr2();
extern short cpl;


/*
 * trap(frame):
 *	Exception, fault, and trap interface to BSD kernel. This
 * common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed. Note that the
 * effect is as if the arguments were passed call by reference.
52 */ 53 54 /*ARGSUSED*/ 55 trap(frame) 56 struct trapframe frame; 57 { 58 register int i; 59 register struct proc *p = curproc; 60 u_quad_t sticks; 61 int ucode, type, code, eva; 62 extern int cold; 63 64 if(cold) goto we_re_toast; 65 frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ 66 type = frame.tf_trapno; 67 68 if (curpcb && curpcb->pcb_onfault && frame.tf_trapno != 0xc) { 69 copyfault: frame.tf_eip = (int)curpcb->pcb_onfault; 70 return; 71 } 72 73 if (ISPL(frame.tf_cs) == SEL_UPL) { 74 type |= T_USER; 75 p->p_md.md_regs = (int *)&frame; 76 curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ 77 sticks = p->p_sticks; 78 } 79 80 ucode=0; 81 eva = rcr2(); 82 code = frame.tf_err; 83 switch (type) { 84 85 default: 86 we_re_toast: 87 #ifdef KDB 88 if (kdb_trap(&psl)) 89 return; 90 #endif 91 92 printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", 93 frame.tf_trapno, frame.tf_err, frame.tf_eip, 94 frame.tf_cs, frame.tf_eflags); 95 printf("cr2 %x cpl %x\n", eva, cpl); 96 type &= ~T_USER; 97 panic("trap"); 98 /*NOTREACHED*/ 99 100 case T_SEGNPFLT|T_USER: 101 case T_STKFLT|T_USER: /* 386bsd */ 102 case T_PROTFLT|T_USER: /* protection fault */ 103 ucode = code + BUS_SEGM_FAULT ; 104 i = SIGBUS; 105 break; 106 107 case T_PRIVINFLT|T_USER: /* privileged instruction fault */ 108 case T_RESADFLT|T_USER: /* reserved addressing fault */ 109 case T_RESOPFLT|T_USER: /* reserved operand fault */ 110 case T_FPOPFLT|T_USER: /* coprocessor operand fault */ 111 ucode = type &~ T_USER; 112 i = SIGILL; 113 break; 114 115 case T_ASTFLT|T_USER: /* Allow process switch */ 116 case T_ASTFLT: 117 astoff(); 118 if ((p->p_flag & P_OWEUPC) && p->p_stats->p_prof.pr_scale) { 119 addupc(frame.tf_eip, &p->p_stats->p_prof, 1); 120 p->p_flag &= ~P_OWEUPC; 121 } 122 goto out; 123 124 case T_DNA|T_USER: 125 #include "npx.h" 126 #if NNPX > 0 127 /* if a transparent fault (due to context switch "late") */ 128 if (npxdna()) return; 129 #endif 130 ucode = FPE_FPU_NP_TRAP; 131 i = SIGFPE; 
132 break; 133 134 case T_BOUND|T_USER: 135 ucode = FPE_SUBRNG_TRAP; 136 i = SIGFPE; 137 break; 138 139 case T_OFLOW|T_USER: 140 ucode = FPE_INTOVF_TRAP; 141 i = SIGFPE; 142 break; 143 144 case T_DIVIDE|T_USER: 145 ucode = FPE_INTDIV_TRAP; 146 i = SIGFPE; 147 break; 148 149 case T_ARITHTRAP|T_USER: 150 ucode = code; 151 i = SIGFPE; 152 break; 153 154 case T_PAGEFLT: /* allow page faults in kernel mode */ 155 if (code & PGEX_P) goto we_re_toast; 156 157 /* fall into */ 158 case T_PAGEFLT|T_USER: /* page fault */ 159 { 160 register vm_offset_t va; 161 register struct vmspace *vm = p->p_vmspace; 162 register vm_map_t map; 163 int rv; 164 vm_prot_t ftype; 165 extern vm_map_t kernel_map; 166 167 va = trunc_page((vm_offset_t)eva); 168 /* 169 * It is only a kernel address space fault iff: 170 * 1. (type & T_USER) == 0 and 171 * 2. pcb_onfault not set or 172 * 3. pcb_onfault set but supervisor space fault 173 * The last can occur during an exec() copyin where the 174 * argument space is lazy-allocated. 175 */ 176 if (type == T_PAGEFLT && va >= 0xfe000000) 177 map = kernel_map; 178 else 179 map = &vm->vm_map; 180 if (code & PGEX_W) 181 ftype = VM_PROT_READ | VM_PROT_WRITE; 182 else 183 ftype = VM_PROT_READ; 184 185 rv = user_page_fault(p, map, va, ftype, type); 186 187 if (rv == KERN_SUCCESS) { 188 if (type == T_PAGEFLT) 189 return; 190 goto out; 191 } 192 193 if (type == T_PAGEFLT) { 194 if (curpcb->pcb_onfault) 195 goto copyfault; 196 printf("vm_fault(%x, %x, %x, 0) -> %x\n", 197 map, va, ftype, rv); 198 printf(" type %x, code %x\n", 199 type, code); 200 goto we_re_toast; 201 } 202 i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; 203 break; 204 } 205 206 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 207 frame.tf_eflags &= ~PSL_T; 208 209 /* Q: how do we turn it on again? 
*/ 210 return; 211 212 case T_BPTFLT|T_USER: /* bpt instruction fault */ 213 case T_TRCTRAP|T_USER: /* trace trap */ 214 frame.tf_eflags &= ~PSL_T; 215 i = SIGTRAP; 216 break; 217 218 #include "isa.h" 219 #if NISA > 0 220 case T_NMI: 221 case T_NMI|T_USER: 222 /* machine/parity/power fail/"kitchen sink" faults */ 223 if(isa_nmi(code) == 0) return; 224 else goto we_re_toast; 225 #endif 226 } 227 228 trapsignal(p, i, ucode); 229 if ((type & T_USER) == 0) 230 return; 231 out: 232 while (i = CURSIG(p)) 233 postsig(i); 234 p->p_priority = p->p_usrpri; 235 if (want_resched) { 236 int pl; 237 238 /* 239 * Since we are curproc, clock will normally just change 240 * our priority without moving us from one queue to another 241 * (since the running process is not on a queue.) 242 * If that happened after we put ourselves on the run queue 243 * but before we switched, we might not be on the queue 244 * indicated by our priority. 245 */ 246 pl = splclock(); 247 setrunqueue(p); 248 p->p_stats->p_ru.ru_nivcsw++; 249 mi_switch(); 250 splx(pl); 251 while (i = CURSIG(p)) 252 postsig(i); 253 } 254 if (p->p_stats->p_prof.pr_scale) { 255 u_quad_t ticks = p->p_sticks - sticks; 256 257 if (ticks) { 258 #ifdef PROFTIMER 259 extern int profscale; 260 addupc(frame.tf_eip, &p->p_stats->p_prof, 261 ticks * profscale); 262 #else 263 addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); 264 #endif 265 } 266 } 267 curpriority = p->p_priority; 268 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 269 } 270 271 /* 272 * syscall(frame): 273 * System call request from POSIX system call gate interface to kernel. 274 * Like trap(), argument is call by reference. 
275 */ 276 /*ARGSUSED*/ 277 syscall(frame) 278 volatile struct syscframe frame; 279 { 280 register int *locr0 = ((int *)&frame); 281 register caddr_t params; 282 register int i; 283 register struct sysent *callp; 284 register struct proc *p = curproc; 285 u_quad_t sticks; 286 int error, opc; 287 int args[8], rval[2]; 288 unsigned int code; 289 290 #ifdef lint 291 r0 = 0; r0 = r0; r1 = 0; r1 = r1; 292 #endif 293 sticks = p->p_sticks; 294 if (ISPL(frame.sf_cs) != SEL_UPL) 295 panic("syscall"); 296 297 code = frame.sf_eax; 298 p->p_md.md_regs = (int *)&frame; 299 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 300 params = (caddr_t)frame.sf_esp + sizeof (int) ; 301 302 /* 303 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 304 */ 305 opc = frame.sf_eip - 7; 306 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 307 if (callp == sysent) { 308 code = fuword(params); 309 params += sizeof (int); 310 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 311 } 312 313 if ((i = callp->sy_narg * sizeof (int)) && 314 (error = copyin(params, (caddr_t)args, (u_int)i))) { 315 frame.sf_eax = error; 316 frame.sf_eflags |= PSL_C; /* carry bit */ 317 #ifdef KTRACE 318 if (KTRPOINT(p, KTR_SYSCALL)) 319 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 320 #endif 321 goto done; 322 } 323 #ifdef KTRACE 324 if (KTRPOINT(p, KTR_SYSCALL)) 325 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 326 #endif 327 rval[0] = 0; 328 rval[1] = frame.sf_edx; 329 error = (*callp->sy_call)(p, args, rval); 330 if (error == ERESTART) 331 frame.sf_eip = opc; 332 else if (error != EJUSTRETURN) { 333 if (error) { 334 frame.sf_eax = error; 335 frame.sf_eflags |= PSL_C; /* carry bit */ 336 } else { 337 frame.sf_eax = rval[0]; 338 frame.sf_edx = rval[1]; 339 frame.sf_eflags &= ~PSL_C; /* carry bit */ 340 } 341 } 342 /* else if (error == EJUSTRETURN) */ 343 /* nothing to do */ 344 done: 345 /* 346 * Reinitialize proc pointer `p' as it may be different 347 * if this is a 
child returning from fork syscall. 348 */ 349 p = curproc; 350 while (i = CURSIG(p)) 351 postsig(i); 352 p->p_priority = p->p_usrpri; 353 if (want_resched) { 354 int pl; 355 356 /* 357 * Since we are curproc, clock will normally just change 358 * our priority without moving us from one queue to another 359 * (since the running process is not on a queue.) 360 * If that happened after we put ourselves on the run queue 361 * but before we switched, we might not be on the queue 362 * indicated by our priority. 363 */ 364 pl = splclock(); 365 setrunqueue(p); 366 p->p_stats->p_ru.ru_nivcsw++; 367 mi_switch(); 368 splx(pl); 369 while (i = CURSIG(p)) 370 postsig(i); 371 } 372 if (p->p_stats->p_prof.pr_scale) { 373 u_quad_t ticks = p->p_sticks - sticks; 374 375 if (ticks) { 376 #ifdef PROFTIMER 377 extern int profscale; 378 addupc(frame.sf_eip, &p->p_stats->p_prof, 379 ticks * profscale); 380 #else 381 addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); 382 #endif 383 } 384 } 385 curpriority = p->p_priority; 386 #ifdef KTRACE 387 if (KTRPOINT(p, KTR_SYSRET)) 388 ktrsysret(p->p_tracep, code, error, rval[0]); 389 #endif 390 } 391 392 int 393 user_page_fault (p, map, addr, ftype, type) 394 struct proc *p; 395 vm_map_t map; 396 caddr_t addr; 397 vm_prot_t ftype; 398 int type; 399 { 400 struct vmspace *vm; 401 vm_offset_t va; 402 int rv; 403 extern vm_map_t kernel_map; 404 unsigned nss, v; 405 406 vm = p->p_vmspace; 407 408 va = trunc_page((vm_offset_t)addr); 409 410 /* 411 * XXX: rude hack to make stack limits "work" 412 */ 413 nss = 0; 414 if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) { 415 nss = clrnd(btoc(USRSTACK - (unsigned)va)); 416 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) 417 return (KERN_FAILURE); 418 } 419 420 /* check if page table is mapped, if not, fault it first */ 421 #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) 422 if (!pde_v(va)) { 423 v = trunc_page(vtopte(va)); 424 if ((rv = vm_fault(map, v, ftype, FALSE)) != KERN_SUCCESS) 425 return 
(rv); 426 /* check if page table fault, increment wiring */ 427 vm_map_pageable(map, v, round_page(v+1), FALSE); 428 } else 429 v = 0; 430 431 if ((rv = vm_fault(map, va, ftype, FALSE)) != KERN_SUCCESS) 432 return (rv); 433 434 /* 435 * XXX: continuation of rude stack hack 436 */ 437 if (nss > vm->vm_ssize) 438 vm->vm_ssize = nss; 439 va = trunc_page(vtopte(va)); 440 /* 441 * for page table, increment wiring 442 * as long as not a page table fault as well 443 */ 444 if (!v && type != T_PAGEFLT) 445 vm_map_pageable(map, va, round_page(va+1), FALSE); 446 return (KERN_SUCCESS); 447 } 448 449 int 450 user_write_fault (addr) 451 void *addr; 452 { 453 if (user_page_fault (curproc, &curproc->p_vmspace->vm_map, 454 addr, VM_PROT_READ | VM_PROT_WRITE, 455 T_PAGEFLT) == KERN_SUCCESS) 456 return (0); 457 else 458 return (EFAULT); 459 } 460 461 int 462 copyout (from, to, len) 463 void *from; 464 void *to; 465 u_int len; 466 { 467 u_int *pte, *pde; 468 int rest_of_page; 469 int thistime; 470 int err; 471 472 /* be very careful not to overflow doing this check */ 473 if (to >= (void *)USRSTACK || (void *)USRSTACK - to < len) 474 return (EFAULT); 475 476 pte = (u_int *)vtopte (to); 477 pde = (u_int *)vtopte (pte); 478 479 rest_of_page = PAGE_SIZE - ((int)to & (PAGE_SIZE - 1)); 480 481 while (1) { 482 thistime = len; 483 if (thistime > rest_of_page) 484 thistime = rest_of_page; 485 486 if ((*pde & PG_V) == 0 487 || (*pte & (PG_V | PG_UW)) != (PG_V | PG_UW)) 488 if (err = user_write_fault (to)) 489 return (err); 490 491 bcopy (from, to, thistime); 492 493 len -= thistime; 494 495 /* 496 * Break out as soon as possible in the common case 497 * that the whole transfer is containted in one page. 498 */ 499 if (len == 0) 500 break; 501 502 from += thistime; 503 to += thistime; 504 pte++; 505 pde = (u_int *)vtopte (pte); 506 rest_of_page = PAGE_SIZE; 507 } 508 509 return (0); 510 } 511