1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * the University of Utah, and William Jolitz. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)trap.c 8.1 (Berkeley) 06/11/93 11 */ 12 13 /* 14 * 386 Trap and System call handleing 15 */ 16 17 #include <machine/cpu.h> 18 #include <machine/psl.h> 19 #include <machine/reg.h> 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/proc.h> 24 #include <sys/user.h> 25 #include <sys/acct.h> 26 #include <sys/kernel.h> 27 #ifdef KTRACE 28 #include <sys/ktrace.h> 29 #endif 30 31 #include <vm/vm_param.h> 32 #include <vm/pmap.h> 33 #include <vm/vm_map.h> 34 35 #include <machine/trap.h> 36 #include <machine/dbg.h> 37 38 39 struct sysent sysent[]; 40 int nsysent; 41 unsigned rcr2(); 42 extern short cpl; 43 44 45 /* 46 * trap(frame): 47 * Exception, fault, and trap interface to BSD kernel. This 48 * common code is called from assembly language IDT gate entry 49 * routines that prepare a suitable stack frame, and restore this 50 * frame after the exception has been processed. Note that the 51 * effect is as if the arguments were passed call by reference. 52 */ 53 54 /*ARGSUSED*/ 55 trap(frame) 56 struct trapframe frame; 57 { 58 register int i; 59 register struct proc *p = curproc; 60 u_quad_t sticks; 61 int ucode, type, code, eva; 62 extern int cold; 63 64 if(cold) goto we_re_toast; 65 frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ 66 type = frame.tf_trapno; 67 68 if (curpcb && curpcb->pcb_onfault && frame.tf_trapno != 0xc) { 69 copyfault: frame.tf_eip = (int)curpcb->pcb_onfault; 70 return; 71 } 72 73 if (ISPL(frame.tf_cs) == SEL_UPL) { 74 type |= T_USER; 75 p->p_md.md_regs = (int *)&frame; 76 curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ 77 sticks = p->p_sticks; 78 } 79 80 ucode=0; 81 eva = rcr2(); 82 code = frame.tf_err; 83 switch (type) { 84 85 default: 86 we_re_toast: 87 #ifdef KDB 88 if (kdb_trap(&psl)) 89 return; 90 #endif 91 92 printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", 93 frame.tf_trapno, frame.tf_err, frame.tf_eip, 94 frame.tf_cs, frame.tf_eflags); 95 printf("cr2 %x cpl %x\n", eva, cpl); 96 type &= ~T_USER; 97 panic("trap"); 98 /*NOTREACHED*/ 99 100 case T_SEGNPFLT|T_USER: 101 case T_STKFLT|T_USER: /* 386bsd */ 102 case T_PROTFLT|T_USER: /* protection fault */ 103 ucode = code + BUS_SEGM_FAULT ; 104 i = SIGBUS; 105 break; 106 107 case T_PRIVINFLT|T_USER: /* privileged instruction fault */ 108 case T_RESADFLT|T_USER: /* reserved addressing fault */ 109 case T_RESOPFLT|T_USER: /* reserved operand fault */ 110 case T_FPOPFLT|T_USER: /* coprocessor operand fault */ 111 ucode = type &~ T_USER; 112 i = SIGILL; 113 break; 114 115 case T_ASTFLT|T_USER: /* Allow process switch */ 116 case T_ASTFLT: 117 astoff(); 118 if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) { 119 addupc(frame.tf_eip, &p->p_stats->p_prof, 1); 120 p->p_flag &= ~SOWEUPC; 121 } 122 goto out; 123 124 case T_DNA|T_USER: 125 #include "npx.h" 126 #if NNPX > 0 127 /* if a transparent fault (due to context switch "late") */ 128 if (npxdna()) return; 129 #endif 130 ucode = FPE_FPU_NP_TRAP; 131 i = SIGFPE; 132 break; 133 134 case T_BOUND|T_USER: 135 ucode = FPE_SUBRNG_TRAP; 136 i = SIGFPE; 137 break; 138 139 case T_OFLOW|T_USER: 140 ucode = FPE_INTOVF_TRAP; 141 i = SIGFPE; 142 break; 143 144 case T_DIVIDE|T_USER: 145 ucode = FPE_INTDIV_TRAP; 146 i = SIGFPE; 147 break; 148 149 case T_ARITHTRAP|T_USER: 150 ucode = code; 151 i = SIGFPE; 152 break; 153 154 case T_PAGEFLT: /* allow page faults in kernel mode */ 155 if (code & PGEX_P) goto we_re_toast; 156 157 /* fall into */ 158 case T_PAGEFLT|T_USER: /* page fault */ 159 { 160 register vm_offset_t va; 161 register struct vmspace *vm = p->p_vmspace; 162 register vm_map_t map; 163 int rv; 164 vm_prot_t ftype; 165 extern vm_map_t kernel_map; 166 167 va = trunc_page((vm_offset_t)eva); 168 /* 169 * It is only a kernel address space fault iff: 170 * 1. (type & T_USER) == 0 and 171 * 2. pcb_onfault not set or 172 * 3. pcb_onfault set but supervisor space fault 173 * The last can occur during an exec() copyin where the 174 * argument space is lazy-allocated. 175 */ 176 if (type == T_PAGEFLT && va >= 0xfe000000) 177 map = kernel_map; 178 else 179 map = &vm->vm_map; 180 if (code & PGEX_W) 181 ftype = VM_PROT_READ | VM_PROT_WRITE; 182 else 183 ftype = VM_PROT_READ; 184 185 rv = user_page_fault(p, map, va, ftype, type); 186 187 if (rv == KERN_SUCCESS) { 188 if (type == T_PAGEFLT) 189 return; 190 goto out; 191 } 192 193 if (type == T_PAGEFLT) { 194 if (curpcb->pcb_onfault) 195 goto copyfault; 196 printf("vm_fault(%x, %x, %x, 0) -> %x\n", 197 map, va, ftype, rv); 198 printf(" type %x, code %x\n", 199 type, code); 200 goto we_re_toast; 201 } 202 i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; 203 break; 204 } 205 206 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 207 frame.tf_eflags &= ~PSL_T; 208 209 /* Q: how do we turn it on again? */ 210 return; 211 212 case T_BPTFLT|T_USER: /* bpt instruction fault */ 213 case T_TRCTRAP|T_USER: /* trace trap */ 214 frame.tf_eflags &= ~PSL_T; 215 i = SIGTRAP; 216 break; 217 218 #include "isa.h" 219 #if NISA > 0 220 case T_NMI: 221 case T_NMI|T_USER: 222 /* machine/parity/power fail/"kitchen sink" faults */ 223 if(isa_nmi(code) == 0) return; 224 else goto we_re_toast; 225 #endif 226 } 227 228 trapsignal(p, i, ucode); 229 if ((type & T_USER) == 0) 230 return; 231 out: 232 while (i = CURSIG(p)) 233 psig(i); 234 p->p_pri = p->p_usrpri; 235 if (want_resched) { 236 int pl; 237 /* 238 * Since we are curproc, clock will normally just change 239 * our priority without moving us from one queue to another 240 * (since the running process is not on a queue.) 241 * If that happened after we setrq ourselves but before we 242 * swtch()'ed, we might not be on the queue indicated by 243 * our priority. 244 */ 245 pl = splclock(); 246 setrq(p); 247 p->p_stats->p_ru.ru_nivcsw++; 248 swtch(); 249 splx(pl); 250 while (i = CURSIG(p)) 251 psig(i); 252 } 253 if (p->p_stats->p_prof.pr_scale) { 254 u_quad_t ticks = p->p_sticks - sticks; 255 256 if (ticks) { 257 #ifdef PROFTIMER 258 extern int profscale; 259 addupc(frame.tf_eip, &p->p_stats->p_prof, 260 ticks * profscale); 261 #else 262 addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); 263 #endif 264 } 265 } 266 curpri = p->p_pri; 267 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 268 } 269 270 /* 271 * syscall(frame): 272 * System call request from POSIX system call gate interface to kernel. 273 * Like trap(), argument is call by reference. 274 */ 275 /*ARGSUSED*/ 276 syscall(frame) 277 volatile struct syscframe frame; 278 { 279 register int *locr0 = ((int *)&frame); 280 register caddr_t params; 281 register int i; 282 register struct sysent *callp; 283 register struct proc *p = curproc; 284 u_quad_t sticks; 285 int error, opc; 286 int args[8], rval[2]; 287 unsigned int code; 288 289 #ifdef lint 290 r0 = 0; r0 = r0; r1 = 0; r1 = r1; 291 #endif 292 sticks = p->p_sticks; 293 if (ISPL(frame.sf_cs) != SEL_UPL) 294 panic("syscall"); 295 296 code = frame.sf_eax; 297 p->p_md.md_regs = (int *)&frame; 298 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 299 params = (caddr_t)frame.sf_esp + sizeof (int) ; 300 301 /* 302 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 303 */ 304 opc = frame.sf_eip - 7; 305 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 306 if (callp == sysent) { 307 code = fuword(params); 308 params += sizeof (int); 309 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 310 } 311 312 if ((i = callp->sy_narg * sizeof (int)) && 313 (error = copyin(params, (caddr_t)args, (u_int)i))) { 314 frame.sf_eax = error; 315 frame.sf_eflags |= PSL_C; /* carry bit */ 316 #ifdef KTRACE 317 if (KTRPOINT(p, KTR_SYSCALL)) 318 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 319 #endif 320 goto done; 321 } 322 #ifdef KTRACE 323 if (KTRPOINT(p, KTR_SYSCALL)) 324 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 325 #endif 326 rval[0] = 0; 327 rval[1] = frame.sf_edx; 328 error = (*callp->sy_call)(p, args, rval); 329 if (error == ERESTART) 330 frame.sf_eip = opc; 331 else if (error != EJUSTRETURN) { 332 if (error) { 333 frame.sf_eax = error; 334 frame.sf_eflags |= PSL_C; /* carry bit */ 335 } else { 336 frame.sf_eax = rval[0]; 337 frame.sf_edx = rval[1]; 338 frame.sf_eflags &= ~PSL_C; /* carry bit */ 339 } 340 } 341 /* else if (error == EJUSTRETURN) */ 342 /* nothing to do */ 343 done: 344 /* 345 * Reinitialize proc pointer `p' as it may be different 346 * if this is a child returning from fork syscall. 347 */ 348 p = curproc; 349 while (i = CURSIG(p)) 350 psig(i); 351 p->p_pri = p->p_usrpri; 352 if (want_resched) { 353 int pl; 354 /* 355 * Since we are curproc, clock will normally just change 356 * our priority without moving us from one queue to another 357 * (since the running process is not on a queue.) 358 * If that happened after we setrq ourselves but before we 359 * swtch()'ed, we might not be on the queue indicated by 360 * our priority. 361 */ 362 pl = splclock(); 363 setrq(p); 364 p->p_stats->p_ru.ru_nivcsw++; 365 swtch(); 366 splx(pl); 367 while (i = CURSIG(p)) 368 psig(i); 369 } 370 if (p->p_stats->p_prof.pr_scale) { 371 u_quad_t ticks = p->p_sticks - sticks; 372 373 if (ticks) { 374 #ifdef PROFTIMER 375 extern int profscale; 376 addupc(frame.sf_eip, &p->p_stats->p_prof, 377 ticks * profscale); 378 #else 379 addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); 380 #endif 381 } 382 } 383 curpri = p->p_pri; 384 #ifdef KTRACE 385 if (KTRPOINT(p, KTR_SYSRET)) 386 ktrsysret(p->p_tracep, code, error, rval[0]); 387 #endif 388 } 389 390 int 391 user_page_fault (p, map, addr, ftype, type) 392 struct proc *p; 393 vm_map_t map; 394 caddr_t addr; 395 vm_prot_t ftype; 396 int type; 397 { 398 struct vmspace *vm; 399 vm_offset_t va; 400 int rv; 401 extern vm_map_t kernel_map; 402 unsigned nss, v; 403 404 vm = p->p_vmspace; 405 406 va = trunc_page((vm_offset_t)addr); 407 408 /* 409 * XXX: rude hack to make stack limits "work" 410 */ 411 nss = 0; 412 if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) { 413 nss = clrnd(btoc(USRSTACK - (unsigned)va)); 414 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) 415 return (KERN_FAILURE); 416 } 417 418 /* check if page table is mapped, if not, fault it first */ 419 #define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) 420 if (!pde_v(va)) { 421 v = trunc_page(vtopte(va)); 422 if ((rv = vm_fault(map, v, ftype, FALSE)) != KERN_SUCCESS) 423 return (rv); 424 /* check if page table fault, increment wiring */ 425 vm_map_pageable(map, v, round_page(v+1), FALSE); 426 } else 427 v = 0; 428 429 if ((rv = vm_fault(map, va, ftype, FALSE)) != KERN_SUCCESS) 430 return (rv); 431 432 /* 433 * XXX: continuation of rude stack hack 434 */ 435 if (nss > vm->vm_ssize) 436 vm->vm_ssize = nss; 437 va = trunc_page(vtopte(va)); 438 /* 439 * for page table, increment wiring 440 * as long as not a page table fault as well 441 */ 442 if (!v && type != T_PAGEFLT) 443 vm_map_pageable(map, va, round_page(va+1), FALSE); 444 return (KERN_SUCCESS); 445 } 446 447 int 448 user_write_fault (addr) 449 void *addr; 450 { 451 if (user_page_fault (curproc, &curproc->p_vmspace->vm_map, 452 addr, VM_PROT_READ | VM_PROT_WRITE, 453 T_PAGEFLT) == KERN_SUCCESS) 454 return (0); 455 else 456 return (EFAULT); 457 } 458 459 int 460 copyout (from, to, len) 461 void *from; 462 void *to; 463 u_int len; 464 { 465 u_int *pte, *pde; 466 int rest_of_page; 467 int thistime; 468 int err; 469 470 /* be very careful not to overflow doing this check */ 471 if (to >= (void *)USRSTACK || (void *)USRSTACK - to < len) 472 return (EFAULT); 473 474 pte = (u_int *)vtopte (to); 475 pde = (u_int *)vtopte (pte); 476 477 rest_of_page = PAGE_SIZE - ((int)to & (PAGE_SIZE - 1)); 478 479 while (1) { 480 thistime = len; 481 if (thistime > rest_of_page) 482 thistime = rest_of_page; 483 484 if ((*pde & PG_V) == 0 485 || (*pte & (PG_V | PG_UW)) != (PG_V | PG_UW)) 486 if (err = user_write_fault (to)) 487 return (err); 488 489 bcopy (from, to, thistime); 490 491 len -= thistime; 492 493 /* 494 * Break out as soon as possible in the common case 495 * that the whole transfer is containted in one page. 496 */ 497 if (len == 0) 498 break; 499 500 from += thistime; 501 to += thistime; 502 pte++; 503 pde = (u_int *)vtopte (pte); 504 rest_of_page = PAGE_SIZE; 505 } 506 507 return (0); 508 } 509