1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * the University of Utah, and William Jolitz. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)trap.c 5.10 (Berkeley) 04/15/91 11 */ 12 13 /* 14 * 386 Trap and System call handling 15 */ 16 17 #include "machine/psl.h" 18 #include "machine/reg.h" 19 #include "machine/segments.h" 20 #include "machine/frame.h" 21 22 #include "param.h" 23 #include "systm.h" 24 #include "user.h" 25 #include "proc.h" 26 #include "seg.h" 27 #include "acct.h" 28 #include "kernel.h" 29 #ifdef KTRACE 30 #include "ktrace.h" 31 #endif 32 33 #include "vm/vm_param.h" 34 #include "vm/pmap.h" 35 #include "vm/vm_map.h" 36 #include "sys/vmmeter.h" 37 38 #include "machine/trap.h" 39 #include "machine/dbg.h" 40 41 #define USER 0x40 /* user-mode flag added to type */ 42 #define FRMTRAP 0x100 /* distinguish trap from syscall */ 43 44 struct sysent sysent[]; 45 int nsysent; 46 /* 47 * trap(frame): 48 * Exception, fault, and trap interface to BSD kernel. This 49 * common code is called from assembly language IDT gate entry 50 * routines that prepare a suitable stack frame, and restore this 51 * frame after the exception has been processed. Note that the 52 * effect is as if the arguments were passed call by reference. 53 */ 54 unsigned rcr2(), Sysbase; 55 extern short cpl; 56 int um; 57 /*ARGSUSED*/ 58 trap(frame) 59 struct trapframe frame; 60 #define type frame.tf_trapno 61 #define code frame.tf_err 62 #define pc frame.tf_eip 63 { 64 register int *locr0 = ((int *)&frame); 65 register int i; 66 register struct proc *p; 67 struct timeval syst; 68 extern int nofault; 69 int ucode; 70 71 #define DEBUG 72 #ifdef DEBUG 73 dprintf(DALLTRAPS, "\n%d. trap",u.u_procp->p_pid); 74 dprintf(DALLTRAPS, " pc:%x cs:%x ds:%x eflags:%x isp %x\n", 75 frame.tf_eip, frame.tf_cs, frame.tf_ds, frame.tf_eflags, 76 frame.tf_isp); 77 dprintf(DALLTRAPS, "edi %x esi %x ebp %x ebx %x esp %x\n", 78 frame.tf_edi, frame.tf_esi, frame.tf_ebp, 79 frame.tf_ebx, frame.tf_esp); 80 dprintf(DALLTRAPS, "edx %x ecx %x eax %x\n", 81 frame.tf_edx, frame.tf_ecx, frame.tf_eax); 82 /*p=u.u_procp; 83 dprintf(DALLTRAPS, "sig %x %x %x \n", 84 p->p_sigignore, p->p_sigcatch, p->p_sigmask); */ 85 dprintf(DALLTRAPS, " ec %x type %x cpl %x ", 86 frame.tf_err&0xffff, frame.tf_trapno, cpl); 87 /*pg("trap cr2 %x", rcr2());*/ 88 #endif 89 90 /*if(um && frame.tf_trapno == 0xc && (rcr2()&0xfffff000) == 0){ 91 if (ISPL(locr0[tCS]) != SEL_UPL) { 92 if(nofault) goto anyways; 93 locr0[tEFLAGS] |= PSL_T; 94 *(int *)PTmap |= 1; load_cr3(rcr3()); 95 return; 96 } 97 } else if (um) { 98 printf("p %x ", *(int *) PTmap); 99 *(int *)PTmap &= 0xfffffffe; load_cr3(rcr3()); 100 printf("p %x ", *(int *) PTmap); 101 } 102 anyways: 103 104 if(pc == 0) um++;*/ 105 106 locr0[tEFLAGS] &= ~PSL_NT; /* clear nested trap XXX */ 107 if(nofault && frame.tf_trapno != 0xc) 108 { locr0[tEIP] = nofault; return;} 109 110 syst = u.u_ru.ru_stime; 111 if (ISPL(locr0[tCS]) == SEL_UPL) { 112 type |= USER; 113 u.u_ar0 = locr0; 114 } 115 ucode=0; 116 switch (type) { 117 118 default: 119 bit_sucker: 120 #ifdef KDB 121 if (kdb_trap(&psl)) 122 return; 123 #endif 124 125 splhigh(); 126 printf("cr2 %x cpl %x ", rcr2(), cpl); 127 printf("trap type %d, code = %x, pc = %x cs = %x, eflags = %x\n", type, code, pc, frame.tf_cs, frame.tf_eflags); 128 type &= ~USER; 129 pg("panic"); 130 panic("trap"); 131 /*NOTREACHED*/ 132 133 case T_SEGNPFLT + USER: 134 case T_PROTFLT + USER: /* protection fault */ 135 copyfault: 136 ucode = code + BUS_SEGM_FAULT ; 137 i = SIGBUS; 138 break; 139 140 case T_PRIVINFLT + USER: /* privileged instruction fault */ 141 case T_RESADFLT + USER: /* reserved addressing fault */ 142 case T_RESOPFLT + USER: /* reserved operand fault */ 143 case T_FPOPFLT + USER: /* coprocessor operand fault */ 144 ucode = type &~ USER; 145 i = SIGILL; 146 break; 147 148 case T_ASTFLT + USER: /* Allow process switch */ 149 case T_ASTFLT: 150 astoff(); 151 if ((u.u_procp->p_flag & SOWEUPC) && u.u_prof.pr_scale) { 152 addupc(pc, &u.u_prof, 1); 153 u.u_procp->p_flag &= ~SOWEUPC; 154 } 155 goto out; 156 157 case T_DNA + USER: 158 #ifdef NPX 159 if (npxdna()) return; 160 #endif 161 ucode = FPE_FPU_NP_TRAP; 162 i = SIGFPE; 163 break; 164 165 case T_BOUND + USER: 166 ucode = FPE_SUBRNG_TRAP; 167 i = SIGFPE; 168 break; 169 170 case T_OFLOW + USER: 171 ucode = FPE_INTOVF_TRAP; 172 i = SIGFPE; 173 break; 174 175 case T_DIVIDE + USER: 176 ucode = FPE_INTDIV_TRAP; 177 i = SIGFPE; 178 break; 179 180 case T_ARITHTRAP + USER: 181 ucode = code; 182 i = SIGFPE; 183 break; 184 185 case T_PAGEFLT: /* allow page faults in kernel mode */ 186 if (code & PGEX_P) goto bit_sucker; 187 /* fall into */ 188 case T_PAGEFLT + USER: /* page fault */ 189 { 190 register vm_offset_t va; 191 register vm_map_t map; 192 int rv; 193 vm_prot_t ftype; 194 extern vm_map_t kernel_map; 195 unsigned nss,v; 196 197 va = trunc_page((vm_offset_t)rcr2()); 198 /* 199 * It is only a kernel address space fault iff: 200 * 1. (type & USER) == 0 and 201 * 2. nofault not set or 202 * 3. nofault set but supervisor space data fault 203 * The last can occur during an exec() copyin where the 204 * argument space is lazy-allocated. 205 */ 206 /*if (type == T_PAGEFLT && !nofault)*/ 207 if (type == T_PAGEFLT && va >= 0xfe000000) 208 map = kernel_map; 209 else 210 map = u.u_procp->p_map; 211 if (code & PGEX_W) 212 ftype = VM_PROT_READ | VM_PROT_WRITE; 213 else 214 ftype = VM_PROT_READ; 215 216 #ifdef DEBUG 217 if (map == kernel_map && va == 0) { 218 printf("trap: bad kernel access at %x\n", v); 219 goto bit_sucker; 220 } 221 #endif 222 /* 223 * XXX: rude hack to make stack limits "work" 224 */ 225 nss = 0; 226 if ((caddr_t)va >= u.u_maxsaddr && map != kernel_map) { 227 nss = clrnd(btoc(USRSTACK-(unsigned)va)); 228 if (nss > btoc(u.u_rlimit[RLIMIT_STACK].rlim_cur)) { 229 pg("stk fuck"); 230 rv = KERN_FAILURE; 231 goto nogo; 232 } 233 } 234 /* check if page table is mapped, if not, fault it first */ 235 if (!PTD[(va>>PD_SHIFT)&1023].pd_v) { 236 v = trunc_page(vtopte(va)); 237 /*pg("pt fault");*/ 238 rv = vm_fault(map, v, ftype, FALSE); 239 if (rv != KERN_SUCCESS) goto nogo; 240 /* check if page table fault, increment wiring */ 241 vm_map_pageable(map, v, round_page(v+1), FALSE); 242 } else v=0; 243 rv = vm_fault(map, va, ftype, FALSE); 244 if (rv == KERN_SUCCESS) { 245 /* 246 * XXX: continuation of rude stack hack 247 */ 248 if (nss > u.u_ssize) 249 u.u_ssize = nss; 250 va = trunc_page(vtopte(va)); 251 /* for page table, increment wiring 252 as long as not a page table fault as well */ 253 if (!v && type != T_PAGEFLT) 254 vm_map_pageable(map, va, round_page(va+1), FALSE); 255 if (type == T_PAGEFLT) 256 return; 257 goto out; 258 } 259 nogo: 260 /*pg("nogo");*/ 261 if (type == T_PAGEFLT) { 262 if (nofault) 263 goto copyfault; 264 printf("vm_fault(%x, %x, %x, 0) -> %x\n", 265 map, va, ftype, rv); 266 printf(" type %x, code %x\n", 267 type, code); 268 goto bit_sucker; 269 } 270 i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; 271 break; 272 } 273 274 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 275 locr0[tEFLAGS] &= ~PSL_T; 276 if (um) {*(int *)PTmap &= 0xfffffffe; load_cr3(rcr3()); } 277 278 /* Q: how do we turn it on again? */ 279 return; 280 281 case T_BPTFLT + USER: /* bpt instruction fault */ 282 case T_TRCTRAP + USER: /* trace trap */ 283 locr0[tEFLAGS] &= ~PSL_T; 284 i = SIGTRAP; 285 break; 286 287 #include "isa.h" 288 #if NISA > 0 289 case T_NMI: 290 case T_NMI + USER: 291 if(isa_nmi(code) == 0) return; 292 else goto bit_sucker; 293 #endif 294 } 295 /*if(u.u_procp && (u.u_procp->p_pid == 1 || u.u_procp->p_pid == 3)) { 296 if( *(u_char *) 0xf7c != 0xc7) { 297 printf("%x!", *(u_char *) 0xf7c); 298 *(u_char *) 0xf7c = 0xc7; 299 } 300 }*/ 301 trapsignal(i, ucode|FRMTRAP); 302 if ((type & USER) == 0) 303 return; 304 out: 305 p = u.u_procp; 306 if (i = CURSIG(p)) 307 psig(i,FRMTRAP); 308 p->p_pri = p->p_usrpri; 309 if (runrun) { 310 /* 311 * Since we are u.u_procp, clock will normally just change 312 * our priority without moving us from one queue to another 313 * (since the running process is not on a queue.) 314 * If that happened after we setrq ourselves but before we 315 * swtch()'ed, we might not be on the queue indicated by 316 * our priority. 317 */ 318 (void) splclock(); 319 setrq(p); 320 u.u_ru.ru_nivcsw++; 321 swtch(); 322 if (i = CURSIG(p)) 323 psig(i,FRMTRAP); 324 } 325 if (u.u_prof.pr_scale) { 326 int ticks; 327 struct timeval *tv = &u.u_ru.ru_stime; 328 329 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + 330 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); 331 if (ticks) 332 addupc(pc, &u.u_prof, ticks); 333 } 334 curpri = p->p_pri; 335 /*if(u.u_procp->p_pid == 3) 336 locr0[tEFLAGS] |= PSL_T; 337 if(u.u_procp->p_pid == 1 && (pc == 0xec9 || pc == 0xebd)) 338 locr0[tEFLAGS] |= PSL_T;*/ 339 spl0(); /*XXX*/ 340 #undef type 341 #undef code 342 #undef pc 343 } 344 345 /* 346 * syscall(frame): 347 * System call request from POSIX system call gate interface to kernel. 348 * Like trap(), argument is call by reference. 349 */ 350 /*ARGSUSED*/ 351 syscall(frame) 352 struct syscframe frame; 353 /*#define code frame.sf_eax /* note: written over! */ 354 #define pc frame.sf_eip 355 { 356 register int *locr0 = ((int *)&frame); 357 register caddr_t params; 358 register int i; 359 register struct sysent *callp; 360 register struct proc *p; 361 struct timeval syst; 362 int error, opc; 363 int args[8], rval[2]; 364 int code; 365 366 #ifdef lint 367 r0 = 0; r0 = r0; r1 = 0; r1 = r1; 368 #endif 369 syst = u.u_ru.ru_stime; 370 p = u.u_procp; 371 if (ISPL(locr0[sCS]) != SEL_UPL) 372 { 373 printf("\npc:%x cs:%x eflags:%x\n", 374 frame.sf_eip, frame.sf_cs, frame.sf_eflags); 375 printf("edi %x esi %x ebp %x ebx %x esp %x\n", 376 frame.sf_edi, frame.sf_esi, frame.sf_ebp, 377 frame.sf_ebx, frame.sf_esp); 378 printf("edx %x ecx %x eax %x\n", frame.sf_edx, frame.sf_ecx, frame.sf_eax); 379 printf("cr0 %x cr2 %x cpl %x \n", rcr0(), rcr2(), cpl); 380 panic("syscall"); 381 } 382 if (um) {*(int *)PTmap &= 0xfffffffe; load_cr3(rcr3()); } 383 /*if(u.u_procp && (u.u_procp->p_pid == 1 || u.u_procp->p_pid == 3)) { 384 if( *(u_char *) 0xf7c != 0xc7) { 385 printf("%x!", *(u_char *) 0xf7c); 386 *(u_char *) 0xf7c = 0xc7; 387 } 388 }*/ 389 u.u_ar0 = locr0; 390 params = (caddr_t)locr0[sESP] + NBPW ; 391 code = frame.sf_eax; 392 393 /* 394 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 395 */ 396 opc = pc - 7; 397 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 398 if (callp == sysent) { 399 i = fuword(params); 400 params += NBPW; 401 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 402 } 403 dprintf(DALLSYSC,"%d. call %d ", p->p_pid, code); 404 if ((i = callp->sy_narg * sizeof (int)) && 405 (error = copyin(params, (caddr_t)args, (u_int)i))) { 406 locr0[sEAX] = /*(u_char)*/ error; 407 locr0[sEFLAGS] |= PSL_C; /* carry bit */ 408 #ifdef KTRACEx 409 if (KTRPOINT(p, KTR_SYSCALL)) 410 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 411 #endif 412 goto done; 413 } 414 #ifdef KTRACEx 415 if (KTRPOINT(p, KTR_SYSCALL)) 416 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 417 #endif 418 rval[0] = 0; 419 rval[1] = locr0[sEDX]; 420 error = (*callp->sy_call)(p, args, rval); 421 if (error == ERESTART) 422 pc = opc; 423 else if (error != EJUSTRETURN) { 424 if (error) { 425 locr0[sEAX] = error; 426 locr0[sEFLAGS] |= PSL_C; /* carry bit */ 427 } else { 428 locr0[sEAX] = rval[0]; 429 locr0[sEDX] = rval[1]; 430 locr0[sEFLAGS] &= ~PSL_C; /* carry bit */ 431 } 432 } 433 /* else if (error == EJUSTRETURN) */ 434 /* nothing to do */ 435 done: 436 /* 437 * Reinitialize proc pointer `p' as it may be different 438 * if this is a child returning from fork syscall. 439 */ 440 p = u.u_procp; 441 /* 442 * XXX the check for sigreturn ensures that we don't 443 * attempt to set up a call to a signal handler (sendsig) before 444 * we have cleaned up the stack from the last call (sigreturn). 445 * Allowing this seems to lock up the machine in certain scenarios. 446 * What should really be done is to clean up the signal handling 447 * so that this is not a problem. 448 */ 449 #include "syscall.h" 450 if (code != SYS_sigreturn && (i = CURSIG(p))) 451 psig(i,0); 452 p->p_pri = p->p_usrpri; 453 if (runrun) { 454 /* 455 * Since we are u.u_procp, clock will normally just change 456 * our priority without moving us from one queue to another 457 * (since the running process is not on a queue.) 458 * If that happened after we setrq ourselves but before we 459 * swtch()'ed, we might not be on the queue indicated by 460 * our priority. 461 */ 462 (void) splclock(); 463 setrq(p); 464 u.u_ru.ru_nivcsw++; 465 swtch(); 466 if (code != SYS_sigreturn && (i = CURSIG(p))) 467 psig(i,0); 468 } 469 if (u.u_prof.pr_scale) { 470 int ticks; 471 struct timeval *tv = &u.u_ru.ru_stime; 472 473 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + 474 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); 475 if (ticks) { 476 #ifdef PROFTIMER 477 extern int profscale; 478 addupc(pc, &u.u_prof, ticks * profscale); 479 #else 480 addupc(pc, &u.u_prof, ticks); 481 #endif 482 } 483 } 484 curpri = p->p_pri; 485 #ifdef KTRACEx 486 if (KTRPOINT(p, KTR_SYSRET)) 487 ktrsysret(p->p_tracep, code, error, rval[0]); 488 #endif 489 } 490 491 #ifdef notdef 492 /* 493 * nonexistent system call-- signal process (may want to handle it) 494 * flag error if process won't see signal immediately 495 * Q: should we do that all the time ?? 496 */ 497 nosys() 498 { 499 500 if (u.u_signal[SIGSYS] == SIG_IGN || u.u_signal[SIGSYS] == SIG_HOLD) 501 u.u_error = EINVAL; 502 psignal(u.u_procp, SIGSYS); 503 } 504 #endif 505 506 /* 507 * Ignored system call 508 */ 509 nullsys() 510 { 511 512 } 513