xref: /original-bsd/sys/sparc/sparc/trap.c (revision 27393bdf)
1 /*
2  * Copyright (c) 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This software was developed by the Computer Systems Engineering group
6  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7  * contributed to Berkeley.
8  *
9  * All advertising materials mentioning features or use of this software
10  * must display the following acknowledgement:
11  *	This product includes software developed by the University of
12  *	California, Lawrence Berkeley Laboratory.
13  *
14  * %sccs.include.redist.c%
15  *
16  *	@(#)trap.c	8.5 (Berkeley) 11/21/94
17  *
18  * from: $Header: trap.c,v 1.34 93/05/28 04:34:50 torek Exp $
19  */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/proc.h>
24 #include <sys/user.h>
25 #include <sys/kernel.h>
26 #include <sys/malloc.h>
27 #include <sys/resource.h>
28 #include <sys/signal.h>
29 #include <sys/wait.h>
30 #include <sys/syscall.h>
31 #include <sys/syslog.h>
32 #ifdef KTRACE
33 #include <sys/ktrace.h>
34 #endif
35 
36 #include <vm/vm_kern.h>
37 
38 #include <machine/cpu.h>
39 #include <machine/ctlreg.h>
40 #include <machine/frame.h>
41 #include <machine/trap.h>
42 
/*
 * Byte offset of field f within struct type s, computed by taking the
 * address of the field through a null pointer of that type.  Used by
 * syscall() below to locate the argument extension area (fr_argx) in
 * the caller's stack frame.
 */
#define	offsetof(s, f) ((int)&((s *)0)->f)

/*
 * Defined elsewhere.  Tested below to recognize traps and faults taken
 * during autoconfiguration, which are handled specially.
 */
extern int cold;

/*
 * Debugging switch: when nonzero, the register window code in this
 * file printf()s the stack addresses of the windows it moves.
 */
int	rwindow_debug = 0;
48 
/*
 * Initial FPU state is all registers == all 1s, everything else == all 0s.
 * This makes every floating point register a signalling NaN, with sign bit
 * set, no matter how it is interpreted.  Appendix N of the Sparc V8 document
 * seems to imply that we should do this, and it does make sense.
 *
 * Only the 32 register words are listed explicitly; the remaining members
 * are left to C's implicit zero initialization.  trap() copies this
 * template into each fpstate it allocates on a process's first
 * T_FPDISABLED trap.
 */
struct	fpstate initfpstate = {
	{ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }
};
59 
60 /*
61  * There are more than 100 trap types, but most are unused.
62  *
63  * Trap type 0 is taken over as an `Asynchronous System Trap'.
64  * This is left-over Vax emulation crap that should be fixed.
65  */
/* Generic name shared by every vector that has no name of its own. */
static const char T[] = "trap";

/*
 * Runs of unnamed vectors, so that each gap in the table below is a
 * single line whose length can be checked against its range comment.
 * These helpers exist only while the table is being defined.
 */
#define	UNNAMED6	T, T, T, T, T, T
#define	UNNAMED7	UNNAMED6, T
#define	UNNAMED8	UNNAMED7, T

/*
 * Printable trap names, indexed by trap type number.  trap() uses an
 * entry as its panic message when the type is within the table.
 */
const char *trap_type[] = {
	/* 0x00..0x7f: non-user vectors */
	"ast",				/* 0x00 */
	"text fault",			/* 0x01 */
	"illegal instruction",		/* 0x02 */
	"privileged instruction",	/* 0x03 */
	"fp disabled",			/* 0x04 */
	"window overflow",		/* 0x05 */
	"window underflow",		/* 0x06 */
	"alignment fault",		/* 0x07 */
	"fp exception",			/* 0x08 */
	"data fault",			/* 0x09 */
	"tag overflow",			/* 0x0a */
	UNNAMED6,			/* 0x0b..0x10 */
	"level 1 int",			/* 0x11 */
	"level 2 int",			/* 0x12 */
	"level 3 int",			/* 0x13 */
	"level 4 int",			/* 0x14 */
	"level 5 int",			/* 0x15 */
	"level 6 int",			/* 0x16 */
	"level 7 int",			/* 0x17 */
	"level 8 int",			/* 0x18 */
	"level 9 int",			/* 0x19 */
	"level 10 int",			/* 0x1a */
	"level 11 int",			/* 0x1b */
	"level 12 int",			/* 0x1c */
	"level 13 int",			/* 0x1d */
	"level 14 int",			/* 0x1e */
	"level 15 int",			/* 0x1f */
	UNNAMED8,			/* 0x20..0x27 */
	UNNAMED8,			/* 0x28..0x2f */
	UNNAMED6,			/* 0x30..0x35 */
	"cp disabled",			/* 0x36 */
	T,				/* 0x37 */
	UNNAMED8,			/* 0x38..0x3f */
	"cp exception",			/* 0x40 */
	UNNAMED7,			/* 0x41..0x47 */
	UNNAMED8,			/* 0x48..0x4f */
	UNNAMED8,			/* 0x50..0x57 */
	UNNAMED8,			/* 0x58..0x5f */
	UNNAMED8,			/* 0x60..0x67 */
	UNNAMED8,			/* 0x68..0x6f */
	UNNAMED8,			/* 0x70..0x77 */
	UNNAMED8,			/* 0x78..0x7f */

	/* 0x80 and up: user (software trap) vectors */
	"sun syscall",			/* 0x80 */
	"breakpoint",			/* 0x81 */
	"zero divide",			/* 0x82 */
	"flush windows",		/* 0x83 */
	"clean windows",		/* 0x84 */
	"range check",			/* 0x85 */
	"fix align",			/* 0x86 */
	"integer overflow",		/* 0x87 */
	"kgdb exec",			/* 0x88 */
	"syscall"			/* 0x89 */
};

#undef	UNNAMED8
#undef	UNNAMED7
#undef	UNNAMED6

/* Number of entries in trap_type[]; larger trap types have no name. */
#define	N_TRAP_TYPES	(sizeof trap_type / sizeof *trap_type)
126 
127 /*
128  * Define the code needed before returning to user mode, for
129  * trap, mem_access_fault, and syscall.
130  */
131 static inline void
132 userret(struct proc *p, int pc, u_quad_t oticks)
133 {
134 	int sig;
135 
136 	/* take pending signals */
137 	while ((sig = CURSIG(p)) != 0)
138 		postsig(sig);
139 	p->p_priority = p->p_usrpri;
140 	if (want_ast) {
141 		want_ast = 0;
142 		if (p->p_flag & P_OWEUPC) {
143 			p->p_flag &= ~P_OWEUPC;
144 			ADDUPROF(p);
145 		}
146 	}
147 	if (want_resched) {
148 		/*
149 		 * Since we are curproc, clock will normally just change
150 		 * our priority without moving us from one queue to another
151 		 * (since the running process is not on a queue.)
152 		 * If that happened after we put ourselves on the run queue
153 		 * but before we switched, we might not be on the queue
154 		 * indicated by our priority.
155 		 */
156 		(void) splstatclock();
157 		setrunqueue(p);
158 		p->p_stats->p_ru.ru_nivcsw++;
159 		mi_switch();
160 		(void) spl0();
161 		while ((sig = CURSIG(p)) != 0)
162 			postsig(sig);
163 	}
164 
165 	/*
166 	 * If profiling, charge recent system time to the trapped pc.
167 	 */
168 	if (p->p_flag & P_PROFIL)
169 		addupc_task(p, pc, (int)(p->p_sticks - oticks));
170 
171 	curpriority = p->p_priority;
172 }
173 
174 /*
175  * If someone stole the FPU while we were away, do not enable it
176  * on return.  This is not done in userret() above as it must follow
177  * the ktrsysret() in syscall().  Actually, it is likely that the
178  * ktrsysret should occur before the call to userret.
179  */
180 static inline void share_fpu(struct proc *p, struct trapframe *tf) {
181 	if ((tf->tf_psr & PSR_EF) != 0 && fpproc != p)
182 		tf->tf_psr &= ~PSR_EF;
183 }
184 
/*
 * Called from locore.s trap handling, for non-MMU-related traps.
 * (MMU-related traps go through mem_access_fault, below.)
 *
 * Arguments:
 *	type	trap type number (T_* constants; also indexes trap_type[])
 *	psr	processor status register value; PSR_PS set means the
 *		trap came from kernel mode
 *	pc	trapped pc, used for the panic message and passed to
 *		userret() for profiling
 *	tf	trap frame; its pc/npc/psr may be modified here
 *
 * Kernel-mode traps panic, with the single exception of T_FPDISABLED
 * while `cold'.  User-mode traps are dispatched on type; every case
 * that leaves the switch finishes with userret() and share_fpu().
 */
trap(type, psr, pc, tf)
	register unsigned type;
	register int psr, pc;
	register struct trapframe *tf;
{
	register struct proc *p;
	register struct pcb *pcb;
	register int n;		/* scratch used by ADVANCE */
	u_quad_t sticks;	/* p_sticks on entry, for userret() */

	/* This steps the PC over the trap. */
#define	ADVANCE (n = tf->tf_npc, tf->tf_pc = n, tf->tf_npc = n + 4)

	cnt.v_trap++;
	/*
	 * Generally, kernel traps cause a panic.  Any exceptions are
	 * handled early here.
	 */
	if (psr & PSR_PS) {
		/*
		 * Storing %fsr in cpu_attach will cause this trap
		 * even though the fpu has been enabled, if and only
		 * if there is no FPU.
		 */
		if (type == T_FPDISABLED && cold) {
			ADVANCE;
			return;
		}
		/*
		 * Note that this jumps into the default case of the switch
		 * below; p and pcb are not yet set, and the panic code does
		 * not use them.
		 */
		goto dopanic;
	}
	if ((p = curproc) == NULL)
		p = &proc0;
	sticks = p->p_sticks;
	pcb = &p->p_addr->u_pcb;
	p->p_md.md_tf = tf;	/* for ptrace/signals */

	switch (type) {

	default:
		/* types below 0x80 are the non-user vectors of trap_type[] */
		if (type < 0x80) {
dopanic:
			printf("trap type 0x%x: pc=%x npc=%x psr=%b\n",
			    type, pc, tf->tf_npc, psr, PSR_BITS);
			panic(type < N_TRAP_TYPES ? trap_type[type] : T);
			/* NOTREACHED */
		}
		/* the following message is gratuitous */
		/* ... but leave it in until we find anything */
		printf("%s[%d]: unimplemented software trap 0x%x\n",
		    p->p_comm, p->p_pid, type);
		trapsignal(p, SIGILL, type);
		break;

	case T_AST:
		break;	/* the work is all in userret() */

	case T_ILLINST:
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_PRIVINST:
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_FPDISABLED: {
		register struct fpstate *fs = p->p_md.md_fpstate;

		/* first FP use by this process: allocate its save area */
		if (fs == NULL) {
			fs = malloc(sizeof *fs, M_SUBPROC, M_WAITOK);
			*fs = initfpstate;
			p->p_md.md_fpstate = fs;
		}
		/*
		 * If we have not found an FPU, we have to emulate it.
		 */
		if (!foundfpu) {
#ifdef notyet
			fpu_emulate(p, tf, fs);
			break;
#else
			trapsignal(p, SIGFPE, 0);	/* XXX code?? */
			break;
#endif
		}
		/*
		 * We may have more FPEs stored up and/or ops queued.
		 * If they exist, handle them and get out.  Otherwise,
		 * resolve the FPU state, turn it on, and try again.
		 */
		if (fs->fs_qsize) {
			fpu_cleanup(p, fs);
			break;
		}
		if (fpproc != p) {		/* we do not have it */
			if (fpproc != NULL)	/* someone else had it */
				savefpstate(fpproc->p_md.md_fpstate);
			loadfpstate(fs);
			fpproc = p;		/* now we do have it */
		}
		tf->tf_psr |= PSR_EF;
		break;
	}

	case T_WINOF:
		/* push the windows saved in the pcb out to the user stack */
		if (rwindow_save(p))
			sigexit(p, SIGILL);
		break;

	/* copy one register window from user address src into dst */
#define read_rw(src, dst) \
	copyin((caddr_t)(src), (caddr_t)(dst), sizeof(struct rwindow))

	case T_RWRET:
		/*
		 * T_RWRET is a window load needed in order to rett.
		 * It simply needs the window to which tf->tf_out[6]
		 * (%sp) points.  There are no user or saved windows now.
		 * Copy the one from %sp into pcb->pcb_rw[0] and set
		 * nsaved to -1.  If we decide to deliver a signal on
		 * our way out, we will clear nsaved.
		 */
if (pcb->pcb_uw || pcb->pcb_nsaved) panic("trap T_RWRET 1");
if (rwindow_debug)
printf("%s[%d]: rwindow: pcb<-stack: %x\n", p->p_comm, p->p_pid, tf->tf_out[6]);
		if (read_rw(tf->tf_out[6], &pcb->pcb_rw[0]))
			sigexit(p, SIGILL);
if (pcb->pcb_nsaved) panic("trap T_RWRET 2");
		pcb->pcb_nsaved = -1;		/* mark success */
		break;

	case T_WINUF:
		/*
		 * T_WINUF is a real window underflow, from a restore
		 * instruction.  It needs to have the contents of two
		 * windows---the one belonging to the restore instruction
		 * itself, which is at its %sp, and the one belonging to
		 * the window above, which is at its %fp or %i6---both
		 * in the pcb.  The restore's window may still be in
		 * the cpu; we need to force it out to the stack.
		 */
if (rwindow_debug)
printf("%s[%d]: rwindow: T_WINUF 0: pcb<-stack: %x\n",
p->p_comm, p->p_pid, tf->tf_out[6]);
		write_user_windows();
		if (rwindow_save(p) || read_rw(tf->tf_out[6], &pcb->pcb_rw[0]))
			sigexit(p, SIGILL);
if (rwindow_debug)
printf("%s[%d]: rwindow: T_WINUF 1: pcb<-stack: %x\n",
p->p_comm, p->p_pid, pcb->pcb_rw[0].rw_in[6]);
		/* the second window is found through the first one's %i6 */
		if (read_rw(pcb->pcb_rw[0].rw_in[6], &pcb->pcb_rw[1]))
			sigexit(p, SIGILL);
if (pcb->pcb_nsaved) panic("trap T_WINUF");
		pcb->pcb_nsaved = -1;		/* mark success */
		break;

	case T_ALIGN:
		trapsignal(p, SIGBUS, 0);	/* XXX code?? */
		break;

	case T_FPE:
		/*
		 * Clean up after a floating point exception.
		 * fpu_cleanup can (and usually does) modify the
		 * state we save here, so we must `give up' the FPU
		 * chip context.  (The software and hardware states
		 * will not match once fpu_cleanup does its job, so
		 * we must not save again later.)
		 */
		if (p != fpproc)
			panic("fpe without being the FP user");
		savefpstate(p->p_md.md_fpstate);
		fpproc = NULL;
		/* tf->tf_psr &= ~PSR_EF; */	/* share_fpu will do this */
		fpu_cleanup(p, p->p_md.md_fpstate);
		/* fpu_cleanup posts signals if needed */
#if 0		/* ??? really never??? */
		ADVANCE;
#endif
		break;

	case T_TAGOF:
		trapsignal(p, SIGEMT, 0);	/* XXX code?? */
		break;

	case T_CPDISABLED:
		uprintf("coprocessor instruction\n");	/* XXX */
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_BREAKPOINT:
		/* pc is deliberately not advanced here */
		trapsignal(p, SIGTRAP, 0);
		break;

	case T_DIV0:
		ADVANCE;
		trapsignal(p, SIGFPE, FPE_INTDIV_TRAP);
		break;

	case T_FLUSHWIN:
		write_user_windows();
#ifdef probably_slower_since_this_is_usually_false
		if (pcb->pcb_nsaved && rwindow_save(p))
			sigexit(p, SIGILL);
#endif
		ADVANCE;
		break;

	case T_CLEANWIN:
		uprintf("T_CLEANWIN\n");	/* XXX */
		ADVANCE;
		break;

	case T_RANGECHECK:
		uprintf("T_RANGECHECK\n");	/* XXX */
		ADVANCE;
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_FIXALIGN:
		uprintf("T_FIXALIGN\n");	/* XXX */
		ADVANCE;
		break;

	case T_INTOF:
		uprintf("T_INTOF\n");		/* XXX */
		ADVANCE;
		trapsignal(p, SIGFPE, FPE_INTOVF_TRAP);
		break;
	}
	/* common user-mode exit: signals, rescheduling, then FPU ownership */
	userret(p, pc, sticks);
	share_fpu(p, tf);
#undef ADVANCE
}
421 
422 /*
423  * Save windows from PCB into user stack, and return 0.  This is used on
424  * window overflow pseudo-traps (from locore.s, just before returning to
425  * user mode) and when ptrace or sendsig needs a consistent state.
426  * As a side effect, rwindow_save() always sets pcb_nsaved to 0,
427  * clobbering the `underflow restore' indicator if it was -1.
428  *
429  * If the windows cannot be saved, pcb_nsaved is restored and we return -1.
430  */
431 int
432 rwindow_save(p)
433 	register struct proc *p;
434 {
435 	register struct pcb *pcb = &p->p_addr->u_pcb;
436 	register struct rwindow *rw = &pcb->pcb_rw[0];
437 	register int i;
438 
439 	i = pcb->pcb_nsaved;
440 	if (i < 0) {
441 		pcb->pcb_nsaved = 0;
442 		return (0);
443 	}
444 	if (i == 0)
445 		return (0);
446 if(rwindow_debug)
447 printf("%s[%d]: rwindow: pcb->stack:", p->p_comm, p->p_pid);
448 	do {
449 if(rwindow_debug)
450 printf(" %x", rw[1].rw_in[6]);
451 		if (copyout((caddr_t)rw, (caddr_t)rw[1].rw_in[6],
452 		    sizeof *rw))
453 			return (-1);
454 		rw++;
455 	} while (--i > 0);
456 if(rwindow_debug)
457 printf("\n");
458 	pcb->pcb_nsaved = 0;
459 	return (0);
460 }
461 
462 /*
463  * Kill user windows (before exec) by writing back to stack or pcb
464  * and then erasing any pcb tracks.  Otherwise we might try to write
465  * the registers into the new process after the exec.
466  */
467 kill_user_windows(p)
468 	struct proc *p;
469 {
470 
471 	write_user_windows();
472 	p->p_addr->u_pcb.pcb_nsaved = 0;
473 }
474 
/*
 * Called from locore.s trap handling, for synchronous memory faults.
 *
 * This duplicates a lot of logic in trap() and perhaps should be
 * moved there; but the bus-error-register parameters are unique to
 * this routine.
 *
 * Since synchronous errors accumulate during prefetch, we can have
 * more than one `cause'.  But we do not care what the cause, here;
 * we just want to page in the page and try again.
 *
 * Arguments:
 *	type	trap type; only T_TEXTFAULT is distinguished
 *	ser	synchronous error register value; SER_WRITE selects a
 *		write fault
 *	v	faulting virtual address (replaced by pc on text faults)
 *	pc	trapped pc
 *	psr	processor status register; PSR_PS means a kernel fault
 *	tf	trap frame; pc/npc are redirected to pcb_onfault when a
 *		kernel fault is recovered that way
 *
 * An unrecoverable user fault is turned into SIGBUS or SIGSEGV; an
 * unrecoverable kernel fault either resumes at pcb_onfault or panics.
 */
mem_access_fault(type, ser, v, pc, psr, tf)
	register unsigned type;
	register int ser;
	register u_int v;
	register int pc, psr;
	register struct trapframe *tf;
{
	register struct proc *p;
	register struct vmspace *vm;
	register vm_offset_t va;
	register int i, rv, sig = SIGBUS;	/* SIGBUS unless changed below */
	vm_prot_t ftype;
	int onfault, mmucode;
	u_quad_t sticks;	/* p_sticks on entry, for userret() */

	cnt.v_trap++;
	if ((p = curproc) == NULL)	/* safety check */
		p = &proc0;
	sticks = p->p_sticks;

	/*
	 * Figure out what to pass the VM code, and ignore the sva register
	 * value in v on text faults (text faults are always at pc).
	 * Kernel faults are somewhat different: text faults are always
	 * illegal, and data faults are extra complex.  User faults must
	 * set p->p_md.md_tf, in case we decide to deliver a signal.  Check
	 * for illegal virtual addresses early since those can induce more
	 * faults.
	 */
	if (type == T_TEXTFAULT)
		v = pc;
	/*
	 * The signed shift leaves 0 or -1 only when every address bit from
	 * PG_VSHIFT upward matches the sign bit (assuming an arithmetic
	 * right shift); any other address is treated as illegal.
	 */
	i = (int)v >> PG_VSHIFT;
	if (i != 0 && i != -1)
		goto fault;
	ftype = ser & SER_WRITE ? VM_PROT_READ|VM_PROT_WRITE : VM_PROT_READ;
	va = trunc_page(v);
	if (psr & PSR_PS) {
		extern char Lfsbail[];
		if (type == T_TEXTFAULT) {
			(void) splhigh();
			printf("text fault: pc=%x ser=%b\n", pc, ser, SER_BITS);
			panic("kernel fault");
			/* NOTREACHED */
		}
		/*
		 * If this was an access that we shouldn't try to page in,
		 * resume at the fault handler without any action.
		 */
		if (p->p_addr && p->p_addr->u_pcb.pcb_onfault == Lfsbail)
			goto kfault;

		/*
		 * During autoconfiguration, faults are never OK unless
		 * pcb_onfault is set.  Once running normally we must allow
		 * exec() to cause copy-on-write faults to kernel addresses.
		 */
		if (cold)
			goto kfault;
		if (va >= KERNBASE) {
			if (vm_fault(kernel_map, va, ftype, 0) == KERN_SUCCESS)
				return;
			goto kfault;
		}
		/* kernel access to a user address: handled like a user fault */
	} else
		p->p_md.md_tf = tf;

	/*
	 * mmu_pagein returns -1 if the page is already valid, in which
	 * case we have a hard fault; it returns 1 if it loads a segment
	 * that got bumped out via LRU replacement.
	 */
	vm = p->p_vmspace;
	rv = mmu_pagein(&vm->vm_pmap, va, ser & SER_WRITE ? PG_V|PG_W : PG_V);
	if (rv < 0)
		goto fault;
	if (rv > 0)
		goto out;

	/* alas! must call the horrible vm code */
	rv = vm_fault(&vm->vm_map, (vm_offset_t)va, ftype, FALSE);

	/*
	 * If this was a stack access we keep track of the maximum
	 * accessed stack size.  Also, if vm_fault gets a protection
	 * failure it is due to accessing the stack region outside
	 * the current limit and we need to reflect that as an access
	 * error.
	 */
	if ((caddr_t)va >= vm->vm_maxsaddr) {
		if (rv == KERN_SUCCESS) {
			unsigned nss = clrnd(btoc(USRSTACK - va));
			if (nss > vm->vm_ssize)
				vm->vm_ssize = nss;
		} else if (rv == KERN_PROTECTION_FAILURE)
			rv = KERN_INVALID_ADDRESS;
	}
	if (rv == KERN_SUCCESS) {
		/*
		 * pmap_enter() does not enter all requests made from
		 * vm_fault into the MMU (as that causes unnecessary
		 * entries for `wired' pages).  Instead, we call
		 * mmu_pagein here to make sure the new PTE gets installed.
		 */
		(void) mmu_pagein(&vm->vm_pmap, va, 0);
	} else {
		/*
		 * Pagein failed.  If doing copyin/out, return to onfault
		 * address.  Any other page fault in kernel, die; if user
		 * fault, deliver SIGBUS or SIGSEGV.
		 */
		if (rv != KERN_PROTECTION_FAILURE)
			sig = SIGSEGV;
		/*
		 * `fault' is also entered from above, for illegal addresses
		 * and hard faults, with sig still at its SIGBUS default.
		 */
fault:
		if (psr & PSR_PS) {
			/*
			 * `kfault' is entered directly by the kernel-only
			 * checks above.
			 */
kfault:
			onfault = p->p_addr ?
			    (int)p->p_addr->u_pcb.pcb_onfault : 0;
			if (!onfault) {
				(void) splhigh();
				printf("data fault: pc=%x addr=%x ser=%b\n",
				    pc, v, ser, SER_BITS);
				panic("kernel fault");
				/* NOTREACHED */
			}
			/* resume the kernel at its registered recovery address */
			tf->tf_pc = onfault;
			tf->tf_npc = onfault + 4;
			return;
		}
		trapsignal(p, sig, (u_int)v);
	}
out:
	/* the user-mode exit work is skipped when the fault came from the kernel */
	if ((psr & PSR_PS) == 0) {
		userret(p, pc, sticks);
		share_fpu(p, tf);
	}
}
622 
/*
 * System calls.  `pc' is just a copy of tf->tf_pc.
 *
 * Note that the things labelled `out' registers in the trapframe were the
 * `in' registers within the syscall trap code (because of the automatic
 * `save' effect of each trap).  They are, however, the %o registers of the
 * thing that made the system call, and are named that way here.
 *
 * The `suncompat' parameter actually only exists if COMPAT_SUNOS is defined.
 *
 * Arguments:
 *	code		system call number, possibly with SYSCALL_G2RFLAG or
 *			SYSCALL_G7RFLAG or'ed in to select the return convention
 *	tf		trap frame; supplies the arguments in tf_out[] and
 *			receives the results, the carry flag, and the new pc/npc
 *	pc		passed on to userret() for profiling
 *	suncompat	nonzero to dispatch through the sunsys[] table
 *
 * On success the return values go into %o0/%o1; on a positive error the
 * error number goes into %o0 and PSR_C is set.  Non-positive errors
 * (ERESTART, EJUSTRETURN) leave the trap frame untouched.
 */
syscall(code, tf, pc, suncompat)
	register u_int code;
	register struct trapframe *tf;
	int pc, suncompat;
{
	register int i, nsys, *ap, nap;
	register struct sysent *callp;
	register struct proc *p;
	int error, new;
	struct args {
		int i[8];
	} args;
	int rval[2];
	u_quad_t sticks;	/* p_sticks on entry, for userret() */
	extern int nsysent;
	extern struct pcb *cpcb;

	cnt.v_syscall++;
	p = curproc;
#ifdef DIAGNOSTIC
	if (tf->tf_psr & PSR_PS)
		panic("syscall");
	if (cpcb != &p->p_addr->u_pcb)
		panic("syscall cpcb/ppcb");
	if (tf != (struct trapframe *)((caddr_t)cpcb + UPAGES * NBPG) - 1)
		panic("syscall trapframe");
#endif
	sticks = p->p_sticks;
	p->p_md.md_tf = tf;
	/* split the return-convention flags off from the call number */
	new = code & (SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
	code &= ~(SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
#ifdef COMPAT_SUNOS
	if (suncompat) {
		extern int nsunsys;
		extern struct sysent sunsys[];

		callp = sunsys, nsys = nsunsys;
	} else
#endif
		callp = sysent, nsys = nsysent;

	/*
	 * The first six system call arguments are in the six %o registers.
	 * Any arguments beyond that are in the `argument extension' area
	 * of the user's stack frame (see <machine/frame.h>).
	 *
	 * Check for ``special'' codes that alter this, namely syscall and
	 * __syscall.  The latter takes a quad syscall number, so that other
	 * arguments are at their natural alignments.  Adjust the number
	 * of ``easy'' arguments as appropriate; we will copy the hard
	 * ones later as needed.
	 */
	ap = &tf->tf_out[0];
	nap = 6;
	switch (code) {

	case SYS_syscall:
		/* the real call number is the first argument */
		code = *ap++;
		nap--;
		break;

	case SYS___syscall:
#ifdef COMPAT_SUNOS
		if (suncompat)
			break;
#endif
		/* the real call number is a quad occupying two argument words */
		code = ap[_QUAD_LOWWORD];
		ap += 2;
		nap -= 2;
		break;

	}
	/* Callp currently points to syscall, which returns ENOSYS. */
	if (code < nsys) {
		callp += code;
		i = callp->sy_narg;
		if (i > nap) {	/* usually false */
			if (i > 8)
				panic("syscall nargs");
			/*
			 * Fetch the arguments that did not fit in registers
			 * from the extension area of the caller's frame,
			 * which is found through %sp (tf_out[6]).
			 */
			error = copyin((caddr_t)tf->tf_out[6] +
				    offsetof(struct frame, fr_argx),
			    (caddr_t)&args.i[nap], (i - nap) * sizeof(int));
			if (error) {
				/*
				 * NOTE(review): the register arguments have not
				 * been copied into args.i at this point.
				 */
#ifdef KTRACE
				if (KTRPOINT(p, KTR_SYSCALL))
					ktrsyscall(p->p_tracep, code,
					    callp->sy_narg, args.i);
#endif
				goto bad;
			}
			i = nap;
		}
		/* now the register-resident arguments (4 bytes each) */
		copywords(ap, args.i, i * 4);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args.i);
#endif
	/* second return value defaults to the caller's %o1 */
	rval[0] = 0;
	rval[1] = tf->tf_out[1];
	error = (*callp->sy_call)(p, &args, rval);
	if (error == 0) {
		/*
		 * If fork succeeded and we are the child, our stack
		 * has moved and the pointer tf is no longer valid,
		 * and p is wrong.  Compute the new trapframe pointer.
		 * (The trap frame invariably resides at the
		 * tippity-top of the u. area.)
		 */
		p = curproc;
		tf = (struct trapframe *)
		    ((caddr_t)p->p_addr + UPAGES * NBPG - sizeof(*tf));
/* this is done earlier: */
/*		p->p_md.md_tf = tf; */
		tf->tf_out[0] = rval[0];
		tf->tf_out[1] = rval[1];
		if (new) {
			/* jmp %g2 (or %g7, deprecated) on success */
			i = tf->tf_global[new & SYSCALL_G2RFLAG ? 2 : 7];
			/* a return address that is not word aligned is rejected */
			if (i & 3) {
				error = EINVAL;
				goto bad;
			}
		} else {
			/* old system call convention: clear C on success */
			tf->tf_psr &= ~PSR_C;	/* success */
			i = tf->tf_npc;
		}
		tf->tf_pc = i;
		tf->tf_npc = i + 4;
	} else if (error > 0 /*error != ERESTART && error != EJUSTRETURN*/) {
		/* `bad' is also entered from the copyin and EINVAL failures above */
bad:
		tf->tf_out[0] = error;
		tf->tf_psr |= PSR_C;	/* fail */
		i = tf->tf_npc;
		tf->tf_pc = i;
		tf->tf_npc = i + 4;
	}
	/* else if (error == ERESTART || error == EJUSTRETURN) */
		/* nothing to do */
	userret(p, pc, sticks);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
	/* must follow ktrsysret(); that is why it is not part of userret() */
	share_fpu(p, tf);
}
780