xref: /original-bsd/sys/sparc/sparc/trap.c (revision 47436896)
/*
 * Copyright (c) 1992 The Regents of the University of California.
 * All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratories.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)trap.c	7.2 (Berkeley) 07/21/92
 *
 * from: $Header: trap.c,v 1.31 92/07/11 11:08:57 torek Exp $
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "user.h"
#include "kernel.h"
#include "malloc.h"
#include "resource.h"
#include "signal.h"
#include "wait.h"
#include "syscall.h"
#include "syslog.h"
#ifdef KTRACE
#include "ktrace.h"
#endif

#include "vm/vm_kern.h"

#include "machine/cpu.h"
#include "machine/frame.h"
#include "machine/trap.h"

#include "ctlreg.h"

#define	offsetof(s, f) ((int)&((s *)0)->f)

extern int cold;

int	rwindow_debug = 0;

/*
 * Initial FPU state is all registers == all 1s, everything else == all 0s.
 * This makes every floating point register a signalling NaN, with sign bit
 * set, no matter how it is interpreted.  Appendix N of the Sparc V8 document
 * seems to imply that we should do this, and it does make sense.
 */
struct	fpstate initfpstate = {
	~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
	~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
};
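
/*
 * (Editorial sketch, not part of the original file: under the IEEE 754
 * single-precision layout the all-ones word above splits into sign 1,
 * exponent 0xff, and fraction 0x7fffff; an all-ones exponent with a
 * nonzero fraction is a NaN no matter how the register is read, which
 * is what the comment above relies on.  The guarded, unused helper
 * below only illustrates that decomposition.)
 */
#ifdef notdef
static void
initfp_pattern_check()
{
	unsigned int w = ~0;

	/* prints sign=1 exp=ff frac=7fffff */
	printf("sign=%x exp=%x frac=%x\n",
	    (w >> 31) & 1, (w >> 23) & 0xff, w & 0x7fffff);
}
#endif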

/*
 * There are more than 100 trap types, but most are unused.
 *
 * Trap type 0 is taken over as an `Asynchronous System Trap'.
 * This is left-over Vax emulation crap that should be fixed.
 */
static const char T[] = "trap";
const char *trap_type[] = {
	/* non-user vectors */
	"ast",			/* 0 */
	"text fault",		/* 1 */
	"illegal instruction",	/* 2 */
	"privileged instruction",/* 3 */
	"fp disabled",		/* 4 */
	"window overflow",	/* 5 */
	"window underflow",	/* 6 */
	"alignment fault",	/* 7 */
	"fp exception",		/* 8 */
	"data fault",		/* 9 */
	"tag overflow",		/* 0a */
	T, T, T, T, T, T,	/* 0b..10 */
	"level 1 int",		/* 11 */
	"level 2 int",		/* 12 */
	"level 3 int",		/* 13 */
	"level 4 int",		/* 14 */
	"level 5 int",		/* 15 */
	"level 6 int",		/* 16 */
	"level 7 int",		/* 17 */
	"level 8 int",		/* 18 */
	"level 9 int",		/* 19 */
	"level 10 int",		/* 1a */
	"level 11 int",		/* 1b */
	"level 12 int",		/* 1c */
	"level 13 int",		/* 1d */
	"level 14 int",		/* 1e */
	"level 15 int",		/* 1f */
	T, T, T, T, T, T, T, T,	/* 20..27 */
	T, T, T, T, T, T, T, T,	/* 28..2f */
	T, T, T, T, T, T,	/* 30..35 */
	"cp disabled",		/* 36 */
	T,			/* 37 */
	T, T, T, T, T, T, T, T,	/* 38..3f */
	"cp exception",		/* 40 */
	T, T, T, T, T, T, T,	/* 41..47 */
	T, T, T, T, T, T, T, T,	/* 48..4f */
	T, T, T, T, T, T, T, T,	/* 50..57 */
	T, T, T, T, T, T, T, T,	/* 58..5f */
	T, T, T, T, T, T, T, T,	/* 60..67 */
	T, T, T, T, T, T, T, T,	/* 68..6f */
	T, T, T, T, T, T, T, T,	/* 70..77 */
	T, T, T, T, T, T, T, T,	/* 78..7f */

	/* user (software trap) vectors */
	"sun syscall",		/* 80 */
	"breakpoint",		/* 81 */
	"zero divide",		/* 82 */
	"flush windows",	/* 83 */
	"clean windows",	/* 84 */
	"range check",		/* 85 */
	"fix align",		/* 86 */
	"integer overflow",	/* 87 */
	"kgdb exec",		/* 88 */
	"syscall"		/* 89 */
};

#define	N_TRAP_TYPES	(sizeof trap_type / sizeof *trap_type)
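
/*
 * (Editorial note, with an illustrative sketch that is not part of the
 * original file: trap() below consults the table only after checking
 * type < N_TRAP_TYPES, falling back to the generic string T, so the
 * unused slots are harmless.  A hypothetical helper doing the same
 * lookup would look like the guarded code below.)
 */
#ifdef notdef
static const char *
trap_name(t)
	unsigned t;
{

	return (t < N_TRAP_TYPES ? trap_type[t] : T);
}
#endif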

/*
 * Define the code needed before returning to user mode, for
 * trap, mem_access_fault, and syscall.
 */
static inline void
userret(struct proc *p, int pc, u_quad_t oticks)
{
	int sig;

	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		psig(sig);
	p->p_pri = p->p_usrpri;
	if (want_ast) {
		want_ast = 0;
		if (p->p_flag & SOWEUPC) {
			p->p_flag &= ~SOWEUPC;
			ADDUPROF(p);
		}
	}
	if (want_resched) {
		/*
		 * Since we are curproc, a clock interrupt could
		 * change our priority without changing run queues
		 * (the running process is not kept on a run queue).
		 * If this happened after we setrq ourselves but
		 * before we swtch()'ed, we might not be on the queue
		 * indicated by our priority.
		 */
		(void) splstatclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		(void) spl0();
		while ((sig = CURSIG(p)) != 0)
			psig(sig);
	}

	/*
	 * If profiling, charge recent system time to the trapped pc.
	 */
	if (p->p_flag & SPROFIL)
		addupc_task(p, pc, (int)(p->p_sticks - oticks));

	curpri = p->p_pri;
}

/*
 * If someone stole the FPU while we were away, do not enable it
 * on return.  This is not done in userret() above as it must follow
 * the ktrsysret() in syscall().  Actually, it is likely that the
 * ktrsysret should occur before the call to userret.
 */
static inline void
share_fpu(struct proc *p, struct trapframe *tf)
{
	if ((tf->tf_psr & PSR_EF) != 0 && fpproc != p)
		tf->tf_psr &= ~PSR_EF;
}

/*
 * Called from locore.s trap handling, for non-MMU-related traps.
 * (MMU-related traps go through mem_access_fault, below.)
 */
trap(type, psr, pc, tf)
	register unsigned type;
	register int psr, pc;
	register struct trapframe *tf;
{
	register struct proc *p;
	register struct pcb *pcb;
	register int n;
	u_quad_t sticks;

	/* This steps the PC over the trap. */
#define	ADVANCE (n = tf->tf_npc, tf->tf_pc = n, tf->tf_npc = n + 4)
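	/*
	 * (Editorial note: tf_pc/tf_npc mirror the SPARC pc/npc pair used
	 * for delayed control transfer; copying npc into pc and bumping
	 * npc by one instruction resumes execution just past the trapping
	 * instruction.)
	 */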

	cnt.v_trap++;
	/*
	 * Generally, kernel traps cause a panic.  Any exceptions are
	 * handled early here.
	 */
	if (psr & PSR_PS) {
		/*
		 * Storing %fsr in cpu_attach will cause this trap
		 * even though the fpu has been enabled, if and only
		 * if there is no FPU.
		 */
		if (type == T_FPDISABLED && cold) {
			ADVANCE;
			return;
		}
		goto dopanic;
	}
	if ((p = curproc) == NULL)
		p = &proc0;
	sticks = p->p_sticks;
	pcb = &p->p_addr->u_pcb;
	p->p_md.md_tf = tf;	/* for ptrace/signals */

	switch (type) {

	default:
		if (type < 0x80) {
dopanic:
			printf("trap type 0x%x: pc=%x npc=%x psr=%b\n",
			    type, pc, tf->tf_npc, psr, PSR_BITS);
			panic(type < N_TRAP_TYPES ? trap_type[type] : T);
			/* NOTREACHED */
		}
		/* the following message is gratuitous */
		/* ... but leave it in until we find anything */
		printf("%s[%d]: unimplemented software trap 0x%x\n",
		    p->p_comm, p->p_pid, type);
		trapsignal(p, SIGILL, type);
		break;

	case T_AST:
		break;	/* the work is all in userret() */

	case T_ILLINST:
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_PRIVINST:
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_FPDISABLED: {
		register struct fpstate *fs = p->p_md.md_fpstate;

		if (fs == NULL) {
			fs = malloc(sizeof *fs, M_SUBPROC, M_WAITOK);
			*fs = initfpstate;
			p->p_md.md_fpstate = fs;
		}
		/*
		 * If we have not found an FPU, we have to emulate it.
		 */
		if (!foundfpu) {
#ifdef notyet
			fpu_emulate(p, tf, fs);
			break;
#else
			trapsignal(p, SIGFPE, 0);	/* XXX code?? */
			break;
#endif
		}
		/*
		 * We may have more FPEs stored up and/or ops queued.
		 * If they exist, handle them and get out.  Otherwise,
		 * resolve the FPU state, turn it on, and try again.
		 */
		if (fs->fs_qsize) {
			fpu_cleanup(p, fs);
			break;
		}
		if (fpproc != p) {		/* we do not have it */
			if (fpproc != NULL)	/* someone else had it */
				savefpstate(fpproc->p_md.md_fpstate);
			loadfpstate(fs);
			fpproc = p;		/* now we do have it */
		}
		tf->tf_psr |= PSR_EF;
		break;
	}

	case T_WINOF:
		if (rwindow_save(p))
			sigexit(p, SIGILL);
		break;

#define read_rw(src, dst) \
	copyin((caddr_t)(src), (caddr_t)(dst), sizeof(struct rwindow))
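
/*
 * (Editorial note: a struct rwindow is the 16-word locals-plus-ins save
 * area that, by convention, every frame reserves at its %sp; read_rw
 * simply copies one such area in from the user stack.)
 */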

	case T_RWRET:
		/*
		 * T_RWRET is a window load needed in order to rett.
		 * It simply needs the window to which tf->tf_out[6]
		 * (%sp) points.  There are no user or saved windows now.
		 * Copy the one from %sp into pcb->pcb_rw[0] and set
		 * nsaved to -1.  If we decide to deliver a signal on
		 * our way out, we will clear nsaved.
		 */
if (pcb->pcb_uw || pcb->pcb_nsaved) panic("trap T_RWRET 1");
if (rwindow_debug)
printf("%s[%d]: rwindow: pcb<-stack: %x\n", p->p_comm, p->p_pid, tf->tf_out[6]);
		if (read_rw(tf->tf_out[6], &pcb->pcb_rw[0]))
			sigexit(p, SIGILL);
if (pcb->pcb_nsaved) panic("trap T_RWRET 2");
		pcb->pcb_nsaved = -1;		/* mark success */
		break;

	case T_WINUF:
		/*
		 * T_WINUF is a real window underflow, from a restore
		 * instruction.  It needs to have the contents of two
		 * windows---the one belonging to the restore instruction
		 * itself, which is at its %sp, and the one belonging to
		 * the window above, which is at its %fp or %i6---both
		 * in the pcb.  The restore's window may still be in
		 * the cpu; we need to force it out to the stack.
		 */
if (rwindow_debug)
printf("%s[%d]: rwindow: T_WINUF 0: pcb<-stack: %x\n",
p->p_comm, p->p_pid, tf->tf_out[6]);
		write_user_windows();
		if (rwindow_save(p) || read_rw(tf->tf_out[6], &pcb->pcb_rw[0]))
			sigexit(p, SIGILL);
if (rwindow_debug)
printf("%s[%d]: rwindow: T_WINUF 1: pcb<-stack: %x\n",
p->p_comm, p->p_pid, pcb->pcb_rw[0].rw_in[6]);
		if (read_rw(pcb->pcb_rw[0].rw_in[6], &pcb->pcb_rw[1]))
			sigexit(p, SIGILL);
if (pcb->pcb_nsaved) panic("trap T_WINUF");
		pcb->pcb_nsaved = -1;		/* mark success */
		break;

	case T_ALIGN:
		trapsignal(p, SIGBUS, 0);	/* XXX code?? */
		break;

	case T_FPE:
		/*
		 * Clean up after a floating point exception.
		 * fpu_cleanup can (and usually does) modify the
		 * state we save here, so we must `give up' the FPU
		 * chip context.  (The software and hardware states
		 * will not match once fpu_cleanup does its job, so
		 * we must not save again later.)
		 */
		if (p != fpproc)
			panic("fpe without being the FP user");
		savefpstate(p->p_md.md_fpstate);
		fpproc = NULL;
		/* tf->tf_psr &= ~PSR_EF; */	/* share_fpu will do this */
		fpu_cleanup(p, p->p_md.md_fpstate);
		/* fpu_cleanup posts signals if needed */
#if 0		/* ??? really never??? */
		ADVANCE;
#endif
		break;

	case T_TAGOF:
		trapsignal(p, SIGEMT, 0);	/* XXX code?? */
		break;

	case T_CPDISABLED:
		uprintf("coprocessor instruction\n");	/* XXX */
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_BREAKPOINT:
		trapsignal(p, SIGTRAP, 0);
		break;

	case T_DIV0:
		ADVANCE;
		trapsignal(p, SIGFPE, FPE_INTDIV_TRAP);
		break;

	case T_FLUSHWIN:
		write_user_windows();
#ifdef probably_slower_since_this_is_usually_false
		if (pcb->pcb_nsaved && rwindow_save(p))
			sigexit(p, SIGILL);
#endif
		ADVANCE;
		break;

	case T_CLEANWIN:
		uprintf("T_CLEANWIN\n");	/* XXX */
		ADVANCE;
		break;

	case T_RANGECHECK:
		uprintf("T_RANGECHECK\n");	/* XXX */
		ADVANCE;
		trapsignal(p, SIGILL, 0);	/* XXX code?? */
		break;

	case T_FIXALIGN:
		uprintf("T_FIXALIGN\n");	/* XXX */
		ADVANCE;
		break;

	case T_INTOF:
		uprintf("T_INTOF\n");		/* XXX */
		ADVANCE;
		trapsignal(p, SIGFPE, FPE_INTOVF_TRAP);
		break;
	}
	userret(p, pc, sticks);
	share_fpu(p, tf);
#undef ADVANCE
}

/*
 * Save windows from PCB into user stack, and return 0.  This is used on
 * window overflow pseudo-traps (from locore.s, just before returning to
 * user mode) and when ptrace or sendsig needs a consistent state.
 * As a side effect, rwindow_save() always sets pcb_nsaved to 0,
 * clobbering the `underflow restore' indicator if it was -1.
 *
 * If the windows cannot be saved, pcb_nsaved is restored and we return -1.
 */
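
/*
 * (Editorial note: pcb_nsaved doubles as a small state code here.  A
 * positive value counts user windows parked in the pcb that still need
 * to be written out; 0 means nothing is pending; and -1, set by the
 * T_RWRET/T_WINUF cases above, marks a window that was loaded from the
 * user stack for an outstanding restore.)
 */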
int
rwindow_save(p)
	register struct proc *p;
{
	register struct pcb *pcb = &p->p_addr->u_pcb;
	register struct rwindow *rw = &pcb->pcb_rw[0];
	register int i;

	i = pcb->pcb_nsaved;
	if (i < 0) {
		pcb->pcb_nsaved = 0;
		return (0);
	}
	if (i == 0)
		return (0);
if (rwindow_debug)
printf("%s[%d]: rwindow: pcb->stack:", p->p_comm, p->p_pid);
	do {
if (rwindow_debug)
printf(" %x", rw[1].rw_in[6]);
		if (copyout((caddr_t)rw, (caddr_t)rw[1].rw_in[6],
		    sizeof *rw))
			return (-1);
		rw++;
	} while (--i > 0);
if (rwindow_debug)
printf("\n");
	pcb->pcb_nsaved = 0;
	return (0);
}

/*
 * Kill user windows (before exec) by writing back to stack or pcb
 * and then erasing any pcb tracks.  Otherwise we might try to write
 * the registers into the new process after the exec.
 */
kill_user_windows(p)
	struct proc *p;
{

	write_user_windows();
	p->p_addr->u_pcb.pcb_nsaved = 0;
}

/*
 * Called from locore.s trap handling, for synchronous memory faults.
 *
 * This duplicates a lot of logic in trap() and perhaps should be
 * moved there; but the bus-error-register parameters are unique to
 * this routine.
 *
 * Since synchronous errors accumulate during prefetch, we can have
 * more than one `cause'.  But we do not care what the cause is, here;
 * we just want to page in the page and try again.
 */
mem_access_fault(type, ser, v, pc, psr, tf)
	register unsigned type;
	register int ser;
	register u_int v;
	register int pc, psr;
	register struct trapframe *tf;
{
	register struct proc *p;
	register struct vmspace *vm;
	register vm_offset_t va;
	register int i, rv, sig = SIGBUS;
	vm_prot_t ftype;
	int onfault, mmucode;
	u_quad_t sticks;

	cnt.v_trap++;
	if ((p = curproc) == NULL)	/* safety check */
		p = &proc0;
	sticks = p->p_sticks;

	/*
	 * Figure out what to pass the VM code, and ignore the sva register
	 * value in v on text faults (text faults are always at pc).
	 * Kernel faults are somewhat different: text faults are always
	 * illegal, and data faults are extra complex.  User faults must
	 * set p->p_md.md_tf, in case we decide to deliver a signal.  Check
	 * for illegal virtual addresses early since those can induce more
	 * faults.
	 */
	if (type == T_TEXTFAULT)
		v = pc;
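	/*
	 * (Editorial note: the shift below checks that the high-order bits
	 * of the fault address are either all zeros or all ones, i.e. that
	 * the address lies in a part of the address space that can be
	 * mapped at all; anything else is treated as a hard fault at once.)
	 */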
	i = (int)v >> PG_VSHIFT;
	if (i != 0 && i != -1)
		goto fault;
	ftype = ser & SER_WRITE ? VM_PROT_READ|VM_PROT_WRITE : VM_PROT_READ;
	va = trunc_page(v);
	if (psr & PSR_PS) {
		extern char Lfsbail[];
		if (type == T_TEXTFAULT) {
			(void) splhigh();
			printf("text fault: pc=%x ser=%b\n", pc, ser, SER_BITS);
			panic("kernel fault");
			/* NOTREACHED */
		}
		/*
		 * If this was an access that we shouldn't try to page in,
		 * resume at the fault handler without any action.
		 */
		if (p->p_addr && p->p_addr->u_pcb.pcb_onfault == Lfsbail)
			goto kfault;

		/*
		 * During autoconfiguration, faults are never OK unless
		 * pcb_onfault is set.  Once running normally we must allow
		 * exec() to cause copy-on-write faults to kernel addresses.
		 */
		if (cold)
			goto kfault;
		if (va >= KERNBASE) {
			if (vm_fault(kernel_map, va, ftype, 0) == KERN_SUCCESS)
				return;
			goto kfault;
		}
	} else
		p->p_md.md_tf = tf;

	/*
	 * mmu_pagein returns -1 if the page is already valid, in which
	 * case we have a hard fault; it returns 1 if it loads a segment
	 * that got bumped out via LRU replacement.
	 */
	vm = p->p_vmspace;
	rv = mmu_pagein(&vm->vm_pmap, va, ser & SER_WRITE ? PG_V|PG_W : PG_V);
	if (rv < 0)
		goto fault;
	if (rv > 0)
		goto out;

	/* alas! must call the horrible vm code */
	rv = vm_fault(&vm->vm_map, (vm_offset_t)va, ftype, FALSE);

	/*
	 * If this was a stack access we keep track of the maximum
	 * accessed stack size.  Also, if vm_fault gets a protection
	 * failure it is due to accessing the stack region outside
	 * the current limit and we need to reflect that as an access
	 * error.
	 */
	if ((caddr_t)va >= vm->vm_maxsaddr) {
		if (rv == KERN_SUCCESS) {
			unsigned nss = clrnd(btoc(USRSTACK - va));
			if (nss > vm->vm_ssize)
				vm->vm_ssize = nss;
		} else if (rv == KERN_PROTECTION_FAILURE)
			rv = KERN_INVALID_ADDRESS;
	}
	if (rv == KERN_SUCCESS) {
		/*
		 * pmap_enter() does not enter all requests made from
		 * vm_fault into the MMU (as that causes unnecessary
		 * entries for `wired' pages).  Instead, we call
		 * mmu_pagein here to make sure the new PTE gets installed.
		 */
		(void) mmu_pagein(&vm->vm_pmap, va, 0);
	} else {
		/*
		 * Pagein failed.  If doing copyin/out, return to onfault
		 * address.  Any other page fault in kernel, die; if user
		 * fault, deliver SIGBUS or SIGSEGV.
		 */
		if (rv != KERN_PROTECTION_FAILURE)
			sig = SIGSEGV;
fault:
		if (psr & PSR_PS) {
kfault:
			onfault = p->p_addr ?
			    (int)p->p_addr->u_pcb.pcb_onfault : 0;
			if (!onfault) {
				(void) splhigh();
				printf("data fault: pc=%x addr=%x ser=%b\n",
				    pc, v, ser, SER_BITS);
				panic("kernel fault");
				/* NOTREACHED */
			}
			tf->tf_pc = onfault;
			tf->tf_npc = onfault + 4;
			return;
		}
		trapsignal(p, sig, (u_int)v);
	}
out:
	if ((psr & PSR_PS) == 0) {
		userret(p, pc, sticks);
		share_fpu(p, tf);
	}
}

/*
 * System calls.  `pc' is just a copy of tf->tf_pc.
 *
 * Note that the things labelled `out' registers in the trapframe were the
 * `in' registers within the syscall trap code (because of the automatic
 * `save' effect of each trap).  They are, however, the %o registers of the
 * thing that made the system call, and are named that way here.
 *
 * The `suncompat' parameter actually only exists if COMPAT_SUNOS is defined.
 */
syscall(code, tf, pc, suncompat)
	register u_int code;
	register struct trapframe *tf;
	int pc, suncompat;
{
	register int i, nsys, *ap, nap;
	register struct sysent *callp;
	register struct proc *p;
	int error, new;
	struct args {
		int i[8];
	} args;
	int rval[2];
	u_quad_t sticks;
	extern int nsysent;
	extern struct pcb *cpcb;

	cnt.v_syscall++;
	p = curproc;
#ifdef DIAGNOSTIC
	if (tf->tf_psr & PSR_PS)
		panic("syscall");
	if (cpcb != &p->p_addr->u_pcb)
		panic("syscall cpcb/ppcb");
	if (tf != (struct trapframe *)((caddr_t)cpcb + UPAGES * NBPG) - 1)
		panic("syscall trapframe");
#endif
	sticks = p->p_sticks;
	p->p_md.md_tf = tf;
	new = code & (SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
	code &= ~(SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
#ifdef COMPAT_SUNOS
	if (suncompat) {
		extern int nsunsys;
		extern struct sysent sunsys[];

		callp = sunsys, nsys = nsunsys;
	} else
#endif
		callp = sysent, nsys = nsysent;

	/*
	 * The first six system call arguments are in the six %o registers.
	 * Any arguments beyond that are in the `argument extension' area
	 * of the user's stack frame (see <machine/frame.h>).
	 *
	 * Check for ``special'' codes that alter this, namely indir and
	 * __indir.  The latter takes a quad syscall number, so that other
	 * arguments are at their natural alignments.  Adjust the number
	 * of ``easy'' arguments as appropriate; we will copy the hard
	 * ones later as needed.
	 */
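	/*
	 * (Editorial example: for a hypothetical 7-argument call made the
	 * ordinary way, arguments 1..6 are read straight out of
	 * tf->tf_out[0..5] below, while argument 7 is fetched by the
	 * copyin further down from fr_argx in the caller's frame, i.e.
	 * from tf->tf_out[6] plus offsetof(struct frame, fr_argx).)
	 */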
	ap = &tf->tf_out[0];
	nap = 6;
	switch (code) {

	case SYS_indir:
		code = *ap++;
		nap--;
		break;

	case SYS___indir:
#ifdef COMPAT_SUNOS
		if (suncompat)
			break;
#endif
		code = ap[_QUAD_LOWWORD];
		ap += 2;
		nap -= 2;
		break;

	}
	/* Callp currently points to indir, which returns ENOSYS. */
	if (code < nsys) {
		callp += code;
		i = callp->sy_narg;
		if (i > nap) {	/* usually false */
			if (i > 8)
				panic("syscall nargs");
			error = copyin((caddr_t)tf->tf_out[6] +
			    offsetof(struct frame, fr_argx),
			    (caddr_t)&args.i[nap], (i - nap) * sizeof(int));
			if (error) {
#ifdef KTRACE
				if (KTRPOINT(p, KTR_SYSCALL))
					ktrsyscall(p->p_tracep, code,
					    callp->sy_narg, args.i);
#endif
				goto bad;
			}
			i = nap;
		}
		copywords(ap, args.i, i * 4);
	}
	rval[0] = 0;
	rval[1] = tf->tf_out[1];
	error = (*callp->sy_call)(p, &args, rval);
	if (error == 0) {
		/*
		 * If fork succeeded and we are the child, our stack
		 * has moved and the pointer tf is no longer valid,
		 * and p is wrong.  Compute the new trapframe pointer.
		 * (The trap frame invariably resides at the
		 * tippity-top of the u. area.)
		 */
		p = curproc;
		tf = (struct trapframe *)
		    ((caddr_t)p->p_addr + UPAGES * NBPG - sizeof(*tf));
/* this is done earlier: */
/*		p->p_md.md_tf = tf; */
		tf->tf_out[0] = rval[0];
		tf->tf_out[1] = rval[1];
		if (new) {
			/* jmp %g2 (or %g7, deprecated) on success */
			i = tf->tf_global[new & SYSCALL_G2RFLAG ? 2 : 7];
			if (i & 3) {
				error = EINVAL;
				goto bad;
			}
		} else {
			/* old system call convention: clear C on success */
			tf->tf_psr &= ~PSR_C;	/* success */
			i = tf->tf_npc;
		}
		tf->tf_pc = i;
		tf->tf_npc = i + 4;
	} else if (error > 0 /*error != ERESTART && error != EJUSTRETURN*/) {
bad:
		tf->tf_out[0] = error;
		tf->tf_psr |= PSR_C;	/* fail */
		i = tf->tf_npc;
		tf->tf_pc = i;
		tf->tf_npc = i + 4;
	}
	/* else if (error == ERESTART || error == EJUSTRETURN) */
		/* nothing to do */
	userret(p, pc, sticks);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
	share_fpu(p, tf);
}
777