xref: /netbsd/sys/arch/sparc64/sparc64/trap.c (revision c4a72b64)
1 /*	$NetBSD: trap.c,v 1.85 2002/11/16 07:40:40 uebayasi Exp $ */
2 
3 /*
4  * Copyright (c) 1996-2002 Eduardo Horvath.  All rights reserved.
5  * Copyright (c) 1996
6  *	The President and Fellows of Harvard College. All rights reserved.
7  * Copyright (c) 1992, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * This software was developed by the Computer Systems Engineering group
11  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
12  * contributed to Berkeley.
13  *
14  * All advertising materials mentioning features or use of this software
15  * must display the following acknowledgement:
16  *	This product includes software developed by the University of
17  *	California, Lawrence Berkeley Laboratory.
18  *	This product includes software developed by Harvard University.
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by the University of
31  *	California, Berkeley and its contributors.
32  *	This product includes software developed by Harvard University.
33  * 4. Neither the name of the University nor the names of its contributors
34  *    may be used to endorse or promote products derived from this software
35  *    without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
38  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
41  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  *
49  *	@(#)trap.c	8.4 (Berkeley) 9/23/93
50  */
51 
52 #define NEW_FPSTATE
53 
54 #include "opt_ddb.h"
55 #include "opt_syscall_debug.h"
56 #include "opt_ktrace.h"
57 #include "opt_systrace.h"
58 #include "opt_compat_svr4.h"
59 #include "opt_compat_netbsd32.h"
60 
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/proc.h>
64 #include <sys/user.h>
65 #include <sys/kernel.h>
66 #include <sys/malloc.h>
67 #include <sys/resource.h>
68 #include <sys/signal.h>
69 #include <sys/wait.h>
70 #include <sys/syscall.h>
71 #include <sys/syslog.h>
72 #ifdef KTRACE
73 #include <sys/ktrace.h>
74 #endif
75 #ifdef SYSTRACE
76 #include <sys/systrace.h>
77 #endif
78 
79 #include <uvm/uvm_extern.h>
80 
81 #include <machine/cpu.h>
82 #include <machine/ctlreg.h>
83 #include <machine/trap.h>
84 #include <machine/instr.h>
85 #include <machine/pmap.h>
86 
87 #ifdef DDB
88 #include <machine/db_machdep.h>
89 #else
90 #include <machine/frame.h>
91 #endif
92 #ifdef COMPAT_SVR4
93 #include <machine/svr4_machdep.h>
94 #endif
95 #ifdef COMPAT_SVR4_32
96 #include <machine/svr4_32_machdep.h>
97 #endif
98 
99 #include <sparc/fpu/fpu_extern.h>
100 #include <sparc64/sparc64/cache.h>
101 
102 #ifndef offsetof
103 #define	offsetof(s, f) ((size_t)&((s *)0)->f)
104 #endif
105 
106 #ifdef DEBUG
/* Current trap level, read from %tl with rdpr. */
#define tl() ({ \
	int cur_level; \
	__asm __volatile("rdpr %%tl, %0" : "=r" (cur_level) :); \
	cur_level; \
})
113 #endif
114 
115 /* trapstats */
116 int trapstats = 0;
117 int protfix = 0;
118 int udmiss = 0;	/* Number of normal/nucleus data/text miss/protection faults */
119 int udhit = 0;
120 int udprot = 0;
121 int utmiss = 0;
122 int kdmiss = 0;
123 int kdhit = 0;
124 int kdprot = 0;
125 int ktmiss = 0;
126 int iveccnt = 0; /* number of normal/nucleus interrupt/interrupt vector faults */
127 int uintrcnt = 0;
128 int kiveccnt = 0;
129 int kintrcnt = 0;
130 int intristk = 0; /* interrupts when already on intrstack */
131 int intrpoll = 0; /* interrupts not using vector lists */
132 int wfill = 0;
133 int kwfill = 0;
134 int wspill = 0;
135 int wspillskip = 0;
136 int rftucnt = 0;
137 int rftuld = 0;
138 int rftudone = 0;
139 int rftkcnt[5] = { 0, 0, 0, 0, 0 };
140 
141 #ifdef DEBUG
142 #define RW_64		0x1
143 #define RW_ERR		0x2
144 #define RW_FOLLOW	0x4
145 int	rwindow_debug = RW_ERR;
146 #define TDB_ADDFLT	0x1
147 #define TDB_TXTFLT	0x2
148 #define TDB_TRAP	0x4
149 #define TDB_SYSCALL	0x8
150 #define TDB_FOLLOW	0x10
151 #define TDB_FRAME	0x20
152 #define TDB_NSAVED	0x40
153 #define TDB_TL		0x80
154 #define TDB_STOPSIG	0x100
155 #define TDB_STOPCALL	0x200
156 #define TDB_STOPCPIO	0x400
157 #define TDB_SYSTOP	0x800
158 int	trapdebug = 0/*|TDB_SYSCALL|TDB_STOPSIG|TDB_STOPCPIO|TDB_ADDFLT|TDB_FOLLOW*/;
159 /* #define __inline */
160 #endif
161 
162 #ifdef DDB
163 #if 1
164 #define DEBUGGER(t,f)	do { kdb_trap(t,f); } while (0)
165 #else
166 #define DEBUGGER(t,f)	Debugger()
167 #endif
168 #else
169 #define DEBUGGER(t,f)
170 #define Debugger()
171 #endif
172 
173 /*
174  * Initial FPU state is all registers == all 1s, everything else == all 0s.
175  * This makes every floating point register a signalling NaN, with sign bit
176  * set, no matter how it is interpreted.  Appendix N of the Sparc V8 document
177  * seems to imply that we should do this, and it does make sense.
178  */
179 __asm(".align 64");
180 struct	fpstate64 initfpstate = {
181 	{ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
182 	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
183 	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
184 	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }
185 };
186 
187 /*
188  * There are more than 100 trap types, but most are unused.
189  *
190  * Trap type 0 is taken over as an `Asynchronous System Trap'.
191  * This is left-over Vax emulation crap that should be fixed.
192  *
193  * Traps not supported on the spitfire are marked with `*',
194  * and additions are marked with `+'
195  */
/*
 * Printable names for trap types, indexed by trap type number.
 * Trap types that have no name of their own (including the slots
 * covered by a multi-entry handler, marked "trap continues") all
 * share the placeholder string T.
 */
static const char T[] = "*trap";
const char *trap_type[] = {
	/* non-user vectors */
	"ast",			/* 0 */
	"power on reset",	/* 1 */
	"watchdog reset",	/* 2 */
	"externally initiated reset",/* 3 */
	"software initiated reset",/* 4 */
	"RED state exception",	/* 5 */
	T, T,			/* 6..7 */
	"instruction access exception",	/* 8 */
	"*instruction MMU miss",/* 9 */
	"instruction access error",/* 0a */
	T, T, T, T, T,		/* 0b..0f */
	"illegal instruction",	/* 10 */
	"privileged opcode",	/* 11 */
	"*unimplemented LDD",	/* 12 */
	"*unimplemented STD",	/* 13 */
	T, T, T, T,		/* 14..17 */
	T, T, T, T, T, T, T, T, /* 18..1f */
	"fp disabled",		/* 20 */
	"fp exception ieee 754",/* 21 */
	"fp exception other",	/* 22 */
	"tag overflow",		/* 23 */
	"clean window",		/* 24 */
	T, T, T,		/* 25..27 -- trap continues */
	"division by zero",	/* 28 */
	"*internal processor error",/* 29 */
	T, T, T, T, T, T,	/* 2a..2f */
	"data access exception",/* 30 */
	"*data access MMU miss",/* 31 */
	"data access error",	/* 32 */
	"*data access protection",/* 33 */
	"mem address not aligned",	/* 34 */
	"LDDF mem address not aligned",/* 35 */
	"STDF mem address not aligned",/* 36 */
	"privileged action",	/* 37 */
	"LDQF mem address not aligned",/* 38 */
	"STQF mem address not aligned",/* 39 */
	T, T, T, T, T, T,	/* 3a..3f */
	"*async data error",	/* 40 */
	"level 1 int",		/* 41 */
	"level 2 int",		/* 42 */
	"level 3 int",		/* 43 */
	"level 4 int",		/* 44 */
	"level 5 int",		/* 45 */
	"level 6 int",		/* 46 */
	"level 7 int",		/* 47 */
	"level 8 int",		/* 48 */
	"level 9 int",		/* 49 */
	"level 10 int",		/* 4a */
	"level 11 int",		/* 4b */
	"level 12 int",		/* 4c */
	"level 13 int",		/* 4d */
	"level 14 int",		/* 4e */
	"level 15 int",		/* 4f */
	T, T, T, T, T, T, T, T, /* 50..57 */
	T, T, T, T, T, T, T, T, /* 58..5f */
	"+interrupt vector",	/* 60 */
	"+PA_watchpoint",	/* 61 */
	"+VA_watchpoint",	/* 62 */
	"+corrected ECC error",	/* 63 */
	"+fast instruction access MMU miss",/* 64 */
	T, T, T,		/* 65..67 -- trap continues */
	"+fast data access MMU miss",/* 68 */
	T, T, T,		/* 69..6b -- trap continues */
	"+fast data access protection",/* 6c */
	T, T, T,		/* 6d..6f -- trap continues */
	T, T, T, T, T, T, T, T, /* 70..77 */
	T, T, T, T, T, T, T, T, /* 78..7f */
	"spill 0 normal",	/* 80 */
	T, T, T,		/* 81..83 -- trap continues */
	"spill 1 normal",	/* 84 */
	T, T, T,		/* 85..87 -- trap continues */
	"spill 2 normal",	/* 88 */
	T, T, T,		/* 89..8b -- trap continues */
	"spill 3 normal",	/* 8c */
	T, T, T,		/* 8d..8f -- trap continues */
	"spill 4 normal",	/* 90 */
	T, T, T,		/* 91..93 -- trap continues */
	"spill 5 normal",	/* 94 */
	T, T, T,		/* 95..97 -- trap continues */
	"spill 6 normal",	/* 98 */
	T, T, T,		/* 99..9b -- trap continues */
	"spill 7 normal",	/* 9c */
	T, T, T,		/* 9d..9f -- trap continues */
	"spill 0 other",	/* a0 */
	T, T, T,		/* a1..a3 -- trap continues */
	"spill 1 other",	/* a4 */
	T, T, T,		/* a5..a7 -- trap continues */
	"spill 2 other",	/* a8 */
	T, T, T,		/* a9..ab -- trap continues */
	"spill 3 other",	/* ac */
	T, T, T,		/* ad..af -- trap continues */
	"spill 4 other",	/* b0 */
	T, T, T,		/* b1..b3 -- trap continues */
	"spill 5 other",	/* b4 */
	T, T, T,		/* b5..b7 -- trap continues */
	"spill 6 other",	/* b8 */
	T, T, T,		/* b9..bb -- trap continues */
	"spill 7 other",	/* bc */
	T, T, T,		/* bd..bf -- trap continues */
	"fill 0 normal",	/* c0 */
	T, T, T,		/* c1..c3 -- trap continues */
	"fill 1 normal",	/* c4 */
	T, T, T,		/* c5..c7 -- trap continues */
	"fill 2 normal",	/* c8 */
	T, T, T,		/* c9..cb -- trap continues */
	"fill 3 normal",	/* cc */
	T, T, T,		/* cd..cf -- trap continues */
	"fill 4 normal",	/* d0 */
	T, T, T,		/* d1..d3 -- trap continues */
	"fill 5 normal",	/* d4 */
	T, T, T,		/* d5..d7 -- trap continues */
	"fill 6 normal",	/* d8 */
	T, T, T,		/* d9..db -- trap continues */
	"fill 7 normal",	/* dc */
	T, T, T,		/* dd..df -- trap continues */
	"fill 0 other",		/* e0 */
	T, T, T,		/* e1..e3 -- trap continues */
	"fill 1 other",		/* e4 */
	T, T, T,		/* e5..e7 -- trap continues */
	"fill 2 other",		/* e8 */
	T, T, T,		/* e9..eb -- trap continues */
	"fill 3 other",		/* ec */
	T, T, T,		/* ed..ef -- trap continues */
	"fill 4 other",		/* f0 */
	T, T, T,		/* f1..f3 -- trap continues */
	"fill 5 other",		/* f4 */
	T, T, T,		/* f5..f7 -- trap continues */
	"fill 6 other",		/* f8 */
	T, T, T,		/* f9..fb -- trap continues */
	"fill 7 other",		/* fc */
	T, T, T,		/* fd..ff -- trap continues */

	/* user (software trap) vectors */
	"syscall",		/* 100 */
	"breakpoint",		/* 101 */
	"zero divide",		/* 102 */
	"flush windows",	/* 103 */
	"clean windows",	/* 104 */
	"range check",		/* 105 */
	"fix align",		/* 106 */
	"integer overflow",	/* 107 */
	"svr4 syscall",		/* 108 */
	"4.4 syscall",		/* 109 */
	"kgdb exec",		/* 10a */
	T, T, T, T, T,		/* 10b..10f */
	T, T, T, T, T, T, T, T,	/* 110..117 */
	T, T, T, T, T, T, T, T,	/* 118..11f */
	"svr4 getcc",		/* 120 */
	"svr4 setcc",		/* 121 */
	"svr4 getpsr",		/* 122 */
	"svr4 setpsr",		/* 123 */
	"svr4 gethrtime",	/* 124 */
	"svr4 gethrvtime",	/* 125 */
	T,			/* 126 */
	"svr4 gethrestime",	/* 127 */
	T, T, T, T, T, T, T, T, /* 128..12f */
	T, T,			/* 130..131 */
	"get condition codes",	/* 132 */
	"set condition codes",	/* 133 */
	T, T, T, T,		/* 134..137 */
	T, T, T, T, T, T, T, T, /* 138..13f */
	T, T, T, T, T, T, T, T, /* 140..147 */
	T, T, T, T, T, T, T, T, /* 148..14f */
	T, T, T, T, T, T, T, T, /* 150..157 */
	T, T, T, T, T, T, T, T, /* 158..15f */
	T, T, T, T,		/* 160..163 */
	"SVID syscall64",	/* 164 */
	"SPARC Intl syscall64",	/* 165 */
	"OS vendor spec syscall",/* 166 */
	"HW OEM syscall",	/* 167 */
	"ret from deferred trap",	/* 168 */
};

/* Number of entries in trap_type[]; larger trap numbers have no name. */
#define	N_TRAP_TYPES	(sizeof trap_type / sizeof *trap_type)
373 
374 static __inline void share_fpu __P((struct proc *, struct trapframe64 *));
375 static __inline void userret __P((struct proc *, int,  u_quad_t));
376 
377 void trap __P((struct trapframe64 *tf, unsigned type, vaddr_t pc, long tstate));
378 void data_access_fault __P((struct trapframe64 *tf, unsigned type, vaddr_t pc,
379 	vaddr_t va, vaddr_t sfva, u_long sfsr));
380 void data_access_error __P((struct trapframe64 *tf, unsigned type,
381 	vaddr_t afva, u_long afsr, vaddr_t sfva, u_long sfsr));
382 void text_access_fault __P((struct trapframe64 *tf, unsigned type,
383 	vaddr_t pc, u_long sfsr));
384 void text_access_error __P((struct trapframe64 *tf, unsigned type,
385 	vaddr_t pc, u_long sfsr, vaddr_t afva, u_long afsr));
386 void syscall __P((struct trapframe64 *, register_t code, register_t pc));
387 
#ifdef DEBUG
void print_trapframe __P((struct trapframe64 *));

/* Expand a 64-bit register into its upper and lower 32-bit halves. */
#define	TF_HALVES(r)	(u_int)((r) >> 32), (u_int)(r)

/*
 * Dump a trap frame with printf: the tstate/pc/npc words, the fault
 * address and %y, the pil/oldpil/tt fields, then the eight global and
 * eight out registers, each printed as two 32-bit hex halves.
 */
void
print_trapframe(tf)
	struct trapframe64 *tf;
{

	printf("Trapframe %p:\ttstate: %lx\tpc: %lx\tnpc: %lx\n",
	    tf, (u_long)tf->tf_tstate, (u_long)tf->tf_pc,
	    (u_long)tf->tf_npc);
	printf("fault: %p\ty: %x\t",
	    (void *)(u_long)tf->tf_fault, (int)tf->tf_y);
	printf("pil: %d\toldpil: %d\ttt: %x\tGlobals:\n",
	    (int)tf->tf_pil, (int)tf->tf_oldpil, (int)tf->tf_tt);

	/* %g0..%g7, four registers per line */
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\n",
	    TF_HALVES(tf->tf_global[0]), TF_HALVES(tf->tf_global[1]),
	    TF_HALVES(tf->tf_global[2]), TF_HALVES(tf->tf_global[3]));
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\nouts:\n",
	    TF_HALVES(tf->tf_global[4]), TF_HALVES(tf->tf_global[5]),
	    TF_HALVES(tf->tf_global[6]), TF_HALVES(tf->tf_global[7]));

	/* %o0..%o7, four registers per line */
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\n",
	    TF_HALVES(tf->tf_out[0]), TF_HALVES(tf->tf_out[1]),
	    TF_HALVES(tf->tf_out[2]), TF_HALVES(tf->tf_out[3]));
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\n",
	    TF_HALVES(tf->tf_out[4]), TF_HALVES(tf->tf_out[5]),
	    TF_HALVES(tf->tf_out[6]), TF_HALVES(tf->tf_out[7]));
}
#undef TF_HALVES
#endif
426 
427 /*
428  * Define the code needed before returning to user mode, for
429  * trap, mem_access_fault, and syscall.
430  */
431 static __inline void
432 userret(p, pc, oticks)
433 	struct proc *p;
434 	int pc;
435 	u_quad_t oticks;
436 {
437 	int sig;
438 
439 	/* take pending signals */
440 	while ((sig = CURSIG(p)) != 0)
441 		postsig(sig);
442 	p->p_priority = p->p_usrpri;
443 	if (want_ast) {
444 		want_ast = 0;
445 		if (p->p_flag & P_OWEUPC) {
446 			p->p_flag &= ~P_OWEUPC;
447 			ADDUPROF(p);
448 		}
449 	}
450 	if (want_resched) {
451 		/*
452 		 * We are being preempted.
453 		 */
454 		preempt(NULL);
455 		while ((sig = CURSIG(p)) != 0)
456 			postsig(sig);
457 	}
458 
459 	/*
460 	 * If profiling, charge recent system time to the trapped pc.
461 	 */
462 	if (p->p_flag & P_PROFIL)
463 		addupc_task(p, pc, (int)(p->p_sticks - oticks));
464 
465 	curcpu()->ci_schedstate.spc_curpriority = p->p_priority;
466 }
467 
468 /*
469  * If someone stole the FPU while we were away, do not enable it
470  * on return.  This is not done in userret() above as it must follow
471  * the ktrsysret() in syscall().  Actually, it is likely that the
472  * ktrsysret should occur before the call to userret.
473  *
474  * Oh, and don't touch the FPU bit if we're returning to the kernel.
475  */
476 static __inline void
477 share_fpu(p, tf)
478 	struct proc *p;
479 	struct trapframe64 *tf;
480 {
481 	if (!(tf->tf_tstate & (PSTATE_PRIV << TSTATE_PSTATE_SHIFT)) &&
482 	    fpproc != p)
483 		tf->tf_tstate &= ~(PSTATE_PEF << TSTATE_PSTATE_SHIFT);
484 }
485 
486 /*
487  * Called from locore.s trap handling, for non-MMU-related traps.
488  * (MMU-related traps go through mem_access_fault, below.)
489  */
490 void
491 trap(tf, type, pc, tstate)
492 	struct trapframe64 *tf;
493 	unsigned int type;
494 	vaddr_t pc;
495 	long tstate;
496 {
497 	struct proc *p;
498 	struct pcb *pcb;
499 	int64_t n;
500 	u_quad_t sticks;
501 	int pstate = tstate >> TSTATE_PSTATE_SHIFT;
502 	int error;
503 
504 	/* This steps the PC over the trap. */
505 #define	ADVANCE (n = tf->tf_npc, tf->tf_pc = n, tf->tf_npc = n + 4)
506 
507 #ifdef DEBUG
508 	if (tf->tf_pc == tf->tf_npc) {
509 		printf("trap: tpc %p == tnpc %p\n",
510 		    (void *)(u_long)tf->tf_pc, (void *)(u_long)tf->tf_npc);
511 		Debugger();
512 	}
513 	if ((trapdebug & TDB_NSAVED && cpcb->pcb_nsaved) ||
514 	    trapdebug & (TDB_FOLLOW | TDB_TRAP)) {
515 		char sbuf[sizeof(PSTATE_BITS) + 64];
516 
517 		printf("trap: type 0x%x: pc=%lx &tf=%p\n",
518 		       type, pc, tf);
519 		bitmask_snprintf(pstate, PSTATE_BITS, sbuf, sizeof(sbuf));
520 		printf(" npc=%lx pstate=%s %s\n",
521 		       (long)tf->tf_npc, sbuf,
522 		       type < N_TRAP_TYPES ? trap_type[type] :
523 		       ((type == T_AST) ? "ast" :
524 			((type == T_RWRET) ? "rwret" : T)));
525 	}
526 	if ((trapdebug & (TDB_FOLLOW | TDB_TRAP)) ||
527 	    ((trapdebug & TDB_TL) && tl())) {
528 		char sbuf[sizeof(PSTATE_BITS) + 64];
529 
530 		extern int trap_trace_dis;
531 		trap_trace_dis = 1;
532 		printf("trap: type 0x%x: lvl=%d pc=%lx &tf=%p",
533 		       type, (int)tl(), pc, tf);
534 		bitmask_snprintf(pstate, PSTATE_BITS, sbuf, sizeof(sbuf));
535 		printf(" npc=%lx pstate=%s %s\n",
536 		       (long)tf->tf_npc, sbuf,
537 		       type < N_TRAP_TYPES ? trap_type[type] :
538 		       ((type == T_AST) ? "ast" :
539 			((type == T_RWRET) ? "rwret" : T)));
540 #ifdef DDB
541 		kdb_trap(type, tf);
542 #endif
543 	}
544 #endif
545 
546 	uvmexp.traps++;
547 
548 	/*
549 	 * Generally, kernel traps cause a panic.  Any exceptions are
550 	 * handled early here.
551 	 */
552 	if (pstate & PSTATE_PRIV) {
553 #ifdef DDB
554 		if (type == T_BREAKPOINT) {
555 			write_all_windows();
556 			if (kdb_trap(type, tf)) {
557 				/* ADVANCE; */
558 				return;
559 			}
560 		}
561 		if (type == T_PA_WATCHPT || type == T_VA_WATCHPT) {
562 			if (kdb_trap(type, tf)) {
563 				/* DDB must turn off watchpoints or something */
564 				return;
565 			}
566 		}
567 #endif
568 		/*
569 		 * The kernel needs to use FPU registers for block
570 		 * load/store.  If we trap in priviliged code, save
571 		 * the FPU state if there is any and enable the FPU.
572 		 *
573 		 * We rely on the kernel code properly enabling the FPU
574 		 * in %fprs, otherwise we'll hang here trying to enable
575 		 * the FPU.
576 		 */
577 		if (type == T_FPDISABLED) {
578 extern void db_printf(const char * , ...);
579 #ifndef NEW_FPSTATE
580 			if (fpproc != NULL) {	/* someone else had it */
581 				savefpstate(fpproc->p_md.md_fpstate);
582 				fpproc = NULL;
583 				/* Enable the FPU */
584 /*				loadfpstate(initfpstate);*/
585 			}
586 			tf->tf_tstate |= (PSTATE_PEF << TSTATE_PSTATE_SHIFT);
587 			return;
588 #else
589 			struct proc *newfpproc;
590 
591 			/* New scheme */
592 			if (CLKF_INTR((struct clockframe *)tf) || !curproc) {
593 				newfpproc = &proc0;
594 			} else {
595 				newfpproc = curproc;
596 			}
597 			if (fpproc != newfpproc) {
598 				if (fpproc != NULL) {
599 					/* someone else had it, maybe? */
600 					savefpstate(fpproc->p_md.md_fpstate);
601 					fpproc = NULL;
602 				}
603 				/* If we have an allocated fpstate, load it */
604 				if (newfpproc->p_md.md_fpstate != 0) {
605 					fpproc = newfpproc;
606 					loadfpstate(fpproc->p_md.md_fpstate);
607 				} else
608 					fpproc = NULL;
609 			}
610 			/* Enable the FPU */
611 			tf->tf_tstate |= (PSTATE_PEF << TSTATE_PSTATE_SHIFT);
612 			return;
613 #endif
614 		}
615 		goto dopanic;
616 	}
617 	if ((p = curproc) == NULL)
618 		p = &proc0;
619 	sticks = p->p_sticks;
620 	pcb = &p->p_addr->u_pcb;
621 	p->p_md.md_tf = tf;	/* for ptrace/signals */
622 
623 	switch (type) {
624 
625 	default:
626 		if (type < 0x100) {
627 			extern int trap_trace_dis;
628 dopanic:
629 			trap_trace_dis = 1;
630 
631 			{
632 				char sbuf[sizeof(PSTATE_BITS) + 64];
633 
634 				printf("trap type 0x%x: pc=%lx",
635 				       type, pc);
636 				bitmask_snprintf(pstate, PSTATE_BITS, sbuf,
637 						 sizeof(sbuf));
638 				printf(" npc=%lx pstate=%s\n",
639 				       (long)tf->tf_npc, sbuf);
640 				DEBUGGER(type, tf);
641 				panic(type < N_TRAP_TYPES ? trap_type[type] : T);
642 			}
643 			/* NOTREACHED */
644 		}
645 #if defined(COMPAT_SVR4) || defined(COMPAT_SVR4_32)
646 badtrap:
647 #endif
648 		/* the following message is gratuitous */
649 		/* ... but leave it in until we find anything */
650 		printf("%s[%d]: unimplemented software trap 0x%x\n",
651 		    p->p_comm, p->p_pid, type);
652 		trapsignal(p, SIGILL, type);
653 		break;
654 
655 #if defined(COMPAT_SVR4) || defined(COMPAT_SVR4_32)
656 	case T_SVR4_GETCC:
657 	case T_SVR4_SETCC:
658 	case T_SVR4_GETPSR:
659 	case T_SVR4_SETPSR:
660 	case T_SVR4_GETHRTIME:
661 	case T_SVR4_GETHRVTIME:
662 	case T_SVR4_GETHRESTIME:
663 #if defined(COMPAT_SVR4_32)
664 		if (svr4_32_trap(type, p))
665 			break;
666 #endif
667 #if defined(COMPAT_SVR4)
668 		if (svr4_trap(type, p))
669 			break;
670 #endif
671 		goto badtrap;
672 #endif
673 
674 	case T_AST:
675 		break;	/* the work is all in userret() */
676 
677 	case T_ILLINST:
678 	case T_INST_EXCEPT:
679 	case T_TEXTFAULT:
680 		/* This is not an MMU issue!!!! */
681 		printf("trap: textfault at %lx!! sending SIGILL due to trap %d: %s\n",
682 		       pc, type, type < N_TRAP_TYPES ? trap_type[type] : T);
683 #if defined(DDB) && defined(DEBUG)
684 		if (trapdebug & TDB_STOPSIG)
685 			Debugger();
686 #endif
687 		trapsignal(p, SIGILL, 0);	/* XXX code?? */
688 		break;
689 
690 	case T_PRIVINST:
691 		printf("trap: privinst!! sending SIGILL due to trap %d: %s\n",
692 		       type, type < N_TRAP_TYPES ? trap_type[type] : T);
693 #if defined(DDB) && defined(DEBUG)
694 		if (trapdebug & TDB_STOPSIG)
695 			Debugger();
696 #endif
697 		trapsignal(p, SIGILL, 0);	/* XXX code?? */
698 		break;
699 
700 	case T_PRIVACT:
701 		trapsignal(p, SIGILL, 0);
702 		break;
703 
704 	case T_FPDISABLED: {
705 		struct fpstate64 *fs = p->p_md.md_fpstate;
706 
707 		if (fs == NULL) {
708 			/* NOTE: fpstate must be 64-bit aligned */
709 			fs = malloc((sizeof *fs), M_SUBPROC, M_WAITOK);
710 			*fs = initfpstate;
711 			fs->fs_qsize = 0;
712 			p->p_md.md_fpstate = fs;
713 		}
714 		/*
715 		 * We may have more FPEs stored up and/or ops queued.
716 		 * If they exist, handle them and get out.  Otherwise,
717 		 * resolve the FPU state, turn it on, and try again.
718 		 *
719 		 * Ultras should never have a FPU queue.
720 		 */
721 		if (fs->fs_qsize) {
722 			printf("trap: Warning fs_qsize is %d\n",fs->fs_qsize);
723 			fpu_cleanup(p, fs);
724 			break;
725 		}
726 		if (fpproc != p) {		/* we do not have it */
727 			if (fpproc != NULL)	/* someone else had it */
728 				savefpstate(fpproc->p_md.md_fpstate);
729 			loadfpstate(fs);
730 			fpproc = p;		/* now we do have it */
731 		}
732 		tf->tf_tstate |= (PSTATE_PEF << TSTATE_PSTATE_SHIFT);
733 		break;
734 	}
735 
736 	case T_ALIGN:
737 	case T_LDDF_ALIGN:
738 	case T_STDF_ALIGN:
739 	{
740 		int64_t dsfsr, dsfar=0, isfsr;
741 
742 		dsfsr = ldxa(SFSR, ASI_DMMU);
743 		if (dsfsr & SFSR_FV)
744 			dsfar = ldxa(SFAR, ASI_DMMU);
745 		isfsr = ldxa(SFSR, ASI_IMMU);
746 		/*
747 		 * If we're busy doing copyin/copyout continue
748 		 */
749 		if (p->p_addr && p->p_addr->u_pcb.pcb_onfault) {
750 			tf->tf_pc = (vaddr_t)p->p_addr->u_pcb.pcb_onfault;
751 			tf->tf_npc = tf->tf_pc + 4;
752 			break;
753 		}
754 
755 #ifdef DEBUG
756 #define fmt64(x)	(u_int)((x)>>32), (u_int)((x))
757 		printf("Alignment error: pid=%d comm=%s dsfsr=%08x:%08x "
758 		       "dsfar=%x:%x isfsr=%08x:%08x pc=%lx\n",
759 		       p->p_pid, p->p_comm, fmt64(dsfsr), fmt64(dsfar),
760 		       fmt64(isfsr), pc);
761 #endif
762 	}
763 
764 #if defined(DDB) && defined(DEBUG)
765 	if (trapdebug & TDB_STOPSIG) {
766 		write_all_windows();
767 		kdb_trap(type, tf);
768 	}
769 #endif
770 		if ((p->p_md.md_flags & MDP_FIXALIGN) != 0 &&
771 		    fixalign(p, tf) == 0) {
772 			ADVANCE;
773 			break;
774 		}
775 		trapsignal(p, SIGBUS, 0);	/* XXX code?? */
776 		break;
777 
778 	case T_FP_IEEE_754:
779 	case T_FP_OTHER:
780 		/*
781 		 * Clean up after a floating point exception.
782 		 * fpu_cleanup can (and usually does) modify the
783 		 * state we save here, so we must `give up' the FPU
784 		 * chip context.  (The software and hardware states
785 		 * will not match once fpu_cleanup does its job, so
786 		 * we must not save again later.)
787 		 */
788 		if (p != fpproc)
789 			panic("fpe without being the FP user");
790 		savefpstate(p->p_md.md_fpstate);
791 		fpproc = NULL;
792 		/* tf->tf_psr &= ~PSR_EF; */	/* share_fpu will do this */
793 		if (p->p_md.md_fpstate->fs_qsize == 0) {
794 			error = copyin((caddr_t)pc,
795 			    &p->p_md.md_fpstate->fs_queue[0].fq_instr,
796 			    sizeof(int));
797 			if (error) {
798 				trapsignal(p, SIGBUS, 0);	/* XXX code */
799 				break;
800 			}
801 			p->p_md.md_fpstate->fs_qsize = 1;
802 			fpu_cleanup(p, p->p_md.md_fpstate);
803 			ADVANCE;
804 		} else
805 			fpu_cleanup(p, p->p_md.md_fpstate);
806 		/* fpu_cleanup posts signals if needed */
807 #if 0		/* ??? really never??? */
808 		ADVANCE;
809 #endif
810 		break;
811 
812 	case T_TAGOF:
813 		trapsignal(p, SIGEMT, 0);	/* XXX code?? */
814 		break;
815 
816 	case T_BREAKPOINT:
817 		trapsignal(p, SIGTRAP, 0);
818 		break;
819 
820 	case T_DIV0:
821 		ADVANCE;
822 		trapsignal(p, SIGFPE, FPE_INTDIV_TRAP);
823 		break;
824 
825 	case T_CLEANWIN:
826 		uprintf("T_CLEANWIN\n");	/* XXX Should not get this */
827 		ADVANCE;
828 		break;
829 
830 	case T_FLUSHWIN:
831 		/* Software window flush for v8 software */
832 		write_all_windows();
833 		ADVANCE;
834 		break;
835 
836 	case T_RANGECHECK:
837 		printf("T_RANGECHECK\n");	/* XXX */
838 		ADVANCE;
839 		trapsignal(p, SIGILL, 0);	/* XXX code?? */
840 		break;
841 
842 	case T_FIXALIGN:
843 #ifdef DEBUG_ALIGN
844 		uprintf("T_FIXALIGN\n");
845 #endif
846 		/* User wants us to fix alignment faults */
847 		p->p_md.md_flags |= MDP_FIXALIGN;
848 		ADVANCE;
849 		break;
850 
851 	case T_INTOF:
852 		uprintf("T_INTOF\n");		/* XXX */
853 		ADVANCE;
854 		trapsignal(p, SIGFPE, FPE_INTOVF_TRAP);
855 		break;
856 	}
857 	userret(p, pc, sticks);
858 	share_fpu(p, tf);
859 #undef ADVANCE
860 #ifdef DEBUG
861 	if (trapdebug & (TDB_FOLLOW | TDB_TRAP)) {
862 		printf("trap: done\n");
863 		/* if (type != T_BREAKPOINT) Debugger(); */
864 	}
865 #if 0
866 	if (trapdebug & TDB_FRAME) {
867 		print_trapframe(tf);
868 	}
869 #endif
870 #endif
871 }
872 
873 /*
874  * Save windows from PCB into user stack, and return 0.  This is used on
875  * window overflow pseudo-traps (from locore.s, just before returning to
876  * user mode) and when ptrace or sendsig needs a consistent state.
877  * As a side effect, rwindow_save() always sets pcb_nsaved to 0.
878  *
879  * If the windows cannot be saved, pcb_nsaved is restored and we return -1.
880  *
881  * XXXXXX This cannot work properly.  I need to re-examine this register
882  * window thing entirely.
883  */
884 int
885 rwindow_save(p)
886 	struct proc *p;
887 {
888 	struct pcb *pcb = &p->p_addr->u_pcb;
889 	struct rwindow64 *rw = &pcb->pcb_rw[0];
890 	u_int64_t rwdest;
891 	int i, j;
892 
893 	i = pcb->pcb_nsaved;
894 #ifdef DEBUG
895 	if (rwindow_debug & RW_FOLLOW)
896 		printf("rwindow_save(%p): nsaved %d\n", p, i);
897 #endif
898 	if (i == 0)
899 		return (0);
900 #ifdef DEBUG
901 	if (rwindow_debug & RW_FOLLOW)
902 		printf("%s[%d]: rwindow: pcb->stack:", p->p_comm, p->p_pid);
903 #endif
904 	 while (i > 0) {
905 		rwdest = rw[i--].rw_in[6];
906 #ifdef DEBUG
907 		if (rwindow_debug & RW_FOLLOW)
908 			printf("window %d at %lx\n", i, (long)rwdest);
909 #endif
910 		if (rwdest & 1) {
911 #ifdef DEBUG
912 			if (rwindow_debug & RW_64) {
913 				printf("rwindow_save: 64-bit tf to %p+BIAS "
914 				       "or %p\n",
915 				       (void *)(long)rwdest,
916 				       (void *)(long)(rwdest+BIAS));
917 				Debugger();
918 			}
919 #endif
920 			rwdest += BIAS;
921 			if (copyout((caddr_t)&rw[i], (caddr_t)(u_long)rwdest,
922 				    sizeof(*rw))) {
923 #ifdef DEBUG
924 			if (rwindow_debug & (RW_ERR | RW_64))
925 				printf("rwindow_save: 64-bit pcb copyout "
926 				       "to %p failed\n",
927 				       (void *)(long)rwdest);
928 #endif
929 				return (-1);
930 			}
931 #ifdef DEBUG
932 			if (rwindow_debug & RW_64) {
933 				printf("Finished copyout(%p, %p, %lx)\n",
934 					(caddr_t)&rw[i], (caddr_t)(long)rwdest,
935                                 	sizeof(*rw));
936 				Debugger();
937 			}
938 #endif
939 		} else {
940 			struct rwindow32 rwstack;
941 
942 			/* 32-bit window */
943 			for (j = 0; j < 8; j++) {
944 				rwstack.rw_local[j] = (int)rw[i].rw_local[j];
945 				rwstack.rw_in[j] = (int)rw[i].rw_in[j];
946 			}
947 			/* Must truncate rwdest */
948 			if (copyout(&rwstack, (caddr_t)(u_long)(u_int)rwdest,
949 				    sizeof(rwstack))) {
950 #ifdef DEBUG
951 				if (rwindow_debug & RW_ERR)
952 					printf("rwindow_save: 32-bit pcb "
953 					       "copyout to %p (%p) failed\n",
954 					       (void *)(u_long)(u_int)rwdest,
955 					       (void *)(u_long)rwdest);
956 #endif
957 				return (-1);
958 			}
959 		}
960 	}
961 	pcb->pcb_nsaved = 0;
962 #ifdef DEBUG
963 	if (rwindow_debug & RW_FOLLOW) {
964 		printf("\n");
965 		Debugger();
966 	}
967 #endif
968 	return (0);
969 }
970 
971 /*
972  * Kill user windows (before exec) by writing back to stack or pcb
973  * and then erasing any pcb tracks.  Otherwise we might try to write
974  * the registers into the new process after the exec.
975  */
976 void
977 kill_user_windows(p)
978 	struct proc *p;
979 {
980 
981 	write_user_windows();
982 	p->p_addr->u_pcb.pcb_nsaved = 0;
983 }
984 
/*
 * This routine handles MMU generated faults.  About half
 * of them could be recoverable through uvm_fault.
 *
 * Arguments:
 *	tf	trapframe of the faulting context.  tf_tstate selects the
 *		privileged vs. user path; tf_pc/tf_npc are rewritten when
 *		recovering through pcb_onfault.
 *	type	trap type; T_FDMMU_MISS is always treated as a read.
 *	pc	program counter reported for the fault (used in messages
 *		and handed to userret()).
 *	addr	faulting address.  It is page-truncated before being given
 *		to uvm_fault(), and its TLB_TAG_ACCESS_CTX bits are tested
 *		to recognise the nucleus context.
 *	sfva	only printed by the DEBUG tracing in this routine.
 *	sfsr	fault status; SFSR_FV and SFSR_W select the access type.
 *
 * Outcomes:
 *	- pagein succeeded: return normally (through userret() when the
 *	  fault came from user mode);
 *	- privileged fault that cannot be resolved: resume at pcb_onfault
 *	  if one is set, otherwise panic;
 *	- user fault that cannot be resolved: SIGSEGV, or SIGKILL when
 *	  uvm_fault() reported ENOMEM.
 */
void
data_access_fault(tf, type, pc, addr, sfva, sfsr)
	struct trapframe64 *tf;
	unsigned type;
	vaddr_t pc;
	vaddr_t addr;
	vaddr_t sfva;
	u_long sfsr;
{
	u_int64_t tstate;
	struct proc *p;
	struct vmspace *vm;
	vaddr_t va;
	int rv;
	vm_prot_t access_type;
	vaddr_t onfault;
	u_quad_t sticks;
#ifdef DEBUG
	/*
	 * NOTE(review): lastdouble is only ever tested and cleared in this
	 * function, never set, so the "stacked data fault" report below
	 * cannot trigger as written.
	 */
	static int lastdouble;
	extern struct pcb* cpcb;
#endif

#ifdef DEBUG
	/* A trap pc equal to the next pc is suspicious; stop and look. */
	if (tf->tf_pc == tf->tf_npc) {
		printf("data_access_fault: tpc %lx == tnpc %lx\n",
		       (long)tf->tf_pc, (long)tf->tf_npc);
		Debugger();
	}
	write_user_windows();
	/*
	 * NOTE(review): the trace labels its values in signature order
	 * (tf, type, pc, addr, ...) but passes addr before pc, so the two
	 * appear swapped in the output.  Same for the other two traces of
	 * this form below.  Confirm whether that is intended.
	 */
	if ((cpcb->pcb_nsaved > 8) ||
	    (trapdebug & TDB_NSAVED && cpcb->pcb_nsaved) ||
	    (trapdebug & (TDB_ADDFLT | TDB_FOLLOW))) {
		printf("%ld: data_access_fault(%p, %x, %p, %p, %lx, %lx) "
			"nsaved=%d\n",
			(long)(curproc?curproc->p_pid:-1), tf, type,
			(void *)addr, (void *)pc,
			sfva, sfsr, (int)cpcb->pcb_nsaved);
		if ((trapdebug & TDB_NSAVED && cpcb->pcb_nsaved))
			Debugger();
	}
	if (trapdebug & TDB_FRAME) {
		print_trapframe(tf);
	}
	/* Report (and stop) when tl() is non-zero and TDB_TL is enabled. */
	if ((trapdebug & TDB_TL) && tl()) {
		printf("%ld: data_access_fault(%p, %x, %p, %p, %lx, %lx) "
			"nsaved=%d\n",
			(long)(curproc?curproc->p_pid:-1), tf, type,
			(void*)addr, (void*)pc,
			sfva, sfsr, (int)cpcb->pcb_nsaved);
		Debugger();
	}
	if (trapdebug & TDB_STOPCALL) {
		Debugger();
	}
#endif

	uvmexp.traps++;
	if ((p = curproc) == NULL)	/* safety check */
		p = &proc0;
	/* Snapshot p_sticks now; it is passed to userret() on the way out. */
	sticks = p->p_sticks;
	tstate = tf->tf_tstate;

	/* Find the faulting va to give to uvm_fault */
	va = trunc_page(addr);

#ifdef DEBUG
	if (lastdouble) {
		printf("stacked data fault @ %lx (pc %lx);", addr, pc);
		lastdouble = 0;
		if (curproc == NULL)
			printf("NULL proc\n");
		else
			printf("pid %d(%s); sigmask %x, sigcatch %x\n",
			       curproc->p_pid, curproc->p_comm,
				/* XXX */
			       curproc->p_sigctx.ps_sigmask.__bits[0],
			       curproc->p_sigctx.ps_sigcatch.__bits[0]);
	}
#endif

	/*
	 * Now munch on protections.
	 *
	 * If it was a FAST_DATA_ACCESS_MMU_MISS we have no idea what the
	 * access was since the SFSR is not set.  But we should never get
	 * here from there.
	 */
	if (type == T_FDMMU_MISS || (sfsr & SFSR_FV) == 0) {
		/* Punt */
		access_type = VM_PROT_READ;
	} else {
		access_type = (sfsr & SFSR_W) ? VM_PROT_WRITE : VM_PROT_READ;
	}
	if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
		/* Fault taken in privileged mode. */
		extern char Lfsbail[];

		/*
		 * If this was an access that we shouldn't try to page in,
		 * resume at the fault handler without any action.
		 */
		if (p->p_addr && p->p_addr->u_pcb.pcb_onfault == Lfsbail)
			goto kfault;

		/*
		 * During autoconfiguration, faults are never OK unless
		 * pcb_onfault is set.  Once running normally we must allow
		 * exec() to cause copy-on-write faults to kernel addresses.
		 */
		if (cold)
			goto kfault;
		if (!(addr & TLB_TAG_ACCESS_CTX)) {
			/* CTXT == NUCLEUS */
			/*
			 * Nucleus-context address: resolve it against
			 * kernel_map.  Success returns straight away;
			 * failure goes to the kernel recovery path.
			 */
			rv = uvm_fault(kernel_map, va, 0, access_type);
#ifdef DEBUG
			if (trapdebug & (TDB_ADDFLT | TDB_FOLLOW))
				printf("data_access_fault: kernel "
					"uvm_fault(%p, %lx, %x, %x) "
					"sez %x -- %s\n",
					kernel_map, va, 0, access_type, rv,
					rv ? "failure" : "success");
#endif
			if (rv == 0)
				return;
			goto kfault;
		}
		/*
		 * Privileged access to a non-nucleus context falls through
		 * to the process map lookup below.
		 */
	} else
		p->p_md.md_tf = tf;	/* user fault: record the trapframe */

	vm = p->p_vmspace;
	/* alas! must call the horrible vm code */
	/*
	 * pcb_onfault is cleared for the duration of the uvm_fault() call
	 * and restored afterwards.
	 * NOTE(review): presumably so that a fault taken inside uvm_fault()
	 * is not treated as a recoverable copyin/out fault -- confirm.
	 */
	onfault = (vaddr_t)p->p_addr->u_pcb.pcb_onfault;
	p->p_addr->u_pcb.pcb_onfault = NULL;
	rv = uvm_fault(&vm->vm_map, va, 0, access_type);
	p->p_addr->u_pcb.pcb_onfault = (void *)onfault;

#ifdef DEBUG
	if (trapdebug & (TDB_ADDFLT | TDB_FOLLOW))
		printf("data_access_fault: %s uvm_fault(%p, %lx, %x, %x) "
			"sez %x -- %s\n",
			&vm->vm_map == kernel_map ? "kernel!!!" : "user",
			&vm->vm_map, va, 0, access_type, rv,
			rv ? "failure" : "success");
#endif

	/*
	 * If this was a stack access we keep track of the maximum
	 * accessed stack size.  Also, if uvm_fault gets a protection
	 * failure it is due to accessing the stack region outside
	 * the current limit and we need to reflect that as an access
	 * error.
	 */
	if ((caddr_t)va >= vm->vm_maxsaddr) {
		if (rv == 0) {
			/* Grow the recorded stack size if this reached further. */
			segsz_t nss = btoc(p->p_vmspace->vm_minsaddr - va);
			if (nss > vm->vm_ssize)
				vm->vm_ssize = nss;
		} else if (rv == EACCES)
			rv = EFAULT;
	}
	if (rv != 0) {

		/*
		 * Pagein failed.  If doing copyin/out, return to onfault
		 * address.  Any other page fault in kernel, die; if user
		 * fault, deliver SIGSEGV.
		 */
		if (tstate & (PSTATE_PRIV << TSTATE_PSTATE_SHIFT)) {
			/*
			 * kfault is also entered by goto from the privileged
			 * checks above, i.e. from outside this block; rv may
			 * not have been assigned on those paths and is not
			 * used before the return below.
			 */
kfault:
			/* Re-read the handler; p_addr may be NULL here. */
			onfault = p->p_addr ?
			    (long)p->p_addr->u_pcb.pcb_onfault : 0;
			if (!onfault) {
				/* No recovery handler registered: fatal. */
				extern int trap_trace_dis;

				/* Disable traptrace for printf */
				trap_trace_dis = 1;
				(void) splhigh();
				printf("data fault: pc=%lx addr=%lx\n",
				    pc, addr);
				DEBUGGER(type, tf);
				panic("kernel fault");
				/* NOTREACHED */
			}
#ifdef DEBUG
			if (trapdebug &
			    (TDB_ADDFLT | TDB_FOLLOW | TDB_STOPCPIO)) {
				printf("data_access_fault: copyin/out of %p "
				       "fault -- recover\n", (void *)addr);
				DEBUGGER(type, tf);
			}
#endif
			/*
			 * Redirect the trap return to the onfault handler;
			 * npc is the instruction following it.
			 */
			tf->tf_pc = onfault;
			tf->tf_npc = onfault + 4;
			return;
		}
#ifdef DEBUG
		if (trapdebug & (TDB_ADDFLT | TDB_STOPSIG)) {
			extern int trap_trace_dis;
			trap_trace_dis = 1;
			printf("data_access_fault at addr %p: "
			    "sending SIGSEGV\n", (void *)addr);
			printf("%ld: data_access_fault(%p, %x, %p, %p, "
			       "%lx, %lx) nsaved=%d\n",
				(long)(curproc ? curproc->p_pid : -1), tf, type,
				(void *)addr, (void *)pc,
				sfva, sfsr, (int)cpcb->pcb_nsaved);
			Debugger();
		}
#endif
		/* User fault that could not be resolved: signal the process. */
		if (rv == ENOMEM) {
			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
			       p->p_pid, p->p_comm,
			       p->p_cred && p->p_ucred ?
			       p->p_ucred->cr_uid : -1);
			trapsignal(p, SIGKILL, (u_long)addr);
		} else {
			trapsignal(p, SIGSEGV, (u_long)addr);
		}
	}
	/* Only a fault taken from user mode goes through userret(). */
	if ((tstate & TSTATE_PRIV) == 0) {
		userret(p, pc, sticks);
		share_fpu(p, tf);
	}
#ifdef DEBUG
	if (trapdebug & (TDB_ADDFLT | TDB_FOLLOW))
		printf("data_access_fault: done\n");
	if (trapdebug & TDB_FRAME) {
		print_trapframe(tf);
	}
	if (trapdebug & (TDB_ADDFLT | TDB_FOLLOW)) {
		extern void *return_from_trap __P((void));

		if ((void *)(u_long)tf->tf_pc == (void *)return_from_trap) {
			printf("Returning from stack datafault\n");
		}
	}
#endif
}
1226 
1227 /*
1228  * This routine handles deferred errors caused by the memory
1229  * or I/O bus subsystems.  Most of these are fatal, and even
1230  * if they are not, recovery is painful.  Also, the TPC and
1231  * TNPC values are probably not valid if we're not doing a
1232  * special PEEK/POKE code sequence.
1233  */
1234 void
1235 data_access_error(tf, type, afva, afsr, sfva, sfsr)
1236 	struct trapframe64 *tf;
1237 	unsigned type;
1238 	vaddr_t sfva;
1239 	u_long sfsr;
1240 	vaddr_t afva;
1241 	u_long afsr;
1242 {
1243 	u_long pc;
1244 	u_int64_t tstate;
1245 	struct proc *p;
1246 	vaddr_t onfault;
1247 	u_quad_t sticks;
1248 #ifdef DEBUG
1249 	static int lastdouble;
1250 #endif
1251 
1252 #ifdef DEBUG
1253 	if (tf->tf_pc == tf->tf_npc) {
1254 		printf("data_access_error: tpc %lx == tnpc %lx\n",
1255 		       (long)tf->tf_pc, (long)tf->tf_npc);
1256 		Debugger();
1257 	}
1258 	write_user_windows();
1259 	if ((trapdebug & TDB_NSAVED && cpcb->pcb_nsaved) ||
1260 	    trapdebug & (TDB_ADDFLT | TDB_FOLLOW)) {
1261 		char buf[768];
1262 
1263 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1264 		printf("%d data_access_error(%lx, %lx, %lx, %p)=%lx @ %p %s\n",
1265 		       curproc?curproc->p_pid:-1,
1266 		       (long)type, (long)sfva, (long)afva, tf,
1267 		       (long)tf->tf_tstate,
1268 		       (void *)(u_long)tf->tf_pc, buf);
1269 	}
1270 	if (trapdebug & TDB_FRAME) {
1271 		print_trapframe(tf);
1272 	}
1273 	if ((trapdebug & TDB_TL) && tl()) {
1274 		char buf[768];
1275 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1276 
1277 		printf("%d tl %ld data_access_error(%lx, %lx, %lx, %p)="
1278 		       "%lx @ %lx %s\n",
1279 		       curproc ? curproc->p_pid : -1, (long)tl(),
1280 		       (long)type, (long)sfva, (long)afva, tf,
1281 		       (long)tf->tf_tstate,
1282 		       (long)tf->tf_pc, buf);
1283 		Debugger();
1284 	}
1285 	if (trapdebug & TDB_STOPCALL) {
1286 		Debugger();
1287 	}
1288 #endif
1289 
1290 	uvmexp.traps++;
1291 	if ((p = curproc) == NULL)	/* safety check */
1292 		p = &proc0;
1293 	sticks = p->p_sticks;
1294 
1295 	pc = tf->tf_pc;
1296 	tstate = tf->tf_tstate;
1297 
1298 	onfault = p->p_addr ? (long)p->p_addr->u_pcb.pcb_onfault : 0;
1299 	printf("data error type %x sfsr=%lx sfva=%lx afsr=%lx afva=%lx tf=%p\n",
1300 		type, sfsr, sfva, afsr, afva, tf);
1301 
1302 	if (afsr == 0) {
1303 		printf("data_access_error: no fault\n");
1304 		goto out;	/* No fault. Why were we called? */
1305 	}
1306 
1307 #ifdef DEBUG
1308 	if (lastdouble) {
1309 		printf("stacked data error @ %lx (pc %lx); sfsr %lx",
1310 		       sfva, pc, sfsr);
1311 		lastdouble = 0;
1312 		if (curproc == NULL)
1313 			printf("NULL proc\n");
1314 		else
1315 			printf("pid %d(%s); sigmask %x, sigcatch %x\n",
1316 			       curproc->p_pid, curproc->p_comm,
1317 				/* XXX */
1318 			       curproc->p_sigctx.ps_sigmask.__bits[0],
1319 			       curproc->p_sigctx.ps_sigcatch.__bits[0]);
1320 	}
1321 #endif
1322 
1323 	if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
1324 		if (!onfault) {
1325 			extern int trap_trace_dis;
1326 			char buf[768];
1327 
1328 			trap_trace_dis = 1; /* Disable traptrace for printf */
1329 			bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1330 			(void) splhigh();
1331 			printf("data fault: pc=%lx addr=%lx sfsr=%s\n",
1332 				(u_long)pc, (long)sfva, buf);
1333 			DEBUGGER(type, tf);
1334 			panic("kernel fault");
1335 			/* NOTREACHED */
1336 		}
1337 
1338 		/*
1339 		 * If this was a priviliged error but not a probe, we
1340 		 * cannot recover, so panic.
1341 		 */
1342 		if (afsr & ASFR_PRIV) {
1343 			char buf[128];
1344 
1345 			bitmask_snprintf(afsr, AFSR_BITS, buf, sizeof(buf));
1346 			panic("Privileged Async Fault: AFAR %p AFSR %lx\n%s",
1347 				(void *)afva, afsr, buf);
1348 			/* NOTREACHED */
1349 		}
1350 #ifdef DEBUG
1351 		if (trapdebug & (TDB_ADDFLT | TDB_FOLLOW | TDB_STOPCPIO)) {
1352 			printf("data_access_error: kern fault -- "
1353 			       "skipping instr\n");
1354 			if (trapdebug & TDB_STOPCPIO)
1355 				DEBUGGER(type, tf);
1356 		}
1357 #endif
1358 		tf->tf_pc = onfault;
1359 		tf->tf_npc = onfault + 4;
1360 		return;
1361 	}
1362 #ifdef DEBUG
1363 	if (trapdebug & (TDB_ADDFLT | TDB_STOPSIG)) {
1364 		extern int trap_trace_dis;
1365 
1366 		trap_trace_dis = 1;
1367 		printf("data_access_error at %p: sending SIGSEGV\n",
1368 			(void *)(u_long)afva);
1369 		Debugger();
1370 	}
1371 #endif
1372 	trapsignal(p, SIGSEGV, (u_long)sfva);
1373 out:
1374 	if ((tstate & TSTATE_PRIV) == 0) {
1375 		userret(p, pc, sticks);
1376 		share_fpu(p, tf);
1377 	}
1378 #ifdef DEBUG
1379 	if (trapdebug & (TDB_ADDFLT | TDB_FOLLOW))
1380 		printf("data_access_error: done\n");
1381 	if (trapdebug & TDB_FRAME) {
1382 		print_trapframe(tf);
1383 	}
1384 #endif
1385 }
1386 
1387 /*
1388  * This routine handles MMU generated faults.  About half
1389  * of them could be recoverable through uvm_fault.
1390  */
1391 void
1392 text_access_fault(tf, type, pc, sfsr)
1393 	unsigned type;
1394 	vaddr_t pc;
1395 	struct trapframe64 *tf;
1396 	u_long sfsr;
1397 {
1398 	u_int64_t tstate;
1399 	struct proc *p;
1400 	struct vmspace *vm;
1401 	vaddr_t va;
1402 	int rv;
1403 	vm_prot_t access_type;
1404 	u_quad_t sticks;
1405 
1406 #ifdef DEBUG
1407 	if (tf->tf_pc == tf->tf_npc) {
1408 		printf("text_access_fault: tpc %p == tnpc %p\n",
1409 		    (void *)(u_long)tf->tf_pc, (void *)(u_long)tf->tf_npc);
1410 		Debugger();
1411 	}
1412 	write_user_windows();
1413 	if (((trapdebug & TDB_NSAVED) && cpcb->pcb_nsaved) ||
1414 	    (trapdebug & (TDB_TXTFLT | TDB_FOLLOW)))
1415 		printf("%d text_access_fault(%x, %lx, %p)\n",
1416 		       curproc?curproc->p_pid:-1, type, pc, tf);
1417 	if (trapdebug & TDB_FRAME) {
1418 		print_trapframe(tf);
1419 	}
1420 	if ((trapdebug & TDB_TL) && tl()) {
1421 		printf("%d tl %d text_access_fault(%x, %lx, %p)\n",
1422 		       curproc?curproc->p_pid:-1, tl(), type, pc, tf);
1423 		Debugger();
1424 	}
1425 	if (trapdebug & TDB_STOPCALL) {
1426 		Debugger();
1427 	}
1428 #endif
1429 
1430 	uvmexp.traps++;
1431 	if ((p = curproc) == NULL)	/* safety check */
1432 		p = &proc0;
1433 	sticks = p->p_sticks;
1434 	tstate = tf->tf_tstate;
1435 	va = trunc_page(pc);
1436 
1437 	/* Now munch on protections... */
1438 
1439 	access_type = VM_PROT_EXECUTE;
1440 	if (tstate & (PSTATE_PRIV << TSTATE_PSTATE_SHIFT)) {
1441 		extern int trap_trace_dis;
1442 		trap_trace_dis = 1; /* Disable traptrace for printf */
1443 		(void) splhigh();
1444 		printf("text_access_fault: pc=%lx va=%lx\n", pc, va);
1445 		DEBUGGER(type, tf);
1446 		panic("kernel fault");
1447 		/* NOTREACHED */
1448 	} else
1449 		p->p_md.md_tf = tf;
1450 
1451 	vm = p->p_vmspace;
1452 	/* alas! must call the horrible vm code */
1453 	rv = uvm_fault(&vm->vm_map, va, 0, access_type);
1454 
1455 #ifdef DEBUG
1456 	if (trapdebug & (TDB_TXTFLT | TDB_FOLLOW))
1457 		printf("text_access_fault: uvm_fault(%p, %lx, %x) sez %x\n",
1458 		       &vm->vm_map, va, 0, rv);
1459 #endif
1460 	/*
1461 	 * If this was a stack access we keep track of the maximum
1462 	 * accessed stack size.  Also, if uvm_fault gets a protection
1463 	 * failure it is due to accessing the stack region outside
1464 	 * the current limit and we need to reflect that as an access
1465 	 * error.
1466 	 */
1467 	if ((caddr_t)va >= vm->vm_maxsaddr) {
1468 		if (rv == 0) {
1469 			segsz_t nss = btoc(p->p_vmspace->vm_minsaddr - va);
1470 			if (nss > vm->vm_ssize)
1471 				vm->vm_ssize = nss;
1472 		} else if (rv == EACCES)
1473 			rv = EFAULT;
1474 	}
1475 	if (rv != 0) {
1476 
1477 		/*
1478 		 * Pagein failed. Any other page fault in kernel, die; if user
1479 		 * fault, deliver SIGSEGV.
1480 		 */
1481 		if (tstate & TSTATE_PRIV) {
1482 			extern int trap_trace_dis;
1483 			trap_trace_dis = 1; /* Disable traptrace for printf */
1484 			(void) splhigh();
1485 			printf("text fault: pc=%llx\n", (unsigned long long)pc);
1486 			DEBUGGER(type, tf);
1487 			panic("kernel fault");
1488 			/* NOTREACHED */
1489 		}
1490 #ifdef DEBUG
1491 		if (trapdebug & (TDB_TXTFLT | TDB_STOPSIG)) {
1492 			extern int trap_trace_dis;
1493 			trap_trace_dis = 1;
1494 			printf("text_access_fault at %p: sending SIGSEGV\n",
1495 			    (void *)(u_long)va);
1496 			Debugger();
1497 		}
1498 #endif
1499 		trapsignal(p, SIGSEGV, (u_long)pc);
1500 	}
1501 	if ((tstate & TSTATE_PRIV) == 0) {
1502 		userret(p, pc, sticks);
1503 		share_fpu(p, tf);
1504 	}
1505 #ifdef DEBUG
1506 	if (trapdebug & (TDB_TXTFLT | TDB_FOLLOW)) {
1507 		printf("text_access_fault: done\n");
1508 		/* kdb_trap(T_BREAKPOINT, tf); */
1509 	}
1510 	if (trapdebug & TDB_FRAME) {
1511 		print_trapframe(tf);
1512 	}
1513 #endif
1514 }
1515 
1516 
1517 /*
1518  * This routine handles deferred errors caused by the memory
1519  * or I/O bus subsystems.  Most of these are fatal, and even
1520  * if they are not, recovery is painful.  Also, the TPC and
1521  * TNPC values are probably not valid if we're not doing a
1522  * special PEEK/POKE code sequence.
1523  */
1524 void
1525 text_access_error(tf, type, pc, sfsr, afva, afsr)
1526 	struct trapframe64 *tf;
1527 	unsigned type;
1528 	vaddr_t pc;
1529 	u_long sfsr;
1530 	vaddr_t afva;
1531 	u_long afsr;
1532 {
1533 	int64_t tstate;
1534 	struct proc *p;
1535 	struct vmspace *vm;
1536 	vaddr_t va;
1537 	int rv;
1538 	vm_prot_t access_type;
1539 	u_quad_t sticks;
1540 #ifdef DEBUG
1541 	static int lastdouble;
1542 #endif
1543 	char buf[768];
1544 
1545 #ifdef DEBUG
1546 	if (tf->tf_pc == tf->tf_npc) {
1547 		printf("text_access_error: tpc %p == tnpc %p\n",
1548 		    (void *)(u_long)tf->tf_pc, (void *)(u_long)tf->tf_npc);
1549 		Debugger();
1550 	}
1551 	write_user_windows();
1552 	if ((trapdebug & TDB_NSAVED && cpcb->pcb_nsaved) ||
1553 	    trapdebug & (TDB_TXTFLT | TDB_FOLLOW)) {
1554 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1555 		printf("%ld text_access_error(%lx, %lx, %lx, %p)=%lx @ %lx %s\n",
1556 		       (long)(curproc?curproc->p_pid:-1),
1557 		       (long)type, pc, (long)afva, tf, (long)tf->tf_tstate,
1558 		       (long)tf->tf_pc, buf);
1559 	}
1560 	if (trapdebug & TDB_FRAME) {
1561 		print_trapframe(tf);
1562 	}
1563 	if ((trapdebug & TDB_TL) && tl()) {
1564 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1565 		printf("%ld tl %ld text_access_error(%lx, %lx, %lx, %p)=%lx @ %lx %s\n",
1566 		       (long)(curproc?curproc->p_pid:-1), (long)tl(),
1567 		       (long)type, (long)pc, (long)afva, tf,
1568 		       (long)tf->tf_tstate, (long)tf->tf_pc, buf);
1569 		Debugger();
1570 	}
1571 	if (trapdebug & TDB_STOPCALL) {
1572 		Debugger();
1573 	}
1574 #endif
1575 	uvmexp.traps++;
1576 	if ((p = curproc) == NULL)	/* safety check */
1577 		p = &proc0;
1578 	sticks = p->p_sticks;
1579 
1580 	tstate = tf->tf_tstate;
1581 
1582 	if ((afsr) != 0) {
1583 		extern int trap_trace_dis;
1584 
1585 		trap_trace_dis++; /* Disable traptrace for printf */
1586 		printf("text_access_error: memory error...\n");
1587 		printf("text memory error type %d sfsr=%lx sfva=%lx afsr=%lx afva=%lx tf=%p\n",
1588 		       type, sfsr, pc, afsr, afva, tf);
1589 		trap_trace_dis--; /* Reenable traptrace for printf */
1590 
1591 		if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT))
1592 			panic("text_access_error: kernel memory error");
1593 
1594 		/* User fault -- Berr */
1595 		trapsignal(p, SIGBUS, (u_long)pc);
1596 	}
1597 
1598 	if ((sfsr & SFSR_FV) == 0 || (sfsr & SFSR_FT) == 0)
1599 		goto out;	/* No fault. Why were we called? */
1600 
1601 	va = trunc_page(pc);
1602 
1603 #ifdef DEBUG
1604 	if (lastdouble) {
1605 		printf("stacked text error @ pc %lx; sfsr %lx", pc, sfsr);
1606 		lastdouble = 0;
1607 		if (curproc == NULL)
1608 			printf("NULL proc\n");
1609 		else
1610 			printf("pid %d(%s); sigmask %x, sigcatch %x\n",
1611 			       curproc->p_pid, curproc->p_comm,
1612 				/* XXX */
1613 			       curproc->p_sigctx.ps_sigmask.__bits[0],
1614 			       curproc->p_sigctx.ps_sigcatch.__bits[0]);
1615 	}
1616 #endif
1617 	/* Now munch on protections... */
1618 
1619 	access_type = VM_PROT_EXECUTE;
1620 	if (tstate & (PSTATE_PRIV << TSTATE_PSTATE_SHIFT)) {
1621 		extern int trap_trace_dis;
1622 		trap_trace_dis = 1; /* Disable traptrace for printf */
1623 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1624 		(void) splhigh();
1625 		printf("text error: pc=%lx sfsr=%s\n", pc, buf);
1626 		DEBUGGER(type, tf);
1627 		panic("kernel fault");
1628 		/* NOTREACHED */
1629 	} else
1630 		p->p_md.md_tf = tf;
1631 
1632 	vm = p->p_vmspace;
1633 	/* alas! must call the horrible vm code */
1634 	rv = uvm_fault(&vm->vm_map, va, 0, access_type);
1635 
1636 	/*
1637 	 * If this was a stack access we keep track of the maximum
1638 	 * accessed stack size.  Also, if uvm_fault gets a protection
1639 	 * failure it is due to accessing the stack region outside
1640 	 * the current limit and we need to reflect that as an access
1641 	 * error.
1642 	 */
1643 	if ((caddr_t)va >= vm->vm_maxsaddr) {
1644 		if (rv == 0) {
1645 			segsz_t nss = btoc(p->p_vmspace->vm_minsaddr - va);
1646 			if (nss > vm->vm_ssize)
1647 				vm->vm_ssize = nss;
1648 		} else if (rv == EACCES)
1649 			rv = EFAULT;
1650 	}
1651 	if (rv != 0) {
1652 		/*
1653 		 * Pagein failed.  If doing copyin/out, return to onfault
1654 		 * address.  Any other page fault in kernel, die; if user
1655 		 * fault, deliver SIGSEGV.
1656 		 */
1657 		if (tstate & TSTATE_PRIV) {
1658 			extern int trap_trace_dis;
1659 			trap_trace_dis = 1; /* Disable traptrace for printf */
1660 			bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1661 			(void) splhigh();
1662 			printf("text error: pc=%lx sfsr=%s\n", pc, buf);
1663 			DEBUGGER(type, tf);
1664 			panic("kernel fault");
1665 			/* NOTREACHED */
1666 		}
1667 #ifdef DEBUG
1668 		if (trapdebug & (TDB_TXTFLT | TDB_STOPSIG)) {
1669 			extern int trap_trace_dis;
1670 			trap_trace_dis = 1;
1671 			printf("text_access_error at %p: sending SIGSEGV\n",
1672 			    (void *)(u_long)va);
1673 			Debugger();
1674 		}
1675 #endif
1676 		trapsignal(p, SIGSEGV, (u_long)pc);
1677 	}
1678 out:
1679 	if ((tstate & TSTATE_PRIV) == 0) {
1680 		userret(p, pc, sticks);
1681 		share_fpu(p, tf);
1682 	}
1683 #ifdef DEBUG
1684 	if (trapdebug & (TDB_TXTFLT | TDB_FOLLOW))
1685 		printf("text_access_error: done\n");
1686 	if (trapdebug & TDB_FRAME) {
1687 		print_trapframe(tf);
1688 	}
1689 #endif
1690 }
1691 
1692 /*
1693  * System calls.  `pc' is just a copy of tf->tf_pc.
1694  *
1695  * Note that the things labelled `out' registers in the trapframe were the
1696  * `in' registers within the syscall trap code (because of the automatic
1697  * `save' effect of each trap).  They are, however, the %o registers of the
1698  * thing that made the system call, and are named that way here.
1699  *
1700  * 32-bit system calls on a 64-bit system are a problem.  Each system call
1701  * argument is stored in the smaller of the argument's true size or a
1702  * `register_t'.  Now on a 64-bit machine all normal types can be stored in a
1703  * `register_t'.  (The only exceptions would be 128-bit `quad's or 128-bit
1704  * extended precision floating point values, which we don't support.)  For
1705  * 32-bit syscalls, 64-bit integers like `off_t's, double precision floating
1706  * point values, and several other types cannot fit in a 32-bit `register_t'.
1707  * These will require reading in two `register_t' values for one argument.
1708  *
1709  * In order to calculate the true size of the arguments and therefore whether
1710  * any argument needs to be split into two slots, the system call args
1711  * structure needs to be built with the appropriately sized register_t.
1712  * Otherwise the emul needs to do some magic to split oversized arguments.
1713  *
1714  * We can handle most of this stuff for normal syscalls by using either a 32-bit
1715  * or 64-bit array of `register_t' arguments.  Unfortunately ktrace always
1716  * expects arguments to be `register_t's, so it loses badly.  What's worse,
1717  * ktrace may need to do size translations to massage the argument array
1718  * appropriately according to the emulation that is doing the ktrace.
1719  *
1720  */
1721 void
1722 syscall(tf, code, pc)
1723 	register_t code;
1724 	struct trapframe64 *tf;
1725 	register_t pc;
1726 {
1727 	int i, nsys, nap;
1728 	int64_t *ap;
1729 	const struct sysent *callp;
1730 	struct proc *p;
1731 	int error = 0, new;
1732 	union args {
1733 		register32_t i[8];
1734 		register64_t l[8];
1735 	} args;
1736 	register_t rval[2];
1737 	u_quad_t sticks;
1738 #ifdef DIAGNOSTIC
1739 	extern struct pcb *cpcb;
1740 #endif
1741 
1742 #ifdef DEBUG
1743 	write_user_windows();
1744 	if (tf->tf_pc == tf->tf_npc) {
1745 		printf("syscall: tpc %p == tnpc %p\n", (void *)(u_long)tf->tf_pc,
1746 		    (void *)(u_long)tf->tf_npc);
1747 		Debugger();
1748 	}
1749 	if ((trapdebug & TDB_NSAVED && cpcb->pcb_nsaved) ||
1750 	    trapdebug & (TDB_SYSCALL | TDB_FOLLOW))
1751 		printf("%d syscall(%lx, %p, %lx)\n",
1752 		       curproc ? curproc->p_pid : -1, (u_long)code, tf,
1753 		       (u_long)pc);
1754 	if (trapdebug & TDB_FRAME) {
1755 		print_trapframe(tf);
1756 	}
1757 	if ((trapdebug & TDB_TL) && tl()) {
1758 		printf("%d tl %d syscall(%lx, %p, %lx)\n",
1759 		       curproc ? curproc->p_pid : -1, tl(), (u_long)code, tf,
1760 		       (u_long)pc);
1761 		Debugger();
1762 	}
1763 #endif
1764 
1765 	uvmexp.syscalls++;
1766 	p = curproc;
1767 #ifdef DIAGNOSTIC
1768 	if (tf->tf_tstate & TSTATE_PRIV)
1769 		panic("syscall from kernel");
1770 	if (cpcb != &p->p_addr->u_pcb)
1771 		panic("syscall: cpcb/ppcb mismatch");
1772 	if (tf != (struct trapframe64 *)((caddr_t)cpcb + USPACE) - 1)
1773 		panic("syscall: trapframe");
1774 #endif
1775 	sticks = p->p_sticks;
1776 	p->p_md.md_tf = tf;
1777 	new = code & (SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
1778 	code &= ~(SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
1779 
1780 	callp = p->p_emul->e_sysent;
1781 	nsys = p->p_emul->e_nsysent;
1782 
1783 	/*
1784 	 * The first six system call arguments are in the six %o registers.
1785 	 * Any arguments beyond that are in the `argument extension' area
1786 	 * of the user's stack frame (see <machine/frame.h>).
1787 	 *
1788 	 * Check for ``special'' codes that alter this, namely syscall and
1789 	 * __syscall.  The latter takes a quad syscall number, so that other
1790 	 * arguments are at their natural alignments.  Adjust the number
1791 	 * of ``easy'' arguments as appropriate; we will copy the hard
1792 	 * ones later as needed.
1793 	 */
1794 	ap = &tf->tf_out[0];
1795 	nap = 6;
1796 
1797 	switch (code) {
1798 	case SYS_syscall:
1799 		code = *ap++;
1800 		nap--;
1801 		break;
1802 	case SYS___syscall:
1803 		if (code < nsys &&
1804 		    callp[code].sy_call != callp[p->p_emul->e_nosys].sy_call)
1805 			break; /* valid system call */
1806 		if (tf->tf_out[6] & 1L) {
1807 			/* longs *are* quadwords */
1808 			code = ap[0];
1809 			ap += 1;
1810 			nap -= 1;
1811 		} else {
1812 			code = ap[_QUAD_LOWWORD];
1813 			ap += 2;
1814 			nap -= 2;
1815 		}
1816 		break;
1817 	}
1818 
1819 #ifdef DEBUG
1820 	if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW))
1821 		printf("%d syscall(%d[%x]): tstate=%x:%x %s\n",
1822 		       curproc?curproc->p_pid:-1, (int)code, (u_int)code,
1823 		       (int)(tf->tf_tstate>>32), (int)(tf->tf_tstate),
1824 		       (p->p_emul->e_syscallnames) ?
1825 		       ((code < 0 || code >= nsys) ?
1826 			"illegal syscall" :
1827 			p->p_emul->e_syscallnames[code]) :
1828 		       "unknown syscall");
1829 	if (p->p_emul->e_syscallnames)
1830 		p->p_addr->u_pcb.lastcall =
1831 			((code < 0 || code >= nsys) ?
1832 			 "illegal syscall" :
1833 			 p->p_emul->e_syscallnames[code]);
1834 #endif
1835 	if (code < 0 || code >= nsys)
1836 		callp += p->p_emul->e_nosys;
1837 	else if (tf->tf_out[6] & 1L) {
1838 		register64_t *argp;
1839 #ifdef DEBUG
1840 #ifdef __arch64__
1841 		if ((curproc->p_flag & P_32) != 0) {
1842 			printf("syscall(): 64-bit stack but P_32 set\n");
1843 			Debugger();
1844 		}
1845 #else
1846 		printf("syscall(): 64-bit stack on a 32-bit kernel????\n");
1847 		Debugger();
1848 #endif
1849 #endif
1850 		/* 64-bit stack -- not really supported on 32-bit kernels */
1851 		callp += code;
1852 		i = callp->sy_narg; /* Why divide? */
1853 #ifdef DEBUG
1854 		if (i != (long)callp->sy_argsize / sizeof(register64_t))
1855 			printf("syscall %s: narg=%hd, argsize=%hd, call=%p, "
1856 			       "argsz/reg64=%ld\n",
1857 			       p->p_emul->e_syscallnames ?
1858 			       ((code < 0 || code >= nsys) ?
1859 				"illegal syscall" :
1860 				p->p_emul->e_syscallnames[code])
1861 			       : "unknown syscall",
1862 			       callp->sy_narg, callp->sy_argsize,
1863 			       callp->sy_call,
1864 			       (long)callp->sy_argsize / sizeof(register64_t));
1865 #endif
1866 		if (i > nap) {	/* usually false */
1867 #ifdef DEBUG
1868 			if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW) || i > 8) {
1869 				printf("Args64 %d>%d -- need to copyin\n",
1870 				       i, nap);
1871 			}
1872 #endif
1873 			if (i > 8)
1874 				panic("syscall nargs");
1875 			/* Read the whole block in */
1876 			error = copyin((caddr_t)(u_long)tf->tf_out[6] + BIAS +
1877 				       offsetof(struct frame64, fr_argx),
1878 				       &args.l[nap],
1879 				       (i - nap) * sizeof(register64_t));
1880 			i = nap;
1881 		}
1882 		/* It should be faster to do <=6 longword copies than call bcopy */
1883 		for (argp = &args.l[0]; i--;)
1884 			*argp++ = *ap++;
1885 
1886 #ifdef KTRACE
1887 		if (KTRPOINT(p, KTR_SYSCALL))
1888 			ktrsyscall(p, code, code, (register_t *)args.l);
1889 #endif
1890 		if (error)
1891 			goto bad;
1892 #ifdef DEBUG
1893 		if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW)) {
1894 			for (i = 0; i < callp->sy_narg; i++)
1895 				printf("arg[%d]=%lx ", i, (long)(args.l[i]));
1896 			printf("\n");
1897 		}
1898 		if (trapdebug & TDB_STOPCALL) {
1899 			printf("stop precall\n");
1900 			Debugger();
1901 		}
1902 #endif
1903 	} else {
1904 		register32_t *argp;
1905 		int j = 0;
1906 
1907 		/* 32-bit stack */
1908 		callp += code;
1909 
1910 #if defined(__arch64__) && defined(DEBUG)
1911 		if ((curproc->p_flag & P_32) == 0) {
1912 			printf("syscall(): 32-bit stack but no P_32\n");
1913 			Debugger();
1914 		}
1915 #endif
1916 
1917 		i = (long)callp->sy_argsize / sizeof(register32_t);
1918 		if (i > nap) {	/* usually false */
1919 			register32_t temp[6];
1920 #ifdef DEBUG
1921 			if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW) || i > 8)
1922 				printf("Args %d>%d -- need to copyin\n",
1923 				       i, nap);
1924 #endif
1925 			if (i > 8)
1926 				panic("syscall nargs");
1927 			/* Read the whole block in */
1928 			error = copyin((caddr_t)(u_long)(tf->tf_out[6] +
1929 					offsetof(struct frame32, fr_argx)),
1930 				       &temp, (i - nap) * sizeof(register32_t));
1931 			/* Copy each to the argument array */
1932 			for (j = 0; nap + j < i; j++)
1933 				args.i[nap+j] = temp[j];
1934 #ifdef DEBUG
1935 			if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW)) {
1936 				int k;
1937 				printf("Copyin args of %d from %p:\n", j,
1938 				       (caddr_t)(u_long)(tf->tf_out[6] +
1939 					offsetof(struct frame32, fr_argx)));
1940 				for (k = 0; k < j; k++)
1941 					printf("arg %d = %p at %d val %p\n",
1942 					    k, (void *)(u_long)temp[k], nap + k,
1943 					    (void *)(u_long)args.i[nap + k]);
1944 			}
1945 #endif
1946 			i = nap;
1947 		}
1948 		/* Need to convert from int64 to int32 or we lose */
1949 		for (argp = &args.i[0]; i--;)
1950 			*argp++ = *ap++;
1951 
1952 #ifdef KTRACE
1953 		if (KTRPOINT(p, KTR_SYSCALL)) {
1954 #if defined(__arch64__)
1955 			register_t temp[8];
1956 
1957 			/* Need to xlate 32-bit->64-bit */
1958 			i = callp->sy_narg;
1959 			for (j = 0; j < i; j++)
1960 				temp[j] = args.i[j];
1961 			ktrsyscall(p, code, code, (register_t *)temp);
1962 #else
1963 			ktrsyscall(p, code, code, (register_t *)&args.i);
1964 #endif
1965 		}
1966 #endif /* KTRACE */
1967 		if (error) {
1968 			goto bad;
1969 		}
1970 #ifdef DEBUG
1971 		if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW)) {
1972 			for (i = 0; i < (long)callp->sy_argsize /
1973 				     sizeof(register32_t); i++)
1974 				printf("arg[%d]=%x ", i, (int)(args.i[i]));
1975 			printf("\n");
1976 		}
1977 		if (trapdebug & TDB_STOPCALL) {
1978 			printf("stop precall\n");
1979 			Debugger();
1980 		}
1981 #endif
1982 	}
1983 
1984 	rval[0] = 0;
1985 	rval[1] = tf->tf_out[1];
1986 #ifdef DEBUG
1987 	if (callp->sy_call == sys_nosys) {
1988 		printf("trapdebug: emul %s UNIPL syscall %d:%s\n",
1989 		       p->p_emul->e_name, (int)code,
1990 		       p->p_emul->e_syscallnames ? (
1991 			       (code < 0 || code >= nsys) ?
1992 			       "illegal syscall" :
1993 			       p->p_emul->e_syscallnames[code]) :
1994 		       "unknown syscall");
1995 	}
1996 #endif
1997 	error = (*callp->sy_call)(p, &args, rval);
1998 
1999 	switch (error) {
2000 		vaddr_t dest;
2001 	case 0:
2002 		/* Note: fork() does not return here in the child */
2003 		tf->tf_out[0] = rval[0];
2004 		tf->tf_out[1] = rval[1];
2005 		if (new) {
2006 			/* jmp %g2 (or %g7, deprecated) on success */
2007 			dest = tf->tf_global[new & SYSCALL_G2RFLAG ? 2 : 7];
2008 #ifdef DEBUG
2009 			if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW))
2010 				printf("syscall: return tstate=%llx new "
2011 				       "success to %p retval %lx:%lx\n",
2012 				       (unsigned long long)tf->tf_tstate,
2013 				       (void *)(u_long)dest,
2014 				       (u_long)rval[0], (u_long)rval[1]);
2015 #endif
2016 			if (dest & 3) {
2017 				error = EINVAL;
2018 				goto bad;
2019 			}
2020 		} else {
2021 			/* old system call convention: clear C on success */
2022 			tf->tf_tstate &= ~(((int64_t)(ICC_C | XCC_C)) <<
2023 					   TSTATE_CCR_SHIFT);	/* success */
2024 			dest = tf->tf_npc;
2025 #ifdef DEBUG
2026 			if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW))
2027 				printf("syscall: return tstate=%llx old "
2028 				       "success to %p retval %lx:%lx\n",
2029 				       (unsigned long long)tf->tf_tstate,
2030 				       (void *)(u_long)dest,
2031 				       (u_long)rval[0], (u_long)rval[1]);
2032 			if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW))
2033 				printf("old pc=%p npc=%p dest=%p\n",
2034 				    (void *)(u_long)tf->tf_pc,
2035 				    (void *)(u_long)tf->tf_npc,
2036 				    (void *)(u_long)dest);
2037 #endif
2038 		}
2039 		tf->tf_pc = dest;
2040 		tf->tf_npc = dest + 4;
2041 		break;
2042 
2043 	case ERESTART:
2044 	case EJUSTRETURN:
2045 		/* nothing to do */
2046 		break;
2047 
2048 	default:
2049 	bad:
2050 		if (p->p_emul->e_errno)
2051 			error = p->p_emul->e_errno[error];
2052 		tf->tf_out[0] = error;
2053 		tf->tf_tstate |= (((int64_t)(ICC_C | XCC_C)) <<
2054 				  TSTATE_CCR_SHIFT);	/* fail */
2055 		dest = tf->tf_npc;
2056 		tf->tf_pc = dest;
2057 		tf->tf_npc = dest + 4;
2058 #ifdef DEBUG
2059 		if (trapdebug & (TDB_SYSCALL | TDB_FOLLOW))
2060 			printf("syscall: return tstate=%llx fail %d to %p\n",
2061 			       (unsigned long long)tf->tf_tstate, error,
2062 			       (void *)(long)dest);
2063 #endif
2064 		break;
2065 	}
2066 
2067 
2068 	userret(p, pc, sticks);
2069 #ifdef KTRACE
2070 	if (KTRPOINT(p, KTR_SYSRET))
2071 		ktrsysret(p, code, error, rval[0]);
2072 #endif
2073 	share_fpu(p, tf);
2074 #ifdef DEBUG
2075 	if (trapdebug & (TDB_STOPCALL | TDB_SYSTOP)) {
2076 		Debugger();
2077 	}
2078 #endif
2079 #ifdef DEBUG
2080 	if (trapdebug & TDB_FRAME) {
2081 		print_trapframe(tf);
2082 	}
2083 #endif
2084 }
2085 
2086 /*
2087  * Process the tail end of a fork() for the child.
2088  */
2089 void
2090 child_return(arg)
2091 	void *arg;
2092 {
2093 	struct proc *p = arg;
2094 
2095 	/*
2096 	 * Return values in the frame set by cpu_fork().
2097 	 */
2098 #ifdef NOTDEF_DEBUG
2099 	printf("child_return: proc=%p\n", p);
2100 #endif
2101 	userret(p, p->p_md.md_tf->tf_pc, 0);
2102 #ifdef KTRACE
2103 	if (KTRPOINT(p, KTR_SYSRET))
2104 		ktrsysret(p,
2105 			  (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
2106 #endif
2107 }
2108