xref: /netbsd/sys/arch/sparc64/sparc64/trap.c (revision bf9ec67e)
1 /*	$NetBSD: trap.c,v 1.77 2002/05/14 02:34:14 eeh Exp $ */
2 
3 /*
4  * Copyright (c) 1996-2002 Eduardo Horvath.  All rights reserved.
5  * Copyright (c) 1996
6  *	The President and Fellows of Harvard College. All rights reserved.
7  * Copyright (c) 1992, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * This software was developed by the Computer Systems Engineering group
11  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
12  * contributed to Berkeley.
13  *
14  * All advertising materials mentioning features or use of this software
15  * must display the following acknowledgement:
16  *	This product includes software developed by the University of
17  *	California, Lawrence Berkeley Laboratory.
18  *	This product includes software developed by Harvard University.
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by the University of
31  *	California, Berkeley and its contributors.
32  *	This product includes software developed by Harvard University.
33  * 4. Neither the name of the University nor the names of its contributors
34  *    may be used to endorse or promote products derived from this software
35  *    without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
38  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
41  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  *
49  *	@(#)trap.c	8.4 (Berkeley) 9/23/93
50  */
51 
52 #define NEW_FPSTATE
53 
54 #include "opt_ddb.h"
55 #include "opt_syscall_debug.h"
56 #include "opt_ktrace.h"
57 #include "opt_compat_svr4.h"
58 #include "opt_compat_netbsd32.h"
59 
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/proc.h>
63 #include <sys/user.h>
64 #include <sys/kernel.h>
65 #include <sys/malloc.h>
66 #include <sys/resource.h>
67 #include <sys/signal.h>
68 #include <sys/wait.h>
69 #include <sys/syscall.h>
70 #include <sys/syslog.h>
71 #ifdef KTRACE
72 #include <sys/ktrace.h>
73 #endif
74 
75 #include <uvm/uvm_extern.h>
76 
77 #include <machine/cpu.h>
78 #include <machine/ctlreg.h>
79 #include <machine/trap.h>
80 #include <machine/instr.h>
81 #include <machine/pmap.h>
82 
83 #ifdef DDB
84 #include <machine/db_machdep.h>
85 #else
86 #include <machine/frame.h>
87 #endif
88 #ifdef COMPAT_SVR4
89 #include <machine/svr4_machdep.h>
90 #endif
91 #ifdef COMPAT_SVR4_32
92 #include <machine/svr4_32_machdep.h>
93 #endif
94 
95 #include <sparc/fpu/fpu_extern.h>
96 #include <sparc64/sparc64/cache.h>
97 
#ifndef offsetof
/*
 * Fallback for when no header has supplied offsetof(): the byte offset
 * of member `f' inside aggregate type `s'.
 *
 * The result is a size_t.  The earlier (int) cast squeezed a
 * pointer-derived value through a type narrower than a pointer whenever
 * pointers are wider than int, which truncates and draws a
 * pointer-to-integer size warning.
 */
#define	offsetof(s, f) ((size_t)&((s *)0)->f)
#endif
101 
#ifdef DEBUG
/*
 * tl(): what trap level are we running at?
 *
 * Reads the %tl register with `rdpr' and yields the value as an int.
 * Written as a GCC statement expression so it can be used inside a
 * larger expression; only defined for DEBUG kernels.
 */
#define tl() ({ \
	int curtl; \
	__asm __volatile("rdpr %%tl, %0" : "=r" (curtl) :); \
	curtl; \
})
#endif
110 
/*
 * Trap statistics.
 *
 * These are plain global counters, all starting at zero.  Nothing in the
 * C code visible here increments them; presumably the low-level trap
 * handlers do -- confirm against locore.s.
 */
int trapstats = 0;
int protfix = 0;

/* Numbers of normal/nucleus data/text miss/protection faults. */
int udmiss = 0;
int udhit = 0;
int udprot = 0;
int utmiss = 0;
int kdmiss = 0;
int kdhit = 0;
int kdprot = 0;
int ktmiss = 0;

/* Numbers of normal/nucleus interrupt/interrupt vector faults. */
int iveccnt = 0;
int uintrcnt = 0;
int kiveccnt = 0;
int kintrcnt = 0;
int intristk = 0;	/* interrupts when already on intrstack */
int intrpoll = 0;	/* interrupts not using vector lists */

/*
 * Going by their names: window fill/spill counters and
 * return-from-trap counters (TODO confirm against locore.s).
 */
int wfill = 0;
int kwfill = 0;
int wspill = 0;
int wspillskip = 0;
int rftucnt = 0;
int rftuld = 0;
int rftudone = 0;
int rftkcnt[5] = { 0 };
136 
#ifdef DEBUG
/*
 * Flag bits for rwindow_debug, consulted by rwindow_save().
 */
#define RW_64		(1 << 0)	/* trace 64-bit window copyouts */
#define RW_ERR		(1 << 1)	/* report failed copyouts */
#define RW_FOLLOW	(1 << 2)	/* trace every rwindow_save() call */
int	rwindow_debug = RW_ERR;

/*
 * Flag bits for trapdebug.  Only the flags whose use is visible in this
 * part of the file are annotated.
 */
#define TDB_ADDFLT	(1 << 0)	/* trace data_access_fault() */
#define TDB_TXTFLT	(1 << 1)
#define TDB_TRAP	(1 << 2)	/* trace trap() */
#define TDB_SYSCALL	(1 << 3)
#define TDB_FOLLOW	(1 << 4)	/* trace trap()/data_access_fault() */
#define TDB_FRAME	(1 << 5)	/* dump the trapframe */
#define TDB_NSAVED	(1 << 6)	/* report when cpcb->pcb_nsaved != 0 */
#define TDB_TL		(1 << 7)	/* report traps taken at tl() != 0 */
#define TDB_STOPSIG	(1 << 8)	/* enter the debugger before signalling */
#define TDB_STOPCALL	(1 << 9)	/* enter the debugger on data faults */
#define TDB_STOPCPIO	(1 << 10)
#define TDB_SYSTOP	(1 << 11)

/*
 * Nothing is enabled by default; a combination that has been useful is
 * TDB_SYSCALL|TDB_STOPSIG|TDB_STOPCPIO|TDB_ADDFLT|TDB_FOLLOW.
 */
int	trapdebug = 0;
/* #define __inline */
#endif
157 
/*
 * DEBUGGER(t, f): with DDB configured, pass trap type `t' and trapframe
 * `f' to kdb_trap().  Without DDB, both DEBUGGER() and Debugger()
 * expand to nothing, so the call sites compile away.
 */
#ifndef DDB
#define DEBUGGER(t,f)
#define Debugger()
#else
#define DEBUGGER(t,f)	do { kdb_trap(t,f); } while (0)
#endif
168 
169 /*
170  * Initial FPU state is all registers == all 1s, everything else == all 0s.
171  * This makes every floating point register a signalling NaN, with sign bit
172  * set, no matter how it is interpreted.  Appendix N of the Sparc V8 document
173  * seems to imply that we should do this, and it does make sense.
174  */
175 __asm(".align 64");
176 struct	fpstate64 initfpstate = {
177 	{ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
178 	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
179 	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0,
180 	  ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }
181 };
182 
/*
 * Human-readable names for trap types, indexed by trap type number.
 * trap() uses them in diagnostics and as the panic message.
 *
 * There are more than 100 trap types, but most are unused; unused slots
 * hold the generic name T.
 *
 * Trap type 0 is taken over as an `Asynchronous System Trap'.
 * This is left-over VAX emulation that should be fixed.
 *
 * Traps not supported on the spitfire are marked with `*',
 * and additions are marked with `+'.
 *
 * The table must stay dense: every entry's position is its trap number,
 * as noted in the trailing comment on each line.
 */
static const char T[] = "*trap";
const char *trap_type[] = {
	/* non-user vectors */
	"ast",			/* 0 */
	"power on reset",	/* 1 */
	"watchdog reset",	/* 2 */
	"externally initiated reset",/*3 */
	"software initiated reset",/* 4 */
	"RED state exception",	/* 5 */
	T, T,			/* 6..7 */
	"instruction access exception",	/* 8 */
	"*instruction MMU miss",/* 9 */
	"instruction access error",/* 0a */
	T, T, T, T, T,		/* 0b..0f */
	"illegal instruction",	/* 10 */
	"privileged opcode",	/* 11 */
	"*unimplemented LDD",	/* 12 */
	"*unimplemented STD",	/* 13 */
	T, T, T, T,		/* 14..17 */
	T, T, T, T, T, T, T, T, /* 18..1f */
	"fp disabled",		/* 20 */
	"fp exception ieee 754",/* 21 */
	"fp exception other",	/* 22 */
	"tag overflow",		/* 23 */
	"clean window",		/* 24 */
	T, T, T,		/* 25..27 -- trap continues */
	"division by zero",	/* 28 */
	"*internal processor error",/* 29 */
	T, T, T, T, T, T,	/* 2a..2f */
	"data access exception",/* 30 */
	"*data access MMU miss",/* 31 */
	"data access error",	/* 32 */
	"*data access protection",/* 33 */
	"mem address not aligned",	/* 34 */
	"LDDF mem address not aligned",/* 35 */
	"STDF mem address not aligned",/* 36 */
	"privileged action",	/* 37 */
	"LDQF mem address not aligned",/* 38 */
	"STQF mem address not aligned",/* 39 */
	T, T, T, T, T, T,	/* 3a..3f */
	"*async data error",	/* 40 */
	"level 1 int",		/* 41 */
	"level 2 int",		/* 42 */
	"level 3 int",		/* 43 */
	"level 4 int",		/* 44 */
	"level 5 int",		/* 45 */
	"level 6 int",		/* 46 */
	"level 7 int",		/* 47 */
	"level 8 int",		/* 48 */
	"level 9 int",		/* 49 */
	"level 10 int",		/* 4a */
	"level 11 int",		/* 4b */
	"level 12 int",		/* 4c */
	"level 13 int",		/* 4d */
	"level 14 int",		/* 4e */
	"level 15 int",		/* 4f */
	T, T, T, T, T, T, T, T, /* 50..57 */
	T, T, T, T, T, T, T, T, /* 58..5f */
	"+interrupt vector",	/* 60 */
	"+PA_watchpoint",	/* 61 */
	"+VA_watchpoint",	/* 62 */
	"+corrected ECC error",	/* 63 */
	"+fast instruction access MMU miss",/* 64 */
	T, T, T,		/* 65..67 -- trap continues */
	"+fast data access MMU miss",/* 68 */
	T, T, T,		/* 69..6b -- trap continues */
	"+fast data access protection",/* 6c */
	T, T, T,		/* 6d..6f -- trap continues */
	T, T, T, T, T, T, T, T, /* 70..77 */
	T, T, T, T, T, T, T, T, /* 78..7f */
	"spill 0 normal",	/* 80 */
	T, T, T,		/* 81..83 -- trap continues */
	"spill 1 normal",	/* 84 */
	T, T, T,		/* 85..87 -- trap continues */
	"spill 2 normal",	/* 88 */
	T, T, T,		/* 89..8b -- trap continues */
	"spill 3 normal",	/* 8c */
	T, T, T,		/* 8d..8f -- trap continues */
	"spill 4 normal",	/* 90 */
	T, T, T,		/* 91..93 -- trap continues */
	"spill 5 normal",	/* 94 */
	T, T, T,		/* 95..97 -- trap continues */
	"spill 6 normal",	/* 98 */
	T, T, T,		/* 99..9b -- trap continues */
	"spill 7 normal",	/* 9c */
	T, T, T,		/* 9d..9f -- trap continues */
	"spill 0 other",	/* a0 */
	T, T, T,		/* a1..a3 -- trap continues */
	"spill 1 other",	/* a4 */
	T, T, T,		/* a5..a7 -- trap continues */
	"spill 2 other",	/* a8 */
	T, T, T,		/* a9..ab -- trap continues */
	"spill 3 other",	/* ac */
	T, T, T,		/* ad..af -- trap continues */
	"spill 4 other",	/* b0 */
	T, T, T,		/* b1..b3 -- trap continues */
	"spill 5 other",	/* b4 */
	T, T, T,		/* b5..b7 -- trap continues */
	"spill 6 other",	/* b8 */
	T, T, T,		/* b9..bb -- trap continues */
	"spill 7 other",	/* bc */
	T, T, T,		/* bd..bf -- trap continues */
	"fill 0 normal",	/* c0 */
	T, T, T,		/* c1..c3 -- trap continues */
	"fill 1 normal",	/* c4 */
	T, T, T,		/* c5..c7 -- trap continues */
	"fill 2 normal",	/* c8 */
	T, T, T,		/* c9..cb -- trap continues */
	"fill 3 normal",	/* cc */
	T, T, T,		/* cd..cf -- trap continues */
	"fill 4 normal",	/* d0 */
	T, T, T,		/* d1..d3 -- trap continues */
	"fill 5 normal",	/* d4 */
	T, T, T,		/* d5..d7 -- trap continues */
	"fill 6 normal",	/* d8 */
	T, T, T,		/* d9..db -- trap continues */
	"fill 7 normal",	/* dc */
	T, T, T,		/* dd..df -- trap continues */
	"fill 0 other",		/* e0 */
	T, T, T,		/* e1..e3 -- trap continues */
	"fill 1 other",		/* e4 */
	T, T, T,		/* e5..e7 -- trap continues */
	"fill 2 other",		/* e8 */
	T, T, T,		/* e9..eb -- trap continues */
	"fill 3 other",		/* ec */
	T, T, T,		/* ed..ef -- trap continues */
	"fill 4 other",		/* f0 */
	T, T, T,		/* f1..f3 -- trap continues */
	"fill 5 other",		/* f4 */
	T, T, T,		/* f5..f7 -- trap continues */
	"fill 6 other",		/* f8 */
	T, T, T,		/* f9..fb -- trap continues */
	"fill 7 other",		/* fc */
	T, T, T,		/* fd..ff -- trap continues */

	/* user (software trap) vectors */
	"syscall",		/* 100 */
	"breakpoint",		/* 101 */
	"zero divide",		/* 102 */
	"flush windows",	/* 103 */
	"clean windows",	/* 104 */
	"range check",		/* 105 */
	"fix align",		/* 106 */
	"integer overflow",	/* 107 */
	"svr4 syscall",		/* 108 */
	"4.4 syscall",		/* 109 */
	"kgdb exec",		/* 10a */
	T, T, T, T, T,		/* 10b..10f */
	T, T, T, T, T, T, T, T,	/* 110..117 */
	T, T, T, T, T, T, T, T,	/* 118..11f */
	"svr4 getcc",		/* 120 */
	"svr4 setcc",		/* 121 */
	"svr4 getpsr",		/* 122 */
	"svr4 setpsr",		/* 123 */
	"svr4 gethrtime",	/* 124 */
	"svr4 gethrvtime",	/* 125 */
	T,			/* 126 */
	"svr4 gethrestime",	/* 127 */
	T, T, T, T, T, T, T, T, /* 128..12f */
	T, T,			/* 130..131 */
	"get condition codes",	/* 132 */
	"set condition codes",	/* 133 */
	T, T, T, T,		/* 134..137 */
	T, T, T, T, T, T, T, T, /* 138..13f */
	T, T, T, T, T, T, T, T, /* 140..147 */
	T, T, T, T, T, T, T, T, /* 148..14f */
	T, T, T, T, T, T, T, T, /* 150..157 */
	T, T, T, T, T, T, T, T, /* 158..15f */
	T, T, T, T,		/* 160..163 */
	"SVID syscall64",	/* 164 */
	"SPARC Intl syscall64",	/* 165 */
	"OS vendor spec syscall",/* 166 */
	"HW OEM syscall",	/* 167 */
	"ret from deferred trap",	/* 168 */
};

/* Number of entries in trap_type[]; valid indices are 0..N_TRAP_TYPES-1. */
#define	N_TRAP_TYPES	(sizeof trap_type / sizeof *trap_type)
369 
370 static __inline void userret __P((struct proc *, int,  u_quad_t));
371 static __inline void share_fpu __P((struct proc *, struct trapframe64 *));
372 
373 void trap __P((struct trapframe64 *tf, unsigned type, vaddr_t pc, long tstate));
374 void data_access_fault __P((struct trapframe64 *tf, unsigned type, vaddr_t pc,
375 	vaddr_t va, vaddr_t sfva, u_long sfsr));
376 void data_access_error __P((struct trapframe64 *tf, unsigned type,
377 	vaddr_t afva, u_long afsr, vaddr_t sfva, u_long sfsr));
378 void text_access_fault __P((struct trapframe64 *tf, unsigned type,
379 	vaddr_t pc, u_long sfsr));
380 void text_access_error __P((struct trapframe64 *tf, unsigned type,
381 	vaddr_t pc, u_long sfsr, vaddr_t afva, u_long afsr));
382 void syscall __P((struct trapframe64 *, register_t code, register_t pc));
383 
#ifdef DEBUG
void print_trapframe(struct trapframe64 *);

/*
 * Dump a trapframe on the console (DEBUG kernels only).
 *
 * Prints tstate, pc, npc, the fault address, %y, pil, oldpil and tt,
 * followed by the eight global and the eight out registers.  Each
 * 64-bit register is printed as two 32-bit hex halves so that the
 * output does not depend on 64-bit printf support.
 */
void
print_trapframe(struct trapframe64 *tf)
{
/* Expand one 64-bit register into its (high, low) 32-bit printf args. */
#define	TF_REG64(r)	(u_int)((r)>>32), (u_int)(r)

	printf("Trapframe %p:\ttstate: %lx\tpc: %lx\tnpc: %lx\n",
	       tf, (u_long)tf->tf_tstate, (u_long)tf->tf_pc, (u_long)tf->tf_npc);
	printf("fault: %p\ty: %x\t",
	       (void *)(u_long)tf->tf_fault, (int)tf->tf_y);
	printf("pil: %d\toldpil: %d\ttt: %x\tGlobals:\n",
	       (int)tf->tf_pil, (int)tf->tf_oldpil, (int)tf->tf_tt);

	/* %g0..%g7, four per line */
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\n",
	       TF_REG64(tf->tf_global[0]), TF_REG64(tf->tf_global[1]),
	       TF_REG64(tf->tf_global[2]), TF_REG64(tf->tf_global[3]));
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\nouts:\n",
	       TF_REG64(tf->tf_global[4]), TF_REG64(tf->tf_global[5]),
	       TF_REG64(tf->tf_global[6]), TF_REG64(tf->tf_global[7]));

	/* %o0..%o7, four per line */
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\n",
	       TF_REG64(tf->tf_out[0]), TF_REG64(tf->tf_out[1]),
	       TF_REG64(tf->tf_out[2]), TF_REG64(tf->tf_out[3]));
	printf("%08x%08x %08x%08x %08x%08x %08x%08x\n",
	       TF_REG64(tf->tf_out[4]), TF_REG64(tf->tf_out[5]),
	       TF_REG64(tf->tf_out[6]), TF_REG64(tf->tf_out[7]));

#undef TF_REG64
}
#endif
422 
423 /*
424  * Define the code needed before returning to user mode, for
425  * trap, mem_access_fault, and syscall.
426  */
427 static __inline void
428 userret(p, pc, oticks)
429 	struct proc *p;
430 	int pc;
431 	u_quad_t oticks;
432 {
433 	int sig;
434 
435 	/* take pending signals */
436 	while ((sig = CURSIG(p)) != 0)
437 		postsig(sig);
438 	p->p_priority = p->p_usrpri;
439 	if (want_ast) {
440 		want_ast = 0;
441 		if (p->p_flag & P_OWEUPC) {
442 			p->p_flag &= ~P_OWEUPC;
443 			ADDUPROF(p);
444 		}
445 	}
446 	if (want_resched) {
447 		/*
448 		 * We are being preempted.
449 		 */
450 		preempt(NULL);
451 		while ((sig = CURSIG(p)) != 0)
452 			postsig(sig);
453 	}
454 
455 	/*
456 	 * If profiling, charge recent system time to the trapped pc.
457 	 */
458 	if (p->p_flag & P_PROFIL)
459 		addupc_task(p, pc, (int)(p->p_sticks - oticks));
460 
461 	curcpu()->ci_schedstate.spc_curpriority = p->p_priority;
462 }
463 
464 /*
465  * If someone stole the FPU while we were away, do not enable it
466  * on return.  This is not done in userret() above as it must follow
467  * the ktrsysret() in syscall().  Actually, it is likely that the
468  * ktrsysret should occur before the call to userret.
469  *
470  * Oh, and don't touch the FPU bit if we're returning to the kernel.
471  */
472 static __inline void share_fpu(p, tf)
473 	struct proc *p;
474 	struct trapframe64 *tf;
475 {
476 	if (!(tf->tf_tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) &&
477 	    (tf->tf_tstate & (PSTATE_PEF<<TSTATE_PSTATE_SHIFT)) && fpproc != p)
478 		tf->tf_tstate &= ~(PSTATE_PEF<<TSTATE_PSTATE_SHIFT);
479 }
480 
481 /*
482  * Called from locore.s trap handling, for non-MMU-related traps.
483  * (MMU-related traps go through mem_access_fault, below.)
484  */
485 void
486 trap(tf, type, pc, tstate)
487 	struct trapframe64 *tf;
488 	unsigned type;
489 	vaddr_t pc;
490 	long tstate;
491 {
492 	struct proc *p;
493 	struct pcb *pcb;
494 	int pstate = (tstate>>TSTATE_PSTATE_SHIFT);
495 	int64_t n;
496 	u_quad_t sticks;
497 
498 	/* This steps the PC over the trap. */
499 #define	ADVANCE (n = tf->tf_npc, tf->tf_pc = n, tf->tf_npc = n + 4)
500 
501 #ifdef DEBUG
502 	if (tf->tf_pc == tf->tf_npc) {
503 		printf("trap: tpc %p == tnpc %p\n",
504 		    (void *)(u_long)tf->tf_pc, (void *)(u_long)tf->tf_npc);
505 		Debugger();
506 	}
507 #if 0
508 	{
509 		/* Check to make sure we're on the normal stack */
510 		int* sp;
511 
512 		__asm("mov %%sp, %0" : "=r" (sp) :);
513 		if (sp < EINTSTACK) {
514 			printf("trap: We're on the interrupt stack!\ntype=0x%x tf=%p %s\n",
515 			       type, tf, type < N_TRAP_TYPES ? trap_type[type] :
516 			       ((type == T_AST) ? "ast" :
517 				((type == T_RWRET) ? "rwret" : T)));
518 		}
519 	}
520 #endif
521 #endif
522 
523 
524 #ifdef DEBUG
525 	if ((trapdebug&TDB_NSAVED && cpcb->pcb_nsaved) || trapdebug&(TDB_FOLLOW|TDB_TRAP)) {
526 		char sbuf[sizeof(PSTATE_BITS) + 64];
527 
528 		printf("trap: type 0x%x: pc=%lx &tf=%p\n",
529 		       type, pc, tf);
530 		bitmask_snprintf(pstate, PSTATE_BITS, sbuf, sizeof(sbuf));
531 		printf(" npc=%lx pstate=%s %s\n",
532 		       (long)tf->tf_npc, sbuf,
533 		       type < N_TRAP_TYPES ? trap_type[type] :
534 		       ((type == T_AST) ? "ast" :
535 			((type == T_RWRET) ? "rwret" : T)));
536 	}
537 #if 0
538 	if (trapdebug & TDB_FRAME) {
539 		print_trapframe(tf);
540 	}
541 #endif
542 #endif
543 
544 	uvmexp.traps++;
545 #ifdef DEBUG
546 	if ((trapdebug&(TDB_FOLLOW|TDB_TRAP)) || ((trapdebug & TDB_TL) && tl())) {
547 		char sbuf[sizeof(PSTATE_BITS) + 64];
548 
549 		extern int trap_trace_dis;
550 		trap_trace_dis = 1;
551 		printf("trap: type 0x%x: lvl=%d pc=%lx &tf=%p",
552 		       type, (int)tl(), pc, tf);
553 		bitmask_snprintf(pstate, PSTATE_BITS, sbuf, sizeof(sbuf));
554 		printf(" npc=%lx pstate=%s %s\n",
555 		       (long)tf->tf_npc, sbuf,
556 		       type < N_TRAP_TYPES ? trap_type[type] :
557 		       ((type == T_AST) ? "ast" :
558 			((type == T_RWRET) ? "rwret" : T)));
559 #ifdef DDB
560 		kdb_trap(type, tf);
561 #endif
562 	}
563 #endif
564 	/*
565 	 * Generally, kernel traps cause a panic.  Any exceptions are
566 	 * handled early here.
567 	 */
568 	if (pstate & PSTATE_PRIV) {
569 #ifdef DDB
570 		if (type == T_BREAKPOINT) {
571 			write_all_windows();
572 			if (kdb_trap(type, tf)) {
573 				/* ADVANCE; */
574 				return;
575 			}
576 		}
577 		if (type == T_PA_WATCHPT || type == T_VA_WATCHPT) {
578 			if (kdb_trap(type, tf)) {
579 				/* DDB must turn off watchpoints or something */
580 				return;
581 			}
582 		}
583 #endif
584 		/*
585 		 * The kernel needs to use FPU registers for block
586 		 * load/store.  If we trap in priviliged code, save
587 		 * the FPU state if there is any and enable the FPU.
588 		 *
589 		 * We rely on the kernel code properly enabling the FPU
590 		 * in %fprs, otherwise we'll hang here trying to enable
591 		 * the FPU.
592 		 */
593 		if (type == T_FPDISABLED) {
594 extern void db_printf(const char * , ...);
595 #ifndef NEW_FPSTATE
596 			if (fpproc != NULL) {	/* someone else had it */
597 				savefpstate(fpproc->p_md.md_fpstate);
598 				fpproc = NULL;
599 				/* Enable the FPU */
600 /*				loadfpstate(initfpstate);*/
601 			}
602 			tf->tf_tstate |= (PSTATE_PEF<<TSTATE_PSTATE_SHIFT);
603 			return;
604 #else
605 			struct proc *newfpproc;
606 
607 			/* New scheme */
608 			if (CLKF_INTR((struct clockframe *)tf) || !curproc) {
609 				newfpproc = &proc0;
610 			} else {
611 				newfpproc = curproc;
612 			}
613 			if (fpproc != newfpproc) {
614 				if (fpproc != NULL) {
615 				/* someone else had it, maybe? */
616 					savefpstate(fpproc->p_md.md_fpstate);
617 					fpproc = NULL;
618 				}
619 				/* If we have an allocated fpstate, load it */
620 				if (newfpproc->p_md.md_fpstate != 0) {
621 					fpproc = newfpproc;
622 					loadfpstate(fpproc->p_md.md_fpstate);
623 				} else
624 					fpproc = NULL;
625 			}
626 			/* Enable the FPU */
627 			tf->tf_tstate |= (PSTATE_PEF<<TSTATE_PSTATE_SHIFT);
628 			return;
629 #endif
630 		}
631 		goto dopanic;
632 	}
633 	if ((p = curproc) == NULL)
634 		p = &proc0;
635 	sticks = p->p_sticks;
636 	pcb = &p->p_addr->u_pcb;
637 	p->p_md.md_tf = tf;	/* for ptrace/signals */
638 
639 	switch (type) {
640 
641 	default:
642 		if (type < 0x100) {
643 			extern int trap_trace_dis;
644 dopanic:
645 			trap_trace_dis = 1;
646 
647 			{
648 				char sbuf[sizeof(PSTATE_BITS) + 64];
649 
650 				printf("trap type 0x%x: pc=%lx",
651 				       type, pc);
652 				bitmask_snprintf(pstate, PSTATE_BITS, sbuf,
653 						 sizeof(sbuf));
654 				printf(" npc=%lx pstate=%s\n",
655 				       (long)tf->tf_npc, sbuf);
656 				DEBUGGER(type, tf);
657 				panic(type < N_TRAP_TYPES ? trap_type[type] : T);
658 			}
659 			/* NOTREACHED */
660 		}
661 #if defined(COMPAT_SVR4) || defined(COMPAT_SVR4_32)
662 badtrap:
663 #endif
664 		/* the following message is gratuitous */
665 		/* ... but leave it in until we find anything */
666 		printf("%s[%d]: unimplemented software trap 0x%x\n",
667 		    p->p_comm, p->p_pid, type);
668 		trapsignal(p, SIGILL, type);
669 		break;
670 
671 #if defined(COMPAT_SVR4) || defined(COMPAT_SVR4_32)
672 	case T_SVR4_GETCC:
673 	case T_SVR4_SETCC:
674 	case T_SVR4_GETPSR:
675 	case T_SVR4_SETPSR:
676 	case T_SVR4_GETHRTIME:
677 	case T_SVR4_GETHRVTIME:
678 	case T_SVR4_GETHRESTIME:
679 #if defined(COMPAT_SVR4_32)
680 		if (svr4_32_trap(type, p))
681 			break;
682 #endif
683 #if defined(COMPAT_SVR4)
684 		if (svr4_trap(type, p))
685 			break;
686 #endif
687 		goto badtrap;
688 #endif
689 
690 	case T_AST:
691 		break;	/* the work is all in userret() */
692 
693 	case T_ILLINST:
694 	case T_INST_EXCEPT:
695 	case T_TEXTFAULT:
696 		/* This is not an MMU issue!!!! */
697 		printf("trap: textfault at %lx!! sending SIGILL due to trap %d: %s\n",
698 		       pc, type, type < N_TRAP_TYPES ? trap_type[type] : T);
699 #if defined(DDB) && defined(DEBUG)
700 		if (trapdebug & TDB_STOPSIG)
701 			Debugger();
702 #endif
703 		trapsignal(p, SIGILL, 0);	/* XXX code?? */
704 		break;
705 
706 	case T_PRIVINST:
707 		printf("trap: privinst!! sending SIGILL due to trap %d: %s\n",
708 		       type, type < N_TRAP_TYPES ? trap_type[type] : T);
709 #if defined(DDB) && defined(DEBUG)
710 		if (trapdebug & TDB_STOPSIG)
711 			Debugger();
712 #endif
713 		trapsignal(p, SIGILL, 0);	/* XXX code?? */
714 		break;
715 
716 	case T_PRIVACT:
717 		trapsignal(p, SIGILL, 0);
718 		break;
719 
720 	case T_FPDISABLED: {
721 		struct fpstate64 *fs = p->p_md.md_fpstate;
722 
723 		if (fs == NULL) {
724 			/* NOTE: fpstate must be 64-bit aligned */
725 			fs = malloc((sizeof *fs), M_SUBPROC, M_WAITOK);
726 			*fs = initfpstate;
727 			fs->fs_qsize = 0;
728 			p->p_md.md_fpstate = fs;
729 		}
730 		/*
731 		 * If we have not found an FPU, we have to emulate it.
732 		 *
733 		 * Since All UltraSPARC CPUs have an FPU how can this happen?
734 		 */
735 		if (!foundfpu) {
736 #ifdef notyet
737 			fpu_emulate(p, tf, fs);
738 			break;
739 #else
740 			trapsignal(p, SIGFPE, 0);	/* XXX code?? */
741 			break;
742 #endif
743 		}
744 		/*
745 		 * We may have more FPEs stored up and/or ops queued.
746 		 * If they exist, handle them and get out.  Otherwise,
747 		 * resolve the FPU state, turn it on, and try again.
748 		 *
749 		 * Ultras should never have a FPU queue.
750 		 */
751 		if (fs->fs_qsize) {
752 
753 			printf("trap: Warning fs_qsize is %d\n",fs->fs_qsize);
754 			fpu_cleanup(p, fs);
755 			break;
756 		}
757 		if (fpproc != p) {		/* we do not have it */
758 			if (fpproc != NULL)	/* someone else had it */
759 				savefpstate(fpproc->p_md.md_fpstate);
760 			loadfpstate(fs);
761 			fpproc = p;		/* now we do have it */
762 		}
763 		tf->tf_tstate |= (PSTATE_PEF<<TSTATE_PSTATE_SHIFT);
764 		break;
765 	}
766 
767 	case T_ALIGN:
768 	case T_LDDF_ALIGN:
769 	case T_STDF_ALIGN:
770 	{
771 		int64_t dsfsr, dsfar=0, isfsr;
772 
773 		dsfsr = ldxa(SFSR, ASI_DMMU);
774 		if (dsfsr & SFSR_FV)
775 			dsfar = ldxa(SFAR, ASI_DMMU);
776 		isfsr = ldxa(SFSR, ASI_IMMU);
777 		/*
778 		 * If we're busy doing copyin/copyout continue
779 		 */
780 		if (p->p_addr && p->p_addr->u_pcb.pcb_onfault) {
781 			tf->tf_pc = (vaddr_t)p->p_addr->u_pcb.pcb_onfault;
782 			tf->tf_npc = tf->tf_pc + 4;
783 			break;
784 		}
785 
786 #define fmt64(x)	(u_int)((x)>>32), (u_int)((x))
787 		printf("Alignment error: pid=%d comm=%s dsfsr=%08x:%08x dsfar=%x:%x isfsr=%08x:%08x pc=%lx\n",
788 		       p->p_pid, p->p_comm, fmt64(dsfsr), fmt64(dsfar), fmt64(isfsr), pc);
789 	}
790 
791 #if defined(DDB) && defined(DEBUG)
792 	if (trapdebug & TDB_STOPSIG) {
793 		write_all_windows();
794 		kdb_trap(type, tf);
795 	}
796 #endif
797 		if ((p->p_md.md_flags & MDP_FIXALIGN) != 0 &&
798 		    fixalign(p, tf) == 0) {
799 			ADVANCE;
800 			break;
801 		}
802 		trapsignal(p, SIGBUS, 0);	/* XXX code?? */
803 		break;
804 
805 	case T_FP_IEEE_754:
806 	case T_FP_OTHER:
807 		/*
808 		 * Clean up after a floating point exception.
809 		 * fpu_cleanup can (and usually does) modify the
810 		 * state we save here, so we must `give up' the FPU
811 		 * chip context.  (The software and hardware states
812 		 * will not match once fpu_cleanup does its job, so
813 		 * we must not save again later.)
814 		 */
815 		if (p != fpproc)
816 			panic("fpe without being the FP user");
817 		savefpstate(p->p_md.md_fpstate);
818 		fpproc = NULL;
819 		/* tf->tf_psr &= ~PSR_EF; */	/* share_fpu will do this */
820 		if (p->p_md.md_fpstate->fs_qsize == 0) {
821 			copyin((caddr_t)pc, &p->p_md.md_fpstate->fs_queue[0].fq_instr, sizeof(int));
822 			p->p_md.md_fpstate->fs_qsize = 1;
823 			fpu_cleanup(p, p->p_md.md_fpstate);
824 			ADVANCE;
825 		} else
826 			fpu_cleanup(p, p->p_md.md_fpstate);
827 		/* fpu_cleanup posts signals if needed */
828 #if 0		/* ??? really never??? */
829 		ADVANCE;
830 #endif
831 		break;
832 
833 	case T_TAGOF:
834 		trapsignal(p, SIGEMT, 0);	/* XXX code?? */
835 		break;
836 
837 	case T_BREAKPOINT:
838 		trapsignal(p, SIGTRAP, 0);
839 		break;
840 
841 	case T_DIV0:
842 		ADVANCE;
843 		trapsignal(p, SIGFPE, FPE_INTDIV_TRAP);
844 		break;
845 
846 	case T_CLEANWIN:
847 		uprintf("T_CLEANWIN\n");	/* XXX Should not get this */
848 		ADVANCE;
849 		break;
850 
851 	case T_FLUSHWIN:
852 		/* Software window flush for v8 software */
853 		write_all_windows();
854 		ADVANCE;
855 		break;
856 
857 	case T_RANGECHECK:
858 		printf("T_RANGECHECK\n");	/* XXX */
859 		ADVANCE;
860 		trapsignal(p, SIGILL, 0);	/* XXX code?? */
861 		break;
862 
863 	case T_FIXALIGN:
864 #ifdef DEBUG_ALIGN
865 		uprintf("T_FIXALIGN\n");
866 #endif
867 		/* User wants us to fix alignment faults */
868 		p->p_md.md_flags |= MDP_FIXALIGN;
869 		ADVANCE;
870 		break;
871 
872 	case T_INTOF:
873 		uprintf("T_INTOF\n");		/* XXX */
874 		ADVANCE;
875 		trapsignal(p, SIGFPE, FPE_INTOVF_TRAP);
876 		break;
877 	}
878 	userret(p, pc, sticks);
879 	share_fpu(p, tf);
880 #undef ADVANCE
881 #ifdef DEBUG
882 	if (trapdebug&(TDB_FOLLOW|TDB_TRAP)) {
883 		printf("trap: done\n");
884 		/* if (type != T_BREAKPOINT) Debugger(); */
885 	}
886 #if 0
887 	if (trapdebug & TDB_FRAME) {
888 		print_trapframe(tf);
889 	}
890 #endif
891 #endif
892 }
893 
894 /*
895  * Save windows from PCB into user stack, and return 0.  This is used on
896  * window overflow pseudo-traps (from locore.s, just before returning to
897  * user mode) and when ptrace or sendsig needs a consistent state.
898  * As a side effect, rwindow_save() always sets pcb_nsaved to 0.
899  *
900  * If the windows cannot be saved, pcb_nsaved is restored and we return -1.
901  *
902  * XXXXXX This cannot work properly.  I need to re-examine this register
903  * window thing entirely.
904  */
905 int
906 rwindow_save(p)
907 	struct proc *p;
908 {
909 	struct pcb *pcb = &p->p_addr->u_pcb;
910 	struct rwindow64 *rw = &pcb->pcb_rw[0];
911 	u_int64_t rwdest;
912 	int i, j;
913 
914 	i = pcb->pcb_nsaved;
915 #ifdef DEBUG
916 	if (rwindow_debug&RW_FOLLOW)
917 		printf("rwindow_save(%p): nsaved %d\n", p, i);
918 #endif
919 	if (i == 0)
920 		return (0);
921 #ifdef DEBUG
922 	if (rwindow_debug&RW_FOLLOW)
923 		printf("%s[%d]: rwindow: pcb->stack:", p->p_comm, p->p_pid);
924 #endif
925 	 while (i > 0) {
926 		rwdest = rw[i--].rw_in[6];
927 #ifdef DEBUG
928 		if (rwindow_debug&RW_FOLLOW)
929 			printf("window %d at %lx\n", i, (long)rwdest);
930 #endif
931 		if (rwdest & 1) {
932 #ifdef DEBUG
933 			if (rwindow_debug&RW_64) {
934 				printf("rwindow_save: 64-bit tf to %p+BIAS or %p\n",
935 				       (void *)(long)rwdest, (void *)(long)(rwdest+BIAS));
936 				Debugger();
937 			}
938 #endif
939 			rwdest += BIAS;
940 			if (copyout((caddr_t)&rw[i], (caddr_t)(u_long)rwdest,
941 				    sizeof(*rw))) {
942 #ifdef DEBUG
943 			if (rwindow_debug&(RW_ERR|RW_64))
944 				printf("rwindow_save: 64-bit pcb copyout to %p failed\n",
945 				       (void *)(long)rwdest);
946 #endif
947 				return (-1);
948 			}
949 #ifdef DEBUG
950 			if (rwindow_debug&RW_64) {
951 				printf("Finished copyout(%p, %p, %lx)\n",
952 					(caddr_t)&rw[i], (caddr_t)(long)rwdest,
953                                 	sizeof(*rw));
954 				Debugger();
955 			}
956 #endif
957 		} else {
958 			struct rwindow32 rwstack;
959 
960 			/* 32-bit window */
961 			for (j = 0; j < 8; j++) {
962 				rwstack.rw_local[j] = (int)rw[i].rw_local[j];
963 				rwstack.rw_in[j] = (int)rw[i].rw_in[j];
964 			}
965 			/* Must truncate rwdest */
966 			if (copyout(&rwstack, (caddr_t)(u_long)(u_int)rwdest, sizeof(rwstack))) {
967 #ifdef DEBUG
968 				if (rwindow_debug&RW_ERR)
969 					printf("rwindow_save: 32-bit pcb copyout to %p (%p) failed\n",
970 					       (void *)(u_long)(u_int)rwdest, (void *)(u_long)rwdest);
971 #endif
972 				return (-1);
973 			}
974 		}
975 	}
976 	pcb->pcb_nsaved = 0;
977 #ifdef DEBUG
978 	if (rwindow_debug&RW_FOLLOW) {
979 		printf("\n");
980 		Debugger();
981 	}
982 #endif
983 	return (0);
984 }
985 
986 /*
987  * Kill user windows (before exec) by writing back to stack or pcb
988  * and then erasing any pcb tracks.  Otherwise we might try to write
989  * the registers into the new process after the exec.
990  */
991 void
992 kill_user_windows(p)
993 	struct proc *p;
994 {
995 
996 	write_user_windows();
997 	p->p_addr->u_pcb.pcb_nsaved = 0;
998 }
999 
1000 /*
1001  * This routine handles MMU generated faults.  About half
1002  * of them could be recoverable through uvm_fault.
1003  */
1004 void
1005 data_access_fault(tf, type, pc, addr, sfva, sfsr)
1006 	struct trapframe64 *tf;
1007 	unsigned type;
1008 	vaddr_t pc;
1009 	vaddr_t addr;
1010 	vaddr_t sfva;
1011 	u_long sfsr;
1012 {
1013 	u_int64_t tstate;
1014 	struct proc *p;
1015 	struct vmspace *vm;
1016 	vaddr_t va;
1017 	int rv;
1018 	vm_prot_t access_type;
1019 	vaddr_t onfault;
1020 	u_quad_t sticks;
1021 #ifdef DEBUG
1022 	static int lastdouble;
1023 	extern struct pcb* cpcb;
1024 #endif
1025 
1026 #ifdef DEBUG
1027 	if (tf->tf_pc == tf->tf_npc) {
1028 		printf("data_access_fault: tpc %lx == tnpc %lx\n",
1029 		       (long)tf->tf_pc, (long)tf->tf_npc);
1030 		Debugger();
1031 	}
1032 	write_user_windows();
1033 	if ((cpcb->pcb_nsaved > 8) ||
1034 	    (trapdebug&TDB_NSAVED && cpcb->pcb_nsaved) ||
1035 	    (trapdebug&(TDB_ADDFLT|TDB_FOLLOW))) {
1036 		printf("%ld: data_access_fault(%p, %x, %p, %p, %lx, %lx) "
1037 			"nsaved=%d\n",
1038 			(long)(curproc?curproc->p_pid:-1), tf, type,
1039 			(void*)addr, (void*)pc,
1040 			sfva, sfsr, (int)cpcb->pcb_nsaved);
1041 		if ((trapdebug&TDB_NSAVED && cpcb->pcb_nsaved)) Debugger();
1042 	}
1043 	if (trapdebug & TDB_FRAME) {
1044 		print_trapframe(tf);
1045 	}
1046 	if ((trapdebug & TDB_TL) && tl()) {
1047 		printf("%ld: data_access_fault(%p, %x, %p, %p, %lx, %lx) "
1048 			"nsaved=%d\n",
1049 			(long)(curproc?curproc->p_pid:-1), tf, type,
1050 			(void*)addr, (void*)pc,
1051 			sfva, sfsr, (int)cpcb->pcb_nsaved);
1052 		Debugger();
1053 	}
1054 	if (trapdebug&TDB_STOPCALL) {
1055 		Debugger();
1056 	}
1057 #endif
1058 
1059 	uvmexp.traps++;
1060 	if ((p = curproc) == NULL)	/* safety check */
1061 		p = &proc0;
1062 	sticks = p->p_sticks;
1063 
1064 #if 0
1065 	/*
1066 	 * This can happen when we're in DDB w/curproc == NULL and try
1067 	 * to access user space.
1068 	 */
1069 #ifdef DIAGNOSTIC
1070 	if ((addr & PAGE_MASK) &&
1071 	    (addr & PAGE_MASK) != p->p_vmspace->vm_map.pmap->pm_ctx) {
1072 		printf("data_access_fault: va ctx %x != pm ctx %x\n",
1073 		       (addr & PAGE_MASK), p->p_vmspace->vm_map.pmap->pm_ctx);
1074 		Debugger();
1075 	}
1076 #endif
1077 #endif
1078 	tstate = tf->tf_tstate;
1079 
1080 	/* Find the faulting va to give to uvm_fault */
1081 	va = trunc_page(addr);
1082 
1083 #ifdef DEBUG
1084 	if (lastdouble) {
1085 		printf("stacked data fault @ %lx (pc %lx);", addr, pc);
1086 		lastdouble = 0;
1087 		if (curproc == NULL)
1088 			printf("NULL proc\n");
1089 		else
1090 			printf("pid %d(%s); sigmask %x, sigcatch %x\n",
1091 			       curproc->p_pid, curproc->p_comm,
1092 				/* XXX */
1093 			       curproc->p_sigctx.ps_sigmask.__bits[0],
1094 			       curproc->p_sigctx.ps_sigcatch.__bits[0]);
1095 	}
1096 #endif
1097 	/*
1098 	 * Now munch on protections.
1099 	 *
1100 	 * If it was a FAST_DATA_ACCESS_MMU_MISS we have no idea what the
1101 	 * access was since the SFSR is not set.  But we should never get
1102 	 * here from there.
1103 	 */
1104 	if (type == T_FDMMU_MISS || (sfsr & SFSR_FV) == 0) {
1105 		/* Punt */
1106 		access_type = VM_PROT_READ;
1107 	} else {
1108 		access_type = (sfsr & SFSR_W) ? VM_PROT_WRITE : VM_PROT_READ;
1109 	}
1110 	if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
1111 		extern char Lfsbail[];
1112 		/*
1113 		 * If this was an access that we shouldn't try to page in,
1114 		 * resume at the fault handler without any action.
1115 		 */
1116 		if (p->p_addr && p->p_addr->u_pcb.pcb_onfault == Lfsbail)
1117 			goto kfault;
1118 
1119 		/*
1120 		 * During autoconfiguration, faults are never OK unless
1121 		 * pcb_onfault is set.  Once running normally we must allow
1122 		 * exec() to cause copy-on-write faults to kernel addresses.
1123 		 */
1124 		if (cold)
1125 			goto kfault;
1126 		if (!(addr&TLB_TAG_ACCESS_CTX)) {
1127 			/* CTXT == NUCLEUS */
1128 			rv = uvm_fault(kernel_map, va, 0, access_type);
1129 #ifdef DEBUG
1130 			if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW))
1131 				printf("data_access_fault: kernel "
1132 					"uvm_fault(%p, %lx, %x, %x) "
1133 					"sez %x -- %s\n",
1134 					kernel_map, (vaddr_t)va, 0,
1135 					access_type, rv,
1136 					rv ? "failure" : "success");
1137 #endif
1138 			if (rv == 0)
1139 				return;
1140 			goto kfault;
1141 		}
1142 	} else
1143 		p->p_md.md_tf = tf;
1144 
1145 	vm = p->p_vmspace;
1146 	/* alas! must call the horrible vm code */
1147 	onfault = (vaddr_t)p->p_addr->u_pcb.pcb_onfault;
1148 	p->p_addr->u_pcb.pcb_onfault = NULL;
1149 	rv = uvm_fault(&vm->vm_map, (vaddr_t)va, 0, access_type);
1150 	p->p_addr->u_pcb.pcb_onfault = (void *)onfault;
1151 
1152 #ifdef DEBUG
1153 	if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW))
1154 		printf("data_access_fault: %s uvm_fault(%p, %lx, %x, %x) "
1155 			"sez %x -- %s\n",
1156 			&vm->vm_map == kernel_map ? "kernel!!!" : "user",
1157 			&vm->vm_map, (vaddr_t)va, 0, access_type, rv,
1158 			rv ? "failure" : "success");
1159 #endif
1160 	/*
1161 	 * If this was a stack access we keep track of the maximum
1162 	 * accessed stack size.  Also, if uvm_fault gets a protection
1163 	 * failure it is due to accessing the stack region outside
1164 	 * the current limit and we need to reflect that as an access
1165 	 * error.
1166 	 */
1167 	if ((caddr_t)va >= vm->vm_maxsaddr) {
1168 		if (rv == 0) {
1169 			segsz_t nss = btoc(p->p_vmspace->vm_minsaddr - va);
1170 			if (nss > vm->vm_ssize)
1171 				vm->vm_ssize = nss;
1172 		} else if (rv == EACCES)
1173 			rv = EFAULT;
1174 	}
1175 	if (rv != 0) {
1176 		/*
1177 		 * Pagein failed.  If doing copyin/out, return to onfault
1178 		 * address.  Any other page fault in kernel, die; if user
1179 		 * fault, deliver SIGSEGV.
1180 		 */
1181 		if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
1182 kfault:
1183 			onfault = p->p_addr ?
1184 			    (long)p->p_addr->u_pcb.pcb_onfault : 0;
1185 			if (!onfault) {
1186 				extern int trap_trace_dis;
1187 				trap_trace_dis = 1; /* Disable traptrace for printf */
1188 				(void) splhigh();
1189 				printf("data fault: pc=%lx addr=%lx\n",
1190 				    pc, addr);
1191 				DEBUGGER(type, tf);
1192 				panic("kernel fault");
1193 				/* NOTREACHED */
1194 			}
1195 #ifdef DEBUG
1196 			if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW|TDB_STOPCPIO)) {
1197 				printf("data_access_fault: copyin/out of %p fault -- recover\n", (void *)addr);
1198 				DEBUGGER(type, tf);
1199 			}
1200 #endif
1201 			tf->tf_pc = onfault;
1202 			tf->tf_npc = onfault + 4;
1203 			return;
1204 		}
1205 #ifdef DEBUG
1206 		if (trapdebug&(TDB_ADDFLT|TDB_STOPSIG)) {
1207 			extern int trap_trace_dis;
1208 			trap_trace_dis = 1;
1209 			printf("data_access_fault at addr %p: sending SIGSEGV\n", (void *)addr);
1210 			printf("%ld: data_access_fault(%p, %x, %p, %p, %lx, %lx) "
1211 				"nsaved=%d\n",
1212 				(long)(curproc?curproc->p_pid:-1), tf, type,
1213 				(void*)addr, (void*)pc,
1214 				sfva, sfsr, (int)cpcb->pcb_nsaved);
1215 			Debugger();
1216 		}
1217 #endif
1218 		if (rv == ENOMEM) {
1219 			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
1220 			       p->p_pid, p->p_comm,
1221 			       p->p_cred && p->p_ucred ?
1222 			       p->p_ucred->cr_uid : -1);
1223 			trapsignal(p, SIGKILL, (u_long)addr);
1224 		} else {
1225 			trapsignal(p, SIGSEGV, (u_long)addr);
1226 		}
1227 	}
1228 	if ((tstate & TSTATE_PRIV) == 0) {
1229 		userret(p, pc, sticks);
1230 		share_fpu(p, tf);
1231 	}
1232 #ifdef DEBUG
1233 	if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW))
1234 		printf("data_access_fault: done\n");
1235 	if (trapdebug & TDB_FRAME) {
1236 		print_trapframe(tf);
1237 	}
1238 	if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW)) {
1239 		extern void* return_from_trap __P((void));
1240 		if ((void *)(u_long)tf->tf_pc == (void *)return_from_trap) {
1241 			printf("Returning from stack datafault\n");
1242 		}
1243 	}
1244 #endif
1245 }
1246 
1247 /*
1248  * This routine handles deferred errors caused by the memory
1249  * or I/O bus subsystems.  Most of these are fatal, and even
1250  * if they are not, recovery is painful.  Also, the TPC and
1251  * TNPC values are probably not valid if we're not doing a
1252  * special PEEK/POKE code sequence.
1253  */
1254 void
1255 data_access_error(tf, type, afva, afsr, sfva, sfsr)
1256 	struct trapframe64 *tf;
1257 	unsigned type;
1258 	vaddr_t sfva;
1259 	u_long sfsr;
1260 	vaddr_t afva;
1261 	u_long afsr;
1262 {
1263 	u_long pc;
1264 	u_int64_t tstate;
1265 	struct proc *p;
1266 	vaddr_t onfault;
1267 	u_quad_t sticks;
1268 #ifdef DEBUG
1269 	static int lastdouble;
1270 #endif
1271 
1272 #ifdef DEBUG
1273 	if (tf->tf_pc == tf->tf_npc) {
1274 		printf("data_access_error: tpc %lx == tnpc %lx\n",
1275 		       (long)tf->tf_pc, (long)tf->tf_npc);
1276 		Debugger();
1277 	}
1278 	write_user_windows();
1279 	if ((trapdebug&TDB_NSAVED && cpcb->pcb_nsaved) ||
1280 	    trapdebug&(TDB_ADDFLT|TDB_FOLLOW)) {
1281 		char buf[768];
1282 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1283 
1284 		printf("%d data_access_error(%lx, %lx, %lx, %p)=%lx @ %p %s\n",
1285 		       curproc?curproc->p_pid:-1,
1286 		       (long)type, (long)sfva, (long)afva, tf, (long)tf->tf_tstate,
1287 		       (void *)(u_long)tf->tf_pc, buf);
1288 	}
1289 	if (trapdebug & TDB_FRAME) {
1290 		print_trapframe(tf);
1291 	}
1292 	if ((trapdebug & TDB_TL) && tl()) {
1293 		char buf[768];
1294 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1295 
1296 		printf("%d tl %ld data_access_error(%lx, %lx, %lx, %p)=%lx @ %lx %s\n",
1297 		       curproc?curproc->p_pid:-1, (long)tl(),
1298 		       (long)type, (long)sfva, (long)afva, tf, (long)tf->tf_tstate,
1299 		       (long)tf->tf_pc, buf);
1300 		Debugger();
1301 	}
1302 	if (trapdebug&TDB_STOPCALL) {
1303 		Debugger();
1304 	}
1305 #endif
1306 
1307 	uvmexp.traps++;
1308 	if ((p = curproc) == NULL)	/* safety check */
1309 		p = &proc0;
1310 	sticks = p->p_sticks;
1311 
1312 	pc = tf->tf_pc;
1313 	tstate = tf->tf_tstate;
1314 
1315 	onfault = p->p_addr ? (long)p->p_addr->u_pcb.pcb_onfault : 0;
1316 	printf("data error type %x sfsr=%lx sfva=%lx afsr=%lx afva=%lx tf=%p\n",
1317 		type, sfsr, sfva, afsr, afva, tf);
1318 
1319 	if (afsr == 0) {
1320 		printf("data_access_error: no fault\n");
1321 		goto out;	/* No fault. Why were we called? */
1322 	}
1323 
1324 #ifdef DEBUG
1325 	if (lastdouble) {
1326 		printf("stacked data error @ %lx (pc %lx); sfsr %lx", sfva, pc, sfsr);
1327 		lastdouble = 0;
1328 		if (curproc == NULL)
1329 			printf("NULL proc\n");
1330 		else
1331 			printf("pid %d(%s); sigmask %x, sigcatch %x\n",
1332 			       curproc->p_pid, curproc->p_comm,
1333 				/* XXX */
1334 			       curproc->p_sigctx.ps_sigmask.__bits[0],
1335 			       curproc->p_sigctx.ps_sigcatch.__bits[0]);
1336 	}
1337 #endif
1338 
1339 	if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
1340 
1341 		if (!onfault) {
1342 			extern int trap_trace_dis;
1343 			char buf[768];
1344 
1345 			trap_trace_dis = 1; /* Disable traptrace for printf */
1346 			bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1347 			(void) splhigh();
1348 			printf("data fault: pc=%lx addr=%lx sfsr=%s\n",
1349 				(u_long)pc, (long)sfva, buf);
1350 			DEBUGGER(type, tf);
1351 			panic("kernel fault");
1352 			/* NOTREACHED */
1353 		}
1354 
1355 		/*
1356 		 * If this was a priviliged error but not a probe, we
1357 		 * cannot recover, so panic.
1358 		 */
1359 		if (afsr & ASFR_PRIV) {
1360 			char buf[128];
1361 
1362 			bitmask_snprintf(afsr, AFSR_BITS, buf, sizeof(buf));
1363 			panic("Privileged Async Fault: AFAR %p AFSR %lx\n%s",
1364 				(void *)afva, afsr, buf);
1365 			/* NOTREACHED */
1366 		}
1367 #ifdef DEBUG
1368 		if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW|TDB_STOPCPIO)) {
1369 			printf("data_access_error: kern fault -- skipping instr\n");
1370 			if (trapdebug&TDB_STOPCPIO) DEBUGGER(type, tf);
1371 		}
1372 #endif
1373 		tf->tf_pc = onfault;
1374 		tf->tf_npc = onfault + 4;
1375 		return;
1376 	}
1377 #ifdef DEBUG
1378 	if (trapdebug&(TDB_ADDFLT|TDB_STOPSIG)) {
1379 		extern int trap_trace_dis;
1380 		trap_trace_dis = 1;
1381 		printf("data_access_error at %p: sending SIGSEGV\n",
1382 			(void *)(u_long)afva);
1383 		Debugger();
1384 	}
1385 #endif
1386 	trapsignal(p, SIGSEGV, (u_long)sfva);
1387 out:
1388 	if ((tstate & TSTATE_PRIV) == 0) {
1389 		userret(p, pc, sticks);
1390 		share_fpu(p, tf);
1391 	}
1392 #ifdef DEBUG
1393 	if (trapdebug&(TDB_ADDFLT|TDB_FOLLOW))
1394 		printf("data_access_error: done\n");
1395 	if (trapdebug & TDB_FRAME) {
1396 		print_trapframe(tf);
1397 	}
1398 #endif
1399 }
1400 
1401 /*
1402  * This routine handles MMU generated faults.  About half
1403  * of them could be recoverable through uvm_fault.
1404  */
1405 void
1406 text_access_fault(tf, type, pc, sfsr)
1407 	unsigned type;
1408 	vaddr_t pc;
1409 	struct trapframe64 *tf;
1410 	u_long sfsr;
1411 {
1412 	u_int64_t tstate;
1413 	struct proc *p;
1414 	struct vmspace *vm;
1415 	vaddr_t va;
1416 	int rv;
1417 	vm_prot_t access_type;
1418 	u_quad_t sticks;
1419 
1420 #ifdef DEBUG
1421 	if (tf->tf_pc == tf->tf_npc) {
1422 		printf("text_access_fault: tpc %p == tnpc %p\n", (void *)(u_long)tf->tf_pc, (void *)(u_long)tf->tf_npc);
1423 		Debugger();
1424 	}
1425 	write_user_windows();
1426 	if (((trapdebug&TDB_NSAVED) && cpcb->pcb_nsaved) ||
1427 	    (trapdebug&(TDB_TXTFLT|TDB_FOLLOW)))
1428 		printf("%d text_access_fault(%x, %lx, %p)\n",
1429 		       curproc?curproc->p_pid:-1, type, pc, tf);
1430 	if (trapdebug & TDB_FRAME) {
1431 		print_trapframe(tf);
1432 	}
1433 	if ((trapdebug & TDB_TL) && tl()) {
1434 		printf("%d tl %d text_access_fault(%x, %lx, %p)\n",
1435 		       curproc?curproc->p_pid:-1, tl(), type, pc, tf);
1436 		Debugger();
1437 	}
1438 	if (trapdebug&TDB_STOPCALL) {
1439 		Debugger();
1440 	}
1441 #endif
1442 
1443 	uvmexp.traps++;
1444 	if ((p = curproc) == NULL)	/* safety check */
1445 		p = &proc0;
1446 	sticks = p->p_sticks;
1447 
1448 	tstate = tf->tf_tstate;
1449 
1450 	va = trunc_page(pc);
1451 
1452 	/* Now munch on protections... */
1453 
1454 	access_type = /* VM_PROT_EXECUTE| */VM_PROT_READ;
1455 	if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
1456 		extern int trap_trace_dis;
1457 		trap_trace_dis = 1; /* Disable traptrace for printf */
1458 		(void) splhigh();
1459 		printf("text_access_fault: pc=%lx va=%lx\n", pc, va);
1460 		DEBUGGER(type, tf);
1461 		panic("kernel fault");
1462 		/* NOTREACHED */
1463 	} else
1464 		p->p_md.md_tf = tf;
1465 
1466 	vm = p->p_vmspace;
1467 	/* alas! must call the horrible vm code */
1468 	rv = uvm_fault(&vm->vm_map, va, 0, access_type);
1469 
1470 #ifdef DEBUG
1471 	if (trapdebug&(TDB_TXTFLT|TDB_FOLLOW))
1472 		printf("text_access_fault: uvm_fault(%p, %lx, %x, FALSE) sez %x\n",
1473 		       &vm->vm_map, va, 0, rv);
1474 #endif
1475 	/*
1476 	 * If this was a stack access we keep track of the maximum
1477 	 * accessed stack size.  Also, if uvm_fault gets a protection
1478 	 * failure it is due to accessing the stack region outside
1479 	 * the current limit and we need to reflect that as an access
1480 	 * error.
1481 	 */
1482 	if ((caddr_t)va >= vm->vm_maxsaddr) {
1483 		if (rv == 0) {
1484 			segsz_t nss = btoc(p->p_vmspace->vm_minsaddr - va);
1485 			if (nss > vm->vm_ssize)
1486 				vm->vm_ssize = nss;
1487 		} else if (rv == EACCES)
1488 			rv = EFAULT;
1489 	}
1490 	if (rv != 0) {
1491 		/*
1492 		 * Pagein failed. Any other page fault in kernel, die; if user
1493 		 * fault, deliver SIGSEGV.
1494 		 */
1495 		if (tstate & TSTATE_PRIV) {
1496 			extern int trap_trace_dis;
1497 			trap_trace_dis = 1; /* Disable traptrace for printf */
1498 			(void) splhigh();
1499 			printf("text fault: pc=%llx\n", (unsigned long long)pc);
1500 			DEBUGGER(type, tf);
1501 			panic("kernel fault");
1502 			/* NOTREACHED */
1503 		}
1504 #ifdef DEBUG
1505 		if (trapdebug&(TDB_TXTFLT|TDB_STOPSIG)) {
1506 			extern int trap_trace_dis;
1507 			trap_trace_dis = 1;
1508 			printf("text_access_fault at %p: sending SIGSEGV\n", (void *)(u_long)va);
1509 			Debugger();
1510 		}
1511 #endif
1512 		trapsignal(p, SIGSEGV, (u_long)pc);
1513 	}
1514 	if ((tstate & TSTATE_PRIV) == 0) {
1515 		userret(p, pc, sticks);
1516 		share_fpu(p, tf);
1517 	}
1518 #ifdef DEBUG
1519 	if (trapdebug&(TDB_TXTFLT|TDB_FOLLOW)) {
1520 		printf("text_access_fault: done\n");
1521 		/* kdb_trap(T_BREAKPOINT, tf); */
1522 	}
1523 	if (trapdebug & TDB_FRAME) {
1524 		print_trapframe(tf);
1525 	}
1526 #endif
1527 }
1528 
1529 
1530 /*
1531  * This routine handles deferred errors caused by the memory
1532  * or I/O bus subsystems.  Most of these are fatal, and even
1533  * if they are not, recovery is painful.  Also, the TPC and
1534  * TNPC values are probably not valid if we're not doing a
1535  * special PEEK/POKE code sequence.
1536  */
1537 void
1538 text_access_error(tf, type, pc, sfsr, afva, afsr)
1539 	struct trapframe64 *tf;
1540 	unsigned type;
1541 	vaddr_t pc;
1542 	u_long sfsr;
1543 	vaddr_t afva;
1544 	u_long afsr;
1545 {
1546 	int64_t tstate;
1547 	struct proc *p;
1548 	struct vmspace *vm;
1549 	vaddr_t va;
1550 	int rv;
1551 	vm_prot_t access_type;
1552 	u_quad_t sticks;
1553 #ifdef DEBUG
1554 	static int lastdouble;
1555 #endif
1556 	char buf[768];
1557 
1558 #ifdef DEBUG
1559 	if (tf->tf_pc == tf->tf_npc) {
1560 		printf("text_access_error: tpc %p == tnpc %p\n",
1561 		    (void *)(u_long)tf->tf_pc, (void *)(u_long)tf->tf_npc);
1562 		Debugger();
1563 	}
1564 	write_user_windows();
1565 	if ((trapdebug&TDB_NSAVED && cpcb->pcb_nsaved) || trapdebug&(TDB_TXTFLT|TDB_FOLLOW)) {
1566 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1567 		printf("%ld text_access_error(%lx, %lx, %lx, %p)=%lx @ %lx %s\n",
1568 		       (long)(curproc?curproc->p_pid:-1),
1569 		       (long)type, pc, (long)afva, tf, (long)tf->tf_tstate,
1570 		       (long)tf->tf_pc, buf);
1571 	}
1572 	if (trapdebug & TDB_FRAME) {
1573 		print_trapframe(tf);
1574 	}
1575 	if ((trapdebug & TDB_TL) && tl()) {
1576 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1577 		printf("%ld tl %ld text_access_error(%lx, %lx, %lx, %p)=%lx @ %lx %s\n",
1578 		       (long)(curproc?curproc->p_pid:-1), (long)tl(),
1579 		       (long)type, (long)pc, (long)afva, tf,
1580 		       (long)tf->tf_tstate, (long)tf->tf_pc, buf);
1581 		Debugger();
1582 	}
1583 	if (trapdebug&TDB_STOPCALL) {
1584 		Debugger();
1585 	}
1586 #endif
1587 	uvmexp.traps++;
1588 	if ((p = curproc) == NULL)	/* safety check */
1589 		p = &proc0;
1590 	sticks = p->p_sticks;
1591 
1592 	tstate = tf->tf_tstate;
1593 
1594 	if ((afsr) != 0) {
1595 		extern int trap_trace_dis;
1596 
1597 		trap_trace_dis++; /* Disable traptrace for printf */
1598 		printf("text_access_error: memory error...\n");
1599 		printf("text memory error type %d sfsr=%lx sfva=%lx afsr=%lx afva=%lx tf=%p\n",
1600 		       type, sfsr, pc, afsr, afva, tf);
1601 		trap_trace_dis--; /* Reenable traptrace for printf */
1602 
1603 		if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT))
1604 			panic("text_access_error: kernel memory error");
1605 
1606 		/* User fault -- Berr */
1607 		trapsignal(p, SIGBUS, (u_long)pc);
1608 	}
1609 
1610 	if ((sfsr & SFSR_FV) == 0 || (sfsr & SFSR_FT) == 0)
1611 		goto out;	/* No fault. Why were we called? */
1612 
1613 	va = trunc_page(pc);
1614 
1615 #ifdef DEBUG
1616 	if (lastdouble) {
1617 		printf("stacked text error @ pc %lx; sfsr %lx", pc, sfsr);
1618 		lastdouble = 0;
1619 		if (curproc == NULL)
1620 			printf("NULL proc\n");
1621 		else
1622 			printf("pid %d(%s); sigmask %x, sigcatch %x\n",
1623 			       curproc->p_pid, curproc->p_comm,
1624 				/* XXX */
1625 			       curproc->p_sigctx.ps_sigmask.__bits[0],
1626 			       curproc->p_sigctx.ps_sigcatch.__bits[0]);
1627 	}
1628 #endif
1629 	/* Now munch on protections... */
1630 
1631 	access_type = /* VM_PROT_EXECUTE| */ VM_PROT_READ;
1632 	if (tstate & (PSTATE_PRIV<<TSTATE_PSTATE_SHIFT)) {
1633 		extern int trap_trace_dis;
1634 		trap_trace_dis = 1; /* Disable traptrace for printf */
1635 		bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1636 		(void) splhigh();
1637 		printf("text error: pc=%lx sfsr=%s\n", pc, buf);
1638 		DEBUGGER(type, tf);
1639 		panic("kernel fault");
1640 		/* NOTREACHED */
1641 	} else
1642 		p->p_md.md_tf = tf;
1643 
1644 	vm = p->p_vmspace;
1645 	/* alas! must call the horrible vm code */
1646 	rv = uvm_fault(&vm->vm_map, (vaddr_t)va, 0, access_type);
1647 
1648 	/*
1649 	 * If this was a stack access we keep track of the maximum
1650 	 * accessed stack size.  Also, if uvm_fault gets a protection
1651 	 * failure it is due to accessing the stack region outside
1652 	 * the current limit and we need to reflect that as an access
1653 	 * error.
1654 	 */
1655 	if ((caddr_t)va >= vm->vm_maxsaddr) {
1656 		if (rv == 0) {
1657 			segsz_t nss = btoc(p->p_vmspace->vm_minsaddr - va);
1658 			if (nss > vm->vm_ssize)
1659 				vm->vm_ssize = nss;
1660 		} else if (rv == EACCES)
1661 			rv = EFAULT;
1662 	}
1663 	if (rv != 0) {
1664 		/*
1665 		 * Pagein failed.  If doing copyin/out, return to onfault
1666 		 * address.  Any other page fault in kernel, die; if user
1667 		 * fault, deliver SIGSEGV.
1668 		 */
1669 		if (tstate & TSTATE_PRIV) {
1670 			extern int trap_trace_dis;
1671 			trap_trace_dis = 1; /* Disable traptrace for printf */
1672 			bitmask_snprintf(sfsr, SFSR_BITS, buf, sizeof buf);
1673 			(void) splhigh();
1674 			printf("text error: pc=%lx sfsr=%s\n", pc, buf);
1675 			DEBUGGER(type, tf);
1676 			panic("kernel fault");
1677 			/* NOTREACHED */
1678 		}
1679 #ifdef DEBUG
1680 		if (trapdebug&(TDB_TXTFLT|TDB_STOPSIG)) {
1681 			extern int trap_trace_dis;
1682 			trap_trace_dis = 1;
1683 			printf("text_access_error at %p: sending SIGSEGV\n",
1684 			    (void *)(u_long)va);
1685 			Debugger();
1686 		}
1687 #endif
1688 		trapsignal(p, SIGSEGV, (u_long)pc);
1689 	}
1690 out:
1691 	if ((tstate & TSTATE_PRIV) == 0) {
1692 		userret(p, pc, sticks);
1693 		share_fpu(p, tf);
1694 	}
1695 #ifdef DEBUG
1696 	if (trapdebug&(TDB_TXTFLT|TDB_FOLLOW))
1697 		printf("text_access_error: done\n");
1698 	if (trapdebug & TDB_FRAME) {
1699 		print_trapframe(tf);
1700 	}
1701 #endif
1702 }
1703 
1704 /*
1705  * System calls.  `pc' is just a copy of tf->tf_pc.
1706  *
1707  * Note that the things labelled `out' registers in the trapframe were the
1708  * `in' registers within the syscall trap code (because of the automatic
1709  * `save' effect of each trap).  They are, however, the %o registers of the
1710  * thing that made the system call, and are named that way here.
1711  *
1712  * 32-bit system calls on a 64-bit system are a problem.  Each system call
1713  * argument is stored in the smaller of the argument's true size or a
1714  * `register_t'.  Now on a 64-bit machine all normal types can be stored in a
1715  * `register_t'.  (The only exceptions would be 128-bit `quad's or 128-bit
1716  * extended precision floating point values, which we don't support.)  For
1717  * 32-bit syscalls, 64-bit integers like `off_t's, double precision floating
1718  * point values, and several other types cannot fit in a 32-bit `register_t'.
1719  * These will require reading in two `register_t' values for one argument.
1720  *
1721  * In order to calculate the true size of the arguments and therefore whether
1722  * any argument needs to be split into two slots, the system call args
1723  * structure needs to be built with the appropriately sized register_t.
1724  * Otherwise the emul needs to do some magic to split oversized arguments.
1725  *
1726  * We can handle most of this stuff for normal syscalls by using either a 32-bit
1727  * or 64-bit array of `register_t' arguments.  Unfortunately ktrace always
1728  * expects arguments to be `register_t's, so it loses badly.  What's worse,
1729  * ktrace may need to do size translations to massage the argument array
1730  * appropriately according to the emulation that is doing the ktrace.
1731  *
1732  */
1733 void
1734 syscall(tf, code, pc)
1735 	register_t code;
1736 	struct trapframe64 *tf;
1737 	register_t pc;
1738 {
1739 	int i, nsys, nap;
1740 	int64_t *ap;
1741 	const struct sysent *callp;
1742 	struct proc *p;
1743 	int error = 0, new;
1744 	union args {
1745 		register32_t i[8];
1746 		register64_t l[8];
1747 	} args;
1748 	register_t rval[2];
1749 	u_quad_t sticks;
1750 #ifdef DIAGNOSTIC
1751 	extern struct pcb *cpcb;
1752 #endif
1753 
1754 #ifdef DEBUG
1755 	write_user_windows();
1756 	if (tf->tf_pc == tf->tf_npc) {
1757 		printf("syscall: tpc %p == tnpc %p\n", (void *)(u_long)tf->tf_pc,
1758 		    (void *)(u_long)tf->tf_npc);
1759 		Debugger();
1760 	}
1761 	if ((trapdebug&TDB_NSAVED && cpcb->pcb_nsaved) || trapdebug&(TDB_SYSCALL|TDB_FOLLOW))
1762 		printf("%d syscall(%lx, %p, %lx)\n",
1763 		       curproc?curproc->p_pid:-1, (u_long)code, tf, (u_long)pc);
1764 	if (trapdebug & TDB_FRAME) {
1765 		print_trapframe(tf);
1766 	}
1767 	if ((trapdebug & TDB_TL) && tl()) {
1768 		printf("%d tl %d syscall(%lx, %p, %lx)\n",
1769 		       curproc?curproc->p_pid:-1, tl(), (u_long)code, tf, (u_long)pc);
1770 		Debugger();
1771 	}
1772 #endif
1773 
1774 	uvmexp.syscalls++;
1775 	p = curproc;
1776 #ifdef DIAGNOSTIC
1777 	if (tf->tf_tstate & TSTATE_PRIV)
1778 		panic("syscall from kernel");
1779 	if (cpcb != &p->p_addr->u_pcb)
1780 		panic("syscall: cpcb/ppcb mismatch");
1781 	if (tf != (struct trapframe64 *)((caddr_t)cpcb + USPACE) - 1)
1782 		panic("syscall: trapframe");
1783 #endif
1784 	sticks = p->p_sticks;
1785 	p->p_md.md_tf = tf;
1786 	new = code & (SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
1787 	code &= ~(SYSCALL_G7RFLAG | SYSCALL_G2RFLAG);
1788 
1789 	callp = p->p_emul->e_sysent;
1790 	nsys = p->p_emul->e_nsysent;
1791 
1792 	/*
1793 	 * The first six system call arguments are in the six %o registers.
1794 	 * Any arguments beyond that are in the `argument extension' area
1795 	 * of the user's stack frame (see <machine/frame.h>).
1796 	 *
1797 	 * Check for ``special'' codes that alter this, namely syscall and
1798 	 * __syscall.  The latter takes a quad syscall number, so that other
1799 	 * arguments are at their natural alignments.  Adjust the number
1800 	 * of ``easy'' arguments as appropriate; we will copy the hard
1801 	 * ones later as needed.
1802 	 */
1803 	ap = &tf->tf_out[0];
1804 	nap = 6;
1805 
1806 	switch (code) {
1807 	case SYS_syscall:
1808 		code = *ap++;
1809 		nap--;
1810 		break;
1811 	case SYS___syscall:
1812 		if (code < nsys &&
1813 		    callp[code].sy_call != callp[p->p_emul->e_nosys].sy_call)
1814 			break; /* valid system call */
1815 		if (tf->tf_out[6] & 1L) {
1816 			/* longs *are* quadwords */
1817 			code = ap[0];
1818 			ap += 1;
1819 			nap -= 1;
1820 		} else {
1821 			code = ap[_QUAD_LOWWORD];
1822 			ap += 2;
1823 			nap -= 2;
1824 		}
1825 		break;
1826 	}
1827 
1828 #ifdef DEBUG
1829 /*	printf("code=%x, nsys=%x\n", code, nsys); */
1830 	if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW))
1831 		printf("%d syscall(%d[%x]): tstate=%x:%x %s\n",
1832 		       curproc?curproc->p_pid:-1, (int)code, (u_int)code,
1833 		       (int)(tf->tf_tstate>>32), (int)(tf->tf_tstate),
1834 		       (p->p_emul->e_syscallnames) ?
1835 		       ((code < 0 || code >= nsys) ?
1836 			"illegal syscall" :
1837 			p->p_emul->e_syscallnames[code]) :
1838 		       "unknown syscall");
1839 	if (p->p_emul->e_syscallnames)
1840 		p->p_addr->u_pcb.lastcall =
1841 			((code < 0 || code >= nsys) ?
1842 			 "illegal syscall" :
1843 			 p->p_emul->e_syscallnames[code]);
1844 #endif
1845 	if (code < 0 || code >= nsys)
1846 		callp += p->p_emul->e_nosys;
1847 	else if (tf->tf_out[6] & 1L) {
1848 		register64_t *argp;
1849 #ifndef __arch64__
1850 #ifdef DEBUG
1851 		printf("syscall(): 64-bit stack on a 32-bit kernel????\n");
1852 		Debugger();
1853 #endif
1854 #endif
1855 		/* 64-bit stack -- not really supported on 32-bit kernels */
1856 		callp += code;
1857 		i = callp->sy_narg; /* Why divide? */
1858 #ifdef DEBUG
1859 		if (i != (long)callp->sy_argsize / sizeof(register64_t))
1860 			printf("syscall %s: narg=%hd, argsize=%hd, call=%p, argsz/reg64=%ld\n",
1861 			       (p->p_emul->e_syscallnames) ? ((code < 0 || code >= nsys) ?
1862 							      "illegal syscall" :
1863 							      p->p_emul->e_syscallnames[code])
1864 			       : "unknown syscall",
1865 			       callp->sy_narg, callp->sy_argsize, callp->sy_call,
1866 			       (long)callp->sy_argsize / sizeof(register64_t));
1867 #endif
1868 		if (i > nap) {	/* usually false */
1869 #ifdef DEBUG
1870 			if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW) || i>8) {
1871 				printf("Args64 %d>%d -- need to copyin\n", i , nap);
1872 			}
1873 #endif
1874 			if (i > 8)
1875 				panic("syscall nargs");
1876 			/* Read the whole block in */
1877 			error = copyin((caddr_t)(u_long)tf->tf_out[6] + BIAS +
1878 				       offsetof(struct frame64, fr_argx),
1879 				       (caddr_t)&args.l[nap], (i - nap) * sizeof(register64_t));
1880 			i = nap;
1881 		}
1882 		/* It should be faster to do <=6 longword copies than call bcopy */
1883 		for (argp = &args.l[0]; i--;)
1884 			*argp++ = *ap++;
1885 
1886 #ifdef KTRACE
1887 		if (KTRPOINT(p, KTR_SYSCALL))
1888 			ktrsyscall(p, code,
1889 				   callp->sy_argsize, (register_t*)args.l);
1890 #endif
1891 		if (error) goto bad;
1892 #ifdef DEBUG
1893 		if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW)) {
1894 			for (i=0; i < callp->sy_narg; i++)
1895 				printf("arg[%d]=%lx ", i, (long)(args.l[i]));
1896 			printf("\n");
1897 		}
1898 		if (trapdebug&(TDB_STOPCALL)) {
1899 			printf("stop precall\n");
1900 			Debugger();
1901 		}
1902 #endif
1903 	} else {
1904 		register32_t *argp;
1905 		int j = 0;
1906 
1907 		/* 32-bit stack */
1908 		callp += code;
1909 
1910 #if defined(__arch64__) && !defined(COMPAT_NETBSD32)
1911 #ifdef DEBUG
1912 #ifdef LKM
1913 		if ((curproc->p_flag & P_32) == 0)
1914 #endif
1915 		{
1916 			printf("syscall(): 32-bit stack on a 64-bit kernel????\n");
1917 			Debugger();
1918 		}
1919 #endif
1920 #endif
1921 
1922 		i = (long)callp->sy_argsize / sizeof(register32_t);
1923 		if (i > nap) {	/* usually false */
1924 			register32_t temp[6];
1925 #ifdef DEBUG
1926 			if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW) || i>8)
1927 				printf("Args %d>%d -- need to copyin\n", i , nap);
1928 #endif
1929 			if (i > 8)
1930 				panic("syscall nargs");
1931 			/* Read the whole block in */
1932 			error = copyin((caddr_t)(u_long)(tf->tf_out[6] +
1933 						 offsetof(struct frame32, fr_argx)),
1934 				       (caddr_t)&temp, (i - nap) * sizeof(register32_t));
1935 			/* Copy each to the argument array */
1936 			for (j = 0; nap + j < i; j++)
1937 				args.i[nap+j] = temp[j];
1938 #ifdef DEBUG
1939 			if (trapdebug & (TDB_SYSCALL|TDB_FOLLOW))	{
1940 				int k;
1941 				printf("Copyin args of %d from %p:\n", j,
1942 				       (caddr_t)(u_long)(tf->tf_out[6] + offsetof(struct frame32, fr_argx)));
1943 				for (k = 0; k < j; k++)
1944 					printf("arg %d = %p at %d val %p\n", k, (void *)(u_long)temp[k], nap+k, (void *)(u_long)args.i[nap+k]);
1945 			}
1946 #endif
1947 			i = nap;
1948 		}
1949 		/* Need to convert from int64 to int32 or we lose */
1950 		for (argp = &args.i[0]; i--;)
1951 				*argp++ = *ap++;
1952 #ifdef KTRACE
1953 		if (KTRPOINT(p, KTR_SYSCALL)) {
1954 #if defined(__arch64__)
1955 			register_t temp[8];
1956 
1957 			/* Need to xlate 32-bit->64-bit */
1958 			i = (long)callp->sy_argsize /
1959 				sizeof(register32_t);
1960 			for (j=0; j<i; j++)
1961 				temp[j] = args.i[j];
1962 			ktrsyscall(p, code,
1963 				   i * sizeof(register_t), (register_t *)temp);
1964 #else
1965 			ktrsyscall(p, code,
1966 				   callp->sy_argsize, (register_t *)args.i);
1967 #endif
1968 		}
1969 #endif
1970 		if (error) {
1971 			goto bad;
1972 		}
1973 #ifdef DEBUG
1974 		if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW)) {
1975 			for (i=0; i < (long)callp->sy_argsize / sizeof(register32_t); i++)
1976 				printf("arg[%d]=%x ", i, (int)(args.i[i]));
1977 			printf("\n");
1978 		}
1979 		if (trapdebug&(TDB_STOPCALL)) {
1980 			printf("stop precall\n");
1981 			Debugger();
1982 		}
1983 #endif
1984 	}
1985 #ifdef SYSCALL_DEBUG
1986 	scdebug_call(p, code, (register_t *)&args);
1987 #endif
1988 	rval[0] = 0;
1989 	rval[1] = tf->tf_out[1];
1990 #ifdef DEBUG
1991 	if (callp->sy_call == sys_nosys) {
1992 		printf("trapdebug: emul %s UNIPL syscall %d:%s\n",
1993 		       p->p_emul->e_name, (int)code,
1994 		       p->p_emul->e_syscallnames ? (
1995 			       (code < 0 || code >= nsys) ?
1996 			       "illegal syscall" :
1997 			       p->p_emul->e_syscallnames[code]) :
1998 		       "unknown syscall");
1999 	}
2000 #endif
2001 	error = (*callp->sy_call)(p, &args, rval);
2002 
2003 	switch (error) {
2004 		vaddr_t dest;
2005 	case 0:
2006 		/* Note: fork() does not return here in the child */
2007 		tf->tf_out[0] = rval[0];
2008 		tf->tf_out[1] = rval[1];
2009 		if (new) {
2010 			/* jmp %g2 (or %g7, deprecated) on success */
2011 			dest = tf->tf_global[new & SYSCALL_G2RFLAG ? 2 : 7];
2012 #ifdef DEBUG
2013 			if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW))
2014 				printf("syscall: return tstate=%llx new success to %p retval %lx:%lx\n",
2015 				       (unsigned long long)tf->tf_tstate, (void *)(u_long)dest,
2016 				       (u_long)rval[0], (u_long)rval[1]);
2017 #endif
2018 			if (dest & 3) {
2019 				error = EINVAL;
2020 				goto bad;
2021 			}
2022 		} else {
2023 			/* old system call convention: clear C on success */
2024 			tf->tf_tstate &= ~(((int64_t)(ICC_C|XCC_C))<<TSTATE_CCR_SHIFT);	/* success */
2025 			dest = tf->tf_npc;
2026 #ifdef DEBUG
2027 			if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW))
2028 				printf("syscall: return tstate=%llx old success to %p retval %lx:%lx\n",
2029 				       (unsigned long long)tf->tf_tstate, (void *)(u_long)dest,
2030 				       (u_long)rval[0], (u_long)rval[1]);
2031 			if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW))
2032 				printf("old pc=%p npc=%p dest=%p\n",
2033 				    (void *)(u_long)tf->tf_pc,
2034 				    (void *)(u_long)tf->tf_npc,
2035 				    (void *)(u_long)dest);
2036 #endif
2037 		}
2038 		tf->tf_pc = dest;
2039 		tf->tf_npc = dest + 4;
2040 		break;
2041 
2042 	case ERESTART:
2043 	case EJUSTRETURN:
2044 		/* nothing to do */
2045 		break;
2046 
2047 	default:
2048 	bad:
2049 		if (p->p_emul->e_errno)
2050 			error = p->p_emul->e_errno[error];
2051 		tf->tf_out[0] = error;
2052 		tf->tf_tstate |= (((int64_t)(ICC_C|XCC_C))<<TSTATE_CCR_SHIFT);	/* fail */
2053 		dest = tf->tf_npc;
2054 		tf->tf_pc = dest;
2055 		tf->tf_npc = dest + 4;
2056 #ifdef DEBUG
2057 		if (trapdebug&(TDB_SYSCALL|TDB_FOLLOW))
2058 			printf("syscall: return tstate=%llx fail %d to %p\n",
2059 			       (unsigned long long)tf->tf_tstate, error,
2060 			       (void *)(long)dest);
2061 #endif
2062 		break;
2063 	}
2064 
2065 #ifdef SYSCALL_DEBUG
2066 	scdebug_ret(p, code, error, rval);
2067 #endif
2068 	userret(p, pc, sticks);
2069 #ifdef NOTDEF_DEBUG
2070 	if ( code == 202) {
2071 		/* Trap on __sysctl */
2072 		Debugger();
2073 	}
2074 #endif
2075 #ifdef KTRACE
2076 	if (KTRPOINT(p, KTR_SYSRET))
2077 		ktrsysret(p, code, error, rval[0]);
2078 #endif
2079 	share_fpu(p, tf);
2080 #ifdef DEBUG
2081 	if (trapdebug&(TDB_STOPCALL|TDB_SYSTOP)) {
2082 		Debugger();
2083 	}
2084 #endif
2085 #ifdef DEBUG
2086 	if (trapdebug & TDB_FRAME) {
2087 		print_trapframe(tf);
2088 	}
2089 #endif
2090 }
2091 
2092 /*
2093  * Process the tail end of a fork() for the child.
2094  */
2095 void
2096 child_return(arg)
2097 	void *arg;
2098 {
2099 	struct proc *p = arg;
2100 
2101 	/*
2102 	 * Return values in the frame set by cpu_fork().
2103 	 */
2104 #ifdef NOTDEF_DEBUG
2105 	printf("child_return: proc=%p\n", p);
2106 #endif
2107 	userret(p, p->p_md.md_tf->tf_pc, 0);
2108 #ifdef KTRACE
2109 	if (KTRPOINT(p, KTR_SYSRET))
2110 		ktrsysret(p,
2111 			  (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
2112 #endif
2113 }
2114 
2115