/*	$NetBSD: locore.s,v 1.284 2023/02/23 14:56:37 riastradh Exp $	*/

/*
 * Copyright (c) 1996 Paul Kranenburg
 * Copyright (c) 1996
 * 	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *	This product includes software developed by Harvard University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by Paul Kranenburg.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
 */

#include "opt_ddb.h"
#include "opt_kgdb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_sunos.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <machine/param.h>
#include <machine/asm.h>
#include <sparc/sparc/intreg.h>
#include <sparc/sparc/timerreg.h>
#include <sparc/sparc/vaddrs.h>
#ifdef notyet
#include <sparc/dev/zsreg.h>
#endif
#include <machine/ctlreg.h>
#include <machine/intr.h>
#include <machine/psl.h>
#include <machine/signal.h>
#include <machine/trap.h>

#include <sys/syscall.h>

/* use as needed to align things on longword boundaries */
#define	_ALIGN	.align 4

/*
 * CCFSZ (C Compiler Frame SiZe) is the size of a stack frame required if
 * a function is to call C code.  It should be just 64, but Sun defined
 * their frame with space to hold arguments 0 through 5 (plus some junk),
 * and varargs routines (such as printf) demand this, and gcc uses this
 * area at times anyway.
 */
#define	CCFSZ	96
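
/*
 * An illustrative breakdown (informal; the SPARC ABI is authoritative):
 * the 96 bytes cover the 16-word register window save area (16*4 = 64),
 * one word for the hidden aggregate-return pointer, and six words for
 * outgoing arguments 0 through 5, rounded up from 92 to 96 to keep %sp
 * 8-byte aligned.
 */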

/* We rely on the fact that %lo(CPUINFO_VA) is zero */
.if CPUINFO_VA & 0x1fff
BARF
.endif

#if EV_COUNT != 0
# error "this code does not work with EV_COUNT != 0"
#endif
#if EV_STRUCTSIZE != 32
# error "this code does not work with EV_STRUCTSIZE != 32"
#else
# define EV_STRUCTSHIFT	5
#endif
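
/*
 * (EV_STRUCTSHIFT is the log2 of the evcnt structure size, presumably so
 * that an event-counter index can be turned into a byte offset with a
 * single shift instruction.)
 */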

/*
 * Another handy macro: load one register window, given `base' address.
 * This can be either a simple register (e.g., %sp) or include an initial
 * offset (e.g., %g6 + PCB_RW).
 */
#define	LOADWIN(addr) \
	ldd	[addr], %l0; \
	ldd	[addr + 8], %l2; \
	ldd	[addr + 16], %l4; \
	ldd	[addr + 24], %l6; \
	ldd	[addr + 32], %i0; \
	ldd	[addr + 40], %i2; \
	ldd	[addr + 48], %i4; \
	ldd	[addr + 56], %i6

/*
 * To return from trap we need the two-instruction sequence
 * `jmp %l1; rett %l2', which is defined here for convenience.
 */
#define	RETT	jmp %l1; rett %l2

	.data
/*
 * The interrupt stack.
 *
 * This is the very first thing in the data segment, and therefore has
 * the lowest kernel stack address.  We count on this in the interrupt
 * trap-frame setup code, since we may need to switch from the kernel
 * stack to the interrupt stack (iff we are not already on the interrupt
 * stack).  One sethi+cmp is all we need since this is so carefully
 * arranged.
 *
 * In SMP kernels, each CPU has its own interrupt stack and the computation
 * to determine whether we're already on the interrupt stack is slightly
 * more time consuming (see INTR_SETUP() below).
 */
	.globl	_C_LABEL(intstack)
	.globl	_C_LABEL(eintstack)
_C_LABEL(intstack):
	.skip	INT_STACK_SIZE		! 16k = 128 128-byte stack frames
_C_LABEL(eintstack):

_EINTSTACKP = CPUINFO_VA + CPUINFO_EINTSTACK

/*
 * CPUINFO_VA is a CPU-local virtual address; cpi->ci_self is a global
 * virtual address for the same structure.  It must be stored in p->p_cpu
 * upon context switch.
 */
_CISELFP	= CPUINFO_VA + CPUINFO_SELF
_CIFLAGS	= CPUINFO_VA + CPUINFO_FLAGS

/* Per-CPU AST requests */
_WANT_AST	= CPUINFO_VA + CPUINFO_WANT_AST

/*
 * Process 0's u.
 *
 * This must be aligned on an 8 byte boundary.
 */
	.globl	_C_LABEL(u0)
_C_LABEL(u0):	.skip	USPACE
estack0:

#ifdef KGDB
/*
 * Another item that must be aligned, easiest to put it here.
 */
KGDB_STACK_SIZE = 2048
	.globl	_C_LABEL(kgdb_stack)
_C_LABEL(kgdb_stack):
	.skip	KGDB_STACK_SIZE		! hope this is enough
#endif

/*
 * cpcb points to the current pcb (and hence u. area).
 * Initially this is the special one.
 */
cpcb = CPUINFO_VA + CPUINFO_CURPCB

/* curlwp points to the current LWP that has the CPU */
curlwp = CPUINFO_VA + CPUINFO_CURLWP

/*
 * cputyp is the current CPU type, used to distinguish between
 * the many variations of different sun4* machines. It contains
 * the value CPU_SUN4, CPU_SUN4C, or CPU_SUN4M.
 */
	.globl	_C_LABEL(cputyp)
_C_LABEL(cputyp):
	.word	1

#if defined(SUN4C) || defined(SUN4M)
cputypval:
	.asciz	"sun4c"
	.ascii	"     "
cputypvar:
	.asciz	"compatible"
	_ALIGN
#endif

/*
 * These variables are pointed to by the cpp symbols PGSHIFT, NBPG,
 * and PGOFSET.
 */
	.globl	_C_LABEL(pgshift), _C_LABEL(nbpg), _C_LABEL(pgofset)
_C_LABEL(pgshift):
	.word	0
_C_LABEL(nbpg):
	.word	0
_C_LABEL(pgofset):
	.word	0

	.globl	_C_LABEL(trapbase)
_C_LABEL(trapbase):
	.word	0

#if 0
#if defined(SUN4M)
_mapme:
	.asciz "0 0 f8000000 15c6a0 map-pages"
#endif
#endif

#if !defined(SUN4D)
sun4d_notsup:
	.asciz	"cr .( NetBSD/sparc: this kernel does not support the sun4d) cr"
#endif
#if !defined(SUN4M)
sun4m_notsup:
	.asciz	"cr .( NetBSD/sparc: this kernel does not support the sun4m) cr"
#endif
#if !defined(SUN4C)
sun4c_notsup:
	.asciz	"cr .( NetBSD/sparc: this kernel does not support the sun4c) cr"
#endif
#if !defined(SUN4)
sun4_notsup:
	! the extra characters at the end are to ensure the zs fifo drains
	! before we halt. Sick, eh?
	.asciz	"NetBSD/sparc: this kernel does not support the sun4\n\r \b"
#endif
	_ALIGN

	.text

/*
 * The first thing in the real text segment is the trap vector table,
 * which must be aligned on a 4096 byte boundary.  The text segment
 * starts beyond page 0 of KERNBASE so that there is a red zone
 * between user and kernel space.  Since the boot ROM loads us at
 * PROM_LOADADDR, it is far easier to start at KERNBASE+PROM_LOADADDR than to
 * buck the trend.  This is two or four pages in (depending on whether the
 * pagesize is 8192 or 4096).  We place two items in this area:
 * the message buffer (phys addr 0) and the cpu_softc structure for
 * the first processor in the system (phys addr 0x2000).
 * Because the message buffer is in our "red zone" between user and
 * kernel space we remap it in configure() to another location and
 * invalidate the mapping at KERNBASE.
 */

/*
 * Each trap has room for four instructions, of which one perforce must
 * be a branch.  On entry the hardware has copied pc and npc to %l1 and
 * %l2 respectively.  We use two more to read the psr into %l0, and to
 * put the trap type value into %l3 (with a few exceptions below).
 * We could read the trap type field of %tbr later in the code instead,
 * but there is no need, and that would require more instructions
 * (read+mask, vs 1 `mov' here).
 *
 * I used to generate these numbers by address arithmetic, but gas's
 * expression evaluator has about as much sense as your average slug
 * (oddly enough, the code looks about as slimy too).  Thus, all the
 * trap numbers are given as arguments to the trap macros.  This means
 * there is one line per trap.  Sigh.
 *
 * Note that only the local registers may be used, since the trap
 * window is potentially the last window.  Its `in' registers are
 * the previous window's outs (as usual), but more important, its
 * `out' registers may be in use as the `topmost' window's `in' registers.
 * The global registers are of course verboten (well, until we save
 * them away).
 *
 * Hardware interrupt vectors can be `linked'---the linkage is to regular
 * C code---or rewired to fast in-window handlers.  The latter are good
 * for unbuffered hardware like the Zilog serial chip and the AMD audio
 * chip, where many interrupts can be handled trivially with pseudo-DMA or
 * similar.  Only one `fast' interrupt can be used per level, however, and
 * direct and `fast' interrupts are incompatible.  Routines in intr.c
 * handle setting these, with optional paranoia.
 */

	/* regular vectored traps */
#define	VTRAP(type, label) \
	mov (type), %l3; b label; mov %psr, %l0; nop
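
/*
 * For example, VTRAP(T_ALIGN, slowtrap) expands to the four-instruction
 * vector slot
 *
 *	mov T_ALIGN, %l3; b slowtrap; mov %psr, %l0; nop
 *
 * so the vectored handler begins with the trap type in %l3 and the
 * %psr in %l0.
 */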

	/* hardware interrupts (can be linked or made `fast') */
#define	HARDINT44C(lev) \
	mov (lev), %l3; b _C_LABEL(sparc_interrupt44c); mov %psr, %l0; nop

	/* hardware interrupts (can be linked or made `fast') */
#define	HARDINT4M(lev) \
	mov (lev), %l3; b _C_LABEL(sparc_interrupt4m); mov %psr, %l0; nop

	/* software interrupts (may not be made direct, sorry---but you
	   should not be using them trivially anyway) */
#define	SOFTINT44C(lev, bit) \
	mov (lev), %l3; mov (bit), %l4; b softintr_sun44c; mov %psr, %l0

	/* There's no SOFTINT4M(): both hard and soft vector the same way */

	/* traps that just call trap() */
#define	TRAP(type)	VTRAP(type, slowtrap)

	/* architecturally undefined traps (cause panic) */
#define	UTRAP(type)	VTRAP(type, slowtrap)

	/* software undefined traps (may be replaced) */
#define	STRAP(type)	VTRAP(type, slowtrap)

/* breakpoint acts differently under kgdb */
#ifdef KGDB
#define	BPT		VTRAP(T_BREAKPOINT, bpt)
#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
#else
#define	BPT		TRAP(T_BREAKPOINT)
#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
#endif

/* special high-speed 1-instruction-shaved-off traps (get nothing in %l3) */
#define	SYSCALL		b _C_LABEL(_syscall); mov %psr, %l0; nop; nop
#define	WINDOW_OF	b window_of; mov %psr, %l0; nop; nop
#define	WINDOW_UF	b window_uf; mov %psr, %l0; nop; nop
#ifdef notyet
#define	ZS_INTERRUPT	b zshard; mov %psr, %l0; nop; nop
#else
#define	ZS_INTERRUPT44C	HARDINT44C(12)
#define	ZS_INTERRUPT4M	HARDINT4M(12)
#endif

#ifdef DEBUG
#define TRAP_TRACE(tt, tmp)					\
	sethi	%hi(CPUINFO_VA + CPUINFO_TT), tmp;		\
	st	tt, [tmp + %lo(CPUINFO_VA + CPUINFO_TT)];
#define TRAP_TRACE2(tt, tmp1, tmp2)				\
	mov	tt, tmp1;					\
	TRAP_TRACE(tmp1, tmp2)
#else /* DEBUG */
#define TRAP_TRACE(tt,tmp)		/**/
#define TRAP_TRACE2(tt,tmp1,tmp2)	/**/
#endif /* DEBUG */

	.globl	_ASM_LABEL(start), _C_LABEL(kernel_text)
	_C_LABEL(kernel_text) = start		! for kvm_mkdb(8)
_ASM_LABEL(start):
/*
 * Put sun4 traptable first, since it needs the most stringent alignment (8192)
 */
#if defined(SUN4)
trapbase_sun4:
	/* trap 0 is special since we cannot receive it */
	b dostart; nop; nop; nop	! 00 = reset (fake)
	VTRAP(T_TEXTFAULT, memfault_sun4)	! 01 = instr. fetch fault
	TRAP(T_ILLINST)			! 02 = illegal instruction
	TRAP(T_PRIVINST)		! 03 = privileged instruction
	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
	WINDOW_OF			! 05 = window overflow
	WINDOW_UF			! 06 = window underflow
	TRAP(T_ALIGN)			! 07 = address alignment error
	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
	VTRAP(T_DATAFAULT, memfault_sun4)	! 09 = data fetch fault
	TRAP(T_TAGOF)			! 0a = tag overflow
	UTRAP(0x0b)
	UTRAP(0x0c)
	UTRAP(0x0d)
	UTRAP(0x0e)
	UTRAP(0x0f)
	UTRAP(0x10)
	SOFTINT44C(1, IE_L1)		! 11 = level 1 interrupt
	HARDINT44C(2)			! 12 = level 2 interrupt
	HARDINT44C(3)			! 13 = level 3 interrupt
	SOFTINT44C(4, IE_L4)		! 14 = level 4 interrupt
	HARDINT44C(5)			! 15 = level 5 interrupt
	SOFTINT44C(6, IE_L6)		! 16 = level 6 interrupt
	HARDINT44C(7)			! 17 = level 7 interrupt
	HARDINT44C(8)			! 18 = level 8 interrupt
	HARDINT44C(9)			! 19 = level 9 interrupt
	HARDINT44C(10)			! 1a = level 10 interrupt
	HARDINT44C(11)			! 1b = level 11 interrupt
	ZS_INTERRUPT44C			! 1c = level 12 (zs) interrupt
	HARDINT44C(13)			! 1d = level 13 interrupt
	HARDINT44C(14)			! 1e = level 14 interrupt
	VTRAP(15, nmi_sun4)		! 1f = nonmaskable interrupt
	UTRAP(0x20)
	UTRAP(0x21)
	UTRAP(0x22)
	UTRAP(0x23)
	TRAP(T_CPDISABLED)	! 24 = coprocessor instr, EC bit off in psr
	UTRAP(0x25)
	UTRAP(0x26)
	UTRAP(0x27)
	TRAP(T_CPEXCEPTION)	! 28 = coprocessor exception
	UTRAP(0x29)
	UTRAP(0x2a)
	UTRAP(0x2b)
	UTRAP(0x2c)
	UTRAP(0x2d)
	UTRAP(0x2e)
	UTRAP(0x2f)
	UTRAP(0x30)
	UTRAP(0x31)
	UTRAP(0x32)
	UTRAP(0x33)
	UTRAP(0x34)
	UTRAP(0x35)
	UTRAP(0x36)
	UTRAP(0x37)
	UTRAP(0x38)
	UTRAP(0x39)
	UTRAP(0x3a)
	UTRAP(0x3b)
	UTRAP(0x3c)
	UTRAP(0x3d)
	UTRAP(0x3e)
	UTRAP(0x3f)
	UTRAP(0x40)
	UTRAP(0x41)
	UTRAP(0x42)
	UTRAP(0x43)
	UTRAP(0x44)
	UTRAP(0x45)
	UTRAP(0x46)
	UTRAP(0x47)
	UTRAP(0x48)
	UTRAP(0x49)
	UTRAP(0x4a)
	UTRAP(0x4b)
	UTRAP(0x4c)
	UTRAP(0x4d)
	UTRAP(0x4e)
	UTRAP(0x4f)
	UTRAP(0x50)
	UTRAP(0x51)
	UTRAP(0x52)
	UTRAP(0x53)
	UTRAP(0x54)
	UTRAP(0x55)
	UTRAP(0x56)
	UTRAP(0x57)
	UTRAP(0x58)
	UTRAP(0x59)
	UTRAP(0x5a)
	UTRAP(0x5b)
	UTRAP(0x5c)
	UTRAP(0x5d)
	UTRAP(0x5e)
	UTRAP(0x5f)
	UTRAP(0x60)
	UTRAP(0x61)
	UTRAP(0x62)
	UTRAP(0x63)
	UTRAP(0x64)
	UTRAP(0x65)
	UTRAP(0x66)
	UTRAP(0x67)
	UTRAP(0x68)
	UTRAP(0x69)
	UTRAP(0x6a)
	UTRAP(0x6b)
	UTRAP(0x6c)
	UTRAP(0x6d)
	UTRAP(0x6e)
	UTRAP(0x6f)
	UTRAP(0x70)
	UTRAP(0x71)
	UTRAP(0x72)
	UTRAP(0x73)
	UTRAP(0x74)
	UTRAP(0x75)
	UTRAP(0x76)
	UTRAP(0x77)
	UTRAP(0x78)
	UTRAP(0x79)
	UTRAP(0x7a)
	UTRAP(0x7b)
	UTRAP(0x7c)
	UTRAP(0x7d)
	UTRAP(0x7e)
	UTRAP(0x7f)
	SYSCALL			! 80 = sun syscall
	BPT			! 81 = pseudo breakpoint instruction
	TRAP(T_DIV0)		! 82 = divide by zero
	TRAP(T_FLUSHWIN)	! 83 = flush windows
	TRAP(T_CLEANWIN)	! 84 = provide clean windows
	TRAP(T_RANGECHECK)	! 85 = ???
	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
	TRAP(T_INTOF)		! 87 = integer overflow
	SYSCALL			! 88 = svr4 syscall
	SYSCALL			! 89 = bsd syscall
	BPT_KGDB_EXEC		! 8a = enter kernel gdb on kernel startup
	STRAP(0x8b)
	STRAP(0x8c)
	STRAP(0x8d)
	STRAP(0x8e)
	STRAP(0x8f)
	STRAP(0x90)
	STRAP(0x91)
	STRAP(0x92)
	STRAP(0x93)
	STRAP(0x94)
	STRAP(0x95)
	STRAP(0x96)
	STRAP(0x97)
	STRAP(0x98)
	STRAP(0x99)
	STRAP(0x9a)
	STRAP(0x9b)
	STRAP(0x9c)
	STRAP(0x9d)
	STRAP(0x9e)
	STRAP(0x9f)
	STRAP(0xa0)
	STRAP(0xa1)
	STRAP(0xa2)
	STRAP(0xa3)
	STRAP(0xa4)
	STRAP(0xa5)
	STRAP(0xa6)
	STRAP(0xa7)
	STRAP(0xa8)
	STRAP(0xa9)
	STRAP(0xaa)
	STRAP(0xab)
	STRAP(0xac)
	STRAP(0xad)
	STRAP(0xae)
	STRAP(0xaf)
	STRAP(0xb0)
	STRAP(0xb1)
	STRAP(0xb2)
	STRAP(0xb3)
	STRAP(0xb4)
	STRAP(0xb5)
	STRAP(0xb6)
	STRAP(0xb7)
	STRAP(0xb8)
	STRAP(0xb9)
	STRAP(0xba)
	STRAP(0xbb)
	STRAP(0xbc)
	STRAP(0xbd)
	STRAP(0xbe)
	STRAP(0xbf)
	STRAP(0xc0)
	STRAP(0xc1)
	STRAP(0xc2)
	STRAP(0xc3)
	STRAP(0xc4)
	STRAP(0xc5)
	STRAP(0xc6)
	STRAP(0xc7)
	STRAP(0xc8)
	STRAP(0xc9)
	STRAP(0xca)
	STRAP(0xcb)
	STRAP(0xcc)
	STRAP(0xcd)
	STRAP(0xce)
	STRAP(0xcf)
	STRAP(0xd0)
	STRAP(0xd1)
	STRAP(0xd2)
	STRAP(0xd3)
	STRAP(0xd4)
	STRAP(0xd5)
	STRAP(0xd6)
	STRAP(0xd7)
	STRAP(0xd8)
	STRAP(0xd9)
	STRAP(0xda)
	STRAP(0xdb)
	STRAP(0xdc)
	STRAP(0xdd)
	STRAP(0xde)
	STRAP(0xdf)
	STRAP(0xe0)
	STRAP(0xe1)
	STRAP(0xe2)
	STRAP(0xe3)
	STRAP(0xe4)
	STRAP(0xe5)
	STRAP(0xe6)
	STRAP(0xe7)
	STRAP(0xe8)
	STRAP(0xe9)
	STRAP(0xea)
	STRAP(0xeb)
	STRAP(0xec)
	STRAP(0xed)
	STRAP(0xee)
	STRAP(0xef)
	STRAP(0xf0)
	STRAP(0xf1)
	STRAP(0xf2)
	STRAP(0xf3)
	STRAP(0xf4)
	STRAP(0xf5)
	STRAP(0xf6)
	STRAP(0xf7)
	STRAP(0xf8)
	STRAP(0xf9)
	STRAP(0xfa)
	STRAP(0xfb)
	STRAP(0xfc)
	STRAP(0xfd)
	STRAP(0xfe)
	STRAP(0xff)
#endif

#if defined(SUN4C)
trapbase_sun4c:
/* trap 0 is special since we cannot receive it */
	b dostart; nop; nop; nop	! 00 = reset (fake)
	VTRAP(T_TEXTFAULT, memfault_sun4c)	! 01 = instr. fetch fault
	TRAP(T_ILLINST)			! 02 = illegal instruction
	TRAP(T_PRIVINST)		! 03 = privileged instruction
	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
	WINDOW_OF			! 05 = window overflow
	WINDOW_UF			! 06 = window underflow
	TRAP(T_ALIGN)			! 07 = address alignment error
	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
	VTRAP(T_DATAFAULT, memfault_sun4c)	! 09 = data fetch fault
	TRAP(T_TAGOF)			! 0a = tag overflow
	UTRAP(0x0b)
	UTRAP(0x0c)
	UTRAP(0x0d)
	UTRAP(0x0e)
	UTRAP(0x0f)
	UTRAP(0x10)
	SOFTINT44C(1, IE_L1)		! 11 = level 1 interrupt
	HARDINT44C(2)			! 12 = level 2 interrupt
	HARDINT44C(3)			! 13 = level 3 interrupt
	SOFTINT44C(4, IE_L4)		! 14 = level 4 interrupt
	HARDINT44C(5)			! 15 = level 5 interrupt
	SOFTINT44C(6, IE_L6)		! 16 = level 6 interrupt
	HARDINT44C(7)			! 17 = level 7 interrupt
	HARDINT44C(8)			! 18 = level 8 interrupt
	HARDINT44C(9)			! 19 = level 9 interrupt
	HARDINT44C(10)			! 1a = level 10 interrupt
	HARDINT44C(11)			! 1b = level 11 interrupt
	ZS_INTERRUPT44C			! 1c = level 12 (zs) interrupt
	HARDINT44C(13)			! 1d = level 13 interrupt
	HARDINT44C(14)			! 1e = level 14 interrupt
	VTRAP(15, nmi_sun4c)		! 1f = nonmaskable interrupt
	UTRAP(0x20)
	UTRAP(0x21)
	UTRAP(0x22)
	UTRAP(0x23)
	TRAP(T_CPDISABLED)	! 24 = coprocessor instr, EC bit off in psr
	UTRAP(0x25)
	UTRAP(0x26)
	UTRAP(0x27)
	TRAP(T_CPEXCEPTION)	! 28 = coprocessor exception
	UTRAP(0x29)
	UTRAP(0x2a)
	UTRAP(0x2b)
	UTRAP(0x2c)
	UTRAP(0x2d)
	UTRAP(0x2e)
	UTRAP(0x2f)
	UTRAP(0x30)
	UTRAP(0x31)
	UTRAP(0x32)
	UTRAP(0x33)
	UTRAP(0x34)
	UTRAP(0x35)
	UTRAP(0x36)
	UTRAP(0x37)
	UTRAP(0x38)
	UTRAP(0x39)
	UTRAP(0x3a)
	UTRAP(0x3b)
	UTRAP(0x3c)
	UTRAP(0x3d)
	UTRAP(0x3e)
	UTRAP(0x3f)
	UTRAP(0x40)
	UTRAP(0x41)
	UTRAP(0x42)
	UTRAP(0x43)
	UTRAP(0x44)
	UTRAP(0x45)
	UTRAP(0x46)
	UTRAP(0x47)
	UTRAP(0x48)
	UTRAP(0x49)
	UTRAP(0x4a)
	UTRAP(0x4b)
	UTRAP(0x4c)
	UTRAP(0x4d)
	UTRAP(0x4e)
	UTRAP(0x4f)
	UTRAP(0x50)
	UTRAP(0x51)
	UTRAP(0x52)
	UTRAP(0x53)
	UTRAP(0x54)
	UTRAP(0x55)
	UTRAP(0x56)
	UTRAP(0x57)
	UTRAP(0x58)
	UTRAP(0x59)
	UTRAP(0x5a)
	UTRAP(0x5b)
	UTRAP(0x5c)
	UTRAP(0x5d)
	UTRAP(0x5e)
	UTRAP(0x5f)
	UTRAP(0x60)
	UTRAP(0x61)
	UTRAP(0x62)
	UTRAP(0x63)
	UTRAP(0x64)
	UTRAP(0x65)
	UTRAP(0x66)
	UTRAP(0x67)
	UTRAP(0x68)
	UTRAP(0x69)
	UTRAP(0x6a)
	UTRAP(0x6b)
	UTRAP(0x6c)
	UTRAP(0x6d)
	UTRAP(0x6e)
	UTRAP(0x6f)
	UTRAP(0x70)
	UTRAP(0x71)
	UTRAP(0x72)
	UTRAP(0x73)
	UTRAP(0x74)
	UTRAP(0x75)
	UTRAP(0x76)
	UTRAP(0x77)
	UTRAP(0x78)
	UTRAP(0x79)
	UTRAP(0x7a)
	UTRAP(0x7b)
	UTRAP(0x7c)
	UTRAP(0x7d)
	UTRAP(0x7e)
	UTRAP(0x7f)
	SYSCALL			! 80 = sun syscall
	BPT			! 81 = pseudo breakpoint instruction
	TRAP(T_DIV0)		! 82 = divide by zero
	TRAP(T_FLUSHWIN)	! 83 = flush windows
	TRAP(T_CLEANWIN)	! 84 = provide clean windows
	TRAP(T_RANGECHECK)	! 85 = ???
	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
	TRAP(T_INTOF)		! 87 = integer overflow
	SYSCALL			! 88 = svr4 syscall
	SYSCALL			! 89 = bsd syscall
	BPT_KGDB_EXEC		! 8a = enter kernel gdb on kernel startup
	STRAP(0x8b)
	STRAP(0x8c)
	STRAP(0x8d)
	STRAP(0x8e)
	STRAP(0x8f)
	STRAP(0x90)
	STRAP(0x91)
	STRAP(0x92)
	STRAP(0x93)
	STRAP(0x94)
	STRAP(0x95)
	STRAP(0x96)
	STRAP(0x97)
	STRAP(0x98)
	STRAP(0x99)
	STRAP(0x9a)
	STRAP(0x9b)
	STRAP(0x9c)
	STRAP(0x9d)
	STRAP(0x9e)
	STRAP(0x9f)
	STRAP(0xa0)
	STRAP(0xa1)
	STRAP(0xa2)
	STRAP(0xa3)
	STRAP(0xa4)
	STRAP(0xa5)
	STRAP(0xa6)
	STRAP(0xa7)
	STRAP(0xa8)
	STRAP(0xa9)
	STRAP(0xaa)
	STRAP(0xab)
	STRAP(0xac)
	STRAP(0xad)
	STRAP(0xae)
	STRAP(0xaf)
	STRAP(0xb0)
	STRAP(0xb1)
	STRAP(0xb2)
	STRAP(0xb3)
	STRAP(0xb4)
	STRAP(0xb5)
	STRAP(0xb6)
	STRAP(0xb7)
	STRAP(0xb8)
	STRAP(0xb9)
	STRAP(0xba)
	STRAP(0xbb)
	STRAP(0xbc)
	STRAP(0xbd)
	STRAP(0xbe)
	STRAP(0xbf)
	STRAP(0xc0)
	STRAP(0xc1)
	STRAP(0xc2)
	STRAP(0xc3)
	STRAP(0xc4)
	STRAP(0xc5)
	STRAP(0xc6)
	STRAP(0xc7)
	STRAP(0xc8)
	STRAP(0xc9)
	STRAP(0xca)
	STRAP(0xcb)
	STRAP(0xcc)
	STRAP(0xcd)
	STRAP(0xce)
	STRAP(0xcf)
	STRAP(0xd0)
	STRAP(0xd1)
	STRAP(0xd2)
	STRAP(0xd3)
	STRAP(0xd4)
	STRAP(0xd5)
	STRAP(0xd6)
	STRAP(0xd7)
	STRAP(0xd8)
	STRAP(0xd9)
	STRAP(0xda)
	STRAP(0xdb)
	STRAP(0xdc)
	STRAP(0xdd)
	STRAP(0xde)
	STRAP(0xdf)
	STRAP(0xe0)
	STRAP(0xe1)
	STRAP(0xe2)
	STRAP(0xe3)
	STRAP(0xe4)
	STRAP(0xe5)
	STRAP(0xe6)
	STRAP(0xe7)
	STRAP(0xe8)
	STRAP(0xe9)
	STRAP(0xea)
	STRAP(0xeb)
	STRAP(0xec)
	STRAP(0xed)
	STRAP(0xee)
	STRAP(0xef)
	STRAP(0xf0)
	STRAP(0xf1)
	STRAP(0xf2)
	STRAP(0xf3)
	STRAP(0xf4)
	STRAP(0xf5)
	STRAP(0xf6)
	STRAP(0xf7)
	STRAP(0xf8)
	STRAP(0xf9)
	STRAP(0xfa)
	STRAP(0xfb)
	STRAP(0xfc)
	STRAP(0xfd)
	STRAP(0xfe)
	STRAP(0xff)
#endif

#if defined(SUN4M)
trapbase_sun4m:
/* trap 0 is special since we cannot receive it */
	b dostart; nop; nop; nop	! 00 = reset (fake)
	VTRAP(T_TEXTFAULT, memfault_sun4m)	! 01 = instr. fetch fault
	VTRAP(T_ILLINST, illinst4m)	! 02 = illegal instruction
	TRAP(T_PRIVINST)		! 03 = privileged instruction
	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
	WINDOW_OF			! 05 = window overflow
	WINDOW_UF			! 06 = window underflow
	TRAP(T_ALIGN)			! 07 = address alignment error
	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
	VTRAP(T_DATAFAULT, memfault_sun4m)	! 09 = data fetch fault
	TRAP(T_TAGOF)			! 0a = tag overflow
	UTRAP(0x0b)
	UTRAP(0x0c)
	UTRAP(0x0d)
	UTRAP(0x0e)
	UTRAP(0x0f)
	UTRAP(0x10)
	HARDINT4M(1)			! 11 = level 1 interrupt
	HARDINT4M(2)			! 12 = level 2 interrupt
	HARDINT4M(3)			! 13 = level 3 interrupt
	HARDINT4M(4)			! 14 = level 4 interrupt
	HARDINT4M(5)			! 15 = level 5 interrupt
	HARDINT4M(6)			! 16 = level 6 interrupt
	HARDINT4M(7)			! 17 = level 7 interrupt
	HARDINT4M(8)			! 18 = level 8 interrupt
	HARDINT4M(9)			! 19 = level 9 interrupt
	HARDINT4M(10)			! 1a = level 10 interrupt
	HARDINT4M(11)			! 1b = level 11 interrupt
	ZS_INTERRUPT4M			! 1c = level 12 (zs) interrupt
	HARDINT4M(13)			! 1d = level 13 interrupt
	HARDINT4M(14)			! 1e = level 14 interrupt
	VTRAP(15, nmi_sun4m)		! 1f = nonmaskable interrupt
	UTRAP(0x20)
	VTRAP(T_TEXTERROR, memfault_sun4m)	! 21 = instr. fetch error
	UTRAP(0x22)
	UTRAP(0x23)
	TRAP(T_CPDISABLED)	! 24 = coprocessor instr, EC bit off in psr
	UTRAP(0x25)
	UTRAP(0x26)
	UTRAP(0x27)
	TRAP(T_CPEXCEPTION)	! 28 = coprocessor exception
	VTRAP(T_DATAERROR, memfault_sun4m)	! 29 = data fetch error
	UTRAP(0x2a)
	VTRAP(T_STOREBUFFAULT, memfault_sun4m) ! 2b = SuperSPARC store buffer fault
	UTRAP(0x2c)
	UTRAP(0x2d)
	UTRAP(0x2e)
	UTRAP(0x2f)
	UTRAP(0x30)
	UTRAP(0x31)
	UTRAP(0x32)
	UTRAP(0x33)
	UTRAP(0x34)
	UTRAP(0x35)
	UTRAP(0x36)
	UTRAP(0x37)
	UTRAP(0x38)
	UTRAP(0x39)
	UTRAP(0x3a)
	UTRAP(0x3b)
	UTRAP(0x3c)
	UTRAP(0x3d)
	UTRAP(0x3e)
	UTRAP(0x3f)
	UTRAP(0x40)
	UTRAP(0x41)
	UTRAP(0x42)
	UTRAP(0x43)
	UTRAP(0x44)
	UTRAP(0x45)
	UTRAP(0x46)
	UTRAP(0x47)
	UTRAP(0x48)
	UTRAP(0x49)
	UTRAP(0x4a)
	UTRAP(0x4b)
	UTRAP(0x4c)
	UTRAP(0x4d)
	UTRAP(0x4e)
	UTRAP(0x4f)
	UTRAP(0x50)
	UTRAP(0x51)
	UTRAP(0x52)
	UTRAP(0x53)
	UTRAP(0x54)
	UTRAP(0x55)
	UTRAP(0x56)
	UTRAP(0x57)
	UTRAP(0x58)
	UTRAP(0x59)
	UTRAP(0x5a)
	UTRAP(0x5b)
	UTRAP(0x5c)
	UTRAP(0x5d)
	UTRAP(0x5e)
	UTRAP(0x5f)
	UTRAP(0x60)
	UTRAP(0x61)
	UTRAP(0x62)
	UTRAP(0x63)
	UTRAP(0x64)
	UTRAP(0x65)
	UTRAP(0x66)
	UTRAP(0x67)
	UTRAP(0x68)
	UTRAP(0x69)
	UTRAP(0x6a)
	UTRAP(0x6b)
	UTRAP(0x6c)
	UTRAP(0x6d)
	UTRAP(0x6e)
	UTRAP(0x6f)
	UTRAP(0x70)
	UTRAP(0x71)
	UTRAP(0x72)
	UTRAP(0x73)
	UTRAP(0x74)
	UTRAP(0x75)
	UTRAP(0x76)
	UTRAP(0x77)
	UTRAP(0x78)
	UTRAP(0x79)
	UTRAP(0x7a)
	UTRAP(0x7b)
	UTRAP(0x7c)
	UTRAP(0x7d)
	UTRAP(0x7e)
	UTRAP(0x7f)
	SYSCALL			! 80 = sun syscall
	BPT			! 81 = pseudo breakpoint instruction
	TRAP(T_DIV0)		! 82 = divide by zero
	TRAP(T_FLUSHWIN)	! 83 = flush windows
	TRAP(T_CLEANWIN)	! 84 = provide clean windows
	TRAP(T_RANGECHECK)	! 85 = ???
	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
	TRAP(T_INTOF)		! 87 = integer overflow
	SYSCALL			! 88 = svr4 syscall
	SYSCALL			! 89 = bsd syscall
	BPT_KGDB_EXEC		! 8a = enter kernel gdb on kernel startup
	TRAP(T_DBPAUSE)		! 8b = hold CPU for kernel debugger
	STRAP(0x8c)
	STRAP(0x8d)
	STRAP(0x8e)
	STRAP(0x8f)
	STRAP(0x90)
	STRAP(0x91)
	STRAP(0x92)
	STRAP(0x93)
	STRAP(0x94)
	STRAP(0x95)
	STRAP(0x96)
	STRAP(0x97)
	STRAP(0x98)
	STRAP(0x99)
	STRAP(0x9a)
	STRAP(0x9b)
	STRAP(0x9c)
	STRAP(0x9d)
	STRAP(0x9e)
	STRAP(0x9f)
	STRAP(0xa0)
	STRAP(0xa1)
	STRAP(0xa2)
	STRAP(0xa3)
	STRAP(0xa4)
	STRAP(0xa5)
	STRAP(0xa6)
	STRAP(0xa7)
	STRAP(0xa8)
	STRAP(0xa9)
	STRAP(0xaa)
	STRAP(0xab)
	STRAP(0xac)
	STRAP(0xad)
	STRAP(0xae)
	STRAP(0xaf)
	STRAP(0xb0)
	STRAP(0xb1)
	STRAP(0xb2)
	STRAP(0xb3)
	STRAP(0xb4)
	STRAP(0xb5)
	STRAP(0xb6)
	STRAP(0xb7)
	STRAP(0xb8)
	STRAP(0xb9)
	STRAP(0xba)
	STRAP(0xbb)
	STRAP(0xbc)
	STRAP(0xbd)
	STRAP(0xbe)
	STRAP(0xbf)
	STRAP(0xc0)
	STRAP(0xc1)
	STRAP(0xc2)
	STRAP(0xc3)
	STRAP(0xc4)
	STRAP(0xc5)
	STRAP(0xc6)
	STRAP(0xc7)
	STRAP(0xc8)
	STRAP(0xc9)
	STRAP(0xca)
	STRAP(0xcb)
	STRAP(0xcc)
	STRAP(0xcd)
	STRAP(0xce)
	STRAP(0xcf)
	STRAP(0xd0)
	STRAP(0xd1)
	STRAP(0xd2)
	STRAP(0xd3)
	STRAP(0xd4)
	STRAP(0xd5)
	STRAP(0xd6)
	STRAP(0xd7)
	STRAP(0xd8)
	STRAP(0xd9)
	STRAP(0xda)
	STRAP(0xdb)
	STRAP(0xdc)
	STRAP(0xdd)
	STRAP(0xde)
	STRAP(0xdf)
	STRAP(0xe0)
	STRAP(0xe1)
	STRAP(0xe2)
	STRAP(0xe3)
	STRAP(0xe4)
	STRAP(0xe5)
	STRAP(0xe6)
	STRAP(0xe7)
	STRAP(0xe8)
	STRAP(0xe9)
	STRAP(0xea)
	STRAP(0xeb)
	STRAP(0xec)
	STRAP(0xed)
	STRAP(0xee)
	STRAP(0xef)
	STRAP(0xf0)
	STRAP(0xf1)
	STRAP(0xf2)
	STRAP(0xf3)
	STRAP(0xf4)
	STRAP(0xf5)
	STRAP(0xf6)
	STRAP(0xf7)
	STRAP(0xf8)
	STRAP(0xf9)
	STRAP(0xfa)
	STRAP(0xfb)
	STRAP(0xfc)
	STRAP(0xfd)
	STRAP(0xfe)
	STRAP(0xff)
#endif

/*
 * Pad the trap table to max page size.
 * Trap table size is 0x100 * 4instr * 4byte/instr = 4096 bytes;
 * need to .skip 4096 to pad to page size iff the number of trap tables
 * defined above is odd.
 */
#if (defined(SUN4) + defined(SUN4C) + defined(SUN4M)) % 2 == 1
	.skip	4096
#endif

/* redzones don't work currently in multi-processor mode */
#if defined(DEBUG) && !defined(MULTIPROCESSOR)
/*
 * A hardware red zone is impossible.  We simulate one in software by
 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
 * This is expensive and is only enabled when debugging.
 */

/* `redzone' is located in the per-CPU information structure */
_redzone = CPUINFO_VA + CPUINFO_REDZONE
	.data
#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
_redstack:
	.skip	REDSTACK
	.text
Lpanic_red:
	.asciz	"stack overflow"
	_ALIGN

	/* set stack pointer redzone to base+minstack; alters base */
#define	SET_SP_REDZONE(base, tmp) \
	add	base, REDSIZE, base; \
	sethi	%hi(_redzone), tmp; \
	st	base, [tmp + %lo(_redzone)]

	/* variant with a constant */
#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
	set	(const) + REDSIZE, tmp1; \
	sethi	%hi(_redzone), tmp2; \
	st	tmp1, [tmp2 + %lo(_redzone)]

	/* variant with a variable & offset */
#define	SET_SP_REDZONE_VAR(var, offset, tmp1, tmp2) \
	sethi	%hi(var), tmp1; \
	ld	[tmp1 + %lo(var)], tmp1; \
	sethi	%hi(offset), tmp2; \
	add	tmp1, tmp2, tmp1; \
	SET_SP_REDZONE(tmp1, tmp2)

	/* check stack pointer against redzone (uses two temps) */
#define	CHECK_SP_REDZONE(t1, t2) \
	sethi	%hi(_redzone), t1; \
	ld	[t1 + %lo(_redzone)], t2; \
	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
	bgeu	7f; nop;	/* and can continue normally */ \
	/* move to panic stack */ \
	st	%g0, [t1 + %lo(_redzone)]; \
	set	_redstack + REDSTACK - 96, %sp; \
	/* prevent panic() from lowering ipl */ \
	sethi	%hi(_C_LABEL(panicstr)), t1; \
	set	Lpanic_red, t2; \
	st	t2, [t1 + %lo(_C_LABEL(panicstr))]; \
	rd	%psr, t1;		/* t1 = splhigh() */ \
	or	t1, PSR_PIL, t2; \
	wr	t2, 0, %psr; \
	wr	t2, PSR_ET, %psr;	/* turn on traps */ \
	nop; nop; nop; \
	save	%sp, -CCFSZ, %sp;	/* preserve current window */ \
	sethi	%hi(Lpanic_red), %o0; \
	call	_C_LABEL(panic); or %o0, %lo(Lpanic_red), %o0; \
7:

#else

#define	SET_SP_REDZONE(base, tmp)
#define	SET_SP_REDZONE_CONST(const, t1, t2)
#define	SET_SP_REDZONE_VAR(var, offset, t1, t2)
#define	CHECK_SP_REDZONE(t1, t2)
#endif /* DEBUG */

/*
 * The window code must verify user stack addresses before using them.
 * A user stack pointer is invalid if:
 *	- it is not on an 8 byte boundary;
 *	- its pages (a register window, being 64 bytes, can occupy
 *	  two pages) are not readable or writable.
 * We define three separate macros here for testing user stack addresses.
 *
 * PTE_OF_ADDR locates a PTE, branching to a `bad address'
 *	handler if the stack pointer points into the hole in the
 *	address space (i.e., top 3 bits are not either all 1 or all 0);
 * CMP_PTE_USER_READ compares the located PTE against `user read' mode;
 * CMP_PTE_USER_WRITE compares the located PTE against `user write' mode.
 * The compares give `equal' if read or write is OK.
 *
 * Note that the user stack pointer usually points into high addresses
 * (top 3 bits all 1), so that is what we check first.
 *
 * The code below also assumes that PTE_OF_ADDR is safe in a delay
 * slot; it is, as it merely sets its `pte' register to a temporary value.
 */
#if defined(SUN4) || defined(SUN4C)
	/* input: addr, output: pte; aux: bad address label */
#define	PTE_OF_ADDR4_4C(addr, pte, bad, page_offset) \
	sra	addr, PG_VSHIFT, pte; \
	cmp	pte, -1; \
	be,a	1f; andn addr, page_offset, pte; \
	tst	pte; \
	bne	bad; .empty; \
	andn	addr, page_offset, pte; \
1:

	/* input: pte; output: condition codes */
#define	CMP_PTE_USER_READ4_4C(pte) \
	lda	[pte] ASI_PTE, pte; \
	srl	pte, PG_PROTSHIFT, pte; \
	andn	pte, (PG_W >> PG_PROTSHIFT), pte; \
	cmp	pte, PG_PROTUREAD

	/* input: pte; output: condition codes */
#define	CMP_PTE_USER_WRITE4_4C(pte) \
	lda	[pte] ASI_PTE, pte; \
	srl	pte, PG_PROTSHIFT, pte; \
	cmp	pte, PG_PROTUWRITE
#endif

/*
 * The Sun4M does not have the memory hole that the 4C does. Thus all
 * we need to do here is clear the page offset from addr.
 */
#if defined(SUN4M)
#define	PTE_OF_ADDR4M(addr, pte, bad, page_offset) \
	andn	addr, page_offset, pte

/*
 * After obtaining the PTE through ASI_SRMMUFP, we read the Sync Fault
 * Status register. This is necessary on Hypersparcs which stores and
 * locks the fault address and status registers if the translation
 * fails (thanks to Chris Torek for finding this quirk).
 */
#define CMP_PTE_USER_READ4M(pte, tmp) \
	/*or	pte, ASI_SRMMUFP_L3, pte; -- ASI_SRMMUFP_L3 == 0 */ \
	lda	[pte] ASI_SRMMUFP, pte; \
	set	SRMMU_SFSR, tmp; \
	lda	[tmp] ASI_SRMMU, %g0; \
	and	pte, SRMMU_TETYPE, tmp; \
	/* Check for valid pte */ \
	cmp	tmp, SRMMU_TEPTE; \
	bnz	8f; \
	and	pte, SRMMU_PROT_MASK, pte; \
	/* check for one of: R_R, RW_RW, RX_RX and RWX_RWX */ \
	cmp	pte, PPROT_X_X; \
	bcs,a	8f; \
	 /* Now we have carry set if OK; turn it into Z bit */ \
	 subxcc	%g0, -1, %g0; \
	/* One more case to check: R_RW */ \
	cmp	pte, PPROT_R_RW; \
8:


/* note: PTE bit 4 set implies no user writes */
#define CMP_PTE_USER_WRITE4M(pte, tmp) \
	or	pte, ASI_SRMMUFP_L3, pte; \
	lda	[pte] ASI_SRMMUFP, pte; \
	set	SRMMU_SFSR, tmp; \
	lda	[tmp] ASI_SRMMU, %g0; \
	and	pte, (SRMMU_TETYPE | 0x14), pte; \
	cmp	pte, (SRMMU_TEPTE | PPROT_WRITE)
#endif /* 4m */

#if defined(SUN4M) && !(defined(SUN4C) || defined(SUN4))

#define PTE_OF_ADDR(addr, pte, bad, page_offset, label) \
	PTE_OF_ADDR4M(addr, pte, bad, page_offset)
#define CMP_PTE_USER_WRITE(pte, tmp, label)	CMP_PTE_USER_WRITE4M(pte,tmp)
#define CMP_PTE_USER_READ(pte, tmp, label)	CMP_PTE_USER_READ4M(pte,tmp)

#elif (defined(SUN4C) || defined(SUN4)) && !defined(SUN4M)

#define PTE_OF_ADDR(addr, pte, bad, page_offset,label) \
	PTE_OF_ADDR4_4C(addr, pte, bad, page_offset)
#define CMP_PTE_USER_WRITE(pte, tmp, label)	CMP_PTE_USER_WRITE4_4C(pte)
#define CMP_PTE_USER_READ(pte, tmp, label)	CMP_PTE_USER_READ4_4C(pte)

#else /* both defined, ugh */

#define	PTE_OF_ADDR(addr, pte, bad, page_offset, label) \
label:	b,a	2f; \
	PTE_OF_ADDR4M(addr, pte, bad, page_offset); \
	b,a	3f; \
2: \
	PTE_OF_ADDR4_4C(addr, pte, bad, page_offset); \
3:

#define CMP_PTE_USER_READ(pte, tmp, label) \
label:	b,a	1f; \
	CMP_PTE_USER_READ4M(pte,tmp); \
	b,a	2f; \
1: \
	CMP_PTE_USER_READ4_4C(pte); \
2:

#define CMP_PTE_USER_WRITE(pte, tmp, label) \
label:	b,a	1f; \
	CMP_PTE_USER_WRITE4M(pte,tmp); \
	b,a	2f; \
1: \
	CMP_PTE_USER_WRITE4_4C(pte); \
2:
#endif


/*
 * The calculations in PTE_OF_ADDR and CMP_PTE_USER_* are rather slow:
 * in particular, according to Gordon Irlam of the University of Adelaide
 * in Australia, these consume at least 18 cycles on an SS1 and 37 on an
 * SS2.  Hence, we try to avoid them in the common case.
 *
 * A chunk of 64 bytes is on a single page if and only if:
 *
 *	((base + 64 - 1) & ~(NBPG-1)) == (base & ~(NBPG-1))
 *
 * Equivalently (and faster to test), the low order bits (base & 4095) must
 * be small enough so that the sum (base + 63) does not carry out into the
 * upper page-address bits, i.e.,
 *
 *	(base & (NBPG-1)) < (NBPG - 63)
 *
 * so we allow testing that here.  This macro is also assumed to be safe
 * in a delay slot (modulo overwriting its temporary).
 */
#define	SLT_IF_1PAGE_RW(addr, tmp, page_offset) \
	and	addr, page_offset, tmp; \
	sub	page_offset, 62, page_offset; \
	cmp	tmp, page_offset
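
/*
 * Illustrative sketch only (not assembled): with page_offset == NBPG-1,
 * the macro computes the same condition as this C fragment, taking the
 * branch-on-less when the 64-byte window fits on one page:
 *
 *	int fits_one_page(unsigned base, unsigned pgofset)
 *	{
 *		return (base & pgofset) < pgofset - 62;	// i.e. < NBPG - 63
 *	}
 *
 * Note that the macro leaves `page_offset' decremented by 62; the caller
 * in ctw_user below restores it afterwards.
 */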

/*
 * Every trap that enables traps must set up stack space.
 * If the trap is from user mode, this involves switching to the kernel
 * stack for the current process, and we must also set cpcb->pcb_uw
 * so that the window overflow handler can tell user windows from kernel
 * windows.
 *
 * The number of user windows is:
 *
 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
 *
 * (where pcb_wim = log2(current %wim) and CWP = low 5 bits of %psr).
 * We compute this expression by table lookup in uwtab[CWP - pcb_wim],
 * which has been set up as:
 *
 *	for i in [-nwin+1 .. nwin-1]
 *		uwtab[i] = (nwin - 1 - i) % nwin;
 *
 * (If you do not believe this works, try it for yourself.)
 *
 * We also keep one or two more tables:
 *
 *	for i in 0..nwin-1
 *		wmask[i] = 1 << ((i + 1) % nwindows);
 *
 * wmask[CWP] tells whether a `rett' would return into the invalid window.
 */
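
/*
 * Illustrative sketch only (not assembled): the tables below could be
 * initialized by C code along the lines of
 *
 *	for (i = -nwin + 1; i <= nwin - 1; i++)
 *		uwtab[i] = (nwin - 1 - i) % nwin;
 *	for (i = 0; i < nwin; i++)
 *		wmask[i] = 1 << ((i + 1) % nwin);
 *
 * For example, with nwin = 8, CWP = 3 and pcb_wim = 6:
 * uwtab[3 - 6] = uwtab[-3] = (8 - 1 - (-3)) % 8 = 2, which matches
 * (pcb_wim - 1 - CWP) % nwin = (6 - 1 - 3) % 8 = 2 user windows.
 */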
	.data
	.skip	32			! alignment byte & negative indices
uwtab:	.skip	32			! u_char uwtab[-31..31];
wmask:	.skip	32			! u_char wmask[0..31];

	.text
/*
 * Things begin to grow uglier....
 *
 * Each trap handler may (always) be running in the trap window.
 * If this is the case, it cannot enable further traps until it writes
 * the register windows into the stack (or, if the stack is no good,
 * the current pcb).
 *
 * ASSUMPTIONS: TRAP_SETUP() is called with:
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 *	%l3 = (some value that must not be altered)
 * which means we have 4 registers to work with.
 *
 * The `stackspace' argument is the number of stack bytes to allocate
 * for register-saving, and must be at least -64 (and typically more,
 * for global registers and %y).
 *
 * Trapframes should use -CCFSZ-80.  (80 = sizeof(struct trapframe);
 * see trap.h.  This basically means EVERYONE.  Interrupt frames could
 * get away with less, but currently do not.)
 *
 * The basic outline here is:
 *
 *	if (trap came from kernel mode) {
 *		if (we are in the trap window)
 *			save it away;
 *		%sp = %fp - stackspace;
 *	} else {
 *		compute the number of user windows;
 *		if (we are in the trap window)
 *			save it away;
 *		%sp = (top of kernel stack) - stackspace;
 *	}
 *
 * Again, the number of user windows is:
 *
 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
 *
 * (where pcb_wim = log2(current %wim) and CWP is the low 5 bits of %psr),
 * and this is computed as `uwtab[CWP - pcb_wim]'.
 *
 * NOTE: if you change this code, you will have to look carefully
 * at the window overflow and underflow handlers and make sure they
 * have similar changes made as needed.
 */
#define	CALL_CLEAN_TRAP_WINDOW \
	sethi	%hi(clean_trap_window), %l7; \
	jmpl	%l7 + %lo(clean_trap_window), %l4; \
	 mov	%g7, %l7	/* save %g7 in %l7 for clean_trap_window */

#define	TRAP_SETUP(stackspace) \
	TRAP_TRACE(%l3,%l5); \
	rd	%wim, %l4; \
	mov	1, %l5; \
	sll	%l5, %l0, %l5; \
	btst	PSR_PS, %l0; \
	bz	1f; \
	 btst	%l5, %l4; \
	/* came from kernel mode; cond codes indicate trap window */ \
	bz,a	3f; \
	 add	%fp, stackspace, %sp;	/* want to just set %sp */ \
	CALL_CLEAN_TRAP_WINDOW;		/* but maybe need to clean first */ \
	b	3f; \
	 add	%fp, stackspace, %sp; \
1: \
	/* came from user mode: compute pcb_nw */ \
	sethi	%hi(cpcb), %l6; \
	ld	[%l6 + %lo(cpcb)], %l6; \
	ld	[%l6 + PCB_WIM], %l5; \
	and	%l0, 31, %l4; \
	sub	%l4, %l5, %l5; \
	set	uwtab, %l4; \
	ldub	[%l4 + %l5], %l5; \
	st	%l5, [%l6 + PCB_UW]; \
	/* cond codes still indicate whether in trap window */ \
	bz,a	2f; \
	 sethi	%hi(USPACE+(stackspace)), %l5; \
	/* yes, in trap window; must clean it */ \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(cpcb), %l6; \
	ld	[%l6 + %lo(cpcb)], %l6; \
	sethi	%hi(USPACE+(stackspace)), %l5; \
2: \
	/* trap window is (now) clean: set %sp */ \
	or	%l5, %lo(USPACE+(stackspace)), %l5; \
	add	%l6, %l5, %sp; \
	SET_SP_REDZONE(%l6, %l5); \
3: \
	CHECK_SP_REDZONE(%l6, %l5)

/*
 * Interrupt setup is almost exactly like trap setup, but we need to
 * go to the interrupt stack if (a) we came from user mode or (b) we
 * came from kernel mode on the kernel stack.
 */
#if defined(MULTIPROCESSOR)
/*
 * SMP kernels: read `eintstack' from cpuinfo structure. Since the
 * location of the interrupt stack is not known in advance, we need
 * to check the current %fp against both ends of the stack space.
 */
#define	INTR_SETUP(stackspace) \
	TRAP_TRACE(%l3,%l5); \
	rd	%wim, %l4; \
	mov	1, %l5; \
	sll	%l5, %l0, %l5; \
	btst	PSR_PS, %l0; \
	bz	1f; \
	 btst	%l5, %l4; \
	/* came from kernel mode; cond codes still indicate trap window */ \
	bz,a	0f; \
	 sethi	%hi(_EINTSTACKP), %l7; \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(_EINTSTACKP), %l7; \
0:	/* now if %fp is not in [intstack, eintstack), we were on the kernel stack */ \
	ld	[%l7 + %lo(_EINTSTACKP)], %l7; \
	cmp	%fp, %l7; \
	bge,a	3f;			/* %fp >= eintstack */ \
	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
	sethi	%hi(INT_STACK_SIZE), %l6; \
	sub	%l7, %l6, %l6; \
	cmp	%fp, %l6; \
	blu,a	3f;			/* %fp < intstack */ \
	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
	b	4f; \
	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
1: \
	/* came from user mode: compute pcb_nw */ \
	sethi	%hi(cpcb), %l6; \
	ld	[%l6 + %lo(cpcb)], %l6; \
	ld	[%l6 + PCB_WIM], %l5; \
	and	%l0, 31, %l4; \
	sub	%l4, %l5, %l5; \
	set	uwtab, %l4; \
	ldub	[%l4 + %l5], %l5; \
	st	%l5, [%l6 + PCB_UW]; \
	/* cond codes still indicate whether in trap window */ \
	bz,a	2f; \
	 sethi	%hi(_EINTSTACKP), %l7; \
	/* yes, in trap window; must save regs */ \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(_EINTSTACKP), %l7; \
2: \
	ld	[%l7 + %lo(_EINTSTACKP)], %l7; \
	add	%l7, stackspace, %sp; \
3: \
	SET_SP_REDZONE_VAR(_EINTSTACKP, -INT_STACK_SIZE, %l6, %l5); \
4: \
	CHECK_SP_REDZONE(%l6, %l5)

#else /* MULTIPROCESSOR */

#define	INTR_SETUP(stackspace) \
	TRAP_TRACE(%l3,%l5); \
	rd	%wim, %l4; \
	mov	1, %l5; \
	sll	%l5, %l0, %l5; \
	btst	PSR_PS, %l0; \
	bz	1f; \
	 btst	%l5, %l4; \
	/* came from kernel mode; cond codes still indicate trap window */ \
	bz,a	0f; \
	 sethi	%hi(_C_LABEL(eintstack)), %l7; \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(_C_LABEL(eintstack)), %l7; \
0:	/* now if %fp >= eintstack, we were on the kernel stack */ \
	cmp	%fp, %l7; \
	bge,a	3f; \
	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
	b	4f; \
	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
1: \
	/* came from user mode: compute pcb_nw */ \
	sethi	%hi(cpcb), %l6; \
	ld	[%l6 + %lo(cpcb)], %l6; \
	ld	[%l6 + PCB_WIM], %l5; \
	and	%l0, 31, %l4; \
	sub	%l4, %l5, %l5; \
	set	uwtab, %l4; \
	ldub	[%l4 + %l5], %l5; \
	st	%l5, [%l6 + PCB_UW]; \
	/* cond codes still indicate whether in trap window */ \
	bz,a	2f; \
	 sethi	%hi(_C_LABEL(eintstack)), %l7; \
	/* yes, in trap window; must save regs */ \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(_C_LABEL(eintstack)), %l7; \
2: \
	add	%l7, stackspace, %sp; \
3: \
	SET_SP_REDZONE_CONST(_C_LABEL(intstack), %l6, %l5); \
4: \
	CHECK_SP_REDZONE(%l6, %l5)
#endif /* MULTIPROCESSOR */

/*
 * Handler for making the trap window shiny clean.
 *
 * On entry:
 *	cpcb->pcb_nw = number of user windows
 *	%l0 = %psr
 *	%l1 must not be clobbered
 *	%l2 must not be clobbered
 *	%l3 must not be clobbered
 *	%l4 = address for `return'
 *	%l7 = saved %g7 (we put this in a delay slot above, to save work)
 *
 * On return:
 *	%wim has changed, along with cpcb->pcb_wim
 *	%g7 has been restored
 *
 * Normally, we push only one window.
 */
clean_trap_window:
	mov	%g5, %l5		! save %g5
	mov	%g6, %l6		! ... and %g6
/*	mov	%g7, %l7		! ... and %g7 (already done for us) */
	sethi	%hi(cpcb), %g6		! get current pcb
	ld	[%g6 + %lo(cpcb)], %g6

	/* Figure out whether it is a user window (cpcb->pcb_uw > 0). */
	ld	[%g6 + PCB_UW], %g7
	deccc	%g7
	bge	ctw_user
	 save	%g0, %g0, %g0		! in any case, enter window to save

	/* The window to be pushed is a kernel window. */
	std	%l0, [%sp + (0*8)]
ctw_merge:
	std	%l2, [%sp + (1*8)]
	std	%l4, [%sp + (2*8)]
	std	%l6, [%sp + (3*8)]
	std	%i0, [%sp + (4*8)]
	std	%i2, [%sp + (5*8)]
	std	%i4, [%sp + (6*8)]
	std	%i6, [%sp + (7*8)]

	/* Set up new window invalid mask, and update cpcb->pcb_wim. */
	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp
	mov	1, %g5			! g5 = 1 << new_cwp;
	sll	%g5, %g7, %g5
	wr	%g5, 0, %wim		! setwim(g5);
	and	%g7, 31, %g7		! cpcb->pcb_wim = g7 & 31;
	sethi	%hi(cpcb), %g6		! re-get current pcb
	ld	[%g6 + %lo(cpcb)], %g6
	st	%g7, [%g6 + PCB_WIM]
	nop
	restore				! back to trap window

	mov	%l5, %g5		! restore g5
	mov	%l6, %g6		! ... and g6
	jmp	%l4 + 8			! return to caller
	 mov	%l7, %g7		! ... and g7
	/* NOTREACHED */

ctw_user:
	/*
	 * The window to be pushed is a user window.
	 * We must verify the stack pointer (alignment & permissions).
	 * See comments above definition of PTE_OF_ADDR.
	 */
	st	%g7, [%g6 + PCB_UW]	! cpcb->pcb_uw--;
	btst	7, %sp			! if not aligned,
	bne	ctw_invalid		! choke on it
	 .empty

	sethi	%hi(_C_LABEL(pgofset)), %g6	! trash %g6=curpcb
	ld	[%g6 + %lo(_C_LABEL(pgofset))], %g6
	PTE_OF_ADDR(%sp, %g7, ctw_invalid, %g6, NOP_ON_4M_1)
	CMP_PTE_USER_WRITE(%g7, %g5, NOP_ON_4M_2) ! likewise if not writable
	bne	ctw_invalid
	 .empty
	/* Note side-effect of SLT_IF_1PAGE_RW: decrements %g6 by 62 */
	SLT_IF_1PAGE_RW(%sp, %g7, %g6)
	bl,a	ctw_merge		! all ok if only 1
	 std	%l0, [%sp]
	add	%sp, 7*8, %g5		! check last addr too
	add	%g6, 62, %g6		/* restore %g6 to `pgofset' */
	PTE_OF_ADDR(%g5, %g7, ctw_invalid, %g6, NOP_ON_4M_3)
	CMP_PTE_USER_WRITE(%g7, %g6, NOP_ON_4M_4)
	be,a	ctw_merge		! all ok: store <l0,l1> and merge
	 std	%l0, [%sp]

	/*
	 * The window we wanted to push could not be pushed.
	 * Instead, save ALL user windows into the pcb.
	 * We will notice later that we did this, when we
	 * get ready to return from our trap or syscall.
	 *
	 * The code here is run rarely and need not be optimal.
	 */
ctw_invalid:
	/*
	 * Reread cpcb->pcb_uw.  We decremented this earlier,
	 * so it is off by one.
	 */
	sethi	%hi(cpcb), %g6		! re-get current pcb
	ld	[%g6 + %lo(cpcb)], %g6

	ld	[%g6 + PCB_UW], %g7	! (number of user windows) - 1
	add	%g6, PCB_RW, %g5

	/* save g7+1 windows, starting with the current one */
1:					! do {
	std	%l0, [%g5 + (0*8)]	!	rw->rw_local[0] = l0;
	std	%l2, [%g5 + (1*8)]	!	...
	std	%l4, [%g5 + (2*8)]
	std	%l6, [%g5 + (3*8)]
	std	%i0, [%g5 + (4*8)]
	std	%i2, [%g5 + (5*8)]
	std	%i4, [%g5 + (6*8)]
	std	%i6, [%g5 + (7*8)]
	deccc	%g7			!	if (n > 0) save(), rw++;
	bge,a	1b			! } while (--n >= 0);
	 save	%g5, 64, %g5

	/* stash sp for bottommost window */
	st	%sp, [%g5 + 64 + (7*8)]

	/* set up new wim */
	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp;
	mov	1, %g5			! g5 = 1 << new_cwp;
	sll	%g5, %g7, %g5
	wr	%g5, 0, %wim		! wim = g5;
	and	%g7, 31, %g7
	st	%g7, [%g6 + PCB_WIM]	! cpcb->pcb_wim = new_cwp;

	/* fix up pcb fields */
	ld	[%g6 + PCB_UW], %g7	! n = cpcb->pcb_uw;
	add	%g7, 1, %g5
	st	%g5, [%g6 + PCB_NSAVED]	! cpcb->pcb_nsaved = n + 1;
	st	%g0, [%g6 + PCB_UW]	! cpcb->pcb_uw = 0;

	/* return to trap window */
1:	deccc	%g7			! do {
	bge	1b			!	restore();
	 restore			! } while (--n >= 0);

	mov	%l5, %g5		! restore g5, g6, & g7, and return
	mov	%l6, %g6
	jmp	%l4 + 8
	 mov	%l7, %g7
	/* NOTREACHED */


/*
 * Each memory access (text or data) fault, from user or kernel mode,
 * comes here.  We read the error register and figure out what has
 * happened.
 *
 * This cannot be done from C code since we must not enable traps (and
 * hence may not use the `save' instruction) until we have decided that
 * the error is or is not an asynchronous one that showed up after a
 * synchronous error, but which must be handled before the sync err.
 *
 * Most memory faults are user mode text or data faults, which can cause
 * signal delivery or ptracing, for which we must build a full trapframe.
 * It does not seem worthwhile to work to avoid this in the other cases,
 * so we store all the %g registers on the stack immediately.
 *
 * On entry:
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 *	%l3 = T_TEXTFAULT or T_DATAFAULT
 *
 * Internal:
 *	%l4 = %y, until we call mem_access_fault (then onto trapframe)
 *	%l5 = IE_reg_addr, if async mem error
 *
 */

#if defined(SUN4)
_ENTRY(memfault_sun4)
memfault_sun4:
	TRAP_SETUP(-CCFSZ-80)
	! tally fault (curcpu()->cpu_data.cpu_nfault++) (clobbers %o0,%o1)
	INCR64(CPUINFO_VA + CPUINFO_NFAULT)

	st	%g1, [%sp + CCFSZ + 20]	! save g1
	rd	%y, %l4			! save y

	/*
	 * registers:
	 * memerr.ctrl	= memory error control reg., error if 0x80 set
	 * memerr.vaddr	= address of memory error
	 * buserr	= basically just like sun4c sync error reg but
	 *		  no SER_WRITE bit (have to figure out from code).
	 */
	set	_C_LABEL(par_err_reg), %o0 ! memerr ctrl addr -- XXX mapped?
	ld	[%o0], %o0		! get it
	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
	ld	[%o0], %o1		! memerr ctrl register
	inc	4, %o0			! now VA of memerr vaddr register
	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
	ld	[%o0], %o2		! memerr virt addr
	st	%g0, [%o0]		! NOTE: this clears latching!!!
	btst	ME_REG_IERR, %o1	! memory error?
					! XXX this value may not be correct
					! as I got some parity errors and the
					! correct bits were not on?
	std	%g6, [%sp + CCFSZ + 40]
	bz,a	0f			! no, just a regular fault
	 wr	%l0, PSR_ET, %psr	! (and reenable traps)

	/* memory error = death for now XXX */
	clr	%o3
	clr	%o4
	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, 0, 0)
	 clr	%o0
	call	_C_LABEL(prom_halt)
	 nop

0:
	/*
	 * have to make SUN4 emulate SUN4C.  4C code expects
	 * SER in %o1 and the offending VA in %o2, everything else is ok.
	 * (must figure out if SER_WRITE should be set)
	 */
	set	AC_BUS_ERR, %o0		! bus error register
	cmp	%l3, T_TEXTFAULT	! text fault always on PC
	be	normal_mem_fault	! go
	 lduba	[%o0] ASI_CONTROL, %o1	! get its value

#define STORE_BIT 21 /* bit that indicates a store instruction for sparc */
	ld	[%l1], %o3		! offending instruction in %o3 [l1=pc]
	srl	%o3, STORE_BIT, %o3	! get load/store bit (won't fit simm13)
	btst	1, %o3			! test for store operation

	bz	normal_mem_fault	! if (z) is a load (so branch)
	 sethi	%hi(SER_WRITE), %o5     ! damn SER_WRITE won't fit simm13
!	or	%lo(SER_WRITE), %o5, %o5! not necessary since %lo is zero
	or	%o5, %o1, %o1		! set SER_WRITE
#if defined(SUN4C) || defined(SUN4M)
	ba,a	normal_mem_fault
	 !!nop				! XXX make efficient later
#endif /* SUN4C || SUN4M */
#endif /* SUN4 */

#if defined(SUN4C)
_ENTRY(memfault_sun4c)
memfault_sun4c:
	TRAP_SETUP(-CCFSZ-80)
	! tally fault (curcpu()->cpu_data.cpu_nfault++) (clobbers %o0,%o1,%o2)
	INCR64(CPUINFO_VA + CPUINFO_NFAULT)

	st	%g1, [%sp + CCFSZ + 20]	! save g1
	rd	%y, %l4			! save y

	/*
	 * We know about the layout of the error registers here.
	 *	addr	reg
	 *	----	---
	 *	a	AC_SYNC_ERR
	 *	a+4	AC_SYNC_VA
	 *	a+8	AC_ASYNC_ERR
	 *	a+12	AC_ASYNC_VA
	 */

#if AC_SYNC_ERR + 4 != AC_SYNC_VA || \
    AC_SYNC_ERR + 8 != AC_ASYNC_ERR || AC_SYNC_ERR + 12 != AC_ASYNC_VA
	help help help		! I, I, I wanna be a lifeguard
#endif
	set	AC_SYNC_ERR, %o0
	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
	inc	4, %o0
	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
	btst	SER_MEMERR, %o1		! memory error?
	std	%g6, [%sp + CCFSZ + 40]
	bz,a	normal_mem_fault	! no, just a regular fault
	 wr	%l0, PSR_ET, %psr	! (and reenable traps)

	/*
	 * We got a synchronous memory error.  It could be one that
	 * happened because there were two stores in a row, and the
	 * first went into the write buffer, and the second caused this
	 * synchronous trap; so there could now be a pending async error.
	 * This is in fact the case iff the two va's differ.
	 */
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o3	! async err reg
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
	cmp	%o2, %o4
	be,a	1f			! no, not an async err
	 wr	%l0, PSR_ET, %psr	! (and reenable traps)

	/*
	 * Handle the async error; ignore the sync error for now
	 * (we may end up getting it again, but so what?).
	 * This code is essentially the same as that at `nmi' below,
	 * but the register usage is different and we cannot merge.
	 */
	sethi	%hi(INTRREG_VA), %l5	! ienab_bic(IE_ALLIE);
	ldub	[%l5 + %lo(INTRREG_VA)], %o0
	andn	%o0, IE_ALLIE, %o0
	stb	%o0, [%l5 + %lo(INTRREG_VA)]

	/*
	 * Now reenable traps and call C code.
	 * %o1 through %o4 still hold the error reg contents.
	 * If memerr() returns, return from the trap.
	 */
	wr	%l0, PSR_ET, %psr
	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, aer, ava)
	 clr	%o0

	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
	wr	%l0, 0, %psr		! and disable traps, 3 instr delay
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	/* now safe to set IE_ALLIE again */
	ldub	[%l5 + %lo(INTRREG_VA)], %o1
	or	%o1, IE_ALLIE, %o1
	stb	%o1, [%l5 + %lo(INTRREG_VA)]
	b	return_from_trap
	 wr	%l4, 0, %y		! restore y

	/*
	 * Trap was a synchronous memory error.
	 * %o1 through %o4 still hold the error reg contents.
	 */
1:
	call	_C_LABEL(memerr4_4c)	! memerr(1, ser, sva, aer, ava)
	 mov	1, %o0

	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	wr	%l4, 0, %y		! restore y
	b	return_from_trap
	 wr	%l0, 0, %psr
	/* NOTREACHED */
#endif /* SUN4C */
1953
1954#if defined(SUN4M)
1955_ENTRY(memfault_sun4m)
1956memfault_sun4m:
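	/*
	 * First call the CPU-model-specific `get syncflt' hook through
	 * cpuinfo; it is expected to dump the MMU sync fault status and
	 * address into the syncfltdump area (%l4 points there) and to
	 * come back via %l7.  The dump is picked up again below, after
	 * the trap frame has been set up.
	 */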
1957	sethi	%hi(CPUINFO_VA+CPUINFO_GETSYNCFLT), %l4
1958	ld	[%l4 + %lo(CPUINFO_VA+CPUINFO_GETSYNCFLT)], %l5
1959	sethi	%hi(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %l4
1960	jmpl	%l5, %l7
1961	 or	%l4, %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %l4
1962	TRAP_SETUP(-CCFSZ-80)
1963	! tally fault (curcpu()->cpu_data.cpu_nfault++) (clobbers %o0,%o1,%o2)
1964	INCR64(CPUINFO_VA + CPUINFO_NFAULT)
1965
1966	st	%g1, [%sp + CCFSZ + 20]	! save g1
1967	rd	%y, %l4			! save y
1968
1969	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
1970	std	%g4, [%sp + CCFSZ + 32]	! save g4, g5
1971	std	%g6, [%sp + CCFSZ + 40]	! sneak in g6, g7
1972
1973	! retrieve sync fault status/address
1974	sethi	%hi(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %o0
1975	ld	[%o0 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP)], %o1
1976	ld	[%o0 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP+4)], %o2
1977
1978	wr	%l0, PSR_ET, %psr	! reenable traps
1979
1980	/* Finish stackframe, call C trap handler */
1981	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
1982	mov	%l3, %o0		! (argument: type)
1983	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
1984	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
1985	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1986	std	%i2, [%sp + CCFSZ + 56]
1987	std	%i4, [%sp + CCFSZ + 64]
1988	std	%i6, [%sp + CCFSZ + 72]
1989					! mem_access_fault(type,sfsr,sfva,&tf);
1990	call	_C_LABEL(mem_access_fault4m)
1991	 add	%sp, CCFSZ, %o3		! (argument: &tf)
1992
1993	ldd	[%sp + CCFSZ + 0], %l0	! load new values
1994	ldd	[%sp + CCFSZ + 8], %l2
1995	wr	%l3, 0, %y
1996	ld	[%sp + CCFSZ + 20], %g1
1997	ldd	[%sp + CCFSZ + 24], %g2
1998	ldd	[%sp + CCFSZ + 32], %g4
1999	ldd	[%sp + CCFSZ + 40], %g6
2000	ldd	[%sp + CCFSZ + 48], %i0
2001	ldd	[%sp + CCFSZ + 56], %i2
2002	ldd	[%sp + CCFSZ + 64], %i4
2003	ldd	[%sp + CCFSZ + 72], %i6
2004
2005	b	return_from_trap	! go return
2006	 wr	%l0, 0, %psr		! (but first disable traps again)
2007#endif /* SUN4M */
2008
2009normal_mem_fault:
2010	/*
2011	 * Trap was some other error; call C code to deal with it.
2012	 * Must finish trap frame (psr,pc,npc,%y,%o0..%o7) in case
2013	 * we decide to deliver a signal or ptrace the process.
2014	 * %g1..%g7 were already set up above.
2015	 */
2016	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
2017	mov	%l3, %o0		! (argument: type)
2018	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
2019	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
2020	mov	%l1, %o3		! (argument: pc)
2021	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
2022	std	%i2, [%sp + CCFSZ + 56]
2023	mov	%l0, %o4		! (argument: psr)
2024	std	%i4, [%sp + CCFSZ + 64]
2025	std	%i6, [%sp + CCFSZ + 72]
2026	call	_C_LABEL(mem_access_fault)! mem_access_fault(type, ser, sva,
2027					!		pc, psr, &tf);
2028	 add	%sp, CCFSZ, %o5		! (argument: &tf)
2029
2030	ldd	[%sp + CCFSZ + 0], %l0	! load new values
2031	ldd	[%sp + CCFSZ + 8], %l2
2032	wr	%l3, 0, %y
2033	ld	[%sp + CCFSZ + 20], %g1
2034	ldd	[%sp + CCFSZ + 24], %g2
2035	ldd	[%sp + CCFSZ + 32], %g4
2036	ldd	[%sp + CCFSZ + 40], %g6
2037	ldd	[%sp + CCFSZ + 48], %i0
2038	ldd	[%sp + CCFSZ + 56], %i2
2039	ldd	[%sp + CCFSZ + 64], %i4
2040	ldd	[%sp + CCFSZ + 72], %i6
2041
2042	b	return_from_trap	! go return
2043	 wr	%l0, 0, %psr		! (but first disable traps again)
2044
2045illinst4m:
2046	/*
2047	 * Cypress CPUs like to generate an Illegal Instruction trap
2048	 * for FLUSH instructions. Since we turn FLUSHes into no-ops
2049	 * (see also trap.c/emul.c), we check for this case here in
2050	 * the trap window, saving the overhead of a slow trap.
2051	 *
2052	 * We have to be careful not to incur a trap while probing
2053	 * for the instruction in user space. Use the Inhibit Fault
2054	 * bit in the PCR register to prevent that.
2055	 */
2056
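	/*
	 * In rough C, with lda()/sta() standing for the ASI accesses
	 * (helper names illustrative; the SRMMU_* and SFSR_FAV
	 * constants are real):
	 *
	 *	(void)lda(SRMMU_SFSR);			// clear old status
	 *	pcr = lda(SRMMU_PCR);
	 *	sta(SRMMU_PCR, pcr | SRMMU_PCR_NF);	// inhibit faults
	 *	insn = lda_useri(pc);			// probe user insn
	 *	sta(SRMMU_PCR, pcr);
	 *	(void)lda(SRMMU_SFAR);			// VA; read first
	 *	if (lda(SRMMU_SFSR) & SFSR_FAV)
	 *		goto slowtrap;			// probe faulted
	 *	if ((insn & 0xc1f80000) != 0x81d80000)	// op=2, op3=0x3b
	 *		goto slowtrap;			// not a FLUSH
	 *	pc = npc; npc += 4;			// skip it and rett
	 */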
2057	btst	PSR_PS, %l0		! slowtrap() if from kernel
2058	bnz	slowtrap
2059	 .empty
2060
2061	! clear fault status
2062	set	SRMMU_SFSR, %l7
2063	lda	[%l7]ASI_SRMMU, %g0
2064
2065	! turn on the fault inhibit in PCR
2066	!set	SRMMU_PCR, reg			- SRMMU_PCR == 0, so use %g0
2067	lda	[%g0]ASI_SRMMU, %l4
2068	or	%l4, SRMMU_PCR_NF, %l5
2069	sta	%l5, [%g0]ASI_SRMMU
2070
2071	! load the insn word as if user insn fetch
2072	lda	[%l1]ASI_USERI, %l5
2073
2074	sta	%l4, [%g0]ASI_SRMMU		! restore PCR
2075
2076	! check fault status; if we have a fault, take a regular trap
2077	set	SRMMU_SFAR, %l6
2078	lda	[%l6]ASI_SRMMU, %g0		! fault VA; must be read first
2079	lda	[%l7]ASI_SRMMU, %l6		! fault status
2080	andcc	%l6, SFSR_FAV, %l6		! get fault status bits
2081	bnz	slowtrap
2082	 .empty
2083
2084	! we got the insn; check whether it was a FLUSH
2085	! instruction format: op=2, op3=0x3b (see also instr.h)
2086	set	((3 << 30) | (0x3f << 19)), %l7	! extract op & op3 fields
2087	and	%l5, %l7, %l6
2088	set	((2 << 30) | (0x3b << 19)), %l7	! any FLUSH opcode
2089	cmp	%l6, %l7
2090	bne	slowtrap
2091	 nop
2092
2093	mov	%l2, %l1			! ADVANCE <pc,npc>
2094	mov	%l0, %psr			! and return from trap
2095	 add	%l2, 4, %l2
2096	RETT
2097
2098
2099/*
2100 * fp_exception has to check to see if we are trying to save
2101 * the FP state, and if so, continue to save the FP state.
2102 *
2103 * We do not even bother checking to see if we were in kernel mode,
2104 * since users have no access to the special_fp_store instruction.
2105 *
2106 * This whole idea was stolen from Sprite.
2107 */
2108fp_exception:
2109	set	special_fp_store, %l4	! see if we came from the special one
2110	cmp	%l1, %l4		! pc == special_fp_store?
2111	bne	slowtrap		! no, go handle per usual
2112	 .empty
2113	sethi	%hi(savefpcont), %l4	! yes, "return" to the special code
2114	or	%lo(savefpcont), %l4, %l4
2115	jmp	%l4
2116	 rett	%l4 + 4
2117
2118/*
2119 * slowtrap() builds a trap frame and calls trap().
2120 * This is called `slowtrap' because it *is*....
2121 * We have to build a full frame for ptrace(), for instance.
2122 *
2123 * Registers:
2124 *	%l0 = %psr
2125 *	%l1 = return pc
2126 *	%l2 = return npc
2127 *	%l3 = trap code
2128 */
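/*
 * For reference, the stores below lay the frame out as follows
 * (offsets from %sp + CCFSZ, matching the assym.h constants):
 *
 *	+ 0 tf_psr	+ 4 tf_pc	+ 8 tf_npc	+12 tf_y
 *	+16 tf_global[0] slot (tf_wim to kgdb; %fp in intr frames)
 *	+20 tf_global[1] ... +44 tf_global[7]
 *	+48 tf_out[0]    ... +76 tf_out[7]
 */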
2129slowtrap:
2130	TRAP_SETUP(-CCFSZ-80)
2131	/*
2132	 * Phew, ready to enable traps and call C code.
2133	 */
2134	mov	%l3, %o0		! put type in %o0 for later
2135Lslowtrap_reenter:
2136	wr	%l0, PSR_ET, %psr	! traps on again
2137	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
2138	rd	%y, %l3
2139	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
2140	st	%g1, [%sp + CCFSZ + 20]
2141	std	%g2, [%sp + CCFSZ + 24]
2142	std	%g4, [%sp + CCFSZ + 32]
2143	std	%g6, [%sp + CCFSZ + 40]
2144	std	%i0, [%sp + CCFSZ + 48]
2145	mov	%l0, %o1		! (psr)
2146	std	%i2, [%sp + CCFSZ + 56]
2147	mov	%l1, %o2		! (pc)
2148	std	%i4, [%sp + CCFSZ + 64]
2149	add	%sp, CCFSZ, %o3		! (&tf)
2150	call	_C_LABEL(trap)		! trap(type, psr, pc, &tf)
2151	 std	%i6, [%sp + CCFSZ + 72]
2152
2153	ldd	[%sp + CCFSZ], %l0	! load new values
2154	ldd	[%sp + CCFSZ + 8], %l2
2155	wr	%l3, 0, %y
2156	ld	[%sp + CCFSZ + 20], %g1
2157	ldd	[%sp + CCFSZ + 24], %g2
2158	ldd	[%sp + CCFSZ + 32], %g4
2159	ldd	[%sp + CCFSZ + 40], %g6
2160	ldd	[%sp + CCFSZ + 48], %i0
2161	ldd	[%sp + CCFSZ + 56], %i2
2162	ldd	[%sp + CCFSZ + 64], %i4
2163	ldd	[%sp + CCFSZ + 72], %i6
2164	b	return_from_trap
2165	 wr	%l0, 0, %psr
2166
2167/*
2168 * Do a `software' trap by re-entering the trap code, possibly first
2169 * switching from interrupt stack to kernel stack.  This is used for
2170 * scheduling and signal ASTs (which generally occur from softclock or
2171 * tty or net interrupts) and register window saves (which might occur
2172 * from anywhere).
2173 *
2174 * The current window is the trap window, and it is by definition clean.
2175 * We enter with the trap type in %o0.  All we have to do is jump to
2176 * Lslowtrap_reenter above, but maybe after switching stacks....
2177 */
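/*
 * In C, the stack juggling below is roughly:
 *
 *	if (sp >= eintstack || sp < eintstack - INT_STACK_SIZE)
 *		goto Lslowtrap_reenter;	// already on a kernel stack
 *	sp = (char *)cpcb + USPACE - CCFSZ - 80;
 *	goto Lslowtrap_reenter;		// re-enter on the kernel stack
 *
 * (uniprocessor kernels have a fixed interrupt stack and need only
 * check the upper bound).
 */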
2178softtrap:
2179#if defined(MULTIPROCESSOR)
2180	/*
2181	 * The interrupt stack is not at a fixed location
2182	 * and %sp must be checked against both ends.
2183	 */
2184	sethi	%hi(_EINTSTACKP), %l6
2185	ld	[%l6 + %lo(_EINTSTACKP)], %l7
2186	cmp	%sp, %l7
2187	bge	Lslowtrap_reenter
2188	 .empty
2189	set	INT_STACK_SIZE, %l6
2190	sub	%l7, %l6, %l7
2191	cmp	%sp, %l7
2192	blu	Lslowtrap_reenter
2193	 .empty
2194#else
2195	sethi	%hi(_C_LABEL(eintstack)), %l7
2196	cmp	%sp, %l7
2197	bge	Lslowtrap_reenter
2198	 .empty
2199#endif
2200	sethi	%hi(cpcb), %l6
2201	ld	[%l6 + %lo(cpcb)], %l6
2202	set	USPACE-CCFSZ-80, %l5
2203	add	%l6, %l5, %l7
2204	SET_SP_REDZONE(%l6, %l5)
2205	b	Lslowtrap_reenter
2206	 mov	%l7, %sp
2207
2208#ifdef KGDB
2209/*
2210 * bpt is entered on all breakpoint traps.
2211 * If this is a kernel breakpoint, we do not want to call trap().
2212 * Among other reasons, this way we can set breakpoints in trap().
2213 */
2214bpt:
2215	btst	PSR_PS, %l0		! breakpoint from kernel?
2216	bz	slowtrap		! no, go do regular trap
2217	 nop
2218
2219/* XXXSMP */
2220	/*
2221	 * Build a trap frame for kgdb_trap_glue to copy.
2222	 * Enable traps but set ipl high so that we will not
2223	 * see interrupts from within breakpoints.
2224	 */
2225	TRAP_SETUP(-CCFSZ-80)
2226	or	%l0, PSR_PIL, %l4	! splhigh()
2227	wr	%l4, 0, %psr		! the manual claims that this
2228	wr	%l4, PSR_ET, %psr	! song and dance is necessary
2229	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
2230	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
2231	rd	%y, %l3
2232	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
2233	rd	%wim, %l3
2234	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
2235	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
2236	std	%g2, [%sp + CCFSZ + 24]	! etc
2237	std	%g4, [%sp + CCFSZ + 32]
2238	std	%g6, [%sp + CCFSZ + 40]
2239	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
2240	std	%i2, [%sp + CCFSZ + 56]	! etc
2241	std	%i4, [%sp + CCFSZ + 64]
2242	std	%i6, [%sp + CCFSZ + 72]
2243
2244	/*
2245	 * Now call kgdb_trap_glue(); if it returns, call trap().
2246	 */
2247	mov	%o0, %l3		! gotta save trap type
2248	call	_C_LABEL(kgdb_trap_glue)! kgdb_trap_glue(type, &trapframe)
2249	 add	%sp, CCFSZ, %o1		! (&trapframe)
2250
2251	/*
2252	 * Use slowtrap to call trap---but first erase our tracks
2253	 * (put the registers back the way they were).
2254	 */
2255	mov	%l3, %o0		! slowtrap will need trap type
2256	ld	[%sp + CCFSZ + 12], %l3
2257	wr	%l3, 0, %y
2258	ld	[%sp + CCFSZ + 20], %g1
2259	ldd	[%sp + CCFSZ + 24], %g2
2260	ldd	[%sp + CCFSZ + 32], %g4
2261	b	Lslowtrap_reenter
2262	 ldd	[%sp + CCFSZ + 40], %g6
2263
2264/*
2265 * Enter kernel breakpoint.  Write all the windows (not including the
2266 * current window) into the stack, so that backtrace works.  Copy the
2267 * supplied trap frame to the kgdb stack and switch stacks.
2268 *
2269 * kgdb_trap_glue(type, tf0)
2270 *	int type;
2271 *	struct trapframe *tf0;
2272 */
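/*
 * The glue below is, in effect:
 *
 *	write_all_windows();
 *	tfcopy = kgdb_stack + KGDB_STACK_SIZE - 80;
 *	memcpy(tfcopy, tf0, 80);		// 8 bytes per loop turn
 *	sp = (char *)tfcopy - CCFSZ - 80;	// switch to kgdb stack
 *	if (kgdb_trap(type, tfcopy))
 *		kgdb_rett(tfcopy);
 *	sp = old sp; return;			// stack still intact
 */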
2273_ENTRY(_C_LABEL(kgdb_trap_glue))
2274	save	%sp, -CCFSZ, %sp
2275
2276	call	_C_LABEL(write_all_windows)
2277	 mov	%sp, %l4		! %l4 = current %sp
2278
2279	/* copy trapframe to top of kgdb stack */
2280	set	_C_LABEL(kgdb_stack) + KGDB_STACK_SIZE - 80, %l0
2281					! %l0 = tfcopy -> end_of_kgdb_stack
2282	mov	80, %l1
22831:	ldd	[%i1], %l2
2284	inc	8, %i1
2285	deccc	8, %l1
2286	std	%l2, [%l0]
2287	bg	1b
2288	 inc	8, %l0
2289
2290#if defined(DEBUG) && !defined(MULTIPROCESSOR)
2291	/* save old red zone and then turn it off */
2292	sethi	%hi(_redzone), %l7
2293	ld	[%l7 + %lo(_redzone)], %l6
2294	st	%g0, [%l7 + %lo(_redzone)]
2295#endif
2296	/* switch to kgdb stack */
2297	add	%l0, -CCFSZ-80, %sp
2298
2299	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
2300	mov	%i0, %o0
2301	call	_C_LABEL(kgdb_trap)
2302	 add	%l0, -80, %o1
2303	tst	%o0
2304	bnz,a	kgdb_rett
2305	 add	%l0, -80, %g1
2306
2307	/*
2308	 * kgdb_trap() did not handle the trap at all so the stack is
2309	 * still intact.  A simple `restore' will put everything back,
2310	 * after we reset the stack pointer.
2311	 */
2312	mov	%l4, %sp
2313#if defined(DEBUG) && !defined(MULTIPROCESSOR)
2314	st	%l6, [%l7 + %lo(_redzone)]	! restore red zone
2315#endif
2316	ret
2317	restore
2318
2319/*
2320 * Return from kgdb trap.  This is sort of special.
2321 *
2322 * We know that kgdb_trap_glue wrote the window above it, so that we will
2323 * be able to (and are sure to have to) load it up.  We also know that we
2324 * came from kernel land and can assume that the %fp (%i6) we load here
2325 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
2326 * until we have traps disabled, due to the SPARC taking traps at the
2327 * new ipl before noticing that PSR_ET has been turned off.  We are on
2328 * the kgdb stack, so this could be disastrous.
2329 *
2330 * Note that the trapframe argument in %g1 points into the current stack
2331 * frame (current window).  We abandon this window when we move %g1->tf_psr
2332 * into %psr, but we will not have loaded the new %sp yet, so again traps
2333 * must be disabled.
2334 */
2335kgdb_rett:
2336	rd	%psr, %g4		! turn off traps
2337	wr	%g4, PSR_ET, %psr
2338	/* use the three-instruction delay to do something useful */
2339	ld	[%g1], %g2		! pick up new %psr
2340	ld	[%g1 + 12], %g3		! set %y
2341	wr	%g3, 0, %y
2342#if defined(DEBUG) && !defined(MULTIPROCESSOR)
2343	st	%l6, [%l7 + %lo(_redzone)] ! and restore red zone
2344#endif
2345	wr	%g0, 0, %wim		! enable window changes
2346	nop; nop; nop
2347	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
2348	wr	%g2, 0, %psr		! set rett psr (including cond codes)
2349	/* 3 instruction delay before we can use the new window */
2350/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
2351/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
2352/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7
2353
2354	/* now we can use the new window */
2355	mov	%g1, %l4
2356	ld	[%l4 + 4], %l1		! get new pc
2357	ld	[%l4 + 8], %l2		! get new npc
2358	ld	[%l4 + 20], %g1		! set new %g1
2359
2360	/* set up returnee's out registers, including its %sp */
2361	ldd	[%l4 + 48], %i0
2362	ldd	[%l4 + 56], %i2
2363	ldd	[%l4 + 64], %i4
2364	ldd	[%l4 + 72], %i6
2365
2366	/* load returnee's window, making the window above it be invalid */
2367	restore
2368	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
2369	rd	%psr, %l0
2370	sll	%l1, %l0, %l1
2371	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
2372	sethi	%hi(cpcb), %l1
2373	ld	[%l1 + %lo(cpcb)], %l1
2374	and	%l0, 31, %l0		! CWP = %psr & 31;
2375	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
2376	save	%g0, %g0, %g0		! back to window to reload
2377	LOADWIN(%sp)
2378	save	%g0, %g0, %g0		! back to trap window
2379	/* note, we have not altered condition codes; safe to just rett */
2380	RETT
2381#endif
2382
2383/*
2384 * _syscall() builds a trap frame and calls syscall().
2385 * sun_syscall is the same, but delivers the SunOS system call number.
2386 * XXX	should not have to save&reload ALL the registers just for
2387 *	ptrace...
2388 */
2389_C_LABEL(_syscall):
2390	TRAP_SETUP(-CCFSZ-80)
2391#ifdef DEBUG
2392	or	%g1, 0x1000, %l6	! mark syscall
2393	TRAP_TRACE(%l6,%l5)
2394#endif
2395	wr	%l0, PSR_ET, %psr
2396	std	%l0, [%sp + CCFSZ + 0]	! tf_psr, tf_pc
2397	rd	%y, %l3
2398	std	%l2, [%sp + CCFSZ + 8]	! tf_npc, tf_y
2399	st	%g1, [%sp + CCFSZ + 20]	! tf_g[1]
2400	std	%g2, [%sp + CCFSZ + 24]	! tf_g[2], tf_g[3]
2401	std	%g4, [%sp + CCFSZ + 32]	! etc
2402	std	%g6, [%sp + CCFSZ + 40]
2403	mov	%g1, %o0		! (code)
2404	std	%i0, [%sp + CCFSZ + 48]
2405	add	%sp, CCFSZ, %o1		! (&tf)
2406	std	%i2, [%sp + CCFSZ + 56]
2407	mov	%l1, %o2		! (pc)
2408	std	%i4, [%sp + CCFSZ + 64]
2409
2410	sethi	%hi(curlwp), %l1
2411	ld	[%l1 + %lo(curlwp)], %l1
2412	ld	[%l1 + L_PROC], %l1
2413	ld	[%l1 + P_MD_SYSCALL], %l1
2414	call	%l1			! (*p->p_md.md_syscall)(code, &tf, pc)
2415	 std	%i6, [%sp + CCFSZ + 72]
2416	! now load em all up again, sigh
2417	ldd	[%sp + CCFSZ + 0], %l0	! new %psr, new pc
2418	ldd	[%sp + CCFSZ + 8], %l2	! new npc, new %y
2419	wr	%l3, 0, %y
2420	/* see `lwp_trampoline' for the reason for this label */
2421return_from_syscall:
2422	ld	[%sp + CCFSZ + 20], %g1
2423	ldd	[%sp + CCFSZ + 24], %g2
2424	ldd	[%sp + CCFSZ + 32], %g4
2425	ldd	[%sp + CCFSZ + 40], %g6
2426	ldd	[%sp + CCFSZ + 48], %i0
2427	ldd	[%sp + CCFSZ + 56], %i2
2428	ldd	[%sp + CCFSZ + 64], %i4
2429	ldd	[%sp + CCFSZ + 72], %i6
2430	b	return_from_trap
2431	 wr	%l0, 0, %psr
2432
2433/*
2434 * Interrupts.  Software interrupts must be cleared from the software
2435 * interrupt enable register.  Rather than calling ienab_bic for each,
2436 * we do them in-line before enabling traps.
2437 *
2438 * After preliminary setup work, the interrupt is passed to each
2439 * registered handler in turn.  These are expected to return nonzero if
2440 * they took care of the interrupt.  If a handler claims the interrupt,
2441 * we exit (hardware interrupts are latched in the requestor so we'll
2442 * just take another interrupt in the unlikely event of simultaneous
2443 * interrupts from two different devices at the same level).  If we go
2444 * through all the registered handlers and no one claims it, we report a
2445 * stray interrupt.  This is more or less done as:
2446 *
2447 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
2448 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
2449 *			return;
2450 *	strayintr(&frame);
2451 *
2452 * Software interrupts are almost the same with three exceptions:
2453 * (1) we clear the interrupt from the software interrupt enable
2454 *     register before calling any handler (we have to clear it first
2455 *     to avoid an interrupt-losing race),
2456 * (2) we always call all the registered handlers (there is no way
2457 *     to tell if the single bit in the software interrupt register
2458 *     represents one or many requests), and
2459 * (3) we never announce a stray interrupt (because of (1), another
2460 *     interrupt request can come in while we're in the handler.  If
2461 *     the handler deals with everything for both the original & the
2462 *     new request, we'll erroneously report a stray interrupt when
2463 *     we take the software interrupt for the new request).
2464 *
2465 * Inputs:
2466 *	%l0 = %psr
2467 *	%l1 = return pc
2468 *	%l2 = return npc
2469 *	%l3 = interrupt level
2470 *	(software interrupt only) %l4 = bits to clear in interrupt register
2471 *
2472 * Internal:
2473 *	%l4, %l5: local variables
2474 *	%l6 = %y
2475 *	%l7 = %g1
2476 *	%g2..%g7 go to stack
2477 *
2478 * An interrupt frame is built in the space for a full trapframe;
2479 * this contains the psr, pc, npc, and interrupt level.
2480 */
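/*
 * For software interrupts the dispatch therefore reduces to the
 * unconditional
 *
 *	ienab_bic(bits);		// cleared first, per (1)
 *	for (ih = sintrhand[intlev]; ih != NULL; ih = ih->ih_next)
 *		(void)(*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame);
 *
 * with no stray-interrupt report, per (2) and (3).
 */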
2481softintr_sun44c:
2482	/*
2483	 * Entry point for level 1, 4 or 6 interrupts on sun4/sun4c
2484	 * which may be software interrupts. Check the interrupt
2485	 * register to see whether we're dealing with a software or a
2486	 * hardware interrupt.
2487	 */
2488	sethi	%hi(INTRREG_VA), %l6
2489	ldub	[%l6 + %lo(INTRREG_VA)], %l5
2490	btst	%l5, %l4		! is IE_L{1,4,6} set?
2491	bz	sparc_interrupt44c	! if not, must be a hw intr
2492	andn	%l5, %l4, %l5		! clear soft intr bit
2493	stb	%l5, [%l6 + %lo(INTRREG_VA)]
2494
2495softintr_common:
2496	INTR_SETUP(-CCFSZ-80)
2497	std	%g2, [%sp + CCFSZ + 24]	! save registers
2498	! tally softint (curcpu()->cpu_data.cpu_nintr++) (clobbers %o0,%o1,%o2)
2499	INCR64(CPUINFO_VA + CPUINFO_NSOFT)
2500	mov	%g1, %l7
2501	rd	%y, %l6
2502	std	%g4, [%sp + CCFSZ + 32]
2503	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
2504	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
2505	std	%g6, [%sp + CCFSZ + 40]
2506	or	%l5, %l4, %l4		!			;
2507	wr	%l4, 0, %psr		! the manual claims this
2508	wr	%l4, PSR_ET, %psr	! song and dance is necessary
2509	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
2510	sll	%l3, 2, %l5
2511
2512	set	CPUINFO_VA + CPUINFO_SINTRCNT, %l4	! sintrcnt[intlev].ev_count++;
2513	sll	%l3, EV_STRUCTSHIFT, %o2
2514	ldd	[%l4 + %o2], %o0
2515	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
2516	inccc   %o1
2517	addx    %o0, 0, %o0
2518	std	%o0, [%l4 + %o2]
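	! The ldd/inccc/addx/std sequence above is a 64-bit increment,
	! sintrcnt[intlev].ev_count++, composed from 32-bit halves:
	! bump the low word, then fold the carry into the high word.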
2519
2520	set	_C_LABEL(sintrhand), %l4! %l4 = sintrhand[intlev];
2521	ld	[%l4 + %l5], %l4
2522
2523	sethi	%hi(CPUINFO_VA+CPUINFO_IDEPTH), %o2
2524	ld	[ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ], %o3
2525	inc	%o3
2526	st	%o3, [ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ]
2527
2528	b	3f
2529	 st	%fp, [%sp + CCFSZ + 16]
2530
25311:	ld	[%l4 + IH_CLASSIPL], %o2 ! ih->ih_classipl
2532	rd	%psr, %o3		!  (bits already shifted to PIL field)
2533	andn	%o3, PSR_PIL, %o3	! %o3 = psr & ~PSR_PIL
2534	wr	%o3, %o2, %psr		! splraise(ih->ih_classipl)
2535	ld	[%l4 + IH_FUN], %o1
2536	ld	[%l4 + IH_ARG], %o0
2537	nop				! one more insn before touching ICC
2538	tst	%o0
2539	bz,a	2f
2540	 add	%sp, CCFSZ, %o0
25412:	jmpl	%o1, %o7		!	(void)(*ih->ih_fun)(...)
2542	 ld	[%l4 + IH_NEXT], %l4	!	and ih = ih->ih_next
25433:	tst	%l4			! while ih != NULL
2544	bnz	1b
2545	 nop
2546
2547	sethi	%hi(CPUINFO_VA+CPUINFO_IDEPTH), %o2
2548	ld	[ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ], %o3
2549	dec	%o3
2550	st	%o3, [ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ]
2551
2552	mov	%l7, %g1
2553	wr	%l6, 0, %y
2554	ldd	[%sp + CCFSZ + 24], %g2
2555	ldd	[%sp + CCFSZ + 32], %g4
2556	ldd	[%sp + CCFSZ + 40], %g6
2557	b	return_from_trap
2558	 wr	%l0, 0, %psr
2559
2560	/*
2561	 * _sparc_interrupt{44c,4m} is exported for paranoia checking
2562	 * (see intr.c).
2563	 */
2564#if defined(SUN4M)
2565_ENTRY(_C_LABEL(sparc_interrupt4m))
2566#if !defined(MSIIEP)	/* "normal" sun4m */
2567	sethi	%hi(CPUINFO_VA+CPUINFO_INTREG), %l6
2568	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_INTREG)], %l7
2569	mov	1, %l4
2570	ld	[%l7 + ICR_PI_PEND_OFFSET], %l5	! get pending interrupts
2571	sll	%l4, %l3, %l4	! hw intr bits are in the lower halfword
2572
2573	btst	%l4, %l5	! has pending hw intr at this level?
2574	bnz	sparc_interrupt_common
2575	 nop
2576
2577	! both softint pending and clear bits are in upper halfwords of
2578	! their respective registers so shift the test bit in %l4 up there
2579	sll	%l4, 16, %l4
2580
2581	st	%l4, [%l7 + ICR_PI_CLR_OFFSET]	! ack soft intr
2582#if defined(MULTIPROCESSOR)
2583	cmp	%l3, 14
2584	be	lev14_softint
2585#endif
2586	/* Drain hw reg; might be necessary for Ross CPUs */
2587	 ld	[%l7 + ICR_PI_PEND_OFFSET], %g0
2588
2589#ifdef DIAGNOSTIC
2590	btst	%l4, %l5	! make sure softint pending bit is set
2591	bnz	softintr_common
2592	/* FALLTHROUGH to sparc_interrupt4m_bogus */
2593#else
2594	b	softintr_common
2595#endif
2596	 nop
2597
2598#else /* MSIIEP */
2599	sethi	%hi(MSIIEP_PCIC_VA), %l6
2600	mov	1, %l4
2601	xor	%l3, 0x18, %l7	! change endianness of the resulting bit mask
2602	ld	[%l6 + PCIC_PROC_IPR_REG], %l5 ! get pending interrupts
2603	sll	%l4, %l7, %l4	! hw intr bits are in the upper halfword
2604				! because the register is little-endian
2605	btst	%l4, %l5	! has pending hw intr at this level?
2606	bnz	sparc_interrupt_common
2607	 nop
2608
2609	srl	%l4, 16, %l4	! move the mask bit into the lower 16 bit
2610				! so we can use it to clear a sw interrupt
2611
2612#ifdef DIAGNOSTIC
2613	! check if there's really a sw interrupt pending
2614	btst	%l4, %l5	! make sure softint pending bit is set
2615	bnz	softintr_common
2616	 sth	%l4, [%l6 + PCIC_SOFT_INTR_CLEAR_REG]
2617	/* FALLTHROUGH to sparc_interrupt4m_bogus */
2618#else
2619	b	softintr_common
2620	 sth	%l4, [%l6 + PCIC_SOFT_INTR_CLEAR_REG]
2621#endif
2622
2623#endif /* MSIIEP */
2624
2625#ifdef DIAGNOSTIC
2626	/*
2627	 * sparc_interrupt4m detected that neither hardware nor software
2628	 * interrupt pending bit is set for this interrupt.  Report this
2629	 * situation, this is most probably a symptom of a driver bug.
2630	 */
2631sparc_interrupt4m_bogus:
2632	INTR_SETUP(-CCFSZ-80)
2633	std	%g2, [%sp + CCFSZ + 24]	! save registers
2634	! tally interrupt (curcpu()->cpu_data.cpu_nintr++) (clobbers %o0,%o1)
2635	INCR64X(CPUINFO_VA + CPUINFO_NINTR, %o0, %o1, %l7)
2636	mov	%g1, %l7
2637	rd	%y, %l6
2638	std	%g4, [%sp + CCFSZ + 32]
2639	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
2640	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
2641	std	%g6, [%sp + CCFSZ + 40]
2642	or	%l5, %l4, %l4		!			;
2643	wr	%l4, 0, %psr		! the manual claims this
2644	wr	%l4, PSR_ET, %psr	! song and dance is necessary
2645	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
2646	sll	%l3, 2, %l5
2647
2648	set	CPUINFO_VA + CPUINFO_INTRCNT, %l4	! intrcnt[intlev].ev_count++;
2649	sll	%l3, EV_STRUCTSHIFT, %o2
2650	ldd	[%l4 + %o2], %o0
2651	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
2652	inccc   %o1
2653	addx    %o0, 0, %o0
2654	std	%o0, [%l4 + %o2]
2655
2656	st	%fp, [%sp + CCFSZ + 16]
2657
2658	/* Unhandled interrupts while cold cause IPL to be raised to `high' */
2659	sethi	%hi(_C_LABEL(cold)), %o0
2660	ld	[%o0 + %lo(_C_LABEL(cold))], %o0
2661	tst	%o0			! if (cold) {
2662	bnz,a	1f			!	splhigh();
2663	 or	%l0, 0xf00, %l0		! } else
2664
2665	call	_C_LABEL(bogusintr)	!	bogusintr(&intrframe)
2666	 add	%sp, CCFSZ, %o0
2667	/* all done: restore registers and go return */
26681:
2669	mov	%l7, %g1
2670	wr	%l6, 0, %y
2671	ldd	[%sp + CCFSZ + 24], %g2
2672	ldd	[%sp + CCFSZ + 32], %g4
2673	ldd	[%sp + CCFSZ + 40], %g6
2674	b	return_from_trap
2675	 wr	%l0, 0, %psr
2676#endif /* DIAGNOSTIC */
2677#endif /* SUN4M */
2678
2679_ENTRY(_C_LABEL(sparc_interrupt44c))
2680sparc_interrupt_common:
2681	INTR_SETUP(-CCFSZ-80)
2682	std	%g2, [%sp + CCFSZ + 24]	! save registers
2683	! tally intr (curcpu()->cpu_data.cpu_nintr++) (clobbers %o0,%o1)
2684	INCR64X(CPUINFO_VA + CPUINFO_NINTR, %o0, %o1, %l7)
2685	mov	%g1, %l7
2686	rd	%y, %l6
2687	std	%g4, [%sp + CCFSZ + 32]
2688	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
2689	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
2690	std	%g6, [%sp + CCFSZ + 40]
2691	or	%l5, %l4, %l4		!			;
2692	wr	%l4, 0, %psr		! the manual claims this
2693	wr	%l4, PSR_ET, %psr	! song and dance is necessary
2694	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
2695	sll	%l3, 2, %l5
2696
2697	set	CPUINFO_VA + CPUINFO_INTRCNT, %l4	! intrcnt[intlev].ev_count++;
2698	sll	%l3, EV_STRUCTSHIFT, %o2
2699	ldd	[%l4 + %o2], %o0
2700	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
2701	inccc   %o1
2702	addx    %o0, 0, %o0
2703	std	%o0, [%l4 + %o2]
2704
2705	set	_C_LABEL(intrhand), %l4	! %l4 = intrhand[intlev];
2706	ld	[%l4 + %l5], %l4
2707
2708	sethi	%hi(CPUINFO_VA+CPUINFO_IDEPTH), %o2
2709	ld	[ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ], %o3
2710	inc	%o3
2711	st	%o3, [ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ]
2712
2713	b	3f
2714	 st	%fp, [%sp + CCFSZ + 16]
2715
27161:	ld	[%l4 + IH_CLASSIPL], %o2 ! ih->ih_classipl
2717	rd	%psr, %o3		!  (bits already shifted to PIL field)
2718	andn	%o3, PSR_PIL, %o3	! %o3 = psr & ~PSR_PIL
2719	wr	%o3, %o2, %psr		! splraise(ih->ih_classipl)
2720	ld	[%l4 + IH_FUN], %o1
2721	ld	[%l4 + IH_ARG], %o0
2722	nop				! one more insn before touching ICC
2723	tst	%o0
2724	bz,a	2f
2725	 add	%sp, CCFSZ, %o0
27262:	jmpl	%o1, %o7		!	handled = (*ih->ih_fun)(...)
2727	 ld	[%l4 + IH_NEXT], %l4	!	and ih = ih->ih_next
2728	tst	%o0
2729	bnz	4f			! if (handled) break
2730	 nop
27313:	tst	%l4
2732	bnz	1b			! while (ih)
2733	 nop
2734
2735	/* Unhandled interrupts while cold cause IPL to be raised to `high' */
2736	sethi	%hi(_C_LABEL(cold)), %o0
2737	ld	[%o0 + %lo(_C_LABEL(cold))], %o0
2738	tst	%o0			! if (cold) {
2739	bnz,a	4f			!	splhigh();
2740	 or	%l0, 0xf00, %l0		! } else
2741
2742	call	_C_LABEL(strayintr)	!	strayintr(&intrframe)
2743	 add	%sp, CCFSZ, %o0
2744	/* all done: restore registers and go return */
27454:
2746	sethi	%hi(CPUINFO_VA+CPUINFO_IDEPTH), %o2
2747	ld	[ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ], %o3
2748	dec	%o3
2749	st	%o3, [ %o2 + %lo(CPUINFO_VA+CPUINFO_IDEPTH) ]
2750
2751	mov	%l7, %g1
2752	wr	%l6, 0, %y
2753	ldd	[%sp + CCFSZ + 24], %g2
2754	ldd	[%sp + CCFSZ + 32], %g4
2755	ldd	[%sp + CCFSZ + 40], %g6
2756	b	return_from_trap
2757	 wr	%l0, 0, %psr
2758
2759#if defined(MULTIPROCESSOR)
2760/*
2761 * Level 14 software interrupt: fast IPI
2762 * <%l0,%l1,%l2> = <psr, pc, npc>
2763 * %l3 = int level
2764 * %l6 = &cpuinfo
2765 */
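/*
 * The dispatch amounts to (field names here follow the
 * CPUINFO_XMSG_* offsets used below):
 *
 *	cpuinfo.lev14.ev_count++;
 *	handler = cpuinfo.msg.trap;	// one of the ft_* handlers
 *	arg0 = cpuinfo.msg.arg0;	// first argument, prefetched
 *	goto *handler;			// entered with %l3 = arg0
 */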
2766lev14_softint:
2767	sethi	%hi(CPUINFO_VA+CPUINFO_LEV14), %l7
2768	ldd	[%l7 + %lo(CPUINFO_VA+CPUINFO_LEV14)], %l4
2769	inccc	%l5
2770	addx	%l4, %g0, %l4
2771	std	%l4, [%l7 + %lo(CPUINFO_VA+CPUINFO_LEV14)]
2772
2773	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_TRAP), %l6
2774	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_TRAP)], %l7
2775#ifdef DIAGNOSTIC
2776	tst	%l7
2777	bz	sparc_interrupt4m_bogus
2778	 nop
2779#endif
2780	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG0), %l6
2781	jmp	%l7
2782	 ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG0)], %l3	! prefetch 1st arg
2783
2784/*
2785 * Fast flush handlers, xcalled from other CPUs through soft interrupt 14.
2786 * On entry:	%l6 = CPUINFO_VA
2787 *		%l3 = first argument
2788 *
2789 * As always, these fast trap handlers should preserve all registers
2790 * except %l3 to %l7
2791 */
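/*
 * As an example, ft_srmmu_vcache_flush_page below is roughly the
 * following C (lda()/sta() and friends again stand for the ASI
 * accesses):
 *
 *	octx = lda(SRMMU_CXR);
 *	sta(SRMMU_CXR, ctx);			// enter target context
 *	for (n = 4096; n > 0; n -= linesz, va += linesz)
 *		sta_idcachelfp(va);		// flush one cache line
 *	sta_srmmufp(va0);			// flush the TLB entry
 *	sta(SRMMU_CXR, octx);			// restore old context
 *	cpuinfo.msg.complete = 1;		// ack the xcall (ft_rett)
 */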
2792_ENTRY(_C_LABEL(ft_tlb_flush))
2793	!	<%l3 already fetched for us>	! va
2794	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG2), %l6
2795	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG2)], %l5	! level
2796	andn	%l3, 0xfff, %l3			! %l3 = (va&~0xfff | lvl);
2797	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG1), %l6
2798	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG1)], %l4	! context
2799	or	%l3, %l5, %l3
2800
2801	mov	SRMMU_CXR, %l7			!
2802	lda	[%l7]ASI_SRMMU, %l5		! %l5 = old context
2803	sta	%l4, [%l7]ASI_SRMMU		! set new context
2804
2805	sta	%g0, [%l3]ASI_SRMMUFP		! flush TLB
2806
2807ft_rett:
2808	! common return from Fast Flush handlers
2809	! enter here with %l5 = ctx to restore, %l6 = CPUINFO_VA, %l7 = ctx reg
2810	mov	1, %l4				!
2811	sta	%l5, [%l7]ASI_SRMMU		! restore context
2812	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_CMPLT), %l6
2813	st	%l4, [%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_CMPLT)]	! completed = 1
2814
2815	mov	%l0, %psr			! return from trap
2816	 nop
2817	RETT
2818
2819_ENTRY(_C_LABEL(ft_srmmu_vcache_flush_page))
2820	!	<%l3 already fetched for us>	! va
2821	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG1), %l6
2822	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG1)], %l4	! context
2823
2824	mov	SRMMU_CXR, %l7			!
2825	lda	[%l7]ASI_SRMMU, %l5		! %l5 = old context
2826	sta	%l4, [%l7]ASI_SRMMU		! set new context
2827
2828	set	4096, %l4			! N = page size
2829	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_LINESZ), %l6
2830	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_LINESZ)], %l7
28311:
2832	sta	%g0, [%l3]ASI_IDCACHELFP	!  flush cache line
2833	subcc	%l4, %l7, %l4			!  N -= linesz;
2834	bgu	1b				! while ((N -= linesz) > 0)
2835	 add	%l3, %l7, %l3
2836
2837	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG0), %l6
2838	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG0)], %l3	! reload va
2839	!or	%l3, ASI_SRMMUFP_L3(=0), %l3	! va |= ASI_SRMMUFP_L3
2840	sta	%g0, [%l3]ASI_SRMMUFP		! flush TLB
2841
2842	b	ft_rett
2843	 mov	SRMMU_CXR, %l7			! reload ctx register
2844
2845_ENTRY(_C_LABEL(ft_srmmu_vcache_flush_segment))
2846	!	<%l3 already fetched for us>	! vr
2847	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG1), %l6
2848	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG1)], %l5	! vs
2849	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG2), %l6
2850	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG2)], %l4	! context
2851
2852	sll	%l3, 24, %l3			! va = VSTOVA(vr,vs)
2853	sll	%l5, 18, %l5
2854	or	%l3, %l5, %l3
2855
2856	mov	SRMMU_CXR, %l7			!
2857	lda	[%l7]ASI_SRMMU, %l5		! %l5 = old context
2858	sta	%l4, [%l7]ASI_SRMMU		! set new context
2859
2860	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_NLINES), %l6
2861	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_NLINES)], %l4
2862	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_LINESZ), %l6
2863	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_LINESZ)], %l7
28641:
2865	sta	%g0, [%l3]ASI_IDCACHELFS	!  flush cache line
2866	deccc	%l4				!  nlines--;
2867	bgu	1b				! while (--nlines > 0)
2868	 add	%l3, %l7, %l3
2869
2870	b	ft_rett
2871	 mov	SRMMU_CXR, %l7			! reload ctx register
2872
2873_ENTRY(_C_LABEL(ft_srmmu_vcache_flush_region))
2874	!	<%l3 already fetched for us>	! vr
2875	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG1), %l6
2876	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG1)], %l4	! context
2877
2878	sll	%l3, 24, %l3			! va = VRTOVA(vr)
2879
2880	mov	SRMMU_CXR, %l7			!
2881	lda	[%l7]ASI_SRMMU, %l5		! %l5 = old context
2882	sta	%l4, [%l7]ASI_SRMMU		! set new context
2883
2884	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_NLINES), %l6
2885	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_NLINES)], %l4
2886	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_LINESZ), %l6
2887	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_LINESZ)], %l7
28881:
2889	sta	%g0, [%l3]ASI_IDCACHELFR	!  flush cache line
2890	deccc	%l4				!  nlines--;
2891	bgu	1b				! while (--nlines > 0)
2892	 add	%l3, %l7, %l3
2893
2894	b	ft_rett
2895	 mov	SRMMU_CXR, %l7			! reload ctx register
2896
2897_ENTRY(_C_LABEL(ft_srmmu_vcache_flush_context))
2898	!	<%l3 already fetched for us>	! context
2899
2900	mov	SRMMU_CXR, %l7			!
2901	lda	[%l7]ASI_SRMMU, %l5		! %l5 = old context
2902	sta	%l3, [%l7]ASI_SRMMU		! set new context
2903
2904	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_NLINES), %l6
2905	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_NLINES)], %l4
2906	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_LINESZ), %l6
2907	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_LINESZ)], %l7
2908	mov	%g0, %l3			! va = 0
29091:
2910	sta	%g0, [%l3]ASI_IDCACHELFC	!  flush cache line
2911	deccc	%l4				!  nlines--;
2912	bgu	1b				! while (--nlines > 0)
2913	 add	%l3, %l7, %l3
2914
2915	b	ft_rett
2916	 mov	SRMMU_CXR, %l7			! reload ctx register
2917
2918_ENTRY(_C_LABEL(ft_srmmu_vcache_flush_range))
2919	!	<%l3 already fetched for us>	! va
2920	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG2), %l6
2921	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG2)], %l4	! context
2922
2923	mov	SRMMU_CXR, %l7			!
2924	lda	[%l7]ASI_SRMMU, %l5		! %l5 = old context
2925	sta	%l4, [%l7]ASI_SRMMU		! set new context
2926
2927	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG1), %l6
2928	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG1)], %l4	! size
2929	and	%l3, 7, %l7			! double-word alignment
2930	andn	%l3, 7, %l3			!  off = va & 7; va &= ~7
2931	add	%l4, %l7, %l4			!  sz += off
2932
2933	sethi	%hi(CPUINFO_VA+CPUINFO_CACHE_LINESZ), %l6
2934	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_CACHE_LINESZ)], %l7
29351:
2936	sta	%g0, [%l3]ASI_IDCACHELFP	!  flush cache line
2937	subcc	%l4, %l7, %l4			!  sz -= linesz;
2938	bgu	1b				! while ((sz -= linesz) > 0)
2939	 add	%l3, %l7, %l3
2940
2941	/* Flush TLB on all pages we visited */
2942	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG0), %l6
2943	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG0)], %l3	! reload va
2944	sethi	%hi(CPUINFO_VA+CPUINFO_XMSG_ARG1), %l6
2945	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_XMSG_ARG1)], %l4	! reload sz
2946	add	%l3, %l4, %l4			! %l4 = round_page(va + sz)
2947	add	%l4, 0xfff, %l4
2948	andn	%l4, 0xfff, %l4
2949	andn	%l3, 0xfff, %l3			! va &= ~PGOFSET;
2950	sub	%l4, %l3, %l4			! and finally: size rounded
2951						! to page boundary
2952	set	4096, %l7			! page size
2953
29542:
2955	!or	%l3, ASI_SRMMUFP_L3(=0), %l3	!  va |= ASI_SRMMUFP_L3
2956	sta	%g0, [%l3]ASI_SRMMUFP		!  flush TLB
2957	subcc	%l4, %l7, %l4			! while ((sz -= PGSIZE) > 0)
2958	bgu	2b
2959	 add	%l3, %l7, %l3
2960
2961	b	ft_rett
2962	 mov	SRMMU_CXR, %l7			! reload ctx register
2963
2964#endif /* MULTIPROCESSOR */
2965
2966#ifdef notyet
2967/*
2968 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
2969 * software interrupt, and get out.  Do the software interrupt directly
2970 * if we would just take it on the way out.
2971 *
2972 * Input:
2973 *	%l0 = %psr
2974 *	%l1 = return pc
2975 *	%l2 = return npc
2976 * Internal:
2977 *	%l3 = zs device
2978 *	%l4, %l5 = temporary
2979 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
2980 *	%l7 = zs soft status
2981 */
2982zshard:
2983#endif /* notyet */
2984
2985/*
2986 * Level 15 interrupt.  An async memory error has occurred;
2987 * take care of it (typically by panicking, but hey...).
2988 *	%l0 = %psr
2989 *	%l1 = return pc
2990 *	%l2 = return npc
2991 *	%l3 = 15 * 4 (why? just because!)
2992 *
2993 * Internal:
2994 *	%l4 = %y
2995 *	%l5 = %g1
2996 *	%l6 = %g6
2997 *	%l7 = %g7
2998 *  g2, g3, g4, g5 go to stack
2999 *
3000 * This code is almost the same as that in mem_access_fault,
3001 * except that we already know the problem is not a `normal' fault,
3002 * and that we must be extra-careful with interrupt enables.
3003 */
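/*
 * On sun4/sun4c the flow is roughly (intreg is the byte at
 * INTRREG_VA):
 *
 *	*intreg &= ~IE_ALLIE;	// mask everything; traps still off
 *	enable traps;
 *	ser = lda(AC_SYNC_ERR);  sva = lda(AC_SYNC_VA);	  // sun4c only
 *	aer = lda(AC_ASYNC_ERR); ava = lda(AC_ASYNC_VA);
 *	memerr4_4c(0, ser, sva, aer, ava);	// typically panics
 *	disable traps;
 *	*intreg |= IE_ALLIE;			// now safe again
 */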
3004
3005#if defined(SUN4)
3006_ENTRY(_C_LABEL(nmi_sun4))
3007	INTR_SETUP(-CCFSZ-80)
3008	! tally intr (curcpu()->cpu_data.cpu_nintr++) (clobbers %o0,%o1,%o2)
3009	INCR64(CPUINFO_VA + CPUINFO_NINTR)
3010	/*
3011	 * Level 15 interrupts are nonmaskable, so with traps off,
3012	 * disable all interrupts to prevent recursion.
3013	 */
3014	sethi	%hi(INTRREG_VA), %o0
3015	ldub	[%o0 + %lo(INTRREG_VA)], %o1
3016	andn	%o1, IE_ALLIE, %o1
3017	stb	%o1, [%o0 + %lo(INTRREG_VA)]
3018	wr	%l0, PSR_ET, %psr	! okay, turn traps on again
3019
3020	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
3021	rd	%y, %l4			! save y
3022
3023	std	%g4, [%sp + CCFSZ + 8]	! save g4, g5
3024	mov	%g1, %l5		! save g1, g6, g7
3025	mov	%g6, %l6
3026	mov	%g7, %l7
3027#if defined(SUN4C) || defined(SUN4M)
3028	b,a	nmi_common
3029#endif /* SUN4C || SUN4M */
3030#endif
3031
3032#if defined(SUN4C)
3033_ENTRY(_C_LABEL(nmi_sun4c))
3034	INTR_SETUP(-CCFSZ-80)
3035	! tally intr (curcpu()->cpu_data.cpu_nintr++) (clobbers %o0,%o1,%o2)
3036	INCR64(CPUINFO_VA + CPUINFO_NINTR)
3037	/*
3038	 * Level 15 interrupts are nonmaskable, so with traps off,
3039	 * disable all interrupts to prevent recursion.
3040	 */
3041	sethi	%hi(INTRREG_VA), %o0
3042	ldub	[%o0 + %lo(INTRREG_VA)], %o1
3043	andn	%o1, IE_ALLIE, %o1
3044	stb	%o1, [%o0 + %lo(INTRREG_VA)]
3045	wr	%l0, PSR_ET, %psr	! okay, turn traps on again
3046
3047	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
3048	rd	%y, %l4			! save y
3049
3050	! must read the sync error register too.
3051	set	AC_SYNC_ERR, %o0
3052	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
3053	inc	4, %o0
3054	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
3055	std	%g4, [%sp + CCFSZ + 8]	! save g4,g5
3056	mov	%g1, %l5		! save g1,g6,g7
3057	mov	%g6, %l6
3058	mov	%g7, %l7
3059	inc	4, %o0
3060	lda	[%o0] ASI_CONTROL, %o3	! async err reg
3061	inc	4, %o0
3062	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
3063#if defined(SUN4M)
3064	!!b,a	nmi_common
3065#endif /* SUN4M */
3066#endif /* SUN4C */
3067
3068_ENTRY(_C_LABEL(nmi_common))
3069	! and call C code
3070	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, aer, ava)
3071	 clr	%o0
3072
3073	mov	%l5, %g1		! restore g1 through g7
3074	ldd	[%sp + CCFSZ + 0], %g2
3075	ldd	[%sp + CCFSZ + 8], %g4
3076	wr	%l0, 0, %psr		! re-disable traps
3077	mov	%l6, %g6
3078	mov	%l7, %g7
3079
3080	! set IE_ALLIE again (safe, we disabled traps again above)
3081	sethi	%hi(INTRREG_VA), %o0
3082	ldub	[%o0 + %lo(INTRREG_VA)], %o1
3083	or	%o1, IE_ALLIE, %o1
3084	stb	%o1, [%o0 + %lo(INTRREG_VA)]
3085	b	return_from_trap
3086	 wr	%l4, 0, %y		! restore y
3087
3088#if defined(SUN4M)
3089_ENTRY(_C_LABEL(nmi_sun4m))
3090	INTR_SETUP(-CCFSZ-80-8-8)	! normal frame, plus g2..g5
3091
3092#if !defined(MSIIEP) /* normal sun4m */
3093
3094	/* Read the Pending Interrupts register */
3095	sethi	%hi(CPUINFO_VA+CPUINFO_INTREG), %l6
3096	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_INTREG)], %l6
3097	ld	[%l6 + ICR_PI_PEND_OFFSET], %l5	! get pending interrupts
3098
3099	set	_C_LABEL(nmi_soft), %o3		! assume a softint
3100	set	PINTR_IC, %o1			! hard lvl 15 bit
3101	sethi	%hi(PINTR_SINTRLEV(15)), %o0	! soft lvl 15 bit
3102	btst	%o0, %l5		! soft level 15?
3103	bnz,a	1f			!
3104	 mov	%o0, %o1		! shift int clear bit to SOFTINT 15
3105
3106	set	_C_LABEL(nmi_hard), %o3	/* it's a hardint; switch handler */
3107
3108	/*
3109	 * Level 15 interrupts are nonmaskable, so with traps off,
3110	 * disable all interrupts to prevent recursion.
3111	 */
3112	sethi	%hi(ICR_SI_SET), %o0
3113	set	SINTR_MA, %o2
3114	st	%o2, [%o0 + %lo(ICR_SI_SET)]
3115#if defined(MULTIPROCESSOR) && defined(DDB)
3116	b	2f
3117	 clr	%o0
3118#endif
3119
31201:
3121#if defined(MULTIPROCESSOR) && defined(DDB)
3122	/*
3123	 * Setup a trapframe for nmi_soft; this might be an IPI telling
3124	 * us to pause, so let's save some state for DDB to get at.
3125	 */
3126	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
3127	rd	%y, %l3
3128	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
3129	st	%g1, [%sp + CCFSZ + 20]
3130	std	%g2, [%sp + CCFSZ + 24]
3131	std	%g4, [%sp + CCFSZ + 32]
3132	std	%g6, [%sp + CCFSZ + 40]
3133	std	%i0, [%sp + CCFSZ + 48]
3134	std	%i2, [%sp + CCFSZ + 56]
3135	std	%i4, [%sp + CCFSZ + 64]
3136	std	%i6, [%sp + CCFSZ + 72]
3137	add	%sp, CCFSZ, %o0
31382:
3139#else
3140	clr	%o0
3141#endif
3142	/*
3143	 * Now clear the NMI. Apparently, we must allow some time
3144	 * to let the bits sink in...
3145	 */
3146	st	%o1, [%l6 + ICR_PI_CLR_OFFSET]
3147	 nop; nop; nop;
3148	ld	[%l6 + ICR_PI_PEND_OFFSET], %g0	! drain register!?
3149	 nop;
3150
3151	or	%l0, PSR_PIL, %o4	! splhigh()
3152	wr	%o4, 0, %psr		!
3153	wr	%o4, PSR_ET, %psr	! turn traps on again
3154
3155	std	%g2, [%sp + CCFSZ + 80]	! save g2, g3
3156	rd	%y, %l4			! save y
3157	std	%g4, [%sp + CCFSZ + 88]	! save g4,g5
3158
3159	/* Finish stackframe, call C trap handler */
3160	mov	%g1, %l5		! save g1,g6,g7
3161	mov	%g6, %l6
3162
3163	jmpl	%o3, %o7		! nmi_hard(0) or nmi_soft(&tf)
3164	 mov	%g7, %l7
3165
3166	mov	%l5, %g1		! restore g1 through g7
3167	ldd	[%sp + CCFSZ + 80], %g2
3168	ldd	[%sp + CCFSZ + 88], %g4
3169	wr	%l0, 0, %psr		! re-disable traps
3170	mov	%l6, %g6
3171	mov	%l7, %g7
3172
3173	!cmp	%o0, 0			! was this a soft nmi
3174	!be	4f
3175	/* XXX - we need to unblock `mask all ints' only on a hard nmi */
3176
3177	! enable interrupts again (safe, we disabled traps again above)
3178	sethi	%hi(ICR_SI_CLR), %o0
3179	set	SINTR_MA, %o1
3180	st	%o1, [%o0 + %lo(ICR_SI_CLR)]
3181
31824:
3183	b	return_from_trap
3184	 wr	%l4, 0, %y		! restore y
3185
3186#else /* MSIIEP*/
3187	sethi	%hi(MSIIEP_PCIC_VA), %l6
3188
3189	/* Read the Processor Interrupt Pending register */
3190	ld	[%l6 + PCIC_PROC_IPR_REG], %l5
3191
3192	/*
3193	 * Level 15 interrupts are nonmaskable, so with traps off,
3194	 * disable all interrupts to prevent recursion.
3195	 */
3196	mov	0x80, %l4	! htole32(MSIIEP_SYS_ITMR_ALL)
3197	st	%l4, [%l6 + PCIC_SYS_ITMR_SET_REG]
3198
3199	set	(1 << 23), %l4	! htole32(1 << 15)
3200	btst	%l4, %l5	! has pending level 15 hw intr?
3201	bz	1f
3202	 nop
3203
3204	/* hard level 15 interrupt */
3205	sethi	%hi(_C_LABEL(nmi_hard_msiiep)), %o3
3206	b	2f
3207	 or	%o3, %lo(_C_LABEL(nmi_hard_msiiep)), %o3
3208
32091:	/* soft level 15 interrupt */
3210	set	(1 << 7), %l4	! htole16(1 << 15)
3211	sth	%l4, [%l6 + PCIC_SOFT_INTR_CLEAR_REG]
3212	set	_C_LABEL(nmi_soft_msiiep), %o3
32132:
3214
3215	/* XXX:	call sequence is identical to sun4m case above. merge? */
3216	or	%l0, PSR_PIL, %o4	! splhigh()
3217	wr	%o4, 0, %psr		!
3218	wr	%o4, PSR_ET, %psr	! turn traps on again
3219
3220	std	%g2, [%sp + CCFSZ + 80]	! save g2, g3
3221	rd	%y, %l4			! save y
3222	std	%g4, [%sp + CCFSZ + 88]	! save g4, g5
3223
3224	/* Finish stackframe, call C trap handler */
3225	mov	%g1, %l5		! save g1, g6, g7
3226	mov	%g6, %l6
3227
3228	call	%o3			! nmi_hard(0) or nmi_soft(&tf)
3229	 mov	%g7, %l7
3230
3231	mov	%l5, %g1		! restore g1 through g7
3232	ldd	[%sp + CCFSZ + 80], %g2
3233	ldd	[%sp + CCFSZ + 88], %g4
3234	wr	%l0, 0, %psr		! re-disable traps
3235	mov	%l6, %g6
3236	mov	%l7, %g7
3237
3238	! enable interrupts again (safe, we disabled traps again above)
3239	sethi	%hi(MSIIEP_PCIC_VA), %o0
3240	mov	0x80, %o1	! htole32(MSIIEP_SYS_ITMR_ALL)
3241	st	%o1, [%o0 + PCIC_SYS_ITMR_CLR_REG]
3242
3243	b	return_from_trap
3244	 wr	%l4, 0, %y		! restore y
3245#endif /* MSIIEP */
3246#endif /* SUN4M */
3247
3248
3249#ifdef GPROF
3250	.globl	window_of, winof_user
3251	.globl	window_uf, winuf_user, winuf_ok, winuf_invalid
3252	.globl	return_from_trap, rft_kernel, rft_user, rft_invalid
3253	.globl	softtrap, slowtrap
3254	.globl	clean_trap_window, _C_LABEL(_syscall)
3255#endif
3256
3257/*
3258 * Window overflow trap handler.
3259 *	%l0 = %psr
3260 *	%l1 = return pc
3261 *	%l2 = return npc
3262 */
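/*
 * In outline:
 *
 *	if (trapped from kernel mode) {
 *		clean_trap_window();	// previous window known valid
 *		rett;
 *	} else {
 *		cpcb->pcb_uw = uwtab[cwp - cpcb->pcb_wim];
 *		clean_trap_window();
 *		sp = kernel stack;	// then finish the return via
 *	}				// rft_user_or_recover_pcb_windows
 */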
3263window_of:
3264#ifdef TRIVIAL_WINDOW_OVERFLOW_HANDLER
3265	/* a trivial version that assumes %sp is ok */
3266	/* (for testing only!) */
3267	save	%g0, %g0, %g0
3268	std	%l0, [%sp + (0*8)]
3269	rd	%psr, %l0
3270	mov	1, %l1
3271	sll	%l1, %l0, %l0
3272	wr	%l0, 0, %wim
3273	std	%l2, [%sp + (1*8)]
3274	std	%l4, [%sp + (2*8)]
3275	std	%l6, [%sp + (3*8)]
3276	std	%i0, [%sp + (4*8)]
3277	std	%i2, [%sp + (5*8)]
3278	std	%i4, [%sp + (6*8)]
3279	std	%i6, [%sp + (7*8)]
3280	restore
3281	RETT
3282#else
3283	/*
3284	 * This is similar to TRAP_SETUP, but we do not want to spend
3285	 * a lot of time, so we have separate paths for kernel and user.
3286	 * We also know for sure that the window has overflowed.
3287	 */
3288	TRAP_TRACE2(5,%l6,%l5)
3289	btst	PSR_PS, %l0
3290	bz	winof_user
3291	 sethi	%hi(clean_trap_window), %l7
3292
3293	/*
3294	 * Overflow from kernel mode.  Call clean_trap_window to
3295	 * do the dirty work, then just return, since we know prev
3296	 * window is valid.  clean_trap_window might dump all *user*
3297	 * windows into the pcb, but we do not care: there is at
3298	 * least one kernel window (a trap or interrupt frame!)
3299	 * above us.
3300	 */
3301	jmpl	%l7 + %lo(clean_trap_window), %l4
3302	 mov	%g7, %l7		! for clean_trap_window
3303
3304	wr	%l0, 0, %psr		! put back the @%*! cond. codes
3305	nop				! (let them settle in)
3306	RETT
3307
3308winof_user:
3309	/*
3310	 * Overflow from user mode.
3311	 * If clean_trap_window dumps the registers into the pcb,
3312	 * rft_user will need to call trap(), so we need space for
3313	 * a trap frame.  We also have to compute pcb_nw.
3314	 *
3315	 * SHOULD EXPAND IN LINE TO AVOID BUILDING TRAP FRAME ON
3316	 * `EASY' SAVES
3317	 */
3318	sethi	%hi(cpcb), %l6
3319	ld	[%l6 + %lo(cpcb)], %l6
3320	ld	[%l6 + PCB_WIM], %l5
3321	and	%l0, 31, %l3
3322	sub	%l3, %l5, %l5 		/* l5 = CWP - pcb_wim */
3323	set	uwtab, %l4
3324	ldub	[%l4 + %l5], %l5	/* l5 = uwtab[l5] */
3325	st	%l5, [%l6 + PCB_UW]
3326	jmpl	%l7 + %lo(clean_trap_window), %l4
3327	 mov	%g7, %l7		! for clean_trap_window
3328	sethi	%hi(cpcb), %l6
3329	ld	[%l6 + %lo(cpcb)], %l6
3330	set	USPACE-CCFSZ-80, %l5
3331	add	%l6, %l5, %sp		/* over to kernel stack */
3332	CHECK_SP_REDZONE(%l6, %l5)
3333
3334	/*
3335	 * Copy return_from_trap far enough to allow us
3336	 * to jump directly to rft_user_or_recover_pcb_windows
3337	 * (since we know that is where we are headed).
3338	 */
3339!	and	%l0, 31, %l3		! still set (clean_trap_window
3340					! leaves this register alone)
3341	set	wmask, %l6
3342	ldub	[%l6 + %l3], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
3343	b	rft_user_or_recover_pcb_windows
3344	 rd	%wim, %l4		! (read %wim first)
3345#endif /* end `real' version of window overflow trap handler */
3346
3347/*
3348 * Window underflow trap handler.
3349 *	%l0 = %psr
3350 *	%l1 = return pc
3351 *	%l2 = return npc
3352 *
3353 * A picture:
3354 *
3355 *	  T R I X
3356 *	0 0 0 1 0 0 0	(%wim)
3357 * [bit numbers increase towards the right;
3358 * `restore' moves right & `save' moves left]
3359 *
3360 * T is the current (Trap) window, R is the window that attempted
3361 * a `Restore' instruction, I is the Invalid window, and X is the
3362 * window we want to make invalid before we return.
3363 *
3364 * Since window R is valid, we cannot use rft_user to restore stuff
3365 * for us.  We have to duplicate its logic.  YUCK.
3366 *
3367 * Incidentally, TRIX are for kids.  Silly rabbit!
3368 */
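/*
 * Whichever path is taken below, the %wim bookkeeping reduces to:
 *
 *	wim = 0;			// let us restore into I
 *	restore; restore; restore;	// T -> R -> I -> X
 *	cwp = psr & 31;			// now in window X
 *	wim = 1 << cwp;			// X becomes the invalid window
 *	cpcb->pcb_wim = cwp;
 *	// reload the needed window from its frame, save back to T, rett
 */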
3369window_uf:
3370#ifdef TRIVIAL_WINDOW_UNDERFLOW_HANDLER
3371	wr	%g0, 0, %wim		! allow us to enter I
3372	restore				! to R
3373	nop
3374	nop
3375	restore				! to I
3376	restore	%g0, 1, %l1		! to X
3377	rd	%psr, %l0
3378	sll	%l1, %l0, %l0
3379	wr	%l0, 0, %wim
3380	save	%g0, %g0, %g0		! back to I
3381	LOADWIN(%sp)
3382	save	%g0, %g0, %g0		! back to R
3383	save	%g0, %g0, %g0		! back to T
3384	RETT
3385#else
3386	TRAP_TRACE2(6,%l6,%l5)
3387	wr	%g0, 0, %wim		! allow us to enter I
3388	btst	PSR_PS, %l0
3389	restore				! enter window R
3390	bz	winuf_user
3391	 restore			! enter window I
3392
3393	/*
3394	 * Underflow from kernel mode.  Just recover the
3395	 * registers and go (except that we have to update
3396	 * the blasted user pcb fields).
3397	 */
3398	restore	%g0, 1, %l1		! enter window X, then set %l1 to 1
3399	rd	%psr, %l0		! cwp = %psr & 31;
3400	and	%l0, 31, %l0
3401	sll	%l1, %l0, %l1		! wim = 1 << cwp;
3402	wr	%l1, 0, %wim		! setwim(wim);
3403	sethi	%hi(cpcb), %l1
3404	ld	[%l1 + %lo(cpcb)], %l1
3405	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = cwp;
3406	save	%g0, %g0, %g0		! back to window I
3407	LOADWIN(%sp)
3408	save	%g0, %g0, %g0		! back to R
3409	save	%g0, %g0, %g0		! and then to T
3410	wr	%l0, 0, %psr		! fix those cond codes....
3411	nop				! (let them settle in)
3412	RETT
3413
3414winuf_user:
3415	/*
3416	 * Underflow from user mode.
3417	 *
3418	 * We cannot use rft_user (as noted above) because
3419	 * we must re-execute the `restore' instruction.
3420	 * Since it could be, e.g., `restore %l0,0,%l0',
3421	 * it is not okay to touch R's registers either.
3422	 *
3423	 * We are now in window I.
3424	 */
3425	btst	7, %sp			! if unaligned, it is invalid
3426	bne	winuf_invalid
3427	 .empty
3428
3429	sethi	%hi(_C_LABEL(pgofset)), %l4
3430	ld	[%l4 + %lo(_C_LABEL(pgofset))], %l4
3431	PTE_OF_ADDR(%sp, %l7, winuf_invalid, %l4, NOP_ON_4M_5)
3432	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_6) ! if first page not readable,
3433	bne	winuf_invalid		! it is invalid
3434	 .empty
3435	SLT_IF_1PAGE_RW(%sp, %l7, %l4)	! first page is readable
3436	bl,a	winuf_ok		! if only one page, enter window X
3437	 restore %g0, 1, %l1		! and goto ok, & set %l1 to 1
3438	add	%sp, 7*8, %l5
3439	add     %l4, 62, %l4
3440	PTE_OF_ADDR(%l5, %l7, winuf_invalid, %l4, NOP_ON_4M_7)
3441	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_8) ! check second page too
3442	be,a	winuf_ok		! enter window X and goto ok
3443	 restore %g0, 1, %l1		! (and then set %l1 to 1)
3444
3445winuf_invalid:
3446	/*
3447	 * We were unable to restore the window because %sp
3448	 * is invalid or paged out.  Return to the trap window
3449	 * and call trap(T_WINUF).  This will save R to the user
3450	 * stack, then load both R and I into the pcb rw[] area,
3451	 * and return with pcb_nsaved set to -1 for success, 0 for
3452	 * failure.  `Failure' indicates that someone goofed with the
3453	 * trap registers (e.g., signals), so that we need to return
3454	 * from the trap as from a syscall (probably to a signal handler)
3455	 * and let it retry the restore instruction later.  Note that
3456	 * window R will have been pushed out to user space, and thus
3457	 * be the invalid window, by the time we get back here.  (We
3458	 * continue to label it R anyway.)  We must also set %wim again,
3459	 * and set pcb_uw to 1, before enabling traps.  (Window R is the
3460	 * only window, and it is a user window).
3461	 */
3462	save	%g0, %g0, %g0		! back to R
3463	save	%g0, 1, %l4		! back to T, then %l4 = 1
3464	sethi	%hi(cpcb), %l6
3465	ld	[%l6 + %lo(cpcb)], %l6
3466	st	%l4, [%l6 + PCB_UW]	! pcb_uw = 1
3467	ld	[%l6 + PCB_WIM], %l5	! get log2(%wim)
3468	sll	%l4, %l5, %l4		! %l4 = old %wim
3469	wr	%l4, 0, %wim		! window I is now invalid again
3470	set	USPACE-CCFSZ-80, %l5
3471	add	%l6, %l5, %sp		! get onto kernel stack
3472	CHECK_SP_REDZONE(%l6, %l5)
3473
3474	/*
3475	 * Okay, call trap(T_WINUF, psr, pc, &tf).
3476	 * See `slowtrap' above for operation.
3477	 */
3478	wr	%l0, PSR_ET, %psr
3479	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
3480	rd	%y, %l3
3481	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
3482	mov	T_WINUF, %o0
3483	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
3484	mov	%l0, %o1
3485	std	%g2, [%sp + CCFSZ + 24]	! etc
3486	mov	%l1, %o2
3487	std	%g4, [%sp + CCFSZ + 32]
3488	add	%sp, CCFSZ, %o3
3489	std	%g6, [%sp + CCFSZ + 40]
3490	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
3491	std	%i2, [%sp + CCFSZ + 56]
3492	std	%i4, [%sp + CCFSZ + 64]
3493	call	_C_LABEL(trap)		! trap(T_WINUF, pc, psr, &tf)
3494	 std	%i6, [%sp + CCFSZ + 72]	! tf.tf_out[6]
3495
3496	ldd	[%sp + CCFSZ + 0], %l0	! new psr, pc
3497	ldd	[%sp + CCFSZ + 8], %l2	! new npc, %y
3498	wr	%l3, 0, %y
3499	ld	[%sp + CCFSZ + 20], %g1
3500	ldd	[%sp + CCFSZ + 24], %g2
3501	ldd	[%sp + CCFSZ + 32], %g4
3502	ldd	[%sp + CCFSZ + 40], %g6
3503	ldd	[%sp + CCFSZ + 48], %i0	! %o0 for window R, etc
3504	ldd	[%sp + CCFSZ + 56], %i2
3505	ldd	[%sp + CCFSZ + 64], %i4
3506	wr	%l0, 0, %psr		! disable traps: test must be atomic
3507	ldd	[%sp + CCFSZ + 72], %i6
3508	sethi	%hi(cpcb), %l6
3509	ld	[%l6 + %lo(cpcb)], %l6
3510	ld	[%l6 + PCB_NSAVED], %l7	! if nsaved is -1, we have our regs
3511	tst	%l7
3512	bl,a	1f			! got them
3513	 wr	%g0, 0, %wim		! allow us to enter windows R, I
3514	b,a	return_from_trap
3515
3516	/*
3517	 * Got 'em.  Load 'em up.
3518	 */
35191:
3520	mov	%g6, %l3		! save %g6; set %g6 = cpcb
3521	mov	%l6, %g6
3522	st	%g0, [%g6 + PCB_NSAVED]	! and clear magic flag
3523	restore				! from T to R
3524	restore				! from R to I
3525	restore	%g0, 1, %l1		! from I to X, then %l1 = 1
3526	rd	%psr, %l0		! cwp = %psr;
3527	sll	%l1, %l0, %l1
3528	wr	%l1, 0, %wim		! make window X invalid
3529	and	%l0, 31, %l0
3530	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = cwp;
3531	nop				! unnecessary? old wim was 0...
3532	save	%g0, %g0, %g0		! back to I
3533	LOADWIN(%g6 + PCB_RW + 64)	! load from rw[1]
3534	save	%g0, %g0, %g0		! back to R
3535	LOADWIN(%g6 + PCB_RW)		! load from rw[0]
3536	save	%g0, %g0, %g0		! back to T
3537	wr	%l0, 0, %psr		! restore condition codes
3538	mov	%l3, %g6		! fix %g6
3539	RETT
3540
3541	/*
3542	 * Restoring from user stack, but everything has checked out
3543	 * as good.  We are now in window X, and %l1 = 1.  Window R
3544	 * is still valid and holds user values.
3545	 */
3546winuf_ok:
3547	rd	%psr, %l0
3548	sll	%l1, %l0, %l1
3549	wr	%l1, 0, %wim		! make this one invalid
3550	sethi	%hi(cpcb), %l2
3551	ld	[%l2 + %lo(cpcb)], %l2
3552	and	%l0, 31, %l0
3553	st	%l0, [%l2 + PCB_WIM]	! cpcb->pcb_wim = cwp;
3554	save	%g0, %g0, %g0		! back to I
3555	LOADWIN(%sp)
3556	save	%g0, %g0, %g0		! back to R
3557	save	%g0, %g0, %g0		! back to T
3558	wr	%l0, 0, %psr		! restore condition codes
3559	nop				! it takes three to tangle
3560	RETT
3561#endif /* end `real' version of window underflow trap handler */
3562
3563/*
3564 * Various return-from-trap routines (see return_from_trap).
3565 */
3566
3567/*
3568 * Return from trap, to kernel.
3569 *	%l0 = %psr
3570 *	%l1 = return pc
3571 *	%l2 = return npc
3572 *	%l4 = %wim
3573 *	%l5 = bit for previous window
3574 */
3575rft_kernel:
3576	btst	%l5, %l4		! if (wim & l5)
3577	bnz	1f			!	goto reload;
3578	 wr	%l0, 0, %psr		! but first put !@#*% cond codes back
3579
3580	/* previous window is valid; just rett */
3581	nop				! wait for cond codes to settle in
3582	RETT
3583
3584	/*
3585	 * Previous window is invalid.
3586	 * Update %wim and then reload l0..i7 from frame.
3587	 *
3588	 *	  T I X
3589	 *	0 0 1 0 0   (%wim)
3590	 * [see picture in window_uf handler]
3591	 *
3592	 * T is the current (Trap) window, I is the Invalid window,
3593	 * and X is the window we want to make invalid.  Window X
3594	 * currently has no useful values.
3595	 */
35961:
3597	wr	%g0, 0, %wim		! allow us to enter window I
3598	nop; nop; nop			! (it takes a while)
3599	restore				! enter window I
3600	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
3601	rd	%psr, %l0		! CWP = %psr & 31;
3602	and	%l0, 31, %l0
3603	sll	%l1, %l0, %l1		! wim = 1 << CWP;
3604	wr	%l1, 0, %wim		! setwim(wim);
3605	sethi	%hi(cpcb), %l1
3606	ld	[%l1 + %lo(cpcb)], %l1
3607	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
3608	save	%g0, %g0, %g0		! back to window I
3609	LOADWIN(%sp)
3610	save	%g0, %g0, %g0		! back to window T
3611	/*
3612	 * Note that the condition codes are still set from
3613	 * the code at rft_kernel; we can simply return.
3614	 */
3615	RETT
3616
3617/*
3618 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
3619 * will re-enter trap() if set.  Note that we may have to switch from
3620 * the interrupt stack to the kernel stack in this case.
3621 *	%l0 = %psr
3622 *	%l1 = return pc
3623 *	%l2 = return npc
3624 *	%l4 = %wim
3625 *	%l5 = bit for previous window
3626 *	%l6 = cpcb
3627 * If returning to a valid window, just set psr and return.
3628 */
3629rft_user:
3630!	sethi	%hi(_WANT_AST), %l7	! (done below)
3631	ld	[%l7 + %lo(_WANT_AST)], %l7
3632	tst	%l7			! want AST trap?
3633	bne,a	softtrap		! yes, re-enter trap with type T_AST
3634	 mov	T_AST, %o0
3635
3636	btst	%l5, %l4		! if (wim & l5)
3637	bnz	1f			!	goto reload;
3638	 wr	%l0, 0, %psr		! restore cond codes
3639	nop				! (three instruction delay)
3640	RETT
3641
3642	/*
3643	 * Previous window is invalid.
3644	 * Before we try to load it, we must verify its stack pointer.
3645	 * This is much like the underflow handler, but a bit easier
3646	 * since we can use our own local registers.
3647	 */
36481:
3649	btst	7, %fp			! if unaligned, address is invalid
3650	bne	rft_invalid
3651	 .empty
3652
3653	sethi	%hi(_C_LABEL(pgofset)), %l3
3654	ld	[%l3 + %lo(_C_LABEL(pgofset))], %l3
3655	PTE_OF_ADDR(%fp, %l7, rft_invalid, %l3, NOP_ON_4M_9)
3656	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_10)	! try first page
3657	bne	rft_invalid		! no good
3658	 .empty
3659	SLT_IF_1PAGE_RW(%fp, %l7, %l3)
3660	bl,a	rft_user_ok		! only 1 page: ok
3661	 wr	%g0, 0, %wim
3662	add	%fp, 7*8, %l5
3663	add	%l3, 62, %l3
3664	PTE_OF_ADDR(%l5, %l7, rft_invalid, %l3, NOP_ON_4M_11)
3665	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_12)	! check 2nd page too
3666	be,a	rft_user_ok
3667	 wr	%g0, 0, %wim
3668
3669	/*
3670	 * The window we wanted to pull could not be pulled.  Instead,
3671	 * re-enter trap with type T_RWRET.  This will pull the window
3672	 * into cpcb->pcb_rw[0] and set cpcb->pcb_nsaved to -1, which we
3673	 * will detect when we try to return again.
3674	 */
3675rft_invalid:
3676	b	softtrap
3677	 mov	T_RWRET, %o0
3678
3679	/*
3680	 * The window we want to pull can be pulled directly.
3681	 */
3682rft_user_ok:
3683!	wr	%g0, 0, %wim		! allow us to get into it
3684	wr	%l0, 0, %psr		! fix up the cond codes now
3685	nop; nop; nop
3686	restore				! enter window I
3687	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
3688	rd	%psr, %l0		! l0 = (junk << 5) + CWP;
3689	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
3690	wr	%l1, 0, %wim
3691	sethi	%hi(cpcb), %l1
3692	ld	[%l1 + %lo(cpcb)], %l1
3693	and	%l0, 31, %l0
3694	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
3695	save	%g0, %g0, %g0		! back to window I
3696	LOADWIN(%sp)			! suck hard
3697	save	%g0, %g0, %g0		! back to window T
3698	RETT
3699
3700/*
3701 * Return from trap.  Entered after a
3702 *	wr	%l0, 0, %psr
3703 * which disables traps so that we can rett; registers are:
3704 *
3705 *	%l0 = %psr
3706 *	%l1 = return pc
3707 *	%l2 = return npc
3708 *
3709 * (%l3..%l7 anything).
3710 *
3711 * If we are returning to user code, we must:
3712 *  1.  Check for register windows in the pcb that belong on the stack.
3713 *	If there are any, reenter trap with type T_WINOF.
3714 *  2.  Make sure the register windows will not underflow.  This is
3715 *	much easier in kernel mode....
3716 */
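/*
 * In rough C, the dispatch below is (a sketch, not literal kernel
 * code; cwp and nwindows as in the SPARC v8 window model):
 *
 *	cwp = psr & 31;
 *	prevwin = wmask[cwp];	// == 1 << ((cwp + 1) % nwindows)
 *	if (psr & PSR_PS)
 *		rft_kernel();	// previous state was supervisor
 *	else
 *		rft_user_or_recover_pcb_windows();
 */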
3717return_from_trap:
3718!	wr	%l0, 0, %psr		! disable traps so we can rett
3719! (someone else did this already)
3720	and	%l0, 31, %l5
3721	set	wmask, %l6
3722	ldub	[%l6 + %l5], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
3723	btst	PSR_PS, %l0		! returning to userland?
3724	bnz	rft_kernel		! no, go return to kernel
3725	 rd	%wim, %l4		! (read %wim in any case)
3726
3727rft_user_or_recover_pcb_windows:
3728	/*
3729	 * (entered with %l4=%wim, %l5=wmask[cwp]; %l0..%l2 as usual)
3730	 *
3731	 * check cpcb->pcb_nsaved:
3732	 * if 0, do a `normal' return to user (see rft_user);
3733	 * if > 0, cpcb->pcb_rw[] holds registers to be copied to stack;
3734	 * if -1, cpcb->pcb_rw[0] holds user registers for rett window
3735	 * from an earlier T_RWRET pseudo-trap.
3736	 */
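	/*
	 * I.e., roughly (a sketch):
	 *
	 *	if (cpcb->pcb_nsaved == 0)
	 *		rft_user();
	 *	else if (cpcb->pcb_nsaved > 0)
	 *		trap(T_WINOF);	// spill pcb_rw[] to the user stack
	 *	else	// -1: T_RWRET left the rett window in pcb_rw[0]
	 *		reload pcb_rw[0] into the previous window, then rett;
	 */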
3737	sethi	%hi(cpcb), %l6
3738	ld	[%l6 + %lo(cpcb)], %l6
3739	ld	[%l6 + PCB_NSAVED], %l7
3740	tst	%l7
3741	bz,a	rft_user
3742	 sethi	%hi(_WANT_AST), %l7	! first instr of rft_user
3743
3744	bg,a	softtrap		! if (pcb_nsaved > 0)
3745	 mov	T_WINOF, %o0		!	trap(T_WINOF);
3746
3747	/*
3748	 * To get here, we must have tried to return from a previous
3749	 * trap and discovered that it would cause a window underflow.
3750	 * We then must have tried to pull the registers out of the
3751	 * user stack (from the address in %fp==%i6) and discovered
3752	 * that it was either unaligned or not loaded in memory, and
3753	 * therefore we ran a trap(T_RWRET), which loaded one set of
3754	 * registers into cpcb->pcb_rw[0] (if it had killed the
3755	 * process due to a bad stack, we would not be here).
3756	 *
3757	 * We want to load pcb_rw[0] into the previous window, which
3758	 * we know is currently invalid.  In other words, we want
3759	 * %wim to be 1 << ((cwp + 2) % nwindows).
3760	 */
3761	wr	%g0, 0, %wim		! enable restores
3762	mov	%g6, %l3		! save g6 in l3
3763	mov	%l6, %g6		! set g6 = &u
3764	st	%g0, [%g6 + PCB_NSAVED]	! clear cpcb->pcb_nsaved
3765	restore				! enter window I
3766	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
3767	rd	%psr, %l0
3768	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
3769	wr	%l1, 0, %wim
3770	and	%l0, 31, %l0
3771	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = CWP;
3772	nop				! unnecessary? old wim was 0...
3773	save	%g0, %g0, %g0		! back to window I
3774	LOADWIN(%g6 + PCB_RW)
3775	save	%g0, %g0, %g0		! back to window T (trap window)
3776	wr	%l0, 0, %psr		! cond codes, cond codes everywhere
3777	mov	%l3, %g6		! restore g6
3778	RETT
3779
3780! exported end marker for kernel gdb
3781	.globl	_C_LABEL(endtrapcode)
3782_C_LABEL(endtrapcode):
3783
3784/*
3785 * init_tables(nwin) int nwin;
3786 *
3787 * Set up the uwtab and wmask tables.
3788 * We know nwin > 1.
3789 */
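/*
 * Equivalent C, consolidated from the three loops below (a sketch;
 * uwtab is effectively indexed from -nwin+1 through nwin-1, and
 * wmask[i] ends up as 1 << ((i + 1) % nwin)):
 *
 *	void
 *	init_tables(int nwin)
 *	{
 *		int i, j;
 *
 *		for (i = -nwin, j = nwin - 2; ++i < 0; j--)
 *			uwtab[i] = j;
 *		for (j = nwin - 1; i < nwin; i++, j--)
 *			uwtab[i] = j;
 *		for (i = j = 1; i < nwin; i++) {
 *			j <<= 1;
 *			wmask[i - 1] = j;
 *		}
 *		wmask[i - 1] = 1;
 *	}
 */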
3790init_tables:
3791	/*
3792	 * for (i = -nwin, j = nwin - 2; ++i < 0; j--)
3793	 *	uwtab[i] = j;
3794	 * (loop runs at least once)
3795	 */
3796	set	uwtab, %o3
3797	sub	%g0, %o0, %o1		! i = -nwin + 1
3798	inc	%o1
3799	add	%o0, -2, %o2		! j = nwin - 2;
38000:
3801	stb	%o2, [%o3 + %o1]	! uwtab[i] = j;
38021:
3803	inccc	%o1			! ++i < 0?
3804	bl	0b			! yes, continue loop
3805	 dec	%o2			! in any case, j--
3806
3807	/*
3808	 * (i now equals 0)
3809	 * for (j = nwin - 1; i < nwin; i++, j--)
3810	 *	uwtab[i] = j;
3811	 * (loop runs at least twice)
3812	 */
3813	sub	%o0, 1, %o2		! j = nwin - 1
38140:
3815	stb	%o2, [%o3 + %o1]	! uwtab[i] = j
3816	inc	%o1			! i++
38171:
3818	cmp	%o1, %o0		! i < nwin?
3819	bl	0b			! yes, continue
3820	 dec	%o2			! in any case, j--
3821
3822	/*
3823	 * We observe that, for i in 0..nwin-2, (i+1)%nwin == i+1;
3824	 * for i==nwin-1, (i+1)%nwin == 0.
3825	 * To avoid adding 1, we run i from 1 to nwin and set
3826	 * wmask[i-1].
3827	 *
3828	 * for (i = j = 1; i < nwin; i++) {
3829	 *	j <<= 1;	(j now == 1 << i)
3830	 *	wmask[i - 1] = j;
3831	 * }
3832	 * (loop runs at least once)
3833	 */
3834	set	wmask - 1, %o3
3835	mov	1, %o1			! i = 1;
3836	mov	2, %o2			! j = 2;
38370:
3838	stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
3839	inc	%o1			! i++
3840	cmp	%o1, %o0		! i < nwin?
3841	bl,a	0b			! yes, continue
3842	 sll	%o2, 1, %o2		! (and j <<= 1)
3843
3844	/*
3845	 * Now i==nwin, so we want wmask[i-1] = 1.
3846	 */
3847	mov	1, %o2			! j = 1;
3848	retl
3849	 stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
3850
3851
3852dostart:
3853	/*
3854	 * Startup.
3855	 *
3856	 * We may have been loaded in low RAM, at some address which
3857	 * is page aligned (PROM_LOADADDR actually) rather than where we
3858	 * want to run (KERNBASE+PROM_LOADADDR).  Until we get everything set,
3859	 * we have to be sure to use only pc-relative addressing.
3860	 */
3861
3862	/*
3863	 * Find out if the above is the case.
3864	 */
38650:	call	1f
3866	 sethi	%hi(0b), %l0		! %l0 = virtual address of 0:
38671:	or	%l0, %lo(0b), %l0
3868	sub	%l0, %o7, %l7		! subtract actual physical address of 0:
3869
3870	/*
3871	 * If we're already running at our desired virtual load address,
3872	 * %l7 will be set to 0, otherwise it will be KERNBASE.
3873	 * From now on until the end of locore bootstrap code, %l7 will
3874	 * be used to relocate memory references.
3875	 */
3876#define RELOCATE(l,r)		\
3877	set	l, r;		\
3878	sub	r, %l7, r
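/*
 * For example (a sketch of the expansion):
 *
 *	RELOCATE(_C_LABEL(bootinfo), %l3)
 * becomes
 *	set	_C_LABEL(bootinfo), %l3
 *	sub	%l3, %l7, %l3
 *
 * yielding an address that works whether we are still running low
 * (%l7 == KERNBASE) or already at the link address (%l7 == 0).
 */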
3879
3880	/*
3881	 * We use the bootinfo method to pass arguments, and the new
3882	 * magic number indicates that. A pointer to the kernel top, i.e.
3883	 * the first address after the loaded kernel image (including DDB
3884	 * symbols, if any) is passed in %o4[0] and the bootinfo structure
3885	 * is passed in %o4[1].
3886	 *
3887	 * A magic number is passed in %o5 to allow for bootloaders
3888	 * that know nothing about the bootinfo structure or previous
3889	 * DDB symbol loading conventions.
3890	 *
3891	 * For compatibility with older versions, we check for DDB arguments
3892	 * if the older magic number is there. The loader passes `kernel_top'
3893	 * (previously known as `esym') in %o4.
3894	 *
3895	 * Note: we don't touch %o1-%o3; SunOS bootloaders seem to use them
3896	 * for their own murky business.
3897	 *
3898	 * Pre-NetBSD 1.3 bootblocks had KERNBASE compiled in, and used it
3899	 * to compute the value of `kernel_top' (previously known as `esym').
3900	 * In order to successfully boot a kernel built with a different value
3901	 * for KERNBASE using old bootblocks, we fixup `kernel_top' here by
3902	 * the difference between KERNBASE and the old value (known to be
3903	 * 0xf8000000) compiled into pre-1.3 bootblocks.
3904	 */
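/*
 * In rough C, the decoding below is (a sketch; `reloc' is the %l7
 * correction computed above):
 *
 *	if (o5 == 0x44444232) {			// bootinfo magic
 *		kernel_top = ((u_int *)o4)[0] + reloc;
 *		bootinfo = ((u_int *)o4)[1] + reloc;
 *	} else if (o5 == 0x44444231) {		// DDB_MAGIC1
 *		kernel_top = o4;
 *	} else if (o5 == 0x44444230) {		// DDB_MAGIC0, pre-1.3
 *		kernel_top = o4 - (0xf8000000 - KERNBASE);
 *	} else
 *		kernel_top = end;
 */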
3905
3906	set	0x44444232, %l3		! bootinfo magic
3907	cmp	%o5, %l3
3908	bne	1f
3909	 nop
3910
3911	/* The loader has passed to us a `bootinfo' structure */
3912	ld	[%o4], %l3		! 1st word is kernel_top
3913	add	%l3, %l7, %o5		! relocate: + KERNBASE
3914	RELOCATE(_C_LABEL(kernel_top),%l3)
3915	st	%o5, [%l3]		! and store it
3916
3917	ld	[%o4 + 4], %l3		! 2nd word is bootinfo
3918	add	%l3, %l7, %o5		! relocate
3919	RELOCATE(_C_LABEL(bootinfo),%l3)
3920	st	%o5, [%l3]		! store bootinfo
3921	b,a	4f
3922
39231:
3924#ifdef DDB
3925	/* Check for old-style DDB loader magic */
3926	set	KERNBASE, %l4
3927	set	0x44444231, %l3		! Is it DDB_MAGIC1?
3928	cmp	%o5, %l3
3929	be,a	2f
3930	 clr	%l4			! if DDB_MAGIC1, clear %l4
3931
3932	set	0x44444230, %l3		! Is it DDB_MAGIC0?
3933	cmp	%o5, %l3		! if so, need to relocate %o4
3934	bne	3f			/* if not, there's no bootloader info */
3935
3936					! note: %l4 set to KERNBASE above.
3937	set	0xf8000000, %l5		! compute correction term:
3938	sub	%l5, %l4, %l4		!  old KERNBASE (0xf8000000) - KERNBASE
3939
39402:
3941	tst	%o4			! do we have the symbols?
3942	bz	3f
3943	 sub	%o4, %l4, %o4		! apply compat correction
3944	sethi	%hi(_C_LABEL(kernel_top) - KERNBASE), %l3 ! and store it
3945	st	%o4, [%l3 + %lo(_C_LABEL(kernel_top) - KERNBASE)]
3946	b,a	4f
39473:
3948#endif
3949	/*
3950	 * The boot loader did not pass in a value for `kernel_top';
3951	 * let it default to `end'.
3952	 */
3953	set	end, %o4
3954	RELOCATE(_C_LABEL(kernel_top),%l3)
3955	st	%o4, [%l3]	! store kernel_top
3956
39574:
3958
3959	/*
3960	 * Sun4 passes in the `load address'.  Although possible, it's highly
3961	 * unlikely that OpenBoot would place the prom vector there.
3962	 */
3963	set	PROM_LOADADDR, %g7
3964	cmp	%o0, %g7
3965	be	is_sun4
3966	 nop
3967
3968#if defined(SUN4C) || defined(SUN4M) || defined(SUN4D)
3969	/*
3970	 * Be prepared to get OF client entry in either %o0 or %o3.
3971	 * XXX Will this ever trip on sun4d?  Let's hope not!
3972	 */
3973	cmp	%o0, 0
3974	be	is_openfirm
3975	 nop
3976
3977	mov	%o0, %g7		! save romp passed by boot code
3978
3979	/* First, check `romp->pv_magic' */
3980	ld	[%g7 + PV_MAGIC], %o0	! v = pv->pv_magic
3981	set	OBP_MAGIC, %o1
3982	cmp	%o0, %o1		! if ( v != OBP_MAGIC) {
3983	bne	is_sun4m		!    assume this is an OPENFIRM machine
3984	 nop				! }
3985
3986	/*
3987	 * are we on a sun4c or a sun4m or a sun4d?
3988	 */
3989	ld	[%g7 + PV_NODEOPS], %o4	! node = pv->pv_nodeops->no_nextnode(0)
3990	ld	[%o4 + NO_NEXTNODE], %o4
3991	call	%o4
3992	 mov	0, %o0			! node
3993
3994	!mov	%o0, %l0
3995	RELOCATE(cputypvar,%o1)		! name = "compatible"
3996	RELOCATE(cputypval,%l2)		! buffer ptr (assume buffer long enough)
3997	ld	[%g7 + PV_NODEOPS], %o4	! (void)pv->pv_nodeops->no_getprop(...)
3998	ld	[%o4 + NO_GETPROP], %o4
3999	call	 %o4
4000	 mov	%l2, %o2
4001	!set	cputypval-KERNBASE, %o2	! buffer ptr
4002	ldub	[%l2 + 4], %o0		! which is it... "sun4c", "sun4m", "sun4d"?
4003	cmp	%o0, 'c'
4004	be	is_sun4c
4005	 nop
4006	cmp	%o0, 'm'
4007	be	is_sun4m
4008	 nop
4009	cmp	%o0, 'd'
4010	be	is_sun4d
4011	 nop
4012#endif /* SUN4C || SUN4M || SUN4D */
4013
4014	/*
4015	 * Don't know what type of machine this is; just halt back
4016	 * out to the PROM.
4017	 */
4018	ld	[%g7 + PV_HALT], %o1	! fetch the PROM halt routine
4019	call	%o1
4020	 nop
4021
4022is_openfirm:
4023	! OF client entry in %o3 (kernel booted directly by PROM?)
4024	mov	%o3, %g7
4025	/* FALLTHROUGH to sun4m case */
4026
4027is_sun4m:
4028#if defined(SUN4M)
4029	set	trapbase_sun4m, %g6
4030	mov	SUN4CM_PGSHIFT, %g5
4031	b	start_havetype
4032	 mov	CPU_SUN4M, %g4
4033#else
4034	RELOCATE(sun4m_notsup,%o0)
4035	ld	[%g7 + PV_EVAL], %o1
4036	call	%o1			! print a message saying that the
4037	 nop				! sun4m architecture is not supported
4038	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
4039	call	%o1
4040	 nop
4041	/*NOTREACHED*/
4042#endif
4043is_sun4d:
4044#if defined(SUN4D)
4045	set	trapbase_sun4m, %g6	/* XXXJRT trapbase_sun4d */
4046	mov	SUN4CM_PGSHIFT, %g5
4047	b	start_havetype
4048	 mov	CPU_SUN4D, %g4
4049#else
4050	RELOCATE(sun4d_notsup,%o0)
4051	ld	[%g7 + PV_EVAL], %o1
4052	call	%o1			! print a message saying that the
4053	 nop				! sun4d architecture is not supported
4054	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
4055	call	%o1
4056	 nop
4057	/*NOTREACHED*/
4058#endif
4059is_sun4c:
4060#if defined(SUN4C)
4061	set	trapbase_sun4c, %g6
4062	mov	SUN4CM_PGSHIFT, %g5
4063
4064	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
4065	stba	%g0, [%g1] ASI_CONTROL
4066
4067	b	start_havetype
4068	 mov	CPU_SUN4C, %g4		! XXX CPU_SUN4
4069#else
4070	RELOCATE(sun4c_notsup,%o0)
4071
4072	ld	[%g7 + PV_ROMVEC_VERS], %o1
4073	cmp	%o1, 0
4074	bne	1f
4075	 nop
4076
4077	! stupid version 0 rom interface is pv_eval(int length, char *string)
4078	mov	%o0, %o1
40792:	ldub	[%o0], %o4
4080	tst	%o4
4081	bne	2b
4082	 inc	%o0
4083	dec	%o0
4084	sub	%o0, %o1, %o0
4085
40861:	ld	[%g7 + PV_EVAL], %o2
4087	call	%o2			! print a message saying that the
4088	 nop				! sun4c architecture is not supported
4089	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
4090	call	%o1
4091	 nop
4092	/*NOTREACHED*/
4093#endif
4094is_sun4:
4095#if defined(SUN4)
4096	set	trapbase_sun4, %g6
4097	mov	SUN4_PGSHIFT, %g5
4098
4099	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
4100	stba	%g0, [%g1] ASI_CONTROL
4101
4102	b	start_havetype
4103	 mov	CPU_SUN4, %g4
4104#else
4105	set	PROM_BASE, %g7
4106
4107	RELOCATE(sun4_notsup,%o0)
4108	ld	[%g7 + OLDMON_PRINTF], %o1
4109	call	%o1			! print a message saying that the
4110	 nop				! sun4 architecture is not supported
4111	ld	[%g7 + OLDMON_HALT], %o1 ! by this kernel, then halt
4112	call	%o1
4113	 nop
4114	/*NOTREACHED*/
4115#endif
4116
4117start_havetype:
4118	cmp	%l7, 0
4119	be	startmap_done
4120
4121	/*
4122	 * Step 1: double map low RAM (addresses [0.._end-start-1])
4123	 * to KERNBASE (addresses [KERNBASE.._end-1]).  None of these
4124	 * are `bad' aliases (since they are all on segment boundaries)
4125	 * so we do not have to worry about cache aliasing.
4126	 *
4127	 * We map in another couple of segments just to have some
4128	 * more memory (512K, actually) guaranteed available for
4129	 * bootstrap code (pmap_bootstrap needs memory to hold MMU
4130	 * and context data structures). Note: this is only relevant
4131	 * for 2-level MMU sun4/sun4c machines.
4132	 */
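	/*
	 * For the segment-map cases this amounts to (a sketch;
	 * SEGSIZ == 1 << 18 here):
	 *
	 *	for (lowva = 0, highva = KERNBASE; highva < last_va;
	 *	     lowva += SEGSIZ, highva += SEGSIZ)
	 *		setsegmap(highva, getsegmap(lowva));
	 */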
4133	clr	%l0			! lowva
4134	set	KERNBASE, %l1		! highva
4135
4136	sethi	%hi(_C_LABEL(kernel_top) - KERNBASE), %o0
4137	ld	[%o0 + %lo(_C_LABEL(kernel_top) - KERNBASE)], %o1
4138	set	(2 << 18), %o2		! add slack for sun4c MMU
4139	add	%o1, %o2, %l2		! last va that must be remapped
4140
4141	/*
4142	 * Need different initial mapping functions for different
4143	 * types of machines.
4144	 */
4145#if defined(SUN4C)
4146	cmp	%g4, CPU_SUN4C
4147	bne	1f
4148	 set	1 << 18, %l3		! segment size in bytes
41490:
4150	lduba	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
4151	stba	%l4, [%l1] ASI_SEGMAP
4152	add	%l3, %l1, %l1		! highva += segsiz;
4153	cmp	%l1, %l2		! done?
4154	blu	0b			! no, loop
4155	 add	%l3, %l0, %l0		! (and lowva += segsz)
4156	b,a	startmap_done
41571:
4158#endif /* SUN4C */
4159
4160#if defined(SUN4)
4161	cmp	%g4, CPU_SUN4
4162	bne	2f
4163#if defined(SUN4_MMU3L)
4164	set	AC_IDPROM+1, %l3
4165	lduba	[%l3] ASI_CONTROL, %l3
4166	cmp	%l3, 0x24 ! XXX - SUN4_400
4167	bne	no_3mmu
4168	 nop
4169
4170	/*
4171	 * Three-level sun4 MMU.
4172	 * Double-map by duplicating a single region entry (which covers
4173	 * 16MB) corresponding to the kernel's virtual load address.
4174	 */
4175	add	%l0, 2, %l0		! get to proper half-word in RG space
4176	add	%l1, 2, %l1
4177	lduha	[%l0] ASI_REGMAP, %l4	! regmap[highva] = regmap[lowva];
4178	stha	%l4, [%l1] ASI_REGMAP
4179	b,a	startmap_done
4180no_3mmu:
4181#endif
4182
4183	/*
4184	 * Two-level sun4 MMU.
4185	 * Double-map by duplicating the required number of segment
4186	 * entries corresponding to the kernel's virtual load address.
4187	 */
4188	set	1 << 18, %l3		! segment size in bytes
41890:
4190	lduha	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
4191	stha	%l4, [%l1] ASI_SEGMAP
4192	add	%l3, %l1, %l1		! highva += segsiz;
4193	cmp	%l1, %l2		! done?
4194	blu	0b			! no, loop
4195	 add	%l3, %l0, %l0		! (and lowva += segsz)
4196	b,a	startmap_done
41972:
4198#endif /* SUN4 */
4199
4200#if defined(SUN4M) || defined(SUN4D)
4201	cmp	%g4, CPU_SUN4M
4202	beq	3f
4203	 nop
4204	cmp	%g4, CPU_SUN4D
4205	bne	4f
4206
42073:
4208	/*
4209	 * The OBP guarantees us a 16MB mapping using a level 1 PTE at
4210	 * the start of the memory bank in which we were loaded. All we
4211	 * have to do is copy the entry.
4212	 * Also, we must check to see if we have a TI Viking in non-mbus mode,
4213	 * and if so do appropriate flipping and turning off traps before
4214	 * we dork with MMU passthrough.  -grrr
4215	 */
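	/*
	 * In C terms the copy below is roughly (a sketch; physical
	 * accesses are via ASI_BYPASS, and RGSHIFT == 24):
	 *
	 *	l1tab = (ctxtbl[0] >> 4) << 8;	// phys addr of level-1 table
	 *	l1tab[KERNBASE >> RGSHIFT] = l1tab[0];	// copy 16MB region entry
	 *
	 * On a non-Mbus Viking this has to be bracketed by: traps off,
	 * set the AC bit (0x8000) in the MMU control register, copy,
	 * then restore the control register and %psr.
	 */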
4216
4217	sethi	%hi(0x40000000), %o1	! TI version bit
4218	rd	%psr, %o0
4219	andcc	%o0, %o1, %g0
4220	be	remap_notvik		! is non-TI normal MBUS module
4221	lda	[%g0] ASI_SRMMU, %o0	! load MMU
4222	andcc	%o0, 0x800, %g0
4223	bne	remap_notvik		! It is a viking MBUS module
4224	nop
4225
4226	/*
4227	 * Ok, we have a non-Mbus TI Viking, a MicroSparc.
4228	 * In this scenario, in order to play with the MMU
4229	 * passthrough safely, we need to turn off traps, flip
4230	 * the AC bit on in the mmu status register, do our
4231	 * passthroughs, then restore the mmu reg and %psr
4232	 */
4233	rd	%psr, %o4		! saved here till done
4234	andn	%o4, 0x20, %o5
4235	wr	%o5, 0x0, %psr
4236	nop; nop; nop;
4237	set	SRMMU_CXTPTR, %o0
4238	lda	[%o0] ASI_SRMMU, %o0	! get context table ptr
4239	sll	%o0, 4, %o0		! make physical
4240	lda	[%g0] ASI_SRMMU, %o3	! hold mmu-sreg here
4241	/* 0x8000 is AC bit in Viking mmu-ctl reg */
4242	set	0x8000, %o2
4243	or	%o3, %o2, %o2
4244	sta	%o2, [%g0] ASI_SRMMU	! AC bit on
4245
4246	lda	[%o0] ASI_BYPASS, %o1
4247	srl	%o1, 4, %o1
4248	sll	%o1, 8, %o1		! get phys addr of l1 entry
4249	lda	[%o1] ASI_BYPASS, %l4
4250	srl	%l1, 22, %o2		! note: 22 == RGSHIFT - 2
4251	add	%o1, %o2, %o1
4252	sta	%l4, [%o1] ASI_BYPASS
4253
4254	sta	%o3, [%g0] ASI_SRMMU	! restore mmu-sreg
4255	wr	%o4, 0x0, %psr		! restore psr
4256	b,a	startmap_done
4257
4258	/*
4259	 * The following is generic and should work on all
4260	 * Mbus based SRMMU's.
4261	 */
4262remap_notvik:
4263	set	SRMMU_CXTPTR, %o0
4264	lda	[%o0] ASI_SRMMU, %o0	! get context table ptr
4265	sll	%o0, 4, %o0		! make physical
4266	lda	[%o0] ASI_BYPASS, %o1
4267	srl	%o1, 4, %o1
4268	sll	%o1, 8, %o1		! get phys addr of l1 entry
4269	lda	[%o1] ASI_BYPASS, %l4
4270	srl	%l1, 22, %o2		! note: 22 == RGSHIFT - 2
4271	add	%o1, %o2, %o1
4272	sta	%l4, [%o1] ASI_BYPASS
4273	!b,a	startmap_done
42744:
4275#endif /* SUN4M || SUN4D */
4276	! botch! We should blow up.
4277
4278startmap_done:
4279	/*
4280	 * All set, fix pc and npc.  Once we are where we should be,
4281	 * we can give ourselves a stack and enable traps.
4282	 */
4283	set	1f, %g1
4284	jmp	%g1
4285	 nop
42861:
4287	sethi	%hi(_C_LABEL(cputyp)), %o0	! what type of CPU we are on
4288	st	%g4, [%o0 + %lo(_C_LABEL(cputyp))]
4289
4290	sethi	%hi(_C_LABEL(pgshift)), %o0	! pgshift = log2(nbpg)
4291	st	%g5, [%o0 + %lo(_C_LABEL(pgshift))]
4292
4293	mov	1, %o0			! nbpg = 1 << pgshift
4294	sll	%o0, %g5, %g5
4295	sethi	%hi(_C_LABEL(nbpg)), %o0	! nbpg = bytes in a page
4296	st	%g5, [%o0 + %lo(_C_LABEL(nbpg))]
4297
4298	sub	%g5, 1, %g5
4299	sethi	%hi(_C_LABEL(pgofset)), %o0 ! page offset = bytes in a page - 1
4300	st	%g5, [%o0 + %lo(_C_LABEL(pgofset))]
4301
4302	rd	%psr, %g3		! paranoia: make sure ...
4303	andn	%g3, PSR_ET, %g3	! we have traps off
4304	wr	%g3, 0, %psr		! so that we can fiddle safely
4305	nop; nop; nop
4306
4307	wr	%g0, 0, %wim		! make sure we can set psr
4308	nop; nop; nop
4309	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
4310	 nop; nop; nop
4311
4312	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
4313	mov	1, %g1			! set pcb_wim (log2(%wim) = 1)
4314	sethi	%hi(_C_LABEL(u0) + PCB_WIM), %g2
4315	st	%g1, [%g2 + %lo(_C_LABEL(u0) + PCB_WIM)]
4316
4317	set	USRSTACK - CCFSZ, %fp	! as if called from user code
4318	set	estack0 - CCFSZ - 80, %sp ! via syscall(boot_me_up) or somesuch
4319	rd	%psr, %l0
4320	wr	%l0, PSR_ET, %psr
4321	nop; nop; nop
4322
4323	/* Export actual trapbase */
4324	sethi	%hi(_C_LABEL(trapbase)), %o0
4325	st	%g6, [%o0+%lo(_C_LABEL(trapbase))]
4326
4327#ifdef notdef
4328	/*
4329	 * Step 2: clear BSS.  This may just be paranoia; the boot
4330	 * loader might already do it for us; but what the hell.
4331	 */
4332	set	_edata, %o0		! bzero(edata, end - edata)
4333	set	_end, %o1
4334	call	_C_LABEL(bzero)
4335	 sub	%o1, %o0, %o1
4336#endif
4337
4338	/*
4339	 * Stash prom vectors now, after bzero, as `romp' lives in bss
4340	 * (which we just zeroed).
4341	 * This depends on the fact that bzero does not use %g7.
4342	 */
4343	sethi	%hi(_C_LABEL(romp)), %l0
4344	st	%g7, [%l0 + %lo(_C_LABEL(romp))]
4345
4346	/*
4347	 * Step 3: compute number of windows and set up tables.
4348	 * We could do some of this later.
4349	 */
4350	save	%sp, -64, %sp
4351	rd	%psr, %g1
4352	restore
4353	and	%g1, 31, %g1		! want just the CWP bits
4354	add	%g1, 1, %o0		! compute nwindows
4355	sethi	%hi(_C_LABEL(nwindows)), %o1	! may as well tell everyone
4356	call	init_tables
4357	 st	%o0, [%o1 + %lo(_C_LABEL(nwindows))]
4358
4359#if defined(SUN4) || defined(SUN4C)
4360	/*
4361	 * Some sun4/sun4c models have fewer than 8 windows. For extra
4362	 * speed, we do not need to save/restore those windows.
4363	 * The save/restore code has 6 "save"'s followed by 6
4364	 * "restore"'s -- we "nop" out the last "save" and first
4365	 * "restore".
4366	 */
4367	cmp	%o0, 8
4368	be	1f
4369noplab:	 nop
4370	sethi	%hi(noplab), %l0
4371	ld	[%l0 + %lo(noplab)], %l1
4372	set	Lwb1, %l0
4373	st	%l1, [%l0 + 5*4]
4374	st	%l1, [%l0 + 6*4]
43751:
4376#endif
4377
4378#if (defined(SUN4) || defined(SUN4C)) && (defined(SUN4M) || defined(SUN4D))
4379
4380	/*
4381	 * Patch instructions at specified labels that start
4382	 * per-architecture code-paths.
4383	 */
4384Lgandul:	nop
4385
4386#define MUNGE(label) \
4387	sethi	%hi(label), %o0; \
4388	st	%l0, [%o0 + %lo(label)]
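/*
 * E.g. MUNGE(NOP_ON_4M_1) stores the nop instruction word (fetched
 * from Lgandul above) over the instruction at label NOP_ON_4M_1,
 * turning off a sun4/sun4c-only code path when we find ourselves
 * running on a sun4m or sun4d.
 */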
4389
4390	sethi	%hi(Lgandul), %o0
4391	ld	[%o0 + %lo(Lgandul)], %l0	! %l0 = NOP
4392
4393	cmp	%g4, CPU_SUN4M
4394	beq,a	2f
4395	 nop
4396
4397	cmp	%g4, CPU_SUN4D
4398	bne,a	1f
4399	 nop
4400
44012:	! this should be automated!
4402	MUNGE(NOP_ON_4M_1)
4403	MUNGE(NOP_ON_4M_2)
4404	MUNGE(NOP_ON_4M_3)
4405	MUNGE(NOP_ON_4M_4)
4406	MUNGE(NOP_ON_4M_5)
4407	MUNGE(NOP_ON_4M_6)
4408	MUNGE(NOP_ON_4M_7)
4409	MUNGE(NOP_ON_4M_8)
4410	MUNGE(NOP_ON_4M_9)
4411	MUNGE(NOP_ON_4M_10)
4412	MUNGE(NOP_ON_4M_11)
4413	MUNGE(NOP_ON_4M_12)
4414	b,a	2f
4415
44161:
4417#if 0 /* currently there are no NOP_ON_4_4C_* */
4418	MUNGE(NOP_ON_4_4C_1)
4419#endif
4420
44212:
4422
4423#undef MUNGE
4424#endif /* (SUN4 || SUN4C) && (SUN4M || SUN4D) */
4425
4426	/*
4427	 * Step 4: change the trap base register, now that our trap handlers
4428	 * will function (they need the tables we just set up).
4429	 * This depends on the fact that memset does not use %g6.
4430	 */
4431	wr	%g6, 0, %tbr
4432	nop; nop; nop			! paranoia
4433
4434	/* Clear `cpuinfo': memset(&cpuinfo, 0, sizeof cpuinfo) */
4435	sethi	%hi(CPUINFO_VA), %o0
4436	set	CPUINFO_STRUCTSIZE, %o2
4437	call	_C_LABEL(memset)
4438	 clr	%o1
4439
4440	/*
4441	 * Initialize `cpuinfo' fields which are needed early.  Note
4442	 * we make the cpuinfo self-reference at the local VA for now.
4443	 * It may be changed to reference a global VA later.
4444	 */
4445	set	_C_LABEL(u0), %o0		! cpuinfo.curpcb = u0;
4446	sethi	%hi(cpcb), %l0
4447	st	%o0, [%l0 + %lo(cpcb)]
4448
4449	sethi	%hi(CPUINFO_VA), %o0		! cpuinfo.ci_self = &cpuinfo;
4450	sethi	%hi(_CISELFP), %l0
4451	st	%o0, [%l0 + %lo(_CISELFP)]
4452
4453	set	_C_LABEL(eintstack), %o0	! cpuinfo.eintstack= _eintstack;
4454	sethi	%hi(_EINTSTACKP), %l0
4455	st	%o0, [%l0 + %lo(_EINTSTACKP)]
4456
4457	/*
4458	 * Ready to run C code; finish bootstrap.
4459	 */
4460	call	_C_LABEL(bootstrap)
4461	 nop
4462
4463	/*
4464	 * Call main.  This returns to us after loading /sbin/init into
4465	 * user space.  (If the exec fails, main() does not return.)
4466	 */
4467	call	_C_LABEL(main)
4468	 clr	%o0			! our frame arg is ignored
4469	/*NOTREACHED*/
4470
4471/*
4472 * Openfirmware entry point: openfirmware(void *args)
4473 */
4474ENTRY(openfirmware)
4475	sethi	%hi(_C_LABEL(romp)), %o1
4476	ld	[%o1 + %lo(_C_LABEL(romp))], %o2
4477	jmp	%o2
4478	 nop
4479
4480#if defined(SUN4M) || defined(SUN4D)
4481/*
4482 * V8 multiply and divide routines, to be copied over the code
4483 * for the V6/V7 routines.  Seems a shame to spend the call, but....
4484	 * Note: while .umul and .smul return a 64-bit result in %o1:%o0,
4485 * gcc only really cares about the low 32 bits in %o0.  This is
4486 * really just gcc output, cleaned up a bit.
4487 */
4488	.globl	_C_LABEL(sparc_v8_muldiv)
4489_C_LABEL(sparc_v8_muldiv):
4490	save    %sp, -CCFSZ, %sp
4491
4492#define	OVERWRITE(rtn, v8_rtn, len)	\
4493	set	v8_rtn, %o0;		\
4494	set	rtn, %o1;		\
4495	call	_C_LABEL(bcopy);	\
4496	 mov	len, %o2;		\
4497	/* now flush the insn cache */	\
4498	set	rtn, %o0;		\
4499	 mov	len, %o1;		\
45000:					\
4501	flush	%o0;			\
4502	subcc	%o1, 8, %o1;		\
4503	bgu	0b;			\
4504	 add	%o0, 8, %o0;		\
4505
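/*
 * Each OVERWRITE() is, in C terms (a sketch; `flush' is the V8 FLUSH
 * instruction, needed to evict the stale V6/V7 code from the
 * instruction cache):
 *
 *	memcpy(rtn, v8_rtn, len);
 *	for (p = rtn; len > 0; p += 8, len -= 8)
 *		flush(p);
 */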
4506	OVERWRITE(.mul,  v8_smul, .Lv8_smul_len)
4507	OVERWRITE(.umul, v8_umul, .Lv8_umul_len)
4508	OVERWRITE(.div,  v8_sdiv, .Lv8_sdiv_len)
4509	OVERWRITE(.udiv, v8_udiv, .Lv8_udiv_len)
4510	OVERWRITE(.rem,  v8_srem, .Lv8_srem_len)
4511	OVERWRITE(.urem, v8_urem, .Lv8_urem_len)
4512#undef	OVERWRITE
4513	ret
4514	 restore
4515
4516v8_smul:
4517	retl
4518	 smul	%o0, %o1, %o0
4519.Lv8_smul_len = .-v8_smul
4520v8_umul:
4521	retl
4522	 umul	%o0, %o1, %o0
4523!v8_umul_len = 2 * 4
4524.Lv8_umul_len = .-v8_umul
4525v8_sdiv:
4526	sra	%o0, 31, %g2
4527	wr	%g2, 0, %y
4528	nop; nop; nop
4529	retl
4530	 sdiv	%o0, %o1, %o0
4531.Lv8_sdiv_len = .-v8_sdiv
4532v8_udiv:
4533	wr	%g0, 0, %y
4534	nop; nop; nop
4535	retl
4536	 udiv	%o0, %o1, %o0
4537.Lv8_udiv_len = .-v8_udiv
4538v8_srem:
4539	sra	%o0, 31, %g3
4540	wr	%g3, 0, %y
4541	nop; nop; nop
4542	sdiv	%o0, %o1, %g2
4543	smul	%g2, %o1, %g2
4544	retl
4545	 sub	%o0, %g2, %o0
4546.Lv8_srem_len = .-v8_srem
4547v8_urem:
4548	wr	%g0, 0, %y
4549	nop; nop; nop
4550	udiv	%o0, %o1, %g2
4551	smul	%g2, %o1, %g2
4552	retl
4553	 sub	%o0, %g2, %o0
4554.Lv8_urem_len = .-v8_urem
4555
4556#endif /* SUN4M || SUN4D */
4557
4558#if defined(MULTIPROCESSOR)
4559	/*
4560	 * Entry point for non-boot CPUs in MP systems.
4561	 */
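	/*
	 * In outline (a sketch): disable traps; set %wim, %psr and
	 * %tbr much as dostart did; switch onto this CPU's idlelwp
	 * stack; call cpu_setup(); drop to spl0; spin until
	 * go_smp_cpus becomes nonzero; then enter idle_loop.
	 */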
4562	.globl	_C_LABEL(cpu_hatch)
4563_C_LABEL(cpu_hatch):
4564	rd	%psr, %g3		! paranoia: make sure ...
4565	andn	%g3, PSR_ET, %g3	! we have traps off
4566	wr	%g3, 0, %psr		! so that we can fiddle safely
4567	nop; nop; nop
4568
4569	wr	%g0, 0, %wim		! make sure we can set psr
4570	nop; nop; nop
4571	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
4572	nop; nop; nop
4573
4574	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
4575
4576	/* Initialize Trap Base register */
4577	sethi	%hi(_C_LABEL(trapbase)), %o0
4578	ld	[%o0+%lo(_C_LABEL(trapbase))], %g6
4579	wr	%g6, 0, %tbr
4580	nop; nop; nop			! paranoia
4581
4582	/*
4583	 * Use this CPU's idlelwp's stack.
4584	 */
4585	sethi	%hi(cpcb), %o0
4586	ld	[%o0 + %lo(cpcb)], %o0
4587	set	USPACE - 80 - CCFSZ, %sp
4588	add	%o0, %sp, %sp
4589
4590	add	80, %sp, %fp
4591
4592	/* Enable traps */
4593	rd	%psr, %l0
4594	wr	%l0, PSR_ET, %psr
4595	nop; nop
4596
4597	/* Call C code */
4598	call	_C_LABEL(cpu_setup)
4599	 nop				! 3rd from above
4600
4601	/* Enable interrupts */
4602	rd	%psr, %l0
4603	andn	%l0, PSR_PIL, %l0	! psr &= ~PSR_PIL;
4604	wr	%l0, 0, %psr		! (void) spl0();
4605	nop; nop; nop
4606
4607	/* Wait for go_smp_cpus to go */
4608	set	_C_LABEL(go_smp_cpus), %l1
4609	ld	[%l1], %l0
46101:
4611	cmp	%l0, %g0
4612	be	1b
4613	 ld	[%l1], %l0
4614
4615	b	idle_loop
4616	 nop
4617
4618#endif /* MULTIPROCESSOR */
4619
4620#ifdef COMPAT_16
4621#include "sigcode_state.s"
4622
4623	.globl	_C_LABEL(sigcode)
4624	.globl	_C_LABEL(esigcode)
4625_C_LABEL(sigcode):
4626
4627	SAVE_STATE
4628
4629	ldd	[%fp + 64], %o0		! sig, code
4630	ld	[%fp + 76], %o3		! arg3
4631	call	%g1			! (*sa->sa_handler)(sig,code,scp,arg3)
4632	 add	%fp, 64 + 16, %o2	! scp
4633
4634	RESTORE_STATE
4635
4636	! get registers back & set syscall #
4637	restore	%g0, SYS_compat_16___sigreturn14, %g1
4638	add	%sp, 64 + 16, %o0	! compute scp
4639	t	ST_SYSCALL		! sigreturn(scp)
4640	! sigreturn does not return unless it fails
4641	mov	SYS_exit, %g1		! exit(errno)
4642	t	ST_SYSCALL
4643	/* NOTREACHED */
4644_C_LABEL(esigcode):
4645#endif /* COMPAT_16 */
4646
4647
4648/*
4649 * Primitives
4650 */
4651
4652/*
4653 * General-purpose NULL routine.
4654 */
4655ENTRY(sparc_noop)
4656	retl
4657	 nop
4658
4659/*
4660 * getfp() - get stack frame pointer
4661 */
4662ENTRY(getfp)
4663	retl
4664	 mov %fp, %o0
4665
4666/*
4667 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
4668 *
4669 * Copy a null terminated string from the user address space into
4670 * the kernel address space.
4671 */
4672ENTRY(copyinstr)
4673	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
4674	mov	%o1, %o5		! save = toaddr;
4675	tst	%o2			! maxlen == 0?
4676	beq,a	Lcstoolong		! yes, return ENAMETOOLONG
4677	 sethi	%hi(cpcb), %o4
4678
4679	set	KERNBASE, %o4
4680	cmp	%o0, %o4		! fromaddr < KERNBASE?
4681	blu	Lcsdocopy		! yes, go do it
4682	 sethi	%hi(cpcb), %o4		! (first instr of copy)
4683
4684	b	Lcsdone			! no, return EFAULT
4685	 mov	EFAULT, %o0
4686
4687/*
4688 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
4689 *
4690 * Copy a null terminated string from the kernel
4691 * address space to the user address space.
4692 */
4693ENTRY(copyoutstr)
4694	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
4695	mov	%o1, %o5		! save = toaddr;
4696	tst	%o2			! maxlen == 0?
4697	beq,a	Lcstoolong		! yes, return ENAMETOOLONG
4698	 sethi	%hi(cpcb), %o4
4699
4700	set	KERNBASE, %o4
4701	cmp	%o1, %o4		! toaddr < KERNBASE?
4702	blu	Lcsdocopy		! yes, go do it
4703	 sethi	%hi(cpcb), %o4		! (first instr of copy)
4704
4705	b	Lcsdone			! no, return EFAULT
4706	 mov	EFAULT, %o0
4707
4708Lcsdocopy:
4709!	sethi	%hi(cpcb), %o4		! (done earlier)
4710	ld	[%o4 + %lo(cpcb)], %o4	! catch faults
4711	set	Lcsdone, %g1
4712	st	%g1, [%o4 + PCB_ONFAULT]
4713
4714! XXX should do this in bigger chunks when possible
47150:					! loop:
4716	ldsb	[%o0], %g1		!	c = *fromaddr;
4717	tst	%g1
4718	stb	%g1, [%o1]		!	*toaddr++ = c;
4719	be	1f			!	if (c == NULL)
4720	 inc	%o1			!		goto ok;
4721	deccc	%o2			!	if (--len > 0) {
4722	bgu	0b			!		fromaddr++;
4723	 inc	%o0			!		goto loop;
4724					!	}
4725Lcstoolong:				!
4726	b	Lcsdone			!	error = ENAMETOOLONG;
4727	 mov	ENAMETOOLONG, %o0	!	goto done;
47281:					! ok:
4729	clr	%o0			!    error = 0;
4730Lcsdone:				! done:
4731	sub	%o1, %o5, %o1		!	len = to - save;
4732	tst	%o3			!	if (lencopied)
4733	bnz,a	3f
4734	 st	%o1, [%o3]		!		*lencopied = len;
47353:
4736	retl				! cpcb->pcb_onfault = 0;
4737	 st	%g0, [%o4 + PCB_ONFAULT]! return (error);
4738
4739/*
4740 * Copyin(src, dst, len)
4741 *
4742 * Copy specified amount of data from user space into the kernel.
4743 */
4744ENTRY(copyin)
4745	set	KERNBASE, %o3
4746	cmp	%o0, %o3		! src < KERNBASE?
4747	blu,a	Ldocopy			! yes, can try it
4748	 sethi	%hi(cpcb), %o3
4749
4750	/* source address points into kernel space: return EFAULT */
4751	retl
4752	 mov	EFAULT, %o0
4753
4754/*
4755 * Copyout(src, dst, len)
4756 *
4757 * Copy specified amount of data from kernel to user space.
4758 * Just like copyin, except that the `dst' addresses are user space
4759 * rather than the `src' addresses.
4760 */
4761ENTRY(copyout)
4762	set	KERNBASE, %o3
4763	cmp	%o1, %o3		! dst < KERNBASE?
4764	blu,a	Ldocopy
4765	 sethi	%hi(cpcb), %o3
4766
4767	/* destination address points into kernel space: return EFAULT */
4768	retl
4769	 mov	EFAULT, %o0
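/*
 * Both routines thus behave like this C sketch (copyout swaps which
 * pointer gets the KERNBASE check):
 *
 *	int
 *	copyin(const void *src, void *dst, size_t len)
 *	{
 *		if ((u_int)src >= KERNBASE)
 *			return (EFAULT);
 *		cpcb->pcb_onfault = Lcopyfault;
 *		bcopy(src, dst, len);	// a fault jumps to Lcopyfault
 *		cpcb->pcb_onfault = NULL;
 *		return (0);
 *	}
 */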
4770
4771	/*
4772	 * ******NOTE****** this depends on bcopy() not using %g7
4773	 */
4774Ldocopy:
4775!	sethi	%hi(cpcb), %o3
4776	ld	[%o3 + %lo(cpcb)], %o3
4777	set	Lcopyfault, %o4
4778	mov	%o7, %g7		! save return address
4779	call	_C_LABEL(bcopy)		! bcopy(src, dst, len)
4780	 st	%o4, [%o3 + PCB_ONFAULT]
4781
4782	sethi	%hi(cpcb), %o3
4783	ld	[%o3 + %lo(cpcb)], %o3
4784	st	%g0, [%o3 + PCB_ONFAULT]
4785	jmp	%g7 + 8
4786	 clr	%o0			! return 0
4787
4788! Copyin or copyout fault.  Clear cpcb->pcb_onfault.
4789! The return value was already put in %o0 by the fault handler.
4790! Note that although we were in bcopy, there is no state to clean up;
4791! the only special thing is that we have to return to [g7 + 8] rather than
4792! [o7 + 8].
4793Lcopyfault:
4794	sethi	%hi(cpcb), %o3
4795	ld	[%o3 + %lo(cpcb)], %o3
4796	jmp	%g7 + 8
4797	 st	%g0, [%o3 + PCB_ONFAULT]
4798
4799
4800/*
4801 * Write all user windows presently in the CPU back to the user's stack.
4802 * We just do `save' instructions until pcb_uw == 0.
4803 *
4804 *	p = cpcb;
4805 *	nsaves = 0;
4806 *	while (p->pcb_uw > 0)
4807 *		save(), nsaves++;
4808 *	while (--nsaves >= 0)
4809 *		restore();
4810 */
4811ENTRY(write_user_windows)
4812	sethi	%hi(cpcb), %g6
4813	ld	[%g6 + %lo(cpcb)], %g6
4814	b	2f
4815	 clr	%g5
48161:
4817	save	%sp, -64, %sp
48182:
4819	ld	[%g6 + PCB_UW], %g7
4820	tst	%g7
4821	bg,a	1b
4822	 inc	%g5
48233:
4824	deccc	%g5
4825	bge,a	3b
4826	 restore
4827	retl
4828	 nop
4829
4830/*
4831 * cpu_switchto() runs an lwp, saving the current one away.
4832 */
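/*
 * In outline (a sketch; register details below):
 *
 *	struct lwp *
 *	cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp)
 *	{
 *		save %sp/%pc/%psr into cpcb;
 *		write back all in-use register windows;
 *		cpcb = newlwp->l_pcb;  curlwp = newlwp;
 *		%wim = 1 << newpcb->pcb_wim;  %psr = newpcb->pcb_psr;
 *		switch to newpcb->pcb_sp/pcb_pc;  check for RAS;
 *		return (oldlwp);
 *	}
 */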
4833ENTRY(cpu_switchto)
4834	/*
4835	 * Register Usage:
4836	 *	%g1 = oldlwp (return value)
4837	 *	%g2 = psr
4838	 *	%g3 = newlwp
4839	 *	%g5 = newpcb
4840	 *	%l1 = oldpsr (excluding ipl bits)
4841	 *	%l6 = %hi(cpcb)
4842	 *	%o0 = tmp 1
4843	 *	%o1 = tmp 2
4844	 *	%o2 = tmp 3
4845	 *	%o3 = vmspace->vm_pmap
4846	 */
4847	save	%sp, -CCFSZ, %sp
4848	mov	%i0, %g1			! save oldlwp
4849	mov	%i1, %g3			! and newlwp
4850
4851	sethi	%hi(cpcb), %l6
4852
4853	rd	%psr, %l1			! psr = %psr;
4854
4855	ld	[%l6 + %lo(cpcb)], %o0
4856
4857	std	%i6, [%o0 + PCB_SP]		! cpcb->pcb_<sp,pc> = <fp,pc>;
4858
4859	st	%l1, [%o0 + PCB_PSR]		! cpcb->pcb_psr = psr;
4860
4861	/*
4862	 * Save the old process: write back all windows (excluding
4863	 * the current one).  XXX crude; knows nwindows <= 8
4864	 */
4865#define	SAVE save %sp, -64, %sp
4866Lwb1:	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE;	/* 6 of each: */
4867	restore; restore; restore; restore; restore; restore
4868
4869	andn	%l1, PSR_PIL, %l1		! oldpsr &= ~PSR_PIL;
4870
4871	/*
4872	 * Load the new process.  To load, we must change stacks
4873	 * and alter cpcb.  We must also load the CWP and WIM from the
4874	 * new process' PCB, since, when we finally return from
4875	 * the trap, the CWP of the trap window must match the
4876	 * CWP stored in the trap frame.
4877	 *
4878	 * Once the new CWP is set below, our local registers become
4879	 * invalid, so we use globals at that point for any values
4880	 * we need afterwards.
4881	 */
4882
4883	ld	[%g3 + L_PCB], %g5	! newpcb
4884	ld	[%g5 + PCB_PSR], %g2    ! cwpbits = newpcb->pcb_psr;
4885
4886	/* traps off while we switch to the new stack */
4887	wr	%l1, (IPL_SCHED << 8) | PSR_ET, %psr
4888
4889	/* set new cpcb, and curlwp */
4890	sethi	%hi(curlwp), %l7
4891	st	%g5, [%l6 + %lo(cpcb)]		! cpcb = newpcb;
4892
4893	/*
4894	 * Issue barriers to coordinate mutex_exit on this CPU with
4895	 * mutex_vector_enter on another CPU.
4896	 *
4897	 * 1. Any prior mutex_exit by oldlwp must be visible to other
4898	 *    CPUs before we set ci_curlwp := newlwp on this one,
4899	 *    requiring a store-before-store barrier.
4900	 *
4901	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
4902	 *    before any subsequent mutex_exit by newlwp can even test
4903	 *    whether there might be waiters, requiring a
4904	 *    store-before-load barrier.
4905	 *
4906	 * See kern_mutex.c for details -- this is necessary for
4907	 * adaptive mutexes to detect whether the lwp is on the CPU in
4908	 * order to safely block without requiring atomic r/m/w in
4909	 * mutex_exit.
4910	 */
4911	/* stbar -- store-before-store, not needed on TSO */
4912	st      %g3, [%l7 + %lo(curlwp)]        ! curlwp = l;
4913#ifdef MULTIPROCESSOR
4914	ldstub	[%sp - 4], %g0	/* makeshift store-before-load barrier */
4915#endif
4916
4917	/* compute new wim */
4918	ld	[%g5 + PCB_WIM], %o0
4919	mov	1, %o1
4920	sll	%o1, %o0, %o0
4921	wr	%o0, 0, %wim		! %wim = 1 << newpcb->pcb_wim;
4922
4923	/* now must not change %psr for 3 more instrs */
4924	/* Clear FP & CP enable bits, as well as the PIL field */
4925/*1,2*/	set     PSR_EF|PSR_EC|PSR_PIL, %o0
4926/*3*/	andn    %g2, %o0, %g2           ! newpsr &= ~(PSR_EF|PSR_EC|PSR_PIL);
4927	/* set new psr, but with traps disabled */
4928	wr      %g2, (IPL_SCHED << 8)|PSR_ET, %psr ! %psr = newpsr ^ PSR_ET;
4929	/* load new stack and return address */
4930	ldd	[%g5 + PCB_SP], %i6	! <fp,pc> = newpcb->pcb_<sp,pc>
4931	add	%fp, -CCFSZ, %sp	! set stack frame for this window
4932
4933#ifdef DEBUG
4934	mov	%g5, %o0
4935	SET_SP_REDZONE(%o0, %o1)
4936	CHECK_SP_REDZONE(%o0, %o1)
4937#endif
4938
4939	/* finally, enable traps and continue at splsched() */
4940	wr      %g2, IPL_SCHED << 8 , %psr      ! psr = newpsr;
4941
4942	/*
4943	 * Now running the new lwp.
4944	 */
4945
4946	/*
4947	 * Check for restartable atomic sequences (RAS)
4948	 */
4949	ld	[%g3 + L_PROC], %o0	! now %o0 points to p
4950	ld	[%o0 + P_RASLIST], %o1	! any RAS in p?
4951	cmp	%o1, 0
4952	be	Lsw_noras		! no, skip RAS check
4953	 mov	%g1, %i0		! restore oldlwp (for return value)
4954	ld	[%g3 + L_TF], %l3	! pointer to trap frame
4955	call	_C_LABEL(ras_lookup)
4956	 ld	[%l3 + TF_PC], %o1
4957	cmp	%o0, -1
4958	be	Lsw_noras
4959	 add	%o0, 4, %o1
4960	st	%o0, [%l3 + TF_PC]	! store rewound %pc
4961	st	%o1, [%l3 + TF_NPC]	! and %npc
4962
4963Lsw_noras:
4964
4965	ret
4966	 restore			! return (oldlwp)
4967
4968/*
4969 * Call the idlespin() function if it exists, otherwise just return.
4970 */
4971ENTRY(cpu_idle)
4972	sethi	%hi(CPUINFO_VA+CPUINFO_IDLESPIN), %o0
4973	ld	[%o0 + %lo(CPUINFO_VA+CPUINFO_IDLESPIN)], %o1
4974	tst	%o1
4975	bz	1f
4976	 nop
4977	jmp	%o1
4978	 nop
49791:
4980	retl
4981	 nop
4982
4983/*
4984 * Snapshot the current process so that stack frames are up to date.
4985 * Only used just before a crash dump.
4986 */
4987ENTRY(snapshot)
4988	std	%o6, [%o0 + PCB_SP]	! save sp
4989	rd	%psr, %o1		! save psr
4990	st	%o1, [%o0 + PCB_PSR]
4991
4992	/*
4993	 * Just like cpu_switchto(); same XXX comments apply.
4994	 * 7 of each.  Minor tweak: the 7th restore is
4995	 * done after a ret.
4996	 */
4997	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE
4998	restore; restore; restore; restore; restore; restore; ret; restore
4999
5000
5001/*
5002 * cpu_lwp_fork() arranges for lwp_trampoline() to run when the
5003	 * nascent lwp is selected by cpu_switchto().
5004 *
5005 * The switch frame will contain pointer to struct lwp of this lwp in
5006 * %l2, a pointer to the function to call in %l0, and an argument to
5007 * pass to it in %l1 (we abuse the callee-saved registers).
5008 *
5009 * We enter lwp_trampoline as if we are "returning" from
5010 * cpu_switchto(), so %o0 contains previous lwp (the one we are
5011 * switching from) that we pass to lwp_startup().
5012 *
5013 * If the function *(%l0) returns, we arrange for an immediate return
5014 * to user mode.  This happens in two known cases: after execve(2) of
5015 * init, and when returning a child to user mode after a fork(2).
5016 *
5017	 * If we're setting up a kernel thread, the function *(%l0) will not
5018 * return.
5019 */
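/*
 * In C terms (a sketch; func and arg are the %l0/%l1 values placed
 * in the switch frame by cpu_lwp_fork()):
 *
 *	lwp_startup(oldlwp, newlwp);
 *	(*func)(arg);
 *	// if func returns, rejoin return_from_syscall with
 *	// pc = tf->tf_pc, npc = pc + 4
 */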
5020ENTRY(lwp_trampoline)
5021	/*
5022	 * Note: cpu_lwp_fork() has set up a stack frame for us to run
5023	 * in, so we can call other functions from here without using
5024	 * `save ... restore'.
5025	 */
5026
5027	! newlwp in %l2, oldlwp already in %o0
5028	call	lwp_startup
5029	 mov	%l2, %o1
5030
5031	call	%l0
5032	 mov	%l1, %o0
5033
5034	/*
5035	 * Here we finish up as in syscall, but simplified.
5036	 * cpu_lwp_fork() (or sendsig(), if we took a pending signal
5037	 * in child_return()) will have set the user-space return
5038	 * address in tf_pc. In both cases, %npc should be %pc + 4.
5039	 */
5040	rd      %psr, %l2
5041	ld	[%sp + CCFSZ + 4], %l1	! pc = tf->tf_pc from cpu_lwp_fork()
5042	and	%l2, PSR_CWP, %o1	! keep current CWP
5043	or	%o1, PSR_S, %l0		! user psr
5044	b	return_from_syscall
5045	 add	%l1, 4, %l2		! npc = pc+4
5046
5047/**************************************************************************/
5048
5049#define	UFETCHSTORE_PROLOGUE						 \
5050	set	KERNBASE, %o2					 	;\
5051	cmp	%o0, %o2		/* if addr >= KERNBASE... */	;\
5052	bgeu	Lufetchstore_badaddr				 	;\
5053	 .empty							 	;\
5054	sethi	%hi(cpcb), %o2		/* cpcb->pcb_onfault =	  */ 	;\
5055	ld	[%o2 + %lo(cpcb)], %o2	/*    Lufetchstore_fault  */	;\
5056	set	Lufetchstore_fault, %o3				 	;\
5057	st	%o3, [%o2 + PCB_ONFAULT]
5058
5059	/* keep to a single insn; it's used in a branch delay slot */
5060#define	UFETCHSTORE_EPILOGUE						\
5061	st	%g0, [%o2 + PCB_ONFAULT]! cpcb->pcb_onfault = NULL
5062
5063#define	UFETCHSTORE_RETURN_SUCCESS					\
5064	retl							;	\
5065	 clr	%o0
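/*
 * Expanded, each accessor behaves like this C sketch (_ufetch_8
 * shown; the others differ only in access width or direction):
 *
 *	int
 *	_ufetch_8(const uint8_t *uaddr, uint8_t *valp)
 *	{
 *		if ((u_int)uaddr >= KERNBASE)
 *			return (EFAULT);
 *		cpcb->pcb_onfault = Lufetchstore_fault;
 *		*valp = *uaddr;			// may fault
 *		cpcb->pcb_onfault = NULL;
 *		return (0);
 *	}
 */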
5066
5067/* LINTSTUB: int _ufetch_8(const uint8_t *uaddr, uint8_t *valp); */
5068ENTRY(_ufetch_8)
5069	UFETCHSTORE_PROLOGUE
5070	ldub	[%o0], %o0		! %o0 = *uaddr
5071	UFETCHSTORE_EPILOGUE
5072	stb	%o0, [%o1]		! *valp = %o0
5073	UFETCHSTORE_RETURN_SUCCESS
5074
5075/* LINTSTUB: int _ufetch_16(const uint16_t *uaddr, uint16_t *valp); */
5076ENTRY(_ufetch_16)
5077	UFETCHSTORE_PROLOGUE
5078	lduh	[%o0], %o0		! %o0 = *uaddr
5079	UFETCHSTORE_EPILOGUE
5080	sth	%o0, [%o1]		! *valp = %o0
5081	UFETCHSTORE_RETURN_SUCCESS
5082
5083/* LINTSTUB: int _ufetch_32(const uint32_t *uaddr, uint32_t *valp); */
5084ENTRY(_ufetch_32)
5085	UFETCHSTORE_PROLOGUE
5086	ld	[%o0], %o0		! %o0 = *uaddr
5087	UFETCHSTORE_EPILOGUE
5088	st	%o0, [%o1]		! *valp = %o0
5089	UFETCHSTORE_RETURN_SUCCESS
5090
5091/* LINTSTUB: int _ustore_8(uint8_t *uaddr, uint8_t val); */
5092ENTRY(_ustore_8)
5093	UFETCHSTORE_PROLOGUE
5094	stb	%o1, [%o0]		! *uaddr = val
5095	UFETCHSTORE_EPILOGUE
5096	UFETCHSTORE_RETURN_SUCCESS
5097
5098/* LINTSTUB: int _ustore_16(uint16_t *uaddr, uint16_t val); */
5099ENTRY(_ustore_16)
5100	UFETCHSTORE_PROLOGUE
5101	sth	%o1, [%o0]		! *uaddr = val
5102	UFETCHSTORE_EPILOGUE
5103	UFETCHSTORE_RETURN_SUCCESS
5104
5105/* LINTSTUB: int _ustore_32(uint32_t *uaddr, uint32_t val); */
5106ENTRY(_ustore_32)
5107	UFETCHSTORE_PROLOGUE
5108	st	%o1, [%o0]		! *uaddr = val
5109	UFETCHSTORE_EPILOGUE
5110	UFETCHSTORE_RETURN_SUCCESS
5111
5112Lufetchstore_badaddr:
5113	retl				! return EFAULT
5114	 mov	EFAULT, %o0
5115
5116Lufetchstore_fault:
5117	retl
5118	 UFETCHSTORE_EPILOGUE		! error already in %o0
5119
5120/**************************************************************************/
5121
5122/* probeget and probeset are meant to be used during autoconfiguration */
5123
5124	.globl	_C_LABEL(sparc_fsbail)
5125_C_LABEL(sparc_fsbail):
5126	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
5127	retl				! and return error indicator
5128	 mov	-1, %o0
5129
5130/*
5131 * probeget(addr, size) void *addr; int size;
5132 *
5133 * Read or write a (byte,word,longword) from the given address.
5134 * Like {fu,su}{byte,halfword,word} but our caller is supposed
5135 * to know what he is doing... the address can be anywhere.
5136 *
5137 * We optimize for space, rather than time, here.
5138 */
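/*
 * Caller's view, as a sketch: `v = probeget(addr, 4)' yields -1 if a
 * word-sized read at `addr' faults; note a caller cannot distinguish
 * a fault from a location that legitimately contains -1.
 */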
5139ENTRY(probeget)
5140	! %o0 = addr, %o1 = (1,2,4)
5141	sethi	%hi(cpcb), %o2
5142	ld	[%o2 + %lo(cpcb)], %o2	! cpcb->pcb_onfault = sparc_fsbail;
5143	set	sparc_fsbail, %o5
5144	st	%o5, [%o2 + PCB_ONFAULT]
5145	btst	1, %o1
5146	bnz,a	0f			! if (len & 1)
5147	 ldub	[%o0], %o0		!	value = *(char *)addr;
51480:	btst	2, %o1
5149	bnz,a	0f			! if (len & 2)
5150	 lduh	[%o0], %o0		!	value = *(short *)addr;
51510:	btst	4, %o1
5152	bnz,a	0f			! if (len & 4)
5153	 ld	[%o0], %o0		!	value = *(int *)addr;
51540:	retl				! made it, clear onfault and return
5155	 st	%g0, [%o2 + PCB_ONFAULT]
5156
5157/*
5158 * probeset(addr, size, val) void *addr; int size, val;
5159 *
5160 * As above, but we return 0 on success.
5161 */
5162ENTRY(probeset)
5163	! %o0 = addr, %o1 = (1,2,4), %o2 = val
5164	sethi	%hi(cpcb), %o3
5165	ld	[%o3 + %lo(cpcb)], %o3	! cpcb->pcb_onfault = sparc_fsbail;
5166	set	sparc_fsbail, %o5
5167	st	%o5, [%o3 + PCB_ONFAULT]
5168	btst	1, %o1
5169	bnz,a	0f			! if (len & 1)
5170	 stb	%o2, [%o0]		!	*(char *)addr = value;
51710:	btst	2, %o1
5172	bnz,a	0f			! if (len & 2)
5173	 sth	%o2, [%o0]		!	*(short *)addr = value;
51740:	btst	4, %o1
5175	bnz,a	0f			! if (len & 4)
5176	 st	%o2, [%o0]		!	*(int *)addr = value;
51770:	clr	%o0			! made it, clear onfault and return 0
5178	retl
5179	 st	%g0, [%o3 + PCB_ONFAULT]
5180
5181/*
5182 * int xldcontrolb(void *, pcb)
5183 *		    %o0     %o1
5184 *
5185 * read a byte from the specified address in ASI_CONTROL space.
5186 */
5187ENTRY(xldcontrolb)
5188	!sethi	%hi(cpcb), %o2
5189	!ld	[%o2 + %lo(cpcb)], %o2	! cpcb->pcb_onfault = sparc_fsbail;
5190	or	%o1, %g0, %o2		! %o2 = %o1
5191	set	_C_LABEL(sparc_fsbail), %o5
5192	st	%o5, [%o2 + PCB_ONFAULT]
5193	lduba	[%o0] ASI_CONTROL, %o0	! read
51940:	retl
5195	 st	%g0, [%o2 + PCB_ONFAULT]
5196
5197/*
5198 * int fkbyte(void *, pcb)
5199 *	      %o0      %o1
5200 *
5201 * Just like fubyte(), but for kernel space.
5202 * (currently used to work around unexplained transient bus errors
5203 *  when reading the VME interrupt vector)
5204 */
5205ENTRY(fkbyte)
5206	or	%o1, %g0, %o2		! %o2 = %o1
5207	set	_C_LABEL(sparc_fsbail), %o5
5208	st	%o5, [%o2 + PCB_ONFAULT]
5209	ldub	[%o0], %o0		! fetch the byte
5210	retl				! made it
5211	 st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
5212
5213
5214/*
5215 * copywords(src, dst, nbytes)
5216 *
5217 * Copy `nbytes' bytes from src to dst, both of which are word-aligned;
5218 * nbytes is a multiple of four.  It may, however, be zero, in which case
5219 * nothing is to be copied.
5220 */
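/*
 * Equivalent C (a sketch; note the copy runs downward from the end):
 *
 *	void
 *	copywords(const void *src, void *dst, size_t nbytes)
 *	{
 *		long n = nbytes;
 *
 *		while ((n -= 4) >= 0)
 *			*(int *)((char *)dst + n) =
 *			    *(const int *)((const char *)src + n);
 *	}
 */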
5221ENTRY(copywords)
5222	! %o0 = src, %o1 = dst, %o2 = nbytes
5223	b	1f
5224	deccc	4, %o2
52250:
5226	st	%o3, [%o1 + %o2]
5227	deccc	4, %o2			! while ((n -= 4) >= 0)
52281:
5229	bge,a	0b			!    *(int *)(dst+n) = *(int *)(src+n);
5230	ld	[%o0 + %o2], %o3
5231	retl
5232	nop
5233
5234/*
5235 * qcopy(src, dst, nbytes)
5236 *
5237 * (q for `quad' or `quick', as opposed to b for byte/block copy)
5238 *
5239 * Just like copywords, but everything is multiples of 8.
5240 */
5241ENTRY(qcopy)
5242	b	1f
5243	deccc	8, %o2
52440:
5245	std	%o4, [%o1 + %o2]
5246	deccc	8, %o2
52471:
5248	bge,a	0b
5249	ldd	[%o0 + %o2], %o4
5250	retl
5251	nop
5252
5253/*
5254 * qzero(addr, nbytes)
5255 *
5256 * Zeroes `nbytes' bytes of a quad-aligned virtual address,
5257 * where nbytes is itself a multiple of 8.
5258 */
5259ENTRY(qzero)
5260	! %o0 = addr, %o1 = len (in bytes)
5261	clr	%g1
52620:
5263	deccc	8, %o1			! while ((n -= 8) >= 0)
5264	bge,a	0b
5265	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + n) = 0;
5266	retl
5267	nop
5268
5269/*
5270 * kernel bcopy
5271 * Assumes regions do not overlap; has no useful return value.
5272 *
5273 * Must not use %g7 (see copyin/copyout above).
5274 */
5275
5276#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
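/*
 * Strategy of the `fancy' path below, as a C sketch:
 *
 *	t = src ^ dst;
 *	if (t & 1)	copy by bytes;		// can never co-align
 *	else if (t & 2)	align to 2, copy by halfwords;
 *	else if (t & 4)	align to 4, copy by words;
 *	else		align to 8, copy by doublewords;
 *	then mop up the trailing word/halfword/byte, if any.
 */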
5277
5278ENTRY(bcopy)
5279	cmp	%o2, BCOPY_SMALL
5280	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
5281	btst	7, %o0		! (part of being fancy)
5282
5283	/*
5284	 * Not much to copy, just do it a byte at a time.
5285	 */
5286	deccc	%o2		! while (--len >= 0)
5287	bl	1f
5288	 .empty
52890:
5290	inc	%o0
5291	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
5292	stb	%o4, [%o1]
5293	deccc	%o2
5294	bge	0b
5295	inc	%o1
52961:
5297	retl
5298	 nop
5299	/* NOTREACHED */
5300
5301	/*
5302	 * Plenty of data to copy, so try to do it optimally.
5303	 */
5304Lbcopy_fancy:
5305	! check for common case first: everything lines up.
5306!	btst	7, %o0		! done already
5307	bne	1f
5308	 .empty
5309	btst	7, %o1
5310	be,a	Lbcopy_doubles
5311	dec	8, %o2		! if all lined up, len -= 8, goto Lbcopy_doubles
5312
5313	! If the low bits match, we can make these line up.
53141:
5315	xor	%o0, %o1, %o3	! t = src ^ dst;
5316	btst	1, %o3		! if (t & 1) {
5317	be,a	1f
5318	btst	1, %o0		! [delay slot: if (src & 1)]
5319
5320	! low bits do not match, must copy by bytes.
53210:
5322	ldsb	[%o0], %o4	!	do {
5323	inc	%o0		!		(++dst)[-1] = *src++;
5324	inc	%o1
5325	deccc	%o2
5326	bnz	0b		!	} while (--len != 0);
5327	stb	%o4, [%o1 - 1]
5328	retl
5329	 nop
5330	/* NOTREACHED */
5331
5332	! lowest bit matches, so we can copy by words, if nothing else
53331:
5334	be,a	1f		! if (src & 1) {
5335	btst	2, %o3		! [delay slot: if (t & 2)]
5336
5337	! although low bits match, both are 1: must copy 1 byte to align
5338	ldsb	[%o0], %o4	!	*dst++ = *src++;
5339	stb	%o4, [%o1]
5340	inc	%o0
5341	inc	%o1
5342	dec	%o2		!	len--;
5343	btst	2, %o3		! } [if (t & 2)]
53441:
5345	be,a	1f		! if (t & 2) {
5346	btst	2, %o0		! [delay slot: if (src & 2)]
5347	dec	2, %o2		!	len -= 2;
53480:
5349	ldsh	[%o0], %o4	!	do {
5350	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
5351	inc	2, %o0		!		dst += 2, src += 2;
5352	deccc	2, %o2		!	} while ((len -= 2) >= 0);
5353	bge	0b
5354	inc	2, %o1
5355	b	Lbcopy_mopb	!	goto mop_up_byte;
5356	btst	1, %o2		! } [delay slot: if (len & 1)]
5357	/* NOTREACHED */
5358
5359	! low two bits match, so we can copy by longwords
53601:
5361	be,a	1f		! if (src & 2) {
5362	btst	4, %o3		! [delay slot: if (t & 4)]
5363
5364	! although low 2 bits match, they are 10: must copy one short to align
5365	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
5366	sth	%o4, [%o1]
5367	inc	2, %o0		!	dst += 2;
5368	inc	2, %o1		!	src += 2;
5369	dec	2, %o2		!	len -= 2;
5370	btst	4, %o3		! } [if (t & 4)]
53711:
5372	be,a	1f		! if (t & 4) {
5373	btst	4, %o0		! [delay slot: if (src & 4)]
5374	dec	4, %o2		!	len -= 4;
53750:
5376	ld	[%o0], %o4	!	do {
5377	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
5378	inc	4, %o0		!		dst += 4, src += 4;
5379	deccc	4, %o2		!	} while ((len -= 4) >= 0);
5380	bge	0b
5381	inc	4, %o1
5382	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
5383	btst	2, %o2		! } [delay slot: if (len & 2)]
5384	/* NOTREACHED */
5385
5386	! low three bits match, so we can copy by doublewords
53871:
5388	be	1f		! if (src & 4) {
5389	dec	8, %o2		! [delay slot: len -= 8]
5390	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5391	st	%o4, [%o1]
5392	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
5393	inc	4, %o1
5394	dec	4, %o2		! }
53951:
5396Lbcopy_doubles:
5397	ldd	[%o0], %o4	! do {
5398	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
5399	inc	8, %o0		!	dst += 8, src += 8;
5400	deccc	8, %o2		! } while ((len -= 8) >= 0);
5401	bge	Lbcopy_doubles
5402	inc	8, %o1
5403
5404	! check for a usual case again (save work)
5405	btst	7, %o2		! if ((len & 7) == 0)
5406	be	Lbcopy_done	!	goto bcopy_done;
5407
5408	btst	4, %o2		! if ((len & 4) == 0)
5409	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
5410	btst	2, %o2		! [delay slot: if (len & 2)]
5411	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5412	st	%o4, [%o1]
5413	inc	4, %o0		!	dst += 4;
5414	inc	4, %o1		!	src += 4;
5415	btst	2, %o2		! } [if (len & 2)]
5416
54171:
5418	! mop up trailing word (if present) and byte (if present).
5419Lbcopy_mopw:
5420	be	Lbcopy_mopb	! no word, go mop up byte
5421	btst	1, %o2		! [delay slot: if (len & 1)]
5422	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
5423	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
5424	sth	%o4, [%o1]
5425	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
5426	retl
5427	 stb	%o4, [%o1 + 2]
5428	/* NOTREACHED */
5429
5430	! mop up trailing byte (if present).
5431Lbcopy_mopb:
5432	bne,a	1f
5433	ldsb	[%o0], %o4
5434
5435Lbcopy_done:
5436	retl
5437	 nop
5438
54391:
5440	retl
5441	 stb	%o4,[%o1]
5442
5443/*
5444	 * kcopy() is exactly like bcopy except that it sets pcb_onfault such that
5445 * when a fault occurs, it is able to return -1 to indicate this to the
5446 * caller.
5447 */
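/*
 * I.e. (a sketch; on a fault, Lkcerr restores the saved handler and
 * the fault handler's error value is returned in %o0):
 *
 *	int
 *	kcopy(const void *src, void *dst, size_t len)
 *	{
 *		saved = cpcb->pcb_onfault;
 *		cpcb->pcb_onfault = Lkcerr;
 *		... same copy loops as bcopy, avoiding %o5 and %g1 ...
 *		cpcb->pcb_onfault = saved;
 *		return (0);
 *	}
 */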
5448ENTRY(kcopy)
5449	sethi	%hi(cpcb), %o5		! cpcb->pcb_onfault = Lkcerr;
5450	ld	[%o5 + %lo(cpcb)], %o5
5451	set	Lkcerr, %o3
5452	ld	[%o5 + PCB_ONFAULT], %g1! save current onfault handler
5453	st	%o3, [%o5 + PCB_ONFAULT]
5454
5455	cmp	%o2, BCOPY_SMALL
5456Lkcopy_start:
5457	bge,a	Lkcopy_fancy	! if >= this many, go be fancy.
5458	 btst	7, %o0		! (part of being fancy)
5459
5460	/*
5461	 * Not much to copy, just do it a byte at a time.
5462	 */
5463	deccc	%o2		! while (--len >= 0)
5464	bl	1f
5465	 .empty
54660:
5467	ldsb	[%o0], %o4	!	*dst++ = *src++;
5468	inc	%o0
5469	stb	%o4, [%o1]
5470	deccc	%o2
5471	bge	0b
5472	 inc	%o1
54731:
5474	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5475	retl
5476	 mov	0, %o0		! delay slot: return success
5477	/* NOTREACHED */
5478
5479	/*
5480	 * Plenty of data to copy, so try to do it optimally.
5481	 */
5482Lkcopy_fancy:
5483	! check for common case first: everything lines up.
5484!	btst	7, %o0		! done already
5485	bne	1f
5486	 .empty
5487	btst	7, %o1
5488	be,a	Lkcopy_doubles
5489	 dec	8, %o2		! if all lined up, len -= 8, goto Lkcopy_doubles
5490
5491	! If the low bits match, we can make these line up.
54921:
5493	xor	%o0, %o1, %o3	! t = src ^ dst;
5494	btst	1, %o3		! if (t & 1) {
5495	be,a	1f
5496	 btst	1, %o0		! [delay slot: if (src & 1)]
5497
5498	! low bits do not match, must copy by bytes.
54990:
5500	ldsb	[%o0], %o4	!	do {
5501	inc	%o0		!		*dst++ = *src++;
5502	stb	%o4, [%o1]
5503	deccc	%o2
5504	bnz	0b		!	} while (--len != 0);
5505	 inc	%o1
5506	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5507	retl
5508	 mov	0, %o0		! delay slot: return success
5509	/* NOTREACHED */
5510
5511	! lowest bit matches, so we can copy by words, if nothing else
55121:
5513	be,a	1f		! if (src & 1) {
5514	 btst	2, %o3		! [delay slot: if (t & 2)]
5515
5516	! although low bits match, both are 1: must copy 1 byte to align
5517	ldsb	[%o0], %o4	!	*dst++ = *src++;
5518	inc	%o0
5519	stb	%o4, [%o1]
5520	dec	%o2		!	len--;
5521	inc	%o1
5522	btst	2, %o3		! } [if (t & 2)]
55231:
5524	be,a	1f		! if (t & 2) {
5525	 btst	2, %o0		! [delay slot: if (src & 2)]
5526	dec	2, %o2		!	len -= 2;
55270:
5528	ldsh	[%o0], %o4	!	do {
5529	inc	2, %o0		!		dst += 2, src += 2;
5530	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
5531	deccc	2, %o2		!	} while ((len -= 2) >= 0);
5532	bge	0b
5533	 inc	2, %o1
5534	b	Lkcopy_mopb	!	goto mop_up_byte;
5535	 btst	1, %o2		! } [delay slot: if (len & 1)]
5536	/* NOTREACHED */
5537
5538	! low two bits match, so we can copy by longwords
55391:
5540	be,a	1f		! if (src & 2) {
5541	 btst	4, %o3		! [delay slot: if (t & 4)]
5542
5543	! although low 2 bits match, they are 10: must copy one short to align
5544	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
5545	inc	2, %o0		!	dst += 2;
5546	sth	%o4, [%o1]
5547	dec	2, %o2		!	len -= 2;
5548	inc	2, %o1		!	src += 2;
5549	btst	4, %o3		! } [if (t & 4)]
55501:
5551	be,a	1f		! if (t & 4) {
5552	 btst	4, %o0		! [delay slot: if (src & 4)]
5553	dec	4, %o2		!	len -= 4;
55540:
5555	ld	[%o0], %o4	!	do {
5556	inc	4, %o0		!		dst += 4, src += 4;
5557	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
5558	deccc	4, %o2		!	} while ((len -= 4) >= 0);
5559	bge	0b
5560	 inc	4, %o1
5561	b	Lkcopy_mopw	!	goto mop_up_word_and_byte;
5562	 btst	2, %o2		! } [delay slot: if (len & 2)]
5563	/* NOTREACHED */
5564
5565	! low three bits match, so we can copy by doublewords
55661:
5567	be	1f		! if (src & 4) {
5568	 dec	8, %o2		! [delay slot: len -= 8]
5569	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5570	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
5571	st	%o4, [%o1]
5572	dec	4, %o2		! }
5573	inc	4, %o1
55741:
5575Lkcopy_doubles:
5576	! swap %o4 with %o2 during doubles copy, since %o5 is verboten
5577	mov     %o2, %o4
5578Lkcopy_doubles2:
5579	ldd	[%o0], %o2	! do {
5580	inc	8, %o0		!	dst += 8, src += 8;
5581	std	%o2, [%o1]	!	*(double *)dst = *(double *)src;
5582	deccc	8, %o4		! } while ((len -= 8) >= 0);
5583	bge	Lkcopy_doubles2
5584	 inc	8, %o1
5585	mov	%o4, %o2	! restore len
5586
5587	! check for a usual case again (save work)
5588	btst	7, %o2		! if ((len & 7) == 0)
5589	be	Lkcopy_done	!	goto kcopy_done;
5590
5591	 btst	4, %o2		! if ((len & 4) == 0)
5592	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte;
5593	 btst	2, %o2		! [delay slot: if (len & 2)]
5594	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5595	inc	4, %o0		!	src += 4;
5596	st	%o4, [%o1]
5597	inc	4, %o1		!	dst += 4;
5598	btst	2, %o2		! } [if (len & 2)]
5599
56001:
5601	! mop up trailing word (if present) and byte (if present).
5602Lkcopy_mopw:
5603	be	Lkcopy_mopb	! no word, go mop up byte
5604	 btst	1, %o2		! [delay slot: if (len & 1)]
5605	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
5606	be	Lkcopy_done	! if ((len & 1) == 0) goto done;
5607	 sth	%o4, [%o1]
5608	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
5609	stb	%o4, [%o1 + 2]
5610	st	%g1, [%o5 + PCB_ONFAULT]! restore onfault
5611	retl
5612	 mov	0, %o0		! delay slot: return success
5613	/* NOTREACHED */
5614
5615	! mop up trailing byte (if present).
5616Lkcopy_mopb:
5617	bne,a	1f
5618	 ldsb	[%o0], %o4
5619
5620Lkcopy_done:
5621	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5622	retl
5623	 mov	0, %o0		! delay slot: return success
5624	/* NOTREACHED */
5625
56261:
5627	stb	%o4, [%o1]
5628	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5629	retl
5630	 mov	0, %o0		! delay slot: return success
5631	/* NOTREACHED */
5632
5633Lkcerr:
5634	retl
5635	 st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5636	/* NOTREACHED */
5637
5638/*
5639 * savefpstate(struct fpstate *f);
5640 * ipi_savefpstate(struct fpstate *f);
5641 *
5642 * Store the current FPU state.  The first `st %fsr' may cause a trap;
5643 * our trap handler knows how to recover (by `returning' to savefpcont).
5644 *
5645 * The IPI version just deals with updating event counters first.
5646 */
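/*
 * Roughly, in C (a sketch: getfsr() and fsr_qfront() stand in for the
 * `st %fsr' and `std %fq' instructions, they are not real functions):
 *
 *	f->fs_fsr = getfsr();		// may trap; see savefpcont below
 *	qsize = 0;
 *	while (f->fs_fsr & FSR_QNE) {
 *		f->fs_queue[qsize++] = fsr_qfront();
 *		f->fs_fsr = getfsr();	// reread to see the new QNE bit
 *	}
 *	f->fs_qsize = qsize;
 *	// ... then store %f0-%f31 into f->fs_regs
 */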
5647ENTRY(ipi_savefpstate)
5648	sethi	%hi(CPUINFO_VA+CPUINFO_SAVEFPSTATE), %o5
5649	ldd	[%o5 + %lo(CPUINFO_VA+CPUINFO_SAVEFPSTATE)], %o2
5650	inccc   %o3			! increment 64-bit event counter:
5651	addx    %o2, 0, %o2		! low word, then carry into high
5652	std	%o2, [%o5 + CPUINFO_SAVEFPSTATE]
5653
5654ENTRY(savefpstate)
5655	cmp	%o0, 0
5656	rd	%psr, %o1		! enable FP before we begin
5657	set	PSR_EF, %o2
5658	or	%o1, %o2, %o1
5659	wr	%o1, 0, %psr
5660	/* do some setup work while we wait for PSR_EF to turn on */
5661	set	FSR_QNE, %o5		! QNE = 0x2000, too big for immediate
5662	clr	%o3			! qsize = 0;
5663	nop				! (still waiting for PSR_EF)
5664special_fp_store:
5665	st	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
5666	/*
5667	 * Even if the preceding instruction did not trap, the queue
5668	 * is not necessarily empty: this state save might be happening
5669	 * because user code tried to store %fsr and took the FPU
5670	 * from `exception pending' mode to `exception' mode.
5671	 * So we still have to check the blasted QNE bit.
5672	 * With any luck it will usually not be set.
5673	 */
5674	ld	[%o0 + FS_FSR], %o2	! if (f->fs_fsr & QNE)
5675	btst	%o5, %o2
5676	bnz	Lfp_storeq		!	goto storeq;
5677	 std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
5678Lfp_finish:
5679	st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
5680	std	%f2, [%o0 + FS_REGS + (4*2)]
5681	std	%f4, [%o0 + FS_REGS + (4*4)]
5682	std	%f6, [%o0 + FS_REGS + (4*6)]
5683	std	%f8, [%o0 + FS_REGS + (4*8)]
5684	std	%f10, [%o0 + FS_REGS + (4*10)]
5685	std	%f12, [%o0 + FS_REGS + (4*12)]
5686	std	%f14, [%o0 + FS_REGS + (4*14)]
5687	std	%f16, [%o0 + FS_REGS + (4*16)]
5688	std	%f18, [%o0 + FS_REGS + (4*18)]
5689	std	%f20, [%o0 + FS_REGS + (4*20)]
5690	std	%f22, [%o0 + FS_REGS + (4*22)]
5691	std	%f24, [%o0 + FS_REGS + (4*24)]
5692	std	%f26, [%o0 + FS_REGS + (4*26)]
5693	std	%f28, [%o0 + FS_REGS + (4*28)]
5694	retl
5695	 std	%f30, [%o0 + FS_REGS + (4*30)]
5696
5697/*
5698 * Store the (now known nonempty) FP queue.
5699 * We have to reread the fsr each time in order to get the new QNE bit.
5700 */
5701Lfp_storeq:
5702	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
57031:
5704	std	%fq, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
5705	st	%fsr, [%o0 + FS_FSR]	! reread fsr
5706	ld	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
5707	btst	%o5, %o4
5708	bnz	1b
5709	 inc	8, %o3
5710	st	%o2, [%o0 + FS_FSR]	! fs->fs_fsr = original_fsr
5711	b	Lfp_finish		! set qsize and finish storing fregs
5712	 srl	%o3, 3, %o3		! (but first fix qsize)
5713
5714/*
5715 * The fsr store trapped.  Do it again; this time it will not trap.
5716 * We could just have the trap handler return to the `st %fsr', but
5717 * if for some reason it *does* trap, that would lock us into a tight
5718 * loop.  This way we panic instead.  Whoopee.
5719 */
5720savefpcont:
5721	b	special_fp_store + 4	! continue
5722	 st	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
5723
5724/*
5725 * Load FPU state.
5726 */
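/*
 * (Sketch: the inverse of savefpstate -- with PSR_EF turned on,
 * reload %f0-%f31 from f->fs_regs, then setfsr(f->fs_fsr).)
 */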
5727ENTRY(loadfpstate)
5728	rd	%psr, %o1		! enable FP before we begin
5729	set	PSR_EF, %o2
5730	or	%o1, %o2, %o1
5731	wr	%o1, 0, %psr
5732	nop; nop; nop			! paranoia
5733	ldd	[%o0 + FS_REGS + (4*0)], %f0
5734	ldd	[%o0 + FS_REGS + (4*2)], %f2
5735	ldd	[%o0 + FS_REGS + (4*4)], %f4
5736	ldd	[%o0 + FS_REGS + (4*6)], %f6
5737	ldd	[%o0 + FS_REGS + (4*8)], %f8
5738	ldd	[%o0 + FS_REGS + (4*10)], %f10
5739	ldd	[%o0 + FS_REGS + (4*12)], %f12
5740	ldd	[%o0 + FS_REGS + (4*14)], %f14
5741	ldd	[%o0 + FS_REGS + (4*16)], %f16
5742	ldd	[%o0 + FS_REGS + (4*18)], %f18
5743	ldd	[%o0 + FS_REGS + (4*20)], %f20
5744	ldd	[%o0 + FS_REGS + (4*22)], %f22
5745	ldd	[%o0 + FS_REGS + (4*24)], %f24
5746	ldd	[%o0 + FS_REGS + (4*26)], %f26
5747	ldd	[%o0 + FS_REGS + (4*28)], %f28
5748	ldd	[%o0 + FS_REGS + (4*30)], %f30
5749	retl
5750	 ld	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
5751
5752/*
5753 * ienab_bis(bis) int bis;
5754 * ienab_bic(bic) int bic;
5755 *
5756 * Set and clear bits in the sun4/sun4c interrupt register.
5757 */
5758
5759#if defined(SUN4) || defined(SUN4C)
5760/*
5761 * Since there are no read-modify-write instructions for this,
5762 * and one of the interrupts is nonmaskable, we must disable traps.
5763 */
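/*
 * In C, minus the trap juggling (sketch only):
 *
 *	void ienab_bis(int bis) { *(volatile u_char *)INTRREG_VA |= bis; }
 *	void ienab_bic(int bic) { *(volatile u_char *)INTRREG_VA &= ~bic; }
 */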
5764ENTRY(ienab_bis)
5765	! %o0 = bits to set
5766	rd	%psr, %o2
5767	wr	%o2, PSR_ET, %psr	! disable traps
5768	nop; nop			! 3-instr delay until ET turns off
5769	sethi	%hi(INTRREG_VA), %o3
5770	ldub	[%o3 + %lo(INTRREG_VA)], %o4
5771	or	%o4, %o0, %o4		! *INTRREG_VA |= bis;
5772	stb	%o4, [%o3 + %lo(INTRREG_VA)]
5773	wr	%o2, 0, %psr		! reenable traps
5774	nop
5775	retl
5776	 nop
5777
5778ENTRY(ienab_bic)
5779	! %o0 = bits to clear
5780	rd	%psr, %o2
5781	wr	%o2, PSR_ET, %psr	! disable traps
5782	nop; nop
5783	sethi	%hi(INTRREG_VA), %o3
5784	ldub	[%o3 + %lo(INTRREG_VA)], %o4
5785	andn	%o4, %o0, %o4		! *INTRREG_VA &= ~bic;
5786	stb	%o4, [%o3 + %lo(INTRREG_VA)]
5787	wr	%o2, 0, %psr		! reenable traps
5788	nop
5789	retl
5790	 nop
5791#endif	/* SUN4 || SUN4C */
5792
5793#if defined(SUN4M)
5794/*
5795 * raise(cpu, level)
5796 */
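/*
 * In C (sketch; assumes the per-CPU "set pending" registers are spaced
 * _MAXNBPG apart, as the loop below encodes):
 *
 *	*(volatile int *)(ICR_PI_SET + cpu * _MAXNBPG) =
 *	    PINTR_SINTRLEV(level);		// i.e. 1 << (16 + level)
 *
 * (On MSIIEP there is only one CPU and a halfword store to the PCIC
 * soft interrupt set register is used instead.)
 */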
5797ENTRY(raise)
5798#if !defined(MSIIEP) /* normal suns */
5799	! *(ICR_PI_SET + cpu*_MAXNBPG) = PINTR_SINTRLEV(level)
5800	sethi	%hi(1 << 16), %o2
5801	sll	%o2, %o1, %o2
5802	set	ICR_PI_SET, %o1
5803	set	_MAXNBPG, %o3
58041:
5805	subcc	%o0, 1, %o0
5806	bpos,a	1b
5807	 add	%o1, %o3, %o1
5808	retl
5809	 st	%o2, [%o1]
5810#else /* MSIIEP - ignore %o0, only one CPU ever */
5811	mov	1, %o2
5812	xor	%o1, 8, %o1	! change 'endianness' of the shift distance
5813	sethi	%hi(MSIIEP_PCIC_VA), %o0
5814	sll	%o2, %o1, %o2
5815	retl
5816	 sth	%o2, [%o0 + PCIC_SOFT_INTR_SET_REG]
5817#endif
5818
5819/*
5820 * Read Synchronous Fault Status registers.
5821 * On entry: %l1 == PC, %l3 == fault type, %l4 == storage, %l7 == return address
5822 * Only use %l5 and %l6.
5823 * Note: not C callable.
5824 */
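/*
 * In C-like form (sketch; lda() is shorthand for an `lda ... ASI_SRMMU'
 * access, and `dump' is the two-word area %l4 points at):
 *
 *	dump->sfva = lda(SRMMU_SFAR);	// address must be read first
 *	dump->sfsr = lda(SRMMU_SFSR);
 */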
5825_ENTRY(_C_LABEL(srmmu_get_syncflt))
5826_ENTRY(_C_LABEL(hypersparc_get_syncflt))
5827	set	SRMMU_SFAR, %l5
5828	lda	[%l5] ASI_SRMMU, %l5	! sync virt addr; must be read first
5829	st	%l5, [%l4 + 4]		! => dump.sfva
5830	set	SRMMU_SFSR, %l5
5831	lda	[%l5] ASI_SRMMU, %l5	! get sync fault status register
5832	jmp	%l7 + 8			! return to caller
5833	 st	%l5, [%l4]		! => dump.sfsr
5834
5835_ENTRY(_C_LABEL(viking_get_syncflt))
5836_ENTRY(_C_LABEL(ms1_get_syncflt))
5837_ENTRY(_C_LABEL(swift_get_syncflt))
5838_ENTRY(_C_LABEL(turbosparc_get_syncflt))
5839_ENTRY(_C_LABEL(cypress_get_syncflt))
5840	cmp	%l3, T_TEXTFAULT
5841	be,a	1f
5842	 mov	%l1, %l5		! use PC if type == T_TEXTFAULT
5843
5844	set	SRMMU_SFAR, %l5
5845	lda	[%l5] ASI_SRMMU, %l5	! sync virt addr; must be read first
58461:
5847	st	%l5, [%l4 + 4]		! => dump.sfva
5848
5849	set	SRMMU_SFSR, %l5
5850	lda	[%l5] ASI_SRMMU, %l5	! get sync fault status register
5851	jmp	%l7 + 8			! return to caller
5852	 st	%l5, [%l4]		! => dump.sfsr
5853
5854#if defined(MULTIPROCESSOR) && 0 /* notyet */
5855/*
5856 * Read Synchronous Fault Status registers.
5857 * On entry: %o0 == &sfsr, %o1 == &sfar
5858 */
5859_ENTRY(_C_LABEL(smp_get_syncflt))
5860	save    %sp, -CCFSZ, %sp
5861
5862	sethi	%hi(CPUINFO_VA+CPUINFO_GETSYNCFLT), %o4
5863	ld	[%o4 + %lo(CPUINFO_VA+CPUINFO_GETSYNCFLT)], %o5
5864	clr	%l1
5865	clr	%l3
5866	sethi	%hi(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %o4
5867	jmpl	%o5, %l7
5868	 or	%o4, %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %l4
5869
5870	! load values out of the dump
5871	sethi	%hi(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %o4
5872	ld	[%o4 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP)], %o5
5873	st	%o5, [%i0]
5874	sethi	%hi(CPUINFO_VA+CPUINFO_SYNCFLTDUMP+4), %o4
5875	ld	[%o4 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP+4)], %o5
5876	st	%o5, [%i1]
5877	ret
5878	 restore
5879#endif /* MULTIPROCESSOR */
5880
5881/*
5882 * Read Asynchronous Fault Status registers.
5883 * On entry: %o0 == &afsr, %o1 == &afar
5884 * Return 0 if the async fault registers are present.
5885 */
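/*
 * The Cypress/HyperSPARC variant below, roughly in C (sketch; lda() is
 * shorthand for an `lda ... ASI_SRMMU' access):
 *
 *	*afsr = lda(SRMMU_AFSR);	// status first on these chips
 *	if (*afsr & AFSR_AFO)		// address is only valid
 *		*afar = lda(SRMMU_AFAR);	// if AFO is set
 *	return (0);
 */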
5886_ENTRY(_C_LABEL(srmmu_get_asyncflt))
5887	set	SRMMU_AFAR, %o4
5888	lda	[%o4] ASI_SRMMU, %o4	! get async fault address
5889	set	SRMMU_AFSR, %o3
5890	st	%o4, [%o1]
5891	lda	[%o3] ASI_SRMMU, %o3	! get async fault status
5892	st	%o3, [%o0]
5893	retl
5894	 clr	%o0			! return value
5895
5896_ENTRY(_C_LABEL(cypress_get_asyncflt))
5897_ENTRY(_C_LABEL(hypersparc_get_asyncflt))
5898	set	SRMMU_AFSR, %o3		! must read status before fault on HS
5899	lda	[%o3] ASI_SRMMU, %o3	! get async fault status
5900	st	%o3, [%o0]
5901	btst	AFSR_AFO, %o3		! and only read fault address
5902	bz	1f			! if valid.
5903	set	SRMMU_AFAR, %o4
5904	lda	[%o4] ASI_SRMMU, %o4	! get async fault address
5905	clr	%o0			! return value
5906	retl
5907	 st	%o4, [%o1]
59081:
5909	retl
5910	 clr	%o0			! return value
5911
5912_ENTRY(_C_LABEL(no_asyncflt_regs))
5913	retl
5914	 mov	1, %o0			! return value
5915
5916_ENTRY(_C_LABEL(hypersparc_pure_vcache_flush))
5917	/*
5918	 * Flush entire on-chip instruction cache, which is
5919	 * a purely virtually-indexed/virtually-tagged cache.
5920	 */
5921	retl
5922	 sta	%g0, [%g0] ASI_HICACHECLR
5923
5924#endif /* SUN4M */
5925
5926
5927/*
5928 * delay function
5929 *
5930 * void delay(N)  -- delay N microseconds
5931 *
5932 * Register usage: %o0 = "N" number of usecs to go (counts down to zero)
5933 *		   %o1 = "timerblurb" (stays constant)
5934 *		   %o2 = counter for 1 usec (counts down from %o1 to zero)
5935 *
5936 */
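/*
 * Equivalent C (sketch only; the point of doing this in assembly is
 * that the loops must not be optimized away):
 *
 *	void delay(unsigned n) {
 *		while (n-- > 0)
 *			for (int i = timerblurb; i > 0; i--)
 *				continue;	// ~1 usec per outer pass
 *	}
 */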
5937
5938ENTRY(delay)			! %o0 = n
5939	subcc	%o0, %g0, %g0		! set CCs from n
5940	be	2f			! nothing to do if n == 0
5941
5942	sethi	%hi(_C_LABEL(timerblurb)), %o1
5943	ld	[%o1 + %lo(_C_LABEL(timerblurb))], %o1	! %o1 = timerblurb
5944
5945	 addcc	%o1, %g0, %o2		! %o2 = cntr (start @ %o1), clear CCs
5946					! first time through only
5947
5948					! delay 1 usec
59491:	bne	1b			! come back here if not done
5950	 subcc	%o2, 1, %o2		! %o2 = %o2 - 1 [delay slot]
5951
5952	subcc	%o0, 1, %o0		! %o0 = %o0 - 1
5953	bne	1b			! done yet?
5954	 addcc	%o1, %g0, %o2		! reinit %o2 and CCs  [delay slot]
5955					! harmless if not branching
59562:
5957	retl				! return
5958	 nop				! [delay slot]
5959
5960
5961/*
5962 * void __cpu_simple_lock(__cpu_simple_lock_t *alp)
5963 */
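/*
 * A test-and-test-and-set spin lock, roughly (sketch; ldstub() stands
 * for the atomic `ldstub' instruction):
 *
 *	while (ldstub(alp) != 0) {
 *		for (i = 0x1000000; *alp != 0; i--)
 *			if (i < 0)
 *				goto spunout;	// panic, unless already
 *	}					// panicking
 */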
5964ENTRY_NOPROFILE(__cpu_simple_lock)
59650:
5966	ldstub	[%o0], %o1
5967	tst	%o1
5968	bnz,a	2f
5969	 ldub	[%o0], %o1
59701:
5971	retl
5972	 .empty
59732:
5974	set	0x1000000, %o2	! set spinout counter
59753:
5976	tst	%o1
5977	bz	0b		! lock has been released; try again
5978	deccc	%o2
5979	bcc,a	3b		! repeat until counter < 0
5980	 ldub	[%o0], %o1
5981
5982	! spun out; check if already panicking
5983	sethi	%hi(_C_LABEL(panicstr)), %o2
5984	ld	[%o2 + %lo(_C_LABEL(panicstr))], %o1
5985	tst	%o1
5986	! if so, just take the lock and return on the assumption that
5987	! in panic mode we're running on a single CPU anyway.
5988	bnz,a	1b
5989	 ldstub	[%o0], %g0
5990
5991	! set up stack frame and call panic
5992	save	%sp, -CCFSZ, %sp
5993	sethi	%hi(CPUINFO_VA + CPUINFO_CPUNO), %o0
5994	ld	[%o0 + %lo(CPUINFO_VA + CPUINFO_CPUNO)], %o1
5995	mov	%i0, %o2
5996	sethi	%hi(Lpanic_spunout), %o0
5997	call	_C_LABEL(panic)
5998	or	%o0, %lo(Lpanic_spunout), %o0
5999
6000Lpanic_spunout:
6001	.asciz	"cpu%d: stuck on lock@%x"
6002	_ALIGN
6003
6004#if defined(KGDB) || defined(DDB) || defined(DIAGNOSTIC)
6005/*
6006 * Write all windows (user or otherwise), except the current one.
6007 *
6008 * THIS COULD BE DONE IN USER CODE
6009 */
6010ENTRY(write_all_windows)
6011	/*
6012	 * g2 = g1 = nwindows - 1;
6013	 * while (--g1 > 0) save();
6014	 * while (--g2 > 0) restore();
6015	 */
6016	sethi	%hi(_C_LABEL(nwindows)), %g1
6017	ld	[%g1 + %lo(_C_LABEL(nwindows))], %g1
6018	dec	%g1
6019	mov	%g1, %g2
6020
60211:	deccc	%g1
6022	bg,a	1b
6023	 save	%sp, -64, %sp
6024
60252:	deccc	%g2
6026	bg,a	2b
6027	 restore
6028
6029	retl
6030	nop
6031#endif /* KGDB || DDB || DIAGNOSTIC */
6032
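/*
 * setjmp/longjmp.  The buffer layout used here is, in effect (sketch):
 *
 *	struct jb { int sp; int pc; int fp; };
 *
 * longjmp(a, v) pops frames with `restore' until %fp reaches a->fp,
 * then returns v (or 1 if v == 0) to a->pc with a->sp in place; any
 * mismatch is a "longjmp botch" panic.
 */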
6033ENTRY(setjmp)
6034	st	%sp, [%o0+0]	! stack pointer
6035	st	%o7, [%o0+4]	! return pc
6036	st	%fp, [%o0+8]	! frame pointer
6037	retl
6038	 clr	%o0
6039
6040Lpanic_ljmp:
6041	.asciz	"longjmp botch"
6042	_ALIGN
6043
6044ENTRY(longjmp)
6045	addcc	%o1, %g0, %g6	! compute v ? v : 1 in a global register
6046	be,a	0f
6047	 mov	1, %g6
60480:
6049	mov	%o0, %g1	! save a in another global register
6050	ld	[%g1+8], %g7	/* get caller's frame */
60511:
6052	cmp	%fp, %g7	! compare against desired frame
6053	bl,a	1b		! if below,
6054	 restore		!    pop frame and loop
6055	ld	[%g1+0], %o2	! fetch return %sp
6056	be,a	2f		! we're there, get out
6057	 ld	[%g1+4], %o3	! fetch return pc
6058
6059Llongjmpbotch:
6060				! otherwise, went too far; bomb out
6061	save	%sp, -CCFSZ, %sp	/* preserve current window */
6062	sethi	%hi(Lpanic_ljmp), %o0
6063	call	_C_LABEL(panic)
6064	or %o0, %lo(Lpanic_ljmp), %o0
6065	unimp	0
6066
60672:
6068	cmp	%o2, %sp	! %sp must not decrease
6069	bge,a	3f
6070	 mov	%o2, %sp	! it is OK, put it in place
6071	b,a	Llongjmpbotch
60723:
6073	jmp	%o3 + 8		! success, return %g6
6074	 mov	%g6, %o0
6075
6076	.data
6077	.globl	_C_LABEL(kernel_top)
6078_C_LABEL(kernel_top):
6079	.word	0
6080	.globl	_C_LABEL(bootinfo)
6081_C_LABEL(bootinfo):
6082	.word	0
6083
6084	.comm	_C_LABEL(nwindows), 4
6085	.comm	_C_LABEL(romp), 4
6086