xref: /netbsd/sys/arch/sparc/sparc/locore.s (revision c4a72b64)
1/*	$NetBSD: locore.s,v 1.168 2002/12/08 16:16:59 pk Exp $	*/
2
3/*
4 * Copyright (c) 1996 Paul Kranenburg
5 * Copyright (c) 1996
6 * 	The President and Fellows of Harvard College. All rights reserved.
7 * Copyright (c) 1992, 1993
8 *	The Regents of the University of California.  All rights reserved.
9 *
10 * This software was developed by the Computer Systems Engineering group
11 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
12 * contributed to Berkeley.
13 *
14 * All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 *	This product includes software developed by the University of
17 *	California, Lawrence Berkeley Laboratory.
18 *	This product includes software developed by Harvard University.
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 * 1. Redistributions of source code must retain the above copyright
24 *    notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 *    notice, this list of conditions and the following disclaimer in the
27 *    documentation and/or other materials provided with the distribution.
28 * 3. All advertising materials mentioning features or use of this software
29 *    must display the following acknowledgement:
30 *	This product includes software developed by the University of
31 *	California, Berkeley and its contributors.
32 *	This product includes software developed by Harvard University.
33 *	This product includes software developed by Paul Kranenburg.
34 * 4. Neither the name of the University nor the names of its contributors
35 *    may be used to endorse or promote products derived from this software
36 *    without specific prior written permission.
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 *
50 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
51 */
52
53#include "opt_ddb.h"
54#include "opt_kgdb.h"
55#include "opt_compat_svr4.h"
56#include "opt_compat_sunos.h"
57#include "opt_multiprocessor.h"
58#include "opt_lockdebug.h"
59
60#include "assym.h"
61#include <machine/param.h>
62#include <machine/asm.h>
63#include <sparc/sparc/intreg.h>
64#include <sparc/sparc/timerreg.h>
65#include <sparc/sparc/vaddrs.h>
66#ifdef notyet
67#include <sparc/dev/zsreg.h>
68#endif
69#include <machine/ctlreg.h>
70#include <machine/psl.h>
71#include <machine/signal.h>
72#include <machine/trap.h>
73#include <sys/syscall.h>
74
75/*
76 * GNU assembler does not understand `.empty' directive; Sun assembler
77 * gripes about labels without it.  To allow cross-compilation using
78 * the Sun assembler, and because .empty directives are useful documentation,
79 * we use this trick.
80 */
81#ifdef SUN_AS
82#define	EMPTY	.empty
83#else
84#define	EMPTY	/* .empty */
85#endif
86
87/* use as needed to align things on longword boundaries */
88#define	_ALIGN	.align 4
89
90/*
91 * CCFSZ (C Compiler Frame SiZe) is the size of a stack frame required if
92 * a function is to call C code.  It should be just 64, but Sun defined
93 * their frame with space to hold arguments 0 through 5 (plus some junk),
94 * and varargs routines (such as printf) demand this, and gcc uses this
95 * area at times anyway.
96 */
97#define	CCFSZ	96
98
99/*
100 * A handy macro for maintaining instrumentation counters.
101 * Note that this clobbers %o0 and %o1.  Normal usage is
102 * something like:
103 *	foointr:
104 *		TRAP_SETUP(...)		! makes %o registers safe
105 *		INCR(cnt+V_FOO)	! count a foo
106 */
107#define INCR(what) \
108	sethi	%hi(what), %o0; \
109	ld	[%o0 + %lo(what)], %o1; \
110	inc	%o1; \
111	st	%o1, [%o0 + %lo(what)]
112
113/*
114 * Another handy macro: load one register window, given `base' address.
115 * This can be either a simple register (e.g., %sp) or include an initial
116 * offset (e.g., %g6 + PCB_RW).
117 * Uses ldd (load doubleword), so the effective address must be 8-byte
118 * aligned; fills all eight %l and all eight %i registers (64 bytes).
 */
118#define	LOADWIN(addr) \
119	ldd	[addr], %l0; \
120	ldd	[addr + 8], %l2; \
121	ldd	[addr + 16], %l4; \
122	ldd	[addr + 24], %l6; \
123	ldd	[addr + 32], %i0; \
124	ldd	[addr + 40], %i2; \
125	ldd	[addr + 48], %i4; \
126	ldd	[addr + 56], %i6
127
128/*
129 * To return from trap we need the two-instruction sequence
130 * `jmp %l1; rett %l2', which is defined here for convenience.
131 * (%l1/%l2 hold the pc/npc saved by the hardware at trap entry;
132 * the rett executes in the jmp's delay slot, per SPARC V8.)
 */
132#define	RETT	jmp %l1; rett %l2
133
134	.data
135/*
136 * The interrupt stack.
137 *
138 * This is the very first thing in the data segment, and therefore has
139 * the lowest kernel stack address.  We count on this in the interrupt
140 * trap-frame setup code, since we may need to switch from the kernel
141 * stack to the interrupt stack (iff we are not already on the interrupt
142 * stack).  One sethi+cmp is all we need since this is so carefully
143 * arranged.
144 *
145 * In SMP kernels, each CPU has its own interrupt stack and the computation
146 * to determine whether we're already on the interrupt stack is slightly
147 * more time consuming (see INTR_SETUP() below).
148 */
149	.globl	_C_LABEL(intstack)
150	.globl	_C_LABEL(eintstack)
151_C_LABEL(intstack):
152	.skip	INT_STACK_SIZE		! 16k = 128 128-byte stack frames
153_C_LABEL(eintstack):
154
/* Address of the per-CPU `end of interrupt stack' slot in cpuinfo */
155_EINTSTACKP = CPUINFO_VA + CPUINFO_EINTSTACK
156
157/*
158 * CPUINFO_VA is a CPU-local virtual address; cpi->ci_self is a global
159 * virtual address for the same structure.  It must be stored in p->p_cpu
160 * upon context switch.
161 */
162_CISELFP = CPUINFO_VA + CPUINFO_SELF
163_CIFLAGS = CPUINFO_VA + CPUINFO_FLAGS
164
165/*
166 * When a process exits and its u. area goes away, we set cpcb to point
167 * to this `u.', leaving us with something to use for an interrupt stack,
168 * and letting all the register save code have a pcb_uw to examine.
169 * This is also carefully arranged (to come just before u0, so that
170 * process 0's kernel stack can quietly overrun into it during bootup, if
171 * we feel like doing that).
172 */
173	.globl	_C_LABEL(idle_u)
174_C_LABEL(idle_u):
175	.skip	USPACE
176/*
177 * On SMP kernels, there's an idle u-area for each CPU and we must
178 * read its location from cpuinfo.
179 */
180IDLE_UP = CPUINFO_VA + CPUINFO_IDLE_U
181
182/*
183 * Process 0's u.
184 *
185 * This must be aligned on an 8 byte boundary.
186 */
187	.globl	_C_LABEL(u0)
188_C_LABEL(u0):	.skip	USPACE
189estack0:
190
191#ifdef KGDB
192/*
193 * Another item that must be aligned, easiest to put it here.
194 */
195KGDB_STACK_SIZE = 2048
196	.globl	_C_LABEL(kgdb_stack)
197_C_LABEL(kgdb_stack):
198	.skip	KGDB_STACK_SIZE		! hope this is enough
199#endif
200
201/*
202 * cpcb points to the current pcb (and hence u. area).
203 * Initially this is the special one.
204 */
205cpcb = CPUINFO_VA + CPUINFO_CURPCB
206
207/* curproc points to the current process that has the CPU */
208curproc = CPUINFO_VA + CPUINFO_CURPROC
209
210/*
211 * cputyp is the current cpu type, used to distinguish between
212 * the many variations of different sun4* machines. It contains
213 * the value CPU_SUN4, CPU_SUN4C, or CPU_SUN4M.
214 * (Statically initialized to 1; presumably overwritten during early
215 * startup once the machine type is probed — confirm in dostart.)
 */
215	.globl	_C_LABEL(cputyp)
216_C_LABEL(cputyp):
217	.word	1
218
219#if defined(SUN4C) || defined(SUN4M)
/*
 * Writable buffer holding the machine-type name ("sun4c" plus padding
 * so a longer name fits) and the name of the PROM property used to
 * identify the machine.  NOTE(review): assumed to be rewritten by the
 * startup probe code — verify against dostart.
 */
220cputypval:
221	.asciz	"sun4c"
222	.ascii	"     "
223cputypvar:
224	.asciz	"compatible"
225	_ALIGN
226#endif
227
228/*
229 * These variables are pointed to by the cpp symbols PGSHIFT, NBPG,
230 * and PGOFSET.
231 */
232	.globl	_C_LABEL(pgshift), _C_LABEL(nbpg), _C_LABEL(pgofset)
233_C_LABEL(pgshift):
234	.word	0
235_C_LABEL(nbpg):
236	.word	0
237_C_LABEL(pgofset):
238	.word	0
239
/* Pointer to the trap vector table in use; filled in at run time */
240	.globl	_C_LABEL(trapbase)
241_C_LABEL(trapbase):
242	.word	0
243
244#if 0
245#if defined(SUN4M)
246_mapme:
247	.asciz "0 0 f8000000 15c6a0 map-pages"
248#endif
249#endif
250
/*
 * "Unsupported machine" messages.  The sun4d/sun4m/sun4c variants are
 * Forth fragments (note the `cr .( ... ) cr') — presumably handed to the
 * OpenPROM for display; the sun4 variant is plain text for the zs console.
 */
251#if !defined(SUN4D)
252sun4d_notsup:
253	.asciz	"cr .( NetBSD/sparc: this kernel does not support the sun4d) cr"
254#endif
255#if !defined(SUN4M)
256sun4m_notsup:
257	.asciz	"cr .( NetBSD/sparc: this kernel does not support the sun4m) cr"
258#endif
259#if !defined(SUN4C)
260sun4c_notsup:
261	.asciz	"cr .( NetBSD/sparc: this kernel does not support the sun4c) cr"
262#endif
263#if !defined(SUN4)
264sun4_notsup:
265	! the extra characters at the end are to ensure the zs fifo drains
266	! before we halt. Sick, eh?
267	.asciz	"NetBSD/sparc: this kernel does not support the sun4\n\r \b"
268#endif
269	_ALIGN
270
271	.text				! start of the kernel text segment
272
273/*
274 * The first thing in the real text segment is the trap vector table,
275 * which must be aligned on a 4096 byte boundary.  The text segment
276 * starts beyond page 0 of KERNBASE so that there is a red zone
277 * between user and kernel space.  Since the boot ROM loads us at
278 * PROM_LOADADDR, it is far easier to start at KERNBASE+PROM_LOADADDR than to
279 * buck the trend.  This is two or four pages in (depending on if
280 * pagesize is 8192 or 4096).    We place two items in this area:
281 * the message buffer (phys addr 0) and the cpu_softc structure for
282 * the first processor in the system (phys addr 0x2000).
283 * Because the message buffer is in our "red zone" between user and
284 * kernel space we remap it in configure() to another location and
285 * invalidate the mapping at KERNBASE.
286 */
287
288/*
289 * Each trap has room for four instructions, of which one perforce must
290 * be a branch.  On entry the hardware has copied pc and npc to %l1 and
291 * %l2 respectively.  We use two more to read the psr into %l0, and to
292 * put the trap type value into %l3 (with a few exceptions below).
293 * We could read the trap type field of %tbr later in the code instead,
294 * but there is no need, and that would require more instructions
295 * (read+mask, vs 1 `mov' here).
296 *
297 * I used to generate these numbers by address arithmetic, but gas's
298 * expression evaluator has about as much sense as your average slug
299 * (oddly enough, the code looks about as slimy too).  Thus, all the
300 * trap numbers are given as arguments to the trap macros.  This means
301 * there is one line per trap.  Sigh.
302 *
303 * Note that only the local registers may be used, since the trap
304 * window is potentially the last window.  Its `in' registers are
305 * the previous window's outs (as usual), but more important, its
306 * `out' registers may be in use as the `topmost' window's `in' registers.
307 * The global registers are of course verboten (well, until we save
308 * them away).
309 *
310 * Hardware interrupt vectors can be `linked'---the linkage is to regular
311 * C code---or rewired to fast in-window handlers.  The latter are good
312 * for unbuffered hardware like the Zilog serial chip and the AMD audio
313 * chip, where many interrupts can be handled trivially with pseudo-DMA or
314 * similar.  Only one `fast' interrupt can be used per level, however, and
315 * direct and `fast' interrupts are incompatible.  Routines in intr.c
316 * handle setting these, with optional paranoia.
317 */
318
/*
 * Each of the following macros expands to exactly one trap-table slot:
 * four instructions (16 bytes).  In each, the `mov %psr, %l0' executes
 * in the delay slot of the branch, so %l0 holds the psr on arrival at
 * the handler; %l1/%l2 already hold pc/npc (saved by the hardware).
 */
319	/* regular vectored traps */
320#define	VTRAP(type, label) \
321	mov (type), %l3; b label; mov %psr, %l0; nop
322
323	/* hardware interrupts (can be linked or made `fast') */
324#define	HARDINT44C(lev) \
325	mov (lev), %l3; b _C_LABEL(sparc_interrupt44c); mov %psr, %l0; nop
326
327	/* hardware interrupts (can be linked or made `fast') */
328#define	HARDINT4M(lev) \
329	mov (lev), %l3; b _C_LABEL(sparc_interrupt4m); mov %psr, %l0; nop
330
331	/* software interrupts (may not be made direct, sorry---but you
332	   should not be using them trivially anyway) */
333#define	SOFTINT44C(lev, bit) \
334	mov (lev), %l3; mov (bit), %l4; b softintr_sun44c; mov %psr, %l0
335
336	/* There's no SOFTINT4M(): both hard and soft vector the same way */
337
338	/* traps that just call trap() */
339#define	TRAP(type)	VTRAP(type, slowtrap)
340
341	/* architecturally undefined traps (cause panic) */
342#define	UTRAP(type)	VTRAP(type, slowtrap)
343
344	/* software undefined traps (may be replaced) */
345#define	STRAP(type)	VTRAP(type, slowtrap)
346
347/* breakpoint acts differently under kgdb */
348#ifdef KGDB
349#define	BPT		VTRAP(T_BREAKPOINT, bpt)
350#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
351#else
352#define	BPT		TRAP(T_BREAKPOINT)
353#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
354#endif
355
356/* special high-speed 1-instruction-shaved-off traps (get nothing in %l3) */
357#define	SYSCALL		b _C_LABEL(_syscall); mov %psr, %l0; nop; nop
358#define	WINDOW_OF	b window_of; mov %psr, %l0; nop; nop
359#define	WINDOW_UF	b window_uf; mov %psr, %l0; nop; nop
360#ifdef notyet
361#define	ZS_INTERRUPT	b zshard; mov %psr, %l0; nop; nop
362#else
363#define	ZS_INTERRUPT44C	HARDINT44C(12)
364#define	ZS_INTERRUPT4M	HARDINT4M(12)
365#endif
366
/* Kernel entry point; `start' is also exported as `kernel_text'. */
367	.globl	_ASM_LABEL(start), _C_LABEL(kernel_text)
368	_C_LABEL(kernel_text) = start		! for kvm_mkdb(8)
369_ASM_LABEL(start):
370/*
371 * Put sun4 traptable first, since it needs the most stringent alignment (8192)
372 */
373#if defined(SUN4)
/*
 * sun4 trap vector table: 256 vectors x 4 instructions (16 bytes)
 * = 4096 bytes.  Unannotated UTRAP()/STRAP() entries are unused
 * vectors that funnel into slowtrap.
 */
374trapbase_sun4:
375	/* trap 0 is special since we cannot receive it */
376	b dostart; nop; nop; nop	! 00 = reset (fake)
377	VTRAP(T_TEXTFAULT, memfault_sun4)	! 01 = instr. fetch fault
378	TRAP(T_ILLINST)			! 02 = illegal instruction
379	TRAP(T_PRIVINST)		! 03 = privileged instruction
380	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
381	WINDOW_OF			! 05 = window overflow
382	WINDOW_UF			! 06 = window underflow
383	TRAP(T_ALIGN)			! 07 = address alignment error
384	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
385	VTRAP(T_DATAFAULT, memfault_sun4)	! 09 = data fetch fault
386	TRAP(T_TAGOF)			! 0a = tag overflow
387	UTRAP(0x0b)
388	UTRAP(0x0c)
389	UTRAP(0x0d)
390	UTRAP(0x0e)
391	UTRAP(0x0f)
392	UTRAP(0x10)
393	SOFTINT44C(1, IE_L1)		! 11 = level 1 interrupt
394	HARDINT44C(2)			! 12 = level 2 interrupt
395	HARDINT44C(3)			! 13 = level 3 interrupt
396	SOFTINT44C(4, IE_L4)		! 14 = level 4 interrupt
397	HARDINT44C(5)			! 15 = level 5 interrupt
398	SOFTINT44C(6, IE_L6)		! 16 = level 6 interrupt
399	HARDINT44C(7)			! 17 = level 7 interrupt
400	HARDINT44C(8)			! 18 = level 8 interrupt
401	HARDINT44C(9)			! 19 = level 9 interrupt
402	HARDINT44C(10)			! 1a = level 10 interrupt
403	HARDINT44C(11)			! 1b = level 11 interrupt
404	ZS_INTERRUPT44C			! 1c = level 12 (zs) interrupt
405	HARDINT44C(13)			! 1d = level 13 interrupt
406	HARDINT44C(14)			! 1e = level 14 interrupt
407	VTRAP(15, nmi_sun4)		! 1f = nonmaskable interrupt
408	UTRAP(0x20)
409	UTRAP(0x21)
410	UTRAP(0x22)
411	UTRAP(0x23)
412	TRAP(T_CPDISABLED)	! 24 = coprocessor instr, EC bit off in psr
413	UTRAP(0x25)
414	UTRAP(0x26)
415	UTRAP(0x27)
416	TRAP(T_CPEXCEPTION)	! 28 = coprocessor exception
417	UTRAP(0x29)
418	UTRAP(0x2a)
419	UTRAP(0x2b)
420	UTRAP(0x2c)
421	UTRAP(0x2d)
422	UTRAP(0x2e)
423	UTRAP(0x2f)
424	UTRAP(0x30)
425	UTRAP(0x31)
426	UTRAP(0x32)
427	UTRAP(0x33)
428	UTRAP(0x34)
429	UTRAP(0x35)
430	UTRAP(0x36)
431	UTRAP(0x37)
432	UTRAP(0x38)
433	UTRAP(0x39)
434	UTRAP(0x3a)
435	UTRAP(0x3b)
436	UTRAP(0x3c)
437	UTRAP(0x3d)
438	UTRAP(0x3e)
439	UTRAP(0x3f)
440	UTRAP(0x40)
441	UTRAP(0x41)
442	UTRAP(0x42)
443	UTRAP(0x43)
444	UTRAP(0x44)
445	UTRAP(0x45)
446	UTRAP(0x46)
447	UTRAP(0x47)
448	UTRAP(0x48)
449	UTRAP(0x49)
450	UTRAP(0x4a)
451	UTRAP(0x4b)
452	UTRAP(0x4c)
453	UTRAP(0x4d)
454	UTRAP(0x4e)
455	UTRAP(0x4f)
456	UTRAP(0x50)
457	UTRAP(0x51)
458	UTRAP(0x52)
459	UTRAP(0x53)
460	UTRAP(0x54)
461	UTRAP(0x55)
462	UTRAP(0x56)
463	UTRAP(0x57)
464	UTRAP(0x58)
465	UTRAP(0x59)
466	UTRAP(0x5a)
467	UTRAP(0x5b)
468	UTRAP(0x5c)
469	UTRAP(0x5d)
470	UTRAP(0x5e)
471	UTRAP(0x5f)
472	UTRAP(0x60)
473	UTRAP(0x61)
474	UTRAP(0x62)
475	UTRAP(0x63)
476	UTRAP(0x64)
477	UTRAP(0x65)
478	UTRAP(0x66)
479	UTRAP(0x67)
480	UTRAP(0x68)
481	UTRAP(0x69)
482	UTRAP(0x6a)
483	UTRAP(0x6b)
484	UTRAP(0x6c)
485	UTRAP(0x6d)
486	UTRAP(0x6e)
487	UTRAP(0x6f)
488	UTRAP(0x70)
489	UTRAP(0x71)
490	UTRAP(0x72)
491	UTRAP(0x73)
492	UTRAP(0x74)
493	UTRAP(0x75)
494	UTRAP(0x76)
495	UTRAP(0x77)
496	UTRAP(0x78)
497	UTRAP(0x79)
498	UTRAP(0x7a)
499	UTRAP(0x7b)
500	UTRAP(0x7c)
501	UTRAP(0x7d)
502	UTRAP(0x7e)
503	UTRAP(0x7f)
504	SYSCALL			! 80 = sun syscall
505	BPT			! 81 = pseudo breakpoint instruction
506	TRAP(T_DIV0)		! 82 = divide by zero
507	TRAP(T_FLUSHWIN)	! 83 = flush windows
508	TRAP(T_CLEANWIN)	! 84 = provide clean windows
509	TRAP(T_RANGECHECK)	! 85 = ???
510	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
511	TRAP(T_INTOF)		! 87 = integer overflow
512	SYSCALL			! 88 = svr4 syscall
513	SYSCALL			! 89 = bsd syscall
514	BPT_KGDB_EXEC		! 8a = enter kernel gdb on kernel startup
515	STRAP(0x8b)
516	STRAP(0x8c)
517	STRAP(0x8d)
518	STRAP(0x8e)
519	STRAP(0x8f)
520	STRAP(0x90)
521	STRAP(0x91)
522	STRAP(0x92)
523	STRAP(0x93)
524	STRAP(0x94)
525	STRAP(0x95)
526	STRAP(0x96)
527	STRAP(0x97)
528	STRAP(0x98)
529	STRAP(0x99)
530	STRAP(0x9a)
531	STRAP(0x9b)
532	STRAP(0x9c)
533	STRAP(0x9d)
534	STRAP(0x9e)
535	STRAP(0x9f)
536	STRAP(0xa0)
537	STRAP(0xa1)
538	STRAP(0xa2)
539	STRAP(0xa3)
540	STRAP(0xa4)
541	STRAP(0xa5)
542	STRAP(0xa6)
543	STRAP(0xa7)
544	STRAP(0xa8)
545	STRAP(0xa9)
546	STRAP(0xaa)
547	STRAP(0xab)
548	STRAP(0xac)
549	STRAP(0xad)
550	STRAP(0xae)
551	STRAP(0xaf)
552	STRAP(0xb0)
553	STRAP(0xb1)
554	STRAP(0xb2)
555	STRAP(0xb3)
556	STRAP(0xb4)
557	STRAP(0xb5)
558	STRAP(0xb6)
559	STRAP(0xb7)
560	STRAP(0xb8)
561	STRAP(0xb9)
562	STRAP(0xba)
563	STRAP(0xbb)
564	STRAP(0xbc)
565	STRAP(0xbd)
566	STRAP(0xbe)
567	STRAP(0xbf)
568	STRAP(0xc0)
569	STRAP(0xc1)
570	STRAP(0xc2)
571	STRAP(0xc3)
572	STRAP(0xc4)
573	STRAP(0xc5)
574	STRAP(0xc6)
575	STRAP(0xc7)
576	STRAP(0xc8)
577	STRAP(0xc9)
578	STRAP(0xca)
579	STRAP(0xcb)
580	STRAP(0xcc)
581	STRAP(0xcd)
582	STRAP(0xce)
583	STRAP(0xcf)
584	STRAP(0xd0)
585	STRAP(0xd1)
586	STRAP(0xd2)
587	STRAP(0xd3)
588	STRAP(0xd4)
589	STRAP(0xd5)
590	STRAP(0xd6)
591	STRAP(0xd7)
592	STRAP(0xd8)
593	STRAP(0xd9)
594	STRAP(0xda)
595	STRAP(0xdb)
596	STRAP(0xdc)
597	STRAP(0xdd)
598	STRAP(0xde)
599	STRAP(0xdf)
600	STRAP(0xe0)
601	STRAP(0xe1)
602	STRAP(0xe2)
603	STRAP(0xe3)
604	STRAP(0xe4)
605	STRAP(0xe5)
606	STRAP(0xe6)
607	STRAP(0xe7)
608	STRAP(0xe8)
609	STRAP(0xe9)
610	STRAP(0xea)
611	STRAP(0xeb)
612	STRAP(0xec)
613	STRAP(0xed)
614	STRAP(0xee)
615	STRAP(0xef)
616	STRAP(0xf0)
617	STRAP(0xf1)
618	STRAP(0xf2)
619	STRAP(0xf3)
620	STRAP(0xf4)
621	STRAP(0xf5)
622	STRAP(0xf6)
623	STRAP(0xf7)
624	STRAP(0xf8)
625	STRAP(0xf9)
626	STRAP(0xfa)
627	STRAP(0xfb)
628	STRAP(0xfc)
629	STRAP(0xfd)
630	STRAP(0xfe)
631	STRAP(0xff)
632#endif
633
634#if defined(SUN4C)
/*
 * sun4c trap vector table: identical layout to the sun4 table above,
 * but faults vector to the sun4c-specific handlers.  256 vectors x
 * 16 bytes = 4096 bytes.
 */
635trapbase_sun4c:
636/* trap 0 is special since we cannot receive it */
637	b dostart; nop; nop; nop	! 00 = reset (fake)
638	VTRAP(T_TEXTFAULT, memfault_sun4c)	! 01 = instr. fetch fault
639	TRAP(T_ILLINST)			! 02 = illegal instruction
640	TRAP(T_PRIVINST)		! 03 = privileged instruction
641	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
642	WINDOW_OF			! 05 = window overflow
643	WINDOW_UF			! 06 = window underflow
644	TRAP(T_ALIGN)			! 07 = address alignment error
645	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
646	VTRAP(T_DATAFAULT, memfault_sun4c)	! 09 = data fetch fault
647	TRAP(T_TAGOF)			! 0a = tag overflow
648	UTRAP(0x0b)
649	UTRAP(0x0c)
650	UTRAP(0x0d)
651	UTRAP(0x0e)
652	UTRAP(0x0f)
653	UTRAP(0x10)
654	SOFTINT44C(1, IE_L1)		! 11 = level 1 interrupt
655	HARDINT44C(2)			! 12 = level 2 interrupt
656	HARDINT44C(3)			! 13 = level 3 interrupt
657	SOFTINT44C(4, IE_L4)		! 14 = level 4 interrupt
658	HARDINT44C(5)			! 15 = level 5 interrupt
659	SOFTINT44C(6, IE_L6)		! 16 = level 6 interrupt
660	HARDINT44C(7)			! 17 = level 7 interrupt
661	HARDINT44C(8)			! 18 = level 8 interrupt
662	HARDINT44C(9)			! 19 = level 9 interrupt
663	HARDINT44C(10)			! 1a = level 10 interrupt
664	HARDINT44C(11)			! 1b = level 11 interrupt
665	ZS_INTERRUPT44C			! 1c = level 12 (zs) interrupt
666	HARDINT44C(13)			! 1d = level 13 interrupt
667	HARDINT44C(14)			! 1e = level 14 interrupt
668	VTRAP(15, nmi_sun4c)		! 1f = nonmaskable interrupt
669	UTRAP(0x20)
670	UTRAP(0x21)
671	UTRAP(0x22)
672	UTRAP(0x23)
673	TRAP(T_CPDISABLED)	! 24 = coprocessor instr, EC bit off in psr
674	UTRAP(0x25)
675	UTRAP(0x26)
676	UTRAP(0x27)
677	TRAP(T_CPEXCEPTION)	! 28 = coprocessor exception
678	UTRAP(0x29)
679	UTRAP(0x2a)
680	UTRAP(0x2b)
681	UTRAP(0x2c)
682	UTRAP(0x2d)
683	UTRAP(0x2e)
684	UTRAP(0x2f)
685	UTRAP(0x30)
686	UTRAP(0x31)
687	UTRAP(0x32)
688	UTRAP(0x33)
689	UTRAP(0x34)
690	UTRAP(0x35)
691	UTRAP(0x36)
692	UTRAP(0x37)
693	UTRAP(0x38)
694	UTRAP(0x39)
695	UTRAP(0x3a)
696	UTRAP(0x3b)
697	UTRAP(0x3c)
698	UTRAP(0x3d)
699	UTRAP(0x3e)
700	UTRAP(0x3f)
701	UTRAP(0x40)
702	UTRAP(0x41)
703	UTRAP(0x42)
704	UTRAP(0x43)
705	UTRAP(0x44)
706	UTRAP(0x45)
707	UTRAP(0x46)
708	UTRAP(0x47)
709	UTRAP(0x48)
710	UTRAP(0x49)
711	UTRAP(0x4a)
712	UTRAP(0x4b)
713	UTRAP(0x4c)
714	UTRAP(0x4d)
715	UTRAP(0x4e)
716	UTRAP(0x4f)
717	UTRAP(0x50)
718	UTRAP(0x51)
719	UTRAP(0x52)
720	UTRAP(0x53)
721	UTRAP(0x54)
722	UTRAP(0x55)
723	UTRAP(0x56)
724	UTRAP(0x57)
725	UTRAP(0x58)
726	UTRAP(0x59)
727	UTRAP(0x5a)
728	UTRAP(0x5b)
729	UTRAP(0x5c)
730	UTRAP(0x5d)
731	UTRAP(0x5e)
732	UTRAP(0x5f)
733	UTRAP(0x60)
734	UTRAP(0x61)
735	UTRAP(0x62)
736	UTRAP(0x63)
737	UTRAP(0x64)
738	UTRAP(0x65)
739	UTRAP(0x66)
740	UTRAP(0x67)
741	UTRAP(0x68)
742	UTRAP(0x69)
743	UTRAP(0x6a)
744	UTRAP(0x6b)
745	UTRAP(0x6c)
746	UTRAP(0x6d)
747	UTRAP(0x6e)
748	UTRAP(0x6f)
749	UTRAP(0x70)
750	UTRAP(0x71)
751	UTRAP(0x72)
752	UTRAP(0x73)
753	UTRAP(0x74)
754	UTRAP(0x75)
755	UTRAP(0x76)
756	UTRAP(0x77)
757	UTRAP(0x78)
758	UTRAP(0x79)
759	UTRAP(0x7a)
760	UTRAP(0x7b)
761	UTRAP(0x7c)
762	UTRAP(0x7d)
763	UTRAP(0x7e)
764	UTRAP(0x7f)
765	SYSCALL			! 80 = sun syscall
766	BPT			! 81 = pseudo breakpoint instruction
767	TRAP(T_DIV0)		! 82 = divide by zero
768	TRAP(T_FLUSHWIN)	! 83 = flush windows
769	TRAP(T_CLEANWIN)	! 84 = provide clean windows
770	TRAP(T_RANGECHECK)	! 85 = ???
771	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
772	TRAP(T_INTOF)		! 87 = integer overflow
773	SYSCALL			! 88 = svr4 syscall
774	SYSCALL			! 89 = bsd syscall
775	BPT_KGDB_EXEC		! 8a = enter kernel gdb on kernel startup
776	STRAP(0x8b)
777	STRAP(0x8c)
778	STRAP(0x8d)
779	STRAP(0x8e)
780	STRAP(0x8f)
781	STRAP(0x90)
782	STRAP(0x91)
783	STRAP(0x92)
784	STRAP(0x93)
785	STRAP(0x94)
786	STRAP(0x95)
787	STRAP(0x96)
788	STRAP(0x97)
789	STRAP(0x98)
790	STRAP(0x99)
791	STRAP(0x9a)
792	STRAP(0x9b)
793	STRAP(0x9c)
794	STRAP(0x9d)
795	STRAP(0x9e)
796	STRAP(0x9f)
797	STRAP(0xa0)
798	STRAP(0xa1)
799	STRAP(0xa2)
800	STRAP(0xa3)
801	STRAP(0xa4)
802	STRAP(0xa5)
803	STRAP(0xa6)
804	STRAP(0xa7)
805	STRAP(0xa8)
806	STRAP(0xa9)
807	STRAP(0xaa)
808	STRAP(0xab)
809	STRAP(0xac)
810	STRAP(0xad)
811	STRAP(0xae)
812	STRAP(0xaf)
813	STRAP(0xb0)
814	STRAP(0xb1)
815	STRAP(0xb2)
816	STRAP(0xb3)
817	STRAP(0xb4)
818	STRAP(0xb5)
819	STRAP(0xb6)
820	STRAP(0xb7)
821	STRAP(0xb8)
822	STRAP(0xb9)
823	STRAP(0xba)
824	STRAP(0xbb)
825	STRAP(0xbc)
826	STRAP(0xbd)
827	STRAP(0xbe)
828	STRAP(0xbf)
829	STRAP(0xc0)
830	STRAP(0xc1)
831	STRAP(0xc2)
832	STRAP(0xc3)
833	STRAP(0xc4)
834	STRAP(0xc5)
835	STRAP(0xc6)
836	STRAP(0xc7)
837	STRAP(0xc8)
838	STRAP(0xc9)
839	STRAP(0xca)
840	STRAP(0xcb)
841	STRAP(0xcc)
842	STRAP(0xcd)
843	STRAP(0xce)
844	STRAP(0xcf)
845	STRAP(0xd0)
846	STRAP(0xd1)
847	STRAP(0xd2)
848	STRAP(0xd3)
849	STRAP(0xd4)
850	STRAP(0xd5)
851	STRAP(0xd6)
852	STRAP(0xd7)
853	STRAP(0xd8)
854	STRAP(0xd9)
855	STRAP(0xda)
856	STRAP(0xdb)
857	STRAP(0xdc)
858	STRAP(0xdd)
859	STRAP(0xde)
860	STRAP(0xdf)
861	STRAP(0xe0)
862	STRAP(0xe1)
863	STRAP(0xe2)
864	STRAP(0xe3)
865	STRAP(0xe4)
866	STRAP(0xe5)
867	STRAP(0xe6)
868	STRAP(0xe7)
869	STRAP(0xe8)
870	STRAP(0xe9)
871	STRAP(0xea)
872	STRAP(0xeb)
873	STRAP(0xec)
874	STRAP(0xed)
875	STRAP(0xee)
876	STRAP(0xef)
877	STRAP(0xf0)
878	STRAP(0xf1)
879	STRAP(0xf2)
880	STRAP(0xf3)
881	STRAP(0xf4)
882	STRAP(0xf5)
883	STRAP(0xf6)
884	STRAP(0xf7)
885	STRAP(0xf8)
886	STRAP(0xf9)
887	STRAP(0xfa)
888	STRAP(0xfb)
889	STRAP(0xfc)
890	STRAP(0xfd)
891	STRAP(0xfe)
892	STRAP(0xff)
893#endif
894
895#if defined(SUN4M)
/*
 * sun4m trap vector table.  Differences from sun4/sun4c: all interrupt
 * levels vector through HARDINT4M() (no soft-interrupt macro), and
 * vector 0x2b is the SuperSPARC store buffer fault.  256 vectors x
 * 16 bytes = 4096 bytes.
 */
896trapbase_sun4m:
897/* trap 0 is special since we cannot receive it */
898	b dostart; nop; nop; nop	! 00 = reset (fake)
899	VTRAP(T_TEXTFAULT, memfault_sun4m)	! 01 = instr. fetch fault
900	TRAP(T_ILLINST)			! 02 = illegal instruction
901	TRAP(T_PRIVINST)		! 03 = privileged instruction
902	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
903	WINDOW_OF			! 05 = window overflow
904	WINDOW_UF			! 06 = window underflow
905	TRAP(T_ALIGN)			! 07 = address alignment error
906	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
907	VTRAP(T_DATAFAULT, memfault_sun4m)	! 09 = data fetch fault
908	TRAP(T_TAGOF)			! 0a = tag overflow
909	UTRAP(0x0b)
910	UTRAP(0x0c)
911	UTRAP(0x0d)
912	UTRAP(0x0e)
913	UTRAP(0x0f)
914	UTRAP(0x10)
915	HARDINT4M(1)			! 11 = level 1 interrupt
916	HARDINT4M(2)			! 12 = level 2 interrupt
917	HARDINT4M(3)			! 13 = level 3 interrupt
918	HARDINT4M(4)			! 14 = level 4 interrupt
919	HARDINT4M(5)			! 15 = level 5 interrupt
920	HARDINT4M(6)			! 16 = level 6 interrupt
921	HARDINT4M(7)			! 17 = level 7 interrupt
922	HARDINT4M(8)			! 18 = level 8 interrupt
923	HARDINT4M(9)			! 19 = level 9 interrupt
924	HARDINT4M(10)			! 1a = level 10 interrupt
925	HARDINT4M(11)			! 1b = level 11 interrupt
926	ZS_INTERRUPT4M			! 1c = level 12 (zs) interrupt
927	HARDINT4M(13)			! 1d = level 13 interrupt
928	HARDINT4M(14)			! 1e = level 14 interrupt
929	VTRAP(15, nmi_sun4m)		! 1f = nonmaskable interrupt
930	UTRAP(0x20)
931	UTRAP(0x21)
932	UTRAP(0x22)
933	UTRAP(0x23)
934	TRAP(T_CPDISABLED)	! 24 = coprocessor instr, EC bit off in psr
935	UTRAP(0x25)
936	UTRAP(0x26)
937	UTRAP(0x27)
938	TRAP(T_CPEXCEPTION)	! 28 = coprocessor exception
939	UTRAP(0x29)
940	UTRAP(0x2a)
941	VTRAP(T_STOREBUFFAULT, memfault_sun4m) ! 2b = SuperSPARC store buffer fault
942	UTRAP(0x2c)
943	UTRAP(0x2d)
944	UTRAP(0x2e)
945	UTRAP(0x2f)
946	UTRAP(0x30)
947	UTRAP(0x31)
948	UTRAP(0x32)
949	UTRAP(0x33)
950	UTRAP(0x34)
951	UTRAP(0x35)
952	UTRAP(0x36)
953	UTRAP(0x37)
954	UTRAP(0x38)
955	UTRAP(0x39)
956	UTRAP(0x3a)
957	UTRAP(0x3b)
958	UTRAP(0x3c)
959	UTRAP(0x3d)
960	UTRAP(0x3e)
961	UTRAP(0x3f)
962	UTRAP(0x40)
963	UTRAP(0x41)
964	UTRAP(0x42)
965	UTRAP(0x43)
966	UTRAP(0x44)
967	UTRAP(0x45)
968	UTRAP(0x46)
969	UTRAP(0x47)
970	UTRAP(0x48)
971	UTRAP(0x49)
972	UTRAP(0x4a)
973	UTRAP(0x4b)
974	UTRAP(0x4c)
975	UTRAP(0x4d)
976	UTRAP(0x4e)
977	UTRAP(0x4f)
978	UTRAP(0x50)
979	UTRAP(0x51)
980	UTRAP(0x52)
981	UTRAP(0x53)
982	UTRAP(0x54)
983	UTRAP(0x55)
984	UTRAP(0x56)
985	UTRAP(0x57)
986	UTRAP(0x58)
987	UTRAP(0x59)
988	UTRAP(0x5a)
989	UTRAP(0x5b)
990	UTRAP(0x5c)
991	UTRAP(0x5d)
992	UTRAP(0x5e)
993	UTRAP(0x5f)
994	UTRAP(0x60)
995	UTRAP(0x61)
996	UTRAP(0x62)
997	UTRAP(0x63)
998	UTRAP(0x64)
999	UTRAP(0x65)
1000	UTRAP(0x66)
1001	UTRAP(0x67)
1002	UTRAP(0x68)
1003	UTRAP(0x69)
1004	UTRAP(0x6a)
1005	UTRAP(0x6b)
1006	UTRAP(0x6c)
1007	UTRAP(0x6d)
1008	UTRAP(0x6e)
1009	UTRAP(0x6f)
1010	UTRAP(0x70)
1011	UTRAP(0x71)
1012	UTRAP(0x72)
1013	UTRAP(0x73)
1014	UTRAP(0x74)
1015	UTRAP(0x75)
1016	UTRAP(0x76)
1017	UTRAP(0x77)
1018	UTRAP(0x78)
1019	UTRAP(0x79)
1020	UTRAP(0x7a)
1021	UTRAP(0x7b)
1022	UTRAP(0x7c)
1023	UTRAP(0x7d)
1024	UTRAP(0x7e)
1025	UTRAP(0x7f)
1026	SYSCALL			! 80 = sun syscall
1027	BPT			! 81 = pseudo breakpoint instruction
1028	TRAP(T_DIV0)		! 82 = divide by zero
1029	TRAP(T_FLUSHWIN)	! 83 = flush windows
1030	TRAP(T_CLEANWIN)	! 84 = provide clean windows
1031	TRAP(T_RANGECHECK)	! 85 = ???
1032	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
1033	TRAP(T_INTOF)		! 87 = integer overflow
1034	SYSCALL			! 88 = svr4 syscall
1035	SYSCALL			! 89 = bsd syscall
1036	BPT_KGDB_EXEC		! 8a = enter kernel gdb on kernel startup
1037	STRAP(0x8b)
1038	STRAP(0x8c)
1039	STRAP(0x8d)
1040	STRAP(0x8e)
1041	STRAP(0x8f)
1042	STRAP(0x90)
1043	STRAP(0x91)
1044	STRAP(0x92)
1045	STRAP(0x93)
1046	STRAP(0x94)
1047	STRAP(0x95)
1048	STRAP(0x96)
1049	STRAP(0x97)
1050	STRAP(0x98)
1051	STRAP(0x99)
1052	STRAP(0x9a)
1053	STRAP(0x9b)
1054	STRAP(0x9c)
1055	STRAP(0x9d)
1056	STRAP(0x9e)
1057	STRAP(0x9f)
1058	STRAP(0xa0)
1059	STRAP(0xa1)
1060	STRAP(0xa2)
1061	STRAP(0xa3)
1062	STRAP(0xa4)
1063	STRAP(0xa5)
1064	STRAP(0xa6)
1065	STRAP(0xa7)
1066	STRAP(0xa8)
1067	STRAP(0xa9)
1068	STRAP(0xaa)
1069	STRAP(0xab)
1070	STRAP(0xac)
1071	STRAP(0xad)
1072	STRAP(0xae)
1073	STRAP(0xaf)
1074	STRAP(0xb0)
1075	STRAP(0xb1)
1076	STRAP(0xb2)
1077	STRAP(0xb3)
1078	STRAP(0xb4)
1079	STRAP(0xb5)
1080	STRAP(0xb6)
1081	STRAP(0xb7)
1082	STRAP(0xb8)
1083	STRAP(0xb9)
1084	STRAP(0xba)
1085	STRAP(0xbb)
1086	STRAP(0xbc)
1087	STRAP(0xbd)
1088	STRAP(0xbe)
1089	STRAP(0xbf)
1090	STRAP(0xc0)
1091	STRAP(0xc1)
1092	STRAP(0xc2)
1093	STRAP(0xc3)
1094	STRAP(0xc4)
1095	STRAP(0xc5)
1096	STRAP(0xc6)
1097	STRAP(0xc7)
1098	STRAP(0xc8)
1099	STRAP(0xc9)
1100	STRAP(0xca)
1101	STRAP(0xcb)
1102	STRAP(0xcc)
1103	STRAP(0xcd)
1104	STRAP(0xce)
1105	STRAP(0xcf)
1106	STRAP(0xd0)
1107	STRAP(0xd1)
1108	STRAP(0xd2)
1109	STRAP(0xd3)
1110	STRAP(0xd4)
1111	STRAP(0xd5)
1112	STRAP(0xd6)
1113	STRAP(0xd7)
1114	STRAP(0xd8)
1115	STRAP(0xd9)
1116	STRAP(0xda)
1117	STRAP(0xdb)
1118	STRAP(0xdc)
1119	STRAP(0xdd)
1120	STRAP(0xde)
1121	STRAP(0xdf)
1122	STRAP(0xe0)
1123	STRAP(0xe1)
1124	STRAP(0xe2)
1125	STRAP(0xe3)
1126	STRAP(0xe4)
1127	STRAP(0xe5)
1128	STRAP(0xe6)
1129	STRAP(0xe7)
1130	STRAP(0xe8)
1131	STRAP(0xe9)
1132	STRAP(0xea)
1133	STRAP(0xeb)
1134	STRAP(0xec)
1135	STRAP(0xed)
1136	STRAP(0xee)
1137	STRAP(0xef)
1138	STRAP(0xf0)
1139	STRAP(0xf1)
1140	STRAP(0xf2)
1141	STRAP(0xf3)
1142	STRAP(0xf4)
1143	STRAP(0xf5)
1144	STRAP(0xf6)
1145	STRAP(0xf7)
1146	STRAP(0xf8)
1147	STRAP(0xf9)
1148	STRAP(0xfa)
1149	STRAP(0xfb)
1150	STRAP(0xfc)
1151	STRAP(0xfd)
1152	STRAP(0xfe)
1153	STRAP(0xff)
1154#endif
1155
1156/*
1157 * Pad the trap table to max page size.
1158 * Trap table size is 0x100 * 4instr * 4byte/instr = 4096 bytes;
1159 * need to .skip 4096 to pad to page size iff. the number of trap tables
1160 * defined above is odd.
1161 * (Max page size is 8192 — see the sun4 alignment note above; an even
1162 * number of 4096-byte tables already ends on an 8192-byte boundary.)
 */
1162#if (defined(SUN4) + defined(SUN4C) + defined(SUN4M)) % 2 == 1
1163	.skip	4096
1164#endif
1165
1166#ifdef DEBUG
1167/*
1168 * A hardware red zone is impossible.  We simulate one in software by
1169 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
1170 * This is expensive and is only enabled when debugging.
1171 */
1172
1173/* `redzone' is located in the per-CPU information structure */
1174_redzone = CPUINFO_VA + CPUINFO_REDZONE
1175	.data
1176#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
1177_redstack:
1178	.skip	REDSTACK
1179	.text
1180Lpanic_red:
1181	.asciz	"stack overflow"
1182	_ALIGN
1183
	/* NB: REDSIZE is not defined in this chunk — presumably defined
	   elsewhere in the file; TODO confirm. */
1184	/* set stack pointer redzone to base+minstack; alters base */
1185#define	SET_SP_REDZONE(base, tmp) \
1186	add	base, REDSIZE, base; \
1187	sethi	%hi(_redzone), tmp; \
1188	st	base, [tmp + %lo(_redzone)]
1189
1190	/* variant with a constant */
1191#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
1192	set	(const) + REDSIZE, tmp1; \
1193	sethi	%hi(_redzone), tmp2; \
1194	st	tmp1, [tmp2 + %lo(_redzone)]
1195
1196	/* variant with a variable & offset */
1197#define	SET_SP_REDZONE_VAR(var, offset, tmp1, tmp2) \
1198	sethi	%hi(var), tmp1; \
1199	ld	[tmp1 + %lo(var)], tmp1; \
	/* NOTE(review): only %hi(offset) is added — %lo(offset) is
	   dropped, so `offset' must be a multiple of 0x400 (sethi
	   granularity); verify at the call sites. */ \
1200	sethi	%hi(offset), tmp2; \
1201	add	tmp1, tmp2, tmp1; \
1202	SET_SP_REDZONE(tmp1, tmp2)
1203
1204	/* check stack pointer against redzone (uses two temps) */
1205#define	CHECK_SP_REDZONE(t1, t2) \
1206	sethi	%hi(_redzone), t1; \
1207	ld	[t1 + %lo(_redzone)], t2; \
1208	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
1209	bgeu	7f; nop;	/* and can continue normally */ \
1210	/* move to panic stack */ \
1211	st	%g0, [t1 + %lo(_redzone)]; \
1212	set	_redstack + REDSTACK - 96, %sp; \
1213	/* prevent panic() from lowering ipl */ \
	/* Fix: %hi(panicstr) must go in t1 (the store base below), not in
	   t2 — the old code put it in t2, which the following `set'
	   immediately clobbered, so the store went through the stale
	   %hi(_redzone) in t1 and panicstr was never actually set. */ \
1214	sethi	%hi(_C_LABEL(panicstr)), t1; \
1215	set	Lpanic_red, t2; \
1216	st	t2, [t1 + %lo(_C_LABEL(panicstr))]; \
1217	rd	%psr, t1;		/* t1 = splhigh() */ \
1218	or	t1, PSR_PIL, t2; \
1219	wr	t2, 0, %psr; \
1220	wr	t2, PSR_ET, %psr;	/* turn on traps */ \
1221	nop; nop; nop; \
1222	save	%sp, -CCFSZ, %sp;	/* preserve current window */ \
1223	sethi	%hi(Lpanic_red), %o0; \
1224	call	_C_LABEL(panic); or %o0, %lo(Lpanic_red), %o0; \
12257:
1226
1227#else
1228
1229#define	SET_SP_REDZONE(base, tmp)
1230#define	SET_SP_REDZONE_CONST(const, t1, t2)
1231#define	SET_SP_REDZONE_VAR(var, offset, t1, t2)
1232#define	CHECK_SP_REDZONE(t1, t2)
1233#endif /* DEBUG */
1234
1235/*
1236 * The window code must verify user stack addresses before using them.
1237 * A user stack pointer is invalid if:
1238 *	- it is not on an 8 byte boundary;
1239 *	- its pages (a register window, being 64 bytes, can occupy
1240 *	  two pages) are not readable or writable.
1241 * We define three separate macros here for testing user stack addresses.
1242 *
1243 * PTE_OF_ADDR locates a PTE, branching to a `bad address'
1244 *	handler if the stack pointer points into the hole in the
1245 *	address space (i.e., top 3 bits are not either all 1 or all 0);
1246 * CMP_PTE_USER_READ compares the located PTE against `user read' mode;
1247 * CMP_PTE_USER_WRITE compares the located PTE against `user write' mode.
1248 * The compares give `equal' if read or write is OK.
1249 *
1250 * Note that the user stack pointer usually points into high addresses
1251 * (top 3 bits all 1), so that is what we check first.
1252 *
1253 * The code below also assumes that PTE_OF_ADDR is safe in a delay
1254 * slot; it is, at it merely sets its `pte' register to a temporary value.
1255 */
1256#if defined(SUN4) || defined(SUN4C)
1257	/* input: addr, output: pte; aux: bad address label */
1258#define	PTE_OF_ADDR4_4C(addr, pte, bad, page_offset) \
1259	sra	addr, PG_VSHIFT, pte; \
1260	cmp	pte, -1; \
1261	be,a	1f; andn addr, page_offset, pte; \
1262	tst	pte; \
1263	bne	bad; EMPTY; \
1264	andn	addr, page_offset, pte; \
12651:
1266
1267	/* input: pte; output: condition codes */
1268#define	CMP_PTE_USER_READ4_4C(pte) \
1269	lda	[pte] ASI_PTE, pte; \
1270	srl	pte, PG_PROTSHIFT, pte; \
1271	andn	pte, (PG_W >> PG_PROTSHIFT), pte; \
1272	cmp	pte, PG_PROTUREAD
1273
1274	/* input: pte; output: condition codes */
1275#define	CMP_PTE_USER_WRITE4_4C(pte) \
1276	lda	[pte] ASI_PTE, pte; \
1277	srl	pte, PG_PROTSHIFT, pte; \
1278	cmp	pte, PG_PROTUWRITE
1279#endif
1280
1281/*
1282 * The Sun4M does not have the memory hole that the 4C does. Thus all
1283 * we need to do here is clear the page offset from addr.
1284 */
1285#if defined(SUN4M)
1286#define	PTE_OF_ADDR4M(addr, pte, bad, page_offset) \
1287	andn	addr, page_offset, pte
1288
1289/*
1290 * After obtaining the PTE through ASI_SRMMUFP, we read the Sync Fault
1291 * Status register. This is necessary on Hypersparcs which stores and
1292 * locks the fault address and status registers if the translation
1293 * fails (thanks to Chris Torek for finding this quirk).
1294 */
1295/* note: pmap currently does not use the PPROT_R_R and PPROT_RW_RW cases */
1296#define CMP_PTE_USER_READ4M(pte, tmp) \
1297	or	pte, ASI_SRMMUFP_L3, pte; \
1298	lda	[pte] ASI_SRMMUFP, pte; \
1299	set	SRMMU_SFSR, tmp; \
1300	and	pte, (SRMMU_TETYPE | SRMMU_PROT_MASK), pte; \
1301	cmp	pte, (SRMMU_TEPTE | PPROT_RWX_RWX); \
1302	be	8f; \
1303	 lda	[tmp] ASI_SRMMU, %g0; \
1304	cmp	pte, (SRMMU_TEPTE | PPROT_RX_RX); \
13058:
1306
1307
1308/* note: PTE bit 4 set implies no user writes */
1309#define CMP_PTE_USER_WRITE4M(pte, tmp) \
1310	or	pte, ASI_SRMMUFP_L3, pte; \
1311	lda	[pte] ASI_SRMMUFP, pte; \
1312	set	SRMMU_SFSR, tmp; \
1313	lda	[tmp] ASI_SRMMU, %g0; \
1314	and	pte, (SRMMU_TETYPE | 0x14), pte; \
1315	cmp	pte, (SRMMU_TEPTE | PPROT_WRITE)
1316#endif /* 4m */
1317
/*
 * Select among the PTE_OF_ADDR/CMP_PTE_USER_* implementations above.
 * When only one MMU family is configured the mapping is direct.  When
 * both are configured, each expansion begins with a branch carrying a
 * caller-supplied label (NOP_ON_4M_*) -- presumably so startup code
 * can patch that branch into a nop on sun4m machines, making the 4m
 * path fall through (patching code is not visible here; TODO confirm).
 */
#if defined(SUN4M) && !(defined(SUN4C) || defined(SUN4))

#define PTE_OF_ADDR(addr, pte, bad, page_offset, label) \
	PTE_OF_ADDR4M(addr, pte, bad, page_offset)
#define CMP_PTE_USER_WRITE(pte, tmp, label)	CMP_PTE_USER_WRITE4M(pte,tmp)
#define CMP_PTE_USER_READ(pte, tmp, label)	CMP_PTE_USER_READ4M(pte,tmp)

#elif (defined(SUN4C) || defined(SUN4)) && !defined(SUN4M)

#define PTE_OF_ADDR(addr, pte, bad, page_offset,label) \
	PTE_OF_ADDR4_4C(addr, pte, bad, page_offset)
#define CMP_PTE_USER_WRITE(pte, tmp, label)	CMP_PTE_USER_WRITE4_4C(pte)
#define CMP_PTE_USER_READ(pte, tmp, label)	CMP_PTE_USER_READ4_4C(pte)

#else /* both defined, ugh */

#define	PTE_OF_ADDR(addr, pte, bad, page_offset, label) \
label:	b,a	2f; \
	PTE_OF_ADDR4M(addr, pte, bad, page_offset); \
	b,a	3f; \
2: \
	PTE_OF_ADDR4_4C(addr, pte, bad, page_offset); \
3:

#define CMP_PTE_USER_READ(pte, tmp, label) \
label:	b,a	1f; \
	CMP_PTE_USER_READ4M(pte,tmp); \
	b,a	2f; \
1: \
	CMP_PTE_USER_READ4_4C(pte); \
2:

#define CMP_PTE_USER_WRITE(pte, tmp, label) \
label:	b,a	1f; \
	CMP_PTE_USER_WRITE4M(pte,tmp); \
	b,a	2f; \
1: \
	CMP_PTE_USER_WRITE4_4C(pte); \
2:
#endif
1358
1359
1360/*
1361 * The calculations in PTE_OF_ADDR and CMP_PTE_USER_* are rather slow:
1362 * in particular, according to Gordon Irlam of the University of Adelaide
1363 * in Australia, these consume at least 18 cycles on an SS1 and 37 on an
1364 * SS2.  Hence, we try to avoid them in the common case.
1365 *
1366 * A chunk of 64 bytes is on a single page if and only if:
1367 *
1368 *	((base + 64 - 1) & ~(NBPG-1)) == (base & ~(NBPG-1))
1369 *
1370 * Equivalently (and faster to test), the low order bits (base & 4095) must
1371 * be small enough so that the sum (base + 63) does not carry out into the
1372 * upper page-address bits, i.e.,
1373 *
1374 *	(base & (NBPG-1)) < (NBPG - 63)
1375 *
1376 * so we allow testing that here.  This macro is also assumed to be safe
1377 * in a delay slot (modulo overwriting its temporary).
1378 */
1379#define	SLT_IF_1PAGE_RW(addr, tmp, page_offset) \
1380	and	addr, page_offset, tmp; \
1381	sub	page_offset, 62, page_offset; \
1382	cmp	tmp, page_offset
1383
1384/*
1385 * Every trap that enables traps must set up stack space.
1386 * If the trap is from user mode, this involves switching to the kernel
1387 * stack for the current process, and we must also set cpcb->pcb_uw
1388 * so that the window overflow handler can tell user windows from kernel
1389 * windows.
1390 *
1391 * The number of user windows is:
1392 *
1393 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
1394 *
1395 * (where pcb_wim = log2(current %wim) and CWP = low 5 bits of %psr).
1396 * We compute this expression by table lookup in uwtab[CWP - pcb_wim],
1397 * which has been set up as:
1398 *
1399 *	for i in [-nwin+1 .. nwin-1]
1400 *		uwtab[i] = (nwin - 1 - i) % nwin;
1401 *
1402 * (If you do not believe this works, try it for yourself.)
1403 *
1404 * We also keep one or two more tables:
1405 *
1406 *	for i in 0..nwin-1
1407 *		wmask[i] = 1 << ((i + 1) % nwindows);
1408 *
1409 * wmask[CWP] tells whether a `rett' would return into the invalid window.
1410 */
1411	.data
1412	.skip	32			! alignment byte & negative indicies
1413uwtab:	.skip	32			! u_char uwtab[-31..31];
1414wmask:	.skip	32			! u_char wmask[0..31];
1415
	.text
/*
 * Things begin to grow uglier....
 *
 * Each trap handler may (always) be running in the trap window.
 * If this is the case, it cannot enable further traps until it writes
 * the register windows into the stack (or, if the stack is no good,
 * the current pcb).
 *
 * ASSUMPTIONS: TRAP_SETUP() is called with:
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 *	%l3 = (some value that must not be altered)
 * which means we have 4 registers to work with.
 *
 * The `stackspace' argument is the number of stack bytes to allocate
 * for register-saving, and must be at least -64 (and typically more,
 * for global registers and %y).
 *
 * Trapframes should use -CCFSZ-80.  (80 = sizeof(struct trapframe);
 * see trap.h.  This basically means EVERYONE.  Interrupt frames could
 * get away with less, but currently do not.)
 *
 * The basic outline here is:
 *
 *	if (trap came from kernel mode) {
 *		if (we are in the trap window)
 *			save it away;
 *		%sp = %fp - stackspace;
 *	} else {
 *		compute the number of user windows;
 *		if (we are in the trap window)
 *			save it away;
 *		%sp = (top of kernel stack) - stackspace;
 *	}
 *
 * Again, the number of user windows is:
 *
 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
 *
 * (where pcb_wim = log2(current %wim) and CWP is the low 5 bits of %psr),
 * and this is computed as `uwtab[CWP - pcb_wim]'.
 *
 * NOTE: if you change this code, you will have to look carefully
 * at the window overflow and underflow handlers and make sure they
 * have similar changes made as needed.
 */
	/* (clean_trap_window returns via `jmp %l4 + 8' and restores %g7) */
#define	CALL_CLEAN_TRAP_WINDOW \
	sethi	%hi(clean_trap_window), %l7; \
	jmpl	%l7 + %lo(clean_trap_window), %l4; /* link address in %l4 */ \
	 mov	%g7, %l7	/* save %g7 in %l7 for clean_trap_window */
1468
#define	TRAP_SETUP(stackspace) \
	rd	%wim, %l4; \
	mov	1, %l5; \
	sll	%l5, %l0, %l5;	/* shift count is mod 32 => %l5 = 1 << CWP */ \
	btst	PSR_PS, %l0; \
	bz	1f; \
	 btst	%l5, %l4;	/* (delay) nonzero iff in the trap window */ \
	/* came from kernel mode; cond codes indicate trap window */ \
	bz,a	3f; \
	 add	%fp, stackspace, %sp;	/* want to just set %sp */ \
	CALL_CLEAN_TRAP_WINDOW;		/* but maybe need to clean first */ \
	b	3f; \
	 add	%fp, stackspace, %sp; \
1: \
	/* came from user mode: compute pcb_nw */ \
	sethi	%hi(cpcb), %l6; \
	ld	[%l6 + %lo(cpcb)], %l6; \
	ld	[%l6 + PCB_WIM], %l5; \
	and	%l0, 31, %l4;		/* %l4 = CWP */ \
	sub	%l4, %l5, %l5;		/* %l5 = CWP - pcb_wim */ \
	set	uwtab, %l4; \
	ldub	[%l4 + %l5], %l5;	/* user window count, via uwtab */ \
	st	%l5, [%l6 + PCB_UW]; \
	/* cond codes still indicate whether in trap window */ \
	bz,a	2f; \
	 sethi	%hi(USPACE+(stackspace)), %l5; \
	/* yes, in trap window; must clean it */ \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(cpcb), %l6;		/* (reload: %l6 was clobbered) */ \
	ld	[%l6 + %lo(cpcb)], %l6; \
	sethi	%hi(USPACE+(stackspace)), %l5; \
2: \
	/* trap window is (now) clean: set %sp */ \
	or	%l5, %lo(USPACE+(stackspace)), %l5; \
	add	%l6, %l5, %sp;	/* %sp = cpcb + USPACE + stackspace */ \
	SET_SP_REDZONE(%l6, %l5); \
3: \
	CHECK_SP_REDZONE(%l6, %l5)
1507
1508/*
1509 * Interrupt setup is almost exactly like trap setup, but we need to
1510 * go to the interrupt stack if (a) we came from user mode or (b) we
1511 * came from kernel mode on the kernel stack.
1512 */
1513#if defined(MULTIPROCESSOR)
1514/*
1515 * SMP kernels: read `eintstack' from cpuinfo structure. Since the
1516 * location of the interrupt stack is not known in advance, we need
1517 * to check the current %fp against both ends of the stack space.
1518 */
1519#define	INTR_SETUP(stackspace) \
1520	rd	%wim, %l4; \
1521	mov	1, %l5; \
1522	sll	%l5, %l0, %l5; \
1523	btst	PSR_PS, %l0; \
1524	bz	1f; \
1525	 btst	%l5, %l4; \
1526	/* came from kernel mode; cond codes still indicate trap window */ \
1527	bz,a	0f; \
1528	 sethi	%hi(_EINTSTACKP), %l7; \
1529	CALL_CLEAN_TRAP_WINDOW; \
1530	sethi	%hi(_EINTSTACKP), %l7; \
15310:	/* now if not intstack > %fp >= eintstack, we were on the kernel stack */ \
1532	ld	[%l7 + %lo(_EINTSTACKP)], %l7; \
1533	cmp	%fp, %l7; \
1534	bge,a	3f;			/* %fp >= eintstack */ \
1535	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
1536	sethi	%hi(INT_STACK_SIZE), %l6; \
1537	sub	%l7, %l6, %l6; \
1538	cmp	%fp, %l6; \
1539	blu,a	3f;			/* %fp < intstack */ \
1540	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
1541	b	4f; \
1542	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
15431: \
1544	/* came from user mode: compute pcb_nw */ \
1545	sethi	%hi(cpcb), %l6; \
1546	ld	[%l6 + %lo(cpcb)], %l6; \
1547	ld	[%l6 + PCB_WIM], %l5; \
1548	and	%l0, 31, %l4; \
1549	sub	%l4, %l5, %l5; \
1550	set	uwtab, %l4; \
1551	ldub	[%l4 + %l5], %l5; \
1552	st	%l5, [%l6 + PCB_UW]; \
1553	/* cond codes still indicate whether in trap window */ \
1554	bz,a	2f; \
1555	 sethi	%hi(_EINTSTACKP), %l7; \
1556	/* yes, in trap window; must save regs */ \
1557	CALL_CLEAN_TRAP_WINDOW; \
1558	sethi	%hi(_EINTSTACKP), %l7; \
15592: \
1560	ld	[%l7 + %lo(_EINTSTACKP)], %l7; \
1561	add	%l7, stackspace, %sp; \
15623: \
1563	SET_SP_REDZONE_VAR(_EINTSTACKP, -INT_STACK_SIZE, %l6, %l5); \
15644: \
1565	CHECK_SP_REDZONE(%l6, %l5)
1566
#else /* MULTIPROCESSOR */

#define	INTR_SETUP(stackspace) \
	rd	%wim, %l4; \
	mov	1, %l5; \
	sll	%l5, %l0, %l5;	/* shift count is mod 32 => %l5 = 1 << CWP */ \
	btst	PSR_PS, %l0; \
	bz	1f; \
	 btst	%l5, %l4;	/* (delay) nonzero iff in the trap window */ \
	/* came from kernel mode; cond codes still indicate trap window */ \
	bz,a	0f; \
	 sethi	%hi(_C_LABEL(eintstack)), %l7; \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(_C_LABEL(eintstack)), %l7; \
0:	/* now if %fp >= eintstack, we were on the kernel stack */ \
	cmp	%fp, %l7;	/* NB: %l7 has only %hi(eintstack); assumes %lo is 0 */ \
	bge,a	3f; \
	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
	b	4f; \
	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
1: \
	/* came from user mode: compute pcb_nw */ \
	sethi	%hi(cpcb), %l6; \
	ld	[%l6 + %lo(cpcb)], %l6; \
	ld	[%l6 + PCB_WIM], %l5; \
	and	%l0, 31, %l4;		/* %l4 = CWP */ \
	sub	%l4, %l5, %l5;		/* %l5 = CWP - pcb_wim */ \
	set	uwtab, %l4; \
	ldub	[%l4 + %l5], %l5;	/* user window count, via uwtab */ \
	st	%l5, [%l6 + PCB_UW]; \
	/* cond codes still indicate whether in trap window */ \
	bz,a	2f; \
	 sethi	%hi(_C_LABEL(eintstack)), %l7; \
	/* yes, in trap window; must save regs */ \
	CALL_CLEAN_TRAP_WINDOW; \
	sethi	%hi(_C_LABEL(eintstack)), %l7; \
2: \
	add	%l7, stackspace, %sp; \
3: \
	SET_SP_REDZONE_CONST(_C_LABEL(intstack), %l6, %l5); \
4: \
	CHECK_SP_REDZONE(%l6, %l5)
#endif /* MULTIPROCESSOR */
1610
1611/*
1612 * Handler for making the trap window shiny clean.
1613 *
1614 * On entry:
1615 *	cpcb->pcb_nw = number of user windows
1616 *	%l0 = %psr
1617 *	%l1 must not be clobbered
1618 *	%l2 must not be clobbered
1619 *	%l3 must not be clobbered
1620 *	%l4 = address for `return'
1621 *	%l7 = saved %g7 (we put this in a delay slot above, to save work)
1622 *
1623 * On return:
1624 *	%wim has changed, along with cpcb->pcb_wim
1625 *	%g7 has been restored
1626 *
1627 * Normally, we push only one window.
1628 */
1629clean_trap_window:
1630	mov	%g5, %l5		! save %g5
1631	mov	%g6, %l6		! ... and %g6
1632/*	mov	%g7, %l7		! ... and %g7 (already done for us) */
1633	sethi	%hi(cpcb), %g6		! get current pcb
1634	ld	[%g6 + %lo(cpcb)], %g6
1635
1636	/* Figure out whether it is a user window (cpcb->pcb_uw > 0). */
1637	ld	[%g6 + PCB_UW], %g7
1638	deccc	%g7
1639	bge	ctw_user
1640	 save	%g0, %g0, %g0		! in any case, enter window to save
1641
1642	/* The window to be pushed is a kernel window. */
1643	std	%l0, [%sp + (0*8)]
1644ctw_merge:
1645	std	%l2, [%sp + (1*8)]
1646	std	%l4, [%sp + (2*8)]
1647	std	%l6, [%sp + (3*8)]
1648	std	%i0, [%sp + (4*8)]
1649	std	%i2, [%sp + (5*8)]
1650	std	%i4, [%sp + (6*8)]
1651	std	%i6, [%sp + (7*8)]
1652
1653	/* Set up new window invalid mask, and update cpcb->pcb_wim. */
1654	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp
1655	mov	1, %g5			! g5 = 1 << new_cwp;
1656	sll	%g5, %g7, %g5
1657	wr	%g5, 0, %wim		! setwim(g5);
1658	and	%g7, 31, %g7		! cpcb->pcb_wim = g7 & 31;
1659	sethi	%hi(cpcb), %g6		! re-get current pcb
1660	ld	[%g6 + %lo(cpcb)], %g6
1661	st	%g7, [%g6 + PCB_WIM]
1662	nop
1663	restore				! back to trap window
1664
1665	mov	%l5, %g5		! restore g5
1666	mov	%l6, %g6		! ... and g6
1667	jmp	%l4 + 8			! return to caller
1668	 mov	%l7, %g7		! ... and g7
1669	/* NOTREACHED */
1670
ctw_user:
	/*
	 * The window to be pushed is a user window.
	 * We must verify the stack pointer (alignment & permissions).
	 * See comments above definition of PTE_OF_ADDR.
	 */
	st	%g7, [%g6 + PCB_UW]	! cpcb->pcb_uw--;
	btst	7, %sp			! if not aligned,
	bne	ctw_invalid		! choke on it
	 EMPTY

	sethi	%hi(_C_LABEL(pgofset)), %g6	! trash %g6=curpcb
	ld	[%g6 + %lo(_C_LABEL(pgofset))], %g6
	PTE_OF_ADDR(%sp, %g7, ctw_invalid, %g6, NOP_ON_4M_1)
	CMP_PTE_USER_WRITE(%g7, %g5, NOP_ON_4M_2) ! likewise if not writable
	bne	ctw_invalid
	 EMPTY
	/* Note side-effect of SLT_IF_1PAGE_RW: decrements %g6 by 62 */
	SLT_IF_1PAGE_RW(%sp, %g7, %g6)
	bl,a	ctw_merge		! all ok if only 1
	 std	%l0, [%sp]
	add	%sp, 7*8, %g5		! check last addr too
	add	%g6, 62, %g6		/* restore %g6 to `pgofset' */
	PTE_OF_ADDR(%g5, %g7, ctw_invalid, %g6, NOP_ON_4M_3)
	CMP_PTE_USER_WRITE(%g7, %g6, NOP_ON_4M_4)
	be,a	ctw_merge		! all ok: store <l0,l1> and merge
	 std	%l0, [%sp]

	/*
	 * The window we wanted to push could not be pushed.
	 * Instead, save ALL user windows into the pcb.
	 * We will notice later that we did this, when we
	 * get ready to return from our trap or syscall.
	 *
	 * The code here is run rarely and need not be optimal.
	 */
ctw_invalid:
	/*
	 * Reread cpcb->pcb_uw.  We decremented this earlier,
	 * so it is off by one.
	 */
	sethi	%hi(cpcb), %g6		! re-get current pcb
	ld	[%g6 + %lo(cpcb)], %g6

	ld	[%g6 + PCB_UW], %g7	! (number of user windows) - 1
	add	%g6, PCB_RW, %g5	! %g5 = &cpcb->pcb_rw[0]

	/* save g7+1 windows, starting with the current one */
1:					! do {
	std	%l0, [%g5 + (0*8)]	!	rw->rw_local[0] = l0;
	std	%l2, [%g5 + (1*8)]	!	...
	std	%l4, [%g5 + (2*8)]
	std	%l6, [%g5 + (3*8)]
	std	%i0, [%g5 + (4*8)]
	std	%i2, [%g5 + (5*8)]
	std	%i4, [%g5 + (6*8)]
	std	%i6, [%g5 + (7*8)]
	deccc	%g7			!	if (n > 0) save(), rw++;
	bge,a	1b			! } while (--n >= 0);
	 save	%g5, 64, %g5		! (next window & next rw, in delay slot)

	/* stash sp for bottommost window */
	st	%sp, [%g5 + 64 + (7*8)]

	/* set up new wim */
	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp;
	mov	1, %g5			! g5 = 1 << new_cwp;
	sll	%g5, %g7, %g5		! (shift count taken mod 32)
	wr	%g5, 0, %wim		! wim = g5;
	and	%g7, 31, %g7
	st	%g7, [%g6 + PCB_WIM]	! cpcb->pcb_wim = new_cwp;

	/* fix up pcb fields */
	ld	[%g6 + PCB_UW], %g7	! n = cpcb->pcb_uw;
	add	%g7, 1, %g5
	st	%g5, [%g6 + PCB_NSAVED]	! cpcb->pcb_nsaved = n + 1;
	st	%g0, [%g6 + PCB_UW]	! cpcb->pcb_uw = 0;

	/* return to trap window */
1:	deccc	%g7			! do {
	bge	1b			!	restore();
	 restore			! } while (--n >= 0);

	mov	%l5, %g5		! restore g5, g6, & g7, and return
	mov	%l6, %g6
	jmp	%l4 + 8
	 mov	%l7, %g7
	/* NOTREACHED */
1759
1760
1761/*
1762 * Each memory access (text or data) fault, from user or kernel mode,
1763 * comes here.  We read the error register and figure out what has
1764 * happened.
1765 *
1766 * This cannot be done from C code since we must not enable traps (and
1767 * hence may not use the `save' instruction) until we have decided that
1768 * the error is or is not an asynchronous one that showed up after a
1769 * synchronous error, but which must be handled before the sync err.
1770 *
1771 * Most memory faults are user mode text or data faults, which can cause
1772 * signal delivery or ptracing, for which we must build a full trapframe.
1773 * It does not seem worthwhile to work to avoid this in the other cases,
1774 * so we store all the %g registers on the stack immediately.
1775 *
1776 * On entry:
1777 *	%l0 = %psr
1778 *	%l1 = return pc
1779 *	%l2 = return npc
1780 *	%l3 = T_TEXTFAULT or T_DATAFAULT
1781 *
1782 * Internal:
1783 *	%l4 = %y, until we call mem_access_fault (then onto trapframe)
1784 *	%l5 = IE_reg_addr, if async mem error
1785 *
1786 */
1787
1788#if defined(SUN4)
1789memfault_sun4:
1790	TRAP_SETUP(-CCFSZ-80)
1791	INCR(_C_LABEL(uvmexp)+V_FAULTS)	! cnt.v_faults++ (clobbers %o0,%o1)
1792
1793	st	%g1, [%sp + CCFSZ + 20]	! save g1
1794	rd	%y, %l4			! save y
1795
1796	/*
1797	 * registers:
1798	 * memerr.ctrl	= memory error control reg., error if 0x80 set
1799	 * memerr.vaddr	= address of memory error
1800	 * buserr	= basically just like sun4c sync error reg but
1801	 *		  no SER_WRITE bit (have to figure out from code).
1802	 */
1803	set	_C_LABEL(par_err_reg), %o0 ! memerr ctrl addr -- XXX mapped?
1804	ld	[%o0], %o0		! get it
1805	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
1806	ld	[%o0], %o1		! memerr ctrl register
1807	inc	4, %o0			! now VA of memerr vaddr register
1808	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
1809	ld	[%o0], %o2		! memerr virt addr
1810	st	%g0, [%o0]		! NOTE: this clears latching!!!
1811	btst	ME_REG_IERR, %o1	! memory error?
1812					! XXX this value may not be correct
1813					! as I got some parity errors and the
1814					! correct bits were not on?
1815	std	%g6, [%sp + CCFSZ + 40]
1816	bz,a	0f			! no, just a regular fault
1817	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1818
1819	/* memory error = death for now XXX */
1820	clr	%o3
1821	clr	%o4
1822	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, 0, 0)
1823	 clr	%o0
1824	call	_C_LABEL(prom_halt)
1825	 nop
1826
18270:
1828	/*
1829	 * have to make SUN4 emulate SUN4C.   4C code expects
1830	 * SER in %o1 and the offending VA in %o2, everything else is ok.
1831	 * (must figure out if SER_WRITE should be set)
1832	 */
1833	set	AC_BUS_ERR, %o0		! bus error register
1834	cmp	%l3, T_TEXTFAULT	! text fault always on PC
1835	be	normal_mem_fault	! go
1836	 lduba	[%o0] ASI_CONTROL, %o1	! get its value
1837
1838#define STORE_BIT 21 /* bit that indicates a store instruction for sparc */
1839	ld	[%l1], %o3		! offending instruction in %o3 [l1=pc]
1840	srl	%o3, STORE_BIT, %o3	! get load/store bit (wont fit simm13)
1841	btst	1, %o3			! test for store operation
1842
1843	bz	normal_mem_fault	! if (z) is a load (so branch)
1844	 sethi	%hi(SER_WRITE), %o5     ! damn SER_WRITE wont fit simm13
1845!	or	%lo(SER_WRITE), %o5, %o5! not necessary since %lo is zero
1846	or	%o5, %o1, %o1		! set SER_WRITE
1847#if defined(SUN4C) || defined(SUN4M)
1848	ba,a	normal_mem_fault
1849	 !!nop				! XXX make efficient later
1850#endif /* SUN4C || SUN4M */
1851#endif /* SUN4 */
1852
memfault_sun4c:
#if defined(SUN4C)
	TRAP_SETUP(-CCFSZ-80)
	INCR(_C_LABEL(uvmexp)+V_FAULTS)	! cnt.v_faults++ (clobbers %o0,%o1)

	st	%g1, [%sp + CCFSZ + 20]	! save g1
	rd	%y, %l4			! save y

	/*
	 * We know about the layout of the error registers here.
	 *	addr	reg
	 *	----	---
	 *	a	AC_SYNC_ERR
	 *	a+4	AC_SYNC_VA
	 *	a+8	AC_ASYNC_ERR
	 *	a+12	AC_ASYNC_VA
	 */

#if AC_SYNC_ERR + 4 != AC_SYNC_VA || \
    AC_SYNC_ERR + 8 != AC_ASYNC_ERR || AC_SYNC_ERR + 12 != AC_ASYNC_VA
	help help help		! I, I, I wanna be a lifeguard
#endif
	set	AC_SYNC_ERR, %o0
	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
	inc	4, %o0
	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
	btst	SER_MEMERR, %o1		! memory error?
	std	%g6, [%sp + CCFSZ + 40]
	bz,a	normal_mem_fault	! no, just a regular fault
 	 wr	%l0, PSR_ET, %psr	! (and reenable traps)

	/*
	 * We got a synchronous memory error.  It could be one that
	 * happened because there were two stores in a row, and the
	 * first went into the write buffer, and the second caused this
	 * synchronous trap; so there could now be a pending async error.
	 * This is in fact the case iff the two va's differ.
	 */
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o3	! async err reg
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
	cmp	%o2, %o4		! same VA as the sync error?
	be,a	1f			! no, not an async err
	 wr	%l0, PSR_ET, %psr	! (and reenable traps)

	/*
	 * Handle the async error; ignore the sync error for now
	 * (we may end up getting it again, but so what?).
	 * This code is essentially the same as that at `nmi' below,
	 * but the register usage is different and we cannot merge.
	 */
	sethi	%hi(INTRREG_VA), %l5	! ienab_bic(IE_ALLIE);
	ldub	[%l5 + %lo(INTRREG_VA)], %o0
	andn	%o0, IE_ALLIE, %o0
	stb	%o0, [%l5 + %lo(INTRREG_VA)]

	/*
	 * Now reenable traps and call C code.
	 * %o1 through %o4 still hold the error reg contents.
	 * If memerr() returns, return from the trap.
	 */
	wr	%l0, PSR_ET, %psr
	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, aer, ava)
	 clr	%o0

	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
	wr	%l0, 0, %psr		! and disable traps, 3 instr delay
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	/* now safe to set IE_ALLIE again */
	ldub	[%l5 + %lo(INTRREG_VA)], %o1
	or	%o1, IE_ALLIE, %o1
	stb	%o1, [%l5 + %lo(INTRREG_VA)]
	b	return_from_trap
	 wr	%l4, 0, %y		! restore y

	/*
	 * Trap was a synchronous memory error.
	 * %o1 through %o4 still hold the error reg contents.
	 */
1:
	call	_C_LABEL(memerr4_4c)	! memerr(1, ser, sva, aer, ava)
	 mov	1, %o0

	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	wr	%l4, 0, %y		! restore y
	b	return_from_trap
	 wr	%l0, 0, %psr
	/* NOTREACHED */
#endif /* SUN4C */
1950
#if defined(SUN4M)
memfault_sun4m:
	! DANGER: we use the fact that %lo(CPUINFO_VA) is zero
.if CPUINFO_VA & 0x1fff
BARF	! assembly-time guard: CPUINFO_VA must have zero low bits
.endif
	sethi	%hi(CPUINFO_VA), %l4
	ld	[%l4 + %lo(CPUINFO_VA+CPUINFO_GETSYNCFLT)], %l5
	jmpl	%l5, %l7		! call CPU's get-sync-fault routine
	 or	%l4, %lo(CPUINFO_SYNCFLTDUMP), %l4 ! (arg: fault dump area)
	TRAP_SETUP(-CCFSZ-80)
	INCR(_C_LABEL(uvmexp)+V_FAULTS)	! cnt.v_faults++ (clobbers %o0,%o1)

	st	%g1, [%sp + CCFSZ + 20]	! save g1
	rd	%y, %l4			! save y

	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
	std	%g4, [%sp + CCFSZ + 32]	! save g4, g5
	std	%g6, [%sp + CCFSZ + 40]	! sneak in g6, g7

	! retrieve sync fault status/address
	sethi	%hi(CPUINFO_VA+CPUINFO_SYNCFLTDUMP), %o0
	ld	[%o0 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP)], %o1
	ld	[%o0 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP+4)], %o2

	wr	%l0, PSR_ET, %psr	! reenable traps

	/* Finish stackframe, call C trap handler */
	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
	mov	%l3, %o0		! (argument: type)
	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
	std	%i2, [%sp + CCFSZ + 56]
	std	%i4, [%sp + CCFSZ + 64]
	std	%i6, [%sp + CCFSZ + 72]
					! mem_access_fault(type,sfsr,sfva,&tf);
	call	_C_LABEL(mem_access_fault4m)
	 add	%sp, CCFSZ, %o3		! (argument: &tf)

	ldd	[%sp + CCFSZ + 0], %l0	! load new values
	ldd	[%sp + CCFSZ + 8], %l2	! (pair load: %l3 = tf.tf_y)
	wr	%l3, 0, %y
	ld	[%sp + CCFSZ + 20], %g1
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	ldd	[%sp + CCFSZ + 48], %i0
	ldd	[%sp + CCFSZ + 56], %i2
	ldd	[%sp + CCFSZ + 64], %i4
	ldd	[%sp + CCFSZ + 72], %i6

	b	return_from_trap	! go return
	 wr	%l0, 0, %psr		! (but first disable traps again)
#endif /* SUN4M */
2006
normal_mem_fault:
	/*
	 * Trap was some other error; call C code to deal with it.
	 * Must finish trap frame (psr,pc,npc,%y,%o0..%o7) in case
	 * we decide to deliver a signal or ptrace the process.
	 * %g1..%g7 were already set up above.
	 * Our callers arrive with the fault status in %o1, the fault
	 * VA in %o2, and the saved %y in %l4.
	 */
	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
	mov	%l3, %o0		! (argument: type)
	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
	mov	%l1, %o3		! (argument: pc)
	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
	std	%i2, [%sp + CCFSZ + 56]
	mov	%l0, %o4		! (argument: psr)
	std	%i4, [%sp + CCFSZ + 64]
	std	%i6, [%sp + CCFSZ + 72]
	call	_C_LABEL(mem_access_fault)! mem_access_fault(type, ser, sva,
					!		pc, psr, &tf);
	 add	%sp, CCFSZ, %o5		! (argument: &tf)

	ldd	[%sp + CCFSZ + 0], %l0	! load new values
	ldd	[%sp + CCFSZ + 8], %l2	! (pair load: %l3 = tf.tf_y)
	wr	%l3, 0, %y
	ld	[%sp + CCFSZ + 20], %g1
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	ldd	[%sp + CCFSZ + 48], %i0
	ldd	[%sp + CCFSZ + 56], %i2
	ldd	[%sp + CCFSZ + 64], %i4
	ldd	[%sp + CCFSZ + 72], %i6

	b	return_from_trap	! go return
	 wr	%l0, 0, %psr		! (but first disable traps again)
2042
2043
2044/*
2045 * fp_exception has to check to see if we are trying to save
2046 * the FP state, and if so, continue to save the FP state.
2047 *
2048 * We do not even bother checking to see if we were in kernel mode,
2049 * since users have no access to the special_fp_store instruction.
2050 *
2051 * This whole idea was stolen from Sprite.
2052 */
2053fp_exception:
2054	set	special_fp_store, %l4	! see if we came from the special one
2055	cmp	%l1, %l4		! pc == special_fp_store?
2056	bne	slowtrap		! no, go handle per usual
2057	 EMPTY
2058	sethi	%hi(savefpcont), %l4	! yes, "return" to the special code
2059	or	%lo(savefpcont), %l4, %l4
2060	jmp	%l4
2061	 rett	%l4 + 4
2062
2063/*
2064 * slowtrap() builds a trap frame and calls trap().
2065 * This is called `slowtrap' because it *is*....
2066 * We have to build a full frame for ptrace(), for instance.
2067 *
2068 * Registers:
2069 *	%l0 = %psr
2070 *	%l1 = return pc
2071 *	%l2 = return npc
2072 *	%l3 = trap code
2073 */
2074slowtrap:
2075	TRAP_SETUP(-CCFSZ-80)
2076	/*
2077	 * Phew, ready to enable traps and call C code.
2078	 */
2079	mov	%l3, %o0		! put type in %o0 for later
2080Lslowtrap_reenter:
2081	wr	%l0, PSR_ET, %psr	! traps on again
2082	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
2083	rd	%y, %l3
2084	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
2085	st	%g1, [%sp + CCFSZ + 20]
2086	std	%g2, [%sp + CCFSZ + 24]
2087	std	%g4, [%sp + CCFSZ + 32]
2088	std	%g6, [%sp + CCFSZ + 40]
2089	std	%i0, [%sp + CCFSZ + 48]
2090	mov	%l0, %o1		! (psr)
2091	std	%i2, [%sp + CCFSZ + 56]
2092	mov	%l1, %o2		! (pc)
2093	std	%i4, [%sp + CCFSZ + 64]
2094	add	%sp, CCFSZ, %o3		! (&tf)
2095	call	_C_LABEL(trap)		! trap(type, psr, pc, &tf)
2096	 std	%i6, [%sp + CCFSZ + 72]
2097
2098	ldd	[%sp + CCFSZ], %l0	! load new values
2099	ldd	[%sp + CCFSZ + 8], %l2
2100	wr	%l3, 0, %y
2101	ld	[%sp + CCFSZ + 20], %g1
2102	ldd	[%sp + CCFSZ + 24], %g2
2103	ldd	[%sp + CCFSZ + 32], %g4
2104	ldd	[%sp + CCFSZ + 40], %g6
2105	ldd	[%sp + CCFSZ + 48], %i0
2106	ldd	[%sp + CCFSZ + 56], %i2
2107	ldd	[%sp + CCFSZ + 64], %i4
2108	ldd	[%sp + CCFSZ + 72], %i6
2109	b	return_from_trap
2110	 wr	%l0, 0, %psr
2111
2112/*
2113 * Do a `software' trap by re-entering the trap code, possibly first
2114 * switching from interrupt stack to kernel stack.  This is used for
2115 * scheduling and signal ASTs (which generally occur from softclock or
2116 * tty or net interrupts) and register window saves (which might occur
2117 * from anywhere).
2118 *
2119 * The current window is the trap window, and it is by definition clean.
2120 * We enter with the trap type in %o0.  All we have to do is jump to
2121 * Lslowtrap_reenter above, but maybe after switching stacks....
2122 */
2123softtrap:
2124#if defined(MULTIPROCESSOR)
2125	/*
2126	 * The interrupt stack is not at a fixed location
2127	 * and %sp must be checked against both ends.
2128	 */
2129	sethi	%hi(_EINTSTACKP), %l7
2130	ld	[%l7 + %lo(_EINTSTACKP)], %l7
2131	cmp	%sp, %l7
2132	bge	Lslowtrap_reenter
2133	 EMPTY
2134	set	INT_STACK_SIZE, %l6
2135	sub	%l7, %l6, %l7
2136	cmp	%sp, %l7
2137	blu	Lslowtrap_reenter
2138	 EMPTY
2139#else
2140	sethi	%hi(_C_LABEL(eintstack)), %l7
2141	cmp	%sp, %l7
2142	bge	Lslowtrap_reenter
2143	 EMPTY
2144#endif
2145	sethi	%hi(cpcb), %l6
2146	ld	[%l6 + %lo(cpcb)], %l6
2147	set	USPACE-CCFSZ-80, %l5
2148	add	%l6, %l5, %l7
2149	SET_SP_REDZONE(%l6, %l5)
2150	b	Lslowtrap_reenter
2151	 mov	%l7, %sp
2152
2153#ifdef KGDB
2154/*
2155 * bpt is entered on all breakpoint traps.
2156 * If this is a kernel breakpoint, we do not want to call trap().
2157 * Among other reasons, this way we can set breakpoints in trap().
2158 */
bpt:
	btst	PSR_PS, %l0		! breakpoint from kernel?
	bz	slowtrap		! no, go do regular trap
	 nop

/* XXXSMP */
	/*
	 * Build a trap frame for kgdb_trap_glue to copy.
	 * Enable traps but set ipl high so that we will not
	 * see interrupts from within breakpoints.
	 */
	TRAP_SETUP(-CCFSZ-80)
	or	%l0, PSR_PIL, %l4	! splhigh()
	wr	%l4, 0, %psr		! the manual claims that this
	wr	%l4, PSR_ET, %psr	! song and dance is necessary
	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
	rd	%y, %l3
	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
	rd	%wim, %l3
	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
	std	%g2, [%sp + CCFSZ + 24]	! etc
	std	%g4, [%sp + CCFSZ + 32]
	std	%g6, [%sp + CCFSZ + 40]
	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
	std	%i2, [%sp + CCFSZ + 56]	! etc
	std	%i4, [%sp + CCFSZ + 64]
	std	%i6, [%sp + CCFSZ + 72]

	/*
	 * Now call kgdb_trap_glue(); if it returns, call trap().
	 */
	mov	%o0, %l3		! gotta save trap type
	call	_C_LABEL(kgdb_trap_glue)! kgdb_trap_glue(type, &trapframe)
	 add	%sp, CCFSZ, %o1		! (&trapframe)

	/*
	 * Use slowtrap to call trap---but first erase our tracks
	 * (put the registers back the way they were).
	 */
	mov	%l3, %o0		! slowtrap will need trap type
	ld	[%sp + CCFSZ + 12], %l3	! reload %y from tf.tf_y
	wr	%l3, 0, %y
	ld	[%sp + CCFSZ + 20], %g1	! reload globals from tf
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	b	Lslowtrap_reenter
	 ldd	[%sp + CCFSZ + 40], %g6	! (delay slot) last of the globals
2208
2209/*
2210 * Enter kernel breakpoint.  Write all the windows (not including the
2211 * current window) into the stack, so that backtrace works.  Copy the
2212 * supplied trap frame to the kgdb stack and switch stacks.
2213 *
2214 * kgdb_trap_glue(type, tf0)
2215 *	int type;
2216 *	struct trapframe *tf0;
2217 */
_ENTRY(_C_LABEL(kgdb_trap_glue))
	save	%sp, -CCFSZ, %sp

	call	_C_LABEL(write_all_windows)
	 mov	%sp, %l4		! %l4 = current %sp

	/* copy trapframe (80 bytes, 8 at a time) to top of kgdb stack */
	set	_C_LABEL(kgdb_stack) + KGDB_STACK_SIZE - 80, %l0
					! %l0 = tfcopy -> end_of_kgdb_stack
	mov	80, %l1			! %l1 = bytes remaining
1:	ldd	[%i1], %l2
	inc	8, %i1
	deccc	8, %l1
	std	%l2, [%l0]
	bg	1b
	 inc	8, %l0			! (delay slot) advance destination

#ifdef DEBUG
	/* save old red zone and then turn it off */
	sethi	%hi(_redzone), %l7
	ld	[%l7 + %lo(_redzone)], %l6
	st	%g0, [%l7 + %lo(_redzone)]
#endif
	/* switch to kgdb stack */
	add	%l0, -CCFSZ-80, %sp

	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
	mov	%i0, %o0
	call	_C_LABEL(kgdb_trap)
	add	%l0, -80, %o1		! (delay slot) %o1 = &tfcopy
	tst	%o0
	bnz,a	kgdb_rett		! handled: return through tfcopy
	 add	%l0, -80, %g1		! (annulled unless taken) %g1 = &tfcopy

	/*
	 * kgdb_trap() did not handle the trap at all so the stack is
	 * still intact.  A simple `restore' will put everything back,
	 * after we reset the stack pointer.
	 */
	mov	%l4, %sp
#ifdef DEBUG
	st	%l6, [%l7 + %lo(_redzone)]	! restore red zone
#endif
	ret
	restore
2263
2264/*
2265 * Return from kgdb trap.  This is sort of special.
2266 *
2267 * We know that kgdb_trap_glue wrote the window above it, so that we will
2268 * be able to (and are sure to have to) load it up.  We also know that we
2269 * came from kernel land and can assume that the %fp (%i6) we load here
2270 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
2271 * until we have traps disabled, due to the SPARC taking traps at the
2272 * new ipl before noticing that PSR_ET has been turned off.  We are on
2273 * the kgdb stack, so this could be disastrous.
2274 *
2275 * Note that the trapframe argument in %g1 points into the current stack
2276 * frame (current window).  We abandon this window when we move %g1->tf_psr
2277 * into %psr, but we will not have loaded the new %sp yet, so again traps
2278 * must be disabled.
2279 */
kgdb_rett:
	rd	%psr, %g4		! turn off traps
	wr	%g4, PSR_ET, %psr
	/* use the three-instruction delay to do something useful */
	ld	[%g1], %g2		! pick up new %psr
	ld	[%g1 + 12], %g3		! set %y
	wr	%g3, 0, %y
#ifdef DEBUG
	st	%l6, [%l7 + %lo(_redzone)] ! and restore red zone
#endif
	wr	%g0, 0, %wim		! enable window changes
	nop; nop; nop
	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
	wr	%g2, 0, %psr		! set rett psr (including cond codes)
	/* 3 instruction delay before we can use the new window */
/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7

	/* now we can use the new window */
	mov	%g1, %l4		! %l4 = trapframe pointer
	ld	[%l4 + 4], %l1		! get new pc
	ld	[%l4 + 8], %l2		! get new npc
	ld	[%l4 + 20], %g1		! set new %g1

	/* set up returnee's out registers, including its %sp */
	ldd	[%l4 + 48], %i0
	ldd	[%l4 + 56], %i2
	ldd	[%l4 + 64], %i4
	ldd	[%l4 + 72], %i6

	/* load returnee's window, making the window above it be invalid */
	restore
	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
	rd	%psr, %l0
	sll	%l1, %l0, %l1		! (sll uses only the low 5 bits of %l0)
	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
	sethi	%hi(cpcb), %l1
	ld	[%l1 + %lo(cpcb)], %l1
	and	%l0, 31, %l0		! CWP = %psr & 31;
	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
	save	%g0, %g0, %g0		! back to window to reload
	LOADWIN(%sp)
	save	%g0, %g0, %g0		! back to trap window
	/* note, we have not altered condition codes; safe to just rett */
	RETT
2326#endif
2327
2328/*
 * _syscall() builds a trap frame and calls syscall().
2330 * sun_syscall is same but delivers sun system call number
2331 * XXX	should not have to save&reload ALL the registers just for
2332 *	ptrace...
2333 */
_C_LABEL(_syscall):
	TRAP_SETUP(-CCFSZ-80)
	wr	%l0, PSR_ET, %psr	! traps on again
	std	%l0, [%sp + CCFSZ + 0]	! tf_psr, tf_pc
	rd	%y, %l3			! save %y (stored into tf just below)
	std	%l2, [%sp + CCFSZ + 8]	! tf_npc, tf_y
	st	%g1, [%sp + CCFSZ + 20]	! tf_g[1]
	std	%g2, [%sp + CCFSZ + 24]	! tf_g[2], tf_g[3]
	std	%g4, [%sp + CCFSZ + 32]	! etc
	std	%g6, [%sp + CCFSZ + 40]
	mov	%g1, %o0		! (code)
	std	%i0, [%sp + CCFSZ + 48]
	add	%sp, CCFSZ, %o1		! (&tf)
	std	%i2, [%sp + CCFSZ + 56]
	mov	%l1, %o2		! (pc)
	std	%i4, [%sp + CCFSZ + 64]
	call	_C_LABEL(syscall)	! syscall(code, &tf, pc, suncompat)
	 std	%i6, [%sp + CCFSZ + 72]	! (delay slot) tf_out[6..7]
	! now load em all up again, sigh
	ldd	[%sp + CCFSZ + 0], %l0	! new %psr, new pc
	ldd	[%sp + CCFSZ + 8], %l2	! new npc, new %y
	wr	%l3, 0, %y
	/* see `proc_trampoline' for the reason for this label */
return_from_syscall:
	ld	[%sp + CCFSZ + 20], %g1
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	ldd	[%sp + CCFSZ + 48], %i0
	ldd	[%sp + CCFSZ + 56], %i2
	ldd	[%sp + CCFSZ + 64], %i4
	ldd	[%sp + CCFSZ + 72], %i6
	b	return_from_trap
	 wr	%l0, 0, %psr		! (delay slot) install new psr
2368
2369/*
2370 * Interrupts.  Software interrupts must be cleared from the software
2371 * interrupt enable register.  Rather than calling ienab_bic for each,
2372 * we do them in-line before enabling traps.
2373 *
2374 * After preliminary setup work, the interrupt is passed to each
2375 * registered handler in turn.  These are expected to return nonzero if
2376 * they took care of the interrupt.  If a handler claims the interrupt,
2377 * we exit (hardware interrupts are latched in the requestor so we'll
2378 * just take another interrupt in the unlikely event of simultaneous
2379 * interrupts from two different devices at the same level).  If we go
2380 * through all the registered handlers and no one claims it, we report a
2381 * stray interrupt.  This is more or less done as:
2382 *
2383 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
2384 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
2385 *			return;
2386 *	strayintr(&frame);
2387 *
2388 * Software interrupts are almost the same with three exceptions:
2389 * (1) we clear the interrupt from the software interrupt enable
2390 *     register before calling any handler (we have to clear it first
2391 *     to avoid an interrupt-losing race),
2392 * (2) we always call all the registered handlers (there is no way
2393 *     to tell if the single bit in the software interrupt register
2394 *     represents one or many requests)
2395 * (3) we never announce a stray interrupt (because of (1), another
2396 *     interrupt request can come in while we're in the handler.  If
2397 *     the handler deals with everything for both the original & the
2398 *     new request, we'll erroneously report a stray interrupt when
 *     we take the software interrupt for the new request.)
2400 *
2401 * Inputs:
2402 *	%l0 = %psr
2403 *	%l1 = return pc
2404 *	%l2 = return npc
2405 *	%l3 = interrupt level
2406 *	(software interrupt only) %l4 = bits to clear in interrupt register
2407 *
2408 * Internal:
2409 *	%l4, %l5: local variables
2410 *	%l6 = %y
2411 *	%l7 = %g1
2412 *	%g2..%g7 go to stack
2413 *
2414 * An interrupt frame is built in the space for a full trapframe;
2415 * this contains the psr, pc, npc, and interrupt level.
2416 */
softintr_sun44c:
	sethi	%hi(INTRREG_VA), %l6
	ldub	[%l6 + %lo(INTRREG_VA)], %l5
	andn	%l5, %l4, %l5		! clear requested soft intr bits (%l4)
	stb	%l5, [%l6 + %lo(INTRREG_VA)]

softintr_common:
	INTR_SETUP(-CCFSZ-80)
	std	%g2, [%sp + CCFSZ + 24]	! save registers
	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
	mov	%g1, %l7		! save %g1 and %y in locals
	rd	%y, %l6
	std	%g4, [%sp + CCFSZ + 32]
	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
	std	%g6, [%sp + CCFSZ + 40]
	or	%l5, %l4, %l4		!			;
	wr	%l4, 0, %psr		! the manual claims this
	wr	%l4, PSR_ET, %psr	! song and dance is necessary
	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
	sll	%l3, 2, %l5		! %l5 = intlev * sizeof(word)
	set	_C_LABEL(intrcnt), %l4	! intrcnt[intlev]++;
	ld	[%l4 + %l5], %o0
	std	%l2, [%sp + CCFSZ + 8]
	inc	%o0
	st	%o0, [%l4 + %l5]
	set	_C_LABEL(intrhand), %l4	! %l4 = intrhand[intlev];
	ld	[%l4 + %l5], %l4
	b	3f
	 st	%fp, [%sp + CCFSZ + 16]

	! Soft interrupt handler loop: call EVERY registered handler
	! (the single pending bit may represent several requests).
1:	ld	[%l4 + 12], %o2		! ih->ih_classipl
	rd	%psr, %o3		!  (bits already shifted to PIL field)
	andn	%o3, PSR_PIL, %o3	! %o3 = psr & ~PSR_PIL
	wr	%o3, %o2, %psr		! splraise(ih->ih_classipl)
	ld	[%l4], %o1
	ld	[%l4 + 4], %o0
	nop				! one more insn before touching ICC
	tst	%o0
	bz,a	2f			! if ih->ih_arg == NULL,
	 add	%sp, CCFSZ, %o0		!	pass &frame instead
2:	jmpl	%o1, %o7		!	(void)(*ih->ih_fun)(...)
	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
3:	tst	%l4			! while ih != NULL
	bnz	1b
	 nop
	mov	%l7, %g1		! restore %g1 and %y
	wr	%l6, 0, %y
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	b	return_from_trap
	 wr	%l0, 0, %psr
2470
2471	/*
2472	 * _sparc_interrupt{44c,4m} is exported for paranoia checking
2473	 * (see intr.c).
2474	 */
2475#if defined(SUN4M)
_ENTRY(_C_LABEL(sparc_interrupt4m))
#if !defined(MSIIEP)	/* "normal" sun4m */
	sethi	%hi(CPUINFO_VA+CPUINFO_INTREG), %l6
	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_INTREG)], %l6
	mov	1, %l4
	ld	[%l6 + ICR_PI_PEND_OFFSET], %l5	! get pending interrupts
	sll	%l4, %l3, %l4	! hw intr bits are in the lower halfword

	btst	%l4, %l5	! has pending hw intr at this level?
	bnz	sparc_interrupt_common
	 nop

	! both softint pending and clear bits are in upper halfwords of
	! their respective registers so shift the test bit in %l4 up there
	sll	%l4, 16, %l4
#ifdef DIAGNOSTIC
	btst	%l4, %l5	! make sure softint pending bit is set
	bnz	softintr_common
	 st	%l4, [%l6 + ICR_PI_CLR_OFFSET]	! (clear it on the way out)
	/* FALLTHROUGH to sparc_interrupt4m_bogus */
#else
	b	softintr_common
	 st	%l4, [%l6 + ICR_PI_CLR_OFFSET]	! clear soft intr pending bit
#endif

#else /* MSIIEP */
	sethi	%hi(MSIIEP_PCIC_VA), %l6
	mov	1, %l4
	ld	[%l6 + PCIC_PROC_IPR_REG], %l5 ! get pending interrupts
	sll	%l4, %l3, %l4	! hw intr bits are in the lower halfword

	btst	%l4, %l5	! has pending hw intr at this level?
	bnz	sparc_interrupt_common
	 nop

#ifdef DIAGNOSTIC
	! softint pending bits are in the upper halfword, but softint
	! clear bits are in the lower halfword so we want the bit in %l4
	! kept in the lower half and instead shift pending bits right
	srl	%l5, 16, %l7
	btst	%l4, %l7	! make sure softint pending bit is set
	bnz	softintr_common
	 sth	%l4, [%l6 + PCIC_SOFT_INTR_CLEAR_REG]	! (clear it on the way)
	/* FALLTHROUGH to sparc_interrupt4m_bogus */
#else
	b	softintr_common
	 sth	%l4, [%l6 + PCIC_SOFT_INTR_CLEAR_REG]	! clear soft intr bit
#endif

#endif /* MSIIEP */
2526
2527#ifdef DIAGNOSTIC
2528	/*
2529	 * sparc_interrupt4m detected that neither hardware nor software
2530	 * interrupt pending bit is set for this interrupt.  Report this
2531	 * situation, this is most probably a symptom of a driver bug.
2532	 */
sparc_interrupt4m_bogus:
	INTR_SETUP(-CCFSZ-80)
	std	%g2, [%sp + CCFSZ + 24]	! save registers
	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
	mov	%g1, %l7		! save %g1 and %y in locals
	rd	%y, %l6
	std	%g4, [%sp + CCFSZ + 32]
	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
	std	%g6, [%sp + CCFSZ + 40]
	or	%l5, %l4, %l4		!			;
	wr	%l4, 0, %psr		! the manual claims this
	wr	%l4, PSR_ET, %psr	! song and dance is necessary
	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
	sll	%l3, 2, %l5
	set	_C_LABEL(intrcnt), %l4	! intrcnt[intlev]++;
	ld	[%l4 + %l5], %o0
	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
	inc	%o0
	st	%o0, [%l4 + %l5]

	st	%fp, [%sp + CCFSZ + 16]

	/* Unhandled interrupts while cold cause IPL to be raised to `high' */
	sethi	%hi(_C_LABEL(cold)), %o0
	ld	[%o0 + %lo(_C_LABEL(cold))], %o0
	tst	%o0			! if (cold) {
	bnz,a	1f			!	splhigh();
	 or	%l0, 0xf00, %l0		! } else

	call	_C_LABEL(bogusintr)	!	bogusintr(&intrframe)
	 add	%sp, CCFSZ, %o0
	/* all done: restore registers and go return */
1:
	mov	%l7, %g1		! restore %g1 and %y
	wr	%l6, 0, %y
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	b	return_from_trap
	 wr	%l0, 0, %psr
2574#endif /* DIAGNOSTIC */
2575#endif /* SUN4M */
2576
_ENTRY(_C_LABEL(sparc_interrupt44c))
sparc_interrupt_common:
	INTR_SETUP(-CCFSZ-80)
	std	%g2, [%sp + CCFSZ + 24]	! save registers
	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
	mov	%g1, %l7		! save %g1 and %y in locals
	rd	%y, %l6
	std	%g4, [%sp + CCFSZ + 32]
	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
	std	%g6, [%sp + CCFSZ + 40]
	or	%l5, %l4, %l4		!			;
	wr	%l4, 0, %psr		! the manual claims this
	wr	%l4, PSR_ET, %psr	! song and dance is necessary
	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
	sll	%l3, 2, %l5		! %l5 = intlev * sizeof(word)
	set	_C_LABEL(intrcnt), %l4	! intrcnt[intlev]++;
	ld	[%l4 + %l5], %o0
	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
	inc	%o0
	st	%o0, [%l4 + %l5]
	set	_C_LABEL(intrhand), %l4	! %l4 = intrhand[intlev];
	ld	[%l4 + %l5], %l4

#if defined(MULTIPROCESSOR) && defined(SUN4M) /* XXX */
	call	_C_LABEL(intr_lock_kernel)
	 nop
#endif

	b	3f
	 st	%fp, [%sp + CCFSZ + 16]

	! Hardware interrupt handler loop: stop as soon as one handler
	! claims the interrupt (returns nonzero).
1:	ld	[%l4 + 12], %o2		! ih->ih_classipl
	rd	%psr, %o3		!  (bits already shifted to PIL field)
	andn	%o3, PSR_PIL, %o3	! %o3 = psr & ~PSR_PIL
	wr	%o3, %o2, %psr		! splraise(ih->ih_classipl)
	ld	[%l4], %o1
	ld	[%l4 + 4], %o0
	nop				! one more insn before touching ICC
	tst	%o0
	bz,a	2f			! if ih->ih_arg == NULL,
	 add	%sp, CCFSZ, %o0		!	pass &frame instead
2:	jmpl	%o1, %o7		!	handled = (*ih->ih_fun)(...)
	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
	tst	%o0
	bnz	4f			! if (handled) break
	 nop
3:	tst	%l4
	bnz	1b			! while (ih)
	 nop

	/* Unhandled interrupts while cold cause IPL to be raised to `high' */
	sethi	%hi(_C_LABEL(cold)), %o0
	ld	[%o0 + %lo(_C_LABEL(cold))], %o0
	tst	%o0			! if (cold) {
	bnz,a	4f			!	splhigh();
	 or	%l0, 0xf00, %l0		! } else

	call	_C_LABEL(strayintr)	!	strayintr(&intrframe)
	 add	%sp, CCFSZ, %o0
	/* all done: restore registers and go return */
4:
#if defined(MULTIPROCESSOR) && defined(SUN4M) /* XXX */
	call	_C_LABEL(intr_unlock_kernel)
	 nop
#endif
	mov	%l7, %g1		! restore %g1 and %y
	wr	%l6, 0, %y
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	b	return_from_trap
	 wr	%l0, 0, %psr
2650
2651#ifdef notyet
2652/*
2653 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
2654 * software interrupt, and get out.  Do the software interrupt directly
2655 * if we would just take it on the way out.
2656 *
2657 * Input:
2658 *	%l0 = %psr
2659 *	%l1 = return pc
2660 *	%l2 = return npc
2661 * Internal:
2662 *	%l3 = zs device
2663 *	%l4, %l5 = temporary
2664 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
2665 *	%l7 = zs soft status
2666 */
2667zshard:
2668#endif /* notyet */
2669
2670/*
2671 * Level 15 interrupt.  An async memory error has occurred;
2672 * take care of it (typically by panicking, but hey...).
2673 *	%l0 = %psr
2674 *	%l1 = return pc
2675 *	%l2 = return npc
2676 *	%l3 = 15 * 4 (why? just because!)
2677 *
2678 * Internal:
2679 *	%l4 = %y
2680 *	%l5 = %g1
2681 *	%l6 = %g6
2682 *	%l7 = %g7
2683 *  g2, g3, g4, g5 go to stack
2684 *
2685 * This code is almost the same as that in mem_access_fault,
2686 * except that we already know the problem is not a `normal' fault,
2687 * and that we must be extra-careful with interrupt enables.
2688 */
2689
2690#if defined(SUN4)
nmi_sun4:
	INTR_SETUP(-CCFSZ-80)
	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
	/*
	 * Level 15 interrupts are nonmaskable, so with traps off,
	 * disable all interrupts to prevent recursion.
	 */
	sethi	%hi(INTRREG_VA), %o0
	ldub	[%o0 + %lo(INTRREG_VA)], %o1
	andn	%o1, IE_ALLIE, %o1
	stb	%o1, [%o0 + %lo(INTRREG_VA)]
	wr	%l0, PSR_ET, %psr	! okay, turn traps on again

	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
	rd	%y, %l4			! save y

	std	%g4, [%sp + CCFSZ + 8]	! save g4, g5
	mov	%g1, %l5		! save g1, g6, g7
	mov	%g6, %l6
	mov	%g7, %l7
	! NOTE(review): unlike nmi_sun4c, %o1-%o4 (the error-register
	! arguments to memerr4_4c in nmi_common) are not set here.
#if defined(SUN4C) || defined(SUN4M)
	b,a	nmi_common
#endif /* SUN4C || SUN4M */
2714#endif
2715
2716#if defined(SUN4C)
nmi_sun4c:
	INTR_SETUP(-CCFSZ-80)
	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)
	/*
	 * Level 15 interrupts are nonmaskable, so with traps off,
	 * disable all interrupts to prevent recursion.
	 */
	sethi	%hi(INTRREG_VA), %o0
	ldub	[%o0 + %lo(INTRREG_VA)], %o1
	andn	%o1, IE_ALLIE, %o1
	stb	%o1, [%o0 + %lo(INTRREG_VA)]
	wr	%l0, PSR_ET, %psr	! okay, turn traps on again

	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
	rd	%y, %l4			! save y

	! must read the sync error register too.
	set	AC_SYNC_ERR, %o0
	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
	std	%g4, [%sp + CCFSZ + 8]	! save g4,g5
	mov	%g1, %l5		! save g1,g6,g7
	mov	%g6, %l6
	mov	%g7, %l7
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o3	! async err reg
	inc	4, %o0
	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
#if defined(SUN4M)
	!!b,a	nmi_common		! (falls through to nmi_common anyway)
#endif /* SUN4M */
2749#endif /* SUN4C */
2750
nmi_common:
	! and call C code
	call	_C_LABEL(memerr4_4c)	! memerr(0, ser, sva, aer, ava)
	 clr	%o0			! (delay slot) first argument = 0

	mov	%l5, %g1		! restore g1 through g7
	ldd	[%sp + CCFSZ + 0], %g2
	ldd	[%sp + CCFSZ + 8], %g4
	wr	%l0, 0, %psr		! re-disable traps
	mov	%l6, %g6
	mov	%l7, %g7

	! set IE_ALLIE again (safe, we disabled traps again above)
	sethi	%hi(INTRREG_VA), %o0
	ldub	[%o0 + %lo(INTRREG_VA)], %o1
	or	%o1, IE_ALLIE, %o1
	stb	%o1, [%o0 + %lo(INTRREG_VA)]
	b	return_from_trap
	 wr	%l4, 0, %y		! restore y
2770
2771#if defined(SUN4M)
nmi_sun4m:
	INTR_SETUP(-CCFSZ-80)
	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1)

	/* Read the Pending Interrupts register */
	sethi	%hi(CPUINFO_VA+CPUINFO_INTREG), %l6
	ld	[%l6 + %lo(CPUINFO_VA+CPUINFO_INTREG)], %l6
	ld	[%l6 + ICR_PI_PEND_OFFSET], %l5	! get pending interrupts

	set	_C_LABEL(nmi_soft), %o3		! assume a softint
	set	PINTR_IC, %o1			! hard lvl 15 bit
	sethi	%hi(PINTR_SINTRLEV(15)), %o0	! soft lvl 15 bit
	btst	%o0, %l5		! soft level 15?
	bnz,a	1f			!
	 mov	%o0, %o1		! shift int clear bit to SOFTINT 15

	set	_C_LABEL(nmi_hard), %o3	/* it's a hardint; switch handler */

	/*
	 * Level 15 interrupts are nonmaskable, so with traps off,
	 * disable all interrupts to prevent recursion.
	 */
	sethi	%hi(ICR_SI_SET), %o0
	set	SINTR_MA, %o2
	st	%o2, [%o0 + %lo(ICR_SI_SET)]	! set `mask all' bit
#if defined(MULTIPROCESSOR) && defined(DDB)
	b	2f
	 clr	%o0			! hard nmi: no trapframe argument
#endif

1:
#if defined(MULTIPROCESSOR) && defined(DDB)
	/*
	 * Setup a trapframe for nmi_soft; this might be an IPI telling
	 * us to pause, so lets save some state for DDB to get at.
	 */
	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
	rd	%y, %l3
	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
	st	%g1, [%sp + CCFSZ + 20]
	std	%g2, [%sp + CCFSZ + 24]
	std	%g4, [%sp + CCFSZ + 32]
	std	%g6, [%sp + CCFSZ + 40]
	std	%i0, [%sp + CCFSZ + 48]
	std	%i2, [%sp + CCFSZ + 56]
	std	%i4, [%sp + CCFSZ + 64]
	std	%i6, [%sp + CCFSZ + 72]
	add	%sp, CCFSZ, %o0		! %o0 = &tf
2:
#else
	clr	%o0
#endif
	/*
	 * Now clear the NMI. Apparently, we must allow some time
	 * to let the bits sink in..
	 */
	st	%o1, [%l6 + ICR_PI_CLR_OFFSET]
	 nop; nop; nop;
	ld	[%l6 + ICR_PI_PEND_OFFSET], %g0	! drain register!?
	 nop; nop; nop;

	wr	%l0, PSR_ET, %psr	! okay, turn traps on again

	std	%g2, [%sp + CCFSZ + 80]	! save g2, g3
	rd	%y, %l4			! save y
	std	%g4, [%sp + CCFSZ + 88]	! save g4,g5

	/* Finish stackframe, call C trap handler */
	mov	%g1, %l5		! save g1,g6,g7
	mov	%g6, %l6

	jmpl	%o3, %o7		! nmi_hard(0) or nmi_soft(&tf)
	 mov	%g7, %l7		! (delay slot) last of the saves

	mov	%l5, %g1		! restore g1 through g7
	ldd	[%sp + CCFSZ + 80], %g2
	ldd	[%sp + CCFSZ + 88], %g4
	wr	%l0, 0, %psr		! re-disable traps
	mov	%l6, %g6
	mov	%l7, %g7

	!cmp	%o0, 0			! was this a soft nmi
	!be	4f
	/* XXX - we need to unblock `mask all ints' only on a hard nmi */

	! enable interrupts again (safe, we disabled traps again above)
	sethi	%hi(ICR_SI_CLR), %o0
	set	SINTR_MA, %o1
	st	%o1, [%o0 + %lo(ICR_SI_CLR)]

4:
	b	return_from_trap
	 wr	%l4, 0, %y		! restore y
2865#endif /* SUN4M */
2866
2867#ifdef GPROF
2868	.globl	window_of, winof_user
2869	.globl	window_uf, winuf_user, winuf_ok, winuf_invalid
2870	.globl	return_from_trap, rft_kernel, rft_user, rft_invalid
2871	.globl	softtrap, slowtrap
2872	.globl	clean_trap_window, _C_LABEL(_syscall)
2873#endif
2874
2875/*
2876 * Window overflow trap handler.
2877 *	%l0 = %psr
2878 *	%l1 = return pc
2879 *	%l2 = return npc
2880 */
window_of:
#ifdef TRIVIAL_WINDOW_OVERFLOW_HANDLER
	/* a trivial version that assumes %sp is ok */
	/* (for testing only!) */
	save	%g0, %g0, %g0		! step into the overflowing window
	std	%l0, [%sp + (0*8)]	! dump its registers to its own frame
	rd	%psr, %l0
	mov	1, %l1
	sll	%l1, %l0, %l0		! (sll uses only the low 5 bits = CWP)
	wr	%l0, 0, %wim		! mark this window invalid
	std	%l2, [%sp + (1*8)]
	std	%l4, [%sp + (2*8)]
	std	%l6, [%sp + (3*8)]
	std	%i0, [%sp + (4*8)]
	std	%i2, [%sp + (5*8)]
	std	%i4, [%sp + (6*8)]
	std	%i6, [%sp + (7*8)]
	restore				! back to the trap window
	RETT
#else
	/*
	 * This is similar to TRAP_SETUP, but we do not want to spend
	 * a lot of time, so we have separate paths for kernel and user.
	 * We also know for sure that the window has overflowed.
	 */
	btst	PSR_PS, %l0
	bz	winof_user
	 sethi	%hi(clean_trap_window), %l7

	/*
	 * Overflow from kernel mode.  Call clean_trap_window to
	 * do the dirty work, then just return, since we know prev
	 * window is valid.  clean_trap_windows might dump all *user*
	 * windows into the pcb, but we do not care: there is at
	 * least one kernel window (a trap or interrupt frame!)
	 * above us.
	 */
	jmpl	%l7 + %lo(clean_trap_window), %l4
	 mov	%g7, %l7		! for clean_trap_window

	wr	%l0, 0, %psr		! put back the @%*! cond. codes
	nop				! (let them settle in)
	RETT

winof_user:
	/*
	 * Overflow from user mode.
	 * If clean_trap_window dumps the registers into the pcb,
	 * rft_user will need to call trap(), so we need space for
	 * a trap frame.  We also have to compute pcb_nw.
	 *
	 * SHOULD EXPAND IN LINE TO AVOID BUILDING TRAP FRAME ON
	 * `EASY' SAVES
	 */
	sethi	%hi(cpcb), %l6
	ld	[%l6 + %lo(cpcb)], %l6
	ld	[%l6 + PCB_WIM], %l5
	and	%l0, 31, %l3		! %l3 = CWP
	sub	%l3, %l5, %l5 		/* l5 = CWP - pcb_wim */
	set	uwtab, %l4
	ldub	[%l4 + %l5], %l5	/* l5 = uwtab[l5] */
	st	%l5, [%l6 + PCB_UW]	! pcb_uw = number of user windows
	jmpl	%l7 + %lo(clean_trap_window), %l4
	 mov	%g7, %l7		! for clean_trap_window
	sethi	%hi(cpcb), %l6
	ld	[%l6 + %lo(cpcb)], %l6
	set	USPACE-CCFSZ-80, %l5
	add	%l6, %l5, %sp		/* over to kernel stack */
	CHECK_SP_REDZONE(%l6, %l5)

	/*
	 * Copy return_from_trap far enough to allow us
	 * to jump directly to rft_user_or_recover_pcb_windows
	 * (since we know that is where we are headed).
	 */
!	and	%l0, 31, %l3		! still set (clean_trap_window
					! leaves this register alone)
	set	wmask, %l6
	ldub	[%l6 + %l3], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
	b	rft_user_or_recover_pcb_windows
	 rd	%wim, %l4		! (read %wim first)
#endif /* end `real' version of window overflow trap handler */
2963
2964/*
2965 * Window underflow trap handler.
2966 *	%l0 = %psr
2967 *	%l1 = return pc
2968 *	%l2 = return npc
2969 *
2970 * A picture:
2971 *
2972 *	  T R I X
2973 *	0 0 0 1 0 0 0	(%wim)
2974 * [bit numbers increase towards the right;
2975 * `restore' moves right & `save' moves left]
2976 *
2977 * T is the current (Trap) window, R is the window that attempted
2978 * a `Restore' instruction, I is the Invalid window, and X is the
2979 * window we want to make invalid before we return.
2980 *
2981 * Since window R is valid, we cannot use rft_user to restore stuff
2982 * for us.  We have to duplicate its logic.  YUCK.
2983 *
2984 * Incidentally, TRIX are for kids.  Silly rabbit!
2985 */
2986window_uf:
2987#ifdef TRIVIAL_WINDOW_UNDERFLOW_HANDLER
2988	wr	%g0, 0, %wim		! allow us to enter I
2989	restore				! to R
2990	nop
2991	nop
2992	restore				! to I
2993	restore	%g0, 1, %l1		! to X
2994	rd	%psr, %l0
2995	sll	%l1, %l0, %l0
2996	wr	%l0, 0, %wim
2997	save	%g0, %g0, %g0		! back to I
2998	LOADWIN(%sp)
2999	save	%g0, %g0, %g0		! back to R
3000	save	%g0, %g0, %g0		! back to T
3001	RETT
3002#else
3003	wr	%g0, 0, %wim		! allow us to enter I
3004	btst	PSR_PS, %l0
3005	restore				! enter window R
3006	bz	winuf_user
3007	 restore			! enter window I
3008
3009	/*
3010	 * Underflow from kernel mode.  Just recover the
3011	 * registers and go (except that we have to update
3012	 * the blasted user pcb fields).
3013	 */
3014	restore	%g0, 1, %l1		! enter window X, then set %l1 to 1
3015	rd	%psr, %l0		! cwp = %psr & 31;
3016	and	%l0, 31, %l0
3017	sll	%l1, %l0, %l1		! wim = 1 << cwp;
3018	wr	%l1, 0, %wim		! setwim(wim);
3019	sethi	%hi(cpcb), %l1
3020	ld	[%l1 + %lo(cpcb)], %l1
3021	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = cwp;
3022	save	%g0, %g0, %g0		! back to window I
3023	LOADWIN(%sp)
3024	save	%g0, %g0, %g0		! back to R
3025	save	%g0, %g0, %g0		! and then to T
3026	wr	%l0, 0, %psr		! fix those cond codes....
3027	nop				! (let them settle in)
3028	RETT
3029
3030winuf_user:
3031	/*
3032	 * Underflow from user mode.
3033	 *
3034	 * We cannot use rft_user (as noted above) because
3035	 * we must re-execute the `restore' instruction.
3036	 * Since it could be, e.g., `restore %l0,0,%l0',
3037	 * it is not okay to touch R's registers either.
3038	 *
3039	 * We are now in window I.
3040	 */
3041	btst	7, %sp			! if unaligned, it is invalid
3042	bne	winuf_invalid
3043	 EMPTY
3044
3045	sethi	%hi(_C_LABEL(pgofset)), %l4
3046	ld	[%l4 + %lo(_C_LABEL(pgofset))], %l4
3047	PTE_OF_ADDR(%sp, %l7, winuf_invalid, %l4, NOP_ON_4M_5)
3048	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_6) ! if first page not readable,
3049	bne	winuf_invalid		! it is invalid
3050	 EMPTY
3051	SLT_IF_1PAGE_RW(%sp, %l7, %l4)	! first page is readable
3052	bl,a	winuf_ok		! if only one page, enter window X
3053	 restore %g0, 1, %l1		! and goto ok, & set %l1 to 1
3054	add	%sp, 7*8, %l5
3055	add     %l4, 62, %l4
3056	PTE_OF_ADDR(%l5, %l7, winuf_invalid, %l4, NOP_ON_4M_7)
3057	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_8) ! check second page too
3058	be,a	winuf_ok		! enter window X and goto ok
3059	 restore %g0, 1, %l1		! (and then set %l1 to 1)
3060
winuf_invalid:
	/*
	 * We were unable to restore the window because %sp
	 * is invalid or paged out.  Return to the trap window
	 * and call trap(T_WINUF).  This will save R to the user
	 * stack, then load both R and I into the pcb rw[] area,
	 * and return with pcb_nsaved set to -1 for success, 0 for
	 * failure.  `Failure' indicates that someone goofed with the
	 * trap registers (e.g., signals), so that we need to return
	 * from the trap as from a syscall (probably to a signal handler)
	 * and let it retry the restore instruction later.  Note that
	 * window R will have been pushed out to user space, and thus
	 * be the invalid window, by the time we get back here.  (We
	 * continue to label it R anyway.)  We must also set %wim again,
	 * and set pcb_uw to 1, before enabling traps.  (Window R is the
	 * only window, and it is a user window).
	 */
	save	%g0, %g0, %g0		! back to R
	save	%g0, 1, %l4		! back to T, then %l4 = 1
	sethi	%hi(cpcb), %l6
	ld	[%l6 + %lo(cpcb)], %l6
	st	%l4, [%l6 + PCB_UW]	! pcb_uw = 1
	ld	[%l6 + PCB_WIM], %l5	! get log2(%wim)
	sll	%l4, %l5, %l4		! %l4 = old %wim
	wr	%l4, 0, %wim		! window I is now invalid again
	set	USPACE-CCFSZ-80, %l5
	add	%l6, %l5, %sp		! get onto kernel stack
	CHECK_SP_REDZONE(%l6, %l5)

	/*
	 * Okay, call trap(T_WINUF, psr, pc, &tf).
	 * See `slowtrap' above for operation.
	 */
	wr	%l0, PSR_ET, %psr	! re-enable traps (we hold the kstack now)
	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
	rd	%y, %l3
	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
	mov	T_WINUF, %o0
	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
	mov	%l0, %o1		! arg1 = psr
	std	%g2, [%sp + CCFSZ + 24]	! etc
	mov	%l1, %o2		! arg2 = pc
	std	%g4, [%sp + CCFSZ + 32]
	add	%sp, CCFSZ, %o3		! arg3 = &tf
	std	%g6, [%sp + CCFSZ + 40]
	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
	std	%i2, [%sp + CCFSZ + 56]
	std	%i4, [%sp + CCFSZ + 64]
	call	_C_LABEL(trap)		! trap(T_WINUF, psr, pc, &tf)
	 std	%i6, [%sp + CCFSZ + 72]	! tf.tf_out[6]

	/* Back from trap(); reload possibly-modified trapframe state. */
	ldd	[%sp + CCFSZ + 0], %l0	! new psr, pc
	ldd	[%sp + CCFSZ + 8], %l2	! new npc, %y
	wr	%l3, 0, %y
	ld	[%sp + CCFSZ + 20], %g1
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	ldd	[%sp + CCFSZ + 40], %g6
	ldd	[%sp + CCFSZ + 48], %i0	! %o0 for window R, etc
	ldd	[%sp + CCFSZ + 56], %i2
	ldd	[%sp + CCFSZ + 64], %i4
	wr	%l0, 0, %psr		! disable traps: test must be atomic
	ldd	[%sp + CCFSZ + 72], %i6
	sethi	%hi(cpcb), %l6
	ld	[%l6 + %lo(cpcb)], %l6
	ld	[%l6 + PCB_NSAVED], %l7	! if nsaved is -1, we have our regs
	tst	%l7
	bl,a	1f			! got them
	 wr	%g0, 0, %wim		! allow us to enter windows R, I
	b,a	return_from_trap	! nsaved == 0: retry via normal return

	/*
	 * Got 'em.  Load 'em up.
	 */
1:
	mov	%g6, %l3		! save %g6; set %g6 = cpcb
	mov	%l6, %g6
	st	%g0, [%g6 + PCB_NSAVED]	! and clear magic flag
	restore				! from T to R
	restore				! from R to I
	restore	%g0, 1, %l1		! from I to X, then %l1 = 1
	rd	%psr, %l0		! cwp = %psr;
	sll	%l1, %l0, %l1		! (sll uses only low 5 bits of count => 1 << CWP)
	wr	%l1, 0, %wim		! make window X invalid
	and	%l0, 31, %l0
	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = cwp;
	nop				! unnecessary? old wim was 0...
	save	%g0, %g0, %g0		! back to I
	LOADWIN(%g6 + PCB_RW + 64)	! load from rw[1]
	save	%g0, %g0, %g0		! back to R
	LOADWIN(%g6 + PCB_RW)		! load from rw[0]
	save	%g0, %g0, %g0		! back to T
	wr	%l0, 0, %psr		! restore condition codes
	mov	%l3, %g6		! fix %g6
	RETT
3156
	/*
	 * Restoring from user stack, but everything has checked out
	 * as good.  We are now in window X, and %l1 = 1.  Window R
	 * is still valid and holds user values.
	 */
winuf_ok:
	rd	%psr, %l0
	sll	%l1, %l0, %l1		! %l1 = 1 << CWP (shift count taken mod 32)
	wr	%l1, 0, %wim		! make this one invalid
	sethi	%hi(cpcb), %l2
	ld	[%l2 + %lo(cpcb)], %l2
	and	%l0, 31, %l0
	st	%l0, [%l2 + PCB_WIM]	! cpcb->pcb_wim = cwp;
	save	%g0, %g0, %g0		! back to I
	LOADWIN(%sp)			! pull window I from the user stack
	save	%g0, %g0, %g0		! back to R
	save	%g0, %g0, %g0		! back to T
	wr	%l0, 0, %psr		! restore condition codes
	nop				! it takes three to tangle
	RETT
3177#endif /* end `real' version of window underflow trap handler */
3178
/*
 * Various return-from-trap routines (see return_from_trap).
 */

/*
 * Return from trap, to kernel.
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 *	%l4 = %wim
 *	%l5 = bit for previous window
 */
rft_kernel:
	btst	%l5, %l4		! if (wim & l5)
	bnz	1f			!	goto reload;
	 wr	%l0, 0, %psr		! but first put !@#*% cond codes back

	/* previous window is valid; just rett */
	nop				! wait for cond codes to settle in
	RETT

	/*
	 * Previous window is invalid.
	 * Update %wim and then reload l0..i7 from frame.
	 *
	 *	  T I X
	 *	0 0 1 0 0   (%wim)
	 * [see picture in window_uf handler]
	 *
	 * T is the current (Trap) window, I is the Invalid window,
	 * and X is the window we want to make invalid.  Window X
	 * currently has no useful values.
	 */
1:
	wr	%g0, 0, %wim		! allow us to enter window I
	nop; nop; nop			! (it takes a while)
	restore				! enter window I
	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
	rd	%psr, %l0		! CWP = %psr & 31;
	and	%l0, 31, %l0
	sll	%l1, %l0, %l1		! wim = 1 << CWP;
	wr	%l1, 0, %wim		! setwim(wim);
	sethi	%hi(cpcb), %l1
	ld	[%l1 + %lo(cpcb)], %l1
	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
	save	%g0, %g0, %g0		! back to window I
	LOADWIN(%sp)			! reload I's registers from its kernel frame
	save	%g0, %g0, %g0		! back to window T
	/*
	 * Note that the condition codes are still set from
	 * the code at rft_kernel; we can simply return.
	 */
	RETT
3232
/*
 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
 * will re-enter trap() if set.  Note that we may have to switch from
 * the interrupt stack to the kernel stack in this case.
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 *	%l4 = %wim
 *	%l5 = bit for previous window
 *	%l6 = cpcb
 * If returning to a valid window, just set psr and return.
 *
 * On entry %l7 already holds %hi(want_ast): the caller's annulled-branch
 * delay slot performed the sethi (see rft_user_or_recover_pcb_windows).
 */
rft_user:
!	sethi	%hi(_C_LABEL(want_ast)), %l7	! (done below)
	ld	[%l7 + %lo(_C_LABEL(want_ast))], %l7
	tst	%l7			! want AST trap?
	bne,a	softtrap		! yes, re-enter trap with type T_AST
	 mov	T_AST, %o0

	btst	%l5, %l4		! if (wim & l5)
	bnz	1f			!	goto reload;
	 wr	%l0, 0, %psr		! restore cond codes
	nop				! (three instruction delay)
	RETT

	/*
	 * Previous window is invalid.
	 * Before we try to load it, we must verify its stack pointer.
	 * This is much like the underflow handler, but a bit easier
	 * since we can use our own local registers.
	 */
1:
	btst	7, %fp			! if unaligned, address is invalid
	bne	rft_invalid
	 EMPTY

	sethi	%hi(_C_LABEL(pgofset)), %l3
	ld	[%l3 + %lo(_C_LABEL(pgofset))], %l3
	PTE_OF_ADDR(%fp, %l7, rft_invalid, %l3, NOP_ON_4M_9)
	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_10)	! try first page
	bne	rft_invalid		! no good
	 EMPTY
	SLT_IF_1PAGE_RW(%fp, %l7, %l3)
	bl,a	rft_user_ok		! only 1 page: ok
	 wr	%g0, 0, %wim
	add	%fp, 7*8, %l5		! %l5 = last doubleword of the 64-byte frame
	add	%l3, 62, %l3		! (adjust mask for PTE_OF_ADDR span check;
					!  macro definition not visible here)
	PTE_OF_ADDR(%l5, %l7, rft_invalid, %l3, NOP_ON_4M_11)
	CMP_PTE_USER_READ(%l7, %l5, NOP_ON_4M_12)	! check 2nd page too
	be,a	rft_user_ok
	 wr	%g0, 0, %wim

	/*
	 * The window we wanted to pull could not be pulled.  Instead,
	 * re-enter trap with type T_RWRET.  This will pull the window
	 * into cpcb->pcb_rw[0] and set cpcb->pcb_nsaved to -1, which we
	 * will detect when we try to return again.
	 */
rft_invalid:
	b	softtrap
	 mov	T_RWRET, %o0

	/*
	 * The window we want to pull can be pulled directly.
	 * (%wim was already cleared in the annulled delay slot
	 * of the branch that got us here.)
	 */
rft_user_ok:
!	wr	%g0, 0, %wim		! allow us to get into it
	wr	%l0, 0, %psr		! fix up the cond codes now
	nop; nop; nop
	restore				! enter window I
	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
	rd	%psr, %l0		! l0 = (junk << 5) + CWP;
	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
	wr	%l1, 0, %wim
	sethi	%hi(cpcb), %l1
	ld	[%l1 + %lo(cpcb)], %l1
	and	%l0, 31, %l0
	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
	save	%g0, %g0, %g0		! back to window I
	LOADWIN(%sp)			! suck hard
	save	%g0, %g0, %g0		! back to window T
	RETT
3315
/*
 * Return from trap.  Entered after a
 *	wr	%l0, 0, %psr
 * which disables traps so that we can rett; registers are:
 *
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 *
 * (%l3..%l7 anything).
 *
 * If we are returning to user code, we must:
 *  1.  Check for register windows in the pcb that belong on the stack.
 *	If there are any, reenter trap with type T_WINOF.
 *  2.  Make sure the register windows will not underflow.  This is
 *	much easier in kernel mode....
 */
return_from_trap:
!	wr	%l0, 0, %psr		! disable traps so we can rett
! (someone else did this already)
	and	%l0, 31, %l5		! CWP
	set	wmask, %l6
	ldub	[%l6 + %l5], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
	btst	PSR_PS, %l0		! returning to userland?
	bnz	rft_kernel		! no, go return to kernel
	 rd	%wim, %l4		! (read %wim in any case)

rft_user_or_recover_pcb_windows:
	/*
	 * (entered with %l4=%wim, %l5=wmask[cwp]; %l0..%l2 as usual)
	 *
	 * check cpcb->pcb_nsaved:
	 * if 0, do a `normal' return to user (see rft_user);
	 * if > 0, cpcb->pcb_rw[] holds registers to be copied to stack;
	 * if -1, cpcb->pcb_rw[0] holds user registers for rett window
	 * from an earlier T_RWRET pseudo-trap.
	 */
	sethi	%hi(cpcb), %l6
	ld	[%l6 + %lo(cpcb)], %l6
	ld	[%l6 + PCB_NSAVED], %l7
	tst	%l7
	bz,a	rft_user
	 sethi	%hi(_C_LABEL(want_ast)), %l7	! first instr of rft_user

	bg,a	softtrap		! if (pcb_nsaved > 0)
	 mov	T_WINOF, %o0		!	trap(T_WINOF);

	/*
	 * (pcb_nsaved == -1 here.)
	 * To get here, we must have tried to return from a previous
	 * trap and discovered that it would cause a window underflow.
	 * We then must have tried to pull the registers out of the
	 * user stack (from the address in %fp==%i6) and discovered
	 * that it was either unaligned or not loaded in memory, and
	 * therefore we ran a trap(T_RWRET), which loaded one set of
	 * registers into cpcb->pcb_rw[0] (if it had killed the
	 * process due to a bad stack, we would not be here).
	 *
	 * We want to load pcb_rw[0] into the previous window, which
	 * we know is currently invalid.  In other words, we want
	 * %wim to be 1 << ((cwp + 2) % nwindows).
	 */
	wr	%g0, 0, %wim		! enable restores
	mov	%g6, %l3		! save g6 in l3
	mov	%l6, %g6		! set g6 = &u
	st	%g0, [%g6 + PCB_NSAVED]	! clear cpcb->pcb_nsaved
	restore				! enter window I
	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
	rd	%psr, %l0
	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
	wr	%l1, 0, %wim
	and	%l0, 31, %l0
	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = CWP;
	nop				! unnecessary? old wim was 0...
	save	%g0, %g0, %g0		! back to window I
	LOADWIN(%g6 + PCB_RW)		! load the saved user window from the pcb
	save	%g0, %g0, %g0		! back to window T (trap window)
	wr	%l0, 0, %psr		! cond codes, cond codes everywhere
	mov	%l3, %g6		! restore g6
	RETT

! exported end marker for kernel gdb
	.globl	_C_LABEL(endtrapcode)
_C_LABEL(endtrapcode):
3399
/*
 * init_tables(nwin) int nwin;
 *
 * Set up the uwtab and wmask byte tables.
 * We know nwin > 1.
 *
 * In:	%o0 = nwin (number of register windows on this CPU)
 * Out:	uwtab[] and wmask[] filled in
 * Uses: %o1 = i, %o2 = j, %o3 = table base.  Leaf routine (retl).
 */
init_tables:
	/*
	 * for (i = -nwin, j = nwin - 2; ++i < 0; j--)
	 *	uwtab[i] = j;
	 * (loop runs at least once)
	 */
	set	uwtab, %o3
	sub	%g0, %o0, %o1		! i = -nwin + 1
	inc	%o1
	add	%o0, -2, %o2		! j = nwin - 2;
0:
	stb	%o2, [%o3 + %o1]	! uwtab[i] = j;
1:
	inccc	%o1			! ++i < 0?
	bl	0b			! yes, continue loop
	 dec	%o2			! in any case, j--

	/*
	 * (i now equals 0)
	 * for (j = nwin - 1; i < nwin; i++, j--)
	 *	uwtab[i] = j;
	 * (loop runs at least twice)
	 */
	sub	%o0, 1, %o2		! j = nwin - 1
0:
	stb	%o2, [%o3 + %o1]	! uwtab[i] = j
	inc	%o1			! i++
1:
	cmp	%o1, %o0		! i < nwin?
	bl	0b			! yes, continue
	 dec	%o2			! in any case, j--

	/*
	 * We observe that, for i in 0..nwin-2, (i+1)%nwin == i+1;
	 * for i==nwin-1, (i+1)%nwin == 0.
	 * To avoid adding 1, we run i from 1 to nwin and set
	 * wmask[i-1].
	 *
	 * for (i = j = 1; i < nwin; i++) {
	 *	j <<= 1;	(j now == 1 << i)
	 *	wmask[i - 1] = j;
	 * }
	 * (loop runs at least once)
	 */
	set	wmask - 1, %o3
	mov	1, %o1			! i = 1;
	mov	2, %o2			! j = 2;
0:
	stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
	inc	%o1			! i++
	cmp	%o1, %o0		! i < nwin?
	bl,a	0b			! yes, continue
	 sll	%o2, 1, %o2		! (and j <<= 1)

	/*
	 * Now i==nwin, so we want wmask[i-1] = 1  (wrap back to window 0).
	 */
	mov	1, %o2			! j = 1;
	retl
	 stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
3466
#ifdef SUN4
/*
 * getidprom(struct idprom *, sizeof(struct idprom))
 *
 * Copy the machine ID PROM, one byte at a time, from control space
 * (ASI_CONTROL at AC_IDPROM) into the caller's buffer.
 * In:	%o0 = destination buffer, %o1 = byte count (loop assumes > 0)
 * Clobbers: %o0, %o1, %o2, %o3.  Leaf routine.
 */
_ENTRY(_C_LABEL(getidprom))
	set	AC_IDPROM, %o2
1:	lduba	[%o2] ASI_CONTROL, %o3	! fetch next IDPROM byte
	stb	%o3, [%o0]		! store it to the buffer
	inc	%o0
	inc	%o2
	dec	%o1
	cmp	%o1, 0
	bne	1b
	 nop
	retl
	 nop
#endif
3484
dostart:
	/*
	 * Startup.
	 *
	 * We have been loaded in low RAM, at some address which
	 * is page aligned (PROM_LOADADDR actually) rather than where we
	 * want to run (KERNBASE+PROM_LOADADDR).  Until we get everything set,
	 * we have to be sure to use only pc-relative addressing.
	 */

	/*
	 * We now use the bootinfo method to pass arguments, and the new
	 * magic number indicates that. A pointer to the kernel top, i.e.
	 * the first address after the load kernel image (including DDB
	 * symbols, if any) is passed in %o4[0] and the bootinfo structure
	 * is passed in %o4[1].
	 *
	 * A magic number is passed in %o5 to allow for bootloaders
	 * that know nothing about the bootinfo structure or previous
	 * DDB symbol loading conventions.
	 *
	 * For compatibility with older versions, we check for DDB arguments
	 * if the older magic number is there. The loader passes `kernel_top'
	 * (previously known as `esym') in %o4.
	 *
	 * Note: we don't touch %o1-%o3; SunOS bootloaders seem to use them
	 * for their own mirky business.
	 *
	 * Pre-NetBSD 1.3 bootblocks had KERNBASE compiled in, and used it
	 * to compute the value of `kernel_top' (previously known as `esym').
	 * In order to successfully boot a kernel built with a different value
	 * for KERNBASE using old bootblocks, we fixup `kernel_top' here by
	 * the difference between KERNBASE and the old value (known to be
	 * 0xf8000000) compiled into pre-1.3 bootblocks.
	 */
	set	KERNBASE, %l4

	set	0x44444232, %l3		! bootinfo magic
	cmp	%o5, %l3
	bne	1f
	 nop

	/* The loader has passed to us a `bootinfo' structure */
	ld	[%o4], %l3		! 1st word is kernel_top
	add	%l3, %l4, %o5		! relocate: + KERNBASE
	sethi	%hi(_C_LABEL(kernel_top) - KERNBASE), %l3 ! and store it
	st	%o5, [%l3 + %lo(_C_LABEL(kernel_top) - KERNBASE)]

	ld	[%o4 + 4], %l3		! 2nd word is bootinfo
	add	%l3, %l4, %o5		! relocate
	sethi	%hi(_C_LABEL(bootinfo) - KERNBASE), %l3	! store bootinfo
	st	%o5, [%l3 + %lo(_C_LABEL(bootinfo) - KERNBASE)]
	b,a	4f

1:
#ifdef DDB
	/* Check for old-style DDB loader magic */
	set	0x44444231, %l3		! Is it DDB_MAGIC1?
	cmp	%o5, %l3
	be,a	2f
	 clr	%l4			! if DDB_MAGIC1, clear %l4

	set	0x44444230, %l3		! Is it DDB_MAGIC0?
	cmp	%o5, %l3		! if so, need to relocate %o4
	bne	3f			/* if not, there's no bootloader info */
	! (the delay slot of this branch is the `sethi' half of the
	!  `set' below; it only clobbers %l5, which is dead at 3f)

					! note: %l4 set to KERNBASE above.
	set	0xf8000000, %l5		! compute correction term:
	sub	%l5, %l4, %l4		!  old KERNBASE (0xf8000000 ) - KERNBASE

2:
	tst	%o4			! do we have the symbols?
	bz	3f
	 sub	%o4, %l4, %o4		! apply compat correction
	sethi	%hi(_C_LABEL(kernel_top) - KERNBASE), %l3 ! and store it
	st	%o4, [%l3 + %lo(_C_LABEL(kernel_top) - KERNBASE)]
	b,a	4f
3:
#endif
	/*
	 * The boot loader did not pass in a value for `kernel_top';
	 * let it default to `end'.
	 */
	set	end, %o4
	sethi	%hi(_C_LABEL(kernel_top) - KERNBASE), %l3 ! store kernel_top
	st	%o4, [%l3 + %lo(_C_LABEL(kernel_top) - KERNBASE)]

4:
3573
	/*
	 * Machine-type detection: decide sun4 / sun4c / sun4m / sun4d,
	 * then jump to start_havetype with:
	 *	%g4 = CPU_SUN4*   %g5 = page shift   %g6 = trap base
	 *	%g7 = PROM vector (romp / OF client entry)
	 *
	 * Sun4 passes in the `load address'.  Although possible, its highly
	 * unlikely that OpenBoot would place the prom vector there.
	 */
	set	PROM_LOADADDR, %g7
	cmp	%o0, %g7
	be	is_sun4
	 nop

#if defined(SUN4C) || defined(SUN4M) || defined(SUN4D)
	/*
	 * Be prepared to get OF client entry in either %o0 or %o3.
	 * XXX Will this ever trip on sun4d?  Let's hope not!
	 */
	cmp	%o0, 0
	be	is_openfirm
	 nop

	mov	%o0, %g7		! save romp passed by boot code

	/* First, check `romp->pv_magic' */
	ld	[%g7 + PV_MAGIC], %o0	! v = pv->pv_magic
	set	OBP_MAGIC, %o1
	cmp	%o0, %o1		! if ( v != OBP_MAGIC) {
	bne	is_sun4m		!    assume this is an OPENFIRM machine
	 nop				! }

	/*
	 * are we on a sun4c or a sun4m or a sun4d?
	 */
	ld	[%g7 + PV_NODEOPS], %o4	! node = pv->pv_nodeops->no_nextnode(0)
	ld	[%o4 + NO_NEXTNODE], %o4
	call	%o4
	 mov	0, %o0			! node

	mov	%o0, %l0
	set	cputypvar-KERNBASE, %o1	! name = "compatible"
	set	cputypval-KERNBASE, %o2	! buffer ptr (assume buffer long enough)
	ld	[%g7 + PV_NODEOPS], %o4	! (void)pv->pv_nodeops->no_getprop(...)
	ld	[%o4 + NO_GETPROP], %o4
	call	 %o4
	 nop
	set	cputypval-KERNBASE, %o2	! buffer ptr
	ldub	[%o2 + 4], %o0		! which is it... "sun4c", "sun4m", "sun4d"?
	cmp	%o0, 'c'
	be	is_sun4c
	 nop
	cmp	%o0, 'm'
	be	is_sun4m
	 nop
	cmp	%o0, 'd'
	be	is_sun4d
	 nop
#endif /* SUN4C || SUN4M || SUN4D */

	/*
	 * Don't know what type of machine this is; just halt back
	 * out to the PROM.
	 */
	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
	call	%o1
	 nop

is_openfirm:
	! OF client entry in %o3 (kernel booted directly by PROM?)
	mov	%o3, %g7
	/* FALLTHROUGH to sun4m case */

is_sun4m:
#if defined(SUN4M)
	set	trapbase_sun4m, %g6
	mov	SUN4CM_PGSHIFT, %g5
	b	start_havetype
	 mov	CPU_SUN4M, %g4
#else
	set	sun4m_notsup-KERNBASE, %o0
	ld	[%g7 + PV_EVAL], %o1
	call	%o1			! print a message saying that the
	 nop				! sun4m architecture is not supported
	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
	call	%o1
	 nop
	/*NOTREACHED*/
#endif
is_sun4d:
#if defined(SUN4D)
	set	trapbase_sun4m, %g6	/* XXXJRT trapbase_sun4d */
	mov	SUN4CM_PGSHIFT, %g5
	b	start_havetype
	 mov	CPU_SUN4D, %g4
#else
	set	sun4d_notsup-KERNBASE, %o0
	ld	[%g7 + PV_EVAL], %o1
	call	%o1			! print a message saying that the
	 nop				! sun4d architecture is not supported
	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
	call	%o1
	 nop
	/*NOTREACHED*/
#endif
is_sun4c:
#if defined(SUN4C)
	set	trapbase_sun4c, %g6
	mov	SUN4CM_PGSHIFT, %g5

	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
	stba	%g0, [%g1] ASI_CONTROL

	b	start_havetype
	 mov	CPU_SUN4C, %g4		! XXX CPU_SUN4
#else
	set	sun4c_notsup-KERNBASE, %o0

	ld	[%g7 + PV_ROMVEC_VERS], %o1
	cmp	%o1, 0
	bne	1f
	 nop

	! stupid version 0 rom interface is pv_eval(int length, char *string)
	mov	%o0, %o1
2:	ldub	[%o0], %o4
	! NOTE(review): `ldub' does not set the condition codes, so this
	! `bne' tests stale flags (Z still set from the `cmp' above); the
	! strlen loop appears broken -- looks like a `tst %o4' is missing.
	! Confirm before relying on this (rarely-exercised) error path.
	bne	2b
	 inc	%o0
	dec	%o0
	sub	%o0, %o1, %o0

1:	ld	[%g7 + PV_EVAL], %o2
	call	%o2			! print a message saying that the
	 nop				! sun4c architecture is not supported
	ld	[%g7 + PV_HALT], %o1	! by this kernel, then halt
	call	%o1
	 nop
	/*NOTREACHED*/
#endif
is_sun4:
#if defined(SUN4)
	set	trapbase_sun4, %g6
	mov	SUN4_PGSHIFT, %g5

	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
	stba	%g0, [%g1] ASI_CONTROL

	b	start_havetype
	 mov	CPU_SUN4, %g4
#else
	set	PROM_BASE, %g7

	set	sun4_notsup-KERNBASE, %o0
	ld	[%g7 + OLDMON_PRINTF], %o1
	call	%o1			! print a message saying that the
	 nop				! sun4 architecture is not supported
	ld	[%g7 + OLDMON_HALT], %o1 ! by this kernel, then halt
	call	%o1
	 nop
	/*NOTREACHED*/
#endif
3730
start_havetype:
	/*
	 * Step 1: double map low RAM (addresses [0.._end-start-1])
	 * to KERNBASE (addresses [KERNBASE.._end-1]).  None of these
	 * are `bad' aliases (since they are all on segment boundaries)
	 * so we do not have to worry about cache aliasing.
	 *
	 * We map in another couple of segments just to have some
	 * more memory (512K, actually) guaranteed available for
	 * bootstrap code (pmap_bootstrap needs memory to hold MMU
	 * and context data structures). Note: this is only relevant
	 * for 2-level MMU sun4/sun4c machines.
	 */
	clr	%l0			! lowva
	set	KERNBASE, %l1		! highva

	sethi	%hi(_C_LABEL(kernel_top) - KERNBASE), %o0
	ld	[%o0 + %lo(_C_LABEL(kernel_top) - KERNBASE)], %o1
	set	(2 << 18), %o2		! add slack for sun4c MMU
	add	%o1, %o2, %l2		! last va that must be remapped

	/*
	 * Need different initial mapping functions for different
	 * types of machines.
	 */
#if defined(SUN4C)
	cmp	%g4, CPU_SUN4C
	bne	1f
	 set	1 << 18, %l3		! segment size in bytes
0:
	lduba	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
	stba	%l4, [%l1] ASI_SEGMAP
	add	%l3, %l1, %l1		! highva += segsiz;
	cmp	%l1, %l2		! done?
	blu	0b			! no, loop
	 add	%l3, %l0, %l0		! (and lowva += segsz)
	b,a	startmap_done
1:
#endif /* SUN4C */

#if defined(SUN4)
	cmp	%g4, CPU_SUN4
	bne	2f
#if defined(SUN4_MMU3L)
	set	AC_IDPROM+1, %l3	! (delay slot: sethi half, harmless)
	lduba	[%l3] ASI_CONTROL, %l3	! read machine-type byte of IDPROM
	cmp	%l3, 0x24 ! XXX - SUN4_400
	bne	no_3mmu
	 nop

	/*
	 * Three-level sun4 MMU.
	 * Double-map by duplicating a single region entry (which covers
	 * 16MB) corresponding to the kernel's virtual load address.
	 */
	add	%l0, 2, %l0		! get to proper half-word in RG space
	add	%l1, 2, %l1
	lduha	[%l0] ASI_REGMAP, %l4	! regmap[highva] = regmap[lowva];
	stha	%l4, [%l1] ASI_REGMAP
	b,a	startmap_done
no_3mmu:
#endif

	/*
	 * Two-level sun4 MMU.
	 * Double-map by duplicating the required number of segment
	 * entries corresponding to the kernel's virtual load address.
	 */
	set	1 << 18, %l3		! segment size in bytes
0:
	lduha	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
	stha	%l4, [%l1] ASI_SEGMAP
	add	%l3, %l1, %l1		! highva += segsiz;
	cmp	%l1, %l2		! done?
	blu	0b			! no, loop
	 add	%l3, %l0, %l0		! (and lowva += segsz)
	b,a	startmap_done
2:
#endif /* SUN4 */

#if defined(SUN4M) || defined(SUN4D)
	cmp	%g4, CPU_SUN4M
	beq	3f
	 nop
	cmp	%g4, CPU_SUN4D
	bne	4f
	! (delay slot: first insn below the `3:' label; clobbers only %o1)

3:
	/*
	 * The OBP guarantees us a 16MB mapping using a level 1 PTE at
	 * the start of the memory bank in which we were loaded. All we
	 * have to do is copy the entry.
	 * Also, we must check to see if we have a TI Viking in non-mbus mode,
	 * and if so do appropriate flipping and turning off traps before
	 * we dork with MMU passthrough.  -grrr
	 */

	sethi	%hi(0x40000000), %o1	! TI version bit
	rd	%psr, %o0
	andcc	%o0, %o1, %g0
	be	remap_notvik		! is non-TI normal MBUS module
	lda	[%g0] ASI_SRMMU, %o0	! load MMU
	andcc	%o0, 0x800, %g0
	bne	remap_notvik		! It is a viking MBUS module
	nop

	/*
	 * Ok, we have a non-Mbus TI Viking, a MicroSparc.
	 * In this scenerio, in order to play with the MMU
	 * passthrough safely, we need turn off traps, flip
	 * the AC bit on in the mmu status register, do our
	 * passthroughs, then restore the mmu reg and %psr
	 */
	rd	%psr, %o4		! saved here till done
	andn	%o4, 0x20, %o5		! clear PSR_ET
	wr	%o5, 0x0, %psr
	nop; nop; nop;
	set	SRMMU_CXTPTR, %o0
	lda	[%o0] ASI_SRMMU, %o0	! get context table ptr
	sll	%o0, 4, %o0		! make physical
	lda	[%g0] ASI_SRMMU, %o3	! hold mmu-sreg here
	/* 0x8000 is AC bit in Viking mmu-ctl reg */
	set	0x8000, %o2
	or	%o3, %o2, %o2
	sta	%o2, [%g0] ASI_SRMMU	! AC bit on

	lda	[%o0] ASI_BYPASS, %o1	! context 0 table entry
	srl	%o1, 4, %o1
	sll	%o1, 8, %o1		! get phys addr of l1 entry
	lda	[%o1] ASI_BYPASS, %l4	! l1 PTE covering the load address
	srl	%l1, 22, %o2		! note: 22 == RGSHIFT - 2
	add	%o1, %o2, %o1		! slot for KERNBASE's l1 entry
	sta	%l4, [%o1] ASI_BYPASS	! duplicate it there

	sta	%o3, [%g0] ASI_SRMMU	! restore mmu-sreg
	wr	%o4, 0x0, %psr		! restore psr
	b,a	startmap_done

	/*
	 * The following is generic and should work on all
	 * Mbus based SRMMU's.
	 */
remap_notvik:
	set	SRMMU_CXTPTR, %o0
	lda	[%o0] ASI_SRMMU, %o0	! get context table ptr
	sll	%o0, 4, %o0		! make physical
	lda	[%o0] ASI_BYPASS, %o1
	srl	%o1, 4, %o1
	sll	%o1, 8, %o1		! get phys addr of l1 entry
	lda	[%o1] ASI_BYPASS, %l4
	srl	%l1, 22, %o2		! note: 22 == RGSHIFT - 2
	add	%o1, %o2, %o1
	sta	%l4, [%o1] ASI_BYPASS
	!b,a	startmap_done
4:
#endif /* SUN4M || SUN4D */
	! botch! We should blow up.
3888
startmap_done:
	/*
	 * All set, fix pc and npc.  Once we are where we should be,
	 * we can give ourselves a stack and enable traps.
	 */
	set	1f, %g1			! `set' resolves to the KERNBASE address
	jmp	%g1			! jump from low-RAM alias to high alias
	 nop
1:
	sethi	%hi(_C_LABEL(cputyp)), %o0	! what type of cpu we are on
	st	%g4, [%o0 + %lo(_C_LABEL(cputyp))]

	sethi	%hi(_C_LABEL(pgshift)), %o0	! pgshift = log2(nbpg)
	st	%g5, [%o0 + %lo(_C_LABEL(pgshift))]

	mov	1, %o0			! nbpg = 1 << pgshift
	sll	%o0, %g5, %g5
	sethi	%hi(_C_LABEL(nbpg)), %o0	! nbpg = bytes in a page
	st	%g5, [%o0 + %lo(_C_LABEL(nbpg))]

	sub	%g5, 1, %g5
	sethi	%hi(_C_LABEL(pgofset)), %o0 ! page offset = bytes in a page - 1
	st	%g5, [%o0 + %lo(_C_LABEL(pgofset))]

	rd	%psr, %g3		! paranoia: make sure ...
	andn	%g3, PSR_ET, %g3	! we have traps off
	wr	%g3, 0, %psr		! so that we can fiddle safely
	nop; nop; nop

	wr	%g0, 0, %wim		! make sure we can set psr
	nop; nop; nop
	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
	 nop; nop; nop

	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
	mov	1, %g1			! set pcb_wim (log2(%wim) = 1)
	sethi	%hi(_C_LABEL(u0) + PCB_WIM), %g2
	st	%g1, [%g2 + %lo(_C_LABEL(u0) + PCB_WIM)]

	set	USRSTACK - CCFSZ, %fp	! as if called from user code
	set	estack0 - CCFSZ - 80, %sp ! via syscall(boot_me_up) or somesuch
	rd	%psr, %l0
	wr	%l0, PSR_ET, %psr	! enable traps
	nop; nop; nop

	/* Export actual trapbase */
	sethi	%hi(_C_LABEL(trapbase)), %o0
	st	%g6, [%o0+%lo(_C_LABEL(trapbase))]

#ifdef notdef
	/*
	 * Step 2: clear BSS.  This may just be paranoia; the boot
	 * loader might already do it for us; but what the hell.
	 */
	set	_edata, %o0		! bzero(edata, end - edata)
	set	_end, %o1
	call	_C_LABEL(bzero)
	 sub	%o1, %o0, %o1
#endif

	/*
	 * Stash prom vectors now, after bzero, as it lives in bss
	 * (which we just zeroed).
	 * This depends on the fact that bzero does not use %g7.
	 */
	sethi	%hi(_C_LABEL(romp)), %l0
	st	%g7, [%l0 + %lo(_C_LABEL(romp))]

	/*
	 * Step 3: compute number of windows and set up tables.
	 * We could do some of this later.
	 * (save/restore wrap CWP around, so CWP after one save of a
	 * fresh psr read gives nwindows - 1.)
	 */
	save	%sp, -64, %sp
	rd	%psr, %g1
	restore
	and	%g1, 31, %g1		! want just the CWP bits
	add	%g1, 1, %o0		! compute nwindows
	sethi	%hi(_C_LABEL(nwindows)), %o1	! may as well tell everyone
	call	init_tables
	 st	%o0, [%o1 + %lo(_C_LABEL(nwindows))]

#if defined(SUN4) || defined(SUN4C)
	/*
	 * Some sun4/sun4c models have fewer than 8 windows. For extra
	 * speed, we do not need to save/restore those windows
	 * The save/restore code has 7 "save"'s followed by 7
	 * "restore"'s -- we "nop" out the last "save" and first
	 * "restore".
	 * (noplab holds a `nop'; its instruction encoding is read and
	 * stored over the two words at wb1+24 and wb1+28.)
	 */
	cmp	%o0, 8
	be	1f
noplab:	 nop
	sethi	%hi(noplab), %l0
	ld	[%l0 + %lo(noplab)], %l1
	set	wb1, %l0
	st	%l1, [%l0 + 6*4]
	st	%l1, [%l0 + 7*4]
1:
#endif

#if (defined(SUN4) || defined(SUN4C)) && (defined(SUN4M) || defined(SUN4D))

	/*
	 * Patch instructions at specified labels that start
	 * per-architecture code-paths.
	 */
Lgandul:	nop

#define MUNGE(label) \
	sethi	%hi(label), %o0; \
	st	%l0, [%o0 + %lo(label)]

	sethi	%hi(Lgandul), %o0
	ld	[%o0 + %lo(Lgandul)], %l0	! %l0 = NOP

	cmp	%g4, CPU_SUN4M
	beq,a	2f
	 nop

	cmp	%g4, CPU_SUN4D
	bne,a	1f
	 nop

2:	! this should be automated!
	MUNGE(NOP_ON_4M_1)
	MUNGE(NOP_ON_4M_2)
	MUNGE(NOP_ON_4M_3)
	MUNGE(NOP_ON_4M_4)
	MUNGE(NOP_ON_4M_5)
	MUNGE(NOP_ON_4M_6)
	MUNGE(NOP_ON_4M_7)
	MUNGE(NOP_ON_4M_8)
	MUNGE(NOP_ON_4M_9)
	MUNGE(NOP_ON_4M_10)
	MUNGE(NOP_ON_4M_11)
	MUNGE(NOP_ON_4M_12)
	MUNGE(NOP_ON_4M_15)
	b,a	2f

1:
	MUNGE(NOP_ON_4_4C_1)

2:

#undef MUNGE
#endif

	/*
	 * Step 4: change the trap base register, now that our trap handlers
	 * will function (they need the tables we just set up).
	 * This depends on the fact that bzero does not use %g6.
	 */
	wr	%g6, 0, %tbr
	nop; nop; nop			! paranoia


	/* Clear `cpuinfo' */
	sethi	%hi(CPUINFO_VA), %o0		! bzero(&cpuinfo, NBPG)
	sethi	%hi(CPUINFO_STRUCTSIZE), %o1
	call	_C_LABEL(bzero)
	 add	%o1, %lo(CPUINFO_STRUCTSIZE), %o1

	/*
	 * Initialize `cpuinfo' fields which are needed early.  Note
	 * we make the cpuinfo self-reference at the local VA for now.
	 * It may be changed to reference a global VA later.
	 */
	set	_C_LABEL(u0), %o0		! cpuinfo.curpcb = u0;
	sethi	%hi(cpcb), %l0
	st	%o0, [%l0 + %lo(cpcb)]

	sethi	%hi(CPUINFO_VA), %o0		! cpuinfo.ci_self = &cpuinfo;
	sethi	%hi(_CISELFP), %l0
	st	%o0, [%l0 + %lo(_CISELFP)]

	set	_C_LABEL(eintstack), %o0	! cpuinfo.eintstack= _eintstack;
	sethi	%hi(_EINTSTACKP), %l0
	st	%o0, [%l0 + %lo(_EINTSTACKP)]

	/*
	 * Ready to run C code; finish bootstrap.
	 */
	call	_C_LABEL(bootstrap)
	 nop

	/*
	 * Call main.  This returns to us after loading /sbin/init into
	 * user space.  (If the exec fails, main() does not return.)
	 */
	call	_C_LABEL(main)
	 clr	%o0			! our frame arg is ignored
	/*NOTREACHED*/
4081
4082
#if defined(SUN4M) || defined(SUN4D)
/*
 * V8 multiply and divide routines, to be copied over the code
 * for the V6/V7 routines.  Seems a shame to spend the call, but....
 * Note: while .umul and .smul return a 64-bit result in %o1%o0,
 * gcc only really cares about the low 32 bits in %o0.  This is
 * really just gcc output, cleaned up a bit.
 */
	.globl	_C_LABEL(sparc_v8_muldiv)
_C_LABEL(sparc_v8_muldiv):
	save    %sp, -CCFSZ, %sp

/*
 * OVERWRITE(rtn, v8_rtn, len): bcopy the V8 replacement over the
 * libkern millicode routine `rtn', then flush the instruction cache
 * for the overwritten range, 8 bytes per `flush'.
 */
#define	OVERWRITE(rtn, v8_rtn, len)	\
	set	v8_rtn, %o0;		\
	set	rtn, %o1;		\
	call	_C_LABEL(bcopy);	\
	 mov	len, %o2;		\
	/* now flush the insn cache */	\
	set	rtn, %o0;		\
	 mov	len, %o1;		\
0:					\
	flush	%o0;			\
	subcc	%o1, 8, %o1;		\
	bgu	0b;			\
	 add	%o0, 8, %o0;		\

	OVERWRITE(.mul, v8_smul, v8_smul_len)
	OVERWRITE(.umul, v8_umul, v8_umul_len)
	OVERWRITE(.div, v8_sdiv, v8_sdiv_len)
	OVERWRITE(.udiv, v8_udiv, v8_udiv_len)
	OVERWRITE(.rem, v8_srem, v8_srem_len)
	OVERWRITE(.urem, v8_urem, v8_urem_len)
#undef	OVERWRITE
	ret
	 restore

/* The V8 replacement bodies; each *_len symbol is its size in bytes. */
v8_smul:
	retl
	 smul	%o0, %o1, %o0
v8_smul_len = .-v8_smul
v8_umul:
	retl
	 umul	%o0, %o1, %o0
!v8_umul_len = 2 * 4
v8_umul_len = .-v8_umul
v8_sdiv:
	sra	%o0, 31, %g2		! %y = sign bits of dividend
	wr	%g2, 0, %y
	nop; nop; nop			! %y write needs 3 insns before use
	retl
	 sdiv	%o0, %o1, %o0
v8_sdiv_len = .-v8_sdiv
v8_udiv:
	wr	%g0, 0, %y		! high 32 bits of dividend = 0
	nop; nop; nop
	retl
	 udiv	%o0, %o1, %o0
v8_udiv_len = .-v8_udiv
v8_srem:
	sra	%o0, 31, %g3
	wr	%g3, 0, %y
	nop; nop; nop
	sdiv	%o0, %o1, %g2		! quotient
	smul	%g2, %o1, %g2		! quotient * divisor
	retl
	 sub	%o0, %g2, %o0		! remainder = dividend - q*divisor
v8_srem_len = .-v8_srem
v8_urem:
	wr	%g0, 0, %y
	nop; nop; nop
	udiv	%o0, %o1, %g2
	smul	%g2, %o1, %g2
	retl
	 sub	%o0, %g2, %o0
v8_urem_len = .-v8_urem

#endif /* SUN4M || SUN4D */
4160
#if defined(MULTIPROCESSOR)
	/*
	 * cpu_hatch: entry point for non-boot CPUs in MP systems.
	 *
	 * Brings the secondary CPU from the firmware/boot state into the
	 * kernel: traps off, %wim/%psr/%tbr initialized, a per-CPU hatch
	 * stack installed, then calls cpu_setup() and spins until the
	 * boot CPU releases us via go_smp_cpus.  Never returns.
	 */
	.globl	_C_LABEL(cpu_hatch)
_C_LABEL(cpu_hatch):
	rd	%psr, %g3		! paranoia: make sure ...
	andn	%g3, PSR_ET, %g3	! we have traps off
	wr	%g3, 0, %psr		! so that we can fiddle safely
	nop; nop; nop			! %psr write delay

	wr	%g0, 0, %wim		! make sure we can set psr
	nop; nop; nop
	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
	nop; nop; nop

	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)

	/* Initialize Trap Base register from the kernel's trapbase */
	sethi	%hi(_C_LABEL(trapbase)), %o0
	ld	[%o0+%lo(_C_LABEL(trapbase))], %g6
	wr	%g6, 0, %tbr
	nop; nop; nop			! paranoia

	/* Set up a stack: %sp = cpu_hatchstack + USPACE - CCFSZ - 80 */
	set	USRSTACK - CCFSZ, %fp	! as if called from user code
	sethi	%hi(_C_LABEL(cpu_hatchstack)), %o0
	ld	[%o0+%lo(_C_LABEL(cpu_hatchstack))], %o0
	set	USPACE - CCFSZ - 80, %sp
	add	%sp, %o0, %sp

	/* Enable traps */
	rd	%psr, %l0
	wr	%l0, PSR_ET, %psr
	nop; nop; nop

	/* Call C code: cpu_setup(cpu_hatch_sc) */
	sethi	%hi(_C_LABEL(cpu_hatch_sc)), %o0
	call	_C_LABEL(cpu_setup)
	 ld	[%o0+%lo(_C_LABEL(cpu_hatch_sc))], %o0

	/* Wait (spin) for go_smp_cpus to become nonzero */
	set	_C_LABEL(go_smp_cpus), %l1
	ld	[%l1], %l0
1:
	cmp	%l0, %g0
	be	1b
	 ld	[%l1], %l0		! delay slot: refetch flag

#if 0	/* doesn't quite work yet */

	set	_C_LABEL(proc0), %g3		! p = proc0
	sethi	%hi(_C_LABEL(sched_whichqs)), %g2
	sethi	%hi(cpcb), %g6
	sethi	%hi(curproc), %g7
	st	%g0, [%g7 + %lo(curproc)]	! curproc = NULL;

	mov	PSR_S|PSR_ET, %g1		! oldpsr = PSR_S | PSR_ET;
	sethi	%hi(IDLE_UP), %g5
	ld	[%g5 + %lo(IDLE_UP)], %g5
	st	%g5, [%g6 + %lo(cpcb)]		! cpcb = &idle_u
	set	USPACE-CCFSZ, %o1
	add	%g5, %o1, %sp			! set new %sp

#ifdef DEBUG
	mov	%g5, %o2			! %o2 = _idle_u
	SET_SP_REDZONE(%o2, %o1)
#endif /* DEBUG */

	b	idle_enter_no_schedlock
	 clr	%g4				! lastproc = NULL;
#else
	/* Idle here .. */
	rd	%psr, %l0
	andn	%l0, PSR_PIL, %l0	! psr &= ~PSR_PIL;
	wr	%l0, 0, %psr		! (void) spl0();
	nop; nop; nop
9:	ba 9b				! spin forever at spl0
	 nop
	/*NOTREACHED*/
#endif

#endif /* MULTIPROCESSOR */
4244
4245#include "sigcode_state.s"
4246
	.globl	_C_LABEL(sigcode)
	.globl	_C_LABEL(esigcode)
/*
 * Signal trampoline, copied to the top of the user stack.
 * Saves state (SAVE_STATE from sigcode_state.s), calls the handler
 * whose address is in %g1 as (*sa->sa_handler)(sig, code, scp, arg3),
 * restores state, and enters the kernel with SYS___sigreturn14.
 * If sigreturn fails we fall through to exit(2).
 */
_C_LABEL(sigcode):

	SAVE_STATE

	ldd	[%fp + 64], %o0		! sig, code
	ld	[%fp + 76], %o3		! arg3
	call	%g1			! (*sa->sa_handler)(sig,code,scp,arg3)
	 add	%fp, 64 + 16, %o2	! scp

	RESTORE_STATE

	! get registers back & set syscall #
	restore	%g0, SYS___sigreturn14, %g1
	add	%sp, 64 + 16, %o0	! compute scp
	t	ST_SYSCALL		! sigreturn(scp)
	! sigreturn does not return unless it fails
	mov	SYS_exit, %g1		! exit(errno)
	t	ST_SYSCALL
_C_LABEL(esigcode):
4268
4269/*
4270 * Primitives
4271 */
4272
4273/*
4274 * General-purpose NULL routine.
4275 */
ENTRY(sparc_noop)
	retl				! do nothing; return immediately
	 nop				! (delay slot)
4279
4280/*
4281 * getfp() - get stack frame pointer
4282 */
ENTRY(getfp)
	retl
	 mov %fp, %o0			! return current frame pointer
4286
4287/*
4288 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
4289 *
4290 * Copy a null terminated string from the user address space into
4291 * the kernel address space.
4292 */
ENTRY(copyinstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
	! Validates that the source is a user address (< KERNBASE), then
	! shares the copy loop at Lcsdocopy with copyoutstr.
	! Returns 0, ENAMETOOLONG, or EFAULT.
	mov	%o1, %o5		! save = toaddr;
	tst	%o2			! maxlen == 0?
	beq,a	Lcstoolong		! yes, return ENAMETOOLONG
	 sethi	%hi(cpcb), %o4		! (annulled slot: %o4 needed at Lcsdone)

	set	KERNBASE, %o4
	cmp	%o0, %o4		! fromaddr < KERNBASE?
	blu	Lcsdocopy		! yes, go do it
	 sethi	%hi(cpcb), %o4		! (first instr of copy)

	b	Lcsdone			! no, return EFAULT
	 mov	EFAULT, %o0
4307
4308/*
4309 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
4310 *
4311 * Copy a null terminated string from the kernel
4312 * address space to the user address space.
4313 */
ENTRY(copyoutstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
	! Mirror of copyinstr: here the *destination* must be a user
	! address (< KERNBASE).  Shares Lcsdocopy/Lcsdone with copyinstr.
	mov	%o1, %o5		! save = toaddr;
	tst	%o2			! maxlen == 0?
	beq,a	Lcstoolong		! yes, return ENAMETOOLONG
	 sethi	%hi(cpcb), %o4		! (annulled slot: %o4 needed at Lcsdone)

	set	KERNBASE, %o4
	cmp	%o1, %o4		! toaddr < KERNBASE?
	blu	Lcsdocopy		! yes, go do it
	 sethi	%hi(cpcb), %o4		! (first instr of copy)

	b	Lcsdone			! no, return EFAULT
	 mov	EFAULT, %o0
4328
/*
 * Shared byte-at-a-time copy loop for copyinstr/copyoutstr.
 * On entry: %o0 = from, %o1 = to, %o2 = maxlen, %o3 = &lencopied,
 * %o4 = %hi(cpcb) (set in the caller's delay slot), %o5 = original to.
 * Faults are caught via cpcb->pcb_onfault = Lcsdone, so a fault
 * returns through Lcsdone with %o0 already holding an error code.
 */
Lcsdocopy:
!	sethi	%hi(cpcb), %o4		! (done earlier)
	ld	[%o4 + %lo(cpcb)], %o4	! catch faults
	set	Lcsdone, %g1
	st	%g1, [%o4 + PCB_ONFAULT]

! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsb	[%o0], %g1		!	c = *fromaddr;
	tst	%g1
	stb	%g1, [%o1]		!	*toaddr++ = c;
	be	1f			!	if (c == '\0')
	 inc	%o1			!		goto ok;
	deccc	%o2			!	if (--len > 0) {
	bgu	0b			!		fromaddr++;
	 inc	%o0			!		goto loop;
					!	}
Lcstoolong:				! ran out of buffer before NUL
	b	Lcsdone			!	error = ENAMETOOLONG;
	 mov	ENAMETOOLONG, %o0	!	goto done;
1:					! ok:
	clr	%o0			!    error = 0;
Lcsdone:				! done:
	sub	%o1, %o5, %o1		!	len = to - save;
	tst	%o3			!	if (lencopied)
	bnz,a	3f
	 st	%o1, [%o3]		!		*lencopied = len;
3:
	retl				! cpcb->pcb_onfault = 0;
	 st	%g0, [%o4 + PCB_ONFAULT]! return (error);
4359
4360/*
4361 * copystr(fromaddr, toaddr, maxlength, &lencopied)
4362 *
4363 * Copy a null terminated string from one point to another in
4364 * the kernel address space.  (This is a leaf procedure, but
4365 * it does not seem that way to the C compiler.)
4366 */
ENTRY(copystr)
	! Kernel-to-kernel string copy: no fault handler is set up,
	! since both addresses are trusted kernel addresses.
	! Returns 0 or ENAMETOOLONG; optional *lencopied like above.
	mov	%o1, %o5		!	to0 = to;
	tst	%o2			! if (maxlength == 0)
	beq,a	2f			!
	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;

0:					! loop:
	ldsb	[%o0], %o4		!	c = *from;
	tst	%o4
	stb	%o4, [%o1]		!	*to++ = c;
	be	1f			!	if (c == 0)
	 inc	%o1			!		goto ok;
	deccc	%o2			!	if (--len > 0) {
	bgu,a	0b			!		from++;
	 inc	%o0			!		goto loop;
	b	2f			!	}
	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
1:					! ok:
	clr	%o0			!	ret = 0;
2:
	sub	%o1, %o5, %o1		!	len = to - to0;
	tst	%o3			!	if (lencopied)
	bnz,a	3f
	 st	%o1, [%o3]		!		*lencopied = len;
3:
	retl
	 nop
4394
4395/*
4396 * Copyin(src, dst, len)
4397 *
4398 * Copy specified amount of data from user space into the kernel.
4399 */
ENTRY(copyin)
	! %o0 = user src, %o1 = kernel dst, %o2 = len.
	! Check that src is a user address, then share Ldocopy.
	set	KERNBASE, %o3
	cmp	%o0, %o3		! src < KERNBASE?
	blu,a	Ldocopy			! yes, can try it
	 sethi	%hi(cpcb), %o3		! (annulled slot: first instr of copy)

	/* source address points into kernel space: return EFAULT */
	retl
	 mov	EFAULT, %o0
4409
4410/*
4411 * Copyout(src, dst, len)
4412 *
4413 * Copy specified amount of data from kernel to user space.
4414 * Just like copyin, except that the `dst' addresses are user space
4415 * rather than the `src' addresses.
4416 */
ENTRY(copyout)
	! %o0 = kernel src, %o1 = user dst, %o2 = len.
	! Check that dst is a user address, then share Ldocopy.
	set	KERNBASE, %o3
	cmp	%o1, %o3		! dst < KERNBASE?
	blu,a	Ldocopy
	 sethi	%hi(cpcb), %o3		! (annulled slot: first instr of copy)

	/* destination address points into kernel space: return EFAULT */
	retl
	 mov	EFAULT, %o0
4426
	/*
	 * ******NOTE****** this depends on bcopy() not using %g7
	 */
/*
 * Shared tail for copyin/copyout: set cpcb->pcb_onfault = Lcopyfault,
 * call bcopy(src, dst, len), clear pcb_onfault, return 0.  The caller's
 * return address is stashed in %g7 across the bcopy call (hence the
 * NOTE above), so both the success path and Lcopyfault return via
 * `jmp %g7 + 8' rather than %o7.
 */
Ldocopy:
!	sethi	%hi(cpcb), %o3
	ld	[%o3 + %lo(cpcb)], %o3
	set	Lcopyfault, %o4
	mov	%o7, %g7		! save return address
	call	_C_LABEL(bcopy)		! bcopy(src, dst, len)
	 st	%o4, [%o3 + PCB_ONFAULT]

	sethi	%hi(cpcb), %o3
	ld	[%o3 + %lo(cpcb)], %o3
	st	%g0, [%o3 + PCB_ONFAULT]
	jmp	%g7 + 8
	 clr	%o0			! return 0

! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
! Note that although we were in bcopy, there is no state to clean up;
! the only special thing is that we have to return to [g7 + 8] rather than
! [o7 + 8].
Lcopyfault:
	sethi	%hi(cpcb), %o3
	ld	[%o3 + %lo(cpcb)], %o3
	jmp	%g7 + 8
	 st	%g0, [%o3 + PCB_ONFAULT]
4453
4454
4455/*
4456 * Write all user windows presently in the CPU back to the user's stack.
4457 * We just do `save' instructions until pcb_uw == 0.
4458 *
4459 *	p = cpcb;
4460 *	nsaves = 0;
4461 *	while (p->pcb_uw > 0)
4462 *		save(), nsaves++;
4463 *	while (--nsaves >= 0)
4464 *		restore();
4465 */
ENTRY(write_user_windows)
	! Flush user register windows to the stack by executing `save'
	! until cpcb->pcb_uw reaches 0 (the window-overflow trap handler
	! does the actual spilling), then `restore' back the same number
	! of times.  %g5 counts the saves performed.
	sethi	%hi(cpcb), %g6
	ld	[%g6 + %lo(cpcb)], %g6
	b	2f
	 clr	%g5			! nsaves = 0;
1:
	save	%sp, -64, %sp		! force a window spill if needed
2:
	ld	[%g6 + PCB_UW], %g7	! while (cpcb->pcb_uw > 0)
	tst	%g7
	bg,a	1b
	 inc	%g5			!	save(), nsaves++;
3:
	deccc	%g5			! while (--nsaves >= 0)
	bge,a	3b
	 restore			!	restore();
	retl
	 nop
4484
4485
4486	.comm	_C_LABEL(want_resched),4
4487	.comm	_C_LABEL(want_ast),4
4488/*
4489 * Masterpaddr is the p->p_addr of the last process on the processor.
4490 * XXX masterpaddr is almost the same as cpcb
4491 * XXX should delete this entirely
4492 */
4493	.comm	_C_LABEL(masterpaddr), 4
4494
4495/*
4496 * Switch statistics (for later tweaking):
4497 *	nswitchdiff = p1 => p2 (i.e., chose different process)
4498 *	nswitchexit = number of calls to switchexit()
4499 *	cnt.v_swtch = total calls to swtch+swtchexit
4500 */
4501	.comm	_C_LABEL(nswitchdiff), 4
4502	.comm	_C_LABEL(nswitchexit), 4
4503
4504/*
4505 * REGISTER USAGE IN cpu_switch AND switchexit:
4506 * This is split into two phases, more or less
4507 * `before we locate a new proc' and `after'.
4508 * Some values are the same in both phases.
4509 * Note that the %o0-registers are not preserved across
4510 * the psr change when entering a new process, since this
4511 * usually changes the CWP field (hence heavy usage of %g's).
4512 *
4513 *	%g1 = oldpsr (excluding ipl bits)
4514 *	%g2 = %hi(whichqs); newpsr
4515 *	%g3 = p
4516 *	%g4 = lastproc
4517 *	%g5 = <free>; newpcb
4518 *	%g6 = %hi(cpcb)
4519 *	%g7 = %hi(curproc)
4520 *	%o0 = tmp 1
4521 *	%o1 = tmp 2
4522 *	%o2 = tmp 3
4523 *	%o3 = tmp 4; whichqs; vm
 *	%o4 = tmp 5; which; sswap
 *	%o5 = tmp 6; q; <free>
4526 */
4527
4528/*
4529 * When calling external functions from cpu_switch() and idle(), we must
4530 * preserve the global registers mentioned above across the call.  We also
4531 * set up a stack frame since we will be running in our caller's frame
4532 * in cpu_switch().
4533 */
/* Call `name' while preserving %g1-%g4, %g6, %g7 in a fresh window:
 * the globals are parked in %i0-%i5 across the call and copied back
 * before the `restore'.  Used from cpu_switch()/idle() where those
 * globals carry live scheduler state (see register-usage table above).
 */
#define SAVE_GLOBALS_AND_CALL(name)	\
	save	%sp, -CCFSZ, %sp;	\
	mov	%g1, %i0;		\
	mov	%g2, %i1;		\
	mov	%g3, %i2;		\
	mov	%g4, %i3;		\
	mov	%g6, %i4;		\
	call	_C_LABEL(name);		\
	 mov	%g7, %i5;		\
	mov	%i5, %g7;		\
	mov	%i4, %g6;		\
	mov	%i3, %g4;		\
	mov	%i2, %g3;		\
	mov	%i1, %g2;		\
	mov	%i0, %g1;		\
	restore
4550
4551
4552/*
4553 * switchexit is called only from cpu_exit() before the current process
4554 * has freed its vmspace and kernel stack; we must schedule them to be
4555 * freed.  (curproc is already NULL.)
4556 *
4557 * We lay the process to rest by changing to the `idle' kernel stack,
4558 * and note that the `last loaded process' is nonexistent.
4559 */
ENTRY(switchexit)
	! %o0 = exiting proc.  Switch to the idle u. area stack, then
	! call exit2(p) to free the old stack, then fall into the idle
	! loop setup (branch to idle_enter_no_schedlock).  Never returns.
	mov	%o0, %g2		! save proc for exit2() call

	/*
	 * Change pcb to idle u. area, i.e., set %sp to top of stack
	 * and %psr to PSR_S|PSR_ET, and set cpcb to point to idle_u.
	 * Once we have left the old stack, we can call exit2() to
	 * destroy it.  Call it any sooner and the register windows
	 * go bye-bye.
	 */
#if defined(MULTIPROCESSOR)
	sethi	%hi(IDLE_UP), %g5	! per-CPU idle u. area
	ld	[%g5 + %lo(IDLE_UP)], %g5
#else
	set	_C_LABEL(idle_u), %g5
#endif
	sethi	%hi(cpcb), %g6
	mov	1, %g7
	wr	%g0, PSR_S, %psr	! change to window 0, traps off
	wr	%g0, 2, %wim		! and make window 1 the trap window
	st	%g5, [%g6 + %lo(cpcb)]	! cpcb = &idle_u
	st	%g7, [%g5 + PCB_WIM]	! idle_u.pcb_wim = log2(2) = 1
#if defined(MULTIPROCESSOR)
	set	USPACE-CCFSZ, %o1	!
	add	%g5, %o1, %sp		! set new %sp
#else
	set	_C_LABEL(idle_u) + USPACE-CCFSZ, %sp	! set new %sp
#endif

#ifdef DEBUG
	mov	%g5, %l6		! %l6 = _idle_u
	SET_SP_REDZONE(%l6, %l5)
#endif

	wr	%g0, PSR_S|PSR_ET, %psr	! and then enable traps
	call	_C_LABEL(exit2)		! exit2(p)
	 mov	%g2, %o0

	/*
	 * Now fall through to `the last switch'.  %g6 was set to
	 * %hi(cpcb), but may have been clobbered in exit2(),
	 * so all the registers described below will be set here.
	 *
	 * REGISTER USAGE AT THIS POINT:
	 *	%g1 = oldpsr (excluding ipl bits)
	 *	%g2 = %hi(whichqs)
	 *	%g4 = lastproc
	 *	%g6 = %hi(cpcb)
	 *	%g7 = %hi(curproc)
	 *	%o0 = tmp 1
	 *	%o1 = tmp 2
	 *	%o3 = whichqs
	 */

	INCR(_C_LABEL(nswitchexit))	! nswitchexit++;
	INCR(_C_LABEL(uvmexp)+V_SWTCH)	! cnt.v_switch++;

	mov	PSR_S|PSR_ET, %g1	! oldpsr = PSR_S | PSR_ET;
	sethi	%hi(_C_LABEL(sched_whichqs)), %g2
	clr	%g4			! lastproc = NULL;
	sethi	%hi(cpcb), %g6
	sethi	%hi(curproc), %g7
	st	%g0, [%g7 + %lo(curproc)]	! curproc = NULL;
	b,a	idle_enter_no_schedlock
	/* FALLTHROUGH */
4625
4626
4627/* Macro used for register window flushing in the context switch code */
4628#define	SAVE save %sp, -64, %sp
4629
4630/*
4631 * When no processes are on the runq, switch
4632 * idles here waiting for something to come ready.
4633 * The registers are set up as noted above.
4634 */
idle:
	! Idle loop, entered from cpu_switch() when no run queue has a
	! runnable process.  Registers as described in the table above
	! (%g1 = oldpsr, %g2 = %hi(whichqs), %g6/%g7 = cpcb/curproc hi).
#if defined(MULTIPROCESSOR)
	/*
	 * Change pcb to idle u. area, i.e., set %sp to top of stack
	 * and %psr to PSR_S, and set cpcb to point to idle_u.
	 */
	/* XXX: FIXME
	 * 7 of each: flush all register windows first (knows nwindows <= 8)
	 */
	SAVE;    SAVE;    SAVE;    SAVE;    SAVE;    SAVE;    SAVE
	restore; restore; restore; restore; restore; restore; restore

	sethi	%hi(IDLE_UP), %g5
	ld	[%g5 + %lo(IDLE_UP)], %g5
	rd	%psr, %g1		! oldpsr = %psr;
	andn	%g1, PSR_PIL|PSR_PS, %g1! oldpsr &= ~(PSR_PIL|PSR_PS);
	and	%g1, PSR_S|PSR_ET, %g1	! oldpsr &= (PSR_S|PSR_ET);
	st	%g5, [%g6 + %lo(cpcb)]	! cpcb = &idle_u
	set	USPACE-CCFSZ, %o1
	add	%g5, %o1, %sp		! set new %sp
	clr	%g4			! lastproc = NULL;

#ifdef DEBUG
	mov	%g5, %o2		! %o2 = _idle_u
	SET_SP_REDZONE(%o2, %o1)
#endif /* DEBUG */
#endif /* MULTIPROCESSOR */

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	/* Release the scheduler lock */
	SAVE_GLOBALS_AND_CALL(sched_unlock_idle)
#endif

idle_enter_no_schedlock:
	wr	%g1, 0, %psr		! spl0();
1:					! spin reading whichqs until nonzero
	ld	[%g2 + %lo(_C_LABEL(sched_whichqs))], %o3
	tst	%o3
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	bnz,a	idle_leave
#else
	bnz,a	Lsw_scan
#endif
	! NB: annulled delay slot (executed when we leave the idle loop)
	 wr	%g1, PSR_PIL, %psr	! (void) splhigh();

	! Nothing runnable: check uvm.page_idle_zero and, if set,
	! spend the idle time pre-zeroing free pages.
	sethi	%hi(_C_LABEL(uvm) + UVM_PAGE_IDLE_ZERO), %o3
	ld	[%o3 + %lo(_C_LABEL(uvm) + UVM_PAGE_IDLE_ZERO)], %o3
	tst	%o3
	bz	1b
	 nop

	SAVE_GLOBALS_AND_CALL(uvm_pageidlezero)
	b,a	1b

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
idle_leave:
	/* Before we leave the idle loop, detain the scheduler lock */
	nop;nop;nop;	/* just wrote to %psr; delay before doing a `save' */
	SAVE_GLOBALS_AND_CALL(sched_lock_idle)
	b,a	Lsw_scan
#endif
4697#endif
4698
/* Firewall panics for impossible run-queue states seen by cpu_switch(). */
Lsw_panic_rq:
	sethi	%hi(1f), %o0
	call	_C_LABEL(panic)		! panic("switch rq");
	 or	%lo(1f), %o0, %o0
Lsw_panic_wchan:
	sethi	%hi(2f), %o0
	call	_C_LABEL(panic)		! panic("switch wchan");
	 or	%lo(2f), %o0, %o0
Lsw_panic_srun:
	sethi	%hi(3f), %o0
	call	_C_LABEL(panic)		! panic("switch SRUN");
	 or	%lo(3f), %o0, %o0
1:	.asciz	"switch rq"
2:	.asciz	"switch wchan"
3:	.asciz	"switch SRUN"
	_ALIGN
4715
4716/*
4717 * cpu_switch() picks a process to run and runs it, saving the current
4718 * one away.  On the assumption that (since most workstations are
4719 * single user machines) the chances are quite good that the new
4720 * process will turn out to be the current process, we defer saving
4721 * it here until we have found someone to load.  If that someone
4722 * is the current process we avoid both store and load.
4723 *
4724 * cpu_switch() is always entered at splstatclock or splhigh.
4725 *
4726 * IT MIGHT BE WORTH SAVING BEFORE ENTERING idle TO AVOID HAVING TO
4727 * SAVE LATER WHEN SOMEONE ELSE IS READY ... MUST MEASURE!
4728 */
	.globl	_C_LABEL(__ffstab)
ENTRY(cpu_switch)
	/*
	 * cpu_switch(p): save the current process, pick the highest-
	 * priority nonempty run queue, and switch to its first process
	 * (possibly the same one, see Lsw_sameproc; possibly idle).
	 *
	 * REGISTER USAGE AT THIS POINT:
	 *	%g1 = oldpsr (excluding ipl bits)
	 *	%g2 = %hi(whichqs)
	 *	%g3 = p
	 *	%g4 = lastproc
	 *	%g5 = tmp 0
	 *	%g6 = %hi(cpcb)
	 *	%g7 = %hi(curproc)
	 *	%o0 = tmp 1
	 *	%o1 = tmp 2
	 *	%o2 = tmp 3
	 *	%o3 = tmp 4, then at Lsw_scan, whichqs
	 *	%o4 = tmp 5, then at Lsw_scan, which
	 *	%o5 = tmp 6, then at Lsw_scan, q
	 */
	mov	%o0, %g4			! lastproc = p;
	sethi	%hi(_C_LABEL(sched_whichqs)), %g2	! set up addr regs
	sethi	%hi(cpcb), %g6
	ld	[%g6 + %lo(cpcb)], %o0
	std	%o6, [%o0 + PCB_SP]		! cpcb->pcb_<sp,pc> = <sp,pc>;
	rd	%psr, %g1			! oldpsr = %psr;
	st	%g1, [%o0 + PCB_PSR]		! cpcb->pcb_psr = oldpsr;
	andn	%g1, PSR_PIL, %g1		! oldpsr &= ~PSR_PIL;
	sethi	%hi(curproc), %g7
	st	%g0, [%g7 + %lo(curproc)]	! curproc = NULL;

Lsw_scan:
	nop; nop; nop				! paranoia
	ld	[%g2 + %lo(_C_LABEL(sched_whichqs))], %o3

	/*
	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
	 * branches to idle if ffs(whichqs) was 0.  Scans whichqs a
	 * byte at a time through the __ffstab lookup table.
	 */
	set	_C_LABEL(__ffstab), %o2
	andcc	%o3, 0xff, %o1		! byte 0 zero?
	bz,a	1f			! yes, try byte 1
	 srl	%o3, 8, %o0
	b	2f			! ffs = ffstab[byte0]; which = ffs - 1;
	 ldsb	[%o2 + %o1], %o0
1:	andcc	%o0, 0xff, %o1		! byte 1 zero?
	bz,a	1f			! yes, try byte 2
	 srl	%o0, 8, %o0
	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte1] + 7;
	b	3f
	 add	%o0, 7, %o4
1:	andcc	%o0, 0xff, %o1		! byte 2 zero?
	bz,a	1f			! yes, try byte 3
	 srl	%o0, 8, %o0
	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte2] + 15;
	b	3f
	 add	%o0, 15, %o4
1:	ldsb	[%o2 + %o0], %o0	! ffs = ffstab[byte3] + 24
	addcc	%o0, 24, %o0		! (note that ffstab[0] == -24)
	bz	idle			! if answer was 0, go idle
	 EMPTY
2:	sub	%o0, 1, %o4		! which = ffs(whichqs) - 1
3:	/* end optimized inline expansion */

	/*
	 * We found a nonempty run queue.  Take its first process.
	 */
	set	_C_LABEL(sched_qs), %o5	! q = &qs[which];
	sll	%o4, 3, %o0		! (each queue head is 8 bytes)
	add	%o0, %o5, %o5
	ld	[%o5], %g3		! p = q->ph_link;
	cmp	%g3, %o5		! if (p == q)
	be	Lsw_panic_rq		!	panic("switch rq");
	 EMPTY
	ld	[%g3], %o0		! tmp0 = p->p_forw;
	st	%o0, [%o5]		! q->ph_link = tmp0;
	st	%o5, [%o0 + 4]		! tmp0->p_back = q;
	cmp	%o0, %o5		! if (tmp0 == q)
	bne	1f
	 EMPTY
	mov	1, %o1			!	whichqs &= ~(1 << which);
	sll	%o1, %o4, %o1
	andn	%o3, %o1, %o3
	st	%o3, [%g2 + %lo(_C_LABEL(sched_whichqs))]
1:
	/*
	 * PHASE TWO: NEW REGISTER USAGE:
	 *	%g1 = oldpsr (excluding ipl bits)
	 *	%g2 = newpsr
	 *	%g3 = p
	 *	%g4 = lastproc
	 *	%g5 = newpcb
	 *	%g6 = %hi(cpcb)
	 *	%g7 = %hi(curproc)
	 *	%o0 = tmp 1
	 *	%o1 = tmp 2
	 *	%o2 = tmp 3
	 *	%o3 = vm
	 */

	/* firewalls */
	ld	[%g3 + P_WCHAN], %o0	! if (p->p_wchan)
	tst	%o0
	bne	Lsw_panic_wchan		!	panic("switch wchan");
	 EMPTY
	ldsb	[%g3 + P_STAT], %o0	! if (p->p_stat != SRUN)
	cmp	%o0, SRUN
	bne	Lsw_panic_srun		!	panic("switch SRUN");
	 EMPTY

	/*
	 * Committed to running process p.
	 * It may be the same as the one we were running before.
	 */
	mov	SONPROC, %o0			! p->p_stat = SONPROC;
	stb	%o0, [%g3 + P_STAT]

	/* p->p_cpu initialized in fork1() for single-processor */
#if defined(MULTIPROCESSOR)
	sethi	%hi(_CISELFP), %o0		! p->p_cpu = cpuinfo.ci_self;
	ld	[%o0 + %lo(_CISELFP)], %o0
	st	%o0, [%g3 + P_CPU]
#endif

	sethi	%hi(_C_LABEL(want_resched)), %o0	! want_resched = 0;
	st	%g0, [%o0 + %lo(_C_LABEL(want_resched))]
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	/* Done with the run queues; release the scheduler lock */
	SAVE_GLOBALS_AND_CALL(sched_unlock_idle)
#endif
	ld	[%g3 + P_ADDR], %g5		! newpcb = p->p_addr;
	st	%g0, [%g3 + 4]			! p->p_back = NULL;
	ld	[%g5 + PCB_PSR], %g2		! newpsr = newpcb->pcb_psr;
	st	%g3, [%g7 + %lo(curproc)]	! curproc = p;

	cmp	%g3, %g4		! p == lastproc?
	be,a	Lsw_sameproc		! yes, go return 0
	 wr	%g2, 0, %psr		! (after restoring ipl)

	/*
	 * Not the old process.  Save the old process, if any;
	 * then load p.
	 */
	tst	%g4
	be,a	Lsw_load		! if no old process, go load
	 wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr

	INCR(_C_LABEL(nswitchdiff))	! clobbers %o0,%o1
	/*
	 * save: write back all windows (including the current one).
	 * XXX	crude; knows nwindows <= 8
	 */
wb1:	/* 7 of each: */
	SAVE;    SAVE;    SAVE;    SAVE;    SAVE;    SAVE;    SAVE
	restore; restore; restore; restore; restore; restore; restore

	/*
	 * Load the new process.  To load, we must change stacks and
	 * alter cpcb and %wim, hence we must disable traps.  %psr is
	 * currently equal to oldpsr (%g1) ^ (PIL_CLOCK << 8);
	 * this means that PSR_ET is on.  Likewise, PSR_ET is on
	 * in newpsr (%g2), although we do not know newpsr's ipl.
	 *
	 * We also must load up the `in' and `local' registers.
	 */
	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
Lsw_load:
!	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr	! done above
	/* compute new wim */
	ld	[%g5 + PCB_WIM], %o0
	mov	1, %o1
	sll	%o1, %o0, %o0
	wr	%o0, 0, %wim		! %wim = 1 << newpcb->pcb_wim;
	/* now must not change %psr for 3 more instrs */
	/* Clear FP & CP enable bits; continue new process at splclock() */
/*1,2*/	set	PSR_EF|PSR_EC|PSR_PIL, %o0
/*3*/	andn	%g2, %o0, %g2		! newpsr &= ~(PSR_EF|PSR_EC|PSR_PIL);
	/* set new psr, but with traps disabled */
	wr	%g2, (PIL_CLOCK << 8)|PSR_ET, %psr ! %psr = newpsr ^ PSR_ET;
	/* set new cpcb */
	st	%g5, [%g6 + %lo(cpcb)]	! cpcb = newpcb;
	ldd	[%g5 + PCB_SP], %o6	! <sp,pc> = newpcb->pcb_<sp,pc>
	/* load window (locals and ins) from the new stack frame */
	ldd	[%sp + (0*8)], %l0
	ldd	[%sp + (1*8)], %l2
	ldd	[%sp + (2*8)], %l4
	ldd	[%sp + (3*8)], %l6
	ldd	[%sp + (4*8)], %i0
	ldd	[%sp + (5*8)], %i2
	ldd	[%sp + (6*8)], %i4
	ldd	[%sp + (7*8)], %i6
#ifdef DEBUG
	mov	%g5, %o0
	SET_SP_REDZONE(%o0, %o1)
	CHECK_SP_REDZONE(%o0, %o1)
#endif
	/* finally, enable traps */
	wr	%g2, PIL_CLOCK << 8, %psr	! psr = newpsr;

	/*
	 * Now running p.  Make sure it has a context so that it
	 * can talk about user space stuff.  (Its pcb_uw is currently
	 * zero so it is safe to have interrupts going here.)
	 */
	ld	[%g3 + P_VMSPACE], %o3	! vm = p->p_vmspace;
	ld	[%o3 + VM_PMAP], %o3	! pm = vm->vm_map.vm_pmap;
	ld	[%o3 + PMAP_CTX], %o0	! if (pm->pm_ctx != NULL)
	tst	%o0
	bnz,a	Lsw_havectx		!	goto havecontext;
	 ld	[%o3 + PMAP_CTXNUM], %o0	! load context number

	/* p does not have a context: call ctx_alloc to get one */
	save	%sp, -CCFSZ, %sp
	call	_C_LABEL(ctx_alloc)	! ctx_alloc(pm);
	 mov	%i3, %o0

	ret				! return into the new process
	 restore

	/* p does have a context: just switch to it */
Lsw_havectx:
	! context is in %o0
	! pmap is in %o3
#if (defined(SUN4) || defined(SUN4C)) && defined(SUN4M)
NOP_ON_4M_15:
	! one of these branches is patched at boot for the CPU type
	b,a	1f
	b,a	2f
#endif
1:
#if defined(SUN4) || defined(SUN4C)
	set	AC_CONTEXT, %o1
	retl
	 stba	%o0, [%o1] ASI_CONTROL	! setcontext(vm->vm_pmap.pm_ctxnum);
#endif
2:
#if defined(SUN4M)
	/*
	 * Flush caches that need to be flushed on context switch.
	 * We know this is currently only necessary on the sun4m hypersparc.
	 */
	set	CPUINFO_VA+CPUINFO_PURE_VCACHE_FLS, %o2
	ld	[%o2], %o2
	mov	%o7, %g7	! save return address
	jmpl	%o2, %o7	! this function must not clobber %o0 and %g7
	 nop

	set	SRMMU_CXR, %o1
	jmp	%g7 + 8
	 sta	%o0, [%o1] ASI_SRMMU	! setcontext(vm->vm_pmap.pm_ctxnum);
#endif

Lsw_sameproc:
	/*
	 * We are resuming the process that was running at the
	 * call to switch().  Just set psr ipl and return.
	 */
!	wr	%g2, 0, %psr		! %psr = newpsr; (done earlier)
	nop				! %psr write delay
	retl
	 nop
4987
4988
4989/*
4990 * Snapshot the current process so that stack frames are up to date.
4991 * Only used just before a crash dump.
4992 */
ENTRY(snapshot)
	! %o0 = pcb.  Record current %sp/%pc and %psr into the pcb and
	! flush all register windows so stack frames are up to date.
	! Used just before a crash dump.
	std	%o6, [%o0 + PCB_SP]	! save sp
	rd	%psr, %o1		! save psr
	st	%o1, [%o0 + PCB_PSR]

	/*
	 * Just like switch(); same XXX comments apply.
	 * 7 of each.  Minor tweak: the 7th restore is
	 * done after a ret.
	 */
	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE
	restore; restore; restore; restore; restore; restore; ret; restore
5005
5006
5007/*
5008 * cpu_fork() arrange for proc_trampoline() to run after a process gets
5009 * chosen in switch(). The stack frame will contain a function pointer
5010 * in %l0, and an argument to pass to it in %l2.
5011 *
5012 * If the function *(%l0) returns, we arrange for an immediate return
5013 * to user mode. This happens in two known cases: after execve(2) of init,
5014 * and when returning a child to user mode after a fork(2).
5015 *
5016 * If were setting up a kernel thread, the function *(%l0) will not return.
5017 */
ENTRY(proc_trampoline)
	/*
	 * First code run by a newly created process or kernel thread:
	 * calls (*%l0)(%l1), then (if that returns) arranges an
	 * immediate return to user mode via return_from_syscall.
	 *
	 * Note: cpu_fork() has set up a stack frame for us to run in,
	 * so we can call other functions from here without using
	 * `save ... restore'.
	 */
#if defined(MULTIPROCESSOR)
	/* Finish setup in SMP environment: acquire locks etc. */
	call _C_LABEL(proc_trampoline_mp)
	 nop
#endif

	/* Reset interrupt level */
	rd	%psr, %o0
	andn	%o0, PSR_PIL, %o0	! psr &= ~PSR_PIL;
	wr	%o0, 0, %psr		! (void) spl0();
	 nop				! psr delay; the next 2 instructions
					! can safely be made part of the
					! required 3 instructions psr delay
	call	%l0			! (*func)(arg)
	 mov	%l1, %o0

	/*
	 * Here we finish up as in syscall, but simplified.
	 * cpu_fork() or sendsig() (if we took a pending signal
	 * in child_return()) will have set the user-space return
	 * address in tf_pc. In both cases, %npc should be %pc + 4.
	 */
	mov	PSR_S, %l0		! user psr (no need to load it)
	!?wr	%g0, 2, %wim		! %wim = 2
	ld	[%sp + CCFSZ + 4], %l1	! pc = tf->tf_pc from cpu_fork()
	b	return_from_syscall
	 add	%l1, 4, %l2		! npc = pc+4
5051
5052/*
5053 * {fu,su}{,i}{byte,word}
5054 */
_ENTRY(fuiword)
ENTRY(fuword)
	! Fetch an aligned word from user space; -1 on fault/bad address.
	! (Note: -1 is thus ambiguous with a legitimate stored -1.)
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE...
	bgeu	Lfsbadaddr
	EMPTY
	btst	3, %o0			! or has low bits set...
	bnz	Lfsbadaddr		!	go return -1
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
	ld	[%o2 + %lo(cpcb)], %o2
	set	Lfserr, %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	ld	[%o0], %o0		! fetch the word
	retl				! phew, made it, return the word
	 st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault

/* Common fault/bad-address exits for the {fu,su}* routines. */
Lfserr:
	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
Lfsbadaddr:
	retl				! and return error indicator
	 mov	-1, %o0
5077
5078	/*
5079	 * This is just like Lfserr, but it's a global label that allows
5080	 * mem_access_fault() to check to see that we don't want to try to
5081	 * page in the fault.  It's used by fuswintr() etc.
5082	 */
	.globl	_C_LABEL(Lfsbail)
/* Global variant of Lfserr; mem_access_fault() compares pcb_onfault
 * against this label to avoid paging in the fault (fuswintr() etc.). */
_C_LABEL(Lfsbail):
	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
	retl				! and return error indicator
	 mov	-1, %o0
5088
5089	/*
5090	 * Like fusword but callable from interrupt context.
5091	 * Fails if data isn't resident.
5092	 */
ENTRY(fuswintr)
	! Fetch a user halfword from interrupt context; uses Lfsbail as
	! the onfault handler so a non-resident page fails instead of
	! being paged in.  Returns the halfword or -1.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE
	bgeu	Lfsbadaddr		!	return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfsbail;
	ld	[%o2 + %lo(cpcb)], %o2
	set	_C_LABEL(Lfsbail), %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	lduh	[%o0], %o0		! fetch the halfword
	retl				! made it
	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
5105
ENTRY(fusword)
	! Fetch a halfword from user space; returns it or -1 on fault.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE
	bgeu	Lfsbadaddr		!	return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
	ld	[%o2 + %lo(cpcb)], %o2
	set	Lfserr, %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	lduh	[%o0], %o0		! fetch the halfword
	retl				! made it
	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
5118
_ENTRY(fuibyte)
ENTRY(fubyte)
	! Fetch a byte from user space; returns it or -1 on fault.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE
	bgeu	Lfsbadaddr		!	return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
	ld	[%o2 + %lo(cpcb)], %o2
	set	Lfserr, %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	ldub	[%o0], %o0		! fetch the byte
	retl				! made it
	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
5132
_ENTRY(suiword)
ENTRY(suword)
	! Store the word %o1 at aligned user address %o0.
	! Returns 0 on success, -1 on fault/bad address.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE ...
	bgeu	Lfsbadaddr
	EMPTY
	btst	3, %o0			! or has low bits set ...
	bnz	Lfsbadaddr		!	go return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
	ld	[%o2 + %lo(cpcb)], %o2
	set	Lfserr, %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	st	%o1, [%o0]		! store the word
	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	retl				! and return 0
	clr	%o0
5150
ENTRY(suswintr)
	! Store halfword %o1 at user address %o0 from interrupt context;
	! uses Lfsbail so a non-resident page fails rather than paging in.
	! Returns 0 on success, -1 on fault.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE
	bgeu	Lfsbadaddr		!	go return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfsbail;
	ld	[%o2 + %lo(cpcb)], %o2
	set	_C_LABEL(Lfsbail), %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	sth	%o1, [%o0]		! store the halfword
	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	retl				! and return 0
	clr	%o0
5164
ENTRY(susword)
	! Store halfword %o1 at user address %o0; 0 on success, -1 on fault.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE
	bgeu	Lfsbadaddr		!	go return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
	ld	[%o2 + %lo(cpcb)], %o2
	set	Lfserr, %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	sth	%o1, [%o0]		! store the halfword
	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	retl				! and return 0
	clr	%o0
5178
_ENTRY(suibyte)
ENTRY(subyte)
	! Store byte %o1 at user address %o0; 0 on success, -1 on fault.
	set	KERNBASE, %o2
	cmp	%o0, %o2		! if addr >= KERNBASE
	bgeu	Lfsbadaddr		!	go return error
	EMPTY
	sethi	%hi(cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
	ld	[%o2 + %lo(cpcb)], %o2
	set	Lfserr, %o3
	st	%o3, [%o2 + PCB_ONFAULT]
	stb	%o1, [%o0]		! store the byte
	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	retl				! and return 0
	clr	%o0
5193
5194/* probeget and probeset are meant to be used during autoconfiguration */
5195
5196/*
5197 * probeget(addr, size) caddr_t addr; int size;
5198 *
5199 * Read or write a (byte,word,longword) from the given address.
5200 * Like {fu,su}{byte,halfword,word} but our caller is supposed
5201 * to know what he is doing... the address can be anywhere.
5202 *
5203 * We optimize for space, rather than time, here.
5204 */
ENTRY(probeget)
	! %o0 = addr, %o1 = (1,2,4): read a byte/halfword/word from any
	! address with the fault handler armed; the size bits of %o1
	! select which (annulled) load executes.  A fault returns -1
	! via Lfserr.  Used during autoconfiguration.
	sethi	%hi(cpcb), %o2
	ld	[%o2 + %lo(cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
	set	Lfserr, %o5
	st	%o5, [%o2 + PCB_ONFAULT]
	btst	1, %o1
	bnz,a	0f			! if (len & 1)
	 ldub	[%o0], %o0		!	value = *(char *)addr;
0:	btst	2, %o1
	bnz,a	0f			! if (len & 2)
	 lduh	[%o0], %o0		!	value = *(short *)addr;
0:	btst	4, %o1
	bnz,a	0f			! if (len & 4)
	 ld	[%o0], %o0		!	value = *(int *)addr;
0:	retl				! made it, clear onfault and return
	 st	%g0, [%o2 + PCB_ONFAULT]
5222
5223/*
5224 * probeset(addr, size, val) caddr_t addr; int size, val;
5225 *
5226 * As above, but we return 0 on success.
5227 */
ENTRY(probeset)
	! %o0 = addr, %o1 = (1,2,4), %o2 = val: store a byte/halfword/
	! word at any address with the fault handler armed; returns 0
	! on success, -1 (via Lfserr) on fault.
	sethi	%hi(cpcb), %o3
	ld	[%o3 + %lo(cpcb)], %o3	! cpcb->pcb_onfault = Lfserr;
	set	Lfserr, %o5
	st	%o5, [%o3 + PCB_ONFAULT]
	btst	1, %o1
	bnz,a	0f			! if (len & 1)
	 stb	%o2, [%o0]		!	*(char *)addr = value;
0:	btst	2, %o1
	bnz,a	0f			! if (len & 2)
	 sth	%o2, [%o0]		!	*(short *)addr = value;
0:	btst	4, %o1
	bnz,a	0f			! if (len & 4)
	 st	%o2, [%o0]		!	*(int *)addr = value;
0:	clr	%o0			! made it, clear onfault and return 0
	retl
	 st	%g0, [%o3 + PCB_ONFAULT]
5246
5247/*
5248 * int xldcontrolb(caddr_t, pcb)
5249 *		    %o0     %o1
5250 *
5251 * read a byte from the specified address in ASI_CONTROL space.
5252 */
5253ENTRY(xldcontrolb)
5254	!sethi	%hi(cpcb), %o2
5255	!ld	[%o2 + %lo(cpcb)], %o2	! cpcb->pcb_onfault = Lfsbail;
5256	or	%o1, %g0, %o2		! %o2 = %o1
5257	set	_C_LABEL(Lfsbail), %o5
5258	st	%o5, [%o2 + PCB_ONFAULT]
5259	lduba	[%o0] ASI_CONTROL, %o0	! read
52600:	retl
5261	 st	%g0, [%o2 + PCB_ONFAULT]
5262
5263/*
5264 * int fkbyte(caddr_t, pcb)
5265 *	      %o0      %o1
5266 *
5267 * Just like fubyte(), but for kernel space.
5268 * (currently used to work around unexplained transient bus errors
5269 *  when reading the VME interrupt vector)
5270 */
5271ENTRY(fkbyte)
5272	or	%o1, %g0, %o2		! %o2 = %o1
5273	set	_C_LABEL(Lfsbail), %o5
5274	st	%o5, [%o2 + PCB_ONFAULT]
5275	ldub	[%o0], %o0		! fetch the byte
5276	retl				! made it
5277	 st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
5278
5279
5280/*
5281 * copywords(src, dst, nbytes)
5282 *
5283 * Copy `nbytes' bytes from src to dst, both of which are word-aligned;
5284 * nbytes is a multiple of four.  It may, however, be zero, in which case
5285 * nothing is to be copied.
5286 */
5287ENTRY(copywords)
5288	! %o0 = src, %o1 = dst, %o2 = nbytes
5289	b	1f
5290	deccc	4, %o2
52910:
5292	st	%o3, [%o1 + %o2]
5293	deccc	4, %o2			! while ((n -= 4) >= 0)
52941:
5295	bge,a	0b			!    *(int *)(dst+n) = *(int *)(src+n);
5296	ld	[%o0 + %o2], %o3
5297	retl
5298	nop
5299
5300/*
5301 * qcopy(src, dst, nbytes)
5302 *
5303 * (q for `quad' or `quick', as opposed to b for byte/block copy)
5304 *
5305 * Just like copywords, but everything is multiples of 8.
5306 */
5307ENTRY(qcopy)
5308	b	1f
5309	deccc	8, %o2
53100:
5311	std	%o4, [%o1 + %o2]
5312	deccc	8, %o2
53131:
5314	bge,a	0b
5315	ldd	[%o0 + %o2], %o4
5316	retl
5317	nop
5318
5319/*
5320 * qzero(addr, nbytes)
5321 *
5322 * Zeroes `nbytes' bytes of a quad-aligned virtual address,
5323 * where nbytes is itself a multiple of 8.
5324 */
5325ENTRY(qzero)
5326	! %o0 = addr, %o1 = len (in bytes)
5327	clr	%g1
53280:
5329	deccc	8, %o1			! while ((n =- 8) >= 0)
5330	bge,a	0b
5331	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + n) = 0;
5332	retl
5333	nop
5334
5335/*
5336 * kernel bcopy
5337 * Assumes regions do not overlap; has no useful return value.
5338 *
5339 * Must not use %g7 (see copyin/copyout above).
5340 */
5341
5342#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
5343
5344ENTRY(bcopy)
5345	cmp	%o2, BCOPY_SMALL
5346Lbcopy_start:
5347	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
5348	btst	7, %o0		! (part of being fancy)
5349
5350	/*
5351	 * Not much to copy, just do it a byte at a time.
5352	 */
5353	deccc	%o2		! while (--len >= 0)
5354	bl	1f
5355	EMPTY
53560:
5357	inc	%o0
5358	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
5359	stb	%o4, [%o1]
5360	deccc	%o2
5361	bge	0b
5362	inc	%o1
53631:
5364	retl
5365	 nop
5366	/* NOTREACHED */
5367
5368	/*
5369	 * Plenty of data to copy, so try to do it optimally.
5370	 */
5371Lbcopy_fancy:
5372	! check for common case first: everything lines up.
5373!	btst	7, %o0		! done already
5374	bne	1f
5375	EMPTY
5376	btst	7, %o1
5377	be,a	Lbcopy_doubles
5378	dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubes
5379
5380	! If the low bits match, we can make these line up.
53811:
5382	xor	%o0, %o1, %o3	! t = src ^ dst;
5383	btst	1, %o3		! if (t & 1) {
5384	be,a	1f
5385	btst	1, %o0		! [delay slot: if (src & 1)]
5386
5387	! low bits do not match, must copy by bytes.
53880:
5389	ldsb	[%o0], %o4	!	do {
5390	inc	%o0		!		(++dst)[-1] = *src++;
5391	inc	%o1
5392	deccc	%o2
5393	bnz	0b		!	} while (--len != 0);
5394	stb	%o4, [%o1 - 1]
5395	retl
5396	 nop
5397	/* NOTREACHED */
5398
5399	! lowest bit matches, so we can copy by words, if nothing else
54001:
5401	be,a	1f		! if (src & 1) {
5402	btst	2, %o3		! [delay slot: if (t & 2)]
5403
5404	! although low bits match, both are 1: must copy 1 byte to align
5405	ldsb	[%o0], %o4	!	*dst++ = *src++;
5406	stb	%o4, [%o1]
5407	inc	%o0
5408	inc	%o1
5409	dec	%o2		!	len--;
5410	btst	2, %o3		! } [if (t & 2)]
54111:
5412	be,a	1f		! if (t & 2) {
5413	btst	2, %o0		! [delay slot: if (src & 2)]
5414	dec	2, %o2		!	len -= 2;
54150:
5416	ldsh	[%o0], %o4	!	do {
5417	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
5418	inc	2, %o0		!		dst += 2, src += 2;
5419	deccc	2, %o2		!	} while ((len -= 2) >= 0);
5420	bge	0b
5421	inc	2, %o1
5422	b	Lbcopy_mopb	!	goto mop_up_byte;
5423	btst	1, %o2		! } [delay slot: if (len & 1)]
5424	/* NOTREACHED */
5425
5426	! low two bits match, so we can copy by longwords
54271:
5428	be,a	1f		! if (src & 2) {
5429	btst	4, %o3		! [delay slot: if (t & 4)]
5430
5431	! although low 2 bits match, they are 10: must copy one short to align
5432	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
5433	sth	%o4, [%o1]
5434	inc	2, %o0		!	dst += 2;
5435	inc	2, %o1		!	src += 2;
5436	dec	2, %o2		!	len -= 2;
5437	btst	4, %o3		! } [if (t & 4)]
54381:
5439	be,a	1f		! if (t & 4) {
5440	btst	4, %o0		! [delay slot: if (src & 4)]
5441	dec	4, %o2		!	len -= 4;
54420:
5443	ld	[%o0], %o4	!	do {
5444	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
5445	inc	4, %o0		!		dst += 4, src += 4;
5446	deccc	4, %o2		!	} while ((len -= 4) >= 0);
5447	bge	0b
5448	inc	4, %o1
5449	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
5450	btst	2, %o2		! } [delay slot: if (len & 2)]
5451	/* NOTREACHED */
5452
5453	! low three bits match, so we can copy by doublewords
54541:
5455	be	1f		! if (src & 4) {
5456	dec	8, %o2		! [delay slot: len -= 8]
5457	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5458	st	%o4, [%o1]
5459	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
5460	inc	4, %o1
5461	dec	4, %o2		! }
54621:
5463Lbcopy_doubles:
5464	ldd	[%o0], %o4	! do {
5465	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
5466	inc	8, %o0		!	dst += 8, src += 8;
5467	deccc	8, %o2		! } while ((len -= 8) >= 0);
5468	bge	Lbcopy_doubles
5469	inc	8, %o1
5470
5471	! check for a usual case again (save work)
5472	btst	7, %o2		! if ((len & 7) == 0)
5473	be	Lbcopy_done	!	goto bcopy_done;
5474
5475	btst	4, %o2		! if ((len & 4)) == 0)
5476	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
5477	btst	2, %o2		! [delay slot: if (len & 2)]
5478	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5479	st	%o4, [%o1]
5480	inc	4, %o0		!	dst += 4;
5481	inc	4, %o1		!	src += 4;
5482	btst	2, %o2		! } [if (len & 2)]
5483
54841:
5485	! mop up trailing word (if present) and byte (if present).
5486Lbcopy_mopw:
5487	be	Lbcopy_mopb	! no word, go mop up byte
5488	btst	1, %o2		! [delay slot: if (len & 1)]
5489	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
5490	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
5491	sth	%o4, [%o1]
5492	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
5493	retl
5494	 stb	%o4, [%o1 + 2]
5495	/* NOTREACHED */
5496
5497	! mop up trailing byte (if present).
5498Lbcopy_mopb:
5499	bne,a	1f
5500	ldsb	[%o0], %o4
5501
5502Lbcopy_done:
5503	retl
5504	 nop
5505
55061:
5507	retl
5508	 stb	%o4,[%o1]
5509/*
5510 * ovbcopy(src, dst, len): like bcopy, but regions may overlap.
5511 */
5512ENTRY(ovbcopy)
5513	cmp	%o0, %o1	! src < dst?
5514	bgeu	Lbcopy_start	! no, go copy forwards as via bcopy
5515	cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)
5516
5517	/*
5518	 * Since src comes before dst, and the regions might overlap,
5519	 * we have to do the copy starting at the end and working backwards.
5520	 */
5521	add	%o2, %o0, %o0	! src += len
5522	add	%o2, %o1, %o1	! dst += len
5523	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
5524	btst	3, %o0
5525
5526	/*
5527	 * Not much to copy, just do it a byte at a time.
5528	 */
5529	deccc	%o2		! while (--len >= 0)
5530	bl	1f
5531	EMPTY
55320:
5533	dec	%o0		!	*--dst = *--src;
5534	ldsb	[%o0], %o4
5535	dec	%o1
5536	deccc	%o2
5537	bge	0b
5538	stb	%o4, [%o1]
55391:
5540	retl
5541	nop
5542
5543	/*
5544	 * Plenty to copy, try to be optimal.
5545	 * We only bother with word/halfword/byte copies here.
5546	 */
5547Lback_fancy:
5548!	btst	3, %o0		! done already
5549	bnz	1f		! if ((src & 3) == 0 &&
5550	btst	3, %o1		!     (dst & 3) == 0)
5551	bz,a	Lback_words	!	goto words;
5552	dec	4, %o2		! (done early for word copy)
5553
55541:
5555	/*
5556	 * See if the low bits match.
5557	 */
5558	xor	%o0, %o1, %o3	! t = src ^ dst;
5559	btst	1, %o3
5560	bz,a	3f		! if (t & 1) == 0, can do better
5561	btst	1, %o0
5562
5563	/*
5564	 * Nope; gotta do byte copy.
5565	 */
55662:
5567	dec	%o0		! do {
5568	ldsb	[%o0], %o4	!	*--dst = *--src;
5569	dec	%o1
5570	deccc	%o2		! } while (--len != 0);
5571	bnz	2b
5572	stb	%o4, [%o1]
5573	retl
5574	nop
5575
55763:
5577	/*
5578	 * Can do halfword or word copy, but might have to copy 1 byte first.
5579	 */
5580!	btst	1, %o0		! done earlier
5581	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
5582	btst	2, %o3		! (done early)
5583	dec	%o0		!	*--dst = *--src;
5584	ldsb	[%o0], %o4
5585	dec	%o1
5586	stb	%o4, [%o1]
5587	dec	%o2		!	len--;
5588	btst	2, %o3		! }
5589
55904:
5591	/*
5592	 * See if we can do a word copy ((t&2) == 0).
5593	 */
5594!	btst	2, %o3		! done earlier
5595	bz,a	6f		! if (t & 2) == 0, can do word copy
5596	btst	2, %o0		! (src&2, done early)
5597
5598	/*
5599	 * Gotta do halfword copy.
5600	 */
5601	dec	2, %o2		! len -= 2;
56025:
5603	dec	2, %o0		! do {
5604	ldsh	[%o0], %o4	!	src -= 2;
5605	dec	2, %o1		!	dst -= 2;
5606	deccc	2, %o0		!	*(short *)dst = *(short *)src;
5607	bge	5b		! } while ((len -= 2) >= 0);
5608	sth	%o4, [%o1]
5609	b	Lback_mopb	! goto mop_up_byte;
5610	btst	1, %o2		! (len&1, done early)
5611
56126:
5613	/*
5614	 * We can do word copies, but we might have to copy
5615	 * one halfword first.
5616	 */
5617!	btst	2, %o0		! done already
5618	bz	7f		! if (src & 2) {
5619	dec	4, %o2		! (len -= 4, done early)
5620	dec	2, %o0		!	src -= 2, dst -= 2;
5621	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
5622	dec	2, %o1
5623	sth	%o4, [%o1]
5624	dec	2, %o2		!	len -= 2;
5625				! }
5626
56277:
5628Lback_words:
5629	/*
5630	 * Do word copies (backwards), then mop up trailing halfword
5631	 * and byte if any.
5632	 */
5633!	dec	4, %o2		! len -= 4, done already
56340:				! do {
5635	dec	4, %o0		!	src -= 4;
5636	dec	4, %o1		!	src -= 4;
5637	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5638	deccc	4, %o2		! } while ((len -= 4) >= 0);
5639	bge	0b
5640	st	%o4, [%o1]
5641
5642	/*
5643	 * Check for trailing shortword.
5644	 */
5645	btst	2, %o2		! if (len & 2) {
5646	bz,a	1f
5647	btst	1, %o2		! (len&1, done early)
5648	dec	2, %o0		!	src -= 2, dst -= 2;
5649	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
5650	dec	2, %o1
5651	sth	%o4, [%o1]	! }
5652	btst	1, %o2
5653
5654	/*
5655	 * Check for trailing byte.
5656	 */
56571:
5658Lback_mopb:
5659!	btst	1, %o2		! (done already)
5660	bnz,a	1f		! if (len & 1) {
5661	ldsb	[%o0 - 1], %o4	!	b = src[-1];
5662	retl
5663	nop
56641:
5665	retl			!	dst[-1] = b;
5666	stb	%o4, [%o1 - 1]	! }
5667
5668/*
5669 * kcopy() is exactly like bcopy except that it set pcb_onfault such that
5670 * when a fault occurs, it is able to return -1 to indicate this to the
5671 * caller.
5672 */
5673ENTRY(kcopy)
5674	sethi	%hi(cpcb), %o5		! cpcb->pcb_onfault = Lkcerr;
5675	ld	[%o5 + %lo(cpcb)], %o5
5676	set	Lkcerr, %o3
5677	ld	[%o5 + PCB_ONFAULT], %g1! save current onfault handler
5678	st	%o3, [%o5 + PCB_ONFAULT]
5679
5680	cmp	%o2, BCOPY_SMALL
5681Lkcopy_start:
5682	bge,a	Lkcopy_fancy	! if >= this many, go be fancy.
5683	 btst	7, %o0		! (part of being fancy)
5684
5685	/*
5686	 * Not much to copy, just do it a byte at a time.
5687	 */
5688	deccc	%o2		! while (--len >= 0)
5689	bl	1f
5690	 EMPTY
56910:
5692	ldsb	[%o0], %o4	!	*dst++ = *src++;
5693	inc	%o0
5694	stb	%o4, [%o1]
5695	deccc	%o2
5696	bge	0b
5697	 inc	%o1
56981:
5699	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5700	retl
5701	 mov	0, %o0		! delay slot: return success
5702	/* NOTREACHED */
5703
5704	/*
5705	 * Plenty of data to copy, so try to do it optimally.
5706	 */
5707Lkcopy_fancy:
5708	! check for common case first: everything lines up.
5709!	btst	7, %o0		! done already
5710	bne	1f
5711	 EMPTY
5712	btst	7, %o1
5713	be,a	Lkcopy_doubles
5714	 dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubes
5715
5716	! If the low bits match, we can make these line up.
57171:
5718	xor	%o0, %o1, %o3	! t = src ^ dst;
5719	btst	1, %o3		! if (t & 1) {
5720	be,a	1f
5721	 btst	1, %o0		! [delay slot: if (src & 1)]
5722
5723	! low bits do not match, must copy by bytes.
57240:
5725	ldsb	[%o0], %o4	!	do {
5726	inc	%o0		!		*dst++ = *src++;
5727	stb	%o4, [%o1]
5728	deccc	%o2
5729	bnz	0b		!	} while (--len != 0);
5730	 inc	%o1
5731	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5732	retl
5733	 mov	0, %o0		! delay slot: return success
5734	/* NOTREACHED */
5735
5736	! lowest bit matches, so we can copy by words, if nothing else
57371:
5738	be,a	1f		! if (src & 1) {
5739	 btst	2, %o3		! [delay slot: if (t & 2)]
5740
5741	! although low bits match, both are 1: must copy 1 byte to align
5742	ldsb	[%o0], %o4	!	*dst++ = *src++;
5743	inc	%o0
5744	stb	%o4, [%o1]
5745	dec	%o2		!	len--;
5746	inc	%o1
5747	btst	2, %o3		! } [if (t & 2)]
57481:
5749	be,a	1f		! if (t & 2) {
5750	 btst	2, %o0		! [delay slot: if (src & 2)]
5751	dec	2, %o2		!	len -= 2;
57520:
5753	ldsh	[%o0], %o4	!	do {
5754	inc	2, %o0		!		dst += 2, src += 2;
5755	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
5756	deccc	2, %o2		!	} while ((len -= 2) >= 0);
5757	bge	0b
5758	 inc	2, %o1
5759	b	Lkcopy_mopb	!	goto mop_up_byte;
5760	 btst	1, %o2		! } [delay slot: if (len & 1)]
5761	/* NOTREACHED */
5762
5763	! low two bits match, so we can copy by longwords
57641:
5765	be,a	1f		! if (src & 2) {
5766	 btst	4, %o3		! [delay slot: if (t & 4)]
5767
5768	! although low 2 bits match, they are 10: must copy one short to align
5769	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
5770	inc	2, %o0		!	dst += 2;
5771	sth	%o4, [%o1]
5772	dec	2, %o2		!	len -= 2;
5773	inc	2, %o1		!	src += 2;
5774	btst	4, %o3		! } [if (t & 4)]
57751:
5776	be,a	1f		! if (t & 4) {
5777	 btst	4, %o0		! [delay slot: if (src & 4)]
5778	dec	4, %o2		!	len -= 4;
57790:
5780	ld	[%o0], %o4	!	do {
5781	inc	4, %o0		!		dst += 4, src += 4;
5782	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
5783	deccc	4, %o2		!	} while ((len -= 4) >= 0);
5784	bge	0b
5785	 inc	4, %o1
5786	b	Lkcopy_mopw	!	goto mop_up_word_and_byte;
5787	 btst	2, %o2		! } [delay slot: if (len & 2)]
5788	/* NOTREACHED */
5789
5790	! low three bits match, so we can copy by doublewords
57911:
5792	be	1f		! if (src & 4) {
5793	 dec	8, %o2		! [delay slot: len -= 8]
5794	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5795	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
5796	st	%o4, [%o1]
5797	dec	4, %o2		! }
5798	inc	4, %o1
57991:
5800Lkcopy_doubles:
5801	! swap %o4 with %o2 during doubles copy, since %o5 is verboten
5802	mov     %o2, %o4
5803Lkcopy_doubles2:
5804	ldd	[%o0], %o2	! do {
5805	inc	8, %o0		!	dst += 8, src += 8;
5806	std	%o2, [%o1]	!	*(double *)dst = *(double *)src;
5807	deccc	8, %o4		! } while ((len -= 8) >= 0);
5808	bge	Lkcopy_doubles2
5809	 inc	8, %o1
5810	mov	%o4, %o2	! restore len
5811
5812	! check for a usual case again (save work)
5813	btst	7, %o2		! if ((len & 7) == 0)
5814	be	Lkcopy_done	!	goto bcopy_done;
5815
5816	 btst	4, %o2		! if ((len & 4)) == 0)
5817	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte;
5818	 btst	2, %o2		! [delay slot: if (len & 2)]
5819	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5820	inc	4, %o0		!	dst += 4;
5821	st	%o4, [%o1]
5822	inc	4, %o1		!	src += 4;
5823	btst	2, %o2		! } [if (len & 2)]
5824
58251:
5826	! mop up trailing word (if present) and byte (if present).
5827Lkcopy_mopw:
5828	be	Lkcopy_mopb	! no word, go mop up byte
5829	 btst	1, %o2		! [delay slot: if (len & 1)]
5830	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
5831	be	Lkcopy_done	! if ((len & 1) == 0) goto done;
5832	 sth	%o4, [%o1]
5833	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
5834	stb	%o4, [%o1 + 2]
5835	st	%g1, [%o5 + PCB_ONFAULT]! restore onfault
5836	retl
5837	 mov	0, %o0		! delay slot: return success
5838	/* NOTREACHED */
5839
5840	! mop up trailing byte (if present).
5841Lkcopy_mopb:
5842	bne,a	1f
5843	 ldsb	[%o0], %o4
5844
5845Lkcopy_done:
5846	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5847	retl
5848	 mov	0, %o0		! delay slot: return success
5849	/* NOTREACHED */
5850
58511:
5852	stb	%o4, [%o1]
5853	st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5854	retl
5855	 mov	0, %o0		! delay slot: return success
5856	/* NOTREACHED */
5857
5858Lkcerr:
5859	retl
5860	 st	%g1, [%o5 + PCB_ONFAULT]	! restore onfault
5861	/* NOTREACHED */
5862
5863/*
5864 * savefpstate(f) struct fpstate *f;
5865 *
5866 * Store the current FPU state.  The first `st %fsr' may cause a trap;
5867 * our trap handler knows how to recover (by `returning' to savefpcont).
5868 */
5869ENTRY(savefpstate)
5870	rd	%psr, %o1		! enable FP before we begin
5871	set	PSR_EF, %o2
5872	or	%o1, %o2, %o1
5873	wr	%o1, 0, %psr
5874	/* do some setup work while we wait for PSR_EF to turn on */
5875	set	FSR_QNE, %o5		! QNE = 0x2000, too big for immediate
5876	clr	%o3			! qsize = 0;
5877	nop				! (still waiting for PSR_EF)
5878special_fp_store:
5879	st	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
5880	/*
5881	 * Even if the preceding instruction did not trap, the queue
5882	 * is not necessarily empty: this state save might be happening
5883	 * because user code tried to store %fsr and took the FPU
5884	 * from `exception pending' mode to `exception' mode.
5885	 * So we still have to check the blasted QNE bit.
5886	 * With any luck it will usually not be set.
5887	 */
5888	ld	[%o0 + FS_FSR], %o4	! if (f->fs_fsr & QNE)
5889	btst	%o5, %o4
5890	bnz	Lfp_storeq		!	goto storeq;
5891	 std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
5892Lfp_finish:
5893	st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
5894	std	%f2, [%o0 + FS_REGS + (4*2)]
5895	std	%f4, [%o0 + FS_REGS + (4*4)]
5896	std	%f6, [%o0 + FS_REGS + (4*6)]
5897	std	%f8, [%o0 + FS_REGS + (4*8)]
5898	std	%f10, [%o0 + FS_REGS + (4*10)]
5899	std	%f12, [%o0 + FS_REGS + (4*12)]
5900	std	%f14, [%o0 + FS_REGS + (4*14)]
5901	std	%f16, [%o0 + FS_REGS + (4*16)]
5902	std	%f18, [%o0 + FS_REGS + (4*18)]
5903	std	%f20, [%o0 + FS_REGS + (4*20)]
5904	std	%f22, [%o0 + FS_REGS + (4*22)]
5905	std	%f24, [%o0 + FS_REGS + (4*24)]
5906	std	%f26, [%o0 + FS_REGS + (4*26)]
5907	std	%f28, [%o0 + FS_REGS + (4*28)]
5908	retl
5909	 std	%f30, [%o0 + FS_REGS + (4*30)]
5910
5911/*
5912 * Store the (now known nonempty) FP queue.
5913 * We have to reread the fsr each time in order to get the new QNE bit.
5914 */
5915Lfp_storeq:
5916	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
59171:
5918	std	%fq, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
5919	st	%fsr, [%o0 + FS_FSR]	! reread fsr
5920	ld	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
5921	btst	%o5, %o4
5922	bnz	1b
5923	 inc	8, %o3
5924	b	Lfp_finish		! set qsize and finish storing fregs
5925	 srl	%o3, 3, %o3		! (but first fix qsize)
5926
5927/*
5928 * The fsr store trapped.  Do it again; this time it will not trap.
5929 * We could just have the trap handler return to the `st %fsr', but
5930 * if for some reason it *does* trap, that would lock us into a tight
5931 * loop.  This way we panic instead.  Whoopee.
5932 */
5933savefpcont:
5934	b	special_fp_store + 4	! continue
5935	 st	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
5936
5937/*
5938 * Load FPU state.
5939 */
5940ENTRY(loadfpstate)
5941	rd	%psr, %o1		! enable FP before we begin
5942	set	PSR_EF, %o2
5943	or	%o1, %o2, %o1
5944	wr	%o1, 0, %psr
5945	nop; nop; nop			! paranoia
5946	ldd	[%o0 + FS_REGS + (4*0)], %f0
5947	ldd	[%o0 + FS_REGS + (4*2)], %f2
5948	ldd	[%o0 + FS_REGS + (4*4)], %f4
5949	ldd	[%o0 + FS_REGS + (4*6)], %f6
5950	ldd	[%o0 + FS_REGS + (4*8)], %f8
5951	ldd	[%o0 + FS_REGS + (4*10)], %f10
5952	ldd	[%o0 + FS_REGS + (4*12)], %f12
5953	ldd	[%o0 + FS_REGS + (4*14)], %f14
5954	ldd	[%o0 + FS_REGS + (4*16)], %f16
5955	ldd	[%o0 + FS_REGS + (4*18)], %f18
5956	ldd	[%o0 + FS_REGS + (4*20)], %f20
5957	ldd	[%o0 + FS_REGS + (4*22)], %f22
5958	ldd	[%o0 + FS_REGS + (4*24)], %f24
5959	ldd	[%o0 + FS_REGS + (4*26)], %f26
5960	ldd	[%o0 + FS_REGS + (4*28)], %f28
5961	ldd	[%o0 + FS_REGS + (4*30)], %f30
5962	retl
5963	 ld	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
5964
5965/*
5966 * ienab_bis(bis) int bis;
5967 * ienab_bic(bic) int bic;
5968 *
5969 * Set and clear bits in the sun4/sun4c interrupt register.
5970 */
5971
5972#if defined(SUN4) || defined(SUN4C)
5973/*
5974 * Since there are no read-modify-write instructions for this,
5975 * and one of the interrupts is nonmaskable, we must disable traps.
5976 */
5977ENTRY(ienab_bis)
5978	! %o0 = bits to set
5979	rd	%psr, %o2
5980	wr	%o2, PSR_ET, %psr	! disable traps
5981	nop; nop			! 3-instr delay until ET turns off
5982	sethi	%hi(INTRREG_VA), %o3
5983	ldub	[%o3 + %lo(INTRREG_VA)], %o4
5984	or	%o4, %o0, %o4		! *INTRREG_VA |= bis;
5985	stb	%o4, [%o3 + %lo(INTRREG_VA)]
5986	wr	%o2, 0, %psr		! reenable traps
5987	nop
5988	retl
5989	 nop
5990
5991ENTRY(ienab_bic)
5992	! %o0 = bits to clear
5993	rd	%psr, %o2
5994	wr	%o2, PSR_ET, %psr	! disable traps
5995	nop; nop
5996	sethi	%hi(INTRREG_VA), %o3
5997	ldub	[%o3 + %lo(INTRREG_VA)], %o4
5998	andn	%o4, %o0, %o4		! *INTRREG_VA &=~ bic;
5999	stb	%o4, [%o3 + %lo(INTRREG_VA)]
6000	wr	%o2, 0, %psr		! reenable traps
6001	nop
6002	retl
6003	 nop
6004#endif	/* SUN4 || SUN4C */
6005
#if defined(SUN4M)
/*
 * raise(cpu, level)
 *
 * Post a software interrupt at `level' on processor `cpu'.
 * The loop walks %o1 up by _MAXNBPG per cpu to reach that cpu's
 * ICR_PI_SET register; the store is done in the return delay slot.
 */
ENTRY(raise)
#if !defined(MSIIEP) /* normal suns */
	! *(ICR_PI_SET + cpu*_MAXNBPG) = PINTR_SINTRLEV(level)
	sethi	%hi(1 << 16), %o2
	sll	%o2, %o1, %o2
	set	ICR_PI_SET, %o1
	set	_MAXNBPG, %o3
1:
	subcc	%o0, 1, %o0
	bpos,a	1b
	 add	%o1, %o3, %o1
	retl
	 st	%o2, [%o1]
#else /* MSIIEP - ignore %o0, only one cpu ever */
	mov	1, %o2
	sethi	%hi(MSIIEP_PCIC_VA), %o0
	sll	%o2, %o1, %o2
	retl
	 sth	%o2, [%o0 + PCIC_SOFT_INTR_SET_REG]
#endif
6030
6031/*
6032 * Read Synchronous Fault Status registers.
6033 * On entry: %l1 == PC, %l3 == fault type, %l4 == storage, %l7 == return address
6034 * Only use %l5 and %l6.
6035 * Note: not C callable.
6036 */
6037_ENTRY(_C_LABEL(srmmu_get_syncflt))
6038_ENTRY(_C_LABEL(hypersparc_get_syncflt))
6039	set	SRMMU_SFAR, %l5
6040	lda	[%l5] ASI_SRMMU, %l5	! sync virt addr; must be read first
6041	st	%l5, [%l4 + 4]		! => dump.sfva
6042	set	SRMMU_SFSR, %l5
6043	lda	[%l5] ASI_SRMMU, %l5	! get sync fault status register
6044	jmp	%l7 + 8			! return to caller
6045	 st	%l5, [%l4]		! => dump.sfsr
6046
/*
 * As above, for modules where a text fault's address is best taken
 * from the trapped PC (%l1) rather than from SRMMU_SFAR.
 * Same register contract: %l1 == PC, %l3 == fault type, %l4 == storage,
 * %l7 == return address; only %l5/%l6 may be used.  Not C callable.
 */
_ENTRY(_C_LABEL(viking_get_syncflt))
_ENTRY(_C_LABEL(ms1_get_syncflt))
_ENTRY(_C_LABEL(swift_get_syncflt))
_ENTRY(_C_LABEL(turbosparc_get_syncflt))
_ENTRY(_C_LABEL(cypress_get_syncflt))
	cmp	%l3, T_TEXTFAULT
	be,a	1f
	 mov	%l1, %l5		! use PC if type == T_TEXTFAULT

	set	SRMMU_SFAR, %l5
	lda	[%l5] ASI_SRMMU, %l5	! sync virt addr; must be read first
1:
	st	%l5, [%l4 + 4]		! => dump.sfva

	set	SRMMU_SFSR, %l5
	lda	[%l5] ASI_SRMMU, %l5	! get sync fault status register
	jmp	%l7 + 8			! return to caller
	 st	%l5, [%l4]		! => dump.sfsr
6065
#if defined(MULTIPROCESSOR) && 0 /* notyet */
/*
 * Read Synchronous Fault Status registers.
 * On entry: %o0 == &sfsr, %o1 == &sfar
 *
 * C-callable wrapper: calls this cpu's get_syncflt hook with the
 * per-cpu sync-fault dump area, then copies the dumped sfsr/sfva out
 * to the caller's pointers.  (Compiled out — `&& 0' above.)
 */
_ENTRY(_C_LABEL(smp_get_syncflt))
	save    %sp, -CCFSZ, %sp

	sethi	%hi(CPUINFO_VA), %o4
	ld	[%l4 + %lo(CPUINFO_VA+CPUINFO_GETSYNCFLT)], %o5
	clr	%l1
	clr	%l3
	jmpl	%o5, %l7
	 or	%o4, %lo(CPUINFO_SYNCFLTDUMP), %l4

	! load values out of the dump
	ld	[%o4 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP)], %o5
	st	%o5, [%i0]
	ld	[%o4 + %lo(CPUINFO_VA+CPUINFO_SYNCFLTDUMP+4)], %o5
	st	%o5, [%i1]
	ret
	 restore
#endif /* MULTIPROCESSOR */
6089
6090/*
6091 * Read Asynchronous Fault Status registers.
6092 * On entry: %o0 == &afsr, %o1 == &afar
6093 * Return 0 if async register are present.
6094 */
6095_ENTRY(_C_LABEL(srmmu_get_asyncflt))
6096	set	SRMMU_AFAR, %o4
6097	lda	[%o4] ASI_SRMMU, %o4	! get async fault address
6098	set	SRMMU_AFSR, %o3	!
6099	st	%o4, [%o1]
6100	lda	[%o3] ASI_SRMMU, %o3	! get async fault status
6101	st	%o3, [%o0]
6102	retl
6103	 clr	%o0			! return value
6104
/*
 * As above for Cypress/HyperSPARC: on these modules the status must be
 * read before the address register, and the address is only stored
 * when AFSR_AFO says it is valid.  Returns 0 in %o0 either way.
 */
_ENTRY(_C_LABEL(cypress_get_asyncflt))
_ENTRY(_C_LABEL(hypersparc_get_asyncflt))
	set	SRMMU_AFSR, %o3		! must read status before fault on HS
	lda	[%o3] ASI_SRMMU, %o3	! get async fault status
	st	%o3, [%o0]
	btst	AFSR_AFO, %o3		! and only read fault address
	bz	1f			! if valid.
	set	SRMMU_AFAR, %o4		! (delay slot; harmless if branch taken)
	lda	[%o4] ASI_SRMMU, %o4	! get async fault address
	clr	%o0			! return value
	retl
	 st	%o4, [%o1]
1:
	retl
	 clr	%o0			! return value
6120
/* Stub for modules without async fault registers: report "not present". */
_ENTRY(_C_LABEL(no_asyncflt_regs))
	retl
	 mov	1, %o0			! return value
6124
_ENTRY(_C_LABEL(hypersparc_pure_vcache_flush))
	/*
	 * Flush entire on-chip instruction cache, which is
	 * a pure virtually-indexed/virtually-tagged cache.
	 */
	retl
	 sta	%g0, [%g0] ASI_HICACHECLR
6132
6133#endif /* SUN4M */
6134
#if !defined(MSIIEP)	/* normal suns */
/*
 * void lo_microtime(struct timeval *tv)
 *
 * LBL's sparc bsd 'microtime': We don't need to spl (so this routine
 * can be a leaf routine) and we don't keep a 'last' timeval (there
 * can't be two calls to this routine in a microsecond).  This seems to
 * be about 20 times faster than the Sun code on an SS-2. - vj
 *
 * Read time values from slowest-changing to fastest-changing,
 * then re-read out to slowest.  If the values read before
 * the innermost match those read after, the innermost value
 * is consistent with the outer values.  If not, it may not
 * be and we must retry.  Typically this loop runs only once;
 * occasionally it runs twice, and only rarely does it run longer.
 *
 * Register usage: %o2/%o3 = time.tv_sec/tv_usec snapshot,
 * %o4 = hardware usec counter, %g4/%g5 = re-read for consistency.
 */
#if defined(SUN4)
ENTRY(lo_microtime)
#else
ENTRY(microtime)
#endif
	sethi	%hi(_C_LABEL(time)), %g2

#if defined(SUN4M) && !(defined(SUN4C) || defined(SUN4))
	sethi	%hi(TIMERREG_VA+4), %g3
	or	%g3, %lo(TIMERREG_VA+4), %g3
#elif (defined(SUN4C) || defined(SUN4)) && !defined(SUN4M)
	sethi	%hi(TIMERREG_VA), %g3
	or	%g3, %lo(TIMERREG_VA), %g3
#else
	sethi	%hi(TIMERREG_VA), %g3
	or	%g3, %lo(TIMERREG_VA), %g3
NOP_ON_4_4C_1:
	 add	%g3, 4, %g3		! patched to a nop on 4/4c at boot
#endif

2:
	ldd	[%g2+%lo(_C_LABEL(time))], %o2	! time.tv_sec & time.tv_usec
	ld	[%g3], %o4			! usec counter
	ldd	[%g2+%lo(_C_LABEL(time))], %g4	! see if time values changed
	cmp	%g4, %o2
	bne	2b				! if time.tv_sec changed
	 cmp	%g5, %o3
	bne	2b				! if time.tv_usec changed
	 tst	%o4

	bpos	3f				! reached limit?
	 srl	%o4, TMR_SHIFT, %o4		! convert counter to usec
	sethi	%hi(_C_LABEL(tick)), %g4	! bump usec by 1 tick
	ld	[%g4+%lo(_C_LABEL(tick))], %o1
	set	TMR_MASK, %g5
	add	%o1, %o3, %o3
	and	%o4, %g5, %o4
3:
	add	%o4, %o3, %o3
	set	1000000, %g5			! normalize usec value
	cmp	%o3, %g5
	bl,a	4f
	 st	%o2, [%o0]
	add	%o2, 1, %o2			! overflow
	sub	%o3, %g5, %o3
	st	%o2, [%o0]
4:
	retl
	 st	%o3, [%o0+4]
6200
#else /* MSIIEP */
/* XXX: uwe: can be merged with 4c/4m version above */
/*
 * ms-IIep version of
 * void microtime(struct timeval *tv)
 *
 * This is similar to 4c/4m microtime.   The difference is that
 * counter uses 31 bits and ticks every 4 CPU cycles (cpu is @100MHz)
 * the magic to divide by 25 is stolen from gcc
 */
ENTRY(microtime)
	sethi	%hi(_C_LABEL(time)), %g2

	sethi	%hi(MSIIEP_PCIC_VA), %g3
	or	%g3, PCIC_SCCR_REG, %g3

2:
	ldd	[%g2+%lo(_C_LABEL(time))], %o2	! time.tv_sec & time.tv_usec
	ld	[%g3], %o4			! system (timer) counter
	ldd	[%g2+%lo(_C_LABEL(time))], %g4	! see if time values changed
	cmp	%g4, %o2
	bne	2b				! if time.tv_sec changed
	 cmp	%g5, %o3
	bne	2b				! if time.tv_usec changed
	 tst	%o4
	!! %o2 - time.tv_sec;  %o3 - time.tv_usec;  %o4 - timer counter

!!! BEGIN ms-IIep specific code
	bpos	3f				! if limit not reached yet
	 clr	%g4				!  then use timer as is

	set	0x80000000, %g5
	sethi	%hi(_C_LABEL(tick)), %g4
	bclr	%g5, %o4			! clear limit reached flag
	ld	[%g4+%lo(_C_LABEL(tick))], %g4

	!! %g4 - either 0 or tick (if timer has hit the limit)
3:
	inc	-1, %o4				! timer is 1-based, adjust
	!! divide by 25 magic stolen from a gcc output
	set	1374389535, %g5
	umul	%o4, %g5, %g0
	rd	%y, %o4
	srl	%o4, 3, %o4
	add	%o4, %g4, %o4			! may be bump usec by tick
!!! END ms-IIep specific code

	add	%o3, %o4, %o3			! add timer to time.tv_usec
	set	1000000, %g5			! normalize usec value
	cmp	%o3, %g5
	bl,a	4f
	 st	%o2, [%o0]
	inc	%o2				! overflow into tv_sec
	sub	%o3, %g5, %o3
	st	%o2, [%o0]
4:	retl
	 st	%o3, [%o0 + 4]
#endif /* MSIIEP */
6259
6260/*
6261 * delay function
6262 *
6263 * void delay(N)  -- delay N microseconds
6264 *
6265 * Register usage: %o0 = "N" number of usecs to go (counts down to zero)
6266 *		   %o1 = "timerblurb" (stays constant)
6267 *		   %o2 = counter for 1 usec (counts down from %o1 to zero)
6268 *
6269 */
6270
6271ENTRY(delay)			! %o0 = n
6272	subcc	%o0, %g0, %g0
6273	be	2f
6274
6275	sethi	%hi(_C_LABEL(timerblurb)), %o1
6276	ld	[%o1 + %lo(_C_LABEL(timerblurb))], %o1	! %o1 = timerblurb
6277
6278	 addcc	%o1, %g0, %o2		! %o2 = cntr (start @ %o1), clear CCs
6279					! first time through only
6280
6281					! delay 1 usec
62821:	bne	1b			! come back here if not done
6283	 subcc	%o2, 1, %o2		! %o2 = %o2 - 1 [delay slot]
6284
6285	subcc	%o0, 1, %o0		! %o0 = %o0 - 1
6286	bne	1b			! done yet?
6287	 addcc	%o1, %g0, %o2		! reinit %o2 and CCs  [delay slot]
6288					! harmless if not branching
62892:
6290	retl				! return
6291	 nop				! [delay slot]
6292
#if defined(KGDB) || defined(DDB) || defined(DIAGNOSTIC)
/*
 * Write all windows (user or otherwise), except the current one.
 *
 * THIS COULD BE DONE IN USER CODE
 */
ENTRY(write_all_windows)
	/*
	 * g2 = g1 = nwindows - 1;
	 * while (--g1 > 0) save();
	 * while (--g2 > 0) restore();
	 */
	sethi	%hi(_C_LABEL(nwindows)), %g1
	ld	[%g1 + %lo(_C_LABEL(nwindows))], %g1
	dec	%g1
	mov	%g1, %g2

1:	deccc	%g1
	bg,a	1b
	 save	%sp, -64, %sp		! force a window spill if needed

2:	deccc	%g2
	bg,a	2b
	 restore			! come back to the original window

	retl
	nop
#endif /* KGDB */
6321
/*
 * setjmp(jmp_buf): save %sp, return pc (%o7) and %fp; returns 0.
 * (The std stores the %sp/%o7 register pair at offsets 0 and 4.)
 */
ENTRY(setjmp)
	std	%sp, [%o0+0]	! stack pointer & return pc
	st	%fp, [%o0+8]	! frame pointer
	retl
	 clr	%o0
6327
Lpanic_ljmp:
	.asciz	"longjmp botch"
	_ALIGN

/*
 * longjmp(jmp_buf a, int v): unwind register windows until the frame
 * saved by setjmp is current, then return v (or 1 if v == 0) from the
 * matching setjmp.  Panics if the saved frame cannot be reached or the
 * saved %sp would move the stack downward.
 */
ENTRY(longjmp)
	addcc	%o1, %g0, %g6	! compute v ? v : 1 in a global register
	be,a	0f
	 mov	1, %g6
0:
	mov	%o0, %g1	! save a in another global register
	ld	[%g1+8], %g7	/* get caller's frame */
1:
	cmp	%fp, %g7	! compare against desired frame
	bl,a	1b		! if below,
	 restore		!    pop frame and loop
	be,a	2f		! if there,
	 ldd	[%g1+0], %o2	!    fetch return %sp and pc, and get out

Llongjmpbotch:
				! otherwise, went too far; bomb out
	save	%sp, -CCFSZ, %sp	/* preserve current window */
	sethi	%hi(Lpanic_ljmp), %o0
	call	_C_LABEL(panic)
	or %o0, %lo(Lpanic_ljmp), %o0;
	unimp	0

2:
	cmp	%o2, %sp	! %sp must not decrease
	bge,a	3f
	 mov	%o2, %sp	! it is OK, put it in place
	b,a	Llongjmpbotch
3:
	jmp	%o3 + 8		! success, return %g6
	 mov	%g6, %o0
6362
	.data
	/* Variables filled in during bootstrap (see machdep/autoconf). */
	.globl	_C_LABEL(kernel_top)
_C_LABEL(kernel_top):
	.word	0
	.globl	_C_LABEL(bootinfo)
_C_LABEL(bootinfo):
	.word	0

	.globl	_C_LABEL(proc0paddr)
_C_LABEL(proc0paddr):
	.word	_C_LABEL(u0)	! KVA of proc0 uarea

/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
	.globl	_C_LABEL(intrcnt), _C_LABEL(eintrcnt)
	.globl	_C_LABEL(intrnames), _C_LABEL(eintrnames)
_C_LABEL(intrnames):
	.asciz	"spur"
	.asciz	"lev1"
	.asciz	"lev2"
	.asciz	"lev3"
	.asciz	"lev4"
	.asciz	"lev5"
	.asciz	"lev6"
	.asciz	"lev7"
	.asciz  "lev8"
	.asciz	"lev9"
	.asciz	"clock"
	.asciz	"lev11"
	.asciz	"lev12"
	.asciz	"lev13"
	.asciz	"prof"
_C_LABEL(eintrnames):
	_ALIGN
_C_LABEL(intrcnt):
	.skip	4*15		! one counter per name above
_C_LABEL(eintrcnt):

	.comm	_C_LABEL(nwindows), 4
	.comm	_C_LABEL(romp), 4
6402