xref: /original-bsd/sys/sparc/sparc/locore.s (revision 6afd9275)
1/*
2 * Copyright (c) 1992 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * All advertising materials mentioning features or use of this software
10 * must display the following acknowledgement:
11 *	This product includes software developed by the University of
12 *	California, Lawrence Berkeley Laboratories.
13 *
14 * %sccs.include.redist.c%
15 *
16 *	@(#)locore.s	7.4 (Berkeley) 10/11/92
17 *
18 * from: $Header: locore.s,v 1.46 92/08/05 04:17:58 torek Exp $
19 */
20
21#define	LOCORE
22#include "assym.s"
23#include <sparc/sparc/ctlreg.h>
24#include <sparc/sparc/intreg.h>
25#include <sparc/sparc/timerreg.h>
26#ifdef notyet
27#include "zsaddr.h"
28#include <sparc/dev/zsreg.h>
29#endif
30#include <machine/psl.h>
31#include <machine/signal.h>
32#include <machine/trap.h>
33
34/*
35 * GNU assembler does not understand `.empty' directive; Sun assembler
36 * gripes about labels without it.  To allow cross-compilation using
37 * the Sun assembler, and because .empty directives are useful documentation,
38 * we use this trick.  While we are at it we fix the .align mismatch.
39 *
40 * XXX	new GNU assemblers match the Sun assembler in interpretation of
41 * XXX	.align -- will have to fix this for them
42 */
43#ifdef SUN_AS
44#define	EMPTY	.empty
45#define	ALIGN	.align 4
46#else
47#define	EMPTY	/* .empty */
48#define	ALIGN	.align 2
49#endif
50
51/*
52 * CCFSZ (C Compiler Frame SiZe) is the size of a stack frame required if
53 * a function is to call C code.  It should be just 64, but Sun defined
54 * their frame with space to hold arguments 0 through 5 (plus some junk),
55 * and varargs routines (such as printf) demand this, and gcc uses this
56 * area at times anyway.
57 */
58#define	CCFSZ	96
59
60/*
61 * A handy macro for maintaining instrumentation counters.
62 * Note that this clobbers %o0 and %o1.  Normal usage is
63 * something like:
64 *	foointr:
65 *		TRAP_SETUP(...)		! makes %o registers safe
66 *		INCR(_cnt+V_FOO)	! count a foo
67 */
68#define INCR(what) \
69	sethi	%hi(what), %o0; \
70	ld	[%o0 + %lo(what)], %o1; \
71	inc	%o1; \
72	st	%o1, [%o0 + %lo(what)]
73
74/*
75 * Another handy macro: load one register window, given `base' address.
76 * This can be either a simple register (e.g., %sp) or include an initial
77 * offset (e.g., %g6 + PCB_RW).
78 */
79#define	LOADWIN(addr) \
80	ldd	[addr], %l0; \
81	ldd	[addr + 8], %l2; \
82	ldd	[addr + 16], %l4; \
83	ldd	[addr + 24], %l6; \
84	ldd	[addr + 32], %i0; \
85	ldd	[addr + 40], %i2; \
86	ldd	[addr + 48], %i4; \
87	ldd	[addr + 56], %i6
88
89/*
90 * To return from trap we need the two-instruction sequence
91 * `jmp %l1; rett %l2', which is defined here for convenience.
92 */
93#define	RETT	jmp %l1; rett %l2
94
95	.data
96/*
97 * The interrupt stack.
98 *
99 * This is the very first thing in the data segment, and therefore has
100 * the lowest kernel stack address.  We count on this in the interrupt
101 * trap-frame setup code, since we may need to switch from the kernel
102 * stack to the interrupt stack (iff we are not already on the interrupt
103 * stack).  One sethi+cmp is all we need since this is so carefully
104 * arranged.
105 */
106	.globl	_intstack
107	.globl	_eintstack
108_intstack:
109	.skip	4 * NBPG		! 16k = 128 128-byte stack frames
110_eintstack:
111
112/*
113 * When a process exits and its u. area goes away, we set cpcb to point
114 * to this `u.', leaving us with something to use for an interrupt stack,
115 * and letting all the register save code have a pcb_uw to examine.
116 * This is also carefully arranged (to come just before u0, so that
117 * process 0's kernel stack can quietly overrun into it during bootup, if
118 * we feel like doing that).
119 */
120	.globl	_idle_u
121_idle_u:
122	.skip	UPAGES * NBPG
123
124/*
125 * Process 0's u.
126 *
127 * This must be aligned on an 8 byte boundary.
128 */
129	.globl	_u0
130_u0:	.skip	UPAGES * NBPG
131estack0:
132
133#ifdef KGDB
134/*
135 * Another item that must be aligned, easiest to put it here.
136 */
137KGDB_STACK_SIZE = 2048
138	.globl	_kgdb_stack
139_kgdb_stack:
140	.skip	KGDB_STACK_SIZE		! hope this is enough
141#endif
142
143/*
144 * _cpcb points to the current pcb (and hence u. area).
145 * Initially this is the special one.
146 */
147	.globl	_cpcb
148_cpcb:	.word	_u0
149
150	.text
151
152/*
153 * The first thing in the real text segment is the trap vector table,
154 * which must be aligned on a 4096 byte boundary.  The text segment
155 * starts beyond page 0 of KERNBASE so that there is a red zone
156 * between user and kernel space.  Since the boot ROM loads us at
157 * 0x4000, it is far easier to start at KERNBASE+0x4000 than to
158 * buck the trend.  This is four pages in; we can stuff something
159 * into the three pages left beneath us later ... like, oh, say, the
160 * message buffer (1 page).
161 */
162	.globl	_msgbuf
163msgbufsize = NBPG			! 1 page for msg buffer
164_msgbuf	= KERNBASE + NBPG
165
166/*
167 * The remaining two physical pages are currently unused.  We need to
168 * map the interrupt enable register very early on in the boot process,
169 * so that we can handle NMIs (parity errors) halfway sensibly during
170 * boot.  We use virtual address f8002000 (`page 2') for this, wasting
171 * 4096 bytes of physical memory.
172 */
173IE_reg_addr = _msgbuf + msgbufsize	! this page not used; points to IEreg
174
175/*
176 * Each trap has room for four instructions, of which one perforce must
177 * be a branch.  On entry the hardware has copied pc and npc to %l1 and
178 * %l2 respectively.  We use two more to read the psr into %l0, and to
179 * put the trap type value into %l3 (with a few exceptions below).
180 * We could read the trap type field of %tbr later in the code instead,
181 * but there is no need, and that would require more instructions
182 * (read+mask, vs 1 `mov' here).
183 *
184 * I used to generate these numbers by address arithmetic, but gas's
185 * expression evaluator has about as much sense as your average slug
186 * (oddly enough, the code looks about as slimy too).  Thus, all the
187 * trap numbers are given as arguments to the trap macros.  This means
188 * there is one line per trap.  Sigh.
189 *
190 * Note that only the local registers may be used, since the trap
191 * window is potentially the last window.  Its `in' registers are
192 * the previous window's outs (as usual), but more important, its
193 * `out' registers may be in use as the `topmost' window's `in' registers.
194 * The global registers are of course verboten (well, until we save
195 * them away).
196 *
197 * Hardware interrupt vectors can be `linked'---the linkage is to regular
198 * C code---or rewired to fast in-window handlers.  The latter are good
199 * for unbuffered hardware like the Zilog serial chip and the AMD audio
200 * chip, where many interrupts can be handled trivially with pseudo-DMA or
201 * similar.  Only one `fast' interrupt can be used per level, however, and
202 * direct and `fast' interrupts are incompatible.  Routines in intr.c
203 * handle setting these, with optional paranoia.
204 */
205
206	/* regular vectored traps */
207#define	VTRAP(type, label) \
208	mov (type), %l3; b label; mov %psr, %l0; nop
209
210	/* hardware interrupts (can be linked or made `fast') */
211#define	HARDINT(lev) \
212	mov (lev), %l3; b _sparc_interrupt; mov %psr, %l0; nop
213
214	/* software interrupts (may not be made direct, sorry---but you
215	   should not be using them trivially anyway) */
216#define	SOFTINT(lev, bit) \
217	mov (lev), %l3; mov (bit), %l4; b softintr; mov %psr, %l0
218
219	/* traps that just call trap() */
220#define	TRAP(type)	VTRAP(type, slowtrap)
221
222	/* architecturally undefined traps (cause panic) */
223#define	UTRAP(type)	VTRAP(type, slowtrap)
224
225	/* software undefined traps (may be replaced) */
226#define	STRAP(type)	VTRAP(type, slowtrap)
227
228/* breakpoint acts differently under kgdb */
229#ifdef KGDB
230#define	BPT		VTRAP(T_BREAKPOINT, bpt)
231#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
232#else
233#define	BPT		TRAP(T_BREAKPOINT)
234#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
235#endif
236
237/* special high-speed 1-instruction-shaved-off traps (get nothing in %l3) */
238#ifdef COMPAT_SUNOS
239#define	SUN_SYSCALL	b sun_syscall; mov %psr, %l0; nop; nop
240#else
241#define	SUN_SYSCALL	TRAP(T_SUN_SYSCALL)
242#endif
243#define	SYSCALL		b syscall; mov %psr, %l0; nop; nop
244#define	WINDOW_OF	b window_of; mov %psr, %l0; nop; nop
245#define	WINDOW_UF	b window_uf; mov %psr, %l0; nop; nop
246#ifdef notyet
247#define	ZS_INTERRUPT	b zshard; mov %psr, %l0; nop; nop
248#else
249#define	ZS_INTERRUPT	HARDINT(12)
250#endif
251
252	.globl	start
253	.globl	_trapbase
254start:
255_trapbase:
256/* trap 0 is special since we cannot receive it */
257	b dostart; nop; nop; nop	! 00 = reset (fake)
258	VTRAP(T_TEXTFAULT, memfault)	! 01 = instr. fetch fault
259	TRAP(T_ILLINST)			! 02 = illegal instruction
260	TRAP(T_PRIVINST)		! 03 = privileged instruction
261	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
262	WINDOW_OF			! 05 = window overflow
263	WINDOW_UF			! 06 = window underflow
264	TRAP(T_ALIGN)			! 07 = address alignment error
265	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
266	VTRAP(T_DATAFAULT, memfault)	! 09 = data fetch fault
267	TRAP(T_TAGOF)			! 0a = tag overflow
268	UTRAP(0x0b)
269	UTRAP(0x0c)
270	UTRAP(0x0d)
271	UTRAP(0x0e)
272	UTRAP(0x0f)
273	UTRAP(0x10)
274	SOFTINT(1, IE_L1)		! 11 = level 1 interrupt
275	HARDINT(2)			! 12 = level 2 interrupt
276	HARDINT(3)			! 13 = level 3 interrupt
277	SOFTINT(4, IE_L4)		! 14 = level 4 interrupt
278	HARDINT(5)			! 15 = level 5 interrupt
279	SOFTINT(6, IE_L6)		! 16 = level 6 interrupt
280	HARDINT(7)			! 17 = level 7 interrupt
281	HARDINT(8)			! 18 = level 8 interrupt
282	HARDINT(9)			! 19 = level 9 interrupt
283	HARDINT(10)			! 1a = level 10 interrupt
284	HARDINT(11)			! 1b = level 11 interrupt
285	ZS_INTERRUPT			! 1c = level 12 (zs) interrupt
286	HARDINT(13)			! 1d = level 13 interrupt
287	HARDINT(14)			! 1e = level 14 interrupt
288	VTRAP(15, nmi)			! 1f = nonmaskable interrupt
289	UTRAP(0x20)
290	UTRAP(0x21)
291	UTRAP(0x22)
292	UTRAP(0x23)
293	UTRAP(0x24)
294	UTRAP(0x25)
295	UTRAP(0x26)
296	UTRAP(0x27)
297	UTRAP(0x28)
298	UTRAP(0x29)
299	UTRAP(0x2a)
300	UTRAP(0x2b)
301	UTRAP(0x2c)
302	UTRAP(0x2d)
303	UTRAP(0x2e)
304	UTRAP(0x2f)
305	UTRAP(0x30)
306	UTRAP(0x31)
307	UTRAP(0x32)
308	UTRAP(0x33)
309	UTRAP(0x34)
310	UTRAP(0x35)
311	TRAP(T_CPDISABLED)	! 36 = coprocessor instr, EC bit off in psr
312	UTRAP(0x37)
313	UTRAP(0x38)
314	UTRAP(0x39)
315	UTRAP(0x3a)
316	UTRAP(0x3b)
317	UTRAP(0x3c)
318	UTRAP(0x3d)
319	UTRAP(0x3e)
320	UTRAP(0x3f)
321	TRAP(T_CPEXCEPTION)	! 40 = coprocessor exception
322	UTRAP(0x41)
323	UTRAP(0x42)
324	UTRAP(0x43)
325	UTRAP(0x44)
326	UTRAP(0x45)
327	UTRAP(0x46)
328	UTRAP(0x47)
329	UTRAP(0x48)
330	UTRAP(0x49)
331	UTRAP(0x4a)
332	UTRAP(0x4b)
333	UTRAP(0x4c)
334	UTRAP(0x4d)
335	UTRAP(0x4e)
336	UTRAP(0x4f)
337	UTRAP(0x50)
338	UTRAP(0x51)
339	UTRAP(0x52)
340	UTRAP(0x53)
341	UTRAP(0x54)
342	UTRAP(0x55)
343	UTRAP(0x56)
344	UTRAP(0x57)
345	UTRAP(0x58)
346	UTRAP(0x59)
347	UTRAP(0x5a)
348	UTRAP(0x5b)
349	UTRAP(0x5c)
350	UTRAP(0x5d)
351	UTRAP(0x5e)
352	UTRAP(0x5f)
353	UTRAP(0x60)
354	UTRAP(0x61)
355	UTRAP(0x62)
356	UTRAP(0x63)
357	UTRAP(0x64)
358	UTRAP(0x65)
359	UTRAP(0x66)
360	UTRAP(0x67)
361	UTRAP(0x68)
362	UTRAP(0x69)
363	UTRAP(0x6a)
364	UTRAP(0x6b)
365	UTRAP(0x6c)
366	UTRAP(0x6d)
367	UTRAP(0x6e)
368	UTRAP(0x6f)
369	UTRAP(0x70)
370	UTRAP(0x71)
371	UTRAP(0x72)
372	UTRAP(0x73)
373	UTRAP(0x74)
374	UTRAP(0x75)
375	UTRAP(0x76)
376	UTRAP(0x77)
377	UTRAP(0x78)
378	UTRAP(0x79)
379	UTRAP(0x7a)
380	UTRAP(0x7b)
381	UTRAP(0x7c)
382	UTRAP(0x7d)
383	UTRAP(0x7e)
384	UTRAP(0x7f)
385	SUN_SYSCALL		! 80 = sun syscall
386	BPT			! 81 = pseudo breakpoint instruction
387	TRAP(T_DIV0)		! 82 = divide by zero
388	TRAP(T_FLUSHWIN)	! 83 = flush windows
389	TRAP(T_CLEANWIN)	! 84 = provide clean windows
390	TRAP(T_RANGECHECK)	! 85 = ???
391	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
392	TRAP(T_INTOF)		! 87 = integer overflow
393	BPT_KGDB_EXEC		! 88 = enter kernel gdb on kernel startup
394	SYSCALL			! 89 = bsd syscall
395	STRAP(0x8a)
396	STRAP(0x8b)
397	STRAP(0x8c)
398	STRAP(0x8d)
399	STRAP(0x8e)
400	STRAP(0x8f)
401	STRAP(0x90)
402	STRAP(0x91)
403	STRAP(0x92)
404	STRAP(0x93)
405	STRAP(0x94)
406	STRAP(0x95)
407	STRAP(0x96)
408	STRAP(0x97)
409	STRAP(0x98)
410	STRAP(0x99)
411	STRAP(0x9a)
412	STRAP(0x9b)
413	STRAP(0x9c)
414	STRAP(0x9d)
415	STRAP(0x9e)
416	STRAP(0x9f)
417	STRAP(0xa0)
418	STRAP(0xa1)
419	STRAP(0xa2)
420	STRAP(0xa3)
421	STRAP(0xa4)
422	STRAP(0xa5)
423	STRAP(0xa6)
424	STRAP(0xa7)
425	STRAP(0xa8)
426	STRAP(0xa9)
427	STRAP(0xaa)
428	STRAP(0xab)
429	STRAP(0xac)
430	STRAP(0xad)
431	STRAP(0xae)
432	STRAP(0xaf)
433	STRAP(0xb0)
434	STRAP(0xb1)
435	STRAP(0xb2)
436	STRAP(0xb3)
437	STRAP(0xb4)
438	STRAP(0xb5)
439	STRAP(0xb6)
440	STRAP(0xb7)
441	STRAP(0xb8)
442	STRAP(0xb9)
443	STRAP(0xba)
444	STRAP(0xbb)
445	STRAP(0xbc)
446	STRAP(0xbd)
447	STRAP(0xbe)
448	STRAP(0xbf)
449	STRAP(0xc0)
450	STRAP(0xc1)
451	STRAP(0xc2)
452	STRAP(0xc3)
453	STRAP(0xc4)
454	STRAP(0xc5)
455	STRAP(0xc6)
456	STRAP(0xc7)
457	STRAP(0xc8)
458	STRAP(0xc9)
459	STRAP(0xca)
460	STRAP(0xcb)
461	STRAP(0xcc)
462	STRAP(0xcd)
463	STRAP(0xce)
464	STRAP(0xcf)
465	STRAP(0xd0)
466	STRAP(0xd1)
467	STRAP(0xd2)
468	STRAP(0xd3)
469	STRAP(0xd4)
470	STRAP(0xd5)
471	STRAP(0xd6)
472	STRAP(0xd7)
473	STRAP(0xd8)
474	STRAP(0xd9)
475	STRAP(0xda)
476	STRAP(0xdb)
477	STRAP(0xdc)
478	STRAP(0xdd)
479	STRAP(0xde)
480	STRAP(0xdf)
481	STRAP(0xe0)
482	STRAP(0xe1)
483	STRAP(0xe2)
484	STRAP(0xe3)
485	STRAP(0xe4)
486	STRAP(0xe5)
487	STRAP(0xe6)
488	STRAP(0xe7)
489	STRAP(0xe8)
490	STRAP(0xe9)
491	STRAP(0xea)
492	STRAP(0xeb)
493	STRAP(0xec)
494	STRAP(0xed)
495	STRAP(0xee)
496	STRAP(0xef)
497	STRAP(0xf0)
498	STRAP(0xf1)
499	STRAP(0xf2)
500	STRAP(0xf3)
501	STRAP(0xf4)
502	STRAP(0xf5)
503	STRAP(0xf6)
504	STRAP(0xf7)
505	STRAP(0xf8)
506	STRAP(0xf9)
507	STRAP(0xfa)
508	STRAP(0xfb)
509	STRAP(0xfc)
510	STRAP(0xfd)
511	STRAP(0xfe)
512	STRAP(0xff)
513
514	/* the message buffer is always mapped */
515_msgbufmapped:
516	.word	1
517
518#ifdef DEBUG
519/*
520 * A hardware red zone is impossible.  We simulate one in software by
521 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
522 * This is expensive and is only enabled when debugging.
523 */
524#define	REDSIZE	(8*96)		/* some room for bouncing */
525#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
526	.data
527_redzone:
528	.word	_idle_u + REDSIZE
529_redstack:
530	.skip	REDSTACK
531	.text
532Lpanic_red:
533	.asciz	"stack overflow"
534	ALIGN
535
536	/* set stack pointer redzone to base+minstack; alters base */
537#define	SET_SP_REDZONE(base, tmp) \
538	add	base, REDSIZE, base; \
539	sethi	%hi(_redzone), tmp; \
540	st	base, [tmp + %lo(_redzone)]
541
542	/* variant with a constant */
543#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
544	set	(const) + REDSIZE, tmp1; \
545	sethi	%hi(_redzone), tmp2; \
546	st	tmp1, [tmp2 + %lo(_redzone)]
547
548	/* check stack pointer against redzone (uses two temps) */
549#define	CHECK_SP_REDZONE(t1, t2) \
550	sethi	%hi(_redzone), t1; \
551	ld	[t1 + %lo(_redzone)], t2; \
552	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
553	bgeu	7f; nop;	/* and can continue normally */ \
554	/* move to panic stack */ \
555	st	%g0, [t1 + %lo(_redzone)]; \
556	set	_redstack + REDSTACK - 96, %sp; \
557	/* prevent panic() from lowering ipl */ \
558	sethi	%hi(_panicstr), t2; \
559	set	Lpanic_red, t2; \
560	st	t2, [t1 + %lo(_panicstr)]; \
561	rd	%psr, t1;		/* t1 = splhigh() */ \
562	or	t1, PSR_PIL, t2; \
563	wr	t2, 0, %psr; \
564	wr	t2, PSR_ET, %psr;	/* turn on traps */ \
565	nop; nop; nop; \
566	save	%sp, -96, %sp;		/* preserve current window */ \
567	sethi	%hi(Lpanic_red), %o0; \
568	call	_panic; or %o0, %lo(Lpanic_red), %o0; \
5697:
570
571#else
572
573#define	SET_SP_REDZONE(base, tmp)
574#define	SET_SP_REDZONE_CONST(const, t1, t2)
575#define	CHECK_SP_REDZONE(t1, t2)
576#endif
577
578/*
579 * The window code must verify user stack addresses before using them.
580 * A user stack pointer is invalid if:
581 *	- it is not on an 8 byte boundary;
582 *	- its pages (a register window, being 64 bytes, can occupy
583 *	  two pages) are not readable or writable.
584 * We define three separate macros here for testing user stack addresses.
585 *
586 * PTE_OF_ADDR locates a PTE, branching to a `bad address'
587 *	handler if the stack pointer points into the hole in the
588 *	address space (i.e., top 3 bits are not either all 1 or all 0);
589 * CMP_PTE_USER_READ compares the located PTE against `user read' mode;
590 * CMP_PTE_USER_WRITE compares the located PTE against `user write' mode.
591 * The compares give `equal' if read or write is OK.
592 *
593 * Note that the user stack pointer usually points into high addresses
594 * (top 3 bits all 1), so that is what we check first.
595 *
596 * The code below also assumes that PTE_OF_ADDR is safe in a delay
597 * slot; it is, at it merely sets its `pte' register to a temporary value.
598 */
599	/* input: addr, output: pte; aux: bad address label */
600#define	PTE_OF_ADDR(addr, pte, bad) \
601	sra	addr, PG_VSHIFT, pte; \
602	cmp	pte, -1; \
603	be,a	1f; andn addr, 4095, pte; \
604	tst	pte; \
605	bne	bad; EMPTY; \
606	andn	addr, 4095, pte; \
6071:
608
609	/* input: pte; output: condition codes */
610#define	CMP_PTE_USER_READ(pte) \
611	lda	[pte] ASI_PTE, pte; \
612	srl	pte, PG_PROTSHIFT, pte; \
613	andn	pte, (PG_W >> PG_PROTSHIFT), pte; \
614	cmp	pte, PG_PROTUREAD
615
616	/* input: pte; output: condition codes */
617#define	CMP_PTE_USER_WRITE(pte) \
618	lda	[pte] ASI_PTE, pte; \
619	srl	pte, PG_PROTSHIFT, pte; \
620	cmp	pte, PG_PROTUWRITE
621
622/*
623 * The calculations in PTE_OF_ADDR and CMP_PTE_USER_* are rather slow:
624 * in particular, according to Gordon Irlam of the University of Adelaide
625 * in Australia, these consume at least 18 cycles on an SS1 and 37 on an
626 * SS2.  Hence, we try to avoid them in the common case.
627 *
628 * A chunk of 64 bytes is on a single page if and only if:
629 *
630 *	((base + 64 - 1) & ~4095) == (base & ~4095)
631 *
632 * Equivalently (and faster to test), the low order bits (base & 4095) must
633 * be small enough so that the sum (base + 63) does not carry out into the
634 * upper page-address bits, i.e.,
635 *
636 *	(base & 4095) < (4096 - 63)
637 *
638 * so we allow testing that here.  This macro is also assumed to be safe
639 * in a delay slot (modulo overwriting its temporary).
640 */
641#define	SLT_IF_1PAGE_RW(addr, tmp) \
642	and	addr, 4095, tmp; \
643	cmp	tmp, (4096 - 63)
644
645/*
646 * Every trap that enables traps must set up stack space.
647 * If the trap is from user mode, this involves switching to the kernel
648 * stack for the current process, and we must also set cpcb->pcb_uw
649 * so that the window overflow handler can tell user windows from kernel
650 * windows.
651 *
652 * The number of user windows is:
653 *
654 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
655 *
656 * (where pcb_wim = log2(current %wim) and CWP = low 5 bits of %psr).
657 * We compute this expression by table lookup in uwtab[CWP - pcb_wim],
658 * which has been set up as:
659 *
660 *	for i in [-nwin+1 .. nwin-1]
661 *		uwtab[i] = (nwin - 1 - i) % nwin;
662 *
663 * (If you do not believe this works, try it for yourself.)
664 *
665 * We also keep one or two more tables:
666 *
667 *	for i in 0..nwin-1
668 *		wmask[i] = 1 << ((i + 1) % nwindows);
669 *
670 * wmask[CWP] tells whether a `rett' would return into the invalid window.
671 */
672	.data
673	.skip	32			! alignment byte & negative indicies
674uwtab:	.skip	32			! u_char uwtab[-31..31];
675wmask:	.skip	32			! u_char wmask[0..31];
676
677	.text
678/*
679 * Things begin to grow uglier....
680 *
681 * Each trap handler may (always) be running in the trap window.
682 * If this is the case, it cannot enable further traps until it writes
683 * the register windows into the stack (or, if the stack is no good,
684 * the current pcb).
685 *
686 * ASSUMPTIONS: TRAP_SETUP() is called with:
687 *	%l0 = %psr
688 *	%l1 = return pc
689 *	%l2 = return npc
690 *	%l3 = (some value that must not be altered)
691 * which means we have 4 registers to work with.
692 *
693 * The `stackspace' argument is the number of stack bytes to allocate
694 * for register-saving, and must be at least -64 (and typically more,
695 * for global registers and %y).
696 *
697 * Trapframes should use -CCFSZ-80.  (80 = sizeof(struct trapframe);
698 * see trap.h.  This basically means EVERYONE.  Interrupt frames could
699 * get away with less, but currently do not.)
700 *
701 * The basic outline here is:
702 *
703 *	if (trap came from kernel mode) {
704 *		if (we are in the trap window)
705 *			save it away;
706 *		%sp = %fp - stackspace;
707 *	} else {
708 *		compute the number of user windows;
709 *		if (we are in the trap window)
710 *			save it away;
711 *		%sp = (top of kernel stack) - stackspace;
712 *	}
713 *
714 * Again, the number of user windows is:
715 *
716 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
717 *
718 * (where pcb_wim = log2(current %wim) and CWP is the low 5 bits of %psr),
719 * and this is computed as `uwtab[CWP - pcb_wim]'.
720 *
721 * NOTE: if you change this code, you will have to look carefully
722 * at the window overflow and underflow handlers and make sure they
723 * have similar changes made as needed.
724 */
725#define	CALL_CLEAN_TRAP_WINDOW \
726	sethi	%hi(clean_trap_window), %l7; \
727	jmpl	%l7 + %lo(clean_trap_window), %l4; \
728	 mov	%g7, %l7	/* save %g7 in %l7 for clean_trap_window */
729
730#define	TRAP_SETUP(stackspace) \
731	rd	%wim, %l4; \
732	mov	1, %l5; \
733	sll	%l5, %l0, %l5; \
734	btst	PSR_PS, %l0; \
735	bz	1f; \
736	 btst	%l5, %l4; \
737	/* came from kernel mode; cond codes indicate trap window */ \
738	bz,a	3f; \
739	 add	%fp, stackspace, %sp;	/* want to just set %sp */ \
740	CALL_CLEAN_TRAP_WINDOW;		/* but maybe need to clean first */ \
741	b	3f; \
742	 add	%fp, stackspace, %sp; \
7431: \
744	/* came from user mode: compute pcb_nw */ \
745	sethi	%hi(_cpcb), %l6; \
746	ld	[%l6 + %lo(_cpcb)], %l6; \
747	ld	[%l6 + PCB_WIM], %l5; \
748	and	%l0, 31, %l4; \
749	sub	%l4, %l5, %l5; \
750	set	uwtab, %l4; \
751	ldub	[%l4 + %l5], %l5; \
752	st	%l5, [%l6 + PCB_UW]; \
753	/* cond codes still indicate whether in trap window */ \
754	bz,a	2f; \
755	 sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
756	/* yes, in trap window; must clean it */ \
757	CALL_CLEAN_TRAP_WINDOW; \
758	sethi	%hi(_cpcb), %l6; \
759	ld	[%l6 + %lo(_cpcb)], %l6; \
760	sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
7612: \
762	/* trap window is (now) clean: set %sp */ \
763	or	%l5, %lo(UPAGES*NBPG+(stackspace)), %l5; \
764	add	%l6, %l5, %sp; \
765	SET_SP_REDZONE(%l6, %l5); \
7663: \
767	CHECK_SP_REDZONE(%l6, %l5)
768
769/*
770 * Interrupt setup is almost exactly like trap setup, but we need to
771 * go to the interrupt stack if (a) we came from user mode or (b) we
772 * came from kernel mode on the kernel stack.
773 */
774#define	INTR_SETUP(stackspace) \
775	rd	%wim, %l4; \
776	mov	1, %l5; \
777	sll	%l5, %l0, %l5; \
778	btst	PSR_PS, %l0; \
779	bz	1f; \
780	 btst	%l5, %l4; \
781	/* came from kernel mode; cond codes still indicate trap window */ \
782	bz,a	0f; \
783	 sethi	%hi(_eintstack), %l7; \
784	CALL_CLEAN_TRAP_WINDOW; \
785	sethi	%hi(_eintstack), %l7; \
7860:	/* now if %fp >= eintstack, we were on the kernel stack */ \
787	cmp	%fp, %l7; \
788	bge,a	3f; \
789	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
790	b	4f; \
791	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
7921: \
793	/* came from user mode: compute pcb_nw */ \
794	sethi	%hi(_cpcb), %l6; \
795	ld	[%l6 + %lo(_cpcb)], %l6; \
796	ld	[%l6 + PCB_WIM], %l5; \
797	and	%l0, 31, %l4; \
798	sub	%l4, %l5, %l5; \
799	set	uwtab, %l4; \
800	ldub	[%l4 + %l5], %l5; \
801	st	%l5, [%l6 + PCB_UW]; \
802	/* cond codes still indicate whether in trap window */ \
803	bz,a	2f; \
804	 sethi	%hi(_eintstack), %l7; \
805	/* yes, in trap window; must save regs */ \
806	CALL_CLEAN_TRAP_WINDOW; \
807	sethi	%hi(_eintstack), %l7; \
8082: \
809	add	%l7, stackspace, %sp; \
8103: \
811	SET_SP_REDZONE_CONST(_intstack, %l6, %l5); \
8124: \
813	CHECK_SP_REDZONE(%l6, %l5)
814
815/*
816 * Handler for making the trap window shiny clean.
817 *
818 * On entry:
819 *	cpcb->pcb_nw = number of user windows
820 *	%l0 = %psr
821 *	%l1 must not be clobbered
822 *	%l2 must not be clobbered
823 *	%l3 must not be clobbered
824 *	%l4 = address for `return'
825 *	%l7 = saved %g7 (we put this in a delay slot above, to save work)
826 *
827 * On return:
828 *	%wim has changed, along with cpcb->pcb_wim
829 *	%g7 has been restored
830 *
831 * Normally, we push only one window.
832 */
833clean_trap_window:
834	mov	%g5, %l5		! save %g5
835	mov	%g6, %l6		! ... and %g6
836/*	mov	%g7, %l7		! ... and %g7 (already done for us) */
837	sethi	%hi(_cpcb), %g6		! get current pcb
838	ld	[%g6 + %lo(_cpcb)], %g6
839
840	/* Figure out whether it is a user window (cpcb->pcb_uw > 0). */
841	ld	[%g6 + PCB_UW], %g7
842	deccc	%g7
843	bge	ctw_user
844	 save	%g0, %g0, %g0		! in any case, enter window to save
845
846	/* The window to be pushed is a kernel window. */
847	std	%l0, [%sp + (0*8)]
848ctw_merge:
849	std	%l2, [%sp + (1*8)]
850	std	%l4, [%sp + (2*8)]
851	std	%l6, [%sp + (3*8)]
852	std	%i0, [%sp + (4*8)]
853	std	%i2, [%sp + (5*8)]
854	std	%i4, [%sp + (6*8)]
855	std	%i6, [%sp + (7*8)]
856
857	/* Set up new window invalid mask, and update cpcb->pcb_wim. */
858	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp
859	mov	1, %g5			! g5 = 1 << new_cwp;
860	sll	%g5, %g7, %g5
861	wr	%g5, 0, %wim		! setwim(g5);
862	and	%g7, 31, %g7		! cpcb->pcb_wim = g7 & 31;
863	st	%g7, [%g6 + PCB_WIM]
864	nop
865	restore				! back to trap window
866
867	mov	%l5, %g5		! restore g5
868	mov	%l6, %g6		! ... and g6
869	jmp	%l4 + 8			! return to caller
870	 mov	%l7, %g7		! ... and g7
871	/* NOTREACHED */
872
873ctw_user:
874	/*
875	 * The window to be pushed is a user window.
876	 * We must verify the stack pointer (alignment & permissions).
877	 * See comments above definition of PTE_OF_ADDR.
878	 */
879	st	%g7, [%g6 + PCB_UW]	! cpcb->pcb_uw--;
880	btst	7, %sp			! if not aligned,
881	bne	ctw_invalid		! choke on it
882	 EMPTY
883	PTE_OF_ADDR(%sp, %g7, ctw_invalid)
884	CMP_PTE_USER_WRITE(%g7)		! likewise if not writable
885	bne	ctw_invalid
886	 EMPTY
887	SLT_IF_1PAGE_RW(%sp, %g7)
888	bl,a	ctw_merge		! all ok if only 1
889	 std	%l0, [%sp]
890	add	%sp, 7*8, %g5		! check last addr too
891	PTE_OF_ADDR(%g5, %g7, ctw_invalid)
892	CMP_PTE_USER_WRITE(%g7)
893	be,a	ctw_merge		! all ok: store <l0,l1> and merge
894	 std	%l0, [%sp]
895
896	/*
897	 * The window we wanted to push could not be pushed.
898	 * Instead, save ALL user windows into the pcb.
899	 * We will notice later that we did this, when we
900	 * get ready to return from our trap or syscall.
901	 *
902	 * The code here is run rarely and need not be optimal.
903	 */
904ctw_invalid:
905	/*
906	 * Reread cpcb->pcb_uw.  We decremented this earlier,
907	 * so it is off by one.
908	 */
909	ld	[%g6 + PCB_UW], %g7	! (number of user windows) - 1
910	add	%g6, PCB_RW, %g5
911
912	/* save g7+1 windows, starting with the current one */
9131:					! do {
914	std	%l0, [%g5 + (0*8)]	!	rw->rw_local[0] = l0;
915	std	%l2, [%g5 + (1*8)]	!	...
916	std	%l4, [%g5 + (2*8)]
917	std	%l6, [%g5 + (3*8)]
918	std	%i0, [%g5 + (4*8)]
919	std	%i2, [%g5 + (5*8)]
920	std	%i4, [%g5 + (6*8)]
921	std	%i6, [%g5 + (7*8)]
922	deccc	%g7			!	if (n > 0) save(), rw++;
923	bge,a	1b			! } while (--n >= 0);
924	 save	%g5, 64, %g5
925
926	/* stash sp for bottommost window */
927	st	%sp, [%g5 + 64 + (7*8)]
928
929	/* set up new wim */
930	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp;
931	mov	1, %g5			! g5 = 1 << new_cwp;
932	sll	%g5, %g7, %g5
933	wr	%g5, 0, %wim		! wim = g5;
934	and	%g7, 31, %g7
935	st	%g7, [%g6 + PCB_WIM]	! cpcb->pcb_wim = new_cwp;
936
937	/* fix up pcb fields */
938	ld	[%g6 + PCB_UW], %g7	! n = cpcb->pcb_uw;
939	add	%g7, 1, %g5
940	st	%g5, [%g6 + PCB_NSAVED]	! cpcb->pcb_nsaved = n + 1;
941	st	%g0, [%g6 + PCB_UW]	! cpcb->pcb_uw = 0;
942
943	/* return to trap window */
9441:	deccc	%g7			! do {
945	bge	1b			!	restore();
946	 restore			! } while (--n >= 0);
947
948	mov	%l5, %g5		! restore g5, g6, & g7, and return
949	mov	%l6, %g6
950	jmp	%l4 + 8
951	 mov	%l7, %g7
952	/* NOTREACHED */
953
954
955/*
956 * Each memory access (text or data) fault, from user or kernel mode,
957 * comes here.  We read the error register and figure out what has
958 * happened.
959 *
960 * This cannot be done from C code since we must not enable traps (and
961 * hence may not use the `save' instruction) until we have decided that
962 * the error is or is not an asynchronous one that showed up after a
963 * synchronous error, but which must be handled before the sync err.
964 *
965 * Most memory faults are user mode text or data faults, which can cause
966 * signal delivery or ptracing, for which we must build a full trapframe.
967 * It does not seem worthwhile to work to avoid this in the other cases,
968 * so we store all the %g registers on the stack immediately.
969 *
970 * On entry:
971 *	%l0 = %psr
972 *	%l1 = return pc
973 *	%l2 = return npc
974 *	%l3 = T_TEXTFAULT or T_DATAFAULT
975 *
976 * Internal:
977 *	%l4 = %y, until we call mem_access_fault (then onto trapframe)
978 *	%l5 = IE_reg_addr, if async mem error
979 *
980 * We know about the layout of the error registers here.
981 *	addr	reg
982 *	----	---
983 *	a	AC_SYNC_ERR
984 *	a+4	AC_SYNC_VA
985 *	a+8	AC_ASYNC_ERR
986 *	a+12	AC_ASYNC_VA
987 */
988memfault:
989	TRAP_SETUP(-CCFSZ-80)
990
991	INCR(_cnt+V_FAULTS)		! cnt.v_faults++ (clobbers %o0,%o1)
992
993	st	%g1, [%sp + CCFSZ + 20]	! save g1
994	rd	%y, %l4			! save y
995
996#if AC_SYNC_ERR + 4 != AC_SYNC_VA || \
997    AC_SYNC_ERR + 8 != AC_ASYNC_ERR || AC_SYNC_ERR + 12 != AC_ASYNC_VA
998	help help help		! I, I, I wanna be a lifeguard
999#endif
1000	set	AC_SYNC_ERR, %o0
1001	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
1002	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1003	inc	4, %o0
1004	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
1005	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1006	btst	SER_MEMERR, %o1		! memory error?
1007	std	%g6, [%sp + CCFSZ + 40]
1008	bz,a	normal_mem_fault	! no, just a regular fault
1009 	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1010
1011	/*
1012	 * We got a synchronous memory error.  It could be one that
1013	 * happened because there were two stores in a row, and the
1014	 * first went into the write buffer, and the second caused this
1015	 * synchronous trap; so there could now be a pending async error.
1016	 * This is in fact the case iff the two va's differ.
1017	 */
1018	inc	4, %o0
1019	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1020	inc	4, %o0
1021	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1022	cmp	%o2, %o4
1023	be,a	1f			! no, not an async err
1024	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1025
1026	/*
1027	 * Handle the async error; ignore the sync error for now
1028	 * (we may end up getting it again, but so what?).
1029	 * This code is essentially the same as that at `nmi' below,
1030	 * but the register usage is different and we cannot merge.
1031	 */
1032	sethi	%hi(IE_reg_addr), %l5	! ienab_bic(IE_ALLIE);
1033	ldub	[%l5 + %lo(IE_reg_addr)], %o0
1034	andn	%o0, IE_ALLIE, %o0
1035	stb	%o0, [%l5 + %lo(IE_reg_addr)]
1036
1037	/*
1038	 * Now reenable traps and call C code.
1039	 * %o1 through %o4 still hold the error reg contents.
1040	 * If memerr() returns, return from the trap.
1041	 */
1042	wr	%l0, PSR_ET, %psr
1043	call	_memerr			! memerr(0, ser, sva, aer, ava)
1044	 clr	%o0
1045
1046	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1047	wr	%l0, 0, %psr		! and disable traps, 3 instr delay
1048	ldd	[%sp + CCFSZ + 24], %g2
1049	ldd	[%sp + CCFSZ + 32], %g4
1050	ldd	[%sp + CCFSZ + 40], %g6
1051	/* now safe to set IE_ALLIE again */
1052	ldub	[%l5 + %lo(IE_reg_addr)], %o1
1053	or	%o1, IE_ALLIE, %o1
1054	stb	%o1, [%l5 + %lo(IE_reg_addr)]
1055	b	return_from_trap
1056	 wr	%l4, 0, %y		! restore y
1057
1058	/*
1059	 * Trap was a synchronous memory error.
1060	 * %o1 through %o4 still hold the error reg contents.
1061	 */
10621:
1063	call	_memerr			! memerr(1, ser, sva, aer, ava)
1064	 mov	1, %o0
1065
1066	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1067	ldd	[%sp + CCFSZ + 24], %g2
1068	ldd	[%sp + CCFSZ + 32], %g4
1069	ldd	[%sp + CCFSZ + 40], %g6
1070	wr	%l4, 0, %y		! restore y
1071	b	return_from_trap
1072	 wr	%l0, 0, %psr
1073	/* NOTREACHED */
1074
1075normal_mem_fault:
1076	/*
1077	 * Trap was some other error; call C code to deal with it.
1078	 * Must finish trap frame (psr,pc,npc,%y,%o0..%o7) in case
1079	 * we decide to deliver a signal or ptrace the process.
1080	 * %g1..%g7 were already set up above.
1081	 */
1082	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
1083	mov	%l3, %o0		! (argument: type)
1084	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
1085	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
1086	mov	%l1, %o3		! (argument: pc)
1087	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1088	std	%i2, [%sp + CCFSZ + 56]
1089	mov	%l0, %o4		! (argument: psr)
1090	std	%i4, [%sp + CCFSZ + 64]
1091	std	%i6, [%sp + CCFSZ + 72]
1092	call	_mem_access_fault	! mem_access_fault(type, ser, sva,
1093					!		pc, psr, &tf);
1094	 add	%sp, CCFSZ, %o5		! (argument: &tf)
1095
1096	ldd	[%sp + CCFSZ + 0], %l0	! load new values
1097	ldd	[%sp + CCFSZ + 8], %l2
1098	wr	%l3, 0, %y
1099	ld	[%sp + CCFSZ + 20], %g1
1100	ldd	[%sp + CCFSZ + 24], %g2
1101	ldd	[%sp + CCFSZ + 32], %g4
1102	ldd	[%sp + CCFSZ + 40], %g6
1103	ldd	[%sp + CCFSZ + 48], %i0
1104	ldd	[%sp + CCFSZ + 56], %i2
1105	ldd	[%sp + CCFSZ + 64], %i4
1106	ldd	[%sp + CCFSZ + 72], %i6
1107
1108	b	return_from_trap	! go return
1109	 wr	%l0, 0, %psr		! (but first disable traps again)
1110
1111
1112/*
1113 * fp_exception has to check to see if we are trying to save
1114 * the FP state, and if so, continue to save the FP state.
1115 *
1116 * We do not even bother checking to see if we were in kernel mode,
1117 * since users have no access to the special_fp_store instruction.
1118 *
1119 * This whole idea was stolen from Sprite.
1120 */
1121fp_exception:
1122	set	special_fp_store, %l4	! see if we came from the special one
1123	cmp	%l1, %l4		! pc == special_fp_store?
1124	bne	slowtrap		! no, go handle per usual
1125	 EMPTY
1126	sethi	%hi(savefpcont), %l4	! yes, "return" to the special code
1127	or	%lo(savefpcont), %l4, %l4
1128	jmp	%l4
1129	 rett	%l4 + 4
1130
1131/*
1132 * slowtrap() builds a trap frame and calls trap().
1133 * This is called `slowtrap' because it *is*....
1134 * We have to build a full frame for ptrace(), for instance.
1135 *
1136 * Registers:
1137 *	%l0 = %psr
1138 *	%l1 = return pc
1139 *	%l2 = return npc
1140 *	%l3 = trap code
1141 */
1142slowtrap:
1143	TRAP_SETUP(-CCFSZ-80)
1144	/*
1145	 * Phew, ready to enable traps and call C code.
1146	 */
1147	mov	%l3, %o0		! put type in %o0 for later
1148Lslowtrap_reenter:
1149	wr	%l0, PSR_ET, %psr	! traps on again
1150	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
1151	rd	%y, %l3
1152	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
1153	st	%g1, [%sp + CCFSZ + 20]
1154	std	%g2, [%sp + CCFSZ + 24]
1155	std	%g4, [%sp + CCFSZ + 32]
1156	std	%g6, [%sp + CCFSZ + 40]
1157	std	%i0, [%sp + CCFSZ + 48]
1158	mov	%l0, %o1		! (psr)
1159	std	%i2, [%sp + CCFSZ + 56]
1160	mov	%l1, %o2		! (pc)
1161	std	%i4, [%sp + CCFSZ + 64]
1162	add	%sp, CCFSZ, %o3		! (&tf)
1163	call	_trap			! trap(type, psr, pc, &tf)
1164	 std	%i6, [%sp + CCFSZ + 72]
1165
1166	ldd	[%sp + CCFSZ], %l0	! load new values
1167	ldd	[%sp + CCFSZ + 8], %l2
1168	wr	%l3, 0, %y
1169	ld	[%sp + CCFSZ + 20], %g1
1170	ldd	[%sp + CCFSZ + 24], %g2
1171	ldd	[%sp + CCFSZ + 32], %g4
1172	ldd	[%sp + CCFSZ + 40], %g6
1173	ldd	[%sp + CCFSZ + 48], %i0
1174	ldd	[%sp + CCFSZ + 56], %i2
1175	ldd	[%sp + CCFSZ + 64], %i4
1176	ldd	[%sp + CCFSZ + 72], %i6
1177	b	return_from_trap
1178	 wr	%l0, 0, %psr
1179
1180/*
1181 * Do a `software' trap by re-entering the trap code, possibly first
1182 * switching from interrupt stack to kernel stack.  This is used for
1183 * scheduling and signal ASTs (which generally occur from softclock or
1184 * tty or net interrupts) and register window saves (which might occur
1185 * from anywhere).
1186 *
1187 * The current window is the trap window, and it is by definition clean.
1188 * We enter with the trap type in %o0.  All we have to do is jump to
1189 * Lslowtrap_reenter above, but maybe after switching stacks....
1190 */
1191softtrap:
1192	sethi	%hi(_eintstack), %l7
1193	cmp	%sp, %l7
1194	bge	Lslowtrap_reenter
1195	 EMPTY
1196	sethi	%hi(_cpcb), %l6
1197	ld	[%l6 + %lo(_cpcb)], %l6
1198	set	UPAGES*NBPG - CCFSZ - 80, %l5
1199	add	%l6, %l5, %l7
1200	SET_SP_REDZONE(%l6, %l5)
1201	b	Lslowtrap_reenter
1202	 mov	%l7, %sp
1203
1204#ifdef KGDB
1205/*
1206 * bpt is entered on all breakpoint traps.
1207 * If this is a kernel breakpoint, we do not want to call trap().
1208 * Among other reasons, this way we can set breakpoints in trap().
1209 */
1210bpt:
1211	btst	PSR_PS, %l0		! breakpoint from kernel?
1212	bz	slowtrap		! no, go do regular trap
1213	 nop
1214
1215	/*
1216	 * Build a trap frame for kgdb_trap_glue to copy.
1217	 * Enable traps but set ipl high so that we will not
1218	 * see interrupts from within breakpoints.
1219	 */
1220	TRAP_SETUP(-CCFSZ-80)
1221	or	%l0, PSR_PIL, %l4	! splhigh()
1222	wr	%l4, 0, %psr		! the manual claims that this
1223	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1224	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1225	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
1226	rd	%y, %l3
1227	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1228	rd	%wim, %l3
1229	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
1230	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1231	std	%g2, [%sp + CCFSZ + 24]	! etc
1232	std	%g4, [%sp + CCFSZ + 32]
1233	std	%g6, [%sp + CCFSZ + 40]
1234	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
1235	std	%i2, [%sp + CCFSZ + 56]	! etc
1236	std	%i4, [%sp + CCFSZ + 64]
1237	std	%i6, [%sp + CCFSZ + 72]
1238
1239	/*
1240	 * Now call kgdb_trap_glue(); if it returns, call trap().
1241	 */
1242	mov	%o0, %l3		! gotta save trap type
1243	call	_kgdb_trap_glue		! kgdb_trap_glue(type, &trapframe)
1244	 add	%sp, CCFSZ, %o1		! (&trapframe)
1245
1246	/*
1247	 * Use slowtrap to call trap---but first erase our tracks
1248	 * (put the registers back the way they were).
1249	 */
1250	mov	%l3, %o0		! slowtrap will need trap type
1251	ld	[%sp + CCFSZ + 12], %l3
1252	wr	%l3, 0, %y
1253	ld	[%sp + CCFSZ + 20], %g1
1254	ldd	[%sp + CCFSZ + 24], %g2
1255	ldd	[%sp + CCFSZ + 32], %g4
1256	b	Lslowtrap_reenter
1257	 ldd	[%sp + CCFSZ + 40], %g6
1258
1259/*
1260 * Enter kernel breakpoint.  Write all the windows (not including the
1261 * current window) into the stack, so that backtrace works.  Copy the
1262 * supplied trap frame to the kgdb stack and switch stacks.
1263 *
1264 * kgdb_trap_glue(type, tf0)
1265 *	int type;
1266 *	struct trapframe *tf0;
1267 */
1268	.globl	_kgdb_trap_glue
1269_kgdb_trap_glue:
1270	save	%sp, -CCFSZ, %sp
1271
1272	call	_write_all_windows
1273	 mov	%sp, %l4		! %l4 = current %sp
1274
1275	/* copy trapframe to top of kgdb stack */
1276	set	_kgdb_stack + KGDB_STACK_SIZE - 80, %l0
1277					! %l0 = tfcopy -> end_of_kgdb_stack
1278	mov	80, %l1
12791:	ldd	[%i1], %l2
1280	inc	8, %i1
1281	deccc	8, %l1
1282	std	%l2, [%l0]
1283	bg	1b
1284	 inc	8, %l0
1285
1286#ifdef DEBUG
1287	/* save old red zone and then turn it off */
1288	sethi	%hi(_redzone), %l7
1289	ld	[%l7 + %lo(_redzone)], %l6
1290	st	%g0, [%l7 + %lo(_redzone)]
1291#endif
1292	/* switch to kgdb stack */
1293	add	%l0, -CCFSZ-80, %sp
1294
1295	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
1296	mov	%i0, %o0
1297	call	_kgdb_trap
1298	add	%l0, -80, %o1
1299	tst	%o0
1300	bnz,a	kgdb_rett
1301	 add	%l0, -80, %g1
1302
1303	/*
1304	 * kgdb_trap() did not handle the trap at all so the stack is
1305	 * still intact.  A simple `restore' will put everything back,
1306	 * after we reset the stack pointer.
1307	 */
1308	mov	%l4, %sp
1309#ifdef DEBUG
1310	st	%l6, [%l7 + %lo(_redzone)]	! restore red zone
1311#endif
1312	ret
1313	restore
1314
1315/*
1316 * Return from kgdb trap.  This is sort of special.
1317 *
1318 * We know that kgdb_trap_glue wrote the window above it, so that we will
1319 * be able to (and are sure to have to) load it up.  We also know that we
1320 * came from kernel land and can assume that the %fp (%i6) we load here
1321 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
1322 * until we have traps disabled, due to the SPARC taking traps at the
1323 * new ipl before noticing that PSR_ET has been turned off.  We are on
1324 * the kgdb stack, so this could be disastrous.
1325 *
1326 * Note that the trapframe argument in %g1 points into the current stack
1327 * frame (current window).  We abandon this window when we move %g1->tf_psr
1328 * into %psr, but we will not have loaded the new %sp yet, so again traps
1329 * must be disabled.
1330 */
1331kgdb_rett:
1332	rd	%psr, %g4		! turn off traps
1333	wr	%g4, PSR_ET, %psr
1334	/* use the three-instruction delay to do something useful */
1335	ld	[%g1], %g2		! pick up new %psr
1336	ld	[%g1 + 12], %g3		! set %y
1337	wr	%g3, 0, %y
1338#ifdef DEBUG
1339	st	%l6, [%l7 + %lo(_redzone)] ! and restore red zone
1340#endif
1341	wr	%g0, 0, %wim		! enable window changes
1342	nop; nop; nop
1343	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
1344	wr	%g2, 0, %psr		! set rett psr (including cond codes)
1345	/* 3 instruction delay before we can use the new window */
1346/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
1347/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
1348/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7
1349
1350	/* now we can use the new window */
1351	mov	%g1, %l4
1352	ld	[%l4 + 4], %l1		! get new pc
1353	ld	[%l4 + 8], %l2		! get new npc
1354	ld	[%l4 + 20], %g1		! set new %g1
1355
1356	/* set up returnee's out registers, including its %sp */
1357	ldd	[%l4 + 48], %i0
1358	ldd	[%l4 + 56], %i2
1359	ldd	[%l4 + 64], %i4
1360	ldd	[%l4 + 72], %i6
1361
1362	/* load returnee's window, making the window above it be invalid */
1363	restore
1364	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
1365	rd	%psr, %l0
1366	sll	%l1, %l0, %l1
1367	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
1368	sethi	%hi(_cpcb), %l1
1369	ld	[%l1 + %lo(_cpcb)], %l1
1370	and	%l0, 31, %l0		! CWP = %psr & 31;
1371	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
1372	save	%g0, %g0, %g0		! back to window to reload
1373	LOADWIN(%sp)
1374	save	%g0, %g0, %g0		! back to trap window
1375	/* note, we have not altered condition codes; safe to just rett */
1376	RETT
1377#endif
1378
1379/*
1380 * syscall() builds a trap frame and calls syscall().
1381 * sun_syscall is same but delivers sun system call number
1382 * XXX	should not have to save&reload ALL the registers just for
1383 *	ptrace...
1384 */
1385#ifdef COMPAT_SUNOS
1386sun_syscall:
1387	TRAP_SETUP(-CCFSZ-80)
1388	b	sys_merge
1389	 mov	1, %o3			! third arg to syscall: sun compat
1390syscall:
1391	TRAP_SETUP(-CCFSZ-80)
1392	clr	%o3			! third arg to syscall: native bsd
1393sys_merge:
1394#else
1395syscall:
1396	TRAP_SETUP(-CCFSZ-80)
1397#endif
1398	wr	%l0, PSR_ET, %psr
1399	std	%l0, [%sp + CCFSZ + 0]	! tf_psr, tf_pc
1400	rd	%y, %l3
1401	std	%l2, [%sp + CCFSZ + 8]	! tf_npc, tf_y
1402	st	%g1, [%sp + CCFSZ + 20]	! tf_g[1]
1403	std	%g2, [%sp + CCFSZ + 24]	! tf_g[2], tf_g[3]
1404	std	%g4, [%sp + CCFSZ + 32]	! etc
1405	std	%g6, [%sp + CCFSZ + 40]
1406	mov	%g1, %o0		! (code)
1407	std	%i0, [%sp + CCFSZ + 48]
1408	add	%sp, CCFSZ, %o1		! (&tf)
1409	std	%i2, [%sp + CCFSZ + 56]
1410	mov	%l1, %o2		! (pc)
1411	std	%i4, [%sp + CCFSZ + 64]
1412	call	_syscall		! syscall(code, &tf, pc, suncompat)
1413	 std	%i6, [%sp + CCFSZ + 72]
1414	! now load em all up again, sigh
1415	ldd	[%sp + CCFSZ + 0], %l0	! new %psr, new pc
1416	ldd	[%sp + CCFSZ + 8], %l2	! new npc, new %y
1417	wr	%l3, 0, %y
1418	ld	[%sp + CCFSZ + 20], %g1
1419	ldd	[%sp + CCFSZ + 24], %g2
1420	ldd	[%sp + CCFSZ + 32], %g4
1421	ldd	[%sp + CCFSZ + 40], %g6
1422	ldd	[%sp + CCFSZ + 48], %i0
1423	ldd	[%sp + CCFSZ + 56], %i2
1424	ldd	[%sp + CCFSZ + 64], %i4
1425	ldd	[%sp + CCFSZ + 72], %i6
1426	b	return_from_trap
1427	 wr	%l0, 0, %psr
1428
1429/*
1430 * Interrupts.  Software interrupts must be cleared from the software
1431 * interrupt enable register.  Rather than calling ienab_bic for each,
1432 * we do them in-line before enabling traps.
1433 *
1434 * After preliminary setup work, the interrupt is passed to each
1435 * registered handler in turn.  These are expected to return nonzero if
1436 * they took care of the interrupt.  If a handler claims the interrupt,
1437 * we exit (hardware interrupts are latched in the requestor so we'll
1438 * just take another interrupt in the unlikely event of simultaneous
1439 * interrupts from two different devices at the same level).  If we go
1440 * through all the registered handlers and no one claims it, we report a
1441 * stray interrupt.  This is more or less done as:
1442 *
1443 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
1444 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
1445 *			return;
1446 *	strayintr(&frame);
1447 *
1448 * Software interrupts are almost the same with three exceptions:
1449 * (1) we clear the interrupt from the software interrupt enable
1450 *     register before calling any handler (we have to clear it first
1451 *     to avoid an interrupt-losing race),
1452 * (2) we always call all the registered handlers (there is no way
1453 *     to tell if the single bit in the software interrupt register
1454 *     represents one or many requests)
1455 * (3) we never announce a stray interrupt (because of (1), another
1456 *     interrupt request can come in while we're in the handler.  If
1457 *     the handler deal with everything for both the original & the
1458 *     new request, we'll erroneously report a stray interrupt when
1459 *     we take the software interrupt for the new request.
1460 *
1461 * Inputs:
1462 *	%l0 = %psr
1463 *	%l1 = return pc
1464 *	%l2 = return npc
1465 *	%l3 = interrupt level
1466 *	(software interrupt only) %l4 = bits to clear in interrupt register
1467 *
1468 * Internal:
1469 *	%l4, %l5: local variables
1470 *	%l6 = %y
1471 *	%l7 = %g1
1472 *	%g2..%g7 go to stack
1473 *
1474 * An interrupt frame is built in the space for a full trapframe;
1475 * this contains the psr, pc, npc, and interrupt level.
1476 */
1477	.comm	_intrhand, 15 * 8	! intrhand[0..14]; 0 => error
1478softintr:
1479	sethi	%hi(IE_reg_addr), %l6
1480	ldub	[%l6 + %lo(IE_reg_addr)], %l5
1481	andn	%l5, %l4, %l5
1482	stb	%l5, [%l6 + %lo(IE_reg_addr)]
1483	INTR_SETUP(-CCFSZ-80)
1484	std	%g2, [%sp + CCFSZ + 24]	! save registers
1485	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1486	mov	%g1, %l7
1487	rd	%y, %l6
1488	std	%g4, [%sp + CCFSZ + 32]
1489	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1490	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1491	std	%g6, [%sp + CCFSZ + 40]
1492	or	%l5, %l4, %l4		!			;
1493	wr	%l4, 0, %psr		! the manual claims this
1494	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1495	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1496	sll	%l3, 2, %l5
1497	set	_intrcnt, %l4		! intrcnt[intlev]++;
1498	ld	[%l4 + %l5], %o0
1499	std	%l2, [%sp + CCFSZ + 8]
1500	inc	%o0
1501	st	%o0, [%l4 + %l5]
1502	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1503	ld	[%l4 + %l5], %l4
1504	b	3f
1505	 st	%fp, [%sp + CCFSZ + 16]
1506
15071:	ld	[%l4], %o1
1508	ld	[%l4 + 4], %o0
1509	tst	%o0
1510	bz,a	2f
1511	 add	%sp, CCFSZ, %o0
15122:	jmpl	%o1, %o7		!	(void)(*ih->ih_fun)(...)
1513	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
15143:	tst	%l4			! while ih != NULL
1515	bnz	1b
1516	 nop
1517	mov	%l7, %g1
1518	wr	%l6, 0, %y
1519	ldd	[%sp + CCFSZ + 24], %g2
1520	ldd	[%sp + CCFSZ + 32], %g4
1521	ldd	[%sp + CCFSZ + 40], %g6
1522	b	return_from_trap
1523	 wr	%l0, 0, %psr
1524
1525	/*
1526	 * _sparc_interrupt is exported for paranoia checking (see intr.c).
1527	 */
1528	.globl	_sparc_interrupt
1529_sparc_interrupt:
1530	INTR_SETUP(-CCFSZ-80)
1531	std	%g2, [%sp + CCFSZ + 24]	! save registers
1532	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1533	mov	%g1, %l7
1534	rd	%y, %l6
1535	std	%g4, [%sp + CCFSZ + 32]
1536	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1537	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1538	std	%g6, [%sp + CCFSZ + 40]
1539	or	%l5, %l4, %l4		!			;
1540	wr	%l4, 0, %psr		! the manual claims this
1541	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1542	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1543	sll	%l3, 2, %l5
1544	set	_intrcnt, %l4		! intrcnt[intlev]++;
1545	ld	[%l4 + %l5], %o0
1546	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
1547	inc	%o0
1548	st	%o0, [%l4 + %l5]
1549	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1550	ld	[%l4 + %l5], %l4
1551	b	3f
1552	 st	%fp, [%sp + CCFSZ + 16]
1553
15541:	ld	[%l4], %o1
1555	ld	[%l4 + 4], %o0
1556	tst	%o0
1557	bz,a	2f
1558	 add	%sp, CCFSZ, %o0
15592:	jmpl	%o1, %o7		!	handled = (*ih->ih_fun)(...)
1560	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
1561	tst	%o0
1562	bnz	4f			! if (handled) break
1563	 nop
15643:	tst	%l4
1565	bnz	1b			! while (ih)
1566	 nop
1567	call	_strayintr		!	strayintr(&intrframe)
1568	 add	%sp, CCFSZ, %o0
1569	/* all done: restore registers and go return */
15704:	mov	%l7, %g1
1571	wr	%l6, 0, %y
1572	ldd	[%sp + CCFSZ + 24], %g2
1573	ldd	[%sp + CCFSZ + 32], %g4
1574	ldd	[%sp + CCFSZ + 40], %g6
1575	b	return_from_trap
1576	 wr	%l0, 0, %psr
1577
1578#ifdef notyet
1579/*
1580 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
1581 * software interrupt, and get out.  Do the software interrupt directly
1582 * if we would just take it on the way out.
1583 *
1584 * Input:
1585 *	%l0 = %psr
1586 *	%l1 = return pc
1587 *	%l2 = return npc
1588 * Internal:
1589 *	%l3 = zs device
1590 *	%l4, %l5 = temporary
1591 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
1592 *	%l7 = zs soft status
1593 */
1594zshard:
1595#endif /* notyet */
1596
1597/*
1598 * Level 15 interrupt.  An async memory error has occurred;
1599 * take care of it (typically by panicking, but hey...).
1600 *	%l0 = %psr
1601 *	%l1 = return pc
1602 *	%l2 = return npc
1603 *	%l3 = 15 * 4 (why? just because!)
1604 *
1605 * Internal:
1606 *	%l4 = %y
1607 *	%l5 = %g1
1608 *	%l6 = %g6
1609 *	%l7 = %g7
1610 *  g2, g3, g4, g5 go to stack
1611 *
1612 * This code is almost the same as that in mem_access_fault,
1613 * except that we already know the problem is not a `normal' fault,
1614 * and that we must be extra-careful with interrupt enables.
1615 */
1616nmi:
1617	INTR_SETUP(-CCFSZ-80)
1618	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1619	/*
1620	 * Level 15 interrupts are nonmaskable, so with traps off,
1621	 * disable all interrupts to prevent recursion.
1622	 */
1623	sethi	%hi(IE_reg_addr), %o0
1624	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1625	andn	%o1, IE_ALLIE, %o1
1626	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1627	wr	%l0, PSR_ET, %psr	! okay, turn traps on again
1628
1629	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
1630	rd	%y, %l4			! save y
1631
1632	! must read the sync error register too.
1633	set	AC_SYNC_ERR, %o0
1634	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1635	inc	4, %o0
1636	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1637	std	%g4, [%sp + CCFSZ + 8]	! save g4,g5
1638	mov	%g1, %l5		! save g1,g6,g7
1639	mov	%g6, %l6
1640	mov	%g7, %l7
1641	inc	4, %o0
1642	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1643	inc	4, %o0
1644	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1645
1646	! and call C code
1647	call	_memerr			! memerr(0, ser, sva, aer, ava)
1648	clr	%o0
1649
1650	mov	%l5, %g1		! restore g1 through g7
1651	ldd	[%sp + CCFSZ + 0], %g2
1652	ldd	[%sp + CCFSZ + 8], %g4
1653	wr	%l0, 0, %psr		! re-disable traps
1654	mov	%l6, %g6
1655	mov	%l7, %g7
1656
1657	! set IE_ALLIE again (safe, we disabled traps again above)
1658	sethi	%hi(IE_reg_addr), %o0
1659	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1660	or	%o1, IE_ALLIE, %o1
1661	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1662	b	return_from_trap
1663	 wr	%l4, 0, %y		! restore y
1664
1665
1666/*
1667 * Window overflow trap handler.
1668 *	%l0 = %psr
1669 *	%l1 = return pc
1670 *	%l2 = return npc
1671 */
1672window_of:
1673#ifdef TRIVIAL_WINDOW_OVERFLOW_HANDLER
1674	/* a trivial version that assumes %sp is ok */
1675	/* (for testing only!) */
1676	save	%g0, %g0, %g0
1677	std	%l0, [%sp + (0*8)]
1678	rd	%psr, %l0
1679	mov	1, %l1
1680	sll	%l1, %l0, %l0
1681	wr	%l0, 0, %wim
1682	std	%l2, [%sp + (1*8)]
1683	std	%l4, [%sp + (2*8)]
1684	std	%l6, [%sp + (3*8)]
1685	std	%i0, [%sp + (4*8)]
1686	std	%i2, [%sp + (5*8)]
1687	std	%i4, [%sp + (6*8)]
1688	std	%i6, [%sp + (7*8)]
1689	restore
1690	RETT
1691#else
1692	/*
1693	 * This is similar to TRAP_SETUP, but we do not want to spend
1694	 * a lot of time, so we have separate paths for kernel and user.
1695	 * We also know for sure that the window has overflowed.
1696	 */
1697	btst	PSR_PS, %l0
1698	bz	winof_user
1699	 sethi	%hi(clean_trap_window), %l7
1700
1701	/*
1702	 * Overflow from kernel mode.  Call clean_trap_window to
1703	 * do the dirty work, then just return, since we know prev
1704	 * window is valid.  clean_trap_windows might dump all *user*
1705	 * windows into the pcb, but we do not care: there is at
1706	 * least one kernel window (a trap or interrupt frame!)
1707	 * above us.
1708	 */
1709	jmpl	%l7 + %lo(clean_trap_window), %l4
1710	 mov	%g7, %l7		! for clean_trap_window
1711
1712	wr	%l0, 0, %psr		! put back the @%*! cond. codes
1713	nop				! (let them settle in)
1714	RETT
1715
1716winof_user:
1717	/*
1718	 * Overflow from user mode.
1719	 * If clean_trap_window dumps the registers into the pcb,
1720	 * rft_user will need to call trap(), so we need space for
1721	 * a trap frame.  We also have to compute pcb_nw.
1722	 *
1723	 * SHOULD EXPAND IN LINE TO AVOID BUILDING TRAP FRAME ON
1724	 * `EASY' SAVES
1725	 */
1726	sethi	%hi(_cpcb), %l6
1727	ld	[%l6 + %lo(_cpcb)], %l6
1728	ld	[%l6 + PCB_WIM], %l5
1729	and	%l0, 31, %l3
1730	sub	%l3, %l5, %l5 		/* l5 = CWP - pcb_wim */
1731	set	uwtab, %l4
1732	ldub	[%l4 + %l5], %l5	/* l5 = uwtab[l5] */
1733	st	%l5, [%l6 + PCB_UW]
1734	jmpl	%l7 + %lo(clean_trap_window), %l4
1735	 mov	%g7, %l7		! for clean_trap_window
1736	sethi	%hi(_cpcb), %l6
1737	ld	[%l6 + %lo(_cpcb)], %l6
1738	set	UPAGES*NBPG-CCFSZ-80, %l5
1739	add	%l6, %l5, %sp		/* over to kernel stack */
1740	CHECK_SP_REDZONE(%l6, %l5)
1741
1742	/*
1743	 * Copy return_from_trap far enough to allow us
1744	 * to jump directly to rft_user_or_recover_pcb_windows
1745	 * (since we know that is where we are headed).
1746	 */
1747!	and	%l0, 31, %l3		! still set (clean_trap_window
1748					! leaves this register alone)
1749	set	wmask, %l6
1750	ldub	[%l6 + %l3], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
1751	b	rft_user_or_recover_pcb_windows
1752	 rd	%wim, %l4		! (read %wim first)
1753#endif /* end `real' version of window overflow trap handler */
1754
1755/*
1756 * Window underflow trap handler.
1757 *	%l0 = %psr
1758 *	%l1 = return pc
1759 *	%l2 = return npc
1760 *
1761 * A picture:
1762 *
1763 *	  T R I X
1764 *	0 0 0 1 0 0 0	(%wim)
1765 * [bit numbers increase towards the right;
1766 * `restore' moves right & `save' moves left]
1767 *
1768 * T is the current (Trap) window, R is the window that attempted
1769 * a `Restore' instruction, I is the Invalid window, and X is the
1770 * window we want to make invalid before we return.
1771 *
1772 * Since window R is valid, we cannot use rft_user to restore stuff
1773 * for us.  We have to duplicate its logic.  YUCK.
1774 *
1775 * Incidentally, TRIX are for kids.  Silly rabbit!
1776 */
1777window_uf:
1778#ifdef TRIVIAL_WINDOW_UNDERFLOW_HANDLER
1779	wr	%g0, 0, %wim		! allow us to enter I
1780	restore				! to R
1781	nop
1782	nop
1783	restore				! to I
1784	restore	%g0, 1, %l1		! to X
1785	rd	%psr, %l0
1786	sll	%l1, %l0, %l0
1787	wr	%l0, 0, %wim
1788	save	%g0, %g0, %g0		! back to I
1789	LOADWIN(%sp)
1790	save	%g0, %g0, %g0		! back to R
1791	save	%g0, %g0, %g0		! back to T
1792	RETT
1793#else
1794	wr	%g0, 0, %wim		! allow us to enter I
1795	btst	PSR_PS, %l0
1796	restore				! enter window R
1797	bz	winuf_user
1798	 restore			! enter window I
1799
1800	/*
1801	 * Underflow from kernel mode.  Just recover the
1802	 * registers and go (except that we have to update
1803	 * the blasted user pcb fields).
1804	 */
1805	restore	%g0, 1, %l1		! enter window X, then set %l1 to 1
1806	rd	%psr, %l0		! cwp = %psr & 31;
1807	and	%l0, 31, %l0
1808	sll	%l1, %l0, %l1		! wim = 1 << cwp;
1809	wr	%l1, 0, %wim		! setwim(wim);
1810	sethi	%hi(_cpcb), %l1
1811	ld	[%l1 + %lo(_cpcb)], %l1
1812	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1813	save	%g0, %g0, %g0		! back to window I
1814	LOADWIN(%sp)
1815	save	%g0, %g0, %g0		! back to R
1816	save	%g0, %g0, %g0		! and then to T
1817	wr	%l0, 0, %psr		! fix those cond codes....
1818	nop				! (let them settle in)
1819	RETT
1820
1821winuf_user:
1822	/*
1823	 * Underflow from user mode.
1824	 *
1825	 * We cannot use rft_user (as noted above) because
1826	 * we must re-execute the `restore' instruction.
1827	 * Since it could be, e.g., `restore %l0,0,%l0',
1828	 * it is not okay to touch R's registers either.
1829	 *
1830	 * We are now in window I.
1831	 */
1832	btst	7, %sp			! if unaligned, it is invalid
1833	bne	winuf_invalid
1834	 EMPTY
1835
1836	PTE_OF_ADDR(%sp, %l7, winuf_invalid)
1837	CMP_PTE_USER_READ(%l7)		! if first page not readable,
1838	bne	winuf_invalid		! it is invalid
1839	 EMPTY
1840	SLT_IF_1PAGE_RW(%sp, %l7)	! first page is readable
1841	bl,a	winuf_ok		! if only one page, enter window X
1842	 restore %g0, 1, %l1		! and goto ok, & set %l1 to 1
1843	add	%sp, 7*8, %l5
1844	PTE_OF_ADDR(%l5, %l7, winuf_invalid)
1845	CMP_PTE_USER_READ(%l7)		! check second page too
1846	be,a	winuf_ok		! enter window X and goto ok
1847	 restore %g0, 1, %l1		! (and then set %l1 to 1)
1848
1849winuf_invalid:
1850	/*
1851	 * We were unable to restore the window because %sp
1852	 * is invalid or paged out.  Return to the trap window
1853	 * and call trap(T_WINUF).  This will save R to the user
1854	 * stack, then load both R and I into the pcb rw[] area,
1855	 * and return with pcb_nsaved set to -1 for success, 0 for
1856	 * failure.  `Failure' indicates that someone goofed with the
1857	 * trap registers (e.g., signals), so that we need to return
1858	 * from the trap as from a syscall (probably to a signal handler)
1859	 * and let it retry the restore instruction later.  Note that
1860	 * window R will have been pushed out to user space, and thus
1861	 * be the invalid window, by the time we get back here.  (We
1862	 * continue to label it R anyway.)  We must also set %wim again,
1863	 * and set pcb_uw to 1, before enabling traps.  (Window R is the
1864	 * only window, and it is a user window).
1865	 */
1866	save	%g0, %g0, %g0		! back to R
1867#if 0		/* this gives `as' mild heartburn */
1868	save	%g0, 1, %l4		! back to T, then %l4 = 1
1869#else
1870	save	%g0, %g0, %g0		! back to T
1871	mov	1, %l4			! and set %l4 = 1
1872#endif
1873	sethi	%hi(_cpcb), %l6
1874	ld	[%l6 + %lo(_cpcb)], %l6
1875	st	%l4, [%l6 + PCB_UW]	! pcb_uw = 1
1876	ld	[%l6 + PCB_WIM], %l5	! get log2(%wim)
1877	sll	%l4, %l5, %l4		! %l4 = old %wim
1878	wr	%l4, 0, %wim		! window I is now invalid again
1879	set	UPAGES*NBPG-CCFSZ-80, %l5
1880	add	%l6, %l5, %sp		! get onto kernel stack
1881	CHECK_SP_REDZONE(%l6, %l5)
1882
1883	/*
1884	 * Okay, call trap(T_WINUF, psr, pc, &tf).
1885	 * See `slowtrap' above for operation.
1886	 */
1887	wr	%l0, PSR_ET, %psr
1888	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1889	rd	%y, %l3
1890	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1891	mov	T_WINUF, %o0
1892	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1893	mov	%l0, %o1
1894	std	%g2, [%sp + CCFSZ + 24]	! etc
1895	mov	%l1, %o2
1896	std	%g4, [%sp + CCFSZ + 32]
1897	add	%sp, CCFSZ, %o3
1898	std	%g6, [%sp + CCFSZ + 40]
1899	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1900	std	%i2, [%sp + CCFSZ + 56]
1901	std	%i4, [%sp + CCFSZ + 64]
1902	call	_trap			! trap(T_WINUF, pc, psr, &tf)
1903	 std	%i6, [%sp + CCFSZ + 72]	! tf.tf_out[6]
1904
1905	ldd	[%sp + CCFSZ + 0], %l0	! new psr, pc
1906	ldd	[%sp + CCFSZ + 8], %l2	! new npc, %y
1907	wr	%l3, 0, %y
1908	ld	[%sp + CCFSZ + 20], %g1
1909	ldd	[%sp + CCFSZ + 24], %g2
1910	ldd	[%sp + CCFSZ + 32], %g4
1911	ldd	[%sp + CCFSZ + 40], %g6
1912	ldd	[%sp + CCFSZ + 48], %i0	! %o0 for window R, etc
1913	ldd	[%sp + CCFSZ + 56], %i2
1914	ldd	[%sp + CCFSZ + 64], %i4
1915	wr	%l0, 0, %psr		! disable traps: test must be atomic
1916	ldd	[%sp + CCFSZ + 72], %i6
1917	sethi	%hi(_cpcb), %l6
1918	ld	[%l6 + %lo(_cpcb)], %l6
1919	ld	[%l6 + PCB_NSAVED], %l7	! if nsaved is -1, we have our regs
1920	tst	%l7
1921	bl,a	1f			! got them
1922	 wr	%g0, 0, %wim		! allow us to enter windows R, I
1923	b,a	return_from_trap
1924
1925	/*
1926	 * Got 'em.  Load 'em up.
1927	 */
19281:
1929	mov	%g6, %l3		! save %g6; set %g6 = cpcb
1930	mov	%l6, %g6
1931	st	%g0, [%g6 + PCB_NSAVED]	! and clear magic flag
1932	restore				! from T to R
1933	restore				! from R to I
1934	restore	%g0, 1, %l1		! from I to X, then %l1 = 1
1935	rd	%psr, %l0		! cwp = %psr;
1936	sll	%l1, %l0, %l1
1937	wr	%l1, 0, %wim		! make window X invalid
1938	and	%l0, 31, %l0
1939	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1940	nop				! unnecessary? old wim was 0...
1941	save	%g0, %g0, %g0		! back to I
1942	LOADWIN(%g6 + PCB_RW + 64)	! load from rw[1]
1943	save	%g0, %g0, %g0		! back to R
1944	LOADWIN(%g6 + PCB_RW)		! load from rw[0]
1945	save	%g0, %g0, %g0		! back to T
1946	wr	%l0, 0, %psr		! restore condition codes
1947	mov	%l3, %g6		! fix %g6
1948	RETT
1949
1950	/*
1951	 * Restoring from user stack, but everything has checked out
1952	 * as good.  We are now in window X, and %l1 = 1.  Window R
1953	 * is still valid and holds user values.
1954	 */
1955winuf_ok:
1956	rd	%psr, %l0
1957	sll	%l1, %l0, %l1
1958	wr	%l1, 0, %wim		! make this one invalid
1959	sethi	%hi(_cpcb), %l2
1960	ld	[%l2 + %lo(_cpcb)], %l2
1961	and	%l0, 31, %l0
1962	st	%l0, [%l2 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1963	save	%g0, %g0, %g0		! back to I
1964	LOADWIN(%sp)
1965	save	%g0, %g0, %g0		! back to R
1966	save	%g0, %g0, %g0		! back to T
1967	wr	%l0, 0, %psr		! restore condition codes
1968	nop				! it takes three to tangle
1969	RETT
1970#endif /* end `real' version of window underflow trap handler */
1971
1972/*
1973 * Various return-from-trap routines (see return_from_trap).
1974 */
1975
1976/*
1977 * Return from trap, to kernel.
1978 *	%l0 = %psr
1979 *	%l1 = return pc
1980 *	%l2 = return npc
1981 *	%l4 = %wim
1982 *	%l5 = bit for previous window
1983 */
1984rft_kernel:
1985	btst	%l5, %l4		! if (wim & l5)
1986	bnz	1f			!	goto reload;
1987	 wr	%l0, 0, %psr		! but first put !@#*% cond codes back
1988
1989	/* previous window is valid; just rett */
1990	nop				! wait for cond codes to settle in
1991	RETT
1992
1993	/*
1994	 * Previous window is invalid.
1995	 * Update %wim and then reload l0..i7 from frame.
1996	 *
1997	 *	  T I X
1998	 *	0 0 1 0 0   (%wim)
1999	 * [see picture in window_uf handler]
2000	 *
2001	 * T is the current (Trap) window, I is the Invalid window,
2002	 * and X is the window we want to make invalid.  Window X
2003	 * currently has no useful values.
2004	 */
20051:
2006	wr	%g0, 0, %wim		! allow us to enter window I
2007	nop; nop; nop			! (it takes a while)
2008	restore				! enter window I
2009	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2010	rd	%psr, %l0		! CWP = %psr & 31;
2011	and	%l0, 31, %l0
2012	sll	%l1, %l0, %l1		! wim = 1 << CWP;
2013	wr	%l1, 0, %wim		! setwim(wim);
2014	sethi	%hi(_cpcb), %l1
2015	ld	[%l1 + %lo(_cpcb)], %l1
2016	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2017	save	%g0, %g0, %g0		! back to window I
2018	LOADWIN(%sp)
2019	save	%g0, %g0, %g0		! back to window T
2020	/*
2021	 * Note that the condition codes are still set from
2022	 * the code at rft_kernel; we can simply return.
2023	 */
2024	RETT
2025
2026/*
2027 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
2028 * will re-enter trap() if set.  Note that we may have to switch from
2029 * the interrupt stack to the kernel stack in this case.
2030 *	%l0 = %psr
2031 *	%l1 = return pc
2032 *	%l2 = return npc
2033 *	%l4 = %wim
2034 *	%l5 = bit for previous window
2035 *	%l6 = cpcb
2036 * If returning to a valid window, just set psr and return.
2037 */
2038rft_user:
2039!	sethi	%hi(_want_ast), %l7	! (done below)
2040	ld	[%l7 + %lo(_want_ast)], %l7
2041	tst	%l7			! want AST trap?
2042	bne,a	softtrap		! yes, re-enter trap with type T_AST
2043	 mov	T_AST, %o0
2044
2045	btst	%l5, %l4		! if (wim & l5)
2046	bnz	1f			!	goto reload;
2047	 wr	%l0, 0, %psr		! restore cond codes
2048	nop				! (three instruction delay)
2049	RETT
2050
2051	/*
2052	 * Previous window is invalid.
2053	 * Before we try to load it, we must verify its stack pointer.
2054	 * This is much like the underflow handler, but a bit easier
2055	 * since we can use our own local registers.
2056	 */
20571:
2058	btst	7, %fp			! if unaligned, address is invalid
2059	bne	rft_invalid
2060	 EMPTY
2061
2062	PTE_OF_ADDR(%fp, %l7, rft_invalid)
2063	CMP_PTE_USER_READ(%l7)		! try first page
2064	bne	rft_invalid		! no good
2065	 EMPTY
2066	SLT_IF_1PAGE_RW(%fp, %l7)
2067	bl,a	rft_user_ok		! only 1 page: ok
2068	 wr	%g0, 0, %wim
2069	add	%fp, 7*8, %l5
2070	PTE_OF_ADDR(%l5, %l7, rft_invalid)
2071	CMP_PTE_USER_READ(%l7)		! check 2nd page too
2072	be,a	rft_user_ok
2073	 wr	%g0, 0, %wim
2074
2075	/*
2076	 * The window we wanted to pull could not be pulled.  Instead,
2077	 * re-enter trap with type T_RWRET.  This will pull the window
2078	 * into cpcb->pcb_rw[0] and set cpcb->pcb_nsaved to -1, which we
2079	 * will detect when we try to return again.
2080	 */
2081rft_invalid:
2082	b	softtrap
2083	 mov	T_RWRET, %o0
2084
2085	/*
2086	 * The window we want to pull can be pulled directly.
2087	 */
2088rft_user_ok:
2089!	wr	%g0, 0, %wim		! allow us to get into it
2090	wr	%l0, 0, %psr		! fix up the cond codes now
2091	nop; nop; nop
2092	restore				! enter window I
2093	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2094	rd	%psr, %l0		! l0 = (junk << 5) + CWP;
2095	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2096	wr	%l1, 0, %wim
2097	sethi	%hi(_cpcb), %l1
2098	ld	[%l1 + %lo(_cpcb)], %l1
2099	and	%l0, 31, %l0
2100	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2101	save	%g0, %g0, %g0		! back to window I
2102	LOADWIN(%sp)			! suck hard
2103	save	%g0, %g0, %g0		! back to window T
2104	RETT
2105
2106/*
2107 * Return from trap.  Entered after a
2108 *	wr	%l0, 0, %psr
2109 * which disables traps so that we can rett; registers are:
2110 *
2111 *	%l0 = %psr
2112 *	%l1 = return pc
2113 *	%l2 = return npc
2114 *
2115 * (%l3..%l7 anything).
2116 *
2117 * If we are returning to user code, we must:
2118 *  1.  Check for register windows in the pcb that belong on the stack.
2119 *	If there are any, reenter trap with type T_WINOF.
2120 *  2.  Make sure the register windows will not underflow.  This is
2121 *	much easier in kernel mode....
2122 */
2123return_from_trap:
2124!	wr	%l0, 0, %psr		! disable traps so we can rett
2125! (someone else did this already)
2126	and	%l0, 31, %l5
2127	set	wmask, %l6
2128	ldub	[%l6 + %l5], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
2129	btst	PSR_PS, %l0		! returning to userland?
2130	bnz	rft_kernel		! no, go return to kernel
2131	 rd	%wim, %l4		! (read %wim in any case)
2132
2133rft_user_or_recover_pcb_windows:
2134	/*
2135	 * (entered with %l4=%wim, %l5=wmask[cwp]; %l0..%l2 as usual)
2136	 *
2137	 * check cpcb->pcb_nsaved:
2138	 * if 0, do a `normal' return to user (see rft_user);
2139	 * if > 0, cpcb->pcb_rw[] holds registers to be copied to stack;
2140	 * if -1, cpcb->pcb_rw[0] holds user registers for rett window
2141	 * from an earlier T_RWRET pseudo-trap.
2142	 */
2143	sethi	%hi(_cpcb), %l6
2144	ld	[%l6 + %lo(_cpcb)], %l6
2145	ld	[%l6 + PCB_NSAVED], %l7
2146	tst	%l7
2147	bz,a	rft_user
2148	 sethi	%hi(_want_ast), %l7	! first instr of rft_user
2149
2150	bg,a	softtrap		! if (pcb_nsaved > 0)
2151	 mov	T_WINOF, %o0		!	trap(T_WINOF);
2152
2153	/*
2154	 * To get here, we must have tried to return from a previous
2155	 * trap and discovered that it would cause a window underflow.
2156	 * We then must have tried to pull the registers out of the
2157	 * user stack (from the address in %fp==%i6) and discovered
2158	 * that it was either unaligned or not loaded in memory, and
2159	 * therefore we ran a trap(T_RWRET), which loaded one set of
2160	 * registers into cpcb->pcb_pcb_rw[0] (if it had killed the
2161	 * process due to a bad stack, we would not be here).
2162	 *
2163	 * We want to load pcb_rw[0] into the previous window, which
2164	 * we know is currently invalid.  In other words, we want
2165	 * %wim to be 1 << ((cwp + 2) % nwindows).
2166	 */
2167	wr	%g0, 0, %wim		! enable restores
2168	mov	%g6, %l3		! save g6 in l3
2169	mov	%l6, %g6		! set g6 = &u
2170	st	%g0, [%g6 + PCB_NSAVED]	! clear cpcb->pcb_nsaved
2171	restore				! enter window I
2172	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2173	rd	%psr, %l0
2174	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2175	wr	%l1, 0, %wim
2176	and	%l0, 31, %l0
2177	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = CWP;
2178	nop				! unnecessary? old wim was 0...
2179	save	%g0, %g0, %g0		! back to window I
2180	LOADWIN(%g6 + PCB_RW)
2181	save	%g0, %g0, %g0		! back to window T (trap window)
2182	wr	%l0, 0, %psr		! cond codes, cond codes everywhere
2183	mov	%l3, %g6		! restore g6
2184	RETT
2185
2186! exported end marker for kernel gdb
2187	.globl	_endtrapcode
2188_endtrapcode:
2189
2190/*
2191 * init_tables(nwin) int nwin;
2192 *
2193 * Set up the uwtab and wmask tables.
2194 * We know nwin > 1.
2195 */
2196init_tables:
2197	/*
2198	 * for (i = -nwin, j = nwin - 2; ++i < 0; j--)
2199	 *	uwtab[i] = j;
2200	 * (loop runs at least once)
2201	 */
2202	set	uwtab, %o3
2203	sub	%g0, %o0, %o1		! i = -nwin + 1
2204	inc	%o1
2205	add	%o0, -2, %o2		! j = nwin - 2;
22060:
2207	stb	%o2, [%o3 + %o1]	! uwtab[i] = j;
22081:
2209	inccc	%o1			! ++i < 0?
2210	bl	0b			! yes, continue loop
2211	 dec	%o2			! in any case, j--
2212
2213	/*
2214	 * (i now equals 0)
2215	 * for (j = nwin - 1; i < nwin; i++, j--)
2216	 *	uwtab[i] = j;
2217	 * (loop runs at least twice)
2218	 */
2219	sub	%o0, 1, %o2		! j = nwin - 1
22200:
2221	stb	%o2, [%o3 + %o1]	! uwtab[i] = j
2222	inc	%o1			! i++
22231:
2224	cmp	%o1, %o0		! i < nwin?
2225	bl	0b			! yes, continue
2226	 dec	%o2			! in any case, j--
2227
2228	/*
2229	 * We observe that, for i in 0..nwin-2, (i+1)%nwin == i+1;
2230	 * for i==nwin-1, (i+1)%nwin == 0.
2231	 * To avoid adding 1, we run i from 1 to nwin and set
2232	 * wmask[i-1].
2233	 *
2234	 * for (i = j = 1; i < nwin; i++) {
2235	 *	j <<= 1;	(j now == 1 << i)
2236	 *	wmask[i - 1] = j;
2237	 * }
2238	 * (loop runs at least once)
2239	 */
2240	set	wmask - 1, %o3
2241	mov	1, %o1			! i = 1;
2242	mov	2, %o2			! j = 2;
22430:
2244	stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2245	inc	%o1			! i++
2246	cmp	%o1, %o0		! i < nwin?
2247	bl,a	0b			! yes, continue
2248	 sll	%o2, 1, %o2		! (and j <<= 1)
2249
2250	/*
2251	 * Now i==nwin, so we want wmask[i-1] = 1.
2252	 */
2253	mov	1, %o2			! j = 1;
2254	retl
2255	 stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2256
2257
2258dostart:
2259	/*
2260	 * Startup.
2261	 *
2262	 * We have been loaded in low RAM, at some address which
2263	 * is page aligned (0x4000 actually) rather than where we
2264	 * want to run (0xf8004000).  Until we get everything set,
2265	 * we have to be sure to use only pc-relative addressing.
2266	 */
2267	wr	%g0, 0, %wim		! make sure we can set psr
2268	mov	%o0, %g7		! save prom vector pointer
2269	nop; nop
2270	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
2271	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
2272	stba	%g0, [%g1] ASI_CONTROL
2273	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
2274	mov	1, %g1			! set pcb_wim (log2(%wim) = 1)
2275	sethi	%hi(_u0 + PCB_WIM), %g2
2276	st	%g1, [%g2 + %lo(_u0 + PCB_WIM)]
2277
2278	/*
2279	 * Step 1: double map low RAM (addresses [0.._end-start-1])
2280	 * to KERNBASE (addresses [KERNBASE.._end-1]).  None of these
2281	 * are `bad' aliases (since they are all on segment boundaries)
2282	 * so we do not have to worry about cache aliasing.
2283	 *
2284	 * We map in another couple of segments just to have some
2285	 * more memory (512K, actually) guaranteed available for
2286	 * bootstrap code (pmap_bootstrap needs memory to hold MMU
2287	 * and context data structures).
2288	 */
2289	clr	%l0			! lowva
2290	set	KERNBASE, %l1		! highva
2291	set	_end + (2 << 18), %l2	! last va that must be remapped
2292	set	1 << 18, %l3		! segment size in bytes
22930:
2294	lduba	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
2295	stba	%l4, [%l1] ASI_SEGMAP
2296	add	%l3, %l1, %l1		! highva += segsiz;
2297	cmp	%l1, %l2		! done?
2298	bl	0b			! no, loop
2299	 add	%l3, %l0, %l0		! (and lowva += segsz)
2300
2301	/*
2302	 * Now map the interrupt enable register and clear any interrupts,
2303	 * enabling NMIs.  Note that we will not take NMIs until we change
2304	 * %tbr.
2305	 */
2306	set	IE_reg_addr, %l0
2307	set	IE_REG_PTE, %l1
2308	sta	%l1, [%l0] ASI_PTE
2309	mov	IE_ALLIE, %l1
2310	nop; nop			! paranoia
2311	stb	%l1, [%l0]
2312
2313	/*
2314	 * All set, fix pc and npc.  Once we are where we should be,
2315	 * we can give ourselves a stack and enable traps.
2316	 */
2317	set	1f, %l0
2318	jmp	%l0
2319	 nop
23201:
2321	set	USRSTACK - CCFSZ, %fp	! as if called from user code
2322	set	estack0 - CCFSZ - 80, %sp ! via syscall(boot_me_up) or somesuch
2323	rd	%psr, %l0
2324	wr	%l0, PSR_ET, %psr
2325
2326	/*
2327	 * Step 2: clear BSS.  This may just be paranoia; the boot
2328	 * loader might already do it for us; but what the hell.
2329	 */
2330	set	_edata, %o0		! bzero(edata, end - edata)
2331	set	_end, %o1
2332	call	_bzero
2333	 sub	%o1, %o0, %o1
2334
2335	/*
2336	 * Stash prom vectors now, after bzero, as it lives in bss
2337	 * (which we just zeroed).
2338	 * This depends on the fact that bzero does not use %g7.
2339	 */
2340	sethi	%hi(_promvec), %l0
2341	st	%g7, [%l0 + %lo(_promvec)]
2342
2343	/*
2344	 * Step 3: compute number of windows and set up tables.
2345	 * We could do some of this later.
2346	 */
2347	save	%sp, -64, %sp
2348	rd	%psr, %g1
2349	restore
2350	and	%g1, 31, %g1		! want just the CWP bits
2351	add	%g1, 1, %o0		! compute nwindows
2352	sethi	%hi(_nwindows), %o1	! may as well tell everyone
2353	call	init_tables
2354	 st	%o0, [%o1 + %lo(_nwindows)]
2355
2356	/*
2357	 * Step 4: change the trap base register, now that our trap handlers
2358	 * will function (they need the tables we just set up).
2359	 */
2360	set	_trapbase, %l0
2361	wr	%l0, 0, %tbr
2362	nop				! paranoia
2363
2364	/*
2365	 * Bootstrap, call main, and `return' from a fake trap into `icode'.
2366	 */
2367	call	_bootstrap
2368	 nop
2369	call	_main
2370	 nop
2371
2372	/*
2373	 * Return from main means we are process 1 and need to
2374	 * jump to the `icode' (which appears below) which has
2375	 * been copied in to address 0x2000 in the new process.
2376	 *
2377	 * Our stack & frame pointers are already set up to make it
2378	 * look like we are currently running in a trap taken from
2379	 * user mode (there is room for a full blown trapfrape),
2380	 * so all we need do is set up %l0, %l1, and %l2 and branch
2381	 * to return_from_trap.
2382	 */
2383	mov	PSR_S, %l0		! user psr
2384#define XADDR 0x2000 /* XXX */
2385	set	XADDR, %l1		! pc
2386	add	%l1, 4, %l2		! npc
2387	b	return_from_trap
2388	 wr	%l0, 0, %psr		! just like syscall()
2389
2390/*
2391 * Icode is copied out to process 1 to exec init.
2392 * If the exec fails, process 1 exits.
2393 *
2394 * We depend here on the fact that we are copied to address XADDR
2395 * (but so does the code above).
2396 */
2397	.globl	_icode, _szicode
2398_icode:
2399	mov	SYS_execve, %g1
2400	set	init - _icode + XADDR, %o0	! actual location of pathname
2401	set	argv - _icode + XADDR, %o1	! ... and of argv
2402	clr	%o2				! no environment
2403	t	ST_SYSCALL
2404	mov	SYS_exit, %g1
2405	t	ST_SYSCALL
2406
2407init:	.ascii	"/sbin/"
2408init1:	.asciz	"init"
2409	ALIGN
2410argv:	.word	init1 - _icode + XADDR
2411	.word	initflags - _icode + XADDR
2412	.word	0
2413initflags:
2414_szicode:
2415	.long	_szicode - _icode
2416#undef XADDR
2417
2418/*
2419 * The following code is copied to the top of the user stack when each
2420 * process is exec'ed, and signals are `trampolined' off it.
2421 *
2422 * When this code is run, the stack looks like:
2423 *	[%sp]		64 bytes to which registers can be dumped
2424 *	[%sp + 64]	signal number (goes in %o0)
2425 *	[%sp + 64 + 4]	signal code (goes in %o1)
2426 *	[%sp + 64 + 8]	placeholder
2427 *	[%sp + 64 + 12]	argument for %o3, currently unsupported (always 0)
2428 *	[%sp + 64 + 16]	first word of saved state (sigcontext)
2429 *	    .
2430 *	    .
2431 *	    .
2432 *	[%sp + NNN]	last word of saved state
2433 * (followed by previous stack contents or top of signal stack).
2434 * The address of the function to call is in %g1; the old %g1 and %o0
2435 * have already been saved in the sigcontext.  We are running in a clean
2436 * window, all previous windows now being saved to the stack.
2437 *
2438 * Note that [%sp + 64 + 8] == %sp + 64 + 16.  The copy at %sp+64+8
2439 * will eventually be removed, with a hole left in its place, if things
2440 * work out.
2441 */
2442	.globl	_sigcode
2443	.globl	_esigcode
2444_sigcode:
2445	/*
2446	 * XXX  the `save' and `restore' below are unnecessary: should
2447	 *	replace with simple arithmetic on %sp
2448	 *
2449	 * Make room on the stack for 32 %f registers + %fsr.  This comes
2450	 * out to 33*4 or 132 bytes, but this must be aligned to a multiple
2451	 * of 8, or 136 bytes.
2452	 */
2453	save	%sp, -CCFSZ - 136, %sp
2454	mov	%g2, %l2		! save globals in %l registers
2455	mov	%g3, %l3
2456	mov	%g4, %l4
2457	mov	%g5, %l5
2458	mov	%g6, %l6
2459	mov	%g7, %l7
2460	/*
2461	 * Saving the fpu registers is expensive, so do it iff the fsr
2462	 * stored in the sigcontext shows that the fpu is enabled.
2463	 */
2464	ld	[%fp + 64 + 16 + SC_PSR_OFFSET], %l0
2465	sethi	%hi(PSR_EF), %l1	! FPU enable bit is too high for andcc
2466	andcc	%l0, %l1, %l0		! %l0 = fpu enable bit
2467	be	1f			! if not set, skip the saves
2468	 rd	%y, %l1			! in any case, save %y
2469
2470	! fpu is enabled, oh well
2471	st	%fsr, [%sp + CCFSZ + 0]
2472	std	%f0, [%sp + CCFSZ + 8]
2473	std	%f2, [%sp + CCFSZ + 16]
2474	std	%f4, [%sp + CCFSZ + 24]
2475	std	%f6, [%sp + CCFSZ + 32]
2476	std	%f8, [%sp + CCFSZ + 40]
2477	std	%f10, [%sp + CCFSZ + 48]
2478	std	%f12, [%sp + CCFSZ + 56]
2479	std	%f14, [%sp + CCFSZ + 64]
2480	std	%f16, [%sp + CCFSZ + 72]
2481	std	%f18, [%sp + CCFSZ + 80]
2482	std	%f20, [%sp + CCFSZ + 88]
2483	std	%f22, [%sp + CCFSZ + 96]
2484	std	%f24, [%sp + CCFSZ + 104]
2485	std	%f26, [%sp + CCFSZ + 112]
2486	std	%f28, [%sp + CCFSZ + 120]
2487	std	%f30, [%sp + CCFSZ + 128]
2488
24891:
2490	ldd	[%fp + 64], %o0		! sig, code
2491	ld	[%fp + 76], %o3		! arg3
2492	call	%g1			! (*sa->sa_handler)(sig,code,scp,arg3)
2493	 add	%fp, 64 + 16, %o2	! scp
2494
2495	/*
2496	 * Now that the handler has returned, re-establish all the state
2497	 * we just saved above, then do a sigreturn.
2498	 */
2499	tst	%l0			! reload fpu registers?
2500	be	1f			! if not, skip the loads
2501	 wr	%l1, %g0, %y		! in any case, restore %y
2502
2503	ld	[%sp + CCFSZ + 0], %fsr
2504	ldd	[%sp + CCFSZ + 8], %f0
2505	ldd	[%sp + CCFSZ + 16], %f2
2506	ldd	[%sp + CCFSZ + 24], %f4
2507	ldd	[%sp + CCFSZ + 32], %f6
2508	ldd	[%sp + CCFSZ + 40], %f8
2509	ldd	[%sp + CCFSZ + 48], %f10
2510	ldd	[%sp + CCFSZ + 56], %f12
2511	ldd	[%sp + CCFSZ + 64], %f14
2512	ldd	[%sp + CCFSZ + 72], %f16
2513	ldd	[%sp + CCFSZ + 80], %f18
2514	ldd	[%sp + CCFSZ + 88], %f20
2515	ldd	[%sp + CCFSZ + 96], %f22
2516	ldd	[%sp + CCFSZ + 104], %f24
2517	ldd	[%sp + CCFSZ + 112], %f26
2518	ldd	[%sp + CCFSZ + 120], %f28
2519	ldd	[%sp + CCFSZ + 128], %f30
2520
25211:
2522	mov	%l2, %g2
2523	mov	%l3, %g3
2524	mov	%l4, %g4
2525	mov	%l5, %g5
2526	mov	%l6, %g6
2527	mov	%l7, %g7
2528
2529	restore	%g0, SYS_sigreturn, %g1	! get registers back & set syscall #
2530	add	%sp, 64 + 16, %o0	! compute scp
2531	t	ST_SYSCALL		! sigreturn(scp)
2532	! sigreturn does not return unless it fails
2533	mov	SYS_exit, %g1		! exit(errno)
2534	t	ST_SYSCALL
2535_esigcode:
2536
2537/*
2538 * Primitives
2539 */
2540
2541#ifdef GPROF
2542	.globl	mcount
2543#define	ENTRY(x) \
2544	.globl _##x; _##x: \
2545	save	%sp, -CCFSZ, %sp; \
2546	call	mcount; \
2547	nop; \
2548	restore
2549#else
2550#define	ENTRY(x)	.globl _##x; _##x:
2551#endif
2552#define	ALTENTRY(x)	.globl _##x; _##x:
2553
2554/*
2555 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
2556 *
2557 * Copy a null terminated string from the user address space into
2558 * the kernel address space.
2559 */
2560ENTRY(copyinstr)
2561	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2562#ifdef DIAGNOSTIC
2563	tst	%o2			! kernel should never give maxlen <= 0
2564	ble	1f
2565	 EMPTY
2566#endif
2567	set	KERNBASE, %o4
2568	cmp	%o0, %o4		! fromaddr < KERNBASE?
2569	blu,a	Lcsdocopy		! yes, go do it
2570	sethi	%hi(_cpcb), %o4		! (first instr of copy)
2571
2572	b	Lcsdone			! no, return EFAULT
2573	 mov	EFAULT, %o0
2574
25751:
2576	sethi	%hi(2f), %o0
2577	call	_panic
2578	 or	%lo(2f), %o0, %o0
25792:	.asciz	"copyinstr"
2580	ALIGN
2581
2582/*
2583 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
2584 *
2585 * Copy a null terminated string from the kernel
2586 * address space to the user address space.
2587 */
2588ENTRY(copyoutstr)
2589	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2590#ifdef DIAGNOSTIC
2591	tst	%o2
2592	ble	1f
2593	 EMPTY
2594#endif
2595	set	KERNBASE, %o4
2596	cmp	%o1, %o4		! toaddr < KERNBASE?
2597	blu,a	Lcsdocopy		! yes, go do it
2598	 sethi	%hi(_cpcb), %o4		! (first instr of copy)
2599
2600	b	Lcsdone			! no, return EFAULT
2601	 mov	EFAULT, %o0
2602
26031:
2604	sethi	%hi(2f), %o0
2605	call	_panic
2606	 or	%lo(2f), %o0, %o0
26072:	.asciz	"copyoutstr"
2608	ALIGN
2609
2610Lcsdocopy:
2611!	sethi	%hi(_cpcb), %o4		! (done earlier)
2612	ld	[%o4 + %lo(_cpcb)], %o4	! catch faults
2613	set	Lcsfault, %o5
2614	st	%o5, [%o4 + PCB_ONFAULT]
2615
2616	mov	%o1, %o5		!	save = toaddr;
2617! XXX should do this in bigger chunks when possible
26180:					! loop:
2619	ldsb	[%o0], %g1		!	c = *fromaddr;
2620	tst	%g1
2621	stb	%g1, [%o1]		!	*toaddr++ = c;
2622	be	1f			!	if (c == NULL)
2623	 inc	%o1			!		goto ok;
2624	deccc	%o2			!	if (--len > 0) {
2625	bg	0b			!		fromaddr++;
2626	 inc	%o0			!		goto loop;
2627					!	}
2628					!
2629	b	Lcsdone			!	error = ENAMETOOLONG;
2630	 mov	ENAMETOOLONG, %o0	!	goto done;
26311:					! ok:
2632	clr	%o0			!    error = 0;
2633Lcsdone:				! done:
2634	sub	%o1, %o5, %o1		!	len = to - save;
2635	tst	%o3			!	if (lencopied)
2636	bnz,a	3f
2637	 st	%o1, [%o3]		!		*lencopied = len;
26383:
2639	retl				! cpcb->pcb_onfault = 0;
2640	 st	%g0, [%o4 + PCB_ONFAULT]! return (error);
2641
2642Lcsfault:
2643	b	Lcsdone			! error = EFAULT;
2644	 mov	EFAULT, %o0		! goto ret;
2645
2646/*
2647 * copystr(fromaddr, toaddr, maxlength, &lencopied)
2648 *
2649 * Copy a null terminated string from one point to another in
2650 * the kernel address space.  (This is a leaf procedure, but
2651 * it does not seem that way to the C compiler.)
2652 */
2653ENTRY(copystr)
2654#ifdef DIAGNOSTIC
2655	tst	%o2			! 	if (maxlength <= 0)
2656	ble	4f			!		panic(...);
2657	 EMPTY
2658#endif
2659	mov	%o1, %o5		!	to0 = to;
26600:					! loop:
2661	ldsb	[%o0], %o4		!	c = *from;
2662	tst	%o4
2663	stb	%o4, [%o1]		!	*to++ = c;
2664	be	1f			!	if (c == 0)
2665	 inc	%o1			!		goto ok;
2666	deccc	%o2			!	if (--len > 0) {
2667	bg,a	0b			!		from++;
2668	 inc	%o0			!		goto loop;
2669	b	2f			!	}
2670	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
26711:					! ok:
2672	clr	%o0			!	ret = 0;
26732:
2674	sub	%o1, %o5, %o1		!	len = to - to0;
2675	tst	%o3			!	if (lencopied)
2676	bnz,a	3f
2677	 st	%o1, [%o3]		!		*lencopied = len;
26783:
2679	retl
2680	 nop
2681#ifdef DIAGNOSTIC
26824:
2683	sethi	%hi(5f), %o0
2684	call	_panic
2685	 or	%lo(5f), %o0, %o0
26865:
2687	.asciz	"copystr"
2688	ALIGN
2689#endif
2690
2691/*
2692 * Copyin(src, dst, len)
2693 *
2694 * Copy specified amount of data from user space into the kernel.
2695 */
2696ENTRY(copyin)
2697	set	KERNBASE, %o3
2698	cmp	%o0, %o3		! src < KERNBASE?
2699	blu,a	Ldocopy			! yes, can try it
2700	 sethi	%hi(_cpcb), %o3
2701
2702	/* source address points into kernel space: return EFAULT */
2703	retl
2704	 mov	EFAULT, %o0
2705
2706/*
2707 * Copyout(src, dst, len)
2708 *
2709 * Copy specified amount of data from kernel to user space.
2710 * Just like copyin, except that the `dst' addresses are user space
2711 * rather than the `src' addresses.
2712 */
2713ENTRY(copyout)
2714	set	KERNBASE, %o3
2715	cmp	%o1, %o3		! dst < KERBASE?
2716	blu,a	Ldocopy
2717	 sethi	%hi(_cpcb), %o3
2718
2719	/* destination address points into kernel space: return EFAULT */
2720	retl
2721	 mov	EFAULT, %o0
2722
2723	/*
2724	 * ******NOTE****** this depends on bcopy() not using %g7
2725	 */
2726Ldocopy:
2727!	sethi	%hi(_cpcb), %o3
2728	ld	[%o3 + %lo(_cpcb)], %o3
2729	set	Lcopyfault, %o4
2730	mov	%o7, %g7		! save return address
2731	call	_bcopy			! bcopy(src, dst, len)
2732	 st	%o4, [%o3 + PCB_ONFAULT]
2733
2734	sethi	%hi(_cpcb), %o3
2735	ld	[%o3 + %lo(_cpcb)], %o3
2736	st	%g0, [%o3 + PCB_ONFAULT]
2737	jmp	%g7 + 8
2738	 clr	%o0			! return 0
2739
2740! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
2741! Note that although we were in bcopy, there is no state to clean up;
2742! the only special thing is that we have to return to [g7 + 8] rather than
2743! [o7 + 8].
2744Lcopyfault:
2745	sethi	%hi(_cpcb), %o3
2746	ld	[%o3 + %lo(_cpcb)], %o3
2747	st	%g0, [%o3 + PCB_ONFAULT]
2748	jmp	%g7 + 8
2749	 mov	EFAULT, %o0
2750
2751
2752/*
2753 * Write all user windows presently in the CPU back to the user's stack.
2754 * We just do `save' instructions until pcb_uw == 0.
2755 *
2756 *	p = cpcb;
2757 *	nsaves = 0;
2758 *	while (p->pcb_uw > 0)
2759 *		save(), nsaves++;
2760 *	while (--nsaves >= 0)
2761 *		restore();
2762 */
2763ENTRY(write_user_windows)
2764	sethi	%hi(_cpcb), %g6
2765	ld	[%g6 + %lo(_cpcb)], %g6
2766	b	2f
2767	 clr	%g5
27681:
2769	save	%sp, -64, %sp
27702:
2771	ld	[%g6 + PCB_UW], %g7
2772	tst	%g7
2773	bg,a	1b
2774	 inc	%g5
27753:
2776	deccc	%g5
2777	bge,a	3b
2778	 restore
2779	retl
2780	 nop
2781
2782
2783	.comm	_want_resched,4
2784/*
2785 * Masterpaddr is the p->p_addr of the last process on the processor.
2786 * XXX masterpaddr is almost the same as cpcb
2787 * XXX should delete this entirely
2788 */
2789	.comm	_masterpaddr, 4
2790
2791/*
2792 * Switch statistics (for later tweaking):
2793 *	nswitchdiff = p1 => p2 (i.e., chose different process)
2794 *	nswitchexit = number of calls to swtchexit()
2795 *	_cnt.v_swtch = total calls to swtch+swtchexit
2796 */
2797	.comm	_nswitchdiff, 4
2798	.comm	_nswitchexit, 4
2799
2800/*
2801 * REGISTER USAGE IN swtch AND swtchexit:
2802 * This is split into two phases, more or less
2803 * `before we locate a new proc' and `after'.
2804 * Some values are the same in both phases.
2805 * Note that the %o0-registers are not preserved across
2806 * the psr change when entering a new process, since this
2807 * usually changes the CWP field (hence heavy usage of %g's).
2808 *
2809 *	%g1 = oldpsr (excluding ipl bits)
2810 *	%g2 = %hi(_whichqs); newpsr
2811 *	%g3 = p
2812 *	%g4 = lastproc
2813 *	%g5 = <free>; newpcb
2814 *	%g6 = %hi(_cpcb)
2815 *	%g7 = %hi(_curproc)
2816 *	%o0 = tmp 1
2817 *	%o1 = tmp 2
2818 *	%o2 = tmp 3
2819 *	%o3 = tmp 4; whichqs; vm
2820 *	%o4 = tmp 4; which; sswap
2821 *	%o5 = tmp 5; q; <free>
2822 */
2823
2824/*
2825 * swtchexit is called only from cpu_exit() before the current process
2826 * has freed its kernel stack; we must free it.  (curproc is already NULL.)
2827 *
2828 * We lay the process to rest by changing to the `idle' kernel stack,
2829 * and note that the `last loaded process' is nonexistent.
2830 */
2831ENTRY(swtchexit)
2832	mov	%o0, %g2		! save the
2833	mov	%o1, %g3		! ... three parameters
2834	mov	%o2, %g4		! ... to kmem_free
2835
2836	/*
2837	 * Change pcb to idle u. area, i.e., set %sp to top of stack
2838	 * and %psr to PSR_S|PSR_ET, and set cpcb to point to _idle_u.
2839	 * Once we have left the old stack, we can call kmem_free to
2840	 * destroy it.  Call it any sooner and the register windows
2841	 * go bye-bye.
2842	 */
2843	set	_idle_u, %g5
2844	sethi	%hi(_cpcb), %g6
2845	mov	1, %g7
2846	wr	%g0, PSR_S, %psr	! change to window 0, traps off
2847	wr	%g0, 2, %wim		! and make window 1 the trap window
2848	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = &idle_u
2849	st	%g7, [%g5 + PCB_WIM]	! idle_u.pcb_wim = log2(2) = 1
2850	set	_idle_u + UPAGES * NBPG - CCFSZ, %sp	! set new %sp
2851#ifdef DEBUG
2852	set	_idle_u, %l6
2853	SET_SP_REDZONE(%l6, %l5)
2854#endif
2855	wr	%g0, PSR_S|PSR_ET, %psr	! and then enable traps
2856	mov	%g2, %o0		! now ready to call kmem_free
2857	mov	%g3, %o1
2858	call	_kmem_free
2859	 mov	%g4, %o2
2860
2861	/*
2862	 * Now fall through to `the last swtch'.  %g6 was set to
2863	 * %hi(_cpcb), but may have been clobbered in kmem_free,
2864	 * so all the registers described below will be set here.
2865	 *
2866	 * REGISTER USAGE AT THIS POINT:
2867	 *	%g1 = oldpsr (excluding ipl bits)
2868	 *	%g2 = %hi(_whichqs)
2869	 *	%g4 = lastproc
2870	 *	%g6 = %hi(_cpcb)
2871	 *	%g7 = %hi(_curproc)
2872	 *	%o0 = tmp 1
2873	 *	%o1 = tmp 2
2874	 *	%o3 = whichqs
2875	 */
2876
2877	INCR(_nswitchexit)		! nswitchexit++;
2878	INCR(_cnt+V_SWTCH)		! cnt.v_switch++;
2879
2880	mov	PSR_S|PSR_ET, %g1	! oldpsr = PSR_S | PSR_ET;
2881	sethi	%hi(_whichqs), %g2
2882	clr	%g4			! lastproc = NULL;
2883	sethi	%hi(_cpcb), %g6
2884	sethi	%hi(_curproc), %g7
2885	/* FALLTHROUGH */
2886
2887/*
2888 * When no processes are on the runq, swtch
2889 * idles here watiing for something to come ready.
2890 * The registers are set up as noted above.
2891 */
2892	.globl	idle
2893idle:
2894	st	%g0, [%g7 + %lo(_curproc)] ! curproc = NULL;
2895	wr	%g1, 0, %psr		! (void) spl0();
28961:					! spin reading _whichqs until nonzero
2897	ld	[%g2 + %lo(_whichqs)], %o3
2898	tst	%o3
2899	bnz,a	Lsw_scan
2900	 wr	%g1, PIL_CLOCK << 8, %psr	! (void) splclock();
2901	b,a	1b
2902
2903Lsw_panic_rq:
2904	sethi	%hi(1f), %o0
2905	call	_panic
2906	 or	%lo(1f), %o0, %o0
2907Lsw_panic_wchan:
2908	sethi	%hi(2f), %o0
2909	call	_panic
2910	 or	%lo(2f), %o0, %o0
2911Lsw_panic_srun:
2912	sethi	%hi(3f), %o0
2913	call	_panic
2914	 or	%lo(3f), %o0, %o0
29151:	.asciz	"swtch rq"
29162:	.asciz	"swtch wchan"
29173:	.asciz	"swtch SRUN"
2918	ALIGN
2919
2920/*
2921 * cpu_swtch() picks a process to run and runs it, saving the current
2922 * one away.  On the assumption that (since most workstations are
2923 * single user machines) the chances are quite good that the new
2924 * process will turn out to be the current process, we defer saving
2925 * it here until we have found someone to load.  If that someone
2926 * is the current process we avoid both store and load.
2927 *
2928 * cpu_swtch() is always entered at splstatclock or splhigh.
2929 *
2930 * IT MIGHT BE WORTH SAVING BEFORE ENTERING idle TO AVOID HAVING TO
2931 * SAVE LATER WHEN SOMEONE ELSE IS READY ... MUST MEASURE!
2932 */
2933	.globl	_runtime
2934	.globl	_time
2935ENTRY(cpu_swtch)
2936	/*
2937	 * REGISTER USAGE AT THIS POINT:
2938	 *	%g1 = oldpsr (excluding ipl bits)
2939	 *	%g2 = %hi(_whichqs)
2940	 *	%g3 = p
2941	 *	%g4 = lastproc
2942	 *	%g5 = tmp 0
2943	 *	%g6 = %hi(_cpcb)
2944	 *	%g7 = %hi(_curproc)
2945	 *	%o0 = tmp 1
2946	 *	%o1 = tmp 2
2947	 *	%o2 = tmp 3
2948	 *	%o3 = tmp 4, then at Lsw_scan, whichqs
2949	 *	%o4 = tmp 5, then at Lsw_scan, which
2950	 *	%o5 = tmp 6, then at Lsw_scan, q
2951	 */
2952	sethi	%hi(_whichqs), %g2	! set up addr regs
2953	sethi	%hi(_cpcb), %g6
2954	ld	[%g6 + %lo(_cpcb)], %o0
2955	std	%o6, [%o0 + PCB_SP]	! cpcb->pcb_<sp,pc> = <sp,pc>;
2956	rd	%psr, %g1		! oldpsr = %psr;
2957	sethi	%hi(_curproc), %g7
2958	ld	[%g7 + %lo(_curproc)], %g4	! lastproc = curproc;
2959	st	%g1, [%o0 + PCB_PSR]	! cpcb->pcb_psr = oldpsr;
2960	andn	%g1, PSR_PIL, %g1	! oldpsr &= ~PSR_PIL;
2961
2962	/*
2963	 * In all the fiddling we did to get this far, the thing we are
2964	 * waiting for might have come ready, so let interrupts in briefly
2965	 * before checking for other processes.  Note that we still have
2966	 * curproc set---we have to fix this or we can get in trouble with
2967	 * the run queues below.
2968	 */
2969	st	%g0, [%g7 + %lo(_curproc)]	! curproc = NULL;
2970	wr	%g1, 0, %psr			! (void) spl0();
2971	nop; nop; nop				! paranoia
2972	wr	%g1, PIL_CLOCK <<8 , %psr	! (void) splclock();
2973
2974Lsw_scan:
2975	nop; nop; nop				! paranoia
2976	/*
2977	 * We're about to run a (possibly) new process.  Set runtime
2978	 * to indicate its start time.
2979	 */
2980	sethi	%hi(_time), %o0
2981	ldd	[%o0 + %lo(_time)], %o2
2982	sethi	%hi(_runtime), %o0
2983	std	%o2, [%o0 + %lo(_runtime)]
2984
2985	ld	[%g2 + %lo(_whichqs)], %o3
2986
2987	/*
2988	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
2989	 * branches to idle if ffs(whichqs) was 0.
2990	 */
2991	set	ffstab, %o2
2992	andcc	%o3, 0xff, %o1		! byte 0 zero?
2993	bz,a	1f			! yes, try byte 1
2994	 srl	%o3, 8, %o0
2995	b	2f			! ffs = ffstab[byte0]; which = ffs - 1;
2996	 ldsb	[%o2 + %o1], %o0
29971:	andcc	%o0, 0xff, %o1		! byte 1 zero?
2998	bz,a	1f			! yes, try byte 2
2999	 srl	%o0, 8, %o0
3000	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte1] + 7;
3001	b	3f
3002	 add	%o0, 7, %o4
30031:	andcc	%o0, 0xff, %o1		! byte 2 zero?
3004	bz,a	1f			! yes, try byte 3
3005	 srl	%o0, 8, %o0
3006	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte2] + 15;
3007	b	3f
3008	 add	%o0, 15, %o4
30091:	ldsb	[%o2 + %o0], %o0	! ffs = ffstab[byte3] + 24
3010	addcc	%o0, 24, %o0		! (note that ffstab[0] == -24)
3011	bz	idle			! if answer was 0, go idle
3012	 EMPTY
30132:	sub	%o0, 1, %o4		! which = ffs(whichqs) - 1
30143:	/* end optimized inline expansion */
3015
3016	/*
3017	 * We found a nonempty run queue.  Take its first process.
3018	 */
3019	set	_qs, %o5		! q = &qs[which];
3020	sll	%o4, 3, %o0
3021	add	%o0, %o5, %o5
3022	ld	[%o5], %g3		! p = q->ph_link;
3023	cmp	%g3, %o5		! if (p == q)
3024	be	Lsw_panic_rq		!	panic("swtch rq");
3025	 EMPTY
3026	ld	[%g3], %o0		! tmp0 = p->p_link;
3027	st	%o0, [%o5]		! q->ph_link = tmp0;
3028	st	%o5, [%o0 + 4]		! tmp0->p_rlink = q;
3029	cmp	%o0, %o5		! if (tmp0 == q)
3030	bne	1f
3031	 EMPTY
3032	mov	1, %o1			!	whichqs &= ~(1 << which);
3033	sll	%o1, %o4, %o1
3034	andn	%o3, %o1, %o3
3035	st	%o3, [%g2 + %lo(_whichqs)]
30361:
3037	/*
3038	 * PHASE TWO: NEW REGISTER USAGE:
3039	 *	%g1 = oldpsr (excluding ipl bits)
3040	 *	%g2 = newpsr
3041	 *	%g3 = p
3042	 *	%g4 = lastproc
3043	 *	%g5 = newpcb
3044	 *	%g6 = %hi(_cpcb)
3045	 *	%g7 = %hi(_curproc)
3046	 *	%o0 = tmp 1
3047	 *	%o1 = tmp 2
3048	 *	%o2 = tmp 3
3049	 *	%o3 = vm
3050	 *	%o4 = sswap
3051	 *	%o5 = <free>
3052	 */
3053
3054	/* firewalls */
3055	ld	[%g3 + P_WCHAN], %o0	! if (p->p_wchan)
3056	tst	%o0
3057	bne	Lsw_panic_wchan		!	panic("swtch wchan");
3058	 EMPTY
3059	ldsb	[%g3 + P_STAT], %o0	! if (p->p_stat != SRUN)
3060	cmp	%o0, SRUN
3061	bne	Lsw_panic_srun		!	panic("swtch SRUN");
3062	 EMPTY
3063
3064	/*
3065	 * Committed to running process p.
3066	 * It may be the same as the one we were running before.
3067	 */
3068	sethi	%hi(_want_resched), %o0
3069	st	%g0, [%o0 + %lo(_want_resched)]	! want_resched = 0;
3070	ld	[%g3 + P_ADDR], %g5		! newpcb = p->p_addr;
3071	st	%g0, [%g3 + 4]			! p->p_rlink = NULL;
3072	ld	[%g5 + PCB_PSR], %g2		! newpsr = newpcb->pcb_psr;
3073	st	%g3, [%g7 + %lo(_curproc)]	! curproc = p;
3074
3075	cmp	%g3, %g4		! p == lastproc?
3076	be,a	Lsw_sameproc		! yes, go return 0
3077	 wr	%g2, 0, %psr		! (after restoring ipl)
3078
3079	/*
3080	 * Not the old process.  Save the old process, if any;
3081	 * then load p.
3082	 */
3083	tst	%g4
3084	be,a	Lsw_load		! if no old process, go load
3085	 wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3086
3087	INCR(_nswitchdiff)		! clobbers %o0,%o1
3088	/*
3089	 * save: write back all windows (including the current one).
3090	 * XXX	crude; knows nwindows <= 8
3091	 */
3092#define	SAVE save %sp, -64, %sp
3093	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE	/* 7 of each: */
3094	restore; restore; restore; restore; restore; restore; restore
3095
3096	/*
3097	 * Load the new process.  To load, we must change stacks and
3098	 * alter cpcb and %wim, hence we must disable traps.  %psr is
3099	 * currently equal to oldpsr (%g1) ^ (PIL_CLOCK << 8);
3100	 * this means that PSR_ET is on.  Likewise, PSR_ET is on
3101	 * in newpsr (%g2), although we do not know newpsr's ipl.
3102	 *
3103	 * We also must load up the `in' and `local' registers.
3104	 */
3105	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3106Lsw_load:
3107!	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr	! done above
3108	/* compute new wim */
3109	ld	[%g5 + PCB_WIM], %o0
3110	mov	1, %o1
3111	sll	%o1, %o0, %o0
3112	wr	%o0, 0, %wim		! %wim = 1 << newpcb->pcb_wim;
3113	/* now must not change %psr for 3 more instrs */
3114/*1*/	set	PSR_EF|PSR_EC, %o0
3115/*2*/	andn	%g2, %o0, %g2		! newpsr &= ~(PSR_EF|PSR_EC);
3116/*3*/	nop
3117	/* set new psr, but with traps disabled */
3118	wr	%g2, PSR_ET, %psr	! %psr = newpsr ^ PSR_ET;
3119	/* set new cpcb */
3120	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = newpcb;
3121	/* XXX update masterpaddr too */
3122	sethi	%hi(_masterpaddr), %g7
3123	st	%g5, [%g7 + %lo(_masterpaddr)]
3124	ldd	[%g5 + PCB_SP], %o6	! <sp,pc> = newpcb->pcb_<sp,pc>
3125	/* load window */
3126	ldd	[%sp + (0*8)], %l0
3127	ldd	[%sp + (1*8)], %l2
3128	ldd	[%sp + (2*8)], %l4
3129	ldd	[%sp + (3*8)], %l6
3130	ldd	[%sp + (4*8)], %i0
3131	ldd	[%sp + (5*8)], %i2
3132	ldd	[%sp + (6*8)], %i4
3133	ldd	[%sp + (7*8)], %i6
3134#ifdef DEBUG
3135	mov	%g5, %o0
3136	SET_SP_REDZONE(%o0, %o1)
3137	CHECK_SP_REDZONE(%o0, %o1)
3138#endif
3139	/* finally, enable traps */
3140	wr	%g2, 0, %psr		! psr = newpsr;
3141
3142	/*
3143	 * Now running p.  Make sure it has a context so that it
3144	 * can talk about user space stuff.  (Its pcb_uw is currently
3145	 * zero so it is safe to have interrupts going here.)
3146	 */
3147	ld	[%g3 + P_VMSPACE], %o3	! vm = p->p_vmspace;
3148	ld	[%o3 + VM_PMAP_CTX], %o0! if (vm->vm_pmap.pm_ctx != NULL)
3149	tst	%o0
3150	bnz,a	Lsw_havectx		!	goto havecontext;
3151	 ld	[%o3 + VM_PMAP_CTXNUM], %o0
3152
3153	/* p does not have a context: call ctx_alloc to get one */
3154	save	%sp, -CCFSZ, %sp
3155	call	_ctx_alloc		! ctx_alloc(&vm->vm_pmap);
3156	 add	%i3, VM_PMAP, %o0
3157	ret
3158	 restore
3159
3160	/* p does have a context: just switch to it */
3161Lsw_havectx:
3162!	ld	[%o3 + VM_PMAP_CTXNUM], %o0	! (done in delay slot)
3163	set	AC_CONTEXT, %o1
3164	stba	%o0, [%o1] ASI_CONTROL	! setcontext(vm->vm_pmap.pm_ctxnum);
3165	retl
3166	 nop
3167
3168Lsw_sameproc:
3169	/*
3170	 * We are resuming the process that was running at the
3171	 * call to swtch().  Just set psr ipl and return.
3172	 */
3173!	wr	%g2, 0 %psr		! %psr = newpsr; (done earlier)
3174	nop
3175	retl
3176	 nop
3177
3178
3179/*
3180 * Snapshot the current process so that stack frames are up to date.
3181 * This is called from two places:
3182 *  - just before a crash dump, for the stack update;
3183 *  - in cpu_fork(), before copying the kernel stack.
3184 * In the latter case the pcb and stack will be copied to the child,
3185 * and the child will be made runnable.  Eventually swtch() will run
3186 * it.  When it does, we want its pcb_pc set so that we can appear
3187 * to return 1 from cpu_fork(), so we store the current sp and psr
3188 * in the given pcb, and set its pcb_pc to our return-1 code (offset
3189 * by -8 due to call/ret conventions).  This is not useful in the crash
3190 * dump code but it is easiest to do it anyway.
3191 */
3192ENTRY(snapshot)
3193	st	%o6, [%o0 + PCB_SP]	! save sp
3194	set	1f - 8, %o1		! set child-return pc
3195	st	%o1, [%o0 + PCB_PC]
3196	rd	%psr, %o1		! save psr
3197	st	%o1, [%o0 + PCB_PSR]
3198
3199	/*
3200	 * Just like swtch(); same XXX comments apply.
3201	 * 7 of each.  Minor tweak: the 7th restore is
3202	 * done after a ret.
3203	 */
3204	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE
3205	restore; restore; restore; restore; restore; restore; ret; restore
3206
32071:	/* this is reached only after a child gets chosen in swtch() */
3208	mov	1, %i0			! return 1 from cpu_fork
3209	ret
3210	 restore
3211
3212/*
3213 * {fu,su}{,i}{byte,word}
3214 */
3215ALTENTRY(fuiword)
3216ENTRY(fuword)
3217	set	KERNBASE, %o2
3218	cmp	%o0, %o2		! if addr >= KERNBASE...
3219	bgeu	Lfsbadaddr
3220	EMPTY
3221	btst	3, %o0			! or has low bits set...
3222	bnz	Lfsbadaddr		!	go return -1
3223	EMPTY
3224	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3225	ld	[%o2 + %lo(_cpcb)], %o2
3226	set	Lfserr, %o3
3227	st	%o3, [%o2 + PCB_ONFAULT]
3228	ld	[%o0], %o0		! fetch the word
3229	retl				! phew, made it, return the word
3230	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3231
3232Lfserr:
3233	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3234Lfsbadaddr:
3235	retl				! and return error indicator
3236	mov	-1, %o0
3237
3238	/*
3239	 * This is just like Lfserr, but it's a global label that allows
3240	 * mem_access_fault() to check to see that we don't want to try to
3241	 * page in the fault.  It's used by fuswintr() etc.
3242	 */
3243	.globl	_Lfsbail
3244_Lfsbail:
3245	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3246	retl				! and return error indicator
3247	mov	-1, %o0
3248
3249	/*
3250	 * Like fusword but callable from interrupt context.
3251	 * Fails if data isn't resident.
3252	 */
3253ENTRY(fuswintr)
3254	set	KERNBASE, %o2
3255	cmp	%o0, %o2		! if addr >= KERNBASE
3256	bgeu	Lfsbadaddr		!	return error
3257	EMPTY
3258	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3259	ld	[%o2 + %lo(_cpcb)], %o2
3260	set	_Lfsbail, %o3
3261	st	%o3, [%o2 + PCB_ONFAULT]
3262	lduh	[%o0], %o0		! fetch the halfword
3263	retl				! made it
3264	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3265
3266ENTRY(fusword)
3267	set	KERNBASE, %o2
3268	cmp	%o0, %o2		! if addr >= KERNBASE
3269	bgeu	Lfsbadaddr		!	return error
3270	EMPTY
3271	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3272	ld	[%o2 + %lo(_cpcb)], %o2
3273	set	Lfserr, %o3
3274	st	%o3, [%o2 + PCB_ONFAULT]
3275	lduh	[%o0], %o0		! fetch the halfword
3276	retl				! made it
3277	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3278
3279ALTENTRY(fuibyte)
3280ENTRY(fubyte)
3281	set	KERNBASE, %o2
3282	cmp	%o0, %o2		! if addr >= KERNBASE
3283	bgeu	Lfsbadaddr		!	return error
3284	EMPTY
3285	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3286	ld	[%o2 + %lo(_cpcb)], %o2
3287	set	Lfserr, %o3
3288	st	%o3, [%o2 + PCB_ONFAULT]
3289	ldub	[%o0], %o0		! fetch the byte
3290	retl				! made it
3291	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3292
3293ALTENTRY(suiword)
3294ENTRY(suword)
3295	set	KERNBASE, %o2
3296	cmp	%o0, %o2		! if addr >= KERNBASE ...
3297	bgeu	Lfsbadaddr
3298	EMPTY
3299	btst	3, %o0			! or has low bits set ...
3300	bnz	Lfsbadaddr		!	go return error
3301	EMPTY
3302	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3303	ld	[%o2 + %lo(_cpcb)], %o2
3304	set	Lfserr, %o3
3305	st	%o3, [%o2 + PCB_ONFAULT]
3306	st	%o1, [%o0]		! store the word
3307	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3308	retl				! and return 0
3309	clr	%o0
3310
3311ENTRY(suswintr)
3312	set	KERNBASE, %o2
3313	cmp	%o0, %o2		! if addr >= KERNBASE
3314	bgeu	Lfsbadaddr		!	go return error
3315	EMPTY
3316	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3317	ld	[%o2 + %lo(_cpcb)], %o2
3318	set	_Lfsbail, %o3
3319	st	%o3, [%o2 + PCB_ONFAULT]
3320	sth	%o1, [%o0]		! store the halfword
3321	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3322	retl				! and return 0
3323	clr	%o0
3324
3325ENTRY(susword)
3326	set	KERNBASE, %o2
3327	cmp	%o0, %o2		! if addr >= KERNBASE
3328	bgeu	Lfsbadaddr		!	go return error
3329	EMPTY
3330	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3331	ld	[%o2 + %lo(_cpcb)], %o2
3332	set	Lfserr, %o3
3333	st	%o3, [%o2 + PCB_ONFAULT]
3334	sth	%o1, [%o0]		! store the halfword
3335	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3336	retl				! and return 0
3337	clr	%o0
3338
3339ALTENTRY(suibyte)
3340ENTRY(subyte)
3341	set	KERNBASE, %o2
3342	cmp	%o0, %o2		! if addr >= KERNBASE
3343	bgeu	Lfsbadaddr		!	go return error
3344	EMPTY
3345	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3346	ld	[%o2 + %lo(_cpcb)], %o2
3347	set	Lfserr, %o3
3348	st	%o3, [%o2 + PCB_ONFAULT]
3349	stb	%o1, [%o0]		! store the byte
3350	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3351	retl				! and return 0
3352	clr	%o0
3353
3354/* probeget and probeset are meant to be used during autoconfiguration */
3355
3356/*
3357 * probeget(addr, size) caddr_t addr; int size;
3358 *
3359 * Read or write a (byte,word,longword) from the given address.
3360 * Like {fu,su}{byte,halfword,word} but our caller is supposed
3361 * to know what he is doing... the address can be anywhere.
3362 *
3363 * We optimize for space, rather than time, here.
3364 */
3365ENTRY(probeget)
3366	! %o0 = addr, %o1 = (1,2,4)
3367	set	KERNBASE, %o2
3368	cmp	%o0, %o2		! if addr < KERNBASE
3369	blu	Lfsbadaddr		!	go return error
3370	 EMPTY
3371	sethi	%hi(_cpcb), %o2
3372	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3373	set	Lfserr, %o5
3374	st	%o5, [%o2 + PCB_ONFAULT]
3375	btst	1, %o1
3376	bnz,a	0f			! if (len & 1)
3377	 ldub	[%o0], %o0		!	value = *(char *)addr;
33780:	btst	2, %o1
3379	bnz,a	0f			! if (len & 2)
3380	 lduh	[%o0], %o0		!	value = *(short *)addr;
33810:	btst	4, %o1
3382	bnz,a	0f			! if (len & 4)
3383	 ld	[%o0], %o0		!	value = *(int *)addr;
33840:	retl				! made it, clear onfault and return
3385	 st	%g0, [%o2 + PCB_ONFAULT]
3386
3387/*
3388 * probeset(addr, size, val) caddr_t addr; int size, val;
3389 *
3390 * As above, but we return 0 on success.
3391 */
3392ENTRY(probeset)
3393	! %o0 = addr, %o1 = (1,2,4), %o2 = val
3394	set	KERNBASE, %o2
3395	cmp	%o0, %o2		! if addr < KERNBASE
3396	blu	Lfsbadaddr		!	go return error
3397	 EMPTY
3398	sethi	%hi(_cpcb), %o2
3399	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3400	set	Lfserr, %o5
3401	st	%o5, [%o2 + PCB_ONFAULT]
3402	btst	1, %o1
3403	bnz,a	0f			! if (len & 1)
3404	 stb	%o2, [%o0]		!	*(char *)addr = value;
34050:	btst	2, %o1
3406	bnz,a	0f			! if (len & 2)
3407	 sth	%o2, [%o0]		!	*(short *)addr = value;
34080:	btst	4, %o1
3409	bnz,a	0f			! if (len & 4)
3410	 st	%o2, [%o0]		!	*(int *)addr = value;
34110:	clr	%o0			! made it, clear onfault and return 0
3412	retl
3413	 st	%g0, [%o2 + PCB_ONFAULT]
3414
3415/*
3416 * Insert entry into doubly-linked queue.
3417 * We could just do this in C, but gcc does not do leaves well (yet).
3418 */
3419ENTRY(_insque)
3420	! %o0 = e = what to insert; %o1 = after = entry to insert after
3421	st	%o1, [%o0 + 4]		! e->prev = after;
3422	ld	[%o1], %o2		! tmp = after->next;
3423	st	%o2, [%o0]		! e->next = tmp;
3424	st	%o0, [%o1]		! after->next = e;
3425	retl
3426	st	%o0, [%o2 + 4]		! tmp->prev = e;
3427
3428
3429/*
3430 * Remove entry from doubly-linked queue.
3431 */
3432ENTRY(_remque)
3433	! %o0 = e = what to remove
3434	ld	[%o0], %o1		! n = e->next;
3435	ld	[%o0 + 4], %o2		! p = e->prev;
3436	st	%o2, [%o1 + 4]		! n->prev = p;
3437	retl
3438	st	%o1, [%o2]		! p->next = n;
3439
3440/*
3441 * copywords(src, dst, nbytes)
3442 *
3443 * Copy `nbytes' bytes from src to dst, both of which are word-aligned;
3444 * nbytes is a multiple of four.  It may, however, be zero, in which case
3445 * nothing is to be copied.
3446 */
3447ENTRY(copywords)
3448	! %o0 = src, %o1 = dst, %o2 = nbytes
3449	b	1f
3450	deccc	4, %o2
34510:
3452	st	%o3, [%o1 + %o2]
3453	deccc	4, %o2			! while ((n -= 4) >= 0)
34541:
3455	bge,a	0b			!    *(int *)(dst+n) = *(int *)(src+n);
3456	ld	[%o0 + %o2], %o3
3457	retl
3458	nop
3459
3460/*
3461 * qcopy(src, dst, nbytes)
3462 *
3463 * (q for `quad' or `quick', as opposed to b for byte/block copy)
3464 *
3465 * Just like copywords, but everything is multiples of 8.
3466 */
3467ENTRY(qcopy)
3468	b	1f
3469	deccc	8, %o2
34700:
3471	std	%o4, [%o1 + %o2]
3472	deccc	8, %o2
34731:
3474	bge,a	0b
3475	ldd	[%o0 + %o2], %o4
3476	retl
3477	nop
3478
3479/*
3480 * qzero(addr, nbytes)
3481 *
3482 * Zeroes `nbytes' bytes of a quad-aligned virtual address,
3483 * where nbytes is itself a multiple of 8.
3484 */
3485ENTRY(qzero)
3486	! %o0 = addr, %o1 = len (in bytes)
3487	clr	%g1
34880:
3489	deccc	8, %o1			! while ((n =- 8) >= 0)
3490	bge,a	0b
3491	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + n) = 0;
3492	retl
3493	nop
3494
3495/*
3496 * bzero(addr, len)
3497 *
3498 * We should unroll the loop, but at the moment this would
3499 * gain nothing since the `std' instructions are what limits us.
3500 */
3501ALTENTRY(blkclr)
3502ENTRY(bzero)
3503	! %o0 = addr, %o1 = len
3504
3505	! Optimize a common case: addr and len are both multiples of 8.
3506	or	%o0, %o1, %o2
3507	btst	7, %o2			! ((addr | len) & 7) != 0?
3508	bnz	1f			! if so, cannot optimize
3509	clr	%g1			! in any case, we want g1=0
3510
3511	/* `Good' operands, can just store doubles. */
35120:
3513	deccc	8, %o1			! while ((len -= 8) >= 0)
3514	bge,a	0b
3515	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + len) = 0;
3516	retl
3517	nop
3518
3519	/*
3520	 * Either the address is unaligned, or the count is not a
3521	 * multiple of 8, or both.  We will have to align the address
3522	 * in order to use anything `better' than stb.
3523	 */
35241:
3525	cmp	%o1, 15			! len >= 15?
3526	bge,a	Lstd			! yes, use std
3527	btst	1, %o0			! (but first check alignment)
3528
3529	! not enough to bother: do byte-at-a-time loop.
35302:
3531	deccc	%o1			! while (--len >= 0)
3532	bge,a	2b
3533	stb	%g0, [%o0 + %o1]	!	addr[len] = 0;
3534	retl
3535	nop
3536
3537Lstd:
3538	/*
3539	 * There are at least 15 bytes to zero.
3540	 * We may have to zero some initial stuff to align
3541	 * the address.
3542	 */
3543	bz,a	1f			! if (addr & 1) {
3544	btst	2, %o0
3545	stb	%g0, [%o0]		!	*addr = 0;
3546	inc	%o0			!	addr++;
3547	dec	%o1			!	len--;
3548	btst	2, %o0			! }
35491:
3550	bz,a	1f			! if (addr & 2) {
3551	btst	4, %o0
3552	sth	%g0, [%o0]		!	*(short *)addr = 0;
3553	inc	2, %o0			!	addr += 2;
3554	dec	2, %o1			!	len -= 2;
3555	btst	4, %o0			! }
35561:
3557	bz	1f			! if (addr & 4) {
3558	dec	8, %o1
3559	st	%g0, [%o0]		!	*(int *)addr = 0;
3560	inc	4, %o0			!	addr += 4;
3561	dec	4, %o1			!	len -= 4;
3562					! }
3563	/*
3564	 * Address is double word aligned; len is 8 less than
3565	 * the number of bytes remaining (i.e., len is 0 if
3566	 * the remaining count is 8, 1 if it is 9, etc.).
3567	 */
35681:
3569	std	%g0, [%o0]		! do {
35702:					!	*(quad *)addr = 0;
3571	inc	8, %o0			!	addr += 8;
3572	deccc	8, %o1			! } while ((len -= 8) >= 0);
3573	bge,a	2b
3574	std	%g0, [%o0]
3575
3576	/*
3577	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
3578	 * -6 => two bytes, etc.  Mop up this remainder, if any.
3579	 */
3580	btst	4, %o1
3581	bz	1f			! if (len & 4) {
3582	btst	2, %o1
3583	st	%g0, [%o0]		!	*(int *)addr = 0;
3584	inc	4, %o0			!	addr += 4;
35851:
3586	bz	1f			! if (len & 2) {
3587	btst	1, %o1
3588	sth	%g0, [%o0]		!	*(short *)addr = 0;
3589	inc	2, %o0			!	addr += 2;
35901:
3591	bnz,a	1f			! if (len & 1)
3592	stb	%g0, [%o0]		!	*addr = 0;
35931:
3594	retl
3595	nop
3596
3597/*
3598 * kernel bcopy/memcpy
3599 * Assumes regions do not overlap; has no useful return value.
3600 *
3601 * Must not use %g7 (see copyin/copyout above).
3602 */
3603
3604#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
3605
3606ENTRY(memcpy)
3607	/*
3608	 * Swap args for bcopy.  Gcc generates calls to memcpy for
3609	 * structure assignments.
3610	 */
3611	mov	%o0, %o3
3612	mov	%o1, %o0
3613	mov	%o3, %o1
3614ENTRY(bcopy)
3615	cmp	%o2, BCOPY_SMALL
3616Lbcopy_start:
3617	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
3618	btst	7, %o0		! (part of being fancy)
3619
3620	/*
3621	 * Not much to copy, just do it a byte at a time.
3622	 */
3623	deccc	%o2		! while (--len >= 0)
3624	bl	1f
3625	EMPTY
36260:
3627	inc	%o0
3628	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
3629	stb	%o4, [%o1]
3630	deccc	%o2
3631	bge	0b
3632	inc	%o1
36331:
3634	retl
3635	nop
3636	/* NOTREACHED */
3637
3638	/*
3639	 * Plenty of data to copy, so try to do it optimally.
3640	 */
3641Lbcopy_fancy:
3642	! check for common case first: everything lines up.
3643!	btst	7, %o0		! done already
3644	bne	1f
3645	EMPTY
3646	btst	7, %o1
3647	be,a	Lbcopy_doubles
3648	dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubes
3649
3650	! If the low bits match, we can make these line up.
36511:
3652	xor	%o0, %o1, %o3	! t = src ^ dst;
3653	btst	1, %o3		! if (t & 1) {
3654	be,a	1f
3655	btst	1, %o0		! [delay slot: if (src & 1)]
3656
3657	! low bits do not match, must copy by bytes.
36580:
3659	ldsb	[%o0], %o4	!	do {
3660	inc	%o0		!		(++dst)[-1] = *src++;
3661	inc	%o1
3662	deccc	%o2
3663	bnz	0b		!	} while (--len != 0);
3664	stb	%o4, [%o1 - 1]
3665	retl
3666	nop
3667	/* NOTREACHED */
3668
3669	! lowest bit matches, so we can copy by words, if nothing else
36701:
3671	be,a	1f		! if (src & 1) {
3672	btst	2, %o3		! [delay slot: if (t & 2)]
3673
3674	! although low bits match, both are 1: must copy 1 byte to align
3675	ldsb	[%o0], %o4	!	*dst++ = *src++;
3676	stb	%o4, [%o1]
3677	inc	%o0
3678	inc	%o1
3679	dec	%o2		!	len--;
3680	btst	2, %o3		! } [if (t & 2)]
36811:
3682	be,a	1f		! if (t & 2) {
3683	btst	2, %o0		! [delay slot: if (src & 2)]
3684	dec	2, %o2		!	len -= 2;
36850:
3686	ldsh	[%o0], %o4	!	do {
3687	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
3688	inc	2, %o0		!		dst += 2, src += 2;
3689	deccc	2, %o2		!	} while ((len -= 2) >= 0);
3690	bge	0b
3691	inc	2, %o1
3692	b	Lbcopy_mopb	!	goto mop_up_byte;
3693	btst	1, %o2		! } [delay slot: if (len & 1)]
3694	/* NOTREACHED */
3695
3696	! low two bits match, so we can copy by longwords
36971:
3698	be,a	1f		! if (src & 2) {
3699	btst	4, %o3		! [delay slot: if (t & 4)]
3700
3701	! although low 2 bits match, they are 10: must copy one short to align
3702	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
3703	sth	%o4, [%o1]
3704	inc	2, %o0		!	dst += 2;
3705	inc	2, %o1		!	src += 2;
3706	dec	2, %o2		!	len -= 2;
3707	btst	4, %o3		! } [if (t & 4)]
37081:
3709	be,a	1f		! if (t & 4) {
3710	btst	4, %o0		! [delay slot: if (src & 4)]
3711	dec	4, %o2		!	len -= 4;
37120:
3713	ld	[%o0], %o4	!	do {
3714	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
3715	inc	4, %o0		!		dst += 4, src += 4;
3716	deccc	4, %o2		!	} while ((len -= 4) >= 0);
3717	bge	0b
3718	inc	4, %o1
3719	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3720	btst	2, %o2		! } [delay slot: if (len & 2)]
3721	/* NOTREACHED */
3722
3723	! low three bits match, so we can copy by doublewords
37241:
3725	be	1f		! if (src & 4) {
3726	dec	8, %o2		! [delay slot: len -= 8]
3727	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3728	st	%o4, [%o1]
3729	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
3730	inc	4, %o1
3731	dec	4, %o2		! }
37321:
3733Lbcopy_doubles:
3734	ldd	[%o0], %o4	! do {
3735	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
3736	inc	8, %o0		!	dst += 8, src += 8;
3737	deccc	8, %o2		! } while ((len -= 8) >= 0);
3738	bge	Lbcopy_doubles
3739	inc	8, %o1
3740
3741	! check for a usual case again (save work)
3742	btst	7, %o2		! if ((len & 7) == 0)
3743	be	Lbcopy_done	!	goto bcopy_done;
3744
3745	btst	4, %o2		! if ((len & 4)) == 0)
3746	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3747	btst	2, %o2		! [delay slot: if (len & 2)]
3748	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3749	st	%o4, [%o1]
3750	inc	4, %o0		!	dst += 4;
3751	inc	4, %o1		!	src += 4;
3752	btst	2, %o2		! } [if (len & 2)]
3753
37541:
3755	! mop up trailing word (if present) and byte (if present).
3756Lbcopy_mopw:
3757	be	Lbcopy_mopb	! no word, go mop up byte
3758	btst	1, %o2		! [delay slot: if (len & 1)]
3759	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
3760	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
3761	sth	%o4, [%o1]
3762	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
3763	retl
3764	stb	%o4, [%o1 + 2]
3765	/* NOTREACHED */
3766
3767	! mop up trailing byte (if present).
3768Lbcopy_mopb:
3769	bne,a	1f
3770	ldsb	[%o0], %o4
3771
3772Lbcopy_done:
3773	retl
3774	nop
3775
37761:
3777	retl
3778	stb	%o4,[%o1]
3779/*
3780 * ovbcopy(src, dst, len): like bcopy, but regions may overlap.
3781 */
3782ENTRY(ovbcopy)
3783	cmp	%o0, %o1	! src < dst?
3784	bgeu	Lbcopy_start	! no, go copy forwards as via bcopy
3785	cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)
3786
3787	/*
3788	 * Since src comes before dst, and the regions might overlap,
3789	 * we have to do the copy starting at the end and working backwards.
3790	 */
3791	add	%o2, %o0, %o0	! src += len
3792	add	%o2, %o1, %o1	! dst += len
3793	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
3794	btst	3, %o0
3795
3796	/*
3797	 * Not much to copy, just do it a byte at a time.
3798	 */
3799	deccc	%o2		! while (--len >= 0)
3800	bl	1f
3801	EMPTY
38020:
3803	dec	%o0		!	*--dst = *--src;
3804	ldsb	[%o0], %o4
3805	dec	%o1
3806	deccc	%o2
3807	bge	0b
3808	stb	%o4, [%o1]
38091:
3810	retl
3811	nop
3812
3813	/*
3814	 * Plenty to copy, try to be optimal.
3815	 * We only bother with word/halfword/byte copies here.
3816	 */
3817Lback_fancy:
3818!	btst	3, %o0		! done already
3819	bnz	1f		! if ((src & 3) == 0 &&
3820	btst	3, %o1		!     (dst & 3) == 0)
3821	bz,a	Lback_words	!	goto words;
3822	dec	4, %o2		! (done early for word copy)
3823
38241:
3825	/*
3826	 * See if the low bits match.
3827	 */
3828	xor	%o0, %o1, %o3	! t = src ^ dst;
3829	btst	1, %o3
3830	bz,a	3f		! if (t & 1) == 0, can do better
3831	btst	1, %o0
3832
3833	/*
3834	 * Nope; gotta do byte copy.
3835	 */
38362:
3837	dec	%o0		! do {
3838	ldsb	[%o0], %o4	!	*--dst = *--src;
3839	dec	%o1
3840	deccc	%o2		! } while (--len != 0);
3841	bnz	2b
3842	stb	%o4, [%o1]
3843	retl
3844	nop
3845
38463:
3847	/*
3848	 * Can do halfword or word copy, but might have to copy 1 byte first.
3849	 */
3850!	btst	1, %o0		! done earlier
3851	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
3852	btst	2, %o3		! (done early)
3853	dec	%o0		!	*--dst = *--src;
3854	ldsb	[%o0], %o4
3855	dec	%o1
3856	stb	%o4, [%o1]
3857	dec	%o2		!	len--;
3858	btst	2, %o3		! }
3859
38604:
3861	/*
3862	 * See if we can do a word copy ((t&2) == 0).
3863	 */
3864!	btst	2, %o3		! done earlier
3865	bz,a	6f		! if (t & 2) == 0, can do word copy
3866	btst	2, %o0		! (src&2, done early)
3867
3868	/*
3869	 * Gotta do halfword copy.
3870	 */
3871	dec	2, %o2		! len -= 2;
38725:
3873	dec	2, %o0		! do {
3874	ldsh	[%o0], %o4	!	src -= 2;
3875	dec	2, %o1		!	dst -= 2;
3876	deccc	2, %o0		!	*(short *)dst = *(short *)src;
3877	bge	5b		! } while ((len -= 2) >= 0);
3878	sth	%o4, [%o1]
3879	b	Lback_mopb	! goto mop_up_byte;
3880	btst	1, %o2		! (len&1, done early)
3881
38826:
3883	/*
3884	 * We can do word copies, but we might have to copy
3885	 * one halfword first.
3886	 */
3887!	btst	2, %o0		! done already
3888	bz	7f		! if (src & 2) {
3889	dec	4, %o2		! (len -= 4, done early)
3890	dec	2, %o0		!	src -= 2, dst -= 2;
3891	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3892	dec	2, %o1
3893	sth	%o4, [%o1]
3894	dec	2, %o2		!	len -= 2;
3895				! }
3896
38977:
3898Lback_words:
3899	/*
3900	 * Do word copies (backwards), then mop up trailing halfword
3901	 * and byte if any.
3902	 */
3903!	dec	4, %o2		! len -= 4, done already
39040:				! do {
3905	dec	4, %o0		!	src -= 4;
3906	dec	4, %o1		!	src -= 4;
3907	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3908	deccc	4, %o2		! } while ((len -= 4) >= 0);
3909	bge	0b
3910	st	%o4, [%o1]
3911
3912	/*
3913	 * Check for trailing shortword.
3914	 */
3915	btst	2, %o2		! if (len & 2) {
3916	bz,a	1f
3917	btst	1, %o2		! (len&1, done early)
3918	dec	2, %o0		!	src -= 2, dst -= 2;
3919	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3920	dec	2, %o1
3921	sth	%o4, [%o1]	! }
3922	btst	1, %o2
3923
3924	/*
3925	 * Check for trailing byte.
3926	 */
39271:
3928Lback_mopb:
3929!	btst	1, %o2		! (done already)
3930	bnz,a	1f		! if (len & 1) {
3931	ldsb	[%o0 - 1], %o4	!	b = src[-1];
3932	retl
3933	nop
39341:
3935	retl			!	dst[-1] = b;
3936	stb	%o4, [%o1 - 1]	! }
3937
3938
3939/*
3940 * savefpstate(f) struct fpstate *f;
3941 *
3942 * Store the current FPU state.  The first `st %fsr' may cause a trap;
3943 * our trap handler knows how to recover (by `returning' to savefpcont).
3944 */
3945ENTRY(savefpstate)
3946	rd	%psr, %o1		! enable FP before we begin
3947	set	PSR_EF, %o2
3948	or	%o1, %o2, %o1
3949	wr	%o1, 0, %psr
3950	/* do some setup work while we wait for PSR_EF to turn on */
3951	set	FSR_QNE, %o5		! QNE = 0x2000, too big for immediate
3952	clr	%o3			! qsize = 0;
3953	nop				! (still waiting for PSR_EF)
3954special_fp_store:
3955	st	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
3956	/*
3957	 * Even if the preceding instruction did not trap, the queue
3958	 * is not necessarily empty: this state save might be happening
3959	 * because user code tried to store %fsr and took the FPU
3960	 * from `exception pending' mode to `exception' mode.
3961	 * So we still have to check the blasted QNE bit.
3962	 * With any luck it will usually not be set.
3963	 */
3964	ld	[%o0 + FS_FSR], %o4	! if (f->fs_fsr & QNE)
3965	btst	%o5, %o4
3966	bnz	Lfp_storeq		!	goto storeq;
3967	 std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
3968Lfp_finish:
3969	st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
3970	std	%f2, [%o0 + FS_REGS + (4*2)]
3971	std	%f4, [%o0 + FS_REGS + (4*4)]
3972	std	%f6, [%o0 + FS_REGS + (4*6)]
3973	std	%f8, [%o0 + FS_REGS + (4*8)]
3974	std	%f10, [%o0 + FS_REGS + (4*10)]
3975	std	%f12, [%o0 + FS_REGS + (4*12)]
3976	std	%f14, [%o0 + FS_REGS + (4*14)]
3977	std	%f16, [%o0 + FS_REGS + (4*16)]
3978	std	%f18, [%o0 + FS_REGS + (4*18)]
3979	std	%f20, [%o0 + FS_REGS + (4*20)]
3980	std	%f22, [%o0 + FS_REGS + (4*22)]
3981	std	%f24, [%o0 + FS_REGS + (4*24)]
3982	std	%f26, [%o0 + FS_REGS + (4*26)]
3983	std	%f28, [%o0 + FS_REGS + (4*28)]
3984	retl
3985	 std	%f30, [%o0 + FS_REGS + (4*30)]
3986
3987/*
3988 * Store the (now known nonempty) FP queue.
3989 * We have to reread the fsr each time in order to get the new QNE bit.
3990 */
3991Lfp_storeq:
3992	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
39931:
3994	std	%fq, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
3995	st	%fsr, [%o0 + FS_FSR]	! reread fsr
3996	ld	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
3997	btst	%o5, %o4
3998	bnz	1b
3999	 inc	8, %o3
4000	b	Lfp_finish		! set qsize and finish storing fregs
4001	 srl	%o3, 3, %o3		! (but first fix qsize)
4002
4003/*
4004 * The fsr store trapped.  Do it again; this time it will not trap.
4005 * We could just have the trap handler return to the `st %fsr', but
4006 * if for some reason it *does* trap, that would lock us into a tight
4007 * loop.  This way we panic instead.  Whoopee.
4008 */
4009savefpcont:
4010	b	special_fp_store + 4	! continue
4011	 st	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
4012
4013/*
4014 * Load FPU state.
4015 */
4016ENTRY(loadfpstate)
4017	rd	%psr, %o1		! enable FP before we begin
4018	set	PSR_EF, %o2
4019	or	%o1, %o2, %o1
4020	wr	%o1, 0, %psr
4021	nop; nop; nop			! paranoia
4022	ldd	[%o0 + FS_REGS + (4*0)], %f0
4023	ldd	[%o0 + FS_REGS + (4*2)], %f2
4024	ldd	[%o0 + FS_REGS + (4*4)], %f4
4025	ldd	[%o0 + FS_REGS + (4*6)], %f6
4026	ldd	[%o0 + FS_REGS + (4*8)], %f8
4027	ldd	[%o0 + FS_REGS + (4*10)], %f10
4028	ldd	[%o0 + FS_REGS + (4*12)], %f12
4029	ldd	[%o0 + FS_REGS + (4*14)], %f14
4030	ldd	[%o0 + FS_REGS + (4*16)], %f16
4031	ldd	[%o0 + FS_REGS + (4*18)], %f18
4032	ldd	[%o0 + FS_REGS + (4*20)], %f20
4033	ldd	[%o0 + FS_REGS + (4*22)], %f22
4034	ldd	[%o0 + FS_REGS + (4*24)], %f24
4035	ldd	[%o0 + FS_REGS + (4*26)], %f26
4036	ldd	[%o0 + FS_REGS + (4*28)], %f28
4037	ldd	[%o0 + FS_REGS + (4*30)], %f30
4038	retl
4039	 ld	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
4040
4041/*
4042 * ienab_bis(bis) int bis;
4043 * ienab_bic(bic) int bic;
4044 *
4045 * Set and clear bits in the interrupt register.
4046 * Since there are no read-modify-write instructions for this,
4047 * and one of the interrupts is nonmaskable, we must disable traps.
4048 *
4049 * NB: ___main is defined here for gcc-2 idiocy.  Ignore it.
4050 */
4051ENTRY(ienab_bis)
4052	! %o0 = bits to set
4053	rd	%psr, %o2
4054	wr	%o2, PSR_ET, %psr	! disable traps
4055	nop; nop			! 3-instr delay until ET turns off
4056	sethi	%hi(IE_reg_addr), %o3
4057	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4058	or	%o4, %o0, %o4		! *IE_reg_addr |= bis;
4059	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4060	wr	%o2, 0, %psr		! reenable traps
4061	nop
4062	.globl	___main
4063___main:
4064	retl
4065	 nop
4066
4067ENTRY(ienab_bic)
4068	! %o0 = bits to clear
4069	rd	%psr, %o2
4070	wr	%o2, PSR_ET, %psr	! disable traps
4071	nop; nop
4072	sethi	%hi(IE_reg_addr), %o3
4073	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4074	andn	%o4, %o0, %o4		! *IE_reg_addr &=~ bic;
4075	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4076	wr	%o2, 0, %psr		! reenable traps
4077	nop
4078	retl
4079	 nop
4080
4081/*
4082 * ffs(), using table lookup.
4083 * The process switch code shares the table, so we just put the
4084 * whole thing here.
4085 */
4086ffstab:
4087	.byte	-24,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1 /* 00-0f */
4088	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 10-1f */
4089	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 20-2f */
4090	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 30-3f */
4091	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 40-4f */
4092	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 50-5f */
4093	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 60-6f */
4094	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 70-7f */
4095	.byte	8,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 80-8f */
4096	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 10-9f */
4097	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* a0-af */
4098	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* b0-bf */
4099	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* c0-cf */
4100	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* d0-df */
4101	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* e0-ef */
4102	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* f0-ff */
4103
4104/*
4105 * We use a table lookup on each byte.
4106 *
4107 * In each section below, %o1 is the current byte (0, 1, 2, or 3).
4108 * The last byte is handled specially: for the first three,
4109 * if that byte is nonzero, we return the table value
4110 * (plus 0, 8, or 16 for the byte number), but for the last
4111 * one, we just return the table value plus 24.  This means
4112 * that ffstab[0] must be -24 so that ffs(0) will return 0.
4113 */
4114ENTRY(ffs)
4115	set	ffstab, %o2
4116	andcc	%o0, 0xff, %o1	! get low byte
4117	bz,a	1f		! try again if 0
4118	srl	%o0, 8, %o0	! delay slot, get ready for next byte
4119
4120	retl			! return ffstab[%o1]
4121	ldsb	[%o2 + %o1], %o0
4122
41231:
4124	andcc	%o0, 0xff, %o1	! byte 1 like byte 0...
4125	bz,a	2f
4126	srl	%o0, 8, %o0	! (use delay to prepare for byte 2)
4127
4128	ldsb	[%o2 + %o1], %o0
4129	retl			! return ffstab[%o1] + 8
4130	add	%o0, 8, %o0
4131
41322:
4133	andcc	%o0, 0xff, %o1
4134	bz,a	3f
4135	srl	%o0, 8, %o0	! (prepare for byte 3)
4136
4137	ldsb	[%o2 + %o1], %o0
4138	retl			! return ffstab[%o1] + 16
4139	add	%o0, 16, %o0
4140
41413:				! just return ffstab[%o0] + 24
4142	ldsb	[%o2 + %o0], %o0
4143	retl
4144	add	%o0, 24, %o0
4145
4146/*
4147 * Here is a very good random number generator.  This implementation is
4148 * based on ``Two Fast Implementations of the "Minimal Standard" Random
4149 * Number Generator", David G. Carta, Communications of the ACM, Jan 1990,
4150 * Vol 33 No 1.
4151 */
4152	.data
4153randseed:
4154	.word	1
4155	.text
4156ENTRY(random)
4157	sethi	%hi(16807), %o1
4158	wr	%o1, %lo(16807), %y
4159	 sethi	%hi(randseed), %g1
4160	 ld	[%g1 + %lo(randseed)], %o0
4161	 andcc	%g0, 0, %o2
4162	mulscc  %o2, %o0, %o2
4163	mulscc  %o2, %o0, %o2
4164	mulscc  %o2, %o0, %o2
4165	mulscc  %o2, %o0, %o2
4166	mulscc  %o2, %o0, %o2
4167	mulscc  %o2, %o0, %o2
4168	mulscc  %o2, %o0, %o2
4169	mulscc  %o2, %o0, %o2
4170	mulscc  %o2, %o0, %o2
4171	mulscc  %o2, %o0, %o2
4172	mulscc  %o2, %o0, %o2
4173	mulscc  %o2, %o0, %o2
4174	mulscc  %o2, %o0, %o2
4175	mulscc  %o2, %o0, %o2
4176	mulscc  %o2, %o0, %o2
4177	mulscc  %o2, %g0, %o2
4178	rd	%y, %o3
4179	srl	%o2, 16, %o1
4180	set	0xffff, %o4
4181	and	%o4, %o2, %o0
4182	sll	%o0, 15, %o0
4183	srl	%o3, 17, %o3
4184	or	%o3, %o0, %o0
4185	addcc	%o0, %o1, %o0
4186	bl	1f
4187	 sethi	%hi(0x7fffffff), %o1
4188	retl
4189	 st	%o0, [%g1 + %lo(randseed)]
41901:
4191	or	%o1, %lo(0x7fffffff), %o1
4192	add	%o0, 1, %o0
4193	and	%o1, %o0, %o0
4194	retl
4195	 st	%o0, [%g1 + %lo(randseed)]
4196
4197/*
4198 * void microtime(struct timeval *tv)
4199 *
4200 * LBL's sparc bsd 'microtime': We don't need to spl (so this routine
4201 * can be a leaf routine) and we don't keep a 'last' timeval (there
4202 * can't be two calls to this routine in a microsecond).  This seems to
4203 * be about 20 times faster than the Sun code on an SS-2. - vj
4204 *
4205 * Read time values from slowest-changing to fastest-changing,
4206 * then re-read out to slowest.  If the values read before
4207 * the innermost match those read after, the innermost value
4208 * is consistent with the outer values.  If not, it may not
4209 * be and we must retry.  Typically this loop runs only once;
4210 * occasionally it runs twice, and only rarely does it run longer.
4211 */
4212ENTRY(microtime)
4213	sethi	%hi(_time), %g2
4214	sethi	%hi(TIMERREG_VA), %g3
42151:
4216	ldd	[%g2+%lo(_time)], %o2		! time.tv_sec & time.tv_usec
4217	ld	[%g3+%lo(TIMERREG_VA)], %o4	! usec counter
4218	ldd	[%g2+%lo(_time)], %g4		! see if time values changed
4219	cmp	%g4, %o2
4220	bne	1b				! if time.tv_sec changed
4221	 cmp	%g5, %o3
4222	bne	1b				! if time.tv_usec changed
4223	 tst	%o4
4224
4225	bpos	2f				! reached limit?
4226	 srl	%o4, TMR_SHIFT, %o4		! convert counter to usec
4227	sethi	%hi(_tick), %g4			! bump usec by 1 tick
4228	ld	[%g4+%lo(_tick)], %o1
4229	set	TMR_MASK, %g5
4230	add	%o1, %o3, %o3
4231	and	%o4, %g5, %o4
42322:
4233	add	%o4, %o3, %o3
4234	set	1000000, %g5			! normalize usec value
4235	cmp	%o3, %g5
4236	bl,a	3f
4237	 st	%o2, [%o0]			! (should be able to std here)
4238	add	%o2, 1, %o2			! overflow
4239	sub	%o3, %g5, %o3
4240	st	%o2, [%o0]			! (should be able to std here)
42413:
4242	retl
4243	 st	%o3, [%o0+4]
4244
4245/*
4246 * This procedure exists to make stdarg functions work correctly.
4247 * We write the caller's `in' registers into his caller's `arg dump'
4248 * area.  That arg-dump area immediately precedes the argument extension
4249 * area, resulting in a single contiguous block of memory.
4250 *
4251 * This is really the wrong way to do it: the arguments should be written
4252 * to storage local to the stdarg function, and the stdarg `pick up
4253 * the next argument' code should pick it up from whichever region is
4254 * `active' at that point.
4255 */
4256	.globl	___builtin_saveregs
4257___builtin_saveregs:
4258	! not profiled -- this should be done inline anyway
4259	! bleah! the arg dump area is unaligned!  cannot std w/o reg/reg moves
4260	st	%i0, [%fp + 0x44]	! fr->fr_argd[0]
4261	st	%i1, [%fp + 0x48]	! fr->fr_argd[1]
4262	st	%i2, [%fp + 0x4c]	! fr->fr_argd[2]
4263	st	%i3, [%fp + 0x50]	! fr->fr_argd[3]
4264	st	%i4, [%fp + 0x54]	! fr->fr_argd[4]
4265	retl
4266	 st	%i5, [%fp + 0x58]	! fr->fr_argd[5]
4267
4268#ifdef KGDB
4269/*
4270 * Write all windows (user or otherwise), except the current one.
4271 *
4272 * THIS COULD BE DONE IN USER CODE
4273 */
4274ENTRY(write_all_windows)
4275	/*
4276	 * g2 = g1 = nwindows - 1;
4277	 * while (--g1 > 0) save();
4278	 * while (--g2 > 0) restore();
4279	 */
4280	sethi	%hi(_nwindows), %g1
4281	ld	[%g1 + %lo(_nwindows)], %g1
4282	dec	%g1
4283	mov	%g1, %g2
4284
42851:	deccc	%g1
4286	bg,a	1b
4287	 save	%sp, -64, %sp
4288
42892:	deccc	%g2
4290	bg,a	2b
4291	 restore
4292
4293	retl
4294	nop
4295#endif /* KGDB */
4296
4297	.data
4298	.globl	_cold
4299_cold:
4300	.word	1		! cold start flag
4301
4302	.globl	_proc0paddr
4303_proc0paddr:
4304	.word	_u0		! KVA of proc0 uarea
4305
4306/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
4307	.globl	_intrcnt, _eintrcnt, _intrnames, _eintrnames
4308_intrnames:
4309	.asciz	"spur"
4310	.asciz	"lev1"
4311	.asciz	"lev2"
4312	.asciz	"lev3"
4313	.asciz	"lev4"
4314	.asciz	"lev5"
4315	.asciz	"lev6"
4316	.asciz	"lev7"
4317	.asciz  "lev8"
4318	.asciz	"lev9"
4319	.asciz	"clock"
4320	.asciz	"lev11"
4321	.asciz	"lev12"
4322	.asciz	"lev13"
4323	.asciz	"prof"
4324_eintrnames:
4325	ALIGN
4326_intrcnt:
4327	.skip	4*15
4328_eintrcnt:
4329
4330	.comm	_nwindows, 4
4331	.comm	_promvec, 4
4332	.comm	_curproc, 4
4333	.comm	_qs, 32 * 8
4334	.comm	_whichqs, 4
4335