xref: /original-bsd/sys/sparc/sparc/locore.s (revision 3705696b)
1/*
2 * Copyright (c) 1992, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * All advertising materials mentioning features or use of this software
10 * must display the following acknowledgement:
11 *	This product includes software developed by the University of
12 *	California, Lawrence Berkeley Laboratory.
13 *
14 * %sccs.include.redist.c%
15 *
16 *	@(#)locore.s	8.1 (Berkeley) 06/11/93
17 *
18 * from: $Header: locore.s,v 1.51 93/04/21 06:19:37 torek Exp $
19 */
20
21#define	LOCORE
22#include "assym.s"
23#include <sparc/sparc/intreg.h>
24#include <sparc/sparc/timerreg.h>
25#ifdef notyet
26#include <sparc/sparc/vaddrs.h>
27#include <sparc/dev/zsreg.h>
28#endif
29#include <machine/ctlreg.h>
30#include <machine/psl.h>
31#include <machine/signal.h>
32#include <machine/trap.h>
33
34/*
35 * GNU assembler does not understand `.empty' directive; Sun assembler
36 * gripes about labels without it.  To allow cross-compilation using
37 * the Sun assembler, and because .empty directives are useful documentation,
38 * we use this trick.
39 */
40#ifdef SUN_AS
41#define	EMPTY	.empty
42#else
43#define	EMPTY	/* .empty */
44#endif
45
46/* use as needed to align things on longword boundaries */
47#define	ALIGN	.align 4
48
49/*
50 * CCFSZ (C Compiler Frame SiZe) is the size of a stack frame required if
51 * a function is to call C code.  It should be just 64, but Sun defined
52 * their frame with space to hold arguments 0 through 5 (plus some junk),
53 * and varargs routines (such as printf) demand this, and gcc uses this
54 * area at times anyway.
55 */
56#define	CCFSZ	96
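
/*
 * (A rough accounting of the 96, assuming the usual SPARC frame layout:
 * 16 words (64 bytes) of register window save area, 1 word for the
 * hidden aggregate-return pointer, and 6 words for outgoing arguments
 * 0 through 5 give 92 bytes, rounded up to 96 to keep %sp 8-byte
 * aligned.)
 */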
57
58/*
59 * A handy macro for maintaining instrumentation counters.
60 * Note that this clobbers %o0 and %o1.  Normal usage is
61 * something like:
62 *	foointr:
63 *		TRAP_SETUP(...)		! makes %o registers safe
64 *		INCR(_cnt+V_FOO)	! count a foo
65 */
66#define INCR(what) \
67	sethi	%hi(what), %o0; \
68	ld	[%o0 + %lo(what)], %o1; \
69	inc	%o1; \
70	st	%o1, [%o0 + %lo(what)]
71
72/*
73 * Another handy macro: load one register window, given `base' address.
74 * This can be either a simple register (e.g., %sp) or include an initial
75 * offset (e.g., %g6 + PCB_RW).
76 */
77#define	LOADWIN(addr) \
78	ldd	[addr], %l0; \
79	ldd	[addr + 8], %l2; \
80	ldd	[addr + 16], %l4; \
81	ldd	[addr + 24], %l6; \
82	ldd	[addr + 32], %i0; \
83	ldd	[addr + 40], %i2; \
84	ldd	[addr + 48], %i4; \
85	ldd	[addr + 56], %i6
86
87/*
88 * To return from trap we need the two-instruction sequence
89 * `jmp %l1; rett %l2', which is defined here for convenience.
90 */
91#define	RETT	jmp %l1; rett %l2
92
93	.data
94/*
95 * The interrupt stack.
96 *
97 * This is the very first thing in the data segment, and therefore has
98 * the lowest kernel stack address.  We count on this in the interrupt
99 * trap-frame setup code, since we may need to switch from the kernel
100 * stack to the interrupt stack (iff we are not already on the interrupt
101 * stack).  One sethi+cmp is all we need since this is so carefully
102 * arranged.
103 */
104	.globl	_intstack
105	.globl	_eintstack
106_intstack:
107	.skip	4 * NBPG		! 16k = 128 128-byte stack frames
108_eintstack:
109
110/*
111 * When a process exits and its u. area goes away, we set cpcb to point
112 * to this `u.', leaving us with something to use for an interrupt stack,
113 * and letting all the register save code have a pcb_uw to examine.
114 * This is also carefully arranged (to come just before u0, so that
115 * process 0's kernel stack can quietly overrun into it during bootup, if
116 * we feel like doing that).
117 */
118	.globl	_idle_u
119_idle_u:
120	.skip	UPAGES * NBPG
121
122/*
123 * Process 0's u.
124 *
125 * This must be aligned on an 8 byte boundary.
126 */
127	.globl	_u0
128_u0:	.skip	UPAGES * NBPG
129estack0:
130
131#ifdef KGDB
132/*
133 * Another item that must be aligned, easiest to put it here.
134 */
135KGDB_STACK_SIZE = 2048
136	.globl	_kgdb_stack
137_kgdb_stack:
138	.skip	KGDB_STACK_SIZE		! hope this is enough
139#endif
140
141/*
142 * _cpcb points to the current pcb (and hence u. area).
143 * Initially this is the special one.
144 */
145	.globl	_cpcb
146_cpcb:	.word	_u0
147
148	.text
149
150/*
151 * The first thing in the real text segment is the trap vector table,
152 * which must be aligned on a 4096 byte boundary.  The text segment
153 * starts beyond page 0 of KERNBASE so that there is a red zone
154 * between user and kernel space.  Since the boot ROM loads us at
155 * 0x4000, it is far easier to start at KERNBASE+0x4000 than to
156 * buck the trend.  This is four pages in; we can stuff something
157 * into the three pages left beneath us later ... like, oh, say, the
158 * message buffer (1 page).
159 */
160	.globl	_msgbuf
161msgbufsize = NBPG			! 1 page for msg buffer
162_msgbuf	= KERNBASE + NBPG
163
164/*
165 * The remaining two physical pages are currently unused.  We need to
166 * map the interrupt enable register very early on in the boot process,
167 * so that we can handle NMIs (parity errors) halfway sensibly during
168 * boot.  We use virtual address f8002000 (`page 2') for this, wasting
169 * 4096 bytes of physical memory.
170 */
171IE_reg_addr = _msgbuf + msgbufsize	! this page not used; points to IEreg
172
173/*
174 * Each trap has room for four instructions, of which one perforce must
175 * be a branch.  On entry the hardware has copied pc and npc to %l1 and
176 * %l2 respectively.  We use two more to read the psr into %l0, and to
177 * put the trap type value into %l3 (with a few exceptions below).
178 * We could read the trap type field of %tbr later in the code instead,
179 * but there is no need, and that would require more instructions
180 * (read+mask, vs 1 `mov' here).
181 *
182 * I used to generate these numbers by address arithmetic, but gas's
183 * expression evaluator has about as much sense as your average slug
184 * (oddly enough, the code looks about as slimy too).  Thus, all the
185 * trap numbers are given as arguments to the trap macros.  This means
186 * there is one line per trap.  Sigh.
187 *
188 * Note that only the local registers may be used, since the trap
189 * window is potentially the last window.  Its `in' registers are
190 * the previous window's outs (as usual), but more important, its
191 * `out' registers may be in use as the `topmost' window's `in' registers.
192 * The global registers are of course verboten (well, until we save
193 * them away).
194 *
195 * Hardware interrupt vectors can be `linked'---the linkage is to regular
196 * C code---or rewired to fast in-window handlers.  The latter are good
197 * for unbuffered hardware like the Zilog serial chip and the AMD audio
198 * chip, where many interrupts can be handled trivially with pseudo-DMA or
199 * similar.  Only one `fast' interrupt can be used per level, however, and
200 * direct and `fast' interrupts are incompatible.  Routines in intr.c
201 * handle setting these, with optional paranoia.
202 */
203
204	/* regular vectored traps */
205#define	VTRAP(type, label) \
206	mov (type), %l3; b label; mov %psr, %l0; nop
207
208	/* hardware interrupts (can be linked or made `fast') */
209#define	HARDINT(lev) \
210	mov (lev), %l3; b _sparc_interrupt; mov %psr, %l0; nop
211
212	/* software interrupts (may not be made direct, sorry---but you
213	   should not be using them trivially anyway) */
214#define	SOFTINT(lev, bit) \
215	mov (lev), %l3; mov (bit), %l4; b softintr; mov %psr, %l0
216
217	/* traps that just call trap() */
218#define	TRAP(type)	VTRAP(type, slowtrap)
219
220	/* architecturally undefined traps (cause panic) */
221#define	UTRAP(type)	VTRAP(type, slowtrap)
222
223	/* software undefined traps (may be replaced) */
224#define	STRAP(type)	VTRAP(type, slowtrap)
225
226/* breakpoint acts differently under kgdb */
227#ifdef KGDB
228#define	BPT		VTRAP(T_BREAKPOINT, bpt)
229#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
230#else
231#define	BPT		TRAP(T_BREAKPOINT)
232#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
233#endif
234
235/* special high-speed 1-instruction-shaved-off traps (get nothing in %l3) */
236#ifdef COMPAT_SUNOS
237#define	SUN_SYSCALL	b sun_syscall; mov %psr, %l0; nop; nop
238#else
239#define	SUN_SYSCALL	TRAP(T_SUN_SYSCALL)
240#endif
241#define	SYSCALL		b syscall; mov %psr, %l0; nop; nop
242#define	WINDOW_OF	b window_of; mov %psr, %l0; nop; nop
243#define	WINDOW_UF	b window_uf; mov %psr, %l0; nop; nop
244#ifdef notyet
245#define	ZS_INTERRUPT	b zshard; mov %psr, %l0; nop; nop
246#else
247#define	ZS_INTERRUPT	HARDINT(12)
248#endif
249
250	.globl	start
251	.globl	_trapbase
252start:
253_trapbase:
254/* trap 0 is special since we cannot receive it */
255	b dostart; nop; nop; nop	! 00 = reset (fake)
256	VTRAP(T_TEXTFAULT, memfault)	! 01 = instr. fetch fault
257	TRAP(T_ILLINST)			! 02 = illegal instruction
258	TRAP(T_PRIVINST)		! 03 = privileged instruction
259	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
260	WINDOW_OF			! 05 = window overflow
261	WINDOW_UF			! 06 = window underflow
262	TRAP(T_ALIGN)			! 07 = address alignment error
263	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
264	VTRAP(T_DATAFAULT, memfault)	! 09 = data fetch fault
265	TRAP(T_TAGOF)			! 0a = tag overflow
266	UTRAP(0x0b)
267	UTRAP(0x0c)
268	UTRAP(0x0d)
269	UTRAP(0x0e)
270	UTRAP(0x0f)
271	UTRAP(0x10)
272	SOFTINT(1, IE_L1)		! 11 = level 1 interrupt
273	HARDINT(2)			! 12 = level 2 interrupt
274	HARDINT(3)			! 13 = level 3 interrupt
275	SOFTINT(4, IE_L4)		! 14 = level 4 interrupt
276	HARDINT(5)			! 15 = level 5 interrupt
277	SOFTINT(6, IE_L6)		! 16 = level 6 interrupt
278	HARDINT(7)			! 17 = level 7 interrupt
279	HARDINT(8)			! 18 = level 8 interrupt
280	HARDINT(9)			! 19 = level 9 interrupt
281	HARDINT(10)			! 1a = level 10 interrupt
282	HARDINT(11)			! 1b = level 11 interrupt
283	ZS_INTERRUPT			! 1c = level 12 (zs) interrupt
284	HARDINT(13)			! 1d = level 13 interrupt
285	HARDINT(14)			! 1e = level 14 interrupt
286	VTRAP(15, nmi)			! 1f = nonmaskable interrupt
287	UTRAP(0x20)
288	UTRAP(0x21)
289	UTRAP(0x22)
290	UTRAP(0x23)
291	UTRAP(0x24)
292	UTRAP(0x25)
293	UTRAP(0x26)
294	UTRAP(0x27)
295	UTRAP(0x28)
296	UTRAP(0x29)
297	UTRAP(0x2a)
298	UTRAP(0x2b)
299	UTRAP(0x2c)
300	UTRAP(0x2d)
301	UTRAP(0x2e)
302	UTRAP(0x2f)
303	UTRAP(0x30)
304	UTRAP(0x31)
305	UTRAP(0x32)
306	UTRAP(0x33)
307	UTRAP(0x34)
308	UTRAP(0x35)
309	TRAP(T_CPDISABLED)	! 36 = coprocessor instr, EC bit off in psr
310	UTRAP(0x37)
311	UTRAP(0x38)
312	UTRAP(0x39)
313	UTRAP(0x3a)
314	UTRAP(0x3b)
315	UTRAP(0x3c)
316	UTRAP(0x3d)
317	UTRAP(0x3e)
318	UTRAP(0x3f)
319	TRAP(T_CPEXCEPTION)	! 40 = coprocessor exception
320	UTRAP(0x41)
321	UTRAP(0x42)
322	UTRAP(0x43)
323	UTRAP(0x44)
324	UTRAP(0x45)
325	UTRAP(0x46)
326	UTRAP(0x47)
327	UTRAP(0x48)
328	UTRAP(0x49)
329	UTRAP(0x4a)
330	UTRAP(0x4b)
331	UTRAP(0x4c)
332	UTRAP(0x4d)
333	UTRAP(0x4e)
334	UTRAP(0x4f)
335	UTRAP(0x50)
336	UTRAP(0x51)
337	UTRAP(0x52)
338	UTRAP(0x53)
339	UTRAP(0x54)
340	UTRAP(0x55)
341	UTRAP(0x56)
342	UTRAP(0x57)
343	UTRAP(0x58)
344	UTRAP(0x59)
345	UTRAP(0x5a)
346	UTRAP(0x5b)
347	UTRAP(0x5c)
348	UTRAP(0x5d)
349	UTRAP(0x5e)
350	UTRAP(0x5f)
351	UTRAP(0x60)
352	UTRAP(0x61)
353	UTRAP(0x62)
354	UTRAP(0x63)
355	UTRAP(0x64)
356	UTRAP(0x65)
357	UTRAP(0x66)
358	UTRAP(0x67)
359	UTRAP(0x68)
360	UTRAP(0x69)
361	UTRAP(0x6a)
362	UTRAP(0x6b)
363	UTRAP(0x6c)
364	UTRAP(0x6d)
365	UTRAP(0x6e)
366	UTRAP(0x6f)
367	UTRAP(0x70)
368	UTRAP(0x71)
369	UTRAP(0x72)
370	UTRAP(0x73)
371	UTRAP(0x74)
372	UTRAP(0x75)
373	UTRAP(0x76)
374	UTRAP(0x77)
375	UTRAP(0x78)
376	UTRAP(0x79)
377	UTRAP(0x7a)
378	UTRAP(0x7b)
379	UTRAP(0x7c)
380	UTRAP(0x7d)
381	UTRAP(0x7e)
382	UTRAP(0x7f)
383	SUN_SYSCALL		! 80 = sun syscall
384	BPT			! 81 = pseudo breakpoint instruction
385	TRAP(T_DIV0)		! 82 = divide by zero
386	TRAP(T_FLUSHWIN)	! 83 = flush windows
387	TRAP(T_CLEANWIN)	! 84 = provide clean windows
388	TRAP(T_RANGECHECK)	! 85 = ???
389	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
390	TRAP(T_INTOF)		! 87 = integer overflow
391	BPT_KGDB_EXEC		! 88 = enter kernel gdb on kernel startup
392	SYSCALL			! 89 = bsd syscall
393	STRAP(0x8a)
394	STRAP(0x8b)
395	STRAP(0x8c)
396	STRAP(0x8d)
397	STRAP(0x8e)
398	STRAP(0x8f)
399	STRAP(0x90)
400	STRAP(0x91)
401	STRAP(0x92)
402	STRAP(0x93)
403	STRAP(0x94)
404	STRAP(0x95)
405	STRAP(0x96)
406	STRAP(0x97)
407	STRAP(0x98)
408	STRAP(0x99)
409	STRAP(0x9a)
410	STRAP(0x9b)
411	STRAP(0x9c)
412	STRAP(0x9d)
413	STRAP(0x9e)
414	STRAP(0x9f)
415	STRAP(0xa0)
416	STRAP(0xa1)
417	STRAP(0xa2)
418	STRAP(0xa3)
419	STRAP(0xa4)
420	STRAP(0xa5)
421	STRAP(0xa6)
422	STRAP(0xa7)
423	STRAP(0xa8)
424	STRAP(0xa9)
425	STRAP(0xaa)
426	STRAP(0xab)
427	STRAP(0xac)
428	STRAP(0xad)
429	STRAP(0xae)
430	STRAP(0xaf)
431	STRAP(0xb0)
432	STRAP(0xb1)
433	STRAP(0xb2)
434	STRAP(0xb3)
435	STRAP(0xb4)
436	STRAP(0xb5)
437	STRAP(0xb6)
438	STRAP(0xb7)
439	STRAP(0xb8)
440	STRAP(0xb9)
441	STRAP(0xba)
442	STRAP(0xbb)
443	STRAP(0xbc)
444	STRAP(0xbd)
445	STRAP(0xbe)
446	STRAP(0xbf)
447	STRAP(0xc0)
448	STRAP(0xc1)
449	STRAP(0xc2)
450	STRAP(0xc3)
451	STRAP(0xc4)
452	STRAP(0xc5)
453	STRAP(0xc6)
454	STRAP(0xc7)
455	STRAP(0xc8)
456	STRAP(0xc9)
457	STRAP(0xca)
458	STRAP(0xcb)
459	STRAP(0xcc)
460	STRAP(0xcd)
461	STRAP(0xce)
462	STRAP(0xcf)
463	STRAP(0xd0)
464	STRAP(0xd1)
465	STRAP(0xd2)
466	STRAP(0xd3)
467	STRAP(0xd4)
468	STRAP(0xd5)
469	STRAP(0xd6)
470	STRAP(0xd7)
471	STRAP(0xd8)
472	STRAP(0xd9)
473	STRAP(0xda)
474	STRAP(0xdb)
475	STRAP(0xdc)
476	STRAP(0xdd)
477	STRAP(0xde)
478	STRAP(0xdf)
479	STRAP(0xe0)
480	STRAP(0xe1)
481	STRAP(0xe2)
482	STRAP(0xe3)
483	STRAP(0xe4)
484	STRAP(0xe5)
485	STRAP(0xe6)
486	STRAP(0xe7)
487	STRAP(0xe8)
488	STRAP(0xe9)
489	STRAP(0xea)
490	STRAP(0xeb)
491	STRAP(0xec)
492	STRAP(0xed)
493	STRAP(0xee)
494	STRAP(0xef)
495	STRAP(0xf0)
496	STRAP(0xf1)
497	STRAP(0xf2)
498	STRAP(0xf3)
499	STRAP(0xf4)
500	STRAP(0xf5)
501	STRAP(0xf6)
502	STRAP(0xf7)
503	STRAP(0xf8)
504	STRAP(0xf9)
505	STRAP(0xfa)
506	STRAP(0xfb)
507	STRAP(0xfc)
508	STRAP(0xfd)
509	STRAP(0xfe)
510	STRAP(0xff)
511
512	/* the message buffer is always mapped */
513_msgbufmapped:
514	.word	1
515
516#ifdef DEBUG
517/*
518 * A hardware red zone is impossible.  We simulate one in software by
519 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
520 * This is expensive and is only enabled when debugging.
521 */
522#define	REDSIZE	(8*96)		/* some room for bouncing */
523#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
524	.data
525_redzone:
526	.word	_idle_u + REDSIZE
527_redstack:
528	.skip	REDSTACK
529	.text
530Lpanic_red:
531	.asciz	"stack overflow"
532	ALIGN
533
534	/* set stack pointer redzone to base+minstack; alters base */
535#define	SET_SP_REDZONE(base, tmp) \
536	add	base, REDSIZE, base; \
537	sethi	%hi(_redzone), tmp; \
538	st	base, [tmp + %lo(_redzone)]
539
540	/* variant with a constant */
541#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
542	set	(const) + REDSIZE, tmp1; \
543	sethi	%hi(_redzone), tmp2; \
544	st	tmp1, [tmp2 + %lo(_redzone)]
545
546	/* check stack pointer against redzone (uses two temps) */
547#define	CHECK_SP_REDZONE(t1, t2) \
548	sethi	%hi(_redzone), t1; \
549	ld	[t1 + %lo(_redzone)], t2; \
550	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
551	bgeu	7f; nop;	/* and can continue normally */ \
552	/* move to panic stack */ \
553	st	%g0, [t1 + %lo(_redzone)]; \
554	set	_redstack + REDSTACK - 96, %sp; \
555	/* prevent panic() from lowering ipl */ \
556	sethi	%hi(_panicstr), t1;	/* t1 (now free) forms the store address below */ \
557	set	Lpanic_red, t2; \
558	st	t2, [t1 + %lo(_panicstr)]; \
559	rd	%psr, t1;		/* t1 = splhigh() */ \
560	or	t1, PSR_PIL, t2; \
561	wr	t2, 0, %psr; \
562	wr	t2, PSR_ET, %psr;	/* turn on traps */ \
563	nop; nop; nop; \
564	save	%sp, -96, %sp;		/* preserve current window */ \
565	sethi	%hi(Lpanic_red), %o0; \
566	call	_panic; or %o0, %lo(Lpanic_red), %o0; \
5677:
568
569#else
570
571#define	SET_SP_REDZONE(base, tmp)
572#define	SET_SP_REDZONE_CONST(const, t1, t2)
573#define	CHECK_SP_REDZONE(t1, t2)
574#endif
575
576/*
577 * The window code must verify user stack addresses before using them.
578 * A user stack pointer is invalid if:
579 *	- it is not on an 8 byte boundary;
580 *	- its pages (a register window, being 64 bytes, can occupy
581 *	  two pages) are not readable or writable.
582 * We define three separate macros here for testing user stack addresses.
583 *
584 * PTE_OF_ADDR locates a PTE, branching to a `bad address'
585 *	handler if the stack pointer points into the hole in the
586 *	address space (i.e., top 3 bits are not either all 1 or all 0);
587 * CMP_PTE_USER_READ compares the located PTE against `user read' mode;
588 * CMP_PTE_USER_WRITE compares the located PTE against `user write' mode.
589 * The compares give `equal' if read or write is OK.
590 *
591 * Note that the user stack pointer usually points into high addresses
592 * (top 3 bits all 1), so that is what we check first.
593 *
594 * The code below also assumes that PTE_OF_ADDR is safe in a delay
595 * slot; it is, as it merely sets its `pte' register to a temporary value.
596 */
597	/* input: addr, output: pte; aux: bad address label */
598#define	PTE_OF_ADDR(addr, pte, bad) \
599	sra	addr, PG_VSHIFT, pte; \
600	cmp	pte, -1; \
601	be,a	1f; andn addr, 4095, pte; \
602	tst	pte; \
603	bne	bad; EMPTY; \
604	andn	addr, 4095, pte; \
6051:
606
607	/* input: pte; output: condition codes */
608#define	CMP_PTE_USER_READ(pte) \
609	lda	[pte] ASI_PTE, pte; \
610	srl	pte, PG_PROTSHIFT, pte; \
611	andn	pte, (PG_W >> PG_PROTSHIFT), pte; \
612	cmp	pte, PG_PROTUREAD
613
614	/* input: pte; output: condition codes */
615#define	CMP_PTE_USER_WRITE(pte) \
616	lda	[pte] ASI_PTE, pte; \
617	srl	pte, PG_PROTSHIFT, pte; \
618	cmp	pte, PG_PROTUWRITE
619
620/*
621 * The calculations in PTE_OF_ADDR and CMP_PTE_USER_* are rather slow:
622 * in particular, according to Gordon Irlam of the University of Adelaide
623 * in Australia, these consume at least 18 cycles on an SS1 and 37 on an
624 * SS2.  Hence, we try to avoid them in the common case.
625 *
626 * A chunk of 64 bytes is on a single page if and only if:
627 *
628 *	((base + 64 - 1) & ~4095) == (base & ~4095)
629 *
630 * Equivalently (and faster to test), the low order bits (base & 4095) must
631 * be small enough so that the sum (base + 63) does not carry out into the
632 * upper page-address bits, i.e.,
633 *
634 *	(base & 4095) < (4096 - 63)
635 *
636 * so that is what we test here.  This macro is also assumed to be safe
637 * in a delay slot (modulo overwriting its temporary).
638 */
639#define	SLT_IF_1PAGE_RW(addr, tmp) \
640	and	addr, 4095, tmp; \
641	cmp	tmp, (4096 - 63)
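
/*
 * A quick boundary check of the test above: if (base & 4095) == 4032,
 * the last byte of the window is at offset 4032 + 63 = 4095, still on
 * the same page, and 4032 < 4033 so the subsequent `bl' is taken; if
 * it is 4033, the last byte falls at offset 4096 (the next page) and
 * the test correctly fails.
 */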
642
643/*
644 * Every trap that enables traps must set up stack space.
645 * If the trap is from user mode, this involves switching to the kernel
646 * stack for the current process, and we must also set cpcb->pcb_uw
647 * so that the window overflow handler can tell user windows from kernel
648 * windows.
649 *
650 * The number of user windows is:
651 *
652 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
653 *
654 * (where pcb_wim = log2(current %wim) and CWP = low 5 bits of %psr).
655 * We compute this expression by table lookup in uwtab[CWP - pcb_wim],
656 * which has been set up as:
657 *
658 *	for i in [-nwin+1 .. nwin-1]
659 *		uwtab[i] = (nwin - 1 - i) % nwin;
660 *
661 * (If you do not believe this works, try it for yourself.)
662 *
663 * We also keep one or two more tables:
664 *
665 *	for i in 0..nwin-1
666 *		wmask[i] = 1 << ((i + 1) % nwindows);
667 *
668 * wmask[CWP] tells whether a `rett' would return into the invalid window.
669 */
670	.data
671	.skip	32			! alignment bytes & negative indices
672uwtab:	.skip	32			! u_char uwtab[-31..31];
673wmask:	.skip	32			! u_char wmask[0..31];
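
/*
 * (The 32 bytes skipped just above provide uwtab's negative entries:
 * uwtab is indexed from its label with an offset that may run from
 * -(nwin-1) to nwin-1.  As a worked example of the formula, take
 * nwin = 8, pcb_wim = 5, CWP = 2: uwtab[2 - 5] = (8 - 1 + 3) % 8 = 2,
 * which matches (pcb_wim - 1 - CWP) % nwin = (5 - 1 - 2) % 8 = 2
 * user windows.)
 */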
674
675	.text
676/*
677 * Things begin to grow uglier....
678 *
679 * Each trap handler may (always) be running in the trap window.
680 * If this is the case, it cannot enable further traps until it writes
681 * the register windows into the stack (or, if the stack is no good,
682 * the current pcb).
683 *
684 * ASSUMPTIONS: TRAP_SETUP() is called with:
685 *	%l0 = %psr
686 *	%l1 = return pc
687 *	%l2 = return npc
688 *	%l3 = (some value that must not be altered)
689 * which means we have 4 registers to work with.
690 *
691 * The `stackspace' argument is the number of stack bytes to allocate
692 * for register saving; it is passed as a negative offset whose magnitude
693 * must be at least 64 (and is typically more, for global registers and %y).
694 *
695 * Trapframes should use -CCFSZ-80.  (80 = sizeof(struct trapframe);
696 * see trap.h.  This basically means EVERYONE.  Interrupt frames could
697 * get away with less, but currently do not.)
698 *
699 * The basic outline here is:
700 *
701 *	if (trap came from kernel mode) {
702 *		if (we are in the trap window)
703 *			save it away;
704 *		%sp = %fp - stackspace;
705 *	} else {
706 *		compute the number of user windows;
707 *		if (we are in the trap window)
708 *			save it away;
709 *		%sp = (top of kernel stack) - stackspace;
710 *	}
711 *
712 * Again, the number of user windows is:
713 *
714 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
715 *
716 * (where pcb_wim = log2(current %wim) and CWP is the low 5 bits of %psr),
717 * and this is computed as `uwtab[CWP - pcb_wim]'.
718 *
719 * NOTE: if you change this code, you will have to look carefully
720 * at the window overflow and underflow handlers and make sure they
721 * have similar changes made as needed.
722 */
723#define	CALL_CLEAN_TRAP_WINDOW \
724	sethi	%hi(clean_trap_window), %l7; \
725	jmpl	%l7 + %lo(clean_trap_window), %l4; \
726	 mov	%g7, %l7	/* save %g7 in %l7 for clean_trap_window */
727
728#define	TRAP_SETUP(stackspace) \
729	rd	%wim, %l4; \
730	mov	1, %l5; \
731	sll	%l5, %l0, %l5; \
732	btst	PSR_PS, %l0; \
733	bz	1f; \
734	 btst	%l5, %l4; \
735	/* came from kernel mode; cond codes indicate trap window */ \
736	bz,a	3f; \
737	 add	%fp, stackspace, %sp;	/* want to just set %sp */ \
738	CALL_CLEAN_TRAP_WINDOW;		/* but maybe need to clean first */ \
739	b	3f; \
740	 add	%fp, stackspace, %sp; \
7411: \
742	/* came from user mode: compute pcb_nw */ \
743	sethi	%hi(_cpcb), %l6; \
744	ld	[%l6 + %lo(_cpcb)], %l6; \
745	ld	[%l6 + PCB_WIM], %l5; \
746	and	%l0, 31, %l4; \
747	sub	%l4, %l5, %l5; \
748	set	uwtab, %l4; \
749	ldub	[%l4 + %l5], %l5; \
750	st	%l5, [%l6 + PCB_UW]; \
751	/* cond codes still indicate whether in trap window */ \
752	bz,a	2f; \
753	 sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
754	/* yes, in trap window; must clean it */ \
755	CALL_CLEAN_TRAP_WINDOW; \
756	sethi	%hi(_cpcb), %l6; \
757	ld	[%l6 + %lo(_cpcb)], %l6; \
758	sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
7592: \
760	/* trap window is (now) clean: set %sp */ \
761	or	%l5, %lo(UPAGES*NBPG+(stackspace)), %l5; \
762	add	%l6, %l5, %sp; \
763	SET_SP_REDZONE(%l6, %l5); \
7643: \
765	CHECK_SP_REDZONE(%l6, %l5)
766
767/*
768 * Interrupt setup is almost exactly like trap setup, but we need to
769 * go to the interrupt stack if (a) we came from user mode or (b) we
770 * came from kernel mode on the kernel stack.
771 */
772#define	INTR_SETUP(stackspace) \
773	rd	%wim, %l4; \
774	mov	1, %l5; \
775	sll	%l5, %l0, %l5; \
776	btst	PSR_PS, %l0; \
777	bz	1f; \
778	 btst	%l5, %l4; \
779	/* came from kernel mode; cond codes still indicate trap window */ \
780	bz,a	0f; \
781	 sethi	%hi(_eintstack), %l7; \
782	CALL_CLEAN_TRAP_WINDOW; \
783	sethi	%hi(_eintstack), %l7; \
7840:	/* now if %fp >= eintstack, we were on the kernel stack */ \
785	cmp	%fp, %l7; \
786	bge,a	3f; \
787	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
788	b	4f; \
789	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
7901: \
791	/* came from user mode: compute pcb_nw */ \
792	sethi	%hi(_cpcb), %l6; \
793	ld	[%l6 + %lo(_cpcb)], %l6; \
794	ld	[%l6 + PCB_WIM], %l5; \
795	and	%l0, 31, %l4; \
796	sub	%l4, %l5, %l5; \
797	set	uwtab, %l4; \
798	ldub	[%l4 + %l5], %l5; \
799	st	%l5, [%l6 + PCB_UW]; \
800	/* cond codes still indicate whether in trap window */ \
801	bz,a	2f; \
802	 sethi	%hi(_eintstack), %l7; \
803	/* yes, in trap window; must save regs */ \
804	CALL_CLEAN_TRAP_WINDOW; \
805	sethi	%hi(_eintstack), %l7; \
8062: \
807	add	%l7, stackspace, %sp; \
8083: \
809	SET_SP_REDZONE_CONST(_intstack, %l6, %l5); \
8104: \
811	CHECK_SP_REDZONE(%l6, %l5)
812
813/*
814 * Handler for making the trap window shiny clean.
815 *
816 * On entry:
817 *	cpcb->pcb_nw = number of user windows
818 *	%l0 = %psr
819 *	%l1 must not be clobbered
820 *	%l2 must not be clobbered
821 *	%l3 must not be clobbered
822 *	%l4 = address for `return'
823 *	%l7 = saved %g7 (we put this in a delay slot above, to save work)
824 *
825 * On return:
826 *	%wim has changed, along with cpcb->pcb_wim
827 *	%g7 has been restored
828 *
829 * Normally, we push only one window.
830 */
831clean_trap_window:
832	mov	%g5, %l5		! save %g5
833	mov	%g6, %l6		! ... and %g6
834/*	mov	%g7, %l7		! ... and %g7 (already done for us) */
835	sethi	%hi(_cpcb), %g6		! get current pcb
836	ld	[%g6 + %lo(_cpcb)], %g6
837
838	/* Figure out whether it is a user window (cpcb->pcb_uw > 0). */
839	ld	[%g6 + PCB_UW], %g7
840	deccc	%g7
841	bge	ctw_user
842	 save	%g0, %g0, %g0		! in any case, enter window to save
843
844	/* The window to be pushed is a kernel window. */
845	std	%l0, [%sp + (0*8)]
846ctw_merge:
847	std	%l2, [%sp + (1*8)]
848	std	%l4, [%sp + (2*8)]
849	std	%l6, [%sp + (3*8)]
850	std	%i0, [%sp + (4*8)]
851	std	%i2, [%sp + (5*8)]
852	std	%i4, [%sp + (6*8)]
853	std	%i6, [%sp + (7*8)]
854
855	/* Set up new window invalid mask, and update cpcb->pcb_wim. */
856	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp
857	mov	1, %g5			! g5 = 1 << new_cwp;
858	sll	%g5, %g7, %g5
859	wr	%g5, 0, %wim		! setwim(g5);
860	and	%g7, 31, %g7		! cpcb->pcb_wim = g7 & 31;
861	st	%g7, [%g6 + PCB_WIM]
862	nop
863	restore				! back to trap window
864
865	mov	%l5, %g5		! restore g5
866	mov	%l6, %g6		! ... and g6
867	jmp	%l4 + 8			! return to caller
868	 mov	%l7, %g7		! ... and g7
869	/* NOTREACHED */
870
871ctw_user:
872	/*
873	 * The window to be pushed is a user window.
874	 * We must verify the stack pointer (alignment & permissions).
875	 * See comments above definition of PTE_OF_ADDR.
876	 */
877	st	%g7, [%g6 + PCB_UW]	! cpcb->pcb_uw--;
878	btst	7, %sp			! if not aligned,
879	bne	ctw_invalid		! choke on it
880	 EMPTY
881	PTE_OF_ADDR(%sp, %g7, ctw_invalid)
882	CMP_PTE_USER_WRITE(%g7)		! likewise if not writable
883	bne	ctw_invalid
884	 EMPTY
885	SLT_IF_1PAGE_RW(%sp, %g7)
886	bl,a	ctw_merge		! all ok if only 1
887	 std	%l0, [%sp]
888	add	%sp, 7*8, %g5		! check last addr too
889	PTE_OF_ADDR(%g5, %g7, ctw_invalid)
890	CMP_PTE_USER_WRITE(%g7)
891	be,a	ctw_merge		! all ok: store <l0,l1> and merge
892	 std	%l0, [%sp]
893
894	/*
895	 * The window we wanted to push could not be pushed.
896	 * Instead, save ALL user windows into the pcb.
897	 * We will notice later that we did this, when we
898	 * get ready to return from our trap or syscall.
899	 *
900	 * The code here is run rarely and need not be optimal.
901	 */
902ctw_invalid:
903	/*
904	 * Reread cpcb->pcb_uw.  We decremented this earlier,
905	 * so it is off by one.
906	 */
907	ld	[%g6 + PCB_UW], %g7	! (number of user windows) - 1
908	add	%g6, PCB_RW, %g5
909
910	/* save g7+1 windows, starting with the current one */
9111:					! do {
912	std	%l0, [%g5 + (0*8)]	!	rw->rw_local[0] = l0;
913	std	%l2, [%g5 + (1*8)]	!	...
914	std	%l4, [%g5 + (2*8)]
915	std	%l6, [%g5 + (3*8)]
916	std	%i0, [%g5 + (4*8)]
917	std	%i2, [%g5 + (5*8)]
918	std	%i4, [%g5 + (6*8)]
919	std	%i6, [%g5 + (7*8)]
920	deccc	%g7			!	if (n > 0) save(), rw++;
921	bge,a	1b			! } while (--n >= 0);
922	 save	%g5, 64, %g5
923
924	/* stash sp for bottommost window */
925	st	%sp, [%g5 + 64 + (7*8)]
926
927	/* set up new wim */
928	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp;
929	mov	1, %g5			! g5 = 1 << new_cwp;
930	sll	%g5, %g7, %g5
931	wr	%g5, 0, %wim		! wim = g5;
932	and	%g7, 31, %g7
933	st	%g7, [%g6 + PCB_WIM]	! cpcb->pcb_wim = new_cwp;
934
935	/* fix up pcb fields */
936	ld	[%g6 + PCB_UW], %g7	! n = cpcb->pcb_uw;
937	add	%g7, 1, %g5
938	st	%g5, [%g6 + PCB_NSAVED]	! cpcb->pcb_nsaved = n + 1;
939	st	%g0, [%g6 + PCB_UW]	! cpcb->pcb_uw = 0;
940
941	/* return to trap window */
9421:	deccc	%g7			! do {
943	bge	1b			!	restore();
944	 restore			! } while (--n >= 0);
945
946	mov	%l5, %g5		! restore g5, g6, & g7, and return
947	mov	%l6, %g6
948	jmp	%l4 + 8
949	 mov	%l7, %g7
950	/* NOTREACHED */
951
952
953/*
954 * Each memory access (text or data) fault, from user or kernel mode,
955 * comes here.  We read the error register and figure out what has
956 * happened.
957 *
958 * This cannot be done from C code since we must not enable traps (and
959 * hence may not use the `save' instruction) until we have decided that
960 * the error is or is not an asynchronous one that showed up after a
961 * synchronous error, but which must be handled before the sync err.
962 *
963 * Most memory faults are user mode text or data faults, which can cause
964 * signal delivery or ptracing, for which we must build a full trapframe.
965 * It does not seem worthwhile to work to avoid this in the other cases,
966 * so we store all the %g registers on the stack immediately.
967 *
968 * On entry:
969 *	%l0 = %psr
970 *	%l1 = return pc
971 *	%l2 = return npc
972 *	%l3 = T_TEXTFAULT or T_DATAFAULT
973 *
974 * Internal:
975 *	%l4 = %y, until we call mem_access_fault (then onto trapframe)
976 *	%l5 = IE_reg_addr, if async mem error
977 *
978 * We know about the layout of the error registers here.
979 *	addr	reg
980 *	----	---
981 *	a	AC_SYNC_ERR
982 *	a+4	AC_SYNC_VA
983 *	a+8	AC_ASYNC_ERR
984 *	a+12	AC_ASYNC_VA
985 */
986memfault:
987	TRAP_SETUP(-CCFSZ-80)
988
989	INCR(_cnt+V_FAULTS)		! cnt.v_faults++ (clobbers %o0,%o1)
990
991	st	%g1, [%sp + CCFSZ + 20]	! save g1
992	rd	%y, %l4			! save y
993
994#if AC_SYNC_ERR + 4 != AC_SYNC_VA || \
995    AC_SYNC_ERR + 8 != AC_ASYNC_ERR || AC_SYNC_ERR + 12 != AC_ASYNC_VA
996	help help help		! I, I, I wanna be a lifeguard
997#endif
998	set	AC_SYNC_ERR, %o0
999	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
1000	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1001	inc	4, %o0
1002	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
1003	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1004	btst	SER_MEMERR, %o1		! memory error?
1005	std	%g6, [%sp + CCFSZ + 40]
1006	bz,a	normal_mem_fault	! no, just a regular fault
1007 	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1008
1009	/*
1010	 * We got a synchronous memory error.  It could be one that
1011	 * happened because there were two stores in a row, and the
1012	 * first went into the write buffer, and the second caused this
1013	 * synchronous trap; so there could now be a pending async error.
1014	 * This is in fact the case iff the two va's differ.
1015	 */
1016	inc	4, %o0
1017	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1018	inc	4, %o0
1019	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1020	cmp	%o2, %o4
1021	be,a	1f			! no, not an async err
1022	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1023
1024	/*
1025	 * Handle the async error; ignore the sync error for now
1026	 * (we may end up getting it again, but so what?).
1027	 * This code is essentially the same as that at `nmi' below,
1028	 * but the register usage is different and we cannot merge.
1029	 */
1030	sethi	%hi(IE_reg_addr), %l5	! ienab_bic(IE_ALLIE);
1031	ldub	[%l5 + %lo(IE_reg_addr)], %o0
1032	andn	%o0, IE_ALLIE, %o0
1033	stb	%o0, [%l5 + %lo(IE_reg_addr)]
1034
1035	/*
1036	 * Now reenable traps and call C code.
1037	 * %o1 through %o4 still hold the error reg contents.
1038	 * If memerr() returns, return from the trap.
1039	 */
1040	wr	%l0, PSR_ET, %psr
1041	call	_memerr			! memerr(0, ser, sva, aer, ava)
1042	 clr	%o0
1043
1044	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1045	wr	%l0, 0, %psr		! and disable traps, 3 instr delay
1046	ldd	[%sp + CCFSZ + 24], %g2
1047	ldd	[%sp + CCFSZ + 32], %g4
1048	ldd	[%sp + CCFSZ + 40], %g6
1049	/* now safe to set IE_ALLIE again */
1050	ldub	[%l5 + %lo(IE_reg_addr)], %o1
1051	or	%o1, IE_ALLIE, %o1
1052	stb	%o1, [%l5 + %lo(IE_reg_addr)]
1053	b	return_from_trap
1054	 wr	%l4, 0, %y		! restore y
1055
1056	/*
1057	 * Trap was a synchronous memory error.
1058	 * %o1 through %o4 still hold the error reg contents.
1059	 */
10601:
1061	call	_memerr			! memerr(1, ser, sva, aer, ava)
1062	 mov	1, %o0
1063
1064	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1065	ldd	[%sp + CCFSZ + 24], %g2
1066	ldd	[%sp + CCFSZ + 32], %g4
1067	ldd	[%sp + CCFSZ + 40], %g6
1068	wr	%l4, 0, %y		! restore y
1069	b	return_from_trap
1070	 wr	%l0, 0, %psr
1071	/* NOTREACHED */
1072
1073normal_mem_fault:
1074	/*
1075	 * Trap was some other error; call C code to deal with it.
1076	 * Must finish trap frame (psr,pc,npc,%y,%o0..%o7) in case
1077	 * we decide to deliver a signal or ptrace the process.
1078	 * %g1..%g7 were already set up above.
1079	 */
1080	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
1081	mov	%l3, %o0		! (argument: type)
1082	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
1083	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
1084	mov	%l1, %o3		! (argument: pc)
1085	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1086	std	%i2, [%sp + CCFSZ + 56]
1087	mov	%l0, %o4		! (argument: psr)
1088	std	%i4, [%sp + CCFSZ + 64]
1089	std	%i6, [%sp + CCFSZ + 72]
1090	call	_mem_access_fault	! mem_access_fault(type, ser, sva,
1091					!		pc, psr, &tf);
1092	 add	%sp, CCFSZ, %o5		! (argument: &tf)
1093
1094	ldd	[%sp + CCFSZ + 0], %l0	! load new values
1095	ldd	[%sp + CCFSZ + 8], %l2
1096	wr	%l3, 0, %y
1097	ld	[%sp + CCFSZ + 20], %g1
1098	ldd	[%sp + CCFSZ + 24], %g2
1099	ldd	[%sp + CCFSZ + 32], %g4
1100	ldd	[%sp + CCFSZ + 40], %g6
1101	ldd	[%sp + CCFSZ + 48], %i0
1102	ldd	[%sp + CCFSZ + 56], %i2
1103	ldd	[%sp + CCFSZ + 64], %i4
1104	ldd	[%sp + CCFSZ + 72], %i6
1105
1106	b	return_from_trap	! go return
1107	 wr	%l0, 0, %psr		! (but first disable traps again)
1108
1109
1110/*
1111 * fp_exception has to check to see if we are trying to save
1112 * the FP state, and if so, continue to save the FP state.
1113 *
1114 * We do not even bother checking to see if we were in kernel mode,
1115 * since users have no access to the special_fp_store instruction.
1116 *
1117 * This whole idea was stolen from Sprite.
1118 */
1119fp_exception:
1120	set	special_fp_store, %l4	! see if we came from the special one
1121	cmp	%l1, %l4		! pc == special_fp_store?
1122	bne	slowtrap		! no, go handle per usual
1123	 EMPTY
1124	sethi	%hi(savefpcont), %l4	! yes, "return" to the special code
1125	or	%lo(savefpcont), %l4, %l4
1126	jmp	%l4
1127	 rett	%l4 + 4
1128
1129/*
1130 * slowtrap() builds a trap frame and calls trap().
1131 * This is called `slowtrap' because it *is*....
1132 * We have to build a full frame for ptrace(), for instance.
1133 *
1134 * Registers:
1135 *	%l0 = %psr
1136 *	%l1 = return pc
1137 *	%l2 = return npc
1138 *	%l3 = trap code
1139 */
1140slowtrap:
1141	TRAP_SETUP(-CCFSZ-80)
1142	/*
1143	 * Phew, ready to enable traps and call C code.
1144	 */
1145	mov	%l3, %o0		! put type in %o0 for later
1146Lslowtrap_reenter:
1147	wr	%l0, PSR_ET, %psr	! traps on again
1148	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
1149	rd	%y, %l3
1150	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
1151	st	%g1, [%sp + CCFSZ + 20]
1152	std	%g2, [%sp + CCFSZ + 24]
1153	std	%g4, [%sp + CCFSZ + 32]
1154	std	%g6, [%sp + CCFSZ + 40]
1155	std	%i0, [%sp + CCFSZ + 48]
1156	mov	%l0, %o1		! (psr)
1157	std	%i2, [%sp + CCFSZ + 56]
1158	mov	%l1, %o2		! (pc)
1159	std	%i4, [%sp + CCFSZ + 64]
1160	add	%sp, CCFSZ, %o3		! (&tf)
1161	call	_trap			! trap(type, psr, pc, &tf)
1162	 std	%i6, [%sp + CCFSZ + 72]
1163
1164	ldd	[%sp + CCFSZ], %l0	! load new values
1165	ldd	[%sp + CCFSZ + 8], %l2
1166	wr	%l3, 0, %y
1167	ld	[%sp + CCFSZ + 20], %g1
1168	ldd	[%sp + CCFSZ + 24], %g2
1169	ldd	[%sp + CCFSZ + 32], %g4
1170	ldd	[%sp + CCFSZ + 40], %g6
1171	ldd	[%sp + CCFSZ + 48], %i0
1172	ldd	[%sp + CCFSZ + 56], %i2
1173	ldd	[%sp + CCFSZ + 64], %i4
1174	ldd	[%sp + CCFSZ + 72], %i6
1175	b	return_from_trap
1176	 wr	%l0, 0, %psr
1177
1178/*
1179 * Do a `software' trap by re-entering the trap code, possibly first
1180 * switching from interrupt stack to kernel stack.  This is used for
1181 * scheduling and signal ASTs (which generally occur from softclock or
1182 * tty or net interrupts) and register window saves (which might occur
1183 * from anywhere).
1184 *
1185 * The current window is the trap window, and it is by definition clean.
1186 * We enter with the trap type in %o0.  All we have to do is jump to
1187 * Lslowtrap_reenter above, but maybe after switching stacks....
1188 */
1189softtrap:
1190	sethi	%hi(_eintstack), %l7
1191	cmp	%sp, %l7
1192	bge	Lslowtrap_reenter
1193	 EMPTY
1194	sethi	%hi(_cpcb), %l6
1195	ld	[%l6 + %lo(_cpcb)], %l6
1196	set	UPAGES*NBPG - CCFSZ - 80, %l5
1197	add	%l6, %l5, %l7
1198	SET_SP_REDZONE(%l6, %l5)
1199	b	Lslowtrap_reenter
1200	 mov	%l7, %sp
1201
1202#ifdef KGDB
1203/*
1204 * bpt is entered on all breakpoint traps.
1205 * If this is a kernel breakpoint, we do not want to call trap().
1206 * Among other reasons, this way we can set breakpoints in trap().
1207 */
1208bpt:
1209	btst	PSR_PS, %l0		! breakpoint from kernel?
1210	bz	slowtrap		! no, go do regular trap
1211	 nop
1212
1213	/*
1214	 * Build a trap frame for kgdb_trap_glue to copy.
1215	 * Enable traps but set ipl high so that we will not
1216	 * see interrupts from within breakpoints.
1217	 */
1218	TRAP_SETUP(-CCFSZ-80)
1219	or	%l0, PSR_PIL, %l4	! splhigh()
1220	wr	%l4, 0, %psr		! the manual claims that this
1221	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1222	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1223	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
1224	rd	%y, %l3
1225	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1226	rd	%wim, %l3
1227	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
1228	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1229	std	%g2, [%sp + CCFSZ + 24]	! etc
1230	std	%g4, [%sp + CCFSZ + 32]
1231	std	%g6, [%sp + CCFSZ + 40]
1232	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
1233	std	%i2, [%sp + CCFSZ + 56]	! etc
1234	std	%i4, [%sp + CCFSZ + 64]
1235	std	%i6, [%sp + CCFSZ + 72]
1236
1237	/*
1238	 * Now call kgdb_trap_glue(); if it returns, call trap().
1239	 */
1240	mov	%o0, %l3		! gotta save trap type
1241	call	_kgdb_trap_glue		! kgdb_trap_glue(type, &trapframe)
1242	 add	%sp, CCFSZ, %o1		! (&trapframe)
1243
1244	/*
1245	 * Use slowtrap to call trap---but first erase our tracks
1246	 * (put the registers back the way they were).
1247	 */
1248	mov	%l3, %o0		! slowtrap will need trap type
1249	ld	[%sp + CCFSZ + 12], %l3
1250	wr	%l3, 0, %y
1251	ld	[%sp + CCFSZ + 20], %g1
1252	ldd	[%sp + CCFSZ + 24], %g2
1253	ldd	[%sp + CCFSZ + 32], %g4
1254	b	Lslowtrap_reenter
1255	 ldd	[%sp + CCFSZ + 40], %g6
1256
1257/*
1258 * Enter kernel breakpoint.  Write all the windows (not including the
1259 * current window) into the stack, so that backtrace works.  Copy the
1260 * supplied trap frame to the kgdb stack and switch stacks.
1261 *
1262 * kgdb_trap_glue(type, tf0)
1263 *	int type;
1264 *	struct trapframe *tf0;
1265 */
1266	.globl	_kgdb_trap_glue
1267_kgdb_trap_glue:
1268	save	%sp, -CCFSZ, %sp
1269
1270	call	_write_all_windows
1271	 mov	%sp, %l4		! %l4 = current %sp
1272
1273	/* copy trapframe to top of kgdb stack */
1274	set	_kgdb_stack + KGDB_STACK_SIZE - 80, %l0
1275					! %l0 = tfcopy -> end_of_kgdb_stack
1276	mov	80, %l1
12771:	ldd	[%i1], %l2
1278	inc	8, %i1
1279	deccc	8, %l1
1280	std	%l2, [%l0]
1281	bg	1b
1282	 inc	8, %l0
1283
1284#ifdef DEBUG
1285	/* save old red zone and then turn it off */
1286	sethi	%hi(_redzone), %l7
1287	ld	[%l7 + %lo(_redzone)], %l6
1288	st	%g0, [%l7 + %lo(_redzone)]
1289#endif
1290	/* switch to kgdb stack */
1291	add	%l0, -CCFSZ-80, %sp
1292
1293	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
1294	mov	%i0, %o0
1295	call	_kgdb_trap
1296	add	%l0, -80, %o1
1297	tst	%o0
1298	bnz,a	kgdb_rett
1299	 add	%l0, -80, %g1
1300
1301	/*
1302	 * kgdb_trap() did not handle the trap at all so the stack is
1303	 * still intact.  A simple `restore' will put everything back,
1304	 * after we reset the stack pointer.
1305	 */
1306	mov	%l4, %sp
1307#ifdef DEBUG
1308	st	%l6, [%l7 + %lo(_redzone)]	! restore red zone
1309#endif
1310	ret
1311	restore
1312
1313/*
1314 * Return from kgdb trap.  This is sort of special.
1315 *
1316 * We know that kgdb_trap_glue wrote the window above it, so that we will
1317 * be able to (and are sure to have to) load it up.  We also know that we
1318 * came from kernel land and can assume that the %fp (%i6) we load here
1319 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
1320 * until we have traps disabled, due to the SPARC taking traps at the
1321 * new ipl before noticing that PSR_ET has been turned off.  We are on
1322 * the kgdb stack, so this could be disastrous.
1323 *
1324 * Note that the trapframe argument in %g1 points into the current stack
1325 * frame (current window).  We abandon this window when we move %g1->tf_psr
1326 * into %psr, but we will not have loaded the new %sp yet, so again traps
1327 * must be disabled.
1328 */
1329kgdb_rett:
1330	rd	%psr, %g4		! turn off traps
1331	wr	%g4, PSR_ET, %psr
1332	/* use the three-instruction delay to do something useful */
1333	ld	[%g1], %g2		! pick up new %psr
1334	ld	[%g1 + 12], %g3		! set %y
1335	wr	%g3, 0, %y
1336#ifdef DEBUG
1337	st	%l6, [%l7 + %lo(_redzone)] ! and restore red zone
1338#endif
1339	wr	%g0, 0, %wim		! enable window changes
1340	nop; nop; nop
1341	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
1342	wr	%g2, 0, %psr		! set rett psr (including cond codes)
1343	/* 3 instruction delay before we can use the new window */
1344/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
1345/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
1346/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7
1347
1348	/* now we can use the new window */
1349	mov	%g1, %l4
1350	ld	[%l4 + 4], %l1		! get new pc
1351	ld	[%l4 + 8], %l2		! get new npc
1352	ld	[%l4 + 20], %g1		! set new %g1
1353
1354	/* set up returnee's out registers, including its %sp */
1355	ldd	[%l4 + 48], %i0
1356	ldd	[%l4 + 56], %i2
1357	ldd	[%l4 + 64], %i4
1358	ldd	[%l4 + 72], %i6
1359
1360	/* load returnee's window, making the window above it be invalid */
1361	restore
1362	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
1363	rd	%psr, %l0
1364	sll	%l1, %l0, %l1
1365	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
1366	sethi	%hi(_cpcb), %l1
1367	ld	[%l1 + %lo(_cpcb)], %l1
1368	and	%l0, 31, %l0		! CWP = %psr & 31;
1369	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
1370	save	%g0, %g0, %g0		! back to window to reload
1371	LOADWIN(%sp)
1372	save	%g0, %g0, %g0		! back to trap window
1373	/* note, we have not altered condition codes; safe to just rett */
1374	RETT
1375#endif
1376
1377/*
1378 * syscall() builds a trap frame and calls syscall().
1379 * sun_syscall is same but delivers sun system call number
1380 * XXX	should not have to save&reload ALL the registers just for
1381 *	ptrace...
1382 */
1383#ifdef COMPAT_SUNOS
1384sun_syscall:
1385	TRAP_SETUP(-CCFSZ-80)
1386	b	sys_merge
1387	 mov	1, %o3			! third arg to syscall: sun compat
1388syscall:
1389	TRAP_SETUP(-CCFSZ-80)
1390	clr	%o3			! third arg to syscall: native bsd
1391sys_merge:
1392#else
1393syscall:
1394	TRAP_SETUP(-CCFSZ-80)
1395#endif
1396	wr	%l0, PSR_ET, %psr
1397	std	%l0, [%sp + CCFSZ + 0]	! tf_psr, tf_pc
1398	rd	%y, %l3
1399	std	%l2, [%sp + CCFSZ + 8]	! tf_npc, tf_y
1400	st	%g1, [%sp + CCFSZ + 20]	! tf_g[1]
1401	std	%g2, [%sp + CCFSZ + 24]	! tf_g[2], tf_g[3]
1402	std	%g4, [%sp + CCFSZ + 32]	! etc
1403	std	%g6, [%sp + CCFSZ + 40]
1404	mov	%g1, %o0		! (code)
1405	std	%i0, [%sp + CCFSZ + 48]
1406	add	%sp, CCFSZ, %o1		! (&tf)
1407	std	%i2, [%sp + CCFSZ + 56]
1408	mov	%l1, %o2		! (pc)
1409	std	%i4, [%sp + CCFSZ + 64]
1410	call	_syscall		! syscall(code, &tf, pc, suncompat)
1411	 std	%i6, [%sp + CCFSZ + 72]
1412	! now load em all up again, sigh
1413	ldd	[%sp + CCFSZ + 0], %l0	! new %psr, new pc
1414	ldd	[%sp + CCFSZ + 8], %l2	! new npc, new %y
1415	wr	%l3, 0, %y
1416	ld	[%sp + CCFSZ + 20], %g1
1417	ldd	[%sp + CCFSZ + 24], %g2
1418	ldd	[%sp + CCFSZ + 32], %g4
1419	ldd	[%sp + CCFSZ + 40], %g6
1420	ldd	[%sp + CCFSZ + 48], %i0
1421	ldd	[%sp + CCFSZ + 56], %i2
1422	ldd	[%sp + CCFSZ + 64], %i4
1423	ldd	[%sp + CCFSZ + 72], %i6
1424	b	return_from_trap
1425	 wr	%l0, 0, %psr
1426
1427/*
1428 * Interrupts.  Software interrupts must be cleared from the software
1429 * interrupt enable register.  Rather than calling ienab_bic for each,
1430 * we do them in-line before enabling traps.
1431 *
1432 * After preliminary setup work, the interrupt is passed to each
1433 * registered handler in turn.  These are expected to return nonzero if
1434 * they took care of the interrupt.  If a handler claims the interrupt,
1435 * we exit (hardware interrupts are latched in the requestor so we'll
1436 * just take another interrupt in the unlikely event of simultaneous
1437 * interrupts from two different devices at the same level).  If we go
1438 * through all the registered handlers and no one claims it, we report a
1439 * stray interrupt.  This is more or less done as:
1440 *
1441 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
1442 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
1443 *			return;
1444 *	strayintr(&frame);
1445 *
1446 * Software interrupts are almost the same with three exceptions:
1447 * (1) we clear the interrupt from the software interrupt enable
1448 *     register before calling any handler (we have to clear it first
1449 *     to avoid an interrupt-losing race),
1450 * (2) we always call all the registered handlers (there is no way
1451 *     to tell if the single bit in the software interrupt register
1452 *     represents one or many requests)
1453 * (3) we never announce a stray interrupt (because of (1), another
1454 *     interrupt request can come in while we're in the handler.  If
1455 *     the handler deals with everything for both the original & the
1456 *     new request, we'll erroneously report a stray interrupt when
1457 *     we take the software interrupt for the new request).
1458 *
1459 * Inputs:
1460 *	%l0 = %psr
1461 *	%l1 = return pc
1462 *	%l2 = return npc
1463 *	%l3 = interrupt level
1464 *	(software interrupt only) %l4 = bits to clear in interrupt register
1465 *
1466 * Internal:
1467 *	%l4, %l5: local variables
1468 *	%l6 = %y
1469 *	%l7 = %g1
1470 *	%g2..%g7 go to stack
1471 *
1472 * An interrupt frame is built in the space for a full trapframe;
1473 * this contains the psr, pc, npc, and interrupt level.
1474 */
1475	.comm	_intrhand, 15 * 8	! intrhand[0..14]; 0 => error
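/*
 * The dispatch loops below assume a handler record laid out roughly as
 * follows (a sketch; the real declaration lives on the C side, in the
 * intr code):
 *
 *	struct intrhand {
 *		int	(*ih_fun)();		offset 0
 *		void	*ih_arg;		offset 4 (0 means pass &frame)
 *		struct	intrhand *ih_next;	offset 8
 *	};
 *
 * so `ld [%l4]' fetches ih_fun, `ld [%l4 + 4]' fetches ih_arg, and
 * `ld [%l4 + 8]' advances to ih_next.
 */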
1476softintr:
1477	sethi	%hi(IE_reg_addr), %l6
1478	ldub	[%l6 + %lo(IE_reg_addr)], %l5
1479	andn	%l5, %l4, %l5
1480	stb	%l5, [%l6 + %lo(IE_reg_addr)]
1481	INTR_SETUP(-CCFSZ-80)
1482	std	%g2, [%sp + CCFSZ + 24]	! save registers
1483	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1484	mov	%g1, %l7
1485	rd	%y, %l6
1486	std	%g4, [%sp + CCFSZ + 32]
1487	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1488	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1489	std	%g6, [%sp + CCFSZ + 40]
1490	or	%l5, %l4, %l4		!			;
1491	wr	%l4, 0, %psr		! the manual claims this
1492	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1493	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1494	sll	%l3, 2, %l5
1495	set	_intrcnt, %l4		! intrcnt[intlev]++;
1496	ld	[%l4 + %l5], %o0
1497	std	%l2, [%sp + CCFSZ + 8]
1498	inc	%o0
1499	st	%o0, [%l4 + %l5]
1500	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1501	ld	[%l4 + %l5], %l4
1502	b	3f
1503	 st	%fp, [%sp + CCFSZ + 16]
1504
15051:	ld	[%l4], %o1
1506	ld	[%l4 + 4], %o0
1507	tst	%o0
1508	bz,a	2f
1509	 add	%sp, CCFSZ, %o0
15102:	jmpl	%o1, %o7		!	(void)(*ih->ih_fun)(...)
1511	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
15123:	tst	%l4			! while ih != NULL
1513	bnz	1b
1514	 nop
1515	mov	%l7, %g1
1516	wr	%l6, 0, %y
1517	ldd	[%sp + CCFSZ + 24], %g2
1518	ldd	[%sp + CCFSZ + 32], %g4
1519	ldd	[%sp + CCFSZ + 40], %g6
1520	b	return_from_trap
1521	 wr	%l0, 0, %psr
1522
1523	/*
1524	 * _sparc_interrupt is exported for paranoia checking (see intr.c).
1525	 */
1526	.globl	_sparc_interrupt
1527_sparc_interrupt:
1528	INTR_SETUP(-CCFSZ-80)
1529	std	%g2, [%sp + CCFSZ + 24]	! save registers
1530	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1531	mov	%g1, %l7
1532	rd	%y, %l6
1533	std	%g4, [%sp + CCFSZ + 32]
1534	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1535	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1536	std	%g6, [%sp + CCFSZ + 40]
1537	or	%l5, %l4, %l4		!			;
1538	wr	%l4, 0, %psr		! the manual claims this
1539	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1540	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1541	sll	%l3, 2, %l5
1542	set	_intrcnt, %l4		! intrcnt[intlev]++;
1543	ld	[%l4 + %l5], %o0
1544	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
1545	inc	%o0
1546	st	%o0, [%l4 + %l5]
1547	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1548	ld	[%l4 + %l5], %l4
1549	b	3f
1550	 st	%fp, [%sp + CCFSZ + 16]
1551
15521:	ld	[%l4], %o1
1553	ld	[%l4 + 4], %o0
1554	tst	%o0
1555	bz,a	2f
1556	 add	%sp, CCFSZ, %o0
15572:	jmpl	%o1, %o7		!	handled = (*ih->ih_fun)(...)
1558	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
1559	tst	%o0
1560	bnz	4f			! if (handled) break
1561	 nop
15623:	tst	%l4
1563	bnz	1b			! while (ih)
1564	 nop
1565	call	_strayintr		!	strayintr(&intrframe)
1566	 add	%sp, CCFSZ, %o0
1567	/* all done: restore registers and go return */
15684:	mov	%l7, %g1
1569	wr	%l6, 0, %y
1570	ldd	[%sp + CCFSZ + 24], %g2
1571	ldd	[%sp + CCFSZ + 32], %g4
1572	ldd	[%sp + CCFSZ + 40], %g6
1573	b	return_from_trap
1574	 wr	%l0, 0, %psr
1575
1576#ifdef notyet
1577/*
1578 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
1579 * software interrupt, and get out.  Do the software interrupt directly
1580 * if we would just take it on the way out.
1581 *
1582 * Input:
1583 *	%l0 = %psr
1584 *	%l1 = return pc
1585 *	%l2 = return npc
1586 * Internal:
1587 *	%l3 = zs device
1588 *	%l4, %l5 = temporary
1589 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
1590 *	%l7 = zs soft status
1591 */
1592zshard:
1593#endif /* notyet */
1594
1595/*
1596 * Level 15 interrupt.  An async memory error has occurred;
1597 * take care of it (typically by panicking, but hey...).
1598 *	%l0 = %psr
1599 *	%l1 = return pc
1600 *	%l2 = return npc
1601 *	%l3 = 15 * 4 (why? just because!)
1602 *
1603 * Internal:
1604 *	%l4 = %y
1605 *	%l5 = %g1
1606 *	%l6 = %g6
1607 *	%l7 = %g7
1608 *  g2, g3, g4, g5 go to stack
1609 *
1610 * This code is almost the same as that in mem_access_fault,
1611 * except that we already know the problem is not a `normal' fault,
1612 * and that we must be extra-careful with interrupt enables.
1613 */
1614nmi:
1615	INTR_SETUP(-CCFSZ-80)
1616	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1617	/*
1618	 * Level 15 interrupts are nonmaskable, so with traps off,
1619	 * disable all interrupts to prevent recursion.
1620	 */
1621	sethi	%hi(IE_reg_addr), %o0
1622	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1623	andn	%o1, IE_ALLIE, %o1
1624	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1625	wr	%l0, PSR_ET, %psr	! okay, turn traps on again
1626
1627	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
1628	rd	%y, %l4			! save y
1629
1630	! must read the sync error register too.
1631	set	AC_SYNC_ERR, %o0
1632	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1633	inc	4, %o0
1634	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1635	std	%g4, [%sp + CCFSZ + 8]	! save g4,g5
1636	mov	%g1, %l5		! save g1,g6,g7
1637	mov	%g6, %l6
1638	mov	%g7, %l7
1639	inc	4, %o0
1640	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1641	inc	4, %o0
1642	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1643
1644	! and call C code
1645	call	_memerr			! memerr(0, ser, sva, aer, ava)
1646	clr	%o0
1647
1648	mov	%l5, %g1		! restore g1 through g7
1649	ldd	[%sp + CCFSZ + 0], %g2
1650	ldd	[%sp + CCFSZ + 8], %g4
1651	wr	%l0, 0, %psr		! re-disable traps
1652	mov	%l6, %g6
1653	mov	%l7, %g7
1654
1655	! set IE_ALLIE again (safe, we disabled traps again above)
1656	sethi	%hi(IE_reg_addr), %o0
1657	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1658	or	%o1, IE_ALLIE, %o1
1659	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1660	b	return_from_trap
1661	 wr	%l4, 0, %y		! restore y
1662
1663
1664/*
1665 * Window overflow trap handler.
1666 *	%l0 = %psr
1667 *	%l1 = return pc
1668 *	%l2 = return npc
1669 */
1670window_of:
1671#ifdef TRIVIAL_WINDOW_OVERFLOW_HANDLER
1672	/* a trivial version that assumes %sp is ok */
1673	/* (for testing only!) */
1674	save	%g0, %g0, %g0
1675	std	%l0, [%sp + (0*8)]
1676	rd	%psr, %l0
1677	mov	1, %l1
1678	sll	%l1, %l0, %l0
1679	wr	%l0, 0, %wim
1680	std	%l2, [%sp + (1*8)]
1681	std	%l4, [%sp + (2*8)]
1682	std	%l6, [%sp + (3*8)]
1683	std	%i0, [%sp + (4*8)]
1684	std	%i2, [%sp + (5*8)]
1685	std	%i4, [%sp + (6*8)]
1686	std	%i6, [%sp + (7*8)]
1687	restore
1688	RETT
1689#else
1690	/*
1691	 * This is similar to TRAP_SETUP, but we do not want to spend
1692	 * a lot of time, so we have separate paths for kernel and user.
1693	 * We also know for sure that the window has overflowed.
1694	 */
1695	btst	PSR_PS, %l0
1696	bz	winof_user
1697	 sethi	%hi(clean_trap_window), %l7
1698
1699	/*
1700	 * Overflow from kernel mode.  Call clean_trap_window to
1701	 * do the dirty work, then just return, since we know prev
1702	 * window is valid.  clean_trap_windows might dump all *user*
1703	 * windows into the pcb, but we do not care: there is at
1704	 * least one kernel window (a trap or interrupt frame!)
1705	 * above us.
1706	 */
1707	jmpl	%l7 + %lo(clean_trap_window), %l4
1708	 mov	%g7, %l7		! for clean_trap_window
1709
1710	wr	%l0, 0, %psr		! put back the @%*! cond. codes
1711	nop				! (let them settle in)
1712	RETT
1713
1714winof_user:
1715	/*
1716	 * Overflow from user mode.
1717	 * If clean_trap_window dumps the registers into the pcb,
1718	 * rft_user will need to call trap(), so we need space for
1719	 * a trap frame.  We also have to compute pcb_nw.
1720	 *
1721	 * SHOULD EXPAND IN LINE TO AVOID BUILDING TRAP FRAME ON
1722	 * `EASY' SAVES
1723	 */
1724	sethi	%hi(_cpcb), %l6
1725	ld	[%l6 + %lo(_cpcb)], %l6
1726	ld	[%l6 + PCB_WIM], %l5
1727	and	%l0, 31, %l3
1728	sub	%l3, %l5, %l5 		/* l5 = CWP - pcb_wim */
1729	set	uwtab, %l4
1730	ldub	[%l4 + %l5], %l5	/* l5 = uwtab[l5] */
1731	st	%l5, [%l6 + PCB_UW]
1732	jmpl	%l7 + %lo(clean_trap_window), %l4
1733	 mov	%g7, %l7		! for clean_trap_window
1734	sethi	%hi(_cpcb), %l6
1735	ld	[%l6 + %lo(_cpcb)], %l6
1736	set	UPAGES*NBPG-CCFSZ-80, %l5
1737	add	%l6, %l5, %sp		/* over to kernel stack */
1738	CHECK_SP_REDZONE(%l6, %l5)
1739
1740	/*
1741	 * Copy return_from_trap far enough to allow us
1742	 * to jump directly to rft_user_or_recover_pcb_windows
1743	 * (since we know that is where we are headed).
1744	 */
1745!	and	%l0, 31, %l3		! still set (clean_trap_window
1746					! leaves this register alone)
1747	set	wmask, %l6
1748	ldub	[%l6 + %l3], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
1749	b	rft_user_or_recover_pcb_windows
1750	 rd	%wim, %l4		! (read %wim first)
1751#endif /* end `real' version of window overflow trap handler */
1752
1753/*
1754 * Window underflow trap handler.
1755 *	%l0 = %psr
1756 *	%l1 = return pc
1757 *	%l2 = return npc
1758 *
1759 * A picture:
1760 *
1761 *	  T R I X
1762 *	0 0 0 1 0 0 0	(%wim)
1763 * [bit numbers increase towards the right;
1764 * `restore' moves right & `save' moves left]
1765 *
1766 * T is the current (Trap) window, R is the window that attempted
1767 * a `Restore' instruction, I is the Invalid window, and X is the
1768 * window we want to make invalid before we return.
1769 *
1770 * Since window R is valid, we cannot use rft_user to restore stuff
1771 * for us.  We have to duplicate its logic.  YUCK.
1772 *
1773 * Incidentally, TRIX are for kids.  Silly rabbit!
1774 */
1775window_uf:
1776#ifdef TRIVIAL_WINDOW_UNDERFLOW_HANDLER
1777	wr	%g0, 0, %wim		! allow us to enter I
1778	restore				! to R
1779	nop
1780	nop
1781	restore				! to I
1782	restore	%g0, 1, %l1		! to X
1783	rd	%psr, %l0
1784	sll	%l1, %l0, %l0
1785	wr	%l0, 0, %wim
1786	save	%g0, %g0, %g0		! back to I
1787	LOADWIN(%sp)
1788	save	%g0, %g0, %g0		! back to R
1789	save	%g0, %g0, %g0		! back to T
1790	RETT
1791#else
1792	wr	%g0, 0, %wim		! allow us to enter I
1793	btst	PSR_PS, %l0
1794	restore				! enter window R
1795	bz	winuf_user
1796	 restore			! enter window I
1797
1798	/*
1799	 * Underflow from kernel mode.  Just recover the
1800	 * registers and go (except that we have to update
1801	 * the blasted user pcb fields).
1802	 */
1803	restore	%g0, 1, %l1		! enter window X, then set %l1 to 1
1804	rd	%psr, %l0		! cwp = %psr & 31;
1805	and	%l0, 31, %l0
1806	sll	%l1, %l0, %l1		! wim = 1 << cwp;
1807	wr	%l1, 0, %wim		! setwim(wim);
1808	sethi	%hi(_cpcb), %l1
1809	ld	[%l1 + %lo(_cpcb)], %l1
1810	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1811	save	%g0, %g0, %g0		! back to window I
1812	LOADWIN(%sp)
1813	save	%g0, %g0, %g0		! back to R
1814	save	%g0, %g0, %g0		! and then to T
1815	wr	%l0, 0, %psr		! fix those cond codes....
1816	nop				! (let them settle in)
1817	RETT
1818
1819winuf_user:
1820	/*
1821	 * Underflow from user mode.
1822	 *
1823	 * We cannot use rft_user (as noted above) because
1824	 * we must re-execute the `restore' instruction.
1825	 * Since it could be, e.g., `restore %l0,0,%l0',
1826	 * it is not okay to touch R's registers either.
1827	 *
1828	 * We are now in window I.
1829	 */
1830	btst	7, %sp			! if unaligned, it is invalid
1831	bne	winuf_invalid
1832	 EMPTY
1833
1834	PTE_OF_ADDR(%sp, %l7, winuf_invalid)
1835	CMP_PTE_USER_READ(%l7)		! if first page not readable,
1836	bne	winuf_invalid		! it is invalid
1837	 EMPTY
1838	SLT_IF_1PAGE_RW(%sp, %l7)	! first page is readable
1839	bl,a	winuf_ok		! if only one page, enter window X
1840	 restore %g0, 1, %l1		! and goto ok, & set %l1 to 1
1841	add	%sp, 7*8, %l5
1842	PTE_OF_ADDR(%l5, %l7, winuf_invalid)
1843	CMP_PTE_USER_READ(%l7)		! check second page too
1844	be,a	winuf_ok		! enter window X and goto ok
1845	 restore %g0, 1, %l1		! (and then set %l1 to 1)
1846
1847winuf_invalid:
1848	/*
1849	 * We were unable to restore the window because %sp
1850	 * is invalid or paged out.  Return to the trap window
1851	 * and call trap(T_WINUF).  This will save R to the user
1852	 * stack, then load both R and I into the pcb rw[] area,
1853	 * and return with pcb_nsaved set to -1 for success, 0 for
1854	 * failure.  `Failure' indicates that someone goofed with the
1855	 * trap registers (e.g., signals), so that we need to return
1856	 * from the trap as from a syscall (probably to a signal handler)
1857	 * and let it retry the restore instruction later.  Note that
1858	 * window R will have been pushed out to user space, and thus
1859	 * be the invalid window, by the time we get back here.  (We
1860	 * continue to label it R anyway.)  We must also set %wim again,
1861	 * and set pcb_uw to 1, before enabling traps.  (Window R is the
1862	 * only window, and it is a user window).
1863	 */
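	/*
	 * In C terms, the recovery below is roughly (a sketch of the
	 * control flow, not a literal transcription):
	 *
	 *	trap(T_WINUF, psr, pc, &tf);
	 *	if (cpcb->pcb_nsaved < 0) {
	 *		reload window I from pcb_rw[1] and window R
	 *		    from pcb_rw[0], reset %wim, and rett;
	 *	} else
	 *		return_from_trap();	(e.g., to a signal handler)
	 */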
1864	save	%g0, %g0, %g0		! back to R
1865#if 0		/* this gives `as' mild heartburn */
1866	save	%g0, 1, %l4		! back to T, then %l4 = 1
1867#else
1868	save	%g0, %g0, %g0		! back to T
1869	mov	1, %l4			! and set %l4 = 1
1870#endif
1871	sethi	%hi(_cpcb), %l6
1872	ld	[%l6 + %lo(_cpcb)], %l6
1873	st	%l4, [%l6 + PCB_UW]	! pcb_uw = 1
1874	ld	[%l6 + PCB_WIM], %l5	! get log2(%wim)
1875	sll	%l4, %l5, %l4		! %l4 = old %wim
1876	wr	%l4, 0, %wim		! window I is now invalid again
1877	set	UPAGES*NBPG-CCFSZ-80, %l5
1878	add	%l6, %l5, %sp		! get onto kernel stack
1879	CHECK_SP_REDZONE(%l6, %l5)
1880
1881	/*
1882	 * Okay, call trap(T_WINUF, psr, pc, &tf).
1883	 * See `slowtrap' above for operation.
1884	 */
1885	wr	%l0, PSR_ET, %psr
1886	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1887	rd	%y, %l3
1888	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1889	mov	T_WINUF, %o0
1890	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1891	mov	%l0, %o1
1892	std	%g2, [%sp + CCFSZ + 24]	! etc
1893	mov	%l1, %o2
1894	std	%g4, [%sp + CCFSZ + 32]
1895	add	%sp, CCFSZ, %o3
1896	std	%g6, [%sp + CCFSZ + 40]
1897	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1898	std	%i2, [%sp + CCFSZ + 56]
1899	std	%i4, [%sp + CCFSZ + 64]
1900	call	_trap			! trap(T_WINUF, psr, pc, &tf)
1901	 std	%i6, [%sp + CCFSZ + 72]	! tf.tf_out[6]
1902
1903	ldd	[%sp + CCFSZ + 0], %l0	! new psr, pc
1904	ldd	[%sp + CCFSZ + 8], %l2	! new npc, %y
1905	wr	%l3, 0, %y
1906	ld	[%sp + CCFSZ + 20], %g1
1907	ldd	[%sp + CCFSZ + 24], %g2
1908	ldd	[%sp + CCFSZ + 32], %g4
1909	ldd	[%sp + CCFSZ + 40], %g6
1910	ldd	[%sp + CCFSZ + 48], %i0	! %o0 for window R, etc
1911	ldd	[%sp + CCFSZ + 56], %i2
1912	ldd	[%sp + CCFSZ + 64], %i4
1913	wr	%l0, 0, %psr		! disable traps: test must be atomic
1914	ldd	[%sp + CCFSZ + 72], %i6
1915	sethi	%hi(_cpcb), %l6
1916	ld	[%l6 + %lo(_cpcb)], %l6
1917	ld	[%l6 + PCB_NSAVED], %l7	! if nsaved is -1, we have our regs
1918	tst	%l7
1919	bl,a	1f			! got them
1920	 wr	%g0, 0, %wim		! allow us to enter windows R, I
1921	b,a	return_from_trap
1922
1923	/*
1924	 * Got 'em.  Load 'em up.
1925	 */
19261:
1927	mov	%g6, %l3		! save %g6; set %g6 = cpcb
1928	mov	%l6, %g6
1929	st	%g0, [%g6 + PCB_NSAVED]	! and clear magic flag
1930	restore				! from T to R
1931	restore				! from R to I
1932	restore	%g0, 1, %l1		! from I to X, then %l1 = 1
1933	rd	%psr, %l0		! cwp = %psr;
1934	sll	%l1, %l0, %l1
1935	wr	%l1, 0, %wim		! make window X invalid
1936	and	%l0, 31, %l0
1937	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1938	nop				! unnecessary? old wim was 0...
1939	save	%g0, %g0, %g0		! back to I
1940	LOADWIN(%g6 + PCB_RW + 64)	! load from rw[1]
1941	save	%g0, %g0, %g0		! back to R
1942	LOADWIN(%g6 + PCB_RW)		! load from rw[0]
1943	save	%g0, %g0, %g0		! back to T
1944	wr	%l0, 0, %psr		! restore condition codes
1945	mov	%l3, %g6		! fix %g6
1946	RETT
1947
1948	/*
1949	 * Restoring from user stack, but everything has checked out
1950	 * as good.  We are now in window X, and %l1 = 1.  Window R
1951	 * is still valid and holds user values.
1952	 */
1953winuf_ok:
1954	rd	%psr, %l0
1955	sll	%l1, %l0, %l1
1956	wr	%l1, 0, %wim		! make this one invalid
1957	sethi	%hi(_cpcb), %l2
1958	ld	[%l2 + %lo(_cpcb)], %l2
1959	and	%l0, 31, %l0
1960	st	%l0, [%l2 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1961	save	%g0, %g0, %g0		! back to I
1962	LOADWIN(%sp)
1963	save	%g0, %g0, %g0		! back to R
1964	save	%g0, %g0, %g0		! back to T
1965	wr	%l0, 0, %psr		! restore condition codes
1966	nop				! it takes three to tangle
1967	RETT
1968#endif /* end `real' version of window underflow trap handler */
1969
1970/*
1971 * Various return-from-trap routines (see return_from_trap).
1972 */
1973
1974/*
1975 * Return from trap, to kernel.
1976 *	%l0 = %psr
1977 *	%l1 = return pc
1978 *	%l2 = return npc
1979 *	%l4 = %wim
1980 *	%l5 = bit for previous window
1981 */
1982rft_kernel:
1983	btst	%l5, %l4		! if (wim & l5)
1984	bnz	1f			!	goto reload;
1985	 wr	%l0, 0, %psr		! but first put !@#*% cond codes back
1986
1987	/* previous window is valid; just rett */
1988	nop				! wait for cond codes to settle in
1989	RETT
1990
1991	/*
1992	 * Previous window is invalid.
1993	 * Update %wim and then reload l0..i7 from frame.
1994	 *
1995	 *	  T I X
1996	 *	0 0 1 0 0   (%wim)
1997	 * [see picture in window_uf handler]
1998	 *
1999	 * T is the current (Trap) window, I is the Invalid window,
2000	 * and X is the window we want to make invalid.  Window X
2001	 * currently has no useful values.
2002	 */
20031:
2004	wr	%g0, 0, %wim		! allow us to enter window I
2005	nop; nop; nop			! (it takes a while)
2006	restore				! enter window I
2007	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2008	rd	%psr, %l0		! CWP = %psr & 31;
2009	and	%l0, 31, %l0
2010	sll	%l1, %l0, %l1		! wim = 1 << CWP;
2011	wr	%l1, 0, %wim		! setwim(wim);
2012	sethi	%hi(_cpcb), %l1
2013	ld	[%l1 + %lo(_cpcb)], %l1
2014	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2015	save	%g0, %g0, %g0		! back to window I
2016	LOADWIN(%sp)
2017	save	%g0, %g0, %g0		! back to window T
2018	/*
2019	 * Note that the condition codes are still set from
2020	 * the code at rft_kernel; we can simply return.
2021	 */
2022	RETT
2023
2024/*
2025 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
2026 * will re-enter trap() if set.  Note that we may have to switch from
2027 * the interrupt stack to the kernel stack in this case.
2028 *	%l0 = %psr
2029 *	%l1 = return pc
2030 *	%l2 = return npc
2031 *	%l4 = %wim
2032 *	%l5 = bit for previous window
2033 *	%l6 = cpcb
2034 * If returning to a valid window, just set psr and return.
2035 */
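/*
 * In outline, rft_user amounts to (a sketch of the control flow
 * below, not kernel C):
 *
 *	if (want_ast)
 *		softtrap(T_AST);		reschedule via trap()
 *	if (the previous window is still valid)
 *		{ set %psr; rett; }
 *	if ((fp & 7) != 0 || the stack page(s) at fp are not readable)
 *		softtrap(T_RWRET);		rft_invalid
 *	else
 *		{ reload the window from the user stack; rett; }
 */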
2036rft_user:
2037!	sethi	%hi(_want_ast), %l7	! (done below)
2038	ld	[%l7 + %lo(_want_ast)], %l7
2039	tst	%l7			! want AST trap?
2040	bne,a	softtrap		! yes, re-enter trap with type T_AST
2041	 mov	T_AST, %o0
2042
2043	btst	%l5, %l4		! if (wim & l5)
2044	bnz	1f			!	goto reload;
2045	 wr	%l0, 0, %psr		! restore cond codes
2046	nop				! (three instruction delay)
2047	RETT
2048
2049	/*
2050	 * Previous window is invalid.
2051	 * Before we try to load it, we must verify its stack pointer.
2052	 * This is much like the underflow handler, but a bit easier
2053	 * since we can use our own local registers.
2054	 */
20551:
2056	btst	7, %fp			! if unaligned, address is invalid
2057	bne	rft_invalid
2058	 EMPTY
2059
2060	PTE_OF_ADDR(%fp, %l7, rft_invalid)
2061	CMP_PTE_USER_READ(%l7)		! try first page
2062	bne	rft_invalid		! no good
2063	 EMPTY
2064	SLT_IF_1PAGE_RW(%fp, %l7)
2065	bl,a	rft_user_ok		! only 1 page: ok
2066	 wr	%g0, 0, %wim
2067	add	%fp, 7*8, %l5
2068	PTE_OF_ADDR(%l5, %l7, rft_invalid)
2069	CMP_PTE_USER_READ(%l7)		! check 2nd page too
2070	be,a	rft_user_ok
2071	 wr	%g0, 0, %wim
2072
2073	/*
2074	 * The window we wanted to pull could not be pulled.  Instead,
2075	 * re-enter trap with type T_RWRET.  This will pull the window
2076	 * into cpcb->pcb_rw[0] and set cpcb->pcb_nsaved to -1, which we
2077	 * will detect when we try to return again.
2078	 */
2079rft_invalid:
2080	b	softtrap
2081	 mov	T_RWRET, %o0
2082
2083	/*
2084	 * The window we want to pull can be pulled directly.
2085	 */
2086rft_user_ok:
2087!	wr	%g0, 0, %wim		! allow us to get into it
2088	wr	%l0, 0, %psr		! fix up the cond codes now
2089	nop; nop; nop
2090	restore				! enter window I
2091	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2092	rd	%psr, %l0		! l0 = (junk << 5) + CWP;
2093	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2094	wr	%l1, 0, %wim
2095	sethi	%hi(_cpcb), %l1
2096	ld	[%l1 + %lo(_cpcb)], %l1
2097	and	%l0, 31, %l0
2098	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2099	save	%g0, %g0, %g0		! back to window I
2100	LOADWIN(%sp)			! suck hard
2101	save	%g0, %g0, %g0		! back to window T
2102	RETT
2103
2104/*
2105 * Return from trap.  Entered after a
2106 *	wr	%l0, 0, %psr
2107 * which disables traps so that we can rett; registers are:
2108 *
2109 *	%l0 = %psr
2110 *	%l1 = return pc
2111 *	%l2 = return npc
2112 *
2113 * (%l3..%l7 anything).
2114 *
2115 * If we are returning to user code, we must:
2116 *  1.  Check for register windows in the pcb that belong on the stack.
2117 *	If there are any, reenter trap with type T_WINOF.
2118 *  2.  Make sure the register windows will not underflow.  This is
2119 *	much easier in kernel mode....
2120 */
2121return_from_trap:
2122!	wr	%l0, 0, %psr		! disable traps so we can rett
2123! (someone else did this already)
2124	and	%l0, 31, %l5
2125	set	wmask, %l6
2126	ldub	[%l6 + %l5], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
2127	btst	PSR_PS, %l0		! returning to userland?
2128	bnz	rft_kernel		! no, go return to kernel
2129	 rd	%wim, %l4		! (read %wim in any case)
2130
2131rft_user_or_recover_pcb_windows:
2132	/*
2133	 * (entered with %l4=%wim, %l5=wmask[cwp]; %l0..%l2 as usual)
2134	 *
2135	 * check cpcb->pcb_nsaved:
2136	 * if 0, do a `normal' return to user (see rft_user);
2137	 * if > 0, cpcb->pcb_rw[] holds registers to be copied to stack;
2138	 * if -1, cpcb->pcb_rw[0] holds user registers for rett window
2139	 * from an earlier T_RWRET pseudo-trap.
2140	 */
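	/*
	 * The same dispatch as a C sketch (the pcb fields are real,
	 * the rest is shorthand for the code paths that follow):
	 *
	 *	if (cpcb->pcb_nsaved == 0)
	 *		rft_user();			normal return
	 *	else if (cpcb->pcb_nsaved > 0)
	 *		softtrap(T_WINOF);		spill rw[] to the stack
	 *	else					(-1, from T_RWRET)
	 *		reload the rett window from cpcb->pcb_rw[0];
	 */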
2141	sethi	%hi(_cpcb), %l6
2142	ld	[%l6 + %lo(_cpcb)], %l6
2143	ld	[%l6 + PCB_NSAVED], %l7
2144	tst	%l7
2145	bz,a	rft_user
2146	 sethi	%hi(_want_ast), %l7	! first instr of rft_user
2147
2148	bg,a	softtrap		! if (pcb_nsaved > 0)
2149	 mov	T_WINOF, %o0		!	trap(T_WINOF);
2150
2151	/*
2152	 * To get here, we must have tried to return from a previous
2153	 * trap and discovered that it would cause a window underflow.
2154	 * We then must have tried to pull the registers out of the
2155	 * user stack (from the address in %fp==%i6) and discovered
2156	 * that it was either unaligned or not loaded in memory, and
2157	 * therefore we ran a trap(T_RWRET), which loaded one set of
2158	 * registers into cpcb->pcb_rw[0] (if it had killed the
2159	 * process due to a bad stack, we would not be here).
2160	 *
2161	 * We want to load pcb_rw[0] into the previous window, which
2162	 * we know is currently invalid.  In other words, we want
2163	 * %wim to be 1 << ((cwp + 2) % nwindows).
2164	 */
2165	wr	%g0, 0, %wim		! enable restores
2166	mov	%g6, %l3		! save g6 in l3
2167	mov	%l6, %g6		! set g6 = &u
2168	st	%g0, [%g6 + PCB_NSAVED]	! clear cpcb->pcb_nsaved
2169	restore				! enter window I
2170	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2171	rd	%psr, %l0
2172	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2173	wr	%l1, 0, %wim
2174	and	%l0, 31, %l0
2175	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = CWP;
2176	nop				! unnecessary? old wim was 0...
2177	save	%g0, %g0, %g0		! back to window I
2178	LOADWIN(%g6 + PCB_RW)
2179	save	%g0, %g0, %g0		! back to window T (trap window)
2180	wr	%l0, 0, %psr		! cond codes, cond codes everywhere
2181	mov	%l3, %g6		! restore g6
2182	RETT
2183
2184! exported end marker for kernel gdb
2185	.globl	_endtrapcode
2186_endtrapcode:
2187
2188/*
2189 * init_tables(nwin) int nwin;
2190 *
2191 * Set up the uwtab and wmask tables.
2192 * We know nwin > 1.
2193 */
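/*
 * Gathered into one C function (a sketch assembled from the per-loop
 * comments below; uwtab and wmask are the byte tables used by the
 * window handlers, and uwtab is indexed with negative offsets in the
 * first loop, exactly as the assembly does):
 *
 *	void
 *	init_tables(int nwin)
 *	{
 *		int i, j;
 *
 *		for (i = -nwin, j = nwin - 2; ++i < 0; j--)
 *			uwtab[i] = j;
 *		for (j = nwin - 1; i < nwin; i++, j--)
 *			uwtab[i] = j;
 *		for (i = j = 1; i < nwin; i++) {
 *			j <<= 1;
 *			wmask[i - 1] = j;
 *		}
 *		wmask[i - 1] = 1;
 *	}
 */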
2194init_tables:
2195	/*
2196	 * for (i = -nwin, j = nwin - 2; ++i < 0; j--)
2197	 *	uwtab[i] = j;
2198	 * (loop runs at least once)
2199	 */
2200	set	uwtab, %o3
2201	sub	%g0, %o0, %o1		! i = -nwin + 1
2202	inc	%o1
2203	add	%o0, -2, %o2		! j = nwin - 2;
22040:
2205	stb	%o2, [%o3 + %o1]	! uwtab[i] = j;
22061:
2207	inccc	%o1			! ++i < 0?
2208	bl	0b			! yes, continue loop
2209	 dec	%o2			! in any case, j--
2210
2211	/*
2212	 * (i now equals 0)
2213	 * for (j = nwin - 1; i < nwin; i++, j--)
2214	 *	uwtab[i] = j;
2215	 * (loop runs at least twice)
2216	 */
2217	sub	%o0, 1, %o2		! j = nwin - 1
22180:
2219	stb	%o2, [%o3 + %o1]	! uwtab[i] = j
2220	inc	%o1			! i++
22211:
2222	cmp	%o1, %o0		! i < nwin?
2223	bl	0b			! yes, continue
2224	 dec	%o2			! in any case, j--
2225
2226	/*
2227	 * We observe that, for i in 0..nwin-2, (i+1)%nwin == i+1;
2228	 * for i==nwin-1, (i+1)%nwin == 0.
2229	 * To avoid adding 1, we run i from 1 to nwin and set
2230	 * wmask[i-1].
2231	 *
2232	 * for (i = j = 1; i < nwin; i++) {
2233	 *	j <<= 1;	(j now == 1 << i)
2234	 *	wmask[i - 1] = j;
2235	 * }
2236	 * (loop runs at least once)
2237	 */
2238	set	wmask - 1, %o3
2239	mov	1, %o1			! i = 1;
2240	mov	2, %o2			! j = 2;
22410:
2242	stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2243	inc	%o1			! i++
2244	cmp	%o1, %o0		! i < nwin?
2245	bl,a	0b			! yes, continue
2246	 sll	%o2, 1, %o2		! (and j <<= 1)
2247
2248	/*
2249	 * Now i==nwin, so we want wmask[i-1] = 1.
2250	 */
2251	mov	1, %o2			! j = 1;
2252	retl
2253	 stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2254
2255dostart:
2256	rd	%psr, %l0		! paranoia: make sure ...
2257	andn	%l0, PSR_ET, %l0	! we have traps off
2258	wr	%l0, 0, %psr		! so that we can fiddle safely
2259	nop; nop; nop
2260
2261	/*
2262	 * Startup.
2263	 *
2264	 * We have been loaded in low RAM, at some address which
2265	 * is page aligned (0x4000 actually) rather than where we
2266	 * want to run (0xf8004000).  Until we get everything set,
2267	 * we have to be sure to use only pc-relative addressing.
2268	 */
2269
2270	wr	%g0, 0, %wim		! make sure we can set psr
2271	mov	%o0, %g7		! save prom vector pointer
2272	nop; nop
2273	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
2274	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
2275	stba	%g0, [%g1] ASI_CONTROL
2276	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
2277	mov	1, %g1			! set pcb_wim (log2(%wim) = 1)
2278	sethi	%hi(_u0 + PCB_WIM), %g2
2279	st	%g1, [%g2 + %lo(_u0 + PCB_WIM)]
2280
2281	/*
2282	 * Step 1: double map low RAM (addresses [0.._end-start-1])
2283	 * to KERNBASE (addresses [KERNBASE.._end-1]).  None of these
2284	 * are `bad' aliases (since they are all on segment boundaries)
2285	 * so we do not have to worry about cache aliasing.
2286	 *
2287	 * We map in another couple of segments just to have some
2288	 * more memory (512K, actually) guaranteed available for
2289	 * bootstrap code (pmap_bootstrap needs memory to hold MMU
2290	 * and context data structures).
2291	 */
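	/*
	 * The remapping loop below, in C-ish form (a sketch; segmap[]
	 * just stands for the lduba/stba ASI_SEGMAP accesses):
	 *
	 *	segsiz = 1 << 18;
	 *	for (lowva = 0, highva = KERNBASE;
	 *	    highva < _end + (2 << 18);
	 *	    lowva += segsiz, highva += segsiz)
	 *		segmap[highva] = segmap[lowva];
	 */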
2292	clr	%l0			! lowva
2293	set	KERNBASE, %l1		! highva
2294	set	_end + (2 << 18), %l2	! last va that must be remapped
2295	set	1 << 18, %l3		! segment size in bytes
22960:
2297	lduba	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
2298	stba	%l4, [%l1] ASI_SEGMAP
2299	add	%l3, %l1, %l1		! highva += segsiz;
2300	cmp	%l1, %l2		! done?
2301	bl	0b			! no, loop
2302	 add	%l3, %l0, %l0		! (and lowva += segsz)
2303
2304	/*
2305	 * Now map the interrupt enable register and clear any interrupts,
2306	 * enabling NMIs.  Note that we will not take NMIs until we change
2307	 * %tbr.
2308	 */
2309	set	IE_reg_addr, %l0
2310	set	IE_REG_PTE, %l1
2311	sta	%l1, [%l0] ASI_PTE
2312	mov	IE_ALLIE, %l1
2313	nop; nop			! paranoia
2314	stb	%l1, [%l0]
2315
2316	/*
2317	 * All set, fix pc and npc.  Once we are where we should be,
2318	 * we can give ourselves a stack and enable traps.
2319	 */
2320	set	1f, %l0
2321	jmp	%l0
2322	 nop
23231:
2324	set	USRSTACK - CCFSZ, %fp	! as if called from user code
2325	set	estack0 - CCFSZ - 80, %sp ! via syscall(boot_me_up) or somesuch
2326	rd	%psr, %l0
2327	wr	%l0, PSR_ET, %psr
2328
2329	/*
2330	 * Step 2: clear BSS.  This may just be paranoia; the boot
2331	 * loader might already do it for us; but what the hell.
2332	 */
2333	set	_edata, %o0		! bzero(edata, end - edata)
2334	set	_end, %o1
2335	call	_bzero
2336	 sub	%o1, %o0, %o1
2337
2338	/*
2339	 * Stash prom vectors now, after bzero, as it lives in bss
2340	 * Stash the prom vector pointer now, after bzero: _promvec lives
2341	 * in bss (which we just zeroed).
2342	 */
2343	sethi	%hi(_promvec), %l0
2344	st	%g7, [%l0 + %lo(_promvec)]
2345
2346	/*
2347	 * Step 3: compute number of windows and set up tables.
2348	 * We could do some of this later.
2349	 */
2350	save	%sp, -64, %sp
2351	rd	%psr, %g1
2352	restore
2353	and	%g1, 31, %g1		! want just the CWP bits
2354	add	%g1, 1, %o0		! compute nwindows
2355	sethi	%hi(_nwindows), %o1	! may as well tell everyone
2356	call	init_tables
2357	 st	%o0, [%o1 + %lo(_nwindows)]
2358
2359	/*
2360	 * Step 4: change the trap base register, now that our trap handlers
2361	 * will function (they need the tables we just set up).
2362	 */
2363	set	_trapbase, %l0
2364	wr	%l0, 0, %tbr
2365	nop				! paranoia
2366
2367	/*
2368	 * Bootstrap, call main, and `return' from a fake trap into `icode'.
2369	 */
2370	call	_bootstrap
2371	 nop
2372	call	_main
2373	 nop
2374
2375	/*
2376	 * Return from main means we are process 1 and need to
2377	 * jump to the `icode' (which appears below) which has
2378	 * been copied in to address 0x2000 in the new process.
2379	 * been copied out to address 0x2000 in the new process.
2380	 * Our stack & frame pointers are already set up to make it
2381	 * look like we are currently running in a trap taken from
2382	 * user mode (there is room for a full blown trapfrape),
2383	 * user mode (there is room for a full blown trap frame),
2384	 * to return_from_trap.
2385	 */
2386	mov	PSR_S, %l0		! user psr
2387#define XADDR 0x2000 /* XXX */
2388	set	XADDR, %l1		! pc
2389	add	%l1, 4, %l2		! npc
2390	b	return_from_trap
2391	 wr	%l0, 0, %psr		! just like syscall()
2392
2393/*
2394 * Icode is copied out to process 1 to exec init.
2395 * If the exec fails, process 1 exits.
2396 *
2397 * We depend here on the fact that we are copied to address XADDR
2398 * (but so does the code above).
2399 */
2400	.globl	_icode, _szicode
2401_icode:
2402	mov	SYS_execve, %g1
2403	set	init - _icode + XADDR, %o0	! actual location of pathname
2404	set	argv - _icode + XADDR, %o1	! ... and of argv
2405	clr	%o2				! no environment
2406	t	ST_SYSCALL
2407	mov	SYS_exit, %g1
2408	t	ST_SYSCALL
2409
2410init:	.ascii	"/sbin/"
2411init1:	.asciz	"init"
2412	ALIGN
2413argv:	.word	init1 - _icode + XADDR
2414	.word	initflags - _icode + XADDR
2415	.word	0
2416initflags:
2417_szicode:
2418	.long	_szicode - _icode
2419#undef XADDR
2420
2421/*
2422 * The following code is copied to the top of the user stack when each
2423 * process is exec'ed, and signals are `trampolined' off it.
2424 *
2425 * When this code is run, the stack looks like:
2426 *	[%sp]		64 bytes to which registers can be dumped
2427 *	[%sp + 64]	signal number (goes in %o0)
2428 *	[%sp + 64 + 4]	signal code (goes in %o1)
2429 *	[%sp + 64 + 8]	placeholder
2430 *	[%sp + 64 + 12]	argument for %o3, currently unsupported (always 0)
2431 *	[%sp + 64 + 16]	first word of saved state (sigcontext)
2432 *	    .
2433 *	    .
2434 *	    .
2435 *	[%sp + NNN]	last word of saved state
2436 * (followed by previous stack contents or top of signal stack).
2437 * The address of the function to call is in %g1; the old %g1 and %o0
2438 * have already been saved in the sigcontext.  We are running in a clean
2439 * window, all previous windows now being saved to the stack.
2440 *
2441 * Note that [%sp + 64 + 8] == %sp + 64 + 16.  The copy at %sp+64+8
2442 * will eventually be removed, with a hole left in its place, if things
2443 * work out.
2444 */
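/*
 * Seen from C, the layout above amounts to something like this
 * (field names are illustrative, not the real header definitions):
 *
 *	struct sigframe {
 *		int	sf_window[16];		the 64-byte dump area
 *		int	sf_signo;		goes in %o0
 *		int	sf_code;		goes in %o1
 *		struct	sigcontext *sf_scp;	== &sf_sc (see note above)
 *		int	sf_arg3;		always 0 for now
 *		struct	sigcontext sf_sc;	the saved state
 *	};
 */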
2445	.globl	_sigcode
2446	.globl	_esigcode
2447_sigcode:
2448	/*
2449	 * XXX  the `save' and `restore' below are unnecessary: should
2450	 *	replace with simple arithmetic on %sp
2451	 *
2452	 * Make room on the stack for 32 %f registers + %fsr.  This comes
2453	 * out to 33*4 or 132 bytes, but this must be aligned to a multiple
2454	 * of 8, or 136 bytes.
2455	 */
2456	save	%sp, -CCFSZ - 136, %sp
2457	mov	%g2, %l2		! save globals in %l registers
2458	mov	%g3, %l3
2459	mov	%g4, %l4
2460	mov	%g5, %l5
2461	mov	%g6, %l6
2462	mov	%g7, %l7
2463	/*
2464	 * Saving the fpu registers is expensive, so do it iff the psr
2465	 * stored in the sigcontext shows that the fpu is enabled.
2466	 */
2467	ld	[%fp + 64 + 16 + SC_PSR_OFFSET], %l0
2468	sethi	%hi(PSR_EF), %l1	! FPU enable bit is too high for andcc
2469	andcc	%l0, %l1, %l0		! %l0 = fpu enable bit
2470	be	1f			! if not set, skip the saves
2471	 rd	%y, %l1			! in any case, save %y
2472
2473	! fpu is enabled, oh well
2474	st	%fsr, [%sp + CCFSZ + 0]
2475	std	%f0, [%sp + CCFSZ + 8]
2476	std	%f2, [%sp + CCFSZ + 16]
2477	std	%f4, [%sp + CCFSZ + 24]
2478	std	%f6, [%sp + CCFSZ + 32]
2479	std	%f8, [%sp + CCFSZ + 40]
2480	std	%f10, [%sp + CCFSZ + 48]
2481	std	%f12, [%sp + CCFSZ + 56]
2482	std	%f14, [%sp + CCFSZ + 64]
2483	std	%f16, [%sp + CCFSZ + 72]
2484	std	%f18, [%sp + CCFSZ + 80]
2485	std	%f20, [%sp + CCFSZ + 88]
2486	std	%f22, [%sp + CCFSZ + 96]
2487	std	%f24, [%sp + CCFSZ + 104]
2488	std	%f26, [%sp + CCFSZ + 112]
2489	std	%f28, [%sp + CCFSZ + 120]
2490	std	%f30, [%sp + CCFSZ + 128]
2491
24921:
2493	ldd	[%fp + 64], %o0		! sig, code
2494	ld	[%fp + 76], %o3		! arg3
2495	call	%g1			! (*sa->sa_handler)(sig,code,scp,arg3)
2496	 add	%fp, 64 + 16, %o2	! scp
2497
2498	/*
2499	 * Now that the handler has returned, re-establish all the state
2500	 * we just saved above, then do a sigreturn.
2501	 */
2502	tst	%l0			! reload fpu registers?
2503	be	1f			! if not, skip the loads
2504	 wr	%l1, %g0, %y		! in any case, restore %y
2505
2506	ld	[%sp + CCFSZ + 0], %fsr
2507	ldd	[%sp + CCFSZ + 8], %f0
2508	ldd	[%sp + CCFSZ + 16], %f2
2509	ldd	[%sp + CCFSZ + 24], %f4
2510	ldd	[%sp + CCFSZ + 32], %f6
2511	ldd	[%sp + CCFSZ + 40], %f8
2512	ldd	[%sp + CCFSZ + 48], %f10
2513	ldd	[%sp + CCFSZ + 56], %f12
2514	ldd	[%sp + CCFSZ + 64], %f14
2515	ldd	[%sp + CCFSZ + 72], %f16
2516	ldd	[%sp + CCFSZ + 80], %f18
2517	ldd	[%sp + CCFSZ + 88], %f20
2518	ldd	[%sp + CCFSZ + 96], %f22
2519	ldd	[%sp + CCFSZ + 104], %f24
2520	ldd	[%sp + CCFSZ + 112], %f26
2521	ldd	[%sp + CCFSZ + 120], %f28
2522	ldd	[%sp + CCFSZ + 128], %f30
2523
25241:
2525	mov	%l2, %g2
2526	mov	%l3, %g3
2527	mov	%l4, %g4
2528	mov	%l5, %g5
2529	mov	%l6, %g6
2530	mov	%l7, %g7
2531
2532	restore	%g0, SYS_sigreturn, %g1	! get registers back & set syscall #
2533	add	%sp, 64 + 16, %o0	! compute scp
2534	t	ST_SYSCALL		! sigreturn(scp)
2535	! sigreturn does not return unless it fails
2536	mov	SYS_exit, %g1		! exit(errno)
2537	t	ST_SYSCALL
2538_esigcode:
2539
2540/*
2541 * Primitives
2542 */
2543
2544#ifdef GPROF
2545	.globl	mcount
2546#define	ENTRY(x) \
2547	.globl _##x; _##x: \
2548	save	%sp, -CCFSZ, %sp; \
2549	call	mcount; \
2550	nop; \
2551	restore
2552#else
2553#define	ENTRY(x)	.globl _##x; _##x:
2554#endif
2555#define	ALTENTRY(x)	.globl _##x; _##x:
2556
2557/*
2558 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
2559 *
2560 * Copy a null terminated string from the user address space into
2561 * the kernel address space.
2562 */
2563ENTRY(copyinstr)
2564	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2565#ifdef DIAGNOSTIC
2566	tst	%o2			! kernel should never give maxlen <= 0
2567	ble	1f
2568	 EMPTY
2569#endif
2570	set	KERNBASE, %o4
2571	cmp	%o0, %o4		! fromaddr < KERNBASE?
2572	blu,a	Lcsdocopy		! yes, go do it
2573	sethi	%hi(_cpcb), %o4		! (first instr of copy)
2574
2575	b	Lcsdone			! no, return EFAULT
2576	 mov	EFAULT, %o0
2577
25781:
2579	sethi	%hi(2f), %o0
2580	call	_panic
2581	 or	%lo(2f), %o0, %o0
25822:	.asciz	"copyinstr"
2583	ALIGN
2584
2585/*
2586 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
2587 *
2588 * Copy a null terminated string from the kernel
2589 * address space to the user address space.
2590 */
2591ENTRY(copyoutstr)
2592	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2593#ifdef DIAGNOSTIC
2594	tst	%o2
2595	ble	1f
2596	 EMPTY
2597#endif
2598	set	KERNBASE, %o4
2599	cmp	%o1, %o4		! toaddr < KERNBASE?
2600	blu,a	Lcsdocopy		! yes, go do it
2601	 sethi	%hi(_cpcb), %o4		! (first instr of copy)
2602
2603	b	Lcsdone			! no, return EFAULT
2604	 mov	EFAULT, %o0
2605
26061:
2607	sethi	%hi(2f), %o0
2608	call	_panic
2609	 or	%lo(2f), %o0, %o0
26102:	.asciz	"copyoutstr"
2611	ALIGN
2612
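/*
 * The loop below is shared by copyinstr and copyoutstr.  In C it is
 * roughly (a sketch; faults are caught through pcb_onfault and come
 * back as EFAULT via Lcsfault):
 *
 *	error = ENAMETOOLONG;
 *	for (save = toaddr; maxlen > 0; maxlen--) {
 *		if ((*toaddr++ = *fromaddr++) == '\0') {
 *			error = 0;
 *			break;
 *		}
 *	}
 *	if (lencopied)
 *		*lencopied = toaddr - save;
 *	return (error);
 */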
2613Lcsdocopy:
2614!	sethi	%hi(_cpcb), %o4		! (done earlier)
2615	ld	[%o4 + %lo(_cpcb)], %o4	! catch faults
2616	set	Lcsfault, %o5
2617	st	%o5, [%o4 + PCB_ONFAULT]
2618
2619	mov	%o1, %o5		!	save = toaddr;
2620! XXX should do this in bigger chunks when possible
26210:					! loop:
2622	ldsb	[%o0], %g1		!	c = *fromaddr;
2623	tst	%g1
2624	stb	%g1, [%o1]		!	*toaddr++ = c;
2625	be	1f			!	if (c == NULL)
2626	 inc	%o1			!		goto ok;
2627	deccc	%o2			!	if (--len > 0) {
2628	bg	0b			!		fromaddr++;
2629	 inc	%o0			!		goto loop;
2630					!	}
2631					!
2632	b	Lcsdone			!	error = ENAMETOOLONG;
2633	 mov	ENAMETOOLONG, %o0	!	goto done;
26341:					! ok:
2635	clr	%o0			!    error = 0;
2636Lcsdone:				! done:
2637	sub	%o1, %o5, %o1		!	len = to - save;
2638	tst	%o3			!	if (lencopied)
2639	bnz,a	3f
2640	 st	%o1, [%o3]		!		*lencopied = len;
26413:
2642	retl				! cpcb->pcb_onfault = 0;
2643	 st	%g0, [%o4 + PCB_ONFAULT]! return (error);
2644
2645Lcsfault:
2646	b	Lcsdone			! error = EFAULT;
2647	 mov	EFAULT, %o0		! goto ret;
2648
2649/*
2650 * copystr(fromaddr, toaddr, maxlength, &lencopied)
2651 *
2652 * Copy a null terminated string from one point to another in
2653 * the kernel address space.  (This is a leaf procedure, but
2654 * it does not seem that way to the C compiler.)
2655 */
2656ENTRY(copystr)
2657#ifdef DIAGNOSTIC
2658	tst	%o2			! 	if (maxlength <= 0)
2659	ble	4f			!		panic(...);
2660	 EMPTY
2661#endif
2662	mov	%o1, %o5		!	to0 = to;
26630:					! loop:
2664	ldsb	[%o0], %o4		!	c = *from;
2665	tst	%o4
2666	stb	%o4, [%o1]		!	*to++ = c;
2667	be	1f			!	if (c == 0)
2668	 inc	%o1			!		goto ok;
2669	deccc	%o2			!	if (--len > 0) {
2670	bg,a	0b			!		from++;
2671	 inc	%o0			!		goto loop;
2672	b	2f			!	}
2673	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
26741:					! ok:
2675	clr	%o0			!	ret = 0;
26762:
2677	sub	%o1, %o5, %o1		!	len = to - to0;
2678	tst	%o3			!	if (lencopied)
2679	bnz,a	3f
2680	 st	%o1, [%o3]		!		*lencopied = len;
26813:
2682	retl
2683	 nop
2684#ifdef DIAGNOSTIC
26854:
2686	sethi	%hi(5f), %o0
2687	call	_panic
2688	 or	%lo(5f), %o0, %o0
26895:
2690	.asciz	"copystr"
2691	ALIGN
2692#endif
2693
2694/*
2695 * Copyin(src, dst, len)
2696 *
2697 * Copy specified amount of data from user space into the kernel.
2698 */
2699ENTRY(copyin)
2700	set	KERNBASE, %o3
2701	cmp	%o0, %o3		! src < KERNBASE?
2702	blu,a	Ldocopy			! yes, can try it
2703	 sethi	%hi(_cpcb), %o3
2704
2705	/* source address points into kernel space: return EFAULT */
2706	retl
2707	 mov	EFAULT, %o0
2708
2709/*
2710 * Copyout(src, dst, len)
2711 *
2712 * Copy specified amount of data from kernel to user space.
2713 * Just like copyin, except that the `dst' addresses are user space
2714 * rather than the `src' addresses.
2715 */
2716ENTRY(copyout)
2717	set	KERNBASE, %o3
2718	cmp	%o1, %o3		! dst < KERNBASE?
2719	blu,a	Ldocopy
2720	 sethi	%hi(_cpcb), %o3
2721
2722	/* destination address points into kernel space: return EFAULT */
2723	retl
2724	 mov	EFAULT, %o0
2725
2726	/*
2727	 * ******NOTE****** this depends on bcopy() not using %g7
2728	 */
2729Ldocopy:
2730!	sethi	%hi(_cpcb), %o3
2731	ld	[%o3 + %lo(_cpcb)], %o3
2732	set	Lcopyfault, %o4
2733	mov	%o7, %g7		! save return address
2734	call	_bcopy			! bcopy(src, dst, len)
2735	 st	%o4, [%o3 + PCB_ONFAULT]
2736
2737	sethi	%hi(_cpcb), %o3
2738	ld	[%o3 + %lo(_cpcb)], %o3
2739	st	%g0, [%o3 + PCB_ONFAULT]
2740	jmp	%g7 + 8
2741	 clr	%o0			! return 0
2742
2743! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
2744! Note that although we were in bcopy, there is no state to clean up;
2745! the only special thing is that we have to return to [g7 + 8] rather than
2746! [o7 + 8].
2747Lcopyfault:
2748	sethi	%hi(_cpcb), %o3
2749	ld	[%o3 + %lo(_cpcb)], %o3
2750	st	%g0, [%o3 + PCB_ONFAULT]
2751	jmp	%g7 + 8
2752	 mov	EFAULT, %o0
2753
2754
2755/*
2756 * Write all user windows presently in the CPU back to the user's stack.
2757 * We just do `save' instructions until pcb_uw == 0.
2758 *
2759 *	p = cpcb;
2760 *	nsaves = 0;
2761 *	while (p->pcb_uw > 0)
2762 *		save(), nsaves++;
2763 *	while (--nsaves >= 0)
2764 *		restore();
2765 */
2766ENTRY(write_user_windows)
2767	sethi	%hi(_cpcb), %g6
2768	ld	[%g6 + %lo(_cpcb)], %g6
2769	b	2f
2770	 clr	%g5
27711:
2772	save	%sp, -64, %sp
27732:
2774	ld	[%g6 + PCB_UW], %g7
2775	tst	%g7
2776	bg,a	1b
2777	 inc	%g5
27783:
2779	deccc	%g5
2780	bge,a	3b
2781	 restore
2782	retl
2783	 nop
2784
2785
2786	.comm	_want_resched,4
2787/*
2788 * Masterpaddr is the p->p_addr of the last process on the processor.
2789 * XXX masterpaddr is almost the same as cpcb
2790 * XXX should delete this entirely
2791 */
2792	.comm	_masterpaddr, 4
2793
2794/*
2795 * Switch statistics (for later tweaking):
2796 *	nswitchdiff = p1 => p2 (i.e., chose different process)
2797 *	nswitchexit = number of calls to swtchexit()
2798 *	_cnt.v_swtch = total calls to swtch+swtchexit
2799 */
2800	.comm	_nswitchdiff, 4
2801	.comm	_nswitchexit, 4
2802
2803/*
2804 * REGISTER USAGE IN swtch AND swtchexit:
2805 * This is split into two phases, more or less
2806 * `before we locate a new proc' and `after'.
2807 * Some values are the same in both phases.
2808 * Note that the %o0-registers are not preserved across
2809 * the psr change when entering a new process, since this
2810 * usually changes the CWP field (hence heavy usage of %g's).
2811 *
2812 *	%g1 = oldpsr (excluding ipl bits)
2813 *	%g2 = %hi(_whichqs); newpsr
2814 *	%g3 = p
2815 *	%g4 = lastproc
2816 *	%g5 = <free>; newpcb
2817 *	%g6 = %hi(_cpcb)
2818 *	%g7 = %hi(_curproc)
2819 *	%o0 = tmp 1
2820 *	%o1 = tmp 2
2821 *	%o2 = tmp 3
2822 *	%o3 = tmp 4; whichqs; vm
2823	 *	%o4 = tmp 5; which; sswap
2824	 *	%o5 = tmp 6; q; <free>
2825 */
2826
2827/*
2828 * swtchexit is called only from cpu_exit() before the current process
2829 * has freed its kernel stack; we must free it.  (curproc is already NULL.)
2830 *
2831 * We lay the process to rest by changing to the `idle' kernel stack,
2832 * and note that the `last loaded process' is nonexistent.
2833 */
2834ENTRY(swtchexit)
2835	mov	%o0, %g2		! save the
2836	mov	%o1, %g3		! ... three parameters
2837	mov	%o2, %g4		! ... to kmem_free
2838
2839	/*
2840	 * Change pcb to idle u. area, i.e., set %sp to top of stack
2841	 * and %psr to PSR_S|PSR_ET, and set cpcb to point to _idle_u.
2842	 * Once we have left the old stack, we can call kmem_free to
2843	 * destroy it.  Call it any sooner and the register windows
2844	 * go bye-bye.
2845	 */
2846	set	_idle_u, %g5
2847	sethi	%hi(_cpcb), %g6
2848	mov	1, %g7
2849	wr	%g0, PSR_S, %psr	! change to window 0, traps off
2850	wr	%g0, 2, %wim		! and make window 1 the trap window
2851	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = &idle_u
2852	st	%g7, [%g5 + PCB_WIM]	! idle_u.pcb_wim = log2(2) = 1
2853	set	_idle_u + UPAGES * NBPG - CCFSZ, %sp	! set new %sp
2854#ifdef DEBUG
2855	set	_idle_u, %l6
2856	SET_SP_REDZONE(%l6, %l5)
2857#endif
2858	wr	%g0, PSR_S|PSR_ET, %psr	! and then enable traps
2859	mov	%g2, %o0		! now ready to call kmem_free
2860	mov	%g3, %o1
2861	call	_kmem_free
2862	 mov	%g4, %o2
2863
2864	/*
2865	 * Now fall through to `the last swtch'.  %g6 was set to
2866	 * %hi(_cpcb), but may have been clobbered in kmem_free,
2867	 * so all the registers described below will be set here.
2868	 *
2869	 * REGISTER USAGE AT THIS POINT:
2870	 *	%g1 = oldpsr (excluding ipl bits)
2871	 *	%g2 = %hi(_whichqs)
2872	 *	%g4 = lastproc
2873	 *	%g6 = %hi(_cpcb)
2874	 *	%g7 = %hi(_curproc)
2875	 *	%o0 = tmp 1
2876	 *	%o1 = tmp 2
2877	 *	%o3 = whichqs
2878	 */
2879
2880	INCR(_nswitchexit)		! nswitchexit++;
2881	INCR(_cnt+V_SWTCH)		! cnt.v_swtch++;
2882
2883	mov	PSR_S|PSR_ET, %g1	! oldpsr = PSR_S | PSR_ET;
2884	sethi	%hi(_whichqs), %g2
2885	clr	%g4			! lastproc = NULL;
2886	sethi	%hi(_cpcb), %g6
2887	sethi	%hi(_curproc), %g7
2888	/* FALLTHROUGH */
2889
2890/*
2891 * When no processes are on the runq, swtch
2892	 * idles here waiting for something to come ready.
2893 * The registers are set up as noted above.
2894 */
2895	.globl	idle
2896idle:
2897	st	%g0, [%g7 + %lo(_curproc)] ! curproc = NULL;
2898	wr	%g1, 0, %psr		! (void) spl0();
28991:					! spin reading _whichqs until nonzero
2900	ld	[%g2 + %lo(_whichqs)], %o3
2901	tst	%o3
2902	bnz,a	Lsw_scan
2903	 wr	%g1, PIL_CLOCK << 8, %psr	! (void) splclock();
2904	b,a	1b
2905
2906Lsw_panic_rq:
2907	sethi	%hi(1f), %o0
2908	call	_panic
2909	 or	%lo(1f), %o0, %o0
2910Lsw_panic_wchan:
2911	sethi	%hi(2f), %o0
2912	call	_panic
2913	 or	%lo(2f), %o0, %o0
2914Lsw_panic_srun:
2915	sethi	%hi(3f), %o0
2916	call	_panic
2917	 or	%lo(3f), %o0, %o0
29181:	.asciz	"swtch rq"
29192:	.asciz	"swtch wchan"
29203:	.asciz	"swtch SRUN"
2921	ALIGN
2922
2923/*
2924 * cpu_swtch() picks a process to run and runs it, saving the current
2925 * one away.  On the assumption that (since most workstations are
2926 * single user machines) the chances are quite good that the new
2927 * process will turn out to be the current process, we defer saving
2928 * it here until we have found someone to load.  If that someone
2929 * is the current process we avoid both store and load.
2930 *
2931 * cpu_swtch() is always entered at splstatclock or splhigh.
2932 *
2933 * IT MIGHT BE WORTH SAVING BEFORE ENTERING idle TO AVOID HAVING TO
2934 * SAVE LATER WHEN SOMEONE ELSE IS READY ... MUST MEASURE!
2935 */
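/*
 * The overall flow, as a C sketch (control structure only; anything
 * not named in the register-usage comments is just shorthand here):
 *
 *	cpu_swtch()
 *	{
 *		cpcb->pcb_sp = sp; cpcb->pcb_pc = pc; cpcb->pcb_psr = psr;
 *		lastproc = curproc;
 *		curproc = NULL;
 *		spl0(); splclock();		let interrupts in briefly
 *		runtime = time;
 *		which = ffs(whichqs) - 1;	goes to `idle' if none
 *		q = &qs[which];
 *		p = q->ph_link;			take the first process
 *		unlink p; if q is now empty, whichqs &= ~(1 << which);
 *		want_resched = 0;
 *		curproc = p;
 *		if (p == lastproc)
 *			{ restore ipl and return; }
 *		if (lastproc != NULL)
 *			write every window back (the save/restore ladder);
 *		load p's pcb: %wim, %sp, pc, psr;
 *		if (p->p_vmspace->vm_pmap.pm_ctx == NULL)
 *			ctx_alloc(&p->p_vmspace->vm_pmap);
 *		else
 *			setcontext(p->p_vmspace->vm_pmap.pm_ctxnum);
 *	}
 */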
2936	.globl	_runtime
2937	.globl	_time
2938ENTRY(cpu_swtch)
2939	/*
2940	 * REGISTER USAGE AT THIS POINT:
2941	 *	%g1 = oldpsr (excluding ipl bits)
2942	 *	%g2 = %hi(_whichqs)
2943	 *	%g3 = p
2944	 *	%g4 = lastproc
2945	 *	%g5 = tmp 0
2946	 *	%g6 = %hi(_cpcb)
2947	 *	%g7 = %hi(_curproc)
2948	 *	%o0 = tmp 1
2949	 *	%o1 = tmp 2
2950	 *	%o2 = tmp 3
2951	 *	%o3 = tmp 4, then at Lsw_scan, whichqs
2952	 *	%o4 = tmp 5, then at Lsw_scan, which
2953	 *	%o5 = tmp 6, then at Lsw_scan, q
2954	 */
2955	sethi	%hi(_whichqs), %g2	! set up addr regs
2956	sethi	%hi(_cpcb), %g6
2957	ld	[%g6 + %lo(_cpcb)], %o0
2958	std	%o6, [%o0 + PCB_SP]	! cpcb->pcb_<sp,pc> = <sp,pc>;
2959	rd	%psr, %g1		! oldpsr = %psr;
2960	sethi	%hi(_curproc), %g7
2961	ld	[%g7 + %lo(_curproc)], %g4	! lastproc = curproc;
2962	st	%g1, [%o0 + PCB_PSR]	! cpcb->pcb_psr = oldpsr;
2963	andn	%g1, PSR_PIL, %g1	! oldpsr &= ~PSR_PIL;
2964
2965	/*
2966	 * In all the fiddling we did to get this far, the thing we are
2967	 * waiting for might have come ready, so let interrupts in briefly
2968	 * before checking for other processes.  Note that we still have
2969	 * curproc set---we have to fix this or we can get in trouble with
2970	 * the run queues below.
2971	 */
2972	st	%g0, [%g7 + %lo(_curproc)]	! curproc = NULL;
2973	wr	%g1, 0, %psr			! (void) spl0();
2974	nop; nop; nop				! paranoia
2975	wr	%g1, PIL_CLOCK << 8, %psr	! (void) splclock();
2976
2977Lsw_scan:
2978	nop; nop; nop				! paranoia
2979	/*
2980	 * We're about to run a (possibly) new process.  Set runtime
2981	 * to indicate its start time.
2982	 */
2983	sethi	%hi(_time), %o0
2984	ldd	[%o0 + %lo(_time)], %o2
2985	sethi	%hi(_runtime), %o0
2986	std	%o2, [%o0 + %lo(_runtime)]
2987
2988	ld	[%g2 + %lo(_whichqs)], %o3
2989
2990	/*
2991	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
2992	 * branches to idle if ffs(whichqs) was 0.
2993	 */
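	/*
	 * Roughly, in C (a sketch; as the code notes, ffstab[0] == -24,
	 * so an all-zero word falls through to idle):
	 *
	 *	if ((n = w & 0xff) != 0)
	 *		which = ffstab[n] - 1;
	 *	else if ((n = (w >> 8) & 0xff) != 0)
	 *		which = ffstab[n] + 7;
	 *	else if ((n = (w >> 16) & 0xff) != 0)
	 *		which = ffstab[n] + 15;
	 *	else if ((n = ffstab[(w >> 24) & 0xff] + 24) != 0)
	 *		which = n - 1;
	 *	else
	 *		goto idle;
	 */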
2994	set	ffstab, %o2
2995	andcc	%o3, 0xff, %o1		! byte 0 zero?
2996	bz,a	1f			! yes, try byte 1
2997	 srl	%o3, 8, %o0
2998	b	2f			! ffs = ffstab[byte0]; which = ffs - 1;
2999	 ldsb	[%o2 + %o1], %o0
30001:	andcc	%o0, 0xff, %o1		! byte 1 zero?
3001	bz,a	1f			! yes, try byte 2
3002	 srl	%o0, 8, %o0
3003	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte1] + 7;
3004	b	3f
3005	 add	%o0, 7, %o4
30061:	andcc	%o0, 0xff, %o1		! byte 2 zero?
3007	bz,a	1f			! yes, try byte 3
3008	 srl	%o0, 8, %o0
3009	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte2] + 15;
3010	b	3f
3011	 add	%o0, 15, %o4
30121:	ldsb	[%o2 + %o0], %o0	! ffs = ffstab[byte3] + 24
3013	addcc	%o0, 24, %o0		! (note that ffstab[0] == -24)
3014	bz	idle			! if answer was 0, go idle
3015	 EMPTY
30162:	sub	%o0, 1, %o4		! which = ffs(whichqs) - 1
30173:	/* end optimized inline expansion */
3018
3019	/*
3020	 * We found a nonempty run queue.  Take its first process.
3021	 */
3022	set	_qs, %o5		! q = &qs[which];
3023	sll	%o4, 3, %o0
3024	add	%o0, %o5, %o5
3025	ld	[%o5], %g3		! p = q->ph_link;
3026	cmp	%g3, %o5		! if (p == q)
3027	be	Lsw_panic_rq		!	panic("swtch rq");
3028	 EMPTY
3029	ld	[%g3], %o0		! tmp0 = p->p_link;
3030	st	%o0, [%o5]		! q->ph_link = tmp0;
3031	st	%o5, [%o0 + 4]		! tmp0->p_rlink = q;
3032	cmp	%o0, %o5		! if (tmp0 == q)
3033	bne	1f
3034	 EMPTY
3035	mov	1, %o1			!	whichqs &= ~(1 << which);
3036	sll	%o1, %o4, %o1
3037	andn	%o3, %o1, %o3
3038	st	%o3, [%g2 + %lo(_whichqs)]
30391:
3040	/*
3041	 * PHASE TWO: NEW REGISTER USAGE:
3042	 *	%g1 = oldpsr (excluding ipl bits)
3043	 *	%g2 = newpsr
3044	 *	%g3 = p
3045	 *	%g4 = lastproc
3046	 *	%g5 = newpcb
3047	 *	%g6 = %hi(_cpcb)
3048	 *	%g7 = %hi(_curproc)
3049	 *	%o0 = tmp 1
3050	 *	%o1 = tmp 2
3051	 *	%o2 = tmp 3
3052	 *	%o3 = vm
3053	 *	%o4 = sswap
3054	 *	%o5 = <free>
3055	 */
3056
3057	/* firewalls */
3058	ld	[%g3 + P_WCHAN], %o0	! if (p->p_wchan)
3059	tst	%o0
3060	bne	Lsw_panic_wchan		!	panic("swtch wchan");
3061	 EMPTY
3062	ldsb	[%g3 + P_STAT], %o0	! if (p->p_stat != SRUN)
3063	cmp	%o0, SRUN
3064	bne	Lsw_panic_srun		!	panic("swtch SRUN");
3065	 EMPTY
3066
3067	/*
3068	 * Committed to running process p.
3069	 * It may be the same as the one we were running before.
3070	 */
3071	sethi	%hi(_want_resched), %o0
3072	st	%g0, [%o0 + %lo(_want_resched)]	! want_resched = 0;
3073	ld	[%g3 + P_ADDR], %g5		! newpcb = p->p_addr;
3074	st	%g0, [%g3 + 4]			! p->p_rlink = NULL;
3075	ld	[%g5 + PCB_PSR], %g2		! newpsr = newpcb->pcb_psr;
3076	st	%g3, [%g7 + %lo(_curproc)]	! curproc = p;
3077
3078	cmp	%g3, %g4		! p == lastproc?
3079	be,a	Lsw_sameproc		! yes, go return 0
3080	 wr	%g2, 0, %psr		! (after restoring ipl)
3081
3082	/*
3083	 * Not the old process.  Save the old process, if any;
3084	 * then load p.
3085	 */
3086	tst	%g4
3087	be,a	Lsw_load		! if no old process, go load
3088	 wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3089
3090	INCR(_nswitchdiff)		! clobbers %o0,%o1
3091	/*
3092	 * save: write back all windows (including the current one).
3093	 * XXX	crude; knows nwindows <= 8
3094	 */
3095#define	SAVE save %sp, -64, %sp
3096	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE	/* 7 of each: */
3097	restore; restore; restore; restore; restore; restore; restore
3098
3099	/*
3100	 * Load the new process.  To load, we must change stacks and
3101	 * alter cpcb and %wim, hence we must disable traps.  %psr is
3102	 * currently equal to oldpsr (%g1) ^ (PIL_CLOCK << 8);
3103	 * this means that PSR_ET is on.  Likewise, PSR_ET is on
3104	 * in newpsr (%g2), although we do not know newpsr's ipl.
3105	 *
3106	 * We also must load up the `in' and `local' registers.
3107	 */
3108	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3109Lsw_load:
3110!	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr	! done above
3111	/* compute new wim */
3112	ld	[%g5 + PCB_WIM], %o0
3113	mov	1, %o1
3114	sll	%o1, %o0, %o0
3115	wr	%o0, 0, %wim		! %wim = 1 << newpcb->pcb_wim;
3116	/* now must not change %psr for 3 more instrs */
3117/*1*/	set	PSR_EF|PSR_EC, %o0
3118/*2*/	andn	%g2, %o0, %g2		! newpsr &= ~(PSR_EF|PSR_EC);
3119/*3*/	nop
3120	/* set new psr, but with traps disabled */
3121	wr	%g2, PSR_ET, %psr	! %psr = newpsr ^ PSR_ET;
3122	/* set new cpcb */
3123	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = newpcb;
3124	/* XXX update masterpaddr too */
3125	sethi	%hi(_masterpaddr), %g7
3126	st	%g5, [%g7 + %lo(_masterpaddr)]
3127	ldd	[%g5 + PCB_SP], %o6	! <sp,pc> = newpcb->pcb_<sp,pc>
3128	/* load window */
3129	ldd	[%sp + (0*8)], %l0
3130	ldd	[%sp + (1*8)], %l2
3131	ldd	[%sp + (2*8)], %l4
3132	ldd	[%sp + (3*8)], %l6
3133	ldd	[%sp + (4*8)], %i0
3134	ldd	[%sp + (5*8)], %i2
3135	ldd	[%sp + (6*8)], %i4
3136	ldd	[%sp + (7*8)], %i6
3137#ifdef DEBUG
3138	mov	%g5, %o0
3139	SET_SP_REDZONE(%o0, %o1)
3140	CHECK_SP_REDZONE(%o0, %o1)
3141#endif
3142	/* finally, enable traps */
3143	wr	%g2, 0, %psr		! psr = newpsr;
3144
3145	/*
3146	 * Now running p.  Make sure it has a context so that it
3147	 * can talk about user space stuff.  (Its pcb_uw is currently
3148	 * zero so it is safe to have interrupts going here.)
3149	 */
3150	ld	[%g3 + P_VMSPACE], %o3	! vm = p->p_vmspace;
3151	ld	[%o3 + VM_PMAP_CTX], %o0! if (vm->vm_pmap.pm_ctx != NULL)
3152	tst	%o0
3153	bnz,a	Lsw_havectx		!	goto havecontext;
3154	 ld	[%o3 + VM_PMAP_CTXNUM], %o0
3155
3156	/* p does not have a context: call ctx_alloc to get one */
3157	save	%sp, -CCFSZ, %sp
3158	call	_ctx_alloc		! ctx_alloc(&vm->vm_pmap);
3159	 add	%i3, VM_PMAP, %o0
3160	ret
3161	 restore
3162
3163	/* p does have a context: just switch to it */
3164Lsw_havectx:
3165!	ld	[%o3 + VM_PMAP_CTXNUM], %o0	! (done in delay slot)
3166	set	AC_CONTEXT, %o1
3167	stba	%o0, [%o1] ASI_CONTROL	! setcontext(vm->vm_pmap.pm_ctxnum);
3168	retl
3169	 nop
3170
3171Lsw_sameproc:
3172	/*
3173	 * We are resuming the process that was running at the
3174	 * call to swtch().  Just set psr ipl and return.
3175	 */
3176!	wr	%g2, 0, %psr		! %psr = newpsr; (done earlier)
3177	nop
3178	retl
3179	 nop
3180
3181
3182/*
3183 * Snapshot the current process so that stack frames are up to date.
3184 * This is called from two places:
3185 *  - just before a crash dump, for the stack update;
3186 *  - in cpu_fork(), before copying the kernel stack.
3187 * In the latter case the pcb and stack will be copied to the child,
3188 * and the child will be made runnable.  Eventually swtch() will run
3189 * it.  When it does, we want its pcb_pc set so that we can appear
3190 * to return 1 from cpu_fork(), so we store the current sp and psr
3191 * in the given pcb, and set its pcb_pc to our return-1 code (offset
3192 * by -8 due to call/ret conventions).  This is not useful in the crash
3193 * dump code but it is easiest to do it anyway.
3194 */
3195ENTRY(snapshot)
3196	st	%o6, [%o0 + PCB_SP]	! save sp
3197	set	1f - 8, %o1		! set child-return pc
3198	st	%o1, [%o0 + PCB_PC]
3199	rd	%psr, %o1		! save psr
3200	st	%o1, [%o0 + PCB_PSR]
3201
3202	/*
3203	 * Just like swtch(); same XXX comments apply.
3204	 * 7 of each.  Minor tweak: the 7th restore is
3205	 * done after a ret.
3206	 */
3207	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE
3208	restore; restore; restore; restore; restore; restore; ret; restore
3209
32101:	/* this is reached only after a child gets chosen in swtch() */
3211	mov	1, %i0			! return 1 from cpu_fork
3212	ret
3213	 restore
3214
3215/*
3216 * {fu,su}{,i}{byte,word}
3217 */
3218ALTENTRY(fuiword)
3219ENTRY(fuword)
3220	set	KERNBASE, %o2
3221	cmp	%o0, %o2		! if addr >= KERNBASE...
3222	bgeu	Lfsbadaddr
3223	EMPTY
3224	btst	3, %o0			! or has low bits set...
3225	bnz	Lfsbadaddr		!	go return -1
3226	EMPTY
3227	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3228	ld	[%o2 + %lo(_cpcb)], %o2
3229	set	Lfserr, %o3
3230	st	%o3, [%o2 + PCB_ONFAULT]
3231	ld	[%o0], %o0		! fetch the word
3232	retl				! phew, made it, return the word
3233	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3234
3235Lfserr:
3236	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3237Lfsbadaddr:
3238	retl				! and return error indicator
3239	mov	-1, %o0
3240
3241	/*
3242	 * This is just like Lfserr, but it's a global label, so that
3243	 * mem_access_fault() can see that we don't want the fault paged in.
3244	 * It's used by fuswintr() etc.
3245	 */
3246	.globl	_Lfsbail
3247_Lfsbail:
3248	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3249	retl				! and return error indicator
3250	mov	-1, %o0
3251
3252	/*
3253	 * Like fusword but callable from interrupt context.
3254	 * Fails if data isn't resident.
3255	 */
3256ENTRY(fuswintr)
3257	set	KERNBASE, %o2
3258	cmp	%o0, %o2		! if addr >= KERNBASE
3259	bgeu	Lfsbadaddr		!	return error
3260	EMPTY
3261	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3262	ld	[%o2 + %lo(_cpcb)], %o2
3263	set	_Lfsbail, %o3
3264	st	%o3, [%o2 + PCB_ONFAULT]
3265	lduh	[%o0], %o0		! fetch the halfword
3266	retl				! made it
3267	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3268
3269ENTRY(fusword)
3270	set	KERNBASE, %o2
3271	cmp	%o0, %o2		! if addr >= KERNBASE
3272	bgeu	Lfsbadaddr		!	return error
3273	EMPTY
3274	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3275	ld	[%o2 + %lo(_cpcb)], %o2
3276	set	Lfserr, %o3
3277	st	%o3, [%o2 + PCB_ONFAULT]
3278	lduh	[%o0], %o0		! fetch the halfword
3279	retl				! made it
3280	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3281
3282ALTENTRY(fuibyte)
3283ENTRY(fubyte)
3284	set	KERNBASE, %o2
3285	cmp	%o0, %o2		! if addr >= KERNBASE
3286	bgeu	Lfsbadaddr		!	return error
3287	EMPTY
3288	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3289	ld	[%o2 + %lo(_cpcb)], %o2
3290	set	Lfserr, %o3
3291	st	%o3, [%o2 + PCB_ONFAULT]
3292	ldub	[%o0], %o0		! fetch the byte
3293	retl				! made it
3294	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3295
3296ALTENTRY(suiword)
3297ENTRY(suword)
3298	set	KERNBASE, %o2
3299	cmp	%o0, %o2		! if addr >= KERNBASE ...
3300	bgeu	Lfsbadaddr
3301	EMPTY
3302	btst	3, %o0			! or has low bits set ...
3303	bnz	Lfsbadaddr		!	go return error
3304	EMPTY
3305	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3306	ld	[%o2 + %lo(_cpcb)], %o2
3307	set	Lfserr, %o3
3308	st	%o3, [%o2 + PCB_ONFAULT]
3309	st	%o1, [%o0]		! store the word
3310	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3311	retl				! and return 0
3312	clr	%o0
3313
3314ENTRY(suswintr)
3315	set	KERNBASE, %o2
3316	cmp	%o0, %o2		! if addr >= KERNBASE
3317	bgeu	Lfsbadaddr		!	go return error
3318	EMPTY
3319	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3320	ld	[%o2 + %lo(_cpcb)], %o2
3321	set	_Lfsbail, %o3
3322	st	%o3, [%o2 + PCB_ONFAULT]
3323	sth	%o1, [%o0]		! store the halfword
3324	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3325	retl				! and return 0
3326	clr	%o0
3327
3328ENTRY(susword)
3329	set	KERNBASE, %o2
3330	cmp	%o0, %o2		! if addr >= KERNBASE
3331	bgeu	Lfsbadaddr		!	go return error
3332	EMPTY
3333	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3334	ld	[%o2 + %lo(_cpcb)], %o2
3335	set	Lfserr, %o3
3336	st	%o3, [%o2 + PCB_ONFAULT]
3337	sth	%o1, [%o0]		! store the halfword
3338	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3339	retl				! and return 0
3340	clr	%o0
3341
3342ALTENTRY(suibyte)
3343ENTRY(subyte)
3344	set	KERNBASE, %o2
3345	cmp	%o0, %o2		! if addr >= KERNBASE
3346	bgeu	Lfsbadaddr		!	go return error
3347	EMPTY
3348	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3349	ld	[%o2 + %lo(_cpcb)], %o2
3350	set	Lfserr, %o3
3351	st	%o3, [%o2 + PCB_ONFAULT]
3352	stb	%o1, [%o0]		! store the byte
3353	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3354	retl				! and return 0
3355	clr	%o0
3356
3357/* probeget and probeset are meant to be used during autoconfiguration */
3358
3359/*
3360 * probeget(addr, size) caddr_t addr; int size;
3361 *
3362 * Read or write a (byte,word,longword) from the given address.
3363 * Like {fu,su}{byte,halfword,word} but our caller is supposed
3364 * to know what he is doing... the address can be anywhere.
3365 *
3366 * We optimize for space, rather than time, here.
3367 */
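/*
 * Typical use during autoconfiguration (illustrative only; `sc' and
 * its `csr' register are made up for the example):
 *
 *	if (probeget((caddr_t)&sc->csr, 4) == -1)
 *		return (0);		nothing responded at that address
 *
 * A faulting access comes back as -1 instead of taking the machine
 * down; a register that really reads as all ones is, of course,
 * indistinguishable from a failure.
 */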
3368ENTRY(probeget)
3369	! %o0 = addr, %o1 = (1,2,4)
3370	set	KERNBASE, %o2
3371	cmp	%o0, %o2		! if addr < KERNBASE
3372	blu	Lfsbadaddr		!	go return error
3373	 EMPTY
3374	sethi	%hi(_cpcb), %o2
3375	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3376	set	Lfserr, %o5
3377	st	%o5, [%o2 + PCB_ONFAULT]
3378	btst	1, %o1
3379	bnz,a	0f			! if (len & 1)
3380	 ldub	[%o0], %o0		!	value = *(char *)addr;
33810:	btst	2, %o1
3382	bnz,a	0f			! if (len & 2)
3383	 lduh	[%o0], %o0		!	value = *(short *)addr;
33840:	btst	4, %o1
3385	bnz,a	0f			! if (len & 4)
3386	 ld	[%o0], %o0		!	value = *(int *)addr;
33870:	retl				! made it, clear onfault and return
3388	 st	%g0, [%o2 + PCB_ONFAULT]
3389
3390/*
3391 * probeset(addr, size, val) caddr_t addr; int size, val;
3392 *
3393 * As above, but we return 0 on success.
3394 */
3395ENTRY(probeset)
3396	! %o0 = addr, %o1 = (1,2,4), %o2 = val
3397	set	KERNBASE, %o4
3398	cmp	%o0, %o4		! if addr < KERNBASE
3399	blu	Lfsbadaddr		!	go return error
3400	 mov	%o2, %o4		! (save val: %o2 is scratch below)
3401	sethi	%hi(_cpcb), %o2
3402	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3403	set	Lfserr, %o5
3404	st	%o5, [%o2 + PCB_ONFAULT]
3405	btst	1, %o1
3406	bnz,a	0f			! if (len & 1)
3407	 stb	%o4, [%o0]		!	*(char *)addr = value;
34080:	btst	2, %o1
3409	bnz,a	0f			! if (len & 2)
3410	 sth	%o4, [%o0]		!	*(short *)addr = value;
34110:	btst	4, %o1
3412	bnz,a	0f			! if (len & 4)
3413	 st	%o4, [%o0]		!	*(int *)addr = value;
34140:	clr	%o0			! made it, clear onfault and return 0
3415	retl
3416	 st	%g0, [%o2 + PCB_ONFAULT]
3417
3418/*
3419 * Insert entry into doubly-linked queue.
3420 * We could just do this in C, but gcc does not do leaves well (yet).
3421 */
3422ENTRY(_insque)
3423	! %o0 = e = what to insert; %o1 = after = entry to insert after
3424	st	%o1, [%o0 + 4]		! e->prev = after;
3425	ld	[%o1], %o2		! tmp = after->next;
3426	st	%o2, [%o0]		! e->next = tmp;
3427	st	%o0, [%o1]		! after->next = e;
3428	retl
3429	st	%o0, [%o2 + 4]		! tmp->prev = e;
3430
3431
3432/*
3433 * Remove entry from doubly-linked queue.
3434 */
3435ENTRY(_remque)
3436	! %o0 = e = what to remove
3437	ld	[%o0], %o1		! n = e->next;
3438	ld	[%o0 + 4], %o2		! p = e->prev;
3439	st	%o2, [%o1 + 4]		! n->prev = p;
3440	retl
3441	st	%o1, [%o2]		! p->next = n;
3442
3443/*
3444 * copywords(src, dst, nbytes)
3445 *
3446 * Copy `nbytes' bytes from src to dst, both of which are word-aligned;
3447 * nbytes is a multiple of four.  It may, however, be zero, in which case
3448 * nothing is to be copied.
3449 */
3450ENTRY(copywords)
3451	! %o0 = src, %o1 = dst, %o2 = nbytes
3452	b	1f
3453	deccc	4, %o2
34540:
3455	st	%o3, [%o1 + %o2]
3456	deccc	4, %o2			! while ((n -= 4) >= 0)
34571:
3458	bge,a	0b			!    *(int *)(dst+n) = *(int *)(src+n);
3459	ld	[%o0 + %o2], %o3
3460	retl
3461	nop
3462
3463/*
3464 * qcopy(src, dst, nbytes)
3465 *
3466 * (q for `quad' or `quick', as opposed to b for byte/block copy)
3467 *
3468 * Just like copywords, but everything is multiples of 8.
3469 */
3470ENTRY(qcopy)
3471	b	1f
3472	deccc	8, %o2
34730:
3474	std	%o4, [%o1 + %o2]
3475	deccc	8, %o2
34761:
3477	bge,a	0b
3478	ldd	[%o0 + %o2], %o4
3479	retl
3480	nop
3481
3482/*
3483 * qzero(addr, nbytes)
3484 *
3485 * Zeroes `nbytes' bytes of a quad-aligned virtual address,
3486 * where nbytes is itself a multiple of 8.
3487 */
3488ENTRY(qzero)
3489	! %o0 = addr, %o1 = len (in bytes)
3490	clr	%g1
34910:
3492	deccc	8, %o1			! while ((n -= 8) >= 0)
3493	bge,a	0b
3494	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + n) = 0;
3495	retl
3496	nop
3497
3498/*
3499 * bzero(addr, len)
3500 *
3501 * We should unroll the loop, but at the moment this would
3502 * gain nothing since the `std' instructions are what limits us.
3503 */
3504ALTENTRY(blkclr)
3505ENTRY(bzero)
3506	! %o0 = addr, %o1 = len
3507
3508	! Optimize a common case: addr and len are both multiples of 8.
3509	or	%o0, %o1, %o2
3510	btst	7, %o2			! ((addr | len) & 7) != 0?
3511	bnz	1f			! if so, cannot optimize
3512	clr	%g1			! in any case, we want g1=0
3513
3514	/* `Good' operands, can just store doubles. */
35150:
3516	deccc	8, %o1			! while ((len -= 8) >= 0)
3517	bge,a	0b
3518	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + len) = 0;
3519	retl
3520	nop
3521
3522	/*
3523	 * Either the address is unaligned, or the count is not a
3524	 * multiple of 8, or both.  We will have to align the address
3525	 * in order to use anything `better' than stb.
3526	 */
35271:
3528	cmp	%o1, 15			! len >= 15?
3529	bge,a	Lstd			! yes, use std
3530	btst	1, %o0			! (but first check alignment)
3531
3532	! not enough to bother: do byte-at-a-time loop.
35332:
3534	deccc	%o1			! while (--len >= 0)
3535	bge,a	2b
3536	stb	%g0, [%o0 + %o1]	!	addr[len] = 0;
3537	retl
3538	nop
3539
3540Lstd:
3541	/*
3542	 * There are at least 15 bytes to zero.
3543	 * We may have to zero some initial stuff to align
3544	 * the address.
3545	 */
3546	bz,a	1f			! if (addr & 1) {
3547	btst	2, %o0
3548	stb	%g0, [%o0]		!	*addr = 0;
3549	inc	%o0			!	addr++;
3550	dec	%o1			!	len--;
3551	btst	2, %o0			! }
35521:
3553	bz,a	1f			! if (addr & 2) {
3554	btst	4, %o0
3555	sth	%g0, [%o0]		!	*(short *)addr = 0;
3556	inc	2, %o0			!	addr += 2;
3557	dec	2, %o1			!	len -= 2;
3558	btst	4, %o0			! }
35591:
3560	bz	1f			! if (addr & 4) {
3561	dec	8, %o1
3562	st	%g0, [%o0]		!	*(int *)addr = 0;
3563	inc	4, %o0			!	addr += 4;
3564	dec	4, %o1			!	len -= 4;
3565					! }
3566	/*
3567	 * Address is double word aligned; len is 8 less than
3568	 * the number of bytes remaining (i.e., len is 0 if
3569	 * the remaining count is 8, 1 if it is 9, etc.).
3570	 */
35711:
3572	std	%g0, [%o0]		! do {
35732:					!	*(quad *)addr = 0;
3574	inc	8, %o0			!	addr += 8;
3575	deccc	8, %o1			! } while ((len -= 8) >= 0);
3576	bge,a	2b
3577	std	%g0, [%o0]
3578
3579	/*
3580	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
3581	 * -6 => two bytes, etc.  Mop up this remainder, if any.
3582	 */
3583	btst	4, %o1
3584	bz	1f			! if (len & 4) {
3585	btst	2, %o1
3586	st	%g0, [%o0]		!	*(int *)addr = 0;
3587	inc	4, %o0			!	addr += 4;
35881:
3589	bz	1f			! if (len & 2) {
3590	btst	1, %o1
3591	sth	%g0, [%o0]		!	*(short *)addr = 0;
3592	inc	2, %o0			!	addr += 2;
35931:
3594	bnz,a	1f			! if (len & 1)
3595	stb	%g0, [%o0]		!	*addr = 0;
35961:
3597	retl
3598	nop
3599
3600/*
3601 * kernel bcopy/memcpy
3602 * Assumes regions do not overlap; has no useful return value.
3603 *
3604 * Must not use %g7 (see copyin/copyout above).
3605 */
3606
3607#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
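/*
 * The shape of the fancy path, as a C sketch (the real loops below
 * keep src, dst, len in %o0, %o1, %o2 and carry their own comments):
 *
 *	if (len < BCOPY_SMALL)
 *		copy byte by byte;
 *	else if (((src | dst) & 7) == 0)
 *		copy doublewords, then mop up;
 *	else if ((src ^ dst) & 1)
 *		copy byte by byte;		can never be co-aligned
 *	else {
 *		copy at most one byte, one short, and one word until
 *		    src and dst share doubleword alignment, falling
 *		    back to a short or word loop when the matching
 *		    (src ^ dst) bit is set;
 *		copy doublewords, then mop up the odd word/short/byte;
 *	}
 */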
3608
3609ENTRY(memcpy)
3610	/*
3611	 * Swap args for bcopy.  Gcc generates calls to memcpy for
3612	 * structure assignments.
3613	 */
3614	mov	%o0, %o3
3615	mov	%o1, %o0
3616	mov	%o3, %o1
3617ENTRY(bcopy)
3618	cmp	%o2, BCOPY_SMALL
3619Lbcopy_start:
3620	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
3621	btst	7, %o0		! (part of being fancy)
3622
3623	/*
3624	 * Not much to copy, just do it a byte at a time.
3625	 */
3626	deccc	%o2		! while (--len >= 0)
3627	bl	1f
3628	EMPTY
36290:
3630	inc	%o0
3631	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
3632	stb	%o4, [%o1]
3633	deccc	%o2
3634	bge	0b
3635	inc	%o1
36361:
3637	retl
3638	nop
3639	/* NOTREACHED */
3640
3641	/*
3642	 * Plenty of data to copy, so try to do it optimally.
3643	 */
3644Lbcopy_fancy:
3645	! check for common case first: everything lines up.
3646!	btst	7, %o0		! done already
3647	bne	1f
3648	EMPTY
3649	btst	7, %o1
3650	be,a	Lbcopy_doubles
3651	dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubles
3652
3653	! If the low bits match, we can make these line up.
36541:
3655	xor	%o0, %o1, %o3	! t = src ^ dst;
3656	btst	1, %o3		! if (t & 1) {
3657	be,a	1f
3658	btst	1, %o0		! [delay slot: if (src & 1)]
3659
3660	! low bits do not match, must copy by bytes.
36610:
3662	ldsb	[%o0], %o4	!	do {
3663	inc	%o0		!		(++dst)[-1] = *src++;
3664	inc	%o1
3665	deccc	%o2
3666	bnz	0b		!	} while (--len != 0);
3667	stb	%o4, [%o1 - 1]
3668	retl
3669	nop
3670	/* NOTREACHED */
3671
3672	! lowest bit matches, so we can copy by words, if nothing else
36731:
3674	be,a	1f		! if (src & 1) {
3675	btst	2, %o3		! [delay slot: if (t & 2)]
3676
3677	! although low bits match, both are 1: must copy 1 byte to align
3678	ldsb	[%o0], %o4	!	*dst++ = *src++;
3679	stb	%o4, [%o1]
3680	inc	%o0
3681	inc	%o1
3682	dec	%o2		!	len--;
3683	btst	2, %o3		! } [if (t & 2)]
36841:
3685	be,a	1f		! if (t & 2) {
3686	btst	2, %o0		! [delay slot: if (src & 2)]
3687	dec	2, %o2		!	len -= 2;
36880:
3689	ldsh	[%o0], %o4	!	do {
3690	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
3691	inc	2, %o0		!		dst += 2, src += 2;
3692	deccc	2, %o2		!	} while ((len -= 2) >= 0);
3693	bge	0b
3694	inc	2, %o1
3695	b	Lbcopy_mopb	!	goto mop_up_byte;
3696	btst	1, %o2		! } [delay slot: if (len & 1)]
3697	/* NOTREACHED */
3698
3699	! low two bits match, so we can copy by longwords
37001:
3701	be,a	1f		! if (src & 2) {
3702	btst	4, %o3		! [delay slot: if (t & 4)]
3703
3704	! although low 2 bits match, they are 10: must copy one short to align
3705	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3706	sth	%o4, [%o1]
3707	inc	2, %o0		!	dst += 2;
3708	inc	2, %o1		!	src += 2;
3709	dec	2, %o2		!	len -= 2;
3710	btst	4, %o3		! } [if (t & 4)]
37111:
3712	be,a	1f		! if (t & 4) {
3713	btst	4, %o0		! [delay slot: if (src & 4)]
3714	dec	4, %o2		!	len -= 4;
37150:
3716	ld	[%o0], %o4	!	do {
3717	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
3718	inc	4, %o0		!		dst += 4, src += 4;
3719	deccc	4, %o2		!	} while ((len -= 4) >= 0);
3720	bge	0b
3721	inc	4, %o1
3722	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3723	btst	2, %o2		! } [delay slot: if (len & 2)]
3724	/* NOTREACHED */
3725
3726	! low three bits match, so we can copy by doublewords
37271:
3728	be	1f		! if (src & 4) {
3729	dec	8, %o2		! [delay slot: len -= 8]
3730	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3731	st	%o4, [%o1]
3732	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
3733	inc	4, %o1
3734	dec	4, %o2		! }
37351:
3736Lbcopy_doubles:
3737	ldd	[%o0], %o4	! do {
3738	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
3739	inc	8, %o0		!	dst += 8, src += 8;
3740	deccc	8, %o2		! } while ((len -= 8) >= 0);
3741	bge	Lbcopy_doubles
3742	inc	8, %o1
3743
3744	! check for a usual case again (save work)
3745	btst	7, %o2		! if ((len & 7) == 0)
3746	be	Lbcopy_done	!	goto bcopy_done;
3747
3748	btst	4, %o2		! if ((len & 4) == 0)
3749	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3750	btst	2, %o2		! [delay slot: if (len & 2)]
3751	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3752	st	%o4, [%o1]
3753	inc	4, %o0		!	dst += 4;
3754	inc	4, %o1		!	src += 4;
3755	btst	2, %o2		! } [if (len & 2)]
3756
37571:
3758	! mop up trailing word (if present) and byte (if present).
3759Lbcopy_mopw:
3760	be	Lbcopy_mopb	! no word, go mop up byte
3761	btst	1, %o2		! [delay slot: if (len & 1)]
3762	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
3763	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
3764	sth	%o4, [%o1]
3765	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
3766	retl
3767	stb	%o4, [%o1 + 2]
3768	/* NOTREACHED */
3769
3770	! mop up trailing byte (if present).
3771Lbcopy_mopb:
3772	bne,a	1f
3773	ldsb	[%o0], %o4
3774
3775Lbcopy_done:
3776	retl
3777	nop
3778
37791:
3780	retl
3781	stb	%o4,[%o1]
3782/*
3783 * ovbcopy(src, dst, len): like bcopy, but regions may overlap.
3784 */
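/*
 * The overlap handling amounts to the following (C sketch, illustrative
 * only; the real code below also copies by words/halfwords when the
 * alignment permits):
 *
 *	if (src >= dst) {		// forward copy cannot clobber src
 *		bcopy(src, dst, len);
 *		return;
 *	}
 *	src += len;			// otherwise copy from the ends,
 *	dst += len;			// working backwards
 *	while (--len >= 0)
 *		*--dst = *--src;
 */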
3785ENTRY(ovbcopy)
3786	cmp	%o0, %o1	! src < dst?
3787	bgeu	Lbcopy_start	! no, go copy forwards as via bcopy
3788	cmp	%o2, BCOPY_SMALL	! (check length for doublecopy first)
3789
3790	/*
3791	 * Since src comes before dst, and the regions might overlap,
3792	 * we have to do the copy starting at the end and working backwards.
3793	 */
3794	add	%o2, %o0, %o0	! src += len
3795	add	%o2, %o1, %o1	! dst += len
3796	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
3797	btst	3, %o0
3798
3799	/*
3800	 * Not much to copy, just do it a byte at a time.
3801	 */
3802	deccc	%o2		! while (--len >= 0)
3803	bl	1f
3804	EMPTY
38050:
3806	dec	%o0		!	*--dst = *--src;
3807	ldsb	[%o0], %o4
3808	dec	%o1
3809	deccc	%o2
3810	bge	0b
3811	stb	%o4, [%o1]
38121:
3813	retl
3814	nop
3815
3816	/*
3817	 * Plenty to copy, try to be optimal.
3818	 * We only bother with word/halfword/byte copies here.
3819	 */
3820Lback_fancy:
3821!	btst	3, %o0		! done already
3822	bnz	1f		! if ((src & 3) == 0 &&
3823	btst	3, %o1		!     (dst & 3) == 0)
3824	bz,a	Lback_words	!	goto words;
3825	dec	4, %o2		! (done early for word copy)
3826
38271:
3828	/*
3829	 * See if the low bits match.
3830	 */
3831	xor	%o0, %o1, %o3	! t = src ^ dst;
3832	btst	1, %o3
3833	bz,a	3f		! if (t & 1) == 0, can do better
3834	btst	1, %o0
3835
3836	/*
3837	 * Nope; gotta do byte copy.
3838	 */
38392:
3840	dec	%o0		! do {
3841	ldsb	[%o0], %o4	!	*--dst = *--src;
3842	dec	%o1
3843	deccc	%o2		! } while (--len != 0);
3844	bnz	2b
3845	stb	%o4, [%o1]
3846	retl
3847	nop
3848
38493:
3850	/*
3851	 * Can do halfword or word copy, but might have to copy 1 byte first.
3852	 */
3853!	btst	1, %o0		! done earlier
3854	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
3855	btst	2, %o3		! (done early)
3856	dec	%o0		!	*--dst = *--src;
3857	ldsb	[%o0], %o4
3858	dec	%o1
3859	stb	%o4, [%o1]
3860	dec	%o2		!	len--;
3861	btst	2, %o3		! }
3862
38634:
3864	/*
3865	 * See if we can do a word copy ((t&2) == 0).
3866	 */
3867!	btst	2, %o3		! done earlier
3868	bz,a	6f		! if (t & 2) == 0, can do word copy
3869	btst	2, %o0		! (src&2, done early)
3870
3871	/*
3872	 * Gotta do halfword copy.
3873	 */
3874	dec	2, %o2		! len -= 2;
38755:
3876	dec	2, %o0		! do {
3877	ldsh	[%o0], %o4	!	src -= 2;
3878	dec	2, %o1		!	dst -= 2;
3879	deccc	2, %o2		!	*(short *)dst = *(short *)src;
3880	bge	5b		! } while ((len -= 2) >= 0);
3881	sth	%o4, [%o1]
3882	b	Lback_mopb	! goto mop_up_byte;
3883	btst	1, %o2		! (len&1, done early)
3884
38856:
3886	/*
3887	 * We can do word copies, but we might have to copy
3888	 * one halfword first.
3889	 */
3890!	btst	2, %o0		! done already
3891	bz	7f		! if (src & 2) {
3892	dec	4, %o2		! (len -= 4, done early)
3893	dec	2, %o0		!	src -= 2, dst -= 2;
3894	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3895	dec	2, %o1
3896	sth	%o4, [%o1]
3897	dec	2, %o2		!	len -= 2;
3898				! }
3899
39007:
3901Lback_words:
3902	/*
3903	 * Do word copies (backwards), then mop up trailing halfword
3904	 * and byte if any.
3905	 */
3906!	dec	4, %o2		! len -= 4, done already
39070:				! do {
3908	dec	4, %o0		!	src -= 4;
3909	dec	4, %o1		!	dst -= 4;
3910	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3911	deccc	4, %o2		! } while ((len -= 4) >= 0);
3912	bge	0b
3913	st	%o4, [%o1]
3914
3915	/*
3916	 * Check for trailing shortword.
3917	 */
3918	btst	2, %o2		! if (len & 2) {
3919	bz,a	1f
3920	btst	1, %o2		! (len&1, done early)
3921	dec	2, %o0		!	src -= 2, dst -= 2;
3922	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3923	dec	2, %o1
3924	sth	%o4, [%o1]	! }
3925	btst	1, %o2
3926
3927	/*
3928	 * Check for trailing byte.
3929	 */
39301:
3931Lback_mopb:
3932!	btst	1, %o2		! (done already)
3933	bnz,a	1f		! if (len & 1) {
3934	ldsb	[%o0 - 1], %o4	!	b = src[-1];
3935	retl
3936	nop
39371:
3938	retl			!	dst[-1] = b;
3939	stb	%o4, [%o1 - 1]	! }
3940
3941
3942/*
3943 * savefpstate(f) struct fpstate *f;
3944 *
3945 * Store the current FPU state.  The first `st %fsr' may cause a trap;
3946 * our trap handler knows how to recover (by `returning' to savefpcont).
3947 */
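/*
 * In outline (C-ish sketch, illustrative only; getpsr()/setpsr() are
 * hypothetical helpers, and fsr_qfront() is the notation the comments
 * below use for `std %fq'):
 *
 *	setpsr(getpsr() | PSR_EF);	// make sure the FPU is enabled
 *	f->fs_fsr = getfsr();		// this store may trap once
 *	qsize = 0;
 *	while (f->fs_fsr & FSR_QNE) {	// drain any pending FP queue
 *		f->fs_queue[qsize++] = fsr_qfront();
 *		f->fs_fsr = getfsr();
 *	}
 *	f->fs_qsize = qsize;
 *	copy %f0..%f31 into f->fs_regs[];
 */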
3948ENTRY(savefpstate)
3949	rd	%psr, %o1		! enable FP before we begin
3950	set	PSR_EF, %o2
3951	or	%o1, %o2, %o1
3952	wr	%o1, 0, %psr
3953	/* do some setup work while we wait for PSR_EF to turn on */
3954	set	FSR_QNE, %o5		! QNE = 0x2000, too big for immediate
3955	clr	%o3			! qsize = 0;
3956	nop				! (still waiting for PSR_EF)
3957special_fp_store:
3958	st	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
3959	/*
3960	 * Even if the preceding instruction did not trap, the queue
3961	 * is not necessarily empty: this state save might be happening
3962	 * because user code tried to store %fsr and took the FPU
3963	 * from `exception pending' mode to `exception' mode.
3964	 * So we still have to check the blasted QNE bit.
3965	 * With any luck it will usually not be set.
3966	 */
3967	ld	[%o0 + FS_FSR], %o4	! if (f->fs_fsr & QNE)
3968	btst	%o5, %o4
3969	bnz	Lfp_storeq		!	goto storeq;
3970	 std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
3971Lfp_finish:
3972	st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
3973	std	%f2, [%o0 + FS_REGS + (4*2)]
3974	std	%f4, [%o0 + FS_REGS + (4*4)]
3975	std	%f6, [%o0 + FS_REGS + (4*6)]
3976	std	%f8, [%o0 + FS_REGS + (4*8)]
3977	std	%f10, [%o0 + FS_REGS + (4*10)]
3978	std	%f12, [%o0 + FS_REGS + (4*12)]
3979	std	%f14, [%o0 + FS_REGS + (4*14)]
3980	std	%f16, [%o0 + FS_REGS + (4*16)]
3981	std	%f18, [%o0 + FS_REGS + (4*18)]
3982	std	%f20, [%o0 + FS_REGS + (4*20)]
3983	std	%f22, [%o0 + FS_REGS + (4*22)]
3984	std	%f24, [%o0 + FS_REGS + (4*24)]
3985	std	%f26, [%o0 + FS_REGS + (4*26)]
3986	std	%f28, [%o0 + FS_REGS + (4*28)]
3987	retl
3988	 std	%f30, [%o0 + FS_REGS + (4*30)]
3989
3990/*
3991 * Store the (now known nonempty) FP queue.
3992 * We have to reread the fsr each time in order to get the new QNE bit.
3993 */
3994Lfp_storeq:
3995	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
39961:
3997	std	%fq, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
3998	st	%fsr, [%o0 + FS_FSR]	! reread fsr
3999	ld	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
4000	btst	%o5, %o4
4001	bnz	1b
4002	 inc	8, %o3
4003	b	Lfp_finish		! set qsize and finish storing fregs
4004	 srl	%o3, 3, %o3		! (but first fix qsize)
4005
4006/*
4007 * The fsr store trapped.  Do it again; this time it will not trap.
4008 * We could just have the trap handler return to the `st %fsr', but
4009 * if for some reason it *does* trap, that would lock us into a tight
4010 * loop.  This way we panic instead.  Whoopee.
4011 */
4012savefpcont:
4013	b	special_fp_store + 4	! continue
4014	 st	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
4015
4016/*
4017 * Load FPU state.
4018 */
4019ENTRY(loadfpstate)
4020	rd	%psr, %o1		! enable FP before we begin
4021	set	PSR_EF, %o2
4022	or	%o1, %o2, %o1
4023	wr	%o1, 0, %psr
4024	nop; nop; nop			! paranoia
4025	ldd	[%o0 + FS_REGS + (4*0)], %f0
4026	ldd	[%o0 + FS_REGS + (4*2)], %f2
4027	ldd	[%o0 + FS_REGS + (4*4)], %f4
4028	ldd	[%o0 + FS_REGS + (4*6)], %f6
4029	ldd	[%o0 + FS_REGS + (4*8)], %f8
4030	ldd	[%o0 + FS_REGS + (4*10)], %f10
4031	ldd	[%o0 + FS_REGS + (4*12)], %f12
4032	ldd	[%o0 + FS_REGS + (4*14)], %f14
4033	ldd	[%o0 + FS_REGS + (4*16)], %f16
4034	ldd	[%o0 + FS_REGS + (4*18)], %f18
4035	ldd	[%o0 + FS_REGS + (4*20)], %f20
4036	ldd	[%o0 + FS_REGS + (4*22)], %f22
4037	ldd	[%o0 + FS_REGS + (4*24)], %f24
4038	ldd	[%o0 + FS_REGS + (4*26)], %f26
4039	ldd	[%o0 + FS_REGS + (4*28)], %f28
4040	ldd	[%o0 + FS_REGS + (4*30)], %f30
4041	retl
4042	 ld	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
4043
4044/*
4045 * ienab_bis(bis) int bis;
4046 * ienab_bic(bic) int bic;
4047 *
4048 * Set and clear bits in the interrupt register.
4049 * Since there are no read-modify-write instructions for this,
4050 * and one of the interrupts is nonmaskable, we must disable traps.
4051 *
4052 * NB: ___main is defined here for gcc-2 idiocy.  Ignore it.
4053 */
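/*
 * In C terms both routines do the following (sketch only; getpsr() and
 * setpsr() are hypothetical helpers standing in for rd/wr %psr):
 *
 *	psr = getpsr();
 *	setpsr(psr ^ PSR_ET);		// wr xors; ET was set, so traps go off
 *	... wait out the 3-instruction write delay ...
 *	*IE_reg_addr |= bis;		// ienab_bic does `&= ~bic' instead
 *	setpsr(psr);			// traps back on
 */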
4054ENTRY(ienab_bis)
4055	! %o0 = bits to set
4056	rd	%psr, %o2
4057	wr	%o2, PSR_ET, %psr	! disable traps
4058	nop; nop			! 3-instr delay until ET turns off
4059	sethi	%hi(IE_reg_addr), %o3
4060	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4061	or	%o4, %o0, %o4		! *IE_reg_addr |= bis;
4062	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4063	wr	%o2, 0, %psr		! reenable traps
4064	nop
4065	.globl	___main
4066___main:
4067	retl
4068	 nop
4069
4070ENTRY(ienab_bic)
4071	! %o0 = bits to clear
4072	rd	%psr, %o2
4073	wr	%o2, PSR_ET, %psr	! disable traps
4074	nop; nop
4075	sethi	%hi(IE_reg_addr), %o3
4076	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4077	andn	%o4, %o0, %o4		! *IE_reg_addr &=~ bic;
4078	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4079	wr	%o2, 0, %psr		! reenable traps
4080	nop
4081	retl
4082	 nop
4083
4084/*
4085 * ffs(), using table lookup.
4086 * The process switch code shares the table, so we just put the
4087 * whole thing here.
4088 */
4089ffstab:
4090	.byte	-24,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1 /* 00-0f */
4091	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 10-1f */
4092	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 20-2f */
4093	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 30-3f */
4094	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 40-4f */
4095	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 50-5f */
4096	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 60-6f */
4097	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 70-7f */
4098	.byte	8,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 80-8f */
4099	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 90-9f */
4100	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* a0-af */
4101	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* b0-bf */
4102	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* c0-cf */
4103	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* d0-df */
4104	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* e0-ef */
4105	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* f0-ff */
4106
4107/*
4108 * We use a table lookup on each byte.
4109 *
4110 * In each section below, %o1 holds the value of byte 0, 1, 2, or 3 in turn.
4111 * The last byte is handled specially: for the first three,
4112 * if that byte is nonzero, we return the table value
4113 * (plus 0, 8, or 16 for the byte number), but for the last
4114 * one, we just return the table value plus 24.  This means
4115 * that ffstab[0] must be -24 so that ffs(0) will return 0.
4116 */
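/*
 * Equivalent C (sketch only, never compiled; `ffs_c' is a made-up name):
 *
 *	int
 *	ffs_c(unsigned int v)
 *	{
 *		if (v & 0xff)
 *			return (ffstab[v & 0xff]);
 *		v >>= 8;
 *		if (v & 0xff)
 *			return (ffstab[v & 0xff] + 8);
 *		v >>= 8;
 *		if (v & 0xff)
 *			return (ffstab[v & 0xff] + 16);
 *		v >>= 8;
 *		return (ffstab[v] + 24);	// ffstab[0] == -24, so ffs(0) == 0
 *	}
 */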
4117ENTRY(ffs)
4118	set	ffstab, %o2
4119	andcc	%o0, 0xff, %o1	! get low byte
4120	bz,a	1f		! try again if 0
4121	srl	%o0, 8, %o0	! delay slot, get ready for next byte
4122
4123	retl			! return ffstab[%o1]
4124	ldsb	[%o2 + %o1], %o0
4125
41261:
4127	andcc	%o0, 0xff, %o1	! byte 1 like byte 0...
4128	bz,a	2f
4129	srl	%o0, 8, %o0	! (use delay to prepare for byte 2)
4130
4131	ldsb	[%o2 + %o1], %o0
4132	retl			! return ffstab[%o1] + 8
4133	add	%o0, 8, %o0
4134
41352:
4136	andcc	%o0, 0xff, %o1
4137	bz,a	3f
4138	srl	%o0, 8, %o0	! (prepare for byte 3)
4139
4140	ldsb	[%o2 + %o1], %o0
4141	retl			! return ffstab[%o1] + 16
4142	add	%o0, 16, %o0
4143
41443:				! just return ffstab[%o0] + 24
4145	ldsb	[%o2 + %o0], %o0
4146	retl
4147	add	%o0, 24, %o0
4148
4149/*
4150 * Here is a very good random number generator.  This implementation is
4151 * based on ``Two Fast Implementations of the "Minimal Standard" Random
4152 * Number Generator'', David G. Carta, Communications of the ACM, Jan 1990,
4153 * Vol 33 No 1.
4154 */
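/*
 * Carta's method computes the Park-Miller generator
 *	seed = (16807 * seed) mod (2^31 - 1)
 * without dividing.  A C sketch (illustrative only; the 64-bit product
 * here stands in for the mulscc sequence below):
 *
 *	unsigned long long p = 16807ULL * randseed;	// 46-bit product
 *	unsigned int lo = p & 0x7fffffff;		// low 31 bits
 *	unsigned int hi = p >> 31;			// high 15 bits
 *	unsigned int r = lo + hi;
 *	if (r & 0x80000000)				// r >= 2^31, so
 *		r = (r & 0x7fffffff) + 1;		// subtract 2^31 - 1
 *	return (randseed = r);
 */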
4155	.data
4156randseed:
4157	.word	1
4158	.text
4159ENTRY(random)
4160	sethi	%hi(16807), %o1
4161	wr	%o1, %lo(16807), %y
4162	 sethi	%hi(randseed), %g1
4163	 ld	[%g1 + %lo(randseed)], %o0
4164	 andcc	%g0, 0, %o2
4165	mulscc  %o2, %o0, %o2
4166	mulscc  %o2, %o0, %o2
4167	mulscc  %o2, %o0, %o2
4168	mulscc  %o2, %o0, %o2
4169	mulscc  %o2, %o0, %o2
4170	mulscc  %o2, %o0, %o2
4171	mulscc  %o2, %o0, %o2
4172	mulscc  %o2, %o0, %o2
4173	mulscc  %o2, %o0, %o2
4174	mulscc  %o2, %o0, %o2
4175	mulscc  %o2, %o0, %o2
4176	mulscc  %o2, %o0, %o2
4177	mulscc  %o2, %o0, %o2
4178	mulscc  %o2, %o0, %o2
4179	mulscc  %o2, %o0, %o2
4180	mulscc  %o2, %g0, %o2
4181	rd	%y, %o3
4182	srl	%o2, 16, %o1
4183	set	0xffff, %o4
4184	and	%o4, %o2, %o0
4185	sll	%o0, 15, %o0
4186	srl	%o3, 17, %o3
4187	or	%o3, %o0, %o0
4188	addcc	%o0, %o1, %o0
4189	bl	1f
4190	 sethi	%hi(0x7fffffff), %o1
4191	retl
4192	 st	%o0, [%g1 + %lo(randseed)]
41931:
4194	or	%o1, %lo(0x7fffffff), %o1
4195	add	%o0, 1, %o0
4196	and	%o1, %o0, %o0
4197	retl
4198	 st	%o0, [%g1 + %lo(randseed)]
4199
4200/*
4201 * void microtime(struct timeval *tv)
4202 *
4203 * LBL's sparc bsd 'microtime': We don't need to spl (so this routine
4204 * can be a leaf routine) and we don't keep a 'last' timeval (there
4205 * can't be two calls to this routine in a microsecond).  This seems to
4206 * be about 20 times faster than the Sun code on an SS-2. - vj
4207 *
4208 * Read time values from slowest-changing to fastest-changing,
4209 * then re-read out to slowest.  If the values read before
4210 * the innermost match those read after, the innermost value
4211 * is consistent with the outer values.  If not, it may not
4212 * be and we must retry.  Typically this loop runs only once;
4213 * occasionally it runs twice, and only rarely does it run longer.
4214 */
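/*
 * The C shape of the routine (sketch only; `c' holds the raw counter
 * register, whose sign bit is the limit-reached flag):
 *
 *	do {
 *		sec = time.tv_sec;
 *		usec = time.tv_usec;
 *		c = *(unsigned int *)TIMERREG_VA;
 *	} while (sec != time.tv_sec || usec != time.tv_usec);
 *	if (c & 0x80000000) {		// limit reached; clock intr pending
 *		usec += tick;
 *		c = (c >> TMR_SHIFT) & TMR_MASK;
 *	} else
 *		c >>= TMR_SHIFT;
 *	usec += c;
 *	if (usec >= 1000000) {
 *		sec++;
 *		usec -= 1000000;
 *	}
 *	tv->tv_sec = sec;
 *	tv->tv_usec = usec;
 */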
4215ENTRY(microtime)
4216	sethi	%hi(_time), %g2
4217	sethi	%hi(TIMERREG_VA), %g3
42181:
4219	ldd	[%g2+%lo(_time)], %o2		! time.tv_sec & time.tv_usec
4220	ld	[%g3+%lo(TIMERREG_VA)], %o4	! usec counter
4221	ldd	[%g2+%lo(_time)], %g4		! see if time values changed
4222	cmp	%g4, %o2
4223	bne	1b				! if time.tv_sec changed
4224	 cmp	%g5, %o3
4225	bne	1b				! if time.tv_usec changed
4226	 tst	%o4
4227
4228	bpos	2f				! reached limit?
4229	 srl	%o4, TMR_SHIFT, %o4		! convert counter to usec
4230	sethi	%hi(_tick), %g4			! bump usec by 1 tick
4231	ld	[%g4+%lo(_tick)], %o1
4232	set	TMR_MASK, %g5
4233	add	%o1, %o3, %o3
4234	and	%o4, %g5, %o4
42352:
4236	add	%o4, %o3, %o3
4237	set	1000000, %g5			! normalize usec value
4238	cmp	%o3, %g5
4239	bl,a	3f
4240	 st	%o2, [%o0]			! (should be able to std here)
4241	add	%o2, 1, %o2			! overflow
4242	sub	%o3, %g5, %o3
4243	st	%o2, [%o0]			! (should be able to std here)
42443:
4245	retl
4246	 st	%o3, [%o0+4]
4247
4248/*
4249 * This procedure exists to make stdarg functions work correctly.
4250 * We write the caller's `in' registers into his caller's `arg dump'
4251 * area.  That arg-dump area immediately precedes the argument extension
4252 * area, resulting in a single contiguous block of memory.
4253 *
4254 * This is really the wrong way to do it: the arguments should be written
4255 * to storage local to the stdarg function, and the stdarg `pick up
4256 * the next argument' code should pick it up from whichever region is
4257 * `active' at that point.
4258 */
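/*
 * Frame layout assumed below (standard SPARC convention; the struct and
 * the field names other than fr_argd/fr_argx are hypothetical):
 *
 *	struct frame_sketch {
 *		int	fr_window[16];	// 0x00: %l0-%l7/%i0-%i7 save area
 *		int	*fr_stret;	// 0x40: hidden struct-return slot
 *		int	fr_argd[6];	// 0x44: caller's arg dump area
 *		int	fr_argx[1];	// 0x5c: argument extension area
 *	};
 */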
4259	.globl	___builtin_saveregs
4260___builtin_saveregs:
4261	! not profiled -- this should be done inline anyway
4262	! bleah! the arg dump area is unaligned!  cannot std w/o reg/reg moves
4263	st	%i0, [%fp + 0x44]	! fr->fr_argd[0]
4264	st	%i1, [%fp + 0x48]	! fr->fr_argd[1]
4265	st	%i2, [%fp + 0x4c]	! fr->fr_argd[2]
4266	st	%i3, [%fp + 0x50]	! fr->fr_argd[3]
4267	st	%i4, [%fp + 0x54]	! fr->fr_argd[4]
4268	retl
4269	 st	%i5, [%fp + 0x58]	! fr->fr_argd[5]
4270
4271#ifdef KGDB
4272/*
4273 * Write all windows (user or otherwise), except the current one.
4274 *
4275 * THIS COULD BE DONE IN USER CODE
4276 */
4277ENTRY(write_all_windows)
4278	/*
4279	 * g2 = g1 = nwindows - 1;
4280	 * while (--g1 > 0) save();
4281	 * while (--g2 > 0) restore();
4282	 */
4283	sethi	%hi(_nwindows), %g1
4284	ld	[%g1 + %lo(_nwindows)], %g1
4285	dec	%g1
4286	mov	%g1, %g2
4287
42881:	deccc	%g1
4289	bg,a	1b
4290	 save	%sp, -64, %sp
4291
42922:	deccc	%g2
4293	bg,a	2b
4294	 restore
4295
4296	retl
4297	nop
4298#endif /* KGDB */
4299
4300	.data
4301	.globl	_cold
4302_cold:
4303	.word	1		! cold start flag
4304
4305	.globl	_proc0paddr
4306_proc0paddr:
4307	.word	_u0		! KVA of proc0 uarea
4308
4309/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
4310	.globl	_intrcnt, _eintrcnt, _intrnames, _eintrnames
4311_intrnames:
4312	.asciz	"spur"
4313	.asciz	"lev1"
4314	.asciz	"lev2"
4315	.asciz	"lev3"
4316	.asciz	"lev4"
4317	.asciz	"lev5"
4318	.asciz	"lev6"
4319	.asciz	"lev7"
4320	.asciz  "lev8"
4321	.asciz	"lev9"
4322	.asciz	"clock"
4323	.asciz	"lev11"
4324	.asciz	"lev12"
4325	.asciz	"lev13"
4326	.asciz	"prof"
4327_eintrnames:
4328	ALIGN
4329_intrcnt:
4330	.skip	4*15
4331_eintrcnt:
4332
4333	.comm	_nwindows, 4
4334	.comm	_promvec, 4
4335	.comm	_curproc, 4
4336	.comm	_qs, 32 * 8
4337	.comm	_whichqs, 4
4338