xref: /original-bsd/sys/sparc/sparc/locore.s (revision 95ecee29)
1/*
2 * Copyright (c) 1992, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * All advertising materials mentioning features or use of this software
10 * must display the following acknowledgement:
11 *	This product includes software developed by the University of
12 *	California, Lawrence Berkeley Laboratory.
13 *
14 * %sccs.include.redist.c%
15 *
16 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
17 *
18 * from: $Header: locore.s,v 1.51 93/04/21 06:19:37 torek Exp $
19 */
20
21#define	LOCORE
22#include "assym.s"
23#include <sparc/sparc/intreg.h>
24#include <sparc/sparc/timerreg.h>
25#ifdef notyet
26#include <sparc/sparc/vaddrs.h>
27#include <sparc/dev/zsreg.h>
28#endif
29#include <machine/ctlreg.h>
30#include <machine/psl.h>
31#include <machine/signal.h>
32#include <machine/trap.h>
33
34/*
35 * GNU assembler does not understand `.empty' directive; Sun assembler
36 * gripes about labels without it.  To allow cross-compilation using
37 * the Sun assembler, and because .empty directives are useful documentation,
38 * we use this trick.
39 */
40#ifdef SUN_AS
41#define	EMPTY	.empty
42#else
43#define	EMPTY	/* .empty */
44#endif
45
46/* use as needed to align things on longword boundaries */
47#define	ALIGN	.align 4
48
49/*
50 * CCFSZ (C Compiler Frame SiZe) is the size of a stack frame required if
51 * a function is to call C code.  It should be just 64, but Sun defined
52 * their frame with space to hold arguments 0 through 5 (plus some junk),
53 * and varargs routines (such as printf) demand this, and gcc uses this
54 * area at times anyway.
55 */
56#define	CCFSZ	96
57
58/*
59 * A handy macro for maintaining instrumentation counters.
60 * Note that this clobbers %o0 and %o1.  Normal usage is
61 * something like:
62 *	foointr:
63 *		TRAP_SETUP(...)		! makes %o registers safe
64 *		INCR(_cnt+V_FOO)	! count a foo
65 */
66#define INCR(what) \
67	sethi	%hi(what), %o0; \
68	ld	[%o0 + %lo(what)], %o1; \
69	inc	%o1; \
70	st	%o1, [%o0 + %lo(what)]
71
72/*
73 * Another handy macro: load one register window, given `base' address.
74 * This can be either a simple register (e.g., %sp) or include an initial
75 * offset (e.g., %g6 + PCB_RW).
76 */
77#define	LOADWIN(addr) \
78	ldd	[addr], %l0; \
79	ldd	[addr + 8], %l2; \
80	ldd	[addr + 16], %l4; \
81	ldd	[addr + 24], %l6; \
82	ldd	[addr + 32], %i0; \
83	ldd	[addr + 40], %i2; \
84	ldd	[addr + 48], %i4; \
85	ldd	[addr + 56], %i6
86
87/*
88 * To return from trap we need the two-instruction sequence
89 * `jmp %l1; rett %l2', which is defined here for convenience.
90 */
91#define	RETT	jmp %l1; rett %l2
92
93	.data
94/*
95 * The interrupt stack.
96 *
97 * This is the very first thing in the data segment, and therefore has
98 * the lowest kernel stack address.  We count on this in the interrupt
99 * trap-frame setup code, since we may need to switch from the kernel
100 * stack to the interrupt stack (iff we are not already on the interrupt
101 * stack).  One sethi+cmp is all we need since this is so carefully
102 * arranged.
103 */
104	.globl	_intstack
105	.globl	_eintstack
106_intstack:
107	.skip	4 * NBPG		! 16k = 128 128-byte stack frames
108_eintstack:
109
110/*
111 * When a process exits and its u. area goes away, we set cpcb to point
112 * to this `u.', leaving us with something to use for an interrupt stack,
113 * and letting all the register save code have a pcb_uw to examine.
114 * This is also carefully arranged (to come just before u0, so that
115 * process 0's kernel stack can quietly overrun into it during bootup, if
116 * we feel like doing that).
117 */
118	.globl	_idle_u
119_idle_u:
120	.skip	UPAGES * NBPG
121
122/*
123 * Process 0's u.
124 *
125 * This must be aligned on an 8 byte boundary.
126 */
127	.globl	_u0
128_u0:	.skip	UPAGES * NBPG
129estack0:
130
131#ifdef KGDB
132/*
133 * Another item that must be aligned, easiest to put it here.
134 */
135KGDB_STACK_SIZE = 2048
136	.globl	_kgdb_stack
137_kgdb_stack:
138	.skip	KGDB_STACK_SIZE		! hope this is enough
139#endif
140
141/*
142 * _cpcb points to the current pcb (and hence u. area).
143 * Initially this is the special one.
144 */
145	.globl	_cpcb
146_cpcb:	.word	_u0
147
148	.text
149
150/*
151 * The first thing in the real text segment is the trap vector table,
152 * which must be aligned on a 4096 byte boundary.  The text segment
153 * starts beyond page 0 of KERNBASE so that there is a red zone
154 * between user and kernel space.  Since the boot ROM loads us at
155 * 0x4000, it is far easier to start at KERNBASE+0x4000 than to
156 * buck the trend.  This is four pages in; we can stuff something
157 * into the three pages left beneath us later ... like, oh, say, the
158 * message buffer (1 page).
159 */
160	.globl	_msgbuf
161msgbufsize = NBPG			! 1 page for msg buffer
162_msgbuf	= KERNBASE + NBPG
163
164/*
165 * The remaining two physical pages are currently unused.  We need to
166 * map the interrupt enable register very early on in the boot process,
167 * so that we can handle NMIs (parity errors) halfway sensibly during
168 * boot.  We use virtual address f8002000 (`page 2') for this, wasting
169 * 4096 bytes of physical memory.
170 */
171IE_reg_addr = _msgbuf + msgbufsize	! this page not used; points to IEreg
172
173/*
174 * Each trap has room for four instructions, of which one perforce must
175 * be a branch.  On entry the hardware has copied pc and npc to %l1 and
176 * %l2 respectively.  We use two more to read the psr into %l0, and to
177 * put the trap type value into %l3 (with a few exceptions below).
178 * We could read the trap type field of %tbr later in the code instead,
179 * but there is no need, and that would require more instructions
180 * (read+mask, vs 1 `mov' here).
181 *
182 * I used to generate these numbers by address arithmetic, but gas's
183 * expression evaluator has about as much sense as your average slug
184 * (oddly enough, the code looks about as slimy too).  Thus, all the
185 * trap numbers are given as arguments to the trap macros.  This means
186 * there is one line per trap.  Sigh.
187 *
188 * Note that only the local registers may be used, since the trap
189 * window is potentially the last window.  Its `in' registers are
190 * the previous window's outs (as usual), but more important, its
191 * `out' registers may be in use as the `topmost' window's `in' registers.
192 * The global registers are of course verboten (well, until we save
193 * them away).
194 *
195 * Hardware interrupt vectors can be `linked'---the linkage is to regular
196 * C code---or rewired to fast in-window handlers.  The latter are good
197 * for unbuffered hardware like the Zilog serial chip and the AMD audio
198 * chip, where many interrupts can be handled trivially with pseudo-DMA or
199 * similar.  Only one `fast' interrupt can be used per level, however, and
200 * direct and `fast' interrupts are incompatible.  Routines in intr.c
201 * handle setting these, with optional paranoia.
202 */
203
204	/* regular vectored traps */
205#define	VTRAP(type, label) \
206	mov (type), %l3; b label; mov %psr, %l0; nop
207
208	/* hardware interrupts (can be linked or made `fast') */
209#define	HARDINT(lev) \
210	mov (lev), %l3; b _sparc_interrupt; mov %psr, %l0; nop
211
212	/* software interrupts (may not be made direct, sorry---but you
213	   should not be using them trivially anyway) */
214#define	SOFTINT(lev, bit) \
215	mov (lev), %l3; mov (bit), %l4; b softintr; mov %psr, %l0
216
217	/* traps that just call trap() */
218#define	TRAP(type)	VTRAP(type, slowtrap)
219
220	/* architecturally undefined traps (cause panic) */
221#define	UTRAP(type)	VTRAP(type, slowtrap)
222
223	/* software undefined traps (may be replaced) */
224#define	STRAP(type)	VTRAP(type, slowtrap)
225
226/* breakpoint acts differently under kgdb */
227#ifdef KGDB
228#define	BPT		VTRAP(T_BREAKPOINT, bpt)
229#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
230#else
231#define	BPT		TRAP(T_BREAKPOINT)
232#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
233#endif
234
235/* special high-speed 1-instruction-shaved-off traps (get nothing in %l3) */
236#ifdef COMPAT_SUNOS
237#define	SUN_SYSCALL	b sun_syscall; mov %psr, %l0; nop; nop
238#else
239#define	SUN_SYSCALL	TRAP(T_SUN_SYSCALL)
240#endif
241#define	SYSCALL		b syscall; mov %psr, %l0; nop; nop
242#define	WINDOW_OF	b window_of; mov %psr, %l0; nop; nop
243#define	WINDOW_UF	b window_uf; mov %psr, %l0; nop; nop
244#ifdef notyet
245#define	ZS_INTERRUPT	b zshard; mov %psr, %l0; nop; nop
246#else
247#define	ZS_INTERRUPT	HARDINT(12)
248#endif
249
250	.globl	start
251	.globl	_trapbase
252start:
253_trapbase:
254/* trap 0 is special since we cannot receive it */
255	b dostart; nop; nop; nop	! 00 = reset (fake)
256	VTRAP(T_TEXTFAULT, memfault)	! 01 = instr. fetch fault
257	TRAP(T_ILLINST)			! 02 = illegal instruction
258	TRAP(T_PRIVINST)		! 03 = privileged instruction
259	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
260	WINDOW_OF			! 05 = window overflow
261	WINDOW_UF			! 06 = window underflow
262	TRAP(T_ALIGN)			! 07 = address alignment error
263	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
264	VTRAP(T_DATAFAULT, memfault)	! 09 = data fetch fault
265	TRAP(T_TAGOF)			! 0a = tag overflow
266	UTRAP(0x0b)
267	UTRAP(0x0c)
268	UTRAP(0x0d)
269	UTRAP(0x0e)
270	UTRAP(0x0f)
271	UTRAP(0x10)
272	SOFTINT(1, IE_L1)		! 11 = level 1 interrupt
273	HARDINT(2)			! 12 = level 2 interrupt
274	HARDINT(3)			! 13 = level 3 interrupt
275	SOFTINT(4, IE_L4)		! 14 = level 4 interrupt
276	HARDINT(5)			! 15 = level 5 interrupt
277	SOFTINT(6, IE_L6)		! 16 = level 6 interrupt
278	HARDINT(7)			! 17 = level 7 interrupt
279	HARDINT(8)			! 18 = level 8 interrupt
280	HARDINT(9)			! 19 = level 9 interrupt
281	HARDINT(10)			! 1a = level 10 interrupt
282	HARDINT(11)			! 1b = level 11 interrupt
283	ZS_INTERRUPT			! 1c = level 12 (zs) interrupt
284	HARDINT(13)			! 1d = level 13 interrupt
285	HARDINT(14)			! 1e = level 14 interrupt
286	VTRAP(15, nmi)			! 1f = nonmaskable interrupt
287	UTRAP(0x20)
288	UTRAP(0x21)
289	UTRAP(0x22)
290	UTRAP(0x23)
291	UTRAP(0x24)
292	UTRAP(0x25)
293	UTRAP(0x26)
294	UTRAP(0x27)
295	UTRAP(0x28)
296	UTRAP(0x29)
297	UTRAP(0x2a)
298	UTRAP(0x2b)
299	UTRAP(0x2c)
300	UTRAP(0x2d)
301	UTRAP(0x2e)
302	UTRAP(0x2f)
303	UTRAP(0x30)
304	UTRAP(0x31)
305	UTRAP(0x32)
306	UTRAP(0x33)
307	UTRAP(0x34)
308	UTRAP(0x35)
309	TRAP(T_CPDISABLED)	! 36 = coprocessor instr, EC bit off in psr
310	UTRAP(0x37)
311	UTRAP(0x38)
312	UTRAP(0x39)
313	UTRAP(0x3a)
314	UTRAP(0x3b)
315	UTRAP(0x3c)
316	UTRAP(0x3d)
317	UTRAP(0x3e)
318	UTRAP(0x3f)
319	TRAP(T_CPEXCEPTION)	! 40 = coprocessor exception
320	UTRAP(0x41)
321	UTRAP(0x42)
322	UTRAP(0x43)
323	UTRAP(0x44)
324	UTRAP(0x45)
325	UTRAP(0x46)
326	UTRAP(0x47)
327	UTRAP(0x48)
328	UTRAP(0x49)
329	UTRAP(0x4a)
330	UTRAP(0x4b)
331	UTRAP(0x4c)
332	UTRAP(0x4d)
333	UTRAP(0x4e)
334	UTRAP(0x4f)
335	UTRAP(0x50)
336	UTRAP(0x51)
337	UTRAP(0x52)
338	UTRAP(0x53)
339	UTRAP(0x54)
340	UTRAP(0x55)
341	UTRAP(0x56)
342	UTRAP(0x57)
343	UTRAP(0x58)
344	UTRAP(0x59)
345	UTRAP(0x5a)
346	UTRAP(0x5b)
347	UTRAP(0x5c)
348	UTRAP(0x5d)
349	UTRAP(0x5e)
350	UTRAP(0x5f)
351	UTRAP(0x60)
352	UTRAP(0x61)
353	UTRAP(0x62)
354	UTRAP(0x63)
355	UTRAP(0x64)
356	UTRAP(0x65)
357	UTRAP(0x66)
358	UTRAP(0x67)
359	UTRAP(0x68)
360	UTRAP(0x69)
361	UTRAP(0x6a)
362	UTRAP(0x6b)
363	UTRAP(0x6c)
364	UTRAP(0x6d)
365	UTRAP(0x6e)
366	UTRAP(0x6f)
367	UTRAP(0x70)
368	UTRAP(0x71)
369	UTRAP(0x72)
370	UTRAP(0x73)
371	UTRAP(0x74)
372	UTRAP(0x75)
373	UTRAP(0x76)
374	UTRAP(0x77)
375	UTRAP(0x78)
376	UTRAP(0x79)
377	UTRAP(0x7a)
378	UTRAP(0x7b)
379	UTRAP(0x7c)
380	UTRAP(0x7d)
381	UTRAP(0x7e)
382	UTRAP(0x7f)
383	SUN_SYSCALL		! 80 = sun syscall
384	BPT			! 81 = pseudo breakpoint instruction
385	TRAP(T_DIV0)		! 82 = divide by zero
386	TRAP(T_FLUSHWIN)	! 83 = flush windows
387	TRAP(T_CLEANWIN)	! 84 = provide clean windows
388	TRAP(T_RANGECHECK)	! 85 = ???
389	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
390	TRAP(T_INTOF)		! 87 = integer overflow
391	BPT_KGDB_EXEC		! 88 = enter kernel gdb on kernel startup
392	SYSCALL			! 89 = bsd syscall
393	STRAP(0x8a)
394	STRAP(0x8b)
395	STRAP(0x8c)
396	STRAP(0x8d)
397	STRAP(0x8e)
398	STRAP(0x8f)
399	STRAP(0x90)
400	STRAP(0x91)
401	STRAP(0x92)
402	STRAP(0x93)
403	STRAP(0x94)
404	STRAP(0x95)
405	STRAP(0x96)
406	STRAP(0x97)
407	STRAP(0x98)
408	STRAP(0x99)
409	STRAP(0x9a)
410	STRAP(0x9b)
411	STRAP(0x9c)
412	STRAP(0x9d)
413	STRAP(0x9e)
414	STRAP(0x9f)
415	STRAP(0xa0)
416	STRAP(0xa1)
417	STRAP(0xa2)
418	STRAP(0xa3)
419	STRAP(0xa4)
420	STRAP(0xa5)
421	STRAP(0xa6)
422	STRAP(0xa7)
423	STRAP(0xa8)
424	STRAP(0xa9)
425	STRAP(0xaa)
426	STRAP(0xab)
427	STRAP(0xac)
428	STRAP(0xad)
429	STRAP(0xae)
430	STRAP(0xaf)
431	STRAP(0xb0)
432	STRAP(0xb1)
433	STRAP(0xb2)
434	STRAP(0xb3)
435	STRAP(0xb4)
436	STRAP(0xb5)
437	STRAP(0xb6)
438	STRAP(0xb7)
439	STRAP(0xb8)
440	STRAP(0xb9)
441	STRAP(0xba)
442	STRAP(0xbb)
443	STRAP(0xbc)
444	STRAP(0xbd)
445	STRAP(0xbe)
446	STRAP(0xbf)
447	STRAP(0xc0)
448	STRAP(0xc1)
449	STRAP(0xc2)
450	STRAP(0xc3)
451	STRAP(0xc4)
452	STRAP(0xc5)
453	STRAP(0xc6)
454	STRAP(0xc7)
455	STRAP(0xc8)
456	STRAP(0xc9)
457	STRAP(0xca)
458	STRAP(0xcb)
459	STRAP(0xcc)
460	STRAP(0xcd)
461	STRAP(0xce)
462	STRAP(0xcf)
463	STRAP(0xd0)
464	STRAP(0xd1)
465	STRAP(0xd2)
466	STRAP(0xd3)
467	STRAP(0xd4)
468	STRAP(0xd5)
469	STRAP(0xd6)
470	STRAP(0xd7)
471	STRAP(0xd8)
472	STRAP(0xd9)
473	STRAP(0xda)
474	STRAP(0xdb)
475	STRAP(0xdc)
476	STRAP(0xdd)
477	STRAP(0xde)
478	STRAP(0xdf)
479	STRAP(0xe0)
480	STRAP(0xe1)
481	STRAP(0xe2)
482	STRAP(0xe3)
483	STRAP(0xe4)
484	STRAP(0xe5)
485	STRAP(0xe6)
486	STRAP(0xe7)
487	STRAP(0xe8)
488	STRAP(0xe9)
489	STRAP(0xea)
490	STRAP(0xeb)
491	STRAP(0xec)
492	STRAP(0xed)
493	STRAP(0xee)
494	STRAP(0xef)
495	STRAP(0xf0)
496	STRAP(0xf1)
497	STRAP(0xf2)
498	STRAP(0xf3)
499	STRAP(0xf4)
500	STRAP(0xf5)
501	STRAP(0xf6)
502	STRAP(0xf7)
503	STRAP(0xf8)
504	STRAP(0xf9)
505	STRAP(0xfa)
506	STRAP(0xfb)
507	STRAP(0xfc)
508	STRAP(0xfd)
509	STRAP(0xfe)
510	STRAP(0xff)
511
512	/* the message buffer is always mapped */
513_msgbufmapped:
514	.word	1
515
516#ifdef DEBUG
517/*
518 * A hardware red zone is impossible.  We simulate one in software by
519 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
520 * This is expensive and is only enabled when debugging.
521 */
522#define	REDSIZE	(8*96)		/* some room for bouncing */
523#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
524	.data
525_redzone:
526	.word	_idle_u + REDSIZE
527_redstack:
528	.skip	REDSTACK
529	.text
530Lpanic_red:
531	.asciz	"stack overflow"
532	ALIGN
533
534	/* set stack pointer redzone to base+minstack; alters base */
535#define	SET_SP_REDZONE(base, tmp) \
536	add	base, REDSIZE, base; \
537	sethi	%hi(_redzone), tmp; \
538	st	base, [tmp + %lo(_redzone)]
539
540	/* variant with a constant */
541#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
542	set	(const) + REDSIZE, tmp1; \
543	sethi	%hi(_redzone), tmp2; \
544	st	tmp1, [tmp2 + %lo(_redzone)]
545
546	/* check stack pointer against redzone (uses two temps) */
547#define	CHECK_SP_REDZONE(t1, t2) \
548	sethi	%hi(_redzone), t1; \
549	ld	[t1 + %lo(_redzone)], t2; \
550	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
551	bgeu	7f; nop;	/* and can continue normally */ \
552	/* move to panic stack */ \
553	st	%g0, [t1 + %lo(_redzone)]; \
554	set	_redstack + REDSTACK - 96, %sp; \
555	/* prevent panic() from lowering ipl */ \
556	sethi	%hi(_panicstr), t2; \
557	set	Lpanic_red, t2; \
558	st	t2, [t1 + %lo(_panicstr)]; \
559	rd	%psr, t1;		/* t1 = splhigh() */ \
560	or	t1, PSR_PIL, t2; \
561	wr	t2, 0, %psr; \
562	wr	t2, PSR_ET, %psr;	/* turn on traps */ \
563	nop; nop; nop; \
564	save	%sp, -96, %sp;		/* preserve current window */ \
565	sethi	%hi(Lpanic_red), %o0; \
566	call	_panic; or %o0, %lo(Lpanic_red), %o0; \
5677:
568
569#else
570
571#define	SET_SP_REDZONE(base, tmp)
572#define	SET_SP_REDZONE_CONST(const, t1, t2)
573#define	CHECK_SP_REDZONE(t1, t2)
574#endif
575
576/*
577 * The window code must verify user stack addresses before using them.
578 * A user stack pointer is invalid if:
579 *	- it is not on an 8 byte boundary;
580 *	- its pages (a register window, being 64 bytes, can occupy
581 *	  two pages) are not readable or writable.
582 * We define three separate macros here for testing user stack addresses.
583 *
584 * PTE_OF_ADDR locates a PTE, branching to a `bad address'
585 *	handler if the stack pointer points into the hole in the
586 *	address space (i.e., top 3 bits are not either all 1 or all 0);
587 * CMP_PTE_USER_READ compares the located PTE against `user read' mode;
588 * CMP_PTE_USER_WRITE compares the located PTE against `user write' mode.
589 * The compares give `equal' if read or write is OK.
590 *
591 * Note that the user stack pointer usually points into high addresses
592 * (top 3 bits all 1), so that is what we check first.
593 *
594 * The code below also assumes that PTE_OF_ADDR is safe in a delay
595 * slot; it is, at it merely sets its `pte' register to a temporary value.
596 */
597	/* input: addr, output: pte; aux: bad address label */
598#define	PTE_OF_ADDR(addr, pte, bad) \
599	sra	addr, PG_VSHIFT, pte; \
600	cmp	pte, -1; \
601	be,a	1f; andn addr, 4095, pte; \
602	tst	pte; \
603	bne	bad; EMPTY; \
604	andn	addr, 4095, pte; \
6051:
606
607	/* input: pte; output: condition codes */
608#define	CMP_PTE_USER_READ(pte) \
609	lda	[pte] ASI_PTE, pte; \
610	srl	pte, PG_PROTSHIFT, pte; \
611	andn	pte, (PG_W >> PG_PROTSHIFT), pte; \
612	cmp	pte, PG_PROTUREAD
613
614	/* input: pte; output: condition codes */
615#define	CMP_PTE_USER_WRITE(pte) \
616	lda	[pte] ASI_PTE, pte; \
617	srl	pte, PG_PROTSHIFT, pte; \
618	cmp	pte, PG_PROTUWRITE
619
620/*
621 * The calculations in PTE_OF_ADDR and CMP_PTE_USER_* are rather slow:
622 * in particular, according to Gordon Irlam of the University of Adelaide
623 * in Australia, these consume at least 18 cycles on an SS1 and 37 on an
624 * SS2.  Hence, we try to avoid them in the common case.
625 *
626 * A chunk of 64 bytes is on a single page if and only if:
627 *
628 *	((base + 64 - 1) & ~4095) == (base & ~4095)
629 *
630 * Equivalently (and faster to test), the low order bits (base & 4095) must
631 * be small enough so that the sum (base + 63) does not carry out into the
632 * upper page-address bits, i.e.,
633 *
634 *	(base & 4095) < (4096 - 63)
635 *
636 * so we allow testing that here.  This macro is also assumed to be safe
637 * in a delay slot (modulo overwriting its temporary).
638 */
639#define	SLT_IF_1PAGE_RW(addr, tmp) \
640	and	addr, 4095, tmp; \
641	cmp	tmp, (4096 - 63)
642
643/*
644 * Every trap that enables traps must set up stack space.
645 * If the trap is from user mode, this involves switching to the kernel
646 * stack for the current process, and we must also set cpcb->pcb_uw
647 * so that the window overflow handler can tell user windows from kernel
648 * windows.
649 *
650 * The number of user windows is:
651 *
652 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
653 *
654 * (where pcb_wim = log2(current %wim) and CWP = low 5 bits of %psr).
655 * We compute this expression by table lookup in uwtab[CWP - pcb_wim],
656 * which has been set up as:
657 *
658 *	for i in [-nwin+1 .. nwin-1]
659 *		uwtab[i] = (nwin - 1 - i) % nwin;
660 *
661 * (If you do not believe this works, try it for yourself.)
662 *
663 * We also keep one or two more tables:
664 *
665 *	for i in 0..nwin-1
666 *		wmask[i] = 1 << ((i + 1) % nwindows);
667 *
668 * wmask[CWP] tells whether a `rett' would return into the invalid window.
669 */
670	.data
671	.skip	32			! alignment byte & negative indicies
672uwtab:	.skip	32			! u_char uwtab[-31..31];
673wmask:	.skip	32			! u_char wmask[0..31];
674
675	.text
676/*
677 * Things begin to grow uglier....
678 *
679 * Each trap handler may (always) be running in the trap window.
680 * If this is the case, it cannot enable further traps until it writes
681 * the register windows into the stack (or, if the stack is no good,
682 * the current pcb).
683 *
684 * ASSUMPTIONS: TRAP_SETUP() is called with:
685 *	%l0 = %psr
686 *	%l1 = return pc
687 *	%l2 = return npc
688 *	%l3 = (some value that must not be altered)
689 * which means we have 4 registers to work with.
690 *
691 * The `stackspace' argument is the number of stack bytes to allocate
692 * for register-saving, and must be at least -64 (and typically more,
693 * for global registers and %y).
694 *
695 * Trapframes should use -CCFSZ-80.  (80 = sizeof(struct trapframe);
696 * see trap.h.  This basically means EVERYONE.  Interrupt frames could
697 * get away with less, but currently do not.)
698 *
699 * The basic outline here is:
700 *
701 *	if (trap came from kernel mode) {
702 *		if (we are in the trap window)
703 *			save it away;
704 *		%sp = %fp - stackspace;
705 *	} else {
706 *		compute the number of user windows;
707 *		if (we are in the trap window)
708 *			save it away;
709 *		%sp = (top of kernel stack) - stackspace;
710 *	}
711 *
712 * Again, the number of user windows is:
713 *
714 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
715 *
716 * (where pcb_wim = log2(current %wim) and CWP is the low 5 bits of %psr),
717 * and this is computed as `uwtab[CWP - pcb_wim]'.
718 *
719 * NOTE: if you change this code, you will have to look carefully
720 * at the window overflow and underflow handlers and make sure they
721 * have similar changes made as needed.
722 */
723#define	CALL_CLEAN_TRAP_WINDOW \
724	sethi	%hi(clean_trap_window), %l7; \
725	jmpl	%l7 + %lo(clean_trap_window), %l4; \
726	 mov	%g7, %l7	/* save %g7 in %l7 for clean_trap_window */
727
728#define	TRAP_SETUP(stackspace) \
729	rd	%wim, %l4; \
730	mov	1, %l5; \
731	sll	%l5, %l0, %l5; \
732	btst	PSR_PS, %l0; \
733	bz	1f; \
734	 btst	%l5, %l4; \
735	/* came from kernel mode; cond codes indicate trap window */ \
736	bz,a	3f; \
737	 add	%fp, stackspace, %sp;	/* want to just set %sp */ \
738	CALL_CLEAN_TRAP_WINDOW;		/* but maybe need to clean first */ \
739	b	3f; \
740	 add	%fp, stackspace, %sp; \
7411: \
742	/* came from user mode: compute pcb_nw */ \
743	sethi	%hi(_cpcb), %l6; \
744	ld	[%l6 + %lo(_cpcb)], %l6; \
745	ld	[%l6 + PCB_WIM], %l5; \
746	and	%l0, 31, %l4; \
747	sub	%l4, %l5, %l5; \
748	set	uwtab, %l4; \
749	ldub	[%l4 + %l5], %l5; \
750	st	%l5, [%l6 + PCB_UW]; \
751	/* cond codes still indicate whether in trap window */ \
752	bz,a	2f; \
753	 sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
754	/* yes, in trap window; must clean it */ \
755	CALL_CLEAN_TRAP_WINDOW; \
756	sethi	%hi(_cpcb), %l6; \
757	ld	[%l6 + %lo(_cpcb)], %l6; \
758	sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
7592: \
760	/* trap window is (now) clean: set %sp */ \
761	or	%l5, %lo(UPAGES*NBPG+(stackspace)), %l5; \
762	add	%l6, %l5, %sp; \
763	SET_SP_REDZONE(%l6, %l5); \
7643: \
765	CHECK_SP_REDZONE(%l6, %l5)
766
767/*
768 * Interrupt setup is almost exactly like trap setup, but we need to
769 * go to the interrupt stack if (a) we came from user mode or (b) we
770 * came from kernel mode on the kernel stack.
771 */
772#define	INTR_SETUP(stackspace) \
773	rd	%wim, %l4; \
774	mov	1, %l5; \
775	sll	%l5, %l0, %l5; \
776	btst	PSR_PS, %l0; \
777	bz	1f; \
778	 btst	%l5, %l4; \
779	/* came from kernel mode; cond codes still indicate trap window */ \
780	bz,a	0f; \
781	 sethi	%hi(_eintstack), %l7; \
782	CALL_CLEAN_TRAP_WINDOW; \
783	sethi	%hi(_eintstack), %l7; \
7840:	/* now if %fp >= eintstack, we were on the kernel stack */ \
785	cmp	%fp, %l7; \
786	bge,a	3f; \
787	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
788	b	4f; \
789	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
7901: \
791	/* came from user mode: compute pcb_nw */ \
792	sethi	%hi(_cpcb), %l6; \
793	ld	[%l6 + %lo(_cpcb)], %l6; \
794	ld	[%l6 + PCB_WIM], %l5; \
795	and	%l0, 31, %l4; \
796	sub	%l4, %l5, %l5; \
797	set	uwtab, %l4; \
798	ldub	[%l4 + %l5], %l5; \
799	st	%l5, [%l6 + PCB_UW]; \
800	/* cond codes still indicate whether in trap window */ \
801	bz,a	2f; \
802	 sethi	%hi(_eintstack), %l7; \
803	/* yes, in trap window; must save regs */ \
804	CALL_CLEAN_TRAP_WINDOW; \
805	sethi	%hi(_eintstack), %l7; \
8062: \
807	add	%l7, stackspace, %sp; \
8083: \
809	SET_SP_REDZONE_CONST(_intstack, %l6, %l5); \
8104: \
811	CHECK_SP_REDZONE(%l6, %l5)
812
813/*
814 * Handler for making the trap window shiny clean.
815 *
816 * On entry:
817 *	cpcb->pcb_nw = number of user windows
818 *	%l0 = %psr
819 *	%l1 must not be clobbered
820 *	%l2 must not be clobbered
821 *	%l3 must not be clobbered
822 *	%l4 = address for `return'
823 *	%l7 = saved %g7 (we put this in a delay slot above, to save work)
824 *
825 * On return:
826 *	%wim has changed, along with cpcb->pcb_wim
827 *	%g7 has been restored
828 *
829 * Normally, we push only one window.
830 */
831clean_trap_window:
832	mov	%g5, %l5		! save %g5
833	mov	%g6, %l6		! ... and %g6
834/*	mov	%g7, %l7		! ... and %g7 (already done for us) */
835	sethi	%hi(_cpcb), %g6		! get current pcb
836	ld	[%g6 + %lo(_cpcb)], %g6
837
838	/* Figure out whether it is a user window (cpcb->pcb_uw > 0). */
839	ld	[%g6 + PCB_UW], %g7
840	deccc	%g7
841	bge	ctw_user
842	 save	%g0, %g0, %g0		! in any case, enter window to save
843
844	/* The window to be pushed is a kernel window. */
845	std	%l0, [%sp + (0*8)]
846ctw_merge:
847	std	%l2, [%sp + (1*8)]
848	std	%l4, [%sp + (2*8)]
849	std	%l6, [%sp + (3*8)]
850	std	%i0, [%sp + (4*8)]
851	std	%i2, [%sp + (5*8)]
852	std	%i4, [%sp + (6*8)]
853	std	%i6, [%sp + (7*8)]
854
855	/* Set up new window invalid mask, and update cpcb->pcb_wim. */
856	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp
857	mov	1, %g5			! g5 = 1 << new_cwp;
858	sll	%g5, %g7, %g5
859	wr	%g5, 0, %wim		! setwim(g5);
860	and	%g7, 31, %g7		! cpcb->pcb_wim = g7 & 31;
861	st	%g7, [%g6 + PCB_WIM]
862	nop
863	restore				! back to trap window
864
865	mov	%l5, %g5		! restore g5
866	mov	%l6, %g6		! ... and g6
867	jmp	%l4 + 8			! return to caller
868	 mov	%l7, %g7		! ... and g7
869	/* NOTREACHED */
870
871ctw_user:
872	/*
873	 * The window to be pushed is a user window.
874	 * We must verify the stack pointer (alignment & permissions).
875	 * See comments above definition of PTE_OF_ADDR.
876	 */
877	st	%g7, [%g6 + PCB_UW]	! cpcb->pcb_uw--;
878	btst	7, %sp			! if not aligned,
879	bne	ctw_invalid		! choke on it
880	 EMPTY
881	PTE_OF_ADDR(%sp, %g7, ctw_invalid)
882	CMP_PTE_USER_WRITE(%g7)		! likewise if not writable
883	bne	ctw_invalid
884	 EMPTY
885	SLT_IF_1PAGE_RW(%sp, %g7)
886	bl,a	ctw_merge		! all ok if only 1
887	 std	%l0, [%sp]
888	add	%sp, 7*8, %g5		! check last addr too
889	PTE_OF_ADDR(%g5, %g7, ctw_invalid)
890	CMP_PTE_USER_WRITE(%g7)
891	be,a	ctw_merge		! all ok: store <l0,l1> and merge
892	 std	%l0, [%sp]
893
894	/*
895	 * The window we wanted to push could not be pushed.
896	 * Instead, save ALL user windows into the pcb.
897	 * We will notice later that we did this, when we
898	 * get ready to return from our trap or syscall.
899	 *
900	 * The code here is run rarely and need not be optimal.
901	 */
902ctw_invalid:
903	/*
904	 * Reread cpcb->pcb_uw.  We decremented this earlier,
905	 * so it is off by one.
906	 */
907	ld	[%g6 + PCB_UW], %g7	! (number of user windows) - 1
908	add	%g6, PCB_RW, %g5
909
910	/* save g7+1 windows, starting with the current one */
9111:					! do {
912	std	%l0, [%g5 + (0*8)]	!	rw->rw_local[0] = l0;
913	std	%l2, [%g5 + (1*8)]	!	...
914	std	%l4, [%g5 + (2*8)]
915	std	%l6, [%g5 + (3*8)]
916	std	%i0, [%g5 + (4*8)]
917	std	%i2, [%g5 + (5*8)]
918	std	%i4, [%g5 + (6*8)]
919	std	%i6, [%g5 + (7*8)]
920	deccc	%g7			!	if (n > 0) save(), rw++;
921	bge,a	1b			! } while (--n >= 0);
922	 save	%g5, 64, %g5
923
924	/* stash sp for bottommost window */
925	st	%sp, [%g5 + 64 + (7*8)]
926
927	/* set up new wim */
928	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp;
929	mov	1, %g5			! g5 = 1 << new_cwp;
930	sll	%g5, %g7, %g5
931	wr	%g5, 0, %wim		! wim = g5;
932	and	%g7, 31, %g7
933	st	%g7, [%g6 + PCB_WIM]	! cpcb->pcb_wim = new_cwp;
934
935	/* fix up pcb fields */
936	ld	[%g6 + PCB_UW], %g7	! n = cpcb->pcb_uw;
937	add	%g7, 1, %g5
938	st	%g5, [%g6 + PCB_NSAVED]	! cpcb->pcb_nsaved = n + 1;
939	st	%g0, [%g6 + PCB_UW]	! cpcb->pcb_uw = 0;
940
941	/* return to trap window */
9421:	deccc	%g7			! do {
943	bge	1b			!	restore();
944	 restore			! } while (--n >= 0);
945
946	mov	%l5, %g5		! restore g5, g6, & g7, and return
947	mov	%l6, %g6
948	jmp	%l4 + 8
949	 mov	%l7, %g7
950	/* NOTREACHED */
951
952
953/*
954 * Each memory access (text or data) fault, from user or kernel mode,
955 * comes here.  We read the error register and figure out what has
956 * happened.
957 *
958 * This cannot be done from C code since we must not enable traps (and
959 * hence may not use the `save' instruction) until we have decided that
960 * the error is or is not an asynchronous one that showed up after a
961 * synchronous error, but which must be handled before the sync err.
962 *
963 * Most memory faults are user mode text or data faults, which can cause
964 * signal delivery or ptracing, for which we must build a full trapframe.
965 * It does not seem worthwhile to work to avoid this in the other cases,
966 * so we store all the %g registers on the stack immediately.
967 *
968 * On entry:
969 *	%l0 = %psr
970 *	%l1 = return pc
971 *	%l2 = return npc
972 *	%l3 = T_TEXTFAULT or T_DATAFAULT
973 *
974 * Internal:
975 *	%l4 = %y, until we call mem_access_fault (then onto trapframe)
976 *	%l5 = IE_reg_addr, if async mem error
977 *
978 * We know about the layout of the error registers here.
979 *	addr	reg
980 *	----	---
981 *	a	AC_SYNC_ERR
982 *	a+4	AC_SYNC_VA
983 *	a+8	AC_ASYNC_ERR
984 *	a+12	AC_ASYNC_VA
985 */
986memfault:
987	TRAP_SETUP(-CCFSZ-80)
988
989	INCR(_cnt+V_FAULTS)		! cnt.v_faults++ (clobbers %o0,%o1)
990
991	st	%g1, [%sp + CCFSZ + 20]	! save g1
992	rd	%y, %l4			! save y
993
994#if AC_SYNC_ERR + 4 != AC_SYNC_VA || \
995    AC_SYNC_ERR + 8 != AC_ASYNC_ERR || AC_SYNC_ERR + 12 != AC_ASYNC_VA
996	help help help		! I, I, I wanna be a lifeguard
997#endif
998	set	AC_SYNC_ERR, %o0
999	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
1000	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1001	inc	4, %o0
1002	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
1003	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1004	btst	SER_MEMERR, %o1		! memory error?
1005	std	%g6, [%sp + CCFSZ + 40]
1006	bz,a	normal_mem_fault	! no, just a regular fault
1007 	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1008
1009	/*
1010	 * We got a synchronous memory error.  It could be one that
1011	 * happened because there were two stores in a row, and the
1012	 * first went into the write buffer, and the second caused this
1013	 * synchronous trap; so there could now be a pending async error.
1014	 * This is in fact the case iff the two va's differ.
1015	 */
1016	inc	4, %o0
1017	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1018	inc	4, %o0
1019	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1020	cmp	%o2, %o4
1021	be,a	1f			! no, not an async err
1022	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1023
1024	/*
1025	 * Handle the async error; ignore the sync error for now
1026	 * (we may end up getting it again, but so what?).
1027	 * This code is essentially the same as that at `nmi' below,
1028	 * but the register usage is different and we cannot merge.
1029	 */
1030	sethi	%hi(IE_reg_addr), %l5	! ienab_bic(IE_ALLIE);
1031	ldub	[%l5 + %lo(IE_reg_addr)], %o0
1032	andn	%o0, IE_ALLIE, %o0
1033	stb	%o0, [%l5 + %lo(IE_reg_addr)]
1034
1035	/*
1036	 * Now reenable traps and call C code.
1037	 * %o1 through %o4 still hold the error reg contents.
1038	 * If memerr() returns, return from the trap.
1039	 */
1040	wr	%l0, PSR_ET, %psr
1041	call	_memerr			! memerr(0, ser, sva, aer, ava)
1042	 clr	%o0
1043
1044	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1045	wr	%l0, 0, %psr		! and disable traps, 3 instr delay
1046	ldd	[%sp + CCFSZ + 24], %g2
1047	ldd	[%sp + CCFSZ + 32], %g4
1048	ldd	[%sp + CCFSZ + 40], %g6
1049	/* now safe to set IE_ALLIE again */
1050	ldub	[%l5 + %lo(IE_reg_addr)], %o1
1051	or	%o1, IE_ALLIE, %o1
1052	stb	%o1, [%l5 + %lo(IE_reg_addr)]
1053	b	return_from_trap
1054	 wr	%l4, 0, %y		! restore y
1055
1056	/*
1057	 * Trap was a synchronous memory error.
1058	 * %o1 through %o4 still hold the error reg contents.
1059	 */
10601:
1061	call	_memerr			! memerr(1, ser, sva, aer, ava)
1062	 mov	1, %o0
1063
1064	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1065	ldd	[%sp + CCFSZ + 24], %g2
1066	ldd	[%sp + CCFSZ + 32], %g4
1067	ldd	[%sp + CCFSZ + 40], %g6
1068	wr	%l4, 0, %y		! restore y
1069	b	return_from_trap
1070	 wr	%l0, 0, %psr
1071	/* NOTREACHED */
1072
1073normal_mem_fault:
1074	/*
1075	 * Trap was some other error; call C code to deal with it.
1076	 * Must finish trap frame (psr,pc,npc,%y,%o0..%o7) in case
1077	 * we decide to deliver a signal or ptrace the process.
1078	 * %g1..%g7 were already set up above.
1079	 */
1080	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
1081	mov	%l3, %o0		! (argument: type)
1082	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
1083	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
1084	mov	%l1, %o3		! (argument: pc)
1085	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1086	std	%i2, [%sp + CCFSZ + 56]
1087	mov	%l0, %o4		! (argument: psr)
1088	std	%i4, [%sp + CCFSZ + 64]
1089	std	%i6, [%sp + CCFSZ + 72]
1090	call	_mem_access_fault	! mem_access_fault(type, ser, sva,
1091					!		pc, psr, &tf);
1092	 add	%sp, CCFSZ, %o5		! (argument: &tf)
1093
1094	ldd	[%sp + CCFSZ + 0], %l0	! load new values
1095	ldd	[%sp + CCFSZ + 8], %l2
1096	wr	%l3, 0, %y
1097	ld	[%sp + CCFSZ + 20], %g1
1098	ldd	[%sp + CCFSZ + 24], %g2
1099	ldd	[%sp + CCFSZ + 32], %g4
1100	ldd	[%sp + CCFSZ + 40], %g6
1101	ldd	[%sp + CCFSZ + 48], %i0
1102	ldd	[%sp + CCFSZ + 56], %i2
1103	ldd	[%sp + CCFSZ + 64], %i4
1104	ldd	[%sp + CCFSZ + 72], %i6
1105
1106	b	return_from_trap	! go return
1107	 wr	%l0, 0, %psr		! (but first disable traps again)
1108
1109
1110/*
1111 * fp_exception has to check to see if we are trying to save
1112 * the FP state, and if so, continue to save the FP state.
1113 *
1114 * We do not even bother checking to see if we were in kernel mode,
1115 * since users have no access to the special_fp_store instruction.
1116 *
1117 * This whole idea was stolen from Sprite.
1118 */
1119fp_exception:
1120	set	special_fp_store, %l4	! see if we came from the special one
1121	cmp	%l1, %l4		! pc == special_fp_store?
1122	bne	slowtrap		! no, go handle per usual
1123	 EMPTY
1124	sethi	%hi(savefpcont), %l4	! yes, "return" to the special code
1125	or	%lo(savefpcont), %l4, %l4
1126	jmp	%l4
1127	 rett	%l4 + 4
1128
1129/*
1130 * slowtrap() builds a trap frame and calls trap().
1131 * This is called `slowtrap' because it *is*....
1132 * We have to build a full frame for ptrace(), for instance.
1133 *
1134 * Registers:
1135 *	%l0 = %psr
1136 *	%l1 = return pc
1137 *	%l2 = return npc
1138 *	%l3 = trap code
1139 */
1140slowtrap:
1141	TRAP_SETUP(-CCFSZ-80)
1142	/*
1143	 * Phew, ready to enable traps and call C code.
1144	 */
1145	mov	%l3, %o0		! put type in %o0 for later
1146Lslowtrap_reenter:
1147	wr	%l0, PSR_ET, %psr	! traps on again
1148	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
1149	rd	%y, %l3
1150	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
1151	st	%g1, [%sp + CCFSZ + 20]
1152	std	%g2, [%sp + CCFSZ + 24]
1153	std	%g4, [%sp + CCFSZ + 32]
1154	std	%g6, [%sp + CCFSZ + 40]
1155	std	%i0, [%sp + CCFSZ + 48]
1156	mov	%l0, %o1		! (psr)
1157	std	%i2, [%sp + CCFSZ + 56]
1158	mov	%l1, %o2		! (pc)
1159	std	%i4, [%sp + CCFSZ + 64]
1160	add	%sp, CCFSZ, %o3		! (&tf)
1161	call	_trap			! trap(type, psr, pc, &tf)
1162	 std	%i6, [%sp + CCFSZ + 72]
1163
1164	ldd	[%sp + CCFSZ], %l0	! load new values
1165	ldd	[%sp + CCFSZ + 8], %l2
1166	wr	%l3, 0, %y
1167	ld	[%sp + CCFSZ + 20], %g1
1168	ldd	[%sp + CCFSZ + 24], %g2
1169	ldd	[%sp + CCFSZ + 32], %g4
1170	ldd	[%sp + CCFSZ + 40], %g6
1171	ldd	[%sp + CCFSZ + 48], %i0
1172	ldd	[%sp + CCFSZ + 56], %i2
1173	ldd	[%sp + CCFSZ + 64], %i4
1174	ldd	[%sp + CCFSZ + 72], %i6
1175	b	return_from_trap
1176	 wr	%l0, 0, %psr
1177
1178/*
1179 * Do a `software' trap by re-entering the trap code, possibly first
1180 * switching from interrupt stack to kernel stack.  This is used for
1181 * scheduling and signal ASTs (which generally occur from softclock or
1182 * tty or net interrupts) and register window saves (which might occur
1183 * from anywhere).
1184 *
1185 * The current window is the trap window, and it is by definition clean.
1186 * We enter with the trap type in %o0.  All we have to do is jump to
1187 * Lslowtrap_reenter above, but maybe after switching stacks....
1188 */
1189softtrap:
1190	sethi	%hi(_eintstack), %l7
1191	cmp	%sp, %l7
1192	bge	Lslowtrap_reenter
1193	 EMPTY
1194	sethi	%hi(_cpcb), %l6
1195	ld	[%l6 + %lo(_cpcb)], %l6
1196	set	UPAGES*NBPG - CCFSZ - 80, %l5
1197	add	%l6, %l5, %l7
1198	SET_SP_REDZONE(%l6, %l5)
1199	b	Lslowtrap_reenter
1200	 mov	%l7, %sp
1201
1202#ifdef KGDB
1203/*
1204 * bpt is entered on all breakpoint traps.
1205 * If this is a kernel breakpoint, we do not want to call trap().
1206 * Among other reasons, this way we can set breakpoints in trap().
1207 */
1208bpt:
1209	btst	PSR_PS, %l0		! breakpoint from kernel?
1210	bz	slowtrap		! no, go do regular trap
1211	 nop
1212
1213	/*
1214	 * Build a trap frame for kgdb_trap_glue to copy.
1215	 * Enable traps but set ipl high so that we will not
1216	 * see interrupts from within breakpoints.
1217	 */
1218	TRAP_SETUP(-CCFSZ-80)
1219	or	%l0, PSR_PIL, %l4	! splhigh()
1220	wr	%l4, 0, %psr		! the manual claims that this
1221	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1222	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1223	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
1224	rd	%y, %l3
1225	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1226	rd	%wim, %l3
1227	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
1228	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1229	std	%g2, [%sp + CCFSZ + 24]	! etc
1230	std	%g4, [%sp + CCFSZ + 32]
1231	std	%g6, [%sp + CCFSZ + 40]
1232	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
1233	std	%i2, [%sp + CCFSZ + 56]	! etc
1234	std	%i4, [%sp + CCFSZ + 64]
1235	std	%i6, [%sp + CCFSZ + 72]
1236
1237	/*
1238	 * Now call kgdb_trap_glue(); if it returns, call trap().
1239	 */
1240	mov	%o0, %l3		! gotta save trap type
1241	call	_kgdb_trap_glue		! kgdb_trap_glue(type, &trapframe)
1242	 add	%sp, CCFSZ, %o1		! (&trapframe)
1243
1244	/*
1245	 * Use slowtrap to call trap---but first erase our tracks
1246	 * (put the registers back the way they were).
1247	 */
1248	mov	%l3, %o0		! slowtrap will need trap type
1249	ld	[%sp + CCFSZ + 12], %l3
1250	wr	%l3, 0, %y
1251	ld	[%sp + CCFSZ + 20], %g1
1252	ldd	[%sp + CCFSZ + 24], %g2
1253	ldd	[%sp + CCFSZ + 32], %g4
1254	b	Lslowtrap_reenter
1255	 ldd	[%sp + CCFSZ + 40], %g6
1256
1257/*
1258 * Enter kernel breakpoint.  Write all the windows (not including the
1259 * current window) into the stack, so that backtrace works.  Copy the
1260 * supplied trap frame to the kgdb stack and switch stacks.
1261 *
1262 * kgdb_trap_glue(type, tf0)
1263 *	int type;
1264 *	struct trapframe *tf0;
1265 */
1266	.globl	_kgdb_trap_glue
1267_kgdb_trap_glue:
1268	save	%sp, -CCFSZ, %sp
1269
1270	call	_write_all_windows
1271	 mov	%sp, %l4		! %l4 = current %sp
1272
1273	/* copy trapframe to top of kgdb stack */
1274	set	_kgdb_stack + KGDB_STACK_SIZE - 80, %l0
1275					! %l0 = tfcopy -> end_of_kgdb_stack
1276	mov	80, %l1
12771:	ldd	[%i1], %l2
1278	inc	8, %i1
1279	deccc	8, %l1
1280	std	%l2, [%l0]
1281	bg	1b
1282	 inc	8, %l0
1283
1284#ifdef DEBUG
1285	/* save old red zone and then turn it off */
1286	sethi	%hi(_redzone), %l7
1287	ld	[%l7 + %lo(_redzone)], %l6
1288	st	%g0, [%l7 + %lo(_redzone)]
1289#endif
1290	/* switch to kgdb stack */
1291	add	%l0, -CCFSZ-80, %sp
1292
1293	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
1294	mov	%i0, %o0
1295	call	_kgdb_trap
1296	add	%l0, -80, %o1
1297	tst	%o0
1298	bnz,a	kgdb_rett
1299	 add	%l0, -80, %g1
1300
1301	/*
1302	 * kgdb_trap() did not handle the trap at all so the stack is
1303	 * still intact.  A simple `restore' will put everything back,
1304	 * after we reset the stack pointer.
1305	 */
1306	mov	%l4, %sp
1307#ifdef DEBUG
1308	st	%l6, [%l7 + %lo(_redzone)]	! restore red zone
1309#endif
1310	ret
1311	restore
1312
1313/*
1314 * Return from kgdb trap.  This is sort of special.
1315 *
1316 * We know that kgdb_trap_glue wrote the window above it, so that we will
1317 * be able to (and are sure to have to) load it up.  We also know that we
1318 * came from kernel land and can assume that the %fp (%i6) we load here
1319 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
1320 * until we have traps disabled, due to the SPARC taking traps at the
1321 * new ipl before noticing that PSR_ET has been turned off.  We are on
1322 * the kgdb stack, so this could be disastrous.
1323 *
1324 * Note that the trapframe argument in %g1 points into the current stack
1325 * frame (current window).  We abandon this window when we move %g1->tf_psr
1326 * into %psr, but we will not have loaded the new %sp yet, so again traps
1327 * must be disabled.
1328 */
1329kgdb_rett:
1330	rd	%psr, %g4		! turn off traps
1331	wr	%g4, PSR_ET, %psr
1332	/* use the three-instruction delay to do something useful */
1333	ld	[%g1], %g2		! pick up new %psr
1334	ld	[%g1 + 12], %g3		! set %y
1335	wr	%g3, 0, %y
1336#ifdef DEBUG
1337	st	%l6, [%l7 + %lo(_redzone)] ! and restore red zone
1338#endif
1339	wr	%g0, 0, %wim		! enable window changes
1340	nop; nop; nop
1341	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
1342	wr	%g2, 0, %psr		! set rett psr (including cond codes)
1343	/* 3 instruction delay before we can use the new window */
1344/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
1345/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
1346/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7
1347
1348	/* now we can use the new window */
1349	mov	%g1, %l4
1350	ld	[%l4 + 4], %l1		! get new pc
1351	ld	[%l4 + 8], %l2		! get new npc
1352	ld	[%l4 + 20], %g1		! set new %g1
1353
1354	/* set up returnee's out registers, including its %sp */
1355	ldd	[%l4 + 48], %i0
1356	ldd	[%l4 + 56], %i2
1357	ldd	[%l4 + 64], %i4
1358	ldd	[%l4 + 72], %i6
1359
1360	/* load returnee's window, making the window above it be invalid */
1361	restore
1362	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
1363	rd	%psr, %l0
1364	sll	%l1, %l0, %l1
1365	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
1366	sethi	%hi(_cpcb), %l1
1367	ld	[%l1 + %lo(_cpcb)], %l1
1368	and	%l0, 31, %l0		! CWP = %psr & 31;
1369	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
1370	save	%g0, %g0, %g0		! back to window to reload
1371	LOADWIN(%sp)
1372	save	%g0, %g0, %g0		! back to trap window
1373	/* note, we have not altered condition codes; safe to just rett */
1374	RETT
1375#endif
1376
1377/*
1378 * syscall() builds a trap frame and calls syscall().
1379 * sun_syscall is same but delivers sun system call number
1380 * XXX	should not have to save&reload ALL the registers just for
1381 *	ptrace...
1382 */
1383#ifdef COMPAT_SUNOS
1384sun_syscall:
1385	TRAP_SETUP(-CCFSZ-80)
1386	b	sys_merge
1387	 mov	1, %o3			! third arg to syscall: sun compat
1388syscall:
1389	TRAP_SETUP(-CCFSZ-80)
1390	clr	%o3			! third arg to syscall: native bsd
1391sys_merge:
1392#else
1393syscall:
1394	TRAP_SETUP(-CCFSZ-80)
1395#endif
1396	wr	%l0, PSR_ET, %psr
1397	std	%l0, [%sp + CCFSZ + 0]	! tf_psr, tf_pc
1398	rd	%y, %l3
1399	std	%l2, [%sp + CCFSZ + 8]	! tf_npc, tf_y
1400	st	%g1, [%sp + CCFSZ + 20]	! tf_g[1]
1401	std	%g2, [%sp + CCFSZ + 24]	! tf_g[2], tf_g[3]
1402	std	%g4, [%sp + CCFSZ + 32]	! etc
1403	std	%g6, [%sp + CCFSZ + 40]
1404	mov	%g1, %o0		! (code)
1405	std	%i0, [%sp + CCFSZ + 48]
1406	add	%sp, CCFSZ, %o1		! (&tf)
1407	std	%i2, [%sp + CCFSZ + 56]
1408	mov	%l1, %o2		! (pc)
1409	std	%i4, [%sp + CCFSZ + 64]
1410	call	_syscall		! syscall(code, &tf, pc, suncompat)
1411	 std	%i6, [%sp + CCFSZ + 72]
1412	! now load em all up again, sigh
1413	ldd	[%sp + CCFSZ + 0], %l0	! new %psr, new pc
1414	ldd	[%sp + CCFSZ + 8], %l2	! new npc, new %y
1415	wr	%l3, 0, %y
1416	/* see `dostart' for the reason for this label */
1417init_syscall_ret:
1418	ld	[%sp + CCFSZ + 20], %g1
1419	ldd	[%sp + CCFSZ + 24], %g2
1420	ldd	[%sp + CCFSZ + 32], %g4
1421	ldd	[%sp + CCFSZ + 40], %g6
1422	ldd	[%sp + CCFSZ + 48], %i0
1423	ldd	[%sp + CCFSZ + 56], %i2
1424	ldd	[%sp + CCFSZ + 64], %i4
1425	ldd	[%sp + CCFSZ + 72], %i6
1426	b	return_from_trap
1427	 wr	%l0, 0, %psr
1428
1429/*
1430 * Interrupts.  Software interrupts must be cleared from the software
1431 * interrupt enable register.  Rather than calling ienab_bic for each,
1432 * we do them in-line before enabling traps.
1433 *
1434 * After preliminary setup work, the interrupt is passed to each
1435 * registered handler in turn.  These are expected to return nonzero if
1436 * they took care of the interrupt.  If a handler claims the interrupt,
1437 * we exit (hardware interrupts are latched in the requestor so we'll
1438 * just take another interrupt in the unlikely event of simultaneous
1439 * interrupts from two different devices at the same level).  If we go
1440 * through all the registered handlers and no one claims it, we report a
1441 * stray interrupt.  This is more or less done as:
1442 *
1443 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
1444 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
1445 *			return;
1446 *	strayintr(&frame);
1447 *
1448 * Software interrupts are almost the same with three exceptions:
1449 * (1) we clear the interrupt from the software interrupt enable
1450 *     register before calling any handler (we have to clear it first
1451 *     to avoid an interrupt-losing race),
1452 * (2) we always call all the registered handlers (there is no way
1453 *     to tell if the single bit in the software interrupt register
1454 *     represents one or many requests)
1455 * (3) we never announce a stray interrupt (because of (1), another
1456 *     interrupt request can come in while we're in the handler.  If
1457 *     the handler deal with everything for both the original & the
1458 *     new request, we'll erroneously report a stray interrupt when
1459 *     we take the software interrupt for the new request.
1460 *
1461 * Inputs:
1462 *	%l0 = %psr
1463 *	%l1 = return pc
1464 *	%l2 = return npc
1465 *	%l3 = interrupt level
1466 *	(software interrupt only) %l4 = bits to clear in interrupt register
1467 *
1468 * Internal:
1469 *	%l4, %l5: local variables
1470 *	%l6 = %y
1471 *	%l7 = %g1
1472 *	%g2..%g7 go to stack
1473 *
1474 * An interrupt frame is built in the space for a full trapframe;
1475 * this contains the psr, pc, npc, and interrupt level.
1476 */
1477	.comm	_intrhand, 15 * 8	! intrhand[0..14]; 0 => error
1478softintr:
1479	sethi	%hi(IE_reg_addr), %l6
1480	ldub	[%l6 + %lo(IE_reg_addr)], %l5
1481	andn	%l5, %l4, %l5
1482	stb	%l5, [%l6 + %lo(IE_reg_addr)]
1483	INTR_SETUP(-CCFSZ-80)
1484	std	%g2, [%sp + CCFSZ + 24]	! save registers
1485	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1486	mov	%g1, %l7
1487	rd	%y, %l6
1488	std	%g4, [%sp + CCFSZ + 32]
1489	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1490	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1491	std	%g6, [%sp + CCFSZ + 40]
1492	or	%l5, %l4, %l4		!			;
1493	wr	%l4, 0, %psr		! the manual claims this
1494	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1495	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1496	sll	%l3, 2, %l5
1497	set	_intrcnt, %l4		! intrcnt[intlev]++;
1498	ld	[%l4 + %l5], %o0
1499	std	%l2, [%sp + CCFSZ + 8]
1500	inc	%o0
1501	st	%o0, [%l4 + %l5]
1502	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1503	ld	[%l4 + %l5], %l4
1504	b	3f
1505	 st	%fp, [%sp + CCFSZ + 16]
1506
15071:	ld	[%l4], %o1
1508	ld	[%l4 + 4], %o0
1509	tst	%o0
1510	bz,a	2f
1511	 add	%sp, CCFSZ, %o0
15122:	jmpl	%o1, %o7		!	(void)(*ih->ih_fun)(...)
1513	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
15143:	tst	%l4			! while ih != NULL
1515	bnz	1b
1516	 nop
1517	mov	%l7, %g1
1518	wr	%l6, 0, %y
1519	ldd	[%sp + CCFSZ + 24], %g2
1520	ldd	[%sp + CCFSZ + 32], %g4
1521	ldd	[%sp + CCFSZ + 40], %g6
1522	b	return_from_trap
1523	 wr	%l0, 0, %psr
1524
1525	/*
1526	 * _sparc_interrupt is exported for paranoia checking (see intr.c).
1527	 */
1528	.globl	_sparc_interrupt
1529_sparc_interrupt:
1530	INTR_SETUP(-CCFSZ-80)
1531	std	%g2, [%sp + CCFSZ + 24]	! save registers
1532	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1533	mov	%g1, %l7
1534	rd	%y, %l6
1535	std	%g4, [%sp + CCFSZ + 32]
1536	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1537	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1538	std	%g6, [%sp + CCFSZ + 40]
1539	or	%l5, %l4, %l4		!			;
1540	wr	%l4, 0, %psr		! the manual claims this
1541	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1542	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1543	sll	%l3, 2, %l5
1544	set	_intrcnt, %l4		! intrcnt[intlev]++;
1545	ld	[%l4 + %l5], %o0
1546	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
1547	inc	%o0
1548	st	%o0, [%l4 + %l5]
1549	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1550	ld	[%l4 + %l5], %l4
1551	b	3f
1552	 st	%fp, [%sp + CCFSZ + 16]
1553
15541:	ld	[%l4], %o1
1555	ld	[%l4 + 4], %o0
1556	tst	%o0
1557	bz,a	2f
1558	 add	%sp, CCFSZ, %o0
15592:	jmpl	%o1, %o7		!	handled = (*ih->ih_fun)(...)
1560	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
1561	tst	%o0
1562	bnz	4f			! if (handled) break
1563	 nop
15643:	tst	%l4
1565	bnz	1b			! while (ih)
1566	 nop
1567	call	_strayintr		!	strayintr(&intrframe)
1568	 add	%sp, CCFSZ, %o0
1569	/* all done: restore registers and go return */
15704:	mov	%l7, %g1
1571	wr	%l6, 0, %y
1572	ldd	[%sp + CCFSZ + 24], %g2
1573	ldd	[%sp + CCFSZ + 32], %g4
1574	ldd	[%sp + CCFSZ + 40], %g6
1575	b	return_from_trap
1576	 wr	%l0, 0, %psr
1577
1578#ifdef notyet
1579/*
1580 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
1581 * software interrupt, and get out.  Do the software interrupt directly
1582 * if we would just take it on the way out.
1583 *
1584 * Input:
1585 *	%l0 = %psr
1586 *	%l1 = return pc
1587 *	%l2 = return npc
1588 * Internal:
1589 *	%l3 = zs device
1590 *	%l4, %l5 = temporary
1591 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
1592 *	%l7 = zs soft status
1593 */
1594zshard:
1595#endif /* notyet */
1596
1597/*
1598 * Level 15 interrupt.  An async memory error has occurred;
1599 * take care of it (typically by panicking, but hey...).
1600 *	%l0 = %psr
1601 *	%l1 = return pc
1602 *	%l2 = return npc
1603 *	%l3 = 15 * 4 (why? just because!)
1604 *
1605 * Internal:
1606 *	%l4 = %y
1607 *	%l5 = %g1
1608 *	%l6 = %g6
1609 *	%l7 = %g7
1610 *  g2, g3, g4, g5 go to stack
1611 *
1612 * This code is almost the same as that in mem_access_fault,
1613 * except that we already know the problem is not a `normal' fault,
1614 * and that we must be extra-careful with interrupt enables.
1615 */
1616nmi:
1617	INTR_SETUP(-CCFSZ-80)
1618	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1619	/*
1620	 * Level 15 interrupts are nonmaskable, so with traps off,
1621	 * disable all interrupts to prevent recursion.
1622	 */
1623	sethi	%hi(IE_reg_addr), %o0
1624	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1625	andn	%o1, IE_ALLIE, %o1
1626	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1627	wr	%l0, PSR_ET, %psr	! okay, turn traps on again
1628
1629	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
1630	rd	%y, %l4			! save y
1631
1632	! must read the sync error register too.
1633	set	AC_SYNC_ERR, %o0
1634	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1635	inc	4, %o0
1636	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1637	std	%g4, [%sp + CCFSZ + 8]	! save g4,g5
1638	mov	%g1, %l5		! save g1,g6,g7
1639	mov	%g6, %l6
1640	mov	%g7, %l7
1641	inc	4, %o0
1642	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1643	inc	4, %o0
1644	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1645
1646	! and call C code
1647	call	_memerr			! memerr(0, ser, sva, aer, ava)
1648	clr	%o0
1649
1650	mov	%l5, %g1		! restore g1 through g7
1651	ldd	[%sp + CCFSZ + 0], %g2
1652	ldd	[%sp + CCFSZ + 8], %g4
1653	wr	%l0, 0, %psr		! re-disable traps
1654	mov	%l6, %g6
1655	mov	%l7, %g7
1656
1657	! set IE_ALLIE again (safe, we disabled traps again above)
1658	sethi	%hi(IE_reg_addr), %o0
1659	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1660	or	%o1, IE_ALLIE, %o1
1661	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1662	b	return_from_trap
1663	 wr	%l4, 0, %y		! restore y
1664
1665
1666/*
1667 * Window overflow trap handler.
1668 *	%l0 = %psr
1669 *	%l1 = return pc
1670 *	%l2 = return npc
1671 */
1672window_of:
1673#ifdef TRIVIAL_WINDOW_OVERFLOW_HANDLER
1674	/* a trivial version that assumes %sp is ok */
1675	/* (for testing only!) */
1676	save	%g0, %g0, %g0
1677	std	%l0, [%sp + (0*8)]
1678	rd	%psr, %l0
1679	mov	1, %l1
1680	sll	%l1, %l0, %l0
1681	wr	%l0, 0, %wim
1682	std	%l2, [%sp + (1*8)]
1683	std	%l4, [%sp + (2*8)]
1684	std	%l6, [%sp + (3*8)]
1685	std	%i0, [%sp + (4*8)]
1686	std	%i2, [%sp + (5*8)]
1687	std	%i4, [%sp + (6*8)]
1688	std	%i6, [%sp + (7*8)]
1689	restore
1690	RETT
1691#else
1692	/*
1693	 * This is similar to TRAP_SETUP, but we do not want to spend
1694	 * a lot of time, so we have separate paths for kernel and user.
1695	 * We also know for sure that the window has overflowed.
1696	 */
1697	btst	PSR_PS, %l0
1698	bz	winof_user
1699	 sethi	%hi(clean_trap_window), %l7
1700
1701	/*
1702	 * Overflow from kernel mode.  Call clean_trap_window to
1703	 * do the dirty work, then just return, since we know prev
1704	 * window is valid.  clean_trap_windows might dump all *user*
1705	 * windows into the pcb, but we do not care: there is at
1706	 * least one kernel window (a trap or interrupt frame!)
1707	 * above us.
1708	 */
1709	jmpl	%l7 + %lo(clean_trap_window), %l4
1710	 mov	%g7, %l7		! for clean_trap_window
1711
1712	wr	%l0, 0, %psr		! put back the @%*! cond. codes
1713	nop				! (let them settle in)
1714	RETT
1715
1716winof_user:
1717	/*
1718	 * Overflow from user mode.
1719	 * If clean_trap_window dumps the registers into the pcb,
1720	 * rft_user will need to call trap(), so we need space for
1721	 * a trap frame.  We also have to compute pcb_nw.
1722	 *
1723	 * SHOULD EXPAND IN LINE TO AVOID BUILDING TRAP FRAME ON
1724	 * `EASY' SAVES
1725	 */
1726	sethi	%hi(_cpcb), %l6
1727	ld	[%l6 + %lo(_cpcb)], %l6
1728	ld	[%l6 + PCB_WIM], %l5
1729	and	%l0, 31, %l3
1730	sub	%l3, %l5, %l5 		/* l5 = CWP - pcb_wim */
1731	set	uwtab, %l4
1732	ldub	[%l4 + %l5], %l5	/* l5 = uwtab[l5] */
1733	st	%l5, [%l6 + PCB_UW]
1734	jmpl	%l7 + %lo(clean_trap_window), %l4
1735	 mov	%g7, %l7		! for clean_trap_window
1736	sethi	%hi(_cpcb), %l6
1737	ld	[%l6 + %lo(_cpcb)], %l6
1738	set	UPAGES*NBPG-CCFSZ-80, %l5
1739	add	%l6, %l5, %sp		/* over to kernel stack */
1740	CHECK_SP_REDZONE(%l6, %l5)
1741
1742	/*
1743	 * Copy return_from_trap far enough to allow us
1744	 * to jump directly to rft_user_or_recover_pcb_windows
1745	 * (since we know that is where we are headed).
1746	 */
1747!	and	%l0, 31, %l3		! still set (clean_trap_window
1748					! leaves this register alone)
1749	set	wmask, %l6
1750	ldub	[%l6 + %l3], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
1751	b	rft_user_or_recover_pcb_windows
1752	 rd	%wim, %l4		! (read %wim first)
1753#endif /* end `real' version of window overflow trap handler */
1754
1755/*
1756 * Window underflow trap handler.
1757 *	%l0 = %psr
1758 *	%l1 = return pc
1759 *	%l2 = return npc
1760 *
1761 * A picture:
1762 *
1763 *	  T R I X
1764 *	0 0 0 1 0 0 0	(%wim)
1765 * [bit numbers increase towards the right;
1766 * `restore' moves right & `save' moves left]
1767 *
1768 * T is the current (Trap) window, R is the window that attempted
1769 * a `Restore' instruction, I is the Invalid window, and X is the
1770 * window we want to make invalid before we return.
1771 *
1772 * Since window R is valid, we cannot use rft_user to restore stuff
1773 * for us.  We have to duplicate its logic.  YUCK.
1774 *
1775 * Incidentally, TRIX are for kids.  Silly rabbit!
1776 */
1777window_uf:
1778#ifdef TRIVIAL_WINDOW_UNDERFLOW_HANDLER
1779	wr	%g0, 0, %wim		! allow us to enter I
1780	restore				! to R
1781	nop
1782	nop
1783	restore				! to I
1784	restore	%g0, 1, %l1		! to X
1785	rd	%psr, %l0
1786	sll	%l1, %l0, %l0
1787	wr	%l0, 0, %wim
1788	save	%g0, %g0, %g0		! back to I
1789	LOADWIN(%sp)
1790	save	%g0, %g0, %g0		! back to R
1791	save	%g0, %g0, %g0		! back to T
1792	RETT
1793#else
1794	wr	%g0, 0, %wim		! allow us to enter I
1795	btst	PSR_PS, %l0
1796	restore				! enter window R
1797	bz	winuf_user
1798	 restore			! enter window I
1799
1800	/*
1801	 * Underflow from kernel mode.  Just recover the
1802	 * registers and go (except that we have to update
1803	 * the blasted user pcb fields).
1804	 */
1805	restore	%g0, 1, %l1		! enter window X, then set %l1 to 1
1806	rd	%psr, %l0		! cwp = %psr & 31;
1807	and	%l0, 31, %l0
1808	sll	%l1, %l0, %l1		! wim = 1 << cwp;
1809	wr	%l1, 0, %wim		! setwim(wim);
1810	sethi	%hi(_cpcb), %l1
1811	ld	[%l1 + %lo(_cpcb)], %l1
1812	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1813	save	%g0, %g0, %g0		! back to window I
1814	LOADWIN(%sp)
1815	save	%g0, %g0, %g0		! back to R
1816	save	%g0, %g0, %g0		! and then to T
1817	wr	%l0, 0, %psr		! fix those cond codes....
1818	nop				! (let them settle in)
1819	RETT
1820
1821winuf_user:
1822	/*
1823	 * Underflow from user mode.
1824	 *
1825	 * We cannot use rft_user (as noted above) because
1826	 * we must re-execute the `restore' instruction.
1827	 * Since it could be, e.g., `restore %l0,0,%l0',
1828	 * it is not okay to touch R's registers either.
1829	 *
1830	 * We are now in window I.
1831	 */
1832	btst	7, %sp			! if unaligned, it is invalid
1833	bne	winuf_invalid
1834	 EMPTY
1835
1836	PTE_OF_ADDR(%sp, %l7, winuf_invalid)
1837	CMP_PTE_USER_READ(%l7)		! if first page not readable,
1838	bne	winuf_invalid		! it is invalid
1839	 EMPTY
1840	SLT_IF_1PAGE_RW(%sp, %l7)	! first page is readable
1841	bl,a	winuf_ok		! if only one page, enter window X
1842	 restore %g0, 1, %l1		! and goto ok, & set %l1 to 1
1843	add	%sp, 7*8, %l5
1844	PTE_OF_ADDR(%l5, %l7, winuf_invalid)
1845	CMP_PTE_USER_READ(%l7)		! check second page too
1846	be,a	winuf_ok		! enter window X and goto ok
1847	 restore %g0, 1, %l1		! (and then set %l1 to 1)
1848
1849winuf_invalid:
1850	/*
1851	 * We were unable to restore the window because %sp
1852	 * is invalid or paged out.  Return to the trap window
1853	 * and call trap(T_WINUF).  This will save R to the user
1854	 * stack, then load both R and I into the pcb rw[] area,
1855	 * and return with pcb_nsaved set to -1 for success, 0 for
1856	 * failure.  `Failure' indicates that someone goofed with the
1857	 * trap registers (e.g., signals), so that we need to return
1858	 * from the trap as from a syscall (probably to a signal handler)
1859	 * and let it retry the restore instruction later.  Note that
1860	 * window R will have been pushed out to user space, and thus
1861	 * be the invalid window, by the time we get back here.  (We
1862	 * continue to label it R anyway.)  We must also set %wim again,
1863	 * and set pcb_uw to 1, before enabling traps.  (Window R is the
1864	 * only window, and it is a user window).
1865	 */
1866	save	%g0, %g0, %g0		! back to R
1867#if 0		/* this gives `as' mild heartburn */
1868	save	%g0, 1, %l4		! back to T, then %l4 = 1
1869#else
1870	save	%g0, %g0, %g0		! back to T
1871	mov	1, %l4			! and set %l4 = 1
1872#endif
1873	sethi	%hi(_cpcb), %l6
1874	ld	[%l6 + %lo(_cpcb)], %l6
1875	st	%l4, [%l6 + PCB_UW]	! pcb_uw = 1
1876	ld	[%l6 + PCB_WIM], %l5	! get log2(%wim)
1877	sll	%l4, %l5, %l4		! %l4 = old %wim
1878	wr	%l4, 0, %wim		! window I is now invalid again
1879	set	UPAGES*NBPG-CCFSZ-80, %l5
1880	add	%l6, %l5, %sp		! get onto kernel stack
1881	CHECK_SP_REDZONE(%l6, %l5)
1882
1883	/*
1884	 * Okay, call trap(T_WINUF, psr, pc, &tf).
1885	 * See `slowtrap' above for operation.
1886	 */
1887	wr	%l0, PSR_ET, %psr
1888	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1889	rd	%y, %l3
1890	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1891	mov	T_WINUF, %o0
1892	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1893	mov	%l0, %o1
1894	std	%g2, [%sp + CCFSZ + 24]	! etc
1895	mov	%l1, %o2
1896	std	%g4, [%sp + CCFSZ + 32]
1897	add	%sp, CCFSZ, %o3
1898	std	%g6, [%sp + CCFSZ + 40]
1899	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1900	std	%i2, [%sp + CCFSZ + 56]
1901	std	%i4, [%sp + CCFSZ + 64]
1902	call	_trap			! trap(T_WINUF, pc, psr, &tf)
1903	 std	%i6, [%sp + CCFSZ + 72]	! tf.tf_out[6]
1904
1905	ldd	[%sp + CCFSZ + 0], %l0	! new psr, pc
1906	ldd	[%sp + CCFSZ + 8], %l2	! new npc, %y
1907	wr	%l3, 0, %y
1908	ld	[%sp + CCFSZ + 20], %g1
1909	ldd	[%sp + CCFSZ + 24], %g2
1910	ldd	[%sp + CCFSZ + 32], %g4
1911	ldd	[%sp + CCFSZ + 40], %g6
1912	ldd	[%sp + CCFSZ + 48], %i0	! %o0 for window R, etc
1913	ldd	[%sp + CCFSZ + 56], %i2
1914	ldd	[%sp + CCFSZ + 64], %i4
1915	wr	%l0, 0, %psr		! disable traps: test must be atomic
1916	ldd	[%sp + CCFSZ + 72], %i6
1917	sethi	%hi(_cpcb), %l6
1918	ld	[%l6 + %lo(_cpcb)], %l6
1919	ld	[%l6 + PCB_NSAVED], %l7	! if nsaved is -1, we have our regs
1920	tst	%l7
1921	bl,a	1f			! got them
1922	 wr	%g0, 0, %wim		! allow us to enter windows R, I
1923	b,a	return_from_trap
1924
1925	/*
1926	 * Got 'em.  Load 'em up.
1927	 */
19281:
1929	mov	%g6, %l3		! save %g6; set %g6 = cpcb
1930	mov	%l6, %g6
1931	st	%g0, [%g6 + PCB_NSAVED]	! and clear magic flag
1932	restore				! from T to R
1933	restore				! from R to I
1934	restore	%g0, 1, %l1		! from I to X, then %l1 = 1
1935	rd	%psr, %l0		! cwp = %psr;
1936	sll	%l1, %l0, %l1
1937	wr	%l1, 0, %wim		! make window X invalid
1938	and	%l0, 31, %l0
1939	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1940	nop				! unnecessary? old wim was 0...
1941	save	%g0, %g0, %g0		! back to I
1942	LOADWIN(%g6 + PCB_RW + 64)	! load from rw[1]
1943	save	%g0, %g0, %g0		! back to R
1944	LOADWIN(%g6 + PCB_RW)		! load from rw[0]
1945	save	%g0, %g0, %g0		! back to T
1946	wr	%l0, 0, %psr		! restore condition codes
1947	mov	%l3, %g6		! fix %g6
1948	RETT
1949
1950	/*
1951	 * Restoring from user stack, but everything has checked out
1952	 * as good.  We are now in window X, and %l1 = 1.  Window R
1953	 * is still valid and holds user values.
1954	 */
1955winuf_ok:
1956	rd	%psr, %l0
1957	sll	%l1, %l0, %l1
1958	wr	%l1, 0, %wim		! make this one invalid
1959	sethi	%hi(_cpcb), %l2
1960	ld	[%l2 + %lo(_cpcb)], %l2
1961	and	%l0, 31, %l0
1962	st	%l0, [%l2 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1963	save	%g0, %g0, %g0		! back to I
1964	LOADWIN(%sp)
1965	save	%g0, %g0, %g0		! back to R
1966	save	%g0, %g0, %g0		! back to T
1967	wr	%l0, 0, %psr		! restore condition codes
1968	nop				! it takes three to tangle
1969	RETT
1970#endif /* end `real' version of window underflow trap handler */
1971
1972/*
1973 * Various return-from-trap routines (see return_from_trap).
1974 */
1975
1976/*
1977 * Return from trap, to kernel.
1978 *	%l0 = %psr
1979 *	%l1 = return pc
1980 *	%l2 = return npc
1981 *	%l4 = %wim
1982 *	%l5 = bit for previous window
1983 */
1984rft_kernel:
1985	btst	%l5, %l4		! if (wim & l5)
1986	bnz	1f			!	goto reload;
1987	 wr	%l0, 0, %psr		! but first put !@#*% cond codes back
1988
1989	/* previous window is valid; just rett */
1990	nop				! wait for cond codes to settle in
1991	RETT
1992
1993	/*
1994	 * Previous window is invalid.
1995	 * Update %wim and then reload l0..i7 from frame.
1996	 *
1997	 *	  T I X
1998	 *	0 0 1 0 0   (%wim)
1999	 * [see picture in window_uf handler]
2000	 *
2001	 * T is the current (Trap) window, I is the Invalid window,
2002	 * and X is the window we want to make invalid.  Window X
2003	 * currently has no useful values.
2004	 */
20051:
2006	wr	%g0, 0, %wim		! allow us to enter window I
2007	nop; nop; nop			! (it takes a while)
2008	restore				! enter window I
2009	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2010	rd	%psr, %l0		! CWP = %psr & 31;
2011	and	%l0, 31, %l0
2012	sll	%l1, %l0, %l1		! wim = 1 << CWP;
2013	wr	%l1, 0, %wim		! setwim(wim);
2014	sethi	%hi(_cpcb), %l1
2015	ld	[%l1 + %lo(_cpcb)], %l1
2016	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2017	save	%g0, %g0, %g0		! back to window I
2018	LOADWIN(%sp)
2019	save	%g0, %g0, %g0		! back to window T
2020	/*
2021	 * Note that the condition codes are still set from
2022	 * the code at rft_kernel; we can simply return.
2023	 */
2024	RETT
2025
2026/*
2027 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
2028 * will re-enter trap() if set.  Note that we may have to switch from
2029 * the interrupt stack to the kernel stack in this case.
2030 *	%l0 = %psr
2031 *	%l1 = return pc
2032 *	%l2 = return npc
2033 *	%l4 = %wim
2034 *	%l5 = bit for previous window
2035 *	%l6 = cpcb
2036 * If returning to a valid window, just set psr and return.
2037 */
2038rft_user:
2039!	sethi	%hi(_want_ast), %l7	! (done below)
2040	ld	[%l7 + %lo(_want_ast)], %l7
2041	tst	%l7			! want AST trap?
2042	bne,a	softtrap		! yes, re-enter trap with type T_AST
2043	 mov	T_AST, %o0
2044
2045	btst	%l5, %l4		! if (wim & l5)
2046	bnz	1f			!	goto reload;
2047	 wr	%l0, 0, %psr		! restore cond codes
2048	nop				! (three instruction delay)
2049	RETT
2050
2051	/*
2052	 * Previous window is invalid.
2053	 * Before we try to load it, we must verify its stack pointer.
2054	 * This is much like the underflow handler, but a bit easier
2055	 * since we can use our own local registers.
2056	 */
20571:
2058	btst	7, %fp			! if unaligned, address is invalid
2059	bne	rft_invalid
2060	 EMPTY
2061
2062	PTE_OF_ADDR(%fp, %l7, rft_invalid)
2063	CMP_PTE_USER_READ(%l7)		! try first page
2064	bne	rft_invalid		! no good
2065	 EMPTY
2066	SLT_IF_1PAGE_RW(%fp, %l7)
2067	bl,a	rft_user_ok		! only 1 page: ok
2068	 wr	%g0, 0, %wim
2069	add	%fp, 7*8, %l5
2070	PTE_OF_ADDR(%l5, %l7, rft_invalid)
2071	CMP_PTE_USER_READ(%l7)		! check 2nd page too
2072	be,a	rft_user_ok
2073	 wr	%g0, 0, %wim
2074
2075	/*
2076	 * The window we wanted to pull could not be pulled.  Instead,
2077	 * re-enter trap with type T_RWRET.  This will pull the window
2078	 * into cpcb->pcb_rw[0] and set cpcb->pcb_nsaved to -1, which we
2079	 * will detect when we try to return again.
2080	 */
2081rft_invalid:
2082	b	softtrap
2083	 mov	T_RWRET, %o0
2084
2085	/*
2086	 * The window we want to pull can be pulled directly.
2087	 */
2088rft_user_ok:
2089!	wr	%g0, 0, %wim		! allow us to get into it
2090	wr	%l0, 0, %psr		! fix up the cond codes now
2091	nop; nop; nop
2092	restore				! enter window I
2093	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2094	rd	%psr, %l0		! l0 = (junk << 5) + CWP;
2095	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2096	wr	%l1, 0, %wim
2097	sethi	%hi(_cpcb), %l1
2098	ld	[%l1 + %lo(_cpcb)], %l1
2099	and	%l0, 31, %l0
2100	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2101	save	%g0, %g0, %g0		! back to window I
2102	LOADWIN(%sp)			! suck hard
2103	save	%g0, %g0, %g0		! back to window T
2104	RETT
2105
2106/*
2107 * Return from trap.  Entered after a
2108 *	wr	%l0, 0, %psr
2109 * which disables traps so that we can rett; registers are:
2110 *
2111 *	%l0 = %psr
2112 *	%l1 = return pc
2113 *	%l2 = return npc
2114 *
2115 * (%l3..%l7 anything).
2116 *
2117 * If we are returning to user code, we must:
2118 *  1.  Check for register windows in the pcb that belong on the stack.
2119 *	If there are any, reenter trap with type T_WINOF.
2120 *  2.  Make sure the register windows will not underflow.  This is
2121 *	much easier in kernel mode....
2122 */
2123return_from_trap:
2124!	wr	%l0, 0, %psr		! disable traps so we can rett
2125! (someone else did this already)
2126	and	%l0, 31, %l5
2127	set	wmask, %l6
2128	ldub	[%l6 + %l5], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
2129	btst	PSR_PS, %l0		! returning to userland?
2130	bnz	rft_kernel		! no, go return to kernel
2131	 rd	%wim, %l4		! (read %wim in any case)
2132
2133rft_user_or_recover_pcb_windows:
2134	/*
2135	 * (entered with %l4=%wim, %l5=wmask[cwp]; %l0..%l2 as usual)
2136	 *
2137	 * check cpcb->pcb_nsaved:
2138	 * if 0, do a `normal' return to user (see rft_user);
2139	 * if > 0, cpcb->pcb_rw[] holds registers to be copied to stack;
2140	 * if -1, cpcb->pcb_rw[0] holds user registers for rett window
2141	 * from an earlier T_RWRET pseudo-trap.
2142	 */
2143	sethi	%hi(_cpcb), %l6
2144	ld	[%l6 + %lo(_cpcb)], %l6
2145	ld	[%l6 + PCB_NSAVED], %l7
2146	tst	%l7
2147	bz,a	rft_user
2148	 sethi	%hi(_want_ast), %l7	! first instr of rft_user
2149
2150	bg,a	softtrap		! if (pcb_nsaved > 0)
2151	 mov	T_WINOF, %o0		!	trap(T_WINOF);
2152
2153	/*
2154	 * To get here, we must have tried to return from a previous
2155	 * trap and discovered that it would cause a window underflow.
2156	 * We then must have tried to pull the registers out of the
2157	 * user stack (from the address in %fp==%i6) and discovered
2158	 * that it was either unaligned or not loaded in memory, and
2159	 * therefore we ran a trap(T_RWRET), which loaded one set of
2160	 * registers into cpcb->pcb_pcb_rw[0] (if it had killed the
2161	 * process due to a bad stack, we would not be here).
2162	 *
2163	 * We want to load pcb_rw[0] into the previous window, which
2164	 * we know is currently invalid.  In other words, we want
2165	 * %wim to be 1 << ((cwp + 2) % nwindows).
2166	 */
2167	wr	%g0, 0, %wim		! enable restores
2168	mov	%g6, %l3		! save g6 in l3
2169	mov	%l6, %g6		! set g6 = &u
2170	st	%g0, [%g6 + PCB_NSAVED]	! clear cpcb->pcb_nsaved
2171	restore				! enter window I
2172	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2173	rd	%psr, %l0
2174	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2175	wr	%l1, 0, %wim
2176	and	%l0, 31, %l0
2177	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = CWP;
2178	nop				! unnecessary? old wim was 0...
2179	save	%g0, %g0, %g0		! back to window I
2180	LOADWIN(%g6 + PCB_RW)
2181	save	%g0, %g0, %g0		! back to window T (trap window)
2182	wr	%l0, 0, %psr		! cond codes, cond codes everywhere
2183	mov	%l3, %g6		! restore g6
2184	RETT
2185
2186! exported end marker for kernel gdb
2187	.globl	_endtrapcode
2188_endtrapcode:
2189
2190/*
2191 * init_tables(nwin) int nwin;
2192 *
2193 * Set up the uwtab and wmask tables.
2194 * We know nwin > 1.
2195 */
2196init_tables:
2197	/*
2198	 * for (i = -nwin, j = nwin - 2; ++i < 0; j--)
2199	 *	uwtab[i] = j;
2200	 * (loop runs at least once)
2201	 */
2202	set	uwtab, %o3
2203	sub	%g0, %o0, %o1		! i = -nwin + 1
2204	inc	%o1
2205	add	%o0, -2, %o2		! j = nwin - 2;
22060:
2207	stb	%o2, [%o3 + %o1]	! uwtab[i] = j;
22081:
2209	inccc	%o1			! ++i < 0?
2210	bl	0b			! yes, continue loop
2211	 dec	%o2			! in any case, j--
2212
2213	/*
2214	 * (i now equals 0)
2215	 * for (j = nwin - 1; i < nwin; i++, j--)
2216	 *	uwtab[i] = j;
2217	 * (loop runs at least twice)
2218	 */
2219	sub	%o0, 1, %o2		! j = nwin - 1
22200:
2221	stb	%o2, [%o3 + %o1]	! uwtab[i] = j
2222	inc	%o1			! i++
22231:
2224	cmp	%o1, %o0		! i < nwin?
2225	bl	0b			! yes, continue
2226	 dec	%o2			! in any case, j--
2227
2228	/*
2229	 * We observe that, for i in 0..nwin-2, (i+1)%nwin == i+1;
2230	 * for i==nwin-1, (i+1)%nwin == 0.
2231	 * To avoid adding 1, we run i from 1 to nwin and set
2232	 * wmask[i-1].
2233	 *
2234	 * for (i = j = 1; i < nwin; i++) {
2235	 *	j <<= 1;	(j now == 1 << i)
2236	 *	wmask[i - 1] = j;
2237	 * }
2238	 * (loop runs at least once)
2239	 */
2240	set	wmask - 1, %o3
2241	mov	1, %o1			! i = 1;
2242	mov	2, %o2			! j = 2;
22430:
2244	stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2245	inc	%o1			! i++
2246	cmp	%o1, %o0		! i < nwin?
2247	bl,a	0b			! yes, continue
2248	 sll	%o2, 1, %o2		! (and j <<= 1)
2249
2250	/*
2251	 * Now i==nwin, so we want wmask[i-1] = 1.
2252	 */
2253	mov	1, %o2			! j = 1;
2254	retl
2255	 stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2256
2257dostart:
2258	rd	%psr, %l0		! paranoia: make sure ...
2259	andn	%l0, PSR_ET, %l0	! we have traps off
2260	wr	%l0, 0, %psr		! so that we can fiddle safely
2261	nop; nop; nop
2262
2263	/*
2264	 * Startup.
2265	 *
2266	 * We have been loaded in low RAM, at some address which
2267	 * is page aligned (0x4000 actually) rather than where we
2268	 * want to run (0xf8004000).  Until we get everything set,
2269	 * we have to be sure to use only pc-relative addressing.
2270	 */
2271
2272	wr	%g0, 0, %wim		! make sure we can set psr
2273	mov	%o0, %g7		! save prom vector pointer
2274	nop; nop
2275	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
2276	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
2277	stba	%g0, [%g1] ASI_CONTROL
2278	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
2279	mov	1, %g1			! set pcb_wim (log2(%wim) = 1)
2280	sethi	%hi(_u0 - KERNBASE + PCB_WIM), %g2
2281	st	%g1, [%g2 + %lo(_u0 - KERNBASE + PCB_WIM)]
2282
2283	/*
2284	 * Step 1: double map low RAM (addresses [0.._end-start-1])
2285	 * to KERNBASE (addresses [KERNBASE.._end-1]).  None of these
2286	 * are `bad' aliases (since they are all on segment boundaries)
2287	 * so we do not have to worry about cache aliasing.
2288	 *
2289	 * We map in another couple of segments just to have some
2290	 * more memory (512K, actually) guaranteed available for
2291	 * bootstrap code (pmap_bootstrap needs memory to hold MMU
2292	 * and context data structures).
2293	 */
2294	clr	%l0			! lowva
2295	set	KERNBASE, %l1		! highva
2296	set	_end + (2 << 18), %l2	! last va that must be remapped
2297	set	1 << 18, %l3		! segment size in bytes
22980:
2299	lduba	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
2300	stba	%l4, [%l1] ASI_SEGMAP
2301	add	%l3, %l1, %l1		! highva += segsiz;
2302	cmp	%l1, %l2		! done?
2303	bl	0b			! no, loop
2304	 add	%l3, %l0, %l0		! (and lowva += segsz)
2305
2306	/*
2307	 * Now map the interrupt enable register and clear any interrupts,
2308	 * enabling NMIs.  Note that we will not take NMIs until we change
2309	 * %tbr.
2310	 */
2311	set	IE_reg_addr, %l0
2312	set	IE_REG_PTE, %l1
2313	sta	%l1, [%l0] ASI_PTE
2314	mov	IE_ALLIE, %l1
2315	nop; nop			! paranoia
2316	stb	%l1, [%l0]
2317
2318	/*
2319	 * All set, fix pc and npc.  Once we are where we should be,
2320	 * we can give ourselves a stack and enable traps.
2321	 */
2322	set	1f, %l0
2323	jmp	%l0
2324	 nop
23251:
2326	set	USRSTACK - CCFSZ, %fp	! as if called from user code
2327	set	estack0 - CCFSZ - 80, %sp ! via syscall(boot_me_up) or somesuch
2328	rd	%psr, %l0
2329	wr	%l0, PSR_ET, %psr
2330
2331	/*
2332	 * Step 2: clear BSS.  This may just be paranoia; the boot
2333	 * loader might already do it for us; but what the hell.
2334	 */
2335	set	_edata, %o0		! bzero(edata, end - edata)
2336	set	_end, %o1
2337	call	_bzero
2338	 sub	%o1, %o0, %o1
2339
2340	/*
2341	 * Stash prom vectors now, after bzero, as it lives in bss
2342	 * (which we just zeroed).
2343	 * This depends on the fact that bzero does not use %g7.
2344	 */
2345	sethi	%hi(_promvec), %l0
2346	st	%g7, [%l0 + %lo(_promvec)]
2347
2348	/*
2349	 * Step 3: compute number of windows and set up tables.
2350	 * We could do some of this later.
2351	 */
2352	save	%sp, -64, %sp
2353	rd	%psr, %g1
2354	restore
2355	and	%g1, 31, %g1		! want just the CWP bits
2356	add	%g1, 1, %o0		! compute nwindows
2357	sethi	%hi(_nwindows), %o1	! may as well tell everyone
2358	call	init_tables
2359	 st	%o0, [%o1 + %lo(_nwindows)]
2360
2361	/*
2362	 * Step 4: change the trap base register, now that our trap handlers
2363	 * will function (they need the tables we just set up).
2364	 */
2365	set	_trapbase, %l0
2366	wr	%l0, 0, %tbr
2367	nop				! paranoia
2368
2369	/*
2370	 * Ready to run C code; finish bootstrap.
2371	 */
2372	call	_bootstrap
2373	 nop
2374
2375	/*
2376	 * Call main.  This returns to us after loading /sbin/init into
2377	 * user space.  (If the exec fails, main() does not return.)
2378	 */
2379	call	_main
2380	 clr	%o0			! our frame arg is ignored
2381
2382	/*
2383	 * Here we finish up as in syscall, but simplified.  We need to
2384	 * fiddle pc and npc a bit, as execve() / setregs() have only set
2385	 * npc, in anticipation that trap.c will advance past the trap
2386	 * instruction; but we bypass that, so we must do it manually.
2387	 */
2388	mov	PSR_S, %l0		! user psr (no need to load it)
2389	ld	[%sp + CCFSZ + 8], %l1	! pc = npc from execve
2390	b	init_syscall_ret
2391	 add	%l1, 4, %l2		! npc = pc+4
2392
2393/*
2394 * The following code is copied to the top of the user stack when each
2395 * process is exec'ed, and signals are `trampolined' off it.
2396 *
2397 * When this code is run, the stack looks like:
2398 *	[%sp]		64 bytes to which registers can be dumped
2399 *	[%sp + 64]	signal number (goes in %o0)
2400 *	[%sp + 64 + 4]	signal code (goes in %o1)
2401 *	[%sp + 64 + 8]	placeholder
2402 *	[%sp + 64 + 12]	argument for %o3, currently unsupported (always 0)
2403 *	[%sp + 64 + 16]	first word of saved state (sigcontext)
2404 *	    .
2405 *	    .
2406 *	    .
2407 *	[%sp + NNN]	last word of saved state
2408 * (followed by previous stack contents or top of signal stack).
2409 * The address of the function to call is in %g1; the old %g1 and %o0
2410 * have already been saved in the sigcontext.  We are running in a clean
2411 * window, all previous windows now being saved to the stack.
2412 *
2413 * Note that [%sp + 64 + 8] == %sp + 64 + 16.  The copy at %sp+64+8
2414 * will eventually be removed, with a hole left in its place, if things
2415 * work out.
2416 */
2417	.globl	_sigcode
2418	.globl	_esigcode
2419_sigcode:
2420	/*
2421	 * XXX  the `save' and `restore' below are unnecessary: should
2422	 *	replace with simple arithmetic on %sp
2423	 *
2424	 * Make room on the stack for 32 %f registers + %fsr.  This comes
2425	 * out to 33*4 or 132 bytes, but this must be aligned to a multiple
2426	 * of 8, or 136 bytes.
2427	 */
2428	save	%sp, -CCFSZ - 136, %sp
2429	mov	%g2, %l2		! save globals in %l registers
2430	mov	%g3, %l3
2431	mov	%g4, %l4
2432	mov	%g5, %l5
2433	mov	%g6, %l6
2434	mov	%g7, %l7
2435	/*
2436	 * Saving the fpu registers is expensive, so do it iff the fsr
2437	 * stored in the sigcontext shows that the fpu is enabled.
2438	 */
2439	ld	[%fp + 64 + 16 + SC_PSR_OFFSET], %l0
2440	sethi	%hi(PSR_EF), %l1	! FPU enable bit is too high for andcc
2441	andcc	%l0, %l1, %l0		! %l0 = fpu enable bit
2442	be	1f			! if not set, skip the saves
2443	 rd	%y, %l1			! in any case, save %y
2444
2445	! fpu is enabled, oh well
2446	st	%fsr, [%sp + CCFSZ + 0]
2447	std	%f0, [%sp + CCFSZ + 8]
2448	std	%f2, [%sp + CCFSZ + 16]
2449	std	%f4, [%sp + CCFSZ + 24]
2450	std	%f6, [%sp + CCFSZ + 32]
2451	std	%f8, [%sp + CCFSZ + 40]
2452	std	%f10, [%sp + CCFSZ + 48]
2453	std	%f12, [%sp + CCFSZ + 56]
2454	std	%f14, [%sp + CCFSZ + 64]
2455	std	%f16, [%sp + CCFSZ + 72]
2456	std	%f18, [%sp + CCFSZ + 80]
2457	std	%f20, [%sp + CCFSZ + 88]
2458	std	%f22, [%sp + CCFSZ + 96]
2459	std	%f24, [%sp + CCFSZ + 104]
2460	std	%f26, [%sp + CCFSZ + 112]
2461	std	%f28, [%sp + CCFSZ + 120]
2462	std	%f30, [%sp + CCFSZ + 128]
2463
24641:
2465	ldd	[%fp + 64], %o0		! sig, code
2466	ld	[%fp + 76], %o3		! arg3
2467	call	%g1			! (*sa->sa_handler)(sig,code,scp,arg3)
2468	 add	%fp, 64 + 16, %o2	! scp
2469
2470	/*
2471	 * Now that the handler has returned, re-establish all the state
2472	 * we just saved above, then do a sigreturn.
2473	 */
2474	tst	%l0			! reload fpu registers?
2475	be	1f			! if not, skip the loads
2476	 wr	%l1, %g0, %y		! in any case, restore %y
2477
2478	ld	[%sp + CCFSZ + 0], %fsr
2479	ldd	[%sp + CCFSZ + 8], %f0
2480	ldd	[%sp + CCFSZ + 16], %f2
2481	ldd	[%sp + CCFSZ + 24], %f4
2482	ldd	[%sp + CCFSZ + 32], %f6
2483	ldd	[%sp + CCFSZ + 40], %f8
2484	ldd	[%sp + CCFSZ + 48], %f10
2485	ldd	[%sp + CCFSZ + 56], %f12
2486	ldd	[%sp + CCFSZ + 64], %f14
2487	ldd	[%sp + CCFSZ + 72], %f16
2488	ldd	[%sp + CCFSZ + 80], %f18
2489	ldd	[%sp + CCFSZ + 88], %f20
2490	ldd	[%sp + CCFSZ + 96], %f22
2491	ldd	[%sp + CCFSZ + 104], %f24
2492	ldd	[%sp + CCFSZ + 112], %f26
2493	ldd	[%sp + CCFSZ + 120], %f28
2494	ldd	[%sp + CCFSZ + 128], %f30
2495
24961:
2497	mov	%l2, %g2
2498	mov	%l3, %g3
2499	mov	%l4, %g4
2500	mov	%l5, %g5
2501	mov	%l6, %g6
2502	mov	%l7, %g7
2503
2504	restore	%g0, SYS_sigreturn, %g1	! get registers back & set syscall #
2505	add	%sp, 64 + 16, %o0	! compute scp
2506	t	ST_SYSCALL		! sigreturn(scp)
2507	! sigreturn does not return unless it fails
2508	mov	SYS_exit, %g1		! exit(errno)
2509	t	ST_SYSCALL
2510_esigcode:
2511
2512/*
2513 * Primitives
2514 */
2515
2516#ifdef GPROF
2517	.globl	mcount
2518#define	ENTRY(x) \
2519	.globl _##x; _##x: \
2520	save	%sp, -CCFSZ, %sp; \
2521	call	mcount; \
2522	nop; \
2523	restore
2524#else
2525#define	ENTRY(x)	.globl _##x; _##x:
2526#endif
2527#define	ALTENTRY(x)	.globl _##x; _##x:
2528
2529/*
2530 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
2531 *
2532 * Copy a null terminated string from the user address space into
2533 * the kernel address space.
2534 */
2535ENTRY(copyinstr)
2536	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2537#ifdef DIAGNOSTIC
2538	tst	%o2			! kernel should never give maxlen <= 0
2539	ble	1f
2540	 EMPTY
2541#endif
2542	set	KERNBASE, %o4
2543	cmp	%o0, %o4		! fromaddr < KERNBASE?
2544	blu,a	Lcsdocopy		! yes, go do it
2545	sethi	%hi(_cpcb), %o4		! (first instr of copy)
2546
2547	b	Lcsdone			! no, return EFAULT
2548	 mov	EFAULT, %o0
2549
25501:
2551	sethi	%hi(2f), %o0
2552	call	_panic
2553	 or	%lo(2f), %o0, %o0
25542:	.asciz	"copyinstr"
2555	ALIGN
2556
2557/*
2558 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
2559 *
2560 * Copy a null terminated string from the kernel
2561 * address space to the user address space.
2562 */
2563ENTRY(copyoutstr)
2564	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2565#ifdef DIAGNOSTIC
2566	tst	%o2
2567	ble	1f
2568	 EMPTY
2569#endif
2570	set	KERNBASE, %o4
2571	cmp	%o1, %o4		! toaddr < KERNBASE?
2572	blu,a	Lcsdocopy		! yes, go do it
2573	 sethi	%hi(_cpcb), %o4		! (first instr of copy)
2574
2575	b	Lcsdone			! no, return EFAULT
2576	 mov	EFAULT, %o0
2577
25781:
2579	sethi	%hi(2f), %o0
2580	call	_panic
2581	 or	%lo(2f), %o0, %o0
25822:	.asciz	"copyoutstr"
2583	ALIGN
2584
2585Lcsdocopy:
2586!	sethi	%hi(_cpcb), %o4		! (done earlier)
2587	ld	[%o4 + %lo(_cpcb)], %o4	! catch faults
2588	set	Lcsfault, %o5
2589	st	%o5, [%o4 + PCB_ONFAULT]
2590
2591	mov	%o1, %o5		!	save = toaddr;
2592! XXX should do this in bigger chunks when possible
25930:					! loop:
2594	ldsb	[%o0], %g1		!	c = *fromaddr;
2595	tst	%g1
2596	stb	%g1, [%o1]		!	*toaddr++ = c;
2597	be	1f			!	if (c == NULL)
2598	 inc	%o1			!		goto ok;
2599	deccc	%o2			!	if (--len > 0) {
2600	bg	0b			!		fromaddr++;
2601	 inc	%o0			!		goto loop;
2602					!	}
2603					!
2604	b	Lcsdone			!	error = ENAMETOOLONG;
2605	 mov	ENAMETOOLONG, %o0	!	goto done;
26061:					! ok:
2607	clr	%o0			!    error = 0;
2608Lcsdone:				! done:
2609	sub	%o1, %o5, %o1		!	len = to - save;
2610	tst	%o3			!	if (lencopied)
2611	bnz,a	3f
2612	 st	%o1, [%o3]		!		*lencopied = len;
26133:
2614	retl				! cpcb->pcb_onfault = 0;
2615	 st	%g0, [%o4 + PCB_ONFAULT]! return (error);
2616
2617Lcsfault:
2618	b	Lcsdone			! error = EFAULT;
2619	 mov	EFAULT, %o0		! goto ret;
2620
2621/*
2622 * copystr(fromaddr, toaddr, maxlength, &lencopied)
2623 *
2624 * Copy a null terminated string from one point to another in
2625 * the kernel address space.  (This is a leaf procedure, but
2626 * it does not seem that way to the C compiler.)
2627 */
2628ENTRY(copystr)
2629#ifdef DIAGNOSTIC
2630	tst	%o2			! 	if (maxlength <= 0)
2631	ble	4f			!		panic(...);
2632	 EMPTY
2633#endif
2634	mov	%o1, %o5		!	to0 = to;
26350:					! loop:
2636	ldsb	[%o0], %o4		!	c = *from;
2637	tst	%o4
2638	stb	%o4, [%o1]		!	*to++ = c;
2639	be	1f			!	if (c == 0)
2640	 inc	%o1			!		goto ok;
2641	deccc	%o2			!	if (--len > 0) {
2642	bg,a	0b			!		from++;
2643	 inc	%o0			!		goto loop;
2644	b	2f			!	}
2645	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
26461:					! ok:
2647	clr	%o0			!	ret = 0;
26482:
2649	sub	%o1, %o5, %o1		!	len = to - to0;
2650	tst	%o3			!	if (lencopied)
2651	bnz,a	3f
2652	 st	%o1, [%o3]		!		*lencopied = len;
26533:
2654	retl
2655	 nop
2656#ifdef DIAGNOSTIC
26574:
2658	sethi	%hi(5f), %o0
2659	call	_panic
2660	 or	%lo(5f), %o0, %o0
26615:
2662	.asciz	"copystr"
2663	ALIGN
2664#endif
2665
2666/*
2667 * Copyin(src, dst, len)
2668 *
2669 * Copy specified amount of data from user space into the kernel.
2670 */
2671ENTRY(copyin)
2672	set	KERNBASE, %o3
2673	cmp	%o0, %o3		! src < KERNBASE?
2674	blu,a	Ldocopy			! yes, can try it
2675	 sethi	%hi(_cpcb), %o3
2676
2677	/* source address points into kernel space: return EFAULT */
2678	retl
2679	 mov	EFAULT, %o0
2680
2681/*
2682 * Copyout(src, dst, len)
2683 *
2684 * Copy specified amount of data from kernel to user space.
2685 * Just like copyin, except that the `dst' addresses are user space
2686 * rather than the `src' addresses.
2687 */
2688ENTRY(copyout)
2689	set	KERNBASE, %o3
2690	cmp	%o1, %o3		! dst < KERBASE?
2691	blu,a	Ldocopy
2692	 sethi	%hi(_cpcb), %o3
2693
2694	/* destination address points into kernel space: return EFAULT */
2695	retl
2696	 mov	EFAULT, %o0
2697
2698	/*
2699	 * ******NOTE****** this depends on bcopy() not using %g7
2700	 */
2701Ldocopy:
2702!	sethi	%hi(_cpcb), %o3
2703	ld	[%o3 + %lo(_cpcb)], %o3
2704	set	Lcopyfault, %o4
2705	mov	%o7, %g7		! save return address
2706	call	_bcopy			! bcopy(src, dst, len)
2707	 st	%o4, [%o3 + PCB_ONFAULT]
2708
2709	sethi	%hi(_cpcb), %o3
2710	ld	[%o3 + %lo(_cpcb)], %o3
2711	st	%g0, [%o3 + PCB_ONFAULT]
2712	jmp	%g7 + 8
2713	 clr	%o0			! return 0
2714
2715! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
2716! Note that although we were in bcopy, there is no state to clean up;
2717! the only special thing is that we have to return to [g7 + 8] rather than
2718! [o7 + 8].
2719Lcopyfault:
2720	sethi	%hi(_cpcb), %o3
2721	ld	[%o3 + %lo(_cpcb)], %o3
2722	st	%g0, [%o3 + PCB_ONFAULT]
2723	jmp	%g7 + 8
2724	 mov	EFAULT, %o0
2725
2726
2727/*
2728 * Write all user windows presently in the CPU back to the user's stack.
2729 * We just do `save' instructions until pcb_uw == 0.
2730 *
2731 *	p = cpcb;
2732 *	nsaves = 0;
2733 *	while (p->pcb_uw > 0)
2734 *		save(), nsaves++;
2735 *	while (--nsaves >= 0)
2736 *		restore();
2737 */
2738ENTRY(write_user_windows)
2739	sethi	%hi(_cpcb), %g6
2740	ld	[%g6 + %lo(_cpcb)], %g6
2741	b	2f
2742	 clr	%g5
27431:
2744	save	%sp, -64, %sp
27452:
2746	ld	[%g6 + PCB_UW], %g7
2747	tst	%g7
2748	bg,a	1b
2749	 inc	%g5
27503:
2751	deccc	%g5
2752	bge,a	3b
2753	 restore
2754	retl
2755	 nop
2756
2757
2758	.comm	_want_resched,4
2759/*
2760 * Masterpaddr is the p->p_addr of the last process on the processor.
2761 * XXX masterpaddr is almost the same as cpcb
2762 * XXX should delete this entirely
2763 */
2764	.comm	_masterpaddr, 4
2765
2766/*
2767 * Switch statistics (for later tweaking):
2768 *	nswitchdiff = p1 => p2 (i.e., chose different process)
2769 *	nswitchexit = number of calls to switchexit()
2770 *	_cnt.v_swtch = total calls to swtch+swtchexit
2771 */
2772	.comm	_nswitchdiff, 4
2773	.comm	_nswitchexit, 4
2774
2775/*
2776 * REGISTER USAGE IN cpu_switch AND switchexit:
2777 * This is split into two phases, more or less
2778 * `before we locate a new proc' and `after'.
2779 * Some values are the same in both phases.
2780 * Note that the %o0-registers are not preserved across
2781 * the psr change when entering a new process, since this
2782 * usually changes the CWP field (hence heavy usage of %g's).
2783 *
2784 *	%g1 = oldpsr (excluding ipl bits)
2785 *	%g2 = %hi(_whichqs); newpsr
2786 *	%g3 = p
2787 *	%g4 = lastproc
2788 *	%g5 = <free>; newpcb
2789 *	%g6 = %hi(_cpcb)
2790 *	%g7 = %hi(_curproc)
2791 *	%o0 = tmp 1
2792 *	%o1 = tmp 2
2793 *	%o2 = tmp 3
2794 *	%o3 = tmp 4; whichqs; vm
2795 *	%o4 = tmp 4; which; sswap
2796 *	%o5 = tmp 5; q; <free>
2797 */
2798
2799/*
2800 * switchexit is called only from cpu_exit() before the current process
2801 * has freed its kernel stack; we must free it.  (curproc is already NULL.)
2802 *
2803 * We lay the process to rest by changing to the `idle' kernel stack,
2804 * and note that the `last loaded process' is nonexistent.
2805 */
2806ENTRY(switchexit)
2807	mov	%o0, %g2		! save the
2808	mov	%o1, %g3		! ... three parameters
2809	mov	%o2, %g4		! ... to kmem_free
2810
2811	/*
2812	 * Change pcb to idle u. area, i.e., set %sp to top of stack
2813	 * and %psr to PSR_S|PSR_ET, and set cpcb to point to _idle_u.
2814	 * Once we have left the old stack, we can call kmem_free to
2815	 * destroy it.  Call it any sooner and the register windows
2816	 * go bye-bye.
2817	 */
2818	set	_idle_u, %g5
2819	sethi	%hi(_cpcb), %g6
2820	mov	1, %g7
2821	wr	%g0, PSR_S, %psr	! change to window 0, traps off
2822	wr	%g0, 2, %wim		! and make window 1 the trap window
2823	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = &idle_u
2824	st	%g7, [%g5 + PCB_WIM]	! idle_u.pcb_wim = log2(2) = 1
2825	set	_idle_u + UPAGES * NBPG - CCFSZ, %sp	! set new %sp
2826#ifdef DEBUG
2827	set	_idle_u, %l6
2828	SET_SP_REDZONE(%l6, %l5)
2829#endif
2830	wr	%g0, PSR_S|PSR_ET, %psr	! and then enable traps
2831	mov	%g2, %o0		! now ready to call kmem_free
2832	mov	%g3, %o1
2833	call	_kmem_free
2834	 mov	%g4, %o2
2835
2836	/*
2837	 * Now fall through to `the last switch'.  %g6 was set to
2838	 * %hi(_cpcb), but may have been clobbered in kmem_free,
2839	 * so all the registers described below will be set here.
2840	 *
2841	 * REGISTER USAGE AT THIS POINT:
2842	 *	%g1 = oldpsr (excluding ipl bits)
2843	 *	%g2 = %hi(_whichqs)
2844	 *	%g4 = lastproc
2845	 *	%g6 = %hi(_cpcb)
2846	 *	%g7 = %hi(_curproc)
2847	 *	%o0 = tmp 1
2848	 *	%o1 = tmp 2
2849	 *	%o3 = whichqs
2850	 */
2851
2852	INCR(_nswitchexit)		! nswitchexit++;
2853	INCR(_cnt+V_SWTCH)		! cnt.v_switch++;
2854
2855	mov	PSR_S|PSR_ET, %g1	! oldpsr = PSR_S | PSR_ET;
2856	sethi	%hi(_whichqs), %g2
2857	clr	%g4			! lastproc = NULL;
2858	sethi	%hi(_cpcb), %g6
2859	sethi	%hi(_curproc), %g7
2860	/* FALLTHROUGH */
2861
2862/*
2863 * When no processes are on the runq, switch
2864 * idles here watiing for something to come ready.
2865 * The registers are set up as noted above.
2866 */
2867	.globl	idle
2868idle:
2869	st	%g0, [%g7 + %lo(_curproc)] ! curproc = NULL;
2870	wr	%g1, 0, %psr		! (void) spl0();
28711:					! spin reading _whichqs until nonzero
2872	ld	[%g2 + %lo(_whichqs)], %o3
2873	tst	%o3
2874	bnz,a	Lsw_scan
2875	 wr	%g1, PIL_CLOCK << 8, %psr	! (void) splclock();
2876	b,a	1b
2877
2878Lsw_panic_rq:
2879	sethi	%hi(1f), %o0
2880	call	_panic
2881	 or	%lo(1f), %o0, %o0
2882Lsw_panic_wchan:
2883	sethi	%hi(2f), %o0
2884	call	_panic
2885	 or	%lo(2f), %o0, %o0
2886Lsw_panic_srun:
2887	sethi	%hi(3f), %o0
2888	call	_panic
2889	 or	%lo(3f), %o0, %o0
28901:	.asciz	"switch rq"
28912:	.asciz	"switch wchan"
28923:	.asciz	"switch SRUN"
2893	ALIGN
2894
2895/*
2896 * cpu_switch() picks a process to run and runs it, saving the current
2897 * one away.  On the assumption that (since most workstations are
2898 * single user machines) the chances are quite good that the new
2899 * process will turn out to be the current process, we defer saving
2900 * it here until we have found someone to load.  If that someone
2901 * is the current process we avoid both store and load.
2902 *
2903 * cpu_switch() is always entered at splstatclock or splhigh.
2904 *
2905 * IT MIGHT BE WORTH SAVING BEFORE ENTERING idle TO AVOID HAVING TO
2906 * SAVE LATER WHEN SOMEONE ELSE IS READY ... MUST MEASURE!
2907 */
2908	.globl	_runtime
2909	.globl	_time
2910ENTRY(cpu_switch)
2911	/*
2912	 * REGISTER USAGE AT THIS POINT:
2913	 *	%g1 = oldpsr (excluding ipl bits)
2914	 *	%g2 = %hi(_whichqs)
2915	 *	%g3 = p
2916	 *	%g4 = lastproc
2917	 *	%g5 = tmp 0
2918	 *	%g6 = %hi(_cpcb)
2919	 *	%g7 = %hi(_curproc)
2920	 *	%o0 = tmp 1
2921	 *	%o1 = tmp 2
2922	 *	%o2 = tmp 3
2923	 *	%o3 = tmp 4, then at Lsw_scan, whichqs
2924	 *	%o4 = tmp 5, then at Lsw_scan, which
2925	 *	%o5 = tmp 6, then at Lsw_scan, q
2926	 */
2927	sethi	%hi(_whichqs), %g2	! set up addr regs
2928	sethi	%hi(_cpcb), %g6
2929	ld	[%g6 + %lo(_cpcb)], %o0
2930	std	%o6, [%o0 + PCB_SP]	! cpcb->pcb_<sp,pc> = <sp,pc>;
2931	rd	%psr, %g1		! oldpsr = %psr;
2932	sethi	%hi(_curproc), %g7
2933	ld	[%g7 + %lo(_curproc)], %g4	! lastproc = curproc;
2934	st	%g1, [%o0 + PCB_PSR]	! cpcb->pcb_psr = oldpsr;
2935	andn	%g1, PSR_PIL, %g1	! oldpsr &= ~PSR_PIL;
2936
2937	/*
2938	 * In all the fiddling we did to get this far, the thing we are
2939	 * waiting for might have come ready, so let interrupts in briefly
2940	 * before checking for other processes.  Note that we still have
2941	 * curproc set---we have to fix this or we can get in trouble with
2942	 * the run queues below.
2943	 */
2944	st	%g0, [%g7 + %lo(_curproc)]	! curproc = NULL;
2945	wr	%g1, 0, %psr			! (void) spl0();
2946	nop; nop; nop				! paranoia
2947	wr	%g1, PIL_CLOCK <<8 , %psr	! (void) splclock();
2948
2949Lsw_scan:
2950	nop; nop; nop				! paranoia
2951	/*
2952	 * We're about to run a (possibly) new process.  Set runtime
2953	 * to indicate its start time.
2954	 */
2955	sethi	%hi(_time), %o0
2956	ldd	[%o0 + %lo(_time)], %o2
2957	sethi	%hi(_runtime), %o0
2958	std	%o2, [%o0 + %lo(_runtime)]
2959
2960	ld	[%g2 + %lo(_whichqs)], %o3
2961
2962	/*
2963	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
2964	 * branches to idle if ffs(whichqs) was 0.
2965	 */
2966	set	ffstab, %o2
2967	andcc	%o3, 0xff, %o1		! byte 0 zero?
2968	bz,a	1f			! yes, try byte 1
2969	 srl	%o3, 8, %o0
2970	b	2f			! ffs = ffstab[byte0]; which = ffs - 1;
2971	 ldsb	[%o2 + %o1], %o0
29721:	andcc	%o0, 0xff, %o1		! byte 1 zero?
2973	bz,a	1f			! yes, try byte 2
2974	 srl	%o0, 8, %o0
2975	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte1] + 7;
2976	b	3f
2977	 add	%o0, 7, %o4
29781:	andcc	%o0, 0xff, %o1		! byte 2 zero?
2979	bz,a	1f			! yes, try byte 3
2980	 srl	%o0, 8, %o0
2981	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte2] + 15;
2982	b	3f
2983	 add	%o0, 15, %o4
29841:	ldsb	[%o2 + %o0], %o0	! ffs = ffstab[byte3] + 24
2985	addcc	%o0, 24, %o0		! (note that ffstab[0] == -24)
2986	bz	idle			! if answer was 0, go idle
2987	 EMPTY
29882:	sub	%o0, 1, %o4		! which = ffs(whichqs) - 1
29893:	/* end optimized inline expansion */
2990
2991	/*
2992	 * We found a nonempty run queue.  Take its first process.
2993	 */
2994	set	_qs, %o5		! q = &qs[which];
2995	sll	%o4, 3, %o0
2996	add	%o0, %o5, %o5
2997	ld	[%o5], %g3		! p = q->ph_link;
2998	cmp	%g3, %o5		! if (p == q)
2999	be	Lsw_panic_rq		!	panic("switch rq");
3000	 EMPTY
3001	ld	[%g3], %o0		! tmp0 = p->p_forw;
3002	st	%o0, [%o5]		! q->ph_link = tmp0;
3003	st	%o5, [%o0 + 4]		! tmp0->p_back = q;
3004	cmp	%o0, %o5		! if (tmp0 == q)
3005	bne	1f
3006	 EMPTY
3007	mov	1, %o1			!	whichqs &= ~(1 << which);
3008	sll	%o1, %o4, %o1
3009	andn	%o3, %o1, %o3
3010	st	%o3, [%g2 + %lo(_whichqs)]
30111:
3012	/*
3013	 * PHASE TWO: NEW REGISTER USAGE:
3014	 *	%g1 = oldpsr (excluding ipl bits)
3015	 *	%g2 = newpsr
3016	 *	%g3 = p
3017	 *	%g4 = lastproc
3018	 *	%g5 = newpcb
3019	 *	%g6 = %hi(_cpcb)
3020	 *	%g7 = %hi(_curproc)
3021	 *	%o0 = tmp 1
3022	 *	%o1 = tmp 2
3023	 *	%o2 = tmp 3
3024	 *	%o3 = vm
3025	 *	%o4 = sswap
3026	 *	%o5 = <free>
3027	 */
3028
3029	/* firewalls */
3030	ld	[%g3 + P_WCHAN], %o0	! if (p->p_wchan)
3031	tst	%o0
3032	bne	Lsw_panic_wchan		!	panic("switch wchan");
3033	 EMPTY
3034	ldsb	[%g3 + P_STAT], %o0	! if (p->p_stat != SRUN)
3035	cmp	%o0, SRUN
3036	bne	Lsw_panic_srun		!	panic("switch SRUN");
3037	 EMPTY
3038
3039	/*
3040	 * Committed to running process p.
3041	 * It may be the same as the one we were running before.
3042	 */
3043	sethi	%hi(_want_resched), %o0
3044	st	%g0, [%o0 + %lo(_want_resched)]	! want_resched = 0;
3045	ld	[%g3 + P_ADDR], %g5		! newpcb = p->p_addr;
3046	st	%g0, [%g3 + 4]			! p->p_back = NULL;
3047	ld	[%g5 + PCB_PSR], %g2		! newpsr = newpcb->pcb_psr;
3048	st	%g3, [%g7 + %lo(_curproc)]	! curproc = p;
3049
3050	cmp	%g3, %g4		! p == lastproc?
3051	be,a	Lsw_sameproc		! yes, go return 0
3052	 wr	%g2, 0, %psr		! (after restoring ipl)
3053
3054	/*
3055	 * Not the old process.  Save the old process, if any;
3056	 * then load p.
3057	 */
3058	tst	%g4
3059	be,a	Lsw_load		! if no old process, go load
3060	 wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3061
3062	INCR(_nswitchdiff)		! clobbers %o0,%o1
3063	/*
3064	 * save: write back all windows (including the current one).
3065	 * XXX	crude; knows nwindows <= 8
3066	 */
3067#define	SAVE save %sp, -64, %sp
3068	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE	/* 7 of each: */
3069	restore; restore; restore; restore; restore; restore; restore
3070
3071	/*
3072	 * Load the new process.  To load, we must change stacks and
3073	 * alter cpcb and %wim, hence we must disable traps.  %psr is
3074	 * currently equal to oldpsr (%g1) ^ (PIL_CLOCK << 8);
3075	 * this means that PSR_ET is on.  Likewise, PSR_ET is on
3076	 * in newpsr (%g2), although we do not know newpsr's ipl.
3077	 *
3078	 * We also must load up the `in' and `local' registers.
3079	 */
3080	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3081Lsw_load:
3082!	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr	! done above
3083	/* compute new wim */
3084	ld	[%g5 + PCB_WIM], %o0
3085	mov	1, %o1
3086	sll	%o1, %o0, %o0
3087	wr	%o0, 0, %wim		! %wim = 1 << newpcb->pcb_wim;
3088	/* now must not change %psr for 3 more instrs */
3089/*1*/	set	PSR_EF|PSR_EC, %o0
3090/*2*/	andn	%g2, %o0, %g2		! newpsr &= ~(PSR_EF|PSR_EC);
3091/*3*/	nop
3092	/* set new psr, but with traps disabled */
3093	wr	%g2, PSR_ET, %psr	! %psr = newpsr ^ PSR_ET;
3094	/* set new cpcb */
3095	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = newpcb;
3096	/* XXX update masterpaddr too */
3097	sethi	%hi(_masterpaddr), %g7
3098	st	%g5, [%g7 + %lo(_masterpaddr)]
3099	ldd	[%g5 + PCB_SP], %o6	! <sp,pc> = newpcb->pcb_<sp,pc>
3100	/* load window */
3101	ldd	[%sp + (0*8)], %l0
3102	ldd	[%sp + (1*8)], %l2
3103	ldd	[%sp + (2*8)], %l4
3104	ldd	[%sp + (3*8)], %l6
3105	ldd	[%sp + (4*8)], %i0
3106	ldd	[%sp + (5*8)], %i2
3107	ldd	[%sp + (6*8)], %i4
3108	ldd	[%sp + (7*8)], %i6
3109#ifdef DEBUG
3110	mov	%g5, %o0
3111	SET_SP_REDZONE(%o0, %o1)
3112	CHECK_SP_REDZONE(%o0, %o1)
3113#endif
3114	/* finally, enable traps */
3115	wr	%g2, 0, %psr		! psr = newpsr;
3116
3117	/*
3118	 * Now running p.  Make sure it has a context so that it
3119	 * can talk about user space stuff.  (Its pcb_uw is currently
3120	 * zero so it is safe to have interrupts going here.)
3121	 */
3122	ld	[%g3 + P_VMSPACE], %o3	! vm = p->p_vmspace;
3123	ld	[%o3 + VM_PMAP_CTX], %o0! if (vm->vm_pmap.pm_ctx != NULL)
3124	tst	%o0
3125	bnz,a	Lsw_havectx		!	goto havecontext;
3126	 ld	[%o3 + VM_PMAP_CTXNUM], %o0
3127
3128	/* p does not have a context: call ctx_alloc to get one */
3129	save	%sp, -CCFSZ, %sp
3130	call	_ctx_alloc		! ctx_alloc(&vm->vm_pmap);
3131	 add	%i3, VM_PMAP, %o0
3132	ret
3133	 restore
3134
3135	/* p does have a context: just switch to it */
3136Lsw_havectx:
3137!	ld	[%o3 + VM_PMAP_CTXNUM], %o0	! (done in delay slot)
3138	set	AC_CONTEXT, %o1
3139	stba	%o0, [%o1] ASI_CONTROL	! setcontext(vm->vm_pmap.pm_ctxnum);
3140	retl
3141	 nop
3142
3143Lsw_sameproc:
3144	/*
3145	 * We are resuming the process that was running at the
3146	 * call to switch().  Just set psr ipl and return.
3147	 */
3148!	wr	%g2, 0 %psr		! %psr = newpsr; (done earlier)
3149	nop
3150	retl
3151	 nop
3152
3153
3154/*
3155 * Snapshot the current process so that stack frames are up to date.
3156 * This is called from two places:
3157 *  - just before a crash dump, for the stack update;
3158 *  - in cpu_fork(), before copying the kernel stack.
3159 * In the latter case the pcb and stack will be copied to the child,
3160 * and the child will be made runnable.  Eventually switch() will run
3161 * it.  When it does, we want its pcb_pc set so that we can appear
3162 * to return 1 from cpu_fork(), so we store the current sp and psr
3163 * in the given pcb, and set its pcb_pc to our return-1 code (offset
3164 * by -8 due to call/ret conventions).  This is not useful in the crash
3165 * dump code but it is easiest to do it anyway.
3166 */
3167ENTRY(snapshot)
3168	st	%o6, [%o0 + PCB_SP]	! save sp
3169	set	1f - 8, %o1		! set child-return pc
3170	st	%o1, [%o0 + PCB_PC]
3171	rd	%psr, %o1		! save psr
3172	st	%o1, [%o0 + PCB_PSR]
3173
3174	/*
3175	 * Just like switch(); same XXX comments apply.
3176	 * 7 of each.  Minor tweak: the 7th restore is
3177	 * done after a ret.
3178	 */
3179	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE
3180	restore; restore; restore; restore; restore; restore; ret; restore
3181
31821:	/* this is reached only after a child gets chosen in switch() */
3183	mov	1, %i0			! return 1 from cpu_fork
3184	ret
3185	 restore
3186
3187/*
3188 * {fu,su}{,i}{byte,word}
3189 */
3190ALTENTRY(fuiword)
3191ENTRY(fuword)
3192	set	KERNBASE, %o2
3193	cmp	%o0, %o2		! if addr >= KERNBASE...
3194	bgeu	Lfsbadaddr
3195	EMPTY
3196	btst	3, %o0			! or has low bits set...
3197	bnz	Lfsbadaddr		!	go return -1
3198	EMPTY
3199	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3200	ld	[%o2 + %lo(_cpcb)], %o2
3201	set	Lfserr, %o3
3202	st	%o3, [%o2 + PCB_ONFAULT]
3203	ld	[%o0], %o0		! fetch the word
3204	retl				! phew, made it, return the word
3205	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3206
3207Lfserr:
3208	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3209Lfsbadaddr:
3210	retl				! and return error indicator
3211	mov	-1, %o0
3212
3213	/*
3214	 * This is just like Lfserr, but it's a global label that allows
3215	 * mem_access_fault() to check to see that we don't want to try to
3216	 * page in the fault.  It's used by fuswintr() etc.
3217	 */
3218	.globl	_Lfsbail
3219_Lfsbail:
3220	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3221	retl				! and return error indicator
3222	mov	-1, %o0
3223
3224	/*
3225	 * Like fusword but callable from interrupt context.
3226	 * Fails if data isn't resident.
3227	 */
3228ENTRY(fuswintr)
3229	set	KERNBASE, %o2
3230	cmp	%o0, %o2		! if addr >= KERNBASE
3231	bgeu	Lfsbadaddr		!	return error
3232	EMPTY
3233	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3234	ld	[%o2 + %lo(_cpcb)], %o2
3235	set	_Lfsbail, %o3
3236	st	%o3, [%o2 + PCB_ONFAULT]
3237	lduh	[%o0], %o0		! fetch the halfword
3238	retl				! made it
3239	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3240
3241ENTRY(fusword)
3242	set	KERNBASE, %o2
3243	cmp	%o0, %o2		! if addr >= KERNBASE
3244	bgeu	Lfsbadaddr		!	return error
3245	EMPTY
3246	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3247	ld	[%o2 + %lo(_cpcb)], %o2
3248	set	Lfserr, %o3
3249	st	%o3, [%o2 + PCB_ONFAULT]
3250	lduh	[%o0], %o0		! fetch the halfword
3251	retl				! made it
3252	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3253
3254ALTENTRY(fuibyte)
3255ENTRY(fubyte)
3256	set	KERNBASE, %o2
3257	cmp	%o0, %o2		! if addr >= KERNBASE
3258	bgeu	Lfsbadaddr		!	return error
3259	EMPTY
3260	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3261	ld	[%o2 + %lo(_cpcb)], %o2
3262	set	Lfserr, %o3
3263	st	%o3, [%o2 + PCB_ONFAULT]
3264	ldub	[%o0], %o0		! fetch the byte
3265	retl				! made it
3266	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3267
3268ALTENTRY(suiword)
3269ENTRY(suword)
3270	set	KERNBASE, %o2
3271	cmp	%o0, %o2		! if addr >= KERNBASE ...
3272	bgeu	Lfsbadaddr
3273	EMPTY
3274	btst	3, %o0			! or has low bits set ...
3275	bnz	Lfsbadaddr		!	go return error
3276	EMPTY
3277	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3278	ld	[%o2 + %lo(_cpcb)], %o2
3279	set	Lfserr, %o3
3280	st	%o3, [%o2 + PCB_ONFAULT]
3281	st	%o1, [%o0]		! store the word
3282	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3283	retl				! and return 0
3284	clr	%o0
3285
3286ENTRY(suswintr)
3287	set	KERNBASE, %o2
3288	cmp	%o0, %o2		! if addr >= KERNBASE
3289	bgeu	Lfsbadaddr		!	go return error
3290	EMPTY
3291	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3292	ld	[%o2 + %lo(_cpcb)], %o2
3293	set	_Lfsbail, %o3
3294	st	%o3, [%o2 + PCB_ONFAULT]
3295	sth	%o1, [%o0]		! store the halfword
3296	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3297	retl				! and return 0
3298	clr	%o0
3299
3300ENTRY(susword)
3301	set	KERNBASE, %o2
3302	cmp	%o0, %o2		! if addr >= KERNBASE
3303	bgeu	Lfsbadaddr		!	go return error
3304	EMPTY
3305	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3306	ld	[%o2 + %lo(_cpcb)], %o2
3307	set	Lfserr, %o3
3308	st	%o3, [%o2 + PCB_ONFAULT]
3309	sth	%o1, [%o0]		! store the halfword
3310	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3311	retl				! and return 0
3312	clr	%o0
3313
3314ALTENTRY(suibyte)
3315ENTRY(subyte)
3316	set	KERNBASE, %o2
3317	cmp	%o0, %o2		! if addr >= KERNBASE
3318	bgeu	Lfsbadaddr		!	go return error
3319	EMPTY
3320	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3321	ld	[%o2 + %lo(_cpcb)], %o2
3322	set	Lfserr, %o3
3323	st	%o3, [%o2 + PCB_ONFAULT]
3324	stb	%o1, [%o0]		! store the byte
3325	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3326	retl				! and return 0
3327	clr	%o0
3328
3329/* probeget and probeset are meant to be used during autoconfiguration */
3330
3331/*
3332 * probeget(addr, size) caddr_t addr; int size;
3333 *
3334 * Read or write a (byte,word,longword) from the given address.
3335 * Like {fu,su}{byte,halfword,word} but our caller is supposed
3336 * to know what he is doing... the address can be anywhere.
3337 *
3338 * We optimize for space, rather than time, here.
3339 */
3340ENTRY(probeget)
3341	! %o0 = addr, %o1 = (1,2,4)
3342	set	KERNBASE, %o2
3343	cmp	%o0, %o2		! if addr < KERNBASE
3344	blu	Lfsbadaddr		!	go return error
3345	 EMPTY
3346	sethi	%hi(_cpcb), %o2
3347	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3348	set	Lfserr, %o5
3349	st	%o5, [%o2 + PCB_ONFAULT]
3350	btst	1, %o1
3351	bnz,a	0f			! if (len & 1)
3352	 ldub	[%o0], %o0		!	value = *(char *)addr;
33530:	btst	2, %o1
3354	bnz,a	0f			! if (len & 2)
3355	 lduh	[%o0], %o0		!	value = *(short *)addr;
33560:	btst	4, %o1
3357	bnz,a	0f			! if (len & 4)
3358	 ld	[%o0], %o0		!	value = *(int *)addr;
33590:	retl				! made it, clear onfault and return
3360	 st	%g0, [%o2 + PCB_ONFAULT]
3361
3362/*
3363 * probeset(addr, size, val) caddr_t addr; int size, val;
3364 *
3365 * As above, but we return 0 on success.
3366 */
3367ENTRY(probeset)
3368	! %o0 = addr, %o1 = (1,2,4), %o2 = val
3369	set	KERNBASE, %o2
3370	cmp	%o0, %o2		! if addr < KERNBASE
3371	blu	Lfsbadaddr		!	go return error
3372	 EMPTY
3373	sethi	%hi(_cpcb), %o2
3374	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3375	set	Lfserr, %o5
3376	st	%o5, [%o2 + PCB_ONFAULT]
3377	btst	1, %o1
3378	bnz,a	0f			! if (len & 1)
3379	 stb	%o2, [%o0]		!	*(char *)addr = value;
33800:	btst	2, %o1
3381	bnz,a	0f			! if (len & 2)
3382	 sth	%o2, [%o0]		!	*(short *)addr = value;
33830:	btst	4, %o1
3384	bnz,a	0f			! if (len & 4)
3385	 st	%o2, [%o0]		!	*(int *)addr = value;
33860:	clr	%o0			! made it, clear onfault and return 0
3387	retl
3388	 st	%g0, [%o2 + PCB_ONFAULT]
3389
3390/*
3391 * Insert entry into doubly-linked queue.
3392 * We could just do this in C, but gcc does not do leaves well (yet).
3393 */
3394ENTRY(_insque)
3395	! %o0 = e = what to insert; %o1 = after = entry to insert after
3396	st	%o1, [%o0 + 4]		! e->prev = after;
3397	ld	[%o1], %o2		! tmp = after->next;
3398	st	%o2, [%o0]		! e->next = tmp;
3399	st	%o0, [%o1]		! after->next = e;
3400	retl
3401	st	%o0, [%o2 + 4]		! tmp->prev = e;
3402
3403
3404/*
3405 * Remove entry from doubly-linked queue.
3406 */
3407ENTRY(_remque)
3408	! %o0 = e = what to remove
3409	ld	[%o0], %o1		! n = e->next;
3410	ld	[%o0 + 4], %o2		! p = e->prev;
3411	st	%o2, [%o1 + 4]		! n->prev = p;
3412	retl
3413	st	%o1, [%o2]		! p->next = n;
3414
3415/*
3416 * copywords(src, dst, nbytes)
3417 *
3418 * Copy `nbytes' bytes from src to dst, both of which are word-aligned;
3419 * nbytes is a multiple of four.  It may, however, be zero, in which case
3420 * nothing is to be copied.
3421 */
3422ENTRY(copywords)
3423	! %o0 = src, %o1 = dst, %o2 = nbytes
3424	b	1f
3425	deccc	4, %o2
34260:
3427	st	%o3, [%o1 + %o2]
3428	deccc	4, %o2			! while ((n -= 4) >= 0)
34291:
3430	bge,a	0b			!    *(int *)(dst+n) = *(int *)(src+n);
3431	ld	[%o0 + %o2], %o3
3432	retl
3433	nop
3434
3435/*
3436 * qcopy(src, dst, nbytes)
3437 *
3438 * (q for `quad' or `quick', as opposed to b for byte/block copy)
3439 *
3440 * Just like copywords, but everything is multiples of 8.
3441 */
3442ENTRY(qcopy)
3443	b	1f
3444	deccc	8, %o2
34450:
3446	std	%o4, [%o1 + %o2]
3447	deccc	8, %o2
34481:
3449	bge,a	0b
3450	ldd	[%o0 + %o2], %o4
3451	retl
3452	nop
3453
3454/*
3455 * qzero(addr, nbytes)
3456 *
3457 * Zeroes `nbytes' bytes of a quad-aligned virtual address,
3458 * where nbytes is itself a multiple of 8.
3459 */
3460ENTRY(qzero)
3461	! %o0 = addr, %o1 = len (in bytes)
3462	clr	%g1
34630:
3464	deccc	8, %o1			! while ((n =- 8) >= 0)
3465	bge,a	0b
3466	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + n) = 0;
3467	retl
3468	nop
3469
3470/*
3471 * bzero(addr, len)
3472 *
3473 * We should unroll the loop, but at the moment this would
3474 * gain nothing since the `std' instructions are what limits us.
3475 */
3476ALTENTRY(blkclr)
3477ENTRY(bzero)
3478	! %o0 = addr, %o1 = len
3479
3480	! Optimize a common case: addr and len are both multiples of 8.
3481	or	%o0, %o1, %o2
3482	btst	7, %o2			! ((addr | len) & 7) != 0?
3483	bnz	1f			! if so, cannot optimize
3484	clr	%g1			! in any case, we want g1=0
3485
3486	/* `Good' operands, can just store doubles. */
34870:
3488	deccc	8, %o1			! while ((len -= 8) >= 0)
3489	bge,a	0b
3490	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + len) = 0;
3491	retl
3492	nop
3493
3494	/*
3495	 * Either the address is unaligned, or the count is not a
3496	 * multiple of 8, or both.  We will have to align the address
3497	 * in order to use anything `better' than stb.
3498	 */
34991:
3500	cmp	%o1, 15			! len >= 15?
3501	bge,a	Lstd			! yes, use std
3502	btst	1, %o0			! (but first check alignment)
3503
3504	! not enough to bother: do byte-at-a-time loop.
35052:
3506	deccc	%o1			! while (--len >= 0)
3507	bge,a	2b
3508	stb	%g0, [%o0 + %o1]	!	addr[len] = 0;
3509	retl
3510	nop
3511
3512Lstd:
3513	/*
3514	 * There are at least 15 bytes to zero.
3515	 * We may have to zero some initial stuff to align
3516	 * the address.
3517	 */
3518	bz,a	1f			! if (addr & 1) {
3519	btst	2, %o0
3520	stb	%g0, [%o0]		!	*addr = 0;
3521	inc	%o0			!	addr++;
3522	dec	%o1			!	len--;
3523	btst	2, %o0			! }
35241:
3525	bz,a	1f			! if (addr & 2) {
3526	btst	4, %o0
3527	sth	%g0, [%o0]		!	*(short *)addr = 0;
3528	inc	2, %o0			!	addr += 2;
3529	dec	2, %o1			!	len -= 2;
3530	btst	4, %o0			! }
35311:
3532	bz	1f			! if (addr & 4) {
3533	dec	8, %o1
3534	st	%g0, [%o0]		!	*(int *)addr = 0;
3535	inc	4, %o0			!	addr += 4;
3536	dec	4, %o1			!	len -= 4;
3537					! }
3538	/*
3539	 * Address is double word aligned; len is 8 less than
3540	 * the number of bytes remaining (i.e., len is 0 if
3541	 * the remaining count is 8, 1 if it is 9, etc.).
3542	 */
35431:
3544	std	%g0, [%o0]		! do {
35452:					!	*(quad *)addr = 0;
3546	inc	8, %o0			!	addr += 8;
3547	deccc	8, %o1			! } while ((len -= 8) >= 0);
3548	bge,a	2b
3549	std	%g0, [%o0]
3550
3551	/*
3552	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
3553	 * -6 => two bytes, etc.  Mop up this remainder, if any.
3554	 */
3555	btst	4, %o1
3556	bz	1f			! if (len & 4) {
3557	btst	2, %o1
3558	st	%g0, [%o0]		!	*(int *)addr = 0;
3559	inc	4, %o0			!	addr += 4;
35601:
3561	bz	1f			! if (len & 2) {
3562	btst	1, %o1
3563	sth	%g0, [%o0]		!	*(short *)addr = 0;
3564	inc	2, %o0			!	addr += 2;
35651:
3566	bnz,a	1f			! if (len & 1)
3567	stb	%g0, [%o0]		!	*addr = 0;
35681:
3569	retl
3570	nop
3571
3572/*
3573 * kernel bcopy/memcpy
3574 * Assumes regions do not overlap; has no useful return value.
3575 *
3576 * Must not use %g7 (see copyin/copyout above).
3577 */
3578
3579#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
3580
3581ENTRY(memcpy)
3582	/*
3583	 * Swap args for bcopy.  Gcc generates calls to memcpy for
3584	 * structure assignments.
3585	 */
3586	mov	%o0, %o3
3587	mov	%o1, %o0
3588	mov	%o3, %o1
3589ENTRY(bcopy)
3590	cmp	%o2, BCOPY_SMALL
3591Lbcopy_start:
3592	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
3593	btst	7, %o0		! (part of being fancy)
3594
3595	/*
3596	 * Not much to copy, just do it a byte at a time.
3597	 */
3598	deccc	%o2		! while (--len >= 0)
3599	bl	1f
3600	EMPTY
36010:
3602	inc	%o0
3603	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
3604	stb	%o4, [%o1]
3605	deccc	%o2
3606	bge	0b
3607	inc	%o1
36081:
3609	retl
3610	nop
3611	/* NOTREACHED */
3612
3613	/*
3614	 * Plenty of data to copy, so try to do it optimally.
3615	 */
3616Lbcopy_fancy:
3617	! check for common case first: everything lines up.
3618!	btst	7, %o0		! done already
3619	bne	1f
3620	EMPTY
3621	btst	7, %o1
3622	be,a	Lbcopy_doubles
3623	dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubes
3624
3625	! If the low bits match, we can make these line up.
36261:
3627	xor	%o0, %o1, %o3	! t = src ^ dst;
3628	btst	1, %o3		! if (t & 1) {
3629	be,a	1f
3630	btst	1, %o0		! [delay slot: if (src & 1)]
3631
3632	! low bits do not match, must copy by bytes.
36330:
3634	ldsb	[%o0], %o4	!	do {
3635	inc	%o0		!		(++dst)[-1] = *src++;
3636	inc	%o1
3637	deccc	%o2
3638	bnz	0b		!	} while (--len != 0);
3639	stb	%o4, [%o1 - 1]
3640	retl
3641	nop
3642	/* NOTREACHED */
3643
3644	! lowest bit matches, so we can copy by words, if nothing else
36451:
3646	be,a	1f		! if (src & 1) {
3647	btst	2, %o3		! [delay slot: if (t & 2)]
3648
3649	! although low bits match, both are 1: must copy 1 byte to align
3650	ldsb	[%o0], %o4	!	*dst++ = *src++;
3651	stb	%o4, [%o1]
3652	inc	%o0
3653	inc	%o1
3654	dec	%o2		!	len--;
3655	btst	2, %o3		! } [if (t & 2)]
36561:
3657	be,a	1f		! if (t & 2) {
3658	btst	2, %o0		! [delay slot: if (src & 2)]
3659	dec	2, %o2		!	len -= 2;
36600:
3661	ldsh	[%o0], %o4	!	do {
3662	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
3663	inc	2, %o0		!		dst += 2, src += 2;
3664	deccc	2, %o2		!	} while ((len -= 2) >= 0);
3665	bge	0b
3666	inc	2, %o1
3667	b	Lbcopy_mopb	!	goto mop_up_byte;
3668	btst	1, %o2		! } [delay slot: if (len & 1)]
3669	/* NOTREACHED */
3670
3671	! low two bits match, so we can copy by longwords
36721:
3673	be,a	1f		! if (src & 2) {
3674	btst	4, %o3		! [delay slot: if (t & 4)]
3675
3676	! although low 2 bits match, they are 10: must copy one short to align
3677	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
3678	sth	%o4, [%o1]
3679	inc	2, %o0		!	dst += 2;
3680	inc	2, %o1		!	src += 2;
3681	dec	2, %o2		!	len -= 2;
3682	btst	4, %o3		! } [if (t & 4)]
36831:
3684	be,a	1f		! if (t & 4) {
3685	btst	4, %o0		! [delay slot: if (src & 4)]
3686	dec	4, %o2		!	len -= 4;
36870:
3688	ld	[%o0], %o4	!	do {
3689	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
3690	inc	4, %o0		!		dst += 4, src += 4;
3691	deccc	4, %o2		!	} while ((len -= 4) >= 0);
3692	bge	0b
3693	inc	4, %o1
3694	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3695	btst	2, %o2		! } [delay slot: if (len & 2)]
3696	/* NOTREACHED */
3697
3698	! low three bits match, so we can copy by doublewords
36991:
3700	be	1f		! if (src & 4) {
3701	dec	8, %o2		! [delay slot: len -= 8]
3702	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3703	st	%o4, [%o1]
3704	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
3705	inc	4, %o1
3706	dec	4, %o2		! }
37071:
3708Lbcopy_doubles:
3709	ldd	[%o0], %o4	! do {
3710	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
3711	inc	8, %o0		!	dst += 8, src += 8;
3712	deccc	8, %o2		! } while ((len -= 8) >= 0);
3713	bge	Lbcopy_doubles
3714	inc	8, %o1
3715
3716	! check for a usual case again (save work)
3717	btst	7, %o2		! if ((len & 7) == 0)
3718	be	Lbcopy_done	!	goto bcopy_done;
3719
3720	btst	4, %o2		! if ((len & 4)) == 0)
3721	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3722	btst	2, %o2		! [delay slot: if (len & 2)]
3723	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3724	st	%o4, [%o1]
3725	inc	4, %o0		!	dst += 4;
3726	inc	4, %o1		!	src += 4;
3727	btst	2, %o2		! } [if (len & 2)]
3728
37291:
3730	! mop up trailing word (if present) and byte (if present).
3731Lbcopy_mopw:
3732	be	Lbcopy_mopb	! no word, go mop up byte
3733	btst	1, %o2		! [delay slot: if (len & 1)]
3734	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
3735	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
3736	sth	%o4, [%o1]
3737	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
3738	retl
3739	stb	%o4, [%o1 + 2]
3740	/* NOTREACHED */
3741
3742	! mop up trailing byte (if present).
3743Lbcopy_mopb:
3744	bne,a	1f
3745	ldsb	[%o0], %o4
3746
3747Lbcopy_done:
3748	retl
3749	nop
3750
37511:
3752	retl
3753	stb	%o4,[%o1]
3754/*
3755 * ovbcopy(src, dst, len): like bcopy, but regions may overlap.
3756 */
3757ENTRY(ovbcopy)
3758	cmp	%o0, %o1	! src < dst?
3759	bgeu	Lbcopy_start	! no, go copy forwards as via bcopy
3760	cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)
3761
3762	/*
3763	 * Since src comes before dst, and the regions might overlap,
3764	 * we have to do the copy starting at the end and working backwards.
3765	 */
3766	add	%o2, %o0, %o0	! src += len
3767	add	%o2, %o1, %o1	! dst += len
3768	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
3769	btst	3, %o0
3770
3771	/*
3772	 * Not much to copy, just do it a byte at a time.
3773	 */
3774	deccc	%o2		! while (--len >= 0)
3775	bl	1f
3776	EMPTY
37770:
3778	dec	%o0		!	*--dst = *--src;
3779	ldsb	[%o0], %o4
3780	dec	%o1
3781	deccc	%o2
3782	bge	0b
3783	stb	%o4, [%o1]
37841:
3785	retl
3786	nop
3787
3788	/*
3789	 * Plenty to copy, try to be optimal.
3790	 * We only bother with word/halfword/byte copies here.
3791	 */
3792Lback_fancy:
3793!	btst	3, %o0		! done already
3794	bnz	1f		! if ((src & 3) == 0 &&
3795	btst	3, %o1		!     (dst & 3) == 0)
3796	bz,a	Lback_words	!	goto words;
3797	dec	4, %o2		! (done early for word copy)
3798
37991:
3800	/*
3801	 * See if the low bits match.
3802	 */
3803	xor	%o0, %o1, %o3	! t = src ^ dst;
3804	btst	1, %o3
3805	bz,a	3f		! if (t & 1) == 0, can do better
3806	btst	1, %o0
3807
3808	/*
3809	 * Nope; gotta do byte copy.
3810	 */
38112:
3812	dec	%o0		! do {
3813	ldsb	[%o0], %o4	!	*--dst = *--src;
3814	dec	%o1
3815	deccc	%o2		! } while (--len != 0);
3816	bnz	2b
3817	stb	%o4, [%o1]
3818	retl
3819	nop
3820
38213:
3822	/*
3823	 * Can do halfword or word copy, but might have to copy 1 byte first.
3824	 */
3825!	btst	1, %o0		! done earlier
3826	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
3827	btst	2, %o3		! (done early)
3828	dec	%o0		!	*--dst = *--src;
3829	ldsb	[%o0], %o4
3830	dec	%o1
3831	stb	%o4, [%o1]
3832	dec	%o2		!	len--;
3833	btst	2, %o3		! }
3834
38354:
3836	/*
3837	 * See if we can do a word copy ((t&2) == 0).
3838	 */
3839!	btst	2, %o3		! done earlier
3840	bz,a	6f		! if (t & 2) == 0, can do word copy
3841	btst	2, %o0		! (src&2, done early)
3842
3843	/*
3844	 * Gotta do halfword copy.
3845	 */
3846	dec	2, %o2		! len -= 2;
38475:
3848	dec	2, %o0		! do {
3849	ldsh	[%o0], %o4	!	src -= 2;
3850	dec	2, %o1		!	dst -= 2;
3851	deccc	2, %o0		!	*(short *)dst = *(short *)src;
3852	bge	5b		! } while ((len -= 2) >= 0);
3853	sth	%o4, [%o1]
3854	b	Lback_mopb	! goto mop_up_byte;
3855	btst	1, %o2		! (len&1, done early)
3856
38576:
3858	/*
3859	 * We can do word copies, but we might have to copy
3860	 * one halfword first.
3861	 */
3862!	btst	2, %o0		! done already
3863	bz	7f		! if (src & 2) {
3864	dec	4, %o2		! (len -= 4, done early)
3865	dec	2, %o0		!	src -= 2, dst -= 2;
3866	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3867	dec	2, %o1
3868	sth	%o4, [%o1]
3869	dec	2, %o2		!	len -= 2;
3870				! }
3871
38727:
3873Lback_words:
3874	/*
3875	 * Do word copies (backwards), then mop up trailing halfword
3876	 * and byte if any.
3877	 */
3878!	dec	4, %o2		! len -= 4, done already
38790:				! do {
3880	dec	4, %o0		!	src -= 4;
3881	dec	4, %o1		!	src -= 4;
3882	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3883	deccc	4, %o2		! } while ((len -= 4) >= 0);
3884	bge	0b
3885	st	%o4, [%o1]
3886
3887	/*
3888	 * Check for trailing shortword.
3889	 */
3890	btst	2, %o2		! if (len & 2) {
3891	bz,a	1f
3892	btst	1, %o2		! (len&1, done early)
3893	dec	2, %o0		!	src -= 2, dst -= 2;
3894	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3895	dec	2, %o1
3896	sth	%o4, [%o1]	! }
3897	btst	1, %o2
3898
3899	/*
3900	 * Check for trailing byte.
3901	 */
39021:
3903Lback_mopb:
3904!	btst	1, %o2		! (done already)
3905	bnz,a	1f		! if (len & 1) {
3906	ldsb	[%o0 - 1], %o4	!	b = src[-1];
3907	retl
3908	nop
39091:
3910	retl			!	dst[-1] = b;
3911	stb	%o4, [%o1 - 1]	! }
3912
3913
3914/*
3915 * savefpstate(f) struct fpstate *f;
3916 *
3917 * Store the current FPU state.  The first `st %fsr' may cause a trap;
3918 * our trap handler knows how to recover (by `returning' to savefpcont).
3919 */
3920ENTRY(savefpstate)
3921	rd	%psr, %o1		! enable FP before we begin
3922	set	PSR_EF, %o2
3923	or	%o1, %o2, %o1
3924	wr	%o1, 0, %psr
3925	/* do some setup work while we wait for PSR_EF to turn on */
3926	set	FSR_QNE, %o5		! QNE = 0x2000, too big for immediate
3927	clr	%o3			! qsize = 0;
3928	nop				! (still waiting for PSR_EF)
3929special_fp_store:
3930	st	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
3931	/*
3932	 * Even if the preceding instruction did not trap, the queue
3933	 * is not necessarily empty: this state save might be happening
3934	 * because user code tried to store %fsr and took the FPU
3935	 * from `exception pending' mode to `exception' mode.
3936	 * So we still have to check the blasted QNE bit.
3937	 * With any luck it will usually not be set.
3938	 */
3939	ld	[%o0 + FS_FSR], %o4	! if (f->fs_fsr & QNE)
3940	btst	%o5, %o4
3941	bnz	Lfp_storeq		!	goto storeq;
3942	 std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
3943Lfp_finish:
3944	st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
3945	std	%f2, [%o0 + FS_REGS + (4*2)]
3946	std	%f4, [%o0 + FS_REGS + (4*4)]
3947	std	%f6, [%o0 + FS_REGS + (4*6)]
3948	std	%f8, [%o0 + FS_REGS + (4*8)]
3949	std	%f10, [%o0 + FS_REGS + (4*10)]
3950	std	%f12, [%o0 + FS_REGS + (4*12)]
3951	std	%f14, [%o0 + FS_REGS + (4*14)]
3952	std	%f16, [%o0 + FS_REGS + (4*16)]
3953	std	%f18, [%o0 + FS_REGS + (4*18)]
3954	std	%f20, [%o0 + FS_REGS + (4*20)]
3955	std	%f22, [%o0 + FS_REGS + (4*22)]
3956	std	%f24, [%o0 + FS_REGS + (4*24)]
3957	std	%f26, [%o0 + FS_REGS + (4*26)]
3958	std	%f28, [%o0 + FS_REGS + (4*28)]
3959	retl
3960	 std	%f30, [%o0 + FS_REGS + (4*30)]
3961
3962/*
3963 * Store the (now known nonempty) FP queue.
3964 * We have to reread the fsr each time in order to get the new QNE bit.
3965 */
3966Lfp_storeq:
3967	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
39681:
3969	std	%fq, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
3970	st	%fsr, [%o0 + FS_FSR]	! reread fsr
3971	ld	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
3972	btst	%o5, %o4
3973	bnz	1b
3974	 inc	8, %o3
3975	b	Lfp_finish		! set qsize and finish storing fregs
3976	 srl	%o3, 3, %o3		! (but first fix qsize)
3977
3978/*
3979 * The fsr store trapped.  Do it again; this time it will not trap.
3980 * We could just have the trap handler return to the `st %fsr', but
3981 * if for some reason it *does* trap, that would lock us into a tight
3982 * loop.  This way we panic instead.  Whoopee.
3983 */
3984savefpcont:
3985	b	special_fp_store + 4	! continue
3986	 st	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
3987
3988/*
3989 * Load FPU state.
3990 */
3991ENTRY(loadfpstate)
3992	rd	%psr, %o1		! enable FP before we begin
3993	set	PSR_EF, %o2
3994	or	%o1, %o2, %o1
3995	wr	%o1, 0, %psr
3996	nop; nop; nop			! paranoia
3997	ldd	[%o0 + FS_REGS + (4*0)], %f0
3998	ldd	[%o0 + FS_REGS + (4*2)], %f2
3999	ldd	[%o0 + FS_REGS + (4*4)], %f4
4000	ldd	[%o0 + FS_REGS + (4*6)], %f6
4001	ldd	[%o0 + FS_REGS + (4*8)], %f8
4002	ldd	[%o0 + FS_REGS + (4*10)], %f10
4003	ldd	[%o0 + FS_REGS + (4*12)], %f12
4004	ldd	[%o0 + FS_REGS + (4*14)], %f14
4005	ldd	[%o0 + FS_REGS + (4*16)], %f16
4006	ldd	[%o0 + FS_REGS + (4*18)], %f18
4007	ldd	[%o0 + FS_REGS + (4*20)], %f20
4008	ldd	[%o0 + FS_REGS + (4*22)], %f22
4009	ldd	[%o0 + FS_REGS + (4*24)], %f24
4010	ldd	[%o0 + FS_REGS + (4*26)], %f26
4011	ldd	[%o0 + FS_REGS + (4*28)], %f28
4012	ldd	[%o0 + FS_REGS + (4*30)], %f30
4013	retl
4014	 ld	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
4015
4016/*
4017 * ienab_bis(bis) int bis;
4018 * ienab_bic(bic) int bic;
4019 *
4020 * Set and clear bits in the interrupt register.
4021 * Since there are no read-modify-write instructions for this,
4022 * and one of the interrupts is nonmaskable, we must disable traps.
4023 *
4024 * NB: ___main is defined here for gcc-2 idiocy.  Ignore it.
4025 */
4026ENTRY(ienab_bis)
4027	! %o0 = bits to set
4028	rd	%psr, %o2
4029	wr	%o2, PSR_ET, %psr	! disable traps
4030	nop; nop			! 3-instr delay until ET turns off
4031	sethi	%hi(IE_reg_addr), %o3
4032	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4033	or	%o4, %o0, %o4		! *IE_reg_addr |= bis;
4034	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4035	wr	%o2, 0, %psr		! reenable traps
4036	nop
4037	.globl	___main
4038___main:
4039	retl
4040	 nop
4041
4042ENTRY(ienab_bic)
4043	! %o0 = bits to clear
4044	rd	%psr, %o2
4045	wr	%o2, PSR_ET, %psr	! disable traps
4046	nop; nop
4047	sethi	%hi(IE_reg_addr), %o3
4048	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4049	andn	%o4, %o0, %o4		! *IE_reg_addr &=~ bic;
4050	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4051	wr	%o2, 0, %psr		! reenable traps
4052	nop
4053	retl
4054	 nop
4055
4056/*
4057 * ffs(), using table lookup.
4058 * The process switch code shares the table, so we just put the
4059 * whole thing here.
4060 */
4061ffstab:
4062	.byte	-24,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1 /* 00-0f */
4063	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 10-1f */
4064	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 20-2f */
4065	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 30-3f */
4066	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 40-4f */
4067	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 50-5f */
4068	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 60-6f */
4069	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 70-7f */
4070	.byte	8,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 80-8f */
4071	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 10-9f */
4072	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* a0-af */
4073	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* b0-bf */
4074	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* c0-cf */
4075	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* d0-df */
4076	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* e0-ef */
4077	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* f0-ff */
4078
4079/*
4080 * We use a table lookup on each byte.
4081 *
4082 * In each section below, %o1 is the current byte (0, 1, 2, or 3).
4083 * The last byte is handled specially: for the first three,
4084 * if that byte is nonzero, we return the table value
4085 * (plus 0, 8, or 16 for the byte number), but for the last
4086 * one, we just return the table value plus 24.  This means
4087 * that ffstab[0] must be -24 so that ffs(0) will return 0.
4088 */
4089ENTRY(ffs)
4090	set	ffstab, %o2
4091	andcc	%o0, 0xff, %o1	! get low byte
4092	bz,a	1f		! try again if 0
4093	srl	%o0, 8, %o0	! delay slot, get ready for next byte
4094
4095	retl			! return ffstab[%o1]
4096	ldsb	[%o2 + %o1], %o0
4097
40981:
4099	andcc	%o0, 0xff, %o1	! byte 1 like byte 0...
4100	bz,a	2f
4101	srl	%o0, 8, %o0	! (use delay to prepare for byte 2)
4102
4103	ldsb	[%o2 + %o1], %o0
4104	retl			! return ffstab[%o1] + 8
4105	add	%o0, 8, %o0
4106
41072:
4108	andcc	%o0, 0xff, %o1
4109	bz,a	3f
4110	srl	%o0, 8, %o0	! (prepare for byte 3)
4111
4112	ldsb	[%o2 + %o1], %o0
4113	retl			! return ffstab[%o1] + 16
4114	add	%o0, 16, %o0
4115
41163:				! just return ffstab[%o0] + 24
4117	ldsb	[%o2 + %o0], %o0
4118	retl
4119	add	%o0, 24, %o0
4120
4121/*
4122 * Here is a very good random number generator.  This implementation is
4123 * based on ``Two Fast Implementations of the "Minimal Standard" Random
4124 * Number Generator", David G. Carta, Communications of the ACM, Jan 1990,
4125 * Vol 33 No 1.
4126 */
4127	.data
4128randseed:
4129	.word	1
4130	.text
4131ENTRY(random)
4132	sethi	%hi(16807), %o1
4133	wr	%o1, %lo(16807), %y
4134	 sethi	%hi(randseed), %g1
4135	 ld	[%g1 + %lo(randseed)], %o0
4136	 andcc	%g0, 0, %o2
4137	mulscc  %o2, %o0, %o2
4138	mulscc  %o2, %o0, %o2
4139	mulscc  %o2, %o0, %o2
4140	mulscc  %o2, %o0, %o2
4141	mulscc  %o2, %o0, %o2
4142	mulscc  %o2, %o0, %o2
4143	mulscc  %o2, %o0, %o2
4144	mulscc  %o2, %o0, %o2
4145	mulscc  %o2, %o0, %o2
4146	mulscc  %o2, %o0, %o2
4147	mulscc  %o2, %o0, %o2
4148	mulscc  %o2, %o0, %o2
4149	mulscc  %o2, %o0, %o2
4150	mulscc  %o2, %o0, %o2
4151	mulscc  %o2, %o0, %o2
4152	mulscc  %o2, %g0, %o2
4153	rd	%y, %o3
4154	srl	%o2, 16, %o1
4155	set	0xffff, %o4
4156	and	%o4, %o2, %o0
4157	sll	%o0, 15, %o0
4158	srl	%o3, 17, %o3
4159	or	%o3, %o0, %o0
4160	addcc	%o0, %o1, %o0
4161	bneg	1f
4162	 sethi	%hi(0x7fffffff), %o1
4163	retl
4164	 st	%o0, [%g1 + %lo(randseed)]
41651:
4166	or	%o1, %lo(0x7fffffff), %o1
4167	add	%o0, 1, %o0
4168	and	%o1, %o0, %o0
4169	retl
4170	 st	%o0, [%g1 + %lo(randseed)]
4171
4172/*
4173 * void microtime(struct timeval *tv)
4174 *
4175 * LBL's sparc bsd 'microtime': We don't need to spl (so this routine
4176 * can be a leaf routine) and we don't keep a 'last' timeval (there
4177 * can't be two calls to this routine in a microsecond).  This seems to
4178 * be about 20 times faster than the Sun code on an SS-2. - vj
4179 *
4180 * Read time values from slowest-changing to fastest-changing,
4181 * then re-read out to slowest.  If the values read before
4182 * the innermost match those read after, the innermost value
4183 * is consistent with the outer values.  If not, it may not
4184 * be and we must retry.  Typically this loop runs only once;
4185 * occasionally it runs twice, and only rarely does it run longer.
4186 */
4187ENTRY(microtime)
4188	sethi	%hi(_time), %g2
4189	sethi	%hi(TIMERREG_VA), %g3
41901:
4191	ldd	[%g2+%lo(_time)], %o2		! time.tv_sec & time.tv_usec
4192	ld	[%g3+%lo(TIMERREG_VA)], %o4	! usec counter
4193	ldd	[%g2+%lo(_time)], %g4		! see if time values changed
4194	cmp	%g4, %o2
4195	bne	1b				! if time.tv_sec changed
4196	 cmp	%g5, %o3
4197	bne	1b				! if time.tv_usec changed
4198	 tst	%o4
4199
4200	bpos	2f				! reached limit?
4201	 srl	%o4, TMR_SHIFT, %o4		! convert counter to usec
4202	sethi	%hi(_tick), %g4			! bump usec by 1 tick
4203	ld	[%g4+%lo(_tick)], %o1
4204	set	TMR_MASK, %g5
4205	add	%o1, %o3, %o3
4206	and	%o4, %g5, %o4
42072:
4208	add	%o4, %o3, %o3
4209	set	1000000, %g5			! normalize usec value
4210	cmp	%o3, %g5
4211	bl,a	3f
4212	 st	%o2, [%o0]			! (should be able to std here)
4213	add	%o2, 1, %o2			! overflow
4214	sub	%o3, %g5, %o3
4215	st	%o2, [%o0]			! (should be able to std here)
42163:
4217	retl
4218	 st	%o3, [%o0+4]
4219
4220/*
4221 * This procedure exists to make stdarg functions work correctly.
4222 * We write the caller's `in' registers into his caller's `arg dump'
4223 * area.  That arg-dump area immediately precedes the argument extension
4224 * area, resulting in a single contiguous block of memory.
4225 *
4226 * This is really the wrong way to do it: the arguments should be written
4227 * to storage local to the stdarg function, and the stdarg `pick up
4228 * the next argument' code should pick it up from whichever region is
4229 * `active' at that point.
4230 */
4231	.globl	___builtin_saveregs
4232___builtin_saveregs:
4233	! not profiled -- this should be done inline anyway
4234	! bleah! the arg dump area is unaligned!  cannot std w/o reg/reg moves
4235	st	%i0, [%fp + 0x44]	! fr->fr_argd[0]
4236	st	%i1, [%fp + 0x48]	! fr->fr_argd[1]
4237	st	%i2, [%fp + 0x4c]	! fr->fr_argd[2]
4238	st	%i3, [%fp + 0x50]	! fr->fr_argd[3]
4239	st	%i4, [%fp + 0x54]	! fr->fr_argd[4]
4240	retl
4241	 st	%i5, [%fp + 0x58]	! fr->fr_argd[5]
4242
4243#ifdef KGDB
4244/*
4245 * Write all windows (user or otherwise), except the current one.
4246 *
4247 * THIS COULD BE DONE IN USER CODE
4248 */
4249ENTRY(write_all_windows)
4250	/*
4251	 * g2 = g1 = nwindows - 1;
4252	 * while (--g1 > 0) save();
4253	 * while (--g2 > 0) restore();
4254	 */
4255	sethi	%hi(_nwindows), %g1
4256	ld	[%g1 + %lo(_nwindows)], %g1
4257	dec	%g1
4258	mov	%g1, %g2
4259
42601:	deccc	%g1
4261	bg,a	1b
4262	 save	%sp, -64, %sp
4263
42642:	deccc	%g2
4265	bg,a	2b
4266	 restore
4267
4268	retl
4269	nop
4270#endif /* KGDB */
4271
4272	.data
4273	.globl	_cold
4274_cold:
4275	.word	1		! cold start flag
4276
4277	.globl	_proc0paddr
4278_proc0paddr:
4279	.word	_u0		! KVA of proc0 uarea
4280
4281/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
4282	.globl	_intrcnt, _eintrcnt, _intrnames, _eintrnames
4283_intrnames:
4284	.asciz	"spur"
4285	.asciz	"lev1"
4286	.asciz	"lev2"
4287	.asciz	"lev3"
4288	.asciz	"lev4"
4289	.asciz	"lev5"
4290	.asciz	"lev6"
4291	.asciz	"lev7"
4292	.asciz  "lev8"
4293	.asciz	"lev9"
4294	.asciz	"clock"
4295	.asciz	"lev11"
4296	.asciz	"lev12"
4297	.asciz	"lev13"
4298	.asciz	"prof"
4299_eintrnames:
4300	ALIGN
4301_intrcnt:
4302	.skip	4*15
4303_eintrcnt:
4304
4305	.comm	_nwindows, 4
4306	.comm	_promvec, 4
4307	.comm	_curproc, 4
4308	.comm	_qs, 32 * 8
4309	.comm	_whichqs, 4
4310