xref: /original-bsd/sys/sparc/sparc/locore.s (revision 3efa343e)
1/*
2 * Copyright (c) 1992 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * %sccs.include.redist.c%
10 *
11 *	@(#)locore.s	7.1 (Berkeley) 07/13/92
12 *
13 * from: $Header: locore.s,v 1.45 92/07/12 08:19:55 torek Exp $
14 */
15
16#define	LOCORE
17#include "assym.s"
18#include "ctlreg.h"
19#include "intreg.h"
20#include "timerreg.h"
21#ifdef notyet
22#include "zsaddr.h"
23#include "../dev/zsreg.h"
24#endif
25#include "machine/psl.h"
26#include "machine/signal.h"
27#include "machine/trap.h"
28
29/*
30 * GNU assembler does not understand `.empty' directive; Sun assembler
31 * gripes about labels without it.  To allow cross-compilation using
32 * the Sun assembler, and because .empty directives are useful documentation,
33 * we use this trick.  While we are at it we fix the .align mismatch.
34 *
35 * XXX	new GNU assemblers match the Sun assembler in interpretation of
36 * XXX	.align -- will have to fix this for them
37 */
38#ifdef SUN_AS
39#define	EMPTY	.empty
40#define	ALIGN	.align 4
41#else
42#define	EMPTY	/* .empty */
43#define	ALIGN	.align 2
44#endif
45
46/*
47 * CCFSZ (C Compiler Frame SiZe) is the size of a stack frame required if
48 * a function is to call C code.  It should be just 64, but Sun defined
49 * their frame with space to hold arguments 0 through 5 (plus some junk),
50 * and varargs routines (such as printf) demand this, and gcc uses this
51 * area at times anyway.
52 */
53#define	CCFSZ	96
54
55/*
56 * A handy macro for maintaining instrumentation counters.
57 * Note that this clobbers %o0 and %o1.  Normal usage is
58 * something like:
59 *	foointr:
60 *		TRAP_SETUP(...)		! makes %o registers safe
61 *		INCR(_cnt+V_FOO)	! count a foo
62 */
63#define INCR(what) \
64	sethi	%hi(what), %o0; \
65	ld	[%o0 + %lo(what)], %o1; \
66	inc	%o1; \
67	st	%o1, [%o0 + %lo(what)]
68
69/*
70 * Another handy macro: load one register window, given `base' address.
71 * This can be either a simple register (e.g., %sp) or include an initial
72 * offset (e.g., %g6 + PCB_RW).
73 */
74#define	LOADWIN(addr) \
75	ldd	[addr], %l0; \
76	ldd	[addr + 8], %l2; \
77	ldd	[addr + 16], %l4; \
78	ldd	[addr + 24], %l6; \
79	ldd	[addr + 32], %i0; \
80	ldd	[addr + 40], %i2; \
81	ldd	[addr + 48], %i4; \
82	ldd	[addr + 56], %i6
83
84/*
85 * To return from trap we need the two-instruction sequence
86 * `jmp %l1; rett %l2', which is defined here for convenience.
87 */
88#define	RETT	jmp %l1; rett %l2
89
90	.data
91/*
92 * The interrupt stack.
93 *
94 * This is the very first thing in the data segment, and therefore has
95 * the lowest kernel stack address.  We count on this in the interrupt
96 * trap-frame setup code, since we may need to switch from the kernel
97 * stack to the interrupt stack (iff we are not already on the interrupt
98 * stack).  One sethi+cmp is all we need since this is so carefully
99 * arranged.
100 */
101	.globl	_intstack
102	.globl	_eintstack
103_intstack:
104	.skip	4 * NBPG		! 16k = 128 128-byte stack frames
105_eintstack:
106
107/*
108 * When a process exits and its u. area goes away, we set cpcb to point
109 * to this `u.', leaving us with something to use for an interrupt stack,
110 * and letting all the register save code have a pcb_uw to examine.
111 * This is also carefully arranged (to come just before u0, so that
112 * process 0's kernel stack can quietly overrun into it during bootup, if
113 * we feel like doing that).
114 */
115	.globl	_idle_u
116_idle_u:
117	.skip	UPAGES * NBPG
118
119/*
120 * Process 0's u.
121 *
122 * This must be aligned on an 8 byte boundary.
123 */
124	.globl	_u0
125_u0:	.skip	UPAGES * NBPG
126estack0:
127
128#ifdef KGDB
129/*
130 * Another item that must be aligned, easiest to put it here.
131 */
132KGDB_STACK_SIZE = 2048
133	.globl	_kgdb_stack
134_kgdb_stack:
135	.skip	KGDB_STACK_SIZE		! hope this is enough
136#endif
137
138/*
139 * _cpcb points to the current pcb (and hence u. area).
140 * Initially this is the special one.
141 */
142	.globl	_cpcb
143_cpcb:	.word	_u0
144
145	.text
146
147/*
148 * The first thing in the real text segment is the trap vector table,
149 * which must be aligned on a 4096 byte boundary.  The text segment
150 * starts beyond page 0 of KERNBASE so that there is a red zone
151 * between user and kernel space.  Since the boot ROM loads us at
152 * 0x4000, it is far easier to start at KERNBASE+0x4000 than to
153 * buck the trend.  This is four pages in; we can stuff something
154 * into the three pages left beneath us later ... like, oh, say, the
155 * message buffer (1 page).
156 */
157	.globl	_msgbuf
158msgbufsize = NBPG			! 1 page for msg buffer
159_msgbuf	= KERNBASE + NBPG
160
161/*
162 * The remaining two physical pages are currently unused.  We need to
163 * map the interrupt enable register very early on in the boot process,
164 * so that we can handle NMIs (parity errors) halfway sensibly during
165 * boot.  We use virtual address f8002000 (`page 2') for this, wasting
166 * 4096 bytes of physical memory.
167 */
168IE_reg_addr = _msgbuf + msgbufsize	! this page not used; points to IEreg
169
170/*
171 * Each trap has room for four instructions, of which one perforce must
172 * be a branch.  On entry the hardware has copied pc and npc to %l1 and
173 * %l2 respectively.  We use two more to read the psr into %l0, and to
174 * put the trap type value into %l3 (with a few exceptions below).
175 * We could read the trap type field of %tbr later in the code instead,
176 * but there is no need, and that would require more instructions
177 * (read+mask, vs 1 `mov' here).
178 *
179 * I used to generate these numbers by address arithmetic, but gas's
180 * expression evaluator has about as much sense as your average slug
181 * (oddly enough, the code looks about as slimy too).  Thus, all the
182 * trap numbers are given as arguments to the trap macros.  This means
183 * there is one line per trap.  Sigh.
184 *
185 * Note that only the local registers may be used, since the trap
186 * window is potentially the last window.  Its `in' registers are
187 * the previous window's outs (as usual), but more important, its
188 * `out' registers may be in use as the `topmost' window's `in' registers.
189 * The global registers are of course verboten (well, until we save
190 * them away).
191 *
192 * Hardware interrupt vectors can be `linked'---the linkage is to regular
193 * C code---or rewired to fast in-window handlers.  The latter are good
194 * for unbuffered hardware like the Zilog serial chip and the AMD audio
195 * chip, where many interrupts can be handled trivially with pseudo-DMA or
196 * similar.  Only one `fast' interrupt can be used per level, however, and
197 * direct and `fast' interrupts are incompatible.  Routines in intr.c
198 * handle setting these, with optional paranoia.
199 */
200
201	/* regular vectored traps */
202#define	VTRAP(type, label) \
203	mov (type), %l3; b label; mov %psr, %l0; nop
204
205	/* hardware interrupts (can be linked or made `fast') */
206#define	HARDINT(lev) \
207	mov (lev), %l3; b _sparc_interrupt; mov %psr, %l0; nop
208
209	/* software interrupts (may not be made direct, sorry---but you
210	   should not be using them trivially anyway) */
211#define	SOFTINT(lev, bit) \
212	mov (lev), %l3; mov (bit), %l4; b softintr; mov %psr, %l0
213
214	/* traps that just call trap() */
215#define	TRAP(type)	VTRAP(type, slowtrap)
216
217	/* architecturally undefined traps (cause panic) */
218#define	UTRAP(type)	VTRAP(type, slowtrap)
219
220	/* software undefined traps (may be replaced) */
221#define	STRAP(type)	VTRAP(type, slowtrap)
222
223/* breakpoint acts differently under kgdb */
224#ifdef KGDB
225#define	BPT		VTRAP(T_BREAKPOINT, bpt)
226#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
227#else
228#define	BPT		TRAP(T_BREAKPOINT)
229#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
230#endif
231
232/* special high-speed 1-instruction-shaved-off traps (get nothing in %l3) */
233#ifdef COMPAT_SUNOS
234#define	SUN_SYSCALL	b sun_syscall; mov %psr, %l0; nop; nop
235#else
236#define	SUN_SYSCALL	TRAP(T_SUN_SYSCALL)
237#endif
238#define	SYSCALL		b syscall; mov %psr, %l0; nop; nop
239#define	WINDOW_OF	b window_of; mov %psr, %l0; nop; nop
240#define	WINDOW_UF	b window_uf; mov %psr, %l0; nop; nop
241#ifdef notyet
242#define	ZS_INTERRUPT	b zshard; mov %psr, %l0; nop; nop
243#else
244#define	ZS_INTERRUPT	HARDINT(12)
245#endif
246
247	.globl	start
248	.globl	_trapbase
249start:
250_trapbase:
251/* trap 0 is special since we cannot receive it */
252	b dostart; nop; nop; nop	! 00 = reset (fake)
253	VTRAP(T_TEXTFAULT, memfault)	! 01 = instr. fetch fault
254	TRAP(T_ILLINST)			! 02 = illegal instruction
255	TRAP(T_PRIVINST)		! 03 = privileged instruction
256	TRAP(T_FPDISABLED)		! 04 = fp instr, but EF bit off in psr
257	WINDOW_OF			! 05 = window overflow
258	WINDOW_UF			! 06 = window underflow
259	TRAP(T_ALIGN)			! 07 = address alignment error
260	VTRAP(T_FPE, fp_exception)	! 08 = fp exception
261	VTRAP(T_DATAFAULT, memfault)	! 09 = data fetch fault
262	TRAP(T_TAGOF)			! 0a = tag overflow
263	UTRAP(0x0b)
264	UTRAP(0x0c)
265	UTRAP(0x0d)
266	UTRAP(0x0e)
267	UTRAP(0x0f)
268	UTRAP(0x10)
269	SOFTINT(1, IE_L1)		! 11 = level 1 interrupt
270	HARDINT(2)			! 12 = level 2 interrupt
271	HARDINT(3)			! 13 = level 3 interrupt
272	SOFTINT(4, IE_L4)		! 14 = level 4 interrupt
273	HARDINT(5)			! 15 = level 5 interrupt
274	SOFTINT(6, IE_L6)		! 16 = level 6 interrupt
275	HARDINT(7)			! 17 = level 7 interrupt
276	HARDINT(8)			! 18 = level 8 interrupt
277	HARDINT(9)			! 19 = level 9 interrupt
278	HARDINT(10)			! 1a = level 10 interrupt
279	HARDINT(11)			! 1b = level 11 interrupt
280	ZS_INTERRUPT			! 1c = level 12 (zs) interrupt
281	HARDINT(13)			! 1d = level 13 interrupt
282	HARDINT(14)			! 1e = level 14 interrupt
283	VTRAP(15, nmi)			! 1f = nonmaskable interrupt
284	UTRAP(0x20)
285	UTRAP(0x21)
286	UTRAP(0x22)
287	UTRAP(0x23)
288	UTRAP(0x24)
289	UTRAP(0x25)
290	UTRAP(0x26)
291	UTRAP(0x27)
292	UTRAP(0x28)
293	UTRAP(0x29)
294	UTRAP(0x2a)
295	UTRAP(0x2b)
296	UTRAP(0x2c)
297	UTRAP(0x2d)
298	UTRAP(0x2e)
299	UTRAP(0x2f)
300	UTRAP(0x30)
301	UTRAP(0x31)
302	UTRAP(0x32)
303	UTRAP(0x33)
304	UTRAP(0x34)
305	UTRAP(0x35)
306	TRAP(T_CPDISABLED)	! 36 = coprocessor instr, EC bit off in psr
307	UTRAP(0x37)
308	UTRAP(0x38)
309	UTRAP(0x39)
310	UTRAP(0x3a)
311	UTRAP(0x3b)
312	UTRAP(0x3c)
313	UTRAP(0x3d)
314	UTRAP(0x3e)
315	UTRAP(0x3f)
316	TRAP(T_CPEXCEPTION)	! 40 = coprocessor exception
317	UTRAP(0x41)
318	UTRAP(0x42)
319	UTRAP(0x43)
320	UTRAP(0x44)
321	UTRAP(0x45)
322	UTRAP(0x46)
323	UTRAP(0x47)
324	UTRAP(0x48)
325	UTRAP(0x49)
326	UTRAP(0x4a)
327	UTRAP(0x4b)
328	UTRAP(0x4c)
329	UTRAP(0x4d)
330	UTRAP(0x4e)
331	UTRAP(0x4f)
332	UTRAP(0x50)
333	UTRAP(0x51)
334	UTRAP(0x52)
335	UTRAP(0x53)
336	UTRAP(0x54)
337	UTRAP(0x55)
338	UTRAP(0x56)
339	UTRAP(0x57)
340	UTRAP(0x58)
341	UTRAP(0x59)
342	UTRAP(0x5a)
343	UTRAP(0x5b)
344	UTRAP(0x5c)
345	UTRAP(0x5d)
346	UTRAP(0x5e)
347	UTRAP(0x5f)
348	UTRAP(0x60)
349	UTRAP(0x61)
350	UTRAP(0x62)
351	UTRAP(0x63)
352	UTRAP(0x64)
353	UTRAP(0x65)
354	UTRAP(0x66)
355	UTRAP(0x67)
356	UTRAP(0x68)
357	UTRAP(0x69)
358	UTRAP(0x6a)
359	UTRAP(0x6b)
360	UTRAP(0x6c)
361	UTRAP(0x6d)
362	UTRAP(0x6e)
363	UTRAP(0x6f)
364	UTRAP(0x70)
365	UTRAP(0x71)
366	UTRAP(0x72)
367	UTRAP(0x73)
368	UTRAP(0x74)
369	UTRAP(0x75)
370	UTRAP(0x76)
371	UTRAP(0x77)
372	UTRAP(0x78)
373	UTRAP(0x79)
374	UTRAP(0x7a)
375	UTRAP(0x7b)
376	UTRAP(0x7c)
377	UTRAP(0x7d)
378	UTRAP(0x7e)
379	UTRAP(0x7f)
380	SUN_SYSCALL		! 80 = sun syscall
381	BPT			! 81 = pseudo breakpoint instruction
382	TRAP(T_DIV0)		! 82 = divide by zero
383	TRAP(T_FLUSHWIN)	! 83 = flush windows
384	TRAP(T_CLEANWIN)	! 84 = provide clean windows
385	TRAP(T_RANGECHECK)	! 85 = ???
386	TRAP(T_FIXALIGN)	! 86 = fix up unaligned accesses
387	TRAP(T_INTOF)		! 87 = integer overflow
388	BPT_KGDB_EXEC		! 88 = enter kernel gdb on kernel startup
389	SYSCALL			! 89 = bsd syscall
390	STRAP(0x8a)
391	STRAP(0x8b)
392	STRAP(0x8c)
393	STRAP(0x8d)
394	STRAP(0x8e)
395	STRAP(0x8f)
396	STRAP(0x90)
397	STRAP(0x91)
398	STRAP(0x92)
399	STRAP(0x93)
400	STRAP(0x94)
401	STRAP(0x95)
402	STRAP(0x96)
403	STRAP(0x97)
404	STRAP(0x98)
405	STRAP(0x99)
406	STRAP(0x9a)
407	STRAP(0x9b)
408	STRAP(0x9c)
409	STRAP(0x9d)
410	STRAP(0x9e)
411	STRAP(0x9f)
412	STRAP(0xa0)
413	STRAP(0xa1)
414	STRAP(0xa2)
415	STRAP(0xa3)
416	STRAP(0xa4)
417	STRAP(0xa5)
418	STRAP(0xa6)
419	STRAP(0xa7)
420	STRAP(0xa8)
421	STRAP(0xa9)
422	STRAP(0xaa)
423	STRAP(0xab)
424	STRAP(0xac)
425	STRAP(0xad)
426	STRAP(0xae)
427	STRAP(0xaf)
428	STRAP(0xb0)
429	STRAP(0xb1)
430	STRAP(0xb2)
431	STRAP(0xb3)
432	STRAP(0xb4)
433	STRAP(0xb5)
434	STRAP(0xb6)
435	STRAP(0xb7)
436	STRAP(0xb8)
437	STRAP(0xb9)
438	STRAP(0xba)
439	STRAP(0xbb)
440	STRAP(0xbc)
441	STRAP(0xbd)
442	STRAP(0xbe)
443	STRAP(0xbf)
444	STRAP(0xc0)
445	STRAP(0xc1)
446	STRAP(0xc2)
447	STRAP(0xc3)
448	STRAP(0xc4)
449	STRAP(0xc5)
450	STRAP(0xc6)
451	STRAP(0xc7)
452	STRAP(0xc8)
453	STRAP(0xc9)
454	STRAP(0xca)
455	STRAP(0xcb)
456	STRAP(0xcc)
457	STRAP(0xcd)
458	STRAP(0xce)
459	STRAP(0xcf)
460	STRAP(0xd0)
461	STRAP(0xd1)
462	STRAP(0xd2)
463	STRAP(0xd3)
464	STRAP(0xd4)
465	STRAP(0xd5)
466	STRAP(0xd6)
467	STRAP(0xd7)
468	STRAP(0xd8)
469	STRAP(0xd9)
470	STRAP(0xda)
471	STRAP(0xdb)
472	STRAP(0xdc)
473	STRAP(0xdd)
474	STRAP(0xde)
475	STRAP(0xdf)
476	STRAP(0xe0)
477	STRAP(0xe1)
478	STRAP(0xe2)
479	STRAP(0xe3)
480	STRAP(0xe4)
481	STRAP(0xe5)
482	STRAP(0xe6)
483	STRAP(0xe7)
484	STRAP(0xe8)
485	STRAP(0xe9)
486	STRAP(0xea)
487	STRAP(0xeb)
488	STRAP(0xec)
489	STRAP(0xed)
490	STRAP(0xee)
491	STRAP(0xef)
492	STRAP(0xf0)
493	STRAP(0xf1)
494	STRAP(0xf2)
495	STRAP(0xf3)
496	STRAP(0xf4)
497	STRAP(0xf5)
498	STRAP(0xf6)
499	STRAP(0xf7)
500	STRAP(0xf8)
501	STRAP(0xf9)
502	STRAP(0xfa)
503	STRAP(0xfb)
504	STRAP(0xfc)
505	STRAP(0xfd)
506	STRAP(0xfe)
507	STRAP(0xff)
508
509	/* the message buffer is always mapped */
510_msgbufmapped:
511	.word	1
512
513#ifdef DEBUG
514/*
515 * A hardware red zone is impossible.  We simulate one in software by
516 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
517 * This is expensive and is only enabled when debugging.
518 */
519#define	REDSIZE	(8*96)		/* some room for bouncing */
520#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
521	.data
522_redzone:
523	.word	_idle_u + REDSIZE
524_redstack:
525	.skip	REDSTACK
526	.text
527Lpanic_red:
528	.asciz	"stack overflow"
529	ALIGN
530
531	/* set stack pointer redzone to base+minstack; alters base */
532#define	SET_SP_REDZONE(base, tmp) \
533	add	base, REDSIZE, base; \
534	sethi	%hi(_redzone), tmp; \
535	st	base, [tmp + %lo(_redzone)]
536
537	/* variant with a constant */
538#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
539	set	(const) + REDSIZE, tmp1; \
540	sethi	%hi(_redzone), tmp2; \
541	st	tmp1, [tmp2 + %lo(_redzone)]
542
543	/* check stack pointer against redzone (uses two temps) */
544#define	CHECK_SP_REDZONE(t1, t2) \
545	sethi	%hi(_redzone), t1; \
546	ld	[t1 + %lo(_redzone)], t2; \
547	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
548	bgeu	7f; nop;	/* and can continue normally */ \
549	/* move to panic stack */ \
550	st	%g0, [t1 + %lo(_redzone)]; \
551	set	_redstack + REDSTACK - 96, %sp; \
552	/* prevent panic() from lowering ipl */ \
553	sethi	%hi(_panicstr), t2; \
554	set	Lpanic_red, t2; \
555	st	t2, [t1 + %lo(_panicstr)]; \
556	rd	%psr, t1;		/* t1 = splhigh() */ \
557	or	t1, PSR_PIL, t2; \
558	wr	t2, 0, %psr; \
559	wr	t2, PSR_ET, %psr;	/* turn on traps */ \
560	nop; nop; nop; \
561	save	%sp, -96, %sp;		/* preserve current window */ \
562	sethi	%hi(Lpanic_red), %o0; \
563	call	_panic; or %o0, %lo(Lpanic_red), %o0; \
5647:
565
566#else
567
568#define	SET_SP_REDZONE(base, tmp)
569#define	SET_SP_REDZONE_CONST(const, t1, t2)
570#define	CHECK_SP_REDZONE(t1, t2)
571#endif
572
573/*
574 * The window code must verify user stack addresses before using them.
575 * A user stack pointer is invalid if:
576 *	- it is not on an 8 byte boundary;
577 *	- its pages (a register window, being 64 bytes, can occupy
578 *	  two pages) are not readable or writable.
579 * We define three separate macros here for testing user stack addresses.
580 *
581 * PTE_OF_ADDR locates a PTE, branching to a `bad address'
582 *	handler if the stack pointer points into the hole in the
583 *	address space (i.e., top 3 bits are not either all 1 or all 0);
584 * CMP_PTE_USER_READ compares the located PTE against `user read' mode;
585 * CMP_PTE_USER_WRITE compares the located PTE against `user write' mode.
586 * The compares give `equal' if read or write is OK.
587 *
588 * Note that the user stack pointer usually points into high addresses
589 * (top 3 bits all 1), so that is what we check first.
590 *
591 * The code below also assumes that PTE_OF_ADDR is safe in a delay
592 * slot; it is, at it merely sets its `pte' register to a temporary value.
593 */
594	/* input: addr, output: pte; aux: bad address label */
595#define	PTE_OF_ADDR(addr, pte, bad) \
596	sra	addr, PG_VSHIFT, pte; \
597	cmp	pte, -1; \
598	be,a	1f; andn addr, 4095, pte; \
599	tst	pte; \
600	bne	bad; EMPTY; \
601	andn	addr, 4095, pte; \
6021:
603
604	/* input: pte; output: condition codes */
605#define	CMP_PTE_USER_READ(pte) \
606	lda	[pte] ASI_PTE, pte; \
607	srl	pte, PG_PROTSHIFT, pte; \
608	andn	pte, (PG_W >> PG_PROTSHIFT), pte; \
609	cmp	pte, PG_PROTUREAD
610
611	/* input: pte; output: condition codes */
612#define	CMP_PTE_USER_WRITE(pte) \
613	lda	[pte] ASI_PTE, pte; \
614	srl	pte, PG_PROTSHIFT, pte; \
615	cmp	pte, PG_PROTUWRITE
616
617/*
618 * The calculations in PTE_OF_ADDR and CMP_PTE_USER_* are rather slow:
619 * in particular, according to Gordon Irlam of the University of Adelaide
620 * in Australia, these consume at least 18 cycles on an SS1 and 37 on an
621 * SS2.  Hence, we try to avoid them in the common case.
622 *
623 * A chunk of 64 bytes is on a single page if and only if:
624 *
625 *	((base + 64 - 1) & ~4095) == (base & ~4095)
626 *
627 * Equivalently (and faster to test), the low order bits (base & 4095) must
628 * be small enough so that the sum (base + 63) does not carry out into the
629 * upper page-address bits, i.e.,
630 *
631 *	(base & 4095) < (4096 - 63)
632 *
633 * so we allow testing that here.  This macro is also assumed to be safe
634 * in a delay slot (modulo overwriting its temporary).
635 */
636#define	SLT_IF_1PAGE_RW(addr, tmp) \
637	and	addr, 4095, tmp; \
638	cmp	tmp, (4096 - 63)
639
640/*
641 * Every trap that enables traps must set up stack space.
642 * If the trap is from user mode, this involves switching to the kernel
643 * stack for the current process, and we must also set cpcb->pcb_uw
644 * so that the window overflow handler can tell user windows from kernel
645 * windows.
646 *
647 * The number of user windows is:
648 *
649 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
650 *
651 * (where pcb_wim = log2(current %wim) and CWP = low 5 bits of %psr).
652 * We compute this expression by table lookup in uwtab[CWP - pcb_wim],
653 * which has been set up as:
654 *
655 *	for i in [-nwin+1 .. nwin-1]
656 *		uwtab[i] = (nwin - 1 - i) % nwin;
657 *
658 * (If you do not believe this works, try it for yourself.)
659 *
660 * We also keep one or two more tables:
661 *
662 *	for i in 0..nwin-1
663 *		wmask[i] = 1 << ((i + 1) % nwindows);
664 *
665 * wmask[CWP] tells whether a `rett' would return into the invalid window.
666 */
667	.data
668	.skip	32			! alignment byte & negative indicies
669uwtab:	.skip	32			! u_char uwtab[-31..31];
670wmask:	.skip	32			! u_char wmask[0..31];
671
672	.text
673/*
674 * Things begin to grow uglier....
675 *
676 * Each trap handler may (always) be running in the trap window.
677 * If this is the case, it cannot enable further traps until it writes
678 * the register windows into the stack (or, if the stack is no good,
679 * the current pcb).
680 *
681 * ASSUMPTIONS: TRAP_SETUP() is called with:
682 *	%l0 = %psr
683 *	%l1 = return pc
684 *	%l2 = return npc
685 *	%l3 = (some value that must not be altered)
686 * which means we have 4 registers to work with.
687 *
688 * The `stackspace' argument is the number of stack bytes to allocate
689 * for register-saving, and must be at least -64 (and typically more,
690 * for global registers and %y).
691 *
692 * Trapframes should use -CCFSZ-80.  (80 = sizeof(struct trapframe);
693 * see trap.h.  This basically means EVERYONE.  Interrupt frames could
694 * get away with less, but currently do not.)
695 *
696 * The basic outline here is:
697 *
698 *	if (trap came from kernel mode) {
699 *		if (we are in the trap window)
700 *			save it away;
701 *		%sp = %fp - stackspace;
702 *	} else {
703 *		compute the number of user windows;
704 *		if (we are in the trap window)
705 *			save it away;
706 *		%sp = (top of kernel stack) - stackspace;
707 *	}
708 *
709 * Again, the number of user windows is:
710 *
711 *	cpcb->pcb_uw = (cpcb->pcb_wim - 1 - CWP) % nwindows
712 *
713 * (where pcb_wim = log2(current %wim) and CWP is the low 5 bits of %psr),
714 * and this is computed as `uwtab[CWP - pcb_wim]'.
715 *
716 * NOTE: if you change this code, you will have to look carefully
717 * at the window overflow and underflow handlers and make sure they
718 * have similar changes made as needed.
719 */
720#define	CALL_CLEAN_TRAP_WINDOW \
721	sethi	%hi(clean_trap_window), %l7; \
722	jmpl	%l7 + %lo(clean_trap_window), %l4; \
723	 mov	%g7, %l7	/* save %g7 in %l7 for clean_trap_window */
724
725#define	TRAP_SETUP(stackspace) \
726	rd	%wim, %l4; \
727	mov	1, %l5; \
728	sll	%l5, %l0, %l5; \
729	btst	PSR_PS, %l0; \
730	bz	1f; \
731	 btst	%l5, %l4; \
732	/* came from kernel mode; cond codes indicate trap window */ \
733	bz,a	3f; \
734	 add	%fp, stackspace, %sp;	/* want to just set %sp */ \
735	CALL_CLEAN_TRAP_WINDOW;		/* but maybe need to clean first */ \
736	b	3f; \
737	 add	%fp, stackspace, %sp; \
7381: \
739	/* came from user mode: compute pcb_nw */ \
740	sethi	%hi(_cpcb), %l6; \
741	ld	[%l6 + %lo(_cpcb)], %l6; \
742	ld	[%l6 + PCB_WIM], %l5; \
743	and	%l0, 31, %l4; \
744	sub	%l4, %l5, %l5; \
745	set	uwtab, %l4; \
746	ldub	[%l4 + %l5], %l5; \
747	st	%l5, [%l6 + PCB_UW]; \
748	/* cond codes still indicate whether in trap window */ \
749	bz,a	2f; \
750	 sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
751	/* yes, in trap window; must clean it */ \
752	CALL_CLEAN_TRAP_WINDOW; \
753	sethi	%hi(_cpcb), %l6; \
754	ld	[%l6 + %lo(_cpcb)], %l6; \
755	sethi	%hi(UPAGES*NBPG+(stackspace)), %l5; \
7562: \
757	/* trap window is (now) clean: set %sp */ \
758	or	%l5, %lo(UPAGES*NBPG+(stackspace)), %l5; \
759	add	%l6, %l5, %sp; \
760	SET_SP_REDZONE(%l6, %l5); \
7613: \
762	CHECK_SP_REDZONE(%l6, %l5)
763
764/*
765 * Interrupt setup is almost exactly like trap setup, but we need to
766 * go to the interrupt stack if (a) we came from user mode or (b) we
767 * came from kernel mode on the kernel stack.
768 */
769#define	INTR_SETUP(stackspace) \
770	rd	%wim, %l4; \
771	mov	1, %l5; \
772	sll	%l5, %l0, %l5; \
773	btst	PSR_PS, %l0; \
774	bz	1f; \
775	 btst	%l5, %l4; \
776	/* came from kernel mode; cond codes still indicate trap window */ \
777	bz,a	0f; \
778	 sethi	%hi(_eintstack), %l7; \
779	CALL_CLEAN_TRAP_WINDOW; \
780	sethi	%hi(_eintstack), %l7; \
7810:	/* now if %fp >= eintstack, we were on the kernel stack */ \
782	cmp	%fp, %l7; \
783	bge,a	3f; \
784	 add	%l7, stackspace, %sp;	/* so switch to intstack */ \
785	b	4f; \
786	 add	%fp, stackspace, %sp;	/* else stay on intstack */ \
7871: \
788	/* came from user mode: compute pcb_nw */ \
789	sethi	%hi(_cpcb), %l6; \
790	ld	[%l6 + %lo(_cpcb)], %l6; \
791	ld	[%l6 + PCB_WIM], %l5; \
792	and	%l0, 31, %l4; \
793	sub	%l4, %l5, %l5; \
794	set	uwtab, %l4; \
795	ldub	[%l4 + %l5], %l5; \
796	st	%l5, [%l6 + PCB_UW]; \
797	/* cond codes still indicate whether in trap window */ \
798	bz,a	2f; \
799	 sethi	%hi(_eintstack), %l7; \
800	/* yes, in trap window; must save regs */ \
801	CALL_CLEAN_TRAP_WINDOW; \
802	sethi	%hi(_eintstack), %l7; \
8032: \
804	add	%l7, stackspace, %sp; \
8053: \
806	SET_SP_REDZONE_CONST(_intstack, %l6, %l5); \
8074: \
808	CHECK_SP_REDZONE(%l6, %l5)
809
810/*
811 * Handler for making the trap window shiny clean.
812 *
813 * On entry:
814 *	cpcb->pcb_nw = number of user windows
815 *	%l0 = %psr
816 *	%l1 must not be clobbered
817 *	%l2 must not be clobbered
818 *	%l3 must not be clobbered
819 *	%l4 = address for `return'
820 *	%l7 = saved %g7 (we put this in a delay slot above, to save work)
821 *
822 * On return:
823 *	%wim has changed, along with cpcb->pcb_wim
824 *	%g7 has been restored
825 *
826 * Normally, we push only one window.
827 */
828clean_trap_window:
829	mov	%g5, %l5		! save %g5
830	mov	%g6, %l6		! ... and %g6
831/*	mov	%g7, %l7		! ... and %g7 (already done for us) */
832	sethi	%hi(_cpcb), %g6		! get current pcb
833	ld	[%g6 + %lo(_cpcb)], %g6
834
835	/* Figure out whether it is a user window (cpcb->pcb_uw > 0). */
836	ld	[%g6 + PCB_UW], %g7
837	deccc	%g7
838	bge	ctw_user
839	 save	%g0, %g0, %g0		! in any case, enter window to save
840
841	/* The window to be pushed is a kernel window. */
842	std	%l0, [%sp + (0*8)]
843ctw_merge:
844	std	%l2, [%sp + (1*8)]
845	std	%l4, [%sp + (2*8)]
846	std	%l6, [%sp + (3*8)]
847	std	%i0, [%sp + (4*8)]
848	std	%i2, [%sp + (5*8)]
849	std	%i4, [%sp + (6*8)]
850	std	%i6, [%sp + (7*8)]
851
852	/* Set up new window invalid mask, and update cpcb->pcb_wim. */
853	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp
854	mov	1, %g5			! g5 = 1 << new_cwp;
855	sll	%g5, %g7, %g5
856	wr	%g5, 0, %wim		! setwim(g5);
857	and	%g7, 31, %g7		! cpcb->pcb_wim = g7 & 31;
858	st	%g7, [%g6 + PCB_WIM]
859	nop
860	restore				! back to trap window
861
862	mov	%l5, %g5		! restore g5
863	mov	%l6, %g6		! ... and g6
864	jmp	%l4 + 8			! return to caller
865	 mov	%l7, %g7		! ... and g7
866	/* NOTREACHED */
867
868ctw_user:
869	/*
870	 * The window to be pushed is a user window.
871	 * We must verify the stack pointer (alignment & permissions).
872	 * See comments above definition of PTE_OF_ADDR.
873	 */
874	st	%g7, [%g6 + PCB_UW]	! cpcb->pcb_uw--;
875	btst	7, %sp			! if not aligned,
876	bne	ctw_invalid		! choke on it
877	 EMPTY
878	PTE_OF_ADDR(%sp, %g7, ctw_invalid)
879	CMP_PTE_USER_WRITE(%g7)		! likewise if not writable
880	bne	ctw_invalid
881	 EMPTY
882	SLT_IF_1PAGE_RW(%sp, %g7)
883	bl,a	ctw_merge		! all ok if only 1
884	 std	%l0, [%sp]
885	add	%sp, 7*8, %g5		! check last addr too
886	PTE_OF_ADDR(%g5, %g7, ctw_invalid)
887	CMP_PTE_USER_WRITE(%g7)
888	be,a	ctw_merge		! all ok: store <l0,l1> and merge
889	 std	%l0, [%sp]
890
891	/*
892	 * The window we wanted to push could not be pushed.
893	 * Instead, save ALL user windows into the pcb.
894	 * We will notice later that we did this, when we
895	 * get ready to return from our trap or syscall.
896	 *
897	 * The code here is run rarely and need not be optimal.
898	 */
899ctw_invalid:
900	/*
901	 * Reread cpcb->pcb_uw.  We decremented this earlier,
902	 * so it is off by one.
903	 */
904	ld	[%g6 + PCB_UW], %g7	! (number of user windows) - 1
905	add	%g6, PCB_RW, %g5
906
907	/* save g7+1 windows, starting with the current one */
9081:					! do {
909	std	%l0, [%g5 + (0*8)]	!	rw->rw_local[0] = l0;
910	std	%l2, [%g5 + (1*8)]	!	...
911	std	%l4, [%g5 + (2*8)]
912	std	%l6, [%g5 + (3*8)]
913	std	%i0, [%g5 + (4*8)]
914	std	%i2, [%g5 + (5*8)]
915	std	%i4, [%g5 + (6*8)]
916	std	%i6, [%g5 + (7*8)]
917	deccc	%g7			!	if (n > 0) save(), rw++;
918	bge,a	1b			! } while (--n >= 0);
919	 save	%g5, 64, %g5
920
921	/* stash sp for bottommost window */
922	st	%sp, [%g5 + 64 + (7*8)]
923
924	/* set up new wim */
925	rd	%psr, %g7		! g7 = (junk << 5) + new_cwp;
926	mov	1, %g5			! g5 = 1 << new_cwp;
927	sll	%g5, %g7, %g5
928	wr	%g5, 0, %wim		! wim = g5;
929	and	%g7, 31, %g7
930	st	%g7, [%g6 + PCB_WIM]	! cpcb->pcb_wim = new_cwp;
931
932	/* fix up pcb fields */
933	ld	[%g6 + PCB_UW], %g7	! n = cpcb->pcb_uw;
934	add	%g7, 1, %g5
935	st	%g5, [%g6 + PCB_NSAVED]	! cpcb->pcb_nsaved = n + 1;
936	st	%g0, [%g6 + PCB_UW]	! cpcb->pcb_uw = 0;
937
938	/* return to trap window */
9391:	deccc	%g7			! do {
940	bge	1b			!	restore();
941	 restore			! } while (--n >= 0);
942
943	mov	%l5, %g5		! restore g5, g6, & g7, and return
944	mov	%l6, %g6
945	jmp	%l4 + 8
946	 mov	%l7, %g7
947	/* NOTREACHED */
948
949
950/*
951 * Each memory access (text or data) fault, from user or kernel mode,
952 * comes here.  We read the error register and figure out what has
953 * happened.
954 *
955 * This cannot be done from C code since we must not enable traps (and
956 * hence may not use the `save' instruction) until we have decided that
957 * the error is or is not an asynchronous one that showed up after a
958 * synchronous error, but which must be handled before the sync err.
959 *
960 * Most memory faults are user mode text or data faults, which can cause
961 * signal delivery or ptracing, for which we must build a full trapframe.
962 * It does not seem worthwhile to work to avoid this in the other cases,
963 * so we store all the %g registers on the stack immediately.
964 *
965 * On entry:
966 *	%l0 = %psr
967 *	%l1 = return pc
968 *	%l2 = return npc
969 *	%l3 = T_TEXTFAULT or T_DATAFAULT
970 *
971 * Internal:
972 *	%l4 = %y, until we call mem_access_fault (then onto trapframe)
973 *	%l5 = IE_reg_addr, if async mem error
974 *
975 * We know about the layout of the error registers here.
976 *	addr	reg
977 *	----	---
978 *	a	AC_SYNC_ERR
979 *	a+4	AC_SYNC_VA
980 *	a+8	AC_ASYNC_ERR
981 *	a+12	AC_ASYNC_VA
982 */
983memfault:
984	TRAP_SETUP(-CCFSZ-80)
985
986	INCR(_cnt+V_FAULTS)		! cnt.v_faults++ (clobbers %o0,%o1)
987
988	st	%g1, [%sp + CCFSZ + 20]	! save g1
989	rd	%y, %l4			! save y
990
991#if AC_SYNC_ERR + 4 != AC_SYNC_VA || \
992    AC_SYNC_ERR + 8 != AC_ASYNC_ERR || AC_SYNC_ERR + 12 != AC_ASYNC_VA
993	help help help		! I, I, I wanna be a lifeguard
994#endif
995	set	AC_SYNC_ERR, %o0
996	std	%g2, [%sp + CCFSZ + 24]	! save g2, g3
997	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
998	inc	4, %o0
999	std	%g4, [%sp + CCFSZ + 32]	! (sneak g4,g5 in here)
1000	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1001	btst	SER_MEMERR, %o1		! memory error?
1002	std	%g6, [%sp + CCFSZ + 40]
1003	bz,a	normal_mem_fault	! no, just a regular fault
1004 	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1005
1006	/*
1007	 * We got a synchronous memory error.  It could be one that
1008	 * happened because there were two stores in a row, and the
1009	 * first went into the write buffer, and the second caused this
1010	 * synchronous trap; so there could now be a pending async error.
1011	 * This is in fact the case iff the two va's differ.
1012	 */
1013	inc	4, %o0
1014	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1015	inc	4, %o0
1016	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1017	cmp	%o2, %o4
1018	be,a	1f			! no, not an async err
1019	 wr	%l0, PSR_ET, %psr	! (and reenable traps)
1020
1021	/*
1022	 * Handle the async error; ignore the sync error for now
1023	 * (we may end up getting it again, but so what?).
1024	 * This code is essentially the same as that at `nmi' below,
1025	 * but the register usage is different and we cannot merge.
1026	 */
1027	sethi	%hi(IE_reg_addr), %l5	! ienab_bic(IE_ALLIE);
1028	ldub	[%l5 + %lo(IE_reg_addr)], %o0
1029	andn	%o0, IE_ALLIE, %o0
1030	stb	%o0, [%l5 + %lo(IE_reg_addr)]
1031
1032	/*
1033	 * Now reenable traps and call C code.
1034	 * %o1 through %o4 still hold the error reg contents.
1035	 * If memerr() returns, return from the trap.
1036	 */
1037	wr	%l0, PSR_ET, %psr
1038	call	_memerr			! memerr(0, ser, sva, aer, ava)
1039	 clr	%o0
1040
1041	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1042	wr	%l0, 0, %psr		! and disable traps, 3 instr delay
1043	ldd	[%sp + CCFSZ + 24], %g2
1044	ldd	[%sp + CCFSZ + 32], %g4
1045	ldd	[%sp + CCFSZ + 40], %g6
1046	/* now safe to set IE_ALLIE again */
1047	ldub	[%l5 + %lo(IE_reg_addr)], %o1
1048	or	%o1, IE_ALLIE, %o1
1049	stb	%o1, [%l5 + %lo(IE_reg_addr)]
1050	b	return_from_trap
1051	 wr	%l4, 0, %y		! restore y
1052
1053	/*
1054	 * Trap was a synchronous memory error.
1055	 * %o1 through %o4 still hold the error reg contents.
1056	 */
10571:
1058	call	_memerr			! memerr(1, ser, sva, aer, ava)
1059	 mov	1, %o0
1060
1061	ld	[%sp + CCFSZ + 20], %g1	! restore g1 through g7
1062	ldd	[%sp + CCFSZ + 24], %g2
1063	ldd	[%sp + CCFSZ + 32], %g4
1064	ldd	[%sp + CCFSZ + 40], %g6
1065	wr	%l4, 0, %y		! restore y
1066	b	return_from_trap
1067	 wr	%l0, 0, %psr
1068	/* NOTREACHED */
1069
1070normal_mem_fault:
1071	/*
1072	 * Trap was some other error; call C code to deal with it.
1073	 * Must finish trap frame (psr,pc,npc,%y,%o0..%o7) in case
1074	 * we decide to deliver a signal or ptrace the process.
1075	 * %g1..%g7 were already set up above.
1076	 */
1077	std	%l0, [%sp + CCFSZ + 0]	! set tf.tf_psr, tf.tf_pc
1078	mov	%l3, %o0		! (argument: type)
1079	st	%l2, [%sp + CCFSZ + 8]	! set tf.tf_npc
1080	st	%l4, [%sp + CCFSZ + 12]	! set tf.tf_y
1081	mov	%l1, %o3		! (argument: pc)
1082	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1083	std	%i2, [%sp + CCFSZ + 56]
1084	mov	%l0, %o4		! (argument: psr)
1085	std	%i4, [%sp + CCFSZ + 64]
1086	std	%i6, [%sp + CCFSZ + 72]
1087	call	_mem_access_fault	! mem_access_fault(type, ser, sva,
1088					!		pc, psr, &tf);
1089	 add	%sp, CCFSZ, %o5		! (argument: &tf)
1090
1091	ldd	[%sp + CCFSZ + 0], %l0	! load new values
1092	ldd	[%sp + CCFSZ + 8], %l2
1093	wr	%l3, 0, %y
1094	ld	[%sp + CCFSZ + 20], %g1
1095	ldd	[%sp + CCFSZ + 24], %g2
1096	ldd	[%sp + CCFSZ + 32], %g4
1097	ldd	[%sp + CCFSZ + 40], %g6
1098	ldd	[%sp + CCFSZ + 48], %i0
1099	ldd	[%sp + CCFSZ + 56], %i2
1100	ldd	[%sp + CCFSZ + 64], %i4
1101	ldd	[%sp + CCFSZ + 72], %i6
1102
1103	b	return_from_trap	! go return
1104	 wr	%l0, 0, %psr		! (but first disable traps again)
1105
1106
1107/*
1108 * fp_exception has to check to see if we are trying to save
1109 * the FP state, and if so, continue to save the FP state.
1110 *
1111 * We do not even bother checking to see if we were in kernel mode,
1112 * since users have no access to the special_fp_store instruction.
1113 *
1114 * This whole idea was stolen from Sprite.
1115 */
1116fp_exception:
1117	set	special_fp_store, %l4	! see if we came from the special one
1118	cmp	%l1, %l4		! pc == special_fp_store?
1119	bne	slowtrap		! no, go handle per usual
1120	 EMPTY
1121	sethi	%hi(savefpcont), %l4	! yes, "return" to the special code
1122	or	%lo(savefpcont), %l4, %l4
1123	jmp	%l4
1124	 rett	%l4 + 4
1125
1126/*
1127 * slowtrap() builds a trap frame and calls trap().
1128 * This is called `slowtrap' because it *is*....
1129 * We have to build a full frame for ptrace(), for instance.
1130 *
1131 * Registers:
1132 *	%l0 = %psr
1133 *	%l1 = return pc
1134 *	%l2 = return npc
1135 *	%l3 = trap code
1136 */
1137slowtrap:
1138	TRAP_SETUP(-CCFSZ-80)
1139	/*
1140	 * Phew, ready to enable traps and call C code.
1141	 */
1142	mov	%l3, %o0		! put type in %o0 for later
1143Lslowtrap_reenter:
1144	wr	%l0, PSR_ET, %psr	! traps on again
1145	std	%l0, [%sp + CCFSZ]	! tf.tf_psr = psr; tf.tf_pc = ret_pc;
1146	rd	%y, %l3
1147	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc = return_npc; tf.tf_y = %y;
1148	st	%g1, [%sp + CCFSZ + 20]
1149	std	%g2, [%sp + CCFSZ + 24]
1150	std	%g4, [%sp + CCFSZ + 32]
1151	std	%g6, [%sp + CCFSZ + 40]
1152	std	%i0, [%sp + CCFSZ + 48]
1153	mov	%l0, %o1		! (psr)
1154	std	%i2, [%sp + CCFSZ + 56]
1155	mov	%l1, %o2		! (pc)
1156	std	%i4, [%sp + CCFSZ + 64]
1157	add	%sp, CCFSZ, %o3		! (&tf)
1158	call	_trap			! trap(type, psr, pc, &tf)
1159	 std	%i6, [%sp + CCFSZ + 72]
1160
1161	ldd	[%sp + CCFSZ], %l0	! load new values
1162	ldd	[%sp + CCFSZ + 8], %l2
1163	wr	%l3, 0, %y
1164	ld	[%sp + CCFSZ + 20], %g1
1165	ldd	[%sp + CCFSZ + 24], %g2
1166	ldd	[%sp + CCFSZ + 32], %g4
1167	ldd	[%sp + CCFSZ + 40], %g6
1168	ldd	[%sp + CCFSZ + 48], %i0
1169	ldd	[%sp + CCFSZ + 56], %i2
1170	ldd	[%sp + CCFSZ + 64], %i4
1171	ldd	[%sp + CCFSZ + 72], %i6
1172	b	return_from_trap
1173	 wr	%l0, 0, %psr
1174
1175/*
1176 * Do a `software' trap by re-entering the trap code, possibly first
1177 * switching from interrupt stack to kernel stack.  This is used for
1178 * scheduling and signal ASTs (which generally occur from softclock or
1179 * tty or net interrupts) and register window saves (which might occur
1180 * from anywhere).
1181 *
1182 * The current window is the trap window, and it is by definition clean.
1183 * We enter with the trap type in %o0.  All we have to do is jump to
1184 * Lslowtrap_reenter above, but maybe after switching stacks....
1185 */
1186softtrap:
1187	sethi	%hi(_eintstack), %l7
1188	cmp	%sp, %l7
1189	bge	Lslowtrap_reenter
1190	 EMPTY
1191	sethi	%hi(_cpcb), %l6
1192	ld	[%l6 + %lo(_cpcb)], %l6
1193	set	UPAGES*NBPG - CCFSZ - 80, %l5
1194	add	%l6, %l5, %l7
1195	SET_SP_REDZONE(%l6, %l5)
1196	b	Lslowtrap_reenter
1197	 mov	%l7, %sp
1198
1199#ifdef KGDB
1200/*
1201 * bpt is entered on all breakpoint traps.
1202 * If this is a kernel breakpoint, we do not want to call trap().
1203 * Among other reasons, this way we can set breakpoints in trap().
1204 */
1205bpt:
1206	btst	PSR_PS, %l0		! breakpoint from kernel?
1207	bz	slowtrap		! no, go do regular trap
1208	 nop
1209
1210	/*
1211	 * Build a trap frame for kgdb_trap_glue to copy.
1212	 * Enable traps but set ipl high so that we will not
1213	 * see interrupts from within breakpoints.
1214	 */
1215	TRAP_SETUP(-CCFSZ-80)
1216	or	%l0, PSR_PIL, %l4	! splhigh()
1217	wr	%l4, 0, %psr		! the manual claims that this
1218	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1219	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1220	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
1221	rd	%y, %l3
1222	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1223	rd	%wim, %l3
1224	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
1225	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1226	std	%g2, [%sp + CCFSZ + 24]	! etc
1227	std	%g4, [%sp + CCFSZ + 32]
1228	std	%g6, [%sp + CCFSZ + 40]
1229	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
1230	std	%i2, [%sp + CCFSZ + 56]	! etc
1231	std	%i4, [%sp + CCFSZ + 64]
1232	std	%i6, [%sp + CCFSZ + 72]
1233
1234	/*
1235	 * Now call kgdb_trap_glue(); if it returns, call trap().
1236	 */
1237	mov	%o0, %l3		! gotta save trap type
1238	call	_kgdb_trap_glue		! kgdb_trap_glue(type, &trapframe)
1239	 add	%sp, CCFSZ, %o1		! (&trapframe)
1240
1241	/*
1242	 * Use slowtrap to call trap---but first erase our tracks
1243	 * (put the registers back the way they were).
1244	 */
1245	mov	%l3, %o0		! slowtrap will need trap type
1246	ld	[%sp + CCFSZ + 12], %l3
1247	wr	%l3, 0, %y
1248	ld	[%sp + CCFSZ + 20], %g1
1249	ldd	[%sp + CCFSZ + 24], %g2
1250	ldd	[%sp + CCFSZ + 32], %g4
1251	b	Lslowtrap_reenter
1252	 ldd	[%sp + CCFSZ + 40], %g6
1253
1254/*
1255 * Enter kernel breakpoint.  Write all the windows (not including the
1256 * current window) into the stack, so that backtrace works.  Copy the
1257 * supplied trap frame to the kgdb stack and switch stacks.
1258 *
1259 * kgdb_trap_glue(type, tf0)
1260 *	int type;
1261 *	struct trapframe *tf0;
1262 */
1263	.globl	_kgdb_trap_glue
1264_kgdb_trap_glue:
1265	save	%sp, -CCFSZ, %sp
1266
1267	call	_write_all_windows
1268	 mov	%sp, %l4		! %l4 = current %sp
1269
1270	/* copy trapframe to top of kgdb stack */
1271	set	_kgdb_stack + KGDB_STACK_SIZE - 80, %l0
1272					! %l0 = tfcopy -> end_of_kgdb_stack
1273	mov	80, %l1
12741:	ldd	[%i1], %l2
1275	inc	8, %i1
1276	deccc	8, %l1
1277	std	%l2, [%l0]
1278	bg	1b
1279	 inc	8, %l0
1280
1281#ifdef DEBUG
1282	/* save old red zone and then turn it off */
1283	sethi	%hi(_redzone), %l7
1284	ld	[%l7 + %lo(_redzone)], %l6
1285	st	%g0, [%l7 + %lo(_redzone)]
1286#endif
1287	/* switch to kgdb stack */
1288	add	%l0, -CCFSZ-80, %sp
1289
1290	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
1291	mov	%i0, %o0
1292	call	_kgdb_trap
1293	add	%l0, -80, %o1
1294	tst	%o0
1295	bnz,a	kgdb_rett
1296	 add	%l0, -80, %g1
1297
1298	/*
1299	 * kgdb_trap() did not handle the trap at all so the stack is
1300	 * still intact.  A simple `restore' will put everything back,
1301	 * after we reset the stack pointer.
1302	 */
1303	mov	%l4, %sp
1304#ifdef DEBUG
1305	st	%l6, [%l7 + %lo(_redzone)]	! restore red zone
1306#endif
1307	ret
1308	restore
1309
1310/*
1311 * Return from kgdb trap.  This is sort of special.
1312 *
1313 * We know that kgdb_trap_glue wrote the window above it, so that we will
1314 * be able to (and are sure to have to) load it up.  We also know that we
1315 * came from kernel land and can assume that the %fp (%i6) we load here
1316 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
1317 * until we have traps disabled, due to the SPARC taking traps at the
1318 * new ipl before noticing that PSR_ET has been turned off.  We are on
1319 * the kgdb stack, so this could be disastrous.
1320 *
1321 * Note that the trapframe argument in %g1 points into the current stack
1322 * frame (current window).  We abandon this window when we move %g1->tf_psr
1323 * into %psr, but we will not have loaded the new %sp yet, so again traps
1324 * must be disabled.
1325 */
1326kgdb_rett:
1327	rd	%psr, %g4		! turn off traps
1328	wr	%g4, PSR_ET, %psr
1329	/* use the three-instruction delay to do something useful */
1330	ld	[%g1], %g2		! pick up new %psr
1331	ld	[%g1 + 12], %g3		! set %y
1332	wr	%g3, 0, %y
1333#ifdef DEBUG
1334	st	%l6, [%l7 + %lo(_redzone)] ! and restore red zone
1335#endif
1336	wr	%g0, 0, %wim		! enable window changes
1337	nop; nop; nop
1338	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
1339	wr	%g2, 0, %psr		! set rett psr (including cond codes)
1340	/* 3 instruction delay before we can use the new window */
1341/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
1342/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
1343/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7
1344
1345	/* now we can use the new window */
1346	mov	%g1, %l4
1347	ld	[%l4 + 4], %l1		! get new pc
1348	ld	[%l4 + 8], %l2		! get new npc
1349	ld	[%l4 + 20], %g1		! set new %g1
1350
1351	/* set up returnee's out registers, including its %sp */
1352	ldd	[%l4 + 48], %i0
1353	ldd	[%l4 + 56], %i2
1354	ldd	[%l4 + 64], %i4
1355	ldd	[%l4 + 72], %i6
1356
1357	/* load returnee's window, making the window above it be invalid */
1358	restore
1359	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
1360	rd	%psr, %l0
1361	sll	%l1, %l0, %l1
1362	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
1363	sethi	%hi(_cpcb), %l1
1364	ld	[%l1 + %lo(_cpcb)], %l1
1365	and	%l0, 31, %l0		! CWP = %psr & 31;
1366	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
1367	save	%g0, %g0, %g0		! back to window to reload
1368	LOADWIN(%sp)
1369	save	%g0, %g0, %g0		! back to trap window
1370	/* note, we have not altered condition codes; safe to just rett */
1371	RETT
1372#endif
1373
1374/*
1375 * syscall() builds a trap frame and calls syscall().
1376 * sun_syscall is same but delivers sun system call number
1377 * XXX	should not have to save&reload ALL the registers just for
1378 *	ptrace...
1379 */
1380#ifdef COMPAT_SUNOS
1381sun_syscall:
1382	TRAP_SETUP(-CCFSZ-80)
1383	b	sys_merge
1384	 mov	1, %o3			! third arg to syscall: sun compat
1385syscall:
1386	TRAP_SETUP(-CCFSZ-80)
1387	clr	%o3			! third arg to syscall: native bsd
1388sys_merge:
1389#else
1390syscall:
1391	TRAP_SETUP(-CCFSZ-80)
1392#endif
1393	wr	%l0, PSR_ET, %psr
1394	std	%l0, [%sp + CCFSZ + 0]	! tf_psr, tf_pc
1395	rd	%y, %l3
1396	std	%l2, [%sp + CCFSZ + 8]	! tf_npc, tf_y
1397	st	%g1, [%sp + CCFSZ + 20]	! tf_g[1]
1398	std	%g2, [%sp + CCFSZ + 24]	! tf_g[2], tf_g[3]
1399	std	%g4, [%sp + CCFSZ + 32]	! etc
1400	std	%g6, [%sp + CCFSZ + 40]
1401	mov	%g1, %o0		! (code)
1402	std	%i0, [%sp + CCFSZ + 48]
1403	add	%sp, CCFSZ, %o1		! (&tf)
1404	std	%i2, [%sp + CCFSZ + 56]
1405	mov	%l1, %o2		! (pc)
1406	std	%i4, [%sp + CCFSZ + 64]
1407	call	_syscall		! syscall(code, &tf, pc, suncompat)
1408	 std	%i6, [%sp + CCFSZ + 72]
1409	! now load em all up again, sigh
1410	ldd	[%sp + CCFSZ + 0], %l0	! new %psr, new pc
1411	ldd	[%sp + CCFSZ + 8], %l2	! new npc, new %y
1412	wr	%l3, 0, %y
1413	ld	[%sp + CCFSZ + 20], %g1
1414	ldd	[%sp + CCFSZ + 24], %g2
1415	ldd	[%sp + CCFSZ + 32], %g4
1416	ldd	[%sp + CCFSZ + 40], %g6
1417	ldd	[%sp + CCFSZ + 48], %i0
1418	ldd	[%sp + CCFSZ + 56], %i2
1419	ldd	[%sp + CCFSZ + 64], %i4
1420	ldd	[%sp + CCFSZ + 72], %i6
1421	b	return_from_trap
1422	 wr	%l0, 0, %psr
1423
1424/*
1425 * Interrupts.  Software interrupts must be cleared from the software
1426 * interrupt enable register.  Rather than calling ienab_bic for each,
1427 * we do them in-line before enabling traps.
1428 *
1429 * After preliminary setup work, the interrupt is passed to each
1430 * registered handler in turn.  These are expected to return nonzero if
1431 * they took care of the interrupt.  If a handler claims the interrupt,
1432 * we exit (hardware interrupts are latched in the requestor so we'll
1433 * just take another interrupt in the unlikely event of simultaneous
1434 * interrupts from two different devices at the same level).  If we go
1435 * through all the registered handlers and no one claims it, we report a
1436 * stray interrupt.  This is more or less done as:
1437 *
1438 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
1439 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
1440 *			return;
1441 *	strayintr(&frame);
1442 *
1443 * Software interrupts are almost the same with three exceptions:
1444 * (1) we clear the interrupt from the software interrupt enable
1445 *     register before calling any handler (we have to clear it first
1446 *     to avoid an interrupt-losing race),
1447 * (2) we always call all the registered handlers (there is no way
1448 *     to tell if the single bit in the software interrupt register
1449 *     represents one or many requests)
1450 * (3) we never announce a stray interrupt (because of (1), another
1451 *     interrupt request can come in while we're in the handler.  If
1452 *     the handler deal with everything for both the original & the
1453 *     new request, we'll erroneously report a stray interrupt when
1454 *     we take the software interrupt for the new request.
1455 *
1456 * Inputs:
1457 *	%l0 = %psr
1458 *	%l1 = return pc
1459 *	%l2 = return npc
1460 *	%l3 = interrupt level
1461 *	(software interrupt only) %l4 = bits to clear in interrupt register
1462 *
1463 * Internal:
1464 *	%l4, %l5: local variables
1465 *	%l6 = %y
1466 *	%l7 = %g1
1467 *	%g2..%g7 go to stack
1468 *
1469 * An interrupt frame is built in the space for a full trapframe;
1470 * this contains the psr, pc, npc, and interrupt level.
1471 */
1472	.comm	_intrhand, 15 * 8	! intrhand[0..14]; 0 => error
1473softintr:
1474	sethi	%hi(IE_reg_addr), %l6
1475	ldub	[%l6 + %lo(IE_reg_addr)], %l5
1476	andn	%l5, %l4, %l5
1477	stb	%l5, [%l6 + %lo(IE_reg_addr)]
1478	INTR_SETUP(-CCFSZ-80)
1479	std	%g2, [%sp + CCFSZ + 24]	! save registers
1480	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1481	mov	%g1, %l7
1482	rd	%y, %l6
1483	std	%g4, [%sp + CCFSZ + 32]
1484	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1485	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1486	std	%g6, [%sp + CCFSZ + 40]
1487	or	%l5, %l4, %l4		!			;
1488	wr	%l4, 0, %psr		! the manual claims this
1489	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1490	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1491	sll	%l3, 2, %l5
1492	set	_intrcnt, %l4		! intrcnt[intlev]++;
1493	ld	[%l4 + %l5], %o0
1494	std	%l2, [%sp + CCFSZ + 8]
1495	inc	%o0
1496	st	%o0, [%l4 + %l5]
1497	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1498	ld	[%l4 + %l5], %l4
1499	b	3f
1500	 st	%fp, [%sp + CCFSZ + 16]
1501
15021:	ld	[%l4], %o1
1503	ld	[%l4 + 4], %o0
1504	tst	%o0
1505	bz,a	2f
1506	 add	%sp, CCFSZ, %o0
15072:	jmpl	%o1, %o7		!	(void)(*ih->ih_fun)(...)
1508	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
15093:	tst	%l4			! while ih != NULL
1510	bnz	1b
1511	 nop
1512	mov	%l7, %g1
1513	wr	%l6, 0, %y
1514	ldd	[%sp + CCFSZ + 24], %g2
1515	ldd	[%sp + CCFSZ + 32], %g4
1516	ldd	[%sp + CCFSZ + 40], %g6
1517	b	return_from_trap
1518	 wr	%l0, 0, %psr
1519
1520	/*
1521	 * _sparc_interrupt is exported for paranoia checking (see intr.c).
1522	 */
1523	.globl	_sparc_interrupt
1524_sparc_interrupt:
1525	INTR_SETUP(-CCFSZ-80)
1526	std	%g2, [%sp + CCFSZ + 24]	! save registers
1527	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1528	mov	%g1, %l7
1529	rd	%y, %l6
1530	std	%g4, [%sp + CCFSZ + 32]
1531	andn	%l0, PSR_PIL, %l4	! %l4 = psr & ~PSR_PIL |
1532	sll	%l3, 8, %l5		!	intlev << IPLSHIFT
1533	std	%g6, [%sp + CCFSZ + 40]
1534	or	%l5, %l4, %l4		!			;
1535	wr	%l4, 0, %psr		! the manual claims this
1536	wr	%l4, PSR_ET, %psr	! song and dance is necessary
1537	std	%l0, [%sp + CCFSZ + 0]	! set up intrframe/clockframe
1538	sll	%l3, 2, %l5
1539	set	_intrcnt, %l4		! intrcnt[intlev]++;
1540	ld	[%l4 + %l5], %o0
1541	std	%l2, [%sp + CCFSZ + 8]	! set up intrframe/clockframe
1542	inc	%o0
1543	st	%o0, [%l4 + %l5]
1544	set	_intrhand, %l4		! %l4 = intrhand[intlev];
1545	ld	[%l4 + %l5], %l4
1546	b	3f
1547	 st	%fp, [%sp + CCFSZ + 16]
1548
15491:	ld	[%l4], %o1
1550	ld	[%l4 + 4], %o0
1551	tst	%o0
1552	bz,a	2f
1553	 add	%sp, CCFSZ, %o0
15542:	jmpl	%o1, %o7		!	handled = (*ih->ih_fun)(...)
1555	 ld	[%l4 + 8], %l4		!	and ih = ih->ih_next
1556	tst	%o0
1557	bnz	4f			! if (handled) break
1558	 nop
15593:	tst	%l4
1560	bnz	1b			! while (ih)
1561	 nop
1562	call	_strayintr		!	strayintr(&intrframe)
1563	 add	%sp, CCFSZ, %o0
1564	/* all done: restore registers and go return */
15654:	mov	%l7, %g1
1566	wr	%l6, 0, %y
1567	ldd	[%sp + CCFSZ + 24], %g2
1568	ldd	[%sp + CCFSZ + 32], %g4
1569	ldd	[%sp + CCFSZ + 40], %g6
1570	b	return_from_trap
1571	 wr	%l0, 0, %psr
1572
1573#ifdef notyet
1574/*
1575 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
1576 * software interrupt, and get out.  Do the software interrupt directly
1577 * if we would just take it on the way out.
1578 *
1579 * Input:
1580 *	%l0 = %psr
1581 *	%l1 = return pc
1582 *	%l2 = return npc
1583 * Internal:
1584 *	%l3 = zs device
1585 *	%l4, %l5 = temporary
1586 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
1587 *	%l7 = zs soft status
1588 */
1589zshard:
1590#endif /* notyet */
1591
1592/*
1593 * Level 15 interrupt.  An async memory error has occurred;
1594 * take care of it (typically by panicking, but hey...).
1595 *	%l0 = %psr
1596 *	%l1 = return pc
1597 *	%l2 = return npc
1598 *	%l3 = 15 * 4 (why? just because!)
1599 *
1600 * Internal:
1601 *	%l4 = %y
1602 *	%l5 = %g1
1603 *	%l6 = %g6
1604 *	%l7 = %g7
1605 *  g2, g3, g4, g5 go to stack
1606 *
1607 * This code is almost the same as that in mem_access_fault,
1608 * except that we already know the problem is not a `normal' fault,
1609 * and that we must be extra-careful with interrupt enables.
1610 */
1611nmi:
1612	INTR_SETUP(-CCFSZ-80)
1613	INCR(_cnt+V_INTR)		! cnt.v_intr++; (clobbers %o0,%o1)
1614	/*
1615	 * Level 15 interrupts are nonmaskable, so with traps off,
1616	 * disable all interrupts to prevent recursion.
1617	 */
1618	sethi	%hi(IE_reg_addr), %o0
1619	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1620	andn	%o1, IE_ALLIE, %o1
1621	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1622	wr	%l0, PSR_ET, %psr	! okay, turn traps on again
1623
1624	std	%g2, [%sp + CCFSZ + 0]	! save g2, g3
1625	rd	%y, %l4			! save y
1626
1627	! must read the sync error register too.
1628	set	AC_SYNC_ERR, %o0
1629	lda	[%o0] ASI_CONTROL, %o1	! sync err reg
1630	inc	4, %o0
1631	lda	[%o0] ASI_CONTROL, %o2	! sync virt addr
1632	std	%g4, [%sp + CCFSZ + 8]	! save g4,g5
1633	mov	%g1, %l5		! save g1,g6,g7
1634	mov	%g6, %l6
1635	mov	%g7, %l7
1636	inc	4, %o0
1637	lda	[%o0] ASI_CONTROL, %o3	! async err reg
1638	inc	4, %o0
1639	lda	[%o0] ASI_CONTROL, %o4	! async virt addr
1640
1641	! and call C code
1642	call	_memerr			! memerr(0, ser, sva, aer, ava)
1643	clr	%o0
1644
1645	mov	%l5, %g1		! restore g1 through g7
1646	ldd	[%sp + CCFSZ + 0], %g2
1647	ldd	[%sp + CCFSZ + 8], %g4
1648	wr	%l0, 0, %psr		! re-disable traps
1649	mov	%l6, %g6
1650	mov	%l7, %g7
1651
1652	! set IE_ALLIE again (safe, we disabled traps again above)
1653	sethi	%hi(IE_reg_addr), %o0
1654	ldub	[%o0 + %lo(IE_reg_addr)], %o1
1655	or	%o1, IE_ALLIE, %o1
1656	stb	%o1, [%o0 + %lo(IE_reg_addr)]
1657	b	return_from_trap
1658	 wr	%l4, 0, %y		! restore y
1659
1660
1661/*
1662 * Window overflow trap handler.
1663 *	%l0 = %psr
1664 *	%l1 = return pc
1665 *	%l2 = return npc
1666 */
1667window_of:
1668#ifdef TRIVIAL_WINDOW_OVERFLOW_HANDLER
1669	/* a trivial version that assumes %sp is ok */
1670	/* (for testing only!) */
1671	save	%g0, %g0, %g0
1672	std	%l0, [%sp + (0*8)]
1673	rd	%psr, %l0
1674	mov	1, %l1
1675	sll	%l1, %l0, %l0
1676	wr	%l0, 0, %wim
1677	std	%l2, [%sp + (1*8)]
1678	std	%l4, [%sp + (2*8)]
1679	std	%l6, [%sp + (3*8)]
1680	std	%i0, [%sp + (4*8)]
1681	std	%i2, [%sp + (5*8)]
1682	std	%i4, [%sp + (6*8)]
1683	std	%i6, [%sp + (7*8)]
1684	restore
1685	RETT
1686#else
1687	/*
1688	 * This is similar to TRAP_SETUP, but we do not want to spend
1689	 * a lot of time, so we have separate paths for kernel and user.
1690	 * We also know for sure that the window has overflowed.
1691	 */
1692	btst	PSR_PS, %l0
1693	bz	winof_user
1694	 sethi	%hi(clean_trap_window), %l7
1695
1696	/*
1697	 * Overflow from kernel mode.  Call clean_trap_window to
1698	 * do the dirty work, then just return, since we know prev
1699	 * window is valid.  clean_trap_windows might dump all *user*
1700	 * windows into the pcb, but we do not care: there is at
1701	 * least one kernel window (a trap or interrupt frame!)
1702	 * above us.
1703	 */
1704	jmpl	%l7 + %lo(clean_trap_window), %l4
1705	 mov	%g7, %l7		! for clean_trap_window
1706
1707	wr	%l0, 0, %psr		! put back the @%*! cond. codes
1708	nop				! (let them settle in)
1709	RETT
1710
1711winof_user:
1712	/*
1713	 * Overflow from user mode.
1714	 * If clean_trap_window dumps the registers into the pcb,
1715	 * rft_user will need to call trap(), so we need space for
1716	 * a trap frame.  We also have to compute pcb_nw.
1717	 *
1718	 * SHOULD EXPAND IN LINE TO AVOID BUILDING TRAP FRAME ON
1719	 * `EASY' SAVES
1720	 */
1721	sethi	%hi(_cpcb), %l6
1722	ld	[%l6 + %lo(_cpcb)], %l6
1723	ld	[%l6 + PCB_WIM], %l5
1724	and	%l0, 31, %l3
1725	sub	%l3, %l5, %l5 		/* l5 = CWP - pcb_wim */
1726	set	uwtab, %l4
1727	ldub	[%l4 + %l5], %l5	/* l5 = uwtab[l5] */
1728	st	%l5, [%l6 + PCB_UW]
1729	jmpl	%l7 + %lo(clean_trap_window), %l4
1730	 mov	%g7, %l7		! for clean_trap_window
1731	sethi	%hi(_cpcb), %l6
1732	ld	[%l6 + %lo(_cpcb)], %l6
1733	set	UPAGES*NBPG-CCFSZ-80, %l5
1734	add	%l6, %l5, %sp		/* over to kernel stack */
1735	CHECK_SP_REDZONE(%l6, %l5)
1736
1737	/*
1738	 * Copy return_from_trap far enough to allow us
1739	 * to jump directly to rft_user_or_recover_pcb_windows
1740	 * (since we know that is where we are headed).
1741	 */
1742!	and	%l0, 31, %l3		! still set (clean_trap_window
1743					! leaves this register alone)
1744	set	wmask, %l6
1745	ldub	[%l6 + %l3], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
1746	b	rft_user_or_recover_pcb_windows
1747	 rd	%wim, %l4		! (read %wim first)
1748#endif /* end `real' version of window overflow trap handler */
1749
1750/*
1751 * Window underflow trap handler.
1752 *	%l0 = %psr
1753 *	%l1 = return pc
1754 *	%l2 = return npc
1755 *
1756 * A picture:
1757 *
1758 *	  T R I X
1759 *	0 0 0 1 0 0 0	(%wim)
1760 * [bit numbers increase towards the right;
1761 * `restore' moves right & `save' moves left]
1762 *
1763 * T is the current (Trap) window, R is the window that attempted
1764 * a `Restore' instruction, I is the Invalid window, and X is the
1765 * window we want to make invalid before we return.
1766 *
1767 * Since window R is valid, we cannot use rft_user to restore stuff
1768 * for us.  We have to duplicate its logic.  YUCK.
1769 *
1770 * Incidentally, TRIX are for kids.  Silly rabbit!
1771 */
1772window_uf:
1773#ifdef TRIVIAL_WINDOW_UNDERFLOW_HANDLER
1774	wr	%g0, 0, %wim		! allow us to enter I
1775	restore				! to R
1776	nop
1777	nop
1778	restore				! to I
1779	restore	%g0, 1, %l1		! to X
1780	rd	%psr, %l0
1781	sll	%l1, %l0, %l0
1782	wr	%l0, 0, %wim
1783	save	%g0, %g0, %g0		! back to I
1784	LOADWIN(%sp)
1785	save	%g0, %g0, %g0		! back to R
1786	save	%g0, %g0, %g0		! back to T
1787	RETT
1788#else
1789	wr	%g0, 0, %wim		! allow us to enter I
1790	btst	PSR_PS, %l0
1791	restore				! enter window R
1792	bz	winuf_user
1793	 restore			! enter window I
1794
1795	/*
1796	 * Underflow from kernel mode.  Just recover the
1797	 * registers and go (except that we have to update
1798	 * the blasted user pcb fields).
1799	 */
1800	restore	%g0, 1, %l1		! enter window X, then set %l1 to 1
1801	rd	%psr, %l0		! cwp = %psr & 31;
1802	and	%l0, 31, %l0
1803	sll	%l1, %l0, %l1		! wim = 1 << cwp;
1804	wr	%l1, 0, %wim		! setwim(wim);
1805	sethi	%hi(_cpcb), %l1
1806	ld	[%l1 + %lo(_cpcb)], %l1
1807	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1808	save	%g0, %g0, %g0		! back to window I
1809	LOADWIN(%sp)
1810	save	%g0, %g0, %g0		! back to R
1811	save	%g0, %g0, %g0		! and then to T
1812	wr	%l0, 0, %psr		! fix those cond codes....
1813	nop				! (let them settle in)
1814	RETT
1815
1816winuf_user:
1817	/*
1818	 * Underflow from user mode.
1819	 *
1820	 * We cannot use rft_user (as noted above) because
1821	 * we must re-execute the `restore' instruction.
1822	 * Since it could be, e.g., `restore %l0,0,%l0',
1823	 * it is not okay to touch R's registers either.
1824	 *
1825	 * We are now in window I.
1826	 */
1827	btst	7, %sp			! if unaligned, it is invalid
1828	bne	winuf_invalid
1829	 EMPTY
1830
1831	PTE_OF_ADDR(%sp, %l7, winuf_invalid)
1832	CMP_PTE_USER_READ(%l7)		! if first page not readable,
1833	bne	winuf_invalid		! it is invalid
1834	 EMPTY
1835	SLT_IF_1PAGE_RW(%sp, %l7)	! first page is readable
1836	bl,a	winuf_ok		! if only one page, enter window X
1837	 restore %g0, 1, %l1		! and goto ok, & set %l1 to 1
1838	add	%sp, 7*8, %l5
1839	PTE_OF_ADDR(%l5, %l7, winuf_invalid)
1840	CMP_PTE_USER_READ(%l7)		! check second page too
1841	be,a	winuf_ok		! enter window X and goto ok
1842	 restore %g0, 1, %l1		! (and then set %l1 to 1)
1843
1844winuf_invalid:
1845	/*
1846	 * We were unable to restore the window because %sp
1847	 * is invalid or paged out.  Return to the trap window
1848	 * and call trap(T_WINUF).  This will save R to the user
1849	 * stack, then load both R and I into the pcb rw[] area,
1850	 * and return with pcb_nsaved set to -1 for success, 0 for
1851	 * failure.  `Failure' indicates that someone goofed with the
1852	 * trap registers (e.g., signals), so that we need to return
1853	 * from the trap as from a syscall (probably to a signal handler)
1854	 * and let it retry the restore instruction later.  Note that
1855	 * window R will have been pushed out to user space, and thus
1856	 * be the invalid window, by the time we get back here.  (We
1857	 * continue to label it R anyway.)  We must also set %wim again,
1858	 * and set pcb_uw to 1, before enabling traps.  (Window R is the
1859	 * only window, and it is a user window).
1860	 */
1861	save	%g0, %g0, %g0		! back to R
1862#if 0		/* this gives `as' mild heartburn */
1863	save	%g0, 1, %l4		! back to T, then %l4 = 1
1864#else
1865	save	%g0, %g0, %g0		! back to T
1866	mov	1, %l4			! and set %l4 = 1
1867#endif
1868	sethi	%hi(_cpcb), %l6
1869	ld	[%l6 + %lo(_cpcb)], %l6
1870	st	%l4, [%l6 + PCB_UW]	! pcb_uw = 1
1871	ld	[%l6 + PCB_WIM], %l5	! get log2(%wim)
1872	sll	%l4, %l5, %l4		! %l4 = old %wim
1873	wr	%l4, 0, %wim		! window I is now invalid again
1874	set	UPAGES*NBPG-CCFSZ-80, %l5
1875	add	%l6, %l5, %sp		! get onto kernel stack
1876	CHECK_SP_REDZONE(%l6, %l5)
1877
1878	/*
1879	 * Okay, call trap(T_WINUF, psr, pc, &tf).
1880	 * See `slowtrap' above for operation.
1881	 */
1882	wr	%l0, PSR_ET, %psr
1883	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
1884	rd	%y, %l3
1885	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
1886	mov	T_WINUF, %o0
1887	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
1888	mov	%l0, %o1
1889	std	%g2, [%sp + CCFSZ + 24]	! etc
1890	mov	%l1, %o2
1891	std	%g4, [%sp + CCFSZ + 32]
1892	add	%sp, CCFSZ, %o3
1893	std	%g6, [%sp + CCFSZ + 40]
1894	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_out[0], etc
1895	std	%i2, [%sp + CCFSZ + 56]
1896	std	%i4, [%sp + CCFSZ + 64]
1897	call	_trap			! trap(T_WINUF, psr, pc, &tf)
1898	 std	%i6, [%sp + CCFSZ + 72]	! tf.tf_out[6]
1899
1900	ldd	[%sp + CCFSZ + 0], %l0	! new psr, pc
1901	ldd	[%sp + CCFSZ + 8], %l2	! new npc, %y
1902	wr	%l3, 0, %y
1903	ld	[%sp + CCFSZ + 20], %g1
1904	ldd	[%sp + CCFSZ + 24], %g2
1905	ldd	[%sp + CCFSZ + 32], %g4
1906	ldd	[%sp + CCFSZ + 40], %g6
1907	ldd	[%sp + CCFSZ + 48], %i0	! %o0 for window R, etc
1908	ldd	[%sp + CCFSZ + 56], %i2
1909	ldd	[%sp + CCFSZ + 64], %i4
1910	wr	%l0, 0, %psr		! disable traps: test must be atomic
1911	ldd	[%sp + CCFSZ + 72], %i6
1912	sethi	%hi(_cpcb), %l6
1913	ld	[%l6 + %lo(_cpcb)], %l6
1914	ld	[%l6 + PCB_NSAVED], %l7	! if nsaved is -1, we have our regs
1915	tst	%l7
1916	bl,a	1f			! got them
1917	 wr	%g0, 0, %wim		! allow us to enter windows R, I
1918	b,a	return_from_trap
1919
1920	/*
1921	 * Got 'em.  Load 'em up.
1922	 */
19231:
1924	mov	%g6, %l3		! save %g6; set %g6 = cpcb
1925	mov	%l6, %g6
1926	st	%g0, [%g6 + PCB_NSAVED]	! and clear magic flag
1927	restore				! from T to R
1928	restore				! from R to I
1929	restore	%g0, 1, %l1		! from I to X, then %l1 = 1
1930	rd	%psr, %l0		! cwp = %psr;
1931	sll	%l1, %l0, %l1
1932	wr	%l1, 0, %wim		! make window X invalid
1933	and	%l0, 31, %l0
1934	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1935	nop				! unnecessary? old wim was 0...
1936	save	%g0, %g0, %g0		! back to I
1937	LOADWIN(%g6 + PCB_RW + 64)	! load from rw[1]
1938	save	%g0, %g0, %g0		! back to R
1939	LOADWIN(%g6 + PCB_RW)		! load from rw[0]
1940	save	%g0, %g0, %g0		! back to T
1941	wr	%l0, 0, %psr		! restore condition codes
1942	mov	%l3, %g6		! fix %g6
1943	RETT
1944
1945	/*
1946	 * Restoring from user stack, but everything has checked out
1947	 * as good.  We are now in window X, and %l1 = 1.  Window R
1948	 * is still valid and holds user values.
1949	 */
1950winuf_ok:
1951	rd	%psr, %l0
1952	sll	%l1, %l0, %l1
1953	wr	%l1, 0, %wim		! make this one invalid
1954	sethi	%hi(_cpcb), %l2
1955	ld	[%l2 + %lo(_cpcb)], %l2
1956	and	%l0, 31, %l0
1957	st	%l0, [%l2 + PCB_WIM]	! cpcb->pcb_wim = cwp;
1958	save	%g0, %g0, %g0		! back to I
1959	LOADWIN(%sp)
1960	save	%g0, %g0, %g0		! back to R
1961	save	%g0, %g0, %g0		! back to T
1962	wr	%l0, 0, %psr		! restore condition codes
1963	nop				! it takes three to tangle
1964	RETT
1965#endif /* end `real' version of window underflow trap handler */
1966
1967/*
1968 * Various return-from-trap routines (see return_from_trap).
1969 */
1970
1971/*
1972 * Return from trap, to kernel.
1973 *	%l0 = %psr
1974 *	%l1 = return pc
1975 *	%l2 = return npc
1976 *	%l4 = %wim
1977 *	%l5 = bit for previous window
1978 */
1979rft_kernel:
1980	btst	%l5, %l4		! if (wim & l5)
1981	bnz	1f			!	goto reload;
1982	 wr	%l0, 0, %psr		! but first put !@#*% cond codes back
1983
1984	/* previous window is valid; just rett */
1985	nop				! wait for cond codes to settle in
1986	RETT
1987
1988	/*
1989	 * Previous window is invalid.
1990	 * Update %wim and then reload l0..i7 from frame.
1991	 *
1992	 *	  T I X
1993	 *	0 0 1 0 0   (%wim)
1994	 * [see picture in window_uf handler]
1995	 *
1996	 * T is the current (Trap) window, I is the Invalid window,
1997	 * and X is the window we want to make invalid.  Window X
1998	 * currently has no useful values.
1999	 */
20001:
2001	wr	%g0, 0, %wim		! allow us to enter window I
2002	nop; nop; nop			! (it takes a while)
2003	restore				! enter window I
2004	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2005	rd	%psr, %l0		! CWP = %psr & 31;
2006	and	%l0, 31, %l0
2007	sll	%l1, %l0, %l1		! wim = 1 << CWP;
2008	wr	%l1, 0, %wim		! setwim(wim);
2009	sethi	%hi(_cpcb), %l1
2010	ld	[%l1 + %lo(_cpcb)], %l1
2011	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2012	save	%g0, %g0, %g0		! back to window I
2013	LOADWIN(%sp)
2014	save	%g0, %g0, %g0		! back to window T
2015	/*
2016	 * Note that the condition codes are still set from
2017	 * the code at rft_kernel; we can simply return.
2018	 */
2019	RETT
2020
2021/*
2022 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
2023 * will re-enter trap() if set.  Note that we may have to switch from
2024 * the interrupt stack to the kernel stack in this case.
2025 *	%l0 = %psr
2026 *	%l1 = return pc
2027 *	%l2 = return npc
2028 *	%l4 = %wim
2029 *	%l5 = bit for previous window
2030 *	%l6 = cpcb
2031 * If returning to a valid window, just set psr and return.
2032 */
2033rft_user:
2034!	sethi	%hi(_want_ast), %l7	! (done below)
2035	ld	[%l7 + %lo(_want_ast)], %l7
2036	tst	%l7			! want AST trap?
2037	bne,a	softtrap		! yes, re-enter trap with type T_AST
2038	 mov	T_AST, %o0
2039
2040	btst	%l5, %l4		! if (wim & l5)
2041	bnz	1f			!	goto reload;
2042	 wr	%l0, 0, %psr		! restore cond codes
2043	nop				! (three instruction delay)
2044	RETT
2045
2046	/*
2047	 * Previous window is invalid.
2048	 * Before we try to load it, we must verify its stack pointer.
2049	 * This is much like the underflow handler, but a bit easier
2050	 * since we can use our own local registers.
2051	 */
20521:
2053	btst	7, %fp			! if unaligned, address is invalid
2054	bne	rft_invalid
2055	 EMPTY
2056
2057	PTE_OF_ADDR(%fp, %l7, rft_invalid)
2058	CMP_PTE_USER_READ(%l7)		! try first page
2059	bne	rft_invalid		! no good
2060	 EMPTY
2061	SLT_IF_1PAGE_RW(%fp, %l7)
2062	bl,a	rft_user_ok		! only 1 page: ok
2063	 wr	%g0, 0, %wim
2064	add	%fp, 7*8, %l5
2065	PTE_OF_ADDR(%l5, %l7, rft_invalid)
2066	CMP_PTE_USER_READ(%l7)		! check 2nd page too
2067	be,a	rft_user_ok
2068	 wr	%g0, 0, %wim
2069
2070	/*
2071	 * The window we wanted to pull could not be pulled.  Instead,
2072	 * re-enter trap with type T_RWRET.  This will pull the window
2073	 * into cpcb->pcb_rw[0] and set cpcb->pcb_nsaved to -1, which we
2074	 * will detect when we try to return again.
2075	 */
2076rft_invalid:
2077	b	softtrap
2078	 mov	T_RWRET, %o0
2079
2080	/*
2081	 * The window we want to pull can be pulled directly.
2082	 */
2083rft_user_ok:
2084!	wr	%g0, 0, %wim		! allow us to get into it
2085	wr	%l0, 0, %psr		! fix up the cond codes now
2086	nop; nop; nop
2087	restore				! enter window I
2088	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2089	rd	%psr, %l0		! l0 = (junk << 5) + CWP;
2090	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2091	wr	%l1, 0, %wim
2092	sethi	%hi(_cpcb), %l1
2093	ld	[%l1 + %lo(_cpcb)], %l1
2094	and	%l0, 31, %l0
2095	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = l0 & 31;
2096	save	%g0, %g0, %g0		! back to window I
2097	LOADWIN(%sp)			! suck hard
2098	save	%g0, %g0, %g0		! back to window T
2099	RETT
2100
2101/*
2102 * Return from trap.  Entered after a
2103 *	wr	%l0, 0, %psr
2104 * which disables traps so that we can rett; registers are:
2105 *
2106 *	%l0 = %psr
2107 *	%l1 = return pc
2108 *	%l2 = return npc
2109 *
2110 * (%l3..%l7 anything).
2111 *
2112 * If we are returning to user code, we must:
2113 *  1.  Check for register windows in the pcb that belong on the stack.
2114 *	If there are any, reenter trap with type T_WINOF.
2115 *  2.  Make sure the register windows will not underflow.  This is
2116 *	much easier in kernel mode....
2117 */
2118return_from_trap:
2119!	wr	%l0, 0, %psr		! disable traps so we can rett
2120! (someone else did this already)
2121	and	%l0, 31, %l5
2122	set	wmask, %l6
2123	ldub	[%l6 + %l5], %l5	! %l5 = 1 << ((CWP + 1) % nwindows)
2124	btst	PSR_PS, %l0		! returning to userland?
2125	bnz	rft_kernel		! no, go return to kernel
2126	 rd	%wim, %l4		! (read %wim in any case)
2127
2128rft_user_or_recover_pcb_windows:
2129	/*
2130	 * (entered with %l4=%wim, %l5=wmask[cwp]; %l0..%l2 as usual)
2131	 *
2132	 * check cpcb->pcb_nsaved:
2133	 * if 0, do a `normal' return to user (see rft_user);
2134	 * if > 0, cpcb->pcb_rw[] holds registers to be copied to stack;
2135	 * if -1, cpcb->pcb_rw[0] holds user registers for rett window
2136	 * from an earlier T_RWRET pseudo-trap.
2137	 */
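	/*
	 * The dispatch below, as a C sketch (illustrative only):
	 *
	 *	if (cpcb->pcb_nsaved == 0)
	 *		goto rft_user;
	 *	else if (cpcb->pcb_nsaved > 0)
	 *		trap(T_WINOF);		(via softtrap)
	 *	else
	 *		reload the rett window from cpcb->pcb_rw[0],
	 *		    mark it valid in %wim, and rett;
	 */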
2138	sethi	%hi(_cpcb), %l6
2139	ld	[%l6 + %lo(_cpcb)], %l6
2140	ld	[%l6 + PCB_NSAVED], %l7
2141	tst	%l7
2142	bz,a	rft_user
2143	 sethi	%hi(_want_ast), %l7	! first instr of rft_user
2144
2145	bg,a	softtrap		! if (pcb_nsaved > 0)
2146	 mov	T_WINOF, %o0		!	trap(T_WINOF);
2147
2148	/*
2149	 * To get here, we must have tried to return from a previous
2150	 * trap and discovered that it would cause a window underflow.
2151	 * We then must have tried to pull the registers out of the
2152	 * user stack (from the address in %fp==%i6) and discovered
2153	 * that it was either unaligned or not loaded in memory, and
2154	 * therefore we ran a trap(T_RWRET), which loaded one set of
2155	 * registers into cpcb->pcb_rw[0] (if it had killed the
2156	 * process due to a bad stack, we would not be here).
2157	 *
2158	 * We want to load pcb_rw[0] into the previous window, which
2159	 * we know is currently invalid.  In other words, we want
2160	 * %wim to be 1 << ((cwp + 2) % nwindows).
2161	 */
2162	wr	%g0, 0, %wim		! enable restores
2163	mov	%g6, %l3		! save g6 in l3
2164	mov	%l6, %g6		! set g6 = &u
2165	st	%g0, [%g6 + PCB_NSAVED]	! clear cpcb->pcb_nsaved
2166	restore				! enter window I
2167	restore	%g0, 1, %l1		! enter window X, then %l1 = 1
2168	rd	%psr, %l0
2169	sll	%l1, %l0, %l1		! %wim = 1 << CWP;
2170	wr	%l1, 0, %wim
2171	and	%l0, 31, %l0
2172	st	%l0, [%g6 + PCB_WIM]	! cpcb->pcb_wim = CWP;
2173	nop				! unnecessary? old wim was 0...
2174	save	%g0, %g0, %g0		! back to window I
2175	LOADWIN(%g6 + PCB_RW)
2176	save	%g0, %g0, %g0		! back to window T (trap window)
2177	wr	%l0, 0, %psr		! cond codes, cond codes everywhere
2178	mov	%l3, %g6		! restore g6
2179	RETT
2180
2181! exported end marker for kernel gdb
2182	.globl	_endtrapcode
2183_endtrapcode:
2184
2185/*
2186 * init_tables(nwin) int nwin;
2187 *
2188 * Set up the uwtab and wmask tables.
2189 * We know nwin > 1.
2190 */
2191init_tables:
2192	/*
2193	 * for (i = -nwin, j = nwin - 2; ++i < 0; j--)
2194	 *	uwtab[i] = j;
2195	 * (loop runs at least once)
2196	 */
2197	set	uwtab, %o3
2198	sub	%g0, %o0, %o1		! i = -nwin + 1
2199	inc	%o1
2200	add	%o0, -2, %o2		! j = nwin - 2;
22010:
2202	stb	%o2, [%o3 + %o1]	! uwtab[i] = j;
22031:
2204	inccc	%o1			! ++i < 0?
2205	bl	0b			! yes, continue loop
2206	 dec	%o2			! in any case, j--
2207
2208	/*
2209	 * (i now equals 0)
2210	 * for (j = nwin - 1; i < nwin; i++, j--)
2211	 *	uwtab[i] = j;
2212	 * (loop runs at least twice)
2213	 */
2214	sub	%o0, 1, %o2		! j = nwin - 1
22150:
2216	stb	%o2, [%o3 + %o1]	! uwtab[i] = j
2217	inc	%o1			! i++
22181:
2219	cmp	%o1, %o0		! i < nwin?
2220	bl	0b			! yes, continue
2221	 dec	%o2			! in any case, j--
2222
2223	/*
2224	 * We observe that, for i in 0..nwin-2, (i+1)%nwin == i+1;
2225	 * for i==nwin-1, (i+1)%nwin == 0.
2226	 * To avoid adding 1, we run i from 1 to nwin and set
2227	 * wmask[i-1].
2228	 *
2229	 * for (i = j = 1; i < nwin; i++) {
2230	 *	j <<= 1;	(j now == 1 << i)
2231	 *	wmask[i - 1] = j;
2232	 * }
2233	 * (loop runs at least once)
2234	 */
2235	set	wmask - 1, %o3
2236	mov	1, %o1			! i = 1;
2237	mov	2, %o2			! j = 2;
22380:
2239	stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2240	inc	%o1			! i++
2241	cmp	%o1, %o0		! i < nwin?
2242	bl,a	0b			! yes, continue
2243	 sll	%o2, 1, %o2		! (and j <<= 1)
2244
2245	/*
2246	 * Now i==nwin, so we want wmask[i-1] = 1.
2247	 */
2248	mov	1, %o2			! j = 1;
2249	retl
2250	 stb	%o2, [%o3 + %o1]	! (wmask - 1)[i] = j;
2251
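/*
 * For reference, init_tables in C (a sketch assembled from the loop
 * comments above; negative uwtab indices are relative to the uwtab
 * label, exactly as the stb offsets are):
 *
 *	void
 *	init_tables(int nwin)
 *	{
 *		int i, j;
 *
 *		for (i = -nwin, j = nwin - 2; ++i < 0; j--)
 *			uwtab[i] = j;
 *		for (j = nwin - 1; i < nwin; i++, j--)
 *			uwtab[i] = j;
 *		for (i = j = 1; i < nwin; i++) {
 *			j <<= 1;
 *			wmask[i - 1] = j;
 *		}
 *		wmask[i - 1] = 1;
 *	}
 */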
2252
2253dostart:
2254	/*
2255	 * Startup.
2256	 *
2257	 * We have been loaded in low RAM, at some address which
2258	 * is page aligned (0x4000 actually) rather than where we
2259	 * want to run (0xf8004000).  Until we get everything set,
2260	 * we have to be sure to use only pc-relative addressing.
2261	 */
2262	wr	%g0, 0, %wim		! make sure we can set psr
2263	mov	%o0, %g7		! save prom vector pointer
2264	nop; nop
2265	wr	%g0, PSR_S|PSR_PS|PSR_PIL, %psr	! set initial psr
2266	set	AC_CONTEXT, %g1		! paranoia: set context to kernel
2267	stba	%g0, [%g1] ASI_CONTROL
2268	wr	%g0, 2, %wim		! set initial %wim (w1 invalid)
2269	mov	1, %g1			! set pcb_wim (log2(%wim) = 1)
2270	sethi	%hi(_u0 + PCB_WIM), %g2
2271	st	%g1, [%g2 + %lo(_u0 + PCB_WIM)]
2272
2273	/*
2274	 * Step 1: double map low RAM (addresses [0.._end-start-1])
2275	 * to KERNBASE (addresses [KERNBASE.._end-1]).  None of these
2276	 * are `bad' aliases (since they are all on segment boundaries)
2277	 * so we do not have to worry about cache aliasing.
2278	 *
2279	 * We map in another couple of segments just to have some
2280	 * more memory (512K, actually) guaranteed available for
2281	 * bootstrap code (pmap_bootstrap needs memory to hold MMU
2282	 * and context data structures).
2283	 */
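	/*
	 * As a C sketch (getsegmap/setsegmap stand in for the
	 * lduba/stba ASI_SEGMAP accesses below; SEGSIZ == 1 << 18):
	 *
	 *	for (lowva = 0, highva = KERNBASE;
	 *	    highva < (u_int)&_end + 2 * SEGSIZ;
	 *	    lowva += SEGSIZ, highva += SEGSIZ)
	 *		setsegmap(highva, getsegmap(lowva));
	 */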
2284	clr	%l0			! lowva
2285	set	KERNBASE, %l1		! highva
2286	set	_end + (2 << 18), %l2	! last va that must be remapped
2287	set	1 << 18, %l3		! segment size in bytes
22880:
2289	lduba	[%l0] ASI_SEGMAP, %l4	! segmap[highva] = segmap[lowva];
2290	stba	%l4, [%l1] ASI_SEGMAP
2291	add	%l3, %l1, %l1		! highva += segsiz;
2292	cmp	%l1, %l2		! done?
2293	bl	0b			! no, loop
2294	 add	%l3, %l0, %l0		! (and lowva += segsz)
2295
2296	/*
2297	 * Now map the interrupt enable register and clear any interrupts,
2298	 * enabling NMIs.  Note that we will not take NMIs until we change
2299	 * %tbr.
2300	 */
2301	set	IE_reg_addr, %l0
2302	set	IE_REG_PTE, %l1
2303	sta	%l1, [%l0] ASI_PTE
2304	mov	IE_ALLIE, %l1
2305	nop; nop			! paranoia
2306	stb	%l1, [%l0]
2307
2308	/*
2309	 * All set, fix pc and npc.  Once we are where we should be,
2310	 * we can give ourselves a stack and enable traps.
2311	 */
2312	set	1f, %l0
2313	jmp	%l0
2314	 nop
23151:
2316	set	USRSTACK - CCFSZ, %fp	! as if called from user code
2317	set	estack0 - CCFSZ - 80, %sp ! via syscall(boot_me_up) or somesuch
2318	rd	%psr, %l0
2319	wr	%l0, PSR_ET, %psr
2320
2321	/*
2322	 * Step 2: clear BSS.  This may just be paranoia; the boot
2323	 * loader might already do it for us; but what the hell.
2324	 */
2325	set	_edata, %o0		! bzero(edata, end - edata)
2326	set	_end, %o1
2327	call	_bzero
2328	 sub	%o1, %o0, %o1
2329
2330	/*
2331	 * Stash prom vectors now, after bzero, as it lives in bss
2332	 * (which we just zeroed).
2333	 * This depends on the fact that bzero does not use %g7.
2334	 */
2335	sethi	%hi(_promvec), %l0
2336	st	%g7, [%l0 + %lo(_promvec)]
2337
2338	/*
2339	 * Step 3: compute number of windows and set up tables.
2340	 * We could do some of this later.
2341	 */
2342	save	%sp, -64, %sp
2343	rd	%psr, %g1
2344	restore
2345	and	%g1, 31, %g1		! want just the CWP bits
2346	add	%g1, 1, %o0		! compute nwindows
2347	sethi	%hi(_nwindows), %o1	! may as well tell everyone
2348	call	init_tables
2349	 st	%o0, [%o1 + %lo(_nwindows)]
2350
2351	/*
2352	 * Step 4: change the trap base register, now that our trap handlers
2353	 * will function (they need the tables we just set up).
2354	 */
2355	set	_trapbase, %l0
2356	wr	%l0, 0, %tbr
2357	nop				! paranoia
2358
2359	/*
2360	 * Bootstrap, call main, and `return' from a fake trap into `icode'.
2361	 */
2362	call	_bootstrap
2363	 nop
2364	call	_main
2365	 nop
2366
2367	/*
2368	 * Return from main means we are process 1 and need to
2369	 * jump to the `icode' (which appears below) which has
2370	 * been copied in to address 0x2000 in the new process.
2371	 *
2372	 * Our stack & frame pointers are already set up to make it
2373	 * look like we are currently running in a trap taken from
2374	 * user mode (there is room for a full blown trapframe),
2375	 * so all we need do is set up %l0, %l1, and %l2 and branch
2376	 * to return_from_trap.
2377	 */
2378	mov	PSR_S, %l0		! user psr
2379#define XADDR 0x2000 /* XXX */
2380	set	XADDR, %l1		! pc
2381	add	%l1, 4, %l2		! npc
2382	b	return_from_trap
2383	 wr	%l0, 0, %psr		! just like syscall()
2384
2385/*
2386 * Icode is copied out to process 1 to exec init.
2387 * If the exec fails, process 1 exits.
2388 *
2389 * We depend here on the fact that we are copied to address XADDR
2390 * (but so does the code above).
2391 */
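/*
 * In C, the icode amounts to roughly (a sketch; argv points at the
 * strings assembled below):
 *
 *	execve("/sbin/init", argv, (char **)0);
 *	exit(error);			reached only if the exec fails
 */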
2392	.globl	_icode, _szicode
2393_icode:
2394	mov	SYS_execve, %g1
2395	set	init - _icode + XADDR, %o0	! actual location of pathname
2396	set	argv - _icode + XADDR, %o1	! ... and of argv
2397	clr	%o2				! no environment
2398	t	ST_SYSCALL
2399	mov	SYS_exit, %g1
2400	t	ST_SYSCALL
2401
2402init:	.ascii	"/sbin/"
2403init1:	.asciz	"init"
2404	ALIGN
2405argv:	.word	init1 - _icode + XADDR
2406	.word	initflags - _icode + XADDR
2407	.word	0
2408initflags:
2409_szicode:
2410	.long	_szicode - _icode
2411#undef XADDR
2412
2413/*
2414 * The following code is copied to the top of the user stack when each
2415 * process is exec'ed, and signals are `trampolined' off it.
2416 *
2417 * When this code is run, the stack looks like:
2418 *	[%sp]		64 bytes to which registers can be dumped
2419 *	[%sp + 64]	signal number (goes in %o0)
2420 *	[%sp + 64 + 4]	signal code (goes in %o1)
2421 *	[%sp + 64 + 8]	placeholder
2422 *	[%sp + 64 + 12]	argument for %o3, currently unsupported (always 0)
2423 *	[%sp + 64 + 16]	first word of saved state (sigcontext)
2424 *	    .
2425 *	    .
2426 *	    .
2427 *	[%sp + NNN]	last word of saved state
2428 * (followed by previous stack contents or top of signal stack).
2429 * The address of the function to call is in %g1; the old %g1 and %o0
2430 * have already been saved in the sigcontext.  We are running
2431 * in the window that was active when the signal occurred.
2432 *
2433 * Note that [%sp + 64 + 8] == %sp + 64 + 16.  The copy at %sp+64+8
2434 * will eventually be removed, with a hole left in its place, if things
2435 * work out.
2436 */
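/*
 * Viewed as a C struct, the layout above is roughly (a sketch; these
 * field names are illustrative, not taken from machine/signal.h):
 *
 *	struct sigframe {
 *		int	sf_dumparea[16];	64-byte register dump area
 *		int	sf_signo;		goes in %o0
 *		int	sf_code;		goes in %o1
 *		struct	sigcontext *sf_scp;	the placeholder: == &sf_sc
 *		int	sf_arg3;		goes in %o3 (always 0 for now)
 *		struct	sigcontext sf_sc;	the saved state
 *	};
 */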
2437	.globl	_sigcode
2438	.globl	_esigcode
2439_sigcode:
2440	/*
2441	 * Save the %i,%l,%o registers (renaming %o as %i) and set up to
2442	 * call C code (CCFSZ).  At the same time, make room on the stack
2443	 * for 32 %f registers + %fsr.  This comes out to 33*4 or 132 bytes,
2444	 * but this must be aligned to a multiple of 8, or 136 bytes.
2445	 */
2446	save	%sp, -CCFSZ - 136, %sp
2447	mov	%g2, %l2		! save globals in %l registers
2448	mov	%g3, %l3
2449	mov	%g4, %l4
2450	mov	%g5, %l5
2451	mov	%g6, %l6
2452	mov	%g7, %l7
2453	/*
2454	 * Saving the fpu registers is expensive, so do it iff the psr
2455	 * stored in the sigcontext shows that the fpu is enabled.
2456	 */
2457	ld	[%fp + 64 + 16 + SC_PSR_OFFSET], %l0
2458	sethi	%hi(PSR_EF), %l1	! FPU enable bit is too high for andcc
2459	andcc	%l0, %l1, %l0		! %l0 = fpu enable bit
2460	be	1f			! if not set, skip the saves
2461	 rd	%y, %l1			! in any case, save %y
2462
2463	! fpu is enabled, oh well
2464	st	%fsr, [%sp + CCFSZ + 0]
2465	std	%f0, [%sp + CCFSZ + 8]
2466	std	%f2, [%sp + CCFSZ + 16]
2467	std	%f4, [%sp + CCFSZ + 24]
2468	std	%f6, [%sp + CCFSZ + 32]
2469	std	%f8, [%sp + CCFSZ + 40]
2470	std	%f10, [%sp + CCFSZ + 48]
2471	std	%f12, [%sp + CCFSZ + 56]
2472	std	%f14, [%sp + CCFSZ + 64]
2473	std	%f16, [%sp + CCFSZ + 72]
2474	std	%f18, [%sp + CCFSZ + 80]
2475	std	%f20, [%sp + CCFSZ + 88]
2476	std	%f22, [%sp + CCFSZ + 96]
2477	std	%f24, [%sp + CCFSZ + 104]
2478	std	%f26, [%sp + CCFSZ + 112]
2479	std	%f28, [%sp + CCFSZ + 120]
2480	std	%f30, [%sp + CCFSZ + 128]
2481
24821:
2483	ldd	[%fp + 64], %o0		! sig, code
2484	ld	[%fp + 76], %o3		! arg3
2485	call	%g1			! (*sa->sa_handler)(sig,code,scp,arg3)
2486	 add	%fp, 64 + 16, %o2	! scp
2487
2488	/*
2489	 * Now that the handler has returned, re-establish all the state
2490	 * we just saved above, then do a sigreturn.
2491	 */
2492	tst	%l0			! reload fpu registers?
2493	be	1f			! if not, skip the loads
2494	 wr	%l1, %g0, %y		! in any case, restore %y
2495
2496	ld	[%sp + CCFSZ + 0], %fsr
2497	ldd	[%sp + CCFSZ + 8], %f0
2498	ldd	[%sp + CCFSZ + 16], %f2
2499	ldd	[%sp + CCFSZ + 24], %f4
2500	ldd	[%sp + CCFSZ + 32], %f6
2501	ldd	[%sp + CCFSZ + 40], %f8
2502	ldd	[%sp + CCFSZ + 48], %f10
2503	ldd	[%sp + CCFSZ + 56], %f12
2504	ldd	[%sp + CCFSZ + 64], %f14
2505	ldd	[%sp + CCFSZ + 72], %f16
2506	ldd	[%sp + CCFSZ + 80], %f18
2507	ldd	[%sp + CCFSZ + 88], %f20
2508	ldd	[%sp + CCFSZ + 96], %f22
2509	ldd	[%sp + CCFSZ + 104], %f24
2510	ldd	[%sp + CCFSZ + 112], %f26
2511	ldd	[%sp + CCFSZ + 120], %f28
2512	ldd	[%sp + CCFSZ + 128], %f30
2513
25141:
2515	mov	%l2, %g2
2516	mov	%l3, %g3
2517	mov	%l4, %g4
2518	mov	%l5, %g5
2519	mov	%l6, %g6
2520	mov	%l7, %g7
2521
2522	restore	%g0, SYS_sigreturn, %g1	! get registers back & set syscall #
2523	add	%sp, 64 + 16, %o0	! compute scp
2524	t	ST_SYSCALL		! sigreturn(scp)
2525	! sigreturn does not return unless it fails
2526	mov	SYS_exit, %g1		! exit(errno)
2527	t	ST_SYSCALL
2528_esigcode:
2529
2530/*
2531 * Primitives
2532 */
2533
2534#ifdef GPROF
2535	.globl	mcount
2536#define	ENTRY(x) \
2537	.globl _##x; _##x: \
2538	save	%sp, -CCFSZ, %sp; \
2539	call	mcount; \
2540	nop; \
2541	restore
2542#else
2543#define	ENTRY(x)	.globl _##x; _##x:
2544#endif
2545#define	ALTENTRY(x)	.globl _##x; _##x:
2546
2547/*
2548 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
2549 *
2550 * Copy a null terminated string from the user address space into
2551 * the kernel address space.
2552 */
2553ENTRY(copyinstr)
2554	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2555#ifdef DIAGNOSTIC
2556	tst	%o2			! kernel should never give maxlen <= 0
2557	ble	1f
2558	 EMPTY
2559#endif
2560	set	KERNBASE, %o4
2561	cmp	%o0, %o4		! fromaddr < KERNBASE?
2562	blu,a	Lcsdocopy		! yes, go do it
2563	sethi	%hi(_cpcb), %o4		! (first instr of copy)
2564
2565	b	Lcsdone			! no, return EFAULT
2566	 mov	EFAULT, %o0
2567
25681:
2569	sethi	%hi(2f), %o0
2570	call	_panic
2571	 or	%lo(2f), %o0, %o0
25722:	.asciz	"copyinstr"
2573	ALIGN
2574
2575/*
2576 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
2577 *
2578 * Copy a null terminated string from the kernel
2579 * address space to the user address space.
2580 */
2581ENTRY(copyoutstr)
2582	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
2583#ifdef DIAGNOSTIC
2584	tst	%o2
2585	ble	1f
2586	 EMPTY
2587#endif
2588	set	KERNBASE, %o4
2589	cmp	%o1, %o4		! toaddr < KERNBASE?
2590	blu,a	Lcsdocopy		! yes, go do it
2591	 sethi	%hi(_cpcb), %o4		! (first instr of copy)
2592
2593	b	Lcsdone			! no, return EFAULT
2594	 mov	EFAULT, %o0
2595
25961:
2597	sethi	%hi(2f), %o0
2598	call	_panic
2599	 or	%lo(2f), %o0, %o0
26002:	.asciz	"copyoutstr"
2601	ALIGN
2602
2603Lcsdocopy:
2604!	sethi	%hi(_cpcb), %o4		! (done earlier)
2605	ld	[%o4 + %lo(_cpcb)], %o4	! catch faults
2606	set	Lcsfault, %o5
2607	st	%o5, [%o4 + PCB_ONFAULT]
2608
2609	mov	%o1, %o5		!	save = toaddr;
2610! XXX should do this in bigger chunks when possible
26110:					! loop:
2612	ldsb	[%o0], %g1		!	c = *fromaddr;
2613	tst	%g1
2614	stb	%g1, [%o1]		!	*toaddr++ = c;
2615	be	1f			!	if (c == NULL)
2616	 inc	%o1			!		goto ok;
2617	deccc	%o2			!	if (--len > 0) {
2618	bg	0b			!		fromaddr++;
2619	 inc	%o0			!		goto loop;
2620					!	}
2621					!
2622	b	Lcsdone			!	error = ENAMETOOLONG;
2623	 mov	ENAMETOOLONG, %o0	!	goto done;
26241:					! ok:
2625	clr	%o0			!    error = 0;
2626Lcsdone:				! done:
2627	sub	%o1, %o5, %o1		!	len = to - save;
2628	tst	%o3			!	if (lencopied)
2629	bnz,a	3f
2630	 st	%o1, [%o3]		!		*lencopied = len;
26313:
2632	retl				! cpcb->pcb_onfault = 0;
2633	 st	%g0, [%o4 + PCB_ONFAULT]! return (error);
2634
2635Lcsfault:
2636	b	Lcsdone			! error = EFAULT;
2637	 mov	EFAULT, %o0		! goto ret;
2638
2639/*
2640 * copystr(fromaddr, toaddr, maxlength, &lencopied)
2641 *
2642 * Copy a null terminated string from one point to another in
2643 * the kernel address space.  (This is a leaf procedure, but
2644 * it does not seem that way to the C compiler.)
2645 */
2646ENTRY(copystr)
2647#ifdef DIAGNOSTIC
2648	tst	%o2			! 	if (maxlength <= 0)
2649	ble	4f			!		panic(...);
2650	 EMPTY
2651#endif
2652	mov	%o1, %o5		!	to0 = to;
26530:					! loop:
2654	ldsb	[%o0], %o4		!	c = *from;
2655	tst	%o4
2656	stb	%o4, [%o1]		!	*to++ = c;
2657	be	1f			!	if (c == 0)
2658	 inc	%o1			!		goto ok;
2659	deccc	%o2			!	if (--len > 0) {
2660	bg,a	0b			!		from++;
2661	 inc	%o0			!		goto loop;
2662	b	2f			!	}
2663	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
26641:					! ok:
2665	clr	%o0			!	ret = 0;
26662:
2667	sub	%o1, %o5, %o1		!	len = to - to0;
2668	tst	%o3			!	if (lencopied)
2669	bnz,a	3f
2670	 st	%o1, [%o3]		!		*lencopied = len;
26713:
2672	retl
2673	 nop
2674#ifdef DIAGNOSTIC
26754:
2676	sethi	%hi(5f), %o0
2677	call	_panic
2678	 or	%lo(5f), %o0, %o0
26795:
2680	.asciz	"copystr"
2681	ALIGN
2682#endif
2683
2684/*
2685 * Copyin(src, dst, len)
2686 *
2687 * Copy specified amount of data from user space into the kernel.
2688 */
2689ENTRY(copyin)
2690	set	KERNBASE, %o3
2691	cmp	%o0, %o3		! src < KERNBASE?
2692	blu,a	Ldocopy			! yes, can try it
2693	 sethi	%hi(_cpcb), %o3
2694
2695	/* source address points into kernel space: return EFAULT */
2696	retl
2697	 mov	EFAULT, %o0
2698
2699/*
2700 * Copyout(src, dst, len)
2701 *
2702 * Copy specified amount of data from kernel to user space.
2703 * Just like copyin, except that the `dst' addresses are user space
2704 * rather than the `src' addresses.
2705 */
2706ENTRY(copyout)
2707	set	KERNBASE, %o3
2708	cmp	%o1, %o3		! dst < KERNBASE?
2709	blu,a	Ldocopy
2710	 sethi	%hi(_cpcb), %o3
2711
2712	/* destination address points into kernel space: return EFAULT */
2713	retl
2714	 mov	EFAULT, %o0
2715
2716	/*
2717	 * ******NOTE****** this depends on bcopy() not using %g7
2718	 */
2719Ldocopy:
2720!	sethi	%hi(_cpcb), %o3
2721	ld	[%o3 + %lo(_cpcb)], %o3
2722	set	Lcopyfault, %o4
2723	mov	%o7, %g7		! save return address
2724	call	_bcopy			! bcopy(src, dst, len)
2725	 st	%o4, [%o3 + PCB_ONFAULT]
2726
2727	sethi	%hi(_cpcb), %o3
2728	ld	[%o3 + %lo(_cpcb)], %o3
2729	st	%g0, [%o3 + PCB_ONFAULT]
2730	jmp	%g7 + 8
2731	 clr	%o0			! return 0
2732
2733! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
2734! Note that although we were in bcopy, there is no state to clean up;
2735! the only special thing is that we have to return to [g7 + 8] rather than
2736! [o7 + 8].
2737Lcopyfault:
2738	sethi	%hi(_cpcb), %o3
2739	ld	[%o3 + %lo(_cpcb)], %o3
2740	st	%g0, [%o3 + PCB_ONFAULT]
2741	jmp	%g7 + 8
2742	 mov	EFAULT, %o0
2743
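/*
 * The copyin/copyout scheme above, as a C sketch (illustrative;
 * Lcopyfault is the assembly label reached on a fault in bcopy):
 *
 *	int
 *	copyin(caddr_t src, caddr_t dst, u_int len)
 *	{
 *		if ((u_int)src >= KERNBASE)
 *			return (EFAULT);
 *		cpcb->pcb_onfault = Lcopyfault;
 *		bcopy(src, dst, len);
 *		cpcb->pcb_onfault = 0;
 *		return (0);		Lcopyfault instead returns EFAULT
 *	}
 */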
2744
2745/*
2746 * Write all user windows presently in the CPU back to the user's stack.
2747 * We just do `save' instructions until pcb_uw == 0.
2748 *
2749 *	p = cpcb;
2750 *	nsaves = 0;
2751 *	while (p->pcb_uw > 0)
2752 *		save(), nsaves++;
2753 *	while (--nsaves >= 0)
2754 *		restore();
2755 */
2756ENTRY(write_user_windows)
2757	sethi	%hi(_cpcb), %g6
2758	ld	[%g6 + %lo(_cpcb)], %g6
2759	b	2f
2760	 clr	%g5
27611:
2762	save	%sp, -64, %sp
27632:
2764	ld	[%g6 + PCB_UW], %g7
2765	tst	%g7
2766	bg,a	1b
2767	 inc	%g5
27683:
2769	deccc	%g5
2770	bge,a	3b
2771	 restore
2772	retl
2773	 nop
2774
2775
2776	.comm	_want_resched,4
2777/*
2778 * Masterpaddr is the p->p_addr of the last process on the processor.
2779 * XXX masterpaddr is almost the same as cpcb
2780 * XXX should delete this entirely
2781 */
2782	.comm	_masterpaddr, 4
2783
2784/*
2785 * Switch statistics (for later tweaking):
2786 *	nswitchdiff = p1 => p2 (i.e., chose different process)
2787 *	nswitchexit = number of calls to swtchexit()
2788 *	_cnt.v_swtch = total calls to swtch+swtchexit
2789 */
2790	.comm	_nswitchdiff, 4
2791	.comm	_nswitchexit, 4
2792
2793/*
2794 * REGISTER USAGE IN swtch AND swtchexit:
2795 * This is split into two phases, more or less
2796 * `before we locate a new proc' and `after'.
2797 * Some values are the same in both phases.
2798 * Note that the %o0-registers are not preserved across
2799 * the psr change when entering a new process, since this
2800 * usually changes the CWP field (hence heavy usage of %g's).
2801 *
2802 *	%g1 = oldpsr (excluding ipl bits)
2803 *	%g2 = %hi(_whichqs); newpsr
2804 *	%g3 = p
2805 *	%g4 = lastproc
2806 *	%g5 = <free>; newpcb
2807 *	%g6 = %hi(_cpcb)
2808 *	%g7 = %hi(_curproc)
2809 *	%o0 = tmp 1
2810 *	%o1 = tmp 2
2811 *	%o2 = tmp 3
2812 *	%o3 = tmp 4; whichqs; vm
2813 *	%o4 = tmp 4; which; sswap
2814 *	%o5 = tmp 5; q; <free>
2815 */
2816
2817/*
2818 * swtchexit is called only from cpu_exit() before the current process
2819 * has freed its kernel stack; we must free it.  (curproc is already NULL.)
2820 *
2821 * We lay the process to rest by changing to the `idle' kernel stack,
2822 * and note that the `last loaded process' is nonexistent.
2823 */
2824ENTRY(swtchexit)
2825	mov	%o0, %g2		! save the
2826	mov	%o1, %g3		! ... three parameters
2827	mov	%o2, %g4		! ... to kmem_free
2828
2829	/*
2830	 * Change pcb to idle u. area, i.e., set %sp to top of stack
2831	 * and %psr to PSR_S|PSR_ET, and set cpcb to point to _idle_u.
2832	 * Once we have left the old stack, we can call kmem_free to
2833	 * destroy it.  Call it any sooner and the register windows
2834	 * go bye-bye.
2835	 */
2836	set	_idle_u, %g5
2837	sethi	%hi(_cpcb), %g6
2838	mov	1, %g7
2839	wr	%g0, PSR_S, %psr	! change to window 0, traps off
2840	wr	%g0, 2, %wim		! and make window 1 the trap window
2841	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = &idle_u
2842	st	%g7, [%g5 + PCB_WIM]	! idle_u.pcb_wim = log2(2) = 1
2843	set	_idle_u + UPAGES * NBPG - CCFSZ, %sp	! set new %sp
2844#ifdef DEBUG
2845	set	_idle_u, %l6
2846	SET_SP_REDZONE(%l6, %l5)
2847#endif
2848	wr	%g0, PSR_S|PSR_ET, %psr	! and then enable traps
2849	mov	%g2, %o0		! now ready to call kmem_free
2850	mov	%g3, %o1
2851	call	_kmem_free
2852	 mov	%g4, %o2
2853
2854	/*
2855	 * Now fall through to `the last swtch'.  %g6 was set to
2856	 * %hi(_cpcb), but may have been clobbered in kmem_free,
2857	 * so all the registers described below will be set here.
2858	 *
2859	 * REGISTER USAGE AT THIS POINT:
2860	 *	%g1 = oldpsr (excluding ipl bits)
2861	 *	%g2 = %hi(_whichqs)
2862	 *	%g4 = lastproc
2863	 *	%g6 = %hi(_cpcb)
2864	 *	%g7 = %hi(_curproc)
2865	 *	%o0 = tmp 1
2866	 *	%o1 = tmp 2
2867	 *	%o3 = whichqs
2868	 */
2869
2870	INCR(_nswitchexit)		! nswitchexit++;
2871	INCR(_cnt+V_SWTCH)		! cnt.v_swtch++;
2872
2873	mov	PSR_S|PSR_ET, %g1	! oldpsr = PSR_S | PSR_ET;
2874	sethi	%hi(_whichqs), %g2
2875	clr	%g4			! lastproc = NULL;
2876	sethi	%hi(_cpcb), %g6
2877	sethi	%hi(_curproc), %g7
2878	/* FALLTHROUGH */
2879
2880/*
2881 * When no processes are on the runq, swtch
2882 * idles here waiting for something to come ready.
2883 * The registers are set up as noted above.
2884 */
2885	.globl	idle
2886idle:
2887	st	%g0, [%g7 + %lo(_curproc)] ! curproc = NULL;
2888	wr	%g1, 0, %psr		! (void) spl0();
28891:					! spin reading _whichqs until nonzero
2890	ld	[%g2 + %lo(_whichqs)], %o3
2891	tst	%o3
2892	bnz,a	Lsw_scan
2893	 wr	%g1, PIL_CLOCK << 8, %psr	! (void) splclock();
2894	b,a	1b
2895
2896Lsw_panic_rq:
2897	sethi	%hi(1f), %o0
2898	call	_panic
2899	 or	%lo(1f), %o0, %o0
2900Lsw_panic_wchan:
2901	sethi	%hi(2f), %o0
2902	call	_panic
2903	 or	%lo(2f), %o0, %o0
2904Lsw_panic_srun:
2905	sethi	%hi(3f), %o0
2906	call	_panic
2907	 or	%lo(3f), %o0, %o0
29081:	.asciz	"swtch rq"
29092:	.asciz	"swtch wchan"
29103:	.asciz	"swtch SRUN"
2911	ALIGN
2912
2913/*
2914 * cpu_swtch() picks a process to run and runs it, saving the current
2915 * one away.  On the assumption that (since most workstations are
2916 * single user machines) the chances are quite good that the new
2917 * process will turn out to be the current process, we defer saving
2918 * it here until we have found someone to load.  If that someone
2919 * is the current process we avoid both store and load.
2920 *
2921 * cpu_swtch() is always entered at splstatclock or splhigh.
2922 *
2923 * IT MIGHT BE WORTH SAVING BEFORE ENTERING idle TO AVOID HAVING TO
2924 * SAVE LATER WHEN SOMEONE ELSE IS READY ... MUST MEASURE!
2925 */
2926	.globl	_runtime
2927	.globl	_time
2928ENTRY(cpu_swtch)
2929	/*
2930	 * REGISTER USAGE AT THIS POINT:
2931	 *	%g1 = oldpsr (excluding ipl bits)
2932	 *	%g2 = %hi(_whichqs)
2933	 *	%g3 = p
2934	 *	%g4 = lastproc
2935	 *	%g5 = tmp 0
2936	 *	%g6 = %hi(_cpcb)
2937	 *	%g7 = %hi(_curproc)
2938	 *	%o0 = tmp 1
2939	 *	%o1 = tmp 2
2940	 *	%o2 = tmp 3
2941	 *	%o3 = tmp 4, then at Lsw_scan, whichqs
2942	 *	%o4 = tmp 5, then at Lsw_scan, which
2943	 *	%o5 = tmp 6, then at Lsw_scan, q
2944	 */
2945	sethi	%hi(_whichqs), %g2	! set up addr regs
2946	sethi	%hi(_cpcb), %g6
2947	ld	[%g6 + %lo(_cpcb)], %o0
2948	std	%o6, [%o0 + PCB_SP]	! cpcb->pcb_<sp,pc> = <sp,pc>;
2949	rd	%psr, %g1		! oldpsr = %psr;
2950	sethi	%hi(_curproc), %g7
2951	ld	[%g7 + %lo(_curproc)], %g4	! lastproc = curproc;
2952	st	%g1, [%o0 + PCB_PSR]	! cpcb->pcb_psr = oldpsr;
2953	andn	%g1, PSR_PIL, %g1	! oldpsr &= ~PSR_PIL;
2954
2955	/*
2956	 * In all the fiddling we did to get this far, the thing we are
2957	 * waiting for might have come ready, so let interrupts in briefly
2958	 * before checking for other processes.  Note that we still have
2959	 * curproc set---we have to fix this or we can get in trouble with
2960	 * the run queues below.
2961	 */
2962	st	%g0, [%g7 + %lo(_curproc)]	! curproc = NULL;
2963	wr	%g1, 0, %psr			! (void) spl0();
2964	nop; nop; nop				! paranoia
2965	wr	%g1, PIL_CLOCK << 8, %psr	! (void) splclock();
2966
2967Lsw_scan:
2968	nop; nop; nop				! paranoia
2969	/*
2970	 * We're about to run a (possibly) new process.  Set runtime
2971	 * to indicate its start time.
2972	 */
2973	sethi	%hi(_time), %o0
2974	ldd	[%o0 + %lo(_time)], %o2
2975	sethi	%hi(_runtime), %o0
2976	std	%o2, [%o0 + %lo(_runtime)]
2977
2978	ld	[%g2 + %lo(_whichqs)], %o3
2979
2980	/*
2981	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
2982	 * branches to idle if ffs(whichqs) was 0.
2983	 */
2984	set	ffstab, %o2
2985	andcc	%o3, 0xff, %o1		! byte 0 zero?
2986	bz,a	1f			! yes, try byte 1
2987	 srl	%o3, 8, %o0
2988	b	2f			! ffs = ffstab[byte0]; which = ffs - 1;
2989	 ldsb	[%o2 + %o1], %o0
29901:	andcc	%o0, 0xff, %o1		! byte 1 zero?
2991	bz,a	1f			! yes, try byte 2
2992	 srl	%o0, 8, %o0
2993	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte1] + 7;
2994	b	3f
2995	 add	%o0, 7, %o4
29961:	andcc	%o0, 0xff, %o1		! byte 2 zero?
2997	bz,a	1f			! yes, try byte 3
2998	 srl	%o0, 8, %o0
2999	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte2] + 15;
3000	b	3f
3001	 add	%o0, 15, %o4
30021:	ldsb	[%o2 + %o0], %o0	! ffs = ffstab[byte3] + 24
3003	addcc	%o0, 24, %o0		! (note that ffstab[0] == -24)
3004	bz	idle			! if answer was 0, go idle
3005	 EMPTY
30062:	sub	%o0, 1, %o4		! which = ffs(whichqs) - 1
30073:	/* end optimized inline expansion */
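	/*
	 * The expansion above, in C (a sketch; ffstab[x] == ffs(x) for
	 * x in 1..255 and ffstab[0] == -24):
	 *
	 *	if (whichqs & 0xff)
	 *		which = ffstab[whichqs & 0xff] - 1;
	 *	else if ((whichqs >> 8) & 0xff)
	 *		which = ffstab[(whichqs >> 8) & 0xff] + 7;
	 *	else if ((whichqs >> 16) & 0xff)
	 *		which = ffstab[(whichqs >> 16) & 0xff] + 15;
	 *	else if (whichqs >> 24)
	 *		which = ffstab[whichqs >> 24] + 23;
	 *	else
	 *		goto idle;
	 */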
3008
3009	/*
3010	 * We found a nonempty run queue.  Take its first process.
3011	 */
3012	set	_qs, %o5		! q = &qs[which];
3013	sll	%o4, 3, %o0
3014	add	%o0, %o5, %o5
3015	ld	[%o5], %g3		! p = q->ph_link;
3016	cmp	%g3, %o5		! if (p == q)
3017	be	Lsw_panic_rq		!	panic("swtch rq");
3018	 EMPTY
3019	ld	[%g3], %o0		! tmp0 = p->p_link;
3020	st	%o0, [%o5]		! q->ph_link = tmp0;
3021	st	%o5, [%o0 + 4]		! tmp0->p_rlink = q;
3022	cmp	%o0, %o5		! if (tmp0 == q)
3023	bne	1f
3024	 EMPTY
3025	mov	1, %o1			!	whichqs &= ~(1 << which);
3026	sll	%o1, %o4, %o1
3027	andn	%o3, %o1, %o3
3028	st	%o3, [%g2 + %lo(_whichqs)]
30291:
3030	/*
3031	 * PHASE TWO: NEW REGISTER USAGE:
3032	 *	%g1 = oldpsr (excluding ipl bits)
3033	 *	%g2 = newpsr
3034	 *	%g3 = p
3035	 *	%g4 = lastproc
3036	 *	%g5 = newpcb
3037	 *	%g6 = %hi(_cpcb)
3038	 *	%g7 = %hi(_curproc)
3039	 *	%o0 = tmp 1
3040	 *	%o1 = tmp 2
3041	 *	%o2 = tmp 3
3042	 *	%o3 = vm
3043	 *	%o4 = sswap
3044	 *	%o5 = <free>
3045	 */
3046
3047	/* firewalls */
3048	ld	[%g3 + P_WCHAN], %o0	! if (p->p_wchan)
3049	tst	%o0
3050	bne	Lsw_panic_wchan		!	panic("swtch wchan");
3051	 EMPTY
3052	ldsb	[%g3 + P_STAT], %o0	! if (p->p_stat != SRUN)
3053	cmp	%o0, SRUN
3054	bne	Lsw_panic_srun		!	panic("swtch SRUN");
3055	 EMPTY
3056
3057	/*
3058	 * Committed to running process p.
3059	 * It may be the same as the one we were running before.
3060	 */
3061	sethi	%hi(_want_resched), %o0
3062	st	%g0, [%o0 + %lo(_want_resched)]	! want_resched = 0;
3063	ld	[%g3 + P_ADDR], %g5		! newpcb = p->p_addr;
3064	st	%g0, [%g3 + 4]			! p->p_rlink = NULL;
3065	ld	[%g5 + PCB_PSR], %g2		! newpsr = newpcb->pcb_psr;
3066	st	%g3, [%g7 + %lo(_curproc)]	! curproc = p;
3067
3068	cmp	%g3, %g4		! p == lastproc?
3069	be,a	Lsw_sameproc		! yes, go return 0
3070	 wr	%g2, 0, %psr		! (after restoring ipl)
3071
3072	/*
3073	 * Not the old process.  Save the old process, if any;
3074	 * then load p.
3075	 */
3076	tst	%g4
3077	be,a	Lsw_load		! if no old process, go load
3078	 wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3079
3080	INCR(_nswitchdiff)		! clobbers %o0,%o1
3081	/*
3082	 * save: write back all windows (including the current one).
3083	 * XXX	crude; knows nwindows <= 8
3084	 */
3085#define	SAVE save %sp, -64, %sp
3086	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE	/* 7 of each: */
3087	restore; restore; restore; restore; restore; restore; restore
3088
3089	/*
3090	 * Load the new process.  To load, we must change stacks and
3091	 * alter cpcb and %wim, hence we must disable traps.  %psr is
3092	 * currently equal to oldpsr (%g1) ^ (PIL_CLOCK << 8);
3093	 * this means that PSR_ET is on.  Likewise, PSR_ET is on
3094	 * in newpsr (%g2), although we do not know newpsr's ipl.
3095	 *
3096	 * We also must load up the `in' and `local' registers.
3097	 */
3098	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr
3099Lsw_load:
3100!	wr	%g1, (PIL_CLOCK << 8) | PSR_ET, %psr	! done above
3101	/* compute new wim */
3102	ld	[%g5 + PCB_WIM], %o0
3103	mov	1, %o1
3104	sll	%o1, %o0, %o0
3105	wr	%o0, 0, %wim		! %wim = 1 << newpcb->pcb_wim;
3106	/* now must not change %psr for 3 more instrs */
3107/*1*/	set	PSR_EF|PSR_EC, %o0
3108/*2*/	andn	%g2, %o0, %g2		! newpsr &= ~(PSR_EF|PSR_EC);
3109/*3*/	nop
3110	/* set new psr, but with traps disabled */
3111	wr	%g2, PSR_ET, %psr	! %psr = newpsr ^ PSR_ET;
3112	/* set new cpcb */
3113	st	%g5, [%g6 + %lo(_cpcb)]	! cpcb = newpcb;
3114	/* XXX update masterpaddr too */
3115	sethi	%hi(_masterpaddr), %g7
3116	st	%g5, [%g7 + %lo(_masterpaddr)]
3117	ldd	[%g5 + PCB_SP], %o6	! <sp,pc> = newpcb->pcb_<sp,pc>
3118	/* load window */
3119	ldd	[%sp + (0*8)], %l0
3120	ldd	[%sp + (1*8)], %l2
3121	ldd	[%sp + (2*8)], %l4
3122	ldd	[%sp + (3*8)], %l6
3123	ldd	[%sp + (4*8)], %i0
3124	ldd	[%sp + (5*8)], %i2
3125	ldd	[%sp + (6*8)], %i4
3126	ldd	[%sp + (7*8)], %i6
3127#ifdef DEBUG
3128	mov	%g5, %o0
3129	SET_SP_REDZONE(%o0, %o1)
3130	CHECK_SP_REDZONE(%o0, %o1)
3131#endif
3132	/* finally, enable traps */
3133	wr	%g2, 0, %psr		! psr = newpsr;
3134
3135	/*
3136	 * Now running p.  Make sure it has a context so that it
3137	 * can talk about user space stuff.  (Its pcb_uw is currently
3138	 * zero so it is safe to have interrupts going here.)
3139	 */
3140	ld	[%g3 + P_VMSPACE], %o3	! vm = p->p_vmspace;
3141	ld	[%o3 + VM_PMAP_CTX], %o0! if (vm->vm_pmap.pm_ctx != NULL)
3142	tst	%o0
3143	bnz,a	Lsw_havectx		!	goto havecontext;
3144	 ld	[%o3 + VM_PMAP_CTXNUM], %o0
3145
3146	/* p does not have a context: call ctx_alloc to get one */
3147	save	%sp, -CCFSZ, %sp
3148	call	_ctx_alloc		! ctx_alloc(&vm->vm_pmap);
3149	 add	%i3, VM_PMAP, %o0
3150	ret
3151	 restore
3152
3153	/* p does have a context: just switch to it */
3154Lsw_havectx:
3155!	ld	[%o3 + VM_PMAP_CTXNUM], %o0	! (done in delay slot)
3156	set	AC_CONTEXT, %o1
3157	stba	%o0, [%o1] ASI_CONTROL	! setcontext(vm->vm_pmap.pm_ctxnum);
3158	retl
3159	 nop
3160
3161Lsw_sameproc:
3162	/*
3163	 * We are resuming the process that was running at the
3164	 * call to swtch().  Just set psr ipl and return.
3165	 */
3166!	wr	%g2, 0, %psr		! %psr = newpsr; (done earlier)
3167	nop
3168	retl
3169	 nop
3170
3171
3172/*
3173 * Snapshot the current process so that stack frames are up to date.
3174 * This is called from two places:
3175 *  - just before a crash dump, for the stack update;
3176 *  - in cpu_fork(), before copying the kernel stack.
3177 * In the latter case the pcb and stack will be copied to the child,
3178 * and the child will be made runnable.  Eventually swtch() will run
3179 * it.  When it does, we want its pcb_pc set so that we can appear
3180 * to return 1 from cpu_fork(), so we store the current sp and psr
3181 * in the given pcb, and set its pcb_pc to our return-1 code (offset
3182 * by -8 due to call/ret conventions).  This is not useful in the crash
3183 * dump code but it is easiest to do it anyway.
3184 */
3185ENTRY(snapshot)
3186	st	%o6, [%o0 + PCB_SP]	! save sp
3187	set	1f - 8, %o1		! set child-return pc
3188	st	%o1, [%o0 + PCB_PC]
3189	rd	%psr, %o1		! save psr
3190	st	%o1, [%o0 + PCB_PSR]
3191
3192	/*
3193	 * Just like swtch(); same XXX comments apply.
3194	 * 7 of each.  Minor tweak: the 7th restore is
3195	 * done after a ret.
3196	 */
3197	SAVE; SAVE; SAVE; SAVE; SAVE; SAVE; SAVE
3198	restore; restore; restore; restore; restore; restore; ret; restore
3199
32001:	/* this is reached only after a child gets chosen in swtch() */
3201	mov	1, %i0			! return 1 from cpu_fork
3202	ret
3203	 restore
3204
3205/*
3206 * {fu,su}{,i}{byte,word}
3207 */
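/*
 * fuword in C, as a sketch (the byte/halfword and su* variants differ
 * only in access size and direction; Lfserr is the fault catcher below):
 *
 *	int
 *	fuword(caddr_t addr)
 *	{
 *		int v;
 *
 *		if ((u_int)addr >= KERNBASE || ((u_int)addr & 3))
 *			return (-1);
 *		cpcb->pcb_onfault = Lfserr;	a fault returns -1
 *		v = *(int *)addr;
 *		cpcb->pcb_onfault = 0;
 *		return (v);
 *	}
 */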
3208ALTENTRY(fuiword)
3209ENTRY(fuword)
3210	set	KERNBASE, %o2
3211	cmp	%o0, %o2		! if addr >= KERNBASE...
3212	bgeu	Lfsbadaddr
3213	EMPTY
3214	btst	3, %o0			! or has low bits set...
3215	bnz	Lfsbadaddr		!	go return -1
3216	EMPTY
3217	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3218	ld	[%o2 + %lo(_cpcb)], %o2
3219	set	Lfserr, %o3
3220	st	%o3, [%o2 + PCB_ONFAULT]
3221	ld	[%o0], %o0		! fetch the word
3222	retl				! phew, made it, return the word
3223	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3224
3225Lfserr:
3226	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3227Lfsbadaddr:
3228	retl				! and return error indicator
3229	mov	-1, %o0
3230
3231	/*
3232	 * This is just like Lfserr, but it's a global label that allows
3233	 * mem_access_fault() to check to see that we don't want to try to
3234	 * page in the fault.  It's used by fuswintr() etc.
3235	 */
3236	.globl	_Lfsbail
3237_Lfsbail:
3238	st	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
3239	retl				! and return error indicator
3240	mov	-1, %o0
3241
3242	/*
3243	 * Like fusword but callable from interrupt context.
3244	 * Fails if data isn't resident.
3245	 */
3246ENTRY(fuswintr)
3247	set	KERNBASE, %o2
3248	cmp	%o0, %o2		! if addr >= KERNBASE
3249	bgeu	Lfsbadaddr		!	return error
3250	EMPTY
3251	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3252	ld	[%o2 + %lo(_cpcb)], %o2
3253	set	_Lfsbail, %o3
3254	st	%o3, [%o2 + PCB_ONFAULT]
3255	lduh	[%o0], %o0		! fetch the halfword
3256	retl				! made it
3257	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3258
3259ENTRY(fusword)
3260	set	KERNBASE, %o2
3261	cmp	%o0, %o2		! if addr >= KERNBASE
3262	bgeu	Lfsbadaddr		!	return error
3263	EMPTY
3264	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3265	ld	[%o2 + %lo(_cpcb)], %o2
3266	set	Lfserr, %o3
3267	st	%o3, [%o2 + PCB_ONFAULT]
3268	lduh	[%o0], %o0		! fetch the halfword
3269	retl				! made it
3270	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3271
3272ALTENTRY(fuibyte)
3273ENTRY(fubyte)
3274	set	KERNBASE, %o2
3275	cmp	%o0, %o2		! if addr >= KERNBASE
3276	bgeu	Lfsbadaddr		!	return error
3277	EMPTY
3278	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3279	ld	[%o2 + %lo(_cpcb)], %o2
3280	set	Lfserr, %o3
3281	st	%o3, [%o2 + PCB_ONFAULT]
3282	ldub	[%o0], %o0		! fetch the byte
3283	retl				! made it
3284	st	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
3285
3286ALTENTRY(suiword)
3287ENTRY(suword)
3288	set	KERNBASE, %o2
3289	cmp	%o0, %o2		! if addr >= KERNBASE ...
3290	bgeu	Lfsbadaddr
3291	EMPTY
3292	btst	3, %o0			! or has low bits set ...
3293	bnz	Lfsbadaddr		!	go return error
3294	EMPTY
3295	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3296	ld	[%o2 + %lo(_cpcb)], %o2
3297	set	Lfserr, %o3
3298	st	%o3, [%o2 + PCB_ONFAULT]
3299	st	%o1, [%o0]		! store the word
3300	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3301	retl				! and return 0
3302	clr	%o0
3303
3304ENTRY(suswintr)
3305	set	KERNBASE, %o2
3306	cmp	%o0, %o2		! if addr >= KERNBASE
3307	bgeu	Lfsbadaddr		!	go return error
3308	EMPTY
3309	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = _Lfsbail;
3310	ld	[%o2 + %lo(_cpcb)], %o2
3311	set	_Lfsbail, %o3
3312	st	%o3, [%o2 + PCB_ONFAULT]
3313	sth	%o1, [%o0]		! store the halfword
3314	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3315	retl				! and return 0
3316	clr	%o0
3317
3318ENTRY(susword)
3319	set	KERNBASE, %o2
3320	cmp	%o0, %o2		! if addr >= KERNBASE
3321	bgeu	Lfsbadaddr		!	go return error
3322	EMPTY
3323	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3324	ld	[%o2 + %lo(_cpcb)], %o2
3325	set	Lfserr, %o3
3326	st	%o3, [%o2 + PCB_ONFAULT]
3327	sth	%o1, [%o0]		! store the halfword
3328	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3329	retl				! and return 0
3330	clr	%o0
3331
3332ALTENTRY(suibyte)
3333ENTRY(subyte)
3334	set	KERNBASE, %o2
3335	cmp	%o0, %o2		! if addr >= KERNBASE
3336	bgeu	Lfsbadaddr		!	go return error
3337	EMPTY
3338	sethi	%hi(_cpcb), %o2		! cpcb->pcb_onfault = Lfserr;
3339	ld	[%o2 + %lo(_cpcb)], %o2
3340	set	Lfserr, %o3
3341	st	%o3, [%o2 + PCB_ONFAULT]
3342	stb	%o1, [%o0]		! store the byte
3343	st	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
3344	retl				! and return 0
3345	clr	%o0
3346
3347/* probeget and probeset are meant to be used during autoconfiguration */
3348
3349/*
3350 * probeget(addr, size) caddr_t addr; int size;
3351 *
3352 * Read or write a (byte,word,longword) from the given address.
3353 * Like {fu,su}{byte,halfword,word} but our caller is supposed
3354 * to know what he is doing... the address can be anywhere.
3355 *
3356 * We optimize for space, rather than time, here.
3357 */
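/*
 * probeget in C, as a sketch (probeset is the same with the loads
 * replaced by stores and a 0 return on success):
 *
 *	int
 *	probeget(caddr_t addr, int size)
 *	{
 *		int v;
 *
 *		if ((u_int)addr < KERNBASE)
 *			return (-1);
 *		cpcb->pcb_onfault = Lfserr;	a fault returns -1
 *		if (size & 1) v = *(u_char *)addr;
 *		if (size & 2) v = *(u_short *)addr;
 *		if (size & 4) v = *(int *)addr;
 *		cpcb->pcb_onfault = 0;
 *		return (v);
 *	}
 */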
3358ENTRY(probeget)
3359	! %o0 = addr, %o1 = (1,2,4)
3360	set	KERNBASE, %o2
3361	cmp	%o0, %o2		! if addr < KERNBASE
3362	blu	Lfsbadaddr		!	go return error
3363	 EMPTY
3364	sethi	%hi(_cpcb), %o2
3365	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3366	set	Lfserr, %o5
3367	st	%o5, [%o2 + PCB_ONFAULT]
3368	btst	1, %o1
3369	bnz,a	0f			! if (len & 1)
3370	 ldub	[%o0], %o0		!	value = *(char *)addr;
33710:	btst	2, %o1
3372	bnz,a	0f			! if (len & 2)
3373	 lduh	[%o0], %o0		!	value = *(short *)addr;
33740:	btst	4, %o1
3375	bnz,a	0f			! if (len & 4)
3376	 ld	[%o0], %o0		!	value = *(int *)addr;
33770:	retl				! made it, clear onfault and return
3378	 st	%g0, [%o2 + PCB_ONFAULT]
3379
3380/*
3381 * probeset(addr, size, val) caddr_t addr; int size, val;
3382 *
3383 * As above, but we return 0 on success.
3384 */
3385ENTRY(probeset)
3386	! %o0 = addr, %o1 = (1,2,4), %o2 = val
3387	set	KERNBASE, %o2
3388	cmp	%o0, %o2		! if addr < KERNBASE
3389	blu	Lfsbadaddr		!	go return error
3390	 EMPTY
3391	sethi	%hi(_cpcb), %o2
3392	ld	[%o2 + %lo(_cpcb)], %o2	! cpcb->pcb_onfault = Lfserr;
3393	set	Lfserr, %o5
3394	st	%o5, [%o2 + PCB_ONFAULT]
3395	btst	1, %o1
3396	bnz,a	0f			! if (len & 1)
3397	 stb	%o2, [%o0]		!	*(char *)addr = value;
33980:	btst	2, %o1
3399	bnz,a	0f			! if (len & 2)
3400	 sth	%o2, [%o0]		!	*(short *)addr = value;
34010:	btst	4, %o1
3402	bnz,a	0f			! if (len & 4)
3403	 st	%o2, [%o0]		!	*(int *)addr = value;
34040:	clr	%o0			! made it, clear onfault and return 0
3405	retl
3406	 st	%g0, [%o2 + PCB_ONFAULT]
3407
3408/*
3409 * Insert entry into doubly-linked queue.
3410 * We could just do this in C, but gcc does not do leaves well (yet).
3411 */
3412ENTRY(_insque)
3413	! %o0 = e = what to insert; %o1 = after = entry to insert after
3414	st	%o1, [%o0 + 4]		! e->prev = after;
3415	ld	[%o1], %o2		! tmp = after->next;
3416	st	%o2, [%o0]		! e->next = tmp;
3417	st	%o0, [%o1]		! after->next = e;
3418	retl
3419	st	%o0, [%o2 + 4]		! tmp->prev = e;
3420
3421
3422/*
3423 * Remove entry from doubly-linked queue.
3424 */
3425ENTRY(_remque)
3426	! %o0 = e = what to remove
3427	ld	[%o0], %o1		! n = e->next;
3428	ld	[%o0 + 4], %o2		! p = e->prev;
3429	st	%o2, [%o1 + 4]		! n->prev = p;
3430	retl
3431	st	%o1, [%o2]		! p->next = n;
3432
3433/*
3434 * copywords(src, dst, nbytes)
3435 *
3436 * Copy `nbytes' bytes from src to dst, both of which are word-aligned;
3437 * nbytes is a multiple of four.  It may, however, be zero, in which case
3438 * nothing is to be copied.
3439 */
3440ENTRY(copywords)
3441	! %o0 = src, %o1 = dst, %o2 = nbytes
3442	b	1f
3443	deccc	4, %o2
34440:
3445	st	%o3, [%o1 + %o2]
3446	deccc	4, %o2			! while ((n -= 4) >= 0)
34471:
3448	bge,a	0b			!    *(int *)(dst+n) = *(int *)(src+n);
3449	ld	[%o0 + %o2], %o3
3450	retl
3451	nop
3452
3453/*
3454 * qcopy(src, dst, nbytes)
3455 *
3456 * (q for `quad' or `quick', as opposed to b for byte/block copy)
3457 *
3458 * Just like copywords, but everything is multiples of 8.
3459 */
3460ENTRY(qcopy)
3461	b	1f
3462	deccc	8, %o2
34630:
3464	std	%o4, [%o1 + %o2]
3465	deccc	8, %o2
34661:
3467	bge,a	0b
3468	ldd	[%o0 + %o2], %o4
3469	retl
3470	nop
3471
3472/*
3473 * qzero(addr, nbytes)
3474 *
3475 * Zeroes `nbytes' bytes of a quad-aligned virtual address,
3476 * where nbytes is itself a multiple of 8.
3477 */
3478ENTRY(qzero)
3479	! %o0 = addr, %o1 = len (in bytes)
3480	clr	%g1
34810:
3482	deccc	8, %o1			! while ((n -= 8) >= 0)
3483	bge,a	0b
3484	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + n) = 0;
3485	retl
3486	nop
3487
3488/*
3489 * bzero(addr, len)
3490 *
3491 * We should unroll the loop, but at the moment this would
3492 * gain nothing since the `std' instructions are what limits us.
3493 */
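/*
 * The strategy, in C-ish pseudocode (a sketch):
 *
 *	if (((addr | len) & 7) == 0)
 *		zero 8 bytes at a time with std;
 *	else if (len < 15)
 *		zero a byte at a time;
 *	else {
 *		zero up to 1+2+4 leading bytes so addr is 8-aligned;
 *		zero 8 bytes at a time with std;
 *		mop up the trailing 0..7 bytes with st/sth/stb;
 *	}
 */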
3494ALTENTRY(blkclr)
3495ENTRY(bzero)
3496	! %o0 = addr, %o1 = len
3497
3498	! Optimize a common case: addr and len are both multiples of 8.
3499	or	%o0, %o1, %o2
3500	btst	7, %o2			! ((addr | len) & 7) != 0?
3501	bnz	1f			! if so, cannot optimize
3502	clr	%g1			! in any case, we want g1=0
3503
3504	/* `Good' operands, can just store doubles. */
35050:
3506	deccc	8, %o1			! while ((len -= 8) >= 0)
3507	bge,a	0b
3508	std	%g0, [%o0 + %o1]	!	*(quad *)(addr + len) = 0;
3509	retl
3510	nop
3511
3512	/*
3513	 * Either the address is unaligned, or the count is not a
3514	 * multiple of 8, or both.  We will have to align the address
3515	 * in order to use anything `better' than stb.
3516	 */
35171:
3518	cmp	%o1, 15			! len >= 15?
3519	bge,a	Lstd			! yes, use std
3520	btst	1, %o0			! (but first check alignment)
3521
3522	! not enough to bother: do byte-at-a-time loop.
35232:
3524	deccc	%o1			! while (--len >= 0)
3525	bge,a	2b
3526	stb	%g0, [%o0 + %o1]	!	addr[len] = 0;
3527	retl
3528	nop
3529
3530Lstd:
3531	/*
3532	 * There are at least 15 bytes to zero.
3533	 * We may have to zero some initial stuff to align
3534	 * the address.
3535	 */
3536	bz,a	1f			! if (addr & 1) {
3537	btst	2, %o0
3538	stb	%g0, [%o0]		!	*addr = 0;
3539	inc	%o0			!	addr++;
3540	dec	%o1			!	len--;
3541	btst	2, %o0			! }
35421:
3543	bz,a	1f			! if (addr & 2) {
3544	btst	4, %o0
3545	sth	%g0, [%o0]		!	*(short *)addr = 0;
3546	inc	2, %o0			!	addr += 2;
3547	dec	2, %o1			!	len -= 2;
3548	btst	4, %o0			! }
35491:
3550	bz	1f			! if (addr & 4) {
3551	dec	8, %o1
3552	st	%g0, [%o0]		!	*(int *)addr = 0;
3553	inc	4, %o0			!	addr += 4;
3554	dec	4, %o1			!	len -= 4;
3555					! }
3556	/*
3557	 * Address is double word aligned; len is 8 less than
3558	 * the number of bytes remaining (i.e., len is 0 if
3559	 * the remaining count is 8, 1 if it is 9, etc.).
3560	 */
35611:
3562	std	%g0, [%o0]		! do {
35632:					!	*(quad *)addr = 0;
3564	inc	8, %o0			!	addr += 8;
3565	deccc	8, %o1			! } while ((len -= 8) >= 0);
3566	bge,a	2b
3567	std	%g0, [%o0]
3568
3569	/*
3570	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
3571	 * -6 => two bytes, etc.  Mop up this remainder, if any.
3572	 */
3573	btst	4, %o1
3574	bz	1f			! if (len & 4) {
3575	btst	2, %o1
3576	st	%g0, [%o0]		!	*(int *)addr = 0;
3577	inc	4, %o0			!	addr += 4;
35781:
3579	bz	1f			! if (len & 2) {
3580	btst	1, %o1
3581	sth	%g0, [%o0]		!	*(short *)addr = 0;
3582	inc	2, %o0			!	addr += 2;
35831:
3584	bnz,a	1f			! if (len & 1)
3585	stb	%g0, [%o0]		!	*addr = 0;
35861:
3587	retl
3588	nop
3589
3590/*
3591 * kernel bcopy/memcpy
3592 * Assumes regions do not overlap; has no useful return value.
3593 *
3594 * Must not use %g7 (see copyin/copyout above).
3595 */
3596
3597#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
3598
3599ENTRY(memcpy)
3600	/*
3601	 * Swap args for bcopy.  Gcc generates calls to memcpy for
3602	 * structure assignments.
3603	 */
3604	mov	%o0, %o3
3605	mov	%o1, %o0
3606	mov	%o3, %o1
3607ENTRY(bcopy)
3608	cmp	%o2, BCOPY_SMALL
3609Lbcopy_start:
3610	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
3611	btst	7, %o0		! (part of being fancy)
3612
3613	/*
3614	 * Not much to copy, just do it a byte at a time.
3615	 */
3616	deccc	%o2		! while (--len >= 0)
3617	bl	1f
3618	EMPTY
36190:
3620	inc	%o0
3621	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
3622	stb	%o4, [%o1]
3623	deccc	%o2
3624	bge	0b
3625	inc	%o1
36261:
3627	retl
3628	nop
3629	/* NOTREACHED */
3630
3631	/*
3632	 * Plenty of data to copy, so try to do it optimally.
3633	 */
3634Lbcopy_fancy:
3635	! check for common case first: everything lines up.
3636!	btst	7, %o0		! done already
3637	bne	1f
3638	EMPTY
3639	btst	7, %o1
3640	be,a	Lbcopy_doubles
3641	dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubles
3642
3643	! If the low bits match, we can make these line up.
36441:
3645	xor	%o0, %o1, %o3	! t = src ^ dst;
3646	btst	1, %o3		! if (t & 1) {
3647	be,a	1f
3648	btst	1, %o0		! [delay slot: if (src & 1)]
3649
3650	! low bits do not match, must copy by bytes.
36510:
3652	ldsb	[%o0], %o4	!	do {
3653	inc	%o0		!		(++dst)[-1] = *src++;
3654	inc	%o1
3655	deccc	%o2
3656	bnz	0b		!	} while (--len != 0);
3657	stb	%o4, [%o1 - 1]
3658	retl
3659	nop
3660	/* NOTREACHED */
3661
3662	! lowest bit matches, so we can copy by words, if nothing else
36631:
3664	be,a	1f		! if (src & 1) {
3665	btst	2, %o3		! [delay slot: if (t & 2)]
3666
3667	! although low bits match, both are 1: must copy 1 byte to align
3668	ldsb	[%o0], %o4	!	*dst++ = *src++;
3669	stb	%o4, [%o1]
3670	inc	%o0
3671	inc	%o1
3672	dec	%o2		!	len--;
3673	btst	2, %o3		! } [if (t & 2)]
36741:
3675	be,a	1f		! if (t & 2) {
3676	btst	2, %o0		! [delay slot: if (src & 2)]
3677	dec	2, %o2		!	len -= 2;
36780:
3679	ldsh	[%o0], %o4	!	do {
3680	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
3681	inc	2, %o0		!		dst += 2, src += 2;
3682	deccc	2, %o2		!	} while ((len -= 2) >= 0);
3683	bge	0b
3684	inc	2, %o1
3685	b	Lbcopy_mopb	!	goto mop_up_byte;
3686	btst	1, %o2		! } [delay slot: if (len & 1)]
3687	/* NOTREACHED */
3688
3689	! low two bits match, so we can copy by longwords
36901:
3691	be,a	1f		! if (src & 2) {
3692	btst	4, %o3		! [delay slot: if (t & 4)]
3693
3694	! although low 2 bits match, they are 10: must copy one short to align
3695	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3696	sth	%o4, [%o1]
3697	inc	2, %o0		!	src += 2;
3698	inc	2, %o1		!	dst += 2;
3699	dec	2, %o2		!	len -= 2;
3700	btst	4, %o3		! } [if (t & 4)]
37011:
3702	be,a	1f		! if (t & 4) {
3703	btst	4, %o0		! [delay slot: if (src & 4)]
3704	dec	4, %o2		!	len -= 4;
37050:
3706	ld	[%o0], %o4	!	do {
3707	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
3708	inc	4, %o0		!		dst += 4, src += 4;
3709	deccc	4, %o2		!	} while ((len -= 4) >= 0);
3710	bge	0b
3711	inc	4, %o1
3712	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3713	btst	2, %o2		! } [delay slot: if (len & 2)]
3714	/* NOTREACHED */
3715
3716	! low three bits match, so we can copy by doublewords
37171:
3718	be	1f		! if (src & 4) {
3719	dec	8, %o2		! [delay slot: len -= 8]
3720	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3721	st	%o4, [%o1]
3722	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
3723	inc	4, %o1
3724	dec	4, %o2		! }
37251:
3726Lbcopy_doubles:
3727	ldd	[%o0], %o4	! do {
3728	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
3729	inc	8, %o0		!	dst += 8, src += 8;
3730	deccc	8, %o2		! } while ((len -= 8) >= 0);
3731	bge	Lbcopy_doubles
3732	inc	8, %o1
3733
3734	! check for a usual case again (save work)
3735	btst	7, %o2		! if ((len & 7) == 0)
3736	be	Lbcopy_done	!	goto bcopy_done;
3737
3738	btst	4, %o2		! if ((len & 4) == 0)
3739	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
3740	btst	2, %o2		! [delay slot: if (len & 2)]
3741	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3742	st	%o4, [%o1]
3743	inc	4, %o0		!	src += 4;
3744	inc	4, %o1		!	dst += 4;
3745	btst	2, %o2		! } [if (len & 2)]
3746
37471:
3748	! mop up trailing word (if present) and byte (if present).
3749Lbcopy_mopw:
3750	be	Lbcopy_mopb	! no word, go mop up byte
3751	btst	1, %o2		! [delay slot: if (len & 1)]
3752	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
3753	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
3754	sth	%o4, [%o1]
3755	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
3756	retl
3757	stb	%o4, [%o1 + 2]
3758	/* NOTREACHED */
3759
3760	! mop up trailing byte (if present).
3761Lbcopy_mopb:
3762	bne,a	1f
3763	ldsb	[%o0], %o4
3764
3765Lbcopy_done:
3766	retl
3767	nop
3768
37691:
3770	retl
3771	stb	%o4,[%o1]
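
/*
 * A rough C rendering of the strategy above, as a sketch only (not
 * instruction for instruction; the pointer-to-int casts stand in for
 * the 32-bit address arithmetic, and `t' selects the widest transfer
 * the two pointers can share, exactly as the delay-slotted tests do):
 *
 *	void bcopy(char *src, char *dst, int len)
 *	{
 *		int t = (int)src ^ (int)dst;
 *
 *		if (len < BCOPY_SMALL || (t & 1)) {
 *			while (--len >= 0)
 *				*dst++ = *src++;
 *			return;
 *		}
 *		if ((int)src & 1) { *dst++ = *src++; len--; }
 *		if (t & 2) {
 *			for (len -= 2; len >= 0; len -= 2, src += 2, dst += 2)
 *				*(short *)dst = *(short *)src;
 *			goto mopb;
 *		}
 *		if ((int)src & 2) {
 *			*(short *)dst = *(short *)src;
 *			src += 2; dst += 2; len -= 2;
 *		}
 *		if (t & 4) {
 *			for (len -= 4; len >= 0; len -= 4, src += 4, dst += 4)
 *				*(int *)dst = *(int *)src;
 *			goto mopw;
 *		}
 *		if ((int)src & 4) {
 *			*(int *)dst = *(int *)src;
 *			src += 4; dst += 4; len -= 4;
 *		}
 *		for (len -= 8; len >= 0; len -= 8, src += 8, dst += 8)
 *			*(double *)dst = *(double *)src;
 *		if (len & 4) { *(int *)dst = *(int *)src; src += 4; dst += 4; }
 *	mopw:	if (len & 2) { *(short *)dst = *(short *)src; src += 2; dst += 2; }
 *	mopb:	if (len & 1) *dst = *src;
 *	}
 */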
3772/*
3773 * ovbcopy(src, dst, len): like bcopy, but regions may overlap.
3774 */
3775ENTRY(ovbcopy)
3776	cmp	%o0, %o1	! src < dst?
3777	bgeu	Lbcopy_start	! no, go copy forwards as via bcopy
3778	cmp	%o2, BCOPY_SMALL	! (check length for doublecopy first)
3779
3780	/*
3781	 * Since src comes before dst, and the regions might overlap,
3782	 * we have to do the copy starting at the end and working backwards.
3783	 */
3784	add	%o2, %o0, %o0	! src += len
3785	add	%o2, %o1, %o1	! dst += len
3786	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
3787	btst	3, %o0
3788
3789	/*
3790	 * Not much to copy, just do it a byte at a time.
3791	 */
3792	deccc	%o2		! while (--len >= 0)
3793	bl	1f
3794	EMPTY
37950:
3796	dec	%o0		!	*--dst = *--src;
3797	ldsb	[%o0], %o4
3798	dec	%o1
3799	deccc	%o2
3800	bge	0b
3801	stb	%o4, [%o1]
38021:
3803	retl
3804	nop
3805
3806	/*
3807	 * Plenty to copy, try to be optimal.
3808	 * We only bother with word/halfword/byte copies here.
3809	 */
3810Lback_fancy:
3811!	btst	3, %o0		! done already
3812	bnz	1f		! if ((src & 3) == 0 &&
3813	btst	3, %o1		!     (dst & 3) == 0)
3814	bz,a	Lback_words	!	goto words;
3815	dec	4, %o2		! (done early for word copy)
3816
38171:
3818	/*
3819	 * See if the low bits match.
3820	 */
3821	xor	%o0, %o1, %o3	! t = src ^ dst;
3822	btst	1, %o3
3823	bz,a	3f		! if (t & 1) == 0, can do better
3824	btst	1, %o0
3825
3826	/*
3827	 * Nope; gotta do byte copy.
3828	 */
38292:
3830	dec	%o0		! do {
3831	ldsb	[%o0], %o4	!	*--dst = *--src;
3832	dec	%o1
3833	deccc	%o2		! } while (--len != 0);
3834	bnz	2b
3835	stb	%o4, [%o1]
3836	retl
3837	nop
3838
38393:
3840	/*
3841	 * Can do halfword or word copy, but might have to copy 1 byte first.
3842	 */
3843!	btst	1, %o0		! done earlier
3844	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
3845	btst	2, %o3		! (done early)
3846	dec	%o0		!	*--dst = *--src;
3847	ldsb	[%o0], %o4
3848	dec	%o1
3849	stb	%o4, [%o1]
3850	dec	%o2		!	len--;
3851	btst	2, %o3		! }
3852
38534:
3854	/*
3855	 * See if we can do a word copy ((t&2) == 0).
3856	 */
3857!	btst	2, %o3		! done earlier
3858	bz,a	6f		! if (t & 2) == 0, can do word copy
3859	btst	2, %o0		! (src&2, done early)
3860
3861	/*
3862	 * Gotta do halfword copy.
3863	 */
3864	dec	2, %o2		! len -= 2;
38655:
3866	dec	2, %o0		! do {
3867	ldsh	[%o0], %o4	!	src -= 2;
3868	dec	2, %o1		!	dst -= 2;
3869	deccc	2, %o2		!	*(short *)dst = *(short *)src;
3870	bge	5b		! } while ((len -= 2) >= 0);
3871	sth	%o4, [%o1]
3872	b	Lback_mopb	! goto mop_up_byte;
3873	btst	1, %o2		! (len&1, done early)
3874
38756:
3876	/*
3877	 * We can do word copies, but we might have to copy
3878	 * one halfword first.
3879	 */
3880!	btst	2, %o0		! done already
3881	bz	7f		! if (src & 2) {
3882	dec	4, %o2		! (len -= 4, done early)
3883	dec	2, %o0		!	src -= 2, dst -= 2;
3884	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3885	dec	2, %o1
3886	sth	%o4, [%o1]
3887	dec	2, %o2		!	len -= 2;
3888				! }
3889
38907:
3891Lback_words:
3892	/*
3893	 * Do word copies (backwards), then mop up trailing halfword
3894	 * and byte if any.
3895	 */
3896!	dec	4, %o2		! len -= 4, done already
38970:				! do {
3898	dec	4, %o0		!	src -= 4;
3899	dec	4, %o1		!	dst -= 4;
3900	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
3901	deccc	4, %o2		! } while ((len -= 4) >= 0);
3902	bge	0b
3903	st	%o4, [%o1]
3904
3905	/*
3906	 * Check for trailing shortword.
3907	 */
3908	btst	2, %o2		! if (len & 2) {
3909	bz,a	1f
3910	btst	1, %o2		! (len&1, done early)
3911	dec	2, %o0		!	src -= 2, dst -= 2;
3912	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
3913	dec	2, %o1
3914	sth	%o4, [%o1]	! }
3915	btst	1, %o2
3916
3917	/*
3918	 * Check for trailing byte.
3919	 */
39201:
3921Lback_mopb:
3922!	btst	1, %o2		! (done already)
3923	bnz,a	1f		! if (len & 1) {
3924	ldsb	[%o0 - 1], %o4	!	b = src[-1];
3925	retl
3926	nop
39271:
3928	retl			!	dst[-1] = b;
3929	stb	%o4, [%o1 - 1]	! }
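/*
 * Sketch of the backward path in C, with the same caveats as the
 * bcopy sketch above (only byte, halfword and word transfers are
 * attempted when copying backwards):
 *
 *	src += len; dst += len;
 *	if (len < BCOPY_SMALL || (((int)src ^ (int)dst) & 1)) {
 *		while (--len >= 0)
 *			*--dst = *--src;
 *		return;
 *	}
 *	if ((int)src & 1) { *--dst = *--src; len--; }
 *	if (((int)src ^ (int)dst) & 2) {
 *		for (len -= 2; len >= 0; len -= 2)
 *			{ src -= 2; dst -= 2; *(short *)dst = *(short *)src; }
 *		goto mopb;
 *	}
 *	if ((int)src & 2) {
 *		src -= 2; dst -= 2; *(short *)dst = *(short *)src; len -= 2;
 *	}
 *	for (len -= 4; len >= 0; len -= 4)
 *		{ src -= 4; dst -= 4; *(int *)dst = *(int *)src; }
 *	if (len & 2) { src -= 2; dst -= 2; *(short *)dst = *(short *)src; }
 *	mopb:	if (len & 1) dst[-1] = src[-1];
 */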
3930
3931
3932/*
3933 * savefpstate(f) struct fpstate *f;
3934 *
3935 * Store the current FPU state.  The first `st %fsr' may cause a trap;
3936 * our trap handler knows how to recover (by `returning' to savefpcont).
3937 */
3938ENTRY(savefpstate)
3939	rd	%psr, %o1		! enable FP before we begin
3940	set	PSR_EF, %o2
3941	or	%o1, %o2, %o1
3942	wr	%o1, 0, %psr
3943	/* do some setup work while we wait for PSR_EF to turn on */
3944	set	FSR_QNE, %o5		! QNE = 0x2000, too big for immediate
3945	clr	%o3			! qsize = 0;
3946	nop				! (still waiting for PSR_EF)
3947special_fp_store:
3948	st	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
3949	/*
3950	 * Even if the preceding instruction did not trap, the queue
3951	 * is not necessarily empty: this state save might be happening
3952	 * because user code tried to store %fsr and took the FPU
3953	 * from `exception pending' mode to `exception' mode.
3954	 * So we still have to check the blasted QNE bit.
3955	 * With any luck it will usually not be set.
3956	 */
3957	ld	[%o0 + FS_FSR], %o4	! if (f->fs_fsr & QNE)
3958	btst	%o5, %o4
3959	bnz	Lfp_storeq		!	goto storeq;
3960	 std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
3961Lfp_finish:
3962	st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
3963	std	%f2, [%o0 + FS_REGS + (4*2)]
3964	std	%f4, [%o0 + FS_REGS + (4*4)]
3965	std	%f6, [%o0 + FS_REGS + (4*6)]
3966	std	%f8, [%o0 + FS_REGS + (4*8)]
3967	std	%f10, [%o0 + FS_REGS + (4*10)]
3968	std	%f12, [%o0 + FS_REGS + (4*12)]
3969	std	%f14, [%o0 + FS_REGS + (4*14)]
3970	std	%f16, [%o0 + FS_REGS + (4*16)]
3971	std	%f18, [%o0 + FS_REGS + (4*18)]
3972	std	%f20, [%o0 + FS_REGS + (4*20)]
3973	std	%f22, [%o0 + FS_REGS + (4*22)]
3974	std	%f24, [%o0 + FS_REGS + (4*24)]
3975	std	%f26, [%o0 + FS_REGS + (4*26)]
3976	std	%f28, [%o0 + FS_REGS + (4*28)]
3977	retl
3978	 std	%f30, [%o0 + FS_REGS + (4*30)]
3979
3980/*
3981 * Store the (now known nonempty) FP queue.
3982 * We have to reread the fsr each time in order to get the new QNE bit.
3983 */
3984Lfp_storeq:
3985	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
39861:
3987	std	%fq, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
3988	st	%fsr, [%o0 + FS_FSR]	! reread fsr
3989	ld	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
3990	btst	%o5, %o4
3991	bnz	1b
3992	 inc	8, %o3
3993	b	Lfp_finish		! set qsize and finish storing fregs
3994	 srl	%o3, 3, %o3		! (but first fix qsize)
3995
3996/*
3997 * The fsr store trapped.  Do it again; this time it will not trap.
3998 * We could just have the trap handler return to the `st %fsr', but
3999 * if for some reason it *does* trap, that would lock us into a tight
4000 * loop.  This way we panic instead.  Whoopee.
4001 */
4002savefpcont:
4003	b	special_fp_store + 4	! continue
4004	 st	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
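/*
 * Net effect of savefpstate in the C-ish terms of the comments above
 * (a sketch; in the code %o3 counts queue bytes and is shifted right
 * by 3 on the way back to Lfp_finish to yield entries):
 *
 *	f->fs_fsr = getfsr();		may trap once; the trap handler
 *					resumes at savefpcont
 *	qsize = 0;
 *	while (f->fs_fsr & FSR_QNE) {
 *		f->fs_queue[qsize++] = fsr_qfront();
 *		f->fs_fsr = getfsr();
 *	}
 *	f->fs_qsize = qsize;
 *	f->fs_regs[0..31] = %f0 .. %f31;
 */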
4005
4006/*
4007 * Load FPU state.
4008 */
4009ENTRY(loadfpstate)
4010	rd	%psr, %o1		! enable FP before we begin
4011	set	PSR_EF, %o2
4012	or	%o1, %o2, %o1
4013	wr	%o1, 0, %psr
4014	nop; nop; nop			! paranoia
4015	ldd	[%o0 + FS_REGS + (4*0)], %f0
4016	ldd	[%o0 + FS_REGS + (4*2)], %f2
4017	ldd	[%o0 + FS_REGS + (4*4)], %f4
4018	ldd	[%o0 + FS_REGS + (4*6)], %f6
4019	ldd	[%o0 + FS_REGS + (4*8)], %f8
4020	ldd	[%o0 + FS_REGS + (4*10)], %f10
4021	ldd	[%o0 + FS_REGS + (4*12)], %f12
4022	ldd	[%o0 + FS_REGS + (4*14)], %f14
4023	ldd	[%o0 + FS_REGS + (4*16)], %f16
4024	ldd	[%o0 + FS_REGS + (4*18)], %f18
4025	ldd	[%o0 + FS_REGS + (4*20)], %f20
4026	ldd	[%o0 + FS_REGS + (4*22)], %f22
4027	ldd	[%o0 + FS_REGS + (4*24)], %f24
4028	ldd	[%o0 + FS_REGS + (4*26)], %f26
4029	ldd	[%o0 + FS_REGS + (4*28)], %f28
4030	ldd	[%o0 + FS_REGS + (4*30)], %f30
4031	retl
4032	 ld	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
4033
4034/*
4035 * ienab_bis(bis) int bis;
4036 * ienab_bic(bic) int bic;
4037 *
4038 * Set and clear bits in the interrupt register.
4039 * Since there are no read-modify-write instructions for this,
4040 * and one of the interrupts is nonmaskable, we must disable traps.
4041 *
4042 * NB: ___main is defined here for gcc-2 idiocy.  Ignore it.
4043 */
4044ENTRY(ienab_bis)
4045	! %o0 = bits to set
4046	rd	%psr, %o2
4047	wr	%o2, PSR_ET, %psr	! disable traps
4048	nop; nop			! 3-instr delay until ET turns off
4049	sethi	%hi(IE_reg_addr), %o3
4050	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4051	or	%o4, %o0, %o4		! *IE_reg_addr |= bis;
4052	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4053	wr	%o2, 0, %psr		! reenable traps
4054	nop
4055	.globl	___main
4056___main:
4057	retl
4058	 nop
4059
4060ENTRY(ienab_bic)
4061	! %o0 = bits to clear
4062	rd	%psr, %o2
4063	wr	%o2, PSR_ET, %psr	! disable traps
4064	nop; nop
4065	sethi	%hi(IE_reg_addr), %o3
4066	ldub	[%o3 + %lo(IE_reg_addr)], %o4
4067	andn	%o4, %o0, %o4		! *IE_reg_addr &=~ bic;
4068	stb	%o4, [%o3 + %lo(IE_reg_addr)]
4069	wr	%o2, 0, %psr		! reenable traps
4070	nop
4071	retl
4072	 nop
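/*
 * Both routines are the same trap-disabled read-modify-write; roughly
 * (getpsr/setpsr are shorthand here, not real routines):
 *
 *	psr = getpsr();
 *	setpsr(psr ^ PSR_ET);		traps off, nothing can intervene
 *	*IE_reg_addr |= bis;		(or &= ~bic)
 *	setpsr(psr);			traps back on
 */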
4073
4074/*
4075 * ffs(), using table lookup.
4076 * The process switch code shares the table, so we just put the
4077 * whole thing here.
4078 */
4079ffstab:
4080	.byte	-24,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1 /* 00-0f */
4081	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 10-1f */
4082	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 20-2f */
4083	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 30-3f */
4084	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 40-4f */
4085	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 50-5f */
4086	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 60-6f */
4087	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 70-7f */
4088	.byte	8,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 80-8f */
4089	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* 90-9f */
4090	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* a0-af */
4091	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* b0-bf */
4092	.byte	7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* c0-cf */
4093	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* d0-df */
4094	.byte	6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* e0-ef */
4095	.byte	5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1	/* f0-ff */
4096
4097/*
4098 * We use a table lookup on each byte.
4099 *
4100 * In each section below, %o1 holds the value of the current byte (byte 0, 1, 2, or 3).
4101 * The last byte is handled specially: for the first three,
4102 * if that byte is nonzero, we return the table value
4103 * (plus 0, 8, or 16 for the byte number), but for the last
4104 * one, we just return the table value plus 24.  This means
4105 * that ffstab[0] must be -24 so that ffs(0) will return 0.
4106 */
4107ENTRY(ffs)
4108	set	ffstab, %o2
4109	andcc	%o0, 0xff, %o1	! get low byte
4110	bz,a	1f		! try again if 0
4111	srl	%o0, 8, %o0	! delay slot, get ready for next byte
4112
4113	retl			! return ffstab[%o1]
4114	ldsb	[%o2 + %o1], %o0
4115
41161:
4117	andcc	%o0, 0xff, %o1	! byte 1 like byte 0...
4118	bz,a	2f
4119	srl	%o0, 8, %o0	! (use delay to prepare for byte 2)
4120
4121	ldsb	[%o2 + %o1], %o0
4122	retl			! return ffstab[%o1] + 8
4123	add	%o0, 8, %o0
4124
41252:
4126	andcc	%o0, 0xff, %o1
4127	bz,a	3f
4128	srl	%o0, 8, %o0	! (prepare for byte 3)
4129
4130	ldsb	[%o2 + %o1], %o0
4131	retl			! return ffstab[%o1] + 16
4132	add	%o0, 16, %o0
4133
41343:				! just return ffstab[%o0] + 24
4135	ldsb	[%o2 + %o0], %o0
4136	retl
4137	add	%o0, 24, %o0
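/*
 * Equivalent C, as a sketch (ffstab holds signed bytes; the +8/+16/+24
 * adjustments are the byte number times 8, and ffstab[0] is -24 so
 * that ffs(0) comes out 0, as noted above):
 *
 *	int ffs(int v)
 *	{
 *		unsigned int u = v;
 *
 *		if (u & 0xff)	return (ffstab[u & 0xff]);
 *		u >>= 8;
 *		if (u & 0xff)	return (ffstab[u & 0xff] + 8);
 *		u >>= 8;
 *		if (u & 0xff)	return (ffstab[u & 0xff] + 16);
 *		return (ffstab[u >> 8] + 24);
 *	}
 */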
4138
4139/*
4140 * Here is a very good random number generator.  This implementation is
4141 * based on ``Two Fast Implementations of the "Minimal Standard" Random
4142 * Number Generator'', David G. Carta, Communications of the ACM, Jan 1990,
4143 * Vol 33 No 1.
4144 */
4145	.data
4146randseed:
4147	.word	1
4148	.text
4149ENTRY(random)
4150	sethi	%hi(16807), %o1
4151	wr	%o1, %lo(16807), %y
4152	 sethi	%hi(randseed), %g1
4153	 ld	[%g1 + %lo(randseed)], %o0
4154	 andcc	%g0, 0, %o2
4155	mulscc  %o2, %o0, %o2
4156	mulscc  %o2, %o0, %o2
4157	mulscc  %o2, %o0, %o2
4158	mulscc  %o2, %o0, %o2
4159	mulscc  %o2, %o0, %o2
4160	mulscc  %o2, %o0, %o2
4161	mulscc  %o2, %o0, %o2
4162	mulscc  %o2, %o0, %o2
4163	mulscc  %o2, %o0, %o2
4164	mulscc  %o2, %o0, %o2
4165	mulscc  %o2, %o0, %o2
4166	mulscc  %o2, %o0, %o2
4167	mulscc  %o2, %o0, %o2
4168	mulscc  %o2, %o0, %o2
4169	mulscc  %o2, %o0, %o2
4170	mulscc  %o2, %g0, %o2
4171	rd	%y, %o3
4172	srl	%o2, 16, %o1
4173	set	0xffff, %o4
4174	and	%o4, %o2, %o0
4175	sll	%o0, 15, %o0
4176	srl	%o3, 17, %o3
4177	or	%o3, %o0, %o0
4178	addcc	%o0, %o1, %o0
4179	bl	1f
4180	 sethi	%hi(0x7fffffff), %o1
4181	retl
4182	 st	%o0, [%g1 + %lo(randseed)]
41831:
4184	or	%o1, %lo(0x7fffffff), %o1
4185	add	%o0, 1, %o0
4186	and	%o1, %o0, %o0
4187	retl
4188	 st	%o0, [%g1 + %lo(randseed)]
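/*
 * What the mulscc sequence above computes, written out as a sketch of
 * Carta's method (new seed = 16807 * seed mod (2^31 - 1), no divide):
 *
 *	p  = 16807 * seed;		46-bit product
 *	hi = p >> 31;
 *	lo = p & 0x7fffffff;
 *	seed = hi + lo;
 *	if (seed & 0x80000000)		carried past 2^31 - 1
 *		seed = (seed + 1) & 0x7fffffff;
 *	return (seed);
 */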
4189
4190/*
4191 * void microtime(struct timeval *tv)
4192 *
4193 * LBL's sparc bsd 'microtime': We don't need to spl (so this routine
4194 * can be a leaf routine) and we don't keep a 'last' timeval (there
4195 * can't be two calls to this routine in a microsecond).  This seems to
4196 * be about 20 times faster than the Sun code on an SS-2. - vj
4197 *
4198 * Read time values from slowest-changing to fastest-changing,
4199 * then re-read out to slowest.  If the values read before
4200 * the innermost match those read after, the innermost value
4201 * is consistent with the outer values.  If not, it may not
4202 * be and we must retry.  Typically this loop runs only once;
4203 * occasionally it runs twice, and only rarely does it run longer.
4204 */
4205ENTRY(microtime)
4206	sethi	%hi(_time), %g2
4207	sethi	%hi(TIMERREG_VA), %g3
42081:
4209	ldd	[%g2+%lo(_time)], %o2		! time.tv_sec & time.tv_usec
4210	ld	[%g3+%lo(TIMERREG_VA)], %o4	! usec counter
4211	ldd	[%g2+%lo(_time)], %g4		! see if time values changed
4212	cmp	%g4, %o2
4213	bne	1b				! if time.tv_sec changed
4214	 cmp	%g5, %o3
4215	bne	1b				! if time.tv_usec changed
4216	 tst	%o4
4217
4218	bpos	2f				! reached limit?
4219	 srl	%o4, TMR_SHIFT, %o4		! convert counter to usec
4220	sethi	%hi(_tick), %g4			! bump usec by 1 tick
4221	ld	[%g4+%lo(_tick)], %o1
4222	set	TMR_MASK, %g5
4223	add	%o1, %o3, %o3
4224	and	%o4, %g5, %o4
42252:
4226	add	%o4, %o3, %o3
4227	set	1000000, %g5			! normalize usec value
4228	cmp	%o3, %g5
4229	bl,a	3f
4230	 st	%o2, [%o0]			! (should be able to std here)
4231	add	%o2, 1, %o2			! overflow
4232	sub	%o3, %g5, %o3
4233	st	%o2, [%o0]			! (should be able to std here)
42343:
4235	retl
4236	 st	%o3, [%o0+4]
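/*
 * The same in C, as a sketch (the timer's limit bit is its sign bit,
 * hence the bpos test above):
 *
 *	do {
 *		sec  = time.tv_sec;
 *		usec = time.tv_usec;
 *		t    = *(int *)TIMERREG_VA;
 *	} while (sec != time.tv_sec || usec != time.tv_usec);
 *	if (t < 0) {			limit reached; tick not yet in `time'
 *		usec += tick;
 *		t = (t >> TMR_SHIFT) & TMR_MASK;
 *	} else
 *		t >>= TMR_SHIFT;
 *	usec += t;
 *	if (usec >= 1000000) {
 *		sec++;
 *		usec -= 1000000;
 *	}
 *	tv->tv_sec = sec;
 *	tv->tv_usec = usec;
 */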
4237
4238/*
4239 * This procedure exists to make stdarg functions work correctly.
4240 * We write the caller's `in' registers into his caller's `arg dump'
4241 * area.  That arg-dump area immediately precedes the argument extension
4242 * area, resulting in a single contiguous block of memory.
4243 *
4244 * This is really the wrong way to do it: the arguments should be written
4245 * to storage local to the stdarg function, and the stdarg `pick up
4246 * the next argument' code should pick it up from whichever region is
4247 * `active' at that point.
4248 */
4249	.globl	___builtin_saveregs
4250___builtin_saveregs:
4251	! not profiled -- this should be done inline anyway
4252	! bleah! the arg dump area is unaligned!  cannot std w/o reg/reg moves
4253	st	%i0, [%fp + 0x44]	! fr->fr_argd[0]
4254	st	%i1, [%fp + 0x48]	! fr->fr_argd[1]
4255	st	%i2, [%fp + 0x4c]	! fr->fr_argd[2]
4256	st	%i3, [%fp + 0x50]	! fr->fr_argd[3]
4257	st	%i4, [%fp + 0x54]	! fr->fr_argd[4]
4258	retl
4259	 st	%i5, [%fp + 0x58]	! fr->fr_argd[5]
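/*
 * The offsets above: 0x44 is 68 = 16 window-save registers * 4 bytes
 * plus the 4-byte aggregate-return slot, so %fp + 0x44 through
 * %fp + 0x58 are the six argument-dump words fr_argd[0..5] in the
 * caller's caller's frame (the usual SPARC frame layout).
 */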
4260
4261#ifdef KGDB
4262/*
4263 * Write all windows (user or otherwise), except the current one.
4264 *
4265 * THIS COULD BE DONE IN USER CODE
4266 */
4267ENTRY(write_all_windows)
4268	/*
4269	 * g2 = g1 = nwindows - 1;
4270	 * while (--g1 > 0) save();
4271	 * while (--g2 > 0) restore();
4272	 */
4273	sethi	%hi(_nwindows), %g1
4274	ld	[%g1 + %lo(_nwindows)], %g1
4275	dec	%g1
4276	mov	%g1, %g2
4277
42781:	deccc	%g1
4279	bg,a	1b
4280	 save	%sp, -64, %sp
4281
42822:	deccc	%g2
4283	bg,a	2b
4284	 restore
4285
4286	retl
4287	nop
4288#endif /* KGDB */
4289
4290	.data
4291	.globl	_cold
4292_cold:
4293	.word	1		! cold start flag
4294
4295	.globl	_proc0paddr
4296_proc0paddr:
4297	.word	_u0		! KVA of proc0 uarea
4298
4299/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
4300	.globl	_intrcnt, _eintrcnt, _intrnames, _eintrnames
4301_intrnames:
4302	.asciz	"spur"
4303	.asciz	"lev1"
4304	.asciz	"lev2"
4305	.asciz	"lev3"
4306	.asciz	"lev4"
4307	.asciz	"lev5"
4308	.asciz	"lev6"
4309	.asciz	"lev7"
4310	.asciz  "lev8"
4311	.asciz	"lev9"
4312	.asciz	"clock"
4313	.asciz	"lev11"
4314	.asciz	"lev12"
4315	.asciz	"lev13"
4316	.asciz	"prof"
4317_eintrnames:
4318	ALIGN
4319_intrcnt:
4320	.skip	4*15
4321_eintrcnt:
4322
4323	.comm	_nwindows, 4
4324	.comm	_promvec, 4
4325	.comm	_curproc, 4
4326	.comm	_qs, 32 * 8
4327	.comm	_whichqs, 4
4328