xref: /openbsd/sys/arch/sparc64/sparc64/locore.s (revision 0cd9f930)
1/*	$OpenBSD: locore.s,v 1.230 2024/11/27 20:11:32 miod Exp $	*/
2/*	$NetBSD: locore.s,v 1.137 2001/08/13 06:10:10 jdolecek Exp $	*/
3
4/*
5 * Copyright (c) 1996-2001 Eduardo Horvath
6 * Copyright (c) 1996 Paul Kranenburg
7 * Copyright (c) 1996
8 * 	The President and Fellows of Harvard College.
9 *	All rights reserved.
10 * Copyright (c) 1992, 1993
11 *	The Regents of the University of California.
12 *	All rights reserved.
13 *
14 * This software was developed by the Computer Systems Engineering group
15 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
16 * contributed to Berkeley.
17 *
18 * All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Lawrence Berkeley Laboratory.
22 *	This product includes software developed by Harvard University.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 *    notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 *    notice, this list of conditions and the following disclaimer in the
31 *    documentation and/or other materials provided with the
32 *    distribution.
33 * 3. All advertising materials mentioning features or use of this
34 *    software must display the following acknowledgement:
35 *	This product includes software developed by the University of
36 *	California, Berkeley and its contributors.
37 *	This product includes software developed by Harvard University.
38 *	This product includes software developed by Paul Kranenburg.
39 * 4. Neither the name of the University nor the names of its
40 *    contributors may be used to endorse or promote products derived
41 *    from this software without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
44 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
45 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
46 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
47 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
51 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
52 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
53 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
54 * DAMAGE.
55 *
56 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
57 */
58
/*
 * Tell gas that the V9 ABI application-reserved globals %g2/%g3 are
 * used here as scratch registers; without the "#scratch" operand the
 * .register directive is incomplete and any use of %g2/%g3 in this
 * file would be rejected by the assembler.
 */
.register %g2, #scratch
.register %g3, #scratch
61
62#include "assym.h"
63#include "ksyms.h"
64#include <machine/param.h>
65#include <sparc64/sparc64/intreg.h>
66#include <machine/ctlreg.h>
67#include <machine/psl.h>
68#include <machine/trap.h>
69#include <machine/frame.h>
70#include <machine/pmap.h>
71#include <machine/asm.h>
72
/* Let us use same syntax as C code */
/* ta 1 raises software trap 0x101 (T_BREAKPOINT in the trap table below). */
#define db_enter()	ta	1; nop

/* use as needed to align things on longword boundaries */
#define	_ALIGN	.align 8
#define ICACHE_ALIGN	.align	32

/* Give this real authority: reset the machine */
/* "sir" = software-initiated reset; flip the #if to make NOTREACHED a no-op. */
#if 1
#define NOTREACHED	sir
#else	/* 1 */
#define NOTREACHED
#endif	/* 1 */
86
/*
 * Boot-time code-patching tables.  The GET/SET macros below append
 * { .word address-of-tagged-instruction, replacement instruction }
 * pairs to these sections; the labels here mark the start and end of
 * each table.  NOTE(review): the code that walks these tables is not
 * in this chunk -- presumably startup code rewrites the tagged
 * instructions in place when running on sun4v hardware.
 */
	.section	.sun4v_patch, "ax"
	.globl sun4v_patch
sun4v_patch:
	.previous

	.section	.sun4v_patch_end, "ax"
	.globl sun4v_patch_end
sun4v_patch_end:
	.previous

	.section	.sun4v_pause_patch, "ax"
	.globl sun4v_pause_patch
sun4v_pause_patch:
	.previous

	.section	.sun4v_pause_patch_end, "ax"
	.globl sun4v_pause_patch_end
sun4v_pause_patch_end:
	.previous
106
#ifdef MULTIPROCESSOR
/*
 * MP-only patch tables, same format as the sun4v tables above:
 * .sun4v_mp_patch / .sun4u_mtp_patch collect the instructions emitted
 * by GET_CPUINFO_VA() so per-cpu cpu_info lookups can be switched to
 * the scratchpad-register form on sun4v and multi-threaded sun4u CPUs.
 */
	.section	.sun4v_mp_patch, "ax"
	.globl sun4v_mp_patch
sun4v_mp_patch:
	.previous

	.section	.sun4v_mp_patch_end, "ax"
	.globl sun4v_mp_patch_end
sun4v_mp_patch_end:
	.previous

	.section	.sun4u_mtp_patch, "ax"
	.globl sun4u_mtp_patch
sun4u_mtp_patch:
	.previous

	.section	.sun4u_mtp_patch_end, "ax"
	.globl sun4u_mtp_patch_end
sun4u_mtp_patch_end:
	.previous
#endif
128
/*
 * The UltraSPARC T1 has a "feature" where a LDXA/STXA to ASI_SCRATCHPAD
 * registers may corrupt an unrelated integer register file register.
 * To prevent this, it is required to have a non-store or NOP instruction
 * before any LDXA/STXA to this register.
 */
/*
 * GET_CPUINFO_VA(ci): load the current cpu's cpu_info pointer into `ci'.
 * As assembled, this is just `set CPUINFO_VA, ci'; the patch-table
 * entries record the address of that instruction (label 999) plus the
 * ldxa that should replace it on sun4v or multi-threaded sun4u systems.
 * The leading nop guarantees a non-store before the patched-in ldxa
 * (T1 erratum above).  Comments must not be added inside the #define
 * body: `!' would survive cpp and comment out the rest of the line.
 */
#define GET_CPUINFO_VA(ci) \
	nop					;\
999:	set	CPUINFO_VA, ci			;\
	.section	.sun4v_mp_patch, "ax"	;\
	.word	999b				;\
	ldxa	[%g0] ASI_SCRATCHPAD, ci	;\
	.previous				;\
	.section	.sun4u_mtp_patch, "ax"	;\
	.word	999b				;\
	ldxa	[%g0] ASI_SCRATCH, ci		;\
	.previous
146
/* GET_CPCB(pcb): pcb = cpu_info->ci_cpcb (current pcb). */
#define GET_CPCB(pcb) \
	GET_CPUINFO_VA(pcb)			;\
	ldx	[pcb + CI_CPCB], pcb

/* GET_CURPROC(curproc): curproc = cpu_info->ci_curproc. */
#define GET_CURPROC(curproc) \
	GET_CPUINFO_VA(curproc)			;\
	ldx	[curproc + CI_CURPROC], curproc
154
#ifdef SUN4V

/*
 * GET_CPUINFO_PA(ci): read scratchpad register 0x10, which on sun4v
 * holds the physical address of this cpu's cpu_info.
 */
#define GET_CPUINFO_PA(ci) \
	mov	0x10, ci			;\
	ldxa	[ci] ASI_SCRATCHPAD, ci

/*
 * GET_MMFSA(mmfsa): fetch cpu_info->ci_mmfsa via a physical
 * (cacheable) load -- the MMU Fault Status Area used by the sun4v
 * hypervisor to report faults.
 */
#define GET_MMFSA(mmfsa) \
	GET_CPUINFO_PA(mmfsa)			;\
	add	mmfsa, CI_MMFSA, mmfsa		;\
	ldxa	[mmfsa] ASI_PHYS_CACHED, mmfsa

#endif
167
/*
 * Read/write an MMU context register.  sun4u uses ASI_DMMU; on sun4v
 * the instruction at label 999 is patched (via .sun4v_patch) to use
 * ASI_MMU_CONTEXTID instead.
 */
#define GET_MMU_CONTEXTID(ctxid, ctx) \
999:	ldxa	[ctx] ASI_DMMU, ctxid 		;\
	.section	.sun4v_patch, "ax" 	;\
	.word	999b				;\
	ldxa	[ctx] ASI_MMU_CONTEXTID, ctxid 	;\
	.previous

#define SET_MMU_CONTEXTID(ctxid, ctx) \
999:	stxa	ctxid, [ctx] ASI_DMMU 		;\
	.section	.sun4v_patch, "ax" 	;\
	.word	999b				;\
	stxa	ctxid, [ctx] ASI_MMU_CONTEXTID 	;\
	.previous

/*
 * Select the normal global register set: sun4u clears PSTATE_AG via
 * %pstate; sun4v (patched form) writes global-level 0 to %gl.
 */
#define NORMAL_GLOBALS() \
999:	wrpr	%g0, PSTATE_KERN, %pstate	;\
	.section	.sun4v_patch, "ax"	;\
	.word	999b				;\
	wrpr	%g0, 0, %gl			;\
	.previous

/*
 * Select the alternate global register set (sun4u PSTATE_AG, sun4v
 * %gl = 1), used by trap handlers.
 */
#define ALTERNATE_GLOBALS() \
999:	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	;\
	.section	.sun4v_patch, "ax"	;\
	.word	999b				;\
	wrpr	%g0, 1, %gl			;\
	.previous
195
196
/*
 * A handy macro for maintaining instrumentation counters.
 * Note that this clobbers %o0, %o1 and %o2.  Normal usage is
 * something like:
 *	foointr:
 *		TRAP_SETUP ...		! makes %o registers safe
 *		INCR uvmexp+V_FOO	! count a foo
 *
 * The increment is done with a compare-and-swap loop so it is safe
 * against concurrent updates from other CPUs.
 */
	.macro INCR what
	sethi	%hi(\what), %o0
	or	%o0, %lo(\what), %o0	! %o0 = &what
99:
	lduw	[%o0], %o1		! %o1 = old value
	add	%o1, 1, %o2		! %o2 = old value + 1
	casa	[%o0] ASI_P, %o1, %o2	! swap in %o2 iff [%o0] still == %o1
	cmp	%o1, %o2		! casa returns the old value in %o2
	bne,pn	%icc, 99b		! lost the race -- retry
	 nop
	.endm
216
/*
 * some macros to load and store a register window
 */
	/* Store %l0-%l7 then %i0-%i7 at base + n*size using the given ASI. */
	.macro	SPILL storer,base,size,asi

	.irpc n,01234567
		\storer %l\n, [\base + (\n * \size)] \asi
	.endr
	.irpc n,01234567
		\storer %i\n, [\base + ((8+\n) * \size)] \asi
	.endr
	.endm

	/* Load %l0-%l7 then %i0-%i7 from base + n*size using the given ASI. */
	.macro FILL loader, base, size, asi
	.irpc n,01234567
		\loader [\base + (\n * \size)] \asi, %l\n
	.endr

	.irpc n,01234567
		\loader [\base + ((8+\n) * \size)] \asi, %i\n
	.endr
	.endm

/*
 * Reserve the given room on stack.
 * (size is normally negative -- the stack grows down.)
 */

	.macro STACKFRAME size
	save	%sp, \size, %sp
	.endm
247
248
	.data
	.globl	data_start
data_start:						! Start of data segment

/*
 * Process 0's u.
 *
 * This must be aligned on an 8 byte boundary.
 * NOTE(review): both xwords are zero here; presumably filled in with
 * the proc0 uarea base/end during bootstrap -- confirm in dostart.
 */
	.globl	u0
u0:		.xword	0
estack0:	.xword	0

/*
 * This stack is used for bootstrapping and spinning up CPUs.
 * The label is placed after the .space: stacks grow down, so
 * tmpstack marks the (16-byte aligned) top of the region.
 */
	.space	4096
	.align	16
tmpstack:

#ifdef DEBUG
/*
 * This stack is used when we detect kernel stack corruption.
 */
	.space	USPACE
	.align	16
panicstack:
#endif	/* DEBUG */

/*
 * romp is the prom entry pointer
 */
	.globl	romp
romp:	.xword	0

/*
 * cputyp is the current cpu type, used to distinguish between
 * the many variations of different sun4* machines. It contains
 * the value CPU_SUN4U or CPU_SUN4V.
 */
	.globl	cputyp
cputyp:
	.word	CPU_SUN4U

	! nonzero while the kernel is still cold-starting
	.globl cold
cold:
	.word 1

	_ALIGN
298
	.text

/*
 * The v9 trap frame is stored in the special trap registers.  The
 * register window is only modified on window overflow, underflow,
 * and clean window traps, where it points to the register window
 * needing service.  Traps have space for 8 instructions, except for
 * the window overflow, underflow, and clean window traps which are
 * 32 instructions long, large enough to in-line.
 *
 * The spitfire CPU (Ultra I) has 4 different sets of global registers.
 * (blah blah...)
 *
 * I used to generate these numbers by address arithmetic, but gas's
 * expression evaluator has about as much sense as your average slug
 * (oddly enough, the code looks about as slimy too).  Thus, all the
 * trap numbers are given as arguments to the trap macros.  This means
 * there is one line per trap.  Sigh.
 */

/*
 *	TA8 -- trap align for 8 instruction traps (8 * 4 = 32 bytes)
 *	TA32 -- trap align for 32 instruction traps (32 * 4 = 128 bytes)
 */
	.macro TA8
	.align 32
	.endm

	.macro TA32
	.align 128
	.endm
330
/*
 * v9 trap macros:
 *
 *	We have a problem with v9 traps; we have no registers to put the
 *	trap type into.  But we do have a %tt register which already has
 *	that information.  Trap types in these macros are all dummies.
 */
	/* regular vectored traps: annulled branch + TA8 pads to 32 bytes */
	.macro VTRAP type, label
	ba,a,pt	%icc,\label
	nop
	NOTREACHED
	TA8
	.endm

	/* hardware interrupts */
	.macro HARDINT4U lev
	VTRAP \lev, sparc_interrupt
	.endm

	/* software interrupts (bit argument is unused; same handler) */
	.macro SOFTINT4U lev, bit
	HARDINT4U \lev
	.endm

	/* traps that just call trap() */
	.macro TRAP type
	VTRAP \type, slowtrap
	.endm

	/* architecturally undefined traps (cause panic) */
	/* Non-DEBUG kernels reset the machine outright via sir. */
	.macro	UTRAP type
#ifndef DEBUG
	sir
#endif	/* DEBUG */
	VTRAP \type, slowtrap
	.endm

	/* software undefined traps (may be replaced) */
	.macro STRAP type
	VTRAP \type, slowtrap
	.endm

#define	SYSCALL		VTRAP 0x100, syscall_setup
375
/*
 * Macro to clear %tt so we don't get confused with old traps.
 * Expands to nothing on non-DEBUG kernels; on DEBUG kernels it
 * overwrites %tt with the sentinel 0x1ff.
 */
	.macro CLRTT n
#ifdef DEBUG
#if 0	/* for henric, but not yet */
	wrpr	%g0, 0x1ff - \n, %tt
#else	/* 0 */
	wrpr	%g0, 0x1ff, %tt
#endif	/* 0 */
#endif	/* DEBUG */
	.endm
388
	! Clean-window trap handler used in the user (TL=0) trap table.
	! Fills one 128-byte (TA32) table slot; must not fault.
	.macro UCLEANWIN
	rdpr %cleanwin, %o7		! 024-027 = clean window trap
	inc %o7				!	This handler is in-lined and cannot fault
#ifdef DEBUG
	set	0xbadcafe, %l0		! DEBUG -- compiler should not rely on zero-ed registers.
#else	/* DEBUG */
	clr	%l0
#endif	/* DEBUG */
	wrpr %g0, %o7, %cleanwin	!       Nucleus (trap&IRQ) code does not need clean windows

	mov %l0,%l1; mov %l0,%l2	!	Clear out %l1-%l7 and %o0-%o7 and inc %cleanwin and done
	mov %l0,%l3; mov %l0,%l4
	mov %l0, %l5
	mov %l0, %l6; mov %l0, %l7; mov %l0, %o0; mov %l0, %o1

	mov %l0, %o2; mov %l0, %o3; mov %l0, %o4; mov %l0, %o5;
	mov %l0, %o6; mov %l0, %o7
	CLRTT 5
	retry; nop; NOTREACHED; TA32
	.endm
409
	! Clean-window trap handler used in the kernel (TL>0) trap table;
	! same job as UCLEANWIN with the moves interleaved differently.
	.macro KCLEANWIN
#ifdef DEBUG
	set	0xbadbeef, %l0		! DEBUG
#else
	clr	%l0
#endif	/* DEBUG */
	mov %l0, %l1; mov %l0, %l2	! 024-027 = clean window trap
	rdpr %cleanwin, %o7		!	This handler is in-lined and cannot fault
	inc %o7; mov %l0, %l3	!       Nucleus (trap&IRQ) code does not need clean windows
	wrpr %g0, %o7, %cleanwin	!	Clear out %l1-%l7 and %o0-%o7 and inc %cleanwin and done
	mov %l0, %l4; mov %l0, %l5; mov %l0, %l6; mov %l0, %l7
	mov %l0, %o0; mov %l0, %o1; mov %l0, %o2; mov %l0, %o3

	mov %l0, %o4; mov %l0, %o5; mov %l0, %o6; mov %l0, %o7
	CLRTT 8
	retry; nop; TA32
	.endm
427
	! Fast ITLB-miss handler: look the VA up in the TSB and, on a tag
	! match, install the TTE directly.  Misses punt to instr_miss.
	! The ldda loads the TSB entry pair: tag -> %g2+2? no -- tag -> %g4,
	! data -> %g5 (quad load fills the even/odd register pair).
	.macro IMMU_MISS n
	ldxa	[%g0] ASI_IMMU_8KPTR, %g2!	Load IMMU 8K TSB pointer
	ldxa	[%g0] ASI_IMMU, %g1	!	Load IMMU tag target register
	ldda	[%g2] ASI_NUCLEUS_QUAD_LDD, %g4!Load TSB tag into %g4, data into %g5
	brgez,pn %g5, instr_miss	!	Entry invalid?  Punt
	 cmp	%g1, %g4		!	Compare TLB tags
	bne,pn %xcc, instr_miss		!	Tags differ?  Punt
	 nop
	CLRTT \n
	stxa	%g5, [%g0] ASI_IMMU_DATA_IN!	Enter new mapping
	retry				!	Try new mapping
1:
	sir				!	never reached -- pads the slot
	TA32
	.endm

	! Fast DTLB-miss handler, same shape as IMMU_MISS but the tag
	! compare is done with xor/brnz.  Misses punt to data_miss.
	.macro DMMU_MISS n
	ldxa	[%g0] ASI_DMMU_8KPTR, %g2!	Load DMMU 8K TSB pointer
	ldxa	[%g0] ASI_DMMU, %g1	!	Load DMMU tag target register
	ldda	[%g2] ASI_NUCLEUS_QUAD_LDD, %g4!Load TSB tag into %g4, data into %g5
	brgez,pn %g5, data_miss		!	Entry invalid?  Punt
	 xor	%g1, %g4, %g4		!	Compare TLB tags
	brnz,pn	%g4, data_miss		!	Tags differ?  Punt
	 nop
	CLRTT \n
	stxa	%g5, [%g0] ASI_DMMU_DATA_IN!	Enter new mapping
	retry				!	Try new mapping
1:
	sir				!	never reached -- pads the slot
	TA32
	.endm

	! Fast DTLB protection-fault entry: always vectors to the
	! write-fault handler.
	.macro DMMU_PROT
	ba,a,pt	%xcc, dmmu_write_fault
	nop
	TA32
	.endm
465/*
466 * Here are some often repeated traps as macros.
467 */
468
	! spill a 64-bit user register window
	! Saves %l0-%l7/%i0-%i6 to the user stack through the given ASI;
	! the saved %i7 (return address) is XORed with the per-pcb window
	! cookie ("stackghost") so corruption is detected on refill.
	.macro USPILL64 label, as
\label:
	wr	%g0, \as, %asi
	stxa	%l0, [%sp + BIAS + ( 0*8)] %asi
	stxa	%l1, [%sp + BIAS + ( 1*8)] %asi
	stxa	%l2, [%sp + BIAS + ( 2*8)] %asi
	stxa	%l3, [%sp + BIAS + ( 3*8)] %asi
	stxa	%l4, [%sp + BIAS + ( 4*8)] %asi
	stxa	%l5, [%sp + BIAS + ( 5*8)] %asi
	stxa	%l6, [%sp + BIAS + ( 6*8)] %asi
	stxa	%l7, [%sp + BIAS + ( 7*8)] %asi
	stxa	%i0, [%sp + BIAS + ( 8*8)] %asi
	stxa	%i1, [%sp + BIAS + ( 9*8)] %asi
	stxa	%i2, [%sp + BIAS + (10*8)] %asi
	stxa	%i3, [%sp + BIAS + (11*8)] %asi
	stxa	%i4, [%sp + BIAS + (12*8)] %asi
	stxa	%i5, [%sp + BIAS + (13*8)] %asi
	stxa	%i6, [%sp + BIAS + (14*8)] %asi
	GET_CPCB(%g5)
	ldx	[%g5 + PCB_WCOOKIE], %g5
	xor	%g5, %i7, %g5		! stackghost
	stxa	%g5, [%sp + BIAS + (15*8)] %asi
	saved
	CLRTT 1
	retry
	NOTREACHED
	TA32
	.endm
498
	! spill a 64-bit kernel register window
	.macro SPILL64 label, as
\label:
	wr	%g0, \as, %asi
	SPILL	stxa, %sp+BIAS, 8, %asi
	saved
	CLRTT 1
	retry
	NOTREACHED
	TA32
	.endm

	! spill a 32-bit register window
	.macro SPILL32 label, as
\label:
	wr	%g0, \as, %asi
	srl	%sp, 0, %sp ! fixup 32-bit pointers
	SPILL	stwa, %sp, 4, %asi
	saved
	CLRTT 2
	retry
	NOTREACHED
	TA32
	.endm

	! Spill either 32-bit or 64-bit register window.
	! A v9 stack pointer is BIAS-offset and therefore odd; dispatch on
	! bit 0 of %sp, jumping past the wr/srl setup already done here.
	.macro SPILLBOTH label64,label32, as
	andcc	%sp, 1, %g0
	bnz,pt	%xcc, \label64+4	! Is it a v9 or v8 stack?
	 wr	%g0, \as, %asi
	ba,pt	%xcc, \label32+8
	 srl	%sp, 0, %sp ! fixup 32-bit pointers
	NOTREACHED
	TA32
	.endm
534
	! fill a 64-bit user register window
	! Restores %l0-%l7/%i0-%i7 from the user stack and undoes the
	! stackghost XOR on %i7 (see USPILL64).
	.macro UFILL64 label, as
\label:
	wr	%g0, \as, %asi
	FILL	ldxa, %sp+BIAS, 8, %asi
	GET_CPCB(%g5)
	ldx	[%g5 + PCB_WCOOKIE], %g5
	xor	%g5, %i7, %i7		! stackghost
	restored
	CLRTT 3
	retry
	NOTREACHED
	TA32
	.endm

	! fill a 64-bit kernel register window
	.macro FILL64 label, as
\label:
	wr	%g0, \as, %asi
	FILL	ldxa, %sp+BIAS, 8, %asi
	restored
	CLRTT 3
	retry
	NOTREACHED
	TA32
	.endm

	! fill a 32-bit register window
	.macro FILL32 label, as
\label:
	wr	%g0, \as, %asi
	srl	%sp, 0, %sp ! fixup 32-bit pointers
	FILL	lda, %sp, 4, %asi
	restored
	CLRTT 4
	retry
	NOTREACHED
	TA32
	.endm

	! fill either 32-bit or 64-bit register window.
	! Same %sp bit-0 dispatch as SPILLBOTH.
	.macro FILLBOTH label64,label32, as
	andcc	%sp, 1, %i0
	bnz	(\label64)+4 ! See if it's a v9 stack or v8
	 wr	%g0, \as, %asi
	ba	(\label32)+8
	 srl	%sp, 0, %sp ! fixup 32-bit pointers
	NOTREACHED
	TA32
	.endm
585
#ifdef SUN4V

	! Emit `count' reserved TL=0 trap-table slots (32 bytes each),
	! all vectoring to slowtrap.
	.macro	sun4v_tl0_reserved	count
	.rept	\count
	ba,a,pt	%xcc, slowtrap
	 nop
	.align	32
	.endr
	.endm

#define sun4v_tl0_unused sun4v_tl0_reserved

	! Same as above for the TL>0 table.
	.macro	sun4v_tl1_reserved	count
	.rept	\count
	ba,a,pt	%xcc, slowtrap
	 nop
	.align	32
	.endr
	.endm

#define sun4v_tl1_unused sun4v_tl1_reserved

	! TL>0 kernel window spill (normal registers), 128-byte slot.
	.macro sun4v_tl1_kspill_normal
	ba,a,pt	%xcc,kspill_normal
	 nop
	.align 128
	.endm

	! TL>0 user window spill (normal registers), 128-byte slot.
	.macro sun4v_tl1_uspill_normal
	ba,a,pt	%xcc,pcbspill_normals
	 nop
	.align 128
	.endm

	! TL>0 user window spill (other registers), 128-byte slot.
	.macro sun4v_tl1_uspill_other
	ba,a,pt	%xcc,pcbspill_others
	 nop
	.align 128
	.endm

#endif
627
/*
 * sun4u trap table for traps taken at TL=0 (from user mode).  Entry
 * addresses are fixed at %tba + 32*tt, so every macro used below pads
 * its slot to exactly 32 bytes (TA8) or, for the in-lined window
 * spill/fill and clean-window handlers, 128 bytes (TA32).  Do not
 * insert or reorder entries.
 */
	.globl	start, kernel_text
	kernel_text = start			! for kvm_mkdb(8)
start:
	/* Traps from TL=0 -- traps from user mode */
	.globl	trapbase
trapbase:
	b dostart; nop; TA8	! 000 = reserved -- Use it to boot
	/* We should not get the next 5 traps */
	UTRAP 0x001		! 001 = POR Reset -- ROM should get this
	UTRAP 0x002		! 002 = WDR -- ROM should get this
	UTRAP 0x003		! 003 = XIR -- ROM should get this
	UTRAP 0x004		! 004 = SIR -- ROM should get this
	UTRAP 0x005		! 005 = RED state exception
	UTRAP 0x006; UTRAP 0x007
	VTRAP T_INST_EXCEPT, textfault	! 008 = instr. access except
	VTRAP T_TEXTFAULT, textfault	! 009 = instr. access MMU miss
	VTRAP T_INST_ERROR, textfault	! 00a = instr. access err
	UTRAP 0x00b; UTRAP 0x00c; UTRAP 0x00d; UTRAP 0x00e; UTRAP 0x00f
	TRAP T_ILLINST			! 010 = illegal instruction
	TRAP T_PRIVINST		! 011 = privileged instruction
	UTRAP 0x012			! 012 = unimplemented LDD
	UTRAP 0x013			! 013 = unimplemented STD
	UTRAP 0x014; UTRAP 0x015; UTRAP 0x016; UTRAP 0x017; UTRAP 0x018
	UTRAP 0x019; UTRAP 0x01a; UTRAP 0x01b; UTRAP 0x01c; UTRAP 0x01d
	UTRAP 0x01e; UTRAP 0x01f
	TRAP T_FPDISABLED		! 020 = fp instr, but EF bit off in psr
	TRAP T_FP_IEEE_754		! 021 = ieee 754 exception
	TRAP T_FP_OTHER			! 022 = other fp exception
	TRAP T_TAGOF			! 023 = tag overflow
	UCLEANWIN			! 024-027 = clean window trap
	TRAP T_DIV0			! 028 = divide by zero
	UTRAP 0x029			! 029 = internal processor error
	UTRAP 0x02a; UTRAP 0x02b; UTRAP 0x02c; UTRAP 0x02d; UTRAP 0x02e; UTRAP 0x02f
	VTRAP T_DATAFAULT, datafault	! 030 = data fetch fault
	UTRAP 0x031			! 031 = data MMU miss -- no MMU
	VTRAP T_DATA_ERROR, datafault	! 032 = data access error
	VTRAP T_DATA_PROT, datafault	! 033 = data protection fault
	TRAP T_ALIGN			! 034 = address alignment error -- we could fix it inline...
	TRAP T_LDDF_ALIGN		! 035 = LDDF address alignment error -- we could fix it inline...
	TRAP T_STDF_ALIGN		! 036 = STDF address alignment error -- we could fix it inline...
	TRAP T_PRIVACT			! 037 = privileged action
	TRAP T_LDQF_ALIGN		! 038 = LDDF address alignment error
	TRAP T_STQF_ALIGN		! 039 = STQF address alignment error
	UTRAP 0x03a; UTRAP 0x03b; UTRAP 0x03c;
	UTRAP 0x03d; UTRAP 0x03e; UTRAP 0x03f;
	VTRAP T_ASYNC_ERROR, datafault	! 040 = data fetch fault
	SOFTINT4U 1, IE_L1		! 041 = level 1 interrupt
	HARDINT4U 2			! 042 = level 2 interrupt
	HARDINT4U 3			! 043 = level 3 interrupt
	SOFTINT4U 4, IE_L4		! 044 = level 4 interrupt
	HARDINT4U 5			! 045 = level 5 interrupt
	SOFTINT4U 6, IE_L6		! 046 = level 6 interrupt
	HARDINT4U 7			! 047 = level 7 interrupt
	HARDINT4U 8			! 048 = level 8 interrupt
	HARDINT4U 9			! 049 = level 9 interrupt
	HARDINT4U 10			! 04a = level 10 interrupt
	HARDINT4U 11			! 04b = level 11 interrupt
	HARDINT4U 12			! 04c = level 12 interrupt
	HARDINT4U 13			! 04d = level 13 interrupt
	HARDINT4U 14			! 04e = level 14 interrupt
	HARDINT4U 15			! 04f = nonmaskable interrupt
	UTRAP 0x050; UTRAP 0x051; UTRAP 0x052; UTRAP 0x053; UTRAP 0x054; UTRAP 0x055
	UTRAP 0x056; UTRAP 0x057; UTRAP 0x058; UTRAP 0x059; UTRAP 0x05a; UTRAP 0x05b
	UTRAP 0x05c; UTRAP 0x05d; UTRAP 0x05e; UTRAP 0x05f
	VTRAP 0x060, interrupt_vector;	! 060 = interrupt vector
	TRAP T_PA_WATCHPT		! 061 = physical address data watchpoint
	TRAP T_VA_WATCHPT		! 062 = virtual address data watchpoint
	VTRAP T_ECCERR, cecc_catch	! 063 = Correctable ECC error
ufast_IMMU_miss:			! 064 = fast instr access MMU miss
	IMMU_MISS 6
ufast_DMMU_miss:			! 068 = fast data access MMU miss
	DMMU_MISS 7
ufast_DMMU_protection:			! 06c = fast data access MMU protection
	DMMU_PROT
	UTRAP 0x070			! Implementation dependent traps
	UTRAP 0x071; UTRAP 0x072; UTRAP 0x073; UTRAP 0x074; UTRAP 0x075; UTRAP 0x076
	UTRAP 0x077; UTRAP 0x078; UTRAP 0x079; UTRAP 0x07a; UTRAP 0x07b; UTRAP 0x07c
	UTRAP 0x07d; UTRAP 0x07e; UTRAP 0x07f
user_uspill:
	USPILL64 uspill8,ASI_AIUS	! 0x080 spill_0_normal -- used to save user windows in user mode
	SPILL32 uspill4,ASI_AIUS	! 0x084 spill_1_normal
	SPILLBOTH uspill8,uspill4,ASI_AIUS		! 0x088 spill_2_normal
#ifdef DEBUG
	sir
#endif	/* DEBUG */
	UTRAP 0x08c; TA32	! 0x08c spill_3_normal
user_kspill:
	UTRAP 0x090; TA32	! 0x090 spill_4_normal -- used to save supervisor windows
	SPILL64 kspill8,ASI_N	! 0x094 spill_5_normal
	SPILL32 kspill4,ASI_N	! 0x098 spill_6_normal
	SPILLBOTH kspill8,kspill4,ASI_N	! 0x09c spill_7_normal
user_uspillk:
	USPILL64 uspillk8,ASI_AIUS	! 0x0a0 spill_0_other -- used to save user windows in supervisor mode
	SPILL32 uspillk4,ASI_AIUS	! 0x0a4 spill_1_other
	SPILLBOTH uspillk8,uspillk4,ASI_AIUS	! 0x0a8 spill_2_other
	UTRAP 0x0ac; TA32	! 0x0ac spill_3_other
	UTRAP 0x0b0; TA32	! 0x0b0 spill_4_other
	UTRAP 0x0b4; TA32	! 0x0b4 spill_5_other
	UTRAP 0x0b8; TA32	! 0x0b8 spill_6_other
	UTRAP 0x0bc; TA32	! 0x0bc spill_7_other
user_ufill:
	UFILL64 ufill8,ASI_AIUS ! 0x0c0 fill_0_normal -- used to fill windows when running user mode
	FILL32 ufill4,ASI_AIUS	! 0x0c4 fill_1_normal
	FILLBOTH ufill8,ufill4,ASI_AIUS	! 0x0c8 fill_2_normal
	UTRAP 0x0cc; TA32	! 0x0cc fill_3_normal
user_kfill:
	UTRAP 0x0d0; TA32	! 0x0d0 fill_4_normal -- used to fill windows when running supervisor mode
	FILL64 kfill8,ASI_N	! 0x0d4 fill_5_normal
	FILL32 kfill4,ASI_N	! 0x0d8 fill_6_normal
	FILLBOTH kfill8,kfill4,ASI_N	! 0x0dc fill_7_normal
user_ufillk:
	UFILL64 ufillk8,ASI_AIUS	! 0x0e0 fill_0_other
	FILL32 ufillk4,ASI_AIUS	! 0x0e4 fill_1_other
	FILLBOTH ufillk8,ufillk4,ASI_AIUS	! 0x0e8 fill_2_other
	UTRAP 0x0ec; TA32	! 0x0ec fill_3_other
	UTRAP 0x0f0; TA32	! 0x0f0 fill_4_other
	UTRAP 0x0f4; TA32	! 0x0f4 fill_5_other
	UTRAP 0x0f8; TA32	! 0x0f8 fill_6_other
	UTRAP 0x0fc; TA32	! 0x0fc fill_7_other
user_syscall:
	SYSCALL			! 0x100 = sun syscall
	TRAP T_BREAKPOINT	! 0x101 = pseudo breakpoint instruction
	STRAP 0x102; STRAP 0x103; STRAP 0x104; STRAP 0x105; STRAP 0x106; STRAP 0x107
	SYSCALL			! 0x108 = svr4 syscall
	SYSCALL			! 0x109 = bsd syscall
	TRAP T_KGDB_EXEC	! 0x10a = enter kernel gdb on kernel startup
	STRAP 0x10b; STRAP 0x10c; STRAP 0x10d; STRAP 0x10e; STRAP 0x10f;
	STRAP 0x110; STRAP 0x111; STRAP 0x112; STRAP 0x113; STRAP 0x114; STRAP 0x115; STRAP 0x116; STRAP 0x117
	STRAP 0x118; STRAP 0x119; STRAP 0x11a; STRAP 0x11b; STRAP 0x11c; STRAP 0x11d; STRAP 0x11e; STRAP 0x11f
	STRAP 0x120; STRAP 0x121; STRAP 0x122; STRAP 0x123; STRAP 0x124; STRAP 0x125; STRAP 0x126; STRAP 0x127
	STRAP 0x128; STRAP 0x129; STRAP 0x12a; STRAP 0x12b; STRAP 0x12c; STRAP 0x12d; STRAP 0x12e; STRAP 0x12f
	STRAP 0x130; STRAP 0x131; STRAP 0x132; STRAP 0x133; STRAP 0x134; STRAP 0x135; STRAP 0x136; STRAP 0x137
	STRAP 0x138; STRAP 0x139; STRAP 0x13a; STRAP 0x13b; STRAP 0x13c; STRAP 0x13d; STRAP 0x13e; STRAP 0x13f
	SYSCALL			! 0x140 SVID syscall (Solaris 2.7)
	SYSCALL			! 0x141 SPARC International syscall
	SYSCALL			! 0x142	OS Vendor syscall
	SYSCALL			! 0x143 HW OEM syscall
	STRAP 0x144; STRAP 0x145; STRAP 0x146; STRAP 0x147
	STRAP 0x148; STRAP 0x149; STRAP 0x14a; STRAP 0x14b; STRAP 0x14c; STRAP 0x14d; STRAP 0x14e; STRAP 0x14f
	STRAP 0x150; STRAP 0x151; STRAP 0x152; STRAP 0x153; STRAP 0x154; STRAP 0x155; STRAP 0x156; STRAP 0x157
	STRAP 0x158; STRAP 0x159; STRAP 0x15a; STRAP 0x15b; STRAP 0x15c; STRAP 0x15d; STRAP 0x15e; STRAP 0x15f
	STRAP 0x160; STRAP 0x161; STRAP 0x162; STRAP 0x163; STRAP 0x164; STRAP 0x165; STRAP 0x166; STRAP 0x167
	STRAP 0x168; STRAP 0x169; STRAP 0x16a; STRAP 0x16b; STRAP 0x16c; STRAP 0x16d; STRAP 0x16e; STRAP 0x16f
	STRAP 0x170; STRAP 0x171; STRAP 0x172; STRAP 0x173; STRAP 0x174; STRAP 0x175; STRAP 0x176; STRAP 0x177
	STRAP 0x178; STRAP 0x179; STRAP 0x17a; STRAP 0x17b; STRAP 0x17c; STRAP 0x17d; STRAP 0x17e; STRAP 0x17f
	! Traps beyond 0x17f are reserved
	UTRAP 0x180; UTRAP 0x181; UTRAP 0x182; UTRAP 0x183; UTRAP 0x184; UTRAP 0x185; UTRAP 0x186; UTRAP 0x187
	UTRAP 0x188; UTRAP 0x189; UTRAP 0x18a; UTRAP 0x18b; UTRAP 0x18c; UTRAP 0x18d; UTRAP 0x18e; UTRAP 0x18f
	UTRAP 0x190; UTRAP 0x191; UTRAP 0x192; UTRAP 0x193; UTRAP 0x194; UTRAP 0x195; UTRAP 0x196; UTRAP 0x197
	UTRAP 0x198; UTRAP 0x199; UTRAP 0x19a; UTRAP 0x19b; UTRAP 0x19c; UTRAP 0x19d; UTRAP 0x19e; UTRAP 0x19f
	UTRAP 0x1a0; UTRAP 0x1a1; UTRAP 0x1a2; UTRAP 0x1a3; UTRAP 0x1a4; UTRAP 0x1a5; UTRAP 0x1a6; UTRAP 0x1a7
	UTRAP 0x1a8; UTRAP 0x1a9; UTRAP 0x1aa; UTRAP 0x1ab; UTRAP 0x1ac; UTRAP 0x1ad; UTRAP 0x1ae; UTRAP 0x1af
	UTRAP 0x1b0; UTRAP 0x1b1; UTRAP 0x1b2; UTRAP 0x1b3; UTRAP 0x1b4; UTRAP 0x1b5; UTRAP 0x1b6; UTRAP 0x1b7
	UTRAP 0x1b8; UTRAP 0x1b9; UTRAP 0x1ba; UTRAP 0x1bb; UTRAP 0x1bc; UTRAP 0x1bd; UTRAP 0x1be; UTRAP 0x1bf
	UTRAP 0x1c0; UTRAP 0x1c1; UTRAP 0x1c2; UTRAP 0x1c3; UTRAP 0x1c4; UTRAP 0x1c5; UTRAP 0x1c6; UTRAP 0x1c7
	UTRAP 0x1c8; UTRAP 0x1c9; UTRAP 0x1ca; UTRAP 0x1cb; UTRAP 0x1cc; UTRAP 0x1cd; UTRAP 0x1ce; UTRAP 0x1cf
	UTRAP 0x1d0; UTRAP 0x1d1; UTRAP 0x1d2; UTRAP 0x1d3; UTRAP 0x1d4; UTRAP 0x1d5; UTRAP 0x1d6; UTRAP 0x1d7
	UTRAP 0x1d8; UTRAP 0x1d9; UTRAP 0x1da; UTRAP 0x1db; UTRAP 0x1dc; UTRAP 0x1dd; UTRAP 0x1de; UTRAP 0x1df
	UTRAP 0x1e0; UTRAP 0x1e1; UTRAP 0x1e2; UTRAP 0x1e3; UTRAP 0x1e4; UTRAP 0x1e5; UTRAP 0x1e6; UTRAP 0x1e7
	UTRAP 0x1e8; UTRAP 0x1e9; UTRAP 0x1ea; UTRAP 0x1eb; UTRAP 0x1ec; UTRAP 0x1ed; UTRAP 0x1ee; UTRAP 0x1ef
	UTRAP 0x1f0; UTRAP 0x1f1; UTRAP 0x1f2; UTRAP 0x1f3; UTRAP 0x1f4; UTRAP 0x1f5; UTRAP 0x1f6; UTRAP 0x1f7
	UTRAP 0x1f8; UTRAP 0x1f9; UTRAP 0x1fa; UTRAP 0x1fb; UTRAP 0x1fc; UTRAP 0x1fd; UTRAP 0x1fe; UTRAP 0x1ff
790
791	/* Traps from TL>0 -- traps from supervisor mode */
792trapbase_priv:
793	UTRAP 0x000		! 000 = reserved -- Use it to boot
794	/* We should not get the next 5 traps */
795	UTRAP 0x001		! 001 = POR Reset -- ROM should get this
796	UTRAP 0x002		! 002 = WDR Watchdog -- ROM should get this
797	UTRAP 0x003		! 003 = XIR -- ROM should get this
798	UTRAP 0x004		! 004 = SIR -- ROM should get this
799	UTRAP 0x005		! 005 = RED state exception
800	UTRAP 0x006; UTRAP 0x007
801ktextfault:
802	VTRAP T_INST_EXCEPT, textfault	! 008 = instr. access except
803	VTRAP T_TEXTFAULT, textfault	! 009 = instr. access MMU miss -- no MMU
804	VTRAP T_INST_ERROR, textfault	! 00a = instr. access err
805	UTRAP 0x00b; UTRAP 0x00c; UTRAP 0x00d; UTRAP 0x00e; UTRAP 0x00f
806	TRAP T_ILLINST			! 010 = illegal instruction
807	TRAP T_PRIVINST		! 011 = privileged instruction
808	UTRAP 0x012			! 012 = unimplemented LDD
809	UTRAP 0x013			! 013 = unimplemented STD
810	UTRAP 0x014; UTRAP 0x015; UTRAP 0x016; UTRAP 0x017; UTRAP 0x018
811	UTRAP 0x019; UTRAP 0x01a; UTRAP 0x01b; UTRAP 0x01c; UTRAP 0x01d
812	UTRAP 0x01e; UTRAP 0x01f
813	TRAP T_FPDISABLED		! 020 = fp instr, but EF bit off in psr
814	TRAP T_FP_IEEE_754		! 021 = ieee 754 exception
815	TRAP T_FP_OTHER			! 022 = other fp exception
816	TRAP T_TAGOF			! 023 = tag overflow
817	KCLEANWIN			! 024-027 = clean window trap
818	TRAP T_DIV0			! 028 = divide by zero
819	UTRAP 0x029			! 029 = internal processor error
820	UTRAP 0x02a; UTRAP 0x02b; UTRAP 0x02c; UTRAP 0x02d; UTRAP 0x02e; UTRAP 0x02f
821kdatafault:
822	VTRAP T_DATAFAULT, winfault	! 030 = data fetch fault
823	UTRAP 0x031			! 031 = data MMU miss -- no MMU
824	VTRAP T_DATA_ERROR, winfault	! 032 = data fetch fault
825	VTRAP T_DATA_PROT, winfault	! 033 = data fetch fault
826	VTRAP T_ALIGN, checkalign	! 034 = address alignment error -- we could fix it inline...
827!	sir; nop; TA8	! DEBUG -- trap all kernel alignment errors
828	TRAP T_LDDF_ALIGN		! 035 = LDDF address alignment error -- we could fix it inline...
829	TRAP T_STDF_ALIGN		! 036 = STDF address alignment error -- we could fix it inline...
830	TRAP T_PRIVACT			! 037 = privileged action
831	UTRAP 0x038; UTRAP 0x039; UTRAP 0x03a; UTRAP 0x03b; UTRAP 0x03c;
832	UTRAP 0x03d; UTRAP 0x03e; UTRAP 0x03f;
833	VTRAP T_ASYNC_ERROR, winfault	! 040 = data fetch fault
834	SOFTINT4U 1, IE_L1		! 041 = level 1 interrupt
835	HARDINT4U 2			! 042 = level 2 interrupt
836	HARDINT4U 3			! 043 = level 3 interrupt
837	SOFTINT4U 4, IE_L4		! 044 = level 4 interrupt
838	HARDINT4U 5			! 045 = level 5 interrupt
839	SOFTINT4U 6, IE_L6		! 046 = level 6 interrupt
840	HARDINT4U 7			! 047 = level 7 interrupt
841	HARDINT4U 8			! 048 = level 8 interrupt
842	HARDINT4U 9			! 049 = level 9 interrupt
843	HARDINT4U 10			! 04a = level 10 interrupt
844	HARDINT4U 11			! 04b = level 11 interrupt
845	HARDINT4U 12			! 04c = level 12 interrupt
846	HARDINT4U 13			! 04d = level 13 interrupt
847	HARDINT4U 14			! 04e = level 14 interrupt
848	HARDINT4U 15			! 04f = nonmaskable interrupt
849	UTRAP 0x050; UTRAP 0x051; UTRAP 0x052; UTRAP 0x053; UTRAP 0x054; UTRAP 0x055
850	UTRAP 0x056; UTRAP 0x057; UTRAP 0x058; UTRAP 0x059; UTRAP 0x05a; UTRAP 0x05b
851	UTRAP 0x05c; UTRAP 0x05d; UTRAP 0x05e; UTRAP 0x05f
852	VTRAP 0x060, interrupt_vector; ! 060 = interrupt vector
853	TRAP T_PA_WATCHPT		! 061 = physical address data watchpoint
854	TRAP T_VA_WATCHPT		! 062 = virtual address data watchpoint
855	VTRAP T_ECCERR, cecc_catch	! 063 = Correctable ECC error
856kfast_IMMU_miss:			! 064 = fast instr access MMU miss
857	IMMU_MISS 9
858kfast_DMMU_miss:			! 068 = fast data access MMU miss
859	DMMU_MISS 10
860kfast_DMMU_protection:			! 06c = fast data access MMU protection
861	DMMU_PROT
862	UTRAP 0x070			! Implementation dependent traps
863	UTRAP 0x071; UTRAP 0x072; UTRAP 0x073; UTRAP 0x074; UTRAP 0x075; UTRAP 0x076
864	UTRAP 0x077; UTRAP 0x078; UTRAP 0x079; UTRAP 0x07a; UTRAP 0x07b; UTRAP 0x07c
865	UTRAP 0x07d; UTRAP 0x07e; UTRAP 0x07f
866nucleus_uspill:
867	USPILL64 1,ASI_AIUS	! 0x080 spill_0_normal -- used to save user windows
868	SPILL32 2,ASI_AIUS	! 0x084 spill_1_normal
869	SPILLBOTH 1b,2b,ASI_AIUS	! 0x088 spill_2_normal
870	UTRAP 0x08c; TA32	! 0x08c spill_3_normal
871nucleus_kspill:
872	UTRAP 0x090; TA32	! 0x090 spill_4_normal -- used to save supervisor windows
873	SPILL64 1,ASI_N		! 0x094 spill_5_normal
874	SPILL32 2,ASI_N		! 0x098 spill_6_normal
875	SPILLBOTH 1b,2b,ASI_N	! 0x09c spill_7_normal
876nucleus_uspillk:
877	USPILL64 1,ASI_AIUS	! 0x0a0 spill_0_other -- used to save user windows in nucleus mode
878	SPILL32 2,ASI_AIUS	! 0x0a4 spill_1_other
879	SPILLBOTH 1b,2b,ASI_AIUS	! 0x0a8 spill_2_other
880	UTRAP 0x0ac; TA32	! 0x0ac spill_3_other
881	UTRAP 0x0b0; TA32	! 0x0b0 spill_4_other
882	UTRAP 0x0b4; TA32	! 0x0b4 spill_5_other
883	UTRAP 0x0b8; TA32	! 0x0b8 spill_6_other
884	UTRAP 0x0bc; TA32	! 0x0bc spill_7_other
885nucleus_ufill:
886	UFILL64 1,ASI_AIUS	! 0x0c0 fill_0_normal -- used to fill windows when running nucleus mode from user
887	FILL32 2,ASI_AIUS	! 0x0c4 fill_1_normal
888	FILLBOTH 1b,2b,ASI_AIUS	! 0x0c8 fill_2_normal
889	UTRAP 0x0cc; TA32	! 0x0cc fill_3_normal
890nucleus_sfill:
891	UTRAP 0x0d0; TA32	! 0x0d0 fill_4_normal -- used to fill windows when running nucleus mode from supervisor
892	FILL64 1,ASI_N		! 0x0d4 fill_5_normal
893	FILL32 2,ASI_N		! 0x0d8 fill_6_normal
894	FILLBOTH 1b,2b,ASI_N	! 0x0dc fill_7_normal
895nucleus_kfill:
896	UFILL64 1,ASI_AIUS	! 0x0e0 fill_0_other -- used to fill user windows when running nucleus mode -- will we ever use this?
897	FILL32 2,ASI_AIUS	! 0x0e4 fill_1_other
898	FILLBOTH 1b,2b,ASI_AIUS	! 0x0e8 fill_2_other
899	UTRAP 0x0ec; TA32	! 0x0ec fill_3_other
900	UTRAP 0x0f0; TA32	! 0x0f0 fill_4_other
901	UTRAP 0x0f4; TA32	! 0x0f4 fill_5_other
902	UTRAP 0x0f8; TA32	! 0x0f8 fill_6_other
903	UTRAP 0x0fc; TA32	! 0x0fc fill_7_other
904nucleus_syscall:
905	SYSCALL			! 0x100 = sun syscall
906	TRAP T_BREAKPOINT	! 0x101 = pseudo breakpoint instruction
907	STRAP 0x102; STRAP 0x103; STRAP 0x104; STRAP 0x105; STRAP 0x106; STRAP 0x107
908	SYSCALL			! 0x108 = svr4 syscall
909	SYSCALL			! 0x109 = bsd syscall
910	TRAP T_KGDB_EXEC	! 0x10a = enter kernel gdb on kernel startup
911	STRAP 0x10b; STRAP 0x10c; STRAP 0x10d; STRAP 0x10e; STRAP 0x10f;
912	STRAP 0x110; STRAP 0x111; STRAP 0x112; STRAP 0x113; STRAP 0x114; STRAP 0x115; STRAP 0x116; STRAP 0x117
913	STRAP 0x118; STRAP 0x119; STRAP 0x11a; STRAP 0x11b; STRAP 0x11c; STRAP 0x11d; STRAP 0x11e; STRAP 0x11f
914	STRAP 0x120; STRAP 0x121; STRAP 0x122; STRAP 0x123; STRAP 0x124; STRAP 0x125; STRAP 0x126; STRAP 0x127
915	STRAP 0x128; STRAP 0x129; STRAP 0x12a; STRAP 0x12b; STRAP 0x12c; STRAP 0x12d; STRAP 0x12e; STRAP 0x12f
916	STRAP 0x130; STRAP 0x131; STRAP 0x132; STRAP 0x133; STRAP 0x134; STRAP 0x135; STRAP 0x136; STRAP 0x137
917	STRAP 0x138; STRAP 0x139; STRAP 0x13a; STRAP 0x13b; STRAP 0x13c; STRAP 0x13d; STRAP 0x13e; STRAP 0x13f
918	STRAP 0x140; STRAP 0x141; STRAP 0x142; STRAP 0x143; STRAP 0x144; STRAP 0x145; STRAP 0x146; STRAP 0x147
919	STRAP 0x148; STRAP 0x149; STRAP 0x14a; STRAP 0x14b; STRAP 0x14c; STRAP 0x14d; STRAP 0x14e; STRAP 0x14f
920	STRAP 0x150; STRAP 0x151; STRAP 0x152; STRAP 0x153; STRAP 0x154; STRAP 0x155; STRAP 0x156; STRAP 0x157
921	STRAP 0x158; STRAP 0x159; STRAP 0x15a; STRAP 0x15b; STRAP 0x15c; STRAP 0x15d; STRAP 0x15e; STRAP 0x15f
922	STRAP 0x160; STRAP 0x161; STRAP 0x162; STRAP 0x163; STRAP 0x164; STRAP 0x165; STRAP 0x166; STRAP 0x167
923	STRAP 0x168; STRAP 0x169; STRAP 0x16a; STRAP 0x16b; STRAP 0x16c; STRAP 0x16d; STRAP 0x16e; STRAP 0x16f
924	STRAP 0x170; STRAP 0x171; STRAP 0x172; STRAP 0x173; STRAP 0x174; STRAP 0x175; STRAP 0x176; STRAP 0x177
925	STRAP 0x178; STRAP 0x179; STRAP 0x17a; STRAP 0x17b; STRAP 0x17c; STRAP 0x17d; STRAP 0x17e; STRAP 0x17f
926	! Traps beyond 0x17f are reserved
927	UTRAP 0x180; UTRAP 0x181; UTRAP 0x182; UTRAP 0x183; UTRAP 0x184; UTRAP 0x185; UTRAP 0x186; UTRAP 0x187
928	UTRAP 0x188; UTRAP 0x189; UTRAP 0x18a; UTRAP 0x18b; UTRAP 0x18c; UTRAP 0x18d; UTRAP 0x18e; UTRAP 0x18f
929	UTRAP 0x190; UTRAP 0x191; UTRAP 0x192; UTRAP 0x193; UTRAP 0x194; UTRAP 0x195; UTRAP 0x196; UTRAP 0x197
930	UTRAP 0x198; UTRAP 0x199; UTRAP 0x19a; UTRAP 0x19b; UTRAP 0x19c; UTRAP 0x19d; UTRAP 0x19e; UTRAP 0x19f
931	UTRAP 0x1a0; UTRAP 0x1a1; UTRAP 0x1a2; UTRAP 0x1a3; UTRAP 0x1a4; UTRAP 0x1a5; UTRAP 0x1a6; UTRAP 0x1a7
932	UTRAP 0x1a8; UTRAP 0x1a9; UTRAP 0x1aa; UTRAP 0x1ab; UTRAP 0x1ac; UTRAP 0x1ad; UTRAP 0x1ae; UTRAP 0x1af
933	UTRAP 0x1b0; UTRAP 0x1b1; UTRAP 0x1b2; UTRAP 0x1b3; UTRAP 0x1b4; UTRAP 0x1b5; UTRAP 0x1b6; UTRAP 0x1b7
934	UTRAP 0x1b8; UTRAP 0x1b9; UTRAP 0x1ba; UTRAP 0x1bb; UTRAP 0x1bc; UTRAP 0x1bd; UTRAP 0x1be; UTRAP 0x1bf
935	UTRAP 0x1c0; UTRAP 0x1c1; UTRAP 0x1c2; UTRAP 0x1c3; UTRAP 0x1c4; UTRAP 0x1c5; UTRAP 0x1c6; UTRAP 0x1c7
936	UTRAP 0x1c8; UTRAP 0x1c9; UTRAP 0x1ca; UTRAP 0x1cb; UTRAP 0x1cc; UTRAP 0x1cd; UTRAP 0x1ce; UTRAP 0x1cf
937	UTRAP 0x1d0; UTRAP 0x1d1; UTRAP 0x1d2; UTRAP 0x1d3; UTRAP 0x1d4; UTRAP 0x1d5; UTRAP 0x1d6; UTRAP 0x1d7
938	UTRAP 0x1d8; UTRAP 0x1d9; UTRAP 0x1da; UTRAP 0x1db; UTRAP 0x1dc; UTRAP 0x1dd; UTRAP 0x1de; UTRAP 0x1df
939	UTRAP 0x1e0; UTRAP 0x1e1; UTRAP 0x1e2; UTRAP 0x1e3; UTRAP 0x1e4; UTRAP 0x1e5; UTRAP 0x1e6; UTRAP 0x1e7
940	UTRAP 0x1e8; UTRAP 0x1e9; UTRAP 0x1ea; UTRAP 0x1eb; UTRAP 0x1ec; UTRAP 0x1ed; UTRAP 0x1ee; UTRAP 0x1ef
941	UTRAP 0x1f0; UTRAP 0x1f1; UTRAP 0x1f2; UTRAP 0x1f3; UTRAP 0x1f4; UTRAP 0x1f5; UTRAP 0x1f6; UTRAP 0x1f7
942	UTRAP 0x1f8; UTRAP 0x1f9; UTRAP 0x1fa; UTRAP 0x1fb; UTRAP 0x1fc; UTRAP 0x1fd; UTRAP 0x1fe; UTRAP 0x1ff
943
#ifdef SUN4V

	/*
	 * sun4v trap table.  Must be 32KB aligned: traps taken at TL=0
	 * vector into the first half, traps taken at TL>0 into the second
	 * half.  Each trap vector is 32 bytes; the register window
	 * spill/fill vectors occupy 4 vector slots (128 bytes) each.
	 * The first VTRAP argument is the trap type handed to the handler;
	 * it must match the vector number for the hardware-defined traps.
	 */
	.align	0x8000
	.globl	trapbase_sun4v
trapbase_sun4v:
	sun4v_tl0_reserved 8				! 0x0-0x7
	VTRAP T_INST_EXCEPT, sun4v_tl0_itsb_miss	! 0x8
	VTRAP T_TEXTFAULT, sun4v_tl0_itsb_miss		! 0x9
	sun4v_tl0_reserved 6				! 0xa-0xf
	TRAP T_ILLINST					! 0x10
	TRAP T_PRIVINST					! 0x11
	sun4v_tl0_reserved 14				! 0x12-0x1f
	TRAP T_FPDISABLED				! 0x20
	TRAP T_FP_IEEE_754				! 0x21
	TRAP T_FP_OTHER					! 0x22
	TRAP T_TAGOF					! 0x23
	UCLEANWIN					! 0x24-0x27
	TRAP T_DIV0					! 0x28
	sun4v_tl0_reserved 7				! 0x29-0x2f
	VTRAP T_DATAFAULT, sun4v_datatrap		! 0x30
	VTRAP T_DATA_MMU_MISS, sun4v_tl0_dtsb_miss	! 0x31
	sun4v_tl0_reserved 2				! 0x32-0x33
	TRAP T_ALIGN					! 0x34
	TRAP T_LDDF_ALIGN				! 0x35
	TRAP T_STDF_ALIGN				! 0x36
	TRAP T_PRIVACT					! 0x37
	TRAP T_LDQF_ALIGN				! 0x38
	TRAP T_STQF_ALIGN				! 0x39
	sun4v_tl0_reserved 7				! 0x3a-0x40
	HARDINT4U 1					! 0x41
	HARDINT4U 2					! 0x42
	HARDINT4U 3					! 0x43
	HARDINT4U 4					! 0x44
	HARDINT4U 5					! 0x45
	HARDINT4U 6					! 0x46
	HARDINT4U 7					! 0x47
	HARDINT4U 8					! 0x48
	HARDINT4U 9					! 0x49
	HARDINT4U 10					! 0x4a
	HARDINT4U 11					! 0x4b
	HARDINT4U 12					! 0x4c
	HARDINT4U 13					! 0x4d
	HARDINT4U 14					! 0x4e
	HARDINT4U 15					! 0x4f
	sun4v_tl0_reserved 18				! 0x50-0x61
	TRAP T_VA_WATCHPT				! 0x62
	sun4v_tl0_reserved 9				! 0x63-0x6b
	VTRAP 0x6c, sun4v_tl0_dtsb_prot			! 0x6c
	sun4v_tl0_reserved 15				! 0x6d-0x7b
	VTRAP 0x7c, sun4v_cpu_mondo			! 0x7c
	VTRAP 0x7d, sun4v_dev_mondo			! 0x7d
	TRAP 0x7e					! 0x7e
	TRAP 0x7f					! 0x7f
	USPILL64 uspill8v, ASI_AIUS			! 0x80
	SPILL32 uspill4v, ASI_AIUS			! 0x84
	SPILLBOTH uspill8v,uspill4v, ASI_AIUS		! 0x88
	sun4v_tl0_unused 4				! 0x8c
	sun4v_tl0_unused 4				! 0x90
	SPILL64 kspill8v, ASI_N				! 0x94
	SPILL32 kspill4v, ASI_N				! 0x98
	SPILLBOTH kspill8v, kspill4v, ASI_N		! 0x9c
	USPILL64 uspillk8v, ASI_AIUS			! 0xa0
	SPILL32 uspillk4v, ASI_AIUS			! 0xa4
	SPILLBOTH uspillk8v, uspillk4v, ASI_AIUS	! 0xa8
	sun4v_tl0_unused 4				! 0xac
	sun4v_tl0_unused 16				! 0xb0-0xbc
	UFILL64 ufill8v, ASI_AIUS			! 0xc0
	FILL32 ufill4v, ASI_AIUS			! 0xc4
	FILLBOTH ufill8v, ufill4v, ASI_AIUS		! 0xc8
	sun4v_tl0_unused 4				! 0xcc
	sun4v_tl0_unused 4				! 0xd0
	FILL64 kfill8v, ASI_N				! 0xd4
	FILL32 kfill4v, ASI_N				! 0xd8
	FILLBOTH kfill8v, kfill4v, ASI_N		! 0xdc
	UFILL64 ufillk8v, ASI_AIUS			! 0xe0
	FILL32 ufillk4v, ASI_AIUS			! 0xe4
	FILLBOTH ufillk8v, ufillk4v, ASI_AIUS		! 0xe8
	sun4v_tl0_unused 4				! 0xec
	sun4v_tl0_unused 16				! 0xf0-0xfc
	SYSCALL						! 0x100
	TRAP T_BREAKPOINT				! 0x101
	sun4v_tl0_unused 6				! 0x102-0x107
	SYSCALL						! 0x108
	SYSCALL						! 0x109
	sun4v_tl0_unused 54				! 0x10a-0x13f
	SYSCALL						! 0x140
	SYSCALL						! 0x141
	SYSCALL						! 0x142
	SYSCALL						! 0x143
	sun4v_tl0_unused 60				! 0x144-0x17f
	sun4v_tl0_reserved 128				! 0x180-0x1ff

	! Second half: traps taken at TL>0.
	sun4v_tl1_reserved 8				! 0x0-0x7
	TRAP T_INST_EXCEPT				! 0x8
	TRAP T_TEXTFAULT				! 0x9
	sun4v_tl1_reserved 6				! 0xa-0xf
	TRAP T_ILLINST					! 0x10
	TRAP T_PRIVINST					! 0x11
	sun4v_tl1_reserved 14				! 0x12-0x1f
	TRAP T_FPDISABLED				! 0x20
	TRAP T_FP_IEEE_754				! 0x21
	TRAP T_FP_OTHER					! 0x22
	TRAP T_TAGOF					! 0x23
	KCLEANWIN					! 0x24-0x27
	TRAP T_DIV0					! 0x28
	sun4v_tl1_reserved 7				! 0x29-0x2f
	VTRAP T_DATAFAULT, sun4v_tl1_ptbl_miss		! 0x30
	VTRAP T_DATA_MMU_MISS, sun4v_tl1_dtsb_miss	! 0x31
	VTRAP T_DATA_ERROR, sun4v_tl1_ptbl_miss		! 0x32
	VTRAP T_DATA_PROT, sun4v_tl1_ptbl_miss		! 0x33
!	sun4v_tl1_reserved 2				! 0x32-0x33
	VTRAP T_ALIGN, sun4v_tl1_ptbl_miss		! 0x34
	TRAP T_LDDF_ALIGN				! 0x35
	TRAP T_STDF_ALIGN				! 0x36
	TRAP T_PRIVACT					! 0x37
	TRAP T_LDQF_ALIGN				! 0x38
	TRAP T_STQF_ALIGN				! 0x39
	sun4v_tl1_reserved 40				! 0x3a-0x61
	TRAP T_VA_WATCHPT				! 0x62
	sun4v_tl1_reserved 9				! 0x63-0x6b
	VTRAP 0x6c, sun4v_tl1_dtsb_prot			! 0x6c
	sun4v_tl1_reserved 19				! 0x6d-0x7f
	sun4v_tl1_uspill_normal				! 0x80
	sun4v_tl1_uspill_normal				! 0x84
	sun4v_tl1_uspill_normal				! 0x88
	sun4v_tl1_unused 4				! 0x8c
	sun4v_tl1_unused 4				! 0x90
	sun4v_tl1_kspill_normal				! 0x94
	sun4v_tl1_kspill_normal				! 0x98
	sun4v_tl1_kspill_normal				! 0x9c
	sun4v_tl1_uspill_other				! 0xa0
	sun4v_tl1_uspill_other				! 0xa4
	sun4v_tl1_uspill_other				! 0xa8
	sun4v_tl1_unused 4				! 0xac
	sun4v_tl1_unused 16				! 0xb0-0xbc
	sun4v_tl1_unused 64				! 0xc0-0xfc
	sun4v_tl1_unused 128				! 0x100-0x17f
	sun4v_tl1_reserved 128				! 0x180-0x1ff

#endif
1084
1085/*
1086 * v9 machines do not have a trap window.
1087 *
1088 * When we take a trap the trap state is pushed on to the stack of trap
1089 * registers, interrupts are disabled, then we switch to an alternate set
1090 * of global registers.
1091 *
1092 * The trap handling code needs to allocate a trap frame on the kernel, or
1093 * for interrupts, the interrupt stack, save the out registers to the trap
1094 * frame, then switch to the normal globals and save them to the trap frame
1095 * too.
1096 *
1097 * XXX it would be good to save the interrupt stack frame to the kernel
 * stack so we wouldn't have to copy it later if we needed to handle an AST.
1099 *
1100 * Since kernel stacks are all on one page and the interrupt stack is entirely
1101 * within the locked TLB, we can use physical addressing to save out our
1102 * trap frame so we don't trap during the TRAP_SETUP operation.  There
1103 * is unfortunately no supportable method for issuing a non-trapping save.
1104 *
1105 * However, if we use physical addresses to save our trapframe, we will need
1106 * to clear out the data cache before continuing much further.
1107 *
1108 * In short, what we need to do is:
1109 *
1110 *	all preliminary processing is done using the alternate globals
1111 *
1112 *	When we allocate our trap windows we must give up our globals because
1113 *	their state may have changed during the save operation
1114 *
1115 *	we need to save our normal globals as soon as we have a stack
1116 *
1117 * Finally, we may now call C code.
1118 *
1119 * This macro will destroy %g5-%g7.  %g0-%g4 remain unchanged.
1120 */
1121
1122 /*
1123  * Other misc. design criteria:
1124  *
1125  * When taking an address fault, fault info is in the sfsr, sfar,
1126  * TLB_TAG_ACCESS registers.  If we take another address fault
1127  * while trying to handle the first fault then that information,
1128  * the only information that tells us what address we trapped on,
1129  * can potentially be lost.  This trap can be caused when allocating
1130  * a register window with which to handle the trap because the save
1131  * may try to store or restore a register window that corresponds
1132  * to part of the stack that is not mapped.  Preventing this trap,
1133  * while possible, is much too complicated to do in a trap handler,
1134  * and then we will need to do just as much work to restore the processor
1135  * window state.
1136  *
1137  * Possible solutions to the problem:
1138  *
1139  * Since we have separate AG, MG, and IG, we could have all traps
1140  * above level-1 preserve AG and use other registers.  This causes
1141  * a problem for the return from trap code which is coded to use
1142  * alternate globals only.
1143  *
1144  * We could store the trapframe and trap address info to the stack
1145  * using physical addresses.  Then we need to read it back using
1146  * physical addressing, or flush the D$.
1147  *
1148  * We could identify certain registers to hold address fault info.
1149  * This means that these registers need to be preserved across all
 * fault handling.  But since we only have 7 usable globals, that
1151  * really puts a cramp in our style.
1152  *
1153  * Finally, there is the issue of returning from kernel mode to user
1154  * mode.  If we need to issue a restore of a user window in kernel
1155  * mode, we need the window control registers in a user mode setup.
1156  * If the trap handlers notice the register windows are in user mode,
1157  * they will allocate a trapframe at the bottom of the kernel stack,
1158  * overwriting the frame we were trying to return to.  This means that
1159  * we must complete the restoration of all registers *before* switching
1160  * to a user-mode window configuration.
1161  *
1162  * Essentially we need to be able to write re-entrant code w/no stack.
1163  */
1164	.text
1165
	.macro	TRAP_SETUP stackspace
	! Allocate a trap frame of \stackspace bytes (a negative offset) on
	! either the current kernel stack, or -- if we trapped from user
	! mode -- at the top of this process's kernel stack (cpcb + USPACE).
	! Saves %i0-%i7 to the frame and, for user traps, switches the
	! window state and MMU primary context over to the kernel.
	! Destroys %g5-%g7; %g1-%g4 are left untouched for the caller.
	GET_CPCB(%g6)
	sethi	%hi((\stackspace)), %g5
	set	USPACE-BIAS, %g7
	or	%g5, %lo((\stackspace)), %g5

	sra	%g5, 0, %g5			! Sign extend the damn thing

	add	%g6, %g7, %g6			! %g6 = cpcb + USPACE - BIAS (top of kernel stack)
	rdpr	%wstate, %g7			! Find if we're from user mode

	sub	%g7, WSTATE_KERN, %g7		! Compare & leave in register
	movrz	%g7, %sp, %g6			! Select old (kernel) stack or base of kernel stack
	add	%g6, %g5, %g6			! Allocate a stack frame
	SPILL stx, %g6 + CC64FSZ + BIAS + TF_L, 8, ! save local + in
	save	%g6, 0, %sp			! If we fault we should come right back here
	stx	%i0, [%sp + CC64FSZ + BIAS + TF_O + (0*8)] ! Save out registers to trap frame
	stx	%i1, [%sp + CC64FSZ + BIAS + TF_O + (1*8)]
	stx	%i2, [%sp + CC64FSZ + BIAS + TF_O + (2*8)]
	stx	%i3, [%sp + CC64FSZ + BIAS + TF_O + (3*8)]
	stx	%i4, [%sp + CC64FSZ + BIAS + TF_O + (4*8)]
	stx	%i5, [%sp + CC64FSZ + BIAS + TF_O + (5*8)]

	stx	%i6, [%sp + CC64FSZ + BIAS + TF_O + (6*8)]
	brz,pt	%g7, 1f			! If we were in kernel mode start saving globals
	 stx	%i7, [%sp + CC64FSZ + BIAS + TF_O + (7*8)]

	! came from user mode -- switch to kernel mode stack
	rdpr	%canrestore, %g5		! Fixup register window state registers
	wrpr	%g0, 0, %canrestore
	wrpr	%g0, %g5, %otherwin		! User windows become "other" windows
	wrpr	%g0, WSTATE_KERN, %wstate	! Enable kernel mode window traps -- now we can trap again

	mov	CTX_PRIMARY, %g7
	SET_MMU_CONTEXTID(%g0, %g7)		! Switch MMU to kernel primary context
	sethi	%hi(KERNBASE), %g5
	membar	#Sync				! XXXX Should be taken care of by flush
	flush	%g5				! Some convenient address that won't trap
1:
	.endm
1206
1207/*
1208 * Interrupt setup is almost exactly like trap setup, but we need to
1209 * go to the interrupt stack if (a) we came from user mode or (b) we
1210 * came from kernel mode on the kernel stack.
1211 *
1212 * We don't guarantee that any registers are preserved during this operation,
1213 * so we can be more efficient.
1214 */
	.macro	INTR_SETUP stackspace
	! Like TRAP_SETUP, but the trap frame goes on the interrupt stack
	! unless we trapped from kernel mode while already running on it
	! (%sp within the INTSTACK..EINTSTACK range).  No registers are
	! preserved across this macro.
	rdpr	%wstate, %g7			! Find if we're from user mode

	GET_CPUINFO_VA(%g6)
	sethi	%hi(EINTSTACK-INTSTACK), %g4
	sub	%g6, BIAS, %g6			! Base of interrupt stack
	dec	%g4				! Make it into a mask

	sub	%g6, %sp, %g1			! Offset from interrupt stack
	sethi	%hi((\stackspace)), %g5

	or	%g5, %lo((\stackspace)), %g5

	andn	%g1, %g4, %g4			! Are we out of the interrupt stack range?
	xor	%g7, WSTATE_KERN, %g3		! %g3 == 0 iff we trapped from kernel mode

	sra	%g5, 0, %g5			! Sign extend the damn thing
	or	%g3, %g4, %g4			! Definitely not off the interrupt stack

	movrz	%g4, %sp, %g6			! Keep the current stack if kernel mode and on-stack

	add	%g6, %g5, %g6			! Allocate a stack frame
	SPILL stx, %g6 + CC64FSZ + BIAS + TF_L, 8,  ! save local+in to trap frame
	save	%g6, 0, %sp			! If we fault we should come right back here
	stx	%i0, [%sp + CC64FSZ + BIAS + TF_O + (0*8)] ! Save out registers to trap frame
	stx	%i1, [%sp + CC64FSZ + BIAS + TF_O + (1*8)]
	stx	%i2, [%sp + CC64FSZ + BIAS + TF_O + (2*8)]
	stx	%i3, [%sp + CC64FSZ + BIAS + TF_O + (3*8)]
	stx	%i4, [%sp + CC64FSZ + BIAS + TF_O + (4*8)]

	stx	%i5, [%sp + CC64FSZ + BIAS + TF_O + (5*8)]
	stx	%i6, [%sp + CC64FSZ + BIAS + TF_O + (6*8)]
	brz,pt	%g3, 1f				! If we were in kernel mode start saving globals
	 stx	%i7, [%sp + CC64FSZ + BIAS + TF_O + (7*8)]

	! came from user mode -- switch to kernel mode stack
	rdpr	%canrestore, %g5		! Fixup register window state registers
	wrpr	%g0, 0, %canrestore
	wrpr	%g0, %g5, %otherwin		! User windows become "other" windows
	wrpr	%g0, WSTATE_KERN, %wstate	! Enable kernel mode window traps -- now we can trap again

	mov	CTX_PRIMARY, %g7
	SET_MMU_CONTEXTID(%g0, %g7)		! Switch MMU to kernel primary context
	sethi	%hi(KERNBASE), %g5
	membar	#Sync				! XXXX Should be taken care of by flush
	flush	%g5				! Some convenient address that won't trap
1:
	.endm
1263
1264/*
1265 * Perform an inline pseg_get(), to retrieve the address of the PTE associated
1266 * to the given virtual address.
1267 * On entry: %g3 = va (won't be modified)
1268 * Registers used: %g4, %g5, %g6
1269 * Branches to the "failure" label if translation invalid, otherwise ends
1270 * with the pte address in %g6.
1271 */
	.macro	PTE_GET failure
	! Walk the 3-level page table for the context taken from the low 13
	! bits of the fault VA in %g3 (TAG ACCESS format: VA | context).
	! Ends with the physical PTE address in %g6, or branches to \failure
	! if the VA is in the hole or an intermediate level is NULL.
	sethi	%hi(0x1fff), %g6		! 8K context mask
	sethi	%hi(ctxbusy), %g4
	or	%g6, %lo(0x1fff), %g6
	ldx	[%g4 + %lo(ctxbusy)], %g4	! %g4 = ctxbusy array
	srax	%g3, HOLESHIFT, %g5		! Check for valid address
	and	%g3, %g6, %g6			! Isolate context
	inc	%g5				! (0 or -1) -> (1 or 0)
	sllx	%g6, 3, %g6			! Make it into an offset into ctxbusy
	ldx	[%g4+%g6], %g4			! Load up our page table.
	srlx	%g3, STSHIFT, %g6		! Segment table index
	cmp	%g5, 1
	bgu,pn %xcc, \failure			! Error!
	 srlx	%g3, PDSHIFT, %g5		! Page directory index
	and	%g6, STMASK, %g6
	sll	%g6, 3, %g6
	and	%g5, PDMASK, %g5
	sll	%g5, 3, %g5
	add	%g6, %g4, %g4			! %g4 = &segment table entry
	ldxa	[%g4] ASI_PHYS_CACHED, %g4	! Load page directory pointer
	srlx	%g3, PTSHIFT, %g6		! Convert to ptab offset
	add	%g5, %g4, %g5
	brz,pn	%g4, \failure			! NULL entry? check somewhere else
	 and	%g6, PTMASK, %g6
	ldxa	[%g5] ASI_PHYS_CACHED, %g4	! Load page table pointer
	sll	%g6, 3, %g6
	brz,pn	%g4, \failure			! NULL entry? check somewhere else
	 add	%g6, %g4, %g6			! %g6 = physical address of the PTE
	.endm
1301
1302/*
1303 * Make sure the TSB get locked for MULTIPROCESSOR kernels.
1304 * On entry: %g2 = TSB pointer
1305 * Registers used: %g5, %g6
1306 */
	.macro	LOCK_TSB
	! Spin until the TSB entry at [%g2] is unlocked, then atomically set
	! the lock bit with casa.  Only the high 32-bit word of the tag
	! (which holds the lock bit) is operated on.  Uses %g5 and %g6.
	! On a uniprocessor kernel this is a no-op.
#ifdef MULTIPROCESSOR
97:
	ld	[%g2], %g6			! High word of the TSB tag
	btst	(TSB_TAG_LOCKED >> 32), %g6
	bnz,pn	%icc, 97b			! Wait until bit is clear
	 or	%g6, (TSB_TAG_LOCKED >> 32), %g5
	casa	[%g2] ASI_NUCLEUS, %g6, %g5	! Try to set the lock bit
	cmp	%g6, %g5
	bne,pn	%icc, 97b			! Wait until we can set it
	 nop
	membar  #StoreStore
#endif
	.endm
1321
1322/*
1323 * This is the MMU protection handler.  It's too big to fit
1324 * in the trap table so I moved it here.  It's relatively simple.
1325 * It looks up the page mapping in the page table associated with
1326 * the trapping context.  It checks to see if the S/W writable bit
1327 * is set.  If so, it sets the H/W write bit, marks the tte modified,
1328 * and enters the mapping into the MMU.  Otherwise it does a regular
1329 * data fault.
1330 */
	ICACHE_ALIGN
dmmu_write_fault:
	! Fast data access MMU protection handler: if the PTE is software
	! writable, set the H/W write + modified bits, update the TSB,
	! demap the stale TLB entry and load the new one.  Anything else
	! (invalid entry, not writable, large page) punts to winfix.
	mov	TLB_TAG_ACCESS, %g3
	ldxa	[%g3] ASI_DMMU, %g3			! Get fault addr from Tag Target
	PTE_GET winfix					! %g6 = physical PTE address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4		! Load the PTE
	brgez,pn %g4, winfix				! Entry invalid?  Punt
	 or	%g4, SUN4U_TLB_MODIFY|SUN4U_TLB_ACCESS|SUN4U_TLB_W, %g7
		! Update the modified bit

	btst	SUN4U_TLB_REAL_W|SUN4U_TLB_W, %g4	! Is it a ref fault?
	bz,pn	%xcc, winfix				! No -- really fault
	/* Need to check for and handle large pages. */
	 srlx	%g4, 61, %g5				! Isolate the size bits
	ldxa	[%g0] ASI_DMMU_8KPTR, %g2		! Load DMMU 8K TSB pointer
	andcc	%g5, 0x3, %g5				! 8K?
	bnz,pn	%icc, winfix				! We punt to the pmap code since we can't handle policy
	 ldxa	[%g0] ASI_DMMU, %g1			! Hard coded for unified 8K TSB		Load DMMU tag target register
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out

	membar	#StoreLoad
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the cas race -- reload and retry
	 or	%g4, SUN4U_TLB_MODIFY|SUN4U_TLB_ACCESS|SUN4U_TLB_W, %g4
		! Update the modified bit

	LOCK_TSB
	stx	%g4, [%g2 + 8]				! Update TSB entry data
	mov	SFSR, %g7
	stx	%g1, [%g2]				! Update TSB entry tag
	nop
	mov	DEMAP_PAGE_SECONDARY, %g1		! Secondary flush
	mov	DEMAP_PAGE_NUCLEUS, %g5			! Nucleus flush
	stxa	%g0, [%g7] ASI_DMMU			! clear out the fault
	membar	#Sync
	sllx	%g3, (64-13), %g7			! Need to demap old entry first
	andn	%g3, 0xfff, %g6				! Page VA for the demap
	movrz	%g7, %g5, %g1				! Pick one (nucleus flush if context bits are 0)
	or	%g6, %g1, %g6
	stxa	%g6, [%g6] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync					! No real reason for this XXXX

	stxa	%g4, [%g0] ASI_DMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	retry
1377
1378/*
1379 * Each memory data access fault from a fast access miss handler comes here.
1380 * We will quickly check if this is an original prom mapping before going
1381 * to the generic fault handler
1382 *
1383 * We will assume that %pil is not lost so we won't bother to save it
1384 * unless we're in an interrupt handler.
1385 *
1386 * On entry:
1387 *	We are on one of the alternate set of globals
1388 *	%g1 = MMU tag target
1389 *	%g2 = 8Kptr
1390 *	%g3 = TLB TAG ACCESS
1391 *
1392 * On return:
1393 *
1394 */
	ICACHE_ALIGN
data_miss:
	! Fast data access MMU miss handler: walk the page table, set the
	! access (referenced) bit if needed, stuff the PTE into the TSB and
	! the TLB.  Invalid or exec-only mappings go to data_nfo, which
	! satisfies speculative loads with an NFO mapping.
	mov	TLB_TAG_ACCESS, %g3			! Get real fault page
	ldxa	[%g3] ASI_DMMU, %g3			! from tag access register
	PTE_GET	data_nfo				! %g6 = physical PTE address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4		! Load the PTE
	brgez,pn %g4, data_nfo				! Entry invalid?  Punt
	 nop

	btst	SUN4U_TLB_EXEC_ONLY, %g4		! no read/write allowed?
	bne,pn	%xcc, data_nfo				! bail
	 nop

	btst	SUN4U_TLB_ACCESS, %g4			! Need to update access bit?
	bne,pt	%xcc, 1f				! Already set -- skip the update
	 or	%g4, SUN4U_TLB_ACCESS, %g7		! Update the access bit
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the cas race -- reload and retry
	 or	%g4, SUN4U_TLB_ACCESS, %g4		! Update the access bit
1:
	LOCK_TSB
	stx	%g4, [%g2 + 8]				! Update TSB entry data
	stx	%g1, [%g2]				! Update TSB entry tag

	stxa	%g4, [%g0] ASI_DMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	CLRTT
	retry
	NOTREACHED
1426/*
1427 * We had a data miss but did not find a mapping.  Insert
1428 * a NFO mapping to satisfy speculative loads and return.
1429 * If this had been a real load, it will re-execute and
1430 * result in a data fault or protection fault rather than
1431 * a TLB miss.  We insert an 8K TTE with the valid and NFO
1432 * bits set.  All others should zero.  The TTE looks like this:
1433 *
1434 *	0x9000000000000000
1435 *
1436 */
data_nfo:
	! No mapping found: load an 8K valid+NFO TTE (data 0x9000000000000000)
	! so speculative loads are satisfied; a real access will re-execute
	! and take a proper data or protection fault instead of a TLB miss.
	sethi	%hi(0x90000000), %g4			! V(0x8)|NFO(0x1)
	sllx	%g4, 32, %g4
	stxa	%g4, [%g0] ASI_DMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	CLRTT
	retry
1444
1445/*
1446 * Handler for making the trap window shiny clean.
1447 *
1448 * If the store that trapped was to a kernel address, panic.
1449 *
1450 * If the store that trapped was to a user address, stick it in the PCB.
1451 * Since we don't want to force user code to use the standard register
1452 * convention if we don't have to, we will not assume that %fp points to
1453 * anything valid.
1454 *
1455 * On entry:
1456 *	We are on one of the alternate set of globals
1457 *	%g1 = %tl - 1, tstate[tl-1], scratch	- local
1458 *	%g2 = %tl				- local
1459 *	%g3 = MMU tag access			- in
1460 *	%g4 = %cwp				- local
1461 *	%g5 = scratch				- local
1462 *	%g6 = cpcb				- local
1463 *	%g7 = scratch				- local
1464 *
1465 * On return:
1466 *
1467 * NB:	 remove most of this from main codepath & cleanup I$
1468 */
winfault:
	mov	TLB_TAG_ACCESS, %g3	! Get real fault page from tag access register
	ldxa	[%g3] ASI_DMMU, %g3	! And put it into the non-MMU alternate regs
winfix:
	! Common entry for faults taken during window spill/fill handling.
	! At TL=1 this is an ordinary data fault; at TL>1 we examine the
	! previous trap level to see whether a spill or fill trapped.
	rdpr	%tl, %g2
	subcc	%g2, 1, %g1
	brlez,pt	%g1, datafault	! Don't go below trap level 1
	 nop

	wrpr	%g1, 0, %tl		! Pop a trap level
	rdpr	%tt, %g7		! Read type of prev. trap
	rdpr	%tstate, %g4		! Try to restore prev %cwp if we were executing a restore
	andn	%g7, 0x3f, %g5		!   window fill traps are all 0b 0000 11xx xxxx

#if 1
	cmp	%g7, 0x68		! If we took a datafault just before this trap
	bne,pt	%icc, winfixfill	! our stack's probably bad so we need to switch somewhere else
	 nop

	!!
	!! Double data fault -- bad stack?
	!!
	wrpr	%g2, %tl	! Restore trap level.
	sir			! Just issue a reset and don't try to recover.
	! NOTE(review): the recovery code below the sir appears unreachable;
	! kept for reference.
	mov	%fp, %l6		! Save the frame pointer
	set	EINTSTACK+USPACE+CC64FSZ-BIAS, %fp ! Set the frame pointer to the middle of the idle stack
	add	%fp, -CC64FSZ, %sp	! Create a stackframe
	wrpr	%g0, 15, %pil		! Disable interrupts, too
	wrpr	%g0, %g0, %canrestore	! Our stack is hozed and our PCB
	wrpr	%g0, 7, %cansave	!  probably is too, so blow away
	ba	slowtrap		!  all our register windows.
	 wrpr	%g0, 0x101, %tt
#endif	/* 1 */
1502
winfixfill:
	! %g5 holds the previous trap type with the low 6 bits masked off.
	cmp	%g5, 0x0c0		!   so we mask lower bits & compare to 0b 0000 1100 0000
	bne,pt	%icc, winfixspill	! Dump our trap frame -- we will retry the fill when the page is loaded
	 cmp	%g5, 0x080		!   window spill traps are all 0b 0000 10xx xxxx

	!!
	!! This was a fill
	!!
	btst	TSTATE_PRIV, %g4	! User mode?
	and	%g4, CWP, %g5		! %g5 = %cwp of trap
	wrpr	%g7, 0, %tt
	bz,a,pt	%icc, datafault		! We were in user mode -- normal fault
	 wrpr	%g5, %cwp		! Restore cwp from before fill trap -- regs should now be consistent

	/*
	 * We're in a pickle here.  We were trying to return to user mode
	 * and the restore of the user window failed, so now we have one valid
	 * kernel window and a user window state.  If we do a TRAP_SETUP now,
	 * our kernel window will be considered a user window and cause a
	 * fault when we try to save it later due to an invalid user address.
	 * If we return to where we faulted, our window state will not be valid
	 * and we will fault trying to enter user with our primary context of zero.
	 *
	 * What we'll do is arrange to have us return to return_from_trap so we will
	 * start the whole business over again.  But first, switch to a kernel window
	 * setup.  Let's see, canrestore and otherwin are zero.  Set WSTATE_KERN and
	 * make sure we're in kernel context and we're done.
	 */

	wrpr	%g2, %g0, %tl				! Restore trap level
	cmp	%g2, 3
	tne	%icc, 1
	rdpr	%tt, %g5
	wrpr	%g0, 1, %tl				! Revert to TL==1 XXX what if this wasn't in rft_user? Oh well.
	wrpr	%g5, %g0, %tt				! Set trap type correctly
/*
 * Here we need to implement the beginning of datafault.
 * TRAP_SETUP expects to come from either kernel mode or
 * user mode with at least one valid register window.  It
 * will allocate a trap frame, save the out registers, and
 * fix the window registers to think we have one user
 * register window.
 *
 * However, under these circumstances we don't have any
 * valid register windows, so we need to clean up the window
 * registers to prevent garbage from being saved to either
 * the user stack or the PCB before calling the datafault
 * handler.
 *
 * We could simply jump to datafault if we could somehow
 * make the handler issue a `saved' instruction immediately
 * after creating the trapframe.
 *
 * The following is duplicated from datafault:
 */
	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! We need to save volatile stuff to AG regs
	wr	%g0, ASI_DMMU, %asi			! We need to re-load trap info
	ldxa	[%g0 + TLB_TAG_ACCESS] %asi, %g1	! Get fault address from tag access register
	ldxa	[SFAR] %asi, %g2			! sync virt addr; must be read first
	ldxa	[SFSR] %asi, %g3			! get sync fault status register
	stxa	%g0, [SFSR] %asi			! Clear out fault now
	membar	#Sync					! No real reason for this XXXX

	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF
	saved						! Blow away that one register window we didn't ever use.
	ba,a,pt	%icc, Ldatafault_internal		! Now we should return directly to user mode
	 nop
1570
1571winfixspill:
1572	bne,a,pt	%xcc, datafault				! Was not a spill -- handle it normally
1573	 wrpr	%g2, 0, %tl				! Restore trap level for now XXXX
1574
1575	!!
1576	!! This was a spill
1577	!!
1578#if 1
1579	btst	TSTATE_PRIV, %g4			! From user mode?
1580!	cmp	%g2, 2					! From normal execution? take a fault.
1581	wrpr	%g2, 0, %tl				! We need to load the fault type so we can
1582	rdpr	%tt, %g5				! overwrite the lower trap and get it to the fault handler
1583	wrpr	%g1, 0, %tl
1584	wrpr	%g5, 0, %tt				! Copy over trap type for the fault handler
1585	and	%g4, CWP, %g5				! find %cwp from trap
1586
1587	be,a,pt	%xcc, datafault				! Let's do a regular datafault.  When we try a save in datafault we'll
1588	 wrpr	%g5, 0, %cwp				!  return here and write out all dirty windows.
1589#endif	/* 1 */
1590	wrpr	%g2, 0, %tl				! Restore trap level for now XXXX
1591	GET_CPUINFO_VA(%g6)
1592	ldx	[%g6 + CI_CPCBPADDR], %g6
1593	wr	%g0, ASI_PHYS_CACHED, %asi		! Use ASI_PHYS_CACHED to prevent possible page faults
1594	/*
1595	 * Now save all user windows to cpcb.
1596	 */
1597	rdpr	%otherwin, %g7
1598	brnz,pt	%g7, 1f
1599	 rdpr	%canrestore, %g5
1600	rdpr	%cansave, %g1
1601	add	%g5, 1, %g7				! add the %cwp window to the list to save
1602!	movrnz	%g1, %g5, %g7				! If we're issuing a save
1603!	mov	%g5, %g7				! DEBUG
1604	wrpr	%g0, 0, %canrestore
1605	wrpr	%g7, 0, %otherwin			! Still in user mode -- need to switch to kernel mode
16061:
1607	mov	%g7, %g1
1608	add	%g6, PCB_NSAVED, %g7
1609	lduba	[%g6 + PCB_NSAVED] %asi, %g7		! Start incrementing pcb_nsaved
1610
1611#ifdef DEBUG
1612	wrpr	%g0, 5, %tl
1613#endif	/* DEBUG */
1614	mov	%g6, %g5
1615	brz,pt	%g7, winfixsave				! If it's in use, panic
1616	 saved						! frob window registers
1617
1618	/* PANIC */
1619#ifdef DEBUG
1620	wrpr	%g2, 0, %tl
1621#endif	/* DEBUG */
1622	mov	%g7, %o2
1623	rdpr	%ver, %o1
1624	sethi	%hi(2f), %o0
1625	and	%o1, CWP, %o1
1626	wrpr	%g0, %o1, %cleanwin
1627	dec	1, %o1
1628	wrpr	%g0, %o1, %cansave			! kludge away any more window problems
1629	wrpr	%g0, 0, %canrestore
1630	wrpr	%g0, 0, %otherwin
1631	or	%lo(2f), %o0, %o0
1632	wrpr	%g0, WSTATE_KERN, %wstate
1633#ifdef DEBUG
1634	set	panicstack-CC64FSZ-BIAS, %sp		! Use panic stack.
1635#else	/* DEBUG */
1636	set	estack0, %sp
1637	ldx	[%sp], %sp
1638	add	%sp, -CC64FSZ-BIAS, %sp			! Overwrite proc 0's stack.
1639#endif	/* DEBUG */
1640	ta	1; nop					! This helps out traptrace.
1641	call	panic					! This needs to be fixed properly but we should panic here
1642	 mov	%g1, %o1
1643	NOTREACHED
1644	.data
16452:
1646	.asciz	"winfault: double invalid window at %p, nsaved=%d"
1647	_ALIGN
1648	.text
16493:
1650	saved
1651	save
1652winfixsave:
1653	sllx	%g7, 7, %g5
1654	add	%g6, %g5, %g5
1655	SPILL	stxa, %g5 + PCB_RW, 8, %asi	! Save the window in the pcb
1656
1657	sllx	%g7, 3, %g5
1658	add	%g6, %g5, %g5
1659	stxa	%sp, [%g5 + PCB_RWSP] %asi
1660
1661!	rdpr	%otherwin, %g1	! Check to see if we's done
1662	dec	%g1
1663	wrpr	%g0, 7, %cleanwin			! BUGBUG -- we should not hardcode this, but I have no spare globals
1664	brnz,pt	%g1, 3b
1665	 inc	%g7					! inc pcb_nsaved
1666
1667	/* fix up pcb fields */
1668	stba	%g7, [%g6 + PCB_NSAVED] %asi		! cpcb->pcb_nsaved = n
1669	/*
1670	 * We just issued a bunch of saves, so %cansave is now 0,
1671	 * probably (if we were doing a flushw then we may have
1672	 * come in with only partially full register windows and
1673	 * it may not be 0).
1674	 *
1675	 * %g7 contains the count of the windows we just finished
1676	 * saving.
1677	 *
1678	 * What we need to do now is move some of the windows from
1679	 * %canrestore to %cansave.  What we should do is take
1680	 * min(%canrestore, %g7) and move that over to %cansave.
1681	 *
1682	 * %g7 is the number of windows we flushed, so we should
1683	 * use that as a base.  Clear out %otherwin, set %cansave
1684	 * to min(%g7, NWINDOWS - 2), set %cleanwin to %canrestore
1685	 * + %cansave and the rest follows:
1686	 *
1687	 * %otherwin = 0
1688	 * %cansave = NWINDOWS - 2 - %canrestore
1689	 */
1690	wrpr	%g0, 0, %otherwin
1691	rdpr	%canrestore, %g1
1692	sub	%g1, %g7, %g1				! Calculate %canrestore - %g7
1693	movrlz	%g1, %g0, %g1				! Clamp at zero
1694	wrpr	%g1, 0, %canrestore			! This is the new canrestore
1695	rdpr	%ver, %g5
1696	and	%g5, CWP, %g5				! NWINDOWS-1
1697	dec	%g5					! NWINDOWS-2
1698	wrpr	%g5, 0, %cleanwin			! Set cleanwin to max, since we're in-kernel
1699	sub	%g5, %g1, %g5				! NWINDOWS-2-%canrestore
1700	wrpr	%g5, 0, %cansave
1701
1702!	rdpr	%tl, %g2				! DEBUG DEBUG -- did we trap somewhere?
1703	sub	%g2, 1, %g1
1704	rdpr	%tt, %g2
1705	wrpr	%g1, 0, %tl				! We will not attempt to re-execute the spill, so dump our trap frame permanently
1706	wrpr	%g2, 0, %tt				! Move trap type from fault frame here, overwriting spill
1707
1708	/* Did we save a user or kernel window ? */
1709!	srax	%g3, 48, %g7				! User or kernel store? (TAG TARGET)
1710	sllx	%g3, (64-13), %g7			! User or kernel store? (TAG ACCESS)
1711	sethi	%hi((2*NBPG)-8), %g7
1712	brnz,pt	%g7, 1f					! User fault -- save windows to pcb
1713	 or	%g7, %lo((2*NBPG)-8), %g7
1714
1715	and	%g4, CWP, %g4				! %g4 = %cwp of trap
1716	wrpr	%g4, 0, %cwp				! Kernel fault -- restore %cwp and force and trap to debugger
1717	!!
1718	!! Here we managed to fault trying to access a kernel window
1719	!! This is a bug.  Switch to the interrupt stack if we aren't
1720	!! there already and then trap into the debugger or panic.
1721	!!
1722	sethi	%hi(EINTSTACK-BIAS), %g6
1723	btst	1, %sp
1724	bnz,pt	%icc, 0f
1725	 mov	%sp, %g1
1726	add	%sp, -BIAS, %g1
17270:
1728	or	%g6, %lo(EINTSTACK-BIAS), %g6
1729	set	(EINTSTACK-INTSTACK), %g7	! XXXXXXXXXX This assumes kernel addresses are unique from user addresses
1730	sub	%g6, %g1, %g2				! Determine if we need to switch to intr stack or not
1731	dec	%g7					! Make it into a mask
1732	andncc	%g2, %g7, %g0				! XXXXXXXXXX This assumes kernel addresses are unique from user addresses */ \
1733	movz	%xcc, %g1, %g6				! Stay on interrupt stack?
1734	add	%g6, -CC64FSZ, %g6			! Allocate a stack frame
1735	mov	%sp, %l6				! XXXXX Save old stack pointer
1736	mov	%g6, %sp
1737	ta	1; nop					! Enter debugger
1738	NOTREACHED
17391:
1740#if 1
1741	/* Now we need to blast away the D$ to make sure we're in sync */
1742dlflush1:
1743	stxa	%g0, [%g7] ASI_DCACHE_TAG
1744	brnz,pt	%g7, 1b
1745	 dec	8, %g7
1746#endif	/* 1 */
1747
1748	/*
1749	 * If we had WSTATE_KERN then we had at least one valid kernel window.
1750	 * We should re-execute the trapping save.
1751	 */
1752	rdpr	%wstate, %g3
1753	mov	%g3, %g3
1754	cmp	%g3, WSTATE_KERN
1755	bne,pt	%icc, 1f
1756	 nop
1757	retry						! Now we can complete the save
17581:
1759	/*
1760	 * Since we had a WSTATE_USER, we had no valid kernel windows.  This should
1761	 * only happen inside TRAP_SETUP or INTR_SETUP. Emulate
1762	 * the instruction, clean up the register windows, then done.
1763	 */
1764	rdpr	%cwp, %g1
1765	inc	%g1
1766	rdpr	%tstate, %g2
1767	wrpr	%g1, %cwp
1768	andn	%g2, CWP, %g2
1769	wrpr	%g1, %g2, %tstate
1770	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate
1771	mov	%g6, %sp
1772	done
1773
1774/*
1775 * Each memory data access fault, from user or kernel mode,
1776 * comes here.
1777 *
1778 * We will assume that %pil is not lost so we won't bother to save it
1779 * unless we're in an interrupt handler.
1780 *
1781 * On entry:
1782 *	We are on one of the alternate set of globals
1783 *	%g1 = MMU tag target
1784 *	%g2 = %tl
1785 *
1786 * On return:
1787 *
1788 */
datafault:
	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! We need to save volatile stuff to AG regs
	wr	%g0, ASI_DMMU, %asi			! We need to re-load trap info
	ldxa	[%g0 + TLB_TAG_ACCESS] %asi, %g1	! Get fault address from tag access register
	ldxa	[SFAR] %asi, %g2			! sync virt addr; must be read first
	ldxa	[SFSR] %asi, %g3			! get sync fault status register
	stxa	%g0, [SFSR] %asi			! Clear out fault now
	membar	#Sync					! No real reason for this XXXX

	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF
Ldatafault_internal:
	INCR uvmexp+V_FAULTS				! uvmexp.faults++ (clobbers %o0,%o1,%o2) should not fault
	mov	%g1, %o0				! Move these to the out regs so we can save the globals
	mov	%g2, %o4
	mov	%g3, %o5

	ldxa	[%g0] ASI_AFAR, %o2			! get async fault address
	ldxa	[%g0] ASI_AFSR, %o3			! get async fault status
	mov	-1, %g7
	stxa	%g7, [%g0] ASI_AFSR			! And clear this out, too
	membar	#Sync					! No real reason for this XXXX

	wrpr	%g0, PSTATE_KERN, %pstate		! Get back to normal globals

	/*
	 * Save the normal globals into the trap frame, interleaved with
	 * reads of the privileged trap state registers.
	 */
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + (1*8)]	! save g1
	rdpr	%tt, %o1				! find out what trap brought us here
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + (2*8)]	! save g2
	rdpr	%tstate, %g1
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + (3*8)]	! (sneak g3 in here)
	rdpr	%tpc, %g2
	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + (4*8)]	! sneak in g4
	rdpr	%tnpc, %g3
	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + (5*8)]	! sneak in g5
	rd	%y, %g4					! save y
	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + (6*8)]	! sneak in g6
	mov	%g2, %o7				! Make the fault address look like the return address
	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + (7*8)]	! sneak in g7

	sth	%o1, [%sp + CC64FSZ + BIAS + TF_TT]
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]		! set tf.tf_psr, tf.tf_pc
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_PC]		! set tf.tf_npc
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_NPC]

	rdpr	%pil, %g5
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_PIL]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_OLDPIL]

#if 1
	rdpr	%tl, %g7
	dec	%g7
	movrlz	%g7, %g0, %g7				! Clamp %tl - 1 at zero
	wrpr	%g0, %g7, %tl		! Revert to kernel mode
#else	/* 1 */
	wrpr	%g0, 0, %tl		! Revert to kernel mode
#endif	/* 1 */
	/* Finish stackframe, call C trap handler */
	flushw						! Get this clean so we won't take any more user faults

	GET_CPUINFO_VA(%g7)

	/*
	 * Right now the registers have the following values:
	 *
	 *	%o0 -- MMU_TAG_ACCESS
	 *	%o1 -- TT
	 *	%o2 -- afar
	 *	%o3 -- afsr
	 *	%o4 -- sfar
	 *	%o5 -- sfsr
	 */

	cmp	%o1, T_DATA_ERROR
	st	%g4, [%sp + CC64FSZ + BIAS + TF_Y]
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Restore default ASI
	be,pn	%icc, data_error
	 wrpr	%g0, PSTATE_INTR, %pstate	! reenable interrupts

	mov	%o0, %o3			! (argument: trap address)
	mov	%g2, %o2			! (argument: trap pc)
	call	data_access_fault		! data_access_fault(&tf, type,
						!	pc, addr, sfva, sfsr)
	 add	%sp, CC64FSZ + BIAS, %o0	! (argument: &tf)

data_recover:
	wrpr	%g0, PSTATE_KERN, %pstate		! disable interrupts
	b	return_from_trap			! go return
	 ldx	[%sp + CC64FSZ + BIAS + TF_TSTATE], %g1		! Load this for return_from_trap
	NOTREACHED
1877
/* Async data error path: hand the afar/afsr info (still in %o2-%o5) to C. */
data_error:
	call	data_access_error		! data_access_error(&tf, type,
						!	afva, afsr, sfva, sfsr)
	 add	%sp, CC64FSZ + BIAS, %o0	! (argument: &tf)
	ba	data_recover			! Share the common return path
	 nop
	NOTREACHED
1885
1886/*
1887 * Each memory instruction access fault from a fast access handler comes here.
1888 * We will quickly check if this is an original prom mapping before going
1889 * to the generic fault handler
1890 *
1891 * We will assume that %pil is not lost so we won't bother to save it
1892 * unless we're in an interrupt handler.
1893 *
1894 * On entry:
1895 *	We are on one of the alternate set of globals
1896 *	%g1 = MMU tag target
1897 *	%g2 = TSB entry ptr
1898 *	%g3 = TLB Tag Access
1899 *
1900 * On return:
1901 *
1902 */
1903
	ICACHE_ALIGN
instr_miss:
	/*
	 * Fast ITLB miss handler: walk the page tables (PTE_GET), verify
	 * the mapping is valid and executable, mark it referenced, update
	 * the TSB, and load it into the ITLB.  Any failure goes to the
	 * generic textfault path.
	 */
	mov	TLB_TAG_ACCESS, %g3			! Get real fault page
	ldxa	[%g3] ASI_IMMU, %g3			! from tag access register
	PTE_GET	textfault				! On success %g6 = pte address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, textfault				! Invalid entry (valid bit is bit 63)?
	 nop

	/* Check if it's an executable mapping. */
	andcc	%g4, SUN4U_TLB_EXEC, %g0
	bz,pn	%xcc, textfault
	 nop

	or	%g4, SUN4U_TLB_ACCESS, %g7		! Update accessed bit
	btst	SUN4U_TLB_ACCESS, %g4			! Need to update access bit?
	bne,pt	%xcc, 1f
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and store it
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the race -- reload and retry
	 or	%g4, SUN4U_TLB_ACCESS, %g4		! Update accessed bit
1:
	LOCK_TSB
	stx	%g4, [%g2 + 8]				! Update TSB entry data
	stx	%g1, [%g2]				! Update TSB entry tag
	stxa	%g4, [%g0] ASI_IMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	CLRTT
	retry
	NOTREACHED
1936	!!
1937	!!  Check our prom mappings -- temporary
1938	!!
1939
1940/*
1941 * Each memory text access fault, from user or kernel mode,
1942 * comes here.
1943 *
1944 * We will assume that %pil is not lost so we won't bother to save it
1945 * unless we're in an interrupt handler.
1946 *
1947 * On entry:
1948 *	We are on one of the alternate set of globals
1949 *	%g1 = MMU tag target
1950 *	%g2 = %tl
1951 *	%g3 = %tl - 1
1952 *
1953 * On return:
1954 *
1955 */
1956
1957
textfault:
	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! We need to save volatile stuff to AG regs
	wr	%g0, ASI_IMMU, %asi
	ldxa	[%g0 + TLB_TAG_ACCESS] %asi, %g1	! Get fault address from tag access register
	ldxa	[SFSR] %asi, %g3			! get sync fault status register
	membar	#LoadStore				! Make sure the SFSR read completes before we clear it
	stxa	%g0, [SFSR] %asi			! Clear out old info
	membar	#Sync					! No real reason for this XXXX

	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF
	INCR uvmexp+V_FAULTS				! uvmexp.faults++ (clobbers %o0,%o1,%o2)

	mov	%g3, %o3				! (argument: sfsr)

	wrpr	%g0, PSTATE_KERN, %pstate		! Switch to normal globals
	ldxa	[%g0] ASI_AFSR, %o4			! get async fault status
	ldxa	[%g0] ASI_AFAR, %o5			! get async fault address
	mov	-1, %o0
	stxa	%o0, [%g0] ASI_AFSR			! Clear this out
	membar	#Sync					! No real reason for this XXXX
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + (1*8)]	! save g1
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + (2*8)]	! save g2
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + (3*8)]	! (sneak g3 in here)
	rdpr	%tt, %o1				! Find out what caused this trap
	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + (4*8)]	! sneak in g4
	rdpr	%tstate, %g1
	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + (5*8)]	! sneak in g5
	rdpr	%tpc, %o2				! sync virt addr; must be read first
	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + (6*8)]	! sneak in g6
	rdpr	%tnpc, %g3
	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + (7*8)]	! sneak in g7
	rd	%y, %g4					! save y

	/* Finish stackframe, call C trap handler */
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]		! set tf.tf_psr, tf.tf_pc
	sth	%o1, [%sp + CC64FSZ + BIAS + TF_TT]! debug

	stx	%o2, [%sp + CC64FSZ + BIAS + TF_PC]
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_NPC]		! set tf.tf_npc

	rdpr	%pil, %g5
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_PIL]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_OLDPIL]

	rdpr	%tl, %g7
	dec	%g7
	movrlz	%g7, %g0, %g7				! Clamp %tl - 1 at zero
	wrpr	%g0, %g7, %tl		! Revert to kernel mode

	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
	flushw						! Get rid of any user windows so we don't deadlock

	GET_CPUINFO_VA(%g7)

	/* Use trap type to see what handler to call */
	cmp	%o1, T_INST_ERROR
	be,pn	%xcc, text_error
	 st	%g4, [%sp + CC64FSZ + BIAS + TF_Y]		! set tf.tf_y

	wrpr	%g0, PSTATE_INTR, %pstate	! reenable interrupts
	call	text_access_fault		! text_access_fault(&tf, type, pc, sfsr)
	 add	%sp, CC64FSZ + BIAS, %o0	! (argument: &tf)
text_recover:
	wrpr	%g0, PSTATE_KERN, %pstate	! disable interrupts
	b	return_from_trap		! go return
	 ldx	[%sp + CC64FSZ + BIAS + TF_TSTATE], %g1	! Load this for return_from_trap
	NOTREACHED
2025
/* Async instruction error path: afar/afsr are still in %o4/%o5. */
text_error:
	wrpr	%g0, PSTATE_INTR, %pstate	! reenable interrupts
	call	text_access_error		! text_access_error(&tf, type, sfva [pc], sfsr,
						!		afva, afsr);
	 add	%sp, CC64FSZ + BIAS, %o0	! (argument: &tf)
	ba	text_recover			! Share the common return path
	 nop
	NOTREACHED
2034
2035#ifdef SUN4V
2036
2037/*
2038 * Perform an inline pseg_get(), to retrieve the address of the PTE associated
2039 * to the given virtual address.
2040 * On entry: %g3 = va (won't be modified), %g6 = context
2041 * Registers used: %g4,%g5, %g6
2042 * Branches to the "failure" label if translation invalid, otherwise ends
2043 * with the pte address in %g6.
2044 */
	.macro	PTE_GET_SUN4V	failure
	sethi	%hi(ctxbusy), %g4
	ldx	[%g4 + %lo(ctxbusy)], %g4
	sllx	%g6, 3, %g6			! Make it into an offset into ctxbusy
	ldx	[%g4+%g6], %g4			! Load up our page table.

	srax	%g3, HOLESHIFT, %g5		! Check for valid address
	brz,pt	%g5, 0f				! Should be zero or -1
	 inc	%g5				! (0 or -1) -> (1 or 0)
	brnz,pn	%g5, \failure			! Error! In hole!
0:
	/* Three-level walk: segment -> page directory -> page table. */
	srlx	%g3, STSHIFT, %g6
	and	%g6, STMASK, %g6		! Index into pm_segs
	sll	%g6, 3, %g6
	add	%g4, %g6, %g4
	ldxa	[%g4] ASI_PHYS_CACHED, %g4	! Load page directory pointer
	srlx	%g3, PDSHIFT, %g6
	and	%g6, PDMASK, %g6
	sll	%g6, 3, %g6
	brz,pn	%g4, \failure			! NULL entry? check somewhere else
	 add	%g4, %g6, %g4
	ldxa	[%g4] ASI_PHYS_CACHED, %g4	! Load page table pointer
	srlx	%g3, PTSHIFT, %g6		! Convert to ptab offset
	and	%g6, PTMASK, %g6
	sll	%g6, 3, %g6
	brz,pn	%g4, \failure			! NULL entry? check somewhere else
	 add	%g4, %g6, %g6			! %g6 = address of the pte
	.endm
2073
2074/*
2075 * Traps for sun4v.
2076 */
2077
/*
 * sun4v TL1 data TSB miss: look up the faulting VA in the page tables
 * and, if a valid mapping exists, mark it referenced and insert it into
 * the data TSB, then retry.  Punts to sun4v_tl1_ptbl_miss on failure.
 */
sun4v_tl1_dtsb_miss:
	GET_MMFSA(%g1)					! %g1 = MMU fault status area (PA)
	add	%g1, 0x48, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = D-fault address
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! %g6 = D-fault context
	PTE_GET_SUN4V sun4v_tl1_ptbl_miss		! On success %g6 = pte address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, sun4v_tl1_ptbl_miss		! Entry invalid?  Punt
	 or	%g4, SUN4V_TLB_ACCESS, %g7		! Update the access bit

	btst	SUN4V_TLB_ACCESS, %g4			! Need to update access bit?
	bne,pt	%xcc, 2f
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the race -- reload and retry
	 or	%g4, SUN4V_TLB_ACCESS, %g4		! Update the modified bit
2:
	sethi	%hi(tsb_dmmu), %g2
	ldx	[%g2 + %lo(tsb_dmmu)], %g2

	mov	%g1, %g7				! Keep the MMFSA pointer
	/* Construct TSB tag word. */
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! Reload the context
	mov	%g3, %g1
	srlx	%g1, 22, %g1				! va >> 22
	sllx	%g6, 48, %g6				! context into the upper bits
	or	%g1, %g6, %g1

	/* TSB index = (va >> PTSHIFT) & (nentries - 1), 16 bytes/entry. */
	srlx	%g3, PTSHIFT, %g3
	sethi	%hi(tsbsize), %g5
	mov	512, %g6
	ld	[%g5 + %lo(tsbsize)], %g5
	sllx	%g6, %g5, %g5				! nentries = 512 << tsbsize
	sub	%g5, 1, %g5
	and	%g3, %g5, %g3
	sllx	%g3, 4, %g3
	add	%g2, %g3, %g2				! %g2 = &tsb[index]

	LOCK_TSB
	stx	%g4, [%g2 + 8]				! TSB entry data
	stx	%g1, [%g2]		! unlock

	retry
	NOTREACHED
2126
/*
 * sun4v TL1 data protection fault: if the mapping is actually writable,
 * set the modified/accessed/writable bits, update the TSB, demap the
 * stale TLB entry via the hypervisor, and retry.  Otherwise punt to
 * sun4v_tl1_ptbl_miss for a real fault.
 */
sun4v_tl1_dtsb_prot:
	GET_MMFSA(%g1)					! %g1 = MMU fault status area (PA)
	add	%g1, 0x48, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = D-fault address
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! %g6 = D-fault context
	PTE_GET_SUN4V sun4v_tl1_ptbl_miss		! On success %g6 = pte address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, sun4v_tl1_ptbl_miss		! Entry invalid?  Punt
	 or	%g4, SUN4V_TLB_MODIFY|SUN4V_TLB_ACCESS|SUN4V_TLB_W, %g7
		! Update the modified bit

#	btst	SUN4V_TLB_REAL_W|SUN4V_TLB_W, %g4	! Is it a ref fault?
	mov	1, %g2
	sllx	%g2, 61, %g2				! %g2 = SUN4V_TLB_REAL_W
	or	%g2, SUN4V_TLB_W, %g2
	btst	%g2, %g4				! Writable at all?
	bz,pn	%xcc, sun4v_tl1_ptbl_miss		! No -- really fault
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the race -- reload and retry
	 or	%g4, SUN4V_TLB_MODIFY|SUN4V_TLB_ACCESS|SUN4V_TLB_W, %g4
		! Update the modified bit
2:
	sethi	%hi(tsb_dmmu), %g2
	ldx	[%g2 + %lo(tsb_dmmu)], %g2

	mov	%g1, %g7				! Keep the MMFSA pointer
	/* Construct TSB tag word. */
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! Reload the context
	mov	%g3, %g1
	srlx	%g1, 22, %g1				! va >> 22
	sllx	%g6, 48, %g6				! context into the upper bits
	or	%g1, %g6, %g1

	/* TSB index = (va >> PTSHIFT) & (nentries - 1), 16 bytes/entry. */
	srlx	%g3, PTSHIFT, %g3
	sethi	%hi(tsbsize), %g5
	mov	512, %g6
	ld	[%g5 + %lo(tsbsize)], %g5
	sllx	%g6, %g5, %g5				! nentries = 512 << tsbsize
	sub	%g5, 1, %g5
	and	%g3, %g5, %g3
	sllx	%g3, 4, %g3
	add	%g2, %g3, %g2				! %g2 = &tsb[index]

	LOCK_TSB
	stx	%g4, [%g2 + 8]				! TSB entry data
	stx	%g1, [%g2]		! unlock

	/* Save %o0-%o2 around the hypervisor demap call. */
	mov	%o0, %g1
	mov	%o1, %g2
	mov	%o2, %g3

#define MAP_DTLB	0x1
#define MAP_ITLB	0x2
#define MMU_UNMAP_ADDR	0x84
	add	%g7, 0x48, %o0
	ldxa	[%o0] ASI_PHYS_CACHED, %o0		! arg0 = fault address
	add	%g7, 0x50, %o1
	ldxa	[%o1] ASI_PHYS_CACHED, %o1		! arg1 = fault context
	mov	MAP_DTLB, %o2				! arg2 = which TLB to demap
	ta	MMU_UNMAP_ADDR				! Hypervisor fast trap

	mov	%g1, %o0
	mov	%g2, %o1
	mov	%g3, %o2

	retry
	NOTREACHED
2199
2200
/*
 * sun4v TL1 page table miss: the fast handlers above failed.  If the
 * trap did not come from the user return path (rft_user_fault_start..
 * rft_user_fault_end), rewrite the trap state so the fault is handled
 * by sun4v_datatrap at TL1 as if it came straight from user mode.
 * Otherwise dispatch on the original trap type (spill/fill flavors).
 */
sun4v_tl1_ptbl_miss:
	rdpr	%tpc, %g1

	/* Was the trapping pc inside the user return sequence? */
	set	rft_user_fault_start, %g2
	cmp	%g1, %g2
	blu,pt	%xcc, 1f
	 set	rft_user_fault_end, %g2
	cmp	%g1, %g2
	bgeu,pt	%xcc, 1f
	 nop

	/* Fixup %cwp. */
	rdpr	%cwp, %g1
	inc	%g1
	wrpr	%g1, %cwp

	/* Rebuild TL1 trap state to run sun4v_datatrap via return_from_trap. */
	rdpr	%tt, %g1
	wrpr	1, %tl
	wrpr	%g1, %tt				! Keep the original trap type
	rdpr	%cwp, %g1
	set	TSTATE_KERN, %g2
	wrpr	%g1, %g2, %tstate
	set	return_from_trap, %g1
	wrpr	%g1, %tpc
	add	%g1, 4, %g1
	wrpr	%g1, %tnpc
	wrpr	%g0, 1, %gl

	ba,pt %xcc, sun4v_datatrap
	 wrpr	WSTATE_KERN, %wstate

1:
	/* Copy %tstate/%tt from TL2 down over the TL1 frame. */
	rdpr	%tstate, %g3
	rdpr	%tt, %g4

	rdpr	%tl, %g1
	dec	%g1
	wrpr	%g1, %tl				! Peek at the TL-1 trap type
	rdpr	%tt, %g2
	inc	%g1
	wrpr	%g1, %tl

	wrpr	%g0, %g3, %tstate
	wrpr	%g0, %g4, %tt

	/* Dispatch on the TL-1 trap type (masked to its group of 16). */
	andn	%g2, 0x00f, %g3
	cmp	%g3, 0x080				! User spill (normal)?
	be,pn	%icc, flush_normals
	 nop
	cmp	%g3, 0x0a0				! User spill (other)?
	be,pn	%icc, flush_others
	 nop
	cmp	%g3, 0x0c0				! User fill?
	be,pn	%icc, ufill_trap
	 nop

	db_enter()					! Unexpected trap type -- debugger
	NOTREACHED
2259
/* Redirect an "otherwin" spill to the pcbspill_others handler. */
flush_others:
	set	pcbspill_others, %g1
	wrpr	%g1, %tnpc
	done
	NOTREACHED

flush_normals:
ufill_trap:
	/*
	 * Rearrange our trap state such that it appears as if we got
	 * this trap directly from user mode.  Then process it at TL = 1.
	 * We'll take the spill/fill trap again once we return to user mode.
	 */
	rdpr	%tt, %g1
	rdpr	%tstate, %g3
	wrpr	%g0, 1, %tl
	wrpr	%g0, %g1, %tt				! Propagate the trap type down to TL1
	rdpr	%tstate, %g2
	wrpr	%g0, 2, %tl
	and	%g2, TSTATE_CWP, %g2			! Take %cwp from the TL1 tstate,
	andn	%g3, TSTATE_CWP, %g3			!  everything else from the TL2 one
	wrpr	%g2, %g3, %tstate
	set	sun4v_datatrap, %g4
	wrpr	%g0, %g4, %tnpc
	done
2285
/*
 * sun4v TL0 data TSB miss: same as the TL1 variant but punts to the
 * full C fault path (sun4v_datatrap) on failure.
 */
sun4v_tl0_dtsb_miss:
	GET_MMFSA(%g1)					! %g1 = MMU fault status area (PA)
	add	%g1, 0x48, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = D-fault address
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! %g6 = D-fault context
	PTE_GET_SUN4V sun4v_datatrap			! On success %g6 = pte address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, sun4v_datatrap			! Entry invalid?  Punt
	 or	%g4, SUN4V_TLB_ACCESS, %g7		! Update the access bit

	btst	SUN4V_TLB_ACCESS, %g4			! Need to update access bit?
	bne,pt	%xcc, 2f
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the race -- reload and retry
	 or	%g4, SUN4V_TLB_ACCESS, %g4		! Update the modified bit
2:
	sethi	%hi(tsb_dmmu), %g2
	ldx	[%g2 + %lo(tsb_dmmu)], %g2

	mov	%g1, %g7				! Keep the MMFSA pointer
	/* Construct TSB tag word. */
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! Reload the context
	mov	%g3, %g1
	srlx	%g1, 22, %g1				! va >> 22
	sllx	%g6, 48, %g6				! context into the upper bits
	or	%g1, %g6, %g1

	/* TSB index = (va >> PTSHIFT) & (nentries - 1), 16 bytes/entry. */
	srlx	%g3, PTSHIFT, %g3
	sethi	%hi(tsbsize), %g5
	mov	512, %g6
	ld	[%g5 + %lo(tsbsize)], %g5
	sllx	%g6, %g5, %g5				! nentries = 512 << tsbsize
	sub	%g5, 1, %g5
	and	%g3, %g5, %g3
	sllx	%g3, 4, %g3
	add	%g2, %g3, %g2				! %g2 = &tsb[index]

	LOCK_TSB
	stx	%g4, [%g2 + 8]				! TSB entry data
	stx	%g1, [%g2]		! unlock

	retry
	NOTREACHED
2334
/*
 * sun4v TL0 data protection fault: same as the TL1 variant but punts
 * to the full C fault path (sun4v_datatrap) on failure.
 */
sun4v_tl0_dtsb_prot:
	GET_MMFSA(%g1)					! %g1 = MMU fault status area (PA)
	add	%g1, 0x48, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = D-fault address
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! %g6 = D-fault context
	PTE_GET_SUN4V sun4v_datatrap			! On success %g6 = pte address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, sun4v_datatrap			! Entry invalid?  Punt
	 or	%g4, SUN4V_TLB_MODIFY|SUN4V_TLB_ACCESS|SUN4V_TLB_W, %g7
		! Update the modified bit

#	btst	SUN4V_TLB_REAL_W|SUN4V_TLB_W, %g4	! Is it a ref fault?
	mov	1, %g2
	sllx	%g2, 61, %g2				! %g2 = SUN4V_TLB_REAL_W
	or	%g2, SUN4V_TLB_W, %g2
	btst	%g2, %g4				! Writable at all?
	bz,pn	%xcc, sun4v_datatrap			! No -- really fault
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the race -- reload and retry
	 or	%g4, SUN4V_TLB_MODIFY|SUN4V_TLB_ACCESS|SUN4V_TLB_W, %g4
		! Update the modified bit
2:
	sethi	%hi(tsb_dmmu), %g2
	ldx	[%g2 + %lo(tsb_dmmu)], %g2

	mov	%g1, %g7				! Keep the MMFSA pointer
	/* Construct TSB tag word. */
	add	%g1, 0x50, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! Reload the context
	mov	%g3, %g1
	srlx	%g1, 22, %g1				! va >> 22
	sllx	%g6, 48, %g6				! context into the upper bits
	or	%g1, %g6, %g1

	/* TSB index = (va >> PTSHIFT) & (nentries - 1), 16 bytes/entry. */
	srlx	%g3, PTSHIFT, %g3
	sethi	%hi(tsbsize), %g5
	mov	512, %g6
	ld	[%g5 + %lo(tsbsize)], %g5
	sllx	%g6, %g5, %g5				! nentries = 512 << tsbsize
	sub	%g5, 1, %g5
	and	%g3, %g5, %g3
	sllx	%g3, 4, %g3
	add	%g2, %g3, %g2				! %g2 = &tsb[index]

	LOCK_TSB
	stx	%g4, [%g2 + 8]				! TSB entry data
	stx	%g1, [%g2]		! unlock

	/* Save %o0-%o2 around the hypervisor demap call. */
	mov	%o0, %g1
	mov	%o1, %g2
	mov	%o2, %g3

/* Identical redefinitions of the values defined earlier; harmless to cpp. */
#define MAP_DTLB	0x1
#define MMU_UNMAP_ADDR	0x84
	add	%g7, 0x48, %o0
	ldxa	[%o0] ASI_PHYS_CACHED, %o0		! arg0 = fault address
	add	%g7, 0x50, %o1
	ldxa	[%o1] ASI_PHYS_CACHED, %o1		! arg1 = fault context
	mov	MAP_DTLB, %o2				! arg2 = which TLB to demap
	ta	MMU_UNMAP_ADDR				! Hypervisor fast trap

	mov	%g1, %o0
	mov	%g2, %o1
	mov	%g3, %o2

	retry
	NOTREACHED
2406
/*
 * sun4v TL0 instruction TSB miss: like the data miss handler, but reads
 * the I-fault fields of the MMFSA (0x08/0x10), requires the mapping to
 * be executable, and punts to sun4v_texttrap on failure.
 */
sun4v_tl0_itsb_miss:
	GET_MMFSA(%g1)					! %g1 = MMU fault status area (PA)
	add	%g1, 0x8, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = I-fault address
	add	%g1, 0x10, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! %g6 = I-fault context
	PTE_GET_SUN4V sun4v_texttrap			! On success %g6 = pte address
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, sun4v_texttrap			! Entry invalid?  Punt
	 or	%g4, SUN4V_TLB_ACCESS, %g7		! Update the access bit

	btst	SUN4V_TLB_EXEC, %g4			! Executable mapping?
	bz,pn	%xcc, sun4v_texttrap
	 nop
	btst	SUN4V_TLB_ACCESS, %g4			! Need to update access bit?
	bne,pt	%xcc, 2f
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b				! Lost the race -- reload and retry
	 or	%g4, SUN4V_TLB_ACCESS, %g4		! Update the modified bit
2:
	/* NOTE(review): uses tsb_dmmu in the ITSB path -- presumably the
	 * TSB is shared between I and D on sun4v; confirm against pmap. */
	sethi	%hi(tsb_dmmu), %g2
	ldx	[%g2 + %lo(tsb_dmmu)], %g2

	mov	%g1, %g7				! Keep the MMFSA pointer
	/* Construct TSB tag word. */
	add	%g1, 0x10, %g6
	ldxa	[%g6] ASI_PHYS_CACHED, %g6		! Reload the context
	mov	%g3, %g1
	srlx	%g1, 22, %g1				! va >> 22
	sllx	%g6, 48, %g6				! context into the upper bits
	or	%g1, %g6, %g1

	/* TSB index = (va >> PTSHIFT) & (nentries - 1), 16 bytes/entry. */
	srlx	%g3, PTSHIFT, %g3
	sethi	%hi(tsbsize), %g5
	mov	512, %g6
	ld	[%g5 + %lo(tsbsize)], %g5
	sllx	%g6, %g5, %g5				! nentries = 512 << tsbsize
	sub	%g5, 1, %g5
	and	%g3, %g5, %g3
	sllx	%g3, 4, %g3
	add	%g2, %g3, %g2				! %g2 = &tsb[index]

	LOCK_TSB
	stx	%g4, [%g2 + 8]				! TSB entry data
	stx	%g1, [%g2]		! unlock

	retry
	NOTREACHED
2458
/*
 * Kernel window spill (sun4v): save the trap-time window and its %sp
 * into the per-cpu scratch area (ci_rw/ci_rwsp); SUN4V_PUSH_RWINDOW
 * copies it to the real stack later.
 */
kspill_normal:
	wrpr	0x90, %tt				! Force the kernel spill trap type

	GET_CPUINFO_PA(%g1)
	wr	%g0, ASI_PHYS_CACHED, %asi		! Physical access -- cannot fault

	SPILL	stxa, %g1 + CI_RW, 8, %asi		! Save the 16 window registers
	saved

	stxa	%sp, [%g1 + CI_RWSP] %asi		! Remember the stack this window belongs to

	retry
	NOTREACHED
2472
2473/*
2474 * Spill user windows into the PCB.
2475 */
pcbspill_normals:
	ba,pt	%xcc, pcbspill
	 wrpr	0x80, %tt				! Normal user spill trap type

pcbspill_others:
	wrpr	0xa0, %tt				! "Other" user spill trap type

pcbspill:
	/* Append the trap-time window to the PCB save area at index pcb_nsaved. */
	GET_CPUINFO_PA(%g6)
	wr	%g0, ASI_PHYS_CACHED, %asi		! Use ASI_PHYS_CACHED to prevent possible page faults
	ldxa	[%g6 + CI_CPCBPADDR] %asi, %g6		! %g6 = cpcb (PA)

	lduba	[%g6 + PCB_NSAVED] %asi, %g7		! %g7 = cpcb->pcb_nsaved
	sllx	%g7, 7, %g5				! 128 bytes per saved window
	add	%g6, %g5, %g5
	SPILL	stxa, %g5 + PCB_RW, 8, %asi		! Save the 16 window registers
	saved

	sllx	%g7, 3, %g5				! 8 bytes per saved %sp
	add	%g6, %g5, %g5
	stxa	%sp, [%g5 + PCB_RWSP] %asi

	/* NOTE(review): no bounds check on pcb_nsaved here -- presumably
	 * bounded elsewhere by the window-overflow logic; confirm. */
	inc	%g7
	stba	%g7, [%g6 + PCB_NSAVED] %asi

	retry
	NOTREACHED
2503
2504	/*
2505	 * Copy the trap-time register window to the stack, if needed,
2506	 * and reset ci_rwsp afterwards.
2507	 * On entry: %g7 is curcpu
2508	 * Registers used: %g2 and all locals
2509	 */
	.macro	SUN4V_PUSH_RWINDOW
	ldx	[%g7 + CI_RWSP], %g2
	brz,pt	%g2, 98f			! Nothing pending?  Done.
	 nop

	/* Copy the 16 saved registers from ci_rw to BIAS(saved %sp),
	 * going through the local registers eight at a time. */
	ldx	[%g7 + CI_RW + (0*8)], %l0
	ldx	[%g7 + CI_RW + (1*8)], %l1
	ldx	[%g7 + CI_RW + (2*8)], %l2
	ldx	[%g7 + CI_RW + (3*8)], %l3
	ldx	[%g7 + CI_RW + (4*8)], %l4
	ldx	[%g7 + CI_RW + (5*8)], %l5
	ldx	[%g7 + CI_RW + (6*8)], %l6
	ldx	[%g7 + CI_RW + (7*8)], %l7
	stx	%l0, [%g2 + BIAS + (0*8)]
	stx	%l1, [%g2 + BIAS + (1*8)]
	stx	%l2, [%g2 + BIAS + (2*8)]
	stx	%l3, [%g2 + BIAS + (3*8)]
	stx	%l4, [%g2 + BIAS + (4*8)]
	stx	%l5, [%g2 + BIAS + (5*8)]
	stx	%l6, [%g2 + BIAS + (6*8)]
	stx	%l7, [%g2 + BIAS + (7*8)]
	ldx	[%g7 + CI_RW + (8*8)], %l0
	ldx	[%g7 + CI_RW + (9*8)], %l1
	ldx	[%g7 + CI_RW + (10*8)], %l2
	ldx	[%g7 + CI_RW + (11*8)], %l3
	ldx	[%g7 + CI_RW + (12*8)], %l4
	ldx	[%g7 + CI_RW + (13*8)], %l5
	ldx	[%g7 + CI_RW + (14*8)], %l6
	ldx	[%g7 + CI_RW + (15*8)], %l7
	stx	%l0, [%g2 + BIAS + (8*8)]
	stx	%l1, [%g2 + BIAS + (9*8)]
	stx	%l2, [%g2 + BIAS + (10*8)]
	stx	%l3, [%g2 + BIAS + (11*8)]
	stx	%l4, [%g2 + BIAS + (12*8)]
	stx	%l5, [%g2 + BIAS + (13*8)]
	stx	%l6, [%g2 + BIAS + (14*8)]
	stx	%l7, [%g2 + BIAS + (15*8)]

	stx	%g0, [%g7 + CI_RWSP]		! Mark the scratch area empty again
98:
	.endm
2551
/*
 * sun4v slow data fault path: build a full trap frame and call
 * data_access_fault() in C.  Reached when the fast TSB-miss handlers
 * cannot resolve the fault.
 */
sun4v_datatrap:
	GET_MMFSA(%g3)					! %g3 = MMU fault status area (PA)
	add	%g3, 0x48, %g1
	ldxa	[%g1] ASI_PHYS_CACHED, %g1		! %g1 = fault address
	add	%g3, 0x50, %g2
	ldxa	[%g2] ASI_PHYS_CACHED, %g2		! %g2 = fault context

	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF
	or	%g1, %g2, %o3				! (argument: addr | ctx)
	mov	%g1, %o4				! (argument: sfva)

	rdpr	%tt, %g4
	rdpr	%tstate, %g1
	rdpr	%tpc, %g2
	rdpr	%tnpc, %g3

	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]
	mov	%g4, %o1		! (type)
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_PC]
	rd	%y, %g5
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_NPC]
	st	%g5, [%sp + CC64FSZ + BIAS + TF_Y]
	mov	%g2, %o2		! (pc)
	sth	%o1, [%sp + CC64FSZ + BIAS + TF_TT]! debug

	/* Fake up an SFSR: FV always, plus W for protection faults. */
	cmp	%o1, T_FDMMU_PROT
	bne,pn	%icc, 1f
	 mov	SFSR_FV, %o5				! (delay slot -- always executed)
	or	%o5, SFSR_W, %o5

1:
	wrpr	%g0, PSTATE_KERN, %pstate		! Get back to normal globals
	wrpr	%g0, 0, %gl

	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + (1*8)]
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + (2*8)]
	add	%sp, CC64FSZ + BIAS, %o0		! (&tf)
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + (3*8)]
	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + (4*8)]
	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + (5*8)]
	rdpr	%pil, %g5
	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + (6*8)]
	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + (7*8)]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_PIL]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_OLDPIL]

	/*
	 * Phew, ready to enable traps and call C code.
	 */
	wrpr	%g0, 0, %tl

	GET_CPUINFO_VA(%g7)
	SUN4V_PUSH_RWINDOW				! Flush any pending saved window to the stack

	wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Restore default ASI
	wrpr	%g0, PSTATE_INTR, %pstate	! traps on again
	call	data_access_fault		! data_access_fault(tf, type, ...)
	 nop

	ba,a,pt	%icc, return_from_trap
	 nop
	NOTREACHED
2614
/*
 * sun4v slow instruction fault path: build a full trap frame and call
 * text_access_fault() in C.  Mirrors sun4v_datatrap but reads the
 * I-fault fields of the MMFSA (0x08/0x10).
 */
sun4v_texttrap:
	GET_MMFSA(%g3)					! %g3 = MMU fault status area (PA)
	add	%g3, 0x08, %g1
	ldxa	[%g1] ASI_PHYS_CACHED, %g1		! %g1 = fault address
	add	%g3, 0x10, %g2
	ldxa	[%g2] ASI_PHYS_CACHED, %g2		! %g2 = fault context

	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF

	or	%g1, %g2, %o2				! (argument: addr | ctx)
	clr	%o3					! (argument: sfsr = 0)

	rdpr	%tt, %g4
	rdpr	%tstate, %g1
	rdpr	%tpc, %g2
	rdpr	%tnpc, %g3

	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]
	mov	%g4, %o1		! (type)
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_PC]
	rd	%y, %g5
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_NPC]
	st	%g5, [%sp + CC64FSZ + BIAS + TF_Y]
	sth	%o1, [%sp + CC64FSZ + BIAS + TF_TT]! debug

	wrpr	%g0, PSTATE_KERN, %pstate		! Get back to normal globals
	wrpr	%g0, 0, %gl

	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + (1*8)]
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + (2*8)]
	add	%sp, CC64FSZ + BIAS, %o0		! (&tf)
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + (3*8)]
	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + (4*8)]
	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + (5*8)]
	rdpr	%pil, %g5
	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + (6*8)]
	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + (7*8)]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_PIL]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_OLDPIL]

	/*
	 * Phew, ready to enable traps and call C code.
	 */
	wrpr	%g0, 0, %tl

	GET_CPUINFO_VA(%g7)
	SUN4V_PUSH_RWINDOW				! Flush any pending saved window to the stack

	wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Restore default ASI
	wrpr	%g0, PSTATE_INTR, %pstate	! traps on again
	call	text_access_fault		! text_access_fault(tf, type, ...)
	 nop

	ba,a,pt	%icc, return_from_trap
	 nop
	NOTREACHED
2671
	.align 8
/* Tail-call into hv_mmu_demap_page with %o2 = both TLBs selected. */
NENTRY(sun4v_tlb_flush_pte)
	ba	hv_mmu_demap_page
	 mov	MAP_ITLB|MAP_DTLB, %o2
END(sun4v_tlb_flush_pte)
2677
	.align 8
/* Tail-call into hv_mmu_demap_ctx with %o1 = both TLBs selected. */
NENTRY(sun4v_tlb_flush_ctx)
	ba	hv_mmu_demap_ctx
	 mov	MAP_ITLB|MAP_DTLB, %o1
END(sun4v_tlb_flush_ctx)
2683
2684#endif	/* SUN4V */
2685
2686/*
2687 * We're here because we took an alignment fault in NUCLEUS context.
2688 * This could be a kernel bug or it could be due to saving or restoring
2689 * a user window to/from an invalid stack pointer.
2690 *
2691 * If the latter is the case, we could try to emulate unaligned accesses,
2692 * but we really don't know where to store the registers since we can't
2693 * determine if there's a stack bias.  Or we could store all the regs
2694 * into the PCB and punt, until the user program uses up all the CPU's
2695 * register windows and we run out of places to store them.  So for
2696 * simplicity we'll just blow them away and enter the trap code which
2697 * will generate a bus error.  Debugging the problem will be a bit
2698 * complicated since lots of register windows will be lost, but what
2699 * can we do?
2700 */
checkalign:
	rdpr	%tl, %g2
	subcc	%g2, 1, %g1
	bneg,pn	%icc, slowtrap		! Huh?  Alignment fault at TL 0 -- shouldn't be here
	 nop

	wrpr	%g1, 0, %tl		! Drop to the faulting trap level
	rdpr	%tt, %g7
	rdpr	%tstate, %g4
	andn	%g7, 0x07f, %g5		! Window spill traps are all 0b 0000 10xx xxxx
	cmp	%g5, 0x080		! Window fill traps are all 0b 0000 11xx xxxx
	bne,a,pn %icc, slowtrap		! Not a spill/fill trap -- kernel bug
	 nop

	/*
         * %g1 -- current tl
	 * %g2 -- original tl
	 * %g4 -- tstate
         * %g7 -- tt
	 */

	and	%g4, CWP, %g5
	wrpr	%g5, %cwp		! Go back to the original register window

	/* Blow away all user windows: fold %otherwin into %cansave. */
	rdpr	%otherwin, %g6
	rdpr	%cansave, %g5
	add	%g5, %g6, %g5
	wrpr	%g0, 0, %otherwin	! Just blow away all user windows
	wrpr	%g5, 0, %cansave
	rdpr	%canrestore, %g5
	wrpr	%g5, 0, %cleanwin

	wrpr	%g0, T_ALIGN, %tt	! This was an alignment fault
	/*
	 * Now we need to determine if this was a userland store/load or not.
	 * Userland stores occur in anything other than the kernel spill/fill
	 * handlers (trap type 0x9x/0xdx).
	 */
	and	%g7, 0xff0, %g5
	cmp	%g5, 0x90		! Kernel spill (0x9x)?
	bz,pn	%icc, slowtrap
	 nop
	cmp	%g5, 0xd0		! Kernel fill (0xdx)?
	bz,pn	%icc, slowtrap
	 nop
	and	%g7, 0xfc0, %g5		! User spill/fill: report the base trap type instead
	wrpr	%g5, 0, %tt
	ba,a,pt	%icc, slowtrap
	 nop
2750
2751/*
2752 * slowtrap() builds a trap frame and calls trap().
2753 * This is called `slowtrap' because it *is*....
2754 * We have to build a full frame for ptrace(), for instance.
2755 *
2756 * Registers:
2757 *
2758 */
slowtrap:
	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF

	/* Capture the trap state before switching global sets. */
	rdpr	%tt, %g4
	rdpr	%tstate, %g1
	rdpr	%tpc, %g2
	rdpr	%tnpc, %g3

Lslowtrap_reenter:
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]
	mov	%g4, %o1		! (type)
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_PC]
	rd	%y, %g5
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_NPC]
	mov	%g1, %o3		! (pstate)
	st	%g5, [%sp + CC64FSZ + BIAS + TF_Y]
	mov	%g2, %o2		! (pc)
	sth	%o1, [%sp + CC64FSZ + BIAS + TF_TT]! debug

	NORMAL_GLOBALS()

	/* Save the normal globals into the trap frame. */
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + (1*8)]
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + (2*8)]
	add	%sp, CC64FSZ + BIAS, %o0		! (&tf)
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + (3*8)]
	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + (4*8)]
	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + (5*8)]
	rdpr	%pil, %g5
	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + (6*8)]
	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + (7*8)]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_PIL]
	stb	%g5, [%sp + CC64FSZ + BIAS + TF_OLDPIL]

	/*
	 * Phew, ready to enable traps and call C code.
	 */
	wrpr	%g0, 0, %tl

	GET_CPUINFO_VA(%g7)
#ifdef SUN4V
	SUN4V_PUSH_RWINDOW				! Flush any pending saved window to the stack
#endif

	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
	wrpr	%g0, PSTATE_INTR, %pstate	! traps on again
	call	trap				! trap(tf, type, pc, pstate)
	 nop

	ba,a,pt	%icc, return_from_trap
	 nop
	NOTREACHED
2810
2811/*
2812 * Do a `software' trap by re-entering the trap code, possibly first
2813 * switching from interrupt stack to kernel stack.  This is used for
2814 * scheduling and signal ASTs (which generally occur from softclock or
2815 * tty or net interrupts).
2816 *
2817 * We enter with the trap type in %g1.  All we have to do is jump to
2818 * Lslowtrap_reenter above, but maybe after switching stacks....
2819 *
2820 * We should be running alternate globals.  The normal globals and
2821 * out registers were just loaded from the old trap frame.
2822 *
2823 *	Input Params:
2824 *	%g1 = tstate
2825 *	%g2 = tpc
2826 *	%g3 = tnpc
2827 *	%g4 = tt == T_AST
2828 */
2829softtrap:
2830	GET_CPUINFO_VA(%g5)
2831	sethi	%hi(EINTSTACK-INTSTACK), %g7
2832	sub	%g5, BIAS, %g5
2833	dec	%g7
2834
2835	sub	%g5, %sp, %g5
2836	andncc	%g5, %g7, %g0
2837	bnz,pt	%xcc, Lslowtrap_reenter
2838	 nop
2839	GET_CPCB(%g7)
2840	set	USPACE-CC64FSZ-TRAPFRAME_SIZEOF-BIAS, %g5
2841	add	%g7, %g5, %g6
2842	stx	%i0, [%g6 + CC64FSZ + BIAS + TF_O + (0*8)]	! Generate a new trapframe
2843	stx	%i1, [%g6 + CC64FSZ + BIAS + TF_O + (1*8)]	!	but don't bother with
2844	stx	%i2, [%g6 + CC64FSZ + BIAS + TF_O + (2*8)]	!	locals and ins
2845	stx	%i3, [%g6 + CC64FSZ + BIAS + TF_O + (3*8)]
2846	stx	%i4, [%g6 + CC64FSZ + BIAS + TF_O + (4*8)]
2847	stx	%i5, [%g6 + CC64FSZ + BIAS + TF_O + (5*8)]
2848	stx	%i6, [%g6 + CC64FSZ + BIAS + TF_O + (6*8)]
2849	stx	%i7, [%g6 + CC64FSZ + BIAS + TF_O + (7*8)]
2850	ba,pt	%xcc, Lslowtrap_reenter
2851	 mov	%g6, %sp
2852
2853/*
2854 * syscall_setup() builds a trap frame and calls syscall().
2855 * XXX	should not have to save&reload ALL the registers just for
2856 *	ptrace...
2857 */
2858syscall_setup:
2859	TRAP_SETUP -CC64FSZ-TRAPFRAME_SIZEOF
2860
2861#ifdef DEBUG
2862	rdpr	%tt, %o1	! debug
2863	sth	%o1, [%sp + CC64FSZ + BIAS + TF_TT]! debug
2864#endif	/* DEBUG */
2865
2866	NORMAL_GLOBALS()
2867
2868	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + ( 1*8)]
2869	mov	%g1, %o1			! code
2870	rdpr	%tpc, %o2			! (pc)
2871	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + ( 2*8)]
2872	rdpr	%tstate, %g1
2873	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + ( 3*8)]
2874	rdpr	%tnpc, %o3
2875	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + ( 4*8)]
2876	rd	%y, %o4
2877	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + ( 5*8)]
2878	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + ( 6*8)]
2879	wrpr	%g0, 0, %tl			! return to tl=0
2880	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + ( 7*8)]
2881	add	%sp, CC64FSZ + BIAS, %o0	! (&tf)
2882
2883	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]
2884	stx	%o2, [%sp + CC64FSZ + BIAS + TF_PC]
2885	stx	%o3, [%sp + CC64FSZ + BIAS + TF_NPC]
2886	st	%o4, [%sp + CC64FSZ + BIAS + TF_Y]
2887
2888	rdpr	%pil, %g5
2889	stb	%g5, [%sp + CC64FSZ + BIAS + TF_PIL]
2890	stb	%g5, [%sp + CC64FSZ + BIAS + TF_OLDPIL]
2891
2892	wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Restore default ASI
2893
2894	GET_CPUINFO_VA(%g7)
2895	call	syscall				! syscall(&tf, code, pc)
2896	 wrpr	%g0, PSTATE_INTR, %pstate	! turn on interrupts
2897
2898	wrpr	%g0, PSTATE_KERN, %pstate	! Disable interrupts
2899	wrpr	%g0, 0, %tl			! Return to tl==0
2900	ba,a,pt	%icc, return_from_trap
2901	 nop
2902	NOTREACHED
2903
2904/*
2905 * interrupt_vector:
2906 *
2907 * Spitfire chips never get level interrupts directly from H/W.
2908 * Instead, all interrupts come in as interrupt_vector traps.
2909 * The interrupt number or handler address is an 11 bit number
2910 * encoded in the first interrupt data word.  Additional words
2911 * are application specific and used primarily for cross-calls.
2912 *
2913 * The interrupt vector handler then needs to identify the
2914 * interrupt source from the interrupt number and arrange to
2915 * invoke the interrupt handler.  This can either be done directly
2916 * from here, or a softint at a particular level can be issued.
2917 *
2918 * To call an interrupt directly and not overflow the trap stack,
2919 * the trap registers should be saved on the stack, registers
2920 * cleaned, trap-level decremented, the handler called, and then
2921 * the process must be reversed.
2922 *
2923 * To simplify life all we do here is issue an appropriate softint.
2924 *
2925 * Note:	It is impossible to identify or change a device's
2926 *		interrupt number until it is probed.  That's the
2927 *		purpose for all the funny interrupt acknowledge
2928 *		code.
2929 *
2930 */
2931
2932/*
2933 * Vectored interrupts:
2934 *
2935 * When an interrupt comes in, interrupt_vector uses the interrupt
2936 * vector number to lookup the appropriate intrhand from the intrlev
2937 * array.  It then looks up the interrupt level from the intrhand
2938 * structure.  It uses the level to index the per-cpu intrpending array,
2939 * and inserts the intrhand at the head of the proper intrpending entry.
2940 *
2941 * Then interrupt_vector uses the interrupt level in the intrhand
2942 * to issue a softint of the appropriate level.  The softint handler
2943 * figures out what level interrupt it's handling and pulls the
 * intrhand pointer at the head of the intrpending list for that interrupt
2945 * level, removes it from the list, clears the interrupt generator,
2946 * and invokes the interrupt handler.
2947 */
2948
	.text
/*
 * interrupt_vector: entered on an interrupt-vector trap.  Read the
 * dispatch status/data registers, look up the intrhand for the vector
 * number in intrlev[], and queue it via setup_sparcintr, which posts
 * a softint at the handler's IPL.
 */
interrupt_vector:
	ldxa	[%g0] ASI_IRSR, %g1
	mov	IRDR_0H, %g2
	ldxa	[%g2] ASI_IRDR, %g2	! Get interrupt number
	membar	#Sync

	sethi	%hi(KERNBASE), %g3
	btst	IRSR_BUSY, %g1
	bz,pn	%icc, 3f		! Spurious interrupt
	 cmp	%g2, %g3
#ifdef MULTIPROCESSOR
	! Data words >= KERNBASE are IPI handler addresses, not interrupt
	! numbers: fetch the two argument words and jump to the handler.
	blu,pt	%xcc, Lsoftint_regular
	 and	%g2, MAXINTNUM-1, %g5	! XXX make sun4us work
	mov	IRDR_1H, %g3
	ldxa	[%g3] ASI_IRDR, %g3     ! Get IPI handler arg0
	mov	IRDR_2H, %g5
	ldxa	[%g5] ASI_IRDR, %g5     ! Get IPI handler arg1

	stxa	%g0, [%g0] ASI_IRSR	! Ack IRQ
	membar	#Sync			! Should not be needed due to retry

	jmpl	%g2, %g0		! Call the IPI handler (%g3, %g5 args)
	 nop
	db_enter()
	NOTREACHED
#else
	bgeu,pn	%xcc, 3f
	 and	%g2, MAXINTNUM-1, %g5	! XXX make sun4us work
#endif

Lsoftint_regular:
	stxa	%g0, [%g0] ASI_IRSR	! Ack IRQ
	membar	#Sync			! Should not be needed due to retry

	sethi	%hi(intrlev), %g3
	or	%g3, %lo(intrlev), %g3
	sllx	%g5, 3, %g5		! Calculate entry number
	ldx	[%g3 + %g5], %g5	! We have a pointer to the handler

	brz,pn	%g5, 3f			! NULL means it isn't registered yet.  Skip it.
	 nop

/*
 * setup_sparcintr: %g5 points to an intrhand.  Atomically link it at
 * the head of this CPU's intrpending list for its IPL (ih_pend serves
 * both as the list link and as the "already queued" marker), then
 * post a softint at that IPL.
 */
setup_sparcintr:
	ldx	[%g5+IH_PEND], %g6	! Check if already in use
	brnz,pn	%g6, ret_from_intr_vector ! Skip it if it's running
	 ldub	[%g5+IH_PIL], %g6	! Read interrupt mask
	GET_CPUINFO_VA(%g1)
	sll	%g6, 3, %g3		! Find start of list for this IPL
	add	%g1, CI_INTRPENDING, %g1
	add	%g1, %g3, %g1
1:
	ldx	[%g1], %g3		! Load list head
	add	%g5, IH_PEND, %g7
	casxa	[%g7] ASI_N, %g0, %g3	! ih_pend = old head, iff it was NULL
	brnz,pn	%g3, ret_from_intr_vector	! Someone else queued it first
	 nop
	stx	%g5, [%g1]		! New list head

	mov	1, %g7
	sll	%g7, %g6, %g6
	wr	%g6, 0, SET_SOFTINT	! Invoke a softint

ret_from_intr_vector:
	CLRTT
	retry
	NOTREACHED

3:
	ba,a	ret_from_intr_vector
	 nop				! XXX spitfire bug?
3020
3021#ifdef SUN4V
3022
/*
 * sun4v_cpu_mondo: sun4v CPU mondo (cross-call) queue handler.
 * Dequeue one 64-byte entry -- handler address plus two argument
 * words -- from this CPU's mondo queue and jump to the handler with
 * the arguments in %g3 and %g5 (same convention as the sun4u IPI
 * handlers above).
 */
sun4v_cpu_mondo:
	mov	0x3c0, %g1		! CPU mondo queue head offset
	ldxa	[%g1] ASI_QUEUE, %g2

	GET_CPUINFO_PA(%g3)
	add	%g3, CI_CPUMQ, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = PA of the queue
	ldxa	[%g3 + %g2] ASI_PHYS_CACHED, %g4	! handler address
	add	%g2, 8, %g5
	ldxa	[%g3 + %g5] ASI_PHYS_CACHED, %g5	! arg0
	add	%g2, 16, %g6
	ldxa	[%g3 + %g6] ASI_PHYS_CACHED, %g6	! arg1
	add	%g2, 64, %g2		! Advance past this entry,
	and	%g2, 0x7ff, %g2		! wrapping around the queue
	stxa	%g2, [%g1] ASI_QUEUE	! Store new queue head
	membar	#Sync

	mov	%g4, %g2
	mov	%g5, %g3
	mov	%g6, %g5
	jmpl	%g2, %g0		! Call handler (args in %g3, %g5)
	 nop			! No store here!
	retry
	NOTREACHED
3047
/*
 * sun4v_dev_mondo: sun4v device mondo queue handler.  Dequeue one
 * entry (the interrupt number), map it through intrlev[] to its
 * intrhand, and hand that to setup_sparcintr.  Values >= MAXINTNUM
 * bypass the lookup and are used directly (the code treats them as
 * already being intrhand pointers).
 */
sun4v_dev_mondo:
	mov	0x3d0, %g1		! Device mondo queue head offset
	ldxa	[%g1] ASI_QUEUE, %g2

	GET_CPUINFO_PA(%g3)
	add	%g3, CI_DEVMQ, %g3
	ldxa	[%g3] ASI_PHYS_CACHED, %g3		! %g3 = PA of the queue
	ldxa	[%g3 + %g2] ASI_PHYS_CACHED, %g5	! interrupt number
	add	%g2, 64, %g2		! Advance past this entry,
	and	%g2, 0x7ff, %g2		! wrapping around the queue
	stxa	%g2, [%g1] ASI_QUEUE	! Store new queue head
	membar	#Sync

	cmp	%g5, MAXINTNUM
	bgeu,pt	%xcc, 1f
	 nop

	sethi	%hi(intrlev), %g3
	or	%g3, %lo(intrlev), %g3
	sllx	%g5, 3, %g5		! Calculate entry number
	ldx	[%g3 + %g5], %g5	! We have a pointer to the handler
1:
	brnz,pt	%g5, setup_sparcintr	! NULL means it isn't registered yet
	 nop

	ba,a	3b			! Drop an unregistered interrupt
	 nop
3075
3076#endif
3077
3078#ifdef MULTIPROCESSOR
/*
 * sun4u_ipi_tlb_page_demap: IPI handler -- demap one page from both
 * the D and I TLBs.  Entered from interrupt_vector with %g3 = va and
 * %g5 = context.  Runs with interrupts disabled and %tl raised above 0
 * for the duration of the demap.
 */
NENTRY(sun4u_ipi_tlb_page_demap)
	rdpr	%pstate, %g1
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g2, %pstate				! disable interrupts

	rdpr	%tl, %g2
	brnz	%g2, 1f					! already at %tl > 0?
	 add	%g2, 1, %g4
	wrpr	%g0, %g4, %tl				! Switch to traplevel > 0
1:
	mov	CTX_PRIMARY, %g4
	andn	%g3, 0xfff, %g3				! drop unused va bits
	ldxa	[%g4] ASI_DMMU, %g6			! Save primary context
	sethi	%hi(KERNBASE), %g7
	membar	#LoadStore
	stxa	%g5, [%g4] ASI_DMMU			! Insert context to demap
	membar	#Sync
	or	%g3, DEMAP_PAGE_PRIMARY, %g3
	stxa	%g0, [%g3] ASI_DMMU_DEMAP		! Demap in both TLBs
	stxa	%g0, [%g3] ASI_IMMU_DEMAP
	membar	#Sync
	flush	%g7
	stxa	%g6, [%g4] ASI_DMMU			! Restore primary context
	membar	#Sync
	flush	%g7

	wrpr	%g2, %tl				! Restore %tl and %pstate
	wrpr	%g1, %pstate
	ba,a	ret_from_intr_vector
	 nop
END(sun4u_ipi_tlb_page_demap)
3110
/*
 * sun4u_ipi_tlb_context_demap: IPI handler -- demap an entire context
 * from both the D and I TLBs.  Entered from interrupt_vector with
 * %g3 = context.  Runs with interrupts disabled and %tl raised above 0
 * for the duration of the demap.
 */
NENTRY(sun4u_ipi_tlb_context_demap)
	rdpr	%pstate, %g1
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g2, %pstate				! disable interrupts

	rdpr	%tl, %g2
	brnz	%g2, 1f					! already at %tl > 0?
	 add	%g2, 1, %g4
	wrpr	%g0, %g4, %tl				! Switch to traplevel > 0
1:
	mov	CTX_PRIMARY, %g4
	sethi	%hi(KERNBASE), %g7
	ldxa	[%g4] ASI_DMMU, %g6			! Save primary context
	membar	#LoadStore
	stxa	%g3, [%g4] ASI_DMMU			! Insert context to demap
	membar	#Sync
	set	DEMAP_CTX_PRIMARY, %g3
	stxa	%g0, [%g3] ASI_DMMU_DEMAP		! Demap in both TLBs
	stxa	%g0, [%g3] ASI_IMMU_DEMAP
	membar	#Sync
	flush	%g7
	stxa	%g6, [%g4] ASI_DMMU			! Restore primary context
	membar	#Sync
	flush	%g7

	wrpr	%g2, %tl				! Restore %tl and %pstate
	wrpr	%g1, %pstate
	ba,a	ret_from_intr_vector
	 nop
END(sun4u_ipi_tlb_context_demap)
3141
3142#ifdef SUN4V
/*
 * sun4v_ipi_tlb_page_demap: IPI handler -- unmap one page via the
 * hypervisor MMU_UNMAP_ADDR fast trap.  %g3 = va, %g5 = context.
 * %o0..%o2 are preserved in %g1/%g2/%g4 around the hypervisor call.
 */
NENTRY(sun4v_ipi_tlb_page_demap)
	mov	%o0, %g1
	mov	%o1, %g2
	mov	%o2, %g4
	mov	%g3, %o0		! va
	mov	%g5, %o1		! context
	mov	MAP_DTLB|MAP_ITLB, %o2
	ta	MMU_UNMAP_ADDR
	mov	%g1, %o0
	mov	%g2, %o1
	mov	%g4, %o2

	retry
END(sun4v_ipi_tlb_page_demap)
3157
/* Unimplemented on sun4v: must never be sent/entered. */
NENTRY(sun4v_ipi_tlb_context_demap)
	NOTREACHED
END(sun4v_ipi_tlb_context_demap)
3161#endif
3162
/*
 * ipi_save_fpstate: IPI handler -- save the FPU state for the proc in
 * %g3 into its fpstate and release the FPU (ci_fpproc = NULL), but
 * only if that proc still owns this CPU's FPU.  The secondary MMU
 * context is switched to 0 around the block stores (ASI_BLK_S).
 */
NENTRY(ipi_save_fpstate)
	GET_CPUINFO_VA(%g1)
	ldx	[%g1 + CI_FPPROC], %g2
	cmp	%g2, %g3		! Does %g3 still own this FPU?
	bne,pn	%xcc, 3f		! No -- nothing to do

	 mov	CTX_SECONDARY, %g2
	GET_MMU_CONTEXTID(%g6, %g2)	! Save secondary context in %g6
	membar	#LoadStore
	SET_MMU_CONTEXTID(%g0, %g2)	! ... and switch to context 0
	membar	#Sync

	ldx	[%g3 + P_FPSTATE], %g3

	rdpr	%pstate, %g2		! enable FP before we begin
	rd	%fprs, %g4
	wr	%g0, FPRS_FEF, %fprs
	or	%g2, PSTATE_PEF, %g2
	wrpr	%g2, 0, %pstate

	stx	%fsr, [%g3 + FS_FSR]	! f->fs_fsr = getfsr();

	rd	%gsr, %g2		! Save %gsr
	st	%g2, [%g3 + FS_GSR]

	add	%g3, FS_REGS, %g3	! This is zero...
	btst	FPRS_DL, %g4		! Lower FPU clean?
	bz,a,pt	%icc, 1f		! Then skip it
	 add	%g3, 128, %g3		! Skip a block

	membar	#Sync
	stda	%f0, [%g3] ASI_BLK_S	! f->fs_f0 = etc;
	inc	BLOCK_SIZE, %g3
	stda	%f16, [%g3] ASI_BLK_S
	inc	BLOCK_SIZE, %g3
1:
	btst	FPRS_DU, %g4		! Upper FPU clean?
	bz,pt	%icc, 2f		! Then skip it
	 nop

	membar	#Sync
	stda	%f32, [%g3] ASI_BLK_S
	inc	BLOCK_SIZE, %g3
	stda	%f48, [%g3] ASI_BLK_S
2:
	membar	#Sync			! Finish operation so we can
	wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean

	stx	%g0, [%g1 + CI_FPPROC]	! fpproc = NULL
	mov	CTX_SECONDARY, %g2
	SET_MMU_CONTEXTID(%g6, %g2)	! Restore secondary context
	membar	#Sync
3:
	ba	ret_from_intr_vector
	 nop
END(ipi_save_fpstate)
3219
/*
 * ipi_drop_fpstate: IPI handler -- discard (do not save) the FPU state
 * of the proc in %g3, releasing the FPU if that proc still owns it.
 */
NENTRY(ipi_drop_fpstate)
	rdpr	%pstate, %g1
	wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean
	or	%g1, PSTATE_PEF, %g1
	wrpr	%g1, 0, %pstate
	GET_CPUINFO_VA(%g1)
	ldx	[%g1 + CI_FPPROC], %g5
	cmp	%g5, %g3		! Does %g3 still own this FPU?
	bne,pn	%xcc, 1f		! No -- nothing to do
	 nop
	stx	%g0, [%g1 + CI_FPPROC]		! fpproc = NULL
1:
	ba	ret_from_intr_vector
	 nop
END(ipi_drop_fpstate)
3235
/*
 * ipi_softint: IPI handler -- post the softint mask passed in %g3.
 */
NENTRY(ipi_softint)
	ba	ret_from_intr_vector
	 wr	%g3, 0, SET_SOFTINT
END(ipi_softint)
3240
/*
 * ipi_db: IPI handler -- enter the debugger by taking the slow trap
 * path with %tt forced to T_BREAKPOINT.
 */
NENTRY(ipi_db)
	ba	slowtrap
	 wrpr	%g0, T_BREAKPOINT, %tt
END(ipi_db)
3245#endif
3246
3247/*
3248 * Ultra1 and Ultra2 CPUs use soft interrupts for everything.  What we do
3249 * on a soft interrupt, is we should check which bits in ASR_SOFTINT(0x16)
3250 * are set, handle those interrupts, then clear them by setting the
3251 * appropriate bits in ASR_CLEAR_SOFTINT(0x15).
3252 *
3253 * We have a list of interrupt handlers for each of 15 interrupt levels.
3254 * If a vectored interrupt can be dispatched, the dispatch routine will put
3255 * the intrhand at the head of the appropriate list. The interrupt handler
3256 * will go through the list to look for an interrupt to dispatch.  If it
3257 * finds one it will pull it off the list, and call the handler.
3258 *
3259 * After preliminary setup work, the interrupt is passed to each
3260 * registered handler in turn.  These are expected to return nonzero if
3261 * they took care of the interrupt.  If a handler claims the interrupt,
3262 * we exit (hardware interrupts are latched in the requestor so we'll
3263 * just take another interrupt in the unlikely event of simultaneous
3264 * interrupts from two different devices at the same level).
3265 *
3266 * Inputs:
3267 *	%l0 = %tstate
3268 *	%l1 = return pc
3269 *	%l2 = return npc
3270 *	%l3 = interrupt level
3271 *
3272 * Internal:
3273 *	%l4, %l5: local variables
3274 *	%l6 = %y
3275 *	%l7 = %g1
3276 *	%g2..%g7 go to stack
3277 *
3278 * An interrupt frame is built in the space for a full trapframe;
 * this contains the tstate, pc, npc, and interrupt level.
3280 *
3281 * The level of this interrupt is determined by:
3282 *
3283 *       IRQ# = %tt - 0x40
3284 */
3285
	.globl sparc_interrupt			! This is for interrupt debugging
sparc_interrupt:
	/*
	 * If this is a %tick softint, clear it then call interrupt_vector.
	 */
	rd	SOFTINT, %g1
	set	(TICK_INT|STICK_INT), %g2
	andcc	%g2, %g1, %g2
	bz,pt	%icc, 0f
	 GET_CPUINFO_VA(%g7)
	wr	%g2, 0, CLEAR_SOFTINT
	ba,pt	%icc, setup_sparcintr
	 add	%g7, CI_TICKINTR, %g5	! %g5 = this CPU's tick intrhand
0:
	! We don't use TRAPFRAME_SIZEOF here because it might be a clock
	! interrupt, which uses a larger frame.
	INTR_SETUP -CC64FSZ-CLOCKFRAME_SIZEOF

	NORMAL_GLOBALS()

	/* Save normal globals */
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_G + ( 1*8)]
	stx	%g2, [%sp + CC64FSZ + BIAS + TF_G + ( 2*8)]
	stx	%g3, [%sp + CC64FSZ + BIAS + TF_G + ( 3*8)]
	stx	%g4, [%sp + CC64FSZ + BIAS + TF_G + ( 4*8)]
	stx	%g5, [%sp + CC64FSZ + BIAS + TF_G + ( 5*8)]
	stx	%g6, [%sp + CC64FSZ + BIAS + TF_G + ( 6*8)]
	stx	%g7, [%sp + CC64FSZ + BIAS + TF_G + ( 7*8)]

999:	flushw			! Do not remove this instruction -- causes interrupt loss
	.section	.sun4v_patch, "ax"
	.word	999b
	nop
	.previous

	GET_CPUINFO_VA(%g7)
#ifdef SUN4V
	SUN4V_PUSH_RWINDOW
#endif

	rd	%y, %l6
	INCR uvmexp+V_INTR		! uvmexp.intrs++; (clobbers %o0,%o1,%o2)
	rdpr	%tt, %l5		! Find out our current IPL
	rdpr	%tstate, %l0
	rdpr	%tpc, %l1
	rdpr	%tnpc, %l2
	wrpr	%g0, 0, %tl

	! Dump our trap frame now we have taken the IRQ
	stw	%l6, [%sp + CC64FSZ + BIAS + TF_Y]	! Silly, but we need to save this for rft
	sth	%l5, [%sp + CC64FSZ + BIAS + TF_TT]! debug
	stx	%l0, [%sp + CC64FSZ + BIAS + TF_TSTATE]	! set up intrframe/clockframe
	stx	%l1, [%sp + CC64FSZ + BIAS + TF_PC]
	stx	%l2, [%sp + CC64FSZ + BIAS + TF_NPC]

	sub	%l5, 0x40, %l6			! Convert to interrupt level
	stb	%l6, [%sp + CC64FSZ + BIAS + TF_PIL]	! set up intrframe/clockframe
	rdpr	%pil, %o1
	stb	%o1, [%sp + CC64FSZ + BIAS + TF_OLDPIL]	! old %pil
	clr	%l5			! Zero handled count
	mov	1, %l3			! Ack softint
	sll	%l3, %l6, %l3		! Generate IRQ mask

	wrpr	%l6, %pil

	/*
	 * Set handled_intr_level and save the old one so we can restore it
	 * later.  Note: %l0 is reused from here on (it held %tstate above,
	 * which is already saved in the trap frame).
	 */
	ld	[%g7 + CI_HANDLED_INTR_LEVEL], %l0
	st	%l6, [%g7 + CI_HANDLED_INTR_LEVEL]
	st	%l0, [%sp + CC64FSZ + BIAS + SAVED_INTR_LEVEL]

sparc_intr_retry:
	wr	%l3, 0, CLEAR_SOFTINT	! (don't clear possible %tick IRQ)
	wrpr	%g0, PSTATE_INTR, %pstate	! Reenable interrupts
	sll	%l6, 3, %l2
	add	%g7, CI_INTRPENDING, %l4
	add	%l2, %l4, %l4		! %l4 = &intrpending[level]

1:
	membar	#StoreLoad		! Make sure any failed casxa instructions complete
	ldx	[%l4], %l2		! Check the head of the list
	brz,pn	%l2, intrcmplt		! Empty list?

	 clr	%l7
	membar	#LoadStore
	casxa	[%l4] ASI_N, %l2, %l7	! Grab the entire list
	cmp	%l7, %l2
	bne,pn	%icc, 1b
	 nop

	! Walk the grabbed list: for each intrhand, unlink it, call its
	! handler, and (if it has one) its ih_ack routine.
2:
	ldx	[%l2 + IH_PEND], %l7	! Load next pending
	add	%l2, IH_PEND, %l3
	clr	%l4
	casxa	[%l3] ASI_N, %l7, %l4	! Unlink from list
	cmp	%l7, %l4
	bne,pn	%xcc, 2b		! Retry?
	 add	%sp, CC64FSZ+BIAS, %o0	! tf = %sp + CC64FSZ + BIAS

	ldx	[%l2 + IH_ACK], %l1	! ih->ih_ack

	! At this point, the current ih could already be added
	! back to the pending list.

	call	intr_handler
	 mov	%l2, %o1

	brz,pn	%l1, 0f			! No ack routine?
	 add	%l5, %o0, %l5		! Add handler return value
	ldx	[%l2 + IH_COUNT], %o0	! ih->ih_count.ec_count++;
	inc	%o0
	stx	%o0, [%l2 + IH_COUNT]

	jmpl	%l1, %o7		! (*ih->ih_ack)(ih)
	 mov	%l2, %o0
0:
	brnz,pn	%l7, 2b			! Another?
	 mov	%l7, %l2

intrcmplt:
	/*
	 * Re-read SOFTINT to see if there are any new pending interrupts
	 * at this level.
	 */
	mov	1, %l3			! Ack softint
	rd	SOFTINT, %l7
	sll	%l3, %l6, %l3		! Generate IRQ mask
	btst	%l3, %l7		! leave mask in %l3 for retry code
	bnz,pn	%icc, sparc_intr_retry
	 mov	1, %l5			! initialize intr count for next run

	/* Restore old handled_intr_level */
	st	%l0, [%g7 + CI_HANDLED_INTR_LEVEL]

	ldub	[%sp + CC64FSZ + BIAS + TF_OLDPIL], %l3	! restore old %pil
	wrpr	%g0, PSTATE_KERN, %pstate	! Disable interrupts
	wrpr	%l3, 0, %pil

	ba,a,pt	%icc, return_from_trap
	 nop
3428
3429	.globl	return_from_trap, rft_kernel, rft_user
3430	.globl	softtrap, slowtrap
3431	.globl	syscall
3432
3433/*
3434 * Various return-from-trap routines.
3435 */
3436
3437/*
3438 * Return from trap.
3439 * registers are:
3440 *
3441 *	[%sp + CC64FSZ + BIAS] => trap frame
3442 *
3443 * We must load all global, out, and trap registers from the trap frame.
3444 *
3445 * If returning to kernel, we should be at the proper trap level because
3446 * we don't touch %tl.
3447 *
3448 * When returning to user mode, the trap level does not matter, as it
3449 * will be set explicitly.
3450 *
3451 * If we are returning to user code, we must check for register windows in
3452 * the pcb that belong on the stack, and reload them, if there are any.
3453 */
3454return_from_trap:
3455	ldx	[%sp + CC64FSZ + BIAS + TF_TSTATE], %g1
3456	btst	TSTATE_PRIV, %g1
3457	!!
3458	!! Let all pending interrupts drain before returning to userland
3459	!!
3460	bnz,pn	%icc, 1f				! Returning to userland?
3461	 nop
3462	wrpr	%g0, PSTATE_INTR, %pstate
3463	wrpr	%g0, %g0, %pil				! Lower IPL
34641:
3465	wrpr	%g0, PSTATE_KERN, %pstate		! Disable IRQs
3466
3467	/* Restore normal globals */
3468	ldx	[%sp + CC64FSZ + BIAS + TF_G + (1*8)], %g1
3469	ldx	[%sp + CC64FSZ + BIAS + TF_G + (2*8)], %g2
3470	ldx	[%sp + CC64FSZ + BIAS + TF_G + (3*8)], %g3
3471	ldx	[%sp + CC64FSZ + BIAS + TF_G + (4*8)], %g4
3472	ldx	[%sp + CC64FSZ + BIAS + TF_G + (5*8)], %g5
3473	ldx	[%sp + CC64FSZ + BIAS + TF_G + (6*8)], %g6
3474	bnz,pn	%icc, 2f
3475	 nop
3476	ldx	[%sp + CC64FSZ + BIAS + TF_G + (7*8)], %g7
34772:
3478	ALTERNATE_GLOBALS()
3479
3480	/* Restore outs */
3481	ldx	[%sp + CC64FSZ + BIAS + TF_O + (0*8)], %i0
3482	ldx	[%sp + CC64FSZ + BIAS + TF_O + (1*8)], %i1
3483	ldx	[%sp + CC64FSZ + BIAS + TF_O + (2*8)], %i2
3484	ldx	[%sp + CC64FSZ + BIAS + TF_O + (3*8)], %i3
3485	ldx	[%sp + CC64FSZ + BIAS + TF_O + (4*8)], %i4
3486	ldx	[%sp + CC64FSZ + BIAS + TF_O + (5*8)], %i5
3487	ldx	[%sp + CC64FSZ + BIAS + TF_O + (6*8)], %i6
3488	ldx	[%sp + CC64FSZ + BIAS + TF_O + (7*8)], %i7
3489	/* Now load trap registers into alternate globals */
3490	ld	[%sp + CC64FSZ + BIAS + TF_Y], %g4
3491	ldx	[%sp + CC64FSZ + BIAS + TF_TSTATE], %g1		! load new values
3492	wr	%g4, 0, %y
3493	ldx	[%sp + CC64FSZ + BIAS + TF_PC], %g2
3494	ldx	[%sp + CC64FSZ + BIAS + TF_NPC], %g3
3495
3496	/* Returning to user mode or kernel mode? */
3497	btst	TSTATE_PRIV, %g1		! returning to userland?
3498	bz,pt	%icc, rft_user
3499	 nop
3500
3501/*
3502 * Return from trap, to kernel.
3503 *
3504 * We will assume, for the moment, that all kernel traps are properly stacked
3505 * in the trap registers, so all we have to do is insert the (possibly modified)
3506 * register values into the trap registers then do a retry.
3507 *
3508 */
3509rft_kernel:
3510	rdpr	%tl, %g4			! Grab a set of trap registers
3511	inc	%g4
3512	wrpr	%g4, %g0, %tl
3513	wrpr	%g3, 0, %tnpc
3514	wrpr	%g2, 0, %tpc
3515	wrpr	%g1, 0, %tstate
3516
3517	rdpr	%canrestore, %g2
3518	brnz	%g2, 1f
3519	 nop
3520
3521	wr	%g0, ASI_NUCLEUS, %asi
3522	rdpr	%cwp, %g1
3523	dec	%g1
3524	wrpr	%g1, %cwp
3525	FILL	ldxa, %sp+BIAS, 8, %asi
3526	restored
3527	inc	%g1
3528	wrpr	%g1, %cwp
35291:
3530	restore
3531
3532	rdpr	%tstate, %g1			! Since we may have trapped our regs may be toast
3533	rdpr	%cwp, %g2
3534	andn	%g1, CWP, %g1
3535	wrpr	%g1, %g2, %tstate		! Put %cwp in %tstate
3536	retry					! We should allow some way to distinguish retry/done
3537	NOTREACHED
3538
3539/*
3540 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
3541 * will re-enter trap() if set.  Note that we may have to switch from
3542 * the interrupt stack to the kernel stack in this case.
3543 *	%g1 = %tstate
3544 *	%g2 = return %pc
3545 *	%g3 = return %npc
3546 * If returning to a valid window, just set psr and return.
3547 */
3548rft_user:
3549	GET_CURPROC(%g7)
3550	lduw	[%g7 + P_MD_ASTPENDING], %g7	! want AST trap?
3551	brnz,pn	%g7, softtrap			! yes, re-enter trap with type T_AST
3552	 mov	T_AST, %g4
3553
3554	/*
3555	 * NB: only need to do this after a cache miss
3556	 */
3557	/*
3558	 * Now check to see if any regs are saved in the pcb and restore them.
3559	 *
3560	 * Here we need to undo the damage caused by switching to a kernel
3561	 * stack.
3562	 *
3563	 * We will use alternate globals %g4..%g7 because %g1..%g3 are used
3564	 * by the data fault trap handlers and we don't want possible conflict.
3565	 */
3566
3567	GET_CPCB(%g6)
3568 	ldub	[%g6 + PCB_NSAVED], %g7		! Any saved reg windows?
3569	brnz,pn	%g7, softtrap
3570	 mov	T_RWRET, %g4
3571
3572	/*
3573	 * Set up our return trapframe so we can recover if we trap from here
3574	 * on in.
3575	 */
3576	wrpr	%g0, 1, %tl			! Set up the trap state
3577	wrpr	%g2, 0, %tpc
3578	wrpr	%g3, 0, %tnpc
3579	rdpr	%cwp, %g7
3580	andn	%g1, CWP, %g1
3581	wrpr	%g1, %g7, %tstate
3582
3583	rdpr	%otherwin, %g7
3584	brnz	%g7, 1f
3585	 nop
3586
3587	/* XXX Rewrite sun4u code to handle faults like sun4v. */
3588	sethi	%hi(cputyp), %g2
3589	ld	[%g2 + %lo(cputyp)], %g2
3590	cmp	%g2, CPU_SUN4V
3591	bne,pt	%icc, 1f
3592	 nop
3593
3594	wr	%g0, ASI_AIUS, %asi
3595	rdpr	%cwp, %g1
3596	dec	%g1
3597	wrpr	%g1, 0, %cwp
3598rft_user_fault_start:
3599	FILL	ldxa, %sp+BIAS, 8, %asi
3600	ldx	[%g6 + PCB_WCOOKIE], %g7
3601	xor	%g7, %i7, %i7		! stackghost
3602rft_user_fault_end:
3603	restored
3604	inc	%g1
3605	wrpr	%g1, 0, %cwp
3606
3607	rdpr	%canrestore, %g7
3608	wrpr	%g7, 0, %otherwin
3609	wrpr	%g0, 0, %canrestore
3610	rdpr	%otherwin, %g7
36111:
3612	wrpr	%g7, 0, %canrestore
3613	wrpr	%g0, 0, %otherwin
3614	wrpr	WSTATE_USER, %wstate		! Need to know where our sp points
3615	wrpr	%g7, 0, %cleanwin		! Force cleanup of kernel windows
3616
3617	restore
3618
3619	rdpr	%tstate, %g1
3620	rdpr	%cwp, %g7			! Find our cur window
3621	andn	%g1, CWP, %g1			! Clear it from %tstate
3622	wrpr	%g1, %g7, %tstate		! Set %tstate with %cwp
3623
3624	mov	CTX_SECONDARY, %g1		! Restore the user context
3625	GET_MMU_CONTEXTID(%g4, %g1)
3626	mov	CTX_PRIMARY, %g2
3627	SET_MMU_CONTEXTID(%g4, %g2)
3628	sethi	%hi(KERNBASE), %g7		! Should not be needed due to retry
3629	membar	#Sync				! Should not be needed due to retry
3630	flush	%g7				! Should not be needed due to retry
3631
3632	wrpr	%g0, 0, %pil			! Enable all interrupts
3633	retry
3634
3635! exported end marker for kernel gdb
3636	.globl	endtrapcode
3637endtrapcode:
3638
3639#ifdef DDB
3640!!!
3641!!! Dump the DTLB to phys address in %o0 and print it
3642!!!
3643!!! Only toast a few %o registers
3644!!!
3645	.globl	dump_dtlb
3646dump_dtlb:
3647	clr	%o1
3648	add	%o1, (64*8), %o3
36491:
3650	ldxa	[%o1] ASI_DMMU_TLB_TAG, %o2
3651	membar	#Sync
3652	stx	%o2, [%o0]
3653	membar	#Sync
3654	inc	8, %o0
3655	ldxa	[%o1] ASI_DMMU_TLB_DATA, %o4
3656	membar	#Sync
3657	inc	8, %o1
3658	stx	%o4, [%o0]
3659	cmp	%o1, %o3
3660	membar	#Sync
3661	bl	1b
3662	 inc	8, %o0
3663
3664	retl
3665	 nop
3666
	.globl	print_dtlb
print_dtlb:
	! Walk all 64 DTLB entries, printing two (index, tag, data)
	! tuples per line via db_printf.  %l1 = entry offset, %l2 = index.
	save	%sp, -CC64FSZ, %sp
	clr	%l1
	add	%l1, (64*8), %l3
	clr	%l2
1:
	ldxa	[%l1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	mov	%l2, %o1
	ldxa	[%l1] ASI_DMMU_TLB_DATA, %o3
	membar	#Sync
	inc	%l2
	set	2f, %o0
	call	db_printf
	 inc	8, %l1

	ldxa	[%l1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	mov	%l2, %o1
	ldxa	[%l1] ASI_DMMU_TLB_DATA, %o3
	membar	#Sync
	inc	%l2
	set	3f, %o0
	call	db_printf
	 inc	8, %l1

	cmp	%l1, %l3
	bl	1b
	 inc	8, %l0		! NOTE(review): %l0 appears unused here

	ret
	 restore
	.data
2:
	.asciz	"%2d:%016lx %016lx "
3:
	.asciz	"%2d:%016lx %016lx\r\n"
	.text
3706#endif	/* DDB */
3707
	.align	8
dostart:
	/*
	 * Startup.
	 *
	 * The Sun FCODE bootloader is nice and loads us where we want
	 * to be.  We have a full set of mappings already set up for us.
	 *
	 * I think we end up having an entire 16M allocated to us.
	 *
	 * We enter with the prom entry vector in %o0, dvec in %o1,
	 * and the bootops vector in %o2.
	 * (NOTE(review): the code below saves %o4 as the prom vector
	 * pointer -- confirm which register actually carries it.)
	 *
	 * All we need to do is:
	 *	1:	Save the prom vector
	 *	2:	Create a decent stack for ourselves
	 *	3:	Install the permanent 4MB kernel mapping
	 *	4:	Call the C language initialization code
	 */

	/*
	 * Set %pstate into a known state:
	 * Set supervisor mode, interrupt level >= 13, traps enabled
	 */
	wrpr	%g0, 13, %pil
	wrpr	%g0, PSTATE_INTR|PSTATE_PEF, %pstate
	wr	%g0, FPRS_FEF, %fprs		! Turn on FPU

#if defined(DDB) || NKSYMS > 0
	/*
	 * First, check for DDB arguments.  A pointer to an argument
	 * is passed in %o1 whose length is passed in %o2.  Our
	 * bootloader passes in a magic number as the first argument,
	 * followed by esym as argument 2, and ssym as argument 3,
	 * so check that %o2 >= 12.
	 */
	cmp	%o2, 12
	blt	1f			! Not enough args
	 nop

	set	0x44444230, %l3		! bootloader magic number
	ldx	[%o1], %l4
	cmp	%l3, %l4		! chk magic
	bne	%xcc, 1f
	 nop

	ldx	[%o1+8], %l4
	sethi	%hi(esym), %l3			! store esym
	stx	%l4, [%l3 + %lo(esym)]

	ldx	[%o1+16], %l4
	sethi	%hi(ssym), %l3			! store ssym
	stx	%l4, [%l3 + %lo(ssym)]
1:
#endif	/* defined(DDB) || NKSYMS > 0 */
	/*
	 * Step 1: Save rom entry pointer
	 */

	mov	%o4, %g7	! save prom vector pointer
	set	romp, %o5
	stx	%o4, [%o5]	! It's initialized data, I hope

	/*
	 * Switch to temporary stack.
	 */
	set	tmpstack-CC64FSZ-BIAS, %sp

	/*
	 * Ready to run C code; finish bootstrap.
	 */
1:
	set	0x2000, %o0			! fixed: 8192 contexts
	call	bootstrap
	 clr	%g4				! Clear data segment pointer

	/*
	 * pmap_bootstrap should have allocated a stack for proc 0 and
	 * stored the start and end in u0 and estack0.  Switch to that
	 * stack now.
	 */

	sethi	%hi(cpus), %g2
	ldx	[%g2 + %lo(cpus)], %g2
	ldx	[%g2 + CI_PADDR], %g2		! Load the interrupt stack's PA
3793
3794/*
3795 * Initialize a CPU.  This is used both for bootstrapping the first CPU
3796 * and spinning up each subsequent CPU.  Basically:
3797 *
3798 *	Install trap table.
3799 *	Switch to the initial stack.
3800 *	Call the routine passed in in cpu_info->ci_spinup.
3801 */
3802
3803cpu_initialize:
3804
3805	wrpr	%g0, 0, %tl			! Make sure we're not in NUCLEUS mode
3806	flushw
3807
3808	/* Change the trap base register */
3809	set	trapbase, %l1
3810#ifdef SUN4V
3811	sethi	%hi(cputyp), %l0
3812	ld	[%l0 + %lo(cputyp)], %l0
3813	cmp	%l0, CPU_SUN4V
3814	bne,pt	%icc, 1f
3815	 nop
3816	set	trapbase_sun4v, %l1
3817	GET_MMFSA(%o1)
38181:
3819#endif
3820	call	prom_set_trap_table		! Now we should be running 100% from our handlers
3821	 mov	%l1, %o0
3822	wrpr	%l1, 0, %tba			! Make sure the PROM didn't foul up.
3823	wrpr	%g0, WSTATE_KERN, %wstate
3824
3825	/*
3826	 * Switch to our initial stack.
3827	 */
3828
3829	GET_CPUINFO_VA(%g7)
3830	ldx	[%g7 + CI_INITSTACK], %l0
3831	add	%l0, -BIAS-CC64FSZ, %sp
3832
3833	/*
3834	 * Call our startup routine.
3835	 */
3836
3837	ldx	[%g7 + CI_SPINUP], %o1
3838	call	%o1				! Call routine
3839	 nop
3840	NOTREACHED
3841
3842	set	1f, %o0				! Main should never come back here
3843	call	panic
3844	 nop
3845	.data
38461:
3847	.asciz	"main() returned\r\n"
3848	_ALIGN
3849	.text
3850
/*
 * sun4u_set_tsbs: install the kernel's data and instruction TSB
 * pointers (tsb_dmmu/tsb_immu, combined with tsbsize) into the MMU
 * TSB registers of the calling CPU.
 */
ENTRY(sun4u_set_tsbs)

	/* Set the dmmu tsb */
	sethi	%hi(0x1fff), %o2
	set	tsb_dmmu, %o0
	ldx	[%o0], %o0
	set	tsbsize, %o1
	or	%o2, %lo(0x1fff), %o2
	ld	[%o1], %o1
	andn	%o0, %o2, %o0			! Mask off size and split bits
	or	%o0, %o1, %o0			! Make a TSB pointer
	set	TSB, %o2
	stxa	%o0, [%o2] ASI_DMMU		! Install data TSB pointer
	membar	#Sync

	/* Set the immu tsb */
	sethi	%hi(0x1fff), %o2
	set	tsb_immu, %o0
	ldx	[%o0], %o0
	set	tsbsize, %o1
	or	%o2, %lo(0x1fff), %o2
	ld	[%o1], %o1
	andn	%o0, %o2, %o0			! Mask off size and split bits
	or	%o0, %o1, %o0			! Make a TSB pointer
	set	TSB, %o2
	stxa	%o0, [%o2] ASI_IMMU		! Install insn TSB pointer
	membar	#Sync

	retl
	 nop
END(sun4u_set_tsbs)
3882
3883
3884#ifdef MULTIPROCESSOR
/*
 * cpu_mp_startup: entry point for secondary CPUs.  Set a known
 * %pil/%pstate, switch to the temporary stack, bootstrap this CPU's
 * pmap state and join the common cpu_initialize path.
 */
ENTRY(cpu_mp_startup)
	mov	%o0, %g2

	wrpr	%g0, 13, %pil
	wrpr	%g0, PSTATE_INTR|PSTATE_PEF, %pstate
	wr	%g0, FPRS_FEF, %fprs		! Turn on FPU

	set	tmpstack-CC64FSZ-BIAS, %sp

	call	pmap_bootstrap_cpu
	 nop

	ba,a,pt	%xcc, cpu_initialize
	 nop
END(cpu_mp_startup)
3900#endif
3901
3902/*
3903 * openfirmware(cell* param);
3904 *
3905 * OpenFirmware entry point
3906 */
3907	.align 8
3908NENTRY(openfirmware)
3909	sethi	%hi(romp), %o4
3910	ldx	[%o4+%lo(romp)], %o4
3911	save	%sp, -CC64FSZ, %sp
3912	rdpr	%pil, %i2
3913	mov	PIL_HIGH, %i3
3914	cmp	%i3, %i2
3915	movle	%icc, %i2, %i3
3916	wrpr	%g0, %i3, %pil
3917	mov	%i0, %o0
3918	mov	%g1, %l1
3919	mov	%g2, %l2
3920	mov	%g3, %l3
3921	mov	%g4, %l4
3922	mov	%g5, %l5
3923	mov	%g6, %l6
3924	mov	%g7, %l7
3925	rdpr	%pstate, %l0
3926	jmpl	%i4, %o7
3927	 wrpr	%g0, PSTATE_PROM|PSTATE_IE, %pstate
3928	wrpr	%l0, %g0, %pstate
3929	mov	%l1, %g1
3930	mov	%l2, %g2
3931	mov	%l3, %g3
3932	mov	%l4, %g4
3933	mov	%l5, %g5
3934	mov	%l6, %g6
3935	mov	%l7, %g7
3936	wrpr	%i2, 0, %pil
3937	ret
3938	 restore	%o0, %g0, %o0
3939END(openfirmware)
3940
3941/*
3942 * tlb_flush_pte(vaddr_t va, int ctx)
3943 *
3944 * Flush tte from both IMMU and DMMU.
3945 *
3946 */
	.align 8
/* spitfire (UltraSPARC I/II) variant: demap via the secondary context */
NENTRY(us_tlb_flush_pte)
	mov	CTX_SECONDARY, %o2
	andn	%o0, 0xfff, %g2				! drop unused va bits
	ldxa	[%o2] ASI_DMMU, %g1			! Save secondary context
	sethi	%hi(KERNBASE), %o4
	membar	#LoadStore
	stxa	%o1, [%o2] ASI_DMMU			! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_SECONDARY, %g2		! Demap page from secondary context only
	stxa	%g0, [%g2] ASI_DMMU_DEMAP		! Do the demap
	stxa	%g0, [%g2] ASI_IMMU_DEMAP		! to both TLBs
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	stxa	%g1, [%o2] ASI_DMMU			! Restore asi
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	retl
	 nop
END(us_tlb_flush_pte)
3967
3968	.align 8
3969NENTRY(us3_tlb_flush_pte)
3970	rdpr	%pstate, %o5
3971	andn	%o5, PSTATE_IE, %o4
3972	wrpr	%o4, %pstate				! disable interrupts
3973
3974	rdpr	%tl, %o3
3975	brnz	%o3, 1f
3976	 add	%o3, 1, %g2
3977	wrpr	%g0, %g2, %tl				! Switch to traplevel > 0
39781:
3979	mov	CTX_PRIMARY, %o2
3980	andn	%o0, 0xfff, %g2				! drop unused va bits
3981	ldxa	[%o2] ASI_DMMU, %g1			! Save primary context
3982	sethi	%hi(KERNBASE), %o4
3983	membar	#LoadStore
3984	stxa	%o1, [%o2] ASI_DMMU			! Insert context to demap
3985	membar	#Sync
3986	or	%g2, DEMAP_PAGE_PRIMARY, %g2		! Demap page from primary context only
3987	stxa	%g0, [%g2] ASI_DMMU_DEMAP		! Do the demap
3988	stxa	%g0, [%g2] ASI_IMMU_DEMAP		! to both TLBs
3989	membar	#Sync					! No real reason for this XXXX
3990	flush	%o4
3991	stxa	%g1, [%o2] ASI_DMMU			! Restore asi
3992	membar	#Sync					! No real reason for this XXXX
3993	flush	%o4
3994	wrpr	%g0, %o3, %tl				! Restore traplevel
3995	wrpr	%o5, %pstate				! Restore interrupts
3996	retl
3997	 nop
3998END(us_tlb_flush_pte)
3999
4000/*
4001 * tlb_flush_ctx(int ctx)
4002 *
4003 * Flush entire context from both IMMU and DMMU.
4004 *
4005 */
	.align 8
/* spitfire (UltraSPARC I/II) variant: demap via the secondary context */
NENTRY(us_tlb_flush_ctx)
	mov	CTX_SECONDARY, %o2
	sethi	%hi(KERNBASE), %o4
	ldxa	[%o2] ASI_DMMU, %g1		! Save secondary context
	membar	#LoadStore
	stxa	%o0, [%o2] ASI_DMMU		! Insert context to demap
	membar	#Sync
	set	DEMAP_CTX_SECONDARY, %g2	! Demap context from secondary context only
	stxa	%g0, [%g2] ASI_DMMU_DEMAP		! Do the demap
	stxa	%g0, [%g2] ASI_IMMU_DEMAP		! Do the demap
	membar	#Sync
	stxa	%g1, [%o2] ASI_DMMU		! Restore secondary asi
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	retl
	 nop
END(us_tlb_flush_ctx)
4024
	.align 8
/*
 * cheetah (UltraSPARC III) variant: demap through the primary context,
 * with interrupts off and %tl > 0 while the primary context register
 * is temporarily replaced.
 */
NENTRY(us3_tlb_flush_ctx)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, %pstate				! disable interrupts

	rdpr	%tl, %o3
	brnz	%o3, 1f					! already at %tl > 0?
	 add	%o3, 1, %g2
	wrpr	%g0, %g2, %tl				! Switch to traplevel > 0
1:
	mov	CTX_PRIMARY, %o2
	sethi	%hi(KERNBASE), %o4
	ldxa	[%o2] ASI_DMMU, %g1		! Save primary context
	membar	#LoadStore
	stxa	%o0, [%o2] ASI_DMMU		! Insert context to demap
	membar	#Sync
	set	DEMAP_CTX_PRIMARY, %g2		! Demap context from primary context only
	stxa	%g0, [%g2] ASI_DMMU_DEMAP		! Do the demap
	stxa	%g0, [%g2] ASI_IMMU_DEMAP		! Do the demap
	membar	#Sync
	stxa	%g1, [%o2] ASI_DMMU		! Restore primary context
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	wrpr	%g0, %o3, %tl				! Restore traplevel
	wrpr	%o5, %pstate				! Restore interrupts
	retl
	 nop
END(us3_tlb_flush_ctx)
4054
4055/*
4056 * dcache_flush_page(paddr_t pa)
4057 *
4058 * Clear one page from D$.
4059 *
4060 */
	.align 8
/*
 * spitfire variant: walk every D$ tag (2*NBPG bytes of tag space, 16
 * bytes per line) and zero the tag of any line whose tag matches this
 * page's VA bits, invalidating it.
 */
NENTRY(us_dcache_flush_page)
	mov	-1, %o1		! Generate mask for tag: bits [29..2]
	srlx	%o0, 13-2, %o2	! Tag is VA bits <40:13> in bits <29:2>
	clr	%o4		! %o4 = current tag offset, starts at 0
	srl	%o1, 2, %o1	! Now we have bits <29:0> set
	set	(2*NBPG), %o5	! %o5 = bytes of tag space left to scan
	ba,pt	%icc, 1f
	 andn	%o1, 3, %o1	! Now we have bits <29:2> set

	.align 8
1:
	ldxa	[%o4] ASI_DCACHE_TAG, %o3
	mov	%o4, %o0	! remember this line's offset for the store
	deccc	16, %o5
	bl,pn	%icc, 2f	! scanned all tags?  done

	 inc	16, %o4
	xor	%o3, %o2, %o3	! compare tag against this page's VA bits
	andcc	%o3, %o1, %g0
	bne,pt	%xcc, 1b	! no match: next line
	 membar	#LoadStore

dlflush2:
	stxa	%g0, [%o0] ASI_DCACHE_TAG	! match: invalidate the line
	ba,pt	%icc, 1b
	 membar	#StoreLoad
2:

	wr	%g0, ASI_PRIMARY_NOFAULT, %asi
	sethi	%hi(KERNBASE), %o5
	flush	%o5
	retl
	 membar	#Sync
END(us_dcache_flush_page)
4096
	.align 8
/*
 * cheetah variant: if the D$ is enabled, invalidate the page one
 * 32-byte line at a time via ASI_DCACHE_INVALIDATE, indexed by the
 * page-aligned physical address.
 */
NENTRY(us3_dcache_flush_page)
	ldxa    [%g0] ASI_MCCR, %o1
	btst    MCCR_DCACHE_EN, %o1	! D$ disabled?  nothing to do
	bz,pn   %icc, 1f
	 nop
	sethi   %hi(PAGE_SIZE), %o4	! %o4 = loop counter, PAGE_SIZE..0
	or      %g0, (PAGE_SIZE - 1), %o3
	andn    %o0, %o3, %o0		! page-align the address
2:
	subcc   %o4, 32, %o4		! one 32-byte D$ line per iteration
	stxa    %g0, [%o0 + %o4] ASI_DCACHE_INVALIDATE
	membar  #Sync
	bne,pt  %icc, 2b
	 nop
1:
	retl
	 nop
END(us3_dcache_flush_page)
4116
	.globl no_dcache_flush_page
/* no-op variant for CPUs that need no D$ flushing */
ENTRY(no_dcache_flush_page)
	retl
	 nop
END(no_dcache_flush_page)
4122
4123/*
4124 * cache_flush_virt(va, len)
4125 *
4126 * Clear everything in that va range from D$.
4127 *
4128 */
	.align 8
NENTRY(cache_flush_virt)
	brz,pn	%o1, 2f		! What? nothing to clear?
	 add	%o0, %o1, %o2	! %o2 = end address
	mov	0x1ff, %o3
	sllx	%o3, 5, %o3	! Generate mask for VA bits
	and	%o0, %o3, %o0	! reduce start/end to cache index space
	and	%o2, %o3, %o2
	sub	%o2, %o1, %o4	! End < start? need to split flushes.
	brlz,pn	%o4, 1f
	 movrz	%o4, %o3, %o4	! If start == end we need to wrap

	!! Clear from start to end
1:
dlflush3:
	stxa	%g0, [%o0] ASI_DCACHE_TAG	! invalidate this line's tag
	dec	16, %o4		! 16 bytes of tag space per line
	brgz,pt	%o4, 1b
	 inc	16, %o0
2:
	sethi	%hi(KERNBASE), %o5
	flush	%o5		! sync the I$ as well
	membar	#Sync
	retl
	 nop
END(cache_flush_virt)
4155
4156/*
4157 * The following code is copied to a dedicated page,
4158 * and signals are `trampolined' off it.
4159 *
4160 * When this code is run, the stack looks like:
4161 *	[%sp]			128 bytes to which registers can be dumped
4162 *	[%sp + 128]		signal number (goes in %o0)
4163 *	[%sp + 128 + 4]		signal code (ignored)
4164 *	[%sp + 128 + 8]		siginfo pointer(goes in %o1)
4165 *	[%sp + 128 + 16]	first word of saved state (sigcontext)
4166 *	    .
4167 *	    .
4168 *	    .
4169 *	[%sp + NNN]		last word of saved state
4170 *	[%sp + ...]		siginfo structure
4171 * (followed by previous stack contents or top of signal stack).
4172 * The address of the function to call is in %g1; the old %g1 and %o0
4173 * have already been saved in the sigcontext.  We are running in a clean
4174 * window, all previous windows now being saved to the stack.
4175 */
	.section .rodata
	.globl	sigcode
sigcode:
	/*
	 * XXX  the `save' and `restore' below are unnecessary: should
	 *	replace with simple arithmetic on %sp
	 *
	 * Make room on the stack for 64 %f registers + %fsr.  This comes
	 * out to 64*4+8 or 264 bytes, but this must be aligned to a multiple
	 * of 64, or 320 bytes.
	 */
	save	%sp, -CC64FSZ - 320, %sp
	mov	%g2, %l2		! save globals in %l registers
	mov	%g3, %l3
	mov	%g4, %l4
	mov	%g5, %l5
	mov	%g6, %l6
	mov	%g7, %l7
	/*
	 * Saving the fpu registers is expensive, so do it iff it is
	 * enabled and dirty.  FPRS_DL/FPRS_DU track the lower/upper
	 * halves of the FP register file separately.
	 */
	rd	%fprs, %l0
	btst	FPRS_DL|FPRS_DU, %l0	! All clean?
	bz,pt	%icc, 2f
	 btst	FPRS_DL, %l0		! test dl
	bz,pt	%icc, 1f
	 btst	FPRS_DU, %l0		! test du

	! fpu is enabled, oh well
	stx	%fsr, [%sp + CC64FSZ + BIAS + 0]
	add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block store
	stda	%f0, [%l0] ASI_BLK_P	! dump %f0-%f31 (lower half)
	inc	BLOCK_SIZE, %l0
	stda	%f16, [%l0] ASI_BLK_P
1:
	bz,pt	%icc, 2f		! (condition codes still from du test)
	 add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block store
	add	%l0, 2*BLOCK_SIZE, %l0	! and skip what we already stored
	stda	%f32, [%l0] ASI_BLK_P	! dump %f32-%f63 (upper half)
	inc	BLOCK_SIZE, %l0
	stda	%f48, [%l0] ASI_BLK_P
2:
	membar	#Sync
	rd	%fprs, %l0		! reload fprs copy, for checking after
	rd	%y, %l1			! in any case, save %y
	lduw	[%fp + BIAS + 128], %o0	! sig
	ldx	[%fp + BIAS + 128 + 8], %o1	! siginfo
	call	%g1			! (*sa->sa_handler)(sig, sip, scp)
	 add	%fp, BIAS + 128 + 16, %o2	! scp
	wr	%l1, %g0, %y		! in any case, restore %y

	/*
	 * Now that the handler has returned, re-establish all the state
	 * we just saved above, then do a sigreturn.
	 */
	btst	FPRS_DL|FPRS_DU, %l0	! All clean?
	bz,pt	%icc, 2f
	 btst	FPRS_DL, %l0		! test dl
	bz,pt	%icc, 1f
	 btst	FPRS_DU, %l0		! test du

	ldx	[%sp + CC64FSZ + BIAS + 0], %fsr
	add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block load
	ldda	[%l0] ASI_BLK_P, %f0	! reload %f0-%f31
	inc	BLOCK_SIZE, %l0
	ldda	[%l0] ASI_BLK_P, %f16
1:
	bz,pt	%icc, 2f
	 nop
	add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block load
	inc	2*BLOCK_SIZE, %l0	! and skip what we already loaded
	ldda	[%l0] ASI_BLK_P, %f32	! reload %f32-%f63
	inc	BLOCK_SIZE, %l0
	ldda	[%l0] ASI_BLK_P, %f48
2:
	mov	%l2, %g2		! restore the saved globals
	mov	%l3, %g3
	mov	%l4, %g4
	mov	%l5, %g5
	mov	%l6, %g6
	mov	%l7, %g7
	membar	#Sync

	restore	%g0, SYS_sigreturn, %g1 ! get registers back & set syscall #
	add	%sp, BIAS + 128 + 16, %o0	! compute scp
	.globl	sigcoderet
sigcoderet:
	.globl	sigcodecall
sigcodecall:
	t	ST_SYSCALL		! sigreturn(scp)
	! sigreturn does not return unless it fails
	.globl	esigcode
esigcode:
	/* FALLTHROUGH */
	.globl	sigfill
sigfill:
	unimp				! filler: any stray execution traps
esigfill:

	.globl	sigfillsiz
sigfillsiz:
	.word	esigfill - sigfill
4283
4284	.text
4285
4286/*
4287 * Primitives
4288 */
4289
4290/*
4291 * getfp() - get unbiased stack frame pointer
4292 */
ENTRY(getfp)
	mov %fp, %o0		! %fp carries the stack BIAS; take it off
	retl
	 add	%o0, BIAS, %o0	! return unbiased frame pointer
4297
4298/*
4299 * _copyinstr(fromaddr, toaddr, maxlength, &lencopied)
4300 *
4301 * Copy a null terminated string from the user address space into
4302 * the kernel address space.
4303 */
ENTRY(_copyinstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
	brgz,pt	%o2, 1f					! Make sure len is valid
	 nop
	retl
	 mov	ENAMETOOLONG, %o0
1:
	GET_CPCB(%o4)			! catch faults: any user-access trap
	set	Lcsfault, %o5		! will vector to Lcsfault
	membar	#Sync
	stx	%o5, [%o4 + PCB_ONFAULT]

	mov	%o1, %o5		!	save = toaddr;
! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsba	[%o0] ASI_AIUS, %g1	!	c = *fromaddr; (user space read)
	stb	%g1, [%o1]		!	*toaddr++ = c;
	inc	%o1
	brz,a,pn	%g1, Lcsdone	!	if (c == NULL)
	 clr	%o0			!		{ error = 0; done; }
	deccc	%o2			!	if (--len > 0) {
	bg,pt	%icc, 0b		!		fromaddr++;
	 inc	%o0			!		goto loop;
	ba,pt	%xcc, Lcsdone		!	}
	 mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
	NOTREACHED
	! falls into Lcsdone (shared with copyoutstr) to store the length,
	! clear pcb_onfault and return the error code
4330
4331/*
4332 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
4333 *
4334 * Copy a null terminated string from the kernel
4335 * address space to the user address space.
4336 */
ENTRY(copyoutstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
	brgz,pt	%o2, 1f					! Make sure len is valid
	 nop
	retl
	 mov	ENAMETOOLONG, %o0
1:
	GET_CPCB(%o4)			! catch faults: any user-access trap
	set	Lcsfault, %o5		! will vector to Lcsfault
	membar	#Sync
	stx	%o5, [%o4 + PCB_ONFAULT]

	mov	%o1, %o5		!	save = toaddr;
! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsb	[%o0], %g1		!	c = *fromaddr;
	stba	%g1, [%o1] ASI_AIUS	!	*toaddr++ = c; (user space write)
	inc	%o1
	brz,a,pn	%g1, Lcsdone	!	if (c == NULL)
	 clr	%o0			!		{ error = 0; done; }
	deccc	%o2			!	if (--len > 0) {
	bg,pt	%icc, 0b		!		fromaddr++;
	 inc	%o0			!		goto loop;
					!	}
	mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
! common tail for _copyinstr and copyoutstr
Lcsdone:				! done:
	sub	%o1, %o5, %o1		!	len = to - save;
	brnz,a	%o3, 1f			!	if (lencopied)
	 stx	%o1, [%o3]		!		*lencopied = len;
1:
	retl				! cpcb->pcb_onfault = 0;
	 stx	%g0, [%o4 + PCB_ONFAULT]! return (error);

! fault handler shared by _copyinstr and copyoutstr
Lcsfault:
	b	Lcsdone			! error = EFAULT;
	 mov	EFAULT, %o0		! goto ret;
4373
4374/*
4375 * copyin(src, dst, len)
4376 *
4377 * Copy specified amount of data from user space into the kernel.
4378 *
4379 * This is a modified version of bcopy that uses ASI_AIUS.  When
4380 * bcopy is optimized to use block copy ASIs, this should be also.
4381 */
4382
4383#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
4384
ENTRY(_copyin)
!	flushw			! Make sure we don't have stack probs & lose hibits of %o
	! %o0 = user src, %o1 = kernel dst, %o2 = len.
	! All user loads go through %asi = ASI_AIUS; faults vector to
	! Lcopyfault via pcb_onfault.  Returns 0 or EFAULT.
	GET_CPCB(%o3)
	wr	%g0, ASI_AIUS, %asi
	set	Lcopyfault, %o4
	membar	#Sync
	stx	%o4, [%o3 + PCB_ONFAULT]
	cmp	%o2, BCOPY_SMALL
Lcopyin_start:
	bge,a	Lcopyin_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
0:
	 inc	%o0
	ldsba	[%o0 - 1] %asi, %o4!	*dst++ = (++src)[-1];
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 * The ladder below aligns src/dst by 1, 2, then 4 bytes when
	 * their low address bits agree, then copies by doublewords.
	 */
Lcopyin_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already
	bne	1f
!	 XXX check no delay slot
	btst	7, %o1
	be,a	Lcopyin_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto copyin_doubes

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsba	[%o0] %asi, %o4	!	do {
	inc	%o0		!		(++dst)[-1] = *src++;
	inc	%o1
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 stb	%o4, [%o1 - 1]
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsba	[%o0] %asi, %o4	!	*dst++ = *src++;
	stb	%o4, [%o1]
	inc	%o0
	inc	%o1
	dec	%o2		!	len--;
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsha	[%o0] %asi, %o4	!	do {
	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
	inc	2, %o0		!		dst += 2, src += 2;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lcopyin_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsha	[%o0] %asi, %o4	!	(*short *)dst = *(short *)src;
	sth	%o4, [%o1]
	inc	2, %o0		!	dst += 2;
	inc	2, %o1		!	src += 2;
	dec	2, %o2		!	len -= 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	lduwa	[%o0] %asi, %o4	!	do {
	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
	inc	4, %o0		!		dst += 4, src += 4;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lcopyin_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	inc	4, %o1
	dec	4, %o2		! }
1:
Lcopyin_doubles:
	ldxa	[%o0] %asi, %g1	! do {
	stx	%g1, [%o1]	!	*(double *)dst = *(double *)src;
	inc	8, %o0		!	dst += 8, src += 8;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lcopyin_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lcopyin_done	!	goto copyin_done;

	 btst	4, %o2		! if ((len & 4) == 0)
	be,a	Lcopyin_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0		!	dst += 4;
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lcopyin_mopw:
	be	Lcopyin_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsha	[%o0] %asi, %o4	! *(short *)dst = *(short *)src;
	be	Lcopyin_done	! if ((len & 1) == 0) goto done;
	 sth	%o4, [%o1]
	ldsba	[%o0 + 2] %asi, %o4	! dst[2] = src[2];
	stb	%o4, [%o1 + 2]
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	! mop up trailing byte (if present).
Lcopyin_mopb:
	be,a	Lcopyin_done
	 nop
	ldsba	[%o0] %asi, %o4
	stb	%o4, [%o1]

Lcopyin_done:
	GET_CPCB(%o3)
	membar	#Sync
	stx	%g0, [%o3 + PCB_ONFAULT]	! disarm the fault handler
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	retl
	 clr	%o0			! return 0
END(_copyin)
4559
4560/*
4561 * copyout(src, dst, len)
4562 *
4563 * Copy specified amount of data from kernel to user space.
4564 * Just like copyin, except that the `dst' addresses are user space
4565 * rather than the `src' addresses.
4566 *
4567 * This is a modified version of bcopy that uses ASI_AIUS.  When
4568 * bcopy is optimized to use block copy ASIs, this should be also.
4569 */
ENTRY(copyout)
Ldocopy:
	! %o0 = kernel src, %o1 = user dst, %o2 = len.
	! Mirror image of _copyin: user stores go through %asi = ASI_AIUS;
	! faults vector to Lcopyfault.  Returns 0 or EFAULT.
	GET_CPCB(%o3)
	wr	%g0, ASI_AIUS, %asi
	set	Lcopyfault, %o4
	membar	#Sync
	stx	%o4, [%o3 + PCB_ONFAULT]
	cmp	%o2, BCOPY_SMALL
Lcopyout_start:
	membar	#StoreStore
	bge,a	Lcopyout_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
!	 XXX check no delay slot
0:
	inc	%o0
	ldsb	[%o0 - 1], %o4!	(++dst)[-1] = *src++;
	stba	%o4, [%o1] %asi
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 * Same alignment ladder as _copyin: align by 1, 2, then 4 bytes
	 * when the low address bits agree, then copy by doublewords.
	 */
Lcopyout_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already
	bne	1f
!	 XXX check no delay slot
	btst	7, %o1
	be,a	Lcopyout_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto copyout_doubes

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsb	[%o0], %o4	!	do {
	inc	%o0		!		(++dst)[-1] = *src++;
	inc	%o1
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 stba	%o4, [%o1 - 1] %asi
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	stba	%o4, [%o1] %asi
	inc	%o0
	inc	%o1
	dec	%o2		!	len--;
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsh	[%o0], %o4	!	do {
	stha	%o4, [%o1] %asi	!		*(short *)dst = *(short *)src;
	inc	2, %o0		!		dst += 2, src += 2;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lcopyout_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
	stha	%o4, [%o1] %asi
	inc	2, %o0		!	dst += 2;
	inc	2, %o1		!	src += 2;
	dec	2, %o2		!	len -= 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	lduw	[%o0], %o4	!	do {
	sta	%o4, [%o1] %asi	!		*(int *)dst = *(int *)src;
	inc	4, %o0		!		dst += 4, src += 4;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lcopyout_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
	sta	%o4, [%o1] %asi
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	inc	4, %o1
	dec	4, %o2		! }
1:
Lcopyout_doubles:
	ldx	[%o0], %g1	! do {
	stxa	%g1, [%o1] %asi	!	*(double *)dst = *(double *)src;
	inc	8, %o0		!	dst += 8, src += 8;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lcopyout_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lcopyout_done	!	goto copyout_done;

	 btst	4, %o2		! if ((len & 4) == 0)
	be,a	Lcopyout_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
	sta	%o4, [%o1] %asi
	inc	4, %o0		!	dst += 4;
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lcopyout_mopw:
	be	Lcopyout_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
	be	Lcopyout_done	! if ((len & 1) == 0) goto done;
	 stha	%o4, [%o1] %asi
	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
	stba	%o4, [%o1 + 2] %asi
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	! mop up trailing byte (if present).
Lcopyout_mopb:
	be,a	Lcopyout_done
	 nop
	ldsb	[%o0], %o4
	stba	%o4, [%o1] %asi

Lcopyout_done:
	GET_CPCB(%o3)
	membar	#Sync
	stx	%g0, [%o3 + PCB_ONFAULT]	! disarm the fault handler
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	membar	#StoreStore|#StoreLoad
	retl			! New instr
	 clr	%o0			! return 0
END(copyout)
4747
/*
 * copyin32(src, dst)
 *
 * Fetch one aligned 32-bit word from user space.  An unaligned source
 * address is rejected up front (treated as a fault).  Returns 0 or EFAULT.
 */
ENTRY(copyin32)
	andcc	%o0, 0x3, %g0		! source must be 4-byte aligned
	bnz,pn	%xcc, Lcopyfault
	 nop
	GET_CPCB(%o3)			! arm the fault handler
	set	Lcopyfault, %o4
	membar	#Sync
	stx	%o4, [%o3 + PCB_ONFAULT]
	lduwa	[%o0] ASI_AIUS, %o2	! user-space load
	stw	%o2, [%o1]
	membar	#Sync
	stx	%g0, [%o3 + PCB_ONFAULT]	! disarm
	retl
	 clr	%o0
END(copyin32)
4763
! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
! Reached via pcb_onfault from _copyin/copyout/copyin32.
Lcopyfault:
	GET_CPCB(%o3)
	stx	%g0, [%o3 + PCB_ONFAULT]	! disarm the fault handler
	membar	#StoreStore|#StoreLoad
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	retl
	 mov	EFAULT, %o0
4772
4773/*
4774 * cpu_switchto(struct proc *old, struct proc *new)
4775 *
4776 * Save the context of "old" and switch to "new".
4777 */
ENTRY(cpu_switchto)
	save	%sp, -CC64FSZ, %sp
	rdpr	%pstate, %o1		! oldpstate = %pstate;
	wrpr	%g0, PSTATE_INTR, %pstate ! make sure we're on normal globals

	ldx	[%g7 + CI_CPCB], %l5	! %g7 = cpu_info; %l5 = current pcb

	/*
	 * Register usage:
	 *
	 *	%i0 = oldproc
	 *	%i1 = newproc
	 *	%l1 = newpcb
	 *	%l2 = newpstate
	 *	%l5 = cpcb
	 *	%o0 = tmp 1
	 *	%o1 = oldpstate
	 *	%o2 = tmp 2
	 *	%o3 = vm
	 *	%o4 = sswap
	 *	%o5 = <free>
	 */

	/*
	 * Committed to running process p.
	 */
#if defined(MULTIPROCESSOR)
	/*
	 * p->p_cpu = curcpu();
	 */
	ldx	[%g7 + CI_SELF], %o0
	stx	%o0, [%i1 + P_CPU]
#endif	/* defined(MULTIPROCESSOR) */
	mov	SONPROC, %o0			! newproc->p_stat = SONPROC
	stb	%o0, [%i1 + P_STAT]
	ldx	[%i1 + P_ADDR], %l1		! newpcb = newproc->p_addr;

	flushw				! save all register windows except this one

	/*
	 * Save the old process, if any; then load p.
	 */
	brz,pn	%i0, Lsw_load		! if no old process, go load
	 wrpr	%g0, PSTATE_KERN, %pstate

	stx	%i6, [%l5 + PCB_SP]	! cpcb->pcb_sp = sp;
	stx	%i7, [%l5 + PCB_PC]	! cpcb->pcb_pc = pc;
	sth	%o1, [%l5 + PCB_PSTATE]	! cpcb->pcb_pstate = oldpstate;
	rdpr	%cwp, %o2		! Useless
	stb	%o2, [%l5 + PCB_CWP]

	/*
	 * Load the new process.  To load, we must change stacks and
	 * alter cpcb and the window control registers, hence we must
	 * disable interrupts.
	 *
	 * We also must load up the `in' and `local' registers.
	 */
Lsw_load:
	/* set new cpcb and cpcbpaddr */
	stx	%i1, [%g7 + CI_CURPROC]		! curproc = newproc;
	ldx	[%i1 + P_MD_PCBPADDR], %o2
	stx	%l1, [%g7 + CI_CPCB]		! cpcb = newpcb;
	stx	%o2, [%g7 + CI_CPCBPADDR]

	ldx	[%l1 + PCB_SP], %i6	! adopt the new process's stack/pc
	ldx	[%l1 + PCB_PC], %i7

	/* finally, enable traps */
	wrpr	%g0, PSTATE_INTR, %pstate

	/*
	 * Now running p.  Make sure it has a context so that it
	 * can talk about user space stuff.  (Its pcb_uw is currently
	 * zero so it is safe to have interrupts going here.)
	 */
	ldx	[%i1 + P_VMSPACE], %o3		! vm = newproc->p_vmspace;
	sethi	%hi(kernel_pmap_), %o1
	mov	CTX_SECONDARY, %l5		! Recycle %l5
	ldx	[%o3 + VM_PMAP], %o2		! if (vm->vm_pmap != kernel_pmap_)
	or	%o1, %lo(kernel_pmap_), %o1
	cmp	%o2, %o1
	bz,pn	%xcc, Lsw_havectx		! Don't replace kernel context!
	 ld	[%o2 + PM_CTX], %o0
	brnz,pt	%o0, Lsw_havectx		!	goto havecontext;
	 nop

	/* p does not have a context: call ctx_alloc to get one */
	call	ctx_alloc			! ctx_alloc(&vm->vm_pmap);
	 mov	%o2, %o0

	set	DEMAP_CTX_SECONDARY, %o1	! This context has been recycled
	stxa	%o0, [%l5] ASI_DMMU		! so we need to invalidate
	membar	#Sync
	stxa	%o1, [%o1] ASI_DMMU_DEMAP	! whatever bits of it may
	stxa	%o1, [%o1] ASI_IMMU_DEMAP	! be left in the TLB
	membar	#Sync
	/* p does have a context: just switch to it */
Lsw_havectx:
	! context is in %o0
	/*
	 * We probably need to flush the cache here.
	 */
	SET_MMU_CONTEXTID(%o0, %l5)		! Maybe we should invalidate the old context?
	membar	#Sync				! Maybe we should use flush here?
	flush	%sp

!	wrpr	%g0, 0, %cleanwin	! DEBUG
	clr	%g4		! This needs to point to the base of the data segment
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
	wrpr	%g0, PSTATE_INTR, %pstate
	ret
	 restore
END(cpu_switchto)
4892
4893/*
4894 * Snapshot the current process so that stack frames are up to date.
4895 * Only used just before a crash dump.
4896 */
ENTRY(snapshot)
	rdpr	%pstate, %o1		! save psr
	stx	%o6, [%o0 + PCB_SP]	! save sp
	rdpr	%pil, %o2
	sth	%o1, [%o0 + PCB_PSTATE]
	rdpr	%cwp, %o3
	stb	%o2, [%o0 + PCB_PIL]
	stb	%o3, [%o0 + PCB_CWP]

	! force every register window out to the stack so the
	! crash dump sees a consistent call chain
	flushw
	save	%sp, -CC64FSZ, %sp
	flushw
	ret
	 restore
END(snapshot)
4912
4913/*
4914 * cpu_fork() arrange for proc_trampoline() to run
4915 * after a process gets chosen in mi_switch(). The stack frame will
4916 * contain a function pointer in %l0, and an argument to pass to it in %l1.
4917 *
4918 * If the function *(%l0) returns, we arrange for an immediate return
4919 * to user mode. This happens in two known cases: after execve(2) of init,
4920 * and when returning a child to user mode after a fork(2).
4921 */
ENTRY(proc_trampoline)
	save	%sp, -CC64FSZ, %sp
	call	proc_trampoline_mi	! MI bookkeeping for the new proc
	 nop
	restore
	call	%l0			! re-use current frame
	 mov	%l1, %o0		! (*%l0)(%l1)

	/*
	 * Here we finish up as in syscall, but simplified.
	 * Build a user %tstate in the trapframe and return to user mode.
	 */
!	save	%sp, -CC64FSZ, %sp		! Save a kernel frame to emulate a syscall
	mov	PSTATE_USER, %g1		! XXXX user pstate (no need to load it)
	sllx	%g1, TSTATE_PSTATE_SHIFT, %g1	! Shift it into place
	rdpr	%cwp, %g5			! Fixup %cwp in %tstate
	or	%g1, %g5, %g1
	stx	%g1, [%sp + CC64FSZ + BIAS + TF_TSTATE]
	ba,a,pt	%icc, return_from_trap
	 nop
END(proc_trampoline)
4942
4943#ifdef DDB
4944
4945/*
4946 * The following probably need to be changed, but to what I don't know.
4947 */
4948
4949/*
4950 * u_int64_t
4951 * probeget(addr, asi, size)
4952 *	paddr_t addr;
4953 *	int asi;
4954 *	int size;
4955 *
4956 * Read a (byte,short,int,long) from the given address.
4957 * Like copyin but our caller is supposed to know what he is doing...
4958 * the address can be anywhere.
4959 *
4960 * We optimize for space, rather than time, here.
4961 */
ENTRY(probeget)
	mov	%o2, %o4
	! %o0 = addr, %o1 = asi, %o4 = (1,2,4)
	GET_CPCB(%o2)			! cpcb->pcb_onfault = Lfsprobe;
	set	Lfsprobe, %o5
	stx	%o5, [%o2 + PCB_ONFAULT]
	or	%o0, 0x9, %o3		! if (PHYS_ASI(asi)) {
	sub	%o3, 0x1d, %o3
	brz,a	%o3, 0f
	 mov	%g0, %o5
					! }
0:
	btst	1, %o4			! dispatch on the access size
	wr	%o1, 0, %asi		! use the caller-supplied ASI
	membar	#Sync
	bz	0f			! if (len & 1)
	 btst	2, %o4
	ba,pt	%icc, 1f
	 lduba	[%o0] %asi, %o0		!	value = *(char *)addr;
0:
	bz	0f			! if (len & 2)
	 btst	4, %o4
	ba,pt	%icc, 1f
	 lduha	[%o0] %asi, %o0		!	value = *(short *)addr;
0:
	bz	0f			! if (len & 4)
	 btst	8, %o4
	ba,pt	%icc, 1f
	 lda	[%o0] %asi, %o0		!	value = *(int *)addr;
0:
	ldxa	[%o0] %asi, %o0		!	value = *(long *)addr;
1:
	membar	#Sync
	brz	%o5, 1f			! if (cache flush addr != 0)
	 nop
1:
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
	stx	%g0, [%o2 + PCB_ONFAULT]
	retl				! made it, clear onfault and return
	 membar	#StoreStore|#StoreLoad
END(probeget)

	/*
	 * Fault handler for probeget: reached via pcb_onfault if the
	 * probed access traps; returns -1 as the error indicator.
	 */
	.globl	Lfsprobe
Lfsprobe:
	stx	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
	mov	-1, %o1
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
	membar	#StoreStore|#StoreLoad
	retl				! and return error indicator
	 mov	-1, %o0
END(Lfsprobe)
5016#endif	/* DDB */
5017
5018/*
5019 * pmap_zero_phys(pa)
5020 *
5021 * Zero one page physically addressed
5022 *
5023 * Block load/store ASIs do not exist for physical addresses,
5024 * so we won't use them.
5025 *
5026 * While we do the zero operation, we also need to blast away
5027 * the contents of the D$.  We will execute a flush at the end
5028 * to sync the I$.
5029 */
5030	.text
ENTRY(pmap_zero_phys)
	set	NBPG, %o2		! Loop count
	clr	%o1			! %o1 walks the D$ tag space
1:
	dec	8, %o2
	stxa	%g0, [%o0] ASI_PHYS_CACHED	! zero 8 bytes physically
	inc	8, %o0
dlflush4:
	stxa	%g0, [%o1] ASI_DCACHE_TAG	! and invalidate D$ as we go
	brgz	%o2, 1b
	 inc	16, %o1			! 16 bytes of tag space per line

	sethi	%hi(KERNBASE), %o3
	flush	%o3			! sync the I$
	retl
	 nop
 END(pmap_zero_phys)
5048
5049/*
5050 * pmap_copy_phys(src, dst)
5051 *
5052 * Copy one page physically addressed
5053 */
5054ENTRY(pmap_copy_phys)
5055	set	NBPG, %o3
5056	add	%o3, %o0, %o3
50571:
5058	ldxa	[%o0] ASI_PHYS_CACHED, %o4
5059	inc	8, %o0
5060	cmp	%o0, %o3
5061	stxa	%o4, [%o1] ASI_PHYS_CACHED
5062	blu,pt	%xcc, 1b
5063	 inc	8, %o1
5064	retl
5065	 nop
5066END(pmap_copy_phys)
5067
5068/*
5069 * extern int64_t pseg_get(struct pmap* %o0, vaddr_t addr %o1);
5070 *
5071 * Return TTE at addr in pmap.  Uses physical addressing only.
5072 * pmap->pm_physaddr must by the physical address of pm_segs
5073 *
5074 */
5075ENTRY(pseg_get)
5076	ldx	[%o0 + PM_PHYS], %o2			! pmap->pm_segs
5077
5078	srax	%o1, HOLESHIFT, %o3			! Check for valid address
5079	brz,pt	%o3, 0f					! Should be zero or -1
5080	 inc	%o3					! Make -1 -> 0
5081	brnz,pn	%o3, 1f					! Error! In hole!
50820:
5083	srlx	%o1, STSHIFT, %o3
5084	and	%o3, STMASK, %o3			! Index into pm_segs
5085	sll	%o3, 3, %o3
5086	add	%o2, %o3, %o2
5087	ldxa	[%o2] ASI_PHYS_CACHED, %o2		! Load page directory pointer
5088
5089	srlx	%o1, PDSHIFT, %o3
5090	and	%o3, PDMASK, %o3
5091	sll	%o3, 3, %o3
5092	brz,pn	%o2, 1f					! NULL entry? check somewhere else
5093	 add	%o2, %o3, %o2
5094	ldxa	[%o2] ASI_PHYS_CACHED, %o2		! Load page table pointer
5095
5096	srlx	%o1, PTSHIFT, %o3			! Convert to ptab offset
5097	and	%o3, PTMASK, %o3
5098	sll	%o3, 3, %o3
5099	brz,pn	%o2, 1f					! NULL entry? check somewhere else
5100	 add	%o2, %o3, %o2
5101	ldxa	[%o2] ASI_PHYS_CACHED, %o0
5102	brgez,pn %o0, 1f				! Entry invalid?  Punt
5103	 nop
5104	retl
5105	 nop
51061:
5107	retl
5108	 clr	%o0
5109END(pseg_get)
5110
5111/*
5112 * extern int pseg_set(struct pmap* %o0, vaddr_t addr %o1, int64_t tte %o2,
5113 *			paddr_t spare %o3);
5114 *
5115 * Set a pseg entry to a particular TTE value.  Returns 0 on success,
5116 * 1 if it needs to fill a pseg, 2 if it succeeded but didn't need the
5117 * spare page, and -1 if the address is in the virtual hole.
5118 * (NB: nobody in pmap checks for the virtual hole, so the system will hang.)
5119 * Allocate a page, pass the phys addr in as the spare, and try again.
5120 * If spare is not NULL it is assumed to be the address of a zeroed physical
5121 * page that can be used to generate a directory table or page table if needed.
5122 *
5123 */
5124ENTRY(pseg_set)
5125	!!
5126	!! However we managed to get here we now have:
5127	!!
5128	!! %o0 = *pmap
5129	!! %o1 = addr
5130	!! %o2 = tte
5131	!! %o3 = spare
5132	!!
5133	srax	%o1, HOLESHIFT, %o4			! Check for valid address
5134	brz,pt	%o4, 0f					! Should be zero or -1
5135	 inc	%o4					! Make -1 -> 0
5136	brz,pt	%o4, 0f
5137	 nop
5138	mov	-1, %o0					! Error -- in hole!
5139	retl
5140	 mov	-1, %o1
51410:
5142	ldx	[%o0 + PM_PHYS], %o4			! pmap->pm_segs
5143	srlx	%o1, STSHIFT, %o5
5144	and	%o5, STMASK, %o5
5145	sll	%o5, 3, %o5
5146	add	%o4, %o5, %o4
51472:
5148	ldxa	[%o4] ASI_PHYS_CACHED, %o5		! Load page directory pointer
5149
5150	brnz,a,pt	%o5, 0f				! Null pointer?
5151	 mov	%o5, %o4
5152	brz,pn	%o3, 1f					! Have a spare?
5153	 mov	%o3, %o5
5154	casxa	[%o4] ASI_PHYS_CACHED, %g0, %o5
5155	brnz,pn	%o5, 2b					! Something changed?
5156	mov	%o3, %o4
5157	clr	%o3					! Mark spare as used
51580:
5159	srlx	%o1, PDSHIFT, %o5
5160	and	%o5, PDMASK, %o5
5161	sll	%o5, 3, %o5
5162	add	%o4, %o5, %o4
51632:
5164	ldxa	[%o4] ASI_PHYS_CACHED, %o5		! Load table directory pointer
5165
5166	brnz,a,pt	%o5, 0f				! Null pointer?
5167	 mov	%o5, %o4
5168	brz,pn	%o3, 1f					! Have a spare?
5169	 mov	%o3, %o5
5170	casxa	[%o4] ASI_PHYS_CACHED, %g0, %o5
5171	brnz,pn	%o5, 2b					! Something changed?
5172	mov	%o3, %o4
5173	clr	%o3					! Mark spare as used
51740:
5175	srlx	%o1, PTSHIFT, %o5			! Convert to ptab offset
5176	and	%o5, PTMASK, %o5
5177	sll	%o5, 3, %o5
5178	add	%o5, %o4, %o4
5179	stxa	%o2, [%o4] ASI_PHYS_CACHED		! Easier than shift+or
5180	mov	2, %o0					! spare unused?
5181	retl
5182	 movrz	%o3, %g0, %o0				! No. return 0
51831:
5184	retl
5185	 mov	1, %o0
5186END(pseg_set)
5187
5188
5189/*
5190 * memcpy(dst, src, len) - always copies forward.
5191 */
5192ENTRY(memcpy) /* dest, src, size */
5193	cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)
5194Lmemcpy_start:
5195	bge,pt	%xcc, 2f	! if >= this many, go be fancy.
5196	 nop
5197
5198	mov	%o0, %o5	! Save memcpy return value
5199	/*
5200	 * Not much to copy, just do it a byte at a time.
5201	 */
5202	deccc	%o2		! while (--len >= 0)
5203	bl	1f
5204!	 XXX check no delay slot
52050:
5206	inc	%o1
5207	ldsb	[%o1 - 1], %o4	!	(++dst)[-1] = *src++;
5208	stb	%o4, [%o0]
5209	deccc	%o2
5210	bge	0b
5211	 inc	%o0
52121:
5213	retl
5214	 mov	%o5, %o0
5215	NOTREACHED
5216
5217	/*
5218	 * Plenty of data to copy, so try to do it optimally.
5219	 */
52202:
5221Lbcopy_fancy:
5222
5223	!!
5224	!! First align the output to a 8-byte entity
5225	!!
5226
5227	save	%sp, -CC64FSZ, %sp
5228
5229	mov	%i1, %l0
5230	mov	%i0, %l1
5231
5232	mov	%i2, %l2
5233	btst	1, %l1
5234
5235	bz,pt	%icc, 4f
5236	 btst	2, %l1
5237	ldub	[%l0], %l4				! Load 1st byte
5238
5239	deccc	1, %l2
5240	ble,pn	%xcc, Lbcopy_finish			! XXXX
5241	 inc	1, %l0
5242
5243	stb	%l4, [%l1]				! Store 1st byte
5244	inc	1, %l1					! Update address
5245	btst	2, %l1
52464:
5247	bz,pt	%icc, 4f
5248
5249	 btst	1, %l0
5250	bz,a	1f
5251	 lduh	[%l0], %l4				! Load short
5252
5253	ldub	[%l0], %l4				! Load bytes
5254
5255	ldub	[%l0+1], %l3
5256	sllx	%l4, 8, %l4
5257	or	%l3, %l4, %l4
5258
52591:
5260	deccc	2, %l2
5261	ble,pn	%xcc, Lbcopy_finish			! XXXX
5262	 inc	2, %l0
5263	sth	%l4, [%l1]				! Store 1st short
5264
5265	inc	2, %l1
52664:
5267	btst	4, %l1
5268	bz,pt	%xcc, 4f
5269
5270	 btst	3, %l0
5271	bz,a,pt	%xcc, 1f
5272	 lduw	[%l0], %l4				! Load word -1
5273
5274	btst	1, %l0
5275	bz,a,pt	%icc, 2f
5276	 lduh	[%l0], %l4
5277
5278	ldub	[%l0], %l4
5279
5280	lduh	[%l0+1], %l3
5281	sllx	%l4, 16, %l4
5282	or	%l4, %l3, %l4
5283
5284	ldub	[%l0+3], %l3
5285	sllx	%l4, 8, %l4
5286	ba,pt	%icc, 1f
5287	 or	%l4, %l3, %l4
5288
52892:
5290	lduh	[%l0+2], %l3
5291	sllx	%l4, 16, %l4
5292	or	%l4, %l3, %l4
5293
52941:
5295	deccc	4, %l2
5296	ble,pn	%xcc, Lbcopy_finish		! XXXX
5297	 inc	4, %l0
5298
5299	st	%l4, [%l1]				! Store word
5300	inc	4, %l1
53014:
5302	!!
5303	!! We are now 32-bit aligned in the dest.
5304	!!
5305Lbcopy_common:
5306
5307	and	%l0, 7, %l4				! Shift amount
5308	andn	%l0, 7, %l0				! Source addr
5309
5310	brz,pt	%l4, Lbcopy_noshift8			! No shift version...
5311
5312	 sllx	%l4, 3, %l4				! In bits
5313	mov	8<<3, %l3
5314
5315	ldx	[%l0], %o0				! Load word -1
5316	sub	%l3, %l4, %l3				! Reverse shift
5317	deccc	12*8, %l2				! Have enough room?
5318
5319	sllx	%o0, %l4, %o0
5320	bl,pn	%xcc, 2f
5321	 and	%l3, 0x38, %l3
5322Lbcopy_unrolled8:
5323
5324	/*
5325	 * This is about as close to optimal as you can get, since
5326	 * the shifts require EU0 and cannot be paired, and you have
5327	 * 3 dependent operations on the data.
5328	 */
5329
5330!	ldx	[%l0+0*8], %o0				! Already done
5331!	sllx	%o0, %l4, %o0				! Already done
5332	ldx	[%l0+1*8], %o1
5333	ldx	[%l0+2*8], %o2
5334	ldx	[%l0+3*8], %o3
5335	ldx	[%l0+4*8], %o4
5336	ba,pt	%icc, 1f
5337	 ldx	[%l0+5*8], %o5
5338	.align	8
53391:
5340	srlx	%o1, %l3, %g1
5341	inc	6*8, %l0
5342
5343	sllx	%o1, %l4, %o1
5344	or	%g1, %o0, %g6
5345	ldx	[%l0+0*8], %o0
5346
5347	stx	%g6, [%l1+0*8]
5348	srlx	%o2, %l3, %g1
5349
5350	sllx	%o2, %l4, %o2
5351	or	%g1, %o1, %g6
5352	ldx	[%l0+1*8], %o1
5353
5354	stx	%g6, [%l1+1*8]
5355	srlx	%o3, %l3, %g1
5356
5357	sllx	%o3, %l4, %o3
5358	or	%g1, %o2, %g6
5359	ldx	[%l0+2*8], %o2
5360
5361	stx	%g6, [%l1+2*8]
5362	srlx	%o4, %l3, %g1
5363
5364	sllx	%o4, %l4, %o4
5365	or	%g1, %o3, %g6
5366	ldx	[%l0+3*8], %o3
5367
5368	stx	%g6, [%l1+3*8]
5369	srlx	%o5, %l3, %g1
5370
5371	sllx	%o5, %l4, %o5
5372	or	%g1, %o4, %g6
5373	ldx	[%l0+4*8], %o4
5374
5375	stx	%g6, [%l1+4*8]
5376	srlx	%o0, %l3, %g1
5377	deccc	6*8, %l2				! Have enough room?
5378
5379	sllx	%o0, %l4, %o0				! Next loop
5380	or	%g1, %o5, %g6
5381	ldx	[%l0+5*8], %o5
5382
5383	stx	%g6, [%l1+5*8]
5384	bge,pt	%xcc, 1b
5385	 inc	6*8, %l1
5386
5387Lbcopy_unrolled8_cleanup:
5388	!!
5389	!! Finished 8 byte block, unload the regs.
5390	!!
5391	srlx	%o1, %l3, %g1
5392	inc	5*8, %l0
5393
5394	sllx	%o1, %l4, %o1
5395	or	%g1, %o0, %g6
5396
5397	stx	%g6, [%l1+0*8]
5398	srlx	%o2, %l3, %g1
5399
5400	sllx	%o2, %l4, %o2
5401	or	%g1, %o1, %g6
5402
5403	stx	%g6, [%l1+1*8]
5404	srlx	%o3, %l3, %g1
5405
5406	sllx	%o3, %l4, %o3
5407	or	%g1, %o2, %g6
5408
5409	stx	%g6, [%l1+2*8]
5410	srlx	%o4, %l3, %g1
5411
5412	sllx	%o4, %l4, %o4
5413	or	%g1, %o3, %g6
5414
5415	stx	%g6, [%l1+3*8]
5416	srlx	%o5, %l3, %g1
5417
5418	sllx	%o5, %l4, %o5
5419	or	%g1, %o4, %g6
5420
5421	stx	%g6, [%l1+4*8]
5422	inc	5*8, %l1
5423
5424	mov	%o5, %o0				! Save our unused data
5425	dec	5*8, %l2
54262:
5427	inccc	12*8, %l2
5428	bz,pn	%icc, Lbcopy_complete
5429
5430	!! Unrolled 8 times
5431Lbcopy_aligned8:
5432!	ldx	[%l0], %o0				! Already done
5433!	sllx	%o0, %l4, %o0				! Shift high word
5434
5435	 deccc	8, %l2					! Pre-decrement
5436	bl,pn	%xcc, Lbcopy_finish
54371:
5438	ldx	[%l0+8], %o1				! Load word 0
5439	inc	8, %l0
5440
5441	srlx	%o1, %l3, %g6
5442	or	%g6, %o0, %g6				! Combine
5443
5444	stx	%g6, [%l1]				! Store result
5445	 inc	8, %l1
5446
5447	deccc	8, %l2
5448	bge,pn	%xcc, 1b
5449	 sllx	%o1, %l4, %o0
5450
5451	btst	7, %l2					! Done?
5452	bz,pt	%xcc, Lbcopy_complete
5453
5454	!!
5455	!! Loadup the last dregs into %o0 and shift it into place
5456	!!
5457	 srlx	%l3, 3, %g6				! # bytes in %o0
5458	dec	8, %g6					!  - 8
5459	!! n-8 - (by - 8) -> n - by
5460	subcc	%l2, %g6, %g0				! # bytes we need
5461	ble,pt	%icc, Lbcopy_finish
5462	 nop
5463	ldx	[%l0+8], %o1				! Need another word
5464	srlx	%o1, %l3, %o1
5465	ba,pt	%icc, Lbcopy_finish
5466	 or	%o0, %o1, %o0				! All loaded up.
5467
5468Lbcopy_noshift8:
5469	deccc	6*8, %l2				! Have enough room?
5470	bl,pn	%xcc, 2f
5471	 nop
5472	ba,pt	%icc, 1f
5473	 nop
5474	.align	32
54751:
5476	ldx	[%l0+0*8], %o0
5477	ldx	[%l0+1*8], %o1
5478	ldx	[%l0+2*8], %o2
5479	stx	%o0, [%l1+0*8]
5480	stx	%o1, [%l1+1*8]
5481	stx	%o2, [%l1+2*8]
5482
5483
5484	ldx	[%l0+3*8], %o3
5485	ldx	[%l0+4*8], %o4
5486	ldx	[%l0+5*8], %o5
5487	inc	6*8, %l0
5488	stx	%o3, [%l1+3*8]
5489	deccc	6*8, %l2
5490	stx	%o4, [%l1+4*8]
5491	stx	%o5, [%l1+5*8]
5492	bge,pt	%xcc, 1b
5493	 inc	6*8, %l1
54942:
5495	inc	6*8, %l2
54961:
5497	deccc	8, %l2
5498	bl,pn	%icc, 1f				! < 0 --> sub word
5499	 nop
5500	ldx	[%l0], %g6
5501	inc	8, %l0
5502	stx	%g6, [%l1]
5503	bg,pt	%icc, 1b				! Exactly 0 --> done
5504	 inc	8, %l1
55051:
5506	btst	7, %l2					! Done?
5507	bz,pt	%xcc, Lbcopy_complete
5508	 clr	%l4
5509	ldx	[%l0], %o0
5510Lbcopy_finish:
5511
5512	brz,pn	%l2, 2f					! 100% complete?
5513	 cmp	%l2, 8					! Exactly 8 bytes?
5514	bz,a,pn	%xcc, 2f
5515	 stx	%o0, [%l1]
5516
5517	btst	4, %l2					! Word store?
5518	bz	%xcc, 1f
5519	 srlx	%o0, 32, %g6				! Shift high word down
5520	stw	%g6, [%l1]
5521	inc	4, %l1
5522	mov	%o0, %g6				! Operate on the low bits
55231:
5524	btst	2, %l2
5525	mov	%g6, %o0
5526	bz	1f
5527	 srlx	%o0, 16, %g6
5528
5529	sth	%g6, [%l1]				! Store short
5530	inc	2, %l1
5531	mov	%o0, %g6				! Operate on low bytes
55321:
5533	mov	%g6, %o0
5534	btst	1, %l2					! Byte aligned?
5535	bz	2f
5536	 srlx	%o0, 8, %g6
5537
5538	stb	%g6, [%l1]				! Store last byte
5539	inc	1, %l1					! Update address
55402:
5541Lbcopy_complete:
5542	ret
5543	 restore %i0, %g0, %o0
5544END(memcpy)
5545
5546/*
5547 * bzero(addr, len)
5548 *
5549 * XXXXX To produce more efficient code, we do not allow lengths
5550 * greater than 0x80000000000000000, which are negative numbers.
5551 * This should not really be an issue since the VA hole should
5552 * cause any such ranges to fail anyway.
5553 */
5554ENTRY(bzero)
5555	! %o0 = addr, %o1 = len
5556	mov	%o1, %o2
5557	clr	%o1			! Initialize our pattern
5558/*
5559 * memset(addr, c, len)
5560 */
5561ENTRY(memset)
5562	! %o0 = addr, %o1 = pattern, %o2 = len
5563	mov	%o0, %o4		! Save original pointer
5564
5565Lbzero_internal:
5566	btst	7, %o0			! Word aligned?
5567	bz,pn	%xcc, 0f
5568	 nop
5569	inc	%o0
5570	deccc	%o2			! Store up to 7 bytes
5571	bge,a,pt	%xcc, Lbzero_internal
5572	 stb	%o1, [%o0 - 1]
5573
5574	retl				! Duplicate Lbzero_done
5575	 mov	%o4, %o0
55760:
5577	/*
5578	 * Duplicate the pattern so it fills 64-bits.
5579	 */
5580	andcc	%o1, 0x0ff, %o1		! No need to extend zero
5581	bz,pt	%icc, 1f
5582	 sllx	%o1, 8, %o3		! sigh.  all dependent instructions.
5583	or	%o1, %o3, %o1
5584	sllx	%o1, 16, %o3
5585	or	%o1, %o3, %o1
5586	sllx	%o1, 32, %o3
5587	 or	%o1, %o3, %o1
55881:
5589	 deccc	8, %o2
5590Lbzero_longs:
5591	bl,pn	%xcc, Lbzero_cleanup	! Less than 8 bytes left
5592	 nop
55933:
5594	inc	8, %o0
5595	deccc	8, %o2
5596	bge,pt	%xcc, 3b
5597	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time
5598
5599	/*
5600	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
5601	 * -6 => two bytes, etc.  Mop up this remainder, if any.
5602	 */
5603Lbzero_cleanup:
5604	btst	4, %o2
5605	bz,pt	%xcc, 5f		! if (len & 4) {
5606	 nop
5607	stw	%o1, [%o0]		!	*(int *)addr = 0;
5608	inc	4, %o0			!	addr += 4;
56095:
5610	btst	2, %o2
5611	bz,pt	%xcc, 7f		! if (len & 2) {
5612	 nop
5613	sth	%o1, [%o0]		!	*(short *)addr = 0;
5614	inc	2, %o0			!	addr += 2;
56157:
5616	btst	1, %o2
5617	bnz,a	%icc, Lbzero_done	! if (len & 1)
5618	 stb	%o1, [%o0]		!	*addr = 0;
5619Lbzero_done:
5620	retl
5621	 mov	%o4, %o0		! Restore pointer for memset (ugh)
5622END(memset)
5623
5624/*
5625 * kcopy() is exactly like bcopy except that it sets pcb_onfault such that
5626 * when a fault occurs, it is able to return EFAULT to indicate this to the
5627 * caller.
5628 */
5629ENTRY(kcopy)
5630	GET_CPCB(%o5)			! cpcb->pcb_onfault = Lkcerr;
5631	set	Lkcerr, %o3
5632	ldx	[%o5 + PCB_ONFAULT], %g1! save current onfault handler
5633	membar	#LoadStore
5634	stx	%o3, [%o5 + PCB_ONFAULT]
5635	membar	#StoreStore|#StoreLoad
5636
5637	cmp	%o2, BCOPY_SMALL
5638Lkcopy_start:
5639	bge,a	Lkcopy_fancy	! if >= this many, go be fancy.
5640	 btst	7, %o0		! (part of being fancy)
5641
5642	/*
5643	 * Not much to copy, just do it a byte at a time.
5644	 */
5645	deccc	%o2		! while (--len >= 0)
5646	bl	1f
5647!	 XXX check no delay slot
56480:
5649	ldsb	[%o0], %o4	!	*dst++ = *src++;
5650	inc	%o0
5651	stb	%o4, [%o1]
5652	deccc	%o2
5653	bge	0b
5654	 inc	%o1
56551:
5656	membar	#Sync		! Make sure all faults are processed
5657	stx	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
5658	membar	#StoreStore|#StoreLoad
5659	retl
5660	 clr	%o0
5661	NOTREACHED
5662
5663	/*
5664	 * Plenty of data to copy, so try to do it optimally.
5665	 */
5666Lkcopy_fancy:
5667	! check for common case first: everything lines up.
5668!	btst	7, %o0		! done already
5669	bne	1f
5670!	 XXX check no delay slot
5671	btst	7, %o1
5672	be,a	Lkcopy_doubles
5673	 dec	8, %o2		! if all lined up, len -= 8, goto kcopy_doubes
5674
5675	! If the low bits match, we can make these line up.
56761:
5677	xor	%o0, %o1, %o3	! t = src ^ dst;
5678	btst	1, %o3		! if (t & 1) {
5679	be,a	1f
5680	 btst	1, %o0		! [delay slot: if (src & 1)]
5681
5682	! low bits do not match, must copy by bytes.
56830:
5684	ldsb	[%o0], %o4	!	do {
5685	inc	%o0		!		*dst++ = *src++;
5686	stb	%o4, [%o1]
5687	deccc	%o2
5688	bnz	0b		!	} while (--len != 0);
5689	 inc	%o1
5690	membar	#Sync		! Make sure all traps are taken
5691	stx	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
5692	membar	#StoreStore|#StoreLoad
5693	retl
5694	 clr	%o0
5695	NOTREACHED
5696
5697	! lowest bit matches, so we can copy by words, if nothing else
56981:
5699	be,a	1f		! if (src & 1) {
5700	 btst	2, %o3		! [delay slot: if (t & 2)]
5701
5702	! although low bits match, both are 1: must copy 1 byte to align
5703	ldsb	[%o0], %o4	!	*dst++ = *src++;
5704	inc	%o0
5705	stb	%o4, [%o1]
5706	dec	%o2		!	len--;
5707	inc	%o1
5708	btst	2, %o3		! } [if (t & 2)]
57091:
5710	be,a	1f		! if (t & 2) {
5711	 btst	2, %o0		! [delay slot: if (src & 2)]
5712	dec	2, %o2		!	len -= 2;
57130:
5714	ldsh	[%o0], %o4	!	do {
5715	inc	2, %o0		!		dst += 2, src += 2;
5716	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
5717	deccc	2, %o2		!	} while ((len -= 2) >= 0);
5718	bge	0b
5719	 inc	2, %o1
5720	b	Lkcopy_mopb	!	goto mop_up_byte;
5721	 btst	1, %o2		! } [delay slot: if (len & 1)]
5722	NOTREACHED
5723
5724	! low two bits match, so we can copy by longwords
57251:
5726	be,a	1f		! if (src & 2) {
5727	 btst	4, %o3		! [delay slot: if (t & 4)]
5728
5729	! although low 2 bits match, they are 10: must copy one short to align
5730	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
5731	inc	2, %o0		!	dst += 2;
5732	sth	%o4, [%o1]
5733	dec	2, %o2		!	len -= 2;
5734	inc	2, %o1		!	src += 2;
5735	btst	4, %o3		! } [if (t & 4)]
57361:
5737	be,a	1f		! if (t & 4) {
5738	 btst	4, %o0		! [delay slot: if (src & 4)]
5739	dec	4, %o2		!	len -= 4;
57400:
5741	ld	[%o0], %o4	!	do {
5742	inc	4, %o0		!		dst += 4, src += 4;
5743	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
5744	deccc	4, %o2		!	} while ((len -= 4) >= 0);
5745	bge	0b
5746	 inc	4, %o1
5747	b	Lkcopy_mopw	!	goto mop_up_word_and_byte;
5748	 btst	2, %o2		! } [delay slot: if (len & 2)]
5749	NOTREACHED
5750
5751	! low three bits match, so we can copy by doublewords
57521:
5753	be	1f		! if (src & 4) {
5754	 dec	8, %o2		! [delay slot: len -= 8]
5755	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5756	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
5757	st	%o4, [%o1]
5758	dec	4, %o2		! }
5759	inc	4, %o1
57601:
5761Lkcopy_doubles:
5762	ldx	[%o0], %g5	! do {
5763	inc	8, %o0		!	dst += 8, src += 8;
5764	stx	%g5, [%o1]	!	*(double *)dst = *(double *)src;
5765	deccc	8, %o2		! } while ((len -= 8) >= 0);
5766	bge	Lkcopy_doubles
5767	 inc	8, %o1
5768
5769	! check for a usual case again (save work)
5770	btst	7, %o2		! if ((len & 7) == 0)
5771	be	Lkcopy_done	!	goto kcopy_done;
5772
5773	 btst	4, %o2		! if ((len & 4) == 0)
5774	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte;
5775	 btst	2, %o2		! [delay slot: if (len & 2)]
5776	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
5777	inc	4, %o0		!	dst += 4;
5778	st	%o4, [%o1]
5779	inc	4, %o1		!	src += 4;
5780	btst	2, %o2		! } [if (len & 2)]
5781
57821:
5783	! mop up trailing word (if present) and byte (if present).
5784Lkcopy_mopw:
5785	be	Lkcopy_mopb	! no word, go mop up byte
5786	 btst	1, %o2		! [delay slot: if (len & 1)]
5787	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
5788	be	Lkcopy_done	! if ((len & 1) == 0) goto done;
5789	 sth	%o4, [%o1]
5790	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
5791	stb	%o4, [%o1 + 2]
5792	membar	#Sync		! Make sure all traps are taken
5793	stx	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
5794	membar	#StoreStore|#StoreLoad
5795	retl
5796	 clr	%o0
5797	NOTREACHED
5798
5799	! mop up trailing byte (if present).
5800Lkcopy_mopb:
5801	bne,a	1f
5802	 ldsb	[%o0], %o4
5803
5804Lkcopy_done:
5805	membar	#Sync		! Make sure all traps are taken
5806	stx	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
5807	membar	#StoreStore|#StoreLoad
5808	retl
5809	 clr	%o0
5810	NOTREACHED
5811
58121:
5813	stb	%o4, [%o1]
5814	membar	#Sync		! Make sure all traps are taken
5815	stx	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
5816	membar	#StoreStore|#StoreLoad
5817	retl
5818	 clr	%o0
5819	NOTREACHED
5820
5821Lkcerr:
5822	stx	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
5823	membar	#StoreStore|#StoreLoad
5824	retl				! and return error indicator
5825	 mov	EFAULT, %o0
5826	NOTREACHED
5827END(kcopy)
5828
5829/*
5830 * bcopy(src, dest, size) - overlaps detected and copied in reverse
5831 */
5832ENTRY(bcopy)
5833	/*
5834	 * Swap args and continue to memmove.
5835	 */
5836	mov	%o0, %o3
5837	mov	%o1, %o0
5838	mov	%o3, %o1
5839/*
5840 * memmove(dst, src, len) - overlaps detected and copied in reverse
5841 */
5842ENTRY(memmove)
5843	mov	%o0, %o5	! Save memcpy return value
5844
5845	cmp	%o1, %o0	! src < dst?
5846	bgeu	Lmemcpy_start	! no, go copy forwards as via memcpy
5847	 cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)
5848
5849	/*
5850	 * Since src comes before dst, and the regions might overlap,
5851	 * we have to do the copy starting at the end and working backwards.
5852	 */
5853	add	%o2, %o1, %o1	! src += len
5854	add	%o2, %o0, %o0	! dst += len
5855	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
5856	 btst	3, %o1
5857
5858	/*
5859	 * Not much to copy, just do it a byte at a time.
5860	 */
5861	deccc	%o2		! while (--len >= 0)
5862	bl	1f
5863!	 XXX check no delay slot
58640:
5865	dec	%o1		!	*--dst = *--src;
5866	ldsb	[%o1], %o4
5867	dec	%o0
5868	deccc	%o2
5869	bge	0b
5870	 stb	%o4, [%o0]
58711:
5872	retl
5873	 mov	%o5, %o0
5874	NOTREACHED
5875
5876	/*
5877	 * Plenty to copy, try to be optimal.
5878	 * We only bother with word/halfword/byte copies here.
5879	 */
5880Lback_fancy:
5881!	btst	3, %o1		! done already
5882	bnz	1f		! if ((src & 3) == 0 &&
5883	 btst	3, %o0		!     (dst & 3) == 0)
5884	bz,a	Lback_words	!	goto words;
5885	 dec	4, %o2		! (done early for word copy)
5886
58871:
5888	/*
5889	 * See if the low bits match.
5890	 */
5891	xor	%o1, %o0, %o3	! t = src ^ dst;
5892	btst	1, %o3
5893	bz,a	3f		! if (t & 1) == 0, can do better
5894	 btst	1, %o1
5895
5896	/*
5897	 * Nope; gotta do byte copy.
5898	 */
58992:
5900	dec	%o1		! do {
5901	ldsb	[%o1], %o4	!	*--dst = *--src;
5902	dec	%o0
5903	deccc	%o2		! } while (--len != 0);
5904	bnz	2b
5905	 stb	%o4, [%o0]
5906	retl
5907	 nop
5908
59093:
5910	/*
5911	 * Can do halfword or word copy, but might have to copy 1 byte first.
5912	 */
5913!	btst	1, %o1		! done earlier
5914	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
5915	 btst	2, %o3		! (done early)
5916	dec	%o1		!	*--dst = *--src;
5917	ldsb	[%o1], %o4
5918	dec	%o0
5919	stb	%o4, [%o0]
5920	dec	%o2		!	len--;
5921	btst	2, %o3		! }
5922
59234:
5924	/*
5925	 * See if we can do a word copy ((t&2) == 0).
5926	 */
5927!	btst	2, %o3		! done earlier
5928	bz,a	6f		! if (t & 2) == 0, can do word copy
5929	 btst	2, %o1		! (src&2, done early)
5930
5931	/*
5932	 * Gotta do halfword copy.
5933	 */
5934	dec	2, %o2		! len -= 2;
59355:
5936	dec	2, %o1		! do {
5937	ldsh	[%o1], %o4	!	src -= 2;
5938	dec	2, %o0		!	dst -= 2;
5939	deccc	2, %o2		!	*(short *)dst = *(short *)src;
5940	bge	5b		! } while ((len -= 2) >= 0);
5941	 sth	%o4, [%o0]
5942	b	Lback_mopb	! goto mop_up_byte;
5943	 btst	1, %o2		! (len&1, done early)
5944
59456:
5946	/*
5947	 * We can do word copies, but we might have to copy
5948	 * one halfword first.
5949	 */
5950!	btst	2, %o1		! done already
5951	bz	7f		! if (src & 2) {
5952	 dec	4, %o2		! (len -= 4, done early)
5953	dec	2, %o1		!	src -= 2, dst -= 2;
5954	ldsh	[%o1], %o4	!	*(short *)dst = *(short *)src;
5955	dec	2, %o0
5956	sth	%o4, [%o0]
5957	dec	2, %o2		!	len -= 2;
5958				! }
5959
59607:
5961Lback_words:
5962	/*
5963	 * Do word copies (backwards), then mop up trailing halfword
5964	 * and byte if any.
5965	 */
5966!	dec	4, %o2		! len -= 4, done already
59670:				! do {
5968	dec	4, %o1		!	src -= 4;
5969	dec	4, %o0		!	src -= 4;
5970	ld	[%o1], %o4	!	*(int *)dst = *(int *)src;
5971	deccc	4, %o2		! } while ((len -= 4) >= 0);
5972	bge	0b
5973	 st	%o4, [%o0]
5974
5975	/*
5976	 * Check for trailing shortword.
5977	 */
5978	btst	2, %o2		! if (len & 2) {
5979	bz,a	1f
5980	 btst	1, %o2		! (len&1, done early)
5981	dec	2, %o1		!	src -= 2, dst -= 2;
5982	ldsh	[%o1], %o4	!	*(short *)dst = *(short *)src;
5983	dec	2, %o0
5984	sth	%o4, [%o0]	! }
5985	btst	1, %o2
5986
5987	/*
5988	 * Check for trailing byte.
5989	 */
59901:
5991Lback_mopb:
5992!	btst	1, %o2		! (done already)
5993	bnz,a	1f		! if (len & 1) {
5994	 ldsb	[%o1 - 1], %o4	!	b = src[-1];
5995	retl
5996	 mov	%o5, %o0
5997	NOTREACHED
5998
59991:
6000	stb	%o4, [%o0 - 1]	! }
6001	retl			!	dst[-1] = b;
6002	 mov	%o5, %o0
6003	NOTREACHED
6004END(memmove)
6005
6006/*
6007 * clearfpstate()
6008 *
6009 * Drops the current fpu state, without saving it.
6010 */
6011ENTRY(clearfpstate)
6012	rdpr	%pstate, %o1		! enable FPU
6013	wr	%g0, FPRS_FEF, %fprs
6014	or	%o1, PSTATE_PEF, %o1
6015	retl
6016	 wrpr	%o1, 0, %pstate
6017END(clearfpstate)
6018
6019/*
6020 * savefpstate(struct fpstate *f)
6021 *
6022 * Store the current FPU state.
6023 *
6024 * Since the kernel may need to use the FPU and we have problems atomically
6025 * testing and enabling the FPU, we leave here with the FPRS_FEF bit set.
6026 * Normally this should be turned on in loadfpstate().
6027 */
6028 /* XXXXXXXXXX  Assume caller created a proper stack frame */
6029ENTRY(savefpstate)
6030	rdpr	%pstate, %o1		! enable FP before we begin
6031	rd	%fprs, %o5
6032	wr	%g0, FPRS_FEF, %fprs
6033	or	%o1, PSTATE_PEF, %o1
6034	wrpr	%o1, 0, %pstate
6035
6036	stx	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
6037
6038	rd	%gsr, %o4		! Save %gsr
6039	st	%o4, [%o0 + FS_GSR]
6040
6041	add	%o0, FS_REGS, %o2	! This is zero...
6042	btst	FPRS_DL, %o5		! Lower FPU clean?
6043	bz,a,pt	%icc, 1f		! Then skip it
6044	 add	%o2, 128, %o2		! Skip a block
6045
6046	membar	#Sync
6047	stda	%f0, [%o2] ASI_BLK_P	! f->fs_f0 = etc;
6048	inc	BLOCK_SIZE, %o2
6049	stda	%f16, [%o2] ASI_BLK_P
6050	inc	BLOCK_SIZE, %o2
60511:
6052	btst	FPRS_DU, %o5		! Upper FPU clean?
6053	bz,pt	%icc, 2f		! Then skip it
6054	 nop
6055
6056	membar	#Sync
6057	stda	%f32, [%o2] ASI_BLK_P
6058	inc	BLOCK_SIZE, %o2
6059	stda	%f48, [%o2] ASI_BLK_P
60602:
6061	membar	#Sync			! Finish operation so we can
6062	retl
6063	 wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean
6064END(savefpstate)
6065
6066/*
6067 * Load FPU state.
6068 */
6069 /* XXXXXXXXXX  Should test to see if we only need to do a partial restore */
6070ENTRY(loadfpstate)
6071	rdpr	%pstate, %o1		! enable FP before we begin
6072	ld	[%o0 + FS_GSR], %o4	! Restore %gsr
6073	set	PSTATE_PEF, %o2
6074	wr	%g0, FPRS_FEF, %fprs
6075	or	%o1, %o2, %o1
6076	wrpr	%o1, 0, %pstate
6077	ldx	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
6078	add	%o0, FS_REGS, %o3	! This is zero...
6079	wr	%o4, %g0, %gsr
6080	membar	#Sync
6081	ldda	[%o3] ASI_BLK_P, %f0
6082	inc	BLOCK_SIZE, %o3
6083	ldda	[%o3] ASI_BLK_P, %f16
6084	inc	BLOCK_SIZE, %o3
6085	ldda	[%o3] ASI_BLK_P, %f32
6086	inc	BLOCK_SIZE, %o3
6087	ldda	[%o3] ASI_BLK_P, %f48
6088	membar	#Sync			! Make sure loads are complete
6089	retl
6090	 wr	%g0, FPRS_FEF, %fprs	! Clear dirty bits
6091END(loadfpstate)
6092
/* XXX belongs elsewhere (ctlreg.h?) */
#define	AFSR_CECC_ERROR		0x100000	/* AFSR Correctable ECC err */
#define	DATAPATH_CE		0x100		/* Datapath Correctable Err */

	.data
	_ALIGN
	.globl	cecclast, ceccerrs
! cecclast: AFAR (fault address) of the most recent correctable ECC
! error, recorded by cecc_catch below.
cecclast:
	.xword 0
! ceccerrs: running count of correctable ECC errors seen by cecc_catch.
ceccerrs:
	.word 0
	_ALIGN
	.text
6106
6107/*
6108 * ECC Correctable Error handler - this doesn't do much except intercept
6109 * the error and reset the status bits.
6110 */
6111ENTRY(cecc_catch)
6112	ldxa	[%g0] ASI_AFSR, %g1			! g1 = AFSR
6113	ldxa	[%g0] ASI_AFAR, %g2			! g2 = AFAR
6114
6115	sethi	%hi(cecclast), %g1			! cecclast = AFAR
6116	or	%g1, %lo(cecclast), %g1
6117	stx	%g2, [%g1]
6118
6119	sethi	%hi(ceccerrs), %g1			! get current count
6120	or	%g1, %lo(ceccerrs), %g1
6121	lduw	[%g1], %g2				! g2 = ceccerrs
6122
6123	ldxa	[%g0] ASI_DATAPATH_ERR_REG_READ, %g3	! Read UDB-Low status
6124	andcc	%g3, DATAPATH_CE, %g4			! Check CE bit
6125	be,pn	%xcc, 1f				! Don't clear unless
6126	 nop						!  necessary
6127	stxa	%g4, [%g0] ASI_DATAPATH_ERR_REG_WRITE	! Clear CE bit in UDBL
6128	membar	#Sync					! sync store
6129	inc	%g2					! ceccerrs++
61301:	mov	0x18, %g5
6131	ldxa	[%g5] ASI_DATAPATH_ERR_REG_READ, %g3	! Read UDB-High status
6132	andcc	%g3, DATAPATH_CE, %g4			! Check CE bit
6133	be,pn	%xcc, 1f				! Don't clear unless
6134	 nop						!  necessary
6135	stxa	%g4, [%g5] ASI_DATAPATH_ERR_REG_WRITE	! Clear CE bit in UDBH
6136	membar	#Sync					! sync store
6137	inc	%g2					! ceccerrs++
61381:	set	AFSR_CECC_ERROR, %g3
6139	stxa	%g3, [%g0] ASI_AFSR			! Clear CE in AFSR
6140	stw	%g2, [%g1]				! set ceccerrs
6141	membar	#Sync					! sync store
6142        CLRTT
6143        retry
6144        NOTREACHED
6145END(cecc_catch)
6146
6147/*
6148 * send_softint(level, intrhand)
6149 *
6150 * Send a softint with an intrhand pointer so we can cause a vectored
6151 * interrupt instead of a polled interrupt.  This does pretty much the
6152 * same as interrupt_vector.  If intrhand is NULL then it just sends
6153 * a polled interrupt.
6154 */
6155ENTRY(send_softint)
6156	rdpr	%pstate, %g1
6157	andn	%g1, PSTATE_IE, %o3
6158	wrpr	%o3, 0, %pstate
6159
6160	brz,pn	%o1, 1f
6161	 add	%g7, CI_INTRPENDING, %o3
6162
6163	ldx	[%o1 + IH_PEND], %o5
6164	brnz,pn	%o5, 1f
6165	 sll	%o0, 3, %o5		! Find start of list for this IPL
6166	add	%o3, %o5, %o3
6167
6168	ldx	[%o3], %o5		! Load list head
6169	add	%o1, IH_PEND, %o4
6170	casxa	[%o4] ASI_N, %g0, %o5
6171	brnz,pn	%o5, 1f
6172	 nop
6173	stx	%o1, [%o3]
6174
6175	mov	1, %o3			! Change from level to bitmask
6176	sllx	%o3, %o0, %o3
6177	wr	%o3, 0, SET_SOFTINT	! SET_SOFTINT
61781:
6179	retl
6180	 wrpr	%g1, 0, %pstate		! restore interrupts
6181END(send_softint)
6182
6183/*
6184 * Flush user windows to memory.
6185 */
6186ENTRY(write_user_windows)
6187	rdpr	%otherwin, %g1
6188	brz	%g1, 3f
6189	clr	%g2
61901:
6191	save	%sp, -CC64FSZ, %sp
6192	rdpr	%otherwin, %g1
6193	brnz	%g1, 1b
6194	 inc	%g2
61952:
6196	dec	%g2
6197	brnz	%g2, 2b
6198	 restore
61993:
6200	retl
6201	 nop
6202END(write_user_windows)
6203
6204/*
6205 * Clear the Nonprivileged Trap (NPT) bit of %tick such that it can be
6206 * read from userland.  This requires us to read the current value and
6207 * write it back with the bit cleared.  As a result we will lose a
6208 * couple of ticks.  In order to limit the number of lost ticks, we
6209 * block interrupts and make sure the instructions to read and write
6210 * %tick live in the same cache line.  We tag on an extra read to work
6211 * around a Blackbird (UltraSPARC-II) errata (see below).
6212 */
6213ENTRY(tick_enable)
6214	rdpr	%pstate, %o0
6215	andn	%o0, PSTATE_IE, %o1
6216	wrpr	%o1, 0, %pstate		! disable interrupts
6217	rdpr	%tick, %o2
6218	brgez,pn %o2, 1f
6219	 clr	%o1
6220	mov	1, %o1
6221	sllx	%o1, 63, %o1
6222	ba,pt	%xcc, 1f
6223	 nop
6224	.align	64
62251:	rdpr	%tick, %o2
6226	wrpr	%o2, %o1, %tick
6227	rdpr	%tick, %g0
6228
6229	retl
6230	 wrpr	%o0, 0, %pstate		! restore interrupts
6231END(tick_enable)
6232
6233/*
6234 * On Blackbird (UltraSPARC-II) CPUs, writes to %tick_cmpr may fail.
6235 * The workaround is to do a read immediately after the write and make
6236 * sure those two instructions are in the same cache line.
6237 */
6238ENTRY(tickcmpr_set)
6239	.align	64
6240	wr	%o0, 0, %tick_cmpr
6241	rd	%tick_cmpr, %g0
6242	retl
6243	 nop
6244END(tickcmpr_set)
6245
/*
 * sys_tick_enable(): clear the NPT bit of %sys_tick so it can be read
 * from userland.  Same XOR-write technique and cache-line alignment
 * as tick_enable() above.
 */
ENTRY(sys_tick_enable)
	rdpr	%pstate, %o0
	andn	%o0, PSTATE_IE, %o1
	wrpr	%o1, 0, %pstate		! disable interrupts
	rd	%sys_tick, %o2
	brgez,pn %o2, 1f		! NPT (bit 63) already clear?
	 clr	%o1			! yes: XOR mask = 0 (no-op write)
	mov	1, %o1			! no: XOR mask = 1<<63 to clear NPT
	sllx	%o1, 63, %o1
	ba,pt	%xcc, 1f
	 nop
	.align	64			! keep read+write in one cache line
1:	rd	%sys_tick, %o2
	wr	%o2, %o1, %sys_tick
	rd	%sys_tick, %g0		! workaround read (cf. tickcmpr_set)

	retl
	 wrpr	%o0, 0, %pstate		! restore interrupts
END(sys_tick_enable)
6265
/*
 * sys_tickcmpr_set(value): %o0 = new %sys_tick_cmpr value.
 * Write-then-read in one cache line, as for tickcmpr_set() above.
 */
ENTRY(sys_tickcmpr_set)
	.align	64			! write+read must share a cache line
	wr	%o0, 0, %sys_tick_cmpr
	rd	%sys_tick_cmpr, %g0	! workaround read
	retl
	 nop
END(sys_tickcmpr_set)
6273
6274/*
6275 * Support for the STICK logic found on the integrated PCI host bridge
6276 * of Hummingbird (UltraSPARC-IIe).  The chip designers made the
6277 * brilliant decision to split the 64-bit counters into two 64-bit
6278 * aligned 32-bit registers, making atomic access impossible.  This
6279 * means we have to check for wraparound in various places.  Sigh.
6280 */
6281
6282#define STICK_CMP_LOW	0x1fe0000f060
6283#define STICK_CMP_HIGH	0x1fe0000f068
6284#define STICK_REG_LOW	0x1fe0000f070
6285#define STICK_REG_HIGH	0x1fe0000f078
6286
6287ENTRY(stick)
6288	setx	STICK_REG_LOW, %o1, %o3
62890:
6290	ldxa	[%o3] ASI_PHYS_NON_CACHED, %o0
6291	add	%o3, (STICK_REG_HIGH - STICK_REG_LOW), %o4
6292	ldxa	[%o4] ASI_PHYS_NON_CACHED, %o1
6293	ldxa	[%o3] ASI_PHYS_NON_CACHED, %o2
6294	cmp	%o2, %o0		! Check for wraparound
6295	blu,pn	%icc, 0b
6296	 sllx	%o1, 33, %o1		! Clear the MSB
6297	srlx	%o1, 1, %o1
6298	retl
6299	 or	%o2, %o1, %o0
6300END(stick)
6301
/*
 * void stickcmpr_set(u_int64_t) -- program the split STICK compare
 * registers.  The high word is written before the low word
 * (presumably so the comparator only arms once the full value is in
 * place -- TODO confirm against the Hummingbird documentation).
 */
ENTRY(stickcmpr_set)
	setx	STICK_CMP_HIGH, %o1, %o3
	srlx	%o0, 32, %o1				! %o1 = high 32 bits
	stxa	%o1, [%o3] ASI_PHYS_NON_CACHED		! write high word first
	add	%o3, (STICK_CMP_LOW - STICK_CMP_HIGH), %o4
	stxa	%o0, [%o4] ASI_PHYS_NON_CACHED		! then low word
	retl
	 nop
END(stickcmpr_set)
6311
#define MICROPERSEC	(1000000)	/* microseconds per second */
	.data
	.align	16
	.globl	cpu_clockrate
cpu_clockrate:
	!! Pretend we have a 200MHz clock -- cpu_attach will fix this
	.xword	200000000
	!! Here we'll store cpu_clockrate/1000000 so we can calculate usecs
	!! (lazily computed by delay() below on its first call)
	.xword	0
	.text
6322
6323/*
6324 * delay function
6325 *
6326 * void delay(N)  -- delay N microseconds
6327 *
6328 * Register usage: %o0 = "N" number of usecs to go (counts down to zero)
6329 *		   %o2 = counter for 1 usec (counts down from %o1 to zero)
6330 *
6331 *
6332 *	cpu_clockrate should be tuned during CPU probe to the CPU clockrate in Hz
6333 *
6334 */
ENTRY(delay)			! %o0 = n
	rdpr	%tick, %o1					! Take timer snapshot
	sethi	%hi(cpu_clockrate), %o2
	sethi	%hi(MICROPERSEC), %o3
	ldx	[%o2 + %lo(cpu_clockrate + 8)], %o4		! Get scale factor (ticks/usec)
	brnz,pt	%o4, 0f						! already computed?
	 or	%o3, %lo(MICROPERSEC), %o3

	!! Calculate ticks/usec
	ldx	[%o2 + %lo(cpu_clockrate)], %o4			! No, we need to calculate it
	udivx	%o4, %o3, %o4					! Hz / 1000000
	stx	%o4, [%o2 + %lo(cpu_clockrate + 8)]		! Save it so we don't need to divide again
0:

	mulx	%o0, %o4, %o0					! Convert usec -> ticks
	rdpr	%tick, %o2					! Top of next itr
1:
	sub	%o2, %o1, %o3					! How many ticks have gone by?
	sub	%o0, %o3, %o4					! Decrement count by that much
	movrgz	%o3, %o4, %o0					! But only if we're decrementing
	mov	%o2, %o1					! Remember last tick
	brgz,pt	%o0, 1b						! Done?
	 rdpr	%tick, %o2					! Get new tick

	retl
	 nop
END(delay)
6362
6363#ifdef DDB
/*
 * DDB-only minimal setjmp: save frame pointer and return pc into the
 * two-xword jmp_buf at %o0, after flushing the register windows so
 * the in-register frames hit the stack.  Returns 0.
 */
ENTRY(setjmp)
	save	%sp, -CC64FSZ, %sp	! Need a frame to return to.
	flushw				! spill register windows to the stack
	stx	%fp, [%i0+0]	! 64-bit stack pointer
	stx	%i7, [%i0+8]	! 64-bit return pc
	ret
	 restore	%g0, 0, %o0	! return 0
END(setjmp)
6372
/*
 * DDB-only longjmp companion to setjmp above: reload the saved frame
 * pointer and return pc from the jmp_buf at %o0 and return 1 from the
 * matching setjmp call.
 */
ENTRY(longjmp)
	save	%sp, -CC64FSZ, %sp	! prepare to restore to (old) frame
	flushw				! spill register windows first
	mov	1, %i2			! setjmp's return value after the jump
	ldx	[%i0+0], %fp	! get return stack
	ldx	[%i0+8], %i7	! get rpc
	ret
	 restore	%i2, 0, %o0	! return 1
END(longjmp)
6382
6383	/*
6384	 * Debug stuff.  Dump the trap registers into buffer & set tl=0.
6385	 *
6386	 *  %o0 = *ts
6387	 */
/*
 * Walk the trap levels from the current %tl down to 1, storing
 * %tstate/%tpc/%tnpc/%tt (4 xwords = 32 bytes) per level into the
 * buffer at %o0.  Returns the original trap level in %o0 and leaves
 * %tl at 0 (the wrpr in the final delay slot executes even when the
 * loop exits).  If entered at %tl == 0 nothing is stored.
 */
ENTRY(savetstate)
	mov	%o0, %o1		! %o1 = buffer cursor
	rdpr	%tl, %o0		! %o0 = original %tl (return value)
	brz	%o0, 2f			! nothing to save at trap level 0
	 mov	%o0, %o2		! %o2 = loop counter
1:
	rdpr	%tstate, %o3
	stx	%o3, [%o1]
	deccc	%o2			! one level done; sets cc for bnz below
	inc	8, %o1
	rdpr	%tpc, %o4
	stx	%o4, [%o1]
	inc	8, %o1
	rdpr	%tnpc, %o5
	stx	%o5, [%o1]
	inc	8, %o1
	rdpr	%tt, %o4
	stx	%o4, [%o1]
	inc	8, %o1
	bnz	1b			! more levels?
	 wrpr	%o2, 0, %tl		! (delay slot) descend one trap level
2:
	retl
	 nop
END(savetstate)
6413
6414	/*
6415	 * Debug stuff.  Restore trap registers from buffer.
6416	 *
6417	 *  %o0 = %tl
6418	 *  %o1 = *ts
6419	 *
6420	 * Maybe this should be re-written to increment tl instead of decrementing.
6421	 */
/*
 * Inverse of savetstate: reload %tstate/%tpc/%tnpc/%tt for levels
 * %o0 down to 1 from the buffer at %o1 (same layout savetstate
 * wrote), then leave %tl set to %o0.  %o0 == 0 just sets %tl = 0.
 */
ENTRY(restoretstate)
	flushw			! Make sure we don't have stack probs & lose hibits of %o
	brz,pn	%o0, 2f		! no levels to restore?
	 mov	%o0, %o2	! %o2 = loop counter
	wrpr	%o0, 0, %tl	! start at the requested trap level
1:
	ldx	[%o1], %o3
	deccc	%o2		! one level done; sets cc for bnz below
	inc	8, %o1
	wrpr	%o3, 0, %tstate
	ldx	[%o1], %o4
	inc	8, %o1
	wrpr	%o4, 0, %tpc
	ldx	[%o1], %o5
	inc	8, %o1
	wrpr	%o5, 0, %tnpc
	ldx	[%o1], %o4
	inc	8, %o1
	wrpr	%o4, 0, %tt
	bnz	1b		! more levels?
	 wrpr	%o2, 0, %tl	! (delay slot) descend one trap level
2:
	retl
	 wrpr	%o0, 0, %tl	! leave %tl at the requested level
END(restoretstate)
6447
6448	/*
6449	 * Switch to context in %o0
6450	 */
ENTRY(switchtoctx)
	set	DEMAP_CTX_SECONDARY, %o3	! demap-op encoded in the address
	stxa	%o3, [%o3] ASI_DMMU_DEMAP	! flush secondary ctx from D-TLB
	membar	#Sync
	mov	CTX_SECONDARY, %o4
	stxa	%o3, [%o3] ASI_IMMU_DEMAP	! flush secondary ctx from I-TLB
	membar	#Sync
	stxa	%o0, [%o4] ASI_DMMU		! Maybe we should invalidate the old context?
	membar	#Sync				! No real reason for this XXXX
	sethi	%hi(KERNBASE), %o2
	flush	%o2				! flush I-pipe so the new ctx is seen
	retl
	 nop
END(switchtoctx)
6465
#endif	/* DDB */
6467
	.data
	_ALIGN
#if defined(DDB) || NKSYMS > 0
	! start/end of the kernel symbol table (presumably filled in by
	! the bootstrap code -- TODO confirm against the caller)
	.globl	esym
esym:
	.xword	0
	.globl	ssym
ssym:
	.xword	0
#endif	/* defined(DDB) || NKSYMS > 0 */
	.globl	proc0paddr
proc0paddr:
	.xword	u0			! KVA of proc0 uarea

#ifdef DEBUG
	.comm	pmapdebug, 4		! pmap debug flags word
#endif	/* DEBUG */

	! NULL-terminated table of the dlflush* instruction addresses
	! defined elsewhere in this file (candidates for runtime patching)
	.globl	dlflush_start
dlflush_start:
	.xword	dlflush1
	.xword	dlflush2
	.xword	dlflush3
	.xword	dlflush4
	.xword	0
6493