xref: /netbsd/sys/arch/sparc64/sparc64/locore.s (revision bf9ec67e)
1/*	$NetBSD: locore.s,v 1.153 2002/05/14 02:23:07 eeh Exp $	*/
2
3/*
4 * Copyright (c) 1996-2002 Eduardo Horvath
5 * Copyright (c) 1996 Paul Kranenburg
6 * Copyright (c) 1996
7 * 	The President and Fellows of Harvard College.
8 *	All rights reserved.
9 * Copyright (c) 1992, 1993
10 *	The Regents of the University of California.
11 *	All rights reserved.
12 *
13 * This software was developed by the Computer Systems Engineering group
14 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
15 * contributed to Berkeley.
16 *
17 * All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Lawrence Berkeley Laboratory.
21 *	This product includes software developed by Harvard University.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the above copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the
31 *    distribution.
32 * 3. All advertising materials mentioning features or use of this
33 *    software must display the following acknowledgement:
34 *	This product includes software developed by the University of
35 *	California, Berkeley and its contributors.
36 *	This product includes software developed by Harvard University.
37 *	This product includes software developed by Paul Kranenburg.
38 * 4. Neither the name of the University nor the names of its
39 *    contributors may be used to endorse or promote products derived
40 *    from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
44 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
45 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
46 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
50 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
51 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
52 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
53 * DAMAGE.
54 *
55 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
56 */
57#define INTRLIST
58
59#define	SPITFIRE		/* We don't support Cheetah (USIII) yet */
60#define	INTR_INTERLOCK		/* Use IH_PEND field to interlock interrupts */
61#undef	PARANOID		/* Extremely expensive consistency checks */
62#undef	NO_VCACHE		/* Map w/D$ disabled */
63#define	TRAPTRACE		/* Keep history of all traps (unsafe) */
64#undef	FLTRACE			/* Keep history of all page faults */
65#undef	TRAPSTATS		/* Count traps */
66#undef	TRAPS_USE_IG		/* Use Interrupt Globals for all traps */
67#define	HWREF			/* Track ref/mod bits in trap handlers */
68#undef	PMAP_FPSTATE		/* Allow nesting of VIS pmap copy/zero */
69#define	NEW_FPSTATE
70#define	PMAP_PHYS_PAGE		/* Use phys ASIs for pmap copy/zero */
71#undef	DCACHE_BUG		/* Flush D$ around ASI_PHYS accesses */
72#undef	NO_TSB			/* Don't use TSB */
73#define	TICK_IS_TIME		/* Keep %tick synchronized with time */
74#undef	SCHED_DEBUG
75
76#include "opt_ddb.h"
77#include "opt_kgdb.h"
78#include "opt_multiprocessor.h"
79#include "opt_lockdebug.h"
80
81#include "assym.h"
82#include <machine/param.h>
83#include <sparc64/sparc64/intreg.h>
84#include <sparc64/sparc64/timerreg.h>
85#include <machine/ctlreg.h>
86#include <machine/psl.h>
87#include <machine/signal.h>
88#include <machine/trap.h>
89#include <machine/frame.h>
90#include <machine/pte.h>
91#include <machine/pmap.h>
92#include <machine/asm.h>
93
94/* A few convenient abbreviations for trapframe fields. */
95#define	TF_G	TF_GLOBAL
96#define	TF_O	TF_OUT
97#define	TF_L	TF_LOCAL
98#define	TF_I	TF_IN
99
100
101#undef	CURPROC
102#undef	CPCB
103#undef	FPPROC
104#ifndef MULTIPROCESSOR
105#define	CURPROC	_C_LABEL(curproc)
106#define CPCB	_C_LABEL(cpcb)
107#define	FPPROC	_C_LABEL(fpproc)
108#else
109#define	CURPROC	(CPUINFO_VA+CI_CURPROC)
110#define CPCB	(CPUINFO_VA+CI_CPCB)
111#define	FPPROC	(CPUINFO_VA+CI_FPPROC)
112#endif
113
114/* Let us use same syntax as C code */
115#define Debugger()	ta	1; nop
116
117#if 1
118/*
119 * Try to issue an elf note to ask the Solaris
120 * bootloader to align the kernel properly.
 *
 * The words below follow the standard ELF note layout:
 * namesz, descsz, type, then the name string and descriptor.
121 */
122	.section	.note
123	.word	0x0d		! namesz: strlen("SUNW Solaris") + NUL = 13
124	.word	4		! descsz: one 4-byte descriptor word below
125	.word	1		! note type
1260:	.asciz	"SUNW Solaris"
1271:
128	.align	4
129	.word	0x0400000	! desc: requested alignment (4MB) -- TODO confirm
130#endif
131
132	.register	%g2,#scratch
133	.register	%g3,#scratch
134
135/*
136 * Here are some defines to try to maintain consistency but still
137 * support 32-and 64-bit compilers.
138 */
139#ifdef _LP64
140/* reg that points to base of data/text segment */
141#define	BASEREG	%g4
142/* first constants for storage allocation */
143#define LNGSZ		8
144#define LNGSHFT		3
145#define PTRSZ		8
146#define PTRSHFT		3
147#define	POINTER		.xword
148/* Now instructions to load/store pointers & long ints */
149#define LDLNG		ldx
150#define LDULNG		ldx
151#define STLNG		stx
152#define STULNG		stx
153#define LDPTR		ldx
154#define LDPTRA		ldxa
155#define STPTR		stx
156#define STPTRA		stxa
157#define	CASPTR		casxa
158/* Now something to calculate the stack bias */
159#define STKB		BIAS
160#define	CCCR		%xcc
161#else
162#define	BASEREG		%g0
163#define LNGSZ		4
164#define LNGSHFT		2
165#define PTRSZ		4
166#define PTRSHFT		2
167#define POINTER		.word
168/* Instructions to load/store pointers & long ints */
169#define LDLNG		ldsw
170#define LDULNG		lduw
171#define STLNG		stw
172#define STULNG		stw
173#define LDPTR		lduw
174#define LDPTRA		lduwa
175#define STPTR		stw
176#define STPTRA		stwa
177#define	CASPTR		casa
178#define STKB		0
179#define	CCCR		%icc
180#endif
181
182/*
183 * GNU assembler does not understand `.empty' directive; Sun assembler
184 * gripes about labels without it.  To allow cross-compilation using
185 * the Sun assembler, and because .empty directives are useful
186 * documentation, we use this trick.
187 */
188#ifdef SUN_AS
189#define	EMPTY	.empty
190#else
191#define	EMPTY	/* .empty */
192#endif
193
194/* use as needed to align things on longword boundaries */
195#define	_ALIGN	.align 8
196#define ICACHE_ALIGN	.align	32
197
198/* Give this real authority: reset the machine */
199#if 1
200#define NOTREACHED	sir
201#else
202#define NOTREACHED
203#endif
204
205/*
206 * This macro will clear out a cache line before an explicit
207 * access to that location.  It's mostly used to make certain
208 * loads bypassing the D$ do not get stale D$ data.
209 *
210 * It uses a register with the address to clear and a temporary
211 * which is destroyed.
 *
 * Both variants compile to nothing unless DCACHE_BUG is defined.
212 */
213#ifdef DCACHE_BUG
214#define DLFLUSH(a,t) \
	/* round the address down to its 32-byte D$ line */ \
215	andn	a, 0x1f, t; \
	/* zero the line's tag, invalidating it */ \
216	stxa	%g0, [ t ] ASI_DCACHE_TAG; \
217	membar	#Sync
218/* The following can be used if the pointer is 16-byte aligned */
219#define DLFLUSH2(t) \
220	stxa	%g0, [ t ] ASI_DCACHE_TAG; \
221	membar	#Sync
222#else
223#define DLFLUSH(a,t)
224#define DLFLUSH2(t)
225#endif
226
227
228/*
229 * Combine 2 regs -- used to convert 64-bit ILP32
230 * values to LP64.
 *
 * d = (r1 << 32) | r2.  NOTE(review): the `or' assumes the upper
 * 32 bits of r2 are already clear -- confirm at call sites.
231 */
232#define	COMBINE(r1, r2, d)	\
233	sllx	r1, 32, d;	\
234	or	d, r2, d
235
236/*
237 * Split 64-bit value in 1 reg into high and low halves.
238 * Used for ILP32 return values.
 *
 * r1 = low 32 bits (srl by 0 zero-extends), r0 = high 32 bits.
239 */
240#define	SPLIT(r0, r1)		\
241	srl	r0, 0, r1;	\
242	srlx	r0, 32, r0
243
244
245/*
246 * A handy macro for maintaining instrumentation counters.
247 * Note that this clobbers %o0, %o1 and %o2.  Normal usage is
248 * something like:
249 *	foointr:
250 *		TRAP_SETUP(...)		! makes %o registers safe
251 *		INCR(_C_LABEL(cnt)+V_FOO)	! count a foo
 *
 * The increment is performed with a compare-and-swap retry loop,
 * so it is safe against concurrent updates of the counter.
252 */
253#define INCR(what) \
254	sethi	%hi(what), %o0; \
	/* %o0 = &what */ \
255	or	%o0, %lo(what), %o0; \
25699:	\
	/* %o1 = current value, %o2 = desired value */ \
257	lduw	[%o0], %o1; \
258	add	%o1, 1, %o2; \
	/* store %o2 only if the counter still holds %o1 */ \
259	casa	[%o0] ASI_P, %o1, %o2; \
260	cmp	%o1, %o2; \
	/* lost the race -- reload and retry */ \
261	bne,pn	%icc, 99b; \
262	 nop
263
264/*
265 * A couple of handy macros to save and restore globals to/from
266 * locals.  Since udivrem uses several globals, and it's called
267 * from vsprintf, we need to do this before and after doing a printf.
 *
 * %g0 is the hardwired zero register, so only %g1-%g7 need saving.
268 */
269#define GLOBTOLOC \
270	mov	%g1, %l1; \
271	mov	%g2, %l2; \
272	mov	%g3, %l3; \
273	mov	%g4, %l4; \
274	mov	%g5, %l5; \
275	mov	%g6, %l6; \
276	mov	%g7, %l7
277
278#define LOCTOGLOB \
279	mov	%l1, %g1; \
280	mov	%l2, %g2; \
281	mov	%l3, %g3; \
282	mov	%l4, %g4; \
283	mov	%l5, %g5; \
284	mov	%l6, %g6; \
285	mov	%l7, %g7
286
287/* Load string's address into register; NOTE: emits hidden local label 99 into .data */
288#define LOAD_ASCIZ(reg, s)	\
289	set	99f, reg ;	\
290	.data ;			\
29199:	.asciz	s ;		\
292	_ALIGN ;		\
293	.text
294
295/*
296 * Handy stack conversion macros.
297 * They correctly switch to requested stack type
298 * regardless of the current stack.
 *
 * A 64-bit (v9) stack pointer is offset by BIAS and is therefore
 * odd; bit 0 of %sp is what distinguishes the two stack types,
 * which is why the macros test `andcc %sp, 1' below.
299 */
300
301#define TO_STACK64(size)					\
302	save	%sp, size, %sp;					\
303	add	%sp, -BIAS, %o0; /* Convert to 64-bits */	\
304	andcc	%sp, 1, %g0; /* 64-bit stack? */		\
305	movz	%icc, %o0, %sp
306
307#define TO_STACK32(size)					\
308	save	%sp, size, %sp;					\
309	add	%sp, +BIAS, %o0; /* Convert to 32-bits */	\
310	andcc	%sp, 1, %g0; /* 64-bit stack? */		\
311	movnz	%icc, %o0, %sp
312
313#ifdef _LP64
314#define	STACKFRAME(size)	TO_STACK64(size)
315#else
316#define	STACKFRAME(size)	TO_STACK32(size)
317#endif
318
319/*
320 * The following routines allow fpu use in the kernel.
321 *
322 * They allocate a stack frame and use all local regs.  Extra
323 * local storage can be requested by setting the siz parameter,
324 * and can be accessed at %sp+CC64FSZ.
325 */
326
327#define ENABLE_FPU(siz)									     \
328	save	%sp, -(CC64FSZ), %sp;		/* Allocate a stack frame */		     \
329	sethi	%hi(FPPROC), %l1;							     \
330	add	%fp, STKB-FS_SIZE, %l0;		/* Allocate a fpstate */		     \
331	LDPTR	[%l1 + %lo(FPPROC)], %l2;	/* Load fpproc */			     \
332	andn	%l0, BLOCK_SIZE, %l0;		/* Align it */				     \
333	clr	%l3;				/* NULL fpstate */			     \
334	brz,pt	%l2, 1f;			/* fpproc == NULL? */			     \
335	 add	%l0, -STKB-CC64FSZ-(siz), %sp;	/* Set proper %sp */			     \
336	LDPTR	[%l2 + P_FPSTATE], %l3;							     \
337	brz,pn	%l3, 1f;			/* Make sure we have an fpstate */	     \
338	 mov	%l3, %o0;								     \
339	call	_C_LABEL(savefpstate);		/* Save the old fpstate */		     \
3401:	\
341	 set	EINTSTACK-STKB, %l4;		/* Are we on intr stack? */		     \
342	cmp	%sp, %l4;								     \
343	bgu,pt	%xcc, 1f;								     \
344	 set	INTSTACK-STKB, %l4;							     \
345	cmp	%sp, %l4;								     \
346	blu	%xcc, 1f;								     \
3470:											     \
348	 sethi	%hi(_C_LABEL(proc0)), %l4;	/* Yes, use proc0 */			     \
349	ba,pt	%xcc, 2f;			/* XXXX needs to change to CPUs idle proc */ \
350	 or	%l4, %lo(_C_LABEL(proc0)), %l5;						     \
3511:											     \
352	sethi	%hi(CURPROC), %l4;		/* Use curproc */			     \
353	LDPTR	[%l4 + %lo(CURPROC)], %l5;						     \
354	brz,pn	%l5, 0b; nop;			/* If curproc is NULL need to use proc0 */   \
3552:											     \
356	LDPTR	[%l5 + P_FPSTATE], %l6;		/* Save old fpstate */			     \
357	STPTR	%l0, [%l5 + P_FPSTATE];		/* Insert new fpstate */		     \
358	STPTR	%l5, [%l1 + %lo(FPPROC)];	/* Set new fpproc */			     \
359	wr	%g0, FPRS_FEF, %fprs		/* Enable FPU */
360
361/*
362 * We've saved our possible fpstate, now disable the fpu
363 * and continue with life.
364 */
365#ifdef DEBUG
366#define __CHECK_FPU				\
367	LDPTR	[%l5 + P_FPSTATE], %l7;		\
368	cmp	%l7, %l0;			\
369	tnz	1;
370#else
371#define	__CHECK_FPU
372#endif
373
374#define RESTORE_FPU							     \
375	__CHECK_FPU							     \
376	STPTR	%l2, [%l1 + %lo(FPPROC)];	/* Restore old fproc */	     \
377	wr	%g0, 0, %fprs;			/* Disable fpu */	     \
378	brz,pt	%l3, 1f;			/* Skip if no fpstate */     \
379	 STPTR	%l6, [%l5 + P_FPSTATE];		/* Restore old fpstate */    \
380									     \
381	mov	%l3, %o0;						     \
382	call	_C_LABEL(loadfpstate);		/* Re-load orig fpstate */   \
3831: \
384	 membar	#Sync;				/* Finish all FP ops */
385
386
387
388	.data
389	.globl	_C_LABEL(data_start)
390_C_LABEL(data_start):					! Start of data segment
391#define DATA_START	_C_LABEL(data_start)
392
393/*
394 * When a process exits and its u. area goes away, we set cpcb to point
395 * to this `u.', leaving us with something to use for an interrupt stack,
396 * and letting all the register save code have a pcb_uw to examine.
397 * This is also carefully arranged (to come just before u0, so that
398 * process 0's kernel stack can quietly overrun into it during bootup, if
399 * we feel like doing that).
400 */
401	.globl	_C_LABEL(idle_u)
402_C_LABEL(idle_u):
403	.space	USPACE
404
405/*
406 * Process 0's u.
407 *
408 * This must be aligned on an 8 byte boundary.
409 */
410	.globl	_C_LABEL(u0)
411_C_LABEL(u0):	POINTER	0
412estack0:	POINTER	0
413
414#ifdef KGDB
415/*
416 * Another item that must be aligned, easiest to put it here.
417 */
418KGDB_STACK_SIZE = 2048
419	.globl	_C_LABEL(kgdb_stack)
420_C_LABEL(kgdb_stack):
421	.space	KGDB_STACK_SIZE		! hope this is enough
422#endif
423
424#ifdef DEBUG
425/*
426 * This stack is used when we detect kernel stack corruption.
427 */
428	.space	USPACE
429	.align	16
430panicstack:
431#endif
432
433/*
434 * _cpcb points to the current pcb (and hence u. area).
435 * Initially this is the special one.
436 */
437	.globl	_C_LABEL(cpcb)
438_C_LABEL(cpcb):	POINTER	_C_LABEL(u0)
439
440/*
441 * romp is the prom entry pointer
442 */
443	.globl	romp
444romp:	POINTER	0
445
446
447/* NB:	 Do we really need the following around? */
448/*
449 * _cputyp is the current cpu type, used to distinguish between
450 * the many variations of different sun4* machines. It contains
451 * the value CPU_SUN4, CPU_SUN4C, or CPU_SUN4M.
452 */
453	.globl	_C_LABEL(cputyp)
454_C_LABEL(cputyp):
455	.word	1
456/*
457 * _cpumod is the current cpu model, used to distinguish between variants
458 * in the Sun4 and Sun4M families. See /sys/arch/sparc64/include/param.h
459 * for possible values.
460 */
461	.globl	_C_LABEL(cpumod)
462_C_LABEL(cpumod):
463	.word	1
464/*
465 * _mmumod is the current mmu model, used to distinguish between the
466 * various implementations of the SRMMU in the sun4m family of machines.
467 * See /sys/arch/sparc64/include/param.h for possible values.
468 */
469	.globl	_C_LABEL(mmumod)
470_C_LABEL(mmumod):
471	.word	0
472
473/*
474 * These variables are pointed to by the cpp symbols PGSHIFT, NBPG,
475 * and PGOFSET.
476 */
477	.globl	_C_LABEL(pgshift), _C_LABEL(nbpg), _C_LABEL(pgofset)
478_C_LABEL(pgshift):
479	.word	0
480_C_LABEL(nbpg):
481	.word	0
482_C_LABEL(pgofset):
483	.word	0
484
485	_ALIGN
486
487	.text
488
489/*
490 * The v9 trap frame is stored in the special trap registers.  The
491 * register window is only modified on window overflow, underflow,
492 * and clean window traps, where it points to the register window
493 * needing service.  Traps have space for 8 instructions, except for
494 * the window overflow, underflow, and clean window traps which are
495 * 32 instructions long, large enough to in-line.
496 *
497 * The spitfire CPU (Ultra I) has 4 different sets of global registers.
498 * (blah blah...)
499 *
500 * I used to generate these numbers by address arithmetic, but gas's
501 * expression evaluator has about as much sense as your average slug
502 * (oddly enough, the code looks about as slimy too).  Thus, all the
503 * trap numbers are given as arguments to the trap macros.  This means
504 * there is one line per trap.  Sigh.
505 *
506 * Hardware interrupt vectors can be `linked'---the linkage is to regular
507 * C code---or rewired to fast in-window handlers.  The latter are good
508 * for unbuffered hardware like the Zilog serial chip and the AMD audio
509 * chip, where many interrupts can be handled trivially with pseudo-DMA
510 * or similar.  Only one `fast' interrupt can be used per level, however,
511 * and direct and `fast' interrupts are incompatible.  Routines in intr.c
512 * handle setting these, with optional paranoia.
513 */
514
515/*
516 *	TA8 -- trap align for 8 instruction traps
517 *	TA32 -- trap align for 32 instruction traps
518 */
519#define TA8	.align 32
520#define TA32	.align 128
521
522/*
523 * v9 trap macros:
524 *
525 *	We have a problem with v9 traps; we have no registers to put the
526 *	trap type into.  But we do have a %tt register which already has
527 *	that information.  Trap types in these macros are all dummies.
528 */
529	/* regular vectored traps */
530#ifdef DEBUG
531#ifdef TRAPTRACE
532#define TRACEME		sethi %hi(1f), %g1; ba,pt %icc,traceit;\
533 or %g1, %lo(1f), %g1; 1:
534#if 0
535#define TRACEWIN	sethi %hi(9f), %l6; ba,pt %icc,traceitwin;\
536 or %l6, %lo(9f), %l6; 9:
537#endif /* 0 */
538#ifdef TRAPS_USE_IG
539#define TRACEWIN	wrpr %g0, PSTATE_KERN|PSTATE_AG, %pstate;\
540 sethi %hi(9f), %g1; ba,pt %icc,traceit; or %g1, %lo(9f), %g1; 9:
541#else
542#define TRACEWIN	wrpr %g0, PSTATE_KERN|PSTATE_IG, %pstate;\
543 sethi %hi(9f), %g1; ba,pt %icc,traceit; or %g1, %lo(9f), %g1; 9:
544#endif /* TRAPS_USE_IG */
545#define TRACERELOAD32	ba reload32; nop;
546#define TRACERELOAD64	ba reload64; nop;
547#define TRACEFLT	TRACEME
548#define	VTRAP(type, label) \
549	sethi %hi(label), %g1; ba,pt %icc,traceit;\
550 or %g1, %lo(label), %g1; NOTREACHED; TA8
551#else /* TRAPTRACE */
552#define TRACEME
553#define TRACEWIN	TRACEME
554#define TRACERELOAD32
555#define TRACERELOAD64
556#ifdef FLTRACE
557#define TRACEFLT	sethi %hi(1f), %g1; ba,pt %icc,traceit;\
558 or %g1, %lo(1f), %g1; 1:
559#else
560#define TRACEFLT	TRACEME
561#endif /* FLTRACE */
562#define	VTRAP(type, label) \
563	sethi %hi(DATA_START),%g1; rdpr %tt,%g2; or %g1,0x28,%g1; b label;\
564 stx %g2,[%g1]; NOTREACHED; TA8
565#endif /* TRAPTRACE */
566#else /* DEBUG */
567#ifdef TRAPTRACE
568#define TRACEME		sethi %hi(1f), %g1; ba,pt %icc,traceit;\
569 or %g1, %lo(1f), %g1; 1:
570#if 0
571/* Can't use this 'cause we have no clean registers during a spill */
572#define TRACEWIN	sethi %hi(9f), %l6; ba,pt %icc,traceitwin;\
573 or %l6, %lo(9f), %l6; 9:
574#endif /* 0 */
575#ifdef TRAPS_USE_IG
576#define TRACEWIN	wrpr %g0, PSTATE_KERN|PSTATE_AG, %pstate;\
577 sethi %hi(9f), %g1; ba,pt %icc,traceit; or %g1, %lo(9f), %g1; 9:
578#else
579#define TRACEWIN	wrpr %g0, PSTATE_KERN|PSTATE_IG, %pstate;\
580 sethi %hi(9f), %g1; ba,pt %icc,traceit; or %g1, %lo(9f), %g1; 9:
581#endif /* TRAPS_USE_IG */
582#define TRACERELOAD32	ba reload32; nop;
583#define TRACERELOAD64	ba reload64; nop;
584#define TRACEFLT	TRACEME
585#define	VTRAP(type, label) \
586	sethi %hi(label), %g1; ba,pt %icc,traceit;\
587 or %g1, %lo(label), %g1; NOTREACHED; TA8
588#else /* TRAPTRACE */
589#define TRACEME
590#define TRACEWIN	TRACEME
591#define TRACERELOAD32
592#define TRACERELOAD64
593#ifdef FLTRACE
594#define TRACEFLT	sethi %hi(1f), %g1; ba,pt %icc,traceit;\
595 or %g1, %lo(1f), %g1; 1:
596#else
597#define TRACEFLT	TRACEME
598#endif /* FLTRACE */
599#define	VTRAP(type, label) \
600	ba,a,pt	%icc,label; nop; NOTREACHED; TA8
601#endif /* TRAPTRACE */
602#endif /* DEBUG */
603	/* hardware interrupts (can be linked or made `fast') */
604#define	HARDINT4U(lev) \
605	VTRAP(lev, _C_LABEL(sparc_interrupt))
606
607	/* software interrupts (may not be made direct, sorry---but you
608	   should not be using them trivially anyway) */
609#define	SOFTINT4U(lev, bit) \
610	HARDINT4U(lev)
611
612	/* traps that just call trap() */
613#define	TRAP(type)	VTRAP(type, slowtrap)
614
615	/* architecturally undefined traps (cause panic) */
616#ifndef DEBUG
617#define	UTRAP(type)	sir; VTRAP(type, slowtrap)
618#else
619#define	UTRAP(type)	VTRAP(type, slowtrap)
620#endif
621
622	/* software undefined traps (may be replaced) */
623#define	STRAP(type)	VTRAP(type, slowtrap)
624
625/* breakpoint acts differently under kgdb */
626#ifdef KGDB
627#define	BPT		VTRAP(T_BREAKPOINT, bpt)
628#define	BPT_KGDB_EXEC	VTRAP(T_KGDB_EXEC, bpt)
629#else
630#define	BPT		TRAP(T_BREAKPOINT)
631#define	BPT_KGDB_EXEC	TRAP(T_KGDB_EXEC)
632#endif
633
634#define	SYSCALL		VTRAP(0x100, syscall_setup)
635#ifdef notyet
636#define	ZS_INTERRUPT	ba,a,pt %icc, zshard; nop; TA8
637#else
638#define	ZS_INTERRUPT4U	HARDINT4U(12)
639#endif
640
641
642/*
643 * Macro to clear %tt so we don't get confused with old traps.
644 */
645#ifdef DEBUG
646#define CLRTT	wrpr	%g0,0x1ff,%tt
647#else
648#define CLRTT
649#endif
650/*
651 * Here are some oft repeated traps as macros.
652 */
653
654	/* spill a 64-bit register window */
655#define SPILL64(label,as) \
656	TRACEWIN; \
657label:	\
658	wr	%g0, as, %asi; \
659	stxa	%l0, [%sp+BIAS+0x00]%asi; \
660	stxa	%l1, [%sp+BIAS+0x08]%asi; \
661	stxa	%l2, [%sp+BIAS+0x10]%asi; \
662	stxa	%l3, [%sp+BIAS+0x18]%asi; \
663	stxa	%l4, [%sp+BIAS+0x20]%asi; \
664	stxa	%l5, [%sp+BIAS+0x28]%asi; \
665	stxa	%l6, [%sp+BIAS+0x30]%asi; \
666	\
667	stxa	%l7, [%sp+BIAS+0x38]%asi; \
668	stxa	%i0, [%sp+BIAS+0x40]%asi; \
669	stxa	%i1, [%sp+BIAS+0x48]%asi; \
670	stxa	%i2, [%sp+BIAS+0x50]%asi; \
671	stxa	%i3, [%sp+BIAS+0x58]%asi; \
672	stxa	%i4, [%sp+BIAS+0x60]%asi; \
673	stxa	%i5, [%sp+BIAS+0x68]%asi; \
674	stxa	%i6, [%sp+BIAS+0x70]%asi; \
675	\
676	stxa	%i7, [%sp+BIAS+0x78]%asi; \
677	saved; \
678	CLRTT; \
679	retry; \
680	NOTREACHED; \
681	TA32
682
683	/* spill a 32-bit register window */
684#define SPILL32(label,as) \
685	TRACEWIN; \
686label:	\
687	wr	%g0, as, %asi; \
688	srl	%sp, 0, %sp; /* fixup 32-bit pointers */ \
689	stwa	%l0, [%sp+0x00]%asi; \
690	stwa	%l1, [%sp+0x04]%asi; \
691	stwa	%l2, [%sp+0x08]%asi; \
692	stwa	%l3, [%sp+0x0c]%asi; \
693	stwa	%l4, [%sp+0x10]%asi; \
694	stwa	%l5, [%sp+0x14]%asi; \
695	\
696	stwa	%l6, [%sp+0x18]%asi; \
697	stwa	%l7, [%sp+0x1c]%asi; \
698	stwa	%i0, [%sp+0x20]%asi; \
699	stwa	%i1, [%sp+0x24]%asi; \
700	stwa	%i2, [%sp+0x28]%asi; \
701	stwa	%i3, [%sp+0x2c]%asi; \
702	stwa	%i4, [%sp+0x30]%asi; \
703	stwa	%i5, [%sp+0x34]%asi; \
704	\
705	stwa	%i6, [%sp+0x38]%asi; \
706	stwa	%i7, [%sp+0x3c]%asi; \
707	saved; \
708	CLRTT; \
709	retry; \
710	NOTREACHED; \
711	TA32
712
713	/* Spill either 32-bit or 64-bit register window. */
714#define SPILLBOTH(label64,label32,as) \
715	TRACEWIN; \
716	andcc	%sp, 1, %g0; \
717	bnz,pt	%xcc, label64+4;	/* Is it a v9 or v8 stack? */ \
718	 wr	%g0, as, %asi; \
719	ba,pt	%xcc, label32+8; \
720	 srl	%sp, 0, %sp; /* fixup 32-bit pointers */ \
721	NOTREACHED; \
722	TA32
723
724	/* fill a 64-bit register window */
725#define FILL64(label,as) \
726	TRACEWIN; \
727label: \
728	wr	%g0, as, %asi; \
729	ldxa	[%sp+BIAS+0x00]%asi, %l0; \
730	ldxa	[%sp+BIAS+0x08]%asi, %l1; \
731	ldxa	[%sp+BIAS+0x10]%asi, %l2; \
732	ldxa	[%sp+BIAS+0x18]%asi, %l3; \
733	ldxa	[%sp+BIAS+0x20]%asi, %l4; \
734	ldxa	[%sp+BIAS+0x28]%asi, %l5; \
735	ldxa	[%sp+BIAS+0x30]%asi, %l6; \
736	\
737	ldxa	[%sp+BIAS+0x38]%asi, %l7; \
738	ldxa	[%sp+BIAS+0x40]%asi, %i0; \
739	ldxa	[%sp+BIAS+0x48]%asi, %i1; \
740	ldxa	[%sp+BIAS+0x50]%asi, %i2; \
741	ldxa	[%sp+BIAS+0x58]%asi, %i3; \
742	ldxa	[%sp+BIAS+0x60]%asi, %i4; \
743	ldxa	[%sp+BIAS+0x68]%asi, %i5; \
744	ldxa	[%sp+BIAS+0x70]%asi, %i6; \
745	\
746	ldxa	[%sp+BIAS+0x78]%asi, %i7; \
747	restored; \
748	CLRTT; \
749	retry; \
750	NOTREACHED; \
751	TA32
752
753	/* fill a 32-bit register window */
754#define FILL32(label,as) \
755	TRACEWIN; \
756label:	\
757	wr	%g0, as, %asi; \
758	srl	%sp, 0, %sp; /* fixup 32-bit pointers */ \
759	lda	[%sp+0x00]%asi, %l0; \
760	lda	[%sp+0x04]%asi, %l1; \
761	lda	[%sp+0x08]%asi, %l2; \
762	lda	[%sp+0x0c]%asi, %l3; \
763	lda	[%sp+0x10]%asi, %l4; \
764	lda	[%sp+0x14]%asi, %l5; \
765	\
766	lda	[%sp+0x18]%asi, %l6; \
767	lda	[%sp+0x1c]%asi, %l7; \
768	lda	[%sp+0x20]%asi, %i0; \
769	lda	[%sp+0x24]%asi, %i1; \
770	lda	[%sp+0x28]%asi, %i2; \
771	lda	[%sp+0x2c]%asi, %i3; \
772	lda	[%sp+0x30]%asi, %i4; \
773	lda	[%sp+0x34]%asi, %i5; \
774	\
775	lda	[%sp+0x38]%asi, %i6; \
776	lda	[%sp+0x3c]%asi, %i7; \
777	restored; \
778	CLRTT; \
779	retry; \
780	NOTREACHED; \
781	TA32
782
783	/* fill either 32-bit or 64-bit register window. */
784#define FILLBOTH(label64,label32,as) \
785	TRACEWIN; \
786	andcc	%sp, 1, %i0; \
787	bnz	(label64)+4; /* See if it's a v9 stack or v8 */ \
788	 wr	%g0, as, %asi; \
789	ba	(label32)+8; \
790	 srl	%sp, 0, %sp; /* fixup 32-bit pointers */ \
791	NOTREACHED; \
792	TA32
793
794	.globl	start, _C_LABEL(kernel_text)
795	_C_LABEL(kernel_text) = start		! for kvm_mkdb(8)
796start:
797	/* Traps from TL=0 -- traps from user mode */
798#define TABLE	user_
799	.globl	_C_LABEL(trapbase)
800_C_LABEL(trapbase):
801	b dostart; nop; TA8	! 000 = reserved -- Use it to boot
802	/* We should not get the next 5 traps */
803	UTRAP(0x001)		! 001 = POR Reset -- ROM should get this
804	UTRAP(0x002)		! 002 = WDR -- ROM should get this
805	UTRAP(0x003)		! 003 = XIR -- ROM should get this
806	UTRAP(0x004)		! 004 = SIR -- ROM should get this
807	UTRAP(0x005)		! 005 = RED state exception
808	UTRAP(0x006); UTRAP(0x007)
809	VTRAP(T_INST_EXCEPT, textfault)	! 008 = instr. access exept
810	VTRAP(T_TEXTFAULT, textfault)	! 009 = instr access MMU miss
811	VTRAP(T_INST_ERROR, textfault)	! 00a = instr. access err
812	UTRAP(0x00b); UTRAP(0x00c); UTRAP(0x00d); UTRAP(0x00e); UTRAP(0x00f)
813	TRAP(T_ILLINST)			! 010 = illegal instruction
814	TRAP(T_PRIVINST)		! 011 = privileged instruction
815	UTRAP(0x012)			! 012 = unimplemented LDD
816	UTRAP(0x013)			! 013 = unimplemented STD
817	UTRAP(0x014); UTRAP(0x015); UTRAP(0x016); UTRAP(0x017); UTRAP(0x018)
818	UTRAP(0x019); UTRAP(0x01a); UTRAP(0x01b); UTRAP(0x01c); UTRAP(0x01d)
819	UTRAP(0x01e); UTRAP(0x01f)
820	TRAP(T_FPDISABLED)		! 020 = fp instr, but EF bit off in psr
821	VTRAP(T_FP_IEEE_754, fp_exception)		! 021 = ieee 754 exception
822	VTRAP(T_FP_OTHER, fp_exception)		! 022 = other fp exception
823	TRAP(T_TAGOF)			! 023 = tag overflow
824	TRACEWIN			! DEBUG -- 4 insns
825	rdpr %cleanwin, %o7		! 024-027 = clean window trap
826	inc %o7				!	This handler is in-lined and cannot fault
827#ifdef DEBUG
828	set	0xbadcafe, %l0		! DEBUG -- compiler should not rely on zero-ed registers.
829#else
830	clr	%l0
831#endif
832	wrpr %g0, %o7, %cleanwin	!       Nucleus (trap&IRQ) code does not need clean windows
833
834	mov %l0,%l1; mov %l0,%l2	!	Clear out %l0-%l8 and %o0-%o8 and inc %cleanwin and done
835	mov %l0,%l3; mov %l0,%l4
836#if 0
837#ifdef DIAGNOSTIC
838	!!
839	!! Check the sp redzone
840	!!
841	!! Since we can't spill the current window, we'll just keep
842	!! track of the frame pointer.  Problems occur when the routine
843	!! allocates and uses stack storage.
844	!!
845!	rdpr	%wstate, %l5	! User stack?
846!	cmp	%l5, WSTATE_KERN
847!	bne,pt	%icc, 7f
848	 sethi	%hi(CPCB), %l5
849	LDPTR	[%l5 + %lo(CPCB)], %l5	! If pcb < fp < pcb+sizeof(pcb)
850	inc	PCB_SIZE, %l5		! then we have a stack overflow
851	btst	%fp, 1			! 64-bit stack?
852	sub	%fp, %l5, %l7
853	bnz,a,pt	%icc, 1f
854	 inc	BIAS, %l7		! Remove BIAS
8551:
856	cmp	%l7, PCB_SIZE
857	blu	%xcc, cleanwin_overflow
858#endif
859#endif
860	mov %l0, %l5
861	mov %l0, %l6; mov %l0, %l7; mov %l0, %o0; mov %l0, %o1
862
863	mov %l0, %o2; mov %l0, %o3; mov %l0, %o4; mov %l0, %o5;
864	mov %l0, %o6; mov %l0, %o7
865	CLRTT
866	retry; nop; NOTREACHED; TA32
867	TRAP(T_DIV0)			! 028 = divide by zero
868	UTRAP(0x029)			! 029 = internal processor error
869	UTRAP(0x02a); UTRAP(0x02b); UTRAP(0x02c); UTRAP(0x02d); UTRAP(0x02e); UTRAP(0x02f)
870	VTRAP(T_DATAFAULT, winfault)	! 030 = data fetch fault
871	UTRAP(0x031)			! 031 = data MMU miss -- no MMU
872	VTRAP(T_DATA_ERROR, winfault)	! 032 = data access error
873	VTRAP(T_DATA_PROT, winfault)	! 033 = data protection fault
874	TRAP(T_ALIGN)			! 034 = address alignment error -- we could fix it inline...
875	TRAP(T_LDDF_ALIGN)		! 035 = LDDF address alignment error -- we could fix it inline...
876	TRAP(T_STDF_ALIGN)		! 036 = STDF address alignment error -- we could fix it inline...
877	TRAP(T_PRIVACT)			! 037 = privileged action
878	UTRAP(0x038); UTRAP(0x039); UTRAP(0x03a); UTRAP(0x03b); UTRAP(0x03c);
879	UTRAP(0x03d); UTRAP(0x03e); UTRAP(0x03f);
880	VTRAP(T_ASYNC_ERROR, winfault)	! 040 = data fetch fault
881	SOFTINT4U(1, IE_L1)		! 041 = level 1 interrupt
882	HARDINT4U(2)			! 042 = level 2 interrupt
883	HARDINT4U(3)			! 043 = level 3 interrupt
884	SOFTINT4U(4, IE_L4)		! 044 = level 4 interrupt
885	HARDINT4U(5)			! 045 = level 5 interrupt
886	SOFTINT4U(6, IE_L6)		! 046 = level 6 interrupt
887	HARDINT4U(7)			! 047 = level 7 interrupt
888	HARDINT4U(8)			! 048 = level 8 interrupt
889	HARDINT4U(9)			! 049 = level 9 interrupt
890	HARDINT4U(10)			! 04a = level 10 interrupt
891	HARDINT4U(11)			! 04b = level 11 interrupt
892	ZS_INTERRUPT4U			! 04c = level 12 (zs) interrupt
893	HARDINT4U(13)			! 04d = level 13 interrupt
894	HARDINT4U(14)			! 04e = level 14 interrupt
895	HARDINT4U(15)			! 04f = nonmaskable interrupt
896	UTRAP(0x050); UTRAP(0x051); UTRAP(0x052); UTRAP(0x053); UTRAP(0x054); UTRAP(0x055)
897	UTRAP(0x056); UTRAP(0x057); UTRAP(0x058); UTRAP(0x059); UTRAP(0x05a); UTRAP(0x05b)
898	UTRAP(0x05c); UTRAP(0x05d); UTRAP(0x05e); UTRAP(0x05f)
899	VTRAP(0x060, interrupt_vector); ! 060 = interrupt vector
900	TRAP(T_PA_WATCHPT)		! 061 = physical address data watchpoint
901	TRAP(T_VA_WATCHPT)		! 062 = virtual address data watchpoint
902	UTRAP(T_ECCERR)			! We'll implement this one later
903ufast_IMMU_miss:			! 064 = fast instr access MMU miss
904	TRACEFLT			! DEBUG
905	ldxa	[%g0] ASI_IMMU_8KPTR, %g2	! Load IMMU 8K TSB pointer
906#ifdef NO_TSB
907	ba,a	%icc, instr_miss;
908#endif
909	ldxa	[%g0] ASI_IMMU, %g1	!	Load IMMU tag target register
910	ldda	[%g2] ASI_NUCLEUS_QUAD_LDD, %g4	!Load TSB tag:data into %g4:%g5
911	brgez,pn %g5, instr_miss	!	Entry invalid?  Punt
912	 cmp	%g1, %g4		!	Compare TLB tags
913	bne,pn %xcc, instr_miss		!	Got right tag?
914	 nop
915	CLRTT
916	stxa	%g5, [%g0] ASI_IMMU_DATA_IN!	Enter new mapping
917	retry				!	Try new mapping
9181:
919	sir
920	TA32
921ufast_DMMU_miss:			! 068 = fast data access MMU miss
922	TRACEFLT			! DEBUG
923	ldxa	[%g0] ASI_DMMU_8KPTR, %g2!					Load DMMU 8K TSB pointer
924#ifdef NO_TSB
925	ba,a	%icc, data_miss;
926#endif
927	ldxa	[%g0] ASI_DMMU, %g1	! Hard coded for unified 8K TSB		Load DMMU tag target register
928	ldda	[%g2] ASI_NUCLEUS_QUAD_LDD, %g4	!				Load TSB tag and data into %g4 and %g5
929	brgez,pn %g5, data_miss		!					Entry invalid?  Punt
930	 cmp	%g1, %g4		!					Compare TLB tags
931	bnz,pn	%xcc, data_miss		!					Got right tag?
932	 nop
933	CLRTT
934#ifdef TRAPSTATS
935	sethi	%hi(_C_LABEL(udhit)), %g1
936	lduw	[%g1+%lo(_C_LABEL(udhit))], %g2
937	inc	%g2
938	stw	%g2, [%g1+%lo(_C_LABEL(udhit))]
939#endif
940	stxa	%g5, [%g0] ASI_DMMU_DATA_IN!					Enter new mapping
941	retry				!					Try new mapping
9421:
943	sir
944	TA32
945ufast_DMMU_protection:			! 06c = fast data access MMU protection
946	TRACEFLT			! DEBUG -- we're perilously close to 32 insns
947#ifdef TRAPSTATS
948	sethi	%hi(_C_LABEL(udprot)), %g1
949	lduw	[%g1+%lo(_C_LABEL(udprot))], %g2
950	inc	%g2
951	stw	%g2, [%g1+%lo(_C_LABEL(udprot))]
952#endif
953#ifdef HWREF
954	ba,a,pt	%xcc, dmmu_write_fault
955#else
956	ba,a,pt	%xcc, winfault
957#endif
958	nop
959	TA32
960	UTRAP(0x070)			! Implementation dependent traps
961	UTRAP(0x071); UTRAP(0x072); UTRAP(0x073); UTRAP(0x074); UTRAP(0x075); UTRAP(0x076)
962	UTRAP(0x077); UTRAP(0x078); UTRAP(0x079); UTRAP(0x07a); UTRAP(0x07b); UTRAP(0x07c)
963	UTRAP(0x07d); UTRAP(0x07e); UTRAP(0x07f)
! Register window spill/fill vectors for the TL=0 (user) trap table.
! The TABLE token is pasted onto each label by cpp.  SPILL64/FILL64
! handle 64-bit stack frames, SPILL32/FILL32 handle 32-bit frames, and
! SPILLBOTH/FILLBOTH dispatch between the two forms named by their
! first two arguments.  The ASI argument selects the address space the
! window is saved to or restored from: ASI_AIUS (as-if-user secondary)
! for user windows, ASI_N (nucleus) for kernel windows.  Unused slots
! are padded out with UTRAP/TA32.
TABLE/**/uspill:
	SPILL64(uspill8,ASI_AIUS)	! 0x080 spill_0_normal -- used to save user windows in user mode
	SPILL32(uspill4,ASI_AIUS)	! 0x084 spill_1_normal
	SPILLBOTH(uspill8,uspill4,ASI_AIUS)		! 0x088 spill_2_normal
#ifdef DEBUG
	sir
#endif
	UTRAP(0x08c); TA32	! 0x08c spill_3_normal
TABLE/**/kspill:
	SPILL64(kspill8,ASI_N)	! 0x090 spill_4_normal -- used to save supervisor windows
	SPILL32(kspill4,ASI_N)	! 0x094 spill_5_normal
	SPILLBOTH(kspill8,kspill4,ASI_N)	! 0x098 spill_6_normal
	UTRAP(0x09c); TA32	! 0x09c spill_7_normal
TABLE/**/uspillk:
	SPILL64(uspillk8,ASI_AIUS)	! 0x0a0 spill_0_other -- used to save user windows in supervisor mode
	SPILL32(uspillk4,ASI_AIUS)	! 0x0a4 spill_1_other
	SPILLBOTH(uspillk8,uspillk4,ASI_AIUS)	! 0x0a8 spill_2_other
	UTRAP(0x0ac); TA32	! 0x0ac spill_3_other
	UTRAP(0x0b0); TA32	! 0x0b0 spill_4_other
	UTRAP(0x0b4); TA32	! 0x0b4 spill_5_other
	UTRAP(0x0b8); TA32	! 0x0b8 spill_6_other
	UTRAP(0x0bc); TA32	! 0x0bc spill_7_other
TABLE/**/ufill:
	FILL64(ufill8,ASI_AIUS) ! 0x0c0 fill_0_normal -- used to fill windows when running user mode
	FILL32(ufill4,ASI_AIUS)	! 0x0c4 fill_1_normal
	FILLBOTH(ufill8,ufill4,ASI_AIUS)	! 0x0c8 fill_2_normal
	UTRAP(0x0cc); TA32	! 0x0cc fill_3_normal
TABLE/**/kfill:
	FILL64(kfill8,ASI_N)	! 0x0d0 fill_4_normal -- used to fill windows when running supervisor mode
	FILL32(kfill4,ASI_N)	! 0x0d4 fill_5_normal
	FILLBOTH(kfill8,kfill4,ASI_N)	! 0x0d8 fill_6_normal
	UTRAP(0x0dc); TA32	! 0x0dc fill_7_normal
TABLE/**/ufillk:
	FILL64(ufillk8,ASI_AIUS)	! 0x0e0 fill_0_other
	FILL32(ufillk4,ASI_AIUS)	! 0x0e4 fill_1_other
	FILLBOTH(ufillk8,ufillk4,ASI_AIUS)	! 0x0e8 fill_2_other
	UTRAP(0x0ec); TA32	! 0x0ec fill_3_other
	UTRAP(0x0f0); TA32	! 0x0f0 fill_4_other
	UTRAP(0x0f4); TA32	! 0x0f4 fill_5_other
	UTRAP(0x0f8); TA32	! 0x0f8 fill_6_other
	UTRAP(0x0fc); TA32	! 0x0fc fill_7_other
! Software trap (Tcc) vectors 0x100-0x17f for the TL=0 (user) trap
! table, followed by the reserved 0x180-0x1ff range.  SYSCALL slots
! dispatch system call flavors, BPT handles the breakpoint trap,
! and STRAP covers software traps with no special handler.
TABLE/**/syscall:
	SYSCALL			! 0x100 = sun syscall
	BPT			! 0x101 = pseudo breakpoint instruction
	STRAP(0x102); STRAP(0x103); STRAP(0x104); STRAP(0x105); STRAP(0x106); STRAP(0x107)
	SYSCALL			! 0x108 = svr4 syscall
	SYSCALL			! 0x109 = bsd syscall
	BPT_KGDB_EXEC		! 0x10a = enter kernel gdb on kernel startup
	STRAP(0x10b); STRAP(0x10c); STRAP(0x10d); STRAP(0x10e); STRAP(0x10f);
	STRAP(0x110); STRAP(0x111); STRAP(0x112); STRAP(0x113); STRAP(0x114); STRAP(0x115); STRAP(0x116); STRAP(0x117)
	STRAP(0x118); STRAP(0x119); STRAP(0x11a); STRAP(0x11b); STRAP(0x11c); STRAP(0x11d); STRAP(0x11e); STRAP(0x11f)
	STRAP(0x120); STRAP(0x121); STRAP(0x122); STRAP(0x123); STRAP(0x124); STRAP(0x125); STRAP(0x126); STRAP(0x127)
	STRAP(0x128); STRAP(0x129); STRAP(0x12a); STRAP(0x12b); STRAP(0x12c); STRAP(0x12d); STRAP(0x12e); STRAP(0x12f)
	STRAP(0x130); STRAP(0x131); STRAP(0x132); STRAP(0x133); STRAP(0x134); STRAP(0x135); STRAP(0x136); STRAP(0x137)
	STRAP(0x138); STRAP(0x139); STRAP(0x13a); STRAP(0x13b); STRAP(0x13c); STRAP(0x13d); STRAP(0x13e); STRAP(0x13f)
	SYSCALL			! 0x140 SVID syscall (Solaris 2.7)
	SYSCALL			! 0x141 SPARC International syscall
	SYSCALL			! 0x142	OS Vendor syscall
	SYSCALL			! 0x143 HW OEM syscall
	STRAP(0x144); STRAP(0x145); STRAP(0x146); STRAP(0x147)
	STRAP(0x148); STRAP(0x149); STRAP(0x14a); STRAP(0x14b); STRAP(0x14c); STRAP(0x14d); STRAP(0x14e); STRAP(0x14f)
	STRAP(0x150); STRAP(0x151); STRAP(0x152); STRAP(0x153); STRAP(0x154); STRAP(0x155); STRAP(0x156); STRAP(0x157)
	STRAP(0x158); STRAP(0x159); STRAP(0x15a); STRAP(0x15b); STRAP(0x15c); STRAP(0x15d); STRAP(0x15e); STRAP(0x15f)
	STRAP(0x160); STRAP(0x161); STRAP(0x162); STRAP(0x163); STRAP(0x164); STRAP(0x165); STRAP(0x166); STRAP(0x167)
	STRAP(0x168); STRAP(0x169); STRAP(0x16a); STRAP(0x16b); STRAP(0x16c); STRAP(0x16d); STRAP(0x16e); STRAP(0x16f)
	STRAP(0x170); STRAP(0x171); STRAP(0x172); STRAP(0x173); STRAP(0x174); STRAP(0x175); STRAP(0x176); STRAP(0x177)
	STRAP(0x178); STRAP(0x179); STRAP(0x17a); STRAP(0x17b); STRAP(0x17c); STRAP(0x17d); STRAP(0x17e); STRAP(0x17f)
	! Traps beyond 0x17f are reserved
	UTRAP(0x180); UTRAP(0x181); UTRAP(0x182); UTRAP(0x183); UTRAP(0x184); UTRAP(0x185); UTRAP(0x186); UTRAP(0x187)
	UTRAP(0x188); UTRAP(0x189); UTRAP(0x18a); UTRAP(0x18b); UTRAP(0x18c); UTRAP(0x18d); UTRAP(0x18e); UTRAP(0x18f)
	UTRAP(0x190); UTRAP(0x191); UTRAP(0x192); UTRAP(0x193); UTRAP(0x194); UTRAP(0x195); UTRAP(0x196); UTRAP(0x197)
	UTRAP(0x198); UTRAP(0x199); UTRAP(0x19a); UTRAP(0x19b); UTRAP(0x19c); UTRAP(0x19d); UTRAP(0x19e); UTRAP(0x19f)
	UTRAP(0x1a0); UTRAP(0x1a1); UTRAP(0x1a2); UTRAP(0x1a3); UTRAP(0x1a4); UTRAP(0x1a5); UTRAP(0x1a6); UTRAP(0x1a7)
	UTRAP(0x1a8); UTRAP(0x1a9); UTRAP(0x1aa); UTRAP(0x1ab); UTRAP(0x1ac); UTRAP(0x1ad); UTRAP(0x1ae); UTRAP(0x1af)
	UTRAP(0x1b0); UTRAP(0x1b1); UTRAP(0x1b2); UTRAP(0x1b3); UTRAP(0x1b4); UTRAP(0x1b5); UTRAP(0x1b6); UTRAP(0x1b7)
	UTRAP(0x1b8); UTRAP(0x1b9); UTRAP(0x1ba); UTRAP(0x1bb); UTRAP(0x1bc); UTRAP(0x1bd); UTRAP(0x1be); UTRAP(0x1bf)
	UTRAP(0x1c0); UTRAP(0x1c1); UTRAP(0x1c2); UTRAP(0x1c3); UTRAP(0x1c4); UTRAP(0x1c5); UTRAP(0x1c6); UTRAP(0x1c7)
	UTRAP(0x1c8); UTRAP(0x1c9); UTRAP(0x1ca); UTRAP(0x1cb); UTRAP(0x1cc); UTRAP(0x1cd); UTRAP(0x1ce); UTRAP(0x1cf)
	UTRAP(0x1d0); UTRAP(0x1d1); UTRAP(0x1d2); UTRAP(0x1d3); UTRAP(0x1d4); UTRAP(0x1d5); UTRAP(0x1d6); UTRAP(0x1d7)
	UTRAP(0x1d8); UTRAP(0x1d9); UTRAP(0x1da); UTRAP(0x1db); UTRAP(0x1dc); UTRAP(0x1dd); UTRAP(0x1de); UTRAP(0x1df)
	UTRAP(0x1e0); UTRAP(0x1e1); UTRAP(0x1e2); UTRAP(0x1e3); UTRAP(0x1e4); UTRAP(0x1e5); UTRAP(0x1e6); UTRAP(0x1e7)
	UTRAP(0x1e8); UTRAP(0x1e9); UTRAP(0x1ea); UTRAP(0x1eb); UTRAP(0x1ec); UTRAP(0x1ed); UTRAP(0x1ee); UTRAP(0x1ef)
	UTRAP(0x1f0); UTRAP(0x1f1); UTRAP(0x1f2); UTRAP(0x1f3); UTRAP(0x1f4); UTRAP(0x1f5); UTRAP(0x1f6); UTRAP(0x1f7)
	UTRAP(0x1f8); UTRAP(0x1f9); UTRAP(0x1fa); UTRAP(0x1fb); UTRAP(0x1fc); UTRAP(0x1fd); UTRAP(0x1fe); UTRAP(0x1ff)
1048
	/* Traps from TL>0 -- traps from supervisor mode */
	/*
	 * Second copy of the trap table, used when a trap is taken at
	 * TL>0 (i.e. from kernel/nucleus context).  TABLE is redefined
	 * so the shared table macros generate distinct nucleus_ labels.
	 */
#undef TABLE
#define TABLE	nucleus_
trapbase_priv:
	UTRAP(0x000)		! 000 = reserved -- Use it to boot
	/* We should not get the next 5 traps */
	UTRAP(0x001)		! 001 = POR Reset -- ROM should get this
	UTRAP(0x002)		! 002 = WDR Watchdog -- ROM should get this
	UTRAP(0x003)		! 003 = XIR -- ROM should get this
	UTRAP(0x004)		! 004 = SIR -- ROM should get this
	UTRAP(0x005)		! 005 = RED state exception
	UTRAP(0x006); UTRAP(0x007)
! Kernel (TL>0) vectors 0x008-0x02f: instruction faults, illegal and
! privileged instruction traps, FP traps, and the in-line clean-window
! handler at 0x024-0x027, which zeroes the fresh window registers and
! bumps %cleanwin without leaving trap level.
ktextfault:
	VTRAP(T_INST_EXCEPT, textfault)	! 008 = instr. access exception
	VTRAP(T_TEXTFAULT, textfault)	! 009 = instr access MMU miss -- no MMU
	VTRAP(T_INST_ERROR, textfault)	! 00a = instr. access err
	UTRAP(0x00b); UTRAP(0x00c); UTRAP(0x00d); UTRAP(0x00e); UTRAP(0x00f)
	TRAP(T_ILLINST)			! 010 = illegal instruction
	TRAP(T_PRIVINST)		! 011 = privileged instruction
	UTRAP(0x012)			! 012 = unimplemented LDD
	UTRAP(0x013)			! 013 = unimplemented STD
	UTRAP(0x014); UTRAP(0x015); UTRAP(0x016); UTRAP(0x017); UTRAP(0x018)
	UTRAP(0x019); UTRAP(0x01a); UTRAP(0x01b); UTRAP(0x01c); UTRAP(0x01d)
	UTRAP(0x01e); UTRAP(0x01f)
	TRAP(T_FPDISABLED)		! 020 = fp instr, but EF bit off in psr
	VTRAP(T_FP_IEEE_754, fp_exception)		! 021 = ieee 754 exception
	VTRAP(T_FP_OTHER, fp_exception)		! 022 = other fp exception
	TRAP(T_TAGOF)			! 023 = tag overflow
	TRACEWIN			! DEBUG
	clr	%l0
#ifdef DEBUG
	set	0xbadbeef, %l0		! DEBUG
#endif
	mov %l0, %l1; mov %l0, %l2	! 024-027 = clean window trap
	rdpr %cleanwin, %o7		!	This handler is in-lined and cannot fault
	inc %o7; mov %l0, %l3	!       Nucleus (trap&IRQ) code does not need clean windows
	wrpr %g0, %o7, %cleanwin	!	Clear out %l0-%l7 and %o0-%o7 and inc %cleanwin and done
#ifdef NOT_DEBUG
	!!
	!! Check the sp redzone
	!!
	!! NOTE(review): this disabled code uses the bare names t1/t2 and
	!! icc, which do not assemble outside a macro that binds them --
	!! it needs real registers and %icc before it can be enabled.
	rdpr	%wstate, t1
	cmp	t1, WSTATE_KERN
	bne,pt	icc, 7f
	 sethi	%hi(_C_LABEL(redzone)), t1
	ldx	[t1 + %lo(_C_LABEL(redzone))], t2
	cmp	%sp, t2			! if sp >= t2, not in red zone
	blu	panic_red		! and can continue normally
7:
#endif
	mov %l0, %l4; mov %l0, %l5; mov %l0, %l6; mov %l0, %l7
	mov %l0, %o0; mov %l0, %o1; mov %l0, %o2; mov %l0, %o3

	mov %l0, %o4; mov %l0, %o5; mov %l0, %o6; mov %l0, %o7
	CLRTT
	retry; nop; TA32
	TRAP(T_DIV0)			! 028 = divide by zero
	UTRAP(0x029)			! 029 = internal processor error
	UTRAP(0x02a); UTRAP(0x02b); UTRAP(0x02c); UTRAP(0x02d); UTRAP(0x02e); UTRAP(0x02f)
! Kernel (TL>0) vectors 0x030-0x063: data faults and alignment traps
! (routed through winfault/checkalign), the interrupt levels 1-15,
! the interrupt vector trap, and the watchpoint traps.
kdatafault:
	VTRAP(T_DATAFAULT, winfault)	! 030 = data fetch fault
	UTRAP(0x031)			! 031 = data MMU miss -- no MMU
	VTRAP(T_DATA_ERROR, winfault)	! 032 = data fetch fault
	VTRAP(T_DATA_PROT, winfault)	! 033 = data fetch fault
	VTRAP(T_ALIGN, checkalign)	! 034 = address alignment error -- we could fix it inline...
	TRAP(T_LDDF_ALIGN)		! 035 = LDDF address alignment error -- we could fix it inline...
	TRAP(T_STDF_ALIGN)		! 036 = STDF address alignment error -- we could fix it inline...
	TRAP(T_PRIVACT)			! 037 = privileged action
	UTRAP(0x038); UTRAP(0x039); UTRAP(0x03a); UTRAP(0x03b); UTRAP(0x03c);
	UTRAP(0x03d); UTRAP(0x03e); UTRAP(0x03f);
	VTRAP(T_ASYNC_ERROR, winfault)	! 040 = data fetch fault
	SOFTINT4U(1, IE_L1)		! 041 = level 1 interrupt
	HARDINT4U(2)			! 042 = level 2 interrupt
	HARDINT4U(3)			! 043 = level 3 interrupt
	SOFTINT4U(4, IE_L4)		! 044 = level 4 interrupt
	HARDINT4U(5)			! 045 = level 5 interrupt
	SOFTINT4U(6, IE_L6)		! 046 = level 6 interrupt
	HARDINT4U(7)			! 047 = level 7 interrupt
	HARDINT4U(8)			! 048 = level 8 interrupt
	HARDINT4U(9)			! 049 = level 9 interrupt
	HARDINT4U(10)			! 04a = level 10 interrupt
	HARDINT4U(11)			! 04b = level 11 interrupt
	ZS_INTERRUPT4U			! 04c = level 12 (zs) interrupt
	HARDINT4U(13)			! 04d = level 13 interrupt
	HARDINT4U(14)			! 04e = level 14 interrupt
	HARDINT4U(15)			! 04f = nonmaskable interrupt
	UTRAP(0x050); UTRAP(0x051); UTRAP(0x052); UTRAP(0x053); UTRAP(0x054); UTRAP(0x055)
	UTRAP(0x056); UTRAP(0x057); UTRAP(0x058); UTRAP(0x059); UTRAP(0x05a); UTRAP(0x05b)
	UTRAP(0x05c); UTRAP(0x05d); UTRAP(0x05e); UTRAP(0x05f)
	VTRAP(0x060, interrupt_vector); ! 060 = interrupt vector
	TRAP(T_PA_WATCHPT)		! 061 = physical address data watchpoint
	TRAP(T_VA_WATCHPT)		! 062 = virtual address data watchpoint
	UTRAP(T_ECCERR)			! We'll implement this one later
! 0x064: fast instruction access MMU miss, TL>0 (kernel) trap table.
! Same TSB probe as the user-mode vector: on a tag hit the TTE is
! loaded into the ITLB via ASI_IMMU_DATA_IN and retried, otherwise
! control punts to instr_miss.
kfast_IMMU_miss:			! 064 = fast instr access MMU miss
	TRACEFLT			! DEBUG
	ldxa	[%g0] ASI_IMMU_8KPTR, %g2	! Load IMMU 8K TSB pointer
#ifdef NO_TSB
	ba,a	%icc, instr_miss;
#endif
	ldxa	[%g0] ASI_IMMU, %g1	!	Load IMMU tag target register
	ldda	[%g2] ASI_NUCLEUS_QUAD_LDD, %g4	!Load TSB tag:data into %g4:%g5
	brgez,pn %g5, instr_miss	!	Entry invalid?  Punt
	 cmp	%g1, %g4		!	Compare TLB tags
	bne,pn %xcc, instr_miss		!	Got right tag?
	 nop
	CLRTT
	stxa	%g5, [%g0] ASI_IMMU_DATA_IN!	Enter new mapping
	retry				!	Try new mapping
1:
	sir
	TA32
! 0x068: fast data access MMU miss, TL>0 (kernel) trap table.
! Mirrors ufast_DMMU_miss but counts hits in kdhit under TRAPSTATS.
kfast_DMMU_miss:			! 068 = fast data access MMU miss
	TRACEFLT			! DEBUG
	ldxa	[%g0] ASI_DMMU_8KPTR, %g2!					Load DMMU 8K TSB pointer
#ifdef NO_TSB
	ba,a	%icc, data_miss;
#endif
	ldxa	[%g0] ASI_DMMU, %g1	! Hard coded for unified 8K TSB		Load DMMU tag target register
	ldda	[%g2] ASI_NUCLEUS_QUAD_LDD, %g4	!				Load TSB tag and data into %g4 and %g5
	brgez,pn %g5, data_miss		!					Entry invalid?  Punt
	 cmp	%g1, %g4		!					Compare TLB tags
	bnz,pn	%xcc, data_miss		!					Got right tag?
	 nop
	CLRTT
#ifdef TRAPSTATS
	sethi	%hi(_C_LABEL(kdhit)), %g1
	lduw	[%g1+%lo(_C_LABEL(kdhit))], %g2
	inc	%g2
	stw	%g2, [%g1+%lo(_C_LABEL(kdhit))]
#endif
	stxa	%g5, [%g0] ASI_DMMU_DATA_IN!					Enter new mapping
	retry				!					Try new mapping
1:
	sir
	TA32
! 0x06c: fast data access MMU protection, TL>0 (kernel) trap table.
! Mirrors ufast_DMMU_protection; counts in kdprot under TRAPSTATS.
kfast_DMMU_protection:			! 06c = fast data access MMU protection
	TRACEFLT			! DEBUG
#ifdef TRAPSTATS
	sethi	%hi(_C_LABEL(kdprot)), %g1
	lduw	[%g1+%lo(_C_LABEL(kdprot))], %g2
	inc	%g2
	stw	%g2, [%g1+%lo(_C_LABEL(kdprot))]
#endif
#ifdef HWREF
	ba,a,pt	%xcc, dmmu_write_fault
#else
	ba,a,pt	%xcc, winfault
#endif
	nop
	TA32
	UTRAP(0x070)			! Implementation dependent traps
	UTRAP(0x071); UTRAP(0x072); UTRAP(0x073); UTRAP(0x074); UTRAP(0x075); UTRAP(0x076)
	UTRAP(0x077); UTRAP(0x078); UTRAP(0x079); UTRAP(0x07a); UTRAP(0x07b); UTRAP(0x07c)
	UTRAP(0x07d); UTRAP(0x07e); UTRAP(0x07f)
! Spill/fill vectors for the TL>0 (nucleus) trap table.  These use
! numeric local labels (1/2, referenced as 1b/2b by SPILLBOTH and
! FILLBOTH) rather than named labels, since TABLE is now nucleus_.
! ASI_AIUS saves/restores user windows, ASI_N kernel windows.
TABLE/**/uspill:
	SPILL64(1,ASI_AIUS)	! 0x080 spill_0_normal -- used to save user windows
	SPILL32(2,ASI_AIUS)	! 0x084 spill_1_normal
	SPILLBOTH(1b,2b,ASI_AIUS)	! 0x088 spill_2_normal
	UTRAP(0x08c); TA32	! 0x08c spill_3_normal
TABLE/**/kspill:
	SPILL64(1,ASI_N)	! 0x090 spill_4_normal -- used to save supervisor windows
	SPILL32(2,ASI_N)	! 0x094 spill_5_normal
	SPILLBOTH(1b,2b,ASI_N)	! 0x098 spill_6_normal
	UTRAP(0x09c); TA32	! 0x09c spill_7_normal
TABLE/**/uspillk:
	SPILL64(1,ASI_AIUS)	! 0x0a0 spill_0_other -- used to save user windows in nucleus mode
	SPILL32(2,ASI_AIUS)	! 0x0a4 spill_1_other
	SPILLBOTH(1b,2b,ASI_AIUS)	! 0x0a8 spill_2_other
	UTRAP(0x0ac); TA32	! 0x0ac spill_3_other
	UTRAP(0x0b0); TA32	! 0x0b0 spill_4_other
	UTRAP(0x0b4); TA32	! 0x0b4 spill_5_other
	UTRAP(0x0b8); TA32	! 0x0b8 spill_6_other
	UTRAP(0x0bc); TA32	! 0x0bc spill_7_other
TABLE/**/ufill:
	FILL64(1,ASI_AIUS)	! 0x0c0 fill_0_normal -- used to fill windows when running nucleus mode from user
	FILL32(2,ASI_AIUS)	! 0x0c4 fill_1_normal
	FILLBOTH(1b,2b,ASI_AIUS)	! 0x0c8 fill_2_normal
	UTRAP(0x0cc); TA32	! 0x0cc fill_3_normal
TABLE/**/sfill:
	FILL64(1,ASI_N)		! 0x0d0 fill_4_normal -- used to fill windows when running nucleus mode from supervisor
	FILL32(2,ASI_N)		! 0x0d4 fill_5_normal
	FILLBOTH(1b,2b,ASI_N)	! 0x0d8 fill_6_normal
	UTRAP(0x0dc); TA32	! 0x0dc fill_7_normal
TABLE/**/kfill:
	FILL64(1,ASI_AIUS)	! 0x0e0 fill_0_other -- used to fill user windows when running nucleus mode -- will we ever use this?
	FILL32(2,ASI_AIUS)	! 0x0e4 fill_1_other
	FILLBOTH(1b,2b,ASI_AIUS)! 0x0e8 fill_2_other
	UTRAP(0x0ec); TA32	! 0x0ec fill_3_other
	UTRAP(0x0f0); TA32	! 0x0f0 fill_4_other
	UTRAP(0x0f4); TA32	! 0x0f4 fill_5_other
	UTRAP(0x0f8); TA32	! 0x0f8 fill_6_other
	UTRAP(0x0fc); TA32	! 0x0fc fill_7_other
! Software trap vectors 0x100-0x17f for the TL>0 (nucleus) trap table,
! then the reserved 0x180-0x1ff range.  Unlike the user table, the
! 0x140-0x143 slots are plain STRAPs here (no SVID/OEM syscalls from
! kernel mode).
TABLE/**/syscall:
	SYSCALL			! 0x100 = sun syscall
	BPT			! 0x101 = pseudo breakpoint instruction
	STRAP(0x102); STRAP(0x103); STRAP(0x104); STRAP(0x105); STRAP(0x106); STRAP(0x107)
	SYSCALL			! 0x108 = svr4 syscall
	SYSCALL			! 0x109 = bsd syscall
	BPT_KGDB_EXEC		! 0x10a = enter kernel gdb on kernel startup
	STRAP(0x10b); STRAP(0x10c); STRAP(0x10d); STRAP(0x10e); STRAP(0x10f);
	STRAP(0x110); STRAP(0x111); STRAP(0x112); STRAP(0x113); STRAP(0x114); STRAP(0x115); STRAP(0x116); STRAP(0x117)
	STRAP(0x118); STRAP(0x119); STRAP(0x11a); STRAP(0x11b); STRAP(0x11c); STRAP(0x11d); STRAP(0x11e); STRAP(0x11f)
	STRAP(0x120); STRAP(0x121); STRAP(0x122); STRAP(0x123); STRAP(0x124); STRAP(0x125); STRAP(0x126); STRAP(0x127)
	STRAP(0x128); STRAP(0x129); STRAP(0x12a); STRAP(0x12b); STRAP(0x12c); STRAP(0x12d); STRAP(0x12e); STRAP(0x12f)
	STRAP(0x130); STRAP(0x131); STRAP(0x132); STRAP(0x133); STRAP(0x134); STRAP(0x135); STRAP(0x136); STRAP(0x137)
	STRAP(0x138); STRAP(0x139); STRAP(0x13a); STRAP(0x13b); STRAP(0x13c); STRAP(0x13d); STRAP(0x13e); STRAP(0x13f)
	STRAP(0x140); STRAP(0x141); STRAP(0x142); STRAP(0x143); STRAP(0x144); STRAP(0x145); STRAP(0x146); STRAP(0x147)
	STRAP(0x148); STRAP(0x149); STRAP(0x14a); STRAP(0x14b); STRAP(0x14c); STRAP(0x14d); STRAP(0x14e); STRAP(0x14f)
	STRAP(0x150); STRAP(0x151); STRAP(0x152); STRAP(0x153); STRAP(0x154); STRAP(0x155); STRAP(0x156); STRAP(0x157)
	STRAP(0x158); STRAP(0x159); STRAP(0x15a); STRAP(0x15b); STRAP(0x15c); STRAP(0x15d); STRAP(0x15e); STRAP(0x15f)
	STRAP(0x160); STRAP(0x161); STRAP(0x162); STRAP(0x163); STRAP(0x164); STRAP(0x165); STRAP(0x166); STRAP(0x167)
	STRAP(0x168); STRAP(0x169); STRAP(0x16a); STRAP(0x16b); STRAP(0x16c); STRAP(0x16d); STRAP(0x16e); STRAP(0x16f)
	STRAP(0x170); STRAP(0x171); STRAP(0x172); STRAP(0x173); STRAP(0x174); STRAP(0x175); STRAP(0x176); STRAP(0x177)
	STRAP(0x178); STRAP(0x179); STRAP(0x17a); STRAP(0x17b); STRAP(0x17c); STRAP(0x17d); STRAP(0x17e); STRAP(0x17f)
	! Traps beyond 0x17f are reserved
	UTRAP(0x180); UTRAP(0x181); UTRAP(0x182); UTRAP(0x183); UTRAP(0x184); UTRAP(0x185); UTRAP(0x186); UTRAP(0x187)
	UTRAP(0x188); UTRAP(0x189); UTRAP(0x18a); UTRAP(0x18b); UTRAP(0x18c); UTRAP(0x18d); UTRAP(0x18e); UTRAP(0x18f)
	UTRAP(0x190); UTRAP(0x191); UTRAP(0x192); UTRAP(0x193); UTRAP(0x194); UTRAP(0x195); UTRAP(0x196); UTRAP(0x197)
	UTRAP(0x198); UTRAP(0x199); UTRAP(0x19a); UTRAP(0x19b); UTRAP(0x19c); UTRAP(0x19d); UTRAP(0x19e); UTRAP(0x19f)
	UTRAP(0x1a0); UTRAP(0x1a1); UTRAP(0x1a2); UTRAP(0x1a3); UTRAP(0x1a4); UTRAP(0x1a5); UTRAP(0x1a6); UTRAP(0x1a7)
	UTRAP(0x1a8); UTRAP(0x1a9); UTRAP(0x1aa); UTRAP(0x1ab); UTRAP(0x1ac); UTRAP(0x1ad); UTRAP(0x1ae); UTRAP(0x1af)
	UTRAP(0x1b0); UTRAP(0x1b1); UTRAP(0x1b2); UTRAP(0x1b3); UTRAP(0x1b4); UTRAP(0x1b5); UTRAP(0x1b6); UTRAP(0x1b7)
	UTRAP(0x1b8); UTRAP(0x1b9); UTRAP(0x1ba); UTRAP(0x1bb); UTRAP(0x1bc); UTRAP(0x1bd); UTRAP(0x1be); UTRAP(0x1bf)
	UTRAP(0x1c0); UTRAP(0x1c1); UTRAP(0x1c2); UTRAP(0x1c3); UTRAP(0x1c4); UTRAP(0x1c5); UTRAP(0x1c6); UTRAP(0x1c7)
	UTRAP(0x1c8); UTRAP(0x1c9); UTRAP(0x1ca); UTRAP(0x1cb); UTRAP(0x1cc); UTRAP(0x1cd); UTRAP(0x1ce); UTRAP(0x1cf)
	UTRAP(0x1d0); UTRAP(0x1d1); UTRAP(0x1d2); UTRAP(0x1d3); UTRAP(0x1d4); UTRAP(0x1d5); UTRAP(0x1d6); UTRAP(0x1d7)
	UTRAP(0x1d8); UTRAP(0x1d9); UTRAP(0x1da); UTRAP(0x1db); UTRAP(0x1dc); UTRAP(0x1dd); UTRAP(0x1de); UTRAP(0x1df)
	UTRAP(0x1e0); UTRAP(0x1e1); UTRAP(0x1e2); UTRAP(0x1e3); UTRAP(0x1e4); UTRAP(0x1e5); UTRAP(0x1e6); UTRAP(0x1e7)
	UTRAP(0x1e8); UTRAP(0x1e9); UTRAP(0x1ea); UTRAP(0x1eb); UTRAP(0x1ec); UTRAP(0x1ed); UTRAP(0x1ee); UTRAP(0x1ef)
	UTRAP(0x1f0); UTRAP(0x1f1); UTRAP(0x1f2); UTRAP(0x1f3); UTRAP(0x1f4); UTRAP(0x1f5); UTRAP(0x1f6); UTRAP(0x1f7)
	UTRAP(0x1f8); UTRAP(0x1f9); UTRAP(0x1fa); UTRAP(0x1fb); UTRAP(0x1fc); UTRAP(0x1fd); UTRAP(0x1fe); UTRAP(0x1ff)
1280
1281/*
 * If the cleanwin trap handler detects an overflow we come here.
1283 * We need to fix up the window registers, switch to the interrupt
1284 * stack, and then trap to the debugger.
1285 */
! Entered from the clean-window handler on kernel stack overflow.
! Backs %cwp up to the window that trapped, switches to the interrupt
! stack (EINTSTACK), traps into the debugger, and finally panics.
cleanwin_overflow:
	!! We've already incremented %cleanwin
	!! So restore %cwp
	rdpr	%cwp, %l0
	dec	%l0
	wrpr	%l0, %g0, %cwp
	set	EINTSTACK-STKB-CC64FSZ, %l0	! top of interrupt stack, biased, with one frame
	save	%l0, 0, %sp
!
	ta	1		! Enter debugger
	sethi	%hi(1f), %o0
	call	_C_LABEL(panic)
	 or	%o0, %lo(1f), %o0
	restore
	retry
	.data
1:
	.asciz	"Kernel stack overflow!"
	_ALIGN
	.text
1306
#ifdef DEBUG
/*
 * Global-register corruption detector (DEBUG only).  globreg_set
 * snapshots %g0-%g7 into globreg_debug[]; globreg_check compares the
 * current globals against the snapshot and traps (tne 1) on mismatch.
 *
 * NOTE(review): CHKREG always loads the slot at offset 8*1 (the %g1
 * slot) no matter which register it is checking -- it looks like each
 * invocation should index its own slot; confirm before relying on it.
 * Note also that `stx %o0, [%o0]' marks the snapshot consumed, and
 * that the first CHKREG(%g1) instruction sits in the brnz delay slot.
 */
#define CHKREG(r) \
	ldx	[%o0 + 8*1], %o1; \
	cmp	r, %o1; \
	stx	%o0, [%o0]; \
	tne	1
	.data
globreg_debug:
	.xword	-1, 0, 0, 0, 0, 0, 0, 0
	.text
globreg_set:
	save	%sp, -CC64FSZ, %sp
	set	globreg_debug, %o0
	stx	%g0, [%o0]
	stx	%g1, [%o0 + 8*1]
	stx	%g2, [%o0 + 8*2]
	stx	%g3, [%o0 + 8*3]
	stx	%g4, [%o0 + 8*4]
	stx	%g5, [%o0 + 8*5]
	stx	%g6, [%o0 + 8*6]
	stx	%g7, [%o0 + 8*7]
	ret
	 restore
globreg_check:
	save	%sp, -CC64FSZ, %sp
	rd	%pc, %o7
	set	globreg_debug, %o0
	ldx	[%o0], %o1
	brnz,pn	%o1, 1f		! Don't re-execute this
	CHKREG(%g1)
	CHKREG(%g2)
	CHKREG(%g3)
	CHKREG(%g4)
	CHKREG(%g5)
	CHKREG(%g6)
	CHKREG(%g7)
	nop
1:	ret
	 restore
1346
1347	/*
1348	 * Checkpoint:	 store a byte value at DATA_START+0x21
1349	 *		uses two temp regs
1350	 */
1351#define CHKPT(r1,r2,val) \
1352	sethi	%hi(DATA_START), r1; \
1353	mov	val, r2; \
1354	stb	r2, [r1 + 0x21]
1355
1356	/*
1357	 * Debug routine:
1358	 *
1359	 * If datafault manages to get an unaligned pmap entry
1360	 * we come here.  We want to save as many regs as we can.
1361	 * %g3 has the sfsr, and %g7 the result of the wstate
1362	 * both of which we can toast w/out much lossage.
1363	 *
1364	 */
1365	.data
1366pmap_dumpflag:
1367	.xword	0		! semaphore
1368	.globl	pmap_dumparea	! Get this into the kernel syms
1369pmap_dumparea:
1370	.space	(32*8)		! room to save 32 registers
1371pmap_edumparea:
1372	.text
1373pmap_screwup:
1374	rd	%pc, %g3
1375	sub	%g3, (pmap_edumparea-pmap_dumparea), %g3! pc relative addressing 8^)
1376	ldstub	[%g3+( 0*0x8)], %g3
1377	tst	%g3		! Semaphore set?
1378	tnz	%xcc, 1; nop		! Then trap
1379	set	pmap_dumparea, %g3
1380	stx	%g3, [%g3+( 0*0x8)]	! set semaphore
1381	stx	%g1, [%g3+( 1*0x8)]	! Start saving regs
1382	stx	%g2, [%g3+( 2*0x8)]
1383	stx	%g3, [%g3+( 3*0x8)]	! Redundant, I know...
1384	stx	%g4, [%g3+( 4*0x8)]
1385	stx	%g5, [%g3+( 5*0x8)]
1386	stx	%g6, [%g3+( 6*0x8)]
1387	stx	%g7, [%g3+( 7*0x8)]
1388	stx	%i0, [%g3+( 8*0x8)]
1389	stx	%i1, [%g3+( 9*0x8)]
1390	stx	%i2, [%g3+(10*0x8)]
1391	stx	%i3, [%g3+(11*0x8)]
1392	stx	%i4, [%g3+(12*0x8)]
1393	stx	%i5, [%g3+(13*0x8)]
1394	stx	%i6, [%g3+(14*0x8)]
1395	stx	%i7, [%g3+(15*0x8)]
1396	stx	%l0, [%g3+(16*0x8)]
1397	stx	%l1, [%g3+(17*0x8)]
1398	stx	%l2, [%g3+(18*0x8)]
1399	stx	%l3, [%g3+(19*0x8)]
1400	stx	%l4, [%g3+(20*0x8)]
1401	stx	%l5, [%g3+(21*0x8)]
1402	stx	%l6, [%g3+(22*0x8)]
1403	stx	%l7, [%g3+(23*0x8)]
1404	stx	%o0, [%g3+(24*0x8)]
1405	stx	%o1, [%g3+(25*0x8)]
1406	stx	%o2, [%g3+(26*0x8)]
1407	stx	%o3, [%g3+(27*0x8)]
1408	stx	%o4, [%g3+(28*0x8)]
1409	stx	%o5, [%g3+(29*0x8)]
1410	stx	%o6, [%g3+(30*0x8)]
1411	stx	%o7, [%g3+(31*0x8)]
1412	ta	1; nop		! Break into the debugger
1413
1414#else
1415#define	CHKPT(r1,r2,val)
1416#define CHKREG(r)
1417#endif
1418
#ifdef DEBUG_NOTDEF
/*
 * A hardware red zone is impossible.  We simulate one in software by
 * keeping a `red zone' pointer; if %sp becomes less than this, we panic.
 * This is expensive and is only enabled when debugging.
 */
/*
 * NOTE(review): this whole section is compiled out (DEBUG_NOTDEF is
 * never defined) and has not been kept buildable; see panic_red below.
 */
#define	REDSIZE	(USIZ)		/* Mark used portion of user structure out of bounds */
#define	REDSTACK 2048		/* size of `panic: stack overflow' region */
	.data
	_ALIGN
redzone:
	.xword	_C_LABEL(idle_u) + REDSIZE
redstack:
	.space	REDSTACK
eredstack:
Lpanic_red:
	.asciz	"kernel stack overflow"
	_ALIGN
	.text

	/* set stack pointer redzone to base+minstack; alters base */
#define	SET_SP_REDZONE(base, tmp) \
	add	base, REDSIZE, base; \
	sethi	%hi(_C_LABEL(redzone)), tmp; \
	stx	base, [tmp + %lo(_C_LABEL(redzone))]

	/* variant with a constant */
#define	SET_SP_REDZONE_CONST(const, tmp1, tmp2) \
	set	(const) + REDSIZE, tmp1; \
	sethi	%hi(_C_LABEL(redzone)), tmp2; \
	stx	tmp1, [tmp2 + %lo(_C_LABEL(redzone))]

	/* check stack pointer against redzone (uses two temps) */
#define	CHECK_SP_REDZONE(t1, t2) \
	sethi	KERNBASE, t1;	\
	cmp	%sp, t1;	\
	blu,pt	%xcc, 7f;	\
	 sethi	%hi(_C_LABEL(redzone)), t1; \
	ldx	[t1 + %lo(_C_LABEL(redzone))], t2; \
	cmp	%sp, t2;	/* if sp >= t2, not in red zone */ \
	blu	panic_red; nop;	/* and can continue normally */ \
7:
1461
/*
 * panic_red: reached from CHECK_SP_REDZONE() when %sp has entered the
 * red zone.  Disarms the red zone, moves to the dedicated panic stack
 * and panics with the Lpanic_red message.  Dead code (DEBUG_NOTDEF).
 *
 * Fixes applied: the sethi of %hi(panicstr) targeted t2, where it was
 * immediately clobbered by the following `set' -- the store below
 * indexes off t1, so t1 is the intended target; `wrpr g0' was missing
 * the register prefix; and -CCF64SZ was a typo for CC64FSZ (the
 * spelling used by every other frame allocation in this file).
 *
 * NOTE(review): t1/t2 are the CHECK_SP_REDZONE() macro parameter
 * names referenced outside any macro -- this only assembles if they
 * are textually bound to real registers; also, panicstr is a pointer,
 * so on _LP64 the 32-bit `st' should probably be a pointer-sized
 * store (STPTR) -- confirm before enabling DEBUG_NOTDEF.
 */
panic_red:
	/* move to panic stack */
	stx	%g0, [t1 + %lo(_C_LABEL(redzone))];	/* disarm red zone */
	set	eredstack - BIAS, %sp;
	/* prevent panic() from lowering ipl */
	sethi	%hi(_C_LABEL(panicstr)), t1;
	set	Lpanic_red, t2;
	st	t2, [t1 + %lo(_C_LABEL(panicstr))];
	wrpr	%g0, 15, %pil		/* t1 = splhigh() */
	save	%sp, -CC64FSZ, %sp;	/* preserve current window */
	sethi	%hi(Lpanic_red), %o0;
	call	_C_LABEL(panic);
	 or %o0, %lo(Lpanic_red), %o0;


#else

#define	SET_SP_REDZONE(base, tmp)
#define	SET_SP_REDZONE_CONST(const, t1, t2)
#define	CHECK_SP_REDZONE(t1, t2)
#endif
1483
! Trap tracing support: a TRACESIZ-byte ring buffer (trap_trace), a
! write cursor (trap_trace_ptr) and an enable flag (trap_trace_dis,
! nonzero = disabled).  TRACEPTR/TRACEDIS are the offsets of cursor
! and flag relative to the buffer base, so handlers can reach all
! three through one base register.
#define TRACESIZ	0x01000
	.globl	_C_LABEL(trap_trace)
	.globl	_C_LABEL(trap_trace_ptr)
	.globl	_C_LABEL(trap_trace_end)
	.globl	_C_LABEL(trap_trace_dis)
	.data
_C_LABEL(trap_trace_dis):
	.word	1, 1		! Starts disabled.  DDB turns it on.
_C_LABEL(trap_trace_ptr):
	.word	0, 0, 0, 0
_C_LABEL(trap_trace):
	.space	TRACESIZ
_C_LABEL(trap_trace_end):
	.space	0x20		! safety margin
#if	defined(TRAPTRACE)||defined(FLTRACE)
#define TRACEPTR	(_C_LABEL(trap_trace_ptr)-_C_LABEL(trap_trace))
#define TRACEDIS	(_C_LABEL(trap_trace_dis)-_C_LABEL(trap_trace))
/*
 * TRACEIT(tt,r3,r4,r2,r6,r7): in-line trap trace record writer.
 * If tracing is enabled, appends one 20-byte record to trap_trace:
 * halfword (TL<<13 | %g5), halfword 0, then 32-bit %tstate, %sp,
 * %tpc and TLB_TAG_ACCESS, wrapping the cursor at TRACESIZ.  The
 * register arguments are scratch; %g5 is consumed and cleared.
 */
#define	TRACEIT(tt,r3,r4,r2,r6,r7)					\
	set	trap_trace, r2;						\
	lduw	[r2+TRACEDIS], r4;					\
	brnz,pn	r4, 1f;							\
	 lduw	[r2+TRACEPTR], r3;					\
	rdpr	%tl, r4;						\
	cmp	r4, 1;							\
	sllx	r4, 13, r4;						\
	rdpr	%pil, r6;						\
	or	r4, %g5, r4;						\
	mov	%g0, %g5;						\
	andncc	r3, (TRACESIZ-1), %g0;	/* At end of buffer? */		\
	sllx	r6, 9, r6;						\
	or	r6, r4, r4;						\
	movnz	%icc, %g0, r3;		/* Wrap buffer if needed */	\
	rdpr	%tstate, r6;						\
	rdpr	%tpc, r7;						\
	sth	r4, [r2+r3];						\
	inc	2, r3;							\
	sth	%g5, [r2+r3];						\
	inc	2, r3;							\
	stw	r6, [r2+r3];						\
	inc	4, r3;							\
	stw	%sp, [r2+r3];						\
	inc	4, r3;							\
	stw	r7, [r2+r3];						\
	inc	4, r3;							\
	mov	TLB_TAG_ACCESS, r7;					\
	ldxa	[r7] ASI_DMMU, r7;					\
	stw	r7, [r2+r3];						\
	inc	4, r3;							\
	stw	r3, [r2+TRACEPTR];					\
1:
1534
1535
	.text
! traceit: out-of-line trap trace writer, called with the return
! address in %g1 (exits via jmpl %g1).  Appends one record like
! TRACEIT, but at TL=1 also tries to record the current PID and the
! pcb nsaved count.  Clobbers %g2-%g7.
! NOTE(review): the tt compare cascade (0x68/0x64/0x6c) branches to 3f
! and clears %g6 whenever any one compare mismatches, and a single tt
! can match at most one value -- so %g6 appears to always end up
! cleared and the PID never recorded; looks like a logic bug, confirm
! intent before reusing this pattern.
traceit:
	set	trap_trace, %g2
	lduw	[%g2+TRACEDIS], %g4
	brnz,pn	%g4, 1f			! bail if tracing disabled
	 lduw	[%g2+TRACEPTR], %g3
	rdpr	%tl, %g4
	rdpr	%tt, %g5
	set	CURPROC, %g6
	cmp	%g4, 1
	sllx	%g4, 13, %g4
	bnz,a,pt	%icc, 3f
	 clr	%g6
	cmp	%g5, 0x68
	bnz,a,pt	%icc, 3f
	 clr	%g6
	cmp	%g5, 0x64
	bnz,a,pt	%icc, 3f
	 clr	%g6
	cmp	%g5, 0x6c
	bnz,a,pt	%icc, 3f
	 clr	%g6
	LDPTR	[%g6], %g6
3:
	or	%g4, %g5, %g4
	mov	%g0, %g5
	brz,pn	%g6, 2f
	 andncc	%g3, (TRACESIZ-1), %g0	! At end of buffer? wrap
	LDPTR	[%g6+P_PID], %g5	! Load PID

	set	CPCB, %g6	! Load up nsaved
	LDPTR	[%g6], %g6
	ldub	[%g6 + PCB_NSAVED], %g6
	sllx	%g6, 9, %g6
	or	%g6, %g4, %g4
2:

	movnz	%icc, %g0, %g3		! Wrap buffer if needed
	rdpr	%tstate, %g6
	rdpr	%tpc, %g7
	sth	%g4, [%g2+%g3]
	inc	2, %g3
	sth	%g5, [%g2+%g3]
	inc	2, %g3
	stw	%g6, [%g2+%g3]
	inc	4, %g3
	stw	%sp, [%g2+%g3]
	inc	4, %g3
	stw	%g7, [%g2+%g3]
	inc	4, %g3
	mov	TLB_TAG_ACCESS, %g7
	ldxa	[%g7] ASI_DMMU, %g7
	stw	%g7, [%g2+%g3]
	inc	4, %g3
1:
	jmpl	%g1, %g0
	 stw	%g3, [%g2+TRACEPTR]
! traceitwin: trap trace writer for window traps.  Same record format
! as traceit but uses only local registers (%l0-%l7, safe inside
! spill/fill handlers), never records PID or nsaved, and returns via
! jmpl %l6.
traceitwin:
	set	trap_trace, %l2
	lduw	[%l2+TRACEDIS], %l4
	brnz,pn	%l4, 1f			! bail if tracing disabled
	 nop
	lduw	[%l2+TRACEPTR], %l3
	rdpr	%tl, %l4
	rdpr	%tt, %l5
	sllx	%l4, 13, %l4
	or	%l4, %l5, %l4
	clr	%l5		! Don't load PID
	andncc	%l3, (TRACESIZ-1), %g0
	movnz	%icc, %g0, %l3	! Wrap?

	clr	%l0		! Don't load nsaved
	sllx	%l0, 9, %l1
	or	%l1, %l4, %l4
	rdpr	%tpc, %l7

	sth	%l4, [%l2+%l3]
	inc	2, %l3
	sth	%l5, [%l2+%l3]
	inc	2, %l3
	stw	%l0, [%l2+%l3]
	inc	4, %l3
	stw	%sp, [%l2+%l3]
	inc	4, %l3
	stw	%l7, [%l2+%l3]
	inc	4, %l3
	stw	%g0, [%l2+%l3]
	inc	4, %l3
	stw	%l3, [%l2+TRACEPTR]
1:
	jmpl	%l6, %g0
	 nop
! reload64: refill %l0-%l7 from the 64-bit (biased) stack frame using
! the ASI in %asi, then clear %tt and retry the trapped instruction.
reload64:
	ldxa	[%sp+BIAS+0x00]%asi, %l0
	ldxa	[%sp+BIAS+0x08]%asi, %l1
	ldxa	[%sp+BIAS+0x10]%asi, %l2
	ldxa	[%sp+BIAS+0x18]%asi, %l3
	ldxa	[%sp+BIAS+0x20]%asi, %l4
	ldxa	[%sp+BIAS+0x28]%asi, %l5
	ldxa	[%sp+BIAS+0x30]%asi, %l6
	ldxa	[%sp+BIAS+0x38]%asi, %l7
	CLRTT
	retry
! reload32: as reload64 but for a 32-bit frame -- unbiased %sp,
! 4-byte slots, 32-bit loads.
reload32:
	lda	[%sp+0x00]%asi, %l0
	lda	[%sp+0x04]%asi, %l1
	lda	[%sp+0x08]%asi, %l2
	lda	[%sp+0x0c]%asi, %l3
	lda	[%sp+0x10]%asi, %l4
	lda	[%sp+0x14]%asi, %l5
	lda	[%sp+0x18]%asi, %l6
	lda	[%sp+0x1c]%asi, %l7
	CLRTT
	retry
#endif
1651
1652/*
1653 * v9 machines do not have a trap window.
1654 *
1655 * When we take a trap the trap state is pushed on to the stack of trap
1656 * registers, interrupts are disabled, then we switch to an alternate set
1657 * of global registers.
1658 *
1659 * The trap handling code needs to allocate a trap frame on the kernel, or
1660 * for interrupts, the interrupt stack, save the out registers to the trap
1661 * frame, then switch to the normal globals and save them to the trap frame
1662 * too.
1663 *
1664 * XXX it would be good to save the interrupt stack frame to the kernel
 * stack so we wouldn't have to copy it later if we needed to handle an AST.
1666 *
1667 * Since kernel stacks are all on one page and the interrupt stack is entirely
1668 * within the locked TLB, we can use physical addressing to save out our
1669 * trap frame so we don't trap during the TRAP_SETUP() operation.  There
1670 * is unfortunately no supportable method for issuing a non-trapping save.
1671 *
1672 * However, if we use physical addresses to save our trapframe, we will need
1673 * to clear out the data cache before continuing much further.
1674 *
1675 * In short, what we need to do is:
1676 *
1677 *	all preliminary processing is done using the alternate globals
1678 *
1679 *	When we allocate our trap windows we must give up our globals because
1680 *	their state may have changed during the save operation
1681 *
1682 *	we need to save our normal globals as soon as we have a stack
1683 *
1684 * Finally, we may now call C code.
1685 *
1686 * This macro will destroy %g5-%g7.  %g0-%g4 remain unchanged.
1687 *
1688 * In order to properly handle nested traps without lossage, alternate
1689 * global %g6 is used as a kernel stack pointer.  It is set to the last
1690 * allocated stack pointer (trapframe) and the old value is stored in
1691 * tf_kstack.  It is restored when returning from a trap.  It is cleared
1692 * on entering user mode.
1693 */
1694
1695 /*
1696  * Other misc. design criteria:
1697  *
1698  * When taking an address fault, fault info is in the sfsr, sfar,
1699  * TLB_TAG_ACCESS registers.  If we take another address fault
1700  * while trying to handle the first fault then that information,
1701  * the only information that tells us what address we trapped on,
1702  * can potentially be lost.  This trap can be caused when allocating
1703  * a register window with which to handle the trap because the save
1704  * may try to store or restore a register window that corresponds
1705  * to part of the stack that is not mapped.  Preventing this trap,
1706  * while possible, is much too complicated to do in a trap handler,
1707  * and then we will need to do just as much work to restore the processor
1708  * window state.
1709  *
1710  * Possible solutions to the problem:
1711  *
1712  * Since we have separate AG, MG, and IG, we could have all traps
1713  * above level-1 preserve AG and use other registers.  This causes
1714  * a problem for the return from trap code which is coded to use
1715  * alternate globals only.
1716  *
1717  * We could store the trapframe and trap address info to the stack
1718  * using physical addresses.  Then we need to read it back using
1719  * physical addressing, or flush the D$.
1720  *
1721  * We could identify certain registers to hold address fault info.
1722  * this means that these registers need to be preserved across all
  * fault handling.  But since we only have 7 usable globals, that
1724  * really puts a cramp in our style.
1725  *
1726  * Finally, there is the issue of returning from kernel mode to user
1727  * mode.  If we need to issue a restore of a user window in kernel
1728  * mode, we need the window control registers in a user mode setup.
1729  * If the trap handlers notice the register windows are in user mode,
1730  * they will allocate a trapframe at the bottom of the kernel stack,
1731  * overwriting the frame we were trying to return to.  This means that
1732  * we must complete the restoration of all registers *before* switching
1733  * to a user-mode window configuration.
1734  *
1735  * Essentially we need to be able to write re-entrant code w/no stack.
1736  */
	.data
/*
 * printf-style format strings used for optional debugging output from
 * the TRAP_SETUP/INTR_SETUP macros below (trap type, old/new stack
 * pointer, trap level, trap PC).
 */
trap_setup_msg:
	.asciz	"TRAP_SETUP: tt=%x osp=%x nsp=%x tl=%x tpc=%x\n"
	_ALIGN
intr_setup_msg:
	.asciz	"INTR_SETUP: tt=%x osp=%x nsp=%x tl=%x tpc=%x\n"
	_ALIGN
	.text
1745
#ifdef _LP64
#ifdef DEBUG
	/* Only save a snapshot of locals and ins in DEBUG kernels */
	/*
	 * Copy the trapped window's local (%l0-%l7) and in (%i0-%i7)
	 * registers into the trap frame being built at
	 * [%g6 + CC64FSZ + BIAS].  Debugging aid only; non-DEBUG
	 * kernels use the empty definition below.
	 */
#define	SAVE_LOCALS_INS	\
	stx	%l0, [%g6 + CC64FSZ + BIAS + TF_L + (0*8)];		/* Save local registers to trap frame */ \
	stx	%l1, [%g6 + CC64FSZ + BIAS + TF_L + (1*8)]; \
	stx	%l2, [%g6 + CC64FSZ + BIAS + TF_L + (2*8)]; \
	stx	%l3, [%g6 + CC64FSZ + BIAS + TF_L + (3*8)]; \
	stx	%l4, [%g6 + CC64FSZ + BIAS + TF_L + (4*8)]; \
	stx	%l5, [%g6 + CC64FSZ + BIAS + TF_L + (5*8)]; \
	stx	%l6, [%g6 + CC64FSZ + BIAS + TF_L + (6*8)]; \
\
	stx	%l7, [%g6 + CC64FSZ + BIAS + TF_L + (7*8)]; \
	stx	%i0, [%g6 + CC64FSZ + BIAS + TF_I + (0*8)];		/* Save in registers to trap frame */ \
	stx	%i1, [%g6 + CC64FSZ + BIAS + TF_I + (1*8)]; \
	stx	%i2, [%g6 + CC64FSZ + BIAS + TF_I + (2*8)]; \
	stx	%i3, [%g6 + CC64FSZ + BIAS + TF_I + (3*8)]; \
	stx	%i4, [%g6 + CC64FSZ + BIAS + TF_I + (4*8)]; \
	stx	%i5, [%g6 + CC64FSZ + BIAS + TF_I + (5*8)]; \
	stx	%i6, [%g6 + CC64FSZ + BIAS + TF_I + (6*8)]; \
\
	stx	%i7, [%g6 + CC64FSZ + BIAS + TF_I + (7*8)];
#else
#define	SAVE_LOCALS_INS
#endif
/*
 * TRAP_SETUP(stackspace) -- build a trapframe for a trap handler.
 *
 * `stackspace' is the (negative) amount of stack to allocate for the
 * frame (e.g. -CC64FSZ-TF_SIZE).  If we trapped from kernel mode
 * (%wstate == WSTATE_KERN) the frame is carved out of the current
 * stack; if from user mode it is placed at the top of the kernel
 * stack (cpcb + USPACE).  After the `save' the trapped window's out
 * registers are stored into the frame; on entry from user mode the
 * window state registers are fixed up (%canrestore -> %otherwin),
 * kernel mode window traps are enabled, and the MMU is switched to
 * the kernel primary context.
 *
 * Clobbers %g5-%g7 and allocates a new register window.
 */
#define	TRAP_SETUP(stackspace) \
	sethi	%hi(CPCB), %g6; \
	sethi	%hi((stackspace)), %g5; \
	\
	ldx	[%g6 + %lo(CPCB)], %g6; \
	sethi	%hi(USPACE), %g7;				/* Always multiple of page size */ \
	or	%g5, %lo((stackspace)), %g5; \
	\
	sra	%g5, 0, %g5;					/* Sign extend the damn thing */ \
	\
	add	%g6, %g7, %g6; \
	rdpr	%wstate, %g7;					/* Find if we're from user mode */ \
\
	\
	\
	\
	sub	%g7, WSTATE_KERN, %g7;				/* Compare & leave in register */ \
	\
	movrz	%g7, %sp, %g6;					/* Select old (kernel) stack or base of kernel stack */ \
	\
	\
	btst	1, %g6;						/* Fixup 64-bit stack if necessary */ \
	bnz,pt	%icc, 1f; \
	\
	 add	%g6, %g5, %g6;					/* Allocate a stack frame */ \
	\
	inc	-BIAS, %g6; \
	nop; \
	nop; \
1:\
	SAVE_LOCALS_INS	\
	save	%g6, 0, %sp;					/* If we fault we should come right back here */ \
	stx	%i0, [%sp + CC64FSZ + BIAS + TF_O + (0*8)];		/* Save out registers to trap frame */ \
	stx	%i1, [%sp + CC64FSZ + BIAS + TF_O + (1*8)]; \
	stx	%i2, [%sp + CC64FSZ + BIAS + TF_O + (2*8)]; \
	stx	%i3, [%sp + CC64FSZ + BIAS + TF_O + (3*8)]; \
	stx	%i4, [%sp + CC64FSZ + BIAS + TF_O + (4*8)]; \
	stx	%i5, [%sp + CC64FSZ + BIAS + TF_O + (5*8)]; \
\
	stx	%i6, [%sp + CC64FSZ + BIAS + TF_O + (6*8)]; \
	brz,pt	%g7, 1f;					/* If we were in kernel mode start saving globals */ \
	 stx	%i7, [%sp + CC64FSZ + BIAS + TF_O + (7*8)]; \
	mov	CTX_PRIMARY, %g7; \
	\
	/* came from user mode -- switch to kernel mode stack */ \
	rdpr	%canrestore, %g5;				/* Fixup register window state registers */ \
	\
	wrpr	%g0, 0, %canrestore; \
	\
	wrpr	%g0, %g5, %otherwin; \
	\
	wrpr	%g0, WSTATE_KERN, %wstate;			/* Enable kernel mode window traps -- now we can trap again */ \
\
	stxa	%g0, [%g7] ASI_DMMU; 				/* Switch MMU to kernel primary context */ \
	sethi	%hi(KERNBASE), %g5; \
	membar	#Sync;						/* XXXX Should be taken care of by flush */ \
	flush	%g5;						/* Some convenient address that won't trap */ \
1:
1829
/*
 * Interrupt setup is almost exactly like trap setup, but we need to
 * go to the interrupt stack if (a) we came from user mode or (b) we
 * came from kernel mode on the kernel stack.
 *
 * We don't guarantee any registers are preserved during this operation.
 * So we can be more efficient.
 *
 * Clobbers %g1 and %g3-%g7.  In addition to the out registers, the
 * interrupted frame pointer (%i6) is stored into the TF_G[0] slot of
 * the frame (clockframe->cf_fp).
 */
#define	INTR_SETUP(stackspace) \
	rdpr	%wstate, %g7;					/* Find if we're from user mode */ \
	\
	sethi	%hi(EINTSTACK-BIAS), %g6; \
	sethi	%hi(EINTSTACK-INTSTACK), %g4; \
	\
	or	%g6, %lo(EINTSTACK-BIAS), %g6;			/* Base of interrupt stack */ \
	dec	%g4;						/* Make it into a mask */ \
	\
	sub	%g6, %sp, %g1;					/* Offset from interrupt stack */ \
	sethi	%hi((stackspace)), %g5; \
	\
	or	%g5, %lo((stackspace)), %g5; \
\
	andn	%g1, %g4, %g4;					/* Are we out of the interrupt stack range? */ \
	xor	%g7, WSTATE_KERN, %g3;				/* Are we on the user stack ? */ \
	\
	sra	%g5, 0, %g5;					/* Sign extend the damn thing */ \
	or	%g3, %g4, %g4;					/* Definitely not off the interrupt stack */ \
	\
	movrz	%g4, %sp, %g6; \
	\
	add	%g6, %g5, %g5;					/* Allocate a stack frame */ \
	btst	1, %g6; \
	bnz,pt	%icc, 1f; \
\
	 mov	%g5, %g6; \
	\
	add	%g5, -BIAS, %g6; \
	\
1:	SAVE_LOCALS_INS	\
	save	%g6, 0, %sp;					/* If we fault we should come right back here */ \
	stx	%i0, [%sp + CC64FSZ + BIAS + TF_O + (0*8)];		/* Save out registers to trap frame */ \
	stx	%i1, [%sp + CC64FSZ + BIAS + TF_O + (1*8)]; \
	stx	%i2, [%sp + CC64FSZ + BIAS + TF_O + (2*8)]; \
	stx	%i3, [%sp + CC64FSZ + BIAS + TF_O + (3*8)]; \
	stx	%i4, [%sp + CC64FSZ + BIAS + TF_O + (4*8)]; \
\
	stx	%i5, [%sp + CC64FSZ + BIAS + TF_O + (5*8)]; \
	stx	%i6, [%sp + CC64FSZ + BIAS + TF_O + (6*8)]; \
	stx	%i6, [%sp + CC64FSZ + BIAS + TF_G + (0*8)];		/* Save fp in clockframe->cf_fp */ \
	brz,pt	%g3, 1f;					/* If we were in kernel mode start saving globals */ \
	 stx	%i7, [%sp + CC64FSZ + BIAS + TF_O + (7*8)]; \
	/* came from user mode -- switch to kernel mode stack */ \
	 rdpr	%otherwin, %g5;					/* Has this already been done? */ \
	\
	brnz,pn	%g5, 1f;					/* Don't set this twice */ \
	\
	 rdpr	%canrestore, %g5;				/* Fixup register window state registers */ \
\
	wrpr	%g0, 0, %canrestore; \
	\
	wrpr	%g0, %g5, %otherwin; \
	\
	sethi	%hi(KERNBASE), %g5; \
	mov	CTX_PRIMARY, %g7; \
	\
	wrpr	%g0, WSTATE_KERN, %wstate;			/* Enable kernel mode window traps -- now we can trap again */ \
	\
	stxa	%g0, [%g7] ASI_DMMU; 				/* Switch MMU to kernel primary context */ \
	membar	#Sync;						/* XXXX Should be taken care of by flush */ \
	\
	flush	%g5;						/* Some convenient address that won't trap */ \
1:
1902
#else
/*
 * 32-bit (!_LP64) variants of the same macros; trap frame offsets use
 * STKB instead of BIAS.
 */
#ifdef DEBUG
	/*
	 * DEBUG-only snapshot: store whatever fault info %g1 holds
	 * (-> TF_FAULT) plus the trapped window's local and in
	 * registers into the trap frame at [%g6 + CC64FSZ + STKB].
	 */
#define	SAVE_LOCALS_INS	\
	stx	%g1, [%g6 + CC64FSZ + STKB + TF_FAULT]; \
	stx	%l0, [%g6 + CC64FSZ + STKB + TF_L + (0*8)];		/* Save local registers to trap frame */ \
	stx	%l1, [%g6 + CC64FSZ + STKB + TF_L + (1*8)]; \
	stx	%l2, [%g6 + CC64FSZ + STKB + TF_L + (2*8)]; \
	stx	%l3, [%g6 + CC64FSZ + STKB + TF_L + (3*8)]; \
	stx	%l4, [%g6 + CC64FSZ + STKB + TF_L + (4*8)]; \
	stx	%l5, [%g6 + CC64FSZ + STKB + TF_L + (5*8)]; \
	stx	%l6, [%g6 + CC64FSZ + STKB + TF_L + (6*8)]; \
	\
	stx	%l7, [%g6 + CC64FSZ + STKB + TF_L + (7*8)]; \
	stx	%i0, [%g6 + CC64FSZ + STKB + TF_I + (0*8)];		/* Save in registers to trap frame */ \
	stx	%i1, [%g6 + CC64FSZ + STKB + TF_I + (1*8)]; \
	stx	%i2, [%g6 + CC64FSZ + STKB + TF_I + (2*8)]; \
	stx	%i3, [%g6 + CC64FSZ + STKB + TF_I + (3*8)]; \
	stx	%i4, [%g6 + CC64FSZ + STKB + TF_I + (4*8)]; \
	stx	%i5, [%g6 + CC64FSZ + STKB + TF_I + (5*8)]; \
	stx	%i6, [%g6 + CC64FSZ + STKB + TF_I + (6*8)]; \
	\
	stx	%i7, [%g6 + CC64FSZ + STKB + TF_I + (7*8)];
#else
#define	SAVE_LOCALS_INS
#endif
/*
 * 32-bit TRAP_SETUP(stackspace): same contract as the _LP64 version
 * above, but kernel pointers are 32 bits (lduw of CPCB) and the
 * selected stack pointer is truncated to 32 bits before use.
 * Clobbers %g5-%g7 and allocates a new register window.
 */
#define	TRAP_SETUP(stackspace) \
	sethi	%hi(USPACE), %g7; \
	sethi	%hi(CPCB), %g6; \
	or	%g7, %lo(USPACE), %g7; \
	sethi	%hi((stackspace)), %g5; \
	lduw	[%g6 + %lo(CPCB)], %g6; \
	or	%g5, %lo((stackspace)), %g5; \
	add	%g6, %g7, %g6; \
	rdpr	%wstate, %g7;					/* Find if we're from user mode */ \
	\
	sra	%g5, 0, %g5;					/* Sign extend the damn thing */ \
	subcc	%g7, WSTATE_KERN, %g7;				/* Compare & leave in register */ \
	movz	%icc, %sp, %g6;					/* Select old (kernel) stack or base of kernel stack */ \
	srl	%g6, 0, %g6;					/* truncate at 32-bits */ \
	btst	1, %g6;						/* Fixup 64-bit stack if necessary */ \
	add	%g6, %g5, %g6;					/* Allocate a stack frame */ \
	add	%g6, BIAS, %g5; \
	movne	%icc, %g5, %g6; \
	\
	SAVE_LOCALS_INS \
	save	%g6, 0, %sp;					/* If we fault we should come right back here */ \
	stx	%i0, [%sp + CC64FSZ + STKB + TF_O + (0*8)];		/* Save out registers to trap frame */ \
	stx	%i1, [%sp + CC64FSZ + STKB + TF_O + (1*8)]; \
	stx	%i2, [%sp + CC64FSZ + STKB + TF_O + (2*8)]; \
	stx	%i3, [%sp + CC64FSZ + STKB + TF_O + (3*8)]; \
	stx	%i4, [%sp + CC64FSZ + STKB + TF_O + (4*8)]; \
	stx	%i5, [%sp + CC64FSZ + STKB + TF_O + (5*8)]; \
	stx	%i6, [%sp + CC64FSZ + STKB + TF_O + (6*8)]; \
	\
	stx	%i7, [%sp + CC64FSZ + STKB + TF_O + (7*8)]; \
/*	rdpr	%wstate, %g7; sub %g7, WSTATE_KERN, %g7; /* DEBUG */ \
	brz,pn	%g7, 1f;					/* If we were in kernel mode start saving globals */ \
	 rdpr	%canrestore, %g5;				/* Fixup register window state registers */ \
	/* came from user mode -- switch to kernel mode stack */ \
	wrpr	%g0, 0, %canrestore; \
	wrpr	%g0, %g5, %otherwin; \
	mov	CTX_PRIMARY, %g7; \
	wrpr	%g0, WSTATE_KERN, %wstate;			/* Enable kernel mode window traps -- now we can trap again */ \
	\
	stxa	%g0, [%g7] ASI_DMMU; 				/* Switch MMU to kernel primary context */ \
	sethi	%hi(KERNBASE), %g5; \
	membar	#Sync;						/* XXXX Should be taken care of by flush */ \
	flush	%g5;						/* Some convenient address that won't trap */ \
1:
1972
/*
 * Interrupt setup is almost exactly like trap setup, but we need to
 * go to the interrupt stack if (a) we came from user mode or (b) we
 * came from kernel mode on the kernel stack.
 *
 * We don't guarantee any registers are preserved during this operation.
 *
 * 32-bit version; clobbers %g1, %g2 and %g5-%g7, and also stores the
 * interrupted frame pointer (%i6) into the TF_G[0] slot of the frame
 * (clockframe->cf_fp).
 */
#define	INTR_SETUP(stackspace) \
	sethi	%hi(EINTSTACK), %g1; \
	sethi	%hi((stackspace)), %g5; \
	btst	1, %sp; \
	add	%sp, BIAS, %g6; \
	movz	%icc, %sp, %g6; \
	or	%g1, %lo(EINTSTACK), %g1; \
	srl	%g6, 0, %g6;					/* truncate at 32-bits */ \
	set	(EINTSTACK-INTSTACK), %g7; \
	or	%g5, %lo((stackspace)), %g5; \
	sub	%g1, %g6, %g2;					/* Determine if we need to switch to intr stack or not */ \
	dec	%g7;						/* Make it into a mask */ \
	andncc	%g2, %g7, %g0;					/* XXXXXXXXXX This assumes kernel addresses are unique from user addresses */ \
	rdpr	%wstate, %g7;					/* Find if we're from user mode */ \
	sra	%g5, 0, %g5;					/* Sign extend the damn thing */ \
	movnz	%xcc, %g1, %g6;					/* Stay on interrupt stack? */ \
	cmp	%g7, WSTATE_KERN;				/* User or kernel sp? */ \
	movnz	%icc, %g1, %g6;					/* Stay on interrupt stack? */ \
	add	%g6, %g5, %g6;					/* Allocate a stack frame */ \
	\
	SAVE_LOCALS_INS \
	save	%g6, 0, %sp;					/* If we fault we should come right back here */ \
	stx	%i0, [%sp + CC64FSZ + STKB + TF_O + (0*8)];		/* Save out registers to trap frame */ \
	stx	%i1, [%sp + CC64FSZ + STKB + TF_O + (1*8)]; \
	stx	%i2, [%sp + CC64FSZ + STKB + TF_O + (2*8)]; \
	stx	%i3, [%sp + CC64FSZ + STKB + TF_O + (3*8)]; \
	stx	%i4, [%sp + CC64FSZ + STKB + TF_O + (4*8)]; \
	stx	%i5, [%sp + CC64FSZ + STKB + TF_O + (5*8)]; \
	stx	%i6, [%sp + CC64FSZ + STKB + TF_O + (6*8)]; \
	stx	%i6, [%sp + CC64FSZ + STKB + TF_G + (0*8)];		/* Save fp in clockframe->cf_fp */ \
	rdpr	%wstate, %g7;					/* Find if we're from user mode */ \
	stx	%i7, [%sp + CC64FSZ + STKB + TF_O + (7*8)]; \
	cmp	%g7, WSTATE_KERN;				/* Compare & leave in register */ \
	be,pn	%icc, 1f;					/* If we were in kernel mode start saving globals */ \
	/* came from user mode -- switch to kernel mode stack */ \
	 rdpr	%otherwin, %g5;					/* Has this already been done? */ \
	tst	%g5; tnz %xcc, 1; nop; /* DEBUG -- this should _NEVER_ happen */ \
	brnz,pn	%g5, 1f;					/* Don't set this twice */ \
	 rdpr	%canrestore, %g5;				/* Fixup register window state registers */ \
	wrpr	%g0, 0, %canrestore; \
	mov	CTX_PRIMARY, %g7; \
	wrpr	%g0, %g5, %otherwin; \
	sethi	%hi(KERNBASE), %g5; \
	wrpr	%g0, WSTATE_KERN, %wstate;			/* Enable kernel mode window traps -- now we can trap again */ \
	stxa	%g0, [%g7] ASI_DMMU; 				/* Switch MMU to kernel primary context */ \
	membar	#Sync;						/* XXXX Should be taken care of by flush */ \
	flush	%g5;						/* Some convenient address that won't trap */ \
1:
#endif /* _LP64 */
2029
#ifdef DEBUG

	/* Look up kpte to test algorithm */
	/*
	 * asmptechk: debug-only walk of the three-level page tables.
	 *
	 * In:	%o0 = segment table pointer (pmap->pm_segs)
	 *	%o1 = virtual address to look up (including context bits)
	 * Out:	%o0:%o1 = high:low 32-bit halves of the TTE data if a
	 *	valid mapping was found; 0:0 otherwise.
	 * Scratch: %g3-%g6.
	 */
	.globl	asmptechk
asmptechk:
	mov	%o0, %g4	! pmap->pm_segs
	mov	%o1, %g3	! Addr to lookup -- mind the context

	srax	%g3, HOLESHIFT, %g5			! Check for valid address
	brz,pt	%g5, 0f					! Should be zero or -1
	 inc	%g5					! Make -1 -> 0
	brnz,pn	%g5, 1f					! Error!
0:
	 srlx	%g3, STSHIFT, %g5
	and	%g5, STMASK, %g5
	sll	%g5, 3, %g5
	add	%g4, %g5, %g4
	DLFLUSH(%g4,%g5)
	ldxa	[%g4] ASI_PHYS_CACHED, %g4		! Remember -- UNSIGNED
	DLFLUSH2(%g5)
	brz,pn	%g4, 1f					! NULL entry? check somewhere else

	 srlx	%g3, PDSHIFT, %g5
	and	%g5, PDMASK, %g5
	sll	%g5, 3, %g5
	add	%g4, %g5, %g4
	DLFLUSH(%g4,%g5)
	ldxa	[%g4] ASI_PHYS_CACHED, %g4		! Remember -- UNSIGNED
	DLFLUSH2(%g5)
	brz,pn	%g4, 1f					! NULL entry? check somewhere else

	 srlx	%g3, PTSHIFT, %g5			! Convert to ptab offset
	and	%g5, PTMASK, %g5
	sll	%g5, 3, %g5
	add	%g4, %g5, %g4
	DLFLUSH(%g4,%g5)
	ldxa	[%g4] ASI_PHYS_CACHED, %g6
	DLFLUSH2(%g5)
	brgez,pn %g6, 1f				! Entry invalid?  Punt
	 srlx	%g6, 32, %o0
	retl
	 srl	%g6, 0, %o1
1:
	mov	%g0, %o1
	retl
	 mov	%g0, %o0

	.data
2:
	.asciz	"asmptechk: %x %x %x %x:%x\r\n"
	_ALIGN
	.text
#endif
2083
/*
 * This is the MMU protection handler.  It's too big to fit
 * in the trap table so I moved it here.  It's relatively simple.
 * It looks up the page mapping in the page table associated with
 * the trapping context.  It checks to see if the S/W writable bit
 * is set.  If so, it sets the H/W write bit, marks the tte modified,
 * and enters the mapping into the MMU.  Otherwise it does a regular
 * data fault.
 *
 * %g1-%g7 are used as scratch.  On success the TSB entry is updated,
 * the old TLB entry is demapped, the new one is entered, and the
 * trapping instruction is retried; on any failure we branch to
 * winfix for the full fault path.
 */
	ICACHE_ALIGN
dmmu_write_fault:
	mov	TLB_TAG_ACCESS, %g3
	sethi	%hi(0x1fff), %g6			! 8K context mask
	ldxa	[%g3] ASI_DMMU, %g3			! Get fault addr from Tag Target
	sethi	%hi(_C_LABEL(ctxbusy)), %g4
	or	%g6, %lo(0x1fff), %g6
	LDPTR	[%g4 + %lo(_C_LABEL(ctxbusy))], %g4
	srax	%g3, HOLESHIFT, %g5			! Check for valid address
	and	%g3, %g6, %g6				! Isolate context

	inc	%g5					! (0 or -1) -> (1 or 0)
	sllx	%g6, 3, %g6				! Make it into an offset into ctxbusy
	ldx	[%g4+%g6], %g4				! Load up our page table.
	srlx	%g3, STSHIFT, %g6
	cmp	%g5, 1
	bgu,pn %xcc, winfix				! Error!
	 srlx	%g3, PDSHIFT, %g5
	and	%g6, STMASK, %g6
	sll	%g6, 3, %g6

	and	%g5, PDMASK, %g5
	sll	%g5, 3, %g5
	add	%g6, %g4, %g4
	DLFLUSH(%g4,%g6)
	ldxa	[%g4] ASI_PHYS_CACHED, %g4
	DLFLUSH2(%g6)
	srlx	%g3, PTSHIFT, %g6			! Convert to ptab offset
	and	%g6, PTMASK, %g6
	add	%g5, %g4, %g5
	brz,pn	%g4, winfix				! NULL entry? check somewhere else

	 nop
	ldxa	[%g5] ASI_PHYS_CACHED, %g4
	sll	%g6, 3, %g6
	brz,pn	%g4, winfix				! NULL entry? check somewhere else
	 add	%g6, %g4, %g6
1:							! CAS loop: re-read the TTE until the casxa below succeeds
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, winfix				! Entry invalid?  Punt
	 or	%g4, TTE_MODIFY|TTE_ACCESS|TTE_W, %g7	! Update the modified bit

	btst	TTE_REAL_W|TTE_W, %g4			! Is it a ref fault?
	bz,pn	%xcc, winfix				! No -- really fault
#ifdef DEBUG
	/* Make sure we don't try to replace a kernel translation */
	/* This should not be necessary */
	sllx	%g3, 64-13, %g2				! Isolate context bits
	sethi	%hi(KERNBASE), %g5			! Don't need %lo
	brnz,pt	%g2, 0f					! Ignore context != 0
	 set	0x0800000, %g2				! 8MB
	sub	%g3, %g5, %g5
	cmp	%g5, %g2
	tlu	%xcc, 1; nop
	blu,pn	%xcc, winfix				! Next insn in delay slot is unimportant
0:
#endif
	/* Need to check for and handle large pages. */
	 srlx	%g4, 61, %g5				! Isolate the size bits
	ldxa	[%g0] ASI_DMMU_8KPTR, %g2		! Load DMMU 8K TSB pointer
	andcc	%g5, 0x3, %g5				! 8K?
	bnz,pn	%icc, winfix				! We punt to the pmap code since we can't handle policy
	 ldxa	[%g0] ASI_DMMU, %g1			! Hard coded for unified 8K TSB		Load DMMU tag target register
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out

	membar	#StoreLoad
	cmp	%g4, %g7
	bne,pn	%xcc, 1b
	 or	%g4, TTE_MODIFY|TTE_ACCESS|TTE_W, %g4	! Update the modified bit
	stx	%g1, [%g2]				! Update TSB entry tag
	mov	SFSR, %g7
	stx	%g4, [%g2+8]				! Update TSB entry data
	nop
#ifdef DEBUG
	set	DATA_START, %g6	! debug
	stx	%g1, [%g6+0x40]	! debug
	set	0x88, %g5	! debug
	stx	%g4, [%g6+0x48]	! debug -- what we tried to enter in TLB
	stb	%g5, [%g6+0x8]	! debug
#endif
#ifdef TRAPSTATS
	sethi	%hi(_C_LABEL(protfix)), %g1
	lduw	[%g1+%lo(_C_LABEL(protfix))], %g2
	inc	%g2
	stw	%g2, [%g1+%lo(_C_LABEL(protfix))]
#endif
	mov	DEMAP_PAGE_SECONDARY, %g1		! Secondary flush
	mov	DEMAP_PAGE_NUCLEUS, %g5			! Nucleus flush
	stxa	%g0, [%g7] ASI_DMMU			! clear out the fault
	membar	#Sync
	sllx	%g3, (64-13), %g7			! Need to demap old entry first
	andn	%g3, 0xfff, %g6
	movrz	%g7, %g5, %g1				! Pick one
	or	%g6, %g1, %g6
	stxa	%g6, [%g6] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync					! No real reason for this XXXX

	stxa	%g4, [%g0] ASI_DMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	retry
2195
/*
 * Each memory data access fault from a fast access miss handler comes here.
 * We will quickly check if this is an original prom mapping before going
 * to the generic fault handler
 *
 * We will assume that %pil is not lost so we won't bother to save it
 * unless we're in an interrupt handler.
 *
 * On entry:
 *	We are on one of the alternate set of globals
 *	%g1 = MMU tag target
 *	%g2 = 8Kptr
 *	%g3 = TLB TAG ACCESS
 *	(%g3 is reloaded from TLB_TAG_ACCESS below anyway)
 *
 * On return:
 *	Either retries the access with the mapping entered into the
 *	TSB and TLB, or branches to winfix (invalid address) or
 *	data_nfo (no mapping found).
 */
	ICACHE_ALIGN
data_miss:
#ifdef TRAPSTATS
	set	_C_LABEL(kdmiss), %g3
	set	_C_LABEL(udmiss), %g4
	rdpr	%tl, %g6
	dec	%g6
	movrz	%g6, %g4, %g3
	lduw	[%g3], %g4
	inc	%g4
	stw	%g4, [%g3]
#endif
	mov	TLB_TAG_ACCESS, %g3			! Get real fault page
	sethi	%hi(0x1fff), %g6			! 8K context mask
	ldxa	[%g3] ASI_DMMU, %g3			! from tag access register
	sethi	%hi(_C_LABEL(ctxbusy)), %g4
	or	%g6, %lo(0x1fff), %g6
	LDPTR	[%g4 + %lo(_C_LABEL(ctxbusy))], %g4
	srax	%g3, HOLESHIFT, %g5			! Check for valid address
	and	%g3, %g6, %g6				! Isolate context

	inc	%g5					! (0 or -1) -> (1 or 0)
	sllx	%g6, 3, %g6				! Make it into an offset into ctxbusy
	ldx	[%g4+%g6], %g4				! Load up our page table.
#ifdef DEBUG
	/* Make sure we don't try to replace a kernel translation */
	/* This should not be necessary */
	brnz,pt	%g6, 1f			! If user context continue miss
	sethi	%hi(KERNBASE), %g7			! Don't need %lo
	set	0x0800000, %g6				! 8MB
	sub	%g3, %g7, %g7
	cmp	%g7, %g6
	sethi	%hi(DATA_START), %g7
	mov	6, %g6		! debug
	stb	%g6, [%g7+0x20]	! debug
	tlu	%xcc, 1; nop
	blu,pn	%xcc, winfix				! Next insn in delay slot is unimportant
	 mov	7, %g6		! debug
	stb	%g6, [%g7+0x20]	! debug
1:
#endif
	srlx	%g3, STSHIFT, %g6
	cmp	%g5, 1
	bgu,pn %xcc, winfix				! Error!
	 srlx	%g3, PDSHIFT, %g5
	and	%g6, STMASK, %g6

	sll	%g6, 3, %g6
	and	%g5, PDMASK, %g5
	sll	%g5, 3, %g5
	add	%g6, %g4, %g4
	ldxa	[%g4] ASI_PHYS_CACHED, %g4
	srlx	%g3, PTSHIFT, %g6			! Convert to ptab offset
	and	%g6, PTMASK, %g6
	add	%g5, %g4, %g5
	brz,pn	%g4, data_nfo				! NULL entry? check somewhere else

	 nop
	ldxa	[%g5] ASI_PHYS_CACHED, %g4
	sll	%g6, 3, %g6
	brz,pn	%g4, data_nfo				! NULL entry? check somewhere else
	 add	%g6, %g4, %g6
1:
	ldxa	[%g6] ASI_PHYS_CACHED, %g4
	brgez,pn %g4, data_nfo				! Entry invalid?  Punt
	 or	%g4, TTE_ACCESS, %g7			! Update the access bit

	btst	TTE_ACCESS, %g4				! Need to update access bit?
	bne,pt	%xcc, 1f
	 nop
	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and write it out
	cmp	%g4, %g7
	bne,pn	%xcc, 1b
	 or	%g4, TTE_ACCESS, %g4				! Update the modified bit
1:
	stx	%g1, [%g2]				! Update TSB entry tag

	stx	%g4, [%g2+8]				! Update TSB entry data
#ifdef DEBUG
	set	DATA_START, %g6	! debug
	stx	%g3, [%g6+8]	! debug
	set	0xa, %g5	! debug
	stx	%g4, [%g6]	! debug -- what we tried to enter in TLB
	stb	%g5, [%g6+0x20]	! debug
#endif
#if 0
	/* This was a miss -- should be nothing to demap. */
	sllx	%g3, (64-13), %g6			! Need to demap old entry first
	mov	DEMAP_PAGE_SECONDARY, %g1		! Secondary flush
	mov	DEMAP_PAGE_NUCLEUS, %g5			! Nucleus flush
	movrz	%g6, %g5, %g1				! Pick one
	andn	%g3, 0xfff, %g6
	or	%g6, %g1, %g6
	stxa	%g6, [%g6] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync					! No real reason for this XXXX
#endif
	stxa	%g4, [%g0] ASI_DMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	CLRTT
	retry
	NOTREACHED
/*
 * We had a data miss but did not find a mapping.  Insert
 * a NFO mapping to satisfy speculative loads and return.
 * If this had been a real load, it will re-execute and
 * result in a data fault or protection fault rather than
 * a TLB miss.  We insert an 8K TTE with the valid and NFO
 * bits set.  All others should be zero.  The TTE looks like this:
 *
 *	0x9000000000000000
 *
 */
data_nfo:
	sethi	%hi(0x90000000), %g4			! V(0x8)|NFO(0x1)
	sllx	%g4, 32, %g4				! Shift V|NFO into the top bits of the 64-bit TTE
	stxa	%g4, [%g0] ASI_DMMU_DATA_IN		! Enter new mapping
	membar	#Sync
	CLRTT
	retry
2332
/*
 * Handler for making the trap window shiny clean.
 *
 * If the store that trapped was to a kernel address, panic.
 *
 * If the store that trapped was to a user address, stick it in the PCB.
 * Since we don't want to force user code to use the standard register
 * convention if we don't have to, we will not assume that %fp points to
 * anything valid.
 *
 * On entry:
 *	We are on one of the alternate set of globals
 *	%g1 = %tl - 1, tstate[tl-1], scratch	- local
 *	%g2 = %tl				- local
 *	%g3 = MMU tag access			- in
 *	%g4 = %cwp				- local
 *	%g5 = scratch				- local
 *	%g6 = cpcb				- local
 *	%g7 = scratch				- local
 *
 * On return:
 *
 * NB:	 remove most of this from main codepath & cleanup I$
 */
winfault:
#ifdef DEBUG
	sethi	%hi(DATA_START), %g7			! debug
!	stx	%g0, [%g7]				! debug This is a real fault -- prevent another trap from watchdoging
	set	0x10, %g4				! debug
	stb	%g4, [%g7 + 0x20]			! debug
	CHKPT(%g4,%g7,0x19)
#endif
	mov	TLB_TAG_ACCESS, %g3	! Get real fault page from tag access register
	ldxa	[%g3] ASI_DMMU, %g3	! And put it into the non-MMU alternate regs
winfix:
	rdpr	%tl, %g2
	subcc	%g2, 1, %g1
	ble,pt	%icc, datafault		! Don't go below trap level 1
	 sethi	%hi(CPCB), %g6		! get current pcb


	CHKPT(%g4,%g7,0x20)
	wrpr	%g1, 0, %tl		! Pop a trap level
	rdpr	%tt, %g7		! Read type of prev. trap
	rdpr	%tstate, %g4		! Try to restore prev %cwp if we were executing a restore
	andn	%g7, 0x3f, %g5		!   window fill traps are all 0b 0000 11xx xxxx

#if 1
	cmp	%g7, 0x30		! If we took a datafault just before this trap
	bne,pt	%icc, winfixfill	! our stack's probably bad so we need to switch somewhere else
	 nop

	!!
	!! Double data fault -- bad stack?
	!!
	wrpr	%g2, %tl	! Restore trap level.
	sir			! Just issue a reset and don't try to recover.
	!! NB: while the `sir' above is in place, the recovery sequence
	!! below (down to the ba slowtrap) is never reached.
	mov	%fp, %l6		! Save the frame pointer
	set	EINTSTACK+USPACE+CC64FSZ-STKB, %fp ! Set the frame pointer to the middle of the idle stack
	add	%fp, -CC64FSZ, %sp	! Create a stackframe
	wrpr	%g0, 15, %pil		! Disable interrupts, too
	wrpr	%g0, %g0, %canrestore	! Our stack is hosed and our PCB
	wrpr	%g0, 7, %cansave	!  probably is too, so blow away
	ba	slowtrap		!  all our register windows.
	 wrpr	%g0, 0x101, %tt
#endif

winfixfill:
	cmp	%g5, 0x0c0		!   so we mask lower bits & compare to 0b 0000 1100 0000
	bne,pt	%icc, winfixspill	! Dump our trap frame -- we will retry the fill when the page is loaded
	 cmp	%g5, 0x080		!   window spill traps are all 0b 0000 10xx xxxx

	!!
	!! This was a fill
	!!
#ifdef TRAPSTATS
	set	_C_LABEL(wfill), %g1
	lduw	[%g1], %g5
	inc	%g5
	stw	%g5, [%g1]
#endif
	btst	TSTATE_PRIV, %g4	! User mode?
	and	%g4, CWP, %g5		! %g4 = %cwp of trap
	wrpr	%g7, 0, %tt
	bz,a,pt	%icc, datafault		! We were in user mode -- normal fault
	 wrpr	%g5, %cwp		! Restore cwp from before fill trap -- regs should now be consistent

	/*
	 * We're in a pickle here.  We were trying to return to user mode
	 * and the restore of the user window failed, so now we have one valid
	 * kernel window and a user window state.  If we do a TRAP_SETUP() now,
	 * our kernel window will be considered a user window and cause a
	 * fault when we try to save it later due to an invalid user address.
	 * If we return to where we faulted, our window state will not be valid
	 * and we will fault trying to enter user with our primary context of zero.
	 *
	 * What we'll do is arrange to have us return to return_from_trap so we will
	 * start the whole business over again.  But first, switch to a kernel window
	 * setup.  Let's see, canrestore and otherwin are zero.  Set WSTATE_KERN and
	 * make sure we're in kernel context and we're done.
	 */

#ifdef TRAPSTATS
	set	_C_LABEL(kwfill), %g4
	lduw	[%g4], %g7
	inc	%g7
	stw	%g7, [%g4]
#endif
#if 0 /* Need to switch over to new stuff to fix WDR bug */
	wrpr	%g5, %cwp				! Restore cwp from before fill trap -- regs should now be consistent
	wrpr	%g2, %g0, %tl				! Restore trap level -- we need to reuse it
	set	return_from_trap, %g4
	set	CTX_PRIMARY, %g7
	wrpr	%g4, 0, %tpc
	stxa	%g0, [%g7] ASI_DMMU
	inc	4, %g4
	membar	#Sync
	flush	%g4					! Isn't this convenient?
	wrpr	%g0, WSTATE_KERN, %wstate
	wrpr	%g0, 0, %canrestore			! These should be zero but
	wrpr	%g0, 0, %otherwin			! clear them just in case
	rdpr	%ver, %g5
	and	%g5, CWP, %g5
	wrpr	%g0, 0, %cleanwin
	dec	1, %g5					! NWINDOWS-1-1
	wrpr	%g5, 0, %cansave			! Invalidate all windows
	CHKPT(%g5,%g7,0xe)
!	flushw						! DEBUG
	ba,pt	%icc, datafault
	 wrpr	%g4, 0, %tnpc
#else
	wrpr	%g2, %g0, %tl				! Restore trap level
	cmp	%g2, 3
	tne	%icc, 1
	rdpr	%tt, %g5
	wrpr	%g0, 1, %tl				! Revert to TL==1 XXX what if this wasn't in rft_user? Oh well.
	wrpr	%g5, %g0, %tt				! Set trap type correctly
	CHKPT(%g5,%g7,0xe)
/*
 * Here we need to implement the beginning of datafault.
 * TRAP_SETUP expects to come from either kernel mode or
 * user mode with at least one valid register window.  It
 * will allocate a trap frame, save the out registers, and
 * fix the window registers to think we have one user
 * register window.
 *
 * However, under these circumstances we don't have any
 * valid register windows, so we need to clean up the window
 * registers to prevent garbage from being saved to either
 * the user stack or the PCB before calling the datafault
 * handler.
 *
 * We could simply jump to datafault if we could somehow
 * make the handler issue a `saved' instruction immediately
 * after creating the trapframe.
 *
 * The following is duplicated from datafault:
 */
	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! We need to save volatile stuff to AG regs
#ifdef TRAPS_USE_IG
	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! We need to save volatile stuff to AG regs
#endif
#ifdef DEBUG
	set	DATA_START, %g7				! debug
	set	0x20, %g6				! debug
	stx	%g0, [%g7]				! debug
	stb	%g6, [%g7 + 0x20]			! debug
	CHKPT(%g4,%g7,0xf)
#endif
	wr	%g0, ASI_DMMU, %asi			! We need to re-load trap info
	ldxa	[%g0 + TLB_TAG_ACCESS] %asi, %g1	! Get fault address from tag access register
	ldxa	[SFAR] %asi, %g2			! sync virt addr; must be read first
	ldxa	[SFSR] %asi, %g3			! get sync fault status register
	stxa	%g0, [SFSR] %asi			! Clear out fault now
	membar	#Sync					! No real reason for this XXXX

	TRAP_SETUP(-CC64FSZ-TF_SIZE)
	saved						! Blow away that one register window we didn't ever use.
	ba,a,pt	%icc, Ldatafault_internal		! Now we should return directly to user mode
	 nop
#endif
2514winfixspill:
2515	bne,a,pt	%xcc, datafault				! Was not a spill -- handle it normally
2516	 wrpr	%g2, 0, %tl				! Restore trap level for now XXXX
2517
2518	!!
2519	!! This was a spill
2520	!!
2521#if 1
2522	btst	TSTATE_PRIV, %g4			! From user mode?
2523	wrpr	%g2, 0, %tl				! We need to load the fault type so we can
2524	rdpr	%tt, %g5				! overwrite the lower trap and get it to the fault handler
2525	wrpr	%g1, 0, %tl
2526	wrpr	%g5, 0, %tt				! Copy over trap type for the fault handler
2527	and	%g4, CWP, %g5				! find %cwp from trap
2528#ifndef TRAPTRACE
2529	be,a,pt	%xcc, datafault				! Let's do a regular datafault.  When we try a save in datafault we'll
2530	 wrpr	%g5, 0, %cwp				!  return here and write out all dirty windows.
2531#else
2532	bne,pt	%xcc, 3f				! Let's do a regular datafault.  When we try a save in datafault we'll
2533	 nop
2534	wrpr	%g5, 0, %cwp				!  return here and write out all dirty windows.
2535	set	trap_trace, %g2
2536	lduw	[%g2+TRACEDIS], %g4
2537	brnz,pn	%g4, 1f
2538	 nop
2539	lduw	[%g2+TRACEPTR], %g3
2540	rdpr	%tl, %g4
2541	mov	2, %g5
2542	set	CURPROC, %g6
2543	sllx	%g4, 13, %g4
2544!	LDPTR	[%g6], %g6	! Never touch PID
2545	clr	%g6		! DISABLE PID
2546	or	%g4, %g5, %g4
2547	mov	%g0, %g5
2548	brz,pn	%g6, 2f
2549	 andncc	%g3, (TRACESIZ-1), %g0
2550!	ldsw	[%g6+P_PID], %g5	! Load PID
25512:
2552	movnz	%icc, %g0, %g3		! Wrap if needed
2553	ba,a,pt	%xcc, 4f
2554
2555	set	CPCB, %g6	! Load up nsaved
2556	LDPTR	[%g6], %g6
2557	ldub	[%g6 + PCB_NSAVED], %g6
2558	sllx	%g6, 9, %g6
2559	or	%g6, %g4, %g4
25604:
2561	rdpr	%tstate, %g6
2562	rdpr	%tpc, %g7
2563	sth	%g4, [%g2+%g3]
2564	inc	2, %g3
2565	sth	%g5, [%g2+%g3]
2566	inc	2, %g3
2567	stw	%g6, [%g2+%g3]
2568	inc	4, %g3
2569	stw	%sp, [%g2+%g3]
2570	inc	4, %g3
2571	stw	%g7, [%g2+%g3]
2572	inc	4, %g3
2573	mov	TLB_TAG_ACCESS, %g7
2574	ldxa	[%g7] ASI_DMMU, %g7
2575	stw	%g7, [%g2+%g3]
2576	inc	4, %g3
2577	stw	%g3, [%g2+TRACEPTR]
25781:
2579	ba	datafault
2580	 nop
25813:
2582#endif
2583#endif
2584	wrpr	%g2, 0, %tl				! Restore trap level for now XXXX
2585	LDPTR	[%g6 + %lo(CPCB)], %g6	! This is in the locked TLB and should not fault
2586#ifdef TRAPSTATS
2587	set	_C_LABEL(wspill), %g7
2588	lduw	[%g7], %g5
2589	inc	%g5
2590	stw	%g5, [%g7]
2591#endif
2592#ifdef DEBUG
2593	set	0x12, %g5				! debug
2594	sethi	%hi(DATA_START), %g7			! debug
2595	stb	%g5, [%g7 + 0x20]			! debug
2596	CHKPT(%g5,%g7,0x11)
2597#endif
2598
2599	/*
2600	 * Traverse kernel map to find paddr of cpcb and only use ASI_PHYS_CACHED to
2601	 * prevent any faults while saving the windows.  BTW if it isn't mapped, we
2602	 * will trap and hopefully panic.
2603	 */
2604
2605!	ba	0f					! DEBUG -- don't use phys addresses
2606	 wr	%g0, ASI_NUCLEUS, %asi			! In case of problems finding PA
2607	sethi	%hi(_C_LABEL(ctxbusy)), %g1
2608	LDPTR	[%g1 + %lo(_C_LABEL(ctxbusy))], %g1	! Load start of ctxbusy
2609#ifdef DEBUG
2610	srax	%g6, HOLESHIFT, %g7			! Check for valid address
2611	brz,pt	%g7, 1f					! Should be zero or -1
2612	 addcc	%g7, 1, %g7					! Make -1 -> 0
2613	tnz	%xcc, 1					! Invalid address??? How did this happen?
26141:
2615#endif
2616	srlx	%g6, STSHIFT, %g7
2617	ldx	[%g1], %g1				! Load pointer to kernel_pmap
2618	and	%g7, STMASK, %g7
2619	sll	%g7, 3, %g7
2620	add	%g7, %g1, %g1
2621	DLFLUSH(%g1,%g7)
2622	ldxa	[%g1] ASI_PHYS_CACHED, %g1		! Load pointer to directory
2623	DLFLUSH2(%g7)
2624
2625	srlx	%g6, PDSHIFT, %g7			! Do page directory
2626	and	%g7, PDMASK, %g7
2627	sll	%g7, 3, %g7
2628	brz,pn	%g1, 0f
2629	 add	%g7, %g1, %g1
2630	DLFLUSH(%g1,%g7)
2631	ldxa	[%g1] ASI_PHYS_CACHED, %g1
2632	DLFLUSH2(%g7)
2633
2634	srlx	%g6, PTSHIFT, %g7			! Convert to ptab offset
2635	and	%g7, PTMASK, %g7
2636	brz	%g1, 0f
2637	 sll	%g7, 3, %g7
2638	add	%g1, %g7, %g7
2639	DLFLUSH(%g7,%g1)
2640	ldxa	[%g7] ASI_PHYS_CACHED, %g7		! This one is not
2641	DLFLUSH2(%g1)
2642	brgez	%g7, 0f
2643	 srlx	%g7, PGSHIFT, %g7			! Isolate PA part
2644	sll	%g6, 32-PGSHIFT, %g6			! And offset
2645	sllx	%g7, PGSHIFT+23, %g7			! There are 23 bits to the left of the PA in the TTE
2646	srl	%g6, 32-PGSHIFT, %g6
2647	srax	%g7, 23, %g7
2648	or	%g7, %g6, %g6				! Then combine them to form PA
2649
2650	wr	%g0, ASI_PHYS_CACHED, %asi		! Use ASI_PHYS_CACHED to prevent possible page faults
26510:
2652	/*
2653	 * Now save all user windows to cpcb.
2654	 */
2655#ifdef NOTDEF_DEBUG
2656	add	%g6, PCB_NSAVED, %g7
2657	DLFLUSH(%g7,%g5)
2658	lduba	[%g6 + PCB_NSAVED] %asi, %g7		! make sure that pcb_nsaved
2659	DLFLUSH2(%g5)
2660	brz,pt	%g7, 1f					! is zero, else
2661	 nop
2662	wrpr	%g0, 4, %tl
2663	sir						! Force a watchdog
26641:
2665#endif
2666	CHKPT(%g5,%g7,0x12)
2667	rdpr	%otherwin, %g7
2668	brnz,pt	%g7, 1f
2669	 rdpr	%canrestore, %g5
2670	rdpr	%cansave, %g1
2671	add	%g5, 1, %g7				! add the %cwp window to the list to save
2672!	movrnz	%g1, %g5, %g7				! If we're issuing a save
2673!	mov	%g5, %g7				! DEBUG
2674	wrpr	%g0, 0, %canrestore
2675	wrpr	%g7, 0, %otherwin			! Still in user mode -- need to switch to kernel mode
26761:
2677	mov	%g7, %g1
2678	CHKPT(%g5,%g7,0x13)
2679	add	%g6, PCB_NSAVED, %g7
2680	DLFLUSH(%g7,%g5)
2681	lduba	[%g6 + PCB_NSAVED] %asi, %g7		! Start incrementing pcb_nsaved
2682	DLFLUSH2(%g5)
2683
2684#ifdef DEBUG
2685	wrpr	%g0, 5, %tl
2686#endif
2687	mov	%g6, %g5
2688	brz,pt	%g7, winfixsave				! If it's in use, panic
2689	 saved						! frob window registers
2690
2691	/* PANIC */
2692!	CHKPT(%g4,%g7,0x10)	! Checkpoint
2693!	sir						! Force a watchdog
2694#ifdef DEBUG
2695	wrpr	%g2, 0, %tl
2696#endif
2697	mov	%g7, %o2
2698	rdpr	%ver, %o1
2699	sethi	%hi(2f), %o0
2700	and	%o1, CWP, %o1
2701	wrpr	%g0, %o1, %cleanwin
2702	dec	1, %o1
2703	wrpr	%g0, %o1, %cansave			! kludge away any more window problems
2704	wrpr	%g0, 0, %canrestore
2705	wrpr	%g0, 0, %otherwin
2706	or	%lo(2f), %o0, %o0
2707	wrpr	%g0, WSTATE_KERN, %wstate
2708#ifdef DEBUG
2709	set	panicstack-CC64FSZ-STKB, %sp		! Use panic stack.
2710#else
2711	set	estack0, %sp
2712	LDPTR	[%sp], %sp
2713	add	%sp, -CC64FSZ-STKB, %sp			! Overwrite proc 0's stack.
2714#endif
2715	ta	1; nop					! This helps out traptrace.
2716	call	_C_LABEL(panic)				! This needs to be fixed properly but we should panic here
2717	 mov	%g1, %o1
2718	NOTREACHED
2719	.data
27202:
2721	.asciz	"winfault: double invalid window at %p, nsaved=%d"
2722	_ALIGN
2723	.text
27243:
2725	saved
2726	save
2727winfixsave:
2728	stxa	%l0, [%g5 + PCB_RW + ( 0*8)] %asi	! Save the window in the pcb, we can schedule other stuff in here
2729	stxa	%l1, [%g5 + PCB_RW + ( 1*8)] %asi
2730	stxa	%l2, [%g5 + PCB_RW + ( 2*8)] %asi
2731	stxa	%l3, [%g5 + PCB_RW + ( 3*8)] %asi
2732	stxa	%l4, [%g5 + PCB_RW + ( 4*8)] %asi
2733	stxa	%l5, [%g5 + PCB_RW + ( 5*8)] %asi
2734	stxa	%l6, [%g5 + PCB_RW + ( 6*8)] %asi
2735	stxa	%l7, [%g5 + PCB_RW + ( 7*8)] %asi
2736
2737	stxa	%i0, [%g5 + PCB_RW + ( 8*8)] %asi
2738	stxa	%i1, [%g5 + PCB_RW + ( 9*8)] %asi
2739	stxa	%i2, [%g5 + PCB_RW + (10*8)] %asi
2740	stxa	%i3, [%g5 + PCB_RW + (11*8)] %asi
2741	stxa	%i4, [%g5 + PCB_RW + (12*8)] %asi
2742	stxa	%i5, [%g5 + PCB_RW + (13*8)] %asi
2743	stxa	%i6, [%g5 + PCB_RW + (14*8)] %asi
2744	stxa	%i7, [%g5 + PCB_RW + (15*8)] %asi
2745
2746!	rdpr	%otherwin, %g1	! Check to see if we're done
2747	dec	%g1
2748	wrpr	%g0, 7, %cleanwin			! BUGBUG -- we should not hardcode this, but I have no spare globals
2749	inc	16*8, %g5				! Move to next window
2750	inc	%g7					! inc pcb_nsaved
2751	brnz,pt	%g1, 3b
2752	 stxa	%o6, [%g5 + PCB_RW + (14*8)] %asi	! Save %sp so we can write these all out
2753
2754	/* fix up pcb fields */
2755	stba	%g7, [%g6 + PCB_NSAVED] %asi		! cpcb->pcb_nsaved = n
2756	CHKPT(%g5,%g1,0x14)
2757#if 0
2758	mov	%g7, %g5				! fixup window registers
27595:
2760	dec	%g5
2761	brgz,a,pt	%g5, 5b
2762	 restore
2763#ifdef NOT_DEBUG
2764	rdpr	%wstate, %g5				! DEBUG
2765	wrpr	%g0, WSTATE_KERN, %wstate		! DEBUG
2766	wrpr	%g0, 4, %tl
2767	rdpr	%cansave, %g7
2768	rdpr	%canrestore, %g6
2769	flushw						! DEBUG
2770	wrpr	%g2, 0, %tl
2771	wrpr	%g5, 0, %wstate				! DEBUG
2772#endif
2773#else
2774	/*
2775	 * We just issued a bunch of saves, so %cansave is now 0,
2776	 * probably (if we were doing a flushw then we may have
2777	 * come in with only partially full register windows and
2778	 * it may not be 0).
2779	 *
2780	 * %g7 contains the count of the windows we just finished
2781	 * saving.
2782	 *
2783	 * What we need to do now is move some of the windows from
2784	 * %canrestore to %cansave.  What we should do is take
2785	 * min(%canrestore, %g7) and move that over to %cansave.
2786	 *
2787	 * %g7 is the number of windows we flushed, so we should
2788	 * use that as a base.  Clear out %otherwin, set %cansave
2789	 * to min(%g7, NWINDOWS - 2), set %cleanwin to %canrestore
2790	 * + %cansave and the rest follows:
2791	 *
2792	 * %otherwin = 0
2793	 * %cansave = NWINDOWS - 2 - %canrestore
2794	 */
2795	wrpr	%g0, 0, %otherwin
2796	rdpr	%canrestore, %g1
2797	sub	%g1, %g7, %g1				! Calculate %canrestore - %g7
2798	movrlz	%g1, %g0, %g1				! Clamp at zero
2799	wrpr	%g1, 0, %canrestore			! This is the new canrestore
2800	rdpr	%ver, %g5
2801	and	%g5, CWP, %g5				! NWINDOWS-1
2802	dec	%g5					! NWINDOWS-2
2803	wrpr	%g5, 0, %cleanwin			! Set cleanwin to max, since we're in-kernel
2804	sub	%g5, %g1, %g5				! NWINDOWS-2-%canrestore
2805#ifdef xTRAPTRACE
2806	wrpr	%g5, 0, %cleanwin			! Force cleanwindow faults
2807#endif
2808	wrpr	%g5, 0, %cansave
2809#ifdef NOT_DEBUG
2810	rdpr	%wstate, %g5				! DEBUG
2811	wrpr	%g0, WSTATE_KERN, %wstate		! DEBUG
2812	wrpr	%g0, 4, %tl
2813	flushw						! DEBUG
2814	wrpr	%g2, 0, %tl
2815	wrpr	%g5, 0, %wstate				! DEBUG
2816#endif
2817#endif
2818
2819#ifdef NOTDEF_DEBUG
2820	set	panicstack-CC64FSZ, %g1
2821	save	%g1, 0, %sp
2822	GLOBTOLOC
2823	rdpr	%wstate, %l0
2824	wrpr	%g0, WSTATE_KERN, %wstate
2825	set	8f, %o0
2826	mov	%g7, %o1
2827	call	printf
2828	 mov	%g5, %o2
2829	wrpr	%l0, 0, %wstate
2830	LOCTOGLOB
2831	restore
2832	.data
28338:
2834	.asciz	"winfix: spill fixup\n"
2835	_ALIGN
2836	.text
2837#endif
2838	CHKPT(%g5,%g1,0x15)
2839!	rdpr	%tl, %g2				! DEBUG DEBUG -- did we trap somewhere?
2840	sub	%g2, 1, %g1
2841	rdpr	%tt, %g2
2842	wrpr	%g1, 0, %tl				! We will not attempt to re-execute the spill, so dump our trap frame permanently
2843	wrpr	%g2, 0, %tt				! Move trap type from fault frame here, overwriting spill
2844	CHKPT(%g2,%g5,0x16)
2845
2846	/* Did we save a user or kernel window ? */
2847!	srax	%g3, 48, %g7				! User or kernel store? (TAG TARGET)
2848	sllx	%g3, (64-13), %g7			! User or kernel store? (TAG ACCESS)
2849	brnz,pt	%g7, 1f					! User fault -- save windows to pcb
2850	 set	(2*NBPG)-8, %g7
2851
2852	and	%g4, CWP, %g4				! %g4 = %cwp of trap
2853	wrpr	%g4, 0, %cwp				! Kernel fault -- restore %cwp and force and trap to debugger
2854#ifdef DEBUG
2855	set	DATA_START, %g7				! debug
2856	set	0x11, %g6				! debug
2857	stb	%g6, [%g7 + 0x20]			! debug
2858	CHKPT(%g2,%g1,0x17)
2859!	sir
2860#endif
2861	!!
2862	!! Here we managed to fault trying to access a kernel window
2863	!! This is a bug.  Switch to the interrupt stack if we aren't
2864	!! there already and then trap into the debugger or panic.
2865	!!
2866	sethi	%hi(EINTSTACK-BIAS), %g6
2867	btst	1, %sp
2868	bnz,pt	%icc, 0f
2869	 mov	%sp, %g1
2870	add	%sp, -BIAS, %g1
28710:
2872	or	%g6, %lo(EINTSTACK-BIAS), %g6
2873	set	(EINTSTACK-INTSTACK), %g7	! XXXXXXXXXX This assumes kernel addresses are unique from user addresses
2874	sub	%g6, %g1, %g2				! Determine if we need to switch to intr stack or not
2875	dec	%g7					! Make it into a mask
2876	andncc	%g2, %g7, %g0				! XXXXXXXXXX This assumes kernel addresses are unique from user addresses */ \
2877	movz	%xcc, %g1, %g6				! Stay on interrupt stack?
2878	add	%g6, -CCFSZ, %g6			! Allocate a stack frame
2879	mov	%sp, %l6				! XXXXX Save old stack pointer
2880	mov	%g6, %sp
2881	ta	1; nop					! Enter debugger
2882	NOTREACHED
28831:
2884#if 1
2885	/* Now we need to blast away the D$ to make sure we're in sync */
2886	stxa	%g0, [%g7] ASI_DCACHE_TAG
2887	brnz,pt	%g7, 1b
2888	 dec	8, %g7
2889#endif
2890
2891#ifdef DEBUG
2892	CHKPT(%g2,%g1,0x18)
2893	set	DATA_START, %g7				! debug
2894	set	0x19, %g6				! debug
2895	stb	%g6, [%g7 + 0x20]			! debug
2896#endif
2897#ifdef NOTDEF_DEBUG
2898	set	panicstack-CC64FSZ, %g5
2899	save	%g5, 0, %sp
2900	GLOBTOLOC
2901	rdpr	%wstate, %l0
2902	wrpr	%g0, WSTATE_KERN, %wstate
2903	set	8f, %o0
2904	call	printf
2905	 mov	%fp, %o1
2906	wrpr	%l0, 0, %wstate
2907	LOCTOGLOB
2908	restore
2909	.data
29108:
2911	.asciz	"winfix: kernel spill retry\n"
2912	_ALIGN
2913	.text
2914#endif
2915#ifdef TRAPTRACE
2916	and	%g4, CWP, %g2	! Point our regwin at right place
2917	wrpr	%g2, %cwp
2918
2919	set	trap_trace, %g2
2920	lduw	[%g2+TRACEDIS], %g4
2921	brnz,pn	%g4, 1f
2922	 nop
2923	lduw	[%g2+TRACEPTR], %g3
2924	rdpr	%tl, %g4
2925	mov	3, %g5
2926	set	CURPROC, %g6
2927	sllx	%g4, 13, %g4
2928!	LDPTR	[%g6], %g6	! Never do faultable loads
2929	clr	%g6		! DISABLE PID
2930	or	%g4, %g5, %g4
2931	mov	%g0, %g5
2932	brz,pn	%g6, 2f
2933	 andncc	%g3, (TRACESIZ-1), %g0
2934!	ldsw	[%g6+P_PID], %g5	! Load PID
29352:
2936	movnz	%icc, %g0, %g3	! Wrap if needed
2937
2938	set	CPCB, %g6	! Load up nsaved
2939	LDPTR	[%g6], %g6
2940	clr	%g6
2941!	ldub	[%g6 + PCB_NSAVED], %g6! this could fault
2942	sllx	%g6, 9, %g6
2943	or	%g6, %g4, %g4
2944
2945	rdpr	%tstate, %g6
2946	rdpr	%tpc, %g7
2947	sth	%g4, [%g2+%g3]
2948	inc	2, %g3
2949	sth	%g5, [%g2+%g3]
2950	inc	2, %g3
2951	stw	%g6, [%g2+%g3]
2952	inc	4, %g3
2953	stw	%sp, [%g2+%g3]
2954	inc	4, %g3
2955	stw	%g7, [%g2+%g3]
2956	inc	4, %g3
2957	mov	TLB_TAG_ACCESS, %g7
2958	ldxa	[%g7] ASI_DMMU, %g7
2959	stw	%g7, [%g2+%g3]
2960	inc	4, %g3
2961	stw	%g3, [%g2+TRACEPTR]
29621:
2963#endif
2964#ifdef TRAPSTATS
2965	set	_C_LABEL(wspillskip), %g4
2966	lduw	[%g4], %g5
2967	inc	%g5
2968	stw	%g5, [%g4]
2969#endif
2970	/*
2971	 * If we had WSTATE_KERN then we had at least one valid kernel window.
2972	 * We should re-execute the trapping save.
2973	 */
2974	rdpr	%wstate, %g3
2975	mov	%g3, %g3
2976	cmp	%g3, WSTATE_KERN
2977	bne,pt	%icc, 1f
2978	 nop
2979	retry						! Now we can complete the save
29801:
2981	/*
2982	 * Since we had a WSTATE_USER, we had no valid kernel windows.  This should
2983	 * only happen inside TRAP_SETUP or INTR_SETUP. Emulate
2984	 * the instruction, clean up the register windows, then done.
2985	 */
2986	rdpr	%cwp, %g1
2987	inc	%g1
2988	rdpr	%tstate, %g2
2989	wrpr	%g1, %cwp
2990	andn	%g2, CWP, %g2
2991	wrpr	%g1, %g2, %tstate
2992	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate
2993#ifdef TRAPS_USE_IG
2994	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG
2995#endif
2996	mov	%g6, %sp
2997	done
2998
2999/*
3000 * Each memory data access fault, from user or kernel mode,
3001 * comes here.
3002 *
3003 * We will assume that %pil is not lost so we won't bother to save it
3004 * unless we're in an interrupt handler.
3005 *
3006 * On entry:
3007 *	We are on one of the alternate set of globals
3008 *	%g1 = MMU tag target
3009 *	%g2 = %tl
3010 *
3011 * On return:
3012 *
3013 */
3014datafault:
3015	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! We need to save volatile stuff to AG regs
3016#ifdef TRAPS_USE_IG
3017	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG: use interrupt globals instead of alternates
3018#endif
3019#ifdef DEBUG
3020	set	DATA_START, %g7				! debug
3021	set	0x20, %g6				! debug
3022	stx	%g0, [%g7]				! debug
3023	stb	%g6, [%g7 + 0x20]			! debug checkpoint marker 0x20
3024	CHKPT(%g4,%g7,0xf)
3025#endif
3026	wr	%g0, ASI_DMMU, %asi			! We need to re-load trap info
3027	ldxa	[%g0 + TLB_TAG_ACCESS] %asi, %g1	! Get fault address from tag access register
3028	ldxa	[SFAR] %asi, %g2			! sync virt addr; must be read first
3029	ldxa	[SFSR] %asi, %g3			! get sync fault status register
3030	stxa	%g0, [SFSR] %asi			! Clear out fault now
3031	membar	#Sync					! No real reason for this XXXX
3032
3033	TRAP_SETUP(-CC64FSZ-TF_SIZE)
3034Ldatafault_internal:
3035	INCR(_C_LABEL(uvmexp)+V_FAULTS)			! cnt.v_faults++ (clobbers %o0,%o1,%o2) should not fault
3036!	ldx	[%sp + CC64FSZ + STKB + TF_FAULT], %g1		! DEBUG make sure this has not changed
3037	mov	%g1, %o0				! Move these to the out regs so we can save the globals
3038	mov	%g2, %o4				! sfar -> %o4 (handler argument)
3039	mov	%g3, %o5				! sfsr -> %o5 (handler argument)
3040
3041	ldxa	[%g0] ASI_AFAR, %o2			! get async fault address
3042	ldxa	[%g0] ASI_AFSR, %o3			! get async fault status
3043	mov	-1, %g7					! all-ones pattern used to clear AFSR below
3044	stxa	%g7, [%g0] ASI_AFSR			! And clear this out, too
3045	membar	#Sync					! No real reason for this XXXX
3046
3047#ifdef TRAPTRACE
3048	rdpr	%tt, %o1				! find out what trap brought us here
3049	wrpr	%g0, 0x69, %tt	! We claim to be trap type 69, not a valid trap
3050	TRACEME
3051	wrpr	%g0, PSTATE_KERN, %pstate		! Get back to normal globals
3052
3053	stx	%g1, [%sp + CC64FSZ + STKB + TF_G + (1*8)]	! save g1
3054#else
3055	wrpr	%g0, PSTATE_KERN, %pstate		! Get back to normal globals
3056
3057	stx	%g1, [%sp + CC64FSZ + STKB + TF_G + (1*8)]	! save g1
3058	rdpr	%tt, %o1				! find out what trap brought us here
3059#endif
3060	stx	%g2, [%sp + CC64FSZ + STKB + TF_G + (2*8)]	! save g2
3061	rdpr	%tstate, %g1
3062	stx	%g3, [%sp + CC64FSZ + STKB + TF_G + (3*8)]	! (sneak g3 in here)
3063	rdpr	%tpc, %g2
3064	stx	%g4, [%sp + CC64FSZ + STKB + TF_G + (4*8)]	! sneak in g4
3065	rdpr	%tnpc, %g3
3066	stx	%g5, [%sp + CC64FSZ + STKB + TF_G + (5*8)]	! sneak in g5
3067	rd	%y, %g4					! save y
3068	stx	%g6, [%sp + CC64FSZ + STKB + TF_G + (6*8)]	! sneak in g6
3069	mov	%g2, %o7				! Make the fault address look like the return address
3070	stx	%g7, [%sp + CC64FSZ + STKB + TF_G + (7*8)]	! sneak in g7
3071
3072#ifdef DEBUG
3073	set	DATA_START, %g7				! debug
3074	set	0x21, %g6				! debug
3075	stb	%g6, [%g7 + 0x20]			! debug checkpoint marker 0x21
3076#endif
3077	sth	%o1, [%sp + CC64FSZ + STKB + TF_TT]	! set tf.tf_tt = trap type
3078	stx	%g1, [%sp + CC64FSZ + STKB + TF_TSTATE]		! set tf.tf_tstate
3079	stx	%g2, [%sp + CC64FSZ + STKB + TF_PC]		! set tf.tf_pc (from %tpc)
3080	stx	%g3, [%sp + CC64FSZ + STKB + TF_NPC]	! set tf.tf_npc (from %tnpc)
3081
3082	rdpr	%pil, %g5
3083	stb	%g5, [%sp + CC64FSZ + STKB + TF_PIL]
3084	stb	%g5, [%sp + CC64FSZ + STKB + TF_OLDPIL]
3085
3086#if 1
3087	rdpr	%tl, %g7
3088	dec	%g7					! drop one trap level...
3089	movrlz	%g7, %g0, %g7				! ...but never below zero
3090	CHKPT(%g1,%g3,0x21)
3091	wrpr	%g0, %g7, %tl		! Revert to kernel mode
3092#else
3093	CHKPT(%g1,%g3,0x21)
3094	wrpr	%g0, 0, %tl		! Revert to kernel mode
3095#endif
3096	/* Finish stackframe, call C trap handler */
3097	flushw						! Get this clean so we won't take any more user faults
3098#ifdef NOTDEF_DEBUG
3099	set	CPCB, %o7
3100	LDPTR	[%o7], %o7
3101	ldub	[%o7 + PCB_NSAVED], %o7
3102	brz,pt	%o7, 2f
3103	 nop
3104	save	%sp, -CC64FSZ, %sp
3105	set	1f, %o0
3106	call printf
3107	 mov	%i7, %o1
3108	ta	1; nop
3109	 restore
3110	.data
31111:	.asciz	"datafault: nsaved = %d\n"
3112	_ALIGN
3113	.text
31142:
3115#endif
3116	!! In the EMBEDANY memory model %g4 points to the start of the data segment.
3117	!! In our case we need to clear it before calling any C-code
3118	clr	%g4
3119
3120	/*
3121	 * Right now the registers have the following values:
3122	 *
3123	 *	%o0 -- MMU_TAG_ACCESS
3124	 *	%o1 -- TT
3125	 *	%o2 -- afar
3126	 *	%o3 -- afsr
3127	 *	%o4 -- sfar
3128	 *	%o5 -- sfsr
3129	 */
3130
3131	cmp	%o1, T_DATA_ERROR
3132	st	%g4, [%sp + CC64FSZ + STKB + TF_Y]	! NOTE(review): %g4 was cleared above, so tf_y is stored as 0; the %y read at "save y" appears lost -- confirm intended
3133	wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Restore default ASI
3134	be,pn	%icc, data_error
3135	 wrpr	%g0, PSTATE_INTR, %pstate	! reenable interrupts (delay slot -- executes either way)
3136
3137	mov	%o0, %o3			! (argument: trap address)
3138	mov	%g2, %o2			! (argument: trap pc)
3139	call	_C_LABEL(data_access_fault)	! data_access_fault(&tf, type,
3140						!	pc, addr, sfva, sfsr)
3141	 add	%sp, CC64FSZ + STKB, %o0	! (argument: &tf)
3142
3143data_recover:
3144	CHKPT(%o1,%o2,1)
3145	wrpr	%g0, PSTATE_KERN, %pstate		! disable interrupts
3146#ifdef TRAPSTATS
3147	set	_C_LABEL(uintrcnt), %g1
3148	stw	%g0, [%g1]
3149	set	_C_LABEL(iveccnt), %g1
3150	stw	%g0, [%g1]
3151#endif
3152	b	return_from_trap			! go return
3153	 ldx	[%sp + CC64FSZ + STKB + TF_TSTATE], %g1		! Load this for return_from_trap
3154	NOTREACHED
3155
3156data_error:
3157	call	_C_LABEL(data_access_error)	! data_access_error(&tf, type,
3158						!	afva, afsr, sfva, sfsr)
3159	 add	%sp, CC64FSZ + STKB, %o0	! (argument: &tf)
3160	ba	data_recover
3161	 nop
3162	NOTREACHED
3163
3164/*
3165 * Each memory instruction access fault from a fast access handler comes here.
3166 * We will quickly check if this is an original prom mapping before going
3167 * to the generic fault handler
3168 *
3169 * We will assume that %pil is not lost so we won't bother to save it
3170 * unless we're in an interrupt handler.
3171 *
3172 * On entry:
3173 *	We are on one of the alternate set of globals
3174 *	%g1 = MMU tag target
3175 *	%g2 = TSB entry ptr
3176 *	%g3 = TLB Tag Access
3177 *
3178 * On return:
3179 *
3180 */
3181
3182	ICACHE_ALIGN
3183instr_miss:
3184#ifdef TRAPSTATS
3185	set	_C_LABEL(ktmiss), %g3			! kernel-mode miss counter
3186	set	_C_LABEL(utmiss), %g4			! user-mode miss counter
3187	rdpr	%tl, %g6
3188	dec	%g6
3189	movrz	%g6, %g4, %g3				! pick utmiss if we trapped from TL 1
3190	lduw	[%g3], %g4
3191	inc	%g4
3192	stw	%g4, [%g3]
3193#endif
3194	mov	TLB_TAG_ACCESS, %g3			! Get real fault page
3195	sethi	%hi(0x1fff), %g7			! 8K context mask
3196	ldxa	[%g3] ASI_IMMU, %g3			! from tag access register
3197	sethi	%hi(_C_LABEL(ctxbusy)), %g4
3198	or	%g7, %lo(0x1fff), %g7
3199	LDPTR	[%g4 + %lo(_C_LABEL(ctxbusy))], %g4
3200	srax	%g3, HOLESHIFT, %g5			! Check for valid address
3201	and	%g3, %g7, %g6				! Isolate context
3202	sllx	%g6, 3, %g6				! Make it into an offset into ctxbusy
3203	inc	%g5					! (0 or -1) -> (1 or 0)
3204
3205	ldx	[%g4+%g6], %g4				! Load up our page table.
3206#ifdef DEBUG
3207	/* Make sure we don't try to replace a kernel translation */
3208	/* This should not be necessary */
3209	brnz,pt	%g6, 1f					! If user context continue miss
3210	sethi	%hi(KERNBASE), %g7			! Don't need %lo
3211	set	0x0800000, %g6				! 8MB
3212	sub	%g3, %g7, %g7
3213	cmp	%g7, %g6
3214	mov	6, %g6		! debug
3215	sethi	%hi(DATA_START), %g7
3216	stb	%g6, [%g7+0x30]	! debug
3217	tlu	%xcc, 1; nop
3218	blu,pn	%xcc, textfault				! Next insn in delay slot is unimportant
3219	 mov	7, %g6		! debug
3220	stb	%g6, [%g7+0x30]	! debug
32211:
3222#endif
3223	srlx	%g3, STSHIFT, %g6			! segment-table index from fault VA
3224	cmp	%g5, 1
3225	bgu,pn %xcc, textfault				! Error!
3226	 srlx	%g3, PDSHIFT, %g5			! page-directory index (delay slot)
3227	and	%g6, STMASK, %g6
3228	sll	%g6, 3, %g6				! index -> byte offset
3229	and	%g5, PDMASK, %g5
3230	nop
3231
3232	sll	%g5, 3, %g5				! index -> byte offset
3233	add	%g6, %g4, %g4
3234	ldxa	[%g4] ASI_PHYS_CACHED, %g4		! load directory pointer (phys access -- no faults)
3235	srlx	%g3, PTSHIFT, %g6			! Convert to ptab offset
3236	and	%g6, PTMASK, %g6
3237	add	%g5, %g4, %g5
3238	brz,pn	%g4, textfault				! NULL entry? check somewhere else
3239	 nop
3240
3241	ldxa	[%g5] ASI_PHYS_CACHED, %g4		! load page-table pointer
3242	sll	%g6, 3, %g6
3243	brz,pn	%g4, textfault				! NULL entry? check somewhere else
3244	 add	%g6, %g4, %g6
32451:
3246	ldxa	[%g6] ASI_PHYS_CACHED, %g4		! load the TTE itself
3247	brgez,pn %g4, textfault				! invalid TTE (valid bit is the sign bit)
3248	 or	%g4, TTE_ACCESS, %g7			! Update accessed bit
3249	btst	TTE_ACCESS, %g4				! Need to update access bit?
3250	bne,pt	%xcc, 1f				! already set -- skip the cas
3251	 nop
3252	casxa	[%g6] ASI_PHYS_CACHED, %g4, %g7		!  and store it
3253	cmp	%g4, %g7
3254	bne,pn	%xcc, 1b				! cas lost the race -- reload and retry
3255	 or	%g4, TTE_ACCESS, %g4			! Update accessed bit
32561:
3257	stx	%g1, [%g2]				! Update TSB entry tag
3258	stx	%g4, [%g2+8]				! Update TSB entry data
3259#ifdef DEBUG
3260	set	DATA_START, %g6	! debug
3261	stx	%g3, [%g6+8]	! debug
3262	set	0xaa, %g3	! debug
3263	stx	%g4, [%g6]	! debug -- what we tried to enter in TLB
3264	stb	%g3, [%g6+0x20]	! debug
3265#endif
3266#if 0
3267	/* This was a miss -- should be nothing to demap. */
3268	sllx	%g3, (64-13), %g6			! Need to demap old entry first
3269	mov	DEMAP_PAGE_SECONDARY, %g1		! Secondary flush
3270	mov	DEMAP_PAGE_NUCLEUS, %g5			! Nucleus flush
3271	movrz	%g6, %g5, %g1				! Pick one
3272	andn	%g3, 0xfff, %g6
3273	or	%g6, %g1, %g6
3274	stxa	%g6, [%g6] ASI_DMMU_DEMAP		! Do the demap
3275	membar	#Sync					! No real reason for this XXXX
3276#endif
3277	stxa	%g4, [%g0] ASI_IMMU_DATA_IN		! Enter new mapping
3278	membar	#Sync
3279	CLRTT
3280	retry
3281	NOTREACHED
3282	!!
3283	!!  Check our prom mappings -- temporary
3284	!!
3285
3286/*
3287 * Each memory text access fault, from user or kernel mode,
3288 * comes here.
3289 *
3290 * We will assume that %pil is not lost so we won't bother to save it
3291 * unless we're in an interrupt handler.
3292 *
3293 * On entry:
3294 *	We are on one of the alternate set of globals
3295 *	%g1 = MMU tag target
3296 *	%g2 = %tl
3297 *	%g3 = %tl - 1
3298 *
3299 * On return:
3300 *
3301 */
3302
3303
3304textfault:
3305	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! We need to save volatile stuff to AG regs
3306#ifdef TRAPS_USE_IG
3307	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG: use interrupt globals instead of alternates
3308#endif
3309	wr	%g0, ASI_IMMU, %asi			! text faults come from the instruction MMU
3310	ldxa	[%g0 + TLB_TAG_ACCESS] %asi, %g1	! Get fault address from tag access register
3311	ldxa	[SFSR] %asi, %g3			! get sync fault status register
3312	membar	#LoadStore
3313	stxa	%g0, [SFSR] %asi			! Clear out old info
3314	membar	#Sync					! No real reason for this XXXX
3315
3316	TRAP_SETUP(-CC64FSZ-TF_SIZE)
3317	INCR(_C_LABEL(uvmexp)+V_FAULTS)			! cnt.v_faults++ (clobbers %o0,%o1,%o2)
3318
3319	mov	%g3, %o3				! sfsr -> %o3 (handler argument)
3320
3321	wrpr	%g0, PSTATE_KERN, %pstate		! Switch to normal globals
3322	ldxa	[%g0] ASI_AFSR, %o4			! get async fault status
3323	ldxa	[%g0] ASI_AFAR, %o5			! get async fault address
3324	mov	-1, %o0					! all-ones pattern used to clear AFSR below
3325	stxa	%o0, [%g0] ASI_AFSR			! Clear this out
3326	membar	#Sync					! No real reason for this XXXX
3327	stx	%g1, [%sp + CC64FSZ + STKB + TF_G + (1*8)]	! save g1
3328	stx	%g2, [%sp + CC64FSZ + STKB + TF_G + (2*8)]	! save g2
3329	stx	%g3, [%sp + CC64FSZ + STKB + TF_G + (3*8)]	! (sneak g3 in here)
3330	rdpr	%tt, %o1				! Find out what caused this trap
3331	stx	%g4, [%sp + CC64FSZ + STKB + TF_G + (4*8)]	! sneak in g4
3332	rdpr	%tstate, %g1
3333	stx	%g5, [%sp + CC64FSZ + STKB + TF_G + (5*8)]	! sneak in g5
3334	rdpr	%tpc, %o2				! sync virt addr; must be read first
3335	stx	%g6, [%sp + CC64FSZ + STKB + TF_G + (6*8)]	! sneak in g6
3336	rdpr	%tnpc, %g3
3337	stx	%g7, [%sp + CC64FSZ + STKB + TF_G + (7*8)]	! sneak in g7
3338	rd	%y, %g7					! save y
3339
3340	/* Finish stackframe, call C trap handler */
3341	stx	%g1, [%sp + CC64FSZ + STKB + TF_TSTATE]		! set tf.tf_tstate
3342	sth	%o1, [%sp + CC64FSZ + STKB + TF_TT]! set tf.tf_tt = trap type
3343
3344	stx	%o2, [%sp + CC64FSZ + STKB + TF_PC]	! set tf.tf_pc (from %tpc)
3345	stx	%g3, [%sp + CC64FSZ + STKB + TF_NPC]		! set tf.tf_npc
3346
3347	rdpr	%pil, %g5
3348	stb	%g5, [%sp + CC64FSZ + STKB + TF_PIL]
3349	stb	%g5, [%sp + CC64FSZ + STKB + TF_OLDPIL]
3350
3351	rdpr	%tl, %g7				! NOTE(review): overwrites the %y value saved in %g7 above -- see TF_Y store below
3352	dec	%g7					! drop one trap level...
3353	movrlz	%g7, %g0, %g7				! ...but never below zero
3354	CHKPT(%g1,%g3,0x22)
3355	wrpr	%g0, %g7, %tl		! Revert to kernel mode
3356
3357	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
3358	flushw						! Get rid of any user windows so we don't deadlock
3359
3360	!! In the EMBEDANY memory model %g4 points to the start of the data segment.
3361	!! In our case we need to clear it before calling any C-code
3362	clr	%g4
3363
3364	/* Use trap type to see what handler to call */
3365	cmp	%o1, T_INST_ERROR
3366	be,pn	%xcc, text_error
3367	 st	%g7, [%sp + CC64FSZ + STKB + TF_Y]		! NOTE(review): %g7 here is the clamped tl-1, not %y -- confirm intended
3368
3369	wrpr	%g0, PSTATE_INTR, %pstate	! reenable interrupts
3370	call	_C_LABEL(text_access_fault)	! text_access_fault(&tf, type, pc, sfsr)
3371	 add	%sp, CC64FSZ + STKB, %o0	! (argument: &tf)
3372text_recover:
3373	CHKPT(%o1,%o2,2)
3374	wrpr	%g0, PSTATE_KERN, %pstate	! disable interrupts
3375	b	return_from_trap		! go return
3376	 ldx	[%sp + CC64FSZ + STKB + TF_TSTATE], %g1	! Load this for return_from_trap
3377	NOTREACHED
3378
3379text_error:
3380	wrpr	%g0, PSTATE_INTR, %pstate	! reenable interrupts
3381	call	_C_LABEL(text_access_error)	! text_access_error(&tf, type, sfva [pc], sfsr,
3382						!		afva, afsr);
3383	 add	%sp, CC64FSZ + STKB, %o0	! (argument: &tf)
3384	ba	text_recover
3385	 nop
3386	NOTREACHED
3387
3388/*
3389 * fp_exception has to check to see if we are trying to save
3390 * the FP state, and if so, continue to save the FP state.
3391 *
3392 * We do not even bother checking to see if we were in kernel mode,
3393 * since users have no access to the special_fp_store instruction.
3394 *
3395 * This whole idea was stolen from Sprite.
3396 */
3397/*
3398 * XXX I don't think this is at all relevant for V9.
3399 */
3400fp_exception:
3401	rdpr	%tpc, %g1			! trapping PC
3402	set	special_fp_store, %g4	! see if we came from the special one
3403	cmp	%g1, %g4		! pc == special_fp_store?
3404	bne	slowtrap		! no, go handle per usual
3405	 sethi	%hi(savefpcont), %g4	! yes, "return" to the special code
3406	or	%lo(savefpcont), %g4, %g4
3407	wrpr	%g0, %g4, %tnpc		! resume at savefpcont after `done`
3408	 done
3409	NOTREACHED
3410
3411
3412/*
3413 * We're here because we took an alignment fault in NUCLEUS context.
3414 * This could be a kernel bug or it could be due to saving a user
3415 * window to an invalid stack pointer.  If the latter is the case,
3416 * we should emulate the save by storing all the user register windows
3417 * to the PCB and returning.
3418 */
3419checkalign:
3420	rdpr	%tl, %g2			! %g2 = current trap level
3421	subcc	%g2, 1, %g1
3422	bneg,pn	%icc, slowtrap		! Huh?  TL was 0 -- not a nested trap
3423	 sethi	%hi(CPCB), %g6		! get current pcb
3424
3425	wrpr	%g1, 0, %tl			! drop to the lower trap's level to inspect it
3426	rdpr	%tt, %g7			! %g7 = lower trap's type
3427	rdpr	%tstate, %g4
3428	andn	%g7, 0x3f, %g5
3429	cmp	%g5, 0x080		!   window spill traps are all 0b 0000 10xx xxxx
3430	bne,a,pn	%icc, slowtrap		! not a spill -- kernel bug, take slowtrap
3431	 wrpr	%g1, 0, %tl		! Revert TL  XXX wrpr in a delay slot...
3432
3433#ifdef DEBUG
3434	cmp	%g7, 0x34		! If we took a datafault just before this trap
3435	bne,pt	%icc, checkalignspill	! our stack's probably bad so we need to switch somewhere else
3436	 nop
3437
3438	!!
3439	!! Double data fault -- bad stack?
3440	!!
3441	wrpr	%g2, %tl	! Restore trap level.
3442	sir			! Just issue a reset and don't try to recover.
3443	mov	%fp, %l6		! Save the frame pointer -- NOTE(review): the `sir` above resets, so this recovery sequence appears unreachable
3444	set	EINTSTACK+USPACE+CC64FSZ-STKB, %fp ! Set the frame pointer to the middle of the idle stack
3445	add	%fp, -CC64FSZ, %sp	! Create a stackframe
3446	wrpr	%g0, 15, %pil		! Disable interrupts, too
3447	wrpr	%g0, %g0, %canrestore	! Our stack is hozed and our PCB
3448	wrpr	%g0, 7, %cansave	!  probably is too, so blow away
3449	ba	slowtrap		!  all our register windows.
3450	 wrpr	%g0, 0x101, %tt
3451#endif
3452checkalignspill:
3453	wr	%g0, ASI_DMMU, %asi			! We need to re-load trap info
3454	ldxa	[SFSR] %asi, %g3			! get sync fault status register
3455	stxa	%g0, [SFSR] %asi			! Clear out fault now
3456	membar	#Sync					! No real reason for this XXXX
3457	/*
3458	 * Here we just jump to winfixspill and let it take care of
3459	 * saving the windows.
3460	 */
3461	ba,pt	%icc, winfixspill	! Continue with the winfix
3462	 orcc	%g0, %g0, %g0		! Make sure we compare to zero
3463
3464/*
3465 * slowtrap() builds a trap frame and calls trap().
3466 * This is called `slowtrap' because it *is*....
3467 * We have to build a full frame for ptrace(), for instance.
3468 *
3469 * Registers:
3470 *
3471 */
3472slowtrap:
3473#ifdef TRAPS_USE_IG
3474	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG
3475#endif
3476#ifdef DIAGNOSTIC
3477	/* Make sure kernel stack is aligned */
3478	btst	0x03, %sp		! 32-bit stack OK?
3479	 and	%sp, 0x07, %g4		! 64-bit stack OK?
3480	bz,pt	%icc, 1f		! low 2 bits clear -- 32-bit frame is fine
3481	cmp	%g4, 0x1		! Must end in 0b001
3482	be,pt	%icc, 1f		! 64-bit frame (BIAS) is fine too
3483	 rdpr	%wstate, %g4
3484	cmp	%g7, WSTATE_KERN
3485	bnz,pt	%icc, 1f		! User stack -- we'll blow it away
3486	 nop
3487#ifdef DEBUG
3488	set	panicstack, %sp		! Kernel stack corrupt -- use panicstack
3489#else
3490	set	estack0, %sp
3491	LDPTR	[%sp], %sp
3492	add	%sp, -CC64FSZ-STKB, %sp	! Overwrite proc 0's stack.
3493#endif
34941:
3495#endif
3496	rdpr	%tt, %g4			! gather trap state for the frame:
3497	rdpr	%tstate, %g1			!  %g1 = tstate, %g2 = tpc,
3498	rdpr	%tpc, %g2			!  %g3 = tnpc, %g4 = tt
3499	rdpr	%tnpc, %g3
3500
3501	TRAP_SETUP(-CC64FSZ-TF_SIZE)
3502Lslowtrap_reenter:
3503	stx	%g1, [%sp + CC64FSZ + STKB + TF_TSTATE]	! set tf.tf_tstate
3504	mov	%g4, %o1		! (type)
3505	stx	%g2, [%sp + CC64FSZ + STKB + TF_PC]	! set tf.tf_pc
3506	rd	%y, %g5
3507	stx	%g3, [%sp + CC64FSZ + STKB + TF_NPC]	! set tf.tf_npc
3508	mov	%g1, %o3		! (pstate)
3509	st	%g5, [%sp + CC64FSZ + STKB + TF_Y]	! set tf.tf_y
3510	mov	%g2, %o2		! (pc)
3511	sth	%o1, [%sp + CC64FSZ + STKB + TF_TT]! set tf.tf_tt = trap type
3512
3513	wrpr	%g0, PSTATE_KERN, %pstate		! Get back to normal globals
3514	stx	%g1, [%sp + CC64FSZ + STKB + TF_G + (1*8)]	! save the normal globals
3515	stx	%g2, [%sp + CC64FSZ + STKB + TF_G + (2*8)]
3516	add	%sp, CC64FSZ + STKB, %o0		! (&tf)
3517	stx	%g3, [%sp + CC64FSZ + STKB + TF_G + (3*8)]
3518	stx	%g4, [%sp + CC64FSZ + STKB + TF_G + (4*8)]
3519	stx	%g5, [%sp + CC64FSZ + STKB + TF_G + (5*8)]
3520	rdpr	%pil, %g5
3521	stx	%g6, [%sp + CC64FSZ + STKB + TF_G + (6*8)]
3522	stx	%g7, [%sp + CC64FSZ + STKB + TF_G + (7*8)]
3523	stb	%g5, [%sp + CC64FSZ + STKB + TF_PIL]
3524	stb	%g5, [%sp + CC64FSZ + STKB + TF_OLDPIL]
3525	/*
3526	 * Phew, ready to enable traps and call C code.
	 */
3528	rdpr	%tl, %g1
3529	dec	%g1					! drop one trap level...
3530	movrlz	%g1, %g0, %g1				! ...but never below zero
3531	CHKPT(%g2,%g3,0x24)
3532	wrpr	%g0, %g1, %tl		! Revert to kernel mode
3533	!! In the EMBEDANY memory model %g4 points to the start of the data segment.
3534	!! In our case we need to clear it before calling any C-code
3535	clr	%g4
3536
3537	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
3538	wrpr	%g0, PSTATE_INTR, %pstate	! traps on again
3539	call	_C_LABEL(trap)			! trap(tf, type, pc, pstate)
3540	 nop
3541
3542	CHKPT(%o1,%o2,3)
3543	ba,a,pt	%icc, return_from_trap
3544	 nop
3545	NOTREACHED
#if 1
/*
 * This code is no longer needed.
 */
/*
 * Do a `software' trap by re-entering the trap code, possibly first
 * switching from interrupt stack to kernel stack.  This is used for
 * scheduling and signal ASTs (which generally occur from softclock or
 * tty or net interrupts).
 *
 * We enter with the trap type in %g1.  All we have to do is jump to
 * Lslowtrap_reenter above, but maybe after switching stacks....
 *
 * We should be running alternate globals.  The normal globals and
 * out registers were just loaded from the old trap frame.
 *
 *	Input Params:
 *	%g1 = tstate
 *	%g2 = tpc
 *	%g3 = tnpc
 *	%g4 = tt == T_AST
 */
softtrap:
	sethi	%hi(EINTSTACK-STKB), %g5	! %g5 = biased top of interrupt stack
	sethi	%hi(EINTSTACK-INTSTACK), %g7	! %g7 = interrupt stack size
	or	%g5, %lo(EINTSTACK-STKB), %g5
	dec	%g7				! size is a power of 2 -> make a mask
	sub	%g5, %g6, %g5			! distance of %g6 (presumably the old %sp -- TODO confirm) below stack top
	sethi	%hi(CPCB), %g6
	andncc	%g5, %g7, %g0			! zero iff that distance fits inside the interrupt stack
	bnz,pt	%xcc, Lslowtrap_reenter		! not on interrupt stack: no stack switch needed
	 LDPTR	[%g6 + %lo(CPCB)], %g7		! (delay) %g7 = current pcb
	set	USPACE-CC64FSZ-TF_SIZE-STKB, %g5
	add	%g7, %g5, %g6			! %g6 = new %sp near the top of the kernel (u-area) stack
	SET_SP_REDZONE(%g7, %g5)
#ifdef DEBUG
	stx	%g1, [%g6 + CC64FSZ + STKB + TF_FAULT]		! Generate a new trapframe
#endif
	stx	%i0, [%g6 + CC64FSZ + STKB + TF_O + (0*8)]	!	but don't bother with
	stx	%i1, [%g6 + CC64FSZ + STKB + TF_O + (1*8)]	!	locals and ins
	stx	%i2, [%g6 + CC64FSZ + STKB + TF_O + (2*8)]
	stx	%i3, [%g6 + CC64FSZ + STKB + TF_O + (3*8)]
	stx	%i4, [%g6 + CC64FSZ + STKB + TF_O + (4*8)]
	stx	%i5, [%g6 + CC64FSZ + STKB + TF_O + (5*8)]
	stx	%i6, [%g6 + CC64FSZ + STKB + TF_O + (6*8)]
	stx	%i7, [%g6 + CC64FSZ + STKB + TF_O + (7*8)]
#ifdef DEBUG
	ldx	[%sp + CC64FSZ + STKB + TF_I + (0*8)], %l0	! Copy over the rest of the regs
	ldx	[%sp + CC64FSZ + STKB + TF_I + (1*8)], %l1	! But just dirty the locals
	ldx	[%sp + CC64FSZ + STKB + TF_I + (2*8)], %l2
	ldx	[%sp + CC64FSZ + STKB + TF_I + (3*8)], %l3
	ldx	[%sp + CC64FSZ + STKB + TF_I + (4*8)], %l4
	ldx	[%sp + CC64FSZ + STKB + TF_I + (5*8)], %l5
	ldx	[%sp + CC64FSZ + STKB + TF_I + (6*8)], %l6
	ldx	[%sp + CC64FSZ + STKB + TF_I + (7*8)], %l7
	stx	%l0, [%g6 + CC64FSZ + STKB + TF_I + (0*8)]
	stx	%l1, [%g6 + CC64FSZ + STKB + TF_I + (1*8)]
	stx	%l2, [%g6 + CC64FSZ + STKB + TF_I + (2*8)]
	stx	%l3, [%g6 + CC64FSZ + STKB + TF_I + (3*8)]
	stx	%l4, [%g6 + CC64FSZ + STKB + TF_I + (4*8)]
	stx	%l5, [%g6 + CC64FSZ + STKB + TF_I + (5*8)]
	stx	%l6, [%g6 + CC64FSZ + STKB + TF_I + (6*8)]
	stx	%l7, [%g6 + CC64FSZ + STKB + TF_I + (7*8)]
	ldx	[%sp + CC64FSZ + STKB + TF_L + (0*8)], %l0
	ldx	[%sp + CC64FSZ + STKB + TF_L + (1*8)], %l1
	ldx	[%sp + CC64FSZ + STKB + TF_L + (2*8)], %l2
	ldx	[%sp + CC64FSZ + STKB + TF_L + (3*8)], %l3
	ldx	[%sp + CC64FSZ + STKB + TF_L + (4*8)], %l4
	ldx	[%sp + CC64FSZ + STKB + TF_L + (5*8)], %l5
	ldx	[%sp + CC64FSZ + STKB + TF_L + (6*8)], %l6
	ldx	[%sp + CC64FSZ + STKB + TF_L + (7*8)], %l7
	stx	%l0, [%g6 + CC64FSZ + STKB + TF_L + (0*8)]
	stx	%l1, [%g6 + CC64FSZ + STKB + TF_L + (1*8)]
	stx	%l2, [%g6 + CC64FSZ + STKB + TF_L + (2*8)]
	stx	%l3, [%g6 + CC64FSZ + STKB + TF_L + (3*8)]
	stx	%l4, [%g6 + CC64FSZ + STKB + TF_L + (4*8)]
	stx	%l5, [%g6 + CC64FSZ + STKB + TF_L + (5*8)]
	stx	%l6, [%g6 + CC64FSZ + STKB + TF_L + (6*8)]
	stx	%l7, [%g6 + CC64FSZ + STKB + TF_L + (7*8)]
#endif
	ba,pt	%xcc, Lslowtrap_reenter
	 mov	%g6, %sp		! (delay) switch to the kernel stack
#endif
3629
#if 0
!! NOTE(review): dead code (#if 0); dumps all four global register sets
!! into the static ddb_regs buffer before entering DDB.  Kept for reference.
/*
 * breakpoint:	capture as much info as possible and then call DDB
 * or trap, as the case may be.
 *
 * First, we switch to interrupt globals, and blow away %g7.  Then
 * switch down one stackframe -- just fiddle w/cwp, don't save or
 * we'll trap.  Then slowly save all the globals into our static
 * register buffer.  etc. etc.
 */

breakpoint:
	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! Get IG to use
	rdpr	%cwp, %g7
	inc	1, %g7					! Equivalent of save
	wrpr	%g7, 0, %cwp				! Now we have some unused locals to fiddle with
	set	_C_LABEL(ddb_regs), %l0
	stx	%g1, [%l0+DBR_IG+(1*8)]			! Save IGs
	stx	%g2, [%l0+DBR_IG+(2*8)]
	stx	%g3, [%l0+DBR_IG+(3*8)]
	stx	%g4, [%l0+DBR_IG+(4*8)]
	stx	%g5, [%l0+DBR_IG+(5*8)]
	stx	%g6, [%l0+DBR_IG+(6*8)]
	stx	%g7, [%l0+DBR_IG+(7*8)]
	wrpr	%g0, PSTATE_KERN|PSTATE_MG, %pstate	! Get MG to use
	stx	%g1, [%l0+DBR_MG+(1*8)]			! Save MGs
	stx	%g2, [%l0+DBR_MG+(2*8)]
	stx	%g3, [%l0+DBR_MG+(3*8)]
	stx	%g4, [%l0+DBR_MG+(4*8)]
	stx	%g5, [%l0+DBR_MG+(5*8)]
	stx	%g6, [%l0+DBR_MG+(6*8)]
	stx	%g7, [%l0+DBR_MG+(7*8)]
	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate	! Get AG to use
	stx	%g1, [%l0+DBR_AG+(1*8)]			! Save AGs
	stx	%g2, [%l0+DBR_AG+(2*8)]
	stx	%g3, [%l0+DBR_AG+(3*8)]
	stx	%g4, [%l0+DBR_AG+(4*8)]
	stx	%g5, [%l0+DBR_AG+(5*8)]
	stx	%g6, [%l0+DBR_AG+(6*8)]
	stx	%g7, [%l0+DBR_AG+(7*8)]
	wrpr	%g0, PSTATE_KERN, %pstate	! Get G to use
	stx	%g1, [%l0+DBR_G+(1*8)]			! Save Gs
	stx	%g2, [%l0+DBR_G+(2*8)]
	stx	%g3, [%l0+DBR_G+(3*8)]
	stx	%g4, [%l0+DBR_G+(4*8)]
	stx	%g5, [%l0+DBR_G+(5*8)]
	stx	%g6, [%l0+DBR_G+(6*8)]
	stx	%g7, [%l0+DBR_G+(7*8)]
	rdpr	%canrestore, %l1		! Snapshot the window state registers
	stb	%l1, [%l0+DBR_CANRESTORE]
	rdpr	%cansave, %l2
	stb	%l2, [%l0+DBR_CANSAVE]
	rdpr	%cleanwin, %l3
	stb	%l3, [%l0+DBR_CLEANWIN]
	rdpr	%wstate, %l4
	stb	%l4, [%l0+DBR_WSTATE]
	rd	%y, %l5
	stw	%l5, [%l0+DBR_Y]
	rdpr	%tl, %l6
	stb	%l6, [%l0+DBR_TL]
	dec	1, %g7			! Undo the earlier `save' equivalent
#endif
3692
3693/*
3694 * I will not touch any of the DDB or KGDB stuff until I know what's going
3695 * on with the symbol table.  This is all still v7/v8 code and needs to be fixed.
3696 */
3697#ifdef KGDB
3698/*
3699 * bpt is entered on all breakpoint traps.
3700 * If this is a kernel breakpoint, we do not want to call trap().
3701 * Among other reasons, this way we can set breakpoints in trap().
3702 */
bpt:
	!! NOTE(review): this is still SPARC v7/v8 code (%psr, %wim, CCFSZ)
	!! as the comment above the KGDB section warns; it has not been
	!! converted to v9 and will not work as-is on sparc64.
	set	TSTATE_PRIV, %l4
	andcc	%l4, %l0, %g0		! breakpoint from kernel?
	bz	slowtrap		! no, go do regular trap
	 nop

	/*
	 * Build a trap frame for kgdb_trap_glue to copy.
	 * Enable traps but set ipl high so that we will not
	 * see interrupts from within breakpoints.
	 */
	save	%sp, -CCFSZ-TF_SIZE, %sp		! allocate a trap frame
	TRAP_SETUP(-CCFSZ-TF_SIZE)
	or	%l0, PSR_PIL, %l4	! splhigh()
	wr	%l4, 0, %psr		! the manual claims that this
	wr	%l4, PSR_ET, %psr	! song and dance is necessary
	std	%l0, [%sp + CCFSZ + 0]	! tf.tf_psr, tf.tf_pc
	mov	%l3, %o0		! trap type arg for kgdb_trap_glue
	rd	%y, %l3
	std	%l2, [%sp + CCFSZ + 8]	! tf.tf_npc, tf.tf_y
	rd	%wim, %l3
	st	%l3, [%sp + CCFSZ + 16]	! tf.tf_wim (a kgdb-only r/o field)
	st	%g1, [%sp + CCFSZ + 20]	! tf.tf_global[1]
	std	%g2, [%sp + CCFSZ + 24]	! etc
	std	%g4, [%sp + CCFSZ + 32]
	std	%g6, [%sp + CCFSZ + 40]
	std	%i0, [%sp + CCFSZ + 48]	! tf.tf_in[0..1]
	std	%i2, [%sp + CCFSZ + 56]	! etc
	std	%i4, [%sp + CCFSZ + 64]
	std	%i6, [%sp + CCFSZ + 72]

	/*
	 * Now call kgdb_trap_glue(); if it returns, call trap().
	 */
	mov	%o0, %l3		! gotta save trap type
	call	_C_LABEL(kgdb_trap_glue)		! kgdb_trap_glue(type, &trapframe)
	 add	%sp, CCFSZ, %o1		! (&trapframe)

	/*
	 * Use slowtrap to call trap---but first erase our tracks
	 * (put the registers back the way they were).
	 */
	mov	%l3, %o0		! slowtrap will need trap type
	ld	[%sp + CCFSZ + 12], %l3	! restore %y from the frame
	wr	%l3, 0, %y
	ld	[%sp + CCFSZ + 20], %g1	! restore the globals we saved above
	ldd	[%sp + CCFSZ + 24], %g2
	ldd	[%sp + CCFSZ + 32], %g4
	b	Lslowtrap_reenter
	 ldd	[%sp + CCFSZ + 40], %g6
3753
3754/*
3755 * Enter kernel breakpoint.  Write all the windows (not including the
3756 * current window) into the stack, so that backtrace works.  Copy the
3757 * supplied trap frame to the kgdb stack and switch stacks.
3758 *
3759 * kgdb_trap_glue(type, tf0)
3760 *	int type;
3761 *	struct trapframe *tf0;
3762 */
	.globl	_C_LABEL(kgdb_trap_glue)
_C_LABEL(kgdb_trap_glue):
	!! NOTE(review): v7/v8-era code (80-byte trapframe, CCFSZ) -- see
	!! the warning above the KGDB section; not converted to v9.
	save	%sp, -CCFSZ, %sp

	flushw				! flush all windows
	mov	%sp, %l4		! %l4 = current %sp

	/* copy trapframe to top of kgdb stack */
	set	_C_LABEL(kgdb_stack) + KGDB_STACK_SIZE - 80, %l0
					! %l0 = tfcopy -> end_of_kgdb_stack
	mov	80, %l1			! %l1 = bytes of trapframe left to copy
1:	ldd	[%i1], %l2
	inc	8, %i1
	deccc	8, %l1
	std	%l2, [%l0]
	bg	1b
	 inc	8, %l0

#ifdef DEBUG
	/* save old red zone and then turn it off */
	sethi	%hi(_C_LABEL(redzone)), %l7
	ld	[%l7 + %lo(_C_LABEL(redzone))], %l6
	st	%g0, [%l7 + %lo(_C_LABEL(redzone))]
#endif
	/* switch to kgdb stack */
	add	%l0, -CCFSZ-TF_SIZE, %sp

	/* if (kgdb_trap(type, tfcopy)) kgdb_rett(tfcopy); */
	mov	%i0, %o0
	call	_C_LABEL(kgdb_trap)
	add	%l0, -80, %o1		! %o1 = &tfcopy (also left in %g1 below)
	tst	%o0
	bnz,a	kgdb_rett
	 add	%l0, -80, %g1		! (annulled delay) %g1 = &tfcopy for kgdb_rett

	/*
	 * kgdb_trap() did not handle the trap at all so the stack is
	 * still intact.  A simple `restore' will put everything back,
	 * after we reset the stack pointer.
	 */
	mov	%l4, %sp
#ifdef DEBUG
	st	%l6, [%l7 + %lo(_C_LABEL(redzone))]	! restore red zone
#endif
	ret
	 restore
3809
3810/*
3811 * Return from kgdb trap.  This is sort of special.
3812 *
3813 * We know that kgdb_trap_glue wrote the window above it, so that we will
3814 * be able to (and are sure to have to) load it up.  We also know that we
3815 * came from kernel land and can assume that the %fp (%i6) we load here
3816 * is proper.  We must also be sure not to lower ipl (it is at splhigh())
3817 * until we have traps disabled, due to the SPARC taking traps at the
3818 * new ipl before noticing that PSR_ET has been turned off.  We are on
3819 * the kgdb stack, so this could be disastrous.
3820 *
3821 * Note that the trapframe argument in %g1 points into the current stack
3822 * frame (current window).  We abandon this window when we move %g1->tf_psr
3823 * into %psr, but we will not have loaded the new %sp yet, so again traps
3824 * must be disabled.
3825 */
kgdb_rett:
	!! NOTE(review): v7/v8-era code (%psr, %wim, RETT) -- see the warning
	!! above the KGDB section; not converted to v9.
	rd	%psr, %g4		! turn off traps
	wr	%g4, PSR_ET, %psr
	/* use the three-instruction delay to do something useful */
	ld	[%g1], %g2		! pick up new %psr
	ld	[%g1 + 12], %g3		! set %y
	wr	%g3, 0, %y
#ifdef DEBUG
	st	%l6, [%l7 + %lo(_C_LABEL(redzone))] ! and restore red zone
#endif
	wr	%g0, 0, %wim		! enable window changes
	nop; nop; nop
	/* now safe to set the new psr (changes CWP, leaves traps disabled) */
	wr	%g2, 0, %psr		! set rett psr (including cond codes)
	/* 3 instruction delay before we can use the new window */
/*1*/	ldd	[%g1 + 24], %g2		! set new %g2, %g3
/*2*/	ldd	[%g1 + 32], %g4		! set new %g4, %g5
/*3*/	ldd	[%g1 + 40], %g6		! set new %g6, %g7

	/* now we can use the new window */
	mov	%g1, %l4		! stash trapframe ptr before clobbering %g1
	ld	[%l4 + 4], %l1		! get new pc
	ld	[%l4 + 8], %l2		! get new npc
	ld	[%l4 + 20], %g1		! set new %g1

	/* set up returnee's out registers, including its %sp */
	ldd	[%l4 + 48], %i0
	ldd	[%l4 + 56], %i2
	ldd	[%l4 + 64], %i4
	ldd	[%l4 + 72], %i6

	/* load returnee's window, making the window above it be invalid */
	restore
	restore	%g0, 1, %l1		! move to inval window and set %l1 = 1
	rd	%psr, %l0
	srl	%l1, %l0, %l1
	wr	%l1, 0, %wim		! %wim = 1 << (%psr & 31)
	sethi	%hi(CPCB), %l1
	LDPTR	[%l1 + %lo(CPCB)], %l1
	and	%l0, 31, %l0		! CWP = %psr & 31;
	st	%l0, [%l1 + PCB_WIM]	! cpcb->pcb_wim = CWP;
	save	%g0, %g0, %g0		! back to window to reload
	LOADWIN(%sp)
	save	%g0, %g0, %g0		! back to trap window
	/* note, we have not altered condition codes; safe to just rett */
	RETT
3872#endif
3873
3874/*
3875 * syscall_setup() builds a trap frame and calls syscall().
3876 * sun_syscall is same but delivers sun system call number
3877 * XXX	should not have to save&reload ALL the registers just for
3878 *	ptrace...
3879 */
3880syscall_setup:
3881#ifdef TRAPS_USE_IG
3882	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG
3883#endif
3884	TRAP_SETUP(-CC64FSZ-TF_SIZE)
3885
3886#ifdef DEBUG
3887	rdpr	%tt, %o1	! debug
3888	sth	%o1, [%sp + CC64FSZ + STKB + TF_TT]! debug
3889#endif
3890
3891	wrpr	%g0, PSTATE_KERN, %pstate	! Get back to normal globals
3892	stx	%g1, [%sp + CC64FSZ + STKB + TF_G + ( 1*8)]
3893	mov	%g1, %o1			! code
3894	rdpr	%tpc, %o2			! (pc)
3895	stx	%g2, [%sp + CC64FSZ + STKB + TF_G + ( 2*8)]
3896	rdpr	%tstate, %g1
3897	stx	%g3, [%sp + CC64FSZ + STKB + TF_G + ( 3*8)]
3898	rdpr	%tnpc, %o3
3899	stx	%g4, [%sp + CC64FSZ + STKB + TF_G + ( 4*8)]
3900	rd	%y, %o4
3901	stx	%g5, [%sp + CC64FSZ + STKB + TF_G + ( 5*8)]
3902	stx	%g6, [%sp + CC64FSZ + STKB + TF_G + ( 6*8)]
3903	CHKPT(%g5,%g6,0x31)
3904	wrpr	%g0, 0, %tl			! return to tl=0
3905	stx	%g7, [%sp + CC64FSZ + STKB + TF_G + ( 7*8)]
3906	add	%sp, CC64FSZ + STKB, %o0	! (&tf)
3907
3908	stx	%g1, [%sp + CC64FSZ + STKB + TF_TSTATE]
3909	stx	%o2, [%sp + CC64FSZ + STKB + TF_PC]
3910	stx	%o3, [%sp + CC64FSZ + STKB + TF_NPC]
3911	st	%o4, [%sp + CC64FSZ + STKB + TF_Y]
3912
3913	rdpr	%pil, %g5
3914	stb	%g5, [%sp + CC64FSZ + STKB + TF_PIL]
3915	stb	%g5, [%sp + CC64FSZ + STKB + TF_OLDPIL]
3916
3917	!! In the EMBEDANY memory model %g4 points to the start of the data segment.
3918	!! In our case we need to clear it before calling any C-code
3919	clr	%g4
3920	wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Restore default ASI
3921
3922	call	_C_LABEL(syscall)		! syscall(&tf, code, pc)
3923	 wrpr	%g0, PSTATE_INTR, %pstate	! turn on interrupts
3924
3925	/* see `proc_trampoline' for the reason for this label */
3926return_from_syscall:
3927	wrpr	%g0, PSTATE_KERN, %pstate	! Disable intterrupts
3928	CHKPT(%o1,%o2,0x32)
3929	wrpr	%g0, 0, %tl			! Return to tl==0
3930	CHKPT(%o1,%o2,4)
3931	ba,a,pt	%icc, return_from_trap
3932	 nop
3933	NOTREACHED
3934
3935/*
3936 * interrupt_vector:
3937 *
3938 * Spitfire chips never get level interrupts directly from H/W.
3939 * Instead, all interrupts come in as interrupt_vector traps.
3940 * The interrupt number or handler address is an 11 bit number
3941 * encoded in the first interrupt data word.  Additional words
3942 * are application specific and used primarily for cross-calls.
3943 *
3944 * The interrupt vector handler then needs to identify the
3945 * interrupt source from the interrupt number and arrange to
3946 * invoke the interrupt handler.  This can either be done directly
3947 * from here, or a softint at a particular level can be issued.
3948 *
3949 * To call an interrupt directly and not overflow the trap stack,
3950 * the trap registers should be saved on the stack, registers
3951 * cleaned, trap-level decremented, the handler called, and then
3952 * the process must be reversed.
3953 *
3954 * To simplify life all we do here is issue an appropriate softint.
3955 *
3956 * Note:	It is impossible to identify or change a device's
3957 *		interrupt number until it is probed.  That's the
3958 *		purpose for all the funny interrupt acknowledge
3959 *		code.
3960 *
3961 */
3962
3963/*
3964 * Vectored interrupts:
3965 *
3966 * When an interrupt comes in, interrupt_vector uses the interrupt
3967 * vector number to lookup the appropriate intrhand from the intrlev
3968 * array.  It then looks up the interrupt level from the intrhand
3969 * structure.  It uses the level to index the intrpending array,
3970 * which is 8 slots for each possible interrupt level (so we can
3971 * shift instead of multiply for address calculation).  It hunts for
3972 * any available slot at that level.  Available slots are NULL.
3973 *
3974 * NOTE: If no slots are available, we issue an un-vectored interrupt,
3975 * but it will probably be lost anyway.
3976 *
3977 * Then interrupt_vector uses the interrupt level in the intrhand
3978 * to issue a softint of the appropriate level.  The softint handler
3979 * figures out what level interrupt it's handling and pulls the first
3980 * intrhand pointer out of the intrpending array for that interrupt
3981 * level, puts a NULL in its place, clears the interrupt generator,
3982 * and invokes the interrupt handler.
3983 */
3984
	.data
	.globl	intrpending
	!! Pending-interrupt table: 8 pointer-sized slots for each of
	!! 16 interrupt levels (see the dispatch description above).
intrpending:
	.space	16 * 8 * PTRSZ

#ifdef DEBUG
	!! Bits in intrdebug, selecting which debug printfs fire below.
#define INTRDEBUG_VECTOR	0x1
#define INTRDEBUG_LEVEL		0x2
#define INTRDEBUG_FUNC		0x4
#define INTRDEBUG_SPUR		0x8
	.globl	_C_LABEL(intrdebug)
_C_LABEL(intrdebug):	.word 0x0
/*
 * Note: we use the local label `97' to branch forward to, to skip
 * actual debugging code following a `intrdebug' bit test.
 */
#endif
4002	.text
interrupt_vector:
	!! Entry for vectored interrupts: read the dispatch data from the
	!! interrupt receive registers, look up the intrhand in intrlev[],
	!! enqueue it on intrpending[] and post a softint at its PIL.
#ifdef TRAPSTATS
	set	_C_LABEL(kiveccnt), %g1
	set	_C_LABEL(iveccnt), %g2
	rdpr	%tl, %g3
	dec	%g3
	movrz	%g3, %g2, %g1		! count in iveccnt if we came from tl==1, else kiveccnt
	lduw	[%g1], %g2
	inc	%g2
	stw	%g2, [%g1]
#endif
	ldxa	[%g0] ASI_IRSR, %g1
	mov	IRDR_0H, %g2
	ldxa	[%g2] ASI_IRDR, %g2	! Get interrupt number
	membar	#Sync
	stxa	%g0, [%g0] ASI_IRSR	! Ack IRQ
	membar	#Sync			! Should not be needed due to retry
#if NOT_DEBUG
	STACKFRAME(-CC64FSZ)		! Get a clean register window
	mov	%g1, %o1
	mov	%g2, %o2

	LOAD_ASCIZ(%o0, "interrupt_vector: ASI_IRSR %lx ASI_IRDR(0x40) %lx\r\n")
	GLOBTOLOC
	call	prom_printf
	 clr	%g4
	LOCTOGLOB
	restore
	 nop
#endif
	sethi	%hi(_C_LABEL(intrlev)), %g3
	btst	IRSR_BUSY, %g1
	or	%g3, %lo(_C_LABEL(intrlev)), %g3
	bz,pn	%icc, 3f		! spurious interrupt
	 sllx	%g2, PTRSHFT, %g5	! Calculate entry number
	cmp	%g2, MAXINTNUM		! bounds-check the vector number

#ifdef DEBUG
	tgeu	55
#endif
	bgeu,pn	%xcc, 3f
	 nop
	LDPTR	[%g3 + %g5], %g5	! We have a pointer to the handler
#if DEBUG
	brnz,pt %g5, 1f
	 nop
	STACKFRAME(-CC64FSZ)		! Get a clean register window
	mov	%g2, %o1

	LOAD_ASCIZ(%o0, "interrupt_vector: vector %lx NULL\r\n")
	GLOBTOLOC
	call	prom_printf
	 clr	%g4
	LOCTOGLOB
	restore
	 nop
1:
#endif
#ifdef NOT_DEBUG
	tst	%g5
	tz	56
#endif

	brz,pn	%g5, 3f			! NULL means it isn't registered yet.  Skip it.
	 nop

setup_sparcintr:
#ifdef	INTR_INTERLOCK
	LDPTR	[%g5+IH_PEND], %g6	! Read pending flag
	brnz,pn	%g6, ret_from_intr_vector ! Skip it if it's running
#endif
	 ldub	[%g5+IH_PIL], %g6	! Read interrupt mask
	sethi	%hi(intrpending), %g1
	mov	8, %g7			! Number of slots to search
	sll	%g6, PTRSHFT+3, %g3	! Find start of table for this IPL
	or	%g1, %lo(intrpending), %g1
	 add	%g1, %g3, %g1
1:
#ifdef INTRLIST
	LDPTR	[%g1], %g3		! Load list head
	STPTR	%g3, [%g5+IH_PEND]	! Link our intrhand node in
	mov	%g5, %g7
	CASPTR	[%g1] ASI_N, %g3, %g7	! Atomically swing the list head to us
	cmp	%g7, %g3		! Did it work?
	bne,pn	%xcc, 1b		! No, try again
	 nop
#else	/* INTRLIST */
	mov	%g5, %g3
	CASPTR	[%g1] ASI_N, %g0, %g3	! Try a slot -- MPU safe
	brz,pt	%g3, 2f			! Available?
#ifdef DEBUG
	 cmp	%g5, %g3		! if these are the same
	bne,pt	%icc, 97f		! then we already have the
	 nop				! interrupt registered
	set	_C_LABEL(intrdebug), %g4
	ld	[%g4], %g4
	btst	INTRDEBUG_VECTOR, %g4
	bz,pt	%icc, 97f
	 nop

	STACKFRAME(-CC64FSZ)		! Get a clean register window
	LOAD_ASCIZ(%o0, "interrupt_vector: duplicate handler %p\r\n")
	GLOBTOLOC
	clr	%g4
	call	prom_printf
	 mov	%g3, %o1
	LOCTOGLOB
	 restore
97:
#endif
	 dec	%g7
	brgz,pt	%g7, 1b
	 inc	PTRSZ, %g1		! Next slot

	!! If we get here we have a problem.
	!! There were no available slots and the interrupt was lost.
	!! We'll resort to polling in this case.
#ifdef DIAGNOSTIC
	STACKFRAME(-CC64FSZ)		! Get a clean register window
	LOAD_ASCIZ(%o0, "interrupt_vector: level %d out of slots\r\n")
	mov	%g6, %o1
	GLOBTOLOC
	clr	%g4
	rdpr	%pil, %l0
	call	prom_printf
	 mov	%l0, %o2
	wrpr	%g0, 15, %pil
	ta	1
	LOCTOGLOB
	restore
#endif
#endif	/* INTRLIST */
2:
#ifdef DEBUG
	set	_C_LABEL(intrdebug), %g7
	ld	[%g7], %g7
	btst	INTRDEBUG_VECTOR, %g7
	bz,pt	%icc, 97f
	 nop

	STACKFRAME(-CC64FSZ)		! Get a clean register window
	LOAD_ASCIZ(%o0,\
	    "interrupt_vector: number %lx softint mask %lx pil %lu slot %p\r\n")
	mov	%g2, %o1
	rdpr	%pil, %o3
	mov	%g1, %o4
	GLOBTOLOC
	clr	%g4
	call	prom_printf
	 mov	%g6, %o2
	LOCTOGLOB
	restore
97:
#endif
	mov	1, %g7
	sll	%g7, %g6, %g6		! %g6 = 1 << PIL
	wr	%g6, 0, SET_SOFTINT	! Invoke a softint

ret_from_intr_vector:
	CLRTT
	retry
	NOTREACHED

3:
	!! Spurious or unregistered vector: log it (if enabled) and return.
#ifdef DEBUG
	set	_C_LABEL(intrdebug), %g7
	ld	[%g7], %g7
	btst	INTRDEBUG_SPUR, %g7
	bz,pt	%icc, 97f
	 nop
#endif
	STACKFRAME(-CC64FSZ)		! Get a clean register window
	LOAD_ASCIZ(%o0, "interrupt_vector: spurious vector %lx at pil %d\r\n")
	mov	%g2, %o1
	GLOBTOLOC
	clr	%g4
	call	prom_printf
	 rdpr	%pil, %o2
	LOCTOGLOB
	restore
97:
	ba,a	ret_from_intr_vector
	 nop				! XXX spitfire bug?
4186
4187/*
4188 * Ultra1 and Ultra2 CPUs use soft interrupts for everything.  What we do
4189 * on a soft interrupt, is we should check which bits in ASR_SOFTINT(0x16)
4190 * are set, handle those interrupts, then clear them by setting the
4191 * appropriate bits in ASR_CLEAR_SOFTINT(0x15).
4192 *
4193 * We have an array of 8 interrupt vector slots for each of 15 interrupt
4194 * levels.  If a vectored interrupt can be dispatched, the dispatch
4195 * routine will place a pointer to an intrhand structure in one of
4196 * the slots.  The interrupt handler will go through the list to look
4197 * for an interrupt to dispatch.  If it finds one it will pull it off
4198 * the list, free the entry, and call the handler.  The code is like
4199 * this:
4200 *
4201 *	for (i=0; i<8; i++)
4202 *		if (ih = intrpending[intlev][i]) {
4203 *			intrpending[intlev][i] = NULL;
4204 *			if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
4205 *				return;
4206 *			strayintr(&frame);
4207 *			return;
4208 *		}
4209 *
4210 * Otherwise we go back to the old style of polled interrupts.
4211 *
4212 * After preliminary setup work, the interrupt is passed to each
4213 * registered handler in turn.  These are expected to return nonzero if
4214 * they took care of the interrupt.  If a handler claims the interrupt,
4215 * we exit (hardware interrupts are latched in the requestor so we'll
4216 * just take another interrupt in the unlikely event of simultaneous
4217 * interrupts from two different devices at the same level).  If we go
4218 * through all the registered handlers and no one claims it, we report a
4219 * stray interrupt.  This is more or less done as:
4220 *
4221 *	for (ih = intrhand[intlev]; ih; ih = ih->ih_next)
4222 *		if ((*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : &frame))
4223 *			return;
4224 *	strayintr(&frame);
4225 *
4226 * Inputs:
4227 *	%l0 = %tstate
4228 *	%l1 = return pc
4229 *	%l2 = return npc
4230 *	%l3 = interrupt level
4231 *	(software interrupt only) %l4 = bits to clear in interrupt register
4232 *
4233 * Internal:
4234 *	%l4, %l5: local variables
4235 *	%l6 = %y
4236 *	%l7 = %g1
4237 *	%g2..%g7 go to stack
4238 *
4239 * An interrupt frame is built in the space for a full trapframe;
4240 * this contains the psr, pc, npc, and interrupt level.
4241 *
4242 * The level of this interrupt is determined by:
4243 *
4244 *       IRQ# = %tt - 0x40
4245 */
4246
4247	.globl _C_LABEL(sparc_interrupt)	! This is for interrupt debugging
4248_C_LABEL(sparc_interrupt):
4249#ifdef TRAPS_USE_IG
4250	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG
4251#endif
4252	/*
4253	 * If this is a %tick softint, clear it then call interrupt_vector.
4254	 */
4255	rd	SOFTINT, %g1
4256	btst	1, %g1
4257	bz,pt	%icc, 0f
4258	 set	_C_LABEL(intrlev), %g3
4259	wr	%g0, 1, CLEAR_SOFTINT
4260	DLFLUSH(%g3, %g2)
4261	ba,pt	%icc, setup_sparcintr
4262	 LDPTR	[%g3 + PTRSZ], %g5	! intrlev[1] is reserved for %tick intr.
42630:
4264#ifdef TRAPSTATS
4265	sethi	%hi(_C_LABEL(kintrcnt)), %g1
4266	sethi	%hi(_C_LABEL(uintrcnt)), %g2
4267	or	%g1, %lo(_C_LABEL(kintrcnt)), %g1
4268	or	%g1, %lo(_C_LABEL(uintrcnt)), %g2
4269	rdpr	%tl, %g3
4270	dec	%g3
4271	movrz	%g3, %g2, %g1
4272	lduw	[%g1], %g2
4273	inc	%g2
4274	stw	%g2, [%g1]
4275	/* See if we're on the interrupt stack already. */
4276	set	EINTSTACK, %g2
4277	set	(EINTSTACK-INTSTACK), %g1
4278	btst	1, %sp
4279	add	%sp, BIAS, %g3
4280	movz	%icc, %sp, %g3
4281	srl	%g3, 0, %g3
4282	sub	%g2, %g3, %g3
4283	cmp	%g3, %g1
4284	bgu	1f
4285	 set	_C_LABEL(intristk), %g1
4286	lduw	[%g1], %g2
4287	inc	%g2
4288	stw	%g2, [%g1]
42891:
4290#endif
4291	INTR_SETUP(-CC64FSZ-TF_SIZE)
4292	! Switch to normal globals so we can save them
4293	wrpr	%g0, PSTATE_KERN, %pstate
4294	stx	%g1, [%sp + CC64FSZ + STKB + TF_G + ( 1*8)]
4295	stx	%g2, [%sp + CC64FSZ + STKB + TF_G + ( 2*8)]
4296	stx	%g3, [%sp + CC64FSZ + STKB + TF_G + ( 3*8)]
4297	stx	%g4, [%sp + CC64FSZ + STKB + TF_G + ( 4*8)]
4298	stx	%g5, [%sp + CC64FSZ + STKB + TF_G + ( 5*8)]
4299	stx	%g6, [%sp + CC64FSZ + STKB + TF_G + ( 6*8)]
4300	stx	%g7, [%sp + CC64FSZ + STKB + TF_G + ( 7*8)]
4301
4302	/*
4303	 * In the EMBEDANY memory model %g4 points to the start of the
4304	 * data segment.  In our case we need to clear it before calling
4305	 * any C-code.
4306	 */
4307	clr	%g4
4308
4309	flushw			! Do not remove this insn -- causes interrupt loss
4310	rd	%y, %l6
4311	INCR(_C_LABEL(uvmexp)+V_INTR)	! cnt.v_intr++; (clobbers %o0,%o1,%o2)
4312	rdpr	%tt, %l5		! Find out our current IPL
4313	rdpr	%tstate, %l0
4314	rdpr	%tpc, %l1
4315	rdpr	%tnpc, %l2
4316	rdpr	%tl, %l3		! Dump our trap frame now we have taken the IRQ
4317	stw	%l6, [%sp + CC64FSZ + STKB + TF_Y]	! Silly, but we need to save this for rft
4318	dec	%l3
4319	CHKPT(%l4,%l7,0x26)
4320	wrpr	%g0, %l3, %tl
4321	sth	%l5, [%sp + CC64FSZ + STKB + TF_TT]! debug
4322	stx	%l0, [%sp + CC64FSZ + STKB + TF_TSTATE]	! set up intrframe/clockframe
4323	stx	%l1, [%sp + CC64FSZ + STKB + TF_PC]
4324	btst	TSTATE_PRIV, %l0		! User mode?
4325	stx	%l2, [%sp + CC64FSZ + STKB + TF_NPC]
4326
4327	sub	%l5, 0x40, %l6			! Convert to interrupt level
4328	sethi	%hi(_C_LABEL(intrcnt)), %l4
4329	stb	%l6, [%sp + CC64FSZ + STKB + TF_PIL]	! set up intrframe/clockframe
4330	rdpr	%pil, %o1
4331	sll	%l6, LNGSHFT, %l3
4332	or	%l4, %lo(_C_LABEL(intrcnt)), %l4	! intrcnt[intlev]++;
4333	stb	%o1, [%sp + CC64FSZ + STKB + TF_OLDPIL]	! old %pil
4334	LDULNG	[%l4 + %l3], %o0
4335	add	%l4, %l3, %l4
4336	clr	%l5			! Zero handled count
4337	mov	1, %l3			! Ack softint
4338	inc	%o0
4339	STULNG	%o0, [%l4]
4340	sll	%l3, %l6, %l3		! Generate IRQ mask
4341
4342	wrpr	%l6, %pil
4343
4344sparc_intr_retry:
4345	wr	%l3, 0, CLEAR_SOFTINT	! (don't clear possible %tick IRQ)
4346	wrpr	%g0, PSTATE_INTR, %pstate	! Reenable interrupts
4347	sll	%l6, PTRSHFT+3, %l2
4348	sethi	%hi(intrpending), %l4
4349	or	%l4, %lo(intrpending), %l4
4350	mov	8, %l7
4351	add	%l2, %l4, %l4
4352
4353#ifdef INTRLIST
43541:
4355	membar	#StoreLoad		! Make sure any failed casxa insns complete
4356	LDPTR	[%l4], %l2		! Check a slot
4357	brz,pn	%l2, intrcmplt		! Empty list?
4358
4359	 clr	%l7
4360	membar	#LoadStore
4361	CASPTR	[%l4] ASI_N, %l2, %l7	! Grab the entire list
4362	cmp	%l7, %l2
4363	bne,pn	%icc, 1b
4364	 add	%sp, CC64FSZ+STKB, %o2	! tf = %sp + CC64FSZ + STKB
43652:
4366	LDPTR	[%l2 + IH_FUN], %o4	! ih->ih_fun
4367	LDPTR	[%l2 + IH_ARG], %o0	! ih->ih_arg
4368
4369	jmpl	%o4, %o7		! handled = (*ih->ih_fun)(...)
4370	 movrz	%o0, %o2, %o0		! arg = (arg == 0) ? arg : tf
4371	LDPTR	[%l2 + IH_PEND], %l7	! Clear pending flag
4372	LDPTR	[%l2 + IH_CLR], %l1
4373	membar	#LoadStore
4374	STPTR	%g0, [%l2 + IH_PEND]	! Clear pending flag
4375	membar	#Sync
4376
4377	brz,pn	%l1, 0f
4378	 add	%l5, %o0, %l5
4379	stx	%g0, [%l1]		! Clear intr source
4380	membar	#Sync			! Should not be needed
43810:
4382	brnz,pn	%l7, 2b			! 'Nother?
4383	 mov	%l7, %l2
4384
4385#else /* INTRLIST */
4386	/*
4387	 * Register usage at this point:
4388	 *	%l4 - current slot at intrpending[PIL]
4389	 *	%l5 - sum of interrupt handler return values
4390	 *	%l6 - PIL
4391	 */
4392sparc_intr_check_slot:
4393	LDPTR	[%l4], %l2		! Check a slot
4394	dec	%l7
4395	brnz,pt	%l2, 1f			! Pending?
4396	 nop
4397	brgz,pt	%l7, sparc_intr_check_slot
4398	 inc	PTRSZ, %l4		! Next slot
4399
4400	ba,a,pt	%icc, intrcmplt		! Only handle vectors -- don't poll XXXX
4401	 nop				! XXX spitfire bug?
4402
44031:
4404	/*
4405	 * We have a pending interrupt; prepare to call handler
4406	 */
4407!	DLFLUSH(%l2, %o3)
4408	LDPTR	[%l2 + IH_CLR], %l1
4409	add	%sp, CC64FSZ+STKB, %o2	! tf = %sp + CC64FSZ + STKB
4410	LDPTR	[%l2 + IH_FUN], %o4	! ih->ih_fun
4411	LDPTR	[%l2 + IH_ARG], %o0	! ih->ih_arg
4412
4413#ifdef DEBUG
4414	set	_C_LABEL(intrdebug), %o3
4415	ld	[%o3], %o3
4416	btst	INTRDEBUG_FUNC, %o3
4417	bz,a,pt	%icc, 97f
4418	 nop
4419
4420	STACKFRAME(-CC64FSZ)		! Get a clean register window
4421	LOAD_ASCIZ(%o0, "sparc_interrupt:  calling %lx(%lx) sp = %p\r\n")
4422	mov	%i0, %o2		! arg
4423	mov	%i6, %o3		! sp
4424	GLOBTOLOC
4425	call	prom_printf
4426	 mov	%i4, %o1		! fun
4427	LOCTOGLOB
4428	restore
442997:
4430	mov	%l4, %o1	! XXXXXXX DEBUGGGGGG!
4431#endif	/* DEBUG */
4432
4433!	STPTR	%g0, [%l4]		! Clear the slot
4434	jmpl	%o4, %o7		! handled = (*ih->ih_fun)(...)
4435	 movrz	%o0, %o2, %o0		! arg = (arg == 0) ? arg : tf
4436	STPTR	%g0, [%l2 + IH_PEND]	! Clear pending flag
4437	STPTR	%g0, [%l4]		! Clear the slot
4438
4439#ifdef DEBUG
4440	set	_C_LABEL(intrdebug), %o3
4441	ld	[%o3], %o3
4442	btst	INTRDEBUG_FUNC, %o3
4443	bz,a,pt	%icc, 97f
4444	 nop
4445#if 0
4446	brnz,pt	%l1, 97f
4447	 nop
4448#endif
4449
4450	mov	%l4, %o5
4451	mov	%l1, %o3
4452	STACKFRAME(-CC64FSZ)		! Get a clean register window
4453	mov	%i5, %o1
4454	mov	%i3, %o3
4455	LOAD_ASCIZ(%o0, "sparc_interrupt:  ih %p fun %p has %p clear\r\n")
4456	GLOBTOLOC
4457	call	prom_printf
4458	 mov	%i4, %o2		! fun
4459	LOCTOGLOB
4460	restore
446197:
4462#endif	/* DEBUG */
4463	brz,pn	%l1, 0f
4464	 add	%l5, %o0, %l5
4465	stx	%g0, [%l1]		! Clear intr source
4466	membar	#Sync			! Should not be needed
44670:
4468	brnz,pt	%o0, sparc_intr_check_slot	! Handle any others
4469	 nop
4470
4471	/*
4472	 * Interrupt not claimed by handler at this vector entry;
4473	 * report that.
4474	 */
4475	mov	1, %o1
4476	call	_C_LABEL(strayintr)		! strayintr(&intrframe, 1)
4477	 add	%sp, CC64FSZ + STKB, %o0
4478
4479	ba,a,pt	%icc, sparc_intr_check_slot	! Try another
4480	 nop					! XXX spitfire bug?
4481#endif /* INTRLIST */
4482intrcmplt:
4483	/*
4484	 * Re-read SOFTINT to see if any new  pending interrupts
4485	 * at this level.
4486	 */
4487	mov	1, %l3			! Ack softint
4488	rd	SOFTINT, %l7		! %l5 contains #intr handled.
4489	sll	%l3, %l6, %l3		! Generate IRQ mask
4490	btst	%l3, %l7		! leave mask in %l3 for retry code
4491	bnz,pn	%icc, sparc_intr_retry
4492	 mov	1, %l5			! initialize intr count for next run
4493
4494#ifdef DEBUG
4495	set	_C_LABEL(intrdebug), %o2
4496	ld	[%o2], %o2
4497	btst	INTRDEBUG_FUNC, %o2
4498	bz,a,pt	%icc, 97f
4499	 nop
4500
4501	STACKFRAME(-CC64FSZ)		! Get a clean register window
4502	LOAD_ASCIZ(%o0, "sparc_interrupt:  done\r\n")
4503	GLOBTOLOC
4504	call	prom_printf
4505	 nop
4506	LOCTOGLOB
4507	restore
450897:
4509#endif
4510
4511	ldub	[%sp + CC64FSZ + STKB + TF_OLDPIL], %l3	! restore old %pil
4512	wrpr	%g0, PSTATE_KERN, %pstate	! Disable interrupts
4513	wrpr	%l3, 0, %pil
4514
4515	CHKPT(%o1,%o2,5)
4516	ba,a,pt	%icc, return_from_trap
4517	 nop
4518
#ifdef notyet
/*
 * Level 12 (ZS serial) interrupt.  Handle it quickly, schedule a
 * software interrupt, and get out.  Do the software interrupt directly
 * if we would just take it on the way out.
 *
 * NOTE(review): unimplemented stub (#ifdef notyet); the register
 * descriptions below are v8-era (%psr) and would need updating for v9.
 *
 * Input:
 *	%l0 = %psr
 *	%l1 = return pc
 *	%l2 = return npc
 * Internal:
 *	%l3 = zs device
 *	%l4, %l5 = temporary
 *	%l6 = rr3 (or temporary data) + 0x100 => need soft int
 *	%l7 = zs soft status
 */
zshard:
#endif /* notyet */
4537
4538	.globl	return_from_trap, rft_kernel, rft_user
4539	.globl	softtrap, slowtrap
4540	.globl	syscall
4541
4542
4543/*
4544 * Various return-from-trap routines (see return_from_trap).
4545 */
4546
4547/*
4548 * Return from trap.
4549 * registers are:
4550 *
4551 *	[%sp + CC64FSZ + STKB] => trap frame
4552 *
4553 * We must load all global, out, and trap registers from the trap frame.
4554 *
4555 * If returning to kernel, we should be at the proper trap level because
4556 * we don't touch %tl.
4557 *
4558 * When returning to user mode, the trap level does not matter, as it
4559 * will be set explicitly.
4560 *
4561 * If we are returning to user code, we must:
4562 *  1.  Check for register windows in the pcb that belong on the stack.
4563 *	If there are any, reload them
4564 */
4565return_from_trap:
4566#ifdef DEBUG
4567	!! Make sure we don't have pc == npc == 0 or we suck.
4568	ldx	[%sp + CC64FSZ + STKB + TF_PC], %g2
4569	ldx	[%sp + CC64FSZ + STKB + TF_NPC], %g3
4570	orcc	%g2, %g3, %g0
4571	tz	%icc, 1
4572#endif
4573#ifdef NOTDEF_DEBUG
4574	mov	%i6, %o1
4575	save	%sp, -CC64FSZ, %sp
4576	set	1f, %o0
4577	mov	%i1, %o1
4578	ldx	[%fp + CC64FSZ + STKB + TF_PC], %o3
4579	ldx	[%fp + CC64FSZ + STKB + TF_NPC], %o4
4580	GLOBTOLOC
4581	call	printf
4582	 mov	%i6, %o2
4583	LOCTOGLOB
4584	restore
4585	.data
45861:	.asciz	"rft[%x,%x,%p,%p]"
45873:	.asciz	"return_from_trap: fp=%x sp=%x pc=%x\n"
4588	_ALIGN
4589	.text
45902:
4591#endif
4592
4593#ifdef NOTDEF_DEBUG
4594	ldx	[%sp + CC64FSZ + STKB + TF_TSTATE], %g2
4595	set	TSTATE_AG, %g3
4596	set	4f, %g4
4597	and	%g2, %g3, %g3
4598	clr	%o1
4599	movrnz	%g3, %g4, %o1
4600	set	TSTATE_MG, %g3
4601	set	3f, %g4
4602	and	%g2, %g3, %g3
4603	movrnz	%g3, %g4, %o1
4604	set	TSTATE_IG, %g3
4605	set	5f, %g4
4606	and	%g2, %g3, %g3
4607	movrnz	%g3, %g4, %o1
4608	brz,pt	%o1, 2f
4609	 set	1f, %o0
4610	call	printf
4611	 nop
4612	ta	1; nop
4613	.data
46141:	.asciz	"Returning to trap from %s globals\n"
46153:	.asciz	"MMU"
46164:	.asciz	"Altermate"
46175:	.asciz	"Interrupt"
4618	_ALIGN
4619	.text
46202:
4621#endif
4622	!!
4623	!! We'll make sure we flush our pcb here, rather than later.
4624	!!
4625	ldx	[%sp + CC64FSZ + STKB + TF_TSTATE], %g1
4626	btst	TSTATE_PRIV, %g1			! returning to userland?
4627#if 0
4628	bnz,pt	%icc, 0f
4629	 sethi	%hi(CURPROC), %o1
4630	call	_C_LABEL(rwindow_save)			! Flush out our pcb
4631	 LDPTR	[%o1 + %lo(CURPROC)], %o0
46320:
4633#endif
4634	!!
4635	!! Let all pending interrupts drain before returning to userland
4636	!!
4637	bnz,pn	%icc, 1f				! Returning to userland?
4638	 nop
4639	wrpr	%g0, PSTATE_INTR, %pstate
4640	wrpr	%g0, %g0, %pil				! Lower IPL
46411:
4642	wrpr	%g0, PSTATE_KERN, %pstate		! Make sure we have normal globals & no IRQs
4643
4644	/* Restore normal globals */
4645	ldx	[%sp + CC64FSZ + STKB + TF_G + (1*8)], %g1
4646	ldx	[%sp + CC64FSZ + STKB + TF_G + (2*8)], %g2
4647	ldx	[%sp + CC64FSZ + STKB + TF_G + (3*8)], %g3
4648	ldx	[%sp + CC64FSZ + STKB + TF_G + (4*8)], %g4
4649	ldx	[%sp + CC64FSZ + STKB + TF_G + (5*8)], %g5
4650	ldx	[%sp + CC64FSZ + STKB + TF_G + (6*8)], %g6
4651	ldx	[%sp + CC64FSZ + STKB + TF_G + (7*8)], %g7
4652	/* Switch to alternate globals and load outs */
4653	wrpr	%g0, PSTATE_KERN|PSTATE_AG, %pstate
4654#ifdef TRAPS_USE_IG
4655	wrpr	%g0, PSTATE_KERN|PSTATE_IG, %pstate	! DEBUG
4656#endif
4657	ldx	[%sp + CC64FSZ + STKB + TF_O + (0*8)], %i0
4658	ldx	[%sp + CC64FSZ + STKB + TF_O + (1*8)], %i1
4659	ldx	[%sp + CC64FSZ + STKB + TF_O + (2*8)], %i2
4660	ldx	[%sp + CC64FSZ + STKB + TF_O + (3*8)], %i3
4661	ldx	[%sp + CC64FSZ + STKB + TF_O + (4*8)], %i4
4662	ldx	[%sp + CC64FSZ + STKB + TF_O + (5*8)], %i5
4663	ldx	[%sp + CC64FSZ + STKB + TF_O + (6*8)], %i6
4664	ldx	[%sp + CC64FSZ + STKB + TF_O + (7*8)], %i7
4665	/* Now load trap registers into alternate globals */
4666	ld	[%sp + CC64FSZ + STKB + TF_Y], %g4
4667	ldx	[%sp + CC64FSZ + STKB + TF_TSTATE], %g1		! load new values
4668	wr	%g4, 0, %y
4669	ldx	[%sp + CC64FSZ + STKB + TF_PC], %g2
4670	ldx	[%sp + CC64FSZ + STKB + TF_NPC], %g3
4671
4672#ifdef NOTDEF_DEBUG
4673	ldub	[%sp + CC64FSZ + STKB + TF_PIL], %g5		! restore %pil
4674	wrpr	%g5, %pil				! DEBUG
4675#endif
4676
4677	/* Returning to user mode or kernel mode? */
4678	btst	TSTATE_PRIV, %g1		! returning to userland?
4679	CHKPT(%g4, %g7, 6)
4680	bz,pt	%icc, rft_user
4681	 sethi	%hi(_C_LABEL(want_ast)), %g7	! first instr of rft_user
4682
4683/*
4684 * Return from trap, to kernel.
4685 *
4686 * We will assume, for the moment, that all kernel traps are properly stacked
4687 * in the trap registers, so all we have to do is insert the (possibly modified)
4688 * register values into the trap registers then do a retry.
4689 *
4690 */
4691rft_kernel:
4692	rdpr	%tl, %g4				! Grab a set of trap registers
4693	inc	%g4
4694	wrpr	%g4, %g0, %tl
4695	wrpr	%g3, 0, %tnpc
4696	wrpr	%g2, 0, %tpc
4697	wrpr	%g1, 0, %tstate
4698	CHKPT(%g1,%g2,7)
4699	restore
4700	CHKPT(%g1,%g2,0)			! Clear this out
4701	rdpr	%tstate, %g1			! Since we may have trapped our regs may be toast
4702	rdpr	%cwp, %g2
4703	andn	%g1, CWP, %g1
4704	wrpr	%g1, %g2, %tstate		! Put %cwp in %tstate
4705	CLRTT
4706#ifdef TRAPTRACE
4707	set	trap_trace, %g2
4708	lduw	[%g2+TRACEDIS], %g4
4709	brnz,pn	%g4, 1f
4710	 nop
4711	lduw	[%g2+TRACEPTR], %g3
4712	rdpr	%tl, %g4
4713	set	CURPROC, %g6
4714	sllx	%g4, 13, %g4
4715	LDPTR	[%g6], %g6
4716	clr	%g6		! DISABLE PID
4717	mov	%g0, %g5
4718	brz,pn	%g6, 2f
4719	 andncc	%g3, (TRACESIZ-1), %g0
4720!	ldsw	[%g6+P_PID], %g5	! Load PID
47212:
4722
4723	set	CPCB, %g6	! Load up nsaved
4724	LDPTR	[%g6], %g6
4725	ldub	[%g6 + PCB_NSAVED], %g6
4726	sllx	%g6, 9, %g6
4727	or	%g6, %g4, %g4
4728
4729	movnz	%icc, %g0, %g3	! Wrap if needed
4730	rdpr	%tstate, %g6
4731	rdpr	%tpc, %g7
4732	sth	%g4, [%g2+%g3]
4733	inc	2, %g3
4734	sth	%g5, [%g2+%g3]
4735	inc	2, %g3
4736	stw	%g6, [%g2+%g3]
4737	inc	4, %g3
4738	stw	%sp, [%g2+%g3]
4739	inc	4, %g3
4740	stw	%g7, [%g2+%g3]
4741	inc	4, %g3
4742	mov	TLB_TAG_ACCESS, %g7
4743	ldxa	[%g7] ASI_DMMU, %g7
4744	stw	%g7, [%g2+%g3]
4745	inc	4, %g3
4746	stw	%g3, [%g2+TRACEPTR]
47471:
4748#endif
4749#ifdef TRAPSTATS
4750	rdpr	%tl, %g2
4751	set	_C_LABEL(rftkcnt), %g1
4752	sllx	%g2, 2, %g2
4753	add	%g1, %g2, %g1
4754	lduw	[%g1], %g2
4755	inc	%g2
4756	stw	%g2, [%g1]
4757#endif
4758#if	0
4759	wrpr	%g0, 0, %cleanwin	! DEBUG
4760#endif
4761	retry					! We should allow some way to distinguish retry/done
4762	NOTREACHED
4763/*
4764 * Return from trap, to user.  Checks for scheduling trap (`ast') first;
4765 * will re-enter trap() if set.  Note that we may have to switch from
4766 * the interrupt stack to the kernel stack in this case.
4767 *	%g1 = %tstate
4768 *	%g2 = return %pc
4769 *	%g3 = return %npc
4770 * If returning to a valid window, just set psr and return.
4771 */
4772	.data
4773rft_wcnt:	.word 0
4774	.text
4775
4776rft_user:
4777!	sethi	%hi(_C_LABEL(want_ast)), %g7	! (done above)
4778	lduw	[%g7 + %lo(_C_LABEL(want_ast))], %g7! want AST trap?
4779	brnz,pn	%g7, softtrap			! yes, re-enter trap with type T_AST
4780	 mov	T_AST, %g4
4781
4782	CHKPT(%g4,%g7,8)
4783#ifdef NOTDEF_DEBUG
4784	sethi	%hi(CPCB), %g4
4785	LDPTR	[%g4 + %lo(CPCB)], %g4
4786	ldub	[%g4 + PCB_NSAVED], %g4		! nsaved
4787	brz,pt	%g4, 2f		! Only print if nsaved <> 0
4788	 nop
4789
4790	set	1f, %o0
4791	mov	%g4, %o1
4792	mov	%g2, %o2			! pc
4793	wr	%g0, ASI_DMMU, %asi		! restore the user context
4794	ldxa	[CTX_SECONDARY] %asi, %o3	! ctx
4795	GLOBTOLOC
4796	mov	%g3, %o5
4797	call	printf
4798	 mov	%i6, %o4			! sp
4799!	wrpr	%g0, PSTATE_INTR, %pstate		! Allow IRQ service
4800!	wrpr	%g0, PSTATE_KERN, %pstate		! DenyIRQ service
4801	LOCTOGLOB
48021:
4803	.data
4804	.asciz	"rft_user: nsaved=%x pc=%d ctx=%x sp=%x npc=%p\n"
4805	_ALIGN
4806	.text
4807#endif
4808
4809	/*
4810	 * NB: only need to do this after a cache miss
4811	 */
4812#ifdef TRAPSTATS
4813	set	_C_LABEL(rftucnt), %g6
4814	lduw	[%g6], %g7
4815	inc	%g7
4816	stw	%g7, [%g6]
4817#endif
4818	/*
4819	 * Now check to see if any regs are saved in the pcb and restore them.
4820	 *
4821	 * Here we need to undo the damage caused by switching to a kernel
4822	 * stack.
4823	 *
4824	 * We will use alternate globals %g4..%g7 because %g1..%g3 are used
4825	 * by the data fault trap handlers and we don't want possible conflict.
4826	 */
4827
4828	sethi	%hi(CPCB), %g6
4829	rdpr	%otherwin, %g7			! restore register window controls
4830#ifdef DEBUG
4831	rdpr	%canrestore, %g5		! DEBUG
4832	tst	%g5				! DEBUG
4833	tnz	%icc, 1; nop			! DEBUG
4834!	mov	%g0, %g5			! There shoud be *NO* %canrestore
4835	add	%g7, %g5, %g7			! DEBUG
4836#endif
4837	wrpr	%g0, %g7, %canrestore
4838	LDPTR	[%g6 + %lo(CPCB)], %g6
4839	wrpr	%g0, 0, %otherwin
4840
4841	CHKPT(%g4,%g7,9)
4842	ldub	[%g6 + PCB_NSAVED], %g7		! Any saved reg windows?
4843	wrpr	%g0, WSTATE_USER, %wstate	! Need to know where our sp points
4844
4845#ifdef DEBUG
4846	set	rft_wcnt, %g4	! Keep track of all the windows we restored
4847	stw	%g7, [%g4]
4848#endif
4849
4850	brz,pt	%g7, 5f				! No saved reg wins
4851	 nop
4852	dec	%g7				! We can do this now or later.  Move to last entry
4853
4854#ifdef DEBUG
4855	rdpr	%canrestore, %g4			! DEBUG Make sure we've restored everything
4856	brnz,a,pn	%g4, 0f				! DEBUG
4857	 sir						! DEBUG we should NOT have any usable windows here
48580:							! DEBUG
4859	wrpr	%g0, 5, %tl
4860#endif
4861	rdpr	%otherwin, %g4
4862	sll	%g7, 7, %g5			! calculate ptr into rw64 array 8*16 == 128 or 7 bits
4863	brz,pt	%g4, 6f				! We should not have any user windows left
4864	 add	%g5, %g6, %g5
4865
4866	set	1f, %o0
4867	mov	%g7, %o1
4868	mov	%g4, %o2
4869	call	printf
4870	 wrpr	%g0, PSTATE_KERN, %pstate
4871	set	2f, %o0
4872	call	panic
4873	 nop
4874	NOTREACHED
4875	.data
48761:	.asciz	"pcb_nsaved=%x and otherwin=%x\n"
48772:	.asciz	"rft_user\n"
4878	_ALIGN
4879	.text
48806:
48813:
4882	restored					! Load in the window
4883	restore						! This should not trap!
4884	ldx	[%g5 + PCB_RW + ( 0*8)], %l0		! Load the window from the pcb
4885	ldx	[%g5 + PCB_RW + ( 1*8)], %l1
4886	ldx	[%g5 + PCB_RW + ( 2*8)], %l2
4887	ldx	[%g5 + PCB_RW + ( 3*8)], %l3
4888	ldx	[%g5 + PCB_RW + ( 4*8)], %l4
4889	ldx	[%g5 + PCB_RW + ( 5*8)], %l5
4890	ldx	[%g5 + PCB_RW + ( 6*8)], %l6
4891	ldx	[%g5 + PCB_RW + ( 7*8)], %l7
4892
4893	ldx	[%g5 + PCB_RW + ( 8*8)], %i0
4894	ldx	[%g5 + PCB_RW + ( 9*8)], %i1
4895	ldx	[%g5 + PCB_RW + (10*8)], %i2
4896	ldx	[%g5 + PCB_RW + (11*8)], %i3
4897	ldx	[%g5 + PCB_RW + (12*8)], %i4
4898	ldx	[%g5 + PCB_RW + (13*8)], %i5
4899	ldx	[%g5 + PCB_RW + (14*8)], %i6
4900	ldx	[%g5 + PCB_RW + (15*8)], %i7
4901
4902#ifdef DEBUG
4903	stx	%g0, [%g5 + PCB_RW + (14*8)]		! DEBUG mark that we've saved this one
4904#endif
4905
4906	cmp	%g5, %g6
4907	bgu,pt	%xcc, 3b				! Next one?
4908	 dec	8*16, %g5
4909
4910	rdpr	%ver, %g5
4911	stb	%g0, [%g6 + PCB_NSAVED]			! Clear them out so we won't do this again
4912	and	%g5, CWP, %g5
4913	add	%g5, %g7, %g4
4914	dec	1, %g5					! NWINDOWS-1-1
4915	wrpr	%g5, 0, %cansave
4916	wrpr	%g0, 0, %canrestore			! Make sure we have no freeloaders XXX
4917	wrpr	%g0, WSTATE_USER, %wstate		! Save things to user space
4918	mov	%g7, %g5				! We already did one restore
49194:
4920	rdpr	%canrestore, %g4
4921	inc	%g4
4922	deccc	%g5
4923	wrpr	%g4, 0, %cleanwin			! Make *sure* we don't trap to cleanwin
4924	bge,a,pt	%xcc, 4b				! return to starting regwin
4925	 save	%g0, %g0, %g0				! This may force a datafault
4926
4927#ifdef DEBUG
4928	wrpr	%g0, 0, %tl
4929#endif
4930#ifdef TRAPSTATS
4931	set	_C_LABEL(rftuld), %g5
4932	lduw	[%g5], %g4
4933	inc	%g4
4934	stw	%g4, [%g5]
4935#endif
4936	!!
4937	!! We can't take any save faults in here 'cause they will never be serviced
4938	!!
4939
4940#ifdef DEBUG
4941	sethi	%hi(CPCB), %g5
4942	LDPTR	[%g5 + %lo(CPCB)], %g5
4943	ldub	[%g5 + PCB_NSAVED], %g5		! Any saved reg windows?
4944	tst	%g5
4945	tnz	%icc, 1; nop			! Debugger if we still have saved windows
4946	bne,a	rft_user			! Try starting over again
4947	 sethi	%hi(_C_LABEL(want_ast)), %g7
4948#endif
4949	/*
4950	 * Set up our return trapframe so we can recover if we trap from here
4951	 * on in.
4952	 */
4953	wrpr	%g0, 1, %tl			! Set up the trap state
4954	wrpr	%g2, 0, %tpc
4955	wrpr	%g3, 0, %tnpc
4956	ba,pt	%icc, 6f
4957	 wrpr	%g1, %g0, %tstate
4958
49595:
4960	/*
4961	 * Set up our return trapframe so we can recover if we trap from here
4962	 * on in.
4963	 */
4964	wrpr	%g0, 1, %tl			! Set up the trap state
4965	wrpr	%g2, 0, %tpc
4966	wrpr	%g3, 0, %tnpc
4967	wrpr	%g1, %g0, %tstate
4968	restore
49696:
4970	CHKPT(%g4,%g7,0xa)
4971	rdpr	%canrestore, %g5
4972	wrpr	%g5, 0, %cleanwin			! Force cleanup of kernel windows
4973
4974#ifdef NOTDEF_DEBUG
4975	ldx	[%g6 + CC64FSZ + STKB + TF_L + (0*8)], %g5! DEBUG -- get proper value for %l0
4976	cmp	%l0, %g5
4977	be,a,pt %icc, 1f
4978	 nop
4979!	sir			! WATCHDOG
4980	set	badregs, %g1	! Save the suspect regs
4981	stw	%l0, [%g1+(4*0)]
4982	stw	%l1, [%g1+(4*1)]
4983	stw	%l2, [%g1+(4*2)]
4984	stw	%l3, [%g1+(4*3)]
4985	stw	%l4, [%g1+(4*4)]
4986	stw	%l5, [%g1+(4*5)]
4987	stw	%l6, [%g1+(4*6)]
4988	stw	%l7, [%g1+(4*7)]
4989	stw	%i0, [%g1+(4*8)+(4*0)]
4990	stw	%i1, [%g1+(4*8)+(4*1)]
4991	stw	%i2, [%g1+(4*8)+(4*2)]
4992	stw	%i3, [%g1+(4*8)+(4*3)]
4993	stw	%i4, [%g1+(4*8)+(4*4)]
4994	stw	%i5, [%g1+(4*8)+(4*5)]
4995	stw	%i6, [%g1+(4*8)+(4*6)]
4996	stw	%i7, [%g1+(4*8)+(4*7)]
4997	save
4998	inc	%g7
4999	wrpr	%g7, 0, %otherwin
5000	wrpr	%g0, 0, %canrestore
5001	wrpr	%g0, WSTATE_KERN, %wstate	! Need to know where our sp points
5002	set	rft_wcnt, %g4	! Restore nsaved before trapping
5003	sethi	%hi(CPCB), %g6
5004	LDPTR	[%g6 + %lo(CPCB)], %g6
5005	lduw	[%g4], %g4
5006	stb	%g4, [%g6 + PCB_NSAVED]
5007	ta	1
5008	sir
5009	.data
5010badregs:
5011	.space	16*4
5012	.text
50131:
5014#endif
5015
5016	rdpr	%tstate, %g1
5017	rdpr	%cwp, %g7			! Find our cur window
5018	andn	%g1, CWP, %g1			! Clear it from %tstate
5019	wrpr	%g1, %g7, %tstate		! Set %tstate with %cwp
5020	CHKPT(%g4,%g7,0xb)
5021
5022	wr	%g0, ASI_DMMU, %asi		! restore the user context
5023	ldxa	[CTX_SECONDARY] %asi, %g4
5024	sethi	%hi(KERNBASE), %g7		! Should not be needed due to retry
5025	stxa	%g4, [CTX_PRIMARY] %asi
5026	membar	#Sync				! Should not be needed due to retry
5027	flush	%g7				! Should not be needed due to retry
5028	CLRTT
5029	CHKPT(%g4,%g7,0xd)
5030#ifdef TRAPTRACE
5031	set	trap_trace, %g2
5032	lduw	[%g2+TRACEDIS], %g4
5033	brnz,pn	%g4, 1f
5034	 nop
5035	lduw	[%g2+TRACEPTR], %g3
5036	rdpr	%tl, %g4
5037	mov	1, %g5
5038	set	CURPROC, %g6
5039	sllx	%g4, 13, %g4
5040	LDPTR	[%g6], %g6
5041!	clr	%g6		! DISABLE PID
5042	or	%g4, %g5, %g4
5043	mov	%g0, %g5
5044	brz,pn	%g6, 2f
5045	 andncc	%g3, (TRACESIZ-1), %g0
5046!	ldsw	[%g6+P_PID], %g5	! Load PID
50472:
5048
5049	set	CPCB, %g6	! Load up nsaved
5050	LDPTR	[%g6], %g6
5051	ldub	[%g6 + PCB_NSAVED], %g6
5052	sllx	%g6, 9, %g6
5053	or	%g6, %g4, %g4
5054
5055	movnz	%icc, %g0, %g3	! Wrap if needed
5056	rdpr	%tstate, %g6
5057	rdpr	%tpc, %g7
5058	sth	%g4, [%g2+%g3]
5059	inc	2, %g3
5060	sth	%g5, [%g2+%g3]
5061	inc	2, %g3
5062	stw	%g6, [%g2+%g3]
5063	inc	4, %g3
5064	stw	%sp, [%g2+%g3]
5065	inc	4, %g3
5066	stw	%g7, [%g2+%g3]
5067	inc	4, %g3
5068	mov	TLB_TAG_ACCESS, %g7
5069	ldxa	[%g7] ASI_DMMU, %g7
5070	stw	%g7, [%g2+%g3]
5071	inc	4, %g3
5072	stw	%g3, [%g2+TRACEPTR]
50731:
5074#endif
5075#ifdef TRAPSTATS
5076	set	_C_LABEL(rftudone), %g1
5077	lduw	[%g1], %g2
5078	inc	%g2
5079	stw	%g2, [%g1]
5080#endif
5081#ifdef DEBUG
5082	sethi	%hi(CPCB), %g5
5083	LDPTR	[%g5 + %lo(CPCB)], %g5
5084	ldub	[%g5 + PCB_NSAVED], %g5		! Any saved reg windows?
5085	tst	%g5
5086	tnz	%icc, 1; nop			! Debugger if we still have saved windows!
5087#endif
5088	wrpr	%g0, 0, %pil			! Enable all interrupts
5089	retry
5090
! exported end marker for kernel gdb -- everything above is trap handling
! code; no instructions belong to it.
	.globl	_C_LABEL(endtrapcode)
_C_LABEL(endtrapcode):
5094
#ifdef DDB
!!!
!!! Dump the DTLB to phys address in %o0 and print it
!!!
!!! Only toast a few %o registers
!!!
!!! Walks all 64 DTLB entries via diagnostic ASI accesses and stores
!!! each (tag, data) pair as two consecutive 64-bit words at [%o0].
!!! In: %o0 = destination buffer.  Clobbers %o0-%o4.
	.globl	dump_dtlb
dump_dtlb:
	clr	%o1				! %o1 = diag register offset (entry * 8)
	add	%o1, (64*8), %o3		! %o3 = end offset (64 entries)
1:
	ldxa	[%o1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	stx	%o2, [%o0]
	membar	#Sync
	inc	8, %o0
	ldxa	[%o1] ASI_DMMU_TLB_DATA, %o4
	membar	#Sync
	inc	8, %o1
	stx	%o4, [%o0]
	cmp	%o1, %o3
	membar	#Sync
	bl	1b
	 inc	8, %o0
	retl
	 nop
#endif /* DDB */
#if defined(DDB)
!!! Print all 64 DTLB entries (tag and data) through db_printf, two
!!! entries per output line.  The _LP64 variant prints each value as a
!!! single long; the 32-bit variant splits each 64-bit value into
!!! high:low word pairs because the ABI can't pass 64-bit printf args.
	.globl	print_dtlb
print_dtlb:
#ifdef _LP64
	save	%sp, -CC64FSZ, %sp
	clr	%l1				! %l1 = diag register offset
	add	%l1, (64*8), %l3		! %l3 = end offset (64 entries)
	clr	%l2				! %l2 = entry index for printout
1:
	! Even entry: format string 2f (no newline).
	ldxa	[%l1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	mov	%l2, %o1
	ldxa	[%l1] ASI_DMMU_TLB_DATA, %o3
	membar	#Sync
	inc	%l2
	set	2f, %o0
	call	_C_LABEL(db_printf)
	 inc	8, %l1

	! Odd entry: format string 3f (ends the line).
	ldxa	[%l1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	mov	%l2, %o1
	ldxa	[%l1] ASI_DMMU_TLB_DATA, %o3
	membar	#Sync
	inc	%l2
	set	3f, %o0
	call	_C_LABEL(db_printf)
	 inc	8, %l1

	cmp	%l1, %l3
	bl	1b
	 inc	8, %l0

	ret
	 restore
	.data
2:
	.asciz	"%2d:%016lx %016lx "
3:
	.asciz	"%2d:%016lx %016lx\r\n"
	.text
#else
	save	%sp, -CC64FSZ, %sp
	clr	%l1				! %l1 = diag register offset
	add	%l1, (64*8), %l3		! %l3 = end offset (64 entries)
	clr	%l2				! %l2 = entry index for printout
1:
	ldxa	[%l1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	srl	%o2, 0, %o3			! low word of tag
	mov	%l2, %o1
	srax	%o2, 32, %o2			! high word of tag
	ldxa	[%l1] ASI_DMMU_TLB_DATA, %o4
	membar	#Sync
	srl	%o4, 0, %o5			! low word of data
	inc	%l2
	srax	%o4, 32, %o4			! high word of data
	set	2f, %o0
	call	_C_LABEL(db_printf)
	 inc	8, %l1

	ldxa	[%l1] ASI_DMMU_TLB_TAG, %o2
	membar	#Sync
	srl	%o2, 0, %o3
	mov	%l2, %o1
	srax	%o2, 32, %o2
	ldxa	[%l1] ASI_DMMU_TLB_DATA, %o4
	membar	#Sync
	srl	%o4, 0, %o5
	inc	%l2
	srax	%o4, 32, %o4
	set	3f, %o0
	call	_C_LABEL(db_printf)
	 inc	8, %l1

	cmp	%l1, %l3
	bl	1b
	 inc	8, %l0

	ret
	 restore
	.data
2:
	.asciz	"%2d:%08x:%08x %08x:%08x "
3:
	.asciz	"%2d:%08x:%08x %08x:%08x\r\n"
	.text
#endif
#endif
5212
	.align	8
! Kernel entry point from the bootloader / PROM.
dostart:
	wrpr	%g0, 0, %tick	! XXXXXXX clear %tick register for now
	mov	1, %g1
	sllx	%g1, 63, %g1
	wr	%g1, TICK_CMPR	! XXXXXXX clear and disable %tick_cmpr as well
	/*
	 * Startup.
	 *
	 * The Sun FCODE bootloader is nice and loads us where we want
	 * to be.  We have a full set of mappings already set up for us.
	 *
	 * I think we end up having an entire 16M allocated to us.
	 *
	 * We enter with the prom entry vector in %o0, dvec in %o1,
	 * and the bootops vector in %o2.
	 *
	 * All we need to do is:
	 *
	 *	1:	Save the prom vector
	 *
	 *	2:	Create a decent stack for ourselves
	 *
	 *	3:	Install the permanent 4MB kernel mapping
	 *
	 *	4:	Call the C language initialization code
	 *
	 */

	/*
	 * Set the psr into a known state:
	 * Set supervisor mode, interrupt level >= 13, traps enabled
	 */
	wrpr	%g0, 13, %pil
	wrpr	%g0, PSTATE_INTR|PSTATE_PEF, %pstate
	wr	%o0, FPRS_FEF, %fprs		! Turn on FPU
#ifdef DDB
	/*
	 * First, check for DDB arguments.  A pointer to an argument
	 * is passed in %o1 whose length is passed in %o2.  Our
	 * bootloader passes in a magic number as the first argument,
	 * followed by esym as argument 2, so check that %o2 == 8,
	 * then extract esym and check the magic number.
	 *
	 *  Oh, yeah, start of elf symtab is arg 3.
	 */
	cmp	%o2, 8
	blt	1f			! Not enuff args

	/*
	 * First we'll see if we were loaded by a 64-bit bootloader
	 */
	 btst	0x7, %o1		! Check alignment
	bne	0f
	 set	0x44444230, %l3

	ldx	[%o1], %l4
	cmp	%l3, %l4		! chk magic
	bne	%xcc, 0f
	 nop

	ldx	[%o1+8], %l4
	sethi	%hi(_C_LABEL(esym)), %l3	! store _esym
	STPTR	%l4, [%l3 + %lo(_C_LABEL(esym))]

	cmp	%o2, 12
	blt	1f
	 nop

	ldx	[%o1+16], %l4
	sethi	%hi(_C_LABEL(ssym)), %l3	! store _ssym
	ba	1f
	 STPTR	%l4, [%l3 + %lo(_C_LABEL(ssym))]
0:
	/*
	 * Now we can try again for a 32-bit bootloader
	 */
	cmp	%o2, 8
	blt	1f			! Not enuff args

	 set	0x44444230, %l3
	ld	[%o1], %l4
	cmp	%l3, %l4		! chk magic
	bne	1f
	 nop

	ld	[%o1+4], %l4
	sethi	%hi(_C_LABEL(esym)), %l3	! store _esym
	STPTR	%l4, [%l3 + %lo(_C_LABEL(esym))]

	cmp	%o2, 12
	blt	1f
	 nop

	ld	[%o1+8], %l4
	sethi	%hi(_C_LABEL(ssym)), %l3	! store _ssym
	STPTR	%l4, [%l3 + %lo(_C_LABEL(ssym))]
1:
#endif
	/*
	 * Step 1: Save rom entry pointer
	 */

	mov	%o4, %g7	! save prom vector pointer
	set	romp, %o5
	STPTR	%o4, [%o5]	! It's initialized data, I hope

	/*
	 * Step 2: Set up a v8-like stack if we need to
	 */

#ifdef _LP64
	! 64-bit kernel: %sp must carry the stack BIAS (odd %sp); add it
	! if the bootloader gave us an unbiased stack.
	btst	1, %sp
	bnz,pt	%icc, 0f
	 nop
	add	%sp, -BIAS, %sp
#else
	! 32-bit kernel: remove the BIAS if we got a biased (64-bit) stack.
	btst	1, %sp
	bz,pt	%icc, 0f
	 nop
	add	%sp, BIAS, %sp
#endif
0:
	/*
	 * Step 3: clear BSS.  This may just be paranoia; the boot
	 * loader might already do it for us; but what the hell.
	 */
	set	_C_LABEL(edata), %o0		! bzero(edata, end - edata)
	set	_C_LABEL(end), %o1
	call	_C_LABEL(bzero)
	 sub	%o1, %o0, %o1

	/*
	 * Step 4: compute number of windows and set up tables.
	 * We could do some of this later.
	 *
	 * XXX I forget: why are we doing this?
	 */
	rdpr	%ver, %g1
	and	%g1, 0x0f, %g1		! want just the CWP bits
	add	%g1, 1, %o0		! compute nwindows
	sethi	%hi(_C_LABEL(nwindows)), %o1	! may as well tell everyone
	st	%o0, [%o1 + %lo(_C_LABEL(nwindows))]

#if 0
	/*
	 * Disable the DCACHE entirely for debug.
	 */
	ldxa	[%g0] ASI_MCCR, %o1
	andn	%o1, MCCR_DCACHE_EN, %o1
	stxa	%o1, [%g0] ASI_MCCR
	membar	#Sync
#endif

	/*
	 * Ready to run C code; finish bootstrap.
	 */
	! Probe the width of the context register by writing -1 and
	! reading it back: the value that sticks is the max context number.
	set	CTX_SECONDARY, %o1		! Store -1 in the context register
	mov	-1, %o2
	stxa	%o2, [%o1] ASI_DMMU
	membar	#Sync
	ldxa	[%o1] ASI_DMMU, %o0		! then read it back
	membar	#Sync
	stxa	%g0, [%o1] ASI_DMMU
	membar	#Sync
	clr	%g4				! Clear data segment pointer
	call	_C_LABEL(bootstrap)
	 inc	%o0				! and add 1 to discover maxctx

	/*
	 * pmap_bootstrap should have allocated a stack for proc 0 and
	 * stored the start and end in u0 and estack0.  Switch to that
	 * stack now.
	 */
5387
5388/*
5389 * Initialize a CPU.  This is used both for bootstrapping the first CPU
5390 * and spinning up each subsequent CPU.  Basically:
5391 *
5392 *	Establish the 4MB locked mappings for kernel data and text.
5393 *	Locate the cpu_info structure for this CPU.
5394 *	Establish a locked mapping for interrupt stack.
5395 *	Switch to the initial stack.
5396 *	Call the routine passed in in cpu_info->ci_spinup
5397 */
5398
5399
5400_C_LABEL(cpu_initialize):
5401	/*
5402	 * Step 5: install the permanent 4MB kernel mapping in both the
5403	 * immu and dmmu.  We will clear out other mappings later.
5404	 *
5405	 * Register usage in this section:
5406	 *
5407	 *	%l0 = ktext (also KERNBASE)
5408	 *	%l1 = ektext
5409	 *	%l2 = ktextp/TTE Data for text w/o low bits
5410	 *	%l3 = kdata (also DATA_START)
5411	 *	%l4 = ekdata
5412	 *	%l5 = kdatap/TTE Data for data w/o low bits
5413	 *	%l6 = 4MB
5414	 *	%l7 = 4MB-1
5415	 *	%o0-%o5 = tmp
5416	 */
5417
5418#ifdef	NO_VCACHE
5419	!! Turn off D$ in LSU
5420	ldxa	[%g0] ASI_LSU_CONTROL_REGISTER, %g1
5421	bclr	MCCR_DCACHE_EN, %g1
5422	stxa	%g1, [%g0] ASI_LSU_CONTROL_REGISTER
5423	membar	#Sync
5424#endif
5425
5426	wrpr	%g0, 0, %tl			! Make sure we're not in NUCLEUS mode
5427	sethi	%hi(KERNBASE), %l0		! Find our xlation
5428	sethi	%hi(DATA_START), %l3
5429
5430	set	_C_LABEL(ktextp), %l2		! Find phys addr
5431	ldx	[%l2], %l2			! The following gets ugly:	We need to load the following mask
5432	set	_C_LABEL(kdatap), %l5
5433	ldx	[%l5], %l5
5434
5435	set	_C_LABEL(ektext), %l1		! And the ends...
5436	LDPTR	[%l1], %l1
5437	set	_C_LABEL(ekdata), %l4
5438	LDPTR	[%l4], %l4
5439
5440	sethi	%hi(0xe0000000), %o0		! V=1|SZ=11|NFO=0|IE=0
5441	sllx	%o0, 32, %o0			! Shift it into place
5442
5443	sethi	%hi(0x400000), %l6		! Create a 4MB mask
5444	add	%l6, -1, %l7
5445
5446	mov	-1, %o1				! Create a nice mask
5447	sllx	%o1, 41, %o1			! Mask off high bits
5448	or	%o1, 0xfff, %o1			! We can just load this in 12 (of 13) bits
5449
5450	andn	%l2, %o1, %l2			! Mask the phys page number
5451	andn	%l5, %o1, %l5			! Mask the phys page number
5452
5453	or	%l2, %o0, %l2			! Now take care of the high bits
5454	or	%l5, %o0, %l5			! Now take care of the high bits
5455
5456	wrpr	%g0, PSTATE_KERN, %pstate	! Disable interrupts
5457
5458#ifdef DEBUG
5459	set	_C_LABEL(pmapdebug), %o1
5460	ld	[%o1], %o1
5461	sethi	%hi(0x40000), %o2
5462	btst	%o2, %o1
5463	bz	0f
5464
5465	 set	1f, %o0		! Debug printf for TEXT page
5466	srlx	%l0, 32, %o1
5467	srl	%l0, 0, %o2
5468	or	%l2, TTE_L|TTE_CP|TTE_CV|TTE_P, %o4	! And low bits:	L=1|CP=1|CV=1|E=0|P=1|W=1(ugh)|G=0
5469	srlx	%o4, 32, %o3
5470	call	_C_LABEL(prom_printf)
5471	 srl	%o4, 0, %o4
5472
5473	set	1f, %o0		! Debug printf for DATA page
5474	srlx	%l3, 32, %o1
5475	srl	%l3, 0, %o2
5476	or	%l5, TTE_L|TTE_CP|TTE_CV|TTE_P|TTE_W, %o4	! And low bits:	L=1|CP=1|CV=1|E=0|P=1|W=1(ugh)|G=0
5477	srlx	%o4, 32, %o3
5478	call	_C_LABEL(prom_printf)
5479	 srl	%o4, 0, %o4
5480	.data
54811:
5482	.asciz	"Setting DTLB entry %08x %08x data %08x %08x\r\n"
5483	_ALIGN
5484	.text
54850:
5486#endif
5487	mov	%l0, %o0			! Demap all of kernel dmmu text segment
5488	mov	%l3, %o1
5489	set	0x2000, %o2			! 8K page size
5490	add	%l1, %l7, %o5			! Extend to 4MB boundary
5491	andn	%o5, %l7, %o5
54920:
5493	stxa	%o0, [%o0] ASI_DMMU_DEMAP	! Demap text segment
5494	membar	#Sync
5495	cmp	%o0, %o5
5496	bleu	0b
5497	 add	%o0, %o2, %o0
5498
5499	add	%l4, %l7, %o5			! Extend to 4MB boundary
5500	andn	%o5, %l7, %o5
55010:
5502	stxa	%o1, [%o1] ASI_DMMU_DEMAP	! Demap data segment
5503	membar	#Sync
5504	cmp	%o1, %o5
5505	bleu	0b
5506	 add	%o1, %o2, %o1
5507
5508	set	(1<<14)-8, %o0			! Clear out DCACHE
55091:
5510	stxa	%g0, [%o0] ASI_DCACHE_TAG	! clear DCACHE line
5511	membar	#Sync
5512	brnz,pt	%o0, 1b
5513	 dec	8, %o0
5514
5515	/*
5516	 * First map data segment into the DMMU.
5517	 */
5518	set	TLB_TAG_ACCESS, %o0		! Now map it back in with a locked TTE
5519	mov	%l3, %o1
5520#ifdef NO_VCACHE
5521	! And low bits:	L=1|CP=1|CV=0(ugh)|E=0|P=1|W=1|G=0
5522	or	%l5, TTE_L|TTE_CP|TTE_P|TTE_W, %o2
5523#else
5524	! And low bits:	L=1|CP=1|CV=1|E=0|P=1|W=1|G=0
5525	or	%l5, TTE_L|TTE_CP|TTE_CV|TTE_P|TTE_W, %o2
5526#endif
5527	set	1f, %o5
55282:
5529	stxa	%o1, [%o0] ASI_DMMU		! Set VA for DSEG
5530	membar	#Sync				! We may need more membar #Sync in here
5531	stxa	%o2, [%g0] ASI_DMMU_DATA_IN	! Store TTE for DSEG
5532	membar	#Sync				! We may need more membar #Sync in here
5533	flush	%o5				! Make IMMU see this too
55341:
5535	add	%o1, %l6, %o1			! increment VA
5536	cmp	%o1, %l4			! Next 4MB mapping....
5537	blu,pt	%xcc, 2b
5538	 add	%o2, %l6, %o2			! Increment tag
5539
5540	/*
5541	 * Next map the text segment into the DMMU so we can get at RODATA.
5542	 */
5543	mov	%l0, %o1
5544#ifdef NO_VCACHE
5545	! And low bits:	L=1|CP=1|CV=0(ugh)|E=0|P=1|W=0|G=0
5546	or	%l2, TTE_L|TTE_CP|TTE_P, %o2
5547#else
5548	! And low bits:	L=1|CP=1|CV=1|E=0|P=1|W=0|G=0
5549	or	%l2, TTE_L|TTE_CP|TTE_CV|TTE_P, %o2
5550#endif
55512:
5552	stxa	%o1, [%o0] ASI_DMMU		! Set VA for DSEG
5553	membar	#Sync				! We may need more membar #Sync in here
5554	stxa	%o2, [%g0] ASI_DMMU_DATA_IN	! Store TTE for DSEG
5555	membar	#Sync				! We may need more membar #Sync in here
5556	flush	%o5				! Make IMMU see this too
5557	add	%o1, %l6, %o1			! increment VA
5558	cmp	%o1, %l1			! Next 4MB mapping....
5559	blu,pt	%xcc, 2b
5560	 add	%o2, %l6, %o2			! Increment tag
5561
5562#ifdef DEBUG
5563	set	_C_LABEL(pmapdebug), %o1
5564	ld	[%o1], %o1
5565	sethi	%hi(0x40000), %o2
5566	btst	%o2, %o1
5567	bz	0f
5568
5569	set	1f, %o0		! Debug printf
5570	srlx	%l0, 32, %o1
5571	srl	%l0, 0, %o2
5572	or	%l2, TTE_L|TTE_CP|TTE_CV|TTE_P, %o4
5573	srlx	%o4, 32, %o3
5574	call	_C_LABEL(prom_printf)
5575	 srl	%o4, 0, %o4
5576	.data
55771:
5578	.asciz	"Setting ITLB entry %08x %08x data %08x %08x\r\n"
5579	_ALIGN
5580	.text
55810:
5582#endif
5583	/*
5584	 * Finished the DMMU, now we need to do the IMMU which is more
5585	 * difficult because we're execting instructions through the IMMU
5586	 * while we're flushing it.  We need to remap the entire kernel
5587	 * to a new context, flush the entire context 0 IMMU, map it back
5588	 * into context 0, switch to context 0, and flush context 1.
5589	 *
5590	 * Another interesting issue is that the flush instructions are
5591	 * translated through the DMMU, therefore we need to enter the
5592	 * mappings both in the IMMU and the DMMU so we can flush them
5593	 * correctly.
5594	 *
5595	 *  Start by mapping in the kernel text as context==1
5596	 */
5597	set	TLB_TAG_ACCESS, %o0
5598	or	%l0, 1, %o1			! Context = 1
5599	or	%l2, TTE_CP|TTE_P, %o2		! And low bits:	L=0|CP=1|CV=0|E=0|P=1|G=0
5600	set	1f, %o5
56012:
5602	stxa	%o1, [%o0] ASI_DMMU		! Make DMMU point to it
5603	membar	#Sync				! We may need more membar #Sync in here
5604	stxa	%o2, [%g0] ASI_DMMU_DATA_IN	! Store it
5605	membar	#Sync				! We may need more membar #Sync in here
5606	stxa	%o1, [%o0] ASI_IMMU		! Make IMMU point to it
5607	membar	#Sync				! We may need more membar #Sync in here
5608	flush	%o1-1				! Make IMMU see this too
5609	stxa	%o2, [%g0] ASI_IMMU_DATA_IN	! Store it
5610	membar	#Sync				! We may need more membar #Sync in here
5611	flush	%o5				! Make IMMU see this too
56121:
5613	add	%o1, %l6, %o1			! increment VA
5614	cmp	%o1, %l1			! Next 4MB mapping....
5615	blu,pt	%xcc, 2b
5616	 add	%o2, %l6, %o2			! Increment tag
5617
5618	!!
5619	!! Load 1 as primary context
5620	!!
5621	mov	1, %o0
5622	mov	CTX_PRIMARY, %o1
5623	stxa	%o0, [%o1] ASI_DMMU
5624	wrpr	%g0, 0, %tl			! Make SURE we're nucleus mode
5625	membar	#Sync				! This probably should be a flush, but it works
5626	flush	%o5				! This should be KERNBASE
5627
5628	!!
5629	!! Demap entire context 0 kernel
5630	!!
5631	or	%l0, DEMAP_PAGE_NUCLEUS, %o0	! Context = Nucleus
5632	add	%l1, %l7, %o1			! Demap all of kernel text seg
5633	andn	%o1, %l7, %o1			! rounded up to 4MB.
5634	set	0x2000, %o2			! 8K page size
56350:
5636	stxa	%o0, [%o0] ASI_IMMU_DEMAP	! Demap it
5637	membar	#Sync
5638	flush	%o5				! Assume low bits are benign
5639	cmp	%o0, %o1
5640	bleu,pt	%xcc, 0b			! Next page
5641	 add	%o0, %o2, %o0
5642
5643	or	%l3, DEMAP_PAGE_NUCLEUS, %o0	! Context = Nucleus
5644	add	%l4, %l7, %o1			! Demap all of kernel data seg
5645	andn	%o1, %l7, %o1			! rounded up to 4MB.
56460:
5647	stxa	%o0, [%o0] ASI_IMMU_DEMAP	! Demap it
5648	membar	#Sync
5649	flush	%o5				! Assume low bits are benign
5650	cmp	%o0, %o1
5651	bleu,pt	%xcc, 0b			! Next page
5652	 add	%o0, %o2, %o0
5653
5654	!!
5655	!!  Now, map in the kernel text as context==0
5656	!!
5657	set	TLB_TAG_ACCESS, %o0
5658	mov	%l0, %o1			! Context = 0
5659#ifdef NO_VCACHE
5660	! And low bits:	L=1|CP=1|CV=0(ugh)|E=0|P=1|W=1|G=0
5661	or	%l2, TTE_L|TTE_CP|TTE_P, %o2
5662#else
5663	! And low bits:	L=1|CP=1|CV=1|E=0|P=1|W=1|G=0
5664	or	%l2, TTE_L|TTE_CP|TTE_CV|TTE_P, %o2
5665#endif
56662:
5667	stxa	%o1, [%o0] ASI_IMMU		! Make IMMU point to it
5668	membar	#Sync				! We may need more membar #Sync in here
5669	stxa	%o2, [%g0] ASI_IMMU_DATA_IN	! Store it
5670	membar	#Sync				! We may need more membar #Sync in here
5671	flush	%o5				! Make IMMU see this too
5672	add	%o1, %l6, %o1			! increment VA
5673	cmp	%o1, %l1			! Next 4MB mapping....
5674	blu,pt	%xcc, 2b
5675	 add	%o2, %l6, %o2			! Increment tag
5676
5677	!!
5678	!! Restore 0 as primary context
5679	!!
5680	mov	CTX_PRIMARY, %o0
5681	stxa	%g0, [%o0] ASI_DMMU
5682	membar	#Sync				! No real reason for this XXXX
5683	flush	%o5
5684
5685	!!
5686	!! Demap context 1
5687	!!
5688#ifdef SPITFIRE
5689	mov	1, %o1
5690	mov	CTX_SECONDARY, %o0
5691	stxa	%o1, [%o0] ASI_DMMU
5692	membar	#Sync				! This probably should be a flush, but it works
5693	flush	%l0
5694	mov	DEMAP_CTX_SECONDARY, %o4
5695	stxa	%o4, [%o4] ASI_DMMU_DEMAP
5696	membar	#Sync
5697	stxa	%o4, [%o4] ASI_IMMU_DEMAP
5698	membar	#Sync
5699	flush	%l0
5700	stxa	%g0, [%o0] ASI_DMMU
5701	membar	#Sync
5702	flush	%l0
5703#else
5704	mov	1, %o1
5705	wrpr	%g0, 1, %tl			! Enter nucleus context
5706	mov	CTX_PRIMARY, %o0
5707	stxa	%o1, [%o0] ASI_DMMU
5708	membar	#Sync				! This probably should be a flush, but it works
5709	flush	%l0
5710	mov	DEMAP_CTX_PRIMARY, %o4
5711	stxa	%o4, [%o4] ASI_DMMU_DEMAP
5712	membar	#Sync
5713	stxa	%o4, [%o4] ASI_IMMU_DEMAP
5714	membar	#Sync
5715	flush	%l0
5716	stxa	%g0, [%o0] ASI_DMMU
5717	membar	#Sync
5718	flush	%l0
5719	wrpr	%g0, 0, %tl			! Exit nucleus context
5720#endif
5721#ifdef DEBUG
5722	set	_C_LABEL(pmapdebug), %o1
5723	ld	[%o1], %o1
5724	sethi	%hi(0x40000), %o2
5725	btst	%o2, %o1
5726	bz	0f
5727
5728	set	1f, %o0		! Debug printf
5729	call	_C_LABEL(prom_printf)
5730	.data
57311:
5732	.asciz	"Setting CPUINFO mappings...\r\n"
5733	_ALIGN
5734	.text
57350:
5736#endif
5737
5738	/*
5739	 * Step 6: hunt through cpus list and find the one that
5740	 * matches our UPAID.
5741	 */
5742	sethi	%hi(_C_LABEL(cpus)), %l1
5743	ldxa	[%g0] ASI_MID_REG, %l2
5744	LDPTR	[%l1 + %lo(_C_LABEL(cpus))], %l1
5745	srax	%l2, 17, %l2			! Isolate UPAID from CPU reg
5746	and	%l2, 0x1f, %l2
57470:
5748	ld	[%l1 + CI_UPAID], %l3		! Load UPAID
5749	cmp	%l3, %l2			! Does it match?
5750	bne,a,pt	%icc, 0b		! no
5751	 ld	[%l1 + CI_NEXT], %l1		! Load next cpu_info pointer
5752
5753
5754	/*
5755	 * Get pointer to our cpu_info struct
5756	 */
5757
5758	ldx	[%l1 + CI_PADDR], %l1		! Load the interrupt stack's PA
5759
5760	sethi	%hi(0xa0000000), %l2		! V=1|SZ=01|NFO=0|IE=0
5761	sllx	%l2, 32, %l2			! Shift it into place
5762
5763	mov	-1, %l3				! Create a nice mask
5764	sllx	%l3, 41, %l4			! Mask off high bits
5765	or	%l4, 0xfff, %l4			! We can just load this in 12 (of 13) bits
5766
5767	andn	%l1, %l4, %l1			! Mask the phys page number
5768
5769	or	%l2, %l1, %l1			! Now take care of the high bits
5770#ifdef NO_VCACHE
5771	or	%l1, TTE_L|TTE_CP|TTE_P|TTE_W, %l2	! And low bits:	L=1|CP=1|CV=0|E=0|P=1|W=0|G=0
5772#else
5773	or	%l1, TTE_L|TTE_CP|TTE_CV|TTE_P|TTE_W, %l2	! And low bits:	L=1|CP=1|CV=1|E=0|P=1|W=0|G=0
5774#endif
5775
5776	!!
5777	!!  Now, map in the interrupt stack as context==0
5778	!!
5779	set	TLB_TAG_ACCESS, %l5
5780	set	1f, %o5
5781	sethi	%hi(INTSTACK), %l0
5782	stxa	%l0, [%l5] ASI_DMMU		! Make DMMU point to it
5783	membar	#Sync				! We may need more membar #Sync in here
5784	stxa	%l2, [%g0] ASI_DMMU_DATA_IN	! Store it
5785	membar	#Sync				! We may need more membar #Sync in here
5786	flush	%o5
57871:
5788!!! Make sure our stack's OK.
5789	flushw
5790	sethi	%hi(CPUINFO_VA+CI_INITSTACK), %l0
5791	LDPTR	[%l0 + %lo(CPUINFO_VA+CI_INITSTACK)], %l0
5792 	add	%l0, - CC64FSZ - 80, %l0	! via syscall(boot_me_up) or somesuch
5793#ifdef _LP64
5794	andn	%l0, 0x0f, %l0			! Needs to be 16-byte aligned
5795	sub	%l0, BIAS, %l0			! and biased
5796#endif
5797	mov	%l0, %sp
5798	flushw
5799
5800	/*
5801	 * Step 7: change the trap base register, and install our TSB
5802	 *
5803	 * XXXX -- move this to CPUINFO_VA+32KB?
5804	 */
5805	sethi	%hi(0x1fff), %l2
5806	set	_C_LABEL(tsb), %l0
5807	LDPTR	[%l0], %l0
5808	set	_C_LABEL(tsbsize), %l1
5809	or	%l2, %lo(0x1fff), %l2
5810	ld	[%l1], %l1
5811	andn	%l0, %l2, %l0			! Mask off size and split bits
5812	or	%l0, %l1, %l0			! Make a TSB pointer
5813!	srl	%l0, 0, %l0	! DEBUG -- make sure this is a valid pointer by zeroing the high bits
5814
5815#ifdef DEBUG
5816	set	_C_LABEL(pmapdebug), %o1
5817	ld	[%o1], %o1
5818	sethi	%hi(0x40000), %o2
5819	btst	%o2, %o1
5820	bz	0f
5821
5822	set	1f, %o0		! Debug printf
5823	srlx	%l0, 32, %o1
5824	call	_C_LABEL(prom_printf)
5825	 srl	%l0, 0, %o2
5826	.data
58271:
5828	.asciz	"Setting TSB pointer %08x %08x\r\n"
5829	_ALIGN
5830	.text
58310:
5832#endif
5833
5834	set	TSB, %l2
5835	stxa	%l0, [%l2] ASI_IMMU		! Install insn TSB pointer
5836	membar	#Sync				! We may need more membar #Sync in here
5837	stxa	%l0, [%l2] ASI_DMMU		! Install data TSB pointer
5838	membar	#Sync
5839	set	_C_LABEL(trapbase), %l1
5840	call	_C_LABEL(prom_set_trap_table)	! Now we should be running 100% from our handlers
5841	 mov	%l1, %o0
5842	wrpr	%l1, 0, %tba			! Make sure the PROM didn't foul up.
5843	wrpr	%g0, WSTATE_KERN, %wstate
5844
5845#ifdef DEBUG
5846	wrpr	%g0, 1, %tl			! Debug -- start at tl==3 so we'll watchdog
5847	wrpr	%g0, 0x1ff, %tt			! Debug -- clear out unused trap regs
5848	wrpr	%g0, 0, %tpc
5849	wrpr	%g0, 0, %tnpc
5850	wrpr	%g0, 0, %tstate
5851#endif
5852
5853#ifdef NOTDEF_DEBUG
5854	set	1f, %o0		! Debug printf
5855	srax	%l0, 32, %o1
5856	call	_C_LABEL(prom_printf)
5857	 srl	%l0, 0, %o2
5858	.data
58591:
5860	.asciz	"Our trap handler is enabled\r\n"
5861	_ALIGN
5862	.text
5863#endif
5864	/*
5865	 * Call our startup routine.
5866	 */
5867
5868	sethi	%hi(CPUINFO_VA+CI_SPINUP), %l0
5869	LDPTR	[%l0 + %lo(CPUINFO_VA+CI_SPINUP)], %o1
5870
5871	call	%o1				! Call routine
5872	 clr	%o0				! our frame arg is ignored
5873	NOTREACHED
5874
5875	set	1f, %o0				! Main should never come back here
5876	call	_C_LABEL(panic)
5877	 nop
5878	.data
58791:
5880	.asciz	"main() returned\r\n"
5881	_ALIGN
5882	.text
5883
/*
 * openfirmware(cell* param);
 *
 * OpenFirmware entry point
 *
 * Calls through the PROM vector `romp' with the PIL raised to at least
 * PIL_HIGH and the %g registers saved in locals, since the PROM may
 * clobber them.  The stack-bias bit (%sp & 1, set by the odd BIAS) is
 * used to tell a v9 (64-bit, biased) frame from a v8 (32-bit) one.
 *
 * If we're running in 32-bit mode we need to convert to a 64-bit stack
 * and 64-bit cells.  The cells we'll allocate off the stack for simplicity.
 */
	.align 8
ENTRY(openfirmware)
	sethi	%hi(romp), %o4
	andcc	%sp, 1, %g0			! Biased (64-bit) stack has low bit set
	bz,pt	%icc, 1f			! Unbiased -> v8 path below
	 LDPTR	[%o4+%lo(romp)], %o4		! v9 stack, just load the addr and callit
	save	%sp, -CC64FSZ, %sp
	rdpr	%pil, %i2
	mov	PIL_HIGH, %i3
	cmp	%i3, %i2			! %i3 = max(PIL_HIGH, current PIL)
	movle	%icc, %i2, %i3
	wrpr	%g0, %i3, %pil
	mov	%i0, %o0			! Pass the cell array through
	mov	%g1, %l1			! Save globals; the PROM may trash them
	mov	%g2, %l2
	mov	%g3, %l3
	mov	%g4, %l4
	mov	%g5, %l5
	mov	%g6, %l6
	mov	%g7, %l7
	rdpr	%pstate, %l0			! Save %pstate for restoration below
	jmpl	%i4, %o7			! Call the PROM entry point
	! NOTE(review): this condition is the mirror image of the one in the
	! v8 path below -- presumably "stack mode != kernel mode, or trap
	! tracing" means run the PROM with interrupts off; confirm.
#if !defined(_LP64) || defined(TRAPTRACE)
	 wrpr	%g0, PSTATE_PROM, %pstate	! (delay) PROM pstate, interrupts off
#else
	 wrpr	%g0, PSTATE_PROM|PSTATE_IE, %pstate	! (delay) PROM pstate, interrupts on
#endif
	wrpr	%l0, %g0, %pstate		! Back to our saved %pstate
	mov	%l1, %g1			! Recover the globals
	mov	%l2, %g2
	mov	%l3, %g3
	mov	%l4, %g4
	mov	%l5, %g5
	mov	%l6, %g6
	mov	%l7, %g7
	wrpr	%i2, 0, %pil			! Restore the caller's PIL
	ret
	 restore	%o0, %g0, %o0		! (delay) propagate PROM return value

1:	! v8 -- need to screw with stack & params
#ifdef NOTDEF_DEBUG
	mov	%o7, %o5
	call	globreg_check
	 nop
	mov	%o5, %o7
#endif
	save	%sp, -CC64FSZ, %sp		! Get a new 64-bit stack frame
	add	%sp, -BIAS, %sp			! and bias it
	rdpr	%pstate, %l0			! Save %pstate for restoration below
	srl	%sp, 0, %sp			! Clamp the stack pointer to 32 bits
	rdpr	%pil, %i2	! s = splx(level)
	mov	%i0, %o0			! Pass the cell array through
	mov	PIL_HIGH, %i3
	mov	%g1, %l1			! Save globals; the PROM may trash them
	mov	%g2, %l2
	cmp	%i3, %i2			! %i3 = max(PIL_HIGH, current PIL)
	mov	%g3, %l3
	mov	%g4, %l4
	mov	%g5, %l5
	movle	%icc, %i2, %i3
	mov	%g6, %l6
	mov	%g7, %l7
	wrpr	%i3, %g0, %pil
	jmpl	%i4, %o7			! Call the PROM entry point
	! Enable 64-bit addresses for the prom
#if defined(_LP64) || defined(TRAPTRACE)
	 wrpr	%g0, PSTATE_PROM, %pstate	! (delay) PROM pstate, interrupts off
#else
	 wrpr	%g0, PSTATE_PROM|PSTATE_IE, %pstate	! (delay) PROM pstate, interrupts on
#endif
	wrpr	%l0, 0, %pstate			! Back to our saved %pstate
	wrpr	%i2, 0, %pil			! Restore the caller's PIL
	mov	%l1, %g1			! Recover the globals
	mov	%l2, %g2
	mov	%l3, %g3
	mov	%l4, %g4
	mov	%l5, %g5
	mov	%l6, %g6
	mov	%l7, %g7
	ret
	 restore	%o0, %g0, %o0		! (delay) propagate PROM return value
5973
/*
 * tlb_flush_pte(vaddr_t va, int ctx)
 *
 * Flush tte from both IMMU and DMMU.
 *
 * In:	%o0 = virtual address of the mapping, %o1 = context number.
 * Clobbers %g1, %g2, %o2-%o5.  Issues a "demap page" operation for the
 * page, once with the full 64-bit VA and once with the VA clamped to
 * 32 bits, against both TLBs.
 */
	.align 8
ENTRY(tlb_flush_pte)
#ifdef DEBUG
	set	DATA_START, %o4				! Forget any recent TLB misses
	stx	%g0, [%o4]
	stx	%g0, [%o4+16]
#endif
#ifdef DEBUG
	set	pmapdebug, %o3
	lduw	[%o3], %o3
!	movrz	%o1, -1, %o3				! Print on either pmapdebug & PDB_DEMAP or ctx == 0
	btst	0x0020, %o3
	bz,pt	%icc, 2f
	 nop
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%i1, %o1
	andn	%i0, 0xfff, %o3
	or	%o3, 0x010, %o3
	call	_C_LABEL(printf)
	 mov	%i0, %o2
	restore
	.data
1:
	.asciz	"tlb_flush_pte:	demap ctx=%x va=%08x res=%x\r\n"
	_ALIGN
	.text
2:
#endif
#ifdef	SPITFIRE
	mov	CTX_SECONDARY, %o2
	andn	%o0, 0xfff, %g2				! drop unused va bits
	ldxa	[%o2] ASI_DMMU, %g1			! Save secondary context
	sethi	%hi(KERNBASE), %o4
	membar	#LoadStore
	stxa	%o1, [%o2] ASI_DMMU			! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_SECONDARY, %g2		! Demap page from secondary context only
	stxa	%g2, [%g2] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync
	stxa	%g2, [%g2] ASI_IMMU_DEMAP		! to both TLBs
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	srl	%g2, 0, %g2				! and make sure it's both 32- and 64-bit entries
	stxa	%g2, [%g2] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync
	stxa	%g2, [%g2] ASI_IMMU_DEMAP		! Do the demap
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	stxa	%g1, [%o2] ASI_DMMU			! Restore secondary asi
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	retl
	 nop
#else
	!!
	!! Cheetahs do not support flushing the IMMU from secondary context
	!! so we demap from the primary context register at %tl > 0 instead.
	!!
	rdpr	%tl, %o3
	mov	CTX_PRIMARY, %o2
	brnz,pt	%o3, 1f					! Already above %tl 0?
	 andn	%o0, 0xfff, %g2				! drop unused va bits
	wrpr	%g0, 1, %tl				! Make sure we're NUCLEUS
1:
	ldxa	[%o2] ASI_DMMU, %g1			! Save primary context
	sethi	%hi(KERNBASE), %o4
	membar	#LoadStore
	stxa	%o1, [%o2] ASI_DMMU			! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_PRIMARY, %g2		! Demap page from primary context only
	stxa	%g2, [%g2] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync
	stxa	%g2, [%g2] ASI_IMMU_DEMAP		! to both TLBs
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	srl	%g2, 0, %g2				! and make sure it's both 32- and 64-bit entries
	stxa	%g2, [%g2] ASI_DMMU_DEMAP		! Do the demap
	membar	#Sync
	stxa	%g2, [%g2] ASI_IMMU_DEMAP		! Do the demap
	membar	#Sync					! No real reason for this XXXX
	flush	%o4
	stxa	%g1, [%o2] ASI_DMMU			! Restore primary context
	membar	#Sync					! No real reason for this XXXX
	brz,pt	%o3, 1f					! Entered at %tl == 0?
	 flush	%o4
	retl
	 nop
1:
	retl
	 wrpr	%g0, 0, %tl				! Return to kernel mode.
#endif
/*
 * tlb_flush_ctx(int ctx)
 *
 * Flush entire context from both IMMU and DMMU.
 *
 * In:	%o0 = context number to demap (must not be 0, the NUCLEUS/kernel
 *	context -- DIAGNOSTIC kernels panic on that).
 * Clobbers %g1, %g2, %o2-%o4.
 *
 * This must use a "demap context" operation (DEMAP_CTX_*), which drops
 * every unlocked TLB entry belonging to the context -- the same
 * operation the bootstrap code uses to demap context 1.  A "demap
 * page" operation (DEMAP_PAGE_*, as used by tlb_flush_pte) takes a VA
 * and would only remove the single page addressed by the constant.
 */
	.align 8
ENTRY(tlb_flush_ctx)
#ifdef DEBUG
	set	DATA_START, %o4				! Forget any recent TLB misses
	stx	%g0, [%o4]
#endif
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	call	printf
	 mov	%i0, %o1
	restore
	.data
1:
	.asciz	"tlb_flush_ctx:	context flush of %d attempted\r\n"
	_ALIGN
	.text
#endif
#ifdef DIAGNOSTIC
	brnz,pt	%o0, 2f			! ctx 0 would nuke the kernel's mappings
	 nop
	set	1f, %o0
	call	panic
	 nop
	.data
1:
	.asciz	"tlb_flush_ctx:	attempted demap of NUCLEUS context\r\n"
	_ALIGN
	.text
2:
#endif
#ifdef SPITFIRE
	mov	CTX_SECONDARY, %o2
	sethi	%hi(KERNBASE), %o4
	ldxa	[%o2] ASI_DMMU, %g1		! Save secondary context
	membar	#LoadStore
	stxa	%o0, [%o2] ASI_DMMU		! Insert context to demap
	membar	#Sync
	set	DEMAP_CTX_SECONDARY, %g2	! Demap entire context from secondary ctx reg
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	membar	#Sync				! No real reason for this XXXX
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! Do the demap
	membar	#Sync
	stxa	%g1, [%o2] ASI_DMMU		! Restore secondary asi
	membar	#Sync				! No real reason for this XXXX
	flush	%o4
	retl
	 nop
#else
	!! Cheetah: demap via the primary context register at %tl > 0.
	rdpr	%tl, %o3
	mov	CTX_PRIMARY, %o2
	brnz	%o3, 1f				! Already above %tl 0?
	 sethi	%hi(KERNBASE), %o4
	wrpr	%g0, 1, %tl			! Make sure we're NUCLEUS
1:
	ldxa	[%o2] ASI_DMMU, %g1		! Save primary context
	membar	#LoadStore
	stxa	%o0, [%o2] ASI_DMMU		! Insert context to demap
	membar	#Sync
	set	DEMAP_CTX_PRIMARY, %g2		! Demap entire context from primary ctx reg
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	membar	#Sync				! No real reason for this XXXX
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! Do the demap
	membar	#Sync
	stxa	%g1, [%o2] ASI_DMMU		! Restore primary context
	membar	#Sync					! No real reason for this XXXX
	brz,pt	%o3, 1f				! Entered at %tl == 0?
	 flush	%o4
	retl
	 nop
1:
	retl
	 wrpr	%g0, 0, %tl				! Return to kernel mode.
#endif
/*
 * blast_vcache()
 *
 * Clear out all of both I$ and D$ regardless of contents
 * Does not modify %o0
 *
 * Writes a zero (invalid) tag at every 8-byte index of both
 * 2*NBPG-byte direct-mapped caches via the diagnostic tag ASIs.
 * Clobbers %o1-%o4.
 */
	.align 8
ENTRY(blast_vcache)
/*
 * We turn off interrupts for the duration to prevent RED exceptions.
 */
	rdpr	%pstate, %o3
	set	(2*NBPG)-8, %o1				! Start at the last tag index
	andn	%o3, PSTATE_IE, %o4			! Turn off PSTATE_IE bit
	wrpr	%o4, 0, %pstate
1:
	stxa	%g0, [%o1] ASI_ICACHE_TAG		! Invalidate I$ tag at this index
	stxa	%g0, [%o1] ASI_DCACHE_TAG		! and the D$ tag
	brnz,pt	%o1, 1b					! Loop until index 0 is done too
	 dec	8, %o1					! (delay) previous tag index
	sethi	%hi(KERNBASE), %o2
	flush	%o2					! Synchronize the now-empty I$
	retl
	 wrpr	%o3, %pstate				! (delay) restore saved %pstate
6176
6177
/*
 * blast_icache()
 *
 * Clear out all of I$ regardless of contents
 * Does not modify %o0
 *
 * Same walk as blast_vcache() but touches only the I$ tags.
 * Clobbers %o1-%o4.
 */
	.align 8
ENTRY(blast_icache)
/*
 * We turn off interrupts for the duration to prevent RED exceptions.
 */
	rdpr	%pstate, %o3
	set	(2*NBPG)-8, %o1				! Start at the last tag index
	andn	%o3, PSTATE_IE, %o4			! Turn off PSTATE_IE bit
	wrpr	%o4, 0, %pstate
1:
	stxa	%g0, [%o1] ASI_ICACHE_TAG		! Invalidate I$ tag at this index
	brnz,pt	%o1, 1b					! Loop until index 0 is done too
	 dec	8, %o1					! (delay) previous tag index
	sethi	%hi(KERNBASE), %o2
	flush	%o2					! Synchronize the now-empty I$
	retl
	 wrpr	%o3, %pstate				! (delay) restore saved %pstate
6202
6203
6204
/*
 * dcache_flush_page(vaddr_t pa)
 *
 * Clear one page from D$.
 *
 * NOTE(review): despite the `vaddr_t' above, the argument is matched
 * against the D$ physical tags (cf. cache_flush_phys: "tags match pa
 * bits 40-13"); in 32-bit mode the 64-bit PA arrives split across
 * %o0/%o1.  Clobbers %o1-%o5.
 */
	.align 8
ENTRY(dcache_flush_page)
#ifndef _LP64
	COMBINE(%o0, %o1, %o0)		! Reassemble the 64-bit PA from the register pair
#endif

	!! Try using cache_flush_phys for a change.

	mov	-1, %o1		! Generate mask for tag: bits [29..2]
	srlx	%o0, 13-2, %o2	! Tag is PA bits <40:13> in bits <29:2>
	clr	%o4		! %o4 = current tag index
	srl	%o1, 2, %o1	! Now we have bits <29:0> set
	set	(2*NBPG), %o5	! %o5 = bytes of tag array left to scan
	andn	%o1, 3, %o1	! Now we have bits <29:2> set

1:
	ldxa	[%o4] ASI_DCACHE_TAG, %o3	! Fetch the tag for this line
	dec	16, %o5
	xor	%o3, %o2, %o3	! Zero (under the mask) iff the line is from our page
	andcc	%o3, %o1, %g0
	bne,pt	%xcc, 2f	! No match -- leave the line alone
	 membar	#LoadStore
	stxa	%g0, [%o4] ASI_DCACHE_TAG	! Match: invalidate the line
	membar	#StoreLoad
2:
	brnz,pt	%o5, 1b
	 inc	16, %o4		! (delay) next 16-byte tag index

	sethi	%hi(KERNBASE), %o5
	flush	%o5
	retl
	 membar	#Sync
6243
/*
 * icache_flush_page(vaddr_t pa)
 *
 * Clear one page from I$.
 *
 * As with dcache_flush_page() the argument is matched against the
 * physical tags and arrives split across %o0/%o1 in 32-bit mode.
 * On !SPITFIRE (cheetah) the tag walk is compiled out -- the file's
 * comments note I$ flushes are not needed there -- leaving only the
 * flush/membar.  Clobbers %o1-%o5 and %g1.
 */
	.align 8
ENTRY(icache_flush_page)
#ifndef _LP64
	COMBINE(%o0, %o1, %o0)		! Reassemble the 64-bit PA from the register pair
#endif

#ifdef SPITFIRE
	!!
	!! Linux sez that I$ flushes are not needed for cheetah.
	!!

	!! Now do the I$
	srlx	%o0, 13-8, %o2	! PA bits <40:13> shifted into tag position <35:8>
	mov	-1, %o1		! Generate mask for tag: bits [35..8]
	srl	%o1, 32-35+7, %o1
	clr	%o4		! %o4 = current tag index
	sll	%o1, 7, %o1	! Mask
				! NOTE(review): this actually builds a mask of
				! bits <34:7>, one bit off the [35..8] claimed
				! above -- confirm against the I$ tag layout.
	set	(2*NBPG), %o5	! %o5 = bytes of tag array left to scan

1:
	ldda	[%o4] ASI_ICACHE_TAG, %g0	! Tag goes in %g1
	dec	16, %o5
	xor	%g1, %o2, %g1	! Zero (under the mask) iff the line is from our page
	andcc	%g1, %o1, %g0
	bne,pt	%xcc, 2f	! No match -- leave the line alone
	 membar	#LoadStore
	stxa	%g0, [%o4] ASI_ICACHE_TAG	! Match: invalidate the line
	membar	#StoreLoad
2:
	brnz,pt	%o5, 1b
	 inc	16, %o4		! (delay) next 16-byte tag index
#endif
	sethi	%hi(KERNBASE), %o5
	flush	%o5
	membar	#Sync
	retl
	 nop
6287
/*
 * cache_flush_virt(va, len)
 *
 * Clear everything in that va range from D$ and I$.
 *
 * %o0 = start VA, %o1 = length in bytes.  Addresses are reduced to the
 * cache-index bits <13:5> and every 16-byte-indexed tag in the range is
 * unconditionally invalidated (both I$ ways on SPITFIRE).
 * Clobbers %o2-%o5 and %g1.
 */
	.align 8
ENTRY(cache_flush_virt)
	brz,pn	%o1, 2f		! What? nothing to clear?
	 add	%o0, %o1, %o2	! (delay) %o2 = end address
	mov	0x1ff, %o3
	sllx	%o3, 5, %o3	! Generate mask for VA bits <13:5> (cache index)
	and	%o0, %o3, %o0
	and	%o2, %o3, %o2
	sub	%o2, %o1, %o4	! End < start? need to split flushes.
	sethi	%hi((1<<13)), %o5	! Offset to the I$ "second way"
	brlz,pn	%o4, 1f
	 movrz	%o4, %o3, %o4	! If start == end we need to wrap

	!! Clear from start to end
1:
	stxa	%g0, [%o0] ASI_DCACHE_TAG
	dec	16, %o4		! Count down bytes remaining
	xor	%o5, %o0, %o3	! Second way
#ifdef SPITFIRE
	stxa	%g0, [%o0] ASI_ICACHE_TAG! Don't do this on cheetah
	stxa	%g0, [%o3] ASI_ICACHE_TAG! Don't do this on cheetah
#endif
	brgz,pt	%o4, 1b
	 inc	16, %o0		! (delay) next tag index
2:
	sethi	%hi(KERNBASE), %o5
	flush	%o5
	membar	#Sync
	retl
	 nop

	!! We got a hole.  Clear from start to hole
	!! NOTE(review): nothing branches here -- this tail (through the
	!! "clear to the end" code below) looks like unreachable leftover
	!! from an earlier split-range implementation; confirm before
	!! relying on it.
	clr	%o4
3:
	stxa	%g0, [%o4] ASI_DCACHE_TAG
	dec	16, %o1
	xor	%o5, %o4, %g1	! Second way
	stxa	%g0, [%o4] ASI_ICACHE_TAG
	stxa	%g0, [%g1] ASI_ICACHE_TAG
	brgz,pt	%o1, 3b
	 inc	16, %o4

	!! Now clear to the end.
	sub	%o3, %o2, %o4	! Size to clear (NBPG - end)
	ba,pt	%icc, 1b
	 mov	%o2, %o0	! Start of clear
6340
/*
 *	cache_flush_phys __P((paddr_t, psize_t, int));
 *
 *	Clear a set of paddrs from the D$, I$ and if param3 is
 *	non-zero, E$.  (E$ is not supported yet).
 *
 *	In:	%o0 = base PA, %o1 = size in bytes, %o2 = flush-E$ flag
 *		(DEBUG kernels trap if it is set).  In 32-bit mode each
 *		64-bit argument arrives in a register pair.
 *	Walks every 16-byte tag index of the two 2*NBPG caches and
 *	invalidates any line whose physical tag falls inside
 *	[base, base+size].  Clobbers %o2-%o5 and %g1.
 *
 *	The I$ pass exists only on SPITFIRE: that is the only
 *	configuration that loads the I$ tag into %g1 (and per the
 *	comments at icache_flush_page, cheetah needs no I$ flushing),
 *	so the compare/invalidate block below is guarded the same way.
 */

	.align 8
ENTRY(cache_flush_phys)
#ifndef _LP64
	COMBINE(%o0, %o1, %o0)
	COMBINE(%o2, %o3, %o1)
	mov	%o4, %o2
#endif
#ifdef DEBUG
	tst	%o2		! Want to clear E$?
	tnz	1		! Error!
#endif
	add	%o0, %o1, %o1	! End PA

	!!
	!! Both D$ and I$ tags match pa bits 40-13, but
	!! they are shifted different amounts.  So we'll
	!! generate a mask for bits 40-13.
	!!

	mov	-1, %o2		! Generate mask for tag: bits [40..13]
	srl	%o2, 5, %o2	! 32-5 = [27..0]
	sllx	%o2, 13, %o2	! 27+13 = [40..13]

	and	%o2, %o0, %o0	! Mask away uninteresting bits
	and	%o2, %o1, %o1	! (probably not necessary)

	set	(2*NBPG), %o5	! Bytes of tag array left to scan
	clr	%o4		! Current tag index
1:
	ldxa	[%o4] ASI_DCACHE_TAG, %o3
#ifdef SPITFIRE
	ldda	[%o4] ASI_ICACHE_TAG, %g0	! Tag goes in %g1 -- not on cheetah
#endif
	sllx	%o3, 40-29, %o3	! Shift D$ tag into place
	and	%o3, %o2, %o3	! Mask out trash
	cmp	%o0, %o3
	blt,pt	%xcc, 2f	! Too low
	 sllx	%g1, 40-35, %g1	! Shift I$ tag into place
	cmp	%o1, %o3
	bgt,pt	%xcc, 2f	! Too high
	 nop

	membar	#LoadStore
	stxa	%g0, [%o4] ASI_DCACHE_TAG ! Just right
2:
#ifdef SPITFIRE
	cmp	%o0, %g1	! I$ tag below the range?
	blt,pt	%xcc, 3f
	 cmp	%o1, %g1	! (delay) or above it?
	bgt,pt	%icc, 3f
	 nop
	stxa	%g0, [%o4] ASI_ICACHE_TAG	! In range: invalidate I$ line
3:
#endif
	membar	#StoreLoad
	dec	16, %o5
	brgz,pt	%o5, 1b
	 inc	16, %o4		! (delay) next tag index

	sethi	%hi(KERNBASE), %o5
	flush	%o5
	membar	#Sync
	retl
	 nop
6412
6413#ifdef _LP64
/*
 * XXXXX Still needs lotsa cleanup after sendsig is complete and offsets are known
 *
 * The following code is copied to the top of the user stack when each
 * process is exec'ed, and signals are `trampolined' off it.
 *
 * When this code is run, the stack looks like:
 *	[%sp]			128 bytes to which registers can be dumped
 *	[%sp + 128]		signal number (goes in %o0)
 *	[%sp + 128 + 4]		signal code (goes in %o1)
 *	[%sp + 128 + 8]		first word of saved state (sigcontext)
 *	    .
 *	    .
 *	    .
 *	[%sp + NNN]	last word of saved state
 * (followed by previous stack contents or top of signal stack).
 * The address of the function to call is in %g1; the old %g1 and %o0
 * have already been saved in the sigcontext.  We are running in a clean
 * window, all previous windows now being saved to the stack.
 *
 * Note that [%sp + 128 + 8] == %sp + 128 + 16.  The copy at %sp+128+8
 * will eventually be removed, with a hole left in its place, if things
 * work out.
 */
	.globl	_C_LABEL(sigcode)
	.globl	_C_LABEL(esigcode)
_C_LABEL(sigcode):
	/*
	 * XXX  the `save' and `restore' below are unnecessary: should
	 *	replace with simple arithmetic on %sp
	 *
	 * Make room on the stack for 64 %f registers + %fsr.  This comes
	 * out to 64*4+8 or 264 bytes, but this must be aligned to a multiple
	 * of 64, or 320 bytes.
	 */
	save	%sp, -CC64FSZ - 320, %sp
	mov	%g2, %l2		! save globals in %l registers
	mov	%g3, %l3
	mov	%g4, %l4
	mov	%g5, %l5
	mov	%g6, %l6
	mov	%g7, %l7
	/*
	 * Saving the fpu registers is expensive, so do it iff it is
	 * enabled and dirty.
	 */
	rd	%fprs, %l0		! %l0 = saved %fprs
	btst	FPRS_DL|FPRS_DU, %l0	! All clean?
	bz,pt	%icc, 2f
	 btst	FPRS_DL, %l0		! test dl
	bz,pt	%icc, 1f
	 btst	FPRS_DU, %l0		! test du

	! fpu is enabled, oh well
	stx	%fsr, [%sp + CC64FSZ + BIAS + 0]
	add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block store
	stda	%f0, [%l0] ASI_BLK_P	! Save the lower half of the FP regs
	inc	BLOCK_SIZE, %l0
	stda	%f16, [%l0] ASI_BLK_P
1:
	bz,pt	%icc, 2f		! (condition codes still from "test du")
	 add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block store
	add	%l0, 2*BLOCK_SIZE, %l0	! and skip what we already stored
	stda	%f32, [%l0] ASI_BLK_P	! Save the upper half of the FP regs
	inc	BLOCK_SIZE, %l0
	stda	%f48, [%l0] ASI_BLK_P
2:
	membar	#Sync
	rd	%y, %l1			! in any case, save %y
	lduw	[%fp + BIAS + 128], %o0	! sig
	lduw	[%fp + BIAS + 128 + 4], %o1	! code
	call	%g1			! (*sa->sa_handler)(sig,code,scp)
	 add	%fp, BIAS + 128 + 8, %o2	! scp

	/*
	 * Now that the handler has returned, re-establish all the state
	 * we just saved above, then do a sigreturn.
	 *
	 * NOTE(review): the literals 3/1/2 below mirror the
	 * FPRS_DL|FPRS_DU / FPRS_DL / FPRS_DU tests above -- but if the
	 * FP state was dirty, %l0 was overwritten with a block-aligned
	 * pointer in the save path, so these tests no longer see the
	 * saved %fprs.  Flagging for confirmation against later
	 * revisions of this file.
	 */
	btst	3, %l0			! All clean?
	bz,pt	%icc, 2f
	 btst	1, %l0			! test dl
	bz,pt	%icc, 1f
	 btst	2, %l0			! test du

	ldx	[%sp + CC64FSZ + BIAS + 0], %fsr
	add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block load
	ldda	[%l0] ASI_BLK_P, %f0	! Reload the lower half of the FP regs
	inc	BLOCK_SIZE, %l0
	ldda	[%l0] ASI_BLK_P, %f16
1:
	bz,pt	%icc, 2f
	 wr	%l1, %g0, %y		! in any case, restore %y
	add	%sp, BIAS+CC64FSZ+BLOCK_SIZE, %l0	! Generate a pointer so we can
	andn	%l0, BLOCK_ALIGN, %l0	! do a block load
	inc	2*BLOCK_SIZE, %l0	! and skip what we already loaded
	ldda	[%l0] ASI_BLK_P, %f32	! Reload the upper half of the FP regs
	inc	BLOCK_SIZE, %l0
	ldda	[%l0] ASI_BLK_P, %f48
2:
	mov	%l2, %g2		! restore the saved globals
	mov	%l3, %g3
	mov	%l4, %g4
	mov	%l5, %g5
	mov	%l6, %g6
	mov	%l7, %g7
	membar	#Sync

	restore	%g0, SYS___sigreturn14, %g1 ! get registers back & set syscall #
	add	%sp, BIAS + 128 + 8, %o0! compute scp
!	andn	%o0, 0x0f, %o0
	t	ST_SYSCALL		! sigreturn(scp)
	! sigreturn does not return unless it fails
	mov	SYS_exit, %g1		! exit(errno)
	t	ST_SYSCALL
_C_LABEL(esigcode):
6532#endif
6533
6534#if !defined(_LP64)
6535
6536#define SIGCODE_NAME		sigcode
6537#define ESIGCODE_NAME		esigcode
6538#define SIGRETURN_NAME		SYS___sigreturn14
6539#define EXIT_NAME		SYS_exit
6540
6541#include "sigcode32.s"
6542
6543#endif
6544
/*
 * Primitives
 *
 * From here on, ENTRY() is redefined: the GPROF variant emits a
 * per-function counter cell in .data and a call to _mcount before the
 * function body; the plain variant just emits a global, typed label.
 * ALTENTRY() adds an extra exported label inside an existing function.
 */
#ifdef ENTRY
#undef ENTRY
#endif

#ifdef GPROF
	.globl	_mcount
#define	ENTRY(x) \
	.globl _C_LABEL(x); .proc 1; .type _C_LABEL(x),@function; \
_C_LABEL(x): ; \
	.data; \
	.align 8; \
0:	.uaword 0; .uaword 0; \
	.text;	\
	save	%sp, -CC64FSZ, %sp; \
	sethi	%hi(0b), %o0; \
	call	_mcount; \
	or	%o0, %lo(0b), %o0; \
	restore
#else
#define	ENTRY(x)	.globl _C_LABEL(x); .proc 1; \
	.type _C_LABEL(x),@function; _C_LABEL(x):
#endif
#define	ALTENTRY(x)	.globl _C_LABEL(x); _C_LABEL(x):
6571
/*
 * getfp() - get stack frame pointer
 *
 * Returns the current window's %fp in %o0.  Leaf routine; touches
 * nothing else.
 */
ENTRY(getfp)
	retl
	 mov %fp, %o0		! (delay) return value = %fp
6578
/*
 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
 *
 * Copy a null terminated string from the user address space into
 * the kernel address space.
 *
 * In:	%o0 = user source, %o1 = kernel dest, %o2 = max bytes,
 *	%o3 = optional out-pointer for the copied length.
 * Returns 0, ENAMETOOLONG, or EFAULT in %o0.  User bytes are read
 * with ASI_AIUS; faults vector to Lcsfault via pcb_onfault (the
 * Lcsfault/Lcsdone epilogue lives with copyoutstr below and is
 * shared by both routines).
 */
ENTRY(copyinstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	8f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	mov	%i2, %o3
	call	printf
	 mov	%i3, %o4
	restore
	.data
8:	.asciz	"copyinstr: from=%x to=%x max=%x &len=%x\n"
	_ALIGN
	.text
#endif
	brgz,pt	%o2, 1f					! Make sure len is valid
	 sethi	%hi(CPCB), %o4		! (first instr of copy)
	retl
	 mov	ENAMETOOLONG, %o0
1:
	LDPTR	[%o4 + %lo(CPCB)], %o4	! catch faults
	set	Lcsfault, %o5
	membar	#Sync
	STPTR	%o5, [%o4 + PCB_ONFAULT]

	mov	%o1, %o5		!	save = toaddr;
! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsba	[%o0] ASI_AIUS, %g1	!	c = *fromaddr;
	stb	%g1, [%o1]		!	*toaddr++ = c;
	inc	%o1
	brz,a,pn	%g1, Lcsdone	!	if (c == NULL)
	 clr	%o0			!		{ error = 0; done; }
	deccc	%o2			!	if (--len > 0) {
	bg,pt	%icc, 0b		!		fromaddr++;
	 inc	%o0			!		goto loop;
	ba,pt	%xcc, Lcsdone		!	}
	 mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
	NOTREACHED
6625
/*
 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
 *
 * Copy a null terminated string from the kernel
 * address space to the user address space.
 *
 * In:	%o0 = kernel source, %o1 = user dest, %o2 = max bytes,
 *	%o3 = optional out-pointer for the copied length.
 * Returns 0, ENAMETOOLONG, or EFAULT in %o0.  User bytes are
 * written with ASI_AIUS; faults vector to Lcsfault via pcb_onfault.
 * The Lcsdone/Lcsfault epilogue below is shared with copyinstr.
 */
ENTRY(copyoutstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	8f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	mov	%i2, %o3
	call	printf
	 mov	%i3, %o4
	restore
	.data
8:	.asciz	"copyoutstr: from=%x to=%x max=%x &len=%x\n"
	_ALIGN
	.text
#endif
	brgz,pt	%o2, 1f					! Make sure len is valid
	 sethi	%hi(CPCB), %o4		! (first instr of copy)
	retl
	 mov	ENAMETOOLONG, %o0
1:
	LDPTR	[%o4 + %lo(CPCB)], %o4	! catch faults
	set	Lcsfault, %o5
	membar	#Sync
	STPTR	%o5, [%o4 + PCB_ONFAULT]

	mov	%o1, %o5		!	save = toaddr;
! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsb	[%o0], %g1		!	c = *fromaddr;
	stba	%g1, [%o1] ASI_AIUS	!	*toaddr++ = c;
	inc	%o1
	brz,a,pn	%g1, Lcsdone	!	if (c == NULL)
	 clr	%o0			!		{ error = 0; done; }
	deccc	%o2			!	if (--len > 0) {
	bg,pt	%icc, 0b		!		fromaddr++;
	 inc	%o0			!		goto loop;
					!	}
	mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG; fall into done
Lcsdone:				! done: (shared with copyinstr)
	sub	%o1, %o5, %o1		!	len = to - save;
	brnz,a	%o3, 1f			!	if (lencopied)
	 STPTR	%o1, [%o3]		!		*lencopied = len;
1:
	retl				! cpcb->pcb_onfault = 0;
	 STPTR	%g0, [%o4 + PCB_ONFAULT]! return (error);

Lcsfault:				! fault handler: pcb_onfault points here
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	5f, %o0
	call	printf
	 nop
	restore
	.data
5:	.asciz	"Lcsfault: recovering\n"
	_ALIGN
	.text
#endif
	b	Lcsdone			! error = EFAULT;
	 mov	EFAULT, %o0		! goto ret;
6693
/*
 * copystr(fromaddr, toaddr, maxlength, &lencopied)
 *
 * Copy a null terminated string from one point to another in
 * the kernel address space.  (This is a leaf procedure, but
 * it does not seem that way to the C compiler.)
 *
 * In:	%o0 = source, %o1 = dest, %o2 = max bytes, %o3 = optional
 *	out-pointer for the copied length.  Returns 0 or ENAMETOOLONG.
 */
ENTRY(copystr)
	brgz,pt	%o2, 0f	! Make sure len is valid
	 mov	%o1, %o5		!	to0 = to;
	retl
	 mov	ENAMETOOLONG, %o0
0:					! loop:
	ldsb	[%o0], %o4		!	c = *from;
	tst	%o4
	stb	%o4, [%o1]		!	*to++ = c;
	be	1f			!	if (c == 0)
	 inc	%o1			!		goto ok;
	deccc	%o2			!	if (--len > 0) {
	bg,a	0b			!		from++;
	 inc	%o0			!		goto loop;
	b	2f			!	}
	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
1:					! ok:
	clr	%o0			!	ret = 0;
2:
	sub	%o1, %o5, %o1		!	len = to - to0;
	tst	%o3			!	if (lencopied)
	bnz,a	3f
	 STPTR	%o1, [%o3]		!		*lencopied = len;
3:
	retl
	 nop
#ifdef DIAGNOSTIC
	! NOTE(review): nothing in the visible code branches to 4: --
	! presumably a leftover sanity-check target; confirm before removal.
4:
	sethi	%hi(5f), %o0
	call	_C_LABEL(panic)
	 or	%lo(5f), %o0, %o0
	.data
5:
	.asciz	"copystr"
	_ALIGN
	.text
#endif
6738
/*
 * copyin(src, dst, len)
 *
 * Copy specified amount of data from user space into the kernel.
 *
 * In:	%o0 = user source, %o1 = kernel dest, %o2 = byte count.
 * Returns 0 in %o0 on success; faults on the user side vector
 * through pcb_onfault to Lcopyfault (defined later in this file,
 * with copyout's error path).  User loads use %asi = ASI_AIUS.
 *
 * This is a modified version of bcopy that uses ASI_AIUS.  When
 * bcopy is optimized to use block copy ASIs, this should be also.
 */

#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */

ENTRY(copyin)
!	flushw			! Make sure we don't have stack probs & lose hibits of %o
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	call	printf
	 mov	%i2, %o3
	restore
	.data
1:	.asciz	"copyin: src=%x dest=%x len=%x\n"
	_ALIGN
	.text
#endif
	sethi	%hi(CPCB), %o3
	wr	%g0, ASI_AIUS, %asi	! All user loads below go through %asi
	LDPTR	[%o3 + %lo(CPCB)], %o3
	set	Lcopyfault, %o4		! Fault handler (shared with copyout)
!	mov	%o7, %g7		! save return address
	membar	#Sync
	STPTR	%o4, [%o3 + PCB_ONFAULT]
	cmp	%o2, BCOPY_SMALL
Lcopyin_start:
	bge,a	Lcopyin_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
0:
	 inc	%o0		! (branch delay slot; harmless when taken)
	ldsba	[%o0 - 1] %asi, %o4!	*dst++ = (++src)[-1];
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 * The alignment cases cascade: bytes -> shorts -> words -> doubles,
	 * peeling leading elements until src/dst share 8-byte alignment.
	 */
Lcopyin_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already
	bne	1f
	 EMPTY
	btst	7, %o1
	be,a	Lcopyin_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto copyin_doubes

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsba	[%o0] %asi, %o4	!	do {
	inc	%o0		!		(++dst)[-1] = *src++;
	inc	%o1
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 stb	%o4, [%o1 - 1]
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsba	[%o0] %asi, %o4	!	*dst++ = *src++;
	stb	%o4, [%o1]
	inc	%o0
	inc	%o1
	dec	%o2		!	len--;
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsha	[%o0] %asi, %o4	!	do {
	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
	inc	2, %o0		!		dst += 2, src += 2;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lcopyin_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsha	[%o0] %asi, %o4	!	(*short *)dst = *(short *)src;
	sth	%o4, [%o1]
	inc	2, %o0		!	dst += 2;
	inc	2, %o1		!	src += 2;
	dec	2, %o2		!	len -= 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	lduwa	[%o0] %asi, %o4	!	do {
	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
	inc	4, %o0		!		dst += 4, src += 4;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lcopyin_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	inc	4, %o1
	dec	4, %o2		! }
1:
Lcopyin_doubles:
	ldxa	[%o0] %asi, %g1	! do {
	stx	%g1, [%o1]	!	*(double *)dst = *(double *)src;
	inc	8, %o0		!	dst += 8, src += 8;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lcopyin_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lcopyin_done	!	goto copyin_done;

	 btst	4, %o2		! if ((len & 4)) == 0)
	be,a	Lcopyin_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0		!	dst += 4;
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lcopyin_mopw:
	be	Lcopyin_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsha	[%o0] %asi, %o4	! *(short *)dst = *(short *)src;
	be	Lcopyin_done	! if ((len & 1) == 0) goto done;
	 sth	%o4, [%o1]
	ldsba	[%o0 + 2] %asi, %o4	! dst[2] = src[2];
	stb	%o4, [%o1 + 2]
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	! mop up trailing byte (if present).
Lcopyin_mopb:
	be,a	Lcopyin_done
	 nop
	ldsba	[%o0] %asi, %o4
	stb	%o4, [%o1]

Lcopyin_done:
	sethi	%hi(CPCB), %o3
!	stb	%o4,[%o1]	! Store last byte -- should not be needed
	LDPTR	[%o3 + %lo(CPCB)], %o3
	membar	#Sync
	STPTR	%g0, [%o3 + PCB_ONFAULT]	! Disarm the fault handler
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	retl
	 clr	%o0			! return 0
6940
/*
 * copyout(src, dst, len)
 *
 * Copy specified amount of data from kernel to user space.
 * Just like copyin, except that the `dst' addresses are user space
 * rather than the `src' addresses.
 *
 * This is a modified version of bcopy that uses ASI_AIUS.  When
 * bcopy is optimized to use block copy ASIs, this should be also.
 *
 * Register usage: %o0 = src (kernel), %o1 = dst (user), %o2 = len.
 * All user-space stores go through %asi, which is set to ASI_AIUS
 * below; a fault in any of them vectors to Lcopyfault via
 * cpcb->pcb_onfault and returns EFAULT.
 */
 /*
  * This needs to be reimplemented to really do the copy.
  */
ENTRY(copyout)
	/*
	 * ******NOTE****** this depends on bcopy() not using %g7
	 */
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%i0, %o1
	set	CTX_SECONDARY, %o4
	mov	%i1, %o2
	ldxa	[%o4] ASI_DMMU, %o4
	call	printf
	 mov	%i2, %o3
	restore
	.data
1:	.asciz	"copyout: src=%x dest=%x len=%x ctx=%d\n"
	_ALIGN
	.text
#endif
Ldocopy:
	sethi	%hi(CPCB), %o3
	wr	%g0, ASI_AIUS, %asi	! user-space stores go through %asi
	LDPTR	[%o3 + %lo(CPCB)], %o3
	set	Lcopyfault, %o4
!	mov	%o7, %g7		! save return address
	membar	#Sync
	STPTR	%o4, [%o3 + PCB_ONFAULT]	! cpcb->pcb_onfault = Lcopyfault
	cmp	%o2, BCOPY_SMALL
Lcopyout_start:
	membar	#StoreStore
	bge,a	Lcopyout_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 EMPTY
0:
	inc	%o0
	ldsb	[%o0 - 1], %o4	!	*dst++ = *src++ (src bumped early);
	stba	%o4, [%o1] %asi
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 */
Lcopyout_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already (in the delay slot above)
	bne	1f
	 EMPTY
	btst	7, %o1
	be,a	Lcopyout_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto copyout_doubles

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsb	[%o0], %o4	!	do {
	inc	%o0		!		*dst++ = *src++;
	inc	%o1
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 stba	%o4, [%o1 - 1] %asi
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	stba	%o4, [%o1] %asi
	inc	%o0
	inc	%o1
	dec	%o2		!	len--;
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsh	[%o0], %o4	!	do {
	stha	%o4, [%o1] %asi	!		*(short *)dst = *(short *)src;
	inc	2, %o0		!		src += 2, dst += 2;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lcopyout_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
	stha	%o4, [%o1] %asi
	inc	2, %o0		!	src += 2;
	inc	2, %o1		!	dst += 2;
	dec	2, %o2		!	len -= 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	lduw	[%o0], %o4	!	do {
	sta	%o4, [%o1] %asi	!		*(int *)dst = *(int *)src;
	inc	4, %o0		!		src += 4, dst += 4;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lcopyout_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
	sta	%o4, [%o1] %asi
	inc	4, %o0		!	src += 4, dst += 4, len -= 4;
	inc	4, %o1
	dec	4, %o2		! }
1:
Lcopyout_doubles:
	ldx	[%o0], %g1	! do {
	stxa	%g1, [%o1] %asi	!	*(double *)dst = *(double *)src;
	inc	8, %o0		!	src += 8, dst += 8;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lcopyout_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lcopyout_done	!	goto copyout_done;

	 btst	4, %o2		! if ((len & 4) == 0)
	be,a	Lcopyout_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
	sta	%o4, [%o1] %asi
	inc	4, %o0		!	src += 4;
	inc	4, %o1		!	dst += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lcopyout_mopw:
	be	Lcopyout_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
	be	Lcopyout_done	! if ((len & 1) == 0) goto done;
	 stha	%o4, [%o1] %asi
	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
	stba	%o4, [%o1 + 2] %asi
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	! mop up trailing byte (if present).
Lcopyout_mopb:
	be,a	Lcopyout_done
	 nop
	ldsb	[%o0], %o4
	stba	%o4, [%o1] %asi

Lcopyout_done:
	sethi	%hi(CPCB), %o3
	LDPTR	[%o3 + %lo(CPCB)], %o3
	membar	#Sync
	STPTR	%g0, [%o3 + PCB_ONFAULT]	! clear cpcb->pcb_onfault
!	jmp	%g7 + 8		! Original instr
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	membar	#StoreStore|#StoreLoad
	retl			! New instr
	 clr	%o0			! return 0
7152
! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return EFAULT.
! Note that although we were in bcopy, there is no state to clean up;
! the only special thing is that we have to return to [g7 + 8] rather than
! [o7 + 8].
Lcopyfault:
	sethi	%hi(CPCB), %o3
	LDPTR	[%o3 + %lo(CPCB)], %o3
	STPTR	%g0, [%o3 + PCB_ONFAULT]	! cpcb->pcb_onfault = NULL
	membar	#StoreStore|#StoreLoad
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	call	printf
	 nop
	restore
	.data
1:	.asciz	"copyfault: fault occurred\n"
	_ALIGN
	.text
#endif
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	retl
	 mov	EFAULT, %o0		! return EFAULT
7176
7177
	.data
	_ALIGN
	.comm	_C_LABEL(want_resched),4	! cleared by cpu_switch() below

/*
 * Switch statistics (for later tweaking):
 *	nswitchdiff = p1 => p2 (i.e., chose different process)
 *	nswitchexit = number of calls to switchexit()
 *	_cnt.v_swtch = total calls to swtch+swtchexit
 */
	.comm	_C_LABEL(nswitchdiff), 4
	.comm	_C_LABEL(nswitchexit), 4
	.text
7191/*
7192 * REGISTER USAGE IN cpu_switch AND switchexit:
7193 * This is split into two phases, more or less
7194 * `before we locate a new proc' and `after'.
7195 * Some values are the same in both phases.
7196 * Note that the %o0-registers are not preserved across
7197 * the psr change when entering a new process, since this
7198 * usually changes the CWP field (hence heavy usage of %g's).
7199 *
7200 *	%l1 = <free>; newpcb
7201 *	%l2 = %hi(_whichqs); newpsr
7202 *	%l3 = p
7203 *	%l4 = lastproc
7204 *	%l5 = oldpsr (excluding ipl bits)
7205 *	%l6 = %hi(cpcb)
7206 *	%l7 = %hi(curproc)
7207 *	%o0 = tmp 1
7208 *	%o1 = tmp 2
7209 *	%o2 = tmp 3
7210 *	%o3 = tmp 4; whichqs; vm
7211 *	%o4 = tmp 4; which; sswap
7212 *	%o5 = tmp 5; q; <free>
7213 */
7214
/*
 * switchexit is called only from cpu_exit() before the current process
 * has freed its vmspace and kernel stack; we must schedule them to be
 * freed.  (curproc is already NULL.)
 *
 * We lay the process to rest by changing to the `idle' kernel stack,
 * and note that the `last loaded process' is nonexistent.
 *
 * %o0 = struct proc *, passed on to exit2().  Falls through into
 * idle() after demapping the dead process's MMU context.
 */
ENTRY(switchexit)
	/*
	 * Since we're exiting we don't need to save locals or ins, so
	 * we won't need the next instruction.
	 */
!	save	%sp, -CC64FSZ, %sp
	flushw				! We don't have anything else to run, so why not
#ifdef DEBUG
	save	%sp, -CC64FSZ, %sp
	flushw
	restore
#endif
	wrpr	%g0, PSTATE_KERN, %pstate ! Make sure we're on the right globals
	mov	%o0, %l2		! save proc arg for exit2() call XXXXX

#ifdef SCHED_DEBUG
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	call	printf
	 nop
	LOCTOGLOB
	restore
	.data
1:	.asciz	"switchexit()\r\n"
	_ALIGN
	.text
#endif
	/*
	 * Change pcb to idle u. area, i.e., set %sp to top of stack
	 * and %psr to PSR_S|PSR_ET, and set cpcb to point to _idle_u.
	 * Once we have left the old stack, we can call kmem_free to
	 * destroy it.  Call it any sooner and the register windows
	 * go bye-bye.
	 */
	set	_C_LABEL(idle_u), %l1
	sethi	%hi(CPCB), %l6
#if 0
	/* Get rid of the stack	*/
	rdpr	%ver, %o0
	wrpr	%g0, 0, %canrestore	! Fixup window state regs
	and	%o0, 0x0f, %o0
	wrpr	%g0, 0, %otherwin
	wrpr	%g0, %o0, %cleanwin	! kernel don't care, but user does
	dec	1, %o0			! What happens if we don't subtract 2?
	wrpr	%g0, %o0, %cansave
	flushw						! DEBUG
#endif

	STPTR	%l1, [%l6 + %lo(CPCB)]	! cpcb = &idle_u
	set	_C_LABEL(idle_u) + USPACE - CC64FSZ, %o0	! set new %sp
#ifdef _LP64
	sub	%o0, BIAS, %sp		! Maybe this should be a save?
#else
	mov	%o0, %sp		! Maybe this should be a save?
#endif
	wrpr	%g0, 0, %canrestore
	wrpr	%g0, 0, %otherwin
	rdpr	%ver, %l7
	and	%l7, CWP, %l7
	wrpr	%l7, 0, %cleanwin
	dec	1, %l7					! NWINDOWS-1-1
	wrpr	%l7, %cansave
	clr	%fp			! End of stack.
#ifdef DEBUG
	flushw						! DEBUG
	set	_C_LABEL(idle_u), %l6
	SET_SP_REDZONE(%l6, %l5)
#endif
	wrpr	%g0, PSTATE_INTR, %pstate	! and then enable traps
	call	_C_LABEL(exit2)			! exit2(p)
	 mov	%l2, %o0

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	call	_C_LABEL(sched_lock_idle)	! Acquire sched_lock
#endif
	 wrpr	%g0, PIL_SCHED, %pil		! Set splsched()

	/*
	 * Now fall through to `the last switch'.  %g6 was set to
	 * %hi(cpcb), but may have been clobbered in kmem_free,
	 * so all the registers described below will be set here.
	 *
	 * Since the process has exited we can blow its context
	 * out of the MMUs now to free up those TLB entries rather
	 * than have more useful ones replaced.
	 *
	 * REGISTER USAGE AT THIS POINT:
	 *	%l2 = %hi(_whichqs)
	 *	%l4 = lastproc
	 *	%l5 = oldpsr (excluding ipl bits)
	 *	%l6 = %hi(cpcb)
	 *	%l7 = %hi(curproc)
	 *	%o0 = tmp 1
	 *	%o1 = tmp 2
	 *	%o3 = whichqs
	 */

	INCR(_C_LABEL(nswitchexit))		! nswitchexit++;
	INCR(_C_LABEL(uvmexp)+V_SWTCH)		! cnt.v_switch++;

	mov	CTX_SECONDARY, %o0
	sethi	%hi(_C_LABEL(sched_whichqs)), %l2
	sethi	%hi(CPCB), %l6
	sethi	%hi(CURPROC), %l7
	ldxa	[%o0] ASI_DMMU, %l1		! Don't demap the kernel
	LDPTR	[%l6 + %lo(CPCB)], %l5
	clr	%l4				! lastproc = NULL;
	brz,pn	%l1, 1f				! secondary context 0 is the kernel's: skip demap
#ifdef SPITFIRE
	 set	DEMAP_CTX_SECONDARY, %l1	! Demap secondary context
	stxa	%g1, [%l1] ASI_DMMU_DEMAP
	stxa	%g1, [%l1] ASI_IMMU_DEMAP
	membar	#Sync
#else
	 mov	CTX_PRIMARY, %o0
	wrpr	%g0, 1, %tl
	stxa	%l1, [%o0] ASI_DMMU
	set	DEMAP_CTX_PRIMARY, %l1		! Demap primary context
	stxa	%g1, [%l1] ASI_DMMU_DEMAP
	stxa	%g1, [%l1] ASI_IMMU_DEMAP
	membar	#Sync
	stxa	%g0, [%o0] ASI_DMMU
	membar	#Sync
	wrpr	%g0, 0, %tl
#endif
1:
	stxa	%g0, [%o0] ASI_DMMU		! Clear out our context
	membar	#Sync
	/* FALLTHROUGH */
7352	/* FALLTHROUGH */
/*
 * When no processes are on the runq, switch
 * idles here waiting for something to come ready.
 * The registers are set up as noted above.
 */
ENTRY(idle)
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	call	_C_LABEL(sched_unlock_idle)	! Release sched_lock
#endif
	 STPTR	%g0, [%l7 + %lo(CURPROC)] ! curproc = NULL;
1:					! spin reading _whichqs until nonzero
	wrpr	%g0, PSTATE_INTR, %pstate		! Make sure interrupts are enabled
	wrpr	%g0, 0, %pil		! (void) spl0();
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	idlemsg, %o0
	mov	%g1, %o1
	mov	%g2, %o2
	mov	%g3, %o3
	mov	%g5, %l5
	mov	%g6, %l6
	mov	%g7, %l7
	call	_C_LABEL(prom_printf)
	 mov	%g4, %o4
	set	idlemsg1, %o0
	mov	%l5, %o1
	mov	%l6, %o2
	call	_C_LABEL(prom_printf)
	 mov	%l7, %o3
	LOCTOGLOB
	restore
#endif
	ld	[%l2 + %lo(_C_LABEL(sched_whichqs))], %o3
	brnz,pt	%o3, notidle		! Something to run
	 nop
#ifdef UVM_PAGE_IDLE_ZERO
	! Check uvm.page_idle_zero
	sethi	%hi(_C_LABEL(uvm) + UVM_PAGE_IDLE_ZERO), %o3
	ld	[%o3 + %lo(_C_LABEL(uvm) + UVM_PAGE_IDLE_ZERO)], %o3
	brz,pn	%o3, 1b
	 nop

	! zero some pages while we have nothing better to do
	call	_C_LABEL(uvm_pageidlezero)
	 nop
#endif
	ba,a,pt	%xcc, 1b
	 nop				! spitfire bug
notidle:
	wrpr	%g0, PIL_SCHED, %pil	! (void) splhigh();
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	call	_C_LABEL(sched_lock_idle)	! Grab sched_lock
	 add	%o7, (Lsw_scan-.-4), %o7	! Return to Lsw_scan directly
#endif
	ba,a,pt	%xcc, Lsw_scan
	 nop				! spitfire bug
7410
/*
 * Panic paths for impossible scheduler states, plus the message
 * strings used here and by the idle-loop debug printfs above.
 */
Lsw_panic_rq:
	sethi	%hi(1f), %o0
	call	_C_LABEL(panic)
	 or	%lo(1f), %o0, %o0
Lsw_panic_wchan:
	sethi	%hi(2f), %o0
	call	_C_LABEL(panic)
	 or	%lo(2f), %o0, %o0
Lsw_panic_srun:
	sethi	%hi(3f), %o0
	call	_C_LABEL(panic)
	 or	%lo(3f), %o0, %o0
	.data
1:	.asciz	"switch rq"
2:	.asciz	"switch wchan"
3:	.asciz	"switch SRUN"
idlemsg:	.asciz	"idle %x %x %x %x"
idlemsg1:	.asciz	" %x %x %x\r\n"
	_ALIGN
	.text
/*
 * cpu_switch() picks a process to run and runs it, saving the current
 * one away.  On the assumption that (since most workstations are
 * single user machines) the chances are quite good that the new
 * process will turn out to be the current process, we defer saving
 * it here until we have found someone to load.  If that someone
 * is the current process we avoid both store and load.
 *
 * cpu_switch() is always entered at splstatclock or splhigh.
 *
 * IT MIGHT BE WORTH SAVING BEFORE ENTERING idle TO AVOID HAVING TO
 * SAVE LATER WHEN SOMEONE ELSE IS READY ... MUST MEASURE!
 *
 * Apparently cpu_switch() is called with curproc as the first argument,
 * but no port seems to make use of that parameter.
 */
	.globl	_C_LABEL(time)
ENTRY(cpu_switch)
	save	%sp, -CC64FSZ, %sp
	/*
	 * REGISTER USAGE AT THIS POINT:
	 *	%l1 = tmp 0
	 *	%l2 = %hi(_C_LABEL(whichqs))
	 *	%l3 = p
	 *	%l4 = lastproc
	 *	%l5 = cpcb
	 *	%l6 = %hi(CPCB)
	 *	%l7 = %hi(CURPROC)
	 *	%o0 = tmp 1
	 *	%o1 = tmp 2
	 *	%o2 = tmp 3
	 *	%o3 = tmp 4, then at Lsw_scan, whichqs
	 *	%o4 = tmp 5, then at Lsw_scan, which
	 *	%o5 = tmp 6, then at Lsw_scan, q
	 */
#ifdef DEBUG
	set	swdebug, %o1
	ld	[%o1], %o1
	brz,pt	%o1, 2f
	 set	1f, %o0
	call	printf
	 nop
	.data
1:	.asciz	"s"
	_ALIGN
	.globl	swdebug
swdebug:	.word 0
	.text
2:
#endif
#ifdef NOTDEF_DEBUG
	set	_C_LABEL(intrdebug), %l1
	mov	INTRDEBUG_FUNC, %o1
	st	%o1, [%l1]
#endif
	flushw				! We don't have anything else to run, so why not flush
#ifdef DEBUG
	save	%sp, -CC64FSZ, %sp
	flushw
	restore
#endif
	rdpr	%pstate, %o1		! oldpstate = %pstate;
	wrpr	%g0, PSTATE_INTR, %pstate ! make sure we're on normal globals
	sethi	%hi(CPCB), %l6
	sethi	%hi(_C_LABEL(sched_whichqs)), %l2	! set up addr regs
	LDPTR	[%l6 + %lo(CPCB)], %l5
	sethi	%hi(CURPROC), %l7
	stx	%o7, [%l5 + PCB_PC]	! cpcb->pcb_pc = pc;
	LDPTR	[%l7 + %lo(CURPROC)], %l4	! lastproc = curproc;
	sth	%o1, [%l5 + PCB_PSTATE]	! cpcb->pcb_pstate = oldpstate;

	STPTR	%g0, [%l7 + %lo(CURPROC)]	! curproc = NULL;
7503
Lsw_scan:
	ld	[%l2 + %lo(_C_LABEL(sched_whichqs))], %o3

#ifndef POPC
	.globl	_C_LABEL(__ffstab)
	/*
	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
	 * branches to idle if ffs(whichqs) was 0.
	 */
	set	_C_LABEL(__ffstab), %o2
	andcc	%o3, 0xff, %o1		! byte 0 zero?
	bz,a,pn	%icc, 1f		! yes, try byte 1
	 srl	%o3, 8, %o0
	ba,pt	%icc, 2f		! ffs = ffstab[byte0]; which = ffs - 1;
	 ldsb	[%o2 + %o1], %o0
1:	andcc	%o0, 0xff, %o1		! byte 1 zero?
	bz,a,pn	%icc, 1f		! yes, try byte 2
	 srl	%o0, 8, %o0
	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte1] + 7;
	ba,pt	%icc, 3f
	 add	%o0, 7, %o4
1:	andcc	%o0, 0xff, %o1		! byte 2 zero?
	bz,a,pn	%icc, 1f		! yes, try byte 3
	 srl	%o0, 8, %o0
	ldsb	[%o2 + %o1], %o0	! which = ffstab[byte2] + 15;
	ba,pt	%icc, 3f
	 add	%o0, 15, %o4
1:	ldsb	[%o2 + %o0], %o0	! ffs = ffstab[byte3] + 24
	addcc	%o0, 24, %o0		! (note that ffstab[0] == -24)
	bz,pn	%icc, idle		! if answer was 0, go idle
	 EMPTY
2:	sub	%o0, 1, %o4
3:	/* end optimized inline expansion */

#else
	/*
	 * Optimized inline expansion of `which = ffs(whichqs) - 1';
	 * branches to idle if ffs(whichqs) was 0.
	 *
	 * This version uses popc.
	 *
	 * XXXX spitfires and blackbirds don't implement popc.
	 *
	 */
	brz,pn	%o3, idle				! Don't bother if queues are empty
	 neg	%o3, %o1				! %o1 = -zz
	xnor	%o3, %o1, %o2				! %o2 = zz ^ ~ -zz
	popc	%o2, %o4				! which = popc(whichqs)
	dec	%o4					! which = ffs(whichqs) - 1

#endif
	/*
	 * We found a nonempty run queue.  Take its first process.
	 */
	set	_C_LABEL(sched_qs), %o5	! q = &qs[which];
	sll	%o4, PTRSHFT+1, %o0	! each queue head is two pointers wide
	add	%o0, %o5, %o5
	LDPTR	[%o5], %l3		! p = q->ph_link;
	cmp	%l3, %o5		! if (p == q)
	be,pn	%icc, Lsw_panic_rq	!	panic("switch rq");
	 EMPTY
	LDPTR	[%l3], %o0		! tmp0 = p->p_forw;
	STPTR	%o0, [%o5]		! q->ph_link = tmp0;
	STPTR	%o5, [%o0 + PTRSZ]	! tmp0->p_back = q;
	cmp	%o0, %o5		! if (tmp0 == q)
	bne	1f
	 EMPTY
	mov	1, %o1			!	whichqs &= ~(1 << which);
	sll	%o1, %o4, %o1
	andn	%o3, %o1, %o3
	st	%o3, [%l2 + %lo(_C_LABEL(sched_whichqs))]
1:
	/*
	 * PHASE TWO: NEW REGISTER USAGE:
	 *	%l1 = newpcb
	 *	%l2 = newpstate
	 *	%l3 = p
	 *	%l4 = lastproc
	 *	%l5 = cpcb
	 *	%l6 = %hi(_cpcb)
	 *	%l7 = %hi(_curproc)
	 *	%o0 = tmp 1
	 *	%o1 = tmp 2
	 *	%o2 = tmp 3
	 *	%o3 = vm
	 *	%o4 = sswap
	 *	%o5 = <free>
	 */

	/* firewalls: sanity-check the process we just dequeued */
	LDPTR	[%l3 + P_WCHAN], %o0	! if (p->p_wchan)
	brnz,pn	%o0, Lsw_panic_wchan	!	panic("switch wchan");
	 EMPTY
	ldsb	[%l3 + P_STAT], %o0	! if (p->p_stat != SRUN)
	cmp	%o0, SRUN
	bne	Lsw_panic_srun		!	panic("switch SRUN");
	 EMPTY

	/*
	 * Committed to running process p.
	 * It may be the same as the one we were running before.
	 */
#if defined(MULTIPROCESSOR)
	/*
	 * XXXSMP
	 * p->p_cpu = curcpu();
	 */
#endif
	mov	SONPROC, %o0			! p->p_stat = SONPROC
	stb	%o0, [%l3 + P_STAT]
	sethi	%hi(_C_LABEL(want_resched)), %o0
	st	%g0, [%o0 + %lo(_C_LABEL(want_resched))]	! want_resched = 0;
	LDPTR	[%l3 + P_ADDR], %l1		! newpcb = p->p_addr;
	STPTR	%g0, [%l3 + PTRSZ]		! p->p_back = NULL;
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	/*
	 * Done mucking with the run queues, release the
	 * scheduler lock, but keep interrupts out.
	 */
	call	_C_LABEL(sched_unlock_idle)
#endif
	 STPTR	%l4, [%l7 + %lo(CURPROC)]	! restore old proc so we can save it

	cmp	%l3, %l4			! p == lastproc?
	be,pt	%xcc, Lsw_sameproc		! yes, go return 0
	 nop
7630
	/*
	 * Not the old process.  Save the old process, if any;
	 * then load p.
	 */
#ifdef SCHED_DEBUG
	mov	%l4, %g1
	mov	%l3, %g2
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%g1, %o1
	ld	[%o1+P_PID], %o2
	mov	%g2, %o3
	call	printf
	 ld	[%o3+P_PID], %o4
	ba	2f
	 restore
	.data
1:	.asciz	"cpu_switch: %x(%d)->%x(%d)\r\n"
	_ALIGN
	.text
	Debugger();
2:
#endif
	flushw				! DEBUG -- make sure we don't hold on to any garbage
	brz,pn	%l4, Lsw_load		! if no old process, go load
	 wrpr	%g0, PSTATE_KERN, %pstate

	INCR(_C_LABEL(nswitchdiff))	! clobbers %o0,%o1,%o2
wb1:
	flushw				! save all register windows except this one
	stx	%i7, [%l5 + PCB_PC]	! Save rpc
	stx	%i6, [%l5 + PCB_SP]
	rdpr	%cwp, %o2		! Useless
	stb	%o2, [%l5 + PCB_CWP]
7665
	/*
	 * Load the new process.  To load, we must change stacks and
	 * alter cpcb and the window control registers, hence we must
	 * disable interrupts.
	 *
	 * We also must load up the `in' and `local' registers.
	 */
Lsw_load:
#ifdef SCHED_DEBUG
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	call	printf
	 nop
	LOCTOGLOB
	restore
	.data
1:	.asciz	"cpu_switch: loading the new process:\r\n"
	_ALIGN
	.text
#endif
	/* set new cpcb */
	STPTR	%l3, [%l7 + %lo(CURPROC)]	! curproc = p;
	STPTR	%l1, [%l6 + %lo(CPCB)]	! cpcb = newpcb;

#ifdef SCHED_DEBUG
	ldx	[%l1 + PCB_SP], %o0
	btst	1, %o0
	add	%o0, BIAS, %o1
	movnz	%icc, %o1, %o0
	brnz,pt	%o0, 2f
	 ldx	[%o0], %o0			! Force a fault if needed
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	call	printf
	 nop
	LOCTOGLOB
	restore
	ta 1
	.data
1:	.asciz	"cpu_switch: NULL %sp\r\n"
	_ALIGN
	.text
2:
#endif
	ldx	[%l1 + PCB_SP], %i6	! restore new process's saved %sp
	ldx	[%l1 + PCB_PC], %i7	! and saved return pc
	wrpr	%g0, 0, %otherwin	! These two insns should be redundant
	wrpr	%g0, 0, %canrestore
	rdpr	%ver, %l7
	and	%l7, CWP, %l7
	wrpr	%g0, %l7, %cleanwin
!	wrpr	%g0, 0, %cleanwin	! DEBUG
	dec	1, %l7					! NWINDOWS-1-1
	wrpr	%l7, %cansave
#ifdef DEBUG
	wrpr	%g0, 4, %tl				! DEBUG -- force watchdog
	flushw						! DEBUG
	wrpr	%g0, 0, %tl				! DEBUG
	/* load window */
!	restore				! The logic is just too complicated to handle here.  Let the traps deal with the problem
!	flushw						! DEBUG
#endif
#ifdef SCHED_DEBUG
	mov	%fp, %i1
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	call	printf
	 mov	%i1, %o1
	LOCTOGLOB
	restore
	.data
1:	.asciz	"cpu_switch: setup new process stack regs at %08x\r\n"
	_ALIGN
	.text
#endif
#ifdef DEBUG
	mov	%l1, %o0
	SET_SP_REDZONE(%o0, %o1)
	CHECK_SP_REDZONE(%o0, %o1)
#endif
	/* finally, enable traps */
	wrpr	%g0, PSTATE_INTR, %pstate
7751
	/*
	 * Now running p.  Make sure it has a context so that it
	 * can talk about user space stuff.  (Its pcb_uw is currently
	 * zero so it is safe to have interrupts going here.)
	 */
	LDPTR	[%l3 + P_VMSPACE], %o3	! vm = p->p_vmspace;
	sethi	%hi(_C_LABEL(kernel_pmap_)), %o1
	mov	CTX_SECONDARY, %l5		! Recycle %l5
	LDPTR	[%o3 + VM_PMAP], %o2		! if (vm->vm_pmap.pm_ctx != NULL)
	or	%o1, %lo(_C_LABEL(kernel_pmap_)), %o1
	cmp	%o2, %o1
	bz,pn	%xcc, Lsw_havectx		! Don't replace kernel context!
	 ld	[%o2 + PM_CTX], %o0
	brnz,pt	%o0, Lsw_havectx		!	goto havecontext;
	 nop

	/* p does not have a context: call ctx_alloc to get one */
	call	_C_LABEL(ctx_alloc)		! ctx_alloc(&vm->vm_pmap);
	 mov	%o2, %o0

#ifdef SPITFIRE
	set	DEMAP_CTX_SECONDARY, %o1	! This context has been recycled
	stxa	%o0, [%l5] ASI_DMMU		! so we need to invalidate
	membar	#Sync
	stxa	%o1, [%o1] ASI_DMMU_DEMAP	! whatever bits of it may
	stxa	%o1, [%o1] ASI_IMMU_DEMAP	! be left in the TLB
	membar	#Sync
#else
	wrpr	%g0, 1, %tl
	set	DEMAP_CTX_PRIMARY, %o1		! This context has been recycled
	stxa	%o0, [%l5] ASI_DMMU		! so we need to invalidate
	membar	#Sync
	stxa	%o1, [%o1] ASI_DMMU_DEMAP	! whatever bits of it may
	stxa	%o1, [%o1] ASI_IMMU_DEMAP	! be left in the TLB
	membar	#Sync
	stxa	%g0, [%l5] ASI_DMMU		! so we need to invalidate
	membar	#Sync
	wrpr	%g0, 0, %tl
#endif
#ifdef SCHED_DEBUG
	mov	%o0, %g1
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	call	printf
 	 mov	%g1, %o1
	LOCTOGLOB
	restore
	.data
1:	.asciz	"cpu_switch: got new ctx %d in new process\r\n"
	_ALIGN
	.text
#endif
	/* p does have a context: just switch to it */
Lsw_havectx:
	! context is in %o0; %l5 still holds CTX_SECONDARY
	/*
	 * We probably need to flush the cache here.
	 */
	stxa	%o0, [%l5] ASI_DMMU		! Maybe we should invalidate the old context?
	membar	#Sync				! Maybe we should use flush here?
	flush	%sp

#ifdef SCHED_DEBUG
	mov	%o0, %g1
	mov	%i7, %g1
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	mov	%g1, %o2
	call	printf
	 mov	%g2, %o1
	LOCTOGLOB
	restore
	.data
1:	.asciz	"cpu_switch: in new process pc=%08x ctx %d\r\n"
	_ALIGN
	.text
#endif
#ifdef TRAPTRACE
	set	trap_trace, %o2
	lduw	[%o2+TRACEDIS], %o4
	brnz,pn	%o4, 1f			! skip if tracing is disabled
	 nop
	lduw	[%o2+TRACEPTR], %o3
	rdpr	%tl, %o4
	mov	4, %o5
	set	CURPROC, %o0
	sllx	%o4, 13, %o4
	LDPTR	[%o0], %o0
!	clr	%o0		! DISABLE PID
	or	%o4, %o5, %o4
	mov	%g0, %o5
	brz,pn	%o0, 2f
	 andncc	%o3, (TRACESIZ-1), %g0
!	ldsw	[%o0+P_PID], %o5	!  Load PID
2:
	movnz	%icc, %g0, %o3	! Wrap if needed

	set	CPCB, %o0	! Load up nsaved
	LDPTR	[%o0], %o0
	ldub	[%o0 + PCB_NSAVED], %o0
	sllx	%o0, 9, %o1
	or	%o1, %o4, %o4

	sth	%o4, [%o2+%o3]
	inc	2, %o3
	sth	%o5, [%o2+%o3]
	inc	2, %o3
	stw	%o0, [%o2+%o3]
	inc	4, %o3
	stw	%sp, [%o2+%o3]
	inc	4, %o3
	stw	%o7, [%o2+%o3]
	inc	4, %o3
	mov	TLB_TAG_ACCESS, %o4
	ldxa	[%o4] ASI_DMMU, %o4
	stw	%o4, [%o2+%o3]
	inc	4, %o3
	stw	%o3, [%o2+TRACEPTR]
1:
#endif
7874
7875
Lsw_sameproc:
	/*
	 * We are resuming the process that was running at the
	 * call to switch().  Just set psr ipl and return.
	 */
#ifdef SCHED_DEBUG
	mov	%l0, %o0		! XXXXX
	save	%sp, -CC64FSZ, %sp
	GLOBTOLOC
	set	1f, %o0
	mov	%i0, %o2
	set	CURPROC, %o3
	LDPTR	[%o3], %o3
	ld	[%o3 + P_VMSPACE], %o3
	call	printf
	 mov	%i7, %o1
#ifdef DEBUG
	set	swtchdelay, %o0
	call	delay
	 ld	[%o0], %o0
	set	pmapdebug, %o0
	ld	[%o0], %o0
	tst	%o0
	tnz	%icc, 1; nop	! Call debugger if we're in pmapdebug
#endif
	LOCTOGLOB
	ba	2f		! Skip debugger
	 restore
	.data
1:	.asciz	"cpu_switch: vectoring to pc=%08x thru %08x vmspace=%p\r\n"
	_ALIGN
	.globl	swtchdelay
swtchdelay:
	.word	1000
	.text
	Debugger();
2:
#endif
!	wrpr	%g0, 0, %cleanwin	! DEBUG
	clr	%g4		! This needs to point to the base of the data segment
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
	wrpr	%g0, PSTATE_INTR, %pstate
	ret
	 restore
7920
/*
 * Snapshot the current process so that stack frames are up to date.
 * Only used just before a crash dump.
 *
 * %o0 = pcb to fill in: saves %sp, %pstate, %pil and %cwp there,
 * then flushes all register windows out to the stack.
 */
ENTRY(snapshot)
	rdpr	%pstate, %o1		! save psr
	stx	%o6, [%o0 + PCB_SP]	! save sp
	rdpr	%pil, %o2
	sth	%o1, [%o0 + PCB_PSTATE]
	rdpr	%cwp, %o3
	stb	%o2, [%o0 + PCB_PIL]
	stb	%o3, [%o0 + PCB_CWP]

	flushw
	save	%sp, -CC64FSZ, %sp
	flushw
	ret
	 restore
7939
/*
 * cpu_set_kpc() and cpu_fork() arrange for proc_trampoline() to run
 * after a process gets chosen in switch(). The stack frame will
 * contain a function pointer in %l0, and an argument to pass to it
 * in %l1 (see the call sequence below).
 *
 * If the function *(%l0) returns, we arrange for an immediate return
 * to user mode. This happens in two known cases: after execve(2) of init,
 * and when returning a child to user mode after a fork(2).
 */
ENTRY(proc_trampoline)
#ifdef SCHED_DEBUG
	nop; nop; nop; nop				! Try to make sure we don't vector into the wrong instr
	mov	%l0, %o0
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%i6, %o2
	call	printf
	 mov	%i0, %o1
	ba	2f
	 restore
	.data
1:	.asciz	"proc_trampoline: calling %x sp %x\r\n"
	_ALIGN
	.text
	Debugger()
2:
#endif
	wrpr	%g0, 0, %pil		! Reset interrupt level
	call	%l0			! re-use current frame
	 mov	%l1, %o0		! argument from cpu_set_kpc()/cpu_fork()

	/*
	 * Here we finish up as in syscall, but simplified.  We need to
	 * fiddle pc and npc a bit, as execve() / setregs() /cpu_set_kpc()
	 * have only set npc, in anticipation that trap.c will advance past
	 * the trap instruction; but we bypass that, so we must do it manually.
	 */
!	save	%sp, -CC64FSZ, %sp		! Save a kernel frame to emulate a syscall
#if 0
	/* This code doesn't seem to work, but it should. */
	ldx	[%sp + CC64FSZ + STKB + TF_TSTATE], %g1
	ldx	[%sp + CC64FSZ + STKB + TF_NPC], %g2	! pc = tf->tf_npc from execve/fork
	andn	%g1, CWP, %g1			! Clear the CWP bits
	add	%g2, 4, %g3			! npc = pc+4
	rdpr	%cwp, %g5			! Fixup %cwp in %tstate
	stx	%g3, [%sp + CC64FSZ + STKB + TF_NPC]
	or	%g1, %g5, %g1
	stx	%g2, [%sp + CC64FSZ + STKB + TF_PC]
	stx	%g1, [%sp + CC64FSZ + STKB + TF_TSTATE]
#else
	mov	PSTATE_USER, %g1		! XXXX user pstate (no need to load it)
	ldx	[%sp + CC64FSZ + STKB + TF_NPC], %g2	! pc = tf->tf_npc from execve/fork
	sllx	%g1, TSTATE_PSTATE_SHIFT, %g1	! Shift it into place
	add	%g2, 4, %g3			! npc = pc+4
	rdpr	%cwp, %g5			! Fixup %cwp in %tstate
	stx	%g3, [%sp + CC64FSZ + STKB + TF_NPC]
	or	%g1, %g5, %g1
	stx	%g2, [%sp + CC64FSZ + STKB + TF_PC]
	stx	%g1, [%sp + CC64FSZ + STKB + TF_TSTATE]
#endif
#ifdef SCHED_DEBUG
!	set	panicstack-CC64FSZ-STKB, %o0! DEBUG
!	save	%g0, %o0, %sp	! DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	ldx	[%fp + CC64FSZ + STKB + TF_O + ( 6*8)], %o2
	mov	%fp, %o2
	add	%fp, CC64FSZ + STKB, %o3
	GLOBTOLOC
	call	printf
	 mov	%g2, %o1
	LOCTOGLOB
	set	3f, %o0
	mov	%g1, %o1
	mov	%g2, %o2
	mov	CTX_SECONDARY, %o4
	ldxa	[%o4] ASI_DMMU, %o4
	call	printf
	 mov	%g3, %o3
	LOCTOGLOB
	ba 2f
	restore
	.data
1:	.asciz	"proc_trampoline: returning to %p, sp=%p, tf=%p\r\n"
3:	.asciz	"tstate=%p tpc=%p tnpc=%p ctx=%x\r\n"
	_ALIGN
	.text
	Debugger()
2:
#endif
	CHKPT(%o3,%o4,0x35)
	ba,a,pt	%icc, return_from_trap
	 nop
8033
8034/*
8035 * {fu,su}{,i}{byte,word}
8036 */
/*
 * fuword(addr) / fuiword(addr):
 *	Fetch a pointer-sized word from user space via ASI_AIUS.
 *
 * In:	%o0 = user virtual address (low two bits must be clear)
 * Out:	%o0 = fetched value, or -1 on fault / misaligned address
 * Clobbers: %o2 (cpcb), %o3 (fault handler address)
 *
 * A fault during the user-space load vectors through
 * cpcb->pcb_onfault to Lfserr, which clears pcb_onfault
 * and returns -1.
 */
ALTENTRY(fuiword)
ENTRY(fuword)
	btst	3, %o0			! has low bits set...
	bnz	Lfsbadaddr		!	go return -1
	EMPTY
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	set	Lfserr, %o3
	LDPTR	[%o2 + %lo(CPCB)], %o2
	membar	#LoadStore
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	LDPTRA	[%o0] ASI_AIUS, %o0	! fetch the word
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! phew, made it, return the word
	 membar	#StoreStore|#StoreLoad
8053
/*
 * Common error exits for the user-space access routines above.
 *
 * Lfserr: reached via cpcb->pcb_onfault after a fault; %o2 still
 * holds the cpcb pointer, so clear pcb_onfault and return -1.
 *
 * Lfsbadaddr: return -1 without touching pcb_onfault (no handler
 * was installed yet).  On 32-bit (!_LP64) also set %o1 so callers
 * expecting a 64-bit result see -1 in the %o0:%o1 pair.
 */
Lfserr:
	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
	membar	#StoreStore|#StoreLoad
Lfsbadaddr:
#ifndef _LP64
	mov	-1, %o1
#endif
	retl				! and return error indicator
	 mov	-1, %o0
8063
8064	/*
8065	 * This is just like Lfserr, but it's a global label that allows
8066	 * mem_access_fault() to check to see that we don't want to try to
8067	 * page in the fault.  It's used by fuswintr() etc.
8068	 */
8069	.globl	_C_LABEL(Lfsbail)
8070_C_LABEL(Lfsbail):
8071	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
8072	membar	#StoreStore|#StoreLoad
8073	retl				! and return error indicator
8074	 mov	-1, %o0
8075
8076	/*
8077	 * Like fusword but callable from interrupt context.
8078	 * Fails if data isn't resident.
8079	 */
8080ENTRY(fuswintr)
8081	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = _Lfsbail;
8082	LDPTR	[%o2 + %lo(CPCB)], %o2
8083	set	_C_LABEL(Lfsbail), %o3
8084	STPTR	%o3, [%o2 + PCB_ONFAULT]
8085	membar	#Sync
8086	lduha	[%o0] ASI_AIUS, %o0	! fetch the halfword
8087	membar	#Sync
8088	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
8089	retl				! made it
8090	 membar	#StoreStore|#StoreLoad
8091
/*
 * fusword(addr): fetch a halfword from user space (ASI_AIUS).
 *
 * In:	%o0 = user virtual address
 * Out:	%o0 = zero-extended halfword, or -1 on fault (via Lfserr)
 * Clobbers: %o2, %o3
 */
ENTRY(fusword)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	lduha	[%o0] ASI_AIUS, %o0		! fetch the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! made it
	 membar	#StoreStore|#StoreLoad
8103
/*
 * fubyte(addr) / fuibyte(addr): fetch a byte from user space (ASI_AIUS).
 *
 * In:	%o0 = user virtual address
 * Out:	%o0 = zero-extended byte, or -1 on fault (via Lfserr)
 * Clobbers: %o2, %o3
 */
ALTENTRY(fuibyte)
ENTRY(fubyte)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	lduba	[%o0] ASI_AIUS, %o0	! fetch the byte
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! made it
	 membar	#StoreStore|#StoreLoad
8116
/*
 * suword(addr, val) / suiword: store a pointer-sized word to user
 * space (ASI_AIUS).
 *
 * In:	%o0 = user virtual address (low two bits must be clear)
 *	%o1 = value to store
 * Out:	%o0 = 0 on success, -1 on fault / misaligned address
 * Clobbers: %o2, %o3
 */
ALTENTRY(suiword)
ENTRY(suword)
	btst	3, %o0			! or has low bits set ...
	bnz	Lfsbadaddr		!	go return error
	EMPTY
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	STPTRA	%o1, [%o0] ASI_AIUS	! store the word
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0
8133
/*
 * suswintr(addr, val): like susword but callable from interrupt
 * context; uses Lfsbail so mem_access_fault() will not page in.
 *
 * In:	%o0 = user virtual address, %o1 = halfword value
 * Out:	%o0 = 0 on success, -1 if the data isn't resident
 * Clobbers: %o2, %o3
 */
ENTRY(suswintr)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = _Lfsbail;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	_C_LABEL(Lfsbail), %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	stha	%o1, [%o0] ASI_AIUS	! store the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0
8146
/*
 * susword(addr, val): store a halfword to user space (ASI_AIUS).
 *
 * In:	%o0 = user virtual address, %o1 = halfword value
 * Out:	%o0 = 0 on success, -1 on fault (via Lfserr)
 * Clobbers: %o2, %o3
 */
ENTRY(susword)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	stha	%o1, [%o0] ASI_AIUS	! store the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0
8159
/*
 * subyte(addr, val) / suibyte: store a byte to user space (ASI_AIUS).
 *
 * In:	%o0 = user virtual address, %o1 = byte value
 * Out:	%o0 = 0 on success, -1 on fault (via Lfserr)
 * Clobbers: %o2, %o3
 */
ALTENTRY(suibyte)
ENTRY(subyte)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	stba	%o1, [%o0] ASI_AIUS	! store the byte
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0
8173
8174/* probeget and probeset are meant to be used during autoconfiguration */
8175/*
8176 * The following probably need to be changed, but to what I don't know.
8177 */
8178
8179/*
8180 * u_int64_t
8181 * probeget(addr, asi, size)
8182 *	paddr_t addr;
8183 *	int asi;
8184 *	int size;
8185 *
8186 * Read or write a (byte,word,longword) from the given address.
8187 * Like {fu,su}{byte,halfword,word} but our caller is supposed
8188 * to know what he is doing... the address can be anywhere.
8189 *
8190 * We optimize for space, rather than time, here.
8191 */
8192ENTRY(probeget)
8193#ifndef _LP64
8194	!! Shuffle the args around into LP64 format
8195	COMBINE(%o0, %o1, %o0)
8196	mov	%o2, %o1
8197	mov	%o3, %o2
8198#endif
8199	mov	%o2, %o4
8200	! %o0 = addr, %o1 = asi, %o4 = (1,2,4)
8201	sethi	%hi(CPCB), %o2
8202	LDPTR	[%o2 + %lo(CPCB)], %o2	! cpcb->pcb_onfault = Lfserr;
8203#ifdef _LP64
8204	set	_C_LABEL(Lfsbail), %o5
8205#else
8206	set	_C_LABEL(Lfsprobe), %o5
8207#endif
8208	STPTR	%o5, [%o2 + PCB_ONFAULT]
8209	or	%o0, 0x9, %o3		! if (PHYS_ASI(asi)) {
8210	sub	%o3, 0x1d, %o3
8211	brz,a	%o3, 0f
8212	 mov	%g0, %o5
8213	DLFLUSH(%o0,%o5)		!	flush cache line
8214					! }
82150:
8216#ifndef _LP64
8217	rdpr	%pstate, %g1
8218	wrpr	%g1, PSTATE_AM, %pstate
8219#endif
8220	btst	1, %o4
8221	wr	%o1, 0, %asi
8222	membar	#Sync
8223	bz	0f			! if (len & 1)
8224	 btst	2, %o4
8225	ba,pt	%icc, 1f
8226	 lduba	[%o0] %asi, %o0		!	value = *(char *)addr;
82270:
8228	bz	0f			! if (len & 2)
8229	 btst	4, %o4
8230	ba,pt	%icc, 1f
8231	 lduha	[%o0] %asi, %o0		!	value = *(short *)addr;
82320:
8233	bz	0f			! if (len & 4)
8234	 btst	8, %o4
8235	ba,pt	%icc, 1f
8236	 lda	[%o0] %asi, %o0		!	value = *(int *)addr;
82370:
8238	ldxa	[%o0] %asi, %o0		!	value = *(long *)addr;
82391:
8240#ifndef _LP64
8241	SPLIT(%o0, %o1)
8242#endif
8243	membar	#Sync
8244#ifndef _LP64
8245	wrpr	%g1, 0, %pstate
8246#endif
8247	brz	%o5, 1f			! if (cache flush addr != 0)
8248	 nop
8249	DLFLUSH2(%o5)			!	flush cache line again
82501:
8251	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
8252	STPTR	%g0, [%o2 + PCB_ONFAULT]
8253	retl				! made it, clear onfault and return
8254	 membar	#StoreStore|#StoreLoad
8255
8256	/*
8257	 * Fault handler for probeget
8258	 */
8259_C_LABEL(Lfsprobe):
8260#ifndef _LP64
8261	wrpr	%g1, 0, %pstate
8262#endif
8263	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
8264	mov	-1, %o1
8265	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
8266	membar	#StoreStore|#StoreLoad
8267	retl				! and return error indicator
8268	 mov	-1, %o0
8269
8270/*
8271 * probeset(addr, asi, size, val)
8272 *	paddr_t addr;
8273 *	int asi;
8274 *	int size;
8275 *	long val;
8276 *
8277 * As above, but we return 0 on success.
8278 */
8279ENTRY(probeset)
8280#ifndef _LP64
8281	!! Shuffle the args around into LP64 format
8282	COMBINE(%o0, %o1, %o0)
8283	mov	%o2, %o1
8284	mov	%o3, %o2
8285	COMBINE(%o4, %o5, %o3)
8286#endif
8287	mov	%o2, %o4
8288	! %o0 = addr, %o1 = asi, %o4 = (1,2,4), %o3 = val
8289	sethi	%hi(CPCB), %o2		! Lfserr requires CPCB in %o2
8290	LDPTR	[%o2 + %lo(CPCB)], %o2	! cpcb->pcb_onfault = Lfserr;
8291	set	_C_LABEL(Lfsbail), %o5
8292	STPTR	%o5, [%o2 + PCB_ONFAULT]
8293	btst	1, %o4
8294	wr	%o1, 0, %asi
8295	membar	#Sync
8296	bz	0f			! if (len & 1)
8297	 btst	2, %o4
8298	ba,pt	%icc, 1f
8299	 stba	%o3, [%o0] %asi		!	*(char *)addr = value;
83000:
8301	bz	0f			! if (len & 2)
8302	 btst	4, %o4
8303	ba,pt	%icc, 1f
8304	 stha	%o3, [%o0] %asi		!	*(short *)addr = value;
83050:
8306	bz	0f			! if (len & 4)
8307	 btst	8, %o4
8308	ba,pt	%icc, 1f
8309	 sta	%o3, [%o0] %asi		!	*(int *)addr = value;
83100:
8311	bz	Lfserr			! if (len & 8)
8312	ba,pt	%icc, 1f
8313	 sta	%o3, [%o0] %asi		!	*(int *)addr = value;
83141:	membar	#Sync
8315	clr	%o0			! made it, clear onfault and return 0
8316	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
8317	STPTR	%g0, [%o2 + PCB_ONFAULT]
8318	retl
8319	 membar	#StoreStore|#StoreLoad
8320
8321/*
8322 * pmap_zero_page(pa)
8323 *
8324 * Zero one page physically addressed
8325 *
8326 * Block load/store ASIs do not exist for physical addresses,
8327 * so we won't use them.
8328 *
8329 * While we do the zero operation, we also need to blast away
8330 * the contents of the D$.  We will execute a flush at the end
8331 * to sync the I$.
8332 */
8333	.data
8334paginuse:
8335	.word	0
8336	.text
8337ENTRY(pmap_zero_page)
8338	!!
8339	!! If we have 64-bit physical addresses (and we do now)
8340	!! we need to move the pointer from %o0:%o1 to %o0
8341	!!
8342#ifndef _LP64
8343#if PADDRT == 8
8344	COMBINE(%o0, %o1, %o0)
8345#endif
8346#endif
8347#ifdef DEBUG
8348	set	pmapdebug, %o4
8349	ld	[%o4], %o4
8350	btst	0x80, %o4	! PDB_COPY
8351	bz,pt	%icc, 3f
8352	 nop
8353	save	%sp, -CC64FSZ, %sp
8354	set	2f, %o0
8355	call	printf
8356	 mov	%i0, %o1
8357!	ta	1; nop
8358	restore
8359	.data
83602:	.asciz	"pmap_zero_page(%p)\n"
8361	_ALIGN
8362	.text
83633:
8364#endif
8365	set	NBPG, %o2		! Loop count
8366	wr	%g0, ASI_PHYS_CACHED, %asi
83671:
8368	/* Unroll the loop 8 times */
8369	stxa	%g0, [%o0 + 0x00] %asi
8370	deccc	0x40, %o2
8371	stxa	%g0, [%o0 + 0x08] %asi
8372	stxa	%g0, [%o0 + 0x10] %asi
8373	stxa	%g0, [%o0 + 0x18] %asi
8374	stxa	%g0, [%o0 + 0x20] %asi
8375	stxa	%g0, [%o0 + 0x28] %asi
8376	stxa	%g0, [%o0 + 0x30] %asi
8377	stxa	%g0, [%o0 + 0x38] %asi
8378	bg,pt	%icc, 1b
8379	 inc	0x40, %o0
8380
8381	sethi	%hi(KERNBASE), %o3
8382	flush	%o3
8383	retl
8384	 wr	%g0, ASI_PRIMARY_NOFAULT, %asi	! Make C code happy
8385/*
8386 * pmap_copy_page(src, dst)
8387 *
8388 * Copy one page physically addressed
8389 * We need to use a global reg for ldxa/stxa
8390 * so the top 32-bits cannot be lost if we take
8391 * a trap and need to save our stack frame to a
8392 * 32-bit stack.  We will unroll the loop by 8 to
8393 * improve performance.
8394 *
8395 * We also need to blast the D$ and flush like
8396 * pmap_zero_page.
8397 */
8398ENTRY(pmap_copy_page)
8399	!!
8400	!! If we have 64-bit physical addresses (and we do now)
8401	!! we need to move the pointer from %o0:%o1 to %o0 and
8402	!! %o2:%o3 to %o1
8403	!!
8404#ifndef _LP64
8405#if PADDRT == 8
8406	COMBINE(%o0, %o1, %o0)
8407	COMBINE(%o2, %o3, %o1)
8408#endif
8409#endif
8410#ifdef DEBUG
8411	set	pmapdebug, %o4
8412	ld	[%o4], %o4
8413	btst	0x80, %o4	! PDB_COPY
8414	bz,pt	%icc, 3f
8415	 nop
8416	save	%sp, -CC64FSZ, %sp
8417	mov	%i0, %o1
8418	set	2f, %o0
8419	call	printf
8420	 mov	%i1, %o2
8421!	ta	1; nop
8422	restore
8423	.data
84242:	.asciz	"pmap_copy_page(%p,%p)\n"
8425	_ALIGN
8426	.text
84273:
8428#endif
8429#if 1
8430	/*
8431	 * XXXX
8432	 * We will make use of all global regs.  This may cause problems
8433	 * if we ever decide to store static data in a global reg, like
8434	 * a pointer to curcpu or something.
8435	 */
8436	set	NBPG, %o2
8437	wr	%g0, ASI_PHYS_CACHED, %asi
84381:
8439	ldxa	[%o0 + 0x00] %asi, %g1
8440	ldxa	[%o0 + 0x08] %asi, %o3
8441	ldxa	[%o0 + 0x10] %asi, %o4
8442	ldxa	[%o0 + 0x18] %asi, %o5
8443	inc	0x20, %o0
8444	deccc	0x20, %o2
8445	stxa	%g1, [%o1 + 0x00] %asi
8446	stxa	%o3, [%o1 + 0x08] %asi
8447	stxa	%o4, [%o1 + 0x10] %asi
8448	stxa	%o5, [%o1 + 0x18] %asi
8449	bg,pt	%icc, 1b		! We don't care about pages >4GB
8450	 inc	0x20, %o1
8451	wr	%g0, ASI_PRIMARY_NOFAULT, %asi
8452	retl
8453	 clr	%g4			! Restore g4
8454#else
8455	set	NBPG, %o3
8456	add	%o3, %o0, %o3
8457	mov	%g1, %o4		! Save g1
84581:
8459	ldxa	[%o0] ASI_PHYS_CACHED, %g1
8460	inc	8, %o0
8461	cmp	%o0, %o3
8462	stxa	%g1, [%o1] ASI_PHYS_CACHED
8463	bl,pt	%icc, 1b		! We don't care about pages >4GB
8464	 inc	8, %o1
8465	retl
8466	 mov	%o4, %g1		! Restore g1
8467#endif
8468/*
8469 * extern int64_t pseg_get(struct pmap* %o0, vaddr_t addr %o1);
8470 *
8471 * Return TTE at addr in pmap.  Uses physical addressing only.
8472 * pmap->pm_physaddr must by the physical address of pm_segs
8473 *
8474 */
8475ENTRY(pseg_get)
8476!	flushw			! Make sure we don't have stack probs & lose hibits of %o
8477	ldx	[%o0 + PM_PHYS], %o2			! pmap->pm_segs
8478
8479	srax	%o1, HOLESHIFT, %o3			! Check for valid address
8480	brz,pt	%o3, 0f					! Should be zero or -1
8481	 inc	%o3					! Make -1 -> 0
8482	brnz,pn	%o3, 1f					! Error! In hole!
84830:
8484	srlx	%o1, STSHIFT, %o3
8485	and	%o3, STMASK, %o3			! Index into pm_segs
8486	sll	%o3, 3, %o3
8487	add	%o2, %o3, %o2
8488	DLFLUSH(%o2,%o3)
8489	ldxa	[%o2] ASI_PHYS_CACHED, %o2		! Load page directory pointer
8490	DLFLUSH2(%o3)
8491
8492	srlx	%o1, PDSHIFT, %o3
8493	and	%o3, PDMASK, %o3
8494	sll	%o3, 3, %o3
8495	brz,pn	%o2, 1f					! NULL entry? check somewhere else
8496	 add	%o2, %o3, %o2
8497	DLFLUSH(%o2,%o3)
8498	ldxa	[%o2] ASI_PHYS_CACHED, %o2		! Load page table pointer
8499	DLFLUSH2(%o3)
8500
8501	srlx	%o1, PTSHIFT, %o3			! Convert to ptab offset
8502	and	%o3, PTMASK, %o3
8503	sll	%o3, 3, %o3
8504	brz,pn	%o2, 1f					! NULL entry? check somewhere else
8505	 add	%o2, %o3, %o2
8506	DLFLUSH(%o2,%o3)
8507	ldxa	[%o2] ASI_PHYS_CACHED, %o0
8508	DLFLUSH2(%o3)
8509	brgez,pn %o0, 1f				! Entry invalid?  Punt
8510	 btst	1, %sp
8511	bz,pn	%icc, 0f				! 64-bit mode?
8512	 nop
8513	retl						! Yes, return full value
8514	 nop
85150:
8516#if 1
8517	srl	%o0, 0, %o1
8518	retl						! No, generate a %o0:%o1 double
8519	 srlx	%o0, 32, %o0
8520#else
8521	DLFLUSH(%o2,%o3)
8522	ldda	[%o2] ASI_PHYS_CACHED, %o0
8523	DLFLUSH2(%o3)
8524	retl						! No, generate a %o0:%o1 double
8525	 nop
8526#endif
85271:
8528	clr	%o1
8529	retl
8530	 clr	%o0
8531
8532/*
8533 * In 32-bit mode:
8534 *
8535 * extern int pseg_set(struct pmap* %o0, vaddr_t addr %o1, int64_t tte %o2:%o3,
8536 *			 paddr_t spare %o4:%o5);
8537 *
8538 * In 64-bit mode:
8539 *
8540 * extern int pseg_set(struct pmap* %o0, vaddr_t addr %o1, int64_t tte %o2,
8541 *			paddr_t spare %o3);
8542 *
8543 * Set a pseg entry to a particular TTE value.  Returns 0 on success,
8544 * 1 if it needs to fill a pseg, 2 if it succeeded but didn't need the
8545 * spare page, and -1 if the address is in the virtual hole.
8546 * (NB: nobody in pmap checks for the virtual hole, so the system will hang.)
8547 * Allocate a page, pass the phys addr in as the spare, and try again.
8548 * If spare is not NULL it is assumed to be the address of a zeroed physical
8549 * page that can be used to generate a directory table or page table if needed.
8550 *
8551 */
8552ENTRY(pseg_set)
8553#ifndef _LP64
8554	btst	1, %sp					! 64-bit mode?
8555	bnz,pt	%icc, 0f
8556	 sllx	%o4, 32, %o4				! Put args into 64-bit format
8557
8558	sllx	%o2, 32, %o2				! Shift to high 32-bits
8559	sll	%o3, 0, %o3				! Zero extend
8560	sll	%o5, 0, %o5
8561	sll	%o1, 0, %o1
8562	or	%o2, %o3, %o2
8563	or	%o4, %o5, %o3
85640:
8565#endif
8566#ifdef NOT_DEBUG
8567	!! Trap any changes to pmap_kernel below 0xf0000000
8568	set	_C_LABEL(kernel_pmap_), %o5
8569	cmp	%o0, %o5
8570	bne	0f
8571	 sethi	%hi(0xf0000000), %o5
8572	cmp	%o1, %o5
8573	tlu	1
85740:
8575#endif
8576	!!
8577	!! However we managed to get here we now have:
8578	!!
8579	!! %o0 = *pmap
8580	!! %o1 = addr
8581	!! %o2 = tte
8582	!! %o3 = spare
8583	!!
8584	srax	%o1, HOLESHIFT, %o4			! Check for valid address
8585	brz,pt	%o4, 0f					! Should be zero or -1
8586	 inc	%o4					! Make -1 -> 0
8587	brz,pt	%o4, 0f
8588	 nop
8589#ifdef DEBUG
8590	ta	1					! Break into debugger
8591#endif
8592	mov	-1, %o0					! Error -- in hole!
8593	retl
8594	 mov	-1, %o1
85950:
8596	ldx	[%o0 + PM_PHYS], %o4			! pmap->pm_segs
8597	srlx	%o1, STSHIFT, %o5
8598	and	%o5, STMASK, %o5
8599	sll	%o5, 3, %o5
8600	add	%o4, %o5, %o4
86012:
8602	DLFLUSH(%o4,%g1)
8603	ldxa	[%o4] ASI_PHYS_CACHED, %o5		! Load page directory pointer
8604	DLFLUSH2(%g1)
8605
8606	brnz,a,pt	%o5, 0f				! Null pointer?
8607	 mov	%o5, %o4
8608	brz,pn	%o3, 1f					! Have a spare?
8609	 mov	%o3, %o5
8610	casxa	[%o4] ASI_PHYS_CACHED, %g0, %o5
8611	brnz,pn	%o5, 2b					! Something changed?
8612	DLFLUSH(%o4, %o5)
8613	mov	%o3, %o4
8614	clr	%o3					! Mark spare as used
86150:
8616	srlx	%o1, PDSHIFT, %o5
8617	and	%o5, PDMASK, %o5
8618	sll	%o5, 3, %o5
8619	add	%o4, %o5, %o4
86202:
8621	DLFLUSH(%o4,%g1)
8622	ldxa	[%o4] ASI_PHYS_CACHED, %o5		! Load table directory pointer
8623	DLFLUSH2(%g1)
8624
8625	brnz,a,pt	%o5, 0f				! Null pointer?
8626	 mov	%o5, %o4
8627	brz,pn	%o3, 1f					! Have a spare?
8628	 mov	%o3, %o5
8629	casxa	[%o4] ASI_PHYS_CACHED, %g0, %o5
8630	brnz,pn	%o5, 2b					! Something changed?
8631	DLFLUSH(%o4, %o4)
8632	mov	%o3, %o4
8633	clr	%o3					! Mark spare as used
86340:
8635	srlx	%o1, PTSHIFT, %o5			! Convert to ptab offset
8636	and	%o5, PTMASK, %o5
8637	sll	%o5, 3, %o5
8638	add	%o5, %o4, %o4
8639	stxa	%o2, [%o4] ASI_PHYS_CACHED		! Easier than shift+or
8640	DLFLUSH(%o4, %o4)
8641#ifdef PARANOID
8642	!! Try pseg_get to verify we did this right
8643	mov	%o7, %o4
8644	call	pseg_get
8645	 mov	%o2, %o5
8646#ifndef _LP64
8647	COMBINE(%o0, %o1, %o0)
8648#endif
8649	cmp	%o0, %o5
8650	tne	1
8651	mov	%o4, %o7
8652#endif
8653	mov	2, %o0					! spare unused?
8654	retl
8655	 movrz	%o3, %g0, %o0				! No. return 0
86561:
8657	retl
8658	 mov	1, %o0
8659
8660/*
8661 * In 32-bit mode:
8662 *
8663 * extern void pseg_find(struct pmap* %o0, vaddr_t addr %o1,
8664 *			 paddr_t spare %o2:%o3);
8665 *
8666 * In 64-bit mode:
8667 *
8668 * extern void pseg_find(struct pmap* %o0, vaddr_t addr %o1, paddr_t spare %o2);
8669 *
8670 * Get the paddr for a particular TTE entry.  Returns the TTE's PA on success,
8671 * 1 if it needs to fill a pseg, and -1 if the address is in the virtual hole.
8672 * (NB: nobody in pmap checks for the virtual hole, so the system will hang.)
8673 *  Allocate a page, pass the phys addr in as the spare, and try again.
8674 * If spare is not NULL it is assumed to be the address of a zeroed physical
8675 * page that can be used to generate a directory table or page table if needed.
8676 *
8677 */
8678ENTRY(pseg_find)
8679#ifndef _LP64
8680	btst	1, %sp					! 64-bit mode?
8681	bnz,pt	%icc, 0f
8682	 sllx	%o2, 32, %o2				! Shift to high 32-bits
8683	sll	%o3, 0, %o3				! Zero extend
8684	sll	%o1, 0, %o1
8685	or	%o2, %o3, %o2
86860:
8687#endif
8688#ifdef NOT_DEBUG
8689	!! Trap any changes to pmap_kernel below 0xf0000000
8690	set	_C_LABEL(kernel_pmap_), %o5
8691	cmp	%o0, %o5
8692	bne	0f
8693	 sethi	%hi(0xf0000000), %o5
8694	cmp	%o1, %o5
8695	tlu	1
86960:
8697#endif
8698	!!
8699	!! However we managed to get here we now have:
8700	!!
8701	!! %o0 = *pmap
8702	!! %o1 = addr
8703	!! %o2 = spare
8704	!!
8705	srax	%o1, HOLESHIFT, %o4			! Check for valid address
8706	brz,pt	%o4, 0f					! Should be zero or -1
8707	 inc	%o4					! Make -1 -> 0
8708	brz,pt	%o4, 0f
8709	 nop
8710#ifdef DEBUG
8711	ta	1					! Break into debugger
8712#endif
8713	mov	-1, %o0					! Error -- in hole!
8714	retl
8715	 mov	-1, %o1
87160:
8717	ldx	[%o0 + PM_PHYS], %o4			! pmap->pm_segs
8718	srlx	%o1, STSHIFT, %o5
8719	and	%o5, STMASK, %o5
8720	sll	%o5, 3, %o5
8721	add	%o4, %o5, %o4
87222:
8723	DLFLUSH(%o4,%o3)
8724	ldxa	[%o4] ASI_PHYS_CACHED, %o5		! Load page directory pointer
8725	DLFLUSH2(%o3)
8726
8727	brnz,a,pt	%o5, 0f				! Null pointer?
8728	 mov	%o5, %o4
8729	brz,pn	%o2, 1f					! Have a spare?
8730	 mov	%o2, %o5
8731	casxa	[%o4] ASI_PHYS_CACHED, %g0, %o5
8732	brnz,pn	%o5, 2b					! Something changed?
8733	DLFLUSH(%o4, %o5)
8734	mov	%o2, %o4
8735	clr	%o2					! Mark spare as used
87360:
8737	srlx	%o1, PDSHIFT, %o5
8738	and	%o5, PDMASK, %o5
8739	sll	%o5, 3, %o5
8740	add	%o4, %o5, %o4
87412:
8742	DLFLUSH(%o4,%o3)
8743	ldxa	[%o4] ASI_PHYS_CACHED, %o5		! Load table directory pointer
8744	DLFLUSH2(%o3)
8745
8746	brnz,a,pt	%o5, 0f				! Null pointer?
8747	 mov	%o5, %o4
8748	brz,pn	%o2, 1f					! Have a spare?
8749	 mov	%o2, %o5
8750	casxa	[%o4] ASI_PHYS_CACHED, %g0, %o5
8751	brnz,pn	%o5, 2b					! Something changed?
8752	DLFLUSH(%o4, %o4)
8753	mov	%o2, %o4
8754	clr	%o2					! Mark spare as used
87550:
8756	srlx	%o1, PTSHIFT, %o5			! Convert to ptab offset
8757	btst	1, %sp
8758	and	%o5, PTMASK, %o5
8759	sll	%o5, 3, %o5
8760	bz,pn	%icc, 0f				! 64-bit mode?
8761	 add	%o5, %o4, %o0
8762	retl
8763	 clr	%o0
87640:
8765	srl	%o0, 0, %o1
8766	retl						! No, generate a %o0:%o1 double
8767	 srlx	%o0, 32, %o0
8768
87691:
8770	retl
8771	 mov	1, %o0
8772
8773
8774/*
8775 * Use block_disable to turn off block insns for
8776 * bcopy/memset
8777 */
8778	.data
8779	.align	8
8780	.globl	block_disable
8781block_disable:	.xword	1
8782	.text
8783
8784#if 0
8785#define ASI_STORE	ASI_BLK_COMMIT_P
8786#else
8787#define ASI_STORE	ASI_BLK_P
8788#endif
8789
#if 1
/*
 * kernel bcopy/memcpy
 * Assumes regions do not overlap; has no useful return value.
 *
 * Must not use %g7 (see copyin/copyout above).
 */
ENTRY(memcpy) /* dest, src, size */
	/*
	 * Swap args for bcopy.  Gcc generates calls to memcpy for
	 * structure assignments.
	 *
	 * Falls straight through into bcopy(src, dest, size) below,
	 * which returns the original dest, so memcpy's return value
	 * works out.
	 */
	mov	%o0, %o3
	mov	%o1, %o0
	mov	%o3, %o1
#endif
/*
 * bcopy(src %o0, dest %o1, size %o2): returns dest in %o0.
 * Dispatch: overlapping regions -> Lovbcopy (backwards byte copy);
 * small (< BCOPY_SMALL) -> byte loop here; otherwise the fancy
 * aligned copy (and possibly the VIS block path) below.
 */
ENTRY(bcopy) /* src, dest, size */
#ifdef DEBUG
	set	pmapdebug, %o4
	ld	[%o4], %o4
	btst	0x80, %o4	! PDB_COPY
	bz,pt	%icc, 3f
	 nop
	save	%sp, -CC64FSZ, %sp
	mov	%i0, %o1
	set	2f, %o0
	mov	%i1, %o2
	call	printf
	 mov	%i2, %o3
!	ta	1; nop
	restore
	.data
2:	.asciz	"bcopy(%p->%p,%x)\n"
	_ALIGN
	.text
3:
#endif
	/*
	 * Check for overlaps and punt.
	 *
	 * If src <= dest <= src+len we have a problem.
	 */

	sub	%o1, %o0, %o3	! unsigned (dest - src) < len <=> overlap

	cmp	%o3, %o2
	blu,pn	CCCR, Lovbcopy
	 cmp	%o2, BCOPY_SMALL
Lbcopy_start:
	bge,pt	CCCR, 2f	! if >= this many, go be fancy.
	 cmp	%o2, 256

	mov	%o1, %o5	! Save memcpy return value
	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 EMPTY
0:
	inc	%o0
	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	retl
	 mov	%o5, %o0
	NOTREACHED

	/*
	 * Overlapping bcopies -- punt.
	 */
Lovbcopy:

	/*
	 * Since src comes before dst, and the regions might overlap,
	 * we have to do the copy starting at the end and working backwards.
	 *
	 * We could optimize this, but it almost never happens.
	 */
	mov	%o1, %o5	! Retval
	add	%o2, %o0, %o0	! src += len
	add	%o2, %o1, %o1	! dst += len

	deccc	%o2
	bl,pn	CCCR, 1f
	 dec	%o0
0:
	! Byte-at-a-time backwards copy
	dec	%o1
	ldsb	[%o0], %o4
	dec	%o0

	deccc	%o2
	bge,pt	CCCR, 0b
	 stb	%o4, [%o1]
1:
	retl
	 mov	%o5, %o0
8890
8891	/*
8892	 * Plenty of data to copy, so try to do it optimally.
8893	 */
88942:
8895#if 1
8896	! If it is big enough, use VIS instructions
8897	bge	Lbcopy_block
8898	 nop
8899#endif
8900Lbcopy_fancy:
8901
8902	!!
8903	!! First align the output to a 8-byte entity
8904	!!
8905
8906	save	%sp, -CC64FSZ, %sp
8907
8908	mov	%i0, %l0
8909	mov	%i1, %l1
8910
8911	mov	%i2, %l2
8912	btst	1, %l1
8913
8914	bz,pt	%icc, 4f
8915	 btst	2, %l1
8916	ldub	[%l0], %l4				! Load 1st byte
8917
8918	deccc	1, %l2
8919	ble,pn	CCCR, Lbcopy_finish			! XXXX
8920	 inc	1, %l0
8921
8922	stb	%l4, [%l1]				! Store 1st byte
8923	inc	1, %l1					! Update address
8924	btst	2, %l1
89254:
8926	bz,pt	%icc, 4f
8927
8928	 btst	1, %l0
8929	bz,a	1f
8930	 lduh	[%l0], %l4				! Load short
8931
8932	ldub	[%l0], %l4				! Load bytes
8933
8934	ldub	[%l0+1], %l3
8935	sllx	%l4, 8, %l4
8936	or	%l3, %l4, %l4
8937
89381:
8939	deccc	2, %l2
8940	ble,pn	CCCR, Lbcopy_finish			! XXXX
8941	 inc	2, %l0
8942	sth	%l4, [%l1]				! Store 1st short
8943
8944	inc	2, %l1
89454:
8946	btst	4, %l1
8947	bz,pt	CCCR, 4f
8948
8949	 btst	3, %l0
8950	bz,a,pt	CCCR, 1f
8951	 lduw	[%l0], %l4				! Load word -1
8952
8953	btst	1, %l0
8954	bz,a,pt	%icc, 2f
8955	 lduh	[%l0], %l4
8956
8957	ldub	[%l0], %l4
8958
8959	lduh	[%l0+1], %l3
8960	sllx	%l4, 16, %l4
8961	or	%l4, %l3, %l4
8962
8963	ldub	[%l0+3], %l3
8964	sllx	%l4, 8, %l4
8965	ba,pt	%icc, 1f
8966	 or	%l4, %l3, %l4
8967
89682:
8969	lduh	[%l0+2], %l3
8970	sllx	%l4, 16, %l4
8971	or	%l4, %l3, %l4
8972
89731:
8974	deccc	4, %l2
8975	ble,pn	CCCR, Lbcopy_finish		! XXXX
8976	 inc	4, %l0
8977
8978	st	%l4, [%l1]				! Store word
8979	inc	4, %l1
89804:
8981	!!
8982	!! We are now 32-bit aligned in the dest.
8983	!!
Lbcopy_common:
	! Split the src into an 8-byte-aligned base (%l0) plus a bit
	! shift (%l4); %l3 is the complementary shift.  If src was
	! already aligned, take the simpler no-shift path.
	and	%l0, 7, %l4				! Shift amount
	andn	%l0, 7, %l0				! Source addr

	brz,pt	%l4, Lbcopy_noshift8			! No shift version...

	 sllx	%l4, 3, %l4				! In bits
	mov	8<<3, %l3

	ldx	[%l0], %o0				! Load word -1
	sub	%l3, %l4, %l3				! Reverse shift
	deccc	12*8, %l2				! Have enough room?

	sllx	%o0, %l4, %o0
	bl,pn	CCCR, 2f
	 and	%l3, 0x38, %l3
Lbcopy_unrolled8:

	/*
	 * This is about as close to optimal as you can get, since
	 * the shifts require EU0 and cannot be paired, and you have
	 * 3 dependent operations on the data.
	 *
	 * Software pipeline: %o0-%o5 hold six pre-shifted doublewords;
	 * each store combines the high bits of one word (already in
	 * %oN, shifted left by %l4) with the low bits of the next
	 * (shifted right by %l3).  48 bytes move per iteration.
	 */

!	ldx	[%l0+0*8], %o0				! Already done
!	sllx	%o0, %l4, %o0				! Already done
	ldx	[%l0+1*8], %o1
	ldx	[%l0+2*8], %o2
	ldx	[%l0+3*8], %o3
	ldx	[%l0+4*8], %o4
	ba,pt	%icc, 1f
	 ldx	[%l0+5*8], %o5
	.align	8
1:
	srlx	%o1, %l3, %g1
	inc	6*8, %l0

	sllx	%o1, %l4, %o1
	or	%g1, %o0, %g6
	ldx	[%l0+0*8], %o0

	stx	%g6, [%l1+0*8]
	srlx	%o2, %l3, %g1

	sllx	%o2, %l4, %o2
	or	%g1, %o1, %g6
	ldx	[%l0+1*8], %o1

	stx	%g6, [%l1+1*8]
	srlx	%o3, %l3, %g1

	sllx	%o3, %l4, %o3
	or	%g1, %o2, %g6
	ldx	[%l0+2*8], %o2

	stx	%g6, [%l1+2*8]
	srlx	%o4, %l3, %g1

	sllx	%o4, %l4, %o4
	or	%g1, %o3, %g6
	ldx	[%l0+3*8], %o3

	stx	%g6, [%l1+3*8]
	srlx	%o5, %l3, %g1

	sllx	%o5, %l4, %o5
	or	%g1, %o4, %g6
	ldx	[%l0+4*8], %o4

	stx	%g6, [%l1+4*8]
	srlx	%o0, %l3, %g1
	deccc	6*8, %l2				! Have enough room?

	sllx	%o0, %l4, %o0				! Next loop
	or	%g1, %o5, %g6
	ldx	[%l0+5*8], %o5

	stx	%g6, [%l1+5*8]
	bge,pt	CCCR, 1b
	 inc	6*8, %l1

Lbcopy_unrolled8_cleanup:
	!!
	!! Finished 8 byte block, unload the regs.
	!! (Drain the 5 doublewords still staged in %o1-%o5.)
	!!
	srlx	%o1, %l3, %g1
	inc	5*8, %l0

	sllx	%o1, %l4, %o1
	or	%g1, %o0, %g6

	stx	%g6, [%l1+0*8]
	srlx	%o2, %l3, %g1

	sllx	%o2, %l4, %o2
	or	%g1, %o1, %g6

	stx	%g6, [%l1+1*8]
	srlx	%o3, %l3, %g1

	sllx	%o3, %l4, %o3
	or	%g1, %o2, %g6

	stx	%g6, [%l1+2*8]
	srlx	%o4, %l3, %g1

	sllx	%o4, %l4, %o4
	or	%g1, %o3, %g6

	stx	%g6, [%l1+3*8]
	srlx	%o5, %l3, %g1

	sllx	%o5, %l4, %o5
	or	%g1, %o4, %g6

	stx	%g6, [%l1+4*8]
	inc	5*8, %l1

	mov	%o5, %o0				! Save our unused data
	dec	5*8, %l2
91052:
9106	inccc	12*8, %l2
9107	bz,pn	%icc, Lbcopy_complete
9108
9109	!! Unrolled 8 times
9110Lbcopy_aligned8:
9111!	ldx	[%l0], %o0				! Already done
9112!	sllx	%o0, %l4, %o0				! Shift high word
9113
9114	 deccc	8, %l2					! Pre-decrement
9115	bl,pn	CCCR, Lbcopy_finish
91161:
9117	ldx	[%l0+8], %o1				! Load word 0
9118	inc	8, %l0
9119
9120	srlx	%o1, %l3, %g6
9121	or	%g6, %o0, %g6				! Combine
9122
9123	stx	%g6, [%l1]				! Store result
9124	 inc	8, %l1
9125
9126	deccc	8, %l2
9127	bge,pn	CCCR, 1b
9128	 sllx	%o1, %l4, %o0
9129
9130	btst	7, %l2					! Done?
9131	bz,pt	CCCR, Lbcopy_complete
9132
9133	!!
9134	!! Loadup the last dregs into %o0 and shift it into place
9135	!!
9136	 srlx	%l3, 3, %g6				! # bytes in %o0
9137	dec	8, %g6					!  - 8
9138	!! n-8 - (by - 8) -> n - by
9139	subcc	%l2, %g6, %g0				! # bytes we need
9140	ble,pt	%icc, Lbcopy_finish
9141	 nop
9142	ldx	[%l0+8], %o1				! Need another word
9143	srlx	%o1, %l3, %o1
9144	ba,pt	%icc, Lbcopy_finish
9145	 or	%o0, %o1, %o0				! All loaded up.
9146
Lbcopy_noshift8:
	! Src and dst are both 8-byte aligned: plain doubleword copy,
	! 48 bytes per iteration, then singles, then the sub-word tail.
	deccc	6*8, %l2				! Have enough room?
	bl,pn	CCCR, 2f
	 nop
	ba,pt	%icc, 1f
	 nop
	.align	32
1:
	ldx	[%l0+0*8], %o0
	ldx	[%l0+1*8], %o1
	ldx	[%l0+2*8], %o2
	stx	%o0, [%l1+0*8]
	stx	%o1, [%l1+1*8]
	stx	%o2, [%l1+2*8]


	ldx	[%l0+3*8], %o3
	ldx	[%l0+4*8], %o4
	ldx	[%l0+5*8], %o5
	inc	6*8, %l0
	stx	%o3, [%l1+3*8]
	deccc	6*8, %l2
	stx	%o4, [%l1+4*8]
	stx	%o5, [%l1+5*8]
	bge,pt	CCCR, 1b
	 inc	6*8, %l1
2:
	inc	6*8, %l2				! undo pre-decrement
1:
	deccc	8, %l2
	bl,pn	%icc, 1f				! < 0 --> sub word
	 nop
	ldx	[%l0], %g6
	inc	8, %l0
	stx	%g6, [%l1]
	bg,pt	%icc, 1b				! Exactly 0 --> done
	 inc	8, %l1
1:
	btst	7, %l2					! Done?
	bz,pt	CCCR, Lbcopy_complete
	 clr	%l4					! no-shift case for finish
	ldx	[%l0], %o0
Lbcopy_finish:
	! Store the final 1-8 tail bytes, which are already assembled
	! in the high end of %o0; peel off 4, 2, then 1 bytes per the
	! bits of %l2.

	brz,pn	%l2, 2f					! 100% complete?
	 cmp	%l2, 8					! Exactly 8 bytes?
	bz,a,pn	CCCR, 2f
	 stx	%o0, [%l1]

	btst	4, %l2					! Word store?
	bz	CCCR, 1f
	 srlx	%o0, 32, %g6				! Shift high word down
	stw	%g6, [%l1]
	inc	4, %l1
	mov	%o0, %g6				! Operate on the low bits
1:
	btst	2, %l2
	mov	%g6, %o0
	bz	1f
	 srlx	%o0, 16, %g6

	sth	%g6, [%l1]				! Store short
	inc	2, %l1
	mov	%o0, %g6				! Operate on low bytes
1:
	mov	%g6, %o0
	btst	1, %l2					! Byte aligned?
	bz	2f
	 srlx	%o0, 8, %g6

	stb	%g6, [%l1]				! Store last byte
	inc	1, %l1					! Update address
2:
Lbcopy_complete:
#if 0
	!!
	!! verify copy success.
	!!

	mov	%i0, %o2
	mov	%i1, %o4
	mov	%i2, %l4
0:
	ldub	[%o2], %o1
	inc	%o2
	ldub	[%o4], %o3
	inc	%o4
	cmp	%o3, %o1
	bnz	1f
	 dec	%l4
	brnz	%l4, 0b
	 nop
	ba	2f
	 nop

1:
	set	0f, %o0
	call	printf
	 sub	%i2, %l4, %o5
	set	1f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	call	printf
	 mov	%i2, %o3
	ta	1
	.data
0:	.asciz	"bcopy failed: %x@%p != %x@%p byte %d\n"
1:	.asciz	"bcopy(%p, %p, %lx)\n"
	.align 8
	.text
2:
#endif
	! Return the original dest (%i1) per memcpy convention
	ret
	 restore %i1, %g0, %o0
9261
9262#if 1
9263
9264/*
9265 * Block copy.  Useful for >256 byte copies.
9266 *
9267 * Benchmarking has shown this always seems to be slower than
9268 * the integer version, so this is disabled.  Maybe someone will
9269 * figure out why sometime.
9270 */
9271
9272Lbcopy_block:
9273	sethi	%hi(block_disable), %o3
9274	ldx	[ %o3 + %lo(block_disable) ], %o3
9275	brnz,pn	%o3, Lbcopy_fancy
9276	!! Make sure our trap table is installed
9277	set	_C_LABEL(trapbase), %o5
9278	rdpr	%tba, %o3
9279	sub	%o3, %o5, %o3
9280	brnz,pn	%o3, Lbcopy_fancy	! No, then don't use block load/store
9281	 nop
#ifdef _KERNEL
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block copy.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fpproc, and
 * fpproc->p_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curproc (or if we're on the interrupt stack
 * proc0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curproc->p_md.md_fpstate.  We point
 * fpproc at curproc (or proc0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curproc (or proc0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curproc->p_md.md_fpstate, clear our fpproc, and disable
 * the FPU.
 *
 *
 * Register usage, Kernel only (after save):
 *
 * %i0		src
 * %i1		dest
 * %i2		size
 *
 * %l0		XXXX DEBUG old fpstate
 * %l1		fpproc (hi bits only)
 * %l2		orig fpproc
 * %l3		orig fpstate
 * %l5		curproc
 * %l6		old fpstate
 *
 * Register usage, Kernel and user:
 *
 * %g1		src (retval for memcpy)
 *
 * %o0		src
 * %o1		dest
 * %o2		end dest
 * %o5		last safe fetchable address
 */

#if 1
	ENABLE_FPU(0)
#else
	!! Open-coded (currently disabled) version of ENABLE_FPU:
	!! save/steal the FPU as described in the comment above.
	save	%sp, -(CC64FSZ+FS_SIZE+BLOCK_SIZE), %sp	! Allocate an fpstate
	sethi	%hi(FPPROC), %l1
	LDPTR	[%l1 + %lo(FPPROC)], %l2		! Load fpproc
	add	%sp, (CC64FSZ+STKB+BLOCK_SIZE-1), %l0	! Calculate pointer to fpstate
	brz,pt	%l2, 1f					! fpproc == NULL?
	 andn	%l0, BLOCK_ALIGN, %l0			! And make it block aligned
	LDPTR	[%l2 + P_FPSTATE], %l3
	brz,pn	%l3, 1f					! Make sure we have an fpstate
	 mov	%l3, %o0
	call	_C_LABEL(savefpstate)			! Save the old fpstate
	 set	EINTSTACK-STKB, %l4			! Are we on intr stack?
	cmp	%sp, %l4
	bgu,pt	CCCR, 1f
	 set	INTSTACK-STKB, %l4
	cmp	%sp, %l4
	blu	CCCR, 1f
0:
	 sethi	%hi(_C_LABEL(proc0)), %l4		! Yes, use proc0
	ba,pt	%xcc, 2f				! XXXX needs to change to CPUs idle proc
	 or	%l4, %lo(_C_LABEL(proc0)), %l5
1:
	sethi	%hi(CURPROC), %l4			! Use curproc
	LDPTR	[%l4 + %lo(CURPROC)], %l5
	brz,pn	%l5, 0b					! If curproc is NULL need to use proc0
	 nop
2:
	LDPTR	[%l5 + P_FPSTATE], %l6			! Save old fpstate
	STPTR	%l0, [%l5 + P_FPSTATE]			! Insert new fpstate
	STPTR	%l5, [%l1 + %lo(FPPROC)]		! Set new fpproc
	wr	%g0, FPRS_FEF, %fprs			! Enable FPU
#endif
	!! ENABLE_FPU did a save; copy the arguments out of the in regs.
	mov	%i0, %o0				! Src addr.
	mov	%i1, %o1				! Store our dest ptr here.
	mov	%i2, %o2				! Len counter
#endif

	!!
	!! First align the output to a 64-bit entity
	!!
	!! Head fixup: store 1 byte, then 1 short, then 1 word as needed
	!! so the destination becomes 8-byte aligned.  %f0:%f2 hold the
	!! current pair of source doublewords; alignaddr/faligndata
	!! extract the mis-aligned source bytes.  %o3 tracks the last
	!! doubleword address already loaded into %f2.
	!!

	mov	%o1, %g1				! memcpy retval
	add	%o0, %o2, %o5				! End of source block

	andn	%o0, 7, %o3				! Start of block
	dec	%o5
	fzero	%f0

	andn	%o5, BLOCK_ALIGN, %o5			! Last safe addr.
	ldd	[%o3], %f2				! Load 1st word

	dec	8, %o3					! Move %o3 1 word back
	btst	1, %o1
	bz	4f

	 mov	-7, %o4					! Lowest src addr possible
	alignaddr %o0, %o4, %o4				! Base addr for load.

	cmp	%o3, %o4
	be,pt	CCCR, 1f				! Already loaded?
	 mov	%o4, %o3
	fmovd	%f2, %f0				! No. Shift
	ldd	[%o3+8], %f2				! And load
1:

	faligndata	%f0, %f2, %f4			! Isolate 1st byte

	stda	%f4, [%o1] ASI_FL8_P			! Store 1st byte
	inc	1, %o1					! Update address
	inc	1, %o0
	dec	1, %o2
4:
	btst	2, %o1
	bz	4f

	 mov	-6, %o4					! Calculate src - 6
	alignaddr %o0, %o4, %o4				! calculate shift mask and dest.

	cmp	%o3, %o4				! Addresses same?
	be,pt	CCCR, 1f
	 mov	%o4, %o3
	fmovd	%f2, %f0				! Shuffle data
	ldd	[%o3+8], %f2				! Load word 0
1:
	faligndata %f0, %f2, %f4			! Move 1st short low part of f8

	stda	%f4, [%o1] ASI_FL16_P			! Store 1st short
	dec	2, %o2
	inc	2, %o1
	inc	2, %o0
4:
	brz,pn	%o2, Lbcopy_blockfinish			! XXXX

	 btst	4, %o1
	bz	4f

	mov	-4, %o4
	alignaddr %o0, %o4, %o4				! calculate shift mask and dest.

	cmp	%o3, %o4				! Addresses same?
	beq,pt	CCCR, 1f
	 mov	%o4, %o3
	fmovd	%f2, %f0				! Shuffle data
	ldd	[%o3+8], %f2				! Load word 0
1:
	faligndata %f0, %f2, %f4			! Move 1st short low part of f8

	st	%f5, [%o1]				! Store word
	dec	4, %o2
	inc	4, %o1
	inc	4, %o0
4:
	brz,pn	%o2, Lbcopy_blockfinish			! XXXX
!!
!! Destination is now 8-byte aligned.  Re-establish the source
!! alignment state (%o3 / %f0:%f2) with a zero offset, then copy
!! one doubleword at a time until the destination is block aligned.
!!
Lbcopy_block_common:

	 mov	-0, %o4
	alignaddr %o0, %o4, %o4				! base - shift

	cmp	%o3, %o4				! Addresses same?
	beq,pt	CCCR, 1f
	 mov	%o4, %o3
	fmovd	%f2, %f0				! Shuffle data
	ldd	[%o3+8], %f2				! Load word 0
1:
	add	%o3, 8, %o0				! now use %o0 for src

	!!
	!! Continue until our dest is block aligned
	!!
Lbcopy_block_aligned8:
1:
	brz	%o2, Lbcopy_blockfinish
	 btst	BLOCK_ALIGN, %o1			! Block aligned?
	bz	1f

	 faligndata %f0, %f2, %f4			! Generate result
	deccc	8, %o2
	ble,pn	%icc, Lbcopy_blockfinish		! Should never happen
	 fmovd	%f4, %f48

	std	%f4, [%o1]				! Store result
	inc	8, %o1

	fmovd	%f2, %f0
	inc	8, %o0
	ba,pt	%xcc, 1b				! Not yet.
	 ldd	[%o0], %f2				! Load next part
Lbcopy_block_aligned64:
1:
9487
9488/*
9489 * 64-byte aligned -- ready for block operations.
9490 *
9491 * Here we have the destination block aligned, but the
9492 * source pointer may not be.  Sub-word alignment will
9493 * be handled by faligndata instructions.  But the source
9494 * can still be potentially aligned to 8 different words
9495 * in our 64-bit block, so we have 8 different copy routines.
9496 *
9497 * Once we figure out our source alignment, we branch
9498 * to the appropriate copy routine, which sets up the
9499 * alignment for faligndata and loads (sets) the values
9500 * into the source registers and does the copy loop.
9501 *
9502 * When were down to less than 1 block to store, we
9503 * exit the copy loop and execute cleanup code.
9504 *
9505 * Block loads and stores are not properly interlocked.
9506 * Stores save one reg/cycle, so you can start overwriting
9507 * registers the cycle after the store is issued.
9508 *
9509 * Block loads require a block load to a different register
9510 * block or a membar #Sync before accessing the loaded
9511 * data.
9512 *
9513 * Since the faligndata instructions may be offset as far
9514 * as 7 registers into a block (if you are shifting source
9515 * 7 -> dest 0), you need 3 source register blocks for full
9516 * performance: one you are copying, one you are loading,
9517 * and one for interlocking.  Otherwise, we would need to
9518 * sprinkle the code with membar #Sync and lose the advantage
9519 * of running faligndata in parallel with block stores.  This
9520 * means we are fetching a full 128 bytes ahead of the stores.
9521 * We need to make sure the prefetch does not inadvertently
9522 * cross a page boundary and fault on data that we will never
9523 * store.
9524 *
9525 */
9526#if 1
9527	and	%o0, BLOCK_ALIGN, %o3
9528	srax	%o3, 3, %o3				! Isolate the offset
9529
9530	brz	%o3, L100				! 0->0
9531	 btst	4, %o3
9532	bnz	%xcc, 4f
9533	 btst	2, %o3
9534	bnz	%xcc, 2f
9535	 btst	1, %o3
9536	ba,pt	%xcc, L101				! 0->1
9537	 nop	/* XXX spitfire bug */
95382:
9539	bz	%xcc, L102				! 0->2
9540	 nop
9541	ba,pt	%xcc, L103				! 0->3
9542	 nop	/* XXX spitfire bug */
95434:
9544	bnz	%xcc, 2f
9545	 btst	1, %o3
9546	bz	%xcc, L104				! 0->4
9547	 nop
9548	ba,pt	%xcc, L105				! 0->5
9549	 nop	/* XXX spitfire bug */
95502:
9551	bz	%xcc, L106				! 0->6
9552	 nop
9553	ba,pt	%xcc, L107				! 0->7
9554	 nop	/* XXX spitfire bug */
9555#else
9556
9557	!!
9558	!! Isolate the word offset, which just happens to be
9559	!! the slot in our jump table.
9560	!!
9561	!! This is 6 insns, most of which cannot be paired,
9562	!! which is about the same as the above version.
9563	!!
9564	rd	%pc, %o4
95651:
9566	and	%o0, 0x31, %o3
9567	add	%o3, (Lbcopy_block_jmp - 1b), %o3
9568	jmpl	%o4 + %o3, %g0
9569	 nop
9570
9571	!!
9572	!! Jump table
9573	!!
9574
9575Lbcopy_block_jmp:
9576	ba,a,pt	%xcc, L100
9577	 nop
9578	ba,a,pt	%xcc, L101
9579	 nop
9580	ba,a,pt	%xcc, L102
9581	 nop
9582	ba,a,pt	%xcc, L103
9583	 nop
9584	ba,a,pt	%xcc, L104
9585	 nop
9586	ba,a,pt	%xcc, L105
9587	 nop
9588	ba,a,pt	%xcc, L106
9589	 nop
9590	ba,a,pt	%xcc, L107
9591	 nop
9592#endif
9593
	!!
	!! Source is block aligned.
	!!
	!! Just load a block and go.
	!!
	!! Loop invariants for L100-L107: %o0 = next block-load address,
	!! %o1 = dest store address, %o2 = bytes remaining (overcommitted
	!! by one block; fixed up in Lbcopy_blockdone), %o5 = last safe
	!! block-load address.  The cmp/bleu pairs guard each prefetch;
	!! when past %o5 a membar #Sync is substituted so stale data is
	!! never consumed.
	!!
L100:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L100"
	.align	8
2:
#endif
	fmovd	%f0 , %f62
	ldda	[%o0] ASI_BLK_P, %f0
	inc	BLOCK_SIZE, %o0
	cmp	%o0, %o5
	bleu,a,pn	%icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	ba,pt	%icc, 3f
	 membar #Sync

	.align	32					! ICache align.
3:
	faligndata	%f62, %f0, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f0, %f2, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f2, %f4, %f36
	cmp	%o0, %o5
	faligndata	%f4, %f6, %f38
	faligndata	%f6, %f8, %f40
	faligndata	%f8, %f10, %f42
	faligndata	%f10, %f12, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f12, %f14, %f46

	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	stda	%f32, [%o1] ASI_STORE
	faligndata	%f14, %f16, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f16, %f18, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f18, %f20, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f20, %f22, %f38
	cmp	%o0, %o5
	faligndata	%f22, %f24, %f40
	faligndata	%f24, %f26, %f42
	faligndata	%f26, %f28, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f28, %f30, %f46

	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	stda	%f32, [%o1] ASI_STORE
	faligndata	%f30, %f48, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f48, %f50, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f50, %f52, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f52, %f54, %f38
	cmp	%o0, %o5
	faligndata	%f54, %f56, %f40
	faligndata	%f56, %f58, %f42
	faligndata	%f58, %f60, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f60, %f62, %f46
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16			! Increment is at top
	membar	#Sync
2:
	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
9677
	!!
	!! Source at BLOCK_ALIGN+8
	!!
	!! We need to load almost 1 complete block by hand.
	!! (Same software-pipelined loop structure as L100; see the
	!! invariants documented there.)
	!!
L101:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L101"
	.align	8
2:
#endif
!	fmovd	%f0, %f0				! Hoist fmovd
	ldd	[%o0], %f2
	inc	8, %o0
	ldd	[%o0], %f4
	inc	8, %o0
	ldd	[%o0], %f6
	inc	8, %o0
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
3:
	faligndata	%f0, %f2, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f2, %f4, %f34
	cmp	%o0, %o5
	faligndata	%f4, %f6, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f6, %f8, %f38
	faligndata	%f8, %f10, %f40
	faligndata	%f10, %f12, %f42
	faligndata	%f12, %f14, %f44
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f14, %f16, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f16, %f18, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f20, %f22, %f36
	cmp	%o0, %o5
	faligndata	%f22, %f24, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f24, %f26, %f40
	faligndata	%f26, %f28, %f42
	faligndata	%f28, %f30, %f44
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f30, %f48, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f48, %f50, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f50, %f52, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f52, %f54, %f36
	cmp	%o0, %o5
	faligndata	%f54, %f56, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f56, %f58, %f40
	faligndata	%f58, %f60, %f42
	faligndata	%f60, %f62, %f44
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f62, %f0, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
9774
	!!
	!! Source at BLOCK_ALIGN+16
	!!
	!! We need to load 6 doubles by hand.
	!! (Same software-pipelined loop structure as L100; see the
	!! invariants documented there.)
	!!
L102:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L102"
	.align	8
2:
#endif
	ldd	[%o0], %f4
	inc	8, %o0
	fmovd	%f0, %f2				! Hoist fmovd
	ldd	[%o0], %f6
	inc	8, %o0

	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
3:
	faligndata	%f2, %f4, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f4, %f6, %f34
	cmp	%o0, %o5
	faligndata	%f6, %f8, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f8, %f10, %f38
	faligndata	%f10, %f12, %f40
	faligndata	%f12, %f14, %f42
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f44

	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f16, %f18, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f18, %f20, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f20, %f22, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f22, %f24, %f36
	cmp	%o0, %o5
	faligndata	%f24, %f26, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f26, %f28, %f40
	faligndata	%f28, %f30, %f42
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f48, %f50, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f50, %f52, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f52, %f54, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f54, %f56, %f36
	cmp	%o0, %o5
	faligndata	%f56, %f58, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f58, %f60, %f40
	faligndata	%f60, %f62, %f42
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f0, %f2, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
9871
9872	!!
9873	!! Source at BLOCK_ALIGN+24
9874	!!
9875	!! We need to load 5 doubles by hand.
9876	!!
9877L103:
9878#ifdef RETURN_NAME
9879	sethi	%hi(1f), %g1
9880	ba,pt	%icc, 2f
9881	 or	%g1, %lo(1f), %g1
98821:
9883	.asciz	"L103"
9884	.align	8
98852:
9886#endif
9887	fmovd	%f0, %f4
9888	ldd	[%o0], %f6
9889	inc	8, %o0
9890	ldd	[%o0], %f8
9891	inc	8, %o0
9892	ldd	[%o0], %f10
9893	inc	8, %o0
9894	ldd	[%o0], %f12
9895	inc	8, %o0
9896	ldd	[%o0], %f14
9897	inc	8, %o0
9898
9899	cmp	%o0, %o5
9900	bleu,a,pn	%icc, 2f
9901	 ldda	[%o0] ASI_BLK_P, %f16
9902	membar #Sync
99032:
9904	inc	BLOCK_SIZE, %o0
99053:
9906	faligndata	%f4, %f6, %f32
9907	cmp	%o0, %o5
9908	faligndata	%f6, %f8, %f34
9909	dec	BLOCK_SIZE, %o2
9910	faligndata	%f8, %f10, %f36
9911	faligndata	%f10, %f12, %f38
9912	faligndata	%f12, %f14, %f40
9913	bleu,a,pn	%icc, 2f
9914	 ldda	[%o0] ASI_BLK_P, %f48
9915	membar	#Sync
99162:
9917	faligndata	%f14, %f16, %f42
9918	inc	BLOCK_SIZE, %o0
9919	faligndata	%f16, %f18, %f44
9920	brlez,pn	%o2, Lbcopy_blockdone
9921	 faligndata	%f18, %f20, %f46
9922
9923	stda	%f32, [%o1] ASI_STORE
9924
9925	faligndata	%f20, %f22, %f32
9926	cmp	%o0, %o5
9927	faligndata	%f22, %f24, %f34
9928	dec	BLOCK_SIZE, %o2
9929	faligndata	%f24, %f26, %f36
9930	inc	BLOCK_SIZE, %o1
9931	faligndata	%f26, %f28, %f38
9932	faligndata	%f28, %f30, %f40
9933	ble,a,pn	%icc, 2f
9934	 ldda	[%o0] ASI_BLK_P, %f0
9935	membar	#Sync
99362:
9937	faligndata	%f30, %f48, %f42
9938	inc	BLOCK_SIZE, %o0
9939	faligndata	%f48, %f50, %f44
9940	brlez,pn	%o2, Lbcopy_blockdone
9941	 faligndata	%f50, %f52, %f46
9942
9943	stda	%f32, [%o1] ASI_STORE
9944
9945	faligndata	%f52, %f54, %f32
9946	cmp	%o0, %o5
9947	faligndata	%f54, %f56, %f34
9948	dec	BLOCK_SIZE, %o2
9949	faligndata	%f56, %f58, %f36
9950	faligndata	%f58, %f60, %f38
9951	inc	BLOCK_SIZE, %o1
9952	faligndata	%f60, %f62, %f40
9953	bleu,a,pn	%icc, 2f
9954	 ldda	[%o0] ASI_BLK_P, %f16
9955	membar	#Sync
99562:
9957	faligndata	%f62, %f0, %f42
9958	inc	BLOCK_SIZE, %o0
9959	faligndata	%f0, %f2, %f44
9960	brlez,pn	%o2, Lbcopy_blockdone
9961	 faligndata	%f2, %f4, %f46
9962
9963	stda	%f32, [%o1] ASI_STORE
9964	ba	3b
9965	 inc	BLOCK_SIZE, %o1
9966
	!!
	!! Source at BLOCK_ALIGN+32
	!!
	!! We need to load 4 doubles by hand.
	!! (Same software-pipelined loop structure as L100; see the
	!! invariants documented there.)
	!!
L104:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L104"
	.align	8
2:
#endif
	fmovd	%f0, %f6
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f6, %f8, %f32
	cmp	%o0, %o5
	faligndata	%f8, %f10, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f10, %f12, %f36
	faligndata	%f12, %f14, %f38
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f40
	faligndata	%f16, %f18, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f20, %f22, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f22, %f24, %f32
	cmp	%o0, %o5
	faligndata	%f24, %f26, %f34
	faligndata	%f26, %f28, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f28, %f30, %f38
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f40
	dec	BLOCK_SIZE, %o2
	faligndata	%f48, %f50, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f50, %f52, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f52, %f54, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f54, %f56, %f32
	cmp	%o0, %o5
	faligndata	%f56, %f58, %f34
	faligndata	%f58, %f60, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f60, %f62, %f38
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f40
	dec	BLOCK_SIZE, %o2
	faligndata	%f0, %f2, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f2, %f4, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f4, %f6, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
10059
	!!
	!! Source at BLOCK_ALIGN+40
	!!
	!! We need to load 3 doubles by hand.
	!! (Same software-pipelined loop structure as L100; see the
	!! invariants documented there.)
	!!
L105:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L105"
	.align	8
2:
#endif
	fmovd	%f0, %f8
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f8, %f10, %f32
	cmp	%o0, %o5
	faligndata	%f10, %f12, %f34
	faligndata	%f12, %f14, %f36
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f16, %f18, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f42
	faligndata	%f20, %f22, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f22, %f24, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f24, %f26, %f32
	cmp	%o0, %o5
	faligndata	%f26, %f28, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f28, %f30, %f36
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f48, %f50, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f50, %f52, %f42
	faligndata	%f52, %f54, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f54, %f56, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f56, %f58, %f32
	cmp	%o0, %o5
	faligndata	%f58, %f60, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f60, %f62, %f36
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f0, %f2, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f2, %f4, %f42
	faligndata	%f4, %f6, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f6, %f8, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
10150
10151
	!!
	!! Source at BLOCK_ALIGN+48
	!!
	!! We need to load 2 doubles by hand.
	!! (Same software-pipelined loop structure as L100; see the
	!! invariants documented there.)
	!!
L106:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L106"
	.align	8
2:
#endif
	fmovd	%f0, %f10
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f10, %f12, %f32
	cmp	%o0, %o5
	faligndata	%f12, %f14, %f34
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f16, %f18, %f38
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f40
	faligndata	%f20, %f22, %f42
	faligndata	%f22, %f24, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f24, %f26, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f26, %f28, %f32
	cmp	%o0, %o5
	faligndata	%f28, %f30, %f34
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f48, %f50, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f50, %f52, %f40
	faligndata	%f52, %f54, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f54, %f56, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f56, %f58, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f58, %f60, %f32
	cmp	%o0, %o5
	faligndata	%f60, %f62, %f34
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f0, %f2, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f2, %f4, %f40
	faligndata	%f4, %f6, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f6, %f8, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f8, %f10, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
10240
10241
	!!
	!! Source at BLOCK_ALIGN+56
	!!
	!! We need to load 1 double by hand.
	!! (Same software-pipelined loop structure as L100; see the
	!! invariants documented there.)
	!!
L107:
#ifdef RETURN_NAME
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L107"
	.align	8
2:
#endif
	fmovd	%f0, %f12
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f12, %f14, %f32
	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f16, %f18, %f36
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f38
	faligndata	%f20, %f22, %f40
	faligndata	%f22, %f24, %f42
	faligndata	%f24, %f26, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f26, %f28, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f28, %f30, %f32
	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f48, %f50, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f50, %f52, %f38
	faligndata	%f52, %f54, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f54, %f56, %f42
	faligndata	%f56, %f58, %f44
	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f58, %f60, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f60, %f62, %f32
	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f0, %f2, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f2, %f4, %f38
	faligndata	%f4, %f6, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f6, %f8, %f42
	faligndata	%f8, %f10, %f44

	brlez,pn	%o2, Lbcopy_blockdone
	 faligndata	%f10, %f12, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1
10329
!!
!! Loop exit: undo the one-block overcommit of %o2, wait for the
!! outstanding block loads, then drain the already-aligned data in
!! %f32-%f48 one doubleword at a time until < 8 bytes remain.  The
!! last register examined is copied to %f48 for Lbcopy_blockfinish.
!!
Lbcopy_blockdone:
	inc	BLOCK_SIZE, %o2				! Fixup our overcommit
	membar	#Sync					! Finish any pending loads
#define	FINISH_REG(f)				\
	deccc	8, %o2;				\
	bl,a	Lbcopy_blockfinish;		\
	 fmovd	f, %f48;			\
	std	f, [%o1];			\
	inc	8, %o1

	FINISH_REG(%f32)
	FINISH_REG(%f34)
	FINISH_REG(%f36)
	FINISH_REG(%f38)
	FINISH_REG(%f40)
	FINISH_REG(%f42)
	FINISH_REG(%f44)
	FINISH_REG(%f46)
	FINISH_REG(%f48)
#undef FINISH_REG
	!!
	!! The low 3 bits have the sub-word bits needed to be
	!! stored [because (x-8) & 0x7 == x & 0x7].
	!!
	!! Tail fixup: %f48 (moved to %f4) holds the final partial
	!! doubleword; store up to 8/4/2/1 bytes of it as indicated
	!! by the low bits of %o2.
	!!
Lbcopy_blockfinish:
	brz,pn	%o2, 2f					! 100% complete?
	 fmovd	%f48, %f4
	cmp	%o2, 8					! Exactly 8 bytes?
	bz,a,pn	CCCR, 2f
	 std	%f4, [%o1]

	btst	4, %o2					! Word store?
	bz	CCCR, 1f
	 nop
	st	%f4, [%o1]
	inc	4, %o1
1:
	btst	2, %o2
	fzero	%f0
	bz	1f

	 mov	-6, %o4
	alignaddr %o1, %o4, %g0

	faligndata %f0, %f4, %f8

	stda	%f8, [%o1] ASI_FL16_P			! Store short
	inc	2, %o1
1:
	btst	1, %o2					! Byte aligned?
	bz	2f

	 mov	-7, %o0					! Calculate dest - 7
	alignaddr %o1, %o0, %g0				! Calculate shift mask and dest.

	faligndata %f0, %f4, %f8			! Move 1st byte to low part of f8

	stda	%f8, [%o1] ASI_FL8_P			! Store 1st byte
	inc	1, %o1					! Update address
2:
	membar	#Sync
#if 0
	!!
	!! verify copy success.
	!! (Debug-only: byte-compares src/dest, and on mismatch sets
	!! block_disable and reports via prom_printf.)
	!!

	mov	%i0, %o2
	mov	%i1, %o4
	mov	%i2, %l4
0:
	ldub	[%o2], %o1
	inc	%o2
	ldub	[%o4], %o3
	inc	%o4
	cmp	%o3, %o1
	bnz	1f
	 dec	%l4
	brnz	%l4, 0b
	 nop
	ba	2f
	 nop

1:
	set	block_disable, %o0
	stx	%o0, [%o0]

	set	0f, %o0
	call	prom_printf
	 sub	%i2, %l4, %o5
	set	1f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	call	prom_printf
	 mov	%i2, %o3
	ta	1
	.data
	_ALIGN
0:	.asciz	"block bcopy failed: %x@%p != %x@%p byte %d\r\n"
1:	.asciz	"bcopy(%p, %p, %lx)\r\n"
	_ALIGN
	.text
2:
#endif
#ifdef _KERNEL

/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
#if 1
	RESTORE_FPU
#else
	!! Open-coded (currently disabled) version of RESTORE_FPU;
	!! undoes the fpproc/fpstate swap done at entry.
#ifdef DEBUG
	LDPTR	[%l1 + %lo(FPPROC)], %l7
	cmp	%l7, %l5
!	tnz	1		! fpproc has changed!
	LDPTR	[%l5 + P_FPSTATE], %l7
	cmp	%l7, %l0
	tnz	1		! fpstate has changed!
#endif
	andcc	%l2, %l3, %g0				! If (fpproc && fpstate)
	STPTR	%l2, [%l1 + %lo(FPPROC)]		! Restore old fproc
	bz,pt	CCCR, 1f				! Skip if no fpstate
	 STPTR	%l6, [%l5 + P_FPSTATE]			! Restore old fpstate

	call	_C_LABEL(loadfpstate)			! Re-load orig fpstate
	 mov	%l3, %o0
1:
#endif
	ret
	 restore	%g1, 0, %o0			! Return DEST for memcpy
#endif
 	retl
	 mov	%g1, %o0
#endif
10465
10466
#if 1
/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * bzero(addr, len)
 *
 * We want to use VIS instructions if we're clearing out more than
 * 256 bytes, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * greater than 0x8000000000000000, which are negative numbers.
 * This should not really be an issue since the VA hole should
 * cause any such ranges to fail anyway.
 */
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	! bzero is memset with a zero pattern; fall through.
	mov	%o1, %o2
	clr	%o1			! Initialize our pattern
/*
 * memset(addr, c, len)
 *
 * Returns the original addr (%o4 preserves it across the loops).
 */
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

Lbzero_internal:
	!! Byte loop until %o0 is 8-byte aligned (or len exhausted).
	btst	7, %o0			! Word aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lbzero_internal
	 stb	%o1, [%o0 - 1]

	retl				! Duplicate Lbzero_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1
	sllx	%o1, 32, %o3
	 or	%o1, %o3, %o1
1:
#if 1
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len > 256
	bge,pt	CCCR, Lbzero_block	! use block store insns
#endif
	 deccc	8, %o2
Lbzero_longs:
	bl,pn	CCCR, Lbzero_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 */
Lbzero_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = 0;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = 0;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lbzero_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = 0;
Lbzero_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)
10562
10563#if 1
10564Lbzero_block:
10565	sethi	%hi(block_disable), %o3
10566	ldx	[ %o3 + %lo(block_disable) ], %o3
10567	brnz,pn	%o3, Lbzero_longs
10568	!! Make sure our trap table is installed
10569	set	_C_LABEL(trapbase), %o5
10570	rdpr	%tba, %o3
10571	sub	%o3, %o5, %o3
10572	brnz,pn	%o3, Lbzero_longs	! No, then don't use block load/store
10573	 nop
10574/*
10575 * Kernel:
10576 *
10577 * Here we use VIS instructions to do a block clear of a page.
10578 * But before we can do that we need to save and enable the FPU.
10579 * The last owner of the FPU registers is fpproc, and
10580 * fpproc->p_md.md_fpstate is the current fpstate.  If that's not
10581 * null, call savefpstate() with it to store our current fp state.
10582 *
10583 * Next, allocate an aligned fpstate on the stack.  We will properly
10584 * nest calls on a particular stack so this should not be a problem.
10585 *
10586 * Now we grab either curproc (or if we're on the interrupt stack
10587 * proc0).  We stash its existing fpstate in a local register and
10588 * put our new fpstate in curproc->p_md.md_fpstate.  We point
10589 * fpproc at curproc (or proc0) and enable the FPU.
10590 *
10591 * If we are ever preempted, our FPU state will be saved in our
10592 * fpstate.  Then, when we're resumed and we take an FPDISABLED
10593 * trap, the trap handler will be able to fish our FPU state out
10594 * of curproc (or proc0).
10595 *
10596 * On exiting this routine we undo the damage: restore the original
10597 * pointer to curproc->p_md.md_fpstate, clear our fpproc, and disable
10598 * the MMU.
10599 *
10600 */
10601
10602#if 1
10603	ENABLE_FPU(0)
10604#else
10605	!!
10606	!! This code will allow us to save the fpstate around this
10607	!! routine and nest FP use in the kernel
10608	!!
10609	save	%sp, -(CC64FSZ+FS_SIZE+BLOCK_SIZE), %sp	! Allocate an fpstate
10610	sethi	%hi(FPPROC), %l1
10611	LDPTR	[%l1 + %lo(FPPROC)], %l2		! Load fpproc
10612	add	%sp, (CC64FSZ+STKB+BLOCK_SIZE-1), %l0	! Calculate pointer to fpstate
10613	brz,pt	%l2, 1f					! fpproc == NULL?
10614	 andn	%l0, BLOCK_ALIGN, %l0			! And make it block aligned
10615	LDPTR	[%l2 + P_FPSTATE], %l3
10616	brz,pn	%l3, 1f					! Make sure we have an fpstate
10617	 mov	%l3, %o0
10618	call	_C_LABEL(savefpstate)			! Save the old fpstate
10619	 set	EINTSTACK-STKB, %l4			! Are we on intr stack?
10620	cmp	%sp, %l4
10621	bgu,pt	CCCR, 1f
10622	 set	INTSTACK-STKB, %l4
10623	cmp	%sp, %l4
10624	blu	CCCR, 1f
106250:
10626	 sethi	%hi(_C_LABEL(proc0)), %l4		! Yes, use proc0
10627	ba,pt	%xcc, 2f				! XXXX needs to change to CPU's idle proc
10628	 or	%l4, %lo(_C_LABEL(proc0)), %l5
106291:
10630	sethi	%hi(CURPROC), %l4			! Use curproc
10631	LDPTR	[%l4 + %lo(CURPROC)], %l5
10632	brz,pn	%l5, 0b					! If curproc is NULL need to use proc0
106332:
10634	mov	%i0, %o0
10635	mov	%i2, %o2
10636	LDPTR	[%l5 + P_FPSTATE], %l6			! Save old fpstate
10637	mov	%i3, %o3
10638	STPTR	%l0, [%l5 + P_FPSTATE]			! Insert new fpstate
10639	STPTR	%l5, [%l1 + %lo(FPPROC)]		! Set new fpproc
10640	wr	%g0, FPRS_FEF, %fprs			! Enable FPU
10641#endif
10642	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
10643	btst	63, %i0
10644	bz,pt	CCCR, 2f
10645	 nop
106461:
10647	stx	%i1, [%i0]
10648	inc	8, %i0
10649	btst	63, %i0
10650	bnz,pt	%xcc, 1b
10651	 dec	8, %i2
10652
106532:
10654	brz	%i1, 3f					! Skip the memory op
10655	 fzero	%f0					! for bzero
10656
10657#ifdef _LP64
10658	stx	%i1, [%i0]				! Flush this puppy to RAM
10659	membar	#StoreLoad
10660	ldd	[%i0], %f0
10661#else
10662	stw	%i1, [%i0]				! Flush this puppy to RAM
10663	membar	#StoreLoad
10664	ld	[%i0], %f0
10665	fmovsa	%icc, %f0, %f1
10666#endif
10667
106683:
10669	fmovd	%f0, %f2				! Duplicate the pattern
10670	fmovd	%f0, %f4
10671	fmovd	%f0, %f6
10672	fmovd	%f0, %f8
10673	fmovd	%f0, %f10
10674	fmovd	%f0, %f12
10675	fmovd	%f0, %f14
10676
10677	!! Remember: we were 8 bytes too far
10678	dec	56, %i2					! Go one iteration too far
106795:
10680	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
10681	deccc	BLOCK_SIZE, %i2
10682	bg,pt	%icc, 5b
10683	 inc	BLOCK_SIZE, %i0
10684
10685	membar	#Sync
10686/*
10687 * We've saved our possible fpstate, now disable the fpu
10688 * and continue with life.
10689 */
10690#if 1
10691	RESTORE_FPU
10692	addcc	%i2, 56, %i2	! Restore the count
10693	ba,pt	%xcc, Lbzero_longs	! Finish up the remainder
10694	 restore
10695#else
10696#ifdef DEBUG
10697	LDPTR	[%l1 + %lo(FPPROC)], %l7
10698	cmp	%l7, %l5
10699!	tnz	1		! fpproc has changed!
10700	LDPTR	[%l5 + P_FPSTATE], %l7
10701	cmp	%l7, %l0
10702	tnz	1		! fpstate has changed!
10703#endif
10704	STPTR	%g0, [%l1 + %lo(FPPROC)]		! Clear fpproc
10705	STPTR	%l6, [%l5 + P_FPSTATE]			! Restore old fpstate
10706	wr	%g0, 0, %fprs				! Disable FPU
10707	addcc	%i2, 56, %i2	! Restore the count
10708	ba,pt	%xcc, Lbzero_longs	! Finish up the remainder
10709	 restore
10710#endif
10711#endif
10712#endif
10713
10714/*
 * kcopy() is exactly like bcopy except that it sets pcb_onfault such that
10716 * when a fault occurs, it is able to return -1 to indicate this to the
10717 * caller.
10718 */
ENTRY(kcopy)
/*
 * kcopy(const void *src [%o0], void *dst [%o1], size_t len [%o2])
 *
 * Fault-protected bcopy: installs Lkcerr as cpcb->pcb_onfault so that a
 * data-access fault during the copy returns EFAULT instead of panicking.
 * Register roles for the whole routine:
 *	%o0 = src, %o1 = dst, %o2 = remaining length
 *	%o5 = cpcb, %g1 = caller's previous pcb_onfault (restored on every exit)
 * Returns 0 in %o0 on success, EFAULT via Lkcerr on fault.
 */
#ifdef DEBUG
	set	pmapdebug, %o4
	ld	[%o4], %o4
	btst	0x80, %o4	! PDB_COPY set? then log the call
	bz,pt	%icc, 3f
	 nop
	save	%sp, -CC64FSZ, %sp
	mov	%i0, %o1
	set	2f, %o0
	mov	%i1, %o2
	call	printf
	 mov	%i2, %o3
!	ta	1; nop
	restore
	.data
2:	.asciz	"kcopy(%p->%p,%x)\n"
	_ALIGN
	.text
3:
#endif
	sethi	%hi(CPCB), %o5		! cpcb->pcb_onfault = Lkcerr;
	LDPTR	[%o5 + %lo(CPCB)], %o5
	set	Lkcerr, %o3
	LDPTR	[%o5 + PCB_ONFAULT], %g1! save current onfault handler
	membar	#LoadStore
	STPTR	%o3, [%o5 + PCB_ONFAULT]
	membar	#StoreStore|#StoreLoad

	cmp	%o2, BCOPY_SMALL
Lkcopy_start:
	bge,a	Lkcopy_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy: delay slot tests src alignment)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 EMPTY
0:
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	inc	%o0
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	membar	#Sync		! Make sure all faults are processed
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! return 0 (success)
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 */
Lkcopy_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already (in the delay slot above)
	bne	1f
	 EMPTY
	btst	7, %o1
	be,a	Lkcopy_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto Lkcopy_doubles

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsb	[%o0], %o4	!	do {
	inc	%o0		!		*dst++ = *src++;
	stb	%o4, [%o1]
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 inc	%o1
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! return 0 (success)
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	inc	%o0
	stb	%o4, [%o1]
	dec	%o2		!	len--;
	inc	%o1
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsh	[%o0], %o4	!	do {
	inc	2, %o0		!		dst += 2, src += 2;
	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lkcopy_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
	inc	2, %o0		!	dst += 2;
	sth	%o4, [%o1]
	dec	2, %o2		!	len -= 2;
	inc	2, %o1		!	src += 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	ld	[%o0], %o4	!	do {
	inc	4, %o0		!		dst += 4, src += 4;
	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lkcopy_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	st	%o4, [%o1]
	dec	4, %o2		! }
	inc	4, %o1
1:
Lkcopy_doubles:
	ldx	[%o0], %g5	! do {
	inc	8, %o0		!	dst += 8, src += 8;
	stx	%g5, [%o1]	!	*(double *)dst = *(double *)src;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lkcopy_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lkcopy_done	!	goto kcopy_done;

	 btst	4, %o2		! if ((len & 4)) == 0)
	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
	inc	4, %o0		!	dst += 4;
	st	%o4, [%o1]
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lkcopy_mopw:
	be	Lkcopy_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
	be	Lkcopy_done	! if ((len & 1) == 0) goto done;
	 sth	%o4, [%o1]
	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
	stb	%o4, [%o1 + 2]
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! return 0 (success)
	NOTREACHED

	! mop up trailing byte (if present).
Lkcopy_mopb:
	bne,a	1f
	 ldsb	[%o0], %o4

Lkcopy_done:
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! return 0 (success)
	NOTREACHED

1:
	stb	%o4, [%o1]
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! return 0 (success)
	NOTREACHED

/*
 * Fault landing pad: the MMU trap handler transfers here (via pcb_onfault)
 * if any load/store above faulted.  Restore the old handler and fail.
 */
Lkcerr:
#ifdef DEBUG
	set	pmapdebug, %o4
	ld	[%o4], %o4
	btst	0x80, %o4	! PDB_COPY
	bz,pt	%icc, 3f
	 nop
	save	%sp, -CC64FSZ, %sp
	set	2f, %o0
	call	printf
	 nop
!	ta	1; nop
	restore
	.data
2:	.asciz	"kcopy error\n"
	_ALIGN
	.text
3:
#endif
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl				! and return error indicator
	 mov	EFAULT, %o0
	NOTREACHED
10956
10957/*
10958 * ovbcopy(src, dst, len): like bcopy, but regions may overlap.
10959 */
ENTRY(ovbcopy)
/*
 * ovbcopy(const void *src [%o0], void *dst [%o1], size_t len [%o2])
 *
 * Overlap-safe bcopy.  If src >= dst a forward copy is safe and we just
 * jump into bcopy's body; otherwise copy backwards from the end of both
 * regions.  %o3 = src^dst alignment difference, %o4 = data scratch.
 *
 * FIX: the backward halfword loop used to execute `deccc 2, %o0',
 * decrementing the source pointer a second time (and testing it) instead
 * of decrementing the remaining length in %o2 as the loop condition
 * `while ((len -= 2) >= 0)' requires (compare the word and byte loops,
 * and the same loop in the 32-bit sparc locore.s).  Corrected to %o2.
 */
	cmp	%o0, %o1	! src < dst?
	bgeu	Lbcopy_start	! no, go copy forwards as via bcopy
	 cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)

	/*
	 * Since src comes before dst, and the regions might overlap,
	 * we have to do the copy starting at the end and working backwards.
	 */
	add	%o2, %o0, %o0	! src += len
	add	%o2, %o1, %o1	! dst += len
	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
	 btst	3, %o0

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 EMPTY
0:
	dec	%o0		!	*--dst = *--src;
	ldsb	[%o0], %o4
	dec	%o1
	deccc	%o2
	bge	0b
	 stb	%o4, [%o1]
1:
	retl
	 nop

	/*
	 * Plenty to copy, try to be optimal.
	 * We only bother with word/halfword/byte copies here.
	 */
Lback_fancy:
!	btst	3, %o0		! done already
	bnz	1f		! if ((src & 3) == 0 &&
	 btst	3, %o1		!     (dst & 3) == 0)
	bz,a	Lback_words	!	goto words;
	 dec	4, %o2		! (done early for word copy)

1:
	/*
	 * See if the low bits match.
	 */
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3
	bz,a	3f		! if (t & 1) == 0, can do better
	 btst	1, %o0

	/*
	 * Nope; gotta do byte copy.
	 */
2:
	dec	%o0		! do {
	ldsb	[%o0], %o4	!	*--dst = *--src;
	dec	%o1
	deccc	%o2		! } while (--len != 0);
	bnz	2b
	 stb	%o4, [%o1]
	retl
	 nop

3:
	/*
	 * Can do halfword or word copy, but might have to copy 1 byte first.
	 */
!	btst	1, %o0		! done earlier
	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
	 btst	2, %o3		! (done early)
	dec	%o0		!	*--dst = *--src;
	ldsb	[%o0], %o4
	dec	%o1
	stb	%o4, [%o1]
	dec	%o2		!	len--;
	btst	2, %o3		! }

4:
	/*
	 * See if we can do a word copy ((t&2) == 0).
	 */
!	btst	2, %o3		! done earlier
	bz,a	6f		! if (t & 2) == 0, can do word copy
	 btst	2, %o0		! (src&2, done early)

	/*
	 * Gotta do halfword copy.
	 */
	dec	2, %o2		! len -= 2;
5:
	dec	2, %o0		! do {
	ldsh	[%o0], %o4	!	src -= 2;
	dec	2, %o1		!	dst -= 2;
	deccc	2, %o2		!	*(short *)dst = *(short *)src;
	bge	5b		! } while ((len -= 2) >= 0);
	 sth	%o4, [%o1]
	b	Lback_mopb	! goto mop_up_byte;
	 btst	1, %o2		! (len&1, done early)

6:
	/*
	 * We can do word copies, but we might have to copy
	 * one halfword first.
	 */
!	btst	2, %o0		! done already
	bz	7f		! if (src & 2) {
	 dec	4, %o2		! (len -= 4, done early)
	dec	2, %o0		!	src -= 2, dst -= 2;
	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
	dec	2, %o1
	sth	%o4, [%o1]
	dec	2, %o2		!	len -= 2;
				! }

7:
Lback_words:
	/*
	 * Do word copies (backwards), then mop up trailing halfword
	 * and byte if any.
	 */
!	dec	4, %o2		! len -= 4, done already
0:				! do {
	dec	4, %o0		!	src -= 4;
	dec	4, %o1		!	dst -= 4;
	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
	deccc	4, %o2		! } while ((len -= 4) >= 0);
	bge	0b
	 st	%o4, [%o1]

	/*
	 * Check for trailing shortword.
	 */
	btst	2, %o2		! if (len & 2) {
	bz,a	1f
	 btst	1, %o2		! (len&1, done early)
	dec	2, %o0		!	src -= 2, dst -= 2;
	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
	dec	2, %o1
	sth	%o4, [%o1]	! }
	btst	1, %o2

	/*
	 * Check for trailing byte.
	 */
1:
Lback_mopb:
!	btst	1, %o2		! (done already)
	bnz,a	1f		! if (len & 1) {
	 ldsb	[%o0 - 1], %o4	!	b = src[-1];
	retl
	 nop
1:
	retl			!	dst[-1] = b;
	 stb	%o4, [%o1 - 1]	! }
11115
11116
11117/*
11118 * savefpstate(f) struct fpstate *f;
11119 *
11120 * Store the current FPU state.  The first `st %fsr' may cause a trap;
11121 * our trap handler knows how to recover (by `returning' to savefpcont).
11122 *
11123 * Since the kernel may need to use the FPU and we have problems atomically
11124 * testing and enabling the FPU, we leave here with the FPRS_FEF bit set.
11125 * Normally this should be turned on in loadfpstate().
11126 */
11127 /* XXXXXXXXXX  Assume caller created a proper stack frame */
ENTRY(savefpstate)
/*
 * savefpstate(struct fpstate *f [%o0])
 *
 * Save the current FPU state into *f and leave the FPU enabled with
 * FPRS_FEF set (the "clean" marker).  %o5 holds the entry-time %fprs so
 * the DL/DU dirty bits select which register halves actually get stored.
 * If FS_REGS is block-aligned the save uses VIS block stores, otherwise
 * it falls back to individual std's.
 *
 * FIX: the alignment sanity check below was guarded by `#ifdef DIAGONSTIC'
 * (a typo), so it could never compile in; corrected to the standard
 * DIAGNOSTIC kernel option, matching the spelling used in loadfpstate().
 */
!	flushw			! Make sure we don't have stack probs & lose hibits of %o
	rdpr	%pstate, %o1		! enable FP before we begin
	rd	%fprs, %o5
	wr	%g0, FPRS_FEF, %fprs
	or	%o1, PSTATE_PEF, %o1
	wrpr	%o1, 0, %pstate
	/* do some setup work while we wait for PSR_EF to turn on */
	set	FSR_QNE, %o2		! QNE = 0x2000, too big for immediate
	clr	%o3			! qsize = 0;
special_fp_store:
	/* This may need to be done w/rdpr/stx combo */
	stx	%fsr, [%o0 + FS_FSR]	! f->fs_fsr = getfsr();
	/*
	 * Even if the preceding instruction did not trap, the queue
	 * is not necessarily empty: this state save might be happening
	 * because user code tried to store %fsr and took the FPU
	 * from `exception pending' mode to `exception' mode.
	 * So we still have to check the blasted QNE bit.
	 * With any luck it will usually not be set.
	 */
	rd	%gsr, %o4		! Save %gsr
	st	%o4, [%o0 + FS_GSR]

	ldx	[%o0 + FS_FSR], %o4	! if (f->fs_fsr & QNE)
	btst	%o2, %o4
	add	%o0, FS_REGS, %o2	! (%o2 now = &f->fs_regs; QNE mask gone)
	bnz	Lfp_storeq		!	goto storeq;
Lfp_finish:
	 btst	BLOCK_ALIGN, %o2	! Needs to be re-executed
	bnz,pn	%icc, 3f		! Check alignment
	 st	%o3, [%o0 + FS_QSIZE]	! f->fs_qsize = qsize;
	btst	FPRS_DL, %o5		! Lower FPU clean?
	bz,a,pt	%icc, 1f		! Then skip it
	 add	%o2, 128, %o2		! Skip a block

	membar	#Sync
	stda	%f0, [%o2] ASI_BLK_COMMIT_P	! f->fs_f0 = etc;
	inc	BLOCK_SIZE, %o2
	stda	%f16, [%o2] ASI_BLK_COMMIT_P
	inc	BLOCK_SIZE, %o2
1:
	btst	FPRS_DU, %o5		! Upper FPU clean?
	bz,pt	%icc, 2f		! Then skip it
	 nop

	membar	#Sync
	stda	%f32, [%o2] ASI_BLK_COMMIT_P
	inc	BLOCK_SIZE, %o2
	stda	%f48, [%o2] ASI_BLK_COMMIT_P
2:
	membar	#Sync			! Finish operation so we can
	retl
	 wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean
3:
#ifdef DIAGNOSTIC
	btst	7, %o2			! 32-bit aligned!?!?
	bnz,pn	%icc, 6f
#endif
	 btst	FPRS_DL, %o5		! Lower FPU clean?
	bz,a,pt	%icc, 4f		! Then skip it
	 add	%o0, 128, %o0

	membar	#Sync
	std	%f0, [%o0 + FS_REGS + (4*0)]	! f->fs_f0 = etc;
	std	%f2, [%o0 + FS_REGS + (4*2)]
	std	%f4, [%o0 + FS_REGS + (4*4)]
	std	%f6, [%o0 + FS_REGS + (4*6)]
	std	%f8, [%o0 + FS_REGS + (4*8)]
	std	%f10, [%o0 + FS_REGS + (4*10)]
	std	%f12, [%o0 + FS_REGS + (4*12)]
	std	%f14, [%o0 + FS_REGS + (4*14)]
	std	%f16, [%o0 + FS_REGS + (4*16)]
	std	%f18, [%o0 + FS_REGS + (4*18)]
	std	%f20, [%o0 + FS_REGS + (4*20)]
	std	%f22, [%o0 + FS_REGS + (4*22)]
	std	%f24, [%o0 + FS_REGS + (4*24)]
	std	%f26, [%o0 + FS_REGS + (4*26)]
	std	%f28, [%o0 + FS_REGS + (4*28)]
	std	%f30, [%o0 + FS_REGS + (4*30)]
4:
	btst	FPRS_DU, %o5		! Upper FPU clean?
	bz,pt	%icc, 5f		! Then skip it
	 nop

	membar	#Sync
	std	%f32, [%o0 + FS_REGS + (4*32)]
	std	%f34, [%o0 + FS_REGS + (4*34)]
	std	%f36, [%o0 + FS_REGS + (4*36)]
	std	%f38, [%o0 + FS_REGS + (4*38)]
	std	%f40, [%o0 + FS_REGS + (4*40)]
	std	%f42, [%o0 + FS_REGS + (4*42)]
	std	%f44, [%o0 + FS_REGS + (4*44)]
	std	%f46, [%o0 + FS_REGS + (4*46)]
	std	%f48, [%o0 + FS_REGS + (4*48)]
	std	%f50, [%o0 + FS_REGS + (4*50)]
	std	%f52, [%o0 + FS_REGS + (4*52)]
	std	%f54, [%o0 + FS_REGS + (4*54)]
	std	%f56, [%o0 + FS_REGS + (4*56)]
	std	%f58, [%o0 + FS_REGS + (4*58)]
	std	%f60, [%o0 + FS_REGS + (4*60)]
	std	%f62, [%o0 + FS_REGS + (4*62)]
5:
	membar	#Sync
	retl
	 wr	%g0, FPRS_FEF, %fprs		! Mark FPU clean

	!!
	!! Damn thing is *NOT* aligned on a 64-bit boundary
	!!
6:
	wr	%g0, FPRS_FEF, %fprs
	ta	1
	retl
	 nop

/*
 * Store the (now known nonempty) FP queue.
 * We have to reread the fsr each time in order to get the new QNE bit.
 *
 * UltraSPARCs don't have floating point queues.
 */
Lfp_storeq:
	add	%o0, FS_QUEUE, %o1	! q = &f->fs_queue[0];
1:
	rdpr	%fq, %o4
	stx	%o4, [%o1 + %o3]	! q[qsize++] = fsr_qfront();
	stx	%fsr, [%o0 + FS_FSR] 	! reread fsr
	ldx	[%o0 + FS_FSR], %o4	! if fsr & QNE, loop
	btst	%o5, %o4		! NOTE(review): %o5 holds the saved %fprs here, not
					! the FSR_QNE mask (%o2 was reused above); this queue
					! path should be unreachable on UltraSPARC -- confirm
	bnz	1b
	 inc	8, %o3
	b	Lfp_finish		! set qsize and finish storing fregs
	 srl	%o3, 3, %o3		! (but first fix qsize)

/*
 * The fsr store trapped.  Do it again; this time it will not trap.
 * We could just have the trap handler return to the `st %fsr', but
 * if for some reason it *does* trap, that would lock us into a tight
 * loop.  This way we panic instead.  Whoopee.
 */
savefpcont:
	b	special_fp_store + 4	! continue
	 stx	%fsr, [%o0 + FS_FSR]	! but first finish the %fsr store
11272
11273/*
11274 * Load FPU state.
11275 */
11276 /* XXXXXXXXXX  Should test to see if we only need to do a partial restore */
ENTRY(loadfpstate)
/*
 * loadfpstate(struct fpstate *f [%o0])
 *
 * Restore the FPU from *f: %gsr, %fsr, and all 64 single registers via
 * four VIS block loads, then clear the dirty bits in %fprs.
 * FS_REGS must be block-aligned; an unaligned fpstate traps (`ta 1').
 *
 * FIX: the block comment disabling the unaligned fallback below was
 * never closed, so it swallowed the final `1:' label (the target of the
 * DIAGNOSTIC `bne' above it) and the code after it; added the missing
 * terminator.
 */
	flushw			! Make sure we don't have stack probs & lose hibits of %o
	rdpr	%pstate, %o1		! enable FP before we begin
	ld	[%o0 + FS_GSR], %o4	! Restore %gsr
	set	PSTATE_PEF, %o2
	wr	%g0, FPRS_FEF, %fprs
	or	%o1, %o2, %o1
	wrpr	%o1, 0, %pstate
	ldx	[%o0 + FS_FSR], %fsr	! setfsr(f->fs_fsr);
	add	%o0, FS_REGS, %o3	! This is zero...
	btst	BLOCK_ALIGN, %o3
	bne,pt	%icc, 1f	! Only use block loads on aligned blocks
	 wr	%o4, %g0, %gsr
	membar	#Sync
	ldda	[%o3] ASI_BLK_P, %f0
	inc	BLOCK_SIZE, %o3
	ldda	[%o3] ASI_BLK_P, %f16
	inc	BLOCK_SIZE, %o3
	ldda	[%o3] ASI_BLK_P, %f32
	inc	BLOCK_SIZE, %o3
	ldda	[%o3] ASI_BLK_P, %f48
	membar	#Sync			! Make sure loads are complete
	retl
	 wr	%g0, FPRS_FEF, %fprs	! Clear dirty bits
1:
#ifdef DIAGNOSTIC
	btst	7, %o3
	bne,pn	%icc, 1f
	 nop
#endif
	/* Unaligned -- needs to be done the long way
	membar	#Sync
	ldd	[%o3 + (4*0)], %f0
	ldd	[%o3 + (4*2)], %f2
	ldd	[%o3 + (4*4)], %f4
	ldd	[%o3 + (4*6)], %f6
	ldd	[%o3 + (4*8)], %f8
	ldd	[%o3 + (4*10)], %f10
	ldd	[%o3 + (4*12)], %f12
	ldd	[%o3 + (4*14)], %f14
	ldd	[%o3 + (4*16)], %f16
	ldd	[%o3 + (4*18)], %f18
	ldd	[%o3 + (4*20)], %f20
	ldd	[%o3 + (4*22)], %f22
	ldd	[%o3 + (4*24)], %f24
	ldd	[%o3 + (4*26)], %f26
	ldd	[%o3 + (4*28)], %f28
	ldd	[%o3 + (4*30)], %f30
	ldd	[%o3 + (4*32)], %f32
	ldd	[%o3 + (4*34)], %f34
	ldd	[%o3 + (4*36)], %f36
	ldd	[%o3 + (4*38)], %f38
	ldd	[%o3 + (4*40)], %f40
	ldd	[%o3 + (4*42)], %f42
	ldd	[%o3 + (4*44)], %f44
	ldd	[%o3 + (4*46)], %f46
	ldd	[%o3 + (4*48)], %f48
	ldd	[%o3 + (4*50)], %f50
	ldd	[%o3 + (4*52)], %f52
	ldd	[%o3 + (4*54)], %f54
	ldd	[%o3 + (4*56)], %f56
	ldd	[%o3 + (4*58)], %f58
	ldd	[%o3 + (4*60)], %f60
 	ldd	[%o3 + (4*62)], %f62
	membar	#Sync
	retl
	 wr	%g0, FPRS_FEF, %fprs	! Clear dirty bits
	*/

1:
	wr	%g0, FPRS_FEF, %fprs	! Clear dirty bits
	ta	1			! Unaligned fpstate: panic via trap
	retl
	 nop
11350/*
11351 * ienab_bis(bis) int bis;
11352 * ienab_bic(bic) int bic;
11353 *
11354 * Set and clear bits in the interrupt register.
11355 */
11356
11357/*
11358 * sun4u has separate asr's for clearing/setting the interrupt mask.
11359 */
ENTRY(ienab_bis)
	! ienab_bis(bis): set soft-interrupt bits; sun4u uses the
	! SET_SOFTINT asr rather than a read-modify-write of a mask.
	retl
	 wr	%o0, 0, SET_SOFTINT	! SET_SOFTINT
11363
ENTRY(ienab_bic)
	! ienab_bic(bic): clear soft-interrupt bits via the
	! CLEAR_SOFTINT asr (sun4u has separate set/clear asr's).
	retl
	 wr	%o0, 0, CLEAR_SOFTINT	! CLEAR_SOFTINT
11367
11368/*
11369 * send_softint(cpu, level, intrhand)
11370 *
11371 * Send a softint with an intrhand pointer so we can cause a vectored
11372 * interrupt instead of a polled interrupt.  This does pretty much the
11373 * same as interrupt_vector.  If intrhand is NULL then it just sends
11374 * a polled interrupt.  If cpu is -1 then send it to this CPU, if it's
11375 * -2 send it to any CPU, otherwise send it to a particular CPU.
11376 *
11377 * XXXX Dispatching to different CPUs is not implemented yet.
11378 *
11379 * XXXX We do not block interrupts here so it's possible that another
11380 *	interrupt of the same level is dispatched before we get to
11381 *	enable the softint, causing a spurious interrupt.
11382 */
ENTRY(send_softint)
/*
 * send_softint(cpu [%o0], level [%o1], intrhand [%o2])
 *
 * Post a software interrupt at `level'.  If intrhand is non-NULL, first
 * enqueue it on the intrpending table for that IPL (lock-free, via CASPTR)
 * so the dispatcher can vector directly to it; otherwise (or if no slot is
 * free) the interrupt is taken polled.  %g1 holds the entry IPL, restored
 * on return.  NB: the `cpu' argument is ignored here (dispatch to other
 * CPUs unimplemented, per the header comment above).
 */
	rdpr	%pil, %g1	! s = splx(level)
	cmp	%g1, %o1
	bge,pt	%icc, 1f	! Only raise, never lower, the IPL
	 nop
	wrpr	%o1, 0, %pil
1:
	brz,pn	%o2, 1f		! NULL intrhand -> just post a polled softint
	 set	intrpending, %o3
	LDPTR	[%o2 + IH_PEND], %o5
	mov	8, %o4			! Number of slots to search
#ifdef INTR_INTERLOCK
	brnz	%o5, 1f		! Already pending? then don't enqueue twice
#endif
	 sll	%o1, PTRSHFT+3, %o5	! Find start of table for this IPL
	add	%o3, %o5, %o3
2:
#ifdef INTRLIST
	LDPTR	[%o3], %o5		! Load list head
	STPTR	%o5, [%o2+IH_PEND]	! Link our intrhand node in
	mov	%o2, %o4
	CASPTR	[%o3] ASI_N, %o5, %o4	! Atomically swap ourselves in as head
	cmp	%o4, %o5		! Did it work?
	bne,pn	%xcc, 2b		! No, try again
	 nop
#else	/* INTRLIST */
#if 1
	DLFLUSH(%o3, %o5)
	mov	%o2, %o5
	CASPTR	[%o3] ASI_N, %g0, %o5	! Try a slot -- MPU safe
	brz,pt	%o5, 4f			! Available?
#else
	DLFLUSH(%o3, %o5)
	LDPTR	[%o3], %o5		! Try a slot
	brz,a	%o5, 4f			! Available?
	 STPTR	%o2, [%o3]		! Grab it
#endif
	 dec	%o4
	brgz,pt	%o4, 2b			! Up to 8 slots, then give up
	 inc	PTRSZ, %o3		! Next slot

	!! If we get here we have a problem.
	!! There were no available slots and the interrupt was lost.
	!! We'll resort to polling in this case.
4:
	 DLFLUSH(%o3, %o3)		! Prevent D$ pollution
#endif /* INTRLIST */
1:
	mov	1, %o3			! Change from level to bitmask
	sllx	%o3, %o1, %o3
	wr	%o3, 0, SET_SOFTINT	! SET_SOFTINT
	retl
	 wrpr	%g1, 0, %pil		! restore IPL
11436
11437/*
11438 * Here is a very good random number generator.  This implementation is
11439 * based on _Two Fast Implementations of the `Minimal Standard' Random
11440 * Number Generator_, David G. Carta, Communications of the ACM, Jan 1990,
11441 * Vol 33 No 1.
11442 */
11443/*
11444 * This should be rewritten using the mulx instr. if I ever understand what it
11445 * does.
11446 */
	.data
randseed:
	.word	1			! current PRNG state; must stay nonzero
	.text
/*
 * random(): Park-Miller `minimal standard' generator (Carta's variant):
 * seed = seed * 16807 mod (2^31 - 1), computed with the 32-bit mulscc
 * multiply-step chain (16807 fits in 15 bits, hence 15 data steps plus
 * one final shift step).  Returns the new seed in %o0 and stores it
 * back into randseed.  Note the writes below %y's 3-instruction delay:
 * the space-indented instructions fill it with useful setup work.
 */
ENTRY(random)
	sethi	%hi(16807), %o1
	wr	%o1, %lo(16807), %y
	 sethi	%hi(randseed), %o5
	 ld	[%o5 + %lo(randseed)], %o0
	 andcc	%g0, 0, %o2		! clear partial product and condition codes
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %o0, %o2
	mulscc  %o2, %g0, %o2		! final shift step
	rd	%y, %o3			! low half of the 46-bit product
	srl	%o2, 16, %o1		! carry part (product >> 31, upper bits)
	set	0xffff, %o4
	and	%o4, %o2, %o0
	sll	%o0, 15, %o0
	srl	%o3, 17, %o3
	or	%o3, %o0, %o0		! product mod 2^31
	addcc	%o0, %o1, %o0		! Carta: lo31 + hi gives result mod (2^31-1)
	bneg	1f			! overflowed 31 bits? fold once more
	 sethi	%hi(0x7fffffff), %o1
	retl
	 st	%o0, [%o5 + %lo(randseed)]
1:
	or	%o1, %lo(0x7fffffff), %o1
	add	%o0, 1, %o0
	and	%o1, %o0, %o0		! wrap back into [0, 2^31-1)
	retl
	 st	%o0, [%o5 + %lo(randseed)]
11491
11492/*
11493 * void microtime(struct timeval *tv)
11494 *
11495 * LBL's sparc bsd 'microtime': We don't need to spl (so this routine
11496 * can be a leaf routine) and we don't keep a 'last' timeval (there
11497 * can't be two calls to this routine in a microsecond).  This seems to
11498 * be about 20 times faster than the Sun code on an SS-2. - vj
11499 *
11500 * Read time values from slowest-changing to fastest-changing,
11501 * then re-read out to slowest.  If the values read before
11502 * the innermost match those read after, the innermost value
11503 * is consistent with the outer values.  If not, it may not
11504 * be and we must retry.  Typically this loop runs only once;
11505 * occasionally it runs twice, and only rarely does it run longer.
11506 *
11507 * If we used the %tick register we could go into the nano-seconds,
11508 * and it must run for at least 10 years according to the v9 spec.
11509 *
11510 * For some insane reason timeval structure members are `long's so
11511 * we need to change this code depending on the memory model.
11512 *
11513 * NB: if somehow time was 128-bit aligned we could use an atomic
11514 * quad load to read it in and not bother de-bouncing it.
11515 */
#define MICROPERSEC	(1000000)

	.data
	.align	8
	.globl	_C_LABEL(cpu_clockrate)
_C_LABEL(cpu_clockrate):
	!! Pretend we have a 200MHz clock -- cpu_attach will fix this
	.xword	200000000
	!! Here we'll store cpu_clockrate/1000000 so we can calculate usecs
	.xword	0
	.text

/*
 * microtime(struct timeval *tv [%o0])
 *
 * Fill *tv from the global `time' plus the usec counter-timer (or %tick
 * if there is no counter-timer).  The seconds/usecs pair is de-bounced:
 * read both words, read the timer, re-read both words, and retry the
 * whole sequence if either word changed in between.  Clobbers
 * %g1-%g3, %g5, %o1-%o5.
 */
ENTRY(microtime)
	sethi	%hi(timerreg_4u), %g3
	sethi	%hi(_C_LABEL(time)), %g2
	LDPTR	[%g3+%lo(timerreg_4u)], %g3			! usec counter
	brz,pn	%g3, microtick					! If we have no counter-timer use %tick
2:
	!!  NB: if we could guarantee 128-bit alignment of these values we could do an atomic read
	LDPTR	[%g2+%lo(_C_LABEL(time))], %o2			! time.tv_sec & time.tv_usec
	LDPTR	[%g2+%lo(_C_LABEL(time)+PTRSZ)], %o3		! time.tv_sec & time.tv_usec
	ldx	[%g3], %o4					! Load usec timer value
	LDPTR	[%g2+%lo(_C_LABEL(time))], %g1			! see if time values changed
	LDPTR	[%g2+%lo(_C_LABEL(time)+PTRSZ)], %g5		! see if time values changed
	cmp	%g1, %o2
	bne	2b						! if time.tv_sec changed
	 cmp	%g5, %o3
	bne	2b						! if time.tv_usec changed
	 add	%o4, %o3, %o3					! Our timers have 1usec resolution

	set	MICROPERSEC, %o5				! normalize usec value
	sub	%o3, %o5, %o5					! Did we overflow?
	brlz,pn	%o5, 4f
	 nop
	add	%o2, 1, %o2					! overflow: carry into seconds
	mov	%o5, %o3
4:
	STPTR	%o2, [%o0]					! (should be able to std here)
	retl
	 STPTR	%o3, [%o0+PTRSZ]

microtick:
#ifndef TICK_IS_TIME
/*
 * The following code only works if %tick is reset each interrupt.
 */
2:
	!!  NB: if we could guarantee 128-bit alignment of these values we could do an atomic read
	LDPTR	[%g2+%lo(_C_LABEL(time))], %o2			! time.tv_sec & time.tv_usec
	LDPTR	[%g2+%lo(_C_LABEL(time)+PTRSZ)], %o3		! time.tv_sec & time.tv_usec
	rdpr	%tick, %o4					! Load usec timer value
	LDPTR	[%g2+%lo(_C_LABEL(time))], %g1			! see if time values changed
	LDPTR	[%g2+%lo(_C_LABEL(time)+PTRSZ)], %g5		! see if time values changed
	cmp	%g1, %o2
	bne	2b						! if time.tv_sec changed
	 cmp	%g5, %o3
	bne	2b						! if time.tv_usec changed
	 sethi	%hi(_C_LABEL(cpu_clockrate)), %g1
	ldx	[%g1 + %lo(_C_LABEL(cpu_clockrate) + 8)], %o1	! Cached ticks-per-usec scale factor
	sethi	%hi(MICROPERSEC), %o5
	brnz,pt	%o1, 3f						! Already calculated?
	 or	%o5, %lo(MICROPERSEC), %o5

	!! Calculate ticks/usec
	ldx	[%g1 + %lo(_C_LABEL(cpu_clockrate))], %o1	! No, we need to calculate it
	udivx	%o1, %o5, %o1
	stx	%o1, [%g1 + %lo(_C_LABEL(cpu_clockrate) + 8)]	! Save it so we don't need to divide again
3:
	udivx	%o4, %o1, %o4					! Convert to usec
	add	%o4, %o3, %o3

	sub	%o3, %o5, %o5					! Did we overflow?
	brlz,pn	%o5, 4f
	 nop
	add	%o2, 1, %o2					! overflow: carry into seconds
	mov	%o5, %o3
4:
	STPTR	%o2, [%o0]					! (should be able to std here)
	retl
	 STPTR	%o3, [%o0+PTRSZ]
#else
/*
 * The following code only works if %tick is synchronized with time.
 */
2:
	LDPTR	[%g2+%lo(_C_LABEL(time))], %o2			! time.tv_sec & time.tv_usec
	LDPTR	[%g2+%lo(_C_LABEL(time)+PTRSZ)], %o3		! time.tv_sec & time.tv_usec
	rdpr	%tick, %o4					! Load usec timer value
	LDPTR	[%g2+%lo(_C_LABEL(time))], %g1			! see if time values changed
	LDPTR	[%g2+%lo(_C_LABEL(time)+PTRSZ)], %g5		! see if time values changed
	cmp	%g1, %o2
	bne	2b						! if time.tv_sec changed
	 cmp	%g5, %o3
	bne	2b						! if time.tv_usec changed

	 sethi	%hi(_C_LABEL(cpu_clockrate)), %o1
	ldx	[%o1 + %lo(_C_LABEL(cpu_clockrate) + 8)], %g1	! Get scale factor
	sethi	%hi(MICROPERSEC), %o5
	brnz,pt	%g1, 1f						! Already scaled?
	 or	%o5, %lo(MICROPERSEC), %o5

	!! Calculate ticks/usec
	ldx	[%o1 + %lo(_C_LABEL(cpu_clockrate))], %g1	! No, we need to calculate it
	udivx	%g1, %o5, %g1					! Hz / 10^6 = MHz
	stx	%g1, [%o1 + %lo(_C_LABEL(cpu_clockrate) + 8)]	! Save it so we don't need to divide again
1:

	STPTR	%o2, [%o0]					! Store seconds.
	udivx	%o4, %g1, %o4					! Scale it: ticks / MHz = usec

	udivx	%o4, %o5, %o2					! Now %o2 has seconds

	mulx	%o2, %o5, %o5					! Now calculate usecs -- damn no remainder insn
	sub	%o4, %o5, %o1					! %o1 has the remainder

	retl
	 STPTR	%o1, [%o0+PTRSZ]				! Save usec word
#endif
11634
11635/*
11636 * delay function
11637 *
11638 * void delay(N)  -- delay N microseconds
11639 *
11640 * Register usage: %o0 = "N" number of usecs to go (counts down to zero)
11641 *		   %o1 = "timerblurb" (stays constant)
11642 *		   %o2 = counter for 1 usec (counts down from %o1 to zero)
11643 *
11644 *
11645 *	cpu_clockrate should be tuned during CPU probe to the CPU clockrate in Hz
11646 *
11647 */
11648ENTRY(delay)			! %o0 = n
11649#if 1
!
! Busy-wait for %o0 microseconds using the %tick register.  The usec
! count is scaled to CPU ticks using the cached ticks-per-usec value
! stored at cpu_clockrate + 8 (computed on first use).
!
11650	rdpr	%tick, %o1					! Take timer snapshot
11651	sethi	%hi(_C_LABEL(cpu_clockrate)), %o2
11652	sethi	%hi(MICROPERSEC), %o3
11653	ldx	[%o2 + %lo(_C_LABEL(cpu_clockrate) + 8)], %o4	! Get scale factor
11654	brnz,pt	%o4, 0f						! Non-zero => already computed
11655	 or	%o3, %lo(MICROPERSEC), %o3
11656
11657	!! Calculate ticks/usec
11658	ldx	[%o2 + %lo(_C_LABEL(cpu_clockrate))], %o4	! No, we need to calculate it
11659	udivx	%o4, %o3, %o4					! Hz / 10^6 = ticks/usec
11660	stx	%o4, [%o2 + %lo(_C_LABEL(cpu_clockrate) + 8)]	! Save it so we don't need to divide again
116610:
11662
11663	mulx	%o0, %o4, %o0					! Convert usec -> ticks
11664	rdpr	%tick, %o2					! Top of next itr
116651:
!
! Loop state: %o1 = %tick sample from the previous iteration,
! %o0 = ticks still to wait.  Each pass subtracts the delta between
! successive %tick samples from the remaining count (but never adds,
! thanks to movrgz), so the loop works with deltas rather than an
! absolute deadline.
!
11666	sub	%o2, %o1, %o3					! How many ticks have gone by?
11667	sub	%o0, %o3, %o4					! Decrement count by that much
11668	movrgz	%o3, %o4, %o0					! But only if we're decrementing
11669	mov	%o2, %o1					! Remember last tick
11670	brgz,pt	%o0, 1b						! Done?
11671	 rdpr	%tick, %o2					! Get new tick
11672
11673	retl
11674	 nop
11675#else
11676/* This code only works if %tick does not wrap */
11677	rdpr	%tick, %g1					! Take timer snapshot
11678	sethi	%hi(_C_LABEL(cpu_clockrate)), %g2
11679	sethi	%hi(MICROPERSEC), %o2
11680	ldx	[%g2 + %lo(_C_LABEL(cpu_clockrate))], %g2	! Get scale factor
11681	or	%o2, %lo(MICROPERSEC), %o2
11682!	sethi	%hi(_C_LABEL(timerblurb), %o5			! This is if we plan to tune the clock
11683!	ld	[%o5 + %lo(_C_LABEL(timerblurb))], %o5		!  with respect to the counter/timer
11684	mulx	%o0, %g2, %g2					! Scale it: (usec * Hz) / 1 x 10^6 = ticks
11685	udivx	%g2, %o2, %g2
11686	add	%g1, %g2, %g2					! %g2 = absolute deadline tick value
11687!	add	%o5, %g2, %g2					! But this gets complicated
11688	rdpr	%tick, %g1					! Top of next itr
11689	mov	%g1, %g1	! Erratum 50
116901:
11691	cmp	%g1, %g2
11692	bl,a,pn %xcc, 1b					! Done?
11693	 rdpr	%tick, %g1
11694
11695	retl
11696	 nop
11697#endif
11698	/*
11699	 * If something's wrong with the standard setup do this stupid loop
11700	 * calibrated for a 143MHz processor.
11701	 */
11702Lstupid_delay:
	! Outer loop: once per requested microsecond (%o0 counts down);
	! the inner loop counts down 142857143/MICROPERSEC iterations per
	! usec.  Calibration is approximate (ignores loop overhead).
11703	set	142857143/MICROPERSEC, %o1
11704Lstupid_loop:
11705	brnz,pt	%o1, Lstupid_loop
11706	 dec	%o1
11707	brnz,pt	%o0, Lstupid_delay
11708	 dec	%o0
11709	retl
11710	 nop
11711
11712/*
11713 * next_tick(long increment)
11714 *
11715 * Sets the %tick_cmpr register to fire off in `increment' machine
11716 * cycles in the future.  Also handles %tick wraparound.  In 32-bit
11717 * mode we're limited to a 32-bit increment.
11718 */
11719	.data
11720	.align	8
11721tlimit:
11722	.xword	0		! NOTE(review): not referenced by visible code
11723	.text
/*
 * next_tick(long increment)
 *
 * Arm %tick_cmpr to fire `increment' cycles after the previous compare
 * value, but at least ~1000 ticks past the current %tick so the
 * interrupt is not armed in the past and lost.  Bit 63 of both %tick
 * and %tick_cmpr is a flag bit (NPT / INT_DIS per SPARCv9), not part
 * of the counter, and is masked off before comparing.
 */
11724ENTRY(next_tick)
11725	rd	TICK_CMPR, %o2
11726	rdpr	%tick, %o1
11727
11728	mov	1, %o3		! Mask off high bits of these registers
11729	sllx	%o3, 63, %o3	! %o3 = 1 << 63
11730	andn	%o1, %o3, %o1
11731	andn	%o2, %o3, %o2
11732	cmp	%o1, %o2	! Did we wrap?  (tick < tick_cmpr)
11733	bgt,pt	%icc, 1f
11734	 add	%o1, 1000, %o1	! Need some slack so we don't lose intrs.
11735
11736	/*
11737	 * Handle the unlikely case of %tick wrapping.
11738	 *
11739	 * This should only happen every 10 years or more.
11740	 *
11741	 * We need to increment the time base by the size of %tick in
11742	 * microseconds.  This will require some divides and multiplies
11743	 * which can take time.  So we re-read %tick.
11744	 *
11745	 */
11746
11747	/* XXXXX NOT IMPLEMENTED */
11748
11749
11750
117511:
11752	add	%o2, %o0, %o2	! Advance compare value by the increment
	! NOTE(review): andn with %o3 = 1<<63 clears bit 63, so %o4 can
	! never be negative and the brlz below looks unreachable; the
	! Ltick_ovflw comment suggests the wrap test may have been meant
	! to inspect %o2 instead -- TODO confirm intent.
11753	andn	%o2, %o3, %o4
11754	brlz,pn	%o4, Ltick_ovflw
11755	 cmp	%o2, %o1	! Has this tick passed?
11756	blt,pn	%xcc, 1b	! Yes
11757	 nop
11758
11759	retl
11760	 wr	%o2, TICK_CMPR
11761
11762Ltick_ovflw:
11763/*
11764 * When we get here tick_cmpr has wrapped, but we don't know if %tick
11765 * has wrapped.  If bit 62 is set then we have not wrapped and we can
11766 * use the current value of %o4 as %tick.  Otherwise we need to return
11767 * to our loop with %o4 as %tick_cmpr (%o2).
11768 */
11769	srlx	%o3, 1, %o5	! %o5 = 1 << 62
11770	btst	%o5, %o1
11771	bz,pn	%xcc, 1b
11772	 mov	%o4, %o2	! (delay slot, executed on both paths)
11773	retl
11774	 wr	%o2, TICK_CMPR
11775
11776
/*
 * int setjmp(label_t *l)
 *
 * Save the caller's frame pointer and return PC into *l and return 0.
 * flushw spills all active register windows to the stack first, so the
 * saved %fp/%i7 describe a fully-spilled, consistent stack.
 */
11777ENTRY(setjmp)
11778	save	%sp, -CC64FSZ, %sp	! Need a frame to return to.
11779	flushw
11780	stx	%fp, [%i0+0]	! 64-bit stack pointer
11781	stx	%i7, [%i0+8]	! 64-bit return pc
11782	ret
11783	 restore	%g0, 0, %o0	! return 0
11784
11785	.data
! Panic string for a botched longjmp.
! NOTE(review): not referenced by the visible code in this region.
11786Lpanic_ljmp:
11787	.asciz	"longjmp botch"
11788	_ALIGN
11789	.text
11790
/*
 * void longjmp(label_t *l, int v)
 *
 * Restore the stack frame and return PC saved by setjmp() and make
 * that setjmp() return v, or 1 if v == 0 (longjmp must never cause
 * setjmp() to return 0).
 *
 * Fix: the original used `movrz %i1, %i1, %i2', which copies v into
 * the return register only when v is ZERO -- yielding 0 for v == 0 and
 * 1 for any non-zero v, the opposite of the intended `v ? v : 1'.
 * `movrnz' copies v only when it is non-zero, leaving the preloaded 1
 * in place for v == 0.
 */
11791ENTRY(longjmp)
11792	save	%sp, -CC64FSZ, %sp	! prepare to restore to (old) frame
11793	flushw				! spill windows so the %fp reload is safe
11794	mov	1, %i2			! default return value: 1
11795	ldx	[%i0+0], %fp	! get return stack
11796	movrnz	%i1, %i1, %i2	! compute v ? v : 1
11797	ldx	[%i0+8], %i7	! get rpc
11798	ret
11799	 restore	%i2, 0, %o0
11800
11801#ifdef DDB
11802	/*
11803	 * Debug stuff.  Dump the trap registers into buffer & set tl=0.
11804	 *
11805	 *  %o0 = *ts
11806	 */
/*
 * Dump the trap-register stack for DDB.
 *
 * In:  %o0 = ts, buffer with room for %tl * 4 * 8 bytes.
 * Out: %o0 = the %tl value at entry; %tl is left at 0.
 *
 * For each trap level from the current %tl down to 1, stores %tstate,
 * %tpc, %tnpc and %tt (8 bytes each, in that order), lowering %tl one
 * level per iteration via the delay slot of the loop branch.
 */
11807ENTRY(savetstate)
11808	mov	%o0, %o1	! %o1 walks the output buffer
11809	CHKPT(%o4,%o3,0x28)
11810	rdpr	%tl, %o0
11811	brz	%o0, 2f		! nothing to save at %tl == 0
11812	 mov	%o0, %o2	! %o2 = levels remaining
118131:
11814	rdpr	%tstate, %o3
11815	stx	%o3, [%o1]
11816	deccc	%o2
11817	inc	8, %o1
11818	rdpr	%tpc, %o4
11819	stx	%o4, [%o1]
11820	inc	8, %o1
11821	rdpr	%tnpc, %o5
11822	stx	%o5, [%o1]
11823	inc	8, %o1
11824	rdpr	%tt, %o4
11825	stx	%o4, [%o1]
11826	inc	8, %o1
11827	bnz	1b		! more levels?
11828	 wrpr	%o2, 0, %tl	! step down one trap level (delay slot)
118292:
11830	retl
11831	 nop
11832
11833	/*
11834	 * Debug stuff.  Resore trap registers from buffer.
11835	 *
11836	 *  %o0 = %tl
11837	 *  %o1 = *ts
11838	 *
11839	 * Maybe this should be re-written to increment tl instead of decrementing.
11840	 */
/*
 * Reload the trap-register stack saved by savetstate().
 *
 * In:  %o0 = trap level count (the value savetstate() returned),
 *      %o1 = buffer in savetstate() layout (tstate/tpc/tnpc/tt per level).
 * Out: trap registers for levels %o0..1 restored; %tl left at %o0
 *      (written in the delay slot of the final retl).
 */
11841ENTRY(restoretstate)
11842	CHKPT(%o4,%o3,0x36)
11843	flushw			! Make sure we don't have stack probs & lose hibits of %o
11844	brz,pn	%o0, 2f		! nothing to restore if %tl == 0
11845	 mov	%o0, %o2	! %o2 = levels remaining
11846	CHKPT(%o4,%o3,0x29)
11847	wrpr	%o0, 0, %tl	! start at the top trap level
118481:
11849	ldx	[%o1], %o3
11850	deccc	%o2
11851	inc	8, %o1
11852	wrpr	%o3, 0, %tstate
11853	ldx	[%o1], %o4
11854	inc	8, %o1
11855	wrpr	%o4, 0, %tpc
11856	ldx	[%o1], %o5
11857	inc	8, %o1
11858	wrpr	%o5, 0, %tnpc
11859	ldx	[%o1], %o4
11860	inc	8, %o1
11861	wrpr	%o4, 0, %tt
11862	bnz	1b		! more levels?
11863	 wrpr	%o2, 0, %tl	! step down one trap level (delay slot)
118642:
11865	CHKPT(%o4,%o3,0x30)
11866	retl
11867	 wrpr	%o0, 0, %tl	! leave %tl at the requested level
11868
11869	/*
11870	 * Switch to context in %o0
11871	 */
11872ENTRY(switchtoctx)
11873#ifdef SPITFIRE
	! Demap every TLB entry belonging to the secondary context in both
	! MMUs, then install %o0 as the new secondary context number.  The
	! demap operation is encoded in the address bits of the stxa
	! (DEMAP_CTX_SECONDARY), with membar #Sync after each MMU write.
11874	set	DEMAP_CTX_SECONDARY, %o3
11875	stxa	%o3, [%o3] ASI_DMMU_DEMAP	! demap secondary ctx from D-MMU
11876	membar	#Sync
11877	mov	CTX_SECONDARY, %o4
11878	stxa	%o3, [%o3] ASI_IMMU_DEMAP	! ... and from the I-MMU
11879	membar	#Sync
11880	stxa	%o0, [%o4] ASI_DMMU		! Maybe we should invalidate the old ctx here?
11881	membar	#Sync				! No real reason for this XXXX
11882	sethi	%hi(KERNBASE), %o2
11883	flush	%o2				! synchronize instruction fetch
11884	retl
11885	 nop
11886#else
11887	/* UNIMPLEMENTED */
11888	retl
11889	 nop
11890#endif
11891
11892#ifndef _LP64
11893	/*
11894	 * Convert to 32-bit stack then call OF_sym2val()
11895	 */
11896ENTRY(OF_sym2val32)
11897	save	%sp, -CC64FSZ, %sp
	! If %sp is biased (odd => 64-bit frame convention), strip the
	! BIAS so OF_sym2val() runs on a 32-bit style stack.
	! NOTE(review): when the argument is not 8-byte aligned the call
	! is skipped and the %o0 handed back by the restore below is
	! undefined -- TODO confirm this is the intended failure mode.
11898	btst	7, %i0
11899	bnz,pn	%icc, 1f
11900	 add	%sp, BIAS, %o1	! %o1 = de-biased stack pointer
11901	btst	1, %sp		! stack biased?
11902	movnz	%icc, %o1, %sp	! yes: switch to the de-biased one
11903	call	_C_LABEL(OF_sym2val)
11904	 mov	%i0, %o0
119051:
11906	ret
11907	 restore	%o0, 0, %o0	! propagate callee's return value
11908
11909	/*
11910	 * Convert to 32-bit stack then call OF_val2sym()
11911	 */
11912ENTRY(OF_val2sym32)
11913	save	%sp, -CC64FSZ, %sp
	! Same stack-bias dance as OF_sym2val32: drop the BIAS from a
	! 64-bit (odd) %sp before calling the 32-bit OF_val2sym().
	! NOTE(review): a non-8-byte-aligned argument skips the call and
	! returns an undefined %o0 -- TODO confirm intent.
11914	btst	7, %i0
11915	bnz,pn	%icc, 1f
11916	 add	%sp, BIAS, %o1	! %o1 = de-biased stack pointer
11917	btst	1, %sp		! stack biased?
11918	movnz	%icc, %o1, %sp	! yes: switch to the de-biased one
11919	call	_C_LABEL(OF_val2sym)
11920	 mov	%i0, %o0
119211:
11922	ret
11923	 restore	%o0, 0, %o0	! propagate callee's return value
11924#endif /* _LP64 */
11925#endif /* DDB */
11926
11927	.data
11928	_ALIGN
11929#ifdef DDB
	! End/start pointers of the kernel symbol table for DDB;
	! presumably filled in at boot -- not written by visible code here.
11930	.globl	_C_LABEL(esym)
11931_C_LABEL(esym):
11932	POINTER	0
11933	.globl	_C_LABEL(ssym)
11934_C_LABEL(ssym):
11935	POINTER	0
11936#endif
11937	.globl	_C_LABEL(proc0paddr)
11938_C_LABEL(proc0paddr):
11939	POINTER	_C_LABEL(u0)		! KVA of proc0 uarea
11940
11941/* interrupt counters	XXX THESE BELONG ELSEWHERE (if anywhere) */
	! intrnames/intrcnt are parallel tables: 16 NUL-terminated names
	! below and 16 long-sized counters in the .space block, one pair
	! per interrupt level (0 = spurious, 10 = clock, 14 = prof).
11942	.globl	_C_LABEL(intrcnt), _C_LABEL(eintrcnt), _C_LABEL(intrnames), _C_LABEL(eintrnames)
11943_C_LABEL(intrnames):
11944	.asciz	"spur"
11945	.asciz	"lev1"
11946	.asciz	"lev2"
11947	.asciz	"lev3"
11948	.asciz	"lev4"
11949	.asciz	"lev5"
11950	.asciz	"lev6"
11951	.asciz	"lev7"
11952	.asciz  "lev8"
11953	.asciz	"lev9"
11954	.asciz	"clock"
11955	.asciz	"lev11"
11956	.asciz	"lev12"
11957	.asciz	"lev13"
11958	.asciz	"prof"
11959	.asciz  "lev15"
11960_C_LABEL(eintrnames):
11961	_ALIGN
11962_C_LABEL(intrcnt):
11963	.space	16 * LNGSZ		! the 16 counters themselves
11964_C_LABEL(eintrcnt):
11965
	! Common (BSS) symbols shared with the C side.
11966	.comm	_C_LABEL(curproc), PTRSZ
11967	.comm	_C_LABEL(promvec), PTRSZ
11968	.comm	_C_LABEL(nwindows), 4	! register-window count, set elsewhere
11969
11970#ifdef DEBUG
11971	.comm	_C_LABEL(trapdebug), 4
11972	.comm	_C_LABEL(pmapdebug), 4
11973#endif
11974