/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

# ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>

#include "assym.h"

/*
 * For counts less than or equal to this number of bytes, we always
 * copy byte-for-byte.
 */
#define	SMALL_LIMIT	7

/*
 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that the
 * t_lofault handler was set
 */
#define	LOFAULT_SET 2


/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns the errno value on a pagefault error, 0 if all is ok.
 */
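/*
 * A hedged C sketch of the control flow below (illustrative only: the
 * real lofault mechanism is a non-local jump taken by the trap handler,
 * not an ordinary return from the copy loop, and LOFAULT_SET is kept in
 * the saved copy, %o5, to mark that a handler was installed):
 *
 *	int
 *	kcopy(const void *from, void *to, size_t count)
 *	{
 *		caddr_t saved = curthread->t_lofault;	// %o5 | LOFAULT_SET
 *
 *		curthread->t_lofault = (caddr_t)copyerr;
 *		// ... fall into the common bcopy loop (.do_copy) ...
 *		curthread->t_lofault = saved;		// also on fault path
 *		return (0);		// or the errno from %g1 on a fault
 *	}
 */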


	.seg	".text"
	.align	4

	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! kcopy() *always* sets a t_lofault handler and ORs LOFAULT_SET
	! into %o5 to indicate it has set the t_lofault handler. We need
	! to clear the LOFAULT_SET flag before restoring the error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0

	SET_SIZE(kcopy)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */

	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy

.do_copy:
	mov	%i1, %g5		! save dest addr start

	mov	%i2, %l6		! save size

	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	  .empty

	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty	! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating a complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
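	!
	! For reference, the shift-and-merge scheme above corresponds to
	! this C sketch (illustrative only: the names are invented and the
	! byte-granular prologue/epilogue handled below is omitted):
	!
	!	uint32_t prev = partial_first_word, cur;
	!	while (nwords-- > 0) {
	!		cur = *src++;				// aligned load
	!		*dst++ = (prev << LS) | (cur >> US);	// merge
	!		prev = cur;
	!	}
	!
	! where US is the number of bits of the current source word that
	! have already been consumed and LS == 32 - US.
	!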
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if the from address is unaligned for double-word moves,
	! move bytes till it is; if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
	membar	#Sync				! sync error barrier
	! Restore the t_lofault handler if we came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	mov	%g5, %o0		! copy dest address
	call	sync_icache
	mov	%l6, %o1		! saved size
	ret
	restore %g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
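/*
 * In C terms, .alignit is roughly the following sketch (mask is 3 for
 * word alignment or 7 for doubleword alignment; names are invented):
 *
 *	while ((uintptr_t)from & mask) {	// copy bytes until aligned
 *		*to++ = *from++;
 *		count--;
 *	}
 *	aligned = count & ~mask;		// returned in %i3
 *
 * The caller has already verified that from and to are congruent modulo
 * the alignment, so aligning the source aligns the destination too.
 */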
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

/*
 * Block copy with possibly overlapped operands.
 */
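/*
 * The dispatch below is equivalent to this C sketch (illustrative):
 *
 *	if (count == 0)
 *		return;
 *	if (count <= labs(from - to))
 *		bcopy(from, to, count);		// regions cannot overlap
 *	else if (from < to)
 *		copy backwards from the last byte;  // dst overlaps ahead
 *	else
 *		copy forwards one byte at a time;
 */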

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
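/*
 * A hedged C equivalent of the loop below (PAGESIZE is a multiple of
 * 0x80, so no remainder handling is needed):
 *
 *	for (off = 0; off < PAGESIZE; off += 0x80)
 *		for (i = 0; i < 0x80; i += 8)	// 16 unrolled ldx/stx pairs
 *			*(uint64_t *)(dst + off + i) =
 *			    *(const uint64_t *)(src + off + i);
 *	sync_icache(dst, PAGESIZE);
 */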
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2
	mov	%i1, %o0		! store destination address for flushing

	/*
	 * We copy exactly one page, and PAGESIZE is a multiple of 0x80.
	 */
1:
	ldx	[%i0+0x0], %l0
	ldx	[%i0+0x8], %l1
	ldx	[%i0+0x10], %l2
	ldx	[%i0+0x18], %l3
	ldx	[%i0+0x20], %l4
	ldx	[%i0+0x28], %l5
	ldx	[%i0+0x30], %l6
	ldx	[%i0+0x38], %l7
	stx	%l0, [%i1+0x0]
	stx	%l1, [%i1+0x8]
	stx	%l2, [%i1+0x10]
	stx	%l3, [%i1+0x18]
	stx	%l4, [%i1+0x20]
	stx	%l5, [%i1+0x28]
	stx	%l6, [%i1+0x30]
	stx	%l7, [%i1+0x38]

	ldx	[%i0+0x40], %l0
	ldx	[%i0+0x48], %l1
	ldx	[%i0+0x50], %l2
	ldx	[%i0+0x58], %l3
	ldx	[%i0+0x60], %l4
	ldx	[%i0+0x68], %l5
	ldx	[%i0+0x70], %l6
	ldx	[%i0+0x78], %l7
	stx	%l0, [%i1+0x40]
	stx	%l1, [%i1+0x48]
	stx	%l2, [%i1+0x50]
	stx	%l3, [%i1+0x58]
	stx	%l4, [%i1+0x60]
	stx	%l5, [%i1+0x68]
	stx	%l6, [%i1+0x70]
	stx	%l7, [%i1+0x78]

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	! %o0 contains the dest. address
	set	PAGESIZE, %o1
	call	sync_icache
	nop

	membar #Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */
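/*
 * Caller-side, the two flavours look like this C sketch (illustrative):
 *
 *	if (copyout(kaddr, uaddr, len) != 0)
 *		return (EFAULT);		// DDI/DKI: -1, caller
 *						// picks the errno
 *	error = xcopyout(kaddr, uaddr, len);	// errno computed for us
 */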

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window.
 *
 * Flow:
 *
 * If count == zero, return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our fault handler into %o3.
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers.  We store -count in %o3, and we store the number
 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
 * in %o2. Following this we branch to the appropriate copy loop and
 * copy that many chunks.  Since we've been adding the chunk size
 * to %o3 each time through as well as decrementing %o2, we can tell
 * if any data is left to be copied by examining %o3. If that is
 * zero, we're done and can go home. If not, we figure out what the
 * largest chunk size left to be copied is and branch to that copy
 * loop unless there's only one byte left. We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of the individual
 * functions.  The handlers for xcopyOP_little are found at the end of
 * xcopyin_little.  The handlers for copyOP_noerr are found at the end
 * of copyin_noerr.
 */
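/*
 * The negated-count loop described above, as a hedged C sketch for the
 * 8-byte case (the 4-, 2- and 1-byte loops work the same way):
 *
 *	ssize_t off = -(ssize_t)count;		// %o3
 *	src += count;				// both now point just
 *	dst += count;				// past their ends
 *	do {
 *		*(uint64_t *)(dst + off) = *(const uint64_t *)(src + off);
 *		off += 8;
 *	} while (off != 0);
 *
 * A single induction variable walks both buffers; when it reaches zero
 * the copy is complete, so neither buffer pointer has to be bumped.
 */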

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

/*
 * We save the arguments in the following registers in case of a fault:
 * 	kaddr - %g2
 * 	uaddr - %g3
 * 	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT		%g5
#define	SAVED_LOFAULT		%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	  mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	  nop
	retl
	  clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3

.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o1 | %o2 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller or equal,
	! bounce to the byte for byte copy loop. Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3

	ba	.dcobcp
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
.dcos8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned. Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
	!
	.align 32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Check to see if we're done. Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align 32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop. Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyout)


	ENTRY(xcopyout)
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	b	.do_copyout
	  or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyout)

	ENTRY(xcopyout_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	ldub	[%o0+%o3], %o4

1:	stba	%o4, [%o1+%o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	  ldub	[%o0+%o3], %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)
	SET_SIZE(xcopyout_little)

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	  nop
	retl
	  clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3

.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers. We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers. By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer. A very fast 4 instruction loop.
	!
	.align 16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt %ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done. Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned. If we are, check the
	! limit for single byte copies. If we're smaller, or equal,
	! bounce to the byte for byte copy loop. Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3
	ba	.dcibcp
	nop

.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3
.dcis8:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcih4:
	bnz	%ncc, .dcih2
	nop
.dcis4:
	!
	! Housekeeping for copy loops. Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop. Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
	!
	.align 32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop. Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left. Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
	! copy.
	!
	.align 32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop. Most 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyin)

	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin)

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	  lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin_little)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	jmp	SAVED_LOFAULT
	  nop
	SET_SIZE(copyin_noerr)

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1

	.align	64
	.section ".text"


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length. For the generic module we simply
 * call bzero and return 1, so the caller knows the pages in the cache
 * must still be flushed to ensure integrity.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
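/*
 * That is, the generic path amounts to this sketch:
 *
 *	int
 *	hwblkclr(void *addr, size_t len)
 *	{
 *		bzero(addr, len);
 *		return (1);	// block ops not used; caller must flush
 *	}
 */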
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Simply call bzero and notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	  mov	%i1, %o1
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	SET_SIZE(hwblkclr)

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 */
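	/*
	 * Sketch (illustrative): with PSTATE.IE cleared, move four
	 * 8-byte words through ASI_MEM physical-address accesses:
	 *
	 *	for (i = 0; i < 32; i += 8)
	 *		stxa_mem(ldxa_mem(pa_src + i), pa_dst + i);
	 *
	 * where ldxa_mem/stxa_mem stand in for ldxa/stxa with ASI_MEM;
	 * they are not real kernel functions.
	 */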
1124*5d9d9091SRichard Lowe	ENTRY_NP(hw_pa_bcopy32)
1125*5d9d9091SRichard Lowe	rdpr    %pstate, %g1
1126*5d9d9091SRichard Lowe	andn    %g1, PSTATE_IE, %g2
1127*5d9d9091SRichard Lowe	wrpr    %g0, %g2, %pstate
1128*5d9d9091SRichard Lowe
1129*5d9d9091SRichard Lowe	ldxa    [%o0]ASI_MEM, %o2
1130*5d9d9091SRichard Lowe	add     %o0, 8, %o0
1131*5d9d9091SRichard Lowe	ldxa    [%o0]ASI_MEM, %o3
1132*5d9d9091SRichard Lowe	add     %o0, 8, %o0
1133*5d9d9091SRichard Lowe	ldxa    [%o0]ASI_MEM, %o4
1134*5d9d9091SRichard Lowe	add     %o0, 8, %o0
1135*5d9d9091SRichard Lowe	ldxa    [%o0]ASI_MEM, %o5
1136*5d9d9091SRichard Lowe	stxa    %o2, [%o1]ASI_MEM
1137*5d9d9091SRichard Lowe	add     %o1, 8, %o1
1138*5d9d9091SRichard Lowe	stxa    %o3, [%o1]ASI_MEM
1139*5d9d9091SRichard Lowe	add     %o1, 8, %o1
1140*5d9d9091SRichard Lowe	stxa    %o4, [%o1]ASI_MEM
1141*5d9d9091SRichard Lowe	add     %o1, 8, %o1
1142*5d9d9091SRichard Lowe	stxa    %o5, [%o1]ASI_MEM
1143*5d9d9091SRichard Lowe
1144*5d9d9091SRichard Lowe	membar	#Sync
1145*5d9d9091SRichard Lowe	retl
1146*5d9d9091SRichard Lowe	  wrpr    %g0, %g1, %pstate
1147*5d9d9091SRichard Lowe	SET_SIZE(hw_pa_bcopy32)
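
/*
 * Illustrative use (hedged): hw_pa_bcopy32 bypasses the MMU via
 * ASI_MEM with interrupts disabled, so it suits copying a small,
 * 32-byte structure between physical addresses that need not be
 * mapped, e.g.
 *
 *	hw_pa_bcopy32(src_pa, dst_pa);
 *
 * src_pa and dst_pa are assumed names for 8-byte-aligned physical
 * addresses; exactly 32 bytes are moved as four 8-byte loads and
 * stores.
 */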

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */


	ENTRY(uzero)
	!
	! Set a new lofault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

	ENTRY(kzero)
	!
	! Always set a lofault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

/*
 * We got here because of a fault during kzero, or because uzero or
 * bzero was called with t_lofault non-zero (otherwise we would
 * already have run screaming from the room). The errno value is in
 * %g1. Note that we're here iff we did set t_lofault.
 */
.zeroerr:
	!
	! Undo the asi register setting. Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault. It may well have been zero coming in.
	!
1:
	tst	%o5
	membar	#Sync
	bne,pn	%ncc, 3f
	andncc	%o5, LOFAULT_SET, %o5
2:
	!
	! Old handler was zero. Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero. It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present, or both. In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b
	stn	%o5, [THREAD_REG + T_LOFAULT]
	jmp	%o5			! goto real handler
	  nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)
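
/*
 * Caller-side sketch (illustrative, not part of this file): kzero()
 * and uzero() return 0 on success or an errno value if the zeroing
 * faulted, so a hypothetical caller (err, kaddr and len are assumed
 * names) checks the result:
 *
 *	int	err;
 *
 *	if ((err = kzero(kaddr, len)) != 0)
 *		return (err);		(fault while zeroing)
 *
 * bzero(), by contrast, returns nothing; it only vectors faults to
 * .zeroerr when a handler was already present in t_lofault.
 */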

/*
 * Zero a block of storage.
 */

	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	cmp	%o1, 7			! small counts: clear byte-for-byte
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15			! mid-size counts: clear by words
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is address aligned on an 8 byte bound?
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3

	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop

2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0

3:
	! check if there are at least 64 bytes to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0

.bzero_blk_done:
	membar	#Sync

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is address aligned on a word boundary?
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in. We end up here from either kzero()
	! or bzero(). kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set. Accordingly we need to examine
	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
	! before resetting the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
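
/*
 * For reference, .do_zero above corresponds roughly to the following
 * C sketch (illustrative only; it omits the %asi selection and the
 * lofault handling the assembly performs, and zero_sketch is an
 * assumed name):
 *
 *	void
 *	zero_sketch(char *p, size_t n)
 *	{
 *		if (n >= 15) {
 *			while ((uintptr_t)p & 7)
 *				*p++ = 0, n--;	(align to 8 bytes)
 *			while (n >= 8)
 *				*(uint64_t *)p = 0, p += 8, n -= 8;
 *						(done above in 256- and
 *						 64-byte batches)
 *		} else if (n >= 7) {
 *			while ((uintptr_t)p & 3)
 *				*p++ = 0, n--;	(align to a word)
 *			while (n >= 4)
 *				*(uint32_t *)p = 0, p += 4, n -= 4;
 *		}
 *		while (n--)			(leftover bytes)
 *			*p++ = 0;
 *	}
 */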