1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6
7// See memmove Go doc for important implementation constraints.
8
// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from "from" to "to". The two regions may overlap.
// Arguments are read from the caller's frame (to+0, from+8, n+16); the
// routine uses no stack frame of its own and has no result.
//
// Register use:
//	R3	destination cursor (initially to)
//	R4	source cursor (initially from)
//	R5	n
//	R6	n&31, the tail bytes left over after the 32-byte blocks
//	R7	n&~31, the bytes covered by whole 32-byte blocks
//	R9	limit pointer for the byte loops and the backward block loop
//	R8, R10, R11, R12	data being moved
// (The small-copy path reuses R6-R9 as data and end pointers.)
//
// Strategy:
//	n == 0	return immediately.
//	n <= 16	load from both ends of the source, then store to both ends
//		of the destination. Every load precedes every store, so
//		overlapping buffers are handled without a direction check.
//	n > 16	from >= to: copy forward, 32-byte blocks first, then the tail.
//		from <  to: copy backward from the end, tail first, then the
//		32-byte blocks.
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5
	CBNZ	R5, check
	RET				// n == 0: nothing to copy

check:
	CMP	$16, R5
	BLE	copy16

	AND	$~31, R5, R7	// R7 is N&~31
	SUB	R7, R5, R6	// R6 is N&31

	// Copy backward when the source starts below the destination, so that
	// source bytes overlapped by the destination are read before they are
	// overwritten. Addresses are unsigned, hence BLO rather than signed BLT.
	CMP	R3, R4
	BLO	backward

	// Copying forward proceeds by copying R7/32 32-byte blocks then R6 <= 31 tail bytes.
	// R3 and R4 are advanced as we copy.

	// (There may be implementations of armv8 where copying by bytes until
	// at least one of source or dest is word aligned is a worthwhile
	// optimization, but on the one tested so far (xgene) it did not
	// make a significant difference.)

	CBZ	R7, noforwardlarge	// Do we need to copy any 32-byte blocks?

forwardlargeloop:
	// Copy 32 bytes at a time. R7 counts the block bytes still to copy.
	LDP.P	32(R4), (R8, R10)	// load the first 16 bytes, then R4 += 32
	STP.P	(R8, R10), 32(R3)	// store the first 16 bytes, then R3 += 32
	LDP	-16(R4), (R11, R12)	// second 16 bytes of the same block
	STP	(R11, R12), -16(R3)
	SUB	$32, R7, R7
	CBNZ	R7, forwardlargeloop

noforwardlarge:
	CBNZ	R6, forwardtail		// Do we need to copy any tail bytes?
	RET

forwardtail:
	// There are R6 <= 31 bytes remaining to copy.
	// This is large enough to still contain pointers,
	// which must be copied atomically.
	// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
	TBZ	$4, R6, forwardtail8	// copy 16 bytes if R6&16 != 0
	LDP.P	16(R4), (R8, R10)
	STP.P	(R8, R10), 16(R3)

forwardtail8:
	TBZ	$3, R6, forwardtailrest	// copy 8 bytes if R6&8 != 0
	MOVD.P	8(R4), R8
	MOVD.P	R8, 8(R3)

forwardtailrest:
	AND	$7, R6			// R6 is now the 0..7 bytes still to copy
	CBNZ	R6, forwardtailbytes
	RET

forwardtailbytes:
	ADD	R3, R6, R9	// R9 points just past the destination memory

forwardtailloop:
	// Copy the final 1..7 bytes one at a time until R3 reaches R9.
	MOVBU.P	1(R4), R8
	MOVBU.P	R8, 1(R3)
	CMP	R3, R9
	BNE	forwardtailloop
	RET

	// Small copies: 1..16 bytes.
	// Each case loads one item from the start and one from the end of the
	// source before storing either, so the two accesses may overlap each
	// other and the source may overlap the destination.
copy16:
	ADD	R4, R5, R8	// R8 points just past the last source byte
	ADD	R3, R5, R9	// R9 points just past the last destination byte
	CMP	$8, R5
	BLT	copy7
	// 8..16 bytes: two 8-byte moves.
	MOVD	(R4), R6
	MOVD	-8(R8), R7
	MOVD	R6, (R3)
	MOVD	R7, -8(R9)
	RET

copy7:
	// 4..7 bytes (bit 2 of n set): two 4-byte moves.
	TBZ	$2, R5, copy3
	MOVWU	(R4), R6
	MOVWU	-4(R8), R7
	MOVW	R6, (R3)
	MOVW	R7, -4(R9)
	RET

copy3:
	// 2..3 bytes (bit 1 of n set): two 2-byte moves.
	TBZ	$1, R5, copy1
	MOVHU	(R4), R6
	MOVHU	-2(R8), R7
	MOVH	R6, (R3)
	MOVH	R7, -2(R9)
	RET

copy1:
	// Exactly 1 byte.
	MOVBU	(R4), R6
	MOVB	R6, (R3)
	RET

backward:
	// Copying backwards first copies R6 <= 31 tail bytes, then R7/32 32-byte blocks.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	R4, R5, R4	// R4 points just past the last source byte
	ADD	R3, R5, R3	// R3 points just past the last destination byte

	CBZ	R6, nobackwardtail	// Do we need to copy any tail bytes?

	AND	$7, R6, R12		// R12 is the number of tail bytes to copy singly
	CBZ	R12, backwardtaillarge

	SUB	R12, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
backwardtailloop:
	// Copy sub-pointer-size tail, one byte at a time, until R3 reaches R9.
	MOVBU.W	-1(R4), R8
	MOVBU.W	R8, -1(R3)
	CMP	R9, R3
	BNE	backwardtailloop

backwardtaillarge:
	// Do 8/16-byte write if possible.
	// See comment at forwardtail.
	TBZ	$3, R6, backwardtail16	// copy 8 bytes if R6&8 != 0
	MOVD.W	-8(R4), R8
	MOVD.W	R8, -8(R3)

backwardtail16:
	TBZ	$4, R6, nobackwardtail	// copy 16 bytes if R6&16 != 0
	LDP.W	-16(R4), (R8, R10)
	STP.W	(R8, R10), -16(R3)

nobackwardtail:
	CBNZ	R7, backwardlarge	// Do we need to copy any 32-byte blocks?
	RET

backwardlarge:
	SUB	R7, R3, R9	// R9 points at the lowest destination byte

backwardlargeloop:
	// Copy 32 bytes at a time, upper half first, until R3 reaches R9.
	LDP	-16(R4), (R8, R10)	// upper 16 bytes of the block
	STP	(R8, R10), -16(R3)
	LDP.W	-32(R4), (R11, R12)	// R4 -= 32, then load the lower 16 bytes
	STP.W	(R11, R12), -32(R3)	// R3 -= 32, then store the lower 16 bytes
	CMP	R9, R3
	BNE	backwardlargeloop
	RET
158