1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ppc64 ppc64le
6
7#include "textflag.h"
8
9// See memmove Go doc for important implementation constraints.
10
// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The two ranges may overlap: the copy
// runs backward (highest address first) when 0 <= to-from < n (unsigned),
// and forward otherwise.
//
// Register usage:
//	R3       destination cursor (starts at 'to')
//	R4       source cursor (starts at 'from')
//	R5       n, never modified
//	R6       count of doublewords (n >> 3), later the doublewords left
//	         over after the 32 byte loops
//	R7       count of tail bytes (n & 7)
//	R8       scratch: data being moved, 32 byte chunk count, index 16
//	R9       dest - src on the backward path
//	CTR      loop counter for every copy loop
//	VS32/33  the two 16 byte halves of a 32 byte chunk
//	CR0      EQ after the initial ANDCC means "no tail bytes"
//	CR1      R6 compared with 0
//	CR2      (dest - src) compared unsigned with n
//
// Raw BC forms used below: "BC 16, 0, label" is bdnz (decrement CTR and
// branch while it is non-zero); "BC 12, bit, target" branches when the CR
// bit is set; "BC 4, bit, target" branches when it is clear.
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, R5, R7	// R7: bytes to copy; CR0[EQ] set if there are none
	SRD	$3, R5, R6	// R6: double words to copy
	CMP	R6, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length.  This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	R4, R3, R8	// dest - src
	CMPU	R8, R5, CR2	// < len?
	BC	12, 8, backward // BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
	SRDCC	$2,R6,R8		// 32 byte chunks?
	BNE	forward32setup		//
	MOVD	R6,CTR			// R6 = number of double words

	// Move double words

forward8:
	MOVD    0(R4), R8		// double word
	ADD     $8,R4
	MOVD    R8, 0(R3)		//
	ADD     $8,R3
	BC      16, 0, forward8		// bdnz
	BR	noforwardlarge		// handle remainder

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(R3)			// prepare data cache
	DCBT	(R4)
	MOVD	R8, CTR			// number of 32 byte chunks
	MOVD	$16, R8			// index of the second 16 bytes in a chunk

forward32:
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33
	ADD	$32, R4
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)
	ADD	$32,R3			// bump up for next set
	BC	16, 0, forward32	// continue
	RLDCLCC	$61,R5,$3,R6		// remaining doublewords
	BEQ	noforwardlarge
	MOVD	R6,CTR			// set up the CTR
	BR	forward8

noforwardlarge:
	CMP	R7,$0			// any remaining bytes
	BC	4, 1, LR		// ble lr

forwardtail:
	MOVD	R7, CTR			// move tail bytes

forwardtailloop:
	MOVBZ	0(R4), R8		// move single bytes
	ADD	$1,R4
	MOVBZ	R8, 0(R3)
	ADD	$1,R3
	BC	16, 0, forwardtailloop	// bdnz
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	R5, R4, R4		// end of source
	ADD	R3, R5, R3		// end of dest

	// CR0 still holds the result of the ANDCC at 'check': nothing in
	// between writes CR0.
	BEQ	nobackwardtail		// earlier condition

	MOVD	R7, CTR			// bytes to move

backwardtailloop:
	MOVBZ 	-1(R4), R8		// point to last byte
	SUB	$1,R4
	MOVBZ 	R8, -1(R3)
	SUB	$1,R3
	BC	16, 0, backwardtailloop // bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1 lr

backwardlarge:
	MOVD	R6, CTR			// default: one doubleword per iteration
	// R3 and R4 have moved by the same amount, so R3 - R4 is still
	// dest - src, which is in [0, n) on this path. The operands were
	// previously reversed (src - dest, never positive), so the BLT below
	// was always taken and the 32 byte loop could not be reached.
	SUB	R4, R3, R9		// Use vsx if moving
	CMP	R9, $32			// at least 32 byte chunks
	BLT	backwardlargeloop	// and distance >= 32
	SRDCC	$2,R6,R8		// 32 byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD 	-8(R4), R8
	SUB	$8,R4
	MOVD 	R8, -8(R3)
	SUB	$8,R3
	BC	16, 0, backwardlargeloop // bdnz
	RET

backward32setup:
	// Only R6 & 3 doublewords remain once the 32 byte chunks are done.
	// Reduce R6 now and leave the "none left" answer in CR0[EQ]; nothing
	// in backward32loop writes CR0.
	ANDCC	$3, R6, R6		// remaining doublewords
	MOVD	R8, CTR			// set up loop ctr
	MOVD	$16, R8			// 32 bytes at a time

backward32loop:
	SUB	$32, R4
	SUB	$32, R3
	// Both loads are issued before either store.
	LXVD2X	(R4+R0), VS32           // load 16 bytes
	LXVD2X	(R4+R8), VS33
	STXVD2X	VS32, (R3+R0)           // store 16 bytes
	STXVD2X	VS33, (R3+R8)
	BC      16, 0, backward32loop   // bdnz
	BC	12, 2, LR		// beq lr: no doublewords left
	MOVD	R6, CTR			// 1-3 doublewords still to copy
	BR	backwardlargeloop
139