// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from "from" to "to". When from < to the copy runs from the
// high addresses downwards, otherwise from the low addresses upwards, so a
// source region that overlaps the destination is always read before it is
// overwritten.
//
// Arguments are read from the caller's frame through FP; the function has no
// frame of its own (NOFRAME, $0) and does not call out.
//
// Register roles:
//	R3	destination cursor (to)
//	R4	source cursor (from)
//	R5	n
//	R6	n & 31, the tail left over after the 32-byte blocks (large path);
//		data scratch in the 1..16 byte path
//	R7	n &^ 31, the bytes moved in 32-byte blocks (large path);
//		data scratch in the 1..16 byte path
//	R8	data scratch; one-past-the-end source pointer in the 1..16 byte path
//	R9	loop end marker; one-past-the-end destination pointer in the
//		1..16 byte path
//	R10, R11, R12	data scratch (R12 also holds n & 7 in the backward tail)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5
	CBNZ	R5, check
	RET				// n == 0: nothing to copy

check:
	// n is a uintptr and R3/R4 are addresses, so every comparison below
	// uses the unsigned condition codes (LS/LO).
	CMP	$16, R5
	BLS	copy16			// 1 <= n <= 16

	AND	$~31, R5, R7		// R7 is N&~31
	SUB	R7, R5, R6		// R6 is N&31

	CMP	R3, R4
	BLO	backward		// from < to: copy from the end downwards

	// Copying forward proceeds by copying R7/32 blocks of 32 bytes, then the
	// R6 <= 31 tail bytes. R3 and R4 are advanced as we copy.

	// (There may be implementations of armv8 where copying by bytes until
	// at least one of source or dest is word aligned is a worthwhile
	// optimization, but on the one tested so far (xgene) it did not
	// make a significant difference.)

	CBZ	R7, noforwardlarge	// Do we need to do any 32-byte block copying?

forwardlargeloop:
	// Copy 32 bytes at a time. The post-indexed pair moves bytes 0..15 and
	// advances both cursors by 32; the second pair then moves bytes 16..31
	// at offset -16 from the advanced cursors.
	LDP.P	32(R4), (R8, R10)
	STP.P	(R8, R10), 32(R3)
	LDP	-16(R4), (R11, R12)
	STP	(R11, R12), -16(R3)
	SUB	$32, R7, R7		// R7 counts the block bytes still to copy
	CBNZ	R7, forwardlargeloop

noforwardlarge:
	CBNZ	R6, forwardtail		// Do we need to copy any tail bytes?
	RET

forwardtail:
	// There are R6 <= 31 bytes remaining to copy.
	// This is large enough to still contain pointers,
	// which must be copied atomically.
	// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
	TBZ	$4, R6, forwardtail8	// write 16 bytes if R6&16 != 0
	LDP.P	16(R4), (R8, R10)
	STP.P	(R8, R10), 16(R3)

forwardtail8:
	TBZ	$3, R6, forwardtailbytes	// write 8 bytes if R6&8 != 0
	MOVD.P	8(R4), R8
	MOVD.P	R8, 8(R3)

forwardtailbytes:
	AND	$7, R6			// R6 is now the 0..7 bytes still to copy
	CBZ	R6, forwarddone

	ADD	R3, R6, R9		// R9 points just past the destination memory

forwardtailloop:
	MOVBU.P	1(R4), R8
	MOVBU.P	R8, 1(R3)
	CMP	R3, R9
	BNE	forwardtailloop

forwarddone:
	RET

	// Small copies: 1..16 bytes.
	// Each size class loads the first and the last chunk of the source
	// (the two may overlap) before storing either, so the result is correct
	// regardless of how source and destination overlap.
copy16:
	ADD	R4, R5, R8		// R8 points just past the last source byte
	ADD	R3, R5, R9		// R9 points just past the last destination byte
	CMP	$8, R5
	BLO	copy7			// n < 8
	MOVD	(R4), R6		// 8 <= n <= 16: two 8-byte moves
	MOVD	-8(R8), R7
	MOVD	R6, (R3)
	MOVD	R7, -8(R9)
	RET

copy7:
	TBZ	$2, R5, copy3		// n < 4
	MOVWU	(R4), R6		// 4 <= n <= 7: two 4-byte moves
	MOVWU	-4(R8), R7
	MOVW	R6, (R3)
	MOVW	R7, -4(R9)
	RET

copy3:
	TBZ	$1, R5, copy1		// n == 1
	MOVHU	(R4), R6		// 2 <= n <= 3: two 2-byte moves
	MOVHU	-2(R8), R7
	MOVH	R6, (R3)
	MOVH	R7, -2(R9)
	RET

copy1:
	MOVBU	(R4), R6
	MOVB	R6, (R3)
	RET

backward:
	// Copying backwards first copies the R6 <= 31 tail bytes, then R7/32
	// blocks of 32 bytes. R3 and R4 are advanced to the end of the
	// destination/source buffers respectively and moved back as we copy.

	ADD	R4, R5, R4		// R4 points just past the last source byte
	ADD	R3, R5, R3		// R3 points just past the last destination byte

	CBZ	R6, nobackwardtail	// Do we need to copy any tail bytes?

	AND	$7, R6, R12		// R12 is the 0..7 bytes copied one at a time
	CBZ	R12, backwardtaillarge

	SUB	R12, R3, R9		// R9 points at the lowest destination byte that should be copied by byte.

backwardtailloop:
	// Copy sub-pointer-size tail.
	MOVBU.W	-1(R4), R8
	MOVBU.W	R8, -1(R3)
	CMP	R9, R3
	BNE	backwardtailloop

backwardtaillarge:
	// Do 8/16-byte write if possible.
	// See comment at forwardtail.
	TBZ	$3, R6, backwardtail16	// write 8 bytes if R6&8 != 0
	MOVD.W	-8(R4), R8
	MOVD.W	R8, -8(R3)

backwardtail16:
	TBZ	$4, R6, nobackwardtail	// write 16 bytes if R6&16 != 0
	LDP.W	-16(R4), (R8, R10)
	STP.W	(R8, R10), -16(R3)

nobackwardtail:
	CBNZ	R7, backwardlarge	// Do we need to do any 32-byte block copying?
	RET

backwardlarge:
	SUB	R7, R3, R9		// R9 points at the lowest destination byte

backwardlargeloop:
	// Copy 32 bytes at a time, highest 16 bytes first. The pre-indexed
	// pair moves the lower 16 bytes and steps both cursors back by 32.
	LDP	-16(R4), (R8, R10)
	STP	(R8, R10), -16(R3)
	LDP.W	-32(R4), (R11, R12)
	STP.W	(R11, R12), -32(R3)
	CMP	R9, R3
	BNE	backwardlargeloop
	RET