// Inferno's libkern/memmove-arm.s
// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
//
//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "textflag.h"

// TE or TS are spilled to the stack during bulk register moves.
// TS is R0, which lies inside the R0-R7 register list used by the backward
// 32-byte loop; TE is R8, which lies inside the R1-R8 list used by the forward
// 32-byte loop. Whichever pointer would be overwritten is stored in the
// function's single 4-byte frame slot before the loop and reloaded after it.
#define TS	R0
#define TE	R8

// Warning: the linker will use R11 to synthesize certain instructions. Please
// take care and double check with objdump.
#define FROM	R11
#define N	R12
#define TMP	R12				/* N and TMP don't overlap */
#define TMP1	R5

// Shift counts and source-pointer fix-up used only on the unaligned-source
// paths (_bunaligned / _funaligned). RSHIFT shares R5 with TMP1.
#define RSHIFT	R5
#define LSHIFT	R6
#define OFFSET	R7

// Register names for the backward unaligned loop (_bu16loop): BR0-BR3 are
// loaded from the source, BW0-BW3 are the merged words stored to the
// destination. BWn is the same physical register as BR(n+1).
#define BR0	R0					/* shared with TS */
#define BW0	R1
#define BR1	R1
#define BW1	R2
#define BR2	R2
#define BW2	R3
#define BR3	R3
#define BW3	R4

// Register names for the forward unaligned loop (_fu16loop): FR0-FR3 are
// loaded from the source, FW0-FW3 are the merged words stored to the
// destination. FWn is the same physical register as FR(n-1) for n >= 1.
#define FW0	R1
#define FR0	R2
#define FW1	R2
#define FR1	R3
#define FW2	R3
#define FR2	R4
#define FW3	R4
#define FR3	R8					/* shared with TE */

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from from to to.
//
// Frame: 4 bytes of locals (the savedts/savedte spill slot), 12 bytes of
// arguments.
//
// Strategy, as implemented below:
//   - TS = to, TE = to+n. The number of bytes left is always TE-TS.
//   - If to <= from (unsigned) the copy runs forward, advancing TS and FROM
//     upwards; otherwise it runs backward, moving TE and FROM downwards from
//     the ends of the two regions.
//   - With at least 4 bytes to copy, bytes are moved one at a time until the
//     destination pointer is 4-byte aligned. If the source pointer is then
//     also aligned, data moves in 32-byte blocks, then 4-byte words, then
//     single bytes. If the source is not aligned, 16-byte blocks are
//     assembled from aligned source words with shifts, followed by single
//     bytes.
//
// Reading note: `CMP x, y` followed by a conditional branch tests y against
// x, e.g. `CMP $4, N; BLT` branches when N < 4.
TEXT runtime·memmove(SB), NOSPLIT, $4-12
_memmove:
	MOVW	to+0(FP), TS
	MOVW	from+4(FP), FROM
	MOVW	n+8(FP), N

	ADD	N, TS, TE	/* to end pointer */

	// to <= from (unsigned): copy forward. Otherwise fall through and copy
	// backward.
	CMP	FROM, TS
	BLS	_forward

_back:
	ADD	N, FROM		/* from end pointer */
	// NOTE(review): BLT is a signed test, so a length with the top bit set
	// would be sent to the byte-at-a-time tail. Same pattern at _forward.
	CMP	$4, N		/* need at least 4 bytes to copy */
	BLT	_b1tail

	// From here on N is dead: TMP (same register) is reused as scratch.
_b4align:				/* align destination on 4 */
	AND.S	$3, TE, TMP	// TMP = TE & 3, flags set from the result
	BEQ	_b4aligned

	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
	B	_b4align

_b4aligned:				/* is source now aligned? */
	AND.S	$3, FROM, TMP	// TMP = FROM & 3; non-zero means unaligned source
	BNE	_bunaligned

	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
	MOVW	TS, savedts-4(SP)	// R0-R7 below overwrites TS (R0)
_b32loop:
	// Continue only while TE > TS+31, i.e. at least 32 bytes remain.
	CMP	TMP, TE
	BLS	_b4tail

	// Eight words down from FROM into R0-R7, then out below TE; both
	// pointers are written back.
	MOVM.DB.W (FROM), [R0-R7]
	MOVM.DB.W [R0-R7], (TE)
	B	_b32loop

_b4tail:				/* do remaining words if possible */
	MOVW	savedts-4(SP), TS	// restore the destination start pointer
	ADD	$3, TS, TMP
_b4loop:
	// Continue only while TE > TS+3, i.e. at least 4 bytes remain.
	CMP	TMP, TE
	BLS	_b1tail

	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
	B	_b4loop

_b1tail:				/* remaining bytes */
	// Done once the moving end pointer has reached the start pointer.
	CMP	TE, TS
	BEQ	_return

	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
	B	_b1tail

_forward:
	CMP	$4, N		/* need at least 4 bytes to copy */
	BLT	_f1tail

	// From here on N is dead: TMP (same register) is reused as scratch.
_f4align:				/* align destination on 4 */
	AND.S	$3, TS, TMP	// TMP = TS & 3, flags set from the result
	BEQ	_f4aligned

	MOVBU.P	1(FROM), TMP	/* implicit write back */
	MOVBU.P	TMP, 1(TS)	/* implicit write back */
	B	_f4align

_f4aligned:				/* is source now aligned? */
	AND.S	$3, FROM, TMP	// TMP = FROM & 3; non-zero means unaligned source
	BNE	_funaligned

	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
	MOVW	TE, savedte-4(SP)	// R1-R8 below overwrites TE (R8)
_f32loop:
	// Continue only while TS < TE-31, i.e. at least 32 bytes remain.
	CMP	TMP, TS
	BHS	_f4tail

	// Eight words up from FROM into R1-R8, then out at TS; both pointers
	// are written back. R0 (TS) is deliberately outside the list.
	MOVM.IA.W (FROM), [R1-R8]
	MOVM.IA.W [R1-R8], (TS)
	B	_f32loop

_f4tail:
	MOVW	savedte-4(SP), TE	// restore the destination end pointer
	SUB	$3, TE, TMP	/* do remaining words if possible */
_f4loop:
	// Continue only while TS < TE-3, i.e. at least 4 bytes remain.
	CMP	TMP, TS
	BHS	_f1tail

	MOVW.P	4(FROM), TMP1	/* implicit write back */
	MOVW.P	TMP1, 4(TS)	/* implicit write back */
	B	_f4loop

_f1tail:
	// Remaining bytes; done once TS has reached TE.
	CMP	TS, TE
	BEQ	_return

	MOVBU.P	1(FROM), TMP	/* implicit write back */
	MOVBU.P	TMP, 1(TS)	/* implicit write back */
	B	_f1tail

_return:
	// Reload the caller's destination pointer into R0 (TS has been advanced
	// on the forward path). Presumably kept so R0 holds `to` on return as in
	// C memmove; the Go signature above declares no result - TODO confirm.
	MOVW	to+0(FP), R0
	RET

	// Backward copy, destination aligned, source not aligned.
	// On entry TMP = FROM & 3 (1, 2 or 3). The flags from the CMP below
	// select exactly one of the three groups of conditional moves.
_bunaligned:
	CMP	$2, TMP		/* is TMP < 2 ? */

	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$1, OFFSET

	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$3, OFFSET

	// 16 bytes or fewer left: skip the block loop. FROM has not been
	// modified yet, so the byte tail can continue directly.
	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
	CMP	TMP, TE
	BLS	_b1tail

	BIC	$3, FROM		/* align source */
	MOVW	TS, savedts-4(SP)	// BR0 is R0, which is TS
	MOVW	(FROM), BR0	/* prime first block register */

_bu16loop:
	// Continue only while TE > TS+16.
	CMP	TMP, TE
	BLS	_bu1tail

	// Start the top output word from the word carried over from the
	// previous iteration (or the priming load), then fetch four more
	// aligned source words below it.
	MOVW	BR0<<LSHIFT, BW3
	MOVM.DB.W (FROM), [BR0-BR3]
	ORR	BR3>>RSHIFT, BW3

	// Each output word combines two adjacent source words. The order of
	// these pairs matters because each BWn overwrites BR(n+1).
	MOVW	BR3<<LSHIFT, BW2
	ORR	BR2>>RSHIFT, BW2

	MOVW	BR2<<LSHIFT, BW1
	ORR	BR1>>RSHIFT, BW1

	MOVW	BR1<<LSHIFT, BW0
	ORR	BR0>>RSHIFT, BW0

	MOVM.DB.W [BW0-BW3], (TE)
	B	_bu16loop

_bu1tail:
	MOVW	savedts-4(SP), TS	// restore the destination start pointer
	// Undo the BIC alignment so FROM is a byte-accurate source pointer
	// again for the byte tail.
	ADD	OFFSET, FROM
	B	_b1tail

	// Forward copy, destination aligned, source not aligned.
	// On entry TMP = FROM & 3 (1, 2 or 3). The flags from the CMP below
	// select exactly one of the three groups of conditional moves.
_funaligned:
	CMP	$2, TMP

	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$3, OFFSET

	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$1, OFFSET

	// 16 bytes or fewer left: skip the block loop. FROM has not been
	// modified yet, so the byte tail can continue directly.
	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
	CMP	TMP, TS
	BHS	_f1tail

	BIC	$3, FROM		/* align source */
	MOVW	TE, savedte-4(SP)	// FR3 is R8, which is TE
	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */

_fu16loop:
	// Continue only while TS < TE-16.
	CMP	TMP, TS
	BHS	_fu1tail

	// Start the first output word from the word carried over from the
	// previous iteration (or the priming load), then fetch four more
	// aligned source words above it.
	MOVW	FR3>>RSHIFT, FW0
	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
	ORR	FR0<<LSHIFT, FW0

	// Each output word combines two adjacent source words. The order of
	// these pairs matters because each FWn overwrites FR(n-1).
	MOVW	FR0>>RSHIFT, FW1
	ORR	FR1<<LSHIFT, FW1

	MOVW	FR1>>RSHIFT, FW2
	ORR	FR2<<LSHIFT, FW2

	MOVW	FR2>>RSHIFT, FW3
	ORR	FR3<<LSHIFT, FW3

	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
	B	_fu16loop

_fu1tail:
	MOVW	savedte-4(SP), TE	// restore the destination end pointer
	// Step FROM back over the bytes of the last loaded word that have not
	// been stored yet, so it is a byte-accurate source pointer again.
	SUB	OFFSET, FROM
	B	_f1tail