/* $OpenBSD: bcopy.S,v 1.8 2015/08/31 02:53:56 guenther Exp $ */
/* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell. Support for use as memcpy() and memmove()
 * added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "SYS.h"

/* Argument registers, named by the role they play in this routine. */
#define SRCREG a0
#define DSTREG a1
#define SIZEREG a2

/*
 * Copy bytes.
 *
 * void bcopy(char *from, char *to, size_t len);
 *
 * No matter how invoked, the source and destination registers are
 * used directly for calculation. There's no point in copying them to
 * "working" registers, since the code uses their values "in place,"
 * and copying them would be slower.
 *
 * Register usage inside the routine:
 *	SRCREG (a0)	source pointer, advanced as the copy proceeds
 *	DSTREG (a1)	destination pointer, advanced as the copy proceeds
 *	SIZEREG (a2)	byte count; later reduced to the count in the
 *			final partial quadword
 *	a3		source end address (source + length)
 *	a4		destination end address (destination + length)
 *	t0-t7		scratch
 *
 * Strategy: a forward copy is used unless the destination starts inside
 * the source range, in which case the copy runs backwards. The forward
 * copy has a fast path for buffers that share the same alignment within
 * a quadword, a shift-and-merge path for differing alignment, and a
 * short path for copies of 8 bytes or fewer. All partial quadwords are
 * written by load / mask / merge / store so that bytes outside the
 * destination range are rewritten with their existing values.
 *
 * LEAF, RET and END_WEAK are macros that are not defined in this file
 * (presumably provided via SYS.h -- confirm there).
 */

LEAF(bcopy,3)
	/* Check for zero length */
	beq	SIZEREG,bcopy_done

	/*
	 * Check for overlap.
	 * t5 = dst - src; an unsigned result below the length means the
	 * destination starts inside [src, src + len), so a forward copy
	 * would overwrite source bytes before they are read. When
	 * dst < src the subtraction wraps to a large unsigned value and
	 * the forward path is taken.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word (the aligned quadword containing SRCREG) */
	ldq_u	t2,0(SRCREG)

	/*
	 * Do they have the same alignment?
	 * t0 = non-zero if the low three address bits differ,
	 * t1 = byte offset of the destination within its quadword.
	 */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment */
	beq	t1,bcopy_all_aligned

	/*
	 * Same non-zero offset in both buffers: build the first
	 * destination quadword from the source bytes at and above the
	 * offset and the existing destination bytes below it. The
	 * length is extended by the offset so that it is counted from
	 * the start of that quadword.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/* Dst is 8-byte aligned */

bcopy_all_aligned:
	/*
	 * If less than 8 bytes,skip loop.
	 * t0 = (len - 1) rounded down to a multiple of 8: the bytes
	 * stored by the loop. The final quadword (1..8 bytes) is always
	 * left for the tail code. SIZEREG becomes len mod 8, where 0
	 * means the final quadword is complete.
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/*
	 * Store the quadword in t2, then fetch the next source quadword.
	 * stq_u/ldq_u ignore the low three address bits, so the
	 * pointers need not be aligned themselves.
	 */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/*
	 * If we're done, exit.
	 * t2 holds the data for the final destination quadword; a zero
	 * remainder means all eight bytes are to be written.
	 */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/*
	 * Final partial quadword: keep the low (len mod 8) bytes of the
	 * new data and the remaining high bytes of the destination.
	 */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 *
	 * NOTE(review): a3 was already set to SRCREG + SIZEREG before the
	 * alignment test and neither register has changed since, so this
	 * addq appears to be redundant.
	 */
	addq	SRCREG,SIZEREG,a3
	/* 8 bytes or fewer: handled entirely by the short path */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	/* Destination already quadword aligned: no leading partial word */
	beq	t1,bcopy_da_noentry

	/*
	 * Do the initial partial word.
	 * t0 = (-dst) & 7 = bytes needed to bring the destination up to
	 * a quadword boundary. Eight unaligned source bytes are
	 * assembled with extql/extqh, shifted to the destination's byte
	 * offset with insql and merged with the destination bytes below
	 * that offset.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	/* Step past the bytes just written and reload the source word */
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/*
	 * Same split as in bcopy_all_aligned: t0 = bytes handled by the
	 * loop, SIZEREG = byte count of the final quadword (0 = full).
	 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Loop unrolled twice; t2 and t3 alternate as the "previous" and
	 * "next" source quadword so no register move is needed. The
	 * destination is quadword aligned here, hence the plain stq.
	 * Adding 8 to SRCREG before the extracts leaves its low three
	 * bits, which are all extql/extqh use, unchanged.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	/* Leaving after the first half: newest source word is in t3 */
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/*
	 * Do the last new word.
	 * Here the newest source word is in t2; move it to t3, where
	 * bcopy_da_finish1 expects it.
	 */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last partial word.
	 * The second source word is the quadword containing the last
	 * source byte (a3 - 1), presumably so that no quadword beyond
	 * the end of the source is read. The assembled data is handed to
	 * the common tail, which stores it whole or merged according to
	 * SIZEREG.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Short copy (1..8 bytes), also entered from bcopy_ov_short.
	 * Expects t2 = quadword containing SRCREG and a3 = source end.
	 *
	 * Do the last word in the next source word: t3 is the quadword
	 * containing the last source byte; t2 becomes the source bytes
	 * starting at SRCREG.
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	/*
	 * Split the data across the (up to) two destination quadwords:
	 * t2 = part for the low quadword, t3 = part for the high one.
	 */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/*
	 * Build a byte mask covering the bytes to copy: start from all
	 * ones, keep the low (len mod 8) bytes, and fall back to all
	 * ones when that is zero (len == 8 on this path). The mask is
	 * split the same way as the data: t4 = low, t5 = high.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	/* a4 = destination end; fetch both destination quadwords */
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	/* Clear the bytes being replaced, mask the data, and merge */
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	/*
	 * The high quadword is stored first. When the whole copy fits in
	 * one destination quadword both stores hit the same location
	 * and the low-quadword store, which carries the new data, must
	 * be the one that lands last.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 *
	 * a3 = source end, a4 = destination end; both move downwards.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/*
	 * 8 bytes or fewer - don't worry about overlap: the short path
	 * performs all of its loads before either store.
	 */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/*
	 * Possibly do a partial first word (the one at the top end).
	 * t4 = bytes by which the destination end overshoots a quadword
	 * boundary. Both end pointers are pulled back by t4, which
	 * aligns a4; the low t4 bytes of the unaligned source quadword
	 * at a3 are merged with the destination bytes at and above t4.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/*
	 * t4 = bytes to move as whole quadwords, SIZEREG = leftover
	 * bytes at the low end of the buffers.
	 */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/*
	 * This could be more pipelined, but it doesn't seem worth it.
	 * Each pass loads the two source quadwords covering
	 * [a3 - 8, a3), steps both pointers down by 8, and stores the
	 * extracted quadword at the (aligned) destination pointer.
	 */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	/* Nothing left over: done */
	beq	SIZEREG,bcopy_done

	/*
	 * Leftover bytes at the very start of the buffers. Assemble the
	 * unaligned source quadword at SRCREG, shift it to the
	 * destination's byte offset, and merge it with the destination
	 * bytes below that offset. a4 is aligned and equals
	 * DSTREG + SIZEREG at this point, so the bytes from the
	 * destination offset to the end of its quadword are exactly the
	 * leftover.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or fewer: load the first source
	 * quadword as bcopy_da_finish expects (a3 is already the source
	 * end) and reuse the forward short path.
	 */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END_WEAK(bcopy)