/*	$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine. Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#ifndef _LOCORE
#define _LOCORE		/* XXX not really, just assembly-code source */
#endif
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	RCSID("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 * One source, three entry points, selected by the preprocessor:
 *
 *	MEMCOPY defined:	memcpy	(a0 = dst, a1 = src, a2 = length)
 *	else MEMMOVE defined:	memmove	(a0 = dst, a1 = src, a2 = length)
 *	neither defined:	bcopy	(a0 = src, a1 = dst, a2 = length)
 *
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 * SRCREG/DSTREG hide the argument-order difference; SIZEREG is always a2.
 *
 * Returns:	memcpy/memmove leave the original destination pointer in v0.
 *		The bcopy build does not set v0.
 * Modifies:	a0, a1, a2, a3, t0, t1, t2, t3, v1, AT (and v0 as above).
 *
 * Direction: the copy runs backwards whenever src < dst and forwards
 * otherwise.  All three entry points share this code, so every variant
 * built here copies in a direction that is safe for overlapping buffers.
 *
 * SZREG (from <mips/asm.h>) is used throughout as the word size in bytes;
 * (SZREG-1) is the alignment mask.
 *
 * The body is assembled with ".set noreorder": the instruction that
 * textually follows each branch is its delay slot and executes whether
 * or not the branch is taken.  Delay slots are marked below.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards
	/* delay slot: the andi below; its result is unused on the 6f path */

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	As written, an unaligned destination is copied entirely
	 *	by the byte loop at 3:.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned
	/* delay slot: the li below; AT is not used on the 5f path */

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	(Label 98 is not referenced anywhere in this file.)
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *
	 *	Each half loads four words before storing any of them.
	 *	The second half addresses its words with negative offsets
	 *	because the pointer has already been advanced by 8 words.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# leftover bytes (count mod SZREG)
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = leftover bytes
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	/*
	 * The length is unsigned and nonzero on entry to this loop, so
	 * test for "reached zero" rather than "signed greater than zero";
	 * a count with its top bit set must not end the copy early.
	 */
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	Each word is assembled from the unaligned source with the
	 *	REG_LHI/REG_LLO pair and written with one aligned store.
	 *	NOTE(review): REG_LHI/REG_LLO come from <mips/asm.h>;
	 *	presumably the endian-appropriate unaligned-load pair --
	 *	confirm there.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of words to transfer * SZREG
	beq	a3,zero,3b		# less than one word: bytes only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDU SRCREG,SZREG		# same pointer step as every other loop
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail with the byte loop
	nop

	/*
	 *	Backward copy, used when src < dst.  Both pointers are first
	 *	moved one past the end of their buffers; every access below
	 *	then addresses the bytes just below the pointer.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get low bits of dest end
	bne	t1,zero,3f		# dest end unaligned: bytes only
	andi	t0,SRCREG,SZREG-1	# (delay slot) get low bits of src end
	bne	t0,zero,5f		# src unaligned, dest aligned
	/* delay slot: the li below; AT is not used on the 5f path */

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 *
	 *	Mirror image of the forward loop: the four highest words
	 *	are moved first, then the pointer is dropped by 8 words and
	 *	the remaining four are addressed with non-negative offsets.
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# leftover bytes (count mod SZREG)
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# SIZEREG = leftover bytes
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	/*
	 * Unsigned, nonzero-on-entry count: loop until it reaches zero
	 * (see the forward byte loop).
	 */
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# number of words to transfer * SZREG
	beq	a3,zero,3b		# less than one word: bytes only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG		# (delay slot)

	b	3b			# finish the tail with the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)