/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__align_cpy_8.s"

/*
 * __align_cpy_8(s1, s2, n)
 *
 * Move n bytes from an 8-byte aligned source to an 8-byte aligned target,
 * where n is a multiple of 8.  __align_cpy_16 is a second entry point that
 * lands on exactly the same code.
 *
 * On entry:
 *	o0	target address
 *	o1	source address
 *	o2	byte count (must be a multiple of 8)
 * On return:
 *	o0	target address (o0 is never written here)
 * Caller registers modified by this routine:
 *	o1-o5
 *
 * Binary compatibility:
 *	32-bit compilations will not use this helper routine.  Doing so
 *	would break binary compatibility with previous versions of Solaris.
 *
 * What the caller guarantees:
 *	- Source and target addresses are both 8-byte aligned.
 *	- The two regions either do not overlap at all or overlap _exactly_
 *	  (identical addresses, which is detected and treated as a no-op).
 *	- The byte count is a multiple of 8.
 *	- Calls are _usually_ made with more than 4*8 bytes and fewer than a
 *	  few hundred bytes.  Legal values are documented as 0 to MAX_SIZE_T.
 *	  NOTE(review): the count is tested with signed conditions (bpos, bg)
 *	  on %xcc, so a count for which (n - 8) has bit 63 set is handled
 *	  the same way as a count of zero, i.e. nothing is copied.
 *
 * Tuning history:
 *	The loop aims at reasonable speed on a generic v9.  Moving 32 bytes
 *	per pass instead of 16 gave no time saving for the byte counts that
 *	are expected here.  Even when only multiples of 16 bytes were copied,
 *	32 bytes per pass saved only about 2%, which is why __align_cpy_16
 *	simply shares the __align_cpy_8 code.  No timing runs using other
 *	levels of optimization have been tried yet.
 *
 * Register roles inside the routine:
 *	o1	source cursor (advanced after each pair of loads)
 *	o2	bytes still to copy minus 8 (biased by the first subcc)
 *	o3	first doubleword in flight
 *	o4	second doubleword in flight
 *	o5	target cursor
 */

#include <sys/asm_linkage.h>

	ENTRY(__align_cpy_8)
	ENTRY(__align_cpy_16)
	cmp	%o0, %o1		! target == source: nothing to move
	be,pn	%xcc, .ret_early
	subcc	%o2, 8, %o2		! (delay slot) o2 = n - 8, sets flags
	bz,pn	%xcc, .copy8		! n == 8: a single doubleword
	mov	%o0, %o5		! (delay slot) o5 walks the target,
					! o0 is kept as the return value
	bpos,a,pt %xcc, .copy16		! n >= 16: enter the 16-byte loop
	ldx	[%o1], %o3		! (annulled unless taken) preload
.ret_early:
	retl				! n == 0 or identical addresses
	nop

	.align	32
.copy16:				! 16 bytes per pass; on entry
					! o3 = [o1], o2 = bytes left - 8
	subcc	%o2, 16, %o2		! flags feed both branches below
	ldx	[%o1 + 8], %o4
	add	%o1, 16, %o1
	stx	%o3, [%o5]
	stx	%o4, [%o5 + 8]
	add	%o5, 16, %o5
	bg,a,pt	%xcc, .copy16		! 16 or more bytes still to go
	ldx	[%o1], %o3		! (annulled unless taken) preload

	bz,a,pt	%xcc, .store8		! exactly 8 bytes left over
	ldx	[%o1], %o3		! (annulled unless taken) last load

	retl				! count exhausted
	nop

.copy8:
	ldx	[%o1], %o3		! n == 8 path: the only doubleword
.store8:
	stx	%o3, [%o5]		! write the final doubleword
	retl
	nop

	SET_SIZE(__align_cpy_8)
	SET_SIZE(__align_cpy_16)