// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The regions may overlap: when the
// destination starts inside the source region (0 <= to-from < n) the copy
// runs from the end towards the start, otherwise it runs forward.
//
// Register roles:
//   R3  destination cursor (starts at 'to')
//   R4  source cursor (starts at 'from')
//   R5  n, the byte count
//   R6  number of doublewords (n >> 3); reduced to the leftover
//       doublewords after a 32 byte loop has run
//   R7  number of trailing bytes (n & 7)
//   R8  scratch: data being moved, 32 byte chunk count, then the
//       constant 16 used as index for the second 16 byte vector
//   R9  dest - src distance (backward path only)
//   CTR loop counter for every copy loop
//   VS32, VS33  hold one 32 byte chunk in the VSX loops
//
// Condition register fields:
//   CR0  result of the ANDCC computing R7 (EQ: no tail bytes); later
//        reused by the other CC-setting instructions
//   CR1  R6 compared with 0 (EQ: no doublewords, GT: at least one)
//   CR2  unsigned (dest - src) compared with n (LT: must copy backward)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, R5, R7	// R7: bytes to copy; CR0[EQ] set if none
	SRD	$3, R5, R6	// R6: double words to copy
	CMP	R6, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary. The compare is unsigned, so dest < src yields a huge
	// difference and selects the forward copy.

	SUB	R4, R3, R8	// dest - src
	CMPU	R8, R5, CR2	// < len?
	BC	12, 8, backward	// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
	SRDCC	$2, R6, R8		// 32 byte chunks?
	BNE	forward32setup		// yes: use the 32 byte loop first
	MOVD	R6, CTR			// R6 = number of double words

	// Move double words

forward8:
	MOVD	0(R4), R8		// double word
	ADD	$8, R4
	MOVD	R8, 0(R3)
	ADD	$8, R3
	BC	16, 0, forward8		// bdnz
	BR	noforwardlarge		// handle remainder

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(R3)			// prepare data cache
	DCBT	(R4)
	MOVD	R8, CTR			// 32 byte chunk count
	MOVD	$16, R8			// index of the second 16 bytes

forward32:
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33
	ADD	$32, R4
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)
	ADD	$32, R3			// bump up for next set
	BC	16, 0, forward32	// continue
	RLDCLCC	$61, R5, $3, R6		// remaining doublewords
	BEQ	noforwardlarge
	MOVD	R6, CTR			// set up the CTR
	BR	forward8

noforwardlarge:
	CMP	R7, $0			// any remaining bytes
	BC	4, 1, LR		// ble lr

forwardtail:
	MOVD	R7, CTR			// move tail bytes

forwardtailloop:
	MOVBZ	0(R4), R8		// move single bytes
	ADD	$1, R4
	MOVBZ	R8, 0(R3)
	ADD	$1, R3
	BC	16, 0, forwardtailloop	// bdnz
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	R5, R4, R4		// end of source
	ADD	R3, R5, R3		// end of dest

	// CR0 still holds the result of the ANDCC above: the ADD, SUB, SRD
	// and the compares into CR1/CR2 executed since then leave it untouched.
	BEQ	nobackwardtail		// earlier condition

	MOVD	R7, CTR			// bytes to move

backwardtailloop:
	MOVBZ	-1(R4), R8		// point to last byte
	SUB	$1, R4
	MOVBZ	R8, -1(R3)
	SUB	$1, R3
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1 lr: no doublewords, done

backwardlarge:
	MOVD	R6, CTR			// default: every doubleword, one at a time
	// R3 and R4 have been moved by the same amount, so R3 - R4 is still
	// dest - src, which is >= 0 on the backward path.
	SUB	R4, R3, R9		// Use vsx if moving
	CMP	R9, $32			// at least 32 byte chunks
	BLT	backwardlargeloop	// and distance >= 32
	SRDCC	$2, R6, R8		// 32 byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(R4), R8
	SUB	$8, R4
	MOVD	R8, -8(R3)
	SUB	$8, R3
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	R8, CTR			// set up loop ctr
	MOVD	$16, R8			// 32 bytes at a time

backward32loop:
	SUB	$32, R4
	SUB	$32, R3
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)
	BC	16, 0, backward32loop	// bdnz
	// Only the doublewords not covered by the 32 byte chunks remain.
	// Reloading the full count here would copy past the buffer start.
	ANDCC	$3, R6, R6		// remaining doublewords
	BC	12, 2, LR		// beq lr: nothing left
	MOVD	R6, CTR
	BR	backwardlargeloop