/***************************************************************************
Copyright (c) 2013-2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

/**************************************************************************************
* 2016/04/21 Werner Saar (wernsaar@googlemail.com)
*        BLASTEST               : OK
*        CTEST                  : OK
*        TEST                   : OK
*        LAPACK-TEST            : OK
**************************************************************************************/

/*
 * Driver logic of the DCOPYT copy kernel (PowerPC assembly).
 *
 * The code walks A in groups of 4, 2 and 1 lines spaced LDA apart, selected
 * by M >> 2, (M & 2) and (M & 1).  Inside every group it emits N >> 4 tiles
 * that are 16 wide, followed by optional 8-, 4-, 2- and 1-wide remainders
 * selected by the corresponding bits of N.
 *
 * Everything below is defined outside this fragment: the register aliases
 * (M, N, A, LDA, B, A0-A3, B16, B8, B4, B2, B1, BO, I, J, T1, T2, M16, PREA,
 * PREB), the SIZE constant, the COPY_RxC macros and the exit label L999.
 *
 * NOTE(review): the COPY_RxC macros presumably copy an R x C tile from the
 * A0..A3 cursors to BO and advance the A cursors themselves; B8/B4/B2/B1 and
 * M16 are presumably initialised by the including file.  Confirm against the
 * macro file and the kernel prologue.
 */


        srawi.  I,      M,      2               /* I = M >> 2 (word shift); CR0 reflects the result */
        ble     DCOPYT_L2_BEGIN                 /* no group of 4 lines: go to the 2-line tail */


/*
 * ---- groups of 4 lines of A --------------------------------------------
 */
DCOPYT_L4_BEGIN:

        mr      A0,     A                       /* A0..A3 = four consecutive lines, LDA apart */
        add     A1,     A0,     LDA
        add     A2,     A1,     LDA
        add     A3,     A2,     LDA
        add     A,      A3,     LDA             /* A now points at the next group */
        mr      B16,    B                       /* B16 = destination for the 16-wide tiles */
        addi    B,      B,      64*SIZE         /* step B by 64*SIZE (4 lines x 16 columns) */

        sradi.  J,      N,      4               /* J = N >> 4 = number of 16-wide tiles */
        ble     DCOPYT_L4x8_BEGIN

        mr      BO,     B16
        addi    T2,     M16,    384             /* loop-invariant prefetch offset M16 + 384 */
        mtctr   J                               /* the 16-wide loop counts down in CTR */

        .align 5                                /* align the hot loop head (2^5 bytes with GAS's
                                                   power-of-two .align on PowerPC - confirm) */

DCOPYT_L4x16_LOOP:

        addi    T1,     M16,    256             /* prefetch offset M16 + 256 */

        dcbt    A0,     PREA                    /* touch the four source lines PREA ahead */
        dcbt    A1,     PREA
        dcbt    A2,     PREA
        dcbt    A3,     PREA

        dcbt    BO,     M16                     /* touch the destination at BO + M16, */
        dcbt    BO,     PREB                    /* BO + PREB, */
        dcbt    BO,     T1                      /* BO + M16 + 256 */
        dcbt    BO,     T2                      /* and BO + M16 + 384 */

        COPY_4x16

        add     BO,     BO,     M16             /* next 16-wide tile lands M16 further on */

        // addic.       J,      J,      -1
        bdnz+   DCOPYT_L4x16_LOOP               /* --CTR; loop while CTR != 0 ('+' = predict taken) */

DCOPYT_L4x8_BEGIN:

        /*
         * andi. never produces a negative value, so in all the remainder
         * tests below "ble" is taken exactly when the tested bit of N is clear.
         */
        andi.   T1,     N,      8
        ble     DCOPYT_L4x4_BEGIN

        mr      BO,     B8

        COPY_4x8


        addi    B8,     B8,     32*SIZE         /* 4 lines x 8 columns consumed from B8 */

DCOPYT_L4x4_BEGIN:

        andi.   T1,     N,      4
        ble     DCOPYT_L4x2_BEGIN

        mr      BO,     B4

        COPY_4x4


        addi    B4,     B4,     16*SIZE         /* 4 lines x 4 columns consumed from B4 */

DCOPYT_L4x2_BEGIN:

        andi.   T1,     N,      2
        ble     DCOPYT_L4x1_BEGIN

        mr      BO,     B2

        COPY_4x2


        addi    B2,     B2,     8*SIZE          /* 4 lines x 2 columns consumed from B2 */

DCOPYT_L4x1_BEGIN:

        andi.   T1,     N,      1
        ble     DCOPYT_L4_END

        mr      BO,     B1

        COPY_4x1


        addi    B1,     B1,     4*SIZE          /* 4 lines x 1 column consumed from B1 */

DCOPYT_L4_END:

        addic.  I,      I,      -1              /* one group of 4 lines done */
        bgt     DCOPYT_L4_BEGIN



/*
 * ---- optional group of 2 lines of A (bit 1 of M) -------------------------
 */
DCOPYT_L2_BEGIN:

        andi.   T1,     M,      2
        ble     DCOPYT_L1_BEGIN                 /* bit clear: no 2-line group */

        mr      A0,     A
        add     A1,     A0,     LDA
        add     A,      A1,     LDA             /* A now points past the 2-line group */
        mr      B16,    B
        addi    B,      B,      32*SIZE         /* step B by 32*SIZE (2 lines x 16 columns) */

        sradi.  J,      N,      4               /* J = N >> 4 = number of 16-wide tiles */
        ble     DCOPYT_L2x8_BEGIN

        mr      BO,     B16

DCOPYT_L2x16_LOOP:

        COPY_2x16

        add     BO,     BO,     M16             /* next 16-wide tile lands M16 further on */

        addic.  J,      J,      -1
        bgt     DCOPYT_L2x16_LOOP

DCOPYT_L2x8_BEGIN:

        andi.   T1,     N,      8
        ble     DCOPYT_L2x4_BEGIN

        mr      BO,     B8

        COPY_2x8


        addi    B8,     B8,     16*SIZE         /* 2 lines x 8 columns consumed from B8 */

DCOPYT_L2x4_BEGIN:

        andi.   T1,     N,      4
        ble     DCOPYT_L2x2_BEGIN

        mr      BO,     B4

        COPY_2x4


        addi    B4,     B4,     8*SIZE          /* 2 lines x 4 columns consumed from B4 */

DCOPYT_L2x2_BEGIN:

        andi.   T1,     N,      2
        ble     DCOPYT_L2x1_BEGIN

        mr      BO,     B2

        COPY_2x2


        addi    B2,     B2,     4*SIZE          /* 2 lines x 2 columns consumed from B2 */

DCOPYT_L2x1_BEGIN:

        andi.   T1,     N,      1
        ble     DCOPYT_L2_END

        mr      BO,     B1

        COPY_2x1


        addi    B1,     B1,     2*SIZE          /* 2 lines x 1 column consumed from B1 */

DCOPYT_L2_END:


/*
 * ---- optional single line of A (bit 0 of M) ------------------------------
 */
DCOPYT_L1_BEGIN:

        andi.   T1,     M,      1
        ble     L999                            /* bit clear: nothing left, leave via L999 */

        mr      A0,     A
        add     A,      A0,     LDA             /* A now points past the last line */
        mr      B16,    B
        addi    B,      B,      16*SIZE         /* step B by 16*SIZE (1 line x 16 columns) */

        sradi.  J,      N,      4               /* J = N >> 4 = number of 16-wide tiles */
        ble     DCOPYT_L1x8_BEGIN

        mr      BO,     B16

DCOPYT_L1x16_LOOP:

        COPY_1x16

        add     BO,     BO,     M16             /* next 16-wide tile lands M16 further on */

        addic.  J,      J,      -1
        bgt     DCOPYT_L1x16_LOOP

DCOPYT_L1x8_BEGIN:

        andi.   T1,     N,      8
        ble     DCOPYT_L1x4_BEGIN

        mr      BO,     B8

        COPY_1x8


        addi    B8,     B8,     8*SIZE          /* 1 line x 8 columns consumed from B8 */

DCOPYT_L1x4_BEGIN:

        andi.   T1,     N,      4
        ble     DCOPYT_L1x2_BEGIN

        mr      BO,     B4

        COPY_1x4


        addi    B4,     B4,     4*SIZE          /* 1 line x 4 columns consumed from B4 */

DCOPYT_L1x2_BEGIN:

        andi.   T1,     N,      2
        ble     DCOPYT_L1x1_BEGIN

        mr      BO,     B2

        COPY_1x2


        addi    B2,     B2,     2*SIZE          /* 1 line x 2 columns consumed from B2 */

DCOPYT_L1x1_BEGIN:

        andi.   T1,     N,      1
        ble     DCOPYT_L1_END

        mr      BO,     B1

        COPY_1x1


        addi    B1,     B1,     1*SIZE          /* 1 line x 1 column consumed from B1 */

DCOPYT_L1_END:
