/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin.
*/
/*********************************************************************/

/*
 * Dot-product kernel over vectors whose elements are pairs of
 * floating-point values (MIPS64 assembly, run through the C
 * preprocessor).
 *
 * Every element of X and Y is read as two values, at byte offsets 0 and
 * SIZE from the element address.  Four running sums are maintained
 * (assuming "MADD d, c, a, b" computes d = c + a * b; the macro comes
 * from common.h -- confirm there):
 *
 *     s1 += x[0] * y[0]        s2 += x[1] * y[0]
 *     s3 += x[0] * y[1]        s4 += x[1] * y[1]
 *
 * and are combined at .L999:
 *
 *     CONJ not defined:   s1 = s1 - s4,   s3 = s3 + s2
 *     CONJ defined:       s1 = s1 + s4,   s3 = s3 - s2
 *
 * which is the complex product sum(x * y), respectively
 * sum(conj(x) * y), when each pair is interpreted as (real, imaginary).
 * The results are left in s1 ($f0) and s3 ($f2) when control returns
 * through $31.
 *
 * Register inputs: N ($4) element count, X ($5) and Y ($7) base
 * addresses, INCX ($6) and INCY ($8) element strides.  When F_INTERFACE
 * is defined, N, INCX and INCY hold addresses and are first loaded
 * through LDINT.
 *
 * Control flow:
 *     .L13/.L14   both strides equal one element: 4 elements per pass,
 *                 loads for the next group interleaved with the MADDs
 *     .L15-.L17   unit-stride remainder (N & 3 elements)
 *     .L20-.L24   general strides, 4 elements per pass
 *     .L25/.L26   general-stride remainder (N & 3 elements)
 *     .L999       combine partial sums and return
 *
 * The instruction written after each branch or jump is relied upon as
 * its delay slot: the final ADD/SUB after "j $31" must execute for s3
 * to be correct.  This presumably depends on PROLOGUE selecting
 * ".set noreorder" -- confirm in common.h.  Do not reorder instructions
 * around branches.
 */

#define ASSEMBLER
#include "common.h"

/* Integer argument registers. */
#define N	$4
#define X	$5
#define INCX	$6
#define Y	$7
#define INCY	$8

/* Integer scratch: loop counter and temporary. */
#define I	$2
#define TEMP	$3

/* Two alternating sets of loaded X values (a*) and Y values (b*). */
#define a1	$f4
#define a2	$f5
#define a3	$f6
#define a4	$f7
#define b1	$f8
#define b2	$f9
#define b3	$f10
#define b4	$f11

/* Partial-sum accumulators; s1 and s3 carry the final result. */
#define s1	$f0
#define s2	$f1
#define s3	$f2
#define s4	$f3


	PROLOGUE

#ifdef F_INTERFACE
	/* Arguments arrive as addresses: fetch the integer values. */
	LDINT	N, 0(N)
	LDINT	INCX, 0(INCX)
	LDINT	INCY, 0(INCY)
#endif

	/* Clear the four accumulators starting from integer register $0. */
	MTC	$0, s1

	MOV	s2, s1
	MOV	s3, s2
	MOV	s4, s3

	/* Scale the strides from elements to bytes. */
	dsll	INCX, INCX, ZBASE_SHIFT
	li	TEMP, 2 * SIZE			/* byte size of one element */

	blez	N, .L999			/* nothing to do for N <= 0 */
	dsll	INCY, INCY, ZBASE_SHIFT		/* (delay slot) */

	/* Take the general path unless both strides are one element. */
	bne	INCX, TEMP, .L20
	dsra	I, N, 2				/* (delay slot) I = N / 4 */

	bne	INCY, TEMP, .L20
	NOP

	blez	I, .L15
	NOP

	/* Preload the first element for the pipelined loop. */
	LD	a1, 0 * SIZE(X)
	LD	a2, 1 * SIZE(X)
	LD	b1, 0 * SIZE(Y)
	daddiu	I, I, -1

	blez	I, .L14				/* only one group of 4: drain */
	LD	b2, 1 * SIZE(Y)			/* (delay slot) */
	.align 3

/* Unit stride, 4 elements per pass; also preloads the next group. */
.L13:
	MADD	s1, s1, a1, b1
	LD	a3, 2 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 3 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 2 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 3 * SIZE(Y)

	MADD	s1, s1, a3, b3
	LD	a1, 4 * SIZE(X)
	MADD	s2, s2, a4, b3
	LD	a2, 5 * SIZE(X)
	MADD	s3, s3, a3, b4
	LD	b1, 4 * SIZE(Y)
	MADD	s4, s4, a4, b4
	LD	b2, 5 * SIZE(Y)

	MADD	s1, s1, a1, b1
	LD	a3, 6 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 7 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 6 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 7 * SIZE(Y)

	MADD	s1, s1, a3, b3
	LD	a1, 8 * SIZE(X)
	MADD	s2, s2, a4, b3
	LD	a2, 9 * SIZE(X)
	MADD	s3, s3, a3, b4
	LD	b1, 8 * SIZE(Y)
	MADD	s4, s4, a4, b4
	LD	b2, 9 * SIZE(Y)

	daddiu	I, I, -1
	daddiu	X, X, 8 * SIZE

	bgtz	I, .L13
	daddiu	Y, Y, 8 * SIZE			/* (delay slot) */
	.align 3

/* Final group of 4: same as .L13 but without loading past the group. */
.L14:
	MADD	s1, s1, a1, b1
	LD	a3, 2 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 3 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 2 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 3 * SIZE(Y)

	MADD	s1, s1, a3, b3
	LD	a1, 4 * SIZE(X)
	MADD	s2, s2, a4, b3
	LD	a2, 5 * SIZE(X)
	MADD	s3, s3, a3, b4
	LD	b1, 4 * SIZE(Y)
	MADD	s4, s4, a4, b4
	LD	b2, 5 * SIZE(Y)

	MADD	s1, s1, a1, b1
	LD	a3, 6 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 7 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 6 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 7 * SIZE(Y)

	MADD	s1, s1, a3, b3
	daddiu	X, X, 8 * SIZE
	MADD	s2, s2, a4, b3
	daddiu	Y, Y, 8 * SIZE
	MADD	s3, s3, a3, b4
	MADD	s4, s4, a4, b4
	.align 3

/* Unit-stride remainder: N & 3 elements, one per pass. */
.L15:
	andi	I, N, 3

	blez	I, .L999
	NOP

	LD	a1, 0 * SIZE(X)
	LD	a2, 1 * SIZE(X)

	LD	b1, 0 * SIZE(Y)
	daddiu	I, I, -1

	blez	I, .L17				/* single element left: drain */
	LD	b2, 1 * SIZE(Y)			/* (delay slot) */
	.align 3

.L16:
	MADD	s1, s1, a1, b1
	daddiu	I, I, -1
	MADD	s2, s2, a2, b1
	LD	b1, 2 * SIZE(Y)
	MADD	s3, s3, a1, b2
	LD	a1, 2 * SIZE(X)
	MADD	s4, s4, a2, b2
	LD	a2, 3 * SIZE(X)

	LD	b2, 3 * SIZE(Y)
	daddiu	X, X, 2 * SIZE

	bgtz	I, .L16
	daddiu	Y, Y, 2 * SIZE			/* (delay slot) */
	.align 3

/* Accumulate the last preloaded element, then finish. */
.L17:
	MADD	s1, s1, a1, b1
	MADD	s2, s2, a2, b1
	NOP
	MADD	s3, s3, a1, b2
	j	.L999
	MADD	s4, s4, a2, b2			/* (delay slot) */
	.align 3

/* General-stride path. */
.L20:
#ifdef F_INTERFACE
	/*
	 * For a negative stride, move the base pointer by
	 * -(N - 1) * stride bytes before the forward-stepping loops run.
	 * NOTE(review): "mult" is the 32-bit multiply while the pointer
	 * arithmetic is 64-bit; this presumably assumes the byte offset
	 * fits in 32 bits -- confirm, otherwise "dmult" would be needed.
	 */
	bgez	INCX, .L21
	daddiu	TEMP, N, -1			/* (delay slot) */

	mult	TEMP, INCX

	mflo	TEMP
	dsub	X, X, TEMP
	.align 3

.L21:
	bgez	INCY, .L22
	daddiu	TEMP, N, -1			/* (delay slot) */

	mult	TEMP, INCY

	mflo	TEMP
	dsub	Y, Y, TEMP
	.align 3

.L22:
#endif
	blez	I, .L25				/* I still holds N / 4 */
	NOP

	/* Preload the first element and step both pointers. */
	LD	a1, 0 * SIZE(X)
	LD	a2, 1 * SIZE(X)
	LD	b1, 0 * SIZE(Y)
	LD	b2, 1 * SIZE(Y)

	dadd	X, X, INCX
	daddiu	I, I, -1

	blez	I, .L24				/* only one group of 4: drain */
	dadd	Y, Y, INCY			/* (delay slot) */
	.align 3

/* General stride, 4 elements per pass; also preloads the next group. */
.L23:
	MADD	s1, s1, a1, b1
	LD	a3, 0 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 1 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 0 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 1 * SIZE(Y)

	dadd	X, X, INCX
	dadd	Y, Y, INCY

	MADD	s1, s1, a3, b3
	LD	a1, 0 * SIZE(X)
	MADD	s2, s2, a4, b3
	LD	a2, 1 * SIZE(X)
	MADD	s3, s3, a3, b4
	LD	b1, 0 * SIZE(Y)
	MADD	s4, s4, a4, b4
	LD	b2, 1 * SIZE(Y)

	dadd	X, X, INCX
	dadd	Y, Y, INCY

	MADD	s1, s1, a1, b1
	LD	a3, 0 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 1 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 0 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 1 * SIZE(Y)

	dadd	X, X, INCX
	dadd	Y, Y, INCY

	MADD	s1, s1, a3, b3
	LD	a1, 0 * SIZE(X)
	MADD	s2, s2, a4, b3
	LD	a2, 1 * SIZE(X)
	MADD	s3, s3, a3, b4
	LD	b1, 0 * SIZE(Y)
	MADD	s4, s4, a4, b4
	LD	b2, 1 * SIZE(Y)

	dadd	X, X, INCX
	daddiu	I, I, -1

	bgtz	I, .L23
	dadd	Y, Y, INCY			/* (delay slot) */
	.align 3

/* Final group of 4: same as .L23 but without loading past the group. */
.L24:
	MADD	s1, s1, a1, b1
	LD	a3, 0 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 1 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 0 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 1 * SIZE(Y)

	dadd	X, X, INCX
	dadd	Y, Y, INCY

	MADD	s1, s1, a3, b3
	LD	a1, 0 * SIZE(X)
	MADD	s2, s2, a4, b3
	LD	a2, 1 * SIZE(X)
	MADD	s3, s3, a3, b4
	LD	b1, 0 * SIZE(Y)
	MADD	s4, s4, a4, b4
	LD	b2, 1 * SIZE(Y)

	dadd	X, X, INCX
	dadd	Y, Y, INCY

	MADD	s1, s1, a1, b1
	LD	a3, 0 * SIZE(X)
	MADD	s2, s2, a2, b1
	LD	a4, 1 * SIZE(X)
	MADD	s3, s3, a1, b2
	LD	b3, 0 * SIZE(Y)
	MADD	s4, s4, a2, b2
	LD	b4, 1 * SIZE(Y)

	MADD	s1, s1, a3, b3
	dadd	X, X, INCX
	MADD	s2, s2, a4, b3
	dadd	Y, Y, INCY
	MADD	s3, s3, a3, b4
	MADD	s4, s4, a4, b4
	.align 3

/* General-stride remainder: N & 3 elements, one per pass. */
.L25:
	andi	I, N, 3

	blez	I, .L999
	NOP
	.align 3

.L26:
	LD	a1, 0 * SIZE(X)
	LD	a2, 1 * SIZE(X)
	LD	b1, 0 * SIZE(Y)
	LD	b2, 1 * SIZE(Y)

	MADD	s1, s1, a1, b1
	MADD	s2, s2, a2, b1
	MADD	s3, s3, a1, b2
	MADD	s4, s4, a2, b2


	dadd	X, X, INCX
	dadd	Y, Y, INCY

	daddiu	I, I, -1

	bgtz	I, .L26
	NOP
	.align 3

/* Combine the partial sums into s1 and s3 and return. */
.L999:
	NOP
#ifndef CONJ
	SUB	s1, s1, s4
#else
	ADD	s1, s1, s4
#endif

	j	$31
	/* The instruction below is the delay slot of the return jump. */
#ifndef CONJ
	ADD	s3, s3, s2
#else
	SUB	s3, s3, s2
#endif

	EPILOGUE