/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2009. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/* ------------------------------------------------------------------ */
/* Register roles                                                      */
/*                                                                     */
/* Incoming values. LDC is not live in $8 on entry: it is loaded from  */
/* the stack right after the frame is set up.                          */
/* ------------------------------------------------------------------ */
#define M	$4
#define N	$5
#define K	$6
#define A	$9
#define B	$10
#define C	$11
#define LDC	$8

/* Running pointers into the current A tile (AO) and B panel (BO). */
#define AO	$12
#define BO	$13

/* Loop counters: I walks the M dimension, J the N dimension, L the K dimension. */
#define I	$2
#define J	$3
#define L	$7

/* One pointer per LDC-strided column of C in the current panel. */
/* CO3..CO8 live in $16-$21, which the prologue saves and .L999 restores. */
#define CO1	$14
#define CO2	$15
#define CO3	$16
#define CO4	$17
#define CO5	$18
#define CO6	$19
#define CO7	$20
#define CO8	$21

/* NOTE(review): OFFSET, KK and TEMP are defined for TRMMKERNEL builds but */
/* are not referenced anywhere in the code below. */
#if defined(TRMMKERNEL)
#define OFFSET	$22
#define KK	$23
#define TEMP	$24
#endif

/* Operands loaded from A. a3/a4 use $f28/$f29, which the prologue saves. */
#define a1	$f0
#define a2	$f1
#define a3	$f28
#define a4	$f29

/* Operands loaded from B. */
#define b1	$f2
#define b2	$f3
#define b3	$f4
#define b4	$f5
#define b5	$f6
#define b6	$f7
#define b7	$f8
#define b8	$f9

/* a5 shares a register with b8; b8 itself is never used under that name. */
#define a5	b8

/* Accumulators. cXY: X = column of the panel (1..8), Y = row of the tile (1..2). */
/* c31/c32 skip $f15/$f16 because those hold ALPHA_R/ALPHA_I. */
#define c11	$f10
#define c12	$f11
#define c21	$f12
#define c22	$f13
#define c31	$f14
#define c32	$f17
#define c41	$f18
#define c42	$f19
#define c51	$f20
#define c52	$f21
#define c61	$f22
#define c62	$f23
#define c71	$f24
#define c72	$f25
#define c81	$f26
#define c82	$f27

#define ALPHA_R	$f15
#define ALPHA_I	$f16

/* ------------------------------------------------------------------ */
/* Kernel overview (derived from the code below)                       */
/*                                                                     */
/* For every (row, column) pair an accumulator                         */
/*     acc = sum over k of (A value) x (B value)                       */
/* is formed, and two adjacent SIZE-sized slots of C are updated:      */
/*     C[2 x row + 0] += ALPHA_R x acc                                 */
/*     C[2 x row + 1] += ALPHA_I x acc                                 */
/* NOTE(review): this assumes the MADD macro from common.h computes    */
/* dst = src + x x y, that ADD is a plain add, and that MTC/MOV/LD/ST  */
/* are plain move/load/store macros -- confirm against common.h.       */
/*                                                                     */
/* Loop nest:                                                          */
/*   N: panels of 8 columns (.L10), then N&4 (.L30), N&2 (.L50),       */
/*      N&1 (.L70).                                                    */
/*   M: tiles of 2 rows (M >> 1), then one leftover row (M & 1).       */
/*   K: unrolled by 4 (K >> 2), then a K & 3 remainder loop.           */
/*                                                                     */
/* Data layout implied by the pointer arithmetic:                      */
/*   A tile : rows-per-tile values per k step (AO += 2 or 1 SIZE).     */
/*   B panel: columns-per-panel values per k step (BO += 8/4/2/1 SIZE).*/
/*   B is only advanced (B = BO) after a whole panel is finished, so   */
/*   every row tile of a panel re-reads the same B data.               */
/*                                                                     */
/* Scheduling notes:                                                   */
/*   - The instruction after each branch is relied on even when the    */
/*     branch is taken (e.g. "move BO, B" after "blez L, .L15" is      */
/*     needed at .L16), i.e. it sits in the branch delay slot.         */
/*     NOTE(review): presumably PROLOGUE selects .set noreorder --     */
/*     confirm in common.h.                                            */
/*   - Loads for the next k step are interleaved with the multiply-    */
/*     adds of the current one; load offsets are relative to the       */
/*     pointer value at that exact point, so statement order matters.  */
/*   - The write-back code reuses $f0-$f7 (aliases a1, a2, b1-b6) as   */
/*     scratch for C values, which is why each tile reloads operands.  */
/*   - NOTE(review): $f20-$f23 (c51..c62) are used without being       */
/*     saved; confirm they are caller-saved in every targeted ABI.     */
/* ------------------------------------------------------------------ */

	PROLOGUE

	/* 128-byte frame: save the callee-saved registers this routine uses. */
	daddiu	$sp, $sp, -128

	SDARG	$16, 0($sp)
	SDARG	$17, 8($sp)
	SDARG	$18, 16($sp)
	SDARG	$19, 24($sp)
	SDARG	$20, 32($sp)
	SDARG	$21, 40($sp)
	sdc1	$f24, 48($sp)
	sdc1	$f25, 56($sp)
	sdc1	$f26, 64($sp)
	sdc1	$f27, 72($sp)
	sdc1	$f28, 80($sp)
	sdc1	$f29, 88($sp)

	/* LDC comes from the first slot above our frame (caller's stack area). */
	LDARG	LDC, 128($sp)

	/* Scale LDC by 2^ZBASE_SHIFT. */
	/* NOTE(review): presumably elements -> bytes for one (re, im) pair; confirm ZBASE_SHIFT in common.h. */
	dsll	LDC, LDC, ZBASE_SHIFT

	/* J = number of full 8-column panels. */
	dsra	J, N, 3
	blez	J, .L30
	nop

/* ================================================================== */
/* 8-column panel: set up CO1..CO8, advance C by 8 columns, clear      */
/* c11..c61 (the rest are cleared at .L11 / .L20).                     */
/* ================================================================== */
.L10:
	move	CO1, C
	MTC	$0, c11
	daddu	CO2, C, LDC
	move	AO, A
	daddu	CO3, CO2, LDC
	daddiu	J, J, -1
	daddu	CO4, CO3, LDC
	MOV	c21, c11
	daddu	CO5, CO4, LDC
	MOV	c31, c11
	daddu	CO6, CO5, LDC
	MOV	c41, c11
	daddu	CO7, CO6, LDC
	MOV	c51, c11
	daddu	CO8, CO7, LDC
	dsra	I, M, 1
	daddu	C, CO8, LDC

	blez	I, .L20
	MOV	c61, c11

/* ------------------------------------------------------------------ */
/* 2 rows x 8 columns tile: preload operands, clear the remaining      */
/* accumulators, L = K >> 2.                                           */
/* ------------------------------------------------------------------ */
.L11:
	LD	a1, 0 * SIZE(AO)
	MOV	c71, c11
	LD	b1, 0 * SIZE(B)
	MOV	c81, c11

	LD	a3, 4 * SIZE(AO)
	MOV	c12, c11
	LD	b2, 1 * SIZE(B)
	MOV	c22, c11

	dsra	L, K, 2
	MOV	c32, c11
	LD	b3, 2 * SIZE(B)
	MOV	c42, c11

	LD	b4, 3 * SIZE(B)
	MOV	c52, c11
	LD	b5, 4 * SIZE(B)
	MOV	c62, c11

	LD	b6, 8 * SIZE(B)
	MOV	c72, c11
	LD	b7, 12 * SIZE(B)
	MOV	c82, c11

	/* Fewer than 4 k steps: go straight to the remainder loop. */
	blez	L, .L15
	move	BO, B

	/* Pipeline prologue: row 1, columns 1-4 of k step 0. */
	/* If this is the only unrolled iteration, finish it in .L13. */
	MADD	c11, c11, a1, b1
	LD	a2, 1 * SIZE(AO)
	MADD	c21, c21, a1, b2
	daddiu	L, L, -1
	MADD	c31, c31, a1, b3
	blez	L, .L13
	MADD	c41, c41, a1, b4
	NOP
	.align	3

/* Main K loop, 4 k steps per pass. Entered with the first four        */
/* multiply-adds of step 0 already issued; ends by issuing the same    */
/* four for the next pass.                                             */
.L12:
	/* k step 0: row 2, columns 1-4. */
	MADD	c12, c12, a2, b1
	LD	b1, 16 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 7 * SIZE(BO)

	/* k step 0: row 1, columns 5-8. */
	MADD	c51, c51, a1, b5
	LD	a4, 2 * SIZE(AO)
	MADD	c61, c61, a1, b2
	NOP
	MADD	c71, c71, a1, b3
	NOP
	MADD	c81, c81, a1, b4
	LD	a1, 8 * SIZE(AO)

	/* k step 0: row 2, columns 5-8. */
	MADD	c52, c52, a2, b5
	LD	b5, 20 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 9 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 10 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 11 * SIZE(BO)

	/* k step 1 (A operands a4 / a2, B operands from offsets 8..15). */
	MADD	c11, c11, a4, b6
	LD	a2, 3 * SIZE(AO)
	MADD	c21, c21, a4, b2
	NOP
	MADD	c31, c31, a4, b3
	NOP
	MADD	c41, c41, a4, b4
	NOP

	MADD	c12, c12, a2, b6
	LD	b6, 24 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 13 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 14 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 15 * SIZE(BO)

	MADD	c51, c51, a4, b7
	NOP
	MADD	c61, c61, a4, b2
	NOP
	MADD	c71, c71, a4, b3
	NOP
	MADD	c81, c81, a4, b4
	NOP

	MADD	c52, c52, a2, b7
	LD	b7, 28 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 17 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 18 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 19 * SIZE(BO)

	/* k step 2 (A operands a3 / a2, B operands from offsets 16..23). */
	MADD	c11, c11, a3, b1
	LD	a2, 5 * SIZE(AO)
	MADD	c21, c21, a3, b2
	NOP
	MADD	c31, c31, a3, b3
	NOP
	MADD	c41, c41, a3, b4
	NOP

	MADD	c12, c12, a2, b1
	LD	b1, 32 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 21 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 22 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 23 * SIZE(BO)

	MADD	c51, c51, a3, b5
	LD	a4, 6 * SIZE(AO)
	MADD	c61, c61, a3, b2
	NOP
	MADD	c71, c71, a3, b3
	NOP
	MADD	c81, c81, a3, b4
	LD	a3, 12 * SIZE(AO)

	MADD	c52, c52, a2, b5
	LD	b5, 36 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 25 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 26 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 27 * SIZE(BO)

	/* k step 3 (A operands a4 / a2, B operands from offsets 24..31). */
	MADD	c11, c11, a4, b6
	LD	a2, 7 * SIZE(AO)
	MADD	c21, c21, a4, b2
	NOP
	MADD	c31, c31, a4, b3
	NOP
	MADD	c41, c41, a4, b4
	daddiu	L, L, -1

	MADD	c12, c12, a2, b6
	LD	b6, 40 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 29 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 30 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 31 * SIZE(BO)

	/* Pointers advance here; the loads after this point use offsets */
	/* relative to the NEW AO / BO. */
	MADD	c51, c51, a4, b7
	daddiu	BO, BO, 32 * SIZE
	MADD	c61, c61, a4, b2
	daddiu	AO, AO, 8 * SIZE
	MADD	c71, c71, a4, b3
	NOP
	MADD	c81, c81, a4, b4
	NOP

	MADD	c52, c52, a2, b7
	LD	b7, 12 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 3 * SIZE(BO)

	/* Start k step 0 of the next pass (row 1, columns 1-4). */
	MADD	c11, c11, a1, b1
	LD	a2, 1 * SIZE(AO)
	MADD	c21, c21, a1, b2
	NOP
	MADD	c31, c31, a1, b3
	bgtz	L, .L12
	MADD	c41, c41, a1, b4
	NOP
	.align	3

/* Last unrolled pass: same work as .L12 but without issuing the       */
/* first four multiply-adds of a following pass.                       */
.L13:
	MADD	c12, c12, a2, b1
	LD	b1, 16 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 7 * SIZE(BO)

	MADD	c51, c51, a1, b5
	NOP
	MADD	c61, c61, a1, b2
	LD	a4, 2 * SIZE(AO)
	MADD	c71, c71, a1, b3
	NOP
	MADD	c81, c81, a1, b4
	LD	a1, 8 * SIZE(AO)

	MADD	c52, c52, a2, b5
	LD	b5, 20 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 9 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 10 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 11 * SIZE(BO)

	MADD	c11, c11, a4, b6
	LD	a2, 3 * SIZE(AO)
	MADD	c21, c21, a4, b2
	NOP
	MADD	c31, c31, a4, b3
	NOP
	MADD	c41, c41, a4, b4
	NOP

	MADD	c12, c12, a2, b6
	LD	b6, 24 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 13 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 14 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 15 * SIZE(BO)

	MADD	c51, c51, a4, b7
	NOP
	MADD	c61, c61, a4, b2
	NOP
	MADD	c71, c71, a4, b3
	NOP
	MADD	c81, c81, a4, b4
	NOP

	MADD	c52, c52, a2, b7
	LD	b7, 28 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 17 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 18 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 19 * SIZE(BO)

	MADD	c11, c11, a3, b1
	LD	a2, 5 * SIZE(AO)
	MADD	c21, c21, a3, b2
	NOP
	MADD	c31, c31, a3, b3
	NOP
	MADD	c41, c41, a3, b4
	NOP

	MADD	c12, c12, a2, b1
	LD	b1, 32 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 21 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 22 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 23 * SIZE(BO)

	MADD	c51, c51, a3, b5
	NOP
	MADD	c61, c61, a3, b2
	LD	a4, 6 * SIZE(AO)
	MADD	c71, c71, a3, b3
	NOP
	MADD	c81, c81, a3, b4
	LD	a3, 12 * SIZE(AO)

	MADD	c52, c52, a2, b5
	LD	b5, 36 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 25 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 26 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 27 * SIZE(BO)

	MADD	c11, c11, a4, b6
	LD	a2, 7 * SIZE(AO)
	MADD	c21, c21, a4, b2
	NOP
	MADD	c31, c31, a4, b3
	NOP
	MADD	c41, c41, a4, b4
	NOP

	MADD	c12, c12, a2, b6
	LD	b6, 40 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 29 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 30 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 31 * SIZE(BO)

	MADD	c51, c51, a4, b7
	daddiu	BO, BO, 32 * SIZE
	MADD	c61, c61, a4, b2
	daddiu	AO, AO, 8 * SIZE
	MADD	c71, c71, a4, b3
	NOP
	MADD	c81, c81, a4, b4
	NOP

	MADD	c52, c52, a2, b7
	LD	b7, 12 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c82, c82, a2, b4
	LD	b4, 3 * SIZE(BO)
	.align	3

/* K & 3 remainder for the 2x8 tile. */
.L15:
	andi	L, K, 3
	NOP
	blez	L, .L18
	NOP
	.align	3

/* One k step per pass: AO += 2, BO += 8; operands for the next pass   */
/* are reloaded as the current ones are consumed.                      */
.L16:
	MADD	c11, c11, a1, b1
	LD	a2, 1 * SIZE(AO)
	MADD	c21, c21, a1, b2
	NOP
	MADD	c31, c31, a1, b3
	NOP
	MADD	c41, c41, a1, b4
	NOP

	MADD	c12, c12, a2, b1
	LD	b1, 8 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 7 * SIZE(BO)

	MADD	c51, c51, a1, b5
	daddiu	L, L, -1
	MADD	c61, c61, a1, b2
	daddiu	AO, AO, 2 * SIZE
	MADD	c71, c71, a1, b3
	daddiu	BO, BO, 8 * SIZE
	MADD	c81, c81, a1, b4
	LD	a1, 0 * SIZE(AO)

	MADD	c52, c52, a2, b5
	LD	b5, 4 * SIZE(BO)
	MADD	c62, c62, a2, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c72, c72, a2, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c82, c82, a2, b4
	bgtz	L, .L16
	LD	b4, 3 * SIZE(BO)

/* ------------------------------------------------------------------ */
/* Write-back of the 2x8 tile. For each column pointer COn, slots 0/1  */
/* receive ALPHA_R/ALPHA_I times the row-1 accumulator (cn1) and slots */
/* 2/3 the same for the row-2 accumulator (cn2). $f0-$f7 are scratch.  */
/* Loads, multiply-adds and stores of neighbouring columns are         */
/* interleaved; the accumulators for the next tile are cleared on the  */
/* way out.                                                            */
/* ------------------------------------------------------------------ */
.L18:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 2 * SIZE(CO1)
	LD	$f3, 3 * SIZE(CO1)

	LD	$f4, 0 * SIZE(CO2)
	MADD	$f0, $f0, ALPHA_R, c11
	LD	$f5, 1 * SIZE(CO2)
	MADD	$f1, $f1, ALPHA_I, c11
	LD	$f6, 2 * SIZE(CO2)
	MADD	$f2, $f2, ALPHA_R, c12
	LD	$f7, 3 * SIZE(CO2)
	MADD	$f3, $f3, ALPHA_I, c12

	MADD	$f4, $f4, ALPHA_R, c21
	ST	$f0, 0 * SIZE(CO1)
	MADD	$f5, $f5, ALPHA_I, c21
	ST	$f1, 1 * SIZE(CO1)
	MADD	$f6, $f6, ALPHA_R, c22
	ST	$f2, 2 * SIZE(CO1)
	MADD	$f7, $f7, ALPHA_I, c22
	ST	$f3, 3 * SIZE(CO1)

	LD	$f0, 0 * SIZE(CO3)
	LD	$f1, 1 * SIZE(CO3)
	LD	$f2, 2 * SIZE(CO3)
	LD	$f3, 3 * SIZE(CO3)

	ST	$f4, 0 * SIZE(CO2)
	ST	$f5, 1 * SIZE(CO2)
	ST	$f6, 2 * SIZE(CO2)
	ST	$f7, 3 * SIZE(CO2)

	LD	$f4, 0 * SIZE(CO4)
	LD	$f5, 1 * SIZE(CO4)
	LD	$f6, 2 * SIZE(CO4)
	LD	$f7, 3 * SIZE(CO4)

	MADD	$f0, $f0, ALPHA_R, c31
	MADD	$f1, $f1, ALPHA_I, c31
	MADD	$f2, $f2, ALPHA_R, c32
	MADD	$f3, $f3, ALPHA_I, c32

	MADD	$f4, $f4, ALPHA_R, c41
	ST	$f0, 0 * SIZE(CO3)
	MADD	$f5, $f5, ALPHA_I, c41
	ST	$f1, 1 * SIZE(CO3)
	MADD	$f6, $f6, ALPHA_R, c42
	ST	$f2, 2 * SIZE(CO3)
	MADD	$f7, $f7, ALPHA_I, c42
	ST	$f3, 3 * SIZE(CO3)

	LD	$f0, 0 * SIZE(CO5)
	LD	$f1, 1 * SIZE(CO5)
	LD	$f2, 2 * SIZE(CO5)
	LD	$f3, 3 * SIZE(CO5)

	ST	$f4, 0 * SIZE(CO4)
	ST	$f5, 1 * SIZE(CO4)
	ST	$f6, 2 * SIZE(CO4)
	ST	$f7, 3 * SIZE(CO4)

	LD	$f4, 0 * SIZE(CO6)
	LD	$f5, 1 * SIZE(CO6)
	LD	$f6, 2 * SIZE(CO6)
	LD	$f7, 3 * SIZE(CO6)

	/* CO1..CO4 are finished: step them to the next 2-row tile. */
	MADD	$f0, $f0, ALPHA_R, c51
	daddiu	CO1,CO1, 4 * SIZE
	MADD	$f1, $f1, ALPHA_I, c51
	daddiu	CO2,CO2, 4 * SIZE
	MADD	$f2, $f2, ALPHA_R, c52
	daddiu	CO3,CO3, 4 * SIZE
	MADD	$f3, $f3, ALPHA_I, c52
	daddiu	CO4,CO4, 4 * SIZE

	MADD	$f4, $f4, ALPHA_R, c61
	ST	$f0, 0 * SIZE(CO5)
	MADD	$f5, $f5, ALPHA_I, c61
	ST	$f1, 1 * SIZE(CO5)
	MADD	$f6, $f6, ALPHA_R, c62
	ST	$f2, 2 * SIZE(CO5)
	MADD	$f7, $f7, ALPHA_I, c62
	ST	$f3, 3 * SIZE(CO5)

	LD	$f0, 0 * SIZE(CO7)
	LD	$f1, 1 * SIZE(CO7)
	LD	$f2, 2 * SIZE(CO7)
	LD	$f3, 3 * SIZE(CO7)

	ST	$f4, 0 * SIZE(CO6)
	ST	$f5, 1 * SIZE(CO6)
	ST	$f6, 2 * SIZE(CO6)
	ST	$f7, 3 * SIZE(CO6)

	/* c11 has been consumed above, so it can be re-zeroed here. */
	LD	$f4, 0 * SIZE(CO8)
	daddiu	I, I, -1
	LD	$f5, 1 * SIZE(CO8)
	MTC	$0, c11
	LD	$f6, 2 * SIZE(CO8)
	LD	$f7, 3 * SIZE(CO8)

	/* CO5..CO8 are stepped before the CO7/CO8 stores, hence the */
	/* negative store offsets below. */
	MADD	$f0, $f0, ALPHA_R, c71
	daddiu	CO5,CO5, 4 * SIZE
	MADD	$f1, $f1, ALPHA_I, c71
	daddiu	CO6,CO6, 4 * SIZE
	MADD	$f2, $f2, ALPHA_R, c72
	daddiu	CO7,CO7, 4 * SIZE
	MADD	$f3, $f3, ALPHA_I, c72
	daddiu	CO8,CO8, 4 * SIZE

	MADD	$f4, $f4, ALPHA_R, c81
	ST	$f0, -4 * SIZE(CO7)
	MADD	$f5, $f5, ALPHA_I, c81
	ST	$f1, -3 * SIZE(CO7)
	MADD	$f6, $f6, ALPHA_R, c82
	ST	$f2, -2 * SIZE(CO7)
	MADD	$f7, $f7, ALPHA_I, c82
	ST	$f3, -1 * SIZE(CO7)

	/* Re-clear c21..c61 for the next tile while the last stores drain. */
	ST	$f4, -4 * SIZE(CO8)
	MOV	c21, c11
	ST	$f5, -3 * SIZE(CO8)
	MOV	c31, c11
	ST	$f6, -2 * SIZE(CO8)
	MOV	c41, c11
	ST	$f7, -1 * SIZE(CO8)
	MOV	c51, c11
	bgtz	I, .L11
	MOV	c61, c11
	.align	3

/* ------------------------------------------------------------------ */
/* Leftover row (M & 1) of the 8-column panel: 1 row x 8 columns.      */
/* c11..c61 are already zero on both paths into this label.            */
/* a1..a4 hold the A values of four consecutive k steps.               */
/* ------------------------------------------------------------------ */
.L20:
	andi	I, M, 1
	MOV	c61, c11
	blez	I, .L29
	MOV	c71, c11

	LD	a1, 0 * SIZE(AO)
	LD	a2, 1 * SIZE(AO)
	LD	a3, 2 * SIZE(AO)
	LD	a4, 3 * SIZE(AO)

	LD	b1, 0 * SIZE(B)
	LD	b2, 1 * SIZE(B)
	LD	b3, 2 * SIZE(B)
	LD	b4, 3 * SIZE(B)
	LD	b5, 4 * SIZE(B)
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	dsra	L, K, 2
	MOV	c81, c11

	blez	L, .L25
	move	BO, B
	.align	3

/* 4 k steps per pass: AO += 4, BO += 32. */
.L22:
	/* k step 0 (a1). */
	MADD	c11, c11, a1, b1
	LD	b1, 16 * SIZE(BO)
	MADD	c21, c21, a1, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c31, c31, a1, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c41, c41, a1, b4
	LD	b4, 7 * SIZE(BO)

	MADD	c51, c51, a1, b5
	LD	b5, 20 * SIZE(BO)
	MADD	c61, c61, a1, b2
	LD	b2, 9 * SIZE(BO)
	MADD	c71, c71, a1, b3
	LD	b3, 10 * SIZE(BO)
	MADD	c81, c81, a1, b4
	LD	b4, 11 * SIZE(BO)

	LD	a1, 4 * SIZE(AO)
	daddiu	L, L, -1

	/* k step 1 (a2). */
	MADD	c11, c11, a2, b6
	LD	b6, 24 * SIZE(BO)
	MADD	c21, c21, a2, b2
	LD	b2, 13 * SIZE(BO)
	MADD	c31, c31, a2, b3
	LD	b3, 14 * SIZE(BO)
	MADD	c41, c41, a2, b4
	LD	b4, 15 * SIZE(BO)

	MADD	c51, c51, a2, b7
	LD	b7, 28 * SIZE(BO)
	MADD	c61, c61, a2, b2
	LD	b2, 17 * SIZE(BO)
	MADD	c71, c71, a2, b3
	LD	b3, 18 * SIZE(BO)
	MADD	c81, c81, a2, b4
	LD	b4, 19 * SIZE(BO)

	/* a2 for the next pass is fetched before AO moves. */
	LD	a2, 5 * SIZE(AO)
	daddiu	AO, AO, 4 * SIZE

	/* k step 2 (a3). */
	MADD	c11, c11, a3, b1
	LD	b1, 32 * SIZE(BO)
	MADD	c21, c21, a3, b2
	LD	b2, 21 * SIZE(BO)
	MADD	c31, c31, a3, b3
	LD	b3, 22 * SIZE(BO)
	MADD	c41, c41, a3, b4
	LD	b4, 23 * SIZE(BO)

	MADD	c51, c51, a3, b5
	LD	b5, 36 * SIZE(BO)
	MADD	c61, c61, a3, b2
	LD	b2, 25 * SIZE(BO)
	MADD	c71, c71, a3, b3
	LD	b3, 26 * SIZE(BO)
	MADD	c81, c81, a3, b4
	LD	b4, 27 * SIZE(BO)

	/* BO moves here: negative offsets below still address this pass. */
	LD	a3, 2 * SIZE(AO)
	daddiu	BO, BO, 32 * SIZE

	/* k step 3 (a4). */
	MADD	c11, c11, a4, b6
	LD	b6, 8 * SIZE(BO)
	MADD	c21, c21, a4, b2
	LD	b2, -3 * SIZE(BO)
	MADD	c31, c31, a4, b3
	LD	b3, -2 * SIZE(BO)
	MADD	c41, c41, a4, b4
	LD	b4, -1 * SIZE(BO)

	MADD	c51, c51, a4, b7
	LD	b7, 12 * SIZE(BO)
	MADD	c61, c61, a4, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c71, c71, a4, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c81, c81, a4, b4
	LD	b4, 3 * SIZE(BO)
	bgtz	L, .L22
	LD	a4, 3 * SIZE(AO)
	.align	3

/* K & 3 remainder for the 1x8 tile. */
.L25:
	andi	L, K, 3
	NOP
	blez	L, .L28
	NOP
	.align	3

/* One k step per pass: AO += 1, BO += 8. */
.L26:
	MADD	c11, c11, a1, b1
	LD	b1, 8 * SIZE(BO)
	MADD	c21, c21, a1, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c31, c31, a1, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c41, c41, a1, b4
	LD	b4, 7 * SIZE(BO)

	/* "MOV a2, a2" copies a register onto itself; it has no visible effect. */
	/* NOTE(review): presumably scheduling filler. */
	daddiu	L, L, -1
	MOV	a2, a2
	daddiu	AO, AO, 1 * SIZE
	daddiu	BO, BO, 8 * SIZE

	MADD	c51, c51, a1, b5
	LD	b5, 4 * SIZE(BO)
	MADD	c61, c61, a1, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c71, c71, a1, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c81, c81, a1, b4
	LD	a1, 0 * SIZE(AO)

	bgtz	L, .L26
	LD	b4, 3 * SIZE(BO)

/* Write-back of the 1x8 tile: two slots per column pointer.           */
/* No pointer advance is needed because this is the last row.          */
.L28:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 0 * SIZE(CO2)
	LD	$f3, 1 * SIZE(CO2)

	LD	$f4, 0 * SIZE(CO3)
	MADD	$f0, $f0, ALPHA_R, c11
	LD	$f5, 1 * SIZE(CO3)
	MADD	$f1, $f1, ALPHA_I, c11
	LD	$f6, 0 * SIZE(CO4)
	MADD	$f2, $f2, ALPHA_R, c21
	LD	$f7, 1 * SIZE(CO4)
	MADD	$f3, $f3, ALPHA_I, c21

	MADD	$f4, $f4, ALPHA_R, c31
	ST	$f0, 0 * SIZE(CO1)
	MADD	$f5, $f5, ALPHA_I, c31
	ST	$f1, 1 * SIZE(CO1)
	MADD	$f6, $f6, ALPHA_R, c41
	ST	$f2, 0 * SIZE(CO2)
	MADD	$f7, $f7, ALPHA_I, c41
	ST	$f3, 1 * SIZE(CO2)

	LD	$f0, 0 * SIZE(CO5)
	LD	$f1, 1 * SIZE(CO5)
	LD	$f2, 0 * SIZE(CO6)
	LD	$f3, 1 * SIZE(CO6)

	ST	$f4, 0 * SIZE(CO3)
	ST	$f5, 1 * SIZE(CO3)
	ST	$f6, 0 * SIZE(CO4)
	ST	$f7, 1 * SIZE(CO4)

	LD	$f4, 0 * SIZE(CO7)
	MADD	$f0, $f0, ALPHA_R, c51
	LD	$f5, 1 * SIZE(CO7)
	MADD	$f1, $f1, ALPHA_I, c51
	LD	$f6, 0 * SIZE(CO8)
	MADD	$f2, $f2, ALPHA_R, c61
	LD	$f7, 1 * SIZE(CO8)
	MADD	$f3, $f3, ALPHA_I, c61

	MADD	$f4, $f4, ALPHA_R, c71
	ST	$f0, 0 * SIZE(CO5)
	MADD	$f5, $f5, ALPHA_I, c71
	ST	$f1, 1 * SIZE(CO5)
	MADD	$f6, $f6, ALPHA_R, c81
	ST	$f2, 0 * SIZE(CO6)
	MADD	$f7, $f7, ALPHA_I, c81
	ST	$f3, 1 * SIZE(CO6)

	ST	$f4, 0 * SIZE(CO7)
	ST	$f5, 1 * SIZE(CO7)
	ST	$f6, 0 * SIZE(CO8)
	ST	$f7, 1 * SIZE(CO8)
	.align	3

/* End of an 8-column panel: B advances to where BO stopped (delay     */
/* slot), then loop while panels remain.                               */
/* NOTE(review): BO is only written inside the row code above, so this */
/* relies on M >= 1 -- confirm the caller guarantees it.               */
.L29:
	bgtz	J, .L10
	move	B, BO
	.align	3

/* ================================================================== */
/* 4-column panel (taken when bit 2 of N is set).                      */
/* ================================================================== */
.L30:
	andi	J, N, 4
	blez	J, .L50
	move	AO, A

	move	CO1, C
	MTC	$0, c11
	daddu	CO2, C, LDC
	daddu	CO3, CO2, LDC
	daddu	CO4, CO3, LDC
	MOV	c21, c11
	daddu	C, CO4, LDC
	MOV	c31, c11

	dsra	I, M, 1
	blez	I, .L40
	MOV	c41, c11

/* 2 rows x 4 columns tile: preload and clear the row-2 accumulators. */
.L31:
	LD	a1, 0 * SIZE(AO)
	LD	a3, 4 * SIZE(AO)

	LD	b1, 0 * SIZE(B)
	MOV	c12, c11
	LD	b2, 1 * SIZE(B)
	MOV	c22, c11
	LD	b3, 2 * SIZE(B)
	MOV	c32, c11
	LD	b4, 3 * SIZE(B)
	MOV	c42, c11

	LD	b5, 4 * SIZE(B)
	dsra	L, K, 2
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	blez	L, .L35
	move	BO, B
	.align	3

/* 4 k steps per pass: AO += 8, BO += 16. */
.L32:
	/* k step 0. */
	MADD	c11, c11, a1, b1
	LD	a2, 1 * SIZE(AO)
	MADD	c21, c21, a1, b2
	daddiu	L, L, -1
	MADD	c31, c31, a1, b3
	NOP
	MADD	c41, c41, a1, b4
	LD	a1, 2 * SIZE(AO)

	MADD	c12, c12, a2, b1
	LD	b1, 16 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 7 * SIZE(BO)

	/* k step 1. */
	MADD	c11, c11, a1, b5
	LD	a2, 3 * SIZE(AO)
	MADD	c21, c21, a1, b2
	NOP
	MADD	c31, c31, a1, b3
	NOP
	MADD	c41, c41, a1, b4
	LD	a1, 8 * SIZE(AO)

	MADD	c12, c12, a2, b5
	LD	b5, 20 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 9 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 10 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 11 * SIZE(BO)

	/* k step 2. */
	MADD	c11, c11, a3, b6
	LD	a2, 5 * SIZE(AO)
	MADD	c21, c21, a3, b2
	NOP
	MADD	c31, c31, a3, b3
	NOP
	MADD	c41, c41, a3, b4
	LD	a3, 6 * SIZE(AO)

	MADD	c12, c12, a2, b6
	LD	b6, 24 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 13 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 14 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 15 * SIZE(BO)

	/* k step 3; AO and BO advance mid-block, so the loads after */
	/* them already address the next pass. */
	MADD	c11, c11, a3, b7
	LD	a2, 7 * SIZE(AO)
	MADD	c21, c21, a3, b2
	daddiu	AO, AO, 8 * SIZE
	MADD	c31, c31, a3, b3
	daddiu	BO, BO, 16 * SIZE
	MADD	c41, c41, a3, b4
	LD	a3, 4 * SIZE(AO)

	MADD	c12, c12, a2, b7
	LD	b7, 12 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c42, c42, a2, b4
	NOP

	bgtz	L, .L32
	LD	b4, 3 * SIZE(BO)
	.align	3

/* K & 3 remainder for the 2x4 tile. */
.L35:
	andi	L, K, 3
	NOP
	blez	L, .L38
	NOP
	.align	3

/* One k step per pass: AO += 2, BO += 4 (BO bumped in the delay slot). */
.L36:
	MADD	c11, c11, a1, b1
	LD	a2, 1 * SIZE(AO)
	MADD	c21, c21, a1, b2
	daddiu	L, L, -1
	MADD	c31, c31, a1, b3
	daddiu	AO, AO, 2 * SIZE
	MADD	c41, c41, a1, b4
	LD	a1, 0 * SIZE(AO)

	MADD	c12, c12, a2, b1
	LD	b1, 4 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c32, c32, a2, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c42, c42, a2, b4
	LD	b4, 7 * SIZE(BO)

	bgtz	L, .L36
	daddiu	BO, BO, 4 * SIZE

/* Write-back of the 2x4 tile (same slot layout as .L18). */
.L38:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 2 * SIZE(CO1)
	LD	$f3, 3 * SIZE(CO1)

	LD	$f4, 0 * SIZE(CO2)
	LD	$f5, 1 * SIZE(CO2)
	LD	$f6, 2 * SIZE(CO2)
	LD	$f7, 3 * SIZE(CO2)

	MADD	$f0, $f0, ALPHA_R, c11
	MADD	$f1, $f1, ALPHA_I, c11
	MADD	$f2, $f2, ALPHA_R, c12
	MADD	$f3, $f3, ALPHA_I, c12

	MADD	$f4, $f4, ALPHA_R, c21
	ST	$f0, 0 * SIZE(CO1)
	MADD	$f5, $f5, ALPHA_I, c21
	ST	$f1, 1 * SIZE(CO1)
	MADD	$f6, $f6, ALPHA_R, c22
	ST	$f2, 2 * SIZE(CO1)
	MADD	$f7, $f7, ALPHA_I, c22
	ST	$f3, 3 * SIZE(CO1)

	LD	$f0, 0 * SIZE(CO3)
	LD	$f1, 1 * SIZE(CO3)
	LD	$f2, 2 * SIZE(CO3)
	LD	$f3, 3 * SIZE(CO3)

	ST	$f4, 0 * SIZE(CO2)
	MADD	$f0, $f0, ALPHA_R, c31
	ST	$f5, 1 * SIZE(CO2)
	MADD	$f1, $f1, ALPHA_I, c31
	ST	$f6, 2 * SIZE(CO2)
	MADD	$f2, $f2, ALPHA_R, c32
	ST	$f7, 3 * SIZE(CO2)
	MADD	$f3, $f3, ALPHA_I, c32

	LD	$f4, 0 * SIZE(CO4)
	LD	$f5, 1 * SIZE(CO4)
	LD	$f6, 2 * SIZE(CO4)
	LD	$f7, 3 * SIZE(CO4)

	/* All four column pointers step to the next tile here; the */
	/* CO3/CO4 stores below therefore use negative offsets. */
	MADD	$f4, $f4, ALPHA_R, c41
	daddiu	CO1,CO1, 4 * SIZE
	MADD	$f5, $f5, ALPHA_I, c41
	daddiu	CO2,CO2, 4 * SIZE
	MADD	$f6, $f6, ALPHA_R, c42
	daddiu	CO3,CO3, 4 * SIZE
	MADD	$f7, $f7, ALPHA_I, c42
	daddiu	CO4,CO4, 4 * SIZE

	ST	$f0, -4 * SIZE(CO3)
	daddiu	I, I, -1
	ST	$f1, -3 * SIZE(CO3)
	ST	$f2, -2 * SIZE(CO3)
	ST	$f3, -1 * SIZE(CO3)

	/* Re-clear c11..c41 for the next tile. */
	ST	$f4, -4 * SIZE(CO4)
	MTC	$0, c11
	ST	$f5, -3 * SIZE(CO4)
	MOV	c21, c11
	ST	$f6, -2 * SIZE(CO4)
	MOV	c31, c11
	ST	$f7, -1 * SIZE(CO4)
	bgtz	I, .L31
	MOV	c41, c11
	.align	3

/* Leftover row (M & 1) of the 4-column panel: 1 row x 4 columns.      */
/* c11..c41 are zero on entry.                                         */
/* NOTE(review): c61/c71/c81 are cleared here but are not read by the  */
/* 4-column code that follows.                                         */
.L40:
	andi	I, M, 1
	blez	I, .L49
	MOV	c61, c11

	LD	a1, 0 * SIZE(AO)
	MOV	c71, c11
	LD	a2, 1 * SIZE(AO)
	MOV	c81, c11

	LD	b1, 0 * SIZE(B)
	LD	b2, 1 * SIZE(B)
	LD	b3, 2 * SIZE(B)
	LD	b4, 3 * SIZE(B)
	LD	b5, 4 * SIZE(B)
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	dsra	L, K, 2

	blez	L, .L45
	move	BO, B
	.align	3

/* 4 k steps per pass: AO += 4, BO += 16. a1 carries k step 0, a2 is   */
/* reloaded for each of k steps 1..3.                                  */
.L42:
	MADD	c11, c11, a1, b1
	LD	b1, 16 * SIZE(BO)
	MADD	c21, c21, a1, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c31, c31, a1, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c41, c41, a1, b4
	LD	b4, 7 * SIZE(BO)

	LD	a1, 4 * SIZE(AO)
	daddiu	L, L, -1

	MADD	c11, c11, a2, b5
	LD	b5, 20 * SIZE(BO)
	MADD	c21, c21, a2, b2
	LD	b2, 9 * SIZE(BO)
	MADD	c31, c31, a2, b3
	LD	b3, 10 * SIZE(BO)
	MADD	c41, c41, a2, b4
	LD	b4, 11 * SIZE(BO)

	LD	a2, 2 * SIZE(AO)
	daddiu	AO, AO, 4 * SIZE

	MADD	c11, c11, a2, b6
	LD	b6, 24 * SIZE(BO)
	MADD	c21, c21, a2, b2
	LD	b2, 13 * SIZE(BO)
	MADD	c31, c31, a2, b3
	LD	b3, 14 * SIZE(BO)
	MADD	c41, c41, a2, b4
	LD	b4, 15 * SIZE(BO)

	/* AO has already advanced: -1 addresses k step 3 of this pass. */
	LD	a2, -1 * SIZE(AO)
	daddiu	BO, BO, 16 * SIZE

	MADD	c11, c11, a2, b7
	LD	b7, 12 * SIZE(BO)
	MADD	c21, c21, a2, b2
	LD	b2, 1 * SIZE(BO)
	MADD	c31, c31, a2, b3
	LD	b3, 2 * SIZE(BO)
	MADD	c41, c41, a2, b4
	LD	b4, 3 * SIZE(BO)

	bgtz	L, .L42
	LD	a2, 1 * SIZE(AO)
	.align	3

/* K & 3 remainder for the 1x4 tile. */
.L45:
	andi	L, K, 3
	NOP
	blez	L, .L48
	NOP
	.align	3

/* One k step per pass: AO += 1, BO += 4. */
.L46:
	MADD	c11, c11, a1, b1
	LD	b1, 4 * SIZE(BO)
	MADD	c21, c21, a1, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c31, c31, a1, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c41, c41, a1, b4
	LD	a1, 1 * SIZE(AO)

	LD	b4, 7 * SIZE(BO)
	daddiu	L, L, -1

	/* "MOV a2, a2" is a self-move with no visible effect. */
	daddiu	AO, AO, 1 * SIZE
	MOV	a2, a2
	bgtz	L, .L46
	daddiu	BO, BO, 4 * SIZE


/* Write-back of the 1x4 tile: two slots per column pointer. */
.L48:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 0 * SIZE(CO2)
	LD	$f3, 1 * SIZE(CO2)

	LD	$f4, 0 * SIZE(CO3)
	MADD	$f0, $f0, ALPHA_R, c11
	LD	$f5, 1 * SIZE(CO3)
	MADD	$f1, $f1, ALPHA_I, c11
	LD	$f6, 0 * SIZE(CO4)
	MADD	$f2, $f2, ALPHA_R, c21
	LD	$f7, 1 * SIZE(CO4)
	MADD	$f3, $f3, ALPHA_I, c21

	MADD	$f4, $f4, ALPHA_R, c31
	ST	$f0, 0 * SIZE(CO1)
	MADD	$f5, $f5, ALPHA_I, c31
	ST	$f1, 1 * SIZE(CO1)
	MADD	$f6, $f6, ALPHA_R, c41
	ST	$f2, 0 * SIZE(CO2)
	MADD	$f7, $f7, ALPHA_I, c41
	ST	$f3, 1 * SIZE(CO2)

	ST	$f4, 0 * SIZE(CO3)
	ST	$f5, 1 * SIZE(CO3)
	ST	$f6, 0 * SIZE(CO4)
	ST	$f7, 1 * SIZE(CO4)
	.align	3

/* End of the 4-column panel: B advances past it. */
.L49:
	move	B, BO
	.align	3

/* ================================================================== */
/* 2-column panel (taken when bit 1 of N is set).                      */
/* "move AO, A" sits right after the branch, so it also runs when the  */
/* branch to .L70 is taken; .L70 sets AO again, so that is harmless.   */
/* ================================================================== */
.L50:
	andi	J, N, 2
	blez	J, .L70

	move	AO, A
	move	CO1, C
	daddu	CO2, C, LDC

	dsra	I, M, 1
	blez	I, .L60
	daddu	C, CO2, LDC

/* 2 rows x 2 columns tile. a5 (alias of b8) carries the A value at    */
/* offset 4.                                                           */
/* NOTE(review): the b6 and b7 preloads do not appear to be read       */
/* anywhere in .L52/.L56/.L58.                                         */
.L51:
	LD	a1, 0 * SIZE(AO)
	MTC	$0, c11
	LD	a2, 1 * SIZE(AO)
	MOV	c21, c11
	LD	a5, 4 * SIZE(AO)

	LD	b1, 0 * SIZE(B)
	MOV	c12, c11
	LD	b2, 1 * SIZE(B)
	MOV	c22, c11
	LD	b3, 2 * SIZE(B)
	LD	b5, 4 * SIZE(B)
	dsra	L, K, 2
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	blez	L, .L55
	move	BO, B
	.align	3

/* 4 k steps per pass: AO += 8, BO += 8. */
.L52:
	/* k step 0. */
	MADD	c11, c11, a1, b1
	LD	a3, 2 * SIZE(AO)
	MADD	c21, c21, a1, b2
	LD	b4, 3 * SIZE(BO)
	MADD	c12, c12, a2, b1
	LD	a4, 3 * SIZE(AO)
	MADD	c22, c22, a2, b2
	LD	b1, 8 * SIZE(BO)

	/* k step 1. */
	MADD	c11, c11, a3, b3
	LD	a1, 8 * SIZE(AO)
	MADD	c21, c21, a3, b4
	LD	b2, 5 * SIZE(BO)
	MADD	c12, c12, a4, b3
	LD	a2, 5 * SIZE(AO)
	MADD	c22, c22, a4, b4
	LD	b3, 6 * SIZE(BO)

	/* k step 2. */
	MADD	c11, c11, a5, b5
	LD	a3, 6 * SIZE(AO)
	MADD	c21, c21, a5, b2
	LD	b4, 7 * SIZE(BO)
	MADD	c12, c12, a2, b5
	LD	a4, 7 * SIZE(AO)
	MADD	c22, c22, a2, b2
	LD	b5, 12 * SIZE(BO)

	/* k step 3. */
	MADD	c11, c11, a3, b3
	LD	a5, 12 * SIZE(AO)
	MADD	c21, c21, a3, b4
	LD	b2, 9 * SIZE(BO)
	MADD	c12, c12, a4, b3
	LD	a2, 9 * SIZE(AO)
	MADD	c22, c22, a4, b4
	LD	b3, 10 * SIZE(BO)

	daddiu	AO, AO, 8 * SIZE
	daddiu	L, L, -1
	bgtz	L, .L52
	daddiu	BO, BO, 8 * SIZE
	.align	3

/* K & 3 remainder for the 2x2 tile. */
.L55:
	andi	L, K, 3
	NOP
	blez	L, .L58
	NOP
	.align	3

/* One k step per pass: AO += 2, BO += 2. */
.L56:
	MADD	c11, c11, a1, b1
	LD	a2, 1 * SIZE(AO)
	MADD	c21, c21, a1, b2
	LD	a1, 2 * SIZE(AO)

	MADD	c12, c12, a2, b1
	LD	b1, 2 * SIZE(BO)
	MADD	c22, c22, a2, b2
	LD	b2, 3 * SIZE(BO)

	daddiu	L, L, -1
	daddiu	AO, AO, 2 * SIZE
	bgtz	L, .L56
	daddiu	BO, BO, 2 * SIZE

/* Write-back of the 2x2 tile. The pointers are stepped before the     */
/* stores, hence the negative store offsets.                           */
.L58:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 2 * SIZE(CO1)
	LD	$f3, 3 * SIZE(CO1)

	LD	$f4, 0 * SIZE(CO2)
	LD	$f5, 1 * SIZE(CO2)
	LD	$f6, 2 * SIZE(CO2)
	LD	$f7, 3 * SIZE(CO2)

	MADD	$f0, $f0, ALPHA_R, c11
	daddiu	I, I, -1
	MADD	$f1, $f1, ALPHA_I, c11
	daddiu	CO1,CO1, 4 * SIZE
	MADD	$f2, $f2, ALPHA_R, c12
	daddiu	CO2,CO2, 4 * SIZE
	MADD	$f3, $f3, ALPHA_I, c12
	MADD	$f4, $f4, ALPHA_R, c21
	MADD	$f5, $f5, ALPHA_I, c21
	MADD	$f6, $f6, ALPHA_R, c22
	MADD	$f7, $f7, ALPHA_I, c22

	ST	$f0, -4 * SIZE(CO1)
	ST	$f1, -3 * SIZE(CO1)
	ST	$f2, -2 * SIZE(CO1)
	ST	$f3, -1 * SIZE(CO1)

	ST	$f4, -4 * SIZE(CO2)
	ST	$f5, -3 * SIZE(CO2)
	ST	$f6, -2 * SIZE(CO2)
	bgtz	I, .L51
	ST	$f7, -1 * SIZE(CO2)
	.align	3

/* Leftover row (M & 1) of the 2-column panel: 1 row x 2 columns.      */
/* Two accumulator pairs are used (c11/c21 for even k steps, c31/c41   */
/* for odd ones) and summed at .L68.                                   */
/* NOTE(review): the b5, b6 and b7 preloads do not appear to be read   */
/* by .L62/.L66/.L68.                                                  */
.L60:
	andi	I, M, 1
	blez	I, .L69
	NOP

	dsra	L, K, 2
	LD	a1, 0 * SIZE(AO)
	MTC	$0, c11
	LD	a2, 1 * SIZE(AO)
	MOV	c21, c11
	LD	a3, 2 * SIZE(AO)
	MOV	c31, c11
	LD	a4, 3 * SIZE(AO)
	MOV	c41, c11

	LD	b1, 0 * SIZE(B)
	LD	b2, 1 * SIZE(B)
	LD	b3, 2 * SIZE(B)
	LD	b4, 3 * SIZE(B)
	LD	b5, 4 * SIZE(B)
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	blez	L, .L65
	move	BO, B
	.align	3

/* 4 k steps per pass: AO += 4, BO += 8. */
.L62:
	/* k steps 0 (a1) and 1 (a2). */
	MADD	c11, c11, a1, b1
	LD	b1, 4 * SIZE(BO)
	MADD	c21, c21, a1, b2
	LD	b2, 5 * SIZE(BO)
	MADD	c31, c31, a2, b3
	LD	b3, 6 * SIZE(BO)
	MADD	c41, c41, a2, b4
	LD	b4, 7 * SIZE(BO)

	LD	a1, 4 * SIZE(AO)
	LD	a2, 5 * SIZE(AO)

	/* k steps 2 (a3) and 3 (a4). */
	MADD	c11, c11, a3, b1
	LD	b1, 8 * SIZE(BO)
	MADD	c21, c21, a3, b2
	LD	b2, 9 * SIZE(BO)
	MADD	c31, c31, a4, b3
	LD	b3, 10 * SIZE(BO)
	MADD	c41, c41, a4, b4
	LD	b4, 11 * SIZE(BO)

	LD	a3, 6 * SIZE(AO)
	LD	a4, 7 * SIZE(AO)

	daddiu	L, L, -1
	daddiu	AO, AO, 4 * SIZE

	bgtz	L, .L62
	daddiu	BO, BO, 8 * SIZE
	.align	3

/* K & 3 remainder for the 1x2 tile. */
.L65:
	andi	L, K, 3
	NOP
	blez	L, .L68
	NOP
	.align	3

/* One k step per pass: AO += 1, BO += 2. */
.L66:
	MADD	c11, c11, a1, b1
	LD	b1, 2 * SIZE(BO)
	MADD	c21, c21, a1, b2
	LD	b2, 3 * SIZE(BO)

	LD	a1, 1 * SIZE(AO)
	daddiu	L, L, -1

	daddiu	AO, AO, 1 * SIZE
	bgtz	L, .L66
	daddiu	BO, BO, 2 * SIZE


/* Write-back of the 1x2 tile: fold the odd-step partial sums into     */
/* c11/c21 first.                                                      */
.L68:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 0 * SIZE(CO2)
	LD	$f3, 1 * SIZE(CO2)

	ADD	c11, c11, c31
	ADD	c21, c21, c41

	MADD	$f0, $f0, ALPHA_R, c11
	MADD	$f1, $f1, ALPHA_I, c11
	MADD	$f2, $f2, ALPHA_R, c21
	MADD	$f3, $f3, ALPHA_I, c21

	ST	$f0, 0 * SIZE(CO1)
	ST	$f1, 1 * SIZE(CO1)
	ST	$f2, 0 * SIZE(CO2)
	ST	$f3, 1 * SIZE(CO2)
	.align	3

/* End of the 2-column panel: B advances past it. */
.L69:
	move	B, BO
	.align	3

/* ================================================================== */
/* Single remaining column (taken when bit 0 of N is set).             */
/* "move AO, A" follows the branch, so it also runs when the branch to */
/* .L999 is taken; AO is not used after that, so it is harmless.       */
/* ================================================================== */
.L70:
	andi	J, N, 1
	blez	J, .L999

	move	AO, A
	move	CO1, C

	dsra	I, M, 1
	blez	I, .L80
	daddu	C, CO1, LDC

/* 2 rows x 1 column tile.                                             */
/* NOTE(review): .L72 and .L76 reload a1, a2 and b1 before every use   */
/* and read no other operand, so all loads in this preamble appear to  */
/* be dead. Only the accumulator clears and L = K >> 2 look needed.    */
.L71:
	LD	a1, 0 * SIZE(AO)
	MTC	$0, c11
	LD	a2, 1 * SIZE(AO)
	MOV	c21, c11
	LD	a5, 4 * SIZE(AO)

	LD	b1, 0 * SIZE(B)
	MOV	c12, c11
	LD	b2, 1 * SIZE(B)
	MOV	c22, c11
	LD	b3, 2 * SIZE(B)
	LD	b5, 4 * SIZE(B)
	dsra	L, K, 2
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	blez	L, .L75
	move	BO, B
	.align	3

/* 4 k steps per pass, not software-pipelined: load, then multiply-add. */
/* AO += 8, BO += 4.                                                    */
.L72:
	LD	a1, 0 * SIZE(AO)
	LD	a2, 1 * SIZE(AO)
	LD	b1, 0 * SIZE(BO)

	MADD	c11, c11, a1, b1
	MADD	c12, c12, a2, b1

	LD	a1, 2 * SIZE(AO)
	LD	a2, 3 * SIZE(AO)
	LD	b1, 1 * SIZE(BO)

	MADD	c11, c11, a1, b1
	MADD	c12, c12, a2, b1

	LD	a1, 4 * SIZE(AO)
	LD	a2, 5 * SIZE(AO)
	LD	b1, 2 * SIZE(BO)

	MADD	c11, c11, a1, b1
	MADD	c12, c12, a2, b1

	LD	a1, 6 * SIZE(AO)
	LD	a2, 7 * SIZE(AO)
	LD	b1, 3 * SIZE(BO)

	MADD	c11, c11, a1, b1
	MADD	c12, c12, a2, b1

	daddiu	L, L, -1
	daddiu	AO, AO, 8 * SIZE
	bgtz	L, .L72
	daddiu	BO, BO, 4 * SIZE
	.align	3

/* K & 3 remainder for the 2x1 tile. */
.L75:
	andi	L, K, 3
	NOP
	blez	L, .L78
	NOP
	.align	3

/* One k step per pass: AO += 2, BO += 1. */
.L76:
	LD	a1, 0 * SIZE(AO)
	LD	a2, 1 * SIZE(AO)
	LD	b1, 0 * SIZE(BO)

	MADD	c11, c11, a1, b1
	MADD	c12, c12, a2, b1

	daddiu	L, L, -1
	daddiu	AO, AO, 2 * SIZE
	bgtz	L, .L76
	daddiu	BO, BO, 1 * SIZE

/* Write-back of the 2x1 tile.                                         */
/* NOTE(review): c21/c22 are cleared at .L71 and never accumulated     */
/* into, so the two ADDs below add zero.                               */
.L78:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)
	LD	$f2, 2 * SIZE(CO1)
	LD	$f3, 3 * SIZE(CO1)

	ADD	c11, c11, c21
	daddiu	I, I, -1
	ADD	c12, c12, c22
	daddiu	CO1,CO1, 4 * SIZE

	MADD	$f0, $f0, ALPHA_R, c11
	MADD	$f1, $f1, ALPHA_I, c11
	MADD	$f2, $f2, ALPHA_R, c12
	MADD	$f3, $f3, ALPHA_I, c12

	ST	$f0, -4 * SIZE(CO1)
	ST	$f1, -3 * SIZE(CO1)
	ST	$f2, -2 * SIZE(CO1)

	bgtz	I, .L71
	ST	$f3, -1 * SIZE(CO1)
	.align	3

/* Leftover row (M & 1) of the single column: 1 x 1.                   */
/* c11 collects even k steps and c21 odd ones; they are summed at .L88.*/
/* NOTE(review): .L82 and .L86 reload a1 and b1 before every use and   */
/* read no other operand, so all loads in this preamble appear dead.   */
.L80:
	andi	I, M, 1
	blez	I, .L89
	NOP

	LD	a1, 0 * SIZE(AO)
	MTC	$0, c11
	LD	a2, 1 * SIZE(AO)
	MOV	c21, c11
	LD	a3, 2 * SIZE(AO)
	LD	a4, 3 * SIZE(AO)

	LD	b1, 0 * SIZE(B)
	LD	b2, 1 * SIZE(B)
	LD	b3, 2 * SIZE(B)
	LD	b4, 3 * SIZE(B)
	LD	b5, 4 * SIZE(B)
	LD	b6, 8 * SIZE(B)
	LD	b7, 12 * SIZE(B)

	dsra	L, K, 2
	blez	L, .L85
	move	BO, B
	.align	3

/* 4 k steps per pass: AO += 4, BO += 4. */
.L82:
	LD	a1, 0 * SIZE(AO)
	LD	b1, 0 * SIZE(BO)

	MADD	c11, c11, a1, b1

	LD	a1, 1 * SIZE(AO)
	LD	b1, 1 * SIZE(BO)

	MADD	c21, c21, a1, b1

	LD	a1, 2 * SIZE(AO)
	LD	b1, 2 * SIZE(BO)

	MADD	c11, c11, a1, b1

	LD	a1, 3 * SIZE(AO)
	LD	b1, 3 * SIZE(BO)

	MADD	c21, c21, a1, b1

	daddiu	L, L, -1
	daddiu	AO, AO, 4 * SIZE
	bgtz	L, .L82
	daddiu	BO, BO, 4 * SIZE
	.align	3

/* K & 3 remainder for the 1x1 tile. */
.L85:
	andi	L, K, 3
	NOP
	blez	L, .L88
	NOP
	.align	3

/* One k step per pass: AO += 1, BO += 1. */
.L86:
	LD	a1, 0 * SIZE(AO)
	LD	b1, 0 * SIZE(BO)

	MADD	c11, c11, a1, b1

	daddiu	L, L, -1
	daddiu	AO, AO, 1 * SIZE
	bgtz	L, .L86
	daddiu	BO, BO, 1 * SIZE


/* Write-back of the 1x1 tile. */
.L88:
	LD	$f0, 0 * SIZE(CO1)
	LD	$f1, 1 * SIZE(CO1)

	ADD	c11, c11, c21
	MADD	$f0, $f0, ALPHA_R, c11
	MADD	$f1, $f1, ALPHA_I, c11

	ST	$f0, 0 * SIZE(CO1)
	ST	$f1, 1 * SIZE(CO1)
	.align	3

/* B is advanced for symmetry with the other panels; nothing reads it  */
/* afterwards.                                                         */
.L89:
	move	B, BO
	.align	3

/* Epilogue: restore the saved registers; the frame is released in the */
/* delay slot of the return jump.                                      */
.L999:
	LDARG	$16, 0($sp)
	LDARG	$17, 8($sp)
	LDARG	$18, 16($sp)
	LDARG	$19, 24($sp)
	LDARG	$20, 32($sp)
	LDARG	$21, 40($sp)
	ldc1	$f24, 48($sp)
	ldc1	$f25, 56($sp)
	ldc1	$f26, 64($sp)
	ldc1	$f27, 72($sp)
	ldc1	$f28, 80($sp)
	ldc1	$f29, 88($sp)

	j	$31
	daddiu	$sp, $sp, 128

	EPILOGUE