1##define REALNAME gemm 2#define ASSEMBLER 3#include "common.h" 4 5#define FETCH ld 6#define STACKSIZE 192 7#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) 8#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) 9 10 11##### Parameter registers #### 12#define M $4 13#define N $5 14#define K $6 15#define A $9 16#define B $10 17#define C $11 18#define LDC $8 19 20#### Pointer A, B, C #### 21#define AO $12 22#define BO $13 23 24#define CO1 $14 25#define CO2 $15 26 27#define PREA $18 28#define PREB $19 29 30#### Used registers #### 31#define A1 $f0 32#define A2 $f1 33#define A3 $f2 34#define A4 $f3 35#define A5 $f4 36#define A6 $f5 37#define A7 $f6 38#define A8 $f7 39 40#define B1 $f8 41#define B2 $f9 42#define B3 $f10 43#define B4 $f11 44#define B5 $f12 45#define B6 $f13 46#define B7 $f14 47#define B8 $f15 48 49#define C11 $f16 50#define C12 $f17 51#define C21 $f18 52#define C22 $f19 53#define C31 $f20 54#define C32 $f21 55#define C41 $f22 56#define C42 $f23 57#define C13 $f24 58#define C14 $f25 59#define C23 $f26 60#define C24 $f27 61#define C33 $f28 62#define C34 $f29 63#define C43 $f30 64#define C44 $f31 65 66#define I $2 67#define J $3 68#define L $7 69 70#### Alpha register #### 71#define ALPHA $f15 72 73#define F31 31 74#define F30 30 75#define F29 29 76#define F28 28 77#define F27 27 78#define F26 26 79#define F25 25 80#define F24 24 81#define F23 23 82#define F22 22 83#define F21 21 84#define F20 20 85#define F19 19 86#define F18 18 87#define F17 17 88#define F16 16 89#define F15 15 90#define F14 14 91#define F13 13 92#define F12 12 93#define F11 11 94#define F10 10 95#define F9 9 96#define F8 8 97#define F7 7 98#define F6 6 99#define F5 5 100#define F4 4 101#define F3 3 102#define F2 2 103#define F1 1 104#define F0 0 105 106#define R12 12 107#define R13 13 108 109#define R14 14 110#define R15 15 111#define R16 16 112#define R17 17 113 114#if defined(TRMMKERNEL) 115#define 
OFFSET $23 116#define KK $24 117#define TEMP $25 118#endif 119 120 121 PROLOGUE 122 123 LDARG LDC, 0($sp) 124 daddiu $sp,$sp,-STACKSIZE 125 126 sd $16, 0($sp) 127 sd $17, 8($sp) 128 sd $18, 16($sp) 129 sd $19, 24($sp) 130 sd $20, 32($sp) 131 sd $21, 40($sp) 132 sd $22, 48($sp) 133 134 ST $f24, 56($sp) 135 ST $f25, 64($sp) 136 ST $f26, 72($sp) 137 ST $f27, 80($sp) 138 ST $f28, 88($sp) 139 140#if defined(TRMMKERNEL) 141 sd $23, 96($sp) 142 sd $24, 104($sp) 143 sd $25, 112($sp) 144 145 LDARG OFFSET, STACKSIZE+8($sp) 146#endif 147 148#ifndef __64BIT__ 149 ST $f20,120($sp) 150 ST $f21,128($sp) 151 ST $f22,136($sp) 152 ST $f23,144($sp) 153#endif 154 155 .align 4 156.L2: 157 dsra J, N, 1 # NR=2 158 ST $f15, 152($sp) 159 160#if defined(TRMMKERNEL) && !defined(LEFT) 161 neg KK, OFFSET 162#endif 163 164 dsll LDC, LDC, ZBASE_SHIFT# LDC*SIZE 165 blez J, .L1 166 ST $f16, 160($sp) 167 168.L24: 169#if defined(TRMMKERNEL) && defined(LEFT) 170 move KK, OFFSET 171#endif 172 173 dsra I, M, 2 # MR=8 174 move AO, A # Reset A 175 176 dsll PREA, K, 1 + ZBASE_SHIFT 177 move CO1, C 178 179 daddu CO2, C, LDC 180 daddu PREA, AO, PREA 181 182 blez I, .L22 183 daddu C, CO2, LDC 184 185 .align 4 186.L241: 187#if defined(TRMMKERNEL) 188#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 189 move BO, B 190#else 191 dsll L, KK, 2 + ZBASE_SHIFT 192 dsll TEMP, KK, 1 + ZBASE_SHIFT 193 194 daddu AO, AO, L 195 daddu BO, B, TEMP 196#endif 197 MTC $0, C11 # CLEAR REAULTS REGISTERS 198 MOV C12, C11 199 200 dsll PREB, K, ZBASE_SHIFT 201 MOV C21, C11 202 MOV C22, C11 203 204 gsLQC1(R13, F9, F8, 0) # B1 B2 205 MOV C31, C11 206 MOV C32, C11 207 208 gsLQC1(R12, F1, F0, 0) # A1 A2 209 MOV C41, C11 210 MOV C42, C11 211 212 gsLQC1(R12, F3, F2, 1) # A3 A4 213 MOV C13, C11 214 MOV C14, C11 215 216 MOV C23, C11 217 MOV C24, C11 218 219 MOV C33, C11 220 MOV C34, C11 221 222 MOV C43, C11 223 MOV C44, C11 224 225 PLU B3, B1, B1 226 PLU B4, B2, B2 227 daddu PREB, BO, PREB 228 229 FETCH $0, 0 * 
SIZE(CO1) 230 FETCH $0, 8 * SIZE(CO1) 231 FETCH $0, 0 * SIZE(CO2) 232 FETCH $0, 8 * SIZE(CO2) 233#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 234 dsubu TEMP, K, KK 235#elif defined(LEFT) 236 daddiu TEMP, KK, 4 237#else 238 daddiu TEMP, KK, 2 239#endif 240 dsra L, TEMP, 2 241 blez L, .L242 242 NOP 243 244#else 245 246 move BO, B # Reset B 247 dsra L, K, 2 # UnRoll K=64 248 MTC $0, C11 # CLEAR REAULTS REGISTERS 249 MOV C12, C11 250 251 dsll PREB, K, ZBASE_SHIFT 252 MOV C21, C11 253 MOV C22, C11 254 255 gsLQC1(R13, F9, F8, 0) # B1 B2 256 MOV C31, C11 257 MOV C32, C11 258 259 gsLQC1(R12, F1, F0, 0) # A1 A2 260 MOV C41, C11 261 MOV C42, C11 262 263 gsLQC1(R12, F3, F2, 1) # A3 A4 264 MOV C13, C11 265 MOV C14, C11 266 267 FETCH $0, 0 * SIZE(CO1) 268 MOV C23, C11 269 MOV C24, C11 270 271 FETCH $0, 0 * SIZE(CO2) 272 MOV C33, C11 273 MOV C34, C11 274 275 MOV C43, C11 276 MOV C44, C11 277 daddu PREB, BO, PREB 278 279 PLU B3, B1, B1 280 PLU B4, B2, B2 281 282 FETCH $0, 8 * SIZE(CO1) 283 blez L, .L242 284 FETCH $0, 8 * SIZE(CO2) 285#endif 286 287.L2410: 288 daddiu L, L, -1 289 gsLQC1(R13, F13, F12, 1) # B3 B4 290 MADPS C11, C11, A1, B1 291 MADPS C21, C21, A2, B1 292 293 gsLQC1(R12, F5, F4, 2) # A5 A6 294 MADPS C12, C12, A1, B2 295 MADPS C22, C22, A2, B2 296 297 gsLQC1(R12, F7, F6, 3) # A7 A8 298 MADPS C31, C31, A3, B1 299 MADPS C41, C41, A4, B1 300 301 FETCH $0, 0 * SIZE(PREB) 302 MADPS C32, C32, A3, B2 303 MADPS C42, C42, A4, B2 304 305 FETCH $0, 0 * SIZE(PREA) 306 MADPS C13, C13, A1, B3 307 MADPS C23, C23, A2, B3 308 309 MADPS C33, C33, A3, B3 310 MADPS C43, C43, A4, B3 311 312 MADPS C14, C14, A1, B4 313 MADPS C24, C24, A2, B4 314 315 PLU B7, B5, B5 316 PLU B8, B6, B6 317 daddu PREB, PREB, 8 * SIZE 318 319 MADPS C34, C34, A3, B4 320 MADPS C44, C44, A4, B4 321 322 gsLQC1(R13, F9, F8, 2) # B1 B2 323 MADPS C11, C11, A5, B5 324 MADPS C21, C21, A6, B5 325 326 gsLQC1(R12, F1, F0, 4) # A1 A2 327 MADPS C12, C12, A5, B6 328 MADPS C22, C22, A6, B6 329 
330 gsLQC1(R12, F3, F2, 5) # A3 A4 331 MADPS C31, C31, A7, B5 332 MADPS C41, C41, A8, B5 333 334 FETCH $0, 8 * SIZE(PREA) 335 MADPS C32, C32, A7, B6 336 MADPS C42, C42, A8, B6 337 338 MADPS C13, C13, A5, B7 339 MADPS C23, C23, A6, B7 340 341 MADPS C33, C33, A7, B7 342 MADPS C43, C43, A8, B7 343 344 MADPS C14, C14, A5, B8 345 MADPS C24, C24, A6, B8 346 347 PLU B3, B1, B1 348 PLU B4, B2, B2 349 350 MADPS C34, C34, A7, B8 351 MADPS C44, C44, A8, B8 352 353 gsLQC1(R13, F13, F12, 3) # B3 B4 354 MADPS C11, C11, A1, B1 355 MADPS C21, C21, A2, B1 356 357 gsLQC1(R12, F5, F4, 6) # A5 A6 358 MADPS C12, C12, A1, B2 359 MADPS C22, C22, A2, B2 360 361 gsLQC1(R12, F7, F6, 7) # A7 A8 362 MADPS C31, C31, A3, B1 363 MADPS C41, C41, A4, B1 364 daddiu BO, BO, 4 * 4 * SIZE # 4KR*4NR 365 366 FETCH $0, 16 * SIZE(PREA) 367 MADPS C32, C32, A3, B2 368 MADPS C42, C42, A4, B2 369 daddiu AO, AO, 8 * 4 * SIZE # 4KR*8MR 370 371 MADPS C13, C13, A1, B3 372 MADPS C23, C23, A2, B3 373 374 MADPS C33, C33, A3, B3 375 MADPS C43, C43, A4, B3 376 377 MADPS C14, C14, A1, B4 378 MADPS C24, C24, A2, B4 379 380 PLU B7, B5, B5 381 PLU B8, B6, B6 382 383 MADPS C34, C34, A3, B4 384 MADPS C44, C44, A4, B4 385 386 gsLQC1(R13, F9, F8, 0) # B1 B2 387 MADPS C11, C11, A5, B5 388 MADPS C21, C21, A6, B5 389 390 gsLQC1(R12, F1, F0, 0) # A1 A2 391 MADPS C12, C12, A5, B6 392 MADPS C22, C22, A6, B6 393 394 gsLQC1(R12, F3, F2, 1) # A3 A4 395 MADPS C31, C31, A7, B5 396 MADPS C41, C41, A8, B5 397 398 FETCH $0, 24 * SIZE(PREA) 399 MADPS C32, C32, A7, B6 400 MADPS C42, C42, A8, B6 401 402 MADPS C13, C13, A5, B7 403 MADPS C23, C23, A6, B7 404 daddu PREA, PREA, 32 * SIZE 405 406 MADPS C33, C33, A7, B7 407 MADPS C43, C43, A8, B7 408 409 MADPS C14, C14, A5, B8 410 MADPS C24, C24, A6, B8 411 412 PLU B3, B1, B1 413 PLU B4, B2, B2 414 415 MADPS C34, C34, A7, B8 416 bgtz L, .L2410 417 MADPS C44, C44, A8, B8 418 419 420 .align 4 421.L242: 422#ifndef TRMMKERNEL 423 andi L, K, 2 424#else 425 andi L, TEMP, 2 426#endif 427 blez L, .L247 428 
NOP 429 430 gsLQC1(R13, F13, F12, 1) # B3 B4 431 MADPS C11, C11, A1, B1 432 MADPS C21, C21, A2, B1 433 434 gsLQC1(R12, F5, F4, 2) # A5 A6 435 MADPS C12, C12, A1, B2 436 MADPS C22, C22, A2, B2 437 438 gsLQC1(R12, F7, F6, 3) # A7 A8 439 MADPS C31, C31, A3, B1 440 MADPS C41, C41, A4, B1 441 daddiu BO, BO, 2 * 4 * SIZE # 4KR*4NR 442 443 MADPS C32, C32, A3, B2 444 MADPS C42, C42, A4, B2 445 daddiu AO, AO, 4 * 4 * SIZE 446 447 MADPS C13, C13, A1, B3 448 MADPS C23, C23, A2, B3 449 450 MADPS C33, C33, A3, B3 451 MADPS C43, C43, A4, B3 452 453 MADPS C14, C14, A1, B4 454 MADPS C24, C24, A2, B4 455 456 PLU B7, B5, B5 457 PLU B8, B6, B6 458 459 MADPS C34, C34, A3, B4 460 MADPS C44, C44, A4, B4 461 462 gsLQC1(R13, F9, F8, 0) # B1 B2 463 MADPS C11, C11, A5, B5 464 MADPS C21, C21, A6, B5 465 466 gsLQC1(R12, F1, F0, 0) # A1 A2 467 MADPS C12, C12, A5, B6 468 MADPS C22, C22, A6, B6 469 470 gsLQC1(R12, F3, F2, 1) # A3 A4 471 MADPS C31, C31, A7, B5 472 MADPS C41, C41, A8, B5 473 474 MADPS C32, C32, A7, B6 475 MADPS C42, C42, A8, B6 476 477 MADPS C13, C13, A5, B7 478 MADPS C23, C23, A6, B7 479 480 MADPS C33, C33, A7, B7 481 MADPS C43, C43, A8, B7 482 483 MADPS C14, C14, A5, B8 484 MADPS C24, C24, A6, B8 485 486 PLU B3, B1, B1 487 PLU B4, B2, B2 488 489 MADPS C34, C34, A7, B8 490 MADPS C44, C44, A8, B8 491 492 .align 4 493.L247: 494#ifndef TRMMKERNEL 495 andi L, K, 1 496#else 497 andi L, TEMP, 1 498#endif 499 blez L, .L240 500 NOP 501 502 MADPS C11, C11, A1, B1 503 MADPS C21, C21, A2, B1 504 505 MADPS C12, C12, A1, B2 506 MADPS C22, C22, A2, B2 507 508 MADPS C31, C31, A3, B1 509 MADPS C41, C41, A4, B1 510 daddiu BO, BO, 1 * 4 * SIZE # 4KR*4NR 511 512 MADPS C32, C32, A3, B2 513 MADPS C42, C42, A4, B2 514 daddiu AO, AO, 2 * 4 * SIZE 515 516 MADPS C13, C13, A1, B3 517 MADPS C23, C23, A2, B3 518 519 MADPS C33, C33, A3, B3 520 MADPS C43, C43, A4, B3 521 522 MADPS C14, C14, A1, B4 523 MADPS C24, C24, A2, B4 524 525 MADPS C34, C34, A3, B4 526 MADPS C44, C44, A4, B4 527 528 529 .align 4 
530.L240: # Write Back 531#ifndef TRMMKERNEL 532 daddiu I, I, -1 533 CVTU A1, C11 534 CVTU A2, C21 535 536 CVTU A3, C31 537 CVTU A4, C41 538 539 CVTU A5, C13 540 CVTU A6, C23 541 542 CVTU A7, C33 543 CVTU A8, C43 544 545 CVTU B1, C12 546 CVTU B2, C22 547 548 CVTU B3, C32 549 CVTU B4, C42 550 551 CVTU B5, C14 552 CVTU B6, C24 553 554 CVTU B7, C34 555 CVTU B8, C44 556 557#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 558 /* (a + bi) * (c + di) */ 559 SUB C11, C11, A1 # ac'+'bd 560 SUB C21, C21, A2 561# LD A1, 0 * SIZE(A) # load alpha_r 562 SUB C31, C31, A3 563 LD A1, 152($sp) # load alpha_r 564 SUB C41, C41, A4 565 LD A2, 160($sp) # load alpha_i 566# LD A2, 0 * SIZE(A) # load alpha_i 567 ADD C13, A5, C13 # ad'+'cb 568 ADD C23, A6, C23 569 ADD C33, A7, C33 570 ADD C43, A8, C43 571 SUB C12, C12, B1 572 SUB C22, C22, B2 573 SUB C32, C32, B3 574 SUB C42, C42, B4 575 ADD C14, B5, C14 576 ADD C24, B6, C24 577 ADD C34, B7, C34 578 ADD C44, B8, C44 579 580 LD B1, 0 * SIZE(CO1) 581 LD B3, 2 * SIZE(CO1) 582 LD B5, 4 * SIZE(CO1) 583 LD B7, 6 * SIZE(CO1) 584 LD B2, 1 * SIZE(CO1) 585 LD B4, 3 * SIZE(CO1) 586 LD B6, 5 * SIZE(CO1) 587 LD B8, 7 * SIZE(CO1) 588 589 MADD B1, B1, C11, A1 # A1 = alpha_r 590 MADD B3, B3, C21, A1 591 MADD B5, B5, C31, A1 592 MADD B7, B7, C41, A1 593 MADD B2, B2, C13, A1 594 MADD B4, B4, C23, A1 595 MADD B6, B6, C33, A1 596 MADD B8, B8, C43, A1 597 NMSUB B1, B1, C13, A2 # A2 = alpha_i 598 NMSUB B3, B3, C23, A2 599 NMSUB B5, B5, C33, A2 600 NMSUB B7, B7, C43, A2 601 MADD B2, B2, C11, A2 602 MADD B4, B4, C21, A2 603 MADD B6, B6, C31, A2 604 MADD B8, B8, C41, A2 605 606 LD C13, 0 * SIZE(CO2) 607 LD C23, 2 * SIZE(CO2) 608 LD C33, 4 * SIZE(CO2) 609 LD C43, 6 * SIZE(CO2) 610 LD C11, 1 * SIZE(CO2) 611 LD C21, 3 * SIZE(CO2) 612 LD C31, 5 * SIZE(CO2) 613 LD C41, 7 * SIZE(CO2) 614 615 MADD C13, C13, C12, A1 616 MADD C23, C23, C22, A1 617 618 MADD C33, C33, C32, A1 619 ST B1, 0 * SIZE(CO1) 620 621 MADD C43, C43, C42, A1 622 ST B3, 2 * SIZE(CO1) 623 624 
MADD C11, C11, C14, A1 625 ST B5, 4 * SIZE(CO1) 626 627 MADD C21, C21, C24, A1 628 ST B7, 6 * SIZE(CO1) 629 630 MADD C31, C31, C34, A1 631 ST B2, 1 * SIZE(CO1) 632 633 MADD C41, C41, C44, A1 634 ST B4, 3 * SIZE(CO1) 635 636 NMSUB C13, C13, C14, A2 637 ST B6, 5 * SIZE(CO1) 638 639 NMSUB C23, C23, C24, A2 640 ST B8, 7 * SIZE(CO1) 641 642 NMSUB C33, C33, C34, A2 643 NMSUB C43, C43, C44, A2 644 645 MADD C11, C11, C12, A2 646 MADD C21, C21, C22, A2 647 648 MADD C31, C31, C32, A2 649 MADD C41, C41, C42, A2 650 651 ST C13, 0 * SIZE(CO2) 652 ST C23, 2 * SIZE(CO2) 653 ST C33, 4 * SIZE(CO2) 654 ST C43, 6 * SIZE(CO2) 655 ST C11, 1 * SIZE(CO2) 656 ST C21, 3 * SIZE(CO2) 657 ST C31, 5 * SIZE(CO2) 658 ST C41, 7 * SIZE(CO2) 659#endif 660 661#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 662 /* (a + bi) * (c - di) */ 663 ADD C11, A1, C11 # ac'+'bd 664 ADD C21, A2, C21 665# LD A1, 0 * SIZE(A) # load alpha_r 666 ADD C31, A3, C31 667 LD A1, 152($sp) # load alpha_r 668 ADD C41, A4, C41 669 LD A2, 160($sp) # load alpha_i 670# LD A2, 0 * SIZE(A) # load alpha_r 671 SUB C13, A5, C13 # ad'+'cb 672 SUB C23, A6, C23 673 SUB C33, A7, C33 674 SUB C43, A8, C43 675 ADD C12, B1, C12 676 ADD C22, B2, C22 677 ADD C32, B3, C32 678 ADD C42, B4, C42 679 SUB C14, B5, C14 680 SUB C24, B6, C24 681 SUB C34, B7, C34 682 SUB C44, B8, C44 683 684 LD B1, 0 * SIZE(CO1) 685 LD B3, 2 * SIZE(CO1) 686 LD B5, 4 * SIZE(CO1) 687 LD B7, 6 * SIZE(CO1) 688 LD B2, 1 * SIZE(CO1) 689 LD B4, 3 * SIZE(CO1) 690 LD B6, 5 * SIZE(CO1) 691 LD B8, 7 * SIZE(CO1) 692 693 MADD B1, B1, C11, A1 # A1 = alpha_r 694 MADD B3, B3, C21, A1 695 MADD B5, B5, C31, A1 696 MADD B7, B7, C41, A1 697 MADD B2, B2, C13, A1 698 MADD B4, B4, C23, A1 699 MADD B6, B6, C33, A1 700 MADD B8, B8, C43, A1 701 NMSUB B1, B1, C13, A2 # A2 = alpha_i 702 NMSUB B3, B3, C23, A2 703 NMSUB B5, B5, C33, A2 704 NMSUB B7, B7, C43, A2 705 MADD B2, B2, C11, A2 706 MADD B4, B4, C21, A2 707 MADD B6, B6, C31, A2 708 MADD B8, B8, C41, A2 709 710 LD C13, 0 * 
SIZE(CO2) 711 LD C23, 2 * SIZE(CO2) 712 LD C33, 4 * SIZE(CO2) 713 LD C43, 6 * SIZE(CO2) 714 LD C11, 1 * SIZE(CO2) 715 LD C21, 3 * SIZE(CO2) 716 LD C31, 5 * SIZE(CO2) 717 LD C41, 7 * SIZE(CO2) 718 719 MADD C13, C13, C12, A1 720 MADD C23, C23, C22, A1 721 722 MADD C33, C33, C32, A1 723 ST B1, 0 * SIZE(CO1) 724 725 MADD C43, C43, C42, A1 726 ST B3, 2 * SIZE(CO1) 727 728 MADD C11, C11, C14, A1 729 ST B5, 4 * SIZE(CO1) 730 731 MADD C21, C21, C24, A1 732 ST B7, 6 * SIZE(CO1) 733 734 MADD C31, C31, C34, A1 735 ST B2, 1 * SIZE(CO1) 736 737 MADD C41, C41, C44, A1 738 ST B4, 3 * SIZE(CO1) 739 740 NMSUB C13, C13, C14, A2 741 ST B6, 5 * SIZE(CO1) 742 743 NMSUB C23, C23, C24, A2 744 ST B8, 7 * SIZE(CO1) 745 746 NMSUB C33, C33, C34, A2 747 NMSUB C43, C43, C44, A2 748 749 MADD C11, C11, C12, A2 750 MADD C21, C21, C22, A2 751 752 MADD C31, C31, C32, A2 753 MADD C41, C41, C42, A2 754 755 ST C13, 0 * SIZE(CO2) 756 ST C23, 2 * SIZE(CO2) 757 ST C33, 4 * SIZE(CO2) 758 ST C43, 6 * SIZE(CO2) 759 ST C11, 1 * SIZE(CO2) 760 ST C21, 3 * SIZE(CO2) 761 ST C31, 5 * SIZE(CO2) 762 ST C41, 7 * SIZE(CO2) 763 764#endif 765 766#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 767 /* (a - bi) * (c + di) */ 768 ADD C11, A1, C11 # ac'+'bd 769 ADD C21, A2, C21 770# LD A1, 0 * SIZE(A) # load alpha_r 771 ADD C31, A3, C31 772 LD A1, 152($sp) # load alpha_r 773# LD A2, 0 * SIZE(A) # load alpha_r 774 ADD C41, A4, C41 775 LD A2, 160($sp) # load alpha_i 776 SUB C13, C13, A5 # ad'+'cb 777 SUB C23, C23, A6 778 SUB C33, C33, A7 779 SUB C43, C43, A8 780 ADD C12, B1, C12 781 ADD C22, B2, C22 782 ADD C32, B3, C32 783 ADD C42, B4, C42 784 SUB C14, C14, B5 785 SUB C24, C24, B6 786 SUB C34, C34, B7 787 SUB C44, C44, B8 788 789 LD B1, 0 * SIZE(CO1) 790 LD B3, 2 * SIZE(CO1) 791 LD B5, 4 * SIZE(CO1) 792 LD B7, 6 * SIZE(CO1) 793 LD B2, 1 * SIZE(CO1) 794 LD B4, 3 * SIZE(CO1) 795 LD B6, 5 * SIZE(CO1) 796 LD B8, 7 * SIZE(CO1) 797 798 MADD B1, B1, C11, A1 # A1 = alpha_r 799 MADD B3, B3, C21, A1 800 MADD B5, B5, C31, 
A1 801 MADD B7, B7, C41, A1 802 MADD B2, B2, C13, A1 803 MADD B4, B4, C23, A1 804 MADD B6, B6, C33, A1 805 MADD B8, B8, C43, A1 806 NMSUB B1, B1, C13, A2 # A2 = alpha_i 807 NMSUB B3, B3, C23, A2 808 NMSUB B5, B5, C33, A2 809 NMSUB B7, B7, C43, A2 810 MADD B2, B2, C11, A2 811 MADD B4, B4, C21, A2 812 MADD B6, B6, C31, A2 813 MADD B8, B8, C41, A2 814 815 LD C13, 0 * SIZE(CO2) 816 LD C23, 2 * SIZE(CO2) 817 LD C33, 4 * SIZE(CO2) 818 LD C43, 6 * SIZE(CO2) 819 LD C11, 1 * SIZE(CO2) 820 LD C21, 3 * SIZE(CO2) 821 LD C31, 5 * SIZE(CO2) 822 LD C41, 7 * SIZE(CO2) 823 824 MADD C13, C13, C12, A1 825 MADD C23, C23, C22, A1 826 827 MADD C33, C33, C32, A1 828 ST B1, 0 * SIZE(CO1) 829 830 MADD C43, C43, C42, A1 831 ST B3, 2 * SIZE(CO1) 832 833 MADD C11, C11, C14, A1 834 ST B5, 4 * SIZE(CO1) 835 836 MADD C21, C21, C24, A1 837 ST B7, 6 * SIZE(CO1) 838 839 MADD C31, C31, C34, A1 840 ST B2, 1 * SIZE(CO1) 841 842 MADD C41, C41, C44, A1 843 ST B4, 3 * SIZE(CO1) 844 845 NMSUB C13, C13, C14, A2 846 ST B6, 5 * SIZE(CO1) 847 848 NMSUB C23, C23, C24, A2 849 ST B8, 7 * SIZE(CO1) 850 851 NMSUB C33, C33, C34, A2 852 NMSUB C43, C43, C44, A2 853 854 MADD C11, C11, C12, A2 855 MADD C21, C21, C22, A2 856 857 MADD C31, C31, C32, A2 858 MADD C41, C41, C42, A2 859 860 ST C13, 0 * SIZE(CO2) 861 ST C23, 2 * SIZE(CO2) 862 ST C33, 4 * SIZE(CO2) 863 ST C43, 6 * SIZE(CO2) 864 ST C11, 1 * SIZE(CO2) 865 ST C21, 3 * SIZE(CO2) 866 ST C31, 5 * SIZE(CO2) 867 ST C41, 7 * SIZE(CO2) 868 869#endif 870 871#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 872 /* (a - bi) * (c - di) */ 873 SUB C11, C11, A1 # ac'+'bd 874 SUB C21, C21, A2 875 SUB C31, C31, A3 876 LD A1, 152($sp) # load alpha_r 877# LD A1, 0 * SIZE(A) # load alpha_r 878 SUB C41, C41, A4 879 LD A2, 160($sp) 880# LD A2, 0 * SIZE(A) # load alpha_i 881 882 ADD C13, A5, C13 # ad'+'cb 883 ADD C23, A6, C23 884 ADD C33, A7, C33 885 ADD C43, A8, C43 886 SUB C12, C12, B1 887 SUB C22, C22, B2 888 SUB C32, C32, B3 889 SUB C42, C42, B4 890 ADD C14, B5, C14 
891 ADD C24, B6, C24 892 ADD C34, B7, C34 893 ADD C44, B8, C44 894 NEG C13, C13 895 NEG C23, C23 896 NEG C33, C33 897 NEG C43, C43 898 NEG C14, C14 899 NEG C24, C24 900 NEG C34, C34 901 NEG C44, C44 902 903 LD B1, 0 * SIZE(CO1) 904 LD B3, 2 * SIZE(CO1) 905 LD B5, 4 * SIZE(CO1) 906 LD B7, 6 * SIZE(CO1) 907 LD B2, 1 * SIZE(CO1) 908 LD B4, 3 * SIZE(CO1) 909 LD B6, 5 * SIZE(CO1) 910 LD B8, 7 * SIZE(CO1) 911 912 MADD B1, B1, C11, A1 # A1 = alpha_r 913 MADD B3, B3, C21, A1 914 MADD B5, B5, C31, A1 915 MADD B7, B7, C41, A1 916 MADD B2, B2, C13, A1 917 MADD B4, B4, C23, A1 918 MADD B6, B6, C33, A1 919 MADD B8, B8, C43, A1 920 NMSUB B1, B1, C13, A2 # A2 = alpha_i 921 NMSUB B3, B3, C23, A2 922 NMSUB B5, B5, C33, A2 923 NMSUB B7, B7, C43, A2 924 MADD B2, B2, C11, A2 925 MADD B4, B4, C21, A2 926 MADD B6, B6, C31, A2 927 MADD B8, B8, C41, A2 928 929 LD C13, 0 * SIZE(CO2) 930 LD C43, 6 * SIZE(CO2) 931 LD C23, 2 * SIZE(CO2) 932 LD C33, 4 * SIZE(CO2) 933 LD C11, 1 * SIZE(CO2) 934 LD C21, 3 * SIZE(CO2) 935 LD C31, 5 * SIZE(CO2) 936 LD C41, 7 * SIZE(CO2) 937 938 MADD C13, C13, C12, A1 939 ST B1, 0 * SIZE(CO1) 940 941 MADD C23, C23, C22, A1 942 ST B3, 2 * SIZE(CO1) 943 944 MADD C33, C33, C32, A1 945 ST B5, 4 * SIZE(CO1) 946 947 MADD C43, C43, C42, A1 948 ST B7, 6 * SIZE(CO1) 949 950 MADD C11, C11, C14, A1 951 ST B2, 1 * SIZE(CO1) 952 953 MADD C21, C21, C24, A1 954 ST B4, 3 * SIZE(CO1) 955 956 MADD C31, C31, C34, A1 957 ST B6, 5 * SIZE(CO1) 958 959 MADD C41, C41, C44, A1 960 ST B8, 7 * SIZE(CO1) 961 962 NMSUB C13, C13, C14, A2 963 NMSUB C23, C23, C24, A2 964 NMSUB C33, C33, C34, A2 965 NMSUB C43, C43, C44, A2 966 967 MADD C11, C11, C12, A2 968 MADD C21, C21, C22, A2 969 MADD C31, C31, C32, A2 970 MADD C41, C41, C42, A2 971 972 ST C13, 0 * SIZE(CO2) 973 ST C23, 2 * SIZE(CO2) 974 ST C33, 4 * SIZE(CO2) 975 ST C43, 6 * SIZE(CO2) 976 ST C11, 1 * SIZE(CO2) 977 ST C21, 3 * SIZE(CO2) 978 ST C31, 5 * SIZE(CO2) 979 ST C41, 7 * SIZE(CO2) 980 981#endif 982 983#else 984 daddiu I, I, -1 985 CVTU 
A1, C11 986 CVTU A2, C21 987 988 CVTU A3, C31 989 CVTU A4, C41 990 991 CVTU A5, C13 992 CVTU A6, C23 993 994 CVTU A7, C33 995 CVTU A8, C43 996 997 CVTU B1, C12 998 CVTU B2, C22 999 1000 CVTU B3, C32 1001 CVTU B4, C42 1002 1003 CVTU B5, C14 1004 CVTU B6, C24 1005 1006 CVTU B7, C34 1007 CVTU B8, C44 1008 1009#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 1010 /* (a + bi) * (c + di) */ 1011 SUB C11, C11, A1 # ac'+'bd 1012 SUB C21, C21, A2 1013 SUB C31, C31, A3 1014 LD A1, 152($sp) # load alpha_r 1015 SUB C41, C41, A4 1016# LD A1, 0 * SIZE(A) # load alpha_r 1017 LD A2, 160($sp) # load alpha_i 1018 ADD C13, A5, C13 # ad'+'cb 1019 ADD C23, A6, C23 1020# LD A2, 0 * SIZE(A) # load alpha_i 1021 ADD C33, A7, C33 1022 ADD C43, A8, C43 1023 SUB C12, C12, B1 1024 SUB C22, C22, B2 1025 SUB C32, C32, B3 1026 SUB C42, C42, B4 1027 ADD C14, B5, C14 1028 ADD C24, B6, C24 1029 ADD C34, B7, C34 1030 ADD C44, B8, C44 1031 1032 MUL B1, C11, A1 # A1 = alpha_r 1033 MUL B3, C21, A1 1034 MUL B5, C31, A1 1035 MUL B7, C41, A1 1036 MUL B2, C13, A1 1037 MUL B4, C23, A1 1038 MUL B6, C33, A1 1039 MUL B8, C43, A1 1040 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1041 NMSUB B3, B3, C23, A2 1042 NMSUB B5, B5, C33, A2 1043 NMSUB B7, B7, C43, A2 1044 MADD B2, B2, C11, A2 1045 MADD B4, B4, C21, A2 1046 MADD B6, B6, C31, A2 1047 MADD B8, B8, C41, A2 1048 1049 ST B1, 0 * SIZE(CO1) 1050 MUL C13, C12, A1 1051 MUL C23, C22, A1 1052 1053 ST B3, 2 * SIZE(CO1) 1054 MUL C33, C32, A1 1055 MUL C43, C42, A1 1056 1057 ST B5, 4 * SIZE(CO1) 1058 MUL C11, C14, A1 1059 MUL C21, C24, A1 1060 1061 ST B7, 6 * SIZE(CO1) 1062 MUL C31, C34, A1 1063 MUL C41, C44, A1 1064 1065 ST B2, 1 * SIZE(CO1) 1066 NMSUB C13, C13, C14, A2 1067 NMSUB C23, C23, C24, A2 1068 1069 ST B4, 3 * SIZE(CO1) 1070 NMSUB C33, C33, C34, A2 1071 NMSUB C43, C43, C44, A2 1072 1073 ST B6, 5 * SIZE(CO1) 1074 MADD C11, C11, C12, A2 1075 MADD C21, C21, C22, A2 1076 1077 ST B8, 7 * SIZE(CO1) 1078 MADD C31, C31, C32, A2 1079 MADD C41, C41, C42, A2 1080 
1081 ST C13, 0 * SIZE(CO2) 1082 ST C23, 2 * SIZE(CO2) 1083 ST C33, 4 * SIZE(CO2) 1084 ST C43, 6 * SIZE(CO2) 1085 ST C11, 1 * SIZE(CO2) 1086 ST C21, 3 * SIZE(CO2) 1087 ST C31, 5 * SIZE(CO2) 1088 ST C41, 7 * SIZE(CO2) 1089#endif 1090 1091#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 1092 /* (a + bi) * (c - di) */ 1093 ADD C11, A1, C11 # ac'+'bd 1094 ADD C21, A2, C21 1095# LD A1, 0 * SIZE(A) # load alpha_r 1096 ADD C31, A3, C31 1097 LD A1, 152($sp) # load alpha_r 1098 ADD C41, A4, C41 1099 LD A2, 160($sp) # load alpha_i 1100# LD A2, 0 * SIZE(A) # load alpha_r 1101 SUB C13, A5, C13 # ad'+'cb 1102 SUB C23, A6, C23 1103 SUB C33, A7, C33 1104 SUB C43, A8, C43 1105 ADD C12, B1, C12 1106 ADD C22, B2, C22 1107 ADD C32, B3, C32 1108 ADD C42, B4, C42 1109 SUB C14, B5, C14 1110 SUB C24, B6, C24 1111 SUB C34, B7, C34 1112 SUB C44, B8, C44 1113 1114 MUL B1, C11, A1 # A1 = alpha_r 1115 MUL B3, C21, A1 1116 MUL B5, C31, A1 1117 MUL B7, C41, A1 1118 MUL B2, C13, A1 1119 MUL B4, C23, A1 1120 MUL B6, C33, A1 1121 MUL B8, C43, A1 1122 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1123 NMSUB B3, B3, C23, A2 1124 NMSUB B5, B5, C33, A2 1125 NMSUB B7, B7, C43, A2 1126 MADD B2, B2, C11, A2 1127 MADD B4, B4, C21, A2 1128 MADD B6, B6, C31, A2 1129 MADD B8, B8, C41, A2 1130 1131 MUL C13, C12, A1 1132 MUL C23, C22, A1 1133 1134 ST B1, 0 * SIZE(CO1) 1135 MUL C33, C32, A1 1136 MUL C43, C42, A1 1137 1138 ST B3, 2 * SIZE(CO1) 1139 MUL C11, C14, A1 1140 MUL C21, C24, A1 1141 1142 ST B5, 4 * SIZE(CO1) 1143 MUL C31, C34, A1 1144 MUL C41, C44, A1 1145 1146 ST B7, 6 * SIZE(CO1) 1147 NMSUB C13, C13, C14, A2 1148 NMSUB C23, C23, C24, A2 1149 1150 ST B2, 1 * SIZE(CO1) 1151 NMSUB C33, C33, C34, A2 1152 NMSUB C43, C43, C44, A2 1153 1154 ST B4, 3 * SIZE(CO1) 1155 MADD C11, C11, C12, A2 1156 MADD C21, C21, C22, A2 1157 1158 ST B6, 5 * SIZE(CO1) 1159 MADD C31, C31, C32, A2 1160 MADD C41, C41, C42, A2 1161 1162 ST B8, 7 * SIZE(CO1) 1163 ST C13, 0 * SIZE(CO2) 1164 ST C23, 2 * SIZE(CO2) 1165 ST C33, 4 * 
SIZE(CO2) 1166 ST C43, 6 * SIZE(CO2) 1167 ST C11, 1 * SIZE(CO2) 1168 ST C21, 3 * SIZE(CO2) 1169 ST C31, 5 * SIZE(CO2) 1170 ST C41, 7 * SIZE(CO2) 1171 1172#endif 1173 1174#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 1175 /* (a - bi) * (c + di) */ 1176 ADD C11, A1, C11 # ac'+'bd 1177 ADD C21, A2, C21 1178# LD A1, 0 * SIZE(A) # load alpha_r 1179 ADD C31, A3, C31 1180 LD A1, 152($sp) # load alpha_r 1181# LD A2, 0 * SIZE(A) # load alpha_r 1182 ADD C41, A4, C41 1183 LD A2, 160($sp) # load alpha_i 1184 SUB C13, C13, A5 # ad'+'cb 1185 SUB C23, C23, A6 1186 SUB C33, C33, A7 1187 SUB C43, C43, A8 1188 ADD C12, B1, C12 1189 ADD C22, B2, C22 1190 ADD C32, B3, C32 1191 ADD C42, B4, C42 1192 SUB C14, C14, B5 1193 SUB C24, C24, B6 1194 1195 SUB C34, C34, B7 1196 SUB C44, C44, B8 1197 1198 MUL B1, C11, A1 # A1 = alpha_r 1199 MUL B3, C21, A1 1200 MUL B5, C31, A1 1201 MUL B7, C41, A1 1202 MUL B2, C13, A1 1203 MUL B4, C23, A1 1204 MUL B6, C33, A1 1205 MUL B8, C43, A1 1206 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1207 NMSUB B3, B3, C23, A2 1208 NMSUB B5, B5, C33, A2 1209 NMSUB B7, B7, C43, A2 1210 MADD B2, B2, C11, A2 1211 MADD B4, B4, C21, A2 1212 MADD B6, B6, C31, A2 1213 MADD B8, B8, C41, A2 1214 1215 MUL C13, C12, A1 1216 MUL C23, C22, A1 1217 1218 ST B1, 0 * SIZE(CO1) 1219 MUL C33, C32, A1 1220 MUL C43, C42, A1 1221 1222 ST B3, 2 * SIZE(CO1) 1223 MUL C11, C14, A1 1224 MUL C21, C24, A1 1225 1226 ST B5, 4 * SIZE(CO1) 1227 MUL C31, C34, A1 1228 MUL C41, C44, A1 1229 1230 ST B7, 6 * SIZE(CO1) 1231 NMSUB C13, C13, C14, A2 1232 NMSUB C23, C23, C24, A2 1233 1234 ST B2, 1 * SIZE(CO1) 1235 NMSUB C33, C33, C34, A2 1236 NMSUB C43, C43, C44, A2 1237 1238 ST B4, 3 * SIZE(CO1) 1239 MADD C11, C11, C12, A2 1240 MADD C21, C21, C22, A2 1241 1242 ST B6, 5 * SIZE(CO1) 1243 MADD C31, C31, C32, A2 1244 MADD C41, C41, C42, A2 1245 1246 ST B8, 7 * SIZE(CO1) 1247 ST C13, 0 * SIZE(CO2) 1248 ST C23, 2 * SIZE(CO2) 1249 ST C33, 4 * SIZE(CO2) 1250 ST C43, 6 * SIZE(CO2) 1251 ST C11, 1 * SIZE(CO2) 
1252 ST C21, 3 * SIZE(CO2) 1253 ST C31, 5 * SIZE(CO2) 1254 ST C41, 7 * SIZE(CO2) 1255 1256#endif 1257 1258#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 1259 /* (a - bi) * (c - di) */ 1260 SUB C11, C11, A1 # ac'+'bd 1261 SUB C21, C21, A2 1262 SUB C31, C31, A3 1263 LD A1, 152($sp) # load alpha_r 1264# LD A1, 0 * SIZE(A) # load alpha_r 1265 SUB C41, C41, A4 1266 LD A2, 160($sp) 1267# LD A2, 0 * SIZE(A) # load alpha_i 1268 1269 ADD C13, A5, C13 # ad'+'cb 1270 ADD C23, A6, C23 1271 ADD C33, A7, C33 1272 ADD C43, A8, C43 1273 SUB C12, C12, B1 1274 SUB C22, C22, B2 1275 SUB C32, C32, B3 1276 SUB C42, C42, B4 1277 ADD C14, B5, C14 1278 ADD C24, B6, C24 1279 ADD C34, B7, C34 1280 ADD C44, B8, C44 1281 1282 NEG C13, C13 1283 NEG C23, C23 1284 NEG C33, C33 1285 NEG C43, C43 1286 NEG C14, C14 1287 NEG C24, C24 1288 NEG C34, C34 1289 NEG C44, C44 1290 1291 MUL B1, C11, A1 # A1 = alpha_r 1292 MUL B3, C21, A1 1293 MUL B5, C31, A1 1294 MUL B7, C41, A1 1295 MUL B2, C13, A1 1296 MUL B4, C23, A1 1297 MUL B6, C33, A1 1298 MUL B8, C43, A1 1299 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1300 NMSUB B3, B3, C23, A2 1301 NMSUB B5, B5, C33, A2 1302 NMSUB B7, B7, C43, A2 1303 MADD B2, B2, C11, A2 1304 MADD B4, B4, C21, A2 1305 MADD B6, B6, C31, A2 1306 MADD B8, B8, C41, A2 1307 1308 ST B1, 0 * SIZE(CO1) 1309 MUL C13, C12, A1 1310 MUL C23, C22, A1 1311 1312 ST B3, 2 * SIZE(CO1) 1313 MUL C33, C32, A1 1314 MUL C43, C42, A1 1315 1316 ST B5, 4 * SIZE(CO1) 1317 MUL C11, C14, A1 1318 MUL C21, C24, A1 1319 1320 ST B7, 6 * SIZE(CO1) 1321 MUL C31, C34, A1 1322 MUL C41, C44, A1 1323 1324 ST B2, 1 * SIZE(CO1) 1325 NMSUB C13, C13, C14, A2 1326 NMSUB C23, C23, C24, A2 1327 1328 ST B4, 3 * SIZE(CO1) 1329 NMSUB C33, C33, C34, A2 1330 NMSUB C43, C43, C44, A2 1331 1332 ST B6, 5 * SIZE(CO1) 1333 MADD C11, C11, C12, A2 1334 MADD C21, C21, C22, A2 1335 1336 ST B8, 7 * SIZE(CO1) 1337 MADD C31, C31, C32, A2 1338 MADD C41, C41, C42, A2 1339 1340 ST C13, 0 * SIZE(CO2) 1341 ST C23, 2 * SIZE(CO2) 1342 ST C33, 
4 * SIZE(CO2) 1343 ST C43, 6 * SIZE(CO2) 1344 ST C11, 1 * SIZE(CO2) 1345 ST C21, 3 * SIZE(CO2) 1346 ST C31, 5 * SIZE(CO2) 1347 ST C41, 7 * SIZE(CO2) 1348#endif 1349 1350 1351#if ( defined(LEFT) && defined(TRANSA)) || \ 1352 (!defined(LEFT) && !defined(TRANSA)) 1353 dsubu TEMP, K, KK 1354#ifdef LEFT 1355 daddiu TEMP, TEMP, -4 1356#else 1357 daddiu TEMP, TEMP, -2 1358#endif 1359 1360 dsll L, TEMP, 2 + ZBASE_SHIFT 1361 dsll TEMP, TEMP, 1 + ZBASE_SHIFT 1362 1363 daddu AO, AO, L 1364 daddu BO, BO, TEMP 1365#endif 1366 1367#ifdef LEFT 1368 daddiu KK, KK, 4 1369#endif 1370 1371#endif 1372 daddiu CO1, CO1, 8 * SIZE 1373 bgtz I, .L241 1374 daddiu CO2, CO2, 8 * SIZE 1375 1376 .align 4 1377.L22: 1378 andi I, M, 2 # MR=4 1379 blez I, .L21 1380 NOP 1381 1382 .align 4 1383.L221: 1384#if defined(TRMMKERNEL) 1385#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 1386 move BO, B 1387#else 1388 dsll TEMP, KK, 1 + ZBASE_SHIFT # NR=2 1389 1390 daddu AO, AO, TEMP 1391 daddu BO, B, TEMP 1392#endif 1393 MTC $0, C11 # CLEAR REAULTS REGISTERS 1394 MOV C12, C11 1395 1396 MOV C21, C11 1397 MOV C22, C11 1398 gsLQC1(R13, F9, F8, 0) # B1 B2 1399 1400 gsLQC1(R12, F1, F0, 0) # A1 A2 1401 MOV C13, C11 1402 MOV C14, C11 1403 1404 MOV C23, C11 1405 FETCH $0, 0 * SIZE(CO1) 1406 1407 FETCH $0, 8 * SIZE(CO1) 1408 MOV C24, C11 1409 1410 FETCH $0, 0 * SIZE(CO2) 1411 FETCH $0, 8 * SIZE(CO2) 1412 1413 PLU B3, B1, B1 1414 PLU B4, B2, B2 1415#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1416 dsubu TEMP, K, KK 1417#elif defined(LEFT) 1418 daddiu TEMP, KK, 2 # MR=2 1419#else 1420 daddiu TEMP, KK, 2 # NR=2 1421#endif 1422 dsra L, TEMP, 2 1423 blez L, .L222 1424 NOP 1425 1426#else 1427 move BO, B # Reset B 1428 dsra L, K, 2 # UnRoll K=64 1429 1430 MTC $0, C11 # CLEAR REAULTS REGISTERS 1431 MOV C12, C11 1432 1433 MOV C21, C11 1434 MOV C22, C11 1435 gsLQC1(R13, F9, F8, 0) # B1 B2 1436 1437 gsLQC1(R12, F1, F0, 0) # A1 A2 1438 MOV C13, C11 1439 MOV C14, 
C11 1440 1441 MOV C23, C11 1442 FETCH $0, 0 * SIZE(CO1) 1443 1444 FETCH $0, 8 * SIZE(CO1) 1445 MOV C24, C11 1446 1447 FETCH $0, 0 * SIZE(CO2) 1448 FETCH $0, 8 * SIZE(CO2) 1449 1450 PLU B3, B1, B1 1451 blez L, .L222 1452 PLU B4, B2, B2 1453#endif 1454 1455.L2210: 1456 daddiu L, L, -1 1457 gsLQC1(R13, F13, F12, 1) # B3 B4 1458 MADPS C11, C11, A1, B1 1459 MADPS C21, C21, A2, B1 1460 1461 gsLQC1(R12, F3, F2, 1) # A3 A4 1462 MADPS C12, C12, A1, B2 1463 MADPS C22, C22, A2, B2 1464 1465 MADPS C13, C13, A1, B3 1466 MADPS C23, C23, A2, B3 1467 1468 MADPS C14, C14, A1, B4 1469 MADPS C24, C24, A2, B4 1470 1471 gsLQC1(R12, F5, F4, 2) # A5 A6 1472 PLU B7, B5, B5 1473 PLU B8, B6, B6 1474 1475 gsLQC1(R13, F9, F8, 2) # B1 B2 1476 MADPS C11, C11, A3, B5 1477 MADPS C21, C21, A4, B5 1478 1479 MADPS C12, C12, A3, B6 1480 MADPS C22, C22, A4, B6 1481 1482 MADPS C13, C13, A3, B7 1483 MADPS C23, C23, A4, B7 1484 1485 MADPS C14, C14, A3, B8 1486 MADPS C24, C24, A4, B8 1487 1488 gsLQC1(R12, F7, F6, 3) # A7 A8 1489 PLU B3, B1, B1 1490 PLU B4, B2, B2 1491 1492 gsLQC1(R13, F13, F12, 3) # B3 B4 1493 MADPS C11, C11, A5, B1 1494 MADPS C21, C21, A6, B1 1495 1496 MADPS C12, C12, A5, B2 1497 MADPS C22, C22, A6, B2 1498 daddiu BO, BO, 4 * 4 * SIZE # 4KR*4NR 1499 1500 daddiu AO, AO, 4 * 4 * SIZE # 4KR*8MR 1501 MADPS C13, C13, A5, B3 1502 MADPS C23, C23, A6, B3 1503 1504 MADPS C14, C14, A5, B4 1505 MADPS C24, C24, A6, B4 1506 1507 gsLQC1(R12, F1, F0, 0) # A1 A2 1508 PLU B7, B5, B5 1509 PLU B8, B6, B6 1510 1511 gsLQC1(R13, F9, F8, 0) # B1 B2 1512 MADPS C11, C11, A7, B5 1513 MADPS C21, C21, A8, B5 1514 1515 MADPS C12, C12, A7, B6 1516 MADPS C22, C22, A8, B6 1517 1518 MADPS C13, C13, A7, B7 1519 MADPS C23, C23, A8, B7 1520 1521 MADPS C14, C14, A7, B8 1522 MADPS C24, C24, A8, B8 1523 1524 PLU B3, B1, B1 1525 bgtz L, .L2210 1526 PLU B4, B2, B2 1527 1528 1529 .align 4 1530.L222: 1531#ifndef TRMMKERNEL 1532 andi L, K, 2 1533#else 1534 andi L, TEMP, 2 1535#endif 1536 blez L, .L227 1537 NOP 1538 1539 
gsLQC1(R13, F13, F12, 1) # B3 B4 1540 MADPS C11, C11, A1, B1 1541 MADPS C21, C21, A2, B1 1542 1543 gsLQC1(R12, F3, F2, 1) # A3 A4 1544 MADPS C12, C12, A1, B2 1545 MADPS C22, C22, A2, B2 1546 1547 MADPS C13, C13, A1, B3 1548 MADPS C23, C23, A2, B3 1549 1550 MADPS C14, C14, A1, B4 1551 MADPS C24, C24, A2, B4 1552 1553 PLU B7, B5, B5 1554 PLU B8, B6, B6 1555 daddiu BO, BO, 2 * 4 * SIZE 1556 1557 daddiu AO, AO, 2 * 4 * SIZE 1558 MADPS C11, C11, A3, B5 1559 MADPS C21, C21, A4, B5 1560 gsLQC1(R13, F9, F8, 0) # A1 A2 1561 1562 MADPS C12, C12, A3, B6 1563 MADPS C22, C22, A4, B6 1564 gsLQC1(R12, F1, F0, 0) # A1 A2 1565 1566 MADPS C13, C13, A3, B7 1567 MADPS C23, C23, A4, B7 1568 1569 MADPS C14, C14, A3, B8 1570 MADPS C24, C24, A4, B8 1571 1572 PLU B3, B1, B1 1573 PLU B4, B2, B2 1574 1575 1576 .align 4 1577.L227: 1578#ifndef TRMMKERNEL 1579 andi L, K, 1 1580#else 1581 andi L, TEMP, 1 1582#endif 1583 blez L, .L220 1584 NOP 1585 1586 MADPS C11, C11, A1, B1 1587 MADPS C21, C21, A2, B1 1588 daddiu BO, BO, 4 * SIZE 1589 daddiu AO, AO, 4 * SIZE 1590 1591 MADPS C12, C12, A1, B2 1592 MADPS C22, C22, A2, B2 1593 1594 MADPS C13, C13, A1, B3 1595 MADPS C23, C23, A2, B3 1596 1597 MADPS C14, C14, A1, B4 1598 MADPS C24, C24, A2, B4 1599 1600 .align 4 1601.L220: # Write Back 1602#ifndef TRMMKERNEL 1603 daddiu I, I, -1 1604 CVTU A1, C11 1605 CVTU A2, C21 1606 1607 CVTU A3, C13 1608 CVTU A4, C23 1609 1610 CVTU A5, C12 1611 CVTU A6, C22 1612 1613 CVTU A7, C14 1614 CVTU A8, C24 1615 1616 1617#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 1618 /* (a + bi) * (c + di) */ 1619 SUB C11, C11, A1 # ac'+'bd 1620 SUB C21, C21, A2 1621 ADD C13, A3, C13 # ad'+'cb 1622 ADD C23, A4, C23 1623# LD A1, 0 * SIZE(A) # load alpha_r 1624 LD A1, 152($sp) # load alpha_r 1625 LD A2, 160($sp) # load alpha_i 1626# LD A2, 0 * SIZE(A) # load alpha_i 1627 SUB C12, C12, A5 1628 SUB C22, C22, A6 1629 ADD C14, A7, C14 1630 ADD C24, A8, C24 1631 1632 LD B1, 0 * SIZE(CO1) 1633 LD B3, 2 * SIZE(CO1) 1634 LD B2, 1 
* SIZE(CO1) 1635 LD B4, 3 * SIZE(CO1) 1636 1637 MADD B1, B1, C11, A1 # A1 = alpha_r 1638 MADD B3, B3, C21, A1 1639 MADD B2, B2, C13, A1 1640 MADD B4, B4, C23, A1 1641 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1642 NMSUB B3, B3, C23, A2 1643 MADD B2, B2, C11, A2 1644 MADD B4, B4, C21, A2 1645 1646 LD B5, 0 * SIZE(CO2) 1647 LD B7, 2 * SIZE(CO2) 1648 LD B6, 1 * SIZE(CO2) 1649 LD B8, 3 * SIZE(CO2) 1650 1651 MADD B5, B5, C12, A1 1652 MADD B7, B7, C22, A1 1653 1654 ST B1, 0 * SIZE(CO1) 1655 ST B3, 2 * SIZE(CO1) 1656 1657 MADD B6, B6, C14, A1 1658 MADD B8, B8, C24, A1 1659 1660 ST B2, 1 * SIZE(CO1) 1661 ST B4, 3 * SIZE(CO1) 1662 1663 NMSUB B5, B5, C14, A2 1664 NMSUB B7, B7, C24, A2 1665 1666 MADD B6, B6, C12, A2 1667 MADD B8, B8, C22, A2 1668 1669 ST B5, 0 * SIZE(CO2) 1670 ST B7, 2 * SIZE(CO2) 1671 ST B6, 1 * SIZE(CO2) 1672 ST B8, 3 * SIZE(CO2) 1673#endif 1674 1675#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 1676 /* (a + bi) * (c - di) */ 1677 ADD C11, A1, C11 # ac'+'bd 1678 ADD C21, A2, C21 1679 SUB C13, A3, C13 # ad'+'cb 1680 SUB C23, A4, C23 1681# LD A1, 0 * SIZE(A) # load alpha_r 1682 LD A1, 152($sp) # load alpha_r 1683 LD A2, 160($sp) # load alpha_i 1684# LD A2, 0 * SIZE(A) # load alpha_r 1685 ADD C12, A5, C12 1686 ADD C22, A6, C22 1687 SUB C14, A7, C14 1688 SUB C24, A8, C24 1689 1690 LD B1, 0 * SIZE(CO1) 1691 LD B3, 2 * SIZE(CO1) 1692 LD B2, 1 * SIZE(CO1) 1693 LD B4, 3 * SIZE(CO1) 1694 1695 MADD B1, B1, C11, A1 # A1 = alpha_r 1696 MADD B3, B3, C21, A1 1697 MADD B2, B2, C13, A1 1698 MADD B4, B4, C23, A1 1699 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1700 NMSUB B3, B3, C23, A2 1701 MADD B2, B2, C11, A2 1702 MADD B4, B4, C21, A2 1703 1704 LD B5, 0 * SIZE(CO2) 1705 LD B7, 2 * SIZE(CO2) 1706 LD B6, 1 * SIZE(CO2) 1707 LD B8, 3 * SIZE(CO2) 1708 1709 MADD B5, B5, C12, A1 1710 MADD B7, B7, C22, A1 1711 1712 ST B1, 0 * SIZE(CO1) 1713 ST B3, 2 * SIZE(CO1) 1714 1715 MADD B6, B6, C14, A1 1716 MADD B8, B8, C24, A1 1717 1718 ST B2, 1 * SIZE(CO1) 1719 ST B4, 3 * SIZE(CO1) 
1720 1721 NMSUB B5, B5, C14, A2 1722 NMSUB B7, B7, C24, A2 1723 1724 MADD B6, B6, C12, A2 1725 MADD B8, B8, C22, A2 1726 1727 ST B5, 0 * SIZE(CO2) 1728 ST B7, 2 * SIZE(CO2) 1729 ST B6, 1 * SIZE(CO2) 1730 ST B8, 3 * SIZE(CO2) 1731 1732#endif 1733 1734#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 1735 /* (a - bi) * (c + di) */ 1736 ADD C11, A1, C11 # ac'+'bd 1737 ADD C21, A2, C21 1738 SUB C13, C13, A3 # ad'+'cb 1739 SUB C23, C23, A4 1740# LD A1, 0 * SIZE(A) # load alpha_r 1741 LD A1, 152($sp) # load alpha_r 1742# LD A2, 0 * SIZE(A) # load alpha_r 1743 LD A2, 160($sp) # load alpha_i 1744 ADD C12, A5, C12 1745 ADD C22, A6, C22 1746 SUB C14, C14, A7 1747 SUB C24, C24, A8 1748 1749 LD B1, 0 * SIZE(CO1) 1750 LD B3, 2 * SIZE(CO1) 1751 LD B2, 1 * SIZE(CO1) 1752 LD B4, 3 * SIZE(CO1) 1753 1754 MADD B1, B1, C11, A1 # A1 = alpha_r 1755 MADD B3, B3, C21, A1 1756 MADD B2, B2, C13, A1 1757 MADD B4, B4, C23, A1 1758 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1759 NMSUB B3, B3, C23, A2 1760 MADD B2, B2, C11, A2 1761 MADD B4, B4, C21, A2 1762 1763 LD B5, 0 * SIZE(CO2) 1764 LD B7, 2 * SIZE(CO2) 1765 LD B6, 1 * SIZE(CO2) 1766 LD B8, 3 * SIZE(CO2) 1767 1768 MADD B5, B5, C12, A1 1769 MADD B7, B7, C22, A1 1770 1771 ST B1, 0 * SIZE(CO1) 1772 ST B3, 2 * SIZE(CO1) 1773 1774 MADD B6, B6, C14, A1 1775 MADD B8, B8, C24, A1 1776 1777 ST B2, 1 * SIZE(CO1) 1778 ST B4, 3 * SIZE(CO1) 1779 1780 NMSUB B5, B5, C14, A2 1781 NMSUB B7, B7, C24, A2 1782 1783 MADD B6, B6, C12, A2 1784 MADD B8, B8, C22, A2 1785 1786 ST B5, 0 * SIZE(CO2) 1787 ST B7, 2 * SIZE(CO2) 1788 ST B6, 1 * SIZE(CO2) 1789 ST B8, 3 * SIZE(CO2) 1790 1791#endif 1792 1793#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 1794 /* (a - bi) * (c - di) */ 1795 SUB C11, C11, A1 # ac'+'bd 1796 SUB C21, C21, A2 1797 ADD C13, A3, C13 # ad'+'cb 1798 ADD C23, A4, C23 1799 LD A1, 152($sp) # load alpha_r 1800# LD A1, 0 * SIZE(A) # load alpha_r 1801 LD A2, 160($sp) 1802# LD A2, 0 * SIZE(A) # load alpha_i 1803 SUB C12, C12, A5 1804 SUB 
C22, C22, A6 1805 ADD C14, A7, C14 1806 ADD C24, A8, C24 1807 NEG C13, C13 1808 NEG C23, C23 1809 NEG C14, C14 1810 NEG C24, C24 1811 1812 1813 LD B1, 0 * SIZE(CO1) 1814 LD B3, 2 * SIZE(CO1) 1815 LD B2, 1 * SIZE(CO1) 1816 LD B4, 3 * SIZE(CO1) 1817 1818 MADD B1, B1, C11, A1 # A1 = alpha_r 1819 MADD B3, B3, C21, A1 1820 MADD B2, B2, C13, A1 1821 MADD B4, B4, C23, A1 1822 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1823 NMSUB B3, B3, C23, A2 1824 MADD B2, B2, C11, A2 1825 MADD B4, B4, C21, A2 1826 1827 LD B5, 0 * SIZE(CO2) 1828 LD B7, 2 * SIZE(CO2) 1829 LD B6, 1 * SIZE(CO2) 1830 LD B8, 3 * SIZE(CO2) 1831 1832 MADD B5, B5, C12, A1 1833 MADD B7, B7, C22, A1 1834 1835 ST B1, 0 * SIZE(CO1) 1836 ST B3, 2 * SIZE(CO1) 1837 1838 MADD B6, B6, C14, A1 1839 MADD B8, B8, C24, A1 1840 1841 ST B2, 1 * SIZE(CO1) 1842 ST B4, 3 * SIZE(CO1) 1843 1844 NMSUB B5, B5, C14, A2 1845 NMSUB B7, B7, C24, A2 1846 1847 MADD B6, B6, C12, A2 1848 MADD B8, B8, C22, A2 1849 1850 ST B5, 0 * SIZE(CO2) 1851 ST B7, 2 * SIZE(CO2) 1852 ST B6, 1 * SIZE(CO2) 1853 ST B8, 3 * SIZE(CO2) 1854#endif 1855 1856#else 1857 daddiu I, I, -1 1858 CVTU A1, C11 1859 CVTU A2, C21 1860 1861 CVTU A3, C13 1862 CVTU A4, C23 1863 1864 CVTU A5, C12 1865 CVTU A6, C22 1866 1867 CVTU A7, C14 1868 CVTU A8, C24 1869 1870 1871#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 1872 /* (a + bi) * (c + di) */ 1873 SUB C11, C11, A1 # ac'+'bd 1874 SUB C21, C21, A2 1875 ADD C13, A3, C13 # ad'+'cb 1876 ADD C23, A4, C23 1877# LD A1, 0 * SIZE(A) # load alpha_r 1878 LD A1, 152($sp) # load alpha_r 1879 LD A2, 160($sp) # load alpha_i 1880# LD A2, 0 * SIZE(A) # load alpha_i 1881 SUB C12, C12, A5 1882 SUB C22, C22, A6 1883 ADD C14, A7, C14 1884 ADD C24, A8, C24 1885 1886 MUL B1, C11, A1 # A1 = alpha_r 1887 MUL B3, C21, A1 1888 MUL B2, C13, A1 1889 MUL B4, C23, A1 1890 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1891 NMSUB B3, B3, C23, A2 1892 MADD B2, B2, C11, A2 1893 MADD B4, B4, C21, A2 1894 1895 1896 MUL B5, C12, A1 1897 MUL B7, C22, A1 1898 1899 ST 
B1, 0 * SIZE(CO1) 1900 ST B3, 2 * SIZE(CO1) 1901 1902 MUL B6, C14, A1 1903 MUL B8, C24, A1 1904 1905 ST B2, 1 * SIZE(CO1) 1906 ST B4, 3 * SIZE(CO1) 1907 1908 NMSUB B5, B5, C14, A2 1909 NMSUB B7, B7, C24, A2 1910 1911 MADD B6, B6, C12, A2 1912 MADD B8, B8, C22, A2 1913 1914 ST B5, 0 * SIZE(CO2) 1915 ST B7, 2 * SIZE(CO2) 1916 ST B6, 1 * SIZE(CO2) 1917 ST B8, 3 * SIZE(CO2) 1918#endif 1919 1920#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 1921 /* (a + bi) * (c - di) */ 1922 ADD C11, A1, C11 # ac'+'bd 1923 ADD C21, A2, C21 1924 SUB C13, A3, C13 # ad'+'cb 1925 SUB C23, A4, C23 1926# LD A1, 0 * SIZE(A) # load alpha_r 1927 LD A1, 152($sp) # load alpha_r 1928 LD A2, 160($sp) # load alpha_i 1929# LD A2, 0 * SIZE(A) # load alpha_r 1930 ADD C12, A5, C12 1931 ADD C22, A6, C22 1932 SUB C14, A7, C14 1933 SUB C24, A8, C24 1934 1935 MUL B1, C11, A1 # A1 = alpha_r 1936 MUL B3, C21, A1 1937 MUL B2, C13, A1 1938 MUL B4, C23, A1 1939 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1940 NMSUB B3, B3, C23, A2 1941 MADD B2, B2, C11, A2 1942 MADD B4, B4, C21, A2 1943 1944 MUL B5, C12, A1 1945 MUL B7, C22, A1 1946 1947 ST B1, 0 * SIZE(CO1) 1948 ST B3, 2 * SIZE(CO1) 1949 1950 MUL B6, C14, A1 1951 MUL B8, C24, A1 1952 1953 ST B2, 1 * SIZE(CO1) 1954 ST B4, 3 * SIZE(CO1) 1955 1956 NMSUB B5, B5, C14, A2 1957 NMSUB B7, B7, C24, A2 1958 1959 MADD B6, B6, C12, A2 1960 MADD B8, B8, C22, A2 1961 1962 ST B5, 0 * SIZE(CO2) 1963 ST B7, 2 * SIZE(CO2) 1964 ST B6, 1 * SIZE(CO2) 1965 ST B8, 3 * SIZE(CO2) 1966 1967#endif 1968 1969#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 1970 /* (a - bi) * (c + di) */ 1971 ADD C11, A1, C11 # ac'+'bd 1972 ADD C21, A2, C21 1973 SUB C13, C13, A3 # ad'+'cb 1974 SUB C23, C23, A4 1975# LD A1, 0 * SIZE(A) # load alpha_r 1976 LD A1, 152($sp) # load alpha_r 1977# LD A2, 0 * SIZE(A) # load alpha_r 1978 LD A2, 160($sp) # load alpha_i 1979 ADD C12, A5, C12 1980 ADD C22, A6, C22 1981 SUB C14, C14, A7 1982 SUB C24, C24, A8 1983 1984 MUL B1, C11, A1 # A1 = alpha_r 
1985 MUL B3, C21, A1 1986 MUL B2, C13, A1 1987 MUL B4, C23, A1 1988 NMSUB B1, B1, C13, A2 # A2 = alpha_i 1989 NMSUB B3, B3, C23, A2 1990 MADD B2, B2, C11, A2 1991 MADD B4, B4, C21, A2 1992 1993 MUL B5, C12, A1 1994 MUL B7, C22, A1 1995 1996 ST B1, 0 * SIZE(CO1) 1997 ST B3, 2 * SIZE(CO1) 1998 1999 MUL B6, C14, A1 2000 MUL B8, C24, A1 2001 2002 ST B2, 1 * SIZE(CO1) 2003 ST B4, 3 * SIZE(CO1) 2004 2005 NMSUB B5, B5, C14, A2 2006 NMSUB B7, B7, C24, A2 2007 2008 MADD B6, B6, C12, A2 2009 MADD B8, B8, C22, A2 2010 2011 ST B5, 0 * SIZE(CO2) 2012 ST B7, 2 * SIZE(CO2) 2013 ST B6, 1 * SIZE(CO2) 2014 ST B8, 3 * SIZE(CO2) 2015 2016#endif 2017 2018#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 2019 /* (a - bi) * (c - di) */ 2020 SUB C11, C11, A1 # ac'+'bd 2021 SUB C21, C21, A2 2022 ADD C13, A3, C13 # ad'+'cb 2023 ADD C23, A4, C23 2024 LD A1, 152($sp) # load alpha_r 2025# LD A1, 0 * SIZE(A) # load alpha_r 2026 LD A2, 160($sp) 2027# LD A2, 0 * SIZE(A) # load alpha_i 2028 SUB C12, C12, A5 2029 SUB C22, C22, A6 2030 ADD C14, A7, C14 2031 ADD C24, A8, C24 2032 NEG C13, C13 2033 NEG C23, C23 2034 NEG C14, C14 2035 NEG C24, C24 2036 2037 MUL B1, C11, A1 # A1 = alpha_r 2038 MUL B3, C21, A1 2039 MUL B2, C13, A1 2040 MUL B4, C23, A1 2041 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2042 NMSUB B3, B3, C23, A2 2043 MADD B2, B2, C11, A2 2044 MADD B4, B4, C21, A2 2045 2046 MUL B5, C12, A1 2047 MUL B7, C22, A1 2048 2049 ST B1, 0 * SIZE(CO1) 2050 ST B3, 2 * SIZE(CO1) 2051 2052 MUL B6, C14, A1 2053 MUL B8, C24, A1 2054 2055 ST B2, 1 * SIZE(CO1) 2056 ST B4, 3 * SIZE(CO1) 2057 2058 NMSUB B5, B5, C14, A2 2059 NMSUB B7, B7, C24, A2 2060 2061 MADD B6, B6, C12, A2 2062 MADD B8, B8, C22, A2 2063 2064 ST B5, 0 * SIZE(CO2) 2065 ST B7, 2 * SIZE(CO2) 2066 ST B6, 1 * SIZE(CO2) 2067 ST B8, 3 * SIZE(CO2) 2068#endif 2069 2070#if ( defined(LEFT) && defined(TRANSA)) || \ 2071 (!defined(LEFT) && !defined(TRANSA)) 2072 dsubu TEMP, K, KK 2073#ifdef LEFT 2074 daddiu TEMP, TEMP, -2 2075#else 2076 daddiu TEMP, 
TEMP, -2 2077#endif 2078 dsll TEMP, TEMP, 1 + ZBASE_SHIFT 2079 2080 daddu AO, AO, TEMP 2081 daddu BO, BO, TEMP 2082#endif 2083 2084#ifdef LEFT 2085 daddiu KK, KK, 2 2086#endif 2087 2088#endif 2089 daddiu CO1, CO1, 4 * SIZE 2090 daddiu CO2, CO2, 4 * SIZE 2091 2092 2093 .align 4 2094.L21: # M & 1 remainder: skip to .L20 when M is even 2095 andi I, M, 1 2096 blez I, .L20 2097 NOP 2098 2099 .align 4 2100.L211: 2101#if defined(TRMMKERNEL) 2102#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2103 move BO, B 2104#else 2105 dsll L, KK, ZBASE_SHIFT # MR=1 2106 dsll TEMP, KK, 1 + ZBASE_SHIFT # NR=2 2107 2108 daddu AO, AO, L 2109 daddu BO, B, TEMP 2110#endif 2111 MTC $0, C11 # clear result registers 2112 MOV C12, C11 2113 gsLQC1(R13, F9, F8, 0) # B1 B2 2114 2115 gsLQC1(R12, F1, F0, 0) # A1 A2 2116 MOV C13, C11 2117 MOV C14, C11 2118 2119 FETCH $0, 0 * SIZE(CO1) 2120 FETCH $0, 0 * SIZE(CO2) 2121 2122 PLU B3, B1, B1 2123 PLU B4, B2, B2 2124#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2125 dsubu TEMP, K, KK 2126#elif defined(LEFT) 2127 daddiu TEMP, KK, 1 # MR=1 2128#else 2129 daddiu TEMP, KK, 2 # NR=2 2130#endif 2131 dsra L, TEMP, 2 2132 blez L, .L212 2133 NOP 2134 2135#else 2136 move BO, B # Reset B 2137 dsra L, K, 2 # L = K >> 2 (loop at .L2110 runs 4 k-steps per pass) 2138 2139 MTC $0, C11 # clear result registers 2140 MOV C12, C11 2141 gsLQC1(R13, F9, F8, 0) # B1 B2 2142 2143 gsLQC1(R12, F1, F0, 0) # A1 A2 2144 MOV C13, C11 2145 MOV C14, C11 2146 2147 FETCH $0, 0 * SIZE(CO1) 2148 FETCH $0, 0 * SIZE(CO2) 2149 2150 PLU B3, B1, B1 2151 blez L, .L212 2152 PLU B4, B2, B2 2153#endif 2154 2155.L2110: # main K loop, 4 k-steps (A1..A4) per iteration 2156 daddiu L, L, -1 2157 gsLQC1(R13, F13, F12, 1) # loads B5 B6 ($f12/$f13) 2158 MADPS C11, C11, A1, B1 2159 MADPS C12, C12, A1, B2 2160 2161 MADPS C13, C13, A1, B3 2162 MADPS C14, C14, A1, B4 2163 2164 PLU B7, B5, B5 2165 PLU B8, B6, B6 2166 2167 gsLQC1(R13, F9, F8, 2) # B1 B2 2168 MADPS C11, C11, A2, B5 2169 MADPS C12, C12, A2, B6 2170 2171 gsLQC1(R12, F3, F2, 1) # A3 A4 2172 MADPS C13, C13, A2, B7 2173 MADPS C14, C14,
A2, B8 2174 2175 PLU B3, B1, B1 2176 PLU B4, B2, B2 2177 2178 gsLQC1(R13, F13, F12, 3) # loads B5 B6 ($f12/$f13) 2179 MADPS C11, C11, A3, B1 2180 MADPS C12, C12, A3, B2 2181 daddiu BO, BO, 4 * 4 * SIZE # advance B past 4 k-steps (NR=2, complex) 2182 2183 daddiu AO, AO, 2 * 4 * SIZE # advance A past 4 k-steps (MR=1, complex) 2184 MADPS C13, C13, A3, B3 2185 MADPS C14, C14, A3, B4 2186 2187 PLU B7, B5, B5 2188 PLU B8, B6, B6 2189 2190 gsLQC1(R13, F9, F8, 0) # B1 B2 2191 MADPS C11, C11, A4, B5 2192 MADPS C12, C12, A4, B6 2193 2194 gsLQC1(R12, F1, F0, 0) # A1 A2 2195 MADPS C13, C13, A4, B7 2196 MADPS C14, C14, A4, B8 2197 2198 PLU B3, B1, B1 2199 bgtz L, .L2110 2200 PLU B4, B2, B2 2201 2202 2203 .align 4 2204.L212: # tail: two k-steps when bit 1 of K (TEMP under TRMMKERNEL) is set 2205#ifndef TRMMKERNEL 2206 andi L, K, 2 2207#else 2208 andi L, TEMP, 2 2209#endif 2210 blez L, .L217 2211 NOP 2212 2213 gsLQC1(R13, F13, F12, 1) # loads B5 B6 ($f12/$f13) 2214 MADPS C11, C11, A1, B1 2215 MADPS C12, C12, A1, B2 2216 2217 MADPS C13, C13, A1, B3 2218 MADPS C14, C14, A1, B4 2219 2220 PLU B7, B5, B5 2221 PLU B8, B6, B6 2222 daddiu BO, BO, 2 * 4 * SIZE 2223 2224 MADPS C11, C11, A2, B5 2225 MADPS C12, C12, A2, B6 2226 daddiu AO, AO, 4 * SIZE 2227 2228 MADPS C13, C13, A2, B7 2229 MADPS C14, C14, A2, B8 2230 2231 gsLQC1(R12, F1, F0, 0) # reloads A1 A2 ($f0/$f1) from the advanced AO 2232 gsLQC1(R13, F9, F8, 0) # B1 B2 2233 PLU B3, B1, B1 2234 PLU B4, B2, B2 2235 2236 2237 .align 4 2238.L217: # tail: one more k-step when K (TEMP under TRMMKERNEL) is odd 2239#ifndef TRMMKERNEL 2240 andi L, K, 1 2241#else 2242 andi L, TEMP, 1 2243#endif 2244 blez L, .L210 2245 NOP 2246 2247 MADPS C11, C11, A1, B1 2248 daddiu BO, BO, 4 * SIZE 2249 MADPS C12, C12, A1, B2 2250 daddiu AO, AO, 2 * SIZE 2251 2252 MADPS C13, C13, A1, B3 2253 MADPS C14, C14, A1, B4 2254 2255 .align 4 2256.L210: # Write Back 2257#ifndef TRMMKERNEL 2258 daddiu I, I, -1 2259 CVTU A1, C11 2260 CVTU A3, C13 2261 CVTU A5, C12 2262 CVTU A7, C14 2263 2264#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 2265 /* (a + bi) * (c + di) */ 2266 SUB C11, C11, A1 # ac'+'bd 2267 ADD C13, A3, C13 # ad'+'cb 2268# LD A1, 0 * SIZE(A) # load alpha_r 2269 LD A4, 152($sp) # load alpha_r (kept in A4 on this path) 2270 LD A2,
160($sp) # load alpha_i 2271# LD A2, 0 * SIZE(A) # load alpha_i 2272 SUB C12, C12, A5 2273 ADD C14, A7, C14 2274 2275 LD B1, 0 * SIZE(CO1) 2276 LD B2, 1 * SIZE(CO1) 2277 2278 MADD B1, B1, C11, A4 # A1 = alpha_r 2279 MADD B2, B2, C13, A4 2280 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2281 MADD B2, B2, C11, A2 2282 2283 LD B5, 0 * SIZE(CO2) 2284 LD B6, 1 * SIZE(CO2) 2285 2286 MADD B5, B5, C12, A4 2287 ST B1, 0 * SIZE(CO1) 2288 MADD B6, B6, C14, A4 2289 ST B2, 1 * SIZE(CO1) 2290 2291 NMSUB B5, B5, C14, A2 2292 MADD B6, B6, C12, A2 2293 2294 ST B5, 0 * SIZE(CO2) 2295 ST B6, 1 * SIZE(CO2) 2296#endif 2297 2298#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 2299 /* (a + bi) * (c - di) */ 2300 ADD C11, A1, C11 # ac'+'bd 2301 SUB C13, A3, C13 # ad'+'cb 2302# LD A1, 0 * SIZE(A) # load alpha_r 2303 LD A4, 152($sp) # load alpha_r 2304 LD A2, 160($sp) # load alpha_i 2305# LD A2, 0 * SIZE(A) # load alpha_r 2306 ADD C12, A5, C12 2307 SUB C14, A7, C14 2308 2309 LD B1, 0 * SIZE(CO1) 2310 LD B2, 1 * SIZE(CO1) 2311 2312 MADD B1, B1, C11, A4 # A1 = alpha_r 2313 MADD B2, B2, C13, A4 2314 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2315 MADD B2, B2, C11, A2 2316 2317 LD B5, 0 * SIZE(CO2) 2318 LD B6, 1 * SIZE(CO2) 2319 2320 MADD B5, B5, C12, A4 2321 ST B1, 0 * SIZE(CO1) 2322 MADD B6, B6, C14, A4 2323 ST B2, 1 * SIZE(CO1) 2324 2325 NMSUB B5, B5, C14, A2 2326 MADD B6, B6, C12, A2 2327 2328 ST B5, 0 * SIZE(CO2) 2329 ST B6, 1 * SIZE(CO2) 2330 2331#endif 2332 2333#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 2334 /* (a - bi) * (c + di) */ 2335 ADD C11, A1, C11 # ac'+'bd 2336 SUB C13, C13, A3 # ad'+'cb 2337# LD A1, 0 * SIZE(A) # load alpha_r 2338 LD A4, 152($sp) # load alpha_r 2339# LD A2, 0 * SIZE(A) # load alpha_r 2340 LD A2, 160($sp) # load alpha_i 2341 ADD C12, A5, C12 2342 SUB C14, C14, A7 2343 2344 LD B1, 0 * SIZE(CO1) 2345 LD B2, 1 * SIZE(CO1) 2346 2347 MADD B1, B1, C11, A4 # A1 = alpha_r 2348 MADD B2, B2, C13, A4 2349 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2350 MADD B2, 
B2, C11, A2 2351 2352 LD B5, 0 * SIZE(CO2) 2353 LD B6, 1 * SIZE(CO2) 2354 2355 MADD B5, B5, C12, A4 2356 ST B1, 0 * SIZE(CO1) 2357 MADD B6, B6, C14, A4 2358 ST B2, 1 * SIZE(CO1) 2359 2360 NMSUB B5, B5, C14, A2 2361 MADD B6, B6, C12, A2 2362 2363 ST B5, 0 * SIZE(CO2) 2364 ST B6, 1 * SIZE(CO2) 2365#endif 2366 2367#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 2368 /* (a - bi) * (c - di) */ 2369 SUB C11, C11, A1 # ac'+'bd 2370 ADD C13, A3, C13 # ad'+'cb 2371 LD A4, 152($sp) # load alpha_r 2372# LD A1, 0 * SIZE(A) # load alpha_r 2373 LD A2, 160($sp) 2374# LD A2, 0 * SIZE(A) # load alpha_i 2375 SUB C12, C12, A5 2376 ADD C14, A7, C14 2377 NEG C13, C13 2378 LD B1, 0 * SIZE(CO1) 2379 LD B2, 1 * SIZE(CO1) 2380 NEG C14, C14 2381 2382 MADD B1, B1, C11, A4 # A1 = alpha_r 2383 MADD B2, B2, C13, A4 2384 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2385 MADD B2, B2, C11, A2 2386 2387 LD B5, 0 * SIZE(CO2) 2388 LD B6, 1 * SIZE(CO2) 2389 2390 MADD B5, B5, C12, A4 2391 ST B1, 0 * SIZE(CO1) 2392 MADD B6, B6, C14, A4 2393 ST B2, 1 * SIZE(CO1) 2394 2395 NMSUB B5, B5, C14, A2 2396 MADD B6, B6, C12, A2 2397 2398 ST B5, 0 * SIZE(CO2) 2399 ST B6, 1 * SIZE(CO2) 2400#endif 2401 2402#else 2403 daddiu I, I, -1 2404 CVTU A1, C11 2405 CVTU A3, C13 2406 CVTU A5, C12 2407 CVTU A7, C14 2408 2409#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 2410 /* (a + bi) * (c + di) */ 2411 SUB C11, C11, A1 # ac'+'bd 2412 ADD C13, A3, C13 # ad'+'cb 2413# LD A1, 0 * SIZE(A) # load alpha_r 2414 LD A4, 152($sp) # load alpha_r 2415 LD A2, 160($sp) # load alpha_i 2416# LD A2, 0 * SIZE(A) # load alpha_i 2417 SUB C12, C12, A5 2418 ADD C14, A7, C14 2419 2420 MUL B1, C11, A4 # A1 = alpha_r 2421 MUL B2, C13, A4 2422 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2423 MADD B2, B2, C11, A2 2424 2425 MUL B5, C12, A4 2426 ST B1, 0 * SIZE(CO1) 2427 MUL B6, C14, A4 2428 ST B2, 1 * SIZE(CO1) 2429 2430 NMSUB B5, B5, C14, A2 2431 MADD B6, B6, C12, A2 2432 2433 ST B5, 0 * SIZE(CO2) 2434 ST B6, 1 * SIZE(CO2) 2435#endif 2436 
2437#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 2438 /* (a + bi) * (c - di) */ 2439 ADD C11, A1, C11 # ac'+'bd 2440 SUB C13, A3, C13 # ad'+'cb 2441# LD A1, 0 * SIZE(A) # load alpha_r 2442 LD A4, 152($sp) # load alpha_r 2443 LD A2, 160($sp) # load alpha_i 2444# LD A2, 0 * SIZE(A) # load alpha_r 2445 ADD C12, A5, C12 2446 SUB C14, A7, C14 2447 2448 MUL B1, C11, A4 # A1 = alpha_r 2449 MUL B2, C13, A4 2450 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2451 MADD B2, B2, C11, A2 2452 2453 MUL B5, C12, A4 2454 ST B1, 0 * SIZE(CO1) 2455 MUL B6, C14, A4 2456 ST B2, 1 * SIZE(CO1) 2457 2458 NMSUB B5, B5, C14, A2 2459 MADD B6, B6, C12, A2 2460 2461 ST B5, 0 * SIZE(CO2) 2462 ST B6, 1 * SIZE(CO2) 2463 2464#endif 2465 2466#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 2467 /* (a - bi) * (c + di) */ 2468 ADD C11, A1, C11 # ac'+'bd 2469 SUB C13, C13, A3 # ad'+'cb 2470# LD A1, 0 * SIZE(A) # load alpha_r 2471 LD A4, 152($sp) # load alpha_r 2472# LD A2, 0 * SIZE(A) # load alpha_r 2473 LD A2, 160($sp) # load alpha_i 2474 ADD C12, A5, C12 2475 SUB C14, C14, A7 2476 2477 MUL B1, C11, A4 # A1 = alpha_r 2478 MUL B2, C13, A4 2479 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2480 MADD B2, B2, C11, A2 2481 2482 MUL B5, C12, A4 2483 ST B1, 0 * SIZE(CO1) 2484 MUL B6, C14, A4 2485 ST B2, 1 * SIZE(CO1) 2486 2487 NMSUB B5, B5, C14, A2 2488 MADD B6, B6, C12, A2 2489 2490 ST B5, 0 * SIZE(CO2) 2491 ST B6, 1 * SIZE(CO2) 2492#endif 2493 2494#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 2495 /* (a - bi) * (c - di) */ 2496 SUB C11, C11, A1 # ac'+'bd 2497 ADD C13, A3, C13 # ad'+'cb 2498 LD A4, 152($sp) # load alpha_r 2499# LD A1, 0 * SIZE(A) # load alpha_r 2500 LD A2, 160($sp) 2501# LD A2, 0 * SIZE(A) # load alpha_i 2502 SUB C12, C12, A5 2503 ADD C14, A7, C14 2504 NEG C13, C13 2505 NEG C14, C14 2506 2507 MUL B1, C11, A4 # A1 = alpha_r 2508 MUL B2, C13, A4 2509 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2510 MADD B2, B2, C11, A2 2511 2512 MUL B5, C12, A4 2513 ST B1, 0 * SIZE(CO1) 
2514 MUL B6, C14, A4 2515 ST B2, 1 * SIZE(CO1) 2516 2517 NMSUB B5, B5, C14, A2 2518 MADD B6, B6, C12, A2 2519 2520 ST B5, 0 * SIZE(CO2) 2521 ST B6, 1 * SIZE(CO2) 2522#endif 2523 2524 2525#if ( defined(LEFT) && defined(TRANSA)) || \ 2526 (!defined(LEFT) && !defined(TRANSA)) 2527 dsubu TEMP, K, KK 2528#ifdef LEFT 2529 daddiu TEMP, TEMP, -1 2530#else 2531 daddiu TEMP, TEMP, -2 2532#endif 2533 dsll L, TEMP, ZBASE_SHIFT 2534 dsll TEMP, TEMP, 1 + ZBASE_SHIFT 2535 2536 daddu AO, AO, L 2537 daddu BO, BO, TEMP 2538#endif 2539 2540#ifdef LEFT 2541 daddiu KK, KK, 1 2542#endif 2543 2544#endif 2545 daddiu CO1, CO1, 2 * SIZE 2546 daddiu CO2, CO2, 2 * SIZE 2547 2548 2549 .align 4 2550.L20: # end of a 2-column panel: decrement J, set B = BO, loop to .L24 while J > 0 2551 daddiu J, J, -1 2552 move B, BO 2553 2554#if defined(TRMMKERNEL) && !defined(LEFT) 2555 daddiu KK, KK, 2 2556#endif 2557 2558 bgtz J, .L24 2559 NOP 2560 2561 2562 .align 4 2563.L1: # N & 1 remainder: exit via .L999 when N is even 2564 andi J, N, 1 2565 blez J, .L999 2566 NOP 2567 2568.L14: 2569 dsra I, M, 2 # I = M >> 2 (4-row tiles, cf. Mr=4 below) 2570 move AO, A # Reset A 2571 2572#if defined(TRMMKERNEL) && defined(LEFT) 2573 move KK, OFFSET 2574#endif 2575 2576 move CO1, C 2577 blez I, .L12 2578 daddu C, CO1, LDC 2579 2580 .align 4 2581.L141: 2582#if defined(TRMMKERNEL) 2583#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2584 move BO, B 2585#else 2586 dsll L, KK, 2 + ZBASE_SHIFT 2587 dsll TEMP, KK, ZBASE_SHIFT 2588 2589 daddu AO, AO, L 2590 daddu BO, B, TEMP 2591#endif 2592 MTC $0, C11 # clear result registers 2593 MOV C21, C11 2594 gsLQC1(R13, F9, F8, 0) # B1 B2 2595 2596 gsLQC1(R12, F1, F0, 0) # A1 A2 2597 MOV C31, C11 2598 MOV C41, C11 2599 2600 gsLQC1(R12, F3, F2, 1) # A3 A4 2601 MOV C13, C11 2602 MOV C23, C11 2603 2604 FETCH $0, 0 * SIZE(CO1) 2605 MOV C33, C11 2606 MOV C43, C11 2607 2608 FETCH $0, 8 * SIZE(CO1) 2609 PLU B3, B1, B1 2610 PLU B4, B2, B2 2611#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2612 dsubu TEMP, K, KK 2613#elif defined(LEFT) 2614 daddiu TEMP, KK, 4 # define Mr=4 2615#else 2616
daddiu TEMP, KK, 1 # define NR=1 2617#endif 2618 dsra L, TEMP, 2 2619 blez L, .L142 2620 NOP 2621 2622#else 2623 move BO, B # Reset B 2624 dsra L, K, 2 # L = K >> 2 (loop at .L1410 runs 4 k-steps per pass) 2625 2626 MTC $0, C11 # clear result registers 2627 MOV C21, C11 2628 gsLQC1(R13, F9, F8, 0) # B1 B2 2629 2630 gsLQC1(R12, F1, F0, 0) # A1 A2 2631 MOV C31, C11 2632 MOV C41, C11 2633 2634 gsLQC1(R12, F3, F2, 1) # A3 A4 2635 MOV C13, C11 2636 MOV C23, C11 2637 2638 FETCH $0, 0 * SIZE(CO1) 2639 MOV C33, C11 2640 MOV C43, C11 2641 2642 FETCH $0, 8 * SIZE(CO1) 2643 PLU B3, B1, B1 2644 blez L, .L142 2645 PLU B4, B2, B2 2646#endif 2647 2648.L1410: # main K loop, 4 k-steps per iteration (B1/B3, B2/B4, B5/B7, B6/B8) 2649 daddiu L, L, -1 2650 MADPS C11, C11, A1, B1 2651 MADPS C21, C21, A2, B1 2652 gsLQC1(R12, F5, F4, 2) # A5 A6 2653 2654 gsLQC1(R12, F7, F6, 3) # A7 A8 2655 MADPS C31, C31, A3, B1 2656 MADPS C41, C41, A4, B1 2657 2658 MADPS C13, C13, A1, B3 2659 MADPS C23, C23, A2, B3 2660 2661 MADPS C33, C33, A3, B3 2662 MADPS C43, C43, A4, B3 2663 gsLQC1(R13, F13, F12, 1) # loads B5 B6 ($f12/$f13) 2664 2665 gsLQC1(R12, F1, F0, 4) # A1 A2 2666 MADPS C11, C11, A5, B2 2667 MADPS C21, C21, A6, B2 2668 2669 gsLQC1(R12, F3, F2, 5) # A3 A4 2670 MADPS C31, C31, A7, B2 2671 MADPS C41, C41, A8, B2 2672 daddiu BO, BO, 2 * 4 * SIZE # advance B past 4 k-steps (NR=1, complex) 2673 2674 MADPS C13, C13, A5, B4 2675 MADPS C23, C23, A6, B4 2676 2677 MADPS C33, C33, A7, B4 2678 MADPS C43, C43, A8, B4 2679 2680 PLU B7, B5, B5 2681 PLU B8, B6, B6 2682 2683 MADPS C11, C11, A1, B5 2684 MADPS C21, C21, A2, B5 2685 gsLQC1(R12, F5, F4, 6) # A5 A6 2686 2687 gsLQC1(R12, F7, F6, 7) # A7 A8 2688 MADPS C31, C31, A3, B5 2689 MADPS C41, C41, A4, B5 2690 2691 daddiu AO, AO, 8 * 4 * SIZE # advance A past 4 k-steps (MR=4, complex) 2692 MADPS C13, C13, A1, B7 2693 MADPS C23, C23, A2, B7 2694 2695 MADPS C33, C33, A3, B7 2696 MADPS C43, C43, A4, B7 2697 gsLQC1(R13, F9, F8, 0) # B1 B2 2698 2699 gsLQC1(R12, F1, F0, 0) # A1 A2 2700 MADPS C11, C11, A5, B6 2701 MADPS C21, C21, A6, B6 2702 2703 gsLQC1(R12, F3, F2, 1) # A3 A4 2704 MADPS C31, C31, A7, B6 2705 MADPS C41, C41, A8, B6 2706 2707 MADPS C13,
C13, A5, B8 2708 MADPS C23, C23, A6, B8 2709 2710 MADPS C33, C33, A7, B8 2711 MADPS C43, C43, A8, B8 2712 2713 PLU B3, B1, B1 2714 bgtz L, .L1410 2715 PLU B4, B2, B2 2716 2717 2718 .align 4 2719.L142: # tail: two k-steps when bit 1 of K (TEMP under TRMMKERNEL) is set 2720#ifndef TRMMKERNEL 2721 andi L, K, 2 2722#else 2723 andi L, TEMP, 2 2724#endif 2725 blez L, .L147 2726 NOP 2727 2728 MADPS C11, C11, A1, B1 2729 MADPS C21, C21, A2, B1 2730 gsLQC1(R12, F5, F4, 2) # A5 A6 2731 2732 gsLQC1(R12, F7, F6, 3) # A7 A8 2733 MADPS C31, C31, A3, B1 2734 MADPS C41, C41, A4, B1 2735 daddiu AO, AO, 4 * 4 * SIZE # advance A past 2 k-steps (MR=4, complex) 2736 2737 MADPS C13, C13, A1, B3 2738 MADPS C23, C23, A2, B3 2739 2740 MADPS C33, C33, A3, B3 2741 MADPS C43, C43, A4, B3 2742 gsLQC1(R13, F13, F8, 1) # targets $f8/$f13 (B1/B6), not B3/B4: B1 feeds the PLU below and .L147; F8 rather than F12 looks deliberate - TODO confirm 2743 2744 gsLQC1(R12, F1, F0, 0) # A1 A2 2745 MADPS C11, C11, A5, B2 2746 MADPS C21, C21, A6, B2 2747 2748 gsLQC1(R12, F3, F2, 1) # A3 A4 2749 MADPS C31, C31, A7, B2 2750 MADPS C41, C41, A8, B2 2751 daddiu BO, BO, 4 * SIZE # advance B past 2 k-steps (NR=1, complex) 2752 2753 MADPS C13, C13, A5, B4 2754 MADPS C23, C23, A6, B4 2755 2756 MADPS C33, C33, A7, B4 2757 MADPS C43, C43, A8, B4 2758 PLU B3, B1, B1 2759 2760 2761 .align 4 2762.L147: # tail: one more k-step when K (TEMP under TRMMKERNEL) is odd 2763#ifndef TRMMKERNEL 2764 andi L, K, 1 2765#else 2766 andi L, TEMP, 1 2767#endif 2768 blez L, .L140 2769 NOP 2770 2771 MADPS C11, C11, A1, B1 2772 MADPS C21, C21, A2, B1 2773 daddiu BO, BO, 2 * SIZE 2774 2775 MADPS C31, C31, A3, B1 2776 MADPS C41, C41, A4, B1 2777 daddiu AO, AO, 2 * 4 * SIZE 2778 2779 MADPS C13, C13, A1, B3 2780 MADPS C23, C23, A2, B3 2781 2782 MADPS C33, C33, A3, B3 2783 MADPS C43, C43, A4, B3 2784 2785 2786 .align 4 2787.L140: # Write Back 2788#ifndef TRMMKERNEL 2789 daddiu I, I, -1 2790 CVTU A1, C11 2791 CVTU A2, C21 2792 2793 CVTU A3, C31 2794 CVTU A4, C41 2795 2796 CVTU A5, C13 2797 CVTU A6, C23 2798 2799 CVTU A7, C33 2800 CVTU A8, C43 2801 2802 CVTU B1, C12 # NOTE(review): B1-B8 are overwritten by the LD B1..B8 that follow before being read, and C12/C22/C32/C42/C14/C24/C34/C44 are not accumulated by .L1410/.L142/.L147; these eight CVTU look dead - confirm 2803 CVTU B2, C22 2804 2805 CVTU B3, C32 2806 CVTU B4, C42 2807 2808 CVTU B5, C14 2809 CVTU B6, C24 2810 2811 CVTU B7, C34 2812 CVTU B8, C44 2813 2814#if defined(NN) || defined(NT) ||
defined(TN) || defined(TT) 2815 /* (a + bi) * (c + di) */ 2816 SUB C11, C11, A1 # ac'+'bd 2817 SUB C21, C21, A2 2818# LD A1, 0 * SIZE(A) # load alpha_r 2819 SUB C31, C31, A3 2820 LD A1, 152($sp) # load alpha_r 2821 SUB C41, C41, A4 2822 LD A2, 160($sp) # load alpha_i 2823# LD A2, 0 * SIZE(A) # load alpha_i 2824 ADD C13, A5, C13 # ad'+'cb 2825 ADD C23, A6, C23 2826 ADD C33, A7, C33 2827 ADD C43, A8, C43 2828 2829 LD B1, 0 * SIZE(CO1) 2830 LD B3, 2 * SIZE(CO1) 2831 LD B5, 4 * SIZE(CO1) 2832 LD B7, 6 * SIZE(CO1) 2833 LD B2, 1 * SIZE(CO1) 2834 LD B4, 3 * SIZE(CO1) 2835 LD B6, 5 * SIZE(CO1) 2836 LD B8, 7 * SIZE(CO1) 2837 2838 MADD B1, B1, C11, A1 # A1 = alpha_r 2839 MADD B3, B3, C21, A1 2840 MADD B5, B5, C31, A1 2841 MADD B7, B7, C41, A1 2842 MADD B2, B2, C13, A1 2843 MADD B4, B4, C23, A1 2844 MADD B6, B6, C33, A1 2845 MADD B8, B8, C43, A1 2846 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2847 NMSUB B3, B3, C23, A2 2848 NMSUB B5, B5, C33, A2 2849 NMSUB B7, B7, C43, A2 2850 MADD B2, B2, C11, A2 2851 MADD B4, B4, C21, A2 2852 MADD B6, B6, C31, A2 2853 MADD B8, B8, C41, A2 2854 2855 ST B1, 0 * SIZE(CO1) 2856 ST B3, 2 * SIZE(CO1) 2857 ST B5, 4 * SIZE(CO1) 2858 ST B7, 6 * SIZE(CO1) 2859 ST B2, 1 * SIZE(CO1) 2860 ST B4, 3 * SIZE(CO1) 2861 ST B6, 5 * SIZE(CO1) 2862 ST B8, 7 * SIZE(CO1) 2863#endif 2864 2865#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 2866 /* (a + bi) * (c - di) */ 2867 ADD C11, A1, C11 # ac'+'bd 2868 ADD C21, A2, C21 2869# LD A1, 0 * SIZE(A) # load alpha_r 2870 ADD C31, A3, C31 2871 LD A1, 152($sp) # load alpha_r 2872 ADD C41, A4, C41 2873 LD A2, 160($sp) # load alpha_i 2874# LD A2, 0 * SIZE(A) # load alpha_r 2875 SUB C13, A5, C13 # ad'+'cb 2876 SUB C23, A6, C23 2877 SUB C33, A7, C33 2878 SUB C43, A8, C43 2879 2880 LD B1, 0 * SIZE(CO1) 2881 LD B3, 2 * SIZE(CO1) 2882 LD B5, 4 * SIZE(CO1) 2883 LD B7, 6 * SIZE(CO1) 2884 LD B2, 1 * SIZE(CO1) 2885 LD B4, 3 * SIZE(CO1) 2886 LD B6, 5 * SIZE(CO1) 2887 LD B8, 7 * SIZE(CO1) 2888 2889 MADD B1, B1, C11, A1 # A1 = 
alpha_r 2890 MADD B3, B3, C21, A1 2891 MADD B5, B5, C31, A1 2892 MADD B7, B7, C41, A1 2893 MADD B2, B2, C13, A1 2894 MADD B4, B4, C23, A1 2895 MADD B6, B6, C33, A1 2896 MADD B8, B8, C43, A1 2897 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2898 NMSUB B3, B3, C23, A2 2899 NMSUB B5, B5, C33, A2 2900 NMSUB B7, B7, C43, A2 2901 MADD B2, B2, C11, A2 2902 MADD B4, B4, C21, A2 2903 MADD B6, B6, C31, A2 2904 MADD B8, B8, C41, A2 2905 2906 ST B1, 0 * SIZE(CO1) 2907 ST B3, 2 * SIZE(CO1) 2908 ST B5, 4 * SIZE(CO1) 2909 ST B7, 6 * SIZE(CO1) 2910 ST B2, 1 * SIZE(CO1) 2911 ST B4, 3 * SIZE(CO1) 2912 ST B6, 5 * SIZE(CO1) 2913 ST B8, 7 * SIZE(CO1) 2914#endif 2915 2916#if defined(RN) || defined(RT) || defined(CN) || defined(CT) 2917 /* (a - bi) * (c + di) */ 2918 ADD C11, A1, C11 # ac'+'bd 2919 ADD C21, A2, C21 2920# LD A1, 0 * SIZE(A) # load alpha_r 2921 ADD C31, A3, C31 2922 LD A1, 152($sp) # load alpha_r 2923# LD A2, 0 * SIZE(A) # load alpha_r 2924 ADD C41, A4, C41 2925 LD A2, 160($sp) # load alpha_i 2926 SUB C13, C13, A5 # ad'+'cb 2927 SUB C23, C23, A6 2928 SUB C33, C33, A7 2929 SUB C43, C43, A8 2930 2931 LD B1, 0 * SIZE(CO1) 2932 LD B3, 2 * SIZE(CO1) 2933 LD B5, 4 * SIZE(CO1) 2934 LD B7, 6 * SIZE(CO1) 2935 LD B2, 1 * SIZE(CO1) 2936 LD B4, 3 * SIZE(CO1) 2937 LD B6, 5 * SIZE(CO1) 2938 LD B8, 7 * SIZE(CO1) 2939 2940 MADD B1, B1, C11, A1 # A1 = alpha_r 2941 MADD B3, B3, C21, A1 2942 MADD B5, B5, C31, A1 2943 MADD B7, B7, C41, A1 2944 MADD B2, B2, C13, A1 2945 MADD B4, B4, C23, A1 2946 MADD B6, B6, C33, A1 2947 MADD B8, B8, C43, A1 2948 NMSUB B1, B1, C13, A2 # A2 = alpha_i 2949 NMSUB B3, B3, C23, A2 2950 NMSUB B5, B5, C33, A2 2951 NMSUB B7, B7, C43, A2 2952 MADD B2, B2, C11, A2 2953 MADD B4, B4, C21, A2 2954 MADD B6, B6, C31, A2 2955 MADD B8, B8, C41, A2 2956 2957 ST B1, 0 * SIZE(CO1) 2958 ST B3, 2 * SIZE(CO1) 2959 ST B5, 4 * SIZE(CO1) 2960 ST B7, 6 * SIZE(CO1) 2961 ST B2, 1 * SIZE(CO1) 2962 ST B4, 3 * SIZE(CO1) 2963 ST B6, 5 * SIZE(CO1) 2964 ST B8, 7 * SIZE(CO1) 2965#endif 2966 2967#if 
defined(RR) || defined(RC) || defined(CR) || defined(CC) 2968 /* (a - bi) * (c - di) */ 2969 SUB C11, C11, A1 # AC'+'BD 2970 SUB C21, C21, A2 2971 SUB C31, C31, A3 2972 LD A1, 152($sp) # LOAD ALPHA_R 2973# LD A1, 0 * SIZE(A) # LOAD ALPHA_R 2974 SUB C41, C41, A4 2975 LD A2, 160($sp) 2976# LD A2, 0 * SIZE(A) # LOAD ALPHA_I 2977 2978 ADD C13, A5, C13 # AD'+'CB 2979 ADD C23, A6, C23 2980 ADD C33, A7, C33 2981 ADD C43, A8, C43 2982 NEG C13, C13 # AD'+'CB 2983 NEG C23, C23 2984 NEG C33, C33 2985 NEG C43, C43 2986 2987 2988 LD B1, 0 * SIZE(CO1) 2989 LD B3, 2 * SIZE(CO1) 2990 LD B5, 4 * SIZE(CO1) 2991 LD B7, 6 * SIZE(CO1) 2992 LD B2, 1 * SIZE(CO1) 2993 LD B4, 3 * SIZE(CO1) 2994 LD B6, 5 * SIZE(CO1) 2995 LD B8, 7 * SIZE(CO1) 2996 2997 MADD B1, B1, C11, A1 # A1 = ALPHA_R 2998 MADD B3, B3, C21, A1 2999 MADD B5, B5, C31, A1 3000 MADD B7, B7, C41, A1 3001 MADD B2, B2, C13, A1 3002 MADD B4, B4, C23, A1 3003 MADD B6, B6, C33, A1 3004 MADD B8, B8, C43, A1 3005 NMSUB B1, B1, C13, A2 # A2 = ALPHA_I 3006 NMSUB B3, B3, C23, A2 3007 NMSUB B5, B5, C33, A2 3008 NMSUB B7, B7, C43, A2 3009 MADD B2, B2, C11, A2 3010 MADD B4, B4, C21, A2 3011 MADD B6, B6, C31, A2 3012 MADD B8, B8, C41, A2 3013 3014 ST B1, 0 * SIZE(CO1) 3015 ST B3, 2 * SIZE(CO1) 3016 ST B5, 4 * SIZE(CO1) 3017 ST B7, 6 * SIZE(CO1) 3018 ST B2, 1 * SIZE(CO1) 3019 ST B4, 3 * SIZE(CO1) 3020 ST B6, 5 * SIZE(CO1) 3021 ST B8, 7 * SIZE(CO1) 3022#endif 3023 3024#else 3025 daddiu I, I, -1 3026 CVTU A1, C11 3027 CVTU A2, C21 3028 3029 CVTU A3, C31 3030 CVTU A4, C41 3031 3032 CVTU A5, C13 3033 CVTU A6, C23 3034 3035 CVTU A7, C33 3036 CVTU A8, C43 3037 3038 CVTU B1, C12 3039 CVTU B2, C22 3040 3041 CVTU B3, C32 3042 CVTU B4, C42 3043 3044 CVTU B5, C14 3045 CVTU B6, C24 3046 3047 CVTU B7, C34 3048 CVTU B8, C44 3049 3050#if defined(NN) || defined(NT) || defined(TN) || defined(TT) 3051 /* (a + bi) * (c + di) */ 3052 SUB C11, C11, A1 # ac'+'bd 3053 SUB C21, C21, A2 3054# LD A1, 0 * SIZE(A) # load alpha_r 3055 SUB C31, C31, A3 3056 LD A1, 
152($sp) # load alpha_r 3057 SUB C41, C41, A4 3058 LD A2, 160($sp) # load alpha_i 3059# LD A2, 0 * SIZE(A) # load alpha_i 3060 ADD C13, A5, C13 # ad'+'cb 3061 ADD C23, A6, C23 3062 ADD C33, A7, C33 3063 ADD C43, A8, C43 3064 3065 MUL B1, C11, A1 # A1 = alpha_r 3066 MUL B3, C21, A1 3067 MUL B5, C31, A1 3068 MUL B7, C41, A1 3069 MUL B2, C13, A1 3070 MUL B4, C23, A1 3071 MUL B6, C33, A1 3072 MUL B8, C43, A1 3073 NMSUB B1, B1, C13, A2 # A2 = alpha_i 3074 NMSUB B3, B3, C23, A2 3075 NMSUB B5, B5, C33, A2 3076 NMSUB B7, B7, C43, A2 3077 MADD B2, B2, C11, A2 3078 MADD B4, B4, C21, A2 3079 MADD B6, B6, C31, A2 3080 MADD B8, B8, C41, A2 3081 3082 ST B1, 0 * SIZE(CO1) 3083 ST B3, 2 * SIZE(CO1) 3084 ST B5, 4 * SIZE(CO1) 3085 ST B7, 6 * SIZE(CO1) 3086 ST B2, 1 * SIZE(CO1) 3087 ST B4, 3 * SIZE(CO1) 3088 ST B6, 5 * SIZE(CO1) 3089 ST B8, 7 * SIZE(CO1) 3090#endif 3091 3092#if defined(NR) || defined(NC) || defined(TR) || defined(TC) 3093 /* (a + bi) * (c - di) */ 3094 ADD C11, A1, C11 # ac'+'bd 3095 ADD C21, A2, C21 3096# LD A1, 0 * SIZE(A) # load alpha_r 3097 ADD C31, A3, C31 3098 LD A1, 152($sp) # load alpha_r 3099 ADD C41, A4, C41 3100 LD A2, 160($sp) # load alpha_i 3101# LD A2, 0 * SIZE(A) # load alpha_r 3102 SUB C13, A5, C13 # ad'+'cb 3103 SUB C23, A6, C23 3104 SUB C33, A7, C33 3105 SUB C43, A8, C43 3106 3107 MUL B1, C11, A1 # A1 = alpha_r 3108 MUL B3, C21, A1 3109 MUL B5, C31, A1 3110 MUL B7, C41, A1 3111 MUL B2, C13, A1 3112 MUL B4, C23, A1 3113 MUL B6, C33, A1 3114 MUL B8, C43, A1 3115 NMSUB B1, B1, C13, A2 # A2 = alpha_i 3116 NMSUB B3, B3, C23, A2 3117 NMSUB B5, B5, C33, A2 3118 NMSUB B7, B7, C43, A2 3119 MADD B2, B2, C11, A2 3120 MADD B4, B4, C21, A2 3121 MADD B6, B6, C31, A2 3122 MADD B8, B8, C41, A2 3123 3124 ST B1, 0 * SIZE(CO1) 3125 ST B3, 2 * SIZE(CO1) 3126 ST B5, 4 * SIZE(CO1) 3127 ST B7, 6 * SIZE(CO1) 3128 ST B2, 1 * SIZE(CO1) 3129 ST B4, 3 * SIZE(CO1) 3130 ST B6, 5 * SIZE(CO1) 3131 ST B8, 7 * SIZE(CO1) 3132#endif 3133 3134#if defined(RN) || defined(RT) || 
defined(CN) || defined(CT) 3135 /* (a - bi) * (c + di) */ 3136 ADD C11, A1, C11 # ac'+'bd 3137 ADD C21, A2, C21 3138# LD A1, 0 * SIZE(A) # load alpha_r 3139 ADD C31, A3, C31 3140 LD A1, 152($sp) # load alpha_r 3141# LD A2, 0 * SIZE(A) # load alpha_r 3142 ADD C41, A4, C41 3143 LD A2, 160($sp) # load alpha_i 3144 SUB C13, C13, A5 # ad'+'cb 3145 SUB C23, C23, A6 3146 SUB C33, C33, A7 3147 SUB C43, C43, A8 3148 3149 MUL B1, C11, A1 # A1 = alpha_r 3150 MUL B3, C21, A1 3151 MUL B5, C31, A1 3152 MUL B7, C41, A1 3153 MUL B2, C13, A1 3154 MUL B4, C23, A1 3155 MUL B6, C33, A1 3156 MUL B8, C43, A1 3157 NMSUB B1, B1, C13, A2 # A2 = alpha_i 3158 NMSUB B3, B3, C23, A2 3159 NMSUB B5, B5, C33, A2 3160 NMSUB B7, B7, C43, A2 3161 MADD B2, B2, C11, A2 3162 MADD B4, B4, C21, A2 3163 MADD B6, B6, C31, A2 3164 MADD B8, B8, C41, A2 3165 3166 ST B1, 0 * SIZE(CO1) 3167 ST B3, 2 * SIZE(CO1) 3168 ST B5, 4 * SIZE(CO1) 3169 ST B7, 6 * SIZE(CO1) 3170 ST B2, 1 * SIZE(CO1) 3171 ST B4, 3 * SIZE(CO1) 3172 ST B6, 5 * SIZE(CO1) 3173 ST B8, 7 * SIZE(CO1) 3174#endif 3175 3176#if defined(RR) || defined(RC) || defined(CR) || defined(CC) 3177 /* (a - bi) * (c - di) */ 3178 SUB C11, C11, A1 # AC'+'BD 3179 SUB C21, C21, A2 3180 SUB C31, C31, A3 3181 LD A1, 152($sp) # LOAD ALPHA_R 3182# LD A1, 0 * SIZE(A) # LOAD ALPHA_R 3183 SUB C41, C41, A4 3184 LD A2, 160($sp) 3185# LD A2, 0 * SIZE(A) # LOAD ALPHA_I 3186 3187 ADD C13, A5, C13 # AD'+'CB 3188 ADD C23, A6, C23 3189 ADD C33, A7, C33 3190 ADD C43, A8, C43 3191 NEG C13, C13 # AD'+'CB 3192 NEG C23, C23 3193 NEG C33, C33 3194 NEG C43, C43 3195 3196 MUL B1, C11, A1 # A1 = ALPHA_R 3197 MUL B3, C21, A1 3198 MUL B5, C31, A1 3199 MUL B7, C41, A1 3200 MUL B2, C13, A1 3201 MUL B4, C23, A1 3202 MUL B6, C33, A1 3203 MUL B8, C43, A1 3204 NMSUB B1, B1, C13, A2 # A2 = ALPHA_I 3205 NMSUB B3, B3, C23, A2 3206 NMSUB B5, B5, C33, A2 3207 NMSUB B7, B7, C43, A2 3208 MADD B2, B2, C11, A2 3209 MADD B4, B4, C21, A2 3210 MADD B6, B6, C31, A2 3211 MADD B8, B8, C41, A2 3212 3213 ST B1, 0 
* SIZE(CO1) 3214 ST B3, 2 * SIZE(CO1) 3215 ST B5, 4 * SIZE(CO1) 3216 ST B7, 6 * SIZE(CO1) 3217 ST B2, 1 * SIZE(CO1) 3218 ST B4, 3 * SIZE(CO1) 3219 ST B6, 5 * SIZE(CO1) 3220 ST B8, 7 * SIZE(CO1) 3221#endif 3222 3223 3224#if ( defined(LEFT) && defined(TRANSA)) || \ 3225 (!defined(LEFT) && !defined(TRANSA)) 3226 dsubu TEMP, K, KK 3227#ifdef LEFT 3228 daddiu TEMP, TEMP, -4 3229#else 3230 daddiu TEMP, TEMP, -1 3231#endif 3232 3233 dsll L, TEMP, 2 + ZBASE_SHIFT 3234 dsll TEMP, TEMP, ZBASE_SHIFT 3235 3236 daddu AO, AO, L 3237 daddu BO, BO, TEMP 3238#endif 3239 3240#ifdef LEFT 3241 daddiu KK, KK, 4 3242#endif 3243 3244#endif 3245 bgtz I, .L141 3246 daddiu CO1, CO1, 8 * SIZE 3247 3248 .align 4 3249.L12: 3250 andi I, M, 2 # MR=4 3251 blez I, .L11 3252 NOP 3253 3254 .align 4 3255.L121: 3256#if defined(TRMMKERNEL) 3257#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 3258 move BO, B 3259#else 3260 dsll L, KK, 1 + ZBASE_SHIFT 3261 dsll TEMP, KK, ZBASE_SHIFT 3262 3263 daddu AO, AO, L 3264 daddu BO, B, TEMP 3265#endif 3266 3267 MTC $0, C11 # CLEAR REAULTS REGISTERS 3268 MOV C21, C11 3269 gsLQC1(R13, F9, F8, 0) # B1 B2 3270 3271 gsLQC1(R12, F1, F0, 0) # A1 A2 3272 MOV C13, C11 3273 MOV C23, C11 3274 3275 FETCH $0, 0 * SIZE(CO1) 3276 FETCH $0, 8 * SIZE(CO1) 3277 3278 PLU B3, B1, B1 3279 PLU B4, B2, B2 3280#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3281 dsubu TEMP, K, KK 3282#elif defined(LEFT) 3283 daddiu TEMP, KK, 2 3284#else 3285 daddiu TEMP, KK, 1 3286#endif 3287 dsra L, TEMP, 2 3288 blez L, .L122 3289 NOP 3290 3291#else 3292 move BO, B # Reset B 3293 dsra L, K, 2 # UnRoll K=64 3294 3295 MTC $0, C11 # CLEAR REAULTS REGISTERS 3296 MOV C21, C11 3297 gsLQC1(R13, F9, F8, 0) # B1 B2 3298 3299 gsLQC1(R12, F1, F0, 0) # A1 A2 3300 MOV C13, C11 3301 MOV C23, C11 3302 3303 FETCH $0, 0 * SIZE(CO1) 3304 FETCH $0, 8 * SIZE(CO1) 3305 3306 PLU B3, B1, B1 3307 blez L, .L122 3308 PLU B4, B2, B2 3309#endif 3310 3311.L1210: 3312 
/*
 * Body of the .L1210 loop, its two K tails (.L122, .L127) and the
 * write-back (.L120) for the block that produces two results per pass,
 * followed by the set-up (.L11, .L111) of the single-result block up to the
 * first instruction of its loop (.L1110).  All stores go through CO1.
 *
 * Register roles, from the defines at the top of this file:
 *   - R12 / R13 are the register numbers of AO ($12) and BO ($13).
 *   - F0..F7 are the numbers of A1..A8, F8..F15 those of B1..B8.
 *   - Each gsLQC1 line fills two FP registers from the given base; its last
 *     argument is the offset field of the encoded instruction.
 *   - C11/C13 accumulate the first result, C21/C23 the second.
 *   - 152($sp) and 160($sp) are the slots where the prologue stored $f15 and
 *     $f16; the write-back reads them back as alpha_r and alpha_i.
 *
 * MADPS, PLU, CVTU, MADD, NMSUB, MUL, NEG, LD and ST are macros defined
 * outside this chunk.  NOTE(review): from their use here they look like the
 * MIPS paired-single multiply-add, pair-lower-upper shuffle and
 * convert-pair-upper instructions plus the scalar madd / nmsub / mul / neg /
 * load / store forms -- confirm against common.h.
 *
 * The instruction after each branch is relied upon to execute even when the
 * branch is taken: for example `PLU B4, B2, B2` after `blez L, .L112`
 * produces the B4 that the MADPS at .L112 consume.  The code is therefore
 * written for branch delay slots; do not reorder across branches.
 */
	daddiu	L, L, -1		# one pass fewer to go
	gsLQC1(R13, F13, F12, 1)	# B5 B6 from BO, offset field 1
	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1

	gsLQC1(R12, F3, F2, 1)		# A3 A4 from AO, offset field 1
	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

	gsLQC1(R12, F5, F4, 2)		# A5 A6 from AO, offset field 2
	PLU	B7, B5, B5		# B7, B8: PLU of B5, B6 with themselves
	PLU	B8, B6, B6
	daddiu	BO, BO, 2 * 4 * SIZE	# BO advances by 8 elements per pass

	MADPS	C11, C11, A3, B2
	MADPS	C21, C21, A4, B2

	gsLQC1(R12, F7, F6, 3)		# A7 A8 from AO, offset field 3
	MADPS	C13, C13, A3, B4
	MADPS	C23, C23, A4, B4

	MADPS	C11, C11, A5, B5
	MADPS	C21, C21, A6, B5
	daddiu	AO, AO, 4 * 4 * SIZE	# AO advances by 16 elements per pass

	gsLQC1(R13, F9, F8, 0)		# B1 B2 for the next pass, BO already advanced
	MADPS	C13, C13, A5, B7
	MADPS	C23, C23, A6, B7

	gsLQC1(R12, F1, F0, 0)		# A1 A2 for the next pass, AO already advanced
	MADPS	C11, C11, A7, B6
	MADPS	C21, C21, A8, B6

	MADPS	C13, C13, A7, B8
	MADPS	C23, C23, A8, B8

	PLU	B3, B1, B1		# B3, B4: PLU of the new B1, B2 with themselves
	bgtz	L, .L1210
	PLU	B4, B2, B2		# branch delay slot


	.align	4
.L122:
/*
 * First K tail: taken when bit 1 of the trip count is set (K in the plain
 * build, TEMP in the TRMM build).  Issues 8 MADPS, half of a full pass, and
 * advances AO by 8 and BO by 4 elements.
 */
#ifndef	TRMMKERNEL
	andi	L, K, 2
#else
	andi	L, TEMP, 2
#endif
	blez	L, .L127
	NOP

	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1

	gsLQC1(R12, F3, F2, 1)		# A3 A4 from AO, offset field 1
	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

/*
 * NOTE(review): B5 is not loaded in this tail and B7 is not read by .L122,
 * .L127 or .L120; this PLU looks like a leftover from the unrolled loop.
 */
	PLU	B7, B5, B5
	daddiu	BO, BO, 1 * 4 * SIZE	# BO += 4 elements

	daddiu	AO, AO, 2 * 4 * SIZE	# AO += 8 elements
	MADPS	C11, C11, A3, B2
	MADPS	C21, C21, A4, B2

	MADPS	C13, C13, A3, B4
	MADPS	C23, C23, A4, B4

	gsLQC1(R13, F9, F8, 0)		# reload B1 B2 for .L127
	gsLQC1(R12, F1, F0, 0)		# reload A1 A2 for .L127
	PLU	B3, B1, B1

	.align	4
.L127:
/*
 * Second K tail: taken when bit 0 of the trip count is set.  Issues 4 MADPS
 * and advances AO by 4 and BO by 2 elements.
 */
#ifndef	TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L120
	NOP

	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1
	daddiu	BO, BO, 2 * SIZE
	daddiu	AO, AO, 4 * SIZE

	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

	.align	4
.L120:					# Write Back
/*
 * Write-back of two results (four SIZE-sized values at CO1).
 *   1. CVTU copies part of each accumulator into A1..A4 (presumably the
 *      upper half of the pair -- see the note on the macros above).
 *   2. One of the four conjugation cases below combines the accumulator
 *      with that copy into a real part (C11, C21) and an imaginary part
 *      (C13, C23).
 *   3. The result is scaled by alpha as a complex product:
 *        re' = re * alpha_r - im * alpha_i
 *        im' = im * alpha_r + re * alpha_i
 * Plain build: the product is added to the values loaded from CO1.
 * TRMM build:  CO1 is not loaded; the product overwrites it (MUL replaces
 * the first MADD of each chain).
 * Exactly one of the four case blocks is expected to be enabled by the
 * build (NN.., NR.., RN.., RR.. defines).
 */
#ifndef TRMMKERNEL
	daddiu	I, I, -1		# I is overwritten at .L11 before it is read again
	CVTU	A1, C11
	CVTU	A2, C21

	CVTU	A3, C13
	CVTU	A4, C23


#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/* (a + bi) * (c + di) */
	SUB	C11, C11, A1		# real part, result 0
	SUB	C21, C21, A2		# real part, result 1
	ADD	C13, A3, C13		# imag part, result 0
	ADD	C23, A4, C23		# imag part, result 1
	LD	A1, 152($sp)		# alpha_r, A1 is free again here
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)	# current C values: re0, re1, im0, im1
	LD	B3, 2 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)
	LD	B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		# A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)

	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#if defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/* (a + bi) * (c - di) */
	ADD	C11, A1, C11		# real part, result 0
	ADD	C21, A2, C21		# real part, result 1
	SUB	C13, A3, C13		# imag part, result 0
	SUB	C23, A4, C23		# imag part, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)
	LD	B3, 2 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)
	LD	B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		# A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#if defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/* (a - bi) * (c + di) */
	ADD	C11, A1, C11		# real part, result 0
	ADD	C21, A2, C21		# real part, result 1
	SUB	C13, C13, A3		# imag part, result 0
	SUB	C23, C23, A4		# imag part, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)
	LD	B3, 2 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)
	LD	B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		# A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/* (a - bi) * (c - di) */
	SUB	C11, C11, A1		# real part, result 0
	SUB	C21, C21, A2		# real part, result 1
	ADD	C13, A3, C13		# imag part before the sign flip, result 0
	ADD	C23, A4, C23		# imag part before the sign flip, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i
	NEG	C13, C13		# imag part is negated in this case
	NEG	C23, C23

	LD	B1, 0 * SIZE(CO1)
	LD	B3, 2 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)
	LD	B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		# A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#else
	/* TRMM build: same combine and alpha scaling, CO1 is overwritten. */
	daddiu	I, I, -1		# I is overwritten at .L11 before it is read again
	CVTU	A1, C11
	CVTU	A2, C21

	CVTU	A3, C13
	CVTU	A4, C23


#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/* (a + bi) * (c + di) */
	SUB	C11, C11, A1		# real part, result 0
	SUB	C21, C21, A2		# real part, result 1
	ADD	C13, A3, C13		# imag part, result 0
	ADD	C23, A4, C23		# imag part, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A1		# A1 = alpha_r
	MUL	B3, C21, A1
	MUL	B2, C13, A1
	MUL	B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)

	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#if defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/* (a + bi) * (c - di) */
	ADD	C11, A1, C11		# real part, result 0
	ADD	C21, A2, C21		# real part, result 1
	SUB	C13, A3, C13		# imag part, result 0
	SUB	C23, A4, C23		# imag part, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A1		# A1 = alpha_r
	MUL	B3, C21, A1
	MUL	B2, C13, A1
	MUL	B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#if defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/* (a - bi) * (c + di) */
	ADD	C11, A1, C11		# real part, result 0
	ADD	C21, A2, C21		# real part, result 1
	SUB	C13, C13, A3		# imag part, result 0
	SUB	C23, C23, A4		# imag part, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A1		# A1 = alpha_r
	MUL	B3, C21, A1
	MUL	B2, C13, A1
	MUL	B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif

#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/* (a - bi) * (c - di) */
	SUB	C11, C11, A1		# real part, result 0
	SUB	C21, C21, A2		# real part, result 1
	ADD	C13, A3, C13		# imag part before the sign flip, result 0
	ADD	C23, A4, C23		# imag part before the sign flip, result 1
	LD	A1, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i
	NEG	C13, C13		# imag part is negated in this case
	NEG	C23, C23

	MUL	B1, C11, A1		# A1 = alpha_r
	MUL	B3, C21, A1
	MUL	B2, C13, A1
	MUL	B4, C23, A1
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B3, 2 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
	ST	B4, 3 * SIZE(CO1)
#endif
/*
 * TRMM pointer bookkeeping after the block.  TEMP = K - KK minus the block
 * extent (2 with LEFT, 1 otherwise); AO then moves forward by
 * TEMP << (1 + ZBASE_SHIFT) bytes and BO by TEMP << ZBASE_SHIFT bytes.
 * NOTE(review): presumably this skips the part of the packed panels that
 * this block did not consume -- confirm against the packing layout.
 */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -2
#else
	daddiu	TEMP, TEMP, -1
#endif
	dsll	L, TEMP, 1 + ZBASE_SHIFT
	dsll	TEMP, TEMP, ZBASE_SHIFT

	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 2		# two more rows done
#endif

#endif
	daddiu	CO1, CO1, 4 * SIZE	# step past the four values just stored
	daddiu	CO2, CO2, 4 * SIZE	# CO2 kept in step, it is not read in this chunk


	.align	4
.L11:
	andi	I, M, 1			# bit 0 of M selects the single-result block
	blez	I, .L10
	NOP

	.align	4
.L111:
/*
 * Set-up for the single-result block: position AO / BO, clear the two
 * accumulators C11 and C13, preload A1 A2 and B1 B2 with their PLU
 * companions B3 B4, and compute L = trip count / 4 for the .L1110 loop.
 * FETCH is `ld` (see the define at the top of the file) and targets $0,
 * so the loaded value is discarded.
 */
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B			# BO starts at B
#else
	dsll	TEMP, KK, ZBASE_SHIFT	# TEMP = KK << ZBASE_SHIFT, a byte offset

	daddu	AO, AO, TEMP		# both panels start TEMP bytes in
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	gsLQC1(R13, F9, F8, 0)		# B1 B2

	gsLQC1(R12, F1, F0, 0)		# A1 A2
	MOV	C13, C11

	FETCH	$0, 0 * SIZE(CO1)

	PLU	B3, B1, B1
	PLU	B4, B2, B2
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK		# trip count = K - KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1		# trip count = KK + 1
#else
	daddiu	TEMP, KK, 1		# same value as the LEFT branch
#endif
	dsra	L, TEMP, 2		# L = TEMP / 4 passes
	blez	L, .L112
	NOP

#else
	move	BO, B			# reset B
	dsra	L, K, 2			# L = K / 4 passes

	MTC	$0, C11			# clear result registers
	gsLQC1(R13, F9, F8, 0)		# B1 B2

	gsLQC1(R12, F1, F0, 0)		# A1 A2
	MOV	C13, C11

	FETCH	$0, 0 * SIZE(CO1)

	PLU	B3, B1, B1
	blez	L, .L112
	PLU	B4, B2, B2		# branch delay slot, B4 is needed at .L112
#endif

.L1110:
	daddiu	L, L, -1		# one pass fewer to go
/*
 * Remainder of the .L1110 loop (its label and the L decrement sit just
 * above), the two K tails (.L112, .L117), the write-back (.L110) of the
 * single-result block, and the common exit (.L10, .L999).
 *
 * Register roles, from the defines at the top of this file:
 *   - R12 / R13 are the register numbers of AO ($12) and BO ($13).
 *   - F0..F3 are the numbers of A1..A4, F8..F15 those of B1..B8.
 *   - Each gsLQC1 line fills two FP registers from the given base; its last
 *     argument is the offset field of the encoded instruction.
 *   - C11 and C13 are the two accumulators of this block.
 *   - 152($sp) and 160($sp) are the slots where the prologue stored $f15 and
 *     $f16; the write-back reads them back as alpha_r and alpha_i.
 *
 * MADPS, PLU, CVTU, MADD, NMSUB, MUL, NEG, LD and ST are macros defined
 * outside this chunk.  NOTE(review): from their use here they look like the
 * MIPS paired-single multiply-add, pair-lower-upper shuffle and
 * convert-pair-upper instructions plus the scalar madd / nmsub / mul / neg /
 * load / store forms -- confirm against common.h.
 *
 * The instruction after each branch is written to execute in the branch
 * delay slot; do not reorder across branches.
 *
 * Each pass of the loop issues 8 MADPS and advances both AO and BO by
 * 8 elements.
 */
	gsLQC1(R13, F13, F12, 1)	# B5 B6 from BO, offset field 1
	MADPS	C11, C11, A1, B1

	gsLQC1(R12, F3, F2, 1)		# A3 A4 from AO, offset field 1
	MADPS	C13, C13, A1, B3
	daddiu	BO, BO, 2 * 4 * SIZE	# BO += 8 elements

	PLU	B7, B5, B5		# B7, B8: PLU of B5, B6 with themselves
	PLU	B8, B6, B6
	daddiu	AO, AO, 2 * 4 * SIZE	# AO += 8 elements

	MADPS	C11, C11, A2, B2
	MADPS	C13, C13, A2, B4

	MADPS	C11, C11, A3, B5
	MADPS	C13, C13, A3, B7

	gsLQC1(R13, F9, F8, 0)		# B1 B2 for the next pass, BO already advanced
	MADPS	C11, C11, A4, B6

	gsLQC1(R12, F1, F0, 0)		# A1 A2 for the next pass, AO already advanced
	MADPS	C13, C13, A4, B8

	PLU	B3, B1, B1		# B3, B4: PLU of the new B1, B2 with themselves
	bgtz	L, .L1110
	PLU	B4, B2, B2		# branch delay slot


	.align	4
.L112:
/*
 * First K tail: taken when bit 1 of the trip count is set (K in the plain
 * build, TEMP in the TRMM build).  Issues 4 MADPS and advances AO and BO by
 * 4 elements each.
 */
#ifndef	TRMMKERNEL
	andi	L, K, 2
#else
	andi	L, TEMP, 2
#endif
	blez	L, .L117
	NOP

	MADPS	C11, C11, A1, B1
	MADPS	C13, C13, A1, B3
	daddiu	BO, BO, 4 * SIZE
	daddiu	AO, AO, 4 * SIZE

	MADPS	C11, C11, A2, B2
	MADPS	C13, C13, A2, B4

	gsLQC1(R13, F9, F8, 0)		# reload B1 B2 for .L117
	gsLQC1(R12, F1, F0, 0)		# reload A1 A2 for .L117
	PLU	B3, B1, B1


	.align	4
.L117:
/*
 * Second K tail: taken when bit 0 of the trip count is set.  Issues 2 MADPS
 * and advances AO and BO by 2 elements each.
 */
#ifndef	TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L110
	NOP

	daddiu	BO, BO, 2 * SIZE
	daddiu	AO, AO, 2 * SIZE

	MADPS	C11, C11, A1, B1
	MADPS	C13, C13, A1, B3


	.align	4
.L110:					# Write Back
/*
 * Write-back of one result (two SIZE-sized values at CO1).
 *   1. CVTU copies part of each accumulator into A1 and A3 (presumably the
 *      upper half of the pair -- see the note on the macros above).
 *   2. One of the four conjugation cases below combines the accumulator
 *      with that copy into a real part (C11) and an imaginary part (C13).
 *   3. The result is scaled by alpha as a complex product:
 *        re' = re * alpha_r - im * alpha_i
 *        im' = im * alpha_r + re * alpha_i
 * In this block alpha_r is loaded into A4 (not A1) and alpha_i into A2.
 * Plain build: the product is added to the values loaded from CO1.
 * TRMM build:  CO1 is not loaded; the product overwrites it (MUL replaces
 * the first MADD of each chain).
 * Exactly one of the four case blocks is expected to be enabled by the
 * build (NN.., NR.., RN.., RR.. defines).
 */
#ifndef TRMMKERNEL
	daddiu	I, I, -1		# I is not read again in this chunk
	CVTU	A1, C11
	CVTU	A3, C13

#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/* (a + bi) * (c + di) */
	SUB	C11, C11, A1		# real part
	ADD	C13, A3, C13		# imag part
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)	# current C value: re, im
	LD	B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		# A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#if defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/* (a + bi) * (c - di) */
	ADD	C11, A1, C11		# real part
	SUB	C13, A3, C13		# imag part
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		# A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#if defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/* (a - bi) * (c + di) */
	ADD	C11, A1, C11		# real part
	SUB	C13, C13, A3		# imag part
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		# A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/* (a - bi) * (c - di) */
	SUB	C11, C11, A1		# real part
	ADD	C13, A3, C13		# imag part before the sign flip
	NEG	C13, C13		# imag part is negated in this case
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	LD	B1, 0 * SIZE(CO1)
	LD	B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		# A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#else
	/* TRMM build: same combine and alpha scaling, CO1 is overwritten. */
	daddiu	I, I, -1		# I is not read again in this chunk
	CVTU	A1, C11
	CVTU	A3, C13

#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/* (a + bi) * (c + di) */
	SUB	C11, C11, A1		# real part
	ADD	C13, A3, C13		# imag part
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A4		# A4 = alpha_r
	MUL	B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#if defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/* (a + bi) * (c - di) */
	ADD	C11, A1, C11		# real part
	SUB	C13, A3, C13		# imag part
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A4		# A4 = alpha_r
	MUL	B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#if defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/* (a - bi) * (c + di) */
	ADD	C11, A1, C11		# real part
	SUB	C13, C13, A3		# imag part
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A4		# A4 = alpha_r
	MUL	B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif

#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/* (a - bi) * (c - di) */
	SUB	C11, C11, A1		# real part
	ADD	C13, A3, C13		# imag part before the sign flip
	NEG	C13, C13		# imag part is negated in this case
	LD	A4, 152($sp)		# alpha_r
	LD	A2, 160($sp)		# alpha_i

	MUL	B1, C11, A4		# A4 = alpha_r
	MUL	B2, C13, A4
	NMSUB	B1, B1, C13, A2		# A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST	B1, 0 * SIZE(CO1)
	ST	B2, 1 * SIZE(CO1)
#endif


/*
 * TRMM pointer bookkeeping after the block.  TEMP = K - KK - 1 in either
 * branch; AO and BO both move forward by TEMP << ZBASE_SHIFT bytes.
 * NOTE(review): presumably this skips the part of the packed panels that
 * this block did not consume -- confirm against the packing layout.
 */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -1
#else
	daddiu	TEMP, TEMP, -1		# same adjustment as the LEFT branch
#endif

	dsll	TEMP, TEMP, ZBASE_SHIFT

	daddu	AO, AO, TEMP
	daddu	BO, BO, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 1		# one more row done
#endif

#endif
	daddiu	CO1, CO1, 2 * SIZE	# step past the two values just stored
	daddiu	CO2, CO2, 2 * SIZE	# CO2 kept in step, it is not read in this chunk


	.align	4
.L10:
	move	B, BO			# B takes over the current BO
#if defined(TRMMKERNEL) && !defined(LEFT)
	daddiu	KK, KK, 1
#endif

.L999:
/*
 * Common exit: reload the registers that the prologue stored at the same
 * stack offsets, release the STACKSIZE-byte frame and return through $31.
 *
 * NOTE(review): the prologue at the top of this file stores, and this
 * sequence reloads, $f24-$f28 only, while C34, C43 and C44 are defined as
 * $f29-$f31 and are written by the kernel.  If the target ABI treats
 * $f29-$f31 as callee-saved they are clobbered for the caller -- confirm.
 * Any fix has to change the prologue and this sequence together; the
 * highest stack offset used in the visible code is 160 of STACKSIZE 192.
 */
	ld	$16,   0($sp)
	ld	$17,   8($sp)
	ld	$18,  16($sp)
	ld	$19,  24($sp)
	ld	$20,  32($sp)
	ld	$21,  40($sp)
	ld	$22,  48($sp)

	LD	$f24, 56($sp)
	LD	$f25, 64($sp)
	LD	$f26, 72($sp)
	LD	$f27, 80($sp)
	LD	$f28, 88($sp)

#if defined(TRMMKERNEL)
	ld	$23,  96($sp)		# OFFSET, KK and TEMP registers
	ld	$24, 104($sp)
	ld	$25, 112($sp)
#endif

#ifndef __64BIT__
	LD	$f20,120($sp)
	LD	$f21,128($sp)
	LD	$f22,136($sp)
	LD	$f23,144($sp)
#endif

	daddiu	$sp,$sp,STACKSIZE	# release the frame
	j	$31
	nop				# branch delay slot

	EPILOGUE