1/*********************************************************************/ 2/* */ 3/* Optimized BLAS libraries */ 4/* By Kazushige Goto <kgoto@tacc.utexas.edu> */ 5/* */ 6/* Copyright (c) The University of Texas, 2009. All rights reserved. */ 7/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING */ 8/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF */ 9/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, */ 10/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY */ 11/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF */ 12/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO */ 13/* THE USE OF THE SOFTWARE OR DOCUMENTATION. */ 14/* Under no circumstances shall University be liable for incidental, */ 15/* special, indirect, direct or consequential damages or loss of */ 16/* profits, interruption of business, or related expenses which may */ 17/* arise from use of Software or Documentation, including but not */ 18/* limited to those resulting from defects in Software and/or */ 19/* Documentation, or loss or inaccuracy of data of any kind. 
*/ 20/*********************************************************************/ 21 22#define ASSEMBLER 23#include "common.h" 24 25#define M $4 26#define N $5 27#define K $6 28#define A $8 29#define B $9 30#define C $10 31#define LDC $11 32 33#define AO $12 34#define BO $13 35 36#define I $2 37#define J $3 38#define L $7 39 40#define CO1 $14 41#define CO2 $15 42#define CO3 $16 43#define CO4 $17 44#define CO5 $18 45#define CO6 $19 46#define CO7 $20 47#define CO8 $21 48 49#define OFFSET $22 50#define KK $23 51#define TEMP $24 52#define AORIG $25 53 54#define a1 $f0 55#define a2 $f1 56#define a3 $f27 57#define a4 $f28 58 59#define b1 $f2 60#define b2 $f3 61#define b3 $f4 62#define b4 $f5 63#define b5 $f6 64#define b6 $f7 65#define b7 $f8 66#define b8 $f9 67 68#define a5 b8 69 70#define c11 $f10 71#define c12 $f11 72#define c21 $f12 73#define c22 $f13 74#define c31 $f14 75#define c32 $f16 76#define c41 $f17 77#define c42 $f18 78#define c51 $f19 79#define c52 $f20 80#define c61 $f21 81#define c62 $f22 82#define c71 $f23 83#define c72 $f24 84#define c81 $f25 85#define c82 $f26 86 87#define ALPHA $f15 88 89 PROLOGUE 90 91 daddiu $sp, $sp, -144 92 93 SDARG $16, 0($sp) 94 SDARG $17, 8($sp) 95 SDARG $18, 16($sp) 96 SDARG $19, 24($sp) 97 SDARG $20, 32($sp) 98 SDARG $21, 40($sp) 99 sdc1 $f24, 48($sp) 100 sdc1 $f25, 56($sp) 101 sdc1 $f26, 64($sp) 102 sdc1 $f27, 72($sp) 103 sdc1 $f28, 80($sp) 104 105 SDARG $22, 88($sp) 106 SDARG $23, 96($sp) 107 SDARG $24, 104($sp) 108 SDARG $25, 112($sp) 109 110#ifndef __64BIT__ 111 sdc1 $f20,112($sp) 112 sdc1 $f21,120($sp) 113 sdc1 $f22,128($sp) 114 sdc1 $f23,136($sp) 115#endif 116 117 LDARG OFFSET, 144($sp) 118 119 dsll LDC, LDC, BASE_SHIFT 120 121#ifdef LN 122 mult M, K 123 mflo TEMP 124 125 dsll TEMP, TEMP, BASE_SHIFT 126 daddu A, A, TEMP 127 128 dsll TEMP, M, BASE_SHIFT 129 daddu C, C, TEMP 130#endif 131 132#ifdef RN 133 neg KK, OFFSET 134#endif 135 136#ifdef RT 137 mult N, K 138 mflo TEMP 139 140 dsll TEMP, TEMP, BASE_SHIFT 141 daddu B, B, 
TEMP 142 143 mult N, LDC 144 mflo TEMP 145 daddu C, C, TEMP 146 147 dsubu KK, N, OFFSET 148#endif 149 150 andi J, N, 1 151 blez J, .L30 152 NOP 153 154#ifdef RT 155 dsll TEMP, K, BASE_SHIFT 156 dsubu B, B, TEMP 157 158 dsubu C, C, LDC 159#endif 160 161 move AO, A 162 move CO1, C 163 164#ifdef LN 165 daddu KK, M, OFFSET 166#endif 167 168#ifdef LT 169 move KK, OFFSET 170#endif 171 172#if defined(LN) || defined(RT) 173 move AORIG, A 174#else 175 move AO, A 176#endif 177#ifndef RT 178 daddu C, CO1, LDC 179#endif 180 181 dsra I, M, 1 182 blez I, .L80 183 NOP 184 185.L71: 186#if defined(LT) || defined(RN) 187 LD a1, 0 * SIZE(AO) 188 MTC $0, c11 189 LD a2, 1 * SIZE(AO) 190 MOV c21, c11 191 LD a5, 4 * SIZE(AO) 192 193 LD b1, 0 * SIZE(B) 194 MOV c12, c11 195 LD b2, 1 * SIZE(B) 196 MOV c22, c11 197 LD b3, 2 * SIZE(B) 198 LD b5, 4 * SIZE(B) 199 dsra L, KK, 2 200 LD b6, 8 * SIZE(B) 201 LD b7, 12 * SIZE(B) 202 203 blez L, .L75 204 move BO, B 205#else 206#ifdef LN 207 dsll TEMP, K, 1 + BASE_SHIFT 208 dsubu AORIG, AORIG, TEMP 209#endif 210 211 dsll L, KK, 1 + BASE_SHIFT 212 dsll TEMP, KK, 0 + BASE_SHIFT 213 214 daddu AO, AORIG, L 215 daddu BO, B, TEMP 216 217 dsubu TEMP, K, KK 218 219 LD a1, 0 * SIZE(AO) 220 MTC $0, c11 221 LD a2, 1 * SIZE(AO) 222 MOV c21, c11 223 LD a5, 4 * SIZE(AO) 224 225 LD b1, 0 * SIZE(BO) 226 MOV c12, c11 227 LD b2, 1 * SIZE(BO) 228 MOV c22, c11 229 LD b3, 2 * SIZE(BO) 230 LD b5, 4 * SIZE(BO) 231 dsra L, TEMP, 2 232 LD b6, 8 * SIZE(BO) 233 LD b7, 12 * SIZE(BO) 234 235 blez L, .L75 236 NOP 237#endif 238 .align 3 239 240.L72: 241 LD a1, 0 * SIZE(AO) 242 LD a2, 1 * SIZE(AO) 243 LD b1, 0 * SIZE(BO) 244 245 MADD c11, c11, a1, b1 246 MADD c12, c12, a2, b1 247 248 LD a1, 2 * SIZE(AO) 249 LD a2, 3 * SIZE(AO) 250 LD b1, 1 * SIZE(BO) 251 252 MADD c11, c11, a1, b1 253 MADD c12, c12, a2, b1 254 255 LD a1, 4 * SIZE(AO) 256 LD a2, 5 * SIZE(AO) 257 LD b1, 2 * SIZE(BO) 258 259 MADD c11, c11, a1, b1 260 MADD c12, c12, a2, b1 261 262 LD a1, 6 * SIZE(AO) 263 LD a2, 7 * 
SIZE(AO) 264 LD b1, 3 * SIZE(BO) 265 266 MADD c11, c11, a1, b1 267 MADD c12, c12, a2, b1 268 269 daddiu L, L, -1 270 daddiu AO, AO, 8 * SIZE 271 bgtz L, .L72 272 daddiu BO, BO, 4 * SIZE 273 .align 3 274 275.L75: 276#if defined(LT) || defined(RN) 277 andi L, KK, 3 278#else 279 andi L, TEMP, 3 280#endif 281 NOP 282 blez L, .L78 283 NOP 284 .align 3 285 286.L76: 287 LD a1, 0 * SIZE(AO) 288 LD a2, 1 * SIZE(AO) 289 LD b1, 0 * SIZE(BO) 290 291 MADD c11, c11, a1, b1 292 MADD c12, c12, a2, b1 293 294 daddiu L, L, -1 295 daddiu AO, AO, 2 * SIZE 296 bgtz L, .L76 297 daddiu BO, BO, 1 * SIZE 298 299.L78: 300 ADD c11, c11, c21 301 ADD c12, c12, c22 302 303#if defined(LN) || defined(RT) 304#ifdef LN 305 daddiu TEMP, KK, -2 306#else 307 daddiu TEMP, KK, -1 308#endif 309 310 dsll L, TEMP, 1 + BASE_SHIFT 311 dsll TEMP, TEMP, 0 + BASE_SHIFT 312 daddu AO, AORIG, L 313 daddu BO, B, TEMP 314#endif 315 316 317#if defined(LN) || defined(LT) 318 LD b1, 0 * SIZE(BO) 319 LD b2, 1 * SIZE(BO) 320 321 SUB c11, b1, c11 322 SUB c12, b2, c12 323#else 324 LD b1, 0 * SIZE(AO) 325 LD b2, 1 * SIZE(AO) 326 327 SUB c11, b1, c11 328 SUB c12, b2, c12 329#endif 330 331#ifdef LN 332 LD b1, 3 * SIZE(AO) 333 LD b2, 2 * SIZE(AO) 334 LD b3, 0 * SIZE(AO) 335 336 MUL c12, b1, c12 337 NMSUB c11, c11, b2, c12 338 MUL c11, b3, c11 339#endif 340 341#ifdef LT 342 LD b1, 0 * SIZE(AO) 343 LD b2, 1 * SIZE(AO) 344 LD b3, 3 * SIZE(AO) 345 346 MUL c11, b1, c11 347 NMSUB c12, c12, b2, c11 348 MUL c12, b3, c12 349#endif 350 351#if defined(RN) || defined(RT) 352 LD b1, 0 * SIZE(BO) 353 354 MUL c11, b1, c11 355 MUL c12, b1, c12 356#endif 357 358#ifdef LN 359 daddiu CO1, CO1, -2 * SIZE 360#endif 361 362#if defined(LN) || defined(LT) 363 ST c11, 0 * SIZE(BO) 364 ST c12, 1 * SIZE(BO) 365#else 366 ST c11, 0 * SIZE(AO) 367 ST c12, 1 * SIZE(AO) 368#endif 369 370 ST c11, 0 * SIZE(CO1) 371 ST c12, 1 * SIZE(CO1) 372 373#ifndef LN 374 daddiu CO1, CO1, 2 * SIZE 375#endif 376 377#ifdef RT 378 dsll TEMP, K, 1 + BASE_SHIFT 379 daddu AORIG, 
AORIG, TEMP 380#endif 381 382#if defined(LT) || defined(RN) 383 dsubu TEMP, K, KK 384 dsll L, TEMP, 1 + BASE_SHIFT 385 dsll TEMP, TEMP, 0 + BASE_SHIFT 386 daddu AO, AO, L 387 daddu BO, BO, TEMP 388#endif 389 390#ifdef LT 391 daddiu KK, KK, 2 392#endif 393 394#ifdef LN 395 daddiu KK, KK, -2 396#endif 397 398 daddiu I, I, -1 399 400 bgtz I, .L71 401 NOP 402 .align 3 403 404.L80: 405 andi I, M, 1 406 blez I, .L89 407 NOP 408 409#if defined(LT) || defined(RN) 410 LD a1, 0 * SIZE(AO) 411 MTC $0, c11 412 LD a2, 1 * SIZE(AO) 413 LD a3, 2 * SIZE(AO) 414 LD a4, 3 * SIZE(AO) 415 416 LD b1, 0 * SIZE(B) 417 LD b2, 1 * SIZE(B) 418 MOV c21, c11 419 LD b3, 2 * SIZE(B) 420 LD b4, 3 * SIZE(B) 421 LD b5, 4 * SIZE(B) 422 LD b6, 8 * SIZE(B) 423 LD b7, 12 * SIZE(B) 424 425 dsra L, KK, 2 426 blez L, .L85 427 move BO, B 428#else 429#ifdef LN 430 dsll TEMP, K, BASE_SHIFT 431 dsubu AORIG, AORIG, TEMP 432#endif 433 434 dsll TEMP, KK, BASE_SHIFT 435 436 daddu AO, AORIG, TEMP 437 daddu BO, B, TEMP 438 439 dsubu TEMP, K, KK 440 441 LD a1, 0 * SIZE(AO) 442 MTC $0, c11 443 LD a2, 1 * SIZE(AO) 444 LD a3, 2 * SIZE(AO) 445 LD a4, 3 * SIZE(AO) 446 447 LD b1, 0 * SIZE(BO) 448 LD b2, 1 * SIZE(BO) 449 LD b3, 2 * SIZE(BO) 450 LD b4, 3 * SIZE(BO) 451 MOV c21, c11 452 LD b5, 4 * SIZE(BO) 453 LD b6, 8 * SIZE(BO) 454 LD b7, 12 * SIZE(BO) 455 456 dsra L, TEMP, 2 457 blez L, .L85 458 NOP 459#endif 460 .align 3 461 462.L82: 463 LD a1, 0 * SIZE(AO) 464 LD b1, 0 * SIZE(BO) 465 466 MADD c11, c11, a1, b1 467 468 LD a1, 1 * SIZE(AO) 469 LD b1, 1 * SIZE(BO) 470 471 MADD c21, c21, a1, b1 472 473 LD a1, 2 * SIZE(AO) 474 LD b1, 2 * SIZE(BO) 475 476 MADD c11, c11, a1, b1 477 478 LD a1, 3 * SIZE(AO) 479 LD b1, 3 * SIZE(BO) 480 481 MADD c21, c21, a1, b1 482 483 daddiu L, L, -1 484 daddiu AO, AO, 4 * SIZE 485 bgtz L, .L82 486 daddiu BO, BO, 4 * SIZE 487 .align 3 488 489.L85: 490#if defined(LT) || defined(RN) 491 andi L, KK, 3 492#else 493 andi L, TEMP, 3 494#endif 495 NOP 496 blez L, .L88 497 NOP 498 .align 3 499 500.L86: 
501 LD a1, 0 * SIZE(AO) 502 LD b1, 0 * SIZE(BO) 503 504 MADD c11, c11, a1, b1 505 506 daddiu L, L, -1 507 daddiu AO, AO, 1 * SIZE 508 bgtz L, .L86 509 daddiu BO, BO, 1 * SIZE 510 511 512.L88: 513 ADD c11, c11, c21 514 515#if defined(LN) || defined(RT) 516#ifdef LN 517 daddiu TEMP, KK, -1 518#else 519 daddiu TEMP, KK, -1 520#endif 521 522 dsll TEMP, TEMP, 0 + BASE_SHIFT 523 daddu AO, AORIG, TEMP 524 daddu BO, B, TEMP 525#endif 526 527 528#if defined(LN) || defined(LT) 529 LD b1, 0 * SIZE(BO) 530 531 SUB c11, b1, c11 532#else 533 LD b1, 0 * SIZE(AO) 534 535 SUB c11, b1, c11 536#endif 537 538#if defined(LN) || defined(LT) 539 LD b1, 0 * SIZE(AO) 540 541 MUL c11, b1, c11 542#endif 543 544#if defined(RN) || defined(RT) 545 LD b1, 0 * SIZE(BO) 546 547 MUL c11, b1, c11 548#endif 549 550#ifdef LN 551 daddiu CO1, CO1, -1 * SIZE 552#endif 553 554#if defined(LN) || defined(LT) 555 ST c11, 0 * SIZE(BO) 556#else 557 ST c11, 0 * SIZE(AO) 558#endif 559 560 ST c11, 0 * SIZE(CO1) 561 562#ifndef LN 563 daddiu CO1, CO1, 1 * SIZE 564#endif 565 566#ifdef RT 567 dsll TEMP, K, BASE_SHIFT 568 daddu AORIG, AORIG, TEMP 569#endif 570 571#if defined(LT) || defined(RN) 572 dsubu TEMP, K, KK 573 dsll TEMP, TEMP, 0 + BASE_SHIFT 574 daddu AO, AO, TEMP 575 daddu BO, BO, TEMP 576#endif 577 578#ifdef LT 579 daddiu KK, KK, 1 580#endif 581 582#ifdef LN 583 daddiu KK, KK, -1 584#endif 585 .align 3 586 587.L89: 588#ifdef LN 589 dsll TEMP, K, BASE_SHIFT 590 daddu B, B, TEMP 591#endif 592 593#if defined(LT) || defined(RN) 594 move B, BO 595#endif 596 597#ifdef RN 598 daddiu KK, KK, 1 599#endif 600 601#ifdef RT 602 daddiu KK, KK, -1 603#endif 604 .align 3 605 606.L30: 607 andi J, N, 2 608 blez J, .L50 609 NOP 610 611#ifdef RT 612 dsll TEMP, K, 1 + BASE_SHIFT 613 dsubu B, B, TEMP 614 615 dsll TEMP, LDC, 1 616 dsubu C, C, TEMP 617#endif 618 619 move AO, A 620 move CO1, C 621 daddu CO2, C, LDC 622 623#ifdef LN 624 daddu KK, M, OFFSET 625#endif 626 627#ifdef LT 628 move KK, OFFSET 629#endif 630 631#if 
defined(LN) || defined(RT) 632 move AORIG, A 633#else 634 move AO, A 635#endif 636#ifndef RT 637 daddu C, CO2, LDC 638#endif 639 640 dsra I, M, 1 641 blez I, .L60 642 NOP 643 644.L51: 645#if defined(LT) || defined(RN) 646 LD a1, 0 * SIZE(AO) 647 MTC $0, c11 648 LD a2, 1 * SIZE(AO) 649 MOV c21, c11 650 LD a5, 4 * SIZE(AO) 651 652 LD b1, 0 * SIZE(B) 653 MOV c12, c11 654 LD b2, 1 * SIZE(B) 655 MOV c22, c11 656 LD b3, 2 * SIZE(B) 657 LD b5, 4 * SIZE(B) 658 dsra L, KK, 2 659 LD b6, 8 * SIZE(B) 660 LD b7, 12 * SIZE(B) 661 662 blez L, .L55 663 move BO, B 664 665#else 666#ifdef LN 667 dsll TEMP, K, 1 + BASE_SHIFT 668 dsubu AORIG, AORIG, TEMP 669#endif 670 671 dsll L, KK, 1 + BASE_SHIFT 672 dsll TEMP, KK, 1 + BASE_SHIFT 673 674 daddu AO, AORIG, L 675 daddu BO, B, TEMP 676 677 dsubu TEMP, K, KK 678 679 LD a1, 0 * SIZE(AO) 680 MTC $0, c11 681 LD a2, 1 * SIZE(AO) 682 MOV c21, c11 683 LD a5, 4 * SIZE(AO) 684 685 LD b1, 0 * SIZE(BO) 686 MOV c12, c11 687 LD b2, 1 * SIZE(BO) 688 MOV c22, c11 689 LD b3, 2 * SIZE(BO) 690 LD b5, 4 * SIZE(BO) 691 dsra L, TEMP, 2 692 LD b6, 8 * SIZE(BO) 693 LD b7, 12 * SIZE(BO) 694 695 blez L, .L55 696 NOP 697#endif 698 .align 3 699 700.L52: 701 MADD c11, c11, a1, b1 702 LD a3, 2 * SIZE(AO) 703 MADD c21, c21, a1, b2 704 LD b4, 3 * SIZE(BO) 705 MADD c12, c12, a2, b1 706 LD a4, 3 * SIZE(AO) 707 MADD c22, c22, a2, b2 708 LD b1, 8 * SIZE(BO) 709 710 MADD c11, c11, a3, b3 711 LD a1, 8 * SIZE(AO) 712 MADD c21, c21, a3, b4 713 LD b2, 5 * SIZE(BO) 714 MADD c12, c12, a4, b3 715 LD a2, 5 * SIZE(AO) 716 MADD c22, c22, a4, b4 717 LD b3, 6 * SIZE(BO) 718 719 MADD c11, c11, a5, b5 720 LD a3, 6 * SIZE(AO) 721 MADD c21, c21, a5, b2 722 LD b4, 7 * SIZE(BO) 723 MADD c12, c12, a2, b5 724 LD a4, 7 * SIZE(AO) 725 MADD c22, c22, a2, b2 726 LD b5, 12 * SIZE(BO) 727 728 MADD c11, c11, a3, b3 729 LD a5, 12 * SIZE(AO) 730 MADD c21, c21, a3, b4 731 LD b2, 9 * SIZE(BO) 732 MADD c12, c12, a4, b3 733 LD a2, 9 * SIZE(AO) 734 MADD c22, c22, a4, b4 735 LD b3, 10 * SIZE(BO) 736 737 
daddiu AO, AO, 8 * SIZE 738 daddiu L, L, -1 739 bgtz L, .L52 740 daddiu BO, BO, 8 * SIZE 741 .align 3 742 743.L55: 744#if defined(LT) || defined(RN) 745 andi L, KK, 3 746#else 747 andi L, TEMP, 3 748#endif 749 NOP 750 blez L, .L58 751 NOP 752 .align 3 753 754.L56: 755 MADD c11, c11, a1, b1 756 LD a2, 1 * SIZE(AO) 757 MADD c21, c21, a1, b2 758 LD a1, 2 * SIZE(AO) 759 760 MADD c12, c12, a2, b1 761 LD b1, 2 * SIZE(BO) 762 MADD c22, c22, a2, b2 763 LD b2, 3 * SIZE(BO) 764 765 daddiu L, L, -1 766 daddiu AO, AO, 2 * SIZE 767 bgtz L, .L56 768 daddiu BO, BO, 2 * SIZE 769 770.L58: 771#if defined(LN) || defined(RT) 772#ifdef LN 773 daddiu TEMP, KK, -2 774#else 775 daddiu TEMP, KK, -2 776#endif 777 778 dsll L, TEMP, 1 + BASE_SHIFT 779 dsll TEMP, TEMP, 1 + BASE_SHIFT 780 daddu AO, AORIG, L 781 daddu BO, B, TEMP 782#endif 783 784 785#if defined(LN) || defined(LT) 786 LD b1, 0 * SIZE(BO) 787 LD b2, 1 * SIZE(BO) 788 LD b3, 2 * SIZE(BO) 789 LD b4, 3 * SIZE(BO) 790 791 SUB c11, b1, c11 792 SUB c21, b2, c21 793 SUB c12, b3, c12 794 SUB c22, b4, c22 795#else 796 LD b1, 0 * SIZE(AO) 797 LD b2, 1 * SIZE(AO) 798 LD b3, 2 * SIZE(AO) 799 LD b4, 3 * SIZE(AO) 800 801 SUB c11, b1, c11 802 SUB c12, b2, c12 803 SUB c21, b3, c21 804 SUB c22, b4, c22 805#endif 806 807#ifdef LN 808 LD b1, 3 * SIZE(AO) 809 LD b2, 2 * SIZE(AO) 810 LD b3, 0 * SIZE(AO) 811 812 MUL c12, b1, c12 813 MUL c22, b1, c22 814 815 NMSUB c11, c11, b2, c12 816 NMSUB c21, c21, b2, c22 817 818 MUL c11, b3, c11 819 MUL c21, b3, c21 820#endif 821 822#ifdef LT 823 LD b1, 0 * SIZE(AO) 824 LD b2, 1 * SIZE(AO) 825 LD b3, 3 * SIZE(AO) 826 827 MUL c11, b1, c11 828 MUL c21, b1, c21 829 830 NMSUB c12, c12, b2, c11 831 NMSUB c22, c22, b2, c21 832 833 MUL c12, b3, c12 834 MUL c22, b3, c22 835#endif 836 837#ifdef RN 838 LD b1, 0 * SIZE(BO) 839 LD b2, 1 * SIZE(BO) 840 LD b3, 3 * SIZE(BO) 841 842 MUL c11, b1, c11 843 MUL c12, b1, c12 844 845 NMSUB c21, c21, b2, c11 846 NMSUB c22, c22, b2, c12 847 848 MUL c21, b3, c21 849 MUL c22, b3, c22 
850#endif 851 852#ifdef RT 853 LD b1, 3 * SIZE(BO) 854 LD b2, 2 * SIZE(BO) 855 LD b3, 0 * SIZE(BO) 856 857 MUL c21, b1, c21 858 MUL c22, b1, c22 859 860 NMSUB c11, c11, b2, c21 861 NMSUB c12, c12, b2, c22 862 863 MUL c11, b3, c11 864 MUL c12, b3, c12 865#endif 866 867#ifdef LN 868 daddiu CO1, CO1, -2 * SIZE 869 daddiu CO2, CO2, -2 * SIZE 870#endif 871 872#if defined(LN) || defined(LT) 873 ST c11, 0 * SIZE(BO) 874 ST c21, 1 * SIZE(BO) 875 ST c12, 2 * SIZE(BO) 876 ST c22, 3 * SIZE(BO) 877#else 878 ST c11, 0 * SIZE(AO) 879 ST c12, 1 * SIZE(AO) 880 ST c21, 2 * SIZE(AO) 881 ST c22, 3 * SIZE(AO) 882#endif 883 884 ST c11, 0 * SIZE(CO1) 885 ST c12, 1 * SIZE(CO1) 886 ST c21, 0 * SIZE(CO2) 887 ST c22, 1 * SIZE(CO2) 888 889#ifndef LN 890 daddiu CO1, CO1, 2 * SIZE 891 daddiu CO2, CO2, 2 * SIZE 892#endif 893 894#ifdef RT 895 dsll TEMP, K, 1 + BASE_SHIFT 896 daddu AORIG, AORIG, TEMP 897#endif 898 899#if defined(LT) || defined(RN) 900 dsubu TEMP, K, KK 901 dsll TEMP, TEMP, 1 + BASE_SHIFT 902 daddu AO, AO, TEMP 903 daddu BO, BO, TEMP 904#endif 905 906#ifdef LT 907 daddiu KK, KK, 2 908#endif 909 910#ifdef LN 911 daddiu KK, KK, -2 912#endif 913 914 MTC $0, a1 915 916 MOV c11, a1 917 MOV c21, a1 918 MOV c31, a1 919 920 daddiu I, I, -1 921 922 bgtz I, .L51 923 MOV c41, c11 924 .align 3 925 926.L60: 927 andi I, M, 1 928 blez I, .L69 929 NOP 930 931#if defined(LT) || defined(RN) 932 dsra L, KK, 2 933 LD a1, 0 * SIZE(AO) 934 MTC $0, c11 935 LD a2, 1 * SIZE(AO) 936 MOV c21, c11 937 LD a3, 2 * SIZE(AO) 938 MOV c31, c11 939 LD a4, 3 * SIZE(AO) 940 MOV c41, c11 941 942 LD b1, 0 * SIZE(B) 943 LD b2, 1 * SIZE(B) 944 LD b3, 2 * SIZE(B) 945 LD b4, 3 * SIZE(B) 946 LD b5, 4 * SIZE(B) 947 LD b6, 8 * SIZE(B) 948 LD b7, 12 * SIZE(B) 949 950 blez L, .L65 951 move BO, B 952#else 953#ifdef LN 954 dsll TEMP, K, BASE_SHIFT 955 dsubu AORIG, AORIG, TEMP 956#endif 957 958 dsll L, KK, 0 + BASE_SHIFT 959 dsll TEMP, KK, 1 + BASE_SHIFT 960 961 daddu AO, AORIG, L 962 daddu BO, B, TEMP 963 964 dsubu TEMP, K, KK 
965 966 dsra L, TEMP, 2 967 LD a1, 0 * SIZE(AO) 968 MTC $0, c11 969 LD a2, 1 * SIZE(AO) 970 MOV c21, c11 971 LD a3, 2 * SIZE(AO) 972 MOV c31, c11 973 LD a4, 3 * SIZE(AO) 974 MOV c41, c11 975 976 LD b1, 0 * SIZE(BO) 977 LD b2, 1 * SIZE(BO) 978 LD b3, 2 * SIZE(BO) 979 LD b4, 3 * SIZE(BO) 980 LD b5, 4 * SIZE(BO) 981 LD b6, 8 * SIZE(BO) 982 LD b7, 12 * SIZE(BO) 983 984 blez L, .L65 985 NOP 986#endif 987 .align 3 988 989.L62: 990 MADD c11, c11, a1, b1 991 LD b1, 4 * SIZE(BO) 992 MADD c21, c21, a1, b2 993 LD b2, 5 * SIZE(BO) 994 MADD c31, c31, a2, b3 995 LD b3, 6 * SIZE(BO) 996 MADD c41, c41, a2, b4 997 LD b4, 7 * SIZE(BO) 998 999 LD a1, 4 * SIZE(AO) 1000 LD a2, 5 * SIZE(AO) 1001 1002 MADD c11, c11, a3, b1 1003 LD b1, 8 * SIZE(BO) 1004 MADD c21, c21, a3, b2 1005 LD b2, 9 * SIZE(BO) 1006 MADD c31, c31, a4, b3 1007 LD b3, 10 * SIZE(BO) 1008 MADD c41, c41, a4, b4 1009 LD b4, 11 * SIZE(BO) 1010 1011 LD a3, 6 * SIZE(AO) 1012 LD a4, 7 * SIZE(AO) 1013 1014 daddiu L, L, -1 1015 daddiu AO, AO, 4 * SIZE 1016 1017 bgtz L, .L62 1018 daddiu BO, BO, 8 * SIZE 1019 .align 3 1020 1021.L65: 1022#if defined(LT) || defined(RN) 1023 andi L, KK, 3 1024#else 1025 andi L, TEMP, 3 1026#endif 1027 NOP 1028 blez L, .L68 1029 NOP 1030 .align 3 1031 1032.L66: 1033 MADD c11, c11, a1, b1 1034 LD b1, 2 * SIZE(BO) 1035 MADD c21, c21, a1, b2 1036 LD b2, 3 * SIZE(BO) 1037 1038 LD a1, 1 * SIZE(AO) 1039 daddiu L, L, -1 1040 1041 daddiu AO, AO, 1 * SIZE 1042 bgtz L, .L66 1043 daddiu BO, BO, 2 * SIZE 1044 1045 1046.L68: 1047 ADD c11, c11, c31 1048 ADD c21, c21, c41 1049 1050#if defined(LN) || defined(RT) 1051#ifdef LN 1052 daddiu TEMP, KK, -1 1053#else 1054 daddiu TEMP, KK, -2 1055#endif 1056 1057 dsll L, TEMP, 0 + BASE_SHIFT 1058 dsll TEMP, TEMP, 1 + BASE_SHIFT 1059 daddu AO, AORIG, L 1060 daddu BO, B, TEMP 1061#endif 1062 1063 1064#if defined(LN) || defined(LT) 1065 LD b1, 0 * SIZE(BO) 1066 LD b2, 1 * SIZE(BO) 1067 1068 SUB c11, b1, c11 1069 SUB c21, b2, c21 1070#else 1071 LD b1, 0 * SIZE(AO) 1072 LD b2, 1 
* SIZE(AO) 1073 1074 SUB c11, b1, c11 1075 SUB c21, b2, c21 1076#endif 1077 1078#if defined(LN) || defined(LT) 1079 LD b3, 0 * SIZE(AO) 1080 1081 MUL c11, b3, c11 1082 MUL c21, b3, c21 1083#endif 1084 1085#ifdef RN 1086 LD b1, 0 * SIZE(BO) 1087 LD b2, 1 * SIZE(BO) 1088 LD b3, 3 * SIZE(BO) 1089 1090 MUL c11, b1, c11 1091 1092 NMSUB c21, c21, b2, c11 1093 1094 MUL c21, b3, c21 1095#endif 1096 1097#ifdef RT 1098 LD b1, 3 * SIZE(BO) 1099 LD b2, 2 * SIZE(BO) 1100 LD b3, 0 * SIZE(BO) 1101 1102 MUL c21, b1, c21 1103 1104 NMSUB c11, c11, b2, c21 1105 1106 MUL c11, b3, c11 1107#endif 1108 1109#ifdef LN 1110 daddiu CO1, CO1, -1 * SIZE 1111 daddiu CO2, CO2, -1 * SIZE 1112#endif 1113 1114#if defined(LN) || defined(LT) 1115 ST c11, 0 * SIZE(BO) 1116 ST c21, 1 * SIZE(BO) 1117#else 1118 ST c11, 0 * SIZE(AO) 1119 ST c21, 1 * SIZE(AO) 1120#endif 1121 1122 ST c11, 0 * SIZE(CO1) 1123 ST c21, 0 * SIZE(CO2) 1124 1125#ifndef LN 1126 daddiu CO1, CO1, 1 * SIZE 1127 daddiu CO2, CO2, 1 * SIZE 1128#endif 1129 1130#ifdef RT 1131 dsll TEMP, K, 0 + BASE_SHIFT 1132 daddu AORIG, AORIG, TEMP 1133#endif 1134 1135#if defined(LT) || defined(RN) 1136 dsubu TEMP, K, KK 1137 dsll L, TEMP, 0 + BASE_SHIFT 1138 dsll TEMP, TEMP, 1 + BASE_SHIFT 1139 daddu AO, AO, L 1140 daddu BO, BO, TEMP 1141#endif 1142 1143#ifdef LT 1144 daddiu KK, KK, 1 1145#endif 1146 1147#ifdef LN 1148 daddiu KK, KK, -1 1149#endif 1150 .align 3 1151 1152.L69: 1153#ifdef LN 1154 dsll TEMP, K, 1 + BASE_SHIFT 1155 daddu B, B, TEMP 1156#endif 1157 1158#if defined(LT) || defined(RN) 1159 move B, BO 1160#endif 1161 1162#ifdef RN 1163 daddiu KK, KK, 2 1164#endif 1165 1166#ifdef RT 1167 daddiu KK, KK, -2 1168#endif 1169 .align 3 1170 1171.L50: 1172 andi J, N, 4 1173 blez J, .L70 1174 move AO, A 1175 1176#ifdef RT 1177 dsll TEMP, K, 2 + BASE_SHIFT 1178 dsubu B, B, TEMP 1179 1180 dsll TEMP, LDC, 2 1181 dsubu C, C, TEMP 1182#endif 1183 1184 move CO1, C 1185 MTC $0, c11 1186 daddu CO2, C, LDC 1187 daddu CO3, CO2, LDC 1188 daddu CO4, CO3, LDC 1189 
MOV c21, c11 1190 dsra I, M, 1 1191 MOV c31, c11 1192 1193#ifdef LN 1194 daddu KK, M, OFFSET 1195#endif 1196 1197#ifdef LT 1198 move KK, OFFSET 1199#endif 1200 1201#if defined(LN) || defined(RT) 1202 move AORIG, A 1203#else 1204 move AO, A 1205#endif 1206#ifndef RT 1207 daddu C, CO4, LDC 1208#endif 1209 1210 blez I, .L40 1211 MOV c41, c11 1212 1213.L31: 1214#if defined(LT) || defined(RN) 1215 LD a1, 0 * SIZE(AO) 1216 LD a3, 4 * SIZE(AO) 1217 1218 LD b1, 0 * SIZE(B) 1219 MOV c12, c11 1220 LD b2, 1 * SIZE(B) 1221 MOV c22, c11 1222 LD b3, 2 * SIZE(B) 1223 MOV c32, c11 1224 LD b4, 3 * SIZE(B) 1225 MOV c42, c11 1226 1227 LD b5, 4 * SIZE(B) 1228 dsra L, KK, 2 1229 LD b6, 8 * SIZE(B) 1230 LD b7, 12 * SIZE(B) 1231 1232 blez L, .L35 1233 move BO, B 1234#else 1235#ifdef LN 1236 dsll TEMP, K, 1 + BASE_SHIFT 1237 dsubu AORIG, AORIG, TEMP 1238#endif 1239 1240 dsll L, KK, 1 + BASE_SHIFT 1241 dsll TEMP, KK, 2 + BASE_SHIFT 1242 1243 daddu AO, AORIG, L 1244 daddu BO, B, TEMP 1245 1246 dsubu TEMP, K, KK 1247 1248 LD a1, 0 * SIZE(AO) 1249 LD a3, 4 * SIZE(AO) 1250 1251 LD b1, 0 * SIZE(BO) 1252 MOV c12, c11 1253 LD b2, 1 * SIZE(BO) 1254 MOV c22, c11 1255 LD b3, 2 * SIZE(BO) 1256 MOV c32, c11 1257 LD b4, 3 * SIZE(BO) 1258 MOV c42, c11 1259 1260 LD b5, 4 * SIZE(BO) 1261 dsra L, TEMP, 2 1262 LD b6, 8 * SIZE(BO) 1263 LD b7, 12 * SIZE(BO) 1264 1265 blez L, .L35 1266 NOP 1267#endif 1268 .align 3 1269 1270.L32: 1271 MADD c11, c11, a1, b1 1272 LD a2, 1 * SIZE(AO) 1273 MADD c21, c21, a1, b2 1274 daddiu L, L, -1 1275 MADD c31, c31, a1, b3 1276 NOP 1277 MADD c41, c41, a1, b4 1278 LD a1, 2 * SIZE(AO) 1279 1280 MADD c12, c12, a2, b1 1281 LD b1, 16 * SIZE(BO) 1282 MADD c22, c22, a2, b2 1283 LD b2, 5 * SIZE(BO) 1284 MADD c32, c32, a2, b3 1285 LD b3, 6 * SIZE(BO) 1286 MADD c42, c42, a2, b4 1287 LD b4, 7 * SIZE(BO) 1288 1289 MADD c11, c11, a1, b5 1290 LD a2, 3 * SIZE(AO) 1291 MADD c21, c21, a1, b2 1292 NOP 1293 MADD c31, c31, a1, b3 1294 NOP 1295 MADD c41, c41, a1, b4 1296 LD a1, 8 * SIZE(AO) 1297 1298 
MADD c12, c12, a2, b5 1299 LD b5, 20 * SIZE(BO) 1300 MADD c22, c22, a2, b2 1301 LD b2, 9 * SIZE(BO) 1302 MADD c32, c32, a2, b3 1303 LD b3, 10 * SIZE(BO) 1304 MADD c42, c42, a2, b4 1305 LD b4, 11 * SIZE(BO) 1306 1307 MADD c11, c11, a3, b6 1308 LD a2, 5 * SIZE(AO) 1309 MADD c21, c21, a3, b2 1310 NOP 1311 MADD c31, c31, a3, b3 1312 NOP 1313 MADD c41, c41, a3, b4 1314 LD a3, 6 * SIZE(AO) 1315 1316 MADD c12, c12, a2, b6 1317 LD b6, 24 * SIZE(BO) 1318 MADD c22, c22, a2, b2 1319 LD b2, 13 * SIZE(BO) 1320 MADD c32, c32, a2, b3 1321 LD b3, 14 * SIZE(BO) 1322 MADD c42, c42, a2, b4 1323 LD b4, 15 * SIZE(BO) 1324 1325 MADD c11, c11, a3, b7 1326 LD a2, 7 * SIZE(AO) 1327 MADD c21, c21, a3, b2 1328 daddiu AO, AO, 8 * SIZE 1329 MADD c31, c31, a3, b3 1330 daddiu BO, BO, 16 * SIZE 1331 MADD c41, c41, a3, b4 1332 LD a3, 4 * SIZE(AO) 1333 1334 MADD c12, c12, a2, b7 1335 LD b7, 12 * SIZE(BO) 1336 MADD c22, c22, a2, b2 1337 LD b2, 1 * SIZE(BO) 1338 MADD c32, c32, a2, b3 1339 LD b3, 2 * SIZE(BO) 1340 MADD c42, c42, a2, b4 1341 NOP 1342 1343 bgtz L, .L32 1344 LD b4, 3 * SIZE(BO) 1345 .align 3 1346 1347.L35: 1348#if defined(LT) || defined(RN) 1349 andi L, KK, 3 1350#else 1351 andi L, TEMP, 3 1352#endif 1353 NOP 1354 blez L, .L38 1355 NOP 1356 .align 3 1357 1358.L36: 1359 MADD c11, c11, a1, b1 1360 LD a2, 1 * SIZE(AO) 1361 MADD c21, c21, a1, b2 1362 daddiu L, L, -1 1363 MADD c31, c31, a1, b3 1364 daddiu AO, AO, 2 * SIZE 1365 MADD c41, c41, a1, b4 1366 LD a1, 0 * SIZE(AO) 1367 1368 MADD c12, c12, a2, b1 1369 LD b1, 4 * SIZE(BO) 1370 MADD c22, c22, a2, b2 1371 LD b2, 5 * SIZE(BO) 1372 MADD c32, c32, a2, b3 1373 LD b3, 6 * SIZE(BO) 1374 MADD c42, c42, a2, b4 1375 LD b4, 7 * SIZE(BO) 1376 1377 bgtz L, .L36 1378 daddiu BO, BO, 4 * SIZE 1379 1380.L38: 1381#if defined(LN) || defined(RT) 1382#ifdef LN 1383 daddiu TEMP, KK, -2 1384#else 1385 daddiu TEMP, KK, -4 1386#endif 1387 1388 dsll L, TEMP, 1 + BASE_SHIFT 1389 dsll TEMP, TEMP, 2 + BASE_SHIFT 1390 daddu AO, AORIG, L 1391 daddu BO, B, TEMP 
1392#endif 1393 1394 1395#if defined(LN) || defined(LT) 1396 LD b1, 0 * SIZE(BO) 1397 LD b2, 1 * SIZE(BO) 1398 LD b3, 2 * SIZE(BO) 1399 LD b4, 3 * SIZE(BO) 1400 LD b5, 4 * SIZE(BO) 1401 LD b6, 5 * SIZE(BO) 1402 LD b7, 6 * SIZE(BO) 1403 LD b8, 7 * SIZE(BO) 1404 1405 SUB c11, b1, c11 1406 SUB c21, b2, c21 1407 SUB c31, b3, c31 1408 SUB c41, b4, c41 1409 SUB c12, b5, c12 1410 SUB c22, b6, c22 1411 SUB c32, b7, c32 1412 SUB c42, b8, c42 1413#else 1414 LD b1, 0 * SIZE(AO) 1415 LD b2, 1 * SIZE(AO) 1416 LD b3, 2 * SIZE(AO) 1417 LD b4, 3 * SIZE(AO) 1418 LD b5, 4 * SIZE(AO) 1419 LD b6, 5 * SIZE(AO) 1420 LD b7, 6 * SIZE(AO) 1421 LD b8, 7 * SIZE(AO) 1422 1423 SUB c11, b1, c11 1424 SUB c12, b2, c12 1425 SUB c21, b3, c21 1426 SUB c22, b4, c22 1427 SUB c31, b5, c31 1428 SUB c32, b6, c32 1429 SUB c41, b7, c41 1430 SUB c42, b8, c42 1431#endif 1432 1433#ifdef LN 1434 LD b1, 3 * SIZE(AO) 1435 LD b2, 2 * SIZE(AO) 1436 LD b3, 0 * SIZE(AO) 1437 1438 MUL c12, b1, c12 1439 MUL c22, b1, c22 1440 MUL c32, b1, c32 1441 MUL c42, b1, c42 1442 1443 NMSUB c11, c11, b2, c12 1444 NMSUB c21, c21, b2, c22 1445 NMSUB c31, c31, b2, c32 1446 NMSUB c41, c41, b2, c42 1447 1448 MUL c11, b3, c11 1449 MUL c21, b3, c21 1450 MUL c31, b3, c31 1451 MUL c41, b3, c41 1452#endif 1453 1454#ifdef LT 1455 LD b1, 0 * SIZE(AO) 1456 LD b2, 1 * SIZE(AO) 1457 LD b3, 3 * SIZE(AO) 1458 1459 MUL c11, b1, c11 1460 MUL c21, b1, c21 1461 MUL c31, b1, c31 1462 MUL c41, b1, c41 1463 1464 NMSUB c12, c12, b2, c11 1465 NMSUB c22, c22, b2, c21 1466 NMSUB c32, c32, b2, c31 1467 NMSUB c42, c42, b2, c41 1468 1469 MUL c12, b3, c12 1470 MUL c22, b3, c22 1471 MUL c32, b3, c32 1472 MUL c42, b3, c42 1473#endif 1474 1475#ifdef RN 1476 LD b1, 0 * SIZE(BO) 1477 LD b2, 1 * SIZE(BO) 1478 LD b3, 2 * SIZE(BO) 1479 LD b4, 3 * SIZE(BO) 1480 1481 MUL c11, b1, c11 1482 MUL c12, b1, c12 1483 1484 NMSUB c21, c21, b2, c11 1485 NMSUB c22, c22, b2, c12 1486 NMSUB c31, c31, b3, c11 1487 NMSUB c32, c32, b3, c12 1488 NMSUB c41, c41, b4, c11 1489 NMSUB c42, 
c42, b4, c12 1490 1491 LD b2, 5 * SIZE(BO) 1492 LD b3, 6 * SIZE(BO) 1493 LD b4, 7 * SIZE(BO) 1494 1495 MUL c21, b2, c21 1496 MUL c22, b2, c22 1497 1498 NMSUB c31, c31, b3, c21 1499 NMSUB c32, c32, b3, c22 1500 NMSUB c41, c41, b4, c21 1501 NMSUB c42, c42, b4, c22 1502 1503 LD b3, 10 * SIZE(BO) 1504 LD b4, 11 * SIZE(BO) 1505 1506 MUL c31, b3, c31 1507 MUL c32, b3, c32 1508 1509 NMSUB c41, c41, b4, c31 1510 NMSUB c42, c42, b4, c32 1511 1512 LD b4, 15 * SIZE(BO) 1513 1514 MUL c41, b4, c41 1515 MUL c42, b4, c42 1516#endif 1517 1518#ifdef RT 1519 LD b5, 15 * SIZE(BO) 1520 LD b6, 14 * SIZE(BO) 1521 LD b7, 13 * SIZE(BO) 1522 LD b8, 12 * SIZE(BO) 1523 1524 MUL c41, b5, c41 1525 MUL c42, b5, c42 1526 1527 NMSUB c31, c31, b6, c41 1528 NMSUB c32, c32, b6, c42 1529 NMSUB c21, c21, b7, c41 1530 NMSUB c22, c22, b7, c42 1531 NMSUB c11, c11, b8, c41 1532 NMSUB c12, c12, b8, c42 1533 1534 LD b6, 10 * SIZE(BO) 1535 LD b7, 9 * SIZE(BO) 1536 LD b8, 8 * SIZE(BO) 1537 1538 MUL c31, b6, c31 1539 MUL c32, b6, c32 1540 1541 NMSUB c21, c21, b7, c31 1542 NMSUB c22, c22, b7, c32 1543 NMSUB c11, c11, b8, c31 1544 NMSUB c12, c12, b8, c32 1545 1546 LD b7, 5 * SIZE(BO) 1547 LD b8, 4 * SIZE(BO) 1548 1549 MUL c21, b7, c21 1550 MUL c22, b7, c22 1551 1552 NMSUB c11, c11, b8, c21 1553 NMSUB c12, c12, b8, c22 1554 1555 LD b8, 0 * SIZE(BO) 1556 1557 MUL c11, b8, c11 1558 MUL c12, b8, c12 1559#endif 1560 1561#ifdef LN 1562 daddiu CO1, CO1, -2 * SIZE 1563 daddiu CO2, CO2, -2 * SIZE 1564 daddiu CO3, CO3, -2 * SIZE 1565 daddiu CO4, CO4, -2 * SIZE 1566#endif 1567 1568#if defined(LN) || defined(LT) 1569 ST c11, 0 * SIZE(BO) 1570 ST c21, 1 * SIZE(BO) 1571 ST c31, 2 * SIZE(BO) 1572 ST c41, 3 * SIZE(BO) 1573 ST c12, 4 * SIZE(BO) 1574 ST c22, 5 * SIZE(BO) 1575 ST c32, 6 * SIZE(BO) 1576 ST c42, 7 * SIZE(BO) 1577#else 1578 ST c11, 0 * SIZE(AO) 1579 ST c12, 1 * SIZE(AO) 1580 ST c21, 2 * SIZE(AO) 1581 ST c22, 3 * SIZE(AO) 1582 ST c31, 4 * SIZE(AO) 1583 ST c32, 5 * SIZE(AO) 1584 ST c41, 6 * SIZE(AO) 1585 ST c42, 7 * 
SIZE(AO) 1586#endif 1587 1588 ST c11, 0 * SIZE(CO1) 1589 ST c12, 1 * SIZE(CO1) 1590 ST c21, 0 * SIZE(CO2) 1591 ST c22, 1 * SIZE(CO2) 1592 ST c31, 0 * SIZE(CO3) 1593 ST c32, 1 * SIZE(CO3) 1594 ST c41, 0 * SIZE(CO4) 1595 ST c42, 1 * SIZE(CO4) 1596 1597#ifndef LN 1598 daddiu CO1, CO1, 2 * SIZE 1599 daddiu CO2, CO2, 2 * SIZE 1600 daddiu CO3, CO3, 2 * SIZE 1601 daddiu CO4, CO4, 2 * SIZE 1602#endif 1603 1604#ifdef RT 1605 dsll TEMP, K, 1 + BASE_SHIFT 1606 daddu AORIG, AORIG, TEMP 1607#endif 1608 1609#if defined(LT) || defined(RN) 1610 dsubu TEMP, K, KK 1611 dsll L, TEMP, 1 + BASE_SHIFT 1612 dsll TEMP, TEMP, 2 + BASE_SHIFT 1613 daddu AO, AO, L 1614 daddu BO, BO, TEMP 1615#endif 1616 1617#ifdef LT 1618 daddiu KK, KK, 2 1619#endif 1620 1621#ifdef LN 1622 daddiu KK, KK, -2 1623#endif 1624 1625 MTC $0, a1 1626 1627 MOV c11, a1 1628 MOV c21, a1 1629 MOV c31, a1 1630 1631 daddiu I, I, -1 1632 1633 bgtz I, .L31 1634 MOV c41, c11 1635 .align 3 1636 1637.L40: 1638 andi I, M, 1 1639 blez I, .L49 1640 MOV c61, c11 1641 1642#if defined(LT) || defined(RN) 1643 LD a1, 0 * SIZE(AO) 1644 MOV c71, c11 1645 LD a2, 1 * SIZE(AO) 1646 MOV c81, c11 1647 1648 LD b1, 0 * SIZE(B) 1649 LD b2, 1 * SIZE(B) 1650 LD b3, 2 * SIZE(B) 1651 LD b4, 3 * SIZE(B) 1652 LD b5, 4 * SIZE(B) 1653 LD b6, 8 * SIZE(B) 1654 LD b7, 12 * SIZE(B) 1655 1656 dsra L, KK, 2 1657 1658 blez L, .L45 1659 move BO, B 1660#else 1661#ifdef LN 1662 dsll TEMP, K, BASE_SHIFT 1663 dsubu AORIG, AORIG, TEMP 1664#endif 1665 1666 dsll L, KK, 0 + BASE_SHIFT 1667 dsll TEMP, KK, 2 + BASE_SHIFT 1668 1669 daddu AO, AORIG, L 1670 daddu BO, B, TEMP 1671 1672 dsubu TEMP, K, KK 1673 1674 LD a1, 0 * SIZE(AO) 1675 MOV c71, c11 1676 LD a2, 1 * SIZE(AO) 1677 MOV c81, c11 1678 1679 LD b1, 0 * SIZE(BO) 1680 LD b2, 1 * SIZE(BO) 1681 LD b3, 2 * SIZE(BO) 1682 LD b4, 3 * SIZE(BO) 1683 LD b5, 4 * SIZE(BO) 1684 LD b6, 8 * SIZE(BO) 1685 LD b7, 12 * SIZE(BO) 1686 1687 dsra L, TEMP, 2 1688 1689 blez L, .L45 1690 NOP 1691#endif 1692 .align 3 1693 1694.L42: 1695 
MADD c11, c11, a1, b1 1696 LD b1, 16 * SIZE(BO) 1697 MADD c21, c21, a1, b2 1698 LD b2, 5 * SIZE(BO) 1699 MADD c31, c31, a1, b3 1700 LD b3, 6 * SIZE(BO) 1701 MADD c41, c41, a1, b4 1702 LD b4, 7 * SIZE(BO) 1703 1704 LD a1, 4 * SIZE(AO) 1705 daddiu L, L, -1 1706 1707 MADD c11, c11, a2, b5 1708 LD b5, 20 * SIZE(BO) 1709 MADD c21, c21, a2, b2 1710 LD b2, 9 * SIZE(BO) 1711 MADD c31, c31, a2, b3 1712 LD b3, 10 * SIZE(BO) 1713 MADD c41, c41, a2, b4 1714 LD b4, 11 * SIZE(BO) 1715 1716 LD a2, 2 * SIZE(AO) 1717 daddiu AO, AO, 4 * SIZE 1718 1719 MADD c11, c11, a2, b6 1720 LD b6, 24 * SIZE(BO) 1721 MADD c21, c21, a2, b2 1722 LD b2, 13 * SIZE(BO) 1723 MADD c31, c31, a2, b3 1724 LD b3, 14 * SIZE(BO) 1725 MADD c41, c41, a2, b4 1726 LD b4, 15 * SIZE(BO) 1727 1728 LD a2, -1 * SIZE(AO) 1729 daddiu BO, BO, 16 * SIZE 1730 1731 MADD c11, c11, a2, b7 1732 LD b7, 12 * SIZE(BO) 1733 MADD c21, c21, a2, b2 1734 LD b2, 1 * SIZE(BO) 1735 MADD c31, c31, a2, b3 1736 LD b3, 2 * SIZE(BO) 1737 MADD c41, c41, a2, b4 1738 LD b4, 3 * SIZE(BO) 1739 1740 bgtz L, .L42 1741 LD a2, 1 * SIZE(AO) 1742 .align 3 1743 1744.L45: 1745#if defined(LT) || defined(RN) 1746 andi L, KK, 3 1747#else 1748 andi L, TEMP, 3 1749#endif 1750 NOP 1751 blez L, .L48 1752 NOP 1753 .align 3 1754 1755.L46: 1756 MADD c11, c11, a1, b1 1757 LD b1, 4 * SIZE(BO) 1758 MADD c21, c21, a1, b2 1759 LD b2, 5 * SIZE(BO) 1760 MADD c31, c31, a1, b3 1761 LD b3, 6 * SIZE(BO) 1762 MADD c41, c41, a1, b4 1763 LD a1, 1 * SIZE(AO) 1764 1765 LD b4, 7 * SIZE(BO) 1766 daddiu L, L, -1 1767 1768 daddiu AO, AO, 1 * SIZE 1769 MOV a2, a2 1770 bgtz L, .L46 1771 daddiu BO, BO, 4 * SIZE 1772 1773 1774.L48: 1775#if defined(LN) || defined(RT) 1776#ifdef LN 1777 daddiu TEMP, KK, -1 1778#else 1779 daddiu TEMP, KK, -4 1780#endif 1781 1782 dsll L, TEMP, 0 + BASE_SHIFT 1783 dsll TEMP, TEMP, 2 + BASE_SHIFT 1784 daddu AO, AORIG, L 1785 daddu BO, B, TEMP 1786#endif 1787 1788 1789#if defined(LN) || defined(LT) 1790 LD b1, 0 * SIZE(BO) 1791 LD b2, 1 * SIZE(BO) 1792 LD b3, 2 
* SIZE(BO) 1793 LD b4, 3 * SIZE(BO) 1794 1795 SUB c11, b1, c11 1796 SUB c21, b2, c21 1797 SUB c31, b3, c31 1798 SUB c41, b4, c41 1799#else 1800 LD b1, 0 * SIZE(AO) 1801 LD b2, 1 * SIZE(AO) 1802 LD b3, 2 * SIZE(AO) 1803 LD b4, 3 * SIZE(AO) 1804 1805 SUB c11, b1, c11 1806 SUB c21, b2, c21 1807 SUB c31, b3, c31 1808 SUB c41, b4, c41 1809#endif 1810 1811#if defined(LN) || defined(LT) 1812 LD b1, 0 * SIZE(AO) 1813 1814 MUL c11, b1, c11 1815 MUL c21, b1, c21 1816 MUL c31, b1, c31 1817 MUL c41, b1, c41 1818#endif 1819 1820#ifdef RN 1821 LD b1, 0 * SIZE(BO) 1822 LD b2, 1 * SIZE(BO) 1823 LD b3, 2 * SIZE(BO) 1824 LD b4, 3 * SIZE(BO) 1825 1826 MUL c11, b1, c11 1827 1828 NMSUB c21, c21, b2, c11 1829 NMSUB c31, c31, b3, c11 1830 NMSUB c41, c41, b4, c11 1831 1832 LD b2, 5 * SIZE(BO) 1833 LD b3, 6 * SIZE(BO) 1834 LD b4, 7 * SIZE(BO) 1835 1836 MUL c21, b2, c21 1837 1838 NMSUB c31, c31, b3, c21 1839 NMSUB c41, c41, b4, c21 1840 1841 LD b3, 10 * SIZE(BO) 1842 LD b4, 11 * SIZE(BO) 1843 1844 MUL c31, b3, c31 1845 1846 NMSUB c41, c41, b4, c31 1847 1848 LD b4, 15 * SIZE(BO) 1849 1850 MUL c41, b4, c41 1851#endif 1852 1853#ifdef RT 1854 LD b5, 15 * SIZE(BO) 1855 LD b6, 14 * SIZE(BO) 1856 LD b7, 13 * SIZE(BO) 1857 LD b8, 12 * SIZE(BO) 1858 1859 MUL c41, b5, c41 1860 1861 NMSUB c31, c31, b6, c41 1862 NMSUB c21, c21, b7, c41 1863 NMSUB c11, c11, b8, c41 1864 1865 LD b6, 10 * SIZE(BO) 1866 LD b7, 9 * SIZE(BO) 1867 LD b8, 8 * SIZE(BO) 1868 1869 MUL c31, b6, c31 1870 1871 NMSUB c21, c21, b7, c31 1872 NMSUB c11, c11, b8, c31 1873 1874 LD b7, 5 * SIZE(BO) 1875 LD b8, 4 * SIZE(BO) 1876 1877 MUL c21, b7, c21 1878 1879 NMSUB c11, c11, b8, c21 1880 1881 LD b8, 0 * SIZE(BO) 1882 1883 MUL c11, b8, c11 1884#endif 1885 1886#ifdef LN 1887 daddiu CO1, CO1, -1 * SIZE 1888 daddiu CO2, CO2, -1 * SIZE 1889 daddiu CO3, CO3, -1 * SIZE 1890 daddiu CO4, CO4, -1 * SIZE 1891#endif 1892 1893#if defined(LN) || defined(LT) 1894 ST c11, 0 * SIZE(BO) 1895 ST c21, 1 * SIZE(BO) 1896 ST c31, 2 * SIZE(BO) 1897 ST c41, 3 * 
SIZE(BO) 1898#else 1899 ST c11, 0 * SIZE(AO) 1900 ST c21, 1 * SIZE(AO) 1901 ST c31, 2 * SIZE(AO) 1902 ST c41, 3 * SIZE(AO) 1903#endif 1904 1905 ST c11, 0 * SIZE(CO1) 1906 ST c21, 0 * SIZE(CO2) 1907 ST c31, 0 * SIZE(CO3) 1908 ST c41, 0 * SIZE(CO4) 1909 1910#ifndef LN 1911 daddiu CO1, CO1, 1 * SIZE 1912 daddiu CO2, CO2, 1 * SIZE 1913 daddiu CO3, CO3, 1 * SIZE 1914 daddiu CO4, CO4, 1 * SIZE 1915#endif 1916 1917#ifdef RT 1918 dsll TEMP, K, BASE_SHIFT 1919 daddu AORIG, AORIG, TEMP 1920#endif 1921 1922#if defined(LT) || defined(RN) 1923 dsubu TEMP, K, KK 1924 dsll L, TEMP, 0 + BASE_SHIFT 1925 dsll TEMP, TEMP, 2 + BASE_SHIFT 1926 daddu AO, AO, L 1927 daddu BO, BO, TEMP 1928#endif 1929 1930#ifdef LT 1931 daddiu KK, KK, 1 1932#endif 1933 1934#ifdef LN 1935 daddiu KK, KK, -1 1936#endif 1937 .align 3 1938 1939.L49: 1940#ifdef LN 1941 dsll TEMP, K, 2 + BASE_SHIFT 1942 daddu B, B, TEMP 1943#endif 1944 1945#if defined(LT) || defined(RN) 1946 move B, BO 1947#endif 1948 1949#ifdef RN 1950 daddiu KK, KK, 4 1951#endif 1952 1953#ifdef RT 1954 daddiu KK, KK, -4 1955#endif 1956 .align 3 1957 1958.L70: 1959 dsra J, N, 3 1960 blez J, .L999 1961 nop 1962 1963.L10: 1964#ifdef RT 1965 dsll TEMP, K, 3 + BASE_SHIFT 1966 dsubu B, B, TEMP 1967 1968 dsll TEMP, LDC, 3 1969 dsubu C, C, TEMP 1970#endif 1971 1972 move CO1, C 1973 MTC $0, c11 1974 daddu CO2, C, LDC 1975 daddu CO3, CO2, LDC 1976 daddiu J, J, -1 1977 daddu CO4, CO3, LDC 1978 MOV c21, c11 1979 daddu CO5, CO4, LDC 1980 MOV c31, c11 1981 daddu CO6, CO5, LDC 1982 MOV c41, c11 1983 daddu CO7, CO6, LDC 1984 MOV c51, c11 1985 daddu CO8, CO7, LDC 1986 dsra I, M, 1 1987 1988#ifdef LN 1989 daddu KK, M, OFFSET 1990#endif 1991 1992#ifdef LT 1993 move KK, OFFSET 1994#endif 1995 1996#if defined(LN) || defined(RT) 1997 move AORIG, A 1998#else 1999 move AO, A 2000#endif 2001#ifndef RT 2002 daddu C, CO8, LDC 2003#endif 2004 2005 blez I, .L20 2006 MOV c61, c11 2007 2008.L11: 2009#if defined(LT) || defined(RN) 2010 LD a1, 0 * SIZE(AO) 2011 MOV c71, c11 
2012 LD b1, 0 * SIZE(B) 2013 MOV c81, c11 2014 2015 LD a3, 4 * SIZE(AO) 2016 MOV c12, c11 2017 LD b2, 1 * SIZE(B) 2018 MOV c22, c11 2019 2020 dsra L, KK, 2 2021 MOV c32, c11 2022 LD b3, 2 * SIZE(B) 2023 MOV c42, c11 2024 2025 LD b4, 3 * SIZE(B) 2026 MOV c52, c11 2027 LD b5, 4 * SIZE(B) 2028 MOV c62, c11 2029 2030 LD b6, 8 * SIZE(B) 2031 MOV c72, c11 2032 LD b7, 12 * SIZE(B) 2033 MOV c82, c11 2034 2035 blez L, .L15 2036 move BO, B 2037#else 2038 2039#ifdef LN 2040 dsll TEMP, K, 1 + BASE_SHIFT 2041 dsubu AORIG, AORIG, TEMP 2042#endif 2043 2044 dsll L, KK, 1 + BASE_SHIFT 2045 dsll TEMP, KK, 3 + BASE_SHIFT 2046 2047 daddu AO, AORIG, L 2048 daddu BO, B, TEMP 2049 2050 dsubu TEMP, K, KK 2051 2052 LD a1, 0 * SIZE(AO) 2053 MOV c71, c11 2054 LD b1, 0 * SIZE(BO) 2055 MOV c81, c11 2056 2057 LD a3, 4 * SIZE(AO) 2058 MOV c12, c11 2059 LD b2, 1 * SIZE(BO) 2060 MOV c22, c11 2061 2062 MOV c32, c11 2063 LD b3, 2 * SIZE(BO) 2064 MOV c42, c11 2065 2066 LD b4, 3 * SIZE(BO) 2067 MOV c52, c11 2068 LD b5, 4 * SIZE(BO) 2069 MOV c62, c11 2070 2071 LD b6, 8 * SIZE(BO) 2072 MOV c72, c11 2073 LD b7, 12 * SIZE(BO) 2074 MOV c82, c11 2075 2076 dsra L, TEMP, 2 2077 blez L, .L15 2078 NOP 2079#endif 2080 2081 MADD c11, c11, a1, b1 2082 LD a2, 1 * SIZE(AO) 2083 MADD c21, c21, a1, b2 2084 daddiu L, L, -1 2085 MADD c31, c31, a1, b3 2086 blez L, .L13 2087 MADD c41, c41, a1, b4 2088 NOP 2089 .align 3 2090 2091.L12: 2092 MADD c12, c12, a2, b1 2093 LD b1, 16 * SIZE(BO) 2094 MADD c22, c22, a2, b2 2095 LD b2, 5 * SIZE(BO) 2096 MADD c32, c32, a2, b3 2097 LD b3, 6 * SIZE(BO) 2098 MADD c42, c42, a2, b4 2099 LD b4, 7 * SIZE(BO) 2100 2101 MADD c51, c51, a1, b5 2102 NOP 2103 MADD c61, c61, a1, b2 2104 LD a4, 2 * SIZE(AO) 2105 MADD c71, c71, a1, b3 2106 NOP 2107 MADD c81, c81, a1, b4 2108 LD a1, 8 * SIZE(AO) 2109 2110 MADD c52, c52, a2, b5 2111 LD b5, 20 * SIZE(BO) 2112 MADD c62, c62, a2, b2 2113 LD b2, 9 * SIZE(BO) 2114 MADD c72, c72, a2, b3 2115 LD b3, 10 * SIZE(BO) 2116 MADD c82, c82, a2, b4 2117 LD b4, 11 * 
SIZE(BO) 2118 2119 MADD c11, c11, a4, b6 2120 LD a2, 3 * SIZE(AO) 2121 MADD c21, c21, a4, b2 2122 NOP 2123 MADD c31, c31, a4, b3 2124 NOP 2125 MADD c41, c41, a4, b4 2126 NOP 2127 2128 MADD c12, c12, a2, b6 2129 LD b6, 24 * SIZE(BO) 2130 MADD c22, c22, a2, b2 2131 LD b2, 13 * SIZE(BO) 2132 MADD c32, c32, a2, b3 2133 LD b3, 14 * SIZE(BO) 2134 MADD c42, c42, a2, b4 2135 LD b4, 15 * SIZE(BO) 2136 2137 MADD c51, c51, a4, b7 2138 NOP 2139 MADD c61, c61, a4, b2 2140 NOP 2141 MADD c71, c71, a4, b3 2142 NOP 2143 MADD c81, c81, a4, b4 2144 NOP 2145 2146 MADD c52, c52, a2, b7 2147 LD b7, 28 * SIZE(BO) 2148 MADD c62, c62, a2, b2 2149 LD b2, 17 * SIZE(BO) 2150 MADD c72, c72, a2, b3 2151 LD b3, 18 * SIZE(BO) 2152 MADD c82, c82, a2, b4 2153 LD b4, 19 * SIZE(BO) 2154 2155 MADD c11, c11, a3, b1 2156 LD a2, 5 * SIZE(AO) 2157 MADD c21, c21, a3, b2 2158 NOP 2159 MADD c31, c31, a3, b3 2160 NOP 2161 MADD c41, c41, a3, b4 2162 NOP 2163 2164 MADD c12, c12, a2, b1 2165 LD b1, 32 * SIZE(BO) 2166 MADD c22, c22, a2, b2 2167 LD b2, 21 * SIZE(BO) 2168 MADD c32, c32, a2, b3 2169 LD b3, 22 * SIZE(BO) 2170 MADD c42, c42, a2, b4 2171 LD b4, 23 * SIZE(BO) 2172 2173 MADD c51, c51, a3, b5 2174 NOP 2175 MADD c61, c61, a3, b2 2176 LD a4, 6 * SIZE(AO) 2177 MADD c71, c71, a3, b3 2178 NOP 2179 MADD c81, c81, a3, b4 2180 LD a3, 12 * SIZE(AO) 2181 2182 MADD c52, c52, a2, b5 2183 LD b5, 36 * SIZE(BO) 2184 MADD c62, c62, a2, b2 2185 LD b2, 25 * SIZE(BO) 2186 MADD c72, c72, a2, b3 2187 LD b3, 26 * SIZE(BO) 2188 MADD c82, c82, a2, b4 2189 LD b4, 27 * SIZE(BO) 2190 2191 MADD c11, c11, a4, b6 2192 LD a2, 7 * SIZE(AO) 2193 MADD c21, c21, a4, b2 2194 NOP 2195 MADD c31, c31, a4, b3 2196 NOP 2197 MADD c41, c41, a4, b4 2198 daddiu L, L, -1 2199 2200 MADD c12, c12, a2, b6 2201 LD b6, 40 * SIZE(BO) 2202 MADD c22, c22, a2, b2 2203 LD b2, 29 * SIZE(BO) 2204 MADD c32, c32, a2, b3 2205 LD b3, 30 * SIZE(BO) 2206 MADD c42, c42, a2, b4 2207 LD b4, 31 * SIZE(BO) 2208 2209 MADD c51, c51, a4, b7 2210 daddiu BO, BO, 32 * SIZE 2211 
MADD c61, c61, a4, b2 2212 daddiu AO, AO, 8 * SIZE 2213 MADD c71, c71, a4, b3 2214 NOP 2215 MADD c81, c81, a4, b4 2216 NOP 2217 2218 MADD c52, c52, a2, b7 2219 LD b7, 12 * SIZE(BO) 2220 MADD c62, c62, a2, b2 2221 LD b2, 1 * SIZE(BO) 2222 MADD c72, c72, a2, b3 2223 LD b3, 2 * SIZE(BO) 2224 MADD c82, c82, a2, b4 2225 LD b4, 3 * SIZE(BO) 2226 2227 MADD c11, c11, a1, b1 2228 LD a2, 1 * SIZE(AO) 2229 MADD c21, c21, a1, b2 2230 NOP 2231 MADD c31, c31, a1, b3 2232 bgtz L, .L12 2233 MADD c41, c41, a1, b4 2234 NOP 2235 .align 3 2236 2237.L13: 2238 MADD c12, c12, a2, b1 2239 LD b1, 16 * SIZE(BO) 2240 MADD c22, c22, a2, b2 2241 LD b2, 5 * SIZE(BO) 2242 MADD c32, c32, a2, b3 2243 LD b3, 6 * SIZE(BO) 2244 MADD c42, c42, a2, b4 2245 LD b4, 7 * SIZE(BO) 2246 2247 MADD c51, c51, a1, b5 2248 NOP 2249 MADD c61, c61, a1, b2 2250 LD a4, 2 * SIZE(AO) 2251 MADD c71, c71, a1, b3 2252 NOP 2253 MADD c81, c81, a1, b4 2254 LD a1, 8 * SIZE(AO) 2255 2256 MADD c52, c52, a2, b5 2257 LD b5, 20 * SIZE(BO) 2258 MADD c62, c62, a2, b2 2259 LD b2, 9 * SIZE(BO) 2260 MADD c72, c72, a2, b3 2261 LD b3, 10 * SIZE(BO) 2262 MADD c82, c82, a2, b4 2263 LD b4, 11 * SIZE(BO) 2264 2265 MADD c11, c11, a4, b6 2266 LD a2, 3 * SIZE(AO) 2267 MADD c21, c21, a4, b2 2268 NOP 2269 MADD c31, c31, a4, b3 2270 NOP 2271 MADD c41, c41, a4, b4 2272 NOP 2273 2274 MADD c12, c12, a2, b6 2275 LD b6, 24 * SIZE(BO) 2276 MADD c22, c22, a2, b2 2277 LD b2, 13 * SIZE(BO) 2278 MADD c32, c32, a2, b3 2279 LD b3, 14 * SIZE(BO) 2280 MADD c42, c42, a2, b4 2281 LD b4, 15 * SIZE(BO) 2282 2283 MADD c51, c51, a4, b7 2284 NOP 2285 MADD c61, c61, a4, b2 2286 NOP 2287 MADD c71, c71, a4, b3 2288 NOP 2289 MADD c81, c81, a4, b4 2290 NOP 2291 2292 MADD c52, c52, a2, b7 2293 LD b7, 28 * SIZE(BO) 2294 MADD c62, c62, a2, b2 2295 LD b2, 17 * SIZE(BO) 2296 MADD c72, c72, a2, b3 2297 LD b3, 18 * SIZE(BO) 2298 MADD c82, c82, a2, b4 2299 LD b4, 19 * SIZE(BO) 2300 2301 MADD c11, c11, a3, b1 2302 LD a2, 5 * SIZE(AO) 2303 MADD c21, c21, a3, b2 2304 NOP 2305 MADD 
c31, c31, a3, b3 2306 NOP 2307 MADD c41, c41, a3, b4 2308 NOP 2309 2310 MADD c12, c12, a2, b1 2311 LD b1, 32 * SIZE(BO) 2312 MADD c22, c22, a2, b2 2313 LD b2, 21 * SIZE(BO) 2314 MADD c32, c32, a2, b3 2315 LD b3, 22 * SIZE(BO) 2316 MADD c42, c42, a2, b4 2317 LD b4, 23 * SIZE(BO) 2318 2319 MADD c51, c51, a3, b5 2320 NOP 2321 MADD c61, c61, a3, b2 2322 LD a4, 6 * SIZE(AO) 2323 MADD c71, c71, a3, b3 2324 NOP 2325 MADD c81, c81, a3, b4 2326 LD a3, 12 * SIZE(AO) 2327 2328 MADD c52, c52, a2, b5 2329 LD b5, 36 * SIZE(BO) 2330 MADD c62, c62, a2, b2 2331 LD b2, 25 * SIZE(BO) 2332 MADD c72, c72, a2, b3 2333 LD b3, 26 * SIZE(BO) 2334 MADD c82, c82, a2, b4 2335 LD b4, 27 * SIZE(BO) 2336 2337 MADD c11, c11, a4, b6 2338 LD a2, 7 * SIZE(AO) 2339 MADD c21, c21, a4, b2 2340 NOP 2341 MADD c31, c31, a4, b3 2342 NOP 2343 MADD c41, c41, a4, b4 2344 NOP 2345 2346 MADD c12, c12, a2, b6 2347 LD b6, 40 * SIZE(BO) 2348 MADD c22, c22, a2, b2 2349 LD b2, 29 * SIZE(BO) 2350 MADD c32, c32, a2, b3 2351 LD b3, 30 * SIZE(BO) 2352 MADD c42, c42, a2, b4 2353 LD b4, 31 * SIZE(BO) 2354 2355 MADD c51, c51, a4, b7 2356 daddiu BO, BO, 32 * SIZE 2357 MADD c61, c61, a4, b2 2358 daddiu AO, AO, 8 * SIZE 2359 MADD c71, c71, a4, b3 2360 NOP 2361 MADD c81, c81, a4, b4 2362 NOP 2363 2364 MADD c52, c52, a2, b7 2365 LD b7, 12 * SIZE(BO) 2366 MADD c62, c62, a2, b2 2367 LD b2, 1 * SIZE(BO) 2368 MADD c72, c72, a2, b3 2369 LD b3, 2 * SIZE(BO) 2370 MADD c82, c82, a2, b4 2371 LD b4, 3 * SIZE(BO) 2372 .align 3 2373 2374.L15: 2375#if defined(LT) || defined(RN) 2376 andi L, KK, 3 2377#else 2378 andi L, TEMP, 3 2379#endif 2380 blez L, .L18 2381 NOP 2382 .align 3 2383 2384.L16: 2385 MADD c11, c11, a1, b1 2386 LD a2, 1 * SIZE(AO) 2387 MADD c21, c21, a1, b2 2388 NOP 2389 MADD c31, c31, a1, b3 2390 NOP 2391 MADD c41, c41, a1, b4 2392 NOP 2393 2394 MADD c12, c12, a2, b1 2395 LD b1, 8 * SIZE(BO) 2396 MADD c22, c22, a2, b2 2397 LD b2, 5 * SIZE(BO) 2398 MADD c32, c32, a2, b3 2399 LD b3, 6 * SIZE(BO) 2400 MADD c42, c42, a2, b4 2401 
LD b4, 7 * SIZE(BO) 2402 2403 MADD c51, c51, a1, b5 2404 daddiu L, L, -1 2405 MADD c61, c61, a1, b2 2406 daddiu AO, AO, 2 * SIZE 2407 MADD c71, c71, a1, b3 2408 daddiu BO, BO, 8 * SIZE 2409 MADD c81, c81, a1, b4 2410 LD a1, 0 * SIZE(AO) 2411 2412 MADD c52, c52, a2, b5 2413 LD b5, 4 * SIZE(BO) 2414 MADD c62, c62, a2, b2 2415 LD b2, 1 * SIZE(BO) 2416 MADD c72, c72, a2, b3 2417 LD b3, 2 * SIZE(BO) 2418 MADD c82, c82, a2, b4 2419 bgtz L, .L16 2420 LD b4, 3 * SIZE(BO) 2421 2422.L18: 2423#if defined(LN) || defined(RT) 2424#ifdef LN 2425 daddiu TEMP, KK, -2 2426#else 2427 daddiu TEMP, KK, -8 2428#endif 2429 2430 dsll L, TEMP, 1 + BASE_SHIFT 2431 dsll TEMP, TEMP, 3 + BASE_SHIFT 2432 daddu AO, AORIG, L 2433 daddu BO, B, TEMP 2434#endif 2435 2436#if defined(LN) || defined(LT) 2437 LD b1, 0 * SIZE(BO) 2438 LD b2, 1 * SIZE(BO) 2439 LD b3, 2 * SIZE(BO) 2440 LD b4, 3 * SIZE(BO) 2441 2442 SUB c11, b1, c11 2443 LD b5, 4 * SIZE(BO) 2444 SUB c21, b2, c21 2445 LD b6, 5 * SIZE(BO) 2446 SUB c31, b3, c31 2447 LD b7, 6 * SIZE(BO) 2448 SUB c41, b4, c41 2449 LD b8, 7 * SIZE(BO) 2450 2451 SUB c51, b5, c51 2452 LD b1, 8 * SIZE(BO) 2453 SUB c61, b6, c61 2454 LD b2, 9 * SIZE(BO) 2455 SUB c71, b7, c71 2456 LD b3, 10 * SIZE(BO) 2457 SUB c81, b8, c81 2458 LD b4, 11 * SIZE(BO) 2459 2460 SUB c12, b1, c12 2461 LD b5, 12 * SIZE(BO) 2462 SUB c22, b2, c22 2463 LD b6, 13 * SIZE(BO) 2464 SUB c32, b3, c32 2465 LD b7, 14 * SIZE(BO) 2466 SUB c42, b4, c42 2467 LD b8, 15 * SIZE(BO) 2468 2469 SUB c52, b5, c52 2470#ifdef LN 2471 LD b1, 3 * SIZE(AO) 2472#else 2473 LD b1, 0 * SIZE(AO) 2474#endif 2475 SUB c62, b6, c62 2476 SUB c72, b7, c72 2477 SUB c82, b8, c82 2478#else 2479 LD b1, 0 * SIZE(AO) 2480 LD b2, 1 * SIZE(AO) 2481 LD b3, 2 * SIZE(AO) 2482 LD b4, 3 * SIZE(AO) 2483 2484 SUB c11, b1, c11 2485 LD b5, 4 * SIZE(AO) 2486 SUB c12, b2, c12 2487 LD b6, 5 * SIZE(AO) 2488 SUB c21, b3, c21 2489 LD b7, 6 * SIZE(AO) 2490 SUB c22, b4, c22 2491 LD b8, 7 * SIZE(AO) 2492 2493 SUB c31, b5, c31 2494 LD b1, 8 * SIZE(AO) 2495 
SUB c32, b6, c32 2496 LD b2, 9 * SIZE(AO) 2497 SUB c41, b7, c41 2498 LD b3, 10 * SIZE(AO) 2499 SUB c42, b8, c42 2500 LD b4, 11 * SIZE(AO) 2501 2502 LD b5, 12 * SIZE(AO) 2503 SUB c51, b1, c51 2504 LD b6, 13 * SIZE(AO) 2505 SUB c52, b2, c52 2506 LD b7, 14 * SIZE(AO) 2507 SUB c61, b3, c61 2508 LD b8, 15 * SIZE(AO) 2509 SUB c62, b4, c62 2510 2511 SUB c71, b5, c71 2512 SUB c72, b6, c72 2513 SUB c81, b7, c81 2514 SUB c82, b8, c82 2515#endif 2516 2517#ifdef LN 2518 MUL c12, b1, c12 2519 LD b2, 2 * SIZE(AO) 2520 MUL c22, b1, c22 2521 MUL c32, b1, c32 2522 MUL c42, b1, c42 2523 MUL c52, b1, c52 2524 MUL c62, b1, c62 2525 MUL c72, b1, c72 2526 MUL c82, b1, c82 2527 2528 NMSUB c11, c11, b2, c12 2529 LD b3, 0 * SIZE(AO) 2530 NMSUB c21, c21, b2, c22 2531 NMSUB c31, c31, b2, c32 2532 NMSUB c41, c41, b2, c42 2533 NMSUB c51, c51, b2, c52 2534 NMSUB c61, c61, b2, c62 2535 NMSUB c71, c71, b2, c72 2536 NMSUB c81, c81, b2, c82 2537 2538 MUL c11, b3, c11 2539 daddiu CO1, CO1, -2 * SIZE 2540 MUL c21, b3, c21 2541 daddiu CO2, CO2, -2 * SIZE 2542 MUL c31, b3, c31 2543 daddiu CO3, CO3, -2 * SIZE 2544 MUL c41, b3, c41 2545 daddiu CO4, CO4, -2 * SIZE 2546 MUL c51, b3, c51 2547 daddiu CO5, CO5, -2 * SIZE 2548 MUL c61, b3, c61 2549 daddiu CO6, CO6, -2 * SIZE 2550 MUL c71, b3, c71 2551 daddiu CO7, CO7, -2 * SIZE 2552 MUL c81, b3, c81 2553 daddiu CO8, CO8, -2 * SIZE 2554#endif 2555 2556#ifdef LT 2557 MUL c11, b1, c11 2558 LD b2, 1 * SIZE(AO) 2559 MUL c21, b1, c21 2560 MUL c31, b1, c31 2561 MUL c41, b1, c41 2562 MUL c51, b1, c51 2563 MUL c61, b1, c61 2564 MUL c71, b1, c71 2565 MUL c81, b1, c81 2566 2567 NMSUB c12, c12, b2, c11 2568 LD b3, 3 * SIZE(AO) 2569 NMSUB c22, c22, b2, c21 2570 NMSUB c32, c32, b2, c31 2571 NMSUB c42, c42, b2, c41 2572 NMSUB c52, c52, b2, c51 2573 NMSUB c62, c62, b2, c61 2574 NMSUB c72, c72, b2, c71 2575 NMSUB c82, c82, b2, c81 2576 2577 MUL c12, b3, c12 2578 MUL c22, b3, c22 2579 MUL c32, b3, c32 2580 MUL c42, b3, c42 2581 MUL c52, b3, c52 2582 MUL c62, b3, c62 2583 MUL 
c72, b3, c72 2584 MUL c82, b3, c82 2585#endif 2586 2587#ifdef RN 2588 LD b1, 0 * SIZE(BO) 2589 LD b2, 1 * SIZE(BO) 2590 LD b3, 2 * SIZE(BO) 2591 LD b4, 3 * SIZE(BO) 2592 2593 MUL c11, b1, c11 2594 MUL c12, b1, c12 2595 LD b5, 4 * SIZE(BO) 2596 2597 NMSUB c21, c21, b2, c11 2598 NMSUB c22, c22, b2, c12 2599 LD b6, 5 * SIZE(BO) 2600 NMSUB c31, c31, b3, c11 2601 NMSUB c32, c32, b3, c12 2602 LD b7, 6 * SIZE(BO) 2603 NMSUB c41, c41, b4, c11 2604 NMSUB c42, c42, b4, c12 2605 LD b8, 7 * SIZE(BO) 2606 2607 NMSUB c51, c51, b5, c11 2608 NMSUB c52, c52, b5, c12 2609 LD b2, 9 * SIZE(BO) 2610 NMSUB c61, c61, b6, c11 2611 NMSUB c62, c62, b6, c12 2612 LD b3, 10 * SIZE(BO) 2613 NMSUB c71, c71, b7, c11 2614 NMSUB c72, c72, b7, c12 2615 LD b4, 11 * SIZE(BO) 2616 NMSUB c81, c81, b8, c11 2617 NMSUB c82, c82, b8, c12 2618 LD b5, 12 * SIZE(BO) 2619 2620 MUL c21, b2, c21 2621 MUL c22, b2, c22 2622 LD b6, 13 * SIZE(BO) 2623 2624 NMSUB c31, c31, b3, c21 2625 NMSUB c32, c32, b3, c22 2626 LD b7, 14 * SIZE(BO) 2627 NMSUB c41, c41, b4, c21 2628 NMSUB c42, c42, b4, c22 2629 LD b8, 15 * SIZE(BO) 2630 NMSUB c51, c51, b5, c21 2631 NMSUB c52, c52, b5, c22 2632 LD b3, 18 * SIZE(BO) 2633 NMSUB c61, c61, b6, c21 2634 NMSUB c62, c62, b6, c22 2635 LD b4, 19 * SIZE(BO) 2636 NMSUB c71, c71, b7, c21 2637 NMSUB c72, c72, b7, c22 2638 LD b5, 20 * SIZE(BO) 2639 NMSUB c81, c81, b8, c21 2640 NMSUB c82, c82, b8, c22 2641 LD b6, 21 * SIZE(BO) 2642 2643 MUL c31, b3, c31 2644 MUL c32, b3, c32 2645 LD b7, 22 * SIZE(BO) 2646 2647 NMSUB c41, c41, b4, c31 2648 NMSUB c42, c42, b4, c32 2649 LD b8, 23 * SIZE(BO) 2650 NMSUB c51, c51, b5, c31 2651 NMSUB c52, c52, b5, c32 2652 LD b4, 27 * SIZE(BO) 2653 NMSUB c61, c61, b6, c31 2654 NMSUB c62, c62, b6, c32 2655 LD b5, 28 * SIZE(BO) 2656 NMSUB c71, c71, b7, c31 2657 NMSUB c72, c72, b7, c32 2658 LD b6, 29 * SIZE(BO) 2659 NMSUB c81, c81, b8, c31 2660 NMSUB c82, c82, b8, c32 2661 LD b7, 30 * SIZE(BO) 2662 2663 MUL c41, b4, c41 2664 MUL c42, b4, c42 2665 LD b8, 31 * SIZE(BO) 2666 
2667 NMSUB c51, c51, b5, c41 2668 NMSUB c52, c52, b5, c42 2669 LD b5, 36 * SIZE(BO) 2670 NMSUB c61, c61, b6, c41 2671 NMSUB c62, c62, b6, c42 2672 LD b6, 37 * SIZE(BO) 2673 NMSUB c71, c71, b7, c41 2674 NMSUB c72, c72, b7, c42 2675 LD b7, 38 * SIZE(BO) 2676 NMSUB c81, c81, b8, c41 2677 NMSUB c82, c82, b8, c42 2678 LD b8, 39 * SIZE(BO) 2679 2680 MUL c51, b5, c51 2681 MUL c52, b5, c52 2682 2683 NMSUB c61, c61, b6, c51 2684 NMSUB c62, c62, b6, c52 2685 LD b6, 45 * SIZE(BO) 2686 NMSUB c71, c71, b7, c51 2687 NMSUB c72, c72, b7, c52 2688 LD b7, 46 * SIZE(BO) 2689 NMSUB c81, c81, b8, c51 2690 NMSUB c82, c82, b8, c52 2691 LD b8, 47 * SIZE(BO) 2692 2693 MUL c61, b6, c61 2694 MUL c62, b6, c62 2695 2696 NMSUB c71, c71, b7, c61 2697 NMSUB c72, c72, b7, c62 2698 LD b7, 54 * SIZE(BO) 2699 NMSUB c81, c81, b8, c61 2700 NMSUB c82, c82, b8, c62 2701 LD b8, 55 * SIZE(BO) 2702 2703 MUL c71, b7, c71 2704 MUL c72, b7, c72 2705 2706 NMSUB c81, c81, b8, c71 2707 NMSUB c82, c82, b8, c72 2708 LD b8, 63 * SIZE(BO) 2709 2710 MUL c81, b8, c81 2711 MUL c82, b8, c82 2712#endif 2713 2714#ifdef RT 2715 LD b1, 63 * SIZE(BO) 2716 LD b2, 62 * SIZE(BO) 2717 LD b3, 61 * SIZE(BO) 2718 LD b4, 60 * SIZE(BO) 2719 2720 MUL c81, b1, c81 2721 MUL c82, b1, c82 2722 LD b5, 59 * SIZE(BO) 2723 2724 NMSUB c71, c71, b2, c81 2725 NMSUB c72, c72, b2, c82 2726 LD b6, 58 * SIZE(BO) 2727 NMSUB c61, c61, b3, c81 2728 NMSUB c62, c62, b3, c82 2729 LD b7, 57 * SIZE(BO) 2730 NMSUB c51, c51, b4, c81 2731 NMSUB c52, c52, b4, c82 2732 LD b8, 56 * SIZE(BO) 2733 2734 NMSUB c41, c41, b5, c81 2735 NMSUB c42, c42, b5, c82 2736 LD b2, 54 * SIZE(BO) 2737 NMSUB c31, c31, b6, c81 2738 NMSUB c32, c32, b6, c82 2739 LD b3, 53 * SIZE(BO) 2740 NMSUB c21, c21, b7, c81 2741 NMSUB c22, c22, b7, c82 2742 LD b4, 52 * SIZE(BO) 2743 NMSUB c11, c11, b8, c81 2744 NMSUB c12, c12, b8, c82 2745 LD b5, 51 * SIZE(BO) 2746 2747 MUL c71, b2, c71 2748 MUL c72, b2, c72 2749 LD b6, 50 * SIZE(BO) 2750 2751 NMSUB c61, c61, b3, c71 2752 NMSUB c62, c62, b3, c72 
2753 LD b7, 49 * SIZE(BO) 2754 NMSUB c51, c51, b4, c71 2755 NMSUB c52, c52, b4, c72 2756 LD b8, 48 * SIZE(BO) 2757 NMSUB c41, c41, b5, c71 2758 NMSUB c42, c42, b5, c72 2759 LD b3, 45 * SIZE(BO) 2760 NMSUB c31, c31, b6, c71 2761 NMSUB c32, c32, b6, c72 2762 LD b4, 44 * SIZE(BO) 2763 NMSUB c21, c21, b7, c71 2764 NMSUB c22, c22, b7, c72 2765 LD b5, 43 * SIZE(BO) 2766 NMSUB c11, c11, b8, c71 2767 NMSUB c12, c12, b8, c72 2768 LD b6, 42 * SIZE(BO) 2769 2770 MUL c61, b3, c61 2771 MUL c62, b3, c62 2772 LD b7, 41 * SIZE(BO) 2773 2774 NMSUB c51, c51, b4, c61 2775 NMSUB c52, c52, b4, c62 2776 LD b8, 40 * SIZE(BO) 2777 NMSUB c41, c41, b5, c61 2778 NMSUB c42, c42, b5, c62 2779 LD b4, 36 * SIZE(BO) 2780 NMSUB c31, c31, b6, c61 2781 NMSUB c32, c32, b6, c62 2782 LD b5, 35 * SIZE(BO) 2783 NMSUB c21, c21, b7, c61 2784 NMSUB c22, c22, b7, c62 2785 LD b6, 34 * SIZE(BO) 2786 NMSUB c11, c11, b8, c61 2787 NMSUB c12, c12, b8, c62 2788 LD b7, 33 * SIZE(BO) 2789 2790 MUL c51, b4, c51 2791 MUL c52, b4, c52 2792 LD b8, 32 * SIZE(BO) 2793 2794 NMSUB c41, c41, b5, c51 2795 NMSUB c42, c42, b5, c52 2796 LD b5, 27 * SIZE(BO) 2797 NMSUB c31, c31, b6, c51 2798 NMSUB c32, c32, b6, c52 2799 LD b6, 26 * SIZE(BO) 2800 NMSUB c21, c21, b7, c51 2801 NMSUB c22, c22, b7, c52 2802 LD b7, 25 * SIZE(BO) 2803 NMSUB c11, c11, b8, c51 2804 NMSUB c12, c12, b8, c52 2805 LD b8, 24 * SIZE(BO) 2806 2807 MUL c41, b5, c41 2808 MUL c42, b5, c42 2809 2810 NMSUB c31, c31, b6, c41 2811 NMSUB c32, c32, b6, c42 2812 LD b6, 18 * SIZE(BO) 2813 NMSUB c21, c21, b7, c41 2814 NMSUB c22, c22, b7, c42 2815 LD b7, 17 * SIZE(BO) 2816 NMSUB c11, c11, b8, c41 2817 NMSUB c12, c12, b8, c42 2818 LD b8, 16 * SIZE(BO) 2819 2820 MUL c31, b6, c31 2821 MUL c32, b6, c32 2822 2823 NMSUB c21, c21, b7, c31 2824 NMSUB c22, c22, b7, c32 2825 LD b7, 9 * SIZE(BO) 2826 NMSUB c11, c11, b8, c31 2827 NMSUB c12, c12, b8, c32 2828 LD b8, 8 * SIZE(BO) 2829 2830 MUL c21, b7, c21 2831 MUL c22, b7, c22 2832 2833 NMSUB c11, c11, b8, c21 2834 NMSUB c12, c12, b8, c22 
2835 LD b8, 0 * SIZE(BO) 2836 2837 MUL c11, b8, c11 2838 MUL c12, b8, c12 2839#endif 2840 2841#if defined(LN) || defined(LT) 2842 ST c11, 0 * SIZE(BO) 2843 ST c21, 1 * SIZE(BO) 2844 ST c31, 2 * SIZE(BO) 2845 ST c41, 3 * SIZE(BO) 2846 ST c51, 4 * SIZE(BO) 2847 ST c61, 5 * SIZE(BO) 2848 ST c71, 6 * SIZE(BO) 2849 ST c81, 7 * SIZE(BO) 2850 2851 ST c12, 8 * SIZE(BO) 2852 ST c22, 9 * SIZE(BO) 2853 ST c32, 10 * SIZE(BO) 2854 ST c42, 11 * SIZE(BO) 2855 ST c52, 12 * SIZE(BO) 2856 ST c62, 13 * SIZE(BO) 2857 ST c72, 14 * SIZE(BO) 2858 ST c82, 15 * SIZE(BO) 2859#else 2860 ST c11, 0 * SIZE(AO) 2861 ST c12, 1 * SIZE(AO) 2862 ST c21, 2 * SIZE(AO) 2863 ST c22, 3 * SIZE(AO) 2864 ST c31, 4 * SIZE(AO) 2865 ST c32, 5 * SIZE(AO) 2866 ST c41, 6 * SIZE(AO) 2867 ST c42, 7 * SIZE(AO) 2868 2869 ST c51, 8 * SIZE(AO) 2870 ST c52, 9 * SIZE(AO) 2871 ST c61, 10 * SIZE(AO) 2872 ST c62, 11 * SIZE(AO) 2873 ST c71, 12 * SIZE(AO) 2874 ST c72, 13 * SIZE(AO) 2875 ST c81, 14 * SIZE(AO) 2876 ST c82, 15 * SIZE(AO) 2877#endif 2878 2879 ST c11, 0 * SIZE(CO1) 2880 ST c12, 1 * SIZE(CO1) 2881 ST c21, 0 * SIZE(CO2) 2882 ST c22, 1 * SIZE(CO2) 2883 ST c31, 0 * SIZE(CO3) 2884 ST c32, 1 * SIZE(CO3) 2885 ST c41, 0 * SIZE(CO4) 2886 ST c42, 1 * SIZE(CO4) 2887 ST c51, 0 * SIZE(CO5) 2888 ST c52, 1 * SIZE(CO5) 2889 ST c61, 0 * SIZE(CO6) 2890 ST c62, 1 * SIZE(CO6) 2891 ST c71, 0 * SIZE(CO7) 2892 ST c72, 1 * SIZE(CO7) 2893 ST c81, 0 * SIZE(CO8) 2894 ST c82, 1 * SIZE(CO8) 2895 2896 MTC $0, a1 2897 2898#ifndef LN 2899 daddiu CO1, CO1, 2 * SIZE 2900 daddiu CO2, CO2, 2 * SIZE 2901 daddiu CO3, CO3, 2 * SIZE 2902 daddiu CO4, CO4, 2 * SIZE 2903 daddiu CO5, CO5, 2 * SIZE 2904 daddiu CO6, CO6, 2 * SIZE 2905 daddiu CO7, CO7, 2 * SIZE 2906 daddiu CO8, CO8, 2 * SIZE 2907#endif 2908 2909 MOV c11, a1 2910 MOV c21, a1 2911 2912#ifdef RT 2913 dsll TEMP, K, 1 + BASE_SHIFT 2914 daddu AORIG, AORIG, TEMP 2915#endif 2916 2917 MOV c31, a1 2918 MOV c41, a1 2919 2920#if defined(LT) || defined(RN) 2921 dsubu TEMP, K, KK 2922 dsll L, TEMP, 1 + 
BASE_SHIFT 2923 dsll TEMP, TEMP, 3 + BASE_SHIFT 2924 daddu AO, AO, L 2925 daddu BO, BO, TEMP 2926#endif 2927 2928#ifdef LT 2929 daddiu KK, KK, 2 2930#endif 2931 2932#ifdef LN 2933 daddiu KK, KK, -2 2934#endif 2935 2936 daddiu I, I, -1 2937 MOV c51, a1 2938 2939 bgtz I, .L11 2940 MOV c61, a1 2941 .align 3 2942 2943.L20: 2944 andi I, M, 1 2945 MOV c61, c11 2946 blez I, .L29 2947 MOV c71, c11 2948 2949#if defined(LT) || defined(RN) 2950 LD a1, 0 * SIZE(AO) 2951 LD a2, 1 * SIZE(AO) 2952 LD a3, 2 * SIZE(AO) 2953 LD a4, 3 * SIZE(AO) 2954 2955 LD b1, 0 * SIZE(B) 2956 LD b2, 1 * SIZE(B) 2957 LD b3, 2 * SIZE(B) 2958 LD b4, 3 * SIZE(B) 2959 LD b5, 4 * SIZE(B) 2960 LD b6, 8 * SIZE(B) 2961 LD b7, 12 * SIZE(B) 2962 2963 dsra L, KK, 2 2964 MOV c81, c11 2965 2966 blez L, .L25 2967 move BO, B 2968#else 2969 2970#ifdef LN 2971 dsll TEMP, K, 0 + BASE_SHIFT 2972 dsubu AORIG, AORIG, TEMP 2973#endif 2974 2975 dsll L, KK, 0 + BASE_SHIFT 2976 dsll TEMP, KK, 3 + BASE_SHIFT 2977 2978 daddu AO, AORIG, L 2979 daddu BO, B, TEMP 2980 2981 dsubu TEMP, K, KK 2982 2983 LD a1, 0 * SIZE(AO) 2984 LD a2, 1 * SIZE(AO) 2985 LD a3, 2 * SIZE(AO) 2986 LD a4, 3 * SIZE(AO) 2987 2988 LD b1, 0 * SIZE(BO) 2989 LD b2, 1 * SIZE(BO) 2990 LD b3, 2 * SIZE(BO) 2991 LD b4, 3 * SIZE(BO) 2992 LD b5, 4 * SIZE(BO) 2993 LD b6, 8 * SIZE(BO) 2994 LD b7, 12 * SIZE(BO) 2995 2996 dsra L, TEMP, 2 2997 MOV c81, c11 2998 2999 blez L, .L25 3000 NOP 3001#endif 3002 .align 3 3003 3004.L22: 3005 MADD c11, c11, a1, b1 3006 LD b1, 16 * SIZE(BO) 3007 MADD c21, c21, a1, b2 3008 LD b2, 5 * SIZE(BO) 3009 MADD c31, c31, a1, b3 3010 LD b3, 6 * SIZE(BO) 3011 MADD c41, c41, a1, b4 3012 LD b4, 7 * SIZE(BO) 3013 3014 MADD c51, c51, a1, b5 3015 LD b5, 20 * SIZE(BO) 3016 MADD c61, c61, a1, b2 3017 LD b2, 9 * SIZE(BO) 3018 MADD c71, c71, a1, b3 3019 LD b3, 10 * SIZE(BO) 3020 MADD c81, c81, a1, b4 3021 LD b4, 11 * SIZE(BO) 3022 3023 LD a1, 4 * SIZE(AO) 3024 daddiu L, L, -1 3025 3026 MADD c11, c11, a2, b6 3027 LD b6, 24 * SIZE(BO) 3028 MADD c21, c21, 
a2, b2 3029 LD b2, 13 * SIZE(BO) 3030 MADD c31, c31, a2, b3 3031 LD b3, 14 * SIZE(BO) 3032 MADD c41, c41, a2, b4 3033 LD b4, 15 * SIZE(BO) 3034 3035 MADD c51, c51, a2, b7 3036 LD b7, 28 * SIZE(BO) 3037 MADD c61, c61, a2, b2 3038 LD b2, 17 * SIZE(BO) 3039 MADD c71, c71, a2, b3 3040 LD b3, 18 * SIZE(BO) 3041 MADD c81, c81, a2, b4 3042 LD b4, 19 * SIZE(BO) 3043 3044 LD a2, 5 * SIZE(AO) 3045 daddiu AO, AO, 4 * SIZE 3046 3047 MADD c11, c11, a3, b1 3048 LD b1, 32 * SIZE(BO) 3049 MADD c21, c21, a3, b2 3050 LD b2, 21 * SIZE(BO) 3051 MADD c31, c31, a3, b3 3052 LD b3, 22 * SIZE(BO) 3053 MADD c41, c41, a3, b4 3054 LD b4, 23 * SIZE(BO) 3055 3056 MADD c51, c51, a3, b5 3057 LD b5, 36 * SIZE(BO) 3058 MADD c61, c61, a3, b2 3059 LD b2, 25 * SIZE(BO) 3060 MADD c71, c71, a3, b3 3061 LD b3, 26 * SIZE(BO) 3062 MADD c81, c81, a3, b4 3063 LD b4, 27 * SIZE(BO) 3064 3065 LD a3, 2 * SIZE(AO) 3066 daddiu BO, BO, 32 * SIZE 3067 3068 MADD c11, c11, a4, b6 3069 LD b6, 8 * SIZE(BO) 3070 MADD c21, c21, a4, b2 3071 LD b2, -3 * SIZE(BO) 3072 MADD c31, c31, a4, b3 3073 LD b3, -2 * SIZE(BO) 3074 MADD c41, c41, a4, b4 3075 LD b4, -1 * SIZE(BO) 3076 3077 MADD c51, c51, a4, b7 3078 LD b7, 12 * SIZE(BO) 3079 MADD c61, c61, a4, b2 3080 LD b2, 1 * SIZE(BO) 3081 MADD c71, c71, a4, b3 3082 LD b3, 2 * SIZE(BO) 3083 MADD c81, c81, a4, b4 3084 LD b4, 3 * SIZE(BO) 3085 bgtz L, .L22 3086 LD a4, 3 * SIZE(AO) 3087 .align 3 3088 3089.L25: 3090#if defined(LT) || defined(RN) 3091 andi L, KK, 3 3092#else 3093 andi L, TEMP, 3 3094#endif 3095 NOP 3096 blez L, .L28 3097 NOP 3098 .align 3 3099 3100.L26: 3101 MADD c11, c11, a1, b1 3102 LD b1, 8 * SIZE(BO) 3103 MADD c21, c21, a1, b2 3104 LD b2, 5 * SIZE(BO) 3105 MADD c31, c31, a1, b3 3106 LD b3, 6 * SIZE(BO) 3107 MADD c41, c41, a1, b4 3108 LD b4, 7 * SIZE(BO) 3109 3110 daddiu L, L, -1 3111 MOV a2, a2 3112 daddiu AO, AO, 1 * SIZE 3113 daddiu BO, BO, 8 * SIZE 3114 3115 MADD c51, c51, a1, b5 3116 LD b5, 4 * SIZE(BO) 3117 MADD c61, c61, a1, b2 3118 LD b2, 1 * SIZE(BO) 3119 MADD 
c71, c71, a1, b3 3120 LD b3, 2 * SIZE(BO) 3121 MADD c81, c81, a1, b4 3122 LD a1, 0 * SIZE(AO) 3123 3124 bgtz L, .L26 3125 LD b4, 3 * SIZE(BO) 3126 3127.L28: 3128#if defined(LN) || defined(RT) 3129#ifdef LN 3130 daddiu TEMP, KK, -1 3131#else 3132 daddiu TEMP, KK, -8 3133#endif 3134 3135 dsll L, TEMP, 0 + BASE_SHIFT 3136 dsll TEMP, TEMP, 3 + BASE_SHIFT 3137 daddu AO, AORIG, L 3138 daddu BO, B, TEMP 3139#endif 3140 3141 3142#if defined(LN) || defined(LT) 3143 LD b1, 0 * SIZE(BO) 3144 LD b2, 1 * SIZE(BO) 3145 LD b3, 2 * SIZE(BO) 3146 LD b4, 3 * SIZE(BO) 3147 LD b5, 4 * SIZE(BO) 3148 LD b6, 5 * SIZE(BO) 3149 LD b7, 6 * SIZE(BO) 3150 LD b8, 7 * SIZE(BO) 3151 3152 SUB c11, b1, c11 3153 SUB c21, b2, c21 3154 SUB c31, b3, c31 3155 SUB c41, b4, c41 3156 SUB c51, b5, c51 3157 SUB c61, b6, c61 3158 SUB c71, b7, c71 3159 SUB c81, b8, c81 3160#else 3161 LD b1, 0 * SIZE(AO) 3162 LD b2, 1 * SIZE(AO) 3163 LD b3, 2 * SIZE(AO) 3164 LD b4, 3 * SIZE(AO) 3165 LD b5, 4 * SIZE(AO) 3166 LD b6, 5 * SIZE(AO) 3167 LD b7, 6 * SIZE(AO) 3168 LD b8, 7 * SIZE(AO) 3169 3170 SUB c11, b1, c11 3171 SUB c21, b2, c21 3172 SUB c31, b3, c31 3173 SUB c41, b4, c41 3174 SUB c51, b5, c51 3175 SUB c61, b6, c61 3176 SUB c71, b7, c71 3177 SUB c81, b8, c81 3178#endif 3179 3180#if defined(LN) || defined(LT) 3181 LD b1, 0 * SIZE(AO) 3182 3183 MUL c11, b1, c11 3184 MUL c21, b1, c21 3185 MUL c31, b1, c31 3186 MUL c41, b1, c41 3187 MUL c51, b1, c51 3188 MUL c61, b1, c61 3189 MUL c71, b1, c71 3190 MUL c81, b1, c81 3191#endif 3192 3193#ifdef RN 3194 LD b1, 0 * SIZE(BO) 3195 LD b2, 1 * SIZE(BO) 3196 LD b3, 2 * SIZE(BO) 3197 LD b4, 3 * SIZE(BO) 3198 LD b5, 4 * SIZE(BO) 3199 LD b6, 5 * SIZE(BO) 3200 LD b7, 6 * SIZE(BO) 3201 LD b8, 7 * SIZE(BO) 3202 3203 MUL c11, b1, c11 3204 3205 NMSUB c21, c21, b2, c11 3206 NMSUB c31, c31, b3, c11 3207 NMSUB c41, c41, b4, c11 3208 NMSUB c51, c51, b5, c11 3209 NMSUB c61, c61, b6, c11 3210 NMSUB c71, c71, b7, c11 3211 NMSUB c81, c81, b8, c11 3212 3213 LD b2, 9 * SIZE(BO) 3214 LD b3, 10 * 
SIZE(BO) 3215 LD b4, 11 * SIZE(BO) 3216 LD b5, 12 * SIZE(BO) 3217 LD b6, 13 * SIZE(BO) 3218 LD b7, 14 * SIZE(BO) 3219 LD b8, 15 * SIZE(BO) 3220 3221 MUL c21, b2, c21 3222 3223 NMSUB c31, c31, b3, c21 3224 NMSUB c41, c41, b4, c21 3225 NMSUB c51, c51, b5, c21 3226 NMSUB c61, c61, b6, c21 3227 NMSUB c71, c71, b7, c21 3228 NMSUB c81, c81, b8, c21 3229 3230 LD b3, 18 * SIZE(BO) 3231 LD b4, 19 * SIZE(BO) 3232 LD b5, 20 * SIZE(BO) 3233 LD b6, 21 * SIZE(BO) 3234 LD b7, 22 * SIZE(BO) 3235 LD b8, 23 * SIZE(BO) 3236 3237 MUL c31, b3, c31 3238 3239 NMSUB c41, c41, b4, c31 3240 NMSUB c51, c51, b5, c31 3241 NMSUB c61, c61, b6, c31 3242 NMSUB c71, c71, b7, c31 3243 NMSUB c81, c81, b8, c31 3244 3245 LD b4, 27 * SIZE(BO) 3246 LD b5, 28 * SIZE(BO) 3247 LD b6, 29 * SIZE(BO) 3248 LD b7, 30 * SIZE(BO) 3249 LD b8, 31 * SIZE(BO) 3250 3251 MUL c41, b4, c41 3252 3253 NMSUB c51, c51, b5, c41 3254 NMSUB c61, c61, b6, c41 3255 NMSUB c71, c71, b7, c41 3256 NMSUB c81, c81, b8, c41 3257 3258 LD b5, 36 * SIZE(BO) 3259 LD b6, 37 * SIZE(BO) 3260 LD b7, 38 * SIZE(BO) 3261 LD b8, 39 * SIZE(BO) 3262 3263 MUL c51, b5, c51 3264 3265 NMSUB c61, c61, b6, c51 3266 NMSUB c71, c71, b7, c51 3267 NMSUB c81, c81, b8, c51 3268 3269 LD b6, 45 * SIZE(BO) 3270 LD b7, 46 * SIZE(BO) 3271 LD b8, 47 * SIZE(BO) 3272 3273 MUL c61, b6, c61 3274 3275 NMSUB c71, c71, b7, c61 3276 NMSUB c81, c81, b8, c61 3277 3278 LD b7, 54 * SIZE(BO) 3279 LD b8, 55 * SIZE(BO) 3280 3281 MUL c71, b7, c71 3282 3283 NMSUB c81, c81, b8, c71 3284 3285 LD b8, 63 * SIZE(BO) 3286 3287 MUL c81, b8, c81 3288#endif 3289 3290#ifdef RT 3291 LD b1, 63 * SIZE(BO) 3292 LD b2, 62 * SIZE(BO) 3293 LD b3, 61 * SIZE(BO) 3294 LD b4, 60 * SIZE(BO) 3295 LD b5, 59 * SIZE(BO) 3296 LD b6, 58 * SIZE(BO) 3297 LD b7, 57 * SIZE(BO) 3298 LD b8, 56 * SIZE(BO) 3299 3300 MUL c81, b1, c81 3301 3302 NMSUB c71, c71, b2, c81 3303 NMSUB c61, c61, b3, c81 3304 NMSUB c51, c51, b4, c81 3305 NMSUB c41, c41, b5, c81 3306 NMSUB c31, c31, b6, c81 3307 NMSUB c21, c21, b7, c81 3308 NMSUB 
c11, c11, b8, c81
/*
 * The line above is the operand list of an instruction whose mnemonic
 * lies before this chunk.
 *
 * What follows is the visible tail of the kernel: the remaining
 * substitution steps over the accumulators c71..c11, the store-back of
 * c11..c81, the pointer/KK bookkeeping, the end of the J loop (.L29)
 * and the function exit (.L999).
 *
 * MUL, NMSUB, LD, ST, SIZE and BASE_SHIFT are macros defined outside
 * this chunk.  NMSUB d, a, b, c is presumably d = a - b * c -- confirm
 * in common.h.
 */
3309
/*
 * Step for c71.  The BO offsets used below all have the form 8 * r + c:
 * here r = 6, with the scaling entry at c = 6 and the update entries at
 * c = 5..0.
 * NOTE(review): the scaling entry is applied with MUL rather than a
 * divide, which suggests it is stored as a reciprocal -- confirm against
 * the packing routine.
 */
3310 LD b2, 54 * SIZE(BO)
3311 LD b3, 53 * SIZE(BO)
3312 LD b4, 52 * SIZE(BO)
3313 LD b5, 51 * SIZE(BO)
3314 LD b6, 50 * SIZE(BO)
3315 LD b7, 49 * SIZE(BO)
3316 LD b8, 48 * SIZE(BO)
3317
3318 MUL c71, b2, c71
3319
/* Fold the finished c71 into every accumulator still to be processed. */
3320 NMSUB c61, c61, b3, c71
3321 NMSUB c51, c51, b4, c71
3322 NMSUB c41, c41, b5, c71
3323 NMSUB c31, c31, b6, c71
3324 NMSUB c21, c21, b7, c71
3325 NMSUB c11, c11, b8, c71
3326
/* Step for c61: offsets 8 * 5 + 5 down to 8 * 5 + 0. */
3327 LD b3, 45 * SIZE(BO)
3328 LD b4, 44 * SIZE(BO)
3329 LD b5, 43 * SIZE(BO)
3330 LD b6, 42 * SIZE(BO)
3331 LD b7, 41 * SIZE(BO)
3332 LD b8, 40 * SIZE(BO)
3333
3334 MUL c61, b3, c61
3335
3336 NMSUB c51, c51, b4, c61
3337 NMSUB c41, c41, b5, c61
3338 NMSUB c31, c31, b6, c61
3339 NMSUB c21, c21, b7, c61
3340 NMSUB c11, c11, b8, c61
3341
/* Step for c51: offsets 8 * 4 + 4 down to 8 * 4 + 0. */
3342 LD b4, 36 * SIZE(BO)
3343 LD b5, 35 * SIZE(BO)
3344 LD b6, 34 * SIZE(BO)
3345 LD b7, 33 * SIZE(BO)
3346 LD b8, 32 * SIZE(BO)
3347
3348 MUL c51, b4, c51
3349
3350 NMSUB c41, c41, b5, c51
3351 NMSUB c31, c31, b6, c51
3352 NMSUB c21, c21, b7, c51
3353 NMSUB c11, c11, b8, c51
3354
/* Step for c41: offsets 8 * 3 + 3 down to 8 * 3 + 0. */
3355 LD b5, 27 * SIZE(BO)
3356 LD b6, 26 * SIZE(BO)
3357 LD b7, 25 * SIZE(BO)
3358 LD b8, 24 * SIZE(BO)
3359
3360 MUL c41, b5, c41
3361
3362 NMSUB c31, c31, b6, c41
3363 NMSUB c21, c21, b7, c41
3364 NMSUB c11, c11, b8, c41
3365
/* Step for c31: offsets 8 * 2 + 2 down to 8 * 2 + 0. */
3366 LD b6, 18 * SIZE(BO)
3367 LD b7, 17 * SIZE(BO)
3368 LD b8, 16 * SIZE(BO)
3369
3370 MUL c31, b6, c31
3371
3372 NMSUB c21, c21, b7, c31
3373 NMSUB c11, c11, b8, c31
3374
/* Step for c21: offsets 8 * 1 + 1 and 8 * 1 + 0. */
3375 LD b7, 9 * SIZE(BO)
3376 LD b8, 8 * SIZE(BO)
3377
3378 MUL c21, b7, c21
3379
3380 NMSUB c11, c11, b8, c21
3381
/* Final step: c11 only needs the scaling by the entry at offset 0. */
3382 LD b8, 0 * SIZE(BO)
3383
3384 MUL c11, b8, c11
/* Closes a conditional that was opened before this chunk. */
3385#endif
3386
/* LN: step each of the eight output pointers back by one element before storing. */
3387#ifdef LN
3388 daddiu CO1, CO1, -1 * SIZE
3389 daddiu CO2, CO2, -1 * SIZE
3390 daddiu CO3, CO3, -1 * SIZE
3391 daddiu CO4, CO4, -1 * SIZE
3392 daddiu CO5, CO5, -1 * SIZE
3393 daddiu CO6, CO6, -1 * SIZE
3394 daddiu CO7, CO7, -1 * SIZE
3395 daddiu CO8, CO8, -1 * SIZE
3396#endif
3397
/*
 * Write the eight results back to the operand buffer: through BO
 * (elements 0..7) for LN/LT, through AO (elements 0..7) otherwise.
 */
3398#if defined(LN) || defined(LT)
3399 ST c11, 0 * SIZE(BO)
3400 ST c21, 1 * SIZE(BO)
3401 ST c31, 2 * SIZE(BO)
3402 ST c41, 3 * SIZE(BO)
3403 ST c51, 4 * SIZE(BO)
3404 ST c61, 5 * SIZE(BO)
3405 ST c71, 6 * SIZE(BO)
3406 ST c81, 7 * SIZE(BO)
3407#else
3408 ST c11, 0 * SIZE(AO)
3409 ST c21, 1 * SIZE(AO)
3410 ST c31, 2 * SIZE(AO)
3411 ST c41, 3 * SIZE(AO)
3412 ST c51, 4 * SIZE(AO)
3413 ST c61, 5 * SIZE(AO)
3414 ST c71, 6 * SIZE(AO)
3415 ST c81, 7 * SIZE(AO)
3416#endif
3417
/* Store one result through each of the output pointers CO1..CO8. */
3418 ST c11, 0 * SIZE(CO1)
3419 ST c21, 0 * SIZE(CO2)
3420 ST c31, 0 * SIZE(CO3)
3421 ST c41, 0 * SIZE(CO4)
3422 ST c51, 0 * SIZE(CO5)
3423 ST c61, 0 * SIZE(CO6)
3424 ST c71, 0 * SIZE(CO7)
3425 ST c81, 0 * SIZE(CO8)
3426
/* All variants except LN advance the output pointers by one element after the store. */
3427#ifndef LN
3428 daddiu CO1, CO1, 1 * SIZE
3429 daddiu CO2, CO2, 1 * SIZE
3430 daddiu CO3, CO3, 1 * SIZE
3431 daddiu CO4, CO4, 1 * SIZE
3432 daddiu CO5, CO5, 1 * SIZE
3433 daddiu CO6, CO6, 1 * SIZE
3434 daddiu CO7, CO7, 1 * SIZE
3435 daddiu CO8, CO8, 1 * SIZE
3436#endif
3437
/* RT: move AORIG forward by K elements (K << BASE_SHIFT bytes). */
3438#ifdef RT
3439 dsll TEMP, K, BASE_SHIFT
3440 daddu AORIG, AORIG, TEMP
3441#endif
3442
/*
 * LT/RN: skip the K - KK entries that were not consumed:
 * AO moves by (K - KK) elements, BO by 8 * (K - KK) elements.
 */
3443#if defined(LT) || defined(RN)
3444 dsubu TEMP, K, KK
3445 dsll L, TEMP, 0 + BASE_SHIFT
3446 dsll TEMP, TEMP, 3 + BASE_SHIFT
3447 daddu AO, AO, L
3448 daddu BO, BO, TEMP
3449#endif
3450
/* KK moves by one here: +1 for LT, -1 for LN. */
3451#ifdef LT
3452 daddiu KK, KK, 1
3453#endif
3454
3455#ifdef LN
3456 daddiu KK, KK, -1
3457#endif
3458 .align 3
3459
/*
 * .L29: end of one J iteration.  B and KK are updated per variant and
 * the loop branches back to .L10 (outside this chunk) while J > 0.
 */
3460.L29:
/* LN: advance B by 8 * K elements. */
3461#ifdef LN
3462 dsll TEMP, K, 3 + BASE_SHIFT
3463 daddu B, B, TEMP
3464#endif
3465
/* LT/RN: B continues from wherever BO ended up. */
3466#if defined(LT) || defined(RN)
3467 move B, BO
3468#endif
3469
/* KK moves by eight here: +8 for RN, -8 for RT. */
3470#ifdef RN
3471 daddiu KK, KK, 8
3472#endif
3473
3474#ifdef RT
3475 daddiu KK, KK, -8
3476#endif
3477
/* The NOP after the branch presumably fills its delay slot -- confirm the assembler reorder mode. */
3478 bgtz J, .L10
3479 NOP
3480 .align 3
3481
3482
3483
/*
 * .L999: function exit.  Reloads the registers saved by the prologue
 * from the same frame offsets, then returns through $31 and releases
 * the 144-byte frame.
 */
3484.L999:
3485 LDARG $16, 0($sp)
3486 LDARG $17, 8($sp)
3487 LDARG $18, 16($sp)
3488 LDARG $19, 24($sp)
3489 LDARG $20, 32($sp)
3490 LDARG $21, 40($sp)
3491 ldc1 $f24, 48($sp)
3492 ldc1 $f25, 56($sp)
3493 ldc1 $f26, 64($sp)
3494 ldc1 $f27, 72($sp)
3495 ldc1 $f28, 80($sp)
3496
3497 LDARG $22, 88($sp)
3498 LDARG $23, 96($sp)
3499 LDARG $24, 104($sp)
3500 LDARG $25, 112($sp)
3501
/*
 * NOTE(review): in builds without __64BIT__ the slot at 112($sp) is used
 * for both $25 (just above) and $f20 (just below).  The prologue stores
 * $f20 to that slot after $25, so the value reloaded into $25 comes from
 * the $f20 save.  Any fix has to change the prologue and the frame
 * layout together with this epilogue.
 */
3502#ifndef __64BIT__
3503 ldc1 $f20,112($sp)
3504 ldc1 $f21,120($sp)
3505 ldc1 $f22,128($sp)
3506 ldc1 $f23,136($sp)
3507#endif
3508
/*
 * Return through $31.  The stack adjustment (+144, undoing the -144 of
 * the prologue) follows the jump, presumably in its delay slot.
 */
3509 j $31
3510 daddiu $sp, $sp, 144
3511
3512 EPILOGUE
3513