/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.
*/ 37/*********************************************************************/ 38 39#define ASSEMBLER 40#include "common.h" 41 42#define ALPHA 0 43#define FZERO 8 44 45#define M r3 46#define N r4 47#define K r5 48 49#if defined(linux) || defined(__FreeBSD__) 50#define A r6 51#define B r7 52#define C r8 53#define LDC r9 54#define OFFSET r10 55#endif 56 57#define TEMP r11 58#define KK r14 59#define INCM1 r15 60#define INCM3 r16 61#define INCM5 r17 62#define INCM7 r18 63#define INC2 r19 64#define INC r20 65#define INC4 r21 66 67#define I r22 68#define J r23 69#define AO r24 70#define BO r25 71#define AO2 r26 72#define BO2 r27 73 74#define CO1 r28 75#define CO2 r29 76#define CO3 r30 77#define CO4 r31 78 79#ifndef NEEDPARAM 80 81#define A1 f16 82#define A2 f17 83#define A3 f18 84#define A4 f19 85#define A5 f20 86#define A6 f21 87#define A7 f22 88#define A8 f23 89#define A9 f24 90#define A10 f25 91 92#define B1 f26 93#define B2 f27 94#define B3 f28 95#define B4 f29 96#define B5 f30 97#define B6 f31 98 99#define AP B6 100 101 102 PROLOGUE 103 PROFCODE 104 105 li r0, -16 106 107 stfpdux f14, SP, r0 108 stfpdux f15, SP, r0 109 stfpdux f16, SP, r0 110 stfpdux f17, SP, r0 111 stfpdux f18, SP, r0 112 stfpdux f19, SP, r0 113 stfpdux f20, SP, r0 114 stfpdux f21, SP, r0 115 stfpdux f22, SP, r0 116 stfpdux f23, SP, r0 117 stfpdux f24, SP, r0 118 stfpdux f25, SP, r0 119 stfpdux f26, SP, r0 120 stfpdux f27, SP, r0 121 stfpdux f28, SP, r0 122 stfpdux f29, SP, r0 123 stfpdux f30, SP, r0 124 stfpdux f31, SP, r0 125 126 stwu r31, -4(SP) 127 stwu r30, -4(SP) 128 stwu r29, -4(SP) 129 stwu r28, -4(SP) 130 131 stwu r27, -4(SP) 132 stwu r26, -4(SP) 133 stwu r25, -4(SP) 134 stwu r24, -4(SP) 135 136 stwu r23, -4(SP) 137 stwu r22, -4(SP) 138 stwu r21, -4(SP) 139 stwu r20, -4(SP) 140 141 stwu r19, -4(SP) 142 stwu r18, -4(SP) 143 stwu r17, -4(SP) 144 stwu r16, -4(SP) 145 146 stwu r15, -4(SP) 147 stwu r14, -4(SP) # dummy 148 149 li r0, 0 150 151 stwu r0, -4(SP) 152 stwu r0, -4(SP) 153 stfdu f1, 
-8(SP) 154 155 slwi LDC, LDC, BASE_SHIFT 156 157 cmpwi cr0, M, 0 158 ble .L999 159 cmpwi cr0, N, 0 160 ble .L999 161 cmpwi cr0, K, 0 162 ble .L999 163 164 li INC, 1 * SIZE 165 li INC2, 2 * SIZE 166 li INC4, 4 * SIZE 167 168#if defined(TRMMKERNEL) && !defined(LEFT) 169 neg KK, OFFSET 170#endif 171 172 andi. r0, C, 2 * SIZE - 1 173 bne .L1000 174 andi. r0, LDC, 2 * SIZE - 1 175 bne .L1000 176 177/* High performance version */ 178 179 li INCM3, -2 * SIZE 180 li INCM5, -5 * SIZE 181 li INCM7, -6 * SIZE 182 183 addi C, C, - 2 * SIZE 184 srawi. J, N, 2 185 ble .L50 186 .align 4 187 188.L10: 189 mr CO1, C 190 add CO2, C, LDC 191 add CO3, CO2, LDC 192 add CO4, CO3, LDC 193 add C, CO4, LDC 194 195#if defined(TRMMKERNEL) && defined(LEFT) 196 mr KK, OFFSET 197#endif 198 199 addi AO, A, -4 * SIZE 200 201 li r0, FZERO 202 lfpsx f0, SP, r0 203 204 srawi. I, M, 3 205 ble .L20 206 .align 4 207 208.L11: 209#if defined(TRMMKERNEL) 210#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 211 addi AO2, AO, 2 * SIZE 212 fpmr f4, f0 213 addi BO, B, - 4 * SIZE 214 fpmr f8, f0 215 addi BO2, B, - 2 * SIZE 216 fpmr f12, f0 217#else 218 slwi TEMP, KK, 3 + BASE_SHIFT 219 slwi r0, KK, 2 + BASE_SHIFT 220 add AO, AO, TEMP 221 add BO, B, r0 222 223 addi AO2, AO, 2 * SIZE 224 fpmr f4, f0 225 addi BO, BO, - 4 * SIZE 226 fpmr f8, f0 227 addi BO2, BO, 2 * SIZE 228 fpmr f12, f0 229#endif 230 231 232#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 233 sub TEMP, K, KK 234#elif defined(LEFT) 235 addi TEMP, KK, 8 236#else 237 addi TEMP, KK, 4 238#endif 239 srawi. TEMP, TEMP, 2 240 fpmr f1, f0 241 mtspr CTR, TEMP 242 ble .L14 243 244#else 245 addi AO2, AO, 2 * SIZE 246 fpmr f4, f0 247 addi BO, B, - 4 * SIZE 248 fpmr f8, f0 249 addi BO2, B, - 2 * SIZE 250 fpmr f12, f0 251 252 srawi. 
r0, K, 2 253 fpmr f1, f0 254 mtspr CTR, r0 255 ble .L14 256#endif 257 258 LFPDUX A1, AO, INC4 259 fpmr f5, f0 260 LFPDUX A3, AO, INC4 261 fpmr f9, f0 262 LFPDUX B1, BO, INC4 263 fpmr f13, f0 264 265 LFPDUX A5, AO, INC4 266 fpmr f2, f0 267 LFPDUX A6, AO, INC4 268 fpmr f6, f0 269 LFPDUX B3, BO, INC4 270 fpmr f10, f0 271 LFPDUX A7, AO, INC4 272 fpmr f14, f0 273 274 LFPDUX A8, AO, INC4 275 fpmr f3, f0 276 LFPDUX B5, BO, INC4 277 fpmr f7, f0 278 LFPDUX A9, AO, INC4 279 fpmr f11, f0 280 LFPDUX A2, AO2, INC4 281 fpmr f15, f0 282 LFPDUX B2, BO2, INC4 283 bdz- .L13 284 .align 4 285 286.L12: 287 288## 1 ## 289 fxcpmadd f0, B1, A1, f0 290 nop 291 fxcsmadd f4, B1, A1, f4 292 nop 293 fxcpmadd f8, B2, A1, f8 294 LFPDUX B4, BO2, INC4 295 fxcsmadd f12, B2, A1, f12 296 LFPDUX B6, BO, INC4 297 298 fxcpmadd f1, B1, A2, f1 299 nop 300 fxcsmadd f5, B1, A2, f5 301 LFPDUX A4, AO2, INC4 302 fxcpmadd f9, B2, A2, f9 303 LFPDUX A10, AO, INC4 304 fxcsmadd f13, B2, A2, f13 305 nop 306 307 fxcpmadd f2, B1, A3, f2 308 nop 309 fxcsmadd f6, B1, A3, f6 310 nop 311 fxcpmadd f10, B2, A3, f10 312 nop 313 fxcsmadd f14, B2, A3, f14 314 nop 315 316 fxcpmadd f3, B1, A4, f3 317 nop 318 fxcsmadd f7, B1, A4, f7 319 LFPDUX A2, AO2, INC4 320 fxcpmadd f11, B2, A4, f11 321 LFPDUX A1, AO, INC4 322 fxcsmadd f15, B2, A4, f15 323 nop 324 325## 2 ## 326 327 fxcpmadd f0, B3, A5, f0 328 nop 329 fxcsmadd f4, B3, A5, f4 330 nop 331 fxcpmadd f8, B4, A5, f8 332 LFPDUX B2, BO2, INC4 333 fxcsmadd f12, B4, A5, f12 334 LFPDUX B1, BO, INC4 335 336 fxcpmadd f1, B3, A2, f1 337 nop 338 fxcsmadd f5, B3, A2, f5 339 LFPDUX A4, AO2, INC4 340 fxcpmadd f9, B4, A2, f9 341 LFPDUX A3, AO, INC4 342 fxcsmadd f13, B4, A2, f13 343 nop 344 345 fxcpmadd f2, B3, A6, f2 346 nop 347 fxcsmadd f6, B3, A6, f6 348 nop 349 fxcpmadd f10, B4, A6, f10 350 nop 351 fxcsmadd f14, B4, A6, f14 352 nop 353 354 fxcpmadd f3, B3, A4, f3 355 nop 356 fxcsmadd f7, B3, A4, f7 357 LFPDUX A2, AO2, INC4 358 fxcpmadd f11, B4, A4, f11 359 LFPDUX A5, AO, INC4 360 fxcsmadd 
f15, B4, A4, f15 361 nop 362 363## 3 ## 364 365 fxcpmadd f0, B5, A7, f0 366 nop 367 fxcsmadd f4, B5, A7, f4 368 nop 369 fxcpmadd f8, B2, A7, f8 370 LFPDUX B4, BO2, INC4 371 fxcsmadd f12, B2, A7, f12 372 LFPDUX B3, BO, INC4 373 374 fxcpmadd f1, B5, A2, f1 375 nop 376 fxcsmadd f5, B5, A2, f5 377 LFPDUX A4, AO2, INC4 378 fxcpmadd f9, B2, A2, f9 379 LFPDUX A6, AO, INC4 380 fxcsmadd f13, B2, A2, f13 381 nop 382 383 fxcpmadd f2, B5, A8, f2 384 nop 385 fxcsmadd f6, B5, A8, f6 386 nop 387 fxcpmadd f10, B2, A8, f10 388 nop 389 fxcsmadd f14, B2, A8, f14 390 nop 391 392 fxcpmadd f3, B5, A4, f3 393 nop 394 fxcsmadd f7, B5, A4, f7 395 LFPDUX A2, AO2, INC4 396 fxcpmadd f11, B2, A4, f11 397 LFPDUX A7, AO, INC4 398 fxcsmadd f15, B2, A4, f15 399 nop 400 401## 4 ## 402 fxcpmadd f0, B6, A9, f0 403 nop 404 fxcsmadd f4, B6, A9, f4 405 nop 406 fxcpmadd f8, B4, A9, f8 407 LFPDUX B2, BO2, INC4 408 fxcsmadd f12, B4, A9, f12 409 LFPDUX B5, BO, INC4 410 411 fxcpmadd f1, B6, A2, f1 412 nop 413 fxcsmadd f5, B6, A2, f5 414 LFPDUX A4, AO2, INC4 415 fxcpmadd f9, B4, A2, f9 416 LFPDUX A8, AO, INC4 417 fxcsmadd f13, B4, A2, f13 418 nop 419 420 fxcpmadd f2, B6, A10, f2 421 nop 422 fxcsmadd f6, B6, A10, f6 423 nop 424 fxcpmadd f10, B4, A10, f10 425 nop 426 fxcsmadd f14, B4, A10, f14 427 nop 428 429 fxcpmadd f3, B6, A4, f3 430 LFPDUX A2, AO2, INC4 431 fxcsmadd f7, B6, A4, f7 432 LFPDUX A9, AO, INC4 433 fxcpmadd f11, B4, A4, f11 434 nop 435 fxcsmadd f15, B4, A4, f15 436 bdnz+ .L12 437 .align 4 438 439.L13: 440## 1 ## 441 442 fxcpmadd f0, B1, A1, f0 443 nop 444 fxcsmadd f4, B1, A1, f4 445 nop 446 fxcpmadd f8, B2, A1, f8 447 LFPDUX B4, BO2, INC4 448 fxcsmadd f12, B2, A1, f12 449 LFPDUX B6, BO, INC4 450 451 fxcpmadd f1, B1, A2, f1 452 nop 453 fxcsmadd f5, B1, A2, f5 454 LFPDUX A4, AO2, INC4 455 fxcpmadd f9, B2, A2, f9 456 LFPDUX A10, AO, INC4 457 fxcsmadd f13, B2, A2, f13 458 nop 459 460 fxcpmadd f2, B1, A3, f2 461 nop 462 fxcsmadd f6, B1, A3, f6 463 nop 464 fxcpmadd f10, B2, A3, f10 465 nop 466 fxcsmadd 
f14, B2, A3, f14 467 nop 468 469 fxcpmadd f3, B1, A4, f3 470 nop 471 fxcsmadd f7, B1, A4, f7 472 LFPDUX A2, AO2, INC4 473 fxcpmadd f11, B2, A4, f11 474#ifndef TRMMKERNEL 475 LFPDUX A1, CO1, INC2 476#else 477 nop 478#endif 479 fxcsmadd f15, B2, A4, f15 480 nop 481 482## 2 ## 483 484 fxcpmadd f0, B3, A5, f0 485 nop 486 fxcsmadd f4, B3, A5, f4 487 nop 488 fxcpmadd f8, B4, A5, f8 489 LFPDUX B2, BO2, INC4 490 fxcsmadd f12, B4, A5, f12 491#ifndef TRMMKERNEL 492 LFPDUX B1, CO1, INC4 493#else 494 nop 495#endif 496 497 fxcpmadd f1, B3, A2, f1 498 nop 499 fxcsmadd f5, B3, A2, f5 500 LFPDUX A4, AO2, INC4 501 fxcpmadd f9, B4, A2, f9 502#ifndef TRMMKERNEL 503 LFPDUX A3, CO2, INC2 504#else 505 nop 506#endif 507 fxcsmadd f13, B4, A2, f13 508 nop 509 510 fxcpmadd f2, B3, A6, f2 511 nop 512 fxcsmadd f6, B3, A6, f6 513 nop 514 fxcpmadd f10, B4, A6, f10 515 nop 516 fxcsmadd f14, B4, A6, f14 517 nop 518 519 fxcpmadd f3, B3, A4, f3 520 nop 521 fxcsmadd f7, B3, A4, f7 522 LFPDUX A2, AO2, INC4 523 fxcpmadd f11, B4, A4, f11 524#ifndef TRMMKERNEL 525 LFPDUX A5, CO2, INC4 526#else 527 nop 528#endif 529 fxcsmadd f15, B4, A4, f15 530 nop 531 532## 3 ## 533 534 fxcpmadd f0, B5, A7, f0 535 nop 536 fxcsmadd f4, B5, A7, f4 537 nop 538 fxcpmadd f8, B2, A7, f8 539 LFPDUX B4, BO2, INC4 540 fxcsmadd f12, B2, A7, f12 541#ifndef TRMMKERNEL 542 LFPDUX B3, CO3, INC2 543#else 544 nop 545#endif 546 547 fxcpmadd f1, B5, A2, f1 548 nop 549 fxcsmadd f5, B5, A2, f5 550 LFPDUX A4, AO2, INC4 551 fxcpmadd f9, B2, A2, f9 552#ifndef TRMMKERNEL 553 LFPDUX A6, CO3, INC4 554#else 555 nop 556#endif 557 fxcsmadd f13, B2, A2, f13 558 nop 559 560 fxcpmadd f2, B5, A8, f2 561 nop 562 fxcsmadd f6, B5, A8, f6 563 nop 564 fxcpmadd f10, B2, A8, f10 565 nop 566 fxcsmadd f14, B2, A8, f14 567 nop 568 569 fxcpmadd f3, B5, A4, f3 570 nop 571 fxcsmadd f7, B5, A4, f7 572 LFPDUX A2, AO2, INC4 573 fxcpmadd f11, B2, A4, f11 574#ifndef TRMMKERNEL 575 LFPDUX A7, CO4, INC2 576#else 577 nop 578#endif 579 fxcsmadd f15, B2, A4, f15 580 nop 581 
582## 4 ## 583 584 fxcpmadd f0, B6, A9, f0 585 nop 586 fxcsmadd f4, B6, A9, f4 587 nop 588 fxcpmadd f8, B4, A9, f8 589 nop 590 fxcsmadd f12, B4, A9, f12 591#ifndef TRMMKERNEL 592 LFPDUX B2, CO4, INC4 593#else 594 nop 595#endif 596 597 fxcpmadd f1, B6, A2, f1 598 nop 599 fxcsmadd f5, B6, A2, f5 600 LFPDUX A4, AO2, INC4 601 fxcpmadd f9, B4, A2, f9 602#ifndef TRMMKERNEL 603 LFPDUX B5, CO1, INCM3 604#else 605 nop 606#endif 607 fxcsmadd f13, B4, A2, f13 608 nop 609 610 fxcpmadd f2, B6, A10, f2 611 nop 612 fxcsmadd f6, B6, A10, f6 613 nop 614 fxcpmadd f10, B4, A10, f10 615 nop 616 fxcsmadd f14, B4, A10, f14 617#ifndef TRMMKERNEL 618 LFPDUX A8, CO1, INC4 619#else 620 nop 621#endif 622 623 fxcpmadd f3, B6, A4, f3 624 nop 625 fxcsmadd f7, B6, A4, f7 626 nop 627 fxcpmadd f11, B4, A4, f11 628 nop 629 fxcsmadd f15, B4, A4, f15 630#ifndef TRMMKERNEL 631 LFPDUX A9, CO2, INCM3 632#else 633 nop 634#endif 635 .align 4 636 637.L14: 638 lfd AP, ALPHA(SP) 639#ifdef TRMMKERNEL 640 fsmfp AP, AP 641#endif 642 643#if defined(TRMMKERNEL) 644#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 645 sub TEMP, K, KK 646#elif defined(LEFT) 647 addi TEMP, KK, 8 648#else 649 addi TEMP, KK, 4 650#endif 651 andi. r0, TEMP, 3 652 mtspr CTR, r0 653 ble+ .L18 654 655 cmpwi cr0, TEMP, 3 656 bgt+ .L15 657#else 658 andi. 
r0, K, 3 659 mtspr CTR, r0 660 ble+ .L18 661 662 cmpwi cr0, K, 3 663 bgt+ .L15 664#endif 665 666#ifndef TRMMKERNEL 667 LFPDUX A1, CO1, INC2 668 fpmr f5, f0 669 LFPDUX B1, CO1, INC4 670 fpmr f9, f0 671 LFPDUX A3, CO2, INC2 672 fpmr f13, f0 673 LFPDUX A5, CO2, INC4 674 fpmr f2, f0 675 676 LFPDUX B3, CO3, INC2 677 fpmr f6, f0 678 LFPDUX A6, CO3, INC4 679 fpmr f10, f0 680 LFPDUX A7, CO4, INC2 681 fpmr f14, f0 682 LFPDUX B2, CO4, INC4 683 fpmr f3, f0 684 685 LFPDUX B5, CO1, INCM3 686 fpmr f7, f0 687 LFPDUX A8, CO1, INC4 688 fpmr f11, f0 689 LFPDUX A9, CO2, INCM3 690 fpmr f15, f0 691#else 692 fpmr f5, f0 693 fpmr f9, f0 694 fpmr f13, f0 695 fpmr f2, f0 696 697 fpmr f6, f0 698 fpmr f10, f0 699 fpmr f14, f0 700 fpmr f3, f0 701 702 fpmr f7, f0 703 fpmr f11, f0 704 fpmr f15, f0 705 nop 706#endif 707 .align 4 708 709.L15: 710 LFPDUX A2, AO, INC4 711 LFPDUX A4, AO2, INC4 712 LFPDUX A10, BO, INC4 713 LFPDUX B4, BO2, INC4 714 bdz- .L17 715 .align 4 716 717.L16: 718 fxcpmadd f0, A10, A2, f0 719 fxcsmadd f4, A10, A2, f4 720 fxcpmadd f8, B4, A2, f8 721 fxcsmadd f12, B4, A2, f12 722 LFPDUX A2, AO, INC4 723 724 fxcpmadd f1, A10, A4, f1 725 fxcsmadd f5, A10, A4, f5 726 fxcpmadd f9, B4, A4, f9 727 fxcsmadd f13, B4, A4, f13 728 LFPDUX A4, AO2, INC4 729 730 fxcpmadd f2, A10, A2, f2 731 fxcsmadd f6, A10, A2, f6 732 fxcpmadd f10, B4, A2, f10 733 fxcsmadd f14, B4, A2, f14 734 LFPDUX A2, AO, INC4 735 736 fxcpmadd f3, A10, A4, f3 737 fxcsmadd f7, A10, A4, f7 738 LFPDUX A10, BO, INC4 739 fxcpmadd f11, B4, A4, f11 740 fxcsmadd f15, B4, A4, f15 741 LFPDUX A4, AO2, INC4 742 LFPDUX B4, BO2, INC4 743 bdnz+ .L16 744 .align 4 745 746.L17: 747 fxcpmadd f0, A10, A2, f0 748 fxcsmadd f4, A10, A2, f4 749 fxcpmadd f8, B4, A2, f8 750 fxcsmadd f12, B4, A2, f12 751 LFPDUX A2, AO, INC4 752 753 fxcpmadd f1, A10, A4, f1 754 fxcsmadd f5, A10, A4, f5 755 fxcpmadd f9, B4, A4, f9 756 fxcsmadd f13, B4, A4, f13 757 LFPDUX A4, AO2, INC4 758 759 fxcpmadd f2, A10, A2, f2 760 fxcsmadd f6, A10, A2, f6 761 fxcpmadd f10, B4, 
A2, f10 762 fxcsmadd f14, B4, A2, f14 763 764 fxcpmadd f3, A10, A4, f3 765 fxcsmadd f7, A10, A4, f7 766 fxcpmadd f11, B4, A4, f11 767 fxcsmadd f15, B4, A4, f15 768 .align 4 769 770.L18: 771#ifndef TRMMKERNEL 772 fxcpmadd f0, AP, f0, A1 773 LFPDUX B4, CO2, INC4 774 fxcpmadd f1, AP, f1, B5 775 LFPDUX A2, CO3, INCM3 776 777 fxcpmadd f2, AP, f2, B1 778 LFPDUX A4, CO3, INC4 779 fxcpmadd f3, AP, f3, A8 780 LFPDUX A10, CO4, INCM3 781 782 fxcpmadd f4, AP, f4, A3 783 LFPDUX A1, CO4, INC4 784 fxcpmadd f5, AP, f5, A9 785 STFPDUX f0, CO1, INCM7 786 787 fxcpmadd f6, AP, f6, A5 788 STFPDUX f1, CO1, INC2 789 fxcpmadd f7, AP, f7, B4 790 STFPDUX f2, CO1, INC2 791 792 fxcpmadd f8, AP, f8, B3 793 STFPDUX f3, CO1, INC2 794 fxcpmadd f9, AP, f9, A2 795 STFPDUX f4, CO2, INCM7 796 797 fxcpmadd f10, AP, f10, A6 798 STFPDUX f5, CO2, INC2 799 fxcpmadd f11, AP, f11, A4 800 STFPDUX f6, CO2, INC2 801 802 fxcpmadd f12, AP, f12, A7 803 STFPDUX f7, CO2, INC2 804 fxcpmadd f13, AP, f13, A10 805 STFPDUX f8, CO3, INCM7 806 807 fxcpmadd f14, AP, f14, B2 808 STFPDUX f9, CO3, INC2 809 fxcpmadd f15, AP, f15, A1 810 STFPDUX f10, CO3, INC2 811 812 STFPDUX f11, CO3, INC2 813 STFPDUX f12, CO4, INCM7 814 STFPDUX f13, CO4, INC2 815 STFPDUX f14, CO4, INC2 816 STFPDUX f15, CO4, INC2 817#else 818 fpmul f0, AP, f0 819 fpmul f1, AP, f1 820 fpmul f2, AP, f2 821 fpmul f3, AP, f3 822 823 fpmul f4, AP, f4 824 fpmul f5, AP, f5 825 STFPDUX f0, CO1, INC2 826 827 fpmul f6, AP, f6 828 STFPDUX f1, CO1, INC2 829 fpmul f7, AP, f7 830 STFPDUX f2, CO1, INC2 831 832 fpmul f8, AP, f8 833 STFPDUX f3, CO1, INC2 834 fpmul f9, AP, f9 835 STFPDUX f4, CO2, INC2 836 837 fpmul f10, AP, f10 838 STFPDUX f5, CO2, INC2 839 fpmul f11, AP, f11 840 STFPDUX f6, CO2, INC2 841 842 fpmul f12, AP, f12 843 STFPDUX f7, CO2, INC2 844 fpmul f13, AP, f13 845 STFPDUX f8, CO3, INC2 846 847 fpmul f14, AP, f14 848 STFPDUX f9, CO3, INC2 849 fpmul f15, AP, f15 850 STFPDUX f10, CO3, INC2 851 852 STFPDUX f11, CO3, INC2 853 STFPDUX f12, CO4, INC2 854 STFPDUX f13, 
CO4, INC2 855 STFPDUX f14, CO4, INC2 856 STFPDUX f15, CO4, INC2 857#endif 858 859#ifdef TRMMKERNEL 860#if ( defined(LEFT) && defined(TRANSA)) || \ 861 (!defined(LEFT) && !defined(TRANSA)) 862 sub TEMP, K, KK 863#ifdef LEFT 864 addi TEMP, TEMP, -8 865#else 866 addi TEMP, TEMP, -4 867#endif 868 slwi r0, TEMP, 3 + BASE_SHIFT 869 slwi TEMP, TEMP, 2 + BASE_SHIFT 870 add AO, AO, r0 871 add BO, BO, TEMP 872#endif 873 874#ifdef LEFT 875 addi KK, KK, 8 876#endif 877#endif 878 879 addic. I, I, -1 880 li r0, FZERO 881 882 lfpsx f0, SP, r0 883 bgt+ .L11 884 .align 4 885 886.L20: 887 andi. I, M, 4 888 beq .L30 889 890#if defined(TRMMKERNEL) 891#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 892 addi AO2, AO, 2 * SIZE 893 fpmr f4, f0 894 addi BO, B, - 4 * SIZE 895 fpmr f8, f0 896 addi BO2, B, - 2 * SIZE 897 fpmr f12, f0 898#else 899 slwi TEMP, KK, 2 + BASE_SHIFT 900 slwi r0, KK, 2 + BASE_SHIFT 901 add AO, AO, TEMP 902 add BO, B, r0 903 904 addi AO2, AO, 2 * SIZE 905 fpmr f4, f0 906 addi BO, BO, - 4 * SIZE 907 fpmr f8, f0 908 addi BO2, BO, 2 * SIZE 909 fpmr f12, f0 910#endif 911 912#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 913 sub TEMP, K, KK 914#elif defined(LEFT) 915 addi TEMP, KK, 4 916#else 917 addi TEMP, KK, 4 918#endif 919 920 srawi. TEMP, TEMP, 2 921 fpmr f1, f0 922 fpmr f5, f0 923 fpmr f9, f0 924 mtspr CTR, TEMP 925 fpmr f13, f0 926 ble .L24 927#else 928 addi AO2, AO, 2 * SIZE 929 fpmr f4, f0 930 addi BO, B, - 4 * SIZE 931 fpmr f8, f0 932 addi BO2, B, - 2 * SIZE 933 fpmr f12, f0 934 935 srawi. 
r0, K, 2 936 fpmr f1, f0 937 fpmr f5, f0 938 fpmr f9, f0 939 mtspr CTR, r0 940 fpmr f13, f0 941 ble .L24 942#endif 943 944 LFPDUX A1, AO, INC4 945 LFPDUX B1, BO, INC4 946 LFPDUX A2, AO2, INC4 947 LFPDUX B2, BO2, INC4 948 LFPDUX A3, AO, INC4 949 LFPDUX B3, BO, INC4 950 LFPDUX A4, AO2, INC4 951 LFPDUX B4, BO2, INC4 952 953 LFPDUX A5, AO, INC4 954 LFPDUX B5, BO, INC4 955 LFPDUX A6, AO2, INC4 956 LFPDUX B6, BO2, INC4 957 LFPDUX A7, AO, INC4 958 LFPDUX A9, BO, INC4 959 LFPDUX A10, BO2, INC4 960 bdz- .L23 961 .align 4 962 963.L22: 964 fxcpmadd f0, B1, A1, f0 965 nop 966 fxcsmadd f4, B1, A1, f4 967 LFPDUX A8, AO2, INC4 968 fxcpmadd f8, B2, A1, f8 969 nop 970 fxcsmadd f12, B2, A1, f12 971 LFPDUX A1, AO, INC4 972 973 fxcpmadd f1, B1, A2, f1 974 nop 975 fxcsmadd f5, B1, A2, f5 976 LFPDUX B1, BO, INC4 977 fxcpmadd f9, B2, A2, f9 978 nop 979 fxcsmadd f13, B2, A2, f13 980 LFPDUX B2, BO2, INC4 981 982 fxcpmadd f0, B3, A3, f0 983 nop 984 fxcsmadd f4, B3, A3, f4 985 LFPDUX A2, AO2, INC4 986 fxcpmadd f8, B4, A3, f8 987 nop 988 fxcsmadd f12, B4, A3, f12 989 LFPDUX A3, AO, INC4 990 991 fxcpmadd f1, B3, A4, f1 992 nop 993 fxcsmadd f5, B3, A4, f5 994 LFPDUX B3, BO, INC4 995 fxcpmadd f9, B4, A4, f9 996 nop 997 fxcsmadd f13, B4, A4, f13 998 LFPDUX B4, BO2, INC4 999 1000 fxcpmadd f0, B5, A5, f0 1001 nop 1002 fxcsmadd f4, B5, A5, f4 1003 LFPDUX A4, AO2, INC4 1004 fxcpmadd f8, B6, A5, f8 1005 nop 1006 fxcsmadd f12, B6, A5, f12 1007 LFPDUX A5, AO, INC4 1008 1009 fxcpmadd f1, B5, A6, f1 1010 nop 1011 fxcsmadd f5, B5, A6, f5 1012 LFPDUX B5, BO, INC4 1013 fxcpmadd f9, B6, A6, f9 1014 nop 1015 fxcsmadd f13, B6, A6, f13 1016 LFPDUX B6, BO2, INC4 1017 1018 fxcpmadd f0, A9, A7, f0 1019 nop 1020 fxcsmadd f4, A9, A7, f4 1021 LFPDUX A6, AO2, INC4 1022 fxcpmadd f8, A10, A7, f8 1023 nop 1024 fxcsmadd f12, A10, A7, f12 1025 LFPDUX A7, AO, INC4 1026 1027 fxcpmadd f1, A9, A8, f1 1028 nop 1029 fxcsmadd f5, A9, A8, f5 1030 LFPDUX A9, BO, INC4 1031 fxcpmadd f9, A10, A8, f9 1032 nop 1033 fxcsmadd f13, A10, A8, 
f13 1034 LFPDUX A10, BO2, INC4 1035 bdnz+ .L22 1036 .align 4 1037 1038.L23: 1039 fxcpmadd f0, B1, A1, f0 1040 fxcsmadd f4, B1, A1, f4 1041 LFPDUX A8, AO2, INC4 1042 fxcpmadd f8, B2, A1, f8 1043 fxcsmadd f12, B2, A1, f12 1044 1045 fxcpmadd f1, B1, A2, f1 1046 fxcsmadd f5, B1, A2, f5 1047 fxcpmadd f9, B2, A2, f9 1048 fxcsmadd f13, B2, A2, f13 1049 1050 fxcpmadd f0, B3, A3, f0 1051 fxcsmadd f4, B3, A3, f4 1052 fxcpmadd f8, B4, A3, f8 1053 fxcsmadd f12, B4, A3, f12 1054 1055 fxcpmadd f1, B3, A4, f1 1056 fxcsmadd f5, B3, A4, f5 1057 fxcpmadd f9, B4, A4, f9 1058 fxcsmadd f13, B4, A4, f13 1059 1060 fxcpmadd f0, B5, A5, f0 1061 fxcsmadd f4, B5, A5, f4 1062 fxcpmadd f8, B6, A5, f8 1063 fxcsmadd f12, B6, A5, f12 1064 1065 fxcpmadd f1, B5, A6, f1 1066 fxcsmadd f5, B5, A6, f5 1067 fxcpmadd f9, B6, A6, f9 1068 fxcsmadd f13, B6, A6, f13 1069 1070 fxcpmadd f0, A9, A7, f0 1071 fxcsmadd f4, A9, A7, f4 1072 fxcpmadd f8, A10, A7, f8 1073 fxcsmadd f12, A10, A7, f12 1074 1075 fxcpmadd f1, A9, A8, f1 1076 fxcsmadd f5, A9, A8, f5 1077 fxcpmadd f9, A10, A8, f9 1078 fxcsmadd f13, A10, A8, f13 1079 .align 4 1080 1081.L24: 1082 lfd AP, ALPHA(SP) 1083#ifdef TRMMKERNEL 1084 fsmfp AP, AP 1085#endif 1086 1087#if defined(TRMMKERNEL) 1088#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1089 sub TEMP, K, KK 1090#elif defined(LEFT) 1091 addi TEMP, KK, 4 1092#else 1093 addi TEMP, KK, 4 1094#endif 1095 andi. TEMP, TEMP, 3 1096 mtspr CTR, TEMP 1097#else 1098 andi. 
r0, K, 3 1099 mtspr CTR, r0 1100#endif 1101 ble+ .L28 1102 1103 LFPDUX A1, AO, INC4 1104 LFPDUX A2, AO2, INC4 1105 LFPDUX B1, BO, INC4 1106 LFPDUX B2, BO2, INC4 1107 bdz- .L27 1108 .align 4 1109 1110.L26: 1111 fxcpmadd f0, B1, A1, f0 1112 fxcsmadd f4, B1, A1, f4 1113 fxcpmadd f8, B2, A1, f8 1114 fxcsmadd f12, B2, A1, f12 1115 LFPDUX A1, AO, INC4 1116 1117 fxcpmadd f1, B1, A2, f1 1118 fxcsmadd f5, B1, A2, f5 1119 LFPDUX B1, BO, INC4 1120 fxcpmadd f9, B2, A2, f9 1121 fxcsmadd f13, B2, A2, f13 1122 LFPDUX A2, AO2, INC4 1123 LFPDUX B2, BO2, INC4 1124 bdnz+ .L26 1125 .align 4 1126 1127.L27: 1128 fxcpmadd f0, B1, A1, f0 1129 fxcsmadd f4, B1, A1, f4 1130 fxcpmadd f8, B2, A1, f8 1131 fxcsmadd f12, B2, A1, f12 1132 1133 fxcpmadd f1, B1, A2, f1 1134 fxcsmadd f5, B1, A2, f5 1135 fxcpmadd f9, B2, A2, f9 1136 fxcsmadd f13, B2, A2, f13 1137 .align 4 1138 1139.L28: 1140#ifndef TRMMKERNEL 1141 LFPDUX A1, CO1, INC2 1142 LFPDUX B1, CO1, INC2 1143 LFPDUX B3, CO2, INC2 1144 LFPDUX A6, CO2, INC2 1145 1146 LFPDUX B5, CO3, INC2 1147 LFPDUX A8, CO3, INC2 1148 LFPDUX A2, CO4, INC2 1149 LFPDUX A4, CO4, INC2 1150 1151 fxcpmadd f0, AP, f0, A1 1152 fxcpmadd f1, AP, f1, B1 1153 fxcpmadd f4, AP, f4, B3 1154 fxcpmadd f5, AP, f5, A6 1155 1156 fxcpmadd f8, AP, f8, B5 1157 fxcpmadd f9, AP, f9, A8 1158 STFPDUX f0, CO1, INCM3 1159 fxcpmadd f12, AP, f12, A2 1160 STFPDUX f1, CO1, INC2 1161 fxcpmadd f13, AP, f13, A4 1162 STFPDUX f4, CO2, INCM3 1163 1164 STFPDUX f5, CO2, INC2 1165 STFPDUX f8, CO3, INCM3 1166 STFPDUX f9, CO3, INC2 1167 STFPDUX f12, CO4, INCM3 1168 STFPDUX f13, CO4, INC2 1169#else 1170 fpmul f0, AP, f0 1171 fpmul f1, AP, f1 1172 fpmul f4, AP, f4 1173 fpmul f5, AP, f5 1174 1175 fpmul f8, AP, f8 1176 fpmul f9, AP, f9 1177 STFPDUX f0, CO1, INC2 1178 fpmul f12, AP, f12 1179 STFPDUX f1, CO1, INC2 1180 fpmul f13, AP, f13 1181 STFPDUX f4, CO2, INC2 1182 1183 STFPDUX f5, CO2, INC2 1184 STFPDUX f8, CO3, INC2 1185 STFPDUX f9, CO3, INC2 1186 STFPDUX f12, CO4, INC2 1187 STFPDUX f13, CO4, INC2 
1188#endif 1189 1190 1191#ifdef TRMMKERNEL 1192#if ( defined(LEFT) && defined(TRANSA)) || \ 1193 (!defined(LEFT) && !defined(TRANSA)) 1194 sub TEMP, K, KK 1195#ifdef LEFT 1196 addi TEMP, TEMP, -4 1197#else 1198 addi TEMP, TEMP, -4 1199#endif 1200 slwi r0, TEMP, 2 + BASE_SHIFT 1201 slwi TEMP, TEMP, 2 + BASE_SHIFT 1202 add AO, AO, r0 1203 add BO, BO, TEMP 1204#endif 1205 1206#ifdef LEFT 1207 addi KK, KK, 4 1208#endif 1209#endif 1210 1211 li r0, FZERO 1212 lfpsx f0, SP, r0 1213 .align 4 1214 1215.L30: 1216 andi. I, M, 2 1217 beq .L40 1218 1219#if defined(TRMMKERNEL) 1220#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 1221 addi AO2, AO, 2 * SIZE 1222 fpmr f1, f0 1223 addi BO, B, - 4 * SIZE 1224 fpmr f2, f0 1225 addi BO2, B, - 2 * SIZE 1226 fpmr f3, f0 1227#else 1228 slwi TEMP, KK, 1 + BASE_SHIFT 1229 slwi r0, KK, 2 + BASE_SHIFT 1230 add AO, AO, TEMP 1231 add BO, B, r0 1232 1233 addi AO2, AO, 2 * SIZE 1234 fpmr f1, f0 1235 addi BO, BO, - 4 * SIZE 1236 fpmr f2, f0 1237 addi BO2, BO, 2 * SIZE 1238 fpmr f3, f0 1239#endif 1240 1241#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1242 sub TEMP, K, KK 1243#elif defined(LEFT) 1244 addi TEMP, KK, 2 1245#else 1246 addi TEMP, KK, 4 1247#endif 1248 1249 srawi. r0, TEMP, 2 1250 mtspr CTR, r0 1251 ble .L34 1252 1253#else 1254 addi AO2, AO, 2 * SIZE 1255 fpmr f1, f0 1256 addi BO, B, - 4 * SIZE 1257 fpmr f2, f0 1258 addi BO2, B, - 2 * SIZE 1259 fpmr f3, f0 1260 1261 srawi. 
r0, K, 2 1262 mtspr CTR, r0 1263 ble .L34 1264#endif 1265 1266 LFPDUX A1, AO, INC4 1267 LFPDUX B1, BO, INC4 1268 LFPDUX B2, BO2, INC4 1269 LFPDUX A2, AO2, INC4 1270 LFPDUX B3, BO, INC4 1271 LFPDUX B4, BO2, INC4 1272 1273 LFPDUX A3, AO, INC4 1274 LFPDUX A5, BO, INC4 1275 LFPDUX A6, BO2, INC4 1276 LFPDUX A4, AO2, INC4 1277 LFPDUX A7, BO, INC4 1278 LFPDUX A8, BO2, INC4 1279 bdz- .L33 1280 .align 4 1281 1282.L32: 1283 fxcpmadd f0, B1, A1, f0 1284 fxcsmadd f1, B1, A1, f1 1285 LFPDUX B1, BO, INC4 1286 fxcpmadd f2, B2, A1, f2 1287 fxcsmadd f3, B2, A1, f3 1288 LFPDUX B2, BO2, INC4 1289 LFPDUX A1, AO, INC4 1290 1291 fxcpmadd f0, B3, A2, f0 1292 fxcsmadd f1, B3, A2, f1 1293 LFPDUX B3, BO, INC4 1294 fxcpmadd f2, B4, A2, f2 1295 fxcsmadd f3, B4, A2, f3 1296 LFPDUX B4, BO2, INC4 1297 LFPDUX A2, AO2, INC4 1298 1299 fxcpmadd f0, A5, A3, f0 1300 fxcsmadd f1, A5, A3, f1 1301 LFPDUX A5, BO, INC4 1302 fxcpmadd f2, A6, A3, f2 1303 fxcsmadd f3, A6, A3, f3 1304 LFPDUX A6, BO2, INC4 1305 LFPDUX A3, AO, INC4 1306 1307 fxcpmadd f0, A7, A4, f0 1308 fxcsmadd f1, A7, A4, f1 1309 LFPDUX A7, BO, INC4 1310 fxcpmadd f2, A8, A4, f2 1311 fxcsmadd f3, A8, A4, f3 1312 LFPDUX A8, BO2, INC4 1313 LFPDUX A4, AO2, INC4 1314 bdnz+ .L32 1315 .align 4 1316 1317.L33: 1318 fxcpmadd f0, B1, A1, f0 1319 fxcsmadd f1, B1, A1, f1 1320 fxcpmadd f2, B2, A1, f2 1321 fxcsmadd f3, B2, A1, f3 1322 1323 fxcpmadd f0, B3, A2, f0 1324 fxcsmadd f1, B3, A2, f1 1325 fxcpmadd f2, B4, A2, f2 1326 fxcsmadd f3, B4, A2, f3 1327 1328 fxcpmadd f0, A5, A3, f0 1329 fxcsmadd f1, A5, A3, f1 1330 fxcpmadd f2, A6, A3, f2 1331 fxcsmadd f3, A6, A3, f3 1332 1333 fxcpmadd f0, A7, A4, f0 1334 fxcsmadd f1, A7, A4, f1 1335 fxcpmadd f2, A8, A4, f2 1336 fxcsmadd f3, A8, A4, f3 1337 .align 4 1338 1339.L34: 1340 lfd AP, ALPHA(SP) 1341#ifdef TRMMKERNEL 1342 fsmfp AP, AP 1343#endif 1344 1345#if defined(TRMMKERNEL) 1346#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1347 sub TEMP, K, KK 1348#elif defined(LEFT) 1349 addi 
TEMP, KK, 2 1350#else 1351 addi TEMP, KK, 4 1352#endif 1353 andi. TEMP, TEMP, 3 1354 mtspr CTR, TEMP 1355#else 1356 andi. r0, K, 3 1357 mtspr CTR, r0 1358#endif 1359 ble+ .L38 1360 1361 LFPDX A1, AO, INC4 1362 LFPDUX B1, BO, INC4 1363 LFPDUX B2, BO2, INC4 1364 add AO, AO, INC2 1365 bdz- .L37 1366 .align 4 1367 1368.L36: 1369 fxcpmadd f0, B1, A1, f0 1370 fxcsmadd f1, B1, A1, f1 1371 LFPDUX B1, BO, INC4 1372 fxcpmadd f2, B2, A1, f2 1373 fxcsmadd f3, B2, A1, f3 1374 LFPDX A1, AO, INC4 1375 LFPDUX B2, BO2, INC4 1376 add AO, AO, INC2 1377 bdnz+ .L36 1378 .align 4 1379 1380.L37: 1381 fxcpmadd f0, B1, A1, f0 1382 fxcsmadd f1, B1, A1, f1 1383 fxcpmadd f2, B2, A1, f2 1384 fxcsmadd f3, B2, A1, f3 1385 .align 4 1386 1387.L38: 1388#ifndef TRMMKERNEL 1389 LFPDX A1, CO1, INC2 1390 LFPDX A2, CO2, INC2 1391 LFPDX A3, CO3, INC2 1392 LFPDX A4, CO4, INC2 1393 1394 fxcpmadd f0, AP, f0, A1 1395 fxcpmadd f1, AP, f1, A2 1396 fxcpmadd f2, AP, f2, A3 1397 fxcpmadd f3, AP, f3, A4 1398#else 1399 fpmul f0, AP, f0 1400 fpmul f1, AP, f1 1401 fpmul f2, AP, f2 1402 fpmul f3, AP, f3 1403#endif 1404 1405 STFPDUX f0, CO1, INC2 1406 STFPDUX f1, CO2, INC2 1407 STFPDUX f2, CO3, INC2 1408 STFPDUX f3, CO4, INC2 1409 1410#ifdef TRMMKERNEL 1411#if ( defined(LEFT) && defined(TRANSA)) || \ 1412 (!defined(LEFT) && !defined(TRANSA)) 1413 sub TEMP, K, KK 1414#ifdef LEFT 1415 addi TEMP, TEMP, -2 1416#else 1417 addi TEMP, TEMP, -4 1418#endif 1419 slwi r0, TEMP, 1 + BASE_SHIFT 1420 slwi TEMP, TEMP, 2 + BASE_SHIFT 1421 add AO, AO, r0 1422 add BO, BO, TEMP 1423#endif 1424 1425#ifdef LEFT 1426 addi KK, KK, 2 1427#endif 1428#endif 1429 1430 li r0, FZERO 1431 lfpsx f0, SP, r0 1432 .align 4 1433 1434.L40: 1435 andi. 
I, M, 1 1436 beq .L49 1437 1438#if defined(TRMMKERNEL) 1439#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 1440 addi AO2, AO, 2 * SIZE 1441 fpmr f1, f0 1442 addi BO, B, - 4 * SIZE 1443 fpmr f2, f0 1444 addi BO2, B, - 2 * SIZE 1445 fpmr f3, f0 1446#else 1447 slwi TEMP, KK, 0 + BASE_SHIFT 1448 slwi r0, KK, 2 + BASE_SHIFT 1449 add AO, AO, TEMP 1450 add BO, B, r0 1451 1452 addi AO2, AO, 2 * SIZE 1453 fpmr f1, f0 1454 addi BO, BO, - 4 * SIZE 1455 fpmr f2, f0 1456 addi BO2, BO, 2 * SIZE 1457 fpmr f3, f0 1458#endif 1459 1460#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1461 sub TEMP, K, KK 1462#elif defined(LEFT) 1463 addi TEMP, KK, 1 1464#else 1465 addi TEMP, KK, 4 1466#endif 1467 srawi. r0, TEMP, 3 1468 mtspr CTR, r0 1469 ble .L44 1470 1471#else 1472 addi AO2, AO, 2 * SIZE 1473 fpmr f1, f0 1474 addi BO, B, - 4 * SIZE 1475 fpmr f2, f0 1476 addi BO2, B, - 2 * SIZE 1477 fpmr f3, f0 1478 1479 srawi. r0, K, 3 1480 mtspr CTR, r0 1481 ble .L44 1482#endif 1483 1484 LFPDUX A1, AO, INC4 1485 LFPDUX B1, BO, INC4 1486 LFPDUX B2, BO2, INC4 1487 LFPDUX A2, AO2, INC4 1488 LFPDUX B3, BO, INC4 1489 LFPDUX B4, BO2, INC4 1490 1491 LFPDUX A3, AO, INC4 1492 LFPDUX A5, BO, INC4 1493 LFPDUX A6, BO2, INC4 1494 LFPDUX A4, AO2, INC4 1495 LFPDUX A7, BO, INC4 1496 LFPDUX A8, BO2, INC4 1497 bdz- .L43 1498 .align 4 1499 1500.L42: 1501 fxcpmadd f0, A1, B1, f0 1502 LFPDUX B1, BO, INC4 1503 fxcpmadd f1, A1, B2, f1 1504 LFPDUX B2, BO2, INC4 1505 fxcsmadd f2, A1, B3, f2 1506 LFPDUX B3, BO, INC4 1507 fxcsmadd f3, A1, B4, f3 1508 LFPDUX B4, BO2, INC4 1509 LFPDUX A1, AO, INC4 1510 1511 fxcpmadd f0, A2, A5, f0 1512 LFPDUX A5, BO, INC4 1513 fxcpmadd f1, A2, A6, f1 1514 LFPDUX A6, BO2, INC4 1515 fxcsmadd f2, A2, A7, f2 1516 LFPDUX A7, BO, INC4 1517 fxcsmadd f3, A2, A8, f3 1518 LFPDUX A8, BO2, INC4 1519 LFPDUX A2, AO2, INC4 1520 1521 fxcpmadd f0, A3, B1, f0 1522 LFPDUX B1, BO, INC4 1523 fxcpmadd f1, A3, B2, f1 1524 LFPDUX B2, BO2, INC4 1525 
fxcsmadd f2, A3, B3, f2 1526 LFPDUX B3, BO, INC4 1527 fxcsmadd f3, A3, B4, f3 1528 LFPDUX B4, BO2, INC4 1529 LFPDUX A3, AO, INC4 1530 1531 fxcpmadd f0, A4, A5, f0 1532 LFPDUX A5, BO, INC4 1533 fxcpmadd f1, A4, A6, f1 1534 LFPDUX A6, BO2, INC4 1535 fxcsmadd f2, A4, A7, f2 1536 LFPDUX A7, BO, INC4 1537 fxcsmadd f3, A4, A8, f3 1538 LFPDUX A8, BO2, INC4 1539 LFPDUX A4, AO2, INC4 1540 bdnz+ .L42 1541 .align 4 1542 1543.L43: 1544 fxcpmadd f0, A1, B1, f0 1545 LFPDUX B1, BO, INC4 1546 fxcpmadd f1, A1, B2, f1 1547 LFPDUX B2, BO2, INC4 1548 fxcsmadd f2, A1, B3, f2 1549 LFPDUX B3, BO, INC4 1550 fxcsmadd f3, A1, B4, f3 1551 LFPDUX B4, BO2, INC4 1552 1553 fxcpmadd f0, A2, A5, f0 1554 LFPDUX A5, BO, INC4 1555 fxcpmadd f1, A2, A6, f1 1556 LFPDUX A6, BO2, INC4 1557 fxcsmadd f2, A2, A7, f2 1558 LFPDUX A7, BO, INC4 1559 fxcsmadd f3, A2, A8, f3 1560 LFPDUX A8, BO2, INC4 1561 1562 fxcpmadd f0, A3, B1, f0 1563 fxcpmadd f1, A3, B2, f1 1564 fxcsmadd f2, A3, B3, f2 1565 fxcsmadd f3, A3, B4, f3 1566 1567 fxcpmadd f0, A4, A5, f0 1568 fxcpmadd f1, A4, A6, f1 1569 fxcsmadd f2, A4, A7, f2 1570 fxcsmadd f3, A4, A8, f3 1571 .align 4 1572 1573.L44: 1574 lfd AP, ALPHA(SP) 1575#ifdef TRMMKERNEL 1576 fsmfp AP, AP 1577#endif 1578 1579#if defined(TRMMKERNEL) 1580#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1581 sub TEMP, K, KK 1582#elif defined(LEFT) 1583 addi TEMP, KK, 1 1584#else 1585 addi TEMP, KK, 4 1586#endif 1587 andi. TEMP, TEMP, 7 1588 mtspr CTR, TEMP 1589#else 1590 andi. 
r0, K, 7 1591 mtspr CTR, r0 1592#endif 1593 ble+ .L48 1594 1595 LFDX A1, AO, INC4 1596 LFPDUX B1, BO, INC4 1597 LFPDUX B2, BO2, INC4 1598 add AO, AO, INC 1599 bdz- .L47 1600 .align 4 1601 1602.L46: 1603 fxcpmadd f0, A1, B1, f0 1604 LFPDUX B1, BO, INC4 1605 fxcpmadd f1, A1, B2, f1 1606 LFDX A1, AO, INC4 1607 LFPDUX B2, BO2, INC4 1608 add AO, AO, INC 1609 bdnz+ .L46 1610 .align 4 1611 1612.L47: 1613 fxcpmadd f0, A1, B1, f0 1614 fxcpmadd f1, A1, B2, f1 1615 .align 4 1616 1617.L48: 1618#ifndef TRMMKERNEL 1619 LFDX A1, CO1, INC2 1620 LFDX A2, CO2, INC2 1621 LFDX A3, CO3, INC2 1622 LFDX A4, CO4, INC2 1623 1624 fpadd f0, f0, f2 1625 fpadd f1, f1, f3 1626 1627 fsmfp A1, A2 1628 fsmfp A3, A4 1629 1630 fxcpmadd f0, AP, f0, A1 1631 fxcpmadd f1, AP, f1, A3 1632#else 1633 fpadd f0, f0, f2 1634 fpadd f1, f1, f3 1635 1636 fpmul f0, AP, f0 1637 fpmul f1, AP, f1 1638#endif 1639 1640 STFDX f0, CO1, INC2 1641 STFSDX f0, CO2, INC2 1642 STFDX f1, CO3, INC2 1643 STFSDX f1, CO4, INC2 1644 1645#ifdef TRMMKERNEL 1646#if ( defined(LEFT) && defined(TRANSA)) || \ 1647 (!defined(LEFT) && !defined(TRANSA)) 1648 sub TEMP, K, KK 1649#ifdef LEFT 1650 addi TEMP, TEMP, -1 1651#else 1652 addi TEMP, TEMP, -4 1653#endif 1654 slwi r0, TEMP, 0 + BASE_SHIFT 1655 slwi TEMP, TEMP, 2 + BASE_SHIFT 1656 add AO, AO, r0 1657 add BO, BO, TEMP 1658#endif 1659 1660#ifdef LEFT 1661 addi KK, KK, 1 1662#endif 1663#endif 1664 .align 4 1665 1666.L49: 1667#if defined(TRMMKERNEL) && !defined(LEFT) 1668 addi KK, KK, 4 1669#endif 1670 1671 addi B, BO, 4 * SIZE 1672 1673 addic. J, J, -1 1674 bgt+ .L10 1675 .align 4 1676 1677.L50: 1678 andi. J, N, 2 1679 beq .L90 1680 1681 mr CO1, C 1682 add CO2, C, LDC 1683 add C, CO2, LDC 1684 1685#if defined(TRMMKERNEL) && defined(LEFT) 1686 mr KK, OFFSET 1687#endif 1688 1689 addi AO, A, -2 * SIZE 1690 1691 li r0, FZERO 1692 lfpsx f0, SP, r0 1693 1694 srawi. 
I, M, 3 1695 ble .L60 1696 .align 4 1697 1698.L51: 1699#if defined(TRMMKERNEL) 1700#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 1701 fpmr f4, f0 1702 addi BO, B, - 2 * SIZE 1703 fpmr f1, f0 1704 fpmr f5, f0 1705 fpmr f2, f0 1706 fpmr f6, f0 1707#else 1708 slwi TEMP, KK, 3 + BASE_SHIFT 1709 slwi r0, KK, 1 + BASE_SHIFT 1710 add AO, AO, TEMP 1711 add BO, B, r0 1712 1713 fpmr f4, f0 1714 addi BO, BO, - 2 * SIZE 1715 fpmr f1, f0 1716 fpmr f5, f0 1717 fpmr f2, f0 1718 fpmr f6, f0 1719#endif 1720 1721 1722#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1723 sub TEMP, K, KK 1724#elif defined(LEFT) 1725 addi TEMP, KK, 8 1726#else 1727 addi TEMP, KK, 2 1728#endif 1729 srawi. r0, TEMP, 2 1730 fpmr f3, f0 1731 mtspr CTR, r0 1732 fpmr f7, f0 1733 ble .L54 1734#else 1735 fpmr f4, f0 1736 addi BO, B, - 2 * SIZE 1737 fpmr f1, f0 1738 fpmr f5, f0 1739 fpmr f2, f0 1740 fpmr f6, f0 1741 1742 srawi. r0, K, 2 1743 fpmr f3, f0 1744 mtspr CTR, r0 1745 fpmr f7, f0 1746 ble .L54 1747#endif 1748 1749 LFPDUX B1, BO, INC2 1750 LFPDUX A1, AO, INC2 1751 LFPDUX A2, AO, INC2 1752 LFPDUX B2, BO, INC2 1753 LFPDUX A3, AO, INC2 1754 LFPDUX A4, AO, INC2 1755 1756 LFPDUX B3, BO, INC2 1757 LFPDUX A5, AO, INC2 1758 LFPDUX A6, AO, INC2 1759 LFPDUX A7, AO, INC2 1760 LFPDUX A8, AO, INC2 1761 bdz- .L53 1762 .align 4 1763 1764.L52: 1765 fxcpmadd f0, B1, A1, f0 1766 LFPDUX B4, BO, INC2 1767 fxcsmadd f4, B1, A1, f4 1768 LFPDUX A1, AO, INC2 1769 fxcpmadd f1, B1, A2, f1 1770 nop 1771 fxcsmadd f5, B1, A2, f5 1772 LFPDUX A2, AO, INC2 1773 1774 fxcpmadd f2, B1, A3, f2 1775 nop 1776 fxcsmadd f6, B1, A3, f6 1777 LFPDUX A3, AO, INC2 1778 fxcpmadd f3, B1, A4, f3 1779 nop 1780 fxcsmadd f7, B1, A4, f7 1781 LFPDUX A4, AO, INC2 1782 1783 fxcpmadd f0, B2, A5, f0 1784 LFPDUX B1, BO, INC2 1785 fxcsmadd f4, B2, A5, f4 1786 LFPDUX A5, AO, INC2 1787 fxcpmadd f1, B2, A6, f1 1788 nop 1789 fxcsmadd f5, B2, A6, f5 1790 LFPDUX A6, AO, INC2 1791 1792 fxcpmadd f2, B2, A7, 
f2 1793 nop 1794 fxcsmadd f6, B2, A7, f6 1795 LFPDUX A7, AO, INC2 1796 fxcpmadd f3, B2, A8, f3 1797 nop 1798 fxcsmadd f7, B2, A8, f7 1799 LFPDUX A8, AO, INC2 1800 1801 fxcpmadd f0, B3, A1, f0 1802 LFPDUX B2, BO, INC2 1803 fxcsmadd f4, B3, A1, f4 1804 LFPDUX A1, AO, INC2 1805 fxcpmadd f1, B3, A2, f1 1806 nop 1807 fxcsmadd f5, B3, A2, f5 1808 LFPDUX A2, AO, INC2 1809 1810 fxcpmadd f2, B3, A3, f2 1811 nop 1812 fxcsmadd f6, B3, A3, f6 1813 LFPDUX A3, AO, INC2 1814 fxcpmadd f3, B3, A4, f3 1815 nop 1816 fxcsmadd f7, B3, A4, f7 1817 LFPDUX A4, AO, INC2 1818 1819 fxcpmadd f0, B4, A5, f0 1820 LFPDUX B3, BO, INC2 1821 fxcsmadd f4, B4, A5, f4 1822 LFPDUX A5, AO, INC2 1823 fxcpmadd f1, B4, A6, f1 1824 nop 1825 fxcsmadd f5, B4, A6, f5 1826 LFPDUX A6, AO, INC2 1827 1828 fxcpmadd f2, B4, A7, f2 1829 nop 1830 fxcsmadd f6, B4, A7, f6 1831 LFPDUX A7, AO, INC2 1832 fxcpmadd f3, B4, A8, f3 1833 nop 1834 fxcsmadd f7, B4, A8, f7 1835 LFPDUX A8, AO, INC2 1836 bdnz+ .L52 1837 .align 4 1838 1839.L53: 1840 fxcpmadd f0, B1, A1, f0 1841 LFPDUX B4, BO, INC2 1842 fxcsmadd f4, B1, A1, f4 1843 LFPDUX A1, AO, INC2 1844 fxcpmadd f1, B1, A2, f1 1845 nop 1846 fxcsmadd f5, B1, A2, f5 1847 LFPDUX A2, AO, INC2 1848 1849 fxcpmadd f2, B1, A3, f2 1850 nop 1851 fxcsmadd f6, B1, A3, f6 1852 LFPDUX A3, AO, INC2 1853 fxcpmadd f3, B1, A4, f3 1854 nop 1855 fxcsmadd f7, B1, A4, f7 1856 LFPDUX A4, AO, INC2 1857 1858 fxcpmadd f0, B2, A5, f0 1859 nop 1860 fxcsmadd f4, B2, A5, f4 1861 LFPDUX A5, AO, INC2 1862 fxcpmadd f1, B2, A6, f1 1863 nop 1864 fxcsmadd f5, B2, A6, f5 1865 LFPDUX A6, AO, INC2 1866 1867 fxcpmadd f2, B2, A7, f2 1868 nop 1869 fxcsmadd f6, B2, A7, f6 1870 LFPDUX A7, AO, INC2 1871 fxcpmadd f3, B2, A8, f3 1872 nop 1873 fxcsmadd f7, B2, A8, f7 1874 LFPDUX A8, AO, INC2 1875 1876 fxcpmadd f0, B3, A1, f0 1877 fxcsmadd f4, B3, A1, f4 1878 fxcpmadd f1, B3, A2, f1 1879 fxcsmadd f5, B3, A2, f5 1880 1881 fxcpmadd f2, B3, A3, f2 1882 fxcsmadd f6, B3, A3, f6 1883 fxcpmadd f3, B3, A4, f3 1884 fxcsmadd f7, B3, A4, f7 
1885 1886 fxcpmadd f0, B4, A5, f0 1887 fxcsmadd f4, B4, A5, f4 1888 fxcpmadd f1, B4, A6, f1 1889 fxcsmadd f5, B4, A6, f5 1890 1891 fxcpmadd f2, B4, A7, f2 1892 fxcsmadd f6, B4, A7, f6 1893 fxcpmadd f3, B4, A8, f3 1894 fxcsmadd f7, B4, A8, f7 1895 .align 4 1896 1897.L54: 1898 lfd AP, ALPHA(SP) 1899#ifdef TRMMKERNEL 1900 fsmfp AP, AP 1901#endif 1902 1903#if defined(TRMMKERNEL) 1904#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 1905 sub TEMP, K, KK 1906#elif defined(LEFT) 1907 addi TEMP, KK, 8 1908#else 1909 addi TEMP, KK, 2 1910#endif 1911 andi. TEMP, TEMP, 3 1912 mtspr CTR, TEMP 1913#else 1914 andi. r0, K, 3 1915 mtspr CTR, r0 1916#endif 1917 ble+ .L58 1918 1919 LFPDUX A1, AO, INC2 1920 LFPDUX B1, BO, INC2 1921 LFPDUX A2, AO, INC2 1922 LFPDUX A3, AO, INC2 1923 LFPDUX A4, AO, INC2 1924 bdz- .L57 1925 .align 4 1926 1927.L56: 1928 fxcpmadd f0, B1, A1, f0 1929 fxcsmadd f4, B1, A1, f4 1930 LFPDUX A1, AO, INC2 1931 fxcpmadd f1, B1, A2, f1 1932 fxcsmadd f5, B1, A2, f5 1933 LFPDUX A2, AO, INC2 1934 1935 fxcpmadd f2, B1, A3, f2 1936 fxcsmadd f6, B1, A3, f6 1937 LFPDUX A3, AO, INC2 1938 fxcpmadd f3, B1, A4, f3 1939 fxcsmadd f7, B1, A4, f7 1940 LFPDUX A4, AO, INC2 1941 LFPDUX B1, BO, INC2 1942 bdnz+ .L56 1943 .align 4 1944 1945.L57: 1946 fxcpmadd f0, B1, A1, f0 1947 fxcsmadd f4, B1, A1, f4 1948 fxcpmadd f1, B1, A2, f1 1949 fxcsmadd f5, B1, A2, f5 1950 1951 fxcpmadd f2, B1, A3, f2 1952 fxcsmadd f6, B1, A3, f6 1953 fxcpmadd f3, B1, A4, f3 1954 fxcsmadd f7, B1, A4, f7 1955 .align 4 1956 1957.L58: 1958#ifndef TRMMKERNEL 1959 LFPDUX A1, CO1, INC2 1960 LFPDUX B1, CO1, INC2 1961 LFPDUX A3, CO1, INC2 1962 LFPDUX A5, CO1, INC2 1963 1964 LFPDUX B3, CO2, INC2 1965 LFPDUX A6, CO2, INC2 1966 LFPDUX A7, CO2, INC2 1967 LFPDUX B2, CO2, INC2 1968 1969 fxcpmadd f0, AP, f0, A1 1970 fxcpmadd f1, AP, f1, B1 1971 fxcpmadd f2, AP, f2, A3 1972 fxcpmadd f3, AP, f3, A5 1973 1974 fxcpmadd f4, AP, f4, B3 1975 fxcpmadd f5, AP, f5, A6 1976 STFPDUX f0, CO1, INCM7 1977 
fxcpmadd f6, AP, f6, A7 1978 STFPDUX f1, CO1, INC2 1979 fxcpmadd f7, AP, f7, B2 1980 STFPDUX f2, CO1, INC2 1981 STFPDUX f3, CO1, INC2 1982 STFPDUX f4, CO2, INCM7 1983 1984 STFPDUX f5, CO2, INC2 1985 STFPDUX f6, CO2, INC2 1986 STFPDUX f7, CO2, INC2 1987#else 1988 fpmul f0, AP, f0 1989 fpmul f1, AP, f1 1990 fpmul f2, AP, f2 1991 fpmul f3, AP, f3 1992 1993 fpmul f4, AP, f4 1994 fpmul f5, AP, f5 1995 STFPDUX f0, CO1, INC2 1996 fpmul f6, AP, f6 1997 STFPDUX f1, CO1, INC2 1998 fpmul f7, AP, f7 1999 STFPDUX f2, CO1, INC2 2000 STFPDUX f3, CO1, INC2 2001 STFPDUX f4, CO2, INC2 2002 2003 STFPDUX f5, CO2, INC2 2004 STFPDUX f6, CO2, INC2 2005 STFPDUX f7, CO2, INC2 2006#endif 2007 2008 2009#ifdef TRMMKERNEL 2010#if ( defined(LEFT) && defined(TRANSA)) || \ 2011 (!defined(LEFT) && !defined(TRANSA)) 2012 sub TEMP, K, KK 2013#ifdef LEFT 2014 addi TEMP, TEMP, -8 2015#else 2016 addi TEMP, TEMP, -2 2017#endif 2018 slwi r0, TEMP, 3 + BASE_SHIFT 2019 slwi TEMP, TEMP, 1 + BASE_SHIFT 2020 add AO, AO, r0 2021 add BO, BO, TEMP 2022#endif 2023 2024#ifdef LEFT 2025 addi KK, KK, 8 2026#endif 2027#endif 2028 2029 addic. I, I, -1 2030 li r0, FZERO 2031 2032 lfpsx f0, SP, r0 2033 bgt+ .L51 2034 .align 4 2035 2036.L60: 2037 andi. I, M, 4 2038 beq .L70 2039 2040#if defined(TRMMKERNEL) 2041#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2042 addi BO, B, - 2 * SIZE 2043 fpmr f1, f0 2044#else 2045 slwi TEMP, KK, 2 + BASE_SHIFT 2046 slwi r0, KK, 1 + BASE_SHIFT 2047 add AO, AO, TEMP 2048 add BO, B, r0 2049 2050 addi BO, BO, - 2 * SIZE 2051 fpmr f1, f0 2052#endif 2053 2054#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2055 sub TEMP, K, KK 2056#elif defined(LEFT) 2057 addi TEMP, KK, 4 2058#else 2059 addi TEMP, KK, 2 2060#endif 2061 fpmr f2, f0 2062 srawi. r0, TEMP, 2 2063 mtspr CTR, r0 2064 fpmr f3, f0 2065 ble .L64 2066#else 2067 srawi. 
r0, K, 2 2068 fpmr f1, f0 2069 addi BO, B, - 2 * SIZE 2070 fpmr f2, f0 2071 mtspr CTR, r0 2072 fpmr f3, f0 2073 ble .L64 2074#endif 2075 2076 LFPDUX B1, BO, INC2 2077 LFPDUX A1, AO, INC2 2078 LFPDUX A2, AO, INC2 2079 LFPDUX B2, BO, INC2 2080 LFPDUX A3, AO, INC2 2081 LFPDUX A4, AO, INC2 2082 2083 LFPDUX B3, BO, INC2 2084 LFPDUX A5, AO, INC2 2085 LFPDUX A6, AO, INC2 2086 LFPDUX B4, BO, INC2 2087 LFPDUX A7, AO, INC2 2088 LFPDUX A8, AO, INC2 2089 bdz- .L63 2090 .align 4 2091 2092.L62: 2093 fxcpmadd f0, B1, A1, f0 2094 fxcsmadd f2, B1, A1, f2 2095 LFPDUX A1, AO, INC2 2096 fxcpmadd f1, B1, A2, f1 2097 fxcsmadd f3, B1, A2, f3 2098 LFPDUX A2, AO, INC2 2099 LFPDUX B1, BO, INC2 2100 2101 fxcpmadd f0, B2, A3, f0 2102 fxcsmadd f2, B2, A3, f2 2103 LFPDUX A3, AO, INC2 2104 fxcpmadd f1, B2, A4, f1 2105 fxcsmadd f3, B2, A4, f3 2106 LFPDUX A4, AO, INC2 2107 LFPDUX B2, BO, INC2 2108 2109 fxcpmadd f0, B3, A5, f0 2110 fxcsmadd f2, B3, A5, f2 2111 LFPDUX A5, AO, INC2 2112 fxcpmadd f1, B3, A6, f1 2113 fxcsmadd f3, B3, A6, f3 2114 LFPDUX A6, AO, INC2 2115 LFPDUX B3, BO, INC2 2116 2117 fxcpmadd f0, B4, A7, f0 2118 fxcsmadd f2, B4, A7, f2 2119 LFPDUX A7, AO, INC2 2120 fxcpmadd f1, B4, A8, f1 2121 fxcsmadd f3, B4, A8, f3 2122 LFPDUX A8, AO, INC2 2123 LFPDUX B4, BO, INC2 2124 bdnz+ .L62 2125 .align 4 2126 2127.L63: 2128 fxcpmadd f0, B1, A1, f0 2129 fxcsmadd f2, B1, A1, f2 2130 fxcpmadd f1, B1, A2, f1 2131 fxcsmadd f3, B1, A2, f3 2132 2133 fxcpmadd f0, B2, A3, f0 2134 fxcsmadd f2, B2, A3, f2 2135 fxcpmadd f1, B2, A4, f1 2136 fxcsmadd f3, B2, A4, f3 2137 2138 fxcpmadd f0, B3, A5, f0 2139 fxcsmadd f2, B3, A5, f2 2140 fxcpmadd f1, B3, A6, f1 2141 fxcsmadd f3, B3, A6, f3 2142 2143 fxcpmadd f0, B4, A7, f0 2144 fxcsmadd f2, B4, A7, f2 2145 fxcpmadd f1, B4, A8, f1 2146 fxcsmadd f3, B4, A8, f3 2147 .align 4 2148 2149.L64: 2150 lfd AP, ALPHA(SP) 2151#ifdef TRMMKERNEL 2152 fsmfp AP, AP 2153#endif 2154 2155#if defined(TRMMKERNEL) 2156#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && 
defined(TRANSA)) 2157 sub TEMP, K, KK 2158#elif defined(LEFT) 2159 addi TEMP, KK, 4 2160#else 2161 addi TEMP, KK, 2 2162#endif 2163 andi. TEMP, TEMP, 3 2164 mtspr CTR, TEMP 2165#else 2166 andi. r0, K, 3 2167 mtspr CTR, r0 2168#endif 2169 ble+ .L68 2170 2171 LFPDUX A1, AO, INC2 2172 LFPDUX B1, BO, INC2 2173 LFPDUX A2, AO, INC2 2174 bdz- .L67 2175 .align 4 2176 2177.L66: 2178 fxcpmadd f0, B1, A1, f0 2179 fxcsmadd f2, B1, A1, f2 2180 LFPDUX A1, AO, INC2 2181 fxcpmadd f1, B1, A2, f1 2182 fxcsmadd f3, B1, A2, f3 2183 LFPDUX B1, BO, INC2 2184 LFPDUX A2, AO, INC2 2185 bdnz+ .L66 2186 .align 4 2187 2188.L67: 2189 fxcpmadd f0, B1, A1, f0 2190 fxcsmadd f2, B1, A1, f2 2191 fxcpmadd f1, B1, A2, f1 2192 fxcsmadd f3, B1, A2, f3 2193 .align 4 2194 2195.L68: 2196#ifndef TRMMKERNEL 2197 LFPDUX A1, CO1, INC2 2198 LFPDUX A2, CO1, INC2 2199 LFPDUX A3, CO2, INC2 2200 LFPDUX A4, CO2, INC2 2201 2202 fxcpmadd f0, AP, f0, A1 2203 fxcpmadd f1, AP, f1, A2 2204 fxcpmadd f2, AP, f2, A3 2205 fxcpmadd f3, AP, f3, A4 2206 2207 STFPDUX f0, CO1, INCM3 2208 STFPDUX f1, CO1, INC2 2209 STFPDUX f2, CO2, INCM3 2210 STFPDUX f3, CO2, INC2 2211#else 2212 fpmul f0, AP, f0 2213 fpmul f1, AP, f1 2214 fpmul f2, AP, f2 2215 fpmul f3, AP, f3 2216 2217 STFPDUX f0, CO1, INC2 2218 STFPDUX f1, CO1, INC2 2219 STFPDUX f2, CO2, INC2 2220 STFPDUX f3, CO2, INC2 2221#endif 2222 2223#ifdef TRMMKERNEL 2224#if ( defined(LEFT) && defined(TRANSA)) || \ 2225 (!defined(LEFT) && !defined(TRANSA)) 2226 sub TEMP, K, KK 2227#ifdef LEFT 2228 addi TEMP, TEMP, -4 2229#else 2230 addi TEMP, TEMP, -2 2231#endif 2232 slwi r0, TEMP, 2 + BASE_SHIFT 2233 slwi TEMP, TEMP, 1 + BASE_SHIFT 2234 add AO, AO, r0 2235 add BO, BO, TEMP 2236#endif 2237 2238#ifdef LEFT 2239 addi KK, KK, 4 2240#endif 2241#endif 2242 2243 li r0, FZERO 2244 lfpsx f0, SP, r0 2245 .align 4 2246 2247.L70: 2248 andi. 
I, M, 2 2249 beq .L80 2250 2251#if defined(TRMMKERNEL) 2252#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2253 addi BO, B, - 2 * SIZE 2254 fpmr f1, f0 2255#else 2256 slwi TEMP, KK, 1 + BASE_SHIFT 2257 slwi r0, KK, 1 + BASE_SHIFT 2258 add AO, AO, TEMP 2259 add BO, B, r0 2260 2261 addi BO, BO, - 2 * SIZE 2262 fpmr f1, f0 2263#endif 2264 2265#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2266 sub TEMP, K, KK 2267#elif defined(LEFT) 2268 addi TEMP, KK, 2 2269#else 2270 addi TEMP, KK, 2 2271#endif 2272 srawi. r0, TEMP, 3 2273 fpmr f2, f0 2274 mtspr CTR, r0 2275 fpmr f3, f0 2276 ble .L74 2277#else 2278 addi BO, B, - 2 * SIZE 2279 fpmr f1, f0 2280 2281 srawi. r0, K, 3 2282 fpmr f2, f0 2283 mtspr CTR, r0 2284 fpmr f3, f0 2285 ble .L74 2286#endif 2287 2288 2289 LFPDUX A1, AO, INC2 2290 LFPDUX B1, BO, INC2 2291 LFPDUX A2, AO, INC2 2292 LFPDUX B2, BO, INC2 2293 LFPDUX A3, AO, INC2 2294 LFPDUX B3, BO, INC2 2295 LFPDUX A4, AO, INC2 2296 LFPDUX B4, BO, INC2 2297 2298 LFPDUX A5, AO, INC2 2299 LFPDUX B5, BO, INC2 2300 LFPDUX A6, AO, INC2 2301 LFPDUX B6, BO, INC2 2302 LFPDUX A7, AO, INC2 2303 LFPDUX A9, BO, INC2 2304 LFPDUX A8, AO, INC2 2305 LFPDUX A10, BO, INC2 2306 bdz- .L73 2307 .align 4 2308 2309.L72: 2310 fxcpmadd f0, B1, A1, f0 2311 fxcsmadd f1, B1, A1, f1 2312 LFPDUX A1, AO, INC2 2313 LFPDUX B1, BO, INC2 2314 fxcpmadd f2, B2, A2, f2 2315 fxcsmadd f3, B2, A2, f3 2316 LFPDUX A2, AO, INC2 2317 LFPDUX B2, BO, INC2 2318 2319 fxcpmadd f0, B3, A3, f0 2320 fxcsmadd f1, B3, A3, f1 2321 LFPDUX A3, AO, INC2 2322 LFPDUX B3, BO, INC2 2323 fxcpmadd f2, B4, A4, f2 2324 fxcsmadd f3, B4, A4, f3 2325 LFPDUX A4, AO, INC2 2326 LFPDUX B4, BO, INC2 2327 2328 fxcpmadd f0, B5, A5, f0 2329 fxcsmadd f1, B5, A5, f1 2330 LFPDUX A5, AO, INC2 2331 LFPDUX B5, BO, INC2 2332 fxcpmadd f2, B6, A6, f2 2333 fxcsmadd f3, B6, A6, f3 2334 LFPDUX A6, AO, INC2 2335 LFPDUX B6, BO, INC2 2336 2337 fxcpmadd f0, A9, A7, f0 2338 fxcsmadd f1, A9, A7, f1 2339 
LFPDUX A7, AO, INC2 2340 LFPDUX A9, BO, INC2 2341 fxcpmadd f2, A10, A8, f2 2342 fxcsmadd f3, A10, A8, f3 2343 LFPDUX A8, AO, INC2 2344 LFPDUX A10, BO, INC2 2345 bdnz+ .L72 2346 .align 4 2347 2348.L73: 2349 fxcpmadd f0, B1, A1, f0 2350 fxcsmadd f1, B1, A1, f1 2351 fxcpmadd f2, B2, A2, f2 2352 fxcsmadd f3, B2, A2, f3 2353 2354 fxcpmadd f0, B3, A3, f0 2355 fxcsmadd f1, B3, A3, f1 2356 fxcpmadd f2, B4, A4, f2 2357 fxcsmadd f3, B4, A4, f3 2358 2359 fxcpmadd f0, B5, A5, f0 2360 fxcsmadd f1, B5, A5, f1 2361 fxcpmadd f2, B6, A6, f2 2362 fxcsmadd f3, B6, A6, f3 2363 2364 fxcpmadd f0, A9, A7, f0 2365 fxcsmadd f1, A9, A7, f1 2366 fxcpmadd f2, A10, A8, f2 2367 fxcsmadd f3, A10, A8, f3 2368 .align 4 2369 2370.L74: 2371 lfd AP, ALPHA(SP) 2372#ifdef TRMMKERNEL 2373 fsmfp AP, AP 2374#endif 2375 2376#if defined(TRMMKERNEL) 2377#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2378 sub TEMP, K, KK 2379#elif defined(LEFT) 2380 addi TEMP, KK, 2 2381#else 2382 addi TEMP, KK, 2 2383#endif 2384 andi. TEMP, TEMP, 7 2385 mtspr CTR, TEMP 2386#else 2387 andi. 
r0, K, 7 2388 mtspr CTR, r0 2389#endif 2390 ble+ .L78 2391 2392 LFPDUX A1, AO, INC2 2393 LFPDUX B1, BO, INC2 2394 bdz- .L77 2395 .align 4 2396 2397.L76: 2398 fxcpmadd f0, B1, A1, f0 2399 fxcsmadd f1, B1, A1, f1 2400 LFPDUX A1, AO, INC2 2401 LFPDUX B1, BO, INC2 2402 bdnz+ .L76 2403 .align 4 2404 2405.L77: 2406 fxcpmadd f0, B1, A1, f0 2407 fxcsmadd f1, B1, A1, f1 2408 .align 4 2409 2410.L78: 2411#ifndef TRMMKERNEL 2412 LFPDX A1, CO1, INC2 2413 LFPDX B3, CO2, INC2 2414 2415 fpadd f0, f0, f2 2416 fpadd f1, f1, f3 2417 2418 fxcpmadd f0, AP, f0, A1 2419 fxcpmadd f1, AP, f1, B3 2420#else 2421 fpadd f0, f0, f2 2422 fpadd f1, f1, f3 2423 2424 fpmul f0, AP, f0 2425 fpmul f1, AP, f1 2426#endif 2427 2428 STFPDUX f0, CO1, INC2 2429 STFPDUX f1, CO2, INC2 2430 2431#ifdef TRMMKERNEL 2432#if ( defined(LEFT) && defined(TRANSA)) || \ 2433 (!defined(LEFT) && !defined(TRANSA)) 2434 sub TEMP, K, KK 2435#ifdef LEFT 2436 addi TEMP, TEMP, -2 2437#else 2438 addi TEMP, TEMP, -2 2439#endif 2440 slwi r0, TEMP, 1 + BASE_SHIFT 2441 slwi TEMP, TEMP, 1 + BASE_SHIFT 2442 add AO, AO, r0 2443 add BO, BO, TEMP 2444#endif 2445 2446#ifdef LEFT 2447 addi KK, KK, 2 2448#endif 2449#endif 2450 2451 li r0, FZERO 2452 lfpsx f0, SP, r0 2453 .align 4 2454 2455.L80: 2456 andi. I, M, 1 2457 beq .L89 2458 2459#if defined(TRMMKERNEL) 2460#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2461 addi BO, B, - 2 * SIZE 2462 fpmr f1, f0 2463 fpmr f2, f0 2464 fpmr f3, f0 2465#else 2466 slwi TEMP, KK, 0 + BASE_SHIFT 2467 slwi r0, KK, 1 + BASE_SHIFT 2468 add AO, AO, TEMP 2469 add BO, B, r0 2470 2471 addi BO, BO, - 2 * SIZE 2472 fpmr f1, f0 2473 fpmr f2, f0 2474 fpmr f3, f0 2475#endif 2476 2477#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2478 sub TEMP, K, KK 2479#elif defined(LEFT) 2480 addi TEMP, KK, 1 2481#else 2482 addi TEMP, KK, 2 2483#endif 2484 srawi. 
r0, TEMP, 3 2485 mtspr CTR, r0 2486 ble .L84 2487#else 2488 addi BO, B, - 2 * SIZE 2489 fpmr f1, f0 2490 fpmr f2, f0 2491 fpmr f3, f0 2492 2493 srawi. r0, K, 3 2494 mtspr CTR, r0 2495 ble .L84 2496#endif 2497 2498 LFPDUX B1, BO, INC2 2499 LFPDUX A1, AO, INC2 2500 LFPDUX A2, AO, INC2 2501 2502 LFPDUX B2, BO, INC2 2503 LFPDUX A3, AO, INC2 2504 LFPDUX A4, AO, INC2 2505 2506 LFPDUX B3, BO, INC2 2507 LFPDUX B4, BO, INC2 2508 bdz- .L83 2509 .align 4 2510 2511.L82: 2512 fxcpmadd f0, A1, B1, f0 2513 LFPDUX B1, BO, INC2 2514 fxcsmadd f1, A1, B2, f1 2515 LFPDUX B2, BO, INC2 2516 LFPDUX A1, AO, INC2 2517 fxcpmadd f2, A2, B3, f2 2518 LFPDUX B3, BO, INC2 2519 fxcsmadd f3, A2, B4, f3 2520 LFPDUX B4, BO, INC2 2521 LFPDUX A2, AO, INC2 2522 2523 fxcpmadd f0, A3, B1, f0 2524 LFPDUX B1, BO, INC2 2525 fxcsmadd f1, A3, B2, f1 2526 LFPDUX B2, BO, INC2 2527 LFPDUX A3, AO, INC2 2528 fxcpmadd f2, A4, B3, f2 2529 LFPDUX B3, BO, INC2 2530 fxcsmadd f3, A4, B4, f3 2531 LFPDUX B4, BO, INC2 2532 LFPDUX A4, AO, INC2 2533 bdnz+ .L82 2534 .align 4 2535 2536.L83: 2537 fxcpmadd f0, A1, B1, f0 2538 LFPDUX B1, BO, INC2 2539 fxcsmadd f1, A1, B2, f1 2540 LFPDUX B2, BO, INC2 2541 fxcpmadd f2, A2, B3, f2 2542 LFPDUX B3, BO, INC2 2543 fxcsmadd f3, A2, B4, f3 2544 LFPDUX B4, BO, INC2 2545 2546 fxcpmadd f0, A3, B1, f0 2547 fxcsmadd f1, A3, B2, f1 2548 fxcpmadd f2, A4, B3, f2 2549 fxcsmadd f3, A4, B4, f3 2550 .align 4 2551 2552.L84: 2553 lfd AP, ALPHA(SP) 2554#ifdef TRMMKERNEL 2555 fsmfp AP, AP 2556#endif 2557 2558#if defined(TRMMKERNEL) 2559#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2560 sub TEMP, K, KK 2561#elif defined(LEFT) 2562 addi TEMP, KK, 1 2563#else 2564 addi TEMP, KK, 2 2565#endif 2566 andi. TEMP, TEMP, 7 2567 mtspr CTR, TEMP 2568#else 2569 andi. 
r0, K, 7 2570 mtspr CTR, r0 2571#endif 2572 ble+ .L88 2573 2574 LFDX A1, AO, INC2 2575 LFPDUX B1, BO, INC2 2576 add AO, AO, INC 2577 bdz- .L87 2578 .align 4 2579 2580.L86: 2581 fxcpmadd f0, A1, B1, f0 2582 LFDX A1, AO, INC2 2583 LFPDUX B1, BO, INC2 2584 add AO, AO, INC 2585 bdnz+ .L86 2586 .align 4 2587 2588.L87: 2589 fxcpmadd f0, A1, B1, f0 2590 .align 4 2591 2592.L88: 2593#ifndef TRMMKERNEL 2594 LFDX A1, CO1, INC2 2595 LFDX A2, CO2, INC2 2596 2597 fpadd f0, f0, f1 2598 fpadd f2, f2, f3 2599 fsmfp A1, A2 2600 fpadd f0, f0, f2 2601 fxcpmadd f0, AP, f0, A1 2602#else 2603 fpadd f0, f0, f1 2604 fpadd f2, f2, f3 2605 fsmfp A1, A2 2606 fpadd f0, f0, f2 2607 fpmul f0, AP, f0 2608#endif 2609 2610 STFDX f0, CO1, INC2 2611 STFSDX f0, CO2, INC2 2612 2613#ifdef TRMMKERNEL 2614#if ( defined(LEFT) && defined(TRANSA)) || \ 2615 (!defined(LEFT) && !defined(TRANSA)) 2616 sub TEMP, K, KK 2617#ifdef LEFT 2618 addi TEMP, TEMP, -1 2619#else 2620 addi TEMP, TEMP, -2 2621#endif 2622 slwi r0, TEMP, 0 + BASE_SHIFT 2623 slwi TEMP, TEMP, 1 + BASE_SHIFT 2624 add AO, AO, r0 2625 add BO, BO, TEMP 2626#endif 2627 2628#ifdef LEFT 2629 addi KK, KK, 1 2630#endif 2631#endif 2632 .align 4 2633 2634.L89: 2635#if defined(TRMMKERNEL) && !defined(LEFT) 2636 addi KK, KK, 2 2637#endif 2638 2639 addi B, BO, 2 * SIZE 2640 .align 4 2641 2642.L90: 2643 andi. J, N, 1 2644 beq .L999 2645 2646#if defined(TRMMKERNEL) && defined(LEFT) 2647 mr KK, OFFSET 2648#endif 2649 2650 mr CO1, C 2651 addi AO, A, -2 * SIZE 2652 2653 li r0, FZERO 2654 lfpsx f0, SP, r0 2655 2656 srawi. 
I, M, 3 2657 ble .L100 2658 .align 4 2659 2660.L91: 2661#if defined(TRMMKERNEL) 2662#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2663 addi BO, B, - 2 * SIZE 2664 fpmr f1, f0 2665#else 2666 slwi TEMP, KK, 3 + BASE_SHIFT 2667 slwi r0, KK, 0 + BASE_SHIFT 2668 add AO, AO, TEMP 2669 add BO, B, r0 2670 2671 addi BO, BO, - 2 * SIZE 2672 fpmr f1, f0 2673#endif 2674 2675#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2676 sub TEMP, K, KK 2677#elif defined(LEFT) 2678 addi TEMP, KK, 8 2679#else 2680 addi TEMP, KK, 1 2681#endif 2682 fpmr f2, f0 2683 srawi. r0, TEMP, 2 2684 fpmr f3, f0 2685 mtspr CTR, r0 2686 ble .L94 2687 2688#else 2689 srawi. r0, K, 2 2690 fpmr f1, f0 2691 addi BO, B, - 2 * SIZE 2692 fpmr f2, f0 2693 fpmr f3, f0 2694 mtspr CTR, r0 2695 ble .L94 2696#endif 2697 2698 LFPDUX B1, BO, INC2 2699 LFPDUX A1, AO, INC2 2700 LFPDUX A2, AO, INC2 2701 LFPDUX A3, AO, INC2 2702 LFPDUX A4, AO, INC2 2703 LFPDUX B2, BO, INC2 2704 LFPDUX A5, AO, INC2 2705 LFPDUX A6, AO, INC2 2706 LFPDUX A7, AO, INC2 2707 LFPDUX A8, AO, INC2 2708 bdz- .L93 2709 .align 4 2710 2711.L92: 2712 fxcpmadd f0, B1, A1, f0 2713 LFPDUX A1, AO, INC2 2714 fxcpmadd f1, B1, A2, f1 2715 LFPDUX A2, AO, INC2 2716 fxcpmadd f2, B1, A3, f2 2717 LFPDUX A3, AO, INC2 2718 fxcpmadd f3, B1, A4, f3 2719 LFPDUX A4, AO, INC2 2720 2721 fxcsmadd f0, B1, A5, f0 2722 LFPDUX A5, AO, INC2 2723 fxcsmadd f1, B1, A6, f1 2724 LFPDUX A6, AO, INC2 2725 fxcsmadd f2, B1, A7, f2 2726 LFPDUX A7, AO, INC2 2727 fxcsmadd f3, B1, A8, f3 2728 LFPDUX A8, AO, INC2 2729 LFPDUX B1, BO, INC2 2730 2731 fxcpmadd f0, B2, A1, f0 2732 LFPDUX A1, AO, INC2 2733 fxcpmadd f1, B2, A2, f1 2734 LFPDUX A2, AO, INC2 2735 fxcpmadd f2, B2, A3, f2 2736 LFPDUX A3, AO, INC2 2737 fxcpmadd f3, B2, A4, f3 2738 LFPDUX A4, AO, INC2 2739 2740 fxcsmadd f0, B2, A5, f0 2741 LFPDUX A5, AO, INC2 2742 fxcsmadd f1, B2, A6, f1 2743 LFPDUX A6, AO, INC2 2744 fxcsmadd f2, B2, A7, f2 2745 LFPDUX A7, AO, INC2 2746 fxcsmadd 
f3, B2, A8, f3 2747 LFPDUX A8, AO, INC2 2748 LFPDUX B2, BO, INC2 2749 bdnz+ .L92 2750 .align 4 2751 2752.L93: 2753 fxcpmadd f0, B1, A1, f0 2754 LFPDUX A1, AO, INC2 2755 fxcpmadd f1, B1, A2, f1 2756 LFPDUX A2, AO, INC2 2757 fxcpmadd f2, B1, A3, f2 2758 LFPDUX A3, AO, INC2 2759 fxcpmadd f3, B1, A4, f3 2760 LFPDUX A4, AO, INC2 2761 2762 fxcsmadd f0, B1, A5, f0 2763 LFPDUX A5, AO, INC2 2764 fxcsmadd f1, B1, A6, f1 2765 LFPDUX A6, AO, INC2 2766 fxcsmadd f2, B1, A7, f2 2767 LFPDUX A7, AO, INC2 2768 fxcsmadd f3, B1, A8, f3 2769 LFPDUX A8, AO, INC2 2770 2771 fxcpmadd f0, B2, A1, f0 2772 fxcpmadd f1, B2, A2, f1 2773 fxcpmadd f2, B2, A3, f2 2774 fxcpmadd f3, B2, A4, f3 2775 2776 fxcsmadd f0, B2, A5, f0 2777 fxcsmadd f1, B2, A6, f1 2778 fxcsmadd f2, B2, A7, f2 2779 fxcsmadd f3, B2, A8, f3 2780 .align 4 2781 2782.L94: 2783 lfd AP, ALPHA(SP) 2784#ifdef TRMMKERNEL 2785 fsmfp AP, AP 2786#endif 2787 2788#if defined(TRMMKERNEL) 2789#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2790 sub TEMP, K, KK 2791#elif defined(LEFT) 2792 addi TEMP, KK, 8 2793#else 2794 addi TEMP, KK, 1 2795#endif 2796 andi. TEMP, TEMP, 3 2797 mtspr CTR, TEMP 2798#else 2799 andi. 
r0, K, 3 2800 mtspr CTR, r0 2801#endif 2802 ble+ .L98 2803 2804 LFDX B1, BO, INC2 2805 LFPDUX A1, AO, INC2 2806 LFPDUX A2, AO, INC2 2807 LFPDUX A3, AO, INC2 2808 LFPDUX A4, AO, INC2 2809 add BO, BO, INC 2810 bdz- .L97 2811 .align 4 2812 2813.L96: 2814 fxcpmadd f0, B1, A1, f0 2815 LFPDUX A1, AO, INC2 2816 fxcpmadd f1, B1, A2, f1 2817 LFPDUX A2, AO, INC2 2818 fxcpmadd f2, B1, A3, f2 2819 LFPDUX A3, AO, INC2 2820 fxcpmadd f3, B1, A4, f3 2821 LFDX B1, BO, INC2 2822 LFPDUX A4, AO, INC2 2823 add BO, BO, INC 2824 bdnz+ .L96 2825 .align 4 2826 2827.L97: 2828 fxcpmadd f0, B1, A1, f0 2829 fxcpmadd f1, B1, A2, f1 2830 fxcpmadd f2, B1, A3, f2 2831 fxcpmadd f3, B1, A4, f3 2832 .align 4 2833 2834.L98: 2835#ifndef TRMMKERNEL 2836 LFPDUX A1, CO1, INC2 2837 LFPDUX B1, CO1, INC2 2838 LFPDUX A3, CO1, INC2 2839 LFPDUX A5, CO1, INC2 2840 2841 fxcpmadd f0, AP, f0, A1 2842 fxcpmadd f1, AP, f1, B1 2843 fxcpmadd f2, AP, f2, A3 2844 fxcpmadd f3, AP, f3, A5 2845 2846 STFPDUX f0, CO1, INCM7 2847 STFPDUX f1, CO1, INC2 2848 STFPDUX f2, CO1, INC2 2849 STFPDUX f3, CO1, INC2 2850#else 2851 fpmul f0, AP, f0 2852 fpmul f1, AP, f1 2853 fpmul f2, AP, f2 2854 fpmul f3, AP, f3 2855 2856 STFPDUX f0, CO1, INC2 2857 STFPDUX f1, CO1, INC2 2858 STFPDUX f2, CO1, INC2 2859 STFPDUX f3, CO1, INC2 2860#endif 2861 2862#ifdef TRMMKERNEL 2863#if ( defined(LEFT) && defined(TRANSA)) || \ 2864 (!defined(LEFT) && !defined(TRANSA)) 2865 sub TEMP, K, KK 2866#ifdef LEFT 2867 addi TEMP, TEMP, -8 2868#else 2869 addi TEMP, TEMP, -1 2870#endif 2871 slwi r0, TEMP, 3 + BASE_SHIFT 2872 slwi TEMP, TEMP, 0 + BASE_SHIFT 2873 add AO, AO, r0 2874 add BO, BO, TEMP 2875#endif 2876 2877#ifdef LEFT 2878 addi KK, KK, 8 2879#endif 2880#endif 2881 2882 addic. I, I, -1 2883 li r0, FZERO 2884 2885 lfpsx f0, SP, r0 2886 bgt+ .L91 2887 .align 4 2888 2889.L100: 2890 andi. 
I, M, 4 2891 beq .L110 2892 2893#if defined(TRMMKERNEL) 2894#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 2895 addi BO, B, - 2 * SIZE 2896 fpmr f1, f0 2897 fpmr f2, f0 2898 fpmr f3, f0 2899#else 2900 slwi TEMP, KK, 2 + BASE_SHIFT 2901 slwi r0, KK, 0 + BASE_SHIFT 2902 add AO, AO, TEMP 2903 add BO, B, r0 2904 2905 fpmr f1, f0 2906 addi BO, BO, - 2 * SIZE 2907 fpmr f2, f0 2908 fpmr f3, f0 2909#endif 2910 2911#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 2912 sub TEMP, K, KK 2913#elif defined(LEFT) 2914 addi TEMP, KK, 4 2915#else 2916 addi TEMP, KK, 1 2917#endif 2918 srawi. r0, TEMP, 3 2919 mtspr CTR, r0 2920 ble .L104 2921#else 2922 addi BO, B, - 2 * SIZE 2923 fpmr f1, f0 2924 fpmr f2, f0 2925 fpmr f3, f0 2926 2927 srawi. r0, K, 3 2928 mtspr CTR, r0 2929 ble .L104 2930#endif 2931 2932 LFPDUX B1, BO, INC2 2933 LFPDUX A1, AO, INC2 2934 LFPDUX A2, AO, INC2 2935 LFPDUX A3, AO, INC2 2936 LFPDUX A4, AO, INC2 2937 LFPDUX B2, BO, INC2 2938 LFPDUX A5, AO, INC2 2939 LFPDUX A6, AO, INC2 2940 LFPDUX A7, AO, INC2 2941 LFPDUX A8, AO, INC2 2942 LFPDUX B3, BO, INC2 2943 LFPDUX B4, BO, INC2 2944 2945 bdz- .L103 2946 .align 4 2947 2948.L102: 2949 fxcpmadd f0, B1, A1, f0 2950 LFPDUX A1, AO, INC2 2951 fxcpmadd f1, B1, A2, f1 2952 LFPDUX A2, AO, INC2 2953 fxcsmadd f2, B1, A3, f2 2954 LFPDUX A3, AO, INC2 2955 fxcsmadd f3, B1, A4, f3 2956 LFPDUX A4, AO, INC2 2957 LFPDUX B1, BO, INC2 2958 2959 fxcpmadd f0, B2, A5, f0 2960 LFPDUX A5, AO, INC2 2961 fxcpmadd f1, B2, A6, f1 2962 LFPDUX A6, AO, INC2 2963 fxcsmadd f2, B2, A7, f2 2964 LFPDUX A7, AO, INC2 2965 fxcsmadd f3, B2, A8, f3 2966 LFPDUX A8, AO, INC2 2967 LFPDUX B2, BO, INC2 2968 2969 fxcpmadd f0, B3, A1, f0 2970 LFPDUX A1, AO, INC2 2971 fxcpmadd f1, B3, A2, f1 2972 LFPDUX A2, AO, INC2 2973 fxcsmadd f2, B3, A3, f2 2974 LFPDUX A3, AO, INC2 2975 fxcsmadd f3, B3, A4, f3 2976 LFPDUX A4, AO, INC2 2977 LFPDUX B3, BO, INC2 2978 2979 fxcpmadd f0, B4, A5, f0 2980 LFPDUX A5, AO, INC2 
2981 fxcpmadd f1, B4, A6, f1 2982 LFPDUX A6, AO, INC2 2983 fxcsmadd f2, B4, A7, f2 2984 LFPDUX A7, AO, INC2 2985 fxcsmadd f3, B4, A8, f3 2986 LFPDUX A8, AO, INC2 2987 LFPDUX B4, BO, INC2 2988 bdnz+ .L102 2989 .align 4 2990 2991.L103: 2992 fxcpmadd f0, B1, A1, f0 2993 LFPDUX A1, AO, INC2 2994 fxcpmadd f1, B1, A2, f1 2995 LFPDUX A2, AO, INC2 2996 fxcsmadd f2, B1, A3, f2 2997 LFPDUX A3, AO, INC2 2998 fxcsmadd f3, B1, A4, f3 2999 LFPDUX A4, AO, INC2 3000 3001 fxcpmadd f0, B2, A5, f0 3002 LFPDUX A5, AO, INC2 3003 fxcpmadd f1, B2, A6, f1 3004 LFPDUX A6, AO, INC2 3005 fxcsmadd f2, B2, A7, f2 3006 LFPDUX A7, AO, INC2 3007 fxcsmadd f3, B2, A8, f3 3008 LFPDUX A8, AO, INC2 3009 3010 fxcpmadd f0, B3, A1, f0 3011 fxcpmadd f1, B3, A2, f1 3012 fxcsmadd f2, B3, A3, f2 3013 fxcsmadd f3, B3, A4, f3 3014 3015 fxcpmadd f0, B4, A5, f0 3016 fxcpmadd f1, B4, A6, f1 3017 fxcsmadd f2, B4, A7, f2 3018 fxcsmadd f3, B4, A8, f3 3019 .align 4 3020 3021.L104: 3022 lfd AP, ALPHA(SP) 3023#ifdef TRMMKERNEL 3024 fsmfp AP, AP 3025#endif 3026 3027#if defined(TRMMKERNEL) 3028#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3029 sub TEMP, K, KK 3030#elif defined(LEFT) 3031 addi TEMP, KK, 4 3032#else 3033 addi TEMP, KK, 1 3034#endif 3035 andi. TEMP, TEMP, 7 3036 mtspr CTR, TEMP 3037#else 3038 andi. 
r0, K, 7 3039 mtspr CTR, r0 3040#endif 3041 ble+ .L108 3042 3043 LFPDUX A1, AO, INC2 3044 LFDX B1, BO, INC2 3045 LFPDUX A2, AO, INC2 3046 add BO, BO, INC 3047 bdz- .L107 3048 .align 4 3049 3050.L106: 3051 fxcpmadd f0, B1, A1, f0 3052 LFPDUX A1, AO, INC2 3053 fxcpmadd f1, B1, A2, f1 3054 LFDX B1, BO, INC2 3055 LFPDUX A2, AO, INC2 3056 add BO, BO, INC 3057 bdnz+ .L106 3058 .align 4 3059 3060.L107: 3061 fxcpmadd f0, B1, A1, f0 3062 fxcpmadd f1, B1, A2, f1 3063 .align 4 3064 3065.L108: 3066#ifndef TRMMKERNEL 3067 LFPDUX A1, CO1, INC2 3068 LFPDUX B1, CO1, INC2 3069 3070 fpadd f0, f0, f2 3071 fpadd f1, f1, f3 3072 3073 fxcpmadd f0, AP, f0, A1 3074 fxcpmadd f1, AP, f1, B1 3075 3076 STFPDUX f0, CO1, INCM3 3077 STFPDUX f1, CO1, INC2 3078#else 3079 fpadd f0, f0, f2 3080 fpadd f1, f1, f3 3081 3082 fpmul f0, AP, f0 3083 fpmul f1, AP, f1 3084 3085 STFPDUX f0, CO1, INC2 3086 STFPDUX f1, CO1, INC2 3087#endif 3088 3089 3090#ifdef TRMMKERNEL 3091#if ( defined(LEFT) && defined(TRANSA)) || \ 3092 (!defined(LEFT) && !defined(TRANSA)) 3093 sub TEMP, K, KK 3094#ifdef LEFT 3095 addi TEMP, TEMP, -4 3096#else 3097 addi TEMP, TEMP, -1 3098#endif 3099 slwi r0, TEMP, 2 + BASE_SHIFT 3100 slwi TEMP, TEMP, 0 + BASE_SHIFT 3101 add AO, AO, r0 3102 add BO, BO, TEMP 3103#endif 3104 3105#ifdef LEFT 3106 addi KK, KK, 4 3107#endif 3108#endif 3109 3110 li r0, FZERO 3111 lfpsx f0, SP, r0 3112 .align 4 3113 3114.L110: 3115 andi. 
I, M, 2 3116 beq .L120 3117 3118#if defined(TRMMKERNEL) 3119#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 3120 addi BO, B, - 2 * SIZE 3121 fpmr f1, f0 3122 fpmr f2, f0 3123 fpmr f3, f0 3124#else 3125 slwi TEMP, KK, 1 + BASE_SHIFT 3126 slwi r0, KK, 0 + BASE_SHIFT 3127 add AO, AO, TEMP 3128 add BO, B, r0 3129 3130 fpmr f1, f0 3131 addi BO, BO, - 2 * SIZE 3132 fpmr f2, f0 3133 fpmr f3, f0 3134#endif 3135 3136#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3137 sub TEMP, K, KK 3138#elif defined(LEFT) 3139 addi TEMP, KK, 2 3140#else 3141 addi TEMP, KK, 1 3142#endif 3143 srawi. r0, TEMP, 3 3144 mtspr CTR, r0 3145 ble .L114 3146#else 3147 addi BO, B, - 2 * SIZE 3148 fpmr f1, f0 3149 fpmr f2, f0 3150 fpmr f3, f0 3151 3152 srawi. r0, K, 3 3153 mtspr CTR, r0 3154 ble .L114 3155#endif 3156 3157 LFPDUX A1, AO, INC2 3158 LFPDUX A2, AO, INC2 3159 LFPDUX B1, BO, INC2 3160 3161 LFPDUX A3, AO, INC2 3162 LFPDUX A4, AO, INC2 3163 LFPDUX B2, BO, INC2 3164 3165 LFPDUX A5, AO, INC2 3166 LFPDUX A6, AO, INC2 3167 LFPDUX B3, BO, INC2 3168 3169 LFPDUX A7, AO, INC2 3170 LFPDUX A8, AO, INC2 3171 LFPDUX B4, BO, INC2 3172 bdz- .L113 3173 .align 4 3174 3175.L112: 3176 fxcpmadd f0, B1, A1, f0 3177 LFPDUX A1, AO, INC2 3178 fxcsmadd f1, B1, A2, f1 3179 LFPDUX A2, AO, INC2 3180 LFPDUX B1, BO, INC2 3181 fxcpmadd f2, B2, A3, f2 3182 LFPDUX A3, AO, INC2 3183 fxcsmadd f3, B2, A4, f3 3184 LFPDUX A4, AO, INC2 3185 LFPDUX B2, BO, INC2 3186 fxcpmadd f0, B3, A5, f0 3187 LFPDUX A5, AO, INC2 3188 fxcsmadd f1, B3, A6, f1 3189 LFPDUX A6, AO, INC2 3190 LFPDUX B3, BO, INC2 3191 fxcpmadd f2, B4, A7, f2 3192 LFPDUX A7, AO, INC2 3193 fxcsmadd f3, B4, A8, f3 3194 LFPDUX A8, AO, INC2 3195 LFPDUX B4, BO, INC2 3196 bdnz+ .L112 3197 .align 4 3198 3199.L113: 3200 fxcpmadd f0, B1, A1, f0 3201 fxcsmadd f1, B1, A2, f1 3202 fxcpmadd f2, B2, A3, f2 3203 fxcsmadd f3, B2, A4, f3 3204 fxcpmadd f0, B3, A5, f0 3205 fxcsmadd f1, B3, A6, f1 3206 fxcpmadd f2, B4, A7, f2 
3207 fxcsmadd f3, B4, A8, f3 3208 .align 4 3209 3210.L114: 3211 lfd AP, ALPHA(SP) 3212#ifdef TRMMKERNEL 3213 fsmfp AP, AP 3214#endif 3215 3216#if defined(TRMMKERNEL) 3217#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3218 sub TEMP, K, KK 3219#elif defined(LEFT) 3220 addi TEMP, KK, 2 3221#else 3222 addi TEMP, KK, 1 3223#endif 3224 andi. TEMP, TEMP, 7 3225 mtspr CTR, TEMP 3226#else 3227 andi. r0, K, 7 3228 mtspr CTR, r0 3229#endif 3230 ble+ .L118 3231 3232 LFPDUX A1, AO, INC2 3233 LFDX B1, BO, INC2 3234 add BO, BO, INC 3235 bdz- .L117 3236 .align 4 3237 3238.L116: 3239 fxcpmadd f0, B1, A1, f0 3240 LFPDUX A1, AO, INC2 3241 LFDX B1, BO, INC2 3242 add BO, BO, INC 3243 bdnz+ .L116 3244 .align 4 3245 3246.L117: 3247 fxcpmadd f0, B1, A1, f0 3248 .align 4 3249 3250.L118: 3251#ifndef TRMMKERNEL 3252 LFPDX A1, CO1, INC2 3253 3254 fpadd f0, f0, f1 3255 fpadd f2, f3, f2 3256 fpadd f0, f0, f2 3257 fxcpmadd f1, AP, f0, A1 3258 3259 li r0, FZERO 3260 lfpsx f0, SP, r0 3261 3262 STFPDUX f1, CO1, INC2 3263#else 3264 fpadd f0, f0, f1 3265 fpadd f2, f3, f2 3266 fpadd f0, f0, f2 3267 fpmul f1, AP, f0 3268 3269 li r0, FZERO 3270 lfpsx f0, SP, r0 3271 3272 STFPDUX f1, CO1, INC2 3273#endif 3274 3275 3276#ifdef TRMMKERNEL 3277#if ( defined(LEFT) && defined(TRANSA)) || \ 3278 (!defined(LEFT) && !defined(TRANSA)) 3279 sub TEMP, K, KK 3280#ifdef LEFT 3281 addi TEMP, TEMP, -2 3282#else 3283 addi TEMP, TEMP, -1 3284#endif 3285 slwi r0, TEMP, 1 + BASE_SHIFT 3286 slwi TEMP, TEMP, 0 + BASE_SHIFT 3287 add AO, AO, r0 3288 add BO, BO, TEMP 3289#endif 3290 3291#ifdef LEFT 3292 addi KK, KK, 2 3293#endif 3294#endif 3295 .align 4 3296 3297.L120: 3298 andi. 
I, M, 1 3299 beq .L999 3300 3301#if defined(TRMMKERNEL) 3302#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 3303 addi BO, B, - 2 * SIZE 3304 fpmr f1, f0 3305 fpmr f2, f0 3306 fpmr f3, f0 3307#else 3308 slwi TEMP, KK, 0 + BASE_SHIFT 3309 slwi r0, KK, 0 + BASE_SHIFT 3310 add AO, AO, TEMP 3311 add BO, B, r0 3312 3313 fpmr f1, f0 3314 addi BO, BO, - 2 * SIZE 3315 fpmr f2, f0 3316 fpmr f3, f0 3317#endif 3318 3319#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3320 sub TEMP, K, KK 3321#elif defined(LEFT) 3322 addi TEMP, KK, 1 3323#else 3324 addi TEMP, KK, 1 3325#endif 3326 srawi. r0, TEMP, 3 3327 mtspr CTR, r0 3328 ble .L124 3329#else 3330 addi BO, B, - 2 * SIZE 3331 fpmr f1, f0 3332 fpmr f2, f0 3333 fpmr f3, f0 3334 3335 srawi. r0, K, 3 3336 mtspr CTR, r0 3337 ble .L124 3338#endif 3339 3340 LFPDUX A1, AO, INC2 3341 LFPDUX B1, BO, INC2 3342 LFPDUX A2, AO, INC2 3343 LFPDUX B2, BO, INC2 3344 LFPDUX A3, AO, INC2 3345 LFPDUX B3, BO, INC2 3346 LFPDUX A4, AO, INC2 3347 LFPDUX B4, BO, INC2 3348 bdz- .L123 3349 .align 4 3350 3351.L122: 3352 fpmadd f0, A1, B1, f0 3353 LFPDUX A1, AO, INC2 3354 LFPDUX B1, BO, INC2 3355 fpmadd f1, A2, B2, f1 3356 LFPDUX A2, AO, INC2 3357 LFPDUX B2, BO, INC2 3358 fpmadd f2, A3, B3, f2 3359 LFPDUX A3, AO, INC2 3360 LFPDUX B3, BO, INC2 3361 fpmadd f3, A4, B4, f3 3362 LFPDUX A4, AO, INC2 3363 LFPDUX B4, BO, INC2 3364 bdnz+ .L122 3365 .align 4 3366 3367.L123: 3368 fpmadd f0, A1, B1, f0 3369 fpmadd f1, A2, B2, f1 3370 fpmadd f2, A3, B3, f2 3371 fpmadd f3, A4, B4, f3 3372 .align 4 3373 3374.L124: 3375 lfd AP, ALPHA(SP) 3376#ifdef TRMMKERNEL 3377 fsmfp AP, AP 3378#endif 3379 3380#if defined(TRMMKERNEL) 3381#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3382 sub TEMP, K, KK 3383#elif defined(LEFT) 3384 addi TEMP, KK, 1 3385#else 3386 addi TEMP, KK, 1 3387#endif 3388 andi. TEMP, TEMP, 7 3389 mtspr CTR, TEMP 3390#else 3391 andi. 
r0, K, 7 3392 mtspr CTR, r0 3393#endif 3394 ble+ .L128 3395 3396 LFDX A1, AO, INC2 3397 LFDX B1, BO, INC2 3398 add AO, AO, INC 3399 add BO, BO, INC 3400 bdz- .L127 3401 .align 4 3402 3403.L126: 3404 fmadd f0, A1, B1, f0 3405 LFDX A1, AO, INC2 3406 LFDX B1, BO, INC2 3407 add AO, AO, INC 3408 add BO, BO, INC 3409 bdnz+ .L126 3410 .align 4 3411 3412.L127: 3413 fmadd f0, A1, B1, f0 3414 .align 4 3415 3416.L128: 3417#ifndef TRMMKERNEL 3418 LFDX A1, CO1, INC2 3419 fpadd f0, f0, f1 3420 fpadd f2, f2, f3 3421 fpadd f0, f0, f2 3422 fsmtp f1, f0 3423 fadd f0, f0, f1 3424 fmadd f0, AP, f0, A1 3425#else 3426 fpadd f0, f0, f1 3427 fpadd f2, f2, f3 3428 fpadd f0, f0, f2 3429 fsmtp f1, f0 3430 fadd f0, f0, f1 3431 fpmul f0, AP, f0 3432#endif 3433 STFDUX f0, CO1, INC2 3434 .align 4 3435 3436.L999: 3437 addi SP, SP, 12 3438 3439 lwzu r14, 4(SP) 3440 lwzu r15, 4(SP) 3441 3442 lwzu r16, 4(SP) 3443 lwzu r17, 4(SP) 3444 lwzu r18, 4(SP) 3445 lwzu r19, 4(SP) 3446 3447 lwzu r20, 4(SP) 3448 lwzu r21, 4(SP) 3449 lwzu r22, 4(SP) 3450 lwzu r23, 4(SP) 3451 3452 lwzu r24, 4(SP) 3453 lwzu r25, 4(SP) 3454 lwzu r26, 4(SP) 3455 lwzu r27, 4(SP) 3456 3457 lwzu r28, 4(SP) 3458 lwzu r29, 4(SP) 3459 lwzu r30, 4(SP) 3460 lwzu r31, 4(SP) 3461 3462 subi SP, SP, 12 3463 li r0, 16 3464 3465 lfpdux f31, SP, r0 3466 lfpdux f30, SP, r0 3467 lfpdux f29, SP, r0 3468 lfpdux f28, SP, r0 3469 lfpdux f27, SP, r0 3470 lfpdux f26, SP, r0 3471 lfpdux f25, SP, r0 3472 lfpdux f24, SP, r0 3473 lfpdux f23, SP, r0 3474 lfpdux f22, SP, r0 3475 lfpdux f21, SP, r0 3476 lfpdux f20, SP, r0 3477 lfpdux f19, SP, r0 3478 lfpdux f18, SP, r0 3479 lfpdux f17, SP, r0 3480 lfpdux f16, SP, r0 3481 lfpdux f15, SP, r0 3482 lfpdux f14, SP, r0 3483 addi SP, SP, 16 3484 blr 3485 .align 4 3486 3487.L1000: 3488 li INCM1, -1 * SIZE 3489 li INCM3, -3 * SIZE 3490 li INCM5, -5 * SIZE 3491 li INCM7, -7 * SIZE 3492 3493 addi C, C, - 1 * SIZE 3494 srawi. 
J, N, 2 3495 ble .L1050 3496 .align 4 3497 3498.L1010: 3499 mr CO1, C 3500 add CO2, C, LDC 3501 add CO3, CO2, LDC 3502 add CO4, CO3, LDC 3503 add C, CO4, LDC 3504 3505#if defined(TRMMKERNEL) && defined(LEFT) 3506 mr KK, OFFSET 3507#endif 3508 3509 addi AO, A, -4 * SIZE 3510 3511 li r0, FZERO 3512 lfpsx f0, SP, r0 3513 3514 srawi. I, M, 3 3515 ble .L1020 3516 .align 4 3517 3518.L1011: 3519#if defined(TRMMKERNEL) 3520#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 3521 addi AO2, AO, 2 * SIZE 3522 fpmr f4, f0 3523 addi BO, B, - 4 * SIZE 3524 fpmr f8, f0 3525 addi BO2, B, - 2 * SIZE 3526 fpmr f12, f0 3527#else 3528 slwi TEMP, KK, 3 + BASE_SHIFT 3529 slwi r0, KK, 2 + BASE_SHIFT 3530 add AO, AO, TEMP 3531 add BO, B, r0 3532 3533 addi AO2, AO, 2 * SIZE 3534 fpmr f4, f0 3535 addi BO, BO, - 4 * SIZE 3536 fpmr f8, f0 3537 addi BO2, BO, 2 * SIZE 3538 fpmr f12, f0 3539#endif 3540 3541 3542#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3543 sub TEMP, K, KK 3544#elif defined(LEFT) 3545 addi TEMP, KK, 8 3546#else 3547 addi TEMP, KK, 4 3548#endif 3549 srawi. TEMP, TEMP, 2 3550 fpmr f1, f0 3551 mtspr CTR, TEMP 3552 ble .L1014 3553 3554#else 3555 addi AO2, AO, 2 * SIZE 3556 fpmr f4, f0 3557 addi BO, B, - 4 * SIZE 3558 fpmr f8, f0 3559 addi BO2, B, - 2 * SIZE 3560 fpmr f12, f0 3561 3562 srawi. 
r0, K, 2 3563 fpmr f1, f0 3564 mtspr CTR, r0 3565 ble .L1014 3566#endif 3567 3568 LFPDUX A1, AO, INC4 3569 fpmr f5, f0 3570 LFPDUX A3, AO, INC4 3571 fpmr f9, f0 3572 LFPDUX B1, BO, INC4 3573 fpmr f13, f0 3574 3575 LFPDUX A5, AO, INC4 3576 fpmr f2, f0 3577 LFPDUX A6, AO, INC4 3578 fpmr f6, f0 3579 LFPDUX B3, BO, INC4 3580 fpmr f10, f0 3581 LFPDUX A7, AO, INC4 3582 fpmr f14, f0 3583 3584 LFPDUX A8, AO, INC4 3585 fpmr f3, f0 3586 LFPDUX B5, BO, INC4 3587 fpmr f7, f0 3588 LFPDUX A9, AO, INC4 3589 fpmr f11, f0 3590 LFPDUX A2, AO2, INC4 3591 fpmr f15, f0 3592 LFPDUX B2, BO2, INC4 3593 bdz- .L1013 3594 .align 4 3595 3596.L1012: 3597 3598## 1 ## 3599 fxcpmadd f0, B1, A1, f0 3600 nop 3601 fxcsmadd f4, B1, A1, f4 3602 nop 3603 fxcpmadd f8, B2, A1, f8 3604 LFPDUX B4, BO2, INC4 3605 fxcsmadd f12, B2, A1, f12 3606 LFPDUX B6, BO, INC4 3607 3608 fxcpmadd f1, B1, A2, f1 3609 nop 3610 fxcsmadd f5, B1, A2, f5 3611 LFPDUX A4, AO2, INC4 3612 fxcpmadd f9, B2, A2, f9 3613 LFPDUX A10, AO, INC4 3614 fxcsmadd f13, B2, A2, f13 3615 nop 3616 3617 fxcpmadd f2, B1, A3, f2 3618 nop 3619 fxcsmadd f6, B1, A3, f6 3620 nop 3621 fxcpmadd f10, B2, A3, f10 3622 nop 3623 fxcsmadd f14, B2, A3, f14 3624 nop 3625 3626 fxcpmadd f3, B1, A4, f3 3627 nop 3628 fxcsmadd f7, B1, A4, f7 3629 LFPDUX A2, AO2, INC4 3630 fxcpmadd f11, B2, A4, f11 3631 LFPDUX A1, AO, INC4 3632 fxcsmadd f15, B2, A4, f15 3633 nop 3634 3635## 2 ## 3636 3637 fxcpmadd f0, B3, A5, f0 3638 nop 3639 fxcsmadd f4, B3, A5, f4 3640 nop 3641 fxcpmadd f8, B4, A5, f8 3642 LFPDUX B2, BO2, INC4 3643 fxcsmadd f12, B4, A5, f12 3644 LFPDUX B1, BO, INC4 3645 3646 fxcpmadd f1, B3, A2, f1 3647 nop 3648 fxcsmadd f5, B3, A2, f5 3649 LFPDUX A4, AO2, INC4 3650 fxcpmadd f9, B4, A2, f9 3651 LFPDUX A3, AO, INC4 3652 fxcsmadd f13, B4, A2, f13 3653 nop 3654 3655 fxcpmadd f2, B3, A6, f2 3656 nop 3657 fxcsmadd f6, B3, A6, f6 3658 nop 3659 fxcpmadd f10, B4, A6, f10 3660 nop 3661 fxcsmadd f14, B4, A6, f14 3662 nop 3663 3664 fxcpmadd f3, B3, A4, f3 3665 nop 3666 fxcsmadd 
f7, B3, A4, f7 3667 LFPDUX A2, AO2, INC4 3668 fxcpmadd f11, B4, A4, f11 3669 LFPDUX A5, AO, INC4 3670 fxcsmadd f15, B4, A4, f15 3671 nop 3672 3673## 3 ## 3674 3675 fxcpmadd f0, B5, A7, f0 3676 nop 3677 fxcsmadd f4, B5, A7, f4 3678 nop 3679 fxcpmadd f8, B2, A7, f8 3680 LFPDUX B4, BO2, INC4 3681 fxcsmadd f12, B2, A7, f12 3682 LFPDUX B3, BO, INC4 3683 3684 fxcpmadd f1, B5, A2, f1 3685 nop 3686 fxcsmadd f5, B5, A2, f5 3687 LFPDUX A4, AO2, INC4 3688 fxcpmadd f9, B2, A2, f9 3689 LFPDUX A6, AO, INC4 3690 fxcsmadd f13, B2, A2, f13 3691 nop 3692 3693 fxcpmadd f2, B5, A8, f2 3694 nop 3695 fxcsmadd f6, B5, A8, f6 3696 nop 3697 fxcpmadd f10, B2, A8, f10 3698 nop 3699 fxcsmadd f14, B2, A8, f14 3700 nop 3701 3702 fxcpmadd f3, B5, A4, f3 3703 nop 3704 fxcsmadd f7, B5, A4, f7 3705 LFPDUX A2, AO2, INC4 3706 fxcpmadd f11, B2, A4, f11 3707 LFPDUX A7, AO, INC4 3708 fxcsmadd f15, B2, A4, f15 3709 nop 3710 3711## 4 ## 3712 fxcpmadd f0, B6, A9, f0 3713 nop 3714 fxcsmadd f4, B6, A9, f4 3715 nop 3716 fxcpmadd f8, B4, A9, f8 3717 LFPDUX B2, BO2, INC4 3718 fxcsmadd f12, B4, A9, f12 3719 LFPDUX B5, BO, INC4 3720 3721 fxcpmadd f1, B6, A2, f1 3722 nop 3723 fxcsmadd f5, B6, A2, f5 3724 LFPDUX A4, AO2, INC4 3725 fxcpmadd f9, B4, A2, f9 3726 LFPDUX A8, AO, INC4 3727 fxcsmadd f13, B4, A2, f13 3728 nop 3729 3730 fxcpmadd f2, B6, A10, f2 3731 nop 3732 fxcsmadd f6, B6, A10, f6 3733 nop 3734 fxcpmadd f10, B4, A10, f10 3735 nop 3736 fxcsmadd f14, B4, A10, f14 3737 nop 3738 3739 fxcpmadd f3, B6, A4, f3 3740 LFPDUX A2, AO2, INC4 3741 fxcsmadd f7, B6, A4, f7 3742 LFPDUX A9, AO, INC4 3743 fxcpmadd f11, B4, A4, f11 3744 nop 3745 fxcsmadd f15, B4, A4, f15 3746 bdnz+ .L1012 3747 .align 4 3748 3749.L1013: 3750## 1 ## 3751 3752 fxcpmadd f0, B1, A1, f0 3753 nop 3754 fxcsmadd f4, B1, A1, f4 3755 nop 3756 fxcpmadd f8, B2, A1, f8 3757 LFPDUX B4, BO2, INC4 3758 fxcsmadd f12, B2, A1, f12 3759 LFPDUX B6, BO, INC4 3760 3761 fxcpmadd f1, B1, A2, f1 3762 nop 3763 fxcsmadd f5, B1, A2, f5 3764 LFPDUX A4, AO2, INC4 3765 
fxcpmadd f9, B2, A2, f9 3766 LFPDUX A10, AO, INC4 3767 fxcsmadd f13, B2, A2, f13 3768 nop 3769 3770 fxcpmadd f2, B1, A3, f2 3771 nop 3772 fxcsmadd f6, B1, A3, f6 3773 nop 3774 fxcpmadd f10, B2, A3, f10 3775 nop 3776 fxcsmadd f14, B2, A3, f14 3777 nop 3778 3779 fxcpmadd f3, B1, A4, f3 3780 nop 3781 fxcsmadd f7, B1, A4, f7 3782 LFPDUX A2, AO2, INC4 3783 fxcpmadd f11, B2, A4, f11 3784#ifndef TRMMKERNEL 3785 LFDUX A1, CO1, INC 3786#else 3787 nop 3788#endif 3789 fxcsmadd f15, B2, A4, f15 3790 nop 3791 3792## 2 ## 3793 3794 fxcpmadd f0, B3, A5, f0 3795 nop 3796 fxcsmadd f4, B3, A5, f4 3797 nop 3798 fxcpmadd f8, B4, A5, f8 3799 LFPDUX B2, BO2, INC4 3800 fxcsmadd f12, B4, A5, f12 3801#ifndef TRMMKERNEL 3802 LFDUX B1, CO1, INC2 3803#else 3804 nop 3805#endif 3806 3807 fxcpmadd f1, B3, A2, f1 3808 nop 3809 fxcsmadd f5, B3, A2, f5 3810 LFPDUX A4, AO2, INC4 3811 fxcpmadd f9, B4, A2, f9 3812#ifndef TRMMKERNEL 3813 LFDUX A3, CO1, INC2 3814#else 3815 nop 3816#endif 3817 fxcsmadd f13, B4, A2, f13 3818 nop 3819 3820 fxcpmadd f2, B3, A6, f2 3821 nop 3822 fxcsmadd f6, B3, A6, f6 3823 nop 3824 fxcpmadd f10, B4, A6, f10 3825 nop 3826 fxcsmadd f14, B4, A6, f14 3827 nop 3828 3829 fxcpmadd f3, B3, A4, f3 3830 nop 3831 fxcsmadd f7, B3, A4, f7 3832 LFPDUX A2, AO2, INC4 3833 fxcpmadd f11, B4, A4, f11 3834#ifndef TRMMKERNEL 3835 LFDUX A5, CO1, INC2 3836#else 3837 nop 3838#endif 3839 fxcsmadd f15, B4, A4, f15 3840 nop 3841 3842## 3 ## 3843 3844 fxcpmadd f0, B5, A7, f0 3845 nop 3846 fxcsmadd f4, B5, A7, f4 3847 nop 3848 fxcpmadd f8, B2, A7, f8 3849 LFPDUX B4, BO2, INC4 3850 fxcsmadd f12, B2, A7, f12 3851#ifndef TRMMKERNEL 3852 LFSDUX A1, CO1, INCM5 3853#else 3854 nop 3855#endif 3856 3857 fxcpmadd f1, B5, A2, f1 3858 nop 3859 fxcsmadd f5, B5, A2, f5 3860 LFPDUX A4, AO2, INC4 3861 fxcpmadd f9, B2, A2, f9 3862#ifndef TRMMKERNEL 3863 LFSDUX B1, CO1, INC2 3864#else 3865 nop 3866#endif 3867 fxcsmadd f13, B2, A2, f13 3868 nop 3869 3870 fxcpmadd f2, B5, A8, f2 3871 nop 3872 fxcsmadd f6, B5, A8, f6 3873 
nop 3874 fxcpmadd f10, B2, A8, f10 3875 nop 3876 fxcsmadd f14, B2, A8, f14 3877 nop 3878 3879 fxcpmadd f3, B5, A4, f3 3880 nop 3881 fxcsmadd f7, B5, A4, f7 3882 LFPDUX A2, AO2, INC4 3883 fxcpmadd f11, B2, A4, f11 3884#ifndef TRMMKERNEL 3885 LFSDUX A3, CO1, INC2 3886#else 3887 nop 3888#endif 3889 fxcsmadd f15, B2, A4, f15 3890 nop 3891 3892## 4 ## 3893 3894 fxcpmadd f0, B6, A9, f0 3895 nop 3896 fxcsmadd f4, B6, A9, f4 3897 nop 3898 fxcpmadd f8, B4, A9, f8 3899#ifndef TRMMKERNEL 3900 LFSDUX A5, CO1, INC2 3901#else 3902 nop 3903#endif 3904 fxcsmadd f12, B4, A9, f12 3905#ifndef TRMMKERNEL 3906 LFDUX B3, CO2, INC 3907#else 3908 nop 3909#endif 3910 3911 fxcpmadd f1, B6, A2, f1 3912 nop 3913 fxcsmadd f5, B6, A2, f5 3914 LFPDUX A4, AO2, INC4 3915 fxcpmadd f9, B4, A2, f9 3916#ifndef TRMMKERNEL 3917 LFDUX A6, CO2, INC2 3918#else 3919 nop 3920#endif 3921 fxcsmadd f13, B4, A2, f13 3922 nop 3923 3924 fxcpmadd f2, B6, A10, f2 3925 nop 3926 fxcsmadd f6, B6, A10, f6 3927 nop 3928 fxcpmadd f10, B4, A10, f10 3929 nop 3930 fxcsmadd f14, B4, A10, f14 3931#ifndef TRMMKERNEL 3932 LFDUX A7, CO2, INC2 3933#else 3934 nop 3935#endif 3936 3937 fxcpmadd f3, B6, A4, f3 3938 nop 3939 fxcsmadd f7, B6, A4, f7 3940 nop 3941 fxcpmadd f11, B4, A4, f11 3942 nop 3943 fxcsmadd f15, B4, A4, f15 3944#ifndef TRMMKERNEL 3945 LFDUX B2, CO2, INC2 3946#else 3947 nop 3948#endif 3949 .align 4 3950 3951.L1014: 3952 lfd AP, ALPHA(SP) 3953#ifdef TRMMKERNEL 3954 fsmfp AP, AP 3955#endif 3956 3957#if defined(TRMMKERNEL) 3958#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 3959 sub TEMP, K, KK 3960#elif defined(LEFT) 3961 addi TEMP, KK, 8 3962#else 3963 addi TEMP, KK, 4 3964#endif 3965 andi. r0, TEMP, 3 3966 mtspr CTR, r0 3967 ble+ .L1018 3968 3969 cmpwi cr0, TEMP, 3 3970 bgt+ .L1015 3971#else 3972 andi. 
r0, K, 3 3973 mtspr CTR, r0 3974 ble+ .L1018 3975 3976 cmpwi cr0, K, 3 3977 bgt+ .L1015 3978#endif 3979 3980#ifndef TRMMKERNEL 3981 LFDUX A1, CO1, INC 3982 fpmr f5, f0 3983 LFDUX B1, CO1, INC2 3984 fpmr f9, f0 3985 LFDUX A3, CO1, INC2 3986 fpmr f13, f0 3987 LFDUX A5, CO1, INC2 3988 fpmr f2, f0 3989 3990 LFSDUX A1, CO1, INCM5 3991 fpmr f6, f0 3992 LFSDUX B1, CO1, INC2 3993 fpmr f10, f0 3994 LFSDUX A3, CO1, INC2 3995 fpmr f14, f0 3996 LFSDUX A5, CO1, INC2 3997 fpmr f3, f0 3998 3999 LFDUX B3, CO2, INC 4000 fpmr f7, f0 4001 LFDUX A6, CO2, INC2 4002 fpmr f11, f0 4003 LFDUX A7, CO2, INC2 4004 fpmr f15, f0 4005 LFDUX B2, CO2, INC2 4006#else 4007 fpmr f5, f0 4008 fpmr f9, f0 4009 fpmr f13, f0 4010 fpmr f2, f0 4011 4012 fpmr f6, f0 4013 fpmr f10, f0 4014 fpmr f14, f0 4015 fpmr f3, f0 4016 4017 fpmr f7, f0 4018 fpmr f11, f0 4019 fpmr f15, f0 4020 nop 4021#endif 4022 .align 4 4023 4024.L1015: 4025 LFPDUX A2, AO, INC4 4026 LFPDUX A4, AO2, INC4 4027 LFPDUX A10, BO, INC4 4028 LFPDUX B4, BO2, INC4 4029 bdz- .L1017 4030 .align 4 4031 4032.L1016: 4033 fxcpmadd f0, A10, A2, f0 4034 fxcsmadd f4, A10, A2, f4 4035 fxcpmadd f8, B4, A2, f8 4036 fxcsmadd f12, B4, A2, f12 4037 LFPDUX A2, AO, INC4 4038 4039 fxcpmadd f1, A10, A4, f1 4040 fxcsmadd f5, A10, A4, f5 4041 fxcpmadd f9, B4, A4, f9 4042 fxcsmadd f13, B4, A4, f13 4043 LFPDUX A4, AO2, INC4 4044 4045 fxcpmadd f2, A10, A2, f2 4046 fxcsmadd f6, A10, A2, f6 4047 fxcpmadd f10, B4, A2, f10 4048 fxcsmadd f14, B4, A2, f14 4049 LFPDUX A2, AO, INC4 4050 4051 fxcpmadd f3, A10, A4, f3 4052 fxcsmadd f7, A10, A4, f7 4053 LFPDUX A10, BO, INC4 4054 fxcpmadd f11, B4, A4, f11 4055 fxcsmadd f15, B4, A4, f15 4056 LFPDUX A4, AO2, INC4 4057 LFPDUX B4, BO2, INC4 4058 bdnz+ .L1016 4059 .align 4 4060 4061.L1017: 4062 fxcpmadd f0, A10, A2, f0 4063 fxcsmadd f4, A10, A2, f4 4064 fxcpmadd f8, B4, A2, f8 4065 fxcsmadd f12, B4, A2, f12 4066 LFPDUX A2, AO, INC4 4067 4068 fxcpmadd f1, A10, A4, f1 4069 fxcsmadd f5, A10, A4, f5 4070 fxcpmadd f9, B4, A4, f9 4071 
fxcsmadd f13, B4, A4, f13 4072 LFPDUX A4, AO2, INC4 4073 4074 fxcpmadd f2, A10, A2, f2 4075 fxcsmadd f6, A10, A2, f6 4076 fxcpmadd f10, B4, A2, f10 4077 fxcsmadd f14, B4, A2, f14 4078 4079 fxcpmadd f3, A10, A4, f3 4080 fxcsmadd f7, A10, A4, f7 4081 fxcpmadd f11, B4, A4, f11 4082 fxcsmadd f15, B4, A4, f15 4083 .align 4 4084 4085.L1018: 4086#ifndef TRMMKERNEL 4087 LFSDUX B3, CO2, INCM5 4088 LFSDUX A6, CO2, INC2 4089 LFSDUX A7, CO2, INC2 4090 LFSDUX B2, CO2, INC2 4091 4092 LFDUX B5, CO3, INC 4093 LFDUX A8, CO3, INC2 4094 LFDUX A9, CO3, INC2 4095 LFDUX B4, CO3, INC2 4096 4097 LFSDUX B5, CO3, INCM5 4098 LFSDUX A8, CO3, INC2 4099 LFSDUX A9, CO3, INC2 4100 LFSDUX B4, CO3, INC2 4101 4102 LFDUX A2, CO4, INC 4103 LFDUX A4, CO4, INC2 4104 4105 fxcpmadd f0, AP, f0, A1 4106 LFDUX A10, CO4, INC2 4107 LFDUX A1, CO4, INC2 4108 4109 fxcpmadd f1, AP, f1, B1 4110 LFSDUX A2, CO4, INCM5 4111 LFSDUX A4, CO4, INC2 4112 4113 fxcpmadd f2, AP, f2, A3 4114 LFSDUX A10, CO4, INC2 4115 LFSDUX A1, CO4, INC2 4116 4117 fxcpmadd f3, AP, f3, A5 4118 STFDUX f0, CO1, INCM7 4119 STFSDUX f0, CO1, INC 4120 4121 fxcpmadd f4, AP, f4, B3 4122 STFDUX f1, CO1, INC 4123 STFSDUX f1, CO1, INC 4124 4125 fxcpmadd f5, AP, f5, A6 4126 STFDUX f2, CO1, INC 4127 STFSDUX f2, CO1, INC 4128 4129 fxcpmadd f6, AP, f6, A7 4130 STFDUX f3, CO1, INC 4131 STFSDUX f3, CO1, INC 4132 4133 fxcpmadd f7, AP, f7, B2 4134 STFDUX f4, CO2, INCM7 4135 STFSDUX f4, CO2, INC 4136 4137 fxcpmadd f8, AP, f8, B5 4138 STFDUX f5, CO2, INC 4139 STFSDUX f5, CO2, INC 4140 4141 fxcpmadd f9, AP, f9, A8 4142 STFDUX f6, CO2, INC 4143 STFSDUX f6, CO2, INC 4144 4145 fxcpmadd f10, AP, f10, A9 4146 STFDUX f7, CO2, INC 4147 STFSDUX f7, CO2, INC 4148 4149 fxcpmadd f11, AP, f11, B4 4150 STFDUX f8, CO3, INCM7 4151 STFSDUX f8, CO3, INC 4152 4153 fxcpmadd f12, AP, f12, A2 4154 STFDUX f9, CO3, INC 4155 STFSDUX f9, CO3, INC 4156 4157 fxcpmadd f13, AP, f13, A4 4158 STFDUX f10, CO3, INC 4159 STFSDUX f10, CO3, INC 4160 4161 fxcpmadd f14, AP, f14, A10 4162 STFDUX f11, 
CO3, INC 4163 STFSDUX f11, CO3, INC 4164 4165 fxcpmadd f15, AP, f15, A1 4166 STFDUX f12, CO4, INCM7 4167#else 4168 fpmul f0, AP, f0 4169 fpmul f1, AP, f1 4170 fpmul f2, AP, f2 4171 fpmul f3, AP, f3 4172 4173 STFDUX f0, CO1, INC 4174 STFSDUX f0, CO1, INC 4175 4176 fpmul f4, AP, f4 4177 STFDUX f1, CO1, INC 4178 STFSDUX f1, CO1, INC 4179 4180 fpmul f5, AP, f5 4181 STFDUX f2, CO1, INC 4182 STFSDUX f2, CO1, INC 4183 4184 fpmul f6, AP, f6 4185 STFDUX f3, CO1, INC 4186 STFSDUX f3, CO1, INC 4187 4188 fpmul f7, AP, f7 4189 STFDUX f4, CO2, INC 4190 STFSDUX f4, CO2, INC 4191 4192 fpmul f8, AP, f8 4193 STFDUX f5, CO2, INC 4194 STFSDUX f5, CO2, INC 4195 4196 fpmul f9, AP, f9 4197 STFDUX f6, CO2, INC 4198 STFSDUX f6, CO2, INC 4199 4200 fpmul f10, AP, f10 4201 STFDUX f7, CO2, INC 4202 STFSDUX f7, CO2, INC 4203 4204 fpmul f11, AP, f11 4205 STFDUX f8, CO3, INC 4206 STFSDUX f8, CO3, INC 4207 4208 fpmul f12, AP, f12 4209 STFDUX f9, CO3, INC 4210 STFSDUX f9, CO3, INC 4211 4212 fpmul f13, AP, f13 4213 STFDUX f10, CO3, INC 4214 STFSDUX f10, CO3, INC 4215 4216 fpmul f14, AP, f14 4217 STFDUX f11, CO3, INC 4218 STFSDUX f11, CO3, INC 4219 4220 fpmul f15, AP, f15 4221 STFDUX f12, CO4, INC 4222#endif 4223 4224 STFSDUX f12, CO4, INC 4225 STFDUX f13, CO4, INC 4226 STFSDUX f13, CO4, INC 4227 STFDUX f14, CO4, INC 4228 STFSDUX f14, CO4, INC 4229 STFDUX f15, CO4, INC 4230 STFSDUX f15, CO4, INC 4231 4232#ifdef TRMMKERNEL 4233#if ( defined(LEFT) && defined(TRANSA)) || \ 4234 (!defined(LEFT) && !defined(TRANSA)) 4235 sub TEMP, K, KK 4236#ifdef LEFT 4237 addi TEMP, TEMP, -8 4238#else 4239 addi TEMP, TEMP, -4 4240#endif 4241 slwi r0, TEMP, 3 + BASE_SHIFT 4242 slwi TEMP, TEMP, 2 + BASE_SHIFT 4243 add AO, AO, r0 4244 add BO, BO, TEMP 4245#endif 4246 4247#ifdef LEFT 4248 addi KK, KK, 8 4249#endif 4250#endif 4251 4252 addic. I, I, -1 4253 li r0, FZERO 4254 4255 lfpsx f0, SP, r0 4256 bgt+ .L1011 4257 .align 4 4258 4259.L1020: 4260 andi. 
I, M, 4 4261 beq .L1030 4262 4263#if defined(TRMMKERNEL) 4264#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 4265 addi AO2, AO, 2 * SIZE 4266 fpmr f4, f0 4267 addi BO, B, - 4 * SIZE 4268 fpmr f8, f0 4269 addi BO2, B, - 2 * SIZE 4270 fpmr f12, f0 4271#else 4272 slwi TEMP, KK, 2 + BASE_SHIFT 4273 slwi r0, KK, 2 + BASE_SHIFT 4274 add AO, AO, TEMP 4275 add BO, B, r0 4276 4277 addi AO2, AO, 2 * SIZE 4278 fpmr f4, f0 4279 addi BO, BO, - 4 * SIZE 4280 fpmr f8, f0 4281 addi BO2, BO, 2 * SIZE 4282 fpmr f12, f0 4283#endif 4284 4285#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 4286 sub TEMP, K, KK 4287#elif defined(LEFT) 4288 addi TEMP, KK, 4 4289#else 4290 addi TEMP, KK, 4 4291#endif 4292 4293 srawi. TEMP, TEMP, 2 4294 fpmr f1, f0 4295 fpmr f5, f0 4296 fpmr f9, f0 4297 mtspr CTR, TEMP 4298 fpmr f13, f0 4299 ble .L1024 4300#else 4301 addi AO2, AO, 2 * SIZE 4302 fpmr f4, f0 4303 addi BO, B, - 4 * SIZE 4304 fpmr f8, f0 4305 addi BO2, B, - 2 * SIZE 4306 fpmr f12, f0 4307 4308 srawi. 
r0, K, 2 4309 fpmr f1, f0 4310 fpmr f5, f0 4311 fpmr f9, f0 4312 mtspr CTR, r0 4313 fpmr f13, f0 4314 ble .L1024 4315#endif 4316 4317 LFPDUX A1, AO, INC4 4318 LFPDUX B1, BO, INC4 4319 LFPDUX A2, AO2, INC4 4320 LFPDUX B2, BO2, INC4 4321 LFPDUX A3, AO, INC4 4322 LFPDUX B3, BO, INC4 4323 LFPDUX A4, AO2, INC4 4324 LFPDUX B4, BO2, INC4 4325 4326 LFPDUX A5, AO, INC4 4327 LFPDUX B5, BO, INC4 4328 LFPDUX A6, AO2, INC4 4329 LFPDUX B6, BO2, INC4 4330 LFPDUX A7, AO, INC4 4331 LFPDUX A9, BO, INC4 4332 LFPDUX A10, BO2, INC4 4333 bdz- .L1023 4334 .align 4 4335 4336.L1022: 4337 fxcpmadd f0, B1, A1, f0 4338 nop 4339 fxcsmadd f4, B1, A1, f4 4340 LFPDUX A8, AO2, INC4 4341 fxcpmadd f8, B2, A1, f8 4342 nop 4343 fxcsmadd f12, B2, A1, f12 4344 LFPDUX A1, AO, INC4 4345 4346 fxcpmadd f1, B1, A2, f1 4347 nop 4348 fxcsmadd f5, B1, A2, f5 4349 LFPDUX B1, BO, INC4 4350 fxcpmadd f9, B2, A2, f9 4351 nop 4352 fxcsmadd f13, B2, A2, f13 4353 LFPDUX B2, BO2, INC4 4354 4355 fxcpmadd f0, B3, A3, f0 4356 nop 4357 fxcsmadd f4, B3, A3, f4 4358 LFPDUX A2, AO2, INC4 4359 fxcpmadd f8, B4, A3, f8 4360 nop 4361 fxcsmadd f12, B4, A3, f12 4362 LFPDUX A3, AO, INC4 4363 4364 fxcpmadd f1, B3, A4, f1 4365 nop 4366 fxcsmadd f5, B3, A4, f5 4367 LFPDUX B3, BO, INC4 4368 fxcpmadd f9, B4, A4, f9 4369 nop 4370 fxcsmadd f13, B4, A4, f13 4371 LFPDUX B4, BO2, INC4 4372 4373 fxcpmadd f0, B5, A5, f0 4374 nop 4375 fxcsmadd f4, B5, A5, f4 4376 LFPDUX A4, AO2, INC4 4377 fxcpmadd f8, B6, A5, f8 4378 nop 4379 fxcsmadd f12, B6, A5, f12 4380 LFPDUX A5, AO, INC4 4381 4382 fxcpmadd f1, B5, A6, f1 4383 nop 4384 fxcsmadd f5, B5, A6, f5 4385 LFPDUX B5, BO, INC4 4386 fxcpmadd f9, B6, A6, f9 4387 nop 4388 fxcsmadd f13, B6, A6, f13 4389 LFPDUX B6, BO2, INC4 4390 4391 fxcpmadd f0, A9, A7, f0 4392 nop 4393 fxcsmadd f4, A9, A7, f4 4394 LFPDUX A6, AO2, INC4 4395 fxcpmadd f8, A10, A7, f8 4396 nop 4397 fxcsmadd f12, A10, A7, f12 4398 LFPDUX A7, AO, INC4 4399 4400 fxcpmadd f1, A9, A8, f1 4401 nop 4402 fxcsmadd f5, A9, A8, f5 4403 LFPDUX A9, BO, 
INC4 4404 fxcpmadd f9, A10, A8, f9 4405 nop 4406 fxcsmadd f13, A10, A8, f13 4407 LFPDUX A10, BO2, INC4 4408 bdnz+ .L1022 4409 .align 4 4410 4411.L1023: 4412 fxcpmadd f0, B1, A1, f0 4413 fxcsmadd f4, B1, A1, f4 4414 LFPDUX A8, AO2, INC4 4415 fxcpmadd f8, B2, A1, f8 4416 fxcsmadd f12, B2, A1, f12 4417 4418 fxcpmadd f1, B1, A2, f1 4419 fxcsmadd f5, B1, A2, f5 4420 fxcpmadd f9, B2, A2, f9 4421 fxcsmadd f13, B2, A2, f13 4422 4423 fxcpmadd f0, B3, A3, f0 4424 fxcsmadd f4, B3, A3, f4 4425 fxcpmadd f8, B4, A3, f8 4426 fxcsmadd f12, B4, A3, f12 4427 4428 fxcpmadd f1, B3, A4, f1 4429 fxcsmadd f5, B3, A4, f5 4430 fxcpmadd f9, B4, A4, f9 4431 fxcsmadd f13, B4, A4, f13 4432 4433 fxcpmadd f0, B5, A5, f0 4434 fxcsmadd f4, B5, A5, f4 4435 fxcpmadd f8, B6, A5, f8 4436 fxcsmadd f12, B6, A5, f12 4437 4438 fxcpmadd f1, B5, A6, f1 4439 fxcsmadd f5, B5, A6, f5 4440 fxcpmadd f9, B6, A6, f9 4441 fxcsmadd f13, B6, A6, f13 4442 4443 fxcpmadd f0, A9, A7, f0 4444 fxcsmadd f4, A9, A7, f4 4445 fxcpmadd f8, A10, A7, f8 4446 fxcsmadd f12, A10, A7, f12 4447 4448 fxcpmadd f1, A9, A8, f1 4449 fxcsmadd f5, A9, A8, f5 4450 fxcpmadd f9, A10, A8, f9 4451 fxcsmadd f13, A10, A8, f13 4452 .align 4 4453 4454.L1024: 4455 lfd AP, ALPHA(SP) 4456#ifdef TRMMKERNEL 4457 fsmfp AP, AP 4458#endif 4459 4460#if defined(TRMMKERNEL) 4461#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 4462 sub TEMP, K, KK 4463#elif defined(LEFT) 4464 addi TEMP, KK, 4 4465#else 4466 addi TEMP, KK, 4 4467#endif 4468 andi. TEMP, TEMP, 3 4469 mtspr CTR, TEMP 4470#else 4471 andi. 
r0, K, 3 4472 mtspr CTR, r0 4473#endif 4474 ble+ .L1028 4475 4476 LFPDUX A1, AO, INC4 4477 LFPDUX A2, AO2, INC4 4478 LFPDUX B1, BO, INC4 4479 LFPDUX B2, BO2, INC4 4480 bdz- .L1027 4481 .align 4 4482 4483.L1026: 4484 fxcpmadd f0, B1, A1, f0 4485 fxcsmadd f4, B1, A1, f4 4486 fxcpmadd f8, B2, A1, f8 4487 fxcsmadd f12, B2, A1, f12 4488 LFPDUX A1, AO, INC4 4489 4490 fxcpmadd f1, B1, A2, f1 4491 fxcsmadd f5, B1, A2, f5 4492 LFPDUX B1, BO, INC4 4493 fxcpmadd f9, B2, A2, f9 4494 fxcsmadd f13, B2, A2, f13 4495 LFPDUX A2, AO2, INC4 4496 LFPDUX B2, BO2, INC4 4497 bdnz+ .L1026 4498 .align 4 4499 4500.L1027: 4501 fxcpmadd f0, B1, A1, f0 4502 fxcsmadd f4, B1, A1, f4 4503 fxcpmadd f8, B2, A1, f8 4504 fxcsmadd f12, B2, A1, f12 4505 4506 fxcpmadd f1, B1, A2, f1 4507 fxcsmadd f5, B1, A2, f5 4508 fxcpmadd f9, B2, A2, f9 4509 fxcsmadd f13, B2, A2, f13 4510 .align 4 4511 4512.L1028: 4513#ifndef TRMMKERNEL 4514 LFDUX A1, CO1, INC 4515 LFDUX B1, CO1, INC2 4516 LFDUX B3, CO2, INC 4517 LFDUX A6, CO2, INC2 4518 4519 LFSDUX A1, CO1, INCM1 4520 LFSDUX B1, CO1, INC2 4521 LFSDUX B3, CO2, INCM1 4522 LFSDUX A6, CO2, INC2 4523 4524 LFDUX B5, CO3, INC 4525 LFDUX A8, CO3, INC2 4526 LFDUX A2, CO4, INC 4527 LFDUX A4, CO4, INC2 4528 4529 fxcpmadd f0, AP, f0, A1 4530 LFSDUX B5, CO3, INCM1 4531 LFSDUX A8, CO3, INC2 4532 4533 fxcpmadd f1, AP, f1, B1 4534 LFSDUX A2, CO4, INCM1 4535 LFSDUX A4, CO4, INC2 4536 4537 fxcpmadd f4, AP, f4, B3 4538 STFDUX f0, CO1, INCM3 4539 STFSDUX f0, CO1, INC 4540 4541 fxcpmadd f5, AP, f5, A6 4542 STFDUX f1, CO1, INC 4543 STFSDUX f1, CO1, INC 4544 4545 fxcpmadd f8, AP, f8, B5 4546 STFDUX f4, CO2, INCM3 4547 STFSDUX f4, CO2, INC 4548 4549 fxcpmadd f9, AP, f9, A8 4550 STFDUX f5, CO2, INC 4551 STFSDUX f5, CO2, INC 4552 4553 fxcpmadd f12, AP, f12, A2 4554 STFDUX f8, CO3, INCM3 4555 STFSDUX f8, CO3, INC 4556 4557 fxcpmadd f13, AP, f13, A4 4558 STFDUX f9, CO3, INC 4559 STFSDUX f9, CO3, INC 4560 4561 STFDUX f12, CO4, INCM3 4562 STFSDUX f12, CO4, INC 4563 4564 STFDUX f13, CO4, INC 4565 
STFSDUX f13, CO4, INC 4566#else 4567 fpmul f0, AP, f0 4568 fpmul f1, AP, f1 4569 4570 fpmul f4, AP, f4 4571 STFDUX f0, CO1, INC 4572 STFSDUX f0, CO1, INC 4573 4574 fpmul f5, AP, f5 4575 STFDUX f1, CO1, INC 4576 STFSDUX f1, CO1, INC 4577 4578 fpmul f8, AP, f8 4579 STFDUX f4, CO2, INC 4580 STFSDUX f4, CO2, INC 4581 4582 fpmul f9, AP, f9 4583 STFDUX f5, CO2, INC 4584 STFSDUX f5, CO2, INC 4585 4586 fpmul f12, AP, f12 4587 STFDUX f8, CO3, INC 4588 STFSDUX f8, CO3, INC 4589 4590 fpmul f13, AP, f13 4591 STFDUX f9, CO3, INC 4592 STFSDUX f9, CO3, INC 4593 4594 STFDUX f12, CO4, INC 4595 STFSDUX f12, CO4, INC 4596 4597 STFDUX f13, CO4, INC 4598 STFSDUX f13, CO4, INC 4599#endif 4600 4601 4602#ifdef TRMMKERNEL 4603#if ( defined(LEFT) && defined(TRANSA)) || \ 4604 (!defined(LEFT) && !defined(TRANSA)) 4605 sub TEMP, K, KK 4606#ifdef LEFT 4607 addi TEMP, TEMP, -4 4608#else 4609 addi TEMP, TEMP, -4 4610#endif 4611 slwi r0, TEMP, 2 + BASE_SHIFT 4612 slwi TEMP, TEMP, 2 + BASE_SHIFT 4613 add AO, AO, r0 4614 add BO, BO, TEMP 4615#endif 4616 4617#ifdef LEFT 4618 addi KK, KK, 4 4619#endif 4620#endif 4621 4622 li r0, FZERO 4623 lfpsx f0, SP, r0 4624 .align 4 4625 4626.L1030: 4627 andi. I, M, 2 4628 beq .L1040 4629 4630#if defined(TRMMKERNEL) 4631#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 4632 addi AO2, AO, 2 * SIZE 4633 fpmr f1, f0 4634 addi BO, B, - 4 * SIZE 4635 fpmr f2, f0 4636 addi BO2, B, - 2 * SIZE 4637 fpmr f3, f0 4638#else 4639 slwi TEMP, KK, 1 + BASE_SHIFT 4640 slwi r0, KK, 2 + BASE_SHIFT 4641 add AO, AO, TEMP 4642 add BO, B, r0 4643 4644 addi AO2, AO, 2 * SIZE 4645 fpmr f1, f0 4646 addi BO, BO, - 4 * SIZE 4647 fpmr f2, f0 4648 addi BO2, BO, 2 * SIZE 4649 fpmr f3, f0 4650#endif 4651 4652#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 4653 sub TEMP, K, KK 4654#elif defined(LEFT) 4655 addi TEMP, KK, 2 4656#else 4657 addi TEMP, KK, 4 4658#endif 4659 4660 srawi. 
r0, TEMP, 2 4661 mtspr CTR, r0 4662 ble .L1034 4663 4664#else 4665 addi AO2, AO, 2 * SIZE 4666 fpmr f1, f0 4667 addi BO, B, - 4 * SIZE 4668 fpmr f2, f0 4669 addi BO2, B, - 2 * SIZE 4670 fpmr f3, f0 4671 4672 srawi. r0, K, 2 4673 mtspr CTR, r0 4674 ble .L1034 4675#endif 4676 4677 LFPDUX A1, AO, INC4 4678 LFPDUX B1, BO, INC4 4679 LFPDUX B2, BO2, INC4 4680 LFPDUX A2, AO2, INC4 4681 LFPDUX B3, BO, INC4 4682 LFPDUX B4, BO2, INC4 4683 4684 LFPDUX A3, AO, INC4 4685 LFPDUX A5, BO, INC4 4686 LFPDUX A6, BO2, INC4 4687 LFPDUX A4, AO2, INC4 4688 LFPDUX A7, BO, INC4 4689 LFPDUX A8, BO2, INC4 4690 bdz- .L1033 4691 .align 4 4692 4693.L1032: 4694 fxcpmadd f0, B1, A1, f0 4695 fxcsmadd f1, B1, A1, f1 4696 LFPDUX B1, BO, INC4 4697 fxcpmadd f2, B2, A1, f2 4698 fxcsmadd f3, B2, A1, f3 4699 LFPDUX B2, BO2, INC4 4700 LFPDUX A1, AO, INC4 4701 4702 fxcpmadd f0, B3, A2, f0 4703 fxcsmadd f1, B3, A2, f1 4704 LFPDUX B3, BO, INC4 4705 fxcpmadd f2, B4, A2, f2 4706 fxcsmadd f3, B4, A2, f3 4707 LFPDUX B4, BO2, INC4 4708 LFPDUX A2, AO2, INC4 4709 4710 fxcpmadd f0, A5, A3, f0 4711 fxcsmadd f1, A5, A3, f1 4712 LFPDUX A5, BO, INC4 4713 fxcpmadd f2, A6, A3, f2 4714 fxcsmadd f3, A6, A3, f3 4715 LFPDUX A6, BO2, INC4 4716 LFPDUX A3, AO, INC4 4717 4718 fxcpmadd f0, A7, A4, f0 4719 fxcsmadd f1, A7, A4, f1 4720 LFPDUX A7, BO, INC4 4721 fxcpmadd f2, A8, A4, f2 4722 fxcsmadd f3, A8, A4, f3 4723 LFPDUX A8, BO2, INC4 4724 LFPDUX A4, AO2, INC4 4725 bdnz+ .L1032 4726 .align 4 4727 4728.L1033: 4729 fxcpmadd f0, B1, A1, f0 4730 fxcsmadd f1, B1, A1, f1 4731 fxcpmadd f2, B2, A1, f2 4732 fxcsmadd f3, B2, A1, f3 4733 4734 fxcpmadd f0, B3, A2, f0 4735 fxcsmadd f1, B3, A2, f1 4736 fxcpmadd f2, B4, A2, f2 4737 fxcsmadd f3, B4, A2, f3 4738 4739 fxcpmadd f0, A5, A3, f0 4740 fxcsmadd f1, A5, A3, f1 4741 fxcpmadd f2, A6, A3, f2 4742 fxcsmadd f3, A6, A3, f3 4743 4744 fxcpmadd f0, A7, A4, f0 4745 fxcsmadd f1, A7, A4, f1 4746 fxcpmadd f2, A8, A4, f2 4747 fxcsmadd f3, A8, A4, f3 4748 .align 4 4749 4750.L1034: 4751 lfd AP, 
ALPHA(SP) 4752#ifdef TRMMKERNEL 4753 fsmfp AP, AP 4754#endif 4755 4756#if defined(TRMMKERNEL) 4757#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 4758 sub TEMP, K, KK 4759#elif defined(LEFT) 4760 addi TEMP, KK, 2 4761#else 4762 addi TEMP, KK, 4 4763#endif 4764 andi. TEMP, TEMP, 3 4765 mtspr CTR, TEMP 4766#else 4767 andi. r0, K, 3 4768 mtspr CTR, r0 4769#endif 4770 ble+ .L1038 4771 4772 LFPDX A1, AO, INC4 4773 LFPDUX B1, BO, INC4 4774 LFPDUX B2, BO2, INC4 4775 add AO, AO, INC2 4776 bdz- .L1037 4777 .align 4 4778 4779.L1036: 4780 fxcpmadd f0, B1, A1, f0 4781 fxcsmadd f1, B1, A1, f1 4782 LFPDUX B1, BO, INC4 4783 fxcpmadd f2, B2, A1, f2 4784 fxcsmadd f3, B2, A1, f3 4785 LFPDX A1, AO, INC4 4786 LFPDUX B2, BO2, INC4 4787 add AO, AO, INC2 4788 bdnz+ .L1036 4789 .align 4 4790 4791.L1037: 4792 fxcpmadd f0, B1, A1, f0 4793 fxcsmadd f1, B1, A1, f1 4794 fxcpmadd f2, B2, A1, f2 4795 fxcsmadd f3, B2, A1, f3 4796 .align 4 4797 4798.L1038: 4799#ifndef TRMMKERNEL 4800 LFDUX A1, CO1, INC 4801 LFDUX A2, CO2, INC 4802 LFDUX A3, CO3, INC 4803 LFDUX A4, CO4, INC 4804 4805 LFSDUX A1, CO1, INC 4806 LFSDUX A2, CO2, INC 4807 LFSDUX A3, CO3, INC 4808 LFSDUX A4, CO4, INC 4809 4810 fxcpmadd f0, AP, f0, A1 4811 fxcpmadd f1, AP, f1, A2 4812 fxcpmadd f2, AP, f2, A3 4813 fxcpmadd f3, AP, f3, A4 4814 4815 STFDUX f0, CO1, INCM1 4816 STFSDUX f0, CO1, INC 4817 4818 STFDUX f1, CO2, INCM1 4819 STFSDUX f1, CO2, INC 4820 4821 STFDUX f2, CO3, INCM1 4822 STFSDUX f2, CO3, INC 4823 4824 STFDUX f3, CO4, INCM1 4825 STFSDUX f3, CO4, INC 4826#else 4827 fpmul f0, AP, f0 4828 fpmul f1, AP, f1 4829 fpmul f2, AP, f2 4830 fpmul f3, AP, f3 4831 4832 STFDUX f0, CO1, INC 4833 STFSDUX f0, CO1, INC 4834 4835 STFDUX f1, CO2, INC 4836 STFSDUX f1, CO2, INC 4837 4838 STFDUX f2, CO3, INC 4839 STFSDUX f2, CO3, INC 4840 4841 STFDUX f3, CO4, INC 4842 STFSDUX f3, CO4, INC 4843#endif 4844 4845 4846#ifdef TRMMKERNEL 4847#if ( defined(LEFT) && defined(TRANSA)) || \ 4848 (!defined(LEFT) && 
!defined(TRANSA)) 4849 sub TEMP, K, KK 4850#ifdef LEFT 4851 addi TEMP, TEMP, -2 4852#else 4853 addi TEMP, TEMP, -4 4854#endif 4855 slwi r0, TEMP, 1 + BASE_SHIFT 4856 slwi TEMP, TEMP, 2 + BASE_SHIFT 4857 add AO, AO, r0 4858 add BO, BO, TEMP 4859#endif 4860 4861#ifdef LEFT 4862 addi KK, KK, 2 4863#endif 4864#endif 4865 4866 li r0, FZERO 4867 lfpsx f0, SP, r0 4868 .align 4 4869 4870.L1040: 4871 andi. I, M, 1 4872 beq .L1049 4873 4874#if defined(TRMMKERNEL) 4875#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 4876 addi AO2, AO, 2 * SIZE 4877 fpmr f1, f0 4878 addi BO, B, - 4 * SIZE 4879 fpmr f2, f0 4880 addi BO2, B, - 2 * SIZE 4881 fpmr f3, f0 4882#else 4883 slwi TEMP, KK, 0 + BASE_SHIFT 4884 slwi r0, KK, 2 + BASE_SHIFT 4885 add AO, AO, TEMP 4886 add BO, B, r0 4887 4888 addi AO2, AO, 2 * SIZE 4889 fpmr f1, f0 4890 addi BO, BO, - 4 * SIZE 4891 fpmr f2, f0 4892 addi BO2, BO, 2 * SIZE 4893 fpmr f3, f0 4894#endif 4895 4896#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 4897 sub TEMP, K, KK 4898#elif defined(LEFT) 4899 addi TEMP, KK, 1 4900#else 4901 addi TEMP, KK, 4 4902#endif 4903 srawi. r0, TEMP, 3 4904 mtspr CTR, r0 4905 ble .L1044 4906 4907#else 4908 addi AO2, AO, 2 * SIZE 4909 fpmr f1, f0 4910 addi BO, B, - 4 * SIZE 4911 fpmr f2, f0 4912 addi BO2, B, - 2 * SIZE 4913 fpmr f3, f0 4914 4915 srawi. 
r0, K, 3 4916 mtspr CTR, r0 4917 ble .L1044 4918#endif 4919 4920 LFPDUX A1, AO, INC4 4921 LFPDUX B1, BO, INC4 4922 LFPDUX B2, BO2, INC4 4923 LFPDUX A2, AO2, INC4 4924 LFPDUX B3, BO, INC4 4925 LFPDUX B4, BO2, INC4 4926 4927 LFPDUX A3, AO, INC4 4928 LFPDUX A5, BO, INC4 4929 LFPDUX A6, BO2, INC4 4930 LFPDUX A4, AO2, INC4 4931 LFPDUX A7, BO, INC4 4932 LFPDUX A8, BO2, INC4 4933 bdz- .L1043 4934 .align 4 4935 4936.L1042: 4937 fxcpmadd f0, A1, B1, f0 4938 LFPDUX B1, BO, INC4 4939 fxcpmadd f1, A1, B2, f1 4940 LFPDUX B2, BO2, INC4 4941 fxcsmadd f2, A1, B3, f2 4942 LFPDUX B3, BO, INC4 4943 fxcsmadd f3, A1, B4, f3 4944 LFPDUX B4, BO2, INC4 4945 LFPDUX A1, AO, INC4 4946 4947 fxcpmadd f0, A2, A5, f0 4948 LFPDUX A5, BO, INC4 4949 fxcpmadd f1, A2, A6, f1 4950 LFPDUX A6, BO2, INC4 4951 fxcsmadd f2, A2, A7, f2 4952 LFPDUX A7, BO, INC4 4953 fxcsmadd f3, A2, A8, f3 4954 LFPDUX A8, BO2, INC4 4955 LFPDUX A2, AO2, INC4 4956 4957 fxcpmadd f0, A3, B1, f0 4958 LFPDUX B1, BO, INC4 4959 fxcpmadd f1, A3, B2, f1 4960 LFPDUX B2, BO2, INC4 4961 fxcsmadd f2, A3, B3, f2 4962 LFPDUX B3, BO, INC4 4963 fxcsmadd f3, A3, B4, f3 4964 LFPDUX B4, BO2, INC4 4965 LFPDUX A3, AO, INC4 4966 4967 fxcpmadd f0, A4, A5, f0 4968 LFPDUX A5, BO, INC4 4969 fxcpmadd f1, A4, A6, f1 4970 LFPDUX A6, BO2, INC4 4971 fxcsmadd f2, A4, A7, f2 4972 LFPDUX A7, BO, INC4 4973 fxcsmadd f3, A4, A8, f3 4974 LFPDUX A8, BO2, INC4 4975 LFPDUX A4, AO2, INC4 4976 bdnz+ .L1042 4977 .align 4 4978 4979.L1043: 4980 fxcpmadd f0, A1, B1, f0 4981 LFPDUX B1, BO, INC4 4982 fxcpmadd f1, A1, B2, f1 4983 LFPDUX B2, BO2, INC4 4984 fxcsmadd f2, A1, B3, f2 4985 LFPDUX B3, BO, INC4 4986 fxcsmadd f3, A1, B4, f3 4987 LFPDUX B4, BO2, INC4 4988 4989 fxcpmadd f0, A2, A5, f0 4990 LFPDUX A5, BO, INC4 4991 fxcpmadd f1, A2, A6, f1 4992 LFPDUX A6, BO2, INC4 4993 fxcsmadd f2, A2, A7, f2 4994 LFPDUX A7, BO, INC4 4995 fxcsmadd f3, A2, A8, f3 4996 LFPDUX A8, BO2, INC4 4997 4998 fxcpmadd f0, A3, B1, f0 4999 fxcpmadd f1, A3, B2, f1 5000 fxcsmadd f2, A3, B3, f2 5001 
fxcsmadd f3, A3, B4, f3 5002 5003 fxcpmadd f0, A4, A5, f0 5004 fxcpmadd f1, A4, A6, f1 5005 fxcsmadd f2, A4, A7, f2 5006 fxcsmadd f3, A4, A8, f3 5007 .align 4 5008 5009.L1044: 5010 lfd AP, ALPHA(SP) 5011#ifdef TRMMKERNEL 5012 fsmfp AP, AP 5013#endif 5014 5015#if defined(TRMMKERNEL) 5016#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5017 sub TEMP, K, KK 5018#elif defined(LEFT) 5019 addi TEMP, KK, 1 5020#else 5021 addi TEMP, KK, 4 5022#endif 5023 andi. TEMP, TEMP, 7 5024 mtspr CTR, TEMP 5025#else 5026 andi. r0, K, 7 5027 mtspr CTR, r0 5028#endif 5029 ble+ .L1048 5030 5031 LFDX A1, AO, INC4 5032 LFPDUX B1, BO, INC4 5033 LFPDUX B2, BO2, INC4 5034 add AO, AO, INC 5035 bdz- .L1047 5036 .align 4 5037 5038.L1046: 5039 fxcpmadd f0, A1, B1, f0 5040 LFPDUX B1, BO, INC4 5041 fxcpmadd f1, A1, B2, f1 5042 LFDX A1, AO, INC4 5043 LFPDUX B2, BO2, INC4 5044 add AO, AO, INC 5045 bdnz+ .L1046 5046 .align 4 5047 5048.L1047: 5049 fxcpmadd f0, A1, B1, f0 5050 fxcpmadd f1, A1, B2, f1 5051 .align 4 5052 5053.L1048: 5054#ifndef TRMMKERNEL 5055 LFDX A1, CO1, INC 5056 LFDX B3, CO3, INC 5057 LFSDX A1, CO2, INC 5058 LFSDX B3, CO4, INC 5059 5060 fpadd f0, f0, f2 5061 fpadd f1, f1, f3 5062 5063 fxcpmadd f0, AP, f0, A1 5064 fxcpmadd f1, AP, f1, B3 5065#else 5066 fpadd f0, f0, f2 5067 fpadd f1, f1, f3 5068 5069 fpmul f0, AP, f0 5070 fpmul f1, AP, f1 5071#endif 5072 5073 STFDUX f0, CO1, INC 5074 STFSDUX f0, CO2, INC 5075 STFDUX f1, CO3, INC 5076 STFSDUX f1, CO4, INC 5077 5078#ifdef TRMMKERNEL 5079#if ( defined(LEFT) && defined(TRANSA)) || \ 5080 (!defined(LEFT) && !defined(TRANSA)) 5081 sub TEMP, K, KK 5082#ifdef LEFT 5083 addi TEMP, TEMP, -1 5084#else 5085 addi TEMP, TEMP, -4 5086#endif 5087 slwi r0, TEMP, 0 + BASE_SHIFT 5088 slwi TEMP, TEMP, 2 + BASE_SHIFT 5089 add AO, AO, r0 5090 add BO, BO, TEMP 5091#endif 5092 5093#ifdef LEFT 5094 addi KK, KK, 1 5095#endif 5096#endif 5097 .align 4 5098 5099.L1049: 5100#if defined(TRMMKERNEL) && !defined(LEFT) 5101 addi KK, KK, 4 
5102#endif 5103 5104 addi B, BO, 4 * SIZE 5105 5106 addic. J, J, -1 5107 bgt+ .L1010 5108 .align 4 5109 5110.L1050: 5111 andi. J, N, 2 5112 beq .L1090 5113 5114 mr CO1, C 5115 add CO2, C, LDC 5116 add C, CO2, LDC 5117 5118#if defined(TRMMKERNEL) && defined(LEFT) 5119 mr KK, OFFSET 5120#endif 5121 5122 addi AO, A, -2 * SIZE 5123 5124 li r0, FZERO 5125 lfpsx f0, SP, r0 5126 5127 srawi. I, M, 3 5128 ble .L1060 5129 .align 4 5130 5131.L1051: 5132#if defined(TRMMKERNEL) 5133#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 5134 fpmr f4, f0 5135 addi BO, B, - 2 * SIZE 5136 fpmr f1, f0 5137 fpmr f5, f0 5138 fpmr f2, f0 5139 fpmr f6, f0 5140#else 5141 slwi TEMP, KK, 3 + BASE_SHIFT 5142 slwi r0, KK, 1 + BASE_SHIFT 5143 add AO, AO, TEMP 5144 add BO, B, r0 5145 5146 fpmr f4, f0 5147 addi BO, BO, - 2 * SIZE 5148 fpmr f1, f0 5149 fpmr f5, f0 5150 fpmr f2, f0 5151 fpmr f6, f0 5152#endif 5153 5154 5155#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5156 sub TEMP, K, KK 5157#elif defined(LEFT) 5158 addi TEMP, KK, 8 5159#else 5160 addi TEMP, KK, 2 5161#endif 5162 srawi. r0, TEMP, 2 5163 fpmr f3, f0 5164 mtspr CTR, r0 5165 fpmr f7, f0 5166 ble .L1054 5167#else 5168 fpmr f4, f0 5169 addi BO, B, - 2 * SIZE 5170 fpmr f1, f0 5171 fpmr f5, f0 5172 fpmr f2, f0 5173 fpmr f6, f0 5174 5175 srawi. 
r0, K, 2 5176 fpmr f3, f0 5177 mtspr CTR, r0 5178 fpmr f7, f0 5179 ble .L1054 5180#endif 5181 5182 LFPDUX B1, BO, INC2 5183 LFPDUX A1, AO, INC2 5184 LFPDUX A2, AO, INC2 5185 LFPDUX B2, BO, INC2 5186 LFPDUX A3, AO, INC2 5187 LFPDUX A4, AO, INC2 5188 5189 LFPDUX B3, BO, INC2 5190 LFPDUX A5, AO, INC2 5191 LFPDUX A6, AO, INC2 5192 LFPDUX A7, AO, INC2 5193 LFPDUX A8, AO, INC2 5194 bdz- .L1053 5195 .align 4 5196 5197.L1052: 5198 fxcpmadd f0, B1, A1, f0 5199 LFPDUX B4, BO, INC2 5200 fxcsmadd f4, B1, A1, f4 5201 LFPDUX A1, AO, INC2 5202 fxcpmadd f1, B1, A2, f1 5203 nop 5204 fxcsmadd f5, B1, A2, f5 5205 LFPDUX A2, AO, INC2 5206 5207 fxcpmadd f2, B1, A3, f2 5208 nop 5209 fxcsmadd f6, B1, A3, f6 5210 LFPDUX A3, AO, INC2 5211 fxcpmadd f3, B1, A4, f3 5212 nop 5213 fxcsmadd f7, B1, A4, f7 5214 LFPDUX A4, AO, INC2 5215 5216 fxcpmadd f0, B2, A5, f0 5217 LFPDUX B1, BO, INC2 5218 fxcsmadd f4, B2, A5, f4 5219 LFPDUX A5, AO, INC2 5220 fxcpmadd f1, B2, A6, f1 5221 nop 5222 fxcsmadd f5, B2, A6, f5 5223 LFPDUX A6, AO, INC2 5224 5225 fxcpmadd f2, B2, A7, f2 5226 nop 5227 fxcsmadd f6, B2, A7, f6 5228 LFPDUX A7, AO, INC2 5229 fxcpmadd f3, B2, A8, f3 5230 nop 5231 fxcsmadd f7, B2, A8, f7 5232 LFPDUX A8, AO, INC2 5233 5234 fxcpmadd f0, B3, A1, f0 5235 LFPDUX B2, BO, INC2 5236 fxcsmadd f4, B3, A1, f4 5237 LFPDUX A1, AO, INC2 5238 fxcpmadd f1, B3, A2, f1 5239 nop 5240 fxcsmadd f5, B3, A2, f5 5241 LFPDUX A2, AO, INC2 5242 5243 fxcpmadd f2, B3, A3, f2 5244 nop 5245 fxcsmadd f6, B3, A3, f6 5246 LFPDUX A3, AO, INC2 5247 fxcpmadd f3, B3, A4, f3 5248 nop 5249 fxcsmadd f7, B3, A4, f7 5250 LFPDUX A4, AO, INC2 5251 5252 fxcpmadd f0, B4, A5, f0 5253 LFPDUX B3, BO, INC2 5254 fxcsmadd f4, B4, A5, f4 5255 LFPDUX A5, AO, INC2 5256 fxcpmadd f1, B4, A6, f1 5257 nop 5258 fxcsmadd f5, B4, A6, f5 5259 LFPDUX A6, AO, INC2 5260 5261 fxcpmadd f2, B4, A7, f2 5262 nop 5263 fxcsmadd f6, B4, A7, f6 5264 LFPDUX A7, AO, INC2 5265 fxcpmadd f3, B4, A8, f3 5266 nop 5267 fxcsmadd f7, B4, A8, f7 5268 LFPDUX A8, AO, INC2 5269 
bdnz+ .L1052 5270 .align 4 5271 5272.L1053: 5273 fxcpmadd f0, B1, A1, f0 5274 LFPDUX B4, BO, INC2 5275 fxcsmadd f4, B1, A1, f4 5276 LFPDUX A1, AO, INC2 5277 fxcpmadd f1, B1, A2, f1 5278 nop 5279 fxcsmadd f5, B1, A2, f5 5280 LFPDUX A2, AO, INC2 5281 5282 fxcpmadd f2, B1, A3, f2 5283 nop 5284 fxcsmadd f6, B1, A3, f6 5285 LFPDUX A3, AO, INC2 5286 fxcpmadd f3, B1, A4, f3 5287 nop 5288 fxcsmadd f7, B1, A4, f7 5289 LFPDUX A4, AO, INC2 5290 5291 fxcpmadd f0, B2, A5, f0 5292 nop 5293 fxcsmadd f4, B2, A5, f4 5294 LFPDUX A5, AO, INC2 5295 fxcpmadd f1, B2, A6, f1 5296 nop 5297 fxcsmadd f5, B2, A6, f5 5298 LFPDUX A6, AO, INC2 5299 5300 fxcpmadd f2, B2, A7, f2 5301 nop 5302 fxcsmadd f6, B2, A7, f6 5303 LFPDUX A7, AO, INC2 5304 fxcpmadd f3, B2, A8, f3 5305 nop 5306 fxcsmadd f7, B2, A8, f7 5307 LFPDUX A8, AO, INC2 5308 5309 fxcpmadd f0, B3, A1, f0 5310 fxcsmadd f4, B3, A1, f4 5311 fxcpmadd f1, B3, A2, f1 5312 fxcsmadd f5, B3, A2, f5 5313 5314 fxcpmadd f2, B3, A3, f2 5315 fxcsmadd f6, B3, A3, f6 5316 fxcpmadd f3, B3, A4, f3 5317 fxcsmadd f7, B3, A4, f7 5318 5319 fxcpmadd f0, B4, A5, f0 5320 fxcsmadd f4, B4, A5, f4 5321 fxcpmadd f1, B4, A6, f1 5322 fxcsmadd f5, B4, A6, f5 5323 5324 fxcpmadd f2, B4, A7, f2 5325 fxcsmadd f6, B4, A7, f6 5326 fxcpmadd f3, B4, A8, f3 5327 fxcsmadd f7, B4, A8, f7 5328 .align 4 5329 5330.L1054: 5331 lfd AP, ALPHA(SP) 5332#ifdef TRMMKERNEL 5333 fsmfp AP, AP 5334#endif 5335 5336#if defined(TRMMKERNEL) 5337#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5338 sub TEMP, K, KK 5339#elif defined(LEFT) 5340 addi TEMP, KK, 8 5341#else 5342 addi TEMP, KK, 2 5343#endif 5344 andi. TEMP, TEMP, 3 5345 mtspr CTR, TEMP 5346#else 5347 andi. 
r0, K, 3 5348 mtspr CTR, r0 5349#endif 5350 ble+ .L1058 5351 5352 LFPDUX A1, AO, INC2 5353 LFPDUX B1, BO, INC2 5354 LFPDUX A2, AO, INC2 5355 LFPDUX A3, AO, INC2 5356 LFPDUX A4, AO, INC2 5357 bdz- .L1057 5358 .align 4 5359 5360.L1056: 5361 fxcpmadd f0, B1, A1, f0 5362 fxcsmadd f4, B1, A1, f4 5363 LFPDUX A1, AO, INC2 5364 fxcpmadd f1, B1, A2, f1 5365 fxcsmadd f5, B1, A2, f5 5366 LFPDUX A2, AO, INC2 5367 5368 fxcpmadd f2, B1, A3, f2 5369 fxcsmadd f6, B1, A3, f6 5370 LFPDUX A3, AO, INC2 5371 fxcpmadd f3, B1, A4, f3 5372 fxcsmadd f7, B1, A4, f7 5373 LFPDUX A4, AO, INC2 5374 LFPDUX B1, BO, INC2 5375 bdnz+ .L1056 5376 .align 4 5377 5378.L1057: 5379 fxcpmadd f0, B1, A1, f0 5380 fxcsmadd f4, B1, A1, f4 5381 fxcpmadd f1, B1, A2, f1 5382 fxcsmadd f5, B1, A2, f5 5383 5384 fxcpmadd f2, B1, A3, f2 5385 fxcsmadd f6, B1, A3, f6 5386 fxcpmadd f3, B1, A4, f3 5387 fxcsmadd f7, B1, A4, f7 5388 .align 4 5389 5390.L1058: 5391#ifndef TRMMKERNEL 5392 LFDUX A1, CO1, INC 5393 LFDUX B1, CO1, INC2 5394 LFDUX A3, CO1, INC2 5395 LFDUX A5, CO1, INC2 5396 5397 LFSDUX A1, CO1, INCM5 5398 LFSDUX B1, CO1, INC2 5399 LFSDUX A3, CO1, INC2 5400 LFSDUX A5, CO1, INC2 5401 5402 LFDUX B3, CO2, INC 5403 LFDUX A6, CO2, INC2 5404 LFDUX A7, CO2, INC2 5405 LFDUX B2, CO2, INC2 5406 5407 fxcpmadd f0, AP, f0, A1 5408 LFSDUX B3, CO2, INCM5 5409 LFSDUX A6, CO2, INC2 5410 fxcpmadd f1, AP, f1, B1 5411 LFSDUX A7, CO2, INC2 5412 LFSDUX B2, CO2, INC2 5413 5414 fxcpmadd f2, AP, f2, A3 5415 STFDUX f0, CO1, INCM7 5416 STFSDUX f0, CO1, INC 5417 5418 fxcpmadd f3, AP, f3, A5 5419 STFDUX f1, CO1, INC 5420 STFSDUX f1, CO1, INC 5421 5422 fxcpmadd f4, AP, f4, B3 5423 STFDUX f2, CO1, INC 5424 STFSDUX f2, CO1, INC 5425 5426 fxcpmadd f5, AP, f5, A6 5427 STFDUX f3, CO1, INC 5428 STFSDUX f3, CO1, INC 5429 5430 fxcpmadd f6, AP, f6, A7 5431 STFDUX f4, CO2, INCM7 5432 STFSDUX f4, CO2, INC 5433 5434 fxcpmadd f7, AP, f7, B2 5435 STFDUX f5, CO2, INC 5436 STFSDUX f5, CO2, INC 5437 5438 STFDUX f6, CO2, INC 5439 STFSDUX f6, CO2, INC 5440 5441 
STFDUX f7, CO2, INC 5442 STFSDUX f7, CO2, INC 5443#else 5444 fpmul f0, AP, f0 5445 fpmul f1, AP, f1 5446 5447 fpmul f2, AP, f2 5448 STFDUX f0, CO1, INC 5449 STFSDUX f0, CO1, INC 5450 5451 fpmul f3, AP, f3 5452 STFDUX f1, CO1, INC 5453 STFSDUX f1, CO1, INC 5454 5455 fpmul f4, AP, f4 5456 STFDUX f2, CO1, INC 5457 STFSDUX f2, CO1, INC 5458 5459 fpmul f5, AP, f5 5460 STFDUX f3, CO1, INC 5461 STFSDUX f3, CO1, INC 5462 5463 fpmul f6, AP, f6 5464 STFDUX f4, CO2, INC 5465 STFSDUX f4, CO2, INC 5466 5467 fpmul f7, AP, f7 5468 STFDUX f5, CO2, INC 5469 STFSDUX f5, CO2, INC 5470 5471 STFDUX f6, CO2, INC 5472 STFSDUX f6, CO2, INC 5473 5474 STFDUX f7, CO2, INC 5475 STFSDUX f7, CO2, INC 5476#endif 5477 5478 5479#ifdef TRMMKERNEL 5480#if ( defined(LEFT) && defined(TRANSA)) || \ 5481 (!defined(LEFT) && !defined(TRANSA)) 5482 sub TEMP, K, KK 5483#ifdef LEFT 5484 addi TEMP, TEMP, -8 5485#else 5486 addi TEMP, TEMP, -2 5487#endif 5488 slwi r0, TEMP, 3 + BASE_SHIFT 5489 slwi TEMP, TEMP, 1 + BASE_SHIFT 5490 add AO, AO, r0 5491 add BO, BO, TEMP 5492#endif 5493 5494#ifdef LEFT 5495 addi KK, KK, 8 5496#endif 5497#endif 5498 5499 addic. I, I, -1 5500 li r0, FZERO 5501 5502 lfpsx f0, SP, r0 5503 bgt+ .L1051 5504 .align 4 5505 5506.L1060: 5507 andi. I, M, 4 5508 beq .L1070 5509 5510#if defined(TRMMKERNEL) 5511#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 5512 addi BO, B, - 2 * SIZE 5513 fpmr f1, f0 5514#else 5515 slwi TEMP, KK, 2 + BASE_SHIFT 5516 slwi r0, KK, 1 + BASE_SHIFT 5517 add AO, AO, TEMP 5518 add BO, B, r0 5519 5520 addi BO, BO, - 2 * SIZE 5521 fpmr f1, f0 5522#endif 5523 5524#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5525 sub TEMP, K, KK 5526#elif defined(LEFT) 5527 addi TEMP, KK, 4 5528#else 5529 addi TEMP, KK, 2 5530#endif 5531 fpmr f2, f0 5532 srawi. r0, TEMP, 2 5533 mtspr CTR, r0 5534 fpmr f3, f0 5535 ble .L1064 5536#else 5537 srawi. 
r0, K, 2 5538 fpmr f1, f0 5539 addi BO, B, - 2 * SIZE 5540 fpmr f2, f0 5541 mtspr CTR, r0 5542 fpmr f3, f0 5543 ble .L1064 5544#endif 5545 5546 LFPDUX B1, BO, INC2 5547 LFPDUX A1, AO, INC2 5548 LFPDUX A2, AO, INC2 5549 LFPDUX B2, BO, INC2 5550 LFPDUX A3, AO, INC2 5551 LFPDUX A4, AO, INC2 5552 5553 LFPDUX B3, BO, INC2 5554 LFPDUX A5, AO, INC2 5555 LFPDUX A6, AO, INC2 5556 LFPDUX B4, BO, INC2 5557 LFPDUX A7, AO, INC2 5558 LFPDUX A8, AO, INC2 5559 bdz- .L1063 5560 .align 4 5561 5562.L1062: 5563 fxcpmadd f0, B1, A1, f0 5564 fxcsmadd f2, B1, A1, f2 5565 LFPDUX A1, AO, INC2 5566 fxcpmadd f1, B1, A2, f1 5567 fxcsmadd f3, B1, A2, f3 5568 LFPDUX A2, AO, INC2 5569 LFPDUX B1, BO, INC2 5570 5571 fxcpmadd f0, B2, A3, f0 5572 fxcsmadd f2, B2, A3, f2 5573 LFPDUX A3, AO, INC2 5574 fxcpmadd f1, B2, A4, f1 5575 fxcsmadd f3, B2, A4, f3 5576 LFPDUX A4, AO, INC2 5577 LFPDUX B2, BO, INC2 5578 5579 fxcpmadd f0, B3, A5, f0 5580 fxcsmadd f2, B3, A5, f2 5581 LFPDUX A5, AO, INC2 5582 fxcpmadd f1, B3, A6, f1 5583 fxcsmadd f3, B3, A6, f3 5584 LFPDUX A6, AO, INC2 5585 LFPDUX B3, BO, INC2 5586 5587 fxcpmadd f0, B4, A7, f0 5588 fxcsmadd f2, B4, A7, f2 5589 LFPDUX A7, AO, INC2 5590 fxcpmadd f1, B4, A8, f1 5591 fxcsmadd f3, B4, A8, f3 5592 LFPDUX A8, AO, INC2 5593 LFPDUX B4, BO, INC2 5594 bdnz+ .L1062 5595 .align 4 5596 5597.L1063: 5598 fxcpmadd f0, B1, A1, f0 5599 fxcsmadd f2, B1, A1, f2 5600 fxcpmadd f1, B1, A2, f1 5601 fxcsmadd f3, B1, A2, f3 5602 5603 fxcpmadd f0, B2, A3, f0 5604 fxcsmadd f2, B2, A3, f2 5605 fxcpmadd f1, B2, A4, f1 5606 fxcsmadd f3, B2, A4, f3 5607 5608 fxcpmadd f0, B3, A5, f0 5609 fxcsmadd f2, B3, A5, f2 5610 fxcpmadd f1, B3, A6, f1 5611 fxcsmadd f3, B3, A6, f3 5612 5613 fxcpmadd f0, B4, A7, f0 5614 fxcsmadd f2, B4, A7, f2 5615 fxcpmadd f1, B4, A8, f1 5616 fxcsmadd f3, B4, A8, f3 5617 .align 4 5618 5619.L1064: 5620 lfd AP, ALPHA(SP) 5621#ifdef TRMMKERNEL 5622 fsmfp AP, AP 5623#endif 5624 5625#if defined(TRMMKERNEL) 5626#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) 
&& defined(TRANSA)) 5627 sub TEMP, K, KK 5628#elif defined(LEFT) 5629 addi TEMP, KK, 4 5630#else 5631 addi TEMP, KK, 2 5632#endif 5633 andi. TEMP, TEMP, 3 5634 mtspr CTR, TEMP 5635#else 5636 andi. r0, K, 3 5637 mtspr CTR, r0 5638#endif 5639 ble+ .L1068 5640 5641 LFPDUX A1, AO, INC2 5642 LFPDUX B1, BO, INC2 5643 LFPDUX A2, AO, INC2 5644 bdz- .L1067 5645 .align 4 5646 5647.L1066: 5648 fxcpmadd f0, B1, A1, f0 5649 fxcsmadd f2, B1, A1, f2 5650 LFPDUX A1, AO, INC2 5651 fxcpmadd f1, B1, A2, f1 5652 fxcsmadd f3, B1, A2, f3 5653 LFPDUX B1, BO, INC2 5654 LFPDUX A2, AO, INC2 5655 bdnz+ .L1066 5656 .align 4 5657 5658.L1067: 5659 fxcpmadd f0, B1, A1, f0 5660 fxcsmadd f2, B1, A1, f2 5661 fxcpmadd f1, B1, A2, f1 5662 fxcsmadd f3, B1, A2, f3 5663 .align 4 5664 5665.L1068: 5666#ifndef TRMMKERNEL 5667 LFDUX A1, CO1, INC 5668 LFDUX A2, CO1, INC2 5669 LFDUX A3, CO2, INC 5670 LFDUX A4, CO2, INC2 5671 5672 LFSDUX A1, CO1, INCM1 5673 LFSDUX A2, CO1, INC2 5674 LFSDUX A3, CO2, INCM1 5675 LFSDUX A4, CO2, INC2 5676 5677 fxcpmadd f0, AP, f0, A1 5678 fxcpmadd f1, AP, f1, A2 5679 fxcpmadd f2, AP, f2, A3 5680 STFDUX f0, CO1, INCM3 5681 STFSDUX f0, CO1, INC 5682 5683 fxcpmadd f3, AP, f3, A4 5684 STFDUX f1, CO1, INC 5685 STFSDUX f1, CO1, INC 5686 5687 STFDUX f2, CO2, INCM3 5688 STFSDUX f2, CO2, INC 5689 5690 STFDUX f3, CO2, INC 5691 STFSDUX f3, CO2, INC 5692#else 5693 fpmul f0, AP, f0 5694 fpmul f1, AP, f1 5695 fpmul f2, AP, f2 5696 STFDUX f0, CO1, INC 5697 STFSDUX f0, CO1, INC 5698 5699 fpmul f3, AP, f3 5700 STFDUX f1, CO1, INC 5701 STFSDUX f1, CO1, INC 5702 5703 STFDUX f2, CO2, INC 5704 STFSDUX f2, CO2, INC 5705 5706 STFDUX f3, CO2, INC 5707 STFSDUX f3, CO2, INC 5708#endif 5709 5710 5711#ifdef TRMMKERNEL 5712#if ( defined(LEFT) && defined(TRANSA)) || \ 5713 (!defined(LEFT) && !defined(TRANSA)) 5714 sub TEMP, K, KK 5715#ifdef LEFT 5716 addi TEMP, TEMP, -4 5717#else 5718 addi TEMP, TEMP, -2 5719#endif 5720 slwi r0, TEMP, 2 + BASE_SHIFT 5721 slwi TEMP, TEMP, 1 + BASE_SHIFT 5722 add AO, AO, r0 5723 
add BO, BO, TEMP 5724#endif 5725 5726#ifdef LEFT 5727 addi KK, KK, 4 5728#endif 5729#endif 5730 5731 li r0, FZERO 5732 lfpsx f0, SP, r0 5733 .align 4 5734 5735.L1070: 5736 andi. I, M, 2 5737 beq .L1080 5738 5739#if defined(TRMMKERNEL) 5740#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 5741 addi BO, B, - 2 * SIZE 5742 fpmr f1, f0 5743#else 5744 slwi TEMP, KK, 1 + BASE_SHIFT 5745 slwi r0, KK, 1 + BASE_SHIFT 5746 add AO, AO, TEMP 5747 add BO, B, r0 5748 5749 addi BO, BO, - 2 * SIZE 5750 fpmr f1, f0 5751#endif 5752 5753#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5754 sub TEMP, K, KK 5755#elif defined(LEFT) 5756 addi TEMP, KK, 2 5757#else 5758 addi TEMP, KK, 2 5759#endif 5760 srawi. r0, TEMP, 3 5761 fpmr f2, f0 5762 mtspr CTR, r0 5763 fpmr f3, f0 5764 ble .L1074 5765#else 5766 addi BO, B, - 2 * SIZE 5767 fpmr f1, f0 5768 5769 srawi. r0, K, 3 5770 fpmr f2, f0 5771 mtspr CTR, r0 5772 fpmr f3, f0 5773 ble .L1074 5774#endif 5775 5776 5777 LFPDUX A1, AO, INC2 5778 LFPDUX B1, BO, INC2 5779 LFPDUX A2, AO, INC2 5780 LFPDUX B2, BO, INC2 5781 LFPDUX A3, AO, INC2 5782 LFPDUX B3, BO, INC2 5783 LFPDUX A4, AO, INC2 5784 LFPDUX B4, BO, INC2 5785 5786 LFPDUX A5, AO, INC2 5787 LFPDUX B5, BO, INC2 5788 LFPDUX A6, AO, INC2 5789 LFPDUX B6, BO, INC2 5790 LFPDUX A7, AO, INC2 5791 LFPDUX A9, BO, INC2 5792 LFPDUX A8, AO, INC2 5793 LFPDUX A10, BO, INC2 5794 bdz- .L1073 5795 .align 4 5796 5797.L1072: 5798 fxcpmadd f0, B1, A1, f0 5799 fxcsmadd f1, B1, A1, f1 5800 LFPDUX A1, AO, INC2 5801 LFPDUX B1, BO, INC2 5802 fxcpmadd f2, B2, A2, f2 5803 fxcsmadd f3, B2, A2, f3 5804 LFPDUX A2, AO, INC2 5805 LFPDUX B2, BO, INC2 5806 5807 fxcpmadd f0, B3, A3, f0 5808 fxcsmadd f1, B3, A3, f1 5809 LFPDUX A3, AO, INC2 5810 LFPDUX B3, BO, INC2 5811 fxcpmadd f2, B4, A4, f2 5812 fxcsmadd f3, B4, A4, f3 5813 LFPDUX A4, AO, INC2 5814 LFPDUX B4, BO, INC2 5815 5816 fxcpmadd f0, B5, A5, f0 5817 fxcsmadd f1, B5, A5, f1 5818 LFPDUX A5, AO, INC2 5819 LFPDUX 
B5, BO, INC2 5820 fxcpmadd f2, B6, A6, f2 5821 fxcsmadd f3, B6, A6, f3 5822 LFPDUX A6, AO, INC2 5823 LFPDUX B6, BO, INC2 5824 5825 fxcpmadd f0, A9, A7, f0 5826 fxcsmadd f1, A9, A7, f1 5827 LFPDUX A7, AO, INC2 5828 LFPDUX A9, BO, INC2 5829 fxcpmadd f2, A10, A8, f2 5830 fxcsmadd f3, A10, A8, f3 5831 LFPDUX A8, AO, INC2 5832 LFPDUX A10, BO, INC2 5833 bdnz+ .L1072 5834 .align 4 5835 5836.L1073: 5837 fxcpmadd f0, B1, A1, f0 5838 fxcsmadd f1, B1, A1, f1 5839 fxcpmadd f2, B2, A2, f2 5840 fxcsmadd f3, B2, A2, f3 5841 5842 fxcpmadd f0, B3, A3, f0 5843 fxcsmadd f1, B3, A3, f1 5844 fxcpmadd f2, B4, A4, f2 5845 fxcsmadd f3, B4, A4, f3 5846 5847 fxcpmadd f0, B5, A5, f0 5848 fxcsmadd f1, B5, A5, f1 5849 fxcpmadd f2, B6, A6, f2 5850 fxcsmadd f3, B6, A6, f3 5851 5852 fxcpmadd f0, A9, A7, f0 5853 fxcsmadd f1, A9, A7, f1 5854 fxcpmadd f2, A10, A8, f2 5855 fxcsmadd f3, A10, A8, f3 5856 .align 4 5857 5858.L1074: 5859 lfd AP, ALPHA(SP) 5860#ifdef TRMMKERNEL 5861 fsmfp AP, AP 5862#endif 5863 5864#if defined(TRMMKERNEL) 5865#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5866 sub TEMP, K, KK 5867#elif defined(LEFT) 5868 addi TEMP, KK, 2 5869#else 5870 addi TEMP, KK, 2 5871#endif 5872 andi. TEMP, TEMP, 7 5873 mtspr CTR, TEMP 5874#else 5875 andi. 
r0, K, 7 5876 mtspr CTR, r0 5877#endif 5878 ble+ .L1078 5879 5880 LFPDUX A1, AO, INC2 5881 LFPDUX B1, BO, INC2 5882 bdz- .L1077 5883 .align 4 5884 5885.L1076: 5886 fxcpmadd f0, B1, A1, f0 5887 fxcsmadd f1, B1, A1, f1 5888 LFPDUX A1, AO, INC2 5889 LFPDUX B1, BO, INC2 5890 bdnz+ .L1076 5891 .align 4 5892 5893.L1077: 5894 fxcpmadd f0, B1, A1, f0 5895 fxcsmadd f1, B1, A1, f1 5896 .align 4 5897 5898.L1078: 5899#ifndef TRMMKERNEL 5900 LFDUX A1, CO1, INC 5901 LFDUX B3, CO2, INC 5902 LFSDUX A1, CO1, INC 5903 LFSDUX B3, CO2, INC 5904 5905 fpadd f0, f0, f2 5906 fpadd f1, f1, f3 5907 5908 fxcpmadd f0, AP, f0, A1 5909 fxcpmadd f1, AP, f1, B3 5910 5911 STFDUX f0, CO1, INCM1 5912 STFSDUX f0, CO1, INC 5913 STFDUX f1, CO2, INCM1 5914 STFSDUX f1, CO2, INC 5915#else 5916 fpadd f0, f0, f2 5917 fpadd f1, f1, f3 5918 5919 fpmul f0, AP, f0 5920 fpmul f1, AP, f1 5921 5922 STFDUX f0, CO1, INC 5923 STFSDUX f0, CO1, INC 5924 STFDUX f1, CO2, INC 5925 STFSDUX f1, CO2, INC 5926#endif 5927 5928 5929#ifdef TRMMKERNEL 5930#if ( defined(LEFT) && defined(TRANSA)) || \ 5931 (!defined(LEFT) && !defined(TRANSA)) 5932 sub TEMP, K, KK 5933#ifdef LEFT 5934 addi TEMP, TEMP, -2 5935#else 5936 addi TEMP, TEMP, -2 5937#endif 5938 slwi r0, TEMP, 1 + BASE_SHIFT 5939 slwi TEMP, TEMP, 1 + BASE_SHIFT 5940 add AO, AO, r0 5941 add BO, BO, TEMP 5942#endif 5943 5944#ifdef LEFT 5945 addi KK, KK, 2 5946#endif 5947#endif 5948 5949 li r0, FZERO 5950 lfpsx f0, SP, r0 5951 .align 4 5952 5953.L1080: 5954 andi. 
I, M, 1 5955 beq .L1089 5956 5957#if defined(TRMMKERNEL) 5958#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 5959 addi BO, B, - 2 * SIZE 5960 fpmr f1, f0 5961 fpmr f2, f0 5962 fpmr f3, f0 5963#else 5964 slwi TEMP, KK, 0 + BASE_SHIFT 5965 slwi r0, KK, 1 + BASE_SHIFT 5966 add AO, AO, TEMP 5967 add BO, B, r0 5968 5969 addi BO, BO, - 2 * SIZE 5970 fpmr f1, f0 5971 fpmr f2, f0 5972 fpmr f3, f0 5973#endif 5974 5975#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 5976 sub TEMP, K, KK 5977#elif defined(LEFT) 5978 addi TEMP, KK, 1 5979#else 5980 addi TEMP, KK, 2 5981#endif 5982 srawi. r0, TEMP, 3 5983 mtspr CTR, r0 5984 ble .L1084 5985#else 5986 addi BO, B, - 2 * SIZE 5987 fpmr f1, f0 5988 fpmr f2, f0 5989 fpmr f3, f0 5990 5991 srawi. r0, K, 3 5992 mtspr CTR, r0 5993 ble .L1084 5994#endif 5995 5996 LFPDUX B1, BO, INC2 5997 LFPDUX A1, AO, INC2 5998 LFPDUX A2, AO, INC2 5999 6000 LFPDUX B2, BO, INC2 6001 LFPDUX A3, AO, INC2 6002 LFPDUX A4, AO, INC2 6003 6004 LFPDUX B3, BO, INC2 6005 LFPDUX B4, BO, INC2 6006 bdz- .L1083 6007 .align 4 6008 6009.L1082: 6010 fxcpmadd f0, A1, B1, f0 6011 LFPDUX B1, BO, INC2 6012 fxcsmadd f1, A1, B2, f1 6013 LFPDUX B2, BO, INC2 6014 LFPDUX A1, AO, INC2 6015 fxcpmadd f2, A2, B3, f2 6016 LFPDUX B3, BO, INC2 6017 fxcsmadd f3, A2, B4, f3 6018 LFPDUX B4, BO, INC2 6019 LFPDUX A2, AO, INC2 6020 6021 fxcpmadd f0, A3, B1, f0 6022 LFPDUX B1, BO, INC2 6023 fxcsmadd f1, A3, B2, f1 6024 LFPDUX B2, BO, INC2 6025 LFPDUX A3, AO, INC2 6026 fxcpmadd f2, A4, B3, f2 6027 LFPDUX B3, BO, INC2 6028 fxcsmadd f3, A4, B4, f3 6029 LFPDUX B4, BO, INC2 6030 LFPDUX A4, AO, INC2 6031 bdnz+ .L1082 6032 .align 4 6033 6034.L1083: 6035 fxcpmadd f0, A1, B1, f0 6036 LFPDUX B1, BO, INC2 6037 fxcsmadd f1, A1, B2, f1 6038 LFPDUX B2, BO, INC2 6039 fxcpmadd f2, A2, B3, f2 6040 LFPDUX B3, BO, INC2 6041 fxcsmadd f3, A2, B4, f3 6042 LFPDUX B4, BO, INC2 6043 6044 fxcpmadd f0, A3, B1, f0 6045 fxcsmadd f1, A3, B2, f1 6046 fxcpmadd f2, 
A4, B3, f2 6047 fxcsmadd f3, A4, B4, f3 6048 .align 4 6049 6050.L1084: 6051 lfd AP, ALPHA(SP) 6052#ifdef TRMMKERNEL 6053 fsmfp AP, AP 6054#endif 6055 6056#if defined(TRMMKERNEL) 6057#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6058 sub TEMP, K, KK 6059#elif defined(LEFT) 6060 addi TEMP, KK, 1 6061#else 6062 addi TEMP, KK, 2 6063#endif 6064 andi. TEMP, TEMP, 7 6065 mtspr CTR, TEMP 6066#else 6067 andi. r0, K, 7 6068 mtspr CTR, r0 6069#endif 6070 ble+ .L1088 6071 6072 LFDX A1, AO, INC2 6073 LFPDUX B1, BO, INC2 6074 add AO, AO, INC 6075 bdz- .L1087 6076 .align 4 6077 6078.L1086: 6079 fxcpmadd f0, A1, B1, f0 6080 LFDX A1, AO, INC2 6081 LFPDUX B1, BO, INC2 6082 add AO, AO, INC 6083 bdnz+ .L1086 6084 .align 4 6085 6086.L1087: 6087 fxcpmadd f0, A1, B1, f0 6088 .align 4 6089 6090.L1088: 6091#ifndef TRMMKERNEL 6092 LFDX A1, CO1, INC 6093 LFDX A2, CO2, INC 6094 6095 fpadd f0, f0, f1 6096 fpadd f2, f2, f3 6097 fsmfp A1, A2 6098 fpadd f0, f0, f2 6099 fxcpmadd f0, AP, f0, A1 6100#else 6101 fpadd f0, f0, f1 6102 fpadd f2, f2, f3 6103 fsmfp A1, A2 6104 fpadd f0, f0, f2 6105 fpmul f0, AP, f0 6106#endif 6107 6108 STFDUX f0, CO1, INC 6109 STFSDUX f0, CO2, INC 6110 6111#ifdef TRMMKERNEL 6112#if ( defined(LEFT) && defined(TRANSA)) || \ 6113 (!defined(LEFT) && !defined(TRANSA)) 6114 sub TEMP, K, KK 6115#ifdef LEFT 6116 addi TEMP, TEMP, -1 6117#else 6118 addi TEMP, TEMP, -2 6119#endif 6120 slwi r0, TEMP, 0 + BASE_SHIFT 6121 slwi TEMP, TEMP, 1 + BASE_SHIFT 6122 add AO, AO, r0 6123 add BO, BO, TEMP 6124#endif 6125 6126#ifdef LEFT 6127 addi KK, KK, 1 6128#endif 6129#endif 6130 .align 4 6131 6132.L1089: 6133#if defined(TRMMKERNEL) && !defined(LEFT) 6134 addi KK, KK, 2 6135#endif 6136 6137 addi B, BO, 2 * SIZE 6138 .align 4 6139 6140.L1090: 6141 andi. 
J, N, 1 6142 beq .L10999 6143 6144#if defined(TRMMKERNEL) && defined(LEFT) 6145 mr KK, OFFSET 6146#endif 6147 6148 mr CO1, C 6149 addi AO, A, -2 * SIZE 6150 6151 li r0, FZERO 6152 lfpsx f0, SP, r0 6153 6154 srawi. I, M, 3 6155 ble .L10100 6156 .align 4 6157 6158.L1091: 6159#if defined(TRMMKERNEL) 6160#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 6161 addi BO, B, - 2 * SIZE 6162 fpmr f1, f0 6163#else 6164 slwi TEMP, KK, 3 + BASE_SHIFT 6165 slwi r0, KK, 0 + BASE_SHIFT 6166 add AO, AO, TEMP 6167 add BO, B, r0 6168 6169 addi BO, BO, - 2 * SIZE 6170 fpmr f1, f0 6171#endif 6172 6173#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6174 sub TEMP, K, KK 6175#elif defined(LEFT) 6176 addi TEMP, KK, 8 6177#else 6178 addi TEMP, KK, 1 6179#endif 6180 fpmr f2, f0 6181 srawi. r0, TEMP, 2 6182 fpmr f3, f0 6183 mtspr CTR, r0 6184 ble .L1094 6185 6186#else 6187 srawi. r0, K, 2 6188 fpmr f1, f0 6189 addi BO, B, - 2 * SIZE 6190 fpmr f2, f0 6191 fpmr f3, f0 6192 mtspr CTR, r0 6193 ble .L1094 6194#endif 6195 6196 LFPDUX B1, BO, INC2 6197 LFPDUX A1, AO, INC2 6198 LFPDUX A2, AO, INC2 6199 LFPDUX A3, AO, INC2 6200 LFPDUX A4, AO, INC2 6201 LFPDUX B2, BO, INC2 6202 LFPDUX A5, AO, INC2 6203 LFPDUX A6, AO, INC2 6204 LFPDUX A7, AO, INC2 6205 LFPDUX A8, AO, INC2 6206 bdz- .L1093 6207 .align 4 6208 6209.L1092: 6210 fxcpmadd f0, B1, A1, f0 6211 LFPDUX A1, AO, INC2 6212 fxcpmadd f1, B1, A2, f1 6213 LFPDUX A2, AO, INC2 6214 fxcpmadd f2, B1, A3, f2 6215 LFPDUX A3, AO, INC2 6216 fxcpmadd f3, B1, A4, f3 6217 LFPDUX A4, AO, INC2 6218 6219 fxcsmadd f0, B1, A5, f0 6220 LFPDUX A5, AO, INC2 6221 fxcsmadd f1, B1, A6, f1 6222 LFPDUX A6, AO, INC2 6223 fxcsmadd f2, B1, A7, f2 6224 LFPDUX A7, AO, INC2 6225 fxcsmadd f3, B1, A8, f3 6226 LFPDUX A8, AO, INC2 6227 LFPDUX B1, BO, INC2 6228 6229 fxcpmadd f0, B2, A1, f0 6230 LFPDUX A1, AO, INC2 6231 fxcpmadd f1, B2, A2, f1 6232 LFPDUX A2, AO, INC2 6233 fxcpmadd f2, B2, A3, f2 6234 LFPDUX A3, AO, INC2 6235 
fxcpmadd f3, B2, A4, f3 6236 LFPDUX A4, AO, INC2 6237 6238 fxcsmadd f0, B2, A5, f0 6239 LFPDUX A5, AO, INC2 6240 fxcsmadd f1, B2, A6, f1 6241 LFPDUX A6, AO, INC2 6242 fxcsmadd f2, B2, A7, f2 6243 LFPDUX A7, AO, INC2 6244 fxcsmadd f3, B2, A8, f3 6245 LFPDUX A8, AO, INC2 6246 LFPDUX B2, BO, INC2 6247 bdnz+ .L1092 6248 .align 4 6249 6250.L1093: 6251 fxcpmadd f0, B1, A1, f0 6252 LFPDUX A1, AO, INC2 6253 fxcpmadd f1, B1, A2, f1 6254 LFPDUX A2, AO, INC2 6255 fxcpmadd f2, B1, A3, f2 6256 LFPDUX A3, AO, INC2 6257 fxcpmadd f3, B1, A4, f3 6258 LFPDUX A4, AO, INC2 6259 6260 fxcsmadd f0, B1, A5, f0 6261 LFPDUX A5, AO, INC2 6262 fxcsmadd f1, B1, A6, f1 6263 LFPDUX A6, AO, INC2 6264 fxcsmadd f2, B1, A7, f2 6265 LFPDUX A7, AO, INC2 6266 fxcsmadd f3, B1, A8, f3 6267 LFPDUX A8, AO, INC2 6268 6269 fxcpmadd f0, B2, A1, f0 6270 fxcpmadd f1, B2, A2, f1 6271 fxcpmadd f2, B2, A3, f2 6272 fxcpmadd f3, B2, A4, f3 6273 6274 fxcsmadd f0, B2, A5, f0 6275 fxcsmadd f1, B2, A6, f1 6276 fxcsmadd f2, B2, A7, f2 6277 fxcsmadd f3, B2, A8, f3 6278 .align 4 6279 6280.L1094: 6281 lfd AP, ALPHA(SP) 6282#ifdef TRMMKERNEL 6283 fsmfp AP, AP 6284#endif 6285 6286#if defined(TRMMKERNEL) 6287#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6288 sub TEMP, K, KK 6289#elif defined(LEFT) 6290 addi TEMP, KK, 8 6291#else 6292 addi TEMP, KK, 1 6293#endif 6294 andi. TEMP, TEMP, 3 6295 mtspr CTR, TEMP 6296#else 6297 andi. 
r0, K, 3 6298 mtspr CTR, r0 6299#endif 6300 ble+ .L1098 6301 6302 LFDX B1, BO, INC2 6303 LFPDUX A1, AO, INC2 6304 LFPDUX A2, AO, INC2 6305 LFPDUX A3, AO, INC2 6306 LFPDUX A4, AO, INC2 6307 add BO, BO, INC 6308 bdz- .L1097 6309 .align 4 6310 6311.L1096: 6312 fxcpmadd f0, B1, A1, f0 6313 LFPDUX A1, AO, INC2 6314 fxcpmadd f1, B1, A2, f1 6315 LFPDUX A2, AO, INC2 6316 fxcpmadd f2, B1, A3, f2 6317 LFPDUX A3, AO, INC2 6318 fxcpmadd f3, B1, A4, f3 6319 LFDX B1, BO, INC2 6320 LFPDUX A4, AO, INC2 6321 add BO, BO, INC 6322 bdnz+ .L1096 6323 .align 4 6324 6325.L1097: 6326 fxcpmadd f0, B1, A1, f0 6327 fxcpmadd f1, B1, A2, f1 6328 fxcpmadd f2, B1, A3, f2 6329 fxcpmadd f3, B1, A4, f3 6330 .align 4 6331 6332.L1098: 6333#ifndef TRMMKERNEL 6334 LFDUX A1, CO1, INC 6335 LFDUX B1, CO1, INC2 6336 LFDUX A3, CO1, INC2 6337 LFDUX A5, CO1, INC2 6338 6339 LFSDUX A1, CO1, INCM5 6340 LFSDUX B1, CO1, INC2 6341 LFSDUX A3, CO1, INC2 6342 LFSDUX A5, CO1, INC2 6343 6344 fxcpmadd f0, AP, f0, A1 6345 fxcpmadd f1, AP, f1, B1 6346 fxcpmadd f2, AP, f2, A3 6347 STFDUX f0, CO1, INCM7 6348 STFSDUX f0, CO1, INC 6349 6350 fxcpmadd f3, AP, f3, A5 6351#else 6352 fpmul f0, AP, f0 6353 fpmul f1, AP, f1 6354 fpmul f2, AP, f2 6355 STFDUX f0, CO1, INC 6356 STFSDUX f0, CO1, INC 6357 6358 fpmul f3, AP, f3 6359#endif 6360 6361 STFDUX f1, CO1, INC 6362 STFSDUX f1, CO1, INC 6363 6364 STFDUX f2, CO1, INC 6365 STFSDUX f2, CO1, INC 6366 6367 STFDUX f3, CO1, INC 6368 STFSDUX f3, CO1, INC 6369 6370#ifdef TRMMKERNEL 6371#if ( defined(LEFT) && defined(TRANSA)) || \ 6372 (!defined(LEFT) && !defined(TRANSA)) 6373 sub TEMP, K, KK 6374#ifdef LEFT 6375 addi TEMP, TEMP, -8 6376#else 6377 addi TEMP, TEMP, -1 6378#endif 6379 slwi r0, TEMP, 3 + BASE_SHIFT 6380 slwi TEMP, TEMP, 0 + BASE_SHIFT 6381 add AO, AO, r0 6382 add BO, BO, TEMP 6383#endif 6384 6385#ifdef LEFT 6386 addi KK, KK, 8 6387#endif 6388#endif 6389 6390 addic. I, I, -1 6391 li r0, FZERO 6392 6393 lfpsx f0, SP, r0 6394 bgt+ .L1091 6395 .align 4 6396 6397.L10100: 6398 andi. 
I, M, 4 6399 beq .L10110 6400 6401#if defined(TRMMKERNEL) 6402#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 6403 addi BO, B, - 2 * SIZE 6404 fpmr f1, f0 6405 fpmr f2, f0 6406 fpmr f3, f0 6407#else 6408 slwi TEMP, KK, 2 + BASE_SHIFT 6409 slwi r0, KK, 0 + BASE_SHIFT 6410 add AO, AO, TEMP 6411 add BO, B, r0 6412 6413 fpmr f1, f0 6414 addi BO, BO, - 2 * SIZE 6415 fpmr f2, f0 6416 fpmr f3, f0 6417#endif 6418 6419#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6420 sub TEMP, K, KK 6421#elif defined(LEFT) 6422 addi TEMP, KK, 4 6423#else 6424 addi TEMP, KK, 1 6425#endif 6426 srawi. r0, TEMP, 3 6427 mtspr CTR, r0 6428 ble .L10104 6429#else 6430 addi BO, B, - 2 * SIZE 6431 fpmr f1, f0 6432 fpmr f2, f0 6433 fpmr f3, f0 6434 6435 srawi. r0, K, 3 6436 mtspr CTR, r0 6437 ble .L10104 6438#endif 6439 6440 LFPDUX B1, BO, INC2 6441 LFPDUX A1, AO, INC2 6442 LFPDUX A2, AO, INC2 6443 LFPDUX A3, AO, INC2 6444 LFPDUX A4, AO, INC2 6445 LFPDUX B2, BO, INC2 6446 LFPDUX A5, AO, INC2 6447 LFPDUX A6, AO, INC2 6448 LFPDUX A7, AO, INC2 6449 LFPDUX A8, AO, INC2 6450 LFPDUX B3, BO, INC2 6451 LFPDUX B4, BO, INC2 6452 6453 bdz- .L10103 6454 .align 4 6455 6456.L10102: 6457 fxcpmadd f0, B1, A1, f0 6458 LFPDUX A1, AO, INC2 6459 fxcpmadd f1, B1, A2, f1 6460 LFPDUX A2, AO, INC2 6461 fxcsmadd f2, B1, A3, f2 6462 LFPDUX A3, AO, INC2 6463 fxcsmadd f3, B1, A4, f3 6464 LFPDUX A4, AO, INC2 6465 LFPDUX B1, BO, INC2 6466 6467 fxcpmadd f0, B2, A5, f0 6468 LFPDUX A5, AO, INC2 6469 fxcpmadd f1, B2, A6, f1 6470 LFPDUX A6, AO, INC2 6471 fxcsmadd f2, B2, A7, f2 6472 LFPDUX A7, AO, INC2 6473 fxcsmadd f3, B2, A8, f3 6474 LFPDUX A8, AO, INC2 6475 LFPDUX B2, BO, INC2 6476 6477 fxcpmadd f0, B3, A1, f0 6478 LFPDUX A1, AO, INC2 6479 fxcpmadd f1, B3, A2, f1 6480 LFPDUX A2, AO, INC2 6481 fxcsmadd f2, B3, A3, f2 6482 LFPDUX A3, AO, INC2 6483 fxcsmadd f3, B3, A4, f3 6484 LFPDUX A4, AO, INC2 6485 LFPDUX B3, BO, INC2 6486 6487 fxcpmadd f0, B4, A5, f0 6488 LFPDUX A5, 
AO, INC2 6489 fxcpmadd f1, B4, A6, f1 6490 LFPDUX A6, AO, INC2 6491 fxcsmadd f2, B4, A7, f2 6492 LFPDUX A7, AO, INC2 6493 fxcsmadd f3, B4, A8, f3 6494 LFPDUX A8, AO, INC2 6495 LFPDUX B4, BO, INC2 6496 bdnz+ .L10102 6497 .align 4 6498 6499.L10103: 6500 fxcpmadd f0, B1, A1, f0 6501 LFPDUX A1, AO, INC2 6502 fxcpmadd f1, B1, A2, f1 6503 LFPDUX A2, AO, INC2 6504 fxcsmadd f2, B1, A3, f2 6505 LFPDUX A3, AO, INC2 6506 fxcsmadd f3, B1, A4, f3 6507 LFPDUX A4, AO, INC2 6508 6509 fxcpmadd f0, B2, A5, f0 6510 LFPDUX A5, AO, INC2 6511 fxcpmadd f1, B2, A6, f1 6512 LFPDUX A6, AO, INC2 6513 fxcsmadd f2, B2, A7, f2 6514 LFPDUX A7, AO, INC2 6515 fxcsmadd f3, B2, A8, f3 6516 LFPDUX A8, AO, INC2 6517 6518 fxcpmadd f0, B3, A1, f0 6519 fxcpmadd f1, B3, A2, f1 6520 fxcsmadd f2, B3, A3, f2 6521 fxcsmadd f3, B3, A4, f3 6522 6523 fxcpmadd f0, B4, A5, f0 6524 fxcpmadd f1, B4, A6, f1 6525 fxcsmadd f2, B4, A7, f2 6526 fxcsmadd f3, B4, A8, f3 6527 .align 4 6528 6529.L10104: 6530 lfd AP, ALPHA(SP) 6531#ifdef TRMMKERNEL 6532 fsmfp AP, AP 6533#endif 6534 6535#if defined(TRMMKERNEL) 6536#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6537 sub TEMP, K, KK 6538#elif defined(LEFT) 6539 addi TEMP, KK, 4 6540#else 6541 addi TEMP, KK, 1 6542#endif 6543 andi. TEMP, TEMP, 7 6544 mtspr CTR, TEMP 6545#else 6546 andi. 
r0, K, 7 6547 mtspr CTR, r0 6548#endif 6549 ble+ .L10108 6550 6551 LFPDUX A1, AO, INC2 6552 LFDX B1, BO, INC2 6553 LFPDUX A2, AO, INC2 6554 add BO, BO, INC 6555 bdz- .L10107 6556 .align 4 6557 6558.L10106: 6559 fxcpmadd f0, B1, A1, f0 6560 LFPDUX A1, AO, INC2 6561 fxcpmadd f1, B1, A2, f1 6562 LFDX B1, BO, INC2 6563 LFPDUX A2, AO, INC2 6564 add BO, BO, INC 6565 bdnz+ .L10106 6566 .align 4 6567 6568.L10107: 6569 fxcpmadd f0, B1, A1, f0 6570 fxcpmadd f1, B1, A2, f1 6571 .align 4 6572 6573.L10108: 6574#ifndef TRMMKERNEL 6575 LFDUX A1, CO1, INC 6576 LFDUX B1, CO1, INC2 6577 LFSDUX A1, CO1, INCM1 6578 LFSDUX B1, CO1, INC2 6579 6580 fpadd f0, f0, f2 6581 fpadd f1, f1, f3 6582 6583 fxcpmadd f0, AP, f0, A1 6584 fxcpmadd f1, AP, f1, B1 6585 6586 STFDUX f0, CO1, INCM3 6587 STFSDUX f0, CO1, INC 6588#else 6589 fpadd f0, f0, f2 6590 fpadd f1, f1, f3 6591 6592 fpmul f0, AP, f0 6593 fpmul f1, AP, f1 6594 6595 STFDUX f0, CO1, INC 6596 STFSDUX f0, CO1, INC 6597#endif 6598 6599 STFDUX f1, CO1, INC 6600 STFSDUX f1, CO1, INC 6601 6602#ifdef TRMMKERNEL 6603#if ( defined(LEFT) && defined(TRANSA)) || \ 6604 (!defined(LEFT) && !defined(TRANSA)) 6605 sub TEMP, K, KK 6606#ifdef LEFT 6607 addi TEMP, TEMP, -4 6608#else 6609 addi TEMP, TEMP, -1 6610#endif 6611 slwi r0, TEMP, 2 + BASE_SHIFT 6612 slwi TEMP, TEMP, 0 + BASE_SHIFT 6613 add AO, AO, r0 6614 add BO, BO, TEMP 6615#endif 6616 6617#ifdef LEFT 6618 addi KK, KK, 4 6619#endif 6620#endif 6621 6622 li r0, FZERO 6623 lfpsx f0, SP, r0 6624 .align 4 6625 6626.L10110: 6627 andi. 
I, M, 2 6628 beq .L10120 6629 6630#if defined(TRMMKERNEL) 6631#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 6632 addi BO, B, - 2 * SIZE 6633 fpmr f1, f0 6634 fpmr f2, f0 6635 fpmr f3, f0 6636#else 6637 slwi TEMP, KK, 1 + BASE_SHIFT 6638 slwi r0, KK, 0 + BASE_SHIFT 6639 add AO, AO, TEMP 6640 add BO, B, r0 6641 6642 fpmr f1, f0 6643 addi BO, BO, - 2 * SIZE 6644 fpmr f2, f0 6645 fpmr f3, f0 6646#endif 6647 6648#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6649 sub TEMP, K, KK 6650#elif defined(LEFT) 6651 addi TEMP, KK, 2 6652#else 6653 addi TEMP, KK, 1 6654#endif 6655 srawi. r0, TEMP, 3 6656 mtspr CTR, r0 6657 ble .L10114 6658#else 6659 addi BO, B, - 2 * SIZE 6660 fpmr f1, f0 6661 fpmr f2, f0 6662 fpmr f3, f0 6663 6664 srawi. r0, K, 3 6665 mtspr CTR, r0 6666 ble .L10114 6667#endif 6668 6669 LFPDUX A1, AO, INC2 6670 LFPDUX A2, AO, INC2 6671 LFPDUX B1, BO, INC2 6672 6673 LFPDUX A3, AO, INC2 6674 LFPDUX A4, AO, INC2 6675 LFPDUX B2, BO, INC2 6676 6677 LFPDUX A5, AO, INC2 6678 LFPDUX A6, AO, INC2 6679 LFPDUX B3, BO, INC2 6680 6681 LFPDUX A7, AO, INC2 6682 LFPDUX A8, AO, INC2 6683 LFPDUX B4, BO, INC2 6684 bdz- .L10113 6685 .align 4 6686 6687.L10112: 6688 fxcpmadd f0, B1, A1, f0 6689 LFPDUX A1, AO, INC2 6690 fxcsmadd f1, B1, A2, f1 6691 LFPDUX A2, AO, INC2 6692 LFPDUX B1, BO, INC2 6693 fxcpmadd f2, B2, A3, f2 6694 LFPDUX A3, AO, INC2 6695 fxcsmadd f3, B2, A4, f3 6696 LFPDUX A4, AO, INC2 6697 LFPDUX B2, BO, INC2 6698 fxcpmadd f0, B3, A5, f0 6699 LFPDUX A5, AO, INC2 6700 fxcsmadd f1, B3, A6, f1 6701 LFPDUX A6, AO, INC2 6702 LFPDUX B3, BO, INC2 6703 fxcpmadd f2, B4, A7, f2 6704 LFPDUX A7, AO, INC2 6705 fxcsmadd f3, B4, A8, f3 6706 LFPDUX A8, AO, INC2 6707 LFPDUX B4, BO, INC2 6708 bdnz+ .L10112 6709 .align 4 6710 6711.L10113: 6712 fxcpmadd f0, B1, A1, f0 6713 fxcsmadd f1, B1, A2, f1 6714 fxcpmadd f2, B2, A3, f2 6715 fxcsmadd f3, B2, A4, f3 6716 fxcpmadd f0, B3, A5, f0 6717 fxcsmadd f1, B3, A6, f1 6718 fxcpmadd f2, 
B4, A7, f2 6719 fxcsmadd f3, B4, A8, f3 6720 .align 4 6721 6722.L10114: 6723 lfd AP, ALPHA(SP) 6724#ifdef TRMMKERNEL 6725 fsmfp AP, AP 6726#endif 6727 6728#if defined(TRMMKERNEL) 6729#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6730 sub TEMP, K, KK 6731#elif defined(LEFT) 6732 addi TEMP, KK, 2 6733#else 6734 addi TEMP, KK, 1 6735#endif 6736 andi. TEMP, TEMP, 7 6737 mtspr CTR, TEMP 6738#else 6739 andi. r0, K, 7 6740 mtspr CTR, r0 6741#endif 6742 ble+ .L10118 6743 6744 LFPDUX A1, AO, INC2 6745 LFDX B1, BO, INC2 6746 add BO, BO, INC 6747 bdz- .L10117 6748 .align 4 6749 6750.L10116: 6751 fxcpmadd f0, B1, A1, f0 6752 LFPDUX A1, AO, INC2 6753 LFDX B1, BO, INC2 6754 add BO, BO, INC 6755 bdnz+ .L10116 6756 .align 4 6757 6758.L10117: 6759 fxcpmadd f0, B1, A1, f0 6760 .align 4 6761 6762.L10118: 6763#ifndef TRMMKERNEL 6764 LFDUX A1, CO1, INC 6765 LFDUX A2, CO1, INC 6766 6767 fpadd f0, f0, f1 6768 fpadd f2, f3, f2 6769 fsmfp A1, A2 6770 fpadd f0, f0, f2 6771 fxcpmadd f1, AP, f0, A1 6772 6773 li r0, FZERO 6774 lfpsx f0, SP, r0 6775 6776 STFDUX f1, CO1, INCM1 6777 STFSDUX f1, CO1, INC 6778#else 6779 fpadd f0, f0, f1 6780 fpadd f2, f3, f2 6781 fsmfp A1, A2 6782 fpadd f0, f0, f2 6783 fpmul f1, AP, f0 6784 6785 li r0, FZERO 6786 lfpsx f0, SP, r0 6787 6788 STFDUX f1, CO1, INC 6789 STFSDUX f1, CO1, INC 6790#endif 6791 6792 6793#ifdef TRMMKERNEL 6794#if ( defined(LEFT) && defined(TRANSA)) || \ 6795 (!defined(LEFT) && !defined(TRANSA)) 6796 sub TEMP, K, KK 6797#ifdef LEFT 6798 addi TEMP, TEMP, -2 6799#else 6800 addi TEMP, TEMP, -1 6801#endif 6802 slwi r0, TEMP, 1 + BASE_SHIFT 6803 slwi TEMP, TEMP, 0 + BASE_SHIFT 6804 add AO, AO, r0 6805 add BO, BO, TEMP 6806#endif 6807 6808#ifdef LEFT 6809 addi KK, KK, 2 6810#endif 6811#endif 6812 .align 4 6813 6814.L10120: 6815 andi. 
I, M, 1 6816 beq .L10999 6817 6818#if defined(TRMMKERNEL) 6819#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) 6820 addi BO, B, - 2 * SIZE 6821 fpmr f1, f0 6822 fpmr f2, f0 6823 fpmr f3, f0 6824#else 6825 slwi TEMP, KK, 0 + BASE_SHIFT 6826 slwi r0, KK, 0 + BASE_SHIFT 6827 add AO, AO, TEMP 6828 add BO, B, r0 6829 6830 fpmr f1, f0 6831 addi BO, BO, - 2 * SIZE 6832 fpmr f2, f0 6833 fpmr f3, f0 6834#endif 6835 6836#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6837 sub TEMP, K, KK 6838#elif defined(LEFT) 6839 addi TEMP, KK, 1 6840#else 6841 addi TEMP, KK, 1 6842#endif 6843 srawi. r0, TEMP, 3 6844 mtspr CTR, r0 6845 ble .L10124 6846#else 6847 addi BO, B, - 2 * SIZE 6848 fpmr f1, f0 6849 fpmr f2, f0 6850 fpmr f3, f0 6851 6852 srawi. r0, K, 3 6853 mtspr CTR, r0 6854 ble .L10124 6855#endif 6856 6857 LFPDUX A1, AO, INC2 6858 LFPDUX B1, BO, INC2 6859 LFPDUX A2, AO, INC2 6860 LFPDUX B2, BO, INC2 6861 LFPDUX A3, AO, INC2 6862 LFPDUX B3, BO, INC2 6863 LFPDUX A4, AO, INC2 6864 LFPDUX B4, BO, INC2 6865 bdz- .L10123 6866 .align 4 6867 6868.L10122: 6869 fpmadd f0, A1, B1, f0 6870 LFPDUX A1, AO, INC2 6871 LFPDUX B1, BO, INC2 6872 fpmadd f1, A2, B2, f1 6873 LFPDUX A2, AO, INC2 6874 LFPDUX B2, BO, INC2 6875 fpmadd f2, A3, B3, f2 6876 LFPDUX A3, AO, INC2 6877 LFPDUX B3, BO, INC2 6878 fpmadd f3, A4, B4, f3 6879 LFPDUX A4, AO, INC2 6880 LFPDUX B4, BO, INC2 6881 bdnz+ .L10122 6882 .align 4 6883 6884.L10123: 6885 fpmadd f0, A1, B1, f0 6886 fpmadd f1, A2, B2, f1 6887 fpmadd f2, A3, B3, f2 6888 fpmadd f3, A4, B4, f3 6889 .align 4 6890 6891.L10124: 6892 lfd AP, ALPHA(SP) 6893#ifdef TRMMKERNEL 6894 fsmfp AP, AP 6895#endif 6896 6897#if defined(TRMMKERNEL) 6898#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) 6899 sub TEMP, K, KK 6900#elif defined(LEFT) 6901 addi TEMP, KK, 1 6902#else 6903 addi TEMP, KK, 1 6904#endif 6905 andi. TEMP, TEMP, 7 6906 mtspr CTR, TEMP 6907#else 6908 andi. 
r0, K, 7 6909 mtspr CTR, r0 6910#endif 6911 ble+ .L10128 6912 6913 LFDX A1, AO, INC2 6914 LFDX B1, BO, INC2 6915 add AO, AO, INC 6916 add BO, BO, INC 6917 bdz- .L10127 6918 .align 4 6919 6920.L10126: 6921 fmadd f0, A1, B1, f0 6922 LFDX A1, AO, INC2 6923 LFDX B1, BO, INC2 6924 add AO, AO, INC 6925 add BO, BO, INC 6926 bdnz+ .L10126 6927 .align 4 6928 6929.L10127: 6930 fmadd f0, A1, B1, f0 6931 .align 4 6932 6933.L10128: 6934#ifndef TRMMKERNEL 6935 LFDX A1, CO1, INC 6936 fpadd f0, f0, f1 6937 fpadd f2, f2, f3 6938 fpadd f0, f0, f2 6939 fsmtp f1, f0 6940 fadd f0, f0, f1 6941 fmadd f0, AP, f0, A1 6942 STFDUX f0, CO1, INC 6943#else 6944 fpadd f0, f0, f1 6945 fpadd f2, f2, f3 6946 fpadd f0, f0, f2 6947 fsmtp f1, f0 6948 fadd f0, f0, f1 6949 fmul f0, AP, f0 6950 STFDUX f0, CO1, INC 6951#endif 6952 .align 4 6953 6954.L10999: 6955 addi SP, SP, 12 6956 6957 lwzu r14, 4(SP) 6958 lwzu r15, 4(SP) 6959 6960 lwzu r16, 4(SP) 6961 lwzu r17, 4(SP) 6962 lwzu r18, 4(SP) 6963 lwzu r19, 4(SP) 6964 6965 lwzu r20, 4(SP) 6966 lwzu r21, 4(SP) 6967 lwzu r22, 4(SP) 6968 lwzu r23, 4(SP) 6969 6970 lwzu r24, 4(SP) 6971 lwzu r25, 4(SP) 6972 lwzu r26, 4(SP) 6973 lwzu r27, 4(SP) 6974 6975 lwzu r28, 4(SP) 6976 lwzu r29, 4(SP) 6977 lwzu r30, 4(SP) 6978 lwzu r31, 4(SP) 6979 6980 subi SP, SP, 12 6981 li r0, 16 6982 6983 lfpdux f31, SP, r0 6984 lfpdux f30, SP, r0 6985 lfpdux f29, SP, r0 6986 lfpdux f28, SP, r0 6987 lfpdux f27, SP, r0 6988 lfpdux f26, SP, r0 6989 lfpdux f25, SP, r0 6990 lfpdux f24, SP, r0 6991 lfpdux f23, SP, r0 6992 lfpdux f22, SP, r0 6993 lfpdux f21, SP, r0 6994 lfpdux f20, SP, r0 6995 lfpdux f19, SP, r0 6996 lfpdux f18, SP, r0 6997 lfpdux f17, SP, r0 6998 lfpdux f16, SP, r0 6999 lfpdux f15, SP, r0 7000 lfpdux f14, SP, r0 7001 addi SP, SP, 16 7002 blr 7003 7004 7005 EPILOGUE 7006#endif 7007