1/*********************************************************************/ 2/* Copyright 2009, 2010 The University of Texas at Austin. */ 3/* All rights reserved. */ 4/* */ 5/* Redistribution and use in source and binary forms, with or */ 6/* without modification, are permitted provided that the following */ 7/* conditions are met: */ 8/* */ 9/* 1. Redistributions of source code must retain the above */ 10/* copyright notice, this list of conditions and the following */ 11/* disclaimer. */ 12/* */ 13/* 2. Redistributions in binary form must reproduce the above */ 14/* copyright notice, this list of conditions and the following */ 15/* disclaimer in the documentation and/or other materials */ 16/* provided with the distribution. */ 17/* */ 18/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ 19/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 20/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 21/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 22/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ 23/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ 24/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ 25/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ 26/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ 27/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ 28/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ 29/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ 30/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 31/* POSSIBILITY OF SUCH DAMAGE. */ 32/* */ 33/* The views and conclusions contained in the software and */ 34/* documentation are those of the authors and should not be */ 35/* interpreted as representing official policies, either expressed */ 36/* or implied, of The University of Texas at Austin. */ 37/*********************************************************************/ 38 39#define ASSEMBLER 40#include "common.h" 41 42#undef ZERO 43 44#define ALPHA 0 45#define FZERO 16 46 47#define M r3 48#define N r4 49#define K r5 50 51#if defined(linux) || defined(__FreeBSD__) 52#define A r6 53#define B r7 54#define C r8 55#define LDC r9 56#define OFFSET r10 57#endif 58 59#define TEMP r11 60#define AORIG r12 61#define KK r14 62#define INCM1 r15 63#define INCM3 r16 64#define INCM5 r17 65#define INCM7 r18 66#define INC2 r19 67#define INC r20 68#define INC4 r21 69 70#define I r22 71#define J r23 72#define AO r24 73#define BO r25 74#define AO2 r26 75#define BO2 r27 76 77#define CO1 r28 78#define CO2 r29 79#define ZERO r31 80 81#ifndef NEEDPARAM 82 83#define A1 f16 84#define A2 f17 85#define A3 f18 86#define A4 f19 87#define A5 f20 88#define A6 f21 89#define A7 f22 90#define A8 f23 91#define A9 f24 92#define A10 f25 93 94#define B1 f26 95#define B2 f27 96#define B3 f28 97#define B4 f29 98#define B5 f30 99#define B6 f31 100 101#define AP B6 102 103#ifndef CONJ 104#define FXCPMADD fxcpmadd 105#define FXCSMADD fxcxnpma 106#else 107#if defined(LN) || defined(LT) 108#define FXCPMADD fxcpnsma 109#define FXCSMADD fxcxma 110#else 111#define FXCPMADD fxcpmadd 112#define FXCSMADD fxcxnsma 113#endif 114#endif 115 116#ifndef CONJ 117#define FXCXNPMA fxcxnpma 118#define FXCXNSMA fxcxnsma 119#else 120#define FXCXNPMA fxcxnsma 121#define FXCXNSMA fxcxnpma 122#endif 123 124 125 PROLOGUE 126 PROFCODE 127 128 li r0, -16 129 130 stfpdux f14, SP, r0 131 stfpdux f15, SP, r0 132 stfpdux f16, SP, r0 133 stfpdux f17, SP, r0 134 stfpdux f18, SP, r0 135 stfpdux f19, SP, r0 136 stfpdux f20, SP, r0 137 stfpdux f21, SP, r0 138 stfpdux f22, SP, r0 139 stfpdux f23, SP, r0 140 stfpdux f24, SP, r0 141 stfpdux f25, SP, r0 142 stfpdux f26, SP, r0 143 stfpdux f27, SP, r0 144 stfpdux f28, SP, r0 145 stfpdux f29, SP, r0 146 stfpdux f30, SP, r0 147 stfpdux f31, SP, r0 148 149 stwu r31, -4(SP) 150 stwu r30, -4(SP) 151 stwu r29, -4(SP) 152 stwu r28, -4(SP) 153 154 stwu r27, -4(SP) 155 stwu r26, -4(SP) 156 stwu r25, -4(SP) 157 stwu r24, -4(SP) 158 159 stwu r23, -4(SP) 160 stwu r22, -4(SP) 161 stwu r21, -4(SP) 162 stwu r20, -4(SP) 163 164 stwu r19, -4(SP) 165 stwu r18, -4(SP) 166 stwu r17, -4(SP) 167 stwu r16, -4(SP) 168 169 stwu r15, -4(SP) 170 stwu r14, -4(SP) 171 172 li r0, 0 173 stwu r0, -4(SP) 174 stwu r0, -4(SP) 175 176 stfdu f2, -8(SP) 177 stfdu f1, -8(SP) 178 179 slwi LDC, LDC, ZBASE_SHIFT 180 181 cmpwi cr0, M, 0 182 ble .L999 183 cmpwi cr0, N, 0 184 ble .L999 185 cmpwi cr0, K, 0 186 ble .L999 187 188 li INC, 1 * SIZE 189 li INC2, 2 * SIZE 190 li INC4, 4 * SIZE 191 li INCM1, -1 * SIZE 192 li INCM3, -3 * SIZE 193 li INCM5, -5 * SIZE 194 li INCM7, -7 * SIZE 195 196 addi C, C, - 1 * SIZE 197 198#ifdef LN 199 mullw r0, M, K 200 slwi r0, r0, ZBASE_SHIFT 201 add A, A, r0 202 203 slwi r0, M, ZBASE_SHIFT 204 add C, C, r0 205#endif 206 207#ifdef RN 208 neg KK, OFFSET 209#endif 210 211#ifdef RT 212 mullw r0, N, K 213 slwi r0, r0, ZBASE_SHIFT 214 add B, B, r0 215 216 mullw r0, N, LDC 217 add C, C, r0 218 219 sub KK, N, OFFSET 220#endif 221 222 srawi. J, N, 1 223 ble .L50 224 .align 4 225 226.L10: 227#ifdef RT 228 slwi r0, K, 1 + ZBASE_SHIFT 229 sub B, B, r0 230 231 slwi r0, LDC, 1 232 sub C, C, r0 233#endif 234 235 mr CO1, C 236 add CO2, C, LDC 237 238#ifdef LN 239 add KK, M, OFFSET 240#endif 241 242#ifdef LT 243 mr KK, OFFSET 244#endif 245 246#if defined(LN) || defined(RT) 247 addi AORIG, A, -4 * SIZE 248#else 249 addi AO, A, -4 * SIZE 250#endif 251#ifndef RT 252 add C, CO2, LDC 253#endif 254 255 li r0, FZERO 256 lfpsx f0, SP, r0 257 258 259 andi. I, M, 1 260 beq .L20 261 262#if defined(LT) || defined(RN) 263 addi AO2, AO, 2 * SIZE 264 fpmr f1, f0 265 addi BO, B, - 4 * SIZE 266 fpmr f2, f0 267 addi BO2, B, - 2 * SIZE 268 fpmr f3, f0 269 270 srawi. r0, KK, 2 271 mtspr CTR, r0 272 ble .L34 273#else 274#ifdef LN 275 slwi r0, K, 0 + ZBASE_SHIFT 276 sub AORIG, AORIG, r0 277#endif 278 279 slwi r0 , KK, 0 + ZBASE_SHIFT 280 slwi TEMP, KK, 1 + ZBASE_SHIFT 281 add AO, AORIG, r0 282 add BO, B, TEMP 283 284 sub TEMP, K, KK 285 286 addi AO2, AO, 2 * SIZE 287 fpmr f1, f0 288 addi BO, BO, - 4 * SIZE 289 fpmr f2, f0 290 addi BO2, BO, 2 * SIZE 291 fpmr f3, f0 292 293 srawi. r0, TEMP, 2 294 mtspr CTR, r0 295 ble .L34 296#endif 297 298 LFPDUX A1, AO, INC4 299 LFPDUX B1, BO, INC4 300 LFPDUX B2, BO2, INC4 301 LFPDUX A2, AO2, INC4 302 LFPDUX B3, BO, INC4 303 LFPDUX B4, BO2, INC4 304 305 LFPDUX A3, AO, INC4 306 LFPDUX A5, BO, INC4 307 LFPDUX A6, BO2, INC4 308 LFPDUX A4, AO2, INC4 309 LFPDUX A7, BO, INC4 310 LFPDUX A8, BO2, INC4 311 bdz- .L33 312 .align 4 313 314.L32: 315 FXCPMADD f0, B1, A1, f0 316 FXCSMADD f1, B1, A1, f1 317 LFPDUX B1, BO, INC4 318 FXCPMADD f2, B2, A1, f2 319 FXCSMADD f3, B2, A1, f3 320 LFPDUX B2, BO2, INC4 321 LFPDUX A1, AO, INC4 322 323 FXCPMADD f0, B3, A2, f0 324 FXCSMADD f1, B3, A2, f1 325 LFPDUX B3, BO, INC4 326 FXCPMADD f2, B4, A2, f2 327 FXCSMADD f3, B4, A2, f3 328 LFPDUX B4, BO2, INC4 329 LFPDUX A2, AO2, INC4 330 331 FXCPMADD f0, A5, A3, f0 332 FXCSMADD f1, A5, A3, f1 333 LFPDUX A5, BO, INC4 334 FXCPMADD f2, A6, A3, f2 335 FXCSMADD f3, A6, A3, f3 336 LFPDUX A6, BO2, INC4 337 LFPDUX A3, AO, INC4 338 339 FXCPMADD f0, A7, A4, f0 340 FXCSMADD f1, A7, A4, f1 341 LFPDUX A7, BO, INC4 342 FXCPMADD f2, A8, A4, f2 343 FXCSMADD f3, A8, A4, f3 344 LFPDUX A8, BO2, INC4 345 LFPDUX A4, AO2, INC4 346 bdnz+ .L32 347 .align 4 348 349.L33: 350 FXCPMADD f0, B1, A1, f0 351 FXCSMADD f1, B1, A1, f1 352 FXCPMADD f2, B2, A1, f2 353 FXCSMADD f3, B2, A1, f3 354 355 FXCPMADD f0, B3, A2, f0 356 FXCSMADD f1, B3, A2, f1 357 FXCPMADD f2, B4, A2, f2 358 FXCSMADD f3, B4, A2, f3 359 360 FXCPMADD f0, A5, A3, f0 361 FXCSMADD f1, A5, A3, f1 362 FXCPMADD f2, A6, A3, f2 363 FXCSMADD f3, A6, A3, f3 364 365 FXCPMADD f0, A7, A4, f0 366 FXCSMADD f1, A7, A4, f1 367 FXCPMADD f2, A8, A4, f2 368 FXCSMADD f3, A8, A4, f3 369 .align 4 370 371.L34: 372#if defined(LT) || defined(RN) 373 andi. r0, KK, 3 374 mtspr CTR, r0 375 ble+ .L38 376#else 377 andi. r0, TEMP, 3 378 mtspr CTR, r0 379 ble+ .L38 380#endif 381 382 LFPDX A1, AO, INC4 383 LFPDUX B1, BO, INC4 384 LFPDUX B2, BO2, INC4 385 add AO, AO, INC2 386 bdz- .L37 387 .align 4 388 389.L36: 390 FXCPMADD f0, B1, A1, f0 391 FXCSMADD f1, B1, A1, f1 392 LFPDUX B1, BO, INC4 393 FXCPMADD f2, B2, A1, f2 394 FXCSMADD f3, B2, A1, f3 395 LFPDX A1, AO, INC4 396 LFPDUX B2, BO2, INC4 397 add AO, AO, INC2 398 bdnz+ .L36 399 .align 4 400 401.L37: 402 FXCPMADD f0, B1, A1, f0 403 FXCSMADD f1, B1, A1, f1 404 FXCPMADD f2, B2, A1, f2 405 FXCSMADD f3, B2, A1, f3 406 .align 4 407 408.L38: 409 fpadd f0, f0, f1 410 fpadd f2, f2, f3 411 412#if defined(LN) || defined(RT) 413#ifdef LN 414 subi r0, KK, 1 415#else 416 subi r0, KK, 2 417#endif 418 slwi TEMP, r0, 0 + ZBASE_SHIFT 419 slwi r0, r0, 1 + ZBASE_SHIFT 420 add AO, AORIG, TEMP 421 add BO, B, r0 422 addi BO, BO, - 4 * SIZE 423#endif 424 425 addi AO2, AO, 2 * SIZE 426 addi BO2, BO, 2 * SIZE 427 428#if defined(LN) || defined(LT) 429 LFPDX f16, BO, INC4 430 LFPDX f17, BO2, INC4 431#else 432 LFPDX f16, AO, INC4 433 LFPDX f17, AO2, INC4 434#endif 435 436 fpsub f0, f16, f0 437 fpsub f2, f17, f2 438 439#ifdef LN 440 LFPDX A1, AO, INC4 441 442 fxpmul f4, A1, f0 443 fxpmul f5, A1, f2 444 FXCXNPMA f0, A1, f0, f4 445 FXCXNPMA f2, A1, f2, f5 446#endif 447 448#ifdef LT 449 LFPDX A1, AO, INC4 450 451 fxpmul f4, A1, f0 452 fxpmul f5, A1, f2 453 FXCXNPMA f0, A1, f0, f4 454 FXCXNPMA f2, A1, f2, f5 455#endif 456 457#ifdef RN 458 LFPDUX A1, BO, INC4 459 LFPDUX A2, BO2, INC4 460 add BO, BO, INC4 461 LFPDUX A3, BO2, INC4 462 463 subi BO, BO, 8 * SIZE 464 subi BO2, BO2, 8 * SIZE 465 466 fxpmul f4, A1, f0 467 FXCXNPMA f0, A1, f0, f4 468 469 fxcpnmsub f2, A2, f0, f2 470 FXCXNSMA f2, A2, f0, f2 471 472 fxpmul f4, A3, f2 473 FXCXNPMA f2, A3, f2, f4 474#endif 475 476#ifdef RT 477 LFPDUX A1, BO, INC4 478 add BO2, BO2, INC4 479 LFPDUX A2, BO, INC4 480 LFPDUX A3, BO2, INC4 481 482 subi BO, BO, 8 * SIZE 483 subi BO2, BO2, 8 * SIZE 484 485 fxpmul f4, A3, f2 486 FXCXNPMA f2, A3, f2, f4 487 488 fxcpnmsub f0, A2, f2, f0 489 FXCXNSMA f0, A2, f2, f0 490 491 fxpmul f4, A1, f0 492 FXCXNPMA f0, A1, f0, f4 493#endif 494 495#ifdef LN 496 subi CO1, CO1, 2 * SIZE 497 subi CO2, CO2, 2 * SIZE 498#endif 499 500#if defined(LN) || defined(LT) 501 STFPDX f0, BO, INC4 502 STFPDX f2, BO2, INC4 503#else 504 STFPDX f0, AO, INC4 505 STFPDX f2, AO2, INC4 506#endif 507 508 STFDUX f0, CO1, INC 509 STFSDUX f0, CO1, INC 510 STFDUX f2, CO2, INC 511 STFSDUX f2, CO2, INC 512 513#ifdef LN 514 subi CO1, CO1, 2 * SIZE 515 subi CO2, CO2, 2 * SIZE 516#endif 517 518#ifdef RT 519 slwi r0, K, 0 + ZBASE_SHIFT 520 add AORIG, AORIG, r0 521#endif 522 523#if defined(LT) || defined(RN) 524 sub TEMP, K, KK 525 slwi r0, TEMP, 0 + ZBASE_SHIFT 526 slwi TEMP, TEMP, 1 + ZBASE_SHIFT 527 add AO, AO, r0 528 add BO, BO, TEMP 529#endif 530 531#ifdef LT 532 addi KK, KK, 1 533#endif 534 535#ifdef LN 536 subi KK, KK, 1 537#endif 538 539 li r0, FZERO 540 lfpsx f0, SP, r0 541 .align 4 542 543.L20: 544 andi. I, M, 2 545 beq .L30 546 547#if defined(LT) || defined(RN) 548 addi AO2, AO, 2 * SIZE 549 fpmr f4, f0 550 addi BO, B, - 4 * SIZE 551 fpmr f8, f0 552 addi BO2, B, - 2 * SIZE 553 fpmr f12, f0 554 555 srawi. r0, KK, 2 556 fpmr f1, f0 557 fpmr f5, f0 558 fpmr f9, f0 559 mtspr CTR, r0 560 fpmr f13, f0 561 ble .L24 562#else 563#ifdef LN 564 slwi r0, K, 1 + ZBASE_SHIFT 565 sub AORIG, AORIG, r0 566#endif 567 568 slwi r0 , KK, 1 + ZBASE_SHIFT 569 add AO, AORIG, r0 570 add BO, B, r0 571 572 sub TEMP, K, KK 573 574 addi AO2, AO, 2 * SIZE 575 fpmr f4, f0 576 addi BO, BO, - 4 * SIZE 577 fpmr f8, f0 578 addi BO2, BO, 2 * SIZE 579 fpmr f12, f0 580 581 fpmr f1, f0 582 fpmr f5, f0 583 fpmr f9, f0 584 fpmr f13, f0 585 srawi. r0, TEMP, 2 586 mtspr CTR, r0 587 ble .L24 588#endif 589 590 LFPDUX A1, AO, INC4 591 LFPDUX B1, BO, INC4 592 LFPDUX A2, AO2, INC4 593 LFPDUX B2, BO2, INC4 594 LFPDUX A3, AO, INC4 595 LFPDUX B3, BO, INC4 596 LFPDUX A4, AO2, INC4 597 LFPDUX B4, BO2, INC4 598 599 LFPDUX A5, AO, INC4 600 LFPDUX B5, BO, INC4 601 LFPDUX A6, AO2, INC4 602 LFPDUX B6, BO2, INC4 603 LFPDUX A7, AO, INC4 604 LFPDUX A9, BO, INC4 605 LFPDUX A10, BO2, INC4 606 bdz- .L23 607 .align 4 608 609.L22: 610 FXCPMADD f0, B1, A1, f0 611 nop 612 FXCSMADD f4, B1, A1, f4 613 LFPDUX A8, AO2, INC4 614 FXCPMADD f8, B2, A1, f8 615 nop 616 FXCSMADD f12, B2, A1, f12 617 LFPDUX A1, AO, INC4 618 619 FXCPMADD f1, B1, A2, f1 620 nop 621 FXCSMADD f5, B1, A2, f5 622 LFPDUX B1, BO, INC4 623 FXCPMADD f9, B2, A2, f9 624 nop 625 FXCSMADD f13, B2, A2, f13 626 LFPDUX B2, BO2, INC4 627 628 FXCPMADD f0, B3, A3, f0 629 nop 630 FXCSMADD f4, B3, A3, f4 631 LFPDUX A2, AO2, INC4 632 FXCPMADD f8, B4, A3, f8 633 nop 634 FXCSMADD f12, B4, A3, f12 635 LFPDUX A3, AO, INC4 636 637 FXCPMADD f1, B3, A4, f1 638 nop 639 FXCSMADD f5, B3, A4, f5 640 LFPDUX B3, BO, INC4 641 FXCPMADD f9, B4, A4, f9 642 nop 643 FXCSMADD f13, B4, A4, f13 644 LFPDUX B4, BO2, INC4 645 646 FXCPMADD f0, B5, A5, f0 647 nop 648 FXCSMADD f4, B5, A5, f4 649 LFPDUX A4, AO2, INC4 650 FXCPMADD f8, B6, A5, f8 651 nop 652 FXCSMADD f12, B6, A5, f12 653 LFPDUX A5, AO, INC4 654 655 FXCPMADD f1, B5, A6, f1 656 nop 657 FXCSMADD f5, B5, A6, f5 658 LFPDUX B5, BO, INC4 659 FXCPMADD f9, B6, A6, f9 660 nop 661 FXCSMADD f13, B6, A6, f13 662 LFPDUX B6, BO2, INC4 663 664 FXCPMADD f0, A9, A7, f0 665 nop 666 FXCSMADD f4, A9, A7, f4 667 LFPDUX A6, AO2, INC4 668 FXCPMADD f8, A10, A7, f8 669 nop 670 FXCSMADD f12, A10, A7, f12 671 LFPDUX A7, AO, INC4 672 673 FXCPMADD f1, A9, A8, f1 674 nop 675 FXCSMADD f5, A9, A8, f5 676 LFPDUX A9, BO, INC4 677 FXCPMADD f9, A10, A8, f9 678 nop 679 FXCSMADD f13, A10, A8, f13 680 LFPDUX A10, BO2, INC4 681 bdnz+ .L22 682 .align 4 683 684.L23: 685 FXCPMADD f0, B1, A1, f0 686 FXCSMADD f4, B1, A1, f4 687 LFPDUX A8, AO2, INC4 688 FXCPMADD f8, B2, A1, f8 689 FXCSMADD f12, B2, A1, f12 690 691 FXCPMADD f1, B1, A2, f1 692 FXCSMADD f5, B1, A2, f5 693 FXCPMADD f9, B2, A2, f9 694 FXCSMADD f13, B2, A2, f13 695 696 FXCPMADD f0, B3, A3, f0 697 FXCSMADD f4, B3, A3, f4 698 FXCPMADD f8, B4, A3, f8 699 FXCSMADD f12, B4, A3, f12 700 701 FXCPMADD f1, B3, A4, f1 702 FXCSMADD f5, B3, A4, f5 703 FXCPMADD f9, B4, A4, f9 704 FXCSMADD f13, B4, A4, f13 705 706 FXCPMADD f0, B5, A5, f0 707 FXCSMADD f4, B5, A5, f4 708 FXCPMADD f8, B6, A5, f8 709 FXCSMADD f12, B6, A5, f12 710 711 FXCPMADD f1, B5, A6, f1 712 FXCSMADD f5, B5, A6, f5 713 FXCPMADD f9, B6, A6, f9 714 FXCSMADD f13, B6, A6, f13 715 716 FXCPMADD f0, A9, A7, f0 717 FXCSMADD f4, A9, A7, f4 718 FXCPMADD f8, A10, A7, f8 719 FXCSMADD f12, A10, A7, f12 720 721 FXCPMADD f1, A9, A8, f1 722 FXCSMADD f5, A9, A8, f5 723 FXCPMADD f9, A10, A8, f9 724 FXCSMADD f13, A10, A8, f13 725 .align 4 726 727.L24: 728#if defined(LT) || defined(RN) 729 andi. r0, KK, 3 730 mtspr CTR, r0 731 ble+ .L28 732#else 733 andi. r0, TEMP, 3 734 mtspr CTR, r0 735 ble+ .L28 736#endif 737 738 LFPDUX A1, AO, INC4 739 LFPDUX A2, AO2, INC4 740 LFPDUX B1, BO, INC4 741 LFPDUX B2, BO2, INC4 742 bdz- .L27 743 .align 4 744 745.L26: 746 FXCPMADD f0, B1, A1, f0 747 FXCSMADD f4, B1, A1, f4 748 FXCPMADD f8, B2, A1, f8 749 FXCSMADD f12, B2, A1, f12 750 LFPDUX A1, AO, INC4 751 752 FXCPMADD f1, B1, A2, f1 753 FXCSMADD f5, B1, A2, f5 754 LFPDUX B1, BO, INC4 755 FXCPMADD f9, B2, A2, f9 756 FXCSMADD f13, B2, A2, f13 757 LFPDUX A2, AO2, INC4 758 LFPDUX B2, BO2, INC4 759 bdnz+ .L26 760 .align 4 761 762.L27: 763 FXCPMADD f0, B1, A1, f0 764 FXCSMADD f4, B1, A1, f4 765 FXCPMADD f8, B2, A1, f8 766 FXCSMADD f12, B2, A1, f12 767 768 FXCPMADD f1, B1, A2, f1 769 FXCSMADD f5, B1, A2, f5 770 FXCPMADD f9, B2, A2, f9 771 FXCSMADD f13, B2, A2, f13 772 .align 4 773 774.L28: 775 fpadd f0, f0, f4 776 fpadd f8, f8, f12 777 fpadd f1, f1, f5 778 fpadd f9, f9, f13 779 780#if defined(LN) || defined(RT) 781#ifdef LN 782 subi r0, KK, 2 783#else 784 subi r0, KK, 2 785#endif 786 slwi r0, r0, 1 + ZBASE_SHIFT 787 add AO, AORIG, r0 788 add BO, B, r0 789 addi AO2, AO, 2 * SIZE 790 addi BO, BO, - 4 * SIZE 791 addi BO2, BO, 2 * SIZE 792#endif 793 794#if defined(LN) || defined(LT) 795 LFPDUX f16, BO, INC4 796 LFPDUX f18, BO2, INC4 797 LFPDUX f17, BO, INC4 798 LFPDUX f19, BO2, INC4 799 800 subi BO, BO, 8 * SIZE 801 subi BO2, BO2, 8 * SIZE 802#else 803 LFPDUX f16, AO, INC4 804 LFPDUX f17, AO2, INC4 805 LFPDUX f18, AO, INC4 806 LFPDUX f19, AO2, INC4 807 808 subi AO, AO, 8 * SIZE 809 subi AO2, AO2, 8 * SIZE 810#endif 811 812 fpsub f0, f16, f0 813 fpsub f1, f17, f1 814 fpsub f8, f18, f8 815 fpsub f9, f19, f9 816 817#ifdef LN 818 LFPDUX A1, AO, INC4 819 add AO2, AO2, INC4 820 LFPDUX A2, AO, INC4 821 LFPDUX A3, AO2, INC4 822 823 subi AO, AO, 8 * SIZE 824 subi AO2, AO2, 8 * SIZE 825 826 fxpmul f4, A3, f1 827 fxpmul f5, A3, f9 828 FXCXNPMA f1, A3, f1, f4 829 FXCXNPMA f9, A3, f9, f5 830 831 fxcpnmsub f0, A2, f1, f0 832 fxcpnmsub f8, A2, f9, f8 833 FXCXNSMA f0, A2, f1, f0 834 FXCXNSMA f8, A2, f9, f8 835 836 fxpmul f4, A1, f0 837 fxpmul f5, A1, f8 838 FXCXNPMA f0, A1, f0, f4 839 FXCXNPMA f8, A1, f8, f5 840#endif 841 842#ifdef LT 843 LFPDUX A1, AO, INC4 844 LFPDUX A2, AO2, INC4 845 add AO, AO, INC4 846 LFPDUX A3, AO2, INC4 847 848 subi AO, AO, 8 * SIZE 849 subi AO2, AO2, 8 * SIZE 850 851 fxpmul f4, A1, f0 852 fxpmul f5, A1, f8 853 FXCXNPMA f0, A1, f0, f4 854 FXCXNPMA f8, A1, f8, f5 855 856 fxcpnmsub f1, A2, f0, f1 857 fxcpnmsub f9, A2, f8, f9 858 FXCXNSMA f1, A2, f0, f1 859 FXCXNSMA f9, A2, f8, f9 860 861 fxpmul f6, A3, f1 862 fxpmul f7, A3, f9 863 FXCXNPMA f1, A3, f1, f6 864 FXCXNPMA f9, A3, f9, f7 865#endif 866 867#ifdef RN 868 LFPDUX A1, BO, INC4 869 LFPDUX A2, BO2, INC4 870 add BO, BO, INC4 871 LFPDUX A3, BO2, INC4 872 873 subi BO, BO, 8 * SIZE 874 subi BO2, BO2, 8 * SIZE 875 876 fxpmul f4, A1, f0 877 fxpmul f5, A1, f1 878 879 FXCXNPMA f0, A1, f0, f4 880 FXCXNPMA f1, A1, f1, f5 881 882 fxcpnmsub f8, A2, f0, f8 883 fxcpnmsub f9, A2, f1, f9 884 885 FXCXNSMA f8, A2, f0, f8 886 FXCXNSMA f9, A2, f1, f9 887 888 fxpmul f4, A3, f8 889 fxpmul f5, A3, f9 890 891 FXCXNPMA f8, A3, f8, f4 892 FXCXNPMA f9, A3, f9, f5 893#endif 894 895#ifdef RT 896 LFPDUX A1, BO, INC4 897 add BO2, BO2, INC4 898 LFPDUX A2, BO, INC4 899 LFPDUX A3, BO2, INC4 900 901 subi BO, BO, 8 * SIZE 902 subi BO2, BO2, 8 * SIZE 903 904 fxpmul f4, A3, f8 905 fxpmul f5, A3, f9 906 907 FXCXNPMA f8, A3, f8, f4 908 FXCXNPMA f9, A3, f9, f5 909 910 fxcpnmsub f0, A2, f8, f0 911 fxcpnmsub f1, A2, f9, f1 912 913 FXCXNSMA f0, A2, f8, f0 914 FXCXNSMA f1, A2, f9, f1 915 916 fxpmul f4, A1, f0 917 fxpmul f5, A1, f1 918 919 FXCXNPMA f0, A1, f0, f4 920 FXCXNPMA f1, A1, f1, f5 921#endif 922 923#ifdef LN 924 subi CO1, CO1, 4 * SIZE 925 subi CO2, CO2, 4 * SIZE 926#endif 927 928#if defined(LN) || defined(LT) 929 STFPDUX f0, BO, INC4 930 STFPDUX f8, BO2, INC4 931 STFPDUX f1, BO, INC4 932 STFPDUX f9, BO2, INC4 933 934 subi BO, BO, 8 * SIZE 935 subi BO2, BO2, 8 * SIZE 936#else 937 STFPDUX f0, AO, INC4 938 STFPDUX f1, AO2, INC4 939 STFPDUX f8, AO, INC4 940 STFPDUX f9, AO2, INC4 941 942 subi AO, AO, 8 * SIZE 943 subi AO2, AO2, 8 * SIZE 944#endif 945 946 STFDUX f0, CO1, INC 947 STFSDUX f0, CO1, INC 948 STFDUX f1, CO1, INC 949 STFSDUX f1, CO1, INC 950 951 STFDUX f8, CO2, INC 952 STFSDUX f8, CO2, INC 953 STFDUX f9, CO2, INC 954 STFSDUX f9, CO2, INC 955 956#ifdef LN 957 subi CO1, CO1, 4 * SIZE 958 subi CO2, CO2, 4 * SIZE 959#endif 960 961#ifdef RT 962 slwi r0, K, 1 + ZBASE_SHIFT 963 add AORIG, AORIG, r0 964#endif 965 966#if defined(LT) || defined(RN) 967 sub TEMP, K, KK 968 slwi r0, TEMP, 1 + ZBASE_SHIFT 969 add AO, AO, r0 970 add BO, BO, r0 971#endif 972 973#ifdef LT 974 addi KK, KK, 2 975#endif 976 977#ifdef LN 978 subi KK, KK, 2 979#endif 980 981 li r0, FZERO 982 lfpsx f0, SP, r0 983 .align 4 984 985.L30: 986 srawi. I, M, 2 987 ble .L49 988 .align 4 989 990.L11: 991#if defined(LT) || defined(RN) 992 993 addi AO2, AO, 2 * SIZE 994 fpmr f4, f0 995 addi BO, B, - 4 * SIZE 996 fpmr f8, f0 997 addi BO2, B, - 2 * SIZE 998 fpmr f12, f0 999 1000 fpmr f5, f0 1001 fpmr f9, f0 1002 fpmr f13, f0 1003 fpmr f2, f0 1004 1005 fpmr f6, f0 1006 fpmr f10, f0 1007 fpmr f14, f0 1008 fpmr f3, f0 1009 1010 fpmr f7, f0 1011 fpmr f11, f0 1012 fpmr f15, f0 1013 1014 srawi. r0, KK, 2 1015 fpmr f1, f0 1016 mtspr CTR, r0 1017 ble .L14 1018#else 1019 1020#ifdef LN 1021 slwi r0, K, 2 + ZBASE_SHIFT 1022 sub AORIG, AORIG, r0 1023#endif 1024 1025 slwi r0 , KK, 2 + ZBASE_SHIFT 1026 slwi TEMP, KK, 1 + ZBASE_SHIFT 1027 add AO, AORIG, r0 1028 add BO, B, TEMP 1029 1030 sub TEMP, K, KK 1031 1032 fpmr f5, f0 1033 fpmr f9, f0 1034 fpmr f13, f0 1035 fpmr f2, f0 1036 1037 fpmr f6, f0 1038 fpmr f10, f0 1039 fpmr f14, f0 1040 fpmr f3, f0 1041 1042 fpmr f7, f0 1043 fpmr f11, f0 1044 fpmr f15, f0 1045 1046 addi AO2, AO, 2 * SIZE 1047 fpmr f4, f0 1048 addi BO, BO, - 4 * SIZE 1049 fpmr f8, f0 1050 addi BO2, BO, 2 * SIZE 1051 fpmr f12, f0 1052 1053 srawi. r0, TEMP, 2 1054 fpmr f1, f0 1055 mtspr CTR, r0 1056 ble .L14 1057#endif 1058 1059 LFPDUX A1, AO, INC4 1060 fpmr f5, f0 1061 LFPDUX A3, AO, INC4 1062 fpmr f9, f0 1063 LFPDUX B1, BO, INC4 1064 fpmr f13, f0 1065 1066 LFPDUX A5, AO, INC4 1067 fpmr f2, f0 1068 LFPDUX A6, AO, INC4 1069 fpmr f6, f0 1070 LFPDUX B3, BO, INC4 1071 fpmr f10, f0 1072 LFPDUX A7, AO, INC4 1073 fpmr f14, f0 1074 1075 LFPDUX A8, AO, INC4 1076 fpmr f3, f0 1077 LFPDUX B5, BO, INC4 1078 fpmr f7, f0 1079 LFPDUX A9, AO, INC4 1080 fpmr f11, f0 1081 LFPDUX A2, AO2, INC4 1082 fpmr f15, f0 1083 LFPDUX B2, BO2, INC4 1084 bdz- .L13 1085 .align 4 1086 1087.L12: 1088 1089## 1 ## 1090 FXCPMADD f0, B1, A1, f0 1091 nop 1092 FXCSMADD f4, B1, A1, f4 1093 nop 1094 FXCPMADD f8, B2, A1, f8 1095 LFPDUX B4, BO2, INC4 1096 FXCSMADD f12, B2, A1, f12 1097 LFPDUX B6, BO, INC4 1098 1099 FXCPMADD f1, B1, A2, f1 1100 nop 1101 FXCSMADD f5, B1, A2, f5 1102 LFPDUX A4, AO2, INC4 1103 FXCPMADD f9, B2, A2, f9 1104 LFPDUX A10, AO, INC4 1105 FXCSMADD f13, B2, A2, f13 1106 nop 1107 1108 FXCPMADD f2, B1, A3, f2 1109 nop 1110 FXCSMADD f6, B1, A3, f6 1111 nop 1112 FXCPMADD f10, B2, A3, f10 1113 nop 1114 FXCSMADD f14, B2, A3, f14 1115 nop 1116 1117 FXCPMADD f3, B1, A4, f3 1118 nop 1119 FXCSMADD f7, B1, A4, f7 1120 LFPDUX A2, AO2, INC4 1121 FXCPMADD f11, B2, A4, f11 1122 LFPDUX A1, AO, INC4 1123 FXCSMADD f15, B2, A4, f15 1124 nop 1125 1126## 2 ## 1127 1128 FXCPMADD f0, B3, A5, f0 1129 nop 1130 FXCSMADD f4, B3, A5, f4 1131 nop 1132 FXCPMADD f8, B4, A5, f8 1133 LFPDUX B2, BO2, INC4 1134 FXCSMADD f12, B4, A5, f12 1135 LFPDUX B1, BO, INC4 1136 1137 FXCPMADD f1, B3, A2, f1 1138 nop 1139 FXCSMADD f5, B3, A2, f5 1140 LFPDUX A4, AO2, INC4 1141 FXCPMADD f9, B4, A2, f9 1142 LFPDUX A3, AO, INC4 1143 FXCSMADD f13, B4, A2, f13 1144 nop 1145 1146 FXCPMADD f2, B3, A6, f2 1147 nop 1148 FXCSMADD f6, B3, A6, f6 1149 nop 1150 FXCPMADD f10, B4, A6, f10 1151 nop 1152 FXCSMADD f14, B4, A6, f14 1153 nop 1154 1155 FXCPMADD f3, B3, A4, f3 1156 nop 1157 FXCSMADD f7, B3, A4, f7 1158 LFPDUX A2, AO2, INC4 1159 FXCPMADD f11, B4, A4, f11 1160 LFPDUX A5, AO, INC4 1161 FXCSMADD f15, B4, A4, f15 1162 nop 1163 1164## 3 ## 1165 1166 FXCPMADD f0, B5, A7, f0 1167 nop 1168 FXCSMADD f4, B5, A7, f4 1169 nop 1170 FXCPMADD f8, B2, A7, f8 1171 LFPDUX B4, BO2, INC4 1172 FXCSMADD f12, B2, A7, f12 1173 LFPDUX B3, BO, INC4 1174 1175 FXCPMADD f1, B5, A2, f1 1176 nop 1177 FXCSMADD f5, B5, A2, f5 1178 LFPDUX A4, AO2, INC4 1179 FXCPMADD f9, B2, A2, f9 1180 LFPDUX A6, AO, INC4 1181 FXCSMADD f13, B2, A2, f13 1182 nop 1183 1184 FXCPMADD f2, B5, A8, f2 1185 nop 1186 FXCSMADD f6, B5, A8, f6 1187 nop 1188 FXCPMADD f10, B2, A8, f10 1189 nop 1190 FXCSMADD f14, B2, A8, f14 1191 nop 1192 1193 FXCPMADD f3, B5, A4, f3 1194 nop 1195 FXCSMADD f7, B5, A4, f7 1196 LFPDUX A2, AO2, INC4 1197 FXCPMADD f11, B2, A4, f11 1198 LFPDUX A7, AO, INC4 1199 FXCSMADD f15, B2, A4, f15 1200 nop 1201 1202## 4 ## 1203 FXCPMADD f0, B6, A9, f0 1204 nop 1205 FXCSMADD f4, B6, A9, f4 1206 nop 1207 FXCPMADD f8, B4, A9, f8 1208 LFPDUX B2, BO2, INC4 1209 FXCSMADD f12, B4, A9, f12 1210 LFPDUX B5, BO, INC4 1211 1212 FXCPMADD f1, B6, A2, f1 1213 nop 1214 FXCSMADD f5, B6, A2, f5 1215 LFPDUX A4, AO2, INC4 1216 FXCPMADD f9, B4, A2, f9 1217 LFPDUX A8, AO, INC4 1218 FXCSMADD f13, B4, A2, f13 1219 nop 1220 1221 FXCPMADD f2, B6, A10, f2 1222 nop 1223 FXCSMADD f6, B6, A10, f6 1224 nop 1225 FXCPMADD f10, B4, A10, f10 1226 nop 1227 FXCSMADD f14, B4, A10, f14 1228 nop 1229 1230 FXCPMADD f3, B6, A4, f3 1231 LFPDUX A2, AO2, INC4 1232 FXCSMADD f7, B6, A4, f7 1233 LFPDUX A9, AO, INC4 1234 FXCPMADD f11, B4, A4, f11 1235 nop 1236 FXCSMADD f15, B4, A4, f15 1237 bdnz+ .L12 1238 .align 4 1239 1240.L13: 1241## 1 ## 1242 1243 FXCPMADD f0, B1, A1, f0 1244 nop 1245 FXCSMADD f4, B1, A1, f4 1246 nop 1247 FXCPMADD f8, B2, A1, f8 1248 LFPDUX B4, BO2, INC4 1249 FXCSMADD f12, B2, A1, f12 1250 LFPDUX B6, BO, INC4 1251 1252 FXCPMADD f1, B1, A2, f1 1253 nop 1254 FXCSMADD f5, B1, A2, f5 1255 LFPDUX A4, AO2, INC4 1256 FXCPMADD f9, B2, A2, f9 1257 LFPDUX A10, AO, INC4 1258 FXCSMADD f13, B2, A2, f13 1259 nop 1260 1261 FXCPMADD f2, B1, A3, f2 1262 nop 1263 FXCSMADD f6, B1, A3, f6 1264 nop 1265 FXCPMADD f10, B2, A3, f10 1266 nop 1267 FXCSMADD f14, B2, A3, f14 1268 nop 1269 1270 FXCPMADD f3, B1, A4, f3 1271 nop 1272 FXCSMADD f7, B1, A4, f7 1273 LFPDUX A2, AO2, INC4 1274 FXCPMADD f11, B2, A4, f11 1275 nop 1276 FXCSMADD f15, B2, A4, f15 1277 nop 1278 1279## 2 ## 1280 1281 FXCPMADD f0, B3, A5, f0 1282 nop 1283 FXCSMADD f4, B3, A5, f4 1284 nop 1285 FXCPMADD f8, B4, A5, f8 1286 LFPDUX B2, BO2, INC4 1287 FXCSMADD f12, B4, A5, f12 1288 nop 1289 1290 FXCPMADD f1, B3, A2, f1 1291 nop 1292 FXCSMADD f5, B3, A2, f5 1293 LFPDUX A4, AO2, INC4 1294 FXCPMADD f9, B4, A2, f9 1295 nop 1296 FXCSMADD f13, B4, A2, f13 1297 nop 1298 1299 FXCPMADD f2, B3, A6, f2 1300 nop 1301 FXCSMADD f6, B3, A6, f6 1302 nop 1303 FXCPMADD f10, B4, A6, f10 1304 nop 1305 FXCSMADD f14, B4, A6, f14 1306 nop 1307 1308 FXCPMADD f3, B3, A4, f3 1309 nop 1310 FXCSMADD f7, B3, A4, f7 1311 LFPDUX A2, AO2, INC4 1312 FXCPMADD f11, B4, A4, f11 1313 nop 1314 FXCSMADD f15, B4, A4, f15 1315 nop 1316 1317## 3 ## 1318 1319 FXCPMADD f0, B5, A7, f0 1320 nop 1321 FXCSMADD f4, B5, A7, f4 1322 nop 1323 FXCPMADD f8, B2, A7, f8 1324 LFPDUX B4, BO2, INC4 1325 FXCSMADD f12, B2, A7, f12 1326 nop 1327 1328 FXCPMADD f1, B5, A2, f1 1329 nop 1330 FXCSMADD f5, B5, A2, f5 1331 LFPDUX A4, AO2, INC4 1332 FXCPMADD f9, B2, A2, f9 1333 nop 1334 FXCSMADD f13, B2, A2, f13 1335 nop 1336 1337 FXCPMADD f2, B5, A8, f2 1338 nop 1339 FXCSMADD f6, B5, A8, f6 1340 nop 1341 FXCPMADD f10, B2, A8, f10 1342 nop 1343 FXCSMADD f14, B2, A8, f14 1344 nop 1345 1346 FXCPMADD f3, B5, A4, f3 1347 nop 1348 FXCSMADD f7, B5, A4, f7 1349 LFPDUX A2, AO2, INC4 1350 FXCPMADD f11, B2, A4, f11 1351 nop 1352 FXCSMADD f15, B2, A4, f15 1353 nop 1354 1355## 4 ## 1356 1357 FXCPMADD f0, B6, A9, f0 1358 nop 1359 FXCSMADD f4, B6, A9, f4 1360 nop 1361 FXCPMADD f8, B4, A9, f8 1362 nop 1363 FXCSMADD f12, B4, A9, f12 1364 nop 1365 1366 FXCPMADD f1, B6, A2, f1 1367 nop 1368 FXCSMADD f5, B6, A2, f5 1369 LFPDUX A4, AO2, INC4 1370 FXCPMADD f9, B4, A2, f9 1371 nop 1372 FXCSMADD f13, B4, A2, f13 1373 nop 1374 1375 FXCPMADD f2, B6, A10, f2 1376 nop 1377 FXCSMADD f6, B6, A10, f6 1378 nop 1379 FXCPMADD f10, B4, A10, f10 1380 nop 1381 FXCSMADD f14, B4, A10, f14 1382 nop 1383 1384 FXCPMADD f3, B6, A4, f3 1385 nop 1386 FXCSMADD f7, B6, A4, f7 1387 nop 1388 FXCPMADD f11, B4, A4, f11 1389 nop 1390 FXCSMADD f15, B4, A4, f15 1391 nop 1392 .align 4 1393 1394.L14: 1395#if defined(LT) || defined(RN) 1396 andi. r0, KK, 3 1397 mtspr CTR, r0 1398 ble+ .L18 1399#else 1400 andi. r0, TEMP, 3 1401 mtspr CTR, r0 1402 ble+ .L18 1403#endif 1404 1405.L15: 1406 LFPDUX A2, AO, INC4 1407 LFPDUX A4, AO2, INC4 1408 LFPDUX A10, BO, INC4 1409 LFPDUX B4, BO2, INC4 1410 bdz- .L17 1411 .align 4 1412 1413.L16: 1414 FXCPMADD f0, A10, A2, f0 1415 FXCSMADD f4, A10, A2, f4 1416 FXCPMADD f8, B4, A2, f8 1417 FXCSMADD f12, B4, A2, f12 1418 LFPDUX A2, AO, INC4 1419 1420 FXCPMADD f1, A10, A4, f1 1421 FXCSMADD f5, A10, A4, f5 1422 FXCPMADD f9, B4, A4, f9 1423 FXCSMADD f13, B4, A4, f13 1424 LFPDUX A4, AO2, INC4 1425 1426 FXCPMADD f2, A10, A2, f2 1427 FXCSMADD f6, A10, A2, f6 1428 FXCPMADD f10, B4, A2, f10 1429 FXCSMADD f14, B4, A2, f14 1430 LFPDUX A2, AO, INC4 1431 1432 FXCPMADD f3, A10, A4, f3 1433 FXCSMADD f7, A10, A4, f7 1434 LFPDUX A10, BO, INC4 1435 FXCPMADD f11, B4, A4, f11 1436 FXCSMADD f15, B4, A4, f15 1437 LFPDUX A4, AO2, INC4 1438 LFPDUX B4, BO2, INC4 1439 bdnz+ .L16 1440 .align 4 1441 1442.L17: 1443 FXCPMADD f0, A10, A2, f0 1444 FXCSMADD f4, A10, A2, f4 1445 FXCPMADD f8, B4, A2, f8 1446 FXCSMADD f12, B4, A2, f12 1447 LFPDUX A2, AO, INC4 1448 1449 FXCPMADD f1, A10, A4, f1 1450 FXCSMADD f5, A10, A4, f5 1451 FXCPMADD f9, B4, A4, f9 1452 FXCSMADD f13, B4, A4, f13 1453 LFPDUX A4, AO2, INC4 1454 1455 FXCPMADD f2, A10, A2, f2 1456 FXCSMADD f6, A10, A2, f6 1457 FXCPMADD f10, B4, A2, f10 1458 FXCSMADD f14, B4, A2, f14 1459 1460 FXCPMADD f3, A10, A4, f3 1461 FXCSMADD f7, A10, A4, f7 1462 FXCPMADD f11, B4, A4, f11 1463 FXCSMADD f15, B4, A4, f15 1464 .align 4 1465 1466.L18: 1467 fpadd f0, f0, f4 1468 fpadd f8, f8, f12 1469 fpadd f1, f1, f5 1470 fpadd f9, f9, f13 1471 1472 fpadd f2, f2, f6 1473 fpadd f10, f10, f14 1474 fpadd f3, f3, f7 1475 fpadd f11, f11, f15 1476 1477#if defined(LN) || defined(RT) 1478#ifdef LN 1479 subi r0, KK, 4 1480#else 1481 subi r0, KK, 2 1482#endif 1483 slwi TEMP, r0, 2 + ZBASE_SHIFT 1484 slwi r0, r0, 1 + ZBASE_SHIFT 1485 add AO, AORIG, TEMP 1486 add BO, B, r0 1487 addi AO2, AO, 2 * SIZE 1488 addi BO, BO, - 4 * SIZE 1489 addi BO2, BO, 2 * SIZE 1490#endif 1491 1492#if defined(LN) || defined(LT) 1493 LFPDUX f16, BO, INC4 1494 LFPDUX f20, BO2, INC4 1495 LFPDUX f17, BO, INC4 1496 LFPDUX f21, BO2, INC4 1497 LFPDUX f18, BO, INC4 1498 LFPDUX f22, BO2, INC4 1499 LFPDUX f19, BO, INC4 1500 LFPDUX f23, BO2, INC4 1501 1502 subi BO, BO, 16 * SIZE 1503 subi BO2, BO2, 16 * SIZE 1504#else 1505 LFPDUX f16, AO, INC4 1506 LFPDUX f17, AO2, INC4 1507 LFPDUX f18, AO, INC4 1508 LFPDUX f19, AO2, INC4 1509 LFPDUX f20, AO, INC4 1510 LFPDUX f21, AO2, INC4 1511 LFPDUX f22, AO, INC4 1512 LFPDUX f23, AO2, INC4 1513 1514 subi AO, AO, 16 * SIZE 1515 subi AO2, AO2, 16 * SIZE 1516#endif 1517 1518 fpsub f0, f16, f0 1519 fpsub f1, f17, f1 1520 fpsub f2, f18, f2 1521 fpsub f3, f19, f3 1522 1523 fpsub f8, f20, f8 1524 fpsub f9, f21, f9 1525 fpsub f10, f22, f10 1526 fpsub f11, f23, f11 1527 1528#ifdef LN 1529 LFPDUX A1, AO, INC4 1530 add AO2, AO2, INC4 1531 add AO, AO, INC4 1532 add AO2, AO2, INC4 1533 1534 LFPDUX A2, AO, INC4 1535 LFPDUX A3, AO2, INC4 1536 add AO, AO, INC4 1537 add AO2, AO2, INC4 1538 1539 LFPDUX A4, AO, INC4 1540 LFPDUX A5, AO2, INC4 1541 LFPDUX A6, AO, INC4 1542 add AO2, AO2, INC4 1543 1544 LFPDUX A7, AO, INC4 1545 LFPDUX A8, AO2, INC4 1546 LFPDUX A9, AO, INC4 1547 LFPDUX A10, AO2, INC4 1548 1549 subi AO, AO, 32 * SIZE 1550 subi AO2, AO2, 32 * SIZE 1551 1552 fxpmul f4, A10, f3 1553 fxpmul f5, A10, f11 1554 FXCXNPMA f3, A10, f3, f4 1555 FXCXNPMA f11, A10, f11, f5 1556 1557 fxcpnmsub f2, A9, f3, f2 1558 fxcpnmsub f10, A9, f11, f10 1559 FXCXNSMA f2, A9, f3, f2 1560 FXCXNSMA f10, A9, f11, f10 1561 1562 fxcpnmsub f1, A8, f3, f1 1563 fxcpnmsub f9, A8, f11, f9 1564 FXCXNSMA f1, A8, f3, f1 1565 FXCXNSMA f9, A8, f11, f9 1566 1567 fxcpnmsub f0, A7, f3, f0 1568 fxcpnmsub f8, A7, f11, f8 1569 FXCXNSMA f0, A7, f3, f0 1570 FXCXNSMA f8, A7, f11, f8 1571 1572 fxpmul f4, A6, f2 1573 fxpmul f5, A6, f10 1574 FXCXNPMA f2, A6, f2, f4 1575 FXCXNPMA f10, A6, f10, f5 1576 1577 fxcpnmsub f1, A5, f2, f1 1578 fxcpnmsub f9, A5, f10, f9 1579 FXCXNSMA f1, A5, f2, f1 1580 FXCXNSMA f9, A5, f10, f9 1581 1582 fxcpnmsub f0, A4, f2, f0 1583 fxcpnmsub f8, A4, f10, f8 1584 FXCXNSMA f0, A4, f2, f0 1585 FXCXNSMA f8, A4, f10, f8 1586 1587 fxpmul f4, A3, f1 1588 fxpmul f5, A3, f9 1589 FXCXNPMA f1, A3, f1, f4 1590 FXCXNPMA f9, A3, f9, f5 1591 1592 fxcpnmsub f0, A2, f1, f0 1593 fxcpnmsub f8, A2, f9, f8 1594 FXCXNSMA f0, A2, f1, f0 1595 FXCXNSMA f8, A2, f9, f8 1596 1597 fxpmul f4, A1, f0 1598 fxpmul f5, A1, f8 1599 FXCXNPMA f0, A1, f0, f4 1600 FXCXNPMA f8, A1, f8, f5 1601#endif 1602 1603#ifdef LT 1604 LFPDUX A1, AO, INC4 1605 LFPDUX A2, AO2, INC4 1606 LFPDUX A3, AO, INC4 1607 LFPDUX A4, AO2, INC4 1608 1609 add AO, AO, INC4 1610 LFPDUX A5, AO2, INC4 1611 LFPDUX A6, AO, INC4 1612 LFPDUX A7, AO2, INC4 1613 1614 add AO, AO, INC4 1615 add AO2, AO2, INC4 1616 LFPDUX A8, AO, INC4 1617 LFPDUX A9, AO2, INC4 1618 1619 add AO, AO, INC4 1620 add AO2, AO2, INC4 1621 add AO, AO, INC4 1622 LFPDUX A10, AO2, INC4 1623 1624 subi AO, AO, 32 * SIZE 1625 subi AO2, AO2, 32 * SIZE 1626 1627 fxpmul f4, A1, f0 1628 fxpmul f5, A1, f8 1629 FXCXNPMA f0, A1, f0, f4 1630 FXCXNPMA f8, A1, f8, f5 1631 1632 fxcpnmsub f1, A2, f0, f1 1633 fxcpnmsub f9, A2, f8, f9 1634 FXCXNSMA f1, A2, f0, f1 1635 FXCXNSMA f9, A2, f8, f9 1636 1637 fxcpnmsub f2, A3, f0, f2 1638 fxcpnmsub f10, A3, f8, f10 1639 FXCXNSMA f2, A3, f0, f2 1640 FXCXNSMA f10, A3, f8, f10 1641 1642 fxcpnmsub f3, A4, f0, f3 1643 fxcpnmsub f11, A4, f8, f11 1644 FXCXNSMA f3, A4, f0, f3 1645 FXCXNSMA f11, A4, f8, f11 1646 1647 fxpmul f6, A5, f1 1648 fxpmul f7, A5, f9 1649 FXCXNPMA f1, A5, f1, f6 1650 FXCXNPMA f9, A5, f9, f7 1651 1652 fxcpnmsub f2, A6, f1, f2 1653 fxcpnmsub f10, A6, f9, f10 1654 FXCXNSMA f2, A6, f1, f2 1655 FXCXNSMA f10, A6, f9, f10 1656 1657 fxcpnmsub f3, A7, f1, f3 1658 fxcpnmsub f11, A7, f9, f11 1659 FXCXNSMA f3, A7, f1, f3 1660 FXCXNSMA f11, A7, f9, f11 1661 1662 fxpmul f4, A8, f2 1663 fxpmul f5, A8, f10 1664 FXCXNPMA f2, A8, f2, f4 1665 FXCXNPMA f10, A8, f10, f5 1666 1667 fxcpnmsub f3, A9, f2, f3 1668 fxcpnmsub f11, A9, f10, f11 1669 FXCXNSMA f3, A9, f2, f3 1670 FXCXNSMA f11, A9, f10, f11 1671 1672 fxpmul f6, A10, f3 1673 fxpmul f7, A10, f11 1674 FXCXNPMA f3, A10, f3, f6 1675 FXCXNPMA f11, A10, f11, f7 1676#endif 1677 1678#ifdef RN 1679 LFPDUX A1, BO, INC4 1680 LFPDUX A2, BO2, INC4 1681 add BO, BO, INC4 1682 LFPDUX A3, BO2, INC4 1683 1684 subi BO, BO, 8 * SIZE 1685 subi BO2, BO2, 8 * SIZE 1686 1687 fxpmul f4, A1, f0 1688 fxpmul f5, A1, f1 1689 fxpmul f6, A1, f2 1690 fxpmul f7, A1, f3 1691 1692 FXCXNPMA f0, A1, f0, f4 1693 FXCXNPMA f1, A1, f1, f5 1694 FXCXNPMA f2, A1, f2, f6 1695 FXCXNPMA f3, A1, f3, f7 1696 1697 fxcpnmsub f8, A2, f0, f8 1698 fxcpnmsub f9, A2, f1, f9 1699 fxcpnmsub f10, A2, f2, f10 1700 fxcpnmsub f11, A2, f3, f11 1701 1702 FXCXNSMA f8, A2, f0, f8 1703 FXCXNSMA f9, A2, f1, f9 1704 FXCXNSMA f10, A2, f2, f10 1705 FXCXNSMA f11, A2, f3, f11 1706 1707 fxpmul f4, A3, f8 1708 fxpmul f5, A3, f9 1709 fxpmul f6, A3, f10 1710 fxpmul f7, A3, f11 1711 1712 FXCXNPMA f8, A3, f8, f4 1713 FXCXNPMA f9, A3, f9, f5 1714 FXCXNPMA f10, A3, f10, f6 1715 FXCXNPMA f11, A3, f11, f7 1716#endif 1717 1718#ifdef RT 1719 LFPDUX A1, BO, INC4 1720 add BO2, BO2, INC4 1721 LFPDUX A2, BO, INC4 1722 LFPDUX A3, BO2, INC4 1723 1724 subi BO, BO, 8 * SIZE 1725 subi BO2, BO2, 8 * SIZE 1726 1727 fxpmul f4, A3, f8 1728 fxpmul f5, A3, f9 1729 fxpmul f6, A3, f10 1730 fxpmul f7, A3, f11 1731 1732 FXCXNPMA f8, A3, f8, f4 1733 FXCXNPMA f9, A3, f9, f5 1734 FXCXNPMA f10, A3, f10, f6 1735 FXCXNPMA f11, A3, f11, f7 1736 1737 fxcpnmsub f0, A2, f8, f0 1738 fxcpnmsub f1, A2, f9, f1 1739 fxcpnmsub f2, A2, f10, f2 1740 fxcpnmsub f3, A2, f11, f3 1741 1742 FXCXNSMA f0, A2, f8, f0 1743 FXCXNSMA f1, A2, f9, f1 1744 FXCXNSMA f2, A2, f10, f2 1745 FXCXNSMA f3, A2, f11, f3 1746 1747 fxpmul f4, A1, f0 1748 fxpmul f5, A1, f1 1749 fxpmul f6, A1, f2 1750 fxpmul f7, A1, f3 1751 1752 FXCXNPMA f0, A1, f0, f4 1753 FXCXNPMA f1, A1, f1, f5 1754 FXCXNPMA f2, A1, f2, f6 1755 FXCXNPMA f3, A1, f3, f7 1756#endif 1757 1758#ifdef LN 1759 subi CO1, CO1, 8 * SIZE 1760 subi CO2, CO2, 8 * SIZE 1761#endif 1762 1763#if defined(LN) || defined(LT) 1764 STFPDUX f0, BO, INC4 1765 STFPDUX f8, BO2, INC4 1766 STFPDUX f1, BO, INC4 1767 STFPDUX f9, BO2, INC4 1768 STFPDUX f2, BO, INC4 1769 STFPDUX f10, BO2, INC4 1770 STFPDUX f3, BO, INC4 1771 STFPDUX f11, BO2, INC4 1772 1773 subi BO, BO, 16 * SIZE 1774 subi BO2, BO2, 16 * SIZE 1775#else 1776 STFPDUX f0, AO, INC4 1777 STFPDUX f1, AO2, INC4 1778 STFPDUX f2, AO, INC4 1779 STFPDUX f3, AO2, INC4 1780 STFPDUX f8, AO, INC4 1781 STFPDUX f9, AO2, INC4 1782 STFPDUX f10, AO, INC4 1783 STFPDUX f11, AO2, INC4 1784 1785 subi AO, AO, 16 * SIZE 1786 subi AO2, AO2, 16 * SIZE 1787#endif 1788 1789 STFDUX f0, CO1, INC 1790 STFSDUX f0, CO1, INC 1791 STFDUX f1, CO1, INC 1792 STFSDUX f1, CO1, INC 1793 STFDUX f2, CO1, INC 1794 STFSDUX f2, CO1, INC 1795 STFDUX f3, CO1, INC 1796 STFSDUX f3, CO1, INC 1797 1798 STFDUX f8, CO2, INC 1799 STFSDUX f8, CO2, INC 1800 STFDUX f9, CO2, INC 1801 STFSDUX f9, CO2, INC 1802 STFDUX f10, CO2, INC 1803 STFSDUX f10, CO2, INC 1804 STFDUX f11, CO2, INC 1805 STFSDUX f11, CO2, INC 1806 1807#ifdef LN 1808 subi CO1, CO1, 8 * SIZE 1809 subi CO2, CO2, 8 * SIZE 1810#endif 1811 1812#ifdef RT 1813 slwi r0, K, 2 + ZBASE_SHIFT 1814 add AORIG, AORIG, r0 1815#endif 1816 1817#if defined(LT) || defined(RN) 1818 sub TEMP, K, KK 1819 slwi r0, TEMP, 2 + ZBASE_SHIFT 1820 slwi TEMP, TEMP, 1 + ZBASE_SHIFT 1821 add AO, AO, r0 1822 add BO, BO, TEMP 1823#endif 1824 1825#ifdef LT 1826 addi KK, KK, 4 1827#endif 1828 1829#ifdef LN 1830 subi KK, KK, 4 1831#endif 1832 1833 addic. I, I, -1 1834 li r0, FZERO 1835 1836 lfpsx f0, SP, r0 1837 bgt+ .L11 1838 .align 4 1839 1840.L49: 1841#ifdef LN 1842 slwi r0, K, 1 + ZBASE_SHIFT 1843 add B, B, r0 1844#endif 1845 1846#if defined(LT) || defined(RN) 1847 addi B, BO, 4 * SIZE 1848#endif 1849 1850#ifdef RN 1851 addi KK, KK, 2 1852#endif 1853 1854#ifdef RT 1855 subi KK, KK, 2 1856#endif 1857 1858 addic. J, J, -1 1859 bgt+ .L10 1860 .align 4 1861 1862.L50: 1863 andi. J, N, 1 1864 beq .L999 1865 1866#ifdef RT 1867 slwi r0, K, 0 + ZBASE_SHIFT 1868 sub B, B, r0 1869 1870 sub C, C, LDC 1871#endif 1872 1873 mr CO1, C 1874 1875#ifdef LN 1876 add KK, M, OFFSET 1877#endif 1878 1879#ifdef LT 1880 mr KK, OFFSET 1881#endif 1882 1883#if defined(LN) || defined(RT) 1884 addi AORIG, A, -2 * SIZE 1885#else 1886 addi AO, A, -2 * SIZE 1887#endif 1888#ifndef RT 1889 add C, CO2, LDC 1890#endif 1891 li r0, FZERO 1892 lfpsx f0, SP, r0 1893 1894 andi. I, M, 1 1895 beq .L60 1896 1897#if defined(LT) || defined(RN) 1898 addi BO, B, - 2 * SIZE 1899 fpmr f1, f0 1900 fpmr f2, f0 1901 fpmr f3, f0 1902 srawi. r0, KK, 3 1903 mtspr CTR, r0 1904 ble .L74 1905#else 1906#ifdef LN 1907 slwi r0, K, 0 + ZBASE_SHIFT 1908 sub AORIG, AORIG, r0 1909#endif 1910 1911 slwi TEMP, KK, 0 + ZBASE_SHIFT 1912 add AO, AORIG, TEMP 1913 add BO, B, TEMP 1914 1915 sub TEMP, K, KK 1916 1917 addi BO, BO, - 2 * SIZE 1918 fpmr f1, f0 1919 fpmr f2, f0 1920 fpmr f3, f0 1921 srawi. r0, TEMP, 3 1922 mtspr CTR, r0 1923 ble .L74 1924#endif 1925 1926 LFPDUX A1, AO, INC2 1927 LFPDUX B1, BO, INC2 1928 LFPDUX A2, AO, INC2 1929 LFPDUX B2, BO, INC2 1930 LFPDUX A3, AO, INC2 1931 LFPDUX B3, BO, INC2 1932 LFPDUX A4, AO, INC2 1933 LFPDUX B4, BO, INC2 1934 1935 LFPDUX A5, AO, INC2 1936 LFPDUX B5, BO, INC2 1937 LFPDUX A6, AO, INC2 1938 LFPDUX B6, BO, INC2 1939 LFPDUX A7, AO, INC2 1940 LFPDUX A9, BO, INC2 1941 LFPDUX A8, AO, INC2 1942 LFPDUX A10, BO, INC2 1943 bdz- .L73 1944 .align 4 1945 1946.L72: 1947 FXCPMADD f0, B1, A1, f0 1948 FXCSMADD f1, B1, A1, f1 1949 LFPDUX A1, AO, INC2 1950 LFPDUX B1, BO, INC2 1951 FXCPMADD f2, B2, A2, f2 1952 FXCSMADD f3, B2, A2, f3 1953 LFPDUX A2, AO, INC2 1954 LFPDUX B2, BO, INC2 1955 1956 FXCPMADD f0, B3, A3, f0 1957 FXCSMADD f1, B3, A3, f1 1958 LFPDUX A3, AO, INC2 1959 LFPDUX B3, BO, INC2 1960 FXCPMADD f2, B4, A4, f2 1961 FXCSMADD f3, B4, A4, f3 1962 LFPDUX A4, AO, INC2 1963 LFPDUX B4, BO, INC2 1964 1965 FXCPMADD f0, B5, A5, f0 1966 FXCSMADD f1, B5, A5, f1 1967 LFPDUX A5, AO, INC2 1968 LFPDUX B5, BO, INC2 1969 FXCPMADD f2, B6, A6, f2 1970 FXCSMADD f3, B6, A6, f3 1971 LFPDUX A6, AO, INC2 1972 LFPDUX B6, BO, INC2 1973 1974 FXCPMADD f0, A9, A7, f0 1975 FXCSMADD f1, A9, A7, f1 1976 LFPDUX A7, AO, INC2 1977 LFPDUX A9, BO, INC2 1978 FXCPMADD f2, A10, A8, f2 1979 FXCSMADD f3, A10, A8, f3 1980 LFPDUX A8, AO, INC2 1981 LFPDUX A10, BO, INC2 1982 1983 bdnz+ .L72 1984 .align 4 1985 1986.L73: 1987 FXCPMADD f0, B1, A1, f0 1988 FXCSMADD f1, B1, A1, f1 1989 FXCPMADD f2, B2, A2, f2 1990 FXCSMADD f3, B2, A2, f3 1991 1992 FXCPMADD f0, B3, A3, f0 1993 FXCSMADD f1, B3, A3, f1 1994 FXCPMADD f2, B4, A4, f2 1995 FXCSMADD f3, B4, A4, f3 1996 1997 FXCPMADD f0, B5, A5, f0 1998 FXCSMADD f1, B5, A5, f1 1999 FXCPMADD f2, B6, A6, f2 2000 FXCSMADD f3, B6, A6, f3 2001 2002 FXCPMADD f0, A9, A7, f0 2003 FXCSMADD f1, A9, A7, f1 2004 FXCPMADD f2, A10, A8, f2 2005 FXCSMADD f3, A10, A8, f3 2006 .align 4 2007 2008.L74: 2009#if defined(LT) || defined(RN) 2010 andi. r0, KK, 7 2011 mtspr CTR, r0 2012 ble+ .L78 2013#else 2014 andi. r0, TEMP, 7 2015 mtspr CTR, r0 2016 ble+ .L78 2017#endif 2018 2019 LFPDUX A1, AO, INC2 2020 LFPDUX B1, BO, INC2 2021 bdz- .L77 2022 .align 4 2023 2024.L76: 2025 FXCPMADD f0, B1, A1, f0 2026 FXCSMADD f1, B1, A1, f1 2027 LFPDUX A1, AO, INC2 2028 LFPDUX B1, BO, INC2 2029 bdnz+ .L76 2030 .align 4 2031 2032.L77: 2033 FXCPMADD f0, B1, A1, f0 2034 FXCSMADD f1, B1, A1, f1 2035 .align 4 2036 2037.L78: 2038 fpadd f0, f0, f2 2039 fpadd f1, f1, f3 2040 2041 fpadd f0, f0, f1 2042 2043#if defined(LN) || defined(RT) 2044#ifdef LN 2045 subi r0, KK, 1 2046#else 2047 subi r0, KK, 1 2048#endif 2049 slwi TEMP, r0, 0 + ZBASE_SHIFT 2050 add AO, AORIG, TEMP 2051 add BO, B, TEMP 2052 addi BO, BO, - 2 * SIZE 2053#endif 2054 2055#if defined(LN) || defined(LT) 2056 LFPDX f16, BO, INC2 2057#else 2058 LFPDX f16, AO, INC2 2059#endif 2060 2061 fpsub f0, f16, f0 2062 2063#ifdef LN 2064 LFPDX A1, AO, INC2 2065 2066 fxpmul f4, A1, f0 2067 FXCXNPMA f0, A1, f0, f4 2068#endif 2069 2070#ifdef LT 2071 LFPDX A1, AO, INC2 2072 2073 fxpmul f4, A1, f0 2074 FXCXNPMA f0, A1, f0, f4 2075#endif 2076 2077#ifdef RN 2078 LFPDX A1, BO, INC2 2079 2080 fxpmul f4, A1, f0 2081 FXCXNPMA f0, A1, f0, f4 2082#endif 2083 2084#ifdef RT 2085 LFPDX A1, BO, INC2 2086 2087 fxpmul f4, A1, f0 2088 FXCXNPMA f0, A1, f0, f4 2089#endif 2090 2091#ifdef LN 2092 subi CO1, CO1, 2 * SIZE 2093#endif 2094 2095#if defined(LN) || defined(LT) 2096 STFPDX f0, BO, INC2 2097#else 2098 STFPDX f0, AO, INC2 2099#endif 2100 2101 STFDUX f0, CO1, INC 2102 STFSDUX f0, CO1, INC 2103 2104#ifdef LN 2105 subi CO1, CO1, 2 * SIZE 2106#endif 2107 2108#ifdef RT 2109 slwi r0, K, 0 + ZBASE_SHIFT 2110 add AORIG, AORIG, r0 2111#endif 2112 2113#if defined(LT) || defined(RN) 2114 sub TEMP, K, KK 2115 slwi TEMP, TEMP, 0 + ZBASE_SHIFT 2116 add AO, AO, TEMP 2117 add BO, BO, TEMP 2118#endif 2119 2120#ifdef LT 2121 addi KK, KK, 1 2122#endif 2123 2124#ifdef LN 2125 subi KK, KK, 1 2126#endif 2127 2128 li r0, FZERO 2129 lfpsx f0, SP, r0 2130 .align 4 2131 2132.L60: 2133 andi. I, M, 2 2134 beq .L70 2135 2136#if defined(LT) || defined(RN) 2137 fpmr f1, f0 2138 addi BO, B, - 2 * SIZE 2139 fpmr f2, f0 2140 fpmr f3, f0 2141 srawi. r0, KK, 2 2142 mtspr CTR, r0 2143 ble .L64 2144#else 2145#ifdef LN 2146 slwi r0, K, 1 + ZBASE_SHIFT 2147 sub AORIG, AORIG, r0 2148#endif 2149 2150 slwi r0 , KK, 1 + ZBASE_SHIFT 2151 slwi TEMP, KK, 0 + ZBASE_SHIFT 2152 add AO, AORIG, r0 2153 add BO, B, TEMP 2154 2155 sub TEMP, K, KK 2156 2157 fpmr f1, f0 2158 addi BO, BO, - 2 * SIZE 2159 fpmr f2, f0 2160 fpmr f3, f0 2161 srawi. r0, TEMP, 2 2162 mtspr CTR, r0 2163 ble .L64 2164#endif 2165 2166 LFPDUX B1, BO, INC2 2167 LFPDUX A1, AO, INC2 2168 LFPDUX A2, AO, INC2 2169 LFPDUX B2, BO, INC2 2170 LFPDUX A3, AO, INC2 2171 LFPDUX A4, AO, INC2 2172 2173 LFPDUX B3, BO, INC2 2174 LFPDUX A5, AO, INC2 2175 LFPDUX A6, AO, INC2 2176 LFPDUX B4, BO, INC2 2177 LFPDUX A7, AO, INC2 2178 LFPDUX A8, AO, INC2 2179 bdz- .L63 2180 .align 4 2181 2182.L62: 2183 FXCPMADD f0, B1, A1, f0 2184 FXCSMADD f2, B1, A1, f2 2185 LFPDUX A1, AO, INC2 2186 FXCPMADD f1, B1, A2, f1 2187 FXCSMADD f3, B1, A2, f3 2188 LFPDUX A2, AO, INC2 2189 LFPDUX B1, BO, INC2 2190 2191 FXCPMADD f0, B2, A3, f0 2192 FXCSMADD f2, B2, A3, f2 2193 LFPDUX A3, AO, INC2 2194 FXCPMADD f1, B2, A4, f1 2195 FXCSMADD f3, B2, A4, f3 2196 LFPDUX A4, AO, INC2 2197 LFPDUX B2, BO, INC2 2198 2199 FXCPMADD f0, B3, A5, f0 2200 FXCSMADD f2, B3, A5, f2 2201 LFPDUX A5, AO, INC2 2202 FXCPMADD f1, B3, A6, f1 2203 FXCSMADD f3, B3, A6, f3 2204 LFPDUX A6, AO, INC2 2205 LFPDUX B3, BO, INC2 2206 2207 FXCPMADD f0, B4, A7, f0 2208 FXCSMADD f2, B4, A7, f2 2209 LFPDUX A7, AO, INC2 2210 FXCPMADD f1, B4, A8, f1 2211 FXCSMADD f3, B4, A8, f3 2212 LFPDUX A8, AO, INC2 2213 LFPDUX B4, BO, INC2 2214 bdnz+ .L62 2215 .align 4 2216 2217.L63: 2218 FXCPMADD f0, B1, A1, f0 2219 FXCSMADD f2, B1, A1, f2 2220 FXCPMADD f1, B1, A2, f1 2221 FXCSMADD f3, B1, A2, f3 2222 2223 FXCPMADD f0, B2, A3, f0 2224 FXCSMADD f2, B2, A3, f2 2225 FXCPMADD f1, B2, A4, f1 2226 FXCSMADD f3, B2, A4, f3 2227 2228 FXCPMADD f0, B3, A5, f0 2229 FXCSMADD f2, B3, A5, f2 2230 FXCPMADD f1, B3, A6, f1 2231 FXCSMADD f3, B3, A6, f3 2232 2233 FXCPMADD f0, B4, A7, f0 2234 FXCSMADD f2, B4, A7, f2 2235 FXCPMADD f1, B4, A8, f1 2236 FXCSMADD f3, B4, A8, f3 2237 .align 4 2238 2239.L64: 2240#if defined(LT) || defined(RN) 2241 andi. r0, KK, 3 2242 mtspr CTR, r0 2243 ble+ .L68 2244#else 2245 andi. r0, TEMP, 3 2246 mtspr CTR, r0 2247 ble+ .L68 2248#endif 2249 2250 LFPDUX A1, AO, INC2 2251 LFPDUX B1, BO, INC2 2252 LFPDUX A2, AO, INC2 2253 bdz- .L67 2254 .align 4 2255 2256.L66: 2257 FXCPMADD f0, B1, A1, f0 2258 FXCSMADD f2, B1, A1, f2 2259 LFPDUX A1, AO, INC2 2260 FXCPMADD f1, B1, A2, f1 2261 FXCSMADD f3, B1, A2, f3 2262 LFPDUX B1, BO, INC2 2263 LFPDUX A2, AO, INC2 2264 bdnz+ .L66 2265 .align 4 2266 2267.L67: 2268 FXCPMADD f0, B1, A1, f0 2269 FXCSMADD f2, B1, A1, f2 2270 FXCPMADD f1, B1, A2, f1 2271 FXCSMADD f3, B1, A2, f3 2272 .align 4 2273 2274.L68: 2275 fpadd f0, f0, f2 2276 fpadd f1, f1, f3 2277 2278#if defined(LN) || defined(RT) 2279#ifdef LN 2280 subi r0, KK, 2 2281#else 2282 subi r0, KK, 1 2283#endif 2284 slwi TEMP, r0, 1 + ZBASE_SHIFT 2285 slwi r0, r0, 0 + ZBASE_SHIFT 2286 add AO, AORIG, TEMP 2287 add BO, B, r0 2288 addi BO, BO, - 2 * SIZE 2289#endif 2290 2291#if defined(LN) || defined(LT) 2292 LFPDUX f16, BO, INC2 2293 LFPDUX f17, BO, INC2 2294 2295 subi BO, BO, 4 * SIZE 2296#else 2297 LFPDUX f16, AO, INC2 2298 LFPDUX f17, AO, INC2 2299 2300 subi AO, AO, 4 * SIZE 2301#endif 2302 2303 fpsub f0, f16, f0 2304 fpsub f1, f17, f1 2305 2306#ifdef LN 2307 LFPDUX A1, AO, INC2 2308 add AO, AO, INC2 2309 LFPDUX A2, AO, INC2 2310 LFPDUX A3, AO, INC2 2311 2312 subi AO, AO, 8 * SIZE 2313 2314 fxpmul f4, A3, f1 2315 FXCXNPMA f1, A3, f1, f4 2316 2317 fxcpnmsub f0, A2, f1, f0 2318 FXCXNSMA f0, A2, f1, f0 2319 2320 fxpmul f4, A1, f0 2321 FXCXNPMA f0, A1, f0, f4 2322#endif 2323 2324#ifdef LT 2325 LFPDUX A1, AO, INC2 2326 LFPDUX A2, AO, INC2 2327 add AO, AO, INC2 2328 LFPDUX A3, AO, INC2 2329 2330 subi AO, AO, 8 * SIZE 2331 2332 fxpmul f4, A1, f0 2333 FXCXNPMA f0, A1, f0, f4 2334 2335 fxcpnmsub f1, A2, f0, f1 2336 FXCXNSMA f1, A2, f0, f1 2337 2338 fxpmul f6, A3, f1 2339 FXCXNPMA f1, A3, f1, f6 2340#endif 2341 2342#ifdef RN 2343 LFPDX A1, BO, INC2 2344 2345 fxpmul f4, A1, f0 2346 fxpmul f5, A1, f1 2347 2348 FXCXNPMA f0, A1, f0, f4 2349 FXCXNPMA f1, A1, f1, f5 2350#endif 2351 2352#ifdef RT 2353 LFPDX A1, BO, INC2 2354 2355 fxpmul f4, A1, f0 2356 fxpmul f5, A1, f1 2357 2358 FXCXNPMA f0, A1, f0, f4 2359 FXCXNPMA f1, A1, f1, f5 2360#endif 2361 2362#ifdef LN 2363 subi CO1, CO1, 4 * SIZE 2364#endif 2365 2366#if defined(LN) || defined(LT) 2367 STFPDUX f0, BO, INC2 2368 STFPDUX f1, BO, INC2 2369 2370 subi BO, BO, 4 * SIZE 2371#else 2372 STFPDUX f0, AO, INC2 2373 STFPDUX f1, AO, INC2 2374 2375 subi AO, AO, 4 * SIZE 2376#endif 2377 2378 STFDUX f0, CO1, INC 2379 STFSDUX f0, CO1, INC 2380 STFDUX f1, CO1, INC 2381 STFSDUX f1, CO1, INC 2382 2383#ifdef LN 2384 subi CO1, CO1, 4 * SIZE 2385#endif 2386 2387#ifdef RT 2388 slwi r0, K, 1 + ZBASE_SHIFT 2389 add AORIG, AORIG, r0 2390#endif 2391 2392#if defined(LT) || defined(RN) 2393 sub TEMP, K, KK 2394 slwi r0, TEMP, 1 + ZBASE_SHIFT 2395 slwi TEMP, TEMP, 0 + ZBASE_SHIFT 2396 add AO, AO, r0 2397 add BO, BO, TEMP 2398#endif 2399 2400#ifdef LT 2401 addi KK, KK, 2 2402#endif 2403 2404#ifdef LN 2405 subi KK, KK, 2 2406#endif 2407 2408 li r0, FZERO 2409 lfpsx f0, SP, r0 2410 .align 4 2411 2412.L70: 2413 srawi. I, M, 2 2414 ble .L89 2415 .align 4 2416 2417.L51: 2418#if defined(LT) || defined(RN) 2419 fpmr f4, f0 2420 addi BO, B, - 2 * SIZE 2421 fpmr f1, f0 2422 fpmr f5, f0 2423 fpmr f2, f0 2424 fpmr f6, f0 2425 fpmr f3, f0 2426 fpmr f7, f0 2427 srawi. r0, KK, 2 2428 mtspr CTR, r0 2429 ble .L54 2430#else 2431 2432#ifdef LN 2433 slwi r0, K, 2 + ZBASE_SHIFT 2434 sub AORIG, AORIG, r0 2435#endif 2436 2437 slwi r0 , KK, 2 + ZBASE_SHIFT 2438 slwi TEMP, KK, 0 + ZBASE_SHIFT 2439 add AO, AORIG, r0 2440 add BO, B, TEMP 2441 2442 sub TEMP, K, KK 2443 2444 fpmr f4, f0 2445 addi BO, BO, - 2 * SIZE 2446 fpmr f1, f0 2447 fpmr f5, f0 2448 fpmr f2, f0 2449 fpmr f6, f0 2450 fpmr f3, f0 2451 fpmr f7, f0 2452 srawi. r0, TEMP, 2 2453 mtspr CTR, r0 2454 ble .L54 2455#endif 2456 2457 LFPDUX B1, BO, INC2 2458 LFPDUX A1, AO, INC2 2459 LFPDUX A2, AO, INC2 2460 LFPDUX B2, BO, INC2 2461 LFPDUX A3, AO, INC2 2462 LFPDUX A4, AO, INC2 2463 2464 LFPDUX B3, BO, INC2 2465 LFPDUX A5, AO, INC2 2466 LFPDUX A6, AO, INC2 2467 LFPDUX A7, AO, INC2 2468 LFPDUX A8, AO, INC2 2469 bdz- .L53 2470 .align 4 2471 2472.L52: 2473 FXCPMADD f0, B1, A1, f0 2474 LFPDUX B4, BO, INC2 2475 FXCSMADD f4, B1, A1, f4 2476 LFPDUX A1, AO, INC2 2477 FXCPMADD f1, B1, A2, f1 2478 nop 2479 FXCSMADD f5, B1, A2, f5 2480 LFPDUX A2, AO, INC2 2481 2482 FXCPMADD f2, B1, A3, f2 2483 nop 2484 FXCSMADD f6, B1, A3, f6 2485 LFPDUX A3, AO, INC2 2486 FXCPMADD f3, B1, A4, f3 2487 nop 2488 FXCSMADD f7, B1, A4, f7 2489 LFPDUX A4, AO, INC2 2490 2491 FXCPMADD f0, B2, A5, f0 2492 LFPDUX B1, BO, INC2 2493 FXCSMADD f4, B2, A5, f4 2494 LFPDUX A5, AO, INC2 2495 FXCPMADD f1, B2, A6, f1 2496 nop 2497 FXCSMADD f5, B2, A6, f5 2498 LFPDUX A6, AO, INC2 2499 2500 FXCPMADD f2, B2, A7, f2 2501 nop 2502 FXCSMADD f6, B2, A7, f6 2503 LFPDUX A7, AO, INC2 2504 FXCPMADD f3, B2, A8, f3 2505 nop 2506 FXCSMADD f7, B2, A8, f7 2507 LFPDUX A8, AO, INC2 2508 2509 FXCPMADD f0, B3, A1, f0 2510 LFPDUX B2, BO, INC2 2511 FXCSMADD f4, B3, A1, f4 2512 LFPDUX A1, AO, INC2 2513 FXCPMADD f1, B3, A2, f1 2514 nop 2515 FXCSMADD f5, B3, A2, f5 2516 LFPDUX A2, AO, INC2 2517 2518 FXCPMADD f2, B3, A3, f2 2519 nop 2520 FXCSMADD f6, B3, A3, f6 2521 LFPDUX A3, AO, INC2 2522 FXCPMADD f3, B3, A4, f3 2523 nop 2524 FXCSMADD f7, B3, A4, f7 2525 LFPDUX A4, AO, INC2 2526 2527 FXCPMADD f0, B4, A5, f0 2528 LFPDUX B3, BO, INC2 2529 FXCSMADD f4, B4, A5, f4 2530 LFPDUX A5, AO, INC2 2531 FXCPMADD f1, B4, A6, f1 2532 nop 2533 FXCSMADD f5, B4, A6, f5 2534 LFPDUX A6, AO, INC2 2535 2536 FXCPMADD f2, B4, A7, f2 2537 nop 2538 FXCSMADD f6, B4, A7, f6 2539 LFPDUX A7, AO, INC2 2540 FXCPMADD f3, B4, A8, f3 2541 nop 2542 FXCSMADD f7, B4, A8, f7 2543 LFPDUX A8, AO, INC2 2544 bdnz+ .L52 2545 .align 4 2546 2547.L53: 2548 FXCPMADD f0, B1, A1, f0 2549 LFPDUX B4, BO, INC2 2550 FXCSMADD f4, B1, A1, f4 2551 LFPDUX A1, AO, INC2 2552 FXCPMADD f1, B1, A2, f1 2553 nop 2554 FXCSMADD f5, B1, A2, f5 2555 LFPDUX A2, AO, INC2 2556 2557 FXCPMADD f2, B1, A3, f2 2558 nop 2559 FXCSMADD f6, B1, A3, f6 2560 LFPDUX A3, AO, INC2 2561 FXCPMADD f3, B1, A4, f3 2562 nop 2563 FXCSMADD f7, B1, A4, f7 2564 LFPDUX A4, AO, INC2 2565 2566 FXCPMADD f0, B2, A5, f0 2567 nop 2568 FXCSMADD f4, B2, A5, f4 2569 LFPDUX A5, AO, INC2 2570 FXCPMADD f1, B2, A6, f1 2571 nop 2572 FXCSMADD f5, B2, A6, f5 2573 LFPDUX A6, AO, INC2 2574 2575 FXCPMADD f2, B2, A7, f2 2576 nop 2577 FXCSMADD f6, B2, A7, f6 2578 LFPDUX A7, AO, INC2 2579 FXCPMADD f3, B2, A8, f3 2580 nop 2581 FXCSMADD f7, B2, A8, f7 2582 LFPDUX A8, AO, INC2 2583 2584 FXCPMADD f0, B3, A1, f0 2585 FXCSMADD f4, B3, A1, f4 2586 FXCPMADD f1, B3, A2, f1 2587 FXCSMADD f5, B3, A2, f5 2588 2589 FXCPMADD f2, B3, A3, f2 2590 FXCSMADD f6, B3, A3, f6 2591 FXCPMADD f3, B3, A4, f3 2592 FXCSMADD f7, B3, A4, f7 2593 2594 FXCPMADD f0, B4, A5, f0 2595 FXCSMADD f4, B4, A5, f4 2596 FXCPMADD f1, B4, A6, f1 2597 FXCSMADD f5, B4, A6, f5 2598 2599 FXCPMADD f2, B4, A7, f2 2600 FXCSMADD f6, B4, A7, f6 2601 FXCPMADD f3, B4, A8, f3 2602 FXCSMADD f7, B4, A8, f7 2603 .align 4 2604 2605.L54: 2606#if defined(LT) || defined(RN) 2607 andi. r0, KK, 3 2608 mtspr CTR, r0 2609 ble+ .L58 2610#else 2611 andi. r0, TEMP, 3 2612 mtspr CTR, r0 2613 ble+ .L58 2614#endif 2615 2616 LFPDUX A1, AO, INC2 2617 LFPDUX B1, BO, INC2 2618 LFPDUX A2, AO, INC2 2619 LFPDUX A3, AO, INC2 2620 LFPDUX A4, AO, INC2 2621 bdz- .L57 2622 .align 4 2623 2624.L56: 2625 FXCPMADD f0, B1, A1, f0 2626 FXCSMADD f4, B1, A1, f4 2627 LFPDUX A1, AO, INC2 2628 FXCPMADD f1, B1, A2, f1 2629 FXCSMADD f5, B1, A2, f5 2630 LFPDUX A2, AO, INC2 2631 2632 FXCPMADD f2, B1, A3, f2 2633 FXCSMADD f6, B1, A3, f6 2634 LFPDUX A3, AO, INC2 2635 FXCPMADD f3, B1, A4, f3 2636 FXCSMADD f7, B1, A4, f7 2637 LFPDUX A4, AO, INC2 2638 LFPDUX B1, BO, INC2 2639 bdnz+ .L56 2640 .align 4 2641 2642.L57: 2643 FXCPMADD f0, B1, A1, f0 2644 FXCSMADD f4, B1, A1, f4 2645 FXCPMADD f1, B1, A2, f1 2646 FXCSMADD f5, B1, A2, f5 2647 2648 FXCPMADD f2, B1, A3, f2 2649 FXCSMADD f6, B1, A3, f6 2650 FXCPMADD f3, B1, A4, f3 2651 FXCSMADD f7, B1, A4, f7 2652 .align 4 2653 2654.L58: 2655 fpadd f0, f0, f4 2656 fpadd f1, f1, f5 2657 fpadd f2, f2, f6 2658 fpadd f3, f3, f7 2659 2660#if defined(LN) || defined(RT) 2661#ifdef LN 2662 subi r0, KK, 4 2663#else 2664 subi r0, KK, 1 2665#endif 2666 slwi TEMP, r0, 2 + ZBASE_SHIFT 2667 slwi r0, r0, 0 + ZBASE_SHIFT 2668 add AO, AORIG, TEMP 2669 add BO, B, r0 2670 addi BO, BO, - 2 * SIZE 2671#endif 2672 2673#if defined(LN) || defined(LT) 2674 LFPDUX f16, BO, INC2 2675 LFPDUX f17, BO, INC2 2676 LFPDUX f18, BO, INC2 2677 LFPDUX f19, BO, INC2 2678 2679 subi BO, BO, 8 * SIZE 2680#else 2681 LFPDUX f16, AO, INC2 2682 LFPDUX f17, AO, INC2 2683 LFPDUX f18, AO, INC2 2684 LFPDUX f19, AO, INC2 2685 2686 subi AO, AO, 8 * SIZE 2687#endif 2688 2689 fpsub f0, f16, f0 2690 fpsub f1, f17, f1 2691 fpsub f2, f18, f2 2692 fpsub f3, f19, f3 2693 2694#ifdef LN 2695 LFPDUX A1, AO, INC2 2696 add AO, AO, INC2 2697 add AO, AO, INC2 2698 add AO, AO, INC2 2699 2700 LFPDUX A2, AO, INC2 2701 LFPDUX A3, AO, INC2 2702 add AO, AO, INC2 2703 add AO, AO, INC2 2704 2705 LFPDUX A4, AO, INC2 2706 LFPDUX A5, AO, INC2 2707 LFPDUX A6, AO, INC2 2708 add AO, AO, INC2 2709 2710 LFPDUX A7, AO, INC2 2711 LFPDUX A8, AO, INC2 2712 LFPDUX A9, AO, INC2 2713 LFPDUX A10, AO, INC2 2714 2715 subi AO, AO, 32 * SIZE 2716 2717 fxpmul f4, A10, f3 2718 FXCXNPMA f3, A10, f3, f4 2719 2720 fxcpnmsub f2, A9, f3, f2 2721 FXCXNSMA f2, A9, f3, f2 2722 2723 fxcpnmsub f1, A8, f3, f1 2724 FXCXNSMA f1, A8, f3, f1 2725 2726 fxcpnmsub f0, A7, f3, f0 2727 FXCXNSMA f0, A7, f3, f0 2728 2729 fxpmul f4, A6, f2 2730 FXCXNPMA f2, A6, f2, f4 2731 2732 fxcpnmsub f1, A5, f2, f1 2733 FXCXNSMA f1, A5, f2, f1 2734 2735 fxcpnmsub f0, A4, f2, f0 2736 FXCXNSMA f0, A4, f2, f0 2737 2738 fxpmul f4, A3, f1 2739 FXCXNPMA f1, A3, f1, f4 2740 2741 fxcpnmsub f0, A2, f1, f0 2742 FXCXNSMA f0, A2, f1, f0 2743 2744 fxpmul f4, A1, f0 2745 FXCXNPMA f0, A1, f0, f4 2746#endif 2747 2748#ifdef LT 2749 LFPDUX A1, AO, INC2 2750 LFPDUX A2, AO, INC2 2751 LFPDUX A3, AO, INC2 2752 LFPDUX A4, AO, INC2 2753 2754 add AO, AO, INC2 2755 LFPDUX A5, AO, INC2 2756 LFPDUX A6, AO, INC2 2757 LFPDUX A7, AO, INC2 2758 2759 add AO, AO, INC2 2760 add AO, AO, INC2 2761 LFPDUX A8, AO, INC2 2762 LFPDUX A9, AO, INC2 2763 2764 add AO, AO, INC2 2765 add AO, AO, INC2 2766 add AO, AO, INC2 2767 LFPDUX A10, AO, INC2 2768 2769 subi AO, AO, 32 * SIZE 2770 2771 fxpmul f4, A1, f0 2772 FXCXNPMA f0, A1, f0, f4 2773 2774 fxcpnmsub f1, A2, f0, f1 2775 FXCXNSMA f1, A2, f0, f1 2776 2777 fxcpnmsub f2, A3, f0, f2 2778 FXCXNSMA f2, A3, f0, f2 2779 2780 fxcpnmsub f3, A4, f0, f3 2781 FXCXNSMA f3, A4, f0, f3 2782 2783 fxpmul f6, A5, f1 2784 FXCXNPMA f1, A5, f1, f6 2785 2786 fxcpnmsub f2, A6, f1, f2 2787 FXCXNSMA f2, A6, f1, f2 2788 2789 fxcpnmsub f3, A7, f1, f3 2790 FXCXNSMA f3, A7, f1, f3 2791 2792 fxpmul f4, A8, f2 2793 FXCXNPMA f2, A8, f2, f4 2794 2795 fxcpnmsub f3, A9, f2, f3 2796 FXCXNSMA f3, A9, f2, f3 2797 2798 fxpmul f6, A10, f3 2799 FXCXNPMA f3, A10, f3, f6 2800#endif 2801 2802#ifdef RN 2803 LFPDX A1, BO, INC2 2804 2805 fxpmul f4, A1, f0 2806 fxpmul f5, A1, f1 2807 fxpmul f6, A1, f2 2808 fxpmul f7, A1, f3 2809 2810 FXCXNPMA f0, A1, f0, f4 2811 FXCXNPMA f1, A1, f1, f5 2812 FXCXNPMA f2, A1, f2, f6 2813 FXCXNPMA f3, A1, f3, f7 2814#endif 2815 2816#ifdef RT 2817 LFPDX A1, BO, INC2 2818 2819 fxpmul f4, A1, f0 2820 fxpmul f5, A1, f1 2821 fxpmul f6, A1, f2 2822 fxpmul f7, A1, f3 2823 2824 FXCXNPMA f0, A1, f0, f4 2825 FXCXNPMA f1, A1, f1, f5 2826 FXCXNPMA f2, A1, f2, f6 2827 FXCXNPMA f3, A1, f3, f7 2828#endif 2829 2830#ifdef LN 2831 subi CO1, CO1, 8 * SIZE 2832#endif 2833 2834#if defined(LN) || defined(LT) 2835 STFPDUX f0, BO, INC2 2836 STFPDUX f1, BO, INC2 2837 STFPDUX f2, BO, INC2 2838 STFPDUX f3, BO, INC2 2839 2840 subi BO, BO, 8 * SIZE 2841#else 2842 STFPDUX f0, AO, INC2 2843 STFPDUX f1, AO, INC2 2844 STFPDUX f2, AO, INC2 2845 STFPDUX f3, AO, INC2 2846 2847 subi AO, AO, 8 * SIZE 2848#endif 2849 2850 STFDUX f0, CO1, INC 2851 STFSDUX f0, CO1, INC 2852 STFDUX f1, CO1, INC 2853 STFSDUX f1, CO1, INC 2854 STFDUX f2, CO1, INC 2855 STFSDUX f2, CO1, INC 2856 STFDUX f3, CO1, INC 2857 STFSDUX f3, CO1, INC 2858 2859#ifdef LN 2860 subi CO1, CO1, 8 * SIZE 2861#endif 2862 2863#ifdef RT 2864 slwi r0, K, 2 + ZBASE_SHIFT 2865 add AORIG, AORIG, r0 2866#endif 2867 2868#if defined(LT) || defined(RN) 2869 sub TEMP, K, KK 2870 slwi r0, TEMP, 2 + ZBASE_SHIFT 2871 slwi TEMP, TEMP, 0 + ZBASE_SHIFT 2872 add AO, AO, r0 2873 add BO, BO, TEMP 2874#endif 2875 2876#ifdef LT 2877 addi KK, KK, 4 2878#endif 2879 2880#ifdef LN 2881 subi KK, KK, 4 2882#endif 2883 2884 addic. I, I, -1 2885 li r0, FZERO 2886 2887 lfpsx f0, SP, r0 2888 bgt+ .L51 2889 .align 4 2890 2891.L89: 2892#ifdef LN 2893 slwi r0, K, 0 + ZBASE_SHIFT 2894 add B, B, r0 2895#endif 2896 2897#if defined(LT) || defined(RN) 2898 addi B, BO, 2 * SIZE 2899#endif 2900 2901#ifdef RN 2902 addi KK, KK, 1 2903#endif 2904 2905#ifdef RT 2906 subi KK, KK, 1 2907#endif 2908 .align 4 2909 2910.L999: 2911 addi SP, SP, 20 2912 2913 lwzu r14, 4(SP) 2914 lwzu r15, 4(SP) 2915 2916 lwzu r16, 4(SP) 2917 lwzu r17, 4(SP) 2918 lwzu r18, 4(SP) 2919 lwzu r19, 4(SP) 2920 2921 lwzu r20, 4(SP) 2922 lwzu r21, 4(SP) 2923 lwzu r22, 4(SP) 2924 lwzu r23, 4(SP) 2925 2926 lwzu r24, 4(SP) 2927 lwzu r25, 4(SP) 2928 lwzu r26, 4(SP) 2929 lwzu r27, 4(SP) 2930 2931 lwzu r28, 4(SP) 2932 lwzu r29, 4(SP) 2933 lwzu r30, 4(SP) 2934 lwzu r31, 4(SP) 2935 2936 subi SP, SP, 12 2937 li r0, 16 2938 2939 lfpdux f31, SP, r0 2940 lfpdux f30, SP, r0 2941 lfpdux f29, SP, r0 2942 lfpdux f28, SP, r0 2943 lfpdux f27, SP, r0 2944 lfpdux f26, SP, r0 2945 lfpdux f25, SP, r0 2946 lfpdux f24, SP, r0 2947 lfpdux f23, SP, r0 2948 lfpdux f22, SP, r0 2949 lfpdux f21, SP, r0 2950 lfpdux f20, SP, r0 2951 lfpdux f19, SP, r0 2952 lfpdux f18, SP, r0 2953 lfpdux f17, SP, r0 2954 lfpdux f16, SP, r0 2955 lfpdux f15, SP, r0 2956 lfpdux f14, SP, r0 2957 addi SP, SP, 16 2958 blr 2959 .align 4 2960 2961 2962 EPILOGUE 2963#endif 2964