/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.
*/ 37/*********************************************************************/ 38 39#define ASSEMBLER 40#include "common.h" 41 42#undef ZERO 43 44#define ALPHA 0 45#define FZERO 16 46 47#define M r3 48#define N r4 49#define K r5 50 51#ifdef linux 52#define A r6 53#define B r7 54#define C r8 55#define LDC r9 56#define OFFSET r10 57#endif 58 59#define TEMP r11 60#define AORIG r12 61#define KK r14 62#define INCM1 r15 63#define INCM3 r16 64#define INCM5 r17 65#define INCM7 r18 66#define INC2 r19 67#define INC r20 68#define INC4 r21 69 70#define I r22 71#define J r23 72#define AO r24 73#define BO r25 74#define AO2 r26 75#define BO2 r27 76 77#define CO1 r28 78#define CO2 r29 79#define ZERO r31 80 81#ifndef NEEDPARAM 82 83#define A1 f16 84#define A2 f17 85#define A3 f18 86#define A4 f19 87#define A5 f20 88#define A6 f21 89#define A7 f22 90#define A8 f23 91#define A9 f24 92#define A10 f25 93 94#define B1 f26 95#define B2 f27 96#define B3 f28 97#define B4 f29 98#define B5 f30 99#define B6 f31 100 101#define AP B6 102 103#ifndef CONJ 104#define FXCPMADD fxcpmadd 105#define FXCSMADD fxcxnpma 106#else 107#if defined(LN) || defined(LT) 108#define FXCPMADD fxcpnsma 109#define FXCSMADD fxcxma 110#else 111#define FXCPMADD fxcpmadd 112#define FXCSMADD fxcxnsma 113#endif 114#endif 115 116#ifndef CONJ 117#define FXCXNPMA fxcxnpma 118#define FXCXNSMA fxcxnsma 119#else 120#define FXCXNPMA fxcxnsma 121#define FXCXNSMA fxcxnpma 122#endif 123 124 125 PROLOGUE 126 PROFCODE 127 128 li r0, -16 129 130 stfpdux f14, SP, r0 131 stfpdux f15, SP, r0 132 stfpdux f16, SP, r0 133 stfpdux f17, SP, r0 134 stfpdux f18, SP, r0 135 stfpdux f19, SP, r0 136 stfpdux f20, SP, r0 137 stfpdux f21, SP, r0 138 stfpdux f22, SP, r0 139 stfpdux f23, SP, r0 140 stfpdux f24, SP, r0 141 stfpdux f25, SP, r0 142 stfpdux f26, SP, r0 143 stfpdux f27, SP, r0 144 stfpdux f28, SP, r0 145 stfpdux f29, SP, r0 146 stfpdux f30, SP, r0 147 stfpdux f31, SP, r0 148 149 stwu r31, -4(SP) 150 stwu r30, -4(SP) 151 stwu r29, -4(SP) 152 
stwu r28, -4(SP) 153 154 stwu r27, -4(SP) 155 stwu r26, -4(SP) 156 stwu r25, -4(SP) 157 stwu r24, -4(SP) 158 159 stwu r23, -4(SP) 160 stwu r22, -4(SP) 161 stwu r21, -4(SP) 162 stwu r20, -4(SP) 163 164 stwu r19, -4(SP) 165 stwu r18, -4(SP) 166 stwu r17, -4(SP) 167 stwu r16, -4(SP) 168 169 stwu r15, -4(SP) 170 stwu r14, -4(SP) 171 172 li r0, 0 173 stwu r0, -4(SP) 174 stwu r0, -4(SP) 175 176 stfdu f2, -8(SP) 177 stfdu f1, -8(SP) 178 179 slwi LDC, LDC, ZBASE_SHIFT 180 181 cmpwi cr0, M, 0 182 ble .L999 183 cmpwi cr0, N, 0 184 ble .L999 185 cmpwi cr0, K, 0 186 ble .L999 187 188 li INC, 1 * SIZE 189 li INC2, 2 * SIZE 190 li INC4, 4 * SIZE 191 li INCM1, -1 * SIZE 192 li INCM3, -3 * SIZE 193 li INCM5, -5 * SIZE 194 li INCM7, -7 * SIZE 195 196 addi C, C, - 1 * SIZE 197 198#ifdef LN 199 mullw r0, M, K 200 slwi r0, r0, ZBASE_SHIFT 201 add A, A, r0 202 203 slwi r0, M, ZBASE_SHIFT 204 add C, C, r0 205#endif 206 207#ifdef RN 208 neg KK, OFFSET 209#endif 210 211#ifdef RT 212 mullw r0, N, K 213 slwi r0, r0, ZBASE_SHIFT 214 add B, B, r0 215 216 mullw r0, N, LDC 217 add C, C, r0 218 219 sub KK, N, OFFSET 220#endif 221 222 andi. J, N, 1 223 beq .L50 224 225#ifdef RT 226 slwi r0, K, 0 + ZBASE_SHIFT 227 sub B, B, r0 228 229 sub C, C, LDC 230#endif 231 232 mr CO1, C 233 234#ifdef LN 235 add KK, M, OFFSET 236#endif 237 238#ifdef LT 239 mr KK, OFFSET 240#endif 241 242#if defined(LN) || defined(RT) 243 addi AORIG, A, -2 * SIZE 244#else 245 addi AO, A, -2 * SIZE 246#endif 247#ifndef RT 248 add C, CO2, LDC 249#endif 250 li r0, FZERO 251 lfpsx f0, SP, r0 252 253 srawi. I, M, 2 254 ble .L60 255 .align 4 256 257.L51: 258#if defined(LT) || defined(RN) 259 fpmr f4, f0 260 addi BO, B, - 2 * SIZE 261 fpmr f1, f0 262 fpmr f5, f0 263 fpmr f2, f0 264 fpmr f6, f0 265 fpmr f3, f0 266 fpmr f7, f0 267 srawi. 
r0, KK, 2 268 mtspr CTR, r0 269 ble .L54 270#else 271 272#ifdef LN 273 slwi r0, K, 2 + ZBASE_SHIFT 274 sub AORIG, AORIG, r0 275#endif 276 277 slwi r0 , KK, 2 + ZBASE_SHIFT 278 slwi TEMP, KK, 0 + ZBASE_SHIFT 279 add AO, AORIG, r0 280 add BO, B, TEMP 281 282 sub TEMP, K, KK 283 284 fpmr f4, f0 285 addi BO, BO, - 2 * SIZE 286 fpmr f1, f0 287 fpmr f5, f0 288 fpmr f2, f0 289 fpmr f6, f0 290 fpmr f3, f0 291 fpmr f7, f0 292 srawi. r0, TEMP, 2 293 mtspr CTR, r0 294 ble .L54 295#endif 296 297 LFPDUX B1, BO, INC2 298 LFPDUX A1, AO, INC2 299 LFPDUX A2, AO, INC2 300 LFPDUX B2, BO, INC2 301 LFPDUX A3, AO, INC2 302 LFPDUX A4, AO, INC2 303 304 LFPDUX B3, BO, INC2 305 LFPDUX A5, AO, INC2 306 LFPDUX A6, AO, INC2 307 LFPDUX A7, AO, INC2 308 LFPDUX A8, AO, INC2 309 bdz- .L53 310 .align 4 311 312.L52: 313 FXCPMADD f0, B1, A1, f0 314 LFPDUX B4, BO, INC2 315 FXCSMADD f4, B1, A1, f4 316 LFPDUX A1, AO, INC2 317 FXCPMADD f1, B1, A2, f1 318 nop 319 FXCSMADD f5, B1, A2, f5 320 LFPDUX A2, AO, INC2 321 322 FXCPMADD f2, B1, A3, f2 323 nop 324 FXCSMADD f6, B1, A3, f6 325 LFPDUX A3, AO, INC2 326 FXCPMADD f3, B1, A4, f3 327 nop 328 FXCSMADD f7, B1, A4, f7 329 LFPDUX A4, AO, INC2 330 331 FXCPMADD f0, B2, A5, f0 332 LFPDUX B1, BO, INC2 333 FXCSMADD f4, B2, A5, f4 334 LFPDUX A5, AO, INC2 335 FXCPMADD f1, B2, A6, f1 336 nop 337 FXCSMADD f5, B2, A6, f5 338 LFPDUX A6, AO, INC2 339 340 FXCPMADD f2, B2, A7, f2 341 nop 342 FXCSMADD f6, B2, A7, f6 343 LFPDUX A7, AO, INC2 344 FXCPMADD f3, B2, A8, f3 345 nop 346 FXCSMADD f7, B2, A8, f7 347 LFPDUX A8, AO, INC2 348 349 FXCPMADD f0, B3, A1, f0 350 LFPDUX B2, BO, INC2 351 FXCSMADD f4, B3, A1, f4 352 LFPDUX A1, AO, INC2 353 FXCPMADD f1, B3, A2, f1 354 nop 355 FXCSMADD f5, B3, A2, f5 356 LFPDUX A2, AO, INC2 357 358 FXCPMADD f2, B3, A3, f2 359 nop 360 FXCSMADD f6, B3, A3, f6 361 LFPDUX A3, AO, INC2 362 FXCPMADD f3, B3, A4, f3 363 nop 364 FXCSMADD f7, B3, A4, f7 365 LFPDUX A4, AO, INC2 366 367 FXCPMADD f0, B4, A5, f0 368 LFPDUX B3, BO, INC2 369 FXCSMADD f4, B4, A5, 
f4 370 LFPDUX A5, AO, INC2 371 FXCPMADD f1, B4, A6, f1 372 nop 373 FXCSMADD f5, B4, A6, f5 374 LFPDUX A6, AO, INC2 375 376 FXCPMADD f2, B4, A7, f2 377 nop 378 FXCSMADD f6, B4, A7, f6 379 LFPDUX A7, AO, INC2 380 FXCPMADD f3, B4, A8, f3 381 nop 382 FXCSMADD f7, B4, A8, f7 383 LFPDUX A8, AO, INC2 384 bdnz+ .L52 385 .align 4 386 387.L53: 388 FXCPMADD f0, B1, A1, f0 389 LFPDUX B4, BO, INC2 390 FXCSMADD f4, B1, A1, f4 391 LFPDUX A1, AO, INC2 392 FXCPMADD f1, B1, A2, f1 393 nop 394 FXCSMADD f5, B1, A2, f5 395 LFPDUX A2, AO, INC2 396 397 FXCPMADD f2, B1, A3, f2 398 nop 399 FXCSMADD f6, B1, A3, f6 400 LFPDUX A3, AO, INC2 401 FXCPMADD f3, B1, A4, f3 402 nop 403 FXCSMADD f7, B1, A4, f7 404 LFPDUX A4, AO, INC2 405 406 FXCPMADD f0, B2, A5, f0 407 nop 408 FXCSMADD f4, B2, A5, f4 409 LFPDUX A5, AO, INC2 410 FXCPMADD f1, B2, A6, f1 411 nop 412 FXCSMADD f5, B2, A6, f5 413 LFPDUX A6, AO, INC2 414 415 FXCPMADD f2, B2, A7, f2 416 nop 417 FXCSMADD f6, B2, A7, f6 418 LFPDUX A7, AO, INC2 419 FXCPMADD f3, B2, A8, f3 420 nop 421 FXCSMADD f7, B2, A8, f7 422 LFPDUX A8, AO, INC2 423 424 FXCPMADD f0, B3, A1, f0 425 FXCSMADD f4, B3, A1, f4 426 FXCPMADD f1, B3, A2, f1 427 FXCSMADD f5, B3, A2, f5 428 429 FXCPMADD f2, B3, A3, f2 430 FXCSMADD f6, B3, A3, f6 431 FXCPMADD f3, B3, A4, f3 432 FXCSMADD f7, B3, A4, f7 433 434 FXCPMADD f0, B4, A5, f0 435 FXCSMADD f4, B4, A5, f4 436 FXCPMADD f1, B4, A6, f1 437 FXCSMADD f5, B4, A6, f5 438 439 FXCPMADD f2, B4, A7, f2 440 FXCSMADD f6, B4, A7, f6 441 FXCPMADD f3, B4, A8, f3 442 FXCSMADD f7, B4, A8, f7 443 .align 4 444 445.L54: 446#if defined(LT) || defined(RN) 447 andi. r0, KK, 3 448 mtspr CTR, r0 449 ble+ .L58 450#else 451 andi. 
r0, TEMP, 3 452 mtspr CTR, r0 453 ble+ .L58 454#endif 455 456 LFPDUX A1, AO, INC2 457 LFPDUX B1, BO, INC2 458 LFPDUX A2, AO, INC2 459 LFPDUX A3, AO, INC2 460 LFPDUX A4, AO, INC2 461 bdz- .L57 462 .align 4 463 464.L56: 465 FXCPMADD f0, B1, A1, f0 466 FXCSMADD f4, B1, A1, f4 467 LFPDUX A1, AO, INC2 468 FXCPMADD f1, B1, A2, f1 469 FXCSMADD f5, B1, A2, f5 470 LFPDUX A2, AO, INC2 471 472 FXCPMADD f2, B1, A3, f2 473 FXCSMADD f6, B1, A3, f6 474 LFPDUX A3, AO, INC2 475 FXCPMADD f3, B1, A4, f3 476 FXCSMADD f7, B1, A4, f7 477 LFPDUX A4, AO, INC2 478 LFPDUX B1, BO, INC2 479 bdnz+ .L56 480 .align 4 481 482.L57: 483 FXCPMADD f0, B1, A1, f0 484 FXCSMADD f4, B1, A1, f4 485 FXCPMADD f1, B1, A2, f1 486 FXCSMADD f5, B1, A2, f5 487 488 FXCPMADD f2, B1, A3, f2 489 FXCSMADD f6, B1, A3, f6 490 FXCPMADD f3, B1, A4, f3 491 FXCSMADD f7, B1, A4, f7 492 .align 4 493 494.L58: 495 fpadd f0, f0, f4 496 fpadd f1, f1, f5 497 fpadd f2, f2, f6 498 fpadd f3, f3, f7 499 500#if defined(LN) || defined(RT) 501#ifdef LN 502 subi r0, KK, 4 503#else 504 subi r0, KK, 1 505#endif 506 slwi TEMP, r0, 2 + ZBASE_SHIFT 507 slwi r0, r0, 0 + ZBASE_SHIFT 508 add AO, AORIG, TEMP 509 add BO, B, r0 510 addi BO, BO, - 2 * SIZE 511#endif 512 513#if defined(LN) || defined(LT) 514 LFPDUX f16, BO, INC2 515 LFPDUX f17, BO, INC2 516 LFPDUX f18, BO, INC2 517 LFPDUX f19, BO, INC2 518 519 subi BO, BO, 8 * SIZE 520#else 521 LFPDUX f16, AO, INC2 522 LFPDUX f17, AO, INC2 523 LFPDUX f18, AO, INC2 524 LFPDUX f19, AO, INC2 525 526 subi AO, AO, 8 * SIZE 527#endif 528 529 fpsub f0, f16, f0 530 fpsub f1, f17, f1 531 fpsub f2, f18, f2 532 fpsub f3, f19, f3 533 534#ifdef LN 535 LFPDUX A1, AO, INC2 536 add AO, AO, INC2 537 add AO, AO, INC2 538 add AO, AO, INC2 539 540 LFPDUX A2, AO, INC2 541 LFPDUX A3, AO, INC2 542 add AO, AO, INC2 543 add AO, AO, INC2 544 545 LFPDUX A4, AO, INC2 546 LFPDUX A5, AO, INC2 547 LFPDUX A6, AO, INC2 548 add AO, AO, INC2 549 550 LFPDUX A7, AO, INC2 551 LFPDUX A8, AO, INC2 552 LFPDUX A9, AO, INC2 553 LFPDUX A10, 
AO, INC2 554 555 subi AO, AO, 32 * SIZE 556 557 fxpmul f4, A10, f3 558 FXCXNPMA f3, A10, f3, f4 559 560 fxcpnmsub f2, A9, f3, f2 561 FXCXNSMA f2, A9, f3, f2 562 563 fxcpnmsub f1, A8, f3, f1 564 FXCXNSMA f1, A8, f3, f1 565 566 fxcpnmsub f0, A7, f3, f0 567 FXCXNSMA f0, A7, f3, f0 568 569 fxpmul f4, A6, f2 570 FXCXNPMA f2, A6, f2, f4 571 572 fxcpnmsub f1, A5, f2, f1 573 FXCXNSMA f1, A5, f2, f1 574 575 fxcpnmsub f0, A4, f2, f0 576 FXCXNSMA f0, A4, f2, f0 577 578 fxpmul f4, A3, f1 579 FXCXNPMA f1, A3, f1, f4 580 581 fxcpnmsub f0, A2, f1, f0 582 FXCXNSMA f0, A2, f1, f0 583 584 fxpmul f4, A1, f0 585 FXCXNPMA f0, A1, f0, f4 586#endif 587 588#ifdef LT 589 LFPDUX A1, AO, INC2 590 LFPDUX A2, AO, INC2 591 LFPDUX A3, AO, INC2 592 LFPDUX A4, AO, INC2 593 594 add AO, AO, INC2 595 LFPDUX A5, AO, INC2 596 LFPDUX A6, AO, INC2 597 LFPDUX A7, AO, INC2 598 599 add AO, AO, INC2 600 add AO, AO, INC2 601 LFPDUX A8, AO, INC2 602 LFPDUX A9, AO, INC2 603 604 add AO, AO, INC2 605 add AO, AO, INC2 606 add AO, AO, INC2 607 LFPDUX A10, AO, INC2 608 609 subi AO, AO, 32 * SIZE 610 611 fxpmul f4, A1, f0 612 FXCXNPMA f0, A1, f0, f4 613 614 fxcpnmsub f1, A2, f0, f1 615 FXCXNSMA f1, A2, f0, f1 616 617 fxcpnmsub f2, A3, f0, f2 618 FXCXNSMA f2, A3, f0, f2 619 620 fxcpnmsub f3, A4, f0, f3 621 FXCXNSMA f3, A4, f0, f3 622 623 fxpmul f6, A5, f1 624 FXCXNPMA f1, A5, f1, f6 625 626 fxcpnmsub f2, A6, f1, f2 627 FXCXNSMA f2, A6, f1, f2 628 629 fxcpnmsub f3, A7, f1, f3 630 FXCXNSMA f3, A7, f1, f3 631 632 fxpmul f4, A8, f2 633 FXCXNPMA f2, A8, f2, f4 634 635 fxcpnmsub f3, A9, f2, f3 636 FXCXNSMA f3, A9, f2, f3 637 638 fxpmul f6, A10, f3 639 FXCXNPMA f3, A10, f3, f6 640#endif 641 642#ifdef RN 643 LFPDX A1, BO, INC2 644 645 fxpmul f4, A1, f0 646 fxpmul f5, A1, f1 647 fxpmul f6, A1, f2 648 fxpmul f7, A1, f3 649 650 FXCXNPMA f0, A1, f0, f4 651 FXCXNPMA f1, A1, f1, f5 652 FXCXNPMA f2, A1, f2, f6 653 FXCXNPMA f3, A1, f3, f7 654#endif 655 656#ifdef RT 657 LFPDX A1, BO, INC2 658 659 fxpmul f4, A1, f0 660 fxpmul f5, A1, 
f1 661 fxpmul f6, A1, f2 662 fxpmul f7, A1, f3 663 664 FXCXNPMA f0, A1, f0, f4 665 FXCXNPMA f1, A1, f1, f5 666 FXCXNPMA f2, A1, f2, f6 667 FXCXNPMA f3, A1, f3, f7 668#endif 669 670#ifdef LN 671 subi CO1, CO1, 8 * SIZE 672#endif 673 674#if defined(LN) || defined(LT) 675 STFPDUX f0, BO, INC2 676 STFPDUX f1, BO, INC2 677 STFPDUX f2, BO, INC2 678 STFPDUX f3, BO, INC2 679 680 subi BO, BO, 8 * SIZE 681#else 682 STFPDUX f0, AO, INC2 683 STFPDUX f1, AO, INC2 684 STFPDUX f2, AO, INC2 685 STFPDUX f3, AO, INC2 686 687 subi AO, AO, 8 * SIZE 688#endif 689 690 STFDUX f0, CO1, INC 691 STFSDUX f0, CO1, INC 692 STFDUX f1, CO1, INC 693 STFSDUX f1, CO1, INC 694 STFDUX f2, CO1, INC 695 STFSDUX f2, CO1, INC 696 STFDUX f3, CO1, INC 697 STFSDUX f3, CO1, INC 698 699#ifdef LN 700 subi CO1, CO1, 8 * SIZE 701#endif 702 703#ifdef RT 704 slwi r0, K, 2 + ZBASE_SHIFT 705 add AORIG, AORIG, r0 706#endif 707 708#if defined(LT) || defined(RN) 709 sub TEMP, K, KK 710 slwi r0, TEMP, 2 + ZBASE_SHIFT 711 slwi TEMP, TEMP, 0 + ZBASE_SHIFT 712 add AO, AO, r0 713 add BO, BO, TEMP 714#endif 715 716#ifdef LT 717 addi KK, KK, 4 718#endif 719 720#ifdef LN 721 subi KK, KK, 4 722#endif 723 724 addic. I, I, -1 725 li r0, FZERO 726 727 lfpsx f0, SP, r0 728 bgt+ .L51 729 .align 4 730 731.L60: 732 andi. I, M, 2 733 beq .L70 734 735#if defined(LT) || defined(RN) 736 fpmr f1, f0 737 addi BO, B, - 2 * SIZE 738 fpmr f2, f0 739 fpmr f3, f0 740 srawi. r0, KK, 2 741 mtspr CTR, r0 742 ble .L64 743#else 744#ifdef LN 745 slwi r0, K, 1 + ZBASE_SHIFT 746 sub AORIG, AORIG, r0 747#endif 748 749 slwi r0 , KK, 1 + ZBASE_SHIFT 750 slwi TEMP, KK, 0 + ZBASE_SHIFT 751 add AO, AORIG, r0 752 add BO, B, TEMP 753 754 sub TEMP, K, KK 755 756 fpmr f1, f0 757 addi BO, BO, - 2 * SIZE 758 fpmr f2, f0 759 fpmr f3, f0 760 srawi. 
r0, TEMP, 2 761 mtspr CTR, r0 762 ble .L64 763#endif 764 765 LFPDUX B1, BO, INC2 766 LFPDUX A1, AO, INC2 767 LFPDUX A2, AO, INC2 768 LFPDUX B2, BO, INC2 769 LFPDUX A3, AO, INC2 770 LFPDUX A4, AO, INC2 771 772 LFPDUX B3, BO, INC2 773 LFPDUX A5, AO, INC2 774 LFPDUX A6, AO, INC2 775 LFPDUX B4, BO, INC2 776 LFPDUX A7, AO, INC2 777 LFPDUX A8, AO, INC2 778 bdz- .L63 779 .align 4 780 781.L62: 782 FXCPMADD f0, B1, A1, f0 783 FXCSMADD f2, B1, A1, f2 784 LFPDUX A1, AO, INC2 785 FXCPMADD f1, B1, A2, f1 786 FXCSMADD f3, B1, A2, f3 787 LFPDUX A2, AO, INC2 788 LFPDUX B1, BO, INC2 789 790 FXCPMADD f0, B2, A3, f0 791 FXCSMADD f2, B2, A3, f2 792 LFPDUX A3, AO, INC2 793 FXCPMADD f1, B2, A4, f1 794 FXCSMADD f3, B2, A4, f3 795 LFPDUX A4, AO, INC2 796 LFPDUX B2, BO, INC2 797 798 FXCPMADD f0, B3, A5, f0 799 FXCSMADD f2, B3, A5, f2 800 LFPDUX A5, AO, INC2 801 FXCPMADD f1, B3, A6, f1 802 FXCSMADD f3, B3, A6, f3 803 LFPDUX A6, AO, INC2 804 LFPDUX B3, BO, INC2 805 806 FXCPMADD f0, B4, A7, f0 807 FXCSMADD f2, B4, A7, f2 808 LFPDUX A7, AO, INC2 809 FXCPMADD f1, B4, A8, f1 810 FXCSMADD f3, B4, A8, f3 811 LFPDUX A8, AO, INC2 812 LFPDUX B4, BO, INC2 813 bdnz+ .L62 814 .align 4 815 816.L63: 817 FXCPMADD f0, B1, A1, f0 818 FXCSMADD f2, B1, A1, f2 819 FXCPMADD f1, B1, A2, f1 820 FXCSMADD f3, B1, A2, f3 821 822 FXCPMADD f0, B2, A3, f0 823 FXCSMADD f2, B2, A3, f2 824 FXCPMADD f1, B2, A4, f1 825 FXCSMADD f3, B2, A4, f3 826 827 FXCPMADD f0, B3, A5, f0 828 FXCSMADD f2, B3, A5, f2 829 FXCPMADD f1, B3, A6, f1 830 FXCSMADD f3, B3, A6, f3 831 832 FXCPMADD f0, B4, A7, f0 833 FXCSMADD f2, B4, A7, f2 834 FXCPMADD f1, B4, A8, f1 835 FXCSMADD f3, B4, A8, f3 836 .align 4 837 838.L64: 839#if defined(LT) || defined(RN) 840 andi. r0, KK, 3 841 mtspr CTR, r0 842 ble+ .L68 843#else 844 andi. 
r0, TEMP, 3 845 mtspr CTR, r0 846 ble+ .L68 847#endif 848 849 LFPDUX A1, AO, INC2 850 LFPDUX B1, BO, INC2 851 LFPDUX A2, AO, INC2 852 bdz- .L67 853 .align 4 854 855.L66: 856 FXCPMADD f0, B1, A1, f0 857 FXCSMADD f2, B1, A1, f2 858 LFPDUX A1, AO, INC2 859 FXCPMADD f1, B1, A2, f1 860 FXCSMADD f3, B1, A2, f3 861 LFPDUX B1, BO, INC2 862 LFPDUX A2, AO, INC2 863 bdnz+ .L66 864 .align 4 865 866.L67: 867 FXCPMADD f0, B1, A1, f0 868 FXCSMADD f2, B1, A1, f2 869 FXCPMADD f1, B1, A2, f1 870 FXCSMADD f3, B1, A2, f3 871 .align 4 872 873.L68: 874 fpadd f0, f0, f2 875 fpadd f1, f1, f3 876 877#if defined(LN) || defined(RT) 878#ifdef LN 879 subi r0, KK, 2 880#else 881 subi r0, KK, 1 882#endif 883 slwi TEMP, r0, 1 + ZBASE_SHIFT 884 slwi r0, r0, 0 + ZBASE_SHIFT 885 add AO, AORIG, TEMP 886 add BO, B, r0 887 addi BO, BO, - 2 * SIZE 888#endif 889 890#if defined(LN) || defined(LT) 891 LFPDUX f16, BO, INC2 892 LFPDUX f17, BO, INC2 893 894 subi BO, BO, 4 * SIZE 895#else 896 LFPDUX f16, AO, INC2 897 LFPDUX f17, AO, INC2 898 899 subi AO, AO, 4 * SIZE 900#endif 901 902 fpsub f0, f16, f0 903 fpsub f1, f17, f1 904 905#ifdef LN 906 LFPDUX A1, AO, INC2 907 add AO, AO, INC2 908 LFPDUX A2, AO, INC2 909 LFPDUX A3, AO, INC2 910 911 subi AO, AO, 8 * SIZE 912 913 fxpmul f4, A3, f1 914 FXCXNPMA f1, A3, f1, f4 915 916 fxcpnmsub f0, A2, f1, f0 917 FXCXNSMA f0, A2, f1, f0 918 919 fxpmul f4, A1, f0 920 FXCXNPMA f0, A1, f0, f4 921#endif 922 923#ifdef LT 924 LFPDUX A1, AO, INC2 925 LFPDUX A2, AO, INC2 926 add AO, AO, INC2 927 LFPDUX A3, AO, INC2 928 929 subi AO, AO, 8 * SIZE 930 931 fxpmul f4, A1, f0 932 FXCXNPMA f0, A1, f0, f4 933 934 fxcpnmsub f1, A2, f0, f1 935 FXCXNSMA f1, A2, f0, f1 936 937 fxpmul f6, A3, f1 938 FXCXNPMA f1, A3, f1, f6 939#endif 940 941#ifdef RN 942 LFPDX A1, BO, INC2 943 944 fxpmul f4, A1, f0 945 fxpmul f5, A1, f1 946 947 FXCXNPMA f0, A1, f0, f4 948 FXCXNPMA f1, A1, f1, f5 949#endif 950 951#ifdef RT 952 LFPDX A1, BO, INC2 953 954 fxpmul f4, A1, f0 955 fxpmul f5, A1, f1 956 957 FXCXNPMA 
f0, A1, f0, f4 958 FXCXNPMA f1, A1, f1, f5 959#endif 960 961#ifdef LN 962 subi CO1, CO1, 4 * SIZE 963#endif 964 965#if defined(LN) || defined(LT) 966 STFPDUX f0, BO, INC2 967 STFPDUX f1, BO, INC2 968 969 subi BO, BO, 4 * SIZE 970#else 971 STFPDUX f0, AO, INC2 972 STFPDUX f1, AO, INC2 973 974 subi AO, AO, 4 * SIZE 975#endif 976 977 STFDUX f0, CO1, INC 978 STFSDUX f0, CO1, INC 979 STFDUX f1, CO1, INC 980 STFSDUX f1, CO1, INC 981 982#ifdef LN 983 subi CO1, CO1, 4 * SIZE 984#endif 985 986#ifdef RT 987 slwi r0, K, 1 + ZBASE_SHIFT 988 add AORIG, AORIG, r0 989#endif 990 991#if defined(LT) || defined(RN) 992 sub TEMP, K, KK 993 slwi r0, TEMP, 1 + ZBASE_SHIFT 994 slwi TEMP, TEMP, 0 + ZBASE_SHIFT 995 add AO, AO, r0 996 add BO, BO, TEMP 997#endif 998 999#ifdef LT 1000 addi KK, KK, 2 1001#endif 1002 1003#ifdef LN 1004 subi KK, KK, 2 1005#endif 1006 1007 li r0, FZERO 1008 lfpsx f0, SP, r0 1009 .align 4 1010 1011.L70: 1012 andi. I, M, 1 1013 beq .L89 1014 1015#if defined(LT) || defined(RN) 1016 addi BO, B, - 2 * SIZE 1017 fpmr f1, f0 1018 fpmr f2, f0 1019 fpmr f3, f0 1020 srawi. r0, KK, 3 1021 mtspr CTR, r0 1022 ble .L74 1023#else 1024#ifdef LN 1025 slwi r0, K, 0 + ZBASE_SHIFT 1026 sub AORIG, AORIG, r0 1027#endif 1028 1029 slwi TEMP, KK, 0 + ZBASE_SHIFT 1030 add AO, AORIG, TEMP 1031 add BO, B, TEMP 1032 1033 sub TEMP, K, KK 1034 1035 addi BO, BO, - 2 * SIZE 1036 fpmr f1, f0 1037 fpmr f2, f0 1038 fpmr f3, f0 1039 srawi. 
r0, TEMP, 3 1040 mtspr CTR, r0 1041 ble .L74 1042#endif 1043 1044 LFPDUX A1, AO, INC2 1045 LFPDUX B1, BO, INC2 1046 LFPDUX A2, AO, INC2 1047 LFPDUX B2, BO, INC2 1048 LFPDUX A3, AO, INC2 1049 LFPDUX B3, BO, INC2 1050 LFPDUX A4, AO, INC2 1051 LFPDUX B4, BO, INC2 1052 1053 LFPDUX A5, AO, INC2 1054 LFPDUX B5, BO, INC2 1055 LFPDUX A6, AO, INC2 1056 LFPDUX B6, BO, INC2 1057 LFPDUX A7, AO, INC2 1058 LFPDUX A9, BO, INC2 1059 LFPDUX A8, AO, INC2 1060 LFPDUX A10, BO, INC2 1061 bdz- .L73 1062 .align 4 1063 1064.L72: 1065 FXCPMADD f0, B1, A1, f0 1066 FXCSMADD f1, B1, A1, f1 1067 LFPDUX A1, AO, INC2 1068 LFPDUX B1, BO, INC2 1069 FXCPMADD f2, B2, A2, f2 1070 FXCSMADD f3, B2, A2, f3 1071 LFPDUX A2, AO, INC2 1072 LFPDUX B2, BO, INC2 1073 1074 FXCPMADD f0, B3, A3, f0 1075 FXCSMADD f1, B3, A3, f1 1076 LFPDUX A3, AO, INC2 1077 LFPDUX B3, BO, INC2 1078 FXCPMADD f2, B4, A4, f2 1079 FXCSMADD f3, B4, A4, f3 1080 LFPDUX A4, AO, INC2 1081 LFPDUX B4, BO, INC2 1082 1083 FXCPMADD f0, B5, A5, f0 1084 FXCSMADD f1, B5, A5, f1 1085 LFPDUX A5, AO, INC2 1086 LFPDUX B5, BO, INC2 1087 FXCPMADD f2, B6, A6, f2 1088 FXCSMADD f3, B6, A6, f3 1089 LFPDUX A6, AO, INC2 1090 LFPDUX B6, BO, INC2 1091 1092 FXCPMADD f0, A9, A7, f0 1093 FXCSMADD f1, A9, A7, f1 1094 LFPDUX A7, AO, INC2 1095 LFPDUX A9, BO, INC2 1096 FXCPMADD f2, A10, A8, f2 1097 FXCSMADD f3, A10, A8, f3 1098 LFPDUX A8, AO, INC2 1099 LFPDUX A10, BO, INC2 1100 1101 bdnz+ .L72 1102 .align 4 1103 1104.L73: 1105 FXCPMADD f0, B1, A1, f0 1106 FXCSMADD f1, B1, A1, f1 1107 FXCPMADD f2, B2, A2, f2 1108 FXCSMADD f3, B2, A2, f3 1109 1110 FXCPMADD f0, B3, A3, f0 1111 FXCSMADD f1, B3, A3, f1 1112 FXCPMADD f2, B4, A4, f2 1113 FXCSMADD f3, B4, A4, f3 1114 1115 FXCPMADD f0, B5, A5, f0 1116 FXCSMADD f1, B5, A5, f1 1117 FXCPMADD f2, B6, A6, f2 1118 FXCSMADD f3, B6, A6, f3 1119 1120 FXCPMADD f0, A9, A7, f0 1121 FXCSMADD f1, A9, A7, f1 1122 FXCPMADD f2, A10, A8, f2 1123 FXCSMADD f3, A10, A8, f3 1124 .align 4 1125 1126.L74: 1127#if defined(LT) || defined(RN) 1128 andi. 
r0, KK, 7 1129 mtspr CTR, r0 1130 ble+ .L78 1131#else 1132 andi. r0, TEMP, 7 1133 mtspr CTR, r0 1134 ble+ .L78 1135#endif 1136 1137 LFPDUX A1, AO, INC2 1138 LFPDUX B1, BO, INC2 1139 bdz- .L77 1140 .align 4 1141 1142.L76: 1143 FXCPMADD f0, B1, A1, f0 1144 FXCSMADD f1, B1, A1, f1 1145 LFPDUX A1, AO, INC2 1146 LFPDUX B1, BO, INC2 1147 bdnz+ .L76 1148 .align 4 1149 1150.L77: 1151 FXCPMADD f0, B1, A1, f0 1152 FXCSMADD f1, B1, A1, f1 1153 .align 4 1154 1155.L78: 1156 fpadd f0, f0, f2 1157 fpadd f1, f1, f3 1158 1159 fpadd f0, f0, f1 1160 1161#if defined(LN) || defined(RT) 1162#ifdef LN 1163 subi r0, KK, 1 1164#else 1165 subi r0, KK, 1 1166#endif 1167 slwi TEMP, r0, 0 + ZBASE_SHIFT 1168 add AO, AORIG, TEMP 1169 add BO, B, TEMP 1170 addi BO, BO, - 2 * SIZE 1171#endif 1172 1173#if defined(LN) || defined(LT) 1174 LFPDX f16, BO, INC2 1175#else 1176 LFPDX f16, AO, INC2 1177#endif 1178 1179 fpsub f0, f16, f0 1180 1181#ifdef LN 1182 LFPDX A1, AO, INC2 1183 1184 fxpmul f4, A1, f0 1185 FXCXNPMA f0, A1, f0, f4 1186#endif 1187 1188#ifdef LT 1189 LFPDX A1, AO, INC2 1190 1191 fxpmul f4, A1, f0 1192 FXCXNPMA f0, A1, f0, f4 1193#endif 1194 1195#ifdef RN 1196 LFPDX A1, BO, INC2 1197 1198 fxpmul f4, A1, f0 1199 FXCXNPMA f0, A1, f0, f4 1200#endif 1201 1202#ifdef RT 1203 LFPDX A1, BO, INC2 1204 1205 fxpmul f4, A1, f0 1206 FXCXNPMA f0, A1, f0, f4 1207#endif 1208 1209#ifdef LN 1210 subi CO1, CO1, 2 * SIZE 1211#endif 1212 1213#if defined(LN) || defined(LT) 1214 STFPDX f0, BO, INC2 1215#else 1216 STFPDX f0, AO, INC2 1217#endif 1218 1219 STFDUX f0, CO1, INC 1220 STFSDUX f0, CO1, INC 1221 1222#ifdef LN 1223 subi CO1, CO1, 2 * SIZE 1224#endif 1225 1226#ifdef RT 1227 slwi r0, K, 0 + ZBASE_SHIFT 1228 add AORIG, AORIG, r0 1229#endif 1230 1231#if defined(LT) || defined(RN) 1232 sub TEMP, K, KK 1233 slwi TEMP, TEMP, 0 + ZBASE_SHIFT 1234 add AO, AO, TEMP 1235 add BO, BO, TEMP 1236#endif 1237 1238#ifdef LT 1239 addi KK, KK, 1 1240#endif 1241 1242#ifdef LN 1243 subi KK, KK, 1 1244#endif 1245 1246 li r0, 
FZERO 1247 lfpsx f0, SP, r0 1248 .align 4 1249 1250.L89: 1251#ifdef LN 1252 slwi r0, K, 0 + ZBASE_SHIFT 1253 add B, B, r0 1254#endif 1255 1256#if defined(LT) || defined(RN) 1257 addi B, BO, 2 * SIZE 1258#endif 1259 1260#ifdef RN 1261 addi KK, KK, 1 1262#endif 1263 1264#ifdef RT 1265 subi KK, KK, 1 1266#endif 1267 .align 4 1268 1269.L50: 1270 srawi. J, N, 1 1271 ble .L999 1272 .align 4 1273 1274.L10: 1275#ifdef RT 1276 slwi r0, K, 1 + ZBASE_SHIFT 1277 sub B, B, r0 1278 1279 slwi r0, LDC, 1 1280 sub C, C, r0 1281#endif 1282 1283 mr CO1, C 1284 add CO2, C, LDC 1285 1286#ifdef LN 1287 add KK, M, OFFSET 1288#endif 1289 1290#ifdef LT 1291 mr KK, OFFSET 1292#endif 1293 1294#if defined(LN) || defined(RT) 1295 addi AORIG, A, -4 * SIZE 1296#else 1297 addi AO, A, -4 * SIZE 1298#endif 1299#ifndef RT 1300 add C, CO2, LDC 1301#endif 1302 1303 li r0, FZERO 1304 lfpsx f0, SP, r0 1305 1306 srawi. I, M, 2 1307 ble .L20 1308 .align 4 1309 1310.L11: 1311#if defined(LT) || defined(RN) 1312 1313 addi AO2, AO, 2 * SIZE 1314 fpmr f4, f0 1315 addi BO, B, - 4 * SIZE 1316 fpmr f8, f0 1317 addi BO2, B, - 2 * SIZE 1318 fpmr f12, f0 1319 1320 fpmr f5, f0 1321 fpmr f9, f0 1322 fpmr f13, f0 1323 fpmr f2, f0 1324 1325 fpmr f6, f0 1326 fpmr f10, f0 1327 fpmr f14, f0 1328 fpmr f3, f0 1329 1330 fpmr f7, f0 1331 fpmr f11, f0 1332 fpmr f15, f0 1333 1334 srawi. 
r0, KK, 2 1335 fpmr f1, f0 1336 mtspr CTR, r0 1337 ble .L14 1338#else 1339 1340#ifdef LN 1341 slwi r0, K, 2 + ZBASE_SHIFT 1342 sub AORIG, AORIG, r0 1343#endif 1344 1345 slwi r0 , KK, 2 + ZBASE_SHIFT 1346 slwi TEMP, KK, 1 + ZBASE_SHIFT 1347 add AO, AORIG, r0 1348 add BO, B, TEMP 1349 1350 sub TEMP, K, KK 1351 1352 fpmr f5, f0 1353 fpmr f9, f0 1354 fpmr f13, f0 1355 fpmr f2, f0 1356 1357 fpmr f6, f0 1358 fpmr f10, f0 1359 fpmr f14, f0 1360 fpmr f3, f0 1361 1362 fpmr f7, f0 1363 fpmr f11, f0 1364 fpmr f15, f0 1365 1366 addi AO2, AO, 2 * SIZE 1367 fpmr f4, f0 1368 addi BO, BO, - 4 * SIZE 1369 fpmr f8, f0 1370 addi BO2, BO, 2 * SIZE 1371 fpmr f12, f0 1372 1373 srawi. r0, TEMP, 2 1374 fpmr f1, f0 1375 mtspr CTR, r0 1376 ble .L14 1377#endif 1378 1379 LFPDUX A1, AO, INC4 1380 fpmr f5, f0 1381 LFPDUX A3, AO, INC4 1382 fpmr f9, f0 1383 LFPDUX B1, BO, INC4 1384 fpmr f13, f0 1385 1386 LFPDUX A5, AO, INC4 1387 fpmr f2, f0 1388 LFPDUX A6, AO, INC4 1389 fpmr f6, f0 1390 LFPDUX B3, BO, INC4 1391 fpmr f10, f0 1392 LFPDUX A7, AO, INC4 1393 fpmr f14, f0 1394 1395 LFPDUX A8, AO, INC4 1396 fpmr f3, f0 1397 LFPDUX B5, BO, INC4 1398 fpmr f7, f0 1399 LFPDUX A9, AO, INC4 1400 fpmr f11, f0 1401 LFPDUX A2, AO2, INC4 1402 fpmr f15, f0 1403 LFPDUX B2, BO2, INC4 1404 bdz- .L13 1405 .align 4 1406 1407.L12: 1408 1409## 1 ## 1410 FXCPMADD f0, B1, A1, f0 1411 nop 1412 FXCSMADD f4, B1, A1, f4 1413 nop 1414 FXCPMADD f8, B2, A1, f8 1415 LFPDUX B4, BO2, INC4 1416 FXCSMADD f12, B2, A1, f12 1417 LFPDUX B6, BO, INC4 1418 1419 FXCPMADD f1, B1, A2, f1 1420 nop 1421 FXCSMADD f5, B1, A2, f5 1422 LFPDUX A4, AO2, INC4 1423 FXCPMADD f9, B2, A2, f9 1424 LFPDUX A10, AO, INC4 1425 FXCSMADD f13, B2, A2, f13 1426 nop 1427 1428 FXCPMADD f2, B1, A3, f2 1429 nop 1430 FXCSMADD f6, B1, A3, f6 1431 nop 1432 FXCPMADD f10, B2, A3, f10 1433 nop 1434 FXCSMADD f14, B2, A3, f14 1435 nop 1436 1437 FXCPMADD f3, B1, A4, f3 1438 nop 1439 FXCSMADD f7, B1, A4, f7 1440 LFPDUX A2, AO2, INC4 1441 FXCPMADD f11, B2, A4, f11 1442 LFPDUX A1, 
AO, INC4 1443 FXCSMADD f15, B2, A4, f15 1444 nop 1445 1446## 2 ## 1447 1448 FXCPMADD f0, B3, A5, f0 1449 nop 1450 FXCSMADD f4, B3, A5, f4 1451 nop 1452 FXCPMADD f8, B4, A5, f8 1453 LFPDUX B2, BO2, INC4 1454 FXCSMADD f12, B4, A5, f12 1455 LFPDUX B1, BO, INC4 1456 1457 FXCPMADD f1, B3, A2, f1 1458 nop 1459 FXCSMADD f5, B3, A2, f5 1460 LFPDUX A4, AO2, INC4 1461 FXCPMADD f9, B4, A2, f9 1462 LFPDUX A3, AO, INC4 1463 FXCSMADD f13, B4, A2, f13 1464 nop 1465 1466 FXCPMADD f2, B3, A6, f2 1467 nop 1468 FXCSMADD f6, B3, A6, f6 1469 nop 1470 FXCPMADD f10, B4, A6, f10 1471 nop 1472 FXCSMADD f14, B4, A6, f14 1473 nop 1474 1475 FXCPMADD f3, B3, A4, f3 1476 nop 1477 FXCSMADD f7, B3, A4, f7 1478 LFPDUX A2, AO2, INC4 1479 FXCPMADD f11, B4, A4, f11 1480 LFPDUX A5, AO, INC4 1481 FXCSMADD f15, B4, A4, f15 1482 nop 1483 1484## 3 ## 1485 1486 FXCPMADD f0, B5, A7, f0 1487 nop 1488 FXCSMADD f4, B5, A7, f4 1489 nop 1490 FXCPMADD f8, B2, A7, f8 1491 LFPDUX B4, BO2, INC4 1492 FXCSMADD f12, B2, A7, f12 1493 LFPDUX B3, BO, INC4 1494 1495 FXCPMADD f1, B5, A2, f1 1496 nop 1497 FXCSMADD f5, B5, A2, f5 1498 LFPDUX A4, AO2, INC4 1499 FXCPMADD f9, B2, A2, f9 1500 LFPDUX A6, AO, INC4 1501 FXCSMADD f13, B2, A2, f13 1502 nop 1503 1504 FXCPMADD f2, B5, A8, f2 1505 nop 1506 FXCSMADD f6, B5, A8, f6 1507 nop 1508 FXCPMADD f10, B2, A8, f10 1509 nop 1510 FXCSMADD f14, B2, A8, f14 1511 nop 1512 1513 FXCPMADD f3, B5, A4, f3 1514 nop 1515 FXCSMADD f7, B5, A4, f7 1516 LFPDUX A2, AO2, INC4 1517 FXCPMADD f11, B2, A4, f11 1518 LFPDUX A7, AO, INC4 1519 FXCSMADD f15, B2, A4, f15 1520 nop 1521 1522## 4 ## 1523 FXCPMADD f0, B6, A9, f0 1524 nop 1525 FXCSMADD f4, B6, A9, f4 1526 nop 1527 FXCPMADD f8, B4, A9, f8 1528 LFPDUX B2, BO2, INC4 1529 FXCSMADD f12, B4, A9, f12 1530 LFPDUX B5, BO, INC4 1531 1532 FXCPMADD f1, B6, A2, f1 1533 nop 1534 FXCSMADD f5, B6, A2, f5 1535 LFPDUX A4, AO2, INC4 1536 FXCPMADD f9, B4, A2, f9 1537 LFPDUX A8, AO, INC4 1538 FXCSMADD f13, B4, A2, f13 1539 nop 1540 1541 FXCPMADD f2, B6, A10, f2 1542 
nop 1543 FXCSMADD f6, B6, A10, f6 1544 nop 1545 FXCPMADD f10, B4, A10, f10 1546 nop 1547 FXCSMADD f14, B4, A10, f14 1548 nop 1549 1550 FXCPMADD f3, B6, A4, f3 1551 LFPDUX A2, AO2, INC4 1552 FXCSMADD f7, B6, A4, f7 1553 LFPDUX A9, AO, INC4 1554 FXCPMADD f11, B4, A4, f11 1555 nop 1556 FXCSMADD f15, B4, A4, f15 1557 bdnz+ .L12 1558 .align 4 1559 1560.L13: 1561## 1 ## 1562 1563 FXCPMADD f0, B1, A1, f0 1564 nop 1565 FXCSMADD f4, B1, A1, f4 1566 nop 1567 FXCPMADD f8, B2, A1, f8 1568 LFPDUX B4, BO2, INC4 1569 FXCSMADD f12, B2, A1, f12 1570 LFPDUX B6, BO, INC4 1571 1572 FXCPMADD f1, B1, A2, f1 1573 nop 1574 FXCSMADD f5, B1, A2, f5 1575 LFPDUX A4, AO2, INC4 1576 FXCPMADD f9, B2, A2, f9 1577 LFPDUX A10, AO, INC4 1578 FXCSMADD f13, B2, A2, f13 1579 nop 1580 1581 FXCPMADD f2, B1, A3, f2 1582 nop 1583 FXCSMADD f6, B1, A3, f6 1584 nop 1585 FXCPMADD f10, B2, A3, f10 1586 nop 1587 FXCSMADD f14, B2, A3, f14 1588 nop 1589 1590 FXCPMADD f3, B1, A4, f3 1591 nop 1592 FXCSMADD f7, B1, A4, f7 1593 LFPDUX A2, AO2, INC4 1594 FXCPMADD f11, B2, A4, f11 1595 nop 1596 FXCSMADD f15, B2, A4, f15 1597 nop 1598 1599## 2 ## 1600 1601 FXCPMADD f0, B3, A5, f0 1602 nop 1603 FXCSMADD f4, B3, A5, f4 1604 nop 1605 FXCPMADD f8, B4, A5, f8 1606 LFPDUX B2, BO2, INC4 1607 FXCSMADD f12, B4, A5, f12 1608 nop 1609 1610 FXCPMADD f1, B3, A2, f1 1611 nop 1612 FXCSMADD f5, B3, A2, f5 1613 LFPDUX A4, AO2, INC4 1614 FXCPMADD f9, B4, A2, f9 1615 nop 1616 FXCSMADD f13, B4, A2, f13 1617 nop 1618 1619 FXCPMADD f2, B3, A6, f2 1620 nop 1621 FXCSMADD f6, B3, A6, f6 1622 nop 1623 FXCPMADD f10, B4, A6, f10 1624 nop 1625 FXCSMADD f14, B4, A6, f14 1626 nop 1627 1628 FXCPMADD f3, B3, A4, f3 1629 nop 1630 FXCSMADD f7, B3, A4, f7 1631 LFPDUX A2, AO2, INC4 1632 FXCPMADD f11, B4, A4, f11 1633 nop 1634 FXCSMADD f15, B4, A4, f15 1635 nop 1636 1637## 3 ## 1638 1639 FXCPMADD f0, B5, A7, f0 1640 nop 1641 FXCSMADD f4, B5, A7, f4 1642 nop 1643 FXCPMADD f8, B2, A7, f8 1644 LFPDUX B4, BO2, INC4 1645 FXCSMADD f12, B2, A7, f12 1646 nop 1647 
1648 FXCPMADD f1, B5, A2, f1 1649 nop 1650 FXCSMADD f5, B5, A2, f5 1651 LFPDUX A4, AO2, INC4 1652 FXCPMADD f9, B2, A2, f9 1653 nop 1654 FXCSMADD f13, B2, A2, f13 1655 nop 1656 1657 FXCPMADD f2, B5, A8, f2 1658 nop 1659 FXCSMADD f6, B5, A8, f6 1660 nop 1661 FXCPMADD f10, B2, A8, f10 1662 nop 1663 FXCSMADD f14, B2, A8, f14 1664 nop 1665 1666 FXCPMADD f3, B5, A4, f3 1667 nop 1668 FXCSMADD f7, B5, A4, f7 1669 LFPDUX A2, AO2, INC4 1670 FXCPMADD f11, B2, A4, f11 1671 nop 1672 FXCSMADD f15, B2, A4, f15 1673 nop 1674 1675## 4 ## 1676 1677 FXCPMADD f0, B6, A9, f0 1678 nop 1679 FXCSMADD f4, B6, A9, f4 1680 nop 1681 FXCPMADD f8, B4, A9, f8 1682 nop 1683 FXCSMADD f12, B4, A9, f12 1684 nop 1685 1686 FXCPMADD f1, B6, A2, f1 1687 nop 1688 FXCSMADD f5, B6, A2, f5 1689 LFPDUX A4, AO2, INC4 1690 FXCPMADD f9, B4, A2, f9 1691 nop 1692 FXCSMADD f13, B4, A2, f13 1693 nop 1694 1695 FXCPMADD f2, B6, A10, f2 1696 nop 1697 FXCSMADD f6, B6, A10, f6 1698 nop 1699 FXCPMADD f10, B4, A10, f10 1700 nop 1701 FXCSMADD f14, B4, A10, f14 1702 nop 1703 1704 FXCPMADD f3, B6, A4, f3 1705 nop 1706 FXCSMADD f7, B6, A4, f7 1707 nop 1708 FXCPMADD f11, B4, A4, f11 1709 nop 1710 FXCSMADD f15, B4, A4, f15 1711 nop 1712 .align 4 1713 1714.L14: 1715#if defined(LT) || defined(RN) 1716 andi. r0, KK, 3 1717 mtspr CTR, r0 1718 ble+ .L18 1719#else 1720 andi. 
r0, TEMP, 3 1721 mtspr CTR, r0 1722 ble+ .L18 1723#endif 1724 1725.L15: 1726 LFPDUX A2, AO, INC4 1727 LFPDUX A4, AO2, INC4 1728 LFPDUX A10, BO, INC4 1729 LFPDUX B4, BO2, INC4 1730 bdz- .L17 1731 .align 4 1732 1733.L16: 1734 FXCPMADD f0, A10, A2, f0 1735 FXCSMADD f4, A10, A2, f4 1736 FXCPMADD f8, B4, A2, f8 1737 FXCSMADD f12, B4, A2, f12 1738 LFPDUX A2, AO, INC4 1739 1740 FXCPMADD f1, A10, A4, f1 1741 FXCSMADD f5, A10, A4, f5 1742 FXCPMADD f9, B4, A4, f9 1743 FXCSMADD f13, B4, A4, f13 1744 LFPDUX A4, AO2, INC4 1745 1746 FXCPMADD f2, A10, A2, f2 1747 FXCSMADD f6, A10, A2, f6 1748 FXCPMADD f10, B4, A2, f10 1749 FXCSMADD f14, B4, A2, f14 1750 LFPDUX A2, AO, INC4 1751 1752 FXCPMADD f3, A10, A4, f3 1753 FXCSMADD f7, A10, A4, f7 1754 LFPDUX A10, BO, INC4 1755 FXCPMADD f11, B4, A4, f11 1756 FXCSMADD f15, B4, A4, f15 1757 LFPDUX A4, AO2, INC4 1758 LFPDUX B4, BO2, INC4 1759 bdnz+ .L16 1760 .align 4 1761 1762.L17: 1763 FXCPMADD f0, A10, A2, f0 1764 FXCSMADD f4, A10, A2, f4 1765 FXCPMADD f8, B4, A2, f8 1766 FXCSMADD f12, B4, A2, f12 1767 LFPDUX A2, AO, INC4 1768 1769 FXCPMADD f1, A10, A4, f1 1770 FXCSMADD f5, A10, A4, f5 1771 FXCPMADD f9, B4, A4, f9 1772 FXCSMADD f13, B4, A4, f13 1773 LFPDUX A4, AO2, INC4 1774 1775 FXCPMADD f2, A10, A2, f2 1776 FXCSMADD f6, A10, A2, f6 1777 FXCPMADD f10, B4, A2, f10 1778 FXCSMADD f14, B4, A2, f14 1779 1780 FXCPMADD f3, A10, A4, f3 1781 FXCSMADD f7, A10, A4, f7 1782 FXCPMADD f11, B4, A4, f11 1783 FXCSMADD f15, B4, A4, f15 1784 .align 4 1785 1786.L18: 1787 fpadd f0, f0, f4 1788 fpadd f8, f8, f12 1789 fpadd f1, f1, f5 1790 fpadd f9, f9, f13 1791 1792 fpadd f2, f2, f6 1793 fpadd f10, f10, f14 1794 fpadd f3, f3, f7 1795 fpadd f11, f11, f15 1796 1797#if defined(LN) || defined(RT) 1798#ifdef LN 1799 subi r0, KK, 4 1800#else 1801 subi r0, KK, 2 1802#endif 1803 slwi TEMP, r0, 2 + ZBASE_SHIFT 1804 slwi r0, r0, 1 + ZBASE_SHIFT 1805 add AO, AORIG, TEMP 1806 add BO, B, r0 1807 addi AO2, AO, 2 * SIZE 1808 addi BO, BO, - 4 * SIZE 1809 addi BO2, BO, 2 * SIZE 
1810#endif 1811 1812#if defined(LN) || defined(LT) 1813 LFPDUX f16, BO, INC4 1814 LFPDUX f20, BO2, INC4 1815 LFPDUX f17, BO, INC4 1816 LFPDUX f21, BO2, INC4 1817 LFPDUX f18, BO, INC4 1818 LFPDUX f22, BO2, INC4 1819 LFPDUX f19, BO, INC4 1820 LFPDUX f23, BO2, INC4 1821 1822 subi BO, BO, 16 * SIZE 1823 subi BO2, BO2, 16 * SIZE 1824#else 1825 LFPDUX f16, AO, INC4 1826 LFPDUX f17, AO2, INC4 1827 LFPDUX f18, AO, INC4 1828 LFPDUX f19, AO2, INC4 1829 LFPDUX f20, AO, INC4 1830 LFPDUX f21, AO2, INC4 1831 LFPDUX f22, AO, INC4 1832 LFPDUX f23, AO2, INC4 1833 1834 subi AO, AO, 16 * SIZE 1835 subi AO2, AO2, 16 * SIZE 1836#endif 1837 1838 fpsub f0, f16, f0 1839 fpsub f1, f17, f1 1840 fpsub f2, f18, f2 1841 fpsub f3, f19, f3 1842 1843 fpsub f8, f20, f8 1844 fpsub f9, f21, f9 1845 fpsub f10, f22, f10 1846 fpsub f11, f23, f11 1847 1848#ifdef LN 1849 LFPDUX A1, AO, INC4 1850 add AO2, AO2, INC4 1851 add AO, AO, INC4 1852 add AO2, AO2, INC4 1853 1854 LFPDUX A2, AO, INC4 1855 LFPDUX A3, AO2, INC4 1856 add AO, AO, INC4 1857 add AO2, AO2, INC4 1858 1859 LFPDUX A4, AO, INC4 1860 LFPDUX A5, AO2, INC4 1861 LFPDUX A6, AO, INC4 1862 add AO2, AO2, INC4 1863 1864 LFPDUX A7, AO, INC4 1865 LFPDUX A8, AO2, INC4 1866 LFPDUX A9, AO, INC4 1867 LFPDUX A10, AO2, INC4 1868 1869 subi AO, AO, 32 * SIZE 1870 subi AO2, AO2, 32 * SIZE 1871 1872 fxpmul f4, A10, f3 1873 fxpmul f5, A10, f11 1874 FXCXNPMA f3, A10, f3, f4 1875 FXCXNPMA f11, A10, f11, f5 1876 1877 fxcpnmsub f2, A9, f3, f2 1878 fxcpnmsub f10, A9, f11, f10 1879 FXCXNSMA f2, A9, f3, f2 1880 FXCXNSMA f10, A9, f11, f10 1881 1882 fxcpnmsub f1, A8, f3, f1 1883 fxcpnmsub f9, A8, f11, f9 1884 FXCXNSMA f1, A8, f3, f1 1885 FXCXNSMA f9, A8, f11, f9 1886 1887 fxcpnmsub f0, A7, f3, f0 1888 fxcpnmsub f8, A7, f11, f8 1889 FXCXNSMA f0, A7, f3, f0 1890 FXCXNSMA f8, A7, f11, f8 1891 1892 fxpmul f4, A6, f2 1893 fxpmul f5, A6, f10 1894 FXCXNPMA f2, A6, f2, f4 1895 FXCXNPMA f10, A6, f10, f5 1896 1897 fxcpnmsub f1, A5, f2, f1 1898 fxcpnmsub f9, A5, f10, f9 1899 FXCXNSMA 
f1, A5, f2, f1 1900 FXCXNSMA f9, A5, f10, f9 1901 1902 fxcpnmsub f0, A4, f2, f0 1903 fxcpnmsub f8, A4, f10, f8 1904 FXCXNSMA f0, A4, f2, f0 1905 FXCXNSMA f8, A4, f10, f8 1906 1907 fxpmul f4, A3, f1 1908 fxpmul f5, A3, f9 1909 FXCXNPMA f1, A3, f1, f4 1910 FXCXNPMA f9, A3, f9, f5 1911 1912 fxcpnmsub f0, A2, f1, f0 1913 fxcpnmsub f8, A2, f9, f8 1914 FXCXNSMA f0, A2, f1, f0 1915 FXCXNSMA f8, A2, f9, f8 1916 1917 fxpmul f4, A1, f0 1918 fxpmul f5, A1, f8 1919 FXCXNPMA f0, A1, f0, f4 1920 FXCXNPMA f8, A1, f8, f5 1921#endif 1922 1923#ifdef LT 1924 LFPDUX A1, AO, INC4 1925 LFPDUX A2, AO2, INC4 1926 LFPDUX A3, AO, INC4 1927 LFPDUX A4, AO2, INC4 1928 1929 add AO, AO, INC4 1930 LFPDUX A5, AO2, INC4 1931 LFPDUX A6, AO, INC4 1932 LFPDUX A7, AO2, INC4 1933 1934 add AO, AO, INC4 1935 add AO2, AO2, INC4 1936 LFPDUX A8, AO, INC4 1937 LFPDUX A9, AO2, INC4 1938 1939 add AO, AO, INC4 1940 add AO2, AO2, INC4 1941 add AO, AO, INC4 1942 LFPDUX A10, AO2, INC4 1943 1944 subi AO, AO, 32 * SIZE 1945 subi AO2, AO2, 32 * SIZE 1946 1947 fxpmul f4, A1, f0 1948 fxpmul f5, A1, f8 1949 FXCXNPMA f0, A1, f0, f4 1950 FXCXNPMA f8, A1, f8, f5 1951 1952 fxcpnmsub f1, A2, f0, f1 1953 fxcpnmsub f9, A2, f8, f9 1954 FXCXNSMA f1, A2, f0, f1 1955 FXCXNSMA f9, A2, f8, f9 1956 1957 fxcpnmsub f2, A3, f0, f2 1958 fxcpnmsub f10, A3, f8, f10 1959 FXCXNSMA f2, A3, f0, f2 1960 FXCXNSMA f10, A3, f8, f10 1961 1962 fxcpnmsub f3, A4, f0, f3 1963 fxcpnmsub f11, A4, f8, f11 1964 FXCXNSMA f3, A4, f0, f3 1965 FXCXNSMA f11, A4, f8, f11 1966 1967 fxpmul f6, A5, f1 1968 fxpmul f7, A5, f9 1969 FXCXNPMA f1, A5, f1, f6 1970 FXCXNPMA f9, A5, f9, f7 1971 1972 fxcpnmsub f2, A6, f1, f2 1973 fxcpnmsub f10, A6, f9, f10 1974 FXCXNSMA f2, A6, f1, f2 1975 FXCXNSMA f10, A6, f9, f10 1976 1977 fxcpnmsub f3, A7, f1, f3 1978 fxcpnmsub f11, A7, f9, f11 1979 FXCXNSMA f3, A7, f1, f3 1980 FXCXNSMA f11, A7, f9, f11 1981 1982 fxpmul f4, A8, f2 1983 fxpmul f5, A8, f10 1984 FXCXNPMA f2, A8, f2, f4 1985 FXCXNPMA f10, A8, f10, f5 1986 1987 fxcpnmsub f3, 
A9, f2, f3 1988 fxcpnmsub f11, A9, f10, f11 1989 FXCXNSMA f3, A9, f2, f3 1990 FXCXNSMA f11, A9, f10, f11 1991 1992 fxpmul f6, A10, f3 1993 fxpmul f7, A10, f11 1994 FXCXNPMA f3, A10, f3, f6 1995 FXCXNPMA f11, A10, f11, f7 1996#endif 1997 1998#ifdef RN 1999 LFPDUX A1, BO, INC4 2000 LFPDUX A2, BO2, INC4 2001 add BO, BO, INC4 2002 LFPDUX A3, BO2, INC4 2003 2004 subi BO, BO, 8 * SIZE 2005 subi BO2, BO2, 8 * SIZE 2006 2007 fxpmul f4, A1, f0 2008 fxpmul f5, A1, f1 2009 fxpmul f6, A1, f2 2010 fxpmul f7, A1, f3 2011 2012 FXCXNPMA f0, A1, f0, f4 2013 FXCXNPMA f1, A1, f1, f5 2014 FXCXNPMA f2, A1, f2, f6 2015 FXCXNPMA f3, A1, f3, f7 2016 2017 fxcpnmsub f8, A2, f0, f8 2018 fxcpnmsub f9, A2, f1, f9 2019 fxcpnmsub f10, A2, f2, f10 2020 fxcpnmsub f11, A2, f3, f11 2021 2022 FXCXNSMA f8, A2, f0, f8 2023 FXCXNSMA f9, A2, f1, f9 2024 FXCXNSMA f10, A2, f2, f10 2025 FXCXNSMA f11, A2, f3, f11 2026 2027 fxpmul f4, A3, f8 2028 fxpmul f5, A3, f9 2029 fxpmul f6, A3, f10 2030 fxpmul f7, A3, f11 2031 2032 FXCXNPMA f8, A3, f8, f4 2033 FXCXNPMA f9, A3, f9, f5 2034 FXCXNPMA f10, A3, f10, f6 2035 FXCXNPMA f11, A3, f11, f7 2036#endif 2037 2038#ifdef RT 2039 LFPDUX A1, BO, INC4 2040 add BO2, BO2, INC4 2041 LFPDUX A2, BO, INC4 2042 LFPDUX A3, BO2, INC4 2043 2044 subi BO, BO, 8 * SIZE 2045 subi BO2, BO2, 8 * SIZE 2046 2047 fxpmul f4, A3, f8 2048 fxpmul f5, A3, f9 2049 fxpmul f6, A3, f10 2050 fxpmul f7, A3, f11 2051 2052 FXCXNPMA f8, A3, f8, f4 2053 FXCXNPMA f9, A3, f9, f5 2054 FXCXNPMA f10, A3, f10, f6 2055 FXCXNPMA f11, A3, f11, f7 2056 2057 fxcpnmsub f0, A2, f8, f0 2058 fxcpnmsub f1, A2, f9, f1 2059 fxcpnmsub f2, A2, f10, f2 2060 fxcpnmsub f3, A2, f11, f3 2061 2062 FXCXNSMA f0, A2, f8, f0 2063 FXCXNSMA f1, A2, f9, f1 2064 FXCXNSMA f2, A2, f10, f2 2065 FXCXNSMA f3, A2, f11, f3 2066 2067 fxpmul f4, A1, f0 2068 fxpmul f5, A1, f1 2069 fxpmul f6, A1, f2 2070 fxpmul f7, A1, f3 2071 2072 FXCXNPMA f0, A1, f0, f4 2073 FXCXNPMA f1, A1, f1, f5 2074 FXCXNPMA f2, A1, f2, f6 2075 FXCXNPMA f3, A1, f3, f7 
2076#endif 2077 2078#ifdef LN 2079 subi CO1, CO1, 8 * SIZE 2080 subi CO2, CO2, 8 * SIZE 2081#endif 2082 2083#if defined(LN) || defined(LT) 2084 STFPDUX f0, BO, INC4 2085 STFPDUX f8, BO2, INC4 2086 STFPDUX f1, BO, INC4 2087 STFPDUX f9, BO2, INC4 2088 STFPDUX f2, BO, INC4 2089 STFPDUX f10, BO2, INC4 2090 STFPDUX f3, BO, INC4 2091 STFPDUX f11, BO2, INC4 2092 2093 subi BO, BO, 16 * SIZE 2094 subi BO2, BO2, 16 * SIZE 2095#else 2096 STFPDUX f0, AO, INC4 2097 STFPDUX f1, AO2, INC4 2098 STFPDUX f2, AO, INC4 2099 STFPDUX f3, AO2, INC4 2100 STFPDUX f8, AO, INC4 2101 STFPDUX f9, AO2, INC4 2102 STFPDUX f10, AO, INC4 2103 STFPDUX f11, AO2, INC4 2104 2105 subi AO, AO, 16 * SIZE 2106 subi AO2, AO2, 16 * SIZE 2107#endif 2108 2109 STFDUX f0, CO1, INC 2110 STFSDUX f0, CO1, INC 2111 STFDUX f1, CO1, INC 2112 STFSDUX f1, CO1, INC 2113 STFDUX f2, CO1, INC 2114 STFSDUX f2, CO1, INC 2115 STFDUX f3, CO1, INC 2116 STFSDUX f3, CO1, INC 2117 2118 STFDUX f8, CO2, INC 2119 STFSDUX f8, CO2, INC 2120 STFDUX f9, CO2, INC 2121 STFSDUX f9, CO2, INC 2122 STFDUX f10, CO2, INC 2123 STFSDUX f10, CO2, INC 2124 STFDUX f11, CO2, INC 2125 STFSDUX f11, CO2, INC 2126 2127#ifdef LN 2128 subi CO1, CO1, 8 * SIZE 2129 subi CO2, CO2, 8 * SIZE 2130#endif 2131 2132#ifdef RT 2133 slwi r0, K, 2 + ZBASE_SHIFT 2134 add AORIG, AORIG, r0 2135#endif 2136 2137#if defined(LT) || defined(RN) 2138 sub TEMP, K, KK 2139 slwi r0, TEMP, 2 + ZBASE_SHIFT 2140 slwi TEMP, TEMP, 1 + ZBASE_SHIFT 2141 add AO, AO, r0 2142 add BO, BO, TEMP 2143#endif 2144 2145#ifdef LT 2146 addi KK, KK, 4 2147#endif 2148 2149#ifdef LN 2150 subi KK, KK, 4 2151#endif 2152 2153 addic. I, I, -1 2154 li r0, FZERO 2155 2156 lfpsx f0, SP, r0 2157 bgt+ .L11 2158 .align 4 2159 2160.L20: 2161 andi. I, M, 2 2162 beq .L30 2163 2164#if defined(LT) || defined(RN) 2165 addi AO2, AO, 2 * SIZE 2166 fpmr f4, f0 2167 addi BO, B, - 4 * SIZE 2168 fpmr f8, f0 2169 addi BO2, B, - 2 * SIZE 2170 fpmr f12, f0 2171 2172 srawi. 
r0, KK, 2 2173 fpmr f1, f0 2174 fpmr f5, f0 2175 fpmr f9, f0 2176 mtspr CTR, r0 2177 fpmr f13, f0 2178 ble .L24 2179#else 2180#ifdef LN 2181 slwi r0, K, 1 + ZBASE_SHIFT 2182 sub AORIG, AORIG, r0 2183#endif 2184 2185 slwi r0 , KK, 1 + ZBASE_SHIFT 2186 add AO, AORIG, r0 2187 add BO, B, r0 2188 2189 sub TEMP, K, KK 2190 2191 addi AO2, AO, 2 * SIZE 2192 fpmr f4, f0 2193 addi BO, BO, - 4 * SIZE 2194 fpmr f8, f0 2195 addi BO2, BO, 2 * SIZE 2196 fpmr f12, f0 2197 2198 fpmr f1, f0 2199 fpmr f5, f0 2200 fpmr f9, f0 2201 fpmr f13, f0 2202 srawi. r0, TEMP, 2 2203 mtspr CTR, r0 2204 ble .L24 2205#endif 2206 2207 LFPDUX A1, AO, INC4 2208 LFPDUX B1, BO, INC4 2209 LFPDUX A2, AO2, INC4 2210 LFPDUX B2, BO2, INC4 2211 LFPDUX A3, AO, INC4 2212 LFPDUX B3, BO, INC4 2213 LFPDUX A4, AO2, INC4 2214 LFPDUX B4, BO2, INC4 2215 2216 LFPDUX A5, AO, INC4 2217 LFPDUX B5, BO, INC4 2218 LFPDUX A6, AO2, INC4 2219 LFPDUX B6, BO2, INC4 2220 LFPDUX A7, AO, INC4 2221 LFPDUX A9, BO, INC4 2222 LFPDUX A10, BO2, INC4 2223 bdz- .L23 2224 .align 4 2225 2226.L22: 2227 FXCPMADD f0, B1, A1, f0 2228 nop 2229 FXCSMADD f4, B1, A1, f4 2230 LFPDUX A8, AO2, INC4 2231 FXCPMADD f8, B2, A1, f8 2232 nop 2233 FXCSMADD f12, B2, A1, f12 2234 LFPDUX A1, AO, INC4 2235 2236 FXCPMADD f1, B1, A2, f1 2237 nop 2238 FXCSMADD f5, B1, A2, f5 2239 LFPDUX B1, BO, INC4 2240 FXCPMADD f9, B2, A2, f9 2241 nop 2242 FXCSMADD f13, B2, A2, f13 2243 LFPDUX B2, BO2, INC4 2244 2245 FXCPMADD f0, B3, A3, f0 2246 nop 2247 FXCSMADD f4, B3, A3, f4 2248 LFPDUX A2, AO2, INC4 2249 FXCPMADD f8, B4, A3, f8 2250 nop 2251 FXCSMADD f12, B4, A3, f12 2252 LFPDUX A3, AO, INC4 2253 2254 FXCPMADD f1, B3, A4, f1 2255 nop 2256 FXCSMADD f5, B3, A4, f5 2257 LFPDUX B3, BO, INC4 2258 FXCPMADD f9, B4, A4, f9 2259 nop 2260 FXCSMADD f13, B4, A4, f13 2261 LFPDUX B4, BO2, INC4 2262 2263 FXCPMADD f0, B5, A5, f0 2264 nop 2265 FXCSMADD f4, B5, A5, f4 2266 LFPDUX A4, AO2, INC4 2267 FXCPMADD f8, B6, A5, f8 2268 nop 2269 FXCSMADD f12, B6, A5, f12 2270 LFPDUX A5, AO, INC4 2271 2272 
FXCPMADD f1, B5, A6, f1 2273 nop 2274 FXCSMADD f5, B5, A6, f5 2275 LFPDUX B5, BO, INC4 2276 FXCPMADD f9, B6, A6, f9 2277 nop 2278 FXCSMADD f13, B6, A6, f13 2279 LFPDUX B6, BO2, INC4 2280 2281 FXCPMADD f0, A9, A7, f0 2282 nop 2283 FXCSMADD f4, A9, A7, f4 2284 LFPDUX A6, AO2, INC4 2285 FXCPMADD f8, A10, A7, f8 2286 nop 2287 FXCSMADD f12, A10, A7, f12 2288 LFPDUX A7, AO, INC4 2289 2290 FXCPMADD f1, A9, A8, f1 2291 nop 2292 FXCSMADD f5, A9, A8, f5 2293 LFPDUX A9, BO, INC4 2294 FXCPMADD f9, A10, A8, f9 2295 nop 2296 FXCSMADD f13, A10, A8, f13 2297 LFPDUX A10, BO2, INC4 2298 bdnz+ .L22 2299 .align 4 2300 2301.L23: 2302 FXCPMADD f0, B1, A1, f0 2303 FXCSMADD f4, B1, A1, f4 2304 LFPDUX A8, AO2, INC4 2305 FXCPMADD f8, B2, A1, f8 2306 FXCSMADD f12, B2, A1, f12 2307 2308 FXCPMADD f1, B1, A2, f1 2309 FXCSMADD f5, B1, A2, f5 2310 FXCPMADD f9, B2, A2, f9 2311 FXCSMADD f13, B2, A2, f13 2312 2313 FXCPMADD f0, B3, A3, f0 2314 FXCSMADD f4, B3, A3, f4 2315 FXCPMADD f8, B4, A3, f8 2316 FXCSMADD f12, B4, A3, f12 2317 2318 FXCPMADD f1, B3, A4, f1 2319 FXCSMADD f5, B3, A4, f5 2320 FXCPMADD f9, B4, A4, f9 2321 FXCSMADD f13, B4, A4, f13 2322 2323 FXCPMADD f0, B5, A5, f0 2324 FXCSMADD f4, B5, A5, f4 2325 FXCPMADD f8, B6, A5, f8 2326 FXCSMADD f12, B6, A5, f12 2327 2328 FXCPMADD f1, B5, A6, f1 2329 FXCSMADD f5, B5, A6, f5 2330 FXCPMADD f9, B6, A6, f9 2331 FXCSMADD f13, B6, A6, f13 2332 2333 FXCPMADD f0, A9, A7, f0 2334 FXCSMADD f4, A9, A7, f4 2335 FXCPMADD f8, A10, A7, f8 2336 FXCSMADD f12, A10, A7, f12 2337 2338 FXCPMADD f1, A9, A8, f1 2339 FXCSMADD f5, A9, A8, f5 2340 FXCPMADD f9, A10, A8, f9 2341 FXCSMADD f13, A10, A8, f13 2342 .align 4 2343 2344.L24: 2345#if defined(LT) || defined(RN) 2346 andi. r0, KK, 3 2347 mtspr CTR, r0 2348 ble+ .L28 2349#else 2350 andi. 
r0, TEMP, 3 2351 mtspr CTR, r0 2352 ble+ .L28 2353#endif 2354 2355 LFPDUX A1, AO, INC4 2356 LFPDUX A2, AO2, INC4 2357 LFPDUX B1, BO, INC4 2358 LFPDUX B2, BO2, INC4 2359 bdz- .L27 2360 .align 4 2361 2362.L26: 2363 FXCPMADD f0, B1, A1, f0 2364 FXCSMADD f4, B1, A1, f4 2365 FXCPMADD f8, B2, A1, f8 2366 FXCSMADD f12, B2, A1, f12 2367 LFPDUX A1, AO, INC4 2368 2369 FXCPMADD f1, B1, A2, f1 2370 FXCSMADD f5, B1, A2, f5 2371 LFPDUX B1, BO, INC4 2372 FXCPMADD f9, B2, A2, f9 2373 FXCSMADD f13, B2, A2, f13 2374 LFPDUX A2, AO2, INC4 2375 LFPDUX B2, BO2, INC4 2376 bdnz+ .L26 2377 .align 4 2378 2379.L27: 2380 FXCPMADD f0, B1, A1, f0 2381 FXCSMADD f4, B1, A1, f4 2382 FXCPMADD f8, B2, A1, f8 2383 FXCSMADD f12, B2, A1, f12 2384 2385 FXCPMADD f1, B1, A2, f1 2386 FXCSMADD f5, B1, A2, f5 2387 FXCPMADD f9, B2, A2, f9 2388 FXCSMADD f13, B2, A2, f13 2389 .align 4 2390 2391.L28: 2392 fpadd f0, f0, f4 2393 fpadd f8, f8, f12 2394 fpadd f1, f1, f5 2395 fpadd f9, f9, f13 2396 2397#if defined(LN) || defined(RT) 2398#ifdef LN 2399 subi r0, KK, 2 2400#else 2401 subi r0, KK, 2 2402#endif 2403 slwi r0, r0, 1 + ZBASE_SHIFT 2404 add AO, AORIG, r0 2405 add BO, B, r0 2406 addi AO2, AO, 2 * SIZE 2407 addi BO, BO, - 4 * SIZE 2408 addi BO2, BO, 2 * SIZE 2409#endif 2410 2411#if defined(LN) || defined(LT) 2412 LFPDUX f16, BO, INC4 2413 LFPDUX f18, BO2, INC4 2414 LFPDUX f17, BO, INC4 2415 LFPDUX f19, BO2, INC4 2416 2417 subi BO, BO, 8 * SIZE 2418 subi BO2, BO2, 8 * SIZE 2419#else 2420 LFPDUX f16, AO, INC4 2421 LFPDUX f17, AO2, INC4 2422 LFPDUX f18, AO, INC4 2423 LFPDUX f19, AO2, INC4 2424 2425 subi AO, AO, 8 * SIZE 2426 subi AO2, AO2, 8 * SIZE 2427#endif 2428 2429 fpsub f0, f16, f0 2430 fpsub f1, f17, f1 2431 fpsub f8, f18, f8 2432 fpsub f9, f19, f9 2433 2434#ifdef LN 2435 LFPDUX A1, AO, INC4 2436 add AO2, AO2, INC4 2437 LFPDUX A2, AO, INC4 2438 LFPDUX A3, AO2, INC4 2439 2440 subi AO, AO, 8 * SIZE 2441 subi AO2, AO2, 8 * SIZE 2442 2443 fxpmul f4, A3, f1 2444 fxpmul f5, A3, f9 2445 FXCXNPMA f1, A3, f1, f4 2446 
FXCXNPMA f9, A3, f9, f5 2447 2448 fxcpnmsub f0, A2, f1, f0 2449 fxcpnmsub f8, A2, f9, f8 2450 FXCXNSMA f0, A2, f1, f0 2451 FXCXNSMA f8, A2, f9, f8 2452 2453 fxpmul f4, A1, f0 2454 fxpmul f5, A1, f8 2455 FXCXNPMA f0, A1, f0, f4 2456 FXCXNPMA f8, A1, f8, f5 2457#endif 2458 2459#ifdef LT 2460 LFPDUX A1, AO, INC4 2461 LFPDUX A2, AO2, INC4 2462 add AO, AO, INC4 2463 LFPDUX A3, AO2, INC4 2464 2465 subi AO, AO, 8 * SIZE 2466 subi AO2, AO2, 8 * SIZE 2467 2468 fxpmul f4, A1, f0 2469 fxpmul f5, A1, f8 2470 FXCXNPMA f0, A1, f0, f4 2471 FXCXNPMA f8, A1, f8, f5 2472 2473 fxcpnmsub f1, A2, f0, f1 2474 fxcpnmsub f9, A2, f8, f9 2475 FXCXNSMA f1, A2, f0, f1 2476 FXCXNSMA f9, A2, f8, f9 2477 2478 fxpmul f6, A3, f1 2479 fxpmul f7, A3, f9 2480 FXCXNPMA f1, A3, f1, f6 2481 FXCXNPMA f9, A3, f9, f7 2482#endif 2483 2484#ifdef RN 2485 LFPDUX A1, BO, INC4 2486 LFPDUX A2, BO2, INC4 2487 add BO, BO, INC4 2488 LFPDUX A3, BO2, INC4 2489 2490 subi BO, BO, 8 * SIZE 2491 subi BO2, BO2, 8 * SIZE 2492 2493 fxpmul f4, A1, f0 2494 fxpmul f5, A1, f1 2495 2496 FXCXNPMA f0, A1, f0, f4 2497 FXCXNPMA f1, A1, f1, f5 2498 2499 fxcpnmsub f8, A2, f0, f8 2500 fxcpnmsub f9, A2, f1, f9 2501 2502 FXCXNSMA f8, A2, f0, f8 2503 FXCXNSMA f9, A2, f1, f9 2504 2505 fxpmul f4, A3, f8 2506 fxpmul f5, A3, f9 2507 2508 FXCXNPMA f8, A3, f8, f4 2509 FXCXNPMA f9, A3, f9, f5 2510#endif 2511 2512#ifdef RT 2513 LFPDUX A1, BO, INC4 2514 add BO2, BO2, INC4 2515 LFPDUX A2, BO, INC4 2516 LFPDUX A3, BO2, INC4 2517 2518 subi BO, BO, 8 * SIZE 2519 subi BO2, BO2, 8 * SIZE 2520 2521 fxpmul f4, A3, f8 2522 fxpmul f5, A3, f9 2523 2524 FXCXNPMA f8, A3, f8, f4 2525 FXCXNPMA f9, A3, f9, f5 2526 2527 fxcpnmsub f0, A2, f8, f0 2528 fxcpnmsub f1, A2, f9, f1 2529 2530 FXCXNSMA f0, A2, f8, f0 2531 FXCXNSMA f1, A2, f9, f1 2532 2533 fxpmul f4, A1, f0 2534 fxpmul f5, A1, f1 2535 2536 FXCXNPMA f0, A1, f0, f4 2537 FXCXNPMA f1, A1, f1, f5 2538#endif 2539 2540#ifdef LN 2541 subi CO1, CO1, 4 * SIZE 2542 subi CO2, CO2, 4 * SIZE 2543#endif 2544 2545#if 
defined(LN) || defined(LT) 2546 STFPDUX f0, BO, INC4 2547 STFPDUX f8, BO2, INC4 2548 STFPDUX f1, BO, INC4 2549 STFPDUX f9, BO2, INC4 2550 2551 subi BO, BO, 8 * SIZE 2552 subi BO2, BO2, 8 * SIZE 2553#else 2554 STFPDUX f0, AO, INC4 2555 STFPDUX f1, AO2, INC4 2556 STFPDUX f8, AO, INC4 2557 STFPDUX f9, AO2, INC4 2558 2559 subi AO, AO, 8 * SIZE 2560 subi AO2, AO2, 8 * SIZE 2561#endif 2562 2563 STFDUX f0, CO1, INC 2564 STFSDUX f0, CO1, INC 2565 STFDUX f1, CO1, INC 2566 STFSDUX f1, CO1, INC 2567 2568 STFDUX f8, CO2, INC 2569 STFSDUX f8, CO2, INC 2570 STFDUX f9, CO2, INC 2571 STFSDUX f9, CO2, INC 2572 2573#ifdef LN 2574 subi CO1, CO1, 4 * SIZE 2575 subi CO2, CO2, 4 * SIZE 2576#endif 2577 2578#ifdef RT 2579 slwi r0, K, 1 + ZBASE_SHIFT 2580 add AORIG, AORIG, r0 2581#endif 2582 2583#if defined(LT) || defined(RN) 2584 sub TEMP, K, KK 2585 slwi r0, TEMP, 1 + ZBASE_SHIFT 2586 add AO, AO, r0 2587 add BO, BO, r0 2588#endif 2589 2590#ifdef LT 2591 addi KK, KK, 2 2592#endif 2593 2594#ifdef LN 2595 subi KK, KK, 2 2596#endif 2597 2598 li r0, FZERO 2599 lfpsx f0, SP, r0 2600 .align 4 2601 2602.L30: 2603 andi. I, M, 1 2604 beq .L49 2605 2606#if defined(LT) || defined(RN) 2607 addi AO2, AO, 2 * SIZE 2608 fpmr f1, f0 2609 addi BO, B, - 4 * SIZE 2610 fpmr f2, f0 2611 addi BO2, B, - 2 * SIZE 2612 fpmr f3, f0 2613 2614 srawi. r0, KK, 2 2615 mtspr CTR, r0 2616 ble .L34 2617#else 2618#ifdef LN 2619 slwi r0, K, 0 + ZBASE_SHIFT 2620 sub AORIG, AORIG, r0 2621#endif 2622 2623 slwi r0 , KK, 0 + ZBASE_SHIFT 2624 slwi TEMP, KK, 1 + ZBASE_SHIFT 2625 add AO, AORIG, r0 2626 add BO, B, TEMP 2627 2628 sub TEMP, K, KK 2629 2630 addi AO2, AO, 2 * SIZE 2631 fpmr f1, f0 2632 addi BO, BO, - 4 * SIZE 2633 fpmr f2, f0 2634 addi BO2, BO, 2 * SIZE 2635 fpmr f3, f0 2636 2637 srawi. 
r0, TEMP, 2 2638 mtspr CTR, r0 2639 ble .L34 2640#endif 2641 2642 LFPDUX A1, AO, INC4 2643 LFPDUX B1, BO, INC4 2644 LFPDUX B2, BO2, INC4 2645 LFPDUX A2, AO2, INC4 2646 LFPDUX B3, BO, INC4 2647 LFPDUX B4, BO2, INC4 2648 2649 LFPDUX A3, AO, INC4 2650 LFPDUX A5, BO, INC4 2651 LFPDUX A6, BO2, INC4 2652 LFPDUX A4, AO2, INC4 2653 LFPDUX A7, BO, INC4 2654 LFPDUX A8, BO2, INC4 2655 bdz- .L33 2656 .align 4 2657 2658.L32: 2659 FXCPMADD f0, B1, A1, f0 2660 FXCSMADD f1, B1, A1, f1 2661 LFPDUX B1, BO, INC4 2662 FXCPMADD f2, B2, A1, f2 2663 FXCSMADD f3, B2, A1, f3 2664 LFPDUX B2, BO2, INC4 2665 LFPDUX A1, AO, INC4 2666 2667 FXCPMADD f0, B3, A2, f0 2668 FXCSMADD f1, B3, A2, f1 2669 LFPDUX B3, BO, INC4 2670 FXCPMADD f2, B4, A2, f2 2671 FXCSMADD f3, B4, A2, f3 2672 LFPDUX B4, BO2, INC4 2673 LFPDUX A2, AO2, INC4 2674 2675 FXCPMADD f0, A5, A3, f0 2676 FXCSMADD f1, A5, A3, f1 2677 LFPDUX A5, BO, INC4 2678 FXCPMADD f2, A6, A3, f2 2679 FXCSMADD f3, A6, A3, f3 2680 LFPDUX A6, BO2, INC4 2681 LFPDUX A3, AO, INC4 2682 2683 FXCPMADD f0, A7, A4, f0 2684 FXCSMADD f1, A7, A4, f1 2685 LFPDUX A7, BO, INC4 2686 FXCPMADD f2, A8, A4, f2 2687 FXCSMADD f3, A8, A4, f3 2688 LFPDUX A8, BO2, INC4 2689 LFPDUX A4, AO2, INC4 2690 bdnz+ .L32 2691 .align 4 2692 2693.L33: 2694 FXCPMADD f0, B1, A1, f0 2695 FXCSMADD f1, B1, A1, f1 2696 FXCPMADD f2, B2, A1, f2 2697 FXCSMADD f3, B2, A1, f3 2698 2699 FXCPMADD f0, B3, A2, f0 2700 FXCSMADD f1, B3, A2, f1 2701 FXCPMADD f2, B4, A2, f2 2702 FXCSMADD f3, B4, A2, f3 2703 2704 FXCPMADD f0, A5, A3, f0 2705 FXCSMADD f1, A5, A3, f1 2706 FXCPMADD f2, A6, A3, f2 2707 FXCSMADD f3, A6, A3, f3 2708 2709 FXCPMADD f0, A7, A4, f0 2710 FXCSMADD f1, A7, A4, f1 2711 FXCPMADD f2, A8, A4, f2 2712 FXCSMADD f3, A8, A4, f3 2713 .align 4 2714 2715.L34: 2716#if defined(LT) || defined(RN) 2717 andi. r0, KK, 3 2718 mtspr CTR, r0 2719 ble+ .L38 2720#else 2721 andi. 
r0, TEMP, 3 2722 mtspr CTR, r0 2723 ble+ .L38 2724#endif 2725 2726 LFPDX A1, AO, INC4 2727 LFPDUX B1, BO, INC4 2728 LFPDUX B2, BO2, INC4 2729 add AO, AO, INC2 2730 bdz- .L37 2731 .align 4 2732 2733.L36: 2734 FXCPMADD f0, B1, A1, f0 2735 FXCSMADD f1, B1, A1, f1 2736 LFPDUX B1, BO, INC4 2737 FXCPMADD f2, B2, A1, f2 2738 FXCSMADD f3, B2, A1, f3 2739 LFPDX A1, AO, INC4 2740 LFPDUX B2, BO2, INC4 2741 add AO, AO, INC2 2742 bdnz+ .L36 2743 .align 4 2744 2745.L37: 2746 FXCPMADD f0, B1, A1, f0 2747 FXCSMADD f1, B1, A1, f1 2748 FXCPMADD f2, B2, A1, f2 2749 FXCSMADD f3, B2, A1, f3 2750 .align 4 2751 2752.L38: 2753 fpadd f0, f0, f1 2754 fpadd f2, f2, f3 2755 2756#if defined(LN) || defined(RT) 2757#ifdef LN 2758 subi r0, KK, 1 2759#else 2760 subi r0, KK, 2 2761#endif 2762 slwi TEMP, r0, 0 + ZBASE_SHIFT 2763 slwi r0, r0, 1 + ZBASE_SHIFT 2764 add AO, AORIG, TEMP 2765 add BO, B, r0 2766 addi BO, BO, - 4 * SIZE 2767#endif 2768 2769 addi AO2, AO, 2 * SIZE 2770 addi BO2, BO, 2 * SIZE 2771 2772#if defined(LN) || defined(LT) 2773 LFPDX f16, BO, INC4 2774 LFPDX f17, BO2, INC4 2775#else 2776 LFPDX f16, AO, INC4 2777 LFPDX f17, AO2, INC4 2778#endif 2779 2780 fpsub f0, f16, f0 2781 fpsub f2, f17, f2 2782 2783#ifdef LN 2784 LFPDX A1, AO, INC4 2785 2786 fxpmul f4, A1, f0 2787 fxpmul f5, A1, f2 2788 FXCXNPMA f0, A1, f0, f4 2789 FXCXNPMA f2, A1, f2, f5 2790#endif 2791 2792#ifdef LT 2793 LFPDX A1, AO, INC4 2794 2795 fxpmul f4, A1, f0 2796 fxpmul f5, A1, f2 2797 FXCXNPMA f0, A1, f0, f4 2798 FXCXNPMA f2, A1, f2, f5 2799#endif 2800 2801#ifdef RN 2802 LFPDUX A1, BO, INC4 2803 LFPDUX A2, BO2, INC4 2804 add BO, BO, INC4 2805 LFPDUX A3, BO2, INC4 2806 2807 subi BO, BO, 8 * SIZE 2808 subi BO2, BO2, 8 * SIZE 2809 2810 fxpmul f4, A1, f0 2811 FXCXNPMA f0, A1, f0, f4 2812 2813 fxcpnmsub f2, A2, f0, f2 2814 FXCXNSMA f2, A2, f0, f2 2815 2816 fxpmul f4, A3, f2 2817 FXCXNPMA f2, A3, f2, f4 2818#endif 2819 2820#ifdef RT 2821 LFPDUX A1, BO, INC4 2822 add BO2, BO2, INC4 2823 LFPDUX A2, BO, INC4 2824 LFPDUX A3, 
BO2, INC4 2825 2826 subi BO, BO, 8 * SIZE 2827 subi BO2, BO2, 8 * SIZE 2828 2829 fxpmul f4, A3, f2 2830 FXCXNPMA f2, A3, f2, f4 2831 2832 fxcpnmsub f0, A2, f2, f0 2833 FXCXNSMA f0, A2, f2, f0 2834 2835 fxpmul f4, A1, f0 2836 FXCXNPMA f0, A1, f0, f4 2837#endif 2838 2839#ifdef LN 2840 subi CO1, CO1, 2 * SIZE 2841 subi CO2, CO2, 2 * SIZE 2842#endif 2843 2844#if defined(LN) || defined(LT) 2845 STFPDX f0, BO, INC4 2846 STFPDX f2, BO2, INC4 2847#else 2848 STFPDX f0, AO, INC4 2849 STFPDX f2, AO2, INC4 2850#endif 2851 2852 STFDUX f0, CO1, INC 2853 STFSDUX f0, CO1, INC 2854 STFDUX f2, CO2, INC 2855 STFSDUX f2, CO2, INC 2856 2857#ifdef LN 2858 subi CO1, CO1, 2 * SIZE 2859 subi CO2, CO2, 2 * SIZE 2860#endif 2861 2862#ifdef RT 2863 slwi r0, K, 0 + ZBASE_SHIFT 2864 add AORIG, AORIG, r0 2865#endif 2866 2867#if defined(LT) || defined(RN) 2868 sub TEMP, K, KK 2869 slwi r0, TEMP, 0 + ZBASE_SHIFT 2870 slwi TEMP, TEMP, 1 + ZBASE_SHIFT 2871 add AO, AO, r0 2872 add BO, BO, TEMP 2873#endif 2874 2875#ifdef LT 2876 addi KK, KK, 1 2877#endif 2878 2879#ifdef LN 2880 subi KK, KK, 1 2881#endif 2882 2883 li r0, FZERO 2884 lfpsx f0, SP, r0 2885 .align 4 2886 2887.L49: 2888#ifdef LN 2889 slwi r0, K, 1 + ZBASE_SHIFT 2890 add B, B, r0 2891#endif 2892 2893#if defined(LT) || defined(RN) 2894 addi B, BO, 4 * SIZE 2895#endif 2896 2897#ifdef RN 2898 addi KK, KK, 2 2899#endif 2900 2901#ifdef RT 2902 subi KK, KK, 2 2903#endif 2904 2905 addic. 
J, J, -1 2906 bgt+ .L10 2907 .align 4 2908 2909.L999: 2910 addi SP, SP, 20 2911 2912 lwzu r14, 4(SP) 2913 lwzu r15, 4(SP) 2914 2915 lwzu r16, 4(SP) 2916 lwzu r17, 4(SP) 2917 lwzu r18, 4(SP) 2918 lwzu r19, 4(SP) 2919 2920 lwzu r20, 4(SP) 2921 lwzu r21, 4(SP) 2922 lwzu r22, 4(SP) 2923 lwzu r23, 4(SP) 2924 2925 lwzu r24, 4(SP) 2926 lwzu r25, 4(SP) 2927 lwzu r26, 4(SP) 2928 lwzu r27, 4(SP) 2929 2930 lwzu r28, 4(SP) 2931 lwzu r29, 4(SP) 2932 lwzu r30, 4(SP) 2933 lwzu r31, 4(SP) 2934 2935 subi SP, SP, 12 2936 li r0, 16 2937 2938 lfpdux f31, SP, r0 2939 lfpdux f30, SP, r0 2940 lfpdux f29, SP, r0 2941 lfpdux f28, SP, r0 2942 lfpdux f27, SP, r0 2943 lfpdux f26, SP, r0 2944 lfpdux f25, SP, r0 2945 lfpdux f24, SP, r0 2946 lfpdux f23, SP, r0 2947 lfpdux f22, SP, r0 2948 lfpdux f21, SP, r0 2949 lfpdux f20, SP, r0 2950 lfpdux f19, SP, r0 2951 lfpdux f18, SP, r0 2952 lfpdux f17, SP, r0 2953 lfpdux f16, SP, r0 2954 lfpdux f15, SP, r0 2955 lfpdux f14, SP, r0 2956 addi SP, SP, 16 2957 blr 2958 .align 4 2959 2960 2961 EPILOGUE 2962#endif 2963