/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * In-place complex vector scale:  x[i] := alpha * x[i],  i = 0 .. N-1.
 *
 * Each element is a (real, imaginary) pair of SIZE-byte values; the
 * product computed on the non-zero-alpha paths is
 *     re' = alpha_r * re - alpha_i * im
 *     im' = alpha_i * re + alpha_r * im
 *
 * Register roles on entry (see the #defines below):
 *   N        r3   element count; the routine does nothing when N <= 0
 *   X        r6   address of the first element
 *   INCX     r7   stride between elements (scaled to bytes below)
 *   ALPHA    f1   real part of alpha
 *   ALPHA_I  f2   imaginary part of alpha
 * NOTE(review): the full C-level signature is defined by the callers and
 * by PROLOGUE in common.h, neither of which is visible here.
 *
 * Path selection:
 *   stride == 1 element      alpha == 0   aligned X     LL(12)
 *                                         unaligned X   LL(20)
 *                            alpha != 0   aligned X     LL(50)
 *                                         unaligned X   LL(60)
 *   any other stride         alpha == 0   aligned X     LL(100)/LL(112)
 *                                         unaligned X   LL(120)
 *                            alpha != 0   aligned X     LL(150)
 *                                         unaligned X   LL(160)
 * "Aligned" means X is a multiple of 2 * SIZE, so one element can be
 * moved with a single paired load/store.
 *
 * When both parts of alpha compare equal to zero the vector is simply
 * overwritten with zeros; x is never read on those paths, so NaN or Inf
 * values already in x are not propagated.
 *
 * NOTE(review): the descriptions of fsmfp, fscmp, fxpmul, fxcxnpma and
 * of the paired load/store forms follow the PowerPC 440 FP2 ("Double
 * Hummer") instruction set; confirm against the ISA manual.  LFPDUX,
 * STFPDUX, LFDUX, STFDUX, STFDX, LL, SIZE, BASE_SHIFT and SP are
 * provided by common.h.
 */

#define ASSEMBLER
#include "common.h"

#define N	r3
#define X	r6
#define INCX	r7

/* Scratch integer registers. */
#define INCX2	r4	/* byte stride between consecutive elements      */
#define XX	r5	/* read cursor for imaginary parts (scalar paths) */
#define Y	r8	/* write cursor (trails X through the same data)  */
#define YY	r9	/* write cursor for imaginary parts               */

#define ALPHA	f1
#define ALPHA_I	f2

/* A1..A8 hold loaded elements, B1..B8 the corresponding products. */
#define A1	f0
#define A2	f16
#define A3	f17
#define A4	f3
#define A5	f4
#define A6	f5
#define A7	f6
#define A8	f7

#define B1	f8
#define B2	f9
#define B3	f10
#define B4	f11
#define B5	f12
#define B6	f13
#define B7	f14
#define B8	f15

	PROLOGUE
	PROFCODE

	/* Save f14-f17 (used as B7, B8, A2, A3); each store moves SP down
	   by 16 bytes.  They are reloaded at LL(999). */
	li	r10, -16

	stfpdux	f14, SP, r10
	stfpdux	f15, SP, r10
	stfpdux	f16, SP, r10
	stfpdux	f17, SP, r10

	/* Push 16 bytes of zero and load them as a pair: A1 = (0.0, 0.0). */
	li	r10,   0
	stwu	r10,   -4(SP)
	stwu	r10,   -4(SP)
	stwu	r10,   -4(SP)
	stwu	r10,   -4(SP)

	lfpdx	A1, SP, r10		/* Zero clear */
	/* Pack alpha into one paired register: ALPHA = (alpha_r, alpha_i). */
	fsmfp	ALPHA, ALPHA_I

	/* INCX  = stride << BASE_SHIFT,
	   INCX2 = 2 * INCX = byte distance between elements. */
	slwi	INCX,  INCX, BASE_SHIFT
	add	INCX2, INCX, INCX

	cmpwi	cr0, N, 0
	ble	LL(999)

	/* Scaled INCX == SIZE selects the unit-stride paths. */
	cmpwi	cr0, INCX, SIZE
	bne	LL(100)

/* ------------------------------------------------------------------ */
/* Unit stride                                                         */
/* ------------------------------------------------------------------ */

	/* alpha != 0 (either half differs from zero) -> multiply path. */
	fcmpu	cr7, ALPHA, A1
	bne	cr7, LL(50)

	fscmp	cr7, ALPHA, A1
	bne	cr7, LL(50)

	/* alpha == 0: zero fill.  Unaligned X goes to LL(20). */
	andi.	r0, X, 2 * SIZE - 1
	bne	LL(20)

	/* Pre-bias X: the update-form stores add INCX2 before storing. */
	sub	X,  X, INCX2

	srawi.	r0, N, 2		/* N / 4 iterations, 4 elements each */
	mtspr	CTR,  r0
	beq-	LL(15)
	.align 4

LL(12):
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	bdnz	LL(12)
	.align 4

LL(15):
	/* Remaining N % 4 elements: 2, then 1. */
	andi.	r0,  N, 3
	beq	LL(999)
	andi.	r0,  N, 2
	beq	LL(17)

	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	.align 4

LL(17):
	andi.	r0,  N, 1
	beq	LL(999)

	STFPDUX	A1,  X, INCX2
	b	LL(999)
	.align 4

LL(20):
	/* alpha == 0, unit stride, unaligned X.
	   Store one scalar zero (the first real part), step X by SIZE so
	   that paired stores become aligned, write N-1 pairs, then finish
	   with one scalar zero (the last imaginary part) at LL(29). */
	sub	X,  X, INCX2

	STFDX	A1, X, INCX2
	addi	X, X, SIZE
	addi	N, N, -1
	cmpwi	cr0, N, 0
	ble	LL(29)

	srawi.	r0, N, 2
	mtspr	CTR,  r0
	beq-	LL(25)
	.align 4

LL(22):
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	bdnz	LL(22)
	.align 4

LL(25):
	andi.	r0,  N, 3
	beq	LL(29)
	andi.	r0,  N, 2
	beq	LL(27)

	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	.align 4

LL(27):
	andi.	r0,  N, 1
	beq	LL(29)

	STFPDUX	A1,  X, INCX2
	.align 4

LL(29):
	STFDX	A1, X, INCX2		/* trailing scalar zero */
	b	LL(999)
	.align 4

LL(50):
	/* alpha != 0, unit stride.  Y writes back to the locations X reads;
	   both cursors are pre-biased by one element for the update forms. */
	sub	Y,  X, INCX2
	sub	X,  X, INCX2

	andi.	r0, X, 2 * SIZE - 1
	bne	LL(60)

	/* Aligned: 8 elements per iteration, software pipelined.
	   fxpmul   Bk, ALPHA, Ak      starts the product with alpha_r,
	   fxcxnpma Bk, ALPHA, Ak, Bk  adds the alpha_i cross terms. */
	srawi.	r0, N, 3
	mtspr	CTR,  r0
	beq-	LL(55)

	/* Pipeline fill: load the first 8 elements; finish B1, B2 and
	   start B3..B5 before entering the loop. */
	LFPDUX	A1,  X, INCX2
	LFPDUX	A2,  X, INCX2
	LFPDUX	A3,  X, INCX2
	LFPDUX	A4,  X, INCX2

	LFPDUX	A5,  X, INCX2
	fxpmul	B1, ALPHA, A1
	LFPDUX	A6,  X, INCX2
	fxpmul	B2, ALPHA, A2
	LFPDUX	A7,  X, INCX2
	fxpmul	B3, ALPHA, A3
	LFPDUX	A8,  X, INCX2
	fxpmul	B4, ALPHA, A4
	fxpmul	B5, ALPHA, A5

	fxcxnpma B1, ALPHA, A1, B1
	fxcxnpma B2, ALPHA, A2, B2
	bdz	LL(53)
	.align 4

LL(52):
	/* Steady state: each group finishes one product, loads one element
	   of the next block, starts another product and stores a finished
	   one.  The instruction order is the schedule; do not reorder. */
	fxcxnpma B3, ALPHA, A3, B3
	LFPDUX	A1,  X, INCX2
	fxpmul	B6, ALPHA, A6
	STFPDUX	B1,  Y, INCX2

	fxcxnpma B4, ALPHA, A4, B4
	LFPDUX	A2,  X, INCX2
	fxpmul	B7, ALPHA, A7
	STFPDUX	B2,  Y, INCX2

	fxcxnpma B5, ALPHA, A5, B5
	LFPDUX	A3,  X, INCX2
	fxpmul	B8, ALPHA, A8
	STFPDUX	B3,  Y, INCX2

	fxcxnpma B6, ALPHA, A6, B6
	LFPDUX	A4,  X, INCX2
	fxpmul	B1, ALPHA, A1
	STFPDUX	B4,  Y, INCX2

	fxcxnpma B7, ALPHA, A7, B7
	LFPDUX	A5,  X, INCX2
	fxpmul	B2, ALPHA, A2
	STFPDUX	B5,  Y, INCX2

	fxcxnpma B8, ALPHA, A8, B8
	LFPDUX	A6,  X, INCX2
	fxpmul	B3, ALPHA, A3
	STFPDUX	B6,  Y, INCX2

	fxcxnpma B1, ALPHA, A1, B1
	LFPDUX	A7,  X, INCX2
	fxpmul	B4, ALPHA, A4
	STFPDUX	B7,  Y, INCX2

	fxcxnpma B2, ALPHA, A2, B2
	LFPDUX	A8,  X, INCX2
	fxpmul	B5, ALPHA, A5
	STFPDUX	B8,  Y, INCX2
	bdnz	LL(52)
	.align 4

LL(53):
	/* Pipeline drain: finish and store the last 8 elements, no loads. */
	fxcxnpma B3, ALPHA, A3, B3
	fxpmul	B6, ALPHA, A6
	STFPDUX	B1,  Y, INCX2

	fxcxnpma B4, ALPHA, A4, B4
	fxpmul	B7, ALPHA, A7
	STFPDUX	B2,  Y, INCX2

	fxcxnpma B5, ALPHA, A5, B5
	fxpmul	B8, ALPHA, A8
	STFPDUX	B3,  Y, INCX2

	fxcxnpma B6, ALPHA, A6, B6
	STFPDUX	B4,  Y, INCX2
	fxcxnpma B7, ALPHA, A7, B7
	STFPDUX	B5,  Y, INCX2
	fxcxnpma B8, ALPHA, A8, B8
	STFPDUX	B6,  Y, INCX2
	STFPDUX	B7,  Y, INCX2
	STFPDUX	B8,  Y, INCX2
	.align 4

LL(55):
	/* Remaining N % 8 elements: 4, then 2, then 1. */
	andi.	r0,  N, 7
	beq	LL(999)

	andi.	r0,  N, 4
	beq	LL(56)

	LFPDUX	A1,  X, INCX2
	LFPDUX	A2,  X, INCX2
	LFPDUX	A3,  X, INCX2
	LFPDUX	A4,  X, INCX2

	fxpmul	B1, ALPHA, A1
	fxpmul	B2, ALPHA, A2
	fxpmul	B3, ALPHA, A3
	fxpmul	B4, ALPHA, A4

	fxcxnpma B1, ALPHA, A1, B1
	fxcxnpma B2, ALPHA, A2, B2
	fxcxnpma B3, ALPHA, A3, B3
	fxcxnpma B4, ALPHA, A4, B4

	STFPDUX	B1,  Y, INCX2
	STFPDUX	B2,  Y, INCX2
	STFPDUX	B3,  Y, INCX2
	STFPDUX	B4,  Y, INCX2
	.align 4

LL(56):
	andi.	r0,  N, 2
	beq	LL(57)

	LFPDUX	A1,  X, INCX2
	LFPDUX	A2,  X, INCX2

	fxpmul	B1, ALPHA, A1
	fxpmul	B2, ALPHA, A2

	fxcxnpma B1, ALPHA, A1, B1
	fxcxnpma B2, ALPHA, A2, B2

	STFPDUX	B1,  Y, INCX2
	STFPDUX	B2,  Y, INCX2
	.align 4

LL(57):
	andi.	r0,  N, 1
	beq	LL(999)

	LFPDUX	A1,  X, INCX2

	fxpmul	B1, ALPHA, A1
	fxcxnpma B1, ALPHA, A1, B1

	STFPDUX	B1,  Y, INCX2
	b	LL(999)
	.align 4

LL(60):
	/* alpha != 0, unit stride, unaligned X: scalar path.
	   X / Y walk the real parts, XX / YY the imaginary parts (one SIZE
	   further on).  Odd-numbered A registers hold real parts, even ones
	   imaginary parts; odd B registers receive real results, even ones
	   imaginary results.  4 elements per iteration. */
	addi	XX, X, SIZE
	addi	YY, Y, SIZE

	srawi.	r0, N, 2
	mtspr	CTR,  r0
	beq-	LL(65)

	/* Pipeline fill. */
	LFDUX	A1,  X,  INCX2
	LFDUX	A2,  XX, INCX2
	LFDUX	A3,  X,  INCX2
	LFDUX	A4,  XX, INCX2

	LFDUX	A5,  X,  INCX2
	fmul	B1, ALPHA, A1
	LFDUX	A6,  XX, INCX2
	fmul	B2, ALPHA_I, A1
	LFDUX	A7,  X,  INCX2
	fmul	B3, ALPHA, A3
	LFDUX	A8,  XX, INCX2
	fmul	B4, ALPHA_I, A3

	fmul	B5, ALPHA, A5
	fnmsub	B1, ALPHA_I, A2, B1	/* re' = alpha_r*re - alpha_i*im */
	fmadd	B2, ALPHA  , A2, B2	/* im' = alpha_i*re + alpha_r*im */
	bdz	LL(63)
	.align 4

LL(62):
	/* Steady state; same interleaving scheme as the paired loop. */
	fnmsub	B3, ALPHA_I, A4, B3
	LFDUX	A1,  X,  INCX2
	fmul	B6, ALPHA_I, A5
	STFDUX	B1,  Y,  INCX2

	fmadd	B4, ALPHA  , A4, B4
	LFDUX	A2,  XX, INCX2
	fmul	B7, ALPHA, A7
	STFDUX	B2,  YY, INCX2

	fnmsub	B5, ALPHA_I, A6, B5
	LFDUX	A3,  X,  INCX2
	fmul	B8, ALPHA_I, A7
	STFDUX	B3,  Y,  INCX2

	fmadd	B6, ALPHA  , A6, B6
	LFDUX	A4,  XX, INCX2
	fmul	B1, ALPHA, A1
	STFDUX	B4,  YY, INCX2

	fnmsub	B7, ALPHA_I, A8, B7
	LFDUX	A5,  X,  INCX2
	fmul	B2, ALPHA_I, A1
	STFDUX	B5,  Y,  INCX2

	fmadd	B8, ALPHA  , A8, B8
	LFDUX	A6,  XX, INCX2
	fmul	B3, ALPHA, A3
	STFDUX	B6,  YY, INCX2

	fnmsub	B1, ALPHA_I, A2, B1
	LFDUX	A7,  X,  INCX2
	fmul	B4, ALPHA_I, A3
	STFDUX	B7,  Y,  INCX2

	fmadd	B2, ALPHA  , A2, B2
	LFDUX	A8,  XX, INCX2
	fmul	B5, ALPHA, A5
	STFDUX	B8,  YY, INCX2
	bdnz	LL(62)
	.align 4

LL(63):
	/* Pipeline drain. */
	fnmsub	B3, ALPHA_I, A4, B3
	fmul	B6, ALPHA_I, A5
	STFDUX	B1,  Y,  INCX2

	fmadd	B4, ALPHA  , A4, B4
	fmul	B7, ALPHA, A7
	STFDUX	B2,  YY, INCX2

	fnmsub	B5, ALPHA_I, A6, B5
	fmul	B8, ALPHA_I, A7
	STFDUX	B3,  Y,  INCX2

	fmadd	B6, ALPHA  , A6, B6
	STFDUX	B4,  YY, INCX2
	fnmsub	B7, ALPHA_I, A8, B7
	STFDUX	B5,  Y,  INCX2
	fmadd	B8, ALPHA  , A8, B8
	STFDUX	B6,  YY, INCX2
	STFDUX	B7,  Y,  INCX2
	STFDUX	B8,  YY, INCX2
	.align 4

LL(65):
	/* Remaining N % 4 elements: 2, then 1. */
	andi.	r0,  N, 3
	beq	LL(999)
	andi.	r0,  N, 2
	beq	LL(67)

	LFDUX	A1,  X,  INCX2
	LFDUX	A2,  XX, INCX2
	LFDUX	A3,  X,  INCX2
	LFDUX	A4,  XX, INCX2

	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2
	fmul	B3, ALPHA, A3
	fmul	B4, ALPHA, A4

	fnmsub	B1, ALPHA_I, A2, B1
	fmadd	B2, ALPHA_I, A1, B2
	fnmsub	B3, ALPHA_I, A4, B3
	fmadd	B4, ALPHA_I, A3, B4

	STFDUX	B1,  Y,  INCX2
	STFDUX	B2,  YY, INCX2
	STFDUX	B3,  Y,  INCX2
	STFDUX	B4,  YY, INCX2
	.align 4

LL(67):
	andi.	r0,  N, 1
	beq	LL(999)

	LFDUX	A1,  X,  INCX2
	LFDUX	A2,  XX, INCX2

	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2
	fnmsub	B1, ALPHA_I, A2, B1
	fmadd	B2, ALPHA_I, A1, B2

	STFDUX	B1,  Y,  INCX2
	STFDUX	B2,  YY, INCX2
	b	LL(999)
	.align 4

/* ------------------------------------------------------------------ */
/* Non-unit stride                                                     */
/* ------------------------------------------------------------------ */

LL(100):
	/* alpha != 0 -> LL(150); otherwise zero fill with stride INCX2. */
	fcmpu	cr7, ALPHA, A1
	bne	cr7, LL(150)

	fscmp	cr7, ALPHA, A1
	bne	cr7, LL(150)

	andi.	r0, X, 2 * SIZE - 1
	bne	LL(120)

	sub	X,  X, INCX2

	srawi.	r0, N, 2
	mtspr	CTR,  r0
	beq-	LL(115)
	.align 4

LL(112):
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	bdnz	LL(112)
	.align 4

LL(115):
	andi.	r0,  N, 3
	beq	LL(999)
	andi.	r0,  N, 2
	beq	LL(117)

	STFPDUX	A1,  X, INCX2
	STFPDUX	A1,  X, INCX2
	.align 4

LL(117):
	andi.	r0,  N, 1
	beq	LL(999)

	STFPDUX	A1,  X, INCX2
	b	LL(999)
	.align 4

LL(120):
	/* alpha == 0, strided, unaligned X: two scalar stores per element.
	   INCX2 becomes (element stride - SIZE) and INCX becomes SIZE, so
	   alternating the two increments visits real part, imaginary part,
	   next real part, ... with a single cursor. */
	subi	INCX2, INCX2, SIZE
	li	INCX, SIZE

	sub	X,  X, INCX2

	srawi.	r0, N, 2
	mtspr	CTR,  r0
	beq-	LL(125)
	.align 4

LL(122):
	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	bdnz	LL(122)
	.align 4

LL(125):
	andi.	r0,  N, 3
	beq	LL(999)
	andi.	r0,  N, 2
	beq	LL(127)

	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	.align 4

LL(127):
	andi.	r0,  N, 1
	beq	LL(999)

	STFDUX	A1,  X, INCX2
	STFDUX	A1,  X, INCX
	b	LL(999)
	.align 4

LL(150):
	/* alpha != 0, strided.  Same instruction sequence as LL(50)..LL(57);
	   only the value of INCX2 differs. */
	sub	Y,  X, INCX2
	sub	X,  X, INCX2

	andi.	r0, X, 2 * SIZE - 1
	bne	LL(160)

	srawi.	r0, N, 3
	mtspr	CTR,  r0
	beq-	LL(155)

	/* Pipeline fill. */
	LFPDUX	A1,  X, INCX2
	LFPDUX	A2,  X, INCX2
	LFPDUX	A3,  X, INCX2
	LFPDUX	A4,  X, INCX2

	LFPDUX	A5,  X, INCX2
	fxpmul	B1, ALPHA, A1
	LFPDUX	A6,  X, INCX2
	fxpmul	B2, ALPHA, A2
	LFPDUX	A7,  X, INCX2
	fxpmul	B3, ALPHA, A3
	LFPDUX	A8,  X, INCX2
	fxpmul	B4, ALPHA, A4
	fxpmul	B5, ALPHA, A5

	fxcxnpma B1, ALPHA, A1, B1
	fxcxnpma B2, ALPHA, A2, B2
	bdz	LL(153)
	.align 4

LL(152):
	/* Steady state, 8 elements per iteration. */
	fxcxnpma B3, ALPHA, A3, B3
	LFPDUX	A1,  X, INCX2
	fxpmul	B6, ALPHA, A6
	STFPDUX	B1,  Y, INCX2

	fxcxnpma B4, ALPHA, A4, B4
	LFPDUX	A2,  X, INCX2
	fxpmul	B7, ALPHA, A7
	STFPDUX	B2,  Y, INCX2

	fxcxnpma B5, ALPHA, A5, B5
	LFPDUX	A3,  X, INCX2
	fxpmul	B8, ALPHA, A8
	STFPDUX	B3,  Y, INCX2

	fxcxnpma B6, ALPHA, A6, B6
	LFPDUX	A4,  X, INCX2
	fxpmul	B1, ALPHA, A1
	STFPDUX	B4,  Y, INCX2

	fxcxnpma B7, ALPHA, A7, B7
	LFPDUX	A5,  X, INCX2
	fxpmul	B2, ALPHA, A2
	STFPDUX	B5,  Y, INCX2

	fxcxnpma B8, ALPHA, A8, B8
	LFPDUX	A6,  X, INCX2
	fxpmul	B3, ALPHA, A3
	STFPDUX	B6,  Y, INCX2

	fxcxnpma B1, ALPHA, A1, B1
	LFPDUX	A7,  X, INCX2
	fxpmul	B4, ALPHA, A4
	STFPDUX	B7,  Y, INCX2

	fxcxnpma B2, ALPHA, A2, B2
	LFPDUX	A8,  X, INCX2
	fxpmul	B5, ALPHA, A5
	STFPDUX	B8,  Y, INCX2
	bdnz	LL(152)
	.align 4

LL(153):
	/* Pipeline drain. */
	fxcxnpma B3, ALPHA, A3, B3
	fxpmul	B6, ALPHA, A6
	STFPDUX	B1,  Y, INCX2

	fxcxnpma B4, ALPHA, A4, B4
	fxpmul	B7, ALPHA, A7
	STFPDUX	B2,  Y, INCX2

	fxcxnpma B5, ALPHA, A5, B5
	fxpmul	B8, ALPHA, A8
	STFPDUX	B3,  Y, INCX2

	fxcxnpma B6, ALPHA, A6, B6
	STFPDUX	B4,  Y, INCX2
	fxcxnpma B7, ALPHA, A7, B7
	STFPDUX	B5,  Y, INCX2
	fxcxnpma B8, ALPHA, A8, B8
	STFPDUX	B6,  Y, INCX2
	STFPDUX	B7,  Y, INCX2
	STFPDUX	B8,  Y, INCX2
	.align 4

LL(155):
	/* Remaining N % 8 elements: 4, then 2, then 1. */
	andi.	r0,  N, 7
	beq	LL(999)

	andi.	r0,  N, 4
	beq	LL(156)

	LFPDUX	A1,  X, INCX2
	LFPDUX	A2,  X, INCX2
	LFPDUX	A3,  X, INCX2
	LFPDUX	A4,  X, INCX2

	fxpmul	B1, ALPHA, A1
	fxpmul	B2, ALPHA, A2
	fxpmul	B3, ALPHA, A3
	fxpmul	B4, ALPHA, A4

	fxcxnpma B1, ALPHA, A1, B1
	fxcxnpma B2, ALPHA, A2, B2
	fxcxnpma B3, ALPHA, A3, B3
	fxcxnpma B4, ALPHA, A4, B4

	STFPDUX	B1,  Y, INCX2
	STFPDUX	B2,  Y, INCX2
	STFPDUX	B3,  Y, INCX2
	STFPDUX	B4,  Y, INCX2
	.align 4

LL(156):
	andi.	r0,  N, 2
	beq	LL(157)

	LFPDUX	A1,  X, INCX2
	LFPDUX	A2,  X, INCX2

	fxpmul	B1, ALPHA, A1
	fxpmul	B2, ALPHA, A2

	fxcxnpma B1, ALPHA, A1, B1
	fxcxnpma B2, ALPHA, A2, B2

	STFPDUX	B1,  Y, INCX2
	STFPDUX	B2,  Y, INCX2
	.align 4

LL(157):
	andi.	r0,  N, 1
	beq	LL(999)

	LFPDUX	A1,  X, INCX2

	fxpmul	B1, ALPHA, A1
	fxcxnpma B1, ALPHA, A1, B1

	STFPDUX	B1,  Y, INCX2
	b	LL(999)
	.align 4

LL(160):
	/* alpha != 0, strided, unaligned X: scalar path, same scheme as
	   LL(60)..LL(67) (X/Y = real parts, XX/YY = imaginary parts). */
	addi	XX, X, SIZE
	addi	YY, Y, SIZE

	srawi.	r0, N, 2
	mtspr	CTR,  r0
	beq-	LL(165)

	/* Pipeline fill. */
	LFDUX	A1,  X,  INCX2
	LFDUX	A2,  XX, INCX2
	LFDUX	A3,  X,  INCX2
	LFDUX	A4,  XX, INCX2

	LFDUX	A5,  X,  INCX2
	fmul	B1, ALPHA, A1
	LFDUX	A6,  XX, INCX2
	fmul	B2, ALPHA_I, A1
	LFDUX	A7,  X,  INCX2
	fmul	B3, ALPHA, A3
	LFDUX	A8,  XX, INCX2
	fmul	B4, ALPHA_I, A3

	fmul	B5, ALPHA, A5
	fnmsub	B1, ALPHA_I, A2, B1
	fmadd	B2, ALPHA  , A2, B2
	bdz	LL(163)

	.align 4

LL(162):
	/* Steady state, 4 elements per iteration. */
	fnmsub	B3, ALPHA_I, A4, B3
	LFDUX	A1,  X,  INCX2
	fmul	B6, ALPHA_I, A5
	STFDUX	B1,  Y,  INCX2

	fmadd	B4, ALPHA  , A4, B4
	LFDUX	A2,  XX, INCX2
	fmul	B7, ALPHA, A7
	STFDUX	B2,  YY, INCX2

	fnmsub	B5, ALPHA_I, A6, B5
	LFDUX	A3,  X,  INCX2
	fmul	B8, ALPHA_I, A7
	STFDUX	B3,  Y,  INCX2

	fmadd	B6, ALPHA  , A6, B6
	LFDUX	A4,  XX, INCX2
	fmul	B1, ALPHA, A1
	STFDUX	B4,  YY, INCX2

	fnmsub	B7, ALPHA_I, A8, B7
	LFDUX	A5,  X,  INCX2
	fmul	B2, ALPHA_I, A1
	STFDUX	B5,  Y,  INCX2

	fmadd	B8, ALPHA  , A8, B8
	LFDUX	A6,  XX, INCX2
	fmul	B3, ALPHA, A3
	STFDUX	B6,  YY, INCX2

	fnmsub	B1, ALPHA_I, A2, B1
	LFDUX	A7,  X,  INCX2
	fmul	B4, ALPHA_I, A3
	STFDUX	B7,  Y,  INCX2

	fmadd	B2, ALPHA  , A2, B2
	LFDUX	A8,  XX, INCX2
	fmul	B5, ALPHA, A5
	STFDUX	B8,  YY, INCX2
	bdnz	LL(162)
	.align 4

LL(163):
	/* Pipeline drain. */
	fnmsub	B3, ALPHA_I, A4, B3
	fmul	B6, ALPHA_I, A5
	STFDUX	B1,  Y,  INCX2

	fmadd	B4, ALPHA  , A4, B4
	fmul	B7, ALPHA, A7
	STFDUX	B2,  YY, INCX2

	fnmsub	B5, ALPHA_I, A6, B5
	fmul	B8, ALPHA_I, A7
	STFDUX	B3,  Y,  INCX2

	fmadd	B6, ALPHA  , A6, B6
	STFDUX	B4,  YY, INCX2
	fnmsub	B7, ALPHA_I, A8, B7
	STFDUX	B5,  Y,  INCX2
	fmadd	B8, ALPHA  , A8, B8
	STFDUX	B6,  YY, INCX2
	STFDUX	B7,  Y,  INCX2
	STFDUX	B8,  YY, INCX2
	.align 4

LL(165):
	/* Remaining N % 4 elements: 2, then 1. */
	andi.	r0,  N, 3
	beq	LL(999)
	andi.	r0,  N, 2
	beq	LL(167)

	LFDUX	A1,  X,  INCX2
	LFDUX	A2,  XX, INCX2
	LFDUX	A3,  X,  INCX2
	LFDUX	A4,  XX, INCX2

	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2
	fmul	B3, ALPHA, A3
	fmul	B4, ALPHA, A4

	fnmsub	B1, ALPHA_I, A2, B1
	fmadd	B2, ALPHA_I, A1, B2
	fnmsub	B3, ALPHA_I, A4, B3
	fmadd	B4, ALPHA_I, A3, B4

	STFDUX	B1,  Y,  INCX2
	STFDUX	B2,  YY, INCX2
	STFDUX	B3,  Y,  INCX2
	STFDUX	B4,  YY, INCX2
	.align 4

LL(167):
	andi.	r0,  N, 1
	beq	LL(999)

	LFDUX	A1,  X,  INCX2
	LFDUX	A2,  XX, INCX2

	fmul	B1, ALPHA, A1
	fmul	B2, ALPHA, A2
	fnmsub	B1, ALPHA_I, A2, B1
	fmadd	B2, ALPHA_I, A1, B2

	STFDUX	B1,  Y,  INCX2
	STFDUX	B2,  YY, INCX2
	/* falls through to the common exit */
	.align 4

LL(999):
	/* Common exit.  SP currently points at the 16-byte zero scratch, so
	   the first update-form load (SP += 16) lands on the f17 save slot;
	   registers are restored in reverse order of the prologue and the
	   final addi releases the last slot. */
	li	r10, 16

	lfpdux	f17, SP, r10
	lfpdux	f16, SP, r10
	lfpdux	f15, SP, r10
	lfpdux	f14, SP, r10

	addi	SP, SP,  16
	blr

	EPILOGUE