/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Matrix-vector update kernel for the PowerPC paired ("double hummer")
 * FPU.  As read from the code below, it performs
 *
 *	y[0..M) += alpha * x[j] * A[0..M, j]	for j = 0..N
 *
 * i.e. a non-transposed GEMV.  Columns of A (LDA elements apart) are
 * consumed four at a time, then a pair, then a single remaining column.
 * Along M the work is done eight elements per loop trip (CTR = M >> 3)
 * followed by 4 / 2 / 1 element tails.
 *
 * Inputs (see the register aliases below):
 *	M, N		r3, r4	row / column counts; nothing is done if either <= 0
 *	A, LDA		r6, r7	matrix base and column stride (elements)
 *	X, INCX		r8, r9	x vector and its stride (elements)
 *	Y		r10	y vector
 *	INCY		8(SP)	y stride (elements), fetched from the caller's stack
 *	alpha		f1	scalar; copied into both halves of the pair
 *
 * LDA, INCX and INCY are converted to byte strides with BASE_SHIFT.
 *
 * Three code paths are selected on the layout of Y:
 *	.L11 - .L38	INCY == 1, Y aligned to 2 * SIZE bytes
 *	.L40 - .L69	INCY == 1, Y not aligned to 2 * SIZE bytes
 *	.L70 - .L98	INCY != 1
 *
 * NOTE(review): all paths read A with paired loads and therefore assume A
 * is aligned to 2 * SIZE bytes with an even LDA.  The alignment of A is
 * tested, but the branch to an unaligned-A path (.L100) is disabled and no
 * such path exists in this file.
 *
 * All pointer registers are pre-decremented by one step because every
 * memory access uses an update-form (pre-increment) instruction.
 */

#define ASSEMBLER
#include "common.h"

/* Argument registers */
#define M	r3
#define N	r4
#define A	r6
#define LDA	r7
#define X	r8
#define INCX	r9
#define Y	r10
#define INCY	r5

/* Loop counters: J counts column groups; I is not used in this file */
#define I	r11
#define J	r12

/* INCY2 = 2 * INCY (bytes); A1..A4 = running pointers into four columns */
#define INCY2	r24
#define A1	r25
#define A2	r26
#define A3	r27
#define A4	r28

/* YL = y load pointer, YS = y store pointer, INC2 = 2 * SIZE */
#define YL	r29
#define YS	r30
#define INC2	r31

/* y values as loaded (yl*) and as accumulated for storing (ys*) */
#define yl1	f0
#define yl2	f2
#define yl3	f3
#define yl4	f4
#define ys1	f5
#define ys2	f6
#define ys3	f7
#define ys4	f8
#define yl5	f27
#define ys5	f28

/* alpha * x[j] for two column pairs */
#define alpha1	f9
#define alpha2	f10

/* Elements of A: a1,a5,a9,a13 from A1; a2,.. from A2; a3,.. A3; a4,.. A4 */
#define a1	f11
#define a2	f12
#define a3	f13
#define a4	f14
#define a5	f15
#define a6	f16
#define a7	f17
#define a8	f18

#define a9	f19
#define a10	f20
#define a11	f21
#define a12	f22
#define a13	f23
#define a14	f24
#define a15	f25
#define a16	f26

#define alpha	f1

	PROLOGUE
	PROFCODE

	li	r0, -16
	lwz	INCY, 8(SP)		/* stack argument; read before SP moves */

	/* Save f14-f31 as pairs (16 bytes each) below SP */
	stfpdux	f14, SP, r0
	stfpdux	f15, SP, r0
	stfpdux	f16, SP, r0
	stfpdux	f17, SP, r0
	stfpdux	f18, SP, r0
	stfpdux	f19, SP, r0
	stfpdux	f20, SP, r0
	stfpdux	f21, SP, r0
	stfpdux	f22, SP, r0
	stfpdux	f23, SP, r0
	stfpdux	f24, SP, r0
	stfpdux	f25, SP, r0
	stfpdux	f26, SP, r0
	stfpdux	f27, SP, r0
	stfpdux	f28, SP, r0
	stfpdux	f29, SP, r0
	stfpdux	f30, SP, r0
	stfpdux	f31, SP, r0

	/* Save r16-r31 (only r24-r31 are used below) */
	stwu	r31, -4(SP)
	stwu	r30, -4(SP)
	stwu	r29, -4(SP)
	stwu	r28, -4(SP)

	stwu	r27, -4(SP)
	stwu	r26, -4(SP)
	stwu	r25, -4(SP)
	stwu	r24, -4(SP)

	stwu	r23, -4(SP)
	stwu	r22, -4(SP)
	stwu	r21, -4(SP)
	stwu	r20, -4(SP)

	stwu	r19, -4(SP)
	stwu	r18, -4(SP)
	stwu	r17, -4(SP)
	stwu	r16, -4(SP)

	/* Element strides -> byte strides */
	slwi	LDA,  LDA,  BASE_SHIFT
	slwi	INCX, INCX, BASE_SHIFT
	slwi	INCY, INCY, BASE_SHIFT

	fsmfp	alpha, alpha		/* replicate alpha into both halves */

	/* Quick return when there is nothing to do */
	cmpwi	cr0, M, 0
	ble-	.L999
	cmpwi	cr0, N, 0
	ble-	.L999

	add	INCY2, INCY, INCY
	li	INC2, 2 * SIZE
	sub	X, X, INCX		/* bias X for pre-increment loads */

	/*
	 * Alignment test of A.  Its result is currently unused: cr0 is
	 * overwritten by the next compare and the branch is disabled.
	 */
	andi.	r0, A, 2 * SIZE - 1
/*	bne	.L100 */

/* All cases for aligned A, even LDA */

	cmpwi	cr0, INCY, SIZE
	bne	.L70

	andi.	r0, Y, 2 * SIZE - 1
	bne	.L40

/* A : aligned  LDA : even  Y : Unit Aligned */

	sub	A, A, INC2
	sub	Y, Y, INCY2

	srawi.	J, N, 2
	ble	.L20
	.align 4

/* ---- aligned Y, four columns per pass ---- */
.L11:
	LFDUX	alpha1, X, INCX
	mr	A1, A
	add	A2, A,  LDA
	add	A3, A2, LDA
	LFSDUX	alpha1, X, INCX
	LFDUX	alpha2, X, INCX
	add	A4, A3, LDA
	add	A,  A4, LDA
	mr	YL, Y
	LFSDUX	alpha2, X, INCX
	fpmul	alpha1, alpha, alpha1
	mr	YS, Y
	srawi.	r0, M, 3
	mtspr	CTR, r0
	fpmul	alpha2, alpha, alpha2
	ble	.L15			/* cr0 still holds the srawi. result */

	/* Software-pipeline preload for the first trip */
	LFPDUX	yl1, YL, INCY2
	LFPDUX	yl2, YL, INCY2
	LFPDUX	yl3, YL, INCY2
	LFPDUX	yl4, YL, INCY2

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2

	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a10, A2, INC2
	LFPDUX	a14, A2, INC2

	LFPDUX	a3,  A3, INC2
	LFPDUX	a7,  A3, INC2
	LFPDUX	a11, A3, INC2
	LFPDUX	a15, A3, INC2

	LFPDUX	a4,  A4, INC2
	fxcpmadd	ys1, alpha1, a1,  yl1
	LFPDUX	a8,  A4, INC2
	fxcpmadd	ys2, alpha1, a5,  yl2
	LFPDUX	a12, A4, INC2
	fxcpmadd	ys3, alpha1, a9,  yl3
	LFPDUX	a16, A4, INC2
	fxcpmadd	ys4, alpha1, a13, yl4
	bdz	.L13
	.align 4

.L12:
	LFPDUX	yl1, YL, INCY2

	fxcsmadd	ys1, alpha1, a2,  ys1
	LFPDUX	a1,  A1, INC2
	fxcsmadd	ys2, alpha1, a6,  ys2
	LFPDUX	a5,  A1, INC2
	fxcsmadd	ys3, alpha1, a10, ys3
	LFPDUX	a9,  A1, INC2
	fxcsmadd	ys4, alpha1, a14, ys4
	LFPDUX	a13, A1, INC2

	LFPDUX	yl2, YL, INCY2

	fxcpmadd	ys1, alpha2, a3,  ys1
	LFPDUX	a2,  A2, INC2
	fxcpmadd	ys2, alpha2, a7,  ys2
	LFPDUX	a6,  A2, INC2
	fxcpmadd	ys3, alpha2, a11, ys3
	LFPDUX	a10, A2, INC2
	fxcpmadd	ys4, alpha2, a15, ys4
	LFPDUX	a14, A2, INC2

	LFPDUX	yl3, YL, INCY2

	fxcsmadd	ys1, alpha2, a4,  ys1
	LFPDUX	a3,  A3, INC2
	fxcsmadd	ys2, alpha2, a8,  ys2
	LFPDUX	a7,  A3, INC2
	fxcsmadd	ys3, alpha2, a12, ys3
	LFPDUX	a11, A3, INC2
	fxcsmadd	ys4, alpha2, a16, ys4
	LFPDUX	a15, A3, INC2

	LFPDUX	yl4, YL, INCY2

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	STFPDUX	ys3, YS, INCY2
	STFPDUX	ys4, YS, INCY2

	LFPDUX	a4,  A4, INC2
	fxcpmadd	ys1, alpha1, a1,  yl1
	LFPDUX	a8,  A4, INC2
	fxcpmadd	ys2, alpha1, a5,  yl2
	LFPDUX	a12, A4, INC2
	fxcpmadd	ys3, alpha1, a9,  yl3
	LFPDUX	a16, A4, INC2
	fxcpmadd	ys4, alpha1, a13, yl4
	bdnz	.L12
	.align 4

/* Drain the pipeline: finish the last preloaded group without loading */
.L13:
	fxcsmadd	ys1, alpha1, a2,  ys1
	fxcsmadd	ys2, alpha1, a6,  ys2
	fxcsmadd	ys3, alpha1, a10, ys3
	fxcsmadd	ys4, alpha1, a14, ys4

	fxcpmadd	ys1, alpha2, a3,  ys1
	fxcpmadd	ys2, alpha2, a7,  ys2
	fxcpmadd	ys3, alpha2, a11, ys3
	fxcpmadd	ys4, alpha2, a15, ys4

	fxcsmadd	ys1, alpha2, a4,  ys1
	fxcsmadd	ys2, alpha2, a8,  ys2
	fxcsmadd	ys3, alpha2, a12, ys3
	fxcsmadd	ys4, alpha2, a16, ys4

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	STFPDUX	ys3, YS, INCY2
	STFPDUX	ys4, YS, INCY2
	.align 4

/* Row tails: M & 4, M & 2, M & 1 */
.L15:
	andi.	r0, M, 7
	ble	.L19

	andi.	r0, M, 4
	ble	.L17

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2
	LFPDUX	yl2, YL, INCY2
	LFPDUX	a5,  A1, INC2

	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a3,  A3, INC2
	LFPDUX	a7,  A3, INC2

	LFPDUX	a4,  A4, INC2
	LFPDUX	a8,  A4, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcpmadd	ys2, alpha1, a5, yl2
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcsmadd	ys2, alpha1, a6, ys2

	fxcpmadd	ys1, alpha2, a3, ys1
	fxcpmadd	ys2, alpha2, a7, ys2
	fxcsmadd	ys1, alpha2, a4, ys1
	fxcsmadd	ys2, alpha2, a8, ys2

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	.align 4

.L17:
	andi.	r0, M, 2
	ble	.L18

	LFPDUX	yl1, YL, INCY2

	LFPDUX	a1, A1, INC2
	LFPDUX	a2, A2, INC2
	LFPDUX	a3, A3, INC2
	LFPDUX	a4, A4, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcpmadd	ys1, alpha2, a3, ys1
	fxcsmadd	ys1, alpha2, a4, ys1

	STFPDUX	ys1, YS, INCY2
	.align 4

.L18:
	andi.	r0, M, 1
	ble	.L19

	LFDUX	yl1, YL, INCY2

	LFDUX	a1, A1, INC2
	LFDUX	a2, A2, INC2
	LFDUX	a3, A3, INC2
	LFDUX	a4, A4, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcpmadd	ys1, alpha2, a3, ys1
	fxcsmadd	ys1, alpha2, a4, ys1

	STFDUX	ys1, YS, INCY2
	.align 4

.L19:
	addi	J, J, -1
	cmpi	cr0, 0, J, 0
	bgt	.L11
	.align 4

/* ---- aligned Y, remaining pair of columns (N & 2) ---- */
.L20:
	andi.	J, N, 2
	ble	.L30

	LFDUX	alpha1, X, INCX

	mr	A1, A
	add	A2, A,  LDA
	add	A,  A2, LDA
	LFSDUX	alpha1, X, INCX

	mr	YL, Y
	mr	YS, Y
	fpmul	alpha1, alpha, alpha1

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L25

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2
	LFPDUX	yl2, YL, INCY2
	LFPDUX	a5,  A1, INC2

	LFPDUX	yl3, YL, INCY2
	LFPDUX	a9,  A1, INC2
	LFPDUX	yl4, YL, INCY2
	LFPDUX	a13, A1, INC2

	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a10, A2, INC2
	LFPDUX	a14, A2, INC2
	bdz	.L23
	.align 4

.L22:
	fxcpmadd	ys1, alpha1, a1,  yl1
	LFPDUX	a1,  A1, INC2
	LFPDUX	yl1, YL, INCY2
	fxcpmadd	ys2, alpha1, a5,  yl2
	LFPDUX	a5,  A1, INC2
	LFPDUX	yl2, YL, INCY2
	fxcpmadd	ys3, alpha1, a9,  yl3
	LFPDUX	a9,  A1, INC2
	LFPDUX	yl3, YL, INCY2
	fxcpmadd	ys4, alpha1, a13, yl4
	LFPDUX	a13, A1, INC2
	LFPDUX	yl4, YL, INCY2

	fxcsmadd	ys1, alpha1, a2,  ys1
	LFPDUX	a2,  A2, INC2
	fxcsmadd	ys2, alpha1, a6,  ys2
	LFPDUX	a6,  A2, INC2
	fxcsmadd	ys3, alpha1, a10, ys3
	LFPDUX	a10, A2, INC2
	fxcsmadd	ys4, alpha1, a14, ys4
	LFPDUX	a14, A2, INC2

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	STFPDUX	ys3, YS, INCY2
	STFPDUX	ys4, YS, INCY2
	bdnz	.L22
	.align 4

.L23:
	fxcpmadd	ys1, alpha1, a1,  yl1
	fxcpmadd	ys2, alpha1, a5,  yl2
	fxcpmadd	ys3, alpha1, a9,  yl3
	fxcpmadd	ys4, alpha1, a13, yl4

	fxcsmadd	ys1, alpha1, a2,  ys1
	fxcsmadd	ys2, alpha1, a6,  ys2
	fxcsmadd	ys3, alpha1, a10, ys3
	fxcsmadd	ys4, alpha1, a14, ys4

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	STFPDUX	ys3, YS, INCY2
	STFPDUX	ys4, YS, INCY2
	.align 4

.L25:
	andi.	r0, M, 7
	ble	.L30

	andi.	r0, M, 4
	ble	.L27

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2
	LFPDUX	a2,  A2, INC2

	LFPDUX	yl2, YL, INCY2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a6,  A2, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcpmadd	ys2, alpha1, a5, yl2
	fxcsmadd	ys2, alpha1, a6, ys2

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	.align 4

.L27:
	andi.	r0, M, 2
	ble	.L28

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2
	LFPDUX	a2,  A2, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1

	STFPDUX	ys1, YS, INCY2
	.align 4

.L28:
	andi.	r0, M, 1
	ble	.L30

	LFDUX	yl1, YL, INCY2
	LFDUX	a1,  A1, INC2
	LFDUX	a2,  A2, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1

	STFDUX	ys1, YS, INCY2
	.align 4

/* ---- aligned Y, last single column (N & 1) ---- */
.L30:
	andi.	J, N, 1
	ble	.L999

	LFDUX	alpha1, X, INCX

	mr	A1, A
	mr	YL, Y
	mr	YS, Y
	fmul	alpha1, alpha, alpha1

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L35

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2
	LFPDUX	yl2, YL, INCY2
	LFPDUX	a5,  A1, INC2

	LFPDUX	yl3, YL, INCY2
	LFPDUX	a9,  A1, INC2
	LFPDUX	yl4, YL, INCY2
	LFPDUX	a13, A1, INC2
	bdz	.L33
	.align 4

.L32:
	fxcpmadd	ys1, alpha1, a1,  yl1
	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2
	fxcpmadd	ys2, alpha1, a5,  yl2
	LFPDUX	yl2, YL, INCY2
	LFPDUX	a5,  A1, INC2
	fxcpmadd	ys3, alpha1, a9,  yl3
	LFPDUX	yl3, YL, INCY2
	LFPDUX	a9,  A1, INC2
	fxcpmadd	ys4, alpha1, a13, yl4
	LFPDUX	yl4, YL, INCY2
	LFPDUX	a13, A1, INC2

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	STFPDUX	ys3, YS, INCY2
	STFPDUX	ys4, YS, INCY2
	bdnz	.L32
	.align 4

.L33:
	fxcpmadd	ys1, alpha1, a1,  yl1
	fxcpmadd	ys2, alpha1, a5,  yl2
	fxcpmadd	ys3, alpha1, a9,  yl3
	fxcpmadd	ys4, alpha1, a13, yl4

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	STFPDUX	ys3, YS, INCY2
	STFPDUX	ys4, YS, INCY2
	.align 4

.L35:
	andi.	r0, M, 7
	ble	.L999

	andi.	r0, M, 4
	ble	.L37

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2

	LFPDUX	yl2, YL, INCY2
	LFPDUX	a5,  A1, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcpmadd	ys2, alpha1, a5, yl2

	STFPDUX	ys1, YS, INCY2
	STFPDUX	ys2, YS, INCY2
	.align 4

.L37:
	andi.	r0, M, 2
	ble	.L38

	LFPDUX	yl1, YL, INCY2
	LFPDUX	a1,  A1, INC2

	fxcpmadd	ys1, alpha1, a1, yl1

	STFPDUX	ys1, YS, INCY2
	.align 4

.L38:
	andi.	r0, M, 1
	ble	.L999

	LFDUX	yl1, YL, INCY2
	LFDUX	a1,  A1, INC2

	fxcpmadd	ys1, alpha1, a1, yl1

	STFDUX	ys1, YS, INCY2
	b	.L999
	.align 4

.L40:
/* A : aligned  LDA : even  Y : Unaligned */

	/*
	 * Y is unit-stride but starts one element past a pair boundary.
	 * Stores are still issued as aligned pairs: the secondary half of
	 * ys1 carries the value that belongs in the first slot of the next
	 * pair (initially the element just before y[0], afterwards the
	 * pending result of the previous step), and the cross-form loads and
	 * stores (LFXDUX / STFXDUX) swap halves to line things up.
	 */
	sub	A, A, INC2
	sub	Y, Y, INCY

	srawi.	J, N, 2
	ble	.L50
	.align 4

/* ---- unaligned Y, four columns per pass ---- */
.L41:
	LFDUX	alpha1, X, INCX
	LFSDUX	alpha1, X, INCX
	LFDUX	alpha2, X, INCX
	LFSDUX	alpha2, X, INCX

	fpmul	alpha1, alpha, alpha1
	fpmul	alpha2, alpha, alpha2

	mr	A1, A
	add	A2, A,  LDA
	add	A3, A2, LDA
	add	A4, A3, LDA
	add	A,  A4, LDA

	mr	YL, Y
	sub	YS, Y, INCY2

	LFSDX	ys1, YS, INCY2		/* element preceding y[0] -> carried half */
	LFDX	yl1, YL, INCY		/* y[0] */

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L45

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2

	LFXDUX	yl2, YL, INCY2
	LFXDUX	yl3, YL, INCY2
	LFXDUX	yl4, YL, INCY2
	LFXDUX	yl5, YL, INCY2

	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a10, A2, INC2
	LFPDUX	a14, A2, INC2

	LFPDUX	a3,  A3, INC2
	LFPDUX	a7,  A3, INC2
	LFPDUX	a11, A3, INC2
	LFPDUX	a15, A3, INC2

	LFPDUX	a4,  A4, INC2
	fsmr	yl1, yl2
	LFPDUX	a8,  A4, INC2
	fsmr	yl2, yl3
	LFPDUX	a12, A4, INC2
	fsmr	yl3, yl4
	LFPDUX	a16, A4, INC2
	fsmr	yl4, yl5
	bdz	.L43
	.align 4

.L42:
	fxcpmadd	ys2, alpha1, a1,  yl1
	LFPDUX	a1,  A1, INC2
	fxcpmadd	ys3, alpha1, a5,  yl2
	LFPDUX	a5,  A1, INC2
	fxcpmadd	ys4, alpha1, a9,  yl3
	LFPDUX	a9,  A1, INC2
	fxcpmadd	ys5, alpha1, a13, yl4
	LFPDUX	a13, A1, INC2

	fxcsmadd	ys2, alpha1, a2,  ys2
	LFPDUX	a2,  A2, INC2
	fxcsmadd	ys3, alpha1, a6,  ys3
	LFPDUX	a6,  A2, INC2
	fxcsmadd	ys4, alpha1, a10, ys4
	LFPDUX	a10, A2, INC2
	fxcsmadd	ys5, alpha1, a14, ys5
	LFPDUX	a14, A2, INC2

	fxcpmadd	ys2, alpha2, a3,  ys2
	LFPDUX	a3,  A3, INC2
	fxcpmadd	ys3, alpha2, a7,  ys3
	LFPDUX	a7,  A3, INC2
	fxcpmadd	ys4, alpha2, a11, ys4
	LFPDUX	a11, A3, INC2
	fxcpmadd	ys5, alpha2, a15, ys5
	LFPDUX	a15, A3, INC2

	fxcsmadd	ys2, alpha2, a4,  ys2
	LFPDUX	a4,  A4, INC2
	fxcsmadd	ys3, alpha2, a8,  ys3
	LFPDUX	a8,  A4, INC2
	fxcsmadd	ys4, alpha2, a12, ys4
	LFPDUX	a12, A4, INC2
	fxcsmadd	ys5, alpha2, a16, ys5
	LFPDUX	a16, A4, INC2

	/* Shift results down one half-pair and refill the y pipeline */
	fmr	yl1, yl5
	LFXDUX	yl2, YL, INCY2
	fmr	ys1, ys2
	LFXDUX	yl3, YL, INCY2
	fmr	ys2, ys3
	LFXDUX	yl4, YL, INCY2
	fmr	ys3, ys4
	LFXDUX	yl5, YL, INCY2
	fmr	ys4, ys5

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys5
	STFXDUX	ys2, YS, INCY2
	fsmr	yl1, yl2
	STFXDUX	ys3, YS, INCY2
	fsmr	yl2, yl3
	STFXDUX	ys4, YS, INCY2
	fsmr	yl3, yl4

	fsmr	yl4, yl5
	bdnz	.L42
	.align 4

.L43:
	fxcpmadd	ys2, alpha1, a1,  yl1
	fxcpmadd	ys3, alpha1, a5,  yl2
	fxcpmadd	ys4, alpha1, a9,  yl3
	fxcpmadd	ys5, alpha1, a13, yl4

	fxcsmadd	ys2, alpha1, a2,  ys2
	fxcsmadd	ys3, alpha1, a6,  ys3
	fxcsmadd	ys4, alpha1, a10, ys4
	fxcsmadd	ys5, alpha1, a14, ys5

	fxcpmadd	ys2, alpha2, a3,  ys2
	fxcpmadd	ys3, alpha2, a7,  ys3
	fxcpmadd	ys4, alpha2, a11, ys4
	fxcpmadd	ys5, alpha2, a15, ys5

	fxcsmadd	ys2, alpha2, a4,  ys2
	fxcsmadd	ys3, alpha2, a8,  ys3
	fxcsmadd	ys4, alpha2, a12, ys4
	fxcsmadd	ys5, alpha2, a16, ys5

	fmr	ys1, ys2
	fmr	ys2, ys3
	fmr	ys3, ys4
	fmr	ys4, ys5
	fmr	yl1, yl5

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys5
	STFXDUX	ys2, YS, INCY2
	STFXDUX	ys3, YS, INCY2
	STFXDUX	ys4, YS, INCY2
	.align 4

.L45:
	andi.	r0, M, 7
	ble	.L48

	andi.	r0, M, 4
	ble	.L46

	LFXDUX	yl2, YL, INCY2
	LFXDUX	yl3, YL, INCY2

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2

	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a3,  A3, INC2
	LFPDUX	a7,  A3, INC2

	LFPDUX	a4,  A4, INC2
	fsmr	yl1, yl2
	LFPDUX	a8,  A4, INC2
	fsmr	yl2, yl3

	fxcpmadd	ys2, alpha1, a1, yl1
	fxcpmadd	ys3, alpha1, a5, yl2
	fxcsmadd	ys2, alpha1, a2, ys2
	fxcsmadd	ys3, alpha1, a6, ys3

	fxcpmadd	ys2, alpha2, a3, ys2
	fxcpmadd	ys3, alpha2, a7, ys3
	fxcsmadd	ys2, alpha2, a4, ys2
	fxcsmadd	ys3, alpha2, a8, ys3

	fmr	yl1, yl3
	fmr	ys1, ys2
	fmr	ys2, ys3

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys3
	STFXDUX	ys2, YS, INCY2
	.align 4

.L46:
	andi.	r0, M, 2
	ble	.L47

	LFXDUX	yl2, YL, INCY2

	LFPDUX	a1, A1, INC2
	LFPDUX	a2, A2, INC2
	LFPDUX	a3, A3, INC2
	LFPDUX	a4, A4, INC2

	fsmr	yl1, yl2
	fxcpmadd	ys2, alpha1, a1, yl1
	fxcsmadd	ys2, alpha1, a2, ys2
	fxcpmadd	ys2, alpha2, a3, ys2
	fxcsmadd	ys2, alpha2, a4, ys2
	fmr	yl1, yl2

	fmr	ys1, ys2
	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys2
	.align 4

.L47:
	andi.	r0, M, 1
	ble	.L48

	LFDUX	a1, A1, INC2
	LFDUX	a2, A2, INC2
	LFDUX	a3, A3, INC2
	LFDUX	a4, A4, INC2

	fxcpmadd	ys2, alpha1, a1, yl1
	fxcsmadd	ys2, alpha1, a2, ys2
	fxcpmadd	ys2, alpha2, a3, ys2
	fxcsmadd	ys2, alpha2, a4, ys2

	/* Flush the carried half, then the final odd element, as scalars */
	STFSDX	ys1, YS, INCY2
	add	YS, YS, INCY
	STFDX	ys2, YS, INCY2
	b	.L49
	.align 4

.L48:
	STFSDUX	ys1, YS, INCY2		/* flush the carried half */
	.align 4

.L49:
	addi	J, J, -1
	cmpi	cr0, 0, J, 0
	bgt	.L41
	.align 4

/* ---- unaligned Y, remaining pair of columns (N & 2) ---- */
.L50:
	andi.	J, N, 2
	ble	.L60

	LFDUX	alpha1, X, INCX

	mr	A1, A
	add	A2, A,  LDA
	add	A,  A2, LDA
	LFSDUX	alpha1, X, INCX

	mr	YL, Y
	sub	YS, Y, INCY2
	fpmul	alpha1, alpha, alpha1

	LFSDX	ys1, YS, INCY2
	LFDX	yl1, YL, INCY

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L55

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2

	LFXDUX	yl2, YL, INCY2
	LFXDUX	yl3, YL, INCY2
	LFXDUX	yl4, YL, INCY2
	LFXDUX	yl5, YL, INCY2

	LFPDUX	a2,  A2, INC2
	fsmr	yl1, yl2
	LFPDUX	a6,  A2, INC2
	fsmr	yl2, yl3
	LFPDUX	a10, A2, INC2
	fsmr	yl3, yl4
	LFPDUX	a14, A2, INC2
	fsmr	yl4, yl5
	bdz	.L53
	.align 4

.L52:
	fxcpmadd	ys2, alpha1, a1,  yl1
	LFPDUX	a1,  A1, INC2
	fxcpmadd	ys3, alpha1, a5,  yl2
	LFPDUX	a5,  A1, INC2
	fxcpmadd	ys4, alpha1, a9,  yl3
	LFPDUX	a9,  A1, INC2
	fxcpmadd	ys5, alpha1, a13, yl4
	LFPDUX	a13, A1, INC2

	fxcsmadd	ys2, alpha1, a2,  ys2
	LFPDUX	a2,  A2, INC2
	fxcsmadd	ys3, alpha1, a6,  ys3
	LFPDUX	a6,  A2, INC2
	fxcsmadd	ys4, alpha1, a10, ys4
	LFPDUX	a10, A2, INC2
	fxcsmadd	ys5, alpha1, a14, ys5
	LFPDUX	a14, A2, INC2

	fmr	yl1, yl5
	LFXDUX	yl2, YL, INCY2
	fmr	ys1, ys2
	LFXDUX	yl3, YL, INCY2
	fmr	ys2, ys3
	LFXDUX	yl4, YL, INCY2
	fmr	ys3, ys4
	LFXDUX	yl5, YL, INCY2
	fmr	ys4, ys5

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys5
	STFXDUX	ys2, YS, INCY2
	fsmr	yl1, yl2
	STFXDUX	ys3, YS, INCY2
	fsmr	yl2, yl3
	STFXDUX	ys4, YS, INCY2
	fsmr	yl3, yl4

	fsmr	yl4, yl5
	bdnz	.L52
	.align 4

.L53:
	fxcpmadd	ys2, alpha1, a1,  yl1
	fxcpmadd	ys3, alpha1, a5,  yl2
	fxcpmadd	ys4, alpha1, a9,  yl3
	fxcpmadd	ys5, alpha1, a13, yl4

	fxcsmadd	ys2, alpha1, a2,  ys2
	fxcsmadd	ys3, alpha1, a6,  ys3
	fxcsmadd	ys4, alpha1, a10, ys4
	fxcsmadd	ys5, alpha1, a14, ys5

	fmr	yl1, yl5
	fmr	ys1, ys2
	fmr	ys2, ys3
	fmr	ys3, ys4
	fmr	ys4, ys5

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys5
	STFXDUX	ys2, YS, INCY2
	STFXDUX	ys3, YS, INCY2
	STFXDUX	ys4, YS, INCY2
	.align 4

.L55:
	andi.	r0, M, 7
	ble	.L59

	andi.	r0, M, 4
	ble	.L57

	LFXDUX	yl2, YL, INCY2
	LFXDUX	yl3, YL, INCY2

	LFPDUX	a1, A1, INC2
	LFPDUX	a2, A2, INC2

	LFPDUX	a5, A1, INC2
	LFPDUX	a6, A2, INC2

	fsmr	yl1, yl2
	fsmr	yl2, yl3

	fxcpmadd	ys2, alpha1, a1, yl1
	fxcsmadd	ys2, alpha1, a2, ys2
	fxcpmadd	ys3, alpha1, a5, yl2
	fxcsmadd	ys3, alpha1, a6, ys3

	fmr	yl1, yl3
	fmr	ys1, ys2
	fmr	ys2, ys3

	STFXDUX	ys1, YS, INCY2
	STFXDUX	ys2, YS, INCY2
	fsmr	ys1, ys3
	.align 4

.L57:
	andi.	r0, M, 2
	ble	.L58

	LFXDUX	yl2, YL, INCY2
	LFPDUX	a1, A1, INC2
	LFPDUX	a2, A2, INC2

	fsmr	yl1, yl2
	fxcpmadd	ys2, alpha1, a1, yl1
	fxcsmadd	ys2, alpha1, a2, ys2
	fmr	yl1, yl2

	fmr	ys1, ys2
	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys2
	.align 4

.L58:
	andi.	r0, M, 1
	ble	.L59

	LFDUX	a1, A1, INC2
	LFDUX	a2, A2, INC2

	/* Scalar tail: alpha2 = alpha1 with halves swapped, so both x terms
	   can be applied with primary-only fmadd while ys1 keeps its carry */
	fxmr	alpha2, alpha1
	fmadd	ys1, alpha1, a1, yl1
	fmadd	ys1, alpha2, a2, ys1

	STFXDUX	ys1, YS, INCY2
	b	.L60
	.align 4

.L59:
	STFSDUX	ys1, YS, INCY2
	.align 4

/* ---- unaligned Y, last single column (N & 1) ---- */
.L60:
	andi.	J, N, 1
	ble	.L999

	LFDUX	alpha1, X, INCX
	mr	A1, A

	mr	YL, Y
	sub	YS, Y, INCY2

	fmul	alpha1, alpha, alpha1

	LFSDX	ys1, YS, INCY2
	LFDX	yl1, YL, INCY

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L65

	LFXDUX	yl2, YL, INCY2
	LFXDUX	yl3, YL, INCY2
	LFXDUX	yl4, YL, INCY2
	LFXDUX	yl5, YL, INCY2

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2

	fsmr	yl1, yl2
	fsmr	yl2, yl3
	fsmr	yl3, yl4
	fsmr	yl4, yl5
	bdz	.L63
	.align 4

.L62:
	fxcpmadd	ys2, alpha1, a1,  yl1
	LFPDUX	a1,  A1, INC2
	fxcpmadd	ys3, alpha1, a5,  yl2
	LFXDUX	yl2, YL, INCY2
	fxcpmadd	ys4, alpha1, a9,  yl3
	LFXDUX	yl3, YL, INCY2
	fxcpmadd	ys5, alpha1, a13, yl4
	LFXDUX	yl4, YL, INCY2

	fmr	yl1, yl5
	LFXDUX	yl5, YL, INCY2
	fmr	ys1, ys2
	LFPDUX	a5,  A1, INC2
	fmr	ys2, ys3
	LFPDUX	a9,  A1, INC2
	fmr	ys3, ys4
	LFPDUX	a13, A1, INC2
	fmr	ys4, ys5

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys5
	STFXDUX	ys2, YS, INCY2
	fsmr	yl1, yl2
	STFXDUX	ys3, YS, INCY2
	fsmr	yl2, yl3
	STFXDUX	ys4, YS, INCY2
	fsmr	yl3, yl4

	fsmr	yl4, yl5
	bdnz	.L62
	.align 4

.L63:
	fxcpmadd	ys2, alpha1, a1,  yl1
	fxcpmadd	ys3, alpha1, a5,  yl2
	fxcpmadd	ys4, alpha1, a9,  yl3
	fxcpmadd	ys5, alpha1, a13, yl4

	fmr	yl1, yl5
	fmr	ys1, ys2
	fmr	ys2, ys3
	fmr	ys3, ys4
	fmr	ys4, ys5

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys5
	STFXDUX	ys2, YS, INCY2
	STFXDUX	ys3, YS, INCY2
	STFXDUX	ys4, YS, INCY2
	.align 4

.L65:
	andi.	r0, M, 7
	ble	.L69

	andi.	r0, M, 4
	ble	.L67

	LFXDUX	yl2, YL, INCY2
	LFXDUX	yl3, YL, INCY2

	LFPDUX	a1, A1, INC2
	LFPDUX	a5, A1, INC2

	fsmr	yl1, yl2
	fsmr	yl2, yl3

	fxcpmadd	ys2, alpha1, a1, yl1
	fxcpmadd	ys3, alpha1, a5, yl2

	fmr	yl1, yl3
	fmr	ys1, ys2
	fmr	ys2, ys3

	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys3
	STFXDUX	ys2, YS, INCY2
	.align 4

.L67:
	andi.	r0, M, 2
	ble	.L68

	LFPDUX	a1,  A1, INC2
	LFXDUX	yl2, YL, INCY2

	fsmr	yl1, yl2
	fxcpmadd	ys2, alpha1, a1, yl1
	fmr	yl1, yl2
	fmr	ys1, ys2
	STFXDUX	ys1, YS, INCY2
	fsmr	ys1, ys2
	.align 4

.L68:
	andi.	r0, M, 1
	ble	.L69

	LFDUX	a1, A1, INC2
	fmadd	ys1, alpha1, a1, yl1
	STFXDUX	ys1, YS, INCY2
	b	.L999
	.align 4

.L69:
	STFSDUX	ys1, YS, INCY2
	b	.L999
	.align 4

/*
 * ---- Y with non-unit stride ----
 * y elements are gathered one at a time into the two halves of a pair
 * (LFDUX / LFSDUX) and scattered back the same way (STFDUX / STFSDUX);
 * A is still read with paired loads.
 */
.L70:
	sub	A, A, INC2
	sub	Y, Y, INCY
	srawi.	J, N, 2
	ble	.L80
	.align 4

/* ---- strided Y, four columns per pass ---- */
.L71:
	LFDUX	alpha1, X, INCX
	mr	A1, A
	add	A2, A,  LDA
	add	A3, A2, LDA
	LFSDUX	alpha1, X, INCX
	LFDUX	alpha2, X, INCX
	add	A4, A3, LDA
	add	A,  A4, LDA
	mr	YL, Y
	LFSDUX	alpha2, X, INCX
	fpmul	alpha1, alpha, alpha1
	mr	YS, Y
	srawi.	r0, M, 3
	mtspr	CTR, r0
	fpmul	alpha2, alpha, alpha2
	ble	.L75

	LFDUX	yl1, YL, INCY
	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2
	LFSDUX	yl1, YL, INCY

	LFDUX	yl2, YL, INCY
	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a10, A2, INC2
	LFPDUX	a14, A2, INC2
	LFSDUX	yl2, YL, INCY

	LFDUX	yl3, YL, INCY
	LFPDUX	a3,  A3, INC2
	LFPDUX	a7,  A3, INC2
	LFPDUX	a11, A3, INC2
	LFPDUX	a15, A3, INC2
	LFSDUX	yl3, YL, INCY

	LFDUX	yl4, YL, INCY
	LFPDUX	a4,  A4, INC2
	LFPDUX	a8,  A4, INC2
	LFPDUX	a12, A4, INC2
	LFPDUX	a16, A4, INC2
	LFSDUX	yl4, YL, INCY
	bdz	.L73
	.align 4

.L72:
	fxcpmadd	ys1, alpha1, a1,  yl1
	LFPDUX	a1,  A1, INC2
	LFDUX	yl1, YL, INCY
	fxcpmadd	ys2, alpha1, a5,  yl2
	LFPDUX	a5,  A1, INC2
	fxcpmadd	ys3, alpha1, a9,  yl3
	LFPDUX	a9,  A1, INC2
	fxcpmadd	ys4, alpha1, a13, yl4
	LFPDUX	a13, A1, INC2
	LFSDUX	yl1, YL, INCY

	fxcsmadd	ys1, alpha1, a2,  ys1
	LFPDUX	a2,  A2, INC2
	LFDUX	yl2, YL, INCY
	fxcsmadd	ys2, alpha1, a6,  ys2
	LFPDUX	a6,  A2, INC2
	fxcsmadd	ys3, alpha1, a10, ys3
	LFPDUX	a10, A2, INC2
	fxcsmadd	ys4, alpha1, a14, ys4
	LFPDUX	a14, A2, INC2
	LFSDUX	yl2, YL, INCY

	fxcpmadd	ys1, alpha2, a3,  ys1
	LFPDUX	a3,  A3, INC2
	LFDUX	yl3, YL, INCY
	fxcpmadd	ys2, alpha2, a7,  ys2
	LFPDUX	a7,  A3, INC2
	fxcpmadd	ys3, alpha2, a11, ys3
	LFPDUX	a11, A3, INC2
	fxcpmadd	ys4, alpha2, a15, ys4
	LFPDUX	a15, A3, INC2
	LFSDUX	yl3, YL, INCY

	fxcsmadd	ys1, alpha2, a4,  ys1
	LFPDUX	a4,  A4, INC2
	LFDUX	yl4, YL, INCY
	fxcsmadd	ys2, alpha2, a8,  ys2
	LFPDUX	a8,  A4, INC2
	fxcsmadd	ys3, alpha2, a12, ys3
	LFPDUX	a12, A4, INC2
	fxcsmadd	ys4, alpha2, a16, ys4
	LFPDUX	a16, A4, INC2
	LFSDUX	yl4, YL, INCY

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	STFDUX	ys3, YS, INCY
	STFSDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	STFSDUX	ys4, YS, INCY
	bdnz	.L72
	.align 4

.L73:
	fxcpmadd	ys1, alpha1, a1,  yl1
	fxcpmadd	ys2, alpha1, a5,  yl2
	fxcpmadd	ys3, alpha1, a9,  yl3
	fxcpmadd	ys4, alpha1, a13, yl4

	fxcsmadd	ys1, alpha1, a2,  ys1
	fxcsmadd	ys2, alpha1, a6,  ys2
	fxcsmadd	ys3, alpha1, a10, ys3
	fxcsmadd	ys4, alpha1, a14, ys4

	fxcpmadd	ys1, alpha2, a3,  ys1
	fxcpmadd	ys2, alpha2, a7,  ys2
	fxcpmadd	ys3, alpha2, a11, ys3
	fxcpmadd	ys4, alpha2, a15, ys4

	fxcsmadd	ys1, alpha2, a4,  ys1
	fxcsmadd	ys2, alpha2, a8,  ys2
	fxcsmadd	ys3, alpha2, a12, ys3
	fxcsmadd	ys4, alpha2, a16, ys4

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	STFDUX	ys3, YS, INCY
	STFSDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	STFSDUX	ys4, YS, INCY
	.align 4

.L75:
	andi.	r0, M, 7
	ble	.L79

	andi.	r0, M, 4
	ble	.L77

	LFDUX	yl1, YL, INCY
	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFSDUX	yl1, YL, INCY
	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2

	LFDUX	yl2, YL, INCY
	LFPDUX	a3,  A3, INC2
	LFPDUX	a7,  A3, INC2
	LFSDUX	yl2, YL, INCY
	LFPDUX	a4,  A4, INC2
	LFPDUX	a8,  A4, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcpmadd	ys2, alpha1, a5, yl2
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcsmadd	ys2, alpha1, a6, ys2

	fxcpmadd	ys1, alpha2, a3, ys1
	fxcpmadd	ys2, alpha2, a7, ys2
	fxcsmadd	ys1, alpha2, a4, ys1
	fxcsmadd	ys2, alpha2, a8, ys2

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	.align 4

.L77:
	andi.	r0, M, 2
	ble	.L78

	LFDUX	yl1, YL, INCY
	LFPDUX	a1,  A1, INC2
	LFPDUX	a2,  A2, INC2
	LFSDUX	yl1, YL, INCY
	LFPDUX	a3,  A3, INC2
	LFPDUX	a4,  A4, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcpmadd	ys1, alpha2, a3, ys1
	fxcsmadd	ys1, alpha2, a4, ys1

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	.align 4

.L78:
	andi.	r0, M, 1
	ble	.L79

	LFDUX	yl1, YL, INCY

	LFDUX	a1, A1, INC2
	LFDUX	a2, A2, INC2
	LFDUX	a3, A3, INC2
	LFDUX	a4, A4, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcpmadd	ys1, alpha2, a3, ys1
	fxcsmadd	ys1, alpha2, a4, ys1

	STFDUX	ys1, YS, INCY
	.align 4

.L79:
	addi	J, J, -1
	cmpi	cr0, 0, J, 0
	bgt	.L71
	.align 4

/* ---- strided Y, remaining pair of columns (N & 2) ---- */
.L80:
	andi.	J, N, 2
	ble	.L90

	LFDUX	alpha1, X, INCX

	mr	A1, A
	add	A2, A,  LDA
	add	A,  A2, LDA
	LFSDUX	alpha1, X, INCX

	mr	YL, Y
	mr	YS, Y
	fpmul	alpha1, alpha, alpha1

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L85

	/* a9..a12 temporarily hold the odd y elements; they are merged
	   into the secondary halves of yl1..yl4 by fsmfp in the loop.
	   In this section the A1 column uses a1,a5,a3,a7 and the A2
	   column uses a2,a6,a4,a8. */
	LFDUX	yl1, YL, INCY
	LFDUX	a9,  YL, INCY
	LFDUX	yl2, YL, INCY
	LFDUX	a10, YL, INCY

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a3,  A1, INC2
	LFPDUX	a7,  A1, INC2

	LFDUX	yl3, YL, INCY
	LFDUX	a11, YL, INCY
	LFDUX	yl4, YL, INCY
	LFDUX	a12, YL, INCY

	LFPDUX	a2,  A2, INC2
	LFPDUX	a6,  A2, INC2
	LFPDUX	a4,  A2, INC2
	LFPDUX	a8,  A2, INC2

	bdz	.L83
	.align 4

.L82:
	fsmfp	yl1, a9
	fsmfp	yl2, a10
	fsmfp	yl3, a11
	fsmfp	yl4, a12

	fxcpmadd	ys1, alpha1, a1, yl1
	LFDUX	yl1, YL, INCY
	LFDUX	a9,  YL, INCY
	LFPDUX	a1,  A1, INC2
	fxcpmadd	ys2, alpha1, a5, yl2
	LFDUX	yl2, YL, INCY
	LFDUX	a10, YL, INCY
	LFPDUX	a5,  A1, INC2
	fxcpmadd	ys3, alpha1, a3, yl3
	LFDUX	yl3, YL, INCY
	LFDUX	a11, YL, INCY
	LFPDUX	a3,  A1, INC2
	fxcpmadd	ys4, alpha1, a7, yl4
	LFDUX	yl4, YL, INCY
	LFDUX	a12, YL, INCY
	LFPDUX	a7,  A1, INC2

	fxcsmadd	ys1, alpha1, a2, ys1
	LFPDUX	a2,  A2, INC2
	fxcsmadd	ys2, alpha1, a6, ys2
	LFPDUX	a6,  A2, INC2
	fxcsmadd	ys3, alpha1, a4, ys3
	LFPDUX	a4,  A2, INC2
	fxcsmadd	ys4, alpha1, a8, ys4
	LFPDUX	a8,  A2, INC2

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY

	STFDUX	ys3, YS, INCY
	STFSDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	STFSDUX	ys4, YS, INCY
	bdnz	.L82
	.align 4

.L83:
	fsmfp	yl1, a9
	fsmfp	yl2, a10
	fsmfp	yl3, a11
	fsmfp	yl4, a12

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcpmadd	ys2, alpha1, a5, yl2
	fxcpmadd	ys3, alpha1, a3, yl3
	fxcpmadd	ys4, alpha1, a7, yl4

	fxcsmadd	ys1, alpha1, a2, ys1
	fxcsmadd	ys2, alpha1, a6, ys2
	fxcsmadd	ys3, alpha1, a4, ys3
	fxcsmadd	ys4, alpha1, a8, ys4

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	STFDUX	ys3, YS, INCY
	STFSDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	STFSDUX	ys4, YS, INCY
	.align 4

.L85:
	andi.	r0, M, 7
	ble	.L90

	andi.	r0, M, 4
	ble	.L87

	LFDUX	yl1, YL, INCY
	LFPDUX	a1,  A1, INC2
	LFPDUX	a2,  A2, INC2
	LFSDUX	yl1, YL, INCY
	LFDUX	yl2, YL, INCY
	LFPDUX	a5,  A1, INC2
	LFPDUX	a6,  A2, INC2
	LFSDUX	yl2, YL, INCY

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcpmadd	ys2, alpha1, a5, yl2
	fxcsmadd	ys1, alpha1, a2, ys1
	fxcsmadd	ys2, alpha1, a6, ys2

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	.align 4

.L87:
	andi.	r0, M, 2
	ble	.L88

	LFDUX	yl1, YL, INCY
	LFPDUX	a1,  A1, INC2
	LFPDUX	a2,  A2, INC2
	LFSDUX	yl1, YL, INCY

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	.align 4

.L88:
	andi.	r0, M, 1
	ble	.L90

	LFDUX	yl1, YL, INCY
	LFDUX	a1,  A1, INC2
	LFDUX	a2,  A2, INC2

	fxcpmadd	ys1, alpha1, a1, yl1
	fxcsmadd	ys1, alpha1, a2, ys1

	STFDUX	ys1, YS, INCY
	.align 4

/* ---- strided Y, last single column (N & 1) ---- */
.L90:
	andi.	J, N, 1
	ble	.L999

	LFDUX	alpha1, X, INCX

	mr	A1, A
	mr	YL, Y
	mr	YS, Y
	fmul	alpha1, alpha, alpha1

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	.L95

	/* a2,a4,a6,a8 serve as y pair registers here: the odd element is
	   loaded straight into the secondary half and the even element is
	   copied in from yl1..yl4 by fmr inside the loop. */
	LFDUX	yl1, YL, INCY
	LFSDUX	a2,  YL, INCY
	LFDUX	yl2, YL, INCY
	LFSDUX	a4,  YL, INCY
	LFDUX	yl3, YL, INCY
	LFSDUX	a6,  YL, INCY
	LFDUX	yl4, YL, INCY
	LFSDUX	a8,  YL, INCY

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2
	bdz	.L93
	.align 4

.L92:
	fmr	a2, yl1
	fmr	a4, yl2
	fmr	a6, yl3
	fmr	a8, yl4

	fxcpmadd	ys1, alpha1, a1,  a2
	LFDUX	yl1, YL, INCY
	LFSDUX	a2,  YL, INCY
	fxcpmadd	ys2, alpha1, a5,  a4
	LFDUX	yl2, YL, INCY
	LFSDUX	a4,  YL, INCY
	fxcpmadd	ys3, alpha1, a9,  a6
	LFDUX	yl3, YL, INCY
	LFSDUX	a6,  YL, INCY
	fxcpmadd	ys4, alpha1, a13, a8
	LFDUX	yl4, YL, INCY
	LFSDUX	a8,  YL, INCY

	LFPDUX	a1,  A1, INC2
	LFPDUX	a5,  A1, INC2
	LFPDUX	a9,  A1, INC2
	LFPDUX	a13, A1, INC2

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	STFDUX	ys3, YS, INCY
	STFSDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	STFSDUX	ys4, YS, INCY
	bdnz	.L92
	.align 4

.L93:
	fmr	a2, yl1
	fmr	a4, yl2
	fmr	a6, yl3
	fmr	a8, yl4

	fxcpmadd	ys1, alpha1, a1,  a2
	fxcpmadd	ys2, alpha1, a5,  a4
	fxcpmadd	ys3, alpha1, a9,  a6
	fxcpmadd	ys4, alpha1, a13, a8

	STFDUX	ys1, YS, INCY
	STFSDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFSDUX	ys2, YS, INCY
	STFDUX	ys3, YS, INCY
	STFSDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	STFSDUX	ys4, YS, INCY
	.align 4

.L95:
	andi.	r0, M, 7
	ble	.L999

	andi.	r0, M, 4
	ble	.L97

	/* Both pairs come from the same column (A1); each y element gets
	   its own register and only primary halves are stored.  Note the
	   operand order: the A pair is the first multiplicand here. */
	LFPDUX	a1,  A1, INC2
	LFDUX	yl1, YL, INCY
	LFDUX	yl2, YL, INCY
	LFPDUX	a2,  A1, INC2
	LFDUX	yl3, YL, INCY
	LFDUX	yl4, YL, INCY

	fxcpmadd	ys1, a1, alpha1, yl1
	fxcsmadd	ys2, a1, alpha1, yl2
	fxcpmadd	ys3, a2, alpha1, yl3
	fxcsmadd	ys4, a2, alpha1, yl4

	STFDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	STFDUX	ys3, YS, INCY
	STFDUX	ys4, YS, INCY
	.align 4

.L97:
	andi.	r0, M, 2
	ble	.L98

	LFPDUX	a1,  A1, INC2
	LFDUX	yl1, YL, INCY
	LFDUX	yl2, YL, INCY

	fxcpmadd	ys1, a1, alpha1, yl1
	fxcsmadd	ys2, a1, alpha1, yl2

	STFDUX	ys1, YS, INCY
	STFDUX	ys2, YS, INCY
	.align 4

.L98:
	andi.	r0, M, 1
	ble	.L999

	LFDUX	yl1, YL, INCY
	LFDUX	a1,  A1, INC2

	fxcpmadd	ys1, alpha1, a1, yl1

	STFDUX	ys1, YS, INCY
	b	.L999
	.align 4


/* ---- common exit: restore r16-r31 and f14-f31 in reverse save order ---- */
.L999:
	addi	SP, SP, -4		/* bias SP for the update-form reloads */

	lwzu	r16, 4(SP)
	lwzu	r17, 4(SP)
	lwzu	r18, 4(SP)
	lwzu	r19, 4(SP)

	lwzu	r20, 4(SP)
	lwzu	r21, 4(SP)
	lwzu	r22, 4(SP)
	lwzu	r23, 4(SP)

	lwzu	r24, 4(SP)
	lwzu	r25, 4(SP)
	lwzu	r26, 4(SP)
	lwzu	r27, 4(SP)

	lwzu	r28, 4(SP)
	lwzu	r29, 4(SP)
	lwzu	r30, 4(SP)
	lwzu	r31, 4(SP)

	subi	SP, SP, 12		/* so that SP + 16 is the f31 save slot */
	li	r0, 16

	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0
	addi	SP, SP, 16
	blr

	EPILOGUE