/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2009. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.
*/
/*********************************************************************/

/*
 * Packing (copy) kernel for PowerPC with the paired "double hummer" FPU.
 *
 * Register arguments (see the #defines below):
 *   M   (r3)  number of consecutive elements taken from each column
 *   N   (r4)  number of columns
 *   A   (r5)  source; column j+1 starts LDA elements after column j
 *   LDA (r6)  column stride in elements (shifted left by BASE_SHIFT on
 *             entry to obtain a byte stride)
 *   B   (r7)  destination, written strictly sequentially
 *
 * Columns are consumed in panels of 8 (N >> 3 times), then 4, 2 and 1
 * (bits 2, 1 and 0 of N).  Inside a panel, for every element index the
 * values of all panel columns are stored next to each other in B.
 *
 * Two implementations of the same copy:
 *   .L11  - .L47   A and the byte stride are both multiples of 2*SIZE:
 *                  paired loads (LFPDUX / LFXDUX) recombined with fpsel.
 *   .L100 - .L147  otherwise: one element per load, placed into the
 *                  primary (LFDUX) or secondary (LFSDUX) register half.
 *
 * Saved and restored around the body: f14-f31 and r26-r31.
 *
 * NOTE(review): SIZE, BASE_SHIFT, PROLOGUE / PROFCODE / EPILOGUE and the
 * upper-case load/store macros are expected to come from common.h, which
 * is not visible here - confirm their definitions there.
 */

#define ASSEMBLER
#include "common.h"

/* Arguments */
#define	M	r3
#define	N	r4
#define	A	r5
#define	LDA	r6
#define B	r7

/* Column pointers of the current panel */
#define AO1	r8
#define AO2	r9
#define AO3	r10
#define AO4	r11

/* Remaining 8-column panels */
#define J	r12

#define AO5	r26
#define AO6	r27
#define AO7	r28
#define AO8	r29
/* Update distances for the indexed load/store forms */
#define INC	r30
#define INC2	r31

/* Data registers */
#define c01	f0
#define c02	f1
#define c03	f2
#define c04	f3
#define c05	f4
#define c06	f5
#define c07	f6
#define c08	f7
#define c09	f8
#define c10	f9
#define c11	f10
#define c12	f11
#define c13	f12
#define c14	f13
#define c15	f14
#define c16	f15

#define c17	f16
#define c18	f17
#define c19	f18
#define c20	f19
#define c21	f20
#define c22	f21
#define c23	f22
#define c24	f23
#define c25	f24
#define c26	f25
#define c27	f26
#define c28	f27
#define c29	f28
#define c30	f29
#define c31	f30
#define c32	f31

/*
 * fpsel selectors.  They share f30/f31 with c31/c32; only the .L112 loop
 * of the second path writes c31/c32, and that path contains no fpsel.
 */
#define sel_p	f30
#define sel_s	f31


	PROLOGUE
	PROFCODE

	/* Push f14-f31, 16 bytes per register, below the incoming SP. */
	li	r0, -16

	stfpdux	f14, SP, r0
	stfpdux	f15, SP, r0
	stfpdux	f16, SP, r0
	stfpdux	f17, SP, r0
	stfpdux	f18, SP, r0
	stfpdux	f19, SP, r0
	stfpdux	f20, SP, r0
	stfpdux	f21, SP, r0
	stfpdux	f22, SP, r0
	stfpdux	f23, SP, r0
	stfpdux	f24, SP, r0
	stfpdux	f25, SP, r0
	stfpdux	f26, SP, r0
	stfpdux	f27, SP, r0
	stfpdux	f28, SP, r0
	stfpdux	f29, SP, r0
	stfpdux	f30, SP, r0
	stfpdux	f31, SP, r0

	/* Push r31..r26. */
	stwu	r31, -4(SP)
	stwu	r30, -4(SP)
	stwu	r29, -4(SP)
	stwu	r28, -4(SP)

	stwu	r27, -4(SP)
	stwu	r26, -4(SP)

	/*
	 * 0x3f800000 / 0xbf800000 are the single-precision bit patterns of
	 * +1.0 / -1.0.  Pushed in this order, memory from SP upwards reads
	 * { +1, -1, -1, +1 }.
	 */
	lis	r9,  0x3f80
	lis	r10, 0xbf80

	stwu	r9,  -4(SP)
	stwu	r10, -4(SP)
	stwu	r10, -4(SP)
	stwu	r9,  -4(SP)

	slwi	LDA, LDA, BASE_SHIFT	/* element stride -> byte stride */

	/*
	 * sel_p = { +1, -1 }, sel_s = { -1, +1 }.  The second load also
	 * advances SP by 8; .L999 accounts for that.
	 */
	li	r0, 0
	lfpsux	sel_p, SP, r0
	li	r0, 8
	lfpsux	sel_s, SP, r0

	/* Nothing to copy unless both M and N are positive. */
	cmpwi	cr0, M, 0
	ble-	.L999
	cmpwi	cr0, N, 0
	ble-	.L999

	li	INC,  1 * SIZE
	li	INC2, 2 * SIZE

	/* Pre-decrement B: every store below updates by INC2 before writing. */
	subi	B, B, 2 * SIZE

	/* Take the paired path only if A and the byte stride are both
	   multiples of 2*SIZE. */
	andi.	r0, A,   2 * SIZE - 1
	bne	.L100
	andi.	r0, LDA, 2 * SIZE - 1
	bne	.L100

/* ------------------------------------------------------------------ */
/* Path 1: paired loads.                                              */
/* ------------------------------------------------------------------ */

	subi	A, A, 2 * SIZE		/* pre-decrement for update-form loads */
	srawi.	J,  N,  3		/* J = number of 8-column panels */
	ble	.L20
	.align 4

/* 8-column panel */
.L11:
	mr	AO1, A
	add	AO2, A,   LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	AO5, AO4, LDA
	add	AO6, AO5, LDA
	add	AO7, AO6, LDA
	add	AO8, AO7, LDA
	add	A,   AO8, LDA		/* A -> first column of next panel */

	srawi.	r0, M,  2		/* 4 elements per column per iteration */
	mtspr	CTR, r0
	ble	.L15
	.align 4

	/*
	 * Odd columns are loaded normally, even columns with the cross form.
	 * fpsel with sel_p then yields {col k, col k+1} of the first element
	 * of the pair (stored with STFPDUX); fpsel with sel_s yields the
	 * second element in swapped order, which the cross store STFXDUX
	 * puts back in column order.
	 */
.L12:
	LFPDUX	c01,   AO1, INC2
	LFXDUX	c02,   AO2, INC2
	LFPDUX	c03,   AO3, INC2
	LFXDUX	c04,   AO4, INC2

	LFPDUX	c05,   AO5, INC2
	LFXDUX	c06,   AO6, INC2
	LFPDUX	c07,   AO7, INC2
	LFXDUX	c08,   AO8, INC2

	LFPDUX	c09,   AO1, INC2
	LFXDUX	c10,   AO2, INC2
	LFPDUX	c11,   AO3, INC2
	LFXDUX	c12,   AO4, INC2
	fpsel	c17, sel_p, c01, c02

	LFPDUX	c13,   AO5, INC2
	fpsel	c18, sel_p, c03, c04
	LFXDUX	c14,   AO6, INC2
	fpsel	c19, sel_p, c05, c06
	LFPDUX	c15,   AO7, INC2
	fpsel	c20, sel_p, c07, c08
	LFXDUX	c16,   AO8, INC2
	fpsel	c21, sel_s, c01, c02

	fpsel	c22, sel_s, c03, c04
	STFPDUX	c17,   B, INC2
	fpsel	c23, sel_s, c05, c06
	STFPDUX	c18,   B, INC2
	fpsel	c24, sel_s, c07, c08
	STFPDUX	c19,   B, INC2

	fpsel	c01, sel_p, c09, c10
	STFPDUX	c20,   B, INC2
	fpsel	c02, sel_p, c11, c12
	STFXDUX	c21,   B, INC2
	fpsel	c03, sel_p, c13, c14
	STFXDUX	c22,   B, INC2
	fpsel	c04, sel_p, c15, c16
	STFXDUX	c23,   B, INC2

	fpsel	c05, sel_s, c09, c10
	STFXDUX	c24,   B, INC2
	fpsel	c06, sel_s, c11, c12
	STFPDUX	c01,   B, INC2
	fpsel	c07, sel_s, c13, c14
	STFPDUX	c02,   B, INC2
	fpsel	c08, sel_s, c15, c16
	STFPDUX	c03,   B, INC2

	STFPDUX	c04,   B, INC2
	STFXDUX	c05,   B, INC2
	STFXDUX	c06,   B, INC2
	STFXDUX	c07,   B, INC2
	STFXDUX	c08,   B, INC2
	bdnz	.L12
	.align 4

	/* Tail of the 8-column panel: M & 3 elements remain per column. */
.L15:
	andi.	r0, M,  3
	ble	.L19

	andi.	r0, M,  2
	beq	.L17

	LFPDUX	c01,   AO1, INC2
	LFXDUX	c02,   AO2, INC2
	LFPDUX	c03,   AO3, INC2
	LFXDUX	c04,   AO4, INC2

	LFPDUX	c05,   AO5, INC2
	fpsel	c09, sel_p, c01, c02
	LFXDUX	c06,   AO6, INC2
	fpsel	c10, sel_p, c03, c04
	LFPDUX	c07,   AO7, INC2
	fpsel	c11, sel_p, c05, c06
	LFXDUX	c08,   AO8, INC2
	fpsel	c12, sel_p, c07, c08

	fpsel	c13, sel_s, c01, c02
	fpsel	c14, sel_s, c03, c04
	STFPDUX	c09,   B, INC2
	fpsel	c15, sel_s, c05, c06
	STFPDUX	c10,   B, INC2
	fpsel	c16, sel_s, c07, c08
	STFPDUX	c11,   B, INC2

	STFPDUX	c12,   B, INC2
	STFXDUX	c13,   B, INC2
	STFXDUX	c14,   B, INC2
	STFXDUX	c15,   B, INC2
	STFXDUX	c16,   B, INC2
	.align 4

	/* Last single element: odd columns into the primary halves, even
	   columns into the secondary halves. */
.L17:
	andi.	r0, M,  1
	beq	.L19

	LFDUX	c01,   AO1, INC2
	LFDUX	c02,   AO3, INC2
	LFDUX	c03,   AO5, INC2
	LFDUX	c04,   AO7, INC2

	LFSDUX	c01,   AO2, INC2
	LFSDUX	c02,   AO4, INC2
	LFSDUX	c03,   AO6, INC2
	LFSDUX	c04,   AO8, INC2

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	.align 4

.L19:
	addic.	J, J, -1
	bgt	.L11
	.align 4

/* 4-column panel (bit 2 of N) */
.L20:
	andi.	J,  N,  4
	ble	.L30
	.align 4
.L21:
	mr	AO1, A
	add	AO2, A,   LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	A,   AO4, LDA

	srawi.	r0, M,  3		/* 8 elements per column per iteration */
	mtspr	CTR, r0
	ble	.L25
	.align 4

.L22:
	LFPDUX	c01,   AO1, INC2
	LFXDUX	c02,   AO2, INC2
	LFPDUX	c03,   AO3, INC2
	LFXDUX	c04,   AO4, INC2

	LFPDUX	c05,   AO1, INC2
	LFXDUX	c06,   AO2, INC2
	LFPDUX	c07,   AO3, INC2
	LFXDUX	c08,   AO4, INC2

	LFPDUX	c09,   AO1, INC2
	LFXDUX	c10,   AO2, INC2
	LFPDUX	c11,   AO3, INC2
	LFXDUX	c12,   AO4, INC2
	fpsel	c17, sel_p, c01, c02

	LFPDUX	c13,   AO1, INC2
	fpsel	c18, sel_p, c03, c04
	LFXDUX	c14,   AO2, INC2
	fpsel	c19, sel_s, c01, c02
	LFPDUX	c15,   AO3, INC2
	fpsel	c20, sel_s, c03, c04
	LFXDUX	c16,   AO4, INC2
	fpsel	c21, sel_p, c05, c06

	fpsel	c22, sel_p, c07, c08
	STFPDUX	c17,   B, INC2
	fpsel	c23, sel_s, c05, c06
	STFPDUX	c18,   B, INC2
	fpsel	c24, sel_s, c07, c08
	STFXDUX	c19,   B, INC2

	fpsel	c01, sel_p, c09, c10
	STFXDUX	c20,   B, INC2
	fpsel	c02, sel_p, c11, c12
	STFPDUX	c21,   B, INC2
	fpsel	c03, sel_s, c09, c10
	STFPDUX	c22,   B, INC2
	fpsel	c04, sel_s, c11, c12
	STFXDUX	c23,   B, INC2

	fpsel	c05, sel_p, c13, c14
	STFXDUX	c24,   B, INC2
	fpsel	c06, sel_p, c15, c16
	STFPDUX	c01,   B, INC2
	fpsel	c07, sel_s, c13, c14
	STFPDUX	c02,   B, INC2
	fpsel	c08, sel_s, c15, c16
	STFXDUX	c03,   B, INC2

	STFXDUX	c04,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c06,   B, INC2
	STFXDUX	c07,   B, INC2
	STFXDUX	c08,   B, INC2
	bdnz	.L22
	.align 4

	/* Tail of the 4-column panel: M & 7 elements remain per column. */
.L25:
	andi.	r0, M,  7
	ble	.L30

	andi.	r0, M,  4
	beq	.L26

	LFPDUX	c01,   AO1, INC2
	LFXDUX	c02,   AO2, INC2
	LFPDUX	c03,   AO3, INC2
	LFXDUX	c04,   AO4, INC2

	LFPDUX	c05,   AO1, INC2
	fpsel	c09, sel_p, c01, c02
	LFXDUX	c06,   AO2, INC2
	fpsel	c10, sel_p, c03, c04
	LFPDUX	c07,   AO3, INC2
	fpsel	c11, sel_s, c01, c02
	LFXDUX	c08,   AO4, INC2
	fpsel	c12, sel_s, c03, c04

	fpsel	c13, sel_p, c05, c06
	fpsel	c14, sel_p, c07, c08
	STFPDUX	c09,   B, INC2
	fpsel	c15, sel_s, c05, c06
	STFPDUX	c10,   B, INC2
	fpsel	c16, sel_s, c07, c08
	STFXDUX	c11,   B, INC2

	STFXDUX	c12,   B, INC2
	STFPDUX	c13,   B, INC2
	STFPDUX	c14,   B, INC2
	STFXDUX	c15,   B, INC2
	STFXDUX	c16,   B, INC2
	.align 4

.L26:
	andi.	r0, M,  2
	beq	.L27

	LFPDUX	c01,   AO1, INC2
	LFXDUX	c02,   AO2, INC2
	LFPDUX	c03,   AO3, INC2
	LFXDUX	c04,   AO4, INC2

	fpsel	c05, sel_p, c01, c02
	fpsel	c06, sel_p, c03, c04
	fpsel	c07, sel_s, c01, c02
	fpsel	c08, sel_s, c03, c04

	STFPDUX	c05,   B, INC2
	STFPDUX	c06,   B, INC2
	STFXDUX	c07,   B, INC2
	STFXDUX	c08,   B, INC2
	.align 4

.L27:
	andi.	r0, M,  1
	beq	.L30

	LFDUX	c01,   AO1, INC2
	LFDUX	c02,   AO2, INC2
	LFDUX	c03,   AO3, INC2
	LFDUX	c04,   AO4, INC2

	/* Merge two scalars into one pair: secondary(c01) <- primary(c02). */
	fsmfp	c01, c02
	fsmfp	c03, c04

	STFPDUX	c01,   B, INC2
	STFPDUX	c03,   B, INC2
	.align 4


/* 2-column panel (bit 1 of N) */
.L30:
	andi.	J,  N,  2
	ble	.L40

	mr	AO1, A
	add	AO2, A,   LDA
	add	A,   AO2, LDA

	srawi.	r0, M,  3		/* 8 elements per column per iteration */
	mtspr	CTR, r0
	ble	.L35
	.align 4

.L32:
	LFPDUX	c01,   AO1, INC2
	LFXDUX	c05,   AO2, INC2
	LFPDUX	c02,   AO1, INC2
	LFXDUX	c06,   AO2, INC2

	LFPDUX	c03,   AO1, INC2
	fpsel	c09, sel_p, c01, c05
	LFXDUX	c07,   AO2, INC2
	fpsel	c10, sel_s, c01, c05
	LFPDUX	c04,   AO1, INC2
	fpsel	c11, sel_p, c02, c06
	LFXDUX	c08,   AO2, INC2
	fpsel	c12, sel_s, c02, c06

	fpsel	c13, sel_p, c03, c07
	fpsel	c14, sel_s, c03, c07
	STFPDUX	c09,   B, INC2
	fpsel	c15, sel_p, c04, c08
	STFXDUX	c10,   B, INC2
	fpsel	c16, sel_s, c04, c08
	STFPDUX	c11,   B, INC2
	STFXDUX	c12,   B, INC2

	STFPDUX	c13,   B, INC2
	STFXDUX	c14,   B, INC2
	STFPDUX	c15,   B, INC2
	STFXDUX	c16,   B, INC2
	bdnz	.L32
	.align 4

.L35:
	andi.	r0, M,  7
	ble	.L40

	andi.	r0, M,  4
	beq	.L36

	LFPDUX	c01,   AO1, INC2
	LFXDUX	c03,   AO2, INC2
	LFPDUX	c02,   AO1, INC2
	LFXDUX	c04,   AO2, INC2

	fpsel	c05, sel_p, c01, c03
	fpsel	c06, sel_s, c01, c03
	fpsel	c07, sel_p, c02, c04
	fpsel	c08, sel_s, c02, c04

	STFPDUX	c05,   B, INC2
	STFXDUX	c06,   B, INC2
	STFPDUX	c07,   B, INC2
	STFXDUX	c08,   B, INC2
	.align 4

.L36:
	andi.	r0, M,  2
	beq	.L37

	LFPDUX	c01,   AO1, INC2
	LFXDUX	c02,   AO2, INC2

	fpsel	c03, sel_p, c01, c02
	fpsel	c04, sel_s, c01, c02

	STFPDUX	c03,   B, INC2
	STFXDUX	c04,   B, INC2
	.align 4

.L37:
	andi.	r0, M,  1
	beq	.L40

	LFDUX	c01,   AO1, INC2
	LFDUX	c02,   AO2, INC2

	fsmfp	c01, c02
	STFPDUX	c01,   B, INC2
	.align 4

/* Single remaining column (bit 0 of N): straight pairwise copy */
.L40:
	andi.	J,  N,  1
	ble	.L999

	mr	AO1, A

	srawi.	r0, M,  3		/* 8 elements per iteration */
	mtspr	CTR, r0
	ble	.L45
	.align 4

.L42:
	LFPDUX	c01,   AO1, INC2
	LFPDUX	c02,   AO1, INC2
	LFPDUX	c03,   AO1, INC2
	LFPDUX	c04,   AO1, INC2

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	bdnz	.L42
	.align 4

.L45:
	andi.	r0, M,  7
	ble	.L999

	andi.	r0, M,  4
	beq	.L46

	LFPDUX	c01,   AO1, INC2
	LFPDUX	c02,   AO1, INC2

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	.align 4

.L46:
	andi.	r0, M,  2
	beq	.L47

	LFPDUX	c01,   AO1, INC2
	STFPDUX	c01,   B, INC2
	.align 4

.L47:
	andi.	r0, M,  1
	beq	.L999

	/* Final scalar: non-update forms, the pointers are not needed again. */
	LFDX	c01,   AO1, INC2
	STFDX	c01,   B, INC2
	b	.L999
	.align 4


/* ------------------------------------------------------------------ */
/* Path 2: A or the byte stride is not a multiple of 2*SIZE.          */
/* Each element is loaded on its own; odd panel columns fill the      */
/* primary halves (LFDUX), even panel columns the secondary halves    */
/* (LFSDUX), so every STFPDUX writes two neighbouring columns.        */
/* ------------------------------------------------------------------ */
.L100:
	subi	A, A, 1 * SIZE		/* pre-decrement for update-form loads */
	srawi.	J,  N,  3		/* J = number of 8-column panels */
	ble	.L120
	.align 4

/* 8-column panel */
.L111:
	mr	AO1, A
	add	AO2, A,   LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	AO5, AO4, LDA
	add	AO6, AO5, LDA
	add	AO7, AO6, LDA
	add	AO8, AO7, LDA
	add	A,   AO8, LDA

	srawi.	r0, M,  3		/* 8 elements per column per iteration */
	mtspr	CTR, r0
	ble	.L115
	.align 4

	/* Uses all 32 registers; c31/c32 overwrite sel_p/sel_s. */
.L112:
	LFDUX	c01,   AO1, INC
	LFDUX	c05,   AO1, INC
	LFDUX	c09,   AO1, INC
	LFDUX	c13,   AO1, INC

	LFDUX	c17,   AO1, INC
	LFDUX	c21,   AO1, INC
	LFDUX	c25,   AO1, INC
	LFDUX	c29,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c05,   AO2, INC
	LFSDUX	c09,   AO2, INC
	LFSDUX	c13,   AO2, INC

	LFSDUX	c17,   AO2, INC
	LFSDUX	c21,   AO2, INC
	LFSDUX	c25,   AO2, INC
	LFSDUX	c29,   AO2, INC

	LFDUX	c02,   AO3, INC
	LFDUX	c06,   AO3, INC
	LFDUX	c10,   AO3, INC
	LFDUX	c14,   AO3, INC

	LFDUX	c18,   AO3, INC
	LFDUX	c22,   AO3, INC
	LFDUX	c26,   AO3, INC
	LFDUX	c30,   AO3, INC

	LFSDUX	c02,   AO4, INC
	LFSDUX	c06,   AO4, INC
	LFSDUX	c10,   AO4, INC
	LFSDUX	c14,   AO4, INC

	LFSDUX	c18,   AO4, INC
	LFSDUX	c22,   AO4, INC
	LFSDUX	c26,   AO4, INC
	LFSDUX	c30,   AO4, INC

	LFDUX	c03,   AO5, INC
	LFDUX	c07,   AO5, INC
	LFDUX	c11,   AO5, INC
	LFDUX	c15,   AO5, INC

	LFDUX	c19,   AO5, INC
	LFDUX	c23,   AO5, INC
	LFDUX	c27,   AO5, INC
	LFDUX	c31,   AO5, INC

	LFSDUX	c03,   AO6, INC
	LFSDUX	c07,   AO6, INC
	LFSDUX	c11,   AO6, INC
	LFSDUX	c15,   AO6, INC

	LFSDUX	c19,   AO6, INC
	LFSDUX	c23,   AO6, INC
	LFSDUX	c27,   AO6, INC
	LFSDUX	c31,   AO6, INC

	LFDUX	c04,   AO7, INC
	LFDUX	c08,   AO7, INC
	LFDUX	c12,   AO7, INC
	LFDUX	c16,   AO7, INC

	LFDUX	c20,   AO7, INC
	LFDUX	c24,   AO7, INC
	LFDUX	c28,   AO7, INC
	LFDUX	c32,   AO7, INC

	LFSDUX	c04,   AO8, INC
	LFSDUX	c08,   AO8, INC
	LFSDUX	c12,   AO8, INC
	LFSDUX	c16,   AO8, INC

	LFSDUX	c20,   AO8, INC
	LFSDUX	c24,   AO8, INC
	LFSDUX	c28,   AO8, INC
	LFSDUX	c32,   AO8, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c06,   B, INC2
	STFPDUX	c07,   B, INC2
	STFPDUX	c08,   B, INC2

	STFPDUX	c09,   B, INC2
	STFPDUX	c10,   B, INC2
	STFPDUX	c11,   B, INC2
	STFPDUX	c12,   B, INC2
	STFPDUX	c13,   B, INC2
	STFPDUX	c14,   B, INC2
	STFPDUX	c15,   B, INC2
	STFPDUX	c16,   B, INC2

	STFPDUX	c17,   B, INC2
	STFPDUX	c18,   B, INC2
	STFPDUX	c19,   B, INC2
	STFPDUX	c20,   B, INC2
	STFPDUX	c21,   B, INC2
	STFPDUX	c22,   B, INC2
	STFPDUX	c23,   B, INC2
	STFPDUX	c24,   B, INC2

	STFPDUX	c25,   B, INC2
	STFPDUX	c26,   B, INC2
	STFPDUX	c27,   B, INC2
	STFPDUX	c28,   B, INC2
	STFPDUX	c29,   B, INC2
	STFPDUX	c30,   B, INC2
	STFPDUX	c31,   B, INC2
	STFPDUX	c32,   B, INC2
	bdnz	.L112
	.align 4

	/* Tail of the 8-column panel: M & 7 elements remain per column. */
.L115:
	andi.	r0, M,  7
	ble	.L119

	andi.	r0, M,  4
	beq	.L116

	LFDUX	c01,   AO1, INC
	LFDUX	c05,   AO1, INC
	LFDUX	c09,   AO1, INC
	LFDUX	c13,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c05,   AO2, INC
	LFSDUX	c09,   AO2, INC
	LFSDUX	c13,   AO2, INC

	LFDUX	c02,   AO3, INC
	LFDUX	c06,   AO3, INC
	LFDUX	c10,   AO3, INC
	LFDUX	c14,   AO3, INC

	LFSDUX	c02,   AO4, INC
	LFSDUX	c06,   AO4, INC
	LFSDUX	c10,   AO4, INC
	LFSDUX	c14,   AO4, INC

	LFDUX	c03,   AO5, INC
	LFDUX	c07,   AO5, INC
	LFDUX	c11,   AO5, INC
	LFDUX	c15,   AO5, INC

	LFSDUX	c03,   AO6, INC
	LFSDUX	c07,   AO6, INC
	LFSDUX	c11,   AO6, INC
	LFSDUX	c15,   AO6, INC

	LFDUX	c04,   AO7, INC
	LFDUX	c08,   AO7, INC
	LFDUX	c12,   AO7, INC
	LFDUX	c16,   AO7, INC

	LFSDUX	c04,   AO8, INC
	LFSDUX	c08,   AO8, INC
	LFSDUX	c12,   AO8, INC
	LFSDUX	c16,   AO8, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c06,   B, INC2
	STFPDUX	c07,   B, INC2
	STFPDUX	c08,   B, INC2

	STFPDUX	c09,   B, INC2
	STFPDUX	c10,   B, INC2
	STFPDUX	c11,   B, INC2
	STFPDUX	c12,   B, INC2
	STFPDUX	c13,   B, INC2
	STFPDUX	c14,   B, INC2
	STFPDUX	c15,   B, INC2
	STFPDUX	c16,   B, INC2
	.align 4

.L116:
	andi.	r0, M,  2
	beq	.L117

	LFDUX	c01,   AO1, INC
	LFDUX	c05,   AO1, INC
	LFDUX	c02,   AO3, INC
	LFDUX	c06,   AO3, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c05,   AO2, INC
	LFSDUX	c02,   AO4, INC
	LFSDUX	c06,   AO4, INC

	LFDUX	c03,   AO5, INC
	LFDUX	c07,   AO5, INC
	LFDUX	c04,   AO7, INC
	LFDUX	c08,   AO7, INC

	LFSDUX	c03,   AO6, INC
	LFSDUX	c07,   AO6, INC
	LFSDUX	c04,   AO8, INC
	LFSDUX	c08,   AO8, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c06,   B, INC2
	STFPDUX	c07,   B, INC2
	STFPDUX	c08,   B, INC2
	.align 4

.L117:
	andi.	r0, M,  1
	beq	.L119

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO3, INC
	LFDUX	c03,   AO5, INC
	LFDUX	c04,   AO7, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO4, INC
	LFSDUX	c03,   AO6, INC
	LFSDUX	c04,   AO8, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	.align 4

.L119:
	addic.	J, J, -1
	bgt	.L111
	.align 4

/* 4-column panel (bit 2 of N) */
.L120:
	andi.	J,  N,  4
	ble	.L130
	.align 4
.L121:
	mr	AO1, A
	add	AO2, A,   LDA
	add	AO3, AO2, LDA
	add	AO4, AO3, LDA
	add	A,   AO4, LDA

	srawi.	r0, M,  3		/* 8 elements per column per iteration */
	mtspr	CTR, r0
	ble	.L125
	.align 4

.L122:
	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC
	LFDUX	c03,   AO1, INC
	LFDUX	c04,   AO1, INC

	LFDUX	c09,   AO1, INC
	LFDUX	c10,   AO1, INC
	LFDUX	c11,   AO1, INC
	LFDUX	c12,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO2, INC
	LFSDUX	c03,   AO2, INC
	LFSDUX	c04,   AO2, INC

	LFSDUX	c09,   AO2, INC
	LFSDUX	c10,   AO2, INC
	LFSDUX	c11,   AO2, INC
	LFSDUX	c12,   AO2, INC

	LFDUX	c05,   AO3, INC
	LFDUX	c06,   AO3, INC
	LFDUX	c07,   AO3, INC
	LFDUX	c08,   AO3, INC

	LFDUX	c13,   AO3, INC
	LFDUX	c14,   AO3, INC
	LFDUX	c15,   AO3, INC
	LFDUX	c16,   AO3, INC

	LFSDUX	c05,   AO4, INC
	LFSDUX	c06,   AO4, INC
	LFSDUX	c07,   AO4, INC
	LFSDUX	c08,   AO4, INC

	LFSDUX	c13,   AO4, INC
	LFSDUX	c14,   AO4, INC
	LFSDUX	c15,   AO4, INC
	LFSDUX	c16,   AO4, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c06,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c07,   B, INC2
	STFPDUX	c04,   B, INC2
	STFPDUX	c08,   B, INC2

	STFPDUX	c09,   B, INC2
	STFPDUX	c13,   B, INC2
	STFPDUX	c10,   B, INC2
	STFPDUX	c14,   B, INC2
	STFPDUX	c11,   B, INC2
	STFPDUX	c15,   B, INC2
	STFPDUX	c12,   B, INC2
	STFPDUX	c16,   B, INC2
	bdnz	.L122
	.align 4

.L125:
	andi.	r0, M,  7
	ble	.L130

	andi.	r0, M,  4
	beq	.L126

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC
	LFDUX	c03,   AO1, INC
	LFDUX	c04,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO2, INC
	LFSDUX	c03,   AO2, INC
	LFSDUX	c04,   AO2, INC

	LFDUX	c05,   AO3, INC
	LFDUX	c06,   AO3, INC
	LFDUX	c07,   AO3, INC
	LFDUX	c08,   AO3, INC

	LFSDUX	c05,   AO4, INC
	LFSDUX	c06,   AO4, INC
	LFSDUX	c07,   AO4, INC
	LFSDUX	c08,   AO4, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c06,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c07,   B, INC2
	STFPDUX	c04,   B, INC2
	STFPDUX	c08,   B, INC2
	.align 4

.L126:
	andi.	r0, M,  2
	beq	.L127

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO2, INC

	LFDUX	c05,   AO3, INC
	LFDUX	c06,   AO3, INC

	LFSDUX	c05,   AO4, INC
	LFSDUX	c06,   AO4, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c06,   B, INC2
	.align 4

.L127:
	andi.	r0, M,  1
	beq	.L130

	LFDUX	c01,   AO1, INC
	LFDUX	c05,   AO3, INC

	/* NOTE(review): purpose of these nops is not evident from the code
	   (possibly scheduling padding) - kept as in the original. */
	nop
	nop

	LFSDUX	c01,   AO2, INC
	LFSDUX	c05,   AO4, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c05,   B, INC2
	.align 4


/* 2-column panel (bit 1 of N) */
.L130:
	andi.	J,  N,  2
	ble	.L140

	mr	AO1, A
	add	AO2, A,   LDA
	add	A,   AO2, LDA

	srawi.	r0, M,  3		/* 8 elements per column per iteration */
	mtspr	CTR, r0
	ble	.L135
	.align 4

.L132:
	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC
	LFDUX	c03,   AO1, INC
	LFDUX	c04,   AO1, INC

	LFDUX	c09,   AO1, INC
	LFDUX	c10,   AO1, INC
	LFDUX	c11,   AO1, INC
	LFDUX	c12,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO2, INC
	LFSDUX	c03,   AO2, INC
	LFSDUX	c04,   AO2, INC

	LFSDUX	c09,   AO2, INC
	LFSDUX	c10,   AO2, INC
	LFSDUX	c11,   AO2, INC
	LFSDUX	c12,   AO2, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2

	STFPDUX	c09,   B, INC2
	STFPDUX	c10,   B, INC2
	STFPDUX	c11,   B, INC2
	STFPDUX	c12,   B, INC2
	bdnz	.L132
	.align 4

.L135:
	andi.	r0, M,  7
	ble	.L140

	andi.	r0, M,  4
	beq	.L136

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC
	LFDUX	c03,   AO1, INC
	LFDUX	c04,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO2, INC
	LFSDUX	c03,   AO2, INC
	LFSDUX	c04,   AO2, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c04,   B, INC2
	.align 4

.L136:
	andi.	r0, M,  2
	beq	.L137

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC

	LFSDUX	c01,   AO2, INC
	LFSDUX	c02,   AO2, INC

	STFPDUX	c01,   B, INC2
	STFPDUX	c02,   B, INC2
	.align 4

.L137:
	andi.	r0, M,  1
	beq	.L140

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO2, INC

	fsmfp	c01, c02
	STFPDUX	c01,   B, INC2
	.align 4

/* Single remaining column (bit 0 of N): scalar loads merged into pairs */
.L140:
	andi.	J,  N,  1
	ble	.L999

	mr	AO1, A

	srawi.	r0, M,  3		/* 8 elements per iteration */
	mtspr	CTR, r0
	ble	.L145
	.align 4

.L142:
	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC
	LFDUX	c03,   AO1, INC
	LFDUX	c04,   AO1, INC

	LFDUX	c05,   AO1, INC
	LFDUX	c06,   AO1, INC
	LFDUX	c07,   AO1, INC
	LFDUX	c08,   AO1, INC

	fsmfp	c01, c02
	fsmfp	c03, c04
	fsmfp	c05, c06
	fsmfp	c07, c08

	STFPDUX	c01,   B, INC2
	STFPDUX	c03,   B, INC2
	STFPDUX	c05,   B, INC2
	STFPDUX	c07,   B, INC2
	bdnz	.L142
	.align 4

.L145:
	andi.	r0, M,  7
	ble	.L999

	andi.	r0, M,  4
	beq	.L146

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC
	LFDUX	c03,   AO1, INC
	LFDUX	c04,   AO1, INC

	fsmfp	c01, c02
	fsmfp	c03, c04

	STFPDUX	c01,   B, INC2
	STFPDUX	c03,   B, INC2
	.align 4

.L146:
	andi.	r0, M,  2
	beq	.L147

	LFDUX	c01,   AO1, INC
	LFDUX	c02,   AO1, INC

	fsmfp	c01, c02
	STFPDUX	c01,   B, INC2
	.align 4

.L147:
	andi.	r0, M,  1
	beq	.L999

	LFDX	c01,   AO1, INC
	STFDX	c01,   B, INC2
	.align 4

/*
 * Common exit.  SP sits 8 bytes above the selector words here (the second
 * lfpsux advanced it), so +4 followed by the first lwzu's +4 skips the
 * remaining selector bytes and lands on the saved r26.
 */
.L999:
	addi	SP, SP, 4

	lwzu	r26, 4(SP)
	lwzu	r27, 4(SP)

	lwzu	r28, 4(SP)
	lwzu	r29, 4(SP)
	lwzu	r30, 4(SP)
	lwzu	r31, 4(SP)

	/* Re-bias SP so that the first +16 update addresses the saved f31. */
	subi	SP, SP, 12
	li	r0, 16

	lfpdux	f31, SP, r0
	lfpdux	f30, SP, r0
	lfpdux	f29, SP, r0
	lfpdux	f28, SP, r0
	lfpdux	f27, SP, r0
	lfpdux	f26, SP, r0
	lfpdux	f25, SP, r0
	lfpdux	f24, SP, r0
	lfpdux	f23, SP, r0
	lfpdux	f22, SP, r0
	lfpdux	f21, SP, r0
	lfpdux	f20, SP, r0
	lfpdux	f19, SP, r0
	lfpdux	f18, SP, r0
	lfpdux	f17, SP, r0
	lfpdux	f16, SP, r0
	lfpdux	f15, SP, r0
	lfpdux	f14, SP, r0
	addi	SP, SP, 16		/* back to the SP value on entry */
	blr
	EPILOGUE