/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/* Prefetch distance, in units of SIZE, ahead of the X1/Y1 load     */
/* pointers.  One value per precision selected at build time.       */
#ifdef XDOUBLE
#define PREFETCH_SIZE ( 8 * 8 +  4)
#elif defined(DOUBLE)
#define PREFETCH_SIZE (16 * 8 +  8)
#else
#define PREFETCH_SIZE (32 * 8 + 16)
#endif

/* Incoming arguments (stacked input registers r32..r36).           */
#define N	r32
#define X1	r33
#define INCX	r34
#define Y1	r35
#define INCY	r36

/* Prefetch pointers that run ahead of the X and Y load streams.    */
#define PREX	r2
#define PREY	r3

/* I: trip count of the unrolled loop, J: N mod 8 (left-over).      */
/* X2/Y2: store pointers; X1/Y1 are used as the load pointers.      */
#define I	r14
#define J	r15
#define Y2	r16
#define X2	r17

/* Byte distance the prefetch pointers advance per lfetch.          */
#define INCX16	r18
#define INCY16	r19

/* Saved copies of the predicate file and of ar.lc.                 */
#define PR	r30
#define ARLC	r31

/* Rotation coefficients.                                           */
#define C	f8
#define S	f9

/*-------------------------------------------------------------------*/
/* Plane rotation applied in place to two strided vectors whose      */
/* elements each consist of two consecutive values of SIZE bytes     */
/* (presumably the real and imaginary part of a complex number --    */
/* TODO confirm against the caller).  Every value is updated as      */
/*                                                                   */
/*     x' = C * x + S * y                                            */
/*     y' = C * y - S * x                                            */
/*                                                                   */
/* assuming FMA d = a, b, c yields a*b + c and FNMA d = a, b, c      */
/* yields c - a*b (the LDFD/STFD/FMPY/FMA/FNMA macros, SIZE,         */
/* BASE_SHIFT, PROLOGUE, PROFCODE and EPILOGUE are not defined in    */
/* this file; they are expected to come from common.h).              */
/*                                                                   */
/* In:    r32 = N      element count; returns at once if N <= 0      */
/*        r33 = X1     first vector                                  */
/*        r34 = INCX   stride of X in elements                       */
/*        r35 = Y1     second vector                                 */
/*        r36 = INCY   stride of Y in elements                       */
/*        f8  = C, f9 = S   (for XDOUBLE, S is loaded from the       */
/*                           stack at [r12 + 16] instead)            */
/* Uses:  r2, r3, r14-r19, r29-r31, f6, f7, f10-f15, f32 and up      */
/*        (rotating), p6, p7, p12-p14, rotating predicates p16-p19,  */
/*        ar.lc, ar.ec.  ar.lc and the predicates selected by the    */
/*        mask at .L15 are restored before returning.                */
/*                                                                   */
/* Structure: .L12 is a software-pipelined loop (br.ctop, ar.ec = 3) */
/* that consumes 8 elements per trip; .L15 handles the remaining     */
/* N mod 8 elements as predicated groups of 4, 2 and 1.              */
/*-------------------------------------------------------------------*/

	PROLOGUE
	.prologue
	PROFCODE
	{ .mmi
	/* r29 -> stack slot read below as S in the XDOUBLE build */
	adds	r29 = 16, r12
	/* two values per element: double the element stride */
	add	INCX = INCX, INCX
	.save ar.lc, ARLC
	mov	ARLC = ar.lc
	}
	{ .mib
	/* p6 = !(0 < N): nothing to do for N <= 0 */
	cmp.lt	p0, p6 = r0, N
	/* I = N / 8 = number of trips through the unrolled loop */
	shr	I = N, 3
	(p6) br.ret.spnt.many b0
	}
	;;
	.body
	{ .mmi
#ifdef XDOUBLE
	LDFD	S = [r29]
#else
	nop	__LINE__
#endif
	add	INCY = INCY, INCY
	/* keep the caller's predicates; restored at .L15 */
	mov	PR = pr
	}
	{ .mmi
	/* store pointers start where the load pointers start */
	mov	X2 = X1
	mov	Y2 = Y1
	mov	pr.rot= 0
	}
	;;
	{ .mmi
	/* strides: values -> bytes */
	shladd	INCX = INCX, BASE_SHIFT, r0
	shladd	INCY = INCY, BASE_SHIFT, r0
	/* epilogue count for the br.ctop loop */
	mov	ar.ec= 3
	}
	{ .mmi
	/* ar.lc takes trip count - 1 */
	adds	I = -1, I
	/* p16 = 1 enables the first pipeline stage */
	cmp.eq	p16, p0 = r0, r0
	/* J = N mod 8, elements left for the tail */
	and	J = 7, N
	}
	;;
	{ .mmi
	/* Prefetch step: 8 element strides, issued once per trip;  */
	/* the XDOUBLE build steps by 4 and prefetches twice a trip. */
#ifndef XDOUBLE
	shladd	INCX16 = INCX, 3, r0
	shladd	INCY16 = INCY, 3, r0
#else
	shladd	INCX16 = INCX, 2, r0
	shladd	INCY16 = INCY, 2, r0
#endif
	nop	__LINE__
	}
	{ .mmi
	/* The first value of an element is accessed with a post-    */
	/* increment of SIZE, the second with INCX - SIZE, so the    */
	/* pair together advances by one full element stride.        */
	adds	INCX = -SIZE, INCX
	adds	INCY = -SIZE, INCY
	nop	__LINE__
	}
	;;
	{ .mmi
	adds	PREX = PREFETCH_SIZE * SIZE, X1
	adds	PREY = PREFETCH_SIZE * SIZE, Y1
	mov	ar.lc = I
	}
	{ .mib
	/* I == -1 means N < 8: skip the unrolled loop */
	cmp.eq	p6 ,p0 = -1, I
	/* p12 = bit 2 of N: a group of 4 elements in the tail */
	tbit.z	p0, p12 = N, 2
	(p6) br.cond.dpnt  .L15
	}
	;;
	.align 32

/* Main loop, 8 elements (16 X values and 16 Y values) per trip.     */
/* Loads are issued under p16/p17, the arithmetic under p17/p18 and  */
/* the stores under p18/p19; f32 and above rotate on every br.ctop,  */
/* which is why a value loaded into fN is read back later under a    */
/* different register name.  The instruction order is part of the    */
/* schedule -- do not reorder.                                       */
.L12:
	{ .mmf
	(p19) STFD	[Y2] = f15
	(p16) lfetch.excl.nt1 [PREX], INCX16
	(p18) FMPY	f15 = C, f91
	}
	{ .mmf
	(p16) LDFD	f32  = [X1], SIZE
	(p19) add	Y2 = Y2, INCY
	(p18) FNMA	f11 = S, f37, f11
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f6
	(p16) lfetch.excl.nt1 [PREY], INCY16
	(p18) FMA	f12 = C, f40, f12
	}
	{ .mmf
	(p17) LDFD	f114 = [Y1], INCY
	(p18) adds	X2 = SIZE, X2
	(p18) FMPY	f6  = S, f94
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f7
	(p16) LDFD	f35  = [X1], INCX
	(p18) FNMA	f13 = S, f40, f13
	}
	{ .mmf
	nop	__LINE__
	(p18) adds	Y2 = SIZE, Y2
	(p18) FMPY	f7  = C, f94
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f10
	(p17) LDFD	f117 = [Y1], SIZE
	(p18) FMA	f14 = C, f43, f14
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p18) FMPY	f10 = S, f97
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f11
	(p16) LDFD	f38  = [X1], SIZE
	(p18) FNMA	f15 = S, f43, f15
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p18) FMPY	f11 = C, f97
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f12
	(p17) LDFD	f120 = [Y1], INCY
	(p18) FMPY	f12 = S, f100
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p18) FMA	f6  = C, f46, f6
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f13
	(p16) LDFD	f41  = [X1], INCX
	(p18) FMPY	f13 = C, f100
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p18) FNMA	f7  = S, f46, f7
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f14
	(p17) LDFD	f123 = [Y1], SIZE
	(p18) FMPY	f14 = S, f103
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p18) FMA	f10 = C, f49, f10
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f15
	(p16) LDFD	f44  = [X1], SIZE
	(p18) FMPY	f15 = C, f103
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p18) FNMA	f11 = S, f49, f11
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f6
	(p17) LDFD	f126 = [Y1], INCY
	(p18) FMA	f12 = C, f52, f12
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p18) FMPY	f6  = S, f106
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f7
	(p16) LDFD	f47  = [X1], INCX
	(p18) FNMA	f13 = S, f52, f13
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p18) FMPY	f7  = C, f106
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f10
	(p16) LDFD	f80  = [Y1], SIZE
	(p18) FMA	f14 = C, f55, f14
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p18) FMPY	f10 = S, f109
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f11
	(p16) LDFD	f50  = [X1], SIZE
	(p18) FNMA	f15 = S, f55, f15
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p18) FMPY	f11 = C, f109
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f12
	(p16) LDFD	f83  = [Y1], INCY
	(p18) FMPY	f12 = S, f112
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p18) FMA	f6  = C, f58, f6
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f13
	(p16) LDFD	f53  = [X1], INCX
	(p18) FMPY	f13 = C, f112
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p18) FNMA	f7  = S, f58, f7
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f14
	(p16) LDFD	f86  = [Y1], SIZE
	(p18) FMPY	f14 = S, f115
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p18) FMA	f10 = C, f61, f10
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f15
	(p16) LDFD	f56  = [X1], SIZE
	(p18) FMPY	f15 = C, f115
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p18) FNMA	f11 = S, f61, f11
	}
	;;
	/* Middle of the trip: the XDOUBLE variant issues a second   */
	/* pair of prefetches here, the other builds do not.         */
#ifndef XDOUBLE
	{ .mmf
	(p18) STFD	[X2] = f6
	(p16) LDFD	f89  = [Y1], INCY
	(p18) FMA	f12 = C, f64, f12
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p18) FMPY	f6  = S, f118
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f7
	(p16) LDFD	f59  = [X1], INCX
	(p18) FNMA	f13 = S, f64, f13
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p18) FMPY	f7  = C, f118
	}
	;;
#else
	{ .mmf
	(p18) STFD	[X2] = f6
	(p16) lfetch.excl.nt1 [PREY], INCY16
	(p18) FMA	f12 = C, f64, f12
	}
	{ .mmf
	(p16) LDFD	f89  = [Y1], INCY
	(p18) adds	X2 = SIZE, X2
	(p18) FMPY	f6  = S, f118
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f7
	(p16) lfetch.excl.nt1 [PREX], INCX16
	(p18) FNMA	f13 = S, f64, f13
	}
	{ .mmf
	(p16) LDFD	f59  = [X1], INCX
	(p18) adds	Y2 = SIZE, Y2
	(p18) FMPY	f7  = C, f118
	}
	;;
#endif
	{ .mmf
	(p18) STFD	[X2] = f10
	(p16) LDFD	f92  = [Y1], SIZE
	(p18) FMA	f14 = C, f67, f14
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p18) FMPY	f10 = S, f121
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f11
	(p16) LDFD	f62  = [X1], SIZE
	(p18) FNMA	f15 = S, f67, f15
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p18) FMPY	f11 = C, f121
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f12
	(p16) LDFD	f95  = [Y1], INCY
	(p18) FMPY	f12 = S, f124
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p18) FMA	f6  = C, f70, f6
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f13
	(p16) LDFD	f65  = [X1], INCX
	(p18) FMPY	f13 = C, f124
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p18) FNMA	f7  = S, f70, f7
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f14
	(p16) LDFD	f98  = [Y1], SIZE
	(p18) FMPY	f14 = S, f127
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p18) FMA	f10 = C, f73, f10
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f15
	(p16) LDFD	f68  = [X1], SIZE
	(p18) FMPY	f15 = C, f127
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p18) FNMA	f11 = S, f73, f11
	}
	;;
	/* From here on the multiplies under p17 already start the   */
	/* arithmetic of the following trip's data.                  */
	{ .mmf
	(p18) STFD	[X2] = f6
	(p16) LDFD	f101 = [Y1], INCY
	(p18) FMA	f12 = C, f76, f12
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p17) FMPY	f6  = S, f81
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f7
	(p16) LDFD	f71  = [X1], INCX
	(p18) FNMA	f13 = S, f76, f13
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p17) FMPY	f7  = C, f81
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f10
	(p16) LDFD	f104 = [Y1], SIZE
	(p18) FMA	f14 = C, f79, f14
	}
	{ .mmf
	(p18) add	X2 = X2, INCX
	nop	__LINE__
	(p17) FMPY	f10 = S, f84
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f11
	(p16) LDFD	f74  = [X1], SIZE
	(p18) FNMA	f15 = S, f79, f15
	}
	{ .mmf
	(p18) add	Y2 = Y2, INCY
	nop	__LINE__
	(p17) FMPY	f11 = C, f84
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f12
	(p16) LDFD	f107 = [Y1], INCY
	(p17) FMPY	f12 = S, f87
	}
	{ .mmf
	(p18) adds	X2 = SIZE, X2
	nop	__LINE__
	(p17) FMA	f6  = C, f33, f6
	}
	;;
	{ .mmf
	(p18) STFD	[Y2] = f13
	(p16) LDFD	f77  = [X1], INCX
	(p17) FMPY	f13 = C, f87
	}
	{ .mmf
	(p18) adds	Y2 = SIZE, Y2
	nop	__LINE__
	(p17) FNMA	f7  = S, f33, f7
	}
	;;
	{ .mmf
	(p18) STFD	[X2] = f14
	(p16) LDFD	f110 = [Y1], SIZE
	(p17) FMPY	f14 = S, f90
	}
	{ .mfb
	(p18) add	X2 = X2, INCX
	(p17) FMA	f10 = C, f36, f10
	br.ctop.sptk.few .L12
	}
	;;
	/* Same p19 store/advance pair that opens every trip at      */
	/* .L12, repeated once after the loop has been left.         */
	{ .mmi
	(p19) STFD	[Y2] = f15
	(p19) add	Y2 = Y2, INCY
	nop	__LINE__
	}
	{ .mmi
	nop	__LINE__
	nop	__LINE__
	nop	__LINE__
	}
	;;
	.align 32

/* Tail: the remaining J = N mod 8 elements.                         */
/*   p12 (bit 2 of N): 4 elements, X in f32-f39, Y in f40-f47        */
/*   p13 (bit 1 of N): 2 elements, X in f48-f51, Y in f52-f55        */
/*   p14 (bit 0 of N): 1 element,  X in f56-f57, Y in f58-f59        */
/* The three groups are interleaved so that the loads of a later     */
/* group overlap the arithmetic and stores of the earlier one.       */
.L15:
	{ .mmi
	(p12) LDFD	f40 = [Y1], SIZE
	(p12) LDFD	f32 = [X1], SIZE
	mov	ar.lc = ARLC
	}
	;;
	{ .mmi
	(p12) LDFD	f41 = [Y1], INCY
	(p12) LDFD	f33 = [X1], INCX
	/* mask -65474 = 0x1003E as a 17-bit field: bits 1-5 and    */
	/* bit 16, i.e. p1-p5 and the rotating predicates p16-p63   */
	/* are restored; p6-p15 (incl. p12-p14 used below) are not. */
	mov	pr = PR, -65474
	}
	;;
	{ .mmb
	(p12) LDFD	f42 = [Y1], SIZE
	/* no left-over elements: done */
	cmp.eq	p7, p0 = r0, J
	(p7) br.ret.sptk.many b0
	}
	;;
	{ .mmf
	(p12) LDFD	f43 = [Y1], INCY
	nop	__LINE__
	(p12) FMPY	f6  = S, f40
	}
	;;
	{ .mmf
	(p12) LDFD	f34 = [X1], SIZE
	nop	__LINE__
	(p12) FMPY	f7  = C, f40
	}
	;;
	{ .mmf
	(p12) LDFD	f44 = [Y1], SIZE
	nop	__LINE__
	(p12) FMPY	f10 = S, f41
	}
	;;
	{ .mmf
	(p12) LDFD	f35 = [X1], INCX
	nop	__LINE__
	(p12) FMPY	f11 = C, f41
	}
	;;
	{ .mmf
	(p12) LDFD	f45 = [Y1], INCY
	nop	__LINE__
	(p12) FMPY	f12 = S, f42
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FMA	f6  = C, f32, f6
	}
	;;
	{ .mmf
	(p12) LDFD	f36 = [X1], SIZE
	nop	__LINE__
	(p12) FMPY	f13 = C, f42
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FNMA	f7  = S, f32, f7
	}
	;;
	{ .mmf
	(p12) LDFD	f46 = [Y1], SIZE
	nop	__LINE__
	(p12) FMPY	f14 = S, f43
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FMA	f10 = C, f33, f10
	}
	;;
	{ .mmf
	(p12) LDFD	f37 = [X1], INCX
	nop	__LINE__
	(p12) FMPY	f15 = C, f43
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FNMA	f11 = S, f33, f11
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f6, SIZE
	(p12) LDFD	f47 = [Y1], INCY
	(p12) FMA	f12 = C, f34, f12
	}
	{ .mfi
	nop	__LINE__
	(p12) FMPY	f6  = S, f44
	/* p13 = bit 1 of N: a group of 2 elements */
	tbit.z	p0, p13 = N, 1
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f7, SIZE
	(p12) LDFD	f38 = [X1], SIZE
	(p12) FNMA	f13 = S, f34, f13
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FMPY	f7  = C, f44
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f10
	(p13) LDFD	f52 = [Y1], SIZE
	(p12) FMA	f14 = C, f35, f14
	}
	{ .mmf
	(p12) add	X2 = X2, INCX
	nop	__LINE__
	(p12) FMPY	f10 = S, f45
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f11
	(p12) LDFD	f39 = [X1], INCX
	(p12) FNMA	f15 = S, f35, f15
	}
	{ .mmf
	(p12) add	Y2 = Y2, INCY
	nop	__LINE__
	(p12) FMPY	f11 = C, f45
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f12, SIZE
	(p13) LDFD	f53 = [Y1], INCY
	(p12) FMPY	f12 = S, f46
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FMA	f6  = C, f36, f6
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f13, SIZE
	(p13) LDFD	f48 = [X1], SIZE
	(p12) FMPY	f13 = C, f46
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p12) FNMA	f7  = S, f36, f7
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f14
	(p13) LDFD	f54 = [Y1], SIZE
	(p12) FMPY	f14 = S, f47
	}
	{ .mmf
	(p12) add	X2 = X2, INCX
	nop	__LINE__
	(p12) FMA	f10 = C, f37, f10
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f15
	(p13) LDFD	f49 = [X1], INCX
	(p12) FMPY	f15 = C, f47
	}
	{ .mfi
	(p12) add	Y2 = Y2, INCY
	(p12) FNMA	f11 = S, f37, f11
	/* p14 = bit 0 of N: one last element */
	tbit.z	p0, p14 = N, 0
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f6, SIZE
	(p13) LDFD	f55 = [Y1], INCY
	(p12) FMA	f12 = C, f38, f12
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMPY	f6  = S, f52
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f7, SIZE
	(p13) LDFD	f50 = [X1], SIZE
	(p12) FNMA	f13 = S, f38, f13
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMPY	f7  = C, f52
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f10
	(p14) LDFD	f58 = [Y1], SIZE
	(p12) FMA	f14 = C, f39, f14
	}
	{ .mmf
	(p12) add	X2 = X2, INCX
	nop	__LINE__
	(p13) FMPY	f10 = S, f53
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f11
	(p13) LDFD	f51 = [X1], INCX
	(p12) FNMA	f15 = S, f39, f15
	}
	{ .mmf
	(p12) add	Y2 = Y2, INCY
	nop	__LINE__
	(p13) FMPY	f11 = C, f53
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f12, SIZE
	(p14) LDFD	f59 = [Y1], INCY
	(p13) FMPY	f12 = S, f54
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f6  = C, f48, f6
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f13, SIZE
	(p14) LDFD	f56 = [X1], SIZE
	(p13) FMPY	f13 = C, f54
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FNMA	f7  = S, f48, f7
	}
	;;
	{ .mmf
	(p12) STFD	[X2] = f14
	(p12) add	X2 = X2, INCX
	(p13) FMPY	f14 = S, f55
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f10 = C, f49, f10
	}
	;;
	{ .mmf
	(p12) STFD	[Y2] = f15
	(p14) LDFD	f57 = [X1], INCX
	(p13) FMPY	f15 = C, f55
	}
	{ .mmf
	(p12) add	Y2 = Y2, INCY
	nop	__LINE__
	(p13) FNMA	f11 = S, f49, f11
	}
	;;
	{ .mmf
	(p13) STFD	[X2] = f6, SIZE
	nop	__LINE__
	(p13) FMA	f12 = C, f50, f12
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMPY	f6  = S, f58
	}
	;;
	{ .mmf
	(p13) STFD	[Y2] = f7, SIZE
	nop	__LINE__
	(p13) FNMA	f13 = S, f50, f13
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMPY	f7  = C, f58
	}
	;;
	{ .mmf
	(p13) STFD	[X2] = f10
	(p13) add	X2 = X2, INCX
	(p13) FMA	f14 = C, f51, f14
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMPY	f10 = S, f59
	}
	;;
	{ .mmf
	(p13) STFD	[Y2] = f11
	(p13) add	Y2 = Y2, INCY
	(p13) FNMA	f15 = S, f51, f15
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMPY	f11 = C, f59
	}
	;;
	{ .mmf
	(p13) STFD	[X2] = f12, SIZE
	nop	__LINE__
	(p14) FMA	f6  = C, f56, f6
	}
	;;
	{ .mmf
	(p13) STFD	[Y2] = f13, SIZE
	nop	__LINE__
	(p14) FNMA	f7  = S, f56, f7
	}
	;;
	{ .mmf
	(p13) STFD	[X2] = f14
	(p13) add	X2 = X2, INCX
	(p14) FMA	f10 = C, f57, f10
	}
	;;
	{ .mmf
	(p13) STFD	[Y2] = f15
	(p13) add	Y2 = Y2, INCY
	(p14) FNMA	f11 = S, f57, f11
	}
	;;
	{ .mmi
	(p14) STFD	[X2] = f6, SIZE
	(p14) STFD	[Y2] = f7, SIZE
	nop	__LINE__
	}
	;;
	{ .mmb
	(p14) STFD	[X2] = f10
	(p14) STFD	[Y2] = f11
	br.ret.sptk.many b0
	}
	;;
	EPILOGUE
