/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT        */
/*    AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,        */
/*    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/*    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/*    DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT       */
/*    AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,     */
/*    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES       */
/*    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE      */
/*    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR           */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF     */
/*    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     */
/*    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE            */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/*
 * Complex AXPY kernel, IA-64 (Itanium) assembly, preprocessed with cpp.
 *
 * For each of N complex elements (real part first, imaginary part one
 * SIZE later) the code updates y in place:
 *
 *     CONJ undefined:  y_re += ALPHA_R*x_re - ALPHA_I*x_im
 *                      y_im += ALPHA_R*x_im + ALPHA_I*x_re
 *     CONJ defined:    y_re += ALPHA_R*x_re + ALPHA_I*x_im
 *                      y_im += ALPHA_I*x_re - ALPHA_R*x_im
 *
 * This reading assumes the FMA / FNMA macros from common.h expand to
 * "d = a*b + c" and "d = -(a*b) + c" -- TODO confirm against common.h.
 * LDFD, STFD, SIZE, ZBASE_SHIFT, PROLOGUE, PROFCODE and EPILOGUE also
 * come from common.h and are not defined in this file.
 *
 * Inputs as used by this file:
 *   r32          N      element count; N < 0 returns immediately
 *   f8, f9       ALPHA_R, ALPHA_I
 *   XDOUBLE:     x, incx, y, incy are read from [SP+16], [SP+24],
 *                [SP+32], [SP+40]
 *   otherwise:   x = r37, incx = r38, y = r39, incy read from [SP+16]
 *   incx / incy are shifted left by ZBASE_SHIFT before use (presumably
 *   converting an element stride into a byte stride).
 *
 * Layout:
 *   setup   pointer / stride derivation, save of ar.lc and pr
 *   .L22    software-pipelined loop, 8 elements per iteration
 *   .L25    remainder: 4, 2 and 1 elements selected by bits 2, 1, 0 of N
 *
 * "nop __LINE__" uses the preprocessor line number only as the nop
 * immediate; the value is not read by any instruction in this file.
 */

#define ASSEMBLER
#include "common.h"

/* Prefetch lead, in units of SIZE; see the PREX1 / PREY1 setup below. */
#ifdef XDOUBLE
#define PREFETCH_SIZE ( 8 * 16)
#elif defined(DOUBLE)
#define PREFETCH_SIZE (16 * 16)
#else
#define PREFETCH_SIZE (32 * 16)
#endif

/*
 * FMA1 is applied to the ALPHA_I * x_im term of the real result,
 * FMA2 to the ALPHA_R * x_im term of the imaginary result.
 * CONJ swaps which of the two is the negating form.
 */
#ifndef CONJ
#define FMA1	FNMA
#define FMA2	FMA
#else
#define FMA1	FMA
#define FMA2	FNMA
#endif

#define SP	r12

/* Argument registers; the XDOUBLE build loads x/incx/y/incy from the stack. */
#ifdef XDOUBLE
#define N	r32
#define X1	r14
#define INCX	r15
#define Y1	r16
#define INCY	r17
#else
#define N	r32
#define X1	r37
#define INCX	r38
#define Y1	r39
#define INCY	r36
#endif

/* Prefetch pointers (first pair). */
#define PREX1	r2
#define PREY1	r3

/*
 * I  = loop counter (N >> 3, then minus one for ar.lc)
 * J  = N & 7
 * X2..X4 / Y2..Y4 = load pointers 1, 2 and 3 strides past X1 / Y1
 * YY1..YY4        = store pointers into y, trailing the Y load pointers
 * INC?8           = 8 * stride (prefetch step)
 */
#define I	r18
#define J	r19
#define Y2	r20
#define X2	r21
#define INCX8	r22
#define INCY8	r23
#define YY1	r24
#define YY2	r25
#define YY3	r26
#define YY4	r27

/*
 * Stacked locals.
 * INC?2M1 = 2*stride - SIZE, INC?4M1 = 4*stride - SIZE: the post-increment
 * used on the imaginary-part access (pointer is already SIZE past the
 * element start) to land on the element 2 or 4 strides further on.
 */
#define INCX2M1	loc0
#define INCY2M1	loc1
#define INCX4M1	loc2
#define INCY4M1	loc3
#define X3	loc4
#define Y3	loc5
#define X4	loc6
#define Y4	loc7
#define PREX2	loc8
#define PREY2	loc9

/* Saved copies of ar.lc and the predicate register. */
#define ARLC	r29
#define PR	r30

#define ALPHA_R	f8
#define ALPHA_I	f9

	PROLOGUE
	.prologue
	PROFCODE

/* Form the stack-argument addresses; return at once when 0 > N. */
	{ .mmi
	adds	r14 = 16, SP
	adds	r15 = 24, SP
	adds	r16 = 32, SP
	}
	{ .mmb
	adds	r17 = 40, SP
	cmp.gt	p15, p0 = r0, N
	(p15) br.ret.sptk.many b0
	}
	;;
#ifdef XDOUBLE
	{ .mmi
	ld8	X1 = [r14]
	ld8	INCX = [r15]
	nop	__LINE__
	}
	{ .mmi
	ld8	Y1 = [r16]
	ld8	INCY = [r17]
	nop	__LINE__
	}
	;;
#else
	{ .mmi
	ld8	INCY = [r14]
	nop	__LINE__
	nop	__LINE__
	}
	;;
#endif
/* Frame: 8 inputs, 16 locals.  J = N & 7.  Scale both strides. */
	{ .mmi
	.save ar.pfs, r10
	alloc	r10 = ar.pfs, 8, 16, 0, 0
	and	J = 7, N
	shl	INCX = INCX, ZBASE_SHIFT
	}
/* Prefetch pointers start (PREFETCH_SIZE + 2) * SIZE bytes past x and y. */
	{ .mmi
	adds	PREX1 = (PREFETCH_SIZE + 2) * SIZE, X1
	adds	PREY1 = (PREFETCH_SIZE + 2) * SIZE, Y1
	shl	INCY = INCY, ZBASE_SHIFT
	}
	;;
/* INC?8 = 8 * stride; save ar.lc; I = N / 8. */
	{ .mmi
	shladd	INCX8 = INCX, 3, r0
	shladd	INCY8 = INCY, 3, r0
	.save ar.lc, ARLC
	mov	ARLC = ar.lc
	}
	{ .mmi
	adds	INCX2M1 = -SIZE, INCX
	adds	INCY2M1 = -SIZE, INCY
	shr	I = N, 3
	}
	;;
/* INC?2M1 = 2*stride - SIZE; save predicates; second pointer pair. */
	{ .mmi
	add	INCX2M1 = INCX2M1, INCX
	add	INCY2M1 = INCY2M1, INCY
	mov	PR = pr
	}
	{ .mmi
	add	X2 = X1, INCX
	add	Y2 = Y1, INCY
	nop	__LINE__
	}
	;;
/* INC?4M1 = 4*stride - SIZE; clear rotating predicates; third pointer pair. */
	{ .mmi
	shladd	INCX4M1 = INCX, 1, INCX2M1
	shladd	INCY4M1 = INCY, 1, INCY2M1
	mov	pr.rot= 0
	}
	{ .mmi
	shladd	X3 = INCX, 1, X1
	shladd	Y3 = INCY, 1, Y1
	}
	;;
/* Fourth pointer pair; I becomes the ar.lc value (iterations - 1). */
	{ .mmi
	shladd	X4 = INCX, 1, X2
	shladd	Y4 = INCY, 1, Y2
	adds	I = -1, I
	}
/* p16 = 1 starts the pipeline.  PREX1 is rounded down to a multiple of 128. */
	{ .mmi
	cmp.eq	p16, p0 = r0, r0
	and	r8 = 127, Y1
	and	PREX1 = -128, PREX1
	}
	;;
/* Store pointers start at the load pointers; three pipeline stages. */
	{ .mmi
	mov	YY1 = Y1
	mov	YY2 = Y2
	mov	ar.ec = 3
	}
/* PREX1 takes its low 7 bits from Y1. */
	{ .mmi
	mov	YY3 = Y3
	mov	YY4 = Y4
	or	PREX1 = PREX1, r8
	}
	;;
/* Second prefetch pair sits 4 strides past the first. */
	{ .mmi
	shladd	PREX2 = INCX, 2, PREX1
	shladd	PREY2 = INCY, 2, PREY1
	mov	ar.lc = I
	}
/* I == -1 means N < 8: skip the loop.  p13 = bit 2 of N (4-element tail). */
	{ .mib
	cmp.eq	p11 ,p0 = -1, I
	tbit.z	p0, p13 = N, 2
	(p11) br.cond.dpnt  .L25
	}
	;;
	.align 32

/*
 * Main loop: 8 complex elements per iteration, driven by br.ctop with
 * ar.lc = I and ar.ec = 3.  Stage predicates:
 *   p16  load x and y parts, issue prefetches
 *   p17  add the ALPHA_R products for the first four elements
 *   p18  remaining products, then store results through YY1..YY4
 *   p19  leftover stores / pointer bumps for the previous iteration
 * Because br.ctop rotates the floating-point registers, a value loaded
 * into fN under p16 is referred to as fN+1 under p17 and fN+2 under p18
 * (f127 wraps to f32).
 * Each load pointer reads the real part (post-increment SIZE) and then the
 * imaginary part (post-increment INC?4M1), moving on by 4 elements; this
 * happens twice per iteration on each of the 4 pointers.
 */
.L22:
#ifdef XDOUBLE
	{ .mmf
	(p16) LDFD	f80  = [Y1], 1 * SIZE
	(p16) LDFD	f83  = [Y2], 1 * SIZE
	(p18) FMA1	f82  = ALPHA_I, f40, f82
	}
	{ .mmf
	(p16) LDFD	f92  = [Y3], 1 * SIZE
	(p16) LDFD	f95  = [Y4], 1 * SIZE
	(p18) FMA1	f85  = ALPHA_I, f43, f85
	}
	;;
	{ .mmf
	(p16) LDFD	f86  = [Y1], INCY4M1
	(p16) LDFD	f89  = [Y2], INCY4M1
	(p18) FMA1	f94  = ALPHA_I, f52, f94
	}
	{ .mmf
	(p16) LDFD	f98  = [Y3], INCY4M1
	(p16) LDFD	f101 = [Y4], INCY4M1
	(p18) FMA1	f97  = ALPHA_I, f55, f97
	}
	;;
	{ .mmf
	(p16) LDFD	f32  = [X1], 1 * SIZE
	(p16) LDFD	f35  = [X2], 1 * SIZE
	(p18) FMA	f88  = ALPHA_I, f34, f88
	}
	{ .mmf
	(p16) LDFD	f44  = [X3], 1 * SIZE
	(p16) LDFD	f47  = [X4], 1 * SIZE
	(p18) FMA	f91  = ALPHA_I, f37, f91
	}
	;;
	{ .mmf
	(p16) LDFD	f38  = [X1], INCX4M1
	(p16) LDFD	f41  = [X2], INCX4M1
	(p18) FMA	f100 = ALPHA_I, f46, f100
	}
	{ .mmf
	(p16) LDFD	f50  = [X3], INCX4M1
	(p16) LDFD	f53  = [X4], INCX4M1
	(p18) FMA	f103 = ALPHA_I, f49, f103
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f82, 1 * SIZE
	(p18) STFD	[YY2] = f85, 1 * SIZE
	(p18) FMA	f106 = ALPHA_R, f58, f106
	}
	{ .mmf
	(p19) add	YY3 = YY3, INCY4M1
	(p19) add	YY4 = YY4, INCY4M1
	(p18) FMA	f109 = ALPHA_R, f61, f109
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f94, 1 * SIZE
	(p18) STFD	[YY4] = f97, 1 * SIZE
	(p18) FMA	f118 = ALPHA_R, f70, f118
	}
	{ .mmf
	(p16) lfetch.excl.nt1	[PREY1], INCY8
	(p16) lfetch.excl.nt1	[PREY2], INCY8
	(p18) FMA	f121 = ALPHA_R, f73, f121
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f88
	(p18) STFD	[YY2] = f91
	(p18) FMA2	f112 = ALPHA_R, f64, f112
	}
	{ .mmf
	(p18) add	YY1 = YY1, INCY4M1
	(p18) add	YY2 = YY2, INCY4M1
	(p18) FMA2	f115 = ALPHA_R, f67, f115
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f100
	(p18) STFD	[YY4] = f103
	(p18) FMA2	f124 = ALPHA_R, f76, f124
	}
	{ .mmf
	(p18) add	YY3 = YY3, INCY4M1
	(p18) add	YY4 = YY4, INCY4M1
	(p18) FMA2	f127 = ALPHA_R, f79, f127
	}
	;;
	{ .mmf
	(p16) LDFD	f104 = [Y1], 1 * SIZE
	(p16) LDFD	f107 = [Y2], 1 * SIZE
	(p18) FMA1	f106 = ALPHA_I, f64, f106
	}
	{ .mmf
	(p16) LDFD	f116 = [Y3], 1 * SIZE
	(p16) LDFD	f119 = [Y4], 1 * SIZE
	(p18) FMA1	f109 = ALPHA_I, f67, f109
	}
	;;
	{ .mmf
	(p16) LDFD	f110 = [Y1], INCY4M1
	(p16) LDFD	f113 = [Y2], INCY4M1
	(p18) FMA1	f118 = ALPHA_I, f76, f118
	}
	{ .mmf
	(p16) LDFD	f122 = [Y3], INCY4M1
	(p16) LDFD	f125 = [Y4], INCY4M1
	(p18) FMA1	f121 = ALPHA_I, f79, f121
	}
	;;
	{ .mmf
	(p16) LDFD	f56  = [X1], 1 * SIZE
	(p16) LDFD	f59  = [X2], 1 * SIZE
	(p18) FMA	f112 = ALPHA_I, f58, f112
	}
	{ .mmf
	(p16) LDFD	f68  = [X3], 1 * SIZE
	(p16) LDFD	f71  = [X4], 1 * SIZE
	(p18) FMA	f115 = ALPHA_I, f61, f115
	}
	;;
	{ .mmf
	(p16) LDFD	f62  = [X1], INCX4M1
	(p16) LDFD	f65  = [X2], INCX4M1
	(p18) FMA	f124 = ALPHA_I, f70, f124
	}
	{ .mmf
	(p16) LDFD	f74  = [X3], INCX4M1
	(p16) LDFD	f77  = [X4], INCX4M1
	(p18) FMA	f127 = ALPHA_I, f73, f127
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f106, 1 * SIZE
	(p18) STFD	[YY2] = f109, 1 * SIZE
	(p17) FMA	f81  = ALPHA_R, f33, f81
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p17) FMA	f84  = ALPHA_R, f36, f84
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f118, 1 * SIZE
	(p18) STFD	[YY4] = f121, 1 * SIZE
	(p17) FMA	f93  = ALPHA_R, f45, f93
	}
	{ .mmf
	(p16) lfetch.nt1	[PREX1], INCX8
	(p16) lfetch.nt1	[PREX2], INCX8
	(p17) FMA	f96  = ALPHA_R, f48, f96
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f112
	(p18) STFD	[YY2] = f115
	(p17) FMA2	f87  = ALPHA_R, f39, f87
	}
	{ .mmf
	(p18) add	YY1 = YY1, INCY4M1
	(p18) add	YY2 = YY2, INCY4M1
	(p17) FMA2	f90  = ALPHA_R, f42, f90
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f124
	(p18) STFD	[YY4] = f127
	(p17) FMA2	f99  = ALPHA_R, f51, f99
	}
	{ .mfb
	nop	__LINE__
	(p17) FMA2	f102 = ALPHA_R, f54, f102
	br.ctop.sptk.few .L22
	}
	;;
	;;
/* After the loop: the YY3 / YY4 bump that the next iteration would have done. */
	(p19) add	YY3 = YY3, INCY4M1
	(p19) add	YY4 = YY4, INCY4M1
	;;
#else
/*
 * Non-XDOUBLE schedule.  Same arithmetic; the last two imaginary stores of
 * an iteration (f124 / f127, seen here one rotation later as f125 / f32)
 * are issued under p19 at the top of the following iteration, and only one
 * prefetch pointer per vector is used.
 */
	{ .mmf
	(p19) STFD	[YY3] = f125
	(p19) STFD	[YY4] = f32
	(p18) FMA2	f100 = ALPHA_R, f52, f100
	}
	{ .mmf
	(p16) lfetch.excl.nt1	[PREY1], INCY8
	nop	__LINE__
	(p18) FMA2	f103 = ALPHA_R, f55, f103
	}
	;;
	{ .mmf
	(p16) LDFD	f80  = [Y1], 1 * SIZE
	(p16) LDFD	f83  = [Y2], 1 * SIZE
	(p18) FMA1	f82  = ALPHA_I, f40, f82
	}
	{ .mmf
	(p16) LDFD	f92  = [Y3], 1 * SIZE
	(p16) LDFD	f95  = [Y4], 1 * SIZE
	(p18) FMA1	f85  = ALPHA_I, f43, f85
	}
	;;
	{ .mmf
	(p16) LDFD	f86  = [Y1], INCY4M1
	(p16) LDFD	f89  = [Y2], INCY4M1
	(p18) FMA1	f94  = ALPHA_I, f52, f94
	}
	{ .mmf
	(p19) add	YY3 = YY3, INCY4M1
	(p19) add	YY4 = YY4, INCY4M1
	(p18) FMA1	f97  = ALPHA_I, f55, f97
	}
	;;
	{ .mmf
	(p16) LDFD	f98  = [Y3], INCY4M1
	(p16) LDFD	f101 = [Y4], INCY4M1
	(p18) FMA	f88  = ALPHA_I, f34, f88
	}
	{ .mmf
	(p19) add	YY1 = YY1, INCY4M1
	(p19) add	YY2 = YY2, INCY4M1
	(p18) FMA	f91  = ALPHA_I, f37, f91
	}
	;;
	{ .mmf
	(p16) LDFD	f32  = [X1], 1 * SIZE
	(p16) LDFD	f35  = [X2], 1 * SIZE
	(p18) FMA	f100 = ALPHA_I, f46, f100
	}
	{ .mmf
	(p16) LDFD	f44  = [X3], 1 * SIZE
	(p16) LDFD	f47  = [X4], 1 * SIZE
	(p18) FMA	f103 = ALPHA_I, f49, f103
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f82, 1 * SIZE
	(p18) STFD	[YY2] = f85, 1 * SIZE
	(p18) FMA	f106 = ALPHA_R, f58, f106
	}
	{ .mmf
	(p16) LDFD	f38  = [X1], INCX4M1
	(p16) LDFD	f41  = [X2], INCX4M1
	(p18) FMA	f109 = ALPHA_R, f61, f109
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f94, 1 * SIZE
	(p18) STFD	[YY4] = f97, 1 * SIZE
	(p18) FMA	f118 = ALPHA_R, f70, f118
	}
	{ .mmf
	(p16) LDFD	f50  = [X3], INCX4M1
	(p16) LDFD	f53  = [X4], INCX4M1
	(p18) FMA	f121 = ALPHA_R, f73, f121
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f88
	(p18) STFD	[YY2] = f91
	(p18) FMA2	f112 = ALPHA_R, f64, f112
	}
	{ .mmf
	(p16) lfetch.nt1	[PREX1], INCX8
	nop	__LINE__
	(p18) FMA2	f115 = ALPHA_R, f67, f115
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f100
	(p18) STFD	[YY4] = f103
	(p18) FMA2	f124 = ALPHA_R, f76, f124
	}
	{ .mmf
	(p16) LDFD	f104 = [Y1], 1 * SIZE
	(p16) LDFD	f107 = [Y2], 1 * SIZE
	(p18) FMA2	f127 = ALPHA_R, f79, f127
	}
	;;
	{ .mmf
	(p16) LDFD	f116 = [Y3], 1 * SIZE
	(p16) LDFD	f119 = [Y4], 1 * SIZE
	(p18) FMA1	f106 = ALPHA_I, f64, f106
	}
	{ .mmf
	(p18) add	YY1 = YY1, INCY4M1
	(p18) add	YY2 = YY2, INCY4M1
	(p18) FMA1	f109 = ALPHA_I, f67, f109
	}
	;;
	{ .mmf
	(p16) LDFD	f110 = [Y1], INCY4M1
	(p16) LDFD	f113 = [Y2], INCY4M1
	(p18) FMA1	f118 = ALPHA_I, f76, f118
	}
	{ .mmf
	(p18) add	YY3 = YY3, INCY4M1
	(p18) add	YY4 = YY4, INCY4M1
	(p18) FMA1	f121 = ALPHA_I, f79, f121
	}
	;;
	{ .mmf
	(p16) LDFD	f122 = [Y3], INCY4M1
	(p16) LDFD	f125 = [Y4], INCY4M1
	(p18) FMA	f112 = ALPHA_I, f58, f112
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p18) FMA	f115 = ALPHA_I, f61, f115
	}
	;;
	{ .mmf
	(p16) LDFD	f56  = [X1], 1 * SIZE
	(p16) LDFD	f59  = [X2], 1 * SIZE
	(p18) FMA	f124 = ALPHA_I, f70, f124
	}
	{ .mmf
	(p16) LDFD	f68  = [X3], 1 * SIZE
	(p16) LDFD	f71  = [X4], 1 * SIZE
	(p18) FMA	f127 = ALPHA_I, f73, f127
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f106, 1 * SIZE
	(p18) STFD	[YY2] = f109, 1 * SIZE
	(p17) FMA	f81  = ALPHA_R, f33, f81
	}
	{ .mmf
	(p16) LDFD	f62  = [X1], INCX4M1
	(p16) LDFD	f65  = [X2], INCX4M1
	(p17) FMA	f84  = ALPHA_R, f36, f84
	}
	;;
	{ .mmf
	(p18) STFD	[YY3] = f118, 1 * SIZE
	(p18) STFD	[YY4] = f121, 1 * SIZE
	(p17) FMA	f93  = ALPHA_R, f45, f93
	}
	{ .mmf
	(p16) LDFD	f74  = [X3], INCX4M1
	(p16) LDFD	f77  = [X4], INCX4M1
	(p17) FMA	f96  = ALPHA_R, f48, f96
	}
	;;
	{ .mmf
	(p18) STFD	[YY1] = f112
	(p18) STFD	[YY2] = f115
	(p17) FMA2	f87  = ALPHA_R, f39, f87
	}
	{ .mfb
	nop	__LINE__
	(p17) FMA2	f90  = ALPHA_R, f42, f90
	br.ctop.sptk.few .L22
	}
	;;
/* After the loop: the p19 stores and pointer bumps of the final iteration. */
	{ .mmi
	(p19) STFD	[YY3] = f125
	(p19) STFD	[YY4] = f32
	(p19) add	YY1 = YY1, INCY4M1
	}
	{ .mmi
	(p19) add	YY2 = YY2, INCY4M1
	(p19) add	YY3 = YY3, INCY4M1
	(p19) add	YY4 = YY4, INCY4M1
	}
	;;
#endif
	.align 32

/*
 * Remainder (N & 7).  Predicates: p13 = bit 2 of N (4 elements, set before
 * the loop), p14 = bit 1 (2 elements), p15 = bit 0 (1 element).  Loads,
 * arithmetic and stores of the three cases are interleaved.
 */
.L25:
/* Restore ar.lc; p12 = (J == 0) means nothing is left to do. */
	{ .mmi
	(p13) LDFD	f32  = [X1], 1 * SIZE
	(p13) LDFD	f34  = [X2], 1 * SIZE
	mov	ar.lc  = ARLC
	}
	{ .mmi
	(p13) LDFD	f36  = [X3], 1 * SIZE
	(p13) LDFD	f38  = [X4], 1 * SIZE
	cmp.eq	p12, p0 = r0, J
	}
	;;
/*
 * Restore predicates from PR.  The mask -65474 (low bits 0x1003E) selects
 * p1-p5 and the rotating predicates, so p12-p15 keep their current values.
 */
	{ .mmi
	(p13) LDFD	f80  = [Y1], 1 * SIZE
	(p13) LDFD	f82  = [Y2], 1 * SIZE
	mov	pr = PR, -65474
	}
	{ .mmb
	(p13) LDFD	f84  = [Y3], 1 * SIZE
	(p13) LDFD	f86  = [Y4], 1 * SIZE
	(p12) br.ret.sptk.many b0
	}
	;;
	{ .mmi
	(p13) LDFD	f33  = [X1], INCX4M1
	(p13) LDFD	f35  = [X2], INCX4M1
	tbit.z	p0, p14 = N, 1
	}
	{ .mmi
	(p13) LDFD	f81  = [Y1], INCY4M1
	(p13) LDFD	f83  = [Y2], INCY4M1
	nop	__LINE__
	}
	;;
	{ .mmi
	(p13) LDFD	f37  = [X3], INCX4M1
	(p13) LDFD	f39  = [X4], INCX4M1
	tbit.z	p0, p15 = N, 0
	}
	{ .mmi
	(p13) LDFD	f85  = [Y3], INCY4M1
	(p13) LDFD	f87  = [Y4], INCY4M1
	nop	__LINE__
	}
	;;
/* Two-element case: loads through X1 / X2 and Y1 / Y2. */
	{ .mmf
	(p14) LDFD	f40  = [X1], 1 * SIZE
	(p14) LDFD	f42  = [X2], 1 * SIZE
	}
	;;
	{ .mmf
	(p14) LDFD	f88  = [Y1], 1 * SIZE
	(p14) LDFD	f90  = [Y2], 1 * SIZE
	}
	;;
	{ .mmf
	(p14) LDFD	f41  = [X1], INCX2M1
	(p14) LDFD	f43  = [X2], INCX2M1
	(p13) FMA	f80  = ALPHA_R, f32, f80
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f82  = ALPHA_R, f34, f82
	}
	;;
	{ .mmf
	(p14) LDFD	f89  = [Y1], INCY2M1
	(p14) LDFD	f91  = [Y2], INCY2M1
	(p13) FMA	f84  = ALPHA_R, f36, f84
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f86  = ALPHA_R, f38, f86
	}
	;;
/* One-element case: loads through X1 / Y1 only. */
	{ .mmf
	(p15) LDFD	f44  = [X1], 1 * SIZE
	(p15) LDFD	f92  = [Y1], 1 * SIZE
	(p13) FMA2	f81  = ALPHA_R, f33, f81
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA2	f83  = ALPHA_R, f35, f83
	}
	;;
	{ .mmf
	(p15) LDFD	f45  = [X1]
	(p15) LDFD	f93  = [Y1]
	(p13) FMA2	f85  = ALPHA_R, f37, f85
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA2	f87  = ALPHA_R, f39, f87
	}
	;;
/* Four-element case: ALPHA_I products complete the real and imaginary sums. */
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA1	f80  = ALPHA_I, f33, f80
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA1	f82  = ALPHA_I, f35, f82
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA1	f84  = ALPHA_I, f37, f84
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA1	f86  = ALPHA_I, f39, f86
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f81  = ALPHA_I, f32, f81
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f83  = ALPHA_I, f34, f83
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f85  = ALPHA_I, f36, f85
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p13) FMA	f87  = ALPHA_I, f38, f87
	}
	;;
/* Store the four-element results while the 2- and 1-element sums proceed. */
	{ .mmf
	(p13) STFD	[YY1] = f80, 1 * SIZE
	(p13) STFD	[YY2] = f82, 1 * SIZE
	(p14) FMA	f88  = ALPHA_R, f40, f88
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMA	f90  = ALPHA_R, f42, f90
	}
	;;
	{ .mmf
	(p13) STFD	[YY3] = f84, 1 * SIZE
	(p13) STFD	[YY4] = f86, 1 * SIZE
	(p14) FMA2	f89  = ALPHA_R, f41, f89
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMA2	f91  = ALPHA_R, f43, f91
	}
	;;
	{ .mmf
	(p13) STFD	[YY1] = f81
	(p13) STFD	[YY2] = f83
	(p15) FMA	f92  = ALPHA_R, f44, f92
	}
	{ .mmf
	(p13) add	YY1 = YY1, INCY4M1
	(p13) add	YY2 = YY2, INCY4M1
	(p15) FMA2	f93  = ALPHA_R, f45, f93
	}
	;;
	{ .mmf
	(p13) STFD	[YY3] = f85
	(p13) STFD	[YY4] = f87
	(p14) FMA1	f88  = ALPHA_I, f41, f88
	}
	{ .mmf
	(p13) add	YY3 = YY3, INCY4M1
	(p13) add	YY4 = YY4, INCY4M1
	(p14) FMA1	f90  = ALPHA_I, f43, f90
	}
	;;
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMA	f89  = ALPHA_I, f40, f89
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p14) FMA	f91  = ALPHA_I, f42, f91
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p15) FMA1	f92  = ALPHA_I, f45, f92
	}
	{ .mmf
	nop	__LINE__
	nop	__LINE__
	(p15) FMA	f93  = ALPHA_I, f44, f93
	}
	;;
/* Store the two-element results, then step YY1 past them. */
	{ .mmi
	(p14) STFD	[YY1] = f88, 1 * SIZE
	(p14) STFD	[YY2] = f90, 1 * SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p14) STFD	[YY1] = f89
	(p14) STFD	[YY2] = f91
	(p14) add	YY1 = YY1, INCY2M1
	}
	;;
/* Store the final single element and return. */
	{ .mmi
	(p15) STFD	[YY1] = f92, 1 * SIZE
	nop	__LINE__
	nop	__LINE__
	}
	;;
	{ .mmb
	(p15) STFD	[YY1] = f93
	nop	__LINE__
	br.ret.sptk.many b0
	}
	;;
	EPILOGUE