/* k6opt.s vector functions optimized for MMX extensions to x86
 *
 * Copyright (C) 1999 by Stanley J. Brooks <stabro@megsinet.net>
 *
 * Any use of this software is permitted provided that this notice is not
 * removed and that neither the authors nor the Technische Universitaet Berlin
 * are deemed to have made any representations as to the suitability of this
 * software for any purpose nor are held responsible for any defects of
 * this software. THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE;
 * not even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE.
 *
 * Chicago, 03.12.1999
 * Stanley J. Brooks
 */

/* NOTE(review): IA-32 (32-bit) AT&T-syntax GAS source using MMX.  All
 * functions below are cdecl: arguments at 8(%ebp), 12(%ebp), ...;
 * integer results in %eax; %ebx/%esi/%edi preserved where pushed.
 * Each routine brackets its MMX usage with `emms` so the x87 stack is
 * usable again on return.  Presumably these are drop-in replacements
 * for C routines of the GSM 06.10 codec (see libgsm) -- TODO confirm
 * against the calling C source. */

	.file	"k6opt.s"
	.version	"01.01"
/* gcc2_compiled.: */
.section .rodata
	.align 4
	.type	coefs,@object
	.size	coefs,24
/* 11-tap filter coefficients, padded with a trailing 0 to 12 int16
 * values so they load as exactly three 64-bit MMX quadwords
 * (mm1/mm2/mm3 in Weighting_filter below). */
coefs:
	.value -134
	.value -374
	.value 0
	.value 2054
	.value 5741
	.value 8192
	.value 5741
	.value 2054
	.value 0
	.value -374
	.value -134
	.value 0
.text
	.align 4
/* void Weighting_filter (const short *e, short *x)
 *
 * FIR-filters 40 output samples: for k = 0..39,
 *   x[k] = round( sum_{i=0..11} coefs[i] * e[k-5+i] >> 13 )
 * with the rounding constant 0x1000 added before the shift and the
 * result saturated to int16 (packssdw).  %ebx is biased by -10 bytes
 * (= 5 shorts) so the quadword loads cover e[k-5]..e[k+6].
 *
 * In:    8(%ebp) = e  (input, read from e-5 .. e+45)
 *       12(%ebp) = x  (output, 40 shorts written)
 * Out:   none (void)
 * Clobb: eax, flags; ebx/esi/edi saved; mm0-mm5 (emms on exit)
 */
.globl Weighting_filter
	.type	Weighting_filter,@function
Weighting_filter:
	pushl %ebp
	movl %esp,%ebp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl 12(%ebp),%edi		/* edi = x (output ptr) */
	movl 8(%ebp),%ebx		/* ebx = e */
	addl $-10,%ebx			/* bias by 5 shorts: loads start at e[k-5] */
	emms
	movl $0x1000,%eax; movd %eax,%mm5 /* for rounding */
	movq coefs,%mm1			/* taps 0..3 */
	movq coefs+8,%mm2		/* taps 4..7 */
	movq coefs+16,%mm3		/* taps 8..11 (last is the 0 pad) */
	xorl %esi,%esi			/* esi = output index k = 0 */
	.p2align 2
.L21:
	movq (%ebx,%esi,2),%mm0
	pmaddwd %mm1,%mm0		/* mm0 = pairwise products of taps 0..3 */

	movq 8(%ebx,%esi,2),%mm4
	pmaddwd %mm2,%mm4
	paddd %mm4,%mm0			/* accumulate taps 4..7 */

	movq 16(%ebx,%esi,2),%mm4
	pmaddwd %mm3,%mm4
	paddd %mm4,%mm0			/* accumulate taps 8..11 */

	movq %mm0,%mm4
	punpckhdq %mm0,%mm4	/* mm4 has high int32 of mm0 dup'd */
	paddd %mm4,%mm0;		/* fold the two dword sums together */

	paddd %mm5,%mm0 /* add for roundoff */
	psrad $13,%mm0
	packssdw %mm0,%mm0		/* saturate dword sum to int16 */
	movd %mm0,%eax /* ax has result */
	movw %ax,(%edi,%esi,2)
	incl %esi
	cmpl $39,%esi
	jle .L21			/* 40 output samples total */
	emms
	popl %ebx
	popl %esi
	popl %edi
	leave
	ret
.Lfe1:
	.size	Weighting_filter,.Lfe1-Weighting_filter

/* ccstep n: one multiply-accumulate step shared by k6maxcc/k6iprod.
 * Loads 4 int16 words from n(%edi) and n(%esi), forms the pairwise
 * products (pmaddwd) and adds the two resulting dwords into the
 * running accumulator mm0.  The .if avoids emitting a redundant
 * 0(%reg) displacement when n == 0. */
.macro ccstep n
.if \n
	movq \n(%edi),%mm1
	movq \n(%esi),%mm2
.else
	movq (%edi),%mm1
	movq (%esi),%mm2
.endif
	pmaddwd %mm2,%mm1
	paddd %mm1,%mm0
.endm

	.align 4
/* long k6maxcc(const short *wt, const short *dp, short *Nc_out)
 *
 * Cross-correlation search over lags 40..120: for each lag Nc,
 * computes the 40-term inner product wt[0..39] . dp[-Nc..-Nc+39]
 * and tracks the (signed) maximum.
 *
 * In:    8(%ebp) = wt, 12(%ebp) = dp, 16(%ebp) = Nc_out
 * Out:   eax = maximum inner product found (0 if none exceeds 0,
 *        since the running max starts at 0 and `jle` skips ties);
 *        *Nc_out = lag of that maximum (40 if never updated)
 * Clobb: ecx, edx, flags; ebx/esi/edi saved; mm0-mm2 (emms on exit)
 *
 * NOTE(review): truncation of a 40-term product-sum of int16 pairs
 * cannot overflow 32 bits, so the dword accumulator is exact.
 */
.globl k6maxcc
	.type	k6maxcc,@function
k6maxcc:
	pushl %ebp
	movl %esp,%ebp
	pushl %edi
	pushl %esi
	pushl %ebx
	emms
	movl 8(%ebp),%edi		/* edi = wt */
	movl 12(%ebp),%esi		/* esi = dp */
	movl $0,%edx /* will be maximum inner-product */
	movl $40,%ebx			/* ebx = current lag Nc */
	movl %ebx,%ecx /* will be index of max inner-product */
	subl $80,%esi			/* esi = dp - 40 shorts = dp[-Nc] for Nc=40 */
	.p2align 2
.L41:
	/* 10 quadword MAC steps = 40 int16 products into mm0 */
	movq (%edi),%mm0
	movq (%esi),%mm2
	pmaddwd %mm2,%mm0
	ccstep 8
	ccstep 16
	ccstep 24
	ccstep 32
	ccstep 40
	ccstep 48
	ccstep 56
	ccstep 64
	ccstep 72

	movq %mm0,%mm1
	punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */
	paddd %mm1,%mm0;
	movd %mm0,%eax /* eax has result */

	cmpl %edx,%eax
	jle .L40			/* keep old max on ties (signed compare) */
	movl %eax,%edx
	movl %ebx,%ecx
	.p2align 2
.L40:
	subl $2,%esi			/* next lag: window slides back one short */
	incl %ebx
	cmpl $120,%ebx
	jle .L41			/* lags 40..120 inclusive */
	movl 16(%ebp),%eax
	movw %cx,(%eax)			/* *Nc_out = best lag */
	movl %edx,%eax			/* return max inner product */
	emms
	popl %ebx
	popl %esi
	popl %edi
	leave
	ret
.Lfe2:
	.size	k6maxcc,.Lfe2-k6maxcc


	.align 4
/* long k6iprod (const short *p, const short *q, int n)
 *
 * Inner product sum(p[i]*q[i], i=0..n-1), accumulated in 32 bits.
 * Peels the length in ever-smaller chunks: 16 words, then 4 words,
 * then 2 words, then a final single word, so any n >= 0 is handled.
 *
 * In:    8(%ebp) = p, 12(%ebp) = q, 16(%ebp) = n (count of shorts)
 * Out:   eax = inner product (low 32 bits)
 * Clobb: edx, flags; esi/edi saved; mm0-mm2 (emms on exit)
 */
.globl k6iprod
	.type	k6iprod,@function
k6iprod:
	pushl %ebp
	movl %esp,%ebp
	pushl %edi
	pushl %esi
	emms
	pxor %mm0,%mm0			/* clear the dword-pair accumulator */
	movl 8(%ebp),%esi		/* esi = p */
	movl 12(%ebp),%edi		/* edi = q */
	movl 16(%ebp),%eax
	leal -32(%esi,%eax,2),%edx /* edx = top - 32 */

	cmpl %edx,%esi; ja .L202	/* fewer than 16 words left? */

	.p2align 2
.L201:				/* main loop: 16 words per iteration */
	ccstep 0
	ccstep 8
	ccstep 16
	ccstep 24

	addl $32,%esi
	addl $32,%edi
	cmpl %edx,%esi; jbe .L201

	.p2align 2
.L202:
	addl $24,%edx /* now edx = top-8 */
	cmpl %edx,%esi; ja .L205

	.p2align 2
.L203:				/* 4 words per iteration */
	ccstep 0

	addl $8,%esi
	addl $8,%edi
	cmpl %edx,%esi; jbe .L203

	.p2align 2
.L205:
	addl $4,%edx /* now edx = top-4 */
	cmpl %edx,%esi; ja .L207

	/* exactly 2 or 3 words remain: do one 2-word MAC via movd */
	movd (%edi),%mm1
	movd (%esi),%mm2
	pmaddwd %mm2,%mm1
	paddd %mm1,%mm0

	addl $4,%esi
	addl $4,%edi

	.p2align 2
.L207:
	addl $2,%edx /* now edx = top-2 */
	cmpl %edx,%esi; ja .L209

	/* one final odd word: sign-extend both and multiply */
	movswl (%edi),%eax
	movd %eax,%mm1
	movswl (%esi),%eax
	movd %eax,%mm2
	pmaddwd %mm2,%mm1
	paddd %mm1,%mm0

	.p2align 2
.L209:
	movq %mm0,%mm1
	punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */
	paddd %mm1,%mm0;
	movd %mm0,%eax /* eax has result */

	emms
	popl %esi
	popl %edi
	leave
	ret
.Lfe3:
	.size	k6iprod,.Lfe3-k6iprod


	.align 4
/* void k6vsraw P3((short *p, int n, int bits))
 *
 * In-place rounded arithmetic right shift of n shorts:
 *   p[i] = (p[i] saturating+ (1 << (bits-1))) >> bits
 * The rounding constant is built from `ones` as (1<<bits)>>1 and
 * added with signed saturation (paddsw) before the shift.
 * Returns immediately if bits <= 0.
 *
 * In:    8(%ebp) = p, 12(%ebp) = n, 16(%ebp) = bits
 * Clobb: eax, ecx, edx, flags; esi saved; mm0-mm3 (emms on exit)
 */
.globl k6vsraw
	.type	k6vsraw,@function
k6vsraw:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	movl 8(%ebp),%esi		/* esi = p */
	movl 16(%ebp),%ecx
	andl %ecx,%ecx; jle .L399	/* bits <= 0: nothing to do */
	movl 12(%ebp),%eax
	leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
	emms
	movd %ecx,%mm3			/* mm3 = shift count */
	movq ones,%mm2
	psllw %mm3,%mm2; psrlw $1,%mm2	/* mm2 = rounding words (1<<bits)>>1 */
	cmpl %edx,%esi; ja .L306

	.p2align 2
.L302: /* 8 words per iteration */
	movq (%esi),%mm0
	movq 8(%esi),%mm1
	paddsw %mm2,%mm0		/* round with signed saturation */
	psraw %mm3,%mm0;
	paddsw %mm2,%mm1
	psraw %mm3,%mm1;
	movq %mm0,(%esi)
	movq %mm1,8(%esi)
	addl $16,%esi
	cmpl %edx,%esi
	jbe .L302

	.p2align 2
.L306:
	addl $12,%edx /* now edx = top-4 */
	cmpl %edx,%esi; ja .L310

	.p2align 2
.L308: /* do up to 6 words, two at a time */
	movd (%esi),%mm0
	paddsw %mm2,%mm0
	psraw %mm3,%mm0;
	movd %mm0,(%esi)
	addl $4,%esi
	cmpl %edx,%esi
	jbe .L308

	.p2align 2
.L310:
	addl $2,%edx /* now edx = top-2 */
	cmpl %edx,%esi; ja .L315

	/* one final odd word */
	movzwl (%esi),%eax
	movd %eax,%mm0
	paddsw %mm2,%mm0
	psraw %mm3,%mm0;
	movd %mm0,%eax
	movw %ax,(%esi)

	.p2align 2
.L315:
	emms
.L399:
	popl %esi
	leave
	ret
.Lfe4:
	.size	k6vsraw,.Lfe4-k6vsraw

	.align 4
/* void k6vsllw P3((short *p, int n, int bits))
 *
 * In-place left shift of n shorts: p[i] <<= bits (non-saturating,
 * psllw truncates).  Same chunked structure as k6vsraw: 8 words,
 * then 2 words, then a final odd word.  Returns if bits <= 0.
 *
 * In:    8(%ebp) = p, 12(%ebp) = n, 16(%ebp) = bits
 * Clobb: eax, ecx, edx, flags; esi saved; mm0-mm3 (emms on exit)
 */
.globl k6vsllw
	.type	k6vsllw,@function
k6vsllw:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	movl 8(%ebp),%esi		/* esi = p */
	movl 16(%ebp),%ecx
	andl %ecx,%ecx; jle .L499	/* bits <= 0: nothing to do */
	movl 12(%ebp),%eax
	leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
	emms
	movd %ecx,%mm3			/* mm3 = shift count */
	cmpl %edx,%esi; ja .L406

	.p2align 2
.L402: /* 8 words per iteration */
	movq (%esi),%mm0
	movq 8(%esi),%mm1
	psllw %mm3,%mm0;
	psllw %mm3,%mm1;
	movq %mm0,(%esi)
	movq %mm1,8(%esi)
	addl $16,%esi
	cmpl %edx,%esi
	jbe .L402

	.p2align 2
.L406:
	addl $12,%edx /* now edx = top-4 */
	cmpl %edx,%esi; ja .L410

	.p2align 2
.L408: /* do up to 6 words, two at a time */
	movd (%esi),%mm0
	psllw %mm3,%mm0;
	movd %mm0,(%esi)
	addl $4,%esi
	cmpl %edx,%esi
	jbe .L408

	.p2align 2
.L410:
	addl $2,%edx /* now edx = top-2 */
	cmpl %edx,%esi; ja .L415

	/* one final odd word */
	movzwl (%esi),%eax
	movd %eax,%mm0
	psllw %mm3,%mm0;
	movd %mm0,%eax
	movw %ax,(%esi)

	.p2align 2
.L415:
	emms
.L499:
	popl %esi
	leave
	ret
.Lfe5:
	.size	k6vsllw,.Lfe5-k6vsllw


.section .rodata
	.align 4
	.type	extremes,@object
	.size	extremes,8
/* Packed int16 seeds for the empty-input case of k6maxmin:
 * two copies of -32768 (max seed) and two of 32767 (min seed). */
extremes:
	.long 0x80008000
	.long 0x7fff7fff
	.type	ones,@object
	.size	ones,8
/* Four packed int16 ones; shifted to build k6vsraw's rounding word. */
ones:
	.long 0x00010001
	.long 0x00010001

.text
	.align 4
/* long k6maxmin (const short *p, int n, short *out)
 *
 * Scans n shorts keeping packed per-lane running max (mm0) and min
 * (mm1), four lanes at a time, then merges lanes down 4 -> 2 -> 1.
 * MMX has no pmin/pmax, so each update is the classic mask/blend:
 * pcmpgtw builds a lane mask, pand/pandn select, por combines.
 *
 * In:    8(%ebp) = p, 12(%ebp) = n, 16(%ebp) = out (may be NULL)
 * Out:   eax = max(max_value, -min_value) (largest magnitude, signed);
 *        if out != NULL: out[0] = max, out[1] = min
 * Clobb: ecx, edx, flags; esi saved; mm0-mm4 (emms on exit)
 */
.globl k6maxmin
	.type	k6maxmin,@function
k6maxmin:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	emms
	movl 8(%ebp),%esi		/* esi = p */
	movl 12(%ebp),%eax
	leal -8(%esi,%eax,2),%edx	/* edx = top - 8 bytes (4 words) */

	cmpl %edx,%esi
	jbe .L52
	/* fewer than 4 words: seed from extremes so any input wins */
	movd extremes,%mm0
	movd extremes+4,%mm1
	jmp .L58

	.p2align 2
.L52:
	movq (%esi),%mm0	/* mm0 will be max's */
	movq %mm0,%mm1		/* mm1 will be min's */
	addl $8,%esi
	cmpl %edx,%esi
	ja .L56

	.p2align 2
.L54:				/* main loop: 4 words per iteration */
	movq (%esi),%mm2

	movq %mm2,%mm3
	pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
	movq %mm3,%mm4
	pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */
	pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */
	por %mm3,%mm4
	movq %mm4,%mm0 /* now mm0 is updated max's */

	movq %mm1,%mm3
	pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
	pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
	pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
	por %mm3,%mm2
	movq %mm2,%mm1 /* now mm1 is updated min's */

	addl $8,%esi
	cmpl %edx,%esi
	jbe .L54

	.p2align 2
.L56: /* merge down the 4-word max/mins to lower 2 words */

	movq %mm0,%mm2
	psrlq $32,%mm2			/* bring high 2 lanes down */
	movq %mm2,%mm3
	pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
	pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */
	pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */
	por %mm3,%mm2
	movq %mm2,%mm0 /* now mm0 is updated max's */

	movq %mm1,%mm2
	psrlq $32,%mm2
	movq %mm1,%mm3
	pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
	pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
	pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
	por %mm3,%mm2
	movq %mm2,%mm1 /* now mm1 is updated min's */

	.p2align 2
.L58:
	addl $4,%edx /* now dx = top-4 */
	cmpl %edx,%esi
	ja .L62
	/* here, there are >= 2 words of input remaining */
	movd (%esi),%mm2

	movq %mm2,%mm3
	pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
	movq %mm3,%mm4
	pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */
	pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */
	por %mm3,%mm4
	movq %mm4,%mm0 /* now mm0 is updated max's */

	movq %mm1,%mm3
	pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
	pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
	pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
	por %mm3,%mm2
	movq %mm2,%mm1 /* now mm1 is updated min's */

	addl $4,%esi

	.p2align 2
.L62:
	/* merge down the 2-word max/mins to 1 word */

	movq %mm0,%mm2
	psrlq $16,%mm2
	movq %mm2,%mm3
	pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
	pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */
	pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */
	por %mm3,%mm2
	movd %mm2,%ecx /* cx is max so far */

	movq %mm1,%mm2
	psrlq $16,%mm2
	movq %mm1,%mm3
	pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
	pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
	pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
	por %mm3,%mm2
	movd %mm2,%eax /* ax is min so far */

	addl $2,%edx /* now dx = top-2 */
	cmpl %edx,%esi
	ja .L65

	/* here, there is one word of input left */
	cmpw (%esi),%cx
	jge .L64
	movw (%esi),%cx
	.p2align 2
.L64:
	cmpw (%esi),%ax
	jle .L65
	movw (%esi),%ax

	.p2align 2
.L65: /* (finally!) cx is the max, ax the min */
	movswl %cx,%ecx
	movswl %ax,%eax

	movl 16(%ebp),%edx /* ptr to output max,min vals */
	andl %edx,%edx; jz .L77		/* out == NULL: skip the store */
	movw %cx,(%edx) /* max */
	movw %ax,2(%edx) /* min */
	.p2align 2
.L77:
	/* now calculate max absolute val: eax = max(ecx, -eax) */
	negl %eax
	cmpl %ecx,%eax
	jge .L81
	movl %ecx,%eax
	.p2align 2
.L81:
	emms
	popl %esi
	leave
	ret
.Lfe6:
	.size	k6maxmin,.Lfe6-k6maxmin

/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s)
 *
 * Lattice (analysis) filter over kn samples of s[], updating the
 * 8-entry u[] state array in place.  Each rp0[] coefficient is first
 * duplicated word-wise into the local lv_rp buffer so one movd loads
 * (RP,RP); pmullw/pmulhw then form the full 32-bit products RP*U and
 * RP*DI in parallel, which are rounded (+0x4000, >>15), saturated to
 * int16 and added to (DI,U) with signed saturation.
 *
 * Stack frame: lv_u_top = &u0[8] (loop bound), lv_s_top = &s[kn],
 * lv_rp = 32-byte duplicated-coefficient buffer.
 * Clobb: eax, ecx, edx, flags; esi/edi saved; mm0-mm4 (emms on exit)
 */
	.equiv pm_u0,8
	.equiv pm_rp0,12
	.equiv pm_kn,16
	.equiv pm_s,20
	.equiv lv_u_top,-4
	.equiv lv_s_top,-8
	.equiv lv_rp,-40 /* local version of rp0 with each word twice */
	.align 4
.globl Short_term_analysis_filteringx
	.type	Short_term_analysis_filteringx,@function
Short_term_analysis_filteringx:
	pushl %ebp
	movl %esp,%ebp
	subl $40,%esp
	pushl %edi
	pushl %esi

	/* duplicate each of the 8 rp0 words into lv_rp: (rp,rp) pairs */
	movl pm_rp0(%ebp),%esi;
	leal lv_rp(%ebp),%edi;
	cld
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	emms
	movl $0x4000,%eax;
	movd %eax,%mm4;
	punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */

	movl pm_u0(%ebp),%eax
	addl $16,%eax
	movl %eax,lv_u_top(%ebp) /* UTOP */
	movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */
	movl pm_kn(%ebp),%eax
	leal (%edx,%eax,2),%eax
	movl %eax,lv_s_top(%ebp)
	cmpl %eax,%edx
	jae .L179			/* kn <= 0: nothing to do */
	.p2align 2
.L181:				/* outer loop: one sample of s[] */
	leal lv_rp(%ebp),%esi /* RP */
	movl pm_u0(%ebp),%edi /* U */
	movw (%edx),%ax /* (0,DI) */
	roll $16,%eax
	movw (%edx),%ax /* (DI,DI) */
	.p2align 2
.L185: /* inner loop over the 8 lattice stages; RP is %esi */
	movl %eax,%ecx
	movw (%edi),%ax /* (DI,U) */
	movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
	movw %cx,(%edi)			/* store new u[i] (old DI) */

	movd %eax,%mm2 /* mm2 is (0,0,DI,U) */
	rorl $16,%eax
	movd %eax,%mm1 /* mm1 is (0,0,U,DI) */

	movq %mm1,%mm0
	pmullw %mm3,%mm0		/* low 16 bits of RP*U, RP*DI */
	pmulhw %mm3,%mm1		/* high 16 bits of RP*U, RP*DI */
	punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
	paddd %mm4,%mm0  /* mm4 is 0x00004000,0x00004000 */
	psrad $15,%mm0  /* (RP*U,RP*DI) adjusted */
	packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
	paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */
	movd %mm0,%eax /* (DI,U') */

	addl $2,%edi
	addl $4,%esi
	cmpl lv_u_top(%ebp),%edi
	jb .L185
	
	rorl $16,%eax
	movw %ax,(%edx) /* last DI goes to *s */
	addl $2,%edx /* next s */
	cmpl lv_s_top(%ebp),%edx
	jb .L181
	.p2align 2
.L179:
	emms
	popl %esi
	popl %edi
	leave
	ret
.Lfe7:
	.size	Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx

/* NOTE(review): `.end` stops the assembler here -- everything below
 * (the STEP macro and the alternative register-resident rewrite of
 * Short_term_analysis_filteringx, which would otherwise clash with
 * the symbol defined above) is deliberately NOT assembled.  It is
 * kept as work-in-progress source only. */
.end

/* 'as' macro's seem to be case-insensitive: the body below is
 * invoked as lowercase `step` in the (disabled) function. */
/* STEP n: one lattice stage like the loop body above, but with the
 * 8 u[] words held in mm4:mm5 and rotated through by 16 bits per
 * step instead of being re-read from memory; mm6 holds the rounding
 * constant and mm0 carries the previous step's (DI',U') pair in. */
.macro STEP n
.if \n
	movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */
.else
	movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
.endif
	movq %mm5,%mm1;
	movd %mm4,%ecx; movw %cx,%ax /* (DI,U) */
	psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4	/* rotate u queue */
	psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5	/* push new u' in */

	movd %eax,%mm2 /* mm2 is (0,0,DI,U) */
	rorl $16,%eax
	movd %eax,%mm1 /* mm1 is (0,0,U,DI) */

	movq %mm1,%mm0
	pmullw %mm3,%mm0
	pmulhw %mm3,%mm1
	punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
	paddd %mm6,%mm0  /* mm6 is 0x00004000,0x00004000 */
	psrad $15,%mm0  /* (RP*U,RP*DI) adjusted */
	packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
	paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */
	movd %mm0,%eax /* (DI,U') */
.endm

/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s)
 * Alternative (disabled, see .end above) version keeping the 8 u's
 * in mm4/mm5 across the whole call; loaded once, stored back once. */
	.equiv pm_u0,8
	.equiv pm_rp0,12
	.equiv pm_kn,16
	.equiv pm_s,20
	.equiv lv_rp_top,-4
	.equiv lv_s_top,-8
	.equiv lv_rp,-40 /* local version of rp0 with each word twice */
	.align 4
.globl Short_term_analysis_filteringx
	.type	Short_term_analysis_filteringx,@function
Short_term_analysis_filteringx:
	pushl %ebp
	movl %esp,%ebp
	subl $56,%esp
	pushl %edi
	pushl %esi
	pushl %ebx

	/* duplicate each of the 8 rp0 words into lv_rp: (rp,rp) pairs */
	movl pm_rp0(%ebp),%esi;
	leal lv_rp(%ebp),%edi;
	cld
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	lodsw; stosw; stosw
	movl %edi,lv_rp_top(%ebp)
	emms

	movl $0x4000,%eax;
	movd %eax,%mm6;
	punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */

	movl pm_u0(%ebp),%ebx
	movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */
	movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */
	movl pm_kn(%ebp),%eax
	leal (%edx,%eax,2),%eax
	movl %eax,lv_s_top(%ebp)
	cmpl %eax,%edx
	jae .L179			/* kn <= 0: nothing to do */
	.p2align 2
.L181:				/* outer loop: one sample of s[] */
	leal lv_rp(%ebp),%esi /* RP */
	movw (%edx),%ax /* (0,DI) */
	roll $16,%eax
	movw (%edx),%ax /* (DI,DI) */
	movd %eax,%mm0
	.p2align 2
.L185: /* inner loop, unrolled 4 stages per pass; RP is %esi */
	step 0
	step 4
	step 8
	step 12
/*
	step 16
	step 20
	step 24
	step 28
*/
	addl $16,%esi
	cmpl lv_rp_top(%ebp),%esi
	jb .L185

	rorl $16,%eax
	movw %ax,(%edx) /* last DI goes to *s */
	addl $2,%edx /* next s */
	cmpl lv_s_top(%ebp),%edx
	jb .L181
.L179:
	movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */
	emms
	popl %ebx
	popl %esi
	popl %edi
	leave
	ret
.Lfe7:
	.size	Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
	.ident	"GCC: (GNU) 2.95.2 19991109 (Debian GNU/Linux)"