1 // Warning, number of coefficients is hardcoded. 2 asm volatile( 3 //"emms\n\t" 4 "pushl %%ebp\n\t" 5 "pushl %%eax\n\t" 6 "pushl %%edi\n\t" 7 "pushl %%ecx\n\t" 8 9 "movl %%ecx,%%ebp\n\t" 10 "bigloop_mmx"FILTMMX_SKIP_ADD_STR":\n\t" 11 12 "pxor %%mm1,%%mm1\n\t" 13 "movl $32, %%ecx\n\t" 14 15 "frup_mmx"FILTMMX_SKIP_ADD_STR":\n\t" 16 17 /* 18 PMADDWD mm1,mm2/m64 ; 0F F5 /r [PENT,MMX] 19 20 dst[0-31] := (dst[0-15] * src[0-15]) 21 + (dst[16-31] * src[16-31]); 22 dst[32-63] := (dst[32-47] * src[32-47]) 23 + (dst[48-63] * src[48-63]); 24 25 */ 26 "movq (%%edi), %%mm0\n\t" 27 "pmaddwd (%%esi), %%mm0\n\t" 28 29 "movq 8(%%edi), %%mm2\n\t" 30 "pmaddwd 8(%%esi), %%mm2\n\t" 31 32 "movq 16(%%edi), %%mm3\n\t" 33 "pmaddwd 16(%%esi), %%mm3\n\t" 34 35 "movq 24(%%edi), %%mm4\n\t" 36 "pmaddwd 24(%%esi), %%mm4\n\t" 37 38 "addl $32, %%edi\n\t" 39 "addl $32, %%esi\n\t" 40 "paddd %%mm0, %%mm1\n\t" 41 "paddd %%mm2, %%mm1\n\t" 42 "decl %%ecx\n\t" 43 "paddd %%mm3, %%mm1\n\t" 44 "paddd %%mm4, %%mm1\n\t" 45 "jnz frup_mmx"FILTMMX_SKIP_ADD_STR"\n\t" 46 47 "movd %%mm1, (%%eax)\n\t" 48 "subl $1024, %%edi\n\t" 49 "subl $1024, %%esi\n\t" 50 51 "addl $4, %%eax\n\t" 52 "addl %%edx, %%edi\n\t" 53 "decl %%ebp\n\t" 54 "jnz bigloop_mmx"FILTMMX_SKIP_ADD_STR"\n\t" 55 56 "popl %%ecx\n\t" 57 "popl %%edi\n\t" 58 "popl %%eax\n\t" 59 "emms\n\t" 60 61 "convmmx"FILTMMX_SKIP_ADD_STR":\n\t" 62 63 "movl (%%eax), %%ebp\n\t" 64 "sarl $15, %%ebp\n\t" 65 #ifndef FILTMMX_SKIP_ADD 66 "addl $32767, %%ebp\n\t" 67 #endif 68 "movl %%ebp, (%%eax)\n\t" 69 70 "fildl (%%eax)\n\t" 71 "fstp (%%eax)\n\t" 72 "addl $4, %%eax\n\t" 73 "decl %%ecx\n\t" 74 "jnz convmmx"FILTMMX_SKIP_ADD_STR"\n\t" 75 "popl %%ebp\n\t" 76 : 77 : "D" (in), "S" (ff->coeffs_i16), "a" (flout), "d" (2*ff->mrratio), "c" (bigcount) 78 ); 79