1/* k6opt.s  vector functions optimized for MMX extensions to x86
2 *
3 * Copyright (C) 1999 by Stanley J. Brooks <stabro@megsinet.net>
4 *
5 * Any use of this software is permitted provided that this notice is not
6 * removed and that neither the authors nor the Technische Universitaet Berlin
7 * are deemed to have made any representations as to the suitability of this
8 * software for any purpose nor are held responsible for any defects of
9 * this software.  THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE;
10 * not even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 * A PARTICULAR PURPOSE.
12 *
13 * Chicago, 03.12.1999
14 * Stanley J. Brooks
15 */
16
17	.file	"k6opt.s"
18	.version	"01.01"
19/* gcc2_compiled.: */
20.section	.rodata
/*
 * coefs: the 11 int16 taps of the GSM weighting (FIR) filter in Q13
 * fixed point (scale 8192 = 1.0; presumably the GSM 06.10 weighting
 * filter table -- TODO confirm against the C reference).  A trailing
 * zero pads the table to 12 words so it loads as exactly three
 * 4x16-bit MMX quadwords at coefs, coefs+8 and coefs+16.
 */
21	.align 4
22	.type	 coefs,@object
23	.size	 coefs,24
24coefs:
25	.value -134
26	.value -374
27	.value 0
28	.value 2054
29	.value 5741
30	.value 8192
31	.value 5741
32	.value 2054
33	.value 0
34	.value -374
35	.value -134
36	.value 0 /* pad word: makes the third quadword load harmless */
37.text
38	.align 4
39/* void Weighting_filter (const short *e, short *x)
 *
 * MMX weighting filter: for each output k = 0..39 it forms the 12-tap
 * inner product of e[k-5..k+6] with the Q13 table `coefs` (tap 11 is
 * the zero pad), adds 0x1000 for round-to-nearest, arithmetic-shifts
 * right 13, saturates the 32-bit sum to int16, and stores x[k].
 *
 * NOTE(review): memory from e[-5] through e[45] is read (the last
 * word only multiplies the zero tap), so the caller must keep e inside
 * a larger valid buffer -- confirm against the caller.
 * Clobbers: eax, mm0-mm5 (emms executed on exit); cdecl, callee-saved
 * ebx/esi/edi preserved.
 */
40.globl Weighting_filter
41	.type	 Weighting_filter,@function
42Weighting_filter:
43	pushl %ebp
44	movl %esp,%ebp
45	pushl %edi
46	pushl %esi
47	pushl %ebx
48	movl 12(%ebp),%edi /* edi = x (output) */
49	movl 8(%ebp),%ebx /* ebx = e */
50	addl $-10,%ebx /* ebx = &e[-5]: back up 10 bytes = 5 words */
51	emms
52	movl $0x1000,%eax; movd %eax,%mm5  /* for rounding */
53	movq coefs,%mm1 /* taps 0..3 */
54	movq coefs+8,%mm2 /* taps 4..7 */
55	movq coefs+16,%mm3 /* taps 8..11 (tap 11 = 0) */
56	xorl %esi,%esi /* esi = k, output index 0..39 */
57	.p2align 2
58.L21:
59	movq (%ebx,%esi,2),%mm0 /* e[k-5..k-2] */
60	pmaddwd %mm1,%mm0
61
62	movq 8(%ebx,%esi,2),%mm4 /* e[k-1..k+2] */
63	pmaddwd %mm2,%mm4
64	paddd %mm4,%mm0
65
66	movq 16(%ebx,%esi,2),%mm4 /* e[k+3..k+6] */
67	pmaddwd %mm3,%mm4
68	paddd %mm4,%mm0
69
70	movq %mm0,%mm4
71	punpckhdq %mm0,%mm4  /* mm4 has high int32 of mm0 dup'd */
72	paddd %mm4,%mm0; /* low dword of mm0 = full 12-tap sum */
73
74	paddd %mm5,%mm0 /* add for roundoff */
75	psrad $13,%mm0 /* Q13 product back to integer */
76	packssdw %mm0,%mm0 /* saturate 32-bit sum to int16 */
77	movd %mm0,%eax  /* ax has result */
78	movw %ax,(%edi,%esi,2) /* x[k] = result */
79	incl %esi
80	cmpl $39,%esi
81	jle .L21
82	emms
83	popl %ebx
84	popl %esi
85	popl %edi
86	leave
87	ret
88.Lfe1:
89	.size	 Weighting_filter,.Lfe1-Weighting_filter
90
/*
 * ccstep n -- one cross-correlation step: load the four int16s at
 * byte offset \n from (%edi) and (%esi), multiply pairwise and add
 * the two resulting dwords into the running sums in mm0 (pmaddwd).
 * Clobbers mm1 and mm2.  The .if just picks the no-displacement
 * addressing form when \n is 0.
 */
91.macro ccstep n
92.if \n
93	movq \n(%edi),%mm1
94	movq \n(%esi),%mm2
95.else
96	movq (%edi),%mm1
97	movq (%esi),%mm2
98.endif
99	pmaddwd %mm2,%mm1
100	paddd %mm1,%mm0
101.endm
102
103	.align 4
104/* long k6maxcc(const short *wt, const short *dp, short *Nc_out)
 *
 * Long-term-predictor lag search: for each lag lambda = 40..120 it
 * computes the 40-term cross-correlation
 *     cc(lambda) = sum_{i=0..39} wt[i] * dp[i - lambda]
 * and returns the maximum found, writing the winning lambda (as a
 * 16-bit word) to *Nc_out.  The running maximum starts at 0, so if
 * every correlation is <= 0 the result is 0 with *Nc_out = 40 --
 * presumably matching the C reference search; TODO confirm.
 * Ties keep the smaller lambda (jle skips the update).
 * NOTE(review): dp is read from dp[-120] through dp[-1].
 * Clobbers: eax, ecx, edx, mm0-mm2 (emms on exit).
 */
105.globl k6maxcc
106	.type	 k6maxcc,@function
107k6maxcc:
108	pushl %ebp
109	movl %esp,%ebp
110	pushl %edi
111	pushl %esi
112	pushl %ebx
113	emms
114	movl 8(%ebp),%edi /* edi = wt */
115	movl 12(%ebp),%esi /* esi = dp */
116	movl $0,%edx  /* will be maximum inner-product */
117	movl %esp,%esp /* XXX */
	movl $40,%ebx /* ebx = lambda, counts 40..120 */
118	movl %ebx,%ecx /* will be index of max inner-product */
119	subl $80,%esi /* esi = &dp[-40] = dp - lambda words for lambda=40 */
120	.p2align 2
121.L41:
/* 40-term dot product: ten 4-word multiply-accumulate steps into mm0 */
122	movq (%edi),%mm0
123	movq (%esi),%mm2
124	pmaddwd %mm2,%mm0
125	ccstep 8
126	ccstep 16
127	ccstep 24
128	ccstep 32
129	ccstep 40
130	ccstep 48
131	ccstep 56
132	ccstep 64
133	ccstep 72
134
135	movq %mm0,%mm1
136	punpckhdq %mm0,%mm1  /* mm1 has high int32 of mm0 dup'd */
137	paddd %mm1,%mm0;
138	movd %mm0,%eax  /* eax has result */
139
140	cmpl %edx,%eax
141	jle .L40 /* not a new max: keep current best */
142	movl %eax,%edx
143	movl %ebx,%ecx
144	.p2align 2
145.L40:
146	subl $2,%esi /* next lag: slide dp window back one word */
147	incl %ebx
148	cmpl $120,%ebx
149	jle .L41
150	movl 16(%ebp),%eax
151	movw %cx,(%eax) /* *Nc_out = best lambda */
152	movl %edx,%eax /* return maximum correlation */
153	emms
154	popl %ebx
155	popl %esi
156	popl %edi
157	leave
158	ret
159.Lfe2:
160	.size	 k6maxcc,.Lfe2-k6maxcc
161
162
163	.align 4
164/* long k6iprod (const short *p, const short *q, int n)
 *
 * Returns the int32 inner product sum p[i]*q[i] for i = 0..n-1, any
 * n >= 0.  The main loop consumes 16 words per pass; 4-word, 2-word
 * and final 1-word tail loops cover every remaining count.
 * NOTE(review): the 32-bit accumulator is not checked for overflow;
 * presumably callers bound the magnitudes -- confirm.
 * Clobbers: eax, edx, mm0-mm2 (emms on exit).
 */
165.globl k6iprod
166	.type	 k6iprod,@function
167k6iprod:
168	pushl %ebp
169	movl %esp,%ebp
170	pushl %edi
171	pushl %esi
172	emms
173	pxor %mm0,%mm0 /* clear the two running dword sums */
174	movl 8(%ebp),%esi /* esi = p */
175	movl 12(%ebp),%edi /* edi = q */
176	movl 16(%ebp),%eax
177	leal -32(%esi,%eax,2),%edx /* edx = top - 32 */
178
179	cmpl %edx,%esi; ja .L202
180
181	.p2align 2
182.L201: /* 16 words (32 bytes) per iteration */
183	ccstep 0
184	ccstep 8
185	ccstep 16
186	ccstep 24
187
188	addl $32,%esi
189	addl $32,%edi
190	cmpl %edx,%esi; jbe .L201
191
192	.p2align 2
193.L202:
194	addl $24,%edx  /* now edx = top-8 */
195	cmpl %edx,%esi; ja .L205
196
197	.p2align 2
198.L203: /* 4 words per iteration */
199	ccstep 0
200
201	addl $8,%esi
202	addl $8,%edi
203	cmpl %edx,%esi; jbe .L203
204
205	.p2align 2
206.L205:
207	addl $4,%edx  /* now edx = top-4 */
208	cmpl %edx,%esi; ja .L207
209
/* at least 2 words remain: one 2-word (movd) step */
210	movd (%edi),%mm1
211	movd (%esi),%mm2
212	pmaddwd %mm2,%mm1
213	paddd %mm1,%mm0
214
215	addl $4,%esi
216	addl $4,%edi
217
218	.p2align 2
219.L207:
220	addl $2,%edx  /* now edx = top-2 */
221	cmpl %edx,%esi; ja .L209
222
/* exactly one word left: sign-extend it so pmaddwd sees (0,word) */
223	movswl (%edi),%eax
224	movd %eax,%mm1
225	movswl (%esi),%eax
226	movd %eax,%mm2
227	pmaddwd %mm2,%mm1
228	paddd %mm1,%mm0
229
230	.p2align 2
231.L209: /* fold the two partial dword sums into one result */
232	movq %mm0,%mm1
233	punpckhdq %mm0,%mm1  /* mm1 has high int32 of mm0 dup'd */
234	paddd %mm1,%mm0;
235	movd %mm0,%eax  /* eax has result */
236
237	emms
238	popl %esi
239	popl %edi
240	leave
241	ret
242.Lfe3:
243	.size	 k6iprod,.Lfe3-k6iprod
244
245
246	.align 4
247/* void k6vsraw P3((short *p, int n, int bits)
 *
 * In-place rounding arithmetic right shift of a word vector:
 *     p[i] = (p[i] + (1 << (bits-1))) >> bits,  i = 0..n-1,
 * where the rounding add saturates (paddsw).  Returns immediately if
 * bits <= 0.  The rounding constant is built by shifting the `ones`
 * word table: (1 << bits) >> 1 in each lane.  Main loop does 8 words
 * per pass, then up to 6 words two at a time, then one odd word.
 * Clobbers: eax, ecx, edx, mm0-mm3 (emms on exit).
 */
248.globl k6vsraw
249	.type	 k6vsraw,@function
250k6vsraw:
251	pushl %ebp
252	movl %esp,%ebp
253	pushl %esi
254	movl 8(%ebp),%esi /* esi = p */
255	movl 16(%ebp),%ecx /* ecx = bits */
256	andl %ecx,%ecx; jle .L399 /* nothing to do for bits <= 0 */
257	movl 12(%ebp),%eax
258	leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
259	emms
260	movd %ecx,%mm3 /* mm3 = shift count */
261	movq ones,%mm2
262	psllw %mm3,%mm2; psrlw $1,%mm2 /* mm2 = 1<<(bits-1) per word */
263	cmpl %edx,%esi; ja .L306
264
265	.p2align 2
266.L302: /* 8 words per iteration */
267	movq (%esi),%mm0
268	movq 8(%esi),%mm1
269	paddsw %mm2,%mm0 /* saturating rounding add */
270	psraw %mm3,%mm0;
271	paddsw %mm2,%mm1
272	psraw %mm3,%mm1;
273	movq %mm0,(%esi)
274	movq %mm1,8(%esi)
275	addl $16,%esi
276	cmpl %edx,%esi
277	jbe .L302
278
279	.p2align 2
280.L306:
281	addl $12,%edx /* now edx = top-4 */
282	cmpl %edx,%esi; ja .L310
283
284	.p2align 2
285.L308: /* do up to 6 words, two at a time */
286	movd  (%esi),%mm0
287	paddsw %mm2,%mm0
288	psraw %mm3,%mm0;
289	movd %mm0,(%esi)
290	addl $4,%esi
291	cmpl %edx,%esi
292	jbe .L308
293
294	.p2align 2
295.L310:
296	addl $2,%edx /* now edx = top-2 */
297	cmpl %edx,%esi; ja .L315
298
/* final odd word; zero-extension is fine -- only the low word lane
 * is stored back */
299	movzwl (%esi),%eax
300	movd %eax,%mm0
301	paddsw %mm2,%mm0
302	psraw %mm3,%mm0;
303	movd %mm0,%eax
304	movw %ax,(%esi)
305
306	.p2align 2
307.L315:
308	emms
309.L399:
310	popl %esi
311	leave
312	ret
313.Lfe4:
314	.size	 k6vsraw,.Lfe4-k6vsraw
315
316	.align 4
317/* void k6vsllw P3((short *p, int n, int bits)
 *
 * In-place left shift of a word vector: p[i] <<= bits, i = 0..n-1.
 * Plain psllw, so bits shifted out of each int16 are lost (no
 * saturation).  Returns immediately if bits <= 0.  Same loop
 * structure as k6vsraw: 8 words, then pairs, then one odd word.
 * Clobbers: eax, ecx, edx, mm0-mm3 (emms on exit).
 */
318.globl k6vsllw
319	.type	 k6vsllw,@function
320k6vsllw:
321	pushl %ebp
322	movl %esp,%ebp
323	pushl %esi
324	movl 8(%ebp),%esi /* esi = p */
325	movl 16(%ebp),%ecx /* ecx = bits */
326	andl %ecx,%ecx; jle .L499 /* nothing to do for bits <= 0 */
327	movl 12(%ebp),%eax
328	leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
329	emms
330	movd %ecx,%mm3 /* mm3 = shift count */
331	cmpl %edx,%esi; ja .L406
332
333	.p2align 2
334.L402: /* 8 words per iteration */
335	movq (%esi),%mm0
336	movq 8(%esi),%mm1
337	psllw %mm3,%mm0;
338	psllw %mm3,%mm1;
339	movq %mm0,(%esi)
340	movq %mm1,8(%esi)
341	addl $16,%esi
342	cmpl %edx,%esi
343	jbe .L402
344
345	.p2align 2
346.L406:
347	addl $12,%edx /* now edx = top-4 */
348	cmpl %edx,%esi; ja .L410
349
350	.p2align 2
351.L408: /* do up to 6 words, two at a time */
352	movd (%esi),%mm0
353	psllw %mm3,%mm0;
354	movd %mm0,(%esi)
355	addl $4,%esi
356	cmpl %edx,%esi
357	jbe .L408
358
359	.p2align 2
360.L410:
361	addl $2,%edx /* now edx = top-2 */
362	cmpl %edx,%esi; ja .L415
363
/* final odd word; only the low word lane is stored back */
364	movzwl (%esi),%eax
365	movd %eax,%mm0
366	psllw %mm3,%mm0;
367	movd %mm0,%eax
368	movw %ax,(%esi)
369
370	.p2align 2
371.L415:
372	emms
373.L499:
374	popl %esi
375	leave
376	ret
377.Lfe5:
378	.size	 k6vsllw,.Lfe5-k6vsllw
379
380
381.section	.rodata
/*
 * extremes: packed int16 sentinels used by k6maxmin when fewer than
 * 4 input words remain: low dword = two -32768 words (initial "max"),
 * high dword = two +32767 words (initial "min").
 */
382	.align 4
383	.type	 extremes,@object
384	.size	 extremes,8
385extremes:
386	.long 0x80008000
387	.long 0x7fff7fff
/*
 * ones: 0x0001 in each of four word lanes; k6vsraw shifts this to
 * build its per-word rounding constant 1 << (bits-1).
 */
388	.type	 ones,@object
389	.size	 ones,8
390ones:
391	.long 0x00010001
392	.long 0x00010001
393
394.text
395	.align 4
396/* long k6maxmin (const short *p, int n, short *out)
 *
 * Scans p[0..n-1] and returns max(maximum, -minimum) computed in 32
 * bits (so a minimum of -32768 yields +32768), i.e. the peak
 * magnitude.  If out is non-NULL it also stores out[0] = maximum and
 * out[1] = minimum.  Strategy: keep 4 word-wise running max lanes in
 * mm0 and min lanes in mm1 (seeded from `extremes` when n < 4), then
 * merge lanes 4 -> 2 -> 1; a 2-word tail and a 1-word scalar tail
 * fold in the leftovers.
 * NOTE(review): for n == 0 the result comes from the sentinels and is
 * meaningless -- presumably callers guarantee n > 0; confirm.
 * Clobbers: ecx, edx, mm0-mm4 (emms on exit).
 */
397.globl k6maxmin
398	.type	 k6maxmin,@function
399k6maxmin:
400	pushl %ebp
401	movl %esp,%ebp
402	pushl %esi
403	emms
404	movl 8(%ebp),%esi /* esi = p */
405	movl 12(%ebp),%eax
406	leal -8(%esi,%eax,2),%edx /* edx = top - 8 */
407
408	cmpl %edx,%esi
409	jbe .L52
/* fewer than 4 words: seed max/min lanes from the sentinel table */
410	movd extremes,%mm0
411	movd extremes+4,%mm1
412	jmp .L58
413
414	.p2align 2
415.L52:
416	movq (%esi),%mm0   /* mm0 will be max's */
417	movq %mm0,%mm1     /* mm1 will be min's */
418	addl $8,%esi
419	cmpl %edx,%esi
420	ja .L56
421
422	.p2align 2
423.L54: /* main loop: 4 words per iteration */
424	movq (%esi),%mm2
425
426	movq %mm2,%mm3
427	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */
428	movq %mm3,%mm4
429	pand %mm2,%mm3     /* mm3 is mm2 masked to new max's */
430	pandn %mm0,%mm4    /* mm4 is mm0 masked to its max's */
431	por %mm3,%mm4
432	movq %mm4,%mm0     /* now mm0 is updated max's */
433
434	movq %mm1,%mm3
435	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */
436	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */
437	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */
438	por %mm3,%mm2
439	movq %mm2,%mm1     /* now mm1 is updated min's */
440
441	addl $8,%esi
442	cmpl %edx,%esi
443	jbe .L54
444
445	.p2align 2
446.L56: /* merge down the 4-word max/mins to lower 2 words */
447
448	movq %mm0,%mm2
449	psrlq $32,%mm2
450	movq %mm2,%mm3
451	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */
452	pand %mm3,%mm2     /* mm2 is mm2 masked to new max's */
453	pandn %mm0,%mm3    /* mm3 is mm0 masked to its max's */
454	por %mm3,%mm2
455	movq %mm2,%mm0     /* now mm0 is updated max's */
456
457	movq %mm1,%mm2
458	psrlq $32,%mm2
459	movq %mm1,%mm3
460	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */
461	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */
462	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */
463	por %mm3,%mm2
464	movq %mm2,%mm1     /* now mm1 is updated min's */
465
466	.p2align 2
467.L58:
468	addl $4,%edx       /* now dx = top-4 */
469	cmpl %edx,%esi
470	ja .L62
471	/* here, there are >= 2 words of input remaining */
472	movd (%esi),%mm2
473
474	movq %mm2,%mm3
475	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */
476	movq %mm3,%mm4
477	pand %mm2,%mm3     /* mm3 is mm2 masked to new max's */
478	pandn %mm0,%mm4    /* mm4 is mm0 masked to its max's */
479	por %mm3,%mm4
480	movq %mm4,%mm0     /* now mm0 is updated max's */
481
482	movq %mm1,%mm3
483	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */
484	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */
485	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */
486	por %mm3,%mm2
487	movq %mm2,%mm1     /* now mm1 is updated min's */
488
489	addl $4,%esi
490
491	.p2align 2
492.L62:
493	/* merge down the 2-word max/mins to 1 word */
494
495	movq %mm0,%mm2
496	psrlq $16,%mm2
497	movq %mm2,%mm3
498	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */
499	pand %mm3,%mm2     /* mm2 is mm2 masked to new max's */
500	pandn %mm0,%mm3    /* mm3 is mm0 masked to its max's */
501	por %mm3,%mm2
502	movd %mm2,%ecx     /* cx is max so far */
503
504	movq %mm1,%mm2
505	psrlq $16,%mm2
506	movq %mm1,%mm3
507	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */
508	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */
509	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */
510	por %mm3,%mm2
511	movd %mm2,%eax     /* ax is min so far */
512
513	addl $2,%edx       /* now dx = top-2 */
514	cmpl %edx,%esi
515	ja .L65
516
517	/* here, there is one word of input left */
518	cmpw (%esi),%cx
519	jge .L64
520	movw (%esi),%cx
521	.p2align 2
522.L64:
523	cmpw (%esi),%ax
524	jle .L65
525	movw (%esi),%ax
526
527	.p2align 2
528.L65:  /* (finally!) cx is the max, ax the min */
529	movswl %cx,%ecx
530	movswl %ax,%eax
531
532	movl 16(%ebp),%edx /* ptr to output max,min vals */
533	andl %edx,%edx; jz .L77 /* out == NULL: skip the stores */
534	movw %cx,(%edx)  /* max */
535	movw %ax,2(%edx) /* min */
536	.p2align 2
537.L77:
538	/* now calculate max absolute val */
539	negl %eax /* eax = -min (32-bit, so -(-32768) is fine) */
540	cmpl %ecx,%eax
541	jge .L81
542	movl %ecx,%eax /* return max(max, -min) */
543	.p2align 2
544.L81:
545	emms
546	popl %esi
547	leave
548	ret
549.Lfe6:
550	.size	 k6maxmin,.Lfe6-k6maxmin
551
552/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s)
 *
 * MMX version of the GSM short-term analysis lattice filter.  For
 * each of the kn samples s[k] it runs an 8-stage lattice: at every
 * stage the pair (di, u) becomes
 *     u'  = u  + ((rp*di + 0x4000) >> 15)   (word-saturated)
 *     di' = di + ((rp*u  + 0x4000) >> 15)   (word-saturated)
 * with the stage's saved u exchanged through the u0[0..7] delay line;
 * the final di is written back to s[k].  rp0[0..7] is first expanded
 * into a local table (lv_rp) with each reflection coefficient stored
 * twice, so a single movd yields the (rp,rp) word pair.
 *
 * NOTE(review): the trailing "x" in the global name suggests this is
 * a disabled experiment (the C Short_term_analysis_filtering would be
 * linked instead) -- confirm before enabling.
 * Clobbers: eax, ecx, edx, mm0-mm4; DF cleared by cld.
 */
553	.equiv pm_u0,8
554	.equiv pm_rp0,12
555	.equiv pm_kn,16
556	.equiv pm_s,20
557	.equiv lv_u_top,-4
558	.equiv lv_s_top,-8
559	.equiv lv_rp,-40 /* local version of rp0 with each word twice */
560	.align 4
561.globl Short_term_analysis_filteringx
562	.type	 Short_term_analysis_filteringx,@function
563Short_term_analysis_filteringx:
564	pushl %ebp
565	movl %esp,%ebp
566	subl $40,%esp
567	pushl %edi
568	pushl %esi
569
/* duplicate the 8 reflection coefficients: lv_rp[2i] = lv_rp[2i+1] = rp0[i] */
570	movl pm_rp0(%ebp),%esi;
571	leal lv_rp(%ebp),%edi;
572	cld
573	lodsw; stosw; stosw
574	lodsw; stosw; stosw
575	lodsw; stosw; stosw
576	lodsw; stosw; stosw
577	lodsw; stosw; stosw
578	lodsw; stosw; stosw
579	lodsw; stosw; stosw
580	lodsw; stosw; stosw
581	emms
582	movl $0x4000,%eax;
583	movd %eax,%mm4;
584	punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */
585
586	movl pm_u0(%ebp),%eax
587	addl $16,%eax
588	movl %eax,lv_u_top(%ebp) /* UTOP */
589	movl pm_s(%ebp),%edx  /* edx is local s ptr throughout below */
590	movl pm_kn(%ebp),%eax
591	leal (%edx,%eax,2),%eax
592	movl %eax,lv_s_top(%ebp)
593	cmpl %eax,%edx
594	jae .L179 /* kn <= 0: nothing to do */
595	.p2align 2
596.L181: /* outer loop: one iteration per sample *s */
597	leal lv_rp(%ebp),%esi  /* RP */
598	movl pm_u0(%ebp),%edi  /* U  */
599	movw (%edx),%ax /* (0,DI) */
600	roll $16,%eax
601	movw (%edx),%ax /* (DI,DI) */
602	.p2align 2
603.L185: /* inner loop: one lattice stage; RP is %esi */
604	movl %eax,%ecx
605	movw (%edi),%ax  /* (DI,U) */
606	movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
607	movw %cx,(%edi) /* exchange: old DI becomes the stage's saved u */
608
609	movd %eax,%mm2   /* mm2 is (0,0,DI,U) */
610	rorl $16,%eax
611	movd %eax,%mm1   /* mm1 is (0,0,U,DI) */
612
613	movq %mm1,%mm0
614	pmullw %mm3,%mm0 /* low halves of the products */
615	pmulhw %mm3,%mm1 /* high halves of the products */
616	punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
617	paddd %mm4,%mm0     /* mm4 is 0x00004000,0x00004000 */
618	psrad $15,%mm0      /* (RP*U,RP*DI) adjusted */
619	packssdw %mm0,%mm0  /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
620	paddsw %mm2,%mm0    /* mm0 is (?,?, DI', U') */
621	movd %mm0,%eax      /* (DI,U') */
622
623	addl $2,%edi
624	addl $4,%esi
625	cmpl lv_u_top(%ebp),%edi
626	jb .L185
627
628	rorl $16,%eax
629	movw %ax,(%edx) /* last DI goes to *s */
630	addl $2,%edx    /* next s */
631	cmpl lv_s_top(%ebp),%edx
632	jb .L181
633	.p2align 2
634.L179:
635	emms
636	popl %esi
637	popl %edi
638	leave
639	ret
640.Lfe7:
641	.size	 Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
642
/* .end stops the assembler here: everything below is NOT assembled */
643.end
644
645/* 'as' macros seem to be case-insensitive (STEP here, `step` at the
 * call sites below).
 *
 * NOTE(review): this macro and the second Short_term_analysis_
 * filteringx below sit after the .end directive above, so they are
 * never assembled -- a kept-for-reference draft that holds the 8 u's
 * resident in mm4/mm5 instead of reloading them from memory.
 *
 * STEP n -- one lattice stage: mm4/mm5 hold the 8 saved u's, which
 * are rotated one word per stage (the old DI from mm0 is shifted in);
 * the (di,u) update math is identical to the live version above.
 * Clobbers eax, ecx, mm0-mm3.
 */
646.macro STEP n
647.if \n
648	movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */
649.else
650	movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
651.endif
652	movq %mm5,%mm1;
653	movd %mm4,%ecx; movw %cx,%ax  /* (DI,U) */
654	psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4 /* rotate u queue */
655	psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5 /* shift old DI in */
656
657	movd %eax,%mm2   /* mm2 is (0,0,DI,U) */
658	rorl $16,%eax
659	movd %eax,%mm1   /* mm1 is (0,0,U,DI) */
660
661	movq %mm1,%mm0
662	pmullw %mm3,%mm0
663	pmulhw %mm3,%mm1
664	punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
665	paddd %mm6,%mm0     /* mm6 is 0x00004000,0x00004000 */
666	psrad $15,%mm0      /* (RP*U,RP*DI) adjusted */
667	packssdw %mm0,%mm0  /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
668	paddsw %mm2,%mm0    /* mm0 is (?,?, DI', U') */
669	movd %mm0,%eax      /* (DI,U') */
670.endm
671
672/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s)
 *
 * Second, register-resident draft of the lattice filter: keeps the 8
 * u's in mm4/mm5 across the whole call (loaded once, stored back at
 * the end) instead of exchanging through memory per stage.
 *
 * NOTE(review): dead code -- it follows the .end directive above and
 * is never assembled.  If the .end were removed it would also clash
 * with the earlier definition (duplicate global symbol and
 * re-.equiv'd parameter offsets), so treat it purely as reference.
 */
673	.equiv pm_u0,8
674	.equiv pm_rp0,12
675	.equiv pm_kn,16
676	.equiv pm_s,20
677	.equiv lv_rp_top,-4
678	.equiv lv_s_top,-8
679	.equiv lv_rp,-40 /* local version of rp0 with each word twice */
680	.align 4
681.globl Short_term_analysis_filteringx
682	.type	 Short_term_analysis_filteringx,@function
683Short_term_analysis_filteringx:
684	pushl %ebp
685	movl %esp,%ebp
686	subl $56,%esp
687	pushl %edi
688	pushl %esi
689	pushl %ebx
690
/* duplicate the 8 reflection coefficients into lv_rp, as above */
691	movl pm_rp0(%ebp),%esi;
692	leal lv_rp(%ebp),%edi;
693	cld
694	lodsw; stosw; stosw
695	lodsw; stosw; stosw
696	lodsw; stosw; stosw
697	lodsw; stosw; stosw
698	lodsw; stosw; stosw
699	lodsw; stosw; stosw
700	lodsw; stosw; stosw
701	lodsw; stosw; stosw
702	movl %edi,lv_rp_top(%ebp)
703	emms
704
705	movl $0x4000,%eax;
706	movd %eax,%mm6;
707	punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */
708
709	movl pm_u0(%ebp),%ebx
710	movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */
711	movl pm_s(%ebp),%edx  /* edx is local s ptr throughout below */
712	movl pm_kn(%ebp),%eax
713	leal (%edx,%eax,2),%eax
714	movl %eax,lv_s_top(%ebp)
715	cmpl %eax,%edx
716	jae .L179 /* kn <= 0: nothing to do */
717	.p2align 2
718.L181: /* outer loop: one iteration per sample *s */
719	leal lv_rp(%ebp),%esi  /* RP */
720	movw (%edx),%ax /* (0,DI) */
721	roll $16,%eax
722	movw (%edx),%ax /* (DI,DI) */
723	movd %eax,%mm0
724	.p2align 2
725.L185: /* RP is %esi */
726	step 0
727	step 4
728	step 8
729	step 12
730/* remaining stages commented out in this draft:
731	step 16
732	step 20
733	step 24
734	step 28
735*/
736	addl $16,%esi
737	cmpl lv_rp_top(%ebp),%esi
738	jb .L185
739
740	rorl $16,%eax
741	movw %ax,(%edx) /* last DI goes to *s */
742	addl $2,%edx    /* next s */
743	cmpl lv_s_top(%ebp),%edx
744	jb .L181
745.L179:
746	movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */
747	emms
748	popl %ebx
749	popl %esi
750	popl %edi
751	leave
752	ret
753.Lfe7:
754	.size	 Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
755	.ident	"GCC: (GNU) 2.95.2 19991109 (Debian GNU/Linux)"
756