1 /*
2  * Copyright (c) 2012
3  *      MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  *    contributors may be used to endorse or promote products derived from
15  *    this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Authors:  Djordje Pesut   (djordje@mips.com)
30  *           Mirjana Vulin   (mvulin@mips.com)
31  *
32  * This file is part of FFmpeg.
33  *
34  * FFmpeg is free software; you can redistribute it and/or
35  * modify it under the terms of the GNU Lesser General Public
36  * License as published by the Free Software Foundation; either
37  * version 2.1 of the License, or (at your option) any later version.
38  *
39  * FFmpeg is distributed in the hope that it will be useful,
40  * but WITHOUT ANY WARRANTY; without even the implied warranty of
41  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
42  * Lesser General Public License for more details.
43  *
44  * You should have received a copy of the GNU Lesser General Public
45  * License along with FFmpeg; if not, write to the Free Software
46  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
47  */
48 
49 /**
50  * @file
51  * Reference: libavcodec/aacsbr.c
52  */
53 
54 #ifndef AVCODEC_MIPS_AACSBR_MIPS_H
55 #define AVCODEC_MIPS_AACSBR_MIPS_H
56 
57 #include "libavcodec/aac.h"
58 #include "libavcodec/sbr.h"
59 #include "libavutil/mips/asmdefs.h"
60 
61 #if HAVE_INLINE_ASM
sbr_qmf_analysis_mips(AVFloatDSPContext * fdsp,FFTContext * mdct,SBRDSPContext * sbrdsp,const float * in,float * x,float z[320],float W[2][32][32][2],int buf_idx)62 static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
63                              SBRDSPContext *sbrdsp, const float *in, float *x,
64                              float z[320], float W[2][32][32][2], int buf_idx)
65 {
66     int i;
67     float *w0;
68     float *w1;
69     int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
70 
71     w0 = x;
72     w1 = x + 1024;
73     for(i = 0; i < 36; i++)
74     {
75         /* loop unrolled 8 times */
76         __asm__ volatile(
77             "lw      %[temp0],   0(%[w1])         \n\t"
78             "lw      %[temp1],   4(%[w1])         \n\t"
79             "lw      %[temp2],   8(%[w1])         \n\t"
80             "lw      %[temp3],   12(%[w1])        \n\t"
81             "lw      %[temp4],   16(%[w1])        \n\t"
82             "lw      %[temp5],   20(%[w1])        \n\t"
83             "lw      %[temp6],   24(%[w1])        \n\t"
84             "lw      %[temp7],   28(%[w1])        \n\t"
85             "sw      %[temp0],   0(%[w0])         \n\t"
86             "sw      %[temp1],   4(%[w0])         \n\t"
87             "sw      %[temp2],   8(%[w0])         \n\t"
88             "sw      %[temp3],   12(%[w0])        \n\t"
89             "sw      %[temp4],   16(%[w0])        \n\t"
90             "sw      %[temp5],   20(%[w0])        \n\t"
91             "sw      %[temp6],   24(%[w0])        \n\t"
92             "sw      %[temp7],   28(%[w0])        \n\t"
93             PTR_ADDIU " %[w0],      %[w0],     32 \n\t"
94             PTR_ADDIU " %[w1],      %[w1],     32 \n\t"
95 
96             : [w0]"+r"(w0), [w1]"+r"(w1),
97               [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
98               [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
99               [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
100               [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
101             :
102             : "memory"
103         );
104     }
105 
106     w0 = x + 288;
107     w1 = (float*)in;
108     for(i = 0; i < 128; i++)
109     {
110         /* loop unrolled 8 times */
111         __asm__ volatile(
112             "lw       %[temp0],    0(%[w1])        \n\t"
113             "lw       %[temp1],    4(%[w1])        \n\t"
114             "lw       %[temp2],    8(%[w1])        \n\t"
115             "lw       %[temp3],    12(%[w1])       \n\t"
116             "lw       %[temp4],    16(%[w1])       \n\t"
117             "lw       %[temp5],    20(%[w1])       \n\t"
118             "lw       %[temp6],    24(%[w1])       \n\t"
119             "lw       %[temp7],    28(%[w1])       \n\t"
120             "sw       %[temp0],    0(%[w0])        \n\t"
121             "sw       %[temp1],    4(%[w0])        \n\t"
122             "sw       %[temp2],    8(%[w0])        \n\t"
123             "sw       %[temp3],    12(%[w0])       \n\t"
124             "sw       %[temp4],    16(%[w0])       \n\t"
125             "sw       %[temp5],    20(%[w0])       \n\t"
126             "sw       %[temp6],    24(%[w0])       \n\t"
127             "sw       %[temp7],    28(%[w0])       \n\t"
128             PTR_ADDIU "  %[w0],       %[w0],    32 \n\t"
129             PTR_ADDIU "  %[w1],       %[w1],    32 \n\t"
130 
131             : [w0]"+r"(w0), [w1]"+r"(w1),
132               [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
133               [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
134               [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
135               [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
136             :
137             : "memory"
138         );
139     }
140 
141     for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
142                                // are not supported
143         fdsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320);
144         sbrdsp->sum64x5(z);
145         sbrdsp->qmf_pre_shuffle(z);
146         mdct->imdct_half(mdct, z, z+64);
147         sbrdsp->qmf_post_shuffle(W[buf_idx][i], z);
148         x += 32;
149     }
150 }
151 
152 #if HAVE_MIPSFPU
153 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * MIPS FPU inline-asm variant of the SBR QMF synthesis step
 * (reference implementation: libavcodec/aacsbr.c).
 *
 * For each of 32 slots the function
 *  1. moves the write position *v_off back by step (= 128 >> div) floats
 *     inside the buffer v0; when fewer than step floats remain in front of
 *     it, the first saved_samples floats of v0 are first copied to the end
 *     of the buffer and *v_off is re-pointed just in front of that copy;
 *  2. fills v = v0 + *v_off from X using imdct_half followed by
 *     qmf_deint_neg (div != 0) or neg_odd_64 + qmf_deint_bfly (div == 0);
 *  3. produces the output samples as a sum of ten element-wise products of
 *     spans of v with spans of the window table.
 *
 * With div == 0 step 3 is done by the inline asm below, which writes
 * 64 floats per slot through the local cursor dst (initialised to out).
 * With div != 0 it is done by ten fdsp calls on out, which is then advanced
 * by 64 >> div floats.
 *
 * @param mdct     context passed to (and supplying) imdct_half
 * @param sbrdsp   supplies qmf_deint_neg, neg_odd_64 and qmf_deint_bfly
 * @param fdsp     supplies vector_fmul and vector_fmul_add (div != 0 only)
 * @param out      output samples
 * @param X        input; modified in place (X[0][i] is rewritten when
 *                 div != 0, X[1][i] is handed to neg_odd_64 when div == 0)
 * @param mdct_buf scratch output for the imdct_half calls
 * @param v0       buffer of SBR_SYNTHESIS_BUF_SIZE floats, updated in place
 * @param v_off    current offset into v0, updated in place
 * @param div      0 selects sbr_qmf_window_us and the asm path, non-zero
 *                 selects sbr_qmf_window_ds and the fdsp path
 */
static void sbr_qmf_synthesis_mips(FFTContext *mdct,
                              SBRDSPContext *sbrdsp, AVFloatDSPContext *fdsp,
                              float *out, float X[2][38][64],
                              float mdct_buf[2][64],
                              float *v0, int *v_off, const unsigned int div)
{
    int i, n;
    const float *sbr_qmf_window = div ? sbr_qmf_window_ds : sbr_qmf_window_us;
    const int step = 128 >> div;
    float *v;
    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13;
    float temp14, temp15, temp16, temp17, temp18, temp19;
    float *vv0, *s0, *dst;
    /* dst is the output cursor of the asm path; out is left untouched there. */
    dst = out;

    for (i = 0; i < 32; i++) {
        if (*v_off < step) {
            /* Not enough room in front of *v_off: copy the first
               saved_samples floats to the end of v0 and restart from there. */
            int saved_samples = (1280 - 128) >> div;
            memcpy(&v0[SBR_SYNTHESIS_BUF_SIZE - saved_samples], v0, saved_samples * sizeof(float));
            *v_off = SBR_SYNTHESIS_BUF_SIZE - saved_samples - step;
        } else {
            *v_off -= step;
        }
        v = v0 + *v_off;
        if (div) {
            /* Negate the first 32 values of X[0][i] and fill its second half
               with the first 32 values of X[1][i] in reverse order. */
            for (n = 0; n < 32; n++) {
                X[0][i][   n] = -X[0][i][n];
                X[0][i][32+n] =  X[1][i][31-n];
            }
            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
            sbrdsp->qmf_deint_neg(v, mdct_buf[0]);
        } else {
            sbrdsp->neg_odd_64(X[1][i]);
            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
            mdct->imdct_half(mdct, mdct_buf[1], X[1][i]);
            sbrdsp->qmf_deint_bfly(v, mdct_buf[1], mdct_buf[0]);
        }

        if(div == 0)
        {
            float *v0_end;
            /* Note: the asm operand named [v0] is bound to vv0, a cursor
               into v, not to the function parameter v0. */
            vv0 = v;
            /* The loop handles 4 floats per pass and stops when the cursor
               reaches v + 60 (15 passes); the last 4 floats are handled by
               the straight-line code after the branch. */
            v0_end = v + 60;
            s0 = (float*)sbr_qmf_window;

            /* 10 calls of function vector_fmul_add merged into one loop
               and loop unrolled 4 times */
            /* Byte offsets on [v0] (0, 768, 1024, 1792, 2048, 2816, 3072,
               3840, 4096, 4864) are the float offsets 0, 192, 256, 448, 512,
               704, 768, 960, 1024, 1216 used by the else branch below with
               div == 0; offsets on [s0] step by 256 bytes (64 floats).
               temp0..temp3 accumulate four adjacent outputs, temp4..temp11
               and temp12..temp19 alternately hold the operands of the next
               two taps so loads overlap with the multiply-adds. */
            __asm__ volatile(
                ".set    push                                           \n\t"
                ".set    noreorder                                      \n\t"
                /* preload taps 0 and 1 of the first group */
                "lwc1    %[temp4],   0(%[v0])                           \n\t"
                "lwc1    %[temp5],   0(%[s0])                           \n\t"
                "lwc1    %[temp6],   4(%[v0])                           \n\t"
                "lwc1    %[temp7],   4(%[s0])                           \n\t"
                "lwc1    %[temp8],   8(%[v0])                           \n\t"
                "lwc1    %[temp9],   8(%[s0])                           \n\t"
                "lwc1    %[temp10],  12(%[v0])                          \n\t"
                "lwc1    %[temp11],  12(%[s0])                          \n\t"
                "lwc1    %[temp12],  768(%[v0])                         \n\t"
                "lwc1    %[temp13],  256(%[s0])                         \n\t"
                "lwc1    %[temp14],  772(%[v0])                         \n\t"
                "lwc1    %[temp15],  260(%[s0])                         \n\t"
                "lwc1    %[temp16],  776(%[v0])                         \n\t"
                "lwc1    %[temp17],  264(%[s0])                         \n\t"
                "lwc1    %[temp18],  780(%[v0])                         \n\t"
                "lwc1    %[temp19],  268(%[s0])                         \n\t"
            "1:                                                         \n\t"
                /* tap 0 starts the four sums, the remaining taps are
                   accumulated with madd.s */
                "mul.s   %[temp0],   %[temp4],   %[temp5]               \n\t"
                "lwc1    %[temp4],   1024(%[v0])                        \n\t"
                "mul.s   %[temp1],   %[temp6],   %[temp7]               \n\t"
                "lwc1    %[temp5],   512(%[s0])                         \n\t"
                "mul.s   %[temp2],   %[temp8],   %[temp9]               \n\t"
                "lwc1    %[temp6],   1028(%[v0])                        \n\t"
                "mul.s   %[temp3],   %[temp10],  %[temp11]              \n\t"
                "lwc1    %[temp7],   516(%[s0])                         \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   1032(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   520(%[s0])                         \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  1036(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  524(%[s0])                         \n\t"
                "lwc1    %[temp12],  1792(%[v0])                        \n\t"
                "lwc1    %[temp13],  768(%[s0])                         \n\t"
                "lwc1    %[temp14],  1796(%[v0])                        \n\t"
                "lwc1    %[temp15],  772(%[s0])                         \n\t"
                "lwc1    %[temp16],  1800(%[v0])                        \n\t"
                "lwc1    %[temp17],  776(%[s0])                         \n\t"
                "lwc1    %[temp18],  1804(%[v0])                        \n\t"
                "lwc1    %[temp19],  780(%[s0])                         \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   2048(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   1024(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   2052(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   1028(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   2056(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   1032(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  2060(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  1036(%[s0])                        \n\t"
                "lwc1    %[temp12],  2816(%[v0])                        \n\t"
                "lwc1    %[temp13],  1280(%[s0])                        \n\t"
                "lwc1    %[temp14],  2820(%[v0])                        \n\t"
                "lwc1    %[temp15],  1284(%[s0])                        \n\t"
                "lwc1    %[temp16],  2824(%[v0])                        \n\t"
                "lwc1    %[temp17],  1288(%[s0])                        \n\t"
                "lwc1    %[temp18],  2828(%[v0])                        \n\t"
                "lwc1    %[temp19],  1292(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   3072(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   1536(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   3076(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   1540(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   3080(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   1544(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  3084(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  1548(%[s0])                        \n\t"
                "lwc1    %[temp12],  3840(%[v0])                        \n\t"
                "lwc1    %[temp13],  1792(%[s0])                        \n\t"
                "lwc1    %[temp14],  3844(%[v0])                        \n\t"
                "lwc1    %[temp15],  1796(%[s0])                        \n\t"
                "lwc1    %[temp16],  3848(%[v0])                        \n\t"
                "lwc1    %[temp17],  1800(%[s0])                        \n\t"
                "lwc1    %[temp18],  3852(%[v0])                        \n\t"
                "lwc1    %[temp19],  1804(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   4096(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   2048(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   4100(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   2052(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   4104(%[v0])                        \n\t"
                /* dst, s0 and v0 are advanced by 16 bytes (4 floats) in the
                   middle of the pass; every load on a pointer after its
                   increment uses an offset that is 16 smaller to compensate
                   (2044 = 2060 - 16, 4848 = 4864 - 16, 2288 = 2304 - 16). */
                PTR_ADDIU "%[dst],     %[dst],      16                  \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   2056(%[s0])                        \n\t"
                PTR_ADDIU " %[s0],      %[s0],      16                  \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  4108(%[v0])                        \n\t"
                PTR_ADDIU " %[v0],      %[v0],      16                  \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  2044(%[s0])                        \n\t"
                "lwc1    %[temp12],  4848(%[v0])                        \n\t"
                "lwc1    %[temp13],  2288(%[s0])                        \n\t"
                "lwc1    %[temp14],  4852(%[v0])                        \n\t"
                "lwc1    %[temp15],  2292(%[s0])                        \n\t"
                "lwc1    %[temp16],  4856(%[v0])                        \n\t"
                "lwc1    %[temp17],  2296(%[s0])                        \n\t"
                "lwc1    %[temp18],  4860(%[v0])                        \n\t"
                "lwc1    %[temp19],  2300(%[s0])                        \n\t"
                /* last two taps of this group, interleaved with the preload
                   of taps 0 and 1 of the next group */
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   0(%[v0])                           \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   0(%[s0])                           \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   4(%[v0])                           \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   4(%[s0])                           \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   8(%[v0])                           \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   8(%[s0])                           \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  12(%[v0])                          \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  12(%[s0])                          \n\t"
                "lwc1    %[temp12],  768(%[v0])                         \n\t"
                "lwc1    %[temp13],  256(%[s0])                         \n\t"
                "lwc1    %[temp14],  772(%[v0])                         \n\t"
                "lwc1    %[temp15],  260(%[s0])                         \n\t"
                "lwc1    %[temp16],  776(%[v0])                         \n\t"
                "lwc1    %[temp17],  264(%[s0])                         \n\t"
                "lwc1    %[temp18],  780(%[v0])                         \n\t"
                "lwc1    %[temp19],  268(%[s0])                         \n\t"
                /* store the finished group; dst was already advanced, hence
                   the negative offsets */
                "swc1    %[temp0],   -16(%[dst])                        \n\t"
                "swc1    %[temp1],   -12(%[dst])                        \n\t"
                "swc1    %[temp2],   -8(%[dst])                         \n\t"
                "bne     %[v0],      %[v0_end],  1b                     \n\t"
                /* branch delay slot (.set noreorder): this store is executed
                   whether or not the branch is taken */
                " swc1   %[temp3],   -4(%[dst])                         \n\t"
                /* epilogue: final group of 4 outputs, same tap sequence as
                   the loop body but without preloading a following group */
                "mul.s   %[temp0],   %[temp4],   %[temp5]               \n\t"
                "lwc1    %[temp4],   1024(%[v0])                        \n\t"
                "mul.s   %[temp1],   %[temp6],   %[temp7]               \n\t"
                "lwc1    %[temp5],   512(%[s0])                         \n\t"
                "mul.s   %[temp2],   %[temp8],   %[temp9]               \n\t"
                "lwc1    %[temp6],   1028(%[v0])                        \n\t"
                "mul.s   %[temp3],   %[temp10],  %[temp11]              \n\t"
                "lwc1    %[temp7],   516(%[s0])                         \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   1032(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   520(%[s0])                         \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  1036(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  524(%[s0])                         \n\t"
                "lwc1    %[temp12],  1792(%[v0])                        \n\t"
                "lwc1    %[temp13],  768(%[s0])                         \n\t"
                "lwc1    %[temp14],  1796(%[v0])                        \n\t"
                "lwc1    %[temp15],  772(%[s0])                         \n\t"
                "lwc1    %[temp16],  1800(%[v0])                        \n\t"
                "lwc1    %[temp17],  776(%[s0])                         \n\t"
                "lwc1    %[temp18],  1804(%[v0])                        \n\t"
                "lwc1    %[temp19],  780(%[s0])                         \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   2048(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   1024(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   2052(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   1028(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   2056(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   1032(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  2060(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  1036(%[s0])                        \n\t"
                "lwc1    %[temp12],  2816(%[v0])                        \n\t"
                "lwc1    %[temp13],  1280(%[s0])                        \n\t"
                "lwc1    %[temp14],  2820(%[v0])                        \n\t"
                "lwc1    %[temp15],  1284(%[s0])                        \n\t"
                "lwc1    %[temp16],  2824(%[v0])                        \n\t"
                "lwc1    %[temp17],  1288(%[s0])                        \n\t"
                "lwc1    %[temp18],  2828(%[v0])                        \n\t"
                "lwc1    %[temp19],  1292(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   3072(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   1536(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   3076(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   1540(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   3080(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   1544(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  3084(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  1548(%[s0])                        \n\t"
                "lwc1    %[temp12],  3840(%[v0])                        \n\t"
                "lwc1    %[temp13],  1792(%[s0])                        \n\t"
                "lwc1    %[temp14],  3844(%[v0])                        \n\t"
                "lwc1    %[temp15],  1796(%[s0])                        \n\t"
                "lwc1    %[temp16],  3848(%[v0])                        \n\t"
                "lwc1    %[temp17],  1800(%[s0])                        \n\t"
                "lwc1    %[temp18],  3852(%[v0])                        \n\t"
                "lwc1    %[temp19],  1804(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp4],   4096(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp5],   2048(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp6],   4100(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp7],   2052(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                "lwc1    %[temp8],   4104(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "lwc1    %[temp9],   2056(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "lwc1    %[temp10],  4108(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "lwc1    %[temp11],  2060(%[s0])                        \n\t"
                "lwc1    %[temp12],  4864(%[v0])                        \n\t"
                "lwc1    %[temp13],  2304(%[s0])                        \n\t"
                "lwc1    %[temp14],  4868(%[v0])                        \n\t"
                "lwc1    %[temp15],  2308(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
                "lwc1    %[temp16],  4872(%[v0])                        \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
                "lwc1    %[temp17],  2312(%[s0])                        \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
                "lwc1    %[temp18],  4876(%[v0])                        \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
                "lwc1    %[temp19],  2316(%[s0])                        \n\t"
                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
                PTR_ADDIU "%[dst],     %[dst],     16                   \n\t"
                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
                "swc1    %[temp0],   -16(%[dst])                        \n\t"
                "swc1    %[temp1],   -12(%[dst])                        \n\t"
                "swc1    %[temp2],   -8(%[dst])                         \n\t"
                "swc1    %[temp3],   -4(%[dst])                         \n\t"
                ".set    pop                                            \n\t"

                /* dst, vv0 and s0 are read-write cursors; every temp is an
                   early-clobber FPU register; "memory" covers the stores
                   made through dst. */
                : [dst]"+r"(dst), [v0]"+r"(vv0), [s0]"+r"(s0),
                  [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
                  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
                  [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
                  [temp12]"=&f"(temp12), [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
                  [temp15]"=&f"(temp15), [temp16]"=&f"(temp16), [temp17]"=&f"(temp17),
                  [temp18]"=&f"(temp18), [temp19]"=&f"(temp19)
                : [v0_end]"r"(v0_end)
                : "memory"
            );
        }
        else
        {
            /* Generic path: the same ten-tap windowed sum, done with
               64 >> div elements per call through the fdsp function
               pointers. */
            fdsp->vector_fmul   (out, v                , sbr_qmf_window                       , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 192 >> div), sbr_qmf_window + ( 64 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 256 >> div), sbr_qmf_window + (128 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 448 >> div), sbr_qmf_window + (192 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 512 >> div), sbr_qmf_window + (256 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 704 >> div), sbr_qmf_window + (320 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 768 >> div), sbr_qmf_window + (384 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + ( 960 >> div), sbr_qmf_window + (448 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + (1024 >> div), sbr_qmf_window + (512 >> div), out   , 64 >> div);
            fdsp->vector_fmul_add(out, v + (1216 >> div), sbr_qmf_window + (576 >> div), out   , 64 >> div);
            out += 64 >> div;
        }
    }
}
488 
489 #define sbr_qmf_analysis sbr_qmf_analysis_mips
490 #define sbr_qmf_synthesis sbr_qmf_synthesis_mips
491 
492 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
493 #endif /* HAVE_MIPSFPU */
494 #endif /* HAVE_INLINE_ASM */
495 
496 #endif /* AVCODEC_MIPS_AACSBR_MIPS_H */
497