1 /* Copyright (C) 2002-2006 Jean-Marc Valin
2    File: filters.c
3    Various analysis/synthesis filters
4 
5    Redistribution and use in source and binary forms, with or without
6    modification, are permitted provided that the following conditions
7    are met:
8 
9    - Redistributions of source code must retain the above copyright
10    notice, this list of conditions and the following disclaimer.
11 
12    - Redistributions in binary form must reproduce the above copyright
13    notice, this list of conditions and the following disclaimer in the
14    documentation and/or other materials provided with the distribution.
15 
16    - Neither the name of the Xiph.org Foundation nor the names of its
17    contributors may be used to endorse or promote products derived from
18    this software without specific prior written permission.
19 
20    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
24    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32 
33 #ifdef HAVE_CONFIG_H
34 #include "config.h"
35 #endif
36 
37 #include "filters.h"
38 #include "stack_alloc.h"
39 #include "arch.h"
40 #include "math_approx.h"
41 #include "ltp.h"
42 #include <math.h>
43 
44 #ifdef _USE_SSE
45 #include "filters_sse.h"
46 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
47 #include "filters_arm4.h"
48 #elif defined (BFIN_ASM)
49 #include "filters_bfin.h"
50 #endif
51 
52 
53 
bw_lpc(spx_word16_t gamma,const spx_coef_t * lpc_in,spx_coef_t * lpc_out,int order)54 void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
55 {
56    int i;
57    spx_word16_t tmp=gamma;
58    for (i=0;i<order;i++)
59    {
60       lpc_out[i] = MULT16_16_P15(tmp,lpc_in[i]);
61       tmp = MULT16_16_P15(tmp, gamma);
62    }
63 }
64 
sanitize_values32(spx_word32_t * vec,spx_word32_t min_val,spx_word32_t max_val,int len)65 void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len)
66 {
67    int i;
68    for (i=0;i<len;i++)
69    {
70       /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */
71       if (!(vec[i]>=min_val && vec[i] <= max_val))
72       {
73          if (vec[i] < min_val)
74             vec[i] = min_val;
75          else if (vec[i] > max_val)
76             vec[i] = max_val;
77          else /* Has to be NaN */
78             vec[i] = 0;
79       }
80    }
81 }
82 
highpass(const spx_word16_t * x,spx_word16_t * y,int len,int filtID,spx_mem_t * mem)83 void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem)
84 {
85    int i;
86 #ifdef FIXED_POINT
87    const spx_word16_t Pcoef[5][3] = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
88    const spx_word16_t Zcoef[5][3] = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
89 #else
90    const spx_word16_t Pcoef[5][3] = {{1.00000f, -1.91120f, 0.91498f}, {1.00000f, -1.92683f, 0.93071f}, {1.00000f, -1.93338f, 0.93553f}, {1.00000f, -1.97226f, 0.97332f}, {1.00000f, -1.37000f, 0.39900f}};
91    const spx_word16_t Zcoef[5][3] = {{0.95654f, -1.91309f, 0.95654f}, {0.96446f, -1.92879f, 0.96446f}, {0.96723f, -1.93445f, 0.96723f}, {0.98645f, -1.97277f, 0.98645f}, {0.88000f, -1.76000f, 0.88000f}};
92 #endif
93    const spx_word16_t *den, *num;
94    if (filtID>4)
95       filtID=4;
96 
97    den = Pcoef[filtID]; num = Zcoef[filtID];
98    /*return;*/
99    for (i=0;i<len;i++)
100    {
101       spx_word16_t yi;
102       spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]);
103       yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767));
104       mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1));
105       mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1));
106       y[i] = yi;
107    }
108 }
109 
110 #ifdef FIXED_POINT
111 
112 /* FIXME: These functions are ugly and probably introduce too much error */
signal_mul(const spx_sig_t * x,spx_sig_t * y,spx_word32_t scale,int len)113 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
114 {
115    int i;
116    for (i=0;i<len;i++)
117    {
118       y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
119    }
120 }
121 
signal_div(const spx_word16_t * x,spx_word16_t * y,spx_word32_t scale,int len)122 void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
123 {
124    int i;
125    if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
126    {
127       spx_word16_t scale_1;
128       scale = PSHR32(scale, SIG_SHIFT);
129       scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
130       for (i=0;i<len;i++)
131       {
132          y[i] = MULT16_16_P15(scale_1, x[i]);
133       }
134    } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
135       spx_word16_t scale_1;
136       scale = PSHR32(scale, SIG_SHIFT-5);
137       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
138       for (i=0;i<len;i++)
139       {
140          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
141       }
142    } else {
143       spx_word16_t scale_1;
144       scale = PSHR32(scale, SIG_SHIFT-7);
145       if (scale < 5)
146          scale = 5;
147       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
148       for (i=0;i<len;i++)
149       {
150          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
151       }
152    }
153 }
154 
155 #else
156 
signal_mul(const spx_sig_t * x,spx_sig_t * y,spx_word32_t scale,int len)157 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
158 {
159    int i;
160    for (i=0;i<len;i++)
161       y[i] = scale*x[i];
162 }
163 
signal_div(const spx_sig_t * x,spx_sig_t * y,spx_word32_t scale,int len)164 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
165 {
166    int i;
167    float scale_1 = 1/scale;
168    for (i=0;i<len;i++)
169       y[i] = scale_1*x[i];
170 }
171 #endif
172 
173 
174 
175 #ifdef FIXED_POINT
176 
177 
178 
compute_rms(const spx_sig_t * x,int len)179 spx_word16_t compute_rms(const spx_sig_t *x, int len)
180 {
181    int i;
182    spx_word32_t sum=0;
183    spx_sig_t max_val=1;
184    int sig_shift;
185 
186    for (i=0;i<len;i++)
187    {
188       spx_sig_t tmp = x[i];
189       if (tmp<0)
190          tmp = -tmp;
191       if (tmp > max_val)
192          max_val = tmp;
193    }
194 
195    sig_shift=0;
196    while (max_val>16383)
197    {
198       sig_shift++;
199       max_val >>= 1;
200    }
201 
202    for (i=0;i<len;i+=4)
203    {
204       spx_word32_t sum2=0;
205       spx_word16_t tmp;
206       tmp = EXTRACT16(SHR32(x[i],sig_shift));
207       sum2 = MAC16_16(sum2,tmp,tmp);
208       tmp = EXTRACT16(SHR32(x[i+1],sig_shift));
209       sum2 = MAC16_16(sum2,tmp,tmp);
210       tmp = EXTRACT16(SHR32(x[i+2],sig_shift));
211       sum2 = MAC16_16(sum2,tmp,tmp);
212       tmp = EXTRACT16(SHR32(x[i+3],sig_shift));
213       sum2 = MAC16_16(sum2,tmp,tmp);
214       sum = ADD32(sum,SHR32(sum2,6));
215    }
216 
217    return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
218 }
219 
compute_rms16(const spx_word16_t * x,int len)220 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
221 {
222    int i;
223    spx_word16_t max_val=10;
224 
225    for (i=0;i<len;i++)
226    {
227       spx_sig_t tmp = x[i];
228       if (tmp<0)
229          tmp = -tmp;
230       if (tmp > max_val)
231          max_val = tmp;
232    }
233    if (max_val>16383)
234    {
235       spx_word32_t sum=0;
236       for (i=0;i<len;i+=4)
237       {
238          spx_word32_t sum2=0;
239          sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1));
240          sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1));
241          sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1));
242          sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1));
243          sum = ADD32(sum,SHR32(sum2,6));
244       }
245       return SHL16(spx_sqrt(DIV32(sum,len)),4);
246    } else {
247       spx_word32_t sum=0;
248       int sig_shift=0;
249       if (max_val < 8192)
250          sig_shift=1;
251       if (max_val < 4096)
252          sig_shift=2;
253       if (max_val < 2048)
254          sig_shift=3;
255       for (i=0;i<len;i+=4)
256       {
257          spx_word32_t sum2=0;
258          sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
259          sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
260          sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
261          sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
262          sum = ADD32(sum,SHR32(sum2,6));
263       }
264       return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
265    }
266 }
267 
268 #ifndef OVERRIDE_NORMALIZE16
normalize16(const spx_sig_t * x,spx_word16_t * y,spx_sig_t max_scale,int len)269 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
270 {
271    int i;
272    spx_sig_t max_val=1;
273    int sig_shift;
274 
275    for (i=0;i<len;i++)
276    {
277       spx_sig_t tmp = x[i];
278       if (tmp<0)
279          tmp = NEG32(tmp);
280       if (tmp >= max_val)
281          max_val = tmp;
282    }
283 
284    sig_shift=0;
285    while (max_val>max_scale)
286    {
287       sig_shift++;
288       max_val >>= 1;
289    }
290 
291    for (i=0;i<len;i++)
292       y[i] = EXTRACT16(SHR32(x[i], sig_shift));
293 
294    return sig_shift;
295 }
296 #endif
297 
298 #else
299 
compute_rms(const spx_sig_t * x,int len)300 spx_word16_t compute_rms(const spx_sig_t *x, int len)
301 {
302    int i;
303    float sum=0;
304    for (i=0;i<len;i++)
305    {
306       sum += x[i]*x[i];
307    }
308    return sqrt(.1+sum/len);
309 }
compute_rms16(const spx_word16_t * x,int len)310 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
311 {
312    return compute_rms(x, len);
313 }
314 #endif
315 
316 
317 
318 #ifndef OVERRIDE_FILTER_MEM16
filter_mem16(const spx_word16_t * x,const spx_coef_t * num,const spx_coef_t * den,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)319 void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
320 {
321    int i,j;
322    spx_word16_t xi,yi,nyi;
323    for (i=0;i<N;i++)
324    {
325       xi= x[i];
326       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
327       nyi = NEG16(yi);
328       for (j=0;j<ord-1;j++)
329       {
330          mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
331       }
332       mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
333       y[i] = yi;
334    }
335 }
336 #endif
337 
338 #ifndef OVERRIDE_IIR_MEM16
iir_mem16(const spx_word16_t * x,const spx_coef_t * den,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)339 void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
340 {
341    int i,j;
342    spx_word16_t yi,nyi;
343 
344    for (i=0;i<N;i++)
345    {
346       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
347       nyi = NEG16(yi);
348       for (j=0;j<ord-1;j++)
349       {
350          mem[j] = MAC16_16(mem[j+1],den[j],nyi);
351       }
352       mem[ord-1] = MULT16_16(den[ord-1],nyi);
353       y[i] = yi;
354    }
355 }
356 #endif
357 
358 #ifndef OVERRIDE_FIR_MEM16
fir_mem16(const spx_word16_t * x,const spx_coef_t * num,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)359 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
360 {
361    int i,j;
362    spx_word16_t xi,yi;
363 
364    for (i=0;i<N;i++)
365    {
366       xi=x[i];
367       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
368       for (j=0;j<ord-1;j++)
369       {
370          mem[j] = MAC16_16(mem[j+1], num[j],xi);
371       }
372       mem[ord-1] = MULT16_16(num[ord-1],xi);
373       y[i] = yi;
374    }
375 }
376 #endif
377 
378 
syn_percep_zero16(const spx_word16_t * xx,const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)379 void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
380 {
381    int i;
382    VARDECL(spx_mem_t *mem);
383    ALLOC(mem, ord, spx_mem_t);
384    for (i=0;i<ord;i++)
385       mem[i]=0;
386    iir_mem16(xx, ak, y, N, ord, mem, stack);
387    for (i=0;i<ord;i++)
388       mem[i]=0;
389    filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
390 }
residue_percep_zero16(const spx_word16_t * xx,const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)391 void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
392 {
393    int i;
394    VARDECL(spx_mem_t *mem);
395    ALLOC(mem, ord, spx_mem_t);
396    for (i=0;i<ord;i++)
397       mem[i]=0;
398    filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
399    for (i=0;i<ord;i++)
400       mem[i]=0;
401    fir_mem16(y, awk2, y, N, ord, mem, stack);
402 }
403 
404 
405 #ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE
compute_impulse_response(const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)406 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
407 {
408    int i,j;
409    spx_word16_t y1, ny1i, ny2i;
410    VARDECL(spx_mem_t *mem1);
411    VARDECL(spx_mem_t *mem2);
412    ALLOC(mem1, ord, spx_mem_t);
413    ALLOC(mem2, ord, spx_mem_t);
414 
415    y[0] = LPC_SCALING;
416    for (i=0;i<ord;i++)
417       y[i+1] = awk1[i];
418    i++;
419    for (;i<N;i++)
420       y[i] = VERY_SMALL;
421    for (i=0;i<ord;i++)
422       mem1[i] = mem2[i] = 0;
423    for (i=0;i<N;i++)
424    {
425       y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
426       ny1i = NEG16(y1);
427       y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
428       ny2i = NEG16(y[i]);
429       for (j=0;j<ord-1;j++)
430       {
431          mem1[j] = MAC16_16(mem1[j+1], awk2[j],ny1i);
432          mem2[j] = MAC16_16(mem2[j+1], ak[j],ny2i);
433       }
434       mem1[ord-1] = MULT16_16(awk2[ord-1],ny1i);
435       mem2[ord-1] = MULT16_16(ak[ord-1],ny2i);
436    }
437 }
438 #endif
439 
440 /* Decomposes a signal into low-band and high-band using a QMF */
qmf_decomp(const spx_word16_t * xx,const spx_word16_t * aa,spx_word16_t * y1,spx_word16_t * y2,int N,int M,spx_word16_t * mem,char * stack)441 void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack)
442 {
443    int i,j,k,M2;
444    VARDECL(spx_word16_t *a);
445    VARDECL(spx_word16_t *x);
446    spx_word16_t *x2;
447 
448    ALLOC(a, M, spx_word16_t);
449    ALLOC(x, N+M-1, spx_word16_t);
450    x2=x+M-1;
451    M2=M>>1;
452    for (i=0;i<M;i++)
453       a[M-i-1]= aa[i];
454    for (i=0;i<M-1;i++)
455       x[i]=mem[M-i-2];
456    for (i=0;i<N;i++)
457       x[i+M-1]=SHR16(xx[i],1);
458    for (i=0;i<M-1;i++)
459       mem[i]=SHR16(xx[N-i-1],1);
460    for (i=0,k=0;i<N;i+=2,k++)
461    {
462       spx_word32_t y1k=0, y2k=0;
463       for (j=0;j<M2;j++)
464       {
465          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
466          y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
467          j++;
468          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
469          y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
470       }
471       y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767));
472       y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767));
473    }
474 }
475 
476 /* Re-synthesised a signal from the QMF low-band and high-band signals */
qmf_synth(const spx_word16_t * x1,const spx_word16_t * x2,const spx_word16_t * a,spx_word16_t * y,int N,int M,spx_word16_t * mem1,spx_word16_t * mem2,char * stack)477 void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word16_t *mem1, spx_word16_t *mem2, char *stack)
478    /* assumptions:
479       all odd x[i] are zero -- well, actually they are left out of the array now
480       N and M are multiples of 4 */
481 {
482    int i, j;
483    int M2, N2;
484    VARDECL(spx_word16_t *xx1);
485    VARDECL(spx_word16_t *xx2);
486 
487    M2 = M>>1;
488    N2 = N>>1;
489    ALLOC(xx1, M2+N2, spx_word16_t);
490    ALLOC(xx2, M2+N2, spx_word16_t);
491 
492    for (i = 0; i < N2; i++)
493       xx1[i] = x1[N2-1-i];
494    for (i = 0; i < M2; i++)
495       xx1[N2+i] = mem1[2*i+1];
496    for (i = 0; i < N2; i++)
497       xx2[i] = x2[N2-1-i];
498    for (i = 0; i < M2; i++)
499       xx2[N2+i] = mem2[2*i+1];
500 
501    for (i = 0; i < N2; i += 2) {
502       spx_sig_t y0, y1, y2, y3;
503       spx_word16_t x10, x20;
504 
505       y0 = y1 = y2 = y3 = 0;
506       x10 = xx1[N2-2-i];
507       x20 = xx2[N2-2-i];
508 
509       for (j = 0; j < M2; j += 2) {
510          spx_word16_t x11, x21;
511          spx_word16_t a0, a1;
512 
513          a0 = a[2*j];
514          a1 = a[2*j+1];
515          x11 = xx1[N2-1+j-i];
516          x21 = xx2[N2-1+j-i];
517 
518 #ifdef FIXED_POINT
519          /* We multiply twice by the same coef to avoid overflows */
520          y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21);
521          y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21);
522          y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20);
523          y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20);
524 #else
525          y0 = ADD32(y0,MULT16_16(a0, x11-x21));
526          y1 = ADD32(y1,MULT16_16(a1, x11+x21));
527          y2 = ADD32(y2,MULT16_16(a0, x10-x20));
528          y3 = ADD32(y3,MULT16_16(a1, x10+x20));
529 #endif
530          a0 = a[2*j+2];
531          a1 = a[2*j+3];
532          x10 = xx1[N2+j-i];
533          x20 = xx2[N2+j-i];
534 
535 #ifdef FIXED_POINT
536          /* We multiply twice by the same coef to avoid overflows */
537          y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20);
538          y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20);
539          y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21);
540          y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21);
541 #else
542          y0 = ADD32(y0,MULT16_16(a0, x10-x20));
543          y1 = ADD32(y1,MULT16_16(a1, x10+x20));
544          y2 = ADD32(y2,MULT16_16(a0, x11-x21));
545          y3 = ADD32(y3,MULT16_16(a1, x11+x21));
546 #endif
547       }
548 #ifdef FIXED_POINT
549       y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
550       y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
551       y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
552       y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
553 #else
554       /* Normalize up explicitly if we're in float */
555       y[2*i] = 2.f*y0;
556       y[2*i+1] = 2.f*y1;
557       y[2*i+2] = 2.f*y2;
558       y[2*i+3] = 2.f*y3;
559 #endif
560    }
561 
562    for (i = 0; i < M2; i++)
563       mem1[2*i+1] = xx1[i];
564    for (i = 0; i < M2; i++)
565       mem2[2*i+1] = xx2[i];
566 }
567 
568 #ifdef FIXED_POINT
569 #if 0
570 const spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
571                                  {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
572                                  {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
573 #else
574 const spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540},
575                                 {-1064,    2817,   -6694,   31589,    6837,    -990,    -209},
576                                  {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
577 #endif
578 #else
579 #if 0
580 const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
581                           {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
582                           {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
583 #else
584 const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f},
585                           {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f},
586                           {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}};
587 #endif
588 #endif
589 
interp_pitch(spx_word16_t * exc,spx_word16_t * interp,int pitch,int len)590 int interp_pitch(
591 spx_word16_t *exc,          /*decoded excitation*/
592 spx_word16_t *interp,          /*decoded excitation*/
593 int pitch,               /*pitch period*/
594 int len
595 )
596 {
597    int i,j,k;
598    spx_word32_t corr[4][7];
599    spx_word32_t maxcorr;
600    int maxi, maxj;
601    for (i=0;i<7;i++)
602    {
603       corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
604    }
605    for (i=0;i<3;i++)
606    {
607       for (j=0;j<7;j++)
608       {
609          int i1, i2;
610          spx_word32_t tmp=0;
611          i1 = 3-j;
612          if (i1<0)
613             i1 = 0;
614          i2 = 10-j;
615          if (i2>7)
616             i2 = 7;
617          for (k=i1;k<i2;k++)
618             tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
619          corr[i+1][j] = tmp;
620       }
621    }
622    maxi=maxj=0;
623    maxcorr = corr[0][0];
624    for (i=0;i<4;i++)
625    {
626       for (j=0;j<7;j++)
627       {
628          if (corr[i][j] > maxcorr)
629          {
630             maxcorr = corr[i][j];
631             maxi=i;
632             maxj=j;
633          }
634       }
635    }
636    for (i=0;i<len;i++)
637    {
638       spx_word32_t tmp = 0;
639       if (maxi>0)
640       {
641          for (k=0;k<7;k++)
642          {
643             tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
644          }
645       } else {
646          tmp = SHL32(exc[i-(pitch-maxj+3)],15);
647       }
648       interp[i] = PSHR32(tmp,15);
649    }
650    return pitch-maxj+3;
651 }
652 
multicomb(spx_word16_t * exc,spx_word16_t * new_exc,spx_coef_t * ak,int p,int nsf,int pitch,int max_pitch,spx_word16_t comb_gain,char * stack)653 void multicomb(
654 spx_word16_t *exc,          /*decoded excitation*/
655 spx_word16_t *new_exc,      /*enhanced excitation*/
656 spx_coef_t *ak,           /*LPC filter coefs*/
657 int p,               /*LPC order*/
658 int nsf,             /*sub-frame size*/
659 int pitch,           /*pitch period*/
660 int max_pitch,
661 spx_word16_t  comb_gain,    /*gain of comb filter*/
662 char *stack
663 )
664 {
665    int i;
666    VARDECL(spx_word16_t *iexc);
667    spx_word16_t old_ener, new_ener;
668    int corr_pitch;
669 
670    spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
671    spx_word32_t corr0, corr1;
672    spx_word16_t gain0, gain1;
673    spx_word16_t pgain1, pgain2;
674    spx_word16_t c1, c2;
675    spx_word16_t g1, g2;
676    spx_word16_t ngain;
677    spx_word16_t gg1, gg2;
678 #ifdef FIXED_POINT
679    int scaledown=0;
680 #endif
681 #if 0 /* Set to 1 to enable full pitch search */
682    int nol_pitch[6];
683    spx_word16_t nol_pitch_coef[6];
684    spx_word16_t ol_pitch_coef;
685    open_loop_nbest_pitch(exc, 20, 120, nsf,
686                          nol_pitch, nol_pitch_coef, 6, stack);
687    corr_pitch=nol_pitch[0];
688    ol_pitch_coef = nol_pitch_coef[0];
689    /*Try to remove pitch multiples*/
690    for (i=1;i<6;i++)
691    {
692 #ifdef FIXED_POINT
693       if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
694 #else
695       if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
696 #endif
697          (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
698          ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
699       {
700          corr_pitch = nol_pitch[i];
701       }
702    }
703 #else
704    corr_pitch = pitch;
705 #endif
706 
707    ALLOC(iexc, 2*nsf, spx_word16_t);
708 
709    interp_pitch(exc, iexc, corr_pitch, 80);
710    if (corr_pitch>max_pitch)
711       interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
712    else
713       interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
714 
715 #ifdef FIXED_POINT
716    for (i=0;i<nsf;i++)
717    {
718       if (ABS16(exc[i])>16383)
719       {
720          scaledown = 1;
721          break;
722       }
723    }
724    if (scaledown)
725    {
726       for (i=0;i<nsf;i++)
727          exc[i] = SHR16(exc[i],1);
728       for (i=0;i<2*nsf;i++)
729          iexc[i] = SHR16(iexc[i],1);
730    }
731 #endif
732    /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
733 
734    /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
735    iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
736    iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
737    exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
738    corr0  = inner_prod(iexc,exc,nsf);
739    if (corr0<0)
740       corr0=0;
741    corr1 = inner_prod(iexc+nsf,exc,nsf);
742    if (corr1<0)
743       corr1=0;
744 #ifdef FIXED_POINT
745    /* Doesn't cost much to limit the ratio and it makes the rest easier */
746    if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
747       iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
748    if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
749       iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
750 #endif
751    if (corr0 > MULT16_16(iexc0_mag,exc_mag))
752       pgain1 = QCONST16(1., 14);
753    else
754       pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
755    if (corr1 > MULT16_16(iexc1_mag,exc_mag))
756       pgain2 = QCONST16(1., 14);
757    else
758       pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
759    gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
760    gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
761    if (comb_gain>0)
762    {
763 #ifdef FIXED_POINT
764       c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
765       c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
766 #else
767       c1 = .4*comb_gain+.07;
768       c2 = .5+1.72*(c1-.07);
769 #endif
770    } else
771    {
772       c1=c2=0;
773    }
774 #ifdef FIXED_POINT
775    g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
776    g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
777 #else
778    g1 = 1-c2*pgain1*pgain1;
779    g2 = 1-c2*pgain2*pgain2;
780 #endif
781    if (g1<c1)
782       g1 = c1;
783    if (g2<c1)
784       g2 = c1;
785    g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
786    g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
787    if (corr_pitch>max_pitch)
788    {
789       gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
790       gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
791    } else {
792       gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
793       gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
794    }
795    for (i=0;i<nsf;i++)
796       new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
797    /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
798    new_ener = compute_rms16(new_exc, nsf);
799    old_ener = compute_rms16(exc, nsf);
800 
801    if (old_ener < 1)
802       old_ener = 1;
803    if (new_ener < 1)
804       new_ener = 1;
805    if (old_ener > new_ener)
806       old_ener = new_ener;
807    ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
808 
809    for (i=0;i<nsf;i++)
810       new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
811 #ifdef FIXED_POINT
812    if (scaledown)
813    {
814       for (i=0;i<nsf;i++)
815          exc[i] = SHL16(exc[i],1);
816       for (i=0;i<nsf;i++)
817          new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1);
818    }
819 #endif
820 }
821 
822