1 /* Copyright (C) 2003-2006 Jean-Marc Valin
2 
3    File: mdf.c
4    Echo canceller based on the MDF algorithm (see below)
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are
8    met:
9 
10    1. Redistributions of source code must retain the above copyright notice,
11    this list of conditions and the following disclaimer.
12 
13    2. Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    3. The name of the author may not be used to endorse or promote products
18    derived from this software without specific prior written permission.
19 
20    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30    POSSIBILITY OF SUCH DAMAGE.
31 */
32 
33 /*
34    The echo canceller is based on the MDF algorithm described in:
35 
36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38    February 1990.
39 
40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41    double-talk is achieved using a variable learning rate as described in:
42 
43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44    Cancellation With Double-Talk. Submitted to IEEE Transactions on Speech
45    and Audio Processing, 2006.
46 
47    About the fixed-point version:
48    All the signals are represented with 16-bit words. The filter weights
49    are represented with 32-bit words, but only the top 16 bits are used
50    in most cases. The lower 16 bits are completely reliable (due to the
51    fact that the update is done only on the top bits), but help in the
52    adaptation -- probably by removing a "threshold effect" due to
53    quantization when the gradient is small.
54 
55    Another kludge that seems to work good: when performing the weight
56    update, we only move half the way toward the "goal" this seems to
57    reduce the effect of quantization noise in the update phase.
58 
59  */
60 #ifdef HAVE_CONFIG_H
61 #include "config.h"
62 #endif
63 
64 #ifdef _WIN32
65 #pragma warning(disable:4305)
66 #pragma warning(disable:4127)
67 #endif
68 
69 
70 #ifdef _WIN32
71 #pragma warning(disable:4244)
72 #endif
73 
74 #include "misc.h"
75 #include "speex_echo.h"
76 #include "smallft.h"
77 #include "fftwrap.h"
78 #include "pseudofloat.h"
79 #include "math_approx.h"
80 
81 #ifndef M_PI
82 #define M_PI 3.14159265358979323846
83 #endif
84 
85 #define min(a,b) ((a)<(b) ? (a) : (b))
86 #define max(a,b) ((a)>(b) ? (a) : (b))
87 
88 #ifdef FIXED_POINT
89 #define WEIGHT_SHIFT 11
90 #define NORMALIZE_SCALEDOWN 5
91 #define NORMALIZE_SCALEUP 3
92 #else
93 #define WEIGHT_SHIFT 0
94 #endif
95 
96 #ifdef FIXED_POINT
97 static const spx_float_t MAX_ALPHA = ((spx_float_t){16777, -21});
98 static const spx_float_t ALPHA0 = ((spx_float_t){26214, -19});
99 static const spx_float_t MIN_LEAK = ((spx_float_t){16777, -24});
100 #define TOP16(x) ((x)>>16)
101 #else
102 static const spx_float_t MAX_ALPHA = .008f;
103 static const spx_float_t ALPHA0 = .05f;
104 static const spx_float_t MIN_LEAK = .001f;
105 #define TOP16(x) (x)
106 #endif
107 
108 
109 /** Speex echo cancellation state. */
110 struct SpeexEchoState_ {
111    int frame_size;           /**< Number of samples processed each time */
112    int window_size;
113    int M;
114    int cancel_count;
115    int adapted;
116    spx_int32_t sampling_rate;
117    spx_word16_t spec_average;
118    spx_word16_t beta0;
119    spx_word16_t beta_max;
120    spx_word32_t sum_adapt;
121    spx_word16_t *e;
122    spx_word16_t *x;
123    spx_word16_t *X;
124    spx_word16_t *d;
125    spx_word16_t *y;
126    spx_word16_t *last_y;
127    spx_word32_t *Yps;
128    spx_word16_t *Y;
129    spx_word16_t *E;
130    spx_word32_t *PHI;
131    spx_word32_t *W;
132    spx_word32_t *power;
133    spx_float_t *power_1;
134    spx_word32_t *Rf;
135    spx_word32_t *Yf;
136    spx_word32_t *Xf;
137    spx_word32_t *Eh;
138    spx_word32_t *Yh;
139    spx_float_t Pey;
140    spx_float_t Pyy;
141    spx_word16_t *window;
142    void *fft_table;
143    spx_word16_t memX, memD, memE;
144    spx_word16_t preemph;
145 };
146 
inner_prod(const spx_word16_t * x,const spx_word16_t * y,int len)147 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
148 {
149    spx_word32_t sum=0;
150    len >>= 2;
151    while(len--)
152    {
153       spx_word32_t part=0;
154       part = MAC16_16(part,*x++,*y++);
155       part = MAC16_16(part,*x++,*y++);
156       part = MAC16_16(part,*x++,*y++);
157       part = MAC16_16(part,*x++,*y++);
158       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
159       sum = ADD32(sum,SHR32(part,6));
160    }
161    return sum;
162 }
163 
164 /** Compute power spectrum of a half-complex (packed) vector */
power_spectrum(spx_word16_t * X,spx_word32_t * ps,int N)165 static void power_spectrum(spx_word16_t *X, spx_word32_t *ps, int N)
166 {
167    int i, j;
168    ps[0]=MULT16_16(X[0],X[0]);
169    for (i=1,j=1;i<N-1;i+=2,j++)
170    {
171       ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
172    }
173    ps[j]=MULT16_16(X[i],X[i]);
174 }
175 
176 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
177 #ifdef FIXED_POINT
spectral_mul_accum(spx_word16_t * X,spx_word32_t * Y,spx_word16_t * acc,int N,int M)178 static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M)
179 {
180    int i,j;
181    spx_word32_t tmp1=0,tmp2=0;
182    for (j=0;j<M;j++)
183    {
184       tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
185    }
186    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
187    for (i=1;i<N-1;i+=2)
188    {
189       tmp1 = tmp2 = 0;
190       for (j=0;j<M;j++)
191       {
192          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
193          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
194       }
195       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
196       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
197    }
198    tmp1 = tmp2 = 0;
199    for (j=0;j<M;j++)
200    {
201       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
202    }
203    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
204 }
205 #else
spectral_mul_accum(spx_word16_t * X,spx_word32_t * Y,spx_word16_t * acc,int N,int M)206 static void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M)
207 {
208    int i,j;
209    for (i=0;i<N;i++)
210       acc[i] = 0;
211    for (j=0;j<M;j++)
212    {
213       acc[0] += X[0]*Y[0];
214       for (i=1;i<N-1;i+=2)
215       {
216          acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
217          acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
218       }
219       acc[i] += X[i]*Y[i];
220       X += N;
221       Y += N;
222    }
223 }
224 #endif
225 
226 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
weighted_spectral_mul_conj(spx_float_t * w,spx_word16_t * X,spx_word16_t * Y,spx_word32_t * prod,int N)227 static void weighted_spectral_mul_conj(spx_float_t *w, spx_word16_t *X, spx_word16_t *Y, spx_word32_t *prod, int N)
228 {
229    int i, j;
230    prod[0] = FLOAT_MUL32(w[0],MULT16_16(X[0],Y[0]));
231    for (i=1,j=1;i<N-1;i+=2,j++)
232    {
233       prod[i] = FLOAT_MUL32(w[j],MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
234       prod[i+1] = FLOAT_MUL32(w[j],MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
235    }
236    prod[i] = FLOAT_MUL32(w[j],MULT16_16(X[i],Y[i]));
237 }
238 
239 
240 /** Creates a new echo canceller state */
speex_echo_state_init(int frame_size,int filter_length)241 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
242 {
243    int i,N,M;
244    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
245 
246    st->frame_size = frame_size;
247    st->window_size = 2*frame_size;
248    N = st->window_size;
249    M = st->M = (filter_length+st->frame_size-1)/frame_size;
250    st->cancel_count=0;
251    st->sum_adapt = 0;
252    /* FIXME: Make that an init option (new API call?) */
253    st->sampling_rate = 8000;
254    st->spec_average = DIV32_16(SHL32(st->frame_size, 15), st->sampling_rate);
255 #ifdef FIXED_POINT
256    st->beta0 = DIV32_16(SHL32(st->frame_size, 16), st->sampling_rate);
257    st->beta_max = DIV32_16(SHL32(st->frame_size, 14), st->sampling_rate);
258 #else
259    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
260    st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
261 #endif
262 
263    st->fft_table = spx_fft_init(N);
264 
265    st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
266    st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
267    st->d = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
268    st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
269    st->Yps = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
270    st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
271    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
272    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
273    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
274    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
275    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
276 
277    st->X = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
278    st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
279    st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
280    st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
281    st->PHI = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
282    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
283    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
284    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
285 #ifdef FIXED_POINT
286    for (i=0;i<N>>1;i++)
287    {
288       st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
289       st->window[N-i-1] = st->window[i];
290    }
291 #else
292    for (i=0;i<N;i++)
293       st->window[i] = .5-.5*cos(2*M_PI*i/N);
294 #endif
295    for (i=0;i<N*M;i++)
296    {
297       st->W[i] = st->PHI[i] = 0;
298    }
299    st->memX=st->memD=st->memE=0;
300    st->preemph = QCONST16(.9,15);
301    st->adapted = 0;
302    st->Pey = st->Pyy = FLOAT_ONE;
303    return st;
304 }
305 
306 /** Resets echo canceller state */
speex_echo_state_reset(SpeexEchoState * st)307 void speex_echo_state_reset(SpeexEchoState *st)
308 {
309    int i, M, N;
310    st->cancel_count=0;
311    N = st->window_size;
312    M = st->M;
313    for (i=0;i<N*M;i++)
314    {
315       st->W[i] = 0;
316       st->X[i] = 0;
317    }
318    for (i=0;i<=st->frame_size;i++)
319       st->power[i] = 0;
320 
321    st->adapted = 0;
322    st->sum_adapt = 0;
323    st->Pey = st->Pyy = FLOAT_ONE;
324 
325 }
326 
327 /** Destroys an echo canceller state */
speex_echo_state_destroy(SpeexEchoState * st)328 void speex_echo_state_destroy(SpeexEchoState *st)
329 {
330    spx_fft_destroy(st->fft_table);
331 
332    speex_free(st->e);
333    speex_free(st->x);
334    speex_free(st->d);
335    speex_free(st->y);
336    speex_free(st->last_y);
337    speex_free(st->Yps);
338    speex_free(st->Yf);
339    speex_free(st->Rf);
340    speex_free(st->Xf);
341    speex_free(st->Yh);
342    speex_free(st->Eh);
343 
344    speex_free(st->X);
345    speex_free(st->Y);
346    speex_free(st->E);
347    speex_free(st->W);
348    speex_free(st->PHI);
349    speex_free(st->power);
350    speex_free(st->power_1);
351    speex_free(st->window);
352 
353    speex_free(st);
354 }
355 
356 extern int fixed_point;
357 /** Performs echo cancellation on a frame */
speex_echo_cancel(SpeexEchoState * st,short * ref,short * echo,short * out,spx_int32_t * Yout)358 void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_int32_t *Yout)
359 {
360    int i,j;
361    int N,M;
362    spx_word32_t Syy,See;
363    spx_word16_t leak_estimate;
364    spx_word16_t ss, ss_1;
365    spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
366    spx_float_t alpha, alpha_1;
367    spx_word16_t RER;
368    spx_word32_t tmp32;
369    spx_word16_t M_1;
370 
371    N = st->window_size;
372    M = st->M;
373    st->cancel_count++;
374 #ifdef FIXED_POINT
375    ss=DIV32_16(11469,M);
376    ss_1 = SUB16(32767,ss);
377    M_1 = DIV32_16(32767,M);
378 #else
379    ss=.35/M;
380    ss_1 = 1-ss;
381    M_1 = 1.f/M;
382 #endif
383 
384    /* Copy input data to buffer */
385    for (i=0;i<st->frame_size;i++)
386    {
387       st->x[i] = st->x[i+st->frame_size];
388       st->x[i+st->frame_size] = SHL16(SUB16(echo[i], MULT16_16_P15(st->preemph, st->memX)),1);
389       st->memX = echo[i];
390 
391       st->d[i] = st->d[i+st->frame_size];
392       st->d[i+st->frame_size] = SHL16(SUB16(ref[i], MULT16_16_P15(st->preemph, st->memD)),1);
393       st->memD = ref[i];
394    }
395 
396    /* Shift memory: this could be optimized eventually*/
397    for (i=0;i<N*(M-1);i++)
398       st->X[i]=st->X[i+N];
399 
400    /* Convert x (echo input) to frequency domain */
401    spx_fft(st->fft_table, st->x, &st->X[(M-1)*N]);
402 
403    /* Compute filter response Y */
404    spectral_mul_accum(st->X, st->W, st->Y, N, M);
405 
406    spx_ifft(st->fft_table, st->Y, st->y);
407 
408 #if 1
409    spectral_mul_accum(st->X, st->PHI, st->Y, N, M);
410    spx_ifft(st->fft_table, st->Y, st->e);
411 #endif
412 
413    /* Compute error signal (for the output with de-emphasis) */
414    for (i=0;i<st->frame_size;i++)
415    {
416       spx_word32_t tmp_out;
417 #if 1
418       spx_word16_t y = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]);
419       tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(y));
420 #else
421       tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(st->y[i+st->frame_size]));
422 #endif
423 
424       tmp_out = PSHR32(tmp_out,1);
425       /* Saturation */
426       if (tmp_out>32767)
427          tmp_out = 32767;
428       else if (tmp_out<-32768)
429          tmp_out = -32768;
430       tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
431       out[i] = tmp_out;
432       st->memE = tmp_out;
433    }
434 
435    /* Compute error signal (filter update version) */
436    for (i=0;i<st->frame_size;i++)
437    {
438       st->e[i] = 0;
439       st->e[i+st->frame_size] = st->d[i+st->frame_size] - st->y[i+st->frame_size];
440    }
441 
442    /* Compute a bunch of correlations */
443    See = inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size);
444    See = ADD32(See, SHR32(10000,6));
445    Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
446 
447    /* Convert error to frequency domain */
448    spx_fft(st->fft_table, st->e, st->E);
449    for (i=0;i<st->frame_size;i++)
450       st->y[i] = 0;
451    spx_fft(st->fft_table, st->y, st->Y);
452 
453    /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
454    power_spectrum(st->E, st->Rf, N);
455    power_spectrum(st->Y, st->Yf, N);
456    power_spectrum(&st->X[(M-1)*N], st->Xf, N);
457 
458    /* Smooth echo energy estimate over time */
459    for (j=0;j<=st->frame_size;j++)
460       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
461 
462    /* Enable this to compute the power based only on the tail (would need to compute more
463       efficiently to make this really useful */
464    if (0)
465    {
466       float scale2 = .5f/M;
467       for (j=0;j<=st->frame_size;j++)
468          st->power[j] = 0;
469       for (i=0;i<M;i++)
470       {
471          power_spectrum(&st->X[i*N], st->Xf, N);
472          for (j=0;j<=st->frame_size;j++)
473             st->power[j] += scale2*st->Xf[j];
474       }
475    }
476 
477    /* Compute filtered spectra and (cross-)correlations */
478    for (j=st->frame_size;j>=0;j--)
479    {
480       spx_float_t Eh, Yh;
481       Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
482       Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
483       Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
484       Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
485 #ifdef FIXED_POINT
486       st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
487       st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
488 #else
489       st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
490       st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
491 #endif
492    }
493 
494    /* Compute correlation updatete rate */
495    tmp32 = MULT16_32_Q15(st->beta0,Syy);
496    if (tmp32 > MULT16_32_Q15(st->beta_max,See))
497       tmp32 = MULT16_32_Q15(st->beta_max,See);
498    alpha = FLOAT_DIV32(tmp32, See);
499    alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
500    /* Update correlations (recursive average) */
501    st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
502    st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
503    if (FLOAT_LT(st->Pyy, FLOAT_ONE))
504       st->Pyy = FLOAT_ONE;
505    /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
506    if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
507       st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
508    if (FLOAT_GT(st->Pey, st->Pyy))
509       st->Pey = st->Pyy;
510    /* leak_estimate is the limear regression result */
511    leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
512    if (leak_estimate > 16383)
513       leak_estimate = 32767;
514    else
515       leak_estimate = SHL16(leak_estimate,1);
516    /*printf ("%f\n", leak_estimate);*/
517 
518    /* Compute Residual to Error Ratio */
519 #ifdef FIXED_POINT
520    tmp32 = MULT16_32_Q15(leak_estimate,Syy);
521    tmp32 = ADD32(tmp32, SHL32(tmp32,1));
522    if (tmp32 > SHR32(See,1))
523       tmp32 = SHR32(See,1);
524    RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
525 #else
526    RER = 3.*MULT16_32_Q15(leak_estimate,Syy) / See;
527    if (RER > .5)
528       RER = .5;
529 #endif
530 
531    /* We consider that the filter has had minimal adaptation if the following is true*/
532    if (!st->adapted && st->sum_adapt > QCONST32(1,15))
533    {
534       st->adapted = 1;
535    }
536 
537    if (st->adapted)
538    {
539       for (i=0;i<=st->frame_size;i++)
540       {
541          spx_word32_t r, e;
542          /* Compute frequency-domain adaptation mask */
543          r = MULT16_32_Q15(leak_estimate,SHL32(st->Yf[i],3));
544          e = SHL32(st->Rf[i],3)+1;
545 #ifdef FIXED_POINT
546          if (r>SHR32(e,1))
547             r = SHR32(e,1);
548 #else
549          if (r>.5*e)
550             r = .5*e;
551 #endif
552          r = MULT16_32_Q15(QCONST16(.8,15),r) + MULT16_32_Q15(QCONST16(.2,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
553          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
554          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(MULT16_32_Q15(M_1,r),FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
555       }
556    } else {
557       spx_word32_t Sxx;
558       spx_word16_t adapt_rate=0;
559 
560       Sxx = inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
561       /* Temporary adaption rate if filter is not adapted correctly */
562 
563       tmp32 = MULT16_32_Q15(QCONST16(.15f, 15), Sxx);
564 #ifdef FIXED_POINT
565       if (Sxx > SHR32(See,2))
566          Sxx = SHR32(See,2);
567 #else
568       if (Sxx > .25*See)
569          Sxx = .25*See;
570 #endif
571       adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(MULT16_32_Q15(M_1,Sxx), See),15));
572 
573       for (i=0;i<=st->frame_size;i++)
574          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
575 
576 
577       /* How much have we adapted so far? */
578       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
579    }
580    /* Compute weight gradient */
581    for (j=0;j<M;j++)
582    {
583       weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI+N*j, N);
584    }
585 
586    /* Gradient descent */
587    for (i=0;i<M*N;i++)
588    {
589       st->W[i] += st->PHI[i];
590       /* Old value of W in PHI */
591       st->PHI[i] = st->W[i] - st->PHI[i];
592    }
593 
594    /* Update weight to prevent circular convolution (MDF / AUMDF) */
595    for (j=0;j<M;j++)
596    {
597       /* This is a variant of the Alternatively Updated MDF (AUMDF) */
598       /* Remove the "if" to make this an MDF filter */
599       if (j==M-1 || st->cancel_count%(M-1) == j)
600       {
601 #ifdef _WIN32
602          spx_word16_t * w = (spx_word16_t *)_alloca(N * sizeof(spx_word16_t));
603 #else
604          spx_word16_t w[N];
605 #endif
606 #ifdef FIXED_POINT
607          spx_word16_t w2[N];
608          for (i=0;i<N;i++)
609             w2[i] = PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16);
610          spx_ifft(st->fft_table, w2, w);
611          for (i=0;i<st->frame_size;i++)
612          {
613             w[i]=0;
614          }
615          for (i=st->frame_size;i<N;i++)
616          {
617             w[i]=SHL(w[i],NORMALIZE_SCALEUP);
618          }
619          spx_fft(st->fft_table, w, w2);
620          /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
621          for (i=0;i<N;i++)
622             st->W[j*N+i] -= SHL32(w2[i],16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
623 #else
624          spx_ifft(st->fft_table, &st->W[j*N], w);
625          for (i=st->frame_size;i<N;i++)
626          {
627             w[i]=0;
628          }
629          spx_fft(st->fft_table, w, &st->W[j*N]);
630 #endif
631       }
632    }
633 
634    /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/
635    if (Yout)
636    {
637       spx_word16_t leak2;
638       if (st->adapted)
639       {
640          /* If the filter is adapted, take the filtered echo */
641          for (i=0;i<st->frame_size;i++)
642             st->last_y[i] = st->last_y[st->frame_size+i];
643          for (i=0;i<st->frame_size;i++)
644             st->last_y[st->frame_size+i] = ref[i]-out[i];
645       } else {
646          /* If filter isn't adapted yet, all we can do is take the echo signal directly */
647          for (i=0;i<N;i++)
648             st->last_y[i] = st->x[i];
649       }
650 
651       /* Apply hanning window (should pre-compute it)*/
652       for (i=0;i<N;i++)
653          st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
654 
655       /* Compute power spectrum of the echo */
656       spx_fft(st->fft_table, st->y, st->Y);
657       power_spectrum(st->Y, st->Yps, N);
658 
659 #ifdef FIXED_POINT
660       if (leak_estimate > 16383)
661          leak2 = 32767;
662       else
663          leak2 = SHL16(leak_estimate, 1);
664 #else
665       if (leak_estimate>.5)
666          leak2 = 1;
667       else
668          leak2 = 2*leak_estimate;
669 #endif
670       /* Estimate residual echo */
671       for (i=0;i<=st->frame_size;i++)
672          Yout[i] = MULT16_32_Q15(leak2,st->Yps[i]);
673    }
674 }
675 
676