1 /* Copyright (c) 2007-2008 CSIRO
2    Copyright (c) 2007-2009 Xiph.Org Foundation
3    Copyright (c) 2008-2009 Gregory Maxwell
4    Written by Jean-Marc Valin and Gregory Maxwell */
5 /*
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9 
10    - Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12 
13    - Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
21    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include <math.h>
35 #include "bands.h"
36 #include "modes.h"
37 #include "vq.h"
38 #include "cwrs.h"
39 #include "stack_alloc.h"
40 #include "os_support.h"
41 #include "mathops.h"
42 #include "rate.h"
43 
lcg_rand(celt_uint32 seed)44 celt_uint32 lcg_rand(celt_uint32 seed)
45 {
46    return 1664525 * seed + 1013904223;
47 }
48 
49 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
50    with this approximation is important because it has an impact on the bit allocation */
bitexact_cos(celt_int16 x)51 static celt_int16 bitexact_cos(celt_int16 x)
52 {
53    celt_int32 tmp;
54    celt_int16 x2;
55    tmp = (4096+((celt_int32)(x)*(x)))>>13;
56    if (tmp > 32767)
57       tmp = 32767;
58    x2 = tmp;
59    x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
60    if (x2 > 32766)
61       x2 = 32766;
62    return 1+x2;
63 }
64 
bitexact_log2tan(int isin,int icos)65 static int bitexact_log2tan(int isin,int icos)
66 {
67    int lc;
68    int ls;
69    lc=EC_ILOG(icos);
70    ls=EC_ILOG(isin);
71    icos<<=15-lc;
72    isin<<=15-ls;
73    return (ls-lc<<11)
74          +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
75          -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
76 }
77 
78 #ifdef FIXED_POINT
79 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bank,int end,int _C,int M)80 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int end, int _C, int M)
81 {
82    int i, c, N;
83    const celt_int16 *eBands = m->eBands;
84    const int C = CHANNELS(_C);
85    N = M*m->shortMdctSize;
86    c=0; do {
87       for (i=0;i<end;i++)
88       {
89          int j;
90          celt_word32 maxval=0;
91          celt_word32 sum = 0;
92 
93          j=M*eBands[i]; do {
94             maxval = MAX32(maxval, X[j+c*N]);
95             maxval = MAX32(maxval, -X[j+c*N]);
96          } while (++j<M*eBands[i+1]);
97 
98          if (maxval > 0)
99          {
100             int shift = celt_ilog2(maxval)-10;
101             j=M*eBands[i]; do {
102                sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
103                                    EXTRACT16(VSHR32(X[j+c*N],shift)));
104             } while (++j<M*eBands[i+1]);
105             /* We're adding one here to make damn sure we never end up with a pitch vector that's
106                larger than unity norm */
107             bank[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
108          } else {
109             bank[i+c*m->nbEBands] = EPSILON;
110          }
111          /*printf ("%f ", bank[i+c*m->nbEBands]);*/
112       }
113    } while (++c<C);
114    /*printf ("\n");*/
115 }
116 
117 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * restrict freq,celt_norm * restrict X,const celt_ener * bank,int end,int _C,int M)118 void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bank, int end, int _C, int M)
119 {
120    int i, c, N;
121    const celt_int16 *eBands = m->eBands;
122    const int C = CHANNELS(_C);
123    N = M*m->shortMdctSize;
124    c=0; do {
125       i=0; do {
126          celt_word16 g;
127          int j,shift;
128          celt_word16 E;
129          shift = celt_zlog2(bank[i+c*m->nbEBands])-13;
130          E = VSHR32(bank[i+c*m->nbEBands], shift);
131          g = EXTRACT16(celt_rcp(SHL32(E,3)));
132          j=M*eBands[i]; do {
133             X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
134          } while (++j<M*eBands[i+1]);
135       } while (++i<end);
136    } while (++c<C);
137 }
138 
139 #else /* FIXED_POINT */
140 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bank,int end,int _C,int M)141 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int end, int _C, int M)
142 {
143    int i, c, N;
144    const celt_int16 *eBands = m->eBands;
145    const int C = CHANNELS(_C);
146    N = M*m->shortMdctSize;
147    c=0; do {
148       for (i=0;i<end;i++)
149       {
150          int j;
151          celt_word32 sum = 1e-27f;
152          for (j=M*eBands[i];j<M*eBands[i+1];j++)
153             sum += X[j+c*N]*X[j+c*N];
154          bank[i+c*m->nbEBands] = celt_sqrt(sum);
155          /*printf ("%f ", bank[i+c*m->nbEBands]);*/
156       }
157    } while (++c<C);
158    /*printf ("\n");*/
159 }
160 
161 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * restrict freq,celt_norm * restrict X,const celt_ener * bank,int end,int _C,int M)162 void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bank, int end, int _C, int M)
163 {
164    int i, c, N;
165    const celt_int16 *eBands = m->eBands;
166    const int C = CHANNELS(_C);
167    N = M*m->shortMdctSize;
168    c=0; do {
169       for (i=0;i<end;i++)
170       {
171          int j;
172          celt_word16 g = 1.f/(1e-27f+bank[i+c*m->nbEBands]);
173          for (j=M*eBands[i];j<M*eBands[i+1];j++)
174             X[j+c*N] = freq[j+c*N]*g;
175       }
176    } while (++c<C);
177 }
178 
179 #endif /* FIXED_POINT */
180 
181 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
denormalise_bands(const CELTMode * m,const celt_norm * restrict X,celt_sig * restrict freq,const celt_ener * bank,int end,int _C,int M)182 void denormalise_bands(const CELTMode *m, const celt_norm * restrict X, celt_sig * restrict freq, const celt_ener *bank, int end, int _C, int M)
183 {
184    int i, c, N;
185    const celt_int16 *eBands = m->eBands;
186    const int C = CHANNELS(_C);
187    N = M*m->shortMdctSize;
188    celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
189    c=0; do {
190       celt_sig * restrict f;
191       const celt_norm * restrict x;
192       f = freq+c*N;
193       x = X+c*N;
194       for (i=0;i<end;i++)
195       {
196          int j, band_end;
197          celt_word32 g = SHR32(bank[i+c*m->nbEBands],1);
198          j=M*eBands[i];
199          band_end = M*eBands[i+1];
200          do {
201             *f++ = SHL32(MULT16_32_Q15(*x, g),2);
202             x++;
203          } while (++j<band_end);
204       }
205       for (i=M*eBands[end];i<N;i++)
206          *f++ = 0;
207    } while (++c<C);
208 }
209 
210 /* This prevents energy collapse for transients with multiple short MDCTs */
anti_collapse(const CELTMode * m,celt_norm * _X,unsigned char * collapse_masks,int LM,int C,int CC,int size,int start,int end,celt_word16 * logE,celt_word16 * prev1logE,celt_word16 * prev2logE,int * pulses,celt_uint32 seed)211 void anti_collapse(const CELTMode *m, celt_norm *_X, unsigned char *collapse_masks, int LM, int C, int CC, int size,
212       int start, int end, celt_word16 *logE, celt_word16 *prev1logE,
213       celt_word16 *prev2logE, int *pulses, celt_uint32 seed)
214 {
215    int c, i, j, k;
216    for (i=start;i<end;i++)
217    {
218       int N0;
219       celt_word16 thresh, sqrt_1;
220       int depth;
221 #ifdef FIXED_POINT
222       int shift;
223 #endif
224 
225       N0 = m->eBands[i+1]-m->eBands[i];
226       /* depth in 1/8 bits */
227       depth = (1+pulses[i])/(m->eBands[i+1]-m->eBands[i]<<LM);
228 
229 #ifdef FIXED_POINT
230       thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1) ));
231       {
232          celt_word32 t;
233          t = N0<<LM;
234          shift = celt_ilog2(t)>>1;
235          t = SHL32(t, (7-shift)<<1);
236          sqrt_1 = celt_rsqrt_norm(t);
237       }
238 #else
239       thresh = .5f*celt_exp2(-.125f*depth);
240       sqrt_1 = celt_rsqrt(N0<<LM);
241 #endif
242 
243       c=0; do
244       {
245          celt_norm *X;
246          celt_word16 prev1;
247          celt_word16 prev2;
248          celt_word16 Ediff;
249          celt_word16 r;
250          int renormalize=0;
251          prev1 = prev1logE[c*m->nbEBands+i];
252          prev2 = prev2logE[c*m->nbEBands+i];
253          if (C<CC)
254          {
255             prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
256             prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
257          }
258          Ediff = logE[c*m->nbEBands+i]-MIN16(prev1,prev2);
259          Ediff = MAX16(0, Ediff);
260 
261 #ifdef FIXED_POINT
262          if (Ediff < 16384)
263             r = 2*MIN16(16383,SHR32(celt_exp2(-Ediff),1));
264          else
265             r = 0;
266          if (LM==3)
267             r = MULT16_16_Q14(23170, MIN32(23169, r));
268          r = SHR16(MIN16(thresh, r),1);
269          r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
270 #else
271          /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
272             short blocks don't have the same energy as long */
273          r = 2.f*celt_exp2(-Ediff);
274          if (LM==3)
275             r *= 1.41421356f;
276          r = MIN16(thresh, r);
277          r = r*sqrt_1;
278 #endif
279          X = _X+c*size+(m->eBands[i]<<LM);
280          for (k=0;k<1<<LM;k++)
281          {
282             /* Detect collapse */
283             if (!(collapse_masks[i*C+c]&1<<k))
284             {
285                /* Fill with noise */
286                for (j=0;j<N0;j++)
287                {
288                   seed = lcg_rand(seed);
289                   X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
290                }
291                renormalize = 1;
292             }
293          }
294          /* We just added some energy, so we need to renormalise */
295          if (renormalize)
296             renormalise_vector(X, N0<<LM, Q15ONE);
297       } while (++c<C);
298    }
299 
300 }
301 
302 
intensity_stereo(const CELTMode * m,celt_norm * X,celt_norm * Y,const celt_ener * bank,int bandID,int N)303 static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bank, int bandID, int N)
304 {
305    int i = bandID;
306    int j;
307    celt_word16 a1, a2;
308    celt_word16 left, right;
309    celt_word16 norm;
310 #ifdef FIXED_POINT
311    int shift = celt_zlog2(MAX32(bank[i], bank[i+m->nbEBands]))-13;
312 #endif
313    left = VSHR32(bank[i],shift);
314    right = VSHR32(bank[i+m->nbEBands],shift);
315    norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
316    a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
317    a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
318    for (j=0;j<N;j++)
319    {
320       celt_norm r, l;
321       l = X[j];
322       r = Y[j];
323       X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
324       /* Side is not encoded, no need to calculate */
325    }
326 }
327 
stereo_split(celt_norm * X,celt_norm * Y,int N)328 static void stereo_split(celt_norm *X, celt_norm *Y, int N)
329 {
330    int j;
331    for (j=0;j<N;j++)
332    {
333       celt_norm r, l;
334       l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
335       r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
336       X[j] = l+r;
337       Y[j] = r-l;
338    }
339 }
340 
stereo_merge(celt_norm * X,celt_norm * Y,celt_word16 mid,int N)341 static void stereo_merge(celt_norm *X, celt_norm *Y, celt_word16 mid, int N)
342 {
343    int j;
344    celt_word32 xp=0, side=0;
345    celt_word32 El, Er;
346    celt_word16 mid2;
347 #ifdef FIXED_POINT
348    int kl, kr;
349 #endif
350    celt_word32 t, lgain, rgain;
351 
352    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
353    for (j=0;j<N;j++)
354    {
355       xp = MAC16_16(xp, X[j], Y[j]);
356       side = MAC16_16(side, Y[j], Y[j]);
357    }
358    /* Compensating for the mid normalization */
359    xp = MULT16_32_Q15(mid, xp);
360    /* mid and side are in Q15, not Q14 like X and Y */
361    mid2 = SHR32(mid, 1);
362    El = MULT16_16(mid2, mid2) + side - 2*xp;
363    Er = MULT16_16(mid2, mid2) + side + 2*xp;
364    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
365    {
366       for (j=0;j<N;j++)
367          Y[j] = X[j];
368       return;
369    }
370 
371 #ifdef FIXED_POINT
372    kl = celt_ilog2(El)>>1;
373    kr = celt_ilog2(Er)>>1;
374 #endif
375    t = VSHR32(El, (kl-7)<<1);
376    lgain = celt_rsqrt_norm(t);
377    t = VSHR32(Er, (kr-7)<<1);
378    rgain = celt_rsqrt_norm(t);
379 
380 #ifdef FIXED_POINT
381    if (kl < 7)
382       kl = 7;
383    if (kr < 7)
384       kr = 7;
385 #endif
386 
387    for (j=0;j<N;j++)
388    {
389       celt_norm r, l;
390       /* Apply mid scaling (side is already scaled) */
391       l = MULT16_16_Q15(mid, X[j]);
392       r = Y[j];
393       X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
394       Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
395    }
396 }
397 
398 /* Decide whether we should spread the pulses in the current frame */
spreading_decision(const CELTMode * m,celt_norm * X,int * average,int last_decision,int * hf_average,int * tapset_decision,int update_hf,int end,int _C,int M)399 int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
400       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
401       int end, int _C, int M)
402 {
403    int i, c, N0;
404    int sum = 0, nbBands=0;
405    const int C = CHANNELS(_C);
406    const celt_int16 * restrict eBands = m->eBands;
407    int decision;
408    int hf_sum=0;
409 
410    N0 = M*m->shortMdctSize;
411 
412    if (M*(eBands[end]-eBands[end-1]) <= 8)
413       return SPREAD_NONE;
414    c=0; do {
415       for (i=0;i<end;i++)
416       {
417          int j, N, tmp=0;
418          int tcount[3] = {0};
419          celt_norm * restrict x = X+M*eBands[i]+c*N0;
420          N = M*(eBands[i+1]-eBands[i]);
421          if (N<=8)
422             continue;
423          /* Compute rough CDF of |x[j]| */
424          for (j=0;j<N;j++)
425          {
426             celt_word32 x2N; /* Q13 */
427 
428             x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
429             if (x2N < QCONST16(0.25f,13))
430                tcount[0]++;
431             if (x2N < QCONST16(0.0625f,13))
432                tcount[1]++;
433             if (x2N < QCONST16(0.015625f,13))
434                tcount[2]++;
435          }
436 
437          /* Only include four last bands (8 kHz and up) */
438          if (i>m->nbEBands-4)
439             hf_sum += 32*(tcount[1]+tcount[0])/N;
440          tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
441          sum += tmp*256;
442          nbBands++;
443       }
444    } while (++c<C);
445 
446    if (update_hf)
447    {
448       if (hf_sum)
449          hf_sum /= C*(4-m->nbEBands+end);
450       *hf_average = (*hf_average+hf_sum)>>1;
451       hf_sum = *hf_average;
452       if (*tapset_decision==2)
453          hf_sum += 4;
454       else if (*tapset_decision==0)
455          hf_sum -= 4;
456       if (hf_sum > 22)
457          *tapset_decision=2;
458       else if (hf_sum > 18)
459          *tapset_decision=1;
460       else
461          *tapset_decision=0;
462    }
463    /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
464    sum /= nbBands;
465    /* Recursive averaging */
466    sum = (sum+*average)>>1;
467    *average = sum;
468    /* Hysteresis */
469    sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
470    if (sum < 80)
471    {
472       decision = SPREAD_AGGRESSIVE;
473    } else if (sum < 256)
474    {
475       decision = SPREAD_NORMAL;
476    } else if (sum < 384)
477    {
478       decision = SPREAD_LIGHT;
479    } else {
480       decision = SPREAD_NONE;
481    }
482    return decision;
483 }
484 
485 #ifdef MEASURE_NORM_MSE
486 
487 float MSE[30] = {0};
488 int nbMSEBands = 0;
489 int MSECount[30] = {0};
490 
dump_norm_mse(void)491 void dump_norm_mse(void)
492 {
493    int i;
494    for (i=0;i<nbMSEBands;i++)
495    {
496       printf ("%g ", MSE[i]/MSECount[i]);
497    }
498    printf ("\n");
499 }
500 
measure_norm_mse(const CELTMode * m,float * X,float * X0,float * bandE,float * bandE0,int M,int N,int C)501 void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C)
502 {
503    static int init = 0;
504    int i;
505    if (!init)
506    {
507       atexit(dump_norm_mse);
508       init = 1;
509    }
510    for (i=0;i<m->nbEBands;i++)
511    {
512       int j;
513       int c;
514       float g;
515       if (bandE0[i]<10 || (C==2 && bandE0[i+m->nbEBands]<1))
516          continue;
517       c=0; do {
518          g = bandE[i+c*m->nbEBands]/(1e-15+bandE0[i+c*m->nbEBands]);
519          for (j=M*m->eBands[i];j<M*m->eBands[i+1];j++)
520             MSE[i] += (g*X[j+c*N]-X0[j+c*N])*(g*X[j+c*N]-X0[j+c*N]);
521       } while (++c<C);
522       MSECount[i]+=C;
523    }
524    nbMSEBands = m->nbEBands;
525 }
526 
527 #endif
528 
529 /* Indexing table for converting from natural Hadamard to ordery Hadamard
530    This is essentially a bit-reversed Gray, on top of which we've added
531    an inversion of the order because we want the DC at the end rather than
532    the beginning. The lines are for N=2, 4, 8, 16 */
533 static const int ordery_table[] = {
534        1,  0,
535        3,  0,  2,  1,
536        7,  0,  4,  3,  6,  1,  5,  2,
537       15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
538 };
539 
deinterleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)540 static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
541 {
542    int i,j;
543    VARDECL(celt_norm, tmp);
544    int N;
545    SAVE_STACK;
546    N = N0*stride;
547    ALLOC(tmp, N, celt_norm);
548    if (hadamard)
549    {
550       const int *ordery = ordery_table+stride-2;
551       for (i=0;i<stride;i++)
552       {
553          for (j=0;j<N0;j++)
554             tmp[ordery[i]*N0+j] = X[j*stride+i];
555       }
556    } else {
557       for (i=0;i<stride;i++)
558          for (j=0;j<N0;j++)
559             tmp[i*N0+j] = X[j*stride+i];
560    }
561    for (j=0;j<N;j++)
562       X[j] = tmp[j];
563    RESTORE_STACK;
564 }
565 
interleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)566 static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
567 {
568    int i,j;
569    VARDECL(celt_norm, tmp);
570    int N;
571    SAVE_STACK;
572    N = N0*stride;
573    ALLOC(tmp, N, celt_norm);
574    if (hadamard)
575    {
576       const int *ordery = ordery_table+stride-2;
577       for (i=0;i<stride;i++)
578          for (j=0;j<N0;j++)
579             tmp[j*stride+i] = X[ordery[i]*N0+j];
580    } else {
581       for (i=0;i<stride;i++)
582          for (j=0;j<N0;j++)
583             tmp[j*stride+i] = X[i*N0+j];
584    }
585    for (j=0;j<N;j++)
586       X[j] = tmp[j];
587    RESTORE_STACK;
588 }
589 
haar1(celt_norm * X,int N0,int stride)590 void haar1(celt_norm *X, int N0, int stride)
591 {
592    int i, j;
593    N0 >>= 1;
594    for (i=0;i<stride;i++)
595       for (j=0;j<N0;j++)
596       {
597          celt_norm tmp1, tmp2;
598          tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
599          tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
600          X[stride*2*j+i] = tmp1 + tmp2;
601          X[stride*(2*j+1)+i] = tmp1 - tmp2;
602       }
603 }
604 
compute_qn(int N,int b,int offset,int pulse_cap,int stereo)605 static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
606 {
607    static const celt_int16 exp2_table8[8] =
608       {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
609    int qn, qb;
610    int N2 = 2*N-1;
611    if (stereo && N==2)
612       N2--;
613    /* The upper limit ensures that in a stereo split with itheta==16384, we'll
614        always have enough bits left over to code at least one pulse in the
615        side; otherwise it would collapse, since it doesn't get folded. */
616    qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
617 
618    qb = IMIN(8<<BITRES, qb);
619 
620    if (qb<(1<<BITRES>>1)) {
621       qn = 1;
622    } else {
623       qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
624       qn = (qn+1)>>1<<1;
625    }
626    celt_assert(qn <= 256);
627    return qn;
628 }
629 
630 /* This function is responsible for encoding and decoding a band for both
631    the mono and stereo case. Even in the mono case, it can split the band
632    in two and transmit the energy difference with the two half-bands. It
633    can be called recursively so bands can end up being split in 8 parts. */
quant_band(int encode,const CELTMode * m,int i,celt_norm * X,celt_norm * Y,int N,int b,int spread,int B,int intensity,int tf_change,celt_norm * lowband,int resynth,ec_ctx * ec,celt_int32 * remaining_bits,int LM,celt_norm * lowband_out,const celt_ener * bandE,int level,celt_uint32 * seed,celt_word16 gain,celt_norm * lowband_scratch,int fill)634 static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,
635       int N, int b, int spread, int B, int intensity, int tf_change, celt_norm *lowband, int resynth, ec_ctx *ec,
636       celt_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level,
637       celt_uint32 *seed, celt_word16 gain, celt_norm *lowband_scratch, int fill)
638 {
639    const unsigned char *cache;
640    int q;
641    int curr_bits;
642    int stereo, split;
643    int imid=0, iside=0;
644    int N0=N;
645    int N_B=N;
646    int N_B0;
647    int B0=B;
648    int time_divide=0;
649    int recombine=0;
650    int inv = 0;
651    celt_word16 mid=0, side=0;
652    int longBlocks;
653    unsigned cm=0;
654 
655    longBlocks = B0==1;
656 
657    N_B /= B;
658    N_B0 = N_B;
659 
660    split = stereo = Y != NULL;
661 
662    /* Special case for one sample */
663    if (N==1)
664    {
665       int c;
666       celt_norm *x = X;
667       c=0; do {
668          int sign=0;
669          if (*remaining_bits>=1<<BITRES)
670          {
671             if (encode)
672             {
673                sign = x[0]<0;
674                ec_enc_bits(ec, sign, 1);
675             } else {
676                sign = ec_dec_bits(ec, 1);
677             }
678             *remaining_bits -= 1<<BITRES;
679             b-=1<<BITRES;
680          }
681          if (resynth)
682             x[0] = sign ? -NORM_SCALING : NORM_SCALING;
683          x = Y;
684       } while (++c<1+stereo);
685       if (lowband_out)
686          lowband_out[0] = SHR16(X[0],4);
687       return 1;
688    }
689 
690    if (!stereo && level == 0)
691    {
692       int k;
693       if (tf_change>0)
694          recombine = tf_change;
695       /* Band recombining to increase frequency resolution */
696 
697       if (lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
698       {
699          int j;
700          for (j=0;j<N;j++)
701             lowband_scratch[j] = lowband[j];
702          lowband = lowband_scratch;
703       }
704 
705       for (k=0;k<recombine;k++)
706       {
707          static const unsigned char bit_interleave_table[16]={
708            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
709          };
710          if (encode)
711             haar1(X, N>>k, 1<<k);
712          if (lowband)
713             haar1(lowband, N>>k, 1<<k);
714          fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
715       }
716       B>>=recombine;
717       N_B<<=recombine;
718 
719       /* Increasing the time resolution */
720       while ((N_B&1) == 0 && tf_change<0)
721       {
722          if (encode)
723             haar1(X, N_B, B);
724          if (lowband)
725             haar1(lowband, N_B, B);
726          fill |= fill<<B;
727          B <<= 1;
728          N_B >>= 1;
729          time_divide++;
730          tf_change++;
731       }
732       B0=B;
733       N_B0 = N_B;
734 
735       /* Reorganize the samples in time order instead of frequency order */
736       if (B0>1)
737       {
738          if (encode)
739             deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
740          if (lowband)
741             deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
742       }
743    }
744 
745    /* If we need 1.5 more bit than we can produce, split the band in two. */
746    cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
747    if (!stereo && LM != -1 && b > cache[cache[0]]+12 && N>2)
748    {
749       if (LM>0 || (N&1)==0)
750       {
751          N >>= 1;
752          Y = X+N;
753          split = 1;
754          LM -= 1;
755          if (B==1)
756             fill = fill&1|fill<<1;
757          B = (B+1)>>1;
758       }
759    }
760 
761    if (split)
762    {
763       int qn;
764       int itheta=0;
765       int mbits, sbits, delta;
766       int qalloc;
767       int pulse_cap;
768       int offset;
769       int orig_fill;
770       celt_int32 tell;
771 
772       /* Decide on the resolution to give to the split parameter theta */
773       pulse_cap = m->logN[i]+(LM<<BITRES);
774       offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
775       qn = compute_qn(N, b, offset, pulse_cap, stereo);
776       if (stereo && i>=intensity)
777          qn = 1;
778       if (encode)
779       {
780          /* theta is the atan() of the ratio between the (normalized)
781             side and mid. With just that parameter, we can re-scale both
782             mid and side because we know that 1) they have unit norm and
783             2) they are orthogonal. */
784          itheta = stereo_itheta(X, Y, stereo, N);
785       }
786       tell = ec_tell_frac(ec);
787       if (qn!=1)
788       {
789          if (encode)
790             itheta = (itheta*qn+8192)>>14;
791 
792          /* Entropy coding of the angle. We use a uniform pdf for the
793             time split, a step for stereo, and a triangular one for the rest. */
794          if (stereo && N>2)
795          {
796             int p0 = 3;
797             int x = itheta;
798             int x0 = qn/2;
799             int ft = p0*(x0+1) + x0;
800             /* Use a probability of p0 up to itheta=8192 and then use 1 after */
801             if (encode)
802             {
803                ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
804             } else {
805                int fs;
806                fs=ec_decode(ec,ft);
807                if (fs<(x0+1)*p0)
808                   x=fs/p0;
809                else
810                   x=x0+1+(fs-(x0+1)*p0);
811                ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
812                itheta = x;
813             }
814          } else if (B0>1 || stereo) {
815             /* Uniform pdf */
816             if (encode)
817                ec_enc_uint(ec, itheta, qn+1);
818             else
819                itheta = ec_dec_uint(ec, qn+1);
820          } else {
821             int fs=1, ft;
822             ft = ((qn>>1)+1)*((qn>>1)+1);
823             if (encode)
824             {
825                int fl;
826 
827                fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
828                fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
829                 ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
830 
831                ec_encode(ec, fl, fl+fs, ft);
832             } else {
833                /* Triangular pdf */
834                int fl=0;
835                int fm;
836                fm = ec_decode(ec, ft);
837 
838                if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
839                {
840                   itheta = (isqrt32(8*(celt_uint32)fm + 1) - 1)>>1;
841                   fs = itheta + 1;
842                   fl = itheta*(itheta + 1)>>1;
843                }
844                else
845                {
846                   itheta = (2*(qn + 1)
847                    - isqrt32(8*(celt_uint32)(ft - fm - 1) + 1))>>1;
848                   fs = qn + 1 - itheta;
849                   fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
850                }
851 
852                ec_dec_update(ec, fl, fl+fs, ft);
853             }
854          }
855          itheta = (celt_int32)itheta*16384/qn;
856          if (encode && stereo)
857          {
858             if (itheta==0)
859                intensity_stereo(m, X, Y, bandE, i, N);
860             else
861                stereo_split(X, Y, N);
862          }
863          /* TODO: Renormalising X and Y *may* help fixed-point a bit at very high rate.
864                   Let's do that at higher complexity */
865       } else if (stereo) {
866          if (encode)
867          {
868             inv = itheta > 8192;
869             if (inv)
870             {
871                int j;
872                for (j=0;j<N;j++)
873                   Y[j] = -Y[j];
874             }
875             intensity_stereo(m, X, Y, bandE, i, N);
876          }
877          if (b>2<<BITRES && *remaining_bits > 2<<BITRES)
878          {
879             if (encode)
880                ec_enc_bit_logp(ec, inv, 2);
881             else
882                inv = ec_dec_bit_logp(ec, 2);
883          } else
884             inv = 0;
885          itheta = 0;
886       }
887       qalloc = ec_tell_frac(ec) - tell;
888       b -= qalloc;
889 
890       orig_fill = fill;
891       if (itheta == 0)
892       {
893          imid = 32767;
894          iside = 0;
895          fill &= (1<<B)-1;
896          delta = -16384;
897       } else if (itheta == 16384)
898       {
899          imid = 0;
900          iside = 32767;
901          fill &= (1<<B)-1<<B;
902          delta = 16384;
903       } else {
904          imid = bitexact_cos(itheta);
905          iside = bitexact_cos(16384-itheta);
906          /* This is the mid vs side allocation that minimizes squared error
907             in that band. */
908          delta = FRAC_MUL16(N-1<<7,bitexact_log2tan(iside,imid));
909       }
910 
911 #ifdef FIXED_POINT
912       mid = imid;
913       side = iside;
914 #else
915       mid = (1.f/32768)*imid;
916       side = (1.f/32768)*iside;
917 #endif
918 
919       /* This is a special case for N=2 that only works for stereo and takes
920          advantage of the fact that mid and side are orthogonal to encode
921          the side with just one bit. */
922       if (N==2 && stereo)
923       {
924          int c;
925          int sign=0;
926          celt_norm *x2, *y2;
927          mbits = b;
928          sbits = 0;
929          /* Only need one bit for the side */
930          if (itheta != 0 && itheta != 16384)
931             sbits = 1<<BITRES;
932          mbits -= sbits;
933          c = itheta > 8192;
934          *remaining_bits -= qalloc+sbits;
935 
936          x2 = c ? Y : X;
937          y2 = c ? X : Y;
938          if (sbits)
939          {
940             if (encode)
941             {
942                /* Here we only need to encode a sign for the side */
943                sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
944                ec_enc_bits(ec, sign, 1);
945             } else {
946                sign = ec_dec_bits(ec, 1);
947             }
948          }
949          sign = 1-2*sign;
950          /* We use orig_fill here because we want to fold the side, but if
951              itheta==16384, we'll have cleared the low bits of fill. */
952          cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, orig_fill);
953          /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
954              and there's no need to worry about mixing with the other channel. */
955          y2[0] = -sign*x2[1];
956          y2[1] = sign*x2[0];
957          if (resynth)
958          {
959             celt_norm tmp;
960             X[0] = MULT16_16_Q15(mid, X[0]);
961             X[1] = MULT16_16_Q15(mid, X[1]);
962             Y[0] = MULT16_16_Q15(side, Y[0]);
963             Y[1] = MULT16_16_Q15(side, Y[1]);
964             tmp = X[0];
965             X[0] = SUB16(tmp,Y[0]);
966             Y[0] = ADD16(tmp,Y[0]);
967             tmp = X[1];
968             X[1] = SUB16(tmp,Y[1]);
969             Y[1] = ADD16(tmp,Y[1]);
970          }
971       } else {
972          /* "Normal" split code */
973          celt_norm *next_lowband2=NULL;
974          celt_norm *next_lowband_out1=NULL;
975          int next_level=0;
976          celt_int32 rebalance;
977 
978          /* Give more bits to low-energy MDCTs than they would otherwise deserve */
979          if (B0>1 && !stereo && (itheta&0x3fff))
980          {
981             if (itheta > 8192)
982                /* Rough approximation for pre-echo masking */
983                delta -= delta>>(4-LM);
984             else
985                /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
986                delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
987          }
988          mbits = IMAX(0, IMIN(b, (b-delta)/2));
989          sbits = b-mbits;
990          *remaining_bits -= qalloc;
991 
992          if (lowband && !stereo)
993             next_lowband2 = lowband+N; /* >32-bit split case */
994 
995          /* Only stereo needs to pass on lowband_out. Otherwise, it's
996             handled at the end */
997          if (stereo)
998             next_lowband_out1 = lowband_out;
999          else
1000             next_level = level+1;
1001 
1002          rebalance = *remaining_bits;
1003          if (mbits >= sbits)
1004          {
1005             /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1006                mid for folding later */
1007             cm = quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,
1008                   lowband, resynth, ec, remaining_bits, LM, next_lowband_out1,
1009                   NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);
1010             rebalance = mbits - (rebalance-*remaining_bits);
1011             if (rebalance > 3<<BITRES && itheta!=0)
1012                sbits += rebalance - (3<<BITRES);
1013 
1014             /* For a stereo split, the high bits of fill are always zero, so no
1015                folding will be done to the side. */
1016             cm |= quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
1017                   next_lowband2, resynth, ec, remaining_bits, LM, NULL,
1018                   NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<(B0>>1&stereo-1);
1019          } else {
1020             /* For a stereo split, the high bits of fill are always zero, so no
1021                folding will be done to the side. */
1022             cm = quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
1023                   next_lowband2, resynth, ec, remaining_bits, LM, NULL,
1024                   NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<(B0>>1&stereo-1);
1025             rebalance = sbits - (rebalance-*remaining_bits);
1026             if (rebalance > 3<<BITRES && itheta!=16384)
1027                mbits += rebalance - (3<<BITRES);
1028             /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1029                mid for folding later */
1030             cm |= quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,
1031                   lowband, resynth, ec, remaining_bits, LM, next_lowband_out1,
1032                   NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);
1033          }
1034       }
1035 
1036    } else {
1037       /* This is the basic no-split case */
1038       q = bits2pulses(m, i, LM, b);
1039       curr_bits = pulses2bits(m, i, LM, q);
1040       *remaining_bits -= curr_bits;
1041 
1042       /* Ensures we can never bust the budget */
1043       while (*remaining_bits < 0 && q > 0)
1044       {
1045          *remaining_bits += curr_bits;
1046          q--;
1047          curr_bits = pulses2bits(m, i, LM, q);
1048          *remaining_bits -= curr_bits;
1049       }
1050 
1051       if (q!=0)
1052       {
1053          int K = get_pulses(q);
1054 
1055          /* Finally do the actual quantization */
1056          if (encode)
1057             cm = alg_quant(X, N, K, spread, B, resynth, ec, gain);
1058          else
1059             cm = alg_unquant(X, N, K, spread, B, ec, gain);
1060       } else {
1061          /* If there's no pulse, fill the band anyway */
1062          int j;
1063          if (resynth)
1064          {
1065             unsigned cm_mask;
1066             /*B can be as large as 16, so this shift might overflow an int on a
1067                16-bit platform; use a long to get defined behavior.*/
1068             cm_mask = (unsigned)(1UL<<B)-1;
1069             fill &= cm_mask;
1070             if (!fill)
1071             {
1072                for (j=0;j<N;j++)
1073                   X[j] = 0;
1074             } else {
1075                if (lowband == NULL)
1076                {
1077                   /* Noise */
1078                   for (j=0;j<N;j++)
1079                   {
1080                      *seed = lcg_rand(*seed);
1081                      X[j] = (celt_int32)(*seed)>>20;
1082                   }
1083                   cm = cm_mask;
1084                } else {
1085                   /* Folded spectrum */
1086                   for (j=0;j<N;j++)
1087                   {
1088                      celt_word16 tmp;
1089                      *seed = lcg_rand(*seed);
1090                      /* About 48 dB below the "normal" folding level */
1091                      tmp = QCONST16(1.0f/256, 10);
1092                      tmp = (*seed)&0x8000 ? tmp : -tmp;
1093                      X[j] = lowband[j]+tmp;
1094                   }
1095                   cm = fill;
1096                }
1097                renormalise_vector(X, N, gain);
1098             }
1099          }
1100       }
1101    }
1102 
1103    /* This code is used by the decoder and by the resynthesis-enabled encoder */
1104    if (resynth)
1105    {
1106       if (stereo)
1107       {
1108          if (N!=2)
1109             stereo_merge(X, Y, mid, N);
1110          if (inv)
1111          {
1112             int j;
1113             for (j=0;j<N;j++)
1114                Y[j] = -Y[j];
1115          }
1116       } else if (level == 0)
1117       {
1118          int k;
1119 
1120          /* Undo the sample reorganization going from time order to frequency order */
1121          if (B0>1)
1122             interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1123 
1124          /* Undo time-freq changes that we did earlier */
1125          N_B = N_B0;
1126          B = B0;
1127          for (k=0;k<time_divide;k++)
1128          {
1129             B >>= 1;
1130             N_B <<= 1;
1131             cm |= cm>>B;
1132             haar1(X, N_B, B);
1133          }
1134 
1135          for (k=0;k<recombine;k++)
1136          {
1137             static const unsigned char bit_deinterleave_table[16]={
1138               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1139               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1140             };
1141             cm = bit_deinterleave_table[cm];
1142             haar1(X, N0>>k, 1<<k);
1143          }
1144          B<<=recombine;
1145 
1146          /* Scale output for later folding */
1147          if (lowband_out)
1148          {
1149             int j;
1150             celt_word16 n;
1151             n = celt_sqrt(SHL32(EXTEND32(N0),22));
1152             for (j=0;j<N0;j++)
1153                lowband_out[j] = MULT16_16_Q15(n,X[j]);
1154          }
1155          cm &= (1<<B)-1;
1156       }
1157    }
1158    return cm;
1159 }
1160 
quant_all_bands(int encode,const CELTMode * m,int start,int end,celt_norm * _X,celt_norm * _Y,unsigned char * collapse_masks,const celt_ener * bandE,int * pulses,int shortBlocks,int spread,int dual_stereo,int intensity,int * tf_res,int resynth,celt_int32 total_bits,celt_int32 balance,ec_ctx * ec,int LM,int codedBands,celt_uint32 * seed)1161 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1162       celt_norm *_X, celt_norm *_Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
1163       int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, int resynth,
1164       celt_int32 total_bits, celt_int32 balance, ec_ctx *ec, int LM, int codedBands, celt_uint32 *seed)
1165 {
1166    int i;
1167    celt_int32 remaining_bits;
1168    const celt_int16 * restrict eBands = m->eBands;
1169    celt_norm * restrict norm, * restrict norm2;
1170    VARDECL(celt_norm, _norm);
1171    VARDECL(celt_norm, lowband_scratch);
1172    int B;
1173    int M;
1174    int lowband_offset;
1175    int update_lowband = 1;
1176    int C = _Y != NULL ? 2 : 1;
1177    SAVE_STACK;
1178 
1179    M = 1<<LM;
1180    B = shortBlocks ? M : 1;
1181    ALLOC(_norm, C*M*eBands[m->nbEBands], celt_norm);
1182    ALLOC(lowband_scratch, M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]), celt_norm);
1183    norm = _norm;
1184    norm2 = norm + M*eBands[m->nbEBands];
1185 
1186    lowband_offset = 0;
1187    for (i=start;i<end;i++)
1188    {
1189       celt_int32 tell;
1190       int b;
1191       int N;
1192       celt_int32 curr_balance;
1193       int effective_lowband=-1;
1194       celt_norm * restrict X, * restrict Y;
1195       int tf_change=0;
1196       unsigned x_cm;
1197       unsigned y_cm;
1198 
1199       X = _X+M*eBands[i];
1200       if (_Y!=NULL)
1201          Y = _Y+M*eBands[i];
1202       else
1203          Y = NULL;
1204       N = M*eBands[i+1]-M*eBands[i];
1205       tell = ec_tell_frac(ec);
1206 
1207       /* Compute how many bits we want to allocate to this band */
1208       if (i != start)
1209          balance -= tell;
1210       remaining_bits = total_bits-tell-1;
1211       if (i <= codedBands-1)
1212       {
1213          curr_balance = balance / IMIN(3, codedBands-i);
1214          b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1215       } else {
1216          b = 0;
1217       }
1218 
1219       if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1220             lowband_offset = i;
1221 
1222       tf_change = tf_res[i];
1223       if (i>=m->effEBands)
1224       {
1225          X=norm;
1226          if (_Y!=NULL)
1227             Y = norm;
1228       }
1229 
1230       /* Get a conservative estimate of the collapse_mask's for the bands we're
1231           going to be folding from. */
1232       if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1233       {
1234          int fold_start;
1235          int fold_end;
1236          int fold_i;
1237          /* This ensures we never repeat spectral content within one band */
1238          effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N);
1239          fold_start = lowband_offset;
1240          while(M*eBands[--fold_start] > effective_lowband);
1241          fold_end = lowband_offset-1;
1242          while(M*eBands[++fold_end] < effective_lowband+N);
1243          x_cm = y_cm = 0;
1244          fold_i = fold_start; do {
1245            x_cm |= collapse_masks[fold_i*C+0];
1246            y_cm |= collapse_masks[fold_i*C+C-1];
1247          } while (++fold_i<fold_end);
1248       }
1249       /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1250           always) be non-zero.*/
1251       else
1252          x_cm = y_cm = (1<<B)-1;
1253 
1254       if (dual_stereo && i==intensity)
1255       {
1256          int j;
1257 
1258          /* Switch off dual stereo to do intensity */
1259          dual_stereo = 0;
1260          for (j=M*eBands[start];j<M*eBands[i];j++)
1261             norm[j] = HALF32(norm[j]+norm2[j]);
1262       }
1263       if (dual_stereo)
1264       {
1265          x_cm = quant_band(encode, m, i, X, NULL, N, b/2, spread, B, intensity, tf_change,
1266                effective_lowband != -1 ? norm+effective_lowband : NULL, resynth, ec, &remaining_bits, LM,
1267                norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm);
1268          y_cm = quant_band(encode, m, i, Y, NULL, N, b/2, spread, B, intensity, tf_change,
1269                effective_lowband != -1 ? norm2+effective_lowband : NULL, resynth, ec, &remaining_bits, LM,
1270                norm2+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, y_cm);
1271       } else {
1272          x_cm = quant_band(encode, m, i, X, Y, N, b, spread, B, intensity, tf_change,
1273                effective_lowband != -1 ? norm+effective_lowband : NULL, resynth, ec, &remaining_bits, LM,
1274                norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm|y_cm);
1275          y_cm = x_cm;
1276       }
1277       collapse_masks[i*C+0] = (unsigned char)x_cm;
1278       collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1279       balance += pulses[i] + tell;
1280 
1281       /* Update the folding position only as long as we have 1 bit/sample depth */
1282       update_lowband = b>(N<<BITRES);
1283    }
1284    RESTORE_STACK;
1285 }
1286 
1287