1 /*
2  *	MP3 quantization
3  *
4  *	Copyright (c) 1999-2000 Mark Taylor
5  *	Copyright (c) 2000-2012 Robert Hegemann
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20  * Boston, MA 02111-1307, USA.
21  */
22 
23 /* $Id: vbrquantize.c,v 1.141.2.1 2012/02/07 13:40:37 robert Exp $ */
24 
25 #ifdef HAVE_CONFIG_H
26 #  include <config.h>
27 #endif
28 
29 
30 #include "lame.h"
31 #include "lame-machine.h"
32 #include "encoder.h"
33 #include "util.h"
34 #include "vbrquantize.h"
35 #include "quantize_pvt.h"
36 
37 
38 
39 
40 struct algo_s;
41 typedef struct algo_s algo_t;
42 
43 typedef void (*alloc_sf_f) (const algo_t *, const int *, const int *, int);
44 typedef uint8_t (*find_sf_f) (const FLOAT *, const FLOAT *, FLOAT, unsigned int, uint8_t);
45 
46 struct algo_s {
47     alloc_sf_f alloc;
48     find_sf_f  find;
49     const FLOAT *xr34orig;
50     lame_internal_flags *gfc;
51     gr_info *cod_info;
52     int     mingain_l;
53     int     mingain_s[3];
54 };
55 
56 
57 
/*  Remarks on optimizing compilers:
 *
 *  the MSVC compiler may get into aliasing problems when accessing
 *  memory through the fi_union. declaring it volatile does the trick here
 *
 *  the calc_sfb_noise_* functions are not inlined because the intel compiler
 *  optimized executables won't work as expected anymore
 */
66 
/* VOLATILE expands to `volatile` only for MSVC releases with
 * _MSC_VER below 1400; for every other compiler it expands to nothing.
 */
#if defined(_MSC_VER) && (_MSC_VER < 1400)
#  define VOLATILE volatile
#else
#  define VOLATILE
#endif

/* Overlay of a float and an int, used to read back the bit pattern
 * of a float value.
 */
typedef VOLATILE union {
    float   f;
    int     i;
} fi_union;
81 
82 
83 
84 #ifdef TAKEHIRO_IEEE754_HACK
85 #define DOUBLEX double
86 #else
87 #define DOUBLEX FLOAT
88 #endif
89 
90 #define MAGIC_FLOAT_def (65536*(128))
91 #define MAGIC_INT_def    0x4b000000
92 
93 #ifdef TAKEHIRO_IEEE754_HACK
94 #else
95 /*********************************************************************
96  * XRPOW_FTOI is a macro to convert floats to ints.
97  * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
98  *                                         ROUNDFAC= -0.0946
99  *
100  * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=asj43[x]
101  *                                   ROUNDFAC=0.4054
102  *********************************************************************/
103 #  define QUANTFAC(rx)  adj43[rx]
104 #  define ROUNDFAC_def 0.4054f
105 #  define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
106 #endif
107 
108 static int const MAGIC_INT = MAGIC_INT_def;
109 #ifndef TAKEHIRO_IEEE754_HACK
110 static DOUBLEX const ROUNDFAC = ROUNDFAC_def;
111 #endif
112 static DOUBLEX const MAGIC_FLOAT = MAGIC_FLOAT_def;
113 
114 
/* Returns the largest of the first bw entries of xr34.
 * The running maximum starts at 0, so the result is 0 when bw is 0 or
 * when no entry is greater than 0.
 */
static inline float
vec_max_c(const float * xr34, unsigned int bw)
{
    float   peak = 0;
    unsigned int quads = bw >> 2u;
    unsigned int const tail = (bw & 0x03u);

    /* groups of four entries */
    for (; quads > 0; --quads, xr34 += 4) {
        if (peak < xr34[0]) {
            peak = xr34[0];
        }
        if (peak < xr34[1]) {
            peak = xr34[1];
        }
        if (peak < xr34[2]) {
            peak = xr34[2];
        }
        if (peak < xr34[3]) {
            peak = xr34[3];
        }
    }
    /* the 0..3 entries that are left over */
    switch (tail) {
    case 3:
        if (peak < xr34[2]) {
            peak = xr34[2];
        }
        /* fall through */
    case 2:
        if (peak < xr34[1]) {
            peak = xr34[1];
        }
        /* fall through */
    case 1:
        if (peak < xr34[0]) {
            peak = xr34[0];
        }
        break;
    default:
        break;
    }
    return peak;
}
145 
146 
147 inline static  uint8_t
find_lowest_scalefac(const FLOAT xr34)148 find_lowest_scalefac(const FLOAT xr34)
149 {
150     uint8_t sf_ok = 255;
151     uint8_t sf = 128, delsf = 64;
152     uint8_t i;
153     FLOAT const ixmax_val = IXMAX_VAL;
154     for (i = 0; i < 8; ++i) {
155         FLOAT const xfsf = ipow20[sf] * xr34;
156         if (xfsf <= ixmax_val) {
157             sf_ok = sf;
158             sf -= delsf;
159         }
160         else {
161             sf += delsf;
162         }
163         delsf >>= 1;
164     }
165     return sf_ok;
166 }
167 
168 
169 inline static void
k_34_4(DOUBLEX x[4],int l3[4])170 k_34_4(DOUBLEX x[4], int l3[4])
171 {
172 #ifdef TAKEHIRO_IEEE754_HACK
173     fi_union fi[4];
174 
175     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL && x[2] <= IXMAX_VAL && x[3] <= IXMAX_VAL);
176     x[0] += MAGIC_FLOAT;
177     fi[0].f = x[0];
178     x[1] += MAGIC_FLOAT;
179     fi[1].f = x[1];
180     x[2] += MAGIC_FLOAT;
181     fi[2].f = x[2];
182     x[3] += MAGIC_FLOAT;
183     fi[3].f = x[3];
184     fi[0].f = x[0] + adj43asm[fi[0].i - MAGIC_INT];
185     fi[1].f = x[1] + adj43asm[fi[1].i - MAGIC_INT];
186     fi[2].f = x[2] + adj43asm[fi[2].i - MAGIC_INT];
187     fi[3].f = x[3] + adj43asm[fi[3].i - MAGIC_INT];
188     l3[0] = fi[0].i - MAGIC_INT;
189     l3[1] = fi[1].i - MAGIC_INT;
190     l3[2] = fi[2].i - MAGIC_INT;
191     l3[3] = fi[3].i - MAGIC_INT;
192 #else
193     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL && x[2] <= IXMAX_VAL && x[3] <= IXMAX_VAL);
194     XRPOW_FTOI(x[0], l3[0]);
195     XRPOW_FTOI(x[1], l3[1]);
196     XRPOW_FTOI(x[2], l3[2]);
197     XRPOW_FTOI(x[3], l3[3]);
198     x[0] += QUANTFAC(l3[0]);
199     x[1] += QUANTFAC(l3[1]);
200     x[2] += QUANTFAC(l3[2]);
201     x[3] += QUANTFAC(l3[3]);
202     XRPOW_FTOI(x[0], l3[0]);
203     XRPOW_FTOI(x[1], l3[1]);
204     XRPOW_FTOI(x[2], l3[2]);
205     XRPOW_FTOI(x[3], l3[3]);
206 #endif
207 }
208 
209 
210 
211 
212 
/*  call the calc_sfb_noise_* functions only with sf values
 *  for which sfpow34*xr34 <= IXMAX_VAL holds
 */
216 
217 static  FLOAT
calc_sfb_noise_x34(const FLOAT * xr,const FLOAT * xr34,unsigned int bw,uint8_t sf)218 calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_t sf)
219 {
220     DOUBLEX x[4];
221     int     l3[4];
222     const FLOAT sfpow = pow20[sf + Q_MAX2]; /*pow(2.0,sf/4.0); */
223     const FLOAT sfpow34 = ipow20[sf]; /*pow(sfpow,-3.0/4.0); */
224 
225     FLOAT   xfsf = 0;
226     unsigned int i = bw >> 2u;
227     unsigned int const remaining = (bw & 0x03u);
228 
229     while (i-- > 0) {
230         x[0] = sfpow34 * xr34[0];
231         x[1] = sfpow34 * xr34[1];
232         x[2] = sfpow34 * xr34[2];
233         x[3] = sfpow34 * xr34[3];
234 
235         k_34_4(x, l3);
236 
237         x[0] = fabsf(xr[0]) - sfpow * pow43[l3[0]];
238         x[1] = fabsf(xr[1]) - sfpow * pow43[l3[1]];
239         x[2] = fabsf(xr[2]) - sfpow * pow43[l3[2]];
240         x[3] = fabsf(xr[3]) - sfpow * pow43[l3[3]];
241         xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
242 
243         xr += 4;
244         xr34 += 4;
245     }
246     if (remaining) {
247         x[0] = x[1] = x[2] = x[3] = 0;
248         switch( remaining ) {
249         case 3: x[2] = sfpow34 * xr34[2];
250         case 2: x[1] = sfpow34 * xr34[1];
251         case 1: x[0] = sfpow34 * xr34[0];
252         }
253 
254         k_34_4(x, l3);
255         x[0] = x[1] = x[2] = x[3] = 0;
256 
257         switch( remaining ) {
258         case 3: x[2] = fabsf(xr[2]) - sfpow * pow43[l3[2]];
259         case 2: x[1] = fabsf(xr[1]) - sfpow * pow43[l3[1]];
260         case 1: x[0] = fabsf(xr[0]) - sfpow * pow43[l3[0]];
261         }
262         xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
263     }
264     return xfsf;
265 }
266 
267 
268 
269 struct calc_noise_cache {
270     int     valid;
271     FLOAT   value;
272 };
273 
274 typedef struct calc_noise_cache calc_noise_cache_t;
275 
276 
277 static  uint8_t
tri_calc_sfb_noise_x34(const FLOAT * xr,const FLOAT * xr34,FLOAT l3_xmin,unsigned int bw,uint8_t sf,calc_noise_cache_t * did_it)278 tri_calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw,
279                        uint8_t sf, calc_noise_cache_t * did_it)
280 {
281     if (did_it[sf].valid == 0) {
282         did_it[sf].valid = 1;
283         did_it[sf].value = calc_sfb_noise_x34(xr, xr34, bw, sf);
284     }
285     if (l3_xmin < did_it[sf].value) {
286         return 1;
287     }
288     if (sf < 255) {
289         uint8_t const sf_x = sf + 1;
290         if (did_it[sf_x].valid == 0) {
291             did_it[sf_x].valid = 1;
292             did_it[sf_x].value = calc_sfb_noise_x34(xr, xr34, bw, sf_x);
293         }
294         if (l3_xmin < did_it[sf_x].value) {
295             return 1;
296         }
297     }
298     if (sf > 0) {
299         uint8_t const sf_x = sf - 1;
300         if (did_it[sf_x].valid == 0) {
301             did_it[sf_x].valid = 1;
302             did_it[sf_x].value = calc_sfb_noise_x34(xr, xr34, bw, sf_x);
303         }
304         if (l3_xmin < did_it[sf_x].value) {
305             return 1;
306         }
307     }
308     return 0;
309 }
310 
311 
312 /**
313  *  Robert Hegemann 2001-05-01
314  *  calculates quantization step size determined by allowed masking
315  */
316 static int
calc_scalefac(FLOAT l3_xmin,int bw)317 calc_scalefac(FLOAT l3_xmin, int bw)
318 {
319     FLOAT const c = 5.799142446; /* 10 * 10^(2/3) * log10(4/3) */
320     return 210 + (int) (c * log10f(l3_xmin / bw) - .5f);
321 }
322 
323 static uint8_t
guess_scalefac_x34(const FLOAT * xr,const FLOAT * xr34,FLOAT l3_xmin,unsigned int bw,uint8_t sf_min)324 guess_scalefac_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw, uint8_t sf_min)
325 {
326     int const guess = calc_scalefac(l3_xmin, bw);
327     if (guess < sf_min) return sf_min;
328     if (guess >= 255) return 255;
329     (void) xr;
330     (void) xr34;
331     return guess;
332 }
333 
334 
335 /* the find_scalefac* routines calculate
336  * a quantization step size which would
337  * introduce as much noise as is allowed.
338  * The larger the step size the more
339  * quantization noise we'll get. The
340  * scalefactors are there to lower the
341  * global step size, allowing limited
342  * differences in quantization step sizes
343  * per band (shaping the noise).
344  */
345 
346 static  uint8_t
find_scalefac_x34(const FLOAT * xr,const FLOAT * xr34,FLOAT l3_xmin,unsigned int bw,uint8_t sf_min)347 find_scalefac_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw,
348                   uint8_t sf_min)
349 {
350     calc_noise_cache_t did_it[256];
351     uint8_t sf = 128, sf_ok = 255, delsf = 128, seen_good_one = 0, i;
352     memset(did_it, 0, sizeof(did_it));
353     for (i = 0; i < 8; ++i) {
354         delsf >>= 1;
355         if (sf <= sf_min) {
356             sf += delsf;
357         }
358         else {
359             uint8_t const bad = tri_calc_sfb_noise_x34(xr, xr34, l3_xmin, bw, sf, did_it);
360             if (bad) {  /* distortion.  try a smaller scalefactor */
361                 sf -= delsf;
362             }
363             else {
364                 sf_ok = sf;
365                 sf += delsf;
366                 seen_good_one = 1;
367             }
368         }
369     }
370     /*  returning a scalefac without distortion, if possible
371      */
372     if (seen_good_one > 0) {
373         sf = sf_ok;
374     }
375     if (sf <= sf_min) {
376         sf = sf_min;
377     }
378     return sf;
379 }
380 
381 
382 
383 /***********************************************************************
384  *
385  *      calc_short_block_vbr_sf()
386  *      calc_long_block_vbr_sf()
387  *
388  *  Mark Taylor 2000-??-??
389  *  Robert Hegemann 2000-10-25 made functions of it
390  *
391  ***********************************************************************/
392 
393 /* a variation for vbr-mtrh */
394 static int
block_sf(algo_t * that,const FLOAT l3_xmin[SFBMAX],int vbrsf[SFBMAX],int vbrsfmin[SFBMAX])395 block_sf(algo_t * that, const FLOAT l3_xmin[SFBMAX], int vbrsf[SFBMAX], int vbrsfmin[SFBMAX])
396 {
397     FLOAT   max_xr34;
398     const FLOAT *const xr = &that->cod_info->xr[0];
399     const FLOAT *const xr34_orig = &that->xr34orig[0];
400     const int *const width = &that->cod_info->width[0];
401     const char *const energy_above_cutoff = &that->cod_info->energy_above_cutoff[0];
402     unsigned int const max_nonzero_coeff = (unsigned int) that->cod_info->max_nonzero_coeff;
403     uint8_t maxsf = 0;
404     int     sfb = 0, m_o = -1;
405     unsigned int j = 0, i = 0;
406     int const psymax = that->cod_info->psymax;
407 
408     assert(that->cod_info->max_nonzero_coeff >= 0);
409 
410     that->mingain_l = 0;
411     that->mingain_s[0] = 0;
412     that->mingain_s[1] = 0;
413     that->mingain_s[2] = 0;
414     while (j <= max_nonzero_coeff) {
415         unsigned int const w = (unsigned int) width[sfb];
416         unsigned int const m = (unsigned int) (max_nonzero_coeff - j + 1);
417         unsigned int l = w;
418         uint8_t m1, m2;
419         if (l > m) {
420             l = m;
421         }
422         max_xr34 = vec_max_c(&xr34_orig[j], l);
423 
424         m1 = find_lowest_scalefac(max_xr34);
425         vbrsfmin[sfb] = m1;
426         if (that->mingain_l < m1) {
427             that->mingain_l = m1;
428         }
429         if (that->mingain_s[i] < m1) {
430             that->mingain_s[i] = m1;
431         }
432         if (++i > 2) {
433             i = 0;
434         }
435         if (sfb < psymax && w > 2) { /* mpeg2.5 at 8 kHz doesn't use all scalefactors, unused have width 2 */
436             if (energy_above_cutoff[sfb]) {
437                 m2 = that->find(&xr[j], &xr34_orig[j], l3_xmin[sfb], l, m1);
438 #if 0
439                 if (0) {
440                     /** Robert Hegemann 2007-09-29:
441                      *  It seems here is some more potential for speed improvements.
442                      *  Current find method does 11-18 quantization calculations.
443                      *  Using a "good guess" may help to reduce this amount.
444                      */
445                     uint8_t guess = calc_scalefac(l3_xmin[sfb], l);
446                     DEBUGF(that->gfc, "sfb=%3d guess=%3d found=%3d diff=%3d\n", sfb, guess, m2,
447                            m2 - guess);
448                 }
449 #endif
450                 if (maxsf < m2) {
451                     maxsf = m2;
452                 }
453                 if (m_o < m2 && m2 < 255) {
454                     m_o = m2;
455                 }
456             }
457             else {
458                 m2 = 255;
459                 maxsf = 255;
460             }
461         }
462         else {
463             if (maxsf < m1) {
464                 maxsf = m1;
465             }
466             m2 = maxsf;
467         }
468         vbrsf[sfb] = m2;
469         ++sfb;
470         j += w;
471     }
472     for (; sfb < SFBMAX; ++sfb) {
473         vbrsf[sfb] = maxsf;
474         vbrsfmin[sfb] = 0;
475     }
476     if (m_o > -1) {
477         maxsf = m_o;
478         for (sfb = 0; sfb < SFBMAX; ++sfb) {
479             if (vbrsf[sfb] == 255) {
480                 vbrsf[sfb] = m_o;
481             }
482         }
483     }
484     return maxsf;
485 }
486 
487 
488 
489 /***********************************************************************
490  *
491  *  quantize xr34 based on scalefactors
492  *
493  *  block_xr34
494  *
495  *  Mark Taylor 2000-??-??
496  *  Robert Hegemann 2000-10-20 made functions of them
497  *
498  ***********************************************************************/
499 
500 static void
quantize_x34(const algo_t * that)501 quantize_x34(const algo_t * that)
502 {
503     DOUBLEX x[4];
504     const FLOAT *xr34_orig = that->xr34orig;
505     gr_info *const cod_info = that->cod_info;
506     int const ifqstep = (cod_info->scalefac_scale == 0) ? 2 : 4;
507     int    *l3 = cod_info->l3_enc;
508     unsigned int j = 0, sfb = 0;
509     unsigned int const max_nonzero_coeff = (unsigned int) cod_info->max_nonzero_coeff;
510 
511     assert(cod_info->max_nonzero_coeff >= 0);
512     assert(cod_info->max_nonzero_coeff < 576);
513 
514     while (j <= max_nonzero_coeff) {
515         int const s =
516             (cod_info->scalefac[sfb] + (cod_info->preflag ? pretab[sfb] : 0)) * ifqstep
517             + cod_info->subblock_gain[cod_info->window[sfb]] * 8;
518         uint8_t const sfac = (uint8_t) (cod_info->global_gain - s);
519         FLOAT const sfpow34 = ipow20[sfac];
520         unsigned int const w = (unsigned int) cod_info->width[sfb];
521         unsigned int const m = (unsigned int) (max_nonzero_coeff - j + 1);
522         unsigned int i, remaining;
523 
524         assert((cod_info->global_gain - s) >= 0);
525         assert(cod_info->width[sfb] >= 0);
526         j += w;
527         ++sfb;
528 
529         i = (w <= m) ? w : m;
530         remaining = (i & 0x03u);
531         i >>= 2u;
532 
533         while (i-- > 0) {
534             x[0] = sfpow34 * xr34_orig[0];
535             x[1] = sfpow34 * xr34_orig[1];
536             x[2] = sfpow34 * xr34_orig[2];
537             x[3] = sfpow34 * xr34_orig[3];
538 
539             k_34_4(x, l3);
540 
541             l3 += 4;
542             xr34_orig += 4;
543         }
544         if (remaining) {
545             int tmp_l3[4];
546             x[0] = x[1] = x[2] = x[3] = 0;
547             switch( remaining ) {
548             case 3: x[2] = sfpow34 * xr34_orig[2];
549             case 2: x[1] = sfpow34 * xr34_orig[1];
550             case 1: x[0] = sfpow34 * xr34_orig[0];
551             }
552 
553             k_34_4(x, tmp_l3);
554 
555             switch( remaining ) {
556             case 3: l3[2] = tmp_l3[2];
557             case 2: l3[1] = tmp_l3[1];
558             case 1: l3[0] = tmp_l3[0];
559             }
560 
561             l3 += remaining;
562             xr34_orig += remaining;
563         }
564     }
565 }
566 
567 
568 
569 static const uint8_t max_range_short[SBMAX_s * 3] = {
570     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
571     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
572     0, 0, 0
573 };
574 
575 static const uint8_t max_range_long[SBMAX_l] = {
576     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0
577 };
578 
579 static const uint8_t max_range_long_lsf_pretab[SBMAX_l] = {
580     7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
581 };
582 
583 
584 
585 /*
586     sfb=0..5  scalefac < 16
587     sfb>5     scalefac < 8
588 
589     ifqstep = ( cod_info->scalefac_scale == 0 ) ? 2 : 4;
590     ol_sf =  (cod_info->global_gain-210.0);
591     ol_sf -= 8*cod_info->subblock_gain[i];
592     ol_sf -= ifqstep*scalefac[gr][ch].s[sfb][i];
593 */
594 
595 static void
set_subblock_gain(gr_info * cod_info,const int mingain_s[3],int sf[])596 set_subblock_gain(gr_info * cod_info, const int mingain_s[3], int sf[])
597 {
598     const int maxrange1 = 15, maxrange2 = 7;
599     const int ifqstepShift = (cod_info->scalefac_scale == 0) ? 1 : 2;
600     int    *const sbg = cod_info->subblock_gain;
601     unsigned int const psymax = (unsigned int) cod_info->psymax;
602     unsigned int psydiv = 18;
603     int     sbg0, sbg1, sbg2;
604     unsigned int sfb, i;
605     int     min_sbg = 7;
606 
607     if (psydiv > psymax) {
608         psydiv = psymax;
609     }
610     for (i = 0; i < 3; ++i) {
611         int     maxsf1 = 0, maxsf2 = 0, minsf = 1000;
612         /* see if we should use subblock gain */
613         for (sfb = i; sfb < psydiv; sfb += 3) { /* part 1 */
614             int const v = -sf[sfb];
615             if (maxsf1 < v) {
616                 maxsf1 = v;
617             }
618             if (minsf > v) {
619                 minsf = v;
620             }
621         }
622         for (; sfb < SFBMAX; sfb += 3) { /* part 2 */
623             int const v = -sf[sfb];
624             if (maxsf2 < v) {
625                 maxsf2 = v;
626             }
627             if (minsf > v) {
628                 minsf = v;
629             }
630         }
631 
632         /* boost subblock gain as little as possible so we can
633          * reach maxsf1 with scalefactors
634          * 8*sbg >= maxsf1
635          */
636         {
637             int const m1 = maxsf1 - (maxrange1 << ifqstepShift);
638             int const m2 = maxsf2 - (maxrange2 << ifqstepShift);
639 
640             maxsf1 = Max(m1, m2);
641         }
642         if (minsf > 0) {
643             sbg[i] = minsf >> 3;
644         }
645         else {
646             sbg[i] = 0;
647         }
648         if (maxsf1 > 0) {
649             int const m1 = sbg[i];
650             int const m2 = (maxsf1 + 7) >> 3;
651             sbg[i] = Max(m1, m2);
652         }
653         if (sbg[i] > 0 && mingain_s[i] > (cod_info->global_gain - sbg[i] * 8)) {
654             sbg[i] = (cod_info->global_gain - mingain_s[i]) >> 3;
655         }
656         if (sbg[i] > 7) {
657             sbg[i] = 7;
658         }
659         if (min_sbg > sbg[i]) {
660             min_sbg = sbg[i];
661         }
662     }
663     sbg0 = sbg[0] * 8;
664     sbg1 = sbg[1] * 8;
665     sbg2 = sbg[2] * 8;
666     for (sfb = 0; sfb < SFBMAX; sfb += 3) {
667         sf[sfb + 0] += sbg0;
668         sf[sfb + 1] += sbg1;
669         sf[sfb + 2] += sbg2;
670     }
671     if (min_sbg > 0) {
672         for (i = 0; i < 3; ++i) {
673             sbg[i] -= min_sbg;
674         }
675         cod_info->global_gain -= min_sbg * 8;
676     }
677 }
678 
679 
680 
681 /*
682 	  ifqstep = ( cod_info->scalefac_scale == 0 ) ? 2 : 4;
683 	  ol_sf =  (cod_info->global_gain-210.0);
684 	  ol_sf -= ifqstep*scalefac[gr][ch].l[sfb];
685 	  if (cod_info->preflag && sfb>=11)
686 	  ol_sf -= ifqstep*pretab[sfb];
687 */
688 static void
set_scalefacs(gr_info * cod_info,const int * vbrsfmin,int sf[],const uint8_t * max_range)689 set_scalefacs(gr_info * cod_info, const int *vbrsfmin, int sf[], const uint8_t * max_range)
690 {
691     const int ifqstep = (cod_info->scalefac_scale == 0) ? 2 : 4;
692     const int ifqstepShift = (cod_info->scalefac_scale == 0) ? 1 : 2;
693     int    *const scalefac = cod_info->scalefac;
694     int const sfbmax = cod_info->sfbmax;
695     int     sfb;
696     int const *const sbg = cod_info->subblock_gain;
697     int const *const window = cod_info->window;
698     int const preflag = cod_info->preflag;
699 
700     if (preflag) {
701         for (sfb = 11; sfb < sfbmax; ++sfb) {
702             sf[sfb] += pretab[sfb] * ifqstep;
703         }
704     }
705     for (sfb = 0; sfb < sfbmax; ++sfb) {
706         int const gain = cod_info->global_gain - (sbg[window[sfb]] * 8)
707             - ((preflag ? pretab[sfb] : 0) * ifqstep);
708 
709         if (sf[sfb] < 0) {
710             int const m = gain - vbrsfmin[sfb];
711             /* ifqstep*scalefac >= -sf[sfb], so round UP */
712             scalefac[sfb] = (ifqstep - 1 - sf[sfb]) >> ifqstepShift;
713 
714             if (scalefac[sfb] > max_range[sfb]) {
715                 scalefac[sfb] = max_range[sfb];
716             }
717             if (scalefac[sfb] > 0 && (scalefac[sfb] << ifqstepShift) > m) {
718                 scalefac[sfb] = m >> ifqstepShift;
719             }
720         }
721         else {
722             scalefac[sfb] = 0;
723         }
724     }
725     for (; sfb < SFBMAX; ++sfb) {
726         scalefac[sfb] = 0; /* sfb21 */
727     }
728 }
729 
730 
731 #ifndef NDEBUG
732 static int
checkScalefactor(const gr_info * cod_info,const int vbrsfmin[SFBMAX])733 checkScalefactor(const gr_info * cod_info, const int vbrsfmin[SFBMAX])
734 {
735     int const ifqstep = cod_info->scalefac_scale == 0 ? 2 : 4;
736     int     sfb;
737     for (sfb = 0; sfb < cod_info->psymax; ++sfb) {
738         const int s =
739             ((cod_info->scalefac[sfb] +
740               (cod_info->preflag ? pretab[sfb] : 0)) * ifqstep) +
741             cod_info->subblock_gain[cod_info->window[sfb]] * 8;
742 
743         if ((cod_info->global_gain - s) < vbrsfmin[sfb]) {
744             /*
745                fprintf( stdout, "sf %d\n", sfb );
746                fprintf( stdout, "min %d\n", vbrsfmin[sfb] );
747                fprintf( stdout, "ggain %d\n", cod_info->global_gain );
748                fprintf( stdout, "scalefac %d\n", cod_info->scalefac[sfb] );
749                fprintf( stdout, "pretab %d\n", (cod_info->preflag ? pretab[sfb] : 0) );
750                fprintf( stdout, "scale %d\n", (cod_info->scalefac_scale + 1) );
751                fprintf( stdout, "subgain %d\n", cod_info->subblock_gain[cod_info->window[sfb]] * 8 );
752                fflush( stdout );
753                exit(-1);
754              */
755             return 0;
756         }
757     }
758     return 1;
759 }
760 #endif
761 
762 
763 /******************************************************************
764  *
765  *  short block scalefacs
766  *
767  ******************************************************************/
768 
769 static void
short_block_constrain(const algo_t * that,const int vbrsf[SFBMAX],const int vbrsfmin[SFBMAX],int vbrmax)770 short_block_constrain(const algo_t * that, const int vbrsf[SFBMAX],
771                       const int vbrsfmin[SFBMAX], int vbrmax)
772 {
773     gr_info *const cod_info = that->cod_info;
774     lame_internal_flags const *const gfc = that->gfc;
775     SessionConfig_t const *const cfg = &gfc->cfg;
776     int const maxminsfb = that->mingain_l;
777     int     mover, maxover0 = 0, maxover1 = 0, delta = 0;
778     int     v, v0, v1;
779     int     sfb;
780     int const psymax = cod_info->psymax;
781 
782     for (sfb = 0; sfb < psymax; ++sfb) {
783         assert(vbrsf[sfb] >= vbrsfmin[sfb]);
784         v = vbrmax - vbrsf[sfb];
785         if (delta < v) {
786             delta = v;
787         }
788         v0 = v - (4 * 14 + 2 * max_range_short[sfb]);
789         v1 = v - (4 * 14 + 4 * max_range_short[sfb]);
790         if (maxover0 < v0) {
791             maxover0 = v0;
792         }
793         if (maxover1 < v1) {
794             maxover1 = v1;
795         }
796     }
797     if (cfg->noise_shaping == 2) {
798         /* allow scalefac_scale=1 */
799         mover = Min(maxover0, maxover1);
800     }
801     else {
802         mover = maxover0;
803     }
804     if (delta > mover) {
805         delta = mover;
806     }
807     vbrmax -= delta;
808     maxover0 -= mover;
809     maxover1 -= mover;
810 
811     if (maxover0 == 0) {
812         cod_info->scalefac_scale = 0;
813     }
814     else if (maxover1 == 0) {
815         cod_info->scalefac_scale = 1;
816     }
817     if (vbrmax < maxminsfb) {
818         vbrmax = maxminsfb;
819     }
820     cod_info->global_gain = vbrmax;
821 
822     if (cod_info->global_gain < 0) {
823         cod_info->global_gain = 0;
824     }
825     else if (cod_info->global_gain > 255) {
826         cod_info->global_gain = 255;
827     }
828     {
829         int     sf_temp[SFBMAX];
830         for (sfb = 0; sfb < SFBMAX; ++sfb) {
831             sf_temp[sfb] = vbrsf[sfb] - vbrmax;
832         }
833         set_subblock_gain(cod_info, &that->mingain_s[0], sf_temp);
834         set_scalefacs(cod_info, vbrsfmin, sf_temp, max_range_short);
835     }
836     assert(checkScalefactor(cod_info, vbrsfmin));
837 }
838 
839 
840 
841 /******************************************************************
842  *
843  *  long block scalefacs
844  *
845  ******************************************************************/
846 
847 static void
long_block_constrain(const algo_t * that,const int vbrsf[SFBMAX],const int vbrsfmin[SFBMAX],int vbrmax)848 long_block_constrain(const algo_t * that, const int vbrsf[SFBMAX], const int vbrsfmin[SFBMAX],
849                      int vbrmax)
850 {
851     gr_info *const cod_info = that->cod_info;
852     lame_internal_flags const *const gfc = that->gfc;
853     SessionConfig_t const *const cfg = &gfc->cfg;
854     uint8_t const *max_rangep;
855     int const maxminsfb = that->mingain_l;
856     int     sfb;
857     int     maxover0, maxover1, maxover0p, maxover1p, mover, delta = 0;
858     int     v, v0, v1, v0p, v1p, vm0p = 1, vm1p = 1;
859     int const psymax = cod_info->psymax;
860 
861     max_rangep = cfg->mode_gr == 2 ? max_range_long : max_range_long_lsf_pretab;
862 
863     maxover0 = 0;
864     maxover1 = 0;
865     maxover0p = 0;      /* pretab */
866     maxover1p = 0;      /* pretab */
867 
868     for (sfb = 0; sfb < psymax; ++sfb) {
869         assert(vbrsf[sfb] >= vbrsfmin[sfb]);
870         v = vbrmax - vbrsf[sfb];
871         if (delta < v) {
872             delta = v;
873         }
874         v0 = v - 2 * max_range_long[sfb];
875         v1 = v - 4 * max_range_long[sfb];
876         v0p = v - 2 * (max_rangep[sfb] + pretab[sfb]);
877         v1p = v - 4 * (max_rangep[sfb] + pretab[sfb]);
878         if (maxover0 < v0) {
879             maxover0 = v0;
880         }
881         if (maxover1 < v1) {
882             maxover1 = v1;
883         }
884         if (maxover0p < v0p) {
885             maxover0p = v0p;
886         }
887         if (maxover1p < v1p) {
888             maxover1p = v1p;
889         }
890     }
891     if (vm0p == 1) {
892         int     gain = vbrmax - maxover0p;
893         if (gain < maxminsfb) {
894             gain = maxminsfb;
895         }
896         for (sfb = 0; sfb < psymax; ++sfb) {
897             int const a = (gain - vbrsfmin[sfb]) - 2 * pretab[sfb];
898             if (a <= 0) {
899                 vm0p = 0;
900                 vm1p = 0;
901                 break;
902             }
903         }
904     }
905     if (vm1p == 1) {
906         int     gain = vbrmax - maxover1p;
907         if (gain < maxminsfb) {
908             gain = maxminsfb;
909         }
910         for (sfb = 0; sfb < psymax; ++sfb) {
911             int const b = (gain - vbrsfmin[sfb]) - 4 * pretab[sfb];
912             if (b <= 0) {
913                 vm1p = 0;
914                 break;
915             }
916         }
917     }
918     if (vm0p == 0) {
919         maxover0p = maxover0;
920     }
921     if (vm1p == 0) {
922         maxover1p = maxover1;
923     }
924     if (cfg->noise_shaping != 2) {
925         maxover1 = maxover0;
926         maxover1p = maxover0p;
927     }
928     mover = Min(maxover0, maxover0p);
929     mover = Min(mover, maxover1);
930     mover = Min(mover, maxover1p);
931 
932     if (delta > mover) {
933         delta = mover;
934     }
935     vbrmax -= delta;
936     if (vbrmax < maxminsfb) {
937         vbrmax = maxminsfb;
938     }
939     maxover0 -= mover;
940     maxover0p -= mover;
941     maxover1 -= mover;
942     maxover1p -= mover;
943 
944     if (maxover0 == 0) {
945         cod_info->scalefac_scale = 0;
946         cod_info->preflag = 0;
947         max_rangep = max_range_long;
948     }
949     else if (maxover0p == 0) {
950         cod_info->scalefac_scale = 0;
951         cod_info->preflag = 1;
952     }
953     else if (maxover1 == 0) {
954         cod_info->scalefac_scale = 1;
955         cod_info->preflag = 0;
956         max_rangep = max_range_long;
957     }
958     else if (maxover1p == 0) {
959         cod_info->scalefac_scale = 1;
960         cod_info->preflag = 1;
961     }
962     else {
963         assert(0);      /* this should not happen */
964     }
965     cod_info->global_gain = vbrmax;
966     if (cod_info->global_gain < 0) {
967         cod_info->global_gain = 0;
968     }
969     else if (cod_info->global_gain > 255) {
970         cod_info->global_gain = 255;
971     }
972     {
973         int     sf_temp[SFBMAX];
974         for (sfb = 0; sfb < SFBMAX; ++sfb) {
975             sf_temp[sfb] = vbrsf[sfb] - vbrmax;
976         }
977         set_scalefacs(cod_info, vbrsfmin, sf_temp, max_rangep);
978     }
979     assert(checkScalefactor(cod_info, vbrsfmin));
980 }
981 
982 
983 
984 static void
bitcount(const algo_t * that)985 bitcount(const algo_t * that)
986 {
987     int     rc = scale_bitcount(that->gfc, that->cod_info);
988 
989     if (rc == 0) {
990         return;
991     }
992     /*  this should not happen due to the way the scalefactors are selected  */
993     ERRORF(that->gfc, "INTERNAL ERROR IN VBR NEW CODE (986), please send bug report\n");
994     exit(-1);
995 }
996 
997 
998 
999 static int
quantizeAndCountBits(const algo_t * that)1000 quantizeAndCountBits(const algo_t * that)
1001 {
1002     quantize_x34(that);
1003     that->cod_info->part2_3_length = noquant_count_bits(that->gfc, that->cod_info, 0);
1004     return that->cod_info->part2_3_length;
1005 }
1006 
1007 
1008 
1009 
1010 
1011 static int
tryGlobalStepsize(const algo_t * that,const int sfwork[SFBMAX],const int vbrsfmin[SFBMAX],int delta)1012 tryGlobalStepsize(const algo_t * that, const int sfwork[SFBMAX],
1013                   const int vbrsfmin[SFBMAX], int delta)
1014 {
1015     FLOAT const xrpow_max = that->cod_info->xrpow_max;
1016     int     sftemp[SFBMAX], i, nbits;
1017     int     gain, vbrmax = 0;
1018     for (i = 0; i < SFBMAX; ++i) {
1019         gain = sfwork[i] + delta;
1020         if (gain < vbrsfmin[i]) {
1021             gain = vbrsfmin[i];
1022         }
1023         if (gain > 255) {
1024             gain = 255;
1025         }
1026         if (vbrmax < gain) {
1027             vbrmax = gain;
1028         }
1029         sftemp[i] = gain;
1030     }
1031     that->alloc(that, sftemp, vbrsfmin, vbrmax);
1032     bitcount(that);
1033     nbits = quantizeAndCountBits(that);
1034     that->cod_info->xrpow_max = xrpow_max;
1035     return nbits;
1036 }
1037 
1038 
1039 
1040 static void
searchGlobalStepsizeMax(const algo_t * that,const int sfwork[SFBMAX],const int vbrsfmin[SFBMAX],int target)1041 searchGlobalStepsizeMax(const algo_t * that, const int sfwork[SFBMAX],
1042                         const int vbrsfmin[SFBMAX], int target)
1043 {
1044     gr_info const *const cod_info = that->cod_info;
1045     const int gain = cod_info->global_gain;
1046     int     curr = gain;
1047     int     gain_ok = 1024;
1048     int     nbits = LARGE_BITS;
1049     int     l = gain, r = 512;
1050 
1051     assert(gain >= 0);
1052     while (l <= r) {
1053         curr = (l + r) >> 1;
1054         nbits = tryGlobalStepsize(that, sfwork, vbrsfmin, curr - gain);
1055         if (nbits == 0 || (nbits + cod_info->part2_length) < target) {
1056             r = curr - 1;
1057             gain_ok = curr;
1058         }
1059         else {
1060             l = curr + 1;
1061             if (gain_ok == 1024) {
1062                 gain_ok = curr;
1063             }
1064         }
1065     }
1066     if (gain_ok != curr) {
1067         curr = gain_ok;
1068         nbits = tryGlobalStepsize(that, sfwork, vbrsfmin, curr - gain);
1069     }
1070 }
1071 
1072 
1073 
/*  Returns the largest value of (255 - sfwork[i]) over all SFBMAX entries,
 *  but never less than 0.
 *
 *  Debug builds assert that every entry, and the result, lies in 0..255.
 */
static int
sfDepth(const int sfwork[SFBMAX])
{
    int     depth = 0;
    int     sfb;

    for (sfb = 0; sfb < SFBMAX; ++sfb) {
        int const headroom = 255 - sfwork[sfb];

        assert(sfwork[sfb] >= 0);
        assert(sfwork[sfb] <= 255);
        depth = (headroom > depth) ? headroom : depth;
    }
    assert(depth >= 0);
    assert(depth <= 255);
    return depth;
}
1091 
1092 
/*  Copies sfwork into sf_out, limiting every entry to at most `cut`.
 *
 *  Each element is read before the element with the same index is written,
 *  so sfwork and sf_out may refer to the same array.
 */
static void
cutDistribution(const int sfwork[SFBMAX], int sf_out[SFBMAX], int cut)
{
    int     sfb;

    for (sfb = 0; sfb < SFBMAX; ++sfb) {
        int     v = sfwork[sfb];

        if (v > cut) {
            v = cut;
        }
        sf_out[sfb] = v;
    }
}
1102 
1103 
/*  Writes a "flattened" copy of sfwork to sf_out and returns its largest
 *  entry (never less than 0).
 *
 *  For dm > 0 every entry x becomes x + (k * (p - x)) / dm, using integer
 *  division, and is then limited to the range 0..255.
 *  For dm <= 0 the entries are copied unchanged (no range limiting).
 */
static int
flattenDistribution(const int sfwork[SFBMAX], int sf_out[SFBMAX], int dm, int k, int p)
{
    int     peak = 0;
    int     sfb;

    for (sfb = 0; sfb < SFBMAX; ++sfb) {
        int     v = sfwork[sfb];

        if (dm > 0) {
            /* pull the value towards p by the fraction k/dm */
            v += (k * (p - v)) / dm;
            if (v < 0) {
                v = 0;
            }
            else if (v > 255) {
                v = 255;
            }
        }
        sf_out[sfb] = v;
        if (peak < v) {
            peak = v;
        }
    }
    return peak;
}
1138 
1139 
1140 static int
tryThatOne(algo_t const * that,const int sftemp[SFBMAX],const int vbrsfmin[SFBMAX],int vbrmax)1141 tryThatOne(algo_t const* that, const int sftemp[SFBMAX], const int vbrsfmin[SFBMAX], int vbrmax)
1142 {
1143     FLOAT const xrpow_max = that->cod_info->xrpow_max;
1144     int     nbits = LARGE_BITS;
1145     that->alloc(that, sftemp, vbrsfmin, vbrmax);
1146     bitcount(that);
1147     nbits = quantizeAndCountBits(that);
1148     nbits += that->cod_info->part2_length;
1149     that->cod_info->xrpow_max = xrpow_max;
1150     return nbits;
1151 }
1152 
1153 
1154 static void
outOfBitsStrategy(algo_t const * that,const int sfwork[SFBMAX],const int vbrsfmin[SFBMAX],int target)1155 outOfBitsStrategy(algo_t const* that, const int sfwork[SFBMAX], const int vbrsfmin[SFBMAX], int target)
1156 {
1157     int     wrk[SFBMAX];
1158     int const dm = sfDepth(sfwork);
1159     int const p = that->cod_info->global_gain;
1160     int     nbits;
1161 
1162     /* PART 1 */
1163     {
1164         int     bi = dm / 2;
1165         int     bi_ok = -1;
1166         int     bu = 0;
1167         int     bo = dm;
1168         for (;;) {
1169             int const sfmax = flattenDistribution(sfwork, wrk, dm, bi, p);
1170             nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1171             if (nbits <= target) {
1172                 bi_ok = bi;
1173                 bo = bi - 1;
1174             }
1175             else {
1176                 bu = bi + 1;
1177             }
1178             if (bu <= bo) {
1179                 bi = (bu + bo) / 2;
1180             }
1181             else {
1182                 break;
1183             }
1184         }
1185         if (bi_ok >= 0) {
1186             if (bi != bi_ok) {
1187                 int const sfmax = flattenDistribution(sfwork, wrk, dm, bi_ok, p);
1188                 nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1189             }
1190             return;
1191         }
1192     }
1193 
1194     /* PART 2: */
1195     {
1196         int     bi = (255 + p) / 2;
1197         int     bi_ok = -1;
1198         int     bu = p;
1199         int     bo = 255;
1200         for (;;) {
1201             int const sfmax = flattenDistribution(sfwork, wrk, dm, dm, bi);
1202             nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1203             if (nbits <= target) {
1204                 bi_ok = bi;
1205                 bo = bi - 1;
1206             }
1207             else {
1208                 bu = bi + 1;
1209             }
1210             if (bu <= bo) {
1211                 bi = (bu + bo) / 2;
1212             }
1213             else {
1214                 break;
1215             }
1216         }
1217         if (bi_ok >= 0) {
1218             if (bi != bi_ok) {
1219                 int const sfmax = flattenDistribution(sfwork, wrk, dm, dm, bi_ok);
1220                 nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1221             }
1222             return;
1223         }
1224     }
1225 
1226     /* fall back to old code, likely to be never called */
1227     searchGlobalStepsizeMax(that, wrk, vbrsfmin, target);
1228 }
1229 
1230 
1231 static int
reduce_bit_usage(lame_internal_flags * gfc,int gr,int ch)1232 reduce_bit_usage(lame_internal_flags * gfc, int gr, int ch
1233 #if 0
1234                  , const FLOAT xr34orig[576], const FLOAT l3_xmin[SFBMAX], int maxbits
1235 #endif
1236     )
1237 {
1238     SessionConfig_t const *const cfg = &gfc->cfg;
1239     gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
1240     /*  try some better scalefac storage
1241      */
1242     best_scalefac_store(gfc, gr, ch, &gfc->l3_side);
1243 
1244     /*  best huffman_divide may save some bits too
1245      */
1246     if (cfg->use_best_huffman == 1)
1247         best_huffman_divide(gfc, cod_info);
1248     return cod_info->part2_3_length + cod_info->part2_length;
1249 }
1250 
1251 
1252 
1253 
1254 int
VBR_encode_frame(lame_internal_flags * gfc,const FLOAT xr34orig[2][2][576],const FLOAT l3_xmin[2][2][SFBMAX],const int max_bits[2][2])1255 VBR_encode_frame(lame_internal_flags * gfc, const FLOAT xr34orig[2][2][576],
1256                  const FLOAT l3_xmin[2][2][SFBMAX], const int max_bits[2][2])
1257 {
1258     SessionConfig_t const *const cfg = &gfc->cfg;
1259     int     sfwork_[2][2][SFBMAX];
1260     int     vbrsfmin_[2][2][SFBMAX];
1261     algo_t  that_[2][2];
1262     int const ngr = cfg->mode_gr;
1263     int const nch = cfg->channels_out;
1264     int     max_nbits_ch[2][2] = {{0, 0}, {0 ,0}};
1265     int     max_nbits_gr[2] = {0, 0};
1266     int     max_nbits_fr = 0;
1267     int     use_nbits_ch[2][2] = {{MAX_BITS_PER_CHANNEL+1, MAX_BITS_PER_CHANNEL+1}
1268                                  ,{MAX_BITS_PER_CHANNEL+1, MAX_BITS_PER_CHANNEL+1}};
1269     int     use_nbits_gr[2] = { MAX_BITS_PER_GRANULE+1, MAX_BITS_PER_GRANULE+1 };
1270     int     use_nbits_fr = MAX_BITS_PER_GRANULE+MAX_BITS_PER_GRANULE;
1271     int     gr, ch;
1272     int     ok, sum_fr;
1273 
1274     /* set up some encoding parameters
1275      */
1276     for (gr = 0; gr < ngr; ++gr) {
1277         max_nbits_gr[gr] = 0;
1278         for (ch = 0; ch < nch; ++ch) {
1279             max_nbits_ch[gr][ch] = max_bits[gr][ch];
1280             use_nbits_ch[gr][ch] = 0;
1281             max_nbits_gr[gr] += max_bits[gr][ch];
1282             max_nbits_fr += max_bits[gr][ch];
1283             that_[gr][ch].find = (cfg->full_outer_loop < 0) ? guess_scalefac_x34 : find_scalefac_x34;
1284             that_[gr][ch].gfc = gfc;
1285             that_[gr][ch].cod_info = &gfc->l3_side.tt[gr][ch];
1286             that_[gr][ch].xr34orig = xr34orig[gr][ch];
1287             if (that_[gr][ch].cod_info->block_type == SHORT_TYPE) {
1288                 that_[gr][ch].alloc = short_block_constrain;
1289             }
1290             else {
1291                 that_[gr][ch].alloc = long_block_constrain;
1292             }
1293         }               /* for ch */
1294     }
1295     /* searches scalefactors
1296      */
1297     for (gr = 0; gr < ngr; ++gr) {
1298         for (ch = 0; ch < nch; ++ch) {
1299             if (max_bits[gr][ch] > 0) {
1300                 algo_t *that = &that_[gr][ch];
1301                 int    *sfwork = sfwork_[gr][ch];
1302                 int    *vbrsfmin = vbrsfmin_[gr][ch];
1303                 int     vbrmax;
1304 
1305                 vbrmax = block_sf(that, l3_xmin[gr][ch], sfwork, vbrsfmin);
1306                 that->alloc(that, sfwork, vbrsfmin, vbrmax);
1307                 bitcount(that);
1308             }
1309             else {
1310                 /*  xr contains no energy
1311                  *  l3_enc, our encoding data, will be quantized to zero
1312                  *  continue with next channel
1313                  */
1314             }
1315         }               /* for ch */
1316     }
1317     /* encode 'as is'
1318      */
1319     use_nbits_fr = 0;
1320     for (gr = 0; gr < ngr; ++gr) {
1321         use_nbits_gr[gr] = 0;
1322         for (ch = 0; ch < nch; ++ch) {
1323             algo_t const *that = &that_[gr][ch];
1324             if (max_bits[gr][ch] > 0) {
1325                 memset(&that->cod_info->l3_enc[0], 0, sizeof(that->cod_info->l3_enc));
1326                 (void) quantizeAndCountBits(that);
1327             }
1328             else {
1329                 /*  xr contains no energy
1330                  *  l3_enc, our encoding data, will be quantized to zero
1331                  *  continue with next channel
1332                  */
1333             }
1334             use_nbits_ch[gr][ch] = reduce_bit_usage(gfc, gr, ch);
1335             use_nbits_gr[gr] += use_nbits_ch[gr][ch];
1336         }               /* for ch */
1337         use_nbits_fr += use_nbits_gr[gr];
1338     }
1339 
1340     /* check bit constrains
1341      */
1342     if (use_nbits_fr <= max_nbits_fr) {
1343         ok = 1;
1344         for (gr = 0; gr < ngr; ++gr) {
1345             if (use_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
1346                 /* violates the rule that every granule has to use no more
1347                  * bits than MAX_BITS_PER_GRANULE
1348                  */
1349                 ok = 0;
1350             }
1351             for (ch = 0; ch < nch; ++ch) {
1352                 if (use_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1353                     /* violates the rule that every gr_ch has to use no more
1354                      * bits than MAX_BITS_PER_CHANNEL
1355                      *
1356                      * This isn't explicitly stated in the ISO docs, but the
1357                      * part2_3_length field has only 12 bits, that makes it
1358                      * up to a maximum size of 4095 bits!!!
1359                      */
1360                     ok = 0;
1361                 }
1362             }
1363         }
1364         if (ok) {
1365             return use_nbits_fr;
1366         }
1367     }
1368 
1369     /* OK, we are in trouble and have to define how many bits are
1370      * to be used for each granule
1371      */
1372     {
1373         ok = 1;
1374         sum_fr = 0;
1375 
1376         for (gr = 0; gr < ngr; ++gr) {
1377             max_nbits_gr[gr] = 0;
1378             for (ch = 0; ch < nch; ++ch) {
1379                 if (use_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1380                     max_nbits_ch[gr][ch] = MAX_BITS_PER_CHANNEL;
1381                 }
1382                 else {
1383                     max_nbits_ch[gr][ch] = use_nbits_ch[gr][ch];
1384                 }
1385                 max_nbits_gr[gr] += max_nbits_ch[gr][ch];
1386             }
1387             if (max_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
1388                 float   f[2] = {0.0f, 0.0f}, s = 0.0f;
1389                 for (ch = 0; ch < nch; ++ch) {
1390                     if (max_nbits_ch[gr][ch] > 0) {
1391                         f[ch] = sqrt(sqrt(max_nbits_ch[gr][ch]));
1392                         s += f[ch];
1393                     }
1394                     else {
1395                         f[ch] = 0;
1396                     }
1397                 }
1398                 for (ch = 0; ch < nch; ++ch) {
1399                     if (s > 0) {
1400                         max_nbits_ch[gr][ch] = MAX_BITS_PER_GRANULE * f[ch] / s;
1401                     }
1402                     else {
1403                         max_nbits_ch[gr][ch] = 0;
1404                     }
1405                 }
1406                 if (nch > 1) {
1407                     if (max_nbits_ch[gr][0] > use_nbits_ch[gr][0] + 32) {
1408                         max_nbits_ch[gr][1] += max_nbits_ch[gr][0];
1409                         max_nbits_ch[gr][1] -= use_nbits_ch[gr][0] + 32;
1410                         max_nbits_ch[gr][0] = use_nbits_ch[gr][0] + 32;
1411                     }
1412                     if (max_nbits_ch[gr][1] > use_nbits_ch[gr][1] + 32) {
1413                         max_nbits_ch[gr][0] += max_nbits_ch[gr][1];
1414                         max_nbits_ch[gr][0] -= use_nbits_ch[gr][1] + 32;
1415                         max_nbits_ch[gr][1] = use_nbits_ch[gr][1] + 32;
1416                     }
1417                     if (max_nbits_ch[gr][0] > MAX_BITS_PER_CHANNEL) {
1418                         max_nbits_ch[gr][0] = MAX_BITS_PER_CHANNEL;
1419                     }
1420                     if (max_nbits_ch[gr][1] > MAX_BITS_PER_CHANNEL) {
1421                         max_nbits_ch[gr][1] = MAX_BITS_PER_CHANNEL;
1422                     }
1423                 }
1424                 max_nbits_gr[gr] = 0;
1425                 for (ch = 0; ch < nch; ++ch) {
1426                     max_nbits_gr[gr] += max_nbits_ch[gr][ch];
1427                 }
1428             }
1429             sum_fr += max_nbits_gr[gr];
1430         }
1431         if (sum_fr > max_nbits_fr) {
1432             {
1433                 float   f[2] = {0.0f, 0.0f}, s = 0.0f;
1434                 for (gr = 0; gr < ngr; ++gr) {
1435                     if (max_nbits_gr[gr] > 0) {
1436                         f[gr] = sqrt(max_nbits_gr[gr]);
1437                         s += f[gr];
1438                     }
1439                     else {
1440                         f[gr] = 0;
1441                     }
1442                 }
1443                 for (gr = 0; gr < ngr; ++gr) {
1444                     if (s > 0) {
1445                         max_nbits_gr[gr] = max_nbits_fr * f[gr] / s;
1446                     }
1447                     else {
1448                         max_nbits_gr[gr] = 0;
1449                     }
1450                 }
1451             }
1452             if (ngr > 1) {
1453                 if (max_nbits_gr[0] > use_nbits_gr[0] + 125) {
1454                     max_nbits_gr[1] += max_nbits_gr[0];
1455                     max_nbits_gr[1] -= use_nbits_gr[0] + 125;
1456                     max_nbits_gr[0] = use_nbits_gr[0] + 125;
1457                 }
1458                 if (max_nbits_gr[1] > use_nbits_gr[1] + 125) {
1459                     max_nbits_gr[0] += max_nbits_gr[1];
1460                     max_nbits_gr[0] -= use_nbits_gr[1] + 125;
1461                     max_nbits_gr[1] = use_nbits_gr[1] + 125;
1462                 }
1463                 for (gr = 0; gr < ngr; ++gr) {
1464                     if (max_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
1465                         max_nbits_gr[gr] = MAX_BITS_PER_GRANULE;
1466                     }
1467                 }
1468             }
1469             for (gr = 0; gr < ngr; ++gr) {
1470                 float   f[2] = {0.0f, 0.0f}, s = 0.0f;
1471                 for (ch = 0; ch < nch; ++ch) {
1472                     if (max_nbits_ch[gr][ch] > 0) {
1473                         f[ch] = sqrt(max_nbits_ch[gr][ch]);
1474                         s += f[ch];
1475                     }
1476                     else {
1477                         f[ch] = 0;
1478                     }
1479                 }
1480                 for (ch = 0; ch < nch; ++ch) {
1481                     if (s > 0) {
1482                         max_nbits_ch[gr][ch] = max_nbits_gr[gr] * f[ch] / s;
1483                     }
1484                     else {
1485                         max_nbits_ch[gr][ch] = 0;
1486                     }
1487                 }
1488                 if (nch > 1) {
1489                     if (max_nbits_ch[gr][0] > use_nbits_ch[gr][0] + 32) {
1490                         max_nbits_ch[gr][1] += max_nbits_ch[gr][0];
1491                         max_nbits_ch[gr][1] -= use_nbits_ch[gr][0] + 32;
1492                         max_nbits_ch[gr][0] = use_nbits_ch[gr][0] + 32;
1493                     }
1494                     if (max_nbits_ch[gr][1] > use_nbits_ch[gr][1] + 32) {
1495                         max_nbits_ch[gr][0] += max_nbits_ch[gr][1];
1496                         max_nbits_ch[gr][0] -= use_nbits_ch[gr][1] + 32;
1497                         max_nbits_ch[gr][1] = use_nbits_ch[gr][1] + 32;
1498                     }
1499                     for (ch = 0; ch < nch; ++ch) {
1500                         if (max_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1501                             max_nbits_ch[gr][ch] = MAX_BITS_PER_CHANNEL;
1502                         }
1503                     }
1504                 }
1505             }
1506         }
1507         /* sanity check */
1508         sum_fr = 0;
1509         for (gr = 0; gr < ngr; ++gr) {
1510             int     sum_gr = 0;
1511             for (ch = 0; ch < nch; ++ch) {
1512                 sum_gr += max_nbits_ch[gr][ch];
1513                 if (max_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1514                     ok = 0;
1515                 }
1516             }
1517             sum_fr += sum_gr;
1518             if (sum_gr > MAX_BITS_PER_GRANULE) {
1519                 ok = 0;
1520             }
1521         }
1522         if (sum_fr > max_nbits_fr) {
1523             ok = 0;
1524         }
1525         if (!ok) {
1526             /* we must have done something wrong, fallback to 'on_pe' based constrain */
1527             for (gr = 0; gr < ngr; ++gr) {
1528                 for (ch = 0; ch < nch; ++ch) {
1529                     max_nbits_ch[gr][ch] = max_bits[gr][ch];
1530                 }
1531             }
1532         }
1533     }
1534 
1535     /* we already called the 'best_scalefac_store' function, so we need to reset some
1536      * variables before we can do it again.
1537      */
1538     for (ch = 0; ch < nch; ++ch) {
1539         gfc->l3_side.scfsi[ch][0] = 0;
1540         gfc->l3_side.scfsi[ch][1] = 0;
1541         gfc->l3_side.scfsi[ch][2] = 0;
1542         gfc->l3_side.scfsi[ch][3] = 0;
1543     }
1544     for (gr = 0; gr < ngr; ++gr) {
1545         for (ch = 0; ch < nch; ++ch) {
1546             gfc->l3_side.tt[gr][ch].scalefac_compress = 0;
1547         }
1548     }
1549 
1550     /* alter our encoded data, until it fits into the target bitrate
1551      */
1552     use_nbits_fr = 0;
1553     for (gr = 0; gr < ngr; ++gr) {
1554         use_nbits_gr[gr] = 0;
1555         for (ch = 0; ch < nch; ++ch) {
1556             algo_t const *that = &that_[gr][ch];
1557             use_nbits_ch[gr][ch] = 0;
1558             if (max_bits[gr][ch] > 0) {
1559                 int    *sfwork = sfwork_[gr][ch];
1560                 int const *vbrsfmin = vbrsfmin_[gr][ch];
1561                 cutDistribution(sfwork, sfwork, that->cod_info->global_gain);
1562                 outOfBitsStrategy(that, sfwork, vbrsfmin, max_nbits_ch[gr][ch]);
1563             }
1564             use_nbits_ch[gr][ch] = reduce_bit_usage(gfc, gr, ch);
1565             assert(use_nbits_ch[gr][ch] <= max_nbits_ch[gr][ch]);
1566             use_nbits_gr[gr] += use_nbits_ch[gr][ch];
1567         }               /* for ch */
1568         use_nbits_fr += use_nbits_gr[gr];
1569     }
1570 
1571     /* check bit constrains, but it should always be ok, iff there are no bugs ;-)
1572      */
1573     if (use_nbits_fr <= max_nbits_fr) {
1574         return use_nbits_fr;
1575     }
1576 
1577     ERRORF(gfc, "INTERNAL ERROR IN VBR NEW CODE (1313), please send bug report\n"
1578            "maxbits=%d usedbits=%d\n", max_nbits_fr, use_nbits_fr);
1579     exit(-1);
1580 }
1581