1 /*
2 * MP3 huffman table selecting and bit counting
3 *
4 * Copyright (c) 1999-2005 Takehiro TOMINAGA
5 * Copyright (c) 2002-2005 Gabriel Bouvigne
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23 /* $Id: takehiro.c,v 1.79 2011/05/07 16:05:17 rbrito Exp $ */
24
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #elif IDE_COMPILE
28 # include "lame-config.h"
29 #endif
30
31
32 #include "lame.h"
33 #include "lame-machine.h"
34 #include "encoder.h"
35 #include "util.h"
36 #include "quantize_pvt.h"
37 #include "tables.h"
38
39
40 static const struct {
41 const int region0_count;
42 const int region1_count;
43 } subdv_table[23] = {
44 {
45 0, 0}, /* 0 bands */
46 {
47 0, 0}, /* 1 bands */
48 {
49 0, 0}, /* 2 bands */
50 {
51 0, 0}, /* 3 bands */
52 {
53 0, 0}, /* 4 bands */
54 {
55 0, 1}, /* 5 bands */
56 {
57 1, 1}, /* 6 bands */
58 {
59 1, 1}, /* 7 bands */
60 {
61 1, 2}, /* 8 bands */
62 {
63 2, 2}, /* 9 bands */
64 {
65 2, 3}, /* 10 bands */
66 {
67 2, 3}, /* 11 bands */
68 {
69 3, 4}, /* 12 bands */
70 {
71 3, 4}, /* 13 bands */
72 {
73 3, 4}, /* 14 bands */
74 {
75 4, 5}, /* 15 bands */
76 {
77 4, 5}, /* 16 bands */
78 {
79 4, 6}, /* 17 bands */
80 {
81 5, 6}, /* 18 bands */
82 {
83 5, 6}, /* 19 bands */
84 {
85 5, 7}, /* 20 bands */
86 {
87 6, 7}, /* 21 bands */
88 {
89 6, 7}, /* 22 bands */
90 };
91
92
93
94
95
96 /*********************************************************************
97 * nonlinear quantization of xr
98 * More accurate formula than the ISO formula. Takes into account
99 * the fact that we are quantizing xr -> ix, but we want ix^4/3 to be
100 * as close as possible to x^4/3. (taking the nearest int would mean
101 * ix is as close as possible to xr, which is different.)
102 *
103 * From Segher Boessenkool <segher@eastsite.nl> 11/1999
104 *
105 * 09/2000: ASM code removed in favor of IEEE754 hack by Takehiro
106 * Tominaga. If you need the ASM code, check CVS circa Aug 2000.
107 *
108 * 01/2004: Optimizations by Gabriel Bouvigne
109 *********************************************************************/
110
111
112
113
114
115 static void
quantize_lines_xrpow_01(unsigned int l,FLOAT istep,const FLOAT * xr,int * ix)116 quantize_lines_xrpow_01(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
117 {
118 const FLOAT compareval0 = (1.0f - 0.4054f) / istep;
119 unsigned int i;
120
121 assert(l > 0);
122 assert(l % 2 == 0);
123 for (i = 0; i < l; i += 2) {
124 FLOAT const xr_0 = xr[i+0];
125 FLOAT const xr_1 = xr[i+1];
126 int const ix_0 = (compareval0 > xr_0) ? 0 : 1;
127 int const ix_1 = (compareval0 > xr_1) ? 0 : 1;
128 ix[i+0] = ix_0;
129 ix[i+1] = ix_1;
130 }
131 }
132
133
134
135 #ifdef TAKEHIRO_IEEE754_HACK
136
137 typedef union {
138 float f;
139 int i;
140 } fi_union;
141
142 #define MAGIC_FLOAT (65536*(128))
143 #define MAGIC_INT 0x4b000000
144
145
146 static void
quantize_lines_xrpow(unsigned int l,FLOAT istep,const FLOAT * xp,int * pi)147 quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xp, int *pi)
148 {
149 fi_union *fi;
150 unsigned int remaining;
151
152 assert(l > 0);
153
154 fi = (fi_union *) pi;
155
156 l = l >> 1;
157 remaining = l % 2;
158 l = l >> 1;
159 while (l--) {
160 double x0 = istep * xp[0];
161 double x1 = istep * xp[1];
162 double x2 = istep * xp[2];
163 double x3 = istep * xp[3];
164
165 x0 += MAGIC_FLOAT;
166 fi[0].f = x0;
167 x1 += MAGIC_FLOAT;
168 fi[1].f = x1;
169 x2 += MAGIC_FLOAT;
170 fi[2].f = x2;
171 x3 += MAGIC_FLOAT;
172 fi[3].f = x3;
173
174 fi[0].f = x0 + adj43asm[fi[0].i - MAGIC_INT];
175 fi[1].f = x1 + adj43asm[fi[1].i - MAGIC_INT];
176 fi[2].f = x2 + adj43asm[fi[2].i - MAGIC_INT];
177 fi[3].f = x3 + adj43asm[fi[3].i - MAGIC_INT];
178
179 fi[0].i -= MAGIC_INT;
180 fi[1].i -= MAGIC_INT;
181 fi[2].i -= MAGIC_INT;
182 fi[3].i -= MAGIC_INT;
183 fi += 4;
184 xp += 4;
185 };
186 if (remaining) {
187 double x0 = istep * xp[0];
188 double x1 = istep * xp[1];
189
190 x0 += MAGIC_FLOAT;
191 fi[0].f = x0;
192 x1 += MAGIC_FLOAT;
193 fi[1].f = x1;
194
195 fi[0].f = x0 + adj43asm[fi[0].i - MAGIC_INT];
196 fi[1].f = x1 + adj43asm[fi[1].i - MAGIC_INT];
197
198 fi[0].i -= MAGIC_INT;
199 fi[1].i -= MAGIC_INT;
200 }
201
202 }
203
204
205 #else
206
207 /*********************************************************************
208 * XRPOW_FTOI is a macro to convert floats to ints.
209 * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
210 * ROUNDFAC= -0.0946
211 *
212 * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=asj43[x]
213 * ROUNDFAC=0.4054
214 *
215 * Note: using floor() or (int) is extremely slow. On machines where
216 * the TAKEHIRO_IEEE754_HACK code above does not work, it is worthwile
217 * to write some ASM for XRPOW_FTOI().
218 *********************************************************************/
219 #define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
220 #define QUANTFAC(rx) adj43[rx]
221 #define ROUNDFAC 0.4054
222
223
224 static void
quantize_lines_xrpow(unsigned int l,FLOAT istep,const FLOAT * xr,int * ix)225 quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
226 {
227 unsigned int remaining;
228
229 assert(l > 0);
230
231 l = l >> 1;
232 remaining = l % 2;
233 l = l >> 1;
234 while (l--) {
235 FLOAT x0, x1, x2, x3;
236 int rx0, rx1, rx2, rx3;
237
238 x0 = *xr++ * istep;
239 x1 = *xr++ * istep;
240 XRPOW_FTOI(x0, rx0);
241 x2 = *xr++ * istep;
242 XRPOW_FTOI(x1, rx1);
243 x3 = *xr++ * istep;
244 XRPOW_FTOI(x2, rx2);
245 x0 += QUANTFAC(rx0);
246 XRPOW_FTOI(x3, rx3);
247 x1 += QUANTFAC(rx1);
248 XRPOW_FTOI(x0, *ix++);
249 x2 += QUANTFAC(rx2);
250 XRPOW_FTOI(x1, *ix++);
251 x3 += QUANTFAC(rx3);
252 XRPOW_FTOI(x2, *ix++);
253 XRPOW_FTOI(x3, *ix++);
254 };
255 if (remaining) {
256 FLOAT x0, x1;
257 int rx0, rx1;
258
259 x0 = *xr++ * istep;
260 x1 = *xr++ * istep;
261 XRPOW_FTOI(x0, rx0);
262 XRPOW_FTOI(x1, rx1);
263 x0 += QUANTFAC(rx0);
264 x1 += QUANTFAC(rx1);
265 XRPOW_FTOI(x0, *ix++);
266 XRPOW_FTOI(x1, *ix++);
267 }
268
269 }
270
271
272
273 #endif
274
275
276
277 /*********************************************************************
278 * Quantization function
279 * This function will select which lines to quantize and call the
280 * proper quantization function
281 *********************************************************************/
282
283 static void
quantize_xrpow(const FLOAT * xp,int * pi,FLOAT istep,gr_info const * const cod_info,calc_noise_data const * prev_noise)284 quantize_xrpow(const FLOAT * xp, int *pi, FLOAT istep, gr_info const *const cod_info,
285 calc_noise_data const *prev_noise)
286 {
287 /* quantize on xr^(3/4) instead of xr */
288 int sfb;
289 int sfbmax;
290 int j = 0;
291 int prev_data_use;
292 int *iData;
293 int accumulate = 0;
294 int accumulate01 = 0;
295 int *acc_iData;
296 const FLOAT *acc_xp;
297
298 iData = pi;
299 acc_xp = xp;
300 acc_iData = iData;
301
302
303 /* Reusing previously computed data does not seems to work if global gain
304 is changed. Finding why it behaves this way would allow to use a cache of
305 previously computed values (let's 10 cached values per sfb) that would
306 probably provide a noticeable speedup */
307 prev_data_use = (prev_noise && (cod_info->global_gain == prev_noise->global_gain));
308
309 if (cod_info->block_type == SHORT_TYPE)
310 sfbmax = 38;
311 else
312 sfbmax = 21;
313
314 for (sfb = 0; sfb <= sfbmax; sfb++) {
315 int step = -1;
316
317 if (prev_data_use || cod_info->block_type == NORM_TYPE) {
318 step =
319 cod_info->global_gain
320 - ((cod_info->scalefac[sfb] + (cod_info->preflag ? pretab[sfb] : 0))
321 << (cod_info->scalefac_scale + 1))
322 - cod_info->subblock_gain[cod_info->window[sfb]] * 8;
323 }
324 assert(cod_info->width[sfb] >= 0);
325 if (prev_data_use && (prev_noise->step[sfb] == step)) {
326 /* do not recompute this part,
327 but compute accumulated lines */
328 if (accumulate) {
329 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
330 accumulate = 0;
331 }
332 if (accumulate01) {
333 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
334 accumulate01 = 0;
335 }
336 }
337 else { /*should compute this part */
338 int l;
339 l = cod_info->width[sfb];
340
341 if ((j + cod_info->width[sfb]) > cod_info->max_nonzero_coeff) {
342 /*do not compute upper zero part */
343 int usefullsize;
344 usefullsize = cod_info->max_nonzero_coeff - j + 1;
345 memset(&pi[cod_info->max_nonzero_coeff], 0,
346 sizeof(int) * (576 - cod_info->max_nonzero_coeff));
347 l = usefullsize;
348
349 if (l < 0) {
350 l = 0;
351 }
352
353 /* no need to compute higher sfb values */
354 sfb = sfbmax + 1;
355 }
356
357 /*accumulate lines to quantize */
358 if (!accumulate && !accumulate01) {
359 acc_iData = iData;
360 acc_xp = xp;
361 }
362 if (prev_noise &&
363 prev_noise->sfb_count1 > 0 &&
364 sfb >= prev_noise->sfb_count1 &&
365 prev_noise->step[sfb] > 0 && step >= prev_noise->step[sfb]) {
366
367 if (accumulate) {
368 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
369 accumulate = 0;
370 acc_iData = iData;
371 acc_xp = xp;
372 }
373 accumulate01 += l;
374 }
375 else {
376 if (accumulate01) {
377 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
378 accumulate01 = 0;
379 acc_iData = iData;
380 acc_xp = xp;
381 }
382 accumulate += l;
383 }
384
385 if (l <= 0) {
386 /* rh: 20040215
387 * may happen due to "prev_data_use" optimization
388 */
389 if (accumulate01) {
390 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
391 accumulate01 = 0;
392 }
393 if (accumulate) {
394 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
395 accumulate = 0;
396 }
397
398 break; /* ends for-loop */
399 }
400 }
401 if (sfb <= sfbmax) {
402 iData += cod_info->width[sfb];
403 xp += cod_info->width[sfb];
404 j += cod_info->width[sfb];
405 }
406 }
407 if (accumulate) { /*last data part */
408 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
409 accumulate = 0;
410 }
411 if (accumulate01) { /*last data part */
412 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
413 accumulate01 = 0;
414 }
415
416 }
417
418
419
420
421 /*************************************************************************/
422 /* ix_max */
423 /*************************************************************************/
424
425 static int
ix_max(const int * ix,const int * end)426 ix_max(const int *ix, const int *end)
427 {
428 int max1 = 0, max2 = 0;
429
430 do {
431 int const x1 = *ix++;
432 int const x2 = *ix++;
433 if (max1 < x1)
434 max1 = x1;
435
436 if (max2 < x2)
437 max2 = x2;
438 } while (ix < end);
439 if (max1 < max2)
440 max1 = max2;
441 return max1;
442 }
443
444
445
446
447
448
449
450
451 static int
count_bit_ESC(const int * ix,const int * const end,int t1,const int t2,unsigned int * const s)452 count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigned int *const s)
453 {
454 /* ESC-table is used */
455 unsigned int const linbits = ht[t1].xlen * 65536u + ht[t2].xlen;
456 unsigned int sum = 0, sum2;
457
458 do {
459 unsigned int x = *ix++;
460 unsigned int y = *ix++;
461
462 if (x >= 15u) {
463 x = 15u;
464 sum += linbits;
465 }
466 if (y >= 15u) {
467 y = 15u;
468 sum += linbits;
469 }
470 x <<= 4u;
471 x += y;
472 sum += largetbl[x];
473 } while (ix < end);
474
475 sum2 = sum & 0xffffu;
476 sum >>= 16u;
477
478 if (sum > sum2) {
479 sum = sum2;
480 t1 = t2;
481 }
482
483 *s += sum;
484 return t1;
485 }
486
487
488 static int
count_bit_noESC(const int * ix,const int * end,int mx,unsigned int * s)489 count_bit_noESC(const int *ix, const int *end, int mx, unsigned int *s)
490 {
491 /* No ESC-words */
492 unsigned int sum1 = 0;
493 const uint8_t *const hlen1 = ht[1].hlen;
494 (void) mx;
495
496 do {
497 unsigned int const x0 = *ix++;
498 unsigned int const x1 = *ix++;
499 sum1 += hlen1[ x0+x0 + x1 ];
500 } while (ix < end);
501
502 *s += sum1;
503 return 1;
504 }
505
506
507 static const int huf_tbl_noESC[] = {
508 1, 2, 5, 7, 7, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13
509 };
510
511
512 static int
count_bit_noESC_from2(const int * ix,const int * end,int max,unsigned int * s)513 count_bit_noESC_from2(const int *ix, const int *end, int max, unsigned int *s)
514 {
515 int t1 = huf_tbl_noESC[max - 1];
516 /* No ESC-words */
517 const unsigned int xlen = ht[t1].xlen;
518 uint32_t const* table = (t1 == 2) ? &table23[0] : &table56[0];
519 unsigned int sum = 0, sum2;
520
521 do {
522 unsigned int const x0 = *ix++;
523 unsigned int const x1 = *ix++;
524 sum += table[ x0 * xlen + x1 ];
525 } while (ix < end);
526
527 sum2 = sum & 0xffffu;
528 sum >>= 16u;
529
530 if (sum > sum2) {
531 sum = sum2;
532 t1++;
533 }
534
535 *s += sum;
536 return t1;
537 }
538
539
540 inline static int
count_bit_noESC_from3(const int * ix,const int * end,int max,unsigned int * s)541 count_bit_noESC_from3(const int *ix, const int *end, int max, unsigned int * s)
542 {
543 int t1 = huf_tbl_noESC[max - 1];
544 /* No ESC-words */
545 unsigned int sum1 = 0;
546 unsigned int sum2 = 0;
547 unsigned int sum3 = 0;
548 const unsigned int xlen = ht[t1].xlen;
549 const uint8_t *const hlen1 = ht[t1].hlen;
550 const uint8_t *const hlen2 = ht[t1 + 1].hlen;
551 const uint8_t *const hlen3 = ht[t1 + 2].hlen;
552 int t;
553
554 do {
555 unsigned int x0 = *ix++;
556 unsigned int x1 = *ix++;
557 unsigned int x = x0 * xlen + x1;
558 sum1 += hlen1[x];
559 sum2 += hlen2[x];
560 sum3 += hlen3[x];
561 } while (ix < end);
562
563 t = t1;
564 if (sum1 > sum2) {
565 sum1 = sum2;
566 t++;
567 }
568 if (sum1 > sum3) {
569 sum1 = sum3;
570 t = t1 + 2;
571 }
572 *s += sum1;
573
574 return t;
575 }
576
577
578 /*************************************************************************/
579 /* choose table */
580 /*************************************************************************/
581
582 /*
583 Choose the Huffman table that will encode ix[begin..end] with
584 the fewest bits.
585
586 Note: This code contains knowledge about the sizes and characteristics
587 of the Huffman tables as defined in the IS (Table B.7), and will not work
588 with any arbitrary tables.
589 */
count_bit_null(const int * ix,const int * end,int max,unsigned int * s)590 static int count_bit_null(const int* ix, const int* end, int max, unsigned int* s)
591 {
592 (void) ix;
593 (void) end;
594 (void) max;
595 (void) s;
596 return 0;
597 }
598
599 typedef int (*count_fnc)(const int* ix, const int* end, int max, unsigned int* s);
600
601 static count_fnc count_fncs[] =
602 { &count_bit_null
603 , &count_bit_noESC
604 , &count_bit_noESC_from2
605 , &count_bit_noESC_from2
606 , &count_bit_noESC_from3
607 , &count_bit_noESC_from3
608 , &count_bit_noESC_from3
609 , &count_bit_noESC_from3
610 , &count_bit_noESC_from3
611 , &count_bit_noESC_from3
612 , &count_bit_noESC_from3
613 , &count_bit_noESC_from3
614 , &count_bit_noESC_from3
615 , &count_bit_noESC_from3
616 , &count_bit_noESC_from3
617 , &count_bit_noESC_from3
618 };
619
620 static int
choose_table_nonMMX(const int * ix,const int * const end,int * const _s)621 choose_table_nonMMX(const int *ix, const int *const end, int *const _s)
622 {
623 unsigned int* s = (unsigned int*)_s;
624 unsigned int max;
625 int choice, choice2;
626 max = ix_max(ix, end);
627
628 if (max <= 15) {
629 return count_fncs[max](ix, end, max, s);
630 }
631 /* try tables with linbits */
632 if (max > IXMAX_VAL) {
633 *s = LARGE_BITS;
634 return -1;
635 }
636 max -= 15u;
637 for (choice2 = 24; choice2 < 32; choice2++) {
638 if (ht[choice2].linmax >= max) {
639 break;
640 }
641 }
642
643 for (choice = choice2 - 8; choice < 24; choice++) {
644 if (ht[choice].linmax >= max) {
645 break;
646 }
647 }
648 return count_bit_ESC(ix, end, choice, choice2, s);
649 }
650
651
652
653 /*************************************************************************/
654 /* count_bit */
655 /*************************************************************************/
656 int
noquant_count_bits(lame_internal_flags const * const gfc,gr_info * const gi,calc_noise_data * prev_noise)657 noquant_count_bits(lame_internal_flags const *const gfc,
658 gr_info * const gi, calc_noise_data * prev_noise)
659 {
660 SessionConfig_t const *const cfg = &gfc->cfg;
661 int bits = 0;
662 int i, a1, a2;
663 int const *const ix = gi->l3_enc;
664
665 i = Min(576, ((gi->max_nonzero_coeff + 2) >> 1) << 1);
666
667 if (prev_noise)
668 prev_noise->sfb_count1 = 0;
669
670 /* Determine count1 region */
671 for (; i > 1; i -= 2)
672 if (ix[i - 1] | ix[i - 2])
673 break;
674 gi->count1 = i;
675
676 /* Determines the number of bits to encode the quadruples. */
677 a1 = a2 = 0;
678 for (; i > 3; i -= 4) {
679 int x4 = ix[i-4];
680 int x3 = ix[i-3];
681 int x2 = ix[i-2];
682 int x1 = ix[i-1];
683 int p;
684 /* hack to check if all values <= 1 */
685 if ((unsigned int) (x4 | x3 | x2 | x1) > 1)
686 break;
687
688 p = ((x4 * 2 + x3) * 2 + x2) * 2 + x1;
689 a1 += t32l[p];
690 a2 += t33l[p];
691 }
692
693 bits = a1;
694 gi->count1table_select = 0;
695 if (a1 > a2) {
696 bits = a2;
697 gi->count1table_select = 1;
698 }
699
700 gi->count1bits = bits;
701 gi->big_values = i;
702 if (i == 0)
703 return bits;
704
705 if (gi->block_type == SHORT_TYPE) {
706 a1 = 3 * gfc->scalefac_band.s[3];
707 if (a1 > gi->big_values)
708 a1 = gi->big_values;
709 a2 = gi->big_values;
710
711 }
712 else if (gi->block_type == NORM_TYPE) {
713 assert(i <= 576); /* bv_scf has 576 entries (0..575) */
714 a1 = gi->region0_count = gfc->sv_qnt.bv_scf[i - 2];
715 a2 = gi->region1_count = gfc->sv_qnt.bv_scf[i - 1];
716
717 assert(a1 + a2 + 2 < SBPSY_l);
718 a2 = gfc->scalefac_band.l[a1 + a2 + 2];
719 a1 = gfc->scalefac_band.l[a1 + 1];
720 if (a2 < i)
721 gi->table_select[2] = gfc->choose_table(ix + a2, ix + i, &bits);
722
723 }
724 else {
725 gi->region0_count = 7;
726 /*gi->region1_count = SBPSY_l - 7 - 1; */
727 gi->region1_count = SBMAX_l - 1 - 7 - 1;
728 a1 = gfc->scalefac_band.l[7 + 1];
729 a2 = i;
730 if (a1 > a2) {
731 a1 = a2;
732 }
733 }
734
735
736 /* have to allow for the case when bigvalues < region0 < region1 */
737 /* (and region0, region1 are ignored) */
738 a1 = Min(a1, i);
739 a2 = Min(a2, i);
740
741 assert(a1 >= 0);
742 assert(a2 >= 0);
743
744 /* Count the number of bits necessary to code the bigvalues region. */
745 if (0 < a1)
746 gi->table_select[0] = gfc->choose_table(ix, ix + a1, &bits);
747 if (a1 < a2)
748 gi->table_select[1] = gfc->choose_table(ix + a1, ix + a2, &bits);
749 if (cfg->use_best_huffman == 2) {
750 gi->part2_3_length = bits;
751 best_huffman_divide(gfc, gi);
752 bits = gi->part2_3_length;
753 }
754
755
756 if (prev_noise) {
757 if (gi->block_type == NORM_TYPE) {
758 int sfb = 0;
759 while (gfc->scalefac_band.l[sfb] < gi->big_values) {
760 sfb++;
761 }
762 prev_noise->sfb_count1 = sfb;
763 }
764 }
765
766 return bits;
767 }
768
769 int
count_bits(lame_internal_flags const * const gfc,const FLOAT * const xr,gr_info * const gi,calc_noise_data * prev_noise)770 count_bits(lame_internal_flags const *const gfc,
771 const FLOAT * const xr, gr_info * const gi, calc_noise_data * prev_noise)
772 {
773 int *const ix = gi->l3_enc;
774
775 /* since quantize_xrpow uses table lookup, we need to check this first: */
776 FLOAT const w = (IXMAX_VAL) / IPOW20(gi->global_gain);
777
778 if (gi->xrpow_max > w)
779 return LARGE_BITS;
780
781 quantize_xrpow(xr, ix, IPOW20(gi->global_gain), gi, prev_noise);
782
783 if (gfc->sv_qnt.substep_shaping & 2) {
784 int sfb, j = 0;
785 /* 0.634521682242439 = 0.5946*2**(.5*0.1875) */
786 int const gain = gi->global_gain + gi->scalefac_scale;
787 const FLOAT roundfac = 0.634521682242439 / IPOW20(gain);
788 for (sfb = 0; sfb < gi->sfbmax; sfb++) {
789 int const width = gi->width[sfb];
790 assert(width >= 0);
791 if (!gfc->sv_qnt.pseudohalf[sfb]) {
792 j += width;
793 }
794 else {
795 int k;
796 for (k = j, j += width; k < j; ++k) {
797 ix[k] = (xr[k] >= roundfac) ? ix[k] : 0;
798 }
799 }
800 }
801 }
802 return noquant_count_bits(gfc, gi, prev_noise);
803 }
804
805 /***********************************************************************
806 re-calculate the best scalefac_compress using scfsi
807 the saved bits are kept in the bit reservoir.
808 **********************************************************************/
809
810
811 inline static void
recalc_divide_init(const lame_internal_flags * const gfc,gr_info const * cod_info,int const * const ix,int r01_bits[],int r01_div[],int r0_tbl[],int r1_tbl[])812 recalc_divide_init(const lame_internal_flags * const gfc,
813 gr_info const *cod_info,
814 int const *const ix, int r01_bits[], int r01_div[], int r0_tbl[], int r1_tbl[])
815 {
816 int r0, r1, bigv, r0t, r1t, bits;
817
818 bigv = cod_info->big_values;
819
820 for (r0 = 0; r0 <= 7 + 15; r0++) {
821 r01_bits[r0] = LARGE_BITS;
822 }
823
824 for (r0 = 0; r0 < 16; r0++) {
825 int const a1 = gfc->scalefac_band.l[r0 + 1];
826 int r0bits;
827 if (a1 >= bigv)
828 break;
829 r0bits = 0;
830 r0t = gfc->choose_table(ix, ix + a1, &r0bits);
831
832 for (r1 = 0; r1 < 8; r1++) {
833 int const a2 = gfc->scalefac_band.l[r0 + r1 + 2];
834 if (a2 >= bigv)
835 break;
836
837 bits = r0bits;
838 r1t = gfc->choose_table(ix + a1, ix + a2, &bits);
839 if (r01_bits[r0 + r1] > bits) {
840 r01_bits[r0 + r1] = bits;
841 r01_div[r0 + r1] = r0;
842 r0_tbl[r0 + r1] = r0t;
843 r1_tbl[r0 + r1] = r1t;
844 }
845 }
846 }
847 }
848
849 inline static void
recalc_divide_sub(const lame_internal_flags * const gfc,const gr_info * cod_info2,gr_info * const gi,const int * const ix,const int r01_bits[],const int r01_div[],const int r0_tbl[],const int r1_tbl[])850 recalc_divide_sub(const lame_internal_flags * const gfc,
851 const gr_info * cod_info2,
852 gr_info * const gi,
853 const int *const ix,
854 const int r01_bits[], const int r01_div[], const int r0_tbl[], const int r1_tbl[])
855 {
856 int bits, r2, a2, bigv, r2t;
857
858 bigv = cod_info2->big_values;
859
860 for (r2 = 2; r2 < SBMAX_l + 1; r2++) {
861 a2 = gfc->scalefac_band.l[r2];
862 if (a2 >= bigv)
863 break;
864
865 bits = r01_bits[r2 - 2] + cod_info2->count1bits;
866 if (gi->part2_3_length <= bits)
867 break;
868
869 r2t = gfc->choose_table(ix + a2, ix + bigv, &bits);
870 if (gi->part2_3_length <= bits)
871 continue;
872
873 memcpy(gi, cod_info2, sizeof(gr_info));
874 gi->part2_3_length = bits;
875 gi->region0_count = r01_div[r2 - 2];
876 gi->region1_count = r2 - 2 - r01_div[r2 - 2];
877 gi->table_select[0] = r0_tbl[r2 - 2];
878 gi->table_select[1] = r1_tbl[r2 - 2];
879 gi->table_select[2] = r2t;
880 }
881 }
882
883
884
885
886 void
best_huffman_divide(const lame_internal_flags * const gfc,gr_info * const gi)887 best_huffman_divide(const lame_internal_flags * const gfc, gr_info * const gi)
888 {
889 SessionConfig_t const *const cfg = &gfc->cfg;
890 int i, a1, a2;
891 gr_info cod_info2;
892 int const *const ix = gi->l3_enc;
893
894 int r01_bits[7 + 15 + 1];
895 int r01_div[7 + 15 + 1];
896 int r0_tbl[7 + 15 + 1];
897 int r1_tbl[7 + 15 + 1];
898
899
900 /* SHORT BLOCK stuff fails for MPEG2 */
901 if (gi->block_type == SHORT_TYPE && cfg->mode_gr == 1)
902 return;
903
904
905 memcpy(&cod_info2, gi, sizeof(gr_info));
906 if (gi->block_type == NORM_TYPE) {
907 recalc_divide_init(gfc, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
908 recalc_divide_sub(gfc, &cod_info2, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
909 }
910
911 i = cod_info2.big_values;
912 if (i == 0 || (unsigned int) (ix[i - 2] | ix[i - 1]) > 1)
913 return;
914
915 i = gi->count1 + 2;
916 if (i > 576)
917 return;
918
919 /* Determines the number of bits to encode the quadruples. */
920 memcpy(&cod_info2, gi, sizeof(gr_info));
921 cod_info2.count1 = i;
922 a1 = a2 = 0;
923
924 assert(i <= 576);
925
926 for (; i > cod_info2.big_values; i -= 4) {
927 int const p = ((ix[i - 4] * 2 + ix[i - 3]) * 2 + ix[i - 2]) * 2 + ix[i - 1];
928 a1 += t32l[p];
929 a2 += t33l[p];
930 }
931 cod_info2.big_values = i;
932
933 cod_info2.count1table_select = 0;
934 if (a1 > a2) {
935 a1 = a2;
936 cod_info2.count1table_select = 1;
937 }
938
939 cod_info2.count1bits = a1;
940
941 if (cod_info2.block_type == NORM_TYPE)
942 recalc_divide_sub(gfc, &cod_info2, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
943 else {
944 /* Count the number of bits necessary to code the bigvalues region. */
945 cod_info2.part2_3_length = a1;
946 a1 = gfc->scalefac_band.l[7 + 1];
947 if (a1 > i) {
948 a1 = i;
949 }
950 if (a1 > 0)
951 cod_info2.table_select[0] =
952 gfc->choose_table(ix, ix + a1, (int *) &cod_info2.part2_3_length);
953 if (i > a1)
954 cod_info2.table_select[1] =
955 gfc->choose_table(ix + a1, ix + i, (int *) &cod_info2.part2_3_length);
956 if (gi->part2_3_length > cod_info2.part2_3_length)
957 memcpy(gi, &cod_info2, sizeof(gr_info));
958 }
959 }
960
961 static const int slen1_n[16] = { 1, 1, 1, 1, 8, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16 };
962 static const int slen2_n[16] = { 1, 2, 4, 8, 1, 2, 4, 8, 2, 4, 8, 2, 4, 8, 4, 8 };
963 const int slen1_tab[16] = { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 };
964 const int slen2_tab[16] = { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 };
965
966 static void
scfsi_calc(int ch,III_side_info_t * l3_side)967 scfsi_calc(int ch, III_side_info_t * l3_side)
968 {
969 unsigned int i;
970 int s1, s2, c1, c2;
971 int sfb;
972 gr_info *const gi = &l3_side->tt[1][ch];
973 gr_info const *const g0 = &l3_side->tt[0][ch];
974
975 for (i = 0; i < (sizeof(scfsi_band) / sizeof(int)) - 1; i++) {
976 for (sfb = scfsi_band[i]; sfb < scfsi_band[i + 1]; sfb++) {
977 if (g0->scalefac[sfb] != gi->scalefac[sfb]
978 && gi->scalefac[sfb] >= 0)
979 break;
980 }
981 if (sfb == scfsi_band[i + 1]) {
982 for (sfb = scfsi_band[i]; sfb < scfsi_band[i + 1]; sfb++) {
983 gi->scalefac[sfb] = -1;
984 }
985 l3_side->scfsi[ch][i] = 1;
986 }
987 }
988
989 s1 = c1 = 0;
990 for (sfb = 0; sfb < 11; sfb++) {
991 if (gi->scalefac[sfb] == -1)
992 continue;
993 c1++;
994 if (s1 < gi->scalefac[sfb])
995 s1 = gi->scalefac[sfb];
996 }
997
998 s2 = c2 = 0;
999 for (; sfb < SBPSY_l; sfb++) {
1000 if (gi->scalefac[sfb] == -1)
1001 continue;
1002 c2++;
1003 if (s2 < gi->scalefac[sfb])
1004 s2 = gi->scalefac[sfb];
1005 }
1006
1007 for (i = 0; i < 16; i++) {
1008 if (s1 < slen1_n[i] && s2 < slen2_n[i]) {
1009 int const c = slen1_tab[i] * c1 + slen2_tab[i] * c2;
1010 if (gi->part2_length > c) {
1011 gi->part2_length = c;
1012 gi->scalefac_compress = (int)i;
1013 }
1014 }
1015 }
1016 }
1017
1018 /*
1019 Find the optimal way to store the scalefactors.
1020 Only call this routine after final scalefactors have been
1021 chosen and the channel/granule will not be re-encoded.
1022 */
1023 void
best_scalefac_store(const lame_internal_flags * gfc,const int gr,const int ch,III_side_info_t * const l3_side)1024 best_scalefac_store(const lame_internal_flags * gfc,
1025 const int gr, const int ch, III_side_info_t * const l3_side)
1026 {
1027 SessionConfig_t const *const cfg = &gfc->cfg;
1028 /* use scalefac_scale if we can */
1029 gr_info *const gi = &l3_side->tt[gr][ch];
1030 int sfb, i, j, l;
1031 int recalc = 0;
1032
1033 /* remove scalefacs from bands with ix=0. This idea comes
1034 * from the AAC ISO docs. added mt 3/00 */
1035 /* check if l3_enc=0 */
1036 j = 0;
1037 for (sfb = 0; sfb < gi->sfbmax; sfb++) {
1038 int const width = gi->width[sfb];
1039 assert(width >= 0);
1040 for (l = j, j += width; l < j; ++l) {
1041 if (gi->l3_enc[l] != 0)
1042 break;
1043 }
1044 if (l == j)
1045 gi->scalefac[sfb] = recalc = -2; /* anything goes. */
1046 /* only best_scalefac_store and calc_scfsi
1047 * know--and only they should know--about the magic number -2.
1048 */
1049 }
1050
1051 if (!gi->scalefac_scale && !gi->preflag) {
1052 int s = 0;
1053 for (sfb = 0; sfb < gi->sfbmax; sfb++)
1054 if (gi->scalefac[sfb] > 0)
1055 s |= gi->scalefac[sfb];
1056
1057 if (!(s & 1) && s != 0) {
1058 for (sfb = 0; sfb < gi->sfbmax; sfb++)
1059 if (gi->scalefac[sfb] > 0)
1060 gi->scalefac[sfb] >>= 1;
1061
1062 gi->scalefac_scale = recalc = 1;
1063 }
1064 }
1065
1066 if (!gi->preflag && gi->block_type != SHORT_TYPE && cfg->mode_gr == 2) {
1067 for (sfb = 11; sfb < SBPSY_l; sfb++)
1068 if (gi->scalefac[sfb] < pretab[sfb] && gi->scalefac[sfb] != -2)
1069 break;
1070 if (sfb == SBPSY_l) {
1071 for (sfb = 11; sfb < SBPSY_l; sfb++)
1072 if (gi->scalefac[sfb] > 0)
1073 gi->scalefac[sfb] -= pretab[sfb];
1074
1075 gi->preflag = recalc = 1;
1076 }
1077 }
1078
1079 for (i = 0; i < 4; i++)
1080 l3_side->scfsi[ch][i] = 0;
1081
1082 if (cfg->mode_gr == 2 && gr == 1
1083 && l3_side->tt[0][ch].block_type != SHORT_TYPE
1084 && l3_side->tt[1][ch].block_type != SHORT_TYPE) {
1085 scfsi_calc(ch, l3_side);
1086 recalc = 0;
1087 }
1088 for (sfb = 0; sfb < gi->sfbmax; sfb++) {
1089 if (gi->scalefac[sfb] == -2) {
1090 gi->scalefac[sfb] = 0; /* if anything goes, then 0 is a good choice */
1091 }
1092 }
1093 if (recalc) {
1094 (void) scale_bitcount(gfc, gi);
1095 }
1096 }
1097
1098
1099 #ifndef NDEBUG
1100 static int
all_scalefactors_not_negative(int const * scalefac,int n)1101 all_scalefactors_not_negative(int const *scalefac, int n)
1102 {
1103 int i;
1104 for (i = 0; i < n; ++i) {
1105 if (scalefac[i] < 0)
1106 return 0;
1107 }
1108 return 1;
1109 }
1110 #endif
1111
1112
1113 /* number of bits used to encode scalefacs */
1114
1115 /* 18*slen1_tab[i] + 18*slen2_tab[i] */
1116 static const int scale_short[16] = {
1117 0, 18, 36, 54, 54, 36, 54, 72, 54, 72, 90, 72, 90, 108, 108, 126
1118 };
1119
1120 /* 17*slen1_tab[i] + 18*slen2_tab[i] */
1121 static const int scale_mixed[16] = {
1122 0, 18, 36, 54, 51, 35, 53, 71, 52, 70, 88, 69, 87, 105, 104, 122
1123 };
1124
1125 /* 11*slen1_tab[i] + 10*slen2_tab[i] */
1126 static const int scale_long[16] = {
1127 0, 10, 20, 30, 33, 21, 31, 41, 32, 42, 52, 43, 53, 63, 64, 74
1128 };
1129
1130
1131 /*************************************************************************/
1132 /* scale_bitcount */
1133 /*************************************************************************/
1134
1135 /* Also calculates the number of bits necessary to code the scalefactors. */
1136
1137 static int
mpeg1_scale_bitcount(const lame_internal_flags * gfc,gr_info * const cod_info)1138 mpeg1_scale_bitcount(const lame_internal_flags * gfc, gr_info * const cod_info)
1139 {
1140 int k, sfb, max_slen1 = 0, max_slen2 = 0;
1141
1142 /* maximum values */
1143 const int *tab;
1144 int *const scalefac = cod_info->scalefac;
1145
1146 (void) gfc;
1147 assert(all_scalefactors_not_negative(scalefac, cod_info->sfbmax));
1148
1149 if (cod_info->block_type == SHORT_TYPE) {
1150 tab = scale_short;
1151 if (cod_info->mixed_block_flag)
1152 tab = scale_mixed;
1153 }
1154 else { /* block_type == 1,2,or 3 */
1155 tab = scale_long;
1156 if (!cod_info->preflag) {
1157 for (sfb = 11; sfb < SBPSY_l; sfb++)
1158 if (scalefac[sfb] < pretab[sfb])
1159 break;
1160
1161 if (sfb == SBPSY_l) {
1162 cod_info->preflag = 1;
1163 for (sfb = 11; sfb < SBPSY_l; sfb++)
1164 scalefac[sfb] -= pretab[sfb];
1165 }
1166 }
1167 }
1168
1169 for (sfb = 0; sfb < cod_info->sfbdivide; sfb++)
1170 if (max_slen1 < scalefac[sfb])
1171 max_slen1 = scalefac[sfb];
1172
1173 for (; sfb < cod_info->sfbmax; sfb++)
1174 if (max_slen2 < scalefac[sfb])
1175 max_slen2 = scalefac[sfb];
1176
1177 /* from Takehiro TOMINAGA <tominaga@isoternet.org> 10/99
1178 * loop over *all* posible values of scalefac_compress to find the
1179 * one which uses the smallest number of bits. ISO would stop
1180 * at first valid index */
1181 cod_info->part2_length = LARGE_BITS;
1182 for (k = 0; k < 16; k++) {
1183 if (max_slen1 < slen1_n[k] && max_slen2 < slen2_n[k]
1184 && cod_info->part2_length > tab[k]) {
1185 cod_info->part2_length = tab[k];
1186 cod_info->scalefac_compress = k;
1187 }
1188 }
1189 return cod_info->part2_length == LARGE_BITS;
1190 }
1191
1192
1193
1194 /*
1195 table of largest scalefactor values for MPEG2
1196 */
1197 static const int max_range_sfac_tab[6][4] = {
1198 {15, 15, 7, 7},
1199 {15, 15, 7, 0},
1200 {7, 3, 0, 0},
1201 {15, 31, 31, 0},
1202 {7, 7, 7, 0},
1203 {3, 3, 0, 0}
1204 };
1205
1206
1207
1208
1209 /*************************************************************************/
1210 /* scale_bitcount_lsf */
1211 /*************************************************************************/
1212
1213 /* Also counts the number of bits to encode the scalefacs but for MPEG 2 */
1214 /* Lower sampling frequencies (24, 22.05 and 16 kHz.) */
1215
1216 /* This is reverse-engineered from section 2.4.3.2 of the MPEG2 IS, */
1217 /* "Audio Decoding Layer III" */
1218
1219 static int
mpeg2_scale_bitcount(const lame_internal_flags * gfc,gr_info * const cod_info)1220 mpeg2_scale_bitcount(const lame_internal_flags * gfc, gr_info * const cod_info)
1221 {
1222 int table_number, row_in_table, partition, nr_sfb, window, over;
1223 int i, sfb, max_sfac[4];
1224 const int *partition_table;
1225 int const *const scalefac = cod_info->scalefac;
1226
1227 /*
1228 Set partition table. Note that should try to use table one,
1229 but do not yet...
1230 */
1231 if (cod_info->preflag)
1232 table_number = 2;
1233 else
1234 table_number = 0;
1235
1236 for (i = 0; i < 4; i++)
1237 max_sfac[i] = 0;
1238
1239 if (cod_info->block_type == SHORT_TYPE) {
1240 row_in_table = 1;
1241 partition_table = &nr_of_sfb_block[table_number][row_in_table][0];
1242 for (sfb = 0, partition = 0; partition < 4; partition++) {
1243 nr_sfb = partition_table[partition] / 3;
1244 for (i = 0; i < nr_sfb; i++, sfb++)
1245 for (window = 0; window < 3; window++)
1246 if (scalefac[sfb * 3 + window] > max_sfac[partition])
1247 max_sfac[partition] = scalefac[sfb * 3 + window];
1248 }
1249 }
1250 else {
1251 row_in_table = 0;
1252 partition_table = &nr_of_sfb_block[table_number][row_in_table][0];
1253 for (sfb = 0, partition = 0; partition < 4; partition++) {
1254 nr_sfb = partition_table[partition];
1255 for (i = 0; i < nr_sfb; i++, sfb++)
1256 if (scalefac[sfb] > max_sfac[partition])
1257 max_sfac[partition] = scalefac[sfb];
1258 }
1259 }
1260
1261 for (over = 0, partition = 0; partition < 4; partition++) {
1262 if (max_sfac[partition] > max_range_sfac_tab[table_number][partition])
1263 over++;
1264 }
1265 if (!over) {
1266 /*
1267 Since no bands have been over-amplified, we can set scalefac_compress
1268 and slen[] for the formatter
1269 */
1270 static const int log2tab[] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 };
1271
1272 int slen1, slen2, slen3, slen4;
1273
1274 cod_info->sfb_partition_table = nr_of_sfb_block[table_number][row_in_table];
1275 for (partition = 0; partition < 4; partition++)
1276 cod_info->slen[partition] = log2tab[max_sfac[partition]];
1277
1278 /* set scalefac_compress */
1279 slen1 = cod_info->slen[0];
1280 slen2 = cod_info->slen[1];
1281 slen3 = cod_info->slen[2];
1282 slen4 = cod_info->slen[3];
1283
1284 switch (table_number) {
1285 case 0:
1286 cod_info->scalefac_compress = (((slen1 * 5) + slen2) << 4)
1287 + (slen3 << 2)
1288 + slen4;
1289 break;
1290
1291 case 1:
1292 cod_info->scalefac_compress = 400 + (((slen1 * 5) + slen2) << 2)
1293 + slen3;
1294 break;
1295
1296 case 2:
1297 cod_info->scalefac_compress = 500 + (slen1 * 3) + slen2;
1298 break;
1299
1300 default:
1301 ERRORF(gfc, "intensity stereo not implemented yet\n");
1302 break;
1303 }
1304 }
1305 #ifdef DEBUG
1306 if (over)
1307 ERRORF(gfc, "---WARNING !! Amplification of some bands over limits\n");
1308 #endif
1309 if (!over) {
1310 assert(cod_info->sfb_partition_table);
1311 cod_info->part2_length = 0;
1312 for (partition = 0; partition < 4; partition++)
1313 cod_info->part2_length +=
1314 cod_info->slen[partition] * cod_info->sfb_partition_table[partition];
1315 }
1316 return over;
1317 }
1318
1319
1320 int
scale_bitcount(const lame_internal_flags * gfc,gr_info * cod_info)1321 scale_bitcount(const lame_internal_flags * gfc, gr_info * cod_info)
1322 {
1323 if (gfc->cfg.mode_gr == 2) {
1324 return mpeg1_scale_bitcount(gfc, cod_info);
1325 }
1326 else {
1327 return mpeg2_scale_bitcount(gfc, cod_info);
1328 }
1329 }
1330
1331
1332 #ifdef MMX_choose_table
1333 extern int choose_table_MMX(const int *ix, const int *const end, int *const s);
1334 #endif
1335
1336 void
huffman_init(lame_internal_flags * const gfc)1337 huffman_init(lame_internal_flags * const gfc)
1338 {
1339 int i;
1340
1341 gfc->choose_table = choose_table_nonMMX;
1342
1343 #ifdef MMX_choose_table
1344 if (gfc->CPU_features.MMX) {
1345 gfc->choose_table = choose_table_MMX;
1346 }
1347 #endif
1348
1349 for (i = 2; i <= 576; i += 2) {
1350 int scfb_anz = 0, bv_index;
1351 while (gfc->scalefac_band.l[++scfb_anz] < i);
1352
1353 bv_index = subdv_table[scfb_anz].region0_count;
1354 while (gfc->scalefac_band.l[bv_index + 1] > i)
1355 bv_index--;
1356
1357 if (bv_index < 0) {
1358 /* this is an indication that everything is going to
1359 be encoded as region0: bigvalues < region0 < region1
1360 so lets set region0, region1 to some value larger
1361 than bigvalues */
1362 bv_index = subdv_table[scfb_anz].region0_count;
1363 }
1364
1365 gfc->sv_qnt.bv_scf[i - 2] = bv_index;
1366
1367 bv_index = subdv_table[scfb_anz].region1_count;
1368 while (gfc->scalefac_band.l[bv_index + gfc->sv_qnt.bv_scf[i - 2] + 2] > i)
1369 bv_index--;
1370
1371 if (bv_index < 0) {
1372 bv_index = subdv_table[scfb_anz].region1_count;
1373 }
1374
1375 gfc->sv_qnt.bv_scf[i - 1] = bv_index;
1376 }
1377 }
1378