1 /*
2  *      lame utility library include file
3  *
4  *      Copyright (c) 1999 Albert L Faber
5  *      Copyright (c) 2008 Robert Hegemann
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20  * Boston, MA 02111-1307, USA.
21  */
22 
23 #ifndef LAME_UTIL_H
24 #define LAME_UTIL_H
25 
26 #include "l3side.h"
27 #include "id3tag.h"
28 #include "lame_global_flags.h"
29 
30 #ifdef __cplusplus
31 extern  "C" {
32 #endif
33 
34 /***********************************************************************
35 *
36 *  Global Definitions
37 *
38 ***********************************************************************/
39 
/* Boolean constants; only supplied when nothing earlier defined them. */
#if !defined(FALSE)
# define FALSE 0
#endif

#if !defined(TRUE)
# define TRUE (!FALSE)
#endif

/*
 * Largest unsigned value: UINT_MAX when that macro is already visible at
 * this point of the translation unit, the all-ones 32-bit literal otherwise.
 */
#if !defined(UINT_MAX)
# define MAX_U_32_NUM 0xFFFFFFFF
#else
# define MAX_U_32_NUM UINT_MAX
#endif
53 
/*
 * Mathematical constants.  Each one maps to the matching M_* macro when
 * that macro is visible and falls back to a literal otherwise.
 */

/* pi; left alone when PI is already defined */
#if !defined(PI)
# if defined(M_PI)
#  define PI M_PI
# else
#  define PI 3.14159265358979323846
# endif
#endif

/* natural logarithm of 2 */
#if defined(M_LN2)
# define LOG2 M_LN2
#else
# define LOG2 0.69314718055994530942
#endif

/* natural logarithm of 10 */
#if defined(M_LN10)
# define LOG10 M_LN10
#else
# define LOG10 2.30258509299404568402
#endif

/* square root of 2 */
#if defined(M_SQRT2)
# define SQRT2 M_SQRT2
#else
# define SQRT2 1.41421356237309504880
#endif
81 
82 
/* polynomial constant for the 16 bit CRC */
#define CRC16_POLYNOMIAL 0x8005

/* bit limits per channel and per granule */
#define MAX_BITS_PER_CHANNEL 4095
#define MAX_BITS_PER_GRANULE 7680

/* "bit_stream.h" Definitions */
#define BUFFER_SIZE LAME_MAXMP3BUFFER

/*
 * Two-way minimum / maximum.
 * These stay macros so they can be used in constant expressions (they are
 * used as array dimensions further down in this header).  Note that the
 * selected argument is evaluated twice, so avoid arguments with side effects.
 */
#define Min(A, B) (((A) < (B)) ? (A) : (B))
#define Max(A, B) (((A) > (B)) ? (A) : (B))
93 
/*
 * log/log10 front ends.
 * Without USE_FAST_LOG they expand to the plain log()/log10() calls; with
 * it they are built on fast_log2() and rescaled with LOG2 and LOG10.
 * The *_X variants additionally multiply the result by y.
 */
#if !defined(USE_FAST_LOG)
# define FAST_LOG10(x)       log10(x)
# define FAST_LOG(x)         log(x)
# define FAST_LOG10_X(x,y)   (log10(x)*(y))
# define FAST_LOG_X(x,y)     (log(x)*(y))
#else
# define FAST_LOG10(x)       (fast_log2(x)*(LOG2/LOG10))
# define FAST_LOG(x)         (fast_log2(x)*LOG2)
# define FAST_LOG10_X(x,y)   (fast_log2(x)*(LOG2/LOG10*(y)))
# define FAST_LOG_X(x,y)     (fast_log2(x)*(LOG2*(y)))
#endif
106 
107 
108     struct replaygain_data;
109 #ifndef replaygain_data_defined
110 #define replaygain_data_defined
111     typedef struct replaygain_data replaygain_t;
112 #endif
113     struct plotting_data;
114 #ifndef plotting_data_defined
115 #define plotting_data_defined
116     typedef struct plotting_data plotting_data;
117 #endif
118 
119 /***********************************************************************
120 *
121 *  Global Type Definitions
122 *
123 ***********************************************************************/
124 
125     typedef struct {
126         void   *aligned;     /* pointer to ie. 128 bit aligned memory */
127         void   *pointer;     /* to use with malloc/free */
128     } aligned_pointer_t;
129 
130     void    calloc_aligned(aligned_pointer_t * ptr, unsigned int size, unsigned int bytes);
131     void    free_aligned(aligned_pointer_t * ptr);
132 
133 
134     /* "bit_stream.h" Type Definitions */
135 
136     typedef struct bit_stream_struc {
137         unsigned char *buf;  /* bit stream buffer */
138         int     buf_size;    /* size of buffer (in number of bytes) */
139         int     totbit;      /* bit counter of bit stream */
140         int     buf_byte_idx; /* pointer to top byte in buffer */
141         int     buf_bit_idx; /* pointer to top bit of top byte in buffer */
142 
143         /* format of file in rd mode (BINARY/ASCII) */
144     } Bit_stream_struc;
145 
146 
147 
148     typedef struct {
149         int     sum;         /* what we have seen so far */
150         int     seen;        /* how many frames we have seen in this chunk */
151         int     want;        /* how many frames we want to collect into one chunk */
152         int     pos;         /* actual position in our bag */
153         int     size;        /* size of our bag */
154         int    *bag;         /* pointer to our bag */
155         unsigned int nVbrNumFrames;
156         unsigned long nBytesWritten;
157         /* VBR tag data */
158         unsigned int TotalFrameSize;
159     } VBR_seek_info_t;
160 
161 
162     /**
163      *  ATH related stuff, if something new ATH related has to be added,
164      *  please plugg it here into the ATH_t struct
165      */
166     typedef struct {
167         int     use_adjust;  /* method for the auto adjustment  */
168         FLOAT   aa_sensitivity_p; /* factor for tuning the (sample power)
169                                      point below which adaptive threshold
170                                      of hearing adjustment occurs */
171         FLOAT   adjust_factor; /* lowering based on peak volume, 1 = no lowering */
172         FLOAT   adjust_limit; /* limit for dynamic ATH adjust */
173         FLOAT   decay;       /* determined to lower x dB each second */
174         FLOAT   floor;       /* lowest ATH value */
175         FLOAT   l[SBMAX_l];  /* ATH for sfbs in long blocks */
176         FLOAT   s[SBMAX_s];  /* ATH for sfbs in short blocks */
177         FLOAT   psfb21[PSFB21]; /* ATH for partitionned sfb21 in long blocks */
178         FLOAT   psfb12[PSFB12]; /* ATH for partitionned sfb12 in short blocks */
179         FLOAT   cb_l[CBANDS]; /* ATH for long block convolution bands */
180         FLOAT   cb_s[CBANDS]; /* ATH for short block convolution bands */
181         FLOAT   eql_w[BLKSIZE / 2]; /* equal loudness weights (based on ATH) */
182     } ATH_t;
183 
184     /**
185      *  PSY Model related stuff
186      */
187 
188     typedef struct {
189         FLOAT   masking_lower[CBANDS];
190         FLOAT   minval[CBANDS];
191         FLOAT   rnumlines[CBANDS];
192         FLOAT   mld_cb[CBANDS];
193         FLOAT   mld[Max(SBMAX_l,SBMAX_s)];
194         FLOAT   bo_weight[Max(SBMAX_l,SBMAX_s)]; /* band weight long scalefactor bands, at transition */
195         FLOAT   attack_threshold; /* short block tuning */
196         int     s3ind[CBANDS][2];
197         int     numlines[CBANDS];
198         int     bm[Max(SBMAX_l,SBMAX_s)];
199         int     bo[Max(SBMAX_l,SBMAX_s)];
200         int     npart;
201         int     n_sb; /* SBMAX_l or SBMAX_s */
202         FLOAT  *s3;
203     } PsyConst_CB2SB_t;
204 
205 
206     /**
207      *  global data constants
208      */
209     typedef struct {
210         FLOAT window[BLKSIZE], window_s[BLKSIZE_s / 2];
211         PsyConst_CB2SB_t l;
212         PsyConst_CB2SB_t s;
213         PsyConst_CB2SB_t l_to_s;
214         FLOAT   attack_threshold[4];
215         FLOAT   decay;
216         int     force_short_block_calc;
217     } PsyConst_t;
218 
219 
220     typedef struct {
221 
222         FLOAT   nb_l1[4][CBANDS], nb_l2[4][CBANDS];
223         FLOAT   nb_s1[4][CBANDS], nb_s2[4][CBANDS];
224 
225         III_psy_xmin thm[4];
226         III_psy_xmin en[4];
227 
228         /* loudness calculation (for adaptive threshold of hearing) */
229         FLOAT   loudness_sq_save[2]; /* account for granule delay of L3psycho_anal */
230 
231         FLOAT   tot_ener[4];
232 
233         FLOAT   last_en_subshort[4][9];
234         int     last_attacks[4];
235 
236         int     blocktype_old[2];
237     } PsyStateVar_t;
238 
239 
240     typedef struct {
241         /* loudness calculation (for adaptive threshold of hearing) */
242         FLOAT   loudness_sq[2][2]; /* loudness^2 approx. per granule and channel */
243     } PsyResult_t;
244 
245 
246     /* variables used by encoder.c */
247     typedef struct {
248         /* variables for newmdct.c */
249         FLOAT   sb_sample[2][2][18][SBLIMIT];
250         FLOAT   amp_filter[32];
251 
252         /* variables used by util.c */
253         /* BPC = maximum number of filter convolution windows to precompute */
254 #define BPC 320
255         double  itime[2]; /* float precision seems to be not enough */
256         sample_t *inbuf_old[2];
257         sample_t *blackfilt[2 * BPC + 1];
258 
259         FLOAT   pefirbuf[19];
260 
261         /* used for padding */
262         int     frac_SpF;
263         int     slot_lag;
264 
265         /* variables for bitstream.c */
266         /* mpeg1: buffer=511 bytes  smallest frame: 96-38(sideinfo)=58
267          * max number of frames in reservoir:  8
268          * mpeg2: buffer=255 bytes.  smallest frame: 24-23bytes=1
269          * with VBR, if you are encoding all silence, it is possible to
270          * have 8kbs/24khz frames with 1byte of data each, which means we need
271          * to buffer up to 255 headers! */
272         /* also, max_header_buf has to be a power of two */
273 #define MAX_HEADER_BUF 256
274 #define MAX_HEADER_LEN 40    /* max size of header is 38 */
275         struct {
276             int     write_timing;
277             int     ptr;
278             char    buf[MAX_HEADER_LEN];
279         } header[MAX_HEADER_BUF];
280 
281         int     h_ptr;
282         int     w_ptr;
283         int     ancillary_flag;
284 
285         /* variables for reservoir.c */
286         int     ResvSize;    /* in bits */
287         int     ResvMax;     /* in bits */
288 
289         int     in_buffer_nsamples;
290         sample_t *in_buffer_0;
291         sample_t *in_buffer_1;
292 
293 #ifndef  MFSIZE
294 # define MFSIZE  ( 3*1152 + ENCDELAY - MDCTDELAY )
295 #endif
296         sample_t mfbuf[2][MFSIZE];
297 
298         int     mf_samples_to_encode;
299         int     mf_size;
300 
301     } EncStateVar_t;
302 
303 
304     typedef struct {
305         /* simple statistics */
306         int     bitrate_channelmode_hist[16][4 + 1];
307         int     bitrate_blocktype_hist[16][4 + 1 + 1]; /*norm/start/short/stop/mixed(short)/sum */
308 
309         int     bitrate_index;
310         int     frame_number; /* number of frames encoded             */
311         int     padding;     /* padding for the current frame? */
312         int     mode_ext;
313         int     encoder_delay;
314         int     encoder_padding; /* number of samples of padding appended to input */
315     } EncResult_t;
316 
317 
318     /* variables used by quantize.c */
319     typedef struct {
320         /* variables for nspsytune */
321         FLOAT   longfact[SBMAX_l];
322         FLOAT   shortfact[SBMAX_s];
323         FLOAT   masking_lower;
324         FLOAT   mask_adjust; /* the dbQ stuff */
325         FLOAT   mask_adjust_short; /* the dbQ stuff */
326         int     OldValue[2];
327         int     CurrentStep[2];
328         int     pseudohalf[SFBMAX];
329         int     sfb21_extra; /* will be set in lame_init_params */
330         int     substep_shaping; /* 0 = no substep
331                                     1 = use substep shaping at last step(VBR only)
332                                     (not implemented yet)
333                                     2 = use substep inside loop
334                                     3 = use substep inside loop and last step
335                                   */
336 
337 
338         char    bv_scf[576];
339     } QntStateVar_t;
340 
341 
342     typedef struct {
343         replaygain_t *rgdata;
344         /* ReplayGain */
345     } RpgStateVar_t;
346 
347 
348     typedef struct {
349         FLOAT   noclipScale; /* user-specified scale factor required for preventing clipping */
350         sample_t PeakSample;
351         int     RadioGain;
352         int     noclipGainChange; /* gain change required for preventing clipping */
353     } RpgResult_t;
354 
355 
356     typedef struct {
357         int     version;     /* 0=MPEG-2/2.5  1=MPEG-1               */
358         int     samplerate_index;
359         int     sideinfo_len;
360 
361         int     noise_shaping; /* 0 = none
362                                   1 = ISO AAC model
363                                   2 = allow scalefac_select=1
364                                 */
365 
366         int     subblock_gain; /*  0 = no, 1 = yes */
367         int     use_best_huffman; /* 0 = no.  1=outside loop  2=inside loop(slow) */
368         int     noise_shaping_amp; /*  0 = ISO model: amplify all distorted bands
369                                       1 = amplify within 50% of max (on db scale)
370                                       2 = amplify only most distorted band
371                                       3 = method 1 and refine with method 2
372                                     */
373 
374         int     noise_shaping_stop; /* 0 = stop at over=0, all scalefacs amplified or
375                                        a scalefac has reached max value
376                                        1 = stop when all scalefacs amplified or
377                                        a scalefac has reached max value
378                                        2 = stop when all scalefacs amplified
379                                      */
380 
381 
382         int     full_outer_loop; /* 0 = stop early after 0 distortion found. 1 = full search */
383 
384         int     lowpassfreq;
385         int     highpassfreq;
386         int     samplerate_in; /* input_samp_rate in Hz. default=44.1 kHz     */
387         int     samplerate_out; /* output_samp_rate. */
388         int     channels_in; /* number of channels in the input data stream (PCM or decoded PCM) */
389         int     channels_out; /* number of channels in the output data stream (not used for decoding) */
390         int     mode_gr;     /* granules per frame */
391         int     force_ms;    /* force M/S mode.  requires mode=1            */
392 
393         int     quant_comp;
394         int     quant_comp_short;
395 
396         int     use_temporal_masking_effect;
397         int     use_safe_joint_stereo;
398 
399         int     preset;
400 
401         vbr_mode vbr;
402         int     vbr_avg_bitrate_kbps;
403         int     vbr_min_bitrate_index; /* min bitrate index */
404         int     vbr_max_bitrate_index; /* max bitrate index */
405         int     avg_bitrate;
406         int     enforce_min_bitrate; /* strictly enforce VBR_min_bitrate normaly, it will be violated for analog silence */
407 
408         int     findReplayGain; /* find the RG value? default=0       */
409         int     findPeakSample;
410         int     decode_on_the_fly; /* decode on the fly? default=0                */
411         int     analysis;
412         int     disable_reservoir;
413         int     buffer_constraint;  /* enforce ISO spec as much as possible   */
414         int     free_format;
415         int     write_lame_tag; /* add Xing VBR tag?                           */
416 
417         int     error_protection; /* use 2 bytes per frame for a CRC checksum. default=0 */
418         int     copyright;   /* mark as copyright. default=0           */
419         int     original;    /* mark as original. default=1            */
420         int     extension;   /* the MP3 'private extension' bit. Meaningless */
421         int     emphasis;    /* Input PCM is emphased PCM (for
422                                 instance from one of the rarely
423                                 emphased CDs), it is STRONGLY not
424                                 recommended to use this, because
425                                 psycho does not take it into account,
426                                 and last but not least many decoders
427                                 don't care about these bits          */
428 
429 
430         MPEG_mode mode;
431         short_block_t short_blocks;
432 
433         float   interChRatio;
434         float   msfix;       /* Naoki's adjustment of Mid/Side maskings */
435         float   ATH_offset_db;/* add to ATH this many db            */
436         float   ATH_offset_factor;/* change ATH by this factor, derived from ATH_offset_db */
437         float   ATHcurve;    /* change ATH formula 4 shape           */
438         int     ATHtype;
439         int     ATHonly;     /* only use ATH                         */
440         int     ATHshort;    /* only use ATH for short blocks        */
441         int     noATH;       /* disable ATH                          */
442 
443         float   ATHfixpoint;
444 
445         float   adjust_alto_db;
446         float   adjust_bass_db;
447         float   adjust_treble_db;
448         float   adjust_sfb21_db;
449 
450         float   compression_ratio; /* sizeof(wav file)/sizeof(mp3 file)          */
451 
452         /* lowpass and highpass filter control */
453         FLOAT   lowpass1, lowpass2; /* normalized frequency bounds of passband */
454         FLOAT   highpass1, highpass2; /* normalized frequency bounds of passband */
455 
456         /* scale input by this amount before encoding at least not used for MP3 decoding */
457         FLOAT   pcm_transform[2][2];
458 
459         FLOAT   minval;
460     } SessionConfig_t;
461 
462 
463     struct lame_internal_flags {
464 
465   /********************************************************************
466    * internal variables NOT set by calling program, and should not be *
467    * modified by the calling program                                  *
468    ********************************************************************/
469 
470         /*
471          * Some remarks to the Class_ID field:
472          * The Class ID is an Identifier for a pointer to this struct.
473          * It is very unlikely that a pointer to lame_global_flags has the same 32 bits
474          * in it's structure (large and other special properties, for instance prime).
475          *
476          * To test that the structure is right and initialized, use:
477          *     if ( gfc -> Class_ID == LAME_ID ) ...
478          * Other remark:
479          *     If you set a flag to 0 for uninit data and 1 for init data, the right test
480          *     should be "if (flag == 1)" and NOT "if (flag)". Unintended modification
481          *     of this element will be otherwise misinterpreted as an init.
482          */
483 #  define  LAME_ID   0xFFF88E3B
484         unsigned long class_id;
485 
486         int     lame_init_params_successful;
487         int     lame_encode_frame_init;
488         int     iteration_init_init;
489         int     fill_buffer_resample_init;
490 
491         SessionConfig_t cfg;
492 
493         /* variables used by lame.c */
494         Bit_stream_struc bs;
495         III_side_info_t l3_side;
496 
497         scalefac_struct scalefac_band;
498 
499         PsyStateVar_t sv_psy; /* DATA FROM PSYMODEL.C */
500         PsyResult_t ov_psy;
501         EncStateVar_t sv_enc; /* DATA FROM ENCODER.C */
502         EncResult_t ov_enc;
503         QntStateVar_t sv_qnt; /* DATA FROM QUANTIZE.C */
504 
505         RpgStateVar_t sv_rpg;
506         RpgResult_t ov_rpg;
507 
508         /* optional ID3 tags, used in id3tag.c  */
509         struct id3tag_spec tag_spec;
510         uint16_t nMusicCRC;
511 
512         uint16_t _unused;
513 
514         /* CPU features */
515         struct {
516             unsigned int MMX:1; /* Pentium MMX, Pentium II...IV, K6, K6-2,
517                                    K6-III, Athlon */
518             unsigned int AMD_3DNow:1; /* K6-2, K6-III, Athlon      */
519             unsigned int SSE:1; /* Pentium III, Pentium 4    */
520             unsigned int SSE2:1; /* Pentium 4, K8             */
521             unsigned int _unused:28;
522         } CPU_features;
523 
524 
525         VBR_seek_info_t VBR_seek_table; /* used for Xing VBR header */
526 
527         ATH_t  *ATH;         /* all ATH related stuff */
528 
529         PsyConst_t *cd_psy;
530 
531         /* used by the frame analyzer */
532         plotting_data *pinfo;
533         hip_t hip;
534 
535         /* functions to replace with CPU feature optimized versions in takehiro.c */
536         int     (*choose_table) (const int *ix, const int *const end, int *const s);
537         void    (*fft_fht) (FLOAT *, int);
538         void    (*init_xrpow_core) (gr_info * const cod_info, FLOAT xrpow[576], int upper,
539                                     FLOAT * sum);
540 
541         lame_report_function report_msg;
542         lame_report_function report_dbg;
543         lame_report_function report_err;
544     };
545 
/* Emit the lame_internal_flags typedef once; the guard macro records it. */
#if !defined(lame_internal_flags_defined)
# define lame_internal_flags_defined
    typedef struct lame_internal_flags lame_internal_flags;
#endif
550 
551 
552 /***********************************************************************
553 *
554 *  Global Function Prototype Declarations
555 *
556 ***********************************************************************/
557     void    freegfc(lame_internal_flags * const gfc);
558     void    free_id3tag(lame_internal_flags * const gfc);
559     extern int BitrateIndex(int, int, int);
560     extern int FindNearestBitrate(int, int, int);
561     extern int map2MP3Frequency(int freq);
562     extern int SmpFrqIndex(int, int *const);
563     extern int nearestBitrateFullIndex(uint16_t brate);
564     extern FLOAT ATHformula(SessionConfig_t const *cfg, FLOAT freq);
565     extern FLOAT freq2bark(FLOAT freq);
566     void    disable_FPE(void);
567 
568 /* log/log10 approximations */
569     extern void init_log_table(void);
570     extern ieee754_float32_t fast_log2(ieee754_float32_t x);
571 
572     int     isResamplingNecessary(SessionConfig_t const* cfg);
573 
574     void    fill_buffer(lame_internal_flags * gfc,
575                         sample_t *const mfbuf[2],
576                         sample_t const *const in_buffer[2], int nsamples, int *n_in, int *n_out);
577 
578 /* same as lame_decode1 (look in lame.h), but returns
579    unclipped raw floating-point samples. It is declared
580    here, not in lame.h, because it returns LAME's
581    internal type sample_t. No more than 1152 samples
582    per channel are allowed. */
583     int     hip_decode1_unclipped(hip_t hip, unsigned char *mp3buf,
584                                    size_t len, sample_t pcm_l[], sample_t pcm_r[]);
585 
586 
587     extern int has_MMX(void);
588     extern int has_3DNow(void);
589     extern int has_SSE(void);
590     extern int has_SSE2(void);
591 
592 
593 
594 /***********************************************************************
595 *
596 *  Macros about Message Printing and Exit
597 *
598 ***********************************************************************/
599 
600     extern void lame_report_def(const char* format, va_list args);
601     extern void lame_report_fnc(lame_report_function print_f, const char *, ...);
602     extern void lame_errorf(const lame_internal_flags * gfc, const char *, ...);
603     extern void lame_debugf(const lame_internal_flags * gfc, const char *, ...);
604     extern void lame_msgf(const lame_internal_flags * gfc, const char *, ...);
605 #define DEBUGF  lame_debugf
606 #define ERRORF  lame_errorf
607 #define MSGF    lame_msgf
608 
609     int     is_lame_internal_flags_valid(const lame_internal_flags * gfp);
610 
611     extern void hip_set_pinfo(hip_t hip, plotting_data* pinfo);
612 
613 #ifdef __cplusplus
614 }
615 #endif
616 #endif                       /* LAME_UTIL_H */
617