1 #ifndef MINIMP3_H
2 #define MINIMP3_H
3 /*
4     https://github.com/lieff/minimp3
5     To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
6     This software is distributed without any warranty.
7     See <http://creativecommons.org/publicdomain/zero/1.0/>.
8 */
9 #include <stdint.h>
10 
/* Upper bound on the number of samples in one frame: 1152 (the largest value
   hdr_frame_samples() returns) times 2 -- presumably one slot per channel;
   confirm against mp3dec_decode_frame(). */
#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)
12 
/* Frame description handed to mp3dec_decode_frame() through its `info` argument.
   Field meanings below follow the member names; the code that fills them in is
   not part of this section, so treat units as "to be confirmed". */
typedef struct
{
    int frame_bytes;   /* presumably the size of the frame in bytes */
    int frame_offset;  /* presumably the offset of the frame within the input */
    int channels;      /* channel count */
    int hz;            /* sample rate, Hz per the name */
    int layer;         /* MPEG audio layer */
    int bitrate_kbps;  /* bitrate, kbit/s per the name */
} mp3dec_frame_info_t;
17 
/* Decoder instance passed to mp3dec_init() and mp3dec_decode_frame().
   The roles noted below are inferred from the member names only. */
typedef struct
{
    float mdct_overlap[2][9*32];     /* two rows of 288 floats */
    float qmf_state[15*2*32];        /* 960 floats */
    int reserv;                      /* presumably the number of valid bytes in reserv_buf */
    int free_format_bytes;
    unsigned char header[4];         /* same size as HDR_SIZE */
    unsigned char reserv_buf[511];   /* same size as MAX_BITRESERVOIR_BYTES */
} mp3dec_t;
24 
/* Public API. C linkage is forced so the header can be included from C++. */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Prepares a decoder instance for use (by its name; the definition is outside
   this section -- confirm exact semantics there). */
void mp3dec_init(mp3dec_t *dec);
/* Output sample type: int16_t by default, float when MINIMP3_FLOAT_OUTPUT is defined. */
#ifndef MINIMP3_FLOAT_OUTPUT
typedef int16_t mp3d_sample_t;
#else /* MINIMP3_FLOAT_OUTPUT */
typedef float mp3d_sample_t;
/* Only declared in float-output builds. By its signature it converts
   num_samples floats from `in` into int16_t values in `out`; scaling and
   clipping rules are defined by the implementation (not shown here). */
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
#endif /* MINIMP3_FLOAT_OUTPUT */
/* Decodes from the mp3_bytes bytes at `mp3` into `pcm`, using `dec` as decoder
   state and `info` as the frame description. The meaning of the int return
   value is defined by the implementation (not shown here); `pcm` is presumably
   expected to hold MINIMP3_MAX_SAMPLES_PER_FRAME samples -- TODO confirm. */
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);

#ifdef __cplusplus
}
#endif /* __cplusplus */
41 
42 #endif /* MINIMP3_H */
43 #if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
44 #define _MINIMP3_IMPLEMENTATION_GUARD
45 
46 #include <stdlib.h>
47 #include <string.h>
48 
#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* larger than the ISO spec's limit */
/* May be overridden by defining MAX_FRAME_SYNC_MATCHES before including this header. */
#ifndef MAX_FRAME_SYNC_MATCHES
#define MAX_FRAME_SYNC_MATCHES      10
#endif /* MAX_FRAME_SYNC_MATCHES */

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

/* Same value as the size of mp3dec_t.reserv_buf. */
#define MAX_BITRESERVOIR_BYTES      511
/* Values of L3_gr_info_t.block_type. */
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
/* Values returned by HDR_GET_STEREO_MODE(). */
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1
/* Frame-header accessors. `h` must be an expression that can be indexed
   directly (h[1]..h[3]); the macros do not parenthesize `h` itself, so pass a
   plain pointer/array name rather than an arithmetic expression. */
#define HDR_SIZE                    4
/* Both stereo-mode bits set, i.e. HDR_GET_STEREO_MODE(h) == MODE_MONO. */
#define HDR_IS_MONO(h)              (((h[3]) & 0xC0) == 0xC0)
/* Stereo mode == MODE_JOINT_STEREO with the HDR_TEST_MS_STEREO bit set. */
#define HDR_IS_MS_STEREO(h)         (((h[3]) & 0xE0) == 0x60)
/* Bitrate index (HDR_GET_BITRATE) is zero. */
#define HDR_IS_FREE_FORMAT(h)       (((h[2]) & 0xF0) == 0)
/* True when the lowest bit of h[1] is clear. */
#define HDR_IS_CRC(h)               (!((h[1]) & 1))
/* The HDR_TEST_* macros yield the raw masked bit (nonzero when set), not 0/1. */
#define HDR_TEST_PADDING(h)         ((h[2]) & 0x2)
#define HDR_TEST_MPEG1(h)           ((h[1]) & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      ((h[1]) & 0x10)
#define HDR_TEST_I_STEREO(h)        ((h[3]) & 0x10)
#define HDR_TEST_MS_STEREO(h)       ((h[3]) & 0x20)
/* The HDR_GET_* macros extract a bit field as a small integer. */
#define HDR_GET_STEREO_MODE(h)      (((h[3]) >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  (((h[3]) >> 4) & 3)
#define HDR_GET_LAYER(h)            (((h[1]) >> 1) & 3)
#define HDR_GET_BITRATE(h)          ((h[2]) >> 4)
#define HDR_GET_SAMPLE_RATE(h)      (((h[2]) >> 2) & 3)
/* Sample-rate index plus 3 for each of the two version bits (0x8, 0x10) that is set. */
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3)
/* Layer field == 1 with the MPEG1 bit clear; hdr_frame_samples() halves 1152 to 576 for these. */
#define HDR_IS_FRAME_576(h)         ((h[1] & 14) == 2)
/* Layer field == 3. */
#define HDR_IS_LAYER_1(h)           ((h[1] & 6) == 6)

#define BITS_DEQUANTIZER_OUT        -1
/* With BITS_DEQUANTIZER_OUT == -1 these evaluate to 41 and 44 (41 rounded up to a multiple of 4). */
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)

/* Function-like min/max; each argument may be evaluated twice, so avoid side effects. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
86 
/* SIMD selection. Define MINIMP3_NO_SIMD to skip this whole section. */
#if !defined(MINIMP3_NO_SIMD)

#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64))
/* x64 always has SSE2 and arm64 always has NEON, so no generic code is needed */
#define MINIMP3_ONLY_SIMD
#endif /* SIMD checks... */

/* SSE2 back end: MSVC targeting x86/x64, or a GCC-compatible x86 compiler that defines __SSE2__. */
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
#if defined(_MSC_VER)
#include <intrin.h>
#endif /* defined(_MSC_VER) */
#include <immintrin.h>
#define HAVE_SSE 1
#define HAVE_SIMD 1
/* Four-float vector vocabulary shared with the NEON back end; here each name maps to an SSE intrinsic. */
#define VSTORE _mm_storeu_ps
#define VLD _mm_loadu_ps
#define VSET _mm_set1_ps
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
/* VMAC: a + x*y, VMSB: a - x*y, VMUL_S: x times the scalar s broadcast to all lanes. */
#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
#define VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))
/* VREV: the four lanes of x in reverse order. */
#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
typedef __m128 f4;
/* CPUID access: the compiler's __cpuid on MSVC or when SIMD is mandatory,
   otherwise the inline-assembly helper defined in the #else branch. */
#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
#define minimp3_cpuid __cpuid
#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Executes the x86 `cpuid` instruction with eax = InfoType and stores the
 * resulting eax, ebx, ecx and edx in CPUInfo[0..3] respectively.
 *
 * In position-independent builds (__PIC__) ebx is not named as an output:
 * its value is exchanged with a general-purpose register operand and restored
 * afterwards -- presumably because ebx is reserved by the compiler in PIC code.
 *
 * NOTE(review): the __x86_64__ variant uses push/pop inside the asm statement;
 * on x86_64 MINIMP3_ONLY_SIMD is normally defined above, which selects __cpuid
 * instead of this function, so that variant looks unreachable -- confirm.
 */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if defined(__PIC__)
    __asm__ __volatile__(
#if defined(__x86_64__)
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop  %%rbx\n"
#else /* defined(__x86_64__) */
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif /* defined(__x86_64__) */
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else /* defined(__PIC__) */
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif /* defined(__PIC__)*/
}
138 #endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Tells the caller whether the SSE code paths may be used.
 *
 * Returns 1 unconditionally when MINIMP3_ONLY_SIMD is defined. Otherwise the
 * CPU is probed once with cpuid and the answer is cached in a function-local
 * static: the result is nonzero (the raw feature bit, 1 << 26) when cpuid
 * leaf 1 reports SSE2 in edx, and 0 when it does not or leaf 1 is unavailable.
 * The cache is a plain static int with no synchronization.
 *
 * With MINIMP3_TEST defined, every call after the first 101 returns 0.
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else /* MINIMP3_ONLY_SIMD */
    /* 0 = not probed yet, otherwise (probe result + 1) */
    static int g_have_simd;
#ifdef MINIMP3_TEST
    static int g_counter;
    if (g_counter++ > 100)
        return 0;
#endif /* MINIMP3_TEST */
    if (!g_have_simd)
    {
        int regs[4];
        minimp3_cpuid(regs, 0);
        if (regs[0] > 0)
        {
            /* leaf 1 exists: edx bit 26 is the SSE2 flag */
            minimp3_cpuid(regs, 1);
            g_have_simd = (regs[3] & (1 << 26)) + 1; /* SSE2 */
        } else
        {
            g_have_simd = 1;
        }
    }
    return g_have_simd - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
/* NEON back end: the same four-float vector vocabulary, mapped to ARM NEON intrinsics. */
#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
#define HAVE_SSE 0
#define HAVE_SIMD 1
#define VSTORE vst1q_f32
#define VLD vld1q_f32
#define VSET vmovq_n_f32
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
/* VMAC: a + x*y, VMSB: a - x*y, VMUL_S: x times the scalar s broadcast to all lanes. */
#define VMAC(a, x, y) vmlaq_f32(a, x, y)
#define VMSB(a, x, y) vmlsq_f32(a, x, y)
#define VMUL_S(x, s)  vmulq_f32(x, vmovq_n_f32(s))
/* VREV: swap within each 64-bit half, then swap the halves. Note that x is expanded twice. */
#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
typedef float32x4_t f4;
/*
 * NEON variant of the SIMD availability query: always reports 1, no runtime
 * probing is done (see the TODO below).
 *
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype, matching the x86 variant of this function.
 */
static int have_simd(void)
{   /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
    return 1;
}
/* Neither SSE2 nor NEON is available: only generic code can be used, which
   contradicts MINIMP3_ONLY_SIMD if that was requested. */
#else /* SIMD checks... */
#define HAVE_SSE 0
#define HAVE_SIMD 0
#ifdef MINIMP3_ONLY_SIMD
#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
#endif /* MINIMP3_ONLY_SIMD */
#endif /* SIMD checks... */
/* SIMD disabled by the user. HAVE_SSE is intentionally left undefined here,
   so it evaluates as 0 inside #if expressions. */
#else /* !defined(MINIMP3_NO_SIMD) */
#define HAVE_SIMD 0
#endif /* !defined(MINIMP3_NO_SIMD) */
193 
/* 32-bit ARM, architecture version 6 or newer (64-bit ARM is excluded). */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)
#define HAVE_ARMV6 1
/* Returns `a` passed through the ARM `ssat` instruction with a width of 16
   bits, i.e. (per the function name) clipped to the int16_t range. */
static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)
{
    int32_t x = 0;
    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
    return x;
}
#else
#define HAVE_ARMV6 0
#endif
205 
/* Bit reader state used by bs_init() and get_bits(). */
typedef struct
{
    const uint8_t *buf; /* start of the byte buffer being read */
    int pos;            /* current read position, in bits from buf */
    int limit;          /* total number of readable bits (bytes*8) */
} bs_t;
211 
/* Layer I/II per-frame scale information. bitalloc[] and scfcod[] are indexed
   by 2*band + channel; scf[] holds three scalefactors per such slot. */
typedef struct
{
    float scf[3*64];
    uint8_t total_bands;   /* number of coded subbands */
    uint8_t stereo_bands;  /* subbands below this index read a separate allocation for the second channel */
    uint8_t bitalloc[64];
    uint8_t scfcod[64];
} L12_scale_info;
217 
/* One run of consecutive subbands that share a bit-allocation code table. */
typedef struct
{
    uint8_t tab_offset;     /* offset of the run's code table inside g_bitalloc_code_tab */
    uint8_t code_tab_width; /* number of bits read per allocation code */
    uint8_t band_count;     /* how many subbands the run covers */
} L12_subband_alloc_t;
222 
/* Layer III side information for one granule of one channel, as filled in by
   L3_read_side_info(). */
typedef struct
{
    const uint8_t *sfbtab;       /* scalefactor band width table selected for this granule */
    uint16_t part_23_length;
    uint16_t big_values;
    uint16_t scalefac_compress;
    uint8_t global_gain;
    uint8_t block_type;          /* 0 when no window switching; see SHORT_BLOCK_TYPE / STOP_BLOCK_TYPE */
    uint8_t mixed_block_flag;
    uint8_t n_long_sfb;          /* number of long scalefactor bands */
    uint8_t n_short_sfb;         /* number of short scalefactor band entries */
    uint8_t table_select[3];
    uint8_t region_count[3];
    uint8_t subblock_gain[3];
    uint8_t preflag;
    uint8_t scalefac_scale;
    uint8_t count1_table;
    uint8_t scfsi;               /* 4-bit scalefactor selection info for this entry */
} L3_gr_info_t;
231 
232 typedef struct
233 {
234     bs_t bs;
235     uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
236     L3_gr_info_t gr_info[4];
237     float grbuf[2][576], scf[40], syn[18 + 15][2*32];
238     uint8_t ist_pos[2][39];
239 } mp3dec_scratch_t;
240 
/* Points the bit reader `bs` at `data` (`bytes` bytes long) and rewinds it to bit 0. */
static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
{
    const int total_bits = bytes*8;

    bs->limit = total_bits;
    bs->pos   = 0;
    bs->buf   = data;
}
247 
/*
 * Reads the next n bits from `bs`, most significant bit first, and returns
 * them right-aligned.
 *
 * The read position is always advanced by n. If that moves it past bs->limit
 * the function returns 0 without touching the buffer.
 */
static uint32_t get_bits(bs_t *bs, int n)
{
    const uint32_t bit_off = bs->pos & 7;           /* bits already consumed in the first byte */
    const uint8_t *src = bs->buf + (bs->pos >> 3);
    uint32_t acc = 0, cur;
    int shift = n + bit_off;

    bs->pos += n;
    if (bs->pos > bs->limit)
    {
        return 0;
    }

    /* first byte with the already-consumed high bits masked off */
    cur = *src++ & (255 >> bit_off);
    for (shift -= 8; shift > 0; shift -= 8)
    {
        acc |= cur << shift;
        cur = *src++;
    }
    /* shift is now in -7..0: drop the unused low bits of the last byte */
    return acc | (cur >> -shift);
}
263 
/*
 * Returns 1 when the 4-byte header `h` passes the basic checks, else 0:
 * first byte 0xff, second byte matching 0xFx or 0xE2/0xE3, a nonzero layer
 * field, a bitrate index other than 15 and a sample-rate index other than 3.
 */
static int hdr_valid(const uint8_t *h)
{
    if (h[0] != 0xff)
    {
        return 0;
    }
    if ((h[1] & 0xF0) != 0xf0 && (h[1] & 0xFE) != 0xe2)
    {
        return 0;
    }
    if (HDR_GET_LAYER(h) == 0)
    {
        return 0;
    }
    if (HDR_GET_BITRATE(h) == 15)
    {
        return 0;
    }
    return HDR_GET_SAMPLE_RATE(h) != 3;
}
272 
/*
 * Returns 1 when header h2 is valid and agrees with h1 on everything in byte 1
 * except its lowest bit, on the sample-rate index, and on whether the stream
 * is free-format; returns 0 otherwise.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    if (!hdr_valid(h2))
    {
        return 0;
    }
    if (((h1[1] ^ h2[1]) & 0xFE) != 0)
    {
        return 0;
    }
    if (((h1[2] ^ h2[2]) & 0x0C) != 0)
    {
        return 0;
    }
    return HDR_IS_FREE_FORMAT(h1) == HDR_IS_FREE_FORMAT(h2);
}
280 
/*
 * Returns the bitrate in kbit/s selected by header `h`, or 0 when the bitrate
 * index is 0. The table stores half the rate so that every entry fits in a
 * byte; it is indexed by [MPEG1 flag][layer field - 1][bitrate index].
 * The bitrate index must be below 15 and the layer field nonzero (both are
 * checked by hdr_valid()).
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        {
            { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 },
            { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 },
            { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 }
        },
        {
            { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 },
            { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 },
            { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 }
        },
    };
    const int is_mpeg1  = HDR_TEST_MPEG1(h) ? 1 : 0;
    const int layer_idx = HDR_GET_LAYER(h) - 1;
    const int rate_idx  = HDR_GET_BITRATE(h);

    return 2*halfrate[is_mpeg1][layer_idx][rate_idx];
}
289 
/*
 * Returns the sample rate in Hz for header `h`: the base rate chosen by the
 * sample-rate index, halved once when the MPEG1 bit is clear and once more
 * when the "not MPEG 2.5" bit is clear. The sample-rate index must be 0..2
 * (hdr_valid() rejects 3).
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    unsigned hz = g_hz[HDR_GET_SAMPLE_RATE(h)];

    if (!HDR_TEST_MPEG1(h))
    {
        hz >>= 1;
    }
    if (!HDR_TEST_NOT_MPEG25(h))
    {
        hz >>= 1;
    }
    return hz;
}
295 
/* Returns the number of samples per channel in a frame: 384 for Layer I,
   576 when HDR_IS_FRAME_576(h) holds, 1152 otherwise. */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    if (HDR_IS_LAYER_1(h))
    {
        return 384;
    }
    return HDR_IS_FRAME_576(h) ? 576 : 1152;
}
300 
/*
 * Returns the frame size in bytes (without padding) computed from the header:
 * samples * kbps * 125 / Hz, rounded down to a multiple of 4 for Layer I.
 * When that comes out as 0 (bitrate index 0, i.e. free format) the caller's
 * free_format_size is returned instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    const unsigned samples = hdr_frame_samples(h);
    const unsigned kbps    = hdr_bitrate_kbps(h);
    const unsigned hz      = hdr_sample_rate_hz(h);
    int bytes = samples*kbps*125/hz;

    if (HDR_IS_LAYER_1(h))
    {
        bytes &= ~3; /* slot align */
    }
    if (bytes == 0)
    {
        return free_format_size;
    }
    return bytes;
}
310 
/* Returns the number of padding bytes signalled by the header: 0 when the
   padding bit is clear, otherwise 4 for Layer I and 1 for the other layers. */
static int hdr_padding(const uint8_t *h)
{
    if (!HDR_TEST_PADDING(h))
    {
        return 0;
    }
    return HDR_IS_LAYER_1(h) ? 4 : 1;
}
315 
316 #ifndef MINIMP3_ONLY_MP3
L12_subband_alloc_table(const uint8_t * hdr,L12_scale_info * sci)317 static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
318 {
319     const L12_subband_alloc_t *alloc;
320     int mode = HDR_GET_STEREO_MODE(hdr);
321     int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
322 
323     if (HDR_IS_LAYER_1(hdr))
324     {
325         static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
326         alloc = g_alloc_L1;
327         nbands = 32;
328     } else if (!HDR_TEST_MPEG1(hdr))
329     {
330         static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
331         alloc = g_alloc_L2M2;
332         nbands = 30;
333     } else
334     {
335         static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
336         int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
337         unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
338         if (!kbps) /* free-format */
339         {
340             kbps = 192;
341         }
342 
343         alloc = g_alloc_L2M1;
344         nbands = 27;
345         if (kbps < 56)
346         {
347             static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
348             alloc = g_alloc_L2M1_lowrate;
349             nbands = sample_rate_idx == 2 ? 12 : 8;
350         } else if (kbps >= 96 && sample_rate_idx != 1)
351         {
352             nbands = 30;
353         }
354     }
355 
356     sci->total_bands = (uint8_t)nbands;
357     sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);
358 
359     return alloc;
360 }
361 
/*
 * Reads Layer I/II scalefactors for `bands` slots and writes three floats per
 * slot to `scf`.
 *
 * For a slot with a zero bit allocation all three outputs are 0. Otherwise
 * scfcod[] decides which of the three positions carry a freshly read 6-bit
 * scalefactor (codes 0,1,2,3 -> positions {0,1,2}, {0,2}, {0}, {0,1}); a
 * position without a new value repeats the previous one.
 */
static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
{
    static const float g_deq_L12[18*3] = {
#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
        DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
    };
    int band;

    for (band = 0; band < bands; band++)
    {
        const int ba = pba[band];
        /* bit 4 = first position (always read when allocated), bits 2 and 1 = second and third */
        const int read_mask = ba ? 4 + ((19 >> scfcod[band]) & 3) : 0;
        float current = 0;
        int bit;

        for (bit = 4; bit != 0; bit >>= 1, scf++)
        {
            if (read_mask & bit)
            {
                const int code = get_bits(bs, 6);
                /* table row by allocation, column by code % 3, halved once per 3 code steps */
                current = g_deq_L12[ba*3 - 6 + code % 3]*(1 << 21 >> code/3);
            }
            *scf = current;
        }
    }
}
385 
/*
 * Reads the Layer I/II bit allocation, scalefactor selection codes and
 * scalefactors from `bs` into `sci`.
 *
 * Allocation codes are translated through g_bitalloc_code_tab; the sub-table
 * and code width change at the band boundaries given by the run list from
 * L12_subband_alloc_table(). Entries are stored interleaved: index 2*band is
 * the first channel, 2*band + 1 the second.
 */
static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
{
    /* sub-tables start at offsets 0, 16, 32, 40, 44, 60 and 76 */
    static const uint8_t g_bitalloc_code_tab[] = {
        0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
        0,17,18, 3,19,4,5,16,
        0,17,18,16,
        0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
        0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
    };
    const L12_subband_alloc_t *run = L12_subband_alloc_table(hdr, sci);
    const uint8_t *code_tab = g_bitalloc_code_tab;
    int band, slot, run_end = 0, code_bits = 0;

    for (band = 0; band < sci->total_bands; band++)
    {
        uint8_t ba;

        if (band == run_end)
        {
            /* entering the next run: switch code table and code width */
            run_end += run->band_count;
            code_bits = run->code_tab_width;
            code_tab = g_bitalloc_code_tab + run->tab_offset;
            run++;
        }

        ba = code_tab[get_bits(bs, code_bits)];
        sci->bitalloc[2*band] = ba;
        if (band < sci->stereo_bands)
        {
            /* the second channel has its own code only below stereo_bands */
            ba = code_tab[get_bits(bs, code_bits)];
        }
        sci->bitalloc[2*band + 1] = sci->stereo_bands ? ba : 0;
    }

    for (slot = 0; slot < 2*sci->total_bands; slot++)
    {
        if (!sci->bitalloc[slot])
        {
            sci->scfcod[slot] = 6;
        } else if (HDR_IS_LAYER_1(hdr))
        {
            sci->scfcod[slot] = 2;
        } else
        {
            sci->scfcod[slot] = get_bits(bs, 2);
        }
    }

    L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);

    /* bands at or above stereo_bands carry no separate samples for the second channel */
    for (band = sci->stereo_bands; band < sci->total_bands; band++)
    {
        sci->bitalloc[2*band + 1] = 0;
    }
}
433 
/*
 * Reads four groups of `group_size` samples for every (band, channel) slot of
 * `sci` from `bs` and stores them, as unscaled floats, in `grbuf`.
 *
 * Slots alternate between the two channels: the write pointer moves +576 to
 * reach the second channel and -558 (18 - 576) to reach the next band of the
 * first one. Slots with a zero bit allocation are skipped without writing.
 * Returns group_size*4.
 */
static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
{
    int group, slot, n;
    int step = 576; /* toggles between 576 and -558 on every slot */

    for (group = 0; group < 4; group++)
    {
        float *out = grbuf + group_size*group;

        for (slot = 0; slot < 2*sci->total_bands; slot++)
        {
            const int ba = sci->bitalloc[slot];

            if (ba >= 17)
            {
                /* grouped samples: one code holds group_size base-`mod` digits */
                unsigned mod = (2 << (ba - 17)) + 1;    /* 3, 5, 9 */
                unsigned code = get_bits(bs, mod + 2 - (mod >> 3));  /* 5, 7, 10 */
                for (n = 0; n < group_size; n++, code /= mod)
                {
                    out[n] = (float)((int)(code % mod - mod/2));
                }
            } else if (ba != 0)
            {
                /* plain samples: ba bits each, re-centred around zero */
                const int half = (1 << (ba - 1)) - 1;
                for (n = 0; n < group_size; n++)
                {
                    out[n] = (float)((int)get_bits(bs, ba) - half);
                }
            }

            out += step;
            step = 18 - step;
        }
    }
    return group_size*4;
}
468 
/*
 * Applies scalefactors to one batch of Layer I/II samples in `dst`.
 *
 * First the bands from stereo_bands up to total_bands are copied from the
 * first channel (dst) to the second (dst + 576). Then, for every band, the
 * first 12 samples of its 18-float row are multiplied by scf[0] in the first
 * channel and by scf[3] in the second; `scf` advances by 6 floats per band.
 */
static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
{
    int band, n;

    memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));

    for (band = 0; band < sci->total_bands; band++)
    {
        float *ch0 = dst + band*18;
        float *ch1 = ch0 + 576;
        const float *gain = scf + band*6;

        for (n = 0; n < 12; n++)
        {
            ch0[n] *= gain[0];
            ch1[n] *= gain[3];
        }
    }
}
482 #endif /* MINIMP3_ONLY_MP3 */
483 
/*
 * Parses the Layer III side information from `bs` into the array `gr`.
 *
 * One L3_gr_info_t is written per loop iteration: 1 entry for mono or 2 for
 * the other modes, doubled when the MPEG1 bit is set (so 1, 2 or 4 entries).
 *
 * Returns main_data_begin on success, or -1 when the side information is
 * rejected: big_values above 288, a window-switching entry with block_type 0,
 * or a total part_23_length that does not fit in the bits still available in
 * `bs` plus main_data_begin bytes.
 */
static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
{
    /* Scalefactor band width tables, one row per sample-rate index (sr_idx); rows end with 0. */
    static const uint8_t g_scf_long[8][23] = {
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
        { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
        { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
    };
    static const uint8_t g_scf_short[8][40] = {
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };
    /* Rows here have fewer than 40 initializers; the remaining elements are zero. */
    static const uint8_t g_scf_mixed[8][40] = {
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };

    unsigned tables, scfsi = 0;
    int main_data_begin, part_23_sum = 0;
    /* Table row: the combined sample-rate index, reduced by one unless it is already 0. */
    int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
    int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;

    if (HDR_TEST_MPEG1(hdr))
    {
        gr_count *= 2;
        main_data_begin = get_bits(bs, 9);
        /* 9 bits for mono, 11 for the other modes; only the low bits end up in gr->scfsi */
        scfsi = get_bits(bs, 7 + gr_count);
    } else
    {
        /* 9 or 10 bits are read and the low 1 or 2 are discarded, leaving an 8-bit value */
        main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
    }

    do
    {
        if (HDR_IS_MONO(hdr))
        {
            /* mono has one 4-bit scfsi group: move it up so the extraction below sees it */
            scfsi <<= 4;
        }
        gr->part_23_length = (uint16_t)get_bits(bs, 12);
        part_23_sum += gr->part_23_length;
        gr->big_values = (uint16_t)get_bits(bs,  9);
        if (gr->big_values > 288)
        {
            return -1;
        }
        gr->global_gain = (uint8_t)get_bits(bs, 8);
        gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
        /* defaults for long blocks; overridden below for short/mixed blocks */
        gr->sfbtab = g_scf_long[sr_idx];
        gr->n_long_sfb  = 22;
        gr->n_short_sfb = 0;
        if (get_bits(bs, 1))
        {
            /* window switching: block type is explicit and region counts are implied */
            gr->block_type = (uint8_t)get_bits(bs, 2);
            if (!gr->block_type)
            {
                return -1;
            }
            gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
            gr->region_count[0] = 7;
            gr->region_count[1] = 255;
            if (gr->block_type == SHORT_BLOCK_TYPE)
            {
                /* clears the scfsi bits that the extraction below would pick up for this entry */
                scfsi &= 0x0F0F;
                if (!gr->mixed_block_flag)
                {
                    gr->region_count[0] = 8;
                    gr->sfbtab = g_scf_short[sr_idx];
                    gr->n_long_sfb = 0;
                    gr->n_short_sfb = 39;
                } else
                {
                    gr->sfbtab = g_scf_mixed[sr_idx];
                    gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
                    gr->n_short_sfb = 30;
                }
            }
            /* only two 5-bit table selectors are coded; the third becomes 0 after the shift */
            tables = get_bits(bs, 10);
            tables <<= 5;
            gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
        } else
        {
            gr->block_type = 0;
            gr->mixed_block_flag = 0;
            /* three 5-bit table selectors */
            tables = get_bits(bs, 15);
            gr->region_count[0] = (uint8_t)get_bits(bs, 4);
            gr->region_count[1] = (uint8_t)get_bits(bs, 3);
            gr->region_count[2] = 255;
        }
        gr->table_select[0] = (uint8_t)(tables >> 10);
        gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
        gr->table_select[2] = (uint8_t)((tables) & 31);
        /* preflag is a coded bit when the MPEG1 bit is set, otherwise derived from scalefac_compress */
        gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
        gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
        gr->count1_table = (uint8_t)get_bits(bs, 1);
        /* take bits 12..15 for this entry, then shift the next group into place */
        gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
        scfsi <<= 4;
        gr++;
    } while(--gr_count);

    /* all part2_3 data must fit in what is left of this buffer plus main_data_begin bytes */
    if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
    {
        return -1;
    }

    return main_data_begin;
}
608 
/*
 * Reads up to four partitions of Layer III scalefactors into `scf`.
 *
 * Partition i holds scf_count[i] values of scf_size[i] bits each; a zero count
 * ends the list. For each partition, depending on the current scfsi bit 3:
 *   - set:   the values are copied from ist_pos (what an earlier call left
 *            there) instead of being read from the bitstream;
 *   - clear: the values are read (or zeroed when the size is 0) and mirrored
 *            into ist_pos. When scfsi is negative, a value equal to the
 *            largest code for that size is stored in ist_pos as (uint8_t)-1.
 * scfsi is doubled after every partition so the next flag moves into bit 3.
 * Three zero bytes are appended after the last value written to `scf`.
 */
static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
{
    int part, n;

    for (part = 0; part < 4; part++, scfsi *= 2)
    {
        const int cnt = scf_count[part];

        if (!cnt)
        {
            break;
        }

        if (scfsi & 8)
        {
            memcpy(scf, ist_pos, cnt);
        } else if (!scf_size[part])
        {
            memset(scf, 0, cnt);
            memset(ist_pos, 0, cnt);
        } else
        {
            const int bits = scf_size[part];
            const int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;

            for (n = 0; n < cnt; n++)
            {
                const int s = get_bits(bitbuf, bits);
                ist_pos[n] = (s == max_scf ? -1 : s);
                scf[n] = s;
            }
        }

        ist_pos += cnt;
        scf += cnt;
    }
    scf[0] = scf[1] = scf[2] = 0;
}
641 
/*
 * Returns y scaled by 2^(-exp_q2/4), where exp_q2 is an exponent in quarter
 * steps. The exponent is consumed in chunks of at most 120 so that each
 * factor, g_expfrac[e & 3] * 2^(30 - e/4), can be built from a table entry
 * (2^-30 times 2^(-k/4)) and an integer shift.
 * exp_q2 is expected to be non-negative (a negative value would produce a
 * negative shift count).
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };

    for (;;)
    {
        const int chunk = MINIMP3_MIN(30*4, exp_q2);

        y *= g_expfrac[chunk & 3]*(1 << 30 >> (chunk >> 2));
        exp_q2 -= chunk;
        if (exp_q2 <= 0)
        {
            break;
        }
    }
    return y;
}
653 
/*
 * Decodes the Layer III scalefactors described by `gr` and converts them into
 * per-band float gains in `scf` (n_long_sfb + n_short_sfb entries).
 *
 * Steps: pick the partition layout and the bit width of each partition, read
 * the integer scalefactors (L3_read_scalefactors also updates `ist_pos`), add
 * subblock gain or the preamp table, then turn global gain and each integer
 * scalefactor into a float with L3_ldexp_q2(). `ch` is only used to choose the
 * intensity-stereo layout when the MPEG1 bit is clear.
 */
static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
{
    /* Rows: long blocks, mixed blocks, short blocks. Each row is 7 groups of 4 partition sizes. */
    static const uint8_t g_scf_partitions[3][28] = {
        { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
        { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
        { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
    };
    /* Row index: 0 when n_short_sfb == 0, 1 when both counts are nonzero, 2 when n_long_sfb == 0. */
    const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
    uint8_t scf_size[4], iscf[40];
    int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
    float gain;

    if (HDR_TEST_MPEG1(hdr))
    {
        /* scalefac_compress (4 bits here) maps to two bit widths: high 2 bits and low 2 bits of `part` */
        static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
        int part = g_scfc_decode[gr->scalefac_compress];
        scf_size[1] = scf_size[0] = (uint8_t)(part >> 2);
        scf_size[3] = scf_size[2] = (uint8_t)(part & 3);
    } else
    {
        /* Mixed-radix decoding of scalefac_compress; each group of 4 in g_mod holds the radices of one range. */
        static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
        int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
        sfc = gr->scalefac_compress >> ist;
        /* Try successive ranges; sfc goes negative after the range that contains it.
           scf_size keeps the digits from that last iteration. */
        for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)
        {
            for (modprod = 1, i = 3; i >= 0; i--)
            {
                scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);
                modprod *= g_mod[k + i];
            }
        }
        /* k is already one group past the matching range, which skips the first group of the row */
        scf_partition += k;
        /* negative scfsi: no partition is copied, and L3_read_scalefactors marks maximum codes in ist_pos */
        scfsi = -16;
    }
    L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);

    if (gr->n_short_sfb)
    {
        /* add the per-window subblock gain to the short-band entries (uint8_t arithmetic) */
        int sh = 3 - scf_shift;
        for (i = 0; i < gr->n_short_sfb; i += 3)
        {
            iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;
            iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;
            iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;
        }
    } else if (gr->preflag)
    {
        /* preamp offsets for bands 11..20 */
        static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
        for (i = 0; i < 10; i++)
        {
            iscf[11 + i] += g_preamp[i];
        }
    }

    /* gain_exp is at most MAX_SCF, so the exponent passed to L3_ldexp_q2 below stays positive */
    gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
    gain = L3_ldexp_q2(1 << (MAX_SCFI/4),  MAX_SCFI - gain_exp);
    for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
    {
        scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
    }
}
715 
/*
 * x^(4/3) lookup table. Entries 0..15 hold -(n^(4/3)) for n = 0..15 and
 * entries 16..144 hold n^(4/3) for n = 0..128, so g_pow43[16 + n] is the
 * value for a non-negative n.
 */
static const float g_pow43[129 + 16] = {
    /* negated values for 0..15 */
    0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,
    -16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
    /* 0..128 */
    0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,
    16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,
    40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,
    69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,
    101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,
    136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,
    174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,
    214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,
    256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,
    299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,
    344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,
    391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,
    439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,
    489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,
    539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,
    591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,
    645.079578f
};

/*
 * Returns an approximation of x^(4/3) for a non-negative integer x.
 *
 * Values up to 128 come straight from g_pow43. Larger values are reduced to
 * the nearest table point: x is treated as 64*k + r (after multiplying by 8
 * when x < 1024), the table supplies k^(4/3), the second-order expansion
 * 1 + (4/3)f + (2/9)f^2 of (1 + f)^(4/3) corrects for the remainder, and the
 * result is rescaled by 16 (= 8^(4/3)) or 256 (= 64^(4/3)).
 */
static float L3_pow_43(int x)
{
    float frac;
    int round_up, scale;

    if (x < 129)
    {
        return g_pow43[16 + x];
    }

    if (x >= 1024)
    {
        scale = 256;
    } else
    {
        scale = 16;
        x <<= 3;
    }

    /* 64 when the remainder (x & 63) is 32 or more, so the nearer table point is used */
    round_up = 2*x & 64;
    frac = (float)((x & 63) - round_up) / ((x & ~63) + round_up);
    return g_pow43[16 + ((x + round_up) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*scale;
}
741 
/*
 * Decodes the Huffman-coded spectral data of one granule/channel into dst.
 *
 *   dst            - output samples; the big-values loops write two floats per
 *                    decoded pair, the count1 loop advances dst by four per
 *                    decoded quad.
 *   bs             - bit reader; decoding starts at bit bs->pos of bs->buf and
 *                    bs->pos is set to layer3gr_limit on return.
 *   gr_info        - supplies big_values, sfbtab (band widths), table_select[],
 *                    region_count[] and count1_table.
 *   scf            - per-band scale; one value is consumed for each band width
 *                    read from sfbtab.
 *   layer3gr_limit - bit position (relative to bs->buf) where this granule's
 *                    data ends; the count1 loop stops once it is passed.
 *
 * The bit reader state is mirrored in locals (bs_cache, bs_sh, bs_next_ptr) and
 * driven by the PEEK_BITS/FLUSH_BITS/CHECK_BITS/BSPOS macros defined below.
 * None of the macros defined in this function are #undef'd here.
 *
 * NOTE(review): the cache is primed with four bytes and refilled ahead of the
 * current position, so bytes past the logical end can be read; presumably the
 * caller guarantees padding -- confirm.
 */
static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
{
    /*
     * Packed lookup tables for all big-values codebooks; tabindex[] gives the
     * start offset of each one. As used below: a non-negative entry is a leaf
     * (bits 8 and up = number of bits to consume, low two nibbles = the two
     * decoded magnitudes); a negative entry redirects to a sub-table, with the
     * next read width in its low 3 bits and the (negated) offset in leaf >> 3.
     */
    static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
        -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
        -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
        -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
        -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
        -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
        -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
        -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
        -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
        -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
        -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
        -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
        -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
        -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
        -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
    /* Lookup tables for the count1 (quad) region; gr_info->count1_table picks tab33 when non-zero, else tab32. */
    static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
    static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
    /* Offset into tabs[] for each table_select value. */
    static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
    /* Number of extra "linbits" read for a magnitude of 15, per table_select value (0 = none). */
    static const uint8_t g_linbits[] =  { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };

/* PEEK_BITS(n):  the top n bits of the 32-bit cache, without consuming them. */
#define PEEK_BITS(n)  (bs_cache >> (32 - n))
/* FLUSH_BITS(n): consume n bits; bs_sh tracks how far the cache has been drained. */
#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
/* CHECK_BITS:    refill the cache one byte at a time while bs_sh is non-negative. */
#define CHECK_BITS    while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
/* BSPOS:         current read position in bits, relative to bs->buf. */
#define BSPOS         ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)

    /* scale of the band currently being decoded (loaded from scf) */
    float one = 0.0f;
    int ireg = 0, big_val_cnt = gr_info->big_values;
    const uint8_t *sfb = gr_info->sfbtab;
    const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
    /* prime the cache with four bytes, big-endian, and drop the bits of the first byte already consumed */
    uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
    int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
    bs_next_ptr += 4;

    /* Big-values region: one pass of this loop per region (table_select/region_count entry). */
    while (big_val_cnt > 0)
    {
        int tab_num = gr_info->table_select[ireg];
        int sfb_cnt = gr_info->region_count[ireg++];
        const int16_t *codebook = tabs + tabindex[tab_num];
        int linbits = g_linbits[tab_num];
        if (linbits)
        {
            /* codebooks with linbits: a magnitude of 15 is extended by extra bits */
            do
            {
                /* band width is in samples; values are decoded in pairs */
                np = *sfb++ / 2;
                pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
                one = *scf++;
                do
                {
                    int j, w = 5;
                    /* first lookup uses 5 bits; negative entries chain to a sub-table */
                    int leaf = codebook[PEEK_BITS(w)];
                    while (leaf < 0)
                    {
                        FLUSH_BITS(w);
                        w = leaf & 7;
                        leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
                    }
                    FLUSH_BITS(leaf >> 8);

                    /* the two magnitudes of the pair sit in the low two nibbles of the leaf */
                    for (j = 0; j < 2; j++, dst++, leaf >>= 4)
                    {
                        int lsb = leaf & 0x0F;
                        if (lsb == 15)
                        {
                            lsb += PEEK_BITS(linbits);
                            FLUSH_BITS(linbits);
                            CHECK_BITS;
                            /* the sign is the next bit, i.e. the top bit of the cache */
                            *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
                        } else
                        {
                            /* a set sign bit moves the index down by 16; presumably g_pow43[0..15] holds the negated values -- table head not visible here */
                            *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
                        }
                        /* the sign bit is only present (and consumed) for non-zero values */
                        FLUSH_BITS(lsb ? 1 : 0);
                    }
                    CHECK_BITS;
                } while (--pairs_to_decode);
            } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
        } else
        {
            /* codebooks without linbits: same as above minus the extension step */
            do
            {
                np = *sfb++ / 2;
                pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
                one = *scf++;
                do
                {
                    int j, w = 5;
                    int leaf = codebook[PEEK_BITS(w)];
                    while (leaf < 0)
                    {
                        FLUSH_BITS(w);
                        w = leaf & 7;
                        leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
                    }
                    FLUSH_BITS(leaf >> 8);

                    for (j = 0; j < 2; j++, dst++, leaf >>= 4)
                    {
                        int lsb = leaf & 0x0F;
                        *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
                        FLUSH_BITS(lsb ? 1 : 0);
                    }
                    CHECK_BITS;
                } while (--pairs_to_decode);
            } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
        }
    }

    /*
     * Count1 region: four values per code, each either 0 or +/-one.
     * big_val_cnt is <= 0 here; its magnitude is the number of pairs left in
     * the last band started above, and the +1 compensates for the
     * pre-decrement in RELOAD_SCALEFACTOR.
     */
    for (np = 1 - big_val_cnt;; dst += 4)
    {
        const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
        int leaf = codebook_count1[PEEK_BITS(4)];
        if (!(leaf & 8))
        {
            /* entry without bit 3 set: second lookup, using (leaf & 3) bits that follow the first four */
            leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
        }
        FLUSH_BITS(leaf & 7);
        /* stop as soon as this code runs past the end of the granule's data */
        if (BSPOS > layer3gr_limit)
        {
            break;
        }
/* RELOAD_SCALEFACTOR: when the current band is used up, move to the next band; a zero width ends the loop. */
#define RELOAD_SCALEFACTOR  if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
/* DEQ_COUNT1(s): if output s of the quad is flagged non-zero, store +/-one according to the next (sign) bit. */
#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
        RELOAD_SCALEFACTOR;
        DEQ_COUNT1(0);
        DEQ_COUNT1(1);
        RELOAD_SCALEFACTOR;
        DEQ_COUNT1(2);
        DEQ_COUNT1(3);
        CHECK_BITS;
    }

    /* whatever was consumed, the reader resumes exactly at the granule limit */
    bs->pos = layer3gr_limit;
}
878 
/*
 * In-place sum/difference butterfly over n samples of a channel pair.
 *
 *   left - first channel; the second channel is expected 576 floats further on.
 *   n    - number of samples to process.
 *
 * For each sample i: left[i] becomes left[i] + right[i] and right[i] becomes
 * left[i] - right[i] (both computed from the values before the update).
 */
static void L3_midside_stereo(float *left, int n)
{
    float *right = left + 576;
    int i = 0;
#if HAVE_SIMD
    if (have_simd())
    {
        /* four samples per step; any remainder is handled by the scalar loop */
        for (; i < n - 3; i += 4)
        {
            f4 vl = VLD(left + i);
            f4 vr = VLD(right + i);
            VSTORE(left + i, VADD(vl, vr));
            VSTORE(right + i, VSUB(vl, vr));
        }
#ifdef __GNUC__
        /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.
         * For more info see: https://github.com/lieff/minimp3/issues/88
         */
        if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)
            return;
#endif
    }
#endif /* HAVE_SIMD */
    while (i < n)
    {
        const float l = left[i];
        const float r = right[i];

        left[i] = l + r;
        right[i] = l - r;
        i++;
    }
}
910 
/*
 * Derives both channels of one band from the samples stored in `left`.
 *
 *   left - n source samples; the other channel lives 576 floats further on.
 *   n    - number of samples in the band.
 *   kl   - gain applied to the samples kept in `left`.
 *   kr   - gain applied to the copy written to left + 576.
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    int k = 0;

    while (k < n)
    {
        const float src = left[k];

        left[k + 576] = src*kr;
        left[k] = src*kl;
        k++;
    }
}
920 
/*
 * Finds, for each of the three index classes (band % 3), the highest band that
 * contains a non-zero sample.
 *
 *   right    - samples to scan, bands laid out back to back.
 *   sfb      - width of each band in samples.
 *   nbands   - number of bands to scan.
 *   max_band - output; max_band[c] is the last band index with band % 3 == c
 *              holding any non-zero sample, or -1 if there is none.
 *
 * Samples are examined two at a time, so right[k + 1] is read for every even
 * k below the band width (band widths are presumably even -- confirm).
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int band;

    max_band[0] = -1;
    max_band[1] = -1;
    max_band[2] = -1;

    for (band = 0; band < nbands; right += sfb[band], band++)
    {
        int width = sfb[band];
        int pos = 0;

        /* skip leading all-zero pairs */
        while (pos < width && right[pos] == 0 && right[pos + 1] == 0)
        {
            pos += 2;
        }
        /* stopped early: the band has content */
        if (pos < width)
        {
            max_band[band % 3] = band;
        }
    }
}
940 
/*
 * Applies per-band stereo processing to a channel pair, band by band.
 *
 *   left      - first channel (the second one is 576 floats further on).
 *   ist_pos   - per-band intensity position.
 *   sfb       - band widths, terminated by a zero entry.
 *   hdr       - frame header bytes, queried through the HDR_TEST_* macros.
 *   max_band  - per index class (band % 3), the highest band that must NOT be
 *               intensity-processed.
 *   mpeg2_sh  - extra shift applied to the exponent in the non-MPEG1 case.
 *
 * A band above max_band whose position is below the limit (7 for MPEG1,
 * otherwise 64) is intensity-processed; any other band gets the sum/difference
 * butterfly when the MS-stereo flag is set, and is left untouched otherwise.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    /* (kl, kr) gain pairs for MPEG1, indexed by intensity position 0..6 */
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    unsigned band;
    unsigned max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;

    for (band = 0; sfb[band]; left += sfb[band], band++)
    {
        unsigned ipos = ist_pos[band];
        float kl, kr, s;

        if ((int)band <= max_band[band % 3] || ipos >= max_pos)
        {
            /* not an intensity band */
            if (HDR_TEST_MS_STEREO(hdr))
            {
                L3_midside_stereo(left, sfb[band]);
            }
            continue;
        }

        /* both gains are scaled by sqrt(2) when MS stereo is flagged as well */
        s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
        if (HDR_TEST_MPEG1(hdr))
        {
            kl = g_pan[2*ipos];
            kr = g_pan[2*ipos + 1];
        } else
        {
            /* one side keeps unity gain, the other is attenuated; odd positions attenuate the left */
            float gain = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
            if (ipos & 1)
            {
                kl = gain;
                kr = 1;
            } else
            {
                kl = 1;
                kr = gain;
            }
        }
        L3_intensity_stereo_band(left, sfb[band], kl*s, kr*s);
    }
}
974 
L3_intensity_stereo(float * left,uint8_t * ist_pos,const L3_gr_info_t * gr,const uint8_t * hdr)975 static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
976 {
977     int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
978     int i, max_blocks = gr->n_short_sfb ? 3 : 1;
979 
980     L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
981     if (gr->n_long_sfb)
982     {
983         max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
984     }
985     for (i = 0; i < max_blocks; i++)
986     {
987         int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
988         int itop = n_sfb - max_blocks + i;
989         int prev = itop - max_blocks;
990         ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
991     }
992     L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
993 }
994 
/*
 * Interleaves three consecutive runs of samples, band by band, in place.
 *
 *   grbuf   - samples; for each band, three runs of `len` samples back to back.
 *   scratch - work buffer, must hold as many floats as are reordered.
 *   sfb     - band widths in groups of three entries; only the first entry of
 *             each group is read, and a zero there ends the table.
 *
 * For each band the output order is run0[0], run1[0], run2[0], run0[1], ...
 * The result is assembled in scratch and then copied back over grbuf.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    const float *in = grbuf;
    float *out = scratch;
    int width;

    while ((width = *sfb) != 0)
    {
        int k;

        for (k = 0; k < width; k++)
        {
            out[0] = in[k];
            out[1] = in[k + width];
            out[2] = in[k + 2*width];
            out += 3;
        }
        in += 3*width;
        sfb += 3;
    }
    memcpy(grbuf, scratch, (out - scratch)*sizeof(float));
}
1011 
/*
 * Applies eight rotation butterflies across each boundary between adjacent
 * 18-sample blocks.
 *
 *   grbuf  - samples; boundary b lies between grbuf[18*b + 17] and
 *            grbuf[18*b + 18].
 *   nbands - number of boundaries to process, starting with the first one.
 *
 * Butterfly i pairs the sample i positions above the boundary (u) with the one
 * i positions below it (d):  u' = u*g_aa[0][i] - d*g_aa[1][i],
 *                            d' = u*g_aa[1][i] + d*g_aa[0][i].
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };
    int band;

    for (band = 0; band < nbands; band++, grbuf += 18)
    {
        int i = 0;
#if HAVE_SIMD
        if (have_simd()) for (; i < 8; i += 4)
        {
            f4 vu = VLD(grbuf + 18 + i);
            f4 vd = VLD(grbuf + 14 - i);
            f4 vc0 = VLD(g_aa[0] + i);
            f4 vc1 = VLD(g_aa[1] + i);
            vd = VREV(vd);
            VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));
            vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));
            VSTORE(grbuf + 14 - i, VREV(vd));
        }
#endif /* HAVE_SIMD */
#ifndef MINIMP3_ONLY_SIMD
        /* scalar path: all eight butterflies, or none if the SIMD loop ran */
        while (i < 8)
        {
            float *above = grbuf + 18 + i;
            float *below = grbuf + 17 - i;
            float u = *above;
            float d = *below;

            *above = u*g_aa[0][i] - d*g_aa[1][i];
            *below = u*g_aa[1][i] + d*g_aa[0][i];
            i++;
        }
#endif /* MINIMP3_ONLY_SIMD */
    }
}
1046 
/*
 * In-place 9-point transform used by L3_imdct36 (a DCT-III, going by the name).
 *
 *   y - nine input values, overwritten with the nine outputs.
 *
 * The even-indexed inputs produce four "even" terms plus y[4]; the odd-indexed
 * inputs produce four "odd" terms; the remaining eight outputs are the sums and
 * differences of matching even/odd terms.
 */
static void L3_dct3_9(float *y)
{
    float in0, in1, in2, in3, in4, in5, in6, in7, in8;
    float half, diff, p, q, r, mix;
    float ev_a, ev_b, ev_c, ev_d;
    float k3, u, v, w;
    float od_a, od_b, od_c, od_d;

    in0 = y[0]; in1 = y[1]; in2 = y[2];
    in3 = y[3]; in4 = y[4]; in5 = y[5];
    in6 = y[6]; in7 = y[7]; in8 = y[8];

    /* even-indexed inputs */
    half = in0 + in6*0.5f;
    diff = in0 - in6;
    p = (in4 + in2)*0.93969262f;
    q = (in8 + in2)*0.76604444f;
    r = (in4 - in8)*0.17364818f;
    mix = in4 + (in8 - in2);

    ev_a = diff - mix*0.5f;
    ev_b = half - q + r;
    ev_c = half - p + q;
    ev_d = half + p - r;

    /* odd-indexed inputs */
    k3 = in3*0.86602540f;
    u = (in5 + in1)*0.98480775f;
    v = (in5 - in7)*0.34202014f;
    w = (in1 + in7)*0.64278761f;

    od_a = (in1 - in5 - in7)*0.86602540f;
    od_b = u - k3 - w;
    od_c = v + k3 - w;
    od_d = v - k3 - u;

    /* combine: outputs k and 8 - k share one even and one odd term */
    y[0] = ev_d - od_d;
    y[1] = ev_a + od_a;
    y[2] = ev_c - od_c;
    y[3] = ev_b + od_b;
    y[4] = mix + diff;
    y[5] = ev_b - od_b;
    y[6] = ev_c + od_c;
    y[7] = ev_a - od_a;
    y[8] = ev_d + od_d;
}
1086 
/*
 * Long-block inverse transform with windowing and overlap-add, one 18-sample
 * block per band.
 *
 *   grbuf   - per band, 18 input coefficients, replaced by 18 output samples.
 *   overlap - per band, 9 values saved by the previous call; they are consumed
 *             here and replaced with the values for the next call.
 *   window  - 18 window coefficients (two halves of 9).
 *   nbands  - number of consecutive bands to process.
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };
    int band, i;

    for (band = 0; band < nbands; band++, grbuf += 18, overlap += 9)
    {
        float co[9], si[9];

        /* fold the 18 inputs into two 9-point sequences */
        co[0] = -grbuf[0];
        si[0] = grbuf[17];
        for (i = 0; i < 4; i++)
        {
            const float *q = grbuf + 4*i;

            co[2*i + 1] =   q[1] + q[2];
            co[2*i + 2] = -(q[3] + q[4]);
            si[8 - 2*i] =   q[1] - q[2];
            si[7 - 2*i] =   q[4] - q[3];
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        /* negate the odd-indexed terms of the second sequence */
        for (i = 1; i < 8; i += 2)
        {
            si[i] = -si[i];
        }

        i = 0;

#if HAVE_SIMD
        if (have_simd()) for (; i < 8; i += 4)
        {
            f4 vovl = VLD(overlap + i);
            f4 vc = VLD(co + i);
            f4 vs = VLD(si + i);
            f4 vr0 = VLD(g_twid9 + i);
            f4 vr1 = VLD(g_twid9 + 9 + i);
            f4 vw0 = VLD(window + i);
            f4 vw1 = VLD(window + 9 + i);
            f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));
            VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));
            VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));
            vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));
            VSTORE(grbuf + 14 - i, VREV(vsum));
        }
#endif /* HAVE_SIMD */
        /* scalar path: everything, or just the ninth element after the SIMD loop */
        for (; i < 9; i++)
        {
            float c = co[i], sn = si[i];
            float tw_a = g_twid9[i], tw_b = g_twid9[9 + i];
            float win_a = window[i], win_b = window[9 + i];
            float ovl = overlap[i];
            float sum = c*tw_b + sn*tw_a;

            overlap[i] = c*tw_a - sn*tw_b;
            grbuf[i] = ovl*win_a - sum*win_b;
            grbuf[17 - i] = ovl*win_b + sum*win_a;
        }
    }
}
1143 
/*
 * 3-point transform helper used by L3_imdct12.
 *
 *   x0, x1, x2 - inputs.
 *   dst        - receives three outputs:
 *                dst[0] = (x0 - x2/2) + x1*0.86602540,
 *                dst[1] =  x0 + x2,
 *                dst[2] = (x0 - x2/2) - x1*0.86602540.
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    const float base = x0 - x2*0.5f;
    const float swing = x1*0.86602540f;

    dst[0] = base + swing;
    dst[1] = x0 + x2;
    dst[2] = base - swing;
}
1152 
/*
 * Short-block inverse transform of one window, with overlap-add.
 *
 *   x       - input coefficients; x[0], x[3], x[6], x[9], x[12] and x[15] are
 *             read (every third value).
 *   dst     - receives six output samples.
 *   overlap - three values carried in from the previous window; replaced with
 *             the three values to carry into the next one.
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    const float *tw_a = g_twid3;        /* first three coefficients */
    const float *tw_b = g_twid3 + 3;    /* last three coefficients */
    float co[3], si[3];
    int k;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    for (k = 0; k < 3; k++)
    {
        float c = co[k], sn = si[k];
        float prev = overlap[k];
        float sum = c*tw_b[k] + sn*tw_a[k];

        overlap[k] = c*tw_a[k] - sn*tw_b[k];
        /* the same table, read back to front, serves as the window */
        dst[k]     = prev*tw_a[2 - k] - sum*tw_b[2 - k];
        dst[5 - k] = prev*tw_b[2 - k] + sum*tw_a[2 - k];
    }
}
1172 
/*
 * Short-block inverse transform for a run of bands.
 *
 *   grbuf   - per band, 18 coefficients holding three interleaved windows;
 *             replaced by 18 output samples.
 *   overlap - per band, 9 carried-over values, updated for the next call.
 *   nbands  - number of consecutive bands to process.
 *
 * Per band: the first six outputs are the first six carried-over values; the
 * first two windows fill outputs 6..11 and 12..17; the third window writes its
 * six samples into overlap[0..5]. All three calls share overlap[6..8] as their
 * window-to-window carry.
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    int band;

    for (band = 0; band < nbands; band++)
    {
        float coef[18];
        float *out = grbuf + 18*band;
        float *ovl = overlap + 9*band;
        float *carry = ovl + 6;

        memcpy(coef, out, sizeof(coef));
        memcpy(out, ovl, 6*sizeof(float));
        L3_imdct12(coef, out + 6, carry);
        L3_imdct12(coef + 1, out + 12, carry);
        L3_imdct12(coef + 2, ovl, carry);
    }
}
1185 
/*
 * Negates every odd-indexed sample in every odd-numbered 18-sample block.
 *
 *   grbuf - 32 blocks of 18 samples (576 floats), modified in place.
 */
static void L3_change_sign(float *grbuf)
{
    int band;

    for (band = 1; band < 32; band += 2)
    {
        float *row = grbuf + 18*band;
        int k;

        for (k = 0; k < 9; k++)
        {
            row[2*k + 1] = -row[2*k + 1];
        }
    }
}
1193 
L3_imdct_gr(float * grbuf,float * overlap,unsigned block_type,unsigned n_long_bands)1194 static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1195 {
1196     static const float g_mdct_window[2][18] = {
1197         { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1198         { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1199     };
1200     if (n_long_bands)
1201     {
1202         L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1203         grbuf += 18*n_long_bands;
1204         overlap += 9*n_long_bands;
1205     }
1206     if (block_type == SHORT_BLOCK_TYPE)
1207         L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1208     else
1209         L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1210 }
1211 
/*
 * Stores the not-yet-consumed tail of the main-data buffer in the decoder
 * state for use by the next frame.
 *
 *   h - decoder state; reserv_buf receives the bytes, reserv their count.
 *   s - scratch state; s->bs gives the read position and limit (both in bits)
 *       within s->maindata.
 *
 * At most MAX_BITRESERVOIR_BYTES are kept; if more are left over, the oldest
 * ones are dropped. h->reserv is set to the computed count as is, even when
 * nothing is copied because the count is zero or negative.
 */
static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
{
    /* first byte not touched by the reader (position rounded up to a byte) */
    int consumed = (s->bs.pos + 7)/8u;
    int unread = s->bs.limit/8u - consumed;
    int start = consumed;
    int count = unread;

    if (unread > MAX_BITRESERVOIR_BYTES)
    {
        /* keep only the last MAX_BITRESERVOIR_BYTES bytes */
        start = consumed + (unread - MAX_BITRESERVOIR_BYTES);
        count = MAX_BITRESERVOIR_BYTES;
    }
    if (count > 0)
    {
        memmove(h->reserv_buf, s->maindata + start, count);
    }
    h->reserv = count;
}
1227 
/*
 * Builds the main-data buffer for the current frame: the requested tail of the
 * saved reservoir followed by this frame's own payload.
 *
 *   h               - decoder state holding the saved bytes (reserv_buf, reserv).
 *   bs              - reader over the current frame; everything from bs->pos to
 *                     bs->limit is taken as payload.
 *   s               - scratch state; s->maindata is filled and s->bs is
 *                     re-initialised to read from it.
 *   main_data_begin - number of reservoir bytes this frame refers back to.
 *
 * Returns non-zero when the reservoir holds at least main_data_begin bytes,
 * zero otherwise (the buffer is still assembled from whatever is available).
 */
static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
{
    int payload = (bs->limit - bs->pos)/8;
    int carried = h->reserv > main_data_begin ? main_data_begin : h->reserv;
    int skip = h->reserv - main_data_begin;

    /* only the last main_data_begin bytes of the reservoir are wanted */
    if (skip < 0)
    {
        skip = 0;
    }

    memcpy(s->maindata, h->reserv_buf + skip, carried);
    memcpy(s->maindata + carried, bs->buf + bs->pos/8, payload);
    bs_init(&s->bs, s->maindata, carried + payload);
    return h->reserv >= main_data_begin;
}
1237 
/*
 * Decodes one granule for all channels: scalefactors and Huffman data, stereo
 * processing, then reordering, alias reduction, inverse transform and sign
 * change per channel.
 *
 *   h       - decoder state (header bytes, per-channel overlap buffers).
 *   s       - scratch state (bit reader, grbuf, scf, ist_pos, syn).
 *   gr_info - granule info, one entry per channel.
 *   nch     - number of channels.
 */
static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
{
    int ch;

    /* stage 1: read the spectrum of every channel */
    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        /* fixed before decoding starts: this channel's data ends part_23_length bits from here */
        int limit = s->bs.pos + gr->part_23_length;

        L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr, s->scf, ch);
        L3_huffman(s->grbuf[ch], &s->bs, gr, s->scf, limit);
    }

    /* stage 2: joint-stereo processing on the channel pair */
    if (HDR_TEST_I_STEREO(h->header))
    {
        L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
    } else if (HDR_IS_MS_STEREO(h->header))
    {
        L3_midside_stereo(s->grbuf[0], 576);
    }

    /* stage 3: per-channel synthesis preparation */
    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        float *spec = s->grbuf[ch];
        /* mixed blocks start with 2 long bands, doubled when the sample-rate index is 2 */
        int n_long_bands = (gr->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
        int aa_bands = gr->n_short_sfb ? n_long_bands - 1 : 31;

        if (gr->n_short_sfb)
        {
            /* short bands are reordered; s->syn[0] serves as the work buffer */
            L3_reorder(spec + n_long_bands*18, s->syn[0], gr->sfbtab + gr->n_long_sfb);
        }

        L3_antialias(spec, aa_bands);
        L3_imdct_gr(spec, h->mdct_overlap[ch], gr->block_type, n_long_bands);
        L3_change_sign(spec);
    }
}
1273 
mp3d_DCT_II(float * grbuf,int n)1274 static void mp3d_DCT_II(float *grbuf, int n)
1275 {
1276     static const float g_sec[24] = {
1277         10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
1278     };
1279     int i, k = 0;
1280 #if HAVE_SIMD
1281     if (have_simd()) for (; k < n; k += 4)
1282     {
1283         f4 t[4][8], *x;
1284         float *y = grbuf + k;
1285 
1286         for (x = t[0], i = 0; i < 8; i++, x++)
1287         {
1288             f4 x0 = VLD(&y[i*18]);
1289             f4 x1 = VLD(&y[(15 - i)*18]);
1290             f4 x2 = VLD(&y[(16 + i)*18]);
1291             f4 x3 = VLD(&y[(31 - i)*18]);
1292             f4 t0 = VADD(x0, x3);
1293             f4 t1 = VADD(x1, x2);
1294             f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
1295             f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
1296             x[0] = VADD(t0, t1);
1297             x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
1298             x[16] = VADD(t3, t2);
1299             x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
1300         }
1301         for (x = t[0], i = 0; i < 4; i++, x += 8)
1302         {
1303             f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
1304             xt = VSUB(x0, x7); x0 = VADD(x0, x7);
1305             x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
1306             x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
1307             x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
1308             x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
1309             x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
1310             x[0] = VADD(x0, x1);
1311             x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
1312             x5 = VADD(x5, x6);
1313             x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
1314             x7 = VADD(x7, xt);
1315             x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
1316             x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
1317             x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
1318             x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
1319             x0 = VSUB(xt, x6); xt = VADD(xt, x6);
1320             x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
1321             x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
1322             x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
1323             x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
1324             x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
1325             x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
1326         }
1327 
1328         if (k > n - 3)
1329         {
1330 #if HAVE_SSE
1331 #define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
1332 #else /* HAVE_SSE */
1333 #define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18],  vget_low_f32(v))
1334 #endif /* HAVE_SSE */
1335             for (i = 0; i < 7; i++, y += 4*18)
1336             {
1337                 f4 s = VADD(t[3][i], t[3][i + 1]);
1338                 VSAVE2(0, t[0][i]);
1339                 VSAVE2(1, VADD(t[2][i], s));
1340                 VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
1341                 VSAVE2(3, VADD(t[2][1 + i], s));
1342             }
1343             VSAVE2(0, t[0][7]);
1344             VSAVE2(1, VADD(t[2][7], t[3][7]));
1345             VSAVE2(2, t[1][7]);
1346             VSAVE2(3, t[3][7]);
1347         } else
1348         {
1349 #define VSAVE4(i, v) VSTORE(&y[i*18], v)
1350             for (i = 0; i < 7; i++, y += 4*18)
1351             {
1352                 f4 s = VADD(t[3][i], t[3][i + 1]);
1353                 VSAVE4(0, t[0][i]);
1354                 VSAVE4(1, VADD(t[2][i], s));
1355                 VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
1356                 VSAVE4(3, VADD(t[2][1 + i], s));
1357             }
1358             VSAVE4(0, t[0][7]);
1359             VSAVE4(1, VADD(t[2][7], t[3][7]));
1360             VSAVE4(2, t[1][7]);
1361             VSAVE4(3, t[3][7]);
1362         }
1363     } else
1364 #endif /* HAVE_SIMD */
1365 #ifdef MINIMP3_ONLY_SIMD
1366     {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
1367 #else /* MINIMP3_ONLY_SIMD */
1368     for (; k < n; k++)
1369     {
1370         float t[4][8], *x, *y = grbuf + k;
1371 
1372         for (x = t[0], i = 0; i < 8; i++, x++)
1373         {
1374             float x0 = y[i*18];
1375             float x1 = y[(15 - i)*18];
1376             float x2 = y[(16 + i)*18];
1377             float x3 = y[(31 - i)*18];
1378             float t0 = x0 + x3;
1379             float t1 = x1 + x2;
1380             float t2 = (x1 - x2)*g_sec[3*i + 0];
1381             float t3 = (x0 - x3)*g_sec[3*i + 1];
1382             x[0] = t0 + t1;
1383             x[8] = (t0 - t1)*g_sec[3*i + 2];
1384             x[16] = t3 + t2;
1385             x[24] = (t3 - t2)*g_sec[3*i + 2];
1386         }
1387         for (x = t[0], i = 0; i < 4; i++, x += 8)
1388         {
1389             float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
1390             xt = x0 - x7; x0 += x7;
1391             x7 = x1 - x6; x1 += x6;
1392             x6 = x2 - x5; x2 += x5;
1393             x5 = x3 - x4; x3 += x4;
1394             x4 = x0 - x3; x0 += x3;
1395             x3 = x1 - x2; x1 += x2;
1396             x[0] = x0 + x1;
1397             x[4] = (x0 - x1)*0.70710677f;
1398             x5 =  x5 + x6;
1399             x6 = (x6 + x7)*0.70710677f;
1400             x7 =  x7 + xt;
1401             x3 = (x3 + x4)*0.70710677f;
1402             x5 -= x7*0.198912367f;  /* rotate by PI/8 */
1403             x7 += x5*0.382683432f;
1404             x5 -= x7*0.198912367f;
1405             x0 = xt - x6; xt += x6;
1406             x[1] = (xt + x7)*0.50979561f;
1407             x[2] = (x4 + x3)*0.54119611f;
1408             x[3] = (x0 - x5)*0.60134488f;
1409             x[5] = (x0 + x5)*0.89997619f;
1410             x[6] = (x4 - x3)*1.30656302f;
1411             x[7] = (xt - x7)*2.56291556f;
1412 
1413         }
1414         for (i = 0; i < 7; i++, y += 4*18)
1415         {
1416             y[0*18] = t[0][i];
1417             y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
1418             y[2*18] = t[1][i] + t[1][i + 1];
1419             y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
1420         }
1421         y[0*18] = t[0][7];
1422         y[1*18] = t[2][7] + t[3][7];
1423         y[2*18] = t[1][7];
1424         y[3*18] = t[3][7];
1425     }
1426 #endif /* MINIMP3_ONLY_SIMD */
1427 }
1428 
1429 #ifndef MINIMP3_FLOAT_OUTPUT
/*
    Convert one synthesized sample to a saturated signed 16-bit PCM value.

    sample: filterbank output, already in 16-bit full-scale units.
    Returns the sample offset by +0.5, truncated toward zero, and then
    decremented by one when the truncated value is negative
    (away from zero, to be compliant). Out-of-range inputs saturate to
    32767 / -32768.
*/
static int16_t mp3d_scale_pcm(float sample)
{
    int16_t pcm16;
#if HAVE_ARMV6
    /* Round in 32 bits first, then let the ARM helper do the clipping. */
    int32_t rounded = (int32_t)(sample + .5f);
    if (rounded < 0)
    {
        rounded--;
    }
    pcm16 = (int16_t)minimp3_clip_int16_arm(rounded);
#else
    if (sample >= 32766.5)
    {
        /* anything that would round to 32767 or above */
        pcm16 = (int16_t) 32767;
    } else if (sample <= -32767.5)
    {
        /* anything that would round to -32768 or below */
        pcm16 = (int16_t)-32768;
    } else
    {
        pcm16 = (int16_t)(sample + .5f);
        if (pcm16 < 0)
        {
            pcm16--;    /* away from zero, to be compliant */
        }
    }
#endif
    return pcm16;
}
1444 #else /* MINIMP3_FLOAT_OUTPUT */
/*
    Float-output variant: rescale a sample from 16-bit full-scale units
    by a factor of 1/32768. No clipping is applied.
*/
static float mp3d_scale_pcm(float sample)
{
    const float inv_full_scale = 1.f/32768.f;
    return sample*inv_full_scale;
}
1449 #endif /* MINIMP3_FLOAT_OUTPUT */
1450 
/*
    Produce two output samples from fixed-weight sums over the history buffer.

    pcm: destination; pcm[0] and pcm[16*nch] are written.
    nch: channel count, used only as the output stride.
    z:   input history, read at a stride of 64 floats (rows 0..14),
         then again starting two floats further on (even rows 0..14).
*/
static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
{
    const float *zs = z + 2;    /* base of the second tap set */
    float acc;

    /* First sample: 15 taps, folded pairwise around row 7. */
    acc  = (z[14*64] - z[ 0*64]) * 29;
    acc += (z[ 1*64] + z[13*64]) * 213;
    acc += (z[12*64] - z[ 2*64]) * 459;
    acc += (z[ 3*64] + z[11*64]) * 2037;
    acc += (z[10*64] - z[ 4*64]) * 5153;
    acc += (z[ 5*64] + z[ 9*64]) * 6574;
    acc += (z[ 8*64] - z[ 6*64]) * 37489;
    acc +=  z[ 7*64]             * 75038;
    pcm[0] = mp3d_scale_pcm(acc);

    /* Second sample: 8 taps on the even rows, walked from row 14 down to 0. */
    acc  = zs[14*64] * 104;
    acc += zs[12*64] * 1567;
    acc += zs[10*64] * 9727;
    acc += zs[ 8*64] * 64019;
    acc += zs[ 6*64] * -9975;
    acc += zs[ 4*64] * -45;
    acc += zs[ 2*64] * 146;
    acc += zs[ 0*64] * -5;
    pcm[16*nch] = mp3d_scale_pcm(acc);
}
1475 
/*
    Synthesize 64 output samples per channel from two adjacent columns of
    the transformed granule buffer.

    xl:   left-channel input; read at a stride of 18 floats, using offsets
          0 and 1 within each row. The right-channel pointer is derived as
          xl + 576*(nch - 1), so for nch == 1 it aliases the left channel.
    dstl: left-channel output; samples are written at a stride of nch, so
          for nch == 2 the output is interleaved (right channel at dstl + 1).
    nch:  channel count (the pointer arithmetic here only makes sense for
          1 or 2 -- NOTE(review): confirm callers never pass anything else).
    lins: history buffer; entries below lins + 15*64 are read as past
          history, entries from lins + 15*64 onward are filled in here.

    Output indices 0, 16, 32 and 48 come from mp3d_synth_pair(); the
    remaining 60 (1..15, 17..31, 33..47, 49..63) come from the windowed
    loop below, four "a" and four "b" results per iteration.
*/
static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
{
    int i;
    float *xr = xl + 576*(nch - 1);
    mp3d_sample_t *dstr = dstl + (nch - 1);

    /* 15 rows of 16 weights (presumably the synthesis window, scaled to
       integers); each loop iteration below consumes exactly one row via
       eight pairs of *w++ reads. */
    static const float g_win[] = {
        -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
        -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
        -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
        -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
        -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
        -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
        -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
        -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
        -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
        -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
        -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
        -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
        -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
        -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
        -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
    };
    float *zlin = lins + 15*64;
    const float *w = g_win;

    /* Seed the newest history row with rows 16 and 0 of the first input
       column, stored as {L, R, L, R} groups of four. */
    zlin[4*15]     = xl[18*16];
    zlin[4*15 + 1] = xr[18*16];
    zlin[4*15 + 2] = xl[0];
    zlin[4*15 + 3] = xr[0];

    /* Same for the second input column (offset 1 within each row). */
    zlin[4*31]     = xl[1 + 18*16];
    zlin[4*31 + 1] = xr[1 + 18*16];
    zlin[4*31 + 2] = xl[1];
    zlin[4*31 + 3] = xr[1];

    /* Output samples 0/16 and 32/48 of each channel use the dedicated
       fixed-weight routine rather than the windowed loop. */
    mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
    mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
    mp3d_synth_pair(dstl, nch, lins + 4*15);
    mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);

#if HAVE_SIMD
    if (have_simd()) for (i = 14; i >= 0; i--)
    {
/* VLOAD(k): fetch the next two weights and two history vectors, 64*k and
   64*(15 - k) floats behind zlin[4*i]. V0 starts the a/b accumulators,
   V1 and V2 add to them; V2 differs from V1 only in the sign of the
   contribution to a. */
#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
#define V0(k) { VLOAD(k) b =         VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a =         VSUB(VMUL(vz, w0), VMUL(vy, w1));  }
#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
        f4 a, b;
        /* Scatter input rows (31 - i) and (1 + i) of both columns into the
           history buffer before they are read by the V* macros. */
        zlin[4*i]     = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*i + 64] = xl[1 + 18*(1 + i)];
        zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
        zlin[4*i - 64 + 2] = xl[18*(1 + i)];
        zlin[4*i - 64 + 3] = xr[18*(1 + i)];

        V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)

        /* Lanes of a and b are {L, R, L, R}: lanes 0/1 go to the first
           32-sample half of the output, lanes 2/3 to the second half. */
        {
#ifndef MINIMP3_FLOAT_OUTPUT
#if HAVE_SSE
            /* Clamp in float, convert to int32, then pack a and b into
               eight int16 lanes (a in lanes 0..3, b in lanes 4..7). */
            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
            dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
            dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
            dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
            dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
            dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
            dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
            dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
            dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
#else /* HAVE_SSE */
            /* Add 0.5, convert to int32, add the "less than zero" compare
               mask (all ones, i.e. -1) for negative lanes, then narrow to
               int16 with saturation. */
            int16x4_t pcma, pcmb;
            a = VADD(a, VSET(0.5f));
            b = VADD(b, VSET(0.5f));
            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
            vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
            vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
            vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
            vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
            vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
            vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
            vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
            vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
#endif /* HAVE_SSE */

#else /* MINIMP3_FLOAT_OUTPUT */

            /* Float output: scale by 1/32768 and store lanes individually,
               with no clipping. */
            static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
            a = VMUL(a, g_scale);
            b = VMUL(b, g_scale);
#if HAVE_SSE
            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
#else /* HAVE_SSE */
            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
#endif /* HAVE_SSE */
#endif /* MINIMP3_FLOAT_OUTPUT */
        }
    } else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
    {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
#else /* MINIMP3_ONLY_SIMD */
    /* Scalar fallback: same data movement and accumulation pattern as the
       SIMD loop, with a[] / b[] standing in for the four vector lanes. */
    for (i = 14; i >= 0; i--)
    {
/* LOAD/S0/S1/S2 are the scalar counterparts of VLOAD/V0/V1/V2. */
#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j]  = vz[j]*w1 + vy[j]*w0, a[j]  = vz[j]*w0 - vy[j]*w1; }
#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
        float a[4], b[4];

        zlin[4*i]     = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*(i + 16)]   = xl[1 + 18*(1 + i)];
        zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
        zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
        zlin[4*(i - 16) + 3] = xr[18*(1 + i)];

        S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)

        /* Conversion to the output sample type (rounding/clipping or
           float scaling) is delegated to mp3d_scale_pcm(). */
        dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
        dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
        dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
        dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
        dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
        dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
        dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
        dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
    }
#endif /* MINIMP3_ONLY_SIMD */
}
1628 
/*
    Run the synthesis stage for one granule.

    qmf_state: 15*64 floats of filter history carried between calls;
               read at entry and rewritten before returning.
    grbuf:     granule buffer, 576 floats per channel; transformed in
               place by mp3d_DCT_II().
    nbands:    number of columns to process (consumed two at a time).
    nch:       channel count.
    pcm:       output buffer; each pair of columns advances it by 64*nch
               samples.
    lins:      scratch buffer; must hold at least (15 + nbands)*64 floats.
*/
static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
{
    const size_t state_bytes = sizeof(float)*15*64;
    float *next_state = lins + nbands*64;   /* history to keep for the next call */
    int ch, band;

    for (ch = 0; ch < nch; ch++)
    {
        mp3d_DCT_II(grbuf + 576*ch, nbands);
    }

    /* Prepend the saved history so mp3d_synth() can look back into it. */
    memcpy(lins, qmf_state, state_bytes);

    for (band = 0; band < nbands; band += 2)
    {
        mp3d_synth(grbuf + band, pcm + 32*nch*band, nch, lins + band*64);
    }

#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
    if (nch == 1)
    {
        /* Mono: only every other history entry is refreshed; the
           odd-indexed entries of qmf_state are left as they were. */
        int k;
        for (k = 0; k < 15*64; k += 2)
        {
            qmf_state[k] = next_state[k];
        }
        return;
    }
#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
    memcpy(qmf_state, next_state, state_bytes);
}
1656 
mp3d_match_frame(const uint8_t * hdr,int mp3_bytes,int frame_bytes)1657 static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1658 {
1659     int i, nmatch;
1660     for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1661     {
1662         i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1663         if (i + HDR_SIZE > mp3_bytes)
1664             return nmatch > 0;
1665         if (!hdr_compare(hdr, hdr + i))
1666             return 0;
1667     }
1668     return 1;
1669 }
1670 
mp3d_find_frame(const uint8_t * mp3,int mp3_bytes,int * free_format_bytes,int * ptr_frame_bytes)1671 static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1672 {
1673     int i, k;
1674     for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1675     {
1676         if (hdr_valid(mp3))
1677         {
1678             int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1679             int frame_and_padding = frame_bytes + hdr_padding(mp3);
1680 
1681             for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1682             {
1683                 if (hdr_compare(mp3, mp3 + k))
1684                 {
1685                     int fb = k - hdr_padding(mp3);
1686                     int nextfb = fb + hdr_padding(mp3 + k);
1687                     if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1688                         continue;
1689                     frame_and_padding = k;
1690                     frame_bytes = fb;
1691                     *free_format_bytes = fb;
1692                 }
1693             }
1694             if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1695                 mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1696                 (!i && frame_and_padding == mp3_bytes))
1697             {
1698                 *ptr_frame_bytes = frame_and_padding;
1699                 return i;
1700             }
1701             *free_format_bytes = 0;
1702         }
1703     }
1704     *ptr_frame_bytes = 0;
1705     return mp3_bytes;
1706 }
1707 
/*
    Mark the decoder as having no cached frame header.

    Only the first header byte is cleared; mp3dec_decode_frame() tests it
    against 0xff before trusting the cached header, so the next call
    starts with a fresh frame search.
*/
void mp3dec_init(mp3dec_t *dec)
{
    unsigned char *cached_sync = &dec->header[0];
    *cached_sync = 0;
}
1712 
/*
    Locate and decode one frame from the input buffer.

    dec:       decoder state (cached header, bit reservoir, filter history).
    mp3:       input data.
    mp3_bytes: number of bytes available at mp3.
    pcm:       output sample buffer, or NULL to only parse the header.
    info:      receives frame_bytes (always), and on a located frame also
               frame_offset, channels, hz, layer and bitrate_kbps.

    Returns the value of hdr_frame_samples() for the frame, or 0 when no
    frame was found, the frame data was rejected, the bit reservoir could
    not be restored, or (with MINIMP3_ONLY_MP3) the frame is not layer 3.
    When no frame is found, info->frame_bytes holds the offset returned by
    mp3d_find_frame() and the other info fields are not written.
*/
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
{
    int i = 0, igr, frame_size = 0, success = 1;
    const uint8_t *hdr;
    bs_t bs_frame[1];
    mp3dec_scratch_t scratch;

    /* Fast path: the input starts with a header compatible with the one
       cached from the previous call. */
    if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
    {
        frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
        /* Trust it only if the frame exactly fills the buffer or is
           followed by another compatible header. */
        if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))
        {
            frame_size = 0;
        }
    }
    if (!frame_size)
    {
        /* Slow path: wipe the whole decoder state and search for a frame. */
        memset(dec, 0, sizeof(mp3dec_t));
        i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
        if (!frame_size || i + frame_size > mp3_bytes)
        {
            info->frame_bytes = i;
            return 0;
        }
    }

    hdr = mp3 + i;
    memcpy(dec->header, hdr, HDR_SIZE);
    /* frame_bytes includes any bytes skipped before the frame. */
    info->frame_bytes = i + frame_size;
    info->frame_offset = i;
    info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
    info->hz = hdr_sample_rate_hz(hdr);
    info->layer = 4 - HDR_GET_LAYER(hdr);
    info->bitrate_kbps = hdr_bitrate_kbps(hdr);

    /* Header-only mode: report the sample count without decoding. */
    if (!pcm)
    {
        return hdr_frame_samples(hdr);
    }

    bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
    if (HDR_IS_CRC(hdr))
    {
        /* Skip the 16-bit CRC field; its value is not checked here. */
        get_bits(bs_frame, 16);
    }

    if (info->layer == 3)
    {
        int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);
        if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
        {
            /* Bad side info: drop the cached header and report no samples. */
            mp3dec_init(dec);
            return 0;
        }
        success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
        if (success)
        {
            /* Two granules when HDR_TEST_MPEG1 is set, otherwise one;
               each granule advances pcm by 576 samples per channel. */
            for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels)
            {
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
            }
        }
        /* The reservoir is saved even when this frame could not be decoded. */
        L3_save_reservoir(dec, &scratch);
    } else
    {
#ifdef MINIMP3_ONLY_MP3
        return 0;
#else /* MINIMP3_ONLY_MP3 */
        L12_scale_info sci[1];
        L12_read_scale_info(hdr, bs_frame, sci);

        memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
        /* i accumulates the return values of L12_dequantize_granule();
           each time it reaches 12, one block of 384 samples per channel
           is synthesized and the buffer is cleared for the next block. */
        for (i = 0, igr = 0; igr < 3; igr++)
        {
            if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
            {
                i = 0;
                L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                pcm += 384*info->channels;
            }
            /* Bitstream read past the end of the frame: abandon it. */
            if (bs_frame->pos > bs_frame->limit)
            {
                mp3dec_init(dec);
                return 0;
            }
        }
#endif /* MINIMP3_ONLY_MP3 */
    }
    /* success is 0 only when the layer 3 reservoir restore failed. */
    return success*hdr_frame_samples(dec->header);
}
1807 
1808 #ifdef MINIMP3_FLOAT_OUTPUT
/*
    Convert float samples to saturated signed 16-bit PCM.

    in:          source samples; each is multiplied by 32768 before
                 rounding.
    out:         destination, num_samples entries.
    num_samples: number of samples to convert; values <= 0 convert nothing.

    With SIMD enabled, whole groups of eight samples go through the vector
    path and any remainder through the scalar loop at the end.
*/
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
    int i = 0;
#if HAVE_SIMD
    const int simd_end = num_samples & ~7;  /* largest multiple of 8 */
    while (i < simd_end)
    {
        static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
        f4 lo = VMUL(VLD(&in[i  ]), g_scale);
        f4 hi = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
        /* Clamp in float, convert to int32, pack both halves to int16. */
        static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
        static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
        __m128i packed = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(lo, g_max), g_min)),
                                         _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(hi, g_max), g_min)));
        out[i  ] = _mm_extract_epi16(packed, 0);
        out[i+1] = _mm_extract_epi16(packed, 1);
        out[i+2] = _mm_extract_epi16(packed, 2);
        out[i+3] = _mm_extract_epi16(packed, 3);
        out[i+4] = _mm_extract_epi16(packed, 4);
        out[i+5] = _mm_extract_epi16(packed, 5);
        out[i+6] = _mm_extract_epi16(packed, 6);
        out[i+7] = _mm_extract_epi16(packed, 7);
#else /* HAVE_SSE */
        /* Add 0.5, convert, subtract one from negative lanes, then narrow
           with saturation. */
        int16x4_t lo16, hi16;
        lo = VADD(lo, VSET(0.5f));
        hi = VADD(hi, VSET(0.5f));
        lo16 = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(lo), vreinterpretq_s32_u32(vcltq_f32(lo, VSET(0)))));
        hi16 = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(hi), vreinterpretq_s32_u32(vcltq_f32(hi, VSET(0)))));
        vst1_lane_s16(out+i  , lo16, 0);
        vst1_lane_s16(out+i+1, lo16, 1);
        vst1_lane_s16(out+i+2, lo16, 2);
        vst1_lane_s16(out+i+3, lo16, 3);
        vst1_lane_s16(out+i+4, hi16, 0);
        vst1_lane_s16(out+i+5, hi16, 1);
        vst1_lane_s16(out+i+6, hi16, 2);
        vst1_lane_s16(out+i+7, hi16, 3);
#endif /* HAVE_SSE */
        i += 8;
    }
#endif /* HAVE_SIMD */
    /* Scalar tail (or the whole buffer when SIMD is unavailable). */
    while (i < num_samples)
    {
        const float scaled = in[i] * 32768.0f;
        int16_t q;

        if (scaled >= 32766.5)
        {
            q = (int16_t) 32767;
        } else if (scaled <= -32767.5)
        {
            q = (int16_t)-32768;
        } else
        {
            q = (int16_t)(scaled + .5f);
            if (q < 0)
            {
                q--;    /* away from zero, to be compliant */
            }
        }
        out[i] = q;
        i++;
    }
}
1864 #endif /* MINIMP3_FLOAT_OUTPUT */
1865 #endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */
1866