1 #ifndef MINIMP3_H
2 #define MINIMP3_H
3 /*
4     https://github.com/lieff/minimp3
5     To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
6     This software is distributed without any warranty.
7     See <http://creativecommons.org/publicdomain/zero/1.0/>.
8 */
9 #include <stdint.h>
10 
11 #define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)
12 
/*
 * Per-frame description passed as the `info` argument of
 * mp3dec_decode_frame().
 * NOTE(review): the decoder body is not visible from this declaration, so the
 * field notes below are inferred from the field names only - confirm against
 * the implementation.
 */
typedef struct
{
    int frame_bytes;    /* presumably the size of the frame in bytes */
    int frame_offset;   /* presumably the offset of the frame inside the input buffer */
    int channels;       /* presumably the channel count */
    int hz;             /* presumably the sample rate in Hz */
    int layer;          /* presumably the MPEG audio layer */
    int bitrate_kbps;   /* presumably the bitrate in kbit/s */
} mp3dec_frame_info_t;
17 
/*
 * Persistent decoder state, handed to mp3dec_init() and
 * mp3dec_decode_frame().
 * NOTE(review): how each field is used is not visible from this declaration;
 * the notes below only relate the sizes to constants defined in this file.
 */
typedef struct
{
    float mdct_overlap[2][9*32];
    float qmf_state[15*2*32];
    int reserv;
    int free_format_bytes;
    unsigned char header[4];        /* same size as HDR_SIZE */
    unsigned char reserv_buf[511];  /* same size as MAX_BITRESERVOIR_BYTES */
} mp3dec_t;
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif /* __cplusplus */
28 
/*
 * Initializes the decoder state object `dec`.
 * NOTE(review): the implementation is not visible here; presumably this must
 * be called once before the first mp3dec_decode_frame() - confirm.
 */
void mp3dec_init(mp3dec_t *dec);
#ifndef MINIMP3_FLOAT_OUTPUT
/* Default build: decoded samples are signed 16-bit integers. */
typedef int16_t mp3d_sample_t;
#else /* MINIMP3_FLOAT_OUTPUT */
/* MINIMP3_FLOAT_OUTPUT build: decoded samples are floats. */
typedef float mp3d_sample_t;
/*
 * Converts `num_samples` float samples from `in` to 16-bit samples in `out`.
 * Only declared when MINIMP3_FLOAT_OUTPUT is defined.
 * NOTE(review): scaling/clipping rules are not visible here - confirm.
 */
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
#endif /* MINIMP3_FLOAT_OUTPUT */
/*
 * Decodes from the `mp3_bytes` bytes at `mp3` into `pcm`, using and updating
 * the state in `dec`, and reports frame details through `info`.
 * NOTE(review): presumably `pcm` must have room for
 * MINIMP3_MAX_SAMPLES_PER_FRAME samples and the return value is a sample
 * count - the implementation is not visible here, confirm both.
 */
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);
37 
38 #ifdef __cplusplus
39 }
40 #endif /* __cplusplus */
41 
42 #endif /* MINIMP3_H */
43 #if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
44 #define _MINIMP3_IMPLEMENTATION_GUARD
45 
46 #include <stdlib.h>
47 #include <string.h>
48 
/* ---- size limits ------------------------------------------------------- */
#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than ISO spec's */
#ifndef MAX_FRAME_SYNC_MATCHES
#define MAX_FRAME_SYNC_MATCHES      10
#endif /* MAX_FRAME_SYNC_MATCHES */

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

#define MAX_BITRESERVOIR_BYTES      511
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1
#define HDR_SIZE                    4

/*
 * ---- frame header accessors ---------------------------------------------
 * `h` is a pointer (or array) of at least HDR_SIZE header bytes.  The
 * argument is parenthesized before indexing so that pointer expressions such
 * as `buf + offset` can be passed safely.  The IS_ macros yield 0 or 1, the
 * TEST_ macros yield zero / non-zero (the raw masked bit), and the GET_ macros
 * yield the extracted field value.
 */
#define HDR_IS_MONO(h)              (((h)[3] & 0xC0) == 0xC0)
#define HDR_IS_MS_STEREO(h)         (((h)[3] & 0xE0) == 0x60)
#define HDR_IS_FREE_FORMAT(h)       (((h)[2] & 0xF0) == 0)
#define HDR_IS_CRC(h)               (!((h)[1] & 1))
#define HDR_TEST_PADDING(h)         ((h)[2] & 0x2)
#define HDR_TEST_MPEG1(h)           ((h)[1] & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      ((h)[1] & 0x10)
#define HDR_TEST_I_STEREO(h)        ((h)[3] & 0x10)
#define HDR_TEST_MS_STEREO(h)       ((h)[3] & 0x20)
#define HDR_GET_STEREO_MODE(h)      (((h)[3] >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  (((h)[3] >> 4) & 3)
#define HDR_GET_LAYER(h)            (((h)[1] >> 1) & 3)
#define HDR_GET_BITRATE(h)          ((h)[2] >> 4)
#define HDR_GET_SAMPLE_RATE(h)      (((h)[2] >> 2) & 3)
/* Sample-rate field plus 3 for each of the two version bits that is set (0..8 for valid headers). */
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + ((((h)[1] >> 3) & 1) + (((h)[1] >> 4) & 1))*3)
#define HDR_IS_FRAME_576(h)         (((h)[1] & 14) == 2)
#define HDR_IS_LAYER_1(h)           (((h)[1] & 6) == 6)

/* ---- dequantizer scaling ------------------------------------------------ */
#define BITS_DEQUANTIZER_OUT        (-1)    /* parenthesized: negative literal inside expressions */
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)

/* NOTE: both macros evaluate their arguments more than once - do not pass expressions with side effects. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
86 
87 #if !defined(MINIMP3_NO_SIMD)
88 
89 #if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
/* x64 always has SSE2 and arm64 always has NEON, so the generic (scalar) code is not needed */
91 #define MINIMP3_ONLY_SIMD
92 #endif /* SIMD checks... */
93 
94 #if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
95 #if defined(_MSC_VER)
96 #include <intrin.h>
97 #endif /* defined(_MSC_VER) */
98 #include <immintrin.h>
/*
 * SSE flavour of the 4-lane float vector abstraction: `f4` is the vector type
 * and the V* macros map onto the corresponding _mm_*_ps intrinsics.
 */
#define HAVE_SSE 1
#define HAVE_SIMD 1
#define VSTORE _mm_storeu_ps /* unaligned store */
#define VLD _mm_loadu_ps     /* unaligned load */
#define VSET _mm_set1_ps     /* broadcast one scalar to all lanes */
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y)) /* a + x*y */
#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y)) /* a - x*y */
#define VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))   /* x * scalar s */
#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3)) /* reverse lane order */
typedef __m128 f4;
112 #if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
113 #define minimp3_cpuid __cpuid
114 #else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Executes the x86 `cpuid` instruction with EAX = InfoType and stores the
 * resulting EAX, EBX, ECX and EDX into CPUInfo[0..3] respectively.
 *
 * In position-independent builds (__PIC__) the EBX value is moved out through
 * a general register (%1) and EBX itself is saved/restored by hand, so the
 * asm never names EBX as an output operand (presumably because EBX may be
 * reserved as the PIC base register on some targets/compilers).
 */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if defined(__PIC__)
    __asm__ __volatile__(
#if defined(__x86_64__)
        /* save rbx, run cpuid, swap the ebx result into %1, restore rbx */
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop  %%rbx\n"
#else /* defined(__x86_64__) */
        /* park ebx in %1, run cpuid, swap back: %1 gets the result, ebx its old value */
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif /* defined(__x86_64__) */
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else /* defined(__PIC__) */
    /* non-PIC: EBX can be used directly as an output operand */
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif /* defined(__PIC__)*/
}
138 #endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Reports whether the SSE2 code paths may be used.
 *
 * With MINIMP3_ONLY_SIMD the answer is always 1.  Otherwise CPUID is queried
 * once and the result is cached in a function-local static; the cache holds
 * (result + 1) so that 0 can mean "not probed yet".
 *
 * Returns zero when SSE2 is unavailable and non-zero otherwise (the non-zero
 * value is the raw CPUID.1:EDX bit 26 mask, not necessarily 1).
 *
 * With MINIMP3_TEST defined, a call counter makes the function return 0 once
 * it has been called more than 101 times (presumably to exercise the scalar
 * paths in tests).
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else /* MINIMP3_ONLY_SIMD */
    static int g_have_simd; /* 0 = not probed yet, otherwise probe result + 1 */
    int CPUInfo[4];
#ifdef MINIMP3_TEST
    static int g_counter;
    if (g_counter++ > 100)
        return 0;
#endif /* MINIMP3_TEST */
    if (!g_have_simd)
    {
        minimp3_cpuid(CPUInfo, 0);
        g_have_simd = 1; /* default: no SSE2 when leaf 1 is not available */
        if (CPUInfo[0] > 0)
        {
            minimp3_cpuid(CPUInfo, 1);
            g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
        }
    }
    return g_have_simd - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
164 #elif defined(__ARM_NEON) || defined(__aarch64__)
165 #include <arm_neon.h>
/*
 * NEON flavour of the 4-lane float vector abstraction: `f4` is the vector type
 * and the V* macros map onto the corresponding arm_neon.h intrinsics.
 */
#define HAVE_SSE 0
#define HAVE_SIMD 1
#define VSTORE vst1q_f32
#define VLD vld1q_f32
#define VSET vmovq_n_f32 /* broadcast one scalar to all lanes */
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
#define VMAC(a, x, y) vmlaq_f32(a, x, y) /* a + x*y */
#define VMSB(a, x, y) vmlsq_f32(a, x, y) /* a - x*y */
#define VMUL_S(x, s)  vmulq_f32(x, vmovq_n_f32(s)) /* x * scalar s */
/* reverse lane order: swap within each 64-bit half, then swap the halves */
#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
typedef float32x4_t f4;
/*
 * Reports whether the SIMD (NEON) code paths may be used.
 * This build variant performs no runtime detection and always returns 1.
 */
static int have_simd(void)
{   /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
    return 1;
}
183 #else /* SIMD checks... */
184 #define HAVE_SSE 0
185 #define HAVE_SIMD 0
186 #ifdef MINIMP3_ONLY_SIMD
187 #error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
188 #endif /* MINIMP3_ONLY_SIMD */
189 #endif /* SIMD checks... */
190 #else /* !defined(MINIMP3_NO_SIMD) */
191 #define HAVE_SIMD 0
192 #endif /* !defined(MINIMP3_NO_SIMD) */
193 
194 #if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__)
195 #define HAVE_ARMV6 1
/*
 * Saturates `a` to the signed 16-bit range using the ARM `ssat` instruction
 * and returns the saturated value.  Only compiled for 32-bit ARM targets with
 * __ARM_ARCH >= 6 (see the surrounding #if).
 */
static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)
{
    int32_t x = 0;
    /* ssat Rd, #16, Rn : signed saturate Rn to 16 bits */
    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
    return x;
}
202 #endif
203 
/* Bit reader state; positions are counted in bits from the start of `buf`. */
typedef struct
{
    const uint8_t *buf; /* start of the byte buffer being read */
    int pos;            /* current read position, in bits */
    int limit;          /* total number of readable bits (bytes*8, see bs_init) */
} bs_t;
209 
/*
 * Layer I/II side information for one frame.  The per-band arrays are
 * interleaved by channel: entry 2*band is the first channel and 2*band + 1
 * the second (see L12_read_scale_info).
 */
typedef struct
{
    float scf[3*64];        /* dequantized scale factors, three per bitalloc entry */
    uint8_t total_bands;    /* number of coded subbands */
    uint8_t stereo_bands;   /* subbands that carry separate data for both channels */
    uint8_t bitalloc[64];   /* bit allocation code per band and channel, 0 = nothing coded */
    uint8_t scfcod[64];     /* scale factor selection code per band and channel */
} L12_scale_info;
215 
/*
 * One run of consecutive Layer I/II subbands sharing the same bit-allocation
 * coding.  Instances are initialized positionally, so the field order matters.
 */
typedef struct
{
    uint8_t tab_offset;     /* offset of the run's code table inside g_bitalloc_code_tab */
    uint8_t code_tab_width; /* number of bits read for each allocation code */
    uint8_t band_count;     /* number of subbands in this run */
} L12_subband_alloc_t;
220 
/*
 * Layer III side information for one granule of one channel, as filled in by
 * L3_read_side_info().
 */
typedef struct
{
    const uint8_t *sfbtab;      /* scale factor band width table selected for this granule */
    uint16_t part_23_length;    /* 12-bit field */
    uint16_t big_values;        /* 9-bit field, rejected when > 288 */
    uint16_t scalefac_compress; /* 4 bits (MPEG-1) or 9 bits otherwise */
    uint8_t global_gain;
    uint8_t block_type;         /* 0 when the window-switching flag is clear */
    uint8_t mixed_block_flag;
    uint8_t n_long_sfb;         /* number of long scale factor bands */
    uint8_t n_short_sfb;        /* number of short scale factor band entries */
    uint8_t table_select[3];    /* three 5-bit Huffman table selectors */
    uint8_t region_count[3];
    uint8_t subblock_gain[3];   /* three 3-bit gains, read only with window switching */
    uint8_t preflag;
    uint8_t scalefac_scale;
    uint8_t count1_table;
    uint8_t scfsi;              /* 4-bit scale factor selection info */
} L3_gr_info_t;
229 
230 typedef struct
231 {
232     bs_t bs;
233     uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
234     L3_gr_info_t gr_info[4];
235     float grbuf[2][576], scf[40], syn[18 + 15][2*32];
236     uint8_t ist_pos[2][39];
237 } mp3dec_scratch_t;
238 
/*
 * Points the bit reader `bs` at `data` and rewinds it to bit 0.
 * `bytes` is the buffer length in bytes; the read limit is stored in bits.
 */
static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
{
    bs->limit = 8*bytes;
    bs->pos   = 0;
    bs->buf   = data;
}
245 
/*
 * Reads the next `n` bits (most significant bit first) from `bs` and returns
 * them right-aligned.
 *
 * The read position is always advanced by `n`; when that moves it past
 * bs->limit nothing is read from the buffer and 0 is returned, leaving
 * bs->pos beyond the limit.
 */
static uint32_t get_bits(bs_t *bs, int n)
{
    const uint32_t bit_ofs = bs->pos & 7;               /* bits already used in the first byte */
    const uint8_t *src = bs->buf + (bs->pos >> 3);
    int shift = n + bit_ofs;
    uint32_t acc = 0, byte;

    bs->pos += n;
    if (bs->pos > bs->limit)
    {
        return 0;
    }

    byte = *src++ & (255 >> bit_ofs);                   /* drop the already-consumed high bits */
    for (shift -= 8; shift > 0; shift -= 8)
    {
        acc |= byte << shift;
        byte = *src++;
    }
    /* shift is now in -7..0: discard the unused low bits of the last byte */
    return acc | (byte >> -shift);
}
261 
/*
 * Returns 1 when the four bytes at `h` look like a usable frame header and 0
 * otherwise.  Accepted: first byte 0xff, second byte with the top four bits
 * set or equal to 0xe2/0xe3, a non-zero layer field, a bitrate index other
 * than 15 and a sample-rate index other than 3.
 */
static int hdr_valid(const uint8_t *h)
{
    if (h[0] != 0xff)
    {
        return 0;
    }
    if ((h[1] & 0xF0) != 0xf0 && (h[1] & 0xFE) != 0xe2)
    {
        return 0;
    }
    if (HDR_GET_LAYER(h) == 0)
    {
        return 0;
    }
    if (HDR_GET_BITRATE(h) == 15)
    {
        return 0;
    }
    return HDR_GET_SAMPLE_RATE(h) != 3;
}
270 
/*
 * Returns 1 when `h2` is a valid header that agrees with `h1` on byte 1
 * (ignoring its lowest bit), on the sample-rate field and on being (or not
 * being) free-format; returns 0 otherwise.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    if (!hdr_valid(h2))
    {
        return 0;
    }
    if (((h1[1] ^ h2[1]) & 0xFE) != 0)
    {
        return 0;
    }
    if (((h1[2] ^ h2[2]) & 0x0C) != 0)
    {
        return 0;
    }
    return HDR_IS_FREE_FORMAT(h1) == HDR_IS_FREE_FORMAT(h2);
}
278 
/*
 * Returns the bitrate in kbit/s encoded in header `h`; 0 for bitrate index 0.
 * The table stores half the rate so each entry fits in a byte.  It is indexed
 * by [MPEG-1 flag][layer field - 1][bitrate index]; the header must already
 * have passed hdr_valid() so that the indices are in range.
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        {
            { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 },
            { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 },
            { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 }
        },
        {
            { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 },
            { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 },
            { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 }
        },
    };
    const int is_mpeg1 = HDR_TEST_MPEG1(h) ? 1 : 0;
    const int layer_row = HDR_GET_LAYER(h) - 1;
    const int rate_idx = HDR_GET_BITRATE(h);

    return 2*halfrate[is_mpeg1][layer_row][rate_idx];
}
287 
/*
 * Returns the sample rate in Hz for header `h`: the base rate selected by the
 * sample-rate index, halved once when the MPEG-1 bit is clear and once more
 * when the "not MPEG-2.5" bit is clear.  The index must not be 3.
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    unsigned hz = g_hz[HDR_GET_SAMPLE_RATE(h)];

    if (!HDR_TEST_MPEG1(h))
    {
        hz >>= 1;
    }
    if (!HDR_TEST_NOT_MPEG25(h))
    {
        hz >>= 1;
    }
    return hz;
}
293 
/*
 * Returns the number of samples per channel in a frame with header `h`:
 * 384 for Layer I, 576 when HDR_IS_FRAME_576() holds, 1152 otherwise.
 */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    if (HDR_IS_LAYER_1(h))
    {
        return 384;
    }
    return HDR_IS_FRAME_576(h) ? 576 : 1152;
}
298 
/*
 * Returns the frame size in bytes (without padding) computed from header `h`
 * as samples * kbps * 125 / hz, rounded down to a multiple of 4 for Layer I.
 * When that size is 0 (bitrate index 0), `free_format_size` is returned
 * instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    const unsigned samples = hdr_frame_samples(h);
    const unsigned kbps = hdr_bitrate_kbps(h);
    const unsigned hz = hdr_sample_rate_hz(h);
    int nbytes = samples*kbps*125/hz;

    if (HDR_IS_LAYER_1(h))
    {
        nbytes &= ~3; /* slot align */
    }
    if (!nbytes)
    {
        return free_format_size;
    }
    return nbytes;
}
308 
/*
 * Returns the number of padding bytes signalled by header `h`: 0 when the
 * padding bit is clear, otherwise 4 for Layer I and 1 for the other layers.
 */
static int hdr_padding(const uint8_t *h)
{
    if (!HDR_TEST_PADDING(h))
    {
        return 0;
    }
    return HDR_IS_LAYER_1(h) ? 4 : 1;
}
313 
314 #ifndef MINIMP3_ONLY_MP3
L12_subband_alloc_table(const uint8_t * hdr,L12_scale_info * sci)315 static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
316 {
317     const L12_subband_alloc_t *alloc;
318     int mode = HDR_GET_STEREO_MODE(hdr);
319     int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
320 
321     if (HDR_IS_LAYER_1(hdr))
322     {
323         static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
324         alloc = g_alloc_L1;
325         nbands = 32;
326     } else if (!HDR_TEST_MPEG1(hdr))
327     {
328         static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
329         alloc = g_alloc_L2M2;
330         nbands = 30;
331     } else
332     {
333         static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
334         int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
335         unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
336         if (!kbps) /* free-format */
337         {
338             kbps = 192;
339         }
340 
341         alloc = g_alloc_L2M1;
342         nbands = 27;
343         if (kbps < 56)
344         {
345             static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
346             alloc = g_alloc_L2M1_lowrate;
347             nbands = sample_rate_idx == 2 ? 12 : 8;
348         } else if (kbps >= 96 && sample_rate_idx != 1)
349         {
350             nbands = 30;
351         }
352     }
353 
354     sci->total_bands = (uint8_t)nbands;
355     sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);
356 
357     return alloc;
358 }
359 
/*
 * Reads and dequantizes Layer I/II scale factors.
 *
 * bs     - bit reader positioned at the scale factor data
 * pba    - bit allocation code for each of the `bands` entries
 * scfcod - scale factor selection code for each entry
 * bands  - number of entries to process
 * scf    - output, receives exactly three floats per entry
 *
 * For every entry a 3-bit mask decides which of the three output slots gets a
 * freshly read 6-bit scale factor; a slot without a fresh value repeats the
 * previous one (0 if none has been read yet for this entry).  Entries with a
 * zero allocation read nothing and produce three zeros.
 */
static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
{
    static const float g_deq_L12[18*3] = {
#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
        DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
    };
#undef DQ /* table helper only: keep it out of the including translation unit */
    int i, m;
    for (i = 0; i < bands; i++)
    {
        float s = 0;
        int ba = *pba++;
        /* bit 2 is always set for allocated entries; the low two bits depend on scfcod */
        int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
        for (m = 4; m; m >>= 1)
        {
            if (mask & m)
            {
                int b = get_bits(bs, 6);
                /* b % 3 picks the fractional step, b / 3 the power-of-two scale */
                s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
            }
            *scf++ = s;
        }
    }
}
383 
/*
 * Parses the Layer I/II bit allocation, scale factor selection codes and
 * scale factors from `bs` into `sci`, using the allocation layout that
 * L12_subband_alloc_table() picks for header `hdr`.
 *
 * Per-band data is stored interleaved by channel (index 2*band and
 * 2*band + 1).  Bands at or above sci->stereo_bands share one allocation code:
 * it is duplicated for the scale factor pass and the second channel's
 * allocation is cleared again afterwards.
 */
static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
{
    static const uint8_t g_bitalloc_code_tab[] = {
        0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
        0,17,18, 3,19,4,5,16,
        0,17,18,16,
        0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
        0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
    };
    const L12_subband_alloc_t *run = L12_subband_alloc_table(hdr, sci);
    const uint8_t *codes = g_bitalloc_code_tab;
    int band, idx, run_end = 0, width = 0;

    /* pass 1: bit allocation codes */
    for (band = 0; band < sci->total_bands; band++)
    {
        uint8_t alloc;
        if (band == run_end)
        {
            /* entering the next run of bands: switch code table and code width */
            codes = g_bitalloc_code_tab + run->tab_offset;
            width = run->code_tab_width;
            run_end += run->band_count;
            run++;
        }
        alloc = codes[get_bits(bs, width)];
        sci->bitalloc[2*band] = alloc;
        if (band < sci->stereo_bands)
        {
            alloc = codes[get_bits(bs, width)];
        }
        sci->bitalloc[2*band + 1] = sci->stereo_bands ? alloc : 0;
    }

    /* pass 2: scale factor selection; Layer I has no such field and always uses 2 */
    for (idx = 0; idx < 2*sci->total_bands; idx++)
    {
        if (!sci->bitalloc[idx])
        {
            sci->scfcod[idx] = 6;
        } else if (HDR_IS_LAYER_1(hdr))
        {
            sci->scfcod[idx] = 2;
        } else
        {
            sci->scfcod[idx] = get_bits(bs, 2);
        }
    }

    L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);

    /* bands without separate second-channel data carry no samples for it */
    for (band = sci->stereo_bands; band < sci->total_bands; band++)
    {
        sci->bitalloc[2*band + 1] = 0;
    }
}
431 
/*
 * Reads four groups of `group_size` quantized samples for every band/channel
 * entry of `sci` from `bs` and stores them, centred around zero, in `grbuf`.
 *
 * Output layout: 18 floats per band, the second channel 576 floats after the
 * first; group j is written at offset group_size*j inside each band.
 * Allocation codes below 17 read one `ba`-bit value per sample; codes 17..19
 * read one grouped code word from which the samples are extracted base
 * 3, 5 or 9.  Entries with allocation 0 are left untouched.
 *
 * Returns group_size*4.
 */
static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
{
    int group, entry, n, stride = 576;
    for (group = 0; group < 4; group++)
    {
        float *out = grbuf + group_size*group;
        for (entry = 0; entry < 2*sci->total_bands; entry++)
        {
            const int ba = sci->bitalloc[entry];
            if (ba >= 17)
            {
                unsigned mod = (2 << (ba - 17)) + 1;    /* 3, 5, 9 */
                unsigned code = get_bits(bs, mod + 2 - (mod >> 3));  /* 5, 7, 10 */
                for (n = 0; n < group_size; n++, code /= mod)
                {
                    out[n] = (float)((int)(code % mod - mod/2));
                }
            } else if (ba != 0)
            {
                const int half = (1 << (ba - 1)) - 1;
                for (n = 0; n < group_size; n++)
                {
                    out[n] = (float)((int)get_bits(bs, ba) - half);
                }
            }
            /* alternate +576 / -558: first channel -> second channel -> next band */
            out += stride;
            stride = 18 - stride;
        }
    }
    return 4*group_size;
}
466 
/*
 * Applies scale factors to 12 samples of every band in `dst`.
 *
 * First the bands that have no separate second-channel data
 * (stereo_bands..total_bands-1) are copied from the first channel to the
 * second channel, which lives 576 floats further on.  Then, per band, the
 * first channel is multiplied by scf[0] and the second by scf[3]; `scf`
 * advances by 6 and `dst` by 18 per band.
 */
static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
{
    const int first_shared = sci->stereo_bands;
    const int shared_bands = sci->total_bands - sci->stereo_bands;
    int band, n;

    memcpy(dst + 576 + first_shared*18, dst + first_shared*18, shared_bands*18*sizeof(float));

    for (band = 0; band < sci->total_bands; band++)
    {
        float *ch0 = dst;
        float *ch1 = dst + 576;
        for (n = 0; n < 12; n++)
        {
            ch0[n] *= scf[0];
            ch1[n] *= scf[3];
        }
        dst += 18;
        scf += 6;
    }
}
480 #endif /* MINIMP3_ONLY_MP3 */
481 
/*
 * Parses the Layer III side information that follows the frame header.
 *
 * bs  - bit reader positioned at the start of the side info
 * gr  - output array; one entry is filled per granule/channel
 *       (1 or 2 entries for non-MPEG-1 headers, 2 or 4 for MPEG-1)
 * hdr - the 4 header bytes of the frame
 *
 * Returns the main_data_begin value on success, or -1 when the data is
 * rejected: big_values > 288, a window-switching granule with block_type 0,
 * or a total part_23_length that does not fit in the remaining bits of `bs`
 * plus main_data_begin bytes.
 */
static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
{
    /* Scale factor band widths, one row per sample-rate index (see sr_idx below). */
    static const uint8_t g_scf_long[8][23] = {
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
        { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
        { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
    };
    /* Same, for granules made of short blocks only. */
    static const uint8_t g_scf_short[8][40] = {
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };
    /* Same, for mixed granules (long bands first, then short bands). */
    static const uint8_t g_scf_mixed[8][40] = {
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };

    unsigned tables, scfsi = 0;
    int main_data_begin, part_23_sum = 0;
    /* HDR_GET_MY_SAMPLE_RATE yields 0..8; indices 0 and 1 share row 0, the rest shift down by one */
    int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
    int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;

    if (HDR_TEST_MPEG1(hdr))
    {
        /* MPEG-1: twice as many entries, a 9-bit main_data_begin and an scfsi field */
        gr_count *= 2;
        main_data_begin = get_bits(bs, 9);
        scfsi = get_bits(bs, 7 + gr_count);
    } else
    {
        /* otherwise: 8-bit main_data_begin; the gr_count bits that follow are read and discarded */
        main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
    }

    do
    {
        if (HDR_IS_MONO(hdr))
        {
            scfsi <<= 4;
        }
        gr->part_23_length = (uint16_t)get_bits(bs, 12);
        part_23_sum += gr->part_23_length;
        gr->big_values = (uint16_t)get_bits(bs,  9);
        if (gr->big_values > 288)
        {
            return -1;
        }
        gr->global_gain = (uint8_t)get_bits(bs, 8);
        gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
        /* defaults for long blocks; overridden below for short/mixed blocks */
        gr->sfbtab = g_scf_long[sr_idx];
        gr->n_long_sfb  = 22;
        gr->n_short_sfb = 0;
        if (get_bits(bs, 1))
        {
            /* window switching: explicit block type, two table selectors, subblock gains */
            gr->block_type = (uint8_t)get_bits(bs, 2);
            if (!gr->block_type)
            {
                return -1;
            }
            gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
            gr->region_count[0] = 7;
            gr->region_count[1] = 255;
            if (gr->block_type == SHORT_BLOCK_TYPE)
            {
                /* clears the bits that would become gr->scfsi for this entry (see the >> 12 below) */
                scfsi &= 0x0F0F;
                if (!gr->mixed_block_flag)
                {
                    gr->region_count[0] = 8;
                    gr->sfbtab = g_scf_short[sr_idx];
                    gr->n_long_sfb = 0;
                    gr->n_short_sfb = 39;
                } else
                {
                    gr->sfbtab = g_scf_mixed[sr_idx];
                    gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
                    gr->n_short_sfb = 30;
                }
            }
            /* only two 5-bit selectors are coded here; shift so the third selector reads as 0 */
            tables = get_bits(bs, 10);
            tables <<= 5;
            gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
        } else
        {
            /* no window switching: three table selectors and explicit region counts */
            gr->block_type = 0;
            gr->mixed_block_flag = 0;
            tables = get_bits(bs, 15);
            gr->region_count[0] = (uint8_t)get_bits(bs, 4);
            gr->region_count[1] = (uint8_t)get_bits(bs, 3);
            gr->region_count[2] = 255;
        }
        gr->table_select[0] = (uint8_t)(tables >> 10);
        gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
        gr->table_select[2] = (uint8_t)((tables) & 31);
        /* preflag is an explicit bit for MPEG-1, otherwise derived from scalefac_compress */
        gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
        gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
        gr->count1_table = (uint8_t)get_bits(bs, 1);
        /* hand out the next 4 scfsi bits (bits 12..15) and move on to the following ones */
        gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
        scfsi <<= 4;
        gr++;
    } while(--gr_count);

    /* all part2_3 data must fit into what is left of this frame plus the referenced reservoir bytes */
    if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
    {
        return -1;
    }

    return main_data_begin;
}
606 
/*
 * Reads up to four partitions of Layer III scale factors.
 *
 * scf       - output scale factors (one byte each)
 * ist_pos   - per-band values kept between calls; updated here and used as
 *             the copy source when a partition is flagged for reuse
 * scf_size  - bit width of the values in each partition
 * scf_count - number of values in each partition; a zero count ends the list
 * bitbuf    - bit reader
 * scfsi     - bit 3 set means "reuse": copy this partition from ist_pos.
 *             The value is doubled after every partition, so bits 2..0
 *             control the following partitions.  A negative value
 *             additionally stores -1 (255) in ist_pos for values equal to
 *             the largest code of the partition's width.
 *
 * Three zero bytes are written after the last scale factor.
 */
static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
{
    int part, n;
    for (part = 0; part < 4; part++, scfsi *= 2)
    {
        const int count = scf_count[part];
        if (!count)
        {
            break;
        }

        if (scfsi & 8)
        {
            memcpy(scf, ist_pos, count);
        } else if (!scf_size[part])
        {
            /* zero-width partition: nothing is coded, all values are 0 */
            memset(scf, 0, count);
            memset(ist_pos, 0, count);
        } else
        {
            const int width = scf_size[part];
            const int escape = (scfsi < 0) ? (1 << width) - 1 : -1;
            for (n = 0; n < count; n++)
            {
                int value = get_bits(bitbuf, width);
                ist_pos[n] = (value == escape ? -1 : value);
                scf[n] = value;
            }
        }
        ist_pos += count;
        scf += count;
    }
    scf[0] = scf[1] = scf[2] = 0;
}
639 
/*
 * Scales `y` down by a power of two given in quarter steps, i.e. multiplies
 * it by roughly 2^(-exp_q2/4).  The exponent is consumed in chunks of at most
 * 120 so that the integer shift stays in range; the loop body always runs at
 * least once.  Callers are expected to pass a non-negative exp_q2.
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    /* 2^-30 * 2^(-k/4) for k = 0..3 */
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };
    int step;
    do
    {
        step = MINIMP3_MIN(30*4, exp_q2);
        y *= g_expfrac[step & 3]*(1 << 30 >> (step >> 2));
        exp_q2 -= step;
    } while (exp_q2 > 0);
    return y;
}
651 
/*
 * Reads the scale factors of one Layer III granule/channel and converts them
 * to float gains.
 *
 * hdr     - frame header bytes
 * ist_pos - per-band state passed through to L3_read_scalefactors()
 * bs      - bit reader positioned at the scale factor data
 * gr      - side information of this granule/channel
 * scf     - output: one gain per scale factor band
 *           (gr->n_long_sfb + gr->n_short_sfb entries)
 * ch      - channel index; non-zero together with the header's I-stereo bit
 *           selects the alternative decoding of scalefac_compress for
 *           non-MPEG-1 streams
 */
static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
{
    static const uint8_t g_scf_partitions[3][28] = {
        { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
        { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
        { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
    };
    /* row 0: long blocks only, row 1: mixed, row 2: short blocks only */
    const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
    const int scf_shift = gr->scalefac_scale + 1;
    uint8_t scf_size[4], iscf[40];
    int i, band_total, gain_exp, scfsi = gr->scfsi;
    float gain;

    if (!HDR_TEST_MPEG1(hdr))
    {
        /* scalefac_compress is a mixed-radix number; g_mod holds the radices of each candidate layout */
        static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
        int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
        sfc = gr->scalefac_compress >> ist;
        k = ist*3*4;
        while (sfc >= 0)
        {
            modprod = 1;
            for (i = 3; i >= 0; i--)
            {
                scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);
                modprod *= g_mod[k + i];
            }
            sfc -= modprod;
            k += 4;
        }
        /* k has advanced once per layout tried; it now selects the matching partition counts */
        scf_partition += k;
        scfsi = -16;
    } else
    {
        static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
        const int part = g_scfc_decode[gr->scalefac_compress];
        scf_size[0] = (uint8_t)(part >> 2);
        scf_size[1] = scf_size[0];
        scf_size[2] = (uint8_t)(part & 3);
        scf_size[3] = scf_size[2];
    }
    L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);

    if (gr->n_short_sfb)
    {
        /* fold the three per-window subblock gains into the short-block scale factors */
        const int sh = 3 - scf_shift;
        for (i = 0; i < gr->n_short_sfb; i += 3)
        {
            const int base = gr->n_long_sfb + i;
            iscf[base + 0] += gr->subblock_gain[0] << sh;
            iscf[base + 1] += gr->subblock_gain[1] << sh;
            iscf[base + 2] += gr->subblock_gain[2] << sh;
        }
    } else if (gr->preflag)
    {
        static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
        for (i = 0; i < 10; i++)
        {
            iscf[11 + i] += g_preamp[i];
        }
    }

    gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
    gain = L3_ldexp_q2(1 << (MAX_SCFI/4),  MAX_SCFI - gain_exp);
    band_total = (int)(gr->n_long_sfb + gr->n_short_sfb);
    for (i = 0; i < band_total; i++)
    {
        scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
    }
}
713 
/*
 * x^(4/3) lookup table.  Entries 16..144 hold x^(4/3) for x = 0..128; the
 * first 16 entries hold the negated values for x = 0..15.
 */
static const float g_pow43[129 + 16] = {
    0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
    0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
};

/*
 * Returns an approximation of x^(4/3).
 *
 * Values below 129 are plain table lookups (g_pow43[16 + x]).  Larger values
 * are first scaled by 8 (when below 1024) or used as they are, reduced to the
 * nearest multiple of 64, looked up at x/64 and corrected with a second-order
 * polynomial in the relative remainder; the result is multiplied back by
 * 8^(4/3) = 16 or 64^(4/3) = 256 respectively.
 */
static float L3_pow_43(int x)
{
    int scale, nearest;
    float rel, base;

    if (x < 129)
    {
        return g_pow43[16 + x];
    }

    if (x < 1024)
    {
        scale = 16;
        x <<= 3;
    } else
    {
        scale = 256;
    }

    /* 64 when x lies in the upper half of its 64-wide bucket (round up), else 0 */
    nearest = 2*x & 64;
    rel = (float)((x & 63) - nearest) / ((x & ~63) + nearest);
    base = g_pow43[16 + ((x + nearest) >> 6)];
    return base*(1.f + rel*((4.f/3) + rel*(2.f/9)))*scale;
}
739 
/*
 * Decode the Huffman-coded spectrum described by one L3_gr_info_t into dst.
 *
 *   dst            - receives the decoded values, already multiplied by the
 *                    per-band gain taken from scf
 *   bs             - source bitstream; bs->pos is set to layer3gr_limit on exit
 *   gr_info        - provides big_values, table_select[], region_count[],
 *                    count1_table and the band-width list sfbtab
 *   scf            - one gain per band, consumed in step with sfbtab
 *   layer3gr_limit - bit position that ends this part of the stream
 *
 * Two phases:
 *   1. "big values": pairs are decoded region by region with the code tree
 *      chosen by table_select[]; tables with a non-zero g_linbits entry may
 *      extend a magnitude of 15 with extra escape bits.
 *   2. "count1": groups of four +/-gain values are decoded with tab32/tab33
 *      until the bit position passes layer3gr_limit or sfbtab yields a
 *      zero-width band.
 *
 * The bit reader macros below are left defined after the function, exactly
 * as in the original layout.
 */
static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
{
    static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
        -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
        -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
        -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
        -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
        -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
        -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
        -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
        -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
        -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
        -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
        -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
        -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
        -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
        -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
    static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
    static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
    static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
    static const uint8_t g_linbits[] =  { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };

/*
 * Local bit reader. bs_cache holds upcoming bits left-aligned in 32 bits;
 * bs_sh is the shift at which the next input byte is merged (refill is due
 * while it is >= 0); bs_next_ptr is the next unread input byte.
 */
#define PEEK_BITS(n)  (bs_cache >> (32 - n))
#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
#define CHECK_BITS    while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
#define BSPOS         ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
/* count1 helpers: step to the next band every np pairs; emit one +/-gain value. */
#define RELOAD_SCALEFACTOR  if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }

    float one = 0.0f;
    int region = 0, big_values_left = gr_info->big_values;
    const uint8_t *sfb = gr_info->sfbtab;
    const uint8_t *quad_tab = (gr_info->count1_table) ? tab33 : tab32;
    const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
    uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
    int pairs, np, bs_sh = (bs->pos & 7) - 8;
    bs_next_ptr += 4;

    /* Phase 1: big-value pairs, one region (code table) at a time. */
    while (big_values_left > 0)
    {
        int table_id = gr_info->table_select[region];
        int bands_left = gr_info->region_count[region++];
        const int16_t *tree = tabs + tabindex[table_id];
        int esc_bits = g_linbits[table_id];
        if (esc_bits)
        {
            /* Tables whose magnitude 15 is followed by esc_bits extra bits. */
            do
            {
                np = *sfb++ / 2;
                pairs = MINIMP3_MIN(big_values_left, np);
                one = *scf++;
                do
                {
                    int half, width = 5;
                    int node = tree[PEEK_BITS(width)];
                    /* Negative entries are links: low 3 bits give the next
                       lookup width, the rest the (negated) sub-table offset. */
                    while (node < 0)
                    {
                        FLUSH_BITS(width);
                        width = node & 7;
                        node = tree[PEEK_BITS(width) - (node >> 3)];
                    }
                    FLUSH_BITS(node >> 8);

                    /* A leaf carries two 4-bit magnitudes, low nibble first. */
                    for (half = 0; half < 2; half++, dst++, node >>= 4)
                    {
                        int mag = node & 0x0F;
                        if (mag == 15)
                        {
                            mag += PEEK_BITS(esc_bits);
                            FLUSH_BITS(esc_bits);
                            CHECK_BITS;
                            *dst = one*L3_pow_43(mag)*((int32_t)bs_cache < 0 ? -1: 1);
                        } else
                        {
                            /* A set sign bit shifts the index down by 16 entries. */
                            *dst = g_pow43[16 + mag - 16*(bs_cache >> 31)]*one;
                        }
                        /* A sign bit is consumed only for non-zero magnitudes. */
                        FLUSH_BITS(mag ? 1 : 0);
                    }
                    CHECK_BITS;
                } while (--pairs);
            } while ((big_values_left -= np) > 0 && --bands_left >= 0);
        } else
        {
            /* Tables without escape bits: every magnitude comes from g_pow43. */
            do
            {
                np = *sfb++ / 2;
                pairs = MINIMP3_MIN(big_values_left, np);
                one = *scf++;
                do
                {
                    int half, width = 5;
                    int node = tree[PEEK_BITS(width)];
                    while (node < 0)
                    {
                        FLUSH_BITS(width);
                        width = node & 7;
                        node = tree[PEEK_BITS(width) - (node >> 3)];
                    }
                    FLUSH_BITS(node >> 8);

                    for (half = 0; half < 2; half++, dst++, node >>= 4)
                    {
                        int mag = node & 0x0F;
                        *dst = g_pow43[16 + mag - 16*(bs_cache >> 31)]*one;
                        FLUSH_BITS(mag ? 1 : 0);
                    }
                    CHECK_BITS;
                } while (--pairs);
            } while ((big_values_left -= np) > 0 && --bands_left >= 0);
        }
    }

    /*
     * Phase 2: count1 quads. big_values_left is now <= 0 and its magnitude is
     * the number of pairs still open in the current band; the extra 1 offsets
     * the pre-decrement inside RELOAD_SCALEFACTOR.
     */
    for (np = 1 - big_values_left;; dst += 4)
    {
        int leaf = quad_tab[PEEK_BITS(4)];
        if (!(leaf & 8))
        {
            /* Not a direct hit: follow the second-level lookup. */
            leaf = quad_tab[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
        }
        FLUSH_BITS(leaf & 7);
        if (BSPOS > layer3gr_limit)
        {
            break;
        }
        RELOAD_SCALEFACTOR;
        DEQ_COUNT1(0);
        DEQ_COUNT1(1);
        RELOAD_SCALEFACTOR;
        DEQ_COUNT1(2);
        DEQ_COUNT1(3);
        CHECK_BITS;
    }

    bs->pos = layer3gr_limit;
}
876 
/*
 * In-place sum/difference butterfly over n samples.
 *
 * left[i] is paired with the sample 576 floats further on; afterwards
 * left[i] holds their sum and the partner holds (left - partner).
 * When SIMD is available the bulk is processed four samples at a time and
 * the scalar loop finishes the remainder.
 */
static void L3_midside_stereo(float *left, int n)
{
    float *const right = left + 576;
    int pos = 0;
#if HAVE_SIMD
    if (have_simd())
    {
        while (pos < n - 3)
        {
            f4 vmid = VLD(left + pos);
            f4 vside = VLD(right + pos);
            VSTORE(left + pos, VADD(vmid, vside));
            VSTORE(right + pos, VSUB(vmid, vside));
            pos += 4;
        }
    }
#endif /* HAVE_SIMD */
    while (pos < n)
    {
        const float mid = left[pos];
        const float side = right[pos];
        left[pos] = mid + side;
        right[pos] = mid - side;
        pos++;
    }
}
898 
/*
 * Split n samples into two weighted copies: the sample 576 floats after
 * left[i] receives left[i]*kr, then left[i] itself is scaled by kl.
 * Nothing happens when n <= 0.
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    for (; n > 0; n--, left++)
    {
        left[576] = left[0]*kr;
        left[0] = left[0]*kl;
    }
}
908 
/*
 * Find, for each residue class of the band index modulo 3, the highest band
 * that contains a non-zero sample.
 *
 *   right    - samples, laid out band after band
 *   sfb      - width of each band in samples
 *   nbands   - number of bands to inspect
 *   max_band - output; each slot is -1 when no band of that class has data
 *
 * Samples are examined two at a time within a band.
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int band;

    for (band = 0; band < 3; band++)
    {
        max_band[band] = -1;
    }

    for (band = 0; band < nbands; right += sfb[band], band++)
    {
        const int width = sfb[band];
        int pos = 0;

        /* Skip leading all-zero pairs. */
        while (pos < width && right[pos] == 0 && right[pos + 1] == 0)
        {
            pos += 2;
        }
        if (pos < width)
        {
            max_band[band % 3] = band;
        }
    }
}
928 
/*
 * Apply per-band stereo reconstruction, walking the band-width list sfb
 * until a zero width is met.
 *
 * A band is treated as intensity-coded when its index lies above
 * max_band[index % 3] and its position code ist_pos[index] is below the
 * limit (7 when HDR_TEST_MPEG1(hdr) is set, 64 otherwise). Such a band is
 * expanded with L3_intensity_stereo_band(); any other band is passed to
 * L3_midside_stereo() when HDR_TEST_MS_STEREO(hdr) is set and is otherwise
 * left untouched.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    unsigned band, max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;

    for (band = 0; sfb[band]; left += sfb[band], band++)
    {
        unsigned ipos = ist_pos[band];
        float kl, kr, s;

        if ((int)band <= max_band[band % 3] || ipos >= max_pos)
        {
            /* Not an intensity band. */
            if (HDR_TEST_MS_STEREO(hdr))
            {
                L3_midside_stereo(left, sfb[band]);
            }
            continue;
        }

        /* Extra sqrt(2) gain when mid/side coding is also flagged. */
        s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
        if (!HDR_TEST_MPEG1(hdr))
        {
            /* One channel keeps unit gain; the parity of ipos picks which. */
            float g = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
            if (ipos & 1)
            {
                kl = g;
                kr = 1;
            } else
            {
                kl = 1;
                kr = g;
            }
        } else
        {
            kl = g_pan[2*ipos];
            kr = g_pan[2*ipos + 1];
        }
        L3_intensity_stereo_band(left, sfb[band], kl*s, kr*s);
    }
}
962 
L3_intensity_stereo(float * left,uint8_t * ist_pos,const L3_gr_info_t * gr,const uint8_t * hdr)963 static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
964 {
965     int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
966     int i, max_blocks = gr->n_short_sfb ? 3 : 1;
967 
968     L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
969     if (gr->n_long_sfb)
970     {
971         max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
972     }
973     for (i = 0; i < max_blocks; i++)
974     {
975         int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
976         int itop = n_sfb - max_blocks + i;
977         int prev = itop - max_blocks;
978         ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
979     }
980     L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
981 }
982 
/*
 * Interleave three consecutive runs of each band.
 *
 * sfb is read every third entry; each non-zero width w describes 3*w input
 * samples stored as three back-to-back runs of w. They are rewritten as
 * w triples (run0[k], run1[k], run2[k]). A zero width ends the list.
 * The result is assembled in scratch and then copied back over grbuf.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    const float *runs = grbuf;
    size_t filled = 0;
    int width;

    while ((width = *sfb) != 0)
    {
        int k;
        for (k = 0; k < width; k++)
        {
            scratch[filled++] = runs[k];
            scratch[filled++] = runs[k + width];
            scratch[filled++] = runs[k + 2*width];
        }
        runs += 3*width;
        sfb += 3;
    }
    memcpy(grbuf, scratch, filled*sizeof(float));
}
999 
/*
 * Apply eight rotation butterflies across each of nbands boundaries between
 * consecutive 18-sample blocks.
 *
 * For k = 0..7 the pair (grbuf[18 + k], grbuf[17 - k]) = (u, d) becomes
 * (u*c[k] - d*s[k], u*s[k] + d*c[k]) with c = g_aa[0] and s = g_aa[1];
 * the buffer pointer then advances by 18. Nothing is done for nbands <= 0.
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };
    int left_to_do;

    for (left_to_do = nbands; left_to_do > 0; left_to_do--)
    {
        int k = 0;
#if HAVE_SIMD
        if (have_simd())
        {
            while (k < 8)
            {
                f4 vup = VLD(grbuf + 18 + k);
                f4 vdn = VLD(grbuf + 14 - k);
                f4 vcos = VLD(g_aa[0] + k);
                f4 vsin = VLD(g_aa[1] + k);
                /* The lower samples run backwards; flip them into butterfly order. */
                vdn = VREV(vdn);
                VSTORE(grbuf + 18 + k, VSUB(VMUL(vup, vcos), VMUL(vdn, vsin)));
                vdn = VADD(VMUL(vup, vsin), VMUL(vdn, vcos));
                VSTORE(grbuf + 14 - k, VREV(vdn));
                k += 4;
            }
        }
#endif /* HAVE_SIMD */
#ifndef MINIMP3_ONLY_SIMD
        while (k < 8)
        {
            const float up = grbuf[18 + k];
            const float dn = grbuf[17 - k];
            grbuf[18 + k] = up*g_aa[0][k] - dn*g_aa[1][k];
            grbuf[17 - k] = up*g_aa[1][k] + dn*g_aa[0][k];
            k++;
        }
#endif /* MINIMP3_ONLY_SIMD */
        grbuf += 18;
    }
}
1034 
/*
 * In-place 9-point transform used by the 36-point IMDCT.
 *
 * The even-indexed inputs (y[0], y[2], ..., y[8]) and the odd-indexed inputs
 * (y[1], y[3], y[5], y[7]) are reduced separately to four partial results
 * each; the outputs are their sums and differences, with y[4] formed from
 * the even half alone. Every intermediate is computed with the same
 * operands and in the same order as a straightforward scalar evaluation,
 * so results are bit-for-bit stable.
 */
static void L3_dct3_9(float *y)
{
    const float in0 = y[0], in1 = y[1], in2 = y[2], in3 = y[3], in4 = y[4];
    const float in5 = y[5], in6 = y[6], in7 = y[7], in8 = y[8];

    /* Even half. */
    const float base = in0 + in6*0.5f;
    const float diff = in0 - in6;
    const float rot4 = (in4 + in2)*0.93969262f;
    const float rot2 = (in8 + in2)*0.76604444f;
    const float rot6 = (in4 - in8)*0.17364818f;
    const float fold = in4 + (in8 - in2);

    const float ev_a = diff - fold*0.5f;
    const float ev_b = base - rot2 + rot6;
    const float ev_c = base - rot4 + rot2;
    const float ev_d = base + rot4 - rot6;

    /* Odd half. */
    const float mid  = in3*0.86602540f;
    const float rotA = (in5 + in1)*0.98480775f;
    const float rotB = (in5 - in7)*0.34202014f;
    const float rotC = (in1 + in7)*0.64278761f;
    const float od_a = (in1 - in5 - in7)*0.86602540f;

    const float od_b = rotA - mid - rotC;
    const float od_d = rotB - mid - rotA;
    const float od_c = rotB + mid - rotC;

    /* Recombine. */
    y[0] = ev_d - od_d;
    y[1] = ev_a + od_a;
    y[2] = ev_c - od_c;
    y[3] = ev_b + od_b;
    y[4] = fold + diff;
    y[5] = ev_b - od_b;
    y[6] = ev_c + od_c;
    y[7] = ev_a - od_a;
    y[8] = ev_d + od_d;
}
1074 
/*
 * 36-point IMDCT with windowing and overlap-add for nbands blocks.
 *
 *   grbuf   - 18 coefficients per block on input, 18 output samples per
 *             block on return (in place)
 *   overlap - 9 carried-over values per block; read for this call's output
 *             and rewritten for the next one
 *   window  - 18 window coefficients, used as two halves of 9
 *   nbands  - number of consecutive blocks to process
 *
 * Each block is folded into two 9-element vectors, transformed with
 * L3_dct3_9(), rotated by the g_twid9 twiddles and combined with the saved
 * overlap through the window.
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };
    int band;

    for (band = 0; band < nbands; band++)
    {
        float co[9], si[9];
        int k;

        /* Fold the 18 inputs into the two 9-point transform inputs. */
        co[0] = -grbuf[0];
        si[0] = grbuf[17];
        for (k = 0; k < 4; k++)
        {
            const float *q = grbuf + 4*k;
            si[8 - 2*k] =   q[1] - q[2];
            co[1 + 2*k] =   q[1] + q[2];
            si[7 - 2*k] =   q[4] - q[3];
            co[2 + 2*k] = -(q[3] + q[4]);
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        /* Flip the sign of the odd-indexed sine terms. */
        for (k = 1; k < 8; k += 2)
        {
            si[k] = -si[k];
        }

        k = 0;

#if HAVE_SIMD
        if (have_simd())
        {
            while (k < 8)
            {
                f4 vprev = VLD(overlap + k);
                f4 vco = VLD(co + k);
                f4 vsi = VLD(si + k);
                f4 vt0 = VLD(g_twid9 + k);
                f4 vt1 = VLD(g_twid9 + 9 + k);
                f4 vwin0 = VLD(window + k);
                f4 vwin1 = VLD(window + 9 + k);
                f4 vmix = VADD(VMUL(vco, vt1), VMUL(vsi, vt0));
                VSTORE(overlap + k, VSUB(VMUL(vco, vt0), VMUL(vsi, vt1)));
                VSTORE(grbuf + k, VSUB(VMUL(vprev, vwin0), VMUL(vmix, vwin1)));
                vmix = VADD(VMUL(vprev, vwin1), VMUL(vmix, vwin0));
                /* The mirrored half is stored back to front. */
                VSTORE(grbuf + 14 - k, VREV(vmix));
                k += 4;
            }
        }
#endif /* HAVE_SIMD */
        /* Scalar path; also handles the ninth element after the SIMD loop. */
        while (k < 9)
        {
            const float prev = overlap[k];
            const float mix  = co[k]*g_twid9[9 + k] + si[k]*g_twid9[0 + k];
            overlap[k] = co[k]*g_twid9[0 + k] - si[k]*g_twid9[9 + k];
            grbuf[k]      = prev*window[0 + k] - mix*window[9 + k];
            grbuf[17 - k] = prev*window[9 + k] + mix*window[0 + k];
            k++;
        }

        grbuf += 18;
        overlap += 9;
    }
}
1131 
/*
 * 3-point transform helper: writes
 *   dst[0] = (x0 - x2/2) + x1*0.86602540
 *   dst[1] =  x0 + x2
 *   dst[2] = (x0 - x2/2) - x1*0.86602540
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    const float odd = x1*0.86602540f;
    const float even = x0 - x2*0.5f;

    dst[0] = even + odd;
    dst[1] = x0 + x2;
    dst[2] = even - odd;
}
1140 
/*
 * 12-point IMDCT for one short window.
 *
 *   x       - six input coefficients taken at stride 3 (x[0], x[3], ..., x[15])
 *   dst     - receives six output samples
 *   overlap - three carried-over values; consumed here and replaced for the
 *             next window
 *
 * The inputs are folded into two 3-point transforms (L3_idct3), rotated by
 * g_twid3 and overlap-added; g_twid3 read in reverse also serves as window.
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    float co[3], si[3];
    int k = 0;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    while (k < 3)
    {
        const float prev = overlap[k];
        const float mix  = co[k]*g_twid3[3 + k] + si[k]*g_twid3[0 + k];
        overlap[k] = co[k]*g_twid3[0 + k] - si[k]*g_twid3[3 + k];
        dst[k]     = prev*g_twid3[2 - k] - mix*g_twid3[5 - k];
        dst[5 - k] = prev*g_twid3[5 - k] + mix*g_twid3[2 - k];
        k++;
    }
}
1160 
/*
 * Short-block IMDCT for nbands consecutive 18-sample blocks.
 *
 * For every block the 18 coefficients are copied aside, the first six
 * outputs are taken straight from the saved overlap, and three 12-point
 * transforms (interleaved inputs at offsets 0, 1, 2) fill the rest: the
 * first two write into the block, the third writes the first six overlap
 * values for the next call. All three chain through overlap[6..8].
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    while (nbands > 0)
    {
        float coef[18];
        float *const carry = overlap + 6;

        memcpy(coef, grbuf, sizeof(coef));
        memcpy(grbuf, overlap, 6*sizeof(float));
        L3_imdct12(coef, grbuf + 6, carry);
        L3_imdct12(coef + 1, grbuf + 12, carry);
        L3_imdct12(coef + 2, overlap, carry);

        nbands--;
        overlap += 9;
        grbuf += 18;
    }
}
1173 
/*
 * Negate every odd-indexed sample of every odd-numbered 18-sample block
 * (blocks 1, 3, ..., 31) of a 32-block buffer, in place.
 *
 * Rows are addressed by index rather than by stepping the pointer: the
 * stepping form ends with the pointer at grbuf + 594, beyond the
 * 32*18 = 576 floats this routine covers, and forming such a pointer is
 * undefined when the buffer is exactly that long.
 */
static void L3_change_sign(float *grbuf)
{
    int band, k;

    for (band = 1; band < 32; band += 2)
    {
        float *row = grbuf + 18*band;
        for (k = 1; k < 18; k += 2)
        {
            row[k] = -row[k];
        }
    }
}
1181 
L3_imdct_gr(float * grbuf,float * overlap,unsigned block_type,unsigned n_long_bands)1182 static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1183 {
1184     static const float g_mdct_window[2][18] = {
1185         { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1186         { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1187     };
1188     if (n_long_bands)
1189     {
1190         L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1191         grbuf += 18*n_long_bands;
1192         overlap += 9*n_long_bands;
1193     }
1194     if (block_type == SHORT_BLOCK_TYPE)
1195         L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1196     else
1197         L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1198 }
1199 
/*
 * Keep the unread tail of the main-data buffer for the next frame.
 *
 * The read position is rounded up to a whole byte; whatever lies between
 * it and the end of the data (at most MAX_BITRESERVOIR_BYTES, taking the
 * most recent bytes when there are more) is moved to h->reserv_buf.
 * h->reserv receives the byte count, which is stored even when it is not
 * positive and nothing was copied.
 */
static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
{
    const int used = (s->bs.pos + 7)/8u;
    const int avail = s->bs.limit/8u - used;
    const int keep = (avail > MAX_BITRESERVOIR_BYTES) ? MAX_BITRESERVOIR_BYTES : avail;

    if (keep > 0)
    {
        /* Skip the oldest bytes when more are available than can be kept. */
        memmove(h->reserv_buf, s->maindata + (used + (avail - keep)), keep);
    }
    h->reserv = keep;
}
1215 
/*
 * Assemble the main-data buffer for the current frame.
 *
 * Up to main_data_begin bytes are taken from the end of the saved reservoir
 * (fewer if the reservoir holds less), followed by the remaining whole
 * bytes of the frame starting at bs's current position. s->bs is then
 * initialised over the combined data.
 *
 * Returns non-zero when the reservoir held at least main_data_begin bytes.
 */
static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
{
    const int frame_bytes = (bs->limit - bs->pos)/8;
    int bytes_have, skip;

    if (h->reserv < main_data_begin)
    {
        /* Reservoir is short: use all of it. */
        bytes_have = h->reserv;
        skip = 0;
    } else
    {
        bytes_have = main_data_begin;
        skip = h->reserv - main_data_begin;
    }

    memcpy(s->maindata, h->reserv_buf + skip, bytes_have);
    memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
    bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
    return h->reserv >= main_data_begin;
}
1225 
/*
 * Decode one set of per-channel records into s->grbuf.
 *
 * Pass 1 (per channel): compute the end of the channel's bit budget from
 * the current position and part_23_length, decode the scalefactors, then
 * the Huffman data up to that limit.
 * Stereo: run intensity-stereo processing when HDR_TEST_I_STEREO is set,
 * otherwise a full 576-sample mid/side butterfly when HDR_IS_MS_STEREO is
 * set.
 * Pass 2 (per channel): reorder short-block data if present, apply the
 * boundary butterflies, run the IMDCT and finally the sign alternation.
 */
static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
{
    int ch;

    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        /* Must be taken before the scalefactors advance the bit position. */
        int layer3gr_limit = s->bs.pos + gr->part_23_length;

        L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr, s->scf, ch);
        L3_huffman(s->grbuf[ch], &s->bs, gr, s->scf, layer3gr_limit);
    }

    if (HDR_TEST_I_STEREO(h->header))
    {
        L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
    } else if (HDR_IS_MS_STEREO(h->header))
    {
        L3_midside_stereo(s->grbuf[0], 576);
    }

    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        float *spectrum = s->grbuf[ch];
        int n_long_bands = (gr->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
        int aa_bands;

        if (!gr->n_short_sfb)
        {
            aa_bands = 31;
        } else
        {
            /* Only boundaries inside the long part get the butterflies. */
            aa_bands = n_long_bands - 1;
            L3_reorder(spectrum + n_long_bands*18, s->syn[0], gr->sfbtab + gr->n_long_sfb);
        }

        L3_antialias(spectrum, aa_bands);
        L3_imdct_gr(spectrum, h->mdct_overlap[ch], gr->block_type, n_long_bands);
        L3_change_sign(spectrum);
    }
}
1261 
mp3d_DCT_II(float * grbuf,int n)1262 static void mp3d_DCT_II(float *grbuf, int n)
1263 {
1264     static const float g_sec[24] = {
1265         10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
1266     };
1267     int i, k = 0;
1268 #if HAVE_SIMD
1269     if (have_simd()) for (; k < n; k += 4)
1270     {
1271         f4 t[4][8], *x;
1272         float *y = grbuf + k;
1273 
1274         for (x = t[0], i = 0; i < 8; i++, x++)
1275         {
1276             f4 x0 = VLD(&y[i*18]);
1277             f4 x1 = VLD(&y[(15 - i)*18]);
1278             f4 x2 = VLD(&y[(16 + i)*18]);
1279             f4 x3 = VLD(&y[(31 - i)*18]);
1280             f4 t0 = VADD(x0, x3);
1281             f4 t1 = VADD(x1, x2);
1282             f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
1283             f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
1284             x[0] = VADD(t0, t1);
1285             x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
1286             x[16] = VADD(t3, t2);
1287             x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
1288         }
1289         for (x = t[0], i = 0; i < 4; i++, x += 8)
1290         {
1291             f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
1292             xt = VSUB(x0, x7); x0 = VADD(x0, x7);
1293             x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
1294             x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
1295             x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
1296             x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
1297             x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
1298             x[0] = VADD(x0, x1);
1299             x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
1300             x5 = VADD(x5, x6);
1301             x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
1302             x7 = VADD(x7, xt);
1303             x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
1304             x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
1305             x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
1306             x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
1307             x0 = VSUB(xt, x6); xt = VADD(xt, x6);
1308             x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
1309             x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
1310             x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
1311             x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
1312             x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
1313             x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
1314         }
1315 
1316         if (k > n - 3)
1317         {
1318 #if HAVE_SSE
1319 #define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
1320 #else /* HAVE_SSE */
1321 #define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18],  vget_low_f32(v))
1322 #endif /* HAVE_SSE */
1323             for (i = 0; i < 7; i++, y += 4*18)
1324             {
1325                 f4 s = VADD(t[3][i], t[3][i + 1]);
1326                 VSAVE2(0, t[0][i]);
1327                 VSAVE2(1, VADD(t[2][i], s));
1328                 VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
1329                 VSAVE2(3, VADD(t[2][1 + i], s));
1330             }
1331             VSAVE2(0, t[0][7]);
1332             VSAVE2(1, VADD(t[2][7], t[3][7]));
1333             VSAVE2(2, t[1][7]);
1334             VSAVE2(3, t[3][7]);
1335         } else
1336         {
1337 #define VSAVE4(i, v) VSTORE(&y[i*18], v)
1338             for (i = 0; i < 7; i++, y += 4*18)
1339             {
1340                 f4 s = VADD(t[3][i], t[3][i + 1]);
1341                 VSAVE4(0, t[0][i]);
1342                 VSAVE4(1, VADD(t[2][i], s));
1343                 VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
1344                 VSAVE4(3, VADD(t[2][1 + i], s));
1345             }
1346             VSAVE4(0, t[0][7]);
1347             VSAVE4(1, VADD(t[2][7], t[3][7]));
1348             VSAVE4(2, t[1][7]);
1349             VSAVE4(3, t[3][7]);
1350         }
1351     } else
1352 #endif /* HAVE_SIMD */
1353 #ifdef MINIMP3_ONLY_SIMD
1354     {}
1355 #else /* MINIMP3_ONLY_SIMD */
1356     for (; k < n; k++)
1357     {
1358         float t[4][8], *x, *y = grbuf + k;
1359 
1360         for (x = t[0], i = 0; i < 8; i++, x++)
1361         {
1362             float x0 = y[i*18];
1363             float x1 = y[(15 - i)*18];
1364             float x2 = y[(16 + i)*18];
1365             float x3 = y[(31 - i)*18];
1366             float t0 = x0 + x3;
1367             float t1 = x1 + x2;
1368             float t2 = (x1 - x2)*g_sec[3*i + 0];
1369             float t3 = (x0 - x3)*g_sec[3*i + 1];
1370             x[0] = t0 + t1;
1371             x[8] = (t0 - t1)*g_sec[3*i + 2];
1372             x[16] = t3 + t2;
1373             x[24] = (t3 - t2)*g_sec[3*i + 2];
1374         }
1375         for (x = t[0], i = 0; i < 4; i++, x += 8)
1376         {
1377             float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
1378             xt = x0 - x7; x0 += x7;
1379             x7 = x1 - x6; x1 += x6;
1380             x6 = x2 - x5; x2 += x5;
1381             x5 = x3 - x4; x3 += x4;
1382             x4 = x0 - x3; x0 += x3;
1383             x3 = x1 - x2; x1 += x2;
1384             x[0] = x0 + x1;
1385             x[4] = (x0 - x1)*0.70710677f;
1386             x5 =  x5 + x6;
1387             x6 = (x6 + x7)*0.70710677f;
1388             x7 =  x7 + xt;
1389             x3 = (x3 + x4)*0.70710677f;
1390             x5 -= x7*0.198912367f;  /* rotate by PI/8 */
1391             x7 += x5*0.382683432f;
1392             x5 -= x7*0.198912367f;
1393             x0 = xt - x6; xt += x6;
1394             x[1] = (xt + x7)*0.50979561f;
1395             x[2] = (x4 + x3)*0.54119611f;
1396             x[3] = (x0 - x5)*0.60134488f;
1397             x[5] = (x0 + x5)*0.89997619f;
1398             x[6] = (x4 - x3)*1.30656302f;
1399             x[7] = (xt - x7)*2.56291556f;
1400 
1401         }
1402         for (i = 0; i < 7; i++, y += 4*18)
1403         {
1404             y[0*18] = t[0][i];
1405             y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
1406             y[2*18] = t[1][i] + t[1][i + 1];
1407             y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
1408         }
1409         y[0*18] = t[0][7];
1410         y[1*18] = t[2][7] + t[3][7];
1411         y[2*18] = t[1][7];
1412         y[3*18] = t[3][7];
1413     }
1414 #endif /* MINIMP3_ONLY_SIMD */
1415 }
1416 
1417 #ifndef MINIMP3_FLOAT_OUTPUT
/*
    Converts one synthesized float sample to a saturated signed 16-bit value.

    The sample is biased by +0.5 and truncated; negative biased values are then
    decremented by one so that they round to the nearest integer as well
    (halves such as -1.5 go away from zero, to -2).

    The sign test is done on the biased float, not on the truncated integer:
    truncation maps every biased value in (-1, 0) to 0, so testing the integer
    would leave samples in (-1.5, -0.5) at 0 instead of -1.  This is the same
    rule the NEON path of mp3d_synth() applies (compare against 0 after the bias).
*/
static int16_t mp3d_scale_pcm(float sample)
{
    float biased;
    int32_t s32;
#if HAVE_ARMV6
    biased = sample + .5f;
    s32 = (int32_t)biased;
    s32 -= (biased < 0);    /* away from zero, to be compliant */
    return (int16_t)minimp3_clip_int16_arm(s32);
#else
    /* Saturate first so the integer conversion below always fits in 16 bits. */
    if (sample >=  32766.5) return (int16_t) 32767;
    if (sample <= -32767.5) return (int16_t)-32768;
    biased = sample + .5f;
    s32 = (int32_t)biased;
    s32 -= (biased < 0);    /* away from zero, to be compliant */
    return (int16_t)s32;
#endif
}
1432 #else /* MINIMP3_FLOAT_OUTPUT */
/*
    Float-output variant: rescales a synthesized sample by 1/32768, so a value
    of 32768 maps to 1.0.  No clipping is applied.
*/
static float mp3d_scale_pcm(float sample)
{
    static const float inv_full_scale = 1.f/32768.f;
    return sample*inv_full_scale;
}
1437 #endif /* MINIMP3_FLOAT_OUTPUT */
1438 
/*
    Produces two output samples from fixed-coefficient sums over z.

    pcm[0]      is accumulated from z[k*64],     k = 0..14
    pcm[16*nch] is accumulated from z[2 + k*64], even k = 0..14

    The terms are added in a fixed order; keep it, since float addition is not
    associative and reordering would change the low bits of the result.
*/
static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
{
    const float *z2 = z + 2;    /* second sum reads two floats further on */
    float acc;

    acc  = (z[14*64] - z[ 0*64]) * 29.0f;
    acc += (z[ 1*64] + z[13*64]) * 213.0f;
    acc += (z[12*64] - z[ 2*64]) * 459.0f;
    acc += (z[ 3*64] + z[11*64]) * 2037.0f;
    acc += (z[10*64] - z[ 4*64]) * 5153.0f;
    acc += (z[ 5*64] + z[ 9*64]) * 6574.0f;
    acc += (z[ 8*64] - z[ 6*64]) * 37489.0f;
    acc +=  z[ 7*64]             * 75038.0f;
    pcm[0] = mp3d_scale_pcm(acc);

    acc  = z2[14*64] * 104.0f;
    acc += z2[12*64] * 1567.0f;
    acc += z2[10*64] * 9727.0f;
    acc += z2[ 8*64] * 64019.0f;
    acc += z2[ 6*64] * -9975.0f;
    acc += z2[ 4*64] * -45.0f;
    acc += z2[ 2*64] * 146.0f;
    acc += z2[ 0*64] * -5.0f;
    pcm[16*nch] = mp3d_scale_pcm(acc);
}
1463 
/*
    Windowed synthesis step: consumes two adjacent columns (xl[0 + 18*k] and
    xl[1 + 18*k], k = 0..31) of the left buffer and of the right buffer
    xr = xl + 576*(nch - 1), and writes 64 output samples per channel.

    xl    - left-channel input; rows are 18 floats apart
    dstl  - left-channel output; samples are nch apart, the right channel is
            written through dstr = dstl + (nch - 1)
    nch   - channel count (1 or 2 as used by the callers in this file)
    lins  - work buffer; this call stores new values at and around
            zlin = lins + 15*64 and reads back up to 15*64 floats below them

    zlin is organised in groups of four floats: lanes 0/1 receive xl/xr values
    whose results go to output indices 0..31, lanes 2/3 receive xl/xr values
    whose results go to output indices 32..63.

    For nch == 1 xr aliases xl and dstr aliases dstl.  Every right-channel store
    is issued before the matching left-channel store, so the left value is the
    one that remains.

    The helper macros defined inside this function are #undef'ed right after
    their single use so that this single-header implementation does not leak
    short generic names (LOAD, S0, V0, ...) into the including translation unit.
*/
static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
{
    int i;
    float *xr = xl + 576*(nch - 1);
    mp3d_sample_t *dstr = dstl + (nch - 1);

    /* 15 rows of 16 coefficients (presumably the synthesis window); one row is
       consumed sequentially through *w++ on every loop iteration below */
    static const float g_win[] = {
        -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
        -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
        -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
        -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
        -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
        -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
        -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
        -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
        -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
        -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
        -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
        -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
        -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
        -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
        -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
    };
    float *zlin = lins + 15*64;
    const float *w = g_win;

    /* Rows 16 and 0 of both columns feed the four samples that
       mp3d_synth_pair() computes (output indices 0, 16, 32 and 48). */
    zlin[4*15]     = xl[18*16];
    zlin[4*15 + 1] = xr[18*16];
    zlin[4*15 + 2] = xl[0];
    zlin[4*15 + 3] = xr[0];

    zlin[4*31]     = xl[1 + 18*16];
    zlin[4*31 + 1] = xr[1 + 18*16];
    zlin[4*31 + 2] = xl[1];
    zlin[4*31 + 3] = xr[1];

    /* right before left: see the mono aliasing note in the header comment */
    mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
    mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
    mp3d_synth_pair(dstl, nch, lins + 4*15);
    mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);

#if HAVE_SIMD
    if (have_simd()) for (i = 14; i >= 0; i--)
    {
        /* Each V* step pulls two coefficients from w and two 4-float groups,
           64*k and 64*(15 - k) floats below zlin[4*i], and accumulates them
           into the vector sums a and b. */
#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
#define V0(k) { VLOAD(k) b =         VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a =         VSUB(VMUL(vz, w0), VMUL(vy, w1));  }
#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
        f4 a, b;
        zlin[4*i]     = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*i + 64] = xl[1 + 18*(1 + i)];
        zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
        zlin[4*i - 64 + 2] = xl[18*(1 + i)];
        zlin[4*i - 64 + 3] = xr[18*(1 + i)];

        V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
#undef VLOAD
#undef V0
#undef V1
#undef V2

        {
#ifndef MINIMP3_FLOAT_OUTPUT
#if HAVE_SSE
            /* clamp in float, convert, then pack with signed saturation */
            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
            dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
            dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
            dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
            dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
            dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
            dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
            dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
            dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
#else /* HAVE_SSE */
            /* bias by +0.5, truncate, subtract one where the biased value is
               negative (the compare mask is all-ones, i.e. -1), then narrow
               with saturation */
            int16x4_t pcma, pcmb;
            a = VADD(a, VSET(0.5f));
            b = VADD(b, VSET(0.5f));
            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
            vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
            vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
            vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
            vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
            vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
            vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
            vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
            vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
#endif /* HAVE_SSE */

#else /* MINIMP3_FLOAT_OUTPUT */

            static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
            a = VMUL(a, g_scale);
            b = VMUL(b, g_scale);
#if HAVE_SSE
            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
#else /* HAVE_SSE */
            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
#endif /* HAVE_SSE */
#endif /* MINIMP3_FLOAT_OUTPUT */
        }
    } else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
    {}
#else /* MINIMP3_ONLY_SIMD */
    for (i = 14; i >= 0; i--)
    {
        /* Scalar counterpart of the V* macros: the same two coefficients and
           the same two 4-float groups per step, accumulated lane by lane. */
#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j]  = vz[j]*w1 + vy[j]*w0, a[j]  = vz[j]*w0 - vy[j]*w1; }
#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
        float a[4], b[4];

        zlin[4*i]     = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*(i + 16)]   = xl[1 + 18*(1 + i)];
        zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
        zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
        zlin[4*(i - 16) + 3] = xr[18*(1 + i)];

        S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)
#undef LOAD
#undef S0
#undef S1
#undef S2

        dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
        dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
        dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
        dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
        dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
        dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
        dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
        dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
    }
#endif /* MINIMP3_ONLY_SIMD */
}
1616 
/*
    Runs the synthesis chain for one block of grbuf: mp3d_DCT_II() per channel,
    then mp3d_synth() for every pair of columns, with the 15*64-float filter
    history carried between calls in qmf_state.

    lins is the work buffer: it is seeded with the saved history, extended by
    mp3d_synth(), and its 15*64 floats starting at offset nbands*64 become the
    history for the next call.
*/
static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
{
    enum { QMF_HISTORY_FLOATS = 15*64 };
    const float *next_history;
    int ch, col;

    for (ch = 0; ch < nch; ch++)
    {
        mp3d_DCT_II(&grbuf[576*ch], nbands);
    }

    memcpy(lins, qmf_state, sizeof(float)*QMF_HISTORY_FLOATS);

    /* each mp3d_synth() call handles two columns and emits 64 samples per channel */
    for (col = 0; col < nbands; col += 2)
    {
        mp3d_synth(&grbuf[col], &pcm[32*nch*col], nch, &lins[col*64]);
    }

    next_history = lins + nbands*64;
#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
    if (nch == 1)
    {
        /* mono: only the even-indexed entries are saved, the odd ones keep their old value */
        int k;
        for (k = 0; k < QMF_HISTORY_FLOATS; k += 2)
        {
            qmf_state[k] = next_history[k];
        }
        return;
    }
#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
    memcpy(qmf_state, next_history, sizeof(float)*QMF_HISTORY_FLOATS);
}
1644 
mp3d_match_frame(const uint8_t * hdr,int mp3_bytes,int frame_bytes)1645 static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1646 {
1647     int i, nmatch;
1648     for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1649     {
1650         i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1651         if (i + HDR_SIZE > mp3_bytes)
1652             return nmatch > 0;
1653         if (!hdr_compare(hdr, hdr + i))
1654             return 0;
1655     }
1656     return 1;
1657 }
1658 
mp3d_find_frame(const uint8_t * mp3,int mp3_bytes,int * free_format_bytes,int * ptr_frame_bytes)1659 static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1660 {
1661     int i, k;
1662     for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1663     {
1664         if (hdr_valid(mp3))
1665         {
1666             int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1667             int frame_and_padding = frame_bytes + hdr_padding(mp3);
1668 
1669             for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1670             {
1671                 if (hdr_compare(mp3, mp3 + k))
1672                 {
1673                     int fb = k - hdr_padding(mp3);
1674                     int nextfb = fb + hdr_padding(mp3 + k);
1675                     if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1676                         continue;
1677                     frame_and_padding = k;
1678                     frame_bytes = fb;
1679                     *free_format_bytes = fb;
1680                 }
1681             }
1682             if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1683                 mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1684                 (!i && frame_and_padding == mp3_bytes))
1685             {
1686                 *ptr_frame_bytes = frame_and_padding;
1687                 return i;
1688             }
1689             *free_format_bytes = 0;
1690         }
1691     }
1692     *ptr_frame_bytes = 0;
1693     return mp3_bytes;
1694 }
1695 
/*
    Resets the decoder by clearing the first byte of the remembered header.
    mp3dec_decode_frame() takes its fast path only when that byte is 0xff;
    otherwise it zeroes the whole mp3dec_t and searches for a frame again.
*/
void mp3dec_init(mp3dec_t *dec)
{
    *dec->header = 0;
}
1700 
/*
    Decodes at most one frame from mp3[0 .. mp3_bytes).

    dec   - decoder state, prepared with mp3dec_init()
    pcm   - output samples; when NULL the frame is located and described in
            info, but not decoded
    info  - frame_bytes is always written: the number of bytes up to the end of
            the located frame, or the value returned by mp3d_find_frame() when
            no frame was accepted.  The remaining fields are written only once
            a frame has been located.

    Returns the sample count reported by hdr_frame_samples(), or 0 when no
    frame was found or the frame could not be decoded.
*/
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
{
    int offset = 0, frame_size = 0, success = 1, granule;
    const uint8_t *hdr;
    bs_t bs_frame[1];
    mp3dec_scratch_t scratch;

    /* Fast path: the data starts with a header compatible with the previous
       frame, and the frame either fills the buffer exactly or is followed by
       another compatible header. */
    if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
    {
        int expected = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
        if (expected == mp3_bytes ||
            (expected + HDR_SIZE <= mp3_bytes && hdr_compare(mp3, mp3 + expected)))
        {
            frame_size = expected;
        }
    }

    /* Slow path: drop all decoder state and search for a frame. */
    if (!frame_size)
    {
        memset(dec, 0, sizeof(mp3dec_t));
        offset = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
        if (!frame_size || offset + frame_size > mp3_bytes)
        {
            info->frame_bytes = offset;
            return 0;
        }
    }

    hdr = mp3 + offset;
    memcpy(dec->header, hdr, HDR_SIZE);

    info->frame_offset = offset;
    info->frame_bytes = offset + frame_size;
    info->layer = 4 - HDR_GET_LAYER(hdr);
    info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
    info->hz = hdr_sample_rate_hz(hdr);
    info->bitrate_kbps = hdr_bitrate_kbps(hdr);

    if (!pcm)
    {
        return hdr_frame_samples(hdr);
    }

    bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
    if (HDR_IS_CRC(hdr))
    {
        get_bits(bs_frame, 16);     /* the 16 CRC bits are skipped, not verified */
    }

    if (info->layer == 3)
    {
        const int granules = HDR_TEST_MPEG1(hdr) ? 2 : 1;
        int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);

        if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
        {
            mp3dec_init(dec);
            return 0;
        }

        success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
        for (granule = 0; success && granule < granules; granule++)
        {
            memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
            L3_decode(dec, &scratch, scratch.gr_info + granule*info->channels, info->channels);
            mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
            pcm += 576*info->channels;
        }
        /* the reservoir is saved even when this frame could not be decoded */
        L3_save_reservoir(dec, &scratch);
    } else
    {
#ifdef MINIMP3_ONLY_MP3
        return 0;
#else /* MINIMP3_ONLY_MP3 */
        L12_scale_info sci[1];
        int filled = 0;     /* running total returned by L12_dequantize_granule() */

        L12_read_scale_info(hdr, bs_frame, sci);

        memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
        for (granule = 0; granule < 3; granule++)
        {
            filled += L12_dequantize_granule(scratch.grbuf[0] + filled, bs_frame, sci, info->layer | 1);
            if (filled == 12)
            {
                /* buffer complete: scale, synthesize 384 samples per channel, start over */
                filled = 0;
                L12_apply_scf_384(sci, sci->scf + granule, scratch.grbuf[0]);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                pcm += 384*info->channels;
            }
            if (bs_frame->pos > bs_frame->limit)
            {
                mp3dec_init(dec);
                return 0;
            }
        }
#endif /* MINIMP3_ONLY_MP3 */
    }
    return success*hdr_frame_samples(dec->header);
}
1795 
1796 #ifdef MINIMP3_FLOAT_OUTPUT
/*
    Converts num_samples floats (scaled so that 1.0 is full scale) to saturated
    signed 16-bit samples.

    in          - source samples
    out         - destination, num_samples entries
    num_samples - number of samples to convert

    With SIMD enabled, whole groups of eight samples are converted by the vector
    code; the remaining samples (all of them without SIMD) go through the scalar
    loop at the end.

    The scalar loop rounds by adding 0.5, truncating, and subtracting one when
    the biased value is negative.  The sign is tested on the biased float rather
    than on the truncated integer: truncation turns every biased value in
    (-1, 0) into 0, so an integer test would convert inputs in (-1.5, -0.5)/32768
    to 0 instead of -1 and make the tail samples disagree with the samples
    converted by the vector code in the same buffer.
*/
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
    int i = 0;
#if HAVE_SIMD
    int aligned_count = num_samples & ~7;
    for(; i < aligned_count; i += 8)
    {
        static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
        f4 a = VMUL(VLD(&in[i  ]), g_scale);
        f4 b = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
        static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
        static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
        __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                       _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
        out[i  ] = _mm_extract_epi16(pcm8, 0);
        out[i+1] = _mm_extract_epi16(pcm8, 1);
        out[i+2] = _mm_extract_epi16(pcm8, 2);
        out[i+3] = _mm_extract_epi16(pcm8, 3);
        out[i+4] = _mm_extract_epi16(pcm8, 4);
        out[i+5] = _mm_extract_epi16(pcm8, 5);
        out[i+6] = _mm_extract_epi16(pcm8, 6);
        out[i+7] = _mm_extract_epi16(pcm8, 7);
#else /* HAVE_SSE */
        int16x4_t pcma, pcmb;
        a = VADD(a, VSET(0.5f));
        b = VADD(b, VSET(0.5f));
        pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
        pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
        vst1_lane_s16(out+i  , pcma, 0);
        vst1_lane_s16(out+i+1, pcma, 1);
        vst1_lane_s16(out+i+2, pcma, 2);
        vst1_lane_s16(out+i+3, pcma, 3);
        vst1_lane_s16(out+i+4, pcmb, 0);
        vst1_lane_s16(out+i+5, pcmb, 1);
        vst1_lane_s16(out+i+6, pcmb, 2);
        vst1_lane_s16(out+i+7, pcmb, 3);
#endif /* HAVE_SSE */
    }
#endif /* HAVE_SIMD */
    for(; i < num_samples; i++)
    {
        float sample = in[i] * 32768.0f;
        if (sample >=  32766.5)
            out[i] = (int16_t) 32767;
        else if (sample <= -32767.5)
            out[i] = (int16_t)-32768;
        else
        {
            float biased = sample + .5f;
            int s = (int)biased;
            s -= (biased < 0);  /* away from zero, to be compliant */
            out[i] = (int16_t)s;
        }
    }
}
1852 #endif /* MINIMP3_FLOAT_OUTPUT */
1853 #endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */
1854