1 #ifndef MINIMP3_H
2 #define MINIMP3_H
3 /*
4 https://github.com/lieff/minimp3
5 To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
6 This software is distributed without any warranty.
7 See <http://creativecommons.org/publicdomain/zero/1.0/>.
8 */
9 #include <stdint.h>
10
/* Sample-count bound: 1152 samples times 2.
   NOTE(review): the factor 2 is presumably the channel count - confirm against the decoder. */
#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)

/* Per-frame description; passed by pointer as the `info` argument of mp3dec_decode_frame().
   NOTE(review): field meanings below are inferred from their names - confirm against the decoder. */
typedef struct
{
    int frame_bytes;   /* presumably the size of the frame in bytes */
    int frame_offset;  /* presumably the offset of the frame inside the input buffer */
    int channels;
    int hz;            /* presumably the sample rate in Hz */
    int layer;
    int bitrate_kbps;
} mp3dec_frame_info_t;
17
/* Decoder state; a pointer to it is taken by mp3dec_init() and mp3dec_decode_frame().
   NOTE(review): per-field roles are inferred from names and sizes - confirm against the decoder. */
typedef struct
{
    float mdct_overlap[2][9*32];
    float qmf_state[15*2*32];
    int reserv;
    int free_format_bytes;
    unsigned char header[4];       /* same size as HDR_SIZE in the implementation part */
    unsigned char reserv_buf[511]; /* same size as MAX_BITRESERVOIR_BYTES in the implementation part */
} mp3dec_t;
24
25 #ifdef __cplusplus
26 extern "C" {
27 #endif /* __cplusplus */
28
29 void mp3dec_init(mp3dec_t *dec);
30 #ifndef MINIMP3_FLOAT_OUTPUT
31 typedef int16_t mp3d_sample_t;
32 #else /* MINIMP3_FLOAT_OUTPUT */
33 typedef float mp3d_sample_t;
34 void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
35 #endif /* MINIMP3_FLOAT_OUTPUT */
36 int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);
37
38 #ifdef __cplusplus
39 }
40 #endif /* __cplusplus */
41
42 #endif /* MINIMP3_H */
43 #if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
44 #define _MINIMP3_IMPLEMENTATION_GUARD
45
46 #include <stdlib.h>
47 #include <string.h>
48
#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than the ISO spec requires */
#ifndef MAX_FRAME_SYNC_MATCHES
#define MAX_FRAME_SYNC_MATCHES      10
#endif /* MAX_FRAME_SYNC_MATCHES */

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

#define MAX_BITRESERVOIR_BYTES      511
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1
#define HDR_SIZE                    4

/*
 * Accessors for the 4-byte frame header `h` (pointer to the first header byte).
 * The parameter is parenthesized before subscripting so that pointer
 * expressions such as `buf + offset` can be passed safely.
 */
#define HDR_IS_MONO(h)              ((((h)[3]) & 0xC0) == 0xC0)
#define HDR_IS_MS_STEREO(h)         ((((h)[3]) & 0xE0) == 0x60)
#define HDR_IS_FREE_FORMAT(h)       ((((h)[2]) & 0xF0) == 0)
#define HDR_IS_CRC(h)               (!(((h)[1]) & 1))
#define HDR_TEST_PADDING(h)         (((h)[2]) & 0x2)
#define HDR_TEST_MPEG1(h)           (((h)[1]) & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      (((h)[1]) & 0x10)
#define HDR_TEST_I_STEREO(h)        (((h)[3]) & 0x10)
#define HDR_TEST_MS_STEREO(h)       (((h)[3]) & 0x20)
#define HDR_GET_STEREO_MODE(h)      ((((h)[3]) >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  ((((h)[3]) >> 4) & 3)
#define HDR_GET_LAYER(h)            ((((h)[1]) >> 1) & 3)
#define HDR_GET_BITRATE(h)          (((h)[2]) >> 4)
#define HDR_GET_SAMPLE_RATE(h)      ((((h)[2]) >> 2) & 3)
/* Sample-rate index plus 3 for each of the two version bits (h[1] bits 3 and 4) that is set. */
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + ((((h)[1] >> 3) & 1) + (((h)[1] >> 4) & 1))*3)
#define HDR_IS_FRAME_576(h)         (((h)[1] & 14) == 2)
#define HDR_IS_LAYER_1(h)           (((h)[1] & 6) == 6)

/* Parenthesized so the negative constant is safe in any expression context. */
#define BITS_DEQUANTIZER_OUT        (-1)
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)   /* MAX_SCF rounded up to a multiple of 4 */

/* NOTE: both macros evaluate their arguments more than once; pass side-effect-free expressions. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
86
87 #if !defined(MINIMP3_NO_SIMD)
88
89 #if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
/* x64 always has SSE2 and arm64 always has NEON, so no generic code is needed */
91 #define MINIMP3_ONLY_SIMD
92 #endif /* SIMD checks... */
93
94 #if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
95 #if defined(_MSC_VER)
96 #include <intrin.h>
97 #endif /* defined(_MSC_VER) */
98 #include <immintrin.h>
99 #define HAVE_SSE 1
100 #define HAVE_SIMD 1
101 #define VSTORE _mm_storeu_ps
102 #define VLD _mm_loadu_ps
103 #define VSET _mm_set1_ps
104 #define VADD _mm_add_ps
105 #define VSUB _mm_sub_ps
106 #define VMUL _mm_mul_ps
107 #define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
108 #define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
109 #define VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s))
110 #define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
111 typedef __m128 f4;
#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
/* No hand-written CPUID wrapper in these configurations: the name is simply
   aliased to __cpuid. */
#define minimp3_cpuid __cpuid
#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Executes the CPUID instruction with EAX = InfoType and stores the resulting
 * EAX, EBX, ECX and EDX into CPUInfo[0], CPUInfo[1], CPUInfo[2] and CPUInfo[3].
 */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if !defined(__PIC__)
    /* Non-PIC build: EBX can be named directly as an output register. */
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else /* defined(__PIC__) */
    /* PIC build: EBX/RBX is saved and restored by hand and the EBX result is
       handed back through the scratch register %1.
       NOTE(review): the x86-64 variant uses push/pop inside the asm statement,
       which writes below the stack pointer - confirm this is safe with respect
       to the ABI red zone if this branch is ever compiled. */
    __asm__ __volatile__(
#if defined(__x86_64__)
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop %%rbx\n"
#else /* defined(__x86_64__) */
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif /* defined(__x86_64__) */
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif /* !defined(__PIC__) */
}
#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Reports whether the SIMD code path may be used.
 *
 * Returns non-zero when SIMD is available and 0 otherwise. With
 * MINIMP3_ONLY_SIMD the answer is always 1. Otherwise CPUID is queried once
 * (leaf 1, EDX bit 26 = SSE2) and the result is cached in a function-local
 * static for later calls.
 *
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype in C (an empty list leaves the parameters unspecified).
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else /* MINIMP3_ONLY_SIMD */
    /* 0 means "not probed yet"; otherwise holds (result + 1). */
    static int g_have_simd;
    int CPUInfo[4];
#ifdef MINIMP3_TEST
    /* Test builds report "no SIMD" once the call counter exceeds 100. */
    static int g_counter;
    if (g_counter++ > 100)
        return 0;
#endif /* MINIMP3_TEST */
    if (!g_have_simd)
    {
        minimp3_cpuid(CPUInfo, 0);
        g_have_simd = 1;        /* probed: assume no SSE2 unless leaf 1 says so */
        if (CPUInfo[0] > 0)     /* leaf 1 is available */
        {
            minimp3_cpuid(CPUInfo, 1);
            g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
        }
    }
    return g_have_simd - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
164 #elif defined(__ARM_NEON) || defined(__aarch64__)
165 #include <arm_neon.h>
166 #define HAVE_SSE 0
167 #define HAVE_SIMD 1
168 #define VSTORE vst1q_f32
169 #define VLD vld1q_f32
170 #define VSET vmovq_n_f32
171 #define VADD vaddq_f32
172 #define VSUB vsubq_f32
173 #define VMUL vmulq_f32
174 #define VMAC(a, x, y) vmlaq_f32(a, x, y)
175 #define VMSB(a, x, y) vmlsq_f32(a, x, y)
176 #define VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s))
177 #define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
178 typedef float32x4_t f4;
/*
 * Reports whether the SIMD code path may be used; this variant always
 * returns 1.
 *
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype in C (an empty list leaves the parameters unspecified).
 */
static int have_simd(void)
{   /* TODO: detect NEON at run time for !MINIMP3_ONLY_SIMD builds */
    return 1;
}
183 #else /* SIMD checks... */
/* Neither the SSE nor the NEON branch was selected: generic code only. */
#define HAVE_SSE  0
#define HAVE_SIMD 0
#if defined(MINIMP3_ONLY_SIMD)
/* A SIMD-only build cannot work without a SIMD back end. */
#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
#endif /* MINIMP3_ONLY_SIMD */
189 #endif /* SIMD checks... */
190 #else /* !defined(MINIMP3_NO_SIMD) */
191 #define HAVE_SIMD 0
192 #endif /* !defined(MINIMP3_NO_SIMD) */
193
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__)
#define HAVE_ARMV6 1
/* Saturates `a` to the signed 16-bit range with a single SSAT instruction
   and returns the result. */
static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)
{
    int32_t saturated = 0;
    __asm__ ("ssat %0, #16, %1" : "=r"(saturated) : "r"(a));
    return saturated;
}
#endif
203
/* Bit reader state used by bs_init() and get_bits(). */
typedef struct
{
    const uint8_t *buf; /* start of the byte buffer being read */
    int pos;            /* current read position, in bits from the start of buf */
    int limit;          /* total number of readable bits (bytes*8) */
} bs_t;
209
/* Layer I/II scale information produced by L12_read_scale_info(). */
typedef struct
{
    float scf[3*64];        /* three scalefactors per bitalloc entry */
    uint8_t total_bands;    /* number of subbands */
    uint8_t stereo_bands;   /* number of leading subbands with a separately read second allocation */
    uint8_t bitalloc[64];   /* two allocation codes per subband, stored as a pair */
    uint8_t scfcod[64];     /* scalefactor selection code per bitalloc entry */
} L12_scale_info;
215
/* One group of consecutive subbands that share a bit-allocation code table. */
typedef struct
{
    uint8_t tab_offset;     /* offset of the group's code table inside g_bitalloc_code_tab */
    uint8_t code_tab_width; /* number of bits read per allocation code */
    uint8_t band_count;     /* number of subbands in the group */
} L12_subband_alloc_t;
220
/* Layer III side information for one granule/channel, filled by L3_read_side_info(). */
typedef struct
{
    const uint8_t *sfbtab;      /* band-width table selected for this granule */
    uint16_t part_23_length;
    uint16_t big_values;
    uint16_t scalefac_compress;
    uint8_t global_gain;
    uint8_t block_type;
    uint8_t mixed_block_flag;
    uint8_t n_long_sfb;
    uint8_t n_short_sfb;
    uint8_t table_select[3];
    uint8_t region_count[3];
    uint8_t subblock_gain[3];
    uint8_t preflag;
    uint8_t scalefac_scale;
    uint8_t count1_table;
    uint8_t scfsi;
} L3_gr_info_t;
229
230 typedef struct
231 {
232 bs_t bs;
233 uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
234 L3_gr_info_t gr_info[4];
235 float grbuf[2][576], scf[40], syn[18 + 15][2*32];
236 uint8_t ist_pos[2][39];
237 } mp3dec_scratch_t;
238
/* Points the bit reader `bs` at `data` (`bytes` bytes long) and rewinds it to bit 0. */
static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
{
    bs->limit = bytes*8;    /* limit is kept in bits */
    bs->pos = 0;
    bs->buf = data;
}
245
/*
 * Reads the next `n` bits from `bs`, most significant bit first, and returns
 * them right-aligned.
 *
 * The read position is always advanced by `n`. If that moves it past
 * bs->limit, nothing is read from the buffer and 0 is returned.
 */
static uint32_t get_bits(bs_t *bs, int n)
{
    const uint32_t bit_in_byte = bs->pos & 7;           /* bits already consumed in the first byte */
    const uint8_t *src = bs->buf + (bs->pos >> 3);
    uint32_t acc = 0, cur;
    int shift = n + bit_in_byte;

    bs->pos += n;
    if (bs->pos > bs->limit)
    {
        return 0;
    }

    /* First byte with its already-consumed high bits masked off. */
    cur = *src++ & (255 >> bit_in_byte);
    for (shift -= 8; shift > 0; shift -= 8)
    {
        acc |= cur << shift;
        cur = *src++;
    }
    /* shift is now in -7..0: drop the unused low bits of the last byte. */
    return acc | (cur >> -shift);
}
261
/*
 * Returns 1 when the 4 bytes at `h` pass the basic frame-header checks, else 0:
 * h[0] is 0xFF, h[1] is 0xFx or 0xE2/0xE3, and the layer, bitrate and
 * sample-rate fields do not hold their rejected codes (0, 15 and 3).
 */
static int hdr_valid(const uint8_t *h)
{
    if (h[0] != 0xff)
    {
        return 0;
    }
    if ((h[1] & 0xF0) != 0xf0 && (h[1] & 0xFE) != 0xe2)
    {
        return 0;
    }
    if (HDR_GET_LAYER(h) == 0)
    {
        return 0;
    }
    if (HDR_GET_BITRATE(h) == 15)
    {
        return 0;
    }
    return HDR_GET_SAMPLE_RATE(h) != 3;
}
270
/*
 * Returns 1 when `h2` is a valid header that matches `h1`, else 0. They match
 * when h[1] is equal apart from bit 0, the sample-rate bits of h[2] are equal,
 * and both agree on being free-format or not.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    if (!hdr_valid(h2))
    {
        return 0;
    }
    if ((h1[1] ^ h2[1]) & 0xFE)
    {
        return 0;
    }
    if ((h1[2] ^ h2[2]) & 0x0C)
    {
        return 0;
    }
    return HDR_IS_FREE_FORMAT(h1) == HDR_IS_FREE_FORMAT(h2);
}
278
/*
 * Returns the bitrate of header `h` in kbps, or 0 for bitrate index 0.
 * The table stores half of each rate so that every entry fits in a byte; it is
 * indexed by [MPEG-1 flag][layer field - 1][bitrate index].
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        {
            { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 },
            { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 },
            { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 }
        },
        {
            { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 },
            { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 },
            { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 }
        },
    };
    const int is_mpeg1 = HDR_TEST_MPEG1(h) ? 1 : 0;
    const int layer_row = HDR_GET_LAYER(h) - 1;

    return 2*halfrate[is_mpeg1][layer_row][HDR_GET_BITRATE(h)];
}
287
/*
 * Returns the sample rate of header `h` in Hz: the base rate selected by the
 * sample-rate index, halved once if the MPEG-1 bit is clear and once more if
 * the "not MPEG-2.5" bit is clear.
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    unsigned hz = g_hz[HDR_GET_SAMPLE_RATE(h)];

    if (!HDR_TEST_MPEG1(h))
    {
        hz >>= 1;
    }
    if (!HDR_TEST_NOT_MPEG25(h))
    {
        hz >>= 1;
    }
    return hz;
}
293
/* Returns the number of samples per frame for header `h`: 384 for Layer I,
   576 when HDR_IS_FRAME_576 holds, otherwise 1152. */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    if (HDR_IS_LAYER_1(h))
    {
        return 384;
    }
    return HDR_IS_FRAME_576(h) ? 576 : 1152;
}
298
/*
 * Returns the frame size in bytes (without padding) computed from the header
 * `h`. When the computed size is 0 (bitrate index 0), `free_format_size` is
 * returned instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    const unsigned samples = hdr_frame_samples(h);
    const unsigned kbps = hdr_bitrate_kbps(h);
    const unsigned hz = hdr_sample_rate_hz(h);
    /* samples * (kbps*1000/8) / hz */
    int bytes = samples*kbps*125/hz;

    if (HDR_IS_LAYER_1(h))
    {
        bytes &= ~3; /* slot align */
    }
    if (!bytes)
    {
        return free_format_size;
    }
    return bytes;
}
308
/* Returns the padding size in bytes signalled by header `h`: 0 without the
   padding bit, otherwise 4 for Layer I and 1 for the other layers. */
static int hdr_padding(const uint8_t *h)
{
    if (!HDR_TEST_PADDING(h))
    {
        return 0;
    }
    return HDR_IS_LAYER_1(h) ? 4 : 1;
}
313
314 #ifndef MINIMP3_ONLY_MP3
L12_subband_alloc_table(const uint8_t * hdr,L12_scale_info * sci)315 static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
316 {
317 const L12_subband_alloc_t *alloc;
318 int mode = HDR_GET_STEREO_MODE(hdr);
319 int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
320
321 if (HDR_IS_LAYER_1(hdr))
322 {
323 static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
324 alloc = g_alloc_L1;
325 nbands = 32;
326 } else if (!HDR_TEST_MPEG1(hdr))
327 {
328 static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
329 alloc = g_alloc_L2M2;
330 nbands = 30;
331 } else
332 {
333 static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
334 int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
335 unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
336 if (!kbps) /* free-format */
337 {
338 kbps = 192;
339 }
340
341 alloc = g_alloc_L2M1;
342 nbands = 27;
343 if (kbps < 56)
344 {
345 static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
346 alloc = g_alloc_L2M1_lowrate;
347 nbands = sample_rate_idx == 2 ? 12 : 8;
348 } else if (kbps >= 96 && sample_rate_idx != 1)
349 {
350 nbands = 30;
351 }
352 }
353
354 sci->total_bands = (uint8_t)nbands;
355 sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);
356
357 return alloc;
358 }
359
/*
 * Reads the Layer I/II scalefactors for `bands` allocation entries.
 *
 * bs     - bit reader positioned at the scalefactor data
 * pba    - allocation code per entry (0 means the entry carries no data)
 * scfcod - selection code per entry, choosing which of the three scalefactors
 *          are present in the stream
 * bands  - number of entries to process
 * scf    - output; exactly three floats are written per entry
 *
 * A scalefactor that is not present in the stream repeats the previous one of
 * the same entry (or 0 if none has been read yet).
 */
static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
{
    /* Three base factors (2^-20, 2^-20/2^(1/3), 2^-20/2^(2/3)) divided by x. */
#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
    static const float g_deq_L12[18*3] = {
        DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
    };
    /* Table-building helper only: do not leak it into code that includes this header. */
#undef DQ
    int i, m;
    for (i = 0; i < bands; i++)
    {
        float s = 0;
        int ba = *pba++;
        /* Bit 4/2/1 set = 1st/2nd/3rd scalefactor is read from the stream.
           scfcod 0 -> 7 (all three), 1 -> 5, 2 -> 4 (first only), 3 -> 6. */
        int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
        for (m = 4; m; m >>= 1)
        {
            if (mask & m)
            {
                int b = get_bits(bs, 6);
                /* b % 3 selects the base factor, b / 3 the power-of-two scaling. */
                s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
            }
            *scf++ = s;
        }
    }
}
383
/*
 * Reads the Layer I/II bit allocation, scalefactor selection codes and
 * scalefactors for the frame with header `hdr` from `bs` into `sci`.
 */
static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
{
    /* Concatenated code tables; L12_subband_alloc_t.tab_offset indexes into this array. */
    static const uint8_t g_bitalloc_code_tab[] = {
        0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
        0,17,18, 3,19,4,5,16,
        0,17,18,16,
        0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
        0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
    };
    const L12_subband_alloc_t *group = L12_subband_alloc_table(hdr, sci);
    const uint8_t *codes = g_bitalloc_code_tab;
    int band, n, next_group_start = 0, width = 0;

    for (band = 0; band < sci->total_bands; band++)
    {
        uint8_t *pair = &sci->bitalloc[2*band];
        uint8_t ba;

        /* Entering the next allocation group: switch code table and code width. */
        if (band == next_group_start)
        {
            next_group_start += group->band_count;
            width = group->code_tab_width;
            codes = g_bitalloc_code_tab + group->tab_offset;
            group++;
        }

        ba = codes[get_bits(bs, width)];
        pair[0] = ba;
        /* Only the first stereo_bands subbands carry a second code of their own;
           later subbands reuse the first code (or 0 when stereo_bands is 0). */
        if (band < sci->stereo_bands)
        {
            ba = codes[get_bits(bs, width)];
        }
        pair[1] = sci->stereo_bands ? ba : 0;
    }

    for (n = 0; n < 2*sci->total_bands; n++)
    {
        if (!sci->bitalloc[n])
        {
            sci->scfcod[n] = 6;
        } else if (HDR_IS_LAYER_1(hdr))
        {
            sci->scfcod[n] = 2;
        } else
        {
            sci->scfcod[n] = get_bits(bs, 2);
        }
    }

    L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);

    /* Once the scalefactors are read, clear the second allocation of every
       subband at or above stereo_bands. */
    for (band = sci->stereo_bands; band < sci->total_bands; band++)
    {
        sci->bitalloc[2*band + 1] = 0;
    }
}
431
/*
 * Reads four groups of `group_size` quantized samples per allocated entry
 * from `bs` and stores them, centred around zero, in `grbuf`.
 *
 * Entries alternate between the two halves of grbuf (offset +576, then -558),
 * so each subband occupies an 18-float slot in both halves. Returns the number
 * of samples produced per entry (group_size*4).
 */
static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
{
    const int n_entries = 2*sci->total_bands;
    int step = 576;
    int part, idx, n;

    for (part = 0; part < 4; part++)
    {
        float *out = grbuf + group_size*part;
        for (idx = 0; idx < n_entries; idx++)
        {
            const int ba = sci->bitalloc[idx];
            if (ba >= 17)
            {
                /* Grouped coding: one code word packs group_size base-`mod` digits. */
                unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */
                unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */
                for (n = 0; n < group_size; n++, code /= mod)
                {
                    out[n] = (float)((int)(code % mod - mod/2));
                }
            } else if (ba > 0)
            {
                /* Plain coding: `ba` bits per sample. */
                const int half = (1 << (ba - 1)) - 1;
                for (n = 0; n < group_size; n++)
                {
                    out[n] = (float)((int)get_bits(bs, ba) - half);
                }
            }
            /* Advance even for unallocated entries: +576, then -558, alternating. */
            out += step;
            step = 18 - step;
        }
    }
    return group_size*4;
}
466
/*
 * Applies scalefactors to the first 12 samples of every subband slot in `dst`.
 *
 * Subbands at or above sci->stereo_bands are first copied from the first half
 * of `dst` into the second half (offset 576). Each subband then scales its
 * first-half samples by scf[0] and its second-half samples by scf[3], with
 * `scf` advancing by 6 per subband.
 */
static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
{
    const int total = sci->total_bands;
    const int stereo = sci->stereo_bands;
    int band, n;

    memcpy(dst + 576 + stereo*18, dst + stereo*18, (total - stereo)*18*sizeof(float));

    for (band = 0; band < total; band++)
    {
        float *slot = dst + band*18;
        const float *gain = scf + band*6;
        for (n = 0; n < 12; n++)
        {
            slot[n + 0] *= gain[0];
            slot[n + 576] *= gain[3];
        }
    }
}
480 #endif /* MINIMP3_ONLY_MP3 */
481
/*
 * Parses the Layer III side information of the frame with header `hdr` from
 * `bs` and fills one L3_gr_info_t per granule/channel starting at `gr`
 * (1 or 2 entries when the MPEG-1 bit is clear, 2 or 4 when it is set).
 *
 * Returns main_data_begin on success, or -1 when the data is rejected:
 * big_values above 288, a window-switched granule with block_type 0, or a
 * part_23_length total that exceeds the bits available in the frame plus the
 * main_data_begin bytes.
 */
static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
{
    /* Band-width tables, one row per sample-rate index (see sr_idx below). */
    static const uint8_t g_scf_long[8][23] = {
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
        { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
        { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
    };
    static const uint8_t g_scf_short[8][40] = {
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };
    static const uint8_t g_scf_mixed[8][40] = {
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };

    const int is_mpeg1 = HDR_TEST_MPEG1(hdr) != 0;
    const int is_mono = HDR_IS_MONO(hdr);
    unsigned table_bits, scfsi_bits = 0;
    int main_data_begin, part23_total = 0;
    int remaining = is_mono ? 1 : 2;    /* number of L3_gr_info_t entries still to fill */
    int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr);

    /* Fold the 0..8 combined index into the 8 table rows (0 and 1 share row 0). */
    if (sr_idx != 0)
    {
        sr_idx--;
    }

    if (is_mpeg1)
    {
        remaining *= 2;
        main_data_begin = get_bits(bs, 9);
        scfsi_bits = get_bits(bs, 7 + remaining);
    } else
    {
        /* The low `remaining` bits of this read are shifted out and discarded. */
        main_data_begin = get_bits(bs, 8 + remaining) >> remaining;
    }

    for (; remaining; remaining--, gr++)
    {
        if (is_mono)
        {
            scfsi_bits <<= 4;
        }
        gr->part_23_length = (uint16_t)get_bits(bs, 12);
        part23_total += gr->part_23_length;
        gr->big_values = (uint16_t)get_bits(bs, 9);
        if (gr->big_values > 288)
        {
            return -1;
        }
        gr->global_gain = (uint8_t)get_bits(bs, 8);
        gr->scalefac_compress = (uint16_t)get_bits(bs, is_mpeg1 ? 4 : 9);

        /* Defaults: long-block layout; overridden below for short/mixed blocks. */
        gr->sfbtab = g_scf_long[sr_idx];
        gr->n_long_sfb = 22;
        gr->n_short_sfb = 0;

        if (!get_bits(bs, 1))
        {
            /* No window switching: three table selectors and explicit region counts. */
            gr->block_type = 0;
            gr->mixed_block_flag = 0;
            table_bits = get_bits(bs, 15);
            gr->region_count[0] = (uint8_t)get_bits(bs, 4);
            gr->region_count[1] = (uint8_t)get_bits(bs, 3);
            gr->region_count[2] = 255;
        } else
        {
            gr->block_type = (uint8_t)get_bits(bs, 2);
            if (!gr->block_type)
            {
                return -1;
            }
            gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
            gr->region_count[0] = 7;
            gr->region_count[1] = 255;
            if (gr->block_type == SHORT_BLOCK_TYPE)
            {
                scfsi_bits &= 0x0F0F;
                if (gr->mixed_block_flag)
                {
                    gr->sfbtab = g_scf_mixed[sr_idx];
                    gr->n_long_sfb = is_mpeg1 ? 8 : 6;
                    gr->n_short_sfb = 30;
                } else
                {
                    gr->region_count[0] = 8;
                    gr->sfbtab = g_scf_short[sr_idx];
                    gr->n_long_sfb = 0;
                    gr->n_short_sfb = 39;
                }
            }
            /* Only two table selectors are transmitted; the third is left as 0. */
            table_bits = get_bits(bs, 10);
            table_bits <<= 5;
            gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
        }

        gr->table_select[0] = (uint8_t)(table_bits >> 10);
        gr->table_select[1] = (uint8_t)((table_bits >> 5) & 31);
        gr->table_select[2] = (uint8_t)(table_bits & 31);

        if (is_mpeg1)
        {
            gr->preflag = (uint8_t)get_bits(bs, 1);
        } else
        {
            gr->preflag = (gr->scalefac_compress >= 500);
        }
        gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
        gr->count1_table = (uint8_t)get_bits(bs, 1);

        /* Hand the top four queued scfsi bits to this entry, then shift in the next four. */
        gr->scfsi = (uint8_t)((scfsi_bits >> 12) & 15);
        scfsi_bits <<= 4;
    }

    if (part23_total + bs->pos > bs->limit + main_data_begin*8)
    {
        return -1;
    }

    return main_data_begin;
}
606
/*
 * Reads up to four partitions of Layer III scalefactors.
 *
 * scf       - output scalefactors; three extra zero bytes are written after the last one
 * ist_pos   - read and written in parallel with scf: it supplies the values for
 *             partitions flagged in `scfsi` and receives the newly read ones
 * scf_size  - bit width per value for each partition (0 = all zeros)
 * scf_count - number of values per partition; a 0 ends the list
 * bitbuf    - bit reader
 * scfsi     - bit 8 flags the first partition as "copy from ist_pos", and the
 *             value is doubled after every partition. A negative value never
 *             sets the copy bit for the values used here, and instead makes the
 *             all-ones code be stored as -1 in ist_pos
 */
static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
{
    int part, k;

    for (part = 0; part < 4; part++, scfsi *= 2)
    {
        const int cnt = scf_count[part];
        if (!cnt)
        {
            break;
        }

        if (scfsi & 8)
        {
            memcpy(scf, ist_pos, cnt);
        } else
        {
            const int bits = scf_size[part];
            if (bits)
            {
                const int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
                for (k = 0; k < cnt; k++)
                {
                    const int s = get_bits(bitbuf, bits);
                    ist_pos[k] = (s == max_scf ? -1 : s);
                    scf[k] = s;
                }
            } else
            {
                memset(scf, 0, cnt);
                memset(ist_pos, 0, cnt);
            }
        }

        ist_pos += cnt;
        scf += cnt;
    }

    scf[0] = scf[1] = scf[2] = 0;
}
639
/*
 * Returns y * 2^(-exp_q2/4) for a non-negative exponent given in quarter
 * steps. The exponent is consumed in chunks of at most 120 (30*4) so that the
 * integer power-of-two factor stays within 1 << 30.
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    /* 2^-30 * 2^(-k/4) for k = 0..3 */
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };

    for (;;)
    {
        const int e = MINIMP3_MIN(30*4, exp_q2);
        y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2));
        exp_q2 -= e;
        if (exp_q2 <= 0)
        {
            break;
        }
    }
    return y;
}
651
/*
 * Decodes the scalefactors of one granule/channel into per-band float gains.
 *
 * hdr     - frame header
 * ist_pos - passed through to L3_read_scalefactors()
 * bs      - bit reader positioned at the scalefactor data
 * gr      - side information of the granule/channel
 * scf     - output; one gain per band (n_long_sfb + n_short_sfb values)
 * ch      - channel index; only tested for being non-zero, together with the
 *           header's intensity-stereo bit
 */
static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
{
    /* Values per partition; row = long / mixed / short layout, columns in groups of four. */
    static const uint8_t g_scf_partitions[3][28] = {
        { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
        { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
        { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
    };
    const int n_sfb = (int)(gr->n_long_sfb + gr->n_short_sfb);
    const int scf_shift = gr->scalefac_scale + 1;
    const uint8_t *partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
    uint8_t scf_size[4], iscf[40];
    int band, gain_exp, scfsi = gr->scfsi;
    float gain;

    if (HDR_TEST_MPEG1(hdr))
    {
        /* scalefac_compress selects a packed pair of bit widths (high: first two
           partitions, low two bits: last two partitions). */
        static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
        const int packed = g_scfc_decode[gr->scalefac_compress];
        scf_size[0] = (uint8_t)(packed >> 2);
        scf_size[1] = scf_size[0];
        scf_size[2] = (uint8_t)(packed & 3);
        scf_size[3] = scf_size[2];
    } else
    {
        /* scalefac_compress is a mixed-radix number; g_mod holds the radices of
           each candidate layout, four per layout. */
        static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
        const int ist = HDR_TEST_I_STEREO(hdr) && ch;
        int sfc = gr->scalefac_compress >> ist;
        int k = ist*3*4;
        int modprod, digit;

        /* Try layouts in turn until the remaining value falls inside one of them. */
        while (sfc >= 0)
        {
            modprod = 1;
            for (digit = 3; digit >= 0; digit--)
            {
                scf_size[digit] = (uint8_t)(sfc / modprod % g_mod[k + digit]);
                modprod *= g_mod[k + digit];
            }
            sfc -= modprod;
            k += 4;
        }
        partition += k;
        scfsi = -16;
    }
    L3_read_scalefactors(iscf, ist_pos, scf_size, partition, bs, scfsi);

    if (gr->n_short_sfb)
    {
        /* Add the per-window subblock gain to each short-block triple. */
        const int sh = 3 - scf_shift;
        uint8_t *triple = iscf + gr->n_long_sfb;
        for (band = 0; band < gr->n_short_sfb; band += 3, triple += 3)
        {
            triple[0] += gr->subblock_gain[0] << sh;
            triple[1] += gr->subblock_gain[1] << sh;
            triple[2] += gr->subblock_gain[2] << sh;
        }
    } else if (gr->preflag)
    {
        static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
        for (band = 0; band < 10; band++)
        {
            iscf[11 + band] += g_preamp[band];
        }
    }

    gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
    gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp);
    for (band = 0; band < n_sfb; band++)
    {
        scf[band] = L3_ldexp_q2(gain, iscf[band] << scf_shift);
    }
}
713
/*
 * Table of sign(n)*|n|^(4/3). Entries 0..15 hold the negated values for
 * n = 0..15, entries 16..144 the values for n = 0..128.
 */
static const float g_pow43[129 + 16] = {
    0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,
    -16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,

    0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,
    16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,
    40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,
    69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,
    101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,
    136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,
    174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,
    214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,
    256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,
    299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,
    344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,
    391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,
    439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,
    489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,
    539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,
    591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,
    645.079578f
};

/*
 * Returns x^(4/3).
 *
 * Values below 129 come straight from g_pow43. Larger values are first scaled
 * by a power of 8 (whose 4/3 power is the exact factor `mult`), then
 * approximated from the nearest table entry with a second-order correction in
 * the relative distance `frac`.
 */
static float L3_pow_43(int x)
{
    int mult, sign;
    float frac;

    if (x < 129)
    {
        return g_pow43[16 + x];
    }

    if (x < 1024)
    {
        /* (8x)^(4/3) = 16 * x^(4/3) */
        mult = 16;
        x <<= 3;
    } else
    {
        /* table entry is (x/64)^(4/3); 64^(4/3) = 256 */
        mult = 256;
    }

    /* Round to the nearest multiple of 64: `sign` is 64 when the remainder is
       32 or more, which makes the remainder term negative. */
    sign = 2*x & 64;
    frac = (float)((x & 63) - sign) / ((x & ~63) + sign);
    return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
}
739
L3_huffman(float * dst,bs_t * bs,const L3_gr_info_t * gr_info,const float * scf,int layer3gr_limit)740 static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
741 {
742 static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
743 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
744 -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
745 -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
746 -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
747 -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
748 -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
749 -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
750 -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
751 -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
752 -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
753 -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
754 -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
755 -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
756 -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
757 -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
758 static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
759 static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
760 static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
761 static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
762
763 #define PEEK_BITS(n) (bs_cache >> (32 - n))
764 #define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
765 #define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
766 #define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
767
768 float one = 0.0f;
769 int ireg = 0, big_val_cnt = gr_info->big_values;
770 const uint8_t *sfb = gr_info->sfbtab;
771 const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
772 uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
773 int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
774 bs_next_ptr += 4;
775
776 while (big_val_cnt > 0)
777 {
778 int tab_num = gr_info->table_select[ireg];
779 int sfb_cnt = gr_info->region_count[ireg++];
780 const int16_t *codebook = tabs + tabindex[tab_num];
781 int linbits = g_linbits[tab_num];
782 if (linbits)
783 {
784 do
785 {
786 np = *sfb++ / 2;
787 pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
788 one = *scf++;
789 do
790 {
791 int j, w = 5;
792 int leaf = codebook[PEEK_BITS(w)];
793 while (leaf < 0)
794 {
795 FLUSH_BITS(w);
796 w = leaf & 7;
797 leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
798 }
799 FLUSH_BITS(leaf >> 8);
800
801 for (j = 0; j < 2; j++, dst++, leaf >>= 4)
802 {
803 int lsb = leaf & 0x0F;
804 if (lsb == 15)
805 {
806 lsb += PEEK_BITS(linbits);
807 FLUSH_BITS(linbits);
808 CHECK_BITS;
809 *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
810 } else
811 {
812 *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
813 }
814 FLUSH_BITS(lsb ? 1 : 0);
815 }
816 CHECK_BITS;
817 } while (--pairs_to_decode);
818 } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
819 } else
820 {
821 do
822 {
823 np = *sfb++ / 2;
824 pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
825 one = *scf++;
826 do
827 {
828 int j, w = 5;
829 int leaf = codebook[PEEK_BITS(w)];
830 while (leaf < 0)
831 {
832 FLUSH_BITS(w);
833 w = leaf & 7;
834 leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
835 }
836 FLUSH_BITS(leaf >> 8);
837
838 for (j = 0; j < 2; j++, dst++, leaf >>= 4)
839 {
840 int lsb = leaf & 0x0F;
841 *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
842 FLUSH_BITS(lsb ? 1 : 0);
843 }
844 CHECK_BITS;
845 } while (--pairs_to_decode);
846 } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
847 }
848 }
849
850 for (np = 1 - big_val_cnt;; dst += 4)
851 {
852 const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
853 int leaf = codebook_count1[PEEK_BITS(4)];
854 if (!(leaf & 8))
855 {
856 leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
857 }
858 FLUSH_BITS(leaf & 7);
859 if (BSPOS > layer3gr_limit)
860 {
861 break;
862 }
863 #define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
864 #define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
865 RELOAD_SCALEFACTOR;
866 DEQ_COUNT1(0);
867 DEQ_COUNT1(1);
868 RELOAD_SCALEFACTOR;
869 DEQ_COUNT1(2);
870 DEQ_COUNT1(3);
871 CHECK_BITS;
872 }
873
874 bs->pos = layer3gr_limit;
875 }
876
/*
 * In-place sum/difference of two channel buffers that sit 576 floats apart.
 * For every i in [0, n): with a = left[i] and b = left[i + 576] the pair is
 * replaced by (a + b, a - b).
 */
static void L3_midside_stereo(float *left, int n)
{
    float *const other = left + 576;
    int pos = 0;
#if HAVE_SIMD
    if (have_simd())
    {
        /* four samples per step; whatever is left is finished by the scalar loop */
        while (pos < n - 3)
        {
            f4 va = VLD(left + pos);
            f4 vb = VLD(other + pos);
            VSTORE(left + pos, VADD(va, vb));
            VSTORE(other + pos, VSUB(va, vb));
            pos += 4;
        }
    }
#endif /* HAVE_SIMD */
    while (pos < n)
    {
        const float a = left[pos];
        const float b = other[pos];
        left[pos] = a + b;
        other[pos] = a - b;
        pos++;
    }
}
898
/*
 * Scales one band into two outputs: for every i in [0, n) the value v = left[i]
 * is written as v*kr to left[i + 576] and as v*kl back to left[i].
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    int k = 0;
    while (k < n)
    {
        const float v = left[k];
        left[k + 576] = v*kr;
        left[k] = v*kl;
        k++;
    }
}
908
/*
 * Scans nbands consecutive bands of `right` (band i is sfb[i] samples wide) and
 * stores, in max_band[i % 3], the index of the last band in that residue class
 * that holds a nonzero sample. Slots with no such band are left at -1.
 * Samples are examined in pairs, so right[k + 1] is read alongside right[k].
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int band;

    max_band[0] = -1;
    max_band[1] = -1;
    max_band[2] = -1;

    for (band = 0; band < nbands; right += sfb[band], band++)
    {
        const int width = sfb[band];
        int k = 0;

        /* skip leading all-zero pairs */
        while (k < width && right[k] == 0 && right[k + 1] == 0)
        {
            k += 2;
        }
        if (k < width)
        {
            max_band[band % 3] = band;
        }
    }
}
928
/*
 * Per-band stereo reconstruction. Walks the band widths in sfb (the list ends at
 * a zero width) and, for each band i starting at `left`:
 *   - if i lies above max_band[i % 3] and ist_pos[i] is below the position limit
 *     (7 when HDR_TEST_MPEG1(hdr) is set, otherwise 64), the band is scaled with
 *     an intensity-stereo gain pair via L3_intensity_stereo_band();
 *   - otherwise, if HDR_TEST_MS_STEREO(hdr) is set, L3_midside_stereo() is applied;
 *   - otherwise the band is left untouched.
 * mpeg2_sh is the extra shift applied to the exponent in the non-MPEG1 gain.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    /* (kl, kr) pairs for the MPEG1 case, indexed by position 0..6 */
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    const unsigned max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;
    unsigned band;

    for (band = 0; sfb[band] != 0; left += sfb[band], band++)
    {
        const unsigned ipos = ist_pos[band];
        float kl, kr, s;

        if (!((int)band > max_band[band % 3] && ipos < max_pos))
        {
            /* not an intensity-coded band */
            if (HDR_TEST_MS_STEREO(hdr))
            {
                L3_midside_stereo(left, sfb[band]);
            }
            continue;
        }

        /* both gains get an extra factor when mid/side is also enabled */
        s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
        if (HDR_TEST_MPEG1(hdr))
        {
            kl = g_pan[2*ipos];
            kr = g_pan[2*ipos + 1];
        } else
        {
            const float g = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
            /* odd positions put the computed gain on the left, even ones on the right */
            if (ipos & 1)
            {
                kl = g;
                kr = 1;
            } else
            {
                kl = 1;
                kr = g;
            }
        }
        L3_intensity_stereo_band(left, sfb[band], kl*s, kr*s);
    }
}
962
L3_intensity_stereo(float * left,uint8_t * ist_pos,const L3_gr_info_t * gr,const uint8_t * hdr)963 static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
964 {
965 int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
966 int i, max_blocks = gr->n_short_sfb ? 3 : 1;
967
968 L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
969 if (gr->n_long_sfb)
970 {
971 max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
972 }
973 for (i = 0; i < max_blocks; i++)
974 {
975 int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
976 int itop = n_sfb - max_blocks + i;
977 int prev = itop - max_blocks;
978 ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
979 }
980 L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
981 }
982
/*
 * Interleaves groups of three equal-width runs. sfb is read at every third
 * entry until a zero width is met; for each width w the 3*w input floats
 * (three consecutive runs of w) are emitted as w triples
 * (run0[k], run1[k], run2[k]). The result is assembled in scratch and then
 * copied back over the start of grbuf.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    const float *in = grbuf;
    float *out = scratch;
    int width;

    while ((width = *sfb) != 0)
    {
        int k;
        for (k = 0; k < width; k++)
        {
            out[0] = in[k];
            out[1] = in[k + width];
            out[2] = in[k + 2*width];
            out += 3;
        }
        in += 3*width;
        sfb += 3;
    }
    memcpy(grbuf, scratch, (out - scratch)*sizeof(float));
}
999
/*
 * Applies eight butterflies across each of the first nbands boundaries between
 * consecutive 18-sample blocks of grbuf. For butterfly i the sample just above
 * the boundary (u = grbuf[18 + i]) and the mirrored one below it
 * (d = grbuf[17 - i]) are replaced by
 *     u' = u*g_aa[0][i] - d*g_aa[1][i]
 *     d' = u*g_aa[1][i] + d*g_aa[0][i]
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };
    int left_to_do;

    for (left_to_do = nbands; left_to_do > 0; left_to_do--, grbuf += 18)
    {
        int i = 0;
#if HAVE_SIMD
        if (have_simd())
        {
            for (; i < 8; i += 4)
            {
                float *above = grbuf + 18 + i;
                float *below = grbuf + 14 - i;
                f4 vup = VLD(above);
                f4 vdn = VLD(below);
                f4 vcs = VLD(g_aa[0] + i);
                f4 vsn = VLD(g_aa[1] + i);
                /* the lower four samples are mirrored, so flip lane order */
                vdn = VREV(vdn);
                VSTORE(above, VSUB(VMUL(vup, vcs), VMUL(vdn, vsn)));
                vdn = VADD(VMUL(vup, vsn), VMUL(vdn, vcs));
                VSTORE(below, VREV(vdn));
            }
        }
#endif /* HAVE_SIMD */
#ifndef MINIMP3_ONLY_SIMD
        for (; i < 8; i++)
        {
            float *above = grbuf + 18 + i;
            float *below = grbuf + 17 - i;
            const float u = *above;
            const float d = *below;
            *above = u*g_aa[0][i] - d*g_aa[1][i];
            *below = u*g_aa[1][i] + d*g_aa[0][i];
        }
#endif /* MINIMP3_ONLY_SIMD */
    }
}
1034
/*
 * In-place 9-point transform of y[0..8] (a DCT-III, going by the name).
 * The even-indexed inputs and the odd-indexed inputs are reduced separately
 * and then combined as sums and differences. Every arithmetic expression is
 * kept in the same association order so results are bit-for-bit stable.
 */
static void L3_dct3_9(float *y)
{
    /* ---- even-indexed inputs ---- */
    const float y0 = y[0], y2 = y[2], y4 = y[4], y6 = y[6], y8 = y[8];
    const float base = y0 + y6*0.5f;
    const float d06  = y0 - y6;
    const float ea   = (y4 + y2)*0.93969262f;
    const float eb   = (y8 + y2)*0.76604444f;
    const float ec   = (y4 - y8)*0.17364818f;
    const float mix  = y4 + (y8 - y2);

    const float e2 = d06 - mix*0.5f;
    const float e8 = base - eb + ec;
    const float e0 = base - ea + eb;
    const float e4 = base + ea - ec;

    /* ---- odd-indexed inputs ---- */
    const float y1 = y[1], y5 = y[5], y7 = y[7];
    const float y3s = y[3]*0.86602540f;
    const float oa  = (y5 + y1)*0.98480775f;
    const float ob  = (y5 - y7)*0.34202014f;
    const float oc  = (y1 + y7)*0.64278761f;
    const float o1  = (y1 - y5 - y7)*0.86602540f;

    const float o5 = oa - y3s - oc;
    const float o7 = ob - y3s - oa;
    const float o3 = ob + y3s - oc;

    /* ---- combine ---- */
    y[0] = e4 - o7;
    y[1] = e2 + o1;
    y[2] = e0 - o3;
    y[3] = e8 + o5;
    y[4] = mix + d06;
    y[5] = e8 - o5;
    y[6] = e0 + o3;
    y[7] = e2 - o1;
    y[8] = e4 + o7;
}
1074
/*
 * Processes nbands blocks of 18 samples in place. For each block:
 *   - the 18 inputs are folded into two 9-element vectors co[] and si[],
 *   - each vector goes through L3_dct3_9(), then the odd entries of si[] are negated,
 *   - the pair is rotated by the g_twid9 factors; one rotation output replaces
 *     the block's 9 overlap values, the other is combined with the previous
 *     overlap values through `window` (18 coefficients) to produce the 18 outputs.
 * grbuf advances 18 floats and overlap 9 floats per block.
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };
    int band;

    for (band = 0; band < nbands; band++, grbuf += 18, overlap += 9)
    {
        float co[9], si[9];
        int i, k;

        /* fold the 18 inputs: the ends go in directly, the rest in groups of four */
        co[0] = -grbuf[0];
        si[0] = grbuf[17];
        for (k = 0; k < 4; k++)
        {
            const float *q = grbuf + 4*k;
            si[8 - 2*k] = q[1] - q[2];
            co[1 + 2*k] = q[1] + q[2];
            si[7 - 2*k] = q[4] - q[3];
            co[2 + 2*k] = -(q[3] + q[4]);
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        for (k = 1; k < 9; k += 2)
        {
            si[k] = -si[k];
        }

        i = 0;

#if HAVE_SIMD
        if (have_simd())
        {
            /* lanes 0..7; index 8 is left for the scalar loop below */
            for (; i < 8; i += 4)
            {
                f4 vprev = VLD(overlap + i);
                f4 vco = VLD(co + i);
                f4 vsi = VLD(si + i);
                f4 vt0 = VLD(g_twid9 + i);
                f4 vt1 = VLD(g_twid9 + 9 + i);
                f4 vwa = VLD(window + i);
                f4 vwb = VLD(window + 9 + i);
                f4 vacc = VADD(VMUL(vco, vt1), VMUL(vsi, vt0));
                VSTORE(overlap + i, VSUB(VMUL(vco, vt0), VMUL(vsi, vt1)));
                VSTORE(grbuf + i, VSUB(VMUL(vprev, vwa), VMUL(vacc, vwb)));
                vacc = VADD(VMUL(vprev, vwb), VMUL(vacc, vwa));
                VSTORE(grbuf + 14 - i, VREV(vacc));
            }
        }
#endif /* HAVE_SIMD */
        for (; i < 9; i++)
        {
            const float c = co[i];
            const float s = si[i];
            const float prev = overlap[i];
            const float acc = c*g_twid9[9 + i] + s*g_twid9[0 + i];
            overlap[i] = c*g_twid9[0 + i] - s*g_twid9[9 + i];
            grbuf[i] = prev*window[0 + i] - acc*window[9 + i];
            grbuf[17 - i] = prev*window[9 + i] + acc*window[0 + i];
        }
    }
}
1131
/*
 * Three-point transform of (x0, x1, x2) into dst[0..2]:
 *   dst[0] = (x0 - x2/2) + x1*0.8660254
 *   dst[1] =  x0 + x2
 *   dst[2] = (x0 - x2/2) - x1*0.8660254
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    const float odd = x1*0.86602540f;
    const float even = x0 - x2*0.5f;

    dst[0] = even + odd;
    dst[1] = x0 + x2;
    dst[2] = even - odd;
}
1140
/*
 * One short transform. Reads six inputs x[0], x[3], x[6], x[9], x[12], x[15]
 * (stride 3), runs two L3_idct3() passes, rotates the results by g_twid3 and
 *   - writes six output samples to dst[0..5], combining the previous
 *     overlap[0..2] with the new data through g_twid3 used as a window,
 *   - replaces overlap[0..2] with the new tail.
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    float co[3], si[3];
    int k;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    for (k = 0; k < 3; k++)
    {
        const float rot_a = g_twid3[0 + k];
        const float rot_b = g_twid3[3 + k];
        const float win_a = g_twid3[2 - k];
        const float win_b = g_twid3[5 - k];
        const float prev = overlap[k];
        const float acc = co[k]*rot_b + si[k]*rot_a;

        overlap[k] = co[k]*rot_a - si[k]*rot_b;
        dst[k] = prev*win_a - acc*win_b;
        dst[5 - k] = prev*win_b + acc*win_a;
    }
}
1160
/*
 * Short-block path for nbands blocks of 18 samples. Per block:
 *   - the 18 inputs are saved to a local copy,
 *   - outputs 0..5 are taken straight from overlap[0..5],
 *   - three L3_imdct12() calls (input offsets 0, 1, 2) produce outputs 6..11,
 *     outputs 12..17 and the new overlap[0..5]; all three share and update
 *     overlap[6..8].
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    int remaining;

    for (remaining = nbands; remaining > 0; remaining--)
    {
        float in[18];
        float *const carry = overlap + 6;

        memcpy(in, grbuf, sizeof(in));
        memcpy(grbuf, overlap, 6*sizeof(float));
        L3_imdct12(in, grbuf + 6, carry);
        L3_imdct12(in + 1, grbuf + 12, carry);
        L3_imdct12(in + 2, overlap, carry);

        overlap += 9;
        grbuf += 18;
    }
}
1173
/*
 * Negates every odd-indexed sample in every odd-numbered 18-sample block of a
 * 32-block (576-float) buffer, i.e. grbuf[18*b + i] for b = 1, 3, ..., 31 and
 * i = 1, 3, ..., 17.
 *
 * Each block address is computed from the base pointer rather than by stepping
 * a running pointer: stepping by 36 after the last block would form an address
 * 594 floats in, beyond one-past-the-end of the 576-float buffer.
 */
static void L3_change_sign(float *grbuf)
{
    int band, i;

    for (band = 1; band < 32; band += 2)
    {
        float *blk = grbuf + 18*band;
        for (i = 1; i < 18; i += 2)
        {
            blk[i] = -blk[i];
        }
    }
}
1181
L3_imdct_gr(float * grbuf,float * overlap,unsigned block_type,unsigned n_long_bands)1182 static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1183 {
1184 static const float g_mdct_window[2][18] = {
1185 { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1186 { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1187 };
1188 if (n_long_bands)
1189 {
1190 L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1191 grbuf += 18*n_long_bands;
1192 overlap += 9*n_long_bands;
1193 }
1194 if (block_type == SHORT_BLOCK_TYPE)
1195 L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1196 else
1197 L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1198 }
1199
/*
 * Stashes the bytes of s->maindata that lie between the current read position
 * (rounded up to a whole byte) and the bitstream limit into h->reserv_buf.
 * At most MAX_BITRESERVOIR_BYTES are kept; when more are available only the
 * last MAX_BITRESERVOIR_BYTES of them are saved. h->reserv receives the byte
 * count (which may be zero or negative, in which case nothing is copied).
 */
static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
{
    int first = (s->bs.pos + 7)/8u;
    int count = s->bs.limit/8u - first;

    if (count > MAX_BITRESERVOIR_BYTES)
    {
        /* drop the oldest bytes so that exactly the maximum remains */
        first += count - MAX_BITRESERVOIR_BYTES;
        count = MAX_BITRESERVOIR_BYTES;
    }
    if (count > 0)
    {
        memmove(h->reserv_buf, s->maindata + first, count);
    }
    h->reserv = count;
}
1215
/*
 * Builds the main-data buffer for the current frame in s->maindata:
 * the last min(h->reserv, main_data_begin) bytes of the saved reservoir,
 * followed by the unread whole bytes of `bs`. s->bs is then initialised over
 * the combined buffer.
 * Returns nonzero when the reservoir held at least main_data_begin bytes.
 */
static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
{
    const int frame_bytes = (bs->limit - bs->pos)/8;
    const int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin);
    /* offset of the first reservoir byte still needed */
    const int skip = MINIMP3_MAX(0, h->reserv - main_data_begin);

    memcpy(s->maindata, h->reserv_buf + skip, bytes_have);
    memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
    bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
    return h->reserv >= main_data_begin;
}
1225
/*
 * Decodes one granule for nch channels into s->grbuf.
 *  1. Per channel: scalefactors, then Huffman data, bounded by the channel's
 *     part_23_length measured from the current bitstream position.
 *  2. Stereo processing on the channel pair: intensity stereo when
 *     HDR_TEST_I_STEREO is set, otherwise mid/side over all 576 samples when
 *     HDR_IS_MS_STEREO is set.
 *  3. Per channel: optional short-block reorder, antialias, inverse transform
 *     with overlap from h->mdct_overlap, and the final sign change.
 */
static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
{
    int ch;

    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        int layer3gr_limit = s->bs.pos + gr->part_23_length;

        L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr, s->scf, ch);
        L3_huffman(s->grbuf[ch], &s->bs, gr, s->scf, layer3gr_limit);
    }

    if (HDR_TEST_I_STEREO(h->header))
    {
        L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
    } else if (HDR_IS_MS_STEREO(h->header))
    {
        L3_midside_stereo(s->grbuf[0], 576);
    }

    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        float *samples = s->grbuf[ch];
        /* 0 without mixed blocks; otherwise 2, doubled when the sample-rate field equals 2 */
        int n_long_bands = (gr->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
        int aa_bands = 31;

        if (gr->n_short_sfb)
        {
            aa_bands = n_long_bands - 1;
            L3_reorder(samples + n_long_bands*18, s->syn[0], gr->sfbtab + gr->n_long_sfb);
        }

        L3_antialias(samples, aa_bands);
        L3_imdct_gr(samples, h->mdct_overlap[ch], gr->block_type, n_long_bands);
        L3_change_sign(samples);
    }
}
1261
/*
 * In-place 32-point transform (a DCT-II, going by the function name) applied
 * independently to each of the first n columns of grbuf.
 *
 * A column k consists of the 32 values grbuf[k + 18*r], r = 0..31. For each
 * column the routine
 *   1. folds the 32 inputs into four rows of eight values (t[0..3][0..7])
 *      using sums, differences and the g_sec scale factors,
 *   2. runs the same 8-point butterfly network on each of the four rows,
 *   3. recombines the rows and writes the 32 results back to the column.
 *
 * With HAVE_SIMD and have_simd() true, four adjacent columns are handled per
 * iteration; otherwise the scalar loop handles one column at a time. When
 * MINIMP3_ONLY_SIMD is defined the scalar loop is compiled out.
 */
static void mp3d_DCT_II(float *grbuf, int n)
{
    /* used as g_sec[3*i + 0], g_sec[3*i + 1], g_sec[3*i + 2] for i = 0..7 */
    static const float g_sec[24] = {
        10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
    };
    int i, k = 0;
#if HAVE_SIMD
    if (have_simd()) for (; k < n; k += 4)
    {
        f4 t[4][8], *x;
        float *y = grbuf + k;

        /* stage 1: x walks t[0]; x[8], x[16], x[24] land in t[1][i], t[2][i], t[3][i] */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            f4 x0 = VLD(&y[i*18]);
            f4 x1 = VLD(&y[(15 - i)*18]);
            f4 x2 = VLD(&y[(16 + i)*18]);
            f4 x3 = VLD(&y[(31 - i)*18]);
            f4 t0 = VADD(x0, x3);
            f4 t1 = VADD(x1, x2);
            f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
            f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
            x[0] = VADD(t0, t1);
            x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
            x[16] = VADD(t3, t2);
            x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
        }
        /* stage 2: 8-point butterfly on each row of t, in place (x steps one row at a time) */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = VSUB(x0, x7); x0 = VADD(x0, x7);
            x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
            x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
            x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
            x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
            x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
            x[0] = VADD(x0, x1);
            x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
            x5 = VADD(x5, x6);
            x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
            x7 = VADD(x7, xt);
            x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
            x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
            x0 = VSUB(xt, x6); xt = VADD(xt, x6);
            x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
            x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
            x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
            x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
            x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
            x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
        }

        /* stage 3: write back. If fewer than three columns remain from k
           (k > n - 3), only the two low lanes of each vector are stored;
           otherwise all four lanes are stored. */
        if (k > n - 3)
        {
            /* VSAVE2 stores the low two floats of v at row i of the current output group */
#if HAVE_SSE
#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
#else /* HAVE_SSE */
#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v))
#endif /* HAVE_SSE */
            /* four output rows per step; y advances by four rows each time */
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE2(0, t[0][i]);
                VSAVE2(1, VADD(t[2][i], s));
                VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE2(3, VADD(t[2][1 + i], s));
            }
            /* last group: there is no entry i + 1 to add in */
            VSAVE2(0, t[0][7]);
            VSAVE2(1, VADD(t[2][7], t[3][7]));
            VSAVE2(2, t[1][7]);
            VSAVE2(3, t[3][7]);
        } else
        {
            /* VSAVE4 stores all four lanes of v at row i of the current output group */
#define VSAVE4(i, v) VSTORE(&y[i*18], v)
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE4(0, t[0][i]);
                VSAVE4(1, VADD(t[2][i], s));
                VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE4(3, VADD(t[2][1 + i], s));
            }
            VSAVE4(0, t[0][7]);
            VSAVE4(1, VADD(t[2][7], t[3][7]));
            VSAVE4(2, t[1][7]);
            VSAVE4(3, t[3][7]);
        }
    } else
#endif /* HAVE_SIMD */
    /* The statement below is the else-branch of the have_simd() test above
       when HAVE_SIMD is enabled, and the sole code path when it is not. */
#ifdef MINIMP3_ONLY_SIMD
    {}
#else /* MINIMP3_ONLY_SIMD */
    for (; k < n; k++)
    {
        float t[4][8], *x, *y = grbuf + k;

        /* stage 1: fold 32 inputs into t; x[8], x[16], x[24] address t[1][i], t[2][i], t[3][i] */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            float x0 = y[i*18];
            float x1 = y[(15 - i)*18];
            float x2 = y[(16 + i)*18];
            float x3 = y[(31 - i)*18];
            float t0 = x0 + x3;
            float t1 = x1 + x2;
            float t2 = (x1 - x2)*g_sec[3*i + 0];
            float t3 = (x0 - x3)*g_sec[3*i + 1];
            x[0] = t0 + t1;
            x[8] = (t0 - t1)*g_sec[3*i + 2];
            x[16] = t3 + t2;
            x[24] = (t3 - t2)*g_sec[3*i + 2];
        }
        /* stage 2: 8-point butterfly on each row of t, in place */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = x0 - x7; x0 += x7;
            x7 = x1 - x6; x1 += x6;
            x6 = x2 - x5; x2 += x5;
            x5 = x3 - x4; x3 += x4;
            x4 = x0 - x3; x0 += x3;
            x3 = x1 - x2; x1 += x2;
            x[0] = x0 + x1;
            x[4] = (x0 - x1)*0.70710677f;
            x5 = x5 + x6;
            x6 = (x6 + x7)*0.70710677f;
            x7 = x7 + xt;
            x3 = (x3 + x4)*0.70710677f;
            x5 -= x7*0.198912367f; /* rotate by PI/8 */
            x7 += x5*0.382683432f;
            x5 -= x7*0.198912367f;
            x0 = xt - x6; xt += x6;
            x[1] = (xt + x7)*0.50979561f;
            x[2] = (x4 + x3)*0.54119611f;
            x[3] = (x0 - x5)*0.60134488f;
            x[5] = (x0 + x5)*0.89997619f;
            x[6] = (x4 - x3)*1.30656302f;
            x[7] = (xt - x7)*2.56291556f;

        }
        /* stage 3: recombine rows and write four output rows per step */
        for (i = 0; i < 7; i++, y += 4*18)
        {
            y[0*18] = t[0][i];
            y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
            y[2*18] = t[1][i] + t[1][i + 1];
            y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
        }
        /* last group: there is no entry i + 1 to add in */
        y[0*18] = t[0][7];
        y[1*18] = t[2][7] + t[3][7];
        y[2*18] = t[1][7];
        y[3*18] = t[3][7];
    }
#endif /* MINIMP3_ONLY_SIMD */
}
1416
#ifndef MINIMP3_FLOAT_OUTPUT
/*
 * Converts one synthesised sample to a 16-bit PCM value.
 * Generic path: values at or above 32766.5 give 32767, values at or below
 * -32767.5 give -32768; anything else is truncate(sample + 0.5), minus one
 * when that truncated value is negative.
 * ARMv6 path: the same truncate-and-adjust in 32 bits, then clipped with
 * minimp3_clip_int16_arm().
 */
static int16_t mp3d_scale_pcm(float sample)
{
    int16_t s;
#if HAVE_ARMV6
    int32_t s32 = (int32_t)(sample + .5f);
    s32 -= (s32 < 0);
    s = (int16_t)minimp3_clip_int16_arm(s32);
#else
    if (sample >= 32766.5)
    {
        s = (int16_t) 32767;
    } else if (sample <= -32767.5)
    {
        s = (int16_t)-32768;
    } else
    {
        s = (int16_t)(sample + .5f);
        if (s < 0)
        {
            s--; /* away from zero, to be compliant */
        }
    }
#endif
    return s;
}
#else /* MINIMP3_FLOAT_OUTPUT */
/* Float output: scales the sample by 1/32768. */
static float mp3d_scale_pcm(float sample)
{
    const float scale = 1.f/32768.f;
    return sample*scale;
}
#endif /* MINIMP3_FLOAT_OUTPUT */
1438
/*
 * Produces two output samples from the buffer at z, reading every 64th float.
 *   pcm[0]      - weighted sum over z[0], z[64], ..., z[14*64]
 *   pcm[16*nch] - weighted sum over z[2], z[2 + 2*64], ..., z[2 + 14*64]
 *                 (even multiples of 64 only)
 * Both sums are converted with mp3d_scale_pcm(). The accumulation order is
 * kept fixed so the floating-point result does not change.
 */
static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
{
    const float *z2 = z + 2;
    float acc;

    acc  = (z[14*64] - z[ 0]) * 29;
    acc += (z[ 1*64] + z[13*64]) * 213;
    acc += (z[12*64] - z[ 2*64]) * 459;
    acc += (z[ 3*64] + z[11*64]) * 2037;
    acc += (z[10*64] - z[ 4*64]) * 5153;
    acc += (z[ 5*64] + z[ 9*64]) * 6574;
    acc += (z[ 8*64] - z[ 6*64]) * 37489;
    acc += z[ 7*64] * 75038;
    pcm[0] = mp3d_scale_pcm(acc);

    acc  = z2[14*64] * 104;
    acc += z2[12*64] * 1567;
    acc += z2[10*64] * 9727;
    acc += z2[ 8*64] * 64019;
    acc += z2[ 6*64] * -9975;
    acc += z2[ 4*64] * -45;
    acc += z2[ 2*64] * 146;
    acc += z2[ 0*64] * -5;
    pcm[16*nch] = mp3d_scale_pcm(acc);
}
1463
mp3d_synth(float * xl,mp3d_sample_t * dstl,int nch,float * lins)1464 static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
1465 {
1466 int i;
1467 float *xr = xl + 576*(nch - 1);
1468 mp3d_sample_t *dstr = dstl + (nch - 1);
1469
1470 static const float g_win[] = {
1471 -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
1472 -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
1473 -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
1474 -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
1475 -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
1476 -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
1477 -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
1478 -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
1479 -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
1480 -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
1481 -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
1482 -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
1483 -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
1484 -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
1485 -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
1486 };
1487 float *zlin = lins + 15*64;
1488 const float *w = g_win;
1489
1490 zlin[4*15] = xl[18*16];
1491 zlin[4*15 + 1] = xr[18*16];
1492 zlin[4*15 + 2] = xl[0];
1493 zlin[4*15 + 3] = xr[0];
1494
1495 zlin[4*31] = xl[1 + 18*16];
1496 zlin[4*31 + 1] = xr[1 + 18*16];
1497 zlin[4*31 + 2] = xl[1];
1498 zlin[4*31 + 3] = xr[1];
1499
1500 mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
1501 mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
1502 mp3d_synth_pair(dstl, nch, lins + 4*15);
1503 mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);
1504
1505 #if HAVE_SIMD
1506 if (have_simd()) for (i = 14; i >= 0; i--)
1507 {
1508 #define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
1509 #define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); }
1510 #define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
1511 #define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
1512 f4 a, b;
1513 zlin[4*i] = xl[18*(31 - i)];
1514 zlin[4*i + 1] = xr[18*(31 - i)];
1515 zlin[4*i + 2] = xl[1 + 18*(31 - i)];
1516 zlin[4*i + 3] = xr[1 + 18*(31 - i)];
1517 zlin[4*i + 64] = xl[1 + 18*(1 + i)];
1518 zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
1519 zlin[4*i - 64 + 2] = xl[18*(1 + i)];
1520 zlin[4*i - 64 + 3] = xr[18*(1 + i)];
1521
1522 V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
1523
1524 {
1525 #ifndef MINIMP3_FLOAT_OUTPUT
1526 #if HAVE_SSE
1527 static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
1528 static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
1529 __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
1530 _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
1531 dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
1532 dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
1533 dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
1534 dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
1535 dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
1536 dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
1537 dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
1538 dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
1539 #else /* HAVE_SSE */
1540 int16x4_t pcma, pcmb;
1541 a = VADD(a, VSET(0.5f));
1542 b = VADD(b, VSET(0.5f));
1543 pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
1544 pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
1545 vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
1546 vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
1547 vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
1548 vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
1549 vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
1550 vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
1551 vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
1552 vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
1553 #endif /* HAVE_SSE */
1554
1555 #else /* MINIMP3_FLOAT_OUTPUT */
1556
1557 static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
1558 a = VMUL(a, g_scale);
1559 b = VMUL(b, g_scale);
1560 #if HAVE_SSE
1561 _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
1562 _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
1563 _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
1564 _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
1565 _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
1566 _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
1567 _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
1568 _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
1569 #else /* HAVE_SSE */
1570 vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
1571 vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
1572 vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
1573 vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
1574 vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
1575 vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
1576 vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
1577 vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
1578 #endif /* HAVE_SSE */
1579 #endif /* MINIMP3_FLOAT_OUTPUT */
1580 }
1581 } else
1582 #endif /* HAVE_SIMD */
1583 #ifdef MINIMP3_ONLY_SIMD
1584 {}
1585 #else /* MINIMP3_ONLY_SIMD */
1586 for (i = 14; i >= 0; i--)
1587 {
1588 #define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
1589 #define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; }
1590 #define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
1591 #define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
1592 float a[4], b[4];
1593
1594 zlin[4*i] = xl[18*(31 - i)];
1595 zlin[4*i + 1] = xr[18*(31 - i)];
1596 zlin[4*i + 2] = xl[1 + 18*(31 - i)];
1597 zlin[4*i + 3] = xr[1 + 18*(31 - i)];
1598 zlin[4*(i + 16)] = xl[1 + 18*(1 + i)];
1599 zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
1600 zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
1601 zlin[4*(i - 16) + 3] = xr[18*(1 + i)];
1602
1603 S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)
1604
1605 dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
1606 dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
1607 dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
1608 dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
1609 dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
1610 dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
1611 dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
1612 dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
1613 }
1614 #endif /* MINIMP3_ONLY_SIMD */
1615 }
1616
/*
 * Runs the synthesis stage for one granule.
 *
 *   qmf_state  filter state carried between granules (15*64 floats), read at
 *              entry and rewritten before returning
 *   grbuf      granule spectrum; channel c starts at grbuf + 576*c
 *   nbands     band count handed to mp3d_DCT_II and used as the loop bound
 *   nch        number of channels
 *   pcm        destination for the synthesized samples
 *   lins       work buffer: receives a copy of qmf_state, is then extended by
 *              mp3d_synth, and finally supplies the new qmf_state
 */
static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
{
    const int state_len = 15*64;
    float *next_state;
    int ch, idx;

    /* Transform every channel in place before synthesis. */
    for (ch = 0; ch < nch; ch++)
    {
        mp3d_DCT_II(&grbuf[576*ch], nbands);
    }

    /* Seed the work buffer with the state left by the previous granule. */
    memcpy(lins, qmf_state, sizeof(float)*state_len);

    /* One mp3d_synth call per two bands. */
    for (idx = 0; idx < nbands; idx += 2)
    {
        mp3d_synth(&grbuf[idx], &pcm[32*nch*idx], nch, &lins[idx*64]);
    }

    /* The state for the next granule starts nbands*64 floats into lins. */
    next_state = lins + nbands*64;
#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
    if (nch == 1)
    {
        /* Mono: only the even-indexed floats of the state are refreshed. */
        for (idx = 0; idx < state_len; idx += 2)
        {
            qmf_state[idx] = next_state[idx];
        }
        return;
    }
#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
    memcpy(qmf_state, next_state, sizeof(float)*state_len);
}
1644
mp3d_match_frame(const uint8_t * hdr,int mp3_bytes,int frame_bytes)1645 static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1646 {
1647 int i, nmatch;
1648 for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1649 {
1650 i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1651 if (i + HDR_SIZE > mp3_bytes)
1652 return nmatch > 0;
1653 if (!hdr_compare(hdr, hdr + i))
1654 return 0;
1655 }
1656 return 1;
1657 }
1658
mp3d_find_frame(const uint8_t * mp3,int mp3_bytes,int * free_format_bytes,int * ptr_frame_bytes)1659 static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1660 {
1661 int i, k;
1662 for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1663 {
1664 if (hdr_valid(mp3))
1665 {
1666 int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1667 int frame_and_padding = frame_bytes + hdr_padding(mp3);
1668
1669 for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1670 {
1671 if (hdr_compare(mp3, mp3 + k))
1672 {
1673 int fb = k - hdr_padding(mp3);
1674 int nextfb = fb + hdr_padding(mp3 + k);
1675 if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1676 continue;
1677 frame_and_padding = k;
1678 frame_bytes = fb;
1679 *free_format_bytes = fb;
1680 }
1681 }
1682 if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1683 mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1684 (!i && frame_and_padding == mp3_bytes))
1685 {
1686 *ptr_frame_bytes = frame_and_padding;
1687 return i;
1688 }
1689 *free_format_bytes = 0;
1690 }
1691 }
1692 *ptr_frame_bytes = 0;
1693 return mp3_bytes;
1694 }
1695
/*
 * Puts the decoder into its "no previous frame" state.
 *
 * Only the first byte of the cached header is cleared; mp3dec_decode_frame()
 * trusts the cached header only while that byte equals 0xff, so this is
 * enough to force a fresh frame search on the next call.
 */
void mp3dec_init(mp3dec_t *dec)
{
    unsigned char *cached_header = dec->header;

    cached_header[0] = 0;
}
1700
/*
 * Locates and decodes at most one frame from the input buffer.
 *
 *   dec        decoder state; fully zeroed here whenever the cached header
 *              cannot be reused and a new frame search is needed
 *   mp3        input bytes
 *   mp3_bytes  number of bytes available at mp3
 *   pcm        output samples; when NULL the frame is located and described
 *              in *info but not decoded
 *   info       receives the frame description. When no frame is found only
 *              info->frame_bytes is written (the value returned by
 *              mp3d_find_frame); the other fields are left untouched.
 *
 * Returns hdr_frame_samples() of the frame on success, and 0 when no frame
 * was found, when the bitstream was overrun, or when L3_restore_reservoir()
 * reported failure. info->frame_bytes is the offset of the frame plus its
 * length, i.e. presumably the number of input bytes the caller should skip
 * (confirm against callers).
 */
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
{
    int i = 0, igr, frame_size = 0, success = 1;
    const uint8_t *hdr;
    bs_t bs_frame[1];
    mp3dec_scratch_t scratch;

    /*
     * Fast path: the buffer starts with a header that hdr_compare() accepts
     * against the one cached from the previous call. The computed size is
     * kept only if the frame exactly fills the buffer or is followed by
     * another matching header; otherwise fall back to a full search.
     */
    if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
    {
        frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
        if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))
        {
            frame_size = 0;
        }
    }
    if (!frame_size)
    {
        /* Continuity is lost: wipe the whole decoder state before searching. */
        memset(dec, 0, sizeof(mp3dec_t));
        i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
        if (!frame_size || i + frame_size > mp3_bytes)
        {
            info->frame_bytes = i;
            return 0;
        }
    }

    /* i is the offset of the frame inside the buffer (0 on the fast path). */
    hdr = mp3 + i;
    memcpy(dec->header, hdr, HDR_SIZE);
    info->frame_bytes = i + frame_size;
    info->frame_offset = i;
    info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
    info->hz = hdr_sample_rate_hz(hdr);
    info->layer = 4 - HDR_GET_LAYER(hdr);
    info->bitrate_kbps = hdr_bitrate_kbps(hdr);

    /* No output buffer: report the frame without decoding it. */
    if (!pcm)
    {
        return hdr_frame_samples(hdr);
    }

    /* The bit reader covers the frame payload that follows the header. */
    bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
    if (HDR_IS_CRC(hdr))
    {
        /* Skip 16 bits (presumably the CRC word; it is not verified here). */
        get_bits(bs_frame, 16);
    }

    if (info->layer == 3)
    {
        int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);
        if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
        {
            /* Bad side info or read past the payload: drop the cached header. */
            mp3dec_init(dec);
            return 0;
        }
        success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
        if (success)
        {
            /* Two granules when HDR_TEST_MPEG1 is set, otherwise one; pcm advances 576*channels per granule. */
            for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels)
            {
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
            }
        }
        /* The reservoir is saved even when restoring it failed for this frame. */
        L3_save_reservoir(dec, &scratch);
    } else
    {
#ifdef MINIMP3_ONLY_MP3
        /* Layer 1/2 support compiled out. */
        return 0;
#else /* MINIMP3_ONLY_MP3 */
        L12_scale_info sci[1];
        L12_read_scale_info(hdr, bs_frame, sci);

        memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
        for (i = 0, igr = 0; igr < 3; igr++)
        {
            /*
             * i accumulates the return values of L12_dequantize_granule();
             * once it reaches 12 the buffered data is scaled, synthesized
             * into 384*channels output samples, and the buffer is cleared.
             */
            if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
            {
                i = 0;
                L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                pcm += 384*info->channels;
            }
            if (bs_frame->pos > bs_frame->limit)
            {
                /* Read past the payload: drop the cached header and give up. */
                mp3dec_init(dec);
                return 0;
            }
        }
#endif /* MINIMP3_ONLY_MP3 */
    }
    /* success is 0 only when L3_restore_reservoir() failed, which yields 0 samples. */
    return success*hdr_frame_samples(dec->header);
}
1795
1796 #ifdef MINIMP3_FLOAT_OUTPUT
/*
 * Converts float PCM to saturated signed 16-bit PCM.
 *
 *   in           num_samples input floats; each is multiplied by 32768
 *   out          num_samples output samples
 *   num_samples  number of samples to convert; values <= 0 convert nothing
 *
 * Scaled values are clamped to [-32768, 32767] and rounded to the nearest
 * integer. With SIMD enabled, blocks of 8 samples go through the vector path
 * and the remaining 0..7 samples through the scalar loop at the end.
 */
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
    int i = 0;
#if HAVE_SIMD
    int aligned_count = num_samples & ~7;
    for(; i < aligned_count; i += 8)
    {
        static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
        f4 a = VMUL(VLD(&in[i  ]), g_scale);
        f4 b = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
        static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
        static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
        __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                       _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
        out[i  ] = _mm_extract_epi16(pcm8, 0);
        out[i+1] = _mm_extract_epi16(pcm8, 1);
        out[i+2] = _mm_extract_epi16(pcm8, 2);
        out[i+3] = _mm_extract_epi16(pcm8, 3);
        out[i+4] = _mm_extract_epi16(pcm8, 4);
        out[i+5] = _mm_extract_epi16(pcm8, 5);
        out[i+6] = _mm_extract_epi16(pcm8, 6);
        out[i+7] = _mm_extract_epi16(pcm8, 7);
#else /* HAVE_SSE */
        int16x4_t pcma, pcmb;
        a = VADD(a, VSET(0.5f));
        b = VADD(b, VSET(0.5f));
        pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
        pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
        vst1_lane_s16(out+i  , pcma, 0);
        vst1_lane_s16(out+i+1, pcma, 1);
        vst1_lane_s16(out+i+2, pcma, 2);
        vst1_lane_s16(out+i+3, pcma, 3);
        vst1_lane_s16(out+i+4, pcmb, 0);
        vst1_lane_s16(out+i+5, pcmb, 1);
        vst1_lane_s16(out+i+6, pcmb, 2);
        vst1_lane_s16(out+i+7, pcmb, 3);
#endif /* HAVE_SSE */
    }
#endif /* HAVE_SIMD */
    /* Scalar path: the whole buffer without SIMD, otherwise the 0..7 leftover samples. */
    for(; i < num_samples; i++)
    {
        float sample = in[i] * 32768.0f;
        if (sample >= 32766.5)
            out[i] = (int16_t) 32767;
        else if (sample <= -32767.5)
            out[i] = (int16_t)-32768;
        else
        {
            /*
             * Round to nearest, ties away from zero. The float-to-int cast
             * truncates toward zero, so negative values need one more step
             * down. The sign must be taken from the biased float, not from
             * the truncated integer: values in (-1, 0) truncate to 0 and
             * would otherwise never receive the correction, turning every
             * sample in (-1.5, -0.5) into 0 instead of -1. Testing the float
             * is also what the NEON branch above does.
             */
            float biased = sample + .5f;
            int16_t s = (int16_t)biased;
            s -= (biased < 0);
            out[i] = s;
        }
    }
}
1852 #endif /* MINIMP3_FLOAT_OUTPUT */
1853 #endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */
1854