1 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
2 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
3
4 #include <assert.h>
5
6 #ifdef __cplusplus
7 extern "C" {
8 #endif
9
10 typedef struct
11 {
12 char *alloc_buffer;
13 int alloc_buffer_length_in_bytes;
14 } stb_vorbis_alloc;
15
16
17 /* FUNCTIONS USEABLE WITH ALL INPUT MODES */
18
19 typedef struct stb_vorbis stb_vorbis;
20
21 typedef struct
22 {
23 unsigned int sample_rate;
24 int channels;
25
26 unsigned int setup_memory_required;
27 unsigned int setup_temp_memory_required;
28 unsigned int temp_memory_required;
29
30 int max_frame_size;
31 } stb_vorbis_info;
32
33 /* get general information about the file */
34 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
35
36 /* get the last error detected (clears it, too) */
37 extern int stb_vorbis_get_error(stb_vorbis *f);
38
39 /* close an ogg vorbis file and free all memory in use */
40 extern void stb_vorbis_close(stb_vorbis *f);
41
42 /* this function returns the offset (in samples) from the beginning of the
43 * file that will be returned by the next decode, if it is known, or -1
44 * otherwise. after a flush_pushdata() call, this may take a while before
45 * it becomes valid again.
46 * NOT WORKING YET after a seek with PULLDATA API */
47 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
48
49 /* returns the current seek point within the file, or offset from the beginning
50 * of the memory buffer. In pushdata mode it returns 0. */
51 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
52
53 /* PULLING INPUT API */
54
55 #ifndef STB_VORBIS_NO_PULLDATA_API
56 /* This API assumes stb_vorbis is allowed to pull data from a source--
57 * either a block of memory containing the _entire_ vorbis stream, or a
58 * FILE * that you or it create, or possibly some other reading mechanism
59 * if you go modify the source to replace the FILE * case with some kind
60 * of callback to your code. (But if you don't support seeking, you may
61 * just want to go ahead and use pushdata.) */
62
63 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
64 int *error, stb_vorbis_alloc *alloc_buffer);
65 /* create an ogg vorbis decoder from an ogg vorbis stream in memory (note
66 * this must be the entire stream!). on failure, returns NULL and sets *error */
67
68 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
69 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
70 /* NOT WORKING YET
71 * these functions seek in the Vorbis file to (approximately) 'sample_number'.
72 * after calling seek_frame(), the next call to get_frame_*() will include
73 * the specified sample. after calling stb_vorbis_seek(), the next call to
74 * stb_vorbis_get_samples_* will start with the specified sample. If you
75 * do not need to seek to EXACTLY the target sample when using get_samples_*,
76 * you can also use seek_frame(). */
77
78 extern void stb_vorbis_seek_start(stb_vorbis *f);
79 /* this function is equivalent to stb_vorbis_seek(f,0), but it
80 * actually works */
81
82 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
83 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
84 /* these functions return the total length of the vorbis stream */
85
86 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
87 /* decode the next frame and return the number of samples. the number of
88 * channels returned are stored in *channels (which can be NULL--it is always
89 * the same as the number of channels reported by get_info). *output will
90 * contain an array of float* buffers, one per channel. These outputs will
91 * be overwritten on the next call to stb_vorbis_get_frame_*.
92 *
93 * You generally should not intermix calls to stb_vorbis_get_frame_*()
94 * and stb_vorbis_get_samples_*(), since the latter calls the former.
95 */
96
97 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
98 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
99 /* gets num_samples samples, not necessarily on a frame boundary--this requires
100 * buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
101 * Returns the number of samples stored per channel; it may be less than requested
102 * at the end of the file. If there are no more samples in the file, returns 0.
103 */
104
105 #endif
106
107 /* ERROR CODES */
108
109 enum STBVorbisError
110 {
111 VORBIS__no_error,
112
113 VORBIS_need_more_data=1, /* not a real error */
114
115 VORBIS_invalid_api_mixing, /* can't mix API modes */
116 VORBIS_outofmem, /* not enough memory */
117 VORBIS_feature_not_supported, /* uses floor 0 */
118 VORBIS_too_many_channels, /* STB_VORBIS_MAX_CHANNELS is too small */
119 VORBIS_file_open_failure, /* fopen() failed */
120 VORBIS_seek_without_length, /* can't seek in unknown-length file */
121
122 VORBIS_unexpected_eof=10, /* file is truncated? */
123 VORBIS_seek_invalid, /* seek past EOF */
124
125 /* decoding errors (corrupt/invalid stream) -- you probably
126 * don't care about the exact details of these */
127
128 /* vorbis errors: */
129 VORBIS_invalid_setup=20,
130 VORBIS_invalid_stream,
131
132 /* ogg errors: */
133 VORBIS_missing_capture_pattern=30,
134 VORBIS_invalid_stream_structure_version,
135 VORBIS_continued_packet_flag_invalid,
136 VORBIS_incorrect_stream_serial_number,
137 VORBIS_invalid_first_page,
138 VORBIS_bad_packet_type,
139 VORBIS_cant_find_last_page,
140 VORBIS_seek_failed
141 };
142
143
144 #ifdef __cplusplus
145 }
146 #endif
147
148 #endif /* STB_VORBIS_INCLUDE_STB_VORBIS_H */
149
150 #ifndef STB_VORBIS_HEADER_ONLY
151
152 /* global configuration settings (e.g. set these in the project/makefile),
153 * or just set them in this file at the top (although ideally the first few
154 * should be visible when the header file is compiled too, although it's not
155 * crucial)
156 */
157
158 /* STB_VORBIS_NO_PULLDATA_API
159 * does not compile the code for the non-pushdata APIs
160 */
161 #if 0
162 #define STB_VORBIS_NO_PULLDATA_API
163 #endif
164
165 /* STB_VORBIS_MAX_CHANNELS [number]
166 * globally define this to the maximum number of channels you need.
167 * The spec does not put a restriction on channels except that
168 * the count is stored in a byte, so 255 is the hard limit.
169 * Reducing this saves about 16 bytes per value, so using 16 saves
170 * (255-16)*16 or around 4KB. Plus anything other memory usage
171 * I forgot to account for. Can probably go as low as 8 (7.1 audio),
172 * 6 (5.1 audio), or 2 (stereo only).
173 */
174 #ifndef STB_VORBIS_MAX_CHANNELS
175 #define STB_VORBIS_MAX_CHANNELS 16 /* enough for anyone? */
176 #endif
177
178 /* STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
179 * sets the log size of the huffman-acceleration table. Maximum
180 * supported value is 24. with larger numbers, more decodings are O(1),
181 * but the table size is larger so worse cache missing, so you'll have
182 * to probe (and try multiple ogg vorbis files) to find the sweet spot.
183 */
184 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
185 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
186 #endif
187
188 /* STB_VORBIS_FAST_BINARY_LENGTH [number]
189 * sets the log size of the binary-search acceleration table. this
190 * is used in similar fashion to the fast-huffman size to set initial
191 * parameters for the binary search
192
193 * STB_VORBIS_FAST_HUFFMAN_INT
194 * The fast huffman tables are much more efficient if they can be
195 * stored as 16-bit results instead of 32-bit results. This restricts
196 * the codebooks to having only 65535 possible outcomes, though.
197 * (At least, accelerated by the huffman table.)
198 */
199 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
200 #define STB_VORBIS_FAST_HUFFMAN_SHORT
201 #endif
202
203 /* STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
204 * If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
205 * back on binary searching for the correct one. This requires storing
206 * extra tables with the huffman codes in sorted order. Defining this
207 * symbol trades off space for speed by forcing a linear search in the
208 * non-fast case, except for "sparse" codebooks.
209 */
210 #if 0
211 #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
212 #endif
213
214 /* STB_VORBIS_CODEBOOK_SHORTS
215 * The vorbis file format encodes VQ codebook floats as ax+b where a and
216 * b are floating point per-codebook constants, and x is a 16-bit int.
217 * Normally, stb_vorbis decodes them to floats rather than leaving them
218 * as 16-bit ints and computing ax+b while decoding. This is a speed/space
219 * tradeoff; you can save space by defining this flag.
220 */
221 #ifndef STB_VORBIS_CODEBOOK_SHORTS
222 #define STB_VORBIS_CODEBOOK_FLOATS
223 #endif
224
225 #include <retro_inline.h>
226
227 #define MAX_BLOCKSIZE_LOG 13 /* from specification */
228 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
229
230 #ifndef TRUE
231 #define TRUE 1
232 #define FALSE 0
233 #endif
234
235 #ifdef STB_VORBIS_CODEBOOK_FLOATS
236 typedef float stb_vorbis_codetype;
237 #else
238 typedef uint16_t stb_vorbis_codetype;
239 #endif
240
241 /* @NOTE
242 *
243 * Some arrays below are tagged "//varies", which means it's actually
244 * a variable-sized piece of data, but rather than malloc I assume it's
245 * small enough it's better to just allocate it all together with the
246 * main thing
247 *
248 * Most of the variables are specified with the smallest size I could pack
249 * them into. It might give better performance to make them all full-sized
250 * integers. It should be safe to freely rearrange the structures or change
251 * the sizes larger--nothing relies on silently truncating etc., nor the
252 * order of variables.
253 */
254
255 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
256 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
257
258 typedef struct
259 {
260 int dimensions, entries;
261 uint8_t *codeword_lengths;
262 float minimum_value;
263 float delta_value;
264 uint8_t value_bits;
265 uint8_t lookup_type;
266 uint8_t sequence_p;
267 uint8_t sparse;
268 uint32_t lookup_values;
269 stb_vorbis_codetype *multiplicands;
270 uint32_t *codewords;
271 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
272 int16_t fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
273 #else
274 int32_t fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
275 #endif
276 uint32_t *sorted_codewords;
277 int *sorted_values;
278 int sorted_entries;
279 } Codebook;
280
281 typedef struct
282 {
283 uint8_t order;
284 uint16_t rate;
285 uint16_t bark_map_size;
286 uint8_t amplitude_bits;
287 uint8_t amplitude_offset;
288 uint8_t number_of_books;
289 uint8_t book_list[16]; /* varies */
290 } Floor0;
291
292 typedef struct
293 {
294 uint8_t partitions;
295 uint8_t partition_class_list[32]; /* varies */
296 uint8_t class_dimensions[16]; /* varies */
297 uint8_t class_subclasses[16]; /* varies */
298 uint8_t class_masterbooks[16]; /* varies */
299 int16_t subclass_books[16][8]; /* varies */
300 uint16_t Xlist[31*8+2]; /* varies */
301 uint8_t sorted_order[31*8+2];
302 uint8_t neighbors[31*8+2][2];
303 uint8_t floor1_multiplier;
304 uint8_t rangebits;
305 int values;
306 } Floor1;
307
308 typedef union
309 {
310 Floor0 floor0;
311 Floor1 floor1;
312 } Floor;
313
314 typedef struct
315 {
316 uint32_t begin, end;
317 uint32_t part_size;
318 uint8_t classifications;
319 uint8_t classbook;
320 uint8_t **classdata;
321 int16_t (*residue_books)[8];
322 } Residue;
323
324 typedef struct
325 {
326 uint8_t magnitude;
327 uint8_t angle;
328 uint8_t mux;
329 } MappingChannel;
330
331 typedef struct
332 {
333 uint16_t coupling_steps;
334 MappingChannel *chan;
335 uint8_t submaps;
336 uint8_t submap_floor[15]; /* varies */
337 uint8_t submap_residue[15]; /* varies */
338 } Mapping;
339
340 typedef struct
341 {
342 uint8_t blockflag;
343 uint8_t mapping;
344 uint16_t windowtype;
345 uint16_t transformtype;
346 } Mode;
347
348 typedef struct
349 {
350 uint32_t goal_crc; /* expected crc if match */
351 int bytes_left; /* bytes left in packet */
352 uint32_t crc_so_far; /* running crc */
353 int bytes_done; /* bytes processed in _current_ chunk */
354 uint32_t sample_loc; /* granule pos encoded in page */
355 } CRCscan;
356
357 typedef struct
358 {
359 uint32_t page_start, page_end;
360 uint32_t after_previous_page_start;
361 uint32_t first_decoded_sample;
362 uint32_t last_decoded_sample;
363 } ProbedPage;
364
365 struct stb_vorbis
366 {
367 /* user-accessible info */
368 unsigned int sample_rate;
369 int channels;
370
371 unsigned int setup_memory_required;
372 unsigned int temp_memory_required;
373 unsigned int setup_temp_memory_required;
374
375 uint8_t *stream;
376 uint8_t *stream_start;
377 uint8_t *stream_end;
378
379 uint32_t stream_len;
380
381 uint8_t push_mode;
382
383 uint32_t first_audio_page_offset;
384
385 ProbedPage p_first, p_last;
386
387 /* memory management */
388 stb_vorbis_alloc alloc;
389 int setup_offset;
390 int temp_offset;
391
392 /* run-time results */
393 int eof;
394 enum STBVorbisError error;
395
396 /* user-useful data */
397
398 /* header info */
399 int blocksize[2];
400 int blocksize_0, blocksize_1;
401 int codebook_count;
402 Codebook *codebooks;
403 int floor_count;
404 uint16_t floor_types[64]; /* varies */
405 Floor *floor_config;
406 int residue_count;
407 uint16_t residue_types[64]; /* varies */
408 Residue *residue_config;
409 int mapping_count;
410 Mapping *mapping;
411 int mode_count;
412 Mode mode_config[64]; /* varies */
413
414 uint32_t total_samples;
415
416 /* decode buffer */
417 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
418 float *outputs [STB_VORBIS_MAX_CHANNELS];
419
420 float *previous_window[STB_VORBIS_MAX_CHANNELS];
421 int previous_length;
422
423 int16_t *finalY[STB_VORBIS_MAX_CHANNELS];
424
425 uint32_t current_loc; /* sample location of next frame to decode */
426 int current_loc_valid;
427
428 /* per-blocksize precomputed data */
429
430 /* twiddle factors */
431 float *A[2],*B[2],*C[2];
432 float *window[2];
433 uint16_t *bit_reverse[2];
434
435 /* current page/packet/segment streaming info */
436 uint32_t serial; /* stream serial number for verification */
437 int last_page;
438 int segment_count;
439 uint8_t segments[255];
440 uint8_t page_flag;
441 uint8_t bytes_in_seg;
442 uint8_t first_decode;
443 int next_seg;
444 int last_seg; /* flag that we're on the last segment */
445 int last_seg_which; /* what was the segment number of the last seg? */
446 uint32_t acc;
447 int valid_bits;
448 int packet_bytes;
449 int end_seg_with_known_loc;
450 uint32_t known_loc_for_packet;
451 int discard_samples_deferred;
452 uint32_t samples_output;
453
454 /* push mode scanning */
455 int page_crc_tests; /* only in push_mode: number of tests active; -1 if not searching */
456
457 /* sample-access */
458 int channel_buffer_start;
459 int channel_buffer_end;
460 };
461
462 #define IS_PUSH_MODE(f) FALSE
463
464 typedef struct stb_vorbis vorb;
465
error(vorb * f,enum STBVorbisError e)466 static int error(vorb *f, enum STBVorbisError e)
467 {
468 f->error = e;
469 if (!f->eof && e != VORBIS_need_more_data) {
470 f->error=e; /* breakpoint for debugging */
471 }
472 return 0;
473 }
474
475
476 /* these functions are used for allocating temporary memory
477 * while decoding. if you can afford the stack space, use
478 * alloca(); otherwise, provide a temp buffer and it will
479 * allocate out of those.
480 */
481
482 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
483
484 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
485 #define temp_alloc_save(f) ((f)->temp_offset)
486 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
487
488 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
489
490 /* given a sufficiently large block of memory, make an array of pointers to subblocks of it */
make_block_array(void * mem,int count,int size)491 static void *make_block_array(void *mem, int count, int size)
492 {
493 int i;
494 void ** p = (void **) mem;
495 char *q = (char *) (p + count);
496 for (i=0; i < count; ++i) {
497 p[i] = q;
498 q += size;
499 }
500 return p;
501 }
502
setup_malloc(vorb * f,int sz)503 static void *setup_malloc(vorb *f, int sz)
504 {
505 sz = (sz+3) & ~3;
506 f->setup_memory_required += sz;
507 if (f->alloc.alloc_buffer) {
508 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
509 if (f->setup_offset + sz > f->temp_offset) return NULL;
510 f->setup_offset += sz;
511 return p;
512 }
513 return sz ? malloc(sz) : NULL;
514 }
515
setup_free(vorb * f,void * p)516 static void setup_free(vorb *f, void *p)
517 {
518 if (f->alloc.alloc_buffer) return; /* do nothing; setup mem is not a stack */
519 free(p);
520 }
521
setup_temp_malloc(vorb * f,int sz)522 static void *setup_temp_malloc(vorb *f, int sz)
523 {
524 sz = (sz+3) & ~3;
525 if (f->alloc.alloc_buffer) {
526 if (f->temp_offset - sz < f->setup_offset) return NULL;
527 f->temp_offset -= sz;
528 return (char *) f->alloc.alloc_buffer + f->temp_offset;
529 }
530 return malloc(sz);
531 }
532
setup_temp_free(vorb * f,void * p,int sz)533 static void setup_temp_free(vorb *f, void *p, int sz)
534 {
535 if (f->alloc.alloc_buffer) {
536 f->temp_offset += (sz+3)&~3;
537 return;
538 }
539 free(p);
540 }
541
542 #define CRC32_POLY 0x04c11db7 /* from spec */
543
544 static uint32_t stb_vorbis_crc_table[256];
crc32_init(void)545 static void crc32_init(void)
546 {
547 int i,j;
548 uint32_t s;
549 for(i=0; i < 256; i++) {
550 for (s=i<<24, j=0; j < 8; ++j)
551 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
552 stb_vorbis_crc_table[i] = s;
553 }
554 }
555
crc32_update(uint32_t crc,uint8_t byte)556 static INLINE uint32_t crc32_update(uint32_t crc, uint8_t byte)
557 {
558 return (crc << 8) ^ stb_vorbis_crc_table[byte ^ (crc >> 24)];
559 }
560
561 /* used in setup, and for huffman that doesn't go fast path */
bit_reverse(unsigned int n)562 static unsigned int bit_reverse(unsigned int n)
563 {
564 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
565 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
566 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
567 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
568 return (n >> 16) | (n << 16);
569 }
570
square(float x)571 static float square(float x)
572 {
573 return x*x;
574 }
575
576 /* this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
577 * as required by the specification. fast(?) implementation from stb.h
578 * @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
579 */
ilog(int32_t n)580 static int ilog(int32_t n)
581 {
582 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
583
584 /* 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) */
585 if (n < (1 << 14))
586 if (n < (1 << 4)) return 0 + log2_4[n ];
587 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
588 else return 10 + log2_4[n >> 10];
589 else if (n < (1 << 24))
590 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
591 else return 20 + log2_4[n >> 20];
592 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
593 else if (n < (1 << 31)) return 30 + log2_4[n >> 30];
594 else return 0; /* signed n returns 0 */
595 }
596
597 #ifndef M_PI
598 #define M_PI 3.14159265358979323846264f /* from CRC */
599 #endif
600
601 /* code length assigned to a value with no huffman encoding */
602 #define NO_CODE 255
603
604 /* LEAF SETUP FUNCTIONS */
605
606 /* these functions are only called at setup, and only a few times
607 * per file */
608
float32_unpack(uint32_t x)609 static float float32_unpack(uint32_t x)
610 {
611 /* from the specification */
612 uint32_t mantissa = x & 0x1fffff;
613 uint32_t sign = x & 0x80000000;
614 uint32_t exp = (x & 0x7fe00000) >> 21;
615 double res = sign ? -(double)mantissa : (double)mantissa;
616 return (float) ldexp((float)res, exp-788);
617 }
618
619
620 /* zlib & jpeg huffman tables assume that the output symbols
621 * can either be arbitrarily arranged, or have monotonically
622 * increasing frequencies--they rely on the lengths being sorted;
623 * this makes for a very simple generation algorithm.
624 * vorbis allows a huffman table with non-sorted lengths. This
625 * requires a more sophisticated construction, since symbols in
626 * order do not map to huffman codes "in order".
627 */
add_entry(Codebook * c,uint32_t huff_code,int symbol,int count,int len,uint32_t * values)628 static void add_entry(Codebook *c, uint32_t huff_code, int symbol, int count, int len, uint32_t *values)
629 {
630 if (!c->sparse) {
631 c->codewords [symbol] = huff_code;
632 } else {
633 c->codewords [count] = huff_code;
634 c->codeword_lengths[count] = len;
635 values [count] = symbol;
636 }
637 }
638
compute_codewords(Codebook * c,uint8_t * len,int n,uint32_t * values)639 static int compute_codewords(Codebook *c, uint8_t *len, int n, uint32_t *values)
640 {
641 int i,k,m=0;
642 uint32_t available[32];
643
644 memset(available, 0, sizeof(available));
645 /* find the first entry */
646 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
647 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
648 /* add to the list */
649 add_entry(c, 0, k, m++, len[k], values);
650 /* add all available leaves */
651 for (i=1; i <= len[k]; ++i)
652 available[i] = 1 << (32-i);
653 /* note that the above code treats the first case specially,
654 * but it's really the same as the following code, so they
655 * could probably be combined (except the initial code is 0,
656 * and I use 0 in available[] to mean 'empty') */
657 for (i=k+1; i < n; ++i) {
658 uint32_t res;
659 int z = len[i], y;
660 if (z == NO_CODE) continue;
661 /* find lowest available leaf (should always be earliest,
662 * which is what the specification calls for)
663 * note that this property, and the fact we can never have
664 * more than one free leaf at a given level, isn't totally
665 * trivial to prove, but it seems true and the assert never
666 * fires, so! */
667 while (z > 0 && !available[z]) --z;
668 if (z == 0) { assert(0); return FALSE; }
669 res = available[z];
670 available[z] = 0;
671 add_entry(c, bit_reverse(res), i, m++, len[i], values);
672 /* propogate availability up the tree */
673 if (z != len[i]) {
674 for (y=len[i]; y > z; --y) {
675 assert(available[y] == 0);
676 available[y] = res + (1 << (32-y));
677 }
678 }
679 }
680 return TRUE;
681 }
682
683 /* accelerated huffman table allows fast O(1) match of all symbols
684 * of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH */
compute_accelerated_huffman(Codebook * c)685 static void compute_accelerated_huffman(Codebook *c)
686 {
687 int i, len;
688 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
689 c->fast_huffman[i] = -1;
690
691 len = c->sparse ? c->sorted_entries : c->entries;
692 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
693 if (len > 32767) len = 32767; /* largest possible value we can encode! */
694 #endif
695 for (i=0; i < len; ++i) {
696 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
697 uint32_t z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
698 /* set table entries for all bit combinations in the higher bits */
699 while (z < FAST_HUFFMAN_TABLE_SIZE) {
700 c->fast_huffman[z] = i;
701 z += 1 << c->codeword_lengths[i];
702 }
703 }
704 }
705 }
706
707 #ifdef _MSC_VER
708 #define STBV_CDECL __cdecl
709 #else
710 #define STBV_CDECL
711 #endif
712
uint32_t_compare(const void * p,const void * q)713 static int STBV_CDECL uint32_t_compare(const void *p, const void *q)
714 {
715 uint32_t x = * (uint32_t *) p;
716 uint32_t y = * (uint32_t *) q;
717 return x < y ? -1 : x > y;
718 }
719
include_in_sort(Codebook * c,uint8_t len)720 static int include_in_sort(Codebook *c, uint8_t len)
721 {
722 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
723 if (len == NO_CODE) return FALSE;
724 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
725 return FALSE;
726 }
727
728 /* if the fast table above doesn't work, we want to binary
729 * search them... need to reverse the bits */
compute_sorted_huffman(Codebook * c,uint8_t * lengths,uint32_t * values)730 static void compute_sorted_huffman(Codebook *c, uint8_t *lengths, uint32_t *values)
731 {
732 int i, len;
733 /* build a list of all the entries
734 * OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
735 * this is kind of a frivolous optimization--I don't see any performance improvement,
736 * but it's like 4 extra lines of code, so. */
737 if (!c->sparse) {
738 int k = 0;
739 for (i=0; i < c->entries; ++i)
740 if (include_in_sort(c, lengths[i]))
741 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
742 assert(k == c->sorted_entries);
743 } else {
744 for (i=0; i < c->sorted_entries; ++i)
745 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
746 }
747
748 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_t_compare);
749 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
750
751 len = c->sparse ? c->sorted_entries : c->entries;
752 /* now we need to indicate how they correspond; we could either
753 * #1: sort a different data structure that says who they correspond to
754 * #2: for each sorted entry, search the original list to find who corresponds
755 * #3: for each original entry, find the sorted entry
756 * #1 requires extra storage, #2 is slow, #3 can use binary search! */
757 for (i=0; i < len; ++i) {
758 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
759 if (include_in_sort(c,huff_len)) {
760 uint32_t code = bit_reverse(c->codewords[i]);
761 int x=0, n=c->sorted_entries;
762 while (n > 1) {
763 /* invariant: sc[x] <= code < sc[x+n] */
764 int m = x + (n >> 1);
765 if (c->sorted_codewords[m] <= code) {
766 x = m;
767 n -= (n>>1);
768 } else {
769 n >>= 1;
770 }
771 }
772 assert(c->sorted_codewords[x] == code);
773 if (c->sparse) {
774 c->sorted_values[x] = values[i];
775 c->codeword_lengths[x] = huff_len;
776 } else {
777 c->sorted_values[x] = i;
778 }
779 }
780 }
781 }
782
783 /* only run while parsing the header (3 times) */
vorbis_validate(uint8_t * data)784 static int vorbis_validate(uint8_t *data)
785 {
786 static uint8_t vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
787 return memcmp(data, vorbis, 6) == 0;
788 }
789
790 /* called from setup only, once per code book
791 * (formula implied by specification) */
lookup1_values(int entries,int dim)792 static int lookup1_values(int entries, int dim)
793 {
794 int r = (int) floor(exp((float) log((float) entries) / dim));
795 if ((int) floor(pow((float) r+1, dim)) <= entries) /* (int) cast for MinGW warning; */
796 ++r; /* floor() to avoid _ftol() when non-CRT */
797 assert(pow((float) r+1, dim) > entries);
798 assert((int) floor(pow((float) r, dim)) <= entries); /* (int),floor() as above */
799 return r;
800 }
801
802 /* called twice per file */
compute_twiddle_factors(int n,float * A,float * B,float * C)803 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
804 {
805 int n4 = n >> 2, n8 = n >> 3;
806 int k,k2;
807
808 for (k=k2=0; k < n4; ++k,k2+=2) {
809 A[k2 ] = (float) cos(4*k*M_PI/n);
810 A[k2+1] = (float) -sin(4*k*M_PI/n);
811 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
812 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
813 }
814 for (k=k2=0; k < n8; ++k,k2+=2) {
815 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
816 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
817 }
818 }
819
compute_window(int n,float * window)820 static void compute_window(int n, float *window)
821 {
822 int n2 = n >> 1, i;
823 for (i=0; i < n2; ++i)
824 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
825 }
826
compute_bitreverse(int n,uint16_t * rev)827 static void compute_bitreverse(int n, uint16_t *rev)
828 {
829 int ld = ilog(n) - 1; /* ilog is off-by-one from normal definitions */
830 int i, n8 = n >> 3;
831 for (i=0; i < n8; ++i)
832 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
833 }
834
init_blocksize(vorb * f,int b,int n)835 static int init_blocksize(vorb *f, int b, int n)
836 {
837 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
838 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
839 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
840 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
841 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
842 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
843 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
844 if (!f->window[b]) return error(f, VORBIS_outofmem);
845 compute_window(n, f->window[b]);
846 f->bit_reverse[b] = (uint16_t *) setup_malloc(f, sizeof(uint16_t) * n8);
847 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
848 compute_bitreverse(n, f->bit_reverse[b]);
849 return TRUE;
850 }
851
neighbors(uint16_t * x,int n,int * plow,int * phigh)852 static void neighbors(uint16_t *x, int n, int *plow, int *phigh)
853 {
854 int low = -1;
855 int high = 65536;
856 int i;
857 for (i=0; i < n; ++i) {
858 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
859 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
860 }
861 }
862
863 /* this has been repurposed so y is now the original index instead of y */
864 typedef struct
865 {
866 uint16_t x,y;
867 } STBV_Point;
868
point_compare(const void * p,const void * q)869 static int STBV_CDECL point_compare(const void *p, const void *q)
870 {
871 STBV_Point *a = (STBV_Point *) p;
872 STBV_Point *b = (STBV_Point *) q;
873 return a->x < b->x ? -1 : a->x > b->x;
874 }
875
876 /* END LEAF SETUP FUNCTIONS */
877
get8(vorb * z)878 static uint8_t get8(vorb *z)
879 {
880 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
881 return *z->stream++;
882 }
883
get32(vorb * f)884 static uint32_t get32(vorb *f)
885 {
886 uint32_t x;
887 x = get8(f);
888 x += get8(f) << 8;
889 x += get8(f) << 16;
890 x += get8(f) << 24;
891 return x;
892 }
893
getn(vorb * z,uint8_t * data,int n)894 static int getn(vorb *z, uint8_t *data, int n)
895 {
896 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
897 memcpy(data, z->stream, n);
898 z->stream += n;
899 return 1;
900 }
901
skip(vorb * z,int n)902 static void skip(vorb *z, int n)
903 {
904 z->stream += n;
905 if (z->stream >= z->stream_end) z->eof = 1;
906 return;
907 }
908
set_file_offset(stb_vorbis * f,unsigned int loc)909 static int set_file_offset(stb_vorbis *f, unsigned int loc)
910 {
911 f->eof = 0;
912 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
913 f->stream = f->stream_end;
914 f->eof = 1;
915 return 0;
916 } else {
917 f->stream = f->stream_start + loc;
918 return 1;
919 }
920 }
921
922
923 static uint8_t ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
924
capture_pattern(vorb * f)925 static int capture_pattern(vorb *f)
926 {
927 if (0x4f != get8(f)) return FALSE;
928 if (0x67 != get8(f)) return FALSE;
929 if (0x67 != get8(f)) return FALSE;
930 if (0x53 != get8(f)) return FALSE;
931 return TRUE;
932 }
933
934 #define PAGEFLAG_continued_packet 1
935 #define PAGEFLAG_first_page 2
936 #define PAGEFLAG_last_page 4
937
start_page_no_capturepattern(vorb * f)938 static int start_page_no_capturepattern(vorb *f)
939 {
940 uint32_t loc0,loc1,n;
941 /* stream structure version */
942 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
943 /* header flag */
944 f->page_flag = get8(f);
945 /* absolute granule position */
946 loc0 = get32(f);
947 loc1 = get32(f);
948 /* @TODO: validate loc0,loc1 as valid positions?
949 * stream serial number -- vorbis doesn't interleave, so discard */
950 get32(f);
951 /*if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
952 * page sequence number */
953 n = get32(f);
954 f->last_page = n;
955 /* CRC32 */
956 get32(f);
957 /* page_segments */
958 f->segment_count = get8(f);
959 if (!getn(f, f->segments, f->segment_count))
960 return error(f, VORBIS_unexpected_eof);
961 /* assume we _don't_ know any the sample position of any segments */
962 f->end_seg_with_known_loc = -2;
963 if (loc0 != ~0U || loc1 != ~0U) {
964 int i;
965 /* determine which packet is the last one that will complete */
966 for (i=f->segment_count-1; i >= 0; --i)
967 if (f->segments[i] < 255)
968 break;
969 /* 'i' is now the index of the _last_ segment of a packet that ends */
970 if (i >= 0) {
971 f->end_seg_with_known_loc = i;
972 f->known_loc_for_packet = loc0;
973 }
974 }
975 if (f->first_decode) {
976 int i,len;
977 ProbedPage p;
978 len = 0;
979 for (i=0; i < f->segment_count; ++i)
980 len += f->segments[i];
981 len += 27 + f->segment_count;
982 p.page_start = f->first_audio_page_offset;
983 p.page_end = p.page_start + len;
984 p.after_previous_page_start = p.page_start;
985 p.first_decoded_sample = 0;
986 p.last_decoded_sample = loc0;
987 f->p_first = p;
988 }
989 f->next_seg = 0;
990 return TRUE;
991 }
992
start_page(vorb * f)993 static int start_page(vorb *f)
994 {
995 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
996 return start_page_no_capturepattern(f);
997 }
998
start_packet(vorb * f)999 static int start_packet(vorb *f)
1000 {
1001 while (f->next_seg == -1) {
1002 if (!start_page(f)) return FALSE;
1003 if (f->page_flag & PAGEFLAG_continued_packet)
1004 return error(f, VORBIS_continued_packet_flag_invalid);
1005 }
1006 f->last_seg = FALSE;
1007 f->valid_bits = 0;
1008 f->packet_bytes = 0;
1009 f->bytes_in_seg = 0;
1010 /* f->next_seg is now valid */
1011 return TRUE;
1012 }
1013
maybe_start_packet(vorb * f)1014 static int maybe_start_packet(vorb *f)
1015 {
1016 if (f->next_seg == -1) {
1017 int x = get8(f);
1018 if (f->eof) return FALSE; /* EOF at page boundary is not an error! */
1019 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1020 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1021 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1022 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1023 if (!start_page_no_capturepattern(f)) return FALSE;
1024 if (f->page_flag & PAGEFLAG_continued_packet) {
1025 /* set up enough state that we can read this packet if we want,
1026 * e.g. during recovery */
1027 f->last_seg = FALSE;
1028 f->bytes_in_seg = 0;
1029 return error(f, VORBIS_continued_packet_flag_invalid);
1030 }
1031 }
1032 return start_packet(f);
1033 }
1034
next_segment(vorb * f)1035 static int next_segment(vorb *f)
1036 {
1037 int len;
1038 if (f->last_seg) return 0;
1039 if (f->next_seg == -1) {
1040 f->last_seg_which = f->segment_count-1; /* in case start_page fails */
1041 if (!start_page(f)) { f->last_seg = 1; return 0; }
1042 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1043 }
1044 len = f->segments[f->next_seg++];
1045 if (len < 255) {
1046 f->last_seg = TRUE;
1047 f->last_seg_which = f->next_seg-1;
1048 }
1049 if (f->next_seg >= f->segment_count)
1050 f->next_seg = -1;
1051 assert(f->bytes_in_seg == 0);
1052 f->bytes_in_seg = len;
1053 return len;
1054 }
1055
1056 #define EOP (-1)
1057 #define INVALID_BITS (-1)
1058
get8_packet_raw(vorb * f)1059 static int get8_packet_raw(vorb *f)
1060 {
1061 if (!f->bytes_in_seg) { /* CLANG! */
1062 if (f->last_seg) return EOP;
1063 else if (!next_segment(f)) return EOP;
1064 }
1065 assert(f->bytes_in_seg > 0);
1066 --f->bytes_in_seg;
1067 ++f->packet_bytes;
1068 return get8(f);
1069 }
1070
get8_packet(vorb * f)1071 static int get8_packet(vorb *f)
1072 {
1073 int x = get8_packet_raw(f);
1074 f->valid_bits = 0;
1075 return x;
1076 }
1077
flush_packet(vorb * f)1078 static void flush_packet(vorb *f)
1079 {
1080 while (get8_packet_raw(f) != EOP);
1081 }
1082
1083 /* @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1084 * as the huffman decoder? */
get_bits(vorb * f,int n)1085 static uint32_t get_bits(vorb *f, int n)
1086 {
1087 uint32_t z;
1088
1089 if (f->valid_bits < 0) return 0;
1090 if (f->valid_bits < n) {
1091 if (n > 24) {
1092 /* the accumulator technique below would not work correctly in this case */
1093 z = get_bits(f, 24);
1094 z += get_bits(f, n-24) << 24;
1095 return z;
1096 }
1097 if (f->valid_bits == 0) f->acc = 0;
1098 while (f->valid_bits < n) {
1099 int z = get8_packet_raw(f);
1100 if (z == EOP) {
1101 f->valid_bits = INVALID_BITS;
1102 return 0;
1103 }
1104 f->acc += z << f->valid_bits;
1105 f->valid_bits += 8;
1106 }
1107 }
1108 if (f->valid_bits < 0) return 0;
1109 z = f->acc & ((1 << n)-1);
1110 f->acc >>= n;
1111 f->valid_bits -= n;
1112 return z;
1113 }
1114
1115 /* @OPTIMIZE: primary accumulator for huffman
1116 * expand the buffer to as many bits as possible without reading off end of packet
1117 * it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1118 * e.g. cache them locally and decode locally */
prep_huffman(vorb * f)1119 static INLINE void prep_huffman(vorb *f)
1120 {
1121 if (f->valid_bits <= 24) {
1122 if (f->valid_bits == 0) f->acc = 0;
1123 do {
1124 int z;
1125 if (f->last_seg && !f->bytes_in_seg) return;
1126 z = get8_packet_raw(f);
1127 if (z == EOP) return;
1128 f->acc += z << f->valid_bits;
1129 f->valid_bits += 8;
1130 } while (f->valid_bits <= 24);
1131 }
1132 }
1133
1134 enum
1135 {
1136 VORBIS_packet_id = 1,
1137 VORBIS_packet_comment = 3,
1138 VORBIS_packet_setup = 5
1139 };
1140
codebook_decode_scalar_raw(vorb * f,Codebook * c)1141 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1142 {
1143 int i;
1144 prep_huffman(f);
1145
1146 assert(c->sorted_codewords || c->codewords);
1147 /* cases to use binary search: sorted_codewords && !c->codewords
1148 * sorted_codewords && c->entries > 8 */
1149 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1150 /* binary search */
1151 uint32_t code = bit_reverse(f->acc);
1152 int x=0, n=c->sorted_entries, len;
1153
1154 while (n > 1) {
1155 /* invariant: sc[x] <= code < sc[x+n] */
1156 int m = x + (n >> 1);
1157 if (c->sorted_codewords[m] <= code) {
1158 x = m;
1159 n -= (n>>1);
1160 } else {
1161 n >>= 1;
1162 }
1163 }
1164 /* x is now the sorted index */
1165 if (!c->sparse) x = c->sorted_values[x];
1166 /* x is now sorted index if sparse, or symbol otherwise */
1167 len = c->codeword_lengths[x];
1168 if (f->valid_bits >= len) {
1169 f->acc >>= len;
1170 f->valid_bits -= len;
1171 return x;
1172 }
1173
1174 f->valid_bits = 0;
1175 return -1;
1176 }
1177
1178 /* if small, linear search */
1179 assert(!c->sparse);
1180 for (i=0; i < c->entries; ++i) {
1181 if (c->codeword_lengths[i] == NO_CODE) continue;
1182 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1183 if (f->valid_bits >= c->codeword_lengths[i]) {
1184 f->acc >>= c->codeword_lengths[i];
1185 f->valid_bits -= c->codeword_lengths[i];
1186 return i;
1187 }
1188 f->valid_bits = 0;
1189 return -1;
1190 }
1191 }
1192
1193 error(f, VORBIS_invalid_stream);
1194 f->valid_bits = 0;
1195 return -1;
1196 }
1197
1198
codebook_decode_scalar(vorb * f,Codebook * c)1199 static int codebook_decode_scalar(vorb *f, Codebook *c)
1200 {
1201 int i;
1202 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1203 prep_huffman(f);
1204 /* fast huffman table lookup */
1205 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1206 i = c->fast_huffman[i];
1207 if (i >= 0) {
1208 f->acc >>= c->codeword_lengths[i];
1209 f->valid_bits -= c->codeword_lengths[i];
1210 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1211 return i;
1212 }
1213 return codebook_decode_scalar_raw(f,c);
1214 }
1215
1216 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1217
1218 #define DECODE(var,f,c) \
1219 DECODE_RAW(var,f,c) \
1220 if (c->sparse) var = c->sorted_values[var];
1221
1222 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1223
1224 /* CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1225 * where we avoid one addition */
1226 #ifndef STB_VORBIS_CODEBOOK_FLOATS
1227 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off] * c->delta_value + c->minimum_value)
1228 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off] * c->delta_value)
1229 #define CODEBOOK_ELEMENT_BASE(c) (c->minimum_value)
1230 #else
1231 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1232 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1233 #define CODEBOOK_ELEMENT_BASE(c) (0)
1234 #endif
1235
codebook_decode_start(vorb * f,Codebook * c)1236 static int codebook_decode_start(vorb *f, Codebook *c)
1237 {
1238 int z = -1;
1239
1240 /* type 0 is only legal in a scalar context */
1241 if (c->lookup_type == 0)
1242 error(f, VORBIS_invalid_stream);
1243 else {
1244 DECODE_VQ(z,f,c);
1245 if (c->sparse) assert(z < c->sorted_entries);
1246 if (z < 0) { /* check for EOP */
1247 if (!f->bytes_in_seg)
1248 if (f->last_seg)
1249 return z;
1250 error(f, VORBIS_invalid_stream);
1251 }
1252 }
1253 return z;
1254 }
1255
codebook_decode(vorb * f,Codebook * c,float * output,int len)1256 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1257 {
1258 int i,z = codebook_decode_start(f,c);
1259 if (z < 0) return FALSE;
1260 if (len > c->dimensions) len = c->dimensions;
1261
1262 z *= c->dimensions;
1263 if (c->sequence_p) {
1264 float last = CODEBOOK_ELEMENT_BASE(c);
1265 for (i=0; i < len; ++i) {
1266 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1267 output[i] += val;
1268 last = val + c->minimum_value;
1269 }
1270 } else {
1271 float last = CODEBOOK_ELEMENT_BASE(c);
1272 for (i=0; i < len; ++i) {
1273 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1274 }
1275 }
1276
1277 return TRUE;
1278 }
1279
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1280 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1281 {
1282 int i,z = codebook_decode_start(f,c);
1283 float last = CODEBOOK_ELEMENT_BASE(c);
1284 if (z < 0) return FALSE;
1285 if (len > c->dimensions) len = c->dimensions;
1286
1287 z *= c->dimensions;
1288 for (i=0; i < len; ++i) {
1289 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1290 output[i*step] += val;
1291 if (c->sequence_p) last = val;
1292 }
1293
1294 return TRUE;
1295 }
1296
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1297 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1298 {
1299 int c_inter = *c_inter_p;
1300 int p_inter = *p_inter_p;
1301 int i,z, effective = c->dimensions;
1302
1303 /* type 0 is only legal in a scalar context */
1304 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1305
1306 while (total_decode > 0) {
1307 float last = CODEBOOK_ELEMENT_BASE(c);
1308 DECODE_VQ(z,f,c);
1309 assert(!c->sparse || z < c->sorted_entries);
1310 if (z < 0) {
1311 if (!f->bytes_in_seg)
1312 if (f->last_seg) return FALSE;
1313 return error(f, VORBIS_invalid_stream);
1314 }
1315
1316 /* if this will take us off the end of the buffers, stop short!
1317 * we check by computing the length of the virtual interleaved
1318 * buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1319 * and the length we'll be using (effective) */
1320 if (c_inter + p_inter*ch + effective > len * ch) {
1321 effective = len*ch - (p_inter*ch - c_inter);
1322 }
1323
1324 z *= c->dimensions;
1325 if (c->sequence_p) {
1326 for (i=0; i < effective; ++i) {
1327 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1328 if (outputs[c_inter])
1329 outputs[c_inter][p_inter] += val;
1330 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1331 last = val;
1332 }
1333 } else {
1334 for (i=0; i < effective; ++i) {
1335 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1336 if (outputs[c_inter])
1337 outputs[c_inter][p_inter] += val;
1338 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1339 }
1340 }
1341
1342 total_decode -= effective;
1343 }
1344 *c_inter_p = c_inter;
1345 *p_inter_p = p_inter;
1346 return TRUE;
1347 }
1348
codebook_decode_deinterleave_repeat_2(vorb * f,Codebook * c,float ** outputs,int * c_inter_p,int * p_inter_p,int len,int total_decode)1349 static int codebook_decode_deinterleave_repeat_2(vorb *f, Codebook *c, float **outputs, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1350 {
1351 int c_inter = *c_inter_p;
1352 int p_inter = *p_inter_p;
1353 int i,z, effective = c->dimensions;
1354
1355 /* type 0 is only legal in a scalar context */
1356 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1357
1358 while (total_decode > 0) {
1359 float last = CODEBOOK_ELEMENT_BASE(c);
1360 DECODE_VQ(z,f,c);
1361
1362 if (z < 0) {
1363 if (!f->bytes_in_seg)
1364 if (f->last_seg) return FALSE;
1365 return error(f, VORBIS_invalid_stream);
1366 }
1367
1368 /* if this will take us off the end of the buffers, stop short!
1369 * we check by computing the length of the virtual interleaved
1370 * buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1371 * and the length we'll be using (effective)
1372 */
1373 if (c_inter + p_inter*2 + effective > len * 2) {
1374 effective = len*2 - (p_inter*2 - c_inter);
1375 }
1376
1377 {
1378 z *= c->dimensions;
1379 if (c->sequence_p) {
1380 /* haven't optimized this case because I don't have any examples */
1381 for (i=0; i < effective; ++i) {
1382 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1383 if (outputs[c_inter])
1384 outputs[c_inter][p_inter] += val;
1385 if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1386 last = val;
1387 }
1388 } else {
1389 i=0;
1390 if (c_inter == 1) {
1391 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1392 if (outputs[c_inter])
1393 outputs[c_inter][p_inter] += val;
1394 c_inter = 0; ++p_inter;
1395 ++i;
1396 }
1397 {
1398 float *z0 = outputs[0];
1399 float *z1 = outputs[1];
1400 for (; i+1 < effective;) {
1401 float v0 = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1402 float v1 = CODEBOOK_ELEMENT_FAST(c,z+i+1) + last;
1403 if (z0)
1404 z0[p_inter] += v0;
1405 if (z1)
1406 z1[p_inter] += v1;
1407 ++p_inter;
1408 i += 2;
1409 }
1410 }
1411 if (i < effective) {
1412 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1413 if (outputs[c_inter])
1414 outputs[c_inter][p_inter] += val;
1415 if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1416 }
1417 }
1418 }
1419
1420 total_decode -= effective;
1421 }
1422 *c_inter_p = c_inter;
1423 *p_inter_p = p_inter;
1424 return TRUE;
1425 }
1426
predict_point(int x,int x0,int x1,int y0,int y1)1427 static int predict_point(int x, int x0, int x1, int y0, int y1)
1428 {
1429 int dy = y1 - y0;
1430 int adx = x1 - x0;
1431 /* @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? */
1432 int err = abs(dy) * (x - x0);
1433 int off = err / adx;
1434 return dy < 0 ? y0 - off : y0 + off;
1435 }
1436
1437 /* the following table is block-copied from the specification */
1438 static float inverse_db_table[256] =
1439 {
1440 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1441 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1442 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1443 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1444 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1445 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1446 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1447 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1448 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1449 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1450 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1451 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1452 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1453 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1454 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1455 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1456 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1457 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1458 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1459 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1460 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1461 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1462 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1463 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1464 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1465 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1466 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1467 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1468 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1469 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1470 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1471 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1472 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1473 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1474 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1475 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1476 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1477 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1478 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1479 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1480 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1481 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1482 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1483 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1484 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1485 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1486 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1487 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1488 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1489 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1490 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1491 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1492 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1493 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1494 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1495 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1496 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1497 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1498 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1499 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1500 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1501 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1502 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1503 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1504 };
1505
1506
1507 /* @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1508 * note that you must produce bit-identical output to decode correctly;
1509 * this specific sequence of operations is specified in the spec (it's
1510 * drawing integer-quantized frequency-space lines that the encoder
1511 * expects to be exactly the same)
1512 * ... also, isn't the whole point of Bresenham's algorithm to NOT
1513 * have to divide in the setup? sigh.
1514 */
1515 #define LINE_OP(a,b) a *= b
1516
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1517 static INLINE void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1518 {
1519 int dy = y1 - y0;
1520 int adx = x1 - x0;
1521 int ady = abs(dy);
1522 int x=x0,y=y0;
1523 int err = 0;
1524 int sy;
1525 int base = dy / adx;
1526
1527 if (dy < 0)
1528 sy = base - 1;
1529 else
1530 sy = base+1;
1531
1532 ady -= abs(base) * adx;
1533 if (x1 > n) x1 = n;
1534 LINE_OP(output[x], inverse_db_table[y]);
1535 for (++x; x < x1; ++x) {
1536 err += ady;
1537 if (err >= adx) {
1538 err -= adx;
1539 y += sy;
1540 } else
1541 y += base;
1542 LINE_OP(output[x], inverse_db_table[y]);
1543 }
1544 }
1545
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)1546 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
1547 {
1548 int k;
1549 if (rtype == 0) {
1550 int step = n / book->dimensions;
1551 for (k=0; k < step; ++k)
1552 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
1553 return FALSE;
1554 } else {
1555 for (k=0; k < n; ) {
1556 if (!codebook_decode(f, book, target+offset, n-k))
1557 return FALSE;
1558 k += book->dimensions;
1559 offset += book->dimensions;
1560 }
1561 }
1562 return TRUE;
1563 }
1564
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8_t * do_not_decode)1565 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8_t *do_not_decode)
1566 {
1567 int i,j,pass;
1568 Residue *r = f->residue_config + rn;
1569 int rtype = f->residue_types[rn];
1570 int c = r->classbook;
1571 int classwords = f->codebooks[c].dimensions;
1572 int n_read = r->end - r->begin;
1573 int part_read = n_read / r->part_size;
1574 int temp_alloc_point = temp_alloc_save(f);
1575 uint8_t ***part_classdata = (uint8_t ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
1576
1577 for (i=0; i < ch; ++i)
1578 if (!do_not_decode[i])
1579 memset(residue_buffers[i], 0, sizeof(float) * n);
1580
1581 if (rtype == 2 && ch != 1) {
1582 for (j=0; j < ch; ++j)
1583 if (!do_not_decode[j])
1584 break;
1585 if (j == ch)
1586 goto done;
1587
1588 for (pass=0; pass < 8; ++pass) {
1589 int pcount = 0, class_set = 0;
1590 if (ch == 2) {
1591 while (pcount < part_read) {
1592 int z = r->begin + pcount*r->part_size;
1593 int c_inter = (z & 1), p_inter = z>>1;
1594 if (pass == 0) {
1595 Codebook *c = f->codebooks+r->classbook;
1596 int q;
1597 DECODE(q,f,c);
1598 if (q == EOP) goto done;
1599 part_classdata[0][class_set] = r->classdata[q];
1600 }
1601 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1602 int z = r->begin + pcount*r->part_size;
1603 int c = part_classdata[0][class_set][i];
1604 int b = r->residue_books[c][pass];
1605 if (b >= 0) {
1606 Codebook *book = f->codebooks + b;
1607 /* saves 1% */
1608 if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r->part_size))
1609 goto done;
1610 } else {
1611 z += r->part_size;
1612 c_inter = z & 1;
1613 p_inter = z >> 1;
1614 }
1615 }
1616 ++class_set;
1617 }
1618 } else if (ch == 1) {
1619 while (pcount < part_read) {
1620 int z = r->begin + pcount*r->part_size;
1621 int c_inter = 0, p_inter = z;
1622 if (pass == 0) {
1623 Codebook *c = f->codebooks+r->classbook;
1624 int q;
1625 DECODE(q,f,c);
1626 if (q == EOP) goto done;
1627 part_classdata[0][class_set] = r->classdata[q];
1628 }
1629 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1630 int z = r->begin + pcount*r->part_size;
1631 int c = part_classdata[0][class_set][i];
1632 int b = r->residue_books[c][pass];
1633 if (b >= 0) {
1634 Codebook *book = f->codebooks + b;
1635 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1636 goto done;
1637 } else {
1638 z += r->part_size;
1639 c_inter = 0;
1640 p_inter = z;
1641 }
1642 }
1643 ++class_set;
1644 }
1645 } else {
1646 while (pcount < part_read) {
1647 int z = r->begin + pcount*r->part_size;
1648 int c_inter = z % ch, p_inter = z/ch;
1649 if (pass == 0) {
1650 Codebook *c = f->codebooks+r->classbook;
1651 int q;
1652 DECODE(q,f,c);
1653 if (q == EOP) goto done;
1654 part_classdata[0][class_set] = r->classdata[q];
1655 }
1656 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1657 int z = r->begin + pcount*r->part_size;
1658 int c = part_classdata[0][class_set][i];
1659 int b = r->residue_books[c][pass];
1660 if (b >= 0) {
1661 Codebook *book = f->codebooks + b;
1662 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1663 goto done;
1664 } else {
1665 z += r->part_size;
1666 c_inter = z % ch;
1667 p_inter = z / ch;
1668 }
1669 }
1670 ++class_set;
1671 }
1672 }
1673 }
1674 goto done;
1675 }
1676
1677 for (pass=0; pass < 8; ++pass) {
1678 int pcount = 0, class_set=0;
1679 while (pcount < part_read) {
1680 if (pass == 0) {
1681 for (j=0; j < ch; ++j) {
1682 if (!do_not_decode[j]) {
1683 Codebook *c = f->codebooks+r->classbook;
1684 int temp;
1685 DECODE(temp,f,c);
1686 if (temp == EOP) goto done;
1687 part_classdata[j][class_set] = r->classdata[temp];
1688 }
1689 }
1690 }
1691 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1692 for (j=0; j < ch; ++j) {
1693 if (!do_not_decode[j]) {
1694 int c = part_classdata[j][class_set][i];
1695 int b = r->residue_books[c][pass];
1696 if (b >= 0) {
1697 float *target = residue_buffers[j];
1698 int offset = r->begin + pcount * r->part_size;
1699 int n = r->part_size;
1700 Codebook *book = f->codebooks + b;
1701 if (!residue_decode(f, book, target, offset, n, rtype))
1702 goto done;
1703 }
1704 }
1705 }
1706 }
1707 ++class_set;
1708 }
1709 }
1710 done:
1711 temp_alloc_restore(f,temp_alloc_point);
1712 }
1713
1714
1715 #ifndef LIBVORBIS_MDCT
1716 #define LIBVORBIS_MDCT 0
1717 #endif
1718
1719 #if LIBVORBIS_MDCT
1720 /* directly call the vorbis MDCT using an interface documented
1721 * by Jeff Roberts... useful for performance comparison */
1722 typedef struct
1723 {
1724 int n;
1725 int log2n;
1726
1727 float *trig;
1728 int *bitrev;
1729
1730 float scale;
1731 } mdct_lookup;
1732
1733 extern void mdct_init(mdct_lookup *lookup, int n);
1734 extern void mdct_clear(mdct_lookup *l);
1735 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
1736
1737 mdct_lookup M1,M2;
1738
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)1739 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1740 {
1741 mdct_lookup *M;
1742 if (M1.n == n) M = &M1;
1743 else if (M2.n == n) M = &M2;
1744 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
1745 else {
1746 if (M2.n) __asm int 3;
1747 mdct_init(&M2, n);
1748 M = &M2;
1749 }
1750
1751 mdct_backward(M, buffer, buffer);
1752 }
1753 #endif
1754
1755
1756 /* the following were split out into separate functions while optimizing;
1757 * they could be pushed back up but eh. __forceinline showed no change;
1758 * they're probably already being inlined. */
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)1759 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
1760 {
1761 float *ee0 = e + i_off;
1762 float *ee2 = ee0 + k_off;
1763 int i;
1764
1765 assert((n & 3) == 0);
1766 for (i=(n>>2); i > 0; --i) {
1767 float k00_20, k01_21;
1768 k00_20 = ee0[ 0] - ee2[ 0];
1769 k01_21 = ee0[-1] - ee2[-1];
1770 ee0[ 0] += ee2[ 0];/*ee0[ 0] = ee0[ 0] + ee2[ 0]; */
1771 ee0[-1] += ee2[-1];/*ee0[-1] = ee0[-1] + ee2[-1]; */
1772 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
1773 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
1774 A += 8;
1775
1776 k00_20 = ee0[-2] - ee2[-2];
1777 k01_21 = ee0[-3] - ee2[-3];
1778 ee0[-2] += ee2[-2];/*ee0[-2] = ee0[-2] + ee2[-2]; */
1779 ee0[-3] += ee2[-3];/*ee0[-3] = ee0[-3] + ee2[-3]; */
1780 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
1781 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
1782 A += 8;
1783
1784 k00_20 = ee0[-4] - ee2[-4];
1785 k01_21 = ee0[-5] - ee2[-5];
1786 ee0[-4] += ee2[-4];/*ee0[-4] = ee0[-4] + ee2[-4]; */
1787 ee0[-5] += ee2[-5];/*ee0[-5] = ee0[-5] + ee2[-5]; */
1788 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
1789 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
1790 A += 8;
1791
1792 k00_20 = ee0[-6] - ee2[-6];
1793 k01_21 = ee0[-7] - ee2[-7];
1794 ee0[-6] += ee2[-6];/*ee0[-6] = ee0[-6] + ee2[-6]; */
1795 ee0[-7] += ee2[-7];/*ee0[-7] = ee0[-7] + ee2[-7]; */
1796 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
1797 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
1798 A += 8;
1799 ee0 -= 8;
1800 ee2 -= 8;
1801 }
1802 }
1803
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)1804 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
1805 {
1806 int i;
1807 float k00_20, k01_21;
1808
1809 float *e0 = e + d0;
1810 float *e2 = e0 + k_off;
1811
1812 for (i=lim >> 2; i > 0; --i) {
1813 k00_20 = e0[-0] - e2[-0];
1814 k01_21 = e0[-1] - e2[-1];
1815 e0[-0] += e2[-0];/*e0[-0] = e0[-0] + e2[-0]; */
1816 e0[-1] += e2[-1];/*e0[-1] = e0[-1] + e2[-1]; */
1817 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
1818 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
1819
1820 A += k1;
1821
1822 k00_20 = e0[-2] - e2[-2];
1823 k01_21 = e0[-3] - e2[-3];
1824 e0[-2] += e2[-2];/*e0[-2] = e0[-2] + e2[-2]; */
1825 e0[-3] += e2[-3];/*e0[-3] = e0[-3] + e2[-3]; */
1826 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
1827 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
1828
1829 A += k1;
1830
1831 k00_20 = e0[-4] - e2[-4];
1832 k01_21 = e0[-5] - e2[-5];
1833 e0[-4] += e2[-4];/*e0[-4] = e0[-4] + e2[-4]; */
1834 e0[-5] += e2[-5];/*e0[-5] = e0[-5] + e2[-5]; */
1835 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
1836 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
1837
1838 A += k1;
1839
1840 k00_20 = e0[-6] - e2[-6];
1841 k01_21 = e0[-7] - e2[-7];
1842 e0[-6] += e2[-6];/*e0[-6] = e0[-6] + e2[-6]; */
1843 e0[-7] += e2[-7];/*e0[-7] = e0[-7] + e2[-7]; */
1844 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
1845 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
1846
1847 e0 -= 8;
1848 e2 -= 8;
1849
1850 A += k1;
1851 }
1852 }
1853
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)1854 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
1855 {
1856 int i;
1857 float A0 = A[0];
1858 float A1 = A[0+1];
1859 float A2 = A[0+a_off];
1860 float A3 = A[0+a_off+1];
1861 float A4 = A[0+a_off*2+0];
1862 float A5 = A[0+a_off*2+1];
1863 float A6 = A[0+a_off*3+0];
1864 float A7 = A[0+a_off*3+1];
1865
1866 float k00,k11;
1867
1868 float *ee0 = e +i_off;
1869 float *ee2 = ee0+k_off;
1870
1871 for (i=n; i > 0; --i) {
1872 k00 = ee0[ 0] - ee2[ 0];
1873 k11 = ee0[-1] - ee2[-1];
1874 ee0[ 0] = ee0[ 0] + ee2[ 0];
1875 ee0[-1] = ee0[-1] + ee2[-1];
1876 ee2[ 0] = (k00) * A0 - (k11) * A1;
1877 ee2[-1] = (k11) * A0 + (k00) * A1;
1878
1879 k00 = ee0[-2] - ee2[-2];
1880 k11 = ee0[-3] - ee2[-3];
1881 ee0[-2] = ee0[-2] + ee2[-2];
1882 ee0[-3] = ee0[-3] + ee2[-3];
1883 ee2[-2] = (k00) * A2 - (k11) * A3;
1884 ee2[-3] = (k11) * A2 + (k00) * A3;
1885
1886 k00 = ee0[-4] - ee2[-4];
1887 k11 = ee0[-5] - ee2[-5];
1888 ee0[-4] = ee0[-4] + ee2[-4];
1889 ee0[-5] = ee0[-5] + ee2[-5];
1890 ee2[-4] = (k00) * A4 - (k11) * A5;
1891 ee2[-5] = (k11) * A4 + (k00) * A5;
1892
1893 k00 = ee0[-6] - ee2[-6];
1894 k11 = ee0[-7] - ee2[-7];
1895 ee0[-6] = ee0[-6] + ee2[-6];
1896 ee0[-7] = ee0[-7] + ee2[-7];
1897 ee2[-6] = (k00) * A6 - (k11) * A7;
1898 ee2[-7] = (k11) * A6 + (k00) * A7;
1899
1900 ee0 -= k0;
1901 ee2 -= k0;
1902 }
1903 }
1904
iter_54(float * z)1905 static INLINE void iter_54(float *z)
1906 {
1907 float k00,k11,k22,k33;
1908 float y0,y1,y2,y3;
1909
1910 k00 = z[ 0] - z[-4];
1911 y0 = z[ 0] + z[-4];
1912 y2 = z[-2] + z[-6];
1913 k22 = z[-2] - z[-6];
1914
1915 z[-0] = y0 + y2; /* z0 + z4 + z2 + z6 */
1916 z[-2] = y0 - y2; /* z0 + z4 - z2 - z6 */
1917
1918 /* done with y0,y2 */
1919
1920 k33 = z[-3] - z[-7];
1921
1922 z[-4] = k00 + k33; /* z0 - z4 + z3 - z7 */
1923 z[-6] = k00 - k33; /* z0 - z4 - z3 + z7 */
1924
1925 /* done with k33 */
1926
1927 k11 = z[-1] - z[-5];
1928 y1 = z[-1] + z[-5];
1929 y3 = z[-3] + z[-7];
1930
1931 z[-1] = y1 + y3; /* z1 + z5 + z3 + z7 */
1932 z[-3] = y1 - y3; /* z1 + z5 - z3 - z7 */
1933 z[-5] = k11 - k22; /* z1 - z5 + z2 - z6 */
1934 z[-7] = k11 + k22; /* z1 - z5 - z2 + z6 */
1935 }
1936
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)1937 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
1938 {
1939 int a_off = base_n >> 3;
1940 float A2 = A[0+a_off];
1941 float *z = e + i_off;
1942 float *base = z - 16 * n;
1943
1944 while (z > base) {
1945 float k00,k11;
1946
1947 k00 = z[-0] - z[-8];
1948 k11 = z[-1] - z[-9];
1949 z[-0] = z[-0] + z[-8];
1950 z[-1] = z[-1] + z[-9];
1951 z[-8] = k00;
1952 z[-9] = k11 ;
1953
1954 k00 = z[ -2] - z[-10];
1955 k11 = z[ -3] - z[-11];
1956 z[ -2] = z[ -2] + z[-10];
1957 z[ -3] = z[ -3] + z[-11];
1958 z[-10] = (k00+k11) * A2;
1959 z[-11] = (k11-k00) * A2;
1960
1961 k00 = z[-12] - z[ -4]; /* reverse to avoid a unary negation */
1962 k11 = z[ -5] - z[-13];
1963 z[ -4] = z[ -4] + z[-12];
1964 z[ -5] = z[ -5] + z[-13];
1965 z[-12] = k11;
1966 z[-13] = k00;
1967
1968 k00 = z[-14] - z[ -6]; /* reverse to avoid a unary negation */
1969 k11 = z[ -7] - z[-15];
1970 z[ -6] = z[ -6] + z[-14];
1971 z[ -7] = z[ -7] + z[-15];
1972 z[-14] = (k00+k11) * A2;
1973 z[-15] = (k00-k11) * A2;
1974
1975 iter_54(z);
1976 iter_54(z-8);
1977 z -= 16;
1978 }
1979 }
1980
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)1981 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1982 {
1983 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
1984 int ld;
1985 /* @OPTIMIZE: reduce register pressure by using fewer variables? */
1986 int save_point = temp_alloc_save(f);
1987 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
1988 float *u=NULL,*v=NULL;
1989 /* twiddle factors */
1990 float *A = f->A[blocktype];
1991
1992 /* IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
1993 * See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
1994
1995 * kernel from paper
1996
1997
1998 * merged:
1999 * copy and reflect spectral data
2000 * step 0
2001
2002 * note that it turns out that the items added together during
2003 * this step are, in fact, being added to themselves (as reflected
2004 * by step 0). inexplicable inefficiency! this became obvious
2005 * once I combined the passes.
2006
2007 * so there's a missing 'times 2' here (for adding X to itself).
2008 * this propogates through linearly to the end, where the numbers
2009 * are 1/2 too small, and need to be compensated for.
2010 */
2011
2012 {
2013 float *d,*e, *AA, *e_stop;
2014 d = &buf2[n2-2];
2015 AA = A;
2016 e = &buffer[0];
2017 e_stop = &buffer[n2];
2018 while (e != e_stop) {
2019 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2020 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2021 d -= 2;
2022 AA += 2;
2023 e += 4;
2024 }
2025
2026 e = &buffer[n2-3];
2027 while (d >= buf2) {
2028 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2029 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2030 d -= 2;
2031 AA += 2;
2032 e -= 4;
2033 }
2034 }
2035
2036 /* now we use symbolic names for these, so that we can
2037 * possibly swap their meaning as we change which operations
2038 * are in place */
2039
2040 u = buffer;
2041 v = buf2;
2042
2043 /* step 2 (paper output is w, now u)
2044 * this could be in place, but the data ends up in the wrong
2045 * place... _somebody_'s got to swap it, so this is nominated */
2046 {
2047 float *AA = &A[n2-8];
2048 float *d0,*d1, *e0, *e1;
2049
2050 e0 = &v[n4];
2051 e1 = &v[0];
2052
2053 d0 = &u[n4];
2054 d1 = &u[0];
2055
2056 while (AA >= A) {
2057 float v40_20, v41_21;
2058
2059 v41_21 = e0[1] - e1[1];
2060 v40_20 = e0[0] - e1[0];
2061 d0[1] = e0[1] + e1[1];
2062 d0[0] = e0[0] + e1[0];
2063 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2064 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2065
2066 v41_21 = e0[3] - e1[3];
2067 v40_20 = e0[2] - e1[2];
2068 d0[3] = e0[3] + e1[3];
2069 d0[2] = e0[2] + e1[2];
2070 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2071 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2072
2073 AA -= 8;
2074
2075 d0 += 4;
2076 d1 += 4;
2077 e0 += 4;
2078 e1 += 4;
2079 }
2080 }
2081
2082 /* step 3 */
2083 ld = ilog(n) - 1; /* ilog is off-by-one from normal definitions */
2084
2085 /* optimized step 3:
2086
2087 * the original step3 loop can be nested r inside s or s inside r;
2088 * it's written originally as s inside r, but this is dumb when r
2089 * iterates many times, and s few. So I have two copies of it and
2090 * switch between them halfway.
2091
2092 * this is iteration 0 of step 3 */
2093 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2094 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2095
2096 /* this is iteration 1 of step 3 */
2097 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2098 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2099 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2100 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2101
2102 l=2;
2103 for (; l < (ld-3)>>1; ++l) {
2104 int k0 = n >> (l+2), k0_2 = k0>>1;
2105 int lim = 1 << (l+1);
2106 int i;
2107 for (i=0; i < lim; ++i)
2108 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2109 }
2110
2111 for (; l < ld-6; ++l) {
2112 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2113 int rlim = n >> (l+6), r;
2114 int lim = 1 << (l+1);
2115 int i_off;
2116 float *A0 = A;
2117 i_off = n2-1;
2118 for (r=rlim; r > 0; --r) {
2119 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2120 A0 += k1*4;
2121 i_off -= 8;
2122 }
2123 }
2124
2125 /* iterations with count:
2126 * ld-6,-5,-4 all interleaved together
2127 * the big win comes from getting rid of needless flops
2128 * due to the constants on pass 5 & 4 being all 1 and 0;
2129 * combining them to be simultaneous to improve cache made little difference
2130 */
2131 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2132
2133 /* output is u
2134
2135 * step 4, 5, and 6
2136 * cannot be in-place because of step 5 */
2137 {
2138 uint16_t *bitrev = f->bit_reverse[blocktype];
2139 /* weirdly, I'd have thought reading sequentially and writing
2140 * erratically would have been better than vice-versa, but in
2141 * fact that's not what my testing showed. (That is, with
2142 * j = bitreverse(i), do you read i and write j, or read j and write i.) */
2143
2144 float *d0 = &v[n4-4];
2145 float *d1 = &v[n2-4];
2146 while (d0 >= v) {
2147 int k4;
2148
2149 k4 = bitrev[0];
2150 d1[3] = u[k4+0];
2151 d1[2] = u[k4+1];
2152 d0[3] = u[k4+2];
2153 d0[2] = u[k4+3];
2154
2155 k4 = bitrev[1];
2156 d1[1] = u[k4+0];
2157 d1[0] = u[k4+1];
2158 d0[1] = u[k4+2];
2159 d0[0] = u[k4+3];
2160
2161 d0 -= 4;
2162 d1 -= 4;
2163 bitrev += 2;
2164 }
2165 }
2166 /* (paper output is u, now v) */
2167
2168
2169 /* data must be in buf2 */
2170 assert(v == buf2);
2171
2172 /* step 7 (paper output is v, now v)
2173 * this is now in place */
2174 {
2175 float *C = f->C[blocktype];
2176 float *d, *e;
2177
2178 d = v;
2179 e = v + n2 - 4;
2180
2181 while (d < e) {
2182 float a02,a11,b0,b1,b2,b3;
2183
2184 a02 = d[0] - e[2];
2185 a11 = d[1] + e[3];
2186
2187 b0 = C[1]*a02 + C[0]*a11;
2188 b1 = C[1]*a11 - C[0]*a02;
2189
2190 b2 = d[0] + e[ 2];
2191 b3 = d[1] - e[ 3];
2192
2193 d[0] = b2 + b0;
2194 d[1] = b3 + b1;
2195 e[2] = b2 - b0;
2196 e[3] = b1 - b3;
2197
2198 a02 = d[2] - e[0];
2199 a11 = d[3] + e[1];
2200
2201 b0 = C[3]*a02 + C[2]*a11;
2202 b1 = C[3]*a11 - C[2]*a02;
2203
2204 b2 = d[2] + e[ 0];
2205 b3 = d[3] - e[ 1];
2206
2207 d[2] = b2 + b0;
2208 d[3] = b3 + b1;
2209 e[0] = b2 - b0;
2210 e[1] = b1 - b3;
2211
2212 C += 4;
2213 d += 4;
2214 e -= 4;
2215 }
2216 }
2217
2218 /* data must be in buf2
2219
2220
2221 * step 8+decode (paper output is X, now buffer)
2222 * this generates pairs of data a la 8 and pushes them directly through
2223 * the decode kernel (pushing rather than pulling) to avoid having
2224 * to make another pass later
2225
2226 * this cannot POSSIBLY be in place, so we refer to the buffers directly
2227 */
2228
2229 {
2230 float *d0,*d1,*d2,*d3;
2231
2232 float *B = f->B[blocktype] + n2 - 8;
2233 float *e = buf2 + n2 - 8;
2234 d0 = &buffer[0];
2235 d1 = &buffer[n2-4];
2236 d2 = &buffer[n2];
2237 d3 = &buffer[n-4];
2238 while (e >= v) {
2239 float p0,p1,p2,p3;
2240
2241 p3 = e[6]*B[7] - e[7]*B[6];
2242 p2 = -e[6]*B[6] - e[7]*B[7];
2243
2244 d0[0] = p3;
2245 d1[3] = - p3;
2246 d2[0] = p2;
2247 d3[3] = p2;
2248
2249 p1 = e[4]*B[5] - e[5]*B[4];
2250 p0 = -e[4]*B[4] - e[5]*B[5];
2251
2252 d0[1] = p1;
2253 d1[2] = - p1;
2254 d2[1] = p0;
2255 d3[2] = p0;
2256
2257 p3 = e[2]*B[3] - e[3]*B[2];
2258 p2 = -e[2]*B[2] - e[3]*B[3];
2259
2260 d0[2] = p3;
2261 d1[1] = - p3;
2262 d2[2] = p2;
2263 d3[1] = p2;
2264
2265 p1 = e[0]*B[1] - e[1]*B[0];
2266 p0 = -e[0]*B[0] - e[1]*B[1];
2267
2268 d0[3] = p1;
2269 d1[0] = - p1;
2270 d2[3] = p0;
2271 d3[0] = p0;
2272
2273 B -= 8;
2274 e -= 8;
2275 d0 += 4;
2276 d2 += 4;
2277 d1 -= 4;
2278 d3 -= 4;
2279 }
2280 }
2281
2282 temp_alloc_restore(f,save_point);
2283 }
2284
get_window(vorb * f,int len)2285 static float *get_window(vorb *f, int len)
2286 {
2287 len <<= 1;
2288 if (len == f->blocksize_0) return f->window[0];
2289 if (len == f->blocksize_1) return f->window[1];
2290 assert(0);
2291 return NULL;
2292 }
2293
2294 typedef int16_t YTYPE;
2295
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8_t * step2_flag)2296 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8_t *step2_flag)
2297 {
2298 int n2 = n >> 1;
2299 int s = map->chan[i].mux, floor;
2300 floor = map->submap_floor[s];
2301 if (f->floor_types[floor] == 0) {
2302 return error(f, VORBIS_invalid_stream);
2303 } else {
2304 Floor1 *g = &f->floor_config[floor].floor1;
2305 int j,q;
2306 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
2307 for (q=1; q < g->values; ++q) {
2308 j = g->sorted_order[q];
2309 if (finalY[j] >= 0)
2310 {
2311 int hy = finalY[j] * g->floor1_multiplier;
2312 int hx = g->Xlist[j];
2313 draw_line(target, lx,ly, hx,hy, n2);
2314 lx = hx;
2315 ly = hy;
2316 }
2317 }
2318 if (lx < n2)
2319 /* optimization of: draw_line(target, lx,ly, n,ly, n2); */
2320 for (j=lx; j < n2; ++j)
2321 LINE_OP(target[j], inverse_db_table[ly]);
2322 }
2323 return TRUE;
2324 }
2325
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)2326 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
2327 {
2328 Mode *m;
2329 int i, n, prev, next, window_center;
2330 f->channel_buffer_start = f->channel_buffer_end = 0;
2331
2332 retry:
2333 if (f->eof) return FALSE;
2334 if (!maybe_start_packet(f))
2335 return FALSE;
2336 /* check packet type */
2337 if (get_bits(f,1) != 0) {
2338 if (IS_PUSH_MODE(f))
2339 return error(f,VORBIS_bad_packet_type);
2340 while (EOP != get8_packet(f));
2341 goto retry;
2342 }
2343
2344 if (f->alloc.alloc_buffer)
2345 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2346
2347 i = get_bits(f, ilog(f->mode_count-1));
2348 if (i == EOP) return FALSE;
2349 if (i >= f->mode_count) return FALSE;
2350 *mode = i;
2351 m = f->mode_config + i;
2352 if (m->blockflag) {
2353 n = f->blocksize_1;
2354 prev = get_bits(f,1);
2355 next = get_bits(f,1);
2356 } else {
2357 prev = next = 0;
2358 n = f->blocksize_0;
2359 }
2360
2361 /* WINDOWING */
2362
2363 window_center = n >> 1;
2364 if (m->blockflag && !prev) {
2365 *p_left_start = (n - f->blocksize_0) >> 2;
2366 *p_left_end = (n + f->blocksize_0) >> 2;
2367 } else {
2368 *p_left_start = 0;
2369 *p_left_end = window_center;
2370 }
2371 if (m->blockflag && !next) {
2372 *p_right_start = (n*3 - f->blocksize_0) >> 2;
2373 *p_right_end = (n*3 + f->blocksize_0) >> 2;
2374 } else {
2375 *p_right_start = window_center;
2376 *p_right_end = n;
2377 }
2378 return TRUE;
2379 }
2380
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)2381 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
2382 {
2383 Mapping *map;
2384 int i,j,k,n,n2;
2385 int zero_channel[256];
2386 int really_zero_channel[256];
2387
2388 /* WINDOWING */
2389
2390 n = f->blocksize[m->blockflag];
2391 map = &f->mapping[m->mapping];
2392
2393 /* FLOORS */
2394 n2 = n >> 1;
2395
2396 for (i=0; i < f->channels; ++i) {
2397 int s = map->chan[i].mux, floor;
2398 zero_channel[i] = FALSE;
2399 floor = map->submap_floor[s];
2400 if (f->floor_types[floor] == 0) {
2401 return error(f, VORBIS_invalid_stream);
2402 } else {
2403 Floor1 *g = &f->floor_config[floor].floor1;
2404 if (get_bits(f, 1)) {
2405 short *finalY;
2406 uint8_t step2_flag[256];
2407 static int range_list[4] = { 256, 128, 86, 64 };
2408 int range = range_list[g->floor1_multiplier-1];
2409 int offset = 2;
2410 finalY = f->finalY[i];
2411 finalY[0] = get_bits(f, ilog(range)-1);
2412 finalY[1] = get_bits(f, ilog(range)-1);
2413 for (j=0; j < g->partitions; ++j) {
2414 int pclass = g->partition_class_list[j];
2415 int cdim = g->class_dimensions[pclass];
2416 int cbits = g->class_subclasses[pclass];
2417 int csub = (1 << cbits)-1;
2418 int cval = 0;
2419 if (cbits) {
2420 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
2421 DECODE(cval,f,c);
2422 }
2423 for (k=0; k < cdim; ++k) {
2424 int book = g->subclass_books[pclass][cval & csub];
2425 cval = cval >> cbits;
2426 if (book >= 0) {
2427 int temp;
2428 Codebook *c = f->codebooks + book;
2429 DECODE(temp,f,c);
2430 finalY[offset++] = temp;
2431 } else
2432 finalY[offset++] = 0;
2433 }
2434 }
2435 if (f->valid_bits == INVALID_BITS) goto error; /* behavior according to spec */
2436 step2_flag[0] = step2_flag[1] = 1;
2437 for (j=2; j < g->values; ++j) {
2438 int low, high, pred, highroom, lowroom, room, val;
2439 low = g->neighbors[j][0];
2440 high = g->neighbors[j][1];
2441 #if 0
2442 neighbors(g->Xlist, j, &low, &high);
2443 #endif
2444 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
2445 val = finalY[j];
2446 highroom = range - pred;
2447 lowroom = pred;
2448 if (highroom < lowroom)
2449 room = highroom * 2;
2450 else
2451 room = lowroom * 2;
2452 if (val) {
2453 step2_flag[low] = step2_flag[high] = 1;
2454 step2_flag[j] = 1;
2455 if (val >= room)
2456 if (highroom > lowroom)
2457 finalY[j] = val - lowroom + pred;
2458 else
2459 finalY[j] = pred - val + highroom - 1;
2460 else
2461 if (val & 1)
2462 finalY[j] = pred - ((val+1)>>1);
2463 else
2464 finalY[j] = pred + (val>>1);
2465 } else {
2466 step2_flag[j] = 0;
2467 finalY[j] = pred;
2468 }
2469 }
2470
2471 /* defer final floor computation until _after_ residue */
2472 for (j=0; j < g->values; ++j) {
2473 if (!step2_flag[j])
2474 finalY[j] = -1;
2475 }
2476 } else {
2477 error:
2478 zero_channel[i] = TRUE;
2479 }
2480 /* So we just defer everything else to later */
2481
2482 /* at this point we've decoded the floor into buffer */
2483 }
2484 }
2485 /* at this point we've decoded all floors */
2486
2487 if (f->alloc.alloc_buffer)
2488 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2489
2490 /* re-enable coupled channels if necessary */
2491 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
2492 for (i=0; i < map->coupling_steps; ++i)
2493 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
2494 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
2495 }
2496
2497 /* RESIDUE DECODE */
2498 for (i=0; i < map->submaps; ++i) {
2499 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
2500 int r;
2501 uint8_t do_not_decode[256] = {0};
2502 int ch = 0;
2503 for (j=0; j < f->channels; ++j) {
2504 if (map->chan[j].mux == i) {
2505 if (zero_channel[j]) {
2506 do_not_decode[ch] = TRUE;
2507 residue_buffers[ch] = NULL;
2508 } else {
2509 do_not_decode[ch] = FALSE;
2510 residue_buffers[ch] = f->channel_buffers[j];
2511 }
2512 ++ch;
2513 }
2514 }
2515 r = map->submap_residue[i];
2516 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
2517 }
2518
2519 if (f->alloc.alloc_buffer)
2520 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2521
2522 /* INVERSE COUPLING */
2523 for (i = map->coupling_steps-1; i >= 0; --i) {
2524 int n2 = n >> 1;
2525 float *m = f->channel_buffers[map->chan[i].magnitude];
2526 float *a = f->channel_buffers[map->chan[i].angle ];
2527 for (j=0; j < n2; ++j) {
2528 float a2,m2;
2529 if (m[j] > 0)
2530 if (a[j] > 0)
2531 {
2532 m2 = m[j];
2533 a2 = m[j] - a[j];
2534 }
2535 else
2536 {
2537 a2 = m[j];
2538 m2 = m[j] + a[j];
2539 }
2540 else
2541 if (a[j] > 0)
2542 {
2543 m2 = m[j];
2544 a2 = m[j] + a[j];
2545 }
2546 else
2547 {
2548 a2 = m[j];
2549 m2 = m[j] - a[j];
2550 }
2551 m[j] = m2;
2552 a[j] = a2;
2553 }
2554 }
2555
2556 /* finish decoding the floors */
2557 for (i=0; i < f->channels; ++i) {
2558 if (really_zero_channel[i]) {
2559 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
2560 } else {
2561 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
2562 }
2563 }
2564
2565 /* INVERSE MDCT */
2566 for (i=0; i < f->channels; ++i)
2567 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
2568
2569 /* this shouldn't be necessary, unless we exited on an error
2570 * and want to flush to get to the next packet */
2571 flush_packet(f);
2572
2573 if (f->first_decode) {
2574 /* assume we start so first non-discarded sample is sample 0
2575 * this isn't to spec, but spec would require us to read ahead
2576 * and decode the size of all current frames--could be done,
2577 * but presumably it's not a commonly used feature */
2578 f->current_loc = -n2; /* start of first frame is positioned for discard */
2579 /* we might have to discard samples "from" the next frame too,
2580 * if we're lapping a large block then a small at the start? */
2581 f->discard_samples_deferred = n - right_end;
2582 f->current_loc_valid = TRUE;
2583 f->first_decode = FALSE;
2584 } else if (f->discard_samples_deferred) {
2585 left_start += f->discard_samples_deferred;
2586 *p_left = left_start;
2587 f->discard_samples_deferred = 0;
2588 } else if (f->previous_length == 0 && f->current_loc_valid) {
2589 /* we're recovering from a seek... that means we're going to discard
2590 * the samples from this packet even though we know our position from
2591 * the last page header, so we need to update the position based on
2592 * the discarded samples here
2593 * but wait, the code below is going to add this in itself even
2594 * on a discard, so we don't need to do it here... */
2595 }
2596
2597 /* check if we have ogg information about the sample # for this packet */
2598 if (f->last_seg_which == f->end_seg_with_known_loc) {
2599 /* if we have a valid current loc, and this is final: */
2600 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
2601 uint32_t current_end = f->known_loc_for_packet - (n-right_end);
2602 /* then let's infer the size of the (probably) short final frame */
2603 if (current_end < f->current_loc + right_end) {
2604 if (current_end < f->current_loc) {
2605 /* negative truncation, that's impossible! */
2606 *len = 0;
2607 } else {
2608 *len = current_end - f->current_loc;
2609 }
2610 *len += left_start;
2611 f->current_loc += *len;
2612 return TRUE;
2613 }
2614 }
2615 /* otherwise, just set our sample loc
2616 * guess that the ogg granule pos refers to the _middle_ of the
2617 * last frame?
2618 * set f->current_loc to the position of left_start */
2619 f->current_loc = f->known_loc_for_packet - (n2-left_start);
2620 f->current_loc_valid = TRUE;
2621 }
2622 if (f->current_loc_valid)
2623 f->current_loc += (right_start - left_start);
2624
2625 if (f->alloc.alloc_buffer)
2626 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2627 *len = right_end; /* ignore samples after the window goes to 0 */
2628 return TRUE;
2629 }
2630
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)2631 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
2632 {
2633 int mode, left_end, right_end;
2634 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
2635 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
2636 }
2637
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)2638 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
2639 {
2640 int prev,i,j;
2641 /* we use right&left (the start of the right- and left-window sin()-regions)
2642 * to determine how much to return, rather than inferring from the rules
2643 * (same result, clearer code); 'left' indicates where our sin() window
2644 * starts, therefore where the previous window's right edge starts, and
2645 * therefore where to start mixing from the previous buffer. 'right'
2646 * indicates where our sin() ending-window starts, therefore that's where
2647 * we start saving, and where our returned-data ends.
2648
2649 * mixin from previous window */
2650 if (f->previous_length) {
2651 int i,j, n = f->previous_length;
2652 float *w = get_window(f, n);
2653 for (i=0; i < f->channels; ++i) {
2654 for (j=0; j < n; ++j)
2655 f->channel_buffers[i][left+j] =
2656 f->channel_buffers[i][left+j]*w[ j] +
2657 f->previous_window[i][ j]*w[n-1-j];
2658 }
2659 }
2660
2661 prev = f->previous_length;
2662
2663 /* last half of this data becomes previous window */
2664 f->previous_length = len - right;
2665
2666 /* @OPTIMIZE: could avoid this copy by double-buffering the
2667 * output (flipping previous_window with channel_buffers), but
2668 * then previous_window would have to be 2x as large, and
2669 * channel_buffers couldn't be temp mem (although they're NOT
2670 * currently temp mem, they could be (unless we want to level
2671 * performance by spreading out the computation)) */
2672 for (i=0; i < f->channels; ++i)
2673 for (j=0; right+j < len; ++j)
2674 f->previous_window[i][j] = f->channel_buffers[i][right+j];
2675
2676 if (!prev)
2677 /* there was no previous packet, so this data isn't valid...
2678 * this isn't entirely true, only the would-have-overlapped data
2679 * isn't valid, but this seems to be what the spec requires */
2680 return 0;
2681
2682 /* truncate a short frame */
2683 if (len < right) right = len;
2684
2685 f->samples_output += right-left;
2686
2687 return right - left;
2688 }
2689
vorbis_pump_first_frame(stb_vorbis * f)2690 static void vorbis_pump_first_frame(stb_vorbis *f)
2691 {
2692 int len, right, left;
2693 if (vorbis_decode_packet(f, &len, &left, &right))
2694 vorbis_finish_frame(f, len, left, right);
2695 }
2696
start_decoder(vorb * f)2697 static int start_decoder(vorb *f)
2698 {
2699 uint8_t header[6], x,y;
2700 int len,i,j,k, max_submaps = 0;
2701 int longest_floorlist=0;
2702
2703 /* first page, first packet */
2704
2705 if (!start_page(f)) return FALSE;
2706 /* validate page flag */
2707 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
2708 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
2709 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
2710 /* check for expected packet length */
2711 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
2712 if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
2713 /* read packet
2714 * check packet header */
2715 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
2716 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
2717 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
2718 /* vorbis_version */
2719 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
2720 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
2721 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
2722 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
2723 get32(f); /* bitrate_maximum */
2724 get32(f); /* bitrate_nominal */
2725 get32(f); /* bitrate_minimum */
2726 x = get8(f);
2727 { int log0,log1;
2728 log0 = x & 15;
2729 log1 = x >> 4;
2730 f->blocksize_0 = 1 << log0;
2731 f->blocksize_1 = 1 << log1;
2732 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
2733 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
2734 if (log0 > log1) return error(f, VORBIS_invalid_setup);
2735 }
2736
2737 /* framing_flag */
2738 x = get8(f);
2739 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
2740
2741 /* second packet! */
2742 if (!start_page(f)) return FALSE;
2743
2744 if (!start_packet(f)) return FALSE;
2745 do {
2746 len = next_segment(f);
2747 skip(f, len);
2748 f->bytes_in_seg = 0;
2749 } while (len);
2750
2751 /* third packet! */
2752 if (!start_packet(f)) return FALSE;
2753
2754 crc32_init(); /* always init it, to avoid multithread race conditions */
2755
2756 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
2757 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
2758 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
2759
2760 /* codebooks */
2761
2762 f->codebook_count = get_bits(f,8) + 1;
2763 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
2764 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
2765 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
2766 for (i=0; i < f->codebook_count; ++i) {
2767 uint32_t *values;
2768 int ordered, sorted_count;
2769 int total=0;
2770 uint8_t *lengths;
2771 Codebook *c = f->codebooks+i;
2772 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
2773 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
2774 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
2775 x = get_bits(f, 8);
2776 c->dimensions = (get_bits(f, 8)<<8) + x;
2777 x = get_bits(f, 8);
2778 y = get_bits(f, 8);
2779 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
2780 ordered = get_bits(f,1);
2781 c->sparse = ordered ? 0 : get_bits(f,1);
2782
2783 if (c->sparse)
2784 lengths = (uint8_t *) setup_temp_malloc(f, c->entries);
2785 else
2786 lengths = c->codeword_lengths = (uint8_t *) setup_malloc(f, c->entries);
2787
2788 if (!lengths) return error(f, VORBIS_outofmem);
2789
2790 if (ordered) {
2791 int current_entry = 0;
2792 int current_length = get_bits(f,5) + 1;
2793 while (current_entry < c->entries) {
2794 int limit = c->entries - current_entry;
2795 int n = get_bits(f, ilog(limit));
2796 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
2797 memset(lengths + current_entry, current_length, n);
2798 current_entry += n;
2799 ++current_length;
2800 }
2801 } else {
2802 for (j=0; j < c->entries; ++j) {
2803 int present = c->sparse ? get_bits(f,1) : 1;
2804 if (present) {
2805 lengths[j] = get_bits(f, 5) + 1;
2806 ++total;
2807 } else {
2808 lengths[j] = NO_CODE;
2809 }
2810 }
2811 }
2812
2813 if (c->sparse && total >= c->entries >> 2) {
2814 /* convert sparse items to non-sparse! */
2815 if (c->entries > (int) f->setup_temp_memory_required)
2816 f->setup_temp_memory_required = c->entries;
2817
2818 c->codeword_lengths = (uint8_t *) setup_malloc(f, c->entries);
2819 memcpy(c->codeword_lengths, lengths, c->entries);
2820 setup_temp_free(f, lengths, c->entries); /* note this is only safe if there have been no intervening temp mallocs! */
2821 lengths = c->codeword_lengths;
2822 c->sparse = 0;
2823 }
2824
2825 /* compute the size of the sorted tables */
2826 if (c->sparse) {
2827 sorted_count = total;
2828 } else {
2829 sorted_count = 0;
2830 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
2831 for (j=0; j < c->entries; ++j)
2832 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
2833 ++sorted_count;
2834 #endif
2835 }
2836
2837 c->sorted_entries = sorted_count;
2838 values = NULL;
2839
2840 if (!c->sparse) {
2841 c->codewords = (uint32_t *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
2842 if (!c->codewords) return error(f, VORBIS_outofmem);
2843 } else {
2844 unsigned int size;
2845 if (c->sorted_entries) {
2846 c->codeword_lengths = (uint8_t *) setup_malloc(f, c->sorted_entries);
2847 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
2848 c->codewords = (uint32_t *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
2849 if (!c->codewords) return error(f, VORBIS_outofmem);
2850 values = (uint32_t *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
2851 if (!values) return error(f, VORBIS_outofmem);
2852 }
2853 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
2854 if (size > f->setup_temp_memory_required)
2855 f->setup_temp_memory_required = size;
2856 }
2857
2858 if (!compute_codewords(c, lengths, c->entries, values)) {
2859 if (c->sparse) setup_temp_free(f, values, 0);
2860 return error(f, VORBIS_invalid_setup);
2861 }
2862
2863 if (c->sorted_entries) {
2864 /* allocate an extra slot for sentinels */
2865 c->sorted_codewords = (uint32_t *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
2866 /* allocate an extra slot at the front so that c->sorted_values[-1] is defined
2867 * so that we can catch that case without an extra if */
2868 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
2869 if (c->sorted_values) { ++c->sorted_values; c->sorted_values[-1] = -1; }
2870 compute_sorted_huffman(c, lengths, values);
2871 }
2872
2873 if (c->sparse) {
2874 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
2875 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
2876 setup_temp_free(f, lengths, c->entries);
2877 c->codewords = NULL;
2878 }
2879
2880 compute_accelerated_huffman(c);
2881
2882 c->lookup_type = get_bits(f, 4);
2883 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
2884 if (c->lookup_type > 0) {
2885 uint16_t *mults;
2886 c->minimum_value = float32_unpack(get_bits(f, 32));
2887 c->delta_value = float32_unpack(get_bits(f, 32));
2888 c->value_bits = get_bits(f, 4)+1;
2889 c->sequence_p = get_bits(f,1);
2890 if (c->lookup_type == 1) {
2891 c->lookup_values = lookup1_values(c->entries, c->dimensions);
2892 } else {
2893 c->lookup_values = c->entries * c->dimensions;
2894 }
2895 mults = (uint16_t *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
2896 if (mults == NULL) return error(f, VORBIS_outofmem);
2897 for (j=0; j < (int) c->lookup_values; ++j) {
2898 int q = get_bits(f, c->value_bits);
2899 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
2900 mults[j] = q;
2901 }
2902
2903 if (c->lookup_type == 1) {
2904 int len, sparse = c->sparse;
2905 /* pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop */
2906 if (sparse) {
2907 if (c->sorted_entries == 0) goto skip;
2908 c->multiplicands = (stb_vorbis_codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
2909 } else
2910 c->multiplicands = (stb_vorbis_codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
2911 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
2912 len = sparse ? c->sorted_entries : c->entries;
2913 for (j=0; j < len; ++j) {
2914 int z = sparse ? c->sorted_values[j] : j, div=1;
2915 for (k=0; k < c->dimensions; ++k) {
2916 int off = (z / div) % c->lookup_values;
2917 c->multiplicands[j*c->dimensions + k] =
2918 #ifndef STB_VORBIS_CODEBOOK_FLOATS
2919 mults[off];
2920 #else
2921 mults[off]*c->delta_value + c->minimum_value;
2922 /* in this case (and this case only) we could pre-expand c->sequence_p,
2923 * and throw away the decode logic for it; have to ALSO do
2924 * it in the case below, but it can only be done if
2925 * STB_VORBIS_CODEBOOK_FLOATS */
2926 #endif
2927 div *= c->lookup_values;
2928 }
2929 }
2930 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
2931 c->lookup_type = 2;
2932 }
2933 else
2934 {
2935 c->multiplicands = (stb_vorbis_codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
2936 #ifndef STB_VORBIS_CODEBOOK_FLOATS
2937 memcpy(c->multiplicands, mults, sizeof(c->multiplicands[0]) * c->lookup_values);
2938 #else
2939 for (j=0; j < (int) c->lookup_values; ++j)
2940 c->multiplicands[j] = mults[j] * c->delta_value + c->minimum_value;
2941 #endif
2942 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
2943 }
2944 skip:;
2945
2946 #ifdef STB_VORBIS_CODEBOOK_FLOATS
2947 if (c->lookup_type == 2 && c->sequence_p) {
2948 for (j=1; j < (int) c->lookup_values; ++j)
2949 c->multiplicands[j] = c->multiplicands[j-1];
2950 c->sequence_p = 0;
2951 }
2952 #endif
2953 }
2954 }
2955
2956 /* time domain transfers (notused) */
2957
2958 x = get_bits(f, 6) + 1;
2959 for (i=0; i < x; ++i) {
2960 uint32_t z = get_bits(f, 16);
2961 if (z != 0) return error(f, VORBIS_invalid_setup);
2962 }
2963
2964 /* Floors */
2965 f->floor_count = get_bits(f, 6)+1;
2966 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
2967 for (i=0; i < f->floor_count; ++i) {
2968 f->floor_types[i] = get_bits(f, 16);
2969 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
2970 if (f->floor_types[i] == 0) {
2971 Floor0 *g = &f->floor_config[i].floor0;
2972 g->order = get_bits(f,8);
2973 g->rate = get_bits(f,16);
2974 g->bark_map_size = get_bits(f,16);
2975 g->amplitude_bits = get_bits(f,6);
2976 g->amplitude_offset = get_bits(f,8);
2977 g->number_of_books = get_bits(f,4) + 1;
2978 for (j=0; j < g->number_of_books; ++j)
2979 g->book_list[j] = get_bits(f,8);
2980 return error(f, VORBIS_feature_not_supported);
2981 } else {
2982 STBV_Point p[31*8+2];
2983 Floor1 *g = &f->floor_config[i].floor1;
2984 int max_class = -1;
2985 g->partitions = get_bits(f, 5);
2986 for (j=0; j < g->partitions; ++j) {
2987 g->partition_class_list[j] = get_bits(f, 4);
2988 if (g->partition_class_list[j] > max_class)
2989 max_class = g->partition_class_list[j];
2990 }
2991 for (j=0; j <= max_class; ++j) {
2992 g->class_dimensions[j] = get_bits(f, 3)+1;
2993 g->class_subclasses[j] = get_bits(f, 2);
2994 if (g->class_subclasses[j]) {
2995 g->class_masterbooks[j] = get_bits(f, 8);
2996 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
2997 }
2998 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
2999 g->subclass_books[j][k] = get_bits(f,8)-1;
3000 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3001 }
3002 }
3003 g->floor1_multiplier = get_bits(f,2)+1;
3004 g->rangebits = get_bits(f,4);
3005 g->Xlist[0] = 0;
3006 g->Xlist[1] = 1 << g->rangebits;
3007 g->values = 2;
3008 for (j=0; j < g->partitions; ++j) {
3009 int c = g->partition_class_list[j];
3010 for (k=0; k < g->class_dimensions[c]; ++k) {
3011 g->Xlist[g->values] = get_bits(f, g->rangebits);
3012 ++g->values;
3013 }
3014 }
3015 /* precompute the sorting */
3016 for (j=0; j < g->values; ++j) {
3017 p[j].x = g->Xlist[j];
3018 p[j].y = j;
3019 }
3020 qsort(p, g->values, sizeof(p[0]), point_compare);
3021 for (j=0; j < g->values; ++j)
3022 g->sorted_order[j] = (uint8_t) p[j].y;
3023 /* precompute the neighbors */
3024 for (j=2; j < g->values; ++j)
3025 {
3026 int low = 0;
3027 int hi = 0;
3028 neighbors(g->Xlist, j, &low,&hi);
3029 g->neighbors[j][0] = low;
3030 g->neighbors[j][1] = hi;
3031 }
3032
3033 if (g->values > longest_floorlist)
3034 longest_floorlist = g->values;
3035 }
3036 }
3037
3038 /* Residue */
3039 f->residue_count = get_bits(f, 6)+1;
3040 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(*f->residue_config));
3041 for (i=0; i < f->residue_count; ++i) {
3042 uint8_t residue_cascade[64];
3043 Residue *r = f->residue_config+i;
3044 f->residue_types[i] = get_bits(f, 16);
3045 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3046 r->begin = get_bits(f, 24);
3047 r->end = get_bits(f, 24);
3048 r->part_size = get_bits(f,24)+1;
3049 r->classifications = get_bits(f,6)+1;
3050 r->classbook = get_bits(f,8);
3051 for (j=0; j < r->classifications; ++j) {
3052 uint8_t high_bits=0;
3053 uint8_t low_bits=get_bits(f,3);
3054 if (get_bits(f,1))
3055 high_bits = get_bits(f,5);
3056 residue_cascade[j] = high_bits*8 + low_bits;
3057 }
3058 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3059 for (j=0; j < r->classifications; ++j) {
3060 for (k=0; k < 8; ++k) {
3061 if (residue_cascade[j] & (1 << k)) {
3062 r->residue_books[j][k] = get_bits(f, 8);
3063 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3064 } else {
3065 r->residue_books[j][k] = -1;
3066 }
3067 }
3068 }
3069 /* precompute the classifications[] array to avoid inner-loop mod/divide
3070 * call it 'classdata' since we already have r->classifications */
3071 r->classdata = (uint8_t **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3072 if (!r->classdata) return error(f, VORBIS_outofmem);
3073 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3074 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3075 int classwords = f->codebooks[r->classbook].dimensions;
3076 int temp = j;
3077 r->classdata[j] = (uint8_t *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3078 for (k=classwords-1; k >= 0; --k) {
3079 r->classdata[j][k] = temp % r->classifications;
3080 temp /= r->classifications;
3081 }
3082 }
3083 }
3084
3085 f->mapping_count = get_bits(f,6)+1;
3086 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3087 for (i=0; i < f->mapping_count; ++i) {
3088 Mapping *m = f->mapping + i;
3089 int mapping_type = get_bits(f,16);
3090 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3091 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3092 if (get_bits(f,1))
3093 m->submaps = get_bits(f,4)+1;
3094 else
3095 m->submaps = 1;
3096 if (m->submaps > max_submaps)
3097 max_submaps = m->submaps;
3098 if (get_bits(f,1)) {
3099 m->coupling_steps = get_bits(f,8)+1;
3100 for (k=0; k < m->coupling_steps; ++k) {
3101 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
3102 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
3103 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
3104 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
3105 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
3106 }
3107 } else
3108 m->coupling_steps = 0;
3109
3110 /* reserved field */
3111 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
3112 if (m->submaps > 1) {
3113 for (j=0; j < f->channels; ++j) {
3114 m->chan[j].mux = get_bits(f, 4);
3115 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
3116 }
3117 } else
3118 /* @SPECIFICATION: this case is missing from the spec */
3119 for (j=0; j < f->channels; ++j)
3120 m->chan[j].mux = 0;
3121
3122 for (j=0; j < m->submaps; ++j) {
3123 get_bits(f,8); /* discard */
3124 m->submap_floor[j] = get_bits(f,8);
3125 m->submap_residue[j] = get_bits(f,8);
3126 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
3127 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
3128 }
3129 }
3130
3131 /* Modes */
3132 f->mode_count = get_bits(f, 6)+1;
3133 for (i=0; i < f->mode_count; ++i) {
3134 Mode *m = f->mode_config+i;
3135 m->blockflag = get_bits(f,1);
3136 m->windowtype = get_bits(f,16);
3137 m->transformtype = get_bits(f,16);
3138 m->mapping = get_bits(f,8);
3139 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
3140 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
3141 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
3142 }
3143
3144 flush_packet(f);
3145
3146 f->previous_length = 0;
3147
3148 for (i=0; i < f->channels; ++i) {
3149 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
3150 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
3151 f->finalY[i] = (int16_t *) setup_malloc(f, sizeof(int16_t) * longest_floorlist);
3152 }
3153
3154 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
3155 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
3156 f->blocksize[0] = f->blocksize_0;
3157 f->blocksize[1] = f->blocksize_1;
3158
3159 /* compute how much temporary memory is needed */
3160
3161 /* 1. */
3162 {
3163 uint32_t imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
3164 uint32_t classify_mem;
3165 int i,max_part_read=0;
3166 for (i=0; i < f->residue_count; ++i) {
3167 Residue *r = f->residue_config + i;
3168 int n_read = r->end - r->begin;
3169 int part_read = n_read / r->part_size;
3170 if (part_read > max_part_read)
3171 max_part_read = part_read;
3172 }
3173 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8_t *));
3174
3175 f->temp_memory_required = classify_mem;
3176 if (imdct_mem > f->temp_memory_required)
3177 f->temp_memory_required = imdct_mem;
3178 }
3179
3180 f->first_decode = TRUE;
3181
3182 if (f->alloc.alloc_buffer) {
3183 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
3184 /* check if there's enough temp memory so we don't error later */
3185 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
3186 return error(f, VORBIS_outofmem);
3187 }
3188
3189 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
3190
3191 return TRUE;
3192 }
3193
vorbis_deinit(stb_vorbis * p)3194 static void vorbis_deinit(stb_vorbis *p)
3195 {
3196 int i,j;
3197 for (i=0; i < p->residue_count; ++i) {
3198 Residue *r = p->residue_config+i;
3199 if (r->classdata) {
3200 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
3201 setup_free(p, r->classdata[j]);
3202 setup_free(p, r->classdata);
3203 }
3204 setup_free(p, r->residue_books);
3205 }
3206
3207 if (p->codebooks) {
3208 for (i=0; i < p->codebook_count; ++i) {
3209 Codebook *c = p->codebooks + i;
3210 setup_free(p, c->codeword_lengths);
3211 setup_free(p, c->multiplicands);
3212 setup_free(p, c->codewords);
3213 setup_free(p, c->sorted_codewords);
3214 /* c->sorted_values[-1] is the first entry in the array */
3215 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
3216 }
3217 setup_free(p, p->codebooks);
3218 }
3219 setup_free(p, p->floor_config);
3220 setup_free(p, p->residue_config);
3221 for (i=0; i < p->mapping_count; ++i)
3222 setup_free(p, p->mapping[i].chan);
3223 setup_free(p, p->mapping);
3224 for (i=0; i < p->channels; ++i) {
3225 setup_free(p, p->channel_buffers[i]);
3226 setup_free(p, p->previous_window[i]);
3227 setup_free(p, p->finalY[i]);
3228 }
3229 for (i=0; i < 2; ++i) {
3230 setup_free(p, p->A[i]);
3231 setup_free(p, p->B[i]);
3232 setup_free(p, p->C[i]);
3233 setup_free(p, p->window[i]);
3234 setup_free(p, p->bit_reverse[i]);
3235 }
3236 }
3237
stb_vorbis_close(stb_vorbis * p)3238 void stb_vorbis_close(stb_vorbis *p)
3239 {
3240 if (p == NULL) return;
3241 vorbis_deinit(p);
3242 setup_free(p,p);
3243 }
3244
vorbis_init(stb_vorbis * p,stb_vorbis_alloc * z)3245 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
3246 {
3247 memset(p, 0, sizeof(*p)); /* NULL out all malloc'd pointers to start */
3248 if (z) {
3249 p->alloc = *z;
3250 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
3251 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
3252 }
3253 p->eof = 0;
3254 p->error = VORBIS__no_error;
3255 p->stream = NULL;
3256 p->codebooks = NULL;
3257 p->page_crc_tests = -1;
3258 }
3259
stb_vorbis_get_sample_offset(stb_vorbis * f)3260 int stb_vorbis_get_sample_offset(stb_vorbis *f)
3261 {
3262 if (f->current_loc_valid)
3263 return f->current_loc;
3264 return -1;
3265 }
3266
stb_vorbis_get_info(stb_vorbis * f)3267 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
3268 {
3269 stb_vorbis_info d;
3270 d.channels = f->channels;
3271 d.sample_rate = f->sample_rate;
3272 d.setup_memory_required = f->setup_memory_required;
3273 d.setup_temp_memory_required = f->setup_temp_memory_required;
3274 d.temp_memory_required = f->temp_memory_required;
3275 d.max_frame_size = f->blocksize_1 >> 1;
3276 return d;
3277 }
3278
stb_vorbis_get_error(stb_vorbis * f)3279 int stb_vorbis_get_error(stb_vorbis *f)
3280 {
3281 int e = f->error;
3282 f->error = VORBIS__no_error;
3283 return e;
3284 }
3285
vorbis_alloc(stb_vorbis * f)3286 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
3287 {
3288 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
3289 return p;
3290 }
3291
stb_vorbis_get_file_offset(stb_vorbis * f)3292 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
3293 {
3294 return (unsigned int)(f->stream - f->stream_start);
3295 }
3296
3297 #ifndef STB_VORBIS_NO_PULLDATA_API
3298 /* DATA-PULLING API */
3299
vorbis_find_page(stb_vorbis * f,uint32_t * end,uint32_t * last)3300 static uint32_t vorbis_find_page(stb_vorbis *f, uint32_t *end, uint32_t *last)
3301 {
3302 for(;;) {
3303 int n;
3304 if (f->eof) return 0;
3305 n = get8(f);
3306 if (n == 0x4f) { /* page header */
3307 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
3308 int i;
3309 /* check if we're off the end of a file_section stream */
3310 if (retry_loc - 25 > f->stream_len)
3311 return 0;
3312 /* check the rest of the header */
3313 for (i=1; i < 4; ++i)
3314 if (get8(f) != ogg_page_header[i])
3315 break;
3316 if (f->eof) return 0;
3317 if (i == 4) {
3318 uint8_t header[27];
3319 uint32_t i, crc, goal, len;
3320 for (i=0; i < 4; ++i)
3321 header[i] = ogg_page_header[i];
3322 for (; i < 27; ++i)
3323 header[i] = get8(f);
3324 if (f->eof) return 0;
3325 if (header[4] != 0) goto invalid;
3326 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
3327 for (i=22; i < 26; ++i)
3328 header[i] = 0;
3329 crc = 0;
3330 for (i=0; i < 27; ++i)
3331 crc = crc32_update(crc, header[i]);
3332 len = 0;
3333 for (i=0; i < header[26]; ++i) {
3334 int s = get8(f);
3335 crc = crc32_update(crc, s);
3336 len += s;
3337 }
3338 if (len && f->eof) return 0;
3339 for (i=0; i < len; ++i)
3340 crc = crc32_update(crc, get8(f));
3341 /* finished parsing probable page */
3342 if (crc == goal) {
3343 /* we could now check that it's either got the last
3344 * page flag set, OR it's followed by the capture
3345 * pattern, but I guess TECHNICALLY you could have
3346 * a file with garbage between each ogg page and recover
3347 * from it automatically? So even though that paranoia
3348 * might decrease the chance of an invalid decode by
3349 * another 2^32, not worth it since it would hose those
3350 * invalid-but-useful files? */
3351 if (end)
3352 *end = stb_vorbis_get_file_offset(f);
3353 if (last) {
3354 if (header[5] & 0x04)
3355 *last = 1;
3356 else
3357 *last = 0;
3358 }
3359 set_file_offset(f, retry_loc-1);
3360 return 1;
3361 }
3362 }
3363 invalid:
3364 /* not a valid page, so rewind and look for next one */
3365 set_file_offset(f, retry_loc);
3366 }
3367 }
3368 }
3369
3370 /* seek is implemented with 'interpolation search'--this is like
3371 * binary search, but we use the data values to estimate the likely
3372 * location of the data item (plus a bit of a bias so when the
3373 * estimation is wrong we don't waste overly much time)
3374 */
3375
3376 #define SAMPLE_unknown 0xffffffff
3377
3378
3379 /* ogg vorbis, in its insane infinite wisdom, only provides
3380 * information about the sample at the END of the page.
3381 * therefore we COULD have the data we need in the current
3382 * page, and not know it. we could just use the end location
3383 * as our only knowledge for bounds, seek back, and eventually
3384 * the binary search finds it. or we can try to be smart and
3385 * not waste time trying to locate more pages. we try to be
3386 * smart, since this data is already in memory anyway, so
3387 * doing needless I/O would be crazy!
3388 */
vorbis_analyze_page(stb_vorbis * f,ProbedPage * z)3389 static int vorbis_analyze_page(stb_vorbis *f, ProbedPage *z)
3390 {
3391 uint8_t lacing[255];
3392 uint8_t packet_type[255];
3393 int num_packet, packet_start;
3394 int i,len;
3395 uint32_t samples;
3396 uint8_t header[27] = {0};
3397
3398 /* record where the page starts */
3399 z->page_start = stb_vorbis_get_file_offset(f);
3400
3401 /* parse the header */
3402 getn(f, header, 27);
3403 assert(header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S');
3404 getn(f, lacing, header[26]);
3405
3406 /* determine the length of the payload */
3407 len = 0;
3408 for (i=0; i < header[26]; ++i)
3409 len += lacing[i];
3410
3411 /* this implies where the page ends */
3412 z->page_end = z->page_start + 27 + header[26] + len;
3413
3414 /* read the last-decoded sample out of the data */
3415 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 16);
3416
3417 if (header[5] & 4) {
3418 /* if this is the last page, it's not possible to work
3419 * backwards to figure out the first sample! whoops! fuck. */
3420 z->first_decoded_sample = SAMPLE_unknown;
3421 set_file_offset(f, z->page_start);
3422 return 1;
3423 }
3424
3425 /* scan through the frames to determine the sample-count of each one...
3426 * our goal is the sample # of the first fully-decoded sample on the
3427 * page, which is the first decoded sample of the 2nd packet */
3428
3429 num_packet=0;
3430
3431 packet_start = ((header[5] & 1) == 0);
3432
3433 for (i=0; i < header[26]; ++i) {
3434 if (packet_start) {
3435 uint8_t n,b;
3436 if (lacing[i] == 0) goto bail; /* trying to read from zero-length packet */
3437 n = get8(f);
3438 /* if bottom bit is non-zero, we've got corruption */
3439 if (n & 1) goto bail;
3440 n >>= 1;
3441 b = ilog(f->mode_count-1);
3442 n &= (1 << b)-1;
3443 if (n >= f->mode_count) goto bail;
3444 packet_type[num_packet++] = f->mode_config[n].blockflag;
3445 skip(f, lacing[i]-1);
3446 } else
3447 skip(f, lacing[i]);
3448 packet_start = (lacing[i] < 255);
3449 }
3450
3451 /* now that we know the sizes of all the pages, we can start determining
3452 * how much sample data there is. */
3453
3454 samples = 0;
3455
3456 /* for the last packet, we step by its whole length, because the definition
3457 * is that we encoded the end sample loc of the 'last packet completed',
3458 * where 'completed' refers to packets being split, and we are left to guess
3459 * what 'end sample loc' means. we assume it means ignoring the fact that
3460 * the last half of the data is useless without windowing against the next
3461 * packet... (so it's not REALLY complete in that sense)
3462 */
3463 if (num_packet > 1)
3464 samples += f->blocksize[packet_type[num_packet-1]];
3465
3466 for (i=num_packet-2; i >= 1; --i) {
3467 /* now, for this packet, how many samples do we have that
3468 * do not overlap the following packet? */
3469 if (packet_type[i] == 1)
3470 if (packet_type[i+1] == 1)
3471 samples += f->blocksize_1 >> 1;
3472 else
3473 samples += ((f->blocksize_1 - f->blocksize_0) >> 2) + (f->blocksize_0 >> 1);
3474 else
3475 samples += f->blocksize_0 >> 1;
3476 }
3477 /* now, at this point, we've rewound to the very beginning of the
3478 * _second_ packet. if we entirely discard the first packet after
3479 * a seek, this will be exactly the right sample number. HOWEVER!
3480 * we can't as easily compute this number for the LAST page. The
3481 * only way to get the sample offset of the LAST page is to use
3482 * the end loc from the previous page. But what that returns us
3483 * is _exactly_ the place where we get our first non-overlapped
3484 * sample. (I think. Stupid spec for being ambiguous.) So for
3485 * consistency it's better to do that here, too. However, that
3486 * will then require us to NOT discard all of the first frame we
3487 * decode, in some cases, which means an even weirder frame size
3488 * and extra code. what a fucking pain.
3489
3490 * we're going to discard the first packet if we
3491 * start the seek here, so we don't care about it. (we could actually
3492 * do better; if the first packet is long, and the previous packet
3493 * is short, there's actually data in the first half of the first
3494 * packet that doesn't need discarding... but not worth paying the
3495 * effort of tracking that of that here and in the seeking logic)
3496 * except crap, if we infer it from the _previous_ packet's end
3497 * location, we DO need to use that definition... and we HAVE to
3498 * infer the start loc of the LAST packet from the previous packet's
3499 * end location. fuck you, ogg vorbis. */
3500
3501 z->first_decoded_sample = z->last_decoded_sample - samples;
3502
3503 /* restore file state to where we were */
3504 set_file_offset(f, z->page_start);
3505 return 1;
3506
3507 /* restore file state to where we were */
3508 bail:
3509 set_file_offset(f, z->page_start);
3510 return 0;
3511 }
3512
vorbis_seek_frame_from_page(stb_vorbis * f,uint32_t page_start,uint32_t first_sample,uint32_t target_sample,int fine)3513 static int vorbis_seek_frame_from_page(stb_vorbis *f, uint32_t page_start, uint32_t first_sample, uint32_t target_sample, int fine)
3514 {
3515 int left_start, left_end, right_start, right_end, mode,i;
3516 int frame=0;
3517 uint32_t frame_start;
3518 int frames_to_skip, data_to_skip;
3519
3520 /* first_sample is the sample # of the first sample that doesn't
3521 * overlap the previous page... note that this requires us to
3522 * _partially_ discard the first packet! bleh. */
3523 set_file_offset(f, page_start);
3524
3525 f->next_seg = -1; /* force page resync */
3526
3527 frame_start = first_sample;
3528 /* frame start is where the previous packet's last decoded sample
3529 * was, which corresponds to left_end... EXCEPT if the previous
3530 * packet was long and this packet is short? Probably a bug here.
3531
3532
3533 * now, we can start decoding frames... we'll only FAKE decode them,
3534 * until we find the frame that contains our sample; then we'll rewind,
3535 * and try again */
3536 for (;;) {
3537 int start;
3538
3539 if (!vorbis_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
3540 return error(f, VORBIS_seek_failed);
3541
3542 if (frame == 0)
3543 start = left_end;
3544 else
3545 start = left_start;
3546
3547 /* the window starts at left_start; the last valid sample we generate
3548 * before the next frame's window start is right_start-1 */
3549 if (target_sample < frame_start + right_start-start)
3550 break;
3551
3552 flush_packet(f);
3553 if (f->eof)
3554 return error(f, VORBIS_seek_failed);
3555
3556 frame_start += right_start - start;
3557
3558 ++frame;
3559 }
3560
3561 /* ok, at this point, the sample we want is contained in frame #'frame'
3562
3563 * to decode frame #'frame' normally, we have to decode the
3564 * previous frame first... but if it's the FIRST frame of the page
3565 * we can't. if it's the first frame, it means it falls in the part
3566 * of the first frame that doesn't overlap either of the other frames.
3567 * so, if we have to handle that case for the first frame, we might
3568 * as well handle it for all of them, so: */
3569 if (target_sample > frame_start + (left_end - left_start)) {
3570 /* so what we want to do is go ahead and just immediately decode
3571 * this frame, but then make it so the next get_frame_float() uses
3572 * this already-decoded data? or do we want to go ahead and rewind,
3573 * and leave a flag saying to skip the first N data? let's do that
3574 */
3575 frames_to_skip = frame; /* if this is frame #1, skip 1 frame (#0) */
3576 data_to_skip = left_end - left_start;
3577 } else {
3578 /* otherwise, we want to skip frames 0, 1, 2, ... frame-2
3579 * (which means frame-2+1 total frames) then decode frame-1,
3580 * then leave frame pending */
3581 frames_to_skip = frame - 1;
3582 assert(frames_to_skip >= 0);
3583 data_to_skip = -1;
3584 }
3585
3586 set_file_offset(f, page_start);
3587 f->next_seg = - 1; /* force page resync */
3588
3589 for (i=0; i < frames_to_skip; ++i) {
3590 maybe_start_packet(f);
3591 flush_packet(f);
3592 }
3593
3594 if (data_to_skip >= 0) {
3595 int i,j,n = f->blocksize_0 >> 1;
3596 f->discard_samples_deferred = data_to_skip;
3597 for (i=0; i < f->channels; ++i)
3598 for (j=0; j < n; ++j)
3599 f->previous_window[i][j] = 0;
3600 f->previous_length = n;
3601 frame_start += data_to_skip;
3602 } else {
3603 f->previous_length = 0;
3604 vorbis_pump_first_frame(f);
3605 }
3606
3607 /* at this point, the NEXT decoded frame will generate the desired sample */
3608 if (fine) {
3609 /* so if we're doing sample accurate streaming, we want to go ahead and decode it! */
3610 if (target_sample != frame_start) {
3611 int n;
3612 stb_vorbis_get_frame_float(f, &n, NULL);
3613 assert(target_sample > frame_start);
3614 assert(f->channel_buffer_start + (int) (target_sample-frame_start) < f->channel_buffer_end);
3615 f->channel_buffer_start += (target_sample - frame_start);
3616 }
3617 }
3618
3619 return 0;
3620 }
3621
vorbis_seek_base(stb_vorbis * f,unsigned int sample_number,int fine)3622 static int vorbis_seek_base(stb_vorbis *f, unsigned int sample_number, int fine)
3623 {
3624 ProbedPage p[2],q;
3625 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
3626
3627 /* do we know the location of the last page? */
3628 if (f->p_last.page_start == 0) {
3629 uint32_t z = stb_vorbis_stream_length_in_samples(f);
3630 if (z == 0) return error(f, VORBIS_cant_find_last_page);
3631 }
3632
3633 p[0] = f->p_first;
3634 p[1] = f->p_last;
3635
3636 if (sample_number >= f->p_last.last_decoded_sample)
3637 sample_number = f->p_last.last_decoded_sample-1;
3638
3639 if (sample_number < f->p_first.last_decoded_sample) {
3640 vorbis_seek_frame_from_page(f, p[0].page_start, 0, sample_number, fine);
3641 return 0;
3642 } else {
3643 int attempts=0;
3644 while (p[0].page_end < p[1].page_start) {
3645 uint32_t probe;
3646 uint32_t start_offset, end_offset;
3647 uint32_t start_sample, end_sample;
3648
3649 /* copy these into local variables so we can tweak them
3650 * if any are unknown */
3651 start_offset = p[0].page_end;
3652 end_offset = p[1].after_previous_page_start; /* an address known to seek to page p[1] */
3653 start_sample = p[0].last_decoded_sample;
3654 end_sample = p[1].last_decoded_sample;
3655
3656 /* currently there is no such tweaking logic needed/possible? */
3657 if (start_sample == SAMPLE_unknown || end_sample == SAMPLE_unknown)
3658 return error(f, VORBIS_seek_failed);
3659
3660 /* now we want to lerp between these for the target samples... */
3661
3662 /* step 1: we need to bias towards the page start... */
3663 if (start_offset + 4000 < end_offset)
3664 end_offset -= 4000;
3665
3666 /* now compute an interpolated search loc */
3667 probe = start_offset + (int) floor((float) (end_offset - start_offset) / (end_sample - start_sample) * (sample_number - start_sample));
3668
3669 /* next we need to bias towards binary search...
3670 * code is a little wonky to allow for full 32-bit unsigned values */
3671 if (attempts >= 4) {
3672 uint32_t probe2 = start_offset + ((end_offset - start_offset) >> 1);
3673 if (attempts >= 8)
3674 probe = probe2;
3675 else if (probe < probe2)
3676 probe = probe + ((probe2 - probe) >> 1);
3677 else
3678 probe = probe2 + ((probe - probe2) >> 1);
3679 }
3680 ++attempts;
3681
3682 set_file_offset(f, probe);
3683 if (!vorbis_find_page(f, NULL, NULL)) return error(f, VORBIS_seek_failed);
3684 if (!vorbis_analyze_page(f, &q)) return error(f, VORBIS_seek_failed);
3685 q.after_previous_page_start = probe;
3686
3687 /* it's possible we've just found the last page again */
3688 if (q.page_start == p[1].page_start) {
3689 p[1] = q;
3690 continue;
3691 }
3692
3693 if (sample_number < q.last_decoded_sample)
3694 p[1] = q;
3695 else
3696 p[0] = q;
3697 }
3698
3699 if (p[0].last_decoded_sample <= sample_number && sample_number < p[1].last_decoded_sample) {
3700 vorbis_seek_frame_from_page(f, p[1].page_start, p[0].last_decoded_sample, sample_number, fine);
3701 return 0;
3702 }
3703 return error(f, VORBIS_seek_failed);
3704 }
3705 }
3706
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)3707 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
3708 {
3709 return vorbis_seek_base(f, sample_number, FALSE);
3710 }
3711
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)3712 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
3713 {
3714 return vorbis_seek_base(f, sample_number, TRUE);
3715 }
3716
stb_vorbis_seek_start(stb_vorbis * f)3717 void stb_vorbis_seek_start(stb_vorbis *f)
3718 {
3719 if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
3720 set_file_offset(f, f->first_audio_page_offset);
3721 f->previous_length = 0;
3722 f->first_decode = TRUE;
3723 f->next_seg = -1;
3724 vorbis_pump_first_frame(f);
3725 }
3726
stb_vorbis_stream_length_in_samples(stb_vorbis * f)3727 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
3728 {
3729 unsigned int restore_offset, previous_safe;
3730 unsigned int end, last_page_loc;
3731
3732 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
3733 if (!f->total_samples) {
3734 unsigned int last;
3735 uint32_t lo,hi;
3736 char header[6];
3737
3738 /* first, store the current decode position so we can restore it */
3739 restore_offset = stb_vorbis_get_file_offset(f);
3740
3741 /* now we want to seek back 64K from the end (the last page must
3742 * be at most a little less than 64K, but let's allow a little slop) */
3743 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
3744 previous_safe = f->stream_len - 65536;
3745 else
3746 previous_safe = f->first_audio_page_offset;
3747
3748 set_file_offset(f, previous_safe);
3749 /* previous_safe is now our candidate 'earliest known place that seeking
3750 * to will lead to the final page' */
3751
3752 if (!vorbis_find_page(f, &end, &last)) {
3753 /* if we can't find a page, we're hosed! */
3754 f->error = VORBIS_cant_find_last_page;
3755 f->total_samples = 0xffffffff;
3756 goto done;
3757 }
3758
3759 /* check if there are more pages */
3760 last_page_loc = stb_vorbis_get_file_offset(f);
3761
3762 /* stop when the last_page flag is set, not when we reach eof;
3763 * this allows us to stop short of a 'file_section' end without
3764 * explicitly checking the length of the section */
3765 while (!last) {
3766 set_file_offset(f, end);
3767 if (!vorbis_find_page(f, &end, &last)) {
3768 /* the last page we found didn't have the 'last page' flag
3769 * set. whoops! */
3770 break;
3771 }
3772 previous_safe = last_page_loc+1;
3773 last_page_loc = stb_vorbis_get_file_offset(f);
3774 }
3775
3776 set_file_offset(f, last_page_loc);
3777
3778 /* parse the header */
3779 getn(f, (unsigned char *)header, 6);
3780 /* extract the absolute granule position */
3781 lo = get32(f);
3782 hi = get32(f);
3783 if (lo == 0xffffffff && hi == 0xffffffff) {
3784 f->error = VORBIS_cant_find_last_page;
3785 f->total_samples = SAMPLE_unknown;
3786 goto done;
3787 }
3788 if (hi)
3789 lo = 0xfffffffe; /* saturate */
3790 f->total_samples = lo;
3791
3792 f->p_last.page_start = last_page_loc;
3793 f->p_last.page_end = end;
3794 f->p_last.last_decoded_sample = lo;
3795 f->p_last.first_decoded_sample = SAMPLE_unknown;
3796 f->p_last.after_previous_page_start = previous_safe;
3797
3798 done:
3799 set_file_offset(f, restore_offset);
3800 }
3801 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
3802 }
3803
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)3804 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
3805 {
3806 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
3807 }
3808
3809
3810
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)3811 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
3812 {
3813 int len, right,left,i;
3814 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
3815
3816 if (!vorbis_decode_packet(f, &len, &left, &right)) {
3817 f->channel_buffer_start = f->channel_buffer_end = 0;
3818 return 0;
3819 }
3820
3821 len = vorbis_finish_frame(f, len, left, right);
3822 for (i=0; i < f->channels; ++i)
3823 f->outputs[i] = f->channel_buffers[i] + left;
3824
3825 f->channel_buffer_start = left;
3826 f->channel_buffer_end = left+len;
3827
3828 if (channels) *channels = f->channels;
3829 if (output) *output = f->outputs;
3830 return len;
3831 }
3832
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,stb_vorbis_alloc * alloc)3833 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, stb_vorbis_alloc *alloc)
3834 {
3835 stb_vorbis *f, p;
3836 if (data == NULL) return NULL;
3837 vorbis_init(&p, alloc);
3838 p.stream = (uint8_t *) data;
3839 p.stream_end = (uint8_t *) data + len;
3840 p.stream_start = (uint8_t *) p.stream;
3841 p.stream_len = len;
3842 p.push_mode = FALSE;
3843 if (start_decoder(&p)) {
3844 f = vorbis_alloc(&p);
3845 if (f) {
3846 *f = p;
3847 vorbis_pump_first_frame(f);
3848 return f;
3849 }
3850 }
3851 if (error) *error = p.error;
3852 vorbis_deinit(&p);
3853 return NULL;
3854 }
3855
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)3856 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
3857 {
3858 float **outputs;
3859 int len = num_floats / channels;
3860 int n=0;
3861 int z = f->channels;
3862 if (z > channels) z = channels;
3863 while (n < len) {
3864 int i,j;
3865 int k = f->channel_buffer_end - f->channel_buffer_start;
3866 if (n+k >= len) k = len - n;
3867 for (j=0; j < k; ++j) {
3868 for (i=0; i < z; ++i)
3869 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
3870 for ( ; i < channels; ++i)
3871 *buffer++ = 0;
3872 }
3873 n += k;
3874 f->channel_buffer_start += k;
3875 if (n == len)
3876 break;
3877 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
3878 break;
3879 }
3880 return n;
3881 }
3882
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)3883 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
3884 {
3885 float **outputs;
3886 int n=0;
3887 int z = f->channels;
3888 if (z > channels) z = channels;
3889 while (n < num_samples) {
3890 int i;
3891 int k = f->channel_buffer_end - f->channel_buffer_start;
3892 if (n+k >= num_samples) k = num_samples - n;
3893 if (k) {
3894 for (i=0; i < z; ++i)
3895 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
3896 for ( ; i < channels; ++i)
3897 memset(buffer[i]+n, 0, sizeof(float) * k);
3898 }
3899 n += k;
3900 f->channel_buffer_start += k;
3901 if (n == num_samples)
3902 break;
3903 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
3904 break;
3905 }
3906 return n;
3907 }
3908 #endif /* STB_VORBIS_NO_PULLDATA_API */
3909
3910 #endif /* STB_VORBIS_HEADER_ONLY */
3911