1 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
2 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
3 
4 #include <assert.h>
5 
6 #ifdef __cplusplus
7 extern "C" {
8 #endif
9 
10 typedef struct
11 {
12    char *alloc_buffer;
13    int   alloc_buffer_length_in_bytes;
14 } stb_vorbis_alloc;
15 
16 
17 /*   FUNCTIONS USEABLE WITH ALL INPUT MODES */
18 
19 typedef struct stb_vorbis stb_vorbis;
20 
21 typedef struct
22 {
23    unsigned int sample_rate;
24    int channels;
25 
26    unsigned int setup_memory_required;
27    unsigned int setup_temp_memory_required;
28    unsigned int temp_memory_required;
29 
30    int max_frame_size;
31 } stb_vorbis_info;
32 
33 /* get general information about the file */
34 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
35 
36 /* get the last error detected (clears it, too) */
37 extern int stb_vorbis_get_error(stb_vorbis *f);
38 
39 /* close an ogg vorbis file and free all memory in use */
40 extern void stb_vorbis_close(stb_vorbis *f);
41 
42 /* this function returns the offset (in samples) from the beginning of the
43  * file that will be returned by the next decode, if it is known, or -1
44  * otherwise. after a flush_pushdata() call, this may take a while before
45  * it becomes valid again.
46  * NOT WORKING YET after a seek with PULLDATA API */
47 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
48 
49 /* returns the current seek point within the file, or offset from the beginning
50  * of the memory buffer. In pushdata mode it returns 0. */
51 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
52 
53 /*   PULLING INPUT API */
54 
55 #ifndef STB_VORBIS_NO_PULLDATA_API
56 /* This API assumes stb_vorbis is allowed to pull data from a source--
57  * either a block of memory containing the _entire_ vorbis stream, or a
58  * FILE * that you or it create, or possibly some other reading mechanism
59  * if you go modify the source to replace the FILE * case with some kind
60  * of callback to your code. (But if you don't support seeking, you may
61  * just want to go ahead and use pushdata.) */
62 
63 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
64                                   int *error, stb_vorbis_alloc *alloc_buffer);
65 /* create an ogg vorbis decoder from an ogg vorbis stream in memory (note
66  * this must be the entire stream!). on failure, returns NULL and sets *error */
67 
68 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
69 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
70 /* NOT WORKING YET
71  * these functions seek in the Vorbis file to (approximately) 'sample_number'.
72  * after calling seek_frame(), the next call to get_frame_*() will include
73  * the specified sample. after calling stb_vorbis_seek(), the next call to
74  * stb_vorbis_get_samples_* will start with the specified sample. If you
75  * do not need to seek to EXACTLY the target sample when using get_samples_*,
76  * you can also use seek_frame(). */
77 
78 extern void stb_vorbis_seek_start(stb_vorbis *f);
79 /* this function is equivalent to stb_vorbis_seek(f,0), but it
80  * actually works */
81 
82 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
83 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
84 /* these functions return the total length of the vorbis stream */
85 
86 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
87 /* decode the next frame and return the number of samples. the number of
88  * channels returned are stored in *channels (which can be NULL--it is always
89  * the same as the number of channels reported by get_info). *output will
90  * contain an array of float* buffers, one per channel. These outputs will
91  * be overwritten on the next call to stb_vorbis_get_frame_*.
92  *
93  * You generally should not intermix calls to stb_vorbis_get_frame_*()
94  * and stb_vorbis_get_samples_*(), since the latter calls the former.
95  */
96 
97 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
98 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
99 /* gets num_samples samples, not necessarily on a frame boundary--this requires
100  * buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
101  * Returns the number of samples stored per channel; it may be less than requested
102  * at the end of the file. If there are no more samples in the file, returns 0.
103  */
104 
105 #endif
106 
107 /*   ERROR CODES */
108 
109 enum STBVorbisError
110 {
111    VORBIS__no_error,
112 
113    VORBIS_need_more_data=1,             /* not a real error */
114 
115    VORBIS_invalid_api_mixing,           /* can't mix API modes */
116    VORBIS_outofmem,                     /* not enough memory */
117    VORBIS_feature_not_supported,        /* uses floor 0 */
118    VORBIS_too_many_channels,            /* STB_VORBIS_MAX_CHANNELS is too small */
119    VORBIS_file_open_failure,            /* fopen() failed */
120    VORBIS_seek_without_length,          /* can't seek in unknown-length file */
121 
122    VORBIS_unexpected_eof=10,            /* file is truncated? */
123    VORBIS_seek_invalid,                 /* seek past EOF */
124 
125    /* decoding errors (corrupt/invalid stream) -- you probably
126     * don't care about the exact details of these */
127 
128    /* vorbis errors: */
129    VORBIS_invalid_setup=20,
130    VORBIS_invalid_stream,
131 
132    /* ogg errors: */
133    VORBIS_missing_capture_pattern=30,
134    VORBIS_invalid_stream_structure_version,
135    VORBIS_continued_packet_flag_invalid,
136    VORBIS_incorrect_stream_serial_number,
137    VORBIS_invalid_first_page,
138    VORBIS_bad_packet_type,
139    VORBIS_cant_find_last_page,
140    VORBIS_seek_failed
141 };
142 
143 
144 #ifdef __cplusplus
145 }
146 #endif
147 
148 #endif /* STB_VORBIS_INCLUDE_STB_VORBIS_H */
149 
150 #ifndef STB_VORBIS_HEADER_ONLY
151 
152 /* global configuration settings (e.g. set these in the project/makefile),
153  * or just set them in this file at the top (although ideally the first few
154  * should be visible when the header file is compiled too, although it's not
155  * crucial)
156  */
157 
158 /* STB_VORBIS_NO_PULLDATA_API
159  *     does not compile the code for the non-pushdata APIs
160  */
161 #if 0
162 #define STB_VORBIS_NO_PULLDATA_API
163 #endif
164 
165 /* STB_VORBIS_MAX_CHANNELS [number]
166  *     globally define this to the maximum number of channels you need.
167  *     The spec does not put a restriction on channels except that
168  *     the count is stored in a byte, so 255 is the hard limit.
169  *     Reducing this saves about 16 bytes per value, so using 16 saves
170  *     (255-16)*16 or around 4KB. Plus anything other memory usage
171  *     I forgot to account for. Can probably go as low as 8 (7.1 audio),
172  *     6 (5.1 audio), or 2 (stereo only).
173  */
174 #ifndef STB_VORBIS_MAX_CHANNELS
175 #define STB_VORBIS_MAX_CHANNELS    16  /* enough for anyone? */
176 #endif
177 
178 /* STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
179  *     sets the log size of the huffman-acceleration table.  Maximum
180  *     supported value is 24. with larger numbers, more decodings are O(1),
181  *     but the table size is larger so worse cache missing, so you'll have
182  *     to probe (and try multiple ogg vorbis files) to find the sweet spot.
183  */
184 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
185 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
186 #endif
187 
188 /* STB_VORBIS_FAST_BINARY_LENGTH [number]
189  *     sets the log size of the binary-search acceleration table. this
190  *     is used in similar fashion to the fast-huffman size to set initial
191  *     parameters for the binary search
192 
193  * STB_VORBIS_FAST_HUFFMAN_INT
194  *     The fast huffman tables are much more efficient if they can be
195  *     stored as 16-bit results instead of 32-bit results. This restricts
196  *     the codebooks to having only 65535 possible outcomes, though.
197  *     (At least, accelerated by the huffman table.)
198  */
199 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
200 #define STB_VORBIS_FAST_HUFFMAN_SHORT
201 #endif
202 
203 /* STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
204  *     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
205  *     back on binary searching for the correct one. This requires storing
206  *     extra tables with the huffman codes in sorted order. Defining this
207  *     symbol trades off space for speed by forcing a linear search in the
208  *     non-fast case, except for "sparse" codebooks.
209  */
210 #if 0
211 #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
212 #endif
213 
214 /* STB_VORBIS_CODEBOOK_SHORTS
215  *     The vorbis file format encodes VQ codebook floats as ax+b where a and
216  *     b are floating point per-codebook constants, and x is a 16-bit int.
217  *     Normally, stb_vorbis decodes them to floats rather than leaving them
218  *     as 16-bit ints and computing ax+b while decoding. This is a speed/space
219  *     tradeoff; you can save space by defining this flag.
220  */
221 #ifndef STB_VORBIS_CODEBOOK_SHORTS
222 #define STB_VORBIS_CODEBOOK_FLOATS
223 #endif
224 
225 #include <retro_inline.h>
226 
227 #define MAX_BLOCKSIZE_LOG  13   /* from specification */
228 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
229 
230 #ifndef TRUE
231 #define TRUE 1
232 #define FALSE 0
233 #endif
234 
235 #ifdef STB_VORBIS_CODEBOOK_FLOATS
236 typedef float stb_vorbis_codetype;
237 #else
238 typedef uint16_t stb_vorbis_codetype;
239 #endif
240 
241 /* @NOTE
242  *
243  * Some arrays below are tagged "//varies", which means it's actually
244  * a variable-sized piece of data, but rather than malloc I assume it's
245  * small enough it's better to just allocate it all together with the
246  * main thing
247  *
248  * Most of the variables are specified with the smallest size I could pack
249  * them into. It might give better performance to make them all full-sized
250  * integers. It should be safe to freely rearrange the structures or change
251  * the sizes larger--nothing relies on silently truncating etc., nor the
252  * order of variables.
253  */
254 
255 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
256 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
257 
258 typedef struct
259 {
260    int dimensions, entries;
261    uint8_t *codeword_lengths;
262    float  minimum_value;
263    float  delta_value;
264    uint8_t  value_bits;
265    uint8_t  lookup_type;
266    uint8_t  sequence_p;
267    uint8_t  sparse;
268    uint32_t lookup_values;
269    stb_vorbis_codetype *multiplicands;
270    uint32_t *codewords;
271    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
272     int16_t  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
273    #else
274     int32_t  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
275    #endif
276    uint32_t *sorted_codewords;
277    int    *sorted_values;
278    int     sorted_entries;
279 } Codebook;
280 
281 typedef struct
282 {
283    uint8_t order;
284    uint16_t rate;
285    uint16_t bark_map_size;
286    uint8_t amplitude_bits;
287    uint8_t amplitude_offset;
288    uint8_t number_of_books;
289    uint8_t book_list[16];             /* varies */
290 } Floor0;
291 
292 typedef struct
293 {
294    uint8_t partitions;
295    uint8_t partition_class_list[32]; /* varies */
296    uint8_t class_dimensions[16];     /* varies */
297    uint8_t class_subclasses[16];     /* varies */
298    uint8_t class_masterbooks[16];    /* varies */
299    int16_t subclass_books[16][8];    /* varies */
300    uint16_t Xlist[31*8+2];           /* varies */
301    uint8_t sorted_order[31*8+2];
302    uint8_t neighbors[31*8+2][2];
303    uint8_t floor1_multiplier;
304    uint8_t rangebits;
305    int values;
306 } Floor1;
307 
308 typedef union
309 {
310    Floor0 floor0;
311    Floor1 floor1;
312 } Floor;
313 
314 typedef struct
315 {
316    uint32_t begin, end;
317    uint32_t part_size;
318    uint8_t classifications;
319    uint8_t classbook;
320    uint8_t **classdata;
321    int16_t (*residue_books)[8];
322 } Residue;
323 
324 typedef struct
325 {
326    uint8_t magnitude;
327    uint8_t angle;
328    uint8_t mux;
329 } MappingChannel;
330 
331 typedef struct
332 {
333    uint16_t coupling_steps;
334    MappingChannel *chan;
335    uint8_t  submaps;
336    uint8_t  submap_floor[15];   /* varies */
337    uint8_t  submap_residue[15]; /* varies */
338 } Mapping;
339 
340 typedef struct
341 {
342    uint8_t blockflag;
343    uint8_t mapping;
344    uint16_t windowtype;
345    uint16_t transformtype;
346 } Mode;
347 
348 typedef struct
349 {
350    uint32_t  goal_crc;    /* expected crc if match */
351    int     bytes_left;  /* bytes left in packet */
352    uint32_t  crc_so_far;  /* running crc */
353    int     bytes_done;  /* bytes processed in _current_ chunk */
354    uint32_t  sample_loc;  /* granule pos encoded in page */
355 } CRCscan;
356 
357 typedef struct
358 {
359    uint32_t page_start, page_end;
360    uint32_t after_previous_page_start;
361    uint32_t first_decoded_sample;
362    uint32_t last_decoded_sample;
363 } ProbedPage;
364 
365 struct stb_vorbis
366 {
367   /* user-accessible info */
368    unsigned int sample_rate;
369    int channels;
370 
371    unsigned int setup_memory_required;
372    unsigned int temp_memory_required;
373    unsigned int setup_temp_memory_required;
374 
375    uint8_t *stream;
376    uint8_t *stream_start;
377    uint8_t *stream_end;
378 
379    uint32_t stream_len;
380 
381    uint8_t  push_mode;
382 
383    uint32_t first_audio_page_offset;
384 
385    ProbedPage p_first, p_last;
386 
387   /* memory management */
388    stb_vorbis_alloc alloc;
389    int setup_offset;
390    int temp_offset;
391 
392   /* run-time results */
393    int eof;
394    enum STBVorbisError error;
395 
396   /* user-useful data */
397 
398   /* header info */
399    int blocksize[2];
400    int blocksize_0, blocksize_1;
401    int codebook_count;
402    Codebook *codebooks;
403    int floor_count;
404    uint16_t floor_types[64]; /* varies */
405    Floor *floor_config;
406    int residue_count;
407    uint16_t residue_types[64]; /* varies */
408    Residue *residue_config;
409    int mapping_count;
410    Mapping *mapping;
411    int mode_count;
412    Mode mode_config[64];  /* varies */
413 
414    uint32_t total_samples;
415 
416   /* decode buffer */
417    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
418    float *outputs        [STB_VORBIS_MAX_CHANNELS];
419 
420    float *previous_window[STB_VORBIS_MAX_CHANNELS];
421    int previous_length;
422 
423    int16_t *finalY[STB_VORBIS_MAX_CHANNELS];
424 
425    uint32_t current_loc; /* sample location of next frame to decode */
426    int    current_loc_valid;
427 
428   /* per-blocksize precomputed data */
429 
430    /* twiddle factors */
431    float *A[2],*B[2],*C[2];
432    float *window[2];
433    uint16_t *bit_reverse[2];
434 
435   /* current page/packet/segment streaming info */
436    uint32_t serial; /* stream serial number for verification */
437    int last_page;
438    int segment_count;
439    uint8_t segments[255];
440    uint8_t page_flag;
441    uint8_t bytes_in_seg;
442    uint8_t first_decode;
443    int next_seg;
444    int last_seg;  /* flag that we're on the last segment */
445    int last_seg_which; /* what was the segment number of the last seg? */
446    uint32_t acc;
447    int valid_bits;
448    int packet_bytes;
449    int end_seg_with_known_loc;
450    uint32_t known_loc_for_packet;
451    int discard_samples_deferred;
452    uint32_t samples_output;
453 
454   /* push mode scanning */
455    int page_crc_tests; /* only in push_mode: number of tests active; -1 if not searching */
456 
457   /* sample-access */
458    int channel_buffer_start;
459    int channel_buffer_end;
460 };
461 
462 #define IS_PUSH_MODE(f)   FALSE
463 
464 typedef struct stb_vorbis vorb;
465 
error(vorb * f,enum STBVorbisError e)466 static int error(vorb *f, enum STBVorbisError e)
467 {
468    f->error = e;
469    if (!f->eof && e != VORBIS_need_more_data) {
470       f->error=e; /* breakpoint for debugging */
471    }
472    return 0;
473 }
474 
475 
476 /* these functions are used for allocating temporary memory
477  * while decoding. if you can afford the stack space, use
478  * alloca(); otherwise, provide a temp buffer and it will
479  * allocate out of those.
480  */
481 
482 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
483 
484 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
485 #define temp_alloc_save(f)              ((f)->temp_offset)
486 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
487 
488 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
489 
490 /* given a sufficiently large block of memory, make an array of pointers to subblocks of it */
make_block_array(void * mem,int count,int size)491 static void *make_block_array(void *mem, int count, int size)
492 {
493    int i;
494    void ** p = (void **) mem;
495    char *q = (char *) (p + count);
496    for (i=0; i < count; ++i) {
497       p[i] = q;
498       q += size;
499    }
500    return p;
501 }
502 
setup_malloc(vorb * f,int sz)503 static void *setup_malloc(vorb *f, int sz)
504 {
505    sz = (sz+3) & ~3;
506    f->setup_memory_required += sz;
507    if (f->alloc.alloc_buffer) {
508       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
509       if (f->setup_offset + sz > f->temp_offset) return NULL;
510       f->setup_offset += sz;
511       return p;
512    }
513    return sz ? malloc(sz) : NULL;
514 }
515 
setup_free(vorb * f,void * p)516 static void setup_free(vorb *f, void *p)
517 {
518    if (f->alloc.alloc_buffer) return; /* do nothing; setup mem is not a stack */
519    free(p);
520 }
521 
setup_temp_malloc(vorb * f,int sz)522 static void *setup_temp_malloc(vorb *f, int sz)
523 {
524    sz = (sz+3) & ~3;
525    if (f->alloc.alloc_buffer) {
526       if (f->temp_offset - sz < f->setup_offset) return NULL;
527       f->temp_offset -= sz;
528       return (char *) f->alloc.alloc_buffer + f->temp_offset;
529    }
530    return malloc(sz);
531 }
532 
setup_temp_free(vorb * f,void * p,int sz)533 static void setup_temp_free(vorb *f, void *p, int sz)
534 {
535    if (f->alloc.alloc_buffer) {
536       f->temp_offset += (sz+3)&~3;
537       return;
538    }
539    free(p);
540 }
541 
542 #define CRC32_POLY    0x04c11db7   /* from spec */
543 
544 static uint32_t stb_vorbis_crc_table[256];
crc32_init(void)545 static void crc32_init(void)
546 {
547    int i,j;
548    uint32_t s;
549    for(i=0; i < 256; i++) {
550       for (s=i<<24, j=0; j < 8; ++j)
551          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
552       stb_vorbis_crc_table[i] = s;
553    }
554 }
555 
crc32_update(uint32_t crc,uint8_t byte)556 static INLINE uint32_t crc32_update(uint32_t crc, uint8_t byte)
557 {
558    return (crc << 8) ^ stb_vorbis_crc_table[byte ^ (crc >> 24)];
559 }
560 
561 /* used in setup, and for huffman that doesn't go fast path */
bit_reverse(unsigned int n)562 static unsigned int bit_reverse(unsigned int n)
563 {
564   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
565   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
566   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
567   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
568   return (n >> 16) | (n << 16);
569 }
570 
square(float x)571 static float square(float x)
572 {
573    return x*x;
574 }
575 
576 /* this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
577  * as required by the specification. fast(?) implementation from stb.h
578  * @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
579  */
ilog(int32_t n)580 static int ilog(int32_t n)
581 {
582    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
583 
584    /* 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) */
585    if (n < (1 << 14))
586         if (n < (1 <<  4))        return     0 + log2_4[n      ];
587         else if (n < (1 <<  9))      return  5 + log2_4[n >>  5];
588              else                     return 10 + log2_4[n >> 10];
589    else if (n < (1 << 24))
590              if (n < (1 << 19))      return 15 + log2_4[n >> 15];
591              else                     return 20 + log2_4[n >> 20];
592         else if (n < (1 << 29))      return 25 + log2_4[n >> 25];
593              else if (n < (1 << 31)) return 30 + log2_4[n >> 30];
594                   else                return 0; /* signed n returns 0 */
595 }
596 
597 #ifndef M_PI
598   #define M_PI  3.14159265358979323846264f  /* from CRC */
599 #endif
600 
601 /* code length assigned to a value with no huffman encoding */
602 #define NO_CODE   255
603 
604 /* LEAF SETUP FUNCTIONS */
605 
606 /* these functions are only called at setup, and only a few times
607  * per file */
608 
float32_unpack(uint32_t x)609 static float float32_unpack(uint32_t x)
610 {
611    /* from the specification */
612    uint32_t mantissa = x & 0x1fffff;
613    uint32_t sign = x & 0x80000000;
614    uint32_t exp = (x & 0x7fe00000) >> 21;
615    double res = sign ? -(double)mantissa : (double)mantissa;
616    return (float) ldexp((float)res, exp-788);
617 }
618 
619 
620 /* zlib & jpeg huffman tables assume that the output symbols
621  * can either be arbitrarily arranged, or have monotonically
622  * increasing frequencies--they rely on the lengths being sorted;
623  * this makes for a very simple generation algorithm.
624  * vorbis allows a huffman table with non-sorted lengths. This
625  * requires a more sophisticated construction, since symbols in
626  * order do not map to huffman codes "in order".
627  */
add_entry(Codebook * c,uint32_t huff_code,int symbol,int count,int len,uint32_t * values)628 static void add_entry(Codebook *c, uint32_t huff_code, int symbol, int count, int len, uint32_t *values)
629 {
630    if (!c->sparse) {
631       c->codewords      [symbol] = huff_code;
632    } else {
633       c->codewords       [count] = huff_code;
634       c->codeword_lengths[count] = len;
635       values             [count] = symbol;
636    }
637 }
638 
compute_codewords(Codebook * c,uint8_t * len,int n,uint32_t * values)639 static int compute_codewords(Codebook *c, uint8_t *len, int n, uint32_t *values)
640 {
641    int i,k,m=0;
642    uint32_t available[32];
643 
644    memset(available, 0, sizeof(available));
645    /* find the first entry */
646    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
647    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
648    /* add to the list */
649    add_entry(c, 0, k, m++, len[k], values);
650    /* add all available leaves */
651    for (i=1; i <= len[k]; ++i)
652       available[i] = 1 << (32-i);
653    /* note that the above code treats the first case specially,
654     * but it's really the same as the following code, so they
655     * could probably be combined (except the initial code is 0,
656     * and I use 0 in available[] to mean 'empty') */
657    for (i=k+1; i < n; ++i) {
658       uint32_t res;
659       int z = len[i], y;
660       if (z == NO_CODE) continue;
661       /* find lowest available leaf (should always be earliest,
662        * which is what the specification calls for)
663        * note that this property, and the fact we can never have
664        * more than one free leaf at a given level, isn't totally
665        * trivial to prove, but it seems true and the assert never
666        * fires, so! */
667       while (z > 0 && !available[z]) --z;
668       if (z == 0) { assert(0); return FALSE; }
669       res = available[z];
670       available[z] = 0;
671       add_entry(c, bit_reverse(res), i, m++, len[i], values);
672       /* propogate availability up the tree */
673       if (z != len[i]) {
674          for (y=len[i]; y > z; --y) {
675             assert(available[y] == 0);
676             available[y] = res + (1 << (32-y));
677          }
678       }
679    }
680    return TRUE;
681 }
682 
683 /* accelerated huffman table allows fast O(1) match of all symbols
684  * of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH */
compute_accelerated_huffman(Codebook * c)685 static void compute_accelerated_huffman(Codebook *c)
686 {
687    int i, len;
688    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
689       c->fast_huffman[i] = -1;
690 
691    len = c->sparse ? c->sorted_entries : c->entries;
692    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
693    if (len > 32767) len = 32767; /* largest possible value we can encode! */
694    #endif
695    for (i=0; i < len; ++i) {
696       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
697          uint32_t z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
698          /* set table entries for all bit combinations in the higher bits */
699          while (z < FAST_HUFFMAN_TABLE_SIZE) {
700              c->fast_huffman[z] = i;
701              z += 1 << c->codeword_lengths[i];
702          }
703       }
704    }
705 }
706 
707 #ifdef _MSC_VER
708 #define STBV_CDECL __cdecl
709 #else
710 #define STBV_CDECL
711 #endif
712 
uint32_t_compare(const void * p,const void * q)713 static int STBV_CDECL uint32_t_compare(const void *p, const void *q)
714 {
715    uint32_t x = * (uint32_t *) p;
716    uint32_t y = * (uint32_t *) q;
717    return x < y ? -1 : x > y;
718 }
719 
include_in_sort(Codebook * c,uint8_t len)720 static int include_in_sort(Codebook *c, uint8_t len)
721 {
722    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
723    if (len == NO_CODE) return FALSE;
724    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
725    return FALSE;
726 }
727 
728 /* if the fast table above doesn't work, we want to binary
729  * search them... need to reverse the bits */
compute_sorted_huffman(Codebook * c,uint8_t * lengths,uint32_t * values)730 static void compute_sorted_huffman(Codebook *c, uint8_t *lengths, uint32_t *values)
731 {
732    int i, len;
733    /* build a list of all the entries
734     * OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
735     * this is kind of a frivolous optimization--I don't see any performance improvement,
736     * but it's like 4 extra lines of code, so. */
737    if (!c->sparse) {
738       int k = 0;
739       for (i=0; i < c->entries; ++i)
740          if (include_in_sort(c, lengths[i]))
741             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
742       assert(k == c->sorted_entries);
743    } else {
744       for (i=0; i < c->sorted_entries; ++i)
745          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
746    }
747 
748    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_t_compare);
749    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
750 
751    len = c->sparse ? c->sorted_entries : c->entries;
752    /* now we need to indicate how they correspond; we could either
753     *   #1: sort a different data structure that says who they correspond to
754     *   #2: for each sorted entry, search the original list to find who corresponds
755     *   #3: for each original entry, find the sorted entry
756     * #1 requires extra storage, #2 is slow, #3 can use binary search! */
757    for (i=0; i < len; ++i) {
758       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
759       if (include_in_sort(c,huff_len)) {
760          uint32_t code = bit_reverse(c->codewords[i]);
761          int x=0, n=c->sorted_entries;
762          while (n > 1) {
763             /* invariant: sc[x] <= code < sc[x+n] */
764             int m = x + (n >> 1);
765             if (c->sorted_codewords[m] <= code) {
766                x = m;
767                n -= (n>>1);
768             } else {
769                n >>= 1;
770             }
771          }
772          assert(c->sorted_codewords[x] == code);
773          if (c->sparse) {
774             c->sorted_values[x] = values[i];
775             c->codeword_lengths[x] = huff_len;
776          } else {
777             c->sorted_values[x] = i;
778          }
779       }
780    }
781 }
782 
783 /* only run while parsing the header (3 times) */
vorbis_validate(uint8_t * data)784 static int vorbis_validate(uint8_t *data)
785 {
786    static uint8_t vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
787    return memcmp(data, vorbis, 6) == 0;
788 }
789 
790 /* called from setup only, once per code book
791  * (formula implied by specification) */
lookup1_values(int entries,int dim)792 static int lookup1_values(int entries, int dim)
793 {
794    int r = (int) floor(exp((float) log((float) entries) / dim));
795    if ((int) floor(pow((float) r+1, dim)) <= entries)   /* (int) cast for MinGW warning; */
796       ++r;                                              /* floor() to avoid _ftol() when non-CRT */
797    assert(pow((float) r+1, dim) > entries);
798    assert((int) floor(pow((float) r, dim)) <= entries); /* (int),floor() as above */
799    return r;
800 }
801 
802 /* called twice per file */
compute_twiddle_factors(int n,float * A,float * B,float * C)803 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
804 {
805    int n4 = n >> 2, n8 = n >> 3;
806    int k,k2;
807 
808    for (k=k2=0; k < n4; ++k,k2+=2) {
809       A[k2  ] = (float)  cos(4*k*M_PI/n);
810       A[k2+1] = (float) -sin(4*k*M_PI/n);
811       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
812       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
813    }
814    for (k=k2=0; k < n8; ++k,k2+=2) {
815       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
816       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
817    }
818 }
819 
compute_window(int n,float * window)820 static void compute_window(int n, float *window)
821 {
822    int n2 = n >> 1, i;
823    for (i=0; i < n2; ++i)
824       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
825 }
826 
compute_bitreverse(int n,uint16_t * rev)827 static void compute_bitreverse(int n, uint16_t *rev)
828 {
829    int ld = ilog(n) - 1; /* ilog is off-by-one from normal definitions */
830    int i, n8 = n >> 3;
831    for (i=0; i < n8; ++i)
832       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
833 }
834 
init_blocksize(vorb * f,int b,int n)835 static int init_blocksize(vorb *f, int b, int n)
836 {
837    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
838    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
839    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
840    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
841    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
842    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
843    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
844    if (!f->window[b]) return error(f, VORBIS_outofmem);
845    compute_window(n, f->window[b]);
846    f->bit_reverse[b] = (uint16_t *) setup_malloc(f, sizeof(uint16_t) * n8);
847    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
848    compute_bitreverse(n, f->bit_reverse[b]);
849    return TRUE;
850 }
851 
neighbors(uint16_t * x,int n,int * plow,int * phigh)852 static void neighbors(uint16_t *x, int n, int *plow, int *phigh)
853 {
854    int low = -1;
855    int high = 65536;
856    int i;
857    for (i=0; i < n; ++i) {
858       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
859       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
860    }
861 }
862 
863 /* this has been repurposed so y is now the original index instead of y */
864 typedef struct
865 {
866    uint16_t x,y;
867 } STBV_Point;
868 
point_compare(const void * p,const void * q)869 static int STBV_CDECL point_compare(const void *p, const void *q)
870 {
871    STBV_Point *a = (STBV_Point *) p;
872    STBV_Point *b = (STBV_Point *) q;
873    return a->x < b->x ? -1 : a->x > b->x;
874 }
875 
876 /* END LEAF SETUP FUNCTIONS */
877 
get8(vorb * z)878 static uint8_t get8(vorb *z)
879 {
880    if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
881    return *z->stream++;
882 }
883 
get32(vorb * f)884 static uint32_t get32(vorb *f)
885 {
886    uint32_t x;
887    x = get8(f);
888    x += get8(f) << 8;
889    x += get8(f) << 16;
890    x += get8(f) << 24;
891    return x;
892 }
893 
getn(vorb * z,uint8_t * data,int n)894 static int getn(vorb *z, uint8_t *data, int n)
895 {
896    if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
897    memcpy(data, z->stream, n);
898    z->stream += n;
899    return 1;
900 }
901 
skip(vorb * z,int n)902 static void skip(vorb *z, int n)
903 {
904    z->stream += n;
905    if (z->stream >= z->stream_end) z->eof = 1;
906    return;
907 }
908 
set_file_offset(stb_vorbis * f,unsigned int loc)909 static int set_file_offset(stb_vorbis *f, unsigned int loc)
910 {
911    f->eof = 0;
912    if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
913       f->stream = f->stream_end;
914       f->eof = 1;
915       return 0;
916    } else {
917       f->stream = f->stream_start + loc;
918       return 1;
919    }
920 }
921 
922 
923 static uint8_t ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
924 
capture_pattern(vorb * f)925 static int capture_pattern(vorb *f)
926 {
927    if (0x4f != get8(f)) return FALSE;
928    if (0x67 != get8(f)) return FALSE;
929    if (0x67 != get8(f)) return FALSE;
930    if (0x53 != get8(f)) return FALSE;
931    return TRUE;
932 }
933 
934 #define PAGEFLAG_continued_packet   1
935 #define PAGEFLAG_first_page         2
936 #define PAGEFLAG_last_page          4
937 
start_page_no_capturepattern(vorb * f)938 static int start_page_no_capturepattern(vorb *f)
939 {
940    uint32_t loc0,loc1,n;
941    /* stream structure version */
942    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
943    /* header flag */
944    f->page_flag = get8(f);
945    /* absolute granule position */
946    loc0 = get32(f);
947    loc1 = get32(f);
948    /* @TODO: validate loc0,loc1 as valid positions?
949     * stream serial number -- vorbis doesn't interleave, so discard */
950    get32(f);
951    /*if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
952     * page sequence number */
953    n = get32(f);
954    f->last_page = n;
955    /* CRC32 */
956    get32(f);
957    /* page_segments */
958    f->segment_count = get8(f);
959    if (!getn(f, f->segments, f->segment_count))
960       return error(f, VORBIS_unexpected_eof);
961    /* assume we _don't_ know any the sample position of any segments */
962    f->end_seg_with_known_loc = -2;
963    if (loc0 != ~0U || loc1 != ~0U) {
964       int i;
965       /* determine which packet is the last one that will complete */
966       for (i=f->segment_count-1; i >= 0; --i)
967          if (f->segments[i] < 255)
968             break;
969       /* 'i' is now the index of the _last_ segment of a packet that ends */
970       if (i >= 0) {
971          f->end_seg_with_known_loc = i;
972          f->known_loc_for_packet   = loc0;
973       }
974    }
975    if (f->first_decode) {
976       int i,len;
977       ProbedPage p;
978       len = 0;
979       for (i=0; i < f->segment_count; ++i)
980          len += f->segments[i];
981       len += 27 + f->segment_count;
982       p.page_start = f->first_audio_page_offset;
983       p.page_end = p.page_start + len;
984       p.after_previous_page_start = p.page_start;
985       p.first_decoded_sample = 0;
986       p.last_decoded_sample = loc0;
987       f->p_first = p;
988    }
989    f->next_seg = 0;
990    return TRUE;
991 }
992 
start_page(vorb * f)993 static int start_page(vorb *f)
994 {
995    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
996    return start_page_no_capturepattern(f);
997 }
998 
start_packet(vorb * f)999 static int start_packet(vorb *f)
1000 {
1001    while (f->next_seg == -1) {
1002       if (!start_page(f)) return FALSE;
1003       if (f->page_flag & PAGEFLAG_continued_packet)
1004          return error(f, VORBIS_continued_packet_flag_invalid);
1005    }
1006    f->last_seg = FALSE;
1007    f->valid_bits = 0;
1008    f->packet_bytes = 0;
1009    f->bytes_in_seg = 0;
1010    /* f->next_seg is now valid */
1011    return TRUE;
1012 }
1013 
maybe_start_packet(vorb * f)1014 static int maybe_start_packet(vorb *f)
1015 {
1016    if (f->next_seg == -1) {
1017       int x = get8(f);
1018       if (f->eof) return FALSE; /* EOF at page boundary is not an error! */
1019       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1020       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1021       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1022       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1023       if (!start_page_no_capturepattern(f)) return FALSE;
1024       if (f->page_flag & PAGEFLAG_continued_packet) {
1025          /* set up enough state that we can read this packet if we want,
1026           * e.g. during recovery */
1027          f->last_seg = FALSE;
1028          f->bytes_in_seg = 0;
1029          return error(f, VORBIS_continued_packet_flag_invalid);
1030       }
1031    }
1032    return start_packet(f);
1033 }
1034 
next_segment(vorb * f)1035 static int next_segment(vorb *f)
1036 {
1037    int len;
1038    if (f->last_seg) return 0;
1039    if (f->next_seg == -1) {
1040       f->last_seg_which = f->segment_count-1; /* in case start_page fails */
1041       if (!start_page(f)) { f->last_seg = 1; return 0; }
1042       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1043    }
1044    len = f->segments[f->next_seg++];
1045    if (len < 255) {
1046       f->last_seg = TRUE;
1047       f->last_seg_which = f->next_seg-1;
1048    }
1049    if (f->next_seg >= f->segment_count)
1050       f->next_seg = -1;
1051    assert(f->bytes_in_seg == 0);
1052    f->bytes_in_seg = len;
1053    return len;
1054 }
1055 
1056 #define EOP    (-1)
1057 #define INVALID_BITS  (-1)
1058 
get8_packet_raw(vorb * f)1059 static int get8_packet_raw(vorb *f)
1060 {
1061    if (!f->bytes_in_seg) {  /* CLANG! */
1062       if (f->last_seg) return EOP;
1063       else if (!next_segment(f)) return EOP;
1064    }
1065    assert(f->bytes_in_seg > 0);
1066    --f->bytes_in_seg;
1067    ++f->packet_bytes;
1068    return get8(f);
1069 }
1070 
get8_packet(vorb * f)1071 static int get8_packet(vorb *f)
1072 {
1073    int x = get8_packet_raw(f);
1074    f->valid_bits = 0;
1075    return x;
1076 }
1077 
flush_packet(vorb * f)1078 static void flush_packet(vorb *f)
1079 {
1080    while (get8_packet_raw(f) != EOP);
1081 }
1082 
1083 /* @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1084  * as the huffman decoder? */
get_bits(vorb * f,int n)1085 static uint32_t get_bits(vorb *f, int n)
1086 {
1087    uint32_t z;
1088 
1089    if (f->valid_bits < 0) return 0;
1090    if (f->valid_bits < n) {
1091       if (n > 24) {
1092          /* the accumulator technique below would not work correctly in this case */
1093          z = get_bits(f, 24);
1094          z += get_bits(f, n-24) << 24;
1095          return z;
1096       }
1097       if (f->valid_bits == 0) f->acc = 0;
1098       while (f->valid_bits < n) {
1099          int z = get8_packet_raw(f);
1100          if (z == EOP) {
1101             f->valid_bits = INVALID_BITS;
1102             return 0;
1103          }
1104          f->acc += z << f->valid_bits;
1105          f->valid_bits += 8;
1106       }
1107    }
1108    if (f->valid_bits < 0) return 0;
1109    z = f->acc & ((1 << n)-1);
1110    f->acc >>= n;
1111    f->valid_bits -= n;
1112    return z;
1113 }
1114 
1115 /* @OPTIMIZE: primary accumulator for huffman
1116  * expand the buffer to as many bits as possible without reading off end of packet
1117  * it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1118  * e.g. cache them locally and decode locally */
prep_huffman(vorb * f)1119 static INLINE void prep_huffman(vorb *f)
1120 {
1121    if (f->valid_bits <= 24) {
1122       if (f->valid_bits == 0) f->acc = 0;
1123       do {
1124          int z;
1125          if (f->last_seg && !f->bytes_in_seg) return;
1126          z = get8_packet_raw(f);
1127          if (z == EOP) return;
1128          f->acc += z << f->valid_bits;
1129          f->valid_bits += 8;
1130       } while (f->valid_bits <= 24);
1131    }
1132 }
1133 
1134 enum
1135 {
1136    VORBIS_packet_id = 1,
1137    VORBIS_packet_comment = 3,
1138    VORBIS_packet_setup = 5
1139 };
1140 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1141 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1142 {
1143    int i;
1144    prep_huffman(f);
1145 
1146    assert(c->sorted_codewords || c->codewords);
1147    /* cases to use binary search: sorted_codewords && !c->codewords
1148     *                             sorted_codewords && c->entries > 8 */
1149    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1150       /* binary search */
1151       uint32_t code = bit_reverse(f->acc);
1152       int x=0, n=c->sorted_entries, len;
1153 
1154       while (n > 1) {
1155          /* invariant: sc[x] <= code < sc[x+n] */
1156          int m = x + (n >> 1);
1157          if (c->sorted_codewords[m] <= code) {
1158             x = m;
1159             n -= (n>>1);
1160          } else {
1161             n >>= 1;
1162          }
1163       }
1164       /* x is now the sorted index */
1165       if (!c->sparse) x = c->sorted_values[x];
1166       /* x is now sorted index if sparse, or symbol otherwise */
1167       len = c->codeword_lengths[x];
1168       if (f->valid_bits >= len) {
1169          f->acc >>= len;
1170          f->valid_bits -= len;
1171          return x;
1172       }
1173 
1174       f->valid_bits = 0;
1175       return -1;
1176    }
1177 
1178    /* if small, linear search */
1179    assert(!c->sparse);
1180    for (i=0; i < c->entries; ++i) {
1181       if (c->codeword_lengths[i] == NO_CODE) continue;
1182       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1183          if (f->valid_bits >= c->codeword_lengths[i]) {
1184             f->acc >>= c->codeword_lengths[i];
1185             f->valid_bits -= c->codeword_lengths[i];
1186             return i;
1187          }
1188          f->valid_bits = 0;
1189          return -1;
1190       }
1191    }
1192 
1193    error(f, VORBIS_invalid_stream);
1194    f->valid_bits = 0;
1195    return -1;
1196 }
1197 
1198 
codebook_decode_scalar(vorb * f,Codebook * c)1199 static int codebook_decode_scalar(vorb *f, Codebook *c)
1200 {
1201    int i;
1202    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1203       prep_huffman(f);
1204    /* fast huffman table lookup */
1205    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1206    i = c->fast_huffman[i];
1207    if (i >= 0) {
1208       f->acc >>= c->codeword_lengths[i];
1209       f->valid_bits -= c->codeword_lengths[i];
1210       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1211       return i;
1212    }
1213    return codebook_decode_scalar_raw(f,c);
1214 }
1215 
1216 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1217 
1218 #define DECODE(var,f,c)                                       \
1219    DECODE_RAW(var,f,c)                                        \
1220    if (c->sparse) var = c->sorted_values[var];
1221 
1222 #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1223 
1224 /* CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1225  * where we avoid one addition */
1226 #ifndef STB_VORBIS_CODEBOOK_FLOATS
1227    #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off] * c->delta_value + c->minimum_value)
1228    #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off] * c->delta_value)
1229    #define CODEBOOK_ELEMENT_BASE(c)         (c->minimum_value)
1230 #else
1231    #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1232    #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1233    #define CODEBOOK_ELEMENT_BASE(c)         (0)
1234 #endif
1235 
codebook_decode_start(vorb * f,Codebook * c)1236 static int codebook_decode_start(vorb *f, Codebook *c)
1237 {
1238    int z = -1;
1239 
1240    /* type 0 is only legal in a scalar context */
1241    if (c->lookup_type == 0)
1242       error(f, VORBIS_invalid_stream);
1243    else {
1244       DECODE_VQ(z,f,c);
1245       if (c->sparse) assert(z < c->sorted_entries);
1246       if (z < 0) {  /* check for EOP */
1247          if (!f->bytes_in_seg)
1248             if (f->last_seg)
1249                return z;
1250          error(f, VORBIS_invalid_stream);
1251       }
1252    }
1253    return z;
1254 }
1255 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1256 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1257 {
1258    int i,z = codebook_decode_start(f,c);
1259    if (z < 0) return FALSE;
1260    if (len > c->dimensions) len = c->dimensions;
1261 
1262    z *= c->dimensions;
1263    if (c->sequence_p) {
1264       float last = CODEBOOK_ELEMENT_BASE(c);
1265       for (i=0; i < len; ++i) {
1266          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1267          output[i] += val;
1268          last = val + c->minimum_value;
1269       }
1270    } else {
1271       float last = CODEBOOK_ELEMENT_BASE(c);
1272       for (i=0; i < len; ++i) {
1273          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1274       }
1275    }
1276 
1277    return TRUE;
1278 }
1279 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1280 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1281 {
1282    int i,z = codebook_decode_start(f,c);
1283    float last = CODEBOOK_ELEMENT_BASE(c);
1284    if (z < 0) return FALSE;
1285    if (len > c->dimensions) len = c->dimensions;
1286 
1287    z *= c->dimensions;
1288    for (i=0; i < len; ++i) {
1289       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1290       output[i*step] += val;
1291       if (c->sequence_p) last = val;
1292    }
1293 
1294    return TRUE;
1295 }
1296 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1297 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1298 {
1299    int c_inter = *c_inter_p;
1300    int p_inter = *p_inter_p;
1301    int i,z, effective = c->dimensions;
1302 
1303    /* type 0 is only legal in a scalar context */
1304    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1305 
1306    while (total_decode > 0) {
1307       float last = CODEBOOK_ELEMENT_BASE(c);
1308       DECODE_VQ(z,f,c);
1309       assert(!c->sparse || z < c->sorted_entries);
1310       if (z < 0) {
1311          if (!f->bytes_in_seg)
1312             if (f->last_seg) return FALSE;
1313          return error(f, VORBIS_invalid_stream);
1314       }
1315 
1316       /* if this will take us off the end of the buffers, stop short!
1317        * we check by computing the length of the virtual interleaved
1318        * buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1319        * and the length we'll be using (effective) */
1320       if (c_inter + p_inter*ch + effective > len * ch) {
1321          effective = len*ch - (p_inter*ch - c_inter);
1322       }
1323 
1324       z *= c->dimensions;
1325       if (c->sequence_p) {
1326          for (i=0; i < effective; ++i) {
1327             float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1328             if (outputs[c_inter])
1329                outputs[c_inter][p_inter] += val;
1330             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1331             last = val;
1332          }
1333       } else {
1334          for (i=0; i < effective; ++i) {
1335             float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1336             if (outputs[c_inter])
1337                outputs[c_inter][p_inter] += val;
1338             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1339          }
1340       }
1341 
1342       total_decode -= effective;
1343    }
1344    *c_inter_p = c_inter;
1345    *p_inter_p = p_inter;
1346    return TRUE;
1347 }
1348 
codebook_decode_deinterleave_repeat_2(vorb * f,Codebook * c,float ** outputs,int * c_inter_p,int * p_inter_p,int len,int total_decode)1349 static int codebook_decode_deinterleave_repeat_2(vorb *f, Codebook *c, float **outputs, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1350 {
1351    int c_inter = *c_inter_p;
1352    int p_inter = *p_inter_p;
1353    int i,z, effective = c->dimensions;
1354 
1355    /* type 0 is only legal in a scalar context */
1356    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1357 
1358    while (total_decode > 0) {
1359       float last = CODEBOOK_ELEMENT_BASE(c);
1360       DECODE_VQ(z,f,c);
1361 
1362       if (z < 0) {
1363          if (!f->bytes_in_seg)
1364             if (f->last_seg) return FALSE;
1365          return error(f, VORBIS_invalid_stream);
1366       }
1367 
1368       /* if this will take us off the end of the buffers, stop short!
1369        * we check by computing the length of the virtual interleaved
1370        * buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1371        * and the length we'll be using (effective)
1372        */
1373       if (c_inter + p_inter*2 + effective > len * 2) {
1374          effective = len*2 - (p_inter*2 - c_inter);
1375       }
1376 
1377       {
1378          z *= c->dimensions;
1379          if (c->sequence_p) {
1380             /* haven't optimized this case because I don't have any examples */
1381             for (i=0; i < effective; ++i) {
1382                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1383                if (outputs[c_inter])
1384                   outputs[c_inter][p_inter] += val;
1385                if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1386                last = val;
1387             }
1388          } else {
1389             i=0;
1390             if (c_inter == 1) {
1391                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1392                if (outputs[c_inter])
1393                   outputs[c_inter][p_inter] += val;
1394                c_inter = 0; ++p_inter;
1395                ++i;
1396             }
1397             {
1398                float *z0 = outputs[0];
1399                float *z1 = outputs[1];
1400                for (; i+1 < effective;) {
1401                   float v0 = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1402                   float v1 = CODEBOOK_ELEMENT_FAST(c,z+i+1) + last;
1403                   if (z0)
1404                      z0[p_inter] += v0;
1405                   if (z1)
1406                      z1[p_inter] += v1;
1407                   ++p_inter;
1408                   i += 2;
1409                }
1410             }
1411             if (i < effective) {
1412                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1413                if (outputs[c_inter])
1414                   outputs[c_inter][p_inter] += val;
1415                if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1416             }
1417          }
1418       }
1419 
1420       total_decode -= effective;
1421    }
1422    *c_inter_p = c_inter;
1423    *p_inter_p = p_inter;
1424    return TRUE;
1425 }
1426 
predict_point(int x,int x0,int x1,int y0,int y1)1427 static int predict_point(int x, int x0, int x1, int y0, int y1)
1428 {
1429    int dy = y1 - y0;
1430    int adx = x1 - x0;
1431    /* @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? */
1432    int err = abs(dy) * (x - x0);
1433    int off = err / adx;
1434    return dy < 0 ? y0 - off : y0 + off;
1435 }
1436 
1437 /* the following table is block-copied from the specification */
1438 static float inverse_db_table[256] =
1439 {
1440   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1441   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1442   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1443   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1444   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1445   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1446   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1447   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1448   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1449   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1450   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1451   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1452   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1453   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1454   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1455   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1456   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1457   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1458   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1459   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1460   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1461   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1462   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1463   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1464   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1465   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1466   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1467   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1468   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1469   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1470   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1471   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1472   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1473   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1474   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1475   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1476   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1477   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1478   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1479   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1480   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1481   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1482   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1483   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1484   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1485   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1486   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1487   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1488   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1489   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1490   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1491   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1492   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1493   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1494   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1495   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1496   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1497   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1498   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1499   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1500   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1501   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1502   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1503   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1504 };
1505 
1506 
1507 /* @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1508  * note that you must produce bit-identical output to decode correctly;
1509  * this specific sequence of operations is specified in the spec (it's
1510  * drawing integer-quantized frequency-space lines that the encoder
1511  * expects to be exactly the same)
1512  *     ... also, isn't the whole point of Bresenham's algorithm to NOT
1513  * have to divide in the setup? sigh.
1514  */
1515 #define LINE_OP(a,b)   a *= b
1516 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1517 static INLINE void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1518 {
1519    int dy = y1 - y0;
1520    int adx = x1 - x0;
1521    int ady = abs(dy);
1522    int x=x0,y=y0;
1523    int err = 0;
1524    int sy;
1525    int base = dy / adx;
1526 
1527    if (dy < 0)
1528       sy = base - 1;
1529    else
1530       sy = base+1;
1531 
1532    ady -= abs(base) * adx;
1533    if (x1 > n) x1 = n;
1534    LINE_OP(output[x], inverse_db_table[y]);
1535    for (++x; x < x1; ++x) {
1536       err += ady;
1537       if (err >= adx) {
1538          err -= adx;
1539          y += sy;
1540       } else
1541          y += base;
1542       LINE_OP(output[x], inverse_db_table[y]);
1543    }
1544 }
1545 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)1546 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
1547 {
1548    int k;
1549    if (rtype == 0) {
1550       int step = n / book->dimensions;
1551       for (k=0; k < step; ++k)
1552          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
1553             return FALSE;
1554    } else {
1555       for (k=0; k < n; ) {
1556          if (!codebook_decode(f, book, target+offset, n-k))
1557             return FALSE;
1558          k += book->dimensions;
1559          offset += book->dimensions;
1560       }
1561    }
1562    return TRUE;
1563 }
1564 
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8_t * do_not_decode)1565 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8_t *do_not_decode)
1566 {
1567    int i,j,pass;
1568    Residue *r = f->residue_config + rn;
1569    int rtype = f->residue_types[rn];
1570    int c = r->classbook;
1571    int classwords = f->codebooks[c].dimensions;
1572    int n_read = r->end - r->begin;
1573    int part_read = n_read / r->part_size;
1574    int temp_alloc_point = temp_alloc_save(f);
1575    uint8_t ***part_classdata = (uint8_t ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
1576 
1577    for (i=0; i < ch; ++i)
1578       if (!do_not_decode[i])
1579          memset(residue_buffers[i], 0, sizeof(float) * n);
1580 
1581    if (rtype == 2 && ch != 1) {
1582       for (j=0; j < ch; ++j)
1583          if (!do_not_decode[j])
1584             break;
1585       if (j == ch)
1586          goto done;
1587 
1588       for (pass=0; pass < 8; ++pass) {
1589          int pcount = 0, class_set = 0;
1590          if (ch == 2) {
1591             while (pcount < part_read) {
1592                int z = r->begin + pcount*r->part_size;
1593                int c_inter = (z & 1), p_inter = z>>1;
1594                if (pass == 0) {
1595                   Codebook *c = f->codebooks+r->classbook;
1596                   int q;
1597                   DECODE(q,f,c);
1598                   if (q == EOP) goto done;
1599                   part_classdata[0][class_set] = r->classdata[q];
1600                }
1601                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1602                   int z = r->begin + pcount*r->part_size;
1603                   int c = part_classdata[0][class_set][i];
1604                   int b = r->residue_books[c][pass];
1605                   if (b >= 0) {
1606                      Codebook *book = f->codebooks + b;
1607                      /* saves 1% */
1608                      if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r->part_size))
1609                         goto done;
1610                   } else {
1611                      z += r->part_size;
1612                      c_inter = z & 1;
1613                      p_inter = z >> 1;
1614                   }
1615                }
1616                ++class_set;
1617             }
1618          } else if (ch == 1) {
1619             while (pcount < part_read) {
1620                int z = r->begin + pcount*r->part_size;
1621                int c_inter = 0, p_inter = z;
1622                if (pass == 0) {
1623                   Codebook *c = f->codebooks+r->classbook;
1624                   int q;
1625                   DECODE(q,f,c);
1626                   if (q == EOP) goto done;
1627                   part_classdata[0][class_set] = r->classdata[q];
1628                }
1629                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1630                   int z = r->begin + pcount*r->part_size;
1631                   int c = part_classdata[0][class_set][i];
1632                   int b = r->residue_books[c][pass];
1633                   if (b >= 0) {
1634                      Codebook *book = f->codebooks + b;
1635                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1636                         goto done;
1637                   } else {
1638                      z += r->part_size;
1639                      c_inter = 0;
1640                      p_inter = z;
1641                   }
1642                }
1643                ++class_set;
1644             }
1645          } else {
1646             while (pcount < part_read) {
1647                int z = r->begin + pcount*r->part_size;
1648                int c_inter = z % ch, p_inter = z/ch;
1649                if (pass == 0) {
1650                   Codebook *c = f->codebooks+r->classbook;
1651                   int q;
1652                   DECODE(q,f,c);
1653                   if (q == EOP) goto done;
1654                   part_classdata[0][class_set] = r->classdata[q];
1655                }
1656                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1657                   int z = r->begin + pcount*r->part_size;
1658                   int c = part_classdata[0][class_set][i];
1659                   int b = r->residue_books[c][pass];
1660                   if (b >= 0) {
1661                      Codebook *book = f->codebooks + b;
1662                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1663                         goto done;
1664                   } else {
1665                      z += r->part_size;
1666                      c_inter = z % ch;
1667                      p_inter = z / ch;
1668                   }
1669                }
1670                ++class_set;
1671             }
1672          }
1673       }
1674       goto done;
1675    }
1676 
1677    for (pass=0; pass < 8; ++pass) {
1678       int pcount = 0, class_set=0;
1679       while (pcount < part_read) {
1680          if (pass == 0) {
1681             for (j=0; j < ch; ++j) {
1682                if (!do_not_decode[j]) {
1683                   Codebook *c = f->codebooks+r->classbook;
1684                   int temp;
1685                   DECODE(temp,f,c);
1686                   if (temp == EOP) goto done;
1687                   part_classdata[j][class_set] = r->classdata[temp];
1688                }
1689             }
1690          }
1691          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1692             for (j=0; j < ch; ++j) {
1693                if (!do_not_decode[j]) {
1694                   int c = part_classdata[j][class_set][i];
1695                   int b = r->residue_books[c][pass];
1696                   if (b >= 0) {
1697                      float *target = residue_buffers[j];
1698                      int offset = r->begin + pcount * r->part_size;
1699                      int n = r->part_size;
1700                      Codebook *book = f->codebooks + b;
1701                      if (!residue_decode(f, book, target, offset, n, rtype))
1702                         goto done;
1703                   }
1704                }
1705             }
1706          }
1707          ++class_set;
1708       }
1709    }
1710   done:
1711    temp_alloc_restore(f,temp_alloc_point);
1712 }
1713 
1714 
1715 #ifndef LIBVORBIS_MDCT
1716 #define LIBVORBIS_MDCT 0
1717 #endif
1718 
1719 #if LIBVORBIS_MDCT
1720 /* directly call the vorbis MDCT using an interface documented
1721  * by Jeff Roberts... useful for performance comparison */
1722 typedef struct
1723 {
1724   int n;
1725   int log2n;
1726 
1727   float *trig;
1728   int   *bitrev;
1729 
1730   float scale;
1731 } mdct_lookup;
1732 
1733 extern void mdct_init(mdct_lookup *lookup, int n);
1734 extern void mdct_clear(mdct_lookup *l);
1735 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
1736 
1737 mdct_lookup M1,M2;
1738 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)1739 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1740 {
1741    mdct_lookup *M;
1742    if (M1.n == n) M = &M1;
1743    else if (M2.n == n) M = &M2;
1744    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
1745    else {
1746       if (M2.n) __asm int 3;
1747       mdct_init(&M2, n);
1748       M = &M2;
1749    }
1750 
1751    mdct_backward(M, buffer, buffer);
1752 }
1753 #endif
1754 
1755 
1756 /* the following were split out into separate functions while optimizing;
1757  * they could be pushed back up but eh. __forceinline showed no change;
1758  * they're probably already being inlined. */
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)1759 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
1760 {
1761    float *ee0 = e + i_off;
1762    float *ee2 = ee0 + k_off;
1763    int i;
1764 
1765    assert((n & 3) == 0);
1766    for (i=(n>>2); i > 0; --i) {
1767       float k00_20, k01_21;
1768       k00_20  = ee0[ 0] - ee2[ 0];
1769       k01_21  = ee0[-1] - ee2[-1];
1770       ee0[ 0] += ee2[ 0];/*ee0[ 0] = ee0[ 0] + ee2[ 0]; */
1771       ee0[-1] += ee2[-1];/*ee0[-1] = ee0[-1] + ee2[-1]; */
1772       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
1773       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
1774       A += 8;
1775 
1776       k00_20  = ee0[-2] - ee2[-2];
1777       k01_21  = ee0[-3] - ee2[-3];
1778       ee0[-2] += ee2[-2];/*ee0[-2] = ee0[-2] + ee2[-2]; */
1779       ee0[-3] += ee2[-3];/*ee0[-3] = ee0[-3] + ee2[-3]; */
1780       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
1781       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
1782       A += 8;
1783 
1784       k00_20  = ee0[-4] - ee2[-4];
1785       k01_21  = ee0[-5] - ee2[-5];
1786       ee0[-4] += ee2[-4];/*ee0[-4] = ee0[-4] + ee2[-4]; */
1787       ee0[-5] += ee2[-5];/*ee0[-5] = ee0[-5] + ee2[-5]; */
1788       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
1789       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
1790       A += 8;
1791 
1792       k00_20  = ee0[-6] - ee2[-6];
1793       k01_21  = ee0[-7] - ee2[-7];
1794       ee0[-6] += ee2[-6];/*ee0[-6] = ee0[-6] + ee2[-6]; */
1795       ee0[-7] += ee2[-7];/*ee0[-7] = ee0[-7] + ee2[-7]; */
1796       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
1797       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
1798       A += 8;
1799       ee0 -= 8;
1800       ee2 -= 8;
1801    }
1802 }
1803 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)1804 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
1805 {
1806    int i;
1807    float k00_20, k01_21;
1808 
1809    float *e0 = e + d0;
1810    float *e2 = e0 + k_off;
1811 
1812    for (i=lim >> 2; i > 0; --i) {
1813       k00_20 = e0[-0] - e2[-0];
1814       k01_21 = e0[-1] - e2[-1];
1815       e0[-0] += e2[-0];/*e0[-0] = e0[-0] + e2[-0]; */
1816       e0[-1] += e2[-1];/*e0[-1] = e0[-1] + e2[-1]; */
1817       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
1818       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
1819 
1820       A += k1;
1821 
1822       k00_20 = e0[-2] - e2[-2];
1823       k01_21 = e0[-3] - e2[-3];
1824       e0[-2] += e2[-2];/*e0[-2] = e0[-2] + e2[-2]; */
1825       e0[-3] += e2[-3];/*e0[-3] = e0[-3] + e2[-3]; */
1826       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
1827       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
1828 
1829       A += k1;
1830 
1831       k00_20 = e0[-4] - e2[-4];
1832       k01_21 = e0[-5] - e2[-5];
1833       e0[-4] += e2[-4];/*e0[-4] = e0[-4] + e2[-4]; */
1834       e0[-5] += e2[-5];/*e0[-5] = e0[-5] + e2[-5]; */
1835       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
1836       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
1837 
1838       A += k1;
1839 
1840       k00_20 = e0[-6] - e2[-6];
1841       k01_21 = e0[-7] - e2[-7];
1842       e0[-6] += e2[-6];/*e0[-6] = e0[-6] + e2[-6]; */
1843       e0[-7] += e2[-7];/*e0[-7] = e0[-7] + e2[-7]; */
1844       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
1845       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
1846 
1847       e0 -= 8;
1848       e2 -= 8;
1849 
1850       A += k1;
1851    }
1852 }
1853 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)1854 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
1855 {
1856    int i;
1857    float A0 = A[0];
1858    float A1 = A[0+1];
1859    float A2 = A[0+a_off];
1860    float A3 = A[0+a_off+1];
1861    float A4 = A[0+a_off*2+0];
1862    float A5 = A[0+a_off*2+1];
1863    float A6 = A[0+a_off*3+0];
1864    float A7 = A[0+a_off*3+1];
1865 
1866    float k00,k11;
1867 
1868    float *ee0 = e  +i_off;
1869    float *ee2 = ee0+k_off;
1870 
1871    for (i=n; i > 0; --i) {
1872       k00     = ee0[ 0] - ee2[ 0];
1873       k11     = ee0[-1] - ee2[-1];
1874       ee0[ 0] =  ee0[ 0] + ee2[ 0];
1875       ee0[-1] =  ee0[-1] + ee2[-1];
1876       ee2[ 0] = (k00) * A0 - (k11) * A1;
1877       ee2[-1] = (k11) * A0 + (k00) * A1;
1878 
1879       k00     = ee0[-2] - ee2[-2];
1880       k11     = ee0[-3] - ee2[-3];
1881       ee0[-2] =  ee0[-2] + ee2[-2];
1882       ee0[-3] =  ee0[-3] + ee2[-3];
1883       ee2[-2] = (k00) * A2 - (k11) * A3;
1884       ee2[-3] = (k11) * A2 + (k00) * A3;
1885 
1886       k00     = ee0[-4] - ee2[-4];
1887       k11     = ee0[-5] - ee2[-5];
1888       ee0[-4] =  ee0[-4] + ee2[-4];
1889       ee0[-5] =  ee0[-5] + ee2[-5];
1890       ee2[-4] = (k00) * A4 - (k11) * A5;
1891       ee2[-5] = (k11) * A4 + (k00) * A5;
1892 
1893       k00     = ee0[-6] - ee2[-6];
1894       k11     = ee0[-7] - ee2[-7];
1895       ee0[-6] =  ee0[-6] + ee2[-6];
1896       ee0[-7] =  ee0[-7] + ee2[-7];
1897       ee2[-6] = (k00) * A6 - (k11) * A7;
1898       ee2[-7] = (k11) * A6 + (k00) * A7;
1899 
1900       ee0 -= k0;
1901       ee2 -= k0;
1902    }
1903 }
1904 
iter_54(float * z)1905 static INLINE void iter_54(float *z)
1906 {
1907    float k00,k11,k22,k33;
1908    float y0,y1,y2,y3;
1909 
1910    k00  = z[ 0] - z[-4];
1911    y0   = z[ 0] + z[-4];
1912    y2   = z[-2] + z[-6];
1913    k22  = z[-2] - z[-6];
1914 
1915    z[-0] = y0 + y2;      /* z0 + z4 + z2 + z6 */
1916    z[-2] = y0 - y2;      /* z0 + z4 - z2 - z6 */
1917 
1918    /* done with y0,y2 */
1919 
1920    k33  = z[-3] - z[-7];
1921 
1922    z[-4] = k00 + k33;    /* z0 - z4 + z3 - z7 */
1923    z[-6] = k00 - k33;    /* z0 - z4 - z3 + z7 */
1924 
1925    /* done with k33 */
1926 
1927    k11  = z[-1] - z[-5];
1928    y1   = z[-1] + z[-5];
1929    y3   = z[-3] + z[-7];
1930 
1931    z[-1] = y1 + y3;      /* z1 + z5 + z3 + z7 */
1932    z[-3] = y1 - y3;      /* z1 + z5 - z3 - z7 */
1933    z[-5] = k11 - k22;    /* z1 - z5 + z2 - z6 */
1934    z[-7] = k11 + k22;    /* z1 - z5 - z2 + z6 */
1935 }
1936 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)1937 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
1938 {
1939    int a_off = base_n >> 3;
1940    float A2 = A[0+a_off];
1941    float *z = e + i_off;
1942    float *base = z - 16 * n;
1943 
1944    while (z > base) {
1945       float k00,k11;
1946 
1947       k00   = z[-0] - z[-8];
1948       k11   = z[-1] - z[-9];
1949       z[-0] = z[-0] + z[-8];
1950       z[-1] = z[-1] + z[-9];
1951       z[-8] =  k00;
1952       z[-9] =  k11 ;
1953 
1954       k00    = z[ -2] - z[-10];
1955       k11    = z[ -3] - z[-11];
1956       z[ -2] = z[ -2] + z[-10];
1957       z[ -3] = z[ -3] + z[-11];
1958       z[-10] = (k00+k11) * A2;
1959       z[-11] = (k11-k00) * A2;
1960 
1961       k00    = z[-12] - z[ -4];  /* reverse to avoid a unary negation */
1962       k11    = z[ -5] - z[-13];
1963       z[ -4] = z[ -4] + z[-12];
1964       z[ -5] = z[ -5] + z[-13];
1965       z[-12] = k11;
1966       z[-13] = k00;
1967 
1968       k00    = z[-14] - z[ -6];  /* reverse to avoid a unary negation */
1969       k11    = z[ -7] - z[-15];
1970       z[ -6] = z[ -6] + z[-14];
1971       z[ -7] = z[ -7] + z[-15];
1972       z[-14] = (k00+k11) * A2;
1973       z[-15] = (k00-k11) * A2;
1974 
1975       iter_54(z);
1976       iter_54(z-8);
1977       z -= 16;
1978    }
1979 }
1980 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)1981 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1982 {
1983    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
1984    int ld;
1985    /* @OPTIMIZE: reduce register pressure by using fewer variables? */
1986    int save_point = temp_alloc_save(f);
1987    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
1988    float *u=NULL,*v=NULL;
1989    /* twiddle factors */
1990    float *A = f->A[blocktype];
1991 
1992    /* IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
1993     * See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
1994 
1995     * kernel from paper
1996 
1997 
1998     * merged:
1999     *   copy and reflect spectral data
2000     *   step 0
2001 
2002     * note that it turns out that the items added together during
2003     * this step are, in fact, being added to themselves (as reflected
2004     * by step 0). inexplicable inefficiency! this became obvious
2005     * once I combined the passes.
2006 
2007     * so there's a missing 'times 2' here (for adding X to itself).
2008     * this propogates through linearly to the end, where the numbers
2009     * are 1/2 too small, and need to be compensated for.
2010     */
2011 
2012    {
2013       float *d,*e, *AA, *e_stop;
2014       d = &buf2[n2-2];
2015       AA = A;
2016       e = &buffer[0];
2017       e_stop = &buffer[n2];
2018       while (e != e_stop) {
2019          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2020          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2021          d -= 2;
2022          AA += 2;
2023          e += 4;
2024       }
2025 
2026       e = &buffer[n2-3];
2027       while (d >= buf2) {
2028          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2029          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2030          d -= 2;
2031          AA += 2;
2032          e -= 4;
2033       }
2034    }
2035 
2036    /* now we use symbolic names for these, so that we can
2037     * possibly swap their meaning as we change which operations
2038     * are in place */
2039 
2040    u = buffer;
2041    v = buf2;
2042 
2043    /* step 2    (paper output is w, now u)
2044     * this could be in place, but the data ends up in the wrong
2045     * place... _somebody_'s got to swap it, so this is nominated */
2046    {
2047       float *AA = &A[n2-8];
2048       float *d0,*d1, *e0, *e1;
2049 
2050       e0 = &v[n4];
2051       e1 = &v[0];
2052 
2053       d0 = &u[n4];
2054       d1 = &u[0];
2055 
2056       while (AA >= A) {
2057          float v40_20, v41_21;
2058 
2059          v41_21 = e0[1] - e1[1];
2060          v40_20 = e0[0] - e1[0];
2061          d0[1]  = e0[1] + e1[1];
2062          d0[0]  = e0[0] + e1[0];
2063          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2064          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2065 
2066          v41_21 = e0[3] - e1[3];
2067          v40_20 = e0[2] - e1[2];
2068          d0[3]  = e0[3] + e1[3];
2069          d0[2]  = e0[2] + e1[2];
2070          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2071          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2072 
2073          AA -= 8;
2074 
2075          d0 += 4;
2076          d1 += 4;
2077          e0 += 4;
2078          e1 += 4;
2079       }
2080    }
2081 
2082    /* step 3 */
2083    ld = ilog(n) - 1; /* ilog is off-by-one from normal definitions */
2084 
2085    /* optimized step 3:
2086 
2087     * the original step3 loop can be nested r inside s or s inside r;
2088     * it's written originally as s inside r, but this is dumb when r
2089     * iterates many times, and s few. So I have two copies of it and
2090     * switch between them halfway.
2091 
2092     * this is iteration 0 of step 3 */
2093    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2094    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2095 
2096    /* this is iteration 1 of step 3 */
2097    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2098    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2099    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2100    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2101 
2102    l=2;
2103    for (; l < (ld-3)>>1; ++l) {
2104       int k0 = n >> (l+2), k0_2 = k0>>1;
2105       int lim = 1 << (l+1);
2106       int i;
2107       for (i=0; i < lim; ++i)
2108          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2109    }
2110 
2111    for (; l < ld-6; ++l) {
2112       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2113       int rlim = n >> (l+6), r;
2114       int lim = 1 << (l+1);
2115       int i_off;
2116       float *A0 = A;
2117       i_off = n2-1;
2118       for (r=rlim; r > 0; --r) {
2119          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2120          A0 += k1*4;
2121          i_off -= 8;
2122       }
2123    }
2124 
2125    /* iterations with count:
2126     *   ld-6,-5,-4 all interleaved together
2127     *       the big win comes from getting rid of needless flops
2128     *         due to the constants on pass 5 & 4 being all 1 and 0;
2129     *       combining them to be simultaneous to improve cache made little difference
2130     */
2131    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2132 
2133    /* output is u
2134 
2135     * step 4, 5, and 6
2136     * cannot be in-place because of step 5 */
2137    {
2138       uint16_t *bitrev = f->bit_reverse[blocktype];
2139       /* weirdly, I'd have thought reading sequentially and writing
2140        * erratically would have been better than vice-versa, but in
2141        * fact that's not what my testing showed. (That is, with
2142        * j = bitreverse(i), do you read i and write j, or read j and write i.) */
2143 
2144       float *d0 = &v[n4-4];
2145       float *d1 = &v[n2-4];
2146       while (d0 >= v) {
2147          int k4;
2148 
2149          k4 = bitrev[0];
2150          d1[3] = u[k4+0];
2151          d1[2] = u[k4+1];
2152          d0[3] = u[k4+2];
2153          d0[2] = u[k4+3];
2154 
2155          k4 = bitrev[1];
2156          d1[1] = u[k4+0];
2157          d1[0] = u[k4+1];
2158          d0[1] = u[k4+2];
2159          d0[0] = u[k4+3];
2160 
2161          d0 -= 4;
2162          d1 -= 4;
2163          bitrev += 2;
2164       }
2165    }
2166    /* (paper output is u, now v) */
2167 
2168 
2169    /* data must be in buf2 */
2170    assert(v == buf2);
2171 
2172    /* step 7   (paper output is v, now v)
2173     * this is now in place */
2174    {
2175       float *C = f->C[blocktype];
2176       float *d, *e;
2177 
2178       d = v;
2179       e = v + n2 - 4;
2180 
2181       while (d < e) {
2182          float a02,a11,b0,b1,b2,b3;
2183 
2184          a02 = d[0] - e[2];
2185          a11 = d[1] + e[3];
2186 
2187          b0 = C[1]*a02 + C[0]*a11;
2188          b1 = C[1]*a11 - C[0]*a02;
2189 
2190          b2 = d[0] + e[ 2];
2191          b3 = d[1] - e[ 3];
2192 
2193          d[0] = b2 + b0;
2194          d[1] = b3 + b1;
2195          e[2] = b2 - b0;
2196          e[3] = b1 - b3;
2197 
2198          a02 = d[2] - e[0];
2199          a11 = d[3] + e[1];
2200 
2201          b0 = C[3]*a02 + C[2]*a11;
2202          b1 = C[3]*a11 - C[2]*a02;
2203 
2204          b2 = d[2] + e[ 0];
2205          b3 = d[3] - e[ 1];
2206 
2207          d[2] = b2 + b0;
2208          d[3] = b3 + b1;
2209          e[0] = b2 - b0;
2210          e[1] = b1 - b3;
2211 
2212          C += 4;
2213          d += 4;
2214          e -= 4;
2215       }
2216    }
2217 
2218    /* data must be in buf2
2219 
2220 
2221     * step 8+decode   (paper output is X, now buffer)
2222     * this generates pairs of data a la 8 and pushes them directly through
2223     * the decode kernel (pushing rather than pulling) to avoid having
2224     * to make another pass later
2225 
2226     * this cannot POSSIBLY be in place, so we refer to the buffers directly
2227     */
2228 
2229    {
2230       float *d0,*d1,*d2,*d3;
2231 
2232       float *B = f->B[blocktype] + n2 - 8;
2233       float *e = buf2 + n2 - 8;
2234       d0 = &buffer[0];
2235       d1 = &buffer[n2-4];
2236       d2 = &buffer[n2];
2237       d3 = &buffer[n-4];
2238       while (e >= v) {
2239          float p0,p1,p2,p3;
2240 
2241          p3 =  e[6]*B[7] - e[7]*B[6];
2242          p2 = -e[6]*B[6] - e[7]*B[7];
2243 
2244          d0[0] =   p3;
2245          d1[3] = - p3;
2246          d2[0] =   p2;
2247          d3[3] =   p2;
2248 
2249          p1 =  e[4]*B[5] - e[5]*B[4];
2250          p0 = -e[4]*B[4] - e[5]*B[5];
2251 
2252          d0[1] =   p1;
2253          d1[2] = - p1;
2254          d2[1] =   p0;
2255          d3[2] =   p0;
2256 
2257          p3 =  e[2]*B[3] - e[3]*B[2];
2258          p2 = -e[2]*B[2] - e[3]*B[3];
2259 
2260          d0[2] =   p3;
2261          d1[1] = - p3;
2262          d2[2] =   p2;
2263          d3[1] =   p2;
2264 
2265          p1 =  e[0]*B[1] - e[1]*B[0];
2266          p0 = -e[0]*B[0] - e[1]*B[1];
2267 
2268          d0[3] =   p1;
2269          d1[0] = - p1;
2270          d2[3] =   p0;
2271          d3[0] =   p0;
2272 
2273          B -= 8;
2274          e -= 8;
2275          d0 += 4;
2276          d2 += 4;
2277          d1 -= 4;
2278          d3 -= 4;
2279       }
2280    }
2281 
2282    temp_alloc_restore(f,save_point);
2283 }
2284 
get_window(vorb * f,int len)2285 static float *get_window(vorb *f, int len)
2286 {
2287    len <<= 1;
2288    if (len == f->blocksize_0) return f->window[0];
2289    if (len == f->blocksize_1) return f->window[1];
2290    assert(0);
2291    return NULL;
2292 }
2293 
2294 typedef int16_t YTYPE;
2295 
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8_t * step2_flag)2296 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8_t *step2_flag)
2297 {
2298    int n2 = n >> 1;
2299    int s = map->chan[i].mux, floor;
2300    floor = map->submap_floor[s];
2301    if (f->floor_types[floor] == 0) {
2302       return error(f, VORBIS_invalid_stream);
2303    } else {
2304       Floor1 *g = &f->floor_config[floor].floor1;
2305       int j,q;
2306       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
2307       for (q=1; q < g->values; ++q) {
2308          j = g->sorted_order[q];
2309          if (finalY[j] >= 0)
2310          {
2311             int hy = finalY[j] * g->floor1_multiplier;
2312             int hx = g->Xlist[j];
2313             draw_line(target, lx,ly, hx,hy, n2);
2314              lx = hx;
2315              ly = hy;
2316          }
2317       }
2318       if (lx < n2)
2319          /* optimization of: draw_line(target, lx,ly, n,ly, n2); */
2320          for (j=lx; j < n2; ++j)
2321             LINE_OP(target[j], inverse_db_table[ly]);
2322    }
2323    return TRUE;
2324 }
2325 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)2326 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
2327 {
2328    Mode *m;
2329    int i, n, prev, next, window_center;
2330    f->channel_buffer_start = f->channel_buffer_end = 0;
2331 
2332   retry:
2333    if (f->eof) return FALSE;
2334    if (!maybe_start_packet(f))
2335       return FALSE;
2336    /* check packet type */
2337    if (get_bits(f,1) != 0) {
2338       if (IS_PUSH_MODE(f))
2339          return error(f,VORBIS_bad_packet_type);
2340       while (EOP != get8_packet(f));
2341       goto retry;
2342    }
2343 
2344    if (f->alloc.alloc_buffer)
2345       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2346 
2347    i = get_bits(f, ilog(f->mode_count-1));
2348    if (i == EOP) return FALSE;
2349    if (i >= f->mode_count) return FALSE;
2350    *mode = i;
2351    m = f->mode_config + i;
2352    if (m->blockflag) {
2353       n = f->blocksize_1;
2354       prev = get_bits(f,1);
2355       next = get_bits(f,1);
2356    } else {
2357       prev = next = 0;
2358       n = f->blocksize_0;
2359    }
2360 
2361 /* WINDOWING */
2362 
2363    window_center = n >> 1;
2364    if (m->blockflag && !prev) {
2365       *p_left_start = (n - f->blocksize_0) >> 2;
2366       *p_left_end   = (n + f->blocksize_0) >> 2;
2367    } else {
2368       *p_left_start = 0;
2369       *p_left_end   = window_center;
2370    }
2371    if (m->blockflag && !next) {
2372       *p_right_start = (n*3 - f->blocksize_0) >> 2;
2373       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
2374    } else {
2375       *p_right_start = window_center;
2376       *p_right_end   = n;
2377    }
2378    return TRUE;
2379 }
2380 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)2381 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
2382 {
2383    Mapping *map;
2384    int i,j,k,n,n2;
2385    int zero_channel[256];
2386    int really_zero_channel[256];
2387 
2388 /* WINDOWING */
2389 
2390    n = f->blocksize[m->blockflag];
2391    map = &f->mapping[m->mapping];
2392 
2393 /* FLOORS */
2394    n2 = n >> 1;
2395 
2396    for (i=0; i < f->channels; ++i) {
2397       int s = map->chan[i].mux, floor;
2398       zero_channel[i] = FALSE;
2399       floor = map->submap_floor[s];
2400       if (f->floor_types[floor] == 0) {
2401          return error(f, VORBIS_invalid_stream);
2402       } else {
2403          Floor1 *g = &f->floor_config[floor].floor1;
2404          if (get_bits(f, 1)) {
2405             short *finalY;
2406             uint8_t step2_flag[256];
2407             static int range_list[4] = { 256, 128, 86, 64 };
2408             int range = range_list[g->floor1_multiplier-1];
2409             int offset = 2;
2410             finalY = f->finalY[i];
2411             finalY[0] = get_bits(f, ilog(range)-1);
2412             finalY[1] = get_bits(f, ilog(range)-1);
2413             for (j=0; j < g->partitions; ++j) {
2414                int pclass = g->partition_class_list[j];
2415                int cdim = g->class_dimensions[pclass];
2416                int cbits = g->class_subclasses[pclass];
2417                int csub = (1 << cbits)-1;
2418                int cval = 0;
2419                if (cbits) {
2420                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
2421                   DECODE(cval,f,c);
2422                }
2423                for (k=0; k < cdim; ++k) {
2424                   int book = g->subclass_books[pclass][cval & csub];
2425                   cval = cval >> cbits;
2426                   if (book >= 0) {
2427                      int temp;
2428                      Codebook *c = f->codebooks + book;
2429                      DECODE(temp,f,c);
2430                      finalY[offset++] = temp;
2431                   } else
2432                      finalY[offset++] = 0;
2433                }
2434             }
2435             if (f->valid_bits == INVALID_BITS) goto error; /* behavior according to spec */
2436             step2_flag[0] = step2_flag[1] = 1;
2437             for (j=2; j < g->values; ++j) {
2438                int low, high, pred, highroom, lowroom, room, val;
2439                low = g->neighbors[j][0];
2440                high = g->neighbors[j][1];
2441 #if 0
2442                neighbors(g->Xlist, j, &low, &high);
2443 #endif
2444                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
2445                val = finalY[j];
2446                highroom = range - pred;
2447                lowroom = pred;
2448                if (highroom < lowroom)
2449                   room = highroom * 2;
2450                else
2451                   room = lowroom * 2;
2452                if (val) {
2453                   step2_flag[low] = step2_flag[high] = 1;
2454                   step2_flag[j] = 1;
2455                   if (val >= room)
2456                      if (highroom > lowroom)
2457                         finalY[j] = val - lowroom + pred;
2458                      else
2459                         finalY[j] = pred - val + highroom - 1;
2460                   else
2461                      if (val & 1)
2462                         finalY[j] = pred - ((val+1)>>1);
2463                      else
2464                         finalY[j] = pred + (val>>1);
2465                } else {
2466                   step2_flag[j] = 0;
2467                   finalY[j] = pred;
2468                }
2469             }
2470 
2471             /* defer final floor computation until _after_ residue */
2472             for (j=0; j < g->values; ++j) {
2473                if (!step2_flag[j])
2474                   finalY[j] = -1;
2475             }
2476          } else {
2477            error:
2478             zero_channel[i] = TRUE;
2479          }
2480          /* So we just defer everything else to later */
2481 
2482          /* at this point we've decoded the floor into buffer */
2483       }
2484    }
2485    /* at this point we've decoded all floors */
2486 
2487    if (f->alloc.alloc_buffer)
2488       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2489 
2490    /* re-enable coupled channels if necessary */
2491    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
2492    for (i=0; i < map->coupling_steps; ++i)
2493       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
2494          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
2495       }
2496 
2497 /* RESIDUE DECODE */
2498    for (i=0; i < map->submaps; ++i) {
2499       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
2500       int r;
2501       uint8_t do_not_decode[256] = {0};
2502       int ch = 0;
2503       for (j=0; j < f->channels; ++j) {
2504          if (map->chan[j].mux == i) {
2505             if (zero_channel[j]) {
2506                do_not_decode[ch] = TRUE;
2507                residue_buffers[ch] = NULL;
2508             } else {
2509                do_not_decode[ch] = FALSE;
2510                residue_buffers[ch] = f->channel_buffers[j];
2511             }
2512             ++ch;
2513          }
2514       }
2515       r = map->submap_residue[i];
2516       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
2517    }
2518 
2519    if (f->alloc.alloc_buffer)
2520       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2521 
2522 /* INVERSE COUPLING */
2523    for (i = map->coupling_steps-1; i >= 0; --i) {
2524       int n2 = n >> 1;
2525       float *m = f->channel_buffers[map->chan[i].magnitude];
2526       float *a = f->channel_buffers[map->chan[i].angle    ];
2527       for (j=0; j < n2; ++j) {
2528          float a2,m2;
2529          if (m[j] > 0)
2530             if (a[j] > 0)
2531             {
2532                 m2 = m[j];
2533                 a2 = m[j] - a[j];
2534             }
2535             else
2536             {
2537                 a2 = m[j];
2538                 m2 = m[j] + a[j];
2539             }
2540          else
2541             if (a[j] > 0)
2542             {
2543                 m2 = m[j];
2544                 a2 = m[j] + a[j];
2545             }
2546             else
2547             {
2548                 a2 = m[j];
2549                 m2 = m[j] - a[j];
2550             }
2551          m[j] = m2;
2552          a[j] = a2;
2553       }
2554    }
2555 
2556    /* finish decoding the floors */
2557    for (i=0; i < f->channels; ++i) {
2558       if (really_zero_channel[i]) {
2559          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
2560       } else {
2561          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
2562       }
2563    }
2564 
2565 /* INVERSE MDCT */
2566    for (i=0; i < f->channels; ++i)
2567       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
2568 
2569    /* this shouldn't be necessary, unless we exited on an error
2570     * and want to flush to get to the next packet */
2571    flush_packet(f);
2572 
2573    if (f->first_decode) {
2574       /* assume we start so first non-discarded sample is sample 0
2575        * this isn't to spec, but spec would require us to read ahead
2576        * and decode the size of all current frames--could be done,
2577        * but presumably it's not a commonly used feature */
2578       f->current_loc = -n2; /* start of first frame is positioned for discard */
2579       /* we might have to discard samples "from" the next frame too,
2580        * if we're lapping a large block then a small at the start? */
2581       f->discard_samples_deferred = n - right_end;
2582       f->current_loc_valid = TRUE;
2583       f->first_decode = FALSE;
2584    } else if (f->discard_samples_deferred) {
2585       left_start += f->discard_samples_deferred;
2586       *p_left = left_start;
2587       f->discard_samples_deferred = 0;
2588    } else if (f->previous_length == 0 && f->current_loc_valid) {
2589       /* we're recovering from a seek... that means we're going to discard
2590        * the samples from this packet even though we know our position from
2591        * the last page header, so we need to update the position based on
2592        * the discarded samples here
2593        * but wait, the code below is going to add this in itself even
2594        * on a discard, so we don't need to do it here... */
2595    }
2596 
2597    /* check if we have ogg information about the sample # for this packet */
2598    if (f->last_seg_which == f->end_seg_with_known_loc) {
2599       /* if we have a valid current loc, and this is final: */
2600       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
2601          uint32_t current_end = f->known_loc_for_packet - (n-right_end);
2602          /* then let's infer the size of the (probably) short final frame */
2603          if (current_end < f->current_loc + right_end) {
2604             if (current_end < f->current_loc) {
2605                /* negative truncation, that's impossible! */
2606                *len = 0;
2607             } else {
2608                *len = current_end - f->current_loc;
2609             }
2610             *len += left_start;
2611             f->current_loc += *len;
2612             return TRUE;
2613          }
2614       }
2615       /* otherwise, just set our sample loc
2616        * guess that the ogg granule pos refers to the _middle_ of the
2617        * last frame?
2618        * set f->current_loc to the position of left_start */
2619       f->current_loc = f->known_loc_for_packet - (n2-left_start);
2620       f->current_loc_valid = TRUE;
2621    }
2622    if (f->current_loc_valid)
2623       f->current_loc += (right_start - left_start);
2624 
2625    if (f->alloc.alloc_buffer)
2626       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2627    *len = right_end;  /* ignore samples after the window goes to 0 */
2628    return TRUE;
2629 }
2630 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)2631 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
2632 {
2633    int mode, left_end, right_end;
2634    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
2635    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
2636 }
2637 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)2638 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
2639 {
2640    int prev,i,j;
2641    /* we use right&left (the start of the right- and left-window sin()-regions)
2642     * to determine how much to return, rather than inferring from the rules
2643     * (same result, clearer code); 'left' indicates where our sin() window
2644     * starts, therefore where the previous window's right edge starts, and
2645     * therefore where to start mixing from the previous buffer. 'right'
2646     * indicates where our sin() ending-window starts, therefore that's where
2647     * we start saving, and where our returned-data ends.
2648 
2649     * mixin from previous window */
2650    if (f->previous_length) {
2651       int i,j, n = f->previous_length;
2652       float *w = get_window(f, n);
2653       for (i=0; i < f->channels; ++i) {
2654          for (j=0; j < n; ++j)
2655             f->channel_buffers[i][left+j] =
2656                f->channel_buffers[i][left+j]*w[    j] +
2657                f->previous_window[i][     j]*w[n-1-j];
2658       }
2659    }
2660 
2661    prev = f->previous_length;
2662 
2663    /* last half of this data becomes previous window */
2664    f->previous_length = len - right;
2665 
2666    /* @OPTIMIZE: could avoid this copy by double-buffering the
2667     * output (flipping previous_window with channel_buffers), but
2668     * then previous_window would have to be 2x as large, and
2669     * channel_buffers couldn't be temp mem (although they're NOT
2670     * currently temp mem, they could be (unless we want to level
2671     * performance by spreading out the computation)) */
2672    for (i=0; i < f->channels; ++i)
2673       for (j=0; right+j < len; ++j)
2674          f->previous_window[i][j] = f->channel_buffers[i][right+j];
2675 
2676    if (!prev)
2677       /* there was no previous packet, so this data isn't valid...
2678        * this isn't entirely true, only the would-have-overlapped data
2679        * isn't valid, but this seems to be what the spec requires */
2680       return 0;
2681 
2682    /* truncate a short frame */
2683    if (len < right) right = len;
2684 
2685    f->samples_output += right-left;
2686 
2687    return right - left;
2688 }
2689 
vorbis_pump_first_frame(stb_vorbis * f)2690 static void vorbis_pump_first_frame(stb_vorbis *f)
2691 {
2692    int len, right, left;
2693    if (vorbis_decode_packet(f, &len, &left, &right))
2694       vorbis_finish_frame(f, len, left, right);
2695 }
2696 
start_decoder(vorb * f)2697 static int start_decoder(vorb *f)
2698 {
2699    uint8_t header[6], x,y;
2700    int len,i,j,k, max_submaps = 0;
2701    int longest_floorlist=0;
2702 
2703    /* first page, first packet */
2704 
2705    if (!start_page(f))                              return FALSE;
2706    /* validate page flag */
2707    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
2708    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
2709    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
2710    /* check for expected packet length */
2711    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
2712    if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
2713    /* read packet
2714     * check packet header */
2715    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
2716    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
2717    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
2718    /* vorbis_version */
2719    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
2720    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
2721    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
2722    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
2723    get32(f); /* bitrate_maximum */
2724    get32(f); /* bitrate_nominal */
2725    get32(f); /* bitrate_minimum */
2726    x = get8(f);
2727    { int log0,log1;
2728    log0 = x & 15;
2729    log1 = x >> 4;
2730    f->blocksize_0 = 1 << log0;
2731    f->blocksize_1 = 1 << log1;
2732    if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
2733    if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
2734    if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
2735    }
2736 
2737    /* framing_flag */
2738    x = get8(f);
2739    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
2740 
2741    /* second packet! */
2742    if (!start_page(f))                              return FALSE;
2743 
2744    if (!start_packet(f))                            return FALSE;
2745    do {
2746       len = next_segment(f);
2747       skip(f, len);
2748       f->bytes_in_seg = 0;
2749    } while (len);
2750 
2751    /* third packet! */
2752    if (!start_packet(f))                            return FALSE;
2753 
2754    crc32_init(); /* always init it, to avoid multithread race conditions */
2755 
2756    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
2757    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
2758    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
2759 
2760    /* codebooks */
2761 
2762    f->codebook_count = get_bits(f,8) + 1;
2763    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
2764    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
2765    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
2766    for (i=0; i < f->codebook_count; ++i) {
2767       uint32_t *values;
2768       int ordered, sorted_count;
2769       int total=0;
2770       uint8_t *lengths;
2771       Codebook *c = f->codebooks+i;
2772       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
2773       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
2774       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
2775       x = get_bits(f, 8);
2776       c->dimensions = (get_bits(f, 8)<<8) + x;
2777       x = get_bits(f, 8);
2778       y = get_bits(f, 8);
2779       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
2780       ordered = get_bits(f,1);
2781       c->sparse = ordered ? 0 : get_bits(f,1);
2782 
2783       if (c->sparse)
2784          lengths = (uint8_t *) setup_temp_malloc(f, c->entries);
2785       else
2786          lengths = c->codeword_lengths = (uint8_t *) setup_malloc(f, c->entries);
2787 
2788       if (!lengths) return error(f, VORBIS_outofmem);
2789 
2790       if (ordered) {
2791          int current_entry = 0;
2792          int current_length = get_bits(f,5) + 1;
2793          while (current_entry < c->entries) {
2794             int limit = c->entries - current_entry;
2795             int n = get_bits(f, ilog(limit));
2796             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
2797             memset(lengths + current_entry, current_length, n);
2798             current_entry += n;
2799             ++current_length;
2800          }
2801       } else {
2802          for (j=0; j < c->entries; ++j) {
2803             int present = c->sparse ? get_bits(f,1) : 1;
2804             if (present) {
2805                lengths[j] = get_bits(f, 5) + 1;
2806                ++total;
2807             } else {
2808                lengths[j] = NO_CODE;
2809             }
2810          }
2811       }
2812 
2813       if (c->sparse && total >= c->entries >> 2) {
2814          /* convert sparse items to non-sparse! */
2815          if (c->entries > (int) f->setup_temp_memory_required)
2816             f->setup_temp_memory_required = c->entries;
2817 
2818          c->codeword_lengths = (uint8_t *) setup_malloc(f, c->entries);
2819          memcpy(c->codeword_lengths, lengths, c->entries);
2820          setup_temp_free(f, lengths, c->entries); /* note this is only safe if there have been no intervening temp mallocs! */
2821          lengths = c->codeword_lengths;
2822          c->sparse = 0;
2823       }
2824 
2825       /* compute the size of the sorted tables */
2826       if (c->sparse) {
2827          sorted_count = total;
2828       } else {
2829          sorted_count = 0;
2830          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
2831          for (j=0; j < c->entries; ++j)
2832             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
2833                ++sorted_count;
2834          #endif
2835       }
2836 
2837       c->sorted_entries = sorted_count;
2838       values = NULL;
2839 
2840       if (!c->sparse) {
2841          c->codewords = (uint32_t *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
2842          if (!c->codewords)                  return error(f, VORBIS_outofmem);
2843       } else {
2844          unsigned int size;
2845          if (c->sorted_entries) {
2846             c->codeword_lengths = (uint8_t *) setup_malloc(f, c->sorted_entries);
2847             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
2848             c->codewords = (uint32_t *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
2849             if (!c->codewords)                  return error(f, VORBIS_outofmem);
2850             values = (uint32_t *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
2851             if (!values)                        return error(f, VORBIS_outofmem);
2852          }
2853          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
2854          if (size > f->setup_temp_memory_required)
2855             f->setup_temp_memory_required = size;
2856       }
2857 
2858       if (!compute_codewords(c, lengths, c->entries, values)) {
2859          if (c->sparse) setup_temp_free(f, values, 0);
2860          return error(f, VORBIS_invalid_setup);
2861       }
2862 
2863       if (c->sorted_entries) {
2864          /* allocate an extra slot for sentinels */
2865          c->sorted_codewords = (uint32_t *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
2866          /* allocate an extra slot at the front so that c->sorted_values[-1] is defined
2867           * so that we can catch that case without an extra if */
2868          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
2869          if (c->sorted_values) { ++c->sorted_values; c->sorted_values[-1] = -1; }
2870          compute_sorted_huffman(c, lengths, values);
2871       }
2872 
2873       if (c->sparse) {
2874          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
2875          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
2876          setup_temp_free(f, lengths, c->entries);
2877          c->codewords = NULL;
2878       }
2879 
2880       compute_accelerated_huffman(c);
2881 
2882       c->lookup_type = get_bits(f, 4);
2883       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
2884       if (c->lookup_type > 0) {
2885          uint16_t *mults;
2886          c->minimum_value = float32_unpack(get_bits(f, 32));
2887          c->delta_value = float32_unpack(get_bits(f, 32));
2888          c->value_bits = get_bits(f, 4)+1;
2889          c->sequence_p = get_bits(f,1);
2890          if (c->lookup_type == 1) {
2891             c->lookup_values = lookup1_values(c->entries, c->dimensions);
2892          } else {
2893             c->lookup_values = c->entries * c->dimensions;
2894          }
2895          mults = (uint16_t *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
2896          if (mults == NULL) return error(f, VORBIS_outofmem);
2897          for (j=0; j < (int) c->lookup_values; ++j) {
2898             int q = get_bits(f, c->value_bits);
2899             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
2900             mults[j] = q;
2901          }
2902 
2903          if (c->lookup_type == 1) {
2904             int len, sparse = c->sparse;
2905             /* pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop */
2906             if (sparse) {
2907                if (c->sorted_entries == 0) goto skip;
2908                c->multiplicands = (stb_vorbis_codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
2909             } else
2910                c->multiplicands = (stb_vorbis_codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
2911             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
2912             len = sparse ? c->sorted_entries : c->entries;
2913             for (j=0; j < len; ++j) {
2914                int z = sparse ? c->sorted_values[j] : j, div=1;
2915                for (k=0; k < c->dimensions; ++k) {
2916                   int off = (z / div) % c->lookup_values;
2917                   c->multiplicands[j*c->dimensions + k] =
2918                          #ifndef STB_VORBIS_CODEBOOK_FLOATS
2919                             mults[off];
2920                          #else
2921                             mults[off]*c->delta_value + c->minimum_value;
2922                             /* in this case (and this case only) we could pre-expand c->sequence_p,
2923                              * and throw away the decode logic for it; have to ALSO do
2924                              * it in the case below, but it can only be done if
2925                              *    STB_VORBIS_CODEBOOK_FLOATS     */
2926                          #endif
2927                   div *= c->lookup_values;
2928                }
2929             }
2930             setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
2931             c->lookup_type = 2;
2932          }
2933          else
2934          {
2935             c->multiplicands = (stb_vorbis_codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
2936             #ifndef STB_VORBIS_CODEBOOK_FLOATS
2937             memcpy(c->multiplicands, mults, sizeof(c->multiplicands[0]) * c->lookup_values);
2938             #else
2939             for (j=0; j < (int) c->lookup_values; ++j)
2940                c->multiplicands[j] = mults[j] * c->delta_value + c->minimum_value;
2941             #endif
2942             setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
2943          }
2944         skip:;
2945 
2946          #ifdef STB_VORBIS_CODEBOOK_FLOATS
2947          if (c->lookup_type == 2 && c->sequence_p) {
2948             for (j=1; j < (int) c->lookup_values; ++j)
2949                c->multiplicands[j] = c->multiplicands[j-1];
2950             c->sequence_p = 0;
2951          }
2952          #endif
2953       }
2954    }
2955 
2956    /* time domain transfers (notused) */
2957 
2958    x = get_bits(f, 6) + 1;
2959    for (i=0; i < x; ++i) {
2960       uint32_t z = get_bits(f, 16);
2961       if (z != 0) return error(f, VORBIS_invalid_setup);
2962    }
2963 
2964    /* Floors */
2965    f->floor_count = get_bits(f, 6)+1;
2966    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
2967    for (i=0; i < f->floor_count; ++i) {
2968       f->floor_types[i] = get_bits(f, 16);
2969       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
2970       if (f->floor_types[i] == 0) {
2971          Floor0 *g = &f->floor_config[i].floor0;
2972          g->order = get_bits(f,8);
2973          g->rate = get_bits(f,16);
2974          g->bark_map_size = get_bits(f,16);
2975          g->amplitude_bits = get_bits(f,6);
2976          g->amplitude_offset = get_bits(f,8);
2977          g->number_of_books = get_bits(f,4) + 1;
2978          for (j=0; j < g->number_of_books; ++j)
2979             g->book_list[j] = get_bits(f,8);
2980          return error(f, VORBIS_feature_not_supported);
2981       } else {
2982          STBV_Point p[31*8+2];
2983          Floor1 *g = &f->floor_config[i].floor1;
2984          int max_class = -1;
2985          g->partitions = get_bits(f, 5);
2986          for (j=0; j < g->partitions; ++j) {
2987             g->partition_class_list[j] = get_bits(f, 4);
2988             if (g->partition_class_list[j] > max_class)
2989                max_class = g->partition_class_list[j];
2990          }
2991          for (j=0; j <= max_class; ++j) {
2992             g->class_dimensions[j] = get_bits(f, 3)+1;
2993             g->class_subclasses[j] = get_bits(f, 2);
2994             if (g->class_subclasses[j]) {
2995                g->class_masterbooks[j] = get_bits(f, 8);
2996                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
2997             }
2998             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
2999                g->subclass_books[j][k] = get_bits(f,8)-1;
3000                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3001             }
3002          }
3003          g->floor1_multiplier = get_bits(f,2)+1;
3004          g->rangebits = get_bits(f,4);
3005          g->Xlist[0] = 0;
3006          g->Xlist[1] = 1 << g->rangebits;
3007          g->values = 2;
3008          for (j=0; j < g->partitions; ++j) {
3009             int c = g->partition_class_list[j];
3010             for (k=0; k < g->class_dimensions[c]; ++k) {
3011                g->Xlist[g->values] = get_bits(f, g->rangebits);
3012                ++g->values;
3013             }
3014          }
3015          /* precompute the sorting */
3016          for (j=0; j < g->values; ++j) {
3017             p[j].x = g->Xlist[j];
3018             p[j].y = j;
3019          }
3020          qsort(p, g->values, sizeof(p[0]), point_compare);
3021          for (j=0; j < g->values; ++j)
3022             g->sorted_order[j] = (uint8_t) p[j].y;
3023          /* precompute the neighbors */
3024          for (j=2; j < g->values; ++j)
3025          {
3026             int low = 0;
3027             int hi  = 0;
3028             neighbors(g->Xlist, j, &low,&hi);
3029             g->neighbors[j][0] = low;
3030             g->neighbors[j][1] = hi;
3031          }
3032 
3033          if (g->values > longest_floorlist)
3034             longest_floorlist = g->values;
3035       }
3036    }
3037 
3038    /* Residue */
3039    f->residue_count = get_bits(f, 6)+1;
3040    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(*f->residue_config));
3041    for (i=0; i < f->residue_count; ++i) {
3042       uint8_t residue_cascade[64];
3043       Residue *r = f->residue_config+i;
3044       f->residue_types[i] = get_bits(f, 16);
3045       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3046       r->begin = get_bits(f, 24);
3047       r->end = get_bits(f, 24);
3048       r->part_size = get_bits(f,24)+1;
3049       r->classifications = get_bits(f,6)+1;
3050       r->classbook = get_bits(f,8);
3051       for (j=0; j < r->classifications; ++j) {
3052          uint8_t high_bits=0;
3053          uint8_t low_bits=get_bits(f,3);
3054          if (get_bits(f,1))
3055             high_bits = get_bits(f,5);
3056          residue_cascade[j] = high_bits*8 + low_bits;
3057       }
3058       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3059       for (j=0; j < r->classifications; ++j) {
3060          for (k=0; k < 8; ++k) {
3061             if (residue_cascade[j] & (1 << k)) {
3062                r->residue_books[j][k] = get_bits(f, 8);
3063                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3064             } else {
3065                r->residue_books[j][k] = -1;
3066             }
3067          }
3068       }
3069       /* precompute the classifications[] array to avoid inner-loop mod/divide
3070        * call it 'classdata' since we already have r->classifications */
3071       r->classdata = (uint8_t **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3072       if (!r->classdata) return error(f, VORBIS_outofmem);
3073       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3074       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3075          int classwords = f->codebooks[r->classbook].dimensions;
3076          int temp = j;
3077          r->classdata[j] = (uint8_t *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3078          for (k=classwords-1; k >= 0; --k) {
3079             r->classdata[j][k] = temp % r->classifications;
3080             temp /= r->classifications;
3081          }
3082       }
3083    }
3084 
3085    f->mapping_count = get_bits(f,6)+1;
3086    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3087    for (i=0; i < f->mapping_count; ++i) {
3088       Mapping *m = f->mapping + i;
3089       int mapping_type = get_bits(f,16);
3090       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3091       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3092       if (get_bits(f,1))
3093          m->submaps = get_bits(f,4)+1;
3094       else
3095          m->submaps = 1;
3096       if (m->submaps > max_submaps)
3097          max_submaps = m->submaps;
3098       if (get_bits(f,1)) {
3099          m->coupling_steps = get_bits(f,8)+1;
3100          for (k=0; k < m->coupling_steps; ++k) {
3101             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
3102             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
3103             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
3104             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
3105             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
3106          }
3107       } else
3108          m->coupling_steps = 0;
3109 
3110       /* reserved field */
3111       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
3112       if (m->submaps > 1) {
3113          for (j=0; j < f->channels; ++j) {
3114             m->chan[j].mux = get_bits(f, 4);
3115             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
3116          }
3117       } else
3118          /* @SPECIFICATION: this case is missing from the spec */
3119          for (j=0; j < f->channels; ++j)
3120             m->chan[j].mux = 0;
3121 
3122       for (j=0; j < m->submaps; ++j) {
3123          get_bits(f,8); /* discard */
3124          m->submap_floor[j] = get_bits(f,8);
3125          m->submap_residue[j] = get_bits(f,8);
3126          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
3127          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
3128       }
3129    }
3130 
3131    /* Modes */
3132    f->mode_count = get_bits(f, 6)+1;
3133    for (i=0; i < f->mode_count; ++i) {
3134       Mode *m = f->mode_config+i;
3135       m->blockflag = get_bits(f,1);
3136       m->windowtype = get_bits(f,16);
3137       m->transformtype = get_bits(f,16);
3138       m->mapping = get_bits(f,8);
3139       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
3140       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
3141       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
3142    }
3143 
3144    flush_packet(f);
3145 
3146    f->previous_length = 0;
3147 
3148    for (i=0; i < f->channels; ++i) {
3149       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
3150       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
3151       f->finalY[i]          = (int16_t *) setup_malloc(f, sizeof(int16_t) * longest_floorlist);
3152    }
3153 
3154    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
3155    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
3156    f->blocksize[0] = f->blocksize_0;
3157    f->blocksize[1] = f->blocksize_1;
3158 
3159    /* compute how much temporary memory is needed */
3160 
3161    /* 1. */
3162    {
3163       uint32_t imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
3164       uint32_t classify_mem;
3165       int i,max_part_read=0;
3166       for (i=0; i < f->residue_count; ++i) {
3167          Residue *r = f->residue_config + i;
3168          int n_read = r->end - r->begin;
3169          int part_read = n_read / r->part_size;
3170          if (part_read > max_part_read)
3171             max_part_read = part_read;
3172       }
3173       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8_t *));
3174 
3175       f->temp_memory_required = classify_mem;
3176       if (imdct_mem > f->temp_memory_required)
3177          f->temp_memory_required = imdct_mem;
3178    }
3179 
3180    f->first_decode = TRUE;
3181 
3182    if (f->alloc.alloc_buffer) {
3183       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
3184       /* check if there's enough temp memory so we don't error later */
3185       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
3186          return error(f, VORBIS_outofmem);
3187    }
3188 
3189    f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
3190 
3191    return TRUE;
3192 }
3193 
vorbis_deinit(stb_vorbis * p)3194 static void vorbis_deinit(stb_vorbis *p)
3195 {
3196    int i,j;
3197    for (i=0; i < p->residue_count; ++i) {
3198       Residue *r = p->residue_config+i;
3199       if (r->classdata) {
3200          for (j=0; j < p->codebooks[r->classbook].entries; ++j)
3201             setup_free(p, r->classdata[j]);
3202          setup_free(p, r->classdata);
3203       }
3204       setup_free(p, r->residue_books);
3205    }
3206 
3207    if (p->codebooks) {
3208       for (i=0; i < p->codebook_count; ++i) {
3209          Codebook *c = p->codebooks + i;
3210          setup_free(p, c->codeword_lengths);
3211          setup_free(p, c->multiplicands);
3212          setup_free(p, c->codewords);
3213          setup_free(p, c->sorted_codewords);
3214          /* c->sorted_values[-1] is the first entry in the array */
3215          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
3216       }
3217       setup_free(p, p->codebooks);
3218    }
3219    setup_free(p, p->floor_config);
3220    setup_free(p, p->residue_config);
3221    for (i=0; i < p->mapping_count; ++i)
3222       setup_free(p, p->mapping[i].chan);
3223    setup_free(p, p->mapping);
3224    for (i=0; i < p->channels; ++i) {
3225       setup_free(p, p->channel_buffers[i]);
3226       setup_free(p, p->previous_window[i]);
3227       setup_free(p, p->finalY[i]);
3228    }
3229    for (i=0; i < 2; ++i) {
3230       setup_free(p, p->A[i]);
3231       setup_free(p, p->B[i]);
3232       setup_free(p, p->C[i]);
3233       setup_free(p, p->window[i]);
3234       setup_free(p, p->bit_reverse[i]);
3235    }
3236 }
3237 
stb_vorbis_close(stb_vorbis * p)3238 void stb_vorbis_close(stb_vorbis *p)
3239 {
3240    if (p == NULL) return;
3241    vorbis_deinit(p);
3242    setup_free(p,p);
3243 }
3244 
vorbis_init(stb_vorbis * p,stb_vorbis_alloc * z)3245 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
3246 {
3247    memset(p, 0, sizeof(*p)); /* NULL out all malloc'd pointers to start */
3248    if (z) {
3249       p->alloc = *z;
3250       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
3251       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
3252    }
3253    p->eof = 0;
3254    p->error = VORBIS__no_error;
3255    p->stream = NULL;
3256    p->codebooks = NULL;
3257    p->page_crc_tests = -1;
3258 }
3259 
stb_vorbis_get_sample_offset(stb_vorbis * f)3260 int stb_vorbis_get_sample_offset(stb_vorbis *f)
3261 {
3262    if (f->current_loc_valid)
3263       return f->current_loc;
3264    return -1;
3265 }
3266 
stb_vorbis_get_info(stb_vorbis * f)3267 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
3268 {
3269    stb_vorbis_info d;
3270    d.channels = f->channels;
3271    d.sample_rate = f->sample_rate;
3272    d.setup_memory_required = f->setup_memory_required;
3273    d.setup_temp_memory_required = f->setup_temp_memory_required;
3274    d.temp_memory_required = f->temp_memory_required;
3275    d.max_frame_size = f->blocksize_1 >> 1;
3276    return d;
3277 }
3278 
stb_vorbis_get_error(stb_vorbis * f)3279 int stb_vorbis_get_error(stb_vorbis *f)
3280 {
3281    int e = f->error;
3282    f->error = VORBIS__no_error;
3283    return e;
3284 }
3285 
vorbis_alloc(stb_vorbis * f)3286 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
3287 {
3288    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
3289    return p;
3290 }
3291 
stb_vorbis_get_file_offset(stb_vorbis * f)3292 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
3293 {
3294    return (unsigned int)(f->stream - f->stream_start);
3295 }
3296 
3297 #ifndef STB_VORBIS_NO_PULLDATA_API
3298 /* DATA-PULLING API */
3299 
vorbis_find_page(stb_vorbis * f,uint32_t * end,uint32_t * last)3300 static uint32_t vorbis_find_page(stb_vorbis *f, uint32_t *end, uint32_t *last)
3301 {
3302    for(;;) {
3303       int n;
3304       if (f->eof) return 0;
3305       n = get8(f);
3306       if (n == 0x4f) { /* page header */
3307          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
3308          int i;
3309          /* check if we're off the end of a file_section stream */
3310          if (retry_loc - 25 > f->stream_len)
3311             return 0;
3312          /* check the rest of the header */
3313          for (i=1; i < 4; ++i)
3314             if (get8(f) != ogg_page_header[i])
3315                break;
3316          if (f->eof) return 0;
3317          if (i == 4) {
3318             uint8_t header[27];
3319             uint32_t i, crc, goal, len;
3320             for (i=0; i < 4; ++i)
3321                header[i] = ogg_page_header[i];
3322             for (; i < 27; ++i)
3323                header[i] = get8(f);
3324             if (f->eof) return 0;
3325             if (header[4] != 0) goto invalid;
3326             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
3327             for (i=22; i < 26; ++i)
3328                header[i] = 0;
3329             crc = 0;
3330             for (i=0; i < 27; ++i)
3331                crc = crc32_update(crc, header[i]);
3332             len = 0;
3333             for (i=0; i < header[26]; ++i) {
3334                int s = get8(f);
3335                crc = crc32_update(crc, s);
3336                len += s;
3337             }
3338             if (len && f->eof) return 0;
3339             for (i=0; i < len; ++i)
3340                crc = crc32_update(crc, get8(f));
3341             /* finished parsing probable page */
3342             if (crc == goal) {
3343                /* we could now check that it's either got the last
3344                 * page flag set, OR it's followed by the capture
3345                 * pattern, but I guess TECHNICALLY you could have
3346                 * a file with garbage between each ogg page and recover
3347                 * from it automatically? So even though that paranoia
3348                 * might decrease the chance of an invalid decode by
3349                 * another 2^32, not worth it since it would hose those
3350                 * invalid-but-useful files? */
3351                if (end)
3352                   *end = stb_vorbis_get_file_offset(f);
3353                if (last) {
3354                   if (header[5] & 0x04)
3355                      *last = 1;
3356                   else
3357                      *last = 0;
3358                }
3359                set_file_offset(f, retry_loc-1);
3360                return 1;
3361             }
3362          }
3363         invalid:
3364          /* not a valid page, so rewind and look for next one */
3365          set_file_offset(f, retry_loc);
3366       }
3367    }
3368 }
3369 
3370 /* seek is implemented with 'interpolation search'--this is like
3371  * binary search, but we use the data values to estimate the likely
3372  * location of the data item (plus a bit of a bias so when the
3373  * estimation is wrong we don't waste overly much time)
3374  */
3375 
3376 #define SAMPLE_unknown  0xffffffff
3377 
3378 
3379 /* ogg vorbis, in its insane infinite wisdom, only provides
3380  * information about the sample at the END of the page.
3381  * therefore we COULD have the data we need in the current
3382  * page, and not know it. we could just use the end location
3383  * as our only knowledge for bounds, seek back, and eventually
3384  * the binary search finds it. or we can try to be smart and
3385  * not waste time trying to locate more pages. we try to be
3386  * smart, since this data is already in memory anyway, so
3387  * doing needless I/O would be crazy!
3388  */
vorbis_analyze_page(stb_vorbis * f,ProbedPage * z)3389 static int vorbis_analyze_page(stb_vorbis *f, ProbedPage *z)
3390 {
3391    uint8_t lacing[255];
3392    uint8_t packet_type[255];
3393    int num_packet, packet_start;
3394    int i,len;
3395    uint32_t samples;
3396    uint8_t header[27] = {0};
3397 
3398    /* record where the page starts */
3399    z->page_start = stb_vorbis_get_file_offset(f);
3400 
3401    /* parse the header */
3402    getn(f, header, 27);
3403    assert(header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S');
3404    getn(f, lacing, header[26]);
3405 
3406    /* determine the length of the payload */
3407    len = 0;
3408    for (i=0; i < header[26]; ++i)
3409       len += lacing[i];
3410 
3411    /* this implies where the page ends */
3412    z->page_end = z->page_start + 27 + header[26] + len;
3413 
3414    /* read the last-decoded sample out of the data */
3415    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 16);
3416 
3417    if (header[5] & 4) {
3418       /* if this is the last page, it's not possible to work
3419        * backwards to figure out the first sample! whoops! fuck. */
3420       z->first_decoded_sample = SAMPLE_unknown;
3421       set_file_offset(f, z->page_start);
3422       return 1;
3423    }
3424 
3425    /* scan through the frames to determine the sample-count of each one...
3426     * our goal is the sample # of the first fully-decoded sample on the
3427     * page, which is the first decoded sample of the 2nd packet */
3428 
3429    num_packet=0;
3430 
3431    packet_start = ((header[5] & 1) == 0);
3432 
3433    for (i=0; i < header[26]; ++i) {
3434       if (packet_start) {
3435          uint8_t n,b;
3436          if (lacing[i] == 0) goto bail; /* trying to read from zero-length packet */
3437          n = get8(f);
3438          /* if bottom bit is non-zero, we've got corruption */
3439          if (n & 1) goto bail;
3440          n >>= 1;
3441          b = ilog(f->mode_count-1);
3442          n &= (1 << b)-1;
3443          if (n >= f->mode_count) goto bail;
3444          packet_type[num_packet++] = f->mode_config[n].blockflag;
3445          skip(f, lacing[i]-1);
3446       } else
3447          skip(f, lacing[i]);
3448       packet_start = (lacing[i] < 255);
3449    }
3450 
3451    /* now that we know the sizes of all the pages, we can start determining
3452     * how much sample data there is. */
3453 
3454    samples = 0;
3455 
3456    /* for the last packet, we step by its whole length, because the definition
3457     * is that we encoded the end sample loc of the 'last packet completed',
3458     * where 'completed' refers to packets being split, and we are left to guess
3459     * what 'end sample loc' means. we assume it means ignoring the fact that
3460     * the last half of the data is useless without windowing against the next
3461     * packet... (so it's not REALLY complete in that sense)
3462     */
3463    if (num_packet > 1)
3464       samples += f->blocksize[packet_type[num_packet-1]];
3465 
3466    for (i=num_packet-2; i >= 1; --i) {
3467       /* now, for this packet, how many samples do we have that
3468        * do not overlap the following packet? */
3469       if (packet_type[i] == 1)
3470          if (packet_type[i+1] == 1)
3471             samples += f->blocksize_1 >> 1;
3472          else
3473             samples += ((f->blocksize_1 - f->blocksize_0) >> 2) + (f->blocksize_0 >> 1);
3474       else
3475          samples += f->blocksize_0 >> 1;
3476    }
3477    /* now, at this point, we've rewound to the very beginning of the
3478     * _second_ packet. if we entirely discard the first packet after
3479     * a seek, this will be exactly the right sample number. HOWEVER!
3480     * we can't as easily compute this number for the LAST page. The
3481     * only way to get the sample offset of the LAST page is to use
3482     * the end loc from the previous page. But what that returns us
3483     * is _exactly_ the place where we get our first non-overlapped
3484     * sample. (I think. Stupid spec for being ambiguous.) So for
3485     * consistency it's better to do that here, too. However, that
3486     * will then require us to NOT discard all of the first frame we
3487     * decode, in some cases, which means an even weirder frame size
3488     * and extra code. what a fucking pain.
3489 
3490     * we're going to discard the first packet if we
3491     * start the seek here, so we don't care about it. (we could actually
3492     * do better; if the first packet is long, and the previous packet
3493     * is short, there's actually data in the first half of the first
3494     * packet that doesn't need discarding... but not worth paying the
3495     * effort of tracking that of that here and in the seeking logic)
3496     * except crap, if we infer it from the _previous_ packet's end
3497     * location, we DO need to use that definition... and we HAVE to
3498     * infer the start loc of the LAST packet from the previous packet's
3499     * end location. fuck you, ogg vorbis. */
3500 
3501    z->first_decoded_sample = z->last_decoded_sample - samples;
3502 
3503    /* restore file state to where we were */
3504    set_file_offset(f, z->page_start);
3505    return 1;
3506 
3507    /* restore file state to where we were */
3508   bail:
3509    set_file_offset(f, z->page_start);
3510    return 0;
3511 }
3512 
vorbis_seek_frame_from_page(stb_vorbis * f,uint32_t page_start,uint32_t first_sample,uint32_t target_sample,int fine)3513 static int vorbis_seek_frame_from_page(stb_vorbis *f, uint32_t page_start, uint32_t first_sample, uint32_t target_sample, int fine)
3514 {
3515    int left_start, left_end, right_start, right_end, mode,i;
3516    int frame=0;
3517    uint32_t frame_start;
3518    int frames_to_skip, data_to_skip;
3519 
3520    /* first_sample is the sample # of the first sample that doesn't
3521     * overlap the previous page... note that this requires us to
3522     * _partially_ discard the first packet! bleh. */
3523    set_file_offset(f, page_start);
3524 
3525    f->next_seg = -1;  /* force page resync */
3526 
3527    frame_start = first_sample;
3528    /* frame start is where the previous packet's last decoded sample
3529     * was, which corresponds to left_end... EXCEPT if the previous
3530     * packet was long and this packet is short? Probably a bug here.
3531 
3532 
3533     * now, we can start decoding frames... we'll only FAKE decode them,
3534     * until we find the frame that contains our sample; then we'll rewind,
3535     * and try again */
3536    for (;;) {
3537       int start;
3538 
3539       if (!vorbis_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
3540          return error(f, VORBIS_seek_failed);
3541 
3542       if (frame == 0)
3543          start = left_end;
3544       else
3545          start = left_start;
3546 
3547       /* the window starts at left_start; the last valid sample we generate
3548        * before the next frame's window start is right_start-1 */
3549       if (target_sample < frame_start + right_start-start)
3550          break;
3551 
3552       flush_packet(f);
3553       if (f->eof)
3554          return error(f, VORBIS_seek_failed);
3555 
3556       frame_start += right_start - start;
3557 
3558       ++frame;
3559    }
3560 
3561    /* ok, at this point, the sample we want is contained in frame #'frame'
3562 
3563     * to decode frame #'frame' normally, we have to decode the
3564     * previous frame first... but if it's the FIRST frame of the page
3565     * we can't. if it's the first frame, it means it falls in the part
3566     * of the first frame that doesn't overlap either of the other frames.
3567     * so, if we have to handle that case for the first frame, we might
3568     * as well handle it for all of them, so: */
3569    if (target_sample > frame_start + (left_end - left_start)) {
3570       /* so what we want to do is go ahead and just immediately decode
3571        * this frame, but then make it so the next get_frame_float() uses
3572        * this already-decoded data? or do we want to go ahead and rewind,
3573        * and leave a flag saying to skip the first N data? let's do that
3574        */
3575       frames_to_skip = frame;  /* if this is frame #1, skip 1 frame (#0) */
3576       data_to_skip = left_end - left_start;
3577    } else {
3578       /* otherwise, we want to skip frames 0, 1, 2, ... frame-2
3579        * (which means frame-2+1 total frames) then decode frame-1,
3580        * then leave frame pending */
3581       frames_to_skip = frame - 1;
3582       assert(frames_to_skip >= 0);
3583       data_to_skip = -1;
3584    }
3585 
3586    set_file_offset(f, page_start);
3587    f->next_seg = - 1; /* force page resync */
3588 
3589    for (i=0; i < frames_to_skip; ++i) {
3590       maybe_start_packet(f);
3591       flush_packet(f);
3592    }
3593 
3594    if (data_to_skip >= 0) {
3595       int i,j,n = f->blocksize_0 >> 1;
3596       f->discard_samples_deferred = data_to_skip;
3597       for (i=0; i < f->channels; ++i)
3598          for (j=0; j < n; ++j)
3599             f->previous_window[i][j] = 0;
3600       f->previous_length = n;
3601       frame_start += data_to_skip;
3602    } else {
3603       f->previous_length = 0;
3604       vorbis_pump_first_frame(f);
3605    }
3606 
3607    /* at this point, the NEXT decoded frame will generate the desired sample */
3608    if (fine) {
3609       /* so if we're doing sample accurate streaming, we want to go ahead and decode it! */
3610       if (target_sample != frame_start) {
3611          int n;
3612          stb_vorbis_get_frame_float(f, &n, NULL);
3613          assert(target_sample > frame_start);
3614          assert(f->channel_buffer_start + (int) (target_sample-frame_start) < f->channel_buffer_end);
3615          f->channel_buffer_start += (target_sample - frame_start);
3616       }
3617    }
3618 
3619    return 0;
3620 }
3621 
vorbis_seek_base(stb_vorbis * f,unsigned int sample_number,int fine)3622 static int vorbis_seek_base(stb_vorbis *f, unsigned int sample_number, int fine)
3623 {
3624    ProbedPage p[2],q;
3625    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
3626 
3627    /* do we know the location of the last page? */
3628    if (f->p_last.page_start == 0) {
3629       uint32_t z = stb_vorbis_stream_length_in_samples(f);
3630       if (z == 0) return error(f, VORBIS_cant_find_last_page);
3631    }
3632 
3633    p[0] = f->p_first;
3634    p[1] = f->p_last;
3635 
3636    if (sample_number >= f->p_last.last_decoded_sample)
3637       sample_number = f->p_last.last_decoded_sample-1;
3638 
3639    if (sample_number < f->p_first.last_decoded_sample) {
3640       vorbis_seek_frame_from_page(f, p[0].page_start, 0, sample_number, fine);
3641       return 0;
3642    } else {
3643       int attempts=0;
3644       while (p[0].page_end < p[1].page_start) {
3645          uint32_t probe;
3646          uint32_t start_offset, end_offset;
3647          uint32_t start_sample, end_sample;
3648 
3649          /* copy these into local variables so we can tweak them
3650           * if any are unknown */
3651          start_offset = p[0].page_end;
3652          end_offset   = p[1].after_previous_page_start; /* an address known to seek to page p[1] */
3653          start_sample = p[0].last_decoded_sample;
3654          end_sample   = p[1].last_decoded_sample;
3655 
3656          /* currently there is no such tweaking logic needed/possible? */
3657          if (start_sample == SAMPLE_unknown || end_sample == SAMPLE_unknown)
3658             return error(f, VORBIS_seek_failed);
3659 
3660          /* now we want to lerp between these for the target samples... */
3661 
3662          /* step 1: we need to bias towards the page start... */
3663          if (start_offset + 4000 < end_offset)
3664             end_offset -= 4000;
3665 
3666          /* now compute an interpolated search loc */
3667          probe = start_offset + (int) floor((float) (end_offset - start_offset) / (end_sample - start_sample) * (sample_number - start_sample));
3668 
3669          /* next we need to bias towards binary search...
3670           * code is a little wonky to allow for full 32-bit unsigned values */
3671          if (attempts >= 4) {
3672             uint32_t probe2 = start_offset + ((end_offset - start_offset) >> 1);
3673             if (attempts >= 8)
3674                probe = probe2;
3675             else if (probe < probe2)
3676                probe = probe + ((probe2 - probe) >> 1);
3677             else
3678                probe = probe2 + ((probe - probe2) >> 1);
3679          }
3680          ++attempts;
3681 
3682          set_file_offset(f, probe);
3683          if (!vorbis_find_page(f, NULL, NULL))   return error(f, VORBIS_seek_failed);
3684          if (!vorbis_analyze_page(f, &q))        return error(f, VORBIS_seek_failed);
3685          q.after_previous_page_start = probe;
3686 
3687          /* it's possible we've just found the last page again */
3688          if (q.page_start == p[1].page_start) {
3689             p[1] = q;
3690             continue;
3691          }
3692 
3693          if (sample_number < q.last_decoded_sample)
3694             p[1] = q;
3695          else
3696             p[0] = q;
3697       }
3698 
3699       if (p[0].last_decoded_sample <= sample_number && sample_number < p[1].last_decoded_sample) {
3700          vorbis_seek_frame_from_page(f, p[1].page_start, p[0].last_decoded_sample, sample_number, fine);
3701          return 0;
3702       }
3703       return error(f, VORBIS_seek_failed);
3704    }
3705 }
3706 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)3707 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
3708 {
3709    return vorbis_seek_base(f, sample_number, FALSE);
3710 }
3711 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)3712 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
3713 {
3714    return vorbis_seek_base(f, sample_number, TRUE);
3715 }
3716 
stb_vorbis_seek_start(stb_vorbis * f)3717 void stb_vorbis_seek_start(stb_vorbis *f)
3718 {
3719    if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
3720    set_file_offset(f, f->first_audio_page_offset);
3721    f->previous_length = 0;
3722    f->first_decode = TRUE;
3723    f->next_seg = -1;
3724    vorbis_pump_first_frame(f);
3725 }
3726 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)3727 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
3728 {
3729    unsigned int restore_offset, previous_safe;
3730    unsigned int end, last_page_loc;
3731 
3732    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
3733    if (!f->total_samples) {
3734       unsigned int last;
3735       uint32_t lo,hi;
3736       char header[6];
3737 
3738       /* first, store the current decode position so we can restore it */
3739       restore_offset = stb_vorbis_get_file_offset(f);
3740 
3741       /* now we want to seek back 64K from the end (the last page must
3742        * be at most a little less than 64K, but let's allow a little slop) */
3743       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
3744          previous_safe = f->stream_len - 65536;
3745       else
3746          previous_safe = f->first_audio_page_offset;
3747 
3748       set_file_offset(f, previous_safe);
3749       /* previous_safe is now our candidate 'earliest known place that seeking
3750        * to will lead to the final page' */
3751 
3752       if (!vorbis_find_page(f, &end, &last)) {
3753          /* if we can't find a page, we're hosed! */
3754          f->error = VORBIS_cant_find_last_page;
3755          f->total_samples = 0xffffffff;
3756          goto done;
3757       }
3758 
3759       /* check if there are more pages */
3760       last_page_loc = stb_vorbis_get_file_offset(f);
3761 
3762       /* stop when the last_page flag is set, not when we reach eof;
3763        * this allows us to stop short of a 'file_section' end without
3764        * explicitly checking the length of the section */
3765       while (!last) {
3766          set_file_offset(f, end);
3767          if (!vorbis_find_page(f, &end, &last)) {
3768             /* the last page we found didn't have the 'last page' flag
3769              * set. whoops! */
3770             break;
3771          }
3772          previous_safe = last_page_loc+1;
3773          last_page_loc = stb_vorbis_get_file_offset(f);
3774       }
3775 
3776       set_file_offset(f, last_page_loc);
3777 
3778       /* parse the header */
3779       getn(f, (unsigned char *)header, 6);
3780       /* extract the absolute granule position */
3781       lo = get32(f);
3782       hi = get32(f);
3783       if (lo == 0xffffffff && hi == 0xffffffff) {
3784          f->error = VORBIS_cant_find_last_page;
3785          f->total_samples = SAMPLE_unknown;
3786          goto done;
3787       }
3788       if (hi)
3789          lo = 0xfffffffe; /* saturate */
3790       f->total_samples = lo;
3791 
3792       f->p_last.page_start = last_page_loc;
3793       f->p_last.page_end   = end;
3794       f->p_last.last_decoded_sample = lo;
3795       f->p_last.first_decoded_sample = SAMPLE_unknown;
3796       f->p_last.after_previous_page_start = previous_safe;
3797 
3798      done:
3799       set_file_offset(f, restore_offset);
3800    }
3801    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
3802 }
3803 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)3804 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
3805 {
3806    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
3807 }
3808 
3809 
3810 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)3811 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
3812 {
3813    int len, right,left,i;
3814    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
3815 
3816    if (!vorbis_decode_packet(f, &len, &left, &right)) {
3817       f->channel_buffer_start = f->channel_buffer_end = 0;
3818       return 0;
3819    }
3820 
3821    len = vorbis_finish_frame(f, len, left, right);
3822    for (i=0; i < f->channels; ++i)
3823       f->outputs[i] = f->channel_buffers[i] + left;
3824 
3825    f->channel_buffer_start = left;
3826    f->channel_buffer_end   = left+len;
3827 
3828    if (channels) *channels = f->channels;
3829    if (output)   *output = f->outputs;
3830    return len;
3831 }
3832 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,stb_vorbis_alloc * alloc)3833 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, stb_vorbis_alloc *alloc)
3834 {
3835    stb_vorbis *f, p;
3836    if (data == NULL) return NULL;
3837    vorbis_init(&p, alloc);
3838    p.stream = (uint8_t *) data;
3839    p.stream_end = (uint8_t *) data + len;
3840    p.stream_start = (uint8_t *) p.stream;
3841    p.stream_len = len;
3842    p.push_mode = FALSE;
3843    if (start_decoder(&p)) {
3844       f = vorbis_alloc(&p);
3845       if (f) {
3846          *f = p;
3847          vorbis_pump_first_frame(f);
3848          return f;
3849       }
3850    }
3851    if (error) *error = p.error;
3852    vorbis_deinit(&p);
3853    return NULL;
3854 }
3855 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)3856 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
3857 {
3858    float **outputs;
3859    int len = num_floats / channels;
3860    int n=0;
3861    int z = f->channels;
3862    if (z > channels) z = channels;
3863    while (n < len) {
3864       int i,j;
3865       int k = f->channel_buffer_end - f->channel_buffer_start;
3866       if (n+k >= len) k = len - n;
3867       for (j=0; j < k; ++j) {
3868          for (i=0; i < z; ++i)
3869             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
3870          for (   ; i < channels; ++i)
3871             *buffer++ = 0;
3872       }
3873       n += k;
3874       f->channel_buffer_start += k;
3875       if (n == len)
3876          break;
3877       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
3878          break;
3879    }
3880    return n;
3881 }
3882 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)3883 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
3884 {
3885    float **outputs;
3886    int n=0;
3887    int z = f->channels;
3888    if (z > channels) z = channels;
3889    while (n < num_samples) {
3890       int i;
3891       int k = f->channel_buffer_end - f->channel_buffer_start;
3892       if (n+k >= num_samples) k = num_samples - n;
3893       if (k) {
3894          for (i=0; i < z; ++i)
3895             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
3896          for (   ; i < channels; ++i)
3897             memset(buffer[i]+n, 0, sizeof(float) * k);
3898       }
3899       n += k;
3900       f->channel_buffer_start += k;
3901       if (n == num_samples)
3902          break;
3903       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
3904          break;
3905    }
3906    return n;
3907 }
3908 #endif /* STB_VORBIS_NO_PULLDATA_API */
3909 
3910 #endif /* STB_VORBIS_HEADER_ONLY */
3911