1 // Ogg Vorbis audio decoder - v1.20 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        github:alxprd
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    github:manxorist   saga musix          github:infatum
33 //    Timur Gagiev       Maxwell Koo         Peter Waller
34 //    github:audinowho   Dougall Johnson     David Reid
35 //    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
36 //
37 // Partial history:
38 //    1.20    - 2020-07-11 - several small fixes
39 //    1.19    - 2020-02-05 - warnings
40 //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
41 //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
42 //    1.16    - 2019-03-04 - fix warnings
43 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
44 //    1.14    - 2018-02-11 - delete bogus dealloca usage
45 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
46 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
47 //    1.11    - 2017-07-23 - fix MinGW compilation
48 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
49 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
50 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
51 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
52 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
53 //                           some crash fixes when out of memory or with corrupt files
54 //                           fix some inappropriately signed shifts
55 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
56 //    1.04    - 2014-08-27 - fix missing const-correct case in API
57 //    1.03    - 2014-08-07 - warning fixes
58 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
59 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
60 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
61 //                           (API change) report sample rate for decode-full-file funcs
62 //
63 // See end of file for full version history.
64 
65 
66 //////////////////////////////////////////////////////////////////////////////
67 //
68 //  HEADER BEGINS HERE
69 //
70 
71 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
72 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
73 
74 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
75 #define STB_VORBIS_NO_STDIO 1
76 #endif
77 
78 #ifndef STB_VORBIS_NO_STDIO
79 #include <stdio.h>
80 #endif
81 
82 #ifdef __cplusplus
83 extern "C" {
84 #endif
85 
86 ///////////   THREAD SAFETY
87 
88 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
89 // them from multiple threads at the same time. However, you can have multiple
90 // stb_vorbis* handles and decode from them independently in multiple thrads.
91 
92 
93 ///////////   MEMORY ALLOCATION
94 
95 // normally stb_vorbis uses malloc() to allocate memory at startup,
96 // and alloca() to allocate temporary memory during a frame on the
97 // stack. (Memory consumption will depend on the amount of setup
98 // data in the file and how you set the compile flags for speed
99 // vs. size. In my test files the maximal-size usage is ~150KB.)
100 //
101 // You can modify the wrapper functions in the source (setup_malloc,
102 // setup_temp_malloc, temp_malloc) to change this behavior, or you
103 // can use a simpler allocation model: you pass in a buffer from
104 // which stb_vorbis will allocate _all_ its memory (including the
105 // temp memory). "open" may fail with a VORBIS_outofmem if you
106 // do not pass in enough data; there is no way to determine how
107 // much you do need except to succeed (at which point you can
108 // query get_info to find the exact amount required. yes I know
109 // this is lame).
110 //
111 // If you pass in a non-NULL buffer of the type below, allocation
112 // will occur from it as described above. Otherwise just pass NULL
113 // to use malloc()/alloca()
114 
115 typedef struct
116 {
117    char *alloc_buffer;
118    int   alloc_buffer_length_in_bytes;
119 } stb_vorbis_alloc;
120 
121 
122 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
123 
124 typedef struct stb_vorbis stb_vorbis;
125 
126 typedef struct
127 {
128    unsigned int sample_rate;
129    int channels;
130 
131    unsigned int setup_memory_required;
132    unsigned int setup_temp_memory_required;
133    unsigned int temp_memory_required;
134 
135    int max_frame_size;
136 } stb_vorbis_info;
137 
138 typedef struct
139 {
140    char *vendor;
141 
142    int comment_list_length;
143    char **comment_list;
144 } stb_vorbis_comment;
145 
146 // get general information about the file
147 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
148 
149 // get ogg comments
150 extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
151 
152 // get the last error detected (clears it, too)
153 extern int stb_vorbis_get_error(stb_vorbis *f);
154 
155 // close an ogg vorbis file and free all memory in use
156 extern void stb_vorbis_close(stb_vorbis *f);
157 
158 // this function returns the offset (in samples) from the beginning of the
159 // file that will be returned by the next decode, if it is known, or -1
160 // otherwise. after a flush_pushdata() call, this may take a while before
161 // it becomes valid again.
162 // NOT WORKING YET after a seek with PULLDATA API
163 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
164 
165 // returns the current seek point within the file, or offset from the beginning
166 // of the memory buffer. In pushdata mode it returns 0.
167 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
168 
169 ///////////   PUSHDATA API
170 
171 #ifndef STB_VORBIS_NO_PUSHDATA_API
172 
173 // this API allows you to get blocks of data from any source and hand
174 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
175 // you how much it used, and you have to give it the rest next time;
176 // and stb_vorbis may not have enough data to work with and you will
177 // need to give it the same data again PLUS more. Note that the Vorbis
178 // specification does not bound the size of an individual frame.
179 
180 extern stb_vorbis *stb_vorbis_open_pushdata(
181          const unsigned char * datablock, int datablock_length_in_bytes,
182          int *datablock_memory_consumed_in_bytes,
183          int *error,
184          const stb_vorbis_alloc *alloc_buffer);
185 // create a vorbis decoder by passing in the initial data block containing
186 //    the ogg&vorbis headers (you don't need to do parse them, just provide
187 //    the first N bytes of the file--you're told if it's not enough, see below)
188 // on success, returns an stb_vorbis *, does not set error, returns the amount of
189 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
190 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
191 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
192 //       incomplete and you need to pass in a larger block from the start of the file
193 
194 extern int stb_vorbis_decode_frame_pushdata(
195          stb_vorbis *f,
196          const unsigned char *datablock, int datablock_length_in_bytes,
197          int *channels,             // place to write number of float * buffers
198          float ***output,           // place to write float ** array of float * buffers
199          int *samples               // place to write number of output samples
200      );
201 // decode a frame of audio sample data if possible from the passed-in data block
202 //
203 // return value: number of bytes we used from datablock
204 //
205 // possible cases:
206 //     0 bytes used, 0 samples output (need more data)
207 //     N bytes used, 0 samples output (resynching the stream, keep going)
208 //     N bytes used, M samples output (one frame of data)
209 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
210 // frame, because Vorbis always "discards" the first frame.
211 //
212 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
213 // instead only datablock_length_in_bytes-3 or less. This is because it wants
214 // to avoid missing parts of a page header if they cross a datablock boundary,
215 // without writing state-machiney code to record a partial detection.
216 //
217 // The number of channels returned are stored in *channels (which can be
218 // NULL--it is always the same as the number of channels reported by
219 // get_info). *output will contain an array of float* buffers, one per
220 // channel. In other words, (*output)[0][0] contains the first sample from
221 // the first channel, and (*output)[1][0] contains the first sample from
222 // the second channel.
223 
224 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
225 // inform stb_vorbis that your next datablock will not be contiguous with
226 // previous ones (e.g. you've seeked in the data); future attempts to decode
227 // frames will cause stb_vorbis to resynchronize (as noted above), and
228 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
229 // will begin decoding the _next_ frame.
230 //
231 // if you want to seek using pushdata, you need to seek in your file, then
232 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
233 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
234 // if you don't like the result, seek your file again and repeat.
235 #endif
236 
237 
238 //////////   PULLING INPUT API
239 
240 #ifndef STB_VORBIS_NO_PULLDATA_API
241 // This API assumes stb_vorbis is allowed to pull data from a source--
242 // either a block of memory containing the _entire_ vorbis stream, or a
243 // FILE * that you or it create, or possibly some other reading mechanism
244 // if you go modify the source to replace the FILE * case with some kind
245 // of callback to your code. (But if you don't support seeking, you may
246 // just want to go ahead and use pushdata.)
247 
248 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
249 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
250 #endif
251 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
252 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
253 #endif
254 // decode an entire file and output the data interleaved into a malloc()ed
255 // buffer stored in *output. The return value is the number of samples
256 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
257 // When you're done with it, just free() the pointer returned in *output.
258 
259 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
260                                   int *error, const stb_vorbis_alloc *alloc_buffer);
261 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
262 // this must be the entire stream!). on failure, returns NULL and sets *error
263 
264 #ifndef STB_VORBIS_NO_STDIO
265 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
266                                   int *error, const stb_vorbis_alloc *alloc_buffer);
267 // create an ogg vorbis decoder from a filename via fopen(). on failure,
268 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
269 
270 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
271                                   int *error, const stb_vorbis_alloc *alloc_buffer);
272 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
273 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
274 // note that stb_vorbis must "own" this stream; if you seek it in between
275 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
276 // perform stb_vorbis_seek_*() operations on this file, it will assume it
277 // owns the _entire_ rest of the file after the start point. Use the next
278 // function, stb_vorbis_open_file_section(), to limit it.
279 
280 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
281                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
282 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
283 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
284 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
285 // this stream; if you seek it in between calls to stb_vorbis, it will become
286 // confused.
287 #endif
288 
289 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
290 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
291 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
292 // after calling seek_frame(), the next call to get_frame_*() will include
293 // the specified sample. after calling stb_vorbis_seek(), the next call to
294 // stb_vorbis_get_samples_* will start with the specified sample. If you
295 // do not need to seek to EXACTLY the target sample when using get_samples_*,
296 // you can also use seek_frame().
297 
298 extern int stb_vorbis_seek_start(stb_vorbis *f);
299 // this function is equivalent to stb_vorbis_seek(f,0)
300 
301 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
302 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
303 // these functions return the total length of the vorbis stream
304 
305 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
306 // decode the next frame and return the number of samples. the number of
307 // channels returned are stored in *channels (which can be NULL--it is always
308 // the same as the number of channels reported by get_info). *output will
309 // contain an array of float* buffers, one per channel. These outputs will
310 // be overwritten on the next call to stb_vorbis_get_frame_*.
311 //
312 // You generally should not intermix calls to stb_vorbis_get_frame_*()
313 // and stb_vorbis_get_samples_*(), since the latter calls the former.
314 
315 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
316 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
317 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
318 #endif
319 // decode the next frame and return the number of *samples* per channel.
320 // Note that for interleaved data, you pass in the number of shorts (the
321 // size of your array), but the return value is the number of samples per
322 // channel, not the total number of samples.
323 //
324 // The data is coerced to the number of channels you request according to the
325 // channel coercion rules (see below). You must pass in the size of your
326 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
327 // The maximum buffer size needed can be gotten from get_info(); however,
328 // the Vorbis I specification implies an absolute maximum of 4096 samples
329 // per channel.
330 
331 // Channel coercion rules:
332 //    Let M be the number of channels requested, and N the number of channels present,
333 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
334 //    and stereo R be the sum of all R and center channels (channel assignment from the
335 //    vorbis spec).
336 //        M    N       output
337 //        1    k      sum(Ck) for all k
338 //        2    *      stereo L, stereo R
339 //        k    l      k > l, the first l channels, then 0s
340 //        k    l      k <= l, the first k channels
341 //    Note that this is not _good_ surround etc. mixing at all! It's just so
342 //    you get something useful.
343 
344 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
345 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
346 // gets num_samples samples, not necessarily on a frame boundary--this requires
347 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
348 // Returns the number of samples stored per channel; it may be less than requested
349 // at the end of the file. If there are no more samples in the file, returns 0.
350 
351 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
352 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
353 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
354 #endif
355 // gets num_samples samples, not necessarily on a frame boundary--this requires
356 // buffering so you have to supply the buffers. Applies the coercion rules above
357 // to produce 'channels' channels. Returns the number of samples stored per channel;
358 // it may be less than requested at the end of the file. If there are no more
359 // samples in the file, returns 0.
360 
361 #endif
362 
363 ////////   ERROR CODES
364 
365 enum STBVorbisError
366 {
367    VORBIS__no_error,
368 
369    VORBIS_need_more_data=1,             // not a real error
370 
371    VORBIS_invalid_api_mixing,           // can't mix API modes
372    VORBIS_outofmem,                     // not enough memory
373    VORBIS_feature_not_supported,        // uses floor 0
374    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
375    VORBIS_file_open_failure,            // fopen() failed
376    VORBIS_seek_without_length,          // can't seek in unknown-length file
377 
378    VORBIS_unexpected_eof=10,            // file is truncated?
379    VORBIS_seek_invalid,                 // seek past EOF
380 
381    // decoding errors (corrupt/invalid stream) -- you probably
382    // don't care about the exact details of these
383 
384    // vorbis errors:
385    VORBIS_invalid_setup=20,
386    VORBIS_invalid_stream,
387 
388    // ogg errors:
389    VORBIS_missing_capture_pattern=30,
390    VORBIS_invalid_stream_structure_version,
391    VORBIS_continued_packet_flag_invalid,
392    VORBIS_incorrect_stream_serial_number,
393    VORBIS_invalid_first_page,
394    VORBIS_bad_packet_type,
395    VORBIS_cant_find_last_page,
396    VORBIS_seek_failed,
397    VORBIS_ogg_skeleton_not_supported
398 };
399 
400 
401 #ifdef __cplusplus
402 }
403 #endif
404 
405 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
406 //
407 //  HEADER ENDS HERE
408 //
409 //////////////////////////////////////////////////////////////////////////////
410 
411 #ifndef STB_VORBIS_HEADER_ONLY
412 
413 // global configuration settings (e.g. set these in the project/makefile),
414 // or just set them in this file at the top (although ideally the first few
415 // should be visible when the header file is compiled too, although it's not
416 // crucial)
417 
418 // STB_VORBIS_NO_PUSHDATA_API
419 //     does not compile the code for the various stb_vorbis_*_pushdata()
420 //     functions
421 // #define STB_VORBIS_NO_PUSHDATA_API
422 
423 // STB_VORBIS_NO_PULLDATA_API
424 //     does not compile the code for the non-pushdata APIs
425 // #define STB_VORBIS_NO_PULLDATA_API
426 
427 // STB_VORBIS_NO_STDIO
428 //     does not compile the code for the APIs that use FILE *s internally
429 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
430 // #define STB_VORBIS_NO_STDIO
431 
432 // STB_VORBIS_NO_INTEGER_CONVERSION
433 //     does not compile the code for converting audio sample data from
434 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
435 // #define STB_VORBIS_NO_INTEGER_CONVERSION
436 
437 // STB_VORBIS_NO_FAST_SCALED_FLOAT
438 //      does not use a fast float-to-int trick to accelerate float-to-int on
439 //      most platforms which requires endianness be defined correctly.
440 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
441 
442 
443 // STB_VORBIS_MAX_CHANNELS [number]
444 //     globally define this to the maximum number of channels you need.
445 //     The spec does not put a restriction on channels except that
446 //     the count is stored in a byte, so 255 is the hard limit.
447 //     Reducing this saves about 16 bytes per value, so using 16 saves
448 //     (255-16)*16 or around 4KB. Plus anything other memory usage
449 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
450 //     6 (5.1 audio), or 2 (stereo only).
451 #ifndef STB_VORBIS_MAX_CHANNELS
452 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
453 #endif
454 
455 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
456 //     after a flush_pushdata(), stb_vorbis begins scanning for the
457 //     next valid page, without backtracking. when it finds something
458 //     that looks like a page, it streams through it and verifies its
459 //     CRC32. Should that validation fail, it keeps scanning. But it's
460 //     possible that _while_ streaming through to check the CRC32 of
461 //     one candidate page, it sees another candidate page. This #define
462 //     determines how many "overlapping" candidate pages it can search
463 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
464 //     garbage pages could be as big as 64KB, but probably average ~16KB.
465 //     So don't hose ourselves by scanning an apparent 64KB page and
466 //     missing a ton of real ones in the interim; so minimum of 2
467 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
468 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
469 #endif
470 
471 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
472 //     sets the log size of the huffman-acceleration table.  Maximum
473 //     supported value is 24. with larger numbers, more decodings are O(1),
474 //     but the table size is larger so worse cache missing, so you'll have
475 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
476 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
477 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
478 #endif
479 
480 // STB_VORBIS_FAST_BINARY_LENGTH [number]
481 //     sets the log size of the binary-search acceleration table. this
482 //     is used in similar fashion to the fast-huffman size to set initial
483 //     parameters for the binary search
484 
485 // STB_VORBIS_FAST_HUFFMAN_INT
486 //     The fast huffman tables are much more efficient if they can be
487 //     stored as 16-bit results instead of 32-bit results. This restricts
488 //     the codebooks to having only 65535 possible outcomes, though.
489 //     (At least, accelerated by the huffman table.)
490 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
491 #define STB_VORBIS_FAST_HUFFMAN_SHORT
492 #endif
493 
494 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
495 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
496 //     back on binary searching for the correct one. This requires storing
497 //     extra tables with the huffman codes in sorted order. Defining this
498 //     symbol trades off space for speed by forcing a linear search in the
499 //     non-fast case, except for "sparse" codebooks.
500 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
501 
502 // STB_VORBIS_DIVIDES_IN_RESIDUE
503 //     stb_vorbis precomputes the result of the scalar residue decoding
504 //     that would otherwise require a divide per chunk. you can trade off
505 //     space for time by defining this symbol.
506 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
507 
508 // STB_VORBIS_DIVIDES_IN_CODEBOOK
509 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
510 //     stored, or with all elements being chosen from a small range of values,
511 //     and all values possible in all elements. By default, stb_vorbis expands
512 //     this latter kind out to look like the former kind for ease of decoding,
513 //     because otherwise an integer divide-per-vector-element is required to
514 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
515 //     trade off storage for speed.
516 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
517 
518 #ifdef STB_VORBIS_CODEBOOK_SHORTS
519 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
520 #endif
521 
522 // STB_VORBIS_DIVIDE_TABLE
523 //     this replaces small integer divides in the floor decode loop with
524 //     table lookups. made less than 1% difference, so disabled by default.
525 
526 // STB_VORBIS_NO_INLINE_DECODE
527 //     disables the inlining of the scalar codebook fast-huffman decode.
528 //     might save a little codespace; useful for debugging
529 // #define STB_VORBIS_NO_INLINE_DECODE
530 
531 // STB_VORBIS_NO_DEFER_FLOOR
532 //     Normally we only decode the floor without synthesizing the actual
533 //     full curve. We can instead synthesize the curve immediately. This
534 //     requires more memory and is very likely slower, so I don't think
535 //     you'd ever want to do it except for debugging.
536 // #define STB_VORBIS_NO_DEFER_FLOOR
537 
538 
539 
540 
541 //////////////////////////////////////////////////////////////////////////////
542 
543 #ifdef STB_VORBIS_NO_PULLDATA_API
544    #define STB_VORBIS_NO_INTEGER_CONVERSION
545    #define STB_VORBIS_NO_STDIO
546 #endif
547 
548 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
549    #define STB_VORBIS_NO_STDIO 1
550 #endif
551 
552 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
553 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
554 
555    // only need endianness for fast-float-to-int, which we don't
556    // use for pushdata
557 
558    #ifndef STB_VORBIS_BIG_ENDIAN
559      #define STB_VORBIS_ENDIAN  0
560    #else
561      #define STB_VORBIS_ENDIAN  1
562    #endif
563 
564 #endif
565 #endif
566 
567 
568 #ifndef STB_VORBIS_NO_STDIO
569 #include <stdio.h>
570 #endif
571 
572 #ifndef STB_VORBIS_NO_CRT
573    #include <stdlib.h>
574    #include <string.h>
575    #include <assert.h>
576    #include <math.h>
577 
578    // find definition of alloca if it's not in stdlib.h:
579    #if defined(_MSC_VER) || defined(__MINGW32__)
580       #include <malloc.h>
581    #endif
582    #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__) || defined(__NEWLIB__)
583       #include <alloca.h>
584    #endif
585 #else // STB_VORBIS_NO_CRT
586    #define NULL 0
587    #define malloc(s)   0
588    #define free(s)     ((void) 0)
589    #define realloc(s)  0
590 #endif // STB_VORBIS_NO_CRT
591 
592 #include <limits.h>
593 
594 #ifdef __MINGW32__
595    // eff you mingw:
596    //     "fixed":
597    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
598    //     "no that broke the build, reverted, who cares about C":
599    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
600    #ifdef __forceinline
601    #undef __forceinline
602    #endif
603    #define __forceinline
604    #ifndef alloca
605    #define alloca __builtin_alloca
606    #endif
607 #elif !defined(_MSC_VER)
608    #if __GNUC__
609       #define __forceinline inline
610    #else
611       #define __forceinline
612    #endif
613 #endif
614 
615 #if STB_VORBIS_MAX_CHANNELS > 256
616 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
617 #endif
618 
619 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
620 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
621 #endif
622 
623 
624 #if 0
625 #include <crtdbg.h>
626 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
627 #else
628 #define CHECK(f)   ((void) 0)
629 #endif
630 
631 #define MAX_BLOCKSIZE_LOG  13   // from specification
632 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
633 
634 
635 typedef unsigned char  uint8;
636 typedef   signed char   int8;
637 typedef unsigned short uint16;
638 typedef   signed short  int16;
639 typedef unsigned int   uint32;
640 typedef   signed int    int32;
641 
642 #ifndef TRUE
643 #define TRUE 1
644 #define FALSE 0
645 #endif
646 
647 typedef float codetype;
648 
649 // @NOTE
650 //
651 // Some arrays below are tagged "//varies", which means it's actually
652 // a variable-sized piece of data, but rather than malloc I assume it's
653 // small enough it's better to just allocate it all together with the
654 // main thing
655 //
656 // Most of the variables are specified with the smallest size I could pack
657 // them into. It might give better performance to make them all full-sized
658 // integers. It should be safe to freely rearrange the structures or change
659 // the sizes larger--nothing relies on silently truncating etc., nor the
660 // order of variables.
661 
662 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
663 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
664 
665 typedef struct
666 {
667    int dimensions, entries;
668    uint8 *codeword_lengths;
669    float  minimum_value;
670    float  delta_value;
671    uint8  value_bits;
672    uint8  lookup_type;
673    uint8  sequence_p;
674    uint8  sparse;
675    uint32 lookup_values;
676    codetype *multiplicands;
677    uint32 *codewords;
678    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
679     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
680    #else
681     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
682    #endif
683    uint32 *sorted_codewords;
684    int    *sorted_values;
685    int     sorted_entries;
686 } Codebook;
687 
688 typedef struct
689 {
690    uint8 order;
691    uint16 rate;
692    uint16 bark_map_size;
693    uint8 amplitude_bits;
694    uint8 amplitude_offset;
695    uint8 number_of_books;
696    uint8 book_list[16]; // varies
697 } Floor0;
698 
699 typedef struct
700 {
701    uint8 partitions;
702    uint8 partition_class_list[32]; // varies
703    uint8 class_dimensions[16]; // varies
704    uint8 class_subclasses[16]; // varies
705    uint8 class_masterbooks[16]; // varies
706    int16 subclass_books[16][8]; // varies
707    uint16 Xlist[31*8+2]; // varies
708    uint8 sorted_order[31*8+2];
709    uint8 neighbors[31*8+2][2];
710    uint8 floor1_multiplier;
711    uint8 rangebits;
712    int values;
713 } Floor1;
714 
715 typedef union
716 {
717    Floor0 floor0;
718    Floor1 floor1;
719 } Floor;
720 
721 typedef struct
722 {
723    uint32 begin, end;
724    uint32 part_size;
725    uint8 classifications;
726    uint8 classbook;
727    uint8 **classdata;
728    int16 (*residue_books)[8];
729 } Residue;
730 
731 typedef struct
732 {
733    uint8 magnitude;
734    uint8 angle;
735    uint8 mux;
736 } MappingChannel;
737 
738 typedef struct
739 {
740    uint16 coupling_steps;
741    MappingChannel *chan;
742    uint8  submaps;
743    uint8  submap_floor[15]; // varies
744    uint8  submap_residue[15]; // varies
745 } Mapping;
746 
747 typedef struct
748 {
749    uint8 blockflag;
750    uint8 mapping;
751    uint16 windowtype;
752    uint16 transformtype;
753 } Mode;
754 
755 typedef struct
756 {
757    uint32  goal_crc;    // expected crc if match
758    int     bytes_left;  // bytes left in packet
759    uint32  crc_so_far;  // running crc
760    int     bytes_done;  // bytes processed in _current_ chunk
761    uint32  sample_loc;  // granule pos encoded in page
762 } CRCscan;
763 
764 typedef struct
765 {
766    uint32 page_start, page_end;
767    uint32 last_decoded_sample;
768 } ProbedPage;
769 
770 struct stb_vorbis
771 {
772   // user-accessible info
773    unsigned int sample_rate;
774    int channels;
775 
776    unsigned int setup_memory_required;
777    unsigned int temp_memory_required;
778    unsigned int setup_temp_memory_required;
779 
780    char *vendor;
781    int comment_list_length;
782    char **comment_list;
783 
784   // input config
785 #ifndef STB_VORBIS_NO_STDIO
786    FILE *f;
787    uint32 f_start;
788    int close_on_free;
789 #endif
790 
791    uint8 *stream;
792    uint8 *stream_start;
793    uint8 *stream_end;
794 
795    uint32 stream_len;
796 
797    uint8  push_mode;
798 
799    // the page to seek to when seeking to start, may be zero
800    uint32 first_audio_page_offset;
801 
802    // p_first is the page on which the first audio packet ends
803    // (but not necessarily the page on which it starts)
804    ProbedPage p_first, p_last;
805 
806   // memory management
807    stb_vorbis_alloc alloc;
808    int setup_offset;
809    int temp_offset;
810 
811   // run-time results
812    int eof;
813    enum STBVorbisError error;
814 
815   // user-useful data
816 
817   // header info
818    int blocksize[2];
819    int blocksize_0, blocksize_1;
820    int codebook_count;
821    Codebook *codebooks;
822    int floor_count;
823    uint16 floor_types[64]; // varies
824    Floor *floor_config;
825    int residue_count;
826    uint16 residue_types[64]; // varies
827    Residue *residue_config;
828    int mapping_count;
829    Mapping *mapping;
830    int mode_count;
831    Mode mode_config[64];  // varies
832 
833    uint32 total_samples;
834 
835   // decode buffer
836    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
837    float *outputs        [STB_VORBIS_MAX_CHANNELS];
838 
839    float *previous_window[STB_VORBIS_MAX_CHANNELS];
840    int previous_length;
841 
842    #ifndef STB_VORBIS_NO_DEFER_FLOOR
843    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
844    #else
845    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
846    #endif
847 
848    uint32 current_loc; // sample location of next frame to decode
849    int    current_loc_valid;
850 
851   // per-blocksize precomputed data
852 
853    // twiddle factors
854    float *A[2],*B[2],*C[2];
855    float *window[2];
856    uint16 *bit_reverse[2];
857 
858   // current page/packet/segment streaming info
859    uint32 serial; // stream serial number for verification
860    int last_page;
861    int segment_count;
862    uint8 segments[255];
863    uint8 page_flag;
864    uint8 bytes_in_seg;
865    uint8 first_decode;
866    int next_seg;
867    int last_seg;  // flag that we're on the last segment
868    int last_seg_which; // what was the segment number of the last seg?
869    uint32 acc;
870    int valid_bits;
871    int packet_bytes;
872    int end_seg_with_known_loc;
873    uint32 known_loc_for_packet;
874    int discard_samples_deferred;
875    uint32 samples_output;
876 
877   // push mode scanning
878    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
879 #ifndef STB_VORBIS_NO_PUSHDATA_API
880    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
881 #endif
882 
883   // sample-access
884    int channel_buffer_start;
885    int channel_buffer_end;
886 };
887 
888 #if defined(STB_VORBIS_NO_PUSHDATA_API)
889    #define IS_PUSH_MODE(f)   FALSE
890 #elif defined(STB_VORBIS_NO_PULLDATA_API)
891    #define IS_PUSH_MODE(f)   TRUE
892 #else
893    #define IS_PUSH_MODE(f)   ((f)->push_mode)
894 #endif
895 
896 typedef struct stb_vorbis vorb;
897 
error(vorb * f,enum STBVorbisError e)898 static int error(vorb *f, enum STBVorbisError e)
899 {
900    f->error = e;
901    if (!f->eof && e != VORBIS_need_more_data) {
902       f->error=e; // breakpoint for debugging
903    }
904    return 0;
905 }
906 
907 
908 // these functions are used for allocating temporary memory
909 // while decoding. if you can afford the stack space, use
910 // alloca(); otherwise, provide a temp buffer and it will
911 // allocate out of those.
912 
913 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
914 
915 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
916 #define temp_free(f,p)                  (void)0
917 #define temp_alloc_save(f)              ((f)->temp_offset)
918 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
919 
920 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
921 
922 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)923 static void *make_block_array(void *mem, int count, int size)
924 {
925    int i;
926    void ** p = (void **) mem;
927    char *q = (char *) (p + count);
928    for (i=0; i < count; ++i) {
929       p[i] = q;
930       q += size;
931    }
932    return p;
933 }
934 
setup_malloc(vorb * f,int sz)935 static void *setup_malloc(vorb *f, int sz)
936 {
937    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
938    f->setup_memory_required += sz;
939    if (f->alloc.alloc_buffer) {
940       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
941       if (f->setup_offset + sz > f->temp_offset) return NULL;
942       f->setup_offset += sz;
943       return p;
944    }
945    return sz ? malloc(sz) : NULL;
946 }
947 
setup_free(vorb * f,void * p)948 static void setup_free(vorb *f, void *p)
949 {
950    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
951    free(p);
952 }
953 
setup_temp_malloc(vorb * f,int sz)954 static void *setup_temp_malloc(vorb *f, int sz)
955 {
956    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
957    if (f->alloc.alloc_buffer) {
958       if (f->temp_offset - sz < f->setup_offset) return NULL;
959       f->temp_offset -= sz;
960       return (char *) f->alloc.alloc_buffer + f->temp_offset;
961    }
962    return malloc(sz);
963 }
964 
setup_temp_free(vorb * f,void * p,int sz)965 static void setup_temp_free(vorb *f, void *p, int sz)
966 {
967    if (f->alloc.alloc_buffer) {
968       f->temp_offset += (sz+7)&~7;
969       return;
970    }
971    free(p);
972 }
973 
974 #define CRC32_POLY    0x04c11db7   // from spec
975 
976 static uint32 crc_table[256];
crc32_init(void)977 static void crc32_init(void)
978 {
979    int i,j;
980    uint32 s;
981    for(i=0; i < 256; i++) {
982       for (s=(uint32) i << 24, j=0; j < 8; ++j)
983          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
984       crc_table[i] = s;
985    }
986 }
987 
crc32_update(uint32 crc,uint8 byte)988 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
989 {
990    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
991 }
992 
993 
994 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)995 static unsigned int bit_reverse(unsigned int n)
996 {
997   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
998   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
999   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
1000   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
1001   return (n >> 16) | (n << 16);
1002 }
1003 
square(float x)1004 static float square(float x)
1005 {
1006    return x*x;
1007 }
1008 
1009 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
1010 // as required by the specification. fast(?) implementation from stb.h
1011 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)1012 static int ilog(int32 n)
1013 {
1014    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
1015 
1016    if (n < 0) return 0; // signed n returns 0
1017 
1018    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
1019    if (n < (1 << 14))
1020         if (n < (1 <<  4))            return  0 + log2_4[n      ];
1021         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
1022              else                     return 10 + log2_4[n >> 10];
1023    else if (n < (1 << 24))
1024              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
1025              else                     return 20 + log2_4[n >> 20];
1026         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
1027              else                     return 30 + log2_4[n >> 30];
1028 }
1029 
1030 #ifndef M_PI
1031   #define M_PI  3.14159265358979323846264f  // from CRC
1032 #endif
1033 
1034 // code length assigned to a value with no huffman encoding
1035 #define NO_CODE   255
1036 
1037 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1038 //
1039 // these functions are only called at setup, and only a few times
1040 // per file
1041 
float32_unpack(uint32 x)1042 static float float32_unpack(uint32 x)
1043 {
1044    // from the specification
1045    uint32 mantissa = x & 0x1fffff;
1046    uint32 sign = x & 0x80000000;
1047    uint32 exp = (x & 0x7fe00000) >> 21;
1048    double res = sign ? -(double)mantissa : (double)mantissa;
1049    return (float) ldexp((float)res, exp-788);
1050 }
1051 
1052 
1053 // zlib & jpeg huffman tables assume that the output symbols
1054 // can either be arbitrarily arranged, or have monotonically
1055 // increasing frequencies--they rely on the lengths being sorted;
1056 // this makes for a very simple generation algorithm.
1057 // vorbis allows a huffman table with non-sorted lengths. This
1058 // requires a more sophisticated construction, since symbols in
1059 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1060 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1061 {
1062    if (!c->sparse) {
1063       c->codewords      [symbol] = huff_code;
1064    } else {
1065       c->codewords       [count] = huff_code;
1066       c->codeword_lengths[count] = len;
1067       values             [count] = symbol;
1068    }
1069 }
1070 
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1071 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1072 {
1073    int i,k,m=0;
1074    uint32 available[32];
1075 
1076    memset(available, 0, sizeof(available));
1077    // find the first entry
1078    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1079    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1080    // add to the list
1081    add_entry(c, 0, k, m++, len[k], values);
1082    // add all available leaves
1083    for (i=1; i <= len[k]; ++i)
1084       available[i] = 1U << (32-i);
1085    // note that the above code treats the first case specially,
1086    // but it's really the same as the following code, so they
1087    // could probably be combined (except the initial code is 0,
1088    // and I use 0 in available[] to mean 'empty')
1089    for (i=k+1; i < n; ++i) {
1090       uint32 res;
1091       int z = len[i], y;
1092       if (z == NO_CODE) continue;
1093       // find lowest available leaf (should always be earliest,
1094       // which is what the specification calls for)
1095       // note that this property, and the fact we can never have
1096       // more than one free leaf at a given level, isn't totally
1097       // trivial to prove, but it seems true and the assert never
1098       // fires, so!
1099       while (z > 0 && !available[z]) --z;
1100       if (z == 0) { return FALSE; }
1101       res = available[z];
1102       assert(z >= 0 && z < 32);
1103       available[z] = 0;
1104       add_entry(c, bit_reverse(res), i, m++, len[i], values);
1105       // propagate availability up the tree
1106       if (z != len[i]) {
1107          assert(len[i] >= 0 && len[i] < 32);
1108          for (y=len[i]; y > z; --y) {
1109             assert(available[y] == 0);
1110             available[y] = res + (1 << (32-y));
1111          }
1112       }
1113    }
1114    return TRUE;
1115 }
1116 
1117 // accelerated huffman table allows fast O(1) match of all symbols
1118 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1119 static void compute_accelerated_huffman(Codebook *c)
1120 {
1121    int i, len;
1122    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1123       c->fast_huffman[i] = -1;
1124 
1125    len = c->sparse ? c->sorted_entries : c->entries;
1126    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1127    if (len > 32767) len = 32767; // largest possible value we can encode!
1128    #endif
1129    for (i=0; i < len; ++i) {
1130       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1131          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1132          // set table entries for all bit combinations in the higher bits
1133          while (z < FAST_HUFFMAN_TABLE_SIZE) {
1134              c->fast_huffman[z] = i;
1135              z += 1 << c->codeword_lengths[i];
1136          }
1137       }
1138    }
1139 }
1140 
1141 #ifdef _MSC_VER
1142 #define STBV_CDECL __cdecl
1143 #else
1144 #define STBV_CDECL
1145 #endif
1146 
uint32_compare(const void * p,const void * q)1147 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1148 {
1149    uint32 x = * (uint32 *) p;
1150    uint32 y = * (uint32 *) q;
1151    return x < y ? -1 : x > y;
1152 }
1153 
include_in_sort(Codebook * c,uint8 len)1154 static int include_in_sort(Codebook *c, uint8 len)
1155 {
1156    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1157    if (len == NO_CODE) return FALSE;
1158    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1159    return FALSE;
1160 }
1161 
1162 // if the fast table above doesn't work, we want to binary
1163 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1164 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1165 {
1166    int i, len;
1167    // build a list of all the entries
1168    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1169    // this is kind of a frivolous optimization--I don't see any performance improvement,
1170    // but it's like 4 extra lines of code, so.
1171    if (!c->sparse) {
1172       int k = 0;
1173       for (i=0; i < c->entries; ++i)
1174          if (include_in_sort(c, lengths[i]))
1175             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1176       assert(k == c->sorted_entries);
1177    } else {
1178       for (i=0; i < c->sorted_entries; ++i)
1179          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1180    }
1181 
1182    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1183    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1184 
1185    len = c->sparse ? c->sorted_entries : c->entries;
1186    // now we need to indicate how they correspond; we could either
1187    //   #1: sort a different data structure that says who they correspond to
1188    //   #2: for each sorted entry, search the original list to find who corresponds
1189    //   #3: for each original entry, find the sorted entry
1190    // #1 requires extra storage, #2 is slow, #3 can use binary search!
1191    for (i=0; i < len; ++i) {
1192       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1193       if (include_in_sort(c,huff_len)) {
1194          uint32 code = bit_reverse(c->codewords[i]);
1195          int x=0, n=c->sorted_entries;
1196          while (n > 1) {
1197             // invariant: sc[x] <= code < sc[x+n]
1198             int m = x + (n >> 1);
1199             if (c->sorted_codewords[m] <= code) {
1200                x = m;
1201                n -= (n>>1);
1202             } else {
1203                n >>= 1;
1204             }
1205          }
1206          assert(c->sorted_codewords[x] == code);
1207          if (c->sparse) {
1208             c->sorted_values[x] = values[i];
1209             c->codeword_lengths[x] = huff_len;
1210          } else {
1211             c->sorted_values[x] = i;
1212          }
1213       }
1214    }
1215 }
1216 
1217 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1218 static int vorbis_validate(uint8 *data)
1219 {
1220    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1221    return memcmp(data, vorbis, 6) == 0;
1222 }
1223 
1224 // called from setup only, once per code book
1225 // (formula implied by specification)
lookup1_values(int entries,int dim)1226 static int lookup1_values(int entries, int dim)
1227 {
1228    int r = (int) floor(exp((float) log((float) entries) / dim));
1229    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
1230       ++r;                                              // floor() to avoid _ftol() when non-CRT
1231    if (pow((float) r+1, dim) <= entries)
1232       return -1;
1233    if ((int) floor(pow((float) r, dim)) > entries)
1234       return -1;
1235    return r;
1236 }
1237 
1238 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1239 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1240 {
1241    int n4 = n >> 2, n8 = n >> 3;
1242    int k,k2;
1243 
1244    for (k=k2=0; k < n4; ++k,k2+=2) {
1245       A[k2  ] = (float)  cos(4*k*M_PI/n);
1246       A[k2+1] = (float) -sin(4*k*M_PI/n);
1247       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
1248       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
1249    }
1250    for (k=k2=0; k < n8; ++k,k2+=2) {
1251       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
1252       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1253    }
1254 }
1255 
compute_window(int n,float * window)1256 static void compute_window(int n, float *window)
1257 {
1258    int n2 = n >> 1, i;
1259    for (i=0; i < n2; ++i)
1260       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1261 }
1262 
compute_bitreverse(int n,uint16 * rev)1263 static void compute_bitreverse(int n, uint16 *rev)
1264 {
1265    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1266    int i, n8 = n >> 3;
1267    for (i=0; i < n8; ++i)
1268       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1269 }
1270 
init_blocksize(vorb * f,int b,int n)1271 static int init_blocksize(vorb *f, int b, int n)
1272 {
1273    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1274    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1275    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1276    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1277    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1278    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1279    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1280    if (!f->window[b]) return error(f, VORBIS_outofmem);
1281    compute_window(n, f->window[b]);
1282    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1283    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1284    compute_bitreverse(n, f->bit_reverse[b]);
1285    return TRUE;
1286 }
1287 
neighbors(uint16 * x,int n,int * plow,int * phigh)1288 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1289 {
1290    int low = -1;
1291    int high = 65536;
1292    int i;
1293    for (i=0; i < n; ++i) {
1294       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
1295       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1296    }
1297 }
1298 
1299 // this has been repurposed so y is now the original index instead of y
1300 typedef struct
1301 {
1302    uint16 x,id;
1303 } stbv__floor_ordering;
1304 
point_compare(const void * p,const void * q)1305 static int STBV_CDECL point_compare(const void *p, const void *q)
1306 {
1307    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1308    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1309    return a->x < b->x ? -1 : a->x > b->x;
1310 }
1311 
1312 //
1313 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1314 
1315 
1316 #if defined(STB_VORBIS_NO_STDIO)
1317    #define USE_MEMORY(z)    TRUE
1318 #else
1319    #define USE_MEMORY(z)    ((z)->stream)
1320 #endif
1321 
get8(vorb * z)1322 static uint8 get8(vorb *z)
1323 {
1324    if (USE_MEMORY(z)) {
1325       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1326       return *z->stream++;
1327    }
1328 
1329    #ifndef STB_VORBIS_NO_STDIO
1330    {
1331    int c = fgetc(z->f);
1332    if (c == EOF) { z->eof = TRUE; return 0; }
1333    return c;
1334    }
1335    #endif
1336 }
1337 
get32(vorb * f)1338 static uint32 get32(vorb *f)
1339 {
1340    uint32 x;
1341    x = get8(f);
1342    x += get8(f) << 8;
1343    x += get8(f) << 16;
1344    x += (uint32) get8(f) << 24;
1345    return x;
1346 }
1347 
getn(vorb * z,uint8 * data,int n)1348 static int getn(vorb *z, uint8 *data, int n)
1349 {
1350    if (USE_MEMORY(z)) {
1351       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1352       memcpy(data, z->stream, n);
1353       z->stream += n;
1354       return 1;
1355    }
1356 
1357    #ifndef STB_VORBIS_NO_STDIO
1358    if (fread(data, n, 1, z->f) == 1)
1359       return 1;
1360    else {
1361       z->eof = 1;
1362       return 0;
1363    }
1364    #endif
1365 }
1366 
skip(vorb * z,int n)1367 static void skip(vorb *z, int n)
1368 {
1369    if (USE_MEMORY(z)) {
1370       z->stream += n;
1371       if (z->stream >= z->stream_end) z->eof = 1;
1372       return;
1373    }
1374    #ifndef STB_VORBIS_NO_STDIO
1375    {
1376       long x = ftell(z->f);
1377       fseek(z->f, x+n, SEEK_SET);
1378    }
1379    #endif
1380 }
1381 
set_file_offset(stb_vorbis * f,unsigned int loc)1382 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1383 {
1384    #ifndef STB_VORBIS_NO_PUSHDATA_API
1385    if (f->push_mode) return 0;
1386    #endif
1387    f->eof = 0;
1388    if (USE_MEMORY(f)) {
1389       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1390          f->stream = f->stream_end;
1391          f->eof = 1;
1392          return 0;
1393       } else {
1394          f->stream = f->stream_start + loc;
1395          return 1;
1396       }
1397    }
1398    #ifndef STB_VORBIS_NO_STDIO
1399    if (loc + f->f_start < loc || loc >= 0x80000000) {
1400       loc = 0x7fffffff;
1401       f->eof = 1;
1402    } else {
1403       loc += f->f_start;
1404    }
1405    if (!fseek(f->f, loc, SEEK_SET))
1406       return 1;
1407    f->eof = 1;
1408    fseek(f->f, f->f_start, SEEK_END);
1409    return 0;
1410    #endif
1411 }
1412 
1413 
1414 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1415 
capture_pattern(vorb * f)1416 static int capture_pattern(vorb *f)
1417 {
1418    if (0x4f != get8(f)) return FALSE;
1419    if (0x67 != get8(f)) return FALSE;
1420    if (0x67 != get8(f)) return FALSE;
1421    if (0x53 != get8(f)) return FALSE;
1422    return TRUE;
1423 }
1424 
1425 #define PAGEFLAG_continued_packet   1
1426 #define PAGEFLAG_first_page         2
1427 #define PAGEFLAG_last_page          4
1428 
start_page_no_capturepattern(vorb * f)1429 static int start_page_no_capturepattern(vorb *f)
1430 {
1431    uint32 loc0,loc1,n;
1432    if (f->first_decode && !IS_PUSH_MODE(f)) {
1433       f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1434    }
1435    // stream structure version
1436    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1437    // header flag
1438    f->page_flag = get8(f);
1439    // absolute granule position
1440    loc0 = get32(f);
1441    loc1 = get32(f);
1442    // @TODO: validate loc0,loc1 as valid positions?
1443    // stream serial number -- vorbis doesn't interleave, so discard
1444    get32(f);
1445    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1446    // page sequence number
1447    n = get32(f);
1448    f->last_page = n;
1449    // CRC32
1450    get32(f);
1451    // page_segments
1452    f->segment_count = get8(f);
1453    if (!getn(f, f->segments, f->segment_count))
1454       return error(f, VORBIS_unexpected_eof);
1455    // assume we _don't_ know any the sample position of any segments
1456    f->end_seg_with_known_loc = -2;
1457    if (loc0 != ~0U || loc1 != ~0U) {
1458       int i;
1459       // determine which packet is the last one that will complete
1460       for (i=f->segment_count-1; i >= 0; --i)
1461          if (f->segments[i] < 255)
1462             break;
1463       // 'i' is now the index of the _last_ segment of a packet that ends
1464       if (i >= 0) {
1465          f->end_seg_with_known_loc = i;
1466          f->known_loc_for_packet   = loc0;
1467       }
1468    }
1469    if (f->first_decode) {
1470       int i,len;
1471       len = 0;
1472       for (i=0; i < f->segment_count; ++i)
1473          len += f->segments[i];
1474       len += 27 + f->segment_count;
1475       f->p_first.page_end = f->p_first.page_start + len;
1476       f->p_first.last_decoded_sample = loc0;
1477    }
1478    f->next_seg = 0;
1479    return TRUE;
1480 }
1481 
start_page(vorb * f)1482 static int start_page(vorb *f)
1483 {
1484    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1485    return start_page_no_capturepattern(f);
1486 }
1487 
start_packet(vorb * f)1488 static int start_packet(vorb *f)
1489 {
1490    while (f->next_seg == -1) {
1491       if (!start_page(f)) return FALSE;
1492       if (f->page_flag & PAGEFLAG_continued_packet)
1493          return error(f, VORBIS_continued_packet_flag_invalid);
1494    }
1495    f->last_seg = FALSE;
1496    f->valid_bits = 0;
1497    f->packet_bytes = 0;
1498    f->bytes_in_seg = 0;
1499    // f->next_seg is now valid
1500    return TRUE;
1501 }
1502 
maybe_start_packet(vorb * f)1503 static int maybe_start_packet(vorb *f)
1504 {
1505    if (f->next_seg == -1) {
1506       int x = get8(f);
1507       if (f->eof) return FALSE; // EOF at page boundary is not an error!
1508       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1509       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1510       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1511       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1512       if (!start_page_no_capturepattern(f)) return FALSE;
1513       if (f->page_flag & PAGEFLAG_continued_packet) {
1514          // set up enough state that we can read this packet if we want,
1515          // e.g. during recovery
1516          f->last_seg = FALSE;
1517          f->bytes_in_seg = 0;
1518          return error(f, VORBIS_continued_packet_flag_invalid);
1519       }
1520    }
1521    return start_packet(f);
1522 }
1523 
next_segment(vorb * f)1524 static int next_segment(vorb *f)
1525 {
1526    int len;
1527    if (f->last_seg) return 0;
1528    if (f->next_seg == -1) {
1529       f->last_seg_which = f->segment_count-1; // in case start_page fails
1530       if (!start_page(f)) { f->last_seg = 1; return 0; }
1531       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1532    }
1533    len = f->segments[f->next_seg++];
1534    if (len < 255) {
1535       f->last_seg = TRUE;
1536       f->last_seg_which = f->next_seg-1;
1537    }
1538    if (f->next_seg >= f->segment_count)
1539       f->next_seg = -1;
1540    assert(f->bytes_in_seg == 0);
1541    f->bytes_in_seg = len;
1542    return len;
1543 }
1544 
1545 #define EOP    (-1)
1546 #define INVALID_BITS  (-1)
1547 
get8_packet_raw(vorb * f)1548 static int get8_packet_raw(vorb *f)
1549 {
1550    if (!f->bytes_in_seg) {  // CLANG!
1551       if (f->last_seg) return EOP;
1552       else if (!next_segment(f)) return EOP;
1553    }
1554    assert(f->bytes_in_seg > 0);
1555    --f->bytes_in_seg;
1556    ++f->packet_bytes;
1557    return get8(f);
1558 }
1559 
get8_packet(vorb * f)1560 static int get8_packet(vorb *f)
1561 {
1562    int x = get8_packet_raw(f);
1563    f->valid_bits = 0;
1564    return x;
1565 }
1566 
get32_packet(vorb * f)1567 static int get32_packet(vorb *f)
1568 {
1569    uint32 x;
1570    x = get8_packet(f);
1571    x += get8_packet(f) << 8;
1572    x += get8_packet(f) << 16;
1573    x += (uint32) get8_packet(f) << 24;
1574    return x;
1575 }
1576 
flush_packet(vorb * f)1577 static void flush_packet(vorb *f)
1578 {
1579    while (get8_packet_raw(f) != EOP);
1580 }
1581 
1582 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1583 // as the huffman decoder?
get_bits(vorb * f,int n)1584 static uint32 get_bits(vorb *f, int n)
1585 {
1586    uint32 z;
1587 
1588    if (f->valid_bits < 0) return 0;
1589    if (f->valid_bits < n) {
1590       if (n > 24) {
1591          // the accumulator technique below would not work correctly in this case
1592          z = get_bits(f, 24);
1593          z += get_bits(f, n-24) << 24;
1594          return z;
1595       }
1596       if (f->valid_bits == 0) f->acc = 0;
1597       while (f->valid_bits < n) {
1598          int z = get8_packet_raw(f);
1599          if (z == EOP) {
1600             f->valid_bits = INVALID_BITS;
1601             return 0;
1602          }
1603          f->acc += z << f->valid_bits;
1604          f->valid_bits += 8;
1605       }
1606    }
1607 
1608    assert(f->valid_bits >= n);
1609    z = f->acc & ((1 << n)-1);
1610    f->acc >>= n;
1611    f->valid_bits -= n;
1612    return z;
1613 }
1614 
1615 // @OPTIMIZE: primary accumulator for huffman
1616 // expand the buffer to as many bits as possible without reading off end of packet
1617 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1618 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1619 static __forceinline void prep_huffman(vorb *f)
1620 {
1621    if (f->valid_bits <= 24) {
1622       if (f->valid_bits == 0) f->acc = 0;
1623       do {
1624          int z;
1625          if (f->last_seg && !f->bytes_in_seg) return;
1626          z = get8_packet_raw(f);
1627          if (z == EOP) return;
1628          f->acc += (unsigned) z << f->valid_bits;
1629          f->valid_bits += 8;
1630       } while (f->valid_bits <= 24);
1631    }
1632 }
1633 
1634 enum
1635 {
1636    VORBIS_packet_id = 1,
1637    VORBIS_packet_comment = 3,
1638    VORBIS_packet_setup = 5
1639 };
1640 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1641 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1642 {
1643    int i;
1644    prep_huffman(f);
1645 
1646    if (c->codewords == NULL && c->sorted_codewords == NULL)
1647       return -1;
1648 
1649    // cases to use binary search: sorted_codewords && !c->codewords
1650    //                             sorted_codewords && c->entries > 8
1651    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1652       // binary search
1653       uint32 code = bit_reverse(f->acc);
1654       int x=0, n=c->sorted_entries, len;
1655 
1656       while (n > 1) {
1657          // invariant: sc[x] <= code < sc[x+n]
1658          int m = x + (n >> 1);
1659          if (c->sorted_codewords[m] <= code) {
1660             x = m;
1661             n -= (n>>1);
1662          } else {
1663             n >>= 1;
1664          }
1665       }
1666       // x is now the sorted index
1667       if (!c->sparse) x = c->sorted_values[x];
1668       // x is now sorted index if sparse, or symbol otherwise
1669       len = c->codeword_lengths[x];
1670       if (f->valid_bits >= len) {
1671          f->acc >>= len;
1672          f->valid_bits -= len;
1673          return x;
1674       }
1675 
1676       f->valid_bits = 0;
1677       return -1;
1678    }
1679 
1680    // if small, linear search
1681    assert(!c->sparse);
1682    for (i=0; i < c->entries; ++i) {
1683       if (c->codeword_lengths[i] == NO_CODE) continue;
1684       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1685          if (f->valid_bits >= c->codeword_lengths[i]) {
1686             f->acc >>= c->codeword_lengths[i];
1687             f->valid_bits -= c->codeword_lengths[i];
1688             return i;
1689          }
1690          f->valid_bits = 0;
1691          return -1;
1692       }
1693    }
1694 
1695    error(f, VORBIS_invalid_stream);
1696    f->valid_bits = 0;
1697    return -1;
1698 }
1699 
1700 #ifndef STB_VORBIS_NO_INLINE_DECODE
1701 
1702 #define DECODE_RAW(var, f,c)                                  \
1703    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
1704       prep_huffman(f);                                        \
1705    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
1706    var = c->fast_huffman[var];                                \
1707    if (var >= 0) {                                            \
1708       int n = c->codeword_lengths[var];                       \
1709       f->acc >>= n;                                           \
1710       f->valid_bits -= n;                                     \
1711       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1712    } else {                                                   \
1713       var = codebook_decode_scalar_raw(f,c);                  \
1714    }
1715 
1716 #else
1717 
codebook_decode_scalar(vorb * f,Codebook * c)1718 static int codebook_decode_scalar(vorb *f, Codebook *c)
1719 {
1720    int i;
1721    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1722       prep_huffman(f);
1723    // fast huffman table lookup
1724    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1725    i = c->fast_huffman[i];
1726    if (i >= 0) {
1727       f->acc >>= c->codeword_lengths[i];
1728       f->valid_bits -= c->codeword_lengths[i];
1729       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1730       return i;
1731    }
1732    return codebook_decode_scalar_raw(f,c);
1733 }
1734 
1735 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1736 
1737 #endif
1738 
1739 #define DECODE(var,f,c)                                       \
1740    DECODE_RAW(var,f,c)                                        \
1741    if (c->sparse) var = c->sorted_values[var];
1742 
1743 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1744   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1745 #else
1746   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
1747 #endif
1748 
1749 
1750 
1751 
1752 
1753 
1754 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1755 // where we avoid one addition
1756 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1757 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1758 #define CODEBOOK_ELEMENT_BASE(c)         (0)
1759 
codebook_decode_start(vorb * f,Codebook * c)1760 static int codebook_decode_start(vorb *f, Codebook *c)
1761 {
1762    int z = -1;
1763 
1764    // type 0 is only legal in a scalar context
1765    if (c->lookup_type == 0)
1766       error(f, VORBIS_invalid_stream);
1767    else {
1768       DECODE_VQ(z,f,c);
1769       if (c->sparse) assert(z < c->sorted_entries);
1770       if (z < 0) {  // check for EOP
1771          if (!f->bytes_in_seg)
1772             if (f->last_seg)
1773                return z;
1774          error(f, VORBIS_invalid_stream);
1775       }
1776    }
1777    return z;
1778 }
1779 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1780 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1781 {
1782    int i,z = codebook_decode_start(f,c);
1783    if (z < 0) return FALSE;
1784    if (len > c->dimensions) len = c->dimensions;
1785 
1786 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1787    if (c->lookup_type == 1) {
1788       float last = CODEBOOK_ELEMENT_BASE(c);
1789       int div = 1;
1790       for (i=0; i < len; ++i) {
1791          int off = (z / div) % c->lookup_values;
1792          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1793          output[i] += val;
1794          if (c->sequence_p) last = val + c->minimum_value;
1795          div *= c->lookup_values;
1796       }
1797       return TRUE;
1798    }
1799 #endif
1800 
1801    z *= c->dimensions;
1802    if (c->sequence_p) {
1803       float last = CODEBOOK_ELEMENT_BASE(c);
1804       for (i=0; i < len; ++i) {
1805          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1806          output[i] += val;
1807          last = val + c->minimum_value;
1808       }
1809    } else {
1810       float last = CODEBOOK_ELEMENT_BASE(c);
1811       for (i=0; i < len; ++i) {
1812          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1813       }
1814    }
1815 
1816    return TRUE;
1817 }
1818 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1819 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1820 {
1821    int i,z = codebook_decode_start(f,c);
1822    float last = CODEBOOK_ELEMENT_BASE(c);
1823    if (z < 0) return FALSE;
1824    if (len > c->dimensions) len = c->dimensions;
1825 
1826 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1827    if (c->lookup_type == 1) {
1828       int div = 1;
1829       for (i=0; i < len; ++i) {
1830          int off = (z / div) % c->lookup_values;
1831          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1832          output[i*step] += val;
1833          if (c->sequence_p) last = val;
1834          div *= c->lookup_values;
1835       }
1836       return TRUE;
1837    }
1838 #endif
1839 
1840    z *= c->dimensions;
1841    for (i=0; i < len; ++i) {
1842       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1843       output[i*step] += val;
1844       if (c->sequence_p) last = val;
1845    }
1846 
1847    return TRUE;
1848 }
1849 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1850 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1851 {
1852    int c_inter = *c_inter_p;
1853    int p_inter = *p_inter_p;
1854    int i,z, effective = c->dimensions;
1855 
1856    // type 0 is only legal in a scalar context
1857    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1858 
1859    while (total_decode > 0) {
1860       float last = CODEBOOK_ELEMENT_BASE(c);
1861       DECODE_VQ(z,f,c);
1862       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1863       assert(!c->sparse || z < c->sorted_entries);
1864       #endif
1865       if (z < 0) {
1866          if (!f->bytes_in_seg)
1867             if (f->last_seg) return FALSE;
1868          return error(f, VORBIS_invalid_stream);
1869       }
1870 
1871       // if this will take us off the end of the buffers, stop short!
1872       // we check by computing the length of the virtual interleaved
1873       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1874       // and the length we'll be using (effective)
1875       if (c_inter + p_inter*ch + effective > len * ch) {
1876          effective = len*ch - (p_inter*ch - c_inter);
1877       }
1878 
1879    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1880       if (c->lookup_type == 1) {
1881          int div = 1;
1882          for (i=0; i < effective; ++i) {
1883             int off = (z / div) % c->lookup_values;
1884             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1885             if (outputs[c_inter])
1886                outputs[c_inter][p_inter] += val;
1887             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1888             if (c->sequence_p) last = val;
1889             div *= c->lookup_values;
1890          }
1891       } else
1892    #endif
1893       {
1894          z *= c->dimensions;
1895          if (c->sequence_p) {
1896             for (i=0; i < effective; ++i) {
1897                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1898                if (outputs[c_inter])
1899                   outputs[c_inter][p_inter] += val;
1900                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1901                last = val;
1902             }
1903          } else {
1904             for (i=0; i < effective; ++i) {
1905                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1906                if (outputs[c_inter])
1907                   outputs[c_inter][p_inter] += val;
1908                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1909             }
1910          }
1911       }
1912 
1913       total_decode -= effective;
1914    }
1915    *c_inter_p = c_inter;
1916    *p_inter_p = p_inter;
1917    return TRUE;
1918 }
1919 
predict_point(int x,int x0,int x1,int y0,int y1)1920 static int predict_point(int x, int x0, int x1, int y0, int y1)
1921 {
1922    int dy = y1 - y0;
1923    int adx = x1 - x0;
1924    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1925    int err = abs(dy) * (x - x0);
1926    int off = err / adx;
1927    return dy < 0 ? y0 - off : y0 + off;
1928 }
1929 
1930 // the following table is block-copied from the specification
1931 static float inverse_db_table[256] =
1932 {
1933   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1934   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1935   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1936   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1937   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1938   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1939   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1940   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1941   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1942   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1943   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1944   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1945   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1946   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1947   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1948   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1949   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1950   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1951   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1952   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1953   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1954   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1955   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1956   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1957   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1958   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1959   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1960   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1961   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1962   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1963   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1964   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1965   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1966   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1967   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1968   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1969   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1970   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1971   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1972   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1973   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1974   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1975   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1976   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1977   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1978   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1979   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1980   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1981   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1982   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1983   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1984   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1985   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1986   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1987   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1988   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1989   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1990   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1991   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1992   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1993   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1994   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1995   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1996   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1997 };
1998 
1999 
2000 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
2001 // note that you must produce bit-identical output to decode correctly;
2002 // this specific sequence of operations is specified in the spec (it's
2003 // drawing integer-quantized frequency-space lines that the encoder
2004 // expects to be exactly the same)
2005 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
2006 // have to divide in the setup? sigh.
2007 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2008 #define LINE_OP(a,b)   a *= b
2009 #else
2010 #define LINE_OP(a,b)   a = b
2011 #endif
2012 
2013 #ifdef STB_VORBIS_DIVIDE_TABLE
2014 #define DIVTAB_NUMER   32
2015 #define DIVTAB_DENOM   64
2016 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2017 #endif
2018 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)2019 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2020 {
2021    int dy = y1 - y0;
2022    int adx = x1 - x0;
2023    int ady = abs(dy);
2024    int base;
2025    int x=x0,y=y0;
2026    int err = 0;
2027    int sy;
2028 
2029 #ifdef STB_VORBIS_DIVIDE_TABLE
2030    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2031       if (dy < 0) {
2032          base = -integer_divide_table[ady][adx];
2033          sy = base-1;
2034       } else {
2035          base =  integer_divide_table[ady][adx];
2036          sy = base+1;
2037       }
2038    } else {
2039       base = dy / adx;
2040       if (dy < 0)
2041          sy = base - 1;
2042       else
2043          sy = base+1;
2044    }
2045 #else
2046    base = dy / adx;
2047    if (dy < 0)
2048       sy = base - 1;
2049    else
2050       sy = base+1;
2051 #endif
2052    ady -= abs(base) * adx;
2053    if (x1 > n) x1 = n;
2054    if (x < x1) {
2055       LINE_OP(output[x], inverse_db_table[y&255]);
2056       for (++x; x < x1; ++x) {
2057          err += ady;
2058          if (err >= adx) {
2059             err -= adx;
2060             y += sy;
2061          } else
2062             y += base;
2063          LINE_OP(output[x], inverse_db_table[y&255]);
2064       }
2065    }
2066 }
2067 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2068 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2069 {
2070    int k;
2071    if (rtype == 0) {
2072       int step = n / book->dimensions;
2073       for (k=0; k < step; ++k)
2074          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2075             return FALSE;
2076    } else {
2077       for (k=0; k < n; ) {
2078          if (!codebook_decode(f, book, target+offset, n-k))
2079             return FALSE;
2080          k += book->dimensions;
2081          offset += book->dimensions;
2082       }
2083    }
2084    return TRUE;
2085 }
2086 
2087 // n is 1/2 of the blocksize --
2088 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2089 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2090 {
2091    int i,j,pass;
2092    Residue *r = f->residue_config + rn;
2093    int rtype = f->residue_types[rn];
2094    int c = r->classbook;
2095    int classwords = f->codebooks[c].dimensions;
2096    unsigned int actual_size = rtype == 2 ? n*2 : n;
2097    unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2098    unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
2099    int n_read = limit_r_end - limit_r_begin;
2100    int part_read = n_read / r->part_size;
2101    int temp_alloc_point = temp_alloc_save(f);
2102    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2103    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2104    #else
2105    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2106    #endif
2107 
2108    CHECK(f);
2109 
2110    for (i=0; i < ch; ++i)
2111       if (!do_not_decode[i])
2112          memset(residue_buffers[i], 0, sizeof(float) * n);
2113 
2114    if (rtype == 2 && ch != 1) {
2115       for (j=0; j < ch; ++j)
2116          if (!do_not_decode[j])
2117             break;
2118       if (j == ch)
2119          goto done;
2120 
2121       for (pass=0; pass < 8; ++pass) {
2122          int pcount = 0, class_set = 0;
2123          if (ch == 2) {
2124             while (pcount < part_read) {
2125                int z = r->begin + pcount*r->part_size;
2126                int c_inter = (z & 1), p_inter = z>>1;
2127                if (pass == 0) {
2128                   Codebook *c = f->codebooks+r->classbook;
2129                   int q;
2130                   DECODE(q,f,c);
2131                   if (q == EOP) goto done;
2132                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2133                   part_classdata[0][class_set] = r->classdata[q];
2134                   #else
2135                   for (i=classwords-1; i >= 0; --i) {
2136                      classifications[0][i+pcount] = q % r->classifications;
2137                      q /= r->classifications;
2138                   }
2139                   #endif
2140                }
2141                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2142                   int z = r->begin + pcount*r->part_size;
2143                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2144                   int c = part_classdata[0][class_set][i];
2145                   #else
2146                   int c = classifications[0][pcount];
2147                   #endif
2148                   int b = r->residue_books[c][pass];
2149                   if (b >= 0) {
2150                      Codebook *book = f->codebooks + b;
2151                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2152                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2153                         goto done;
2154                      #else
2155                      // saves 1%
2156                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2157                         goto done;
2158                      #endif
2159                   } else {
2160                      z += r->part_size;
2161                      c_inter = z & 1;
2162                      p_inter = z >> 1;
2163                   }
2164                }
2165                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2166                ++class_set;
2167                #endif
2168             }
2169          } else if (ch > 2) {
2170             while (pcount < part_read) {
2171                int z = r->begin + pcount*r->part_size;
2172                int c_inter = z % ch, p_inter = z/ch;
2173                if (pass == 0) {
2174                   Codebook *c = f->codebooks+r->classbook;
2175                   int q;
2176                   DECODE(q,f,c);
2177                   if (q == EOP) goto done;
2178                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2179                   part_classdata[0][class_set] = r->classdata[q];
2180                   #else
2181                   for (i=classwords-1; i >= 0; --i) {
2182                      classifications[0][i+pcount] = q % r->classifications;
2183                      q /= r->classifications;
2184                   }
2185                   #endif
2186                }
2187                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2188                   int z = r->begin + pcount*r->part_size;
2189                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2190                   int c = part_classdata[0][class_set][i];
2191                   #else
2192                   int c = classifications[0][pcount];
2193                   #endif
2194                   int b = r->residue_books[c][pass];
2195                   if (b >= 0) {
2196                      Codebook *book = f->codebooks + b;
2197                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2198                         goto done;
2199                   } else {
2200                      z += r->part_size;
2201                      c_inter = z % ch;
2202                      p_inter = z / ch;
2203                   }
2204                }
2205                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2206                ++class_set;
2207                #endif
2208             }
2209          }
2210       }
2211       goto done;
2212    }
2213    CHECK(f);
2214 
2215    for (pass=0; pass < 8; ++pass) {
2216       int pcount = 0, class_set=0;
2217       while (pcount < part_read) {
2218          if (pass == 0) {
2219             for (j=0; j < ch; ++j) {
2220                if (!do_not_decode[j]) {
2221                   Codebook *c = f->codebooks+r->classbook;
2222                   int temp;
2223                   DECODE(temp,f,c);
2224                   if (temp == EOP) goto done;
2225                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2226                   part_classdata[j][class_set] = r->classdata[temp];
2227                   #else
2228                   for (i=classwords-1; i >= 0; --i) {
2229                      classifications[j][i+pcount] = temp % r->classifications;
2230                      temp /= r->classifications;
2231                   }
2232                   #endif
2233                }
2234             }
2235          }
2236          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2237             for (j=0; j < ch; ++j) {
2238                if (!do_not_decode[j]) {
2239                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2240                   int c = part_classdata[j][class_set][i];
2241                   #else
2242                   int c = classifications[j][pcount];
2243                   #endif
2244                   int b = r->residue_books[c][pass];
2245                   if (b >= 0) {
2246                      float *target = residue_buffers[j];
2247                      int offset = r->begin + pcount * r->part_size;
2248                      int n = r->part_size;
2249                      Codebook *book = f->codebooks + b;
2250                      if (!residue_decode(f, book, target, offset, n, rtype))
2251                         goto done;
2252                   }
2253                }
2254             }
2255          }
2256          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2257          ++class_set;
2258          #endif
2259       }
2260    }
2261   done:
2262    CHECK(f);
2263    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2264    temp_free(f,part_classdata);
2265    #else
2266    temp_free(f,classifications);
2267    #endif
2268    temp_alloc_restore(f,temp_alloc_point);
2269 }
2270 
2271 
2272 #if 0
2273 // slow way for debugging
2274 void inverse_mdct_slow(float *buffer, int n)
2275 {
2276    int i,j;
2277    int n2 = n >> 1;
2278    float *x = (float *) malloc(sizeof(*x) * n2);
2279    memcpy(x, buffer, sizeof(*x) * n2);
2280    for (i=0; i < n; ++i) {
2281       float acc = 0;
2282       for (j=0; j < n2; ++j)
2283          // formula from paper:
2284          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2285          // formula from wikipedia
2286          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2287          // these are equivalent, except the formula from the paper inverts the multiplier!
2288          // however, what actually works is NO MULTIPLIER!?!
2289          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2290          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2291       buffer[i] = acc;
2292    }
2293    free(x);
2294 }
2295 #elif 0
2296 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2297 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2298 {
2299    float mcos[16384];
2300    int i,j;
2301    int n2 = n >> 1, nmask = (n << 2) -1;
2302    float *x = (float *) malloc(sizeof(*x) * n2);
2303    memcpy(x, buffer, sizeof(*x) * n2);
2304    for (i=0; i < 4*n; ++i)
2305       mcos[i] = (float) cos(M_PI / 2 * i / n);
2306 
2307    for (i=0; i < n; ++i) {
2308       float acc = 0;
2309       for (j=0; j < n2; ++j)
2310          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2311       buffer[i] = acc;
2312    }
2313    free(x);
2314 }
2315 #elif 0
2316 // transform to use a slow dct-iv; this is STILL basically trivial,
2317 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2318 void dct_iv_slow(float *buffer, int n)
2319 {
2320    float mcos[16384];
2321    float x[2048];
2322    int i,j;
2323    int n2 = n >> 1, nmask = (n << 3) - 1;
2324    memcpy(x, buffer, sizeof(*x) * n);
2325    for (i=0; i < 8*n; ++i)
2326       mcos[i] = (float) cos(M_PI / 4 * i / n);
2327    for (i=0; i < n; ++i) {
2328       float acc = 0;
2329       for (j=0; j < n; ++j)
2330          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2331       buffer[i] = acc;
2332    }
2333 }
2334 
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2335 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2336 {
2337    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2338    float temp[4096];
2339 
2340    memcpy(temp, buffer, n2 * sizeof(float));
2341    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2342 
2343    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2344    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2345    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2346 }
2347 #endif
2348 
2349 #ifndef LIBVORBIS_MDCT
2350 #define LIBVORBIS_MDCT 0
2351 #endif
2352 
2353 #if LIBVORBIS_MDCT
2354 // directly call the vorbis MDCT using an interface documented
2355 // by Jeff Roberts... useful for performance comparison
2356 typedef struct
2357 {
2358   int n;
2359   int log2n;
2360 
2361   float *trig;
2362   int   *bitrev;
2363 
2364   float scale;
2365 } mdct_lookup;
2366 
2367 extern void mdct_init(mdct_lookup *lookup, int n);
2368 extern void mdct_clear(mdct_lookup *l);
2369 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2370 
2371 mdct_lookup M1,M2;
2372 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2373 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2374 {
2375    mdct_lookup *M;
2376    if (M1.n == n) M = &M1;
2377    else if (M2.n == n) M = &M2;
2378    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2379    else {
2380       if (M2.n) __asm int 3;
2381       mdct_init(&M2, n);
2382       M = &M2;
2383    }
2384 
2385    mdct_backward(M, buffer, buffer);
2386 }
2387 #endif
2388 
2389 
2390 // the following were split out into separate functions while optimizing;
2391 // they could be pushed back up but eh. __forceinline showed no change;
2392 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2393 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2394 {
2395    float *ee0 = e + i_off;
2396    float *ee2 = ee0 + k_off;
2397    int i;
2398 
2399    assert((n & 3) == 0);
2400    for (i=(n>>2); i > 0; --i) {
2401       float k00_20, k01_21;
2402       k00_20  = ee0[ 0] - ee2[ 0];
2403       k01_21  = ee0[-1] - ee2[-1];
2404       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2405       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2406       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2407       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2408       A += 8;
2409 
2410       k00_20  = ee0[-2] - ee2[-2];
2411       k01_21  = ee0[-3] - ee2[-3];
2412       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2413       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2414       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2415       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2416       A += 8;
2417 
2418       k00_20  = ee0[-4] - ee2[-4];
2419       k01_21  = ee0[-5] - ee2[-5];
2420       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2421       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2422       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2423       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2424       A += 8;
2425 
2426       k00_20  = ee0[-6] - ee2[-6];
2427       k01_21  = ee0[-7] - ee2[-7];
2428       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2429       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2430       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2431       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2432       A += 8;
2433       ee0 -= 8;
2434       ee2 -= 8;
2435    }
2436 }
2437 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2438 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2439 {
2440    int i;
2441    float k00_20, k01_21;
2442 
2443    float *e0 = e + d0;
2444    float *e2 = e0 + k_off;
2445 
2446    for (i=lim >> 2; i > 0; --i) {
2447       k00_20 = e0[-0] - e2[-0];
2448       k01_21 = e0[-1] - e2[-1];
2449       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2450       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2451       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2452       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2453 
2454       A += k1;
2455 
2456       k00_20 = e0[-2] - e2[-2];
2457       k01_21 = e0[-3] - e2[-3];
2458       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2459       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2460       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2461       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2462 
2463       A += k1;
2464 
2465       k00_20 = e0[-4] - e2[-4];
2466       k01_21 = e0[-5] - e2[-5];
2467       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2468       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2469       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2470       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2471 
2472       A += k1;
2473 
2474       k00_20 = e0[-6] - e2[-6];
2475       k01_21 = e0[-7] - e2[-7];
2476       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2477       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2478       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2479       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2480 
2481       e0 -= 8;
2482       e2 -= 8;
2483 
2484       A += k1;
2485    }
2486 }
2487 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2488 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2489 {
2490    int i;
2491    float A0 = A[0];
2492    float A1 = A[0+1];
2493    float A2 = A[0+a_off];
2494    float A3 = A[0+a_off+1];
2495    float A4 = A[0+a_off*2+0];
2496    float A5 = A[0+a_off*2+1];
2497    float A6 = A[0+a_off*3+0];
2498    float A7 = A[0+a_off*3+1];
2499 
2500    float k00,k11;
2501 
2502    float *ee0 = e  +i_off;
2503    float *ee2 = ee0+k_off;
2504 
2505    for (i=n; i > 0; --i) {
2506       k00     = ee0[ 0] - ee2[ 0];
2507       k11     = ee0[-1] - ee2[-1];
2508       ee0[ 0] =  ee0[ 0] + ee2[ 0];
2509       ee0[-1] =  ee0[-1] + ee2[-1];
2510       ee2[ 0] = (k00) * A0 - (k11) * A1;
2511       ee2[-1] = (k11) * A0 + (k00) * A1;
2512 
2513       k00     = ee0[-2] - ee2[-2];
2514       k11     = ee0[-3] - ee2[-3];
2515       ee0[-2] =  ee0[-2] + ee2[-2];
2516       ee0[-3] =  ee0[-3] + ee2[-3];
2517       ee2[-2] = (k00) * A2 - (k11) * A3;
2518       ee2[-3] = (k11) * A2 + (k00) * A3;
2519 
2520       k00     = ee0[-4] - ee2[-4];
2521       k11     = ee0[-5] - ee2[-5];
2522       ee0[-4] =  ee0[-4] + ee2[-4];
2523       ee0[-5] =  ee0[-5] + ee2[-5];
2524       ee2[-4] = (k00) * A4 - (k11) * A5;
2525       ee2[-5] = (k11) * A4 + (k00) * A5;
2526 
2527       k00     = ee0[-6] - ee2[-6];
2528       k11     = ee0[-7] - ee2[-7];
2529       ee0[-6] =  ee0[-6] + ee2[-6];
2530       ee0[-7] =  ee0[-7] + ee2[-7];
2531       ee2[-6] = (k00) * A6 - (k11) * A7;
2532       ee2[-7] = (k11) * A6 + (k00) * A7;
2533 
2534       ee0 -= k0;
2535       ee2 -= k0;
2536    }
2537 }
2538 
iter_54(float * z)2539 static __forceinline void iter_54(float *z)
2540 {
2541    float k00,k11,k22,k33;
2542    float y0,y1,y2,y3;
2543 
2544    k00  = z[ 0] - z[-4];
2545    y0   = z[ 0] + z[-4];
2546    y2   = z[-2] + z[-6];
2547    k22  = z[-2] - z[-6];
2548 
2549    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2550    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2551 
2552    // done with y0,y2
2553 
2554    k33  = z[-3] - z[-7];
2555 
2556    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2557    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2558 
2559    // done with k33
2560 
2561    k11  = z[-1] - z[-5];
2562    y1   = z[-1] + z[-5];
2563    y3   = z[-3] + z[-7];
2564 
2565    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2566    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2567    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2568    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2569 }
2570 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2571 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2572 {
2573    int a_off = base_n >> 3;
2574    float A2 = A[0+a_off];
2575    float *z = e + i_off;
2576    float *base = z - 16 * n;
2577 
2578    while (z > base) {
2579       float k00,k11;
2580 
2581       k00   = z[-0] - z[-8];
2582       k11   = z[-1] - z[-9];
2583       z[-0] = z[-0] + z[-8];
2584       z[-1] = z[-1] + z[-9];
2585       z[-8] =  k00;
2586       z[-9] =  k11 ;
2587 
2588       k00    = z[ -2] - z[-10];
2589       k11    = z[ -3] - z[-11];
2590       z[ -2] = z[ -2] + z[-10];
2591       z[ -3] = z[ -3] + z[-11];
2592       z[-10] = (k00+k11) * A2;
2593       z[-11] = (k11-k00) * A2;
2594 
2595       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
2596       k11    = z[ -5] - z[-13];
2597       z[ -4] = z[ -4] + z[-12];
2598       z[ -5] = z[ -5] + z[-13];
2599       z[-12] = k11;
2600       z[-13] = k00;
2601 
2602       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
2603       k11    = z[ -7] - z[-15];
2604       z[ -6] = z[ -6] + z[-14];
2605       z[ -7] = z[ -7] + z[-15];
2606       z[-14] = (k00+k11) * A2;
2607       z[-15] = (k00-k11) * A2;
2608 
2609       iter_54(z);
2610       iter_54(z-8);
2611       z -= 16;
2612    }
2613 }
2614 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2615 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2616 {
2617    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2618    int ld;
2619    // @OPTIMIZE: reduce register pressure by using fewer variables?
2620    int save_point = temp_alloc_save(f);
2621    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2622    float *u=NULL,*v=NULL;
2623    // twiddle factors
2624    float *A = f->A[blocktype];
2625 
2626    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2627    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2628 
2629    // kernel from paper
2630 
2631 
2632    // merged:
2633    //   copy and reflect spectral data
2634    //   step 0
2635 
2636    // note that it turns out that the items added together during
2637    // this step are, in fact, being added to themselves (as reflected
2638    // by step 0). inexplicable inefficiency! this became obvious
2639    // once I combined the passes.
2640 
2641    // so there's a missing 'times 2' here (for adding X to itself).
2642    // this propagates through linearly to the end, where the numbers
2643    // are 1/2 too small, and need to be compensated for.
2644 
2645    {
2646       float *d,*e, *AA, *e_stop;
2647       d = &buf2[n2-2];
2648       AA = A;
2649       e = &buffer[0];
2650       e_stop = &buffer[n2];
2651       while (e != e_stop) {
2652          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2653          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2654          d -= 2;
2655          AA += 2;
2656          e += 4;
2657       }
2658 
2659       e = &buffer[n2-3];
2660       while (d >= buf2) {
2661          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2662          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2663          d -= 2;
2664          AA += 2;
2665          e -= 4;
2666       }
2667    }
2668 
2669    // now we use symbolic names for these, so that we can
2670    // possibly swap their meaning as we change which operations
2671    // are in place
2672 
2673    u = buffer;
2674    v = buf2;
2675 
2676    // step 2    (paper output is w, now u)
2677    // this could be in place, but the data ends up in the wrong
2678    // place... _somebody_'s got to swap it, so this is nominated
2679    {
2680       float *AA = &A[n2-8];
2681       float *d0,*d1, *e0, *e1;
2682 
2683       e0 = &v[n4];
2684       e1 = &v[0];
2685 
2686       d0 = &u[n4];
2687       d1 = &u[0];
2688 
2689       while (AA >= A) {
2690          float v40_20, v41_21;
2691 
2692          v41_21 = e0[1] - e1[1];
2693          v40_20 = e0[0] - e1[0];
2694          d0[1]  = e0[1] + e1[1];
2695          d0[0]  = e0[0] + e1[0];
2696          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2697          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2698 
2699          v41_21 = e0[3] - e1[3];
2700          v40_20 = e0[2] - e1[2];
2701          d0[3]  = e0[3] + e1[3];
2702          d0[2]  = e0[2] + e1[2];
2703          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2704          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2705 
2706          AA -= 8;
2707 
2708          d0 += 4;
2709          d1 += 4;
2710          e0 += 4;
2711          e1 += 4;
2712       }
2713    }
2714 
2715    // step 3
2716    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2717 
2718    // optimized step 3:
2719 
2720    // the original step3 loop can be nested r inside s or s inside r;
2721    // it's written originally as s inside r, but this is dumb when r
2722    // iterates many times, and s few. So I have two copies of it and
2723    // switch between them halfway.
2724 
2725    // this is iteration 0 of step 3
2726    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2727    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2728 
2729    // this is iteration 1 of step 3
2730    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2731    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2732    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2733    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2734 
2735    l=2;
2736    for (; l < (ld-3)>>1; ++l) {
2737       int k0 = n >> (l+2), k0_2 = k0>>1;
2738       int lim = 1 << (l+1);
2739       int i;
2740       for (i=0; i < lim; ++i)
2741          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2742    }
2743 
2744    for (; l < ld-6; ++l) {
2745       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2746       int rlim = n >> (l+6), r;
2747       int lim = 1 << (l+1);
2748       int i_off;
2749       float *A0 = A;
2750       i_off = n2-1;
2751       for (r=rlim; r > 0; --r) {
2752          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2753          A0 += k1*4;
2754          i_off -= 8;
2755       }
2756    }
2757 
2758    // iterations with count:
2759    //   ld-6,-5,-4 all interleaved together
2760    //       the big win comes from getting rid of needless flops
2761    //         due to the constants on pass 5 & 4 being all 1 and 0;
2762    //       combining them to be simultaneous to improve cache made little difference
2763    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2764 
2765    // output is u
2766 
2767    // step 4, 5, and 6
2768    // cannot be in-place because of step 5
2769    {
2770       uint16 *bitrev = f->bit_reverse[blocktype];
2771       // weirdly, I'd have thought reading sequentially and writing
2772       // erratically would have been better than vice-versa, but in
2773       // fact that's not what my testing showed. (That is, with
2774       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2775 
2776       float *d0 = &v[n4-4];
2777       float *d1 = &v[n2-4];
2778       while (d0 >= v) {
2779          int k4;
2780 
2781          k4 = bitrev[0];
2782          d1[3] = u[k4+0];
2783          d1[2] = u[k4+1];
2784          d0[3] = u[k4+2];
2785          d0[2] = u[k4+3];
2786 
2787          k4 = bitrev[1];
2788          d1[1] = u[k4+0];
2789          d1[0] = u[k4+1];
2790          d0[1] = u[k4+2];
2791          d0[0] = u[k4+3];
2792 
2793          d0 -= 4;
2794          d1 -= 4;
2795          bitrev += 2;
2796       }
2797    }
2798    // (paper output is u, now v)
2799 
2800 
2801    // data must be in buf2
2802    assert(v == buf2);
2803 
2804    // step 7   (paper output is v, now v)
2805    // this is now in place
2806    {
2807       float *C = f->C[blocktype];
2808       float *d, *e;
2809 
2810       d = v;
2811       e = v + n2 - 4;
2812 
2813       while (d < e) {
2814          float a02,a11,b0,b1,b2,b3;
2815 
2816          a02 = d[0] - e[2];
2817          a11 = d[1] + e[3];
2818 
2819          b0 = C[1]*a02 + C[0]*a11;
2820          b1 = C[1]*a11 - C[0]*a02;
2821 
2822          b2 = d[0] + e[ 2];
2823          b3 = d[1] - e[ 3];
2824 
2825          d[0] = b2 + b0;
2826          d[1] = b3 + b1;
2827          e[2] = b2 - b0;
2828          e[3] = b1 - b3;
2829 
2830          a02 = d[2] - e[0];
2831          a11 = d[3] + e[1];
2832 
2833          b0 = C[3]*a02 + C[2]*a11;
2834          b1 = C[3]*a11 - C[2]*a02;
2835 
2836          b2 = d[2] + e[ 0];
2837          b3 = d[3] - e[ 1];
2838 
2839          d[2] = b2 + b0;
2840          d[3] = b3 + b1;
2841          e[0] = b2 - b0;
2842          e[1] = b1 - b3;
2843 
2844          C += 4;
2845          d += 4;
2846          e -= 4;
2847       }
2848    }
2849 
2850    // data must be in buf2
2851 
2852 
2853    // step 8+decode   (paper output is X, now buffer)
2854    // this generates pairs of data a la 8 and pushes them directly through
2855    // the decode kernel (pushing rather than pulling) to avoid having
2856    // to make another pass later
2857 
2858    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2859 
2860    {
2861       float *d0,*d1,*d2,*d3;
2862 
2863       float *B = f->B[blocktype] + n2 - 8;
2864       float *e = buf2 + n2 - 8;
2865       d0 = &buffer[0];
2866       d1 = &buffer[n2-4];
2867       d2 = &buffer[n2];
2868       d3 = &buffer[n-4];
2869       while (e >= v) {
2870          float p0,p1,p2,p3;
2871 
2872          p3 =  e[6]*B[7] - e[7]*B[6];
2873          p2 = -e[6]*B[6] - e[7]*B[7];
2874 
2875          d0[0] =   p3;
2876          d1[3] = - p3;
2877          d2[0] =   p2;
2878          d3[3] =   p2;
2879 
2880          p1 =  e[4]*B[5] - e[5]*B[4];
2881          p0 = -e[4]*B[4] - e[5]*B[5];
2882 
2883          d0[1] =   p1;
2884          d1[2] = - p1;
2885          d2[1] =   p0;
2886          d3[2] =   p0;
2887 
2888          p3 =  e[2]*B[3] - e[3]*B[2];
2889          p2 = -e[2]*B[2] - e[3]*B[3];
2890 
2891          d0[2] =   p3;
2892          d1[1] = - p3;
2893          d2[2] =   p2;
2894          d3[1] =   p2;
2895 
2896          p1 =  e[0]*B[1] - e[1]*B[0];
2897          p0 = -e[0]*B[0] - e[1]*B[1];
2898 
2899          d0[3] =   p1;
2900          d1[0] = - p1;
2901          d2[3] =   p0;
2902          d3[0] =   p0;
2903 
2904          B -= 8;
2905          e -= 8;
2906          d0 += 4;
2907          d2 += 4;
2908          d1 -= 4;
2909          d3 -= 4;
2910       }
2911    }
2912 
2913    temp_free(f,buf2);
2914    temp_alloc_restore(f,save_point);
2915 }
2916 
2917 #if 0
2918 // this is the original version of the above code, if you want to optimize it from scratch
2919 void inverse_mdct_naive(float *buffer, int n)
2920 {
2921    float s;
2922    float A[1 << 12], B[1 << 12], C[1 << 11];
2923    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2924    int n3_4 = n - n4, ld;
2925    // how can they claim this only uses N words?!
2926    // oh, because they're only used sparsely, whoops
2927    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2928    // set up twiddle factors
2929 
2930    for (k=k2=0; k < n4; ++k,k2+=2) {
2931       A[k2  ] = (float)  cos(4*k*M_PI/n);
2932       A[k2+1] = (float) -sin(4*k*M_PI/n);
2933       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
2934       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
2935    }
2936    for (k=k2=0; k < n8; ++k,k2+=2) {
2937       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
2938       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2939    }
2940 
2941    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2942    // Note there are bugs in that pseudocode, presumably due to them attempting
2943    // to rename the arrays nicely rather than representing the way their actual
2944    // implementation bounces buffers back and forth. As a result, even in the
2945    // "some formulars corrected" version, a direct implementation fails. These
2946    // are noted below as "paper bug".
2947 
2948    // copy and reflect spectral data
2949    for (k=0; k < n2; ++k) u[k] = buffer[k];
2950    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2951    // kernel from paper
2952    // step 1
2953    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2954       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
2955       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2956    }
2957    // step 2
2958    for (k=k4=0; k < n8; k+=1, k4+=4) {
2959       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2960       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2961       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2962       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2963    }
2964    // step 3
2965    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2966    for (l=0; l < ld-3; ++l) {
2967       int k0 = n >> (l+2), k1 = 1 << (l+3);
2968       int rlim = n >> (l+4), r4, r;
2969       int s2lim = 1 << (l+2), s2;
2970       for (r=r4=0; r < rlim; r4+=4,++r) {
2971          for (s2=0; s2 < s2lim; s2+=2) {
2972             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2973             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2974             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2975                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2976             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2977                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2978          }
2979       }
2980       if (l+1 < ld-3) {
2981          // paper bug: ping-ponging of u&w here is omitted
2982          memcpy(w, u, sizeof(u));
2983       }
2984    }
2985 
2986    // step 4
2987    for (i=0; i < n8; ++i) {
2988       int j = bit_reverse(i) >> (32-ld+3);
2989       assert(j < n8);
2990       if (i == j) {
2991          // paper bug: original code probably swapped in place; if copying,
2992          //            need to directly copy in this case
2993          int i8 = i << 3;
2994          v[i8+1] = u[i8+1];
2995          v[i8+3] = u[i8+3];
2996          v[i8+5] = u[i8+5];
2997          v[i8+7] = u[i8+7];
2998       } else if (i < j) {
2999          int i8 = i << 3, j8 = j << 3;
3000          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3001          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3002          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3003          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3004       }
3005    }
3006    // step 5
3007    for (k=0; k < n2; ++k) {
3008       w[k] = v[k*2+1];
3009    }
3010    // step 6
3011    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3012       u[n-1-k2] = w[k4];
3013       u[n-2-k2] = w[k4+1];
3014       u[n3_4 - 1 - k2] = w[k4+2];
3015       u[n3_4 - 2 - k2] = w[k4+3];
3016    }
3017    // step 7
3018    for (k=k2=0; k < n8; ++k, k2 += 2) {
3019       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3020       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3021       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3022       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3023    }
3024    // step 8
3025    for (k=k2=0; k < n4; ++k,k2 += 2) {
3026       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
3027       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
3028    }
3029 
3030    // decode kernel to output
3031    // determined the following value experimentally
3032    // (by first figuring out what made inverse_mdct_slow work); then matching that here
3033    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3034    s = 0.5; // theoretically would be n4
3035 
3036    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3037    //     so it needs to use the "old" B values to behave correctly, or else
3038    //     set s to 1.0 ]]]
3039    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
3040    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3041    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
3042 }
3043 #endif
3044 
get_window(vorb * f,int len)3045 static float *get_window(vorb *f, int len)
3046 {
3047    len <<= 1;
3048    if (len == f->blocksize_0) return f->window[0];
3049    if (len == f->blocksize_1) return f->window[1];
3050    return NULL;
3051 }
3052 
3053 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3054 typedef int16 YTYPE;
3055 #else
3056 typedef int YTYPE;
3057 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3058 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3059 {
3060    int n2 = n >> 1;
3061    int s = map->chan[i].mux, floor;
3062    floor = map->submap_floor[s];
3063    if (f->floor_types[floor] == 0) {
3064       return error(f, VORBIS_invalid_stream);
3065    } else {
3066       Floor1 *g = &f->floor_config[floor].floor1;
3067       int j,q;
3068       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3069       for (q=1; q < g->values; ++q) {
3070          j = g->sorted_order[q];
3071          #ifndef STB_VORBIS_NO_DEFER_FLOOR
3072          if (finalY[j] >= 0)
3073          #else
3074          if (step2_flag[j])
3075          #endif
3076          {
3077             int hy = finalY[j] * g->floor1_multiplier;
3078             int hx = g->Xlist[j];
3079             if (lx != hx)
3080                draw_line(target, lx,ly, hx,hy, n2);
3081             CHECK(f);
3082             lx = hx, ly = hy;
3083          }
3084       }
3085       if (lx < n2) {
3086          // optimization of: draw_line(target, lx,ly, n,ly, n2);
3087          for (j=lx; j < n2; ++j)
3088             LINE_OP(target[j], inverse_db_table[ly]);
3089          CHECK(f);
3090       }
3091    }
3092    return TRUE;
3093 }
3094 
3095 // The meaning of "left" and "right"
3096 //
3097 // For a given frame:
3098 //     we compute samples from 0..n
3099 //     window_center is n/2
3100 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
3101 //     all of the samples from left_end to right_start can be output without mixing; however,
3102 //        this interval is 0-length except when transitioning between short and long frames
3103 //     all of the samples from right_start to right_end need to be mixed with the next frame,
3104 //        which we don't have, so those get saved in a buffer
3105 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
3106 //        has to be the same as frame N+1's left_end-left_start (which they are by
3107 //        construction)
3108 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3109 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3110 {
3111    Mode *m;
3112    int i, n, prev, next, window_center;
3113    f->channel_buffer_start = f->channel_buffer_end = 0;
3114 
3115   retry:
3116    if (f->eof) return FALSE;
3117    if (!maybe_start_packet(f))
3118       return FALSE;
3119    // check packet type
3120    if (get_bits(f,1) != 0) {
3121       if (IS_PUSH_MODE(f))
3122          return error(f,VORBIS_bad_packet_type);
3123       while (EOP != get8_packet(f));
3124       goto retry;
3125    }
3126 
3127    if (f->alloc.alloc_buffer)
3128       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3129 
3130    i = get_bits(f, ilog(f->mode_count-1));
3131    if (i == EOP) return FALSE;
3132    if (i >= f->mode_count) return FALSE;
3133    *mode = i;
3134    m = f->mode_config + i;
3135    if (m->blockflag) {
3136       n = f->blocksize_1;
3137       prev = get_bits(f,1);
3138       next = get_bits(f,1);
3139    } else {
3140       prev = next = 0;
3141       n = f->blocksize_0;
3142    }
3143 
3144 // WINDOWING
3145 
3146    window_center = n >> 1;
3147    if (m->blockflag && !prev) {
3148       *p_left_start = (n - f->blocksize_0) >> 2;
3149       *p_left_end   = (n + f->blocksize_0) >> 2;
3150    } else {
3151       *p_left_start = 0;
3152       *p_left_end   = window_center;
3153    }
3154    if (m->blockflag && !next) {
3155       *p_right_start = (n*3 - f->blocksize_0) >> 2;
3156       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
3157    } else {
3158       *p_right_start = window_center;
3159       *p_right_end   = n;
3160    }
3161 
3162    return TRUE;
3163 }
3164 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3165 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3166 {
3167    Mapping *map;
3168    int i,j,k,n,n2;
3169    int zero_channel[256];
3170    int really_zero_channel[256];
3171 
3172 // WINDOWING
3173 
3174    n = f->blocksize[m->blockflag];
3175    map = &f->mapping[m->mapping];
3176 
3177 // FLOORS
3178    n2 = n >> 1;
3179 
3180    CHECK(f);
3181 
3182    for (i=0; i < f->channels; ++i) {
3183       int s = map->chan[i].mux, floor;
3184       zero_channel[i] = FALSE;
3185       floor = map->submap_floor[s];
3186       if (f->floor_types[floor] == 0) {
3187          return error(f, VORBIS_invalid_stream);
3188       } else {
3189          Floor1 *g = &f->floor_config[floor].floor1;
3190          if (get_bits(f, 1)) {
3191             short *finalY;
3192             uint8 step2_flag[256];
3193             static int range_list[4] = { 256, 128, 86, 64 };
3194             int range = range_list[g->floor1_multiplier-1];
3195             int offset = 2;
3196             finalY = f->finalY[i];
3197             finalY[0] = get_bits(f, ilog(range)-1);
3198             finalY[1] = get_bits(f, ilog(range)-1);
3199             for (j=0; j < g->partitions; ++j) {
3200                int pclass = g->partition_class_list[j];
3201                int cdim = g->class_dimensions[pclass];
3202                int cbits = g->class_subclasses[pclass];
3203                int csub = (1 << cbits)-1;
3204                int cval = 0;
3205                if (cbits) {
3206                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3207                   DECODE(cval,f,c);
3208                }
3209                for (k=0; k < cdim; ++k) {
3210                   int book = g->subclass_books[pclass][cval & csub];
3211                   cval = cval >> cbits;
3212                   if (book >= 0) {
3213                      int temp;
3214                      Codebook *c = f->codebooks + book;
3215                      DECODE(temp,f,c);
3216                      finalY[offset++] = temp;
3217                   } else
3218                      finalY[offset++] = 0;
3219                }
3220             }
3221             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3222             step2_flag[0] = step2_flag[1] = 1;
3223             for (j=2; j < g->values; ++j) {
3224                int low, high, pred, highroom, lowroom, room, val;
3225                low = g->neighbors[j][0];
3226                high = g->neighbors[j][1];
3227                //neighbors(g->Xlist, j, &low, &high);
3228                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3229                val = finalY[j];
3230                highroom = range - pred;
3231                lowroom = pred;
3232                if (highroom < lowroom)
3233                   room = highroom * 2;
3234                else
3235                   room = lowroom * 2;
3236                if (val) {
3237                   step2_flag[low] = step2_flag[high] = 1;
3238                   step2_flag[j] = 1;
3239                   if (val >= room)
3240                      if (highroom > lowroom)
3241                         finalY[j] = val - lowroom + pred;
3242                      else
3243                         finalY[j] = pred - val + highroom - 1;
3244                   else
3245                      if (val & 1)
3246                         finalY[j] = pred - ((val+1)>>1);
3247                      else
3248                         finalY[j] = pred + (val>>1);
3249                } else {
3250                   step2_flag[j] = 0;
3251                   finalY[j] = pred;
3252                }
3253             }
3254 
3255 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3256             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3257 #else
3258             // defer final floor computation until _after_ residue
3259             for (j=0; j < g->values; ++j) {
3260                if (!step2_flag[j])
3261                   finalY[j] = -1;
3262             }
3263 #endif
3264          } else {
3265            error:
3266             zero_channel[i] = TRUE;
3267          }
3268          // So we just defer everything else to later
3269 
3270          // at this point we've decoded the floor into buffer
3271       }
3272    }
3273    CHECK(f);
3274    // at this point we've decoded all floors
3275 
3276    if (f->alloc.alloc_buffer)
3277       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3278 
3279    // re-enable coupled channels if necessary
3280    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3281    for (i=0; i < map->coupling_steps; ++i)
3282       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3283          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3284       }
3285 
3286    CHECK(f);
3287 // RESIDUE DECODE
3288    for (i=0; i < map->submaps; ++i) {
3289       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3290       int r;
3291       uint8 do_not_decode[256];
3292       int ch = 0;
3293       for (j=0; j < f->channels; ++j) {
3294          if (map->chan[j].mux == i) {
3295             if (zero_channel[j]) {
3296                do_not_decode[ch] = TRUE;
3297                residue_buffers[ch] = NULL;
3298             } else {
3299                do_not_decode[ch] = FALSE;
3300                residue_buffers[ch] = f->channel_buffers[j];
3301             }
3302             ++ch;
3303          }
3304       }
3305       r = map->submap_residue[i];
3306       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3307    }
3308 
3309    if (f->alloc.alloc_buffer)
3310       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3311    CHECK(f);
3312 
3313 // INVERSE COUPLING
3314    for (i = map->coupling_steps-1; i >= 0; --i) {
3315       int n2 = n >> 1;
3316       float *m = f->channel_buffers[map->chan[i].magnitude];
3317       float *a = f->channel_buffers[map->chan[i].angle    ];
3318       for (j=0; j < n2; ++j) {
3319          float a2,m2;
3320          if (m[j] > 0)
3321             if (a[j] > 0)
3322                m2 = m[j], a2 = m[j] - a[j];
3323             else
3324                a2 = m[j], m2 = m[j] + a[j];
3325          else
3326             if (a[j] > 0)
3327                m2 = m[j], a2 = m[j] + a[j];
3328             else
3329                a2 = m[j], m2 = m[j] - a[j];
3330          m[j] = m2;
3331          a[j] = a2;
3332       }
3333    }
3334    CHECK(f);
3335 
3336    // finish decoding the floors
3337 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3338    for (i=0; i < f->channels; ++i) {
3339       if (really_zero_channel[i]) {
3340          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3341       } else {
3342          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3343       }
3344    }
3345 #else
3346    for (i=0; i < f->channels; ++i) {
3347       if (really_zero_channel[i]) {
3348          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3349       } else {
3350          for (j=0; j < n2; ++j)
3351             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3352       }
3353    }
3354 #endif
3355 
3356 // INVERSE MDCT
3357    CHECK(f);
3358    for (i=0; i < f->channels; ++i)
3359       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3360    CHECK(f);
3361 
3362    // this shouldn't be necessary, unless we exited on an error
3363    // and want to flush to get to the next packet
3364    flush_packet(f);
3365 
3366    if (f->first_decode) {
3367       // assume we start so first non-discarded sample is sample 0
3368       // this isn't to spec, but spec would require us to read ahead
3369       // and decode the size of all current frames--could be done,
3370       // but presumably it's not a commonly used feature
3371       f->current_loc = -n2; // start of first frame is positioned for discard
3372       // we might have to discard samples "from" the next frame too,
3373       // if we're lapping a large block then a small at the start?
3374       f->discard_samples_deferred = n - right_end;
3375       f->current_loc_valid = TRUE;
3376       f->first_decode = FALSE;
3377    } else if (f->discard_samples_deferred) {
3378       if (f->discard_samples_deferred >= right_start - left_start) {
3379          f->discard_samples_deferred -= (right_start - left_start);
3380          left_start = right_start;
3381          *p_left = left_start;
3382       } else {
3383          left_start += f->discard_samples_deferred;
3384          *p_left = left_start;
3385          f->discard_samples_deferred = 0;
3386       }
3387    } else if (f->previous_length == 0 && f->current_loc_valid) {
3388       // we're recovering from a seek... that means we're going to discard
3389       // the samples from this packet even though we know our position from
3390       // the last page header, so we need to update the position based on
3391       // the discarded samples here
3392       // but wait, the code below is going to add this in itself even
3393       // on a discard, so we don't need to do it here...
3394    }
3395 
3396    // check if we have ogg information about the sample # for this packet
3397    if (f->last_seg_which == f->end_seg_with_known_loc) {
3398       // if we have a valid current loc, and this is final:
3399       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3400          uint32 current_end = f->known_loc_for_packet;
3401          // then let's infer the size of the (probably) short final frame
3402          if (current_end < f->current_loc + (right_end-left_start)) {
3403             if (current_end < f->current_loc) {
3404                // negative truncation, that's impossible!
3405                *len = 0;
3406             } else {
3407                *len = current_end - f->current_loc;
3408             }
3409             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3410             if (*len > right_end) *len = right_end; // this should never happen
3411             f->current_loc += *len;
3412             return TRUE;
3413          }
3414       }
3415       // otherwise, just set our sample loc
3416       // guess that the ogg granule pos refers to the _middle_ of the
3417       // last frame?
3418       // set f->current_loc to the position of left_start
3419       f->current_loc = f->known_loc_for_packet - (n2-left_start);
3420       f->current_loc_valid = TRUE;
3421    }
3422    if (f->current_loc_valid)
3423       f->current_loc += (right_start - left_start);
3424 
3425    if (f->alloc.alloc_buffer)
3426       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3427    *len = right_end;  // ignore samples after the window goes to 0
3428    CHECK(f);
3429 
3430    return TRUE;
3431 }
3432 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3433 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3434 {
3435    int mode, left_end, right_end;
3436    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3437    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3438 }
3439 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3440 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3441 {
3442    int prev,i,j;
3443    // we use right&left (the start of the right- and left-window sin()-regions)
3444    // to determine how much to return, rather than inferring from the rules
3445    // (same result, clearer code); 'left' indicates where our sin() window
3446    // starts, therefore where the previous window's right edge starts, and
3447    // therefore where to start mixing from the previous buffer. 'right'
3448    // indicates where our sin() ending-window starts, therefore that's where
3449    // we start saving, and where our returned-data ends.
3450 
3451    // mixin from previous window
3452    if (f->previous_length) {
3453       int i,j, n = f->previous_length;
3454       float *w = get_window(f, n);
3455       if (w == NULL) return 0;
3456       for (i=0; i < f->channels; ++i) {
3457          for (j=0; j < n; ++j)
3458             f->channel_buffers[i][left+j] =
3459                f->channel_buffers[i][left+j]*w[    j] +
3460                f->previous_window[i][     j]*w[n-1-j];
3461       }
3462    }
3463 
3464    prev = f->previous_length;
3465 
3466    // last half of this data becomes previous window
3467    f->previous_length = len - right;
3468 
3469    // @OPTIMIZE: could avoid this copy by double-buffering the
3470    // output (flipping previous_window with channel_buffers), but
3471    // then previous_window would have to be 2x as large, and
3472    // channel_buffers couldn't be temp mem (although they're NOT
3473    // currently temp mem, they could be (unless we want to level
3474    // performance by spreading out the computation))
3475    for (i=0; i < f->channels; ++i)
3476       for (j=0; right+j < len; ++j)
3477          f->previous_window[i][j] = f->channel_buffers[i][right+j];
3478 
3479    if (!prev)
3480       // there was no previous packet, so this data isn't valid...
3481       // this isn't entirely true, only the would-have-overlapped data
3482       // isn't valid, but this seems to be what the spec requires
3483       return 0;
3484 
3485    // truncate a short frame
3486    if (len < right) right = len;
3487 
3488    f->samples_output += right-left;
3489 
3490    return right - left;
3491 }
3492 
vorbis_pump_first_frame(stb_vorbis * f)3493 static int vorbis_pump_first_frame(stb_vorbis *f)
3494 {
3495    int len, right, left, res;
3496    res = vorbis_decode_packet(f, &len, &left, &right);
3497    if (res)
3498       vorbis_finish_frame(f, len, left, right);
3499    return res;
3500 }
3501 
3502 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f)3503 static int is_whole_packet_present(stb_vorbis *f)
3504 {
3505    // make sure that we have the packet available before continuing...
3506    // this requires a full ogg parse, but we know we can fetch from f->stream
3507 
3508    // instead of coding this out explicitly, we could save the current read state,
3509    // read the next packet with get8() until end-of-packet, check f->eof, then
3510    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3511    // of state to restore (primarily the page segment table)
3512 
3513    int s = f->next_seg, first = TRUE;
3514    uint8 *p = f->stream;
3515 
3516    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3517       for (; s < f->segment_count; ++s) {
3518          p += f->segments[s];
3519          if (f->segments[s] < 255)               // stop at first short segment
3520             break;
3521       }
3522       // either this continues, or it ends it...
3523       if (s == f->segment_count)
3524          s = -1; // set 'crosses page' flag
3525       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3526       first = FALSE;
3527    }
3528    for (; s == -1;) {
3529       uint8 *q;
3530       int n;
3531 
3532       // check that we have the page header ready
3533       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3534       // validate the page
3535       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3536       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3537       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3538          if (f->previous_length)
3539             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3540          // if no previous length, we're resynching, so we can come in on a continued-packet,
3541          // which we'll just drop
3542       } else {
3543          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3544       }
3545       n = p[26]; // segment counts
3546       q = p+27;  // q points to segment table
3547       p = q + n; // advance past header
3548       // make sure we've read the segment table
3549       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3550       for (s=0; s < n; ++s) {
3551          p += q[s];
3552          if (q[s] < 255)
3553             break;
3554       }
3555       if (s == n)
3556          s = -1; // set 'crosses page' flag
3557       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3558       first = FALSE;
3559    }
3560    return TRUE;
3561 }
3562 #endif // !STB_VORBIS_NO_PUSHDATA_API
3563 
start_decoder(vorb * f)3564 static int start_decoder(vorb *f)
3565 {
3566    uint8 header[6], x,y;
3567    int len,i,j,k, max_submaps = 0;
3568    int longest_floorlist=0;
3569 
3570    // first page, first packet
3571    f->first_decode = TRUE;
3572 
3573    if (!start_page(f))                              return FALSE;
3574    // validate page flag
3575    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3576    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3577    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3578    // check for expected packet length
3579    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3580    if (f->segments[0] != 30) {
3581       // check for the Ogg skeleton fishead identifying header to refine our error
3582       if (f->segments[0] == 64 &&
3583           getn(f, header, 6) &&
3584           header[0] == 'f' &&
3585           header[1] == 'i' &&
3586           header[2] == 's' &&
3587           header[3] == 'h' &&
3588           header[4] == 'e' &&
3589           header[5] == 'a' &&
3590           get8(f)   == 'd' &&
3591           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
3592       else
3593                                                     return error(f, VORBIS_invalid_first_page);
3594    }
3595 
3596    // read packet
3597    // check packet header
3598    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3599    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3600    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3601    // vorbis_version
3602    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3603    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3604    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3605    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3606    get32(f); // bitrate_maximum
3607    get32(f); // bitrate_nominal
3608    get32(f); // bitrate_minimum
3609    x = get8(f);
3610    {
3611       int log0,log1;
3612       log0 = x & 15;
3613       log1 = x >> 4;
3614       f->blocksize_0 = 1 << log0;
3615       f->blocksize_1 = 1 << log1;
3616       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3617       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3618       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3619    }
3620 
3621    // framing_flag
3622    x = get8(f);
3623    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3624 
3625    // second packet!
3626    if (!start_page(f))                              return FALSE;
3627 
3628    if (!start_packet(f))                            return FALSE;
3629 
3630    if (!next_segment(f))                            return FALSE;
3631 
3632    if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
3633    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3634    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3635    //file vendor
3636    len = get32_packet(f);
3637    f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
3638    if (f->vendor == NULL)                           return error(f, VORBIS_outofmem);
3639    for(i=0; i < len; ++i) {
3640       f->vendor[i] = get8_packet(f);
3641    }
3642    f->vendor[len] = (char)'\0';
3643    //user comments
3644    f->comment_list_length = get32_packet(f);
3645    f->comment_list = (char**)setup_malloc(f, sizeof(char*) * (f->comment_list_length));
3646    if (f->comment_list == NULL)                     return error(f, VORBIS_outofmem);
3647 
3648    for(i=0; i < f->comment_list_length; ++i) {
3649       len = get32_packet(f);
3650       f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
3651       if (f->comment_list[i] == NULL)               return error(f, VORBIS_outofmem);
3652 
3653       for(j=0; j < len; ++j) {
3654          f->comment_list[i][j] = get8_packet(f);
3655       }
3656       f->comment_list[i][len] = (char)'\0';
3657    }
3658 
3659    // framing_flag
3660    x = get8_packet(f);
3661    if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
3662 
3663 
3664    skip(f, f->bytes_in_seg);
3665    f->bytes_in_seg = 0;
3666 
3667    do {
3668       len = next_segment(f);
3669       skip(f, len);
3670       f->bytes_in_seg = 0;
3671    } while (len);
3672 
3673    // third packet!
3674    if (!start_packet(f))                            return FALSE;
3675 
3676    #ifndef STB_VORBIS_NO_PUSHDATA_API
3677    if (IS_PUSH_MODE(f)) {
3678       if (!is_whole_packet_present(f)) {
3679          // convert error in ogg header to write type
3680          if (f->error == VORBIS_invalid_stream)
3681             f->error = VORBIS_invalid_setup;
3682          return FALSE;
3683       }
3684    }
3685    #endif
3686 
3687    crc32_init(); // always init it, to avoid multithread race conditions
3688 
3689    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3690    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3691    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3692 
3693    // codebooks
3694 
3695    f->codebook_count = get_bits(f,8) + 1;
3696    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3697    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3698    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3699    for (i=0; i < f->codebook_count; ++i) {
3700       uint32 *values;
3701       int ordered, sorted_count;
3702       int total=0;
3703       uint8 *lengths;
3704       Codebook *c = f->codebooks+i;
3705       CHECK(f);
3706       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3707       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3708       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3709       x = get_bits(f, 8);
3710       c->dimensions = (get_bits(f, 8)<<8) + x;
3711       x = get_bits(f, 8);
3712       y = get_bits(f, 8);
3713       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3714       ordered = get_bits(f,1);
3715       c->sparse = ordered ? 0 : get_bits(f,1);
3716 
3717       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
3718 
3719       if (c->sparse)
3720          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3721       else
3722          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3723 
3724       if (!lengths) return error(f, VORBIS_outofmem);
3725 
3726       if (ordered) {
3727          int current_entry = 0;
3728          int current_length = get_bits(f,5) + 1;
3729          while (current_entry < c->entries) {
3730             int limit = c->entries - current_entry;
3731             int n = get_bits(f, ilog(limit));
3732             if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3733             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3734             memset(lengths + current_entry, current_length, n);
3735             current_entry += n;
3736             ++current_length;
3737          }
3738       } else {
3739          for (j=0; j < c->entries; ++j) {
3740             int present = c->sparse ? get_bits(f,1) : 1;
3741             if (present) {
3742                lengths[j] = get_bits(f, 5) + 1;
3743                ++total;
3744                if (lengths[j] == 32)
3745                   return error(f, VORBIS_invalid_setup);
3746             } else {
3747                lengths[j] = NO_CODE;
3748             }
3749          }
3750       }
3751 
3752       if (c->sparse && total >= c->entries >> 2) {
3753          // convert sparse items to non-sparse!
3754          if (c->entries > (int) f->setup_temp_memory_required)
3755             f->setup_temp_memory_required = c->entries;
3756 
3757          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3758          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3759          memcpy(c->codeword_lengths, lengths, c->entries);
3760          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3761          lengths = c->codeword_lengths;
3762          c->sparse = 0;
3763       }
3764 
3765       // compute the size of the sorted tables
3766       if (c->sparse) {
3767          sorted_count = total;
3768       } else {
3769          sorted_count = 0;
3770          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3771          for (j=0; j < c->entries; ++j)
3772             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3773                ++sorted_count;
3774          #endif
3775       }
3776 
3777       c->sorted_entries = sorted_count;
3778       values = NULL;
3779 
3780       CHECK(f);
3781       if (!c->sparse) {
3782          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3783          if (!c->codewords)                  return error(f, VORBIS_outofmem);
3784       } else {
3785          unsigned int size;
3786          if (c->sorted_entries) {
3787             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3788             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3789             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3790             if (!c->codewords)                  return error(f, VORBIS_outofmem);
3791             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3792             if (!values)                        return error(f, VORBIS_outofmem);
3793          }
3794          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3795          if (size > f->setup_temp_memory_required)
3796             f->setup_temp_memory_required = size;
3797       }
3798 
3799       if (!compute_codewords(c, lengths, c->entries, values)) {
3800          if (c->sparse) setup_temp_free(f, values, 0);
3801          return error(f, VORBIS_invalid_setup);
3802       }
3803 
3804       if (c->sorted_entries) {
3805          // allocate an extra slot for sentinels
3806          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3807          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3808          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3809          // so that we can catch that case without an extra if
3810          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3811          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3812          ++c->sorted_values;
3813          c->sorted_values[-1] = -1;
3814          compute_sorted_huffman(c, lengths, values);
3815       }
3816 
3817       if (c->sparse) {
3818          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3819          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3820          setup_temp_free(f, lengths, c->entries);
3821          c->codewords = NULL;
3822       }
3823 
3824       compute_accelerated_huffman(c);
3825 
3826       CHECK(f);
3827       c->lookup_type = get_bits(f, 4);
3828       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3829       if (c->lookup_type > 0) {
3830          uint16 *mults;
3831          c->minimum_value = float32_unpack(get_bits(f, 32));
3832          c->delta_value = float32_unpack(get_bits(f, 32));
3833          c->value_bits = get_bits(f, 4)+1;
3834          c->sequence_p = get_bits(f,1);
3835          if (c->lookup_type == 1) {
3836             int values = lookup1_values(c->entries, c->dimensions);
3837             if (values < 0) return error(f, VORBIS_invalid_setup);
3838             c->lookup_values = (uint32) values;
3839          } else {
3840             c->lookup_values = c->entries * c->dimensions;
3841          }
3842          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3843          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3844          if (mults == NULL) return error(f, VORBIS_outofmem);
3845          for (j=0; j < (int) c->lookup_values; ++j) {
3846             int q = get_bits(f, c->value_bits);
3847             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3848             mults[j] = q;
3849          }
3850 
3851 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3852          if (c->lookup_type == 1) {
3853             int len, sparse = c->sparse;
3854             float last=0;
3855             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3856             if (sparse) {
3857                if (c->sorted_entries == 0) goto skip;
3858                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3859             } else
3860                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3861             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3862             len = sparse ? c->sorted_entries : c->entries;
3863             for (j=0; j < len; ++j) {
3864                unsigned int z = sparse ? c->sorted_values[j] : j;
3865                unsigned int div=1;
3866                for (k=0; k < c->dimensions; ++k) {
3867                   int off = (z / div) % c->lookup_values;
3868                   float val = mults[off];
3869                   val = mults[off]*c->delta_value + c->minimum_value + last;
3870                   c->multiplicands[j*c->dimensions + k] = val;
3871                   if (c->sequence_p)
3872                      last = val;
3873                   if (k+1 < c->dimensions) {
3874                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3875                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3876                         return error(f, VORBIS_invalid_setup);
3877                      }
3878                      div *= c->lookup_values;
3879                   }
3880                }
3881             }
3882             c->lookup_type = 2;
3883          }
3884          else
3885 #endif
3886          {
3887             float last=0;
3888             CHECK(f);
3889             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3890             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3891             for (j=0; j < (int) c->lookup_values; ++j) {
3892                float val = mults[j] * c->delta_value + c->minimum_value + last;
3893                c->multiplicands[j] = val;
3894                if (c->sequence_p)
3895                   last = val;
3896             }
3897          }
3898 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3899         skip:;
3900 #endif
3901          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3902 
3903          CHECK(f);
3904       }
3905       CHECK(f);
3906    }
3907 
3908    // time domain transfers (notused)
3909 
3910    x = get_bits(f, 6) + 1;
3911    for (i=0; i < x; ++i) {
3912       uint32 z = get_bits(f, 16);
3913       if (z != 0) return error(f, VORBIS_invalid_setup);
3914    }
3915 
3916    // Floors
3917    f->floor_count = get_bits(f, 6)+1;
3918    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3919    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3920    for (i=0; i < f->floor_count; ++i) {
3921       f->floor_types[i] = get_bits(f, 16);
3922       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3923       if (f->floor_types[i] == 0) {
3924          Floor0 *g = &f->floor_config[i].floor0;
3925          g->order = get_bits(f,8);
3926          g->rate = get_bits(f,16);
3927          g->bark_map_size = get_bits(f,16);
3928          g->amplitude_bits = get_bits(f,6);
3929          g->amplitude_offset = get_bits(f,8);
3930          g->number_of_books = get_bits(f,4) + 1;
3931          for (j=0; j < g->number_of_books; ++j)
3932             g->book_list[j] = get_bits(f,8);
3933          return error(f, VORBIS_feature_not_supported);
3934       } else {
3935          stbv__floor_ordering p[31*8+2];
3936          Floor1 *g = &f->floor_config[i].floor1;
3937          int max_class = -1;
3938          g->partitions = get_bits(f, 5);
3939          for (j=0; j < g->partitions; ++j) {
3940             g->partition_class_list[j] = get_bits(f, 4);
3941             if (g->partition_class_list[j] > max_class)
3942                max_class = g->partition_class_list[j];
3943          }
3944          for (j=0; j <= max_class; ++j) {
3945             g->class_dimensions[j] = get_bits(f, 3)+1;
3946             g->class_subclasses[j] = get_bits(f, 2);
3947             if (g->class_subclasses[j]) {
3948                g->class_masterbooks[j] = get_bits(f, 8);
3949                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3950             }
3951             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3952                g->subclass_books[j][k] = get_bits(f,8)-1;
3953                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3954             }
3955          }
3956          g->floor1_multiplier = get_bits(f,2)+1;
3957          g->rangebits = get_bits(f,4);
3958          g->Xlist[0] = 0;
3959          g->Xlist[1] = 1 << g->rangebits;
3960          g->values = 2;
3961          for (j=0; j < g->partitions; ++j) {
3962             int c = g->partition_class_list[j];
3963             for (k=0; k < g->class_dimensions[c]; ++k) {
3964                g->Xlist[g->values] = get_bits(f, g->rangebits);
3965                ++g->values;
3966             }
3967          }
3968          // precompute the sorting
3969          for (j=0; j < g->values; ++j) {
3970             p[j].x = g->Xlist[j];
3971             p[j].id = j;
3972          }
3973          qsort(p, g->values, sizeof(p[0]), point_compare);
3974          for (j=0; j < g->values-1; ++j)
3975             if (p[j].x == p[j+1].x)
3976                return error(f, VORBIS_invalid_setup);
3977          for (j=0; j < g->values; ++j)
3978             g->sorted_order[j] = (uint8) p[j].id;
3979          // precompute the neighbors
3980          for (j=2; j < g->values; ++j) {
3981             int low = 0,hi = 0;
3982             neighbors(g->Xlist, j, &low,&hi);
3983             g->neighbors[j][0] = low;
3984             g->neighbors[j][1] = hi;
3985          }
3986 
3987          if (g->values > longest_floorlist)
3988             longest_floorlist = g->values;
3989       }
3990    }
3991 
3992    // Residue
3993    f->residue_count = get_bits(f, 6)+1;
3994    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3995    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3996    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3997    for (i=0; i < f->residue_count; ++i) {
3998       uint8 residue_cascade[64];
3999       Residue *r = f->residue_config+i;
4000       f->residue_types[i] = get_bits(f, 16);
4001       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
4002       r->begin = get_bits(f, 24);
4003       r->end = get_bits(f, 24);
4004       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
4005       r->part_size = get_bits(f,24)+1;
4006       r->classifications = get_bits(f,6)+1;
4007       r->classbook = get_bits(f,8);
4008       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4009       for (j=0; j < r->classifications; ++j) {
4010          uint8 high_bits=0;
4011          uint8 low_bits=get_bits(f,3);
4012          if (get_bits(f,1))
4013             high_bits = get_bits(f,5);
4014          residue_cascade[j] = high_bits*8 + low_bits;
4015       }
4016       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4017       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4018       for (j=0; j < r->classifications; ++j) {
4019          for (k=0; k < 8; ++k) {
4020             if (residue_cascade[j] & (1 << k)) {
4021                r->residue_books[j][k] = get_bits(f, 8);
4022                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4023             } else {
4024                r->residue_books[j][k] = -1;
4025             }
4026          }
4027       }
4028       // precompute the classifications[] array to avoid inner-loop mod/divide
4029       // call it 'classdata' since we already have r->classifications
4030       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4031       if (!r->classdata) return error(f, VORBIS_outofmem);
4032       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4033       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4034          int classwords = f->codebooks[r->classbook].dimensions;
4035          int temp = j;
4036          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4037          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4038          for (k=classwords-1; k >= 0; --k) {
4039             r->classdata[j][k] = temp % r->classifications;
4040             temp /= r->classifications;
4041          }
4042       }
4043    }
4044 
4045    f->mapping_count = get_bits(f,6)+1;
4046    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4047    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4048    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4049    for (i=0; i < f->mapping_count; ++i) {
4050       Mapping *m = f->mapping + i;
4051       int mapping_type = get_bits(f,16);
4052       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4053       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4054       if (m->chan == NULL) return error(f, VORBIS_outofmem);
4055       if (get_bits(f,1))
4056          m->submaps = get_bits(f,4)+1;
4057       else
4058          m->submaps = 1;
4059       if (m->submaps > max_submaps)
4060          max_submaps = m->submaps;
4061       if (get_bits(f,1)) {
4062          m->coupling_steps = get_bits(f,8)+1;
4063          if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4064          for (k=0; k < m->coupling_steps; ++k) {
4065             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4066             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4067             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
4068             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
4069             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
4070          }
4071       } else
4072          m->coupling_steps = 0;
4073 
4074       // reserved field
4075       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4076       if (m->submaps > 1) {
4077          for (j=0; j < f->channels; ++j) {
4078             m->chan[j].mux = get_bits(f, 4);
4079             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
4080          }
4081       } else
4082          // @SPECIFICATION: this case is missing from the spec
4083          for (j=0; j < f->channels; ++j)
4084             m->chan[j].mux = 0;
4085 
4086       for (j=0; j < m->submaps; ++j) {
4087          get_bits(f,8); // discard
4088          m->submap_floor[j] = get_bits(f,8);
4089          m->submap_residue[j] = get_bits(f,8);
4090          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
4091          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
4092       }
4093    }
4094 
4095    // Modes
4096    f->mode_count = get_bits(f, 6)+1;
4097    for (i=0; i < f->mode_count; ++i) {
4098       Mode *m = f->mode_config+i;
4099       m->blockflag = get_bits(f,1);
4100       m->windowtype = get_bits(f,16);
4101       m->transformtype = get_bits(f,16);
4102       m->mapping = get_bits(f,8);
4103       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
4104       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
4105       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
4106    }
4107 
4108    flush_packet(f);
4109 
4110    f->previous_length = 0;
4111 
4112    for (i=0; i < f->channels; ++i) {
4113       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4114       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4115       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4116       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4117       memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4118       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4119       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4120       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4121       #endif
4122    }
4123 
4124    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4125    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4126    f->blocksize[0] = f->blocksize_0;
4127    f->blocksize[1] = f->blocksize_1;
4128 
4129 #ifdef STB_VORBIS_DIVIDE_TABLE
4130    if (integer_divide_table[1][1]==0)
4131       for (i=0; i < DIVTAB_NUMER; ++i)
4132          for (j=1; j < DIVTAB_DENOM; ++j)
4133             integer_divide_table[i][j] = i / j;
4134 #endif
4135 
4136    // compute how much temporary memory is needed
4137 
4138    // 1.
4139    {
4140       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4141       uint32 classify_mem;
4142       int i,max_part_read=0;
4143       for (i=0; i < f->residue_count; ++i) {
4144          Residue *r = f->residue_config + i;
4145          unsigned int actual_size = f->blocksize_1 / 2;
4146          unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4147          unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
4148          int n_read = limit_r_end - limit_r_begin;
4149          int part_read = n_read / r->part_size;
4150          if (part_read > max_part_read)
4151             max_part_read = part_read;
4152       }
4153       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4154       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4155       #else
4156       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4157       #endif
4158 
4159       // maximum reasonable partition size is f->blocksize_1
4160 
4161       f->temp_memory_required = classify_mem;
4162       if (imdct_mem > f->temp_memory_required)
4163          f->temp_memory_required = imdct_mem;
4164    }
4165 
4166 
4167    if (f->alloc.alloc_buffer) {
4168       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4169       // check if there's enough temp memory so we don't error later
4170       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4171          return error(f, VORBIS_outofmem);
4172    }
4173 
4174    // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
4175    // without PAGEFLAG_continued_packet, so this either points to the first page, or
4176    // the page after the end of the headers. It might be cleaner to point to a page
4177    // in the middle of the headers, when that's the page where the first audio packet
4178    // starts, but we'd have to also correctly skip the end of any continued packet in
4179    // stb_vorbis_seek_start.
4180    if (f->next_seg == -1) {
4181       f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4182    } else {
4183       f->first_audio_page_offset = 0;
4184    }
4185 
4186    return TRUE;
4187 }
4188 
vorbis_deinit(stb_vorbis * p)4189 static void vorbis_deinit(stb_vorbis *p)
4190 {
4191    int i,j;
4192 
4193    setup_free(p, p->vendor);
4194    for (i=0; i < p->comment_list_length; ++i) {
4195       setup_free(p, p->comment_list[i]);
4196    }
4197    setup_free(p, p->comment_list);
4198 
4199    if (p->residue_config) {
4200       for (i=0; i < p->residue_count; ++i) {
4201          Residue *r = p->residue_config+i;
4202          if (r->classdata) {
4203             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4204                setup_free(p, r->classdata[j]);
4205             setup_free(p, r->classdata);
4206          }
4207          setup_free(p, r->residue_books);
4208       }
4209    }
4210 
4211    if (p->codebooks) {
4212       CHECK(p);
4213       for (i=0; i < p->codebook_count; ++i) {
4214          Codebook *c = p->codebooks + i;
4215          setup_free(p, c->codeword_lengths);
4216          setup_free(p, c->multiplicands);
4217          setup_free(p, c->codewords);
4218          setup_free(p, c->sorted_codewords);
4219          // c->sorted_values[-1] is the first entry in the array
4220          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4221       }
4222       setup_free(p, p->codebooks);
4223    }
4224    setup_free(p, p->floor_config);
4225    setup_free(p, p->residue_config);
4226    if (p->mapping) {
4227       for (i=0; i < p->mapping_count; ++i)
4228          setup_free(p, p->mapping[i].chan);
4229       setup_free(p, p->mapping);
4230    }
4231    CHECK(p);
4232    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4233       setup_free(p, p->channel_buffers[i]);
4234       setup_free(p, p->previous_window[i]);
4235       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4236       setup_free(p, p->floor_buffers[i]);
4237       #endif
4238       setup_free(p, p->finalY[i]);
4239    }
4240    for (i=0; i < 2; ++i) {
4241       setup_free(p, p->A[i]);
4242       setup_free(p, p->B[i]);
4243       setup_free(p, p->C[i]);
4244       setup_free(p, p->window[i]);
4245       setup_free(p, p->bit_reverse[i]);
4246    }
4247    #ifndef STB_VORBIS_NO_STDIO
4248    if (p->close_on_free) fclose(p->f);
4249    #endif
4250 }
4251 
stb_vorbis_close(stb_vorbis * p)4252 void stb_vorbis_close(stb_vorbis *p)
4253 {
4254    if (p == NULL) return;
4255    vorbis_deinit(p);
4256    setup_free(p,p);
4257 }
4258 
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4259 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4260 {
4261    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4262    if (z) {
4263       p->alloc = *z;
4264       p->alloc.alloc_buffer_length_in_bytes &= ~7;
4265       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4266    }
4267    p->eof = 0;
4268    p->error = VORBIS__no_error;
4269    p->stream = NULL;
4270    p->codebooks = NULL;
4271    p->page_crc_tests = -1;
4272    #ifndef STB_VORBIS_NO_STDIO
4273    p->close_on_free = FALSE;
4274    p->f = NULL;
4275    #endif
4276 }
4277 
stb_vorbis_get_sample_offset(stb_vorbis * f)4278 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4279 {
4280    if (f->current_loc_valid)
4281       return f->current_loc;
4282    else
4283       return -1;
4284 }
4285 
stb_vorbis_get_info(stb_vorbis * f)4286 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4287 {
4288    stb_vorbis_info d;
4289    d.channels = f->channels;
4290    d.sample_rate = f->sample_rate;
4291    d.setup_memory_required = f->setup_memory_required;
4292    d.setup_temp_memory_required = f->setup_temp_memory_required;
4293    d.temp_memory_required = f->temp_memory_required;
4294    d.max_frame_size = f->blocksize_1 >> 1;
4295    return d;
4296 }
4297 
stb_vorbis_get_comment(stb_vorbis * f)4298 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
4299 {
4300    stb_vorbis_comment d;
4301    d.vendor = f->vendor;
4302    d.comment_list_length = f->comment_list_length;
4303    d.comment_list = f->comment_list;
4304    return d;
4305 }
4306 
stb_vorbis_get_error(stb_vorbis * f)4307 int stb_vorbis_get_error(stb_vorbis *f)
4308 {
4309    int e = f->error;
4310    f->error = VORBIS__no_error;
4311    return e;
4312 }
4313 
vorbis_alloc(stb_vorbis * f)4314 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4315 {
4316    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4317    return p;
4318 }
4319 
4320 #ifndef STB_VORBIS_NO_PUSHDATA_API
4321 
stb_vorbis_flush_pushdata(stb_vorbis * f)4322 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4323 {
4324    f->previous_length = 0;
4325    f->page_crc_tests  = 0;
4326    f->discard_samples_deferred = 0;
4327    f->current_loc_valid = FALSE;
4328    f->first_decode = FALSE;
4329    f->samples_output = 0;
4330    f->channel_buffer_start = 0;
4331    f->channel_buffer_end = 0;
4332 }
4333 
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4334 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4335 {
4336    int i,n;
4337    for (i=0; i < f->page_crc_tests; ++i)
4338       f->scan[i].bytes_done = 0;
4339 
4340    // if we have room for more scans, search for them first, because
4341    // they may cause us to stop early if their header is incomplete
4342    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4343       if (data_len < 4) return 0;
4344       data_len -= 3; // need to look for 4-byte sequence, so don't miss
4345                      // one that straddles a boundary
4346       for (i=0; i < data_len; ++i) {
4347          if (data[i] == 0x4f) {
4348             if (0==memcmp(data+i, ogg_page_header, 4)) {
4349                int j,len;
4350                uint32 crc;
4351                // make sure we have the whole page header
4352                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4353                   // only read up to this page start, so hopefully we'll
4354                   // have the whole page header start next time
4355                   data_len = i;
4356                   break;
4357                }
4358                // ok, we have it all; compute the length of the page
4359                len = 27 + data[i+26];
4360                for (j=0; j < data[i+26]; ++j)
4361                   len += data[i+27+j];
4362                // scan everything up to the embedded crc (which we must 0)
4363                crc = 0;
4364                for (j=0; j < 22; ++j)
4365                   crc = crc32_update(crc, data[i+j]);
4366                // now process 4 0-bytes
4367                for (   ; j < 26; ++j)
4368                   crc = crc32_update(crc, 0);
4369                // len is the total number of bytes we need to scan
4370                n = f->page_crc_tests++;
4371                f->scan[n].bytes_left = len-j;
4372                f->scan[n].crc_so_far = crc;
4373                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4374                // if the last frame on a page is continued to the next, then
4375                // we can't recover the sample_loc immediately
4376                if (data[i+27+data[i+26]-1] == 255)
4377                   f->scan[n].sample_loc = ~0;
4378                else
4379                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4380                f->scan[n].bytes_done = i+j;
4381                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4382                   break;
4383                // keep going if we still have room for more
4384             }
4385          }
4386       }
4387    }
4388 
4389    for (i=0; i < f->page_crc_tests;) {
4390       uint32 crc;
4391       int j;
4392       int n = f->scan[i].bytes_done;
4393       int m = f->scan[i].bytes_left;
4394       if (m > data_len - n) m = data_len - n;
4395       // m is the bytes to scan in the current chunk
4396       crc = f->scan[i].crc_so_far;
4397       for (j=0; j < m; ++j)
4398          crc = crc32_update(crc, data[n+j]);
4399       f->scan[i].bytes_left -= m;
4400       f->scan[i].crc_so_far = crc;
4401       if (f->scan[i].bytes_left == 0) {
4402          // does it match?
4403          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4404             // Houston, we have page
4405             data_len = n+m; // consumption amount is wherever that scan ended
4406             f->page_crc_tests = -1; // drop out of page scan mode
4407             f->previous_length = 0; // decode-but-don't-output one frame
4408             f->next_seg = -1;       // start a new page
4409             f->current_loc = f->scan[i].sample_loc; // set the current sample location
4410                                     // to the amount we'd have decoded had we decoded this page
4411             f->current_loc_valid = f->current_loc != ~0U;
4412             return data_len;
4413          }
4414          // delete entry
4415          f->scan[i] = f->scan[--f->page_crc_tests];
4416       } else {
4417          ++i;
4418       }
4419    }
4420 
4421    return data_len;
4422 }
4423 
4424 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4425 int stb_vorbis_decode_frame_pushdata(
4426          stb_vorbis *f,                   // the file we're decoding
4427          const uint8 *data, int data_len, // the memory available for decoding
4428          int *channels,                   // place to write number of float * buffers
4429          float ***output,                 // place to write float ** array of float * buffers
4430          int *samples                     // place to write number of output samples
4431      )
4432 {
4433    int i;
4434    int len,right,left;
4435 
4436    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4437 
4438    if (f->page_crc_tests >= 0) {
4439       *samples = 0;
4440       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4441    }
4442 
4443    f->stream     = (uint8 *) data;
4444    f->stream_end = (uint8 *) data + data_len;
4445    f->error      = VORBIS__no_error;
4446 
4447    // check that we have the entire packet in memory
4448    if (!is_whole_packet_present(f)) {
4449       *samples = 0;
4450       return 0;
4451    }
4452 
4453    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4454       // save the actual error we encountered
4455       enum STBVorbisError error = f->error;
4456       if (error == VORBIS_bad_packet_type) {
4457          // flush and resynch
4458          f->error = VORBIS__no_error;
4459          while (get8_packet(f) != EOP)
4460             if (f->eof) break;
4461          *samples = 0;
4462          return (int) (f->stream - data);
4463       }
4464       if (error == VORBIS_continued_packet_flag_invalid) {
4465          if (f->previous_length == 0) {
4466             // we may be resynching, in which case it's ok to hit one
4467             // of these; just discard the packet
4468             f->error = VORBIS__no_error;
4469             while (get8_packet(f) != EOP)
4470                if (f->eof) break;
4471             *samples = 0;
4472             return (int) (f->stream - data);
4473          }
4474       }
4475       // if we get an error while parsing, what to do?
4476       // well, it DEFINITELY won't work to continue from where we are!
4477       stb_vorbis_flush_pushdata(f);
4478       // restore the error that actually made us bail
4479       f->error = error;
4480       *samples = 0;
4481       return 1;
4482    }
4483 
4484    // success!
4485    len = vorbis_finish_frame(f, len, left, right);
4486    for (i=0; i < f->channels; ++i)
4487       f->outputs[i] = f->channel_buffers[i] + left;
4488 
4489    if (channels) *channels = f->channels;
4490    *samples = len;
4491    *output = f->outputs;
4492    return (int) (f->stream - data);
4493 }
4494 
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4495 stb_vorbis *stb_vorbis_open_pushdata(
4496          const unsigned char *data, int data_len, // the memory available for decoding
4497          int *data_used,              // only defined if result is not NULL
4498          int *error, const stb_vorbis_alloc *alloc)
4499 {
4500    stb_vorbis *f, p;
4501    vorbis_init(&p, alloc);
4502    p.stream     = (uint8 *) data;
4503    p.stream_end = (uint8 *) data + data_len;
4504    p.push_mode  = TRUE;
4505    if (!start_decoder(&p)) {
4506       if (p.eof)
4507          *error = VORBIS_need_more_data;
4508       else
4509          *error = p.error;
4510       return NULL;
4511    }
4512    f = vorbis_alloc(&p);
4513    if (f) {
4514       *f = p;
4515       *data_used = (int) (f->stream - data);
4516       *error = 0;
4517       return f;
4518    } else {
4519       vorbis_deinit(&p);
4520       return NULL;
4521    }
4522 }
4523 #endif // STB_VORBIS_NO_PUSHDATA_API
4524 
stb_vorbis_get_file_offset(stb_vorbis * f)4525 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4526 {
4527    #ifndef STB_VORBIS_NO_PUSHDATA_API
4528    if (f->push_mode) return 0;
4529    #endif
4530    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4531    #ifndef STB_VORBIS_NO_STDIO
4532    return (unsigned int) (ftell(f->f) - f->f_start);
4533    #endif
4534 }
4535 
4536 #ifndef STB_VORBIS_NO_PULLDATA_API
4537 //
4538 // DATA-PULLING API
4539 //
4540 
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4541 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4542 {
4543    for(;;) {
4544       int n;
4545       if (f->eof) return 0;
4546       n = get8(f);
4547       if (n == 0x4f) { // page header candidate
4548          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4549          int i;
4550          // check if we're off the end of a file_section stream
4551          if (retry_loc - 25 > f->stream_len)
4552             return 0;
4553          // check the rest of the header
4554          for (i=1; i < 4; ++i)
4555             if (get8(f) != ogg_page_header[i])
4556                break;
4557          if (f->eof) return 0;
4558          if (i == 4) {
4559             uint8 header[27];
4560             uint32 i, crc, goal, len;
4561             for (i=0; i < 4; ++i)
4562                header[i] = ogg_page_header[i];
4563             for (; i < 27; ++i)
4564                header[i] = get8(f);
4565             if (f->eof) return 0;
4566             if (header[4] != 0) goto invalid;
4567             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4568             for (i=22; i < 26; ++i)
4569                header[i] = 0;
4570             crc = 0;
4571             for (i=0; i < 27; ++i)
4572                crc = crc32_update(crc, header[i]);
4573             len = 0;
4574             for (i=0; i < header[26]; ++i) {
4575                int s = get8(f);
4576                crc = crc32_update(crc, s);
4577                len += s;
4578             }
4579             if (len && f->eof) return 0;
4580             for (i=0; i < len; ++i)
4581                crc = crc32_update(crc, get8(f));
4582             // finished parsing probable page
4583             if (crc == goal) {
4584                // we could now check that it's either got the last
4585                // page flag set, OR it's followed by the capture
4586                // pattern, but I guess TECHNICALLY you could have
4587                // a file with garbage between each ogg page and recover
4588                // from it automatically? So even though that paranoia
4589                // might decrease the chance of an invalid decode by
4590                // another 2^32, not worth it since it would hose those
4591                // invalid-but-useful files?
4592                if (end)
4593                   *end = stb_vorbis_get_file_offset(f);
4594                if (last) {
4595                   if (header[5] & 0x04)
4596                      *last = 1;
4597                   else
4598                      *last = 0;
4599                }
4600                set_file_offset(f, retry_loc-1);
4601                return 1;
4602             }
4603          }
4604         invalid:
4605          // not a valid page, so rewind and look for next one
4606          set_file_offset(f, retry_loc);
4607       }
4608    }
4609 }
4610 
4611 
4612 #define SAMPLE_unknown  0xffffffff
4613 
4614 // seeking is implemented with a binary search, which narrows down the range to
4615 // 64K, before using a linear search (because finding the synchronization
4616 // pattern can be expensive, and the chance we'd find the end page again is
4617 // relatively high for small ranges)
4618 //
4619 // two initial interpolation-style probes are used at the start of the search
4620 // to try to bound either side of the binary search sensibly, while still
4621 // working in O(log n) time if they fail.
4622 
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4623 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4624 {
4625    uint8 header[27], lacing[255];
4626    int i,len;
4627 
4628    // record where the page starts
4629    z->page_start = stb_vorbis_get_file_offset(f);
4630 
4631    // parse the header
4632    getn(f, header, 27);
4633    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4634       return 0;
4635    getn(f, lacing, header[26]);
4636 
4637    // determine the length of the payload
4638    len = 0;
4639    for (i=0; i < header[26]; ++i)
4640       len += lacing[i];
4641 
4642    // this implies where the page ends
4643    z->page_end = z->page_start + 27 + header[26] + len;
4644 
4645    // read the last-decoded sample out of the data
4646    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4647 
4648    // restore file state to where we were
4649    set_file_offset(f, z->page_start);
4650    return 1;
4651 }
4652 
4653 // rarely used function to seek back to the preceding page while finding the
4654 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4655 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4656 {
4657    unsigned int previous_safe, end;
4658 
4659    // now we want to seek back 64K from the limit
4660    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4661       previous_safe = limit_offset - 65536;
4662    else
4663       previous_safe = f->first_audio_page_offset;
4664 
4665    set_file_offset(f, previous_safe);
4666 
4667    while (vorbis_find_page(f, &end, NULL)) {
4668       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4669          return 1;
4670       set_file_offset(f, end);
4671    }
4672 
4673    return 0;
4674 }
4675 
4676 // implements the search logic for finding a page and starting decoding. if
4677 // the function succeeds, current_loc_valid will be true and current_loc will
4678 // be less than or equal to the provided sample number (the closer the
4679 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4680 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4681 {
4682    ProbedPage left, right, mid;
4683    int i, start_seg_with_known_loc, end_pos, page_start;
4684    uint32 delta, stream_length, padding, last_sample_limit;
4685    double offset = 0.0, bytes_per_sample = 0.0;
4686    int probe = 0;
4687 
4688    // find the last page and validate the target sample
4689    stream_length = stb_vorbis_stream_length_in_samples(f);
4690    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
4691    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4692 
4693    // this is the maximum difference between the window-center (which is the
4694    // actual granule position value), and the right-start (which the spec
4695    // indicates should be the granule position (give or take one)).
4696    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4697    if (sample_number < padding)
4698       last_sample_limit = 0;
4699    else
4700       last_sample_limit = sample_number - padding;
4701 
4702    left = f->p_first;
4703    while (left.last_decoded_sample == ~0U) {
4704       // (untested) the first page does not have a 'last_decoded_sample'
4705       set_file_offset(f, left.page_end);
4706       if (!get_seek_page_info(f, &left)) goto error;
4707    }
4708 
4709    right = f->p_last;
4710    assert(right.last_decoded_sample != ~0U);
4711 
4712    // starting from the start is handled differently
4713    if (last_sample_limit <= left.last_decoded_sample) {
4714       if (stb_vorbis_seek_start(f)) {
4715          if (f->current_loc > sample_number)
4716             return error(f, VORBIS_seek_failed);
4717          return 1;
4718       }
4719       return 0;
4720    }
4721 
4722    while (left.page_end != right.page_start) {
4723       assert(left.page_end < right.page_start);
4724       // search range in bytes
4725       delta = right.page_start - left.page_end;
4726       if (delta <= 65536) {
4727          // there's only 64K left to search - handle it linearly
4728          set_file_offset(f, left.page_end);
4729       } else {
4730          if (probe < 2) {
4731             if (probe == 0) {
4732                // first probe (interpolate)
4733                double data_bytes = right.page_end - left.page_start;
4734                bytes_per_sample = data_bytes / right.last_decoded_sample;
4735                offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
4736             } else {
4737                // second probe (try to bound the other side)
4738                double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
4739                if (error >= 0 && error <  8000) error =  8000;
4740                if (error <  0 && error > -8000) error = -8000;
4741                offset += error * 2;
4742             }
4743 
4744             // ensure the offset is valid
4745             if (offset < left.page_end)
4746                offset = left.page_end;
4747             if (offset > right.page_start - 65536)
4748                offset = right.page_start - 65536;
4749 
4750             set_file_offset(f, (unsigned int) offset);
4751          } else {
4752             // binary search for large ranges (offset by 32K to ensure
4753             // we don't hit the right page)
4754             set_file_offset(f, left.page_end + (delta / 2) - 32768);
4755          }
4756 
4757          if (!vorbis_find_page(f, NULL, NULL)) goto error;
4758       }
4759 
4760       for (;;) {
4761          if (!get_seek_page_info(f, &mid)) goto error;
4762          if (mid.last_decoded_sample != ~0U) break;
4763          // (untested) no frames end on this page
4764          set_file_offset(f, mid.page_end);
4765          assert(mid.page_start < right.page_start);
4766       }
4767 
4768       // if we've just found the last page again then we're in a tricky file,
4769       // and we're close enough (if it wasn't an interpolation probe).
4770       if (mid.page_start == right.page_start) {
4771          if (probe >= 2 || delta <= 65536)
4772             break;
4773       } else {
4774          if (last_sample_limit < mid.last_decoded_sample)
4775             right = mid;
4776          else
4777             left = mid;
4778       }
4779 
4780       ++probe;
4781    }
4782 
4783    // seek back to start of the last packet
4784    page_start = left.page_start;
4785    set_file_offset(f, page_start);
4786    if (!start_page(f)) return error(f, VORBIS_seek_failed);
4787    end_pos = f->end_seg_with_known_loc;
4788    assert(end_pos >= 0);
4789 
4790    for (;;) {
4791       for (i = end_pos; i > 0; --i)
4792          if (f->segments[i-1] != 255)
4793             break;
4794 
4795       start_seg_with_known_loc = i;
4796 
4797       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4798          break;
4799 
4800       // (untested) the final packet begins on an earlier page
4801       if (!go_to_page_before(f, page_start))
4802          goto error;
4803 
4804       page_start = stb_vorbis_get_file_offset(f);
4805       if (!start_page(f)) goto error;
4806       end_pos = f->segment_count - 1;
4807    }
4808 
4809    // prepare to start decoding
4810    f->current_loc_valid = FALSE;
4811    f->last_seg = FALSE;
4812    f->valid_bits = 0;
4813    f->packet_bytes = 0;
4814    f->bytes_in_seg = 0;
4815    f->previous_length = 0;
4816    f->next_seg = start_seg_with_known_loc;
4817 
4818    for (i = 0; i < start_seg_with_known_loc; i++)
4819       skip(f, f->segments[i]);
4820 
4821    // start decoding (optimizable - this frame is generally discarded)
4822    if (!vorbis_pump_first_frame(f))
4823       return 0;
4824    if (f->current_loc > sample_number)
4825       return error(f, VORBIS_seek_failed);
4826    return 1;
4827 
4828 error:
4829    // try to restore the file to a valid state
4830    stb_vorbis_seek_start(f);
4831    return error(f, VORBIS_seek_failed);
4832 }
4833 
4834 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4835 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4836 {
4837    int bits_read, bytes_read;
4838 
4839    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4840       return 0;
4841 
4842    // either 1 or 2 bytes were read, figure out which so we can rewind
4843    bits_read = 1 + ilog(f->mode_count-1);
4844    if (f->mode_config[*mode].blockflag)
4845       bits_read += 2;
4846    bytes_read = (bits_read + 7) / 8;
4847 
4848    f->bytes_in_seg += bytes_read;
4849    f->packet_bytes -= bytes_read;
4850    skip(f, -bytes_read);
4851    if (f->next_seg == -1)
4852       f->next_seg = f->segment_count - 1;
4853    else
4854       f->next_seg--;
4855    f->valid_bits = 0;
4856 
4857    return 1;
4858 }
4859 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4860 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4861 {
4862    uint32 max_frame_samples;
4863 
4864    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4865 
4866    // fast page-level search
4867    if (!seek_to_sample_coarse(f, sample_number))
4868       return 0;
4869 
4870    assert(f->current_loc_valid);
4871    assert(f->current_loc <= sample_number);
4872 
4873    // linear search for the relevant packet
4874    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4875    while (f->current_loc < sample_number) {
4876       int left_start, left_end, right_start, right_end, mode, frame_samples;
4877       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4878          return error(f, VORBIS_seek_failed);
4879       // calculate the number of samples returned by the next frame
4880       frame_samples = right_start - left_start;
4881       if (f->current_loc + frame_samples > sample_number) {
4882          return 1; // the next frame will contain the sample
4883       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4884          // there's a chance the frame after this could contain the sample
4885          vorbis_pump_first_frame(f);
4886       } else {
4887          // this frame is too early to be relevant
4888          f->current_loc += frame_samples;
4889          f->previous_length = 0;
4890          maybe_start_packet(f);
4891          flush_packet(f);
4892       }
4893    }
4894    // the next frame should start with the sample
4895    if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
4896    return 1;
4897 }
4898 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4899 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4900 {
4901    if (!stb_vorbis_seek_frame(f, sample_number))
4902       return 0;
4903 
4904    if (sample_number != f->current_loc) {
4905       int n;
4906       uint32 frame_start = f->current_loc;
4907       stb_vorbis_get_frame_float(f, &n, NULL);
4908       assert(sample_number > frame_start);
4909       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4910       f->channel_buffer_start += (sample_number - frame_start);
4911    }
4912 
4913    return 1;
4914 }
4915 
stb_vorbis_seek_start(stb_vorbis * f)4916 int stb_vorbis_seek_start(stb_vorbis *f)
4917 {
4918    if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4919    set_file_offset(f, f->first_audio_page_offset);
4920    f->previous_length = 0;
4921    f->first_decode = TRUE;
4922    f->next_seg = -1;
4923    return vorbis_pump_first_frame(f);
4924 }
4925 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4926 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4927 {
4928    unsigned int restore_offset, previous_safe;
4929    unsigned int end, last_page_loc;
4930 
4931    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4932    if (!f->total_samples) {
4933       unsigned int last;
4934       uint32 lo,hi;
4935       char header[6];
4936 
4937       // first, store the current decode position so we can restore it
4938       restore_offset = stb_vorbis_get_file_offset(f);
4939 
4940       // now we want to seek back 64K from the end (the last page must
4941       // be at most a little less than 64K, but let's allow a little slop)
4942       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4943          previous_safe = f->stream_len - 65536;
4944       else
4945          previous_safe = f->first_audio_page_offset;
4946 
4947       set_file_offset(f, previous_safe);
4948       // previous_safe is now our candidate 'earliest known place that seeking
4949       // to will lead to the final page'
4950 
4951       if (!vorbis_find_page(f, &end, &last)) {
4952          // if we can't find a page, we're hosed!
4953          f->error = VORBIS_cant_find_last_page;
4954          f->total_samples = 0xffffffff;
4955          goto done;
4956       }
4957 
4958       // check if there are more pages
4959       last_page_loc = stb_vorbis_get_file_offset(f);
4960 
4961       // stop when the last_page flag is set, not when we reach eof;
4962       // this allows us to stop short of a 'file_section' end without
4963       // explicitly checking the length of the section
4964       while (!last) {
4965          set_file_offset(f, end);
4966          if (!vorbis_find_page(f, &end, &last)) {
4967             // the last page we found didn't have the 'last page' flag
4968             // set. whoops!
4969             break;
4970          }
4971          previous_safe = last_page_loc+1;
4972          last_page_loc = stb_vorbis_get_file_offset(f);
4973       }
4974 
4975       set_file_offset(f, last_page_loc);
4976 
4977       // parse the header
4978       getn(f, (unsigned char *)header, 6);
4979       // extract the absolute granule position
4980       lo = get32(f);
4981       hi = get32(f);
4982       if (lo == 0xffffffff && hi == 0xffffffff) {
4983          f->error = VORBIS_cant_find_last_page;
4984          f->total_samples = SAMPLE_unknown;
4985          goto done;
4986       }
4987       if (hi)
4988          lo = 0xfffffffe; // saturate
4989       f->total_samples = lo;
4990 
4991       f->p_last.page_start = last_page_loc;
4992       f->p_last.page_end   = end;
4993       f->p_last.last_decoded_sample = lo;
4994 
4995      done:
4996       set_file_offset(f, restore_offset);
4997    }
4998    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4999 }
5000 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)5001 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
5002 {
5003    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
5004 }
5005 
5006 
5007 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)5008 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
5009 {
5010    int len, right,left,i;
5011    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
5012 
5013    if (!vorbis_decode_packet(f, &len, &left, &right)) {
5014       f->channel_buffer_start = f->channel_buffer_end = 0;
5015       return 0;
5016    }
5017 
5018    len = vorbis_finish_frame(f, len, left, right);
5019    for (i=0; i < f->channels; ++i)
5020       f->outputs[i] = f->channel_buffers[i] + left;
5021 
5022    f->channel_buffer_start = left;
5023    f->channel_buffer_end   = left+len;
5024 
5025    if (channels) *channels = f->channels;
5026    if (output)   *output = f->outputs;
5027    return len;
5028 }
5029 
5030 #ifndef STB_VORBIS_NO_STDIO
5031 
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)5032 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5033 {
5034    stb_vorbis *f, p;
5035    vorbis_init(&p, alloc);
5036    p.f = file;
5037    p.f_start = (uint32) ftell(file);
5038    p.stream_len   = length;
5039    p.close_on_free = close_on_free;
5040    if (start_decoder(&p)) {
5041       f = vorbis_alloc(&p);
5042       if (f) {
5043          *f = p;
5044          vorbis_pump_first_frame(f);
5045          return f;
5046       }
5047    }
5048    if (error) *error = p.error;
5049    vorbis_deinit(&p);
5050    return NULL;
5051 }
5052 
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)5053 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5054 {
5055    unsigned int len, start;
5056    start = (unsigned int) ftell(file);
5057    fseek(file, 0, SEEK_END);
5058    len = (unsigned int) (ftell(file) - start);
5059    fseek(file, start, SEEK_SET);
5060    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5061 }
5062 
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)5063 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5064 {
5065    FILE *f;
5066 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5067    if (0 != fopen_s(&f, filename, "rb"))
5068       f = NULL;
5069 #else
5070    f = fopen(filename, "rb");
5071 #endif
5072    if (f)
5073       return stb_vorbis_open_file(f, TRUE, error, alloc);
5074    if (error) *error = VORBIS_file_open_failure;
5075    return NULL;
5076 }
5077 #endif // STB_VORBIS_NO_STDIO
5078 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5079 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5080 {
5081    stb_vorbis *f, p;
5082    if (data == NULL) return NULL;
5083    vorbis_init(&p, alloc);
5084    p.stream = (uint8 *) data;
5085    p.stream_end = (uint8 *) data + len;
5086    p.stream_start = (uint8 *) p.stream;
5087    p.stream_len = len;
5088    p.push_mode = FALSE;
5089    if (start_decoder(&p)) {
5090       f = vorbis_alloc(&p);
5091       if (f) {
5092          *f = p;
5093          vorbis_pump_first_frame(f);
5094          if (error) *error = VORBIS__no_error;
5095          return f;
5096       }
5097    }
5098    if (error) *error = p.error;
5099    vorbis_deinit(&p);
5100    return NULL;
5101 }
5102 
5103 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5104 #define PLAYBACK_MONO     1
5105 #define PLAYBACK_LEFT     2
5106 #define PLAYBACK_RIGHT    4
5107 
5108 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
5109 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
5110 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
5111 
5112 static int8 channel_position[7][6] =
5113 {
5114    { 0 },
5115    { C },
5116    { L, R },
5117    { L, C, R },
5118    { L, R, L, R },
5119    { L, C, R, L, R },
5120    { L, C, R, L, R, C },
5121 };
5122 
5123 
5124 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5125    typedef union {
5126       float f;
5127       int i;
5128    } float_conv;
5129    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5130    #define FASTDEF(x) float_conv x
5131    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5132    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5133    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5134    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5135    #define check_endianness()
5136 #else
5137    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5138    #define check_endianness()
5139    #define FASTDEF(x)
5140 #endif
5141 
copy_samples(short * dest,float * src,int len)5142 static void copy_samples(short *dest, float *src, int len)
5143 {
5144    int i;
5145    check_endianness();
5146    for (i=0; i < len; ++i) {
5147       FASTDEF(temp);
5148       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5149       if ((unsigned int) (v + 32768) > 65535)
5150          v = v < 0 ? -32768 : 32767;
5151       dest[i] = v;
5152    }
5153 }
5154 
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5155 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5156 {
5157    #define BUFFER_SIZE  32
5158    float buffer[BUFFER_SIZE];
5159    int i,j,o,n = BUFFER_SIZE;
5160    check_endianness();
5161    for (o = 0; o < len; o += BUFFER_SIZE) {
5162       memset(buffer, 0, sizeof(buffer));
5163       if (o + n > len) n = len - o;
5164       for (j=0; j < num_c; ++j) {
5165          if (channel_position[num_c][j] & mask) {
5166             for (i=0; i < n; ++i)
5167                buffer[i] += data[j][d_offset+o+i];
5168          }
5169       }
5170       for (i=0; i < n; ++i) {
5171          FASTDEF(temp);
5172          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5173          if ((unsigned int) (v + 32768) > 65535)
5174             v = v < 0 ? -32768 : 32767;
5175          output[o+i] = v;
5176       }
5177    }
5178 }
5179 
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5180 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5181 {
5182    #define BUFFER_SIZE  32
5183    float buffer[BUFFER_SIZE];
5184    int i,j,o,n = BUFFER_SIZE >> 1;
5185    // o is the offset in the source data
5186    check_endianness();
5187    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5188       // o2 is the offset in the output data
5189       int o2 = o << 1;
5190       memset(buffer, 0, sizeof(buffer));
5191       if (o + n > len) n = len - o;
5192       for (j=0; j < num_c; ++j) {
5193          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5194          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5195             for (i=0; i < n; ++i) {
5196                buffer[i*2+0] += data[j][d_offset+o+i];
5197                buffer[i*2+1] += data[j][d_offset+o+i];
5198             }
5199          } else if (m == PLAYBACK_LEFT) {
5200             for (i=0; i < n; ++i) {
5201                buffer[i*2+0] += data[j][d_offset+o+i];
5202             }
5203          } else if (m == PLAYBACK_RIGHT) {
5204             for (i=0; i < n; ++i) {
5205                buffer[i*2+1] += data[j][d_offset+o+i];
5206             }
5207          }
5208       }
5209       for (i=0; i < (n<<1); ++i) {
5210          FASTDEF(temp);
5211          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5212          if ((unsigned int) (v + 32768) > 65535)
5213             v = v < 0 ? -32768 : 32767;
5214          output[o2+i] = v;
5215       }
5216    }
5217 }
5218 
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5219 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5220 {
5221    int i;
5222    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5223       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5224       for (i=0; i < buf_c; ++i)
5225          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5226    } else {
5227       int limit = buf_c < data_c ? buf_c : data_c;
5228       for (i=0; i < limit; ++i)
5229          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5230       for (   ; i < buf_c; ++i)
5231          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5232    }
5233 }
5234 
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5235 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5236 {
5237    float **output = NULL;
5238    int len = stb_vorbis_get_frame_float(f, NULL, &output);
5239    if (len > num_samples) len = num_samples;
5240    if (len)
5241       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5242    return len;
5243 }
5244 
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5245 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5246 {
5247    int i;
5248    check_endianness();
5249    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5250       assert(buf_c == 2);
5251       for (i=0; i < buf_c; ++i)
5252          compute_stereo_samples(buffer, data_c, data, d_offset, len);
5253    } else {
5254       int limit = buf_c < data_c ? buf_c : data_c;
5255       int j;
5256       for (j=0; j < len; ++j) {
5257          for (i=0; i < limit; ++i) {
5258             FASTDEF(temp);
5259             float f = data[i][d_offset+j];
5260             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5261             if ((unsigned int) (v + 32768) > 65535)
5262                v = v < 0 ? -32768 : 32767;
5263             *buffer++ = v;
5264          }
5265          for (   ; i < buf_c; ++i)
5266             *buffer++ = 0;
5267       }
5268    }
5269 }
5270 
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5271 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5272 {
5273    float **output;
5274    int len;
5275    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5276    len = stb_vorbis_get_frame_float(f, NULL, &output);
5277    if (len) {
5278       if (len*num_c > num_shorts) len = num_shorts / num_c;
5279       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5280    }
5281    return len;
5282 }
5283 
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5284 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5285 {
5286    float **outputs;
5287    int len = num_shorts / channels;
5288    int n=0;
5289    int z = f->channels;
5290    if (z > channels) z = channels;
5291    while (n < len) {
5292       int k = f->channel_buffer_end - f->channel_buffer_start;
5293       if (n+k >= len) k = len - n;
5294       if (k)
5295          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5296       buffer += k*channels;
5297       n += k;
5298       f->channel_buffer_start += k;
5299       if (n == len) break;
5300       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5301    }
5302    return n;
5303 }
5304 
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5305 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5306 {
5307    float **outputs;
5308    int n=0;
5309    int z = f->channels;
5310    if (z > channels) z = channels;
5311    while (n < len) {
5312       int k = f->channel_buffer_end - f->channel_buffer_start;
5313       if (n+k >= len) k = len - n;
5314       if (k)
5315          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5316       n += k;
5317       f->channel_buffer_start += k;
5318       if (n == len) break;
5319       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5320    }
5321    return n;
5322 }
5323 
5324 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5325 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5326 {
5327    int data_len, offset, total, limit, error;
5328    short *data;
5329    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5330    if (v == NULL) return -1;
5331    limit = v->channels * 4096;
5332    *channels = v->channels;
5333    if (sample_rate)
5334       *sample_rate = v->sample_rate;
5335    offset = data_len = 0;
5336    total = limit;
5337    data = (short *) malloc(total * sizeof(*data));
5338    if (data == NULL) {
5339       stb_vorbis_close(v);
5340       return -2;
5341    }
5342    for (;;) {
5343       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5344       if (n == 0) break;
5345       data_len += n;
5346       offset += n * v->channels;
5347       if (offset + limit > total) {
5348          short *data2;
5349          total *= 2;
5350          data2 = (short *) realloc(data, total * sizeof(*data));
5351          if (data2 == NULL) {
5352             free(data);
5353             stb_vorbis_close(v);
5354             return -2;
5355          }
5356          data = data2;
5357       }
5358    }
5359    *output = data;
5360    stb_vorbis_close(v);
5361    return data_len;
5362 }
5363 #endif // NO_STDIO
5364 
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5365 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5366 {
5367    int data_len, offset, total, limit, error;
5368    short *data;
5369    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5370    if (v == NULL) return -1;
5371    limit = v->channels * 4096;
5372    *channels = v->channels;
5373    if (sample_rate)
5374       *sample_rate = v->sample_rate;
5375    offset = data_len = 0;
5376    total = limit;
5377    data = (short *) malloc(total * sizeof(*data));
5378    if (data == NULL) {
5379       stb_vorbis_close(v);
5380       return -2;
5381    }
5382    for (;;) {
5383       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5384       if (n == 0) break;
5385       data_len += n;
5386       offset += n * v->channels;
5387       if (offset + limit > total) {
5388          short *data2;
5389          total *= 2;
5390          data2 = (short *) realloc(data, total * sizeof(*data));
5391          if (data2 == NULL) {
5392             free(data);
5393             stb_vorbis_close(v);
5394             return -2;
5395          }
5396          data = data2;
5397       }
5398    }
5399    *output = data;
5400    stb_vorbis_close(v);
5401    return data_len;
5402 }
5403 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5404 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5405 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5406 {
5407    float **outputs;
5408    int len = num_floats / channels;
5409    int n=0;
5410    int z = f->channels;
5411    if (z > channels) z = channels;
5412    while (n < len) {
5413       int i,j;
5414       int k = f->channel_buffer_end - f->channel_buffer_start;
5415       if (n+k >= len) k = len - n;
5416       for (j=0; j < k; ++j) {
5417          for (i=0; i < z; ++i)
5418             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5419          for (   ; i < channels; ++i)
5420             *buffer++ = 0;
5421       }
5422       n += k;
5423       f->channel_buffer_start += k;
5424       if (n == len)
5425          break;
5426       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5427          break;
5428    }
5429    return n;
5430 }
5431 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5432 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5433 {
5434    float **outputs;
5435    int n=0;
5436    int z = f->channels;
5437    if (z > channels) z = channels;
5438    while (n < num_samples) {
5439       int i;
5440       int k = f->channel_buffer_end - f->channel_buffer_start;
5441       if (n+k >= num_samples) k = num_samples - n;
5442       if (k) {
5443          for (i=0; i < z; ++i)
5444             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5445          for (   ; i < channels; ++i)
5446             memset(buffer[i]+n, 0, sizeof(float) * k);
5447       }
5448       n += k;
5449       f->channel_buffer_start += k;
5450       if (n == num_samples)
5451          break;
5452       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5453          break;
5454    }
5455    return n;
5456 }
5457 #endif // STB_VORBIS_NO_PULLDATA_API
5458 
5459 /* Version history
5460     1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5461                            found with Mayhem by ForAllSecure
5462     1.16    - 2019-03-04 - fix warnings
5463     1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5464     1.14    - 2018-02-11 - delete bogus dealloca usage
5465     1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
5466     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5467     1.11    - 2017-07-23 - fix MinGW compilation
5468     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5469     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5470     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5471                            avoid discarding last frame of audio data
5472     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5473                            some more crash fixes when out of memory or with corrupt files
5474     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5475                            some crash fixes when out of memory or with corrupt files
5476     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
5477     1.04    - 2014-08-27 - fix missing const-correct case in API
5478     1.03    - 2014-08-07 - Warning fixes
5479     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
5480     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
5481     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5482                            (API change) report sample rate for decode-full-file funcs
5483     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5484     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5485     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5486     0.99993 - remove assert that fired on legal files with empty tables
5487     0.99992 - rewind-to-start
5488     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5489     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5490     0.9998 - add a full-decode function with a memory source
5491     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5492     0.9996 - query length of vorbis stream in samples/seconds
5493     0.9995 - bugfix to another optimization that only happened in certain files
5494     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5495     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5496     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5497     0.9991 - performance improvement of IMDCT
5498     0.999 - (should have been 0.9990) performance improvement of IMDCT
5499     0.998 - no-CRT support from Casey Muratori
5500     0.997 - bugfixes for bugs found by Terje Mathisen
5501     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5502     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5503     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5504     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5505     0.992 - fixes for MinGW warning
5506     0.991 - turn fast-float-conversion on by default
5507     0.990 - fix push-mode seek recovery if you seek into the headers
5508     0.98b - fix to bad release of 0.98
5509     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5510     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5511     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5512     0.95 - clamping code for 16-bit functions
5513     0.94 - not publically released
5514     0.93 - fixed all-zero-floor case (was decoding garbage)
5515     0.92 - fixed a memory leak
5516     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5517     0.90 - first public release
5518 */
5519 
5520 #endif // STB_VORBIS_HEADER_ONLY
5521 
5522 
5523 /*
5524 ------------------------------------------------------------------------------
5525 This software is available under 2 licenses -- choose whichever you prefer.
5526 ------------------------------------------------------------------------------
5527 ALTERNATIVE A - MIT License
5528 Copyright (c) 2017 Sean Barrett
5529 Permission is hereby granted, free of charge, to any person obtaining a copy of
5530 this software and associated documentation files (the "Software"), to deal in
5531 the Software without restriction, including without limitation the rights to
5532 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5533 of the Software, and to permit persons to whom the Software is furnished to do
5534 so, subject to the following conditions:
5535 The above copyright notice and this permission notice shall be included in all
5536 copies or substantial portions of the Software.
5537 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5538 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5539 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5540 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5541 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5542 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5543 SOFTWARE.
5544 ------------------------------------------------------------------------------
5545 ALTERNATIVE B - Public Domain (www.unlicense.org)
5546 This is free and unencumbered software released into the public domain.
5547 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5548 software, either in source code form or as a compiled binary, for any purpose,
5549 commercial or non-commercial, and by any means.
5550 In jurisdictions that recognize copyright laws, the author or authors of this
5551 software dedicate any and all copyright interest in the software to the public
5552 domain. We make this dedication for the benefit of the public at large and to
5553 the detriment of our heirs and successors. We intend this dedication to be an
5554 overt act of relinquishment in perpetuity of all present and future rights to
5555 this software under copyright law.
5556 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5557 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5558 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5559 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5560 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5561 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5562 ------------------------------------------------------------------------------
5563 */
5564