1 // Ogg Vorbis audio decoder - v1.19 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        github:alxprd
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    github:manxorist   saga musix          github:infatum
33 //    Timur Gagiev       Maxwell Koo         Peter Waller
34 //    github:audinowho   Dougall Johnson
35 //
36 // Partial history:
37 //    1.19    - 2020-02-05 - warnings
38 //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
39 //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
40 //    1.16    - 2019-03-04 - fix warnings
41 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
42 //    1.14    - 2018-02-11 - delete bogus dealloca usage
43 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
44 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
45 //    1.11    - 2017-07-23 - fix MinGW compilation
46 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
47 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
48 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
49 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
50 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
51 //                           some crash fixes when out of memory or with corrupt files
52 //                           fix some inappropriately signed shifts
53 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
54 //    1.04    - 2014-08-27 - fix missing const-correct case in API
55 //    1.03    - 2014-08-07 - warning fixes
56 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
57 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
58 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
59 //                           (API change) report sample rate for decode-full-file funcs
60 //
61 // See end of file for full version history.
62 
63 
64 //////////////////////////////////////////////////////////////////////////////
65 //
66 //  HEADER BEGINS HERE
67 //
68 
69 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
70 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
71 
72 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
73 #define STB_VORBIS_NO_STDIO 1
74 #endif
75 
76 #ifndef STB_VORBIS_NO_STDIO
77 #include <stdio.h>
78 #endif
79 
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83 
84 ///////////   THREAD SAFETY
85 
86 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
87 // them from multiple threads at the same time. However, you can have multiple
88 // stb_vorbis* handles and decode from them independently in multiple thrads.
89 
90 
91 ///////////   MEMORY ALLOCATION
92 
93 // normally stb_vorbis uses malloc() to allocate memory at startup,
94 // and alloca() to allocate temporary memory during a frame on the
95 // stack. (Memory consumption will depend on the amount of setup
96 // data in the file and how you set the compile flags for speed
97 // vs. size. In my test files the maximal-size usage is ~150KB.)
98 //
99 // You can modify the wrapper functions in the source (setup_malloc,
100 // setup_temp_malloc, temp_malloc) to change this behavior, or you
101 // can use a simpler allocation model: you pass in a buffer from
102 // which stb_vorbis will allocate _all_ its memory (including the
103 // temp memory). "open" may fail with a VORBIS_outofmem if you
104 // do not pass in enough data; there is no way to determine how
105 // much you do need except to succeed (at which point you can
106 // query get_info to find the exact amount required. yes I know
107 // this is lame).
108 //
109 // If you pass in a non-NULL buffer of the type below, allocation
110 // will occur from it as described above. Otherwise just pass NULL
111 // to use malloc()/alloca()
112 
113 typedef struct
114 {
115    char *alloc_buffer;
116    int   alloc_buffer_length_in_bytes;
117 } stb_vorbis_alloc;
118 
119 
120 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
121 
122 typedef struct stb_vorbis stb_vorbis;
123 
124 typedef struct
125 {
126    unsigned int sample_rate;
127    int channels;
128 
129    unsigned int setup_memory_required;
130    unsigned int setup_temp_memory_required;
131    unsigned int temp_memory_required;
132 
133    int max_frame_size;
134 } stb_vorbis_info;
135 
136 typedef struct
137 {
138    char *vendor;
139 
140    int comment_list_length;
141    char **comment_list;
142 } stb_vorbis_comment;
143 
144 // get general information about the file
145 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
146 
147 // get ogg comments
148 extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
149 
150 // get the last error detected (clears it, too)
151 extern int stb_vorbis_get_error(stb_vorbis *f);
152 
153 // close an ogg vorbis file and free all memory in use
154 extern void stb_vorbis_close(stb_vorbis *f);
155 
156 // this function returns the offset (in samples) from the beginning of the
157 // file that will be returned by the next decode, if it is known, or -1
158 // otherwise. after a flush_pushdata() call, this may take a while before
159 // it becomes valid again.
160 // NOT WORKING YET after a seek with PULLDATA API
161 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
162 
163 // returns the current seek point within the file, or offset from the beginning
164 // of the memory buffer. In pushdata mode it returns 0.
165 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
166 
167 ///////////   PUSHDATA API
168 
169 #ifndef STB_VORBIS_NO_PUSHDATA_API
170 
171 // this API allows you to get blocks of data from any source and hand
172 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
173 // you how much it used, and you have to give it the rest next time;
174 // and stb_vorbis may not have enough data to work with and you will
175 // need to give it the same data again PLUS more. Note that the Vorbis
176 // specification does not bound the size of an individual frame.
177 
178 extern stb_vorbis *stb_vorbis_open_pushdata(
179          const unsigned char * datablock, int datablock_length_in_bytes,
180          int *datablock_memory_consumed_in_bytes,
181          int *error,
182          const stb_vorbis_alloc *alloc_buffer);
183 // create a vorbis decoder by passing in the initial data block containing
184 //    the ogg&vorbis headers (you don't need to do parse them, just provide
185 //    the first N bytes of the file--you're told if it's not enough, see below)
186 // on success, returns an stb_vorbis *, does not set error, returns the amount of
187 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
188 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
189 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
190 //       incomplete and you need to pass in a larger block from the start of the file
191 
192 extern int stb_vorbis_decode_frame_pushdata(
193          stb_vorbis *f,
194          const unsigned char *datablock, int datablock_length_in_bytes,
195          int *channels,             // place to write number of float * buffers
196          float ***output,           // place to write float ** array of float * buffers
197          int *samples               // place to write number of output samples
198      );
199 // decode a frame of audio sample data if possible from the passed-in data block
200 //
201 // return value: number of bytes we used from datablock
202 //
203 // possible cases:
204 //     0 bytes used, 0 samples output (need more data)
205 //     N bytes used, 0 samples output (resynching the stream, keep going)
206 //     N bytes used, M samples output (one frame of data)
207 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
208 // frame, because Vorbis always "discards" the first frame.
209 //
210 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
211 // instead only datablock_length_in_bytes-3 or less. This is because it wants
212 // to avoid missing parts of a page header if they cross a datablock boundary,
213 // without writing state-machiney code to record a partial detection.
214 //
215 // The number of channels returned are stored in *channels (which can be
216 // NULL--it is always the same as the number of channels reported by
217 // get_info). *output will contain an array of float* buffers, one per
218 // channel. In other words, (*output)[0][0] contains the first sample from
219 // the first channel, and (*output)[1][0] contains the first sample from
220 // the second channel.
221 
222 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
223 // inform stb_vorbis that your next datablock will not be contiguous with
224 // previous ones (e.g. you've seeked in the data); future attempts to decode
225 // frames will cause stb_vorbis to resynchronize (as noted above), and
226 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
227 // will begin decoding the _next_ frame.
228 //
229 // if you want to seek using pushdata, you need to seek in your file, then
230 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
231 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
232 // if you don't like the result, seek your file again and repeat.
233 #endif
234 
235 
236 //////////   PULLING INPUT API
237 
238 #ifndef STB_VORBIS_NO_PULLDATA_API
239 // This API assumes stb_vorbis is allowed to pull data from a source--
240 // either a block of memory containing the _entire_ vorbis stream, or a
241 // FILE * that you or it create, or possibly some other reading mechanism
242 // if you go modify the source to replace the FILE * case with some kind
243 // of callback to your code. (But if you don't support seeking, you may
244 // just want to go ahead and use pushdata.)
245 
246 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
247 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
248 #endif
249 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
250 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
251 #endif
252 // decode an entire file and output the data interleaved into a malloc()ed
253 // buffer stored in *output. The return value is the number of samples
254 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
255 // When you're done with it, just free() the pointer returned in *output.
256 
257 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
258                                   int *error, const stb_vorbis_alloc *alloc_buffer);
259 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
260 // this must be the entire stream!). on failure, returns NULL and sets *error
261 
262 #ifndef STB_VORBIS_NO_STDIO
263 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
264                                   int *error, const stb_vorbis_alloc *alloc_buffer);
265 // create an ogg vorbis decoder from a filename via fopen(). on failure,
266 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
267 
268 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
269                                   int *error, const stb_vorbis_alloc *alloc_buffer);
270 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
271 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
272 // note that stb_vorbis must "own" this stream; if you seek it in between
273 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
274 // perform stb_vorbis_seek_*() operations on this file, it will assume it
275 // owns the _entire_ rest of the file after the start point. Use the next
276 // function, stb_vorbis_open_file_section(), to limit it.
277 
278 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
279                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
280 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
281 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
282 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
283 // this stream; if you seek it in between calls to stb_vorbis, it will become
284 // confused.
285 #endif
286 
287 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
288 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
289 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
290 // after calling seek_frame(), the next call to get_frame_*() will include
291 // the specified sample. after calling stb_vorbis_seek(), the next call to
292 // stb_vorbis_get_samples_* will start with the specified sample. If you
293 // do not need to seek to EXACTLY the target sample when using get_samples_*,
294 // you can also use seek_frame().
295 
296 extern int stb_vorbis_seek_start(stb_vorbis *f);
297 // this function is equivalent to stb_vorbis_seek(f,0)
298 
299 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
300 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
301 // these functions return the total length of the vorbis stream
302 
303 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
304 // decode the next frame and return the number of samples. the number of
305 // channels returned are stored in *channels (which can be NULL--it is always
306 // the same as the number of channels reported by get_info). *output will
307 // contain an array of float* buffers, one per channel. These outputs will
308 // be overwritten on the next call to stb_vorbis_get_frame_*.
309 //
310 // You generally should not intermix calls to stb_vorbis_get_frame_*()
311 // and stb_vorbis_get_samples_*(), since the latter calls the former.
312 
313 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
314 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
315 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
316 #endif
317 // decode the next frame and return the number of *samples* per channel.
318 // Note that for interleaved data, you pass in the number of shorts (the
319 // size of your array), but the return value is the number of samples per
320 // channel, not the total number of samples.
321 //
322 // The data is coerced to the number of channels you request according to the
323 // channel coercion rules (see below). You must pass in the size of your
324 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
325 // The maximum buffer size needed can be gotten from get_info(); however,
326 // the Vorbis I specification implies an absolute maximum of 4096 samples
327 // per channel.
328 
329 // Channel coercion rules:
330 //    Let M be the number of channels requested, and N the number of channels present,
331 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
332 //    and stereo R be the sum of all R and center channels (channel assignment from the
333 //    vorbis spec).
334 //        M    N       output
335 //        1    k      sum(Ck) for all k
336 //        2    *      stereo L, stereo R
337 //        k    l      k > l, the first l channels, then 0s
338 //        k    l      k <= l, the first k channels
339 //    Note that this is not _good_ surround etc. mixing at all! It's just so
340 //    you get something useful.
341 
342 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
343 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
344 // gets num_samples samples, not necessarily on a frame boundary--this requires
345 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
346 // Returns the number of samples stored per channel; it may be less than requested
347 // at the end of the file. If there are no more samples in the file, returns 0.
348 
349 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
350 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
351 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
352 #endif
353 // gets num_samples samples, not necessarily on a frame boundary--this requires
354 // buffering so you have to supply the buffers. Applies the coercion rules above
355 // to produce 'channels' channels. Returns the number of samples stored per channel;
356 // it may be less than requested at the end of the file. If there are no more
357 // samples in the file, returns 0.
358 
359 #endif
360 
361 ////////   ERROR CODES
362 
363 enum STBVorbisError
364 {
365    VORBIS__no_error,
366 
367    VORBIS_need_more_data=1,             // not a real error
368 
369    VORBIS_invalid_api_mixing,           // can't mix API modes
370    VORBIS_outofmem,                     // not enough memory
371    VORBIS_feature_not_supported,        // uses floor 0
372    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
373    VORBIS_file_open_failure,            // fopen() failed
374    VORBIS_seek_without_length,          // can't seek in unknown-length file
375 
376    VORBIS_unexpected_eof=10,            // file is truncated?
377    VORBIS_seek_invalid,                 // seek past EOF
378 
379    // decoding errors (corrupt/invalid stream) -- you probably
380    // don't care about the exact details of these
381 
382    // vorbis errors:
383    VORBIS_invalid_setup=20,
384    VORBIS_invalid_stream,
385 
386    // ogg errors:
387    VORBIS_missing_capture_pattern=30,
388    VORBIS_invalid_stream_structure_version,
389    VORBIS_continued_packet_flag_invalid,
390    VORBIS_incorrect_stream_serial_number,
391    VORBIS_invalid_first_page,
392    VORBIS_bad_packet_type,
393    VORBIS_cant_find_last_page,
394    VORBIS_seek_failed,
395    VORBIS_ogg_skeleton_not_supported
396 };
397 
398 
399 #ifdef __cplusplus
400 }
401 #endif
402 
403 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
404 //
405 //  HEADER ENDS HERE
406 //
407 //////////////////////////////////////////////////////////////////////////////
408 
409 #ifndef STB_VORBIS_HEADER_ONLY
410 
411 // global configuration settings (e.g. set these in the project/makefile),
412 // or just set them in this file at the top (although ideally the first few
413 // should be visible when the header file is compiled too, although it's not
414 // crucial)
415 
416 // STB_VORBIS_NO_PUSHDATA_API
417 //     does not compile the code for the various stb_vorbis_*_pushdata()
418 //     functions
419 // #define STB_VORBIS_NO_PUSHDATA_API
420 
421 // STB_VORBIS_NO_PULLDATA_API
422 //     does not compile the code for the non-pushdata APIs
423 // #define STB_VORBIS_NO_PULLDATA_API
424 
425 // STB_VORBIS_NO_STDIO
426 //     does not compile the code for the APIs that use FILE *s internally
427 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
428 // #define STB_VORBIS_NO_STDIO
429 
430 // STB_VORBIS_NO_INTEGER_CONVERSION
431 //     does not compile the code for converting audio sample data from
432 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
433 // #define STB_VORBIS_NO_INTEGER_CONVERSION
434 
435 // STB_VORBIS_NO_FAST_SCALED_FLOAT
436 //      does not use a fast float-to-int trick to accelerate float-to-int on
437 //      most platforms which requires endianness be defined correctly.
438 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
439 
440 
441 // STB_VORBIS_MAX_CHANNELS [number]
442 //     globally define this to the maximum number of channels you need.
443 //     The spec does not put a restriction on channels except that
444 //     the count is stored in a byte, so 255 is the hard limit.
445 //     Reducing this saves about 16 bytes per value, so using 16 saves
446 //     (255-16)*16 or around 4KB. Plus anything other memory usage
447 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
448 //     6 (5.1 audio), or 2 (stereo only).
449 #ifndef STB_VORBIS_MAX_CHANNELS
450 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
451 #endif
452 
453 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
454 //     after a flush_pushdata(), stb_vorbis begins scanning for the
455 //     next valid page, without backtracking. when it finds something
456 //     that looks like a page, it streams through it and verifies its
457 //     CRC32. Should that validation fail, it keeps scanning. But it's
458 //     possible that _while_ streaming through to check the CRC32 of
459 //     one candidate page, it sees another candidate page. This #define
460 //     determines how many "overlapping" candidate pages it can search
461 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
462 //     garbage pages could be as big as 64KB, but probably average ~16KB.
463 //     So don't hose ourselves by scanning an apparent 64KB page and
464 //     missing a ton of real ones in the interim; so minimum of 2
465 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
466 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
467 #endif
468 
469 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
470 //     sets the log size of the huffman-acceleration table.  Maximum
471 //     supported value is 24. with larger numbers, more decodings are O(1),
472 //     but the table size is larger so worse cache missing, so you'll have
473 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
474 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
475 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
476 #endif
477 
478 // STB_VORBIS_FAST_BINARY_LENGTH [number]
479 //     sets the log size of the binary-search acceleration table. this
480 //     is used in similar fashion to the fast-huffman size to set initial
481 //     parameters for the binary search
482 
483 // STB_VORBIS_FAST_HUFFMAN_INT
484 //     The fast huffman tables are much more efficient if they can be
485 //     stored as 16-bit results instead of 32-bit results. This restricts
486 //     the codebooks to having only 65535 possible outcomes, though.
487 //     (At least, accelerated by the huffman table.)
488 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
489 #define STB_VORBIS_FAST_HUFFMAN_SHORT
490 #endif
491 
492 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
493 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
494 //     back on binary searching for the correct one. This requires storing
495 //     extra tables with the huffman codes in sorted order. Defining this
496 //     symbol trades off space for speed by forcing a linear search in the
497 //     non-fast case, except for "sparse" codebooks.
498 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
499 
500 // STB_VORBIS_DIVIDES_IN_RESIDUE
501 //     stb_vorbis precomputes the result of the scalar residue decoding
502 //     that would otherwise require a divide per chunk. you can trade off
503 //     space for time by defining this symbol.
504 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
505 
506 // STB_VORBIS_DIVIDES_IN_CODEBOOK
507 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
508 //     stored, or with all elements being chosen from a small range of values,
509 //     and all values possible in all elements. By default, stb_vorbis expands
510 //     this latter kind out to look like the former kind for ease of decoding,
511 //     because otherwise an integer divide-per-vector-element is required to
512 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
513 //     trade off storage for speed.
514 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
515 
516 #ifdef STB_VORBIS_CODEBOOK_SHORTS
517 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
518 #endif
519 
520 // STB_VORBIS_DIVIDE_TABLE
521 //     this replaces small integer divides in the floor decode loop with
522 //     table lookups. made less than 1% difference, so disabled by default.
523 
524 // STB_VORBIS_NO_INLINE_DECODE
525 //     disables the inlining of the scalar codebook fast-huffman decode.
526 //     might save a little codespace; useful for debugging
527 // #define STB_VORBIS_NO_INLINE_DECODE
528 
529 // STB_VORBIS_NO_DEFER_FLOOR
530 //     Normally we only decode the floor without synthesizing the actual
531 //     full curve. We can instead synthesize the curve immediately. This
532 //     requires more memory and is very likely slower, so I don't think
533 //     you'd ever want to do it except for debugging.
534 // #define STB_VORBIS_NO_DEFER_FLOOR
535 
536 
537 
538 
539 //////////////////////////////////////////////////////////////////////////////
540 
541 #ifdef STB_VORBIS_NO_PULLDATA_API
542    #define STB_VORBIS_NO_INTEGER_CONVERSION
543    #define STB_VORBIS_NO_STDIO
544 #endif
545 
546 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
547    #define STB_VORBIS_NO_STDIO 1
548 #endif
549 
550 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
551 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
552 
553    // only need endianness for fast-float-to-int, which we don't
554    // use for pushdata
555 
556    #ifndef STB_VORBIS_BIG_ENDIAN
557      #define STB_VORBIS_ENDIAN  0
558    #else
559      #define STB_VORBIS_ENDIAN  1
560    #endif
561 
562 #endif
563 #endif
564 
565 
566 #ifndef STB_VORBIS_NO_STDIO
567 #include <stdio.h>
568 #endif
569 
570 #ifndef STB_VORBIS_NO_CRT
571    #include <stdlib.h>
572    #include <string.h>
573    #include <assert.h>
574    #include <math.h>
575 
576    // find definition of alloca if it's not in stdlib.h:
577    #if defined(_MSC_VER) || defined(__MINGW32__)
578       #include <malloc.h>
579    #endif
580    #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
581       #include <alloca.h>
582    #endif
583 #else // STB_VORBIS_NO_CRT
584    #define NULL 0
585    #define malloc(s)   0
586    #define free(s)     ((void) 0)
587    #define realloc(s)  0
588 #endif // STB_VORBIS_NO_CRT
589 
590 #include <limits.h>
591 
592 #ifdef __MINGW32__
593    // eff you mingw:
594    //     "fixed":
595    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
596    //     "no that broke the build, reverted, who cares about C":
597    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
598    #ifdef __forceinline
599    #undef __forceinline
600    #endif
601    #define __forceinline
602    #define alloca __builtin_alloca
603 #elif !defined(_MSC_VER)
604    #if __GNUC__
605       #define __forceinline inline
606    #else
607       #define __forceinline
608    #endif
609 #endif
610 
611 #if STB_VORBIS_MAX_CHANNELS > 256
612 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
613 #endif
614 
615 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
616 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
617 #endif
618 
619 
620 #if 0
621 #include <crtdbg.h>
622 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
623 #else
624 #define CHECK(f)   ((void) 0)
625 #endif
626 
627 #define MAX_BLOCKSIZE_LOG  13   // from specification
628 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
629 
630 
631 typedef unsigned char  uint8;
632 typedef   signed char   int8;
633 typedef unsigned short uint16;
634 typedef   signed short  int16;
635 typedef unsigned int   uint32;
636 typedef   signed int    int32;
637 
638 #ifndef TRUE
639 #define TRUE 1
640 #define FALSE 0
641 #endif
642 
643 typedef float codetype;
644 
645 // @NOTE
646 //
647 // Some arrays below are tagged "//varies", which means it's actually
648 // a variable-sized piece of data, but rather than malloc I assume it's
649 // small enough it's better to just allocate it all together with the
650 // main thing
651 //
652 // Most of the variables are specified with the smallest size I could pack
653 // them into. It might give better performance to make them all full-sized
654 // integers. It should be safe to freely rearrange the structures or change
655 // the sizes larger--nothing relies on silently truncating etc., nor the
656 // order of variables.
657 
658 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
659 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
660 
661 typedef struct
662 {
663    int dimensions, entries;
664    uint8 *codeword_lengths;
665    float  minimum_value;
666    float  delta_value;
667    uint8  value_bits;
668    uint8  lookup_type;
669    uint8  sequence_p;
670    uint8  sparse;
671    uint32 lookup_values;
672    codetype *multiplicands;
673    uint32 *codewords;
674    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
675     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
676    #else
677     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
678    #endif
679    uint32 *sorted_codewords;
680    int    *sorted_values;
681    int     sorted_entries;
682 } Codebook;
683 
684 typedef struct
685 {
686    uint8 order;
687    uint16 rate;
688    uint16 bark_map_size;
689    uint8 amplitude_bits;
690    uint8 amplitude_offset;
691    uint8 number_of_books;
692    uint8 book_list[16]; // varies
693 } Floor0;
694 
695 typedef struct
696 {
697    uint8 partitions;
698    uint8 partition_class_list[32]; // varies
699    uint8 class_dimensions[16]; // varies
700    uint8 class_subclasses[16]; // varies
701    uint8 class_masterbooks[16]; // varies
702    int16 subclass_books[16][8]; // varies
703    uint16 Xlist[31*8+2]; // varies
704    uint8 sorted_order[31*8+2];
705    uint8 neighbors[31*8+2][2];
706    uint8 floor1_multiplier;
707    uint8 rangebits;
708    int values;
709 } Floor1;
710 
711 typedef union
712 {
713    Floor0 floor0;
714    Floor1 floor1;
715 } Floor;
716 
717 typedef struct
718 {
719    uint32 begin, end;
720    uint32 part_size;
721    uint8 classifications;
722    uint8 classbook;
723    uint8 **classdata;
724    int16 (*residue_books)[8];
725 } Residue;
726 
727 typedef struct
728 {
729    uint8 magnitude;
730    uint8 angle;
731    uint8 mux;
732 } MappingChannel;
733 
734 typedef struct
735 {
736    uint16 coupling_steps;
737    MappingChannel *chan;
738    uint8  submaps;
739    uint8  submap_floor[15]; // varies
740    uint8  submap_residue[15]; // varies
741 } Mapping;
742 
743 typedef struct
744 {
745    uint8 blockflag;
746    uint8 mapping;
747    uint16 windowtype;
748    uint16 transformtype;
749 } Mode;
750 
751 typedef struct
752 {
753    uint32  goal_crc;    // expected crc if match
754    int     bytes_left;  // bytes left in packet
755    uint32  crc_so_far;  // running crc
756    int     bytes_done;  // bytes processed in _current_ chunk
757    uint32  sample_loc;  // granule pos encoded in page
758 } CRCscan;
759 
760 typedef struct
761 {
762    uint32 page_start, page_end;
763    uint32 last_decoded_sample;
764 } ProbedPage;
765 
766 struct stb_vorbis
767 {
768   // user-accessible info
769    unsigned int sample_rate;
770    int channels;
771 
772    unsigned int setup_memory_required;
773    unsigned int temp_memory_required;
774    unsigned int setup_temp_memory_required;
775 
776    char *vendor;
777    int comment_list_length;
778    char **comment_list;
779 
780   // input config
781 #ifndef STB_VORBIS_NO_STDIO
782    FILE *f;
783    uint32 f_start;
784    int close_on_free;
785 #endif
786 
787    uint8 *stream;
788    uint8 *stream_start;
789    uint8 *stream_end;
790 
791    uint32 stream_len;
792 
793    uint8  push_mode;
794 
795    // the page to seek to when seeking to start, may be zero
796    uint32 first_audio_page_offset;
797 
798    // p_first is the page on which the first audio packet ends
799    // (but not necessarily the page on which it starts)
800    ProbedPage p_first, p_last;
801 
802   // memory management
803    stb_vorbis_alloc alloc;
804    int setup_offset;
805    int temp_offset;
806 
807   // run-time results
808    int eof;
809    enum STBVorbisError error;
810 
811   // user-useful data
812 
813   // header info
814    int blocksize[2];
815    int blocksize_0, blocksize_1;
816    int codebook_count;
817    Codebook *codebooks;
818    int floor_count;
819    uint16 floor_types[64]; // varies
820    Floor *floor_config;
821    int residue_count;
822    uint16 residue_types[64]; // varies
823    Residue *residue_config;
824    int mapping_count;
825    Mapping *mapping;
826    int mode_count;
827    Mode mode_config[64];  // varies
828 
829    uint32 total_samples;
830 
831   // decode buffer
832    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
833    float *outputs        [STB_VORBIS_MAX_CHANNELS];
834 
835    float *previous_window[STB_VORBIS_MAX_CHANNELS];
836    int previous_length;
837 
838    #ifndef STB_VORBIS_NO_DEFER_FLOOR
839    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
840    #else
841    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
842    #endif
843 
844    uint32 current_loc; // sample location of next frame to decode
845    int    current_loc_valid;
846 
847   // per-blocksize precomputed data
848 
849    // twiddle factors
850    float *A[2],*B[2],*C[2];
851    float *window[2];
852    uint16 *bit_reverse[2];
853 
854   // current page/packet/segment streaming info
855    uint32 serial; // stream serial number for verification
856    int last_page;
857    int segment_count;
858    uint8 segments[255];
859    uint8 page_flag;
860    uint8 bytes_in_seg;
861    uint8 first_decode;
862    int next_seg;
863    int last_seg;  // flag that we're on the last segment
864    int last_seg_which; // what was the segment number of the last seg?
865    uint32 acc;
866    int valid_bits;
867    int packet_bytes;
868    int end_seg_with_known_loc;
869    uint32 known_loc_for_packet;
870    int discard_samples_deferred;
871    uint32 samples_output;
872 
873   // push mode scanning
874    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
875 #ifndef STB_VORBIS_NO_PUSHDATA_API
876    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
877 #endif
878 
879   // sample-access
880    int channel_buffer_start;
881    int channel_buffer_end;
882 };
883 
884 #if defined(STB_VORBIS_NO_PUSHDATA_API)
885    #define IS_PUSH_MODE(f)   FALSE
886 #elif defined(STB_VORBIS_NO_PULLDATA_API)
887    #define IS_PUSH_MODE(f)   TRUE
888 #else
889    #define IS_PUSH_MODE(f)   ((f)->push_mode)
890 #endif
891 
892 typedef struct stb_vorbis vorb;
893 
error(vorb * f,enum STBVorbisError e)894 static int error(vorb *f, enum STBVorbisError e)
895 {
896    f->error = e;
897    if (!f->eof && e != VORBIS_need_more_data) {
898       f->error=e; // breakpoint for debugging
899    }
900    return 0;
901 }
902 
903 
904 // these functions are used for allocating temporary memory
905 // while decoding. if you can afford the stack space, use
906 // alloca(); otherwise, provide a temp buffer and it will
907 // allocate out of those.
908 
909 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
910 
911 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
912 #define temp_free(f,p)                  (void)0
913 #define temp_alloc_save(f)              ((f)->temp_offset)
914 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
915 
916 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
917 
918 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)919 static void *make_block_array(void *mem, int count, int size)
920 {
921    int i;
922    void ** p = (void **) mem;
923    char *q = (char *) (p + count);
924    for (i=0; i < count; ++i) {
925       p[i] = q;
926       q += size;
927    }
928    return p;
929 }
930 
setup_malloc(vorb * f,int sz)931 static void *setup_malloc(vorb *f, int sz)
932 {
933    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
934    f->setup_memory_required += sz;
935    if (f->alloc.alloc_buffer) {
936       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
937       if (f->setup_offset + sz > f->temp_offset) return NULL;
938       f->setup_offset += sz;
939       return p;
940    }
941    return sz ? malloc(sz) : NULL;
942 }
943 
setup_free(vorb * f,void * p)944 static void setup_free(vorb *f, void *p)
945 {
946    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
947    free(p);
948 }
949 
setup_temp_malloc(vorb * f,int sz)950 static void *setup_temp_malloc(vorb *f, int sz)
951 {
952    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
953    if (f->alloc.alloc_buffer) {
954       if (f->temp_offset - sz < f->setup_offset) return NULL;
955       f->temp_offset -= sz;
956       return (char *) f->alloc.alloc_buffer + f->temp_offset;
957    }
958    return malloc(sz);
959 }
960 
setup_temp_free(vorb * f,void * p,int sz)961 static void setup_temp_free(vorb *f, void *p, int sz)
962 {
963    if (f->alloc.alloc_buffer) {
964       f->temp_offset += (sz+3)&~3;
965       return;
966    }
967    free(p);
968 }
969 
970 #define CRC32_POLY    0x04c11db7   // from spec
971 
972 static uint32 crc_table[256];
crc32_init(void)973 static void crc32_init(void)
974 {
975    int i,j;
976    uint32 s;
977    for(i=0; i < 256; i++) {
978       for (s=(uint32) i << 24, j=0; j < 8; ++j)
979          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
980       crc_table[i] = s;
981    }
982 }
983 
crc32_update(uint32 crc,uint8 byte)984 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
985 {
986    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
987 }
988 
989 
990 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)991 static unsigned int bit_reverse(unsigned int n)
992 {
993   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
994   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
995   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
996   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
997   return (n >> 16) | (n << 16);
998 }
999 
square(float x)1000 static float square(float x)
1001 {
1002    return x*x;
1003 }
1004 
1005 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
1006 // as required by the specification. fast(?) implementation from stb.h
1007 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)1008 static int ilog(int32 n)
1009 {
1010    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
1011 
1012    if (n < 0) return 0; // signed n returns 0
1013 
1014    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
1015    if (n < (1 << 14))
1016         if (n < (1 <<  4))            return  0 + log2_4[n      ];
1017         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
1018              else                     return 10 + log2_4[n >> 10];
1019    else if (n < (1 << 24))
1020              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
1021              else                     return 20 + log2_4[n >> 20];
1022         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
1023              else                     return 30 + log2_4[n >> 30];
1024 }
1025 
1026 #ifndef M_PI
1027   #define M_PI  3.14159265358979323846264f  // from CRC
1028 #endif
1029 
1030 // code length assigned to a value with no huffman encoding
1031 #define NO_CODE   255
1032 
1033 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1034 //
1035 // these functions are only called at setup, and only a few times
1036 // per file
1037 
float32_unpack(uint32 x)1038 static float float32_unpack(uint32 x)
1039 {
1040    // from the specification
1041    uint32 mantissa = x & 0x1fffff;
1042    uint32 sign = x & 0x80000000;
1043    uint32 exp = (x & 0x7fe00000) >> 21;
1044    double res = sign ? -(double)mantissa : (double)mantissa;
1045    return (float) ldexp((float)res, exp-788);
1046 }
1047 
1048 
1049 // zlib & jpeg huffman tables assume that the output symbols
1050 // can either be arbitrarily arranged, or have monotonically
1051 // increasing frequencies--they rely on the lengths being sorted;
1052 // this makes for a very simple generation algorithm.
1053 // vorbis allows a huffman table with non-sorted lengths. This
1054 // requires a more sophisticated construction, since symbols in
1055 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1056 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1057 {
1058    if (!c->sparse) {
1059       c->codewords      [symbol] = huff_code;
1060    } else {
1061       c->codewords       [count] = huff_code;
1062       c->codeword_lengths[count] = len;
1063       values             [count] = symbol;
1064    }
1065 }
1066 
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1067 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1068 {
1069    int i,k,m=0;
1070    uint32 available[32];
1071 
1072    memset(available, 0, sizeof(available));
1073    // find the first entry
1074    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1075    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1076    // add to the list
1077    add_entry(c, 0, k, m++, len[k], values);
1078    // add all available leaves
1079    for (i=1; i <= len[k]; ++i)
1080       available[i] = 1U << (32-i);
1081    // note that the above code treats the first case specially,
1082    // but it's really the same as the following code, so they
1083    // could probably be combined (except the initial code is 0,
1084    // and I use 0 in available[] to mean 'empty')
1085    for (i=k+1; i < n; ++i) {
1086       uint32 res;
1087       int z = len[i], y;
1088       if (z == NO_CODE) continue;
1089       // find lowest available leaf (should always be earliest,
1090       // which is what the specification calls for)
1091       // note that this property, and the fact we can never have
1092       // more than one free leaf at a given level, isn't totally
1093       // trivial to prove, but it seems true and the assert never
1094       // fires, so!
1095       while (z > 0 && !available[z]) --z;
1096       if (z == 0) { return FALSE; }
1097       res = available[z];
1098       assert(z >= 0 && z < 32);
1099       available[z] = 0;
1100       add_entry(c, bit_reverse(res), i, m++, len[i], values);
1101       // propagate availability up the tree
1102       if (z != len[i]) {
1103          assert(len[i] >= 0 && len[i] < 32);
1104          for (y=len[i]; y > z; --y) {
1105             assert(available[y] == 0);
1106             available[y] = res + (1 << (32-y));
1107          }
1108       }
1109    }
1110    return TRUE;
1111 }
1112 
1113 // accelerated huffman table allows fast O(1) match of all symbols
1114 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1115 static void compute_accelerated_huffman(Codebook *c)
1116 {
1117    int i, len;
1118    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1119       c->fast_huffman[i] = -1;
1120 
1121    len = c->sparse ? c->sorted_entries : c->entries;
1122    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1123    if (len > 32767) len = 32767; // largest possible value we can encode!
1124    #endif
1125    for (i=0; i < len; ++i) {
1126       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1127          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1128          // set table entries for all bit combinations in the higher bits
1129          while (z < FAST_HUFFMAN_TABLE_SIZE) {
1130              c->fast_huffman[z] = i;
1131              z += 1 << c->codeword_lengths[i];
1132          }
1133       }
1134    }
1135 }
1136 
1137 #ifdef _MSC_VER
1138 #define STBV_CDECL __cdecl
1139 #else
1140 #define STBV_CDECL
1141 #endif
1142 
uint32_compare(const void * p,const void * q)1143 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1144 {
1145    uint32 x = * (uint32 *) p;
1146    uint32 y = * (uint32 *) q;
1147    return x < y ? -1 : x > y;
1148 }
1149 
include_in_sort(Codebook * c,uint8 len)1150 static int include_in_sort(Codebook *c, uint8 len)
1151 {
1152    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1153    if (len == NO_CODE) return FALSE;
1154    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1155    return FALSE;
1156 }
1157 
1158 // if the fast table above doesn't work, we want to binary
1159 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1160 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1161 {
1162    int i, len;
1163    // build a list of all the entries
1164    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1165    // this is kind of a frivolous optimization--I don't see any performance improvement,
1166    // but it's like 4 extra lines of code, so.
1167    if (!c->sparse) {
1168       int k = 0;
1169       for (i=0; i < c->entries; ++i)
1170          if (include_in_sort(c, lengths[i]))
1171             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1172       assert(k == c->sorted_entries);
1173    } else {
1174       for (i=0; i < c->sorted_entries; ++i)
1175          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1176    }
1177 
1178    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1179    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1180 
1181    len = c->sparse ? c->sorted_entries : c->entries;
1182    // now we need to indicate how they correspond; we could either
1183    //   #1: sort a different data structure that says who they correspond to
1184    //   #2: for each sorted entry, search the original list to find who corresponds
1185    //   #3: for each original entry, find the sorted entry
1186    // #1 requires extra storage, #2 is slow, #3 can use binary search!
1187    for (i=0; i < len; ++i) {
1188       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1189       if (include_in_sort(c,huff_len)) {
1190          uint32 code = bit_reverse(c->codewords[i]);
1191          int x=0, n=c->sorted_entries;
1192          while (n > 1) {
1193             // invariant: sc[x] <= code < sc[x+n]
1194             int m = x + (n >> 1);
1195             if (c->sorted_codewords[m] <= code) {
1196                x = m;
1197                n -= (n>>1);
1198             } else {
1199                n >>= 1;
1200             }
1201          }
1202          assert(c->sorted_codewords[x] == code);
1203          if (c->sparse) {
1204             c->sorted_values[x] = values[i];
1205             c->codeword_lengths[x] = huff_len;
1206          } else {
1207             c->sorted_values[x] = i;
1208          }
1209       }
1210    }
1211 }
1212 
1213 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1214 static int vorbis_validate(uint8 *data)
1215 {
1216    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1217    return memcmp(data, vorbis, 6) == 0;
1218 }
1219 
1220 // called from setup only, once per code book
1221 // (formula implied by specification)
lookup1_values(int entries,int dim)1222 static int lookup1_values(int entries, int dim)
1223 {
1224    int r = (int) floor(exp((float) log((float) entries) / dim));
1225    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
1226       ++r;                                              // floor() to avoid _ftol() when non-CRT
1227    if (pow((float) r+1, dim) <= entries)
1228       return -1;
1229    if ((int) floor(pow((float) r, dim)) > entries)
1230       return -1;
1231    return r;
1232 }
1233 
1234 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1235 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1236 {
1237    int n4 = n >> 2, n8 = n >> 3;
1238    int k,k2;
1239 
1240    for (k=k2=0; k < n4; ++k,k2+=2) {
1241       A[k2  ] = (float)  cos(4*k*M_PI/n);
1242       A[k2+1] = (float) -sin(4*k*M_PI/n);
1243       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
1244       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
1245    }
1246    for (k=k2=0; k < n8; ++k,k2+=2) {
1247       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
1248       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1249    }
1250 }
1251 
compute_window(int n,float * window)1252 static void compute_window(int n, float *window)
1253 {
1254    int n2 = n >> 1, i;
1255    for (i=0; i < n2; ++i)
1256       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1257 }
1258 
compute_bitreverse(int n,uint16 * rev)1259 static void compute_bitreverse(int n, uint16 *rev)
1260 {
1261    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1262    int i, n8 = n >> 3;
1263    for (i=0; i < n8; ++i)
1264       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1265 }
1266 
init_blocksize(vorb * f,int b,int n)1267 static int init_blocksize(vorb *f, int b, int n)
1268 {
1269    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1270    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1271    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1272    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1273    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1274    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1275    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1276    if (!f->window[b]) return error(f, VORBIS_outofmem);
1277    compute_window(n, f->window[b]);
1278    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1279    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1280    compute_bitreverse(n, f->bit_reverse[b]);
1281    return TRUE;
1282 }
1283 
neighbors(uint16 * x,int n,int * plow,int * phigh)1284 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1285 {
1286    int low = -1;
1287    int high = 65536;
1288    int i;
1289    for (i=0; i < n; ++i) {
1290       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
1291       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1292    }
1293 }
1294 
1295 // this has been repurposed so y is now the original index instead of y
1296 typedef struct
1297 {
1298    uint16 x,id;
1299 } stbv__floor_ordering;
1300 
point_compare(const void * p,const void * q)1301 static int STBV_CDECL point_compare(const void *p, const void *q)
1302 {
1303    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1304    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1305    return a->x < b->x ? -1 : a->x > b->x;
1306 }
1307 
1308 //
1309 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1310 
1311 
1312 #if defined(STB_VORBIS_NO_STDIO)
1313    #define USE_MEMORY(z)    TRUE
1314 #else
1315    #define USE_MEMORY(z)    ((z)->stream)
1316 #endif
1317 
get8(vorb * z)1318 static uint8 get8(vorb *z)
1319 {
1320    if (USE_MEMORY(z)) {
1321       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1322       return *z->stream++;
1323    }
1324 
1325    #ifndef STB_VORBIS_NO_STDIO
1326    {
1327    int c = fgetc(z->f);
1328    if (c == EOF) { z->eof = TRUE; return 0; }
1329    return c;
1330    }
1331    #endif
1332 }
1333 
get32(vorb * f)1334 static uint32 get32(vorb *f)
1335 {
1336    uint32 x;
1337    x = get8(f);
1338    x += get8(f) << 8;
1339    x += get8(f) << 16;
1340    x += (uint32) get8(f) << 24;
1341    return x;
1342 }
1343 
getn(vorb * z,uint8 * data,int n)1344 static int getn(vorb *z, uint8 *data, int n)
1345 {
1346    if (USE_MEMORY(z)) {
1347       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1348       memcpy(data, z->stream, n);
1349       z->stream += n;
1350       return 1;
1351    }
1352 
1353    #ifndef STB_VORBIS_NO_STDIO
1354    if (fread(data, n, 1, z->f) == 1)
1355       return 1;
1356    else {
1357       z->eof = 1;
1358       return 0;
1359    }
1360    #endif
1361 }
1362 
skip(vorb * z,int n)1363 static void skip(vorb *z, int n)
1364 {
1365    if (USE_MEMORY(z)) {
1366       z->stream += n;
1367       if (z->stream >= z->stream_end) z->eof = 1;
1368       return;
1369    }
1370    #ifndef STB_VORBIS_NO_STDIO
1371    {
1372       long x = ftell(z->f);
1373       fseek(z->f, x+n, SEEK_SET);
1374    }
1375    #endif
1376 }
1377 
set_file_offset(stb_vorbis * f,unsigned int loc)1378 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1379 {
1380    #ifndef STB_VORBIS_NO_PUSHDATA_API
1381    if (f->push_mode) return 0;
1382    #endif
1383    f->eof = 0;
1384    if (USE_MEMORY(f)) {
1385       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1386          f->stream = f->stream_end;
1387          f->eof = 1;
1388          return 0;
1389       } else {
1390          f->stream = f->stream_start + loc;
1391          return 1;
1392       }
1393    }
1394    #ifndef STB_VORBIS_NO_STDIO
1395    if (loc + f->f_start < loc || loc >= 0x80000000) {
1396       loc = 0x7fffffff;
1397       f->eof = 1;
1398    } else {
1399       loc += f->f_start;
1400    }
1401    if (!fseek(f->f, loc, SEEK_SET))
1402       return 1;
1403    f->eof = 1;
1404    fseek(f->f, f->f_start, SEEK_END);
1405    return 0;
1406    #endif
1407 }
1408 
1409 
1410 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1411 
capture_pattern(vorb * f)1412 static int capture_pattern(vorb *f)
1413 {
1414    if (0x4f != get8(f)) return FALSE;
1415    if (0x67 != get8(f)) return FALSE;
1416    if (0x67 != get8(f)) return FALSE;
1417    if (0x53 != get8(f)) return FALSE;
1418    return TRUE;
1419 }
1420 
1421 #define PAGEFLAG_continued_packet   1
1422 #define PAGEFLAG_first_page         2
1423 #define PAGEFLAG_last_page          4
1424 
start_page_no_capturepattern(vorb * f)1425 static int start_page_no_capturepattern(vorb *f)
1426 {
1427    uint32 loc0,loc1,n;
1428    if (f->first_decode && !IS_PUSH_MODE(f)) {
1429       f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1430    }
1431    // stream structure version
1432    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1433    // header flag
1434    f->page_flag = get8(f);
1435    // absolute granule position
1436    loc0 = get32(f);
1437    loc1 = get32(f);
1438    // @TODO: validate loc0,loc1 as valid positions?
1439    // stream serial number -- vorbis doesn't interleave, so discard
1440    get32(f);
1441    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1442    // page sequence number
1443    n = get32(f);
1444    f->last_page = n;
1445    // CRC32
1446    get32(f);
1447    // page_segments
1448    f->segment_count = get8(f);
1449    if (!getn(f, f->segments, f->segment_count))
1450       return error(f, VORBIS_unexpected_eof);
1451    // assume we _don't_ know any the sample position of any segments
1452    f->end_seg_with_known_loc = -2;
1453    if (loc0 != ~0U || loc1 != ~0U) {
1454       int i;
1455       // determine which packet is the last one that will complete
1456       for (i=f->segment_count-1; i >= 0; --i)
1457          if (f->segments[i] < 255)
1458             break;
1459       // 'i' is now the index of the _last_ segment of a packet that ends
1460       if (i >= 0) {
1461          f->end_seg_with_known_loc = i;
1462          f->known_loc_for_packet   = loc0;
1463       }
1464    }
1465    if (f->first_decode) {
1466       int i,len;
1467       len = 0;
1468       for (i=0; i < f->segment_count; ++i)
1469          len += f->segments[i];
1470       len += 27 + f->segment_count;
1471       f->p_first.page_end = f->p_first.page_start + len;
1472       f->p_first.last_decoded_sample = loc0;
1473    }
1474    f->next_seg = 0;
1475    return TRUE;
1476 }
1477 
start_page(vorb * f)1478 static int start_page(vorb *f)
1479 {
1480    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1481    return start_page_no_capturepattern(f);
1482 }
1483 
start_packet(vorb * f)1484 static int start_packet(vorb *f)
1485 {
1486    while (f->next_seg == -1) {
1487       if (!start_page(f)) return FALSE;
1488       if (f->page_flag & PAGEFLAG_continued_packet)
1489          return error(f, VORBIS_continued_packet_flag_invalid);
1490    }
1491    f->last_seg = FALSE;
1492    f->valid_bits = 0;
1493    f->packet_bytes = 0;
1494    f->bytes_in_seg = 0;
1495    // f->next_seg is now valid
1496    return TRUE;
1497 }
1498 
maybe_start_packet(vorb * f)1499 static int maybe_start_packet(vorb *f)
1500 {
1501    if (f->next_seg == -1) {
1502       int x = get8(f);
1503       if (f->eof) return FALSE; // EOF at page boundary is not an error!
1504       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1505       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1506       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1507       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1508       if (!start_page_no_capturepattern(f)) return FALSE;
1509       if (f->page_flag & PAGEFLAG_continued_packet) {
1510          // set up enough state that we can read this packet if we want,
1511          // e.g. during recovery
1512          f->last_seg = FALSE;
1513          f->bytes_in_seg = 0;
1514          return error(f, VORBIS_continued_packet_flag_invalid);
1515       }
1516    }
1517    return start_packet(f);
1518 }
1519 
next_segment(vorb * f)1520 static int next_segment(vorb *f)
1521 {
1522    int len;
1523    if (f->last_seg) return 0;
1524    if (f->next_seg == -1) {
1525       f->last_seg_which = f->segment_count-1; // in case start_page fails
1526       if (!start_page(f)) { f->last_seg = 1; return 0; }
1527       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1528    }
1529    len = f->segments[f->next_seg++];
1530    if (len < 255) {
1531       f->last_seg = TRUE;
1532       f->last_seg_which = f->next_seg-1;
1533    }
1534    if (f->next_seg >= f->segment_count)
1535       f->next_seg = -1;
1536    assert(f->bytes_in_seg == 0);
1537    f->bytes_in_seg = len;
1538    return len;
1539 }
1540 
1541 #define EOP    (-1)
1542 #define INVALID_BITS  (-1)
1543 
get8_packet_raw(vorb * f)1544 static int get8_packet_raw(vorb *f)
1545 {
1546    if (!f->bytes_in_seg) {  // CLANG!
1547       if (f->last_seg) return EOP;
1548       else if (!next_segment(f)) return EOP;
1549    }
1550    assert(f->bytes_in_seg > 0);
1551    --f->bytes_in_seg;
1552    ++f->packet_bytes;
1553    return get8(f);
1554 }
1555 
get8_packet(vorb * f)1556 static int get8_packet(vorb *f)
1557 {
1558    int x = get8_packet_raw(f);
1559    f->valid_bits = 0;
1560    return x;
1561 }
1562 
get32_packet(vorb * f)1563 static int get32_packet(vorb *f)
1564 {
1565    uint32 x;
1566    x = get8_packet(f);
1567    x += get8_packet(f) << 8;
1568    x += get8_packet(f) << 16;
1569    x += (uint32) get8_packet(f) << 24;
1570    return x;
1571 }
1572 
flush_packet(vorb * f)1573 static void flush_packet(vorb *f)
1574 {
1575    while (get8_packet_raw(f) != EOP);
1576 }
1577 
1578 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1579 // as the huffman decoder?
get_bits(vorb * f,int n)1580 static uint32 get_bits(vorb *f, int n)
1581 {
1582    uint32 z;
1583 
1584    if (f->valid_bits < 0) return 0;
1585    if (f->valid_bits < n) {
1586       if (n > 24) {
1587          // the accumulator technique below would not work correctly in this case
1588          z = get_bits(f, 24);
1589          z += get_bits(f, n-24) << 24;
1590          return z;
1591       }
1592       if (f->valid_bits == 0) f->acc = 0;
1593       while (f->valid_bits < n) {
1594          int z = get8_packet_raw(f);
1595          if (z == EOP) {
1596             f->valid_bits = INVALID_BITS;
1597             return 0;
1598          }
1599          f->acc += z << f->valid_bits;
1600          f->valid_bits += 8;
1601       }
1602    }
1603    if (f->valid_bits < 0) return 0;
1604    z = f->acc & ((1 << n)-1);
1605    f->acc >>= n;
1606    f->valid_bits -= n;
1607    return z;
1608 }
1609 
1610 // @OPTIMIZE: primary accumulator for huffman
1611 // expand the buffer to as many bits as possible without reading off end of packet
1612 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1613 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1614 static __forceinline void prep_huffman(vorb *f)
1615 {
1616    if (f->valid_bits <= 24) {
1617       if (f->valid_bits == 0) f->acc = 0;
1618       do {
1619          int z;
1620          if (f->last_seg && !f->bytes_in_seg) return;
1621          z = get8_packet_raw(f);
1622          if (z == EOP) return;
1623          f->acc += (unsigned) z << f->valid_bits;
1624          f->valid_bits += 8;
1625       } while (f->valid_bits <= 24);
1626    }
1627 }
1628 
1629 enum
1630 {
1631    VORBIS_packet_id = 1,
1632    VORBIS_packet_comment = 3,
1633    VORBIS_packet_setup = 5
1634 };
1635 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1636 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1637 {
1638    int i;
1639    prep_huffman(f);
1640 
1641    if (c->codewords == NULL && c->sorted_codewords == NULL)
1642       return -1;
1643 
1644    // cases to use binary search: sorted_codewords && !c->codewords
1645    //                             sorted_codewords && c->entries > 8
1646    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1647       // binary search
1648       uint32 code = bit_reverse(f->acc);
1649       int x=0, n=c->sorted_entries, len;
1650 
1651       while (n > 1) {
1652          // invariant: sc[x] <= code < sc[x+n]
1653          int m = x + (n >> 1);
1654          if (c->sorted_codewords[m] <= code) {
1655             x = m;
1656             n -= (n>>1);
1657          } else {
1658             n >>= 1;
1659          }
1660       }
1661       // x is now the sorted index
1662       if (!c->sparse) x = c->sorted_values[x];
1663       // x is now sorted index if sparse, or symbol otherwise
1664       len = c->codeword_lengths[x];
1665       if (f->valid_bits >= len) {
1666          f->acc >>= len;
1667          f->valid_bits -= len;
1668          return x;
1669       }
1670 
1671       f->valid_bits = 0;
1672       return -1;
1673    }
1674 
1675    // if small, linear search
1676    assert(!c->sparse);
1677    for (i=0; i < c->entries; ++i) {
1678       if (c->codeword_lengths[i] == NO_CODE) continue;
1679       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1680          if (f->valid_bits >= c->codeword_lengths[i]) {
1681             f->acc >>= c->codeword_lengths[i];
1682             f->valid_bits -= c->codeword_lengths[i];
1683             return i;
1684          }
1685          f->valid_bits = 0;
1686          return -1;
1687       }
1688    }
1689 
1690    error(f, VORBIS_invalid_stream);
1691    f->valid_bits = 0;
1692    return -1;
1693 }
1694 
1695 #ifndef STB_VORBIS_NO_INLINE_DECODE
1696 
1697 #define DECODE_RAW(var, f,c)                                  \
1698    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
1699       prep_huffman(f);                                        \
1700    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
1701    var = c->fast_huffman[var];                                \
1702    if (var >= 0) {                                            \
1703       int n = c->codeword_lengths[var];                       \
1704       f->acc >>= n;                                           \
1705       f->valid_bits -= n;                                     \
1706       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1707    } else {                                                   \
1708       var = codebook_decode_scalar_raw(f,c);                  \
1709    }
1710 
1711 #else
1712 
codebook_decode_scalar(vorb * f,Codebook * c)1713 static int codebook_decode_scalar(vorb *f, Codebook *c)
1714 {
1715    int i;
1716    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1717       prep_huffman(f);
1718    // fast huffman table lookup
1719    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1720    i = c->fast_huffman[i];
1721    if (i >= 0) {
1722       f->acc >>= c->codeword_lengths[i];
1723       f->valid_bits -= c->codeword_lengths[i];
1724       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1725       return i;
1726    }
1727    return codebook_decode_scalar_raw(f,c);
1728 }
1729 
1730 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1731 
1732 #endif
1733 
1734 #define DECODE(var,f,c)                                       \
1735    DECODE_RAW(var,f,c)                                        \
1736    if (c->sparse) var = c->sorted_values[var];
1737 
1738 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1739   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1740 #else
1741   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
1742 #endif
1743 
1744 
1745 
1746 
1747 
1748 
1749 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1750 // where we avoid one addition
1751 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1752 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1753 #define CODEBOOK_ELEMENT_BASE(c)         (0)
1754 
codebook_decode_start(vorb * f,Codebook * c)1755 static int codebook_decode_start(vorb *f, Codebook *c)
1756 {
1757    int z = -1;
1758 
1759    // type 0 is only legal in a scalar context
1760    if (c->lookup_type == 0)
1761       error(f, VORBIS_invalid_stream);
1762    else {
1763       DECODE_VQ(z,f,c);
1764       if (c->sparse) assert(z < c->sorted_entries);
1765       if (z < 0) {  // check for EOP
1766          if (!f->bytes_in_seg)
1767             if (f->last_seg)
1768                return z;
1769          error(f, VORBIS_invalid_stream);
1770       }
1771    }
1772    return z;
1773 }
1774 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1775 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1776 {
1777    int i,z = codebook_decode_start(f,c);
1778    if (z < 0) return FALSE;
1779    if (len > c->dimensions) len = c->dimensions;
1780 
1781 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1782    if (c->lookup_type == 1) {
1783       float last = CODEBOOK_ELEMENT_BASE(c);
1784       int div = 1;
1785       for (i=0; i < len; ++i) {
1786          int off = (z / div) % c->lookup_values;
1787          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1788          output[i] += val;
1789          if (c->sequence_p) last = val + c->minimum_value;
1790          div *= c->lookup_values;
1791       }
1792       return TRUE;
1793    }
1794 #endif
1795 
1796    z *= c->dimensions;
1797    if (c->sequence_p) {
1798       float last = CODEBOOK_ELEMENT_BASE(c);
1799       for (i=0; i < len; ++i) {
1800          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1801          output[i] += val;
1802          last = val + c->minimum_value;
1803       }
1804    } else {
1805       float last = CODEBOOK_ELEMENT_BASE(c);
1806       for (i=0; i < len; ++i) {
1807          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1808       }
1809    }
1810 
1811    return TRUE;
1812 }
1813 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1814 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1815 {
1816    int i,z = codebook_decode_start(f,c);
1817    float last = CODEBOOK_ELEMENT_BASE(c);
1818    if (z < 0) return FALSE;
1819    if (len > c->dimensions) len = c->dimensions;
1820 
1821 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1822    if (c->lookup_type == 1) {
1823       int div = 1;
1824       for (i=0; i < len; ++i) {
1825          int off = (z / div) % c->lookup_values;
1826          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1827          output[i*step] += val;
1828          if (c->sequence_p) last = val;
1829          div *= c->lookup_values;
1830       }
1831       return TRUE;
1832    }
1833 #endif
1834 
1835    z *= c->dimensions;
1836    for (i=0; i < len; ++i) {
1837       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1838       output[i*step] += val;
1839       if (c->sequence_p) last = val;
1840    }
1841 
1842    return TRUE;
1843 }
1844 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1845 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1846 {
1847    int c_inter = *c_inter_p;
1848    int p_inter = *p_inter_p;
1849    int i,z, effective = c->dimensions;
1850 
1851    // type 0 is only legal in a scalar context
1852    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1853 
1854    while (total_decode > 0) {
1855       float last = CODEBOOK_ELEMENT_BASE(c);
1856       DECODE_VQ(z,f,c);
1857       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1858       assert(!c->sparse || z < c->sorted_entries);
1859       #endif
1860       if (z < 0) {
1861          if (!f->bytes_in_seg)
1862             if (f->last_seg) return FALSE;
1863          return error(f, VORBIS_invalid_stream);
1864       }
1865 
1866       // if this will take us off the end of the buffers, stop short!
1867       // we check by computing the length of the virtual interleaved
1868       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1869       // and the length we'll be using (effective)
1870       if (c_inter + p_inter*ch + effective > len * ch) {
1871          effective = len*ch - (p_inter*ch - c_inter);
1872       }
1873 
1874    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1875       if (c->lookup_type == 1) {
1876          int div = 1;
1877          for (i=0; i < effective; ++i) {
1878             int off = (z / div) % c->lookup_values;
1879             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1880             if (outputs[c_inter])
1881                outputs[c_inter][p_inter] += val;
1882             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1883             if (c->sequence_p) last = val;
1884             div *= c->lookup_values;
1885          }
1886       } else
1887    #endif
1888       {
1889          z *= c->dimensions;
1890          if (c->sequence_p) {
1891             for (i=0; i < effective; ++i) {
1892                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1893                if (outputs[c_inter])
1894                   outputs[c_inter][p_inter] += val;
1895                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1896                last = val;
1897             }
1898          } else {
1899             for (i=0; i < effective; ++i) {
1900                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1901                if (outputs[c_inter])
1902                   outputs[c_inter][p_inter] += val;
1903                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1904             }
1905          }
1906       }
1907 
1908       total_decode -= effective;
1909    }
1910    *c_inter_p = c_inter;
1911    *p_inter_p = p_inter;
1912    return TRUE;
1913 }
1914 
predict_point(int x,int x0,int x1,int y0,int y1)1915 static int predict_point(int x, int x0, int x1, int y0, int y1)
1916 {
1917    int dy = y1 - y0;
1918    int adx = x1 - x0;
1919    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1920    int err = abs(dy) * (x - x0);
1921    int off = err / adx;
1922    return dy < 0 ? y0 - off : y0 + off;
1923 }
1924 
1925 // the following table is block-copied from the specification
1926 static float inverse_db_table[256] =
1927 {
1928   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1929   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1930   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1931   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1932   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1933   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1934   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1935   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1936   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1937   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1938   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1939   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1940   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1941   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1942   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1943   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1944   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1945   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1946   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1947   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1948   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1949   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1950   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1951   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1952   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1953   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1954   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1955   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1956   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1957   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1958   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1959   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1960   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1961   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1962   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1963   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1964   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1965   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1966   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1967   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1968   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1969   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1970   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1971   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1972   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1973   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1974   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1975   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1976   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1977   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1978   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1979   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1980   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1981   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1982   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1983   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1984   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1985   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1986   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1987   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1988   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1989   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1990   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1991   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1992 };
1993 
1994 
1995 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1996 // note that you must produce bit-identical output to decode correctly;
1997 // this specific sequence of operations is specified in the spec (it's
1998 // drawing integer-quantized frequency-space lines that the encoder
1999 // expects to be exactly the same)
2000 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
2001 // have to divide in the setup? sigh.
2002 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2003 #define LINE_OP(a,b)   a *= b
2004 #else
2005 #define LINE_OP(a,b)   a = b
2006 #endif
2007 
2008 #ifdef STB_VORBIS_DIVIDE_TABLE
2009 #define DIVTAB_NUMER   32
2010 #define DIVTAB_DENOM   64
2011 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2012 #endif
2013 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)2014 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2015 {
2016    int dy = y1 - y0;
2017    int adx = x1 - x0;
2018    int ady = abs(dy);
2019    int base;
2020    int x=x0,y=y0;
2021    int err = 0;
2022    int sy;
2023 
2024 #ifdef STB_VORBIS_DIVIDE_TABLE
2025    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2026       if (dy < 0) {
2027          base = -integer_divide_table[ady][adx];
2028          sy = base-1;
2029       } else {
2030          base =  integer_divide_table[ady][adx];
2031          sy = base+1;
2032       }
2033    } else {
2034       base = dy / adx;
2035       if (dy < 0)
2036          sy = base - 1;
2037       else
2038          sy = base+1;
2039    }
2040 #else
2041    base = dy / adx;
2042    if (dy < 0)
2043       sy = base - 1;
2044    else
2045       sy = base+1;
2046 #endif
2047    ady -= abs(base) * adx;
2048    if (x1 > n) x1 = n;
2049    if (x < x1) {
2050       LINE_OP(output[x], inverse_db_table[y&255]);
2051       for (++x; x < x1; ++x) {
2052          err += ady;
2053          if (err >= adx) {
2054             err -= adx;
2055             y += sy;
2056          } else
2057             y += base;
2058          LINE_OP(output[x], inverse_db_table[y&255]);
2059       }
2060    }
2061 }
2062 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2063 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2064 {
2065    int k;
2066    if (rtype == 0) {
2067       int step = n / book->dimensions;
2068       for (k=0; k < step; ++k)
2069          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2070             return FALSE;
2071    } else {
2072       for (k=0; k < n; ) {
2073          if (!codebook_decode(f, book, target+offset, n-k))
2074             return FALSE;
2075          k += book->dimensions;
2076          offset += book->dimensions;
2077       }
2078    }
2079    return TRUE;
2080 }
2081 
2082 // n is 1/2 of the blocksize --
2083 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2084 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2085 {
2086    int i,j,pass;
2087    Residue *r = f->residue_config + rn;
2088    int rtype = f->residue_types[rn];
2089    int c = r->classbook;
2090    int classwords = f->codebooks[c].dimensions;
2091    unsigned int actual_size = rtype == 2 ? n*2 : n;
2092    unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2093    unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
2094    int n_read = limit_r_end - limit_r_begin;
2095    int part_read = n_read / r->part_size;
2096    int temp_alloc_point = temp_alloc_save(f);
2097    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2098    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2099    #else
2100    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2101    #endif
2102 
2103    CHECK(f);
2104 
2105    for (i=0; i < ch; ++i)
2106       if (!do_not_decode[i])
2107          memset(residue_buffers[i], 0, sizeof(float) * n);
2108 
2109    if (rtype == 2 && ch != 1) {
2110       for (j=0; j < ch; ++j)
2111          if (!do_not_decode[j])
2112             break;
2113       if (j == ch)
2114          goto done;
2115 
2116       for (pass=0; pass < 8; ++pass) {
2117          int pcount = 0, class_set = 0;
2118          if (ch == 2) {
2119             while (pcount < part_read) {
2120                int z = r->begin + pcount*r->part_size;
2121                int c_inter = (z & 1), p_inter = z>>1;
2122                if (pass == 0) {
2123                   Codebook *c = f->codebooks+r->classbook;
2124                   int q;
2125                   DECODE(q,f,c);
2126                   if (q == EOP) goto done;
2127                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2128                   part_classdata[0][class_set] = r->classdata[q];
2129                   #else
2130                   for (i=classwords-1; i >= 0; --i) {
2131                      classifications[0][i+pcount] = q % r->classifications;
2132                      q /= r->classifications;
2133                   }
2134                   #endif
2135                }
2136                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2137                   int z = r->begin + pcount*r->part_size;
2138                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2139                   int c = part_classdata[0][class_set][i];
2140                   #else
2141                   int c = classifications[0][pcount];
2142                   #endif
2143                   int b = r->residue_books[c][pass];
2144                   if (b >= 0) {
2145                      Codebook *book = f->codebooks + b;
2146                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2147                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2148                         goto done;
2149                      #else
2150                      // saves 1%
2151                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2152                         goto done;
2153                      #endif
2154                   } else {
2155                      z += r->part_size;
2156                      c_inter = z & 1;
2157                      p_inter = z >> 1;
2158                   }
2159                }
2160                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2161                ++class_set;
2162                #endif
2163             }
2164          } else if (ch > 2) {
2165             while (pcount < part_read) {
2166                int z = r->begin + pcount*r->part_size;
2167                int c_inter = z % ch, p_inter = z/ch;
2168                if (pass == 0) {
2169                   Codebook *c = f->codebooks+r->classbook;
2170                   int q;
2171                   DECODE(q,f,c);
2172                   if (q == EOP) goto done;
2173                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2174                   part_classdata[0][class_set] = r->classdata[q];
2175                   #else
2176                   for (i=classwords-1; i >= 0; --i) {
2177                      classifications[0][i+pcount] = q % r->classifications;
2178                      q /= r->classifications;
2179                   }
2180                   #endif
2181                }
2182                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2183                   int z = r->begin + pcount*r->part_size;
2184                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2185                   int c = part_classdata[0][class_set][i];
2186                   #else
2187                   int c = classifications[0][pcount];
2188                   #endif
2189                   int b = r->residue_books[c][pass];
2190                   if (b >= 0) {
2191                      Codebook *book = f->codebooks + b;
2192                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2193                         goto done;
2194                   } else {
2195                      z += r->part_size;
2196                      c_inter = z % ch;
2197                      p_inter = z / ch;
2198                   }
2199                }
2200                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2201                ++class_set;
2202                #endif
2203             }
2204          }
2205       }
2206       goto done;
2207    }
2208    CHECK(f);
2209 
2210    for (pass=0; pass < 8; ++pass) {
2211       int pcount = 0, class_set=0;
2212       while (pcount < part_read) {
2213          if (pass == 0) {
2214             for (j=0; j < ch; ++j) {
2215                if (!do_not_decode[j]) {
2216                   Codebook *c = f->codebooks+r->classbook;
2217                   int temp;
2218                   DECODE(temp,f,c);
2219                   if (temp == EOP) goto done;
2220                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2221                   part_classdata[j][class_set] = r->classdata[temp];
2222                   #else
2223                   for (i=classwords-1; i >= 0; --i) {
2224                      classifications[j][i+pcount] = temp % r->classifications;
2225                      temp /= r->classifications;
2226                   }
2227                   #endif
2228                }
2229             }
2230          }
2231          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2232             for (j=0; j < ch; ++j) {
2233                if (!do_not_decode[j]) {
2234                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2235                   int c = part_classdata[j][class_set][i];
2236                   #else
2237                   int c = classifications[j][pcount];
2238                   #endif
2239                   int b = r->residue_books[c][pass];
2240                   if (b >= 0) {
2241                      float *target = residue_buffers[j];
2242                      int offset = r->begin + pcount * r->part_size;
2243                      int n = r->part_size;
2244                      Codebook *book = f->codebooks + b;
2245                      if (!residue_decode(f, book, target, offset, n, rtype))
2246                         goto done;
2247                   }
2248                }
2249             }
2250          }
2251          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2252          ++class_set;
2253          #endif
2254       }
2255    }
2256   done:
2257    CHECK(f);
2258    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2259    temp_free(f,part_classdata);
2260    #else
2261    temp_free(f,classifications);
2262    #endif
2263    temp_alloc_restore(f,temp_alloc_point);
2264 }
2265 
2266 
2267 #if 0
2268 // slow way for debugging
2269 void inverse_mdct_slow(float *buffer, int n)
2270 {
2271    int i,j;
2272    int n2 = n >> 1;
2273    float *x = (float *) malloc(sizeof(*x) * n2);
2274    memcpy(x, buffer, sizeof(*x) * n2);
2275    for (i=0; i < n; ++i) {
2276       float acc = 0;
2277       for (j=0; j < n2; ++j)
2278          // formula from paper:
2279          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2280          // formula from wikipedia
2281          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2282          // these are equivalent, except the formula from the paper inverts the multiplier!
2283          // however, what actually works is NO MULTIPLIER!?!
2284          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2285          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2286       buffer[i] = acc;
2287    }
2288    free(x);
2289 }
2290 #elif 0
2291 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2292 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2293 {
2294    float mcos[16384];
2295    int i,j;
2296    int n2 = n >> 1, nmask = (n << 2) -1;
2297    float *x = (float *) malloc(sizeof(*x) * n2);
2298    memcpy(x, buffer, sizeof(*x) * n2);
2299    for (i=0; i < 4*n; ++i)
2300       mcos[i] = (float) cos(M_PI / 2 * i / n);
2301 
2302    for (i=0; i < n; ++i) {
2303       float acc = 0;
2304       for (j=0; j < n2; ++j)
2305          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2306       buffer[i] = acc;
2307    }
2308    free(x);
2309 }
2310 #elif 0
2311 // transform to use a slow dct-iv; this is STILL basically trivial,
2312 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2313 void dct_iv_slow(float *buffer, int n)
2314 {
2315    float mcos[16384];
2316    float x[2048];
2317    int i,j;
2318    int n2 = n >> 1, nmask = (n << 3) - 1;
2319    memcpy(x, buffer, sizeof(*x) * n);
2320    for (i=0; i < 8*n; ++i)
2321       mcos[i] = (float) cos(M_PI / 4 * i / n);
2322    for (i=0; i < n; ++i) {
2323       float acc = 0;
2324       for (j=0; j < n; ++j)
2325          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2326       buffer[i] = acc;
2327    }
2328 }
2329 
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2330 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2331 {
2332    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2333    float temp[4096];
2334 
2335    memcpy(temp, buffer, n2 * sizeof(float));
2336    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2337 
2338    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2339    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2340    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2341 }
2342 #endif
2343 
2344 #ifndef LIBVORBIS_MDCT
2345 #define LIBVORBIS_MDCT 0
2346 #endif
2347 
2348 #if LIBVORBIS_MDCT
2349 // directly call the vorbis MDCT using an interface documented
2350 // by Jeff Roberts... useful for performance comparison
2351 typedef struct
2352 {
2353   int n;
2354   int log2n;
2355 
2356   float *trig;
2357   int   *bitrev;
2358 
2359   float scale;
2360 } mdct_lookup;
2361 
2362 extern void mdct_init(mdct_lookup *lookup, int n);
2363 extern void mdct_clear(mdct_lookup *l);
2364 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2365 
2366 mdct_lookup M1,M2;
2367 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2368 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2369 {
2370    mdct_lookup *M;
2371    if (M1.n == n) M = &M1;
2372    else if (M2.n == n) M = &M2;
2373    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2374    else {
2375       if (M2.n) __asm int 3;
2376       mdct_init(&M2, n);
2377       M = &M2;
2378    }
2379 
2380    mdct_backward(M, buffer, buffer);
2381 }
2382 #endif
2383 
2384 
2385 // the following were split out into separate functions while optimizing;
2386 // they could be pushed back up but eh. __forceinline showed no change;
2387 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2388 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2389 {
2390    float *ee0 = e + i_off;
2391    float *ee2 = ee0 + k_off;
2392    int i;
2393 
2394    assert((n & 3) == 0);
2395    for (i=(n>>2); i > 0; --i) {
2396       float k00_20, k01_21;
2397       k00_20  = ee0[ 0] - ee2[ 0];
2398       k01_21  = ee0[-1] - ee2[-1];
2399       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2400       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2401       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2402       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2403       A += 8;
2404 
2405       k00_20  = ee0[-2] - ee2[-2];
2406       k01_21  = ee0[-3] - ee2[-3];
2407       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2408       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2409       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2410       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2411       A += 8;
2412 
2413       k00_20  = ee0[-4] - ee2[-4];
2414       k01_21  = ee0[-5] - ee2[-5];
2415       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2416       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2417       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2418       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2419       A += 8;
2420 
2421       k00_20  = ee0[-6] - ee2[-6];
2422       k01_21  = ee0[-7] - ee2[-7];
2423       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2424       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2425       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2426       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2427       A += 8;
2428       ee0 -= 8;
2429       ee2 -= 8;
2430    }
2431 }
2432 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2433 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2434 {
2435    int i;
2436    float k00_20, k01_21;
2437 
2438    float *e0 = e + d0;
2439    float *e2 = e0 + k_off;
2440 
2441    for (i=lim >> 2; i > 0; --i) {
2442       k00_20 = e0[-0] - e2[-0];
2443       k01_21 = e0[-1] - e2[-1];
2444       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2445       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2446       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2447       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2448 
2449       A += k1;
2450 
2451       k00_20 = e0[-2] - e2[-2];
2452       k01_21 = e0[-3] - e2[-3];
2453       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2454       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2455       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2456       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2457 
2458       A += k1;
2459 
2460       k00_20 = e0[-4] - e2[-4];
2461       k01_21 = e0[-5] - e2[-5];
2462       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2463       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2464       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2465       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2466 
2467       A += k1;
2468 
2469       k00_20 = e0[-6] - e2[-6];
2470       k01_21 = e0[-7] - e2[-7];
2471       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2472       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2473       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2474       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2475 
2476       e0 -= 8;
2477       e2 -= 8;
2478 
2479       A += k1;
2480    }
2481 }
2482 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2483 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2484 {
2485    int i;
2486    float A0 = A[0];
2487    float A1 = A[0+1];
2488    float A2 = A[0+a_off];
2489    float A3 = A[0+a_off+1];
2490    float A4 = A[0+a_off*2+0];
2491    float A5 = A[0+a_off*2+1];
2492    float A6 = A[0+a_off*3+0];
2493    float A7 = A[0+a_off*3+1];
2494 
2495    float k00,k11;
2496 
2497    float *ee0 = e  +i_off;
2498    float *ee2 = ee0+k_off;
2499 
2500    for (i=n; i > 0; --i) {
2501       k00     = ee0[ 0] - ee2[ 0];
2502       k11     = ee0[-1] - ee2[-1];
2503       ee0[ 0] =  ee0[ 0] + ee2[ 0];
2504       ee0[-1] =  ee0[-1] + ee2[-1];
2505       ee2[ 0] = (k00) * A0 - (k11) * A1;
2506       ee2[-1] = (k11) * A0 + (k00) * A1;
2507 
2508       k00     = ee0[-2] - ee2[-2];
2509       k11     = ee0[-3] - ee2[-3];
2510       ee0[-2] =  ee0[-2] + ee2[-2];
2511       ee0[-3] =  ee0[-3] + ee2[-3];
2512       ee2[-2] = (k00) * A2 - (k11) * A3;
2513       ee2[-3] = (k11) * A2 + (k00) * A3;
2514 
2515       k00     = ee0[-4] - ee2[-4];
2516       k11     = ee0[-5] - ee2[-5];
2517       ee0[-4] =  ee0[-4] + ee2[-4];
2518       ee0[-5] =  ee0[-5] + ee2[-5];
2519       ee2[-4] = (k00) * A4 - (k11) * A5;
2520       ee2[-5] = (k11) * A4 + (k00) * A5;
2521 
2522       k00     = ee0[-6] - ee2[-6];
2523       k11     = ee0[-7] - ee2[-7];
2524       ee0[-6] =  ee0[-6] + ee2[-6];
2525       ee0[-7] =  ee0[-7] + ee2[-7];
2526       ee2[-6] = (k00) * A6 - (k11) * A7;
2527       ee2[-7] = (k11) * A6 + (k00) * A7;
2528 
2529       ee0 -= k0;
2530       ee2 -= k0;
2531    }
2532 }
2533 
iter_54(float * z)2534 static __forceinline void iter_54(float *z)
2535 {
2536    float k00,k11,k22,k33;
2537    float y0,y1,y2,y3;
2538 
2539    k00  = z[ 0] - z[-4];
2540    y0   = z[ 0] + z[-4];
2541    y2   = z[-2] + z[-6];
2542    k22  = z[-2] - z[-6];
2543 
2544    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2545    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2546 
2547    // done with y0,y2
2548 
2549    k33  = z[-3] - z[-7];
2550 
2551    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2552    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2553 
2554    // done with k33
2555 
2556    k11  = z[-1] - z[-5];
2557    y1   = z[-1] + z[-5];
2558    y3   = z[-3] + z[-7];
2559 
2560    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2561    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2562    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2563    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2564 }
2565 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2566 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2567 {
2568    int a_off = base_n >> 3;
2569    float A2 = A[0+a_off];
2570    float *z = e + i_off;
2571    float *base = z - 16 * n;
2572 
2573    while (z > base) {
2574       float k00,k11;
2575 
2576       k00   = z[-0] - z[-8];
2577       k11   = z[-1] - z[-9];
2578       z[-0] = z[-0] + z[-8];
2579       z[-1] = z[-1] + z[-9];
2580       z[-8] =  k00;
2581       z[-9] =  k11 ;
2582 
2583       k00    = z[ -2] - z[-10];
2584       k11    = z[ -3] - z[-11];
2585       z[ -2] = z[ -2] + z[-10];
2586       z[ -3] = z[ -3] + z[-11];
2587       z[-10] = (k00+k11) * A2;
2588       z[-11] = (k11-k00) * A2;
2589 
2590       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
2591       k11    = z[ -5] - z[-13];
2592       z[ -4] = z[ -4] + z[-12];
2593       z[ -5] = z[ -5] + z[-13];
2594       z[-12] = k11;
2595       z[-13] = k00;
2596 
2597       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
2598       k11    = z[ -7] - z[-15];
2599       z[ -6] = z[ -6] + z[-14];
2600       z[ -7] = z[ -7] + z[-15];
2601       z[-14] = (k00+k11) * A2;
2602       z[-15] = (k00-k11) * A2;
2603 
2604       iter_54(z);
2605       iter_54(z-8);
2606       z -= 16;
2607    }
2608 }
2609 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2610 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2611 {
2612    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2613    int ld;
2614    // @OPTIMIZE: reduce register pressure by using fewer variables?
2615    int save_point = temp_alloc_save(f);
2616    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2617    float *u=NULL,*v=NULL;
2618    // twiddle factors
2619    float *A = f->A[blocktype];
2620 
2621    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2622    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2623 
2624    // kernel from paper
2625 
2626 
2627    // merged:
2628    //   copy and reflect spectral data
2629    //   step 0
2630 
2631    // note that it turns out that the items added together during
2632    // this step are, in fact, being added to themselves (as reflected
2633    // by step 0). inexplicable inefficiency! this became obvious
2634    // once I combined the passes.
2635 
2636    // so there's a missing 'times 2' here (for adding X to itself).
2637    // this propagates through linearly to the end, where the numbers
2638    // are 1/2 too small, and need to be compensated for.
2639 
2640    {
2641       float *d,*e, *AA, *e_stop;
2642       d = &buf2[n2-2];
2643       AA = A;
2644       e = &buffer[0];
2645       e_stop = &buffer[n2];
2646       while (e != e_stop) {
2647          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2648          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2649          d -= 2;
2650          AA += 2;
2651          e += 4;
2652       }
2653 
2654       e = &buffer[n2-3];
2655       while (d >= buf2) {
2656          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2657          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2658          d -= 2;
2659          AA += 2;
2660          e -= 4;
2661       }
2662    }
2663 
2664    // now we use symbolic names for these, so that we can
2665    // possibly swap their meaning as we change which operations
2666    // are in place
2667 
2668    u = buffer;
2669    v = buf2;
2670 
2671    // step 2    (paper output is w, now u)
2672    // this could be in place, but the data ends up in the wrong
2673    // place... _somebody_'s got to swap it, so this is nominated
2674    {
2675       float *AA = &A[n2-8];
2676       float *d0,*d1, *e0, *e1;
2677 
2678       e0 = &v[n4];
2679       e1 = &v[0];
2680 
2681       d0 = &u[n4];
2682       d1 = &u[0];
2683 
2684       while (AA >= A) {
2685          float v40_20, v41_21;
2686 
2687          v41_21 = e0[1] - e1[1];
2688          v40_20 = e0[0] - e1[0];
2689          d0[1]  = e0[1] + e1[1];
2690          d0[0]  = e0[0] + e1[0];
2691          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2692          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2693 
2694          v41_21 = e0[3] - e1[3];
2695          v40_20 = e0[2] - e1[2];
2696          d0[3]  = e0[3] + e1[3];
2697          d0[2]  = e0[2] + e1[2];
2698          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2699          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2700 
2701          AA -= 8;
2702 
2703          d0 += 4;
2704          d1 += 4;
2705          e0 += 4;
2706          e1 += 4;
2707       }
2708    }
2709 
2710    // step 3
2711    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2712 
2713    // optimized step 3:
2714 
2715    // the original step3 loop can be nested r inside s or s inside r;
2716    // it's written originally as s inside r, but this is dumb when r
2717    // iterates many times, and s few. So I have two copies of it and
2718    // switch between them halfway.
2719 
2720    // this is iteration 0 of step 3
2721    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2722    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2723 
2724    // this is iteration 1 of step 3
2725    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2726    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2727    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2728    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2729 
2730    l=2;
2731    for (; l < (ld-3)>>1; ++l) {
2732       int k0 = n >> (l+2), k0_2 = k0>>1;
2733       int lim = 1 << (l+1);
2734       int i;
2735       for (i=0; i < lim; ++i)
2736          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2737    }
2738 
2739    for (; l < ld-6; ++l) {
2740       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2741       int rlim = n >> (l+6), r;
2742       int lim = 1 << (l+1);
2743       int i_off;
2744       float *A0 = A;
2745       i_off = n2-1;
2746       for (r=rlim; r > 0; --r) {
2747          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2748          A0 += k1*4;
2749          i_off -= 8;
2750       }
2751    }
2752 
2753    // iterations with count:
2754    //   ld-6,-5,-4 all interleaved together
2755    //       the big win comes from getting rid of needless flops
2756    //         due to the constants on pass 5 & 4 being all 1 and 0;
2757    //       combining them to be simultaneous to improve cache made little difference
2758    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2759 
2760    // output is u
2761 
2762    // step 4, 5, and 6
2763    // cannot be in-place because of step 5
2764    {
2765       uint16 *bitrev = f->bit_reverse[blocktype];
2766       // weirdly, I'd have thought reading sequentially and writing
2767       // erratically would have been better than vice-versa, but in
2768       // fact that's not what my testing showed. (That is, with
2769       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2770 
2771       float *d0 = &v[n4-4];
2772       float *d1 = &v[n2-4];
2773       while (d0 >= v) {
2774          int k4;
2775 
2776          k4 = bitrev[0];
2777          d1[3] = u[k4+0];
2778          d1[2] = u[k4+1];
2779          d0[3] = u[k4+2];
2780          d0[2] = u[k4+3];
2781 
2782          k4 = bitrev[1];
2783          d1[1] = u[k4+0];
2784          d1[0] = u[k4+1];
2785          d0[1] = u[k4+2];
2786          d0[0] = u[k4+3];
2787 
2788          d0 -= 4;
2789          d1 -= 4;
2790          bitrev += 2;
2791       }
2792    }
2793    // (paper output is u, now v)
2794 
2795 
2796    // data must be in buf2
2797    assert(v == buf2);
2798 
2799    // step 7   (paper output is v, now v)
2800    // this is now in place
2801    {
2802       float *C = f->C[blocktype];
2803       float *d, *e;
2804 
2805       d = v;
2806       e = v + n2 - 4;
2807 
2808       while (d < e) {
2809          float a02,a11,b0,b1,b2,b3;
2810 
2811          a02 = d[0] - e[2];
2812          a11 = d[1] + e[3];
2813 
2814          b0 = C[1]*a02 + C[0]*a11;
2815          b1 = C[1]*a11 - C[0]*a02;
2816 
2817          b2 = d[0] + e[ 2];
2818          b3 = d[1] - e[ 3];
2819 
2820          d[0] = b2 + b0;
2821          d[1] = b3 + b1;
2822          e[2] = b2 - b0;
2823          e[3] = b1 - b3;
2824 
2825          a02 = d[2] - e[0];
2826          a11 = d[3] + e[1];
2827 
2828          b0 = C[3]*a02 + C[2]*a11;
2829          b1 = C[3]*a11 - C[2]*a02;
2830 
2831          b2 = d[2] + e[ 0];
2832          b3 = d[3] - e[ 1];
2833 
2834          d[2] = b2 + b0;
2835          d[3] = b3 + b1;
2836          e[0] = b2 - b0;
2837          e[1] = b1 - b3;
2838 
2839          C += 4;
2840          d += 4;
2841          e -= 4;
2842       }
2843    }
2844 
2845    // data must be in buf2
2846 
2847 
2848    // step 8+decode   (paper output is X, now buffer)
2849    // this generates pairs of data a la 8 and pushes them directly through
2850    // the decode kernel (pushing rather than pulling) to avoid having
2851    // to make another pass later
2852 
2853    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2854 
2855    {
2856       float *d0,*d1,*d2,*d3;
2857 
2858       float *B = f->B[blocktype] + n2 - 8;
2859       float *e = buf2 + n2 - 8;
2860       d0 = &buffer[0];
2861       d1 = &buffer[n2-4];
2862       d2 = &buffer[n2];
2863       d3 = &buffer[n-4];
2864       while (e >= v) {
2865          float p0,p1,p2,p3;
2866 
2867          p3 =  e[6]*B[7] - e[7]*B[6];
2868          p2 = -e[6]*B[6] - e[7]*B[7];
2869 
2870          d0[0] =   p3;
2871          d1[3] = - p3;
2872          d2[0] =   p2;
2873          d3[3] =   p2;
2874 
2875          p1 =  e[4]*B[5] - e[5]*B[4];
2876          p0 = -e[4]*B[4] - e[5]*B[5];
2877 
2878          d0[1] =   p1;
2879          d1[2] = - p1;
2880          d2[1] =   p0;
2881          d3[2] =   p0;
2882 
2883          p3 =  e[2]*B[3] - e[3]*B[2];
2884          p2 = -e[2]*B[2] - e[3]*B[3];
2885 
2886          d0[2] =   p3;
2887          d1[1] = - p3;
2888          d2[2] =   p2;
2889          d3[1] =   p2;
2890 
2891          p1 =  e[0]*B[1] - e[1]*B[0];
2892          p0 = -e[0]*B[0] - e[1]*B[1];
2893 
2894          d0[3] =   p1;
2895          d1[0] = - p1;
2896          d2[3] =   p0;
2897          d3[0] =   p0;
2898 
2899          B -= 8;
2900          e -= 8;
2901          d0 += 4;
2902          d2 += 4;
2903          d1 -= 4;
2904          d3 -= 4;
2905       }
2906    }
2907 
2908    temp_free(f,buf2);
2909    temp_alloc_restore(f,save_point);
2910 }
2911 
2912 #if 0
2913 // this is the original version of the above code, if you want to optimize it from scratch
2914 void inverse_mdct_naive(float *buffer, int n)
2915 {
2916    float s;
2917    float A[1 << 12], B[1 << 12], C[1 << 11];
2918    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2919    int n3_4 = n - n4, ld;
2920    // how can they claim this only uses N words?!
2921    // oh, because they're only used sparsely, whoops
2922    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2923    // set up twiddle factors
2924 
2925    for (k=k2=0; k < n4; ++k,k2+=2) {
2926       A[k2  ] = (float)  cos(4*k*M_PI/n);
2927       A[k2+1] = (float) -sin(4*k*M_PI/n);
2928       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
2929       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
2930    }
2931    for (k=k2=0; k < n8; ++k,k2+=2) {
2932       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
2933       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2934    }
2935 
2936    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2937    // Note there are bugs in that pseudocode, presumably due to them attempting
2938    // to rename the arrays nicely rather than representing the way their actual
2939    // implementation bounces buffers back and forth. As a result, even in the
2940    // "some formulars corrected" version, a direct implementation fails. These
2941    // are noted below as "paper bug".
2942 
2943    // copy and reflect spectral data
2944    for (k=0; k < n2; ++k) u[k] = buffer[k];
2945    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2946    // kernel from paper
2947    // step 1
2948    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2949       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
2950       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2951    }
2952    // step 2
2953    for (k=k4=0; k < n8; k+=1, k4+=4) {
2954       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2955       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2956       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2957       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2958    }
2959    // step 3
2960    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2961    for (l=0; l < ld-3; ++l) {
2962       int k0 = n >> (l+2), k1 = 1 << (l+3);
2963       int rlim = n >> (l+4), r4, r;
2964       int s2lim = 1 << (l+2), s2;
2965       for (r=r4=0; r < rlim; r4+=4,++r) {
2966          for (s2=0; s2 < s2lim; s2+=2) {
2967             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2968             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2969             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2970                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2971             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2972                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2973          }
2974       }
2975       if (l+1 < ld-3) {
2976          // paper bug: ping-ponging of u&w here is omitted
2977          memcpy(w, u, sizeof(u));
2978       }
2979    }
2980 
2981    // step 4
2982    for (i=0; i < n8; ++i) {
2983       int j = bit_reverse(i) >> (32-ld+3);
2984       assert(j < n8);
2985       if (i == j) {
2986          // paper bug: original code probably swapped in place; if copying,
2987          //            need to directly copy in this case
2988          int i8 = i << 3;
2989          v[i8+1] = u[i8+1];
2990          v[i8+3] = u[i8+3];
2991          v[i8+5] = u[i8+5];
2992          v[i8+7] = u[i8+7];
2993       } else if (i < j) {
2994          int i8 = i << 3, j8 = j << 3;
2995          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2996          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2997          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2998          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2999       }
3000    }
3001    // step 5
3002    for (k=0; k < n2; ++k) {
3003       w[k] = v[k*2+1];
3004    }
3005    // step 6
3006    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3007       u[n-1-k2] = w[k4];
3008       u[n-2-k2] = w[k4+1];
3009       u[n3_4 - 1 - k2] = w[k4+2];
3010       u[n3_4 - 2 - k2] = w[k4+3];
3011    }
3012    // step 7
3013    for (k=k2=0; k < n8; ++k, k2 += 2) {
3014       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3015       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3016       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3017       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3018    }
3019    // step 8
3020    for (k=k2=0; k < n4; ++k,k2 += 2) {
3021       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
3022       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
3023    }
3024 
3025    // decode kernel to output
3026    // determined the following value experimentally
3027    // (by first figuring out what made inverse_mdct_slow work); then matching that here
3028    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3029    s = 0.5; // theoretically would be n4
3030 
3031    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3032    //     so it needs to use the "old" B values to behave correctly, or else
3033    //     set s to 1.0 ]]]
3034    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
3035    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3036    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
3037 }
3038 #endif
3039 
get_window(vorb * f,int len)3040 static float *get_window(vorb *f, int len)
3041 {
3042    len <<= 1;
3043    if (len == f->blocksize_0) return f->window[0];
3044    if (len == f->blocksize_1) return f->window[1];
3045    return NULL;
3046 }
3047 
3048 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3049 typedef int16 YTYPE;
3050 #else
3051 typedef int YTYPE;
3052 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3053 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3054 {
3055    int n2 = n >> 1;
3056    int s = map->chan[i].mux, floor;
3057    floor = map->submap_floor[s];
3058    if (f->floor_types[floor] == 0) {
3059       return error(f, VORBIS_invalid_stream);
3060    } else {
3061       Floor1 *g = &f->floor_config[floor].floor1;
3062       int j,q;
3063       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3064       for (q=1; q < g->values; ++q) {
3065          j = g->sorted_order[q];
3066          #ifndef STB_VORBIS_NO_DEFER_FLOOR
3067          if (finalY[j] >= 0)
3068          #else
3069          if (step2_flag[j])
3070          #endif
3071          {
3072             int hy = finalY[j] * g->floor1_multiplier;
3073             int hx = g->Xlist[j];
3074             if (lx != hx)
3075                draw_line(target, lx,ly, hx,hy, n2);
3076             CHECK(f);
3077             lx = hx, ly = hy;
3078          }
3079       }
3080       if (lx < n2) {
3081          // optimization of: draw_line(target, lx,ly, n,ly, n2);
3082          for (j=lx; j < n2; ++j)
3083             LINE_OP(target[j], inverse_db_table[ly]);
3084          CHECK(f);
3085       }
3086    }
3087    return TRUE;
3088 }
3089 
3090 // The meaning of "left" and "right"
3091 //
3092 // For a given frame:
3093 //     we compute samples from 0..n
3094 //     window_center is n/2
3095 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
3096 //     all of the samples from left_end to right_start can be output without mixing; however,
3097 //        this interval is 0-length except when transitioning between short and long frames
3098 //     all of the samples from right_start to right_end need to be mixed with the next frame,
3099 //        which we don't have, so those get saved in a buffer
3100 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
3101 //        has to be the same as frame N+1's left_end-left_start (which they are by
3102 //        construction)
3103 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3104 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3105 {
3106    Mode *m;
3107    int i, n, prev, next, window_center;
3108    f->channel_buffer_start = f->channel_buffer_end = 0;
3109 
3110   retry:
3111    if (f->eof) return FALSE;
3112    if (!maybe_start_packet(f))
3113       return FALSE;
3114    // check packet type
3115    if (get_bits(f,1) != 0) {
3116       if (IS_PUSH_MODE(f))
3117          return error(f,VORBIS_bad_packet_type);
3118       while (EOP != get8_packet(f));
3119       goto retry;
3120    }
3121 
3122    if (f->alloc.alloc_buffer)
3123       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3124 
3125    i = get_bits(f, ilog(f->mode_count-1));
3126    if (i == EOP) return FALSE;
3127    if (i >= f->mode_count) return FALSE;
3128    *mode = i;
3129    m = f->mode_config + i;
3130    if (m->blockflag) {
3131       n = f->blocksize_1;
3132       prev = get_bits(f,1);
3133       next = get_bits(f,1);
3134    } else {
3135       prev = next = 0;
3136       n = f->blocksize_0;
3137    }
3138 
3139 // WINDOWING
3140 
3141    window_center = n >> 1;
3142    if (m->blockflag && !prev) {
3143       *p_left_start = (n - f->blocksize_0) >> 2;
3144       *p_left_end   = (n + f->blocksize_0) >> 2;
3145    } else {
3146       *p_left_start = 0;
3147       *p_left_end   = window_center;
3148    }
3149    if (m->blockflag && !next) {
3150       *p_right_start = (n*3 - f->blocksize_0) >> 2;
3151       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
3152    } else {
3153       *p_right_start = window_center;
3154       *p_right_end   = n;
3155    }
3156 
3157    return TRUE;
3158 }
3159 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3160 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3161 {
3162    Mapping *map;
3163    int i,j,k,n,n2;
3164    int zero_channel[256];
3165    int really_zero_channel[256];
3166 
3167 // WINDOWING
3168 
3169    n = f->blocksize[m->blockflag];
3170    map = &f->mapping[m->mapping];
3171 
3172 // FLOORS
3173    n2 = n >> 1;
3174 
3175    CHECK(f);
3176 
3177    for (i=0; i < f->channels; ++i) {
3178       int s = map->chan[i].mux, floor;
3179       zero_channel[i] = FALSE;
3180       floor = map->submap_floor[s];
3181       if (f->floor_types[floor] == 0) {
3182          return error(f, VORBIS_invalid_stream);
3183       } else {
3184          Floor1 *g = &f->floor_config[floor].floor1;
3185          if (get_bits(f, 1)) {
3186             short *finalY;
3187             uint8 step2_flag[256];
3188             static int range_list[4] = { 256, 128, 86, 64 };
3189             int range = range_list[g->floor1_multiplier-1];
3190             int offset = 2;
3191             finalY = f->finalY[i];
3192             finalY[0] = get_bits(f, ilog(range)-1);
3193             finalY[1] = get_bits(f, ilog(range)-1);
3194             for (j=0; j < g->partitions; ++j) {
3195                int pclass = g->partition_class_list[j];
3196                int cdim = g->class_dimensions[pclass];
3197                int cbits = g->class_subclasses[pclass];
3198                int csub = (1 << cbits)-1;
3199                int cval = 0;
3200                if (cbits) {
3201                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3202                   DECODE(cval,f,c);
3203                }
3204                for (k=0; k < cdim; ++k) {
3205                   int book = g->subclass_books[pclass][cval & csub];
3206                   cval = cval >> cbits;
3207                   if (book >= 0) {
3208                      int temp;
3209                      Codebook *c = f->codebooks + book;
3210                      DECODE(temp,f,c);
3211                      finalY[offset++] = temp;
3212                   } else
3213                      finalY[offset++] = 0;
3214                }
3215             }
3216             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3217             step2_flag[0] = step2_flag[1] = 1;
3218             for (j=2; j < g->values; ++j) {
3219                int low, high, pred, highroom, lowroom, room, val;
3220                low = g->neighbors[j][0];
3221                high = g->neighbors[j][1];
3222                //neighbors(g->Xlist, j, &low, &high);
3223                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3224                val = finalY[j];
3225                highroom = range - pred;
3226                lowroom = pred;
3227                if (highroom < lowroom)
3228                   room = highroom * 2;
3229                else
3230                   room = lowroom * 2;
3231                if (val) {
3232                   step2_flag[low] = step2_flag[high] = 1;
3233                   step2_flag[j] = 1;
3234                   if (val >= room)
3235                      if (highroom > lowroom)
3236                         finalY[j] = val - lowroom + pred;
3237                      else
3238                         finalY[j] = pred - val + highroom - 1;
3239                   else
3240                      if (val & 1)
3241                         finalY[j] = pred - ((val+1)>>1);
3242                      else
3243                         finalY[j] = pred + (val>>1);
3244                } else {
3245                   step2_flag[j] = 0;
3246                   finalY[j] = pred;
3247                }
3248             }
3249 
3250 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3251             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3252 #else
3253             // defer final floor computation until _after_ residue
3254             for (j=0; j < g->values; ++j) {
3255                if (!step2_flag[j])
3256                   finalY[j] = -1;
3257             }
3258 #endif
3259          } else {
3260            error:
3261             zero_channel[i] = TRUE;
3262          }
3263          // So we just defer everything else to later
3264 
3265          // at this point we've decoded the floor into buffer
3266       }
3267    }
3268    CHECK(f);
3269    // at this point we've decoded all floors
3270 
3271    if (f->alloc.alloc_buffer)
3272       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3273 
3274    // re-enable coupled channels if necessary
3275    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3276    for (i=0; i < map->coupling_steps; ++i)
3277       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3278          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3279       }
3280 
3281    CHECK(f);
3282 // RESIDUE DECODE
3283    for (i=0; i < map->submaps; ++i) {
3284       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3285       int r;
3286       uint8 do_not_decode[256];
3287       int ch = 0;
3288       for (j=0; j < f->channels; ++j) {
3289          if (map->chan[j].mux == i) {
3290             if (zero_channel[j]) {
3291                do_not_decode[ch] = TRUE;
3292                residue_buffers[ch] = NULL;
3293             } else {
3294                do_not_decode[ch] = FALSE;
3295                residue_buffers[ch] = f->channel_buffers[j];
3296             }
3297             ++ch;
3298          }
3299       }
3300       r = map->submap_residue[i];
3301       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3302    }
3303 
3304    if (f->alloc.alloc_buffer)
3305       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3306    CHECK(f);
3307 
3308 // INVERSE COUPLING
3309    for (i = map->coupling_steps-1; i >= 0; --i) {
3310       int n2 = n >> 1;
3311       float *m = f->channel_buffers[map->chan[i].magnitude];
3312       float *a = f->channel_buffers[map->chan[i].angle    ];
3313       for (j=0; j < n2; ++j) {
3314          float a2,m2;
3315          if (m[j] > 0)
3316             if (a[j] > 0)
3317                m2 = m[j], a2 = m[j] - a[j];
3318             else
3319                a2 = m[j], m2 = m[j] + a[j];
3320          else
3321             if (a[j] > 0)
3322                m2 = m[j], a2 = m[j] + a[j];
3323             else
3324                a2 = m[j], m2 = m[j] - a[j];
3325          m[j] = m2;
3326          a[j] = a2;
3327       }
3328    }
3329    CHECK(f);
3330 
3331    // finish decoding the floors
3332 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3333    for (i=0; i < f->channels; ++i) {
3334       if (really_zero_channel[i]) {
3335          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3336       } else {
3337          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3338       }
3339    }
3340 #else
3341    for (i=0; i < f->channels; ++i) {
3342       if (really_zero_channel[i]) {
3343          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3344       } else {
3345          for (j=0; j < n2; ++j)
3346             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3347       }
3348    }
3349 #endif
3350 
3351 // INVERSE MDCT
3352    CHECK(f);
3353    for (i=0; i < f->channels; ++i)
3354       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3355    CHECK(f);
3356 
3357    // this shouldn't be necessary, unless we exited on an error
3358    // and want to flush to get to the next packet
3359    flush_packet(f);
3360 
3361    if (f->first_decode) {
3362       // assume we start so first non-discarded sample is sample 0
3363       // this isn't to spec, but spec would require us to read ahead
3364       // and decode the size of all current frames--could be done,
3365       // but presumably it's not a commonly used feature
3366       f->current_loc = -n2; // start of first frame is positioned for discard
3367       // we might have to discard samples "from" the next frame too,
3368       // if we're lapping a large block then a small at the start?
3369       f->discard_samples_deferred = n - right_end;
3370       f->current_loc_valid = TRUE;
3371       f->first_decode = FALSE;
3372    } else if (f->discard_samples_deferred) {
3373       if (f->discard_samples_deferred >= right_start - left_start) {
3374          f->discard_samples_deferred -= (right_start - left_start);
3375          left_start = right_start;
3376          *p_left = left_start;
3377       } else {
3378          left_start += f->discard_samples_deferred;
3379          *p_left = left_start;
3380          f->discard_samples_deferred = 0;
3381       }
3382    } else if (f->previous_length == 0 && f->current_loc_valid) {
3383       // we're recovering from a seek... that means we're going to discard
3384       // the samples from this packet even though we know our position from
3385       // the last page header, so we need to update the position based on
3386       // the discarded samples here
3387       // but wait, the code below is going to add this in itself even
3388       // on a discard, so we don't need to do it here...
3389    }
3390 
3391    // check if we have ogg information about the sample # for this packet
3392    if (f->last_seg_which == f->end_seg_with_known_loc) {
3393       // if we have a valid current loc, and this is final:
3394       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3395          uint32 current_end = f->known_loc_for_packet;
3396          // then let's infer the size of the (probably) short final frame
3397          if (current_end < f->current_loc + (right_end-left_start)) {
3398             if (current_end < f->current_loc) {
3399                // negative truncation, that's impossible!
3400                *len = 0;
3401             } else {
3402                *len = current_end - f->current_loc;
3403             }
3404             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3405             if (*len > right_end) *len = right_end; // this should never happen
3406             f->current_loc += *len;
3407             return TRUE;
3408          }
3409       }
3410       // otherwise, just set our sample loc
3411       // guess that the ogg granule pos refers to the _middle_ of the
3412       // last frame?
3413       // set f->current_loc to the position of left_start
3414       f->current_loc = f->known_loc_for_packet - (n2-left_start);
3415       f->current_loc_valid = TRUE;
3416    }
3417    if (f->current_loc_valid)
3418       f->current_loc += (right_start - left_start);
3419 
3420    if (f->alloc.alloc_buffer)
3421       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3422    *len = right_end;  // ignore samples after the window goes to 0
3423    CHECK(f);
3424 
3425    return TRUE;
3426 }
3427 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3428 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3429 {
3430    int mode, left_end, right_end;
3431    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3432    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3433 }
3434 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3435 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3436 {
3437    int prev,i,j;
3438    // we use right&left (the start of the right- and left-window sin()-regions)
3439    // to determine how much to return, rather than inferring from the rules
3440    // (same result, clearer code); 'left' indicates where our sin() window
3441    // starts, therefore where the previous window's right edge starts, and
3442    // therefore where to start mixing from the previous buffer. 'right'
3443    // indicates where our sin() ending-window starts, therefore that's where
3444    // we start saving, and where our returned-data ends.
3445 
3446    // mixin from previous window
3447    if (f->previous_length) {
3448       int i,j, n = f->previous_length;
3449       float *w = get_window(f, n);
3450       if (w == NULL) return 0;
3451       for (i=0; i < f->channels; ++i) {
3452          for (j=0; j < n; ++j)
3453             f->channel_buffers[i][left+j] =
3454                f->channel_buffers[i][left+j]*w[    j] +
3455                f->previous_window[i][     j]*w[n-1-j];
3456       }
3457    }
3458 
3459    prev = f->previous_length;
3460 
3461    // last half of this data becomes previous window
3462    f->previous_length = len - right;
3463 
3464    // @OPTIMIZE: could avoid this copy by double-buffering the
3465    // output (flipping previous_window with channel_buffers), but
3466    // then previous_window would have to be 2x as large, and
3467    // channel_buffers couldn't be temp mem (although they're NOT
3468    // currently temp mem, they could be (unless we want to level
3469    // performance by spreading out the computation))
3470    for (i=0; i < f->channels; ++i)
3471       for (j=0; right+j < len; ++j)
3472          f->previous_window[i][j] = f->channel_buffers[i][right+j];
3473 
3474    if (!prev)
3475       // there was no previous packet, so this data isn't valid...
3476       // this isn't entirely true, only the would-have-overlapped data
3477       // isn't valid, but this seems to be what the spec requires
3478       return 0;
3479 
3480    // truncate a short frame
3481    if (len < right) right = len;
3482 
3483    f->samples_output += right-left;
3484 
3485    return right - left;
3486 }
3487 
vorbis_pump_first_frame(stb_vorbis * f)3488 static int vorbis_pump_first_frame(stb_vorbis *f)
3489 {
3490    int len, right, left, res;
3491    res = vorbis_decode_packet(f, &len, &left, &right);
3492    if (res)
3493       vorbis_finish_frame(f, len, left, right);
3494    return res;
3495 }
3496 
3497 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f)3498 static int is_whole_packet_present(stb_vorbis *f)
3499 {
3500    // make sure that we have the packet available before continuing...
3501    // this requires a full ogg parse, but we know we can fetch from f->stream
3502 
3503    // instead of coding this out explicitly, we could save the current read state,
3504    // read the next packet with get8() until end-of-packet, check f->eof, then
3505    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3506    // of state to restore (primarily the page segment table)
3507 
3508    int s = f->next_seg, first = TRUE;
3509    uint8 *p = f->stream;
3510 
3511    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3512       for (; s < f->segment_count; ++s) {
3513          p += f->segments[s];
3514          if (f->segments[s] < 255)               // stop at first short segment
3515             break;
3516       }
3517       // either this continues, or it ends it...
3518       if (s == f->segment_count)
3519          s = -1; // set 'crosses page' flag
3520       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3521       first = FALSE;
3522    }
3523    for (; s == -1;) {
3524       uint8 *q;
3525       int n;
3526 
3527       // check that we have the page header ready
3528       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3529       // validate the page
3530       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3531       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3532       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3533          if (f->previous_length)
3534             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3535          // if no previous length, we're resynching, so we can come in on a continued-packet,
3536          // which we'll just drop
3537       } else {
3538          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3539       }
3540       n = p[26]; // segment counts
3541       q = p+27;  // q points to segment table
3542       p = q + n; // advance past header
3543       // make sure we've read the segment table
3544       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3545       for (s=0; s < n; ++s) {
3546          p += q[s];
3547          if (q[s] < 255)
3548             break;
3549       }
3550       if (s == n)
3551          s = -1; // set 'crosses page' flag
3552       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3553       first = FALSE;
3554    }
3555    return TRUE;
3556 }
3557 #endif // !STB_VORBIS_NO_PUSHDATA_API
3558 
start_decoder(vorb * f)3559 static int start_decoder(vorb *f)
3560 {
3561    uint8 header[6], x,y;
3562    int len,i,j,k, max_submaps = 0;
3563    int longest_floorlist=0;
3564 
3565    // first page, first packet
3566    f->first_decode = TRUE;
3567 
3568    if (!start_page(f))                              return FALSE;
3569    // validate page flag
3570    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3571    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3572    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3573    // check for expected packet length
3574    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3575    if (f->segments[0] != 30) {
3576       // check for the Ogg skeleton fishead identifying header to refine our error
3577       if (f->segments[0] == 64 &&
3578           getn(f, header, 6) &&
3579           header[0] == 'f' &&
3580           header[1] == 'i' &&
3581           header[2] == 's' &&
3582           header[3] == 'h' &&
3583           header[4] == 'e' &&
3584           header[5] == 'a' &&
3585           get8(f)   == 'd' &&
3586           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
3587       else
3588                                                     return error(f, VORBIS_invalid_first_page);
3589    }
3590 
3591    // read packet
3592    // check packet header
3593    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3594    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3595    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3596    // vorbis_version
3597    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3598    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3599    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3600    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3601    get32(f); // bitrate_maximum
3602    get32(f); // bitrate_nominal
3603    get32(f); // bitrate_minimum
3604    x = get8(f);
3605    {
3606       int log0,log1;
3607       log0 = x & 15;
3608       log1 = x >> 4;
3609       f->blocksize_0 = 1 << log0;
3610       f->blocksize_1 = 1 << log1;
3611       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3612       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3613       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3614    }
3615 
3616    // framing_flag
3617    x = get8(f);
3618    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3619 
3620    // second packet!
3621    if (!start_page(f))                              return FALSE;
3622 
3623    if (!start_packet(f))                            return FALSE;
3624 
3625    if (!next_segment(f))                            return FALSE;
3626 
3627    if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
3628    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3629    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3630    //file vendor
3631    len = get32_packet(f);
3632    f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
3633    for(i=0; i < len; ++i) {
3634       f->vendor[i] = get8_packet(f);
3635    }
3636    f->vendor[len] = (char)'\0';
3637    //user comments
3638    f->comment_list_length = get32_packet(f);
3639    f->comment_list = (char**)setup_malloc(f, sizeof(char*) * (f->comment_list_length));
3640 
3641    for(i=0; i < f->comment_list_length; ++i) {
3642       len = get32_packet(f);
3643       f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
3644 
3645       for(j=0; j < len; ++j) {
3646          f->comment_list[i][j] = get8_packet(f);
3647       }
3648       f->comment_list[i][len] = (char)'\0';
3649    }
3650 
3651    // framing_flag
3652    x = get8_packet(f);
3653    if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
3654 
3655 
3656    skip(f, f->bytes_in_seg);
3657    f->bytes_in_seg = 0;
3658 
3659    do {
3660       len = next_segment(f);
3661       skip(f, len);
3662       f->bytes_in_seg = 0;
3663    } while (len);
3664 
3665    // third packet!
3666    if (!start_packet(f))                            return FALSE;
3667 
3668    #ifndef STB_VORBIS_NO_PUSHDATA_API
3669    if (IS_PUSH_MODE(f)) {
3670       if (!is_whole_packet_present(f)) {
3671          // convert error in ogg header to write type
3672          if (f->error == VORBIS_invalid_stream)
3673             f->error = VORBIS_invalid_setup;
3674          return FALSE;
3675       }
3676    }
3677    #endif
3678 
3679    crc32_init(); // always init it, to avoid multithread race conditions
3680 
3681    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3682    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3683    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3684 
3685    // codebooks
3686 
3687    f->codebook_count = get_bits(f,8) + 1;
3688    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3689    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3690    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3691    for (i=0; i < f->codebook_count; ++i) {
3692       uint32 *values;
3693       int ordered, sorted_count;
3694       int total=0;
3695       uint8 *lengths;
3696       Codebook *c = f->codebooks+i;
3697       CHECK(f);
3698       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3699       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3700       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3701       x = get_bits(f, 8);
3702       c->dimensions = (get_bits(f, 8)<<8) + x;
3703       x = get_bits(f, 8);
3704       y = get_bits(f, 8);
3705       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3706       ordered = get_bits(f,1);
3707       c->sparse = ordered ? 0 : get_bits(f,1);
3708 
3709       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
3710 
3711       if (c->sparse)
3712          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3713       else
3714          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3715 
3716       if (!lengths) return error(f, VORBIS_outofmem);
3717 
3718       if (ordered) {
3719          int current_entry = 0;
3720          int current_length = get_bits(f,5) + 1;
3721          while (current_entry < c->entries) {
3722             int limit = c->entries - current_entry;
3723             int n = get_bits(f, ilog(limit));
3724             if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3725             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3726             memset(lengths + current_entry, current_length, n);
3727             current_entry += n;
3728             ++current_length;
3729          }
3730       } else {
3731          for (j=0; j < c->entries; ++j) {
3732             int present = c->sparse ? get_bits(f,1) : 1;
3733             if (present) {
3734                lengths[j] = get_bits(f, 5) + 1;
3735                ++total;
3736                if (lengths[j] == 32)
3737                   return error(f, VORBIS_invalid_setup);
3738             } else {
3739                lengths[j] = NO_CODE;
3740             }
3741          }
3742       }
3743 
3744       if (c->sparse && total >= c->entries >> 2) {
3745          // convert sparse items to non-sparse!
3746          if (c->entries > (int) f->setup_temp_memory_required)
3747             f->setup_temp_memory_required = c->entries;
3748 
3749          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3750          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3751          memcpy(c->codeword_lengths, lengths, c->entries);
3752          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3753          lengths = c->codeword_lengths;
3754          c->sparse = 0;
3755       }
3756 
3757       // compute the size of the sorted tables
3758       if (c->sparse) {
3759          sorted_count = total;
3760       } else {
3761          sorted_count = 0;
3762          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3763          for (j=0; j < c->entries; ++j)
3764             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3765                ++sorted_count;
3766          #endif
3767       }
3768 
3769       c->sorted_entries = sorted_count;
3770       values = NULL;
3771 
3772       CHECK(f);
3773       if (!c->sparse) {
3774          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3775          if (!c->codewords)                  return error(f, VORBIS_outofmem);
3776       } else {
3777          unsigned int size;
3778          if (c->sorted_entries) {
3779             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3780             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3781             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3782             if (!c->codewords)                  return error(f, VORBIS_outofmem);
3783             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3784             if (!values)                        return error(f, VORBIS_outofmem);
3785          }
3786          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3787          if (size > f->setup_temp_memory_required)
3788             f->setup_temp_memory_required = size;
3789       }
3790 
3791       if (!compute_codewords(c, lengths, c->entries, values)) {
3792          if (c->sparse) setup_temp_free(f, values, 0);
3793          return error(f, VORBIS_invalid_setup);
3794       }
3795 
3796       if (c->sorted_entries) {
3797          // allocate an extra slot for sentinels
3798          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3799          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3800          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3801          // so that we can catch that case without an extra if
3802          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3803          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3804          ++c->sorted_values;
3805          c->sorted_values[-1] = -1;
3806          compute_sorted_huffman(c, lengths, values);
3807       }
3808 
3809       if (c->sparse) {
3810          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3811          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3812          setup_temp_free(f, lengths, c->entries);
3813          c->codewords = NULL;
3814       }
3815 
3816       compute_accelerated_huffman(c);
3817 
3818       CHECK(f);
3819       c->lookup_type = get_bits(f, 4);
3820       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3821       if (c->lookup_type > 0) {
3822          uint16 *mults;
3823          c->minimum_value = float32_unpack(get_bits(f, 32));
3824          c->delta_value = float32_unpack(get_bits(f, 32));
3825          c->value_bits = get_bits(f, 4)+1;
3826          c->sequence_p = get_bits(f,1);
3827          if (c->lookup_type == 1) {
3828             int values = lookup1_values(c->entries, c->dimensions);
3829             if (values < 0) return error(f, VORBIS_invalid_setup);
3830             c->lookup_values = (uint32) values;
3831          } else {
3832             c->lookup_values = c->entries * c->dimensions;
3833          }
3834          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3835          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3836          if (mults == NULL) return error(f, VORBIS_outofmem);
3837          for (j=0; j < (int) c->lookup_values; ++j) {
3838             int q = get_bits(f, c->value_bits);
3839             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3840             mults[j] = q;
3841          }
3842 
3843 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3844          if (c->lookup_type == 1) {
3845             int len, sparse = c->sparse;
3846             float last=0;
3847             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3848             if (sparse) {
3849                if (c->sorted_entries == 0) goto skip;
3850                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3851             } else
3852                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3853             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3854             len = sparse ? c->sorted_entries : c->entries;
3855             for (j=0; j < len; ++j) {
3856                unsigned int z = sparse ? c->sorted_values[j] : j;
3857                unsigned int div=1;
3858                for (k=0; k < c->dimensions; ++k) {
3859                   int off = (z / div) % c->lookup_values;
3860                   float val = mults[off];
3861                   val = mults[off]*c->delta_value + c->minimum_value + last;
3862                   c->multiplicands[j*c->dimensions + k] = val;
3863                   if (c->sequence_p)
3864                      last = val;
3865                   if (k+1 < c->dimensions) {
3866                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3867                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3868                         return error(f, VORBIS_invalid_setup);
3869                      }
3870                      div *= c->lookup_values;
3871                   }
3872                }
3873             }
3874             c->lookup_type = 2;
3875          }
3876          else
3877 #endif
3878          {
3879             float last=0;
3880             CHECK(f);
3881             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3882             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3883             for (j=0; j < (int) c->lookup_values; ++j) {
3884                float val = mults[j] * c->delta_value + c->minimum_value + last;
3885                c->multiplicands[j] = val;
3886                if (c->sequence_p)
3887                   last = val;
3888             }
3889          }
3890 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3891         skip:;
3892 #endif
3893          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3894 
3895          CHECK(f);
3896       }
3897       CHECK(f);
3898    }
3899 
3900    // time domain transfers (notused)
3901 
3902    x = get_bits(f, 6) + 1;
3903    for (i=0; i < x; ++i) {
3904       uint32 z = get_bits(f, 16);
3905       if (z != 0) return error(f, VORBIS_invalid_setup);
3906    }
3907 
3908    // Floors
3909    f->floor_count = get_bits(f, 6)+1;
3910    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3911    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3912    for (i=0; i < f->floor_count; ++i) {
3913       f->floor_types[i] = get_bits(f, 16);
3914       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3915       if (f->floor_types[i] == 0) {
3916          Floor0 *g = &f->floor_config[i].floor0;
3917          g->order = get_bits(f,8);
3918          g->rate = get_bits(f,16);
3919          g->bark_map_size = get_bits(f,16);
3920          g->amplitude_bits = get_bits(f,6);
3921          g->amplitude_offset = get_bits(f,8);
3922          g->number_of_books = get_bits(f,4) + 1;
3923          for (j=0; j < g->number_of_books; ++j)
3924             g->book_list[j] = get_bits(f,8);
3925          return error(f, VORBIS_feature_not_supported);
3926       } else {
3927          stbv__floor_ordering p[31*8+2];
3928          Floor1 *g = &f->floor_config[i].floor1;
3929          int max_class = -1;
3930          g->partitions = get_bits(f, 5);
3931          for (j=0; j < g->partitions; ++j) {
3932             g->partition_class_list[j] = get_bits(f, 4);
3933             if (g->partition_class_list[j] > max_class)
3934                max_class = g->partition_class_list[j];
3935          }
3936          for (j=0; j <= max_class; ++j) {
3937             g->class_dimensions[j] = get_bits(f, 3)+1;
3938             g->class_subclasses[j] = get_bits(f, 2);
3939             if (g->class_subclasses[j]) {
3940                g->class_masterbooks[j] = get_bits(f, 8);
3941                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3942             }
3943             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3944                g->subclass_books[j][k] = get_bits(f,8)-1;
3945                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3946             }
3947          }
3948          g->floor1_multiplier = get_bits(f,2)+1;
3949          g->rangebits = get_bits(f,4);
3950          g->Xlist[0] = 0;
3951          g->Xlist[1] = 1 << g->rangebits;
3952          g->values = 2;
3953          for (j=0; j < g->partitions; ++j) {
3954             int c = g->partition_class_list[j];
3955             for (k=0; k < g->class_dimensions[c]; ++k) {
3956                g->Xlist[g->values] = get_bits(f, g->rangebits);
3957                ++g->values;
3958             }
3959          }
3960          // precompute the sorting
3961          for (j=0; j < g->values; ++j) {
3962             p[j].x = g->Xlist[j];
3963             p[j].id = j;
3964          }
3965          qsort(p, g->values, sizeof(p[0]), point_compare);
3966          for (j=0; j < g->values-1; ++j)
3967             if (p[j].x == p[j+1].x)
3968                return error(f, VORBIS_invalid_setup);
3969          for (j=0; j < g->values; ++j)
3970             g->sorted_order[j] = (uint8) p[j].id;
3971          // precompute the neighbors
3972          for (j=2; j < g->values; ++j) {
3973             int low = 0,hi = 0;
3974             neighbors(g->Xlist, j, &low,&hi);
3975             g->neighbors[j][0] = low;
3976             g->neighbors[j][1] = hi;
3977          }
3978 
3979          if (g->values > longest_floorlist)
3980             longest_floorlist = g->values;
3981       }
3982    }
3983 
3984    // Residue
3985    f->residue_count = get_bits(f, 6)+1;
3986    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3987    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3988    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3989    for (i=0; i < f->residue_count; ++i) {
3990       uint8 residue_cascade[64];
3991       Residue *r = f->residue_config+i;
3992       f->residue_types[i] = get_bits(f, 16);
3993       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3994       r->begin = get_bits(f, 24);
3995       r->end = get_bits(f, 24);
3996       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3997       r->part_size = get_bits(f,24)+1;
3998       r->classifications = get_bits(f,6)+1;
3999       r->classbook = get_bits(f,8);
4000       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4001       for (j=0; j < r->classifications; ++j) {
4002          uint8 high_bits=0;
4003          uint8 low_bits=get_bits(f,3);
4004          if (get_bits(f,1))
4005             high_bits = get_bits(f,5);
4006          residue_cascade[j] = high_bits*8 + low_bits;
4007       }
4008       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4009       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4010       for (j=0; j < r->classifications; ++j) {
4011          for (k=0; k < 8; ++k) {
4012             if (residue_cascade[j] & (1 << k)) {
4013                r->residue_books[j][k] = get_bits(f, 8);
4014                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4015             } else {
4016                r->residue_books[j][k] = -1;
4017             }
4018          }
4019       }
4020       // precompute the classifications[] array to avoid inner-loop mod/divide
4021       // call it 'classdata' since we already have r->classifications
4022       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4023       if (!r->classdata) return error(f, VORBIS_outofmem);
4024       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4025       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4026          int classwords = f->codebooks[r->classbook].dimensions;
4027          int temp = j;
4028          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4029          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4030          for (k=classwords-1; k >= 0; --k) {
4031             r->classdata[j][k] = temp % r->classifications;
4032             temp /= r->classifications;
4033          }
4034       }
4035    }
4036 
4037    f->mapping_count = get_bits(f,6)+1;
4038    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4039    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4040    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4041    for (i=0; i < f->mapping_count; ++i) {
4042       Mapping *m = f->mapping + i;
4043       int mapping_type = get_bits(f,16);
4044       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4045       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4046       if (m->chan == NULL) return error(f, VORBIS_outofmem);
4047       if (get_bits(f,1))
4048          m->submaps = get_bits(f,4)+1;
4049       else
4050          m->submaps = 1;
4051       if (m->submaps > max_submaps)
4052          max_submaps = m->submaps;
4053       if (get_bits(f,1)) {
4054          m->coupling_steps = get_bits(f,8)+1;
4055          if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4056          for (k=0; k < m->coupling_steps; ++k) {
4057             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4058             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4059             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
4060             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
4061             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
4062          }
4063       } else
4064          m->coupling_steps = 0;
4065 
4066       // reserved field
4067       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4068       if (m->submaps > 1) {
4069          for (j=0; j < f->channels; ++j) {
4070             m->chan[j].mux = get_bits(f, 4);
4071             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
4072          }
4073       } else
4074          // @SPECIFICATION: this case is missing from the spec
4075          for (j=0; j < f->channels; ++j)
4076             m->chan[j].mux = 0;
4077 
4078       for (j=0; j < m->submaps; ++j) {
4079          get_bits(f,8); // discard
4080          m->submap_floor[j] = get_bits(f,8);
4081          m->submap_residue[j] = get_bits(f,8);
4082          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
4083          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
4084       }
4085    }
4086 
4087    // Modes
4088    f->mode_count = get_bits(f, 6)+1;
4089    for (i=0; i < f->mode_count; ++i) {
4090       Mode *m = f->mode_config+i;
4091       m->blockflag = get_bits(f,1);
4092       m->windowtype = get_bits(f,16);
4093       m->transformtype = get_bits(f,16);
4094       m->mapping = get_bits(f,8);
4095       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
4096       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
4097       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
4098    }
4099 
4100    flush_packet(f);
4101 
4102    f->previous_length = 0;
4103 
4104    for (i=0; i < f->channels; ++i) {
4105       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4106       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4107       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4108       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4109       memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4110       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4111       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4112       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4113       #endif
4114    }
4115 
4116    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4117    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4118    f->blocksize[0] = f->blocksize_0;
4119    f->blocksize[1] = f->blocksize_1;
4120 
4121 #ifdef STB_VORBIS_DIVIDE_TABLE
4122    if (integer_divide_table[1][1]==0)
4123       for (i=0; i < DIVTAB_NUMER; ++i)
4124          for (j=1; j < DIVTAB_DENOM; ++j)
4125             integer_divide_table[i][j] = i / j;
4126 #endif
4127 
4128    // compute how much temporary memory is needed
4129 
4130    // 1.
4131    {
4132       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4133       uint32 classify_mem;
4134       int i,max_part_read=0;
4135       for (i=0; i < f->residue_count; ++i) {
4136          Residue *r = f->residue_config + i;
4137          unsigned int actual_size = f->blocksize_1 / 2;
4138          unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4139          unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
4140          int n_read = limit_r_end - limit_r_begin;
4141          int part_read = n_read / r->part_size;
4142          if (part_read > max_part_read)
4143             max_part_read = part_read;
4144       }
4145       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4146       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4147       #else
4148       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4149       #endif
4150 
4151       // maximum reasonable partition size is f->blocksize_1
4152 
4153       f->temp_memory_required = classify_mem;
4154       if (imdct_mem > f->temp_memory_required)
4155          f->temp_memory_required = imdct_mem;
4156    }
4157 
4158 
4159    if (f->alloc.alloc_buffer) {
4160       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4161       // check if there's enough temp memory so we don't error later
4162       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4163          return error(f, VORBIS_outofmem);
4164    }
4165 
4166    // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
4167    // without PAGEFLAG_continued_packet, so this either points to the first page, or
4168    // the page after the end of the headers. It might be cleaner to point to a page
4169    // in the middle of the headers, when that's the page where the first audio packet
4170    // starts, but we'd have to also correctly skip the end of any continued packet in
4171    // stb_vorbis_seek_start.
4172    if (f->next_seg == -1) {
4173       f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4174    } else {
4175       f->first_audio_page_offset = 0;
4176    }
4177 
4178    return TRUE;
4179 }
4180 
vorbis_deinit(stb_vorbis * p)4181 static void vorbis_deinit(stb_vorbis *p)
4182 {
4183    int i,j;
4184 
4185    setup_free(p, p->vendor);
4186    for (i=0; i < p->comment_list_length; ++i) {
4187       setup_free(p, p->comment_list[i]);
4188    }
4189    setup_free(p, p->comment_list);
4190 
4191    if (p->residue_config) {
4192       for (i=0; i < p->residue_count; ++i) {
4193          Residue *r = p->residue_config+i;
4194          if (r->classdata) {
4195             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4196                setup_free(p, r->classdata[j]);
4197             setup_free(p, r->classdata);
4198          }
4199          setup_free(p, r->residue_books);
4200       }
4201    }
4202 
4203    if (p->codebooks) {
4204       CHECK(p);
4205       for (i=0; i < p->codebook_count; ++i) {
4206          Codebook *c = p->codebooks + i;
4207          setup_free(p, c->codeword_lengths);
4208          setup_free(p, c->multiplicands);
4209          setup_free(p, c->codewords);
4210          setup_free(p, c->sorted_codewords);
4211          // c->sorted_values[-1] is the first entry in the array
4212          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4213       }
4214       setup_free(p, p->codebooks);
4215    }
4216    setup_free(p, p->floor_config);
4217    setup_free(p, p->residue_config);
4218    if (p->mapping) {
4219       for (i=0; i < p->mapping_count; ++i)
4220          setup_free(p, p->mapping[i].chan);
4221       setup_free(p, p->mapping);
4222    }
4223    CHECK(p);
4224    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4225       setup_free(p, p->channel_buffers[i]);
4226       setup_free(p, p->previous_window[i]);
4227       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4228       setup_free(p, p->floor_buffers[i]);
4229       #endif
4230       setup_free(p, p->finalY[i]);
4231    }
4232    for (i=0; i < 2; ++i) {
4233       setup_free(p, p->A[i]);
4234       setup_free(p, p->B[i]);
4235       setup_free(p, p->C[i]);
4236       setup_free(p, p->window[i]);
4237       setup_free(p, p->bit_reverse[i]);
4238    }
4239    #ifndef STB_VORBIS_NO_STDIO
4240    if (p->close_on_free) fclose(p->f);
4241    #endif
4242 }
4243 
stb_vorbis_close(stb_vorbis * p)4244 void stb_vorbis_close(stb_vorbis *p)
4245 {
4246    if (p == NULL) return;
4247    vorbis_deinit(p);
4248    setup_free(p,p);
4249 }
4250 
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4251 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4252 {
4253    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4254    if (z) {
4255       p->alloc = *z;
4256       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4257       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4258    }
4259    p->eof = 0;
4260    p->error = VORBIS__no_error;
4261    p->stream = NULL;
4262    p->codebooks = NULL;
4263    p->page_crc_tests = -1;
4264    #ifndef STB_VORBIS_NO_STDIO
4265    p->close_on_free = FALSE;
4266    p->f = NULL;
4267    #endif
4268 }
4269 
stb_vorbis_get_sample_offset(stb_vorbis * f)4270 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4271 {
4272    if (f->current_loc_valid)
4273       return f->current_loc;
4274    else
4275       return -1;
4276 }
4277 
stb_vorbis_get_info(stb_vorbis * f)4278 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4279 {
4280    stb_vorbis_info d;
4281    d.channels = f->channels;
4282    d.sample_rate = f->sample_rate;
4283    d.setup_memory_required = f->setup_memory_required;
4284    d.setup_temp_memory_required = f->setup_temp_memory_required;
4285    d.temp_memory_required = f->temp_memory_required;
4286    d.max_frame_size = f->blocksize_1 >> 1;
4287    return d;
4288 }
4289 
stb_vorbis_get_comment(stb_vorbis * f)4290 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
4291 {
4292    stb_vorbis_comment d;
4293    d.vendor = f->vendor;
4294    d.comment_list_length = f->comment_list_length;
4295    d.comment_list = f->comment_list;
4296    return d;
4297 }
4298 
stb_vorbis_get_error(stb_vorbis * f)4299 int stb_vorbis_get_error(stb_vorbis *f)
4300 {
4301    int e = f->error;
4302    f->error = VORBIS__no_error;
4303    return e;
4304 }
4305 
vorbis_alloc(stb_vorbis * f)4306 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4307 {
4308    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4309    return p;
4310 }
4311 
4312 #ifndef STB_VORBIS_NO_PUSHDATA_API
4313 
stb_vorbis_flush_pushdata(stb_vorbis * f)4314 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4315 {
4316    f->previous_length = 0;
4317    f->page_crc_tests  = 0;
4318    f->discard_samples_deferred = 0;
4319    f->current_loc_valid = FALSE;
4320    f->first_decode = FALSE;
4321    f->samples_output = 0;
4322    f->channel_buffer_start = 0;
4323    f->channel_buffer_end = 0;
4324 }
4325 
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4326 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4327 {
4328    int i,n;
4329    for (i=0; i < f->page_crc_tests; ++i)
4330       f->scan[i].bytes_done = 0;
4331 
4332    // if we have room for more scans, search for them first, because
4333    // they may cause us to stop early if their header is incomplete
4334    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4335       if (data_len < 4) return 0;
4336       data_len -= 3; // need to look for 4-byte sequence, so don't miss
4337                      // one that straddles a boundary
4338       for (i=0; i < data_len; ++i) {
4339          if (data[i] == 0x4f) {
4340             if (0==memcmp(data+i, ogg_page_header, 4)) {
4341                int j,len;
4342                uint32 crc;
4343                // make sure we have the whole page header
4344                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4345                   // only read up to this page start, so hopefully we'll
4346                   // have the whole page header start next time
4347                   data_len = i;
4348                   break;
4349                }
4350                // ok, we have it all; compute the length of the page
4351                len = 27 + data[i+26];
4352                for (j=0; j < data[i+26]; ++j)
4353                   len += data[i+27+j];
4354                // scan everything up to the embedded crc (which we must 0)
4355                crc = 0;
4356                for (j=0; j < 22; ++j)
4357                   crc = crc32_update(crc, data[i+j]);
4358                // now process 4 0-bytes
4359                for (   ; j < 26; ++j)
4360                   crc = crc32_update(crc, 0);
4361                // len is the total number of bytes we need to scan
4362                n = f->page_crc_tests++;
4363                f->scan[n].bytes_left = len-j;
4364                f->scan[n].crc_so_far = crc;
4365                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4366                // if the last frame on a page is continued to the next, then
4367                // we can't recover the sample_loc immediately
4368                if (data[i+27+data[i+26]-1] == 255)
4369                   f->scan[n].sample_loc = ~0;
4370                else
4371                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4372                f->scan[n].bytes_done = i+j;
4373                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4374                   break;
4375                // keep going if we still have room for more
4376             }
4377          }
4378       }
4379    }
4380 
4381    for (i=0; i < f->page_crc_tests;) {
4382       uint32 crc;
4383       int j;
4384       int n = f->scan[i].bytes_done;
4385       int m = f->scan[i].bytes_left;
4386       if (m > data_len - n) m = data_len - n;
4387       // m is the bytes to scan in the current chunk
4388       crc = f->scan[i].crc_so_far;
4389       for (j=0; j < m; ++j)
4390          crc = crc32_update(crc, data[n+j]);
4391       f->scan[i].bytes_left -= m;
4392       f->scan[i].crc_so_far = crc;
4393       if (f->scan[i].bytes_left == 0) {
4394          // does it match?
4395          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4396             // Houston, we have page
4397             data_len = n+m; // consumption amount is wherever that scan ended
4398             f->page_crc_tests = -1; // drop out of page scan mode
4399             f->previous_length = 0; // decode-but-don't-output one frame
4400             f->next_seg = -1;       // start a new page
4401             f->current_loc = f->scan[i].sample_loc; // set the current sample location
4402                                     // to the amount we'd have decoded had we decoded this page
4403             f->current_loc_valid = f->current_loc != ~0U;
4404             return data_len;
4405          }
4406          // delete entry
4407          f->scan[i] = f->scan[--f->page_crc_tests];
4408       } else {
4409          ++i;
4410       }
4411    }
4412 
4413    return data_len;
4414 }
4415 
4416 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4417 int stb_vorbis_decode_frame_pushdata(
4418          stb_vorbis *f,                   // the file we're decoding
4419          const uint8 *data, int data_len, // the memory available for decoding
4420          int *channels,                   // place to write number of float * buffers
4421          float ***output,                 // place to write float ** array of float * buffers
4422          int *samples                     // place to write number of output samples
4423      )
4424 {
4425    int i;
4426    int len,right,left;
4427 
4428    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4429 
4430    if (f->page_crc_tests >= 0) {
4431       *samples = 0;
4432       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4433    }
4434 
4435    f->stream     = (uint8 *) data;
4436    f->stream_end = (uint8 *) data + data_len;
4437    f->error      = VORBIS__no_error;
4438 
4439    // check that we have the entire packet in memory
4440    if (!is_whole_packet_present(f)) {
4441       *samples = 0;
4442       return 0;
4443    }
4444 
4445    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4446       // save the actual error we encountered
4447       enum STBVorbisError error = f->error;
4448       if (error == VORBIS_bad_packet_type) {
4449          // flush and resynch
4450          f->error = VORBIS__no_error;
4451          while (get8_packet(f) != EOP)
4452             if (f->eof) break;
4453          *samples = 0;
4454          return (int) (f->stream - data);
4455       }
4456       if (error == VORBIS_continued_packet_flag_invalid) {
4457          if (f->previous_length == 0) {
4458             // we may be resynching, in which case it's ok to hit one
4459             // of these; just discard the packet
4460             f->error = VORBIS__no_error;
4461             while (get8_packet(f) != EOP)
4462                if (f->eof) break;
4463             *samples = 0;
4464             return (int) (f->stream - data);
4465          }
4466       }
4467       // if we get an error while parsing, what to do?
4468       // well, it DEFINITELY won't work to continue from where we are!
4469       stb_vorbis_flush_pushdata(f);
4470       // restore the error that actually made us bail
4471       f->error = error;
4472       *samples = 0;
4473       return 1;
4474    }
4475 
4476    // success!
4477    len = vorbis_finish_frame(f, len, left, right);
4478    for (i=0; i < f->channels; ++i)
4479       f->outputs[i] = f->channel_buffers[i] + left;
4480 
4481    if (channels) *channels = f->channels;
4482    *samples = len;
4483    *output = f->outputs;
4484    return (int) (f->stream - data);
4485 }
4486 
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4487 stb_vorbis *stb_vorbis_open_pushdata(
4488          const unsigned char *data, int data_len, // the memory available for decoding
4489          int *data_used,              // only defined if result is not NULL
4490          int *error, const stb_vorbis_alloc *alloc)
4491 {
4492    stb_vorbis *f, p;
4493    vorbis_init(&p, alloc);
4494    p.stream     = (uint8 *) data;
4495    p.stream_end = (uint8 *) data + data_len;
4496    p.push_mode  = TRUE;
4497    if (!start_decoder(&p)) {
4498       if (p.eof)
4499          *error = VORBIS_need_more_data;
4500       else
4501          *error = p.error;
4502       return NULL;
4503    }
4504    f = vorbis_alloc(&p);
4505    if (f) {
4506       *f = p;
4507       *data_used = (int) (f->stream - data);
4508       *error = 0;
4509       return f;
4510    } else {
4511       vorbis_deinit(&p);
4512       return NULL;
4513    }
4514 }
4515 #endif // STB_VORBIS_NO_PUSHDATA_API
4516 
stb_vorbis_get_file_offset(stb_vorbis * f)4517 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4518 {
4519    #ifndef STB_VORBIS_NO_PUSHDATA_API
4520    if (f->push_mode) return 0;
4521    #endif
4522    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4523    #ifndef STB_VORBIS_NO_STDIO
4524    return (unsigned int) (ftell(f->f) - f->f_start);
4525    #endif
4526 }
4527 
4528 #ifndef STB_VORBIS_NO_PULLDATA_API
4529 //
4530 // DATA-PULLING API
4531 //
4532 
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4533 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4534 {
4535    for(;;) {
4536       int n;
4537       if (f->eof) return 0;
4538       n = get8(f);
4539       if (n == 0x4f) { // page header candidate
4540          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4541          int i;
4542          // check if we're off the end of a file_section stream
4543          if (retry_loc - 25 > f->stream_len)
4544             return 0;
4545          // check the rest of the header
4546          for (i=1; i < 4; ++i)
4547             if (get8(f) != ogg_page_header[i])
4548                break;
4549          if (f->eof) return 0;
4550          if (i == 4) {
4551             uint8 header[27];
4552             uint32 i, crc, goal, len;
4553             for (i=0; i < 4; ++i)
4554                header[i] = ogg_page_header[i];
4555             for (; i < 27; ++i)
4556                header[i] = get8(f);
4557             if (f->eof) return 0;
4558             if (header[4] != 0) goto invalid;
4559             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4560             for (i=22; i < 26; ++i)
4561                header[i] = 0;
4562             crc = 0;
4563             for (i=0; i < 27; ++i)
4564                crc = crc32_update(crc, header[i]);
4565             len = 0;
4566             for (i=0; i < header[26]; ++i) {
4567                int s = get8(f);
4568                crc = crc32_update(crc, s);
4569                len += s;
4570             }
4571             if (len && f->eof) return 0;
4572             for (i=0; i < len; ++i)
4573                crc = crc32_update(crc, get8(f));
4574             // finished parsing probable page
4575             if (crc == goal) {
4576                // we could now check that it's either got the last
4577                // page flag set, OR it's followed by the capture
4578                // pattern, but I guess TECHNICALLY you could have
4579                // a file with garbage between each ogg page and recover
4580                // from it automatically? So even though that paranoia
4581                // might decrease the chance of an invalid decode by
4582                // another 2^32, not worth it since it would hose those
4583                // invalid-but-useful files?
4584                if (end)
4585                   *end = stb_vorbis_get_file_offset(f);
4586                if (last) {
4587                   if (header[5] & 0x04)
4588                      *last = 1;
4589                   else
4590                      *last = 0;
4591                }
4592                set_file_offset(f, retry_loc-1);
4593                return 1;
4594             }
4595          }
4596         invalid:
4597          // not a valid page, so rewind and look for next one
4598          set_file_offset(f, retry_loc);
4599       }
4600    }
4601 }
4602 
4603 
4604 #define SAMPLE_unknown  0xffffffff
4605 
4606 // seeking is implemented with a binary search, which narrows down the range to
4607 // 64K, before using a linear search (because finding the synchronization
4608 // pattern can be expensive, and the chance we'd find the end page again is
4609 // relatively high for small ranges)
4610 //
4611 // two initial interpolation-style probes are used at the start of the search
4612 // to try to bound either side of the binary search sensibly, while still
4613 // working in O(log n) time if they fail.
4614 
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4615 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4616 {
4617    uint8 header[27], lacing[255];
4618    int i,len;
4619 
4620    // record where the page starts
4621    z->page_start = stb_vorbis_get_file_offset(f);
4622 
4623    // parse the header
4624    getn(f, header, 27);
4625    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4626       return 0;
4627    getn(f, lacing, header[26]);
4628 
4629    // determine the length of the payload
4630    len = 0;
4631    for (i=0; i < header[26]; ++i)
4632       len += lacing[i];
4633 
4634    // this implies where the page ends
4635    z->page_end = z->page_start + 27 + header[26] + len;
4636 
4637    // read the last-decoded sample out of the data
4638    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4639 
4640    // restore file state to where we were
4641    set_file_offset(f, z->page_start);
4642    return 1;
4643 }
4644 
4645 // rarely used function to seek back to the preceding page while finding the
4646 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4647 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4648 {
4649    unsigned int previous_safe, end;
4650 
4651    // now we want to seek back 64K from the limit
4652    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4653       previous_safe = limit_offset - 65536;
4654    else
4655       previous_safe = f->first_audio_page_offset;
4656 
4657    set_file_offset(f, previous_safe);
4658 
4659    while (vorbis_find_page(f, &end, NULL)) {
4660       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4661          return 1;
4662       set_file_offset(f, end);
4663    }
4664 
4665    return 0;
4666 }
4667 
4668 // implements the search logic for finding a page and starting decoding. if
4669 // the function succeeds, current_loc_valid will be true and current_loc will
4670 // be less than or equal to the provided sample number (the closer the
4671 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4672 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4673 {
4674    ProbedPage left, right, mid;
4675    int i, start_seg_with_known_loc, end_pos, page_start;
4676    uint32 delta, stream_length, padding, last_sample_limit;
4677    double offset = 0.0, bytes_per_sample = 0.0;
4678    int probe = 0;
4679 
4680    // find the last page and validate the target sample
4681    stream_length = stb_vorbis_stream_length_in_samples(f);
4682    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
4683    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4684 
4685    // this is the maximum difference between the window-center (which is the
4686    // actual granule position value), and the right-start (which the spec
4687    // indicates should be the granule position (give or take one)).
4688    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4689    if (sample_number < padding)
4690       last_sample_limit = 0;
4691    else
4692       last_sample_limit = sample_number - padding;
4693 
4694    left = f->p_first;
4695    while (left.last_decoded_sample == ~0U) {
4696       // (untested) the first page does not have a 'last_decoded_sample'
4697       set_file_offset(f, left.page_end);
4698       if (!get_seek_page_info(f, &left)) goto error;
4699    }
4700 
4701    right = f->p_last;
4702    assert(right.last_decoded_sample != ~0U);
4703 
4704    // starting from the start is handled differently
4705    if (last_sample_limit <= left.last_decoded_sample) {
4706       if (stb_vorbis_seek_start(f)) {
4707          if (f->current_loc > sample_number)
4708             return error(f, VORBIS_seek_failed);
4709          return 1;
4710       }
4711       return 0;
4712    }
4713 
4714    while (left.page_end != right.page_start) {
4715       assert(left.page_end < right.page_start);
4716       // search range in bytes
4717       delta = right.page_start - left.page_end;
4718       if (delta <= 65536) {
4719          // there's only 64K left to search - handle it linearly
4720          set_file_offset(f, left.page_end);
4721       } else {
4722          if (probe < 2) {
4723             if (probe == 0) {
4724                // first probe (interpolate)
4725                double data_bytes = right.page_end - left.page_start;
4726                bytes_per_sample = data_bytes / right.last_decoded_sample;
4727                offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
4728             } else {
4729                // second probe (try to bound the other side)
4730                double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
4731                if (error >= 0 && error <  8000) error =  8000;
4732                if (error <  0 && error > -8000) error = -8000;
4733                offset += error * 2;
4734             }
4735 
4736             // ensure the offset is valid
4737             if (offset < left.page_end)
4738                offset = left.page_end;
4739             if (offset > right.page_start - 65536)
4740                offset = right.page_start - 65536;
4741 
4742             set_file_offset(f, (unsigned int) offset);
4743          } else {
4744             // binary search for large ranges (offset by 32K to ensure
4745             // we don't hit the right page)
4746             set_file_offset(f, left.page_end + (delta / 2) - 32768);
4747          }
4748 
4749          if (!vorbis_find_page(f, NULL, NULL)) goto error;
4750       }
4751 
4752       for (;;) {
4753          if (!get_seek_page_info(f, &mid)) goto error;
4754          if (mid.last_decoded_sample != ~0U) break;
4755          // (untested) no frames end on this page
4756          set_file_offset(f, mid.page_end);
4757          assert(mid.page_start < right.page_start);
4758       }
4759 
4760       // if we've just found the last page again then we're in a tricky file,
4761       // and we're close enough (if it wasn't an interpolation probe).
4762       if (mid.page_start == right.page_start) {
4763          if (probe >= 2 || delta <= 65536)
4764             break;
4765       } else {
4766          if (last_sample_limit < mid.last_decoded_sample)
4767             right = mid;
4768          else
4769             left = mid;
4770       }
4771 
4772       ++probe;
4773    }
4774 
4775    // seek back to start of the last packet
4776    page_start = left.page_start;
4777    set_file_offset(f, page_start);
4778    if (!start_page(f)) return error(f, VORBIS_seek_failed);
4779    end_pos = f->end_seg_with_known_loc;
4780    assert(end_pos >= 0);
4781 
4782    for (;;) {
4783       for (i = end_pos; i > 0; --i)
4784          if (f->segments[i-1] != 255)
4785             break;
4786 
4787       start_seg_with_known_loc = i;
4788 
4789       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4790          break;
4791 
4792       // (untested) the final packet begins on an earlier page
4793       if (!go_to_page_before(f, page_start))
4794          goto error;
4795 
4796       page_start = stb_vorbis_get_file_offset(f);
4797       if (!start_page(f)) goto error;
4798       end_pos = f->segment_count - 1;
4799    }
4800 
4801    // prepare to start decoding
4802    f->current_loc_valid = FALSE;
4803    f->last_seg = FALSE;
4804    f->valid_bits = 0;
4805    f->packet_bytes = 0;
4806    f->bytes_in_seg = 0;
4807    f->previous_length = 0;
4808    f->next_seg = start_seg_with_known_loc;
4809 
4810    for (i = 0; i < start_seg_with_known_loc; i++)
4811       skip(f, f->segments[i]);
4812 
4813    // start decoding (optimizable - this frame is generally discarded)
4814    if (!vorbis_pump_first_frame(f))
4815       return 0;
4816    if (f->current_loc > sample_number)
4817       return error(f, VORBIS_seek_failed);
4818    return 1;
4819 
4820 error:
4821    // try to restore the file to a valid state
4822    stb_vorbis_seek_start(f);
4823    return error(f, VORBIS_seek_failed);
4824 }
4825 
4826 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4827 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4828 {
4829    int bits_read, bytes_read;
4830 
4831    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4832       return 0;
4833 
4834    // either 1 or 2 bytes were read, figure out which so we can rewind
4835    bits_read = 1 + ilog(f->mode_count-1);
4836    if (f->mode_config[*mode].blockflag)
4837       bits_read += 2;
4838    bytes_read = (bits_read + 7) / 8;
4839 
4840    f->bytes_in_seg += bytes_read;
4841    f->packet_bytes -= bytes_read;
4842    skip(f, -bytes_read);
4843    if (f->next_seg == -1)
4844       f->next_seg = f->segment_count - 1;
4845    else
4846       f->next_seg--;
4847    f->valid_bits = 0;
4848 
4849    return 1;
4850 }
4851 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4852 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4853 {
4854    uint32 max_frame_samples;
4855 
4856    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4857 
4858    // fast page-level search
4859    if (!seek_to_sample_coarse(f, sample_number))
4860       return 0;
4861 
4862    assert(f->current_loc_valid);
4863    assert(f->current_loc <= sample_number);
4864 
4865    // linear search for the relevant packet
4866    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4867    while (f->current_loc < sample_number) {
4868       int left_start, left_end, right_start, right_end, mode, frame_samples;
4869       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4870          return error(f, VORBIS_seek_failed);
4871       // calculate the number of samples returned by the next frame
4872       frame_samples = right_start - left_start;
4873       if (f->current_loc + frame_samples > sample_number) {
4874          return 1; // the next frame will contain the sample
4875       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4876          // there's a chance the frame after this could contain the sample
4877          vorbis_pump_first_frame(f);
4878       } else {
4879          // this frame is too early to be relevant
4880          f->current_loc += frame_samples;
4881          f->previous_length = 0;
4882          maybe_start_packet(f);
4883          flush_packet(f);
4884       }
4885    }
4886    // the next frame should start with the sample
4887    if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
4888    return 1;
4889 }
4890 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4891 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4892 {
4893    if (!stb_vorbis_seek_frame(f, sample_number))
4894       return 0;
4895 
4896    if (sample_number != f->current_loc) {
4897       int n;
4898       uint32 frame_start = f->current_loc;
4899       stb_vorbis_get_frame_float(f, &n, NULL);
4900       assert(sample_number > frame_start);
4901       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4902       f->channel_buffer_start += (sample_number - frame_start);
4903    }
4904 
4905    return 1;
4906 }
4907 
stb_vorbis_seek_start(stb_vorbis * f)4908 int stb_vorbis_seek_start(stb_vorbis *f)
4909 {
4910    if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4911    set_file_offset(f, f->first_audio_page_offset);
4912    f->previous_length = 0;
4913    f->first_decode = TRUE;
4914    f->next_seg = -1;
4915    return vorbis_pump_first_frame(f);
4916 }
4917 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4918 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4919 {
4920    unsigned int restore_offset, previous_safe;
4921    unsigned int end, last_page_loc;
4922 
4923    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4924    if (!f->total_samples) {
4925       unsigned int last;
4926       uint32 lo,hi;
4927       char header[6];
4928 
4929       // first, store the current decode position so we can restore it
4930       restore_offset = stb_vorbis_get_file_offset(f);
4931 
4932       // now we want to seek back 64K from the end (the last page must
4933       // be at most a little less than 64K, but let's allow a little slop)
4934       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4935          previous_safe = f->stream_len - 65536;
4936       else
4937          previous_safe = f->first_audio_page_offset;
4938 
4939       set_file_offset(f, previous_safe);
4940       // previous_safe is now our candidate 'earliest known place that seeking
4941       // to will lead to the final page'
4942 
4943       if (!vorbis_find_page(f, &end, &last)) {
4944          // if we can't find a page, we're hosed!
4945          f->error = VORBIS_cant_find_last_page;
4946          f->total_samples = 0xffffffff;
4947          goto done;
4948       }
4949 
4950       // check if there are more pages
4951       last_page_loc = stb_vorbis_get_file_offset(f);
4952 
4953       // stop when the last_page flag is set, not when we reach eof;
4954       // this allows us to stop short of a 'file_section' end without
4955       // explicitly checking the length of the section
4956       while (!last) {
4957          set_file_offset(f, end);
4958          if (!vorbis_find_page(f, &end, &last)) {
4959             // the last page we found didn't have the 'last page' flag
4960             // set. whoops!
4961             break;
4962          }
4963          previous_safe = last_page_loc+1;
4964          last_page_loc = stb_vorbis_get_file_offset(f);
4965       }
4966 
4967       set_file_offset(f, last_page_loc);
4968 
4969       // parse the header
4970       getn(f, (unsigned char *)header, 6);
4971       // extract the absolute granule position
4972       lo = get32(f);
4973       hi = get32(f);
4974       if (lo == 0xffffffff && hi == 0xffffffff) {
4975          f->error = VORBIS_cant_find_last_page;
4976          f->total_samples = SAMPLE_unknown;
4977          goto done;
4978       }
4979       if (hi)
4980          lo = 0xfffffffe; // saturate
4981       f->total_samples = lo;
4982 
4983       f->p_last.page_start = last_page_loc;
4984       f->p_last.page_end   = end;
4985       f->p_last.last_decoded_sample = lo;
4986 
4987      done:
4988       set_file_offset(f, restore_offset);
4989    }
4990    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4991 }
4992 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4993 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4994 {
4995    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4996 }
4997 
4998 
4999 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)5000 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
5001 {
5002    int len, right,left,i;
5003    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
5004 
5005    if (!vorbis_decode_packet(f, &len, &left, &right)) {
5006       f->channel_buffer_start = f->channel_buffer_end = 0;
5007       return 0;
5008    }
5009 
5010    len = vorbis_finish_frame(f, len, left, right);
5011    for (i=0; i < f->channels; ++i)
5012       f->outputs[i] = f->channel_buffers[i] + left;
5013 
5014    f->channel_buffer_start = left;
5015    f->channel_buffer_end   = left+len;
5016 
5017    if (channels) *channels = f->channels;
5018    if (output)   *output = f->outputs;
5019    return len;
5020 }
5021 
5022 #ifndef STB_VORBIS_NO_STDIO
5023 
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)5024 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5025 {
5026    stb_vorbis *f, p;
5027    vorbis_init(&p, alloc);
5028    p.f = file;
5029    p.f_start = (uint32) ftell(file);
5030    p.stream_len   = length;
5031    p.close_on_free = close_on_free;
5032    if (start_decoder(&p)) {
5033       f = vorbis_alloc(&p);
5034       if (f) {
5035          *f = p;
5036          vorbis_pump_first_frame(f);
5037          return f;
5038       }
5039    }
5040    if (error) *error = p.error;
5041    vorbis_deinit(&p);
5042    return NULL;
5043 }
5044 
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)5045 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5046 {
5047    unsigned int len, start;
5048    start = (unsigned int) ftell(file);
5049    fseek(file, 0, SEEK_END);
5050    len = (unsigned int) (ftell(file) - start);
5051    fseek(file, start, SEEK_SET);
5052    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5053 }
5054 
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)5055 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5056 {
5057    FILE *f;
5058 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5059    if (0 != fopen_s(&f, filename, "rb"))
5060       f = NULL;
5061 #else
5062    f = fopen(filename, "rb");
5063 #endif
5064    if (f)
5065       return stb_vorbis_open_file(f, TRUE, error, alloc);
5066    if (error) *error = VORBIS_file_open_failure;
5067    return NULL;
5068 }
5069 #endif // STB_VORBIS_NO_STDIO
5070 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5071 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5072 {
5073    stb_vorbis *f, p;
5074    if (data == NULL) return NULL;
5075    vorbis_init(&p, alloc);
5076    p.stream = (uint8 *) data;
5077    p.stream_end = (uint8 *) data + len;
5078    p.stream_start = (uint8 *) p.stream;
5079    p.stream_len = len;
5080    p.push_mode = FALSE;
5081    if (start_decoder(&p)) {
5082       f = vorbis_alloc(&p);
5083       if (f) {
5084          *f = p;
5085          vorbis_pump_first_frame(f);
5086          if (error) *error = VORBIS__no_error;
5087          return f;
5088       }
5089    }
5090    if (error) *error = p.error;
5091    vorbis_deinit(&p);
5092    return NULL;
5093 }
5094 
5095 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5096 #define PLAYBACK_MONO     1
5097 #define PLAYBACK_LEFT     2
5098 #define PLAYBACK_RIGHT    4
5099 
5100 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
5101 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
5102 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
5103 
5104 static int8 channel_position[7][6] =
5105 {
5106    { 0 },
5107    { C },
5108    { L, R },
5109    { L, C, R },
5110    { L, R, L, R },
5111    { L, C, R, L, R },
5112    { L, C, R, L, R, C },
5113 };
5114 
5115 
5116 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5117    typedef union {
5118       float f;
5119       int i;
5120    } float_conv;
5121    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5122    #define FASTDEF(x) float_conv x
5123    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5124    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5125    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5126    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5127    #define check_endianness()
5128 #else
5129    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5130    #define check_endianness()
5131    #define FASTDEF(x)
5132 #endif
5133 
copy_samples(short * dest,float * src,int len)5134 static void copy_samples(short *dest, float *src, int len)
5135 {
5136    int i;
5137    check_endianness();
5138    for (i=0; i < len; ++i) {
5139       FASTDEF(temp);
5140       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5141       if ((unsigned int) (v + 32768) > 65535)
5142          v = v < 0 ? -32768 : 32767;
5143       dest[i] = v;
5144    }
5145 }
5146 
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5147 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5148 {
5149    #define BUFFER_SIZE  32
5150    float buffer[BUFFER_SIZE];
5151    int i,j,o,n = BUFFER_SIZE;
5152    check_endianness();
5153    for (o = 0; o < len; o += BUFFER_SIZE) {
5154       memset(buffer, 0, sizeof(buffer));
5155       if (o + n > len) n = len - o;
5156       for (j=0; j < num_c; ++j) {
5157          if (channel_position[num_c][j] & mask) {
5158             for (i=0; i < n; ++i)
5159                buffer[i] += data[j][d_offset+o+i];
5160          }
5161       }
5162       for (i=0; i < n; ++i) {
5163          FASTDEF(temp);
5164          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5165          if ((unsigned int) (v + 32768) > 65535)
5166             v = v < 0 ? -32768 : 32767;
5167          output[o+i] = v;
5168       }
5169    }
5170 }
5171 
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5172 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5173 {
5174    #define BUFFER_SIZE  32
5175    float buffer[BUFFER_SIZE];
5176    int i,j,o,n = BUFFER_SIZE >> 1;
5177    // o is the offset in the source data
5178    check_endianness();
5179    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5180       // o2 is the offset in the output data
5181       int o2 = o << 1;
5182       memset(buffer, 0, sizeof(buffer));
5183       if (o + n > len) n = len - o;
5184       for (j=0; j < num_c; ++j) {
5185          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5186          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5187             for (i=0; i < n; ++i) {
5188                buffer[i*2+0] += data[j][d_offset+o+i];
5189                buffer[i*2+1] += data[j][d_offset+o+i];
5190             }
5191          } else if (m == PLAYBACK_LEFT) {
5192             for (i=0; i < n; ++i) {
5193                buffer[i*2+0] += data[j][d_offset+o+i];
5194             }
5195          } else if (m == PLAYBACK_RIGHT) {
5196             for (i=0; i < n; ++i) {
5197                buffer[i*2+1] += data[j][d_offset+o+i];
5198             }
5199          }
5200       }
5201       for (i=0; i < (n<<1); ++i) {
5202          FASTDEF(temp);
5203          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5204          if ((unsigned int) (v + 32768) > 65535)
5205             v = v < 0 ? -32768 : 32767;
5206          output[o2+i] = v;
5207       }
5208    }
5209 }
5210 
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5211 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5212 {
5213    int i;
5214    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5215       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5216       for (i=0; i < buf_c; ++i)
5217          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5218    } else {
5219       int limit = buf_c < data_c ? buf_c : data_c;
5220       for (i=0; i < limit; ++i)
5221          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5222       for (   ; i < buf_c; ++i)
5223          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5224    }
5225 }
5226 
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5227 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5228 {
5229    float **output = NULL;
5230    int len = stb_vorbis_get_frame_float(f, NULL, &output);
5231    if (len > num_samples) len = num_samples;
5232    if (len)
5233       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5234    return len;
5235 }
5236 
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5237 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5238 {
5239    int i;
5240    check_endianness();
5241    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5242       assert(buf_c == 2);
5243       for (i=0; i < buf_c; ++i)
5244          compute_stereo_samples(buffer, data_c, data, d_offset, len);
5245    } else {
5246       int limit = buf_c < data_c ? buf_c : data_c;
5247       int j;
5248       for (j=0; j < len; ++j) {
5249          for (i=0; i < limit; ++i) {
5250             FASTDEF(temp);
5251             float f = data[i][d_offset+j];
5252             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5253             if ((unsigned int) (v + 32768) > 65535)
5254                v = v < 0 ? -32768 : 32767;
5255             *buffer++ = v;
5256          }
5257          for (   ; i < buf_c; ++i)
5258             *buffer++ = 0;
5259       }
5260    }
5261 }
5262 
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5263 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5264 {
5265    float **output;
5266    int len;
5267    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5268    len = stb_vorbis_get_frame_float(f, NULL, &output);
5269    if (len) {
5270       if (len*num_c > num_shorts) len = num_shorts / num_c;
5271       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5272    }
5273    return len;
5274 }
5275 
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5276 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5277 {
5278    float **outputs;
5279    int len = num_shorts / channels;
5280    int n=0;
5281    int z = f->channels;
5282    if (z > channels) z = channels;
5283    while (n < len) {
5284       int k = f->channel_buffer_end - f->channel_buffer_start;
5285       if (n+k >= len) k = len - n;
5286       if (k)
5287          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5288       buffer += k*channels;
5289       n += k;
5290       f->channel_buffer_start += k;
5291       if (n == len) break;
5292       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5293    }
5294    return n;
5295 }
5296 
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5297 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5298 {
5299    float **outputs;
5300    int n=0;
5301    int z = f->channels;
5302    if (z > channels) z = channels;
5303    while (n < len) {
5304       int k = f->channel_buffer_end - f->channel_buffer_start;
5305       if (n+k >= len) k = len - n;
5306       if (k)
5307          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5308       n += k;
5309       f->channel_buffer_start += k;
5310       if (n == len) break;
5311       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5312    }
5313    return n;
5314 }
5315 
5316 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5317 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5318 {
5319    int data_len, offset, total, limit, error;
5320    short *data;
5321    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5322    if (v == NULL) return -1;
5323    limit = v->channels * 4096;
5324    *channels = v->channels;
5325    if (sample_rate)
5326       *sample_rate = v->sample_rate;
5327    offset = data_len = 0;
5328    total = limit;
5329    data = (short *) malloc(total * sizeof(*data));
5330    if (data == NULL) {
5331       stb_vorbis_close(v);
5332       return -2;
5333    }
5334    for (;;) {
5335       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5336       if (n == 0) break;
5337       data_len += n;
5338       offset += n * v->channels;
5339       if (offset + limit > total) {
5340          short *data2;
5341          total *= 2;
5342          data2 = (short *) realloc(data, total * sizeof(*data));
5343          if (data2 == NULL) {
5344             free(data);
5345             stb_vorbis_close(v);
5346             return -2;
5347          }
5348          data = data2;
5349       }
5350    }
5351    *output = data;
5352    stb_vorbis_close(v);
5353    return data_len;
5354 }
5355 #endif // NO_STDIO
5356 
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5357 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5358 {
5359    int data_len, offset, total, limit, error;
5360    short *data;
5361    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5362    if (v == NULL) return -1;
5363    limit = v->channels * 4096;
5364    *channels = v->channels;
5365    if (sample_rate)
5366       *sample_rate = v->sample_rate;
5367    offset = data_len = 0;
5368    total = limit;
5369    data = (short *) malloc(total * sizeof(*data));
5370    if (data == NULL) {
5371       stb_vorbis_close(v);
5372       return -2;
5373    }
5374    for (;;) {
5375       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5376       if (n == 0) break;
5377       data_len += n;
5378       offset += n * v->channels;
5379       if (offset + limit > total) {
5380          short *data2;
5381          total *= 2;
5382          data2 = (short *) realloc(data, total * sizeof(*data));
5383          if (data2 == NULL) {
5384             free(data);
5385             stb_vorbis_close(v);
5386             return -2;
5387          }
5388          data = data2;
5389       }
5390    }
5391    *output = data;
5392    stb_vorbis_close(v);
5393    return data_len;
5394 }
5395 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5396 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5397 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5398 {
5399    float **outputs;
5400    int len = num_floats / channels;
5401    int n=0;
5402    int z = f->channels;
5403    if (z > channels) z = channels;
5404    while (n < len) {
5405       int i,j;
5406       int k = f->channel_buffer_end - f->channel_buffer_start;
5407       if (n+k >= len) k = len - n;
5408       for (j=0; j < k; ++j) {
5409          for (i=0; i < z; ++i)
5410             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5411          for (   ; i < channels; ++i)
5412             *buffer++ = 0;
5413       }
5414       n += k;
5415       f->channel_buffer_start += k;
5416       if (n == len)
5417          break;
5418       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5419          break;
5420    }
5421    return n;
5422 }
5423 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5424 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5425 {
5426    float **outputs;
5427    int n=0;
5428    int z = f->channels;
5429    if (z > channels) z = channels;
5430    while (n < num_samples) {
5431       int i;
5432       int k = f->channel_buffer_end - f->channel_buffer_start;
5433       if (n+k >= num_samples) k = num_samples - n;
5434       if (k) {
5435          for (i=0; i < z; ++i)
5436             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5437          for (   ; i < channels; ++i)
5438             memset(buffer[i]+n, 0, sizeof(float) * k);
5439       }
5440       n += k;
5441       f->channel_buffer_start += k;
5442       if (n == num_samples)
5443          break;
5444       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5445          break;
5446    }
5447    return n;
5448 }
5449 #endif // STB_VORBIS_NO_PULLDATA_API
5450 
5451 /* Version history
5452     1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5453                            found with Mayhem by ForAllSecure
5454     1.16    - 2019-03-04 - fix warnings
5455     1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5456     1.14    - 2018-02-11 - delete bogus dealloca usage
5457     1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
5458     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5459     1.11    - 2017-07-23 - fix MinGW compilation
5460     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5461     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5462     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5463                            avoid discarding last frame of audio data
5464     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5465                            some more crash fixes when out of memory or with corrupt files
5466     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5467                            some crash fixes when out of memory or with corrupt files
5468     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
5469     1.04    - 2014-08-27 - fix missing const-correct case in API
5470     1.03    - 2014-08-07 - Warning fixes
5471     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
5472     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
5473     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5474                            (API change) report sample rate for decode-full-file funcs
5475     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5476     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5477     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5478     0.99993 - remove assert that fired on legal files with empty tables
5479     0.99992 - rewind-to-start
5480     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5481     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5482     0.9998 - add a full-decode function with a memory source
5483     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5484     0.9996 - query length of vorbis stream in samples/seconds
5485     0.9995 - bugfix to another optimization that only happened in certain files
5486     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5487     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5488     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5489     0.9991 - performance improvement of IMDCT
5490     0.999 - (should have been 0.9990) performance improvement of IMDCT
5491     0.998 - no-CRT support from Casey Muratori
5492     0.997 - bugfixes for bugs found by Terje Mathisen
5493     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5494     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5495     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5496     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5497     0.992 - fixes for MinGW warning
5498     0.991 - turn fast-float-conversion on by default
5499     0.990 - fix push-mode seek recovery if you seek into the headers
5500     0.98b - fix to bad release of 0.98
5501     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5502     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5503     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5504     0.95 - clamping code for 16-bit functions
5505     0.94 - not publically released
5506     0.93 - fixed all-zero-floor case (was decoding garbage)
5507     0.92 - fixed a memory leak
5508     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5509     0.90 - first public release
5510 */
5511 
5512 #endif // STB_VORBIS_HEADER_ONLY
5513 
5514 
5515 /*
5516 ------------------------------------------------------------------------------
5517 This software is available under 2 licenses -- choose whichever you prefer.
5518 ------------------------------------------------------------------------------
5519 ALTERNATIVE A - MIT License
5520 Copyright (c) 2017 Sean Barrett
5521 Permission is hereby granted, free of charge, to any person obtaining a copy of
5522 this software and associated documentation files (the "Software"), to deal in
5523 the Software without restriction, including without limitation the rights to
5524 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5525 of the Software, and to permit persons to whom the Software is furnished to do
5526 so, subject to the following conditions:
5527 The above copyright notice and this permission notice shall be included in all
5528 copies or substantial portions of the Software.
5529 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5530 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5531 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5532 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5533 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5534 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5535 SOFTWARE.
5536 ------------------------------------------------------------------------------
5537 ALTERNATIVE B - Public Domain (www.unlicense.org)
5538 This is free and unencumbered software released into the public domain.
5539 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5540 software, either in source code form or as a compiled binary, for any purpose,
5541 commercial or non-commercial, and by any means.
5542 In jurisdictions that recognize copyright laws, the author or authors of this
5543 software dedicate any and all copyright interest in the software to the public
5544 domain. We make this dedication for the benefit of the public at large and to
5545 the detriment of our heirs and successors. We intend this dedication to be an
5546 overt act of relinquishment in perpetuity of all present and future rights to
5547 this software under copyright law.
5548 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5549 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5550 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5551 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5552 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5553 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5554 ------------------------------------------------------------------------------
5555 */
5556